#include <x86intrin.h>

#if __GNUC__ > 4 && !__clang__ && CLIB_DEBUG == 0
#pragma GCC optimize ("O3")
#endif

/* expanded AES round-key schedules for one key; 15 slots cover AES-256 */
typedef struct
{
  __m128i encrypt_key[15];
  __m128i decrypt_key[15];
} aes_cbc_key_data_t;
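/* AESNI_KEY_ROUNDS (ks) gives the number of AES rounds for the key size
   (10, 12 or 14 for 128-, 192- and 256-bit keys), so at most rounds + 1 = 15
   round keys are needed, including the initial whitening key. */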
static_always_inline void
aes_cbc_dec (__m128i * k, u8 * src, u8 * dst, u8 * iv, int count,
             aesni_key_size_t rounds)
{
  __m128i r0, r1, r2, r3, c0, c1, c2, c3, f;
  int i;

  /* f carries the previous ciphertext block; it starts out as the IV */
  f = _mm_loadu_si128 ((__m128i *) iv);
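  /* main loop: while at least 64 bytes remain, decrypt four ciphertext
     blocks in parallel so the multi-cycle AESDEC latency is overlapped */
  while (count >= 64)
    {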
      _mm_prefetch (src + 128, _MM_HINT_T0);
      _mm_prefetch (dst + 128, _MM_HINT_T0);

      c0 = _mm_loadu_si128 (((__m128i *) src + 0));
      c1 = _mm_loadu_si128 (((__m128i *) src + 1));
      c2 = _mm_loadu_si128 (((__m128i *) src + 2));
      c3 = _mm_loadu_si128 (((__m128i *) src + 3));
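      /* initial AddRoundKey: XOR each ciphertext block with round key 0 */
      r0 = c0 ^ k[0];
      r1 = c1 ^ k[0];
      r2 = c2 ^ k[0];
      r3 = c3 ^ k[0];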
      for (i = 1; i < rounds; i++)
        {
          r0 = _mm_aesdec_si128 (r0, k[i]);
          r1 = _mm_aesdec_si128 (r1, k[i]);
          r2 = _mm_aesdec_si128 (r2, k[i]);
          r3 = _mm_aesdec_si128 (r3, k[i]);
        }

      /* final round uses AESDECLAST with the last round key */
      r0 = _mm_aesdeclast_si128 (r0, k[i]);
      r1 = _mm_aesdeclast_si128 (r1, k[i]);
      r2 = _mm_aesdeclast_si128 (r2, k[i]);
      r3 = _mm_aesdeclast_si128 (r3, k[i]);
      /* complete CBC: XOR each decrypted block with the previous ciphertext
         block (the IV for the very first block) before storing */
      _mm_storeu_si128 ((__m128i *) dst + 0, r0 ^ f);
      _mm_storeu_si128 ((__m128i *) dst + 1, r1 ^ c0);
      _mm_storeu_si128 ((__m128i *) dst + 2, r2 ^ c1);
      _mm_storeu_si128 ((__m128i *) dst + 3, r3 ^ c2);
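      /* the last ciphertext block becomes the chaining value for the
         next iteration */
      f = c3;

      count -= 64;
      src += 64;
      dst += 64;
    }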
  /* leftover blocks are decrypted one at a time */
  while (count > 0)
    {
      c0 = _mm_loadu_si128 ((__m128i *) src);
      r0 = c0 ^ k[0];
      for (i = 1; i < rounds; i++)
        r0 = _mm_aesdec_si128 (r0, k[i]);
      r0 = _mm_aesdeclast_si128 (r0, k[i]);
      _mm_storeu_si128 ((__m128i *) dst, r0 ^ f);
      f = c0;
      count -= 16;
      src += 16;
      dst += 16;
    }
}
static_always_inline u32
aesni_ops_enc_aes_cbc (vlib_main_t * vm, vnet_crypto_op_t * ops[],
                       u32 n_ops, aesni_key_size_t ks)
{
  crypto_ia32_main_t *cm = &crypto_ia32_main;
  crypto_ia32_per_thread_data_t *ptd =
    vec_elt_at_index (cm->per_thread_data, vm->thread_index);
  int rounds = AESNI_KEY_ROUNDS (ks);
  u8 dummy[8192];
  u8 *src[4] = { };
  u8 *dst[4] = { };
  vnet_crypto_key_index_t key_index[4] = { ~0, ~0, ~0, ~0 };
  u32x4 dummy_mask = { };
  u32x4 len = { };
  u32 i, j, count, n_left = n_ops;
  __m128i r[4] = { }, k[4][rounds + 1];
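  /* AES-CBC encryption is serial within one buffer (each block feeds the
     next), so four independent ops are interleaved, one per lane; a lane
     with no op left is pointed at a dummy buffer and excluded from the
     completion check via dummy_mask */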
more:
  for (i = 0; i < 4; i++)
    if (len[i] == 0)
      {
        if (n_left == 0)
          {
            /* no more work to enqueue, so this lane gets the dummy buffer */
            src[i] = dst[i] = dummy;
            len[i] = sizeof (dummy);
            dummy_mask[i] = 0;
          }
        else
          {
            if (ops[0]->flags & VNET_CRYPTO_OP_FLAG_INIT_IV)
              {
                /* generate a fresh IV from the per-thread cbc_iv state and
                   scramble that state for the next request */
                r[i] = ptd->cbc_iv[i];
                _mm_storeu_si128 ((__m128i *) ops[0]->iv, r[i]);
                ptd->cbc_iv[i] = _mm_aesenc_si128 (r[i], r[i]);
              }
            else
              r[i] = _mm_loadu_si128 ((__m128i *) ops[0]->iv);

            src[i] = ops[0]->src;
            dst[i] = ops[0]->dst;
            len[i] = ops[0]->len;
            dummy_mask[i] = ~0;
            /* key changed for this lane: refresh the cached round keys from
               the key data expanded by aesni_cbc_key_exp() for this index */
            if (key_index[i] != ops[0]->key_index)
              {
                aes_cbc_key_data_t *kd;
                key_index[i] = ops[0]->key_index;
                kd = (aes_cbc_key_data_t *) cm->key_data[key_index[i]];
                clib_memcpy_fast (k[i], kd->encrypt_key,
                                  (rounds + 1) * sizeof (__m128i));
              }

            ops[0]->status = VNET_CRYPTO_OP_STATUS_COMPLETED;
            n_left--;
            ops++;
          }
      }
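  /* every lane now has something to do; process as many bytes as the
     shortest lane allows before refilling */
  count = u32x4_min_scalar (len);
  ASSERT (count % 16 == 0);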
  for (i = 0; i < count; i += 16)
    {
      /* CBC step: previous ciphertext (or IV) XOR plaintext XOR round key 0 */
      r[0] ^= _mm_loadu_si128 ((__m128i *) (src[0] + i)) ^ k[0][0];
      r[1] ^= _mm_loadu_si128 ((__m128i *) (src[1] + i)) ^ k[1][0];
      r[2] ^= _mm_loadu_si128 ((__m128i *) (src[2] + i)) ^ k[2][0];
      r[3] ^= _mm_loadu_si128 ((__m128i *) (src[3] + i)) ^ k[3][0];
      for (j = 1; j < rounds; j++)
        {
          r[0] = _mm_aesenc_si128 (r[0], k[0][j]);
          r[1] = _mm_aesenc_si128 (r[1], k[1][j]);
          r[2] = _mm_aesenc_si128 (r[2], k[2][j]);
          r[3] = _mm_aesenc_si128 (r[3], k[3][j]);
        }

      r[0] = _mm_aesenclast_si128 (r[0], k[0][j]);
      r[1] = _mm_aesenclast_si128 (r[1], k[1][j]);
      r[2] = _mm_aesenclast_si128 (r[2], k[2][j]);
      r[3] = _mm_aesenclast_si128 (r[3], k[3][j]);
      /* store the ciphertext; r[] keeps each lane's last block for chaining */
      _mm_storeu_si128 ((__m128i *) (dst[0] + i), r[0]);
      _mm_storeu_si128 ((__m128i *) (dst[1] + i), r[1]);
      _mm_storeu_si128 ((__m128i *) (dst[2] + i), r[2]);
      _mm_storeu_si128 ((__m128i *) (dst[3] + i), r[3]);
    }
  /* advance all four lanes past the bytes processed in this pass */
  for (i = 0; i < 4; i++)
    {
      src[i] += count;
      dst[i] += count;
      len[i] -= count;
    }
  /* refill the lanes while ops remain or while any real (non-dummy) lane
     still has data left */
  if (n_left > 0)
    goto more;

  if (!u32x4_is_all_zero (len & dummy_mask))
    goto more;

  return n_ops;
}
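/* CBC decryption has no chaining dependency between output blocks, so ops
   are simply processed one after another and aes_cbc_dec() pipelines the
   blocks within each buffer.  Sketch of the surrounding routine, assuming
   the per-key data expanded by aesni_cbc_key_exp() is kept in
   cm->key_data[] indexed by key_index. */
static_always_inline u32
aesni_ops_dec_aes_cbc (vlib_main_t * vm, vnet_crypto_op_t * ops[],
                       u32 n_ops, aesni_key_size_t ks)
{
  crypto_ia32_main_t *cm = &crypto_ia32_main;
  int rounds = AESNI_KEY_ROUNDS (ks);
  vnet_crypto_op_t *op = ops[0];
  aes_cbc_key_data_t *kd = (aes_cbc_key_data_t *) cm->key_data[op->key_index];
  u32 n_left = n_ops;

decrypt:
  aes_cbc_dec (kd->decrypt_key, op->src, op->dst, op->iv, op->len, rounds);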
  op->status = VNET_CRYPTO_OP_STATUS_COMPLETED;

  /* move on to the next op until all are done */
  if (--n_left)
    {
      op += 1;
      kd = (aes_cbc_key_data_t *) cm->key_data[op->key_index];
      goto decrypt;
    }

  return n_ops;
}
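/* Key-expansion handler registered below via cm->key_fn.  A minimal sketch
   based on the helpers this file references (clib_mem_alloc_aligned,
   aes_key_expand, aes_key_enc_to_dec); the exact body may differ. */
static_always_inline void *
aesni_cbc_key_exp (vnet_crypto_key_t * key, aesni_key_size_t ks)
{
  aes_cbc_key_data_t *kd;

  kd = clib_mem_alloc_aligned (sizeof (*kd), CLIB_CACHE_LINE_BYTES);
  /* expand the encrypt schedule, then derive the decrypt (equivalent
     inverse cipher) schedule from a second copy of it */
  aes_key_expand (kd->encrypt_key, key->data, ks);
  aes_key_expand (kd->decrypt_key, key->data, ks);
  aes_key_enc_to_dec (kd->decrypt_key, ks);
  return kd;
}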
#define foreach_aesni_cbc_handler_type _(128) _(192) _(256)

/* instantiate per-key-size wrappers around the generic enc/dec and
   key-expansion routines */
#define _(x) \
static u32 aesni_ops_dec_aes_cbc_##x \
(vlib_main_t * vm, vnet_crypto_op_t * ops[], u32 n_ops) \
{ return aesni_ops_dec_aes_cbc (vm, ops, n_ops, AESNI_KEY_##x); } \
static u32 aesni_ops_enc_aes_cbc_##x \
(vlib_main_t * vm, vnet_crypto_op_t * ops[], u32 n_ops) \
{ return aesni_ops_enc_aes_cbc (vm, ops, n_ops, AESNI_KEY_##x); } \
static void * aesni_cbc_key_exp_##x (vnet_crypto_key_t *key) \
{ return aesni_cbc_key_exp (key, AESNI_KEY_##x); }

foreach_aesni_cbc_handler_type;
#undef _

#include <fcntl.h>

/* the per-arch init routine (crypto_ia32_aesni_cbc_init_sse42 / _avx2 /
   _avx512) seeds the per-thread IV state from /dev/urandom and registers
   the handlers */
  if ((fd = open ("/dev/urandom", O_RDONLY)) < 0)
    return clib_error_return_unix (0, "failed to open '/dev/urandom'");
  /* for each worker thread (vec_foreach over cm->per_thread_data), fill the
     four ptd->cbc_iv lanes with random bytes read from fd */
  for (int i = 0; i < 4; i++)
/* register the enc/dec handlers and the key-expansion function for every
   supported AES key size with the vnet crypto subsystem */
#define _(x) \
  vnet_crypto_register_ops_handler (vm, cm->crypto_engine_index, \
                                    VNET_CRYPTO_OP_AES_##x##_CBC_ENC, \
                                    aesni_ops_enc_aes_cbc_##x); \
  vnet_crypto_register_ops_handler (vm, cm->crypto_engine_index, \
                                    VNET_CRYPTO_OP_AES_##x##_CBC_DEC, \
                                    aesni_ops_dec_aes_cbc_##x); \
  cm->key_fn[VNET_CRYPTO_ALG_AES_##x##_CBC] = aesni_cbc_key_exp_##x;
  foreach_aesni_cbc_handler_type;
#undef _
/*
 * Functions defined in this file:
 *   static_always_inline void aes_cbc_dec (__m128i * k, u8 * src, u8 * dst,
 *                     u8 * iv, int count, aesni_key_size_t rounds);
 *   static_always_inline u32 aesni_ops_enc_aes_cbc (vlib_main_t * vm,
 *                     vnet_crypto_op_t * ops[], u32 n_ops,
 *                     aesni_key_size_t ks);
 *   static_always_inline u32 aesni_ops_dec_aes_cbc (vlib_main_t * vm,
 *                     vnet_crypto_op_t * ops[], u32 n_ops,
 *                     aesni_key_size_t ks);
 *   static_always_inline void *aesni_cbc_key_exp (vnet_crypto_key_t * key,
 *                     aesni_key_size_t ks);
 *   clib_error_t *crypto_ia32_aesni_cbc_init_sse42 (vlib_main_t * vm);
 *   clib_error_t *crypto_ia32_aesni_cbc_init_avx2 (vlib_main_t * vm);
 *   clib_error_t *crypto_ia32_aesni_cbc_init_avx512 (vlib_main_t * vm);
 *
 * Referenced globals, members and helpers:
 *   crypto_ia32_main_t crypto_ia32_main;
 *   crypto_ia32_per_thread_data_t *per_thread_data;
 *   vnet_crypto_op_status_t status;
 *   u32 vnet_crypto_key_index_t;
 *   static_always_inline void aes_key_expand (__m128i * k, u8 * key,
 *                     aesni_key_size_t ks);
 *   static_always_inline void aes_key_enc_to_dec (__m128i * k,
 *                     aesni_key_size_t ks);
 *   static_always_inline u32 u32x4_min_scalar (u32x4 v);
 *   static void *clib_mem_alloc_aligned (uword size, uword align);
 *   #define clib_memcpy_fast(a, b, c)
 *   #define clib_error_return_unix(e, args...)
 *   #define vec_elt_at_index(v, i)  - get vector value at index i, checking
 *                                     that i is in bounds
 *   #define vec_foreach(var, vec)   - vector iterator
 *   #define AESNI_KEY_ROUNDS(x)
 *   #define VNET_CRYPTO_OP_FLAG_INIT_IV
 *   #define CLIB_CACHE_LINE_BYTES
 *   #define static_always_inline
 *   #define foreach_aesni_cbc_handler_type
 */