#if __GNUC__ > 4 && !__clang__ && CLIB_DEBUG == 0
#pragma GCC optimize ("O3")
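/* on GCC release builds (CLIB_DEBUG == 0) this translation unit is compiled
   at -O3 regardless of the global optimization level */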
  u8x64 decrypt_key[15];
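  /* VAES build: decrypt round keys are stored as 512-bit vectors so one key
     covers four 16-byte blocks at a time; 15 entries fit the AES-256 case of
     14 round keys plus the initial key */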
      for (int i = 1; i < rounds; i++)
      for (int i = 0; i < rounds - 1; i++)
        {
          r[0] = vaesimcq_u8 (vaesdq_u8 (r[0], k[i]));
          r[1] = vaesimcq_u8 (vaesdq_u8 (r[1], k[i]));
          r[2] = vaesimcq_u8 (vaesdq_u8 (r[2], k[i]));
          r[3] = vaesimcq_u8 (vaesdq_u8 (r[3], k[i]));
        }
      r[0] = vaesdq_u8 (r[0], k[rounds - 1]) ^ k[rounds];
      r[1] = vaesdq_u8 (r[1], k[rounds - 1]) ^ k[rounds];
      r[2] = vaesdq_u8 (r[2], k[rounds - 1]) ^ k[rounds];
      r[3] = vaesdq_u8 (r[3], k[rounds - 1]) ^ k[rounds];
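      /* ARMv8 crypto: vaesdq_u8 is AddRoundKey + InvShiftRows + InvSubBytes
         and vaesimcq_u8 is InvMixColumns, so the loop above runs rounds - 1
         full inverse rounds and the two final steps (vaesdq_u8, then XOR with
         the last round key) complete the decryption of each block */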
      c[0] = r[0] = src[0];
      for (int i = 1; i < rounds; i++)
      c[0] = r[0] = src[0];
      for (int i = 0; i < rounds - 1; i++)
        r[0] = vaesimcq_u8 (vaesdq_u8 (r[0], k[i]));
      r[0] = vaesdq_u8 (r[0], k[rounds - 1]) ^ k[rounds];
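      /* single-block tail: leftover blocks are decrypted one at a time, with
         the ciphertext saved in c[0] becoming the chaining value for the next
         block's CBC XOR */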
aes_block_load_x4 (u8 * src[], int i)
  __m512i perm = { 6, 7, 8, 9, 10, 11, 12, 13 };
  return (u8x64) _mm512_permutex2var_epi64 ((__m512i) a, perm, (__m512i) b);
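  /* the permute selects 64-bit lanes 6..13 of the concatenation {a, b}, i.e.
     the last 16 bytes of a followed by the first 48 bytes of b; this shifts a
     vector of ciphertext blocks right by one block so every decrypted block
     can be XORed with the ciphertext block (or IV) that precedes it */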
  int i, n_blocks = count >> 4;
  f = (u8x64) _mm512_mask_loadu_epi64 (_mm512_setzero_si512 (), 0xc0,
                                       (__m512i *) (iv - 3));
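  /* mask 0xc0 loads only 64-bit lanes 6 and 7 from (iv - 3), which places the
     16-byte IV in the top 128-bit lane of f and zeroes the rest, matching the
     layout expected by aes_cbc_dec_permute */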
  while (n_blocks >= 16)
      for (i = 1; i < rounds; i++)
        {
          r[0] = aes_dec_round_x4 (r[0], k[i]);
          r[1] = aes_dec_round_x4 (r[1], k[i]);
          r[2] = aes_dec_round_x4 (r[2], k[i]);
          r[3] = aes_dec_round_x4 (r[3], k[i]);
        }

      r[0] = aes_dec_last_round_x4 (r[0], k[i]);
      r[1] = aes_dec_last_round_x4 (r[1], k[i]);
      r[2] = aes_dec_last_round_x4 (r[2], k[i]);
      r[3] = aes_dec_last_round_x4 (r[3], k[i]);
      dst[0] = r[0] ^= aes_cbc_dec_permute (f, c[0]);
      dst[1] = r[1] ^= aes_cbc_dec_permute (c[0], c[1]);
      dst[2] = r[2] ^= aes_cbc_dec_permute (c[1], c[2]);
      dst[3] = r[3] ^= aes_cbc_dec_permute (c[2], c[3]);
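      /* CBC: plaintext = decrypted block XOR previous ciphertext block; the
         permute provides, for each 64-byte register, the four preceding
         ciphertext blocks, with f supplying the IV or the last block of the
         previous iteration */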
      m = (1 << (n_blocks * 2)) - 1;
      c[0] = (u8x64) _mm512_mask_loadu_epi64 ((__m512i) c[0], m,
                                              (__m512i *) src);
      f = aes_cbc_dec_permute (f, c[0]);
      for (i = 1; i < rounds; i++)
        r[0] = aes_dec_round_x4 (r[0], k[i]);
      r[0] = aes_dec_last_round_x4 (r[0], k[i]);
      _mm512_mask_storeu_epi64 ((__m512i *) dst, m, (__m512i) (r[0] ^ f));
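      /* masked tail: two mask bits (one per 64-bit lane) per remaining
         16-byte block; with four or more blocks left the 8-bit mask saturates
         at 0xff, so a full 4-block vector is processed per pass */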
#define u32xN_min_scalar u32x16_min_scalar
#define u32xN_is_all_zero u32x16_is_all_zero
#define u32xN_splat u32x16_splat

#define u32xN_min_scalar u32x4_min_scalar
#define u32xN_is_all_zero u32x4_is_all_zero
#define u32xN_splat u32x4_splat
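/* the two groups of defines above are the VAES (16-lane u32x16) and generic
   (4-lane u32x4) variants of the same helpers, selected by preprocessor
   conditionals, so the multi-buffer bookkeeping below is lane-width agnostic */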
  u8 placeholder[8192];
  u32xN placeholder_mask = { };
  for (i = 0; i < N; i++)
  for (i = 0; i < N; i++)
      len[i] = sizeof (placeholder);
      placeholder_mask[i] = 0;
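      /* idle lanes: when fewer than N ops remain, a lane is pointed at the
         local placeholder buffer and its mask bit is cleared so the result is
         ignored when checking for completion */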
      *(u8x16u *) ops[0]->iv = t;
      placeholder_mask[i] = ~0;
      if (key_index[i] != ops[0]->key_index)
        for (j = 0; j < rounds + 1; j++)
      ops[0]->status = VNET_CRYPTO_OP_STATUS_COMPLETED;
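      /* the op is marked completed as soon as its IV, buffers and round keys
         are latched into lane i; the actual encryption happens in the shared
         multi-buffer loop below */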
      for (j = 1; j < rounds; j++)
        {
          r[0] = aes_enc_round_x4 (r[0], k[j][0]);
          r[1] = aes_enc_round_x4 (r[1], k[j][1]);
          r[2] = aes_enc_round_x4 (r[2], k[j][2]);
          r[3] = aes_enc_round_x4 (r[3], k[j][3]);
        }

      r[0] = aes_enc_last_round_x4 (r[0], k[j][0]);
      r[1] = aes_enc_last_round_x4 (r[1], k[j][1]);
      r[2] = aes_enc_last_round_x4 (r[2], k[j][2]);
      r[3] = aes_enc_last_round_x4 (r[3], k[j][3]);

      aes_block_store_x4 (dst, i, r[0]);
      aes_block_store_x4 (dst + 4, i, r[1]);
      aes_block_store_x4 (dst + 8, i, r[2]);
      aes_block_store_x4 (dst + 12, i, r[3]);
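      /* VAES path: each u8x64 register holds one block from four different
         lanes, so the four registers advance 16 independent CBC streams per
         iteration; after the loop j == rounds, making k[j] the last round key */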
      for (j = 1; j < rounds; j++)
      for (j = 0; j < rounds - 1; j++)
        {
          r[0] = vaesmcq_u8 (vaeseq_u8 (r[0], k[j][0]));
          r[1] = vaesmcq_u8 (vaeseq_u8 (r[1], k[j][1]));
          r[2] = vaesmcq_u8 (vaeseq_u8 (r[2], k[j][2]));
          r[3] = vaesmcq_u8 (vaeseq_u8 (r[3], k[j][3]));
        }
      r[0] = vaeseq_u8 (r[0], k[j][0]) ^ k[rounds][0];
      r[1] = vaeseq_u8 (r[1], k[j][1]) ^ k[rounds][1];
      r[2] = vaeseq_u8 (r[2], k[j][2]) ^ k[rounds][2];
      r[3] = vaeseq_u8 (r[3], k[j][3]) ^ k[rounds][3];
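      /* NEON path: vaeseq_u8 is AddRoundKey + ShiftRows + SubBytes and
         vaesmcq_u8 is MixColumns; after the loop j == rounds - 1, so the final
         vaeseq_u8 uses the second-to-last key and the XOR applies the last
         round key */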
  for (i = 0; i < N; i++)
                (u8x16u *) op->iv, op->len, rounds);
                (u8x16u *) op->iv, op->len, rounds);
  op->status = VNET_CRYPTO_OP_STATUS_COMPLETED;
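  /* decrypt ops are processed one at a time: CBC decryption has no serial
     dependency between blocks within an op, so aes_cbc_dec / vaes_cbc_dec
     already exploit block-level parallelism internally */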
#define foreach_aes_cbc_handler_type _(128) _(192) _(256)
static u32 aes_ops_dec_aes_cbc_##x \
(vlib_main_t * vm, vnet_crypto_op_t * ops[], u32 n_ops) \
{ return aes_ops_dec_aes_cbc (vm, ops, n_ops, AES_KEY_##x); } \
static u32 aes_ops_enc_aes_cbc_##x \
(vlib_main_t * vm, vnet_crypto_op_t * ops[], u32 n_ops) \
{ return aes_ops_enc_aes_cbc (vm, ops, n_ops, AES_KEY_##x); } \
static void * aes_cbc_key_exp_##x (vnet_crypto_key_t *key) \
{ return aes_cbc_key_exp (key, AES_KEY_##x); }
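/* instantiated once per key size via foreach_aes_cbc_handler_type, producing
   aes_ops_enc_aes_cbc_128/192/256, the matching decrypt wrappers and the
   per-size key expansion helpers referenced in the registrations below */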
  if ((fd = open ("/dev/urandom", O_RDONLY)) < 0)
    for (int i = 0; i < 4; i++)
  vnet_crypto_register_ops_handler (vm, cm->crypto_engine_index,             \
                                    VNET_CRYPTO_OP_AES_##x##_CBC_ENC,        \
                                    aes_ops_enc_aes_cbc_##x);                \
  vnet_crypto_register_ops_handler (vm, cm->crypto_engine_index,             \
                                    VNET_CRYPTO_OP_AES_##x##_CBC_DEC,        \
                                    aes_ops_dec_aes_cbc_##x);                \
  cm->key_fn[VNET_CRYPTO_ALG_AES_##x##_CBC] = aes_cbc_key_exp_##x;
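/* expanded for each key size: registers the encrypt and decrypt op handlers
   with the crypto engine and installs the matching key expansion function
   for the corresponding AES-CBC algorithm */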