#if __GNUC__ > 4 && !__clang__ && CLIB_DEBUG == 0
#pragma GCC optimize ("O3")
#endif

static const u32x4 ctr_inv_1 = { 0, 0, 0, 1 << 24 };
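/*
 * Note: the counter block Y is kept in the byte order in which it is fed
 * to AES, i.e. big endian.  Adding ctr_inv_1 therefore bumps only the most
 * significant byte of the last 32-bit lane, which is the least significant
 * byte of the big-endian block counter - a single vector add instead of a
 * byte swap, add and swap back, valid as long as that byte does not wrap.
 */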
/* aes_gcm_enc_first_round (excerpt): XOR AES round key 0 into n_blocks
   counter blocks and advance the counter */

  /* common case - the low (big-endian) counter byte does not wrap, so the
     whole counter vector can be advanced with one add */
  for (int i = 0; i < n_blocks; i++)
    {
      r[i] = k ^ (u8x16) ctr->Y;
      ctr->Y += ctr_inv_1;
    }
  /* ... */
  /* wrap case - recompute the last 32-bit word of Y from the host-order
     counter */
  for (int i = 0; i < n_blocks; i++)
    {
      r[i] = k ^ (u8x16) ctr->Y;
      ctr->counter++;
      ctr->Y[3] = clib_host_to_net_u32 (ctr->counter + 1);
    }
static_always_inline void
aes_gcm_enc_round (u8x16 * r, u8x16 k, int n_blocks)
{
  for (int i = 0; i < n_blocks; i++)
    r[i] = aes_enc_round (r[i], k);
}

static_always_inline void
aes_gcm_enc_last_round (u8x16 * r, u8x16 * d, u8x16 const *k,
			int rounds, int n_blocks)
{
  /* additional rounds for AES-192 and AES-256 */
  for (int i = 10; i < rounds; i++)
    aes_gcm_enc_round (r, k[i], n_blocks);

  for (int i = 0; i < n_blocks; i++)
    d[i] ^= aes_enc_last_round (r[i], k[rounds]);
}
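/*
 * rounds comes from AES_KEY_ROUNDS (ks): 10, 12 or 14 for
 * AES-128/192/256.  Rounds 0-9 are issued explicitly by the callers, so
 * the first loop above only executes the extra rounds of the larger key
 * sizes before the final round key k[rounds] is applied.
 */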
static_always_inline u8x16
aes_gcm_ghash_blocks (u8x16 T, aes_gcm_key_data_t * kd,
		      u8x16u * in, int n_blocks)
{
  ghash_data_t _gd, *gd = &_gd;
  u8x16 *Hi = (u8x16 *) kd->Hi + NUM_HI - n_blocks;
  ghash_mul_first (gd, u8x16_reflect (in[0]) ^ T, Hi[0]);
  for (int i = 1; i < n_blocks; i++)
    ghash_mul_next (gd, u8x16_reflect (in[i]), Hi[i]);
  ghash_reduce (gd);
  ghash_reduce2 (gd);
  return ghash_final (gd);
}
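/*
 * kd->Hi holds precomputed powers of the hash key H, highest power at the
 * lowest index.  Offsetting by NUM_HI - n_blocks pairs the first data
 * block with H^n_blocks and the last with H^1, so all carry-less
 * multiplications can be accumulated and a single reduction performed at
 * the end (ghash_reduce / ghash_reduce2 / ghash_final).
 */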
/* aes_gcm_ghash (excerpt): GHASH an input buffer (used for the AAD),
   8 blocks (128 bytes) at a time while possible */
  while (n_left >= 128)
    /* ... */
/* aes_gcm_calc (excerpt): encrypt or decrypt up to 4 blocks; the GHASH of
   the pending ciphertext blocks is folded into T in between the AES
   rounds */
  const u8x16 *rk = (u8x16 *) kd->Ke;
  u8x16 *Hi = (u8x16 *) kd->Hi + NUM_HI - ghash_blocks;
  /* ... AES rounds 0-3, first GHASH multiply ... */
  if ((f & AES_GCM_F_WITH_GHASH) && gc++ < ghash_blocks)
    /* ... GHASH multiply, block 2 ... */
  /* ... AES rounds 4 and 5 ... */
  if ((f & AES_GCM_F_WITH_GHASH) && gc++ < ghash_blocks)
    /* ... GHASH multiply, block 3 ... */
  /* ... AES rounds 6 and 7 ... */
  if ((f & AES_GCM_F_WITH_GHASH) && gc++ < ghash_blocks)
    /* ... GHASH multiply, block 4 ... */
  if (f & AES_GCM_F_WITH_GHASH)
    /* ... first GHASH reduction step ... */
  if (f & AES_GCM_F_ENCRYPT)
    /* ... load the input blocks ... */
  if (f & AES_GCM_F_WITH_GHASH)
    /* ... second GHASH reduction step ... */
  /* ... last AES round(s), store the output ... */
  if (f & AES_GCM_F_WITH_GHASH)
    /* ... GHASH final step, new T ... */
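/*
 * The flag tests above implement software pipelining: while the AES rounds
 * for the current 1-4 counter blocks are in flight, the GHASH
 * multiplications for already-available ciphertext blocks are issued in
 * between, keeping the AES and carry-less multiply units busy at the same
 * time.  During encryption the hashed blocks are the ones produced by the
 * previous call (so AES_GCM_F_WITH_GHASH is off for the very first chunk);
 * during decryption the incoming ciphertext can be hashed in the same
 * call.
 */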
/* aes_gcm_calc_double (excerpt): same interleaving as aes_gcm_calc, but
   for a fixed batch of 8 blocks */
  const u8x16 *rk = (u8x16 *) kd->Ke;
  u8x16 *Hi = (u8x16 *) kd->Hi + NUM_HI - 8;
  /* ... */
  if (f & AES_GCM_F_DECRYPT)
    /* ... load data (decrypt round) ... */
  /* ... */
  if (f & AES_GCM_F_ENCRYPT)
    /* ... load data (encrypt round) ... */
  /* ... */
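/*
 * aes_gcm_calc_double is the unrolled fast path: it always processes 8
 * blocks, so the GHASH side works on a fixed window of 8 pending blocks
 * (Hi + NUM_HI - 8) and the gc/ghash_blocks bookkeeping of aes_gcm_calc is
 * not needed in the hot loop.
 */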
/* aes_gcm_ghash_last: GHASH the final (possibly partial) batch of blocks */
static_always_inline u8x16
aes_gcm_ghash_last (u8x16 T, aes_gcm_key_data_t * kd, u8x16 * d,
		    int n_blocks, int n_bytes)
{
  u8x16 *Hi = (u8x16 *) kd->Hi + NUM_HI - n_blocks;
  /* ... mask the last block to n_bytes, multiply, reduce, finalize ... */
}
static const u32x16 ctr_inv_1234 = {
  0, 0, 0, 1 << 24, 0, 0, 0, 2 << 24, 0, 0, 0, 3 << 24, 0, 0, 0, 4 << 24,
};

static const u32x16 ctr_inv_4444 = {
  0, 0, 0, 4 << 24, 0, 0, 0, 4 << 24, 0, 0, 0, 4 << 24, 0, 0, 0, 4 << 24
};

static const u32x16 ctr_1234 = {
  1, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0, 4, 0, 0, 0,
};
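/*
 * 512-bit (VAES) counter constants: each u32x16 covers four 128-bit
 * counter blocks.  ctr_inv_1234 turns a broadcast Y0 into the four blocks
 * Y0+1..Y0+4 (again through the byte-swapped lane), ctr_inv_4444 advances
 * all four blocks by 4 per register, and ctr_1234 is used on the slow path
 * where the counter words are rebuilt in host byte order.
 */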
/* aes4_gcm_enc_first_round (excerpt): VAES variant; every u8x64 register
   carries four counter blocks */
      /* common case - bump all four counters with one vector add */
      r[i] = k ^ (u8x64) ctr->Y4;
      ctr->Y4 += ctr_inv_4444;
      /* ... */
      /* low-byte wrap case - rebuild the counter words from the host-order
         counter instead */
      r[i] = k ^ (u8x64) ctr->Y4;
      Yc = u32x16_splat (ctr->counter + 4 * (i + 1)) + ctr_1234;
      /* ... byte-reflect Yc and blend it into ctr->Y4 ... */
      r[i] = k ^ (u8x64) ctr->Y4;
      ctr->Y4 += ctr_inv_4444;
      /* ... */
static_always_inline void
aes4_gcm_enc_round (u8x64 * r, u8x64 k, int n_blocks)
{
  for (int i = 0; i < n_blocks; i++)
    r[i] = aes_enc_round_x4 (r[i], k);
}

static_always_inline void
aes4_gcm_enc_last_round (u8x64 * r, u8x64 * d, u8x64 const *k,
			 int rounds, int n_blocks)
{
  /* additional rounds for AES-192 and AES-256 */
  for (int i = 10; i < rounds; i++)
    aes4_gcm_enc_round (r, k[i], n_blocks);

  for (int i = 0; i < n_blocks; i++)
    d[i] ^= aes_enc_last_round_x4 (r[i], k[rounds]);
}
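/*
 * The _x4 helpers are the VAES/AVX512 counterparts of aes_enc_round and
 * aes_enc_last_round: a single VAESENC(LAST) on a u8x64 register performs
 * the round on four AES blocks at once, so these loops are identical to
 * the 128-bit versions apart from the vector width.
 */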
/* aes4_gcm_calc (excerpt): VAES variant of aes_gcm_calc; n u8x64 registers
   (up to 16 AES blocks, 256 bytes) are processed per call */
  ghash4_data_t _gd, *gd = &_gd;
  const u8x64 *rk = (u8x64 *) kd->Ke4;
  int i, ghash_blocks, gc = 1;
  u8x64u *Hi4, *inv = (u8x64u *) in, *outv = (u8x64u *) out;
  u64 byte_mask = _bextr_u64 (-1LL, 0, last_4block_bytes);

  /* encrypt: GHASH covers the four 512-bit blocks produced by the
     previous call */
      Hi4 = (u8x64u *) (kd->Hi + NUM_HI - ghash_blocks * 4);
  /* decrypt: GHASH covers the 1..4 blocks of this call; in the last round
     the final 512-bit block may be partial, so Hi4 is adjusted */
      int n_128bit_blocks = n * 4;
      /* ... */
	n_128bit_blocks += ((last_4block_bytes + 15) >> 4) - 4;
      Hi4 = (u8x64u *) (kd->Hi + NUM_HI - n_128bit_blocks);

  /* AES rounds 0 and 1 */
  aes4_gcm_enc_first_round (r, ctr, rk[0], n);
  aes4_gcm_enc_round (r, rk[1], n);
  /* ... */
  /* AES rounds 2 and 3 */
  aes4_gcm_enc_round (r, rk[2], n);
  aes4_gcm_enc_round (r, rk[3], n);
  if ((f & AES_GCM_F_WITH_GHASH) && gc++ < ghash_blocks)
    /* ... GHASH multiply ... */
  /* AES rounds 4 and 5 */
  aes4_gcm_enc_round (r, rk[4], n);
  aes4_gcm_enc_round (r, rk[5], n);
  if ((f & AES_GCM_F_WITH_GHASH) && gc++ < ghash_blocks)
    /* ... GHASH multiply ... */
  /* AES rounds 6 and 7 */
  aes4_gcm_enc_round (r, rk[6], n);
  aes4_gcm_enc_round (r, rk[7], n);
  if ((f & AES_GCM_F_WITH_GHASH) && gc++ < ghash_blocks)
    /* ... GHASH multiply ... */
  if (f & AES_GCM_F_ENCRYPT)
    /* ... load the input blocks ... */
  /* AES rounds 8 and 9 */
  aes4_gcm_enc_round (r, rk[8], n);
  aes4_gcm_enc_round (r, rk[9], n);
  /* last AES round(s), store the output, GHASH reduction */
  aes4_gcm_enc_last_round (r, d, rk, rounds, n);
  /* ... */
  return ghash4_final (gd);
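/*
 * last_4block_bytes is the number of valid bytes in the final (possibly
 * partial) 512-bit block.  _bextr_u64 (-1LL, 0, n) produces an n-bit mask,
 * which is then used with the u8x64_mask_load / u8x64_mask_store /
 * u8x64_mask_blend helpers so the tail is processed without touching
 * memory past the end of the buffer.
 */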
/* aes4_gcm_calc_double (excerpt): fixed-size variant handling 8 u8x64
   registers (32 AES blocks, 512 bytes) per call - a first batch of four
   followed by a second batch of four */
  ghash4_data_t _gd, *gd = &_gd;
  const u8x64 *rk = (u8x64 *) kd->Ke4;
  u8x64 *Hi4 = (u8x64 *) (kd->Hi + NUM_HI - 32);
  u8x64u *inv = (u8x64u *) in, *outv = (u8x64u *) out;

  /* first four 512-bit blocks - AES rounds 0 and 1 */
  aes4_gcm_enc_first_round (r, ctr, rk[0], 4);
  aes4_gcm_enc_round (r, rk[1], 4);
  for (int i = 0; i < 4; i++)
    /* ... */
  /* AES rounds 2-9, interleaved with GHASH multiplies */
  aes4_gcm_enc_round (r, rk[2], 4);
  aes4_gcm_enc_round (r, rk[3], 4);
  /* ... */
  aes4_gcm_enc_round (r, rk[4], 4);
  aes4_gcm_enc_round (r, rk[5], 4);
  /* ... */
  aes4_gcm_enc_round (r, rk[6], 4);
  aes4_gcm_enc_round (r, rk[7], 4);
  /* ... */
  aes4_gcm_enc_round (r, rk[8], 4);
  aes4_gcm_enc_round (r, rk[9], 4);
  /* last AES round(s) and store */
  for (int i = 0; i < 4; i++)
    /* ... */
  aes4_gcm_enc_last_round (r, d, rk, rounds, 4);
  for (int i = 0; i < 4; i++)
    /* ... */
  if (f & AES_GCM_F_DECRYPT)
    for (int i = 0; i < 4; i++)
      /* ... */

  /* second four 512-bit blocks - same interleaving */
  aes4_gcm_enc_first_round (r, ctr, rk[0], 4);
  aes4_gcm_enc_round (r, rk[1], 4);
  /* ... */
  aes4_gcm_enc_round (r, rk[2], 4);
  aes4_gcm_enc_round (r, rk[3], 4);
  /* ... */
  aes4_gcm_enc_round (r, rk[4], 4);
  aes4_gcm_enc_round (r, rk[5], 4);
  /* ... */
  aes4_gcm_enc_round (r, rk[6], 4);
  aes4_gcm_enc_round (r, rk[7], 4);
  /* ... */
  aes4_gcm_enc_round (r, rk[8], 4);
  aes4_gcm_enc_round (r, rk[9], 4);
  /* ... */
  if (f & AES_GCM_F_ENCRYPT)
    for (int i = 0; i < 4; i++)
      /* ... */
  aes4_gcm_enc_last_round (r, d, rk, rounds, 4);
  for (int i = 0; i < 4; i++)
    /* ... */
  return ghash4_final (gd);
/* aes4_gcm_ghash_last: GHASH the final 1..4 u8x64 blocks, the last of
   which may be partial */
static_always_inline u8x16
aes4_gcm_ghash_last (u8x16 T, aes_gcm_key_data_t * kd, u8x64 * d,
		     int n, int last_4block_bytes)
{
  ghash4_data_t _gd, *gd = &_gd;
  u64 byte_mask = _bextr_u64 (-1LL, 0, last_4block_bytes);
  n_128bit_blocks = (n - 1) * 4 + ((last_4block_bytes + 15) >> 4);
  Hi4 = (u8x64u *) (kd->Hi + NUM_HI - n_128bit_blocks);
  /* ... mask the tail block, multiply against Hi4[], reduce ... */
  return ghash4_final (gd);
}
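/*
 * n_128bit_blocks counts the 16-byte blocks that actually contain data:
 * (n - 1) full 512-bit blocks contribute 4 each and the partial last one
 * contributes ceil (last_4block_bytes / 16), so the matching powers of H
 * are selected for the final GHASH.
 */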
/* aes_gcm_enc (excerpt): bulk encryption driver.  The first chunk is
   encrypted without GHASH (there is no ciphertext to hash yet); after that
   each call hashes the ciphertext of the previous chunk, and the remainder
   is dispatched by size with AES_GCM_F_LAST_ROUND set. */
static_always_inline u8x16
aes_gcm_enc (u8x16 T, aes_gcm_key_data_t * kd, aes_gcm_counter_t * ctr,
	     u8x16u * inv, u8x16u * outv, u32 n_left, int rounds)

  /* VAES path, inputs shorter than 256 bytes: */
      /* ... */
      aes4_gcm_calc (T, kd, d4, ctr, inv, outv, rounds, 4, n_left, f);
      return aes4_gcm_ghash_last (T, kd, d4, 4, n_left);
  else if (n_left > 128)
      /* ... */
      aes4_gcm_calc (T, kd, d4, ctr, inv, outv, rounds, 3, n_left, f);
      return aes4_gcm_ghash_last (T, kd, d4, 3, n_left);
  else if (n_left > 64)
      /* ... */
      aes4_gcm_calc (T, kd, d4, ctr, inv, outv, rounds, 2, n_left, f);
      return aes4_gcm_ghash_last (T, kd, d4, 2, n_left);
  else
      aes4_gcm_calc (T, kd, d4, ctr, inv, outv, rounds, 1, n_left, f);
      return aes4_gcm_ghash_last (T, kd, d4, 1, n_left);

  /* VAES path, long inputs: first 256-byte chunk without GHASH, then
     512-byte double chunks, then 256-byte chunks, then the tail */
  aes4_gcm_calc (T, kd, d4, ctr, inv, outv, rounds, 4, 0, f);
  /* ... */
  while (n_left >= 512)
      T = aes4_gcm_calc_double (T, kd, d4, ctr, inv, outv, rounds, f);
      /* ... */
  while (n_left >= 256)
      T = aes4_gcm_calc (T, kd, d4, ctr, inv, outv, rounds, 4, 0, f);
      /* ... */
  if (n_left == 0)
    return aes4_gcm_ghash_last (T, kd, d4, 4, 64);
  /* ... tail dispatch by remaining size: */
      T = aes4_gcm_calc (T, kd, d4, ctr, inv, outv, rounds, 4, n_left, f);
      return aes4_gcm_ghash_last (T, kd, d4, 4, n_left);
      /* ... */
      T = aes4_gcm_calc (T, kd, d4, ctr, inv, outv, rounds, 3, n_left, f);
      return aes4_gcm_ghash_last (T, kd, d4, 3, n_left);
      /* ... */
      T = aes4_gcm_calc (T, kd, d4, ctr, inv, outv, rounds, 2, n_left, f);
      return aes4_gcm_ghash_last (T, kd, d4, 2, n_left);
      /* ... */
      T = aes4_gcm_calc (T, kd, d4, ctr, inv, outv, rounds, 1, n_left, f);
      return aes4_gcm_ghash_last (T, kd, d4, 1, n_left);

  /* 128-bit path, inputs shorter than 64 bytes: */
      /* ... */
      aes_gcm_calc (T, kd, d, ctr, inv, outv, rounds, 4, n_left, f);
      /* ... */
  else if (n_left > 32)
      aes_gcm_calc (T, kd, d, ctr, inv, outv, rounds, 3, n_left, f);
      /* ... */
  else if (n_left > 16)
      aes_gcm_calc (T, kd, d, ctr, inv, outv, rounds, 2, n_left, f);
      /* ... */
      aes_gcm_calc (T, kd, d, ctr, inv, outv, rounds, 1, n_left, f);

  /* 128-bit path, long inputs: first 64-byte chunk without GHASH, then
     128-byte chunks, then the tail */
  aes_gcm_calc (T, kd, d, ctr, inv, outv, rounds, 4, 0, f);
  /* ... */
  while (n_left >= 128)
    /* ... */
  /* ... */
  T = aes_gcm_calc (T, kd, d, ctr, inv, outv, rounds, 4, 0, f);
  /* ... tail dispatch: */
  T = aes_gcm_calc (T, kd, d, ctr, inv, outv, rounds, 4, n_left, f);
  /* ... */
  T = aes_gcm_calc (T, kd, d, ctr, inv, outv, rounds, 3, n_left, f);
  /* ... */
  T = aes_gcm_calc (T, kd, d, ctr, inv, outv, rounds, 2, n_left, f);
  /* ... */
  T = aes_gcm_calc (T, kd, d, ctr, inv, outv, rounds, 1, n_left, f);
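/*
 * The sizes in the dispatch above are in bytes: 512 bytes go through the
 * double round, 256 bytes through a full four-register aes4_gcm_calc, and
 * the tail picks 4, 3, 2 or 1 register-sized chunks depending on what is
 * left, with AES_GCM_F_LAST_ROUND turning the final block into a masked
 * partial load/store.
 */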
/* aes_gcm_dec (excerpt): bulk decryption driver; the ciphertext is
   available up front, so GHASH is folded in from the very first chunk and
   the tail calls return the new GHASH state directly */
static_always_inline u8x16
aes_gcm_dec (u8x16 T, aes_gcm_key_data_t * kd, aes_gcm_counter_t * ctr,
	     u8x16u * inv, u8x16u * outv, u32 n_left, int rounds)

  /* VAES path */
  while (n_left >= 512)
      T = aes4_gcm_calc_double (T, kd, d4, ctr, inv, outv, rounds, f);
      /* ... */
  while (n_left >= 256)
      T = aes4_gcm_calc (T, kd, d4, ctr, inv, outv, rounds, 4, 0, f);
      /* ... */
  /* ... tail dispatch by remaining size: */
  return aes4_gcm_calc (T, kd, d4, ctr, inv, outv, rounds, 4, /* ... */ f);
  return aes4_gcm_calc (T, kd, d4, ctr, inv, outv, rounds, 3, /* ... */ f);
  return aes4_gcm_calc (T, kd, d4, ctr, inv, outv, rounds, 2, /* ... */ f);
  return aes4_gcm_calc (T, kd, d4, ctr, inv, outv, rounds, 1, n_left, f);

  /* 128-bit path */
  while (n_left >= 128)
    /* ... */
  /* ... */
  T = aes_gcm_calc (T, kd, d, ctr, inv, outv, rounds, 4, 0, f);
  /* ... tail dispatch: */
  return aes_gcm_calc (T, kd, d, ctr, inv, outv, rounds, 4, n_left - 48, f);
  return aes_gcm_calc (T, kd, d, ctr, inv, outv, rounds, 3, n_left - 32, f);
  return aes_gcm_calc (T, kd, d, ctr, inv, outv, rounds, 2, n_left - 16, f);
  return aes_gcm_calc (T, kd, d, ctr, inv, outv, rounds, 1, n_left, f);
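/*
 * Decryption differs from encryption mainly in the flags: AES_GCM_F_DECRYPT
 * together with AES_GCM_F_WITH_GHASH is used from the first chunk, and the
 * tail calls can return aes*_gcm_calc () directly because the current
 * ciphertext is hashed in the same pass, with no separate
 * aes*_gcm_ghash_last step.
 */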
/* aes_gcm: top-level routine shared by encryption and decryption */
static_always_inline int
aes_gcm (u8x16u * in, u8x16u * out, u8x16u * addt, u8x16u * iv, u8x16u * tag,
	 u32 data_bytes, u32 aad_bytes, u8 tag_len, aes_gcm_key_data_t * kd,
	 int aes_rounds, int is_encrypt)

  /* GHASH the AAD - the common IPsec sizes get fixed-length calls */
  /* ... */
  else if (aad_bytes == 12)
    T = aes_gcm_ghash (T, kd, addt, 12);
  /* ... initialize the counter from the 96-bit IV ... */

  /* encrypt or decrypt the payload */
  if (is_encrypt)
    T = aes_gcm_enc (T, kd, ctr, in, out, data_bytes, aes_rounds);
  else
    T = aes_gcm_dec (T, kd, ctr, in, out, data_bytes, aes_rounds);
  /* ... */

  /* lengths block: AAD and data lengths, in bits */
  r = (u8x16) ((u64x2) {data_bytes, aad_bytes} << 3);
  /* ... */

  /* encrypt the initial counter block (counter value 1) for the tag,
     interleaved with the final GHASH multiply by H */
  r = kd->Ke[0] ^ (u8x16) Y0;
  for (i = 1; i < 5; i += 1)
    r = aes_enc_round (r, kd->Ke[i]);
  /* ... */
  for (; i < 9; i += 1)
    r = aes_enc_round (r, kd->Ke[i]);
  /* ... */
  for (; i < aes_rounds; i += 1)
    r = aes_enc_round (r, kd->Ke[i]);
  /* ... */
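/*
 * Tag computation: the lengths block (AAD and data lengths, both in bits)
 * is folded into T, T is multiplied by H one last time, and the result is
 * XORed with the encryption of the initial counter block (counter value
 * 1).  The three partial AES loops above exist so that this last GHASH
 * multiply and the byte reflections can be interleaved with the AES rounds
 * of that single block.
 */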
/* aes_gcm (excerpt), end of the decrypt path: the computed tag is compared
   with the received one under a mask derived from the configured tag
   length */
      u16 tag_mask = tag_len ? (1 << tag_len) - 1 : 0xffff;
      /* ... */

/* aes_ops_enc_aes_gcm (excerpt): per-op wrapper called by the vnet crypto
   framework */
      /* ... */
      op->status = VNET_CRYPTO_OP_STATUS_COMPLETED;

/* aes_ops_dec_aes_gcm (excerpt): same, but the return value of aes_gcm ()
   carries the result of the tag check */
      /* ... aes_gcm () is called with is_encrypt = 0: */
	       (u8x16u *) op->iv, (u8x16u *) op->tag, op->len,
      /* ... */
      if (/* ... tag matched ... */)
	op->status = VNET_CRYPTO_OP_STATUS_COMPLETED;
      else
	op->status = VNET_CRYPTO_OP_STATUS_FAIL_BAD_HMAC;
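/*
 * The vnet_crypto op handlers run aes_gcm () once per op.  For encryption
 * the tag is stored and the op is unconditionally marked COMPLETED; for
 * decryption aes_gcm () returns the result of the tag comparison, which is
 * mapped to VNET_CRYPTO_OP_STATUS_COMPLETED or
 * VNET_CRYPTO_OP_STATUS_FAIL_BAD_HMAC.
 */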
/* aes_gcm_key_exp (excerpt): expands the AES key schedule and precomputes
   the powers of the hash key; the VAES build additionally keeps each round
   key broadcast across a u8x64 register */
  u8x64 *Ke4 = (u8x64 *) kd->Ke4;
#define foreach_aes_gcm_handler_type _(128) _(192) _(256)

#define _(x)                                                        \
static u32 aes_ops_dec_aes_gcm_##x                                  \
  (vlib_main_t * vm, vnet_crypto_op_t * ops[], u32 n_ops)           \
{ return aes_ops_dec_aes_gcm (vm, ops, n_ops, AES_KEY_##x); }       \
static u32 aes_ops_enc_aes_gcm_##x                                  \
  (vlib_main_t * vm, vnet_crypto_op_t * ops[], u32 n_ops)           \
{ return aes_ops_enc_aes_gcm (vm, ops, n_ops, AES_KEY_##x); }       \
static void *aes_gcm_key_exp_##x (vnet_crypto_key_t * key)          \
{ return aes_gcm_key_exp (key, AES_KEY_##x); }

foreach_aes_gcm_handler_type;
#undef _

/* crypto_native_aes_gcm_init_<march> (excerpt): the per-march entry point
   (the _neon name shown here is the AArch64 build) registers the enc/dec
   handlers and the key expansion function with the vnet crypto framework */
clib_error_t *
crypto_native_aes_gcm_init_neon (vlib_main_t * vm)
{
  crypto_native_main_t *cm = &crypto_native_main;

#define _(x)                                                             \
  vnet_crypto_register_ops_handler (vm, cm->crypto_engine_index,         \
				    VNET_CRYPTO_OP_AES_##x##_GCM_ENC,     \
				    aes_ops_enc_aes_gcm_##x);             \
  vnet_crypto_register_ops_handler (vm, cm->crypto_engine_index,         \
				    VNET_CRYPTO_OP_AES_##x##_GCM_DEC,     \
				    aes_ops_dec_aes_gcm_##x);             \
  cm->key_fn[VNET_CRYPTO_ALG_AES_##x##_GCM] = aes_gcm_key_exp_##x;
  foreach_aes_gcm_handler_type;
#undef _
  /* ... */
}
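/*
 * Declarations of the symbols referenced above (signatures only):
 */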
static_always_inline u8x16 aes_gcm_ghash_last(u8x16 T, aes_gcm_key_data_t *kd, u8x16 *d, int n_blocks, int n_bytes)
static_always_inline u8x16 aes_gcm_ghash(u8x16 T, aes_gcm_key_data_t *kd, u8x16u *in, u32 n_left)
crypto_native_main_t crypto_native_main
static_always_inline u8x16 aes_load_partial(u8x16u *p, int n_bytes)
static_always_inline void clib_prefetch_load(void *p)
static_always_inline void aes_gcm_enc_last_round(u8x16 *r, u8x16 *d, u8x16 const *k, int rounds, int n_blocks)
static_always_inline u32 aes_ops_dec_aes_gcm(vlib_main_t *vm, vnet_crypto_op_t *ops[], u32 n_ops, aes_key_size_t ks)
static_always_inline void aes_store_partial(void *p, u8x16 r, int n_bytes)
#define foreach_aes_gcm_handler_type
static_always_inline u8x16 u8x16_reflect(u8x16 v)
static_always_inline u8x64 u8x64_reflect_u8x16(u8x64 x)
#define AES_KEY_ROUNDS(x)
static_always_inline u8x64 u8x64_mask_load(u8x64 a, void *p, u64 mask)
static_always_inline int aes_gcm(u8x16u *in, u8x16u *out, u8x16u *addt, u8x16u *iv, u8x16u *tag, u32 data_bytes, u32 aad_bytes, u8 tag_len, aes_gcm_key_data_t *kd, int aes_rounds, int is_encrypt)
static_always_inline u8x16 aes_gcm_dec(u8x16 T, aes_gcm_key_data_t *kd, aes_gcm_counter_t *ctr, u8x16u *inv, u8x16u *outv, u32 n_left, int rounds)
static_always_inline void ghash_precompute(u8x16 H, u8x16 *Hi, int n)
static_always_inline u8x16 aes_byte_mask(u8x16 x, u8 n_bytes)
static_always_inline u8x16 ghash_final(ghash_data_t *gd)
static_always_inline u8x64 u8x64_splat_u8x16(u8x16 a)
static_always_inline u32x16 u32x16_splat_u32x4(u32x4 a)
#define static_always_inline
static_always_inline void ghash_reduce(ghash_data_t *gd)
#define u8x64_insert_u8x16(a, b, n)
static_always_inline void ghash_mul_next(ghash_data_t *gd, u8x16 a, u8x16 b)
static_always_inline void ghash_reduce2(ghash_data_t *gd)
static_always_inline u8x16 aes_enc_round(u8x16 a, u8x16 k)
static_always_inline void aes_key_expand(u8x16 *key_schedule, u8 const *key, aes_key_size_t ks)
static_always_inline u8x16 aes_enc_last_round(u8x16 a, u8x16 k)
static_always_inline u32 aes_ops_enc_aes_gcm(vlib_main_t *vm, vnet_crypto_op_t *ops[], u32 n_ops, aes_key_size_t ks)
static_always_inline u8x16 ghash_mul(u8x16 a, u8x16 b)
static_always_inline void ghash_mul_first(ghash_data_t *gd, u8x16 a, u8x16 b)
static_always_inline u32x16 u32x16_mask_blend(u32x16 a, u32x16 b, u16 mask)
static_always_inline void aes_gcm_enc_first_round(u8x16 *r, aes_gcm_counter_t *ctr, u8x16 k, int n_blocks)
static_always_inline u16 u8x16_msb_mask(u8x16 v)
static_always_inline u8x16 aes_gcm_enc(u8x16 T, aes_gcm_key_data_t *kd, aes_gcm_counter_t *ctr, u8x16u *inv, u8x16u *outv, u32 n_left, int rounds)
static_always_inline void * aes_gcm_key_exp(vnet_crypto_key_t *key, aes_key_size_t ks)
static_always_inline void aes_gcm_enc_round(u8x16 *r, u8x16 k, int n_blocks)
static_always_inline u8x16 aes_gcm_calc_double(u8x16 T, aes_gcm_key_data_t *kd, u8x16 *d, aes_gcm_counter_t *ctr, u8x16u *inv, u8x16u *outv, int rounds, aes_gcm_flags_t f)
static_always_inline u8x16 aes_gcm_ghash_blocks(u8x16 T, aes_gcm_key_data_t *kd, u8x16u *in, int n_blocks)
clib_error_t * crypto_native_aes_gcm_init_slm(vlib_main_t *vm)
vnet_crypto_op_status_t status
static void * clib_mem_alloc_aligned(uword size, uword align)
static_always_inline u8x64 u8x64_mask_blend(u8x64 a, u8x64 b, u64 mask)
static_always_inline void u8x64_mask_store(u8x64 a, void *p, u64 mask)
static_always_inline u8x16 aes_encrypt_block(u8x16 block, const u8x16 *round_keys, aes_key_size_t ks)
#define CLIB_CACHE_LINE_BYTES
static_always_inline u8x16 aes_gcm_calc(u8x16 T, aes_gcm_key_data_t *kd, u8x16 *d, aes_gcm_counter_t *ctr, u8x16u *inv, u8x16u *outv, int rounds, int n, int last_block_bytes, aes_gcm_flags_t f)
static const u32x4 ctr_inv_1