#if __GNUC__ > 4 && !__clang__ && CLIB_DEBUG == 0
#pragma GCC optimize ("O3")
#endif

static const u32x4 ctr_inv_1 = { 0, 0, 0, 1 << 24 };
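/* A minimal standalone sketch, not part of the plugin, of why adding
   1 << 24 to the last 32-bit lane advances the big-endian GCM block
   counter.  It assumes a little-endian host and GCC/clang vector
   extensions; u32x4_demo is a local stand-in for the vppinfra u32x4. */
#include <stdio.h>
#include <stdint.h>
#include <arpa/inet.h>

typedef uint32_t u32x4_demo __attribute__ ((vector_size (16)));

int
main ()
{
  u32x4_demo Y = { 0, 0, 0, 0 };	/* 128-bit counter block */
  const u32x4_demo inv_1 = { 0, 0, 0, 1 << 24 };

  Y[3] = htonl (1);	/* GCM block counter starts at 1, network order */

  for (int i = 0; i < 4; i++)
    {
      printf ("counter = %u\n", (unsigned) ntohl (Y[3]));
      /* on a little-endian host this adds 1 to the big-endian counter;
         it is only valid until the low counter byte carries, which is
         why the real code keeps a clib_host_to_net_u32 fallback path */
      Y += inv_1;
    }
  return 0;
}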
for (int i = 0; i < n_blocks; i++)
  r[i] = k ^ (u8x16) ctr->Y;

for (int i = 0; i < n_blocks; i++)
  r[i] = k ^ (u8x16) ctr->Y;

ctr->Y[3] = clib_host_to_net_u32 (ctr->counter + 1);
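/* Counter handling for the first AES round: each keystream block is the
   round-0 key XORed with the current counter block Y.  The common path
   advances Y by adding ctr_inv_1 (an increment of 1 pre-swapped into the
   last lane); when the low byte of the counter gets close to wrapping,
   Y[3] is instead rebuilt from the scalar counter with
   clib_host_to_net_u32, since the lane add cannot carry across bytes. */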
for (int i = 0; i < n_blocks; i++)

aes_gcm_enc_last_round (u8x16 * r, u8x16 * d, u8x16 const *k,
			int rounds, int n_blocks)

for (int i = 10; i < rounds; i++)

for (int i = 0; i < n_blocks; i++)
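/* Rounds 1..9 are issued explicitly elsewhere; the loop starting at
   i = 10 adds the extra rounds needed for AES-192 (12) and AES-256 (14),
   and k[rounds] supplies the final round key for the last-round step. */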
aes_gcm_ghash_blocks (u8x16 T, aes_gcm_key_data_t * kd,
		      u8x16u * in, int n_blocks)

u8x16 *Hi = (u8x16 *) kd->Hi + NUM_HI - n_blocks;

for (int i = 1; i < n_blocks; i++)

while (n_left >= 128)
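/* kd->Hi holds precomputed powers of the hash key H.  Indexing from the
   end (NUM_HI - n_blocks) pairs the highest needed power with the first
   block, so a whole batch can be multiplied and accumulated before a
   single reduction; the n_left >= 128 loop hashes 8 16-byte blocks per
   iteration. */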
const u8x16 *rk = (u8x16 *) kd->Ke;
u8x16 *Hi = (u8x16 *) kd->Hi + NUM_HI - ghash_blocks;

if ((f & AES_GCM_F_WITH_GHASH) && gc++ < ghash_blocks)

if ((f & AES_GCM_F_WITH_GHASH) && gc++ < ghash_blocks)

if ((f & AES_GCM_F_WITH_GHASH) && gc++ < ghash_blocks)

if (f & AES_GCM_F_WITH_GHASH)

if (f & AES_GCM_F_ENCRYPT)

if (f & AES_GCM_F_WITH_GHASH)

if (f & AES_GCM_F_WITH_GHASH)

const u8x16 *rk = (u8x16 *) kd->Ke;
u8x16 *Hi = (u8x16 *) kd->Hi + NUM_HI - 8;

if (f & AES_GCM_F_DECRYPT)

if (f & AES_GCM_F_ENCRYPT)
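/* The AES_GCM_F_* flag bits let a single code path cover encryption,
   decryption and GHASH-only (AAD) processing.  The gc counter releases
   one GHASH multiply between consecutive pairs of AES rounds, so the
   AESNI and carry-less multiply dependency chains overlap instead of
   running back to back. */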
aes_gcm_ghash_last (u8x16 T, aes_gcm_key_data_t * kd, u8x16 * d,
		    int n_blocks, int n_bytes)

u8x16 *Hi = (u8x16 *) kd->Hi + NUM_HI - n_blocks;

static const u32x16 ctr_inv_1234 = {
  0, 0, 0, 1 << 24, 0, 0, 0, 2 << 24, 0, 0, 0, 3 << 24, 0, 0, 0, 4 << 24,
};

static const u32x16 ctr_inv_4444 = {
  0, 0, 0, 4 << 24, 0, 0, 0, 4 << 24, 0, 0, 0, 4 << 24, 0, 0, 0, 4 << 24
};

static const u32x16 ctr_1234 = {
  1, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0, 4, 0, 0, 0,
};
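/* Constants for the 512-bit (VAES/AVX-512) path: one u32x16 carries four
   128-bit counter blocks.  ctr_1234 holds the counters 1..4 in host
   order; ctr_inv_1234 and ctr_inv_4444 hold increments of 1..4 and 4
   pre-swapped into the last lane of each block (value << 24), so a plain
   vector add advances all four big-endian counters at once. */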
r[i] = k ^ (u8x64) ctr->Y4;
ctr->Y4 += ctr_inv_4444;

r[i] = k ^ (u8x64) ctr->Y4;
Yc = u32x16_splat (ctr->counter + 4 * (i + 1)) + ctr_1234;

r[i] = k ^ (u8x64) ctr->Y4;
ctr->Y4 += ctr_inv_4444;

aes4_gcm_enc_round (u8x64 * r, u8x64 k, int n_blocks)

for (int i = 0; i < n_blocks; i++)
  r[i] = aes_enc_round_x4 (r[i], k);

aes4_gcm_enc_last_round (u8x64 * r, u8x64 * d, u8x64 const *k,
			 int rounds, int n_blocks)

for (int i = 10; i < rounds; i++)
  aes4_gcm_enc_round (r, k[i], n_blocks);

for (int i = 0; i < n_blocks; i++)
  d[i] ^= aes_enc_last_round_x4 (r[i], k[rounds]);
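/* The _x4 helpers apply one AES round across the four blocks packed in a
   u8x64; XORing the last-round output into d[] turns the keystream into
   ciphertext (or plaintext) directly, as usual for CTR mode. */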
ghash4_data_t _gd, *gd = &_gd;
const u8x64 *rk = (u8x64 *) kd->Ke4;
int i, ghash_blocks, gc = 1;
u8x64u *Hi4, *inv = (u8x64u *) in, *outv = (u8x64u *) out;
u64 byte_mask = _bextr_u64 (-1LL, 0, last_4block_bytes);

Hi4 = (u8x64u *) (kd->Hi + NUM_HI - ghash_blocks * 4);

int n_128bit_blocks = n * 4;
n_128bit_blocks += ((last_4block_bytes + 15) >> 4) - 4;
Hi4 = (u8x64u *) (kd->Hi + NUM_HI - n_128bit_blocks);
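/* Standalone sketch, not part of the plugin, of the partial-block mask:
   _bextr_u64 (-1, 0, n) extracts the low n bits of all-ones, i.e. builds
   a mask with n bits set.  The VAES path appears to feed such a mask to
   masked 64-byte loads/stores (u8x64_mask_load / u8x64_mask_store) for
   the trailing bytes.  Requires an x86-64 with BMI1; compile with -mbmi. */
#include <stdio.h>
#include <immintrin.h>

int
main ()
{
  /* print the byte-lane masks for a few partial-block sizes */
  for (unsigned n = 0; n <= 64; n += 16)
    printf ("bytes=%2u  mask=%016llx\n", n,
	    (unsigned long long) _bextr_u64 (-1LL, 0, n));
  return 0;
}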
aes4_gcm_enc_first_round (r, ctr, rk[0], n);
aes4_gcm_enc_round (r, rk[1], n);

aes4_gcm_enc_round (r, rk[2], n);
aes4_gcm_enc_round (r, rk[3], n);

if ((f & AES_GCM_F_WITH_GHASH) && gc++ < ghash_blocks)

aes4_gcm_enc_round (r, rk[4], n);
aes4_gcm_enc_round (r, rk[5], n);

if ((f & AES_GCM_F_WITH_GHASH) && gc++ < ghash_blocks)

aes4_gcm_enc_round (r, rk[6], n);
aes4_gcm_enc_round (r, rk[7], n);

if ((f & AES_GCM_F_WITH_GHASH) && gc++ < ghash_blocks)

if (f & AES_GCM_F_ENCRYPT)

aes4_gcm_enc_round (r, rk[8], n);
aes4_gcm_enc_round (r, rk[9], n);

aes4_gcm_enc_last_round (r, d, rk, rounds, n);

return ghash4_final (gd);
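/* One aes4_gcm_calc pass encrypts up to 4 u8x64 registers (16 blocks,
   256 bytes) while folding previously produced blocks into the GHASH
   state; ghash4_final collapses the four parallel GHASH accumulators back
   into the single u8x16 tag state T. */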
ghash4_data_t _gd, *gd = &_gd;
const u8x64 *rk = (u8x64 *) kd->Ke4;
u8x64 *Hi4 = (u8x64 *) (kd->Hi + NUM_HI - 32);
u8x64u *inv = (u8x64u *) in, *outv = (u8x64u *) out;

aes4_gcm_enc_first_round (r, ctr, rk[0], 4);
aes4_gcm_enc_round (r, rk[1], 4);

for (int i = 0; i < 4; i++)

aes4_gcm_enc_round (r, rk[2], 4);
aes4_gcm_enc_round (r, rk[3], 4);

aes4_gcm_enc_round (r, rk[4], 4);
aes4_gcm_enc_round (r, rk[5], 4);

aes4_gcm_enc_round (r, rk[6], 4);
aes4_gcm_enc_round (r, rk[7], 4);

aes4_gcm_enc_round (r, rk[8], 4);
aes4_gcm_enc_round (r, rk[9], 4);

for (int i = 0; i < 4; i++)

aes4_gcm_enc_last_round (r, d, rk, rounds, 4);

for (int i = 0; i < 4; i++)

if (f & AES_GCM_F_DECRYPT)
  for (int i = 0; i < 4; i++)

aes4_gcm_enc_first_round (r, ctr, rk[0], 4);
aes4_gcm_enc_round (r, rk[1], 4);

aes4_gcm_enc_round (r, rk[2], 4);
aes4_gcm_enc_round (r, rk[3], 4);

aes4_gcm_enc_round (r, rk[4], 4);
aes4_gcm_enc_round (r, rk[5], 4);

aes4_gcm_enc_round (r, rk[6], 4);
aes4_gcm_enc_round (r, rk[7], 4);

aes4_gcm_enc_round (r, rk[8], 4);
aes4_gcm_enc_round (r, rk[9], 4);

if (f & AES_GCM_F_ENCRYPT)
  for (int i = 0; i < 4; i++)

aes4_gcm_enc_last_round (r, d, rk, rounds, 4);

for (int i = 0; i < 4; i++)

return ghash4_final (gd);
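/* aes4_gcm_calc_double handles two 4-register batches (32 blocks,
   512 bytes) per call, which is why Hi4 starts 32 powers of H from the
   end of kd->Hi and the bulk loops below run while n_left >= 512. */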
aes4_gcm_ghash_last (u8x16 T, aes_gcm_key_data_t * kd, u8x64 * d,
		     int n, int last_4block_bytes)

ghash4_data_t _gd, *gd = &_gd;

u64 byte_mask = _bextr_u64 (-1LL, 0, last_4block_bytes);
n_128bit_blocks = (n - 1) * 4 + ((last_4block_bytes + 15) >> 4);
Hi4 = (u8x64u *) (kd->Hi + NUM_HI - n_128bit_blocks);

return ghash4_final (gd);
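/* Tail GHASH block count: (n - 1) full quads of four 128-bit blocks plus
   ceil (last_4block_bytes / 16) blocks in the final quad; byte_mask zeroes
   the bytes past the end of the message before they are hashed. */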
aes_gcm_enc (u8x16 T, aes_gcm_key_data_t * kd, aes_gcm_counter_t * ctr,
	     u8x16u * inv, u8x16u * outv, u32 n_left, int rounds)

aes4_gcm_calc (T, kd, d4, ctr, inv, outv, rounds, 4, n_left, f);
return aes4_gcm_ghash_last (T, kd, d4, 4, n_left);

else if (n_left > 128)

aes4_gcm_calc (T, kd, d4, ctr, inv, outv, rounds, 3, n_left, f);
return aes4_gcm_ghash_last (T, kd, d4, 3, n_left);

else if (n_left > 64)

aes4_gcm_calc (T, kd, d4, ctr, inv, outv, rounds, 2, n_left, f);
return aes4_gcm_ghash_last (T, kd, d4, 2, n_left);

aes4_gcm_calc (T, kd, d4, ctr, inv, outv, rounds, 1, n_left, f);
return aes4_gcm_ghash_last (T, kd, d4, 1, n_left);

aes4_gcm_calc (T, kd, d4, ctr, inv, outv, rounds, 4, 0, f);

while (n_left >= 512)
  T = aes4_gcm_calc_double (T, kd, d4, ctr, inv, outv, rounds, f);

while (n_left >= 256)
  T = aes4_gcm_calc (T, kd, d4, ctr, inv, outv, rounds, 4, 0, f);

return aes4_gcm_ghash_last (T, kd, d4, 4, 64);

T = aes4_gcm_calc (T, kd, d4, ctr, inv, outv, rounds, 4, n_left, f);
return aes4_gcm_ghash_last (T, kd, d4, 4, n_left);

T = aes4_gcm_calc (T, kd, d4, ctr, inv, outv, rounds, 3, n_left, f);
return aes4_gcm_ghash_last (T, kd, d4, 3, n_left);

T = aes4_gcm_calc (T, kd, d4, ctr, inv, outv, rounds, 2, n_left, f);
return aes4_gcm_ghash_last (T, kd, d4, 2, n_left);

T = aes4_gcm_calc (T, kd, d4, ctr, inv, outv, rounds, 1, n_left, f);
return aes4_gcm_ghash_last (T, kd, d4, 1, n_left);
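/* Bulk dispatch: consume 512-byte chunks with the double routine, then
   256-byte chunks, then hand the remainder (1..4 quads) to aes4_gcm_calc,
   threading the running GHASH state T through every call and finishing
   with aes4_gcm_ghash_last. */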
aes_gcm_calc (T, kd, d, ctr, inv, outv, rounds, 4, n_left, f);

else if (n_left > 32)

aes_gcm_calc (T, kd, d, ctr, inv, outv, rounds, 3, n_left, f);

else if (n_left > 16)

aes_gcm_calc (T, kd, d, ctr, inv, outv, rounds, 2, n_left, f);

aes_gcm_calc (T, kd, d, ctr, inv, outv, rounds, 1, n_left, f);

aes_gcm_calc (T, kd, d, ctr, inv, outv, rounds, 4, 0, f);

while (n_left >= 128)

T = aes_gcm_calc (T, kd, d, ctr, inv, outv, rounds, 4, 0, f);

T = aes_gcm_calc (T, kd, d, ctr, inv, outv, rounds, 4, n_left, f);

T = aes_gcm_calc (T, kd, d, ctr, inv, outv, rounds, 3, n_left, f);

T = aes_gcm_calc (T, kd, d, ctr, inv, outv, rounds, 2, n_left, f);

T = aes_gcm_calc (T, kd, d, ctr, inv, outv, rounds, 1, n_left, f);
aes_gcm_dec (u8x16 T, aes_gcm_key_data_t * kd, aes_gcm_counter_t * ctr,
	     u8x16u * inv, u8x16u * outv, u32 n_left, int rounds)

while (n_left >= 512)
  T = aes4_gcm_calc_double (T, kd, d4, ctr, inv, outv, rounds, f);

while (n_left >= 256)
  T = aes4_gcm_calc (T, kd, d4, ctr, inv, outv, rounds, 4, 0, f);

return aes4_gcm_calc (T, kd, d4, ctr, inv, outv, rounds, 4, n_left - 192, f);
return aes4_gcm_calc (T, kd, d4, ctr, inv, outv, rounds, 3, n_left - 128, f);
return aes4_gcm_calc (T, kd, d4, ctr, inv, outv, rounds, 2, n_left - 64, f);
return aes4_gcm_calc (T, kd, d4, ctr, inv, outv, rounds, 1, n_left, f);

while (n_left >= 128)

T = aes_gcm_calc (T, kd, d, ctr, inv, outv, rounds, 4, 0, f);

return aes_gcm_calc (T, kd, d, ctr, inv, outv, rounds, 4, n_left - 48, f);
return aes_gcm_calc (T, kd, d, ctr, inv, outv, rounds, 3, n_left - 32, f);
return aes_gcm_calc (T, kd, d, ctr, inv, outv, rounds, 2, n_left - 16, f);
return aes_gcm_calc (T, kd, d, ctr, inv, outv, rounds, 1, n_left, f);
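/* On the AVX-512 path a register holds a quad of four 16-byte blocks, so
   the tail adjustments step in units of 64 bytes (n_left - 192/128/64);
   the 128-bit fallback steps in units of one block (n_left - 48/32/16). */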
aes_gcm (u8x16u * in, u8x16u * out, u8x16u * addt, u8x16u * iv, u8x16u * tag,
	 u32 data_bytes, u32 aad_bytes, u8 tag_len, aes_gcm_key_data_t * kd,
	 int aes_rounds, int is_encrypt)

else if (aad_bytes == 12)

T = aes_gcm_enc (T, kd, ctr, in, out, data_bytes, aes_rounds);

T = aes_gcm_dec (T, kd, ctr, in, out, data_bytes, aes_rounds);

r = (u8x16) ((u64x2) {data_bytes, aad_bytes} << 3);

r = kd->Ke[0] ^ (u8x16) Y0;
for (i = 1; i < 5; i += 1)
for (; i < 9; i += 1)
for (; i < aes_rounds; i += 1)

u16 tag_mask = tag_len ? (1 << tag_len) - 1 : 0xffff;
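/* Finalization: the last GHASH input is the length block, data_bytes and
   aad_bytes shifted left by 3 to convert bytes to bits as GCM requires.
   The loops over kd->Ke encrypt the initial counter block Y0 to obtain
   E(K, Y0), which is combined with the GHASH result to form the tag.
   tag_mask selects tag_len of the 16 tag byte lanes (all of them when
   tag_len is 0) for the store/compare step. */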
op->status = VNET_CRYPTO_OP_STATUS_COMPLETED;

(u8x16u *) op->iv, (u8x16u *) op->tag, op->len,

op->status = VNET_CRYPTO_OP_STATUS_COMPLETED;

op->status = VNET_CRYPTO_OP_STATUS_FAIL_BAD_HMAC;

u8x64 *Ke4 = (u8x64 *) kd->Ke4;
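/* The vnet_crypto op handlers mark each op COMPLETED on success; on
   decrypt, a tag mismatch is reported as
   VNET_CRYPTO_OP_STATUS_FAIL_BAD_HMAC. */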
#define foreach_aes_gcm_handler_type _(128) _(192) _(256)

#define _(x) \
static u32 aes_ops_dec_aes_gcm_##x \
(vlib_main_t * vm, vnet_crypto_op_t * ops[], u32 n_ops) \
{ return aes_ops_dec_aes_gcm (vm, ops, n_ops, AES_KEY_##x); } \
static u32 aes_ops_enc_aes_gcm_##x \
(vlib_main_t * vm, vnet_crypto_op_t * ops[], u32 n_ops) \
{ return aes_ops_enc_aes_gcm (vm, ops, n_ops, AES_KEY_##x); } \
static void * aes_gcm_key_exp_##x (vnet_crypto_key_t *key) \
{ return aes_gcm_key_exp (key, AES_KEY_##x); }

crypto_native_aes_gcm_init_neon (vlib_main_t * vm)

#define _(x) \
vnet_crypto_register_ops_handler (vm, cm->crypto_engine_index, \
				  VNET_CRYPTO_OP_AES_##x##_GCM_ENC, \
				  aes_ops_enc_aes_gcm_##x); \
vnet_crypto_register_ops_handler (vm, cm->crypto_engine_index, \
				  VNET_CRYPTO_OP_AES_##x##_GCM_DEC, \
				  aes_ops_dec_aes_gcm_##x); \
cm->key_fn[VNET_CRYPTO_ALG_AES_##x##_GCM] = aes_gcm_key_exp_##x;
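/* The _() macro, expanded through foreach_aes_gcm_handler_type,
   instantiates per-key-size encrypt/decrypt handlers and key-expansion
   helpers for AES-128/192/256 and registers them with the vnet crypto
   engine (cm->crypto_engine_index), including the key function table
   entry.

   The declarations listed below are the helpers, types and macros
   referenced above (vppinfra vector operations, GHASH helpers and the
   vnet crypto API). */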
static_always_inline u8x16 aes_gcm_ghash_last(u8x16 T, aes_gcm_key_data_t *kd, u8x16 *d, int n_blocks, int n_bytes)
static_always_inline u8x16 aes_gcm_ghash(u8x16 T, aes_gcm_key_data_t *kd, u8x16u *in, u32 n_left)
crypto_native_main_t crypto_native_main
static_always_inline u8x16 aes_load_partial(u8x16u *p, int n_bytes)
static_always_inline void clib_prefetch_load(void *p)
static_always_inline void aes_gcm_enc_last_round(u8x16 *r, u8x16 *d, u8x16 const *k, int rounds, int n_blocks)
static_always_inline u32 aes_ops_dec_aes_gcm(vlib_main_t *vm, vnet_crypto_op_t *ops[], u32 n_ops, aes_key_size_t ks)
static_always_inline void aes_store_partial(void *p, u8x16 r, int n_bytes)
#define foreach_aes_gcm_handler_type
static_always_inline u8x16 u8x16_reflect(u8x16 v)
static_always_inline u8x64 u8x64_reflect_u8x16(u8x64 x)
#define AES_KEY_ROUNDS(x)
static_always_inline u8x64 u8x64_mask_load(u8x64 a, void *p, u64 mask)
static_always_inline int aes_gcm(u8x16u *in, u8x16u *out, u8x16u *addt, u8x16u *iv, u8x16u *tag, u32 data_bytes, u32 aad_bytes, u8 tag_len, aes_gcm_key_data_t *kd, int aes_rounds, int is_encrypt)
static_always_inline u8x16 aes_gcm_dec(u8x16 T, aes_gcm_key_data_t *kd, aes_gcm_counter_t *ctr, u8x16u *inv, u8x16u *outv, u32 n_left, int rounds)
static_always_inline void ghash_precompute(u8x16 H, u8x16 *Hi, int n)
static_always_inline u8x16 aes_byte_mask(u8x16 x, u8 n_bytes)
static_always_inline u8x16 ghash_final(ghash_data_t *gd)
static_always_inline u8x64 u8x64_splat_u8x16(u8x16 a)
static_always_inline u32x16 u32x16_splat_u32x4(u32x4 a)
#define static_always_inline
static_always_inline void ghash_reduce(ghash_data_t *gd)
#define u8x64_insert_u8x16(a, b, n)
static_always_inline void ghash_mul_next(ghash_data_t *gd, u8x16 a, u8x16 b)
static_always_inline void ghash_reduce2(ghash_data_t *gd)
static_always_inline u8x16 aes_enc_round(u8x16 a, u8x16 k)
static_always_inline void aes_key_expand(u8x16 *key_schedule, u8 const *key, aes_key_size_t ks)
static_always_inline u8x16 aes_enc_last_round(u8x16 a, u8x16 k)
static_always_inline u32 aes_ops_enc_aes_gcm(vlib_main_t *vm, vnet_crypto_op_t *ops[], u32 n_ops, aes_key_size_t ks)
static_always_inline u8x16 ghash_mul(u8x16 a, u8x16 b)
static_always_inline void ghash_mul_first(ghash_data_t *gd, u8x16 a, u8x16 b)
static_always_inline u32x16 u32x16_mask_blend(u32x16 a, u32x16 b, u16 mask)
static_always_inline void aes_gcm_enc_first_round(u8x16 *r, aes_gcm_counter_t *ctr, u8x16 k, int n_blocks)
static_always_inline u16 u8x16_msb_mask(u8x16 v)
static_always_inline u8x16 aes_gcm_enc(u8x16 T, aes_gcm_key_data_t *kd, aes_gcm_counter_t *ctr, u8x16u *inv, u8x16u *outv, u32 n_left, int rounds)
static_always_inline void * aes_gcm_key_exp(vnet_crypto_key_t *key, aes_key_size_t ks)
static_always_inline void aes_gcm_enc_round(u8x16 *r, u8x16 k, int n_blocks)
static_always_inline u8x16 aes_gcm_calc_double(u8x16 T, aes_gcm_key_data_t *kd, u8x16 *d, aes_gcm_counter_t *ctr, u8x16u *inv, u8x16u *outv, int rounds, aes_gcm_flags_t f)
static_always_inline u8x16 aes_gcm_ghash_blocks(u8x16 T, aes_gcm_key_data_t *kd, u8x16u *in, int n_blocks)
clib_error_t * crypto_native_aes_gcm_init_slm(vlib_main_t *vm)
vnet_crypto_op_status_t status
static void * clib_mem_alloc_aligned(uword size, uword align)
static_always_inline u8x64 u8x64_mask_blend(u8x64 a, u8x64 b, u64 mask)
static_always_inline void u8x64_mask_store(u8x64 a, void *p, u64 mask)
static_always_inline u8x16 aes_encrypt_block(u8x16 block, const u8x16 *round_keys, aes_key_size_t ks)
#define CLIB_CACHE_LINE_BYTES
static_always_inline u8x16 aes_gcm_calc(u8x16 T, aes_gcm_key_data_t *kd, u8x16 *d, aes_gcm_counter_t *ctr, u8x16u *inv, u8x16u *outv, int rounds, int n, int last_block_bytes, aes_gcm_flags_t f)
static const u32x4 ctr_inv_1