#include <x86intrin.h>

#if __GNUC__ > 4 && !__clang__ && CLIB_DEBUG == 0
#pragma GCC optimize ("O3")
#endif

static const __m128i zero = { 0, 0 };
static const u8x16 bswap_mask = {
  15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0
};

static const u8x16 byte_mask_scale = {
  0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
};

static_always_inline __m128i
aesni_gcm_bswap (__m128i x)
{
  return _mm_shuffle_epi8 (x, (__m128i) bswap_mask);
}
static_always_inline __m128i
aesni_gcm_byte_mask (__m128i x, u8 n_bytes)
{
  /* keep bytes 0 .. n_bytes - 1 of x, zero the rest */
  u8x16 mask = u8x16_is_greater (u8x16_splat (n_bytes), byte_mask_scale);
  return _mm_blendv_epi8 (zero, x, (__m128i) mask);
}
static_always_inline __m128i
aesni_gcm_load_partial (__m128i * p, int n_bytes)
{
#ifdef __AVX512F__
  return _mm_mask_loadu_epi8 (zero, (1 << n_bytes) - 1, p);
#else
  return aesni_gcm_byte_mask (_mm_loadu_si128 (p), n_bytes);
#endif
}

static_always_inline void
aesni_gcm_store_partial (void *p, __m128i r, int n_bytes)
{
#ifdef __AVX512F__
  _mm_mask_storeu_epi8 (p, (1 << n_bytes) - 1, r);
#else
  u8x16 mask = u8x16_is_greater (u8x16_splat (n_bytes), byte_mask_scale);
  _mm_maskmoveu_si128 (r, (__m128i) mask, p);
#endif
}
static_always_inline void
aesni_gcm_load (__m128i * d, __m128i * inv, int n, int n_bytes)
{
  for (int i = 0; i < n - 1; i++)
    d[i] = _mm_loadu_si128 (inv + i);
  d[n - 1] = n_bytes ? aesni_gcm_load_partial (inv + n - 1, n_bytes) :
    _mm_loadu_si128 (inv + n - 1);
}
static_always_inline void
aesni_gcm_store (__m128i * d, __m128i * outv, int n, int n_bytes)
{
  for (int i = 0; i < n - 1; i++)
    _mm_storeu_si128 (outv + i, d[i]);
  if (n_bytes)
    aesni_gcm_store_partial (outv + n - 1, d[n - 1], n_bytes);
  else
    _mm_storeu_si128 (outv + n - 1, d[n - 1]);
}
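/* Illustrative usage (not part of the original file): copying 52 bytes,
   i.e. three full 16-byte blocks plus a 4-byte tail, with the helpers
   above.  The function name and buffers are hypothetical. */
static_always_inline void
copy_52_bytes_sketch (const u8 * src, u8 * dst)
{
  __m128i d[4];
  aesni_gcm_load (d, (__m128i *) src, 4, 4);	/* 4 blocks, last holds 4 bytes */
  aesni_gcm_store (d, (__m128i *) dst, 4, 4);	/* touches exactly 52 bytes */
}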
static_always_inline void
aesni_gcm_enc_first_round (__m128i * r, __m128i * Y, u32 * ctr, __m128i k,
                           int n_blocks)
{
  u32 i;

  /* the counter block Y is kept in network byte order; while its low byte
     cannot overflow, stepping it means adding one to the last byte */
  if ((u8) ctr[0] < 256 - n_blocks)
    {
      for (i = 0; i < n_blocks; i++)
        {
          Y[0] = _mm_add_epi32 (Y[0], last_byte_one);
          r[i] = k ^ Y[0];
        }
      ctr[0] += n_blocks;
    }
  else
    {
      /* low byte is about to wrap: rebuild the whole 32-bit counter word
         in network byte order and re-insert it into the last lane */
      for (i = 0; i < n_blocks; i++)
        {
          Y[0] = _mm_insert_epi32 (Y[0], clib_host_to_net_u32 (++ctr[0]), 3);
          r[i] = k ^ Y[0];
        }
    }
}
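/* Background note (not from the original file): the GCM counter block
   keeps its 32-bit block counter in bytes 12-15, most significant byte
   first, so its least significant byte is byte 15 of the register.
   last_byte_one (declared elsewhere in this file, presumably along the
   lines of { 0, 1ULL << 56 }) therefore adds one to exactly that byte.
   This only works while the byte does not wrap; once it would, the carry
   has to propagate, which is what the _mm_insert_epi32 /
   clib_host_to_net_u32 path above does by rebuilding the whole counter
   word. */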
static_always_inline void
aesni_gcm_enc_round (__m128i * r, __m128i k, int n_blocks)
{
  for (int i = 0; i < n_blocks; i++)
    r[i] = _mm_aesenc_si128 (r[i], k);
}
static_always_inline void
aesni_gcm_enc_last_round (__m128i * r, __m128i * d, const __m128i * k,
                          int rounds, int n_blocks)
{
  /* additional rounds for AES-192 and AES-256 */
  for (int i = 10; i < rounds; i++)
    aesni_gcm_enc_round (r, k[i], n_blocks);

  for (int i = 0; i < n_blocks; i++)
    d[i] ^= _mm_aesenclast_si128 (r[i], k[rounds]);
}
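/* Illustrative sketch (not part of the original file): how the three round
   helpers combine into plain AES-CTR for a single block.  d[0] holds one
   plaintext block and is XORed with the keystream in place; kd->Ke is
   assumed to hold the expanded round keys and rounds to be 10, 12 or 14. */
static_always_inline void
aes_ctr_one_block_sketch (aes_gcm_key_data_t * kd, __m128i * Y, u32 * ctr,
                          __m128i * d, int rounds)
{
  __m128i r[1];
  aesni_gcm_enc_first_round (r, Y, ctr, kd->Ke[0], 1);	/* round 0 + ctr++ */
  for (int i = 1; i < 10; i++)
    aesni_gcm_enc_round (r, kd->Ke[i], 1);		/* rounds 1..9 */
  aesni_gcm_enc_last_round (r, d, kd->Ke, rounds, 1);	/* extra rounds for
							   AES-192/256, final
							   round, XOR into d */
}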
static_always_inline __m128i
aesni_gcm_ghash_blocks (__m128i T, aes_gcm_key_data_t * kd,
                        const __m128i * in, int n_blocks)
{
  ghash_data_t _gd, *gd = &_gd;
  /* kd->Hi holds precomputed powers of the hash subkey H; the highest
     power needed for this run comes first */
  const __m128i *Hi = kd->Hi + n_blocks - 1;

  ghash_mul_first (gd, aesni_gcm_bswap (_mm_loadu_si128 (in)) ^ T, Hi[0]);
  for (int i = 1; i < n_blocks; i++)
    ghash_mul_next (gd, aesni_gcm_bswap (_mm_loadu_si128 (in + i)), Hi[-i]);
  ghash_reduce (gd);
  ghash_reduce2 (gd);
  return ghash_final (gd);
}
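/* Background note (not from the original file): for blocks X1..Xn the
 * update computed above is, over GF(2^128),
 *
 *   T' = (T ^ X1) * H^n  ^  X2 * H^(n-1)  ^ ... ^  Xn * H
 *
 * which equals n successive Horner steps T = (T ^ Xi) * H.  Using the
 * precomputed powers in kd->Hi lets all the products be issued back to
 * back and reduced once at the end instead of serializing on each
 * multiply. */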
static_always_inline __m128i
aesni_gcm_ghash (__m128i T, aes_gcm_key_data_t * kd, const __m128i * in,
                 u32 n_left)
{
  /* hash 8 blocks (128 bytes) at a time, then progressively smaller runs
     and, finally, a possible partial block */
  while (n_left >= 128)
    {
      T = aesni_gcm_ghash_blocks (T, kd, in, 8);
      n_left -= 128;
      in += 8;
    }
  /* ... */
  return T;
}
static_always_inline __m128i
aesni_gcm_calc (__m128i T, aes_gcm_key_data_t * kd, __m128i * d,
                __m128i * Y, u32 * ctr, __m128i * inv, __m128i * outv,
                int rounds, int n, int last_block_bytes, int with_ghash,
                int is_encrypt)
{
  const __m128i *k = kd->Ke;
  /* when encrypting, GHASH consumes the 4 ciphertext blocks produced by
     the previous call (still in d[]); when decrypting it consumes the n
     ciphertext blocks being processed right now */
  int hidx = is_encrypt ? 4 : n, didx = 0;

  _mm_prefetch (inv + 4, _MM_HINT_T0);

  /* ... load the input blocks, set up the counter blocks
     (aesni_gcm_enc_first_round) and run the AES rounds, folding one GHASH
     multiply in between rounds whenever hashable blocks remain: ... */
  if (with_ghash && hidx)
    {
      /* ... one ghash_mul_first / ghash_mul_next step ... */
    }
  /* ... the same guard repeats before each interleaved GHASH step,
     followed by the reduction, the last AES round
     (aesni_gcm_enc_last_round), aesni_gcm_store () and the return of the
     updated T ... */
}
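/* Design note (not from the original file): AESENC and the carry-less
   multiplies behind the GHASH helpers run on different execution ports on
   the targeted CPUs, so interleaving one GHASH step between AES rounds
   lets both dependency chains make progress instead of running encryption
   and authentication as two separate passes over the data. */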
static_always_inline __m128i
aesni_gcm_calc_double (__m128i T, aes_gcm_key_data_t * kd, __m128i * d,
                       __m128i * Y, u32 * ctr, __m128i * inv, __m128i * outv,
                       int rounds, int is_encrypt)
{
  const __m128i *k = kd->Ke;

  /* ... same structure as aesni_gcm_calc, fully unrolled for 2 x 4 blocks
     (128 bytes) per call ... */
}
static_always_inline __m128i
aesni_gcm_ghash_last (__m128i T, aes_gcm_key_data_t * kd, __m128i * d,
                      int n_blocks, int n_bytes)
{
  /* ... GHASH the final n_blocks ciphertext blocks still held in d[];
     a partial last block (n_bytes != 0) is masked first ... */
}
static_always_inline __m128i
aesni_gcm_enc (__m128i T, aes_gcm_key_data_t * kd, __m128i Y, const u8 * in,
               const u8 * out, u32 n_left, int rounds)
{
  __m128i *inv = (__m128i *) in, *outv = (__m128i *) out;
  /* ... locals: block registers d[], block counter ctr ... */

  /* inputs of at most 4 blocks are encrypted without interleaved GHASH;
     the produced ciphertext is then hashed in one go
     (aesni_gcm_ghash_last) and the function returns */
  if (n_left < 64)
    {
      if (n_left > 48)
        aesni_gcm_calc (T, kd, d, &Y, &ctr, inv, outv, rounds, 4, n_left,
                        /* with_ghash */ 0, /* is_encrypt */ 1);
      else if (n_left > 32)
        aesni_gcm_calc (T, kd, d, &Y, &ctr, inv, outv, rounds, 3, n_left,
                        /* with_ghash */ 0, /* is_encrypt */ 1);
      else if (n_left > 16)
        aesni_gcm_calc (T, kd, d, &Y, &ctr, inv, outv, rounds, 2, n_left,
                        /* with_ghash */ 0, /* is_encrypt */ 1);
      else
        aesni_gcm_calc (T, kd, d, &Y, &ctr, inv, outv, rounds, 1, n_left,
                        /* with_ghash */ 0, /* is_encrypt */ 1);
      /* ... */
    }

  /* the first full 4-block chunk is only encrypted; its ciphertext is
     hashed while the following chunks are processed */
  aesni_gcm_calc (T, kd, d, &Y, &ctr, inv, outv, rounds, 4, 0,
                  /* with_ghash */ 0, /* is_encrypt */ 1);
  /* ... advance inv, outv, n_left ... */

  /* main loop: 8 blocks (128 bytes) per iteration via
     aesni_gcm_calc_double () */
  while (n_left >= 128)
    {
      /* ... */
    }

  if (n_left >= 64)
    {
      T = aesni_gcm_calc (T, kd, d, &Y, &ctr, inv, outv, rounds, 4, 0,
                          /* with_ghash */ 1, /* is_encrypt */ 1);
      /* ... */
    }

  /* ... if nothing is left, hash the outstanding ciphertext and return ... */

  /* tail: up to 4 remaining blocks, the last one possibly partial */
  if (n_left > 48)
    T = aesni_gcm_calc (T, kd, d, &Y, &ctr, inv, outv, rounds, 4, n_left,
                        /* with_ghash */ 1, /* is_encrypt */ 1);
  else if (n_left > 32)
    T = aesni_gcm_calc (T, kd, d, &Y, &ctr, inv, outv, rounds, 3, n_left,
                        /* with_ghash */ 1, /* is_encrypt */ 1);
  else if (n_left > 16)
    T = aesni_gcm_calc (T, kd, d, &Y, &ctr, inv, outv, rounds, 2, n_left,
                        /* with_ghash */ 1, /* is_encrypt */ 1);
  else
    T = aesni_gcm_calc (T, kd, d, &Y, &ctr, inv, outv, rounds, 1, n_left,
                        /* with_ghash */ 1, /* is_encrypt */ 1);

  /* ... hash the last chunk's ciphertext (aesni_gcm_ghash_last) and
     return T ... */
}
static_always_inline __m128i
aesni_gcm_dec (__m128i T, aes_gcm_key_data_t * kd, __m128i Y, const u8 * in,
               const u8 * out, u32 n_left, int rounds)
{
  __m128i *inv = (__m128i *) in, *outv = (__m128i *) out;
  /* ... locals: block registers d[], block counter ctr ... */

  /* main loop: 8 blocks (128 bytes) per iteration; GHASH runs on the
     incoming ciphertext, so no separate final hashing pass is needed */
  while (n_left >= 128)
    {
      T = aesni_gcm_calc_double (T, kd, d, &Y, &ctr, inv, outv, rounds,
                                 /* is_encrypt */ 0);
      /* ... advance inv, outv, n_left ... */
    }

  if (n_left >= 64)
    {
      T = aesni_gcm_calc (T, kd, d, &Y, &ctr, inv, outv, rounds, 4, 0, 1, 0);
      /* ... */
    }

  /* ... 2-4 remaining full blocks are handled the same way; the final call
     covers one, possibly partial, block: ... */
  return aesni_gcm_calc (T, kd, d, &Y, &ctr, inv, outv, rounds, 1, n_left,
                         /* with_ghash */ 1, /* is_encrypt */ 0);
}
static_always_inline int
aes_gcm (const u8 * in, u8 * out, const u8 * addt, const u8 * iv, u8 * tag,
         u32 data_bytes, u32 aad_bytes, u8 tag_len, aes_gcm_key_data_t * kd,
         int aes_rounds, int is_encrypt)
{
  int i;
  __m128i r, Y0, T = { };

  _mm_prefetch (iv, _MM_HINT_T0);
  _mm_prefetch (in, _MM_HINT_T0);

  /* GHASH the additional authenticated data; common IPsec AAD sizes are
     dispatched with constant lengths so the hashing fully unrolls */
  if (aad_bytes == 8)
    T = aesni_gcm_ghash (T, kd, (__m128i *) addt, 8);
  else if (aad_bytes == 12)
    T = aesni_gcm_ghash (T, kd, (__m128i *) addt, 12);
  else
    T = aesni_gcm_ghash (T, kd, (__m128i *) addt, aad_bytes);

  /* initial counter block Y0 = IV || be32 (1) */
  Y0 = _mm_loadu_si128 ((__m128i *) iv);
  Y0 = _mm_insert_epi32 (Y0, clib_host_to_net_u32 (1), 3);

  /* encrypt or decrypt the payload, accumulating GHASH into T */
  if (is_encrypt)
    T = aesni_gcm_enc (T, kd, Y0, in, out, data_bytes, aes_rounds);
  else
    T = aesni_gcm_dec (T, kd, Y0, in, out, data_bytes, aes_rounds);

  _mm_prefetch (tag, _MM_HINT_T0);

  /* ... fold the AAD and data bit lengths into T ... */

  /* compute E(K, Y0) */
  r = kd->Ke[0] ^ Y0;
  for (i = 1; i < 5; i += 1)
    r = _mm_aesenc_si128 (r, kd->Ke[i]);
  /* ... */
  for (; i < 9; i += 1)
    r = _mm_aesenc_si128 (r, kd->Ke[i]);
  /* ... */
  for (; i < aes_rounds; i += 1)
    r = _mm_aesenc_si128 (r, kd->Ke[i]);
  r = _mm_aesenclast_si128 (r, kd->Ke[aes_rounds]);
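  /* Background note (not from the original file): r now holds E(K, Y0).
     Per the GCM construction the tag is GHASH(AAD, C) combined with this
     value; since this implementation keeps the GHASH accumulator T in
     byte-swapped form, T is swapped back and XORed with r before the
     store/compare that follows. */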
  /* ... combine T with r to form the final tag ... */
  if (is_encrypt)
    _mm_storeu_si128 ((__m128i *) tag, T);
  else
    {
      /* compare the computed tag with the received one; tag_mask limits
         the byte-compare result to the first tag_len bytes */
      u16 tag_mask = tag_len ? (1 << tag_len) - 1 : 0xffff;
      r = _mm_loadu_si128 ((__m128i *) tag);
      if (_mm_movemask_epi8 (r == T) != tag_mask)
        return 0;
    }

  return 1;
}
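/* Illustrative sketch (not from the original file): the movemask-based
   compare yields one bit per byte when the operands are compared as 16
   unsigned bytes, so checking the low tag_len bits verifies exactly
   tag_len bytes of the tag.  Function name is hypothetical. */
static_always_inline int
tag_bytes_match_sketch (__m128i computed, __m128i received, u8 tag_len)
{
  u16 mask = tag_len ? (u16) ((1 << tag_len) - 1) : 0xffff;
  __m128i eq = _mm_cmpeq_epi8 (computed, received);	/* 0xff per equal byte */
  return ((u16) _mm_movemask_epi8 (eq) & mask) == mask;
}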
static_always_inline u32
aesni_ops_enc_aes_gcm (vlib_main_t * vm, vnet_crypto_op_t * ops[], u32 n_ops,
                       aesni_key_size_t ks)
{
  /* ... for each op: fetch the expanded key data and call aes_gcm () with
     is_encrypt = 1, then mark the op done: ... */
      op->status = VNET_CRYPTO_OP_STATUS_COMPLETED;
  /* ... */
}

static_always_inline u32
aesni_ops_dec_aes_gcm (vlib_main_t * vm, vnet_crypto_op_t * ops[], u32 n_ops,
                       aesni_key_size_t ks)
{
  /* ... for each op: call aes_gcm () with is_encrypt = 0; when its return
     value reports a verified tag the op completes, otherwise it fails: ... */
      op->status = VNET_CRYPTO_OP_STATUS_COMPLETED;
  /* ... */
      op->status = VNET_CRYPTO_OP_STATUS_FAIL_BAD_HMAC;
  /* ... */
}
static_always_inline void *
aesni_gcm_key_exp (vnet_crypto_key_t * key, aesni_key_size_t ks)
{
  aes_gcm_key_data_t *kd;
  __m128i H;
  int i;

  /* ... allocate kd and expand the AES round keys into kd->Ke
     (aes_key_expand) ... */

  /* GHASH subkey H = E(K, 0): run the all-zero block through the key
     schedule */
  H = kd->Ke[0];
  for (i = 1; i < AESNI_KEY_ROUNDS (ks); i += 1)
    H = _mm_aesenc_si128 (H, kd->Ke[i]);
  H = _mm_aesenclast_si128 (H, kd->Ke[i]);

  /* ... byte-swap H, precompute its powers into kd->Hi (ghash_precompute)
     and return kd ... */
}
#define foreach_aesni_gcm_handler_type _(128) _(192) _(256)

#define _(x) \
static u32 aesni_ops_dec_aes_gcm_##x					\
(vlib_main_t * vm, vnet_crypto_op_t * ops[], u32 n_ops)		\
{ return aesni_ops_dec_aes_gcm (vm, ops, n_ops, AESNI_KEY_##x); }	\
static u32 aesni_ops_enc_aes_gcm_##x					\
(vlib_main_t * vm, vnet_crypto_op_t * ops[], u32 n_ops)		\
{ return aesni_ops_enc_aes_gcm (vm, ops, n_ops, AESNI_KEY_##x); }	\
static void *aesni_gcm_key_exp_##x (vnet_crypto_key_t * key)		\
{ return aesni_gcm_key_exp (key, AESNI_KEY_##x); }

foreach_aesni_gcm_handler_type;
#undef _

/* ... inside the per-arch init function (crypto_ia32_aesni_gcm_init_sse42 /
   _avx2 / _avx512), with cm pointing to crypto_ia32_main, the handlers and
   the key-expansion function are registered: ... */
#define _(x) \
  vnet_crypto_register_ops_handler (vm, cm->crypto_engine_index,	\
				    VNET_CRYPTO_OP_AES_##x##_GCM_ENC,	\
				    aesni_ops_enc_aes_gcm_##x);		\
  vnet_crypto_register_ops_handler (vm, cm->crypto_engine_index,	\
				    VNET_CRYPTO_OP_AES_##x##_GCM_DEC,	\
				    aesni_ops_dec_aes_gcm_##x);		\
  cm->key_fn[VNET_CRYPTO_ALG_AES_##x##_GCM] = aesni_gcm_key_exp_##x;

  foreach_aesni_gcm_handler_type;
#undef _
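/* For example, with x = 128 the wrapper block above expands to:
 *
 *   static u32 aesni_ops_enc_aes_gcm_128
 *     (vlib_main_t * vm, vnet_crypto_op_t * ops[], u32 n_ops)
 *   { return aesni_ops_enc_aes_gcm (vm, ops, n_ops, AESNI_KEY_128); }
 *
 * (and likewise for the decrypt and key-expansion wrappers), which the
 * init-time block then registers for VNET_CRYPTO_OP_AES_128_GCM_ENC. */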
/*
 * Symbol summary for this file:
 *
 * constants:
 *   static const __m128i zero, last_byte_one;
 *   static const u8x16 bswap_mask, byte_mask_scale;
 *
 * load/store and masking helpers:
 *   static_always_inline __m128i aesni_gcm_bswap (__m128i x);
 *   static_always_inline u8x16 u8x16_is_greater (u8x16 v1, u8x16 v2);
 *   static_always_inline __m128i aesni_gcm_byte_mask (__m128i x, u8 n_bytes);
 *   static_always_inline __m128i aesni_gcm_load_partial (__m128i *p, int n_bytes);
 *   static_always_inline void aesni_gcm_store_partial (void *p, __m128i r, int n_bytes);
 *   static_always_inline void aesni_gcm_load (__m128i *d, __m128i *inv, int n, int n_bytes);
 *   static_always_inline void aesni_gcm_store (__m128i *d, __m128i *outv, int n, int n_bytes);
 *
 * AES counter-mode rounds:
 *   static_always_inline void aesni_gcm_enc_first_round (__m128i *r, __m128i *Y, u32 *ctr, __m128i k, int n_blocks);
 *   static_always_inline void aesni_gcm_enc_round (__m128i *r, __m128i k, int n_blocks);
 *   static_always_inline void aesni_gcm_enc_last_round (__m128i *r, __m128i *d, const __m128i *k, int rounds, int n_blocks);
 *
 * GHASH primitives:
 *   static_always_inline void ghash_mul_first (ghash_data_t *gd, __m128i a, __m128i b);
 *   static_always_inline void ghash_mul_next (ghash_data_t *gd, __m128i a, __m128i b);
 *   static_always_inline void ghash_reduce (ghash_data_t *gd);
 *   static_always_inline void ghash_reduce2 (ghash_data_t *gd);
 *   static_always_inline __m128i ghash_final (ghash_data_t *gd);
 *   static_always_inline __m128i ghash_mul (__m128i a, __m128i b);
 *   static_always_inline void ghash_precompute (__m128i H, __m128i *Hi, int count);
 *
 * GHASH drivers:
 *   static_always_inline __m128i aesni_gcm_ghash_blocks (__m128i T, aes_gcm_key_data_t *kd, const __m128i *in, int n_blocks);
 *   static_always_inline __m128i aesni_gcm_ghash (__m128i T, aes_gcm_key_data_t *kd, const __m128i *in, u32 n_left);
 *   static_always_inline __m128i aesni_gcm_ghash_last (__m128i T, aes_gcm_key_data_t *kd, __m128i *d, int n_blocks, int n_bytes);
 *
 * combined AES-CTR + GHASH core and top-level routines:
 *   static_always_inline __m128i aesni_gcm_calc (__m128i T, aes_gcm_key_data_t *kd, __m128i *d, __m128i *Y, u32 *ctr, __m128i *inv, __m128i *outv, int rounds, int n, int last_block_bytes, int with_ghash, int is_encrypt);
 *   static_always_inline __m128i aesni_gcm_calc_double (__m128i T, aes_gcm_key_data_t *kd, __m128i *d, __m128i *Y, u32 *ctr, __m128i *inv, __m128i *outv, int rounds, int is_encrypt);
 *   static_always_inline __m128i aesni_gcm_enc (__m128i T, aes_gcm_key_data_t *kd, __m128i Y, const u8 *in, const u8 *out, u32 n_left, int rounds);
 *   static_always_inline __m128i aesni_gcm_dec (__m128i T, aes_gcm_key_data_t *kd, __m128i Y, const u8 *in, const u8 *out, u32 n_left, int rounds);
 *   static_always_inline int aes_gcm (const u8 *in, u8 *out, const u8 *addt, const u8 *iv, u8 *tag, u32 data_bytes, u32 aad_bytes, u8 tag_len, aes_gcm_key_data_t *kd, int aes_rounds, int is_encrypt);
 *
 * key handling and vnet_crypto glue:
 *   static_always_inline void aes_key_expand (__m128i *k, u8 *key, aesni_key_size_t ks);
 *   static_always_inline void *aesni_gcm_key_exp (vnet_crypto_key_t *key, aesni_key_size_t ks);
 *   static_always_inline u32 aesni_ops_enc_aes_gcm (vlib_main_t *vm, vnet_crypto_op_t *ops[], u32 n_ops, aesni_key_size_t ks);
 *   static_always_inline u32 aesni_ops_dec_aes_gcm (vlib_main_t *vm, vnet_crypto_op_t *ops[], u32 n_ops, aesni_key_size_t ks);
 *   clib_error_t *crypto_ia32_aesni_gcm_init_sse42 (vlib_main_t *vm);
 *   clib_error_t *crypto_ia32_aesni_gcm_init_avx2 (vlib_main_t *vm);
 *   clib_error_t *crypto_ia32_aesni_gcm_init_avx512 (vlib_main_t *vm);
 *
 * macros and externals referenced: AESNI_KEY_ROUNDS (x),
 * foreach_aesni_gcm_handler_type, static_always_inline,
 * CLIB_CACHE_LINE_BYTES, clib_mem_alloc_aligned (),
 * crypto_ia32_main (crypto_ia32_main_t), vnet_crypto_op_status_t status.
 */