#include <x86intrin.h>

#if __GNUC__ > 4 && !__clang__ && CLIB_DEBUG == 0
#pragma GCC optimize ("O3")
#endif

static const __m128i zero = { 0, 0 };
static const u8x16 bswap_mask = {
  15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0
};

static const u8x16 byte_mask_scale = {
  0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
};
  /* aesni_gcm_bswap (x): byte-reverse a 128-bit block with PSHUFB */
  return _mm_shuffle_epi8 (x, (__m128i) bswap_mask);

  /* aesni_gcm_byte_mask (x, n_bytes): zero everything past the first n_bytes */
  return _mm_blendv_epi8 (zero, x, (__m128i) mask);
  /* aesni_gcm_load_partial (p, n_bytes), AVX-512 path: byte-masked load */
  return _mm_mask_loadu_epi8 (zero, (1 << n_bytes) - 1, p);

  /* aesni_gcm_store_partial (p, r, n_bytes): byte-masked store */
  _mm_mask_storeu_epi8 (p, (1 << n_bytes) - 1, r);   /* AVX-512 path */
  _mm_maskmoveu_si128 (r, (__m128i) mask, p);        /* SSE path (MASKMOVDQU) */
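/*
 * Hedged sketch (illustration, not quoted from this file): without AVX-512
 * byte masks, a partial load can be built from a full 16-byte load that is
 * permitted to over-read (CLIB_MEM_OVERFLOW_LOAD from the symbol index)
 * followed by aesni_gcm_byte_mask () to clear the bytes past n_bytes.  The
 * function name below is made up for the example.
 */
static_always_inline __m128i
example_load_partial_sse (__m128i * p, int n_bytes)
{
  return aesni_gcm_byte_mask (CLIB_MEM_OVERFLOW_LOAD (_mm_loadu_si128, p),
                              n_bytes);
}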
  /* aesni_gcm_load: load n blocks, the last one possibly partial */
  for (int i = 0; i < n - 1; i++)
    d[i] = _mm_loadu_si128 (inv + i);
  d[n - 1] = n_bytes ? aesni_gcm_load_partial (inv + n - 1, n_bytes) :
    _mm_loadu_si128 (inv + n - 1);
  /* aesni_gcm_store: store n blocks, the last one possibly partial */
  for (int i = 0; i < n - 1; i++)
    _mm_storeu_si128 (outv + i, d[i]);
  if (n_bytes)
    aesni_gcm_store_partial (outv + n - 1, d[n - 1], n_bytes);
  else
    _mm_storeu_si128 (outv + n - 1, d[n - 1]);
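/*
 * Hedged usage sketch (illustration only, not from this file): a 52-byte
 * tail is three full 16-byte blocks plus 4 valid bytes in the fourth block,
 * so n = 4 and n_bytes = 4.  The helper name is made up for the example.
 */
static_always_inline void
example_copy_52_bytes (__m128i * inv, __m128i * outv)
{
  __m128i d[4];
  aesni_gcm_load (d, inv, 4, 4);     /* 3 full loads + one 4-byte partial load */
  aesni_gcm_store (d, outv, 4, 4);   /* 3 full stores + one 4-byte partial store */
}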
  /* aesni_gcm_enc_first_round: two ways to advance the big-endian block
     counter kept in the last 32-bit word of Y */

  /* common case: the low counter byte does not wrap within this batch */
  for (i = 0; i < n_blocks; i++)
    Y[0] = _mm_add_epi32 (Y[0], last_byte_one);

  /* wrap case: rebuild the whole byte-swapped counter word */
  for (i = 0; i < n_blocks; i++)
    Y[0] = _mm_insert_epi32 (Y[0], clib_host_to_net_u32 (++ctr[0]), 3);
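/*
 * Hedged sketch of the full first-round logic implied above.  The value of
 * last_byte_one and the exact wrap test are assumptions (the counter lives
 * big-endian in the last word of Y, so adding 1 to the most significant byte
 * of the upper 64-bit lane increments it); only the two counter-update lines
 * are quoted from the file.
 */
static const __m128i last_byte_one_example = { 0, 1ULL << 56 };

static_always_inline void
example_enc_first_round (__m128i * r, __m128i * Y, u32 * ctr, __m128i k,
                         int n_blocks)
{
  if ((u8) ctr[0] < 256 - n_blocks)    /* low byte cannot wrap */
    {
      for (int i = 0; i < n_blocks; i++)
        {
          Y[0] = _mm_add_epi32 (Y[0], last_byte_one_example);
          r[i] = k ^ Y[0];             /* round 0: AddRoundKey */
        }
      ctr[0] += n_blocks;
    }
  else
    for (int i = 0; i < n_blocks; i++)
      {
        Y[0] = _mm_insert_epi32 (Y[0], clib_host_to_net_u32 (++ctr[0]), 3);
        r[i] = k ^ Y[0];
      }
}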
  /* aesni_gcm_enc_round: one AESENC round over all in-flight blocks */
  for (int i = 0; i < n_blocks; i++)
    r[i] = _mm_aesenc_si128 (r[i], k);
  /* aesni_gcm_enc_last_round (r, d, k, int rounds, int n_blocks) */

  /* extra rounds for AES-192 (12) and AES-256 (14) */
  for (int i = 10; i < rounds; i++)
    aesni_gcm_enc_round (r, k[i], n_blocks);

  for (int i = 0; i < n_blocks; i++)
    d[i] ^= _mm_aesenclast_si128 (r[i], k[rounds]);
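/*
 * Hedged note on the loop bound above: "rounds" follows standard AES, i.e.
 * 10, 12 or 14 rounds for 128-, 192- and 256-bit keys.  AESNI_KEY_ROUNDS (x)
 * from the symbol index presumably encodes this mapping; the macro below is
 * an assumed illustration, not the file's definition.
 */
#define EXAMPLE_AES_KEY_ROUNDS(key_bits) (6 + (key_bits) / 32)  /* 128->10, 192->12, 256->14 */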
  /* aesni_gcm_ghash_blocks (T, kd, const __m128i * in, int n_blocks):
     fold n_blocks of data into the GHASH state T */
  const __m128i *Hi = kd->Hi + n_blocks - 1;   /* highest power of H first */

  for (int i = 1; i < n_blocks; i++)
    /* ghash_mul_next () on block i and Hi[-i]; see the sketch below */
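/*
 * Hedged sketch of the whole multi-block GHASH flow, assembled from the
 * helper names in the symbol index (ghash_mul_first / ghash_mul_next /
 * ghash_reduce / ghash_reduce2 / ghash_final).  The pairing of block i with
 * the power Hi[-i] is inferred from the fragment above, not quoted.
 */
static_always_inline __m128i
example_ghash_blocks (__m128i T, aes_gcm_key_data_t * kd,
                      const __m128i * in, int n_blocks)
{
  ghash_data_t _gd, *gd = &_gd;
  const __m128i *Hi = kd->Hi + n_blocks - 1;

  /* first block is XORed with the running tag T and multiplied by H^n */
  ghash_mul_first (gd, aesni_gcm_bswap (_mm_loadu_si128 (in)) ^ T, Hi[0]);
  for (int i = 1; i < n_blocks; i++)
    ghash_mul_next (gd, aesni_gcm_bswap (_mm_loadu_si128 (in + i)), Hi[-i]);

  /* one reduction of the accumulated product at the end */
  ghash_reduce (gd);
  ghash_reduce2 (gd);
  return ghash_final (gd);
}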
  /* aesni_gcm_ghash: hash whole 16-byte blocks, eight at a time */
  while (n_left >= 128)
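/*
 * Hedged sketch of the bulk loop above: 128 bytes are eight blocks per
 * iteration; the 4/2/1-block and partial-block tails are omitted here.
 */
  while (n_left >= 128)
    {
      T = aesni_gcm_ghash_blocks (T, kd, in, 8);
      n_left -= 128;
      in += 8;
    }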
  /* aesni_gcm_calc (T, kd, d, Y, ctr, inv, outv, rounds, n, last_block_bytes,
     with_ghash, is_encrypt): encrypt/decrypt up to 4 counter blocks and
     optionally fold previously produced blocks into GHASH */
  const __m128i *k = kd->Ke;
  int hidx = is_encrypt ? 4 : n, didx = 0;

  _mm_prefetch (inv + 4, _MM_HINT_T0);
  /* GHASH work for already-available blocks is interleaved between groups of
     AES rounds; each interleaved step is guarded by the same test, which
     appears three times in the original function: */
  if (with_ghash && hidx)
  /* aesni_gcm_calc_double (T, kd, d, Y, ctr, inv, outv, rounds, is_encrypt):
     like aesni_gcm_calc () but keeps two groups of four blocks in flight */
  const __m128i *k = kd->Ke;
  /* aesni_gcm_ghash_last (T, kd, d, int n_blocks, int n_bytes): fold the
     final n_blocks, whose last block may be partial (n_bytes), into GHASH */
  /* aesni_gcm_enc (T, kd, Y, const u8 * in, const u8 * out, u32 n_left,
     int rounds) */
  __m128i *inv = (__m128i *) in, *outv = (__m128i *) out;
  /* fewer than 64 bytes in total: a single aesni_gcm_calc () call handles
     1-4 blocks, the last one possibly partial; the trailing with_ghash /
     is_encrypt arguments are truncated in this excerpt */
      aesni_gcm_calc (T, kd, d, &Y, &ctr, inv, outv, rounds, 4, n_left,
  else if (n_left > 32)
      aesni_gcm_calc (T, kd, d, &Y, &ctr, inv, outv, rounds, 3, n_left,
  else if (n_left > 16)
      aesni_gcm_calc (T, kd, d, &Y, &ctr, inv, outv, rounds, 2, n_left,
      aesni_gcm_calc (T, kd, d, &Y, &ctr, inv, outv, rounds, 1, n_left,

  /* otherwise start with a full 4-block batch (no GHASH input yet) */
  aesni_gcm_calc (T, kd, d, &Y, &ctr, inv, outv, rounds, 4, 0,
  /* bulk loop: 8 blocks (128 bytes) per iteration */
  while (n_left >= 128)

  /* one more 4-block batch if at least 64 bytes remain, folding the previous
     batch into GHASH */
  T = aesni_gcm_calc (T, kd, d, &Y, &ctr, inv, outv, rounds, 4, 0,

  /* final 1-4 blocks, the last one possibly partial; the trailing
     with_ghash / is_encrypt arguments are truncated in this excerpt */
  T = aesni_gcm_calc (T, kd, d, &Y, &ctr, inv, outv, rounds, 4, n_left,
  T = aesni_gcm_calc (T, kd, d, &Y, &ctr, inv, outv, rounds, 3, n_left,
  T = aesni_gcm_calc (T, kd, d, &Y, &ctr, inv, outv, rounds, 2, n_left,
  T = aesni_gcm_calc (T, kd, d, &Y, &ctr, inv, outv, rounds, 1, n_left,
  /* aesni_gcm_dec (T, kd, Y, const u8 * in, const u8 * out, u32 n_left,
     int rounds) */
  __m128i *inv = (__m128i *) in, *outv = (__m128i *) out;

  /* bulk loop: 8 blocks per iteration */
  while (n_left >= 128)

  /* at least 64 bytes remaining: one 4-block batch,
     with_ghash = 1, is_encrypt = 0 */
  T = aesni_gcm_calc (T, kd, d, &Y, &ctr, inv, outv, rounds, 4, 0, 1, 0);

  /* final 1-4 blocks, the last one possibly partial (call truncated here) */
  return aesni_gcm_calc (T, kd, d, &Y, &ctr, inv, outv, rounds, 1, n_left,
  /* aes_gcm (in, out, addt, iv, tag, data_bytes, aad_bytes, tag_len, kd,
     int aes_rounds, int is_encrypt) */
  __m128i r, Y0, T = { };

  _mm_prefetch (iv, _MM_HINT_T0);
  _mm_prefetch (in, _MM_HINT_T0);

  /* GHASH of the additional authenticated data, with a fast path for the
     common IPsec AAD sizes */
  else if (aad_bytes == 12)

  /* initial counter block: the 96-bit IV with a big-endian 1 in the last
     32-bit word */
  Y0 = _mm_insert_epi32 (Y0, clib_host_to_net_u32 (1), 3);
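/*
 * Hedged sketch: for the 96-bit IVs used here, GCM defines the initial
 * counter block as J0 = IV || 0x00000001.  The partial-load helper and the
 * function name below are assumptions; only the _mm_insert_epi32 () line
 * above is quoted from the file.
 */
static_always_inline __m128i
example_make_j0 (const u8 * iv /* 12 bytes */ )
{
  __m128i Y0 = aesni_gcm_load_partial ((__m128i *) iv, 12);   /* IV in bytes 0..11 */
  return _mm_insert_epi32 (Y0, clib_host_to_net_u32 (1), 3);  /* counter = 1 */
}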
  if (is_encrypt)
    T = aesni_gcm_enc (T, kd, Y0, in, out, data_bytes, aes_rounds);
  else
    T = aesni_gcm_dec (T, kd, Y0, in, out, data_bytes, aes_rounds);

  _mm_prefetch (tag, _MM_HINT_T0);
  /* encrypt the initial counter block Y0; the final tag is T ^ E (K, Y0).
     The AES rounds are split into groups so other work can be interleaved
     between them. */
  for (i = 1; i < 5; i += 1)
    r = _mm_aesenc_si128 (r, kd->Ke[i]);

  for (; i < 9; i += 1)
    r = _mm_aesenc_si128 (r, kd->Ke[i]);

  for (; i < aes_rounds; i += 1)
    r = _mm_aesenc_si128 (r, kd->Ke[i]);
  r = _mm_aesenclast_si128 (r, kd->Ke[aes_rounds]);
  /* encrypt path: store the computed tag */
  _mm_storeu_si128 ((__m128i *) tag, T);

  /* decrypt path: compare the computed tag T against the received one,
     honouring truncated tag lengths */
  u16 tag_mask = tag_len ? (1 << tag_len) - 1 : 0xffff;
  r = _mm_loadu_si128 ((__m128i *) tag);
  if (_mm_movemask_epi8 (r == T) != tag_mask)
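/*
 * Worked example of the check above: _mm_movemask_epi8 () yields one bit per
 * byte, so tag_len = 12 gives tag_mask = (1 << 12) - 1 = 0x0fff and only the
 * first 12 tag bytes must match; tag_len = 0 means a full 16-byte tag
 * (0xffff).  The sketch below spells this out with an explicit per-byte
 * compare through the u8x16 type; the helper name is made up.
 */
static_always_inline int
example_tag_matches (__m128i T, const u8 * received, u8 tag_len)
{
  u16 tag_mask = tag_len ? (1 << tag_len) - 1 : 0xffff;
  u8x16 r = (u8x16) _mm_loadu_si128 ((__m128i *) received);
  return _mm_movemask_epi8 ((__m128i) (r == (u8x16) T)) == tag_mask;
}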
  /* aesni_ops_enc_aes_gcm (): encryption cannot fail */
  op->status = VNET_CRYPTO_OP_STATUS_COMPLETED;

  /* aesni_ops_dec_aes_gcm (): result depends on the tag check */
  op->status = VNET_CRYPTO_OP_STATUS_COMPLETED;
  op->status = VNET_CRYPTO_OP_STATUS_FAIL_BAD_HMAC;
  /* aesni_gcm_key_exp (): derive the hash subkey H = E (K, 0) using the
     freshly expanded round keys */
  H = _mm_aesenc_si128 (H, kd->Ke[i]);
  H = _mm_aesenclast_si128 (H, kd->Ke[i]);
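/*
 * Hedged sketch of the surrounding key-expansion step: H is the AES
 * encryption of the all-zero block, byte-swapped and then expanded into the
 * table of powers kd->Hi via ghash_precompute () from the symbol index.
 * The loop bounds, the power count (8) and the helper name are assumptions.
 */
static_always_inline void
example_precompute_hash_key (aes_gcm_key_data_t * kd, int rounds)
{
  __m128i H = kd->Ke[0];   /* zero block XOR round key 0 */
  for (int i = 1; i < rounds; i++)
    H = _mm_aesenc_si128 (H, kd->Ke[i]);
  H = _mm_aesenclast_si128 (H, kd->Ke[rounds]);
  H = aesni_gcm_bswap (H);
  ghash_precompute (H, (__m128i *) kd->Hi, 8);   /* H^1 .. H^8 */
}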
#define foreach_aesni_gcm_handler_type _(128) _(192) _(256)

/* per-key-size wrapper functions (the body of the _() macro that
   foreach_aesni_gcm_handler_type instantiates for 128, 192 and 256) */
static u32 aesni_ops_dec_aes_gcm_##x \
(vlib_main_t * vm, vnet_crypto_op_t * ops[], u32 n_ops) \
{ return aesni_ops_dec_aes_gcm (vm, ops, n_ops, AESNI_KEY_##x); } \
static u32 aesni_ops_enc_aes_gcm_##x \
(vlib_main_t * vm, vnet_crypto_op_t * ops[], u32 n_ops) \
{ return aesni_ops_enc_aes_gcm (vm, ops, n_ops, AESNI_KEY_##x); } \
static void * aesni_gcm_key_exp_##x (vnet_crypto_key_t *key) \
{ return aesni_gcm_key_exp (key, AESNI_KEY_##x); }

/* per-key-size registration with the vnet crypto layer, also expanded once
   per key size */
vnet_crypto_register_ops_handler (vm, cm->crypto_engine_index, \
                                  VNET_CRYPTO_OP_AES_##x##_GCM_ENC, \
                                  aesni_ops_enc_aes_gcm_##x); \
vnet_crypto_register_ops_handler (vm, cm->crypto_engine_index, \
                                  VNET_CRYPTO_OP_AES_##x##_GCM_DEC, \
                                  aesni_ops_dec_aes_gcm_##x); \
cm->key_fn[VNET_CRYPTO_ALG_AES_##x##_GCM] = aesni_gcm_key_exp_##x;
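/*
 * Illustration (not part of the file): expanding the wrapper macro for the
 * 128-bit case produces, in effect,
 *
 *   static u32 aesni_ops_enc_aes_gcm_128 (vlib_main_t * vm,
 *                                         vnet_crypto_op_t * ops[], u32 n_ops)
 *   { return aesni_ops_enc_aes_gcm (vm, ops, n_ops, AESNI_KEY_128); }
 *
 * which the init function then registers for VNET_CRYPTO_OP_AES_128_GCM_ENC.
 */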
/*
 * Symbol index from the source listing: macros, globals and function
 * signatures defined or referenced in this file (bodies elided).
 */

/* macros */
#define static_always_inline
#define CLIB_CACHE_LINE_BYTES
#define CLIB_MEM_OVERFLOW_LOAD(f, src)
#define AESNI_KEY_ROUNDS(x)
#define foreach_aesni_gcm_handler_type

/* globals, constants and fields */
crypto_ia32_main_t crypto_ia32_main;
static const __m128i zero;
static const __m128i last_byte_one;
static const u8x16 bswap_mask;
static const u8x16 byte_mask_scale;
vnet_crypto_op_status_t status;   /* field of vnet_crypto_op_t */

/* generic helpers */
static void * clib_mem_alloc_aligned(uword size, uword align);
static_always_inline u8x16 u8x16_is_greater(u8x16 v1, u8x16 v2);
static_always_inline void aes_key_expand(__m128i *k, u8 *key, aesni_key_size_t ks);

/* GHASH helpers */
static_always_inline __m128i ghash_mul(__m128i a, __m128i b);
static_always_inline void ghash_mul_first(ghash_data_t *gd, __m128i a, __m128i b);
static_always_inline void ghash_mul_next(ghash_data_t *gd, __m128i a, __m128i b);
static_always_inline void ghash_reduce(ghash_data_t *gd);
static_always_inline void ghash_reduce2(ghash_data_t *gd);
static_always_inline __m128i ghash_final(ghash_data_t *gd);
static_always_inline void ghash_precompute(__m128i H, __m128i *Hi, int count);

/* AES-GCM building blocks */
static_always_inline __m128i aesni_gcm_bswap(__m128i x);
static_always_inline __m128i aesni_gcm_byte_mask(__m128i x, u8 n_bytes);
static_always_inline __m128i aesni_gcm_load_partial(__m128i *p, int n_bytes);
static_always_inline void aesni_gcm_store_partial(void *p, __m128i r, int n_bytes);
static_always_inline void aesni_gcm_load(__m128i *d, __m128i *inv, int n, int n_bytes);
static_always_inline void aesni_gcm_store(__m128i *d, __m128i *outv, int n, int n_bytes);
static_always_inline void aesni_gcm_enc_first_round(__m128i *r, __m128i *Y, u32 *ctr, __m128i k, int n_blocks);
static_always_inline void aesni_gcm_enc_round(__m128i *r, __m128i k, int n_blocks);
static_always_inline void aesni_gcm_enc_last_round(__m128i *r, __m128i *d, const __m128i *k, int rounds, int n_blocks);
static_always_inline __m128i aesni_gcm_ghash_blocks(__m128i T, aes_gcm_key_data_t *kd, const __m128i *in, int n_blocks);
static_always_inline __m128i aesni_gcm_ghash(__m128i T, aes_gcm_key_data_t *kd, const __m128i *in, u32 n_left);
static_always_inline __m128i aesni_gcm_ghash_last(__m128i T, aes_gcm_key_data_t *kd, __m128i *d, int n_blocks, int n_bytes);
static_always_inline __m128i aesni_gcm_calc(__m128i T, aes_gcm_key_data_t *kd, __m128i *d, __m128i *Y, u32 *ctr, __m128i *inv, __m128i *outv, int rounds, int n, int last_block_bytes, int with_ghash, int is_encrypt);
static_always_inline __m128i aesni_gcm_calc_double(__m128i T, aes_gcm_key_data_t *kd, __m128i *d, __m128i *Y, u32 *ctr, __m128i *inv, __m128i *outv, int rounds, int is_encrypt);
static_always_inline __m128i aesni_gcm_enc(__m128i T, aes_gcm_key_data_t *kd, __m128i Y, const u8 *in, const u8 *out, u32 n_left, int rounds);
static_always_inline __m128i aesni_gcm_dec(__m128i T, aes_gcm_key_data_t *kd, __m128i Y, const u8 *in, const u8 *out, u32 n_left, int rounds);
static_always_inline int aes_gcm(const u8 *in, u8 *out, const u8 *addt, const u8 *iv, u8 *tag, u32 data_bytes, u32 aad_bytes, u8 tag_len, aes_gcm_key_data_t *kd, int aes_rounds, int is_encrypt);

/* vnet crypto glue */
static_always_inline void * aesni_gcm_key_exp(vnet_crypto_key_t *key, aesni_key_size_t ks);
static_always_inline u32 aesni_ops_enc_aes_gcm(vlib_main_t *vm, vnet_crypto_op_t *ops[], u32 n_ops, aesni_key_size_t ks);
static_always_inline u32 aesni_ops_dec_aes_gcm(vlib_main_t *vm, vnet_crypto_op_t *ops[], u32 n_ops, aesni_key_size_t ks);
clib_error_t * crypto_ia32_aesni_gcm_init_sse42(vlib_main_t *vm);
clib_error_t * crypto_ia32_aesni_gcm_init_avx2(vlib_main_t *vm);
clib_error_t * crypto_ia32_aesni_gcm_init_avx512(vlib_main_t *vm);