25 #if __GNUC__ > 4 && !__clang__ && CLIB_DEBUG == 0
26 #pragma GCC optimize ("O3")
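/* note: on GCC (not clang) release builds (CLIB_DEBUG == 0) this file is
   compiled at -O3 so the AES/GHASH helpers below are aggressively optimized */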
73 for (int i = 0; i < n_blocks; i++)
82 for (int i = 0; i < n_blocks; i++)
86 ctr->Y[3] = clib_host_to_net_u32 (ctr->counter + 1);
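/* note: the last u32 of the counter block Y is kept big-endian, so on this
   path it is rebuilt from the host-order counter with a byte swap */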
94 for (int i = 0; i < n_blocks; i++)
100 int rounds, int n_blocks)
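/* note: AES-128 uses 10 rounds; the i = 10 .. rounds-1 loop below only runs
   the additional rounds needed for AES-192 (12) and AES-256 (14) */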
104 for (int i = 10; i < rounds; i++)
107 for (int i = 0; i < n_blocks; i++)
114 u8x16u * in, int n_blocks)
119 for (int i = 1; i < n_blocks; i++)
423 static const u32x16 ctr_inv_1234 = {
424 0, 0, 0, 1 << 24, 0, 0, 0, 2 << 24, 0, 0, 0, 3 << 24, 0, 0, 0, 4 << 24,
427 static const u32x16 ctr_inv_4444 = {
428 0, 0, 0, 4 << 24, 0, 0, 0, 4 << 24, 0, 0, 0, 4 << 24, 0, 0, 0, 4 << 24
431 static const u32x16 ctr_1234 = {
432 1, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0, 4, 0, 0, 0,
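/*
 * Illustrative standalone sketch (not part of this file): the ctr_inv_*
 * constants above add n << 24 to the last little-endian u32 of each 128-bit
 * counter lane.  On a little-endian host that word holds the big-endian GCM
 * block counter, so the addition increments the counter by n as long as its
 * low byte does not overflow (the overflow case is handled separately).
 */
#include <stdio.h>
#include <stdint.h>
#include <arpa/inet.h>		/* htonl / ntohl */

int
main (void)
{
  uint32_t counter = 5;			/* host-order block counter */
  uint32_t y3 = htonl (counter);	/* stored big-endian, as in ctr->Y */

  y3 += 1 << 24;			/* same trick as ctr_inv_1234 / ctr_inv_4444 */

  printf ("%u\n", ntohl (y3));		/* prints 6 */
  return 0;
}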
450 ctr->Y4 += ctr_inv_4444;
460 Yc = u32x16_splat (ctr->counter + 4 * (i + 1)) + ctr_1234;
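/* note: ctr_1234 holds {1,2,3,4} in host byte order, so Yc is the next four
   counter values (ctr->counter + 4*(i+1) + 1..4) computed natively; this is
   the path taken when the ctr_inv_4444 shortcut cannot be used */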
470 ctr->Y4 += ctr_inv_4444;
477 aes4_gcm_enc_round (u8x64 * r, u8x64 k, int n_blocks)
479 for (int i = 0; i < n_blocks; i++)
480 r[i] = aes_enc_round_x4 (r[i], k);
485 int rounds, int n_blocks)
489 for (int i = 10; i < rounds; i++)
490 aes4_gcm_enc_round (r, k[i], n_blocks);
492 for (int i = 0; i < n_blocks; i++)
493 d[i] ^= aes_enc_last_round_x4 (r[i], k[rounds]);
501 ghash4_data_t _gd, *gd = &_gd;
503 int i, ghash_blocks, gc = 1;
504 u8x64u *Hi4, *inv = (u8x64u *) in, *outv = (u8x64u *) out;
506 u64 byte_mask = _bextr_u64 (-1LL, 0, last_4block_bytes);
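/* note: _bextr_u64 (-1LL, 0, n) sets the low n bits, i.e. one mask bit per
   valid byte of the partial last 64-byte group; equivalent to
   (n == 64 ? ~0ULL : (1ULL << n) - 1).  It drives the u8x64_mask_load /
   u8x64_mask_store calls below. */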
513 Hi4 = (u8x64u *) (kd->Hi + NUM_HI - ghash_blocks * 4);
519 int n_128bit_blocks = n * 4;
524 n_128bit_blocks += ((last_4block_bytes + 15) >> 4) - 4;
525 Hi4 = (u8x64u *) (kd->Hi + NUM_HI - n_128bit_blocks);
529 aes4_gcm_enc_first_round (r, ctr, rk[0], n);
530 aes4_gcm_enc_round (r, rk[1], n);
539 d[i] = u8x64_mask_load (u8x64_splat (0), inv + i, byte_mask);
548 aes4_gcm_enc_round (r, rk[2], n);
549 aes4_gcm_enc_round (r, rk[3], n);
556 aes4_gcm_enc_round (r, rk[4], n);
557 aes4_gcm_enc_round (r, rk[5], n);
564 aes4_gcm_enc_round (r, rk[6], n);
565 aes4_gcm_enc_round (r, rk[7], n);
578 d[i] = u8x64_mask_load (u8x64_splat (0), inv + i, byte_mask);
582 aes4_gcm_enc_round (r, rk[8], n);
583 aes4_gcm_enc_round (r, rk[9], n);
586 aes4_gcm_enc_last_round (r, d, rk, rounds, n);
593 u8x64_mask_store (d[i], outv + i, byte_mask);
602 return ghash4_final (gd);
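/* note: ghash4_final reduces the four parallel 128-bit GHASH lanes
   accumulated in gd to the single running hash value that is returned */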
611 ghash4_data_t _gd, *gd = &_gd;
614 u8x64u *inv = (u8x64u *) in, *outv = (u8x64u *) out;
617 aes4_gcm_enc_first_round (r, ctr, rk[0], 4);
618 aes4_gcm_enc_round (r, rk[1], 4);
622 for (int i = 0; i < 4; i++)
630 aes4_gcm_enc_round (r, rk[2], 4);
631 aes4_gcm_enc_round (r, rk[3], 4);
637 aes4_gcm_enc_round (r, rk[4], 4);
638 aes4_gcm_enc_round (r, rk[5], 4);
644 aes4_gcm_enc_round (r, rk[6], 4);
645 aes4_gcm_enc_round (r, rk[7], 4);
651 aes4_gcm_enc_round (r, rk[8], 4);
652 aes4_gcm_enc_round (r, rk[9], 4);
656 for (int i = 0; i < 4; i++)
660 aes4_gcm_enc_last_round (r, d, rk, rounds, 4);
663 for (int i = 0; i < 4; i++)
668 for (int i = 0; i < 4; i++)
675 aes4_gcm_enc_first_round (r, ctr, rk[0], 4);
676 aes4_gcm_enc_round (r, rk[1], 4);
682 aes4_gcm_enc_round (r, rk[2], 4);
683 aes4_gcm_enc_round (r, rk[3], 4);
689 aes4_gcm_enc_round (r, rk[4], 4);
690 aes4_gcm_enc_round (r, rk[5], 4);
696 aes4_gcm_enc_round (r, rk[6], 4);
697 aes4_gcm_enc_round (r, rk[7], 4);
703 aes4_gcm_enc_round (r, rk[8], 4);
704 aes4_gcm_enc_round (r, rk[9], 4);
711 for (int i = 0; i < 4; i++)
715 aes4_gcm_enc_last_round (r, d, rk, rounds, 4);
718 for (int i = 0; i < 4; i++)
722 return ghash4_final (gd);
727 int n, int last_4block_bytes)
729 ghash4_data_t _gd, *gd = &_gd;
732 u64 byte_mask = _bextr_u64 (-1LL, 0, last_4block_bytes);
733 n_128bit_blocks = (n - 1) * 4 + ((last_4block_bytes + 15) >> 4);
734 Hi4 = (u8x64u *) (kd->Hi + NUM_HI - n_128bit_blocks);
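/* note: kd->Hi holds precomputed powers of the hash key; the
   NUM_HI - n_128bit_blocks offset selects the window of powers needed for
   the remaining 128-bit blocks */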
747 return ghash4_final (gd);
753 u8x16u * inv, u8x16u * outv, u32 n_left, int rounds)
768 aes4_gcm_calc (T, kd, d4, ctr, inv, outv, rounds, 4, n_left, f);
769 return aes4_gcm_ghash_last (T, kd, d4, 4, n_left);
774 aes4_gcm_calc (T, kd, d4, ctr, inv, outv, rounds, 3, n_left, f);
775 return aes4_gcm_ghash_last (T, kd, d4, 3, n_left);
780 aes4_gcm_calc (T, kd, d4, ctr, inv, outv, rounds, 2, n_left, f);
781 return aes4_gcm_ghash_last (T, kd, d4, 2, n_left);
785 aes4_gcm_calc (T, kd, d4, ctr, inv, outv, rounds, 1, n_left, f);
786 return aes4_gcm_ghash_last (T, kd, d4, 1, n_left);
790 aes4_gcm_calc (T, kd, d4, ctr, inv, outv, rounds, 4, 0, f);
801 T = aes4_gcm_calc_double (T, kd, d4, ctr, inv, outv, rounds, f);
811 T = aes4_gcm_calc (T, kd, d4, ctr, inv, outv, rounds, 4, 0, f);
820 return aes4_gcm_ghash_last (T, kd, d4, 4, 64);
827 T = aes4_gcm_calc (T, kd, d4, ctr, inv, outv, rounds, 4, n_left, f);
828 return aes4_gcm_ghash_last (T, kd, d4, 4, n_left);
834 T = aes4_gcm_calc (T, kd, d4, ctr, inv, outv, rounds, 3, n_left, f);
835 return aes4_gcm_ghash_last (T, kd, d4, 3, n_left);
841 T = aes4_gcm_calc (T, kd, d4, ctr, inv, outv, rounds, 2, n_left, f);
842 return aes4_gcm_ghash_last (T, kd, d4, 2, n_left);
845 T = aes4_gcm_calc (T, kd, d4, ctr, inv, outv, rounds, 1, n_left, f);
846 return aes4_gcm_ghash_last (T, kd, d4, 1, n_left);
898 T = aes_gcm_calc (T, kd, d, ctr, inv, outv, rounds, 4, 0, f);
939 u8x16u * inv, u8x16u * outv, u32 n_left, int rounds)
947 T = aes4_gcm_calc_double (T, kd, d4, ctr, inv, outv, rounds, f);
957 T = aes4_gcm_calc (T, kd, d4, ctr, inv, outv, rounds, 4, 0, f);
971 return aes4_gcm_calc (T, kd, d4, ctr, inv, outv, rounds, 4,
974 return aes4_gcm_calc (T, kd, d4, ctr, inv, outv, rounds, 3,
977 return aes4_gcm_calc (T, kd, d4, ctr, inv, outv, rounds, 2,
979 return aes4_gcm_calc (T, kd, d4, ctr, inv, outv, rounds, 1, n_left, f);
994 T = aes_gcm_calc (T, kd, d, ctr, inv, outv, rounds, 4, 0, f);
1021 aes_gcm (u8x16u * in, u8x16u * out, u8x16u * addt, u8x16u * iv, u8x16u * tag,
1023 int aes_rounds, int is_encrypt)
1038 else if (aad_bytes == 12)
1054 T = aes_gcm_enc (T, kd, ctr, in, out, data_bytes, aes_rounds);
1056 T = aes_gcm_dec (T, kd, ctr, in, out, data_bytes, aes_rounds);
1062 r = (u8x16) ((u64x2) {data_bytes, aad_bytes} << 3);
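/* note: the final GHASH block is the bit lengths of the data and AAD, hence
   the byte counts shifted left by 3 */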
1068 for (i = 1; i < 5; i += 1)
1072 for (; i < 9; i += 1)
1075 for (; i < aes_rounds; i += 1)
1094 u16 tag_mask = tag_len ? (1 << tag_len) - 1 : 0xffff;
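/* note: one mask bit per tag byte to check/store; tag_len == 0 falls back to
   the full 16-byte tag (0xffff) */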
1116 op->status = VNET_CRYPTO_OP_STATUS_COMPLETED;
1140 (u8x16u *) op->iv, (u8x16u *) op->tag, op->len,
1146 op->status = VNET_CRYPTO_OP_STATUS_COMPLETED;
1150 op->status = VNET_CRYPTO_OP_STATUS_FAIL_BAD_HMAC;
1186 #define foreach_aes_gcm_handler_type _(128) _(192) _(256)
1189 static u32 aes_ops_dec_aes_gcm_##x \
1190 (vlib_main_t * vm, vnet_crypto_op_t * ops[], u32 n_ops) \
1191 { return aes_ops_dec_aes_gcm (vm, ops, n_ops, AES_KEY_##x); } \
1192 static u32 aes_ops_enc_aes_gcm_##x \
1193 (vlib_main_t * vm, vnet_crypto_op_t * ops[], u32 n_ops) \
1194 { return aes_ops_enc_aes_gcm (vm, ops, n_ops, AES_KEY_##x); } \
1195 static void * aes_gcm_key_exp_##x (vnet_crypto_key_t *key) \
1196 { return aes_gcm_key_exp (key, AES_KEY_##x); }
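/* note: foreach_aes_gcm_handler_type expands _(x) for x = 128, 192 and 256,
   so the block above generates the per-key-size encrypt/decrypt handlers and
   key-expansion functions registered below */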
1217 vnet_crypto_register_ops_handler (vm, cm->crypto_engine_index, \
1218 VNET_CRYPTO_OP_AES_##x##_GCM_ENC, \
1219 aes_ops_enc_aes_gcm_##x); \
1220 vnet_crypto_register_ops_handler (vm, cm->crypto_engine_index, \
1221 VNET_CRYPTO_OP_AES_##x##_GCM_DEC, \
1222 aes_ops_dec_aes_gcm_##x); \
1223 cm->key_fn[VNET_CRYPTO_ALG_AES_##x##_GCM] = aes_gcm_key_exp_##x;