111 #if defined (__PCLMUL__) 112 return (u8x16) _mm_clmulepi64_si128 ((__m128i) a, (__m128i) b, 0x00);
113 #elif defined (__ARM_FEATURE_CRYPTO) 114 return (u8x16) vmull_p64 ((poly64_t) vget_low_p64 ((poly64x2_t) a),
115 (poly64_t) vget_low_p64 ((poly64x2_t) b));
122 #if defined (__PCLMUL__) 123 return (u8x16) _mm_clmulepi64_si128 ((__m128i) a, (__m128i) b, 0x01);
124 #elif defined (__ARM_FEATURE_CRYPTO) 125 return (u8x16) vmull_p64 ((poly64_t) vget_high_p64 ((poly64x2_t) a),
126 (poly64_t) vget_low_p64 ((poly64x2_t) b));
133 #if defined (__PCLMUL__) 134 return (u8x16) _mm_clmulepi64_si128 ((__m128i) a, (__m128i) b, 0x10);
135 #elif defined (__ARM_FEATURE_CRYPTO) 136 return (u8x16) vmull_p64 ((poly64_t) vget_low_p64 ((poly64x2_t) a),
137 (poly64_t) vget_high_p64 ((poly64x2_t) b));
144 #if defined (__PCLMUL__) 145 return (u8x16) _mm_clmulepi64_si128 ((__m128i) a, (__m128i) b, 0x11);
146 #elif defined (__ARM_FEATURE_CRYPTO) 147 return (u8x16) vmull_high_p64 ((poly64x2_t) a, (poly64x2_t) b);
158 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
159 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc2
163 0x00, 0x00, 0x00, 0xc2, 0x01, 0x00, 0x00, 0x00,
164 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc2
260 #ifdef __VPCLMULQDQ__ 262 static const u8x64 ghash4_poly2 = {
263 0x00, 0x00, 0x00, 0xc2, 0x01, 0x00, 0x00, 0x00,
264 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc2,
265 0x00, 0x00, 0x00, 0xc2, 0x01, 0x00, 0x00, 0x00,
266 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc2,
267 0x00, 0x00, 0x00, 0xc2, 0x01, 0x00, 0x00, 0x00,
268 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc2,
269 0x00, 0x00, 0x00, 0xc2, 0x01, 0x00, 0x00, 0x00,
270 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc2,
275 u8x64
hi,
lo, mid, tmp_lo, tmp_hi;
280 gmul4_lo_lo (u8x64
a, u8x64 b)
282 return (u8x64) _mm512_clmulepi64_epi128 ((__m512i)
a, (__m512i) b, 0x00);
286 gmul4_hi_lo (u8x64
a, u8x64 b)
288 return (u8x64) _mm512_clmulepi64_epi128 ((__m512i)
a, (__m512i) b, 0x01);
292 gmul4_lo_hi (u8x64
a, u8x64 b)
294 return (u8x64) _mm512_clmulepi64_epi128 ((__m512i)
a, (__m512i) b, 0x10);
298 gmul4_hi_hi (u8x64
a, u8x64 b)
300 return (u8x64) _mm512_clmulepi64_epi128 ((__m512i)
a, (__m512i) b, 0x11);
305 ghash4_mul_first (ghash4_data_t * gd, u8x64
a, u8x64 b)
307 gd->hi = gmul4_hi_hi (
a, b);
308 gd->lo = gmul4_lo_lo (
a, b);
309 gd->mid = (gmul4_hi_lo (
a, b) ^ gmul4_lo_hi (
a, b));
314 ghash4_mul_next (ghash4_data_t * gd, u8x64
a, u8x64 b)
316 u8x64
hi = gmul4_hi_hi (
a, b);
317 u8x64
lo = gmul4_lo_lo (
a, b);
333 gd->mid =
u8x64_xor3 (gd->mid, gmul4_hi_lo (
a, b), gmul4_lo_hi (
a, b));
337 ghash4_reduce (ghash4_data_t * gd)
350 gd->lo =
u8x64_xor3 (gd->lo, gd->tmp_lo, midl);
351 gd->hi =
u8x64_xor3 (gd->hi, gd->tmp_hi, midr);
359 r = gmul4_hi_lo (ghash4_poly2, gd->lo);
365 ghash4_reduce2 (ghash4_data_t * gd)
367 gd->tmp_lo = gmul4_lo_lo (ghash4_poly2, gd->lo);
368 gd->tmp_hi = gmul4_lo_hi (ghash4_poly2, gd->lo);
372 ghash4_final (ghash4_data_t * gd)
382 return u8x32_extract_hi (t) ^ u8x32_extract_lo (t);
392 r8 = (u8x16) ((
u64x2) H >> 63);
393 H = (u8x16) ((
u64x2) H << 1);
402 r32 = r32 == (
u32x4) {1, 0, 0, 1};
404 Hi[n - 1] = H = H ^ ((u8x16) r32 &
ghash_poly);
407 for (
int i = n - 2;
i >= 0;
i--)
#define u8x64_word_shift_left(a, n)
static const u8x16 ghash_poly2
#define u8x16_word_shift_left(x, n)
#define u8x64_word_shift_right(a, n)
static_always_inline u8x32 u8x64_extract_hi(u8x64 v)
static_always_inline void ghash_precompute(u8x16 H, u8x16 *Hi, int n)
static_always_inline u8x16 ghash_final(ghash_data_t *gd)
#define static_always_inline
static_always_inline void ghash_reduce(ghash_data_t *gd)
static_always_inline void ghash_mul_next(ghash_data_t *gd, u8x16 a, u8x16 b)
static_always_inline void ghash_reduce2(ghash_data_t *gd)
static_always_inline u8x64 u8x64_xor3(u8x64 a, u8x64 b, u8x64 c)
epu8_epi32 epu16_epi32 u64x2
static_always_inline u8x16 gmul_lo_hi(u8x16 a, u8x16 b)
static_always_inline u8x16 gmul_lo_lo(u8x16 a, u8x16 b)
static_always_inline u8x16 ghash_mul(u8x16 a, u8x16 b)
static_always_inline void ghash_mul_first(ghash_data_t *gd, u8x16 a, u8x16 b)
static_always_inline u32x4 u32x4_shuffle(u32x4 v, const int a, const int b, const int c, const int d)
static_always_inline u8x16 u8x16_xor3(u8x16 a, u8x16 b, u8x16 c)
static_always_inline u8x16 gmul_hi_lo(u8x16 a, u8x16 b)
sll srl srl sll sra u16x4 i
static const u8x16 ghash_poly
static_always_inline u8x32 u8x64_extract_lo(u8x64 v)
static_always_inline u8x16 gmul_hi_hi(u8x16 a, u8x16 b)
#define u8x16_word_shift_right(x, n)