  /* 64x64 -> 128-bit carry-less multiply: low 64 bits of a times low 64 bits of b */
#if defined (__PCLMUL__)
  return (u8x16) _mm_clmulepi64_si128 ((__m128i) a, (__m128i) b, 0x00);
#elif defined (__ARM_FEATURE_CRYPTO)
  return (u8x16) vmull_p64 ((poly64_t) vget_low_p64 ((poly64x2_t) a),
                            (poly64_t) vget_low_p64 ((poly64x2_t) b));
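
/* selector 0x01: high 64 bits of a times low 64 bits of b
   (NEON: vget_high_p64 (a) x vget_low_p64 (b)) */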
#if defined (__PCLMUL__)
  return (u8x16) _mm_clmulepi64_si128 ((__m128i) a, (__m128i) b, 0x01);
#elif defined (__ARM_FEATURE_CRYPTO)
  return (u8x16) vmull_p64 ((poly64_t) vget_high_p64 ((poly64x2_t) a),
                            (poly64_t) vget_low_p64 ((poly64x2_t) b));
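
/* selector 0x10: low 64 bits of a times high 64 bits of b */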
#if defined (__PCLMUL__)
  return (u8x16) _mm_clmulepi64_si128 ((__m128i) a, (__m128i) b, 0x10);
#elif defined (__ARM_FEATURE_CRYPTO)
  return (u8x16) vmull_p64 ((poly64_t) vget_low_p64 ((poly64x2_t) a),
                            (poly64_t) vget_high_p64 ((poly64x2_t) b));
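
/* selector 0x11: high 64 bits of a times high 64 bits of b;
   on NEON this maps directly to vmull_high_p64 */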
#if defined (__PCLMUL__)
  return (u8x16) _mm_clmulepi64_si128 ((__m128i) a, (__m128i) b, 0x11);
#elif defined (__ARM_FEATURE_CRYPTO)
  return (u8x16) vmull_high_p64 ((poly64x2_t) a, (poly64x2_t) b);
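
/* The 16 bytes below are the GHASH reduction polynomial constant: read as a
   little-endian 128-bit value they form 0xc2000000000000000000000000000001,
   the usual constant for bit-reflected GHASH reduction (derived from
   x^128 + x^7 + x^2 + x + 1). */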
  0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc2
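
/* A pre-shifted variant of the polynomial (the same 16-byte pattern is
   replicated into ghash4_poly2 below); presumably laid out so that the
   hi/lo multiply selectors used in the reduce steps pick the intended
   half of the polynomial directly. */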
  0x00, 0x00, 0x00, 0xc2, 0x01, 0x00, 0x00, 0x00,
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc2
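
/* With AVX-512 VPCLMULQDQ the same construction runs on four 128-bit lanes
   at once; ghash4_poly2 simply replicates the 16-byte pattern above into
   every lane of a u8x64. */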
#ifdef __VPCLMULQDQ__

static const u8x64 ghash4_poly2 = {
  0x00, 0x00, 0x00, 0xc2, 0x01, 0x00, 0x00, 0x00,
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc2,
  0x00, 0x00, 0x00, 0xc2, 0x01, 0x00, 0x00, 0x00,
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc2,
  0x00, 0x00, 0x00, 0xc2, 0x01, 0x00, 0x00, 0x00,
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc2,
  0x00, 0x00, 0x00, 0xc2, 0x01, 0x00, 0x00, 0x00,
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc2,
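
/* _mm512_clmulepi64_epi128 performs an independent 64x64 carry-less
   multiply in each 128-bit lane; the immediate selects operand halves
   exactly as in the u8x16 helpers above (0x00 = lo(a)*lo(b),
   0x01 = hi(a)*lo(b), 0x10 = lo(a)*hi(b), 0x11 = hi(a)*hi(b)). */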
  return (u8x64) _mm512_clmulepi64_epi128 ((__m512i) a, (__m512i) b, 0x00);
  return (u8x64) _mm512_clmulepi64_epi128 ((__m512i) a, (__m512i) b, 0x01);
  return (u8x64) _mm512_clmulepi64_epi128 ((__m512i) a, (__m512i) b, 0x10);
  return (u8x64) _mm512_clmulepi64_epi128 ((__m512i) a, (__m512i) b, 0x11);
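
/* Per-lane 128x128 carry-less multiply built from four 64x64 products:
   hi = hi(a)*hi(b), lo = lo(a)*lo(b), and the two cross products are
   XORed together into mid. */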
  gd->hi = gmul4_hi_hi (a, b);
  gd->lo = gmul4_lo_lo (a, b);
  gd->mid = (gmul4_hi_lo (a, b) ^ gmul4_lo_hi (a, b));
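
/* Later blocks accumulate their cross products into mid with a single
   three-way XOR (u8x64_xor3). */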
  gd->mid = u8x64_xor3 (gd->mid, gmul4_hi_lo (a, b), gmul4_lo_hi (a, b));
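
/* ghash4_reduce: fold the accumulated mid term into lo/hi (midl/midr are
   presumably mid shifted into the low and high halves) together with the
   pending tmp_lo/tmp_hi products, then start the reduction modulo the
   polynomial. */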
ghash4_reduce (ghash4_data_t * gd)
  gd->lo = u8x64_xor3 (gd->lo, gd->tmp_lo, midl);
  gd->hi = u8x64_xor3 (gd->hi, gd->tmp_hi, midr);
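
/* first reduction multiply: the upper half of the pre-shifted polynomial
   times the lower 64 bits of each lane of gd->lo */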
  r = gmul4_hi_lo (ghash4_poly2, gd->lo);
ghash4_reduce2 (ghash4_data_t * gd)
  gd->tmp_lo = gmul4_lo_lo (ghash4_poly2, gd->lo);
  gd->tmp_hi = gmul4_lo_hi (ghash4_poly2, gd->lo);
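
/* second reduction step: the lower half of the pre-shifted polynomial is
   multiplied by both halves of gd->lo; the results land in tmp_lo/tmp_hi
   and are presumably combined in ghash4_final below. */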
ghash4_final (ghash4_data_t * gd)
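  /* XOR-fold the four 128-bit lanes of the reduced result r down to a
     single u8x16 (512 -> 256 -> 128 bits). */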
  t = u8x64_extract_lo (r) ^ u8x64_extract_hi (r);
  return u8x32_extract_hi (t) ^ u8x32_extract_lo (t);
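
/* Hash-key precomputation: the compare below builds a per-lane mask that is
   presumably used to conditionally XOR the reduction polynomial when
   computing H << 1 mod P. */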
  r32 = r32 == (u32x4) {1, 0, 0, 1};
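
/* The loop below presumably fills the remaining entries of the key table
   with successive powers of H for multi-block hashing. */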
  for (int i = n - 2; i >= 0; i--)