#ifndef included_clib_string_h
#define included_clib_string_h

#include <vppinfra/clib.h>   /* for CLIB_LINUX_KERNEL */
#include <vppinfra/vector.h>

#ifdef CLIB_LINUX_KERNEL
#include <linux/string.h>
#endif

#ifdef CLIB_UNIX
#include <string.h>
#endif

#ifdef CLIB_STANDALONE
#include <vppinfra/standalone_string.h>
#endif

#ifdef __x86_64__
#include <x86intrin.h>
#endif

/* Exchanges source and destination. */
void clib_memswap (void *_a, void *_b, uword bytes);

/*
 * The vector-unit memcpy variants confuse coverity, so don't let it
 * anywhere near them; plain memcpy is used in that case.
 */
#ifndef __COVERITY__
#if __AVX512F__
#include <vppinfra/memcpy_avx512.h>
#elif __AVX2__
#include <vppinfra/memcpy_avx2.h>
#elif __SSSE3__
#include <vppinfra/memcpy_sse3.h>
#else
#define clib_memcpy(a,b,c) memcpy(a,b,c)
#endif
#else /* __COVERITY__ */
#define clib_memcpy(a,b,c) memcpy(a,b,c)
#endif

/*
 * Copy 64 bytes of data from s to each of the four destinations d0..d3.
 * Typically used in a quad-loop when a whole cacheline needs to be copied
 * to 4 different places.
 */
static_always_inline void
clib_memcpy64_x4 (void *d0, void *d1, void *d2, void *d3, void *s)
{
#if defined (__AVX512F__)
  __m512i r0 = _mm512_loadu_si512 (s);

  _mm512_storeu_si512 (d0, r0);
  _mm512_storeu_si512 (d1, r0);
  _mm512_storeu_si512 (d2, r0);
  _mm512_storeu_si512 (d3, r0);
#elif defined (__AVX2__)
  __m256i r0 = _mm256_loadu_si256 ((__m256i *) (s + 0 * 32));
  __m256i r1 = _mm256_loadu_si256 ((__m256i *) (s + 1 * 32));

  _mm256_storeu_si256 ((__m256i *) (d0 + 0 * 32), r0);
  _mm256_storeu_si256 ((__m256i *) (d0 + 1 * 32), r1);

  _mm256_storeu_si256 ((__m256i *) (d1 + 0 * 32), r0);
  _mm256_storeu_si256 ((__m256i *) (d1 + 1 * 32), r1);

  _mm256_storeu_si256 ((__m256i *) (d2 + 0 * 32), r0);
  _mm256_storeu_si256 ((__m256i *) (d2 + 1 * 32), r1);

  _mm256_storeu_si256 ((__m256i *) (d3 + 0 * 32), r0);
  _mm256_storeu_si256 ((__m256i *) (d3 + 1 * 32), r1);
#elif defined (__SSSE3__)
  __m128i r0 = _mm_loadu_si128 ((__m128i *) (s + 0 * 16));
  __m128i r1 = _mm_loadu_si128 ((__m128i *) (s + 1 * 16));
  __m128i r2 = _mm_loadu_si128 ((__m128i *) (s + 2 * 16));
  __m128i r3 = _mm_loadu_si128 ((__m128i *) (s + 3 * 16));

  _mm_storeu_si128 ((__m128i *) (d0 + 0 * 16), r0);
  _mm_storeu_si128 ((__m128i *) (d0 + 1 * 16), r1);
  _mm_storeu_si128 ((__m128i *) (d0 + 2 * 16), r2);
  _mm_storeu_si128 ((__m128i *) (d0 + 3 * 16), r3);

  _mm_storeu_si128 ((__m128i *) (d1 + 0 * 16), r0);
  _mm_storeu_si128 ((__m128i *) (d1 + 1 * 16), r1);
  _mm_storeu_si128 ((__m128i *) (d1 + 2 * 16), r2);
  _mm_storeu_si128 ((__m128i *) (d1 + 3 * 16), r3);

  _mm_storeu_si128 ((__m128i *) (d2 + 0 * 16), r0);
  _mm_storeu_si128 ((__m128i *) (d2 + 1 * 16), r1);
  _mm_storeu_si128 ((__m128i *) (d2 + 2 * 16), r2);
  _mm_storeu_si128 ((__m128i *) (d2 + 3 * 16), r3);

  _mm_storeu_si128 ((__m128i *) (d3 + 0 * 16), r0);
  _mm_storeu_si128 ((__m128i *) (d3 + 1 * 16), r1);
  _mm_storeu_si128 ((__m128i *) (d3 + 2 * 16), r2);
  _mm_storeu_si128 ((__m128i *) (d3 + 3 * 16), r3);

#else
  /* Scalar fallback when no suitable vector extension is available. */
  clib_memcpy (d0, s, 64);
  clib_memcpy (d1, s, 64);
  clib_memcpy (d2, s, 64);
  clib_memcpy (d3, s, 64);
#endif
}
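
/*
 * Usage sketch (illustrative only; buffer names are hypothetical): replicate
 * one 64-byte cache line of metadata into four per-destination buffers in a
 * quad-loop.
 *
 *   u8 src[64], dst0[64], dst1[64], dst2[64], dst3[64];
 *   clib_memcpy64_x4 (dst0, dst1, dst2, dst3, src);
 *
 * Each pointer must reference at least 64 accessible bytes; loads and stores
 * are unaligned, so no particular alignment is required.
 */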

/* Fill 'count' u64 elements starting at 'p' with 'val'.
   'count' is a number of elements, not bytes. */
static_always_inline void
clib_memset_u64 (void *p, u64 val, uword count)
{
  u64 *ptr = p;
#if defined(CLIB_HAVE_VEC512)
  u64x8 v512 = u64x8_splat (val);
  while (count >= 8)
    {
      u64x8_store_unaligned (v512, ptr);
      ptr += 8;
      count -= 8;
    }
  if (count == 0)
    return;
#endif
#if defined(CLIB_HAVE_VEC256)
  u64x4 v256 = u64x4_splat (val);
  while (count >= 4)
    {
      u64x4_store_unaligned (v256, ptr);
      ptr += 4;
      count -= 4;
    }
  if (count == 0)
    return;
#else
  while (count >= 4)
    {
      ptr[0] = ptr[1] = ptr[2] = ptr[3] = val;
      ptr += 4;
      count -= 4;
    }
#endif
  while (count--)
    ptr++[0] = val;
}
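
/*
 * Usage sketch (illustrative only; names are hypothetical): set a table of
 * 64-bit counters to a sentinel value.  Note that the last argument is a
 * number of u64 elements, not bytes.
 *
 *   u64 counters[1024];
 *   clib_memset_u64 (counters, ~0ULL, ARRAY_LEN (counters));
 */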

/* Fill 'count' u32 elements starting at 'p' with 'val'. */
static_always_inline void
clib_memset_u32 (void *p, u32 val, uword count)
{
  u32 *ptr = p;
#if defined(CLIB_HAVE_VEC512)
  u32x16 v512 = u32x16_splat (val);
  while (count >= 16)
    {
      u32x16_store_unaligned (v512, ptr);
      ptr += 16;
      count -= 16;
    }
  if (count == 0)
    return;
#endif
#if defined(CLIB_HAVE_VEC256)
  u32x8 v256 = u32x8_splat (val);
  while (count >= 8)
    {
      u32x8_store_unaligned (v256, ptr);
      ptr += 8;
      count -= 8;
    }
  if (count == 0)
    return;
#endif
#if defined(CLIB_HAVE_VEC128) && defined(CLIB_HAVE_VEC128_UNALIGNED_LOAD_STORE)
  u32x4 v128 = u32x4_splat (val);
  while (count >= 4)
    {
      u32x4_store_unaligned (v128, ptr);
      ptr += 4;
      count -= 4;
    }
  if (count == 0)
    return;
#else
  while (count >= 4)
    {
      ptr[0] = ptr[1] = ptr[2] = ptr[3] = val;
      ptr += 4;
      count -= 4;
    }
#endif
  while (count--)
    ptr++[0] = val;
}

/* Fill 'count' u16 elements starting at 'p' with 'val'. */
static_always_inline void
clib_memset_u16 (void *p, u16 val, uword count)
{
  u16 *ptr = p;
#if defined(CLIB_HAVE_VEC512)
  u16x32 v512 = u16x32_splat (val);
  while (count >= 32)
    {
      u16x32_store_unaligned (v512, ptr);
      ptr += 32;
      count -= 32;
    }
  if (count == 0)
    return;
#endif
#if defined(CLIB_HAVE_VEC256)
  u16x16 v256 = u16x16_splat (val);
  while (count >= 16)
    {
      u16x16_store_unaligned (v256, ptr);
      ptr += 16;
      count -= 16;
    }
  if (count == 0)
    return;
#endif
#if defined(CLIB_HAVE_VEC128) && defined(CLIB_HAVE_VEC128_UNALIGNED_LOAD_STORE)
  u16x8 v128 = u16x8_splat (val);
  while (count >= 8)
    {
      u16x8_store_unaligned (v128, ptr);
      ptr += 8;
      count -= 8;
    }
  if (count == 0)
    return;
#else
  while (count >= 4)
    {
      ptr[0] = ptr[1] = ptr[2] = ptr[3] = val;
      ptr += 4;
      count -= 4;
    }
#endif
  while (count--)
    ptr++[0] = val;
}

/* Fill 'count' u8 elements starting at 'p' with 'val'. */
static_always_inline void
clib_memset_u8 (void *p, u8 val, uword count)
{
  u8 *ptr = p;
#if defined(CLIB_HAVE_VEC512)
  u8x64 v512 = u8x64_splat (val);
  while (count >= 64)
    {
      u8x64_store_unaligned (v512, ptr);
      ptr += 64;
      count -= 64;
    }
  if (count == 0)
    return;
#endif
#if defined(CLIB_HAVE_VEC256)
  u8x32 v256 = u8x32_splat (val);
  while (count >= 32)
    {
      u8x32_store_unaligned (v256, ptr);
      ptr += 32;
      count -= 32;
    }
  if (count == 0)
    return;
#endif
#if defined(CLIB_HAVE_VEC128) && defined(CLIB_HAVE_VEC128_UNALIGNED_LOAD_STORE)
  u8x16 v128 = u8x16_splat (val);
  while (count >= 16)
    {
      u8x16_store_unaligned (v128, ptr);
      ptr += 16;
      count -= 16;
    }
  if (count == 0)
    return;
#else
  while (count >= 4)
    {
      ptr[0] = ptr[1] = ptr[2] = ptr[3] = val;
      ptr += 4;
      count -= 4;
    }
#endif
  while (count--)
    ptr++[0] = val;
}
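
/*
 * The typed memset variants above take an element count, not a byte count.
 * Illustrative comparison on a hypothetical buffer (both calls write the
 * same bytes):
 *
 *   u32 table[256];
 *   clib_memset_u32 (table, 0xffffffff, 256);
 *   clib_memset_u8 (table, 0xff, 256 * sizeof (u32));
 */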

/* Count how many of the first 'max_count' u64 elements of 'data' are equal
   to data[0]; always returns at least 1. */
static_always_inline uword
clib_count_equal_u64 (u64 *data, uword max_count)
{
  uword count;
  u64 first;

  if (max_count == 1)
    return 1;
  if (data[0] != data[1])
    return 1;

  count = 0;
  first = data[0];

#if defined(CLIB_HAVE_VEC256)
  u64x4 splat = u64x4_splat (first);
  while (1)
    {
      u64 bmp;
      bmp = u8x32_msb_mask ((u8x32) (u64x4_load_unaligned (data) == splat));
      if (bmp != 0xffffffff)
        {
          count += count_trailing_zeros (~bmp) / 8;
          return clib_min (count, max_count);
        }
      data += 4;
      count += 4;
      if (count >= max_count)
        return max_count;
    }
#endif
  count += 2;
  data += 2;
  while (count + 3 < max_count &&
         ((data[0] ^ first) | (data[1] ^ first) |
          (data[2] ^ first) | (data[3] ^ first)) == 0)
    {
      data += 4;
      count += 4;
    }
  while (count < max_count && (data[0] == first))
    {
      data += 1;
      count += 1;
    }
  return count;
}
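
/*
 * Usage sketch (illustrative only; names are hypothetical): find how many
 * consecutive entries at the head of a vector share the same value so they
 * can be processed as one batch.
 *
 *   u64 hits[N];
 *   uword n_same = clib_count_equal_u64 (hits, N);
 *   // hits[0 .. n_same - 1] all equal hits[0]; 1 <= n_same <= N.
 */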

/* Same as clib_count_equal_u64, for u32 elements. */
static_always_inline uword
clib_count_equal_u32 (u32 *data, uword max_count)
{
  uword count;
  u32 first;

  if (max_count == 1)
    return 1;
  if (data[0] != data[1])
    return 1;

  count = 0;
  first = data[0];

#if defined(CLIB_HAVE_VEC256)
  u32x8 splat = u32x8_splat (first);
  while (1)
    {
      u64 bmp;
      bmp = u8x32_msb_mask ((u8x32) (u32x8_load_unaligned (data) == splat));
      if (bmp != 0xffffffff)
        {
          count += count_trailing_zeros (~bmp) / 4;
          return clib_min (count, max_count);
        }
      data += 8;
      count += 8;
      if (count >= max_count)
        return max_count;
    }
#elif defined(CLIB_HAVE_VEC128) && defined(CLIB_HAVE_VEC128_MSB_MASK)
  u32x4 splat = u32x4_splat (first);
  while (1)
    {
      u64 bmp;
      bmp = u8x16_msb_mask ((u8x16) (u32x4_load_unaligned (data) == splat));
      if (bmp != 0xffff)
        {
          count += count_trailing_zeros (~bmp) / 4;
          return clib_min (count, max_count);
        }
      data += 4;
      count += 4;
      if (count >= max_count)
        return max_count;
    }
#endif
  count += 2;
  data += 2;
  while (count + 3 < max_count &&
         ((data[0] ^ first) | (data[1] ^ first) |
          (data[2] ^ first) | (data[3] ^ first)) == 0)
    {
      data += 4;
      count += 4;
    }
  while (count < max_count && (data[0] == first))
    {
      data += 1;
      count += 1;
    }
  return count;
}

/* Same as clib_count_equal_u64, for u16 elements. */
static_always_inline uword
clib_count_equal_u16 (u16 *data, uword max_count)
{
  uword count;
  u16 first;

  if (max_count == 1)
    return 1;
  if (data[0] != data[1])
    return 1;

  count = 0;
  first = data[0];

#if defined(CLIB_HAVE_VEC256)
  u16x16 splat = u16x16_splat (first);
  while (1)
    {
      u64 bmp;
      bmp = u8x32_msb_mask ((u8x32) (u16x16_load_unaligned (data) == splat));
      if (bmp != 0xffffffff)
        {
          count += count_trailing_zeros (~bmp) / 2;
          return clib_min (count, max_count);
        }
      data += 16;
      count += 16;
      if (count >= max_count)
        return max_count;
    }
#elif defined(CLIB_HAVE_VEC128) && defined(CLIB_HAVE_VEC128_MSB_MASK)
  u16x8 splat = u16x8_splat (first);
  while (1)
    {
      u64 bmp;
      bmp = u8x16_msb_mask ((u8x16) (u16x8_load_unaligned (data) == splat));
      if (bmp != 0xffff)
        {
          count += count_trailing_zeros (~bmp) / 2;
          return clib_min (count, max_count);
        }
      data += 8;
      count += 8;
      if (count >= max_count)
        return max_count;
    }
#endif
  count += 2;
  data += 2;
  while (count + 3 < max_count &&
         ((data[0] ^ first) | (data[1] ^ first) |
          (data[2] ^ first) | (data[3] ^ first)) == 0)
    {
      data += 4;
      count += 4;
    }
  while (count < max_count && (data[0] == first))
    {
      data += 1;
      count += 1;
    }
  return count;
}

/* Same as clib_count_equal_u64, for u8 elements. */
static_always_inline uword
clib_count_equal_u8 (u8 *data, uword max_count)
{
  uword count;
  u8 first;

  if (max_count == 1)
    return 1;
  if (data[0] != data[1])
    return 1;

  count = 0;
  first = data[0];

#if defined(CLIB_HAVE_VEC256)
  u8x32 splat = u8x32_splat (first);
  while (1)
    {
      u64 bmp;
      bmp = u8x32_msb_mask ((u8x32) (u8x32_load_unaligned (data) == splat));
      if (bmp != 0xffffffff)
        {
          count += count_trailing_zeros (~bmp);
          return clib_min (count, max_count);
        }
      data += 32;
      count += 32;
      if (count >= max_count)
        return max_count;
    }
#elif defined(CLIB_HAVE_VEC128) && defined(CLIB_HAVE_VEC128_MSB_MASK)
  u8x16 splat = u8x16_splat (first);
  while (1)
    {
      u64 bmp;
      bmp = u8x16_msb_mask ((u8x16) (u8x16_load_unaligned (data) == splat));
      if (bmp != 0xffff)
        {
          count += count_trailing_zeros (~bmp);
          return clib_min (count, max_count);
        }
      data += 16;
      count += 16;
      if (count >= max_count)
        return max_count;
    }
#endif
  count += 2;
  data += 2;
  while (count + 3 < max_count &&
         ((data[0] ^ first) | (data[1] ^ first) |
          (data[2] ^ first) | (data[3] ^ first)) == 0)
    {
      data += 4;
      count += 4;
    }
  while (count < max_count && (data[0] == first))
    {
      data += 1;
      count += 1;
    }
  return count;
}
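
/*
 * Usage sketch (illustrative only; names are hypothetical): measure a run of
 * identical padding bytes at the start of a buffer before parsing it.
 *
 *   u8 *p = buf;
 *   uword run = clib_count_equal_u8 (p, len);
 */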