#ifndef included_clib_string_h
#define included_clib_string_h

#include <vppinfra/clib.h>	/* for CLIB_LINUX_KERNEL */
#include <vppinfra/vector.h>

#ifdef CLIB_LINUX_KERNEL
#include <linux/string.h>
#endif

#ifdef CLIB_UNIX
#include <string.h>
#endif

#ifdef CLIB_STANDALONE
#include <vppinfra/standalone_string.h>
#endif

#ifdef __x86_64__
#include <x86intrin.h>
#endif

/* Exchanges source and destination. */
void clib_memswap (void *_a, void *_b, uword bytes);

/*
 * The vector unit memcpy variants confuse coverity,
 * so don't let it anywhere near them.
 */
#ifndef __COVERITY__
#if __AVX512F__
#include <vppinfra/memcpy_avx512.h>
#elif __AVX2__
#include <vppinfra/memcpy_avx2.h>
#elif __SSSE3__
#include <vppinfra/memcpy_sse3.h>
#else /* __AVX512F__ */
#define clib_memcpy(a,b,c) memcpy(a,b,c)
#endif /* __AVX512F__ */
#else /* __COVERITY__ */
#define clib_memcpy(a,b,c) memcpy(a,b,c)
#endif

/* Copy the same 64-byte source block to four destinations. */
static_always_inline void
clib_memcpy64_x4 (void *d0, void *d1, void *d2, void *d3, void *s)
{
#if defined (__AVX512F__)
  __m512i r0 = _mm512_loadu_si512 (s);

  _mm512_storeu_si512 (d0, r0);
  _mm512_storeu_si512 (d1, r0);
  _mm512_storeu_si512 (d2, r0);
  _mm512_storeu_si512 (d3, r0);

#elif defined (__AVX2__)
  __m256i r0 = _mm256_loadu_si256 ((__m256i *) (s + 0 * 32));
  __m256i r1 = _mm256_loadu_si256 ((__m256i *) (s + 1 * 32));

  _mm256_storeu_si256 ((__m256i *) (d0 + 0 * 32), r0);
  _mm256_storeu_si256 ((__m256i *) (d0 + 1 * 32), r1);

  _mm256_storeu_si256 ((__m256i *) (d1 + 0 * 32), r0);
  _mm256_storeu_si256 ((__m256i *) (d1 + 1 * 32), r1);

  _mm256_storeu_si256 ((__m256i *) (d2 + 0 * 32), r0);
  _mm256_storeu_si256 ((__m256i *) (d2 + 1 * 32), r1);

  _mm256_storeu_si256 ((__m256i *) (d3 + 0 * 32), r0);
  _mm256_storeu_si256 ((__m256i *) (d3 + 1 * 32), r1);

#elif defined (__SSSE3__)
  __m128i r0 = _mm_loadu_si128 ((__m128i *) (s + 0 * 16));
  __m128i r1 = _mm_loadu_si128 ((__m128i *) (s + 1 * 16));
  __m128i r2 = _mm_loadu_si128 ((__m128i *) (s + 2 * 16));
  __m128i r3 = _mm_loadu_si128 ((__m128i *) (s + 3 * 16));

  _mm_storeu_si128 ((__m128i *) (d0 + 0 * 16), r0);
  _mm_storeu_si128 ((__m128i *) (d0 + 1 * 16), r1);
  _mm_storeu_si128 ((__m128i *) (d0 + 2 * 16), r2);
  _mm_storeu_si128 ((__m128i *) (d0 + 3 * 16), r3);

  _mm_storeu_si128 ((__m128i *) (d1 + 0 * 16), r0);
  _mm_storeu_si128 ((__m128i *) (d1 + 1 * 16), r1);
  _mm_storeu_si128 ((__m128i *) (d1 + 2 * 16), r2);
  _mm_storeu_si128 ((__m128i *) (d1 + 3 * 16), r3);

  _mm_storeu_si128 ((__m128i *) (d2 + 0 * 16), r0);
  _mm_storeu_si128 ((__m128i *) (d2 + 1 * 16), r1);
  _mm_storeu_si128 ((__m128i *) (d2 + 2 * 16), r2);
  _mm_storeu_si128 ((__m128i *) (d2 + 3 * 16), r3);

  _mm_storeu_si128 ((__m128i *) (d3 + 0 * 16), r0);
  _mm_storeu_si128 ((__m128i *) (d3 + 1 * 16), r1);
  _mm_storeu_si128 ((__m128i *) (d3 + 2 * 16), r2);
  _mm_storeu_si128 ((__m128i *) (d3 + 3 * 16), r3);

#else
  /* scalar fallback when no usable vector extension is present */
  clib_memcpy (d0, s, 64);
  clib_memcpy (d1, s, 64);
  clib_memcpy (d2, s, 64);
  clib_memcpy (d3, s, 64);
#endif
}

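/*
 * Usage sketch (illustrative only, not part of this header): replicate a
 * prebuilt 64-byte header template into four packet buffers in one call.
 * The buffer names below are hypothetical.
 *
 *   u8 tmpl[64];               // fully initialized, 64 bytes readable
 *   u8 *b0, *b1, *b2, *b3;     // four destinations, 64 bytes writable each
 *
 *   clib_memcpy64_x4 (b0, b1, b2, b3, tmpl);
 *   // same effect as four clib_memcpy (bN, tmpl, 64) calls, but the
 *   // source is loaded into vector registers once and stored four times
 */
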
/* Fill 'count' u64 elements at 'p' with 'val'; count is in elements,
   not bytes. */
static_always_inline void
clib_memset_u64 (void *p, u64 val, uword count)
{
  u64 *ptr = p;
#if defined(CLIB_HAVE_VEC512)
  u64x8 v512 = u64x8_splat (val);
  while (count >= 8)
    {
      u64x8_store_unaligned (v512, ptr);
      ptr += 8;
      count -= 8;
    }
  if (count == 0)
    return;
#endif
#if defined(CLIB_HAVE_VEC256)
  u64x4 v256 = u64x4_splat (val);
  while (count >= 4)
    {
      u64x4_store_unaligned (v256, ptr);
      ptr += 4;
      count -= 4;
    }
  if (count == 0)
    return;
#else
  while (count >= 4)
    {
      ptr[0] = ptr[1] = ptr[2] = ptr[3] = val;
      ptr += 4;
      count -= 4;
    }
#endif
  /* take care of the remaining 0 to 3 elements */
  while (count--)
    ptr++[0] = val;
}

static_always_inline void
clib_memset_u32 (void *p, u32 val, uword count)
{
  u32 *ptr = p;
#if defined(CLIB_HAVE_VEC512)
  u32x16 v512 = u32x16_splat (val);
  while (count >= 16)
    {
      u32x16_store_unaligned (v512, ptr);
      ptr += 16;
      count -= 16;
    }
  if (count == 0)
    return;
#endif
#if defined(CLIB_HAVE_VEC256)
  u32x8 v256 = u32x8_splat (val);
  while (count >= 8)
    {
      u32x8_store_unaligned (v256, ptr);
      ptr += 8;
      count -= 8;
    }
  if (count == 0)
    return;
#endif
#if defined(CLIB_HAVE_VEC128) && defined(CLIB_HAVE_VEC128_UNALIGNED_LOAD_STORE)
  u32x4 v128 = u32x4_splat (val);
  while (count >= 4)
    {
      u32x4_store_unaligned (v128, ptr);
      ptr += 4;
      count -= 4;
    }
#else
  while (count >= 4)
    {
      ptr[0] = ptr[1] = ptr[2] = ptr[3] = val;
      ptr += 4;
      count -= 4;
    }
#endif
  /* take care of the remaining 0 to 3 elements */
  while (count--)
    ptr++[0] = val;
}

static_always_inline void
clib_memset_u16 (void *p, u16 val, uword count)
{
  u16 *ptr = p;
#if defined(CLIB_HAVE_VEC512)
  u16x32 v512 = u16x32_splat (val);
  while (count >= 32)
    {
      u16x32_store_unaligned (v512, ptr);
      ptr += 32;
      count -= 32;
    }
  if (count == 0)
    return;
#endif
#if defined(CLIB_HAVE_VEC256)
  u16x16 v256 = u16x16_splat (val);
  while (count >= 16)
    {
      u16x16_store_unaligned (v256, ptr);
      ptr += 16;
      count -= 16;
    }
  if (count == 0)
    return;
#endif
#if defined(CLIB_HAVE_VEC128) && defined(CLIB_HAVE_VEC128_UNALIGNED_LOAD_STORE)
  u16x8 v128 = u16x8_splat (val);
  while (count >= 8)
    {
      u16x8_store_unaligned (v128, ptr);
      ptr += 8;
      count -= 8;
    }
#else
  while (count >= 4)
    {
      ptr[0] = ptr[1] = ptr[2] = ptr[3] = val;
      ptr += 4;
      count -= 4;
    }
#endif
  /* take care of the remaining elements */
  while (count--)
    ptr++[0] = val;
}

static_always_inline void
clib_memset_u8 (void *p, u8 val, uword count)
{
  u8 *ptr = p;
#if defined(CLIB_HAVE_VEC512)
  u8x64 v512 = u8x64_splat (val);
  while (count >= 64)
    {
      u8x64_store_unaligned (v512, ptr);
      ptr += 64;
      count -= 64;
    }
  if (count == 0)
    return;
#endif
#if defined(CLIB_HAVE_VEC256)
  u8x32 v256 = u8x32_splat (val);
  while (count >= 32)
    {
      u8x32_store_unaligned (v256, ptr);
      ptr += 32;
      count -= 32;
    }
  if (count == 0)
    return;
#endif
#if defined(CLIB_HAVE_VEC128) && defined(CLIB_HAVE_VEC128_UNALIGNED_LOAD_STORE)
  u8x16 v128 = u8x16_splat (val);
  while (count >= 16)
    {
      u8x16_store_unaligned (v128, ptr);
      ptr += 16;
      count -= 16;
    }
#else
  while (count >= 4)
    {
      ptr[0] = ptr[1] = ptr[2] = ptr[3] = val;
      ptr += 4;
      count -= 4;
    }
#endif
  /* take care of the remaining elements */
  while (count--)
    ptr++[0] = val;
}

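/*
 * Usage sketch (illustrative): fill a table of 32-bit entries with a
 * sentinel.  Unlike memset(), the fill value is a full element and
 * 'count' is an element count, not a byte count.  ARRAY_LEN is the
 * vppinfra element-count macro.
 *
 *   u32 entries[1024];
 *
 *   clib_memset_u32 (entries, ~0, ARRAY_LEN (entries));
 */
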
/*
 * Count leading elements equal to data[0], up to max_count.  Note that
 * the vector loops below may load up to one full vector beyond the
 * counted run, so the surrounding memory must be readable; the u16/u8
 * variants further down bound their loads with 'count + N <= max_count'
 * instead.
 */
static_always_inline uword
clib_count_equal_u64 (u64 * data, uword max_count)
{
  uword count = 0;
  u64 first = data[0];

#if defined(CLIB_HAVE_VEC512)
  while (u64x8_is_all_equal (u64x8_load_unaligned (data), first))
    {
      data += 8;
      count += 8;
      if (count >= max_count)
        return max_count;
    }
#endif
#if defined(CLIB_HAVE_VEC256)
  while (u64x4_is_all_equal (u64x4_load_unaligned (data), first))
    {
      data += 4;
      count += 4;
      if (count >= max_count)
        return max_count;
    }
#endif
#if defined(CLIB_HAVE_VEC128) && defined(CLIB_HAVE_VEC128_UNALIGNED_LOAD_STORE)
  while (u64x2_is_all_equal (u64x2_load_unaligned (data), first))
    {
      data += 2;
      count += 2;
      if (count >= max_count)
        return max_count;
    }
#endif
  while (count < max_count && (data[0] == first))
    {
      data += 1;
      count += 1;
    }
  return count;
}

/* Same as clib_count_equal_u64 for u32 elements; the same overread
   caveat applies. */
static_always_inline uword
clib_count_equal_u32 (u32 * data, uword max_count)
{
  uword count = 0;
  u32 first = data[0];

#if defined(CLIB_HAVE_VEC512)
  while (u32x16_is_all_equal (u32x16_load_unaligned (data), first))
    {
      data += 16;
      count += 16;
      if (count >= max_count)
        return max_count;
    }
#endif
#if defined(CLIB_HAVE_VEC256)
  while (u32x8_is_all_equal (u32x8_load_unaligned (data), first))
    {
      data += 8;
      count += 8;
      if (count >= max_count)
        return max_count;
    }
#endif
#if defined(CLIB_HAVE_VEC128) && defined(CLIB_HAVE_VEC128_UNALIGNED_LOAD_STORE)
  while (u32x4_is_all_equal (u32x4_load_unaligned (data), first))
    {
      data += 4;
      count += 4;
      if (count >= max_count)
        return max_count;
    }
#endif
  while (count < max_count && (data[0] == first))
    {
      data += 1;
      count += 1;
    }
  return count;
}

/* u16 variant: the vector loads are bounded by max_count, so no
   overread occurs. */
static_always_inline uword
clib_count_equal_u16 (u16 * data, uword max_count)
{
  uword count = 0;
  u16 first = data[0];

#if defined(CLIB_HAVE_VEC512)
  while (count + 32 <= max_count &&
         u16x32_is_all_equal (u16x32_load_unaligned (data), first))
    {
      data += 32;
      count += 32;
    }
#endif
#if defined(CLIB_HAVE_VEC256)
  while (count + 16 <= max_count &&
         u16x16_is_all_equal (u16x16_load_unaligned (data), first))
    {
      data += 16;
      count += 16;
    }
#endif
#if defined(CLIB_HAVE_VEC128) && defined(CLIB_HAVE_VEC128_UNALIGNED_LOAD_STORE)
  while (count + 8 <= max_count &&
         u16x8_is_all_equal (u16x8_load_unaligned (data), first))
    {
      data += 8;
      count += 8;
    }
#endif
  while (count < max_count && (data[0] == first))
    {
      data += 1;
      count += 1;
    }
  return count;
}

/* u8 variant: bounded loads, as in the u16 variant above. */
static_always_inline uword
clib_count_equal_u8 (u8 * data, uword max_count)
{
  uword count = 0;
  u8 first = data[0];

#if defined(CLIB_HAVE_VEC512)
  while (count + 64 <= max_count &&
         u8x64_is_all_equal (u8x64_load_unaligned (data), first))
    {
      data += 64;
      count += 64;
    }
#endif
#if defined(CLIB_HAVE_VEC256)
  while (count + 32 <= max_count &&
         u8x32_is_all_equal (u8x32_load_unaligned (data), first))
    {
      data += 32;
      count += 32;
    }
#endif
#if defined(CLIB_HAVE_VEC128) && defined(CLIB_HAVE_VEC128_UNALIGNED_LOAD_STORE)
  while (count + 16 <= max_count &&
         u8x16_is_all_equal (u8x16_load_unaligned (data), first))
    {
      data += 16;
      count += 16;
    }
#endif
  while (count < max_count && (data[0] == first))
    {
      data += 1;
      count += 1;
    }
  return count;
}

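/*
 * Usage sketch (illustrative): measure the leading run of identical
 * next-hop indices so repeated per-element work can be coalesced; the
 * array name is hypothetical.
 *
 *   u32 nexthops[256];
 *   uword n_same;
 *
 *   n_same = clib_count_equal_u32 (nexthops, ARRAY_LEN (nexthops));
 *   // returns how many leading elements equal nexthops[0];
 *   // for max_count >= 1 the result is between 1 and max_count
 */

#endif /* included_clib_string_h */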