FD.io VPP v18.10-34-gcce845e (Vector Packet Processing)
string.h
/*
 * Copyright (c) 2016 Cisco and/or its affiliates.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
/*
  Copyright (c) 2001, 2002, 2003 Eliot Dresselhaus

  Permission is hereby granted, free of charge, to any person obtaining
  a copy of this software and associated documentation files (the
  "Software"), to deal in the Software without restriction, including
  without limitation the rights to use, copy, modify, merge, publish,
  distribute, sublicense, and/or sell copies of the Software, and to
  permit persons to whom the Software is furnished to do so, subject to
  the following conditions:

  The above copyright notice and this permission notice shall be
  included in all copies or substantial portions of the Software.

  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/

#ifndef included_clib_string_h
#define included_clib_string_h

#include <vppinfra/clib.h>	/* for CLIB_LINUX_KERNEL */
#include <vppinfra/vector.h>

#ifdef CLIB_LINUX_KERNEL
#include <linux/string.h>
#endif

#ifdef CLIB_UNIX
#include <string.h>
#endif

#ifdef CLIB_STANDALONE
#include <vppinfra/standalone_string.h>
#endif

#if __x86_64__
#include <x86intrin.h>
#endif

/* Exchanges the contents of the two equally-sized memory regions. */
void clib_memswap (void *_a, void *_b, uword bytes);

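/* A minimal usage sketch (illustrative, not part of this header's API):
   swapping two 6-byte ethernet addresses in place. The parameter names
   are hypothetical. */
static_always_inline void
example_swap_mac_addresses (u8 * a, u8 * b)
{
  clib_memswap (a, b, 6);
}
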
/*
 * The vector-unit memcpy variants confuse Coverity,
 * so don't let it anywhere near them.
 */
#ifndef __COVERITY__
#if __AVX512F__
#include <vppinfra/memcpy_avx512.h>
#elif __AVX2__
#include <vppinfra/memcpy_avx2.h>
#elif __SSSE3__
#include <vppinfra/memcpy_sse3.h>
#else
#define clib_memcpy(a,b,c) memcpy(a,b,c)
#endif
#else /* __COVERITY__ */
#define clib_memcpy(a,b,c) memcpy(a,b,c)
#endif
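
/* Hedged usage sketch: whichever variant is selected above, clib_memcpy
   keeps memcpy semantics (the regions must not overlap), so it is a
   drop-in replacement. The function name and the 40-byte size (an IPv6
   fixed header) are illustrative only. */
static_always_inline void
example_copy_ip6_header (void *dst, const void *src)
{
  clib_memcpy (dst, src, 40);
}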

/*
 * Copy 64 bytes of data to 4 destinations.
 * This function is typically used in the quad-loop case, when a whole
 * cacheline needs to be copied to 4 different places. First it reads the
 * whole cacheline into 1/2/4 SIMD registers, then it writes the data to
 * all 4 destinations.
 */

static_always_inline void
clib_memcpy64_x4 (void *d0, void *d1, void *d2, void *d3, void *s)
{
#if defined (__AVX512F__)
  __m512i r0 = _mm512_loadu_si512 (s);

  _mm512_storeu_si512 (d0, r0);
  _mm512_storeu_si512 (d1, r0);
  _mm512_storeu_si512 (d2, r0);
  _mm512_storeu_si512 (d3, r0);

#elif defined (__AVX2__)
  __m256i r0 = _mm256_loadu_si256 ((__m256i *) (s + 0 * 32));
  __m256i r1 = _mm256_loadu_si256 ((__m256i *) (s + 1 * 32));

  _mm256_storeu_si256 ((__m256i *) (d0 + 0 * 32), r0);
  _mm256_storeu_si256 ((__m256i *) (d0 + 1 * 32), r1);

  _mm256_storeu_si256 ((__m256i *) (d1 + 0 * 32), r0);
  _mm256_storeu_si256 ((__m256i *) (d1 + 1 * 32), r1);

  _mm256_storeu_si256 ((__m256i *) (d2 + 0 * 32), r0);
  _mm256_storeu_si256 ((__m256i *) (d2 + 1 * 32), r1);

  _mm256_storeu_si256 ((__m256i *) (d3 + 0 * 32), r0);
  _mm256_storeu_si256 ((__m256i *) (d3 + 1 * 32), r1);

#elif defined (__SSSE3__)
  __m128i r0 = _mm_loadu_si128 ((__m128i *) (s + 0 * 16));
  __m128i r1 = _mm_loadu_si128 ((__m128i *) (s + 1 * 16));
  __m128i r2 = _mm_loadu_si128 ((__m128i *) (s + 2 * 16));
  __m128i r3 = _mm_loadu_si128 ((__m128i *) (s + 3 * 16));

  _mm_storeu_si128 ((__m128i *) (d0 + 0 * 16), r0);
  _mm_storeu_si128 ((__m128i *) (d0 + 1 * 16), r1);
  _mm_storeu_si128 ((__m128i *) (d0 + 2 * 16), r2);
  _mm_storeu_si128 ((__m128i *) (d0 + 3 * 16), r3);

  _mm_storeu_si128 ((__m128i *) (d1 + 0 * 16), r0);
  _mm_storeu_si128 ((__m128i *) (d1 + 1 * 16), r1);
  _mm_storeu_si128 ((__m128i *) (d1 + 2 * 16), r2);
  _mm_storeu_si128 ((__m128i *) (d1 + 3 * 16), r3);

  _mm_storeu_si128 ((__m128i *) (d2 + 0 * 16), r0);
  _mm_storeu_si128 ((__m128i *) (d2 + 1 * 16), r1);
  _mm_storeu_si128 ((__m128i *) (d2 + 2 * 16), r2);
  _mm_storeu_si128 ((__m128i *) (d2 + 3 * 16), r3);

  _mm_storeu_si128 ((__m128i *) (d3 + 0 * 16), r0);
  _mm_storeu_si128 ((__m128i *) (d3 + 1 * 16), r1);
  _mm_storeu_si128 ((__m128i *) (d3 + 2 * 16), r2);
  _mm_storeu_si128 ((__m128i *) (d3 + 3 * 16), r3);

#else
  clib_memcpy (d0, s, 64);
  clib_memcpy (d1, s, 64);
  clib_memcpy (d2, s, 64);
  clib_memcpy (d3, s, 64);
#endif
}
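
/* A sketch of the intended quad-loop pattern: replicating one 64-byte
   rewrite string into four packet buffers at once. All five pointers
   must reference at least 64 accessible bytes; the names below are
   illustrative, not part of this header. */
static_always_inline void
example_apply_rewrite_x4 (u8 * p0, u8 * p1, u8 * p2, u8 * p3, u8 * rewrite)
{
  clib_memcpy64_x4 (p0, p1, p2, p3, rewrite);
}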

/* Fill count 64-bit elements starting at p with val; count is in
   elements, not bytes. */
static_always_inline void
clib_memset_u64 (void *p, u64 val, uword count)
{
  u64 *ptr = p;
#if defined(CLIB_HAVE_VEC512)
  u64x8 v512 = u64x8_splat (val);
  while (count >= 8)
    {
      u64x8_store_unaligned (v512, ptr);
      ptr += 8;
      count -= 8;
    }
  if (count == 0)
    return;
#endif
#if defined(CLIB_HAVE_VEC256)
  u64x4 v256 = u64x4_splat (val);
  while (count >= 4)
    {
      u64x4_store_unaligned (v256, ptr);
      ptr += 4;
      count -= 4;
    }
  if (count == 0)
    return;
#else
  while (count >= 4)
    {
      ptr[0] = ptr[1] = ptr[2] = ptr[3] = val;
      ptr += 4;
      count -= 4;
    }
#endif
  while (count--)
    ptr++[0] = val;
}
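
/* Minimal usage sketch, assuming a hypothetical table of 64-bit
   counters: note the element count, not a byte count, is passed. */
static_always_inline void
example_clear_counters (u64 * counters, uword n_counters)
{
  clib_memset_u64 (counters, 0, n_counters);
}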

/* As clib_memset_u64, for 32-bit elements. */
static_always_inline void
clib_memset_u32 (void *p, u32 val, uword count)
{
  u32 *ptr = p;
#if defined(CLIB_HAVE_VEC512)
  u32x16 v512 = u32x16_splat (val);
  while (count >= 16)
    {
      u32x16_store_unaligned (v512, ptr);
      ptr += 16;
      count -= 16;
    }
  if (count == 0)
    return;
#endif
#if defined(CLIB_HAVE_VEC256)
  u32x8 v256 = u32x8_splat (val);
  while (count >= 8)
    {
      u32x8_store_unaligned (v256, ptr);
      ptr += 8;
      count -= 8;
    }
  if (count == 0)
    return;
#endif
#if defined(CLIB_HAVE_VEC128) && defined(CLIB_HAVE_VEC128_UNALIGNED_LOAD_STORE)
  u32x4 v128 = u32x4_splat (val);
  while (count >= 4)
    {
      u32x4_store_unaligned (v128, ptr);
      ptr += 4;
      count -= 4;
    }
#else
  while (count >= 4)
    {
      ptr[0] = ptr[1] = ptr[2] = ptr[3] = val;
      ptr += 4;
      count -= 4;
    }
#endif
  while (count--)
    ptr++[0] = val;
}

/* As clib_memset_u64, for 16-bit elements. */
static_always_inline void
clib_memset_u16 (void *p, u16 val, uword count)
{
  u16 *ptr = p;
#if defined(CLIB_HAVE_VEC512)
  u16x32 v512 = u16x32_splat (val);
  while (count >= 32)
    {
      u16x32_store_unaligned (v512, ptr);
      ptr += 32;
      count -= 32;
    }
  if (count == 0)
    return;
#endif
#if defined(CLIB_HAVE_VEC256)
  u16x16 v256 = u16x16_splat (val);
  while (count >= 16)
    {
      u16x16_store_unaligned (v256, ptr);
      ptr += 16;
      count -= 16;
    }
  if (count == 0)
    return;
#endif
#if defined(CLIB_HAVE_VEC128) && defined(CLIB_HAVE_VEC128_UNALIGNED_LOAD_STORE)
  u16x8 v128 = u16x8_splat (val);
  while (count >= 8)
    {
      u16x8_store_unaligned (v128, ptr);
      ptr += 8;
      count -= 8;
    }
#else
  while (count >= 4)
    {
      ptr[0] = ptr[1] = ptr[2] = ptr[3] = val;
      ptr += 4;
      count -= 4;
    }
#endif
  while (count--)
    ptr++[0] = val;
}

/* As clib_memset_u64, for 8-bit elements. */
static_always_inline void
clib_memset_u8 (void *p, u8 val, uword count)
{
  u8 *ptr = p;
#if defined(CLIB_HAVE_VEC512)
  u8x64 v512 = u8x64_splat (val);
  while (count >= 64)
    {
      u8x64_store_unaligned (v512, ptr);
      ptr += 64;
      count -= 64;
    }
  if (count == 0)
    return;
#endif
#if defined(CLIB_HAVE_VEC256)
  u8x32 v256 = u8x32_splat (val);
  while (count >= 32)
    {
      u8x32_store_unaligned (v256, ptr);
      ptr += 32;
      count -= 32;
    }
  if (count == 0)
    return;
#endif
#if defined(CLIB_HAVE_VEC128) && defined(CLIB_HAVE_VEC128_UNALIGNED_LOAD_STORE)
  u8x16 v128 = u8x16_splat (val);
  while (count >= 16)
    {
      u8x16_store_unaligned (v128, ptr);
      ptr += 16;
      count -= 16;
    }
#else
  while (count >= 4)
    {
      ptr[0] = ptr[1] = ptr[2] = ptr[3] = val;
      ptr += 4;
      count -= 4;
    }
#endif
  while (count--)
    ptr++[0] = val;
}
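
/* The four clib_memset_u* variants above differ only in element width,
   and each takes an element count rather than a byte count. A sketch
   for the byte variant, filling a scratch buffer with an arbitrary
   poison pattern (0xfe is illustrative): */
static_always_inline void
example_poison_buffer (u8 * buf, uword n_bytes)
{
  clib_memset_u8 (buf, 0xfe, n_bytes);
}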

/* Return the length of the run of elements equal to data[0], capped at
   max_count. */
static_always_inline uword
clib_count_equal_u64 (u64 * data, uword max_count)
{
  uword count;
  u64 first;

  if (max_count == 1)
    return 1;
  if (data[0] != data[1])
    return 1;

  count = 0;
  first = data[0];

#if defined(CLIB_HAVE_VEC256)
  u64x4 splat = u64x4_splat (first);
  while (1)
    {
      u64 bmp;
      bmp = u8x32_msb_mask ((u8x32) (u64x4_load_unaligned (data) == splat));
      if (bmp != 0xffffffff)
	{
	  count += count_trailing_zeros (~bmp) / 8;
	  return clib_min (count, max_count);
	}

      data += 4;
      count += 4;

      if (count >= max_count)
	return max_count;
    }
#endif
  count += 2;
  data += 2;
  while (count + 3 < max_count &&
	 ((data[0] ^ first) | (data[1] ^ first) |
	  (data[2] ^ first) | (data[3] ^ first)) == 0)
    {
      data += 4;
      count += 4;
    }
  while (count < max_count && (data[0] == first))
    {
      data += 1;
      count += 1;
    }
  return count;
}
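
/* Hedged sketch of run-length scanning with clib_count_equal_u64: walk
   an array and report each run of repeated values through a caller
   supplied callback. The names are hypothetical, and since the vector
   path above loads a full vector near the end of a run, the array is
   assumed to be readable a few elements past len. */
static_always_inline void
example_scan_runs (u64 * data, uword len,
		   void (*emit) (u64 value, uword run_length))
{
  uword i = 0;
  while (i < len)
    {
      uword n = clib_count_equal_u64 (data + i, len - i);
      emit (data[i], n);
      i += n;
    }
}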

/* As clib_count_equal_u64, for 32-bit elements. */
static_always_inline uword
clib_count_equal_u32 (u32 * data, uword max_count)
{
  uword count;
  u32 first;

  if (max_count == 1)
    return 1;
  if (data[0] != data[1])
    return 1;

  count = 0;
  first = data[0];

#if defined(CLIB_HAVE_VEC256)
  u32x8 splat = u32x8_splat (first);
  while (1)
    {
      u64 bmp;
      bmp = u8x32_msb_mask ((u8x32) (u32x8_load_unaligned (data) == splat));
      if (bmp != 0xffffffff)
	{
	  count += count_trailing_zeros (~bmp) / 4;
	  return clib_min (count, max_count);
	}

      data += 8;
      count += 8;

      if (count >= max_count)
	return max_count;
    }
#elif defined(CLIB_HAVE_VEC128) && defined(CLIB_HAVE_VEC128_MSB_MASK)
  u32x4 splat = u32x4_splat (first);
  while (1)
    {
      u64 bmp;
      bmp = u8x16_msb_mask ((u8x16) (u32x4_load_unaligned (data) == splat));
      if (bmp != 0xffff)
	{
	  count += count_trailing_zeros (~bmp) / 4;
	  return clib_min (count, max_count);
	}

      data += 4;
      count += 4;

      if (count >= max_count)
	return max_count;
    }
#endif
  count += 2;
  data += 2;
  while (count + 3 < max_count &&
	 ((data[0] ^ first) | (data[1] ^ first) |
	  (data[2] ^ first) | (data[3] ^ first)) == 0)
    {
      data += 4;
      count += 4;
    }
  while (count < max_count && (data[0] == first))
    {
      data += 1;
      count += 1;
    }
  return count;
}

/* As clib_count_equal_u64, for 16-bit elements. */
static_always_inline uword
clib_count_equal_u16 (u16 * data, uword max_count)
{
  uword count;
  u16 first;

  if (max_count == 1)
    return 1;
  if (data[0] != data[1])
    return 1;

  count = 0;
  first = data[0];

#if defined(CLIB_HAVE_VEC256)
  u16x16 splat = u16x16_splat (first);
  while (1)
    {
      u64 bmp;
      bmp = u8x32_msb_mask ((u8x32) (u16x16_load_unaligned (data) == splat));
      if (bmp != 0xffffffff)
	{
	  count += count_trailing_zeros (~bmp) / 2;
	  return clib_min (count, max_count);
	}

      data += 16;
      count += 16;

      if (count >= max_count)
	return max_count;
    }
#elif defined(CLIB_HAVE_VEC128) && defined(CLIB_HAVE_VEC128_MSB_MASK)
  u16x8 splat = u16x8_splat (first);
  while (1)
    {
      u64 bmp;
      bmp = u8x16_msb_mask ((u8x16) (u16x8_load_unaligned (data) == splat));
      if (bmp != 0xffff)
	{
	  count += count_trailing_zeros (~bmp) / 2;
	  return clib_min (count, max_count);
	}

      data += 8;
      count += 8;

      if (count >= max_count)
	return max_count;
    }
#endif
  count += 2;
  data += 2;
  while (count + 3 < max_count &&
	 ((data[0] ^ first) | (data[1] ^ first) |
	  (data[2] ^ first) | (data[3] ^ first)) == 0)
    {
      data += 4;
      count += 4;
    }
  while (count < max_count && (data[0] == first))
    {
      data += 1;
      count += 1;
    }
  return count;
}

/* As clib_count_equal_u64, for 8-bit elements. */
static_always_inline uword
clib_count_equal_u8 (u8 * data, uword max_count)
{
  uword count;
  u8 first;

  if (max_count == 1)
    return 1;
  if (data[0] != data[1])
    return 1;

  count = 0;
  first = data[0];

#if defined(CLIB_HAVE_VEC256)
  u8x32 splat = u8x32_splat (first);
  while (1)
    {
      u64 bmp;
      bmp = u8x32_msb_mask ((u8x32) (u8x32_load_unaligned (data) == splat));
      if (bmp != 0xffffffff)
	{
	  count += count_trailing_zeros (~bmp);
	  return clib_min (count, max_count);
	}

      data += 32;
      count += 32;

      if (count >= max_count)
	return max_count;
    }
#elif defined(CLIB_HAVE_VEC128) && defined(CLIB_HAVE_VEC128_MSB_MASK)
  u8x16 splat = u8x16_splat (first);
  while (1)
    {
      u64 bmp;
      bmp = u8x16_msb_mask ((u8x16) (u8x16_load_unaligned (data) == splat));
      if (bmp != 0xffff)
	{
	  count += count_trailing_zeros (~bmp);
	  return clib_min (count, max_count);
	}

      data += 16;
      count += 16;

      if (count >= max_count)
	return max_count;
    }
#endif
  count += 2;
  data += 2;
  while (count + 3 < max_count &&
	 ((data[0] ^ first) | (data[1] ^ first) |
	  (data[2] ^ first) | (data[3] ^ first)) == 0)
    {
      data += 4;
      count += 4;
    }
  while (count < max_count && (data[0] == first))
    {
      data += 1;
      count += 1;
    }
  return count;
}
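
/* The clib_count_equal_u* family shares one contract: return the length
   of the leading run of elements equal to data[0], clamped to max_count.
   A byte-wise sketch, counting leading zero bytes of a buffer; the
   function name is illustrative: */
static_always_inline uword
example_leading_zero_bytes (u8 * data, uword len)
{
  if (len == 0 || data[0] != 0)
    return 0;
  return clib_count_equal_u8 (data, len);
}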

#endif /* included_clib_string_h */

/*
 * fd.io coding-style-patch-verification: ON
 *
 * Local Variables:
 * eval: (c-set-style "gnu")
 * End:
 */