FD.io VPP v16.06
Vector Packet Processing
vhash.h
/*
 * Copyright (c) 2015 Cisco and/or its affiliates.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
/*
  Copyright (c) 2010 Eliot Dresselhaus

  Permission is hereby granted, free of charge, to any person obtaining
  a copy of this software and associated documentation files (the
  "Software"), to deal in the Software without restriction, including
  without limitation the rights to use, copy, modify, merge, publish,
  distribute, sublicense, and/or sell copies of the Software, and to
  permit persons to whom the Software is furnished to do so, subject to
  the following conditions:

  The above copyright notice and this permission notice shall be
  included in all copies or substantial portions of the Software.

  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/

#ifndef included_clib_vhash_h
#define included_clib_vhash_h

#include <vppinfra/vector.h>

#ifdef CLIB_HAVE_VEC128

#include <vppinfra/cache.h>
#include <vppinfra/hash.h>
#include <vppinfra/pipeline.h>

/* Gathers 32 bits worth of key with given index. */
typedef u32 (vhash_key_function_t) (void * state, u32 vector_index, u32 key_word_index);
typedef u32x4 (vhash_4key_function_t) (void * state, u32 vector_index, u32 key_word_index);
/* Sets/gets result of hash lookup. */
typedef u32 (vhash_result_function_t) (void * state, u32 vector_index, u32 result, u32 n_key_u32);
typedef u32x4 (vhash_4result_function_t) (void * state, u32 vector_index, u32x4 results, u32 n_key_u32);

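/* Illustrative sketch (not part of the original header): a minimal pair of
   callbacks for a single-u32 key table. The names my_state_t, my_keys and
   my_results are hypothetical. The key function returns the
   key_word_index'th u32 of the vector_index'th key; the result function
   receives the looked-up user result (~0 when not found, since user results
   are internal results minus 1). Its return value is ignored by
   vhash_get_stage but supplies the new result in vhash_set_stage. */
#if 0
typedef struct { u32 * my_keys; u32 * my_results; } my_state_t;

static u32
my_key_function (void * state, u32 vector_index, u32 key_word_index)
{
  my_state_t * s = state;
  ASSERT (key_word_index == 0);	/* n_key_u32 == 1 in this sketch. */
  return s->my_keys[vector_index];
}

static u32
my_result_function (void * state, u32 vector_index, u32 result, u32 n_key_u32)
{
  my_state_t * s = state;
  s->my_results[vector_index] = result;	/* ~0 means not found. */
  return result;
}
#endif
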
typedef struct {
  u32x4_union_t hashed_key[3];
} vhash_hashed_key_t;

/* Search buckets are really this structure. */
typedef struct {
  /* 4 results for this bucket.
     Zero is used to mark empty results. This means the user can't use
     result ~0, since user results differ from the internal results stored
     in buckets by 1: internal result = user result + 1. */
  u32x4_union_t result;

  /* n_key_u32s u32x4s of key data follow. */
  u32x4_union_t key[0];
} vhash_search_bucket_t;

typedef struct {
  u32x4_union_t * search_buckets;

  /* Vector of bucket free indices. */
  u32 * free_indices;

  /* Number of entries in this overflow bucket. */
  u32 n_overflow;
} vhash_overflow_buckets_t;

typedef struct {
  /* 2^log2_n_keys keys grouped in groups of 4.
     Each bucket contains 4 results plus 4 keys for a
     total of (1 + n_key_u32) u32x4s. */
  u32x4_union_t * search_buckets;

  /* When a bucket of 4 results/keys is full we search
     the overflow. hash_key is used to select which overflow
     bucket. */
  vhash_overflow_buckets_t overflow_buckets[16];

  /* Total count of occupied elements in hash table. */
  u32 n_elts;

  u32 log2_n_keys;

  /* Number of 32 bit words in a hash key. */
  u32 n_key_u32;

  u32x4_union_t bucket_mask;

  /* table[i] = min_log2 (first_set (~i)). */
  u8 find_first_zero_table[16];

  /* Hash seeds for Jenkins hash. */
  u32x4_union_t hash_seeds[3];

  /* Key work space is a vector of length
     n_key_u32s << log2_n_key_word_len_u32x. */
  u32 log2_n_key_word_len_u32x;

  /* Work space to store keys between pipeline stages. */
  u32x4_union_t * key_work_space;

  /* Hash work space to store Jenkins hash values between
     pipeline stages. */
  vhash_hashed_key_t * hash_work_space;
} vhash_t;

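/* Illustrative sketch (not part of the original header): creating a table.
   vhash_init is declared near the end of this file; the seed values below
   are arbitrary. log2_n_keys = 10 gives a 1024-key table; n_key_u32 = 1
   means 4-byte keys. my_vhash is a hypothetical name. */
#if 0
static vhash_t my_vhash;

static void
my_vhash_create (void)
{
  u32 seeds[3] = { 0x12345678, 0x9abcdef0, 0xdeadbeef };
  vhash_init (&my_vhash, /* log2_n_keys */ 10, /* n_key_u32 */ 1, seeds);
}
#endif
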
always_inline vhash_overflow_buckets_t *
vhash_get_overflow_buckets (vhash_t * h, u32 key)
{
  u32 i = (((key & h->bucket_mask.as_u32[0]) >> 2) & 0xf);
  ASSERT (i < ARRAY_LEN (h->overflow_buckets));
  return h->overflow_buckets + i;
}

always_inline uword
vhash_is_non_empty_overflow_bucket (vhash_t * h, u32 key)
{
  u32 i = (((key & h->bucket_mask.as_u32[0]) >> 2) & 0xf);
  ASSERT (i < ARRAY_LEN (h->overflow_buckets));
  return h->overflow_buckets[i].n_overflow > 0;
}

always_inline void
vhash_free_overflow_buckets (vhash_overflow_buckets_t * obs)
{
  vec_free (obs->search_buckets);
  vec_free (obs->free_indices);
}

always_inline void
vhash_free (vhash_t * h)
{
  uword i;
  for (i = 0; i < ARRAY_LEN (h->overflow_buckets); i++)
    vhash_free_overflow_buckets (&h->overflow_buckets[i]);
  vec_free (h->search_buckets);
  vec_free (h->key_work_space);
  vec_free (h->hash_work_space);
}

always_inline void
vhash_set_key_word (vhash_t * h, u32 wi, u32 vi, u32 value)
{
  u32 i0 = (wi << h->log2_n_key_word_len_u32x) + (vi / 4);
  u32 i1 = vi % 4;
  vec_elt (h->key_work_space, i0).as_u32[i1] = value;
}

always_inline void
vhash_set_key_word_u32x (vhash_t * h, u32 wi, u32 vi, u32x value)
{
  u32 i0 = (wi << h->log2_n_key_word_len_u32x) + (vi / 4);
  vec_elt (h->key_work_space, i0).as_u32x4 = value;
}

always_inline u32
vhash_get_key_word (vhash_t * h, u32 wi, u32 vi)
{
  u32 i0 = (wi << h->log2_n_key_word_len_u32x) + (vi / 4);
  u32 i1 = vi % 4;
  return vec_elt (h->key_work_space, i0).as_u32[i1];
}

/* Note: unlike the scalar getters above, vi here is already a vector
   index (a key index divided by 4); callers pass vector_index directly. */
always_inline u32x
vhash_get_key_word_u32x (vhash_t * h, u32 wi, u32 vi)
{
  u32 i0 = (wi << h->log2_n_key_word_len_u32x) + vi;
  return vec_elt (h->key_work_space, i0).as_u32x4;
}

always_inline void
vhash_validate_sizes (vhash_t * h, u32 n_key_u32, u32 n_vectors)
{
  u32 n, l;

  n = max_pow2 (n_vectors) / 4;
  n = clib_max (n, 8);

  h->log2_n_key_word_len_u32x = l = min_log2 (n);
  vec_validate_aligned (h->key_work_space, (n_key_u32 << l) - 1, CLIB_CACHE_LINE_BYTES);
  vec_validate_aligned (h->hash_work_space, n - 1, CLIB_CACHE_LINE_BYTES);
}

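/* Worked example (illustrative): with n_vectors = 256 and n_key_u32 = 3,
   n = max_pow2 (256) / 4 = 64, so l = 6; key_work_space is validated to
   3 << 6 = 192 u32x4s (one 64-u32x4 plane per key word) and
   hash_work_space to 64 vhash_hashed_key_t entries. */
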
always_inline void
vhash_gather_key_stage (vhash_t * h,
                        u32 vector_index,
                        u32 n_vectors,
                        vhash_key_function_t key_function,
                        void * state,
                        u32 n_key_u32s)
{
  u32 i, j, vi;

  /* Gather keys for 4 packets (for 128 bit vector length e.g. u32x4). */
  for (i = 0; i < n_vectors; i++)
    {
      vi = vector_index * 4 + i;
      for (j = 0; j < n_key_u32s; j++)
        vhash_set_key_word (h, j, vi,
                            key_function (state, vi, j));
    }
}

always_inline void
vhash_gather_4key_stage (vhash_t * h,
                         u32 vector_index,
                         vhash_4key_function_t key_function,
                         void * state,
                         u32 n_key_u32s)
{
  u32 j, vi;
  vi = vector_index * 4;
  for (j = 0; j < n_key_u32s; j++)
    vhash_set_key_word_u32x (h, j, vi, key_function (state, vi, j));
}

always_inline void
vhash_mix_stage (vhash_t * h,
                 u32 vector_index,
                 u32 n_key_u32s)
{
  i32 i, n_left;
  u32x a, b, c;

  /* Only need to do this for keys longer than 12 bytes. */
  ASSERT (n_key_u32s > 3);

  a = h->hash_seeds[0].as_u32x4;
  b = h->hash_seeds[1].as_u32x4;
  c = h->hash_seeds[2].as_u32x4;
  for (i = 0, n_left = n_key_u32s - 3; n_left > 0; n_left -= 3, i += 3)
    {
      a += vhash_get_key_word_u32x (h, n_key_u32s - 1 - (i + 0), vector_index);
      if (n_left > 1)
        b += vhash_get_key_word_u32x (h, n_key_u32s - 1 - (i + 1), vector_index);
      if (n_left > 2)
        c += vhash_get_key_word_u32x (h, n_key_u32s - 1 - (i + 2), vector_index);

      hash_v3_mix_u32x (a, b, c);
    }

  /* Save away a, b, c for later finalize. */
  {
    vhash_hashed_key_t * hk = vec_elt_at_index (h->hash_work_space, vector_index);
    hk->hashed_key[0].as_u32x4 = a;
    hk->hashed_key[1].as_u32x4 = b;
    hk->hashed_key[2].as_u32x4 = c;
  }
}

always_inline vhash_search_bucket_t *
vhash_get_search_bucket_with_index (vhash_t * h, u32 i, u32 n_key_u32s)
{
  return ((vhash_search_bucket_t *)
          vec_elt_at_index (h->search_buckets,
                            (i / 4) * ((sizeof (vhash_search_bucket_t) / sizeof (u32x4)) + n_key_u32s)));
}

always_inline vhash_search_bucket_t *
vhash_get_search_bucket (vhash_t * h, u32 key_hash, u32 n_key_u32s)
{
  u32 i = key_hash & h->bucket_mask.as_u32[0];
  return vhash_get_search_bucket_with_index (h, i, n_key_u32s);
}

always_inline u32x4
vhash_get_4_search_bucket_byte_offsets (vhash_t * h, u32x4 key_hash, u32 n_key_u32s)
{
  vhash_search_bucket_t * b;
  u32 n_bytes_per_bucket = sizeof (b[0]) + n_key_u32s * sizeof (b->key[0]);
  u32x4 r = key_hash & h->bucket_mask.as_u32x4;

  /* Multiply with shifts and adds to get bucket byte offset. */
#define _(x) u32x4_ishift_left (r, (x) - 2)
  if (n_bytes_per_bucket == (1 << 5))
    r = _ (5);
  else if (n_bytes_per_bucket == ((1 << 5) + (1 << 4)))
    r = _ (5) + _ (4);
  else if (n_bytes_per_bucket == (1 << 6))
    r = _ (6);
  else if (n_bytes_per_bucket == ((1 << 6) + (1 << 4)))
    r = _ (6) + _ (4);
  else if (n_bytes_per_bucket == ((1 << 6) + (1 << 5)))
    r = _ (6) + _ (5);
  else if (n_bytes_per_bucket == ((1 << 6) + (1 << 5) + (1 << 4)))
    r = _ (6) + _ (5) + _ (4);
  else
    ASSERT (0);
#undef _
  return r;
}

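/* Worked example (illustrative): the masked hash r counts keys and each
   bucket holds 4 keys, so the byte offset is (r / 4) * n_bytes_per_bucket;
   the `- 2' in the shift macro folds in the divide by 4. E.g. for
   n_key_u32s = 2, n_bytes_per_bucket = 16 + 2 * 16 = 48 = (1 << 5) + (1 << 4),
   so offset = (r << 3) + (r << 2) = r * 12 = (r / 4) * 48. */
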
always_inline void
vhash_finalize_stage (vhash_t * h,
                      u32 vector_index,
                      u32 n_key_u32s)
{
  i32 n_left;
  u32x a, b, c;
  vhash_hashed_key_t * hk = vec_elt_at_index (h->hash_work_space, vector_index);

  if (n_key_u32s <= 3)
    {
      a = h->hash_seeds[0].as_u32x4;
      b = h->hash_seeds[1].as_u32x4;
      c = h->hash_seeds[2].as_u32x4;
      n_left = n_key_u32s;
    }
  else
    {
      a = hk->hashed_key[0].as_u32x4;
      b = hk->hashed_key[1].as_u32x4;
      c = hk->hashed_key[2].as_u32x4;
      n_left = 3;
    }

  if (n_left > 0)
    a += vhash_get_key_word_u32x (h, 0, vector_index);
  if (n_left > 1)
    b += vhash_get_key_word_u32x (h, 1, vector_index);
  if (n_left > 2)
    c += vhash_get_key_word_u32x (h, 2, vector_index);

  hash_v3_finalize_u32x (a, b, c);

  /* Only save away last 32 bits of hash code. */
  hk->hashed_key[2].as_u32x4 = c;

  /* Prefetch buckets. This costs a bit for small tables but saves
     big for large ones. */
  {
    vhash_search_bucket_t * b0, * b1, * b2, * b3;
    u32x4_union_t kh;

    kh.as_u32x4 = vhash_get_4_search_bucket_byte_offsets (h, c, n_key_u32s);
    hk->hashed_key[1].as_u32x4 = kh.as_u32x4;

    b0 = (void *) h->search_buckets + kh.as_u32[0];
    b1 = (void *) h->search_buckets + kh.as_u32[1];
    b2 = (void *) h->search_buckets + kh.as_u32[2];
    b3 = (void *) h->search_buckets + kh.as_u32[3];

    CLIB_PREFETCH (b0, sizeof (b0[0]) + n_key_u32s * sizeof (b0->key[0]), READ);
    CLIB_PREFETCH (b1, sizeof (b1[0]) + n_key_u32s * sizeof (b1->key[0]), READ);
    CLIB_PREFETCH (b2, sizeof (b2[0]) + n_key_u32s * sizeof (b2->key[0]), READ);
    CLIB_PREFETCH (b3, sizeof (b3[0]) + n_key_u32s * sizeof (b3->key[0]), READ);
  }
}

always_inline u32
vhash_merge_results (u32x4 r)
{
  r = r | u32x4_word_shift_right (r, 2);
  r = r | u32x4_word_shift_right (r, 1);
  return u32x4_get0 (r);
}

/* Bucket is full if none of its 4 results are 0. */
always_inline u32
vhash_search_bucket_is_full (u32x4 r)
{ return u32x4_zero_byte_mask (r) == 0; }

always_inline u32
vhash_non_empty_result_index (u32x4 x)
{
  u32 empty_mask = u32x4_zero_byte_mask (x);
  ASSERT (empty_mask != 0xffff);
  return min_log2 (0xffff &~ empty_mask) / 4;
}

always_inline u32
vhash_empty_result_index (u32x4 x)
{
  u32 empty_mask = u32x4_zero_byte_mask (x);
  ASSERT (empty_mask != 0);
  return min_log2 (0xffff & empty_mask) / 4;
}

always_inline u32x4
vhash_bucket_compare (vhash_t * h,
                      u32x4_union_t * bucket,
                      u32 key_word_index,
                      u32 vi)
{
  u32 k = vhash_get_key_word (h, key_word_index, vi);
  u32x4 x = {k, k, k, k};
  return u32x4_is_equal (bucket[key_word_index].as_u32x4, x);
}

#define vhash_bucket_compare_4(h,wi,vi,b0,b1,b2,b3,cmp0,cmp1,cmp2,cmp3) \
do { \
  u32x4 _k4 = vhash_get_key_word_u32x ((h), (wi), (vi)); \
  u32x4 _k0 = u32x4_splat_word (_k4, 0); \
  u32x4 _k1 = u32x4_splat_word (_k4, 1); \
  u32x4 _k2 = u32x4_splat_word (_k4, 2); \
  u32x4 _k3 = u32x4_splat_word (_k4, 3); \
  \
  cmp0 = u32x4_is_equal (b0->key[wi].as_u32x4, _k0); \
  cmp1 = u32x4_is_equal (b1->key[wi].as_u32x4, _k1); \
  cmp2 = u32x4_is_equal (b2->key[wi].as_u32x4, _k2); \
  cmp3 = u32x4_is_equal (b3->key[wi].as_u32x4, _k3); \
} while (0)

u32 vhash_get_overflow (vhash_t * h,
                        u32 key_hash,
                        u32 vi,
                        u32 n_key_u32s);

always_inline void
vhash_get_stage (vhash_t * h,
                 u32 vector_index,
                 u32 n_vectors,
                 vhash_result_function_t result_function,
                 void * state,
                 u32 n_key_u32s)
{
  u32 i, j;
  vhash_hashed_key_t * hk = vec_elt_at_index (h->hash_work_space, vector_index);
  vhash_search_bucket_t * b;

  for (i = 0; i < n_vectors; i++)
    {
      u32 vi = vector_index * 4 + i;
      u32 key_hash = hk->hashed_key[2].as_u32[i];
      u32 result;
      u32x4 r, r0;

      b = vhash_get_search_bucket (h, key_hash, n_key_u32s);

      r = r0 = b->result.as_u32x4;
      for (j = 0; j < n_key_u32s; j++)
        r &= vhash_bucket_compare (h, &b->key[0], j, vi);

      /* At this point only one of the 4 results should be non-zero,
         so we can OR all 4 together and get the valid result (if there is one). */
      result = vhash_merge_results (r);

      if (! result && vhash_search_bucket_is_full (r0))
        result = vhash_get_overflow (h, key_hash, vi, n_key_u32s);

      result_function (state, vi, result - 1, n_key_u32s);
    }
}

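/* Illustrative sketch (not part of the original header): a straight-line,
   non-pipelined driver for a batch of lookups with 4-byte keys
   (n_key_u32s == 1, so no mix stage is needed). In production code these
   stages are software-pipelined (see vppinfra/pipeline.h) so the prefetches
   issued in vhash_finalize_stage complete before vhash_get_stage touches the
   buckets. my_state_t, my_key_function and my_result_function are the
   hypothetical callbacks sketched earlier. */
#if 0
static void
my_vhash_get (vhash_t * h, my_state_t * s, u32 n_keys)
{
  u32 n_vectors = (n_keys + 3) / 4;	/* 4 keys per u32x4 vector */
  u32 vi;

  vhash_validate_sizes (h, /* n_key_u32 */ 1, n_keys);

  for (vi = 0; vi < n_vectors; vi++)
    {
      u32 n = clib_min (4, n_keys - 4 * vi);
      vhash_gather_key_stage (h, vi, n, my_key_function, s, 1);
      vhash_finalize_stage (h, vi, 1);
      vhash_get_stage (h, vi, n, my_result_function, s, 1);
    }
}
#endif
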
always_inline void
vhash_get_4_stage (vhash_t * h,
                   u32 vector_index,
                   vhash_4result_function_t result_function,
                   void * state,
                   u32 n_key_u32s)
{
  u32 i, vi;
  vhash_hashed_key_t * hk = vec_elt_at_index (h->hash_work_space, vector_index);
  vhash_search_bucket_t * b0, * b1, * b2, * b3;
  u32x4 r0, r1, r2, r3, r0_before, r1_before, r2_before, r3_before;
  u32x4_union_t kh;

  kh.as_u32x4 = hk->hashed_key[1].as_u32x4;

  b0 = (void *) h->search_buckets + kh.as_u32[0];
  b1 = (void *) h->search_buckets + kh.as_u32[1];
  b2 = (void *) h->search_buckets + kh.as_u32[2];
  b3 = (void *) h->search_buckets + kh.as_u32[3];

  r0 = r0_before = b0->result.as_u32x4;
  r1 = r1_before = b1->result.as_u32x4;
  r2 = r2_before = b2->result.as_u32x4;
  r3 = r3_before = b3->result.as_u32x4;

  vi = vector_index * 4;

  for (i = 0; i < n_key_u32s; i++)
    {
      u32x4 c0, c1, c2, c3;
      vhash_bucket_compare_4 (h, i, vector_index,
                              b0, b1, b2, b3,
                              c0, c1, c2, c3);
      r0 &= c0;
      r1 &= c1;
      r2 &= c2;
      r3 &= c3;
    }

  u32x4_transpose (r0, r1, r2, r3);

  /* Gather together 4 results. */
  {
    u32x4_union_t r;
    u32x4 ones = {1,1,1,1};
    u32 not_found_mask;

    r.as_u32x4 = r0 | r1 | r2 | r3;
    not_found_mask = u32x4_zero_byte_mask (r.as_u32x4);
    not_found_mask &= ((vhash_search_bucket_is_full (r0_before) << (4*0))
                       | (vhash_search_bucket_is_full (r1_before) << (4*1))
                       | (vhash_search_bucket_is_full (r2_before) << (4*2))
                       | (vhash_search_bucket_is_full (r3_before) << (4*3)));
    if (not_found_mask)
      {
        u32x4_union_t key_hash;

        key_hash.as_u32x4 = hk->hashed_key[2].as_u32x4 & h->bucket_mask.as_u32x4;

        /* Slow path: one of the buckets may have been full and we need to search overflow. */
        if (not_found_mask & (1 << (4*0)))
          r.as_u32[0] = vhash_get_overflow (h, key_hash.as_u32[0],
                                            vi + 0, n_key_u32s);
        if (not_found_mask & (1 << (4*1)))
          r.as_u32[1] = vhash_get_overflow (h, key_hash.as_u32[1],
                                            vi + 1, n_key_u32s);
        if (not_found_mask & (1 << (4*2)))
          r.as_u32[2] = vhash_get_overflow (h, key_hash.as_u32[2],
                                            vi + 2, n_key_u32s);
        if (not_found_mask & (1 << (4*3)))
          r.as_u32[3] = vhash_get_overflow (h, key_hash.as_u32[3],
                                            vi + 3, n_key_u32s);
      }

    result_function (state, vi, r.as_u32x4 - ones, n_key_u32s);
  }
}

u32
vhash_set_overflow (vhash_t * h,
                    u32 key_hash,
                    u32 vi,
                    u32 new_result,
                    u32 n_key_u32s);

always_inline void
vhash_set_stage (vhash_t * h,
                 u32 vector_index,
                 u32 n_vectors,
                 vhash_result_function_t result_function,
                 void * state,
                 u32 n_key_u32s)
{
  u32 i, j, n_new_elts = 0;
  vhash_hashed_key_t * hk = vec_elt_at_index (h->hash_work_space, vector_index);
  vhash_search_bucket_t * b;

  for (i = 0; i < n_vectors; i++)
    {
      u32 vi = vector_index * 4 + i;
      u32 key_hash = hk->hashed_key[2].as_u32[i];
      u32 old_result, new_result;
      u32 i_set;
      u32x4 r, r0, cmp;

      b = vhash_get_search_bucket (h, key_hash, n_key_u32s);

      cmp = vhash_bucket_compare (h, &b->key[0], 0, vi);
      for (j = 1; j < n_key_u32s; j++)
        cmp &= vhash_bucket_compare (h, &b->key[0], j, vi);

      r0 = b->result.as_u32x4;
      r = r0 & cmp;

      /* At this point only one of the 4 results should be non-zero,
         so we can OR all 4 together and get the valid result (if there is one). */
      old_result = vhash_merge_results (r);

      if (! old_result && vhash_search_bucket_is_full (r0))
        old_result = vhash_get_overflow (h, key_hash, vi, n_key_u32s);

      /* Get new result; possibly do something with old result. */
      new_result = result_function (state, vi, old_result - 1, n_key_u32s);

      /* User cannot use ~0 as a hash result since a result of 0 is
         used to mark unused bucket entries. */
      ASSERT (new_result + 1 != 0);
      new_result += 1;

      /* Set overwrites existing result. */
      if (old_result)
        {
          i_set = vhash_non_empty_result_index (r);
          b->result.as_u32[i_set] = new_result;
        }
      else
        {
          /* Set allocates new result. */
          u32 valid_mask;

          valid_mask = (((b->result.as_u32[0] != 0) << 0)
                        | ((b->result.as_u32[1] != 0) << 1)
                        | ((b->result.as_u32[2] != 0) << 2)
                        | ((b->result.as_u32[3] != 0) << 3));

          /* Rotate 4 bit valid mask so that key_hash corresponds to bit 0. */
          i_set = key_hash & 3;
          valid_mask = ((valid_mask >> i_set) | (valid_mask << (4 - i_set))) & 0xf;

          /* Insert into first empty position in bucket after key_hash. */
          i_set = (i_set + h->find_first_zero_table[valid_mask]) & 3;

          if (valid_mask != 0xf)
            {
              n_new_elts += 1;

              b->result.as_u32[i_set] = new_result;

              /* Insert new key into search bucket. */
              for (j = 0; j < n_key_u32s; j++)
                b->key[j].as_u32[i_set] = vhash_get_key_word (h, j, vi);
            }
          else
            vhash_set_overflow (h, key_hash, vi, new_result, n_key_u32s);
        }
    }

  h->n_elts += n_new_elts;
}

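/* Illustrative sketch (not part of the original header): a result function
   for vhash_set_stage. Unlike the get case, the return value matters here:
   it becomes the new stored result and must not be ~0. The old result
   (~0 if the key was not yet present) is available for e.g. releasing a
   previously attached object. my_state_t is the hypothetical state
   sketched earlier. */
#if 0
static u32
my_set_result_function (void * state, u32 vector_index, u32 old_result,
                        u32 n_key_u32)
{
  my_state_t * s = state;
  /* Could inspect old_result here; ~0 means the key is newly added. */
  return s->my_results[vector_index];	/* new result to store */
}
#endif
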
u32
vhash_unset_overflow (vhash_t * h,
                      u32 key_hash,
                      u32 vi,
                      u32 n_key_u32s);

void
vhash_unset_refill_from_overflow (vhash_t * h,
                                  vhash_search_bucket_t * b,
                                  u32 key_hash,
                                  u32 n_key_u32s);

/* Note: Eliot tried doing 4 unsets at once, could not get a speedup,
   and abandoned vhash_unset_4_stage. */
always_inline void
vhash_unset_stage (vhash_t * h,
                   u32 vector_index,
                   u32 n_vectors,
                   vhash_result_function_t result_function,
                   void * state,
                   u32 n_key_u32s)
{
  u32 i, j, n_elts_unset = 0;
  vhash_hashed_key_t * hk = vec_elt_at_index (h->hash_work_space, vector_index);
  vhash_search_bucket_t * b;

  for (i = 0; i < n_vectors; i++)
    {
      u32 vi = vector_index * 4 + i;
      u32 key_hash = hk->hashed_key[2].as_u32[i];
      u32 old_result;
      u32x4 cmp, r0;

      b = vhash_get_search_bucket (h, key_hash, n_key_u32s);

      cmp = vhash_bucket_compare (h, &b->key[0], 0, vi);
      for (j = 1; j < n_key_u32s; j++)
        cmp &= vhash_bucket_compare (h, &b->key[0], j, vi);

      r0 = b->result.as_u32x4;

      /* At this point cmp is all ones where the key matches and zero otherwise,
         so this invalidates the results for a matching key and does nothing otherwise. */
      b->result.as_u32x4 = r0 & ~cmp;

      old_result = vhash_merge_results (r0 & cmp);

      n_elts_unset += old_result != 0;

      if (vhash_search_bucket_is_full (r0))
        {
          if (old_result)
            vhash_unset_refill_from_overflow (h, b, key_hash, n_key_u32s);
          else
            old_result = vhash_unset_overflow (h, key_hash, vi, n_key_u32s);
        }

      result_function (state, vi, old_result - 1, n_key_u32s);
    }
  ASSERT (h->n_elts >= n_elts_unset);
  h->n_elts -= n_elts_unset;
}

void vhash_init (vhash_t * h, u32 log2_n_keys, u32 n_key_u32,
                 u32 * hash_seeds);

void vhash_resize (vhash_t * old, u32 log2_n_keys);

typedef struct {
  vhash_t * vhash;

  union {
    struct {
      u32 * keys;
      u32 * results;
    };

    /* Vector layout for get keys. */
    struct {
      u32x4_union_t * get_keys;
      u32x4_union_t * get_results;
    };
  };

  u32 n_vectors_div_4;
  u32 n_vectors_mod_4;

  u32 n_key_u32;

  u32 n_keys;
} vhash_main_t;

always_inline u32
vhash_get_alloc_keys (vhash_main_t * vm, u32 n_keys, u32 n_key_u32)
{
  u32 i, n;

  i = vm->n_keys;
  vm->n_keys = i + n_keys;

  n = (round_pow2 (vm->n_keys, 4) / 4) * n_key_u32;

  vec_validate_aligned (vm->get_keys, n - 1, sizeof (vm->get_keys[0]));
  vec_validate_aligned (vm->get_results, n - 1, sizeof (vm->get_results[0]));

  return i;
}

always_inline void
vhash_get_set_key_word (vhash_main_t * vm, u32 vi, u32 wi, u32 n_key_u32,
                        u32 value)
{
  u32x4_union_t * k = vec_elt_at_index (vm->get_keys, (vi / 4) * n_key_u32);
  ASSERT (wi < n_key_u32);
  k[wi].as_u32[vi % 4] = value;
}

always_inline u32
vhash_get_fetch_result (vhash_main_t * vm, u32 vi)
{
  u32x4_union_t * r = vec_elt_at_index (vm->get_results, vi / 4);
  return r->as_u32[vi % 4];
}

void vhash_main_get (vhash_main_t * vm);

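/* Illustrative sketch (not part of the original header): batched lookup via
   vhash_main_t. Keys are staged with the helpers above, vhash_main_get runs
   the pipelined stages over all of them, and results are fetched afterwards.
   my_vhash is the hypothetical table from the earlier init sketch; setting
   vm.n_key_u32 explicitly is an assumption about what vhash_main_get expects. */
#if 0
static void
my_batch_lookup (u32 * keys, u32 * results, u32 n)
{
  vhash_main_t vm = { 0 };
  u32 i, i0;

  vm.vhash = &my_vhash;
  vm.n_key_u32 = 1;
  i0 = vhash_get_alloc_keys (&vm, n, /* n_key_u32 */ 1);
  for (i = 0; i < n; i++)
    vhash_get_set_key_word (&vm, i0 + i, /* wi */ 0, 1, keys[i]);

  vhash_main_get (&vm);

  for (i = 0; i < n; i++)
    results[i] = vhash_get_fetch_result (&vm, i0 + i);	/* ~0 if not found */

  vec_free (vm.get_keys);
  vec_free (vm.get_results);
}
#endif
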
always_inline u32
vhash_set_alloc_keys (vhash_main_t * vm, u32 n_keys, u32 n_key_u32)
{
  u32 i;

  i = vm->n_keys;
  vm->n_keys = i + n_keys;

  vec_resize (vm->keys, n_keys * n_key_u32);
  vec_resize (vm->results, n_keys);

  return i;
}

always_inline void
vhash_set_set_key_word (vhash_main_t * vm, u32 vi, u32 wi, u32 n_key_u32,
                        u32 value)
{
  u32 * k = vec_elt_at_index (vm->keys, vi * n_key_u32);
  ASSERT (wi < n_key_u32);
  k[wi] = value;
}

always_inline void
vhash_set_set_result (vhash_main_t * vm, u32 vi, u32 result)
{
  u32 * r = vec_elt_at_index (vm->results, vi);
  r[0] = result;
}

always_inline u32
vhash_set_fetch_old_result (vhash_main_t * vm, u32 vi)
{
  u32 * r = vec_elt_at_index (vm->results, vi);
  return r[0];
}

void vhash_main_set (vhash_main_t * vm);

always_inline u32
vhash_unset_alloc_keys (vhash_main_t * vm, u32 n_keys, u32 n_key_u32)
{ return vhash_set_alloc_keys (vm, n_keys, n_key_u32); }

always_inline void
vhash_unset_set_key_word (vhash_main_t * vm, u32 vi, u32 wi, u32 n_key_u32,
                          u32 value)
{ vhash_set_set_key_word (vm, vi, wi, n_key_u32, value); }

always_inline void
vhash_unset_set_result (vhash_main_t * vm, u32 vi, u32 result)
{ vhash_set_set_result (vm, vi, result); }

always_inline u32
vhash_unset_fetch_old_result (vhash_main_t * vm, u32 vi)
{ return vhash_set_fetch_old_result (vm, vi); }

void vhash_main_unset (vhash_main_t * vm);

typedef struct {
  vhash_main_t new;

  vhash_t * old;
} vhash_resize_t;

u32 vhash_resize_incremental (vhash_resize_t * vr, u32 vector_index, u32 n_vectors);

#endif /* CLIB_HAVE_VEC128 */

#endif /* included_clib_vhash_h */