38 #ifndef included_clib_vhash_h 39 #define included_clib_vhash_h 43 #ifdef CLIB_HAVE_VEC128 50 typedef u32 (vhash_key_function_t) (
void *
state,
u32 vector_index,
52 typedef u32x4 (vhash_4key_function_t) (
void *
state,
u32 vector_index,
55 typedef u32 (vhash_result_function_t) (
void *
state,
u32 vector_index,
56 u32 result,
u32 n_key_u32);
57 typedef u32x4 (vhash_4result_function_t) (
void *
state,
u32 vector_index,
62 u32x4_union_t hashed_key[3];
76 } vhash_search_bucket_t;
80 u32x4_union_t *search_buckets;
87 } vhash_overflow_buckets_t;
94 u32x4_union_t *search_buckets;
99 vhash_overflow_buckets_t overflow_buckets[16];
109 u32x4_union_t bucket_mask;
112 u8 find_first_zero_table[16];
115 u32x4_union_t hash_seeds[3];
119 u32 log2_n_key_word_len_u32x;
122 u32x4_union_t *key_work_space;
126 vhash_hashed_key_t *hash_work_space;
130 vhash_get_overflow_buckets (vhash_t * h,
u32 key)
132 u32 i = (((key & h->bucket_mask.as_u32[0]) >> 2) & 0xf);
134 return h->overflow_buckets +
i;
138 vhash_is_non_empty_overflow_bucket (vhash_t * h,
u32 key)
140 u32 i = (((key & h->bucket_mask.as_u32[0]) >> 2) & 0xf);
142 return h->overflow_buckets[
i].n_overflow > 0;
146 vhash_free_overflow_buckets (vhash_overflow_buckets_t * obs)
153 vhash_free (vhash_t * h)
156 for (i = 0; i <
ARRAY_LEN (h->overflow_buckets); i++)
157 vhash_free_overflow_buckets (&h->overflow_buckets[i]);
164 vhash_set_key_word (vhash_t * h,
u32 wi,
u32 vi,
u32 value)
166 u32 i0 = (wi << h->log2_n_key_word_len_u32x) + (vi / 4);
168 vec_elt (h->key_work_space, i0).as_u32[i1] = value;
172 vhash_set_key_word_u32x (vhash_t * h,
u32 wi,
u32 vi, u32x value)
174 u32 i0 = (wi << h->log2_n_key_word_len_u32x) + (vi / 4);
175 vec_elt (h->key_work_space, i0).as_u32x4 = value;
179 vhash_get_key_word (vhash_t * h,
u32 wi,
u32 vi)
181 u32 i0 = (wi << h->log2_n_key_word_len_u32x) + (vi / 4);
183 return vec_elt (h->key_work_space, i0).as_u32[i1];
187 vhash_get_key_word_u32x (vhash_t * h,
u32 wi,
u32 vi)
189 u32 i0 = (wi << h->log2_n_key_word_len_u32x) + vi;
190 return vec_elt (h->key_work_space, i0).as_u32x4;
194 vhash_validate_sizes (vhash_t * h,
u32 n_key_u32,
u32 n_vectors)
201 h->log2_n_key_word_len_u32x = l =
min_log2 (n);
208 vhash_gather_key_stage (vhash_t * h,
211 vhash_key_function_t key_function,
212 void *state,
u32 n_key_u32s)
217 for (i = 0; i < n_vectors; i++)
219 vi = vector_index * 4 +
i;
220 for (j = 0; j < n_key_u32s; j++)
221 vhash_set_key_word (h, j, vi, key_function (state, vi, j));
226 vhash_gather_4key_stage (vhash_t * h,
228 vhash_4key_function_t key_function,
229 void *state,
u32 n_key_u32s)
232 vi = vector_index * 4;
233 for (j = 0; j < n_key_u32s; j++)
234 vhash_set_key_word_u32x (h, j, vi, key_function (state, vi, j));
238 vhash_mix_stage (vhash_t * h,
u32 vector_index,
u32 n_key_u32s)
246 a = h->hash_seeds[0].as_u32x4;
247 b = h->hash_seeds[1].as_u32x4;
248 c = h->hash_seeds[2].as_u32x4;
249 for (i = 0, n_left = n_key_u32s - 3; n_left > 0; n_left -= 3, i += 3)
252 vhash_get_key_word_u32x (h, n_key_u32s - 1 - (i + 0), vector_index);
255 vhash_get_key_word_u32x (h, n_key_u32s - 1 - (i + 1), vector_index);
258 vhash_get_key_word_u32x (h, n_key_u32s - 1 - (i + 2), vector_index);
265 vhash_hashed_key_t *hk =
267 hk->hashed_key[0].as_u32x4 =
a;
268 hk->hashed_key[1].as_u32x4 = b;
269 hk->hashed_key[2].as_u32x4 =
c;
274 vhash_get_search_bucket_with_index (vhash_t * h,
u32 i,
u32 n_key_u32s)
276 return ((vhash_search_bucket_t *)
279 ((sizeof (vhash_search_bucket_t) /
280 sizeof (
u32x4)) + n_key_u32s)));
284 vhash_get_search_bucket (vhash_t * h,
u32 key_hash,
u32 n_key_u32s)
286 u32 i = key_hash & h->bucket_mask.as_u32[0];
287 return vhash_get_search_bucket_with_index (h, i, n_key_u32s);
291 vhash_get_4_search_bucket_byte_offsets (vhash_t * h,
u32x4 key_hash,
294 vhash_search_bucket_t *b;
295 u32 n_bytes_per_bucket =
sizeof (b[0]) + n_key_u32s *
sizeof (b->key[0]);
296 u32x4 r = key_hash & h->bucket_mask.as_u32x4;
299 #define _(x) u32x4_ishift_left (r, (x) - 2) 300 if (n_bytes_per_bucket == (1 << 5))
302 else if (n_bytes_per_bucket == ((1 << 5) + (1 << 4)))
304 else if (n_bytes_per_bucket == (1 << 6))
306 else if (n_bytes_per_bucket == ((1 << 6) + (1 << 4)))
308 else if (n_bytes_per_bucket == ((1 << 6) + (1 << 5)))
310 else if (n_bytes_per_bucket == ((1 << 6) + (1 << 5) + (1 << 4)))
311 r = _(6) + _(5) + _(4);
319 vhash_finalize_stage (vhash_t * h,
u32 vector_index,
u32 n_key_u32s)
323 vhash_hashed_key_t *hk =
328 a = h->hash_seeds[0].as_u32x4;
329 b = h->hash_seeds[1].as_u32x4;
330 c = h->hash_seeds[2].as_u32x4;
335 a = hk->hashed_key[0].as_u32x4;
336 b = hk->hashed_key[1].as_u32x4;
337 c = hk->hashed_key[2].as_u32x4;
342 a += vhash_get_key_word_u32x (h, 0, vector_index);
344 b += vhash_get_key_word_u32x (h, 1, vector_index);
346 c += vhash_get_key_word_u32x (h, 2, vector_index);
351 hk->hashed_key[2].as_u32x4 =
c;
356 vhash_search_bucket_t *b0, *b1, *b2, *b3;
359 kh.as_u32x4 = vhash_get_4_search_bucket_byte_offsets (h, c, n_key_u32s);
360 hk->hashed_key[1].as_u32x4 = kh.as_u32x4;
362 b0 = (
void *) h->search_buckets + kh.as_u32[0];
363 b1 = (
void *) h->search_buckets + kh.as_u32[1];
364 b2 = (
void *) h->search_buckets + kh.as_u32[2];
365 b3 = (
void *) h->search_buckets + kh.as_u32[3];
367 CLIB_PREFETCH (b0,
sizeof (b0[0]) + n_key_u32s *
sizeof (b0->key[0]),
369 CLIB_PREFETCH (b1,
sizeof (b1[0]) + n_key_u32s *
sizeof (b1->key[0]),
371 CLIB_PREFETCH (b2,
sizeof (b2[0]) + n_key_u32s *
sizeof (b2->key[0]),
373 CLIB_PREFETCH (b3,
sizeof (b3[0]) + n_key_u32s *
sizeof (b3->key[0]),
379 vhash_merge_results (
u32x4 r)
388 vhash_search_bucket_is_full (
u32x4 r)
394 vhash_non_empty_result_index (
u32x4 x)
397 ASSERT (empty_mask != 0xffff);
398 return min_log2 (0xffff & ~empty_mask) / 4;
402 vhash_empty_result_index (
u32x4 x)
406 return min_log2 (0xffff & empty_mask) / 4;
410 vhash_bucket_compare (vhash_t * h,
411 u32x4_union_t * bucket,
u32 key_word_index,
u32 vi)
413 u32 k = vhash_get_key_word (h, key_word_index, vi);
414 u32x4 x = { k, k, k, k };
415 return u32x4_is_equal (bucket[key_word_index].as_u32x4, x);
418 #define vhash_bucket_compare_4(h,wi,vi,b0,b1,b2,b3,cmp0,cmp1,cmp2,cmp3) \ 420 u32x4 _k4 = vhash_get_key_word_u32x ((h), (wi), (vi)); \ 421 u32x4 _k0 = u32x4_splat_word (_k4, 0); \ 422 u32x4 _k1 = u32x4_splat_word (_k4, 1); \ 423 u32x4 _k2 = u32x4_splat_word (_k4, 2); \ 424 u32x4 _k3 = u32x4_splat_word (_k4, 3); \ 426 cmp0 = u32x4_is_equal (b0->key[wi].as_u32x4, _k0); \ 427 cmp1 = u32x4_is_equal (b1->key[wi].as_u32x4, _k1); \ 428 cmp2 = u32x4_is_equal (b2->key[wi].as_u32x4, _k2); \ 429 cmp3 = u32x4_is_equal (b3->key[wi].as_u32x4, _k3); \ 432 u32 vhash_get_overflow (vhash_t * h,
u32 key_hash,
u32 vi,
u32 n_key_u32s);
435 vhash_get_stage (vhash_t * h,
438 vhash_result_function_t result_function,
439 void *state,
u32 n_key_u32s)
442 vhash_hashed_key_t *hk =
444 vhash_search_bucket_t *b;
446 for (i = 0; i < n_vectors; i++)
448 u32 vi = vector_index * 4 +
i;
449 u32 key_hash = hk->hashed_key[2].as_u32[
i];
453 b = vhash_get_search_bucket (h, key_hash, n_key_u32s);
455 r = r0 = b->result.as_u32x4;
456 for (j = 0; j < n_key_u32s; j++)
457 r &= vhash_bucket_compare (h, &b->key[0], j, vi);
461 result = vhash_merge_results (r);
463 if (!result && vhash_search_bucket_is_full (r0))
464 result = vhash_get_overflow (h, key_hash, vi, n_key_u32s);
466 result_function (state, vi, result - 1, n_key_u32s);
471 vhash_get_4_stage (vhash_t * h,
473 vhash_4result_function_t result_function,
474 void *state,
u32 n_key_u32s)
477 vhash_hashed_key_t *hk =
479 vhash_search_bucket_t *b0, *b1, *b2, *b3;
480 u32x4 r0, r1, r2, r3, r0_before, r1_before, r2_before, r3_before;
483 kh.as_u32x4 = hk->hashed_key[1].as_u32x4;
485 b0 = (
void *) h->search_buckets + kh.as_u32[0];
486 b1 = (
void *) h->search_buckets + kh.as_u32[1];
487 b2 = (
void *) h->search_buckets + kh.as_u32[2];
488 b3 = (
void *) h->search_buckets + kh.as_u32[3];
490 r0 = r0_before = b0->result.as_u32x4;
491 r1 = r1_before = b1->result.as_u32x4;
492 r2 = r2_before = b2->result.as_u32x4;
493 r3 = r3_before = b3->result.as_u32x4;
495 vi = vector_index * 4;
497 for (i = 0; i < n_key_u32s; i++)
499 u32x4 c0, c1, c2, c3;
500 vhash_bucket_compare_4 (h, i, vector_index,
501 b0, b1, b2, b3, c0, c1, c2, c3);
513 u32x4 ones = { 1, 1, 1, 1 };
516 r.as_u32x4 = r0 | r1 | r2 | r3;
518 not_found_mask &= ((vhash_search_bucket_is_full (r0_before) << (4 * 0))
519 | (vhash_search_bucket_is_full (r1_before) << (4 * 1))
520 | (vhash_search_bucket_is_full (r2_before) << (4 * 2))
521 | (vhash_search_bucket_is_full (r3_before) <<
525 u32x4_union_t key_hash;
528 hk->hashed_key[2].as_u32x4 & h->bucket_mask.as_u32x4;
531 if (not_found_mask & (1 << (4 * 0)))
532 r.as_u32[0] = vhash_get_overflow (h, key_hash.as_u32[0],
534 if (not_found_mask & (1 << (4 * 1)))
535 r.as_u32[1] = vhash_get_overflow (h, key_hash.as_u32[1],
537 if (not_found_mask & (1 << (4 * 2)))
538 r.as_u32[2] = vhash_get_overflow (h, key_hash.as_u32[2],
540 if (not_found_mask & (1 << (4 * 3)))
541 r.as_u32[3] = vhash_get_overflow (h, key_hash.as_u32[3],
545 result_function (state, vi, r.as_u32x4 - ones, n_key_u32s);
550 vhash_set_overflow (vhash_t * h,
554 vhash_set_stage (vhash_t * h,
557 vhash_result_function_t result_function,
558 void *state,
u32 n_key_u32s)
560 u32 i, j, n_new_elts = 0;
561 vhash_hashed_key_t *hk =
563 vhash_search_bucket_t *b;
565 for (i = 0; i < n_vectors; i++)
567 u32 vi = vector_index * 4 +
i;
568 u32 key_hash = hk->hashed_key[2].as_u32[
i];
569 u32 old_result, new_result;
573 b = vhash_get_search_bucket (h, key_hash, n_key_u32s);
575 cmp = vhash_bucket_compare (h, &b->key[0], 0, vi);
576 for (j = 1; j < n_key_u32s; j++)
577 cmp &= vhash_bucket_compare (h, &b->key[0], j, vi);
579 r0 = b->result.as_u32x4;
584 old_result = vhash_merge_results (r);
586 if (!old_result && vhash_search_bucket_is_full (r0))
587 old_result = vhash_get_overflow (h, key_hash, vi, n_key_u32s);
590 new_result = result_function (state, vi, old_result - 1, n_key_u32s);
594 ASSERT (new_result + 1 != 0);
600 i_set = vhash_non_empty_result_index (r);
601 b->result.as_u32[i_set] = new_result;
608 valid_mask = (((b->result.as_u32[0] != 0) << 0)
609 | ((b->result.as_u32[1] != 0) << 1)
610 | ((b->result.as_u32[2] != 0) << 2)
611 | ((b->result.as_u32[3] != 0) << 3));
614 i_set = key_hash & 3;
616 ((valid_mask >> i_set) | (valid_mask << (4 - i_set))) & 0xf;
619 i_set = (i_set + h->find_first_zero_table[valid_mask]) & 3;
621 if (valid_mask != 0xf)
625 b->result.as_u32[i_set] = new_result;
628 for (j = 0; j < n_key_u32s; j++)
629 b->key[j].as_u32[i_set] = vhash_get_key_word (h, j, vi);
632 vhash_set_overflow (h, key_hash, vi, new_result, n_key_u32s);
636 h->n_elts += n_new_elts;
639 u32 vhash_unset_overflow (vhash_t * h,
u32 key_hash,
u32 vi,
u32 n_key_u32s);
642 vhash_unset_refill_from_overflow (vhash_t * h,
643 vhash_search_bucket_t * b,
644 u32 key_hash,
u32 n_key_u32s);
649 vhash_unset_stage (vhash_t * h,
652 vhash_result_function_t result_function,
653 void *state,
u32 n_key_u32s)
655 u32 i, j, n_elts_unset = 0;
656 vhash_hashed_key_t *hk =
658 vhash_search_bucket_t *b;
660 for (i = 0; i < n_vectors; i++)
662 u32 vi = vector_index * 4 +
i;
663 u32 key_hash = hk->hashed_key[2].as_u32[
i];
667 b = vhash_get_search_bucket (h, key_hash, n_key_u32s);
669 cmp = vhash_bucket_compare (h, &b->key[0], 0, vi);
670 for (j = 1; j < n_key_u32s; j++)
671 cmp &= vhash_bucket_compare (h, &b->key[0], j, vi);
673 r0 = b->result.as_u32x4;
677 b->result.as_u32x4 = r0 & ~cmp;
679 old_result = vhash_merge_results (r0 & cmp);
681 n_elts_unset += old_result != 0;
683 if (vhash_search_bucket_is_full (r0))
686 vhash_unset_refill_from_overflow (h, b, key_hash, n_key_u32s);
688 old_result = vhash_unset_overflow (h, key_hash, vi, n_key_u32s);
691 result_function (state, vi, old_result - 1, n_key_u32s);
693 ASSERT (h->n_elts >= n_elts_unset);
694 h->n_elts -= n_elts_unset;
697 void vhash_init (vhash_t * h,
u32 log2_n_keys,
u32 n_key_u32,
700 void vhash_resize (vhash_t * old,
u32 log2_n_keys);
717 u32x4_union_t *get_keys;
718 u32x4_union_t *get_results;
731 vhash_get_alloc_keys (vhash_main_t * vm,
u32 n_keys,
u32 n_key_u32)
736 vm->n_keys = i + n_keys;
738 n = (
round_pow2 (vm->n_keys, 4) / 4) * n_key_u32;
747 vhash_get_set_key_word (vhash_main_t * vm,
u32 vi,
u32 wi,
u32 n_key_u32,
752 k[wi].as_u32[vi % 4] = value;
756 vhash_get_fetch_result (vhash_main_t * vm,
u32 vi)
759 return r->as_u32[vi % 4];
762 void vhash_main_get (vhash_main_t * vm);
765 vhash_set_alloc_keys (vhash_main_t * vm,
u32 n_keys,
u32 n_key_u32)
770 vm->n_keys = i + n_keys;
779 vhash_set_set_key_word (vhash_main_t * vm,
u32 vi,
u32 wi,
u32 n_key_u32,
788 vhash_set_set_result (vhash_main_t * vm,
u32 vi,
u32 result)
795 vhash_set_fetch_old_result (vhash_main_t * vm,
u32 vi)
801 void vhash_main_set (vhash_main_t * vm);
804 vhash_unset_alloc_keys (vhash_main_t * vm,
u32 n_keys,
u32 n_key_u32)
806 return vhash_set_alloc_keys (vm, n_keys, n_key_u32);
810 vhash_unset_set_key_word (vhash_main_t * vm,
u32 vi,
u32 wi,
u32 n_key_u32,
813 vhash_set_set_key_word (vm, vi, wi, n_key_u32, value);
817 vhash_unset_set_result (vhash_main_t * vm,
u32 vi,
u32 result)
819 vhash_set_set_result (vm, vi, result);
823 vhash_unset_fetch_old_result (vhash_main_t * vm,
u32 vi)
825 return vhash_set_fetch_old_result (vm, vi);
828 void vhash_main_unset (vhash_main_t * vm);
837 u32 vhash_resize_incremental (vhash_resize_t * vr,
u32 vector_index,
#define hash_v3_mix_u32x(a, b, c)
#define vec_validate_aligned(V, I, A)
Make sure vector is long enough for given index (no header, specified alignment)
static uword min_log2(uword x)
#define vec_elt_at_index(v, i)
Get vector value at index i checking that i is in bounds.
#define u32x4_word_shift_right(a, n)
#define vec_resize(V, N)
Resize a vector (no header, unspecified alignment) Add N elements to end of given vector V...
vslo vsro vslo static vsro u32 u32x4_get0(u32x4 x)
#define hash_v3_finalize_u32x(a, b, c)
#define CLIB_PREFETCH(addr, size, type)
#define vec_free(V)
Free vector's memory (no header).
#define u32x4_transpose(x0, x1, x2, x3)
static uword max_pow2(uword x)
static uword round_pow2(uword x, uword pow2)
vhost_vring_state_t state
#define vec_elt(v, i)
Get vector value at index i.
#define CLIB_CACHE_LINE_BYTES
static u32 u32x4_zero_byte_mask(u32x4 x)