#ifndef included_clib_vhash_h
#define included_clib_vhash_h

#include <vppinfra/vector.h>

#ifdef CLIB_HAVE_VEC128

#include <vppinfra/cache.h>
#include <vppinfra/hash.h>
#include <vppinfra/pipeline.h>

/* Gathers 32 bits worth of key with given index. */
typedef u32 (vhash_key_function_t) (void *state, u32 vector_index,
				    u32 key_word_index);
typedef u32x4 (vhash_4key_function_t) (void *state, u32 vector_index,
				       u32 key_word_index);

/* Sets/gets result of hash lookup. */
typedef u32 (vhash_result_function_t) (void *state, u32 vector_index,
				       u32 result, u32 n_key_u32);
typedef u32x4 (vhash_4result_function_t) (void *state, u32 vector_index,
					  u32x4 results, u32 n_key_u32);
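/* Example callbacks matching the typedefs above (a sketch, not part of
   this API: example_state_t and its fields are hypothetical).  `state`
   is whatever the caller passes to the *_stage functions below;
   `vector_index` selects the key, `key_word_index` the 32-bit word
   within it.  The 4-wide variants do the same for 4 keys at once. */

typedef struct
{
  u32 *flow_keys;		/* n_key_u32 words per key */
  u32 *flow_results;		/* one result per key */
  u32 n_key_u32;
} example_state_t;

static u32
example_key_word (void *state, u32 vi, u32 wi)
{
  example_state_t *s = state;
  return s->flow_keys[vi * s->n_key_u32 + wi];
}

static u32
example_result (void *state, u32 vi, u32 result, u32 n_key_u32)
{
  example_state_t *s = state;
  s->flow_results[vi] = result;
  return result;
}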
typedef struct
{
  u32x4_union_t hashed_key[3];
} vhash_hashed_key_t;

/* Search buckets are really this structure. */
typedef struct
{
  /* 4 results for this bucket.  Zero is used to mark empty results. */
  u32x4_union_t result;

  /* n_key_u32s u32x4s of key data follow. */
  u32x4_union_t key[0];
} vhash_search_bucket_t;

typedef struct
{
  u32x4_union_t *search_buckets;

  /* Vector of free indices for this overflow bucket. */
  u32 *free_indices;

  /* Number of entries in this overflow bucket. */
  u32 n_overflow;
} vhash_overflow_buckets_t;

typedef struct
{
  /* 2^log2_n_keys keys grouped in groups of 4.  Each bucket contains
     4 results plus 4 keys for a total of (1 + n_key_u32) u32x4s. */
  u32x4_union_t *search_buckets;

  /* When a bucket of 4 results/keys is full we search the overflow;
     the key hash selects one of 16 overflow buckets. */
  vhash_overflow_buckets_t overflow_buckets[16];

  /* Total number of occupied elements in the hash table. */
  u32 n_elts;

  u32 log2_n_keys;

  /* Number of 32-bit words in a hash key. */
  u32 n_key_u32;

  u32x4_union_t bucket_mask;

  /* table[i] = min_log2 (first_set (~i)). */
  u8 find_first_zero_table[16];

  /* Seeds for the 3-word Jenkins hash. */
  u32x4_union_t hash_seeds[3];

  /* Key work space is a vector of length
     n_key_u32s << log2_n_key_word_len_u32x. */
  u32 log2_n_key_word_len_u32x;

  /* Work space to store keys between pipeline stages. */
  u32x4_union_t *key_work_space;

  /* Work space to store hash values between pipeline stages. */
  vhash_hashed_key_t *hash_work_space;
} vhash_t;
always_inline vhash_overflow_buckets_t *
vhash_get_overflow_buckets (vhash_t * h, u32 key)
{
  u32 i = (((key & h->bucket_mask.as_u32[0]) >> 2) & 0xf);
  ASSERT (i < ARRAY_LEN (h->overflow_buckets));
  return h->overflow_buckets + i;
}

always_inline uword
vhash_is_non_empty_overflow_bucket (vhash_t * h, u32 key)
{
  u32 i = (((key & h->bucket_mask.as_u32[0]) >> 2) & 0xf);
  ASSERT (i < ARRAY_LEN (h->overflow_buckets));
  return h->overflow_buckets[i].n_overflow > 0;
}

always_inline void
vhash_free_overflow_buckets (vhash_overflow_buckets_t * obs)
{
  vec_free (obs->search_buckets);
  vec_free (obs->free_indices);
}

always_inline void
vhash_free (vhash_t * h)
{
  uword i;
  for (i = 0; i < ARRAY_LEN (h->overflow_buckets); i++)
    vhash_free_overflow_buckets (&h->overflow_buckets[i]);
  vec_free (h->search_buckets);
  vec_free (h->key_work_space);
  vec_free (h->hash_work_space);
}
always_inline void
vhash_set_key_word (vhash_t * h, u32 wi, u32 vi, u32 value)
{
  u32 i0 = (wi << h->log2_n_key_word_len_u32x) + (vi / 4);
  u32 i1 = vi % 4;
  vec_elt (h->key_work_space, i0).as_u32[i1] = value;
}

always_inline void
vhash_set_key_word_u32x (vhash_t * h, u32 wi, u32 vi, u32x value)
{
  u32 i0 = (wi << h->log2_n_key_word_len_u32x) + (vi / 4);
  vec_elt (h->key_work_space, i0).as_u32x4 = value;
}

always_inline u32
vhash_get_key_word (vhash_t * h, u32 wi, u32 vi)
{
  u32 i0 = (wi << h->log2_n_key_word_len_u32x) + (vi / 4);
  u32 i1 = vi % 4;
  return vec_elt (h->key_work_space, i0).as_u32[i1];
}

always_inline u32x4
vhash_get_key_word_u32x (vhash_t * h, u32 wi, u32 vi)
{
  /* Here vi is a vector index (units of 4 keys), not a scalar key index. */
  u32 i0 = (wi << h->log2_n_key_word_len_u32x) + vi;
  return vec_elt (h->key_work_space, i0).as_u32x4;
}
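/* Worked example of the workspace layout above (a sketch; values assume
   log2_n_key_word_len_u32x == 3, i.e. 8 u32x4s = 32 scalar keys per
   key word):

     word 0 lanes:  key_work_space[0 .. 7]
     word 1 lanes:  key_work_space[8 .. 15]

   so vhash_set_key_word (h, 1, 5, v) writes
   key_work_space[(1 << 3) + 5 / 4].as_u32[5 % 4], i.e. lane 1 of
   element 9. */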
always_inline void
vhash_validate_sizes (vhash_t * h, u32 n_key_u32, u32 n_vectors)
{
  u32 n, l;

  n = max_pow2 (n_vectors) / 4;
  n = clib_max (n, 8);

  h->log2_n_key_word_len_u32x = l = min_log2 (n);
  vec_validate_aligned (h->key_work_space, (n_key_u32 << l) - 1,
			CLIB_CACHE_LINE_BYTES);
  vec_validate_aligned (h->hash_work_space, n - 1, CLIB_CACHE_LINE_BYTES);
}
always_inline void
vhash_gather_key_stage (vhash_t * h,
			u32 vector_index,
			u32 n_vectors,
			vhash_key_function_t key_function,
			void *state, u32 n_key_u32s)
{
  u32 i, j, vi;

  /* Gather keys for 4 packets (for 128 bit vector length e.g. u32x4). */
  for (i = 0; i < n_vectors; i++)
    {
      vi = vector_index * 4 + i;
      for (j = 0; j < n_key_u32s; j++)
	vhash_set_key_word (h, j, vi, key_function (state, vi, j));
    }
}

always_inline void
vhash_gather_4key_stage (vhash_t * h,
			 u32 vector_index,
			 vhash_4key_function_t key_function,
			 void *state, u32 n_key_u32s)
{
  u32 j, vi;

  vi = vector_index * 4;
  for (j = 0; j < n_key_u32s; j++)
    vhash_set_key_word_u32x (h, j, vi, key_function (state, vi, j));
}
always_inline void
vhash_mix_stage (vhash_t * h, u32 vector_index, u32 n_key_u32s)
{
  i32 i, n_left;
  u32x4 a, b, c;

  /* Only needed for keys longer than 3 u32s (12 bytes). */
  ASSERT (n_key_u32s > 3);

  a = h->hash_seeds[0].as_u32x4;
  b = h->hash_seeds[1].as_u32x4;
  c = h->hash_seeds[2].as_u32x4;
  for (i = 0, n_left = n_key_u32s - 3; n_left > 0; n_left -= 3, i += 3)
    {
      a += vhash_get_key_word_u32x (h, n_key_u32s - 1 - (i + 0),
				    vector_index);
      if (n_left > 1)
	b += vhash_get_key_word_u32x (h, n_key_u32s - 1 - (i + 1),
				      vector_index);
      if (n_left > 2)
	c += vhash_get_key_word_u32x (h, n_key_u32s - 1 - (i + 2),
				      vector_index);

      hash_v3_mix_u32x (a, b, c);
    }

  /* Save away a, b, c for later finalize. */
  {
    vhash_hashed_key_t *hk =
      vec_elt_at_index (h->hash_work_space, vector_index);
    hk->hashed_key[0].as_u32x4 = a;
    hk->hashed_key[1].as_u32x4 = b;
    hk->hashed_key[2].as_u32x4 = c;
  }
}
always_inline vhash_search_bucket_t *
vhash_get_search_bucket_with_index (vhash_t * h, u32 i, u32 n_key_u32s)
{
  return ((vhash_search_bucket_t *)
	  vec_elt_at_index (h->search_buckets,
			    (i / 4) * ((sizeof (vhash_search_bucket_t) /
					sizeof (u32x4)) + n_key_u32s)));
}

always_inline vhash_search_bucket_t *
vhash_get_search_bucket (vhash_t * h, u32 key_hash, u32 n_key_u32s)
{
  u32 i = key_hash & h->bucket_mask.as_u32[0];
  return vhash_get_search_bucket_with_index (h, i, n_key_u32s);
}
always_inline u32x4
vhash_get_4_search_bucket_byte_offsets (vhash_t * h, u32x4 key_hash,
					u32 n_key_u32s)
{
  vhash_search_bucket_t *b;
  u32 n_bytes_per_bucket = sizeof (b[0]) + n_key_u32s * sizeof (b->key[0]);
  u32x4 r = key_hash & h->bucket_mask.as_u32x4;

  /* Multiply by n_bytes_per_bucket with shifts and adds.  r starts in
     units of keys; the bucket index is r / 4, hence the "- 2". */
#define _(x) u32x4_ishift_left (r, (x) - 2)
  if (n_bytes_per_bucket == (1 << 5))
    r = _ (5);
  else if (n_bytes_per_bucket == ((1 << 5) + (1 << 4)))
    r = _ (5) + _ (4);
  else if (n_bytes_per_bucket == (1 << 6))
    r = _ (6);
  else if (n_bytes_per_bucket == ((1 << 6) + (1 << 4)))
    r = _ (6) + _ (4);
  else if (n_bytes_per_bucket == ((1 << 6) + (1 << 5)))
    r = _ (6) + _ (5);
  else if (n_bytes_per_bucket == ((1 << 6) + (1 << 5) + (1 << 4)))
    r = _ (6) + _ (5) + _ (4);
  else
    ASSERT (0);
#undef _

  return r;
}
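/* Worked example (sketch): with n_key_u32s == 2 a bucket is one result
   u32x4 plus two key u32x4s = 48 bytes = (1 << 5) + (1 << 4).  A masked
   key index i becomes byte offset (i / 4) * 48 = i * 12, computed
   branch-free above as (i << 3) + (i << 2). */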
always_inline void
vhash_finalize_stage (vhash_t * h, u32 vector_index, u32 n_key_u32s)
{
  i32 n_left;
  u32x4 a, b, c;
  vhash_hashed_key_t *hk =
    vec_elt_at_index (h->hash_work_space, vector_index);

  if (n_key_u32s <= 3)
    {
      a = h->hash_seeds[0].as_u32x4;
      b = h->hash_seeds[1].as_u32x4;
      c = h->hash_seeds[2].as_u32x4;
      n_left = n_key_u32s;
    }
  else
    {
      /* Continue from where the mix stage left off. */
      a = hk->hashed_key[0].as_u32x4;
      b = hk->hashed_key[1].as_u32x4;
      c = hk->hashed_key[2].as_u32x4;
      n_left = 3;
    }

  if (n_left > 0)
    a += vhash_get_key_word_u32x (h, 0, vector_index);
  if (n_left > 1)
    b += vhash_get_key_word_u32x (h, 1, vector_index);
  if (n_left > 2)
    c += vhash_get_key_word_u32x (h, 2, vector_index);

  hash_v3_finalize_u32x (a, b, c);

  /* Only save away last 32 bits of hash code. */
  hk->hashed_key[2].as_u32x4 = c;

  /* Prefetch buckets.  This costs a bit for small tables but saves
     big for large ones. */
  {
    vhash_search_bucket_t *b0, *b1, *b2, *b3;
    u32x4_union_t kh;

    kh.as_u32x4 = vhash_get_4_search_bucket_byte_offsets (h, c, n_key_u32s);
    hk->hashed_key[1].as_u32x4 = kh.as_u32x4;

    b0 = (void *) h->search_buckets + kh.as_u32[0];
    b1 = (void *) h->search_buckets + kh.as_u32[1];
    b2 = (void *) h->search_buckets + kh.as_u32[2];
    b3 = (void *) h->search_buckets + kh.as_u32[3];

    CLIB_PREFETCH (b0, sizeof (b0[0]) + n_key_u32s * sizeof (b0->key[0]),
		   READ);
    CLIB_PREFETCH (b1, sizeof (b1[0]) + n_key_u32s * sizeof (b1->key[0]),
		   READ);
    CLIB_PREFETCH (b2, sizeof (b2[0]) + n_key_u32s * sizeof (b2->key[0]),
		   READ);
    CLIB_PREFETCH (b3, sizeof (b3[0]) + n_key_u32s * sizeof (b3->key[0]),
		   READ);
  }
}
/* At most one of the 4 lane results is non-zero, so OR-ing the lanes
   together yields that single result. */
always_inline u32
vhash_merge_results (u32x4 r)
{
  r = r | u32x4_word_shift_right (r, 2);
  r = r | u32x4_word_shift_right (r, 1);
  return u32x4_get0 (r);
}

/* Bucket is full when all 4 results are non-zero. */
always_inline u32
vhash_search_bucket_is_full (u32x4 r)
{
  return u32x4_zero_byte_mask (r) == 0;
}

always_inline u32
vhash_non_empty_result_index (u32x4 x)
{
  u32 empty_mask = u32x4_zero_byte_mask (x);
  ASSERT (empty_mask != 0xffff);
  return min_log2 (0xffff & ~empty_mask) / 4;
}

always_inline u32
vhash_empty_result_index (u32x4 x)
{
  u32 empty_mask = u32x4_zero_byte_mask (x);
  ASSERT (empty_mask != 0);
  return min_log2 (0xffff & empty_mask) / 4;
}
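/* Worked example of the index helpers above (a sketch): for result
   lanes x = {0, 7, 0, 0}, u32x4_zero_byte_mask gives 0xffef (one bit
   per zero byte).  vhash_non_empty_result_index then computes
   min_log2 (0xffff & ~0xffef) / 4 = min_log2 (0x0010) / 4 = 1,
   the lane holding the non-zero result. */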
always_inline u32x4
vhash_bucket_compare (vhash_t * h,
		      u32x4_union_t * bucket, u32 key_word_index, u32 vi)
{
  u32 k = vhash_get_key_word (h, key_word_index, vi);
  u32x4 x = { k, k, k, k };
  return u32x4_is_equal (bucket[key_word_index].as_u32x4, x);
}

#define vhash_bucket_compare_4(h,wi,vi,b0,b1,b2,b3,cmp0,cmp1,cmp2,cmp3) \
do {									\
  u32x4 _k4 = vhash_get_key_word_u32x ((h), (wi), (vi));		\
  u32x4 _k0 = u32x4_splat_word (_k4, 0);				\
  u32x4 _k1 = u32x4_splat_word (_k4, 1);				\
  u32x4 _k2 = u32x4_splat_word (_k4, 2);				\
  u32x4 _k3 = u32x4_splat_word (_k4, 3);				\
									\
  cmp0 = u32x4_is_equal (b0->key[wi].as_u32x4, _k0);			\
  cmp1 = u32x4_is_equal (b1->key[wi].as_u32x4, _k1);			\
  cmp2 = u32x4_is_equal (b2->key[wi].as_u32x4, _k2);			\
  cmp3 = u32x4_is_equal (b3->key[wi].as_u32x4, _k3);			\
} while (0)

u32 vhash_get_overflow (vhash_t * h, u32 key_hash, u32 vi, u32 n_key_u32s);
always_inline void
vhash_get_stage (vhash_t * h,
		 u32 vector_index,
		 u32 n_vectors,
		 vhash_result_function_t result_function,
		 void *state, u32 n_key_u32s)
{
  u32 i, j;
  vhash_hashed_key_t *hk =
    vec_elt_at_index (h->hash_work_space, vector_index);
  vhash_search_bucket_t *b;

  for (i = 0; i < n_vectors; i++)
    {
      u32 vi = vector_index * 4 + i;
      u32 key_hash = hk->hashed_key[2].as_u32[i];
      u32 result;
      u32x4 r, r0;

      b = vhash_get_search_bucket (h, key_hash, n_key_u32s);

      r = r0 = b->result.as_u32x4;
      for (j = 0; j < n_key_u32s; j++)
	r &= vhash_bucket_compare (h, &b->key[0], j, vi);

      /* After the compares at most one of the 4 results is non-zero,
	 so they can simply be merged. */
      result = vhash_merge_results (r);

      if (!result && vhash_search_bucket_is_full (r0))
	result = vhash_get_overflow (h, key_hash, vi, n_key_u32s);

      /* Internal results are biased by 1; callers see ~0 for not found. */
      result_function (state, vi, result - 1, n_key_u32s);
    }
}
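/* Example of driving the stages above for one batch of keys, using the
   sketch callbacks defined after the typedefs (a minimal sequential
   sketch; the real callers in vhash.c run these stages through clib's
   pipeline macros so prefetches overlap with hashing). */

static void
example_get_batch (vhash_t * h, example_state_t * s, u32 n_keys)
{
  u32 bi, n_batches = (n_keys + 3) / 4;

  vhash_validate_sizes (h, s->n_key_u32, n_keys);
  for (bi = 0; bi < n_batches; bi++)
    {
      u32 n = clib_min (4, n_keys - 4 * bi);

      vhash_gather_key_stage (h, bi, n, example_key_word, s, s->n_key_u32);
      /* Mix stage is only needed for keys longer than 3 u32s. */
      if (s->n_key_u32 > 3)
	vhash_mix_stage (h, bi, s->n_key_u32);
      vhash_finalize_stage (h, bi, s->n_key_u32);
      vhash_get_stage (h, bi, n, example_result, s, s->n_key_u32);
    }
}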
always_inline void
vhash_get_4_stage (vhash_t * h,
		   u32 vector_index,
		   vhash_4result_function_t result_function,
		   void *state, u32 n_key_u32s)
{
  u32 i, vi;
  vhash_hashed_key_t *hk =
    vec_elt_at_index (h->hash_work_space, vector_index);
  vhash_search_bucket_t *b0, *b1, *b2, *b3;
  u32x4 r0, r1, r2, r3, r0_before, r1_before, r2_before, r3_before;
  u32x4_union_t kh;

  /* Bucket byte offsets were saved by the finalize stage. */
  kh.as_u32x4 = hk->hashed_key[1].as_u32x4;

  b0 = (void *) h->search_buckets + kh.as_u32[0];
  b1 = (void *) h->search_buckets + kh.as_u32[1];
  b2 = (void *) h->search_buckets + kh.as_u32[2];
  b3 = (void *) h->search_buckets + kh.as_u32[3];

  r0 = r0_before = b0->result.as_u32x4;
  r1 = r1_before = b1->result.as_u32x4;
  r2 = r2_before = b2->result.as_u32x4;
  r3 = r3_before = b3->result.as_u32x4;

  vi = vector_index * 4;

  for (i = 0; i < n_key_u32s; i++)
    {
      u32x4 c0, c1, c2, c3;
      vhash_bucket_compare_4 (h, i, vector_index,
			      b0, b1, b2, b3, c0, c1, c2, c3);
      r0 &= c0;
      r1 &= c1;
      r2 &= c2;
      r3 &= c3;
    }

  u32x4_transpose (r0, r1, r2, r3);

  /* Gather together 4 results. */
  {
    u32x4_union_t r;
    u32x4 ones = { 1, 1, 1, 1 };
    u32 not_found_mask;

    r.as_u32x4 = r0 | r1 | r2 | r3;
    not_found_mask = u32x4_zero_byte_mask (r.as_u32x4);
    not_found_mask &= ((vhash_search_bucket_is_full (r0_before) << (4 * 0))
		       | (vhash_search_bucket_is_full (r1_before) << (4 * 1))
		       | (vhash_search_bucket_is_full (r2_before) << (4 * 2))
		       | (vhash_search_bucket_is_full (r3_before) << (4 * 3)));
    if (not_found_mask)
      {
	u32x4_union_t key_hash;

	key_hash.as_u32x4 =
	  hk->hashed_key[2].as_u32x4 & h->bucket_mask.as_u32x4;

	/* Slow path: buckets that were full must also search their
	   overflow buckets. */
	if (not_found_mask & (1 << (4 * 0)))
	  r.as_u32[0] = vhash_get_overflow (h, key_hash.as_u32[0],
					    vi + 0, n_key_u32s);
	if (not_found_mask & (1 << (4 * 1)))
	  r.as_u32[1] = vhash_get_overflow (h, key_hash.as_u32[1],
					    vi + 1, n_key_u32s);
	if (not_found_mask & (1 << (4 * 2)))
	  r.as_u32[2] = vhash_get_overflow (h, key_hash.as_u32[2],
					    vi + 2, n_key_u32s);
	if (not_found_mask & (1 << (4 * 3)))
	  r.as_u32[3] = vhash_get_overflow (h, key_hash.as_u32[3],
					    vi + 3, n_key_u32s);
      }

    result_function (state, vi, r.as_u32x4 - ones, n_key_u32s);
  }
}
u32 vhash_set_overflow (vhash_t * h, u32 key_hash, u32 vi, u32 new_result,
			u32 n_key_u32s);

always_inline void
vhash_set_stage (vhash_t * h,
		 u32 vector_index,
		 u32 n_vectors,
		 vhash_result_function_t result_function,
		 void *state, u32 n_key_u32s)
{
  u32 i, j, n_new_elts = 0;
  vhash_hashed_key_t *hk =
    vec_elt_at_index (h->hash_work_space, vector_index);
  vhash_search_bucket_t *b;

  for (i = 0; i < n_vectors; i++)
    {
      u32 vi = vector_index * 4 + i;
      u32 key_hash = hk->hashed_key[2].as_u32[i];
      u32 old_result, new_result;
      u32 i_set;
      u32x4 r, r0, cmp;

      b = vhash_get_search_bucket (h, key_hash, n_key_u32s);

      cmp = vhash_bucket_compare (h, &b->key[0], 0, vi);
      for (j = 1; j < n_key_u32s; j++)
	cmp &= vhash_bucket_compare (h, &b->key[0], j, vi);

      r0 = b->result.as_u32x4;
      r = r0 & cmp;

      /* At most one of the 4 results can match. */
      old_result = vhash_merge_results (r);

      if (!old_result && vhash_search_bucket_is_full (r0))
	old_result = vhash_get_overflow (h, key_hash, vi, n_key_u32s);

      /* Get new result; possibly do something with old result. */
      new_result = result_function (state, vi, old_result - 1, n_key_u32s);

      /* User cannot use ~0 as a result since an internal result of 0
	 marks unused bucket entries. */
      ASSERT (new_result + 1 != 0);
      new_result += 1;

      if (old_result)
	{
	  /* Set over-writes existing result. */
	  i_set = vhash_non_empty_result_index (r);
	  b->result.as_u32[i_set] = new_result;
	}
      else
	{
	  /* Set allocates a new result slot. */
	  u32 valid_mask;

	  valid_mask = (((b->result.as_u32[0] != 0) << 0)
			| ((b->result.as_u32[1] != 0) << 1)
			| ((b->result.as_u32[2] != 0) << 2)
			| ((b->result.as_u32[3] != 0) << 3));

	  /* Rotate 4-bit valid mask so that key_hash selects bit 0. */
	  i_set = key_hash & 3;
	  valid_mask = ((valid_mask >> i_set) | (valid_mask << (4 - i_set)))
	    & 0xf;

	  /* Insert into first empty result slot. */
	  i_set = (i_set + h->find_first_zero_table[valid_mask]) & 3;

	  if (valid_mask != 0xf)
	    {
	      n_new_elts += 1;

	      b->result.as_u32[i_set] = new_result;

	      /* Insert new key into search bucket. */
	      for (j = 0; j < n_key_u32s; j++)
		b->key[j].as_u32[i_set] = vhash_get_key_word (h, j, vi);
	    }
	  else
	    vhash_set_overflow (h, key_hash, vi, new_result, n_key_u32s);
	}
    }

  h->n_elts += n_new_elts;
}
u32 vhash_unset_overflow (vhash_t * h, u32 key_hash, u32 vi,
			  u32 n_key_u32s);

void vhash_unset_refill_from_overflow (vhash_t * h,
				       vhash_search_bucket_t * b,
				       u32 key_hash, u32 n_key_u32s);

always_inline void
vhash_unset_stage (vhash_t * h,
		   u32 vector_index,
		   u32 n_vectors,
		   vhash_result_function_t result_function,
		   void *state, u32 n_key_u32s)
{
  u32 i, j, n_elts_unset = 0;
  vhash_hashed_key_t *hk =
    vec_elt_at_index (h->hash_work_space, vector_index);
  vhash_search_bucket_t *b;

  for (i = 0; i < n_vectors; i++)
    {
      u32 vi = vector_index * 4 + i;
      u32 key_hash = hk->hashed_key[2].as_u32[i];
      u32 old_result;
      u32x4 cmp, r0;

      b = vhash_get_search_bucket (h, key_hash, n_key_u32s);

      cmp = vhash_bucket_compare (h, &b->key[0], 0, vi);
      for (j = 1; j < n_key_u32s; j++)
	cmp &= vhash_bucket_compare (h, &b->key[0], j, vi);

      r0 = b->result.as_u32x4;

      /* Clear any matching result; zero marks the slot empty. */
      b->result.as_u32x4 = r0 & ~cmp;

      old_result = vhash_merge_results (r0 & cmp);

      n_elts_unset += old_result != 0;

      if (vhash_search_bucket_is_full (r0))
	{
	  if (old_result)
	    /* Move an overflow entry into the just-freed bucket slot. */
	    vhash_unset_refill_from_overflow (h, b, key_hash, n_key_u32s);
	  else
	    old_result = vhash_unset_overflow (h, key_hash, vi, n_key_u32s);
	}

      result_function (state, vi, old_result - 1, n_key_u32s);
    }

  ASSERT (h->n_elts >= n_elts_unset);
  h->n_elts -= n_elts_unset;
}
void vhash_init (vhash_t * h, u32 log2_n_keys, u32 n_key_u32,
		 u32 * hash_seeds);

void vhash_resize (vhash_t * old, u32 log2_n_keys);
typedef struct
{
  /* Table this batch of operations works on. */
  vhash_t vhash;

  union
  {
    /* Scalar key/result layout used by the set/unset paths. */
    struct
    {
      u32 *keys;
      u32 *results;
    };

    /* Vector layout used by the get path. */
    struct
    {
      u32x4_union_t *get_keys;
      u32x4_union_t *get_results;
    };
  };

  u32 n_key_u32;

  u32 n_keys;
} vhash_main_t;

always_inline u32
vhash_get_alloc_keys (vhash_main_t * vm, u32 n_keys, u32 n_key_u32)
{
  u32 i, n;

  i = vm->n_keys;
  vm->n_keys = i + n_keys;

  /* Get path stores keys 4 at a time: n_key_u32 u32x4s per group of 4. */
  n = (round_pow2 (vm->n_keys, 4) / 4) * n_key_u32;

  vec_validate_aligned (vm->get_keys, n - 1, sizeof (vm->get_keys[0]));
  vec_validate_aligned (vm->get_results, n - 1, sizeof (vm->get_results[0]));

  return i;
}
always_inline void
vhash_get_set_key_word (vhash_main_t * vm, u32 vi, u32 wi, u32 n_key_u32,
			u32 value)
{
  u32x4_union_t *k =
    vec_elt_at_index (vm->get_keys, (vi / 4) * n_key_u32);
  ASSERT (wi < n_key_u32);
  k[wi].as_u32[vi % 4] = value;
}

always_inline u32
vhash_get_fetch_result (vhash_main_t * vm, u32 vi)
{
  u32x4_union_t *r = vec_elt_at_index (vm->get_results, vi / 4);
  return r->as_u32[vi % 4];
}

void vhash_main_get (vhash_main_t * vm);
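/* Example use of the get helpers above (a sketch; lookup_one and the
   2-word key are illustrative).  Keys are staged into vm->get_keys,
   vhash_main_get () runs the vectorized lookup, and results are read
   back per key. */

static u32
lookup_one (vhash_main_t * vm, u32 key0, u32 key1)
{
  u32 vi = vhash_get_alloc_keys (vm, /* n_keys */ 1, /* n_key_u32 */ 2);

  vhash_get_set_key_word (vm, vi, 0, 2, key0);
  vhash_get_set_key_word (vm, vi, 1, 2, key1);

  vhash_main_get (vm);

  return vhash_get_fetch_result (vm, vi);	/* ~0 means not found */
}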
always_inline u32
vhash_set_alloc_keys (vhash_main_t * vm, u32 n_keys, u32 n_key_u32)
{
  u32 i;

  i = vm->n_keys;
  vm->n_keys = i + n_keys;

  vec_resize (vm->keys, n_keys * n_key_u32);
  vec_resize (vm->results, n_keys);

  return i;
}
always_inline void
vhash_set_set_key_word (vhash_main_t * vm, u32 vi, u32 wi, u32 n_key_u32,
			u32 value)
{
  u32 *k = vec_elt_at_index (vm->keys, vi * n_key_u32);
  ASSERT (wi < n_key_u32);
  k[wi] = value;
}

always_inline void
vhash_set_set_result (vhash_main_t * vm, u32 vi, u32 result)
{
  u32 *r = vec_elt_at_index (vm->results, vi);
  r[0] = result;
}

always_inline u32
vhash_set_fetch_old_result (vhash_main_t * vm, u32 vi)
{
  u32 *r = vec_elt_at_index (vm->results, vi);
  return r[0];
}

void vhash_main_set (vhash_main_t * vm);
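/* Example use of the set helpers above (a sketch; insert_one is
   illustrative).  The new result is staged next to its key,
   vhash_main_set () performs the insert, and any previous result is
   left behind for vhash_set_fetch_old_result (). */

static u32
insert_one (vhash_main_t * vm, u32 key0, u32 key1, u32 result)
{
  u32 vi = vhash_set_alloc_keys (vm, /* n_keys */ 1, /* n_key_u32 */ 2);

  vhash_set_set_key_word (vm, vi, 0, 2, key0);
  vhash_set_set_key_word (vm, vi, 1, 2, key1);
  vhash_set_set_result (vm, vi, result);

  vhash_main_set (vm);

  return vhash_set_fetch_old_result (vm, vi);
}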
/* Unset shares the set path's key/result staging. */
always_inline u32
vhash_unset_alloc_keys (vhash_main_t * vm, u32 n_keys, u32 n_key_u32)
{
  return vhash_set_alloc_keys (vm, n_keys, n_key_u32);
}

always_inline void
vhash_unset_set_key_word (vhash_main_t * vm, u32 vi, u32 wi, u32 n_key_u32,
			  u32 value)
{
  vhash_set_set_key_word (vm, vi, wi, n_key_u32, value);
}

always_inline void
vhash_unset_set_result (vhash_main_t * vm, u32 vi, u32 result)
{
  vhash_set_set_result (vm, vi, result);
}

always_inline u32
vhash_unset_fetch_old_result (vhash_main_t * vm, u32 vi)
{
  return vhash_set_fetch_old_result (vm, vi);
}

void vhash_main_unset (vhash_main_t * vm);
typedef struct
{
  /* Keys are moved incrementally from the old table into the new one. */
  vhash_main_t new;

  vhash_t *old;
} vhash_resize_t;

u32 vhash_resize_incremental (vhash_resize_t * vr, u32 vector_index,
			      u32 n_vectors);

#endif /* CLIB_HAVE_VEC128 */

#endif /* included_clib_vhash_h */