38 #ifndef included_clib_vhash_h
39 #define included_clib_vhash_h
43 #ifdef CLIB_HAVE_VEC128
/* Fragments of four user-supplied callback typedefs (scalar and 4-wide
 * variants for key gathering and result handling).  NOTE(review): this
 * extraction fused the original file's line numbers (50, 52, 55, ...)
 * into the text and dropped the trailing parameter lines of the two
 * key-function typedefs — recover the pristine header before editing. */
50 typedef u32 (vhash_key_function_t) (
void *
state,
u32 vector_index,
/* 4-wide variant: returns one u32x4 of key words per call. */
52 typedef u32x4 (vhash_4key_function_t) (
void *
state,
u32 vector_index,
/* Result callback: receives the looked-up result for one vector element. */
55 typedef u32 (vhash_result_function_t) (
void *
state,
u32 vector_index,
56 u32 result,
u32 n_key_u32);
/* 4-wide result variant; its trailing parameters are missing here. */
57 typedef u32x4 (vhash_4result_function_t) (
void *
state,
u32 vector_index,
62 u32x4_union_t hashed_key[3];
76 } vhash_search_bucket_t;
80 u32x4_union_t *search_buckets;
87 } vhash_overflow_buckets_t;
94 u32x4_union_t *search_buckets;
99 vhash_overflow_buckets_t overflow_buckets[16];
109 u32x4_union_t bucket_mask;
112 u8 find_first_zero_table[16];
115 u32x4_union_t hash_seeds[3];
119 u32 log2_n_key_word_len_u32x;
122 u32x4_union_t *key_work_space;
126 vhash_hashed_key_t *hash_work_space;
/* Fragment: maps a key hash to one of the 16 overflow buckets (the
 * overflow_buckets[16] array declared earlier in this header).  The
 * return-type line and braces are missing from this extraction. */
130 vhash_get_overflow_buckets (vhash_t *
h,
u32 key)
/* Bucket index: bits [5:2] of the bucket-masked key hash, giving 0..15. */
132 u32 i = (((
key &
h->bucket_mask.as_u32[0]) >> 2) & 0xf);
134 return h->overflow_buckets +
i;
/* Fragment: predicate — nonzero when the overflow bucket selected by this
 * key hash holds at least one entry (n_overflow > 0).  Uses the same
 * index computation as vhash_get_overflow_buckets; return-type line and
 * braces are missing from this extraction. */
138 vhash_is_non_empty_overflow_bucket (vhash_t *
h,
u32 key)
140 u32 i = (((
key &
h->bucket_mask.as_u32[0]) >> 2) & 0xf);
142 return h->overflow_buckets[
i].n_overflow > 0;
146 vhash_free_overflow_buckets (vhash_overflow_buckets_t * obs)
153 vhash_free (vhash_t *
h)
157 vhash_free_overflow_buckets (&
h->overflow_buckets[
i]);
166 u32 i0 = (wi <<
h->log2_n_key_word_len_u32x) + (vi / 4);
172 vhash_set_key_word_u32x (vhash_t *
h,
u32 wi,
u32 vi, u32x
value)
174 u32 i0 = (wi <<
h->log2_n_key_word_len_u32x) + (vi / 4);
/* Fragment: fetch scalar key word `wi` for vector element `vi` from the
 * key work space.  The u32x4 element index is (wi << log2_len) + vi/4;
 * the lane index `i1` is computed on a line missing from this extraction
 * — presumably vi % 4, TODO confirm against the pristine source. */
179 vhash_get_key_word (vhash_t *
h,
u32 wi,
u32 vi)
181 u32 i0 = (wi <<
h->log2_n_key_word_len_u32x) + (vi / 4);
183 return vec_elt (
h->key_work_space, i0).as_u32[i1];
/* Fragment: fetch a whole u32x4 of key word `wi` for vector index `vi`.
 * Unlike the scalar accessor above, `vi` here already counts 4-wide
 * vectors (no division by 4).  Return-type line and braces are missing
 * from this extraction. */
187 vhash_get_key_word_u32x (vhash_t *
h,
u32 wi,
u32 vi)
189 u32 i0 = (wi <<
h->log2_n_key_word_len_u32x) + vi;
190 return vec_elt (
h->key_work_space, i0).as_u32x4;
201 h->log2_n_key_word_len_u32x = l =
min_log2 (n);
208 vhash_gather_key_stage (vhash_t *
h,
211 vhash_key_function_t key_function,
219 vi = vector_index * 4 +
i;
220 for (j = 0; j < n_key_u32s; j++)
221 vhash_set_key_word (
h, j, vi, key_function (
state, vi, j));
226 vhash_gather_4key_stage (vhash_t *
h,
228 vhash_4key_function_t key_function,
232 vi = vector_index * 4;
233 for (j = 0; j < n_key_u32s; j++)
234 vhash_set_key_word_u32x (
h, j, vi, key_function (
state, vi, j));
238 vhash_mix_stage (vhash_t *
h,
u32 vector_index,
u32 n_key_u32s)
246 a =
h->hash_seeds[0].as_u32x4;
247 b =
h->hash_seeds[1].as_u32x4;
248 c =
h->hash_seeds[2].as_u32x4;
252 vhash_get_key_word_u32x (
h, n_key_u32s - 1 - (
i + 0), vector_index);
255 vhash_get_key_word_u32x (
h, n_key_u32s - 1 - (
i + 1), vector_index);
258 vhash_get_key_word_u32x (
h, n_key_u32s - 1 - (
i + 2), vector_index);
265 vhash_hashed_key_t *hk =
267 hk->hashed_key[0].as_u32x4 =
a;
268 hk->hashed_key[1].as_u32x4 =
b;
269 hk->hashed_key[2].as_u32x4 =
c;
/* Fragment: compute a search-bucket pointer from a bucket index.  The
 * per-bucket stride is (sizeof(bucket)/sizeof(u32x4)) + n_key_u32s u32x4
 * units; original lines 277-278 (the base-pointer part of the
 * expression) are missing from this extraction. */
274 vhash_get_search_bucket_with_index (vhash_t *
h,
u32 i,
u32 n_key_u32s)
276 return ((vhash_search_bucket_t *)
279 ((sizeof (vhash_search_bucket_t) /
280 sizeof (
u32x4)) + n_key_u32s)));
/* Fragment: mask the key hash down to a bucket index, then delegate to
 * the index-based accessor above. */
284 vhash_get_search_bucket (vhash_t *
h,
u32 key_hash,
u32 n_key_u32s)
286 u32 i = key_hash &
h->bucket_mask.as_u32[0];
287 return vhash_get_search_bucket_with_index (
h,
i, n_key_u32s);
/* Fragment: turn 4 masked key hashes (one per u32x4 lane) into 4 bucket
 * byte offsets by multiplying by the bucket size, decomposed into a sum
 * of power-of-two shifts (the _(x) macro shifts by (x)-2).  The `r = ...`
 * assignment lines for all but the last else-if branch (original lines
 * 301, 303, 305, 307, 309) are missing from this extraction, as is the
 * trailing #undef — recover the pristine source before editing. */
291 vhash_get_4_search_bucket_byte_offsets (vhash_t *
h,
u32x4 key_hash,
294 vhash_search_bucket_t *
b;
/* Bucket size in bytes = fixed header + n_key_u32s key vectors. */
295 u32 n_bytes_per_bucket =
sizeof (
b[0]) + n_key_u32s *
sizeof (
b->key[0]);
296 u32x4 r = key_hash &
h->bucket_mask.as_u32x4;
299 #define _(x) u32x4_ishift_left (r, (x) - 2)
/* Dispatch on the handful of bucket sizes the layout can produce. */
300 if (n_bytes_per_bucket == (1 << 5))
302 else if (n_bytes_per_bucket == ((1 << 5) + (1 << 4)))
304 else if (n_bytes_per_bucket == (1 << 6))
306 else if (n_bytes_per_bucket == ((1 << 6) + (1 << 4)))
308 else if (n_bytes_per_bucket == ((1 << 6) + (1 << 5)))
310 else if (n_bytes_per_bucket == ((1 << 6) + (1 << 5) + (1 << 4)))
311 r = _(6) + _(5) + _(4);
319 vhash_finalize_stage (vhash_t *
h,
u32 vector_index,
u32 n_key_u32s)
323 vhash_hashed_key_t *hk =
328 a =
h->hash_seeds[0].as_u32x4;
329 b =
h->hash_seeds[1].as_u32x4;
330 c =
h->hash_seeds[2].as_u32x4;
335 a = hk->hashed_key[0].as_u32x4;
336 b = hk->hashed_key[1].as_u32x4;
337 c = hk->hashed_key[2].as_u32x4;
342 a += vhash_get_key_word_u32x (
h, 0, vector_index);
344 b += vhash_get_key_word_u32x (
h, 1, vector_index);
346 c += vhash_get_key_word_u32x (
h, 2, vector_index);
351 hk->hashed_key[2].as_u32x4 =
c;
356 vhash_search_bucket_t *b0, *b1, *b2, *b3;
359 kh.as_u32x4 = vhash_get_4_search_bucket_byte_offsets (
h,
c, n_key_u32s);
360 hk->hashed_key[1].as_u32x4 = kh.as_u32x4;
362 b0 = (
void *)
h->search_buckets + kh.as_u32[0];
363 b1 = (
void *)
h->search_buckets + kh.as_u32[1];
364 b2 = (
void *)
h->search_buckets + kh.as_u32[2];
365 b3 = (
void *)
h->search_buckets + kh.as_u32[3];
367 CLIB_PREFETCH (b0,
sizeof (b0[0]) + n_key_u32s *
sizeof (b0->key[0]),
369 CLIB_PREFETCH (b1,
sizeof (b1[0]) + n_key_u32s *
sizeof (b1->key[0]),
371 CLIB_PREFETCH (b2,
sizeof (b2[0]) + n_key_u32s *
sizeof (b2->key[0]),
373 CLIB_PREFETCH (b3,
sizeof (b3[0]) + n_key_u32s *
sizeof (b3->key[0]),
379 vhash_merge_results (
u32x4 r)
383 return u32x4_get0 (
r);
/* Fragment: a search bucket is full when none of its 4 result words is
 * zero, i.e. the zero-byte mask of the result vector is empty. */
388 vhash_search_bucket_is_full (
u32x4 r)
390 return u32x4_zero_byte_mask (
r) == 0;
/* Fragment: index (0..3) of a non-empty 32-bit result lane in x.  Each
 * lane contributes 4 bits to the zero-byte mask, hence the divide by 4;
 * asserts that at least one lane is non-zero. */
394 vhash_non_empty_result_index (
u32x4 x)
396 u32 empty_mask = u32x4_zero_byte_mask (x);
397 ASSERT (empty_mask != 0xffff);
398 return min_log2 (0xffff & ~empty_mask) / 4;
/* Fragment: index (0..3) of an empty (zero) result lane in x — the dual
 * of vhash_non_empty_result_index.  Original line 405 is missing from
 * this extraction. */
402 vhash_empty_result_index (
u32x4 x)
404 u32 empty_mask = u32x4_zero_byte_mask (x);
406 return min_log2 (0xffff & empty_mask) / 4;
/* Fragment: compare key word `key_word_index` of all 4 bucket entries
 * against the scalar key word for vector element `vi`; yields a per-lane
 * all-ones/all-zeros u32x4 equality mask. */
410 vhash_bucket_compare (vhash_t *
h,
411 u32x4_union_t * bucket,
u32 key_word_index,
u32 vi)
413 u32 k = vhash_get_key_word (
h, key_word_index, vi);
/* Broadcast k to all 4 lanes, then lane-wise compare. */
414 u32x4 x = { k, k, k, k };
415 return (bucket[key_word_index].as_u32x4 == x);
/* Fragment of a macro comparing key word `wi` of 4 distinct buckets
 * (b0..b3) against the 4 lanes of the gathered key vector, producing 4
 * compare masks (cmp0..cmp3).  It gathers one u32x4 of keys, splats each
 * lane into its own vector, then compares per bucket.  Original lines
 * 419 and 425 (and whatever closed the macro) are missing from this
 * extraction — recover the pristine source before editing. */
418 #define vhash_bucket_compare_4(h,wi,vi,b0,b1,b2,b3,cmp0,cmp1,cmp2,cmp3) \
420 u32x4 _k4 = vhash_get_key_word_u32x ((h), (wi), (vi)); \
421 u32x4 _k0 = u32x4_splat_word (_k4, 0); \
422 u32x4 _k1 = u32x4_splat_word (_k4, 1); \
423 u32x4 _k2 = u32x4_splat_word (_k4, 2); \
424 u32x4 _k3 = u32x4_splat_word (_k4, 3); \
426 cmp0 = (b0->key[wi].as_u32x4 == _k0); \
427 cmp1 = (b1->key[wi].as_u32x4 == _k1); \
428 cmp2 = (b2->key[wi].as_u32x4 == _k2); \
429 cmp3 = (b3->key[wi].as_u32x4 == _k3); \
432 u32 vhash_get_overflow (vhash_t *
h,
u32 key_hash,
u32 vi,
u32 n_key_u32s);
435 vhash_get_stage (vhash_t *
h,
438 vhash_result_function_t result_function,
442 vhash_hashed_key_t *hk =
444 vhash_search_bucket_t *
b;
448 u32 vi = vector_index * 4 +
i;
449 u32 key_hash = hk->hashed_key[2].as_u32[
i];
453 b = vhash_get_search_bucket (
h, key_hash, n_key_u32s);
455 r = r0 =
b->result.as_u32x4;
456 for (j = 0; j < n_key_u32s; j++)
457 r &= vhash_bucket_compare (
h, &
b->key[0], j, vi);
461 result = vhash_merge_results (
r);
463 if (!result && vhash_search_bucket_is_full (r0))
464 result = vhash_get_overflow (
h, key_hash, vi, n_key_u32s);
466 result_function (
state, vi, result - 1, n_key_u32s);
471 vhash_get_4_stage (vhash_t *
h,
473 vhash_4result_function_t result_function,
477 vhash_hashed_key_t *hk =
479 vhash_search_bucket_t *b0, *b1, *b2, *b3;
480 u32x4 r0, r1, r2, r3, r0_before, r1_before, r2_before, r3_before;
483 kh.as_u32x4 = hk->hashed_key[1].as_u32x4;
485 b0 = (
void *)
h->search_buckets + kh.as_u32[0];
486 b1 = (
void *)
h->search_buckets + kh.as_u32[1];
487 b2 = (
void *)
h->search_buckets + kh.as_u32[2];
488 b3 = (
void *)
h->search_buckets + kh.as_u32[3];
490 r0 = r0_before = b0->result.as_u32x4;
491 r1 = r1_before = b1->result.as_u32x4;
492 r2 = r2_before = b2->result.as_u32x4;
493 r3 = r3_before = b3->result.as_u32x4;
495 vi = vector_index * 4;
497 for (
i = 0;
i < n_key_u32s;
i++)
499 u32x4 c0, c1, c2, c3;
500 vhash_bucket_compare_4 (
h,
i, vector_index,
501 b0, b1, b2, b3, c0, c1, c2, c3);
508 u32x4_transpose (r0, r1, r2, r3);
513 u32x4 ones = { 1, 1, 1, 1 };
516 r.as_u32x4 = r0 | r1 | r2 | r3;
517 not_found_mask = u32x4_zero_byte_mask (
r.as_u32x4);
518 not_found_mask &= ((vhash_search_bucket_is_full (r0_before) << (4 * 0))
519 | (vhash_search_bucket_is_full (r1_before) << (4 * 1))
520 | (vhash_search_bucket_is_full (r2_before) << (4 * 2))
521 | (vhash_search_bucket_is_full (r3_before) <<
525 u32x4_union_t key_hash;
528 hk->hashed_key[2].as_u32x4 &
h->bucket_mask.as_u32x4;
531 if (not_found_mask & (1 << (4 * 0)))
532 r.as_u32[0] = vhash_get_overflow (
h, key_hash.as_u32[0],
534 if (not_found_mask & (1 << (4 * 1)))
535 r.as_u32[1] = vhash_get_overflow (
h, key_hash.as_u32[1],
537 if (not_found_mask & (1 << (4 * 2)))
538 r.as_u32[2] = vhash_get_overflow (
h, key_hash.as_u32[2],
540 if (not_found_mask & (1 << (4 * 3)))
541 r.as_u32[3] = vhash_get_overflow (
h, key_hash.as_u32[3],
545 result_function (
state, vi,
r.as_u32x4 - ones, n_key_u32s);
550 vhash_set_overflow (vhash_t *
h,
554 vhash_set_stage (vhash_t *
h,
557 vhash_result_function_t result_function,
560 u32 i, j, n_new_elts = 0;
561 vhash_hashed_key_t *hk =
563 vhash_search_bucket_t *
b;
567 u32 vi = vector_index * 4 +
i;
568 u32 key_hash = hk->hashed_key[2].as_u32[
i];
569 u32 old_result, new_result;
573 b = vhash_get_search_bucket (
h, key_hash, n_key_u32s);
575 cmp = vhash_bucket_compare (
h, &
b->key[0], 0, vi);
576 for (j = 1; j < n_key_u32s; j++)
577 cmp &= vhash_bucket_compare (
h, &
b->key[0], j, vi);
579 r0 =
b->result.as_u32x4;
584 old_result = vhash_merge_results (
r);
586 if (!old_result && vhash_search_bucket_is_full (r0))
587 old_result = vhash_get_overflow (
h, key_hash, vi, n_key_u32s);
590 new_result = result_function (
state, vi, old_result - 1, n_key_u32s);
594 ASSERT (new_result + 1 != 0);
600 i_set = vhash_non_empty_result_index (
r);
601 b->result.as_u32[i_set] = new_result;
608 valid_mask = (((
b->result.as_u32[0] != 0) << 0)
609 | ((
b->result.as_u32[1] != 0) << 1)
610 | ((
b->result.as_u32[2] != 0) << 2)
611 | ((
b->result.as_u32[3] != 0) << 3));
614 i_set = key_hash & 3;
616 ((valid_mask >> i_set) | (valid_mask << (4 - i_set))) & 0xf;
619 i_set = (i_set +
h->find_first_zero_table[valid_mask]) & 3;
621 if (valid_mask != 0xf)
625 b->result.as_u32[i_set] = new_result;
628 for (j = 0; j < n_key_u32s; j++)
629 b->key[j].as_u32[i_set] = vhash_get_key_word (
h, j, vi);
632 vhash_set_overflow (
h, key_hash, vi, new_result, n_key_u32s);
636 h->n_elts += n_new_elts;
639 u32 vhash_unset_overflow (vhash_t *
h,
u32 key_hash,
u32 vi,
u32 n_key_u32s);
642 vhash_unset_refill_from_overflow (vhash_t *
h,
643 vhash_search_bucket_t *
b,
644 u32 key_hash,
u32 n_key_u32s);
649 vhash_unset_stage (vhash_t *
h,
652 vhash_result_function_t result_function,
655 u32 i, j, n_elts_unset = 0;
656 vhash_hashed_key_t *hk =
658 vhash_search_bucket_t *
b;
662 u32 vi = vector_index * 4 +
i;
663 u32 key_hash = hk->hashed_key[2].as_u32[
i];
667 b = vhash_get_search_bucket (
h, key_hash, n_key_u32s);
669 cmp = vhash_bucket_compare (
h, &
b->key[0], 0, vi);
670 for (j = 1; j < n_key_u32s; j++)
671 cmp &= vhash_bucket_compare (
h, &
b->key[0], j, vi);
673 r0 =
b->result.as_u32x4;
677 b->result.as_u32x4 = r0 & ~cmp;
679 old_result = vhash_merge_results (r0 & cmp);
681 n_elts_unset += old_result != 0;
683 if (vhash_search_bucket_is_full (r0))
686 vhash_unset_refill_from_overflow (
h,
b, key_hash, n_key_u32s);
688 old_result = vhash_unset_overflow (
h, key_hash, vi, n_key_u32s);
691 result_function (
state, vi, old_result - 1, n_key_u32s);
693 ASSERT (
h->n_elts >= n_elts_unset);
694 h->n_elts -= n_elts_unset;
697 void vhash_init (vhash_t *
h,
u32 log2_n_keys,
u32 n_key_u32,
700 void vhash_resize (vhash_t * old,
u32 log2_n_keys);
717 u32x4_union_t *get_keys;
718 u32x4_union_t *get_results;
731 vhash_get_alloc_keys (vhash_main_t *
vm,
u32 n_keys,
u32 n_key_u32)
736 vm->n_keys =
i + n_keys;
747 vhash_get_set_key_word (vhash_main_t *
vm,
u32 vi,
u32 wi,
u32 n_key_u32,
752 k[wi].as_u32[vi % 4] =
value;
756 vhash_get_fetch_result (vhash_main_t *
vm,
u32 vi)
759 return r->as_u32[vi % 4];
762 void vhash_main_get (vhash_main_t *
vm);
765 vhash_set_alloc_keys (vhash_main_t *
vm,
u32 n_keys,
u32 n_key_u32)
770 vm->n_keys =
i + n_keys;
779 vhash_set_set_key_word (vhash_main_t *
vm,
u32 vi,
u32 wi,
u32 n_key_u32,
788 vhash_set_set_result (vhash_main_t *
vm,
u32 vi,
u32 result)
795 vhash_set_fetch_old_result (vhash_main_t *
vm,
u32 vi)
801 void vhash_main_set (vhash_main_t *
vm);
/* Fragments: the unset-side key/result API is a set of thin wrappers
 * delegating directly to the corresponding vhash_set_* helpers.
 * Return-type lines, braces, and some trailing parameters are missing
 * from this extraction. */
804 vhash_unset_alloc_keys (vhash_main_t *
vm,
u32 n_keys,
u32 n_key_u32)
806 return vhash_set_alloc_keys (
vm, n_keys, n_key_u32);
/* Delegates key-word storage to the set-side helper; its final `value`
 * parameter line is missing here. */
810 vhash_unset_set_key_word (vhash_main_t *
vm,
u32 vi,
u32 wi,
u32 n_key_u32,
813 vhash_set_set_key_word (
vm, vi, wi, n_key_u32,
value);
/* Delegates result storage to the set-side helper. */
817 vhash_unset_set_result (vhash_main_t *
vm,
u32 vi,
u32 result)
819 vhash_set_set_result (
vm, vi, result);
/* Delegates old-result fetch to the set-side helper. */
823 vhash_unset_fetch_old_result (vhash_main_t *
vm,
u32 vi)
825 return vhash_set_fetch_old_result (
vm, vi);
828 void vhash_main_unset (vhash_main_t *
vm);
837 u32 vhash_resize_incremental (vhash_resize_t * vr,
u32 vector_index,