FD.io VPP  v20.05.1-6-gf53edbc3b
Vector Packet Processing
bihash_template.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2015 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  * http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
16 /** @cond DOCUMENTATION_IS_IN_BIHASH_DOC_H */
17 
18 #ifndef MAP_HUGE_SHIFT
19 #define MAP_HUGE_SHIFT 26
20 #endif
21 
22 static inline void *BV (alloc_aligned) (BVT (clib_bihash) * h, uword nbytes)
23 {
24  uword rv;
25 
26  /* Round to an even number of cache lines */
27  nbytes += CLIB_CACHE_LINE_BYTES - 1;
28  nbytes &= ~(CLIB_CACHE_LINE_BYTES - 1);
29 
30  rv = alloc_arena_next (h);
31  alloc_arena_next (h) += nbytes;
32 
33  if (alloc_arena_next (h) > alloc_arena_size (h))
35 
36  if (alloc_arena_next (h) > alloc_arena_mapped (h))
37  {
38  void *base, *rv;
39  uword alloc = alloc_arena_next (h) - alloc_arena_mapped (h);
40  int mmap_flags = MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS;
41  int mmap_flags_huge = (mmap_flags | MAP_HUGETLB | MAP_LOCKED |
42  BIHASH_LOG2_HUGEPAGE_SIZE << MAP_HUGE_SHIFT);
43 
44  /* new allocation is 25% of existing one */
45  if (alloc_arena_mapped (h) >> 2 > alloc)
46  alloc = alloc_arena_mapped (h) >> 2;
47 
48  /* round allocation to page size */
49  alloc = round_pow2 (alloc, 1 << BIHASH_LOG2_HUGEPAGE_SIZE);
50 
51  base = (void *) (uword) (alloc_arena (h) + alloc_arena_mapped (h));
52 
53  rv = mmap (base, alloc, PROT_READ | PROT_WRITE, mmap_flags_huge, -1, 0);
54 
55  /* fallback - maybe we are still able to allocate normal pages */
56  if (rv == MAP_FAILED || mlock (base, alloc) != 0)
57  rv = mmap (base, alloc, PROT_READ | PROT_WRITE, mmap_flags, -1, 0);
58 
59  if (rv == MAP_FAILED)
61 
62  alloc_arena_mapped (h) += alloc;
63  }
64 
65  return (void *) (uword) (rv + alloc_arena (h));
66 }
67 
68 static void BV (clib_bihash_instantiate) (BVT (clib_bihash) * h)
69 {
70  uword bucket_size;
71 
72  alloc_arena (h) = clib_mem_vm_reserve (0, h->memory_size,
73  BIHASH_LOG2_HUGEPAGE_SIZE);
74  if (alloc_arena (h) == ~0)
76  alloc_arena_next (h) = 0;
77  alloc_arena_size (h) = h->memory_size;
78  alloc_arena_mapped (h) = 0;
79 
80  bucket_size = h->nbuckets * sizeof (h->buckets[0]);
81 
83  bucket_size +=
84  h->nbuckets * BIHASH_KVP_PER_PAGE * sizeof (BVT (clib_bihash_kv));
85 
86  h->buckets = BV (alloc_aligned) (h, bucket_size);
87 
89  {
90  int i;
91  BVT (clib_bihash_bucket) * b;
92 
93  b = h->buckets;
94 
95  for (i = 0; i < h->nbuckets; i++)
96  {
97  b->offset = BV (clib_bihash_get_offset) (h, (void *) (b + 1));
98  b->refcnt = 1;
99  /* Mark all elements free */
100  clib_memset ((b + 1), 0xff,
101  BIHASH_KVP_PER_PAGE * sizeof (BVT (clib_bihash_kv)));
102 
103  /* Compute next bucket start address */
104  b = (void *) (((uword) b) + sizeof (*b) +
105  (BIHASH_KVP_PER_PAGE *
106  sizeof (BVT (clib_bihash_kv))));
107  }
108  }
110  h->instantiated = 1;
111 }
112 
113 void BV (clib_bihash_init2) (BVT (clib_bihash_init2_args) * a)
114 {
115  int i;
116  void *oldheap;
117  BVT (clib_bihash) * h = a->h;
118 
119  a->nbuckets = 1 << (max_log2 (a->nbuckets));
120 
121  h->name = (u8 *) a->name;
122  h->nbuckets = a->nbuckets;
123  h->log2_nbuckets = max_log2 (a->nbuckets);
124  h->memory_size = a->memory_size;
125  h->instantiated = 0;
126  h->fmt_fn = a->fmt_fn;
127 
128  alloc_arena (h) = 0;
129 
130  /*
131  * Make sure the requested size is rational. The max table
132  * size without playing the alignment card is 64 Gbytes.
133  * If someone starts complaining that's not enough, we can shift
134  * the offset by CLIB_LOG2_CACHE_LINE_BYTES...
135  */
136  ASSERT (h->memory_size < (1ULL << BIHASH_BUCKET_OFFSET_BITS));
137 
138  /* Add this hash table to the list */
139  if (a->dont_add_to_all_bihash_list == 0)
140  {
141  for (i = 0; i < vec_len (clib_all_bihashes); i++)
142  if (clib_all_bihashes[i] == h)
143  goto do_lock;
144  oldheap = clib_all_bihash_set_heap ();
145  vec_add1 (clib_all_bihashes, (void *) h);
146  clib_mem_set_heap (oldheap);
147  }
148 
149 do_lock:
150  if (h->alloc_lock)
151  clib_mem_free ((void *) h->alloc_lock);
152 
153  /*
154  * Set up the lock now, so we can use it to make the first add
155  * thread-safe
156  */
159  h->alloc_lock[0] = 0;
160 
161 #if BIHASH_LAZY_INSTANTIATE
162  if (a->instantiate_immediately)
163 #endif
164  BV (clib_bihash_instantiate) (h);
165 }
166 
167 void BV (clib_bihash_init)
168  (BVT (clib_bihash) * h, char *name, u32 nbuckets, uword memory_size)
169 {
170  BVT (clib_bihash_init2_args) _a, *a = &_a;
171 
172  memset (a, 0, sizeof (*a));
173 
174  a->h = h;
175  a->name = name;
176  a->nbuckets = nbuckets;
177  a->memory_size = memory_size;
178 
179  BV (clib_bihash_init2) (a);
180 }
181 
182 #if BIHASH_32_64_SVM
183 #if !defined (MFD_ALLOW_SEALING)
184 #define MFD_ALLOW_SEALING 0x0002U
185 #endif
186 
187 void BV (clib_bihash_master_init_svm)
188  (BVT (clib_bihash) * h, char *name, u32 nbuckets, u64 memory_size)
189 {
190  uword bucket_size;
191  u8 *mmap_addr;
192  vec_header_t *freelist_vh;
193  int fd;
194 
195  ASSERT (memory_size < (1ULL << 32));
196  /* Set up for memfd sharing */
197  if ((fd = memfd_create (name, MFD_ALLOW_SEALING)) == -1)
198  {
199  clib_unix_warning ("memfd_create");
200  return;
201  }
202 
203  if (ftruncate (fd, memory_size) < 0)
204  {
205  clib_unix_warning ("ftruncate");
206  return;
207  }
208 
209  /* Not mission-critical, complain and continue */
210  if ((fcntl (fd, F_ADD_SEALS, F_SEAL_SHRINK)) == -1)
211  clib_unix_warning ("fcntl (F_ADD_SEALS)");
212 
213  mmap_addr = mmap (0, memory_size,
214  PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0 /* offset */ );
215 
216  if (mmap_addr == MAP_FAILED)
217  {
218  clib_unix_warning ("mmap failed");
219  ASSERT (0);
220  }
221 
222  h->sh = (void *) mmap_addr;
223  h->memfd = fd;
224  nbuckets = 1 << (max_log2 (nbuckets));
225 
226  h->name = (u8 *) name;
227  h->sh->nbuckets = h->nbuckets = nbuckets;
228  h->log2_nbuckets = max_log2 (nbuckets);
229 
230  alloc_arena (h) = (u64) (uword) mmap_addr;
231  alloc_arena_next (h) = CLIB_CACHE_LINE_BYTES;
232  alloc_arena_size (h) = memory_size;
233 
234  bucket_size = nbuckets * sizeof (h->buckets[0]);
235  h->buckets = BV (alloc_aligned) (h, bucket_size);
236  h->sh->buckets_as_u64 = (u64) BV (clib_bihash_get_offset) (h, h->buckets);
237 
238  h->alloc_lock = BV (alloc_aligned) (h, CLIB_CACHE_LINE_BYTES);
239  h->alloc_lock[0] = 0;
240 
241  h->sh->alloc_lock_as_u64 =
242  (u64) BV (clib_bihash_get_offset) (h, (void *) h->alloc_lock);
243  freelist_vh =
244  BV (alloc_aligned) (h,
245  sizeof (vec_header_t) +
246  BIHASH_FREELIST_LENGTH * sizeof (u64));
247  freelist_vh->len = BIHASH_FREELIST_LENGTH;
248  h->sh->freelists_as_u64 =
249  (u64) BV (clib_bihash_get_offset) (h, freelist_vh->vector_data);
250  h->freelists = (void *) (freelist_vh->vector_data);
251 
252  h->fmt_fn = NULL;
253  h->instantiated = 1;
254 }
255 
256 void BV (clib_bihash_slave_init_svm)
257  (BVT (clib_bihash) * h, char *name, int fd)
258 {
259  u8 *mmap_addr;
261  BVT (clib_bihash_shared_header) * sh;
262 
263  /* Trial mapping, to learn the segment size */
264  mmap_addr = mmap (0, 4096, PROT_READ, MAP_SHARED, fd, 0 /* offset */ );
265  if (mmap_addr == MAP_FAILED)
266  {
267  clib_unix_warning ("trial mmap failed");
268  ASSERT (0);
269  }
270 
271  sh = (BVT (clib_bihash_shared_header) *) mmap_addr;
272 
273  memory_size = sh->alloc_arena_size;
274 
275  munmap (mmap_addr, 4096);
276 
277  /* Actual mapping, at the required size */
278  mmap_addr = mmap (0, memory_size,
279  PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0 /* offset */ );
280 
281  if (mmap_addr == MAP_FAILED)
282  {
283  clib_unix_warning ("mmap failed");
284  ASSERT (0);
285  }
286 
287  (void) close (fd);
288 
289  h->sh = (void *) mmap_addr;
290  alloc_arena (h) = (u64) (uword) mmap_addr;
291  h->memfd = -1;
292 
293  h->name = (u8 *) name;
294  h->buckets = BV (clib_bihash_get_value) (h, h->sh->buckets_as_u64);
295  h->nbuckets = h->sh->nbuckets;
296  h->log2_nbuckets = max_log2 (h->nbuckets);
297 
298  h->alloc_lock = BV (clib_bihash_get_value) (h, h->sh->alloc_lock_as_u64);
299  h->freelists = BV (clib_bihash_get_value) (h, h->sh->freelists_as_u64);
300  h->fmt_fn = NULL;
301 }
302 #endif /* BIHASH_32_64_SVM */
303 
304 void BV (clib_bihash_set_kvp_format_fn) (BVT (clib_bihash) * h,
305  format_function_t * fmt_fn)
306 {
307  h->fmt_fn = fmt_fn;
308 }
309 
310 void BV (clib_bihash_free) (BVT (clib_bihash) * h)
311 {
312  int i;
313 
314  if (PREDICT_FALSE (h->instantiated == 0))
315  goto never_initialized;
316 
317  h->instantiated = 0;
318  vec_free (h->working_copies);
319  vec_free (h->working_copy_lengths);
320 #if BIHASH_32_64_SVM == 0
321  vec_free (h->freelists);
322 #else
323  if (h->memfd > 0)
324  (void) close (h->memfd);
325 #endif
326  clib_mem_vm_free ((void *) (uword) (alloc_arena (h)), alloc_arena_size (h));
327 never_initialized:
328  clib_memset (h, 0, sizeof (*h));
329  for (i = 0; i < vec_len (clib_all_bihashes); i++)
330  {
331  if ((void *) h == clib_all_bihashes[i])
332  {
334  return;
335  }
336  }
337  clib_warning ("Couldn't find hash table %llx on clib_all_bihashes...",
338  (u64) (uword) h);
339 }
340 
341 static
343 BV (value_alloc) (BVT (clib_bihash) * h, u32 log2_pages)
344 {
345  BVT (clib_bihash_value) * rv = 0;
346 
347  ASSERT (h->alloc_lock[0]);
348 
349 #if BIHASH_32_64_SVM
350  ASSERT (log2_pages < vec_len (h->freelists));
351 #endif
352 
353  if (log2_pages >= vec_len (h->freelists) || h->freelists[log2_pages] == 0)
354  {
355  vec_validate_init_empty (h->freelists, log2_pages, 0);
356  rv = BV (alloc_aligned) (h, (sizeof (*rv) * (1 << log2_pages)));
357  goto initialize;
358  }
359  rv = BV (clib_bihash_get_value) (h, (uword) h->freelists[log2_pages]);
360  h->freelists[log2_pages] = rv->next_free_as_u64;
361 
362 initialize:
363  ASSERT (rv);
364  /*
365  * Latest gcc complains that the length arg is zero
366  * if we replace (1<<log2_pages) with vec_len(rv).
367  * No clue.
368  */
369  clib_memset (rv, 0xff, sizeof (*rv) * (1 << log2_pages));
370  return rv;
371 }
372 
373 static void
374 BV (value_free) (BVT (clib_bihash) * h, BVT (clib_bihash_value) * v,
375  u32 log2_pages)
376 {
377  ASSERT (h->alloc_lock[0]);
378 
379  ASSERT (vec_len (h->freelists) > log2_pages);
380 
381  if (CLIB_DEBUG > 0)
382  clib_memset (v, 0xFE, sizeof (*v) * (1 << log2_pages));
383 
384  v->next_free_as_u64 = (u64) h->freelists[log2_pages];
385  h->freelists[log2_pages] = (u64) BV (clib_bihash_get_offset) (h, v);
386 }
387 
388 static inline void
389 BV (make_working_copy) (BVT (clib_bihash) * h, BVT (clib_bihash_bucket) * b)
390 {
391  BVT (clib_bihash_value) * v;
392  BVT (clib_bihash_bucket) working_bucket __attribute__ ((aligned (8)));
393  BVT (clib_bihash_value) * working_copy;
394  u32 thread_index = os_get_thread_index ();
395  int log2_working_copy_length;
396 
397  ASSERT (h->alloc_lock[0]);
398 
399  if (thread_index >= vec_len (h->working_copies))
400  {
401  vec_validate (h->working_copies, thread_index);
402  vec_validate_init_empty (h->working_copy_lengths, thread_index, ~0);
403  }
404 
405  /*
406  * working_copies are per-cpu so that near-simultaneous
407  * updates from multiple threads will not result in sporadic, spurious
408  * lookup failures.
409  */
410  working_copy = h->working_copies[thread_index];
411  log2_working_copy_length = h->working_copy_lengths[thread_index];
412 
413  h->saved_bucket.as_u64 = b->as_u64;
414 
415  if (b->log2_pages > log2_working_copy_length)
416  {
417  /*
418  * It's not worth the bookkeeping to free working copies
419  * if (working_copy)
420  * clib_mem_free (working_copy);
421  */
422  working_copy = BV (alloc_aligned)
423  (h, sizeof (working_copy[0]) * (1 << b->log2_pages));
424  h->working_copy_lengths[thread_index] = b->log2_pages;
425  h->working_copies[thread_index] = working_copy;
426 
427  BV (clib_bihash_increment_stat) (h, BIHASH_STAT_working_copy_lost,
428  1ULL << b->log2_pages);
429  }
430 
431  v = BV (clib_bihash_get_value) (h, b->offset);
432 
433  clib_memcpy_fast (working_copy, v, sizeof (*v) * (1 << b->log2_pages));
434  working_bucket.as_u64 = b->as_u64;
435  working_bucket.offset = BV (clib_bihash_get_offset) (h, working_copy);
437  b->as_u64 = working_bucket.as_u64;
438  h->working_copies[thread_index] = working_copy;
439 }
440 
441 static
443 BV (split_and_rehash)
444  (BVT (clib_bihash) * h,
445  BVT (clib_bihash_value) * old_values, u32 old_log2_pages,
446  u32 new_log2_pages)
447 {
448  BVT (clib_bihash_value) * new_values, *new_v;
449  int i, j, length_in_kvs;
450 
451  ASSERT (h->alloc_lock[0]);
452 
453  new_values = BV (value_alloc) (h, new_log2_pages);
454  length_in_kvs = (1 << old_log2_pages) * BIHASH_KVP_PER_PAGE;
455 
456  for (i = 0; i < length_in_kvs; i++)
457  {
458  u64 new_hash;
459 
460  /* Entry not in use? Forget it */
461  if (BV (clib_bihash_is_free) (&(old_values->kvp[i])))
462  continue;
463 
464  /* rehash the item onto its new home-page */
465  new_hash = BV (clib_bihash_hash) (&(old_values->kvp[i]));
466  new_hash = extract_bits (new_hash, h->log2_nbuckets, new_log2_pages);
467  new_v = &new_values[new_hash];
468 
469  /* Across the new home-page */
470  for (j = 0; j < BIHASH_KVP_PER_PAGE; j++)
471  {
472  /* Empty slot */
473  if (BV (clib_bihash_is_free) (&(new_v->kvp[j])))
474  {
475  clib_memcpy_fast (&(new_v->kvp[j]), &(old_values->kvp[i]),
476  sizeof (new_v->kvp[j]));
477  goto doublebreak;
478  }
479  }
480  /* Crap. Tell caller to try again */
481  BV (value_free) (h, new_values, new_log2_pages);
482  return 0;
483  doublebreak:;
484  }
485 
486  return new_values;
487 }
488 
489 static
492  (BVT (clib_bihash) * h,
493  BVT (clib_bihash_value) * old_values, u32 old_log2_pages,
494  u32 new_log2_pages)
495 {
496  BVT (clib_bihash_value) * new_values;
497  int i, j, new_length, old_length;
498 
499  ASSERT (h->alloc_lock[0]);
500 
501  new_values = BV (value_alloc) (h, new_log2_pages);
502  new_length = (1 << new_log2_pages) * BIHASH_KVP_PER_PAGE;
503  old_length = (1 << old_log2_pages) * BIHASH_KVP_PER_PAGE;
504 
505  j = 0;
506  /* Across the old value array */
507  for (i = 0; i < old_length; i++)
508  {
509  /* Find a free slot in the new linear scan bucket */
510  for (; j < new_length; j++)
511  {
512  /* Old value not in use? Forget it. */
513  if (BV (clib_bihash_is_free) (&(old_values->kvp[i])))
514  goto doublebreak;
515 
516  /* New value should never be in use */
517  if (BV (clib_bihash_is_free) (&(new_values->kvp[j])))
518  {
519  /* Copy the old value and move along */
520  clib_memcpy_fast (&(new_values->kvp[j]), &(old_values->kvp[i]),
521  sizeof (new_values->kvp[j]));
522  j++;
523  goto doublebreak;
524  }
525  }
526  /* This should never happen... */
527  clib_warning ("BUG: linear rehash failed!");
528  BV (value_free) (h, new_values, new_log2_pages);
529  return 0;
530 
531  doublebreak:;
532  }
533  return new_values;
534 }
535 
536 static_always_inline int BV (clib_bihash_add_del_inline_with_hash)
537  (BVT (clib_bihash) * h, BVT (clib_bihash_kv) * add_v, u64 hash, int is_add,
538  int (*is_stale_cb) (BVT (clib_bihash_kv) *, void *), void *arg)
539 {
540  BVT (clib_bihash_bucket) * b, tmp_b;
541  BVT (clib_bihash_value) * v, *new_v, *save_new_v, *working_copy;
542  int i, limit;
543  u64 new_hash;
544  u32 new_log2_pages, old_log2_pages;
545  u32 thread_index = os_get_thread_index ();
546  int mark_bucket_linear;
547  int resplit_once;
548 
549  /* *INDENT-OFF* */
550  static const BVT (clib_bihash_bucket) mask = {
551  .linear_search = 1,
552  .log2_pages = -1
553  };
554  /* *INDENT-ON* */
555 
556 #if BIHASH_LAZY_INSTANTIATE
557  /*
558  * Create the table (is_add=1,2), or flunk the request now (is_add=0)
559  * Use the alloc_lock to protect the instantiate operation.
560  */
561  if (PREDICT_FALSE (h->instantiated == 0))
562  {
563  if (is_add == 0)
564  return (-1);
565 
566  BV (clib_bihash_alloc_lock) (h);
567  if (h->instantiated == 0)
568  BV (clib_bihash_instantiate) (h);
569  BV (clib_bihash_alloc_unlock) (h);
570  }
571 #else
572  /* Debug image: make sure the table has been instantiated */
573  ASSERT (h->instantiated != 0);
574 #endif
575 
576  b = BV (clib_bihash_get_bucket) (h, hash);
577 
578  BV (clib_bihash_lock_bucket) (b);
579 
580  /* First elt in the bucket? */
581  if (BIHASH_KVP_AT_BUCKET_LEVEL == 0 && BV (clib_bihash_bucket_is_empty) (b))
582  {
583  if (is_add == 0)
584  {
585  BV (clib_bihash_unlock_bucket) (b);
586  return (-1);
587  }
588 
589  BV (clib_bihash_alloc_lock) (h);
590  v = BV (value_alloc) (h, 0);
591  BV (clib_bihash_alloc_unlock) (h);
592 
593  *v->kvp = *add_v;
594  tmp_b.as_u64 = 0; /* clears bucket lock */
595  tmp_b.offset = BV (clib_bihash_get_offset) (h, v);
596  tmp_b.refcnt = 1;
598 
599  b->as_u64 = tmp_b.as_u64; /* unlocks the bucket */
600  BV (clib_bihash_increment_stat) (h, BIHASH_STAT_alloc_add, 1);
601 
602  return (0);
603  }
604 
605  /* WARNING: we're still looking at the live copy... */
606  limit = BIHASH_KVP_PER_PAGE;
607  v = BV (clib_bihash_get_value) (h, b->offset);
608 
609  if (PREDICT_FALSE (b->as_u64 & mask.as_u64))
610  {
611  if (PREDICT_FALSE (b->linear_search))
612  limit <<= b->log2_pages;
613  else
614  v += extract_bits (hash, h->log2_nbuckets, b->log2_pages);
615  }
616 
617  if (is_add)
618  {
619  /*
620  * Because reader threads are looking at live data,
621  * we have to be extra careful. Readers do NOT hold the
622  * bucket lock. We need to be SLOWER than a search, past the
623  * point where readers CHECK the bucket lock.
624  */
625 
626  /*
627  * For obvious (in hindsight) reasons, see if we're supposed to
628  * replace an existing key, then look for an empty slot.
629  */
630  for (i = 0; i < limit; i++)
631  {
632  if (BV (clib_bihash_key_compare) (v->kvp[i].key, add_v->key))
633  {
634  /* Add but do not overwrite? */
635  if (is_add == 2)
636  {
637  BV (clib_bihash_unlock_bucket) (b);
638  return (-2);
639  }
640 
641  clib_memcpy_fast (&(v->kvp[i].value),
642  &add_v->value, sizeof (add_v->value));
643  BV (clib_bihash_unlock_bucket) (b);
644  BV (clib_bihash_increment_stat) (h, BIHASH_STAT_replace, 1);
645  return (0);
646  }
647  }
648  /*
649  * Look for an empty slot. If found, use it
650  */
651  for (i = 0; i < limit; i++)
652  {
653  if (BV (clib_bihash_is_free) (&(v->kvp[i])))
654  {
655  /*
656  * Copy the value first, so that if a reader manages
657  * to match the new key, the value will be right...
658  */
659  clib_memcpy_fast (&(v->kvp[i].value),
660  &add_v->value, sizeof (add_v->value));
661  CLIB_MEMORY_STORE_BARRIER (); /* Make sure the value has settled */
662  clib_memcpy_fast (&(v->kvp[i]), &add_v->key,
663  sizeof (add_v->key));
664  b->refcnt++;
665  ASSERT (b->refcnt > 0);
666  BV (clib_bihash_unlock_bucket) (b);
667  BV (clib_bihash_increment_stat) (h, BIHASH_STAT_add, 1);
668  return (0);
669  }
670  }
671  /* look for stale data to overwrite */
672  if (is_stale_cb)
673  {
674  for (i = 0; i < limit; i++)
675  {
676  if (is_stale_cb (&(v->kvp[i]), arg))
677  {
678  clib_memcpy_fast (&(v->kvp[i]), add_v, sizeof (*add_v));
680  BV (clib_bihash_unlock_bucket) (b);
681  BV (clib_bihash_increment_stat) (h, BIHASH_STAT_replace, 1);
682  return (0);
683  }
684  }
685  }
686  /* Out of space in this bucket, split the bucket... */
687  }
688  else /* delete case */
689  {
690  for (i = 0; i < limit; i++)
691  {
692  /* Found the key? Kill it... */
693  if (BV (clib_bihash_key_compare) (v->kvp[i].key, add_v->key))
694  {
695  clib_memset_u8 (&(v->kvp[i]), 0xff, sizeof (*(add_v)));
696  /* Is the bucket empty? */
697  if (PREDICT_TRUE (b->refcnt > 1))
698  {
699  b->refcnt--;
700  /* Switch back to the bucket-level kvp array? */
701  if (BIHASH_KVP_AT_BUCKET_LEVEL && b->refcnt == 1
702  && b->log2_pages > 0)
703  {
704  tmp_b.as_u64 = b->as_u64;
705  b->offset = BV (clib_bihash_get_offset)
706  (h, (void *) (b + 1));
707  b->linear_search = 0;
708  b->log2_pages = 0;
709  /* Clean up the bucket-level kvp array */
710  clib_memset_u8 ((b + 1), 0xff, BIHASH_KVP_PER_PAGE *
711  sizeof (BVT (clib_bihash_kv)));
713  BV (clib_bihash_unlock_bucket) (b);
714  BV (clib_bihash_increment_stat) (h, BIHASH_STAT_del, 1);
715  goto free_backing_store;
716  }
717 
719  BV (clib_bihash_unlock_bucket) (b);
720  BV (clib_bihash_increment_stat) (h, BIHASH_STAT_del, 1);
721  return (0);
722  }
723  else /* yes, free it */
724  {
725  /* Save old bucket value, need log2_pages to free it */
726  tmp_b.as_u64 = b->as_u64;
727 
728  /* Kill and unlock the bucket */
729  b->as_u64 = 0;
730 
731  free_backing_store:
732  /* And free the backing storage */
733  BV (clib_bihash_alloc_lock) (h);
734  /* Note: v currently points into the middle of the bucket */
735  v = BV (clib_bihash_get_value) (h, tmp_b.offset);
736  BV (value_free) (h, v, tmp_b.log2_pages);
737  BV (clib_bihash_alloc_unlock) (h);
738  BV (clib_bihash_increment_stat) (h, BIHASH_STAT_del_free,
739  1);
740  return (0);
741  }
742  }
743  }
744  /* Not found... */
745  BV (clib_bihash_unlock_bucket) (b);
746  return (-3);
747  }
748 
749  /* Move readers to a (locked) temp copy of the bucket */
750  BV (clib_bihash_alloc_lock) (h);
751  BV (make_working_copy) (h, b);
752 
753  v = BV (clib_bihash_get_value) (h, h->saved_bucket.offset);
754 
755  old_log2_pages = h->saved_bucket.log2_pages;
756  new_log2_pages = old_log2_pages + 1;
757  mark_bucket_linear = 0;
758  BV (clib_bihash_increment_stat) (h, BIHASH_STAT_split_add, 1);
759  BV (clib_bihash_increment_stat) (h, BIHASH_STAT_splits, old_log2_pages);
760 
761  working_copy = h->working_copies[thread_index];
762  resplit_once = 0;
763  BV (clib_bihash_increment_stat) (h, BIHASH_STAT_splits, 1);
764 
765  new_v = BV (split_and_rehash) (h, working_copy, old_log2_pages,
766  new_log2_pages);
767  if (new_v == 0)
768  {
769  try_resplit:
770  resplit_once = 1;
771  new_log2_pages++;
772  /* Try re-splitting. If that fails, fall back to linear search */
773  new_v = BV (split_and_rehash) (h, working_copy, old_log2_pages,
774  new_log2_pages);
775  if (new_v == 0)
776  {
777  mark_linear:
778  new_log2_pages--;
779  /* pinned collisions, use linear search */
780  new_v =
781  BV (split_and_rehash_linear) (h, working_copy, old_log2_pages,
782  new_log2_pages);
783  mark_bucket_linear = 1;
784  BV (clib_bihash_increment_stat) (h, BIHASH_STAT_linear, 1);
785  }
786  BV (clib_bihash_increment_stat) (h, BIHASH_STAT_resplit, 1);
787  BV (clib_bihash_increment_stat) (h, BIHASH_STAT_splits,
788  old_log2_pages + 1);
789  }
790 
791  /* Try to add the new entry */
792  save_new_v = new_v;
793  new_hash = BV (clib_bihash_hash) (add_v);
794  limit = BIHASH_KVP_PER_PAGE;
795  if (mark_bucket_linear)
796  limit <<= new_log2_pages;
797  else
798  new_v += extract_bits (new_hash, h->log2_nbuckets, new_log2_pages);
799 
800  for (i = 0; i < limit; i++)
801  {
802  if (BV (clib_bihash_is_free) (&(new_v->kvp[i])))
803  {
804  clib_memcpy_fast (&(new_v->kvp[i]), add_v, sizeof (*add_v));
805  goto expand_ok;
806  }
807  }
808 
809  /* Crap. Try again */
810  BV (value_free) (h, save_new_v, new_log2_pages);
811  /*
812  * If we've already doubled the size of the bucket once,
813  * fall back to linear search now.
814  */
815  if (resplit_once)
816  goto mark_linear;
817  else
818  goto try_resplit;
819 
820 expand_ok:
821  tmp_b.log2_pages = new_log2_pages;
822  tmp_b.offset = BV (clib_bihash_get_offset) (h, save_new_v);
823  tmp_b.linear_search = mark_bucket_linear;
824 #if BIHASH_KVP_AT_BUCKET_LEVEL
825  /* Compensate for permanent refcount bump at the bucket level */
826  if (new_log2_pages > 0)
827 #endif
828  tmp_b.refcnt = h->saved_bucket.refcnt + 1;
829  ASSERT (tmp_b.refcnt > 0);
830  tmp_b.lock = 0;
832  b->as_u64 = tmp_b.as_u64;
833 
834 #if BIHASH_KVP_AT_BUCKET_LEVEL
835  if (h->saved_bucket.log2_pages > 0)
836  {
837 #endif
838 
839  /* free the old bucket, except at the bucket level if so configured */
840  v = BV (clib_bihash_get_value) (h, h->saved_bucket.offset);
841  BV (value_free) (h, v, h->saved_bucket.log2_pages);
842 
843 #if BIHASH_KVP_AT_BUCKET_LEVEL
844  }
845 #endif
846 
847 
848  BV (clib_bihash_alloc_unlock) (h);
849  return (0);
850 }
851 
852 static_always_inline int BV (clib_bihash_add_del_inline)
853  (BVT (clib_bihash) * h, BVT (clib_bihash_kv) * add_v, int is_add,
854  int (*is_stale_cb) (BVT (clib_bihash_kv) *, void *), void *arg)
855 {
856  u64 hash = BV (clib_bihash_hash) (add_v);
857  return BV (clib_bihash_add_del_inline_with_hash) (h, add_v, hash, is_add,
858  is_stale_cb, arg);
859 }
860 
861 int BV (clib_bihash_add_del)
862  (BVT (clib_bihash) * h, BVT (clib_bihash_kv) * add_v, int is_add)
863 {
864  return BV (clib_bihash_add_del_inline) (h, add_v, is_add, 0, 0);
865 }
866 
867 int BV (clib_bihash_add_or_overwrite_stale)
868  (BVT (clib_bihash) * h, BVT (clib_bihash_kv) * add_v,
869  int (*stale_callback) (BVT (clib_bihash_kv) *, void *), void *arg)
870 {
871  return BV (clib_bihash_add_del_inline) (h, add_v, 1, stale_callback, arg);
872 }
873 
874 int BV (clib_bihash_search)
875  (BVT (clib_bihash) * h,
876  BVT (clib_bihash_kv) * search_key, BVT (clib_bihash_kv) * valuep)
877 {
878  return BV (clib_bihash_search_inline_2) (h, search_key, valuep);
879 }
880 
881 u8 *BV (format_bihash) (u8 * s, va_list * args)
882 {
883  BVT (clib_bihash) * h = va_arg (*args, BVT (clib_bihash) *);
884  int verbose = va_arg (*args, int);
885  BVT (clib_bihash_bucket) * b;
886  BVT (clib_bihash_value) * v;
887  int i, j, k;
888  u64 active_elements = 0;
889  u64 active_buckets = 0;
890  u64 linear_buckets = 0;
891  u64 used_bytes;
892 
893  s = format (s, "Hash table %s\n", h->name ? h->name : (u8 *) "(unnamed)");
894 
895 #if BIHASH_LAZY_INSTANTIATE
896  if (PREDICT_FALSE (alloc_arena (h) == 0))
897  return format (s, "[empty, uninitialized]");
898 #endif
899 
900  for (i = 0; i < h->nbuckets; i++)
901  {
902  b = BV (clib_bihash_get_bucket) (h, i);
903  if (BV (clib_bihash_bucket_is_empty) (b))
904  {
905  if (verbose > 1)
906  s = format (s, "[%d]: empty\n", i);
907  continue;
908  }
909 
910  active_buckets++;
911 
912  if (b->linear_search)
913  linear_buckets++;
914 
915  if (verbose)
916  {
917  s = format
918  (s, "[%d]: heap offset %lld, len %d, refcnt %d, linear %d\n", i,
919  b->offset, (1 << b->log2_pages), b->refcnt, b->linear_search);
920  }
921 
922  v = BV (clib_bihash_get_value) (h, b->offset);
923  for (j = 0; j < (1 << b->log2_pages); j++)
924  {
925  for (k = 0; k < BIHASH_KVP_PER_PAGE; k++)
926  {
927  if (BV (clib_bihash_is_free) (&v->kvp[k]))
928  {
929  if (verbose > 1)
930  s = format (s, " %d: empty\n",
931  j * BIHASH_KVP_PER_PAGE + k);
932  continue;
933  }
934  if (verbose)
935  {
936  if (h->fmt_fn)
937  {
938  s = format (s, " %d: %U\n",
939  j * BIHASH_KVP_PER_PAGE + k,
940  h->fmt_fn, &(v->kvp[k]), verbose);
941  }
942  else
943  {
944  s = format (s, " %d: %U\n",
945  j * BIHASH_KVP_PER_PAGE + k,
946  BV (format_bihash_kvp), &(v->kvp[k]));
947  }
948  }
949  active_elements++;
950  }
951  v++;
952  }
953  }
954 
955  s = format (s, " %lld active elements %lld active buckets\n",
956  active_elements, active_buckets);
957  s = format (s, " %d free lists\n", vec_len (h->freelists));
958 
959  for (i = 0; i < vec_len (h->freelists); i++)
960  {
961  u32 nfree = 0;
962  BVT (clib_bihash_value) * free_elt;
963  u64 free_elt_as_u64 = h->freelists[i];
964 
965  while (free_elt_as_u64)
966  {
967  free_elt = BV (clib_bihash_get_value) (h, free_elt_as_u64);
968  nfree++;
969  free_elt_as_u64 = free_elt->next_free_as_u64;
970  }
971 
972  if (nfree || verbose)
973  s = format (s, " [len %d] %u free elts\n", 1 << i, nfree);
974  }
975 
976  s = format (s, " %lld linear search buckets\n", linear_buckets);
977  used_bytes = alloc_arena_next (h);
978  s = format (s,
979  " arena: base %llx, next %llx\n"
980  " used %lld b (%lld Mbytes) of %lld b (%lld Mbytes)\n",
981  alloc_arena (h), alloc_arena_next (h),
982  used_bytes, used_bytes >> 20,
983  alloc_arena_size (h), alloc_arena_size (h) >> 20);
984  return s;
985 }
986 
988  (BVT (clib_bihash) * h,
989  BV (clib_bihash_foreach_key_value_pair_cb) cb, void *arg)
990 {
991  int i, j, k;
992  BVT (clib_bihash_bucket) * b;
993  BVT (clib_bihash_value) * v;
994 
995 
996 #if BIHASH_LAZY_INSTANTIATE
997  if (PREDICT_FALSE (alloc_arena (h) == 0))
998  return;
999 #endif
1000 
1001  for (i = 0; i < h->nbuckets; i++)
1002  {
1003  b = BV (clib_bihash_get_bucket) (h, i);
1004  if (BV (clib_bihash_bucket_is_empty) (b))
1005  continue;
1006 
1007  v = BV (clib_bihash_get_value) (h, b->offset);
1008  for (j = 0; j < (1 << b->log2_pages); j++)
1009  {
1010  for (k = 0; k < BIHASH_KVP_PER_PAGE; k++)
1011  {
1012  if (BV (clib_bihash_is_free) (&v->kvp[k]))
1013  continue;
1014 
1015  if (BIHASH_WALK_STOP == cb (&v->kvp[k], arg))
1016  return;
1017  /*
1018  * In case the callback deletes the last entry in the bucket...
1019  */
1020  if (BV (clib_bihash_bucket_is_empty) (b))
1021  goto doublebreak;
1022  }
1023  v++;
1024  }
1025  doublebreak:
1026  ;
1027  }
1028 }
1029 
1030 /** @endcond */
1031 
1032 /*
1033  * fd.io coding-style-patch-verification: ON
1034  *
1035  * Local Variables:
1036  * eval: (c-set-style "gnu")
1037  * End:
1038  */
#define vec_validate(V, I)
Make sure vector is long enough for given index (no header, unspecified alignment) ...
Definition: vec.h:507
#define BIHASH_KVP_PER_PAGE
Definition: bihash_16_8.h:24
a
Definition: bitmap.h:538
void clib_bihash_free(clib_bihash *h)
Destroy a bounded index extensible hash table.
#define PREDICT_TRUE(x)
Definition: clib.h:119
unsigned long u64
Definition: types.h:89
#define CLIB_MEMORY_STORE_BARRIER()
Definition: clib.h:133
#define clib_memcpy_fast(a, b, c)
Definition: string.h:81
#define F_ADD_SEALS
Definition: mem.c:40
clib_memset(h->entries, 0, sizeof(h->entries[0]) *entries)
static int memfd_create(const char *name, unsigned int flags)
Definition: syscall.h:52
void os_out_of_memory(void)
Definition: unix-misc.c:220
#define vec_add1(V, E)
Add 1 element to end of vector (unspecified alignment).
Definition: vec.h:590
u8 * format(u8 *s, const char *fmt,...)
Definition: format.c:424
unsigned char u8
Definition: types.h:56
u8 *() format_function_t(u8 *s, va_list *args)
Definition: format.h:48
#define MFD_ALLOW_SEALING
Definition: main.c:104
#define static_always_inline
Definition: clib.h:106
int clib_bihash_add_del(clib_bihash *h, clib_bihash_kv *add_v, int is_add)
Add or delete a (key,value) pair from a bi-hash table.
unsigned int u32
Definition: types.h:88
#define F_SEAL_SHRINK
Definition: mem.c:44
static uword clib_bihash_get_offset(clib_bihash *h, void *v)
Get clib mheap offset given a pointer.
int(* clib_bihash_foreach_key_value_pair_cb)(clib_bihash_kv *kv, void *ctx)
Definition: bihash_doc.h:175
void clib_bihash_foreach_key_value_pair(clib_bihash *h, clib_bihash_foreach_key_value_pair_cb *callback, void *arg)
Visit active (key,value) pairs in a bi-hash table.
u64 memory_size
Definition: vhost_user.h:151
vec_header_t h
Definition: buffer.c:322
static vnet_classify_entry_t * split_and_rehash_linear(vnet_classify_table_t *t, vnet_classify_entry_t *old_values, u32 old_log2_pages, u32 new_log2_pages)
#define PREDICT_FALSE(x)
Definition: clib.h:118
void clib_bihash_init(clib_bihash *h, char *name, u32 nbuckets, uword memory_size)
initialize a bounded index extensible hash table
uword clib_mem_vm_reserve(uword start, uword size, u32 log2_page_sz)
Definition: mem.c:348
BVT(clib_bihash)
Definition: l2_fib.c:972
static vnet_classify_entry_t * split_and_rehash(vnet_classify_table_t *t, vnet_classify_entry_t *old_values, u32 old_log2_pages, u32 new_log2_pages)
#define BIHASH_KVP_AT_BUCKET_LEVEL
Definition: bihash_16_8.h:25
sll srl srl sll sra u16x4 i
Definition: vector_sse42.h:317
#define vec_free(V)
Free vector&#39;s memory (no header).
Definition: vec.h:380
static void * clib_mem_set_heap(void *heap)
Definition: mem.h:268
#define clib_warning(format, args...)
Definition: error.h:59
static uword round_pow2(uword x, uword pow2)
Definition: clib.h:256
u32 len
Number of elements in vector (NOT its allocated length).
Definition: vec_bootstrap.h:57
string name[64]
Definition: ip.api:44
static void make_working_copy(vnet_classify_table_t *t, vnet_classify_bucket_t *b)
#define ASSERT(truth)
int clib_bihash_search_inline_2(clib_bihash *h, clib_bihash_kv *search_key, clib_bihash_kv *valuep)
Search a bi-hash table.
#define vec_delete(V, N, M)
Delete N elements starting at element M.
Definition: vec.h:852
static void clib_mem_free(void *p)
Definition: mem.h:215
u8 log2_pages
Definition: bihash_doc.h:62
vector header structure
Definition: vec_bootstrap.h:55
static_always_inline void clib_memset_u8(void *p, u8 val, uword count)
Definition: string.h:424
static uword extract_bits(uword x, int start, int count)
Definition: clib.h:304
template key/value backing page structure
Definition: bihash_doc.h:44
static void clib_mem_vm_free(void *addr, uword size)
Definition: mem.h:338
u8 vector_data[0]
Vector data .
Definition: vec_bootstrap.h:60
#define vec_len(v)
Number of elements in vector (rvalue-only, NULL tolerant)
static uword max_log2(uword x)
Definition: clib.h:206
u64 uword
Definition: types.h:112
#define clib_unix_warning(format, args...)
Definition: error.h:68
static_always_inline uword os_get_thread_index(void)
Definition: os.h:63
static void * clib_mem_alloc_aligned(uword size, uword align)
Definition: mem.h:165
static void * clib_bihash_get_value(clib_bihash *h, uword offset)
Get pointer to value page given its clib mheap offset.
void ** clib_all_bihashes
#define vec_validate_init_empty(V, I, INIT)
Make sure vector is long enough for given index and initialize empty space (no header, unspecified alignment)
Definition: vec.h:554
#define CLIB_CACHE_LINE_BYTES
Definition: cache.h:59
void * clib_all_bihash_set_heap(void)
static void * alloc_aligned(uword size, uword log2_align, void **ptr_to_free)
Definition: test_vec.h:191