FD.io VPP  v17.10-9-gd594711
Vector Packet Processing
load_balance.c
Go to the documentation of this file.
/*
 * Copyright (c) 2016 Cisco and/or its affiliates.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
15 
#include <vnet/ip/lookup.h>
#include <vnet/dpo/load_balance.h>
#include <vnet/dpo/load_balance_map.h>
#include <vnet/dpo/drop_dpo.h>
#include <vppinfra/math.h>              /* for fabs */
#include <vnet/adj/adj.h>
#include <vnet/adj/adj_internal.h>
#include <vnet/fib/fib_urpf_list.h>
24 
25 /*
26  * distribution error tolerance for load-balancing
27  */
29 
#undef LB_DEBUG

#ifdef LB_DEBUG
/**
 * Debug logging: prefix the message with the formatted state of the
 * load-balance object. Only compiled in when LB_DEBUG is defined above;
 * otherwise the macro expands to nothing (zero runtime cost).
 */
#define LB_DBG(_lb, _fmt, _args...)                                     \
{                                                                       \
    u8* _tmp =NULL;                                                     \
    clib_warning("lb:[%s]:" _fmt,                                       \
                 load_balance_format(load_balance_get_index((_lb)),     \
                                     0, _tmp),                          \
                 ##_args);                                              \
    vec_free(_tmp);                                                     \
}
#else
#define LB_DBG(_p, _fmt, _args...)
#endif
45 
46 
47 /**
48  * Pool of all DPOs. It's not static so the DP can have fast access
49  */
51 
52 /**
53  * The one instance of load-balance main
54  */
56 
57 f64
59 {
61 }
62 
63 static inline index_t
65 {
66  return (lb - load_balance_pool);
67 }
68 
69 static inline dpo_id_t*
71 {
72  if (LB_HAS_INLINE_BUCKETS(lb))
73  {
74  return (lb->lb_buckets_inline);
75  }
76  else
77  {
78  return (lb->lb_buckets);
79  }
80 }
81 
82 static load_balance_t *
84 {
85  load_balance_t *lb;
86 
87  pool_get_aligned(load_balance_pool, lb, CLIB_CACHE_LINE_BYTES);
88  memset(lb, 0, sizeof(*lb));
89 
90  lb->lb_map = INDEX_INVALID;
91  lb->lb_urpf = INDEX_INVALID;
96  vlib_zero_combined_counter(&(load_balance_main.lbm_to_counters),
98  vlib_zero_combined_counter(&(load_balance_main.lbm_via_counters),
100 
101  return (lb);
102 }
103 
104 static u8*
107  u32 indent,
108  u8 *s)
109 {
110  vlib_counter_t to, via;
111  load_balance_t *lb;
112  dpo_id_t *buckets;
113  u32 i;
114 
115  lb = load_balance_get(lbi);
116  vlib_get_combined_counter(&(load_balance_main.lbm_to_counters), lbi, &to);
117  vlib_get_combined_counter(&(load_balance_main.lbm_via_counters), lbi, &via);
118  buckets = load_balance_get_buckets(lb);
119 
120  s = format(s, "%U: ", format_dpo_type, DPO_LOAD_BALANCE);
121  s = format(s, "[proto:%U ", format_dpo_proto, lb->lb_proto);
122  s = format(s, "index:%d buckets:%d ", lbi, lb->lb_n_buckets);
123  s = format(s, "uRPF:%d ", lb->lb_urpf);
124  s = format(s, "to:[%Ld:%Ld]", to.packets, to.bytes);
125  if (0 != via.packets)
126  {
127  s = format(s, " via:[%Ld:%Ld]",
128  via.packets, via.bytes);
129  }
130  s = format(s, "]");
131 
132  if (INDEX_INVALID != lb->lb_map)
133  {
134  s = format(s, "\n%U%U",
135  format_white_space, indent+4,
136  format_load_balance_map, lb->lb_map, indent+4);
137  }
138  for (i = 0; i < lb->lb_n_buckets; i++)
139  {
140  s = format(s, "\n%U[%d] %U",
141  format_white_space, indent+2,
142  i,
144  &buckets[i], indent+6);
145  }
146  return (s);
147 }
148 
149 u8*
150 format_load_balance (u8 * s, va_list * args)
151 {
152  index_t lbi = va_arg(*args, index_t);
154 
155  return (load_balance_format(lbi, flags, 0, s));
156 }
157 static u8*
158 format_load_balance_dpo (u8 * s, va_list * args)
159 {
160  index_t lbi = va_arg(*args, index_t);
161  u32 indent = va_arg(*args, u32);
162 
163  return (load_balance_format(lbi, LOAD_BALANCE_FORMAT_DETAIL, indent, s));
164 }
165 
166 
167 static load_balance_t *
169  dpo_proto_t lb_proto,
170  flow_hash_config_t fhc)
171 {
172  load_balance_t *lb;
173 
174  lb = load_balance_alloc_i();
175  lb->lb_hash_config = fhc;
176  lb->lb_n_buckets = num_buckets;
177  lb->lb_n_buckets_minus_1 = num_buckets-1;
178  lb->lb_proto = lb_proto;
179 
180  if (!LB_HAS_INLINE_BUCKETS(lb))
181  {
183  lb->lb_n_buckets - 1,
185  }
186 
187  LB_DBG(lb, "create");
188 
189  return (lb);
190 }
191 
192 index_t
194  dpo_proto_t lb_proto,
195  flow_hash_config_t fhc)
196 {
197  return (load_balance_get_index(load_balance_create_i(n_buckets, lb_proto, fhc)));
198 }
199 
200 static inline void
202  u32 bucket,
203  dpo_id_t *buckets,
204  const dpo_id_t *next)
205 {
206  dpo_stack(DPO_LOAD_BALANCE, lb->lb_proto, &buckets[bucket], next);
207 }
208 
209 void
211  u32 bucket,
212  const dpo_id_t *next)
213 {
214  load_balance_t *lb;
215  dpo_id_t *buckets;
216 
217  lb = load_balance_get(lbi);
218  buckets = load_balance_get_buckets(lb);
219 
220  ASSERT(bucket < lb->lb_n_buckets);
221 
222  load_balance_set_bucket_i(lb, bucket, buckets, next);
223 }
224 
225 int
227 {
228  load_balance_t *lb;
229 
230  if (DPO_LOAD_BALANCE != dpo->dpoi_type)
231  return (0);
232 
233  lb = load_balance_get(dpo->dpoi_index);
234 
235  if (1 == lb->lb_n_buckets)
236  {
237  return (dpo_is_drop(load_balance_get_bucket_i(lb, 0)));
238  }
239  return (0);
240 }
241 
242 void
245 {
246  load_balance_t *lb;
247 
248  lb = load_balance_get(lbi);
250 }
251 
252 
253 void
255  index_t urpf)
256 {
257  load_balance_t *lb;
258  index_t old;
259 
260  lb = load_balance_get(lbi);
261 
262  /*
263  * packets in flight we see this change. but it's atomic, so :P
264  */
265  old = lb->lb_urpf;
266  lb->lb_urpf = urpf;
267 
269  fib_urpf_list_lock(urpf);
270 }
271 
272 index_t
274 {
275  load_balance_t *lb;
276 
277  lb = load_balance_get(lbi);
278 
279  return (lb->lb_urpf);
280 }
281 
282 const dpo_id_t *
284  u32 bucket)
285 {
286  load_balance_t *lb;
287 
288  lb = load_balance_get(lbi);
289 
290  return (load_balance_get_bucket_i(lb, bucket));
291 }
292 
293 static int
295  const load_balance_path_t * n2)
296 {
297  return ((int) n1->path_weight - (int) n2->path_weight);
298 }
299 
300 /* Given next hop vector is over-written with normalized one with sorted weights and
301  with weights corresponding to the number of adjacencies for each next hop.
302  Returns number of adjacencies in block. */
303 u32
305  load_balance_path_t ** normalized_next_hops,
306  u32 *sum_weight_in,
308 {
309  load_balance_path_t * nhs;
310  uword n_nhs, n_adj, n_adj_left, i, sum_weight;
311  f64 norm, error;
312 
313  n_nhs = vec_len (raw_next_hops);
314  ASSERT (n_nhs > 0);
315  if (n_nhs == 0)
316  return 0;
317 
318  /* Allocate enough space for 2 copies; we'll use second copy to save original weights. */
319  nhs = *normalized_next_hops;
320  vec_validate (nhs, 2*n_nhs - 1);
321 
322  /* Fast path: 1 next hop in block. */
323  n_adj = n_nhs;
324  if (n_nhs == 1)
325  {
326  nhs[0] = raw_next_hops[0];
327  nhs[0].path_weight = 1;
328  _vec_len (nhs) = 1;
329  sum_weight = 1;
330  goto done;
331  }
332 
333  else if (n_nhs == 2)
334  {
335  int cmp = next_hop_sort_by_weight (&raw_next_hops[0], &raw_next_hops[1]) < 0;
336 
337  /* Fast sort. */
338  nhs[0] = raw_next_hops[cmp];
339  nhs[1] = raw_next_hops[cmp ^ 1];
340 
341  /* Fast path: equal cost multipath with 2 next hops. */
342  if (nhs[0].path_weight == nhs[1].path_weight)
343  {
344  nhs[0].path_weight = nhs[1].path_weight = 1;
345  _vec_len (nhs) = 2;
346  sum_weight = 2;
347  goto done;
348  }
349  }
350  else
351  {
352  clib_memcpy (nhs, raw_next_hops, n_nhs * sizeof (raw_next_hops[0]));
353  qsort (nhs, n_nhs, sizeof (nhs[0]), (void *) next_hop_sort_by_weight);
354  }
355 
356  /* Find total weight to normalize weights. */
357  sum_weight = 0;
358  for (i = 0; i < n_nhs; i++)
359  sum_weight += nhs[i].path_weight;
360 
361  /* In the unlikely case that all weights are given as 0, set them all to 1. */
362  if (sum_weight == 0)
363  {
364  for (i = 0; i < n_nhs; i++)
365  nhs[i].path_weight = 1;
366  sum_weight = n_nhs;
367  }
368 
369  /* Save copies of all next hop weights to avoid being overwritten in loop below. */
370  for (i = 0; i < n_nhs; i++)
371  nhs[n_nhs + i].path_weight = nhs[i].path_weight;
372 
373  /* Try larger and larger power of 2 sized adjacency blocks until we
374  find one where traffic flows to within 1% of specified weights. */
375  for (n_adj = max_pow2 (n_nhs); ; n_adj *= 2)
376  {
377  error = 0;
378 
379  norm = n_adj / ((f64) sum_weight);
380  n_adj_left = n_adj;
381  for (i = 0; i < n_nhs; i++)
382  {
383  f64 nf = nhs[n_nhs + i].path_weight * norm; /* use saved weights */
384  word n = flt_round_nearest (nf);
385 
386  n = n > n_adj_left ? n_adj_left : n;
387  n_adj_left -= n;
388  error += fabs (nf - n);
389  nhs[i].path_weight = n;
390 
391  if (0 == nhs[i].path_weight)
392  {
393  /*
394  * when the weight skew is high (norm is small) and n == nf.
395  * without this correction the path with a low weight would have
396  * no represenation in the load-balanace - don't want that.
397  * If the weight skew is high so the load-balance has many buckets
398  * to allow it. pays ya money takes ya choice.
399  */
400  error = n_adj;
401  break;
402  }
403  }
404 
405  nhs[0].path_weight += n_adj_left;
406 
407  /* Less than 5% average error per adjacency with this size adjacency block? */
408  if (error <= multipath_next_hop_error_tolerance*n_adj)
409  {
410  /* Truncate any next hops with zero weight. */
411  _vec_len (nhs) = i;
412  break;
413  }
414  }
415 
416 done:
417  /* Save vector for next call. */
418  *normalized_next_hops = nhs;
419  *sum_weight_in = sum_weight;
420  return n_adj;
421 }
422 
423 static load_balance_path_t *
425  dpo_proto_t drop_proto)
426 {
427  if (0 == vec_len(nhs))
428  {
429  load_balance_path_t *new_nhs = NULL, *nh;
430 
431  /*
432  * we need something for the load-balance. so use the drop
433  */
434  vec_add2(new_nhs, nh, 1);
435 
436  nh->path_weight = 1;
437  dpo_copy(&nh->path_dpo, drop_dpo_get(drop_proto));
438 
439  return (new_nhs);
440  }
441 
442  return (NULL);
443 }
444 
445 /*
446  * Fill in adjacencies in block based on corresponding
447  * next hop adjacencies.
448  */
449 static void
451  load_balance_path_t *nhs,
452  dpo_id_t *buckets,
453  u32 n_buckets)
454 {
455  load_balance_path_t * nh;
456  u16 ii, bucket;
457 
458  bucket = 0;
459 
460  /*
461  * the next-hops have normalised weights. that means their sum is the number
462  * of buckets we need to fill.
463  */
464  vec_foreach (nh, nhs)
465  {
466  for (ii = 0; ii < nh->path_weight; ii++)
467  {
468  ASSERT(bucket < n_buckets);
469  load_balance_set_bucket_i(lb, bucket++, buckets, &nh->path_dpo);
470  }
471  }
472 }
473 
474 static inline void
476  u32 n_buckets)
477 {
478  lb->lb_n_buckets = n_buckets;
479  lb->lb_n_buckets_minus_1 = n_buckets-1;
480 }
481 
482 void
484  const load_balance_path_t * raw_nhs,
486 {
487  load_balance_path_t *nh, *nhs, *fixed_nhs;
488  u32 sum_of_weights, n_buckets, ii;
489  index_t lbmi, old_lbmi;
490  load_balance_t *lb;
491  dpo_id_t *tmp_dpo;
492 
493  nhs = NULL;
494 
496  lb = load_balance_get(dpo->dpoi_index);
497  fixed_nhs = load_balance_multipath_next_hop_fixup(raw_nhs, lb->lb_proto);
498  n_buckets =
499  ip_multipath_normalize_next_hops((NULL == fixed_nhs ?
500  raw_nhs :
501  fixed_nhs),
502  &nhs,
503  &sum_of_weights,
505 
506  ASSERT (n_buckets >= vec_len (raw_nhs));
507 
508  /*
509  * Save the old load-balance map used, and get a new one if required.
510  */
511  old_lbmi = lb->lb_map;
512  if (flags & LOAD_BALANCE_FLAG_USES_MAP)
513  {
514  lbmi = load_balance_map_add_or_lock(n_buckets, sum_of_weights, nhs);
515  }
516  else
517  {
518  lbmi = INDEX_INVALID;
519  }
520 
521  if (0 == lb->lb_n_buckets)
522  {
523  /*
524  * first time initialisation. no packets inflight, so we can write
525  * at leisure.
526  */
527  load_balance_set_n_buckets(lb, n_buckets);
528 
529  if (!LB_HAS_INLINE_BUCKETS(lb))
531  lb->lb_n_buckets - 1,
533 
536  n_buckets);
537  lb->lb_map = lbmi;
538  }
539  else
540  {
541  /*
542  * This is a modification of an existing load-balance.
543  * We need to ensure that packets inflight see a consistent state, that
544  * is the number of reported buckets the LB has (read from
545  * lb_n_buckets_minus_1) is not more than it actually has. So if the
546  * number of buckets is increasing, we must update the bucket array first,
547  * then the reported number. vice-versa if the number of buckets goes down.
548  */
549  if (n_buckets == lb->lb_n_buckets)
550  {
551  /*
552  * no change in the number of buckets. we can simply fill what
553  * is new over what is old.
554  */
557  n_buckets);
558  lb->lb_map = lbmi;
559  }
560  else if (n_buckets > lb->lb_n_buckets)
561  {
562  /*
563  * we have more buckets. the old load-balance map (if there is one)
564  * will remain valid, i.e. mapping to indices within range, so we
565  * update it last.
566  */
567  if (n_buckets > LB_NUM_INLINE_BUCKETS &&
569  {
570  /*
571  * the new increased number of buckets is crossing the threshold
572  * from the inline storage to out-line. Alloc the outline buckets
573  * first, then fixup the number. then reset the inlines.
574  */
575  ASSERT(NULL == lb->lb_buckets);
577  n_buckets - 1,
579 
581  lb->lb_buckets,
582  n_buckets);
584  load_balance_set_n_buckets(lb, n_buckets);
585 
587 
588  for (ii = 0; ii < LB_NUM_INLINE_BUCKETS; ii++)
589  {
590  dpo_reset(&lb->lb_buckets_inline[ii]);
591  }
592  }
593  else
594  {
595  if (n_buckets <= LB_NUM_INLINE_BUCKETS)
596  {
597  /*
598  * we are not crossing the threshold and it's still inline buckets.
599  * we can write the new on the old..
600  */
603  n_buckets);
605  load_balance_set_n_buckets(lb, n_buckets);
606  }
607  else
608  {
609  /*
610  * we are not crossing the threshold. We need a new bucket array to
611  * hold the increased number of choices.
612  */
613  dpo_id_t *new_buckets, *old_buckets, *tmp_dpo;
614 
615  new_buckets = NULL;
616  old_buckets = load_balance_get_buckets(lb);
617 
618  vec_validate_aligned(new_buckets,
619  n_buckets - 1,
621 
622  load_balance_fill_buckets(lb, nhs, new_buckets, n_buckets);
624  lb->lb_buckets = new_buckets;
626  load_balance_set_n_buckets(lb, n_buckets);
627 
628  vec_foreach(tmp_dpo, old_buckets)
629  {
630  dpo_reset(tmp_dpo);
631  }
632  vec_free(old_buckets);
633  }
634  }
635 
636  /*
637  * buckets fixed. ready for the MAP update.
638  */
639  lb->lb_map = lbmi;
640  }
641  else
642  {
643  /*
644  * bucket size shrinkage.
645  * Any map we have will be based on the old
646  * larger number of buckets, so will be translating to indices
647  * out of range. So the new MAP must be installed first.
648  */
649  lb->lb_map = lbmi;
651 
652 
653  if (n_buckets <= LB_NUM_INLINE_BUCKETS &&
655  {
656  /*
657  * the new decreased number of buckets is crossing the threshold
658  * from out-line storage to inline:
659  * 1 - Fill the inline buckets,
660  * 2 - fixup the number (and this point the inline buckets are
661  * used).
662  * 3 - free the outline buckets
663  */
665  lb->lb_buckets_inline,
666  n_buckets);
668  load_balance_set_n_buckets(lb, n_buckets);
670 
671  vec_foreach(tmp_dpo, lb->lb_buckets)
672  {
673  dpo_reset(tmp_dpo);
674  }
675  vec_free(lb->lb_buckets);
676  }
677  else
678  {
679  /*
680  * not crossing the threshold.
681  * 1 - update the number to the smaller size
682  * 2 - write the new buckets
683  * 3 - reset those no longer used.
684  */
685  dpo_id_t *buckets;
686  u32 old_n_buckets;
687 
688  old_n_buckets = lb->lb_n_buckets;
689  buckets = load_balance_get_buckets(lb);
690 
691  load_balance_set_n_buckets(lb, n_buckets);
693 
695  buckets,
696  n_buckets);
697 
698  for (ii = n_buckets; ii < old_n_buckets; ii++)
699  {
700  dpo_reset(&buckets[ii]);
701  }
702  }
703  }
704  }
705 
706  vec_foreach (nh, nhs)
707  {
708  dpo_reset(&nh->path_dpo);
709  }
710  vec_free(nhs);
711  vec_free(fixed_nhs);
712 
713  load_balance_map_unlock(old_lbmi);
714 }
715 
716 static void
718 {
719  load_balance_t *lb;
720 
721  lb = load_balance_get(dpo->dpoi_index);
722 
723  lb->lb_locks++;
724 }
725 
726 static void
728 {
729  dpo_id_t *buckets;
730  int i;
731 
732  buckets = load_balance_get_buckets(lb);
733 
734  for (i = 0; i < lb->lb_n_buckets; i++)
735  {
736  dpo_reset(&buckets[i]);
737  }
738 
739  LB_DBG(lb, "destroy");
740  if (!LB_HAS_INLINE_BUCKETS(lb))
741  {
742  vec_free(lb->lb_buckets);
743  }
744 
747 
748  pool_put(load_balance_pool, lb);
749 }
750 
751 static void
753 {
754  load_balance_t *lb;
755 
756  lb = load_balance_get(dpo->dpoi_index);
757 
758  lb->lb_locks--;
759 
760  if (0 == lb->lb_locks)
761  {
763  }
764 }
765 
766 static void
768 {
769  fib_show_memory_usage("load-balance",
770  pool_elts(load_balance_pool),
771  pool_len(load_balance_pool),
772  sizeof(load_balance_t));
774 }
775 
776 const static dpo_vft_t lb_vft = {
778  .dv_unlock = load_balance_unlock,
779  .dv_format = format_load_balance_dpo,
780  .dv_mem_show = load_balance_mem_show,
781 };
782 
783 /**
784  * @brief The per-protocol VLIB graph nodes that are assigned to a load-balance
785  * object.
786  *
787  * this means that these graph nodes are ones from which a load-balance is the
788  * parent object in the DPO-graph.
789  *
790  * We do not list all the load-balance nodes, such as the *-lookup. instead
791  * we are relying on the correct use of the .sibling_of field when setting
792  * up these sibling nodes.
793  */
794 const static char* const load_balance_ip4_nodes[] =
795 {
796  "ip4-load-balance",
797  NULL,
798 };
799 const static char* const load_balance_ip6_nodes[] =
800 {
801  "ip6-load-balance",
802  NULL,
803 };
804 const static char* const load_balance_mpls_nodes[] =
805 {
806  "mpls-load-balance",
807  NULL,
808 };
809 const static char* const load_balance_l2_nodes[] =
810 {
811  "l2-load-balance",
812  NULL,
813 };
814 const static char* const load_balance_nsh_nodes[] =
815 {
816  "nsh-load-balance",
817  NULL,
818 };
819 const static char* const * const load_balance_nodes[DPO_PROTO_NUM] =
820 {
826 };
827 
828 void
830 {
831  index_t lbi;
832 
834 
835  /*
836  * Special LB with index zero. we need to define this since the v4 mtrie
837  * assumes an index of 0 implies the ply is empty. therefore all 'real'
838  * adjs need a non-zero index.
839  * This should never be used, but just in case, stack it on a drop.
840  */
841  lbi = load_balance_create(1, DPO_PROTO_IP4, 0);
843 
845 }
846 
847 static clib_error_t *
849  unformat_input_t * input,
850  vlib_cli_command_t * cmd)
851 {
852  index_t lbi = INDEX_INVALID;
853 
855  {
856  if (unformat (input, "%d", &lbi))
857  ;
858  else
859  break;
860  }
861 
862  if (INDEX_INVALID != lbi)
863  {
864  vlib_cli_output (vm, "%U", format_load_balance, lbi,
866  }
867  else
868  {
869  load_balance_t *lb;
870 
871  pool_foreach(lb, load_balance_pool,
872  ({
876  }));
877  }
878 
879  return 0;
880 }
881 
882 VLIB_CLI_COMMAND (load_balance_show_command, static) = {
883  .path = "show load-balance",
884  .short_help = "show load-balance [<index>]",
885  .function = load_balance_show,
886 };
887 
888 
890 ip_flow_hash (void *data)
891 {
892  ip4_header_t *iph = (ip4_header_t *) data;
893 
894  if ((iph->ip_version_and_header_length & 0xF0) == 0x40)
896  else
898 }
899 
902 {
903  return (*((u64 *) m) & 0xffffffffffff);
904 }
905 
908 {
909  ethernet_header_t *eh;
910  u64 a, b, c;
911  uword is_ip, eh_size;
912  u16 eh_type;
913 
914  eh = vlib_buffer_get_current (b0);
915  eh_type = clib_net_to_host_u16 (eh->type);
916  eh_size = ethernet_buffer_header_size (b0);
917 
918  is_ip = (eh_type == ETHERNET_TYPE_IP4 || eh_type == ETHERNET_TYPE_IP6);
919 
920  /* since we have 2 cache lines, use them */
921  if (is_ip)
922  a = ip_flow_hash ((u8 *) vlib_buffer_get_current (b0) + eh_size);
923  else
924  a = eh->type;
925 
926  b = mac_to_u64 ((u8 *) eh->dst_address);
927  c = mac_to_u64 ((u8 *) eh->src_address);
928  hash_mix64 (a, b, c);
929 
930  return (u32) c;
931 }
932 
933 typedef struct load_balance_trace_t_
934 {
937 
938 static uword
940  vlib_node_runtime_t * node,
941  vlib_frame_t * frame)
942 {
943  u32 n_left_from, next_index, *from, *to_next;
944 
945  from = vlib_frame_vector_args (frame);
946  n_left_from = frame->n_vectors;
947 
948  next_index = node->cached_next_index;
949 
950  while (n_left_from > 0)
951  {
952  u32 n_left_to_next;
953 
954  vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
955 
956  while (n_left_from > 0 && n_left_to_next > 0)
957  {
958  vlib_buffer_t *b0;
959  u32 bi0, lbi0, next0;
960  const dpo_id_t *dpo0;
961  const load_balance_t *lb0;
962 
963  bi0 = from[0];
964  to_next[0] = bi0;
965  from += 1;
966  to_next += 1;
967  n_left_from -= 1;
968  n_left_to_next -= 1;
969 
970  b0 = vlib_get_buffer (vm, bi0);
971 
972  /* lookup dst + src mac */
973  lbi0 = vnet_buffer (b0)->ip.adj_index[VLIB_TX];
974  lb0 = load_balance_get(lbi0);
975 
976  vnet_buffer(b0)->ip.flow_hash = l2_flow_hash(b0);
977 
978  dpo0 = load_balance_get_bucket_i(lb0,
979  vnet_buffer(b0)->ip.flow_hash &
980  (lb0->lb_n_buckets_minus_1));
981 
982  next0 = dpo0->dpoi_next_node;
983  vnet_buffer (b0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
984 
986  {
987  load_balance_trace_t *tr = vlib_add_trace (vm, node, b0,
988  sizeof (*tr));
989  tr->lb_index = lbi0;
990  }
991  vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
992  n_left_to_next, bi0, next0);
993  }
994 
995  vlib_put_next_frame (vm, node, next_index, n_left_to_next);
996  }
997 
998  return frame->n_vectors;
999 }
1000 
1001 static u8 *
1002 format_l2_load_balance_trace (u8 * s, va_list * args)
1003 {
1004  CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1005  CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1006  load_balance_trace_t *t = va_arg (*args, load_balance_trace_t *);
1007 
1008  s = format (s, "L2-load-balance: index %d", t->lb_index);
1009  return s;
1010 }
1011 
1012 /**
1013  * @brief
1014  */
1016  .function = l2_load_balance,
1017  .name = "l2-load-balance",
1018  .vector_size = sizeof (u32),
1019 
1020  .format_trace = format_l2_load_balance_trace,
1021  .n_next_nodes = 1,
1022  .next_nodes = {
1023  [0] = "error-drop",
1024  },
1025 };
1026 
1027 static uword
1029  vlib_node_runtime_t * node,
1030  vlib_frame_t * frame)
1031 {
1032  u32 n_left_from, next_index, *from, *to_next;
1033 
1034  from = vlib_frame_vector_args (frame);
1035  n_left_from = frame->n_vectors;
1036 
1037  next_index = node->cached_next_index;
1038 
1039  while (n_left_from > 0)
1040  {
1041  u32 n_left_to_next;
1042 
1043  vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
1044 
1045  while (n_left_from > 0 && n_left_to_next > 0)
1046  {
1047  vlib_buffer_t *b0;
1048  u32 bi0, lbi0, next0, *nsh0;
1049  const dpo_id_t *dpo0;
1050  const load_balance_t *lb0;
1051 
1052  bi0 = from[0];
1053  to_next[0] = bi0;
1054  from += 1;
1055  to_next += 1;
1056  n_left_from -= 1;
1057  n_left_to_next -= 1;
1058 
1059  b0 = vlib_get_buffer (vm, bi0);
1060 
1061  lbi0 = vnet_buffer (b0)->ip.adj_index[VLIB_TX];
1062  lb0 = load_balance_get(lbi0);
1063 
1064  /* SPI + SI are the second word of the NSH header */
1065  nsh0 = vlib_buffer_get_current (b0);
1066  vnet_buffer(b0)->ip.flow_hash = nsh0[1] % lb0->lb_n_buckets;
1067 
1068  dpo0 = load_balance_get_bucket_i(lb0,
1069  vnet_buffer(b0)->ip.flow_hash &
1070  (lb0->lb_n_buckets_minus_1));
1071 
1072  next0 = dpo0->dpoi_next_node;
1073  vnet_buffer (b0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
1074 
1076  {
1077  load_balance_trace_t *tr = vlib_add_trace (vm, node, b0,
1078  sizeof (*tr));
1079  tr->lb_index = lbi0;
1080  }
1081  vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
1082  n_left_to_next, bi0, next0);
1083  }
1084 
1085  vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1086  }
1087 
1088  return frame->n_vectors;
1089 }
1090 
1091 static u8 *
1092 format_nsh_load_balance_trace (u8 * s, va_list * args)
1093 {
1094  CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1095  CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1096  load_balance_trace_t *t = va_arg (*args, load_balance_trace_t *);
1097 
1098  s = format (s, "NSH-load-balance: index %d", t->lb_index);
1099  return s;
1100 }
1101 
1102 /**
1103  * @brief
1104  */
1106  .function = nsh_load_balance,
1107  .name = "nsh-load-balance",
1108  .vector_size = sizeof (u32),
1109 
1110  .format_trace = format_nsh_load_balance_trace,
1111  .n_next_nodes = 1,
1112  .next_nodes = {
1113  [0] = "error-drop",
1114  },
1115 };
#define vec_validate(V, I)
Make sure vector is long enough for given index (no header, unspecified alignment) ...
Definition: vec.h:432
u16 lb_n_buckets
number of buckets in the load-balance.
Definition: load_balance.h:88
static const char *const load_balance_ip6_nodes[]
Definition: load_balance.c:799
dpo_id_t * lb_buckets
Vector of buckets containing the next DPOs, sized as lbo_num.
Definition: load_balance.h:133
dpo_lock_fn_t dv_lock
A reference counting lock function.
Definition: dpo.h:350
static u8 * format_l2_load_balance_trace(u8 *s, va_list *args)
void load_balance_map_unlock(index_t lbmi)
vlib_combined_counter_main_t lbm_to_counters
Definition: load_balance.h:46
index_t load_balance_map_add_or_lock(u32 n_buckets, u32 sum_of_weights, const load_balance_path_t *paths)
fib_entry_flag_t lb_fib_entry_flags
Flags from the load-balance&#39;s associated fib_entry_t.
Definition: load_balance.h:105
static index_t load_balance_get_index(const load_balance_t *lb)
Definition: load_balance.c:64
sll srl srl sll sra u16x4 i
Definition: vector_sse2.h:337
static const char *const *const load_balance_nodes[DPO_PROTO_NUM]
Definition: load_balance.c:819
#define CLIB_UNUSED(x)
Definition: clib.h:79
A virtual function table regisitered for a DPO type.
Definition: dpo.h:345
static const char *const load_balance_mpls_nodes[]
Definition: load_balance.c:804
void vlib_validate_combined_counter(vlib_combined_counter_main_t *cm, u32 index)
validate a combined counter
Definition: counter.c:89
a
Definition: bitmap.h:516
u8 * format_dpo_type(u8 *s, va_list *args)
format a DPO type
Definition: dpo.c:133
dpo_id_t path_dpo
ID of the Data-path object.
Definition: load_balance.h:66
static void load_balance_set_n_buckets(load_balance_t *lb, u32 n_buckets)
Definition: load_balance.c:475
static u32 ip_flow_hash(void *data)
Definition: load_balance.c:890
enum load_balance_format_flags_t_ load_balance_format_flags_t
Flags controlling load-balance formatting/display.
Definitions for all things IP (v4|v6) unicast and multicast lookup related.
#define NULL
Definition: clib.h:55
static u32 ip4_compute_flow_hash(const ip4_header_t *ip, flow_hash_config_t flow_hash_config)
Definition: ip4.h:287
void load_balance_set_urpf(index_t lbi, index_t urpf)
Definition: load_balance.c:254
#define ethernet_buffer_header_size(b)
Determine the size of the Ethernet headers of the current frame in the buffer.
Definition: ethernet.h:390
flow_hash_config_t lb_hash_config
the hash config to use when selecting a bucket.
Definition: load_balance.h:128
u8 src_address[6]
Definition: packet.h:54
void dpo_copy(dpo_id_t *dst, const dpo_id_t *src)
atomic copy a data-plane object.
Definition: dpo.c:255
u32 index_t
A Data-Path Object is an object that represents actions that are applied to packets are they are swit...
Definition: dpo.h:41
Combined counter to hold both packets and byte differences.
Definition: counter.h:139
static const char *const load_balance_ip4_nodes[]
The per-protocol VLIB graph nodes that are assigned to a load-balance object.
Definition: load_balance.c:794
#define vec_add2(V, P, N)
Add N elements to end of vector V, return pointer to new elements in P.
Definition: vec.h:557
static u8 * format_load_balance_dpo(u8 *s, va_list *args)
Definition: load_balance.c:158
u8 * format(u8 *s, const char *fmt,...)
Definition: format.c:419
#define vec_validate_aligned(V, I, A)
Make sure vector is long enough for given index (no header, specified alignment)
Definition: vec.h:443
static u8 * load_balance_format(index_t lbi, load_balance_format_flags_t flags, u32 indent, u8 *s)
Definition: load_balance.c:105
static load_balance_t * load_balance_alloc_i(void)
Definition: load_balance.c:83
index_t load_balance_get_urpf(index_t lbi)
Definition: load_balance.c:273
static const char *const load_balance_l2_nodes[]
Definition: load_balance.c:809
#define pool_len(p)
Number of elements in pool vector.
Definition: pool.h:140
index_t load_balance_create(u32 n_buckets, dpo_proto_t lb_proto, flow_hash_config_t fhc)
Definition: load_balance.c:193
const dpo_id_t * drop_dpo_get(dpo_proto_t proto)
Definition: drop_dpo.c:25
void dpo_register(dpo_type_t type, const dpo_vft_t *vft, const char *const *const *nodes)
For a given DPO type Register:
Definition: dpo.c:303
#define pool_foreach(VAR, POOL, BODY)
Iterate through pool.
Definition: pool.h:437
load_balance_t * load_balance_pool
Pool of all DPOs.
Definition: load_balance.c:50
void load_balance_map_module_init(void)
Make/add a new or lock an existing Load-balance map.
static dpo_id_t * load_balance_get_buckets(load_balance_t *lb)
Definition: load_balance.c:70
#define always_inline
Definition: clib.h:84
void load_balance_module_init(void)
Definition: load_balance.c:829
u16 lb_n_buckets_minus_1
number of buckets in the load-balance - 1.
Definition: load_balance.h:93
u8 dst_address[6]
Definition: packet.h:53
u8 * format_white_space(u8 *s, va_list *va)
Definition: std-formats.c:113
static int next_hop_sort_by_weight(const load_balance_path_t *n1, const load_balance_path_t *n2)
Definition: load_balance.c:294
static void load_balance_mem_show(void)
Definition: load_balance.c:767
void fib_urpf_list_lock(index_t ui)
Definition: fib_urpf_list.c:87
static load_balance_t * load_balance_create_i(u32 num_buckets, dpo_proto_t lb_proto, flow_hash_config_t fhc)
Definition: load_balance.c:168
void fib_show_memory_usage(const char *name, u32 in_use_elts, u32 allocd_elts, size_t size_elt)
Show the memory usage for a type.
Definition: fib_node.c:221
void load_balance_multipath_update(const dpo_id_t *dpo, const load_balance_path_t *raw_nhs, load_balance_flags_t flags)
Definition: load_balance.c:483
unsigned long u64
Definition: types.h:89
f64 load_balance_get_multipath_tolerance(void)
Definition: load_balance.c:58
enum dpo_proto_t_ dpo_proto_t
Data path protocol.
static void load_balance_lock(dpo_id_t *dpo)
Definition: load_balance.c:717
int load_balance_is_drop(const dpo_id_t *dpo)
Definition: load_balance.c:226
static void load_balance_unlock(dpo_id_t *dpo)
Definition: load_balance.c:752
The identity of a DPO is a combination of its type and its instance number/index of objects of that t...
Definition: dpo.h:150
static load_balance_path_t * load_balance_multipath_next_hop_fixup(const load_balance_path_t *nhs, dpo_proto_t drop_proto)
Definition: load_balance.c:424
static void vlib_zero_combined_counter(vlib_combined_counter_main_t *cm, u32 index)
Clear a combined counter Clears the set of per-thread counters.
Definition: counter.h:276
counter_t packets
packet counter
Definition: counter.h:141
u8 * format_load_balance(u8 *s, va_list *args)
Definition: load_balance.c:150
dpo_type_t dpoi_type
the type
Definition: dpo.h:154
static const dpo_id_t * load_balance_get_bucket_i(const load_balance_t *lb, u32 bucket)
Definition: load_balance.h:202
dpo_proto_t lb_proto
The protocol of packets that traverse this LB.
Definition: load_balance.h:100
struct _unformat_input_t unformat_input_t
void load_balance_set_fib_entry_flags(index_t lbi, fib_entry_flag_t flags)
Definition: load_balance.c:243
load-balancing over a choice of [un]equal cost paths
Definition: dpo.h:100
static void * vlib_buffer_get_current(vlib_buffer_t *b)
Get pointer to current data to process.
Definition: buffer.h:193
static u32 ip6_compute_flow_hash(const ip6_header_t *ip, flow_hash_config_t flow_hash_config)
Definition: ip6.h:404
#define pool_put(P, E)
Free an object E in pool P.
Definition: pool.h:270
The FIB DPO provieds;.
Definition: load_balance.h:84
#define PREDICT_FALSE(x)
Definition: clib.h:97
load_balance_main_t load_balance_main
The one instance of load-balance main.
Definition: load_balance.c:55
#define vlib_validate_buffer_enqueue_x1(vm, node, next_index, to_next, n_left_to_next, bi0, next0)
Finish enqueueing one buffer forward in the graph.
Definition: buffer_node.h:216
#define vlib_get_next_frame(vm, node, next_index, vectors, n_vectors_left)
Get pointer to next frame vector data by (vlib_node_runtime_t, next_index).
Definition: node_funcs.h:364
const dpo_id_t * load_balance_get_bucket(index_t lbi, u32 bucket)
Definition: load_balance.c:283
The load-balance object represents an ECMP choice.
Definition: load_balance.h:44
vlib_node_registration_t l2_load_balance_node
(constructor) VLIB_REGISTER_NODE (l2_load_balance_node)
dpo_id_t lb_buckets_inline[LB_NUM_INLINE_BUCKETS]
The rest of the cache line is used for buckets.
Definition: load_balance.h:141
#define pool_get_aligned(P, E, A)
Allocate an object E from a pool P (general version).
Definition: pool.h:188
enum load_balance_flags_t_ load_balance_flags_t
Flags controlling load-balance creation and modification.
#define UNFORMAT_END_OF_INPUT
Definition: format.h:143
#define hash_mix64(a0, b0, c0)
Definition: hash.h:507
svmdb_client_t * c
u16 n_vectors
Definition: node.h:344
static void vlib_get_combined_counter(const vlib_combined_counter_main_t *cm, u32 index, vlib_counter_t *result)
Get the value of a combined counter, never called in the speed path Scrapes the entire set of per-thr...
Definition: counter.h:250
vlib_main_t * vm
Definition: buffer.c:283
#define vec_free(V)
Free vector&#39;s memory (no header).
Definition: vec.h:336
static const char *const load_balance_nsh_nodes[]
Definition: load_balance.c:814
void load_balance_map_show_mem(void)
#define VLIB_BUFFER_IS_TRACED
Definition: buffer.h:93
static f64 fabs(f64 x)
Definition: math.h:50
#define clib_memcpy(a, b, c)
Definition: string.h:69
static uword max_pow2(uword x)
Definition: clib.h:263
static u8 * format_nsh_load_balance_trace(u8 *s, va_list *args)
void vlib_put_next_frame(vlib_main_t *vm, vlib_node_runtime_t *r, u32 next_index, u32 n_vectors_left)
Release pointer to next frame vector data.
Definition: main.c:454
#define LB_NUM_INLINE_BUCKETS
The number of buckets that a load-balance object can have and still fit in one cache-line.
Definition: load_balance.h:56
vlib_combined_counter_main_t lbm_via_counters
Definition: load_balance.h:47
static void load_balance_fill_buckets(load_balance_t *lb, load_balance_path_t *nhs, dpo_id_t *buckets, u32 n_buckets)
Definition: load_balance.c:450
enum fib_entry_flag_t_ fib_entry_flag_t
static uword nsh_load_balance(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
#define VLIB_CLI_COMMAND(x,...)
Definition: cli.h:154
vlib_node_registration_t nsh_load_balance_node
(constructor) VLIB_REGISTER_NODE (nsh_load_balance_node)
u16 cached_next_index
Next frame index that vector arguments were last enqueued to last time this node ran.
Definition: node.h:456
#define ASSERT(truth)
index_t lb_urpf
This is the index of the uRPF list for this LB.
Definition: load_balance.h:123
unsigned int u32
Definition: types.h:88
static load_balance_t * load_balance_get(index_t lbi)
Definition: load_balance.h:193
static clib_error_t * load_balance_show(vlib_main_t *vm, unformat_input_t *input, vlib_cli_command_t *cmd)
Definition: load_balance.c:848
u32 lb_locks
The number of locks, which is approximately the number of users, of this load-balance.
Definition: load_balance.h:113
static u64 mac_to_u64(u8 *m)
Definition: load_balance.c:901
#define IP_FLOW_HASH_DEFAULT
Default: 5-tuple without the "reverse" bit.
Definition: lookup.h:69
#define LB_HAS_INLINE_BUCKETS(_lb)
Definition: load_balance.h:198
void load_balance_set_bucket(index_t lbi, u32 bucket, const dpo_id_t *next)
Definition: load_balance.c:210
u8 * format_dpo_id(u8 *s, va_list *args)
Format a DPO_id_t oject
Definition: dpo.c:143
u32 flow_hash_config_t
A flow hash configuration is a mask of the flow hash options.
Definition: lookup.h:82
u64 uword
Definition: types.h:112
static void * vlib_add_trace(vlib_main_t *vm, vlib_node_runtime_t *r, vlib_buffer_t *b, u32 n_data_bytes)
Definition: trace_funcs.h:55
counter_t bytes
byte counter
Definition: counter.h:142
static uword l2_load_balance(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
Definition: load_balance.c:939
Definition: defs.h:47
unsigned short u16
Definition: types.h:57
#define DPO_PROTO_NUM
Definition: dpo.h:69
i64 word
Definition: types.h:111
static word flt_round_nearest(f64 x)
Definition: clib.h:314
void qsort(void *base, uword n, uword size, int(*compar)(const void *, const void *))
Definition: qsort.c:56
index_t dpoi_index
the index of objects of that type
Definition: dpo.h:166
#define vec_len(v)
Number of elements in vector (rvalue-only, NULL tolerant)
double f64
Definition: types.h:142
unsigned char u8
Definition: types.h:56
u32 path_weight
weight for the path.
Definition: load_balance.h:76
#define INDEX_INVALID
Invalid index - used when no index is known blazoned capitals INVALID speak volumes where ~0 does not...
Definition: dpo.h:47
static void load_balance_destroy(load_balance_t *lb)
Definition: load_balance.c:727
static void * vlib_frame_vector_args(vlib_frame_t *f)
Get pointer to frame vector data.
Definition: node_funcs.h:267
void fib_urpf_list_unlock(index_t ui)
Definition: fib_urpf_list.c:68
One path from an [EU]CMP set that the client wants to add to a load-balance object.
Definition: load_balance.h:62
u8 * format_dpo_proto(u8 *s, va_list *args)
format a DPO protocol
Definition: dpo.c:171
static u32 l2_flow_hash(vlib_buffer_t *b0)
Definition: load_balance.c:907
#define vnet_buffer(b)
Definition: buffer.h:306
index_t lb_map
index of the load-balance map, INVALID if this LB does not use one
Definition: load_balance.h:118
const f64 multipath_next_hop_error_tolerance
Definition: load_balance.c:28
#define VLIB_REGISTER_NODE(x,...)
Definition: node.h:143
int dpo_is_drop(const dpo_id_t *dpo)
The Drop DPO will drop all packets, no questions asked.
Definition: drop_dpo.c:33
u32 ip_multipath_normalize_next_hops(const load_balance_path_t *raw_next_hops, load_balance_path_t **normalized_next_hops, u32 *sum_weight_in, f64 multipath_next_hop_error_tolerance)
Definition: load_balance.c:304
void dpo_reset(dpo_id_t *dpo)
reset a DPO ID The DPO will be unlocked.
Definition: dpo.c:225
#define vec_foreach(var, vec)
Vector iterator.
#define CLIB_MEMORY_BARRIER()
Definition: clib.h:101
u16 dpoi_next_node
The next VLIB node to follow.
Definition: dpo.h:162
#define LB_DBG(_p, _fmt, _args...)
Definition: load_balance.c:43
u8 ip_version_and_header_length
Definition: ip4_packet.h:132
u32 flags
Definition: vhost-user.h:77
#define CLIB_CACHE_LINE_BYTES
Definition: cache.h:67
u8 * format_load_balance_map(u8 *s, va_list ap)
u32 flags
buffer flags: VLIB_BUFFER_FREE_LIST_INDEX_MASK: bits used to store free list index, VLIB_BUFFER_IS_TRACED: trace this buffer.
Definition: buffer.h:75
void vlib_cli_output(vlib_main_t *vm, char *fmt,...)
Definition: cli.c:680
struct load_balance_trace_t_ load_balance_trace_t
static vlib_buffer_t * vlib_get_buffer(vlib_main_t *vm, u32 buffer_index)
Translate buffer index into buffer pointer.
Definition: buffer_funcs.h:57
uword unformat(unformat_input_t *i, const char *fmt,...)
Definition: unformat.c:972
static uword unformat_check_input(unformat_input_t *i)
Definition: format.h:169
void dpo_stack(dpo_type_t child_type, dpo_proto_t child_proto, dpo_id_t *dpo, const dpo_id_t *parent)
Stack one DPO object on another, and thus establish a child-parent relationship.
Definition: dpo.c:456
static void load_balance_set_bucket_i(load_balance_t *lb, u32 bucket, dpo_id_t *buckets, const dpo_id_t *next)
Definition: load_balance.c:201
static uword pool_elts(void *v)
Number of active elements in a pool.
Definition: pool.h:128