FD.io VPP  v18.04-17-g3a0d853
Vector Packet Processing
lb.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2016 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  * http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
16 #include <lb/lb.h>
17 #include <vnet/plugin/plugin.h>
18 #include <vpp/app/version.h>
19 #include <vnet/api_errno.h>
20 
21 //GC runs at most once every so many seconds
22 #define LB_GARBAGE_RUN 60
23 
24 //After this many seconds, it is assumed that inter-core race conditions will not occur.
25 #define LB_CONCURRENCY_TIMEOUT 10
26 
28 
/*
 * Writer lock guarding modifications of the load-balancer state; several
 * functions in this file assert that it is held.
 *
 * lb_get_writer_lock() spins until the atomic test-and-set reports that the
 * lock word was previously 0, leaving it set to 1.
 * lb_put_writer_lock() clears the lock word with __sync_lock_release(), the
 * builtin paired with __sync_lock_test_and_set(), so the unlock has release
 * semantics instead of being a plain store.
 */
#define lb_get_writer_lock() do {} while(__sync_lock_test_and_set (lb_main.writer_lock, 1))
#define lb_put_writer_lock() __sync_lock_release (lb_main.writer_lock)
31 
32 static void lb_as_stack (lb_as_t *as);
33 
34 
35 const static char * const lb_dpo_gre4_ip4[] = { "lb4-gre4" , NULL };
36 const static char * const lb_dpo_gre4_ip6[] = { "lb6-gre4" , NULL };
37 const static char* const * const lb_dpo_gre4_nodes[DPO_PROTO_NUM] =
38  {
41  };
42 
43 const static char * const lb_dpo_gre6_ip4[] = { "lb4-gre6" , NULL };
44 const static char * const lb_dpo_gre6_ip6[] = { "lb6-gre6" , NULL };
45 const static char* const * const lb_dpo_gre6_nodes[DPO_PROTO_NUM] =
46  {
49  };
50 
51 const static char * const lb_dpo_l3dsr_ip4[] = { "lb4-l3dsr" , NULL };
52 const static char* const * const lb_dpo_l3dsr_nodes[DPO_PROTO_NUM] =
53  {
55  };
56 
58 {
59  return (u32) (vlib_time_now(vm) + 10000);
60 }
61 
62 u8 *format_lb_main (u8 * s, va_list * args)
63 {
65  lb_main_t *lbm = &lb_main;
66  s = format(s, "lb_main");
67  s = format(s, " ip4-src-address: %U \n", format_ip4_address, &lbm->ip4_src_address);
68  s = format(s, " ip6-src-address: %U \n", format_ip6_address, &lbm->ip6_src_address);
69  s = format(s, " #vips: %u\n", pool_elts(lbm->vips));
70  s = format(s, " #ass: %u\n", pool_elts(lbm->ass) - 1);
71 
72  u32 thread_index;
73  for(thread_index = 0; thread_index < tm->n_vlib_mains; thread_index++ ) {
74  lb_hash_t *h = lbm->per_cpu[thread_index].sticky_ht;
75  if (h) {
76  s = format(s, "core %d\n", thread_index);
77  s = format(s, " timeout: %ds\n", h->timeout);
78  s = format(s, " usage: %d / %d\n", lb_hash_elts(h, lb_hash_time_now(vlib_get_main())), lb_hash_size(h));
79  }
80  }
81 
82  return s;
83 }
84 
85 static char *lb_vip_type_strings[] = {
86  [LB_VIP_TYPE_IP6_GRE6] = "ip6-gre6",
87  [LB_VIP_TYPE_IP6_GRE4] = "ip6-gre4",
88  [LB_VIP_TYPE_IP4_GRE6] = "ip4-gre6",
89  [LB_VIP_TYPE_IP4_GRE4] = "ip4-gre4",
90  [LB_VIP_TYPE_IP4_L3DSR] = "ip4-l3dsr",
91 };
92 
93 u8 *format_lb_vip_type (u8 * s, va_list * args)
94 {
95  lb_vip_type_t vipt = va_arg (*args, lb_vip_type_t);
96  u32 i;
97  for (i=0; i<LB_VIP_N_TYPES; i++)
98  if (vipt == i)
99  return format(s, lb_vip_type_strings[i]);
100  return format(s, "_WRONG_TYPE_");
101 }
102 
103 uword unformat_lb_vip_type (unformat_input_t * input, va_list * args)
104 {
105  lb_vip_type_t *vipt = va_arg (*args, lb_vip_type_t *);
106  u32 i;
107  for (i=0; i<LB_VIP_N_TYPES; i++)
108  if (unformat(input, lb_vip_type_strings[i])) {
109  *vipt = i;
110  return 1;
111  }
112  return 0;
113 }
114 
115 u8 *format_lb_vip (u8 * s, va_list * args)
116 {
117  lb_vip_t *vip = va_arg (*args, lb_vip_t *);
118  return format(s, "%U %U new_size:%u #as:%u%s",
119  format_lb_vip_type, vip->type,
121  vip->new_flow_table_mask + 1,
122  pool_elts(vip->as_indexes),
123  (vip->flags & LB_VIP_FLAGS_USED)?"":" removed");
124 }
125 
126 u8 *format_lb_as (u8 * s, va_list * args)
127 {
128  lb_as_t *as = va_arg (*args, lb_as_t *);
129  return format(s, "%U %s", format_ip46_address,
130  &as->address, IP46_TYPE_ANY,
131  (as->flags & LB_AS_FLAGS_USED)?"used":"removed");
132 }
133 
134 u8 *format_lb_vip_detailed (u8 * s, va_list * args)
135 {
136  lb_main_t *lbm = &lb_main;
137  lb_vip_t *vip = va_arg (*args, lb_vip_t *);
138  u32 indent = format_get_indent (s);
139 
140  s = format(s, "%U %U [%lu] %U%s\n"
141  "%U new_size:%u\n",
142  format_white_space, indent,
143  format_lb_vip_type, vip->type,
144  vip - lbm->vips,
146  (vip->flags & LB_VIP_FLAGS_USED)?"":" removed",
147  format_white_space, indent,
148  vip->new_flow_table_mask + 1);
149 
150  if (vip->type == LB_VIP_TYPE_IP4_L3DSR)
151  {
152  s = format(s, "%U dscp:%u\n",
153  format_white_space, indent,
154  vip->dscp);
155  }
156 
157  //Print counters
158  s = format(s, "%U counters:\n",
159  format_white_space, indent);
160  u32 i;
161  for (i=0; i<LB_N_VIP_COUNTERS; i++)
162  s = format(s, "%U %s: %d\n",
163  format_white_space, indent,
164  lbm->vip_counters[i].name,
165  vlib_get_simple_counter(&lbm->vip_counters[i], vip - lbm->vips));
166 
167 
168  s = format(s, "%U #as:%u\n",
169  format_white_space, indent,
170  pool_elts(vip->as_indexes));
171 
172  //Let's count the buckets for each AS
173  u32 *count = 0;
174  vec_validate(count, pool_len(lbm->ass)); //Possibly big alloc for not much...
175  lb_new_flow_entry_t *nfe;
176  vec_foreach(nfe, vip->new_flow_table)
177  count[nfe->as_index]++;
178 
179  lb_as_t *as;
180  u32 *as_index;
181  pool_foreach(as_index, vip->as_indexes, {
182  as = &lbm->ass[*as_index];
183  s = format(s, "%U %U %d buckets %d flows dpo:%u %s\n",
184  format_white_space, indent,
185  format_ip46_address, &as->address, IP46_TYPE_ANY,
186  count[as - lbm->ass],
187  vlib_refcount_get(&lbm->as_refcount, as - lbm->ass),
188  as->dpo.dpoi_index,
189  (as->flags & LB_AS_FLAGS_USED)?"used":" removed");
190  });
191 
192  vec_free(count);
193 
194  /*
195  s = format(s, "%U new flows table:\n", format_white_space, indent);
196  lb_new_flow_entry_t *nfe;
197  vec_foreach(nfe, vip->new_flow_table) {
198  s = format(s, "%U %d: %d\n", format_white_space, indent, nfe - vip->new_flow_table, nfe->as_index);
199  }
200  */
201  return s;
202 }
203 
204 typedef struct {
209 
210 static int lb_pseudorand_compare(void *a, void *b)
211 {
212  lb_as_t *asa, *asb;
213  lb_main_t *lbm = &lb_main;
214  asa = &lbm->ass[((lb_pseudorand_t *)a)->as_index];
215  asb = &lbm->ass[((lb_pseudorand_t *)b)->as_index];
216  return memcmp(&asa->address, &asb->address, sizeof(asb->address));
217 }
218 
220 {
221  lb_main_t *lbm = &lb_main;
222  ASSERT (lbm->writer_lock[0]);
223 
224  u32 now = (u32) vlib_time_now(vlib_get_main());
226  return;
227 
228  vip->last_garbage_collection = now;
229  lb_as_t *as;
230  u32 *as_index;
231  pool_foreach(as_index, vip->as_indexes, {
232  as = &lbm->ass[*as_index];
233  if (!(as->flags & LB_AS_FLAGS_USED) && //Not used
234  clib_u32_loop_gt(now, as->last_used + LB_CONCURRENCY_TIMEOUT) && //Not recently used
235  (vlib_refcount_get(&lbm->as_refcount, as - lbm->ass) == 0))
236  { //Not referenced
237  fib_entry_child_remove(as->next_hop_fib_entry_index,
238  as->next_hop_child_index);
239  fib_table_entry_delete_index(as->next_hop_fib_entry_index,
240  FIB_SOURCE_RR);
241  as->next_hop_fib_entry_index = FIB_NODE_INDEX_INVALID;
242 
243  pool_put(vip->as_indexes, as_index);
244  pool_put(lbm->ass, as);
245  }
246  });
247 }
248 
250 {
251  lb_main_t *lbm = &lb_main;
253  lb_vip_t *vip;
254  u32 *to_be_removed_vips = 0, *i;
255  pool_foreach(vip, lbm->vips, {
256  lb_vip_garbage_collection(vip);
257 
258  if (!(vip->flags & LB_VIP_FLAGS_USED) &&
259  (pool_elts(vip->as_indexes) == 0)) {
260  vec_add1(to_be_removed_vips, vip - lbm->vips);
261  }
262  });
263 
264  vec_foreach(i, to_be_removed_vips) {
265  vip = &lbm->vips[*i];
266  pool_put(lbm->vips, vip);
267  pool_free(vip->as_indexes);
268  }
269 
270  vec_free(to_be_removed_vips);
272 }
273 
275 {
276  lb_main_t *lbm = &lb_main;
277  lb_new_flow_entry_t *old_table;
278  u32 i, *as_index;
279  lb_new_flow_entry_t *new_flow_table = 0;
280  lb_as_t *as;
281  lb_pseudorand_t *pr, *sort_arr = 0;
282  u32 count;
283 
284  ASSERT (lbm->writer_lock[0]); //We must have the lock
285 
286  //Check if some AS is configured or not
287  i = 0;
288  pool_foreach(as_index, vip->as_indexes, {
289  as = &lbm->ass[*as_index];
290  if (as->flags & LB_AS_FLAGS_USED) { //Not used anymore
291  i = 1;
292  goto out; //Not sure 'break' works in this macro-loop
293  }
294  });
295 
296 out:
297  if (i == 0) {
298  //Only the default. i.e. no AS
299  vec_validate(new_flow_table, vip->new_flow_table_mask);
300  for (i=0; i<vec_len(new_flow_table); i++)
301  new_flow_table[i].as_index = 0;
302 
303  goto finished;
304  }
305 
306  //First, let's sort the ASs
307  sort_arr = 0;
308  vec_alloc(sort_arr, pool_elts(vip->as_indexes));
309 
310  i = 0;
311  pool_foreach(as_index, vip->as_indexes, {
312  as = &lbm->ass[*as_index];
313  if (!(as->flags & LB_AS_FLAGS_USED)) //Not used anymore
314  continue;
315 
316  sort_arr[i].as_index = as - lbm->ass;
317  i++;
318  });
319  _vec_len(sort_arr) = i;
320 
322 
323  //Now let's pseudo-randomly generate permutations
324  vec_foreach(pr, sort_arr) {
325  lb_as_t *as = &lbm->ass[pr->as_index];
326 
327  u64 seed = clib_xxhash(as->address.as_u64[0] ^
328  as->address.as_u64[1]);
329  /* We have 2^n buckets.
330  * skip must be prime with 2^n.
331  * So skip must be odd.
332  * MagLev actually states that M should be prime,
333  * but this has a big computation cost (% operation).
334  * Using 2^n is cheaper (& operation).
335  */
336  pr->skip = ((seed & 0xffffffff) | 1) & vip->new_flow_table_mask;
337  pr->last = (seed >> 32) & vip->new_flow_table_mask;
338  }
339 
340  //Let's create a new flow table
341  vec_validate(new_flow_table, vip->new_flow_table_mask);
342  for (i=0; i<vec_len(new_flow_table); i++)
343  new_flow_table[i].as_index = ~0;
344 
345  u32 done = 0;
346  while (1) {
347  vec_foreach(pr, sort_arr) {
348  while (1) {
349  u32 last = pr->last;
350  pr->last = (pr->last + pr->skip) & vip->new_flow_table_mask;
351  if (new_flow_table[last].as_index == ~0) {
352  new_flow_table[last].as_index = pr->as_index;
353  break;
354  }
355  }
356  done++;
357  if (done == vec_len(new_flow_table))
358  goto finished;
359  }
360  }
361 
362  vec_free(sort_arr);
363 
364 finished:
365 
366 //Count number of changed entries
367  count = 0;
368  for (i=0; i<vec_len(new_flow_table); i++)
369  if (vip->new_flow_table == 0 ||
370  new_flow_table[i].as_index != vip->new_flow_table[i].as_index)
371  count++;
372 
373  old_table = vip->new_flow_table;
374  vip->new_flow_table = new_flow_table;
375  vec_free(old_table);
376 }
377 
378 int lb_conf(ip4_address_t *ip4_address, ip6_address_t *ip6_address,
379  u32 per_cpu_sticky_buckets, u32 flow_timeout)
380 {
381  lb_main_t *lbm = &lb_main;
382 
383  if (!is_pow2(per_cpu_sticky_buckets))
384  return VNET_API_ERROR_INVALID_MEMORY_SIZE;
385 
386  lb_get_writer_lock(); //Not exactly necessary but just a reminder that it exists for my future self
387  lbm->ip4_src_address = *ip4_address;
388  lbm->ip6_src_address = *ip6_address;
389  lbm->per_cpu_sticky_buckets = per_cpu_sticky_buckets;
390  lbm->flow_timeout = flow_timeout;
392  return 0;
393 }
394 
395 static
396 int lb_vip_find_index_with_lock(ip46_address_t *prefix, u8 plen, u32 *vip_index)
397 {
398  lb_main_t *lbm = &lb_main;
399  lb_vip_t *vip;
400  ASSERT (lbm->writer_lock[0]); //This must be called with the lock owned
401  ip46_prefix_normalize(prefix, plen);
402  pool_foreach(vip, lbm->vips, {
403  if ((vip->flags & LB_AS_FLAGS_USED) &&
404  vip->plen == plen &&
405  vip->prefix.as_u64[0] == prefix->as_u64[0] &&
406  vip->prefix.as_u64[1] == prefix->as_u64[1]) {
407  *vip_index = vip - lbm->vips;
408  return 0;
409  }
410  });
411  return VNET_API_ERROR_NO_SUCH_ENTRY;
412 }
413 
414 int lb_vip_find_index(ip46_address_t *prefix, u8 plen, u32 *vip_index)
415 {
416  int ret;
418  ret = lb_vip_find_index_with_lock(prefix, plen, vip_index);
420  return ret;
421 }
422 
423 static int lb_as_find_index_vip(lb_vip_t *vip, ip46_address_t *address, u32 *as_index)
424 {
425  lb_main_t *lbm = &lb_main;
426  ASSERT (lbm->writer_lock[0]); //This must be called with the lock owned
427  lb_as_t *as;
428  u32 *asi;
429  pool_foreach(asi, vip->as_indexes, {
430  as = &lbm->ass[*asi];
431  if (as->vip_index == (vip - lbm->vips) &&
432  as->address.as_u64[0] == address->as_u64[0] &&
433  as->address.as_u64[1] == address->as_u64[1]) {
434  *as_index = as - lbm->ass;
435  return 0;
436  }
437  });
438  return -1;
439 }
440 
441 int lb_vip_add_ass(u32 vip_index, ip46_address_t *addresses, u32 n)
442 {
443  lb_main_t *lbm = &lb_main;
445  lb_vip_t *vip;
446  if (!(vip = lb_vip_get_by_index(vip_index))) {
448  return VNET_API_ERROR_NO_SUCH_ENTRY;
449  }
450 
452  u32 *to_be_added = 0;
453  u32 *to_be_updated = 0;
454  u32 i;
455  u32 *ip;
456 
457  //Sanity check
458  while (n--) {
459 
460  if (!lb_as_find_index_vip(vip, &addresses[n], &i)) {
461  if (lbm->ass[i].flags & LB_AS_FLAGS_USED) {
462  vec_free(to_be_added);
463  vec_free(to_be_updated);
465  return VNET_API_ERROR_VALUE_EXIST;
466  }
467  vec_add1(to_be_updated, i);
468  goto next;
469  }
470 
471  if (ip46_address_type(&addresses[n]) != type) {
472  vec_free(to_be_added);
473  vec_free(to_be_updated);
475  return VNET_API_ERROR_INVALID_ADDRESS_FAMILY;
476  }
477 
478  if (n) {
479  u32 n2 = n;
480  while(n2--) //Check for duplicates
481  if (addresses[n2].as_u64[0] == addresses[n].as_u64[0] &&
482  addresses[n2].as_u64[1] == addresses[n].as_u64[1])
483  goto next;
484  }
485 
486  vec_add1(to_be_added, n);
487 
488 next:
489  continue;
490  }
491 
492  //Update reused ASs
493  vec_foreach(ip, to_be_updated) {
494  lbm->ass[*ip].flags = LB_AS_FLAGS_USED;
495  }
496  vec_free(to_be_updated);
497 
498  //Create those who have to be created
499  vec_foreach(ip, to_be_added) {
500  lb_as_t *as;
501  u32 *as_index;
502  pool_get(lbm->ass, as);
503  as->address = addresses[*ip];
504  as->flags = LB_AS_FLAGS_USED;
505  as->vip_index = vip_index;
506  pool_get(vip->as_indexes, as_index);
507  *as_index = as - lbm->ass;
508 
509  /*
510  * become a child of the FIB entry
511  * so we are informed when its forwarding changes
512  */
513  fib_prefix_t nh = {};
514  if (lb_encap_is_ip4(vip)) {
515  nh.fp_addr.ip4 = as->address.ip4;
516  nh.fp_len = 32;
518  } else {
519  nh.fp_addr.ip6 = as->address.ip6;
520  nh.fp_len = 128;
522  }
523 
526  &nh,
531  lbm->fib_node_type,
532  as - lbm->ass);
533 
534  lb_as_stack(as);
535  }
536  vec_free(to_be_added);
537 
538  //Recompute flows
540 
541  //Garbage collection maybe
543 
545  return 0;
546 }
547 
548 int lb_vip_del_ass_withlock(u32 vip_index, ip46_address_t *addresses, u32 n)
549 {
550  lb_main_t *lbm = &lb_main;
551  u32 now = (u32) vlib_time_now(vlib_get_main());
552  u32 *ip = 0;
553 
554  lb_vip_t *vip;
555  if (!(vip = lb_vip_get_by_index(vip_index))) {
556  return VNET_API_ERROR_NO_SUCH_ENTRY;
557  }
558 
559  u32 *indexes = NULL;
560  while (n--) {
561  u32 i;
562  if (lb_as_find_index_vip(vip, &addresses[n], &i)) {
563  vec_free(indexes);
564  return VNET_API_ERROR_NO_SUCH_ENTRY;
565  }
566 
567  if (n) { //Check for duplicates
568  u32 n2 = n - 1;
569  while(n2--) {
570  if (addresses[n2].as_u64[0] == addresses[n].as_u64[0] &&
571  addresses[n2].as_u64[1] == addresses[n].as_u64[1])
572  goto next;
573  }
574  }
575 
576  vec_add1(indexes, i);
577 next:
578  continue;
579  }
580 
581  //Garbage collection maybe
583 
584  if (indexes != NULL) {
585  vec_foreach(ip, indexes) {
586  lbm->ass[*ip].flags &= ~LB_AS_FLAGS_USED;
587  lbm->ass[*ip].last_used = now;
588  }
589 
590  //Recompute flows
592  }
593 
594  vec_free(indexes);
595  return 0;
596 }
597 
598 int lb_vip_del_ass(u32 vip_index, ip46_address_t *addresses, u32 n)
599 {
601  int ret = lb_vip_del_ass_withlock(vip_index, addresses, n);
603  return ret;
604 }
605 
606 /**
607  * Add the VIP adjacency to the ip4 or ip6 fib
608  */
609 static void lb_vip_add_adjacency(lb_main_t *lbm, lb_vip_t *vip)
610 {
611  dpo_proto_t proto = 0;
612  dpo_type_t dpo_type = 0;
613 
614  dpo_id_t dpo = DPO_INVALID;
615  fib_prefix_t pfx = {};
616  if (lb_vip_is_ip4(vip)) {
617  pfx.fp_addr.ip4 = vip->prefix.ip4;
618  pfx.fp_len = vip->plen - 96;
620  proto = DPO_PROTO_IP4;
621  } else {
622  pfx.fp_addr.ip6 = vip->prefix.ip6;
623  pfx.fp_len = vip->plen;
625  proto = DPO_PROTO_IP6;
626  }
627 
628  if (lb_vip_is_gre4(vip))
629  dpo_type = lbm->dpo_gre4_type;
630  else if (lb_vip_is_gre6(vip))
631  dpo_type = lbm->dpo_gre6_type;
632  else if (lb_vip_is_l3dsr(vip))
633  dpo_type = lbm->dpo_l3dsr_type;
634 
635  dpo_set(&dpo, dpo_type, proto, vip - lbm->vips);
637  &pfx,
640  &dpo);
641  dpo_reset(&dpo);
642 }
643 
644 /**
645  * Deletes the adjacency associated with the VIP
646  */
647 static void lb_vip_del_adjacency(lb_main_t *lbm, lb_vip_t *vip)
648 {
649  fib_prefix_t pfx = {};
650  if (lb_vip_is_ip4(vip)) {
651  pfx.fp_addr.ip4 = vip->prefix.ip4;
652  pfx.fp_len = vip->plen - 96;
654  } else {
655  pfx.fp_addr.ip6 = vip->prefix.ip6;
656  pfx.fp_len = vip->plen;
658  }
660 }
661 
662 int lb_vip_add(ip46_address_t *prefix, u8 plen, lb_vip_type_t type, u8 dscp,
663  u32 new_length, u32 *vip_index)
664 {
665  lb_main_t *lbm = &lb_main;
666  lb_vip_t *vip;
667 
669  ip46_prefix_normalize(prefix, plen);
670 
671  if (!lb_vip_find_index_with_lock(prefix, plen, vip_index)) {
673  return VNET_API_ERROR_VALUE_EXIST;
674  }
675 
676  if (!is_pow2(new_length)) {
678  return VNET_API_ERROR_INVALID_MEMORY_SIZE;
679  }
680 
681  if (ip46_prefix_is_ip4(prefix, plen) &&
682  (type != LB_VIP_TYPE_IP4_GRE4) &&
683  (type != LB_VIP_TYPE_IP4_GRE6) &&
684  (type != LB_VIP_TYPE_IP4_L3DSR))
685  return VNET_API_ERROR_INVALID_ADDRESS_FAMILY;
686 
687  if ((!ip46_prefix_is_ip4(prefix, plen)) &&
688  (type != LB_VIP_TYPE_IP6_GRE4) &&
689  (type != LB_VIP_TYPE_IP6_GRE6))
690  return VNET_API_ERROR_INVALID_ADDRESS_FAMILY;
691 
692  if ((type == LB_VIP_TYPE_IP4_L3DSR) && (dscp >= 64 ) )
693  {
694  return VNET_API_ERROR_VALUE_EXIST;
695  }
696 
697  //Allocate
698  pool_get(lbm->vips, vip);
699 
700  //Init
701  vip->prefix = *prefix;
702  vip->plen = plen;
704  vip->type = type;
705  vip->dscp = dscp;
706  vip->flags = LB_VIP_FLAGS_USED;
707  vip->as_indexes = 0;
708 
709  //Validate counters
710  u32 i;
711  for (i = 0; i < LB_N_VIP_COUNTERS; i++) {
712  vlib_validate_simple_counter(&lbm->vip_counters[i], vip - lbm->vips);
713  vlib_zero_simple_counter(&lbm->vip_counters[i], vip - lbm->vips);
714  }
715 
716  //Configure new flow table
717  vip->new_flow_table_mask = new_length - 1;
718  vip->new_flow_table = 0;
719 
720  //Create a new flow hash table full of the default entry
722 
723  //Create adjacency to direct traffic
724  lb_vip_add_adjacency(lbm, vip);
725 
726  //Return result
727  *vip_index = vip - lbm->vips;
728 
730  return 0;
731 }
732 
733 int lb_vip_del(u32 vip_index)
734 {
735  lb_main_t *lbm = &lb_main;
736  lb_vip_t *vip;
738  if (!(vip = lb_vip_get_by_index(vip_index))) {
740  return VNET_API_ERROR_NO_SUCH_ENTRY;
741  }
742 
743  //FIXME: This operation is actually not working
744  //We will need to remove state before performing this.
745 
746  {
747  //Remove all ASs
748  ip46_address_t *ass = 0;
749  lb_as_t *as;
750  u32 *as_index;
751  pool_foreach(as_index, vip->as_indexes, {
752  as = &lbm->ass[*as_index];
753  vec_add1(ass, as->address);
754  });
755  if (vec_len(ass))
756  lb_vip_del_ass_withlock(vip_index, ass, vec_len(ass));
757  vec_free(ass);
758  }
759 
760  //Delete adjacency
761  lb_vip_del_adjacency(lbm, vip);
762 
763  //Set the VIP as unused
764  vip->flags &= ~LB_VIP_FLAGS_USED;
765 
767  return 0;
768 }
769 
770 /* *INDENT-OFF* */
772  .version = VPP_BUILD_VER,
773  .description = "Load Balancer",
774 };
775 /* *INDENT-ON* */
776 
777 u8 *format_lb_dpo (u8 * s, va_list * va)
778 {
779  index_t index = va_arg (*va, index_t);
780  CLIB_UNUSED(u32 indent) = va_arg (*va, u32);
781  lb_main_t *lbm = &lb_main;
782  lb_vip_t *vip = pool_elt_at_index (lbm->vips, index);
783  return format (s, "%U", format_lb_vip, vip);
784 }
785 
786 static void lb_dpo_lock (dpo_id_t *dpo) {}
787 static void lb_dpo_unlock (dpo_id_t *dpo) {}
788 
789 static fib_node_t *
791 {
792  lb_main_t *lbm = &lb_main;
793  lb_as_t *as = pool_elt_at_index (lbm->ass, index);
794  return (&as->fib_node);
795 }
796 
797 static void
799 {
800 }
801 
802 static lb_as_t *
804 {
805  return ((lb_as_t*)(((char*)node) -
806  STRUCT_OFFSET_OF(lb_as_t, fib_node)));
807 }
808 
809 static void
811 {
812  lb_main_t *lbm = &lb_main;
813  lb_vip_t *vip = &lbm->vips[as->vip_index];
814  dpo_type_t dpo_type = 0;
815 
816  if (lb_vip_is_gre4(vip))
817  dpo_type = lbm->dpo_gre4_type;
818  else if (lb_vip_is_gre6(vip))
819  dpo_type = lbm->dpo_gre6_type;
820  else if (lb_vip_is_l3dsr(vip))
821  dpo_type = lbm->dpo_l3dsr_type;
822 
823  dpo_stack(dpo_type,
825  &as->dpo,
828 }
829 
833 {
836 }
837 
838 clib_error_t *
840 {
842  lb_main_t *lbm = &lb_main;
843  lb_as_t *default_as;
844  fib_node_vft_t lb_fib_node_vft = {
846  .fnv_last_lock = lb_fib_node_last_lock_gone,
847  .fnv_back_walk = lb_fib_node_back_walk_notify,
848  };
849  dpo_vft_t lb_vft = {
850  .dv_lock = lb_dpo_lock,
851  .dv_unlock = lb_dpo_unlock,
852  .dv_format = format_lb_dpo,
853  };
854 
855  lbm->vips = 0;
856  lbm->per_cpu = 0;
857  vec_validate(lbm->per_cpu, tm->n_vlib_mains - 1);
859  lbm->writer_lock[0] = 0;
862  lbm->ip4_src_address.as_u32 = 0xffffffff;
863  lbm->ip6_src_address.as_u64[0] = 0xffffffffffffffffL;
864  lbm->ip6_src_address.as_u64[1] = 0xffffffffffffffffL;
868  lbm->fib_node_type = fib_node_register_new_type(&lb_fib_node_vft);
869 
870  //Init AS reference counters
872 
873  //Allocate and init default AS.
874  lbm->ass = 0;
875  pool_get(lbm->ass, default_as);
876  default_as->flags = 0;
877  default_as->dpo.dpoi_next_node = LB_NEXT_DROP;
878  default_as->vip_index = ~0;
879  default_as->address.ip6.as_u64[0] = 0xffffffffffffffffL;
880  default_as->address.ip6.as_u64[1] = 0xffffffffffffffffL;
881 
882 #define _(a,b,c) lbm->vip_counters[c].name = b;
884 #undef _
885  return NULL;
886 }
887 
u32 skip
Definition: lb.c:207
#define vec_validate(V, I)
Make sure vector is long enough for given index (no header, unspecified alignment) ...
Definition: vec.h:434
fib_protocol_t fp_proto
protocol type
Definition: fib_types.h:197
dpo_lock_fn_t dv_lock
A reference counting lock function.
Definition: dpo.h:404
u32 lb_hash_time_now(vlib_main_t *vm)
Definition: lb.c:57
u8 dscp
DSCP bits for L3DSR.
Definition: lb.h:211
Recursive resolution source.
Definition: fib_entry.h:121
static int lb_pseudorand_compare(void *a, void *b)
Definition: lb.c:210
Each VIP is configured with a set of application servers.
Definition: lb.h:56
u8 * format_ip46_prefix(u8 *s, va_list *args)
Definition: kp.c:71
#define LB_GARBAGE_RUN
Definition: lb.c:22
#define CLIB_UNUSED(x)
Definition: clib.h:79
A virtual function table registered for a DPO type.
Definition: dpo.h:399
ip46_type_t
Definition: format.h:63
#define lb_vip_is_gre6(vip)
Definition: lb.h:236
#define lb_vip_is_l3dsr(vip)
Definition: lb.h:238
a
Definition: bitmap.h:516
u32 last
Definition: lb.c:206
u32 per_cpu_sticky_buckets
Number of buckets in the per-cpu sticky hash table.
Definition: lb.h:299
clib_error_t * lb_init(vlib_main_t *vm)
Definition: lb.c:839
u64 as_u64
Definition: bihash_doc.h:63
u32 fib_entry_child_add(fib_node_index_t fib_entry_index, fib_node_type_t child_type, fib_node_index_t child_index)
Definition: fib_entry.c:527
static void lb_fib_node_last_lock_gone(fib_node_t *node)
Definition: lb.c:798
u64 as_u64[2]
Definition: ip6_packet.h:51
static void lb_vip_update_new_flow_table(lb_vip_t *vip)
Definition: lb.c:274
static int lb_as_find_index_vip(lb_vip_t *vip, ip46_address_t *address, u32 *as_index)
Definition: lb.c:423
#define NULL
Definition: clib.h:55
static f64 vlib_time_now(vlib_main_t *vm)
Definition: main.h:227
enum fib_node_back_walk_rc_t_ fib_node_back_walk_rc_t
Return code from a back walk function.
const dpo_id_t * fib_entry_contribute_ip_forwarding(fib_node_index_t fib_entry_index)
Definition: fib_entry.c:478
static void lb_vip_del_adjacency(lb_main_t *lbm, lb_vip_t *vip)
Deletes the adjacency associated with the VIP.
Definition: lb.c:647
int lb_conf(ip4_address_t *ip4_address, ip6_address_t *ip6_address, u32 per_cpu_sticky_buckets, u32 flow_timeout)
Fix global load-balancer parameters.
Definition: lb.c:378
#define lb_get_writer_lock()
Definition: lb.c:29
int lb_vip_add_ass(u32 vip_index, ip46_address_t *addresses, u32 n)
Definition: lb.c:441
u32 index_t
A Data-Path Object is an object that represents actions that are applied to packets as they are swit...
Definition: dpo.h:41
#define vec_add1(V, E)
Add 1 element to end of vector (unspecified alignment).
Definition: vec.h:520
ip46_address_t prefix
A Virtual IP represents a given service delivered by a set of application servers.
Definition: lb.h:194
static u64 clib_xxhash(u64 key)
Definition: xxhash.h:58
static heap_elt_t * last(heap_header_t *h)
Definition: heap.c:53
static_always_inline void vlib_refcount_init(vlib_refcount_t *r)
Definition: refcount.h:80
int i
static void lb_dpo_lock(dpo_id_t *dpo)
Definition: lb.c:786
format_function_t format_ip46_address
Definition: format.h:61
static u32 format_get_indent(u8 *s)
Definition: format.h:72
#define STRUCT_OFFSET_OF(t, f)
Definition: clib.h:62
u8 * format(u8 *s, const char *fmt,...)
Definition: format.c:419
static void lb_as_stack(lb_as_t *as)
Definition: lb.c:810
#define ip46_address_type(ip46)
Definition: kp.h:431
void ip46_prefix_normalize(ip46_address_t *prefix, u8 plen)
Definition: kp.c:35
#define lb_vip_get_by_index(index)
Definition: lb.h:350
#define pool_get(P, E)
Allocate an object E from a pool P (unspecified alignment).
Definition: pool.h:227
u32 vip_index
ASs are indexed by address and VIP Index.
Definition: lb.h:75
#define vec_alloc(V, N)
Allocate space for N more elements (no header, unspecified alignment)
Definition: vec.h:275
static const char *const *const lb_dpo_gre6_nodes[DPO_PROTO_NUM]
Definition: lb.c:45
lb_hash_t * sticky_ht
Each CPU has its own sticky flow hash table.
Definition: lb.h:252
#define pool_len(p)
Number of elements in pool vector.
Definition: pool.h:140
fib_node_type_t fib_node_register_new_type(const fib_node_vft_t *vft)
Create a new FIB node type and Register the function table for it.
Definition: fib_node.c:80
#define LB_VIP_FLAGS_USED
Definition: lb.h:221
ip46_address_t address
Destination address used to tunnel traffic towards that application server.
Definition: lb.h:68
int lb_vip_del_ass_withlock(u32 vip_index, ip46_address_t *addresses, u32 n)
Definition: lb.c:548
u32 timeout
Definition: lbhash.h:60
static counter_t vlib_get_simple_counter(vlib_simple_counter_main_t *cm, u32 index)
Get the value of a simple counter Scrapes the entire set of per-thread counters.
Definition: counter.h:97
format_function_t format_ip4_address
Definition: format.h:79
#define LB_AS_FLAGS_USED
Definition: lb.h:83
enum dpo_type_t_ dpo_type_t
Common types of data-path objects New types can be dynamically added using dpo_register_new_type() ...
#define pool_foreach(VAR, POOL, BODY)
Iterate through pool.
Definition: pool.h:440
#define VLIB_INIT_FUNCTION(x)
Definition: init.h:111
static lb_as_t * lb_as_from_fib_node(fib_node_t *node)
Definition: lb.c:803
void fib_table_entry_special_remove(u32 fib_index, const fib_prefix_t *prefix, fib_source_t source)
Remove a 'special' entry from the FIB.
Definition: fib_table.c:399
VLIB_PLUGIN_REGISTER()
u8 * format_white_space(u8 *s, va_list *va)
Definition: std-formats.c:113
#define LB_DEFAULT_PER_CPU_STICKY_BUCKETS
lb-plugin implements a MagLev-like load balancer.
Definition: lb.h:44
lb_main_t lb_main
Definition: lb.c:27
static const char *const lb_dpo_gre4_ip4[]
Definition: lb.c:35
u32 flow_timeout
Flow timeout in seconds.
Definition: lb.h:304
A high priority source a plugin can use.
Definition: fib_entry.h:62
Definition: lb.h:255
fib_node_type_t fib_node_type
Node type for registering to fib changes.
Definition: lb.h:321
dpo_type_t dpo_gre4_type
DPO used to send packet from IP4/6 lookup to LB node.
Definition: lb.h:314
Aggregate type for a prefix.
Definition: fib_types.h:188
static void lb_vip_add_adjacency(lb_main_t *lbm, lb_vip_t *vip)
Add the VIP adjacency to the ip4 or ip6 fib.
Definition: lb.c:609
vlib_refcount_t as_refcount
Each AS has an associated reference counter.
Definition: lb.h:274
unsigned long u64
Definition: types.h:89
static void lb_vip_garbage_collection(lb_vip_t *vip)
Definition: lb.c:219
u8 * format_lb_main(u8 *s, va_list *args)
Definition: lb.c:62
u8 * format_lb_vip(u8 *s, va_list *args)
Definition: lb.c:115
enum dpo_proto_t_ dpo_proto_t
Data path protocol.
u16 fp_len
The mask length.
Definition: fib_types.h:192
#define lb_vip_is_ip4(vip)
Definition: lb.h:230
lb_vip_t * vips
Pool of all Virtual IPs.
Definition: lb.h:259
dpo_type_t dpo_register_new_type(const dpo_vft_t *vft, const char *const *const *nodes)
Create and register a new DPO type.
Definition: dpo.c:341
u32 last_used
Rotating timestamp of when LB_AS_FLAGS_USED flag was last set.
Definition: lb.h:94
ip4_address_t ip4_src_address
Source address used for IPv4 encapsulated traffic.
Definition: lb.h:294
Definition: fib_entry.h:270
char * name
The counter collection's name.
Definition: counter.h:65
u8 plen
The VIP prefix length.
Definition: lb.h:200
The identity of a DPO is a combination of its type and its instance number/index of objects of that t...
Definition: dpo.h:168
Definition: fib_entry.h:274
#define lb_vip_is_gre4(vip)
Definition: lb.h:234
static const char *const lb_dpo_gre6_ip4[]
Definition: lb.c:43
#define pool_elt_at_index(p, i)
Returns pointer to element at given index.
Definition: pool.h:461
#define lb_encap_is_ip4(vip)
Definition: lb.h:240
ip46_address_t fp_addr
The address type is not derivable from the fp_addr member.
Definition: fib_types.h:211
int lb_vip_del(u32 vip_index)
Definition: lb.c:733
u8 * format_lb_vip_type(u8 *s, va_list *args)
Definition: lb.c:93
struct _unformat_input_t unformat_input_t
#define pool_put(P, E)
Free an object E in pool P.
Definition: pool.h:273
uword unformat_lb_vip_type(unformat_input_t *input, va_list *args)
Definition: lb.c:103
#define LB_DEFAULT_FLOW_TIMEOUT
Definition: lb.h:45
static const char *const *const lb_dpo_gre4_nodes[DPO_PROTO_NUM]
Definition: lb.c:37
A node in the FIB graph.
Definition: fib_node.h:286
Definition: lb.h:115
#define clib_u32_loop_gt(a, b)
32 bits integer comparison for running values.
Definition: kphash.h:47
fib_node_t fib_node
Registration to FIB event.
Definition: lb.h:60
static const char *const lb_dpo_gre6_ip6[]
Definition: lb.c:44
static const dpo_vft_t lb_vft
Definition: load_balance.c:787
#define pool_free(p)
Free a pool.
Definition: pool.h:354
fib_node_index_t fib_table_entry_special_add(u32 fib_index, const fib_prefix_t *prefix, fib_source_t source, fib_entry_flag_t flags)
Add a 'special' entry to the FIB.
Definition: fib_table.c:380
format_function_t format_ip6_address
Definition: format.h:95
vlib_main_t * vm
Definition: buffer.c:294
#define vec_free(V)
Free vector's memory (no header).
Definition: vec.h:336
volatile u32 * writer_lock
Definition: lb.h:328
#define lb_foreach_vip_counter
Definition: lb.h:119
fib_node_get_t fnv_get
Definition: fib_node.h:274
u32 fib_node_index_t
A typedef of a node index.
Definition: fib_types.h:30
u32 as_index
Definition: lb.h:116
static fib_node_back_walk_rc_t lb_fib_node_back_walk_notify(fib_node_t *node, fib_node_back_walk_ctx_t *ctx)
Definition: lb.c:831
int lb_vip_del_ass(u32 vip_index, ip46_address_t *addresses, u32 n)
Definition: lb.c:598
void dpo_set(dpo_id_t *dpo, dpo_type_t type, dpo_proto_t proto, index_t index)
Set/create a DPO ID. The DPO will be locked.
Definition: dpo.c:185
int lb_vip_add(ip46_address_t *prefix, u8 plen, lb_vip_type_t type, u8 dscp, u32 new_length, u32 *vip_index)
Definition: lb.c:662
dpo_type_t dpo_gre6_type
Definition: lb.h:315
u32 last_garbage_collection
Last time garbage collection was run to free the ASs.
Definition: lb.h:183
lb_as_t * ass
Pool of ASs.
Definition: lb.h:267
lb_vip_type_t type
The type of traffic for this VIP.
Definition: lb.h:206
Context passed between object during a back walk.
Definition: fib_node.h:199
fib_node_index_t fib_table_entry_special_dpo_add(u32 fib_index, const fib_prefix_t *prefix, fib_source_t source, fib_entry_flag_t flags, const dpo_id_t *dpo)
Add a 'special' entry to the FIB that links to the DPO passed. A special entry is an entry that the FI...
Definition: fib_table.c:299
void vlib_validate_simple_counter(vlib_simple_counter_main_t *cm, u32 index)
Validate a simple counter.
Definition: counter.c:78
#define ASSERT(truth)
unsigned int u32
Definition: types.h:88
lb_vip_type_t
The load balancer supports IPv4 and IPv6 traffic and GRE4, GRE6 and L3DSR encap.
Definition: lb.h:143
long ctx[MAX_CONNS]
Definition: main.c:126
int lb_vip_find_index(ip46_address_t *prefix, u8 plen, u32 *vip_index)
Definition: lb.c:414
u8 * format_lb_as(u8 *s, va_list *args)
Definition: lb.c:126
u32 new_flow_table_mask
New flows table length - 1 (length MUST be a power of 2)
Definition: lb.h:178
static void vlib_zero_simple_counter(vlib_simple_counter_main_t *cm, u32 index)
Clear a simple counter. Clears the set of per-thread u16 counters, and the u64 counter.
Definition: counter.h:123
size_t count
Definition: vapi.c:42
lb_per_cpu_t * per_cpu
Some global data is per-cpu.
Definition: lb.h:279
static void lb_dpo_unlock(dpo_id_t *dpo)
Definition: lb.c:787
static vlib_main_t * vlib_get_main(void)
Definition: global_funcs.h:23
static uword is_pow2(uword x)
Definition: clib.h:280
u32 as_index
Definition: lb.c:205
u64 uword
Definition: types.h:112
vlib_simple_counter_main_t vip_counters[LB_N_VIP_COUNTERS]
Per VIP counter.
Definition: lb.h:309
static const char *const lb_dpo_gre4_ip6[]
Definition: lb.c:36
#define DPO_PROTO_NUM
Definition: dpo.h:70
ip6_address_t ip6_src_address
Source address used in IPv6 encapsulated traffic.
Definition: lb.h:289
u8 * format_lb_vip_detailed(u8 *s, va_list *args)
Definition: lb.c:134
#define vec_len(v)
Number of elements in vector (rvalue-only, NULL tolerant)
unsigned char u8
Definition: types.h:56
static fib_node_t * lb_fib_node_get_node(fib_node_index_t index)
Definition: lb.c:790
#define vec_sort_with_function(vec, f)
Sort a vector using the supplied element comparison function.
Definition: vec.h:958
#define DPO_INVALID
An initialiser for DPOs declared on the stack.
Definition: dpo.h:195
void lb_garbage_collection()
Definition: lb.c:249
static int lb_vip_find_index_with_lock(ip46_address_t *prefix, u8 plen, u32 *vip_index)
Definition: lb.c:396
u32 next_hop_child_index
The child index on the FIB entry.
Definition: lb.h:104
dpo_type_t dpo_l3dsr_type
Definition: lb.h:316
A FIB graph node's virtual function table.
Definition: fib_node.h:273
static void * clib_mem_alloc_aligned(uword size, uword align)
Definition: mem.h:120
static vlib_thread_main_t * vlib_get_thread_main()
Definition: global_funcs.h:32
void dpo_reset(dpo_id_t *dpo)
Reset a DPO ID. The DPO will be unlocked.
Definition: dpo.c:231
#define vec_foreach(var, vec)
Vector iterator.
dpo_id_t dpo
The next DPO in the graph to follow.
Definition: lb.h:109
static const char *const lb_dpo_l3dsr_ip4[]
Definition: lb.c:51
u8 flags
Some per-AS flags.
Definition: lb.h:81
u16 dpoi_next_node
The next VLIB node to follow.
Definition: dpo.h:180
#define ip46_prefix_is_ip4(ip46, len)
Definition: kp.h:432
lb_new_flow_entry_t * new_flow_table
Vector mapping (flow-hash & new_connect_table_mask) to AS index.
Definition: lb.h:172
static const char *const *const lb_dpo_l3dsr_nodes[DPO_PROTO_NUM]
Definition: lb.c:52
u8 flags
Flags related to this VIP.
Definition: lb.h:220
#define CLIB_CACHE_LINE_BYTES
Definition: cache.h:59
static char * lb_vip_type_strings[]
Definition: lb.c:85
u8 * format_lb_dpo(u8 *s, va_list *va)
Definition: lb.c:777
Load balancing service is provided per VIP.
Definition: lb.h:164
u32 * as_indexes
Pool of AS indexes used for this VIP.
Definition: lb.h:227
#define lb_hash_size(h)
Definition: lbhash.h:65
uword unformat(unformat_input_t *i, const char *fmt,...)
Definition: unformat.c:972
void dpo_stack(dpo_type_t child_type, dpo_proto_t child_proto, dpo_id_t *dpo, const dpo_id_t *parent)
Stack one DPO object on another, and thus establish a child-parent relationship.
Definition: dpo.c:515
#define lb_put_writer_lock()
Definition: lb.c:30
fib_node_index_t next_hop_fib_entry_index
The FIB entry index for the next-hop.
Definition: lb.h:99
static_always_inline u32 lb_hash_elts(lb_hash_t *h, u32 time_now)
Definition: lbhash.h:185
static uword pool_elts(void *v)
Number of active elements in a pool.
Definition: pool.h:128