FD.io VPP  v16.06
Vector Packet Processing
init.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2015 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  * http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 #include <vnet/vnet.h>
16 #include <vppinfra/vec.h>
17 #include <vppinfra/error.h>
18 #include <vppinfra/format.h>
19 #include <vppinfra/bitmap.h>
20 
21 #include <vnet/ethernet/ethernet.h>
22 #include <vnet/devices/dpdk/dpdk.h>
23 #include <vlib/unix/physmem.h>
24 #include <vlib/pci/pci.h>
25 #include <vlib/unix/pci.h>
26 
27 #include <stdio.h>
28 #include <stdlib.h>
29 #include <unistd.h>
30 #include <sys/stat.h>
31 #include <sys/mount.h>
32 #include <string.h>
33 #include <fcntl.h>
34 
35 #include "dpdk_priv.h"
36 
38 
39 /* force linker to link functions used by vlib and declared weak */
41  &rte_pktmbuf_init,
42  &rte_pktmbuf_pool_init,
43 };
44 
45 #define LINK_STATE_ELOGS 0
46 
47 #define DEFAULT_HUGE_DIR "/run/vpp/hugepages"
48 #define VPP_RUN_DIR "/run/vpp"
49 
50 /* Port configuration, mildly modified Intel app values */
51 
52 static struct rte_eth_conf port_conf_template = { /* baseline port config; copied into each device's xd->port_conf inside the per-port loop */
53  .rxmode = {
54  .split_hdr_size = 0,
55  .header_split = 0, /**< Header Split disabled */
56  .hw_ip_checksum = 0, /**< IP checksum offload disabled */
57  .hw_vlan_filter = 0, /**< VLAN filtering disabled */
58  .hw_strip_crc = 1, /**< CRC stripped by hardware */
59  }, /* rxmode.jumbo_frame is not set here: it is assigned at run time from dm->no_multi_seg just before the template is copied */
60  .txmode = {
61  .mq_mode = ETH_MQ_TX_NONE,
62  },
63 };
64 
67 {
68  vlib_main_t * vm = vlib_get_main();
70  int rv;
71  int j;
72 
73  ASSERT(os_get_cpu_number() == 0);
74 
75  if (xd->admin_up) {
77  rte_eth_dev_stop (xd->device_index);
78  }
79 
80  rv = rte_eth_dev_configure (xd->device_index, xd->rx_q_used,
81  xd->tx_q_used, &xd->port_conf);
82 
83  if (rv < 0)
84  return clib_error_return (0, "rte_eth_dev_configure[%d]: err %d",
85  xd->device_index, rv);
86 
87  /* Set up one TX-queue per worker thread */
88  for (j = 0; j < xd->tx_q_used; j++)
89  {
90  rv = rte_eth_tx_queue_setup(xd->device_index, j, xd->nb_tx_desc,
91  xd->cpu_socket, &xd->tx_conf);
92 
93  /* retry with any other CPU socket */
94  if (rv < 0)
95  rv = rte_eth_tx_queue_setup(xd->device_index, j, xd->nb_tx_desc,
96  SOCKET_ID_ANY, &xd->tx_conf);
97  if (rv < 0)
98  break;
99  }
100 
101  if (rv < 0)
102  return clib_error_return (0, "rte_eth_tx_queue_setup[%d]: err %d",
103  xd->device_index, rv);
104 
105  for (j = 0; j < xd->rx_q_used; j++)
106  {
107 
108  rv = rte_eth_rx_queue_setup(xd->device_index, j, xd->nb_rx_desc,
109  xd->cpu_socket, 0,
110  bm->pktmbuf_pools[xd->cpu_socket_id_by_queue[j]]);
111 
112  /* retry with any other CPU socket */
113  if (rv < 0)
114  rv = rte_eth_rx_queue_setup(xd->device_index, j, xd->nb_rx_desc,
115  SOCKET_ID_ANY, 0,
116  bm->pktmbuf_pools[xd->cpu_socket_id_by_queue[j]]);
117  if (rv < 0)
118  return clib_error_return (0, "rte_eth_rx_queue_setup[%d]: err %d",
119  xd->device_index, rv);
120  }
121 
122  if (xd->admin_up) {
123  rte_eth_dev_start (xd->device_index);
124  }
125  return 0;
126 }
127 
130  u32 flags)
131 {
132  dpdk_main_t * dm = &dpdk_main;
134  u32 old = 0;
135 
137  {
138  old = xd->promisc;
140 
141  if (xd->admin_up)
142  {
143  if (xd->promisc)
144  rte_eth_promiscuous_enable(xd->device_index);
145  else
146  rte_eth_promiscuous_disable(xd->device_index);
147  }
148  }
149  else if (ETHERNET_INTERFACE_FLAG_CONFIG_MTU(flags))
150  {
151  /*
152  * DAW-FIXME: The Cisco VIC firmware does not provide an api for a
153  * driver to dynamically change the mtu. If/when the
154  * VIC firmware gets fixed, then this should be removed.
155  */
156  if (xd->pmd == VNET_DPDK_PMD_VICE ||
157  xd->pmd == VNET_DPDK_PMD_ENIC)
158  {
159  struct rte_eth_dev_info dev_info;
160 
161  /*
162  * Restore mtu to what has been set by CIMC in the firmware cfg.
163  */
164  rte_eth_dev_info_get(xd->device_index, &dev_info);
165  hi->max_packet_bytes = dev_info.max_rx_pktlen;
166 
168  "Cisco VIC mtu can only be changed "
169  "using CIMC then rebooting the server!");
170  }
171  else
172  {
173  int rv;
174 
175  xd->port_conf.rxmode.max_rx_pkt_len = hi->max_packet_bytes;
176 
177  if (xd->admin_up)
178  rte_eth_dev_stop (xd->device_index);
179 
180  rv = rte_eth_dev_configure
181  (xd->device_index,
182  xd->rx_q_used,
183  xd->tx_q_used,
184  &xd->port_conf);
185 
186  if (rv < 0)
188  "rte_eth_dev_configure[%d]: err %d",
189  xd->device_index, rv);
190 
191  rte_eth_dev_set_mtu(xd->device_index, hi->max_packet_bytes);
192 
193  if (xd->admin_up)
194  rte_eth_dev_start (xd->device_index);
195  }
196  }
197  return old;
198 }
199 
200 #ifdef NETMAP
201 extern int rte_netmap_probe(void);
202 #endif
203 
204 void
206 {
207  int q;
208  vec_validate(xd->lockp, xd->tx_q_used - 1);
209  for (q = 0; q < xd->tx_q_used; q++)
210  {
213  memset ((void *) xd->lockp[q], 0, CLIB_CACHE_LINE_BYTES);
214  }
215  xd->need_txlock = 1;
216 }
217 
218 void
220 {
221  int q;
222 
223  for (q = 0; q < vec_len(xd->lockp); q++)
224  clib_mem_free((void *) xd->lockp[q]);
225  vec_free(xd->lockp);
226  xd->lockp = 0;
227  xd->need_txlock = 0;
228 }
229 
230 static clib_error_t *
232 {
233  u32 nports;
234  u32 nb_desc = 0;
235  int i;
236  clib_error_t * error;
237  vlib_main_t * vm = vlib_get_main();
239  vnet_sw_interface_t * sw;
241  dpdk_device_t * xd;
243  uword * p;
244 
245  u32 next_cpu = 0;
246  u8 af_packet_port_id = 0;
247 
248  dm->input_cpu_first_index = 0;
249  dm->input_cpu_count = 1;
250 
251  /* find out which cpus will be used for input */
253  tr = p ? (vlib_thread_registration_t *) p[0] : 0;
254 
255  if (!tr || tr->count == 0)
256  {
257  /* no io threads, workers doing input */
258  p = hash_get_mem (tm->thread_registrations_by_name, "workers");
259  tr = p ? (vlib_thread_registration_t *) p[0] : 0;
260  }
261  else
262  {
263  dm->have_io_threads = 1;
264  }
265 
266  if (tr && tr->count > 0)
267  {
269  dm->input_cpu_count = tr->count;
270  }
271 
274 
277 
278 #ifdef NETMAP
279  if(rte_netmap_probe() < 0)
280  return clib_error_return (0, "rte netmap probe failed");
281 #endif
282 
283  nports = rte_eth_dev_count();
284  if (nports < 1)
285  {
286  clib_warning ("DPDK drivers found no ports...");
287  }
288 
289  if (CLIB_DEBUG > 0)
290  clib_warning ("DPDK drivers found %d ports...", nports);
291 
292  /*
293  * All buffers are all allocated from the same rte_mempool.
294  * Thus they all have the same number of data bytes.
295  */
298  vm, VLIB_BUFFER_DEFAULT_FREE_LIST_BYTES, "dpdk rx");
299 
300  for (i = 0; i < nports; i++)
301  {
302  u8 addr[6];
303  int j;
304  struct rte_eth_dev_info dev_info;
305  clib_error_t * rv;
306  struct rte_eth_link l;
307 
308  /* Create vnet interface */
312  xd->cpu_socket = (i8) rte_eth_dev_socket_id(i);
313  rte_eth_dev_info_get(i, &dev_info);
314 
315  clib_memcpy(&xd->tx_conf, &dev_info.default_txconf,
316  sizeof(struct rte_eth_txconf));
317  if (dm->no_multi_seg)
318  {
319  xd->tx_conf.txq_flags |= ETH_TXQ_FLAGS_NOMULTSEGS;
320  port_conf_template.rxmode.jumbo_frame = 0;
321  }
322  else
323  {
324  xd->tx_conf.txq_flags &= ~ETH_TXQ_FLAGS_NOMULTSEGS;
325  port_conf_template.rxmode.jumbo_frame = 1;
326  }
327 
328  clib_memcpy(&xd->port_conf, &port_conf_template, sizeof(struct rte_eth_conf));
329 
330  xd->tx_q_used = clib_min(dev_info.max_tx_queues, tm->n_vlib_mains);
331 
332  if (dm->max_tx_queues)
333  xd->tx_q_used = clib_min(xd->tx_q_used, dm->max_tx_queues);
334 
335  if (dm->use_rss > 1 && dev_info.max_rx_queues >= dm->use_rss)
336  {
337  xd->rx_q_used = dm->use_rss;
338  xd->port_conf.rxmode.mq_mode = ETH_MQ_RX_RSS;
339  xd->port_conf.rx_adv_conf.rss_conf.rss_hf = ETH_RSS_IP | ETH_RSS_UDP | ETH_RSS_TCP;
340  }
341  else
342  xd->rx_q_used = 1;
343 
345 
346  /* workaround for drivers not setting driver_name */
347  if (!dev_info.driver_name)
348  dev_info.driver_name = dev_info.pci_dev->driver->name;
349  ASSERT(dev_info.driver_name);
350 
351  if (!xd->pmd) {
352 
353 
354 #define _(s,f) else if (!strcmp(dev_info.driver_name, s)) \
355  xd->pmd = VNET_DPDK_PMD_##f;
356  if (0)
357  ;
359 #undef _
360  else
362 
363 
364  switch (xd->pmd) {
365  /* 1G adapters */
366  case VNET_DPDK_PMD_E1000EM:
367  case VNET_DPDK_PMD_IGB:
368  case VNET_DPDK_PMD_IGBVF:
370  break;
371 
372  /* 10G adapters */
373  case VNET_DPDK_PMD_IXGBE:
374  case VNET_DPDK_PMD_IXGBEVF:
375  case VNET_DPDK_PMD_THUNDERX:
379  break;
380 
381  /* Cisco VIC */
382  case VNET_DPDK_PMD_VICE:
383  case VNET_DPDK_PMD_ENIC:
384  rte_eth_link_get_nowait(i, &l);
386  if (l.link_speed == 40000)
387  {
390  }
391  else
392  {
395  }
396  break;
397 
398  /* Intel Fortville */
399  case VNET_DPDK_PMD_I40E:
400  case VNET_DPDK_PMD_I40EVF:
404 
405  switch (dev_info.pci_dev->id.device_id) {
406  case I40E_DEV_ID_10G_BASE_T:
407  case I40E_DEV_ID_SFP_XL710:
409  break;
410  case I40E_DEV_ID_QSFP_A:
411  case I40E_DEV_ID_QSFP_B:
412  case I40E_DEV_ID_QSFP_C:
414  break;
415  case I40E_DEV_ID_VF:
416  rte_eth_link_get_nowait(i, &l);
417  xd->port_type = l.link_speed == 10000 ?
419  break;
420  default:
422  }
423  break;
424 
425  case VNET_DPDK_PMD_CXGBE:
426  switch (dev_info.pci_dev->id.device_id) {
427  case 0x5410: /* T580-LP-cr */
431  break;
432  default:
436  }
437  break;
438 
439  /* Intel Red Rock Canyon */
440  case VNET_DPDK_PMD_FM10K:
444  break;
445 
446  /* virtio */
447  case VNET_DPDK_PMD_VIRTIO:
451  break;
452 
453  /* vmxnet3 */
454  case VNET_DPDK_PMD_VMXNET3:
456  xd->tx_conf.txq_flags |= ETH_TXQ_FLAGS_NOMULTSEGS;
457  break;
458 
459  case VNET_DPDK_PMD_AF_PACKET:
461  xd->af_packet_port_id = af_packet_port_id++;
462  break;
463 
464  case VNET_DPDK_PMD_BOND:
466  break;
467 
468  default:
470  }
471 
472  #ifdef NETMAP
473  if(strncmp(dev_info.driver_name, "vale", 4) == 0
474  || strncmp(dev_info.driver_name, "netmap", 6) == 0)
475  {
476  xd->pmd = VNET_DPDK_PMD_NETMAP;
477  xd->port_type = VNET_DPDK_PORT_TYPE_NETMAP;
478  }
479  #endif
480 
481  }
482 
483  /*
484  * Ensure default mtu is not > the mtu read from the hardware.
485  * Otherwise rte_eth_dev_configure() will fail and the port will
486  * not be available.
487  */
488  if (ETHERNET_MAX_PACKET_BYTES > dev_info.max_rx_pktlen)
489  {
490  /*
491  * This device does not support the platform's max frame
492  * size. Use its advertised mru instead.
493  */
494  xd->port_conf.rxmode.max_rx_pkt_len = dev_info.max_rx_pktlen;
495  }
496  else
497  {
498  xd->port_conf.rxmode.max_rx_pkt_len = ETHERNET_MAX_PACKET_BYTES;
499 
500  /*
501  * Some platforms do not account for Ethernet FCS (4 bytes) in
502  * MTU calculations. To interop with them increase mru but only
503  * if the device's settings can support it.
504  */
505  if ((dev_info.max_rx_pktlen >= (ETHERNET_MAX_PACKET_BYTES + 4)) &&
506  xd->port_conf.rxmode.hw_strip_crc)
507  {
508  /*
509  * Allow additional 4 bytes (for Ethernet FCS). These bytes are
510  * stripped by h/w and so will not consume any buffer memory.
511  */
512  xd->port_conf.rxmode.max_rx_pkt_len += 4;
513  }
514  }
515 
516 #if RTE_VERSION < RTE_VERSION_NUM(16, 4, 0, 0)
517  /*
518  * Older VMXNET3 driver doesn't support jumbo / multi-buffer pkts
519  */
520  if (xd->pmd == VNET_DPDK_PMD_VMXNET3)
521  {
522  xd->port_conf.rxmode.max_rx_pkt_len = 1518;
523  xd->port_conf.rxmode.jumbo_frame = 0;
524  }
525 #endif
526 
527  if (xd->pmd == VNET_DPDK_PMD_AF_PACKET)
528  {
529  f64 now = vlib_time_now(vm);
530  u32 rnd;
531  rnd = (u32) (now * 1e6);
532  rnd = random_u32 (&rnd);
533  clib_memcpy (addr+2, &rnd, sizeof(rnd));
534  addr[0] = 2;
535  addr[1] = 0xfe;
536  }
537  else
538  rte_eth_macaddr_get(i,(struct ether_addr *)addr);
539 
540  if (xd->tx_q_used < tm->n_vlib_mains)
542 
543  xd->device_index = xd - dm->devices;
544  ASSERT(i == xd->device_index);
545  xd->per_interface_next_index = ~0;
546 
547  /* assign interface to input thread */
549  int q;
550 
551  for (q = 0; q < xd->rx_q_used; q++)
552  {
553  int cpu = dm->input_cpu_first_index + next_cpu;
554  unsigned lcore = vlib_worker_threads[cpu].dpdk_lcore_id;
555 
556  /*
557  * numa node for worker thread handling this queue
558  * needed for taking buffers from the right mempool
559  */
561  xd->cpu_socket_id_by_queue[q] = rte_lcore_to_socket_id(lcore);
562 
563  /*
564  * construct vector of (device,queue) pairs for each worker thread
565  */
566  vec_add2(dm->devices_by_cpu[cpu], dq, 1);
567  dq->device = xd->device_index;
568  dq->queue_id = q;
569 
570  next_cpu++;
571  if (next_cpu == dm->input_cpu_count)
572  next_cpu = 0;
573  }
574 
577  for (j = 0; j < tm->n_vlib_mains; j++)
578  {
581  vec_reset_length (xd->tx_vectors[j]);
582  }
583 
586  for (j = 0; j< xd->rx_q_used; j++)
587  {
590  vec_reset_length (xd->rx_vectors[j]);
591  }
592 
595 
596  rv = dpdk_port_setup(dm, xd);
597 
598  if (rv < 0)
599  return rv;
600 
601  /* count the number of descriptors used for this device */
602  nb_desc += xd->nb_rx_desc + xd->nb_tx_desc * xd->tx_q_used;
603 
605  (dm->vnet_main,
606  dpdk_device_class.index,
607  xd->device_index,
608  /* ethernet address */ addr,
609  &xd->vlib_hw_if_index,
611  if (error)
612  return error;
613 
615  xd->vlib_sw_if_index = sw->sw_if_index;
617 
618  /*
619  * DAW-FIXME: The Cisco VIC firmware does not provide an api for a
620  * driver to dynamically change the mtu. If/when the
621  * VIC firmware gets fixed, then this should be removed.
622  */
623  if (xd->pmd == VNET_DPDK_PMD_VICE ||
624  xd->pmd == VNET_DPDK_PMD_ENIC)
625  {
626  /*
627  * Initialize mtu to what has been set by CIMC in the firmware cfg.
628  */
629  hi->max_packet_bytes = dev_info.max_rx_pktlen;
630  /*
631  * remove vlan tag from VIC port to fix VLAN0 issue.
632  * TODO Handle VLAN tagged traffic
633  */
634  int vlan_off;
635  vlan_off = rte_eth_dev_get_vlan_offload(xd->device_index);
636  vlan_off |= ETH_VLAN_STRIP_OFFLOAD;
637  rte_eth_dev_set_vlan_offload(xd->device_index, vlan_off);
638  }
639 
640 #if RTE_VERSION < RTE_VERSION_NUM(16, 4, 0, 0)
641  /*
642  * Older VMXNET3 driver doesn't support jumbo / multi-buffer pkts
643  */
644  else if (xd->pmd == VNET_DPDK_PMD_VMXNET3)
645  hi->max_packet_bytes = 1518;
646 #endif
647 
649  xd->port_conf.rxmode.max_rx_pkt_len - sizeof(ethernet_header_t);
650 
651  rte_eth_dev_set_mtu(xd->device_index, hi->max_packet_bytes);
652  }
653 
654 #ifdef RTE_LIBRTE_KNI
655  if (dm->num_kni) {
656  clib_warning("Initializing KNI interfaces...");
657  rte_kni_init(dm->num_kni);
658  for (i = 0; i < dm->num_kni; i++)
659  {
660  u8 addr[6];
661  int j;
662 
663  /* Create vnet interface */
666 
667  xd->device_index = xd - dm->devices;
668  ASSERT(nports + i == xd->device_index);
669  xd->per_interface_next_index = ~0;
670  xd->kni_port_id = i;
671  xd->cpu_socket = -1;
672  hash_set (dm->dpdk_device_by_kni_port_id, i, xd - dm->devices);
673  xd->rx_q_used = 1;
674 
675  /* assign interface to input thread */
678  dq->device = xd->device_index;
679  dq->queue_id = 0;
680 
683  for (j = 0; j < tm->n_vlib_mains; j++)
684  {
687  vec_reset_length (xd->tx_vectors[j]);
688  }
689 
692  for (j = 0; j< xd->rx_q_used; j++)
693  {
696  vec_reset_length (xd->rx_vectors[j]);
697  }
698 
701 
702  /* FIXME Set up one TX-queue per worker thread */
703 
704  {
705  f64 now = vlib_time_now(vm);
706  u32 rnd;
707  rnd = (u32) (now * 1e6);
708  rnd = random_u32 (&rnd);
709 
710  clib_memcpy (addr+2, &rnd, sizeof(rnd));
711  addr[0] = 2;
712  addr[1] = 0xfe;
713  }
714 
716  (dm->vnet_main,
717  dpdk_device_class.index,
718  xd->device_index,
719  /* ethernet address */ addr,
720  &xd->vlib_hw_if_index,
722 
723  if (error)
724  return error;
725 
727  xd->vlib_sw_if_index = sw->sw_if_index;
729  }
730  }
731 #endif
732 
733  if (nb_desc > dm->num_mbufs)
734  clib_warning ("%d mbufs allocated but total rx/tx ring size is %d\n",
735  dm->num_mbufs, nb_desc);
736 
737  /* init next vhost-user if index */
738  dm->next_vu_if_id = 0;
739 
740  return 0;
741 }
742 
743 static void
745 {
747  clib_error_t * error;
748  vlib_pci_device_t * d;
750  u8 * pci_addr = 0;
751 
752  pool_foreach (d, pm->pci_devs, ({
753  c = &d->config0.header;
754  vec_reset_length (pci_addr);
755  pci_addr = format (pci_addr, "%U%c", format_vlib_pci_addr, &d->bus_address, 0);
756 
757  if (c->device_class != PCI_CLASS_NETWORK_ETHERNET)
758  continue;
759 
760  /* if whitelist exists process only whitelisted devices */
761  if (dm->eth_if_whitelist &&
762  !strstr ((char *) dm->eth_if_whitelist, (char *) pci_addr))
763  continue;
764 
765  /* virtio */
766  if (c->vendor_id == 0x1af4 && c->device_id == 0x1000)
767  ;
768  /* vmxnet3 */
769  else if (c->vendor_id == 0x15ad && c->device_id == 0x07b0)
770  ;
771  /* all Intel devices */
772  else if (c->vendor_id == 0x8086)
773  ;
774  /* Cisco VIC */
775  else if (c->vendor_id == 0x1137 && c->device_id == 0x0043)
776  ;
777  /* Chelsio T4/T5 */
778  else if (c->vendor_id == 0x1425 && (c->device_id & 0xe000) == 0x4000)
779  ;
780  else
781  {
782  clib_warning ("Unsupported Ethernet PCI device 0x%04x:0x%04x found "
783  "at PCI address %s\n", (u16) c->vendor_id, (u16) c->device_id,
784  pci_addr);
785  continue;
786  }
787 
788  error = vlib_pci_bind_to_uio (d, (char *) dm->uio_driver_name);
789 
790  if (error)
791  {
792  if (!dm->eth_if_whitelist)
793  dm->eth_if_blacklist = format (dm->eth_if_blacklist, "%U ",
795  clib_error_report (error);
796  }
797  }));
798  vec_free (pci_addr);
799 }
800 
801 static clib_error_t *
803 {
804  clib_error_t * error = 0;
805  dpdk_main_t * dm = &dpdk_main;
807  u8 * s, * tmp = 0;
808  u8 * pci_dev_id = 0;
809  u8 * rte_cmd = 0, * ethname = 0;
810  u32 log_level;
811  int ret, i;
812  char * fmt;
813 #ifdef NETMAP
814  int rxrings, txrings, rxslots, txslots, txburst;
815  char * nmnam;
816 #endif
817  unformat_input_t _in;
818  unformat_input_t * in = &_in;
819  u8 no_pci = 0;
820  u8 no_huge = 0;
821  u8 huge_dir = 0;
822  u8 file_prefix = 0;
823  u8 * socket_mem = 0;
824 
825  // MATT-FIXME: inverted virtio-vhost logic to use virtio by default
826  dm->use_virtio_vhost = 1;
827 
829  {
830  /* Prime the pump */
831  if (unformat (input, "no-hugetlb"))
832  {
833  vec_add1 (dm->eal_init_args, (u8 *) "no-huge");
834  no_huge = 1;
835  }
836 
837  else if (unformat (input, "enable-tcp-udp-checksum"))
838  {
839  dm->buffer_flags_template &=
841  }
842 
843  else if (unformat (input, "decimal-interface-names"))
845 
846  else if (unformat (input, "no-multi-seg"))
847  dm->no_multi_seg = 1;
848 
849  else if (unformat (input, "dev %s", &pci_dev_id))
850  {
851  if (dm->eth_if_whitelist)
852  {
853  /*
854  * Don't add duplicate device id's.
855  */
856  if (strstr ((char *)dm->eth_if_whitelist, (char *)pci_dev_id))
857  continue;
858 
859  _vec_len (dm->eth_if_whitelist) -= 1; // chomp trailing NULL.
860  dm->eth_if_whitelist = format (dm->eth_if_whitelist, " %s%c",
861  pci_dev_id, 0);
862  }
863  else
864  dm->eth_if_whitelist = format (0, "%s%c", pci_dev_id, 0);
865  }
866 
867 #ifdef NETMAP
868  else if (unformat(input, "netmap %s/%d:%d/%d:%d/%d",
869  &nmname, &rxrings, &rxslots, &txrings, &txslots, &txburst)) {
870  char * rv;
871  rv = (char *)
872  eth_nm_args(nmname, rxrings, rxslots, txrings, txslots, txburst);
873  if (rv) {
874  error = clib_error_return (0, "%s", rv);
875  goto done;
876  }
877  }else if (unformat(input, "netmap %s", &nmname)) {
878  char * rv;
879  rv = (char *)
880  eth_nm_args(nmname, 0, 0, 0, 0, 0);
881  if (rv) {
882  error = clib_error_return (0, "%s", rv);
883  goto done;
884  }
885  }
886 #endif
887 
888  else if (unformat (input, "num-mbufs %d", &dm->num_mbufs))
889  ;
890  else if (unformat (input, "max-tx-queues %d", &dm->max_tx_queues))
891  ;
892  else if (unformat (input, "kni %d", &dm->num_kni))
893  ;
894  else if (unformat (input, "uio-driver %s", &dm->uio_driver_name))
895  ;
896  else if (unformat (input, "socket-mem %s", &socket_mem))
897  ;
898  else if (unformat (input, "vhost-user-coalesce-frames %d", &dm->vhost_coalesce_frames))
899  ;
900  else if (unformat (input, "vhost-user-coalesce-time %f", &dm->vhost_coalesce_time))
901  ;
902  else if (unformat (input, "enable-vhost-user"))
903  dm->use_virtio_vhost = 0;
904  else if (unformat (input, "rss %d", &dm->use_rss))
905  ;
906 
907 #define _(a) \
908  else if (unformat(input, #a)) \
909  { \
910  if (!strncmp(#a, "no-pci", 6)) \
911  no_pci = 1; \
912  tmp = format (0, "--%s%c", #a, 0); \
913  vec_add1 (dm->eal_init_args, tmp); \
914  }
916 #undef _
917 
918 #define _(a) \
919  else if (unformat(input, #a " %s", &s)) \
920  { \
921  if (!strncmp(#a, "huge-dir", 8)) \
922  huge_dir = 1; \
923  else if (!strncmp(#a, "file-prefix", 11)) \
924  file_prefix = 1; \
925  tmp = format (0, "--%s%c", #a, 0); \
926  vec_add1 (dm->eal_init_args, tmp); \
927  vec_add1 (s, 0); \
928  vec_add1 (dm->eal_init_args, s); \
929  }
931 #undef _
932 
933 #define _(a,b) \
934  else if (unformat(input, #a " %s", &s)) \
935  { \
936  tmp = format (0, "-%s%c", #b, 0); \
937  vec_add1 (dm->eal_init_args, tmp); \
938  vec_add1 (s, 0); \
939  vec_add1 (dm->eal_init_args, s); \
940  }
942 #undef _
943 
944 #define _(a,b) \
945  else if (unformat(input, #a " %s", &s)) \
946  { \
947  tmp = format (0, "-%s%c", #b, 0); \
948  vec_add1 (dm->eal_init_args, tmp); \
949  vec_add1 (s, 0); \
950  vec_add1 (dm->eal_init_args, s); \
951  dm->a##_set_manually = 1; \
952  }
954 #undef _
955 
956  else if (unformat(input, "default"))
957  ;
958 
959  else
960  {
961  error = clib_error_return (0, "unknown input `%U'",
962  format_unformat_error, input);
963  goto done;
964  }
965  }
966 
967  if (!dm->uio_driver_name)
968  dm->uio_driver_name = format (0, "igb_uio%c", 0);
969 
970  /*
971  * Use 1G huge pages if available.
972  */
973  if (!no_huge && !huge_dir)
974  {
975  u32 x, * mem_by_socket = 0;
976  uword c = 0;
977  u8 use_1g = 1;
978  u8 use_2m = 1;
979  u8 less_than_1g = 1;
980  int rv;
981 
982  umount(DEFAULT_HUGE_DIR);
983 
984  /* Process "socket-mem" parameter value */
985  if (vec_len (socket_mem))
986  {
987  unformat_input_t in;
988  unformat_init_vector(&in, socket_mem);
990  {
991  if (unformat (&in, "%u,", &x))
992  ;
993  else if (unformat (&in, "%u", &x))
994  ;
995  else if (unformat (&in, ","))
996  x = 0;
997  else
998  break;
999 
1000  vec_add1(mem_by_socket, x);
1001 
1002  if (x > 1023)
1003  less_than_1g = 0;
1004  }
1005  /* Note: unformat_free vec_frees(in.buffer), aka socket_mem... */
1006  unformat_free(&in);
1007  socket_mem = 0;
1008  }
1009  else
1010  {
1012  {
1013  vec_validate(mem_by_socket, c);
1014  mem_by_socket[c] = 512; /* default per-socket mem */
1015  }
1016  ));
1017  }
1018 
1019  /* check whether enough free 1GB or 2MB huge pages are available on each socket */
1021  {
1022  u32 pages_avail, page_size, mem;
1023  u8 *s = 0;
1024  char * path = "/sys/devices/system/node/node%u/hugepages/"
1025  "hugepages-%ukB/free_hugepages%c";
1026 
1027  vec_validate(mem_by_socket, c);
1028  mem = mem_by_socket[c];
1029 
1030  page_size = 1024;
1031  pages_avail = 0;
1032  s = format (s, path, c, page_size * 1024, 0);
1033  read_sys_fs ((char *) s, "%u", &pages_avail);
1034  vec_reset_length (s);
1035 
1036  if (page_size * pages_avail < mem)
1037  use_1g = 0;
1038 
1039  page_size = 2;
1040  pages_avail = 0;
1041  s = format (s, path, c, page_size * 1024, 0);
1042  read_sys_fs ((char *) s, "%u", &pages_avail);
1043  vec_reset_length (s);
1044 
1045  if (page_size * pages_avail < mem)
1046  use_2m = 0;
1047 
1048  vec_free(s);
1049  }));
1050  _vec_len (mem_by_socket) = c + 1;
1051 
1052  /* regenerate socket_mem string */
1053  vec_foreach_index (x, mem_by_socket)
1054  socket_mem = format (socket_mem, "%s%u",
1055  socket_mem ? "," : "",
1056  mem_by_socket[x]);
1057  socket_mem = format (socket_mem, "%c", 0);
1058 
1059  vec_free (mem_by_socket);
1060 
1061  rv = mkdir(VPP_RUN_DIR, 0755);
1062  if (rv && errno != EEXIST)
1063  {
1064  error = clib_error_return (0, "mkdir '%s' failed errno %d",
1065  VPP_RUN_DIR, errno);
1066  goto done;
1067  }
1068 
1069  rv = mkdir(DEFAULT_HUGE_DIR, 0755);
1070  if (rv && errno != EEXIST)
1071  {
1072  error = clib_error_return (0, "mkdir '%s' failed errno %d",
1073  DEFAULT_HUGE_DIR, errno);
1074  goto done;
1075  }
1076 
1077  if (use_1g && !(less_than_1g && use_2m))
1078  {
1079  rv = mount("none", DEFAULT_HUGE_DIR, "hugetlbfs", 0, "pagesize=1G");
1080  }
1081  else if (use_2m)
1082  {
1083  rv = mount("none", DEFAULT_HUGE_DIR, "hugetlbfs", 0, NULL);
1084  }
1085  else
1086  {
1087  return clib_error_return (0, "not enough free huge pages");
1088  }
1089 
1090  if (rv)
1091  {
1092  error = clib_error_return (0, "mount failed %d", errno);
1093  goto done;
1094  }
1095 
1096  tmp = format (0, "--huge-dir%c", 0);
1097  vec_add1 (dm->eal_init_args, tmp);
1098  tmp = format (0, "%s%c", DEFAULT_HUGE_DIR, 0);
1099  vec_add1 (dm->eal_init_args, tmp);
1100  if (!file_prefix)
1101  {
1102  tmp = format (0, "--file-prefix%c", 0);
1103  vec_add1 (dm->eal_init_args, tmp);
1104  tmp = format (0, "vpp%c", 0);
1105  vec_add1 (dm->eal_init_args, tmp);
1106  }
1107  }
1108 
1109  vec_free (rte_cmd);
1110  vec_free (ethname);
1111 
1112  if (error)
1113  return error;
1114 
1115  /* I'll bet that -c and -n must be the first and second args... */
1116  if (!dm->coremask_set_manually)
1117  {
1119  uword * coremask = 0;
1120  int i;
1121 
1122  /* main thread core */
1123  coremask = clib_bitmap_set(coremask, tm->main_lcore, 1);
1124 
1125  for (i = 0; i < vec_len (tm->registrations); i++)
1126  {
1127  tr = tm->registrations[i];
1128  coremask = clib_bitmap_or(coremask, tr->coremask);
1129  }
1130 
1131  vec_insert (dm->eal_init_args, 2, 1);
1132  dm->eal_init_args[1] = (u8 *) "-c";
1133  tmp = format (0, "%U%c", format_bitmap_hex, coremask, 0);
1134  dm->eal_init_args[2] = tmp;
1135  clib_bitmap_free(coremask);
1136  }
1137 
1138  if (!dm->nchannels_set_manually)
1139  {
1140  vec_insert (dm->eal_init_args, 2, 3);
1141  dm->eal_init_args[3] = (u8 *) "-n";
1142  tmp = format (0, "%d", dm->nchannels);
1143  dm->eal_init_args[4] = tmp;
1144  }
1145 
1146  if (no_pci == 0 && geteuid() == 0)
1148 
1149  /*
1150  * If there are whitelisted devices,
1151  * add the whitelist option & device list to the dpdk arg list...
1152  */
1153  if (dm->eth_if_whitelist)
1154  {
1155  unformat_init_string (in, (char *)dm->eth_if_whitelist,
1156  vec_len(dm->eth_if_whitelist) - 1);
1157  fmt = "-w%c";
1158  }
1159 
1160  /*
1161  * Otherwise add the blacklisted devices to the dpdk arg list.
1162  */
1163  else
1164  {
1165  unformat_init_string (in, (char *)dm->eth_if_blacklist,
1166  vec_len(dm->eth_if_blacklist) - 1);
1167  fmt = "-b%c";
1168  }
1169 
1171  {
1172  tmp = format (0, fmt, 0);
1173  vec_add1 (dm->eal_init_args, tmp);
1174  unformat (in, "%s", &pci_dev_id);
1175  vec_add1 (dm->eal_init_args, pci_dev_id);
1176  }
1177 
1178  /* set master-lcore */
1179  tmp = format (0, "--master-lcore%c", 0);
1180  vec_add1 (dm->eal_init_args, tmp);
1181  tmp = format (0, "%u%c", tm->main_lcore, 0);
1182  vec_add1 (dm->eal_init_args, tmp);
1183 
1184  /* set socket-mem */
1185  tmp = format (0, "--socket-mem%c", 0);
1186  vec_add1 (dm->eal_init_args, tmp);
1187  tmp = format (0, "%s%c", socket_mem, 0);
1188  vec_add1 (dm->eal_init_args, tmp);
1189 
1190  /* NULL terminate the "argv" vector, in case of stupidity */
1191  vec_add1 (dm->eal_init_args, 0);
1192  _vec_len(dm->eal_init_args) -= 1;
1193 
1194  /* Set up DPDK eal and packet mbuf pool early. */
1195 
1196  log_level = (CLIB_DEBUG > 0) ? RTE_LOG_DEBUG : RTE_LOG_NOTICE;
1197 
1198  rte_set_log_level (log_level);
1199 
1200  vm = dm->vlib_main;
1201 
1202  /* make a copy of the args, as rte_eal_init tends to mess with the arg array */
1203  for (i = 1; i < vec_len(dm->eal_init_args); i++)
1204  dm->eal_init_args_str = format(dm->eal_init_args_str, "%s ",
1205  dm->eal_init_args[i]);
1206 
1207  ret = rte_eal_init(vec_len(dm->eal_init_args), (char **) dm->eal_init_args);
1208 
1209  /* lazy umount hugepages */
1210  umount2(DEFAULT_HUGE_DIR, MNT_DETACH);
1211 
1212  if (ret < 0)
1213  return clib_error_return (0, "rte_eal_init returned %d", ret);
1214 
1215  /* Dump the physical memory layout prior to creating the mbuf_pool */
1216  fprintf(stdout, "DPDK physical memory layout:\n");
1217  rte_dump_physmem_layout(stdout);
1218 
1219  /* main thread 1st */
1220  error = vlib_buffer_pool_create(vm, dm->num_mbufs, rte_socket_id());
1221  if (error)
1222  return error;
1223 
1224  for (i = 0; i < RTE_MAX_LCORE; i++)
1225  {
1226  error = vlib_buffer_pool_create(vm, dm->num_mbufs,
1227  rte_lcore_to_socket_id(i));
1228  if (error)
1229  return error;
1230  }
1231 
1232  if (dm->use_rss)
1233  {
1235  rt->function = dpdk_input_rss;
1236  }
1237  done:
1238  return error;
1239 }
1240 
1242 
1244 {
1245  vnet_main_t * vnm = vnet_get_main();
1246  struct rte_eth_link prev_link = xd->link;
1247  u32 hw_flags = 0;
1248  u8 hw_flags_chg = 0;
1249 
1250  /* only update link state for PMD interfaces */
1251  if (xd->dev_type != VNET_DPDK_DEV_ETH)
1252  return;
1253 
1254  xd->time_last_link_update = now ? now : xd->time_last_link_update;
1255  memset(&xd->link, 0, sizeof(xd->link));
1256  rte_eth_link_get_nowait (xd->device_index, &xd->link);
1257 
1258  if (LINK_STATE_ELOGS)
1259  {
1260  vlib_main_t * vm = vlib_get_main();
1261  ELOG_TYPE_DECLARE(e) = {
1262  .format =
1263  "update-link-state: sw_if_index %d, admin_up %d,"
1264  "old link_state %d new link_state %d",
1265  .format_args = "i4i1i1i1",
1266  };
1267 
1268  struct { u32 sw_if_index; u8 admin_up;
1269  u8 old_link_state; u8 new_link_state;} *ed;
1270  ed = ELOG_DATA (&vm->elog_main, e);
1271  ed->sw_if_index = xd->vlib_sw_if_index;
1272  ed->admin_up = xd->admin_up;
1273  ed->old_link_state = (u8)
1275  ed->new_link_state = (u8) xd->link.link_status;
1276  }
1277 
1278  if ((xd->admin_up == 1) &&
1279  ((xd->link.link_status != 0) ^
1281  {
1282  hw_flags_chg = 1;
1283  hw_flags |= (xd->link.link_status ?
1285  }
1286 
1287  if (hw_flags_chg || (xd->link.link_duplex != prev_link.link_duplex))
1288  {
1289  hw_flags_chg = 1;
1290  switch (xd->link.link_duplex)
1291  {
1292  case ETH_LINK_HALF_DUPLEX:
1294  break;
1295  case ETH_LINK_FULL_DUPLEX:
1297  break;
1298  default:
1299  break;
1300  }
1301  }
1302 #if RTE_VERSION >= RTE_VERSION_NUM(16, 4, 0, 0)
1303  if (hw_flags_chg || (xd->link.link_speed != prev_link.link_speed))
1304  {
1305  hw_flags_chg = 1;
1306  switch (xd->link.link_speed)
1307  {
1308  case ETH_SPEED_NUM_10M:
1310  break;
1311  case ETH_SPEED_NUM_100M:
1313  break;
1314  case ETH_SPEED_NUM_1G:
1315  hw_flags |= VNET_HW_INTERFACE_FLAG_SPEED_1G;
1316  break;
1317  case ETH_SPEED_NUM_10G:
1319  break;
1320  case ETH_SPEED_NUM_40G:
1322  break;
1323  case 0:
1324  break;
1325  default:
1326  clib_warning("unknown link speed %d", xd->link.link_speed);
1327  break;
1328  }
1329  }
1330 #else
1331  if (hw_flags_chg || (xd->link.link_speed != prev_link.link_speed))
1332  {
1333  hw_flags_chg = 1;
1334  switch (xd->link.link_speed)
1335  {
1336  case ETH_LINK_SPEED_10:
1338  break;
1339  case ETH_LINK_SPEED_100:
1341  break;
1342  case ETH_LINK_SPEED_1000:
1343  hw_flags |= VNET_HW_INTERFACE_FLAG_SPEED_1G;
1344  break;
1345  case ETH_LINK_SPEED_10000:
1347  break;
1348  case ETH_LINK_SPEED_40G:
1350  break;
1351  case 0:
1352  break;
1353  default:
1354  clib_warning("unknown link speed %d", xd->link.link_speed);
1355  break;
1356  }
1357  }
1358 #endif
1359  if (hw_flags_chg)
1360  {
1361  if (LINK_STATE_ELOGS)
1362  {
1363  vlib_main_t * vm = vlib_get_main();
1364 
1365  ELOG_TYPE_DECLARE(e) = {
1366  .format = "update-link-state: sw_if_index %d, new flags %d",
1367  .format_args = "i4i4",
1368  };
1369 
1370  struct { u32 sw_if_index; u32 flags; } *ed;
1371  ed = ELOG_DATA (&vm->elog_main, e);
1372  ed->sw_if_index = xd->vlib_sw_if_index;
1373  ed->flags = hw_flags;
1374  }
1375  vnet_hw_interface_set_flags (vnm, xd->vlib_hw_if_index, hw_flags);
1376  }
1377 }
1378 
/*
 * dpdk_process - function run by the "dpdk-process" process node.
 *
 * Per the cross-reference index of this listing the full signature is
 *   static uword dpdk_process (vlib_main_t *vm, vlib_node_runtime_t *rt,
 *                              vlib_frame_t *f)
 * rt and f are not referenced in the lines shown here.
 *
 * Sequence, as far as the visible lines show:
 *  1. call dpdk_lib_init() and, on success with at least one device,
 *     switch input node(s) to VLIB_NODE_STATE_POLLING;
 *  2. report any init error through clib_error_report();
 *  3. initialise vhost-user processing state and set io_thread_release;
 *  4. refresh the link state of every device once;
 *  5. perform extra MAC / max-packet-size / bitmap setup for bonded ports;
 *  6. loop forever, periodically updating counters and link state and
 *     servicing vhost-user interfaces.
 *
 * The polling loop shown has no break, so the trailing "return 0" is not
 * reached from the visible code.
 */
1379 static uword
1381  vlib_node_runtime_t * rt,
1382  vlib_frame_t * f)
1383 {
1384  clib_error_t * error;
1385  vnet_main_t * vnm = vnet_get_main();
1386  dpdk_main_t * dm = &dpdk_main;
1388  dpdk_device_t * xd;
1390  void *vu_state;
1391  int i;
1392 
1393  error = dpdk_lib_init (dm);
1394 
1395  /*
1396  * Turn on the input node if we found some devices to drive
1397  * and we're not running worker threads or i/o threads
1398  */
1399 
  /* NOTE(review): tm is used below but its declaration is not among the
   * lines shown; presumably the thread main - confirm. */
1400  if (error == 0 && vec_len(dm->devices) > 0)
1401  {
1402  if (tm->n_vlib_mains == 1)
1404  VLIB_NODE_STATE_POLLING);
1405  else if (tm->main_thread_is_io_node)
1407  VLIB_NODE_STATE_POLLING);
1408  else if (!dm->have_io_threads)
1409  for (i=0; i < tm->n_vlib_mains; i++)
1410  if (vec_len(dm->devices_by_cpu[i]) > 0)
1412  VLIB_NODE_STATE_POLLING);
1413  }
1414 
  /* An init error is reported but does not stop the process: execution
   * continues with the steps below. */
1415  if (error)
1416  clib_error_report (error);
1417 
1418  dpdk_vhost_user_process_init(&vu_state);
1419 
1420  dm->io_thread_release = 1;
1421 
  /* Initial link-state refresh for every device, all stamped with the
   * same "now". */
1422  f64 now = vlib_time_now (vm);
1423  vec_foreach (xd, dm->devices)
1424  {
1425  dpdk_update_link_state (xd, now);
1426  }
1427 
1428 { // Extra set up for bond interfaces:
1429  // 1. Setup MACs for bond interfaces and their slave links which were set
1430  // in dpdk_port_setup() but needs to be done again here to take effect.
1431  // 2. Set max L3 packet size of each bond interface to the lowest value of
1432  // its slave links
1433  // 3. Set up info for bond interface related CLI support.
1434  int nports = rte_eth_dev_count();
1435  if (nports > 0) {
1436  for (i = 0; i < nports; i++) {
1437  struct rte_eth_dev_info dev_info;
1438  rte_eth_dev_info_get(i, &dev_info);
  /* NOTE(review): pci_dev is dereferenced without a NULL check when
   * driver_name is unset - confirm it is always valid on this path. */
1439  if (!dev_info.driver_name)
1440  dev_info.driver_name = dev_info.pci_dev->driver->name;
1441  ASSERT(dev_info.driver_name);
  /* Compares only the first 12 characters (the length of "rte_bond_pmd"),
   * so any driver name starting with that prefix matches. */
1442  if (strncmp(dev_info.driver_name, "rte_bond_pmd", 12) == 0) {
1443  u8 addr[6];
1444  u8 slink[16];
  /* At most 16 slave port ids are requested, matching sizeof(slink). */
1445  int nlink = rte_eth_bond_slaves_get(i, slink, 16);
1446  if (nlink > 0) {
1447  vnet_hw_interface_t * bhi;
1448  ethernet_interface_t * bei;
1449  /* Get MAC of 1st slave link */
1450  rte_eth_macaddr_get(slink[0], (struct ether_addr *)addr);
1451  /* Set MAC of bonded interface to that of 1st slave link */
1452  rte_eth_bond_mac_address_set(i, (struct ether_addr *)addr);
1453  /* Populate MAC of bonded interface in VPP hw tables */
  /* NOTE(review): dm->devices is indexed here with the DPDK port id i;
   * assumes device index == port id - TODO confirm. */
1454  bhi = vnet_get_hw_interface(
1455  vnm, dm->devices[i].vlib_hw_if_index);
  /* NOTE(review): em is not declared in the lines shown. */
1456  bei = pool_elt_at_index(em->interfaces, bhi->hw_instance);
1457  clib_memcpy(bhi->hw_address, addr, 6);
1458  clib_memcpy(bei->address, addr, 6);
1459  /* Init l3 packet size allowed on bonded interface */
1460  bhi->max_l3_packet_bytes[VLIB_RX] =
1461  bhi->max_l3_packet_bytes[VLIB_TX] =
  /* Walks the slave list from the last entry down to slink[0]; nlink is
   * decremented before use, so it is 0 exactly when handling slink[0]. */
1463  while (nlink >= 1) { /* for all slave links */
1464  int slave = slink[--nlink];
1465  dpdk_device_t * sdev = &dm->devices[slave];
1466  vnet_hw_interface_t * shi;
1467  vnet_sw_interface_t * ssi;
1468  /* Add MAC to all slave links except the first one */
1469  if (nlink) rte_eth_dev_mac_addr_add(
1470  slave, (struct ether_addr *)addr, 0);
1471  /* Set slaves bitmap for bonded interface */
1472  bhi->bond_info = clib_bitmap_set(
1473  bhi->bond_info, sdev->vlib_hw_if_index, 1);
1474  /* Set slave link flags on slave interface */
1475  shi = vnet_get_hw_interface(vnm, sdev->vlib_hw_if_index);
1476  ssi = vnet_get_sw_interface(vnm, sdev->vlib_sw_if_index);
1479  /* Set l3 packet size allowed as the lowest of slave */
1480  if (bhi->max_l3_packet_bytes[VLIB_RX] >
1485  }
1486  }
1487  }
1488  }
1489  }
1490 }
1491 
1492  while (1)
1493  {
1494  /*
1495  * check each time through the loop in case intervals are changed
1496  */
  /* presumably the smaller of the two poll intervals (the second half of
   * this expression is not shown) - confirm */
1497  f64 min_wait = dm->link_state_poll_interval < dm->stat_poll_interval ?
1499 
1500  vlib_process_wait_for_event_or_clock (vm, min_wait);
1501 
1503  /* skip the poll if an admin up down is in progress (on any interface) */
1504  continue;
1505 
1506  vec_foreach (xd, dm->devices)
1507  {
  /* Time is re-sampled per device; each update runs only once its own
   * interval has elapsed since that device's last update. */
1508  f64 now = vlib_time_now (vm);
1509  if ((now - xd->time_last_stats_update) >= dm->stat_poll_interval)
1510  dpdk_update_counters (xd, now);
1511  if ((now - xd->time_last_link_update) >= dm->link_state_poll_interval)
1512  dpdk_update_link_state (xd, now);
1513 
  /* NOTE(review): as shown, this continue is the last statement of the
   * loop body, so the return value check has no effect. */
1515  if (dpdk_vhost_user_process_if(vm, xd, vu_state) != 0)
1516  continue;
1517  }
1518  }
1519 
1521 
1522  return 0;
1523 }
1524 
  /*
   * Initializer for the dpdk_process_node registration (the cross-reference
   * index lists it as VLIB_REGISTER_NODE (dpdk_process_node) at init.c:1525).
   * Registers dpdk_process as a process-type node named "dpdk-process".
   * process_log2_n_stack_bytes is, per its name, the log2 of the stack
   * size, i.e. 2^17 bytes here.
   */
1526  .function = dpdk_process,
1527  .type = VLIB_NODE_TYPE_PROCESS,
1528  .name = "dpdk-process",
1529  .process_log2_n_stack_bytes = 17,
1530 };
1531 
/*
 * int dpdk_set_stat_poll_interval (f64 interval)
 * (signature as given by the cross-reference index, init.c:1532)
 *
 * Sets dpdk_main.stat_poll_interval, the minimum time between counter
 * updates performed by dpdk_process.
 *
 * interval: new polling interval; presumably in seconds, since
 *           dpdk_process compares it with differences of vlib_time_now()
 *           values - confirm.
 *
 * Returns VNET_API_ERROR_INVALID_VALUE, leaving the setting untouched,
 * when interval is below DPDK_MIN_STATS_POLL_INTERVAL; otherwise 0.
 */
1533 {
1534  if (interval < DPDK_MIN_STATS_POLL_INTERVAL)
1535  return (VNET_API_ERROR_INVALID_VALUE);
1536 
1537  dpdk_main.stat_poll_interval = interval;
1538 
1539  return 0;
1540 }
1541 
/*
 * int dpdk_set_link_state_poll_interval (f64 interval)
 * (signature as given by the cross-reference index, init.c:1542)
 *
 * Sets dpdk_main.link_state_poll_interval, the minimum time between
 * link-state refreshes performed by dpdk_process.
 *
 * interval: new polling interval; presumably in seconds, since
 *           dpdk_process compares it with differences of vlib_time_now()
 *           values - confirm.
 *
 * Returns VNET_API_ERROR_INVALID_VALUE, leaving the setting untouched,
 * when interval is below DPDK_MIN_LINK_POLL_INTERVAL; otherwise 0.
 */
1543 {
1544  if (interval < DPDK_MIN_LINK_POLL_INTERVAL)
1545  return (VNET_API_ERROR_INVALID_VALUE);
1546 
1547  dpdk_main.link_state_poll_interval = interval;
1548 
1549  return 0;
1550 }
1551 
/*
 * dpdk_init - fill in the defaults of dpdk_main.
 *
 * Per the cross-reference index the signature is
 *   clib_error_t * dpdk_init (vlib_main_t *vm)      (init.c:1553)
 *
 * Records vm and the vnet main pointer, looks up the "ethernet-input"
 * node, sets default channel / mbuf counts and vhost-user coalescing
 * values, creates the lookup hashes, and finally runs dpdk_cli_init.
 *
 * Returns an error if the "ethernet-input" node cannot be found or if
 * dpdk_cli_init fails; 0 otherwise.
 */
1552 clib_error_t *
1554 {
1555  dpdk_main_t * dm = &dpdk_main;
1556  vlib_node_t * ei;
1557  clib_error_t * error = 0;
1559 
1560  /* verify that structs are cacheline aligned */
1561  ASSERT(offsetof(dpdk_device_t, cacheline0) == 0);
1562  ASSERT(offsetof(dpdk_device_t, cacheline1) == CLIB_CACHE_LINE_BYTES);
1563  ASSERT(offsetof(dpdk_worker_t, cacheline0) == 0);
1564  ASSERT(offsetof(frame_queue_trace_t, cacheline0) == 0);
1565 
1566  dm->vlib_main = vm;
1567  dm->vnet_main = vnet_get_main();
1568 
1569  ei = vlib_get_node_by_name (vm, (u8 *) "ethernet-input");
1570  if (ei == 0)
1571  return clib_error_return (0, "ethernet-input node AWOL");
1572 
1573  dm->ethernet_input_node_index = ei->index;
1574 
  /* Defaults: 4 channels; num_mbufs keeps a previously set non-zero value
   * and otherwise falls back to NB_MBUF. */
1575  dm->nchannels = 4;
1576  dm->num_mbufs = dm->num_mbufs ? dm->num_mbufs : NB_MBUF;
  /* Appends "vnet" to the EAL argument vector; presumably it serves as the
   * program-name argument - confirm. */
1577  vec_add1 (dm->eal_init_args, (u8 *) "vnet");
1578 
1579  dm->dpdk_device_by_kni_port_id = hash_create (0, sizeof (uword));
1580  dm->vu_sw_if_index_by_listener_fd = hash_create (0, sizeof (uword));
1581  dm->vu_sw_if_index_by_sock_fd = hash_create (0, sizeof (uword));
1582 
1583  /* $$$ use n_thread_stacks since it's known-good at this point */
  /* NOTE(review): tm is not declared in the lines shown. */
1584  vec_validate (dm->recycle, tm->n_thread_stacks - 1);
1585 
1586  /* initialize EFD (early fast discard) default settings */
1589  DPDK_NB_RX_DESC_10GE)/100);
1592 
1593  /* vhost-user coalescence frames defaults */
1594  dm->vhost_coalesce_frames = 32;
1595  dm->vhost_coalesce_time = 1e-3;
1596 
1597  /* Default vlib_buffer_t flags, DISABLES tcp/udp checksumming... */
1598  dm->buffer_flags_template =
1602 
1605 
1606  /* init CLI */
  /* NOTE(review): both paths below return error, so the early return is
   * redundant as written. */
1607  if ((error = vlib_call_init_function (vm, dpdk_cli_init)))
1608  return error;
1609 
1610  return error;
1611 }
1612 
1614 
#define vec_validate(V, I)
Make sure vector is long enough for given index (no header, unspecified alignment) ...
Definition: vec.h:394
#define DPDK_NB_TX_DESC_10GE
Definition: dpdk_priv.h:21
f64 time_last_link_update
Definition: dpdk.h:246
vmrglw vmrglh hi
void unformat_init_vector(unformat_input_t *input, u8 *vector_string)
Definition: unformat.c:1000
static u8 * format_bitmap_hex(u8 *s, va_list *args)
Definition: bitmap.h:559
#define vec_foreach_index(var, v)
Iterate over vector indices.
u16 enabled
Definition: dpdk.h:307
#define hash_set(h, key, value)
Definition: hash.h:237
always_inline vlib_thread_main_t * vlib_get_thread_main()
Definition: global_funcs.h:32
sll srl srl sll sra u16x4 i
Definition: vector_sse2.h:267
#define clib_min(x, y)
Definition: clib.h:295
vlib_node_registration_t dpdk_io_input_node
(constructor) VLIB_REGISTER_NODE (dpdk_io_input_node)
Definition: node.c:1934
u8 promisc
Definition: dpdk.h:219
uword unformat(unformat_input_t *i, char *fmt,...)
Definition: unformat.c:942
static u8 * format_vlib_pci_addr(u8 *s, va_list *va)
Definition: pci.h:169
#define VNET_HW_INTERFACE_FLAG_SPEED_1G
Definition: interface.h:254
clib_error_t * vnet_hw_interface_set_flags(vnet_main_t *vnm, u32 hw_if_index, u32 flags)
Definition: interface.c:454
u8 no_multi_seg
Definition: dpdk.h:347
#define NB_MBUF
Definition: dpdk.h:64
static clib_error_t * dpdk_lib_init(dpdk_main_t *dm)
Definition: init.c:231
vnet_device_class_t dpdk_device_class
static void dpdk_bind_devices_to_uio(dpdk_main_t *dm)
Definition: init.c:744
static u32 dpdk_flag_change(vnet_main_t *vnm, vnet_hw_interface_t *hi, u32 flags)
Definition: init.c:128
u8 ** eal_init_args
Definition: dpdk.h:342
always_inline void clib_mem_free(void *p)
Definition: mem.h:149
u8 need_txlock
Definition: dpdk.h:257
always_inline void unformat_free(unformat_input_t *i)
Definition: format.h:160
#define UNFORMAT_END_OF_INPUT
Definition: format.h:142
#define NULL
Definition: clib.h:55
u32 vhost_coalesce_frames
Definition: dpdk.h:376
u32 index
Definition: node.h:203
#define vec_add2_aligned(V, P, N, A)
Add N elements to end of vector V, return pointer to new elements in P.
Definition: vec.h:531
f64 vhost_coalesce_time
Definition: dpdk.h:377
uword dpdk_input_rss(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *f)
Definition: node.c:805
#define vec_add1(V, E)
Add 1 element to end of vector (unspecified alignment).
Definition: vec.h:480
#define DPDK_EFD_DISABLED
Definition: dpdk.h:298
#define DPDK_NB_RX_DESC_VIRTIO
Definition: dpdk_priv.h:18
#define vec_add2(V, P, N)
Add N elements to end of vector V, return pointer to new elements in P.
Definition: vec.h:519
vlib_buffer_main_t * buffer_main
Definition: main.h:103
u32 per_interface_next_index
Definition: dpdk.h:199
u8 nchannels_set_manually
Definition: dpdk.h:351
static clib_error_t * dpdk_config(vlib_main_t *vm, unformat_input_t *input)
Definition: init.c:802
#define DPDK_NB_TX_DESC_DEFAULT
Definition: dpdk_priv.h:17
#define clib_error_report(e)
Definition: error.h:126
#define foreach_eal_double_hyphen_predicate_arg
Definition: dpdk_priv.h:27
u32 next_vu_if_id
Definition: dpdk.h:385
#define VNET_HW_INTERFACE_FLAG_LINK_UP
Definition: interface.h:241
#define vec_validate_aligned(V, I, A)
Make sure vector is long enough for given index (no header, specified alignment)
Definition: vec.h:405
dpdk_main_t dpdk_main
Definition: init.c:37
always_inline vlib_main_t * vlib_get_main(void)
Definition: global_funcs.h:23
#define vec_reset_length(v)
Reset vector length to zero NULL-pointer tolerant.
clib_error_t * vlib_buffer_pool_create(vlib_main_t *vm, unsigned num_mbufs, unsigned socket_id)
Definition: dpdk_buffer.c:934
u8 admin_up
Definition: dpdk.h:218
always_inline uword unformat_check_input(unformat_input_t *i)
Definition: format.h:168
vnet_main_t * vnet_get_main(void)
Definition: misc.c:45
#define LINK_STATE_ELOGS
Definition: init.c:45
struct rte_mbuf *** tx_vectors
Definition: dpdk.h:202
#define pool_foreach(VAR, POOL, BODY)
Definition: pool.h:328
vlib_node_function_t * function
Definition: node.h:375
#define VLIB_INIT_FUNCTION(x)
Definition: init.h:109
#define VPP_RUN_DIR
Definition: init.c:48
vlib_node_registration_t dpdk_input_node
(constructor) VLIB_REGISTER_NODE (dpdk_input_node)
Definition: node.c:829
f64 stat_poll_interval
Definition: dpdk.h:404
#define IP_BUFFER_L4_CHECKSUM_CORRECT
Definition: buffer.h:50
u32 max_tx_queues
Definition: dpdk.h:356
vlib_pci_addr_t bus_address
Definition: pci.h:60
int input_cpu_first_index
Definition: dpdk.h:399
char i8
Definition: types.h:45
#define vec_elt_at_index(v, i)
Get vector value at index i checking that i is in bounds.
clib_error_t * vlib_pci_bind_to_uio(vlib_pci_device_t *d, char *uio_driver_name)
Definition: pci.c:58
always_inline void vlib_node_set_state(vlib_main_t *vm, u32 node_index, vlib_node_state_t new_state)
Definition: node_funcs.h:100
u16 rx_q_used
Definition: dpdk.h:225
void dpdk_vhost_user_process_cleanup(void *ctx)
Definition: vhost_user.c:1615
#define clib_warning(format, args...)
Definition: error.h:59
void unformat_init_string(unformat_input_t *input, char *string, int string_len)
Definition: unformat.c:991
#define vlib_call_init_function(vm, x)
Definition: init.h:159
always_inline u32 random_u32(u32 *seed)
32-bit random number generator
Definition: random.h:68
#define DPDK_NB_TX_DESC_VIRTIO
Definition: dpdk_priv.h:19
u32 device_index
Definition: dpdk.h:193
struct rte_eth_conf port_conf
Definition: dpdk.h:229
dpdk_worker_t * workers
Definition: dpdk.h:339
#define DPDK_NB_TX_DESC_40GE
Definition: dpdk_priv.h:23
f64 time_last_stats_update
Definition: dpdk.h:253
u16 consec_full_frames_hi_thresh
Definition: dpdk.h:309
always_inline void * clib_mem_alloc_aligned(uword size, uword align)
Definition: mem.h:113
ethernet_main_t ethernet_main
Definition: init.c:44
static struct rte_eth_conf port_conf_template
Definition: init.c:52
clib_error_t * dpdk_init(vlib_main_t *vm)
Definition: init.c:1553
u32 vlib_sw_if_index
Definition: dpdk.h:196
struct rte_eth_txconf tx_conf
Definition: dpdk.h:230
#define clib_bitmap_foreach(i, ai, body)
Definition: bitmap.h:308
#define pool_elt_at_index(p, i)
Definition: pool.h:346
#define vec_insert(V, N, M)
Insert N vector elements starting at element M, initialize new elements to zero (no header...
Definition: vec.h:644
#define foreach_eal_double_hyphen_arg
Definition: dpdk_priv.h:45
#define VLIB_BUFFER_DEFAULT_FREE_LIST_BYTES
Definition: buffer.h:297
dpdk_device_and_queue_t ** devices_by_cpu
Definition: dpdk.h:317
#define ETHERNET_INTERFACE_FLAG_CONFIG_MTU(flags)
Definition: ethernet.h:87
#define VNET_HW_INTERFACE_FLAG_SPEED_10M
Definition: interface.h:252
vlib_pci_device_t * pci_devs
Definition: pci.h:77
u32 vlib_hw_if_index
Definition: dpdk.h:195
#define VNET_SW_INTERFACE_FLAG_BOND_SLAVE
Definition: interface.h:382
u8 use_virtio_vhost
Definition: dpdk.h:373
#define foreach_eal_single_hyphen_mandatory_arg
Definition: dpdk_priv.h:35
u8 * eth_if_whitelist
Definition: dpdk.h:345
int dpdk_set_link_state_poll_interval(f64 interval)
Definition: init.c:1542
always_inline uword vnet_hw_interface_is_link_up(vnet_main_t *vnm, u32 hw_if_index)
#define VNET_HW_INTERFACE_FLAG_HALF_DUPLEX
Definition: interface.h:244
uword os_get_cpu_number(void)
Definition: unix-misc.c:206
vlib_node_registration_t dpdk_process_node
(constructor) VLIB_REGISTER_NODE (dpdk_process_node)
Definition: init.c:1525
#define foreach_dpdk_pmd
Definition: dpdk.h:78
u8 * eal_init_args_str
Definition: dpdk.h:343
always_inline f64 vlib_process_wait_for_event_or_clock(vlib_main_t *vm, f64 dt)
Definition: node_funcs.h:551
#define ELOG_DATA(em, f)
Definition: elog.h:386
dpdk_port_type_t port_type
Definition: dpdk.h:254
static uword dpdk_process(vlib_main_t *vm, vlib_node_runtime_t *rt, vlib_frame_t *f)
Definition: init.c:1380
#define VLIB_CONFIG_FUNCTION(x, n,...)
Definition: init.h:116
always_inline uword * clib_bitmap_set(uword *ai, uword i, uword value)
Definition: bitmap.h:132
#define DPDK_MIN_STATS_POLL_INTERVAL
Definition: dpdk.h:280
int input_cpu_count
Definition: dpdk.h:400
#define VLIB_FRAME_SIZE
Definition: node.h:292
u8 have_io_threads
Definition: dpdk.h:396
void * vlib_weakly_linked_functions[]
Definition: init.c:40
u16 tx_q_used
Definition: dpdk.h:224
u16 nb_rx_desc
Definition: dpdk.h:226
u32 main_thread_is_io_node
Definition: threads.h:268
void vlib_cli_output(vlib_main_t *vm, char *fmt,...)
Definition: cli.c:538
u8 * eth_if_blacklist
Definition: dpdk.h:344
u32 num_mbufs
Definition: dpdk.h:354
u32 ** recycle
Definition: dpdk.h:320
#define VNET_HW_INTERFACE_BOND_INFO_SLAVE
Definition: interface.h:327
#define foreach_eal_single_hyphen_arg
Definition: dpdk_priv.h:39
int dpdk_set_stat_poll_interval(f64 interval)
Definition: init.c:1532
#define DPDK_NB_RX_DESC_DEFAULT
Definition: dpdk_priv.h:16
dpdk_device_t * devices
Definition: dpdk.h:316
linux_pci_main_t linux_pci_main
Definition: pci.c:55
static void dpdk_update_counters(dpdk_device_t *xd, f64 now)
Definition: dpdk_priv.h:167
u16 * cpu_socket_id_by_queue
Definition: dpdk.h:228
#define DPDK_EFD_DEFAULT_DEVICE_QUEUE_HI_THRESH_PCT
Definition: dpdk.h:303
#define vec_free(V)
Free vector's memory (no header).
Definition: vec.h:298
volatile u32 ** lockp
Definition: dpdk.h:190
struct rte_mbuf *** rx_vectors
Definition: dpdk.h:203
void dpdk_device_lock_free(dpdk_device_t *xd)
Definition: init.c:219
u16 queue_hi_thresh
Definition: dpdk.h:308
u32 use_rss
Definition: dpdk.h:355
#define clib_memcpy(a, b, c)
Definition: string.h:63
#define DPDK_NB_RX_DESC_ENIC
Definition: dpdk_priv.h:24
dpdk_pmd_t pmd
Definition: dpdk.h:215
#define VLIB_BUFFER_TOTAL_LENGTH_VALID
Definition: buffer.h:95
elog_main_t elog_main
Definition: main.h:141
#define ETHERNET_INTERFACE_FLAG_ACCEPT_ALL
Definition: ethernet.h:81
#define ELOG_TYPE_DECLARE(f)
Definition: elog.h:344
always_inline vnet_hw_interface_t * vnet_get_hw_interface(vnet_main_t *vnm, u32 hw_if_index)
#define DEFAULT_HUGE_DIR
Definition: init.c:47
clib_error_t * dpdk_port_setup(dpdk_main_t *dm, dpdk_device_t *xd)
Definition: init.c:66
#define VNET_HW_INTERFACE_FLAG_SPEED_10G
Definition: interface.h:255
#define vec_validate_ha(V, I, H, A)
Make sure vector is long enough for given index (general version).
Definition: vec.h:374
void dpdk_update_link_state(dpdk_device_t *xd, f64 now)
Definition: init.c:1243
vlib_worker_thread_t * vlib_worker_threads
Definition: threads.h:106
#define hash_create(elts, value_bytes)
Definition: hash.h:615
#define VNET_HW_INTERFACE_FLAG_FULL_DUPLEX
Definition: interface.h:245
u8 num_kni
Definition: dpdk.h:357
u32 max_l3_packet_bytes[VLIB_N_RX_TX]
Definition: interface.h:313
#define DPDK_TX_RING_SIZE
Definition: dpdk.h:277
#define ASSERT(truth)
unsigned int u32
Definition: types.h:88
void dpdk_vhost_user_process_init(void **ctx)
Definition: vhost_user.c:1603
u8 * format_unformat_error(u8 *s, va_list *va)
Definition: unformat.c:87
u32 vlib_buffer_get_or_create_free_list(vlib_main_t *vm, u32 n_data_bytes, char *fmt,...)
Definition: buffer.c:511
dpdk_frame_t * frames
Definition: dpdk.h:209
u8 * format(u8 *s, char *fmt,...)
Definition: format.c:405
dpdk_efd_t efd
Definition: dpdk.h:388
#define DPDK_EFD_DEFAULT_CONSEC_FULL_FRAMES_HI_THRESH
Definition: dpdk.h:304
u8 af_packet_port_id
Definition: dpdk.h:243
clib_error_t * ethernet_register_interface(vnet_main_t *vnm, u32 dev_class_index, u32 dev_instance, u8 *address, u32 *hw_if_index_return, ethernet_flag_change_function_t flag_change)
Definition: interface.c:157
#define clib_bitmap_free(v)
Definition: bitmap.h:76
#define DPDK_LINK_POLL_INTERVAL
Definition: dpdk.h:282
uword * thread_registrations_by_name
Definition: threads.h:263
struct rte_eth_link link
Definition: dpdk.h:245
#define DPDK_NB_RX_DESC_10GE
Definition: dpdk_priv.h:20
u64 uword
Definition: types.h:112
#define IP_BUFFER_L4_CHECKSUM_COMPUTED
Definition: buffer.h:49
u8 kni_port_id
Definition: dpdk.h:234
vlib_node_t * vlib_get_node_by_name(vlib_main_t *vm, u8 *name)
Definition: node.c:44
Definition: defs.h:46
#define DPDK_STATS_POLL_INTERVAL
Definition: dpdk.h:279
u32 nchannels
Definition: dpdk.h:353
#define VNET_HW_INTERFACE_FLAG_SPEED_100M
Definition: interface.h:253
u8 interface_name_format_decimal
Definition: dpdk.h:335
u32 ethernet_input_node_index
Definition: dpdk.h:360
#define vec_len(v)
Number of elements in vector (rvalue-only, NULL tolerant)
double f64
Definition: types.h:140
dpdk_device_type_t dev_type
Definition: dpdk.h:214
unsigned char u8
Definition: types.h:56
always_inline vnet_sw_interface_t * vnet_get_sw_interface(vnet_main_t *vnm, u32 sw_if_index)
void dpdk_device_lock_init(dpdk_device_t *xd)
Definition: init.c:205
#define DPDK_MIN_LINK_POLL_INTERVAL
Definition: dpdk.h:283
#define hash_get_mem(h, key)
Definition: hash.h:251
u32 buffer_flags_template
Definition: dpdk.h:323
#define VNET_HW_INTERFACE_FLAG_SPEED_40G
Definition: interface.h:256
u8 * uio_driver_name
Definition: dpdk.h:346
u32 vlib_buffer_free_list_index
Definition: dpdk.h:329
#define VLIB_REGISTER_NODE(x,...)
Definition: node.h:140
u32 dpdk_get_admin_up_down_in_progress(void)
Definition: device.c:1204
always_inline vlib_node_runtime_t * vlib_node_get_runtime(vlib_main_t *vm, u32 node_index)
Definition: node_funcs.h:61
volatile u32 io_thread_release
Definition: dpdk.h:363
#define ETHERNET_MAX_PACKET_BYTES
Definition: ethernet.h:75
u8 coremask_set_manually
Definition: dpdk.h:350
#define vec_foreach(var, vec)
Vector iterator.
i8 cpu_socket
Definition: dpdk.h:216
always_inline f64 vlib_time_now(vlib_main_t *vm)
Definition: main.h:182
#define ETHERNET_INTERFACE_FLAG_CONFIG_PROMISC(flags)
Definition: ethernet.h:82
uword * cpu_socket_bitmap
Definition: threads.h:301
vhost_vring_addr_t addr
Definition: vhost-user.h:78
#define clib_error_return(e, args...)
Definition: error.h:112
struct _unformat_input_t unformat_input_t
vlib_thread_registration_t ** registrations
Definition: threads.h:261
u32 flags
Definition: vhost-user.h:73
#define CLIB_CACHE_LINE_BYTES
Definition: cache.h:67
ethernet_interface_t * interfaces
Definition: ethernet.h:201
vnet_main_t * vnet_main
Definition: dpdk.h:412
u16 nb_tx_desc
Definition: dpdk.h:227
uword dpdk_vhost_user_process_if(vlib_main_t *vm, dpdk_device_t *xd, void *ctx)
Definition: vhost_user.c:1620
#define DPDK_NB_RX_DESC_40GE
Definition: dpdk_priv.h:22
vlib_main_t ** vlib_mains
Definition: buffer.c:244
uword * dpdk_device_by_kni_port_id
Definition: dpdk.h:380
Definition: defs.h:45
uword * vu_sw_if_index_by_listener_fd
Definition: dpdk.h:381
f64 link_state_poll_interval
Definition: dpdk.h:403
CLIB vectors are ubiquitous, dynamically resized arrays with user-defined "headers".
clib_error_t * dpdk_cli_init(vlib_main_t *vm)
Definition: cli.c:1059
always_inline vnet_sw_interface_t * vnet_get_hw_sw_interface(vnet_main_t *vnm, u32 hw_if_index)
uword * vu_sw_if_index_by_sock_fd
Definition: dpdk.h:382
vlib_main_t * vlib_main
Definition: dpdk.h:411