FD.io VPP  v17.07.01-10-g3be13f0
Vector Packet Processing
init.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2015 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  * http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 #include <vnet/vnet.h>
16 #include <vppinfra/vec.h>
17 #include <vppinfra/error.h>
18 #include <vppinfra/format.h>
19 #include <vppinfra/bitmap.h>
20 
21 #include <vnet/ethernet/ethernet.h>
22 #include <dpdk/device/dpdk.h>
23 #include <vlib/unix/physmem.h>
24 #include <vlib/pci/pci.h>
25 
26 #include <stdio.h>
27 #include <stdlib.h>
28 #include <unistd.h>
29 #include <sys/stat.h>
30 #include <sys/mount.h>
31 #include <string.h>
32 #include <fcntl.h>
33 
34 #include <dpdk/device/dpdk_priv.h>
35 
37 
38 #define LINK_STATE_ELOGS 0
39 
40 #define DEFAULT_HUGE_DIR "/run/vpp/hugepages"
41 #define VPP_RUN_DIR "/run/vpp"
42 
43 /* Port configuration, mildly modified Intel app values */
44 
/*
 * Template port configuration.  Every RX offload listed here starts out
 * disabled (0) and the TX multi-queue mode is ETH_MQ_TX_NONE.  Code
 * further down this file overwrites the template's rxmode.jumbo_frame and
 * rxmode.enable_scatter at run time according to the no_multi_seg
 * setting, and turns hw_strip_crc on in a device's own port_conf for
 * some drivers.
 */
45 static struct rte_eth_conf port_conf_template = {
46  .rxmode = {
47  .split_hdr_size = 0,
48  .header_split = 0, /**< Header Split disabled */
49  .hw_ip_checksum = 0, /**< IP checksum offload disabled */
50  .hw_vlan_filter = 0, /**< VLAN filtering disabled */
51  .hw_strip_crc = 0, /**< CRC stripping by hardware disabled */
52  },
53  .txmode = {
54  .mq_mode = ETH_MQ_TX_NONE,
55  },
56 };
57 
/*
 * Derive a port type from the speeds a device advertises.
 *
 * dev_info->speed_capa is tested against the ETH_LINK_SPEED_* bits from
 * 100G down to 1G, so the fastest advertised speed is matched first.
 *
 * NOTE(review): the statement attached to each test, and the fallback at
 * the end, are not visible in this listing; presumably each one returns
 * the matching dpdk_port_type_t value - confirm against the real source.
 */
58 static dpdk_port_type_t
59 port_type_from_speed_capa (struct rte_eth_dev_info *dev_info)
60 {
61 
62  if (dev_info->speed_capa & ETH_LINK_SPEED_100G)
64  else if (dev_info->speed_capa & ETH_LINK_SPEED_50G)
66  else if (dev_info->speed_capa & ETH_LINK_SPEED_40G)
68  else if (dev_info->speed_capa & ETH_LINK_SPEED_25G)
70  else if (dev_info->speed_capa & ETH_LINK_SPEED_10G)
72  else if (dev_info->speed_capa & ETH_LINK_SPEED_1G)
74 
76 }
77 
78 
/*
 * Interface flag-change handler (the line naming this function and its
 * parameters is missing from this listing; the body uses `xd`, `hi` and
 * `flags`).
 *
 * First branch (its condition is not visible): records in `old` whether
 * DPDK_DEVICE_FLAG_PROMISC was set, then enables or disables promiscuous
 * mode on the port.
 *
 * ETHERNET_INTERFACE_FLAG_CONFIG_MTU branch: copies hi->max_packet_bytes
 * into rxmode.max_rx_pkt_len, stops the device, reconfigures it with
 * rte_eth_dev_configure (), sets the MTU and starts the device again.
 * NOTE(review): the lines just before the stop/start calls are missing,
 * so those calls may be conditional - confirm.
 *
 * Returns `old`, which is 0 unless the first branch ran.
 */
79 static u32
81 {
82  dpdk_main_t *dm = &dpdk_main;
84  u32 old = 0;
85 
87  {
88  old = (xd->flags & DPDK_DEVICE_FLAG_PROMISC) != 0;
89 
92  else
94 
96  {
98  rte_eth_promiscuous_enable (xd->device_index);
99  else
100  rte_eth_promiscuous_disable (xd->device_index);
101  }
102  }
103  else if (ETHERNET_INTERFACE_FLAG_CONFIG_MTU (flags))
104  {
105  int rv;
106 
107  xd->port_conf.rxmode.max_rx_pkt_len = hi->max_packet_bytes;
108 
110  dpdk_device_stop (xd);
111 
112  rv = rte_eth_dev_configure
113  (xd->device_index, xd->rx_q_used, xd->tx_q_used, &xd->port_conf);
114 
  /* A negative return is reported with the message below; the call that
     receives the message is not visible in this listing. */
115  if (rv < 0)
117  "rte_eth_dev_configure[%d]: err %d",
118  xd->device_index, rv);
119 
  /* The return value of rte_eth_dev_set_mtu () is not checked here. */
120  rte_eth_dev_set_mtu (xd->device_index, hi->max_packet_bytes);
121 
123  dpdk_device_start (xd);
124 
125  }
126  return old;
127 }
128 
/*
 * Prepare one lock slot per TX queue in use (the line naming this
 * function and its parameters is missing from this listing; the body
 * uses `xd`).
 *
 * Grows xd->lockp so that indices 0 .. tx_q_used - 1 are valid, then
 * zeroes CLIB_CACHE_LINE_BYTES bytes at each xd->lockp[q].
 *
 * NOTE(review): the lines between the loop's opening brace and the
 * memset are not visible; presumably they allocate lockp[q] before it is
 * cleared - confirm against the real source.
 */
129 static void
131 {
132  int q;
133  vec_validate (xd->lockp, xd->tx_q_used - 1);
134  for (q = 0; q < xd->tx_q_used; q++)
135  {
138  memset ((void *) xd->lockp[q], 0, CLIB_CACHE_LINE_BYTES);
139  }
140 }
141 
/*
 * Discover the ports reported by rte_eth_dev_count () and build the
 * per-port state for each one (the line naming this function and its
 * parameters is missing from this listing; the body uses `dm`).
 *
 * What the visible code does for every port index i:
 *  - looks up a per-device config by PCI address, falling back to
 *    dm->conf->default_devconf;
 *  - picks an interface-name suffix for ports that share a PCI address;
 *  - chooses the TX/RX queue counts and the RSS settings;
 *  - applies driver (PMD) specific settings;
 *  - bounds rxmode.max_rx_pkt_len by what the device reports;
 *  - queues the port on an HQoS thread when HQoS is enabled for it;
 *  - assigns RX queues to threads, runs dpdk_device_setup () and applies
 *    the VLAN-strip and MTU settings.
 *
 * Returns 0 on success.  A clib_error_t * is returned from the HQoS
 * paths and from the call that receives the port's ethernet address.
 */
142 static clib_error_t *
144 {
145  u32 nports;
146  u32 nb_desc = 0;
147  int i;
148  clib_error_t *error;
149  vlib_main_t *vm = vlib_get_main ();
154  dpdk_device_t *xd;
155  vlib_pci_addr_t last_pci_addr;
156  u32 last_pci_addr_port = 0;
158  uword *p_hqos;
159 
160  u32 next_hqos_cpu = 0;
161  u8 af_packet_port_id = 0;
162  u8 bond_ether_port_id = 0;
163  last_pci_addr.as_u32 = ~0;
164 
165  dm->hqos_cpu_first_index = 0;
166  dm->hqos_cpu_count = 0;
167 
168  /* find out which cpus will be used for I/O TX */
169  p_hqos = hash_get_mem (tm->thread_registrations_by_name, "hqos-threads");
170  tr_hqos = p_hqos ? (vlib_thread_registration_t *) p_hqos[0] : 0;
171 
172  if (tr_hqos && tr_hqos->count > 0)
173  {
174  dm->hqos_cpu_first_index = tr_hqos->first_index;
175  dm->hqos_cpu_count = tr_hqos->count;
176  }
177 
180 
181  nports = rte_eth_dev_count ();
182  if (nports < 1)
183  {
184  clib_warning ("DPDK drivers found no ports...");
185  }
186 
187  if (CLIB_DEBUG > 0)
188  clib_warning ("DPDK drivers found %d ports...", nports);
189 
190  /*
191  * All buffers are all allocated from the same rte_mempool.
192  * Thus they all have the same number of data bytes.
193  */
197  "dpdk rx");
198 
199  if (dm->conf->enable_tcp_udp_checksum)
202 
203  /* vlib_buffer_t template */
206  for (i = 0; i < tm->n_vlib_mains; i++)
207  {
210  fl = vlib_buffer_get_free_list (vm,
213  bt->flags = dm->buffer_flags_template;
214  bt->current_data = -RTE_PKTMBUF_HEADROOM;
215  vnet_buffer (bt)->sw_if_index[VLIB_TX] = (u32) ~ 0;
216  }
217 
218  for (i = 0; i < nports; i++)
219  {
220  u8 addr[6];
221  u8 vlan_strip = 0;
222  int j;
223  struct rte_eth_dev_info dev_info;
224  struct rte_eth_link l;
225  dpdk_device_config_t *devconf = 0;
226  vlib_pci_addr_t pci_addr;
227  uword *p = 0;
228 
229  rte_eth_dev_info_get (i, &dev_info);
230  if (dev_info.pci_dev) /* bonded interface has no pci info */
231  {
232  pci_addr.domain = dev_info.pci_dev->addr.domain;
233  pci_addr.bus = dev_info.pci_dev->addr.bus;
234  pci_addr.slot = dev_info.pci_dev->addr.devid;
235  pci_addr.function = dev_info.pci_dev->addr.function;
236  p =
238  pci_addr.as_u32);
239  }
240 
241  if (p)
242  devconf = pool_elt_at_index (dm->conf->dev_confs, p[0]);
243  else
244  devconf = &dm->conf->default_devconf;
245 
246  /* Create vnet interface */
250  xd->cpu_socket = (i8) rte_eth_dev_socket_id (i);
251 
252  /* Handle interface naming for devices with multiple ports sharing same PCI ID */
253  if (dev_info.pci_dev)
254  {
255  struct rte_eth_dev_info di = { 0 };
  /* NOTE(review): i + 1 can be one past the last port index.  di is
     zero-initialised above, so di.pci_dev stays 0 unless the call fills
     it in - confirm the callee tolerates an out-of-range port. */
256  rte_eth_dev_info_get (i + 1, &di);
257  if (di.pci_dev && pci_addr.as_u32 != last_pci_addr.as_u32 &&
258  memcmp (&dev_info.pci_dev->addr, &di.pci_dev->addr,
259  sizeof (struct rte_pci_addr)) == 0)
260  {
261  xd->interface_name_suffix = format (0, "0");
262  last_pci_addr.as_u32 = pci_addr.as_u32;
263  last_pci_addr_port = i;
264  }
265  else if (pci_addr.as_u32 == last_pci_addr.as_u32)
266  {
268  format (0, "%u", i - last_pci_addr_port);
269  }
270  else
271  {
272  last_pci_addr.as_u32 = ~0;
273  }
274  }
275  else
276  last_pci_addr.as_u32 = ~0;
277 
278  clib_memcpy (&xd->tx_conf, &dev_info.default_txconf,
279  sizeof (struct rte_eth_txconf));
280  if (dm->conf->no_multi_seg)
281  {
282  xd->tx_conf.txq_flags |= ETH_TXQ_FLAGS_NOMULTSEGS;
283  port_conf_template.rxmode.jumbo_frame = 0;
284  port_conf_template.rxmode.enable_scatter = 0;
285  }
286  else
287  {
288  xd->tx_conf.txq_flags &= ~ETH_TXQ_FLAGS_NOMULTSEGS;
289  port_conf_template.rxmode.jumbo_frame = 1;
290  port_conf_template.rxmode.enable_scatter = 1;
292  }
293 
295  sizeof (struct rte_eth_conf));
296 
297  xd->tx_q_used = clib_min (dev_info.max_tx_queues, tm->n_vlib_mains);
298 
299  if (devconf->num_tx_queues > 0
300  && devconf->num_tx_queues < xd->tx_q_used)
301  xd->tx_q_used = clib_min (xd->tx_q_used, devconf->num_tx_queues);
302 
303  if (devconf->num_rx_queues > 1 && dm->use_rss == 0)
304  {
305  dm->use_rss = 1;
306  }
307 
308  if (devconf->num_rx_queues > 1
309  && dev_info.max_rx_queues >= devconf->num_rx_queues)
310  {
311  xd->rx_q_used = devconf->num_rx_queues;
312  xd->port_conf.rxmode.mq_mode = ETH_MQ_RX_RSS;
313  if (devconf->rss_fn == 0)
314  xd->port_conf.rx_adv_conf.rss_conf.rss_hf =
315  ETH_RSS_IP | ETH_RSS_UDP | ETH_RSS_TCP;
316  else
317  xd->port_conf.rx_adv_conf.rss_conf.rss_hf = devconf->rss_fn;
318  }
319  else
320  xd->rx_q_used = 1;
321 
323 
324  /* workaround for drivers not setting driver_name */
325  if ((!dev_info.driver_name) && (dev_info.pci_dev))
326  dev_info.driver_name = dev_info.pci_dev->driver->driver.name;
327 
328  ASSERT (dev_info.driver_name);
329 
330  if (!xd->pmd)
331  {
332 
333 
334 #define _(s,f) else if (dev_info.driver_name && \
335  !strcmp(dev_info.driver_name, s)) \
336  xd->pmd = VNET_DPDK_PMD_##f;
337  if (0)
338  ;
340 #undef _
341  else
343 
347 
348  switch (xd->pmd)
349  {
350  /* Drivers with valid speed_capa set */
351  case VNET_DPDK_PMD_E1000EM:
352  case VNET_DPDK_PMD_IGB:
353  case VNET_DPDK_PMD_IXGBE:
354  case VNET_DPDK_PMD_I40E:
355  case VNET_DPDK_PMD_CXGBE:
356  case VNET_DPDK_PMD_MLX4:
357  case VNET_DPDK_PMD_MLX5:
358  xd->port_type = port_type_from_speed_capa (&dev_info);
359  break;
360 
361  /* SR-IOV VFs */
362  case VNET_DPDK_PMD_IGBVF:
363  case VNET_DPDK_PMD_IXGBEVF:
364  case VNET_DPDK_PMD_I40EVF:
366  xd->port_conf.rxmode.hw_strip_crc = 1;
367  break;
368 
369  case VNET_DPDK_PMD_THUNDERX:
371  break;
372 
373  case VNET_DPDK_PMD_DPAA2:
375  break;
376 
377  /* Cisco VIC */
378  case VNET_DPDK_PMD_ENIC:
379  rte_eth_link_get_nowait (i, &l);
380  if (l.link_speed == 40000)
382  else
384  break;
385 
386  /* Intel Red Rock Canyon */
387  case VNET_DPDK_PMD_FM10K:
389  xd->port_conf.rxmode.hw_strip_crc = 1;
390  break;
391 
392  /* virtio */
393  case VNET_DPDK_PMD_VIRTIO:
397  break;
398 
399  /* vmxnet3 */
400  case VNET_DPDK_PMD_VMXNET3:
402  xd->tx_conf.txq_flags |= ETH_TXQ_FLAGS_NOMULTSEGS;
403  break;
404 
405  case VNET_DPDK_PMD_AF_PACKET:
407  xd->port_id = af_packet_port_id++;
408  break;
409 
410  case VNET_DPDK_PMD_BOND:
412  xd->port_id = bond_ether_port_id++;
413  break;
414 
415  case VNET_DPDK_PMD_VIRTIO_USER:
417  break;
418 
419  default:
421  }
422 
423  if (devconf->num_rx_desc)
424  xd->nb_rx_desc = devconf->num_rx_desc;
425 
426  if (devconf->num_tx_desc)
427  xd->nb_tx_desc = devconf->num_tx_desc;
428  }
429 
430  /*
431  * Ensure default mtu is not > the mtu read from the hardware.
432  * Otherwise rte_eth_dev_configure() will fail and the port will
433  * not be available.
434  */
435  if (ETHERNET_MAX_PACKET_BYTES > dev_info.max_rx_pktlen)
436  {
437  /*
438  * This device does not support the platform's max frame
439  * size. Use its advertised mru instead.
440  */
441  xd->port_conf.rxmode.max_rx_pkt_len = dev_info.max_rx_pktlen;
442  }
443  else
444  {
445  xd->port_conf.rxmode.max_rx_pkt_len = ETHERNET_MAX_PACKET_BYTES;
446 
447  /*
448  * Some platforms do not account for Ethernet FCS (4 bytes) in
449  * MTU calculations. To interop with them increase mru but only
450  * if the device's settings can support it.
451  */
452  if ((dev_info.max_rx_pktlen >= (ETHERNET_MAX_PACKET_BYTES + 4)) &&
453  xd->port_conf.rxmode.hw_strip_crc)
454  {
455  /*
456  * Allow additional 4 bytes (for Ethernet FCS). These bytes are
457  * stripped by h/w and so will not consume any buffer memory.
458  */
459  xd->port_conf.rxmode.max_rx_pkt_len += 4;
460  }
461  }
462 
  /* af_packet ports get a generated MAC: 02:fe followed by 4 bytes taken
     from random_u32 () seeded with the current time in microseconds. */
463  if (xd->pmd == VNET_DPDK_PMD_AF_PACKET)
464  {
465  f64 now = vlib_time_now (vm);
466  u32 rnd;
467  rnd = (u32) (now * 1e6);
468  rnd = random_u32 (&rnd);
469  clib_memcpy (addr + 2, &rnd, sizeof (rnd));
470  addr[0] = 2;
471  addr[1] = 0xfe;
472  }
473  else
474  rte_eth_macaddr_get (i, (struct ether_addr *) addr);
475 
476  if (xd->tx_q_used < tm->n_vlib_mains)
478 
479  xd->device_index = xd - dm->devices;
480  ASSERT (i == xd->device_index);
481  xd->per_interface_next_index = ~0;
482 
483  /* assign interface to input thread */
485  int q;
486 
487  if (devconf->hqos_enabled)
488  {
490 
491  if (devconf->hqos.hqos_thread_valid)
492  {
493  int cpu = dm->hqos_cpu_first_index + devconf->hqos.hqos_thread;
494 
495  if (devconf->hqos.hqos_thread >= dm->hqos_cpu_count)
496  return clib_error_return (0, "invalid HQoS thread index");
497 
498  vec_add2 (dm->devices_by_hqos_cpu[cpu], dq, 1);
499  dq->device = xd->device_index;
500  dq->queue_id = 0;
501  }
502  else
503  {
504  int cpu = dm->hqos_cpu_first_index + next_hqos_cpu;
505 
506  if (dm->hqos_cpu_count == 0)
507  return clib_error_return (0, "no HQoS threads available");
508 
509  vec_add2 (dm->devices_by_hqos_cpu[cpu], dq, 1);
510  dq->device = xd->device_index;
511  dq->queue_id = 0;
512 
513  next_hqos_cpu++;
514  if (next_hqos_cpu == dm->hqos_cpu_count)
515  next_hqos_cpu = 0;
516 
  /* NOTE(review): this stores the absolute cpu index, whereas the
     hqos_thread_valid branch above reads hqos_thread as an offset from
     dm->hqos_cpu_first_index - confirm this is intended. */
517  devconf->hqos.hqos_thread_valid = 1;
518  devconf->hqos.hqos_thread = cpu;
519  }
520  }
521 
524  for (j = 0; j < tm->n_vlib_mains; j++)
525  {
528  vec_reset_length (xd->tx_vectors[j]);
529  }
530 
533  for (j = 0; j < xd->rx_q_used; j++)
534  {
537  vec_reset_length (xd->rx_vectors[j]);
538  }
539 
542 
543 
544  /* count the number of descriptors used for this device */
545  nb_desc += xd->nb_rx_desc + xd->nb_tx_desc * xd->tx_q_used;
546 
548  (dm->vnet_main, dpdk_device_class.index, xd->device_index,
549  /* ethernet address */ addr,
551  if (error)
552  return error;
553 
555  xd->vlib_sw_if_index = sw->sw_if_index;
557  dpdk_input_node.index);
558 
  /* With an explicit worker bitmap, RX queue q goes to the q-th worker set
     in the bitmap; otherwise every used RX queue is assigned with ~1 as
     the thread argument. */
559  if (devconf->workers)
560  {
561  int i;
562  q = 0;
563  /* *INDENT-OFF* */
564  clib_bitmap_foreach (i, devconf->workers, ({
565  vnet_hw_interface_assign_rx_thread (dm->vnet_main, xd->hw_if_index, q++,
566  vdm->first_worker_thread_index + i);
567  }));
568  /* *INDENT-ON* */
569  }
570  else
571  for (q = 0; q < xd->rx_q_used; q++)
572  {
574  ~1);
575  }
576 
578 
579  dpdk_device_setup (xd);
580 
581  if (vec_len (xd->errors))
582  clib_warning ("setup failed for device %U. Errors:\n %U",
585 
586  if (devconf->hqos_enabled)
587  {
588  clib_error_t *rv;
589  rv = dpdk_port_setup_hqos (xd, &devconf->hqos);
590  if (rv)
591  return rv;
592  }
593 
594  /*
595  * For cisco VIC vNIC, set default to VLAN strip enabled, unless
596  * specified otherwise in the startup config.
597  * For other NICs default to VLAN strip disabled, unless specified
598  * otherwise in the startup config.
599  */
600  if (xd->pmd == VNET_DPDK_PMD_ENIC)
601  {
602  if (devconf->vlan_strip_offload != DPDK_DEVICE_VLAN_STRIP_OFF)
603  vlan_strip = 1; /* remove vlan tag from VIC port by default */
604  else
605  clib_warning ("VLAN strip disabled for interface\n");
606  }
607  else if (devconf->vlan_strip_offload == DPDK_DEVICE_VLAN_STRIP_ON)
608  vlan_strip = 1;
609 
610  if (vlan_strip)
611  {
612  int vlan_off;
613  vlan_off = rte_eth_dev_get_vlan_offload (xd->device_index);
614  vlan_off |= ETH_VLAN_STRIP_OFFLOAD;
615  xd->port_conf.rxmode.hw_vlan_strip = vlan_off;
616  if (rte_eth_dev_set_vlan_offload (xd->device_index, vlan_off) == 0)
617  clib_warning ("VLAN strip enabled for interface\n");
618  else
619  clib_warning ("VLAN strip cannot be supported by interface\n");
620  }
621 
623  xd->port_conf.rxmode.max_rx_pkt_len - sizeof (ethernet_header_t);
624 
625  rte_eth_dev_set_mtu (xd->device_index, hi->max_packet_bytes);
626  }
627 
  /* Only a warning: more ring descriptors than configured mbufs does not
     fail initialisation here. */
628  if (nb_desc > dm->conf->num_mbufs)
629  clib_warning ("%d mbufs allocated but total rx/tx ring size is %d\n",
630  dm->conf->num_mbufs, nb_desc);
631 
632  return 0;
633 }
634 
/*
 * Try to bind every eligible PCI device to the UIO driver named by
 * conf->uio_driver_name (the line naming this function and its
 * parameters is missing from this listing; the body uses `conf`).
 *
 * A device in pm->pci_devs is skipped when:
 *  - its class is neither PCI_CLASS_NETWORK_ETHERNET nor
 *    PCI_CLASS_PROCESSOR_CO;
 *  - conf->dev_confs is non-empty (a whitelist is in effect) and the
 *    device's address has no entry in it;
 *  - its vendor/device id matches none of the entries tested below, in
 *    which case a warning is logged.
 *
 * When the bind fails, the device's config entry is marked
 * is_blacklisted (an entry is allocated first if the device had none)
 * and the error is reported.
 */
635 static void
637 {
638  vlib_pci_main_t *pm = &pci_main;
639  clib_error_t *error;
641  u8 *pci_addr = 0;
642  int num_whitelisted = vec_len (conf->dev_confs);
643 
644  /* *INDENT-OFF* */
645  pool_foreach (d, pm->pci_devs, ({
646  dpdk_device_config_t * devconf = 0;
647  vec_reset_length (pci_addr);
648  pci_addr = format (pci_addr, "%U%c", format_vlib_pci_addr, &d->bus_address, 0);
649 
650  if (d->device_class != PCI_CLASS_NETWORK_ETHERNET && d->device_class != PCI_CLASS_PROCESSOR_CO)
651  continue;
652 
653  if (num_whitelisted)
654  {
655  uword * p = hash_get (conf->device_config_index_by_pci_addr, d->bus_address.as_u32);
656 
657  if (!p)
658  continue;
659 
660  devconf = pool_elt_at_index (conf->dev_confs, p[0]);
661  }
662 
663  /* virtio */
664  if (d->vendor_id == 0x1af4 && d->device_id == 0x1000)
665  ;
666  /* vmxnet3 */
667  else if (d->vendor_id == 0x15ad && d->device_id == 0x07b0)
668  ;
669  /* all Intel network devices */
670  else if (d->vendor_id == 0x8086 && d->device_class == PCI_CLASS_NETWORK_ETHERNET)
671  ;
672  /* all Intel QAT devices VFs */
673  else if (d->vendor_id == 0x8086 && d->device_class == PCI_CLASS_PROCESSOR_CO &&
674  (d->device_id == 0x0443 || d->device_id == 0x37c9 || d->device_id == 0x19e3))
675  ;
676  /* Cisco VIC */
677  else if (d->vendor_id == 0x1137 && d->device_id == 0x0043)
678  ;
679  /* Chelsio T4/T5 */
680  else if (d->vendor_id == 0x1425 && (d->device_id & 0xe000) == 0x4000)
681  ;
682  else
683  {
684  clib_warning ("Unsupported PCI device 0x%04x:0x%04x found "
685  "at PCI address %s\n", (u16) d->vendor_id, (u16) d->device_id,
686  pci_addr);
687  continue;
688  }
689 
690  error = vlib_pci_bind_to_uio (d, (char *) conf->uio_driver_name);
691 
692  if (error)
693  {
  /* devconf is still 0 here only when no whitelist is in effect. */
694  if (devconf == 0)
695  {
696  pool_get (conf->dev_confs, devconf);
698  devconf - conf->dev_confs);
699  devconf->pci_addr.as_u32 = d->bus_address.as_u32;
700  }
701  devconf->is_blacklisted = 1;
702  clib_error_report (error);
703  }
704  }));
705  /* *INDENT-ON* */
706  vec_free (pci_addr);
707 }
708 
/*
 * Parse the per-device options of one "dev" configuration entry.
 *
 * conf       - configuration being filled in.
 * pci_addr   - PCI address the entry applies to; used as the key in
 *              conf->device_config_index_by_pci_addr and stored in the
 *              entry (also when is_default is set).
 * input      - option text to parse, or 0 for an entry without options,
 *              in which case the function returns 0 once the entry has
 *              been initialised.
 * is_default - non-zero to fill conf->default_devconf instead of
 *              allocating a per-device entry.
 *
 * Returns 0 on success.  An error is returned for a duplicate PCI
 * address, an unrecognised option, a failure reported by
 * unformat_rss_fn () or unformat_hqos (), and when "workers" is given
 * together with a non-zero num-rx-queues that differs from the number of
 * workers.  If "workers" is given and num-rx-queues is 0, num-rx-queues
 * is set to the number of workers.
 *
 * NOTE(review): the mismatch error text below spells "threadds"; it is a
 * runtime string, so it is left untouched here.
 */
709 static clib_error_t *
710 dpdk_device_config (dpdk_config_main_t * conf, vlib_pci_addr_t pci_addr,
711  unformat_input_t * input, u8 is_default)
712 {
713  clib_error_t *error = 0;
714  uword *p;
715  dpdk_device_config_t *devconf;
716  unformat_input_t sub_input;
717 
718  if (is_default)
719  {
720  devconf = &conf->default_devconf;
721  }
722  else
723  {
724  p = hash_get (conf->device_config_index_by_pci_addr, pci_addr.as_u32);
725 
726  if (!p)
727  {
728  pool_get (conf->dev_confs, devconf);
729  hash_set (conf->device_config_index_by_pci_addr, pci_addr.as_u32,
730  devconf - conf->dev_confs);
731  }
732  else
733  return clib_error_return (0,
734  "duplicate configuration for PCI address %U",
735  format_vlib_pci_addr, &pci_addr);
736  }
737 
738  devconf->pci_addr.as_u32 = pci_addr.as_u32;
739  devconf->hqos_enabled = 0;
741 
742  if (!input)
743  return 0;
744 
747  {
748  if (unformat (input, "num-rx-queues %u", &devconf->num_rx_queues))
749  ;
750  else if (unformat (input, "num-tx-queues %u", &devconf->num_tx_queues))
751  ;
752  else if (unformat (input, "num-rx-desc %u", &devconf->num_rx_desc))
753  ;
754  else if (unformat (input, "num-tx-desc %u", &devconf->num_tx_desc))
755  ;
756  else if (unformat (input, "workers %U", unformat_bitmap_list,
757  &devconf->workers))
758  ;
759  else
760  if (unformat
761  (input, "rss %U", unformat_vlib_cli_sub_input, &sub_input))
762  {
763  error = unformat_rss_fn (&sub_input, &devconf->rss_fn);
764  if (error)
765  break;
766  }
767  else if (unformat (input, "vlan-strip-offload off"))
769  else if (unformat (input, "vlan-strip-offload on"))
771  else
772  if (unformat
773  (input, "hqos %U", unformat_vlib_cli_sub_input, &sub_input))
774  {
775  devconf->hqos_enabled = 1;
776  error = unformat_hqos (&sub_input, &devconf->hqos);
777  if (error)
778  break;
779  }
780  else if (unformat (input, "hqos"))
781  {
782  devconf->hqos_enabled = 1;
783  }
784  else
785  {
786  error = clib_error_return (0, "unknown input `%U'",
787  format_unformat_error, input);
788  break;
789  }
790  }
791 
792  if (error)
793  return error;
794 
  /* Keep the worker bitmap and the RX queue count consistent. */
795  if (devconf->workers && devconf->num_rx_queues == 0)
796  devconf->num_rx_queues = clib_bitmap_count_set_bits (devconf->workers);
797  else if (devconf->workers &&
798  clib_bitmap_count_set_bits (devconf->workers) !=
799  devconf->num_rx_queues)
800  error =
802  "%U: number of worker threadds must be "
803  "equal to number of rx queues", format_vlib_pci_addr,
804  &pci_addr);
805 
806  return error;
807 }
808 
/*
 * Parse the DPDK startup configuration, assemble the EAL argument vector
 * in conf->eal_init_args, initialise the EAL and create the buffer pools
 * (the line naming this function and its parameters is missing from this
 * listing; the body uses `input` and assigns `vm`).
 *
 * Visible steps:
 *  1. Parse options from `input`; "dev ..." entries go through
 *     dpdk_device_config () and, except for "dev default", are counted as
 *     whitelisted devices.
 *  2. Unless "no-hugetlb" or a "huge-dir" option was given, work out the
 *     per-socket memory, choose 1G or 2M huge pages, mount hugetlbfs on
 *     DEFAULT_HUGE_DIR and pass that directory to the EAL as --huge-dir.
 *  3. Insert -c (core mask) and -n (conf->nchannels) unless they were set
 *     manually, bind devices to UIO when running as root without
 *     "no-pci", and add a -w or -b entry per device config.
 *  4. Append --master-lcore and --socket-mem, call rte_eal_init (),
 *     lazily unmount DEFAULT_HUGE_DIR and create the buffer pools.
 *
 * Returns 0 on success or a clib_error_t * describing the failure.
 */
809 static clib_error_t *
811 {
812  clib_error_t *error = 0;
813  dpdk_main_t *dm = &dpdk_main;
816  dpdk_device_config_t *devconf;
817  vlib_pci_addr_t pci_addr;
818  unformat_input_t sub_input;
819  uword x;
820  u8 *s, *tmp = 0;
821  u8 *rte_cmd = 0, *ethname = 0;
822  u32 log_level;
823  int ret, i;
824  int num_whitelisted = 0;
825  u8 no_pci = 0;
826  u8 no_huge = 0;
827  u8 huge_dir = 0;
828  u8 file_prefix = 0;
829  u8 *socket_mem = 0;
830 
831  conf->device_config_index_by_pci_addr = hash_create (0, sizeof (uword));
832  log_level = RTE_LOG_NOTICE;
833 
835  {
836  /* Prime the pump */
837  if (unformat (input, "no-hugetlb"))
838  {
  /* NOTE(review): unlike the other EAL options pushed below, this one is
     added without a leading "--" - confirm that is intended. */
839  vec_add1 (conf->eal_init_args, (u8 *) "no-huge");
840  no_huge = 1;
841  }
842 
843  else if (unformat (input, "enable-tcp-udp-checksum"))
844  conf->enable_tcp_udp_checksum = 1;
845 
846  else if (unformat (input, "decimal-interface-names"))
848 
849  else if (unformat (input, "log-level %U", unformat_dpdk_log_level, &x))
850  log_level = x;
851 
852  else if (unformat (input, "no-multi-seg"))
853  conf->no_multi_seg = 1;
854 
855  else if (unformat (input, "dev default %U", unformat_vlib_cli_sub_input,
856  &sub_input))
857  {
858  error =
859  dpdk_device_config (conf, (vlib_pci_addr_t) (u32) ~ 1, &sub_input,
860  1);
861 
862  if (error)
863  return error;
864  }
865  else
866  if (unformat
867  (input, "dev %U %U", unformat_vlib_pci_addr, &pci_addr,
868  unformat_vlib_cli_sub_input, &sub_input))
869  {
870  error = dpdk_device_config (conf, pci_addr, &sub_input, 0);
871 
872  if (error)
873  return error;
874 
875  num_whitelisted++;
876  }
877  else if (unformat (input, "dev %U", unformat_vlib_pci_addr, &pci_addr))
878  {
879  error = dpdk_device_config (conf, pci_addr, 0, 0);
880 
881  if (error)
882  return error;
883 
884  num_whitelisted++;
885  }
886  else if (unformat (input, "num-mbufs %d", &conf->num_mbufs))
887  ;
888  else if (unformat (input, "uio-driver %s", &conf->uio_driver_name))
889  ;
890  else if (unformat (input, "socket-mem %s", &socket_mem))
891  ;
892  else if (unformat (input, "no-pci"))
893  {
894  no_pci = 1;
895  tmp = format (0, "--no-pci%c", 0);
896  vec_add1 (conf->eal_init_args, tmp);
897  }
898  else if (unformat (input, "poll-sleep %d", &dm->poll_sleep_usec))
899  ;
900 
901 #define _(a) \
902  else if (unformat(input, #a)) \
903  { \
904  tmp = format (0, "--%s%c", #a, 0); \
905  vec_add1 (conf->eal_init_args, tmp); \
906  }
908 #undef _
909 #define _(a) \
910  else if (unformat(input, #a " %s", &s)) \
911  { \
912  if (!strncmp(#a, "huge-dir", 8)) \
913  huge_dir = 1; \
914  else if (!strncmp(#a, "file-prefix", 11)) \
915  file_prefix = 1; \
916  tmp = format (0, "--%s%c", #a, 0); \
917  vec_add1 (conf->eal_init_args, tmp); \
918  vec_add1 (s, 0); \
919  if (!strncmp(#a, "vdev", 4)) \
920  if (strstr((char*)s, "af_packet")) \
921  clib_warning ("af_packet obsoleted. Use CLI 'create host-interface'."); \
922  vec_add1 (conf->eal_init_args, s); \
923  }
925 #undef _
926 #define _(a,b) \
927  else if (unformat(input, #a " %s", &s)) \
928  { \
929  tmp = format (0, "-%s%c", #b, 0); \
930  vec_add1 (conf->eal_init_args, tmp); \
931  vec_add1 (s, 0); \
932  vec_add1 (conf->eal_init_args, s); \
933  }
935 #undef _
936 #define _(a,b) \
937  else if (unformat(input, #a " %s", &s)) \
938  { \
939  tmp = format (0, "-%s%c", #b, 0); \
940  vec_add1 (conf->eal_init_args, tmp); \
941  vec_add1 (s, 0); \
942  vec_add1 (conf->eal_init_args, s); \
943  conf->a##_set_manually = 1; \
944  }
946 #undef _
947  else if (unformat (input, "default"))
948  ;
949 
950  else if (unformat_skip_white_space (input))
951  ;
952  else
953  {
954  error = clib_error_return (0, "unknown input `%U'",
955  format_unformat_error, input);
956  goto done;
957  }
958  }
959 
960  if (!conf->uio_driver_name)
961  conf->uio_driver_name = format (0, "uio_pci_generic%c", 0);
962 
963  /*
964  * Use 1G huge pages if available.
965  */
966  if (!no_huge && !huge_dir)
967  {
968  u32 x, *mem_by_socket = 0;
969  uword c = 0;
970  u8 use_1g = 1;
971  u8 use_2m = 1;
972  u8 less_than_1g = 1;
973  int rv;
974 
975  umount (DEFAULT_HUGE_DIR);
976 
977  /* Process "socket-mem" parameter value */
978  if (vec_len (socket_mem))
979  {
980  unformat_input_t in;
981  unformat_init_vector (&in, socket_mem);
983  {
984  if (unformat (&in, "%u,", &x))
985  ;
986  else if (unformat (&in, "%u", &x))
987  ;
988  else if (unformat (&in, ","))
989  x = 0;
990  else
991  break;
992 
993  vec_add1 (mem_by_socket, x);
994 
995  if (x > 1023)
996  less_than_1g = 0;
997  }
998  /* Note: unformat_free vec_frees(in.buffer), aka socket_mem... */
999  unformat_free (&in);
1000  socket_mem = 0;
1001  }
1002  else
1003  {
1004  /* *INDENT-OFF* */
1006  {
1007  vec_validate(mem_by_socket, c);
1008  mem_by_socket[c] = 256; /* default per-socket mem */
1009  }
1010  ));
1011  /* *INDENT-ON* */
1012  }
1013 
1014  /* check whether enough free 1GB (and 2MB) pages are available on each socket */
1015  /* *INDENT-OFF* */
1017  {
1018  int pages_avail, page_size, mem;
1019 
1020  vec_validate(mem_by_socket, c);
1021  mem = mem_by_socket[c];
1022 
1023  page_size = 1024;
1024  pages_avail = vlib_sysfs_get_free_hugepages(c, page_size * 1024);
1025 
1026  if (pages_avail < 0 || page_size * pages_avail < mem)
1027  use_1g = 0;
1028 
1029  page_size = 2;
1030  pages_avail = vlib_sysfs_get_free_hugepages(c, page_size * 1024);
1031 
1032  if (pages_avail < 0 || page_size * pages_avail < mem)
1033  use_2m = 0;
1034  }));
1035  /* *INDENT-ON* */
1036 
1037  if (mem_by_socket == 0)
1038  {
1039  error = clib_error_return (0, "mem_by_socket NULL");
1040  goto done;
1041  }
1042  _vec_len (mem_by_socket) = c + 1;
1043 
1044  /* regenerate socket_mem string */
1045  vec_foreach_index (x, mem_by_socket)
1046  socket_mem = format (socket_mem, "%s%u",
1047  socket_mem ? "," : "", mem_by_socket[x]);
1048  socket_mem = format (socket_mem, "%c", 0);
1049 
1050  vec_free (mem_by_socket);
1051 
1052  rv = mkdir (VPP_RUN_DIR, 0755);
1053  if (rv && errno != EEXIST)
1054  {
1055  error = clib_error_return (0, "mkdir '%s' failed errno %d",
1056  VPP_RUN_DIR, errno);
1057  goto done;
1058  }
1059 
1060  rv = mkdir (DEFAULT_HUGE_DIR, 0755);
1061  if (rv && errno != EEXIST)
1062  {
1063  error = clib_error_return (0, "mkdir '%s' failed errno %d",
1064  DEFAULT_HUGE_DIR, errno);
1065  goto done;
1066  }
1067 
  /* 1G pages are used when enough are free, unless every socket asks for
     less than 1024 and 2M pages are sufficient; otherwise fall back to
     2M pages, and fail when neither size has enough free pages. */
1068  if (use_1g && !(less_than_1g && use_2m))
1069  {
1070  rv =
1071  mount ("none", DEFAULT_HUGE_DIR, "hugetlbfs", 0, "pagesize=1G");
1072  }
1073  else if (use_2m)
1074  {
1075  rv = mount ("none", DEFAULT_HUGE_DIR, "hugetlbfs", 0, NULL);
1076  }
1077  else
1078  {
1079  return clib_error_return (0, "not enough free huge pages");
1080  }
1081 
1082  if (rv)
1083  {
1084  error = clib_error_return (0, "mount failed %d", errno);
1085  goto done;
1086  }
1087 
1088  tmp = format (0, "--huge-dir%c", 0);
1089  vec_add1 (conf->eal_init_args, tmp);
1090  tmp = format (0, "%s%c", DEFAULT_HUGE_DIR, 0);
1091  vec_add1 (conf->eal_init_args, tmp);
1092  if (!file_prefix)
1093  {
1094  tmp = format (0, "--file-prefix%c", 0);
1095  vec_add1 (conf->eal_init_args, tmp);
1096  tmp = format (0, "vpp%c", 0);
1097  vec_add1 (conf->eal_init_args, tmp);
1098  }
1099  }
1100 
1101  vec_free (rte_cmd);
1102  vec_free (ethname);
1103 
1104  if (error)
1105  return error;
1106 
1107  /* I'll bet that -c and -n must be the first and second args... */
1108  if (!conf->coremask_set_manually)
1109  {
1111  uword *coremask = 0;
1112  int i;
1113 
1114  /* main thread core */
1115  coremask = clib_bitmap_set (coremask, tm->main_lcore, 1);
1116 
1117  for (i = 0; i < vec_len (tm->registrations); i++)
1118  {
1119  tr = tm->registrations[i];
1120  coremask = clib_bitmap_or (coremask, tr->coremask);
1121  }
1122 
1123  vec_insert (conf->eal_init_args, 2, 1);
1124  conf->eal_init_args[1] = (u8 *) "-c";
1125  tmp = format (0, "%U%c", format_bitmap_hex, coremask, 0);
1126  conf->eal_init_args[2] = tmp;
1127  clib_bitmap_free (coremask);
1128  }
1129 
1130  if (!conf->nchannels_set_manually)
1131  {
1132  vec_insert (conf->eal_init_args, 2, 3);
1133  conf->eal_init_args[3] = (u8 *) "-n";
  /* NOTE(review): every other argument built with format () in this
     function appends an explicit "%c", 0 terminator; this one does not -
     confirm the resulting string is NUL-terminated. */
1134  tmp = format (0, "%d", conf->nchannels);
1135  conf->eal_init_args[4] = tmp;
1136  }
1137 
1138  if (no_pci == 0 && geteuid () == 0)
1139  dpdk_bind_devices_to_uio (conf);
1140 
1141 #define _(x) \
1142  if (devconf->x == 0 && conf->default_devconf.x > 0) \
1143  devconf->x = conf->default_devconf.x ;
1144 
1145  /* *INDENT-OFF* */
1146  pool_foreach (devconf, conf->dev_confs, ({
1147 
1148  /* default per-device config items */
1149  foreach_dpdk_device_config_item
1150 
1151  /* add DPDK EAL whitelist/blacklist entry */
1152  if (num_whitelisted > 0 && devconf->is_blacklisted == 0)
1153  {
1154  tmp = format (0, "-w%c", 0);
1155  vec_add1 (conf->eal_init_args, tmp);
1156  tmp = format (0, "%U%c", format_vlib_pci_addr, &devconf->pci_addr, 0);
1157  vec_add1 (conf->eal_init_args, tmp);
1158  }
1159  else if (num_whitelisted == 0 && devconf->is_blacklisted != 0)
1160  {
1161  tmp = format (0, "-b%c", 0);
1162  vec_add1 (conf->eal_init_args, tmp);
1163  tmp = format (0, "%U%c", format_vlib_pci_addr, &devconf->pci_addr, 0);
1164  vec_add1 (conf->eal_init_args, tmp);
1165  }
1166  }));
1167  /* *INDENT-ON* */
1168 
1169 #undef _
1170 
1171  /* set master-lcore */
1172  tmp = format (0, "--master-lcore%c", 0);
1173  vec_add1 (conf->eal_init_args, tmp);
1174  tmp = format (0, "%u%c", tm->main_lcore, 0);
1175  vec_add1 (conf->eal_init_args, tmp);
1176 
1177  /* set socket-mem */
  /* NOTE(review): socket_mem is only regenerated inside the
     !no_huge && !huge_dir branch above; on the other path it is whatever
     "socket-mem %s" parsed, or 0 when that option was absent - confirm
     "%s" is safe for both. */
1178  tmp = format (0, "--socket-mem%c", 0);
1179  vec_add1 (conf->eal_init_args, tmp);
1180  tmp = format (0, "%s%c", socket_mem, 0);
1181  vec_add1 (conf->eal_init_args, tmp);
1182 
1183  /* NULL terminate the "argv" vector, in case of stupidity */
1184  vec_add1 (conf->eal_init_args, 0);
1185  _vec_len (conf->eal_init_args) -= 1;
1186 
1187  /* Set up DPDK eal and packet mbuf pool early. */
1188 
1189 #if RTE_VERSION >= RTE_VERSION_NUM(17, 5, 0, 0)
1190  rte_log_set_global_level (log_level);
1191 #else
1192  rte_set_log_level (log_level);
1193 #endif
1194 
1195  vm = vlib_get_main ();
1196 
1197  /* make copy of args as rte_eal_init tends to mess up with arg array */
1198  for (i = 1; i < vec_len (conf->eal_init_args); i++)
1199  conf->eal_init_args_str = format (conf->eal_init_args_str, "%s ",
1200  conf->eal_init_args[i]);
1201 
1202  ret =
1203  rte_eal_init (vec_len (conf->eal_init_args),
1204  (char **) conf->eal_init_args);
1205 
1206  /* lazy umount hugepages */
1207  umount2 (DEFAULT_HUGE_DIR, MNT_DETACH);
1208 
1209  if (ret < 0)
1210  return clib_error_return (0, "rte_eal_init returned %d", ret);
1211 
1212  /* Dump the physical memory layout prior to creating the mbuf_pool */
1213  fprintf (stdout, "DPDK physical memory layout:\n");
1214  rte_dump_physmem_layout (stdout);
1215 
1216  /* main thread 1st */
1217  error = vlib_buffer_pool_create (vm, conf->num_mbufs, rte_socket_id ());
1218  if (error)
1219  return error;
1220 
  /* Then one create call per lcore index, using that lcore's socket id. */
1221  for (i = 0; i < RTE_MAX_LCORE; i++)
1222  {
1223  error = vlib_buffer_pool_create (vm, conf->num_mbufs,
1224  rte_lcore_to_socket_id (i));
1225  if (error)
1226  return error;
1227  }
1228 
1229 done:
1230  return error;
1231 }
1232 
1234 
/*
 * Re-read a device's link state from DPDK and propagate changes to vnet
 * (the line naming this function and its parameters is missing from this
 * listing; the body uses `xd` and `now`).
 *
 * Does nothing for devices without DPDK_DEVICE_FLAG_PMD.  A non-zero
 * `now` is stored in xd->time_last_link_update; zero keeps the previous
 * value.  xd->link is cleared and refreshed with
 * rte_eth_link_get_nowait (), then compared with the copy taken on
 * entry.  Once any of the tests below detects a change, the later
 * duplex/speed sections run as well, and the accumulated hw_flags are
 * pushed with vnet_hw_interface_set_flags ().
 *
 * NOTE(review): the first change test is only partly visible in this
 * listing; it involves xd->link.link_status.
 *
 * NOTE(review): the speed switch has cases only for 10M, 100M, 1G, 10G,
 * 40G and 0, so any other speed lands in the default branch and logs
 * "unknown link speed".
 */
1235 void
1237 {
1238  vnet_main_t *vnm = vnet_get_main ();
1239  struct rte_eth_link prev_link = xd->link;
1240  u32 hw_flags = 0;
1241  u8 hw_flags_chg = 0;
1242 
1243  /* only update link state for PMD interfaces */
1244  if ((xd->flags & DPDK_DEVICE_FLAG_PMD) == 0)
1245  return;
1246 
1247  xd->time_last_link_update = now ? now : xd->time_last_link_update;
1248  memset (&xd->link, 0, sizeof (xd->link));
1249  rte_eth_link_get_nowait (xd->device_index, &xd->link);
1250 
  /* Event logging; compiled in only when LINK_STATE_ELOGS is non-zero
     (it is defined as 0 at the top of this file). */
1251  if (LINK_STATE_ELOGS)
1252  {
1253  vlib_main_t *vm = vlib_get_main ();
1254  ELOG_TYPE_DECLARE (e) =
1255  {
1256  .format =
1257  "update-link-state: sw_if_index %d, admin_up %d,"
1258  "old link_state %d new link_state %d",.format_args = "i4i1i1i1",};
1259 
1260  struct
1261  {
1262  u32 sw_if_index;
1263  u8 admin_up;
1264  u8 old_link_state;
1265  u8 new_link_state;
1266  } *ed;
1267  ed = ELOG_DATA (&vm->elog_main, e);
1268  ed->sw_if_index = xd->vlib_sw_if_index;
1269  ed->admin_up = (xd->flags & DPDK_DEVICE_FLAG_ADMIN_UP) != 0;
1270  ed->old_link_state = (u8)
1272  ed->new_link_state = (u8) xd->link.link_status;
1273  }
1274 
1276  && ((xd->link.link_status != 0) ^
1278  {
1279  hw_flags_chg = 1;
1280  hw_flags |= (xd->link.link_status ? VNET_HW_INTERFACE_FLAG_LINK_UP : 0);
1281  }
1282 
1283  if (hw_flags_chg || (xd->link.link_duplex != prev_link.link_duplex))
1284  {
1285  hw_flags_chg = 1;
1286  switch (xd->link.link_duplex)
1287  {
1288  case ETH_LINK_HALF_DUPLEX:
1290  break;
1291  case ETH_LINK_FULL_DUPLEX:
1293  break;
1294  default:
1295  break;
1296  }
1297  }
1298  if (hw_flags_chg || (xd->link.link_speed != prev_link.link_speed))
1299  {
1300  hw_flags_chg = 1;
1301  switch (xd->link.link_speed)
1302  {
1303  case ETH_SPEED_NUM_10M:
1305  break;
1306  case ETH_SPEED_NUM_100M:
1308  break;
1309  case ETH_SPEED_NUM_1G:
1310  hw_flags |= VNET_HW_INTERFACE_FLAG_SPEED_1G;
1311  break;
1312  case ETH_SPEED_NUM_10G:
1314  break;
1315  case ETH_SPEED_NUM_40G:
1317  break;
1318  case 0:
1319  break;
1320  default:
1321  clib_warning ("unknown link speed %d", xd->link.link_speed);
1322  break;
1323  }
1324  }
1325  if (hw_flags_chg)
1326  {
1327  if (LINK_STATE_ELOGS)
1328  {
1329  vlib_main_t *vm = vlib_get_main ();
1330 
1331  ELOG_TYPE_DECLARE (e) =
1332  {
1333  .format =
1334  "update-link-state: sw_if_index %d, new flags %d",.format_args
1335  = "i4i4",};
1336 
1337  struct
1338  {
1339  u32 sw_if_index;
1340  u32 flags;
1341  } *ed;
1342  ed = ELOG_DATA (&vm->elog_main, e);
1343  ed->sw_if_index = xd->vlib_sw_if_index;
1344  ed->flags = hw_flags;
1345  }
1346  vnet_hw_interface_set_flags (vnm, xd->hw_if_index, hw_flags);
1347  }
1348 }
1349 
/*
 * dpdk_process (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f)
 *
 * Body of the "dpdk-process" process node.  In order, it:
 *   1. runs dpdk_lib_init (dm); a failure is reported through
 *      clib_error_report() and execution continues,
 *   2. sets tm->worker_thread_release to 1,
 *   3. refreshes the link state of every entry in dm->devices,
 *   4. performs the extra bond-interface setup described below,
 *   5. loops forever, waking on an event or after min_wait seconds to
 *      update per-device counters and link state.
 *
 * The loop has no break, so the trailing "return 0" is not reached.
 *
 * NOTE(review): this listing omits numbered lines 1351, 1356, 1358,
 * 1422-1425, 1445, 1447, 1458-1459, 1464-1467 and 1484.  The signature
 * above comes from the cross-reference index at the end of this page; "em"
 * and "tm" are presumably declared on the missing lines 1356/1358 --
 * confirm against the real init.c.
 */
1350 static uword
1352 {
1353  clib_error_t *error;
1354  vnet_main_t *vnm = vnet_get_main ();
1355  dpdk_main_t *dm = &dpdk_main;
1357  dpdk_device_t *xd;
1359  int i;
1360 
1361  error = dpdk_lib_init (dm);
1362 
1363  if (error)
1364  clib_error_report (error);
1365 
1366  tm->worker_thread_release = 1;
1367 
1368  f64 now = vlib_time_now (vm);
1369  vec_foreach (xd, dm->devices)
1370  {
1371  dpdk_update_link_state (xd, now);
1372  }
1373 
1374  {
1375  /*
1376  * Extra set up for bond interfaces:
1377  * 1. Setup MACs for bond interfaces and their slave links which were set
1378  * in dpdk_device_setup() but needs to be done again here to take
1379  * effect.
1380  * 2. Set up info and register slave link state change callback handling.
1381  * 3. Set up info for bond interface related CLI support.
1382  */
1383  int nports = rte_eth_dev_count ();
1384  if (nports > 0)
1385  {
1386  for (i = 0; i < nports; i++)
1387  {
      /* dm->devices is indexed by DPDK port number here (see ASSERT) */
1388  xd = &dm->devices[i];
1389  ASSERT (i == xd->device_index);
1390  if (xd->pmd == VNET_DPDK_PMD_BOND)
1391  {
1392  u8 addr[6];
      /* at most 16 slave port ids are requested from the bond PMD */
1393  u8 slink[16];
1394  int nlink = rte_eth_bond_slaves_get (i, slink, 16);
1395  if (nlink > 0)
1396  {
1397  vnet_hw_interface_t *bhi;
1398  ethernet_interface_t *bei;
1399  int rv;
1400 
1401  /* Get MAC of 1st slave link */
1402  rte_eth_macaddr_get
1403  (slink[0], (struct ether_addr *) addr);
1404 
1405  /* Set MAC of bonded interface to that of 1st slave link */
1406  clib_warning ("Set MAC for bond port %d BondEthernet%d",
1407  i, xd->port_id);
1408  rv = rte_eth_bond_mac_address_set
1409  (i, (struct ether_addr *) addr);
1410  if (rv)
1411  clib_warning ("Set MAC addr failure rv=%d", rv);
1412 
1413  /* Populate MAC of bonded interface in VPP hw tables */
1414  bhi = vnet_get_hw_interface
1415  (vnm, dm->devices[i].hw_if_index);
1416  bei = pool_elt_at_index
1417  (em->interfaces, bhi->hw_instance);
1418  clib_memcpy (bhi->hw_address, addr, 6);
1419  clib_memcpy (bei->address, addr, 6);
1420 
1421  /* Init l3 packet size allowed on bonded interface */
      /* slaves are visited from the last slink[] entry down to slink[0] */
1426  while (nlink >= 1)
1427  { /* for all slave links */
1428  int slave = slink[--nlink];
1429  dpdk_device_t *sdev = &dm->devices[slave];
1430  vnet_hw_interface_t *shi;
1431  vnet_sw_interface_t *ssi;
1432  ethernet_interface_t *sei;
1433  /* Add MAC to all slave links except the first one */
      /* after the decrement nlink is this slave's index; 0 == 1st slave */
1434  if (nlink)
1435  {
1436  clib_warning ("Add MAC for slave port %d", slave);
1437  rv = rte_eth_dev_mac_addr_add
1438  (slave, (struct ether_addr *) addr, 0);
1439  if (rv)
1440  clib_warning ("Add MAC addr failure rv=%d", rv);
1441  }
1442  /* Setup slave link state change callback handling */
1443  rte_eth_dev_callback_register
1444  (slave, RTE_ETH_EVENT_INTR_LSC,
1446  dpdk_device_t *sxd = &dm->devices[slave];
1448  sxd->bond_port = i;
1449  /* Set slaves bitmap for bonded interface */
1450  bhi->bond_info = clib_bitmap_set
1451  (bhi->bond_info, sdev->hw_if_index, 1);
1452  /* Set MACs and slave link flags on slave interface */
1453  shi = vnet_get_hw_interface (vnm, sdev->hw_if_index);
1454  ssi = vnet_get_sw_interface
1455  (vnm, sdev->vlib_sw_if_index);
1456  sei = pool_elt_at_index
1457  (em->interfaces, shi->hw_instance);
1460  clib_memcpy (shi->hw_address, addr, 6);
1461  clib_memcpy (sei->address, addr, 6);
1462  /* Set l3 packet size allowed as the lowest of slave */
1463  if (bhi->max_l3_packet_bytes[VLIB_RX] >
1468  /* Set max packet size allowed as the lowest of slave */
1469  if (bhi->max_packet_bytes > shi->max_packet_bytes)
1470  bhi->max_packet_bytes = shi->max_packet_bytes;
1471  }
1472  }
1473  }
1474  }
1475  }
1476  }
1477 
1478  while (1)
1479  {
1480  /*
1481  * check each time through the loop in case intervals are changed
1482  */
      /* NOTE(review): the two result operands of this ?: (line 1484) are
         missing; presumably the smaller interval is chosen -- confirm */
1483  f64 min_wait = dm->link_state_poll_interval < dm->stat_poll_interval ?
1485 
1486  vlib_process_wait_for_event_or_clock (vm, min_wait);
1487 
1488  if (dm->admin_up_down_in_progress)
1489  /* skip the poll if an admin up down is in progress (on any interface) */
1490  continue;
1491 
1492  vec_foreach (xd, dm->devices)
1493  {
      /* time is re-sampled per device; each poll has its own interval */
1494  f64 now = vlib_time_now (vm);
1495  if ((now - xd->time_last_stats_update) >= dm->stat_poll_interval)
1496  dpdk_update_counters (xd, now);
1497  if ((now - xd->time_last_link_update) >= dm->link_state_poll_interval)
1498  dpdk_update_link_state (xd, now);
1499 
1500  }
1501  }
1502 
1503  return 0;
1504 }
1505 
/*
 * Node registration for dpdk_process: a VLIB_NODE_TYPE_PROCESS node named
 * "dpdk-process" whose stack size is given as log2 bytes (17, i.e. 2^17).
 *
 * NOTE(review): the opening line of the registration (listing line 1507)
 * is missing from this extract; the cross-reference index at the end of
 * this page lists it as VLIB_REGISTER_NODE (dpdk_process_node).
 */
1506 /* *INDENT-OFF* */
1508  .function = dpdk_process,
1509  .type = VLIB_NODE_TYPE_PROCESS,
1510  .name = "dpdk-process",
1511  .process_log2_n_stack_bytes = 17,
1512 };
1513 /* *INDENT-ON* */
1514 
/*
 * dpdk_init (vlib_main_t * vm)
 *
 * One-time initialisation of dpdk_main.  Checks the structure layout at
 * compile time, records the vlib/vnet/config pointers, looks up the
 * "ethernet-input" node, fills in configuration defaults, sizes the
 * per-thread recycle vector and finally runs dpdk_cli_init.
 *
 * Returns 0 on success, or a clib_error_t when the "ethernet-input" node
 * cannot be found or dpdk_cli_init fails.
 *
 * NOTE(review): this listing omits numbered lines 1516, 1521, 1527,
 * 1550-1551 and 1553-1554.  The signature above comes from the
 * cross-reference index at the end of this page; "tm" is presumably
 * declared on the missing line 1521 -- confirm against the real init.c.
 */
1515 static clib_error_t *
1517 {
1518  dpdk_main_t *dm = &dpdk_main;
1519  vlib_node_t *ei;
1520  clib_error_t *error = 0;
1522 
1523  /* verify that structs are cacheline aligned */
1524  STATIC_ASSERT (offsetof (dpdk_device_t, cacheline0) == 0,
1525  "Cache line marker must be 1st element in dpdk_device_t");
1526  STATIC_ASSERT (offsetof (dpdk_device_t, cacheline1) ==
1528  "Data in cache line 0 is bigger than cache line size");
1529  STATIC_ASSERT (offsetof (frame_queue_trace_t, cacheline0) == 0,
1530  "Cache line marker must be 1st element in frame_queue_trace_t");
1531 
1532  dm->vlib_main = vm;
1533  dm->vnet_main = vnet_get_main ();
1534  dm->conf = &dpdk_config_main;
1535 
1536  ei = vlib_get_node_by_name (vm, (u8 *) "ethernet-input");
1537  if (ei == 0)
1538  return clib_error_return (0, "ethernet-input node AWOL");
1539 
1540  dm->ethernet_input_node_index = ei->index;
1541 
1542  dm->conf->nchannels = 4;
      /* a num_mbufs of 0 is replaced by the NB_MBUF default */
1543  dm->conf->num_mbufs = dm->conf->num_mbufs ? dm->conf->num_mbufs : NB_MBUF;
      /* NOTE(review): "vnet" presumably serves as argv[0] for EAL -- confirm */
1544  vec_add1 (dm->conf->eal_init_args, (u8 *) "vnet");
1545 
      /* one dm->recycle slot per thread stack (indices 0..n_thread_stacks-1) */
1546  vec_validate (dm->recycle, tm->n_thread_stacks - 1);
1547 
1548  /* Default vlib_buffer_t flags, DISABLES tcp/udp checksumming... */
      /* NOTE(review): the right-hand side of this assignment (lines
         1550-1551) is missing from this extract */
1549  dm->buffer_flags_template =
1552 
1555 
1556  /* init CLI */
1557  if ((error = vlib_call_init_function (vm, dpdk_cli_init)))
1558  return error;
1559 
      /* error is 0 here: every failure path above has already returned */
1560  return error;
1561 }
1562 
1564 
1565 
1566 /*
1567  * fd.io coding-style-patch-verification: ON
1568  *
1569  * Local Variables:
1570  * eval: (c-set-style "gnu")
1571  * End:
1572  */
u32 ** d_trace_buffers
Definition: dpdk.h:164
#define vec_validate(V, I)
Make sure vector is long enough for given index (no header, unspecified alignment) ...
Definition: vec.h:436
#define DPDK_DEVICE_FLAG_PROMISC
Definition: dpdk.h:171
static void dpdk_bind_devices_to_uio(dpdk_config_main_t *conf)
Definition: init.c:636
f64 time_last_link_update
Definition: dpdk.h:208
vmrglw vmrglh hi
static u8 * format_bitmap_hex(u8 *s, va_list *args)
Format a bitmap as a string of hex bytes.
Definition: bitmap.h:744
format_function_t format_vlib_pci_addr
Definition: pci.h:238
#define vec_foreach_index(var, v)
Iterate over vector indices.
#define hash_set(h, key, value)
Definition: hash.h:254
sll srl srl sll sra u16x4 i
Definition: vector_sse2.h:337
#define clib_min(x, y)
Definition: clib.h:332
u32 vlib_buffer_get_or_create_free_list(vlib_main_t *vm, u32 n_data_bytes, char *fmt,...)
#define VNET_HW_INTERFACE_FLAG_SPEED_1G
Definition: interface.h:410
ethernet_main_t ethernet_main
Definition: init.c:45
clib_error_t * vnet_hw_interface_set_flags(vnet_main_t *vnm, u32 hw_if_index, u32 flags)
Definition: interface.c:537
static f64 vlib_process_wait_for_event_or_clock(vlib_main_t *vm, f64 dt)
Suspend a cooperative multi-tasking thread Waits for an event, or for the indicated number of seconds...
Definition: node_funcs.h:699
u8 interface_name_format_decimal
Definition: dpdk.h:322
vnet_main_t * vnet_get_main(void)
Definition: misc.c:46
#define NB_MBUF
Definition: dpdk.h:53
u8 use_rss
Definition: dpdk.h:368
vnet_device_class_t dpdk_device_class
#define DPDK_DEVICE_VLAN_STRIP_OFF
Definition: dpdk.h:290
#define NULL
Definition: clib.h:55
u32 index
Definition: node.h:238
clib_error_t * vlib_pci_bind_to_uio(vlib_pci_device_t *d, char *uio_driver_name)
Definition: linux_pci.c:95
static f64 vlib_time_now(vlib_main_t *vm)
Definition: main.h:192
#define vec_add2_aligned(V, P, N, A)
Add N elements to end of vector V, return pointer to new elements in P.
Definition: vec.h:573
static uword * clib_bitmap_or(uword *ai, uword *bi)
Logical operator across two bitmaps.
static u32 dpdk_flag_change(vnet_main_t *vnm, vnet_hw_interface_t *hi, u32 flags)
Definition: init.c:80
void dpdk_update_link_state(dpdk_device_t *xd, f64 now)
Definition: init.c:1236
u16 flags
Definition: dpdk.h:169
static vnet_hw_interface_t * vnet_get_hw_interface(vnet_main_t *vnm, u32 hw_if_index)
#define LINK_STATE_ELOGS
Definition: init.c:38
#define vec_add1(V, E)
Add 1 element to end of vector (unspecified alignment).
Definition: vec.h:522
dpdk_device_and_queue_t ** devices_by_hqos_cpu
Definition: dpdk.h:338
#define DPDK_NB_RX_DESC_VIRTIO
Definition: dpdk_priv.h:21
clib_error_t * errors
Definition: dpdk.h:222
#define vec_add2(V, P, N)
Add N elements to end of vector V, return pointer to new elements in P.
Definition: vec.h:561
#define DPDK_DEVICE_FLAG_HQOS
Definition: dpdk.h:176
u32 per_interface_next_index
Definition: dpdk.h:157
u8 enable_tcp_udp_checksum
Definition: dpdk.h:309
static uword * clib_bitmap_set(uword *ai, uword i, uword value)
Sets the ith bit of a bitmap to new_value Removes trailing zeros from the bitmap. ...
Definition: bitmap.h:167
#define DPDK_DEVICE_VLAN_STRIP_ON
Definition: dpdk.h:291
static vnet_sw_interface_t * vnet_get_sw_interface(vnet_main_t *vnm, u32 sw_if_index)
vlib_buffer_t * buffer_templates
Definition: dpdk.h:344
u8 * format(u8 *s, const char *fmt,...)
Definition: format.c:419
#define DPDK_NB_TX_DESC_DEFAULT
Definition: dpdk_priv.h:20
#define foreach_eal_double_hyphen_predicate_arg
Definition: dpdk_priv.h:32
unformat_function_t unformat_vlib_pci_addr
Definition: pci.h:237
#define VNET_HW_INTERFACE_FLAG_LINK_UP
Definition: interface.h:397
#define vec_validate_aligned(V, I, A)
Make sure vector is long enough for given index (no header, specified alignment)
Definition: vec.h:447
void dpdk_port_state_callback(uint8_t port_id, enum rte_eth_event_type type, void *param)
Definition: common.c:185
u16 device_id
Definition: pci.h:80
dpdk_device_config_hqos_t hqos
Definition: dpdk.h:298
#define VPP_RUN_DIR
Definition: init.c:41
#define pool_get(P, E)
Allocate an object E from a pool P (unspecified alignment).
Definition: pool.h:200
vlib_pci_addr_t bus_address
Definition: pci.h:58
#define vec_reset_length(v)
Reset vector length to zero NULL-pointer tolerant.
#define DPDK_DEVICE_FLAG_PMD
Definition: dpdk.h:172
u8 port_id
Definition: dpdk.h:201
clib_error_t * vlib_buffer_pool_create(vlib_main_t *vm, unsigned num_mbufs, unsigned socket_id)
Definition: buffer.c:430
static vnet_sw_interface_t * vnet_get_hw_sw_interface(vnet_main_t *vnm, u32 hw_if_index)
struct rte_mbuf *** tx_vectors
Definition: dpdk.h:160
foreach_dpdk_device_config_item clib_bitmap_t * workers
Definition: dpdk.h:296
i16 current_data
signed offset in data[], pre_data[] that we are currently processing.
Definition: buffer.h:67
#define pool_foreach(VAR, POOL, BODY)
Iterate through pool.
Definition: pool.h:376
#define VLIB_INIT_FUNCTION(x)
Definition: init.h:111
dpdk_config_main_t dpdk_config_main
Definition: dpdk.h:331
vlib_node_registration_t dpdk_input_node
(constructor) VLIB_REGISTER_NODE (dpdk_input_node)
Definition: node.c:671
#define IP_BUFFER_L4_CHECKSUM_CORRECT
Definition: buffer.h:50
dpdk_device_config_t default_devconf
Definition: dpdk.h:325
f64 stat_poll_interval
Definition: dpdk.h:376
static dpdk_port_type_t port_type_from_speed_capa(struct rte_eth_dev_info *dev_info)
Definition: init.c:59
#define VLIB_BUFFER_TOTAL_LENGTH_VALID
Definition: buffer.h:89
char i8
Definition: types.h:45
#define vec_elt_at_index(v, i)
Get vector value at index i checking that i is in bounds.
#define clib_error_return(e, args...)
Definition: error.h:99
u16 rx_q_used
Definition: dpdk.h:190
#define VLIB_BUFFER_DEFAULT_FREE_LIST_BYTES
Definition: buffer.h:403
#define DPDK_DEVICE_FLAG_MAYBE_MULTISEG
Definition: dpdk.h:174
void dpdk_device_setup(dpdk_device_t *xd)
Definition: common.c:39
u16 vendor_id
Definition: pci.h:79
#define vlib_call_init_function(vm, x)
Definition: init.h:162
clib_error_t * unformat_rss_fn(unformat_input_t *input, uword *rss_fn)
Definition: format.c:730
#define DPDK_NB_TX_DESC_VIRTIO
Definition: dpdk_priv.h:22
u32 device_index
Definition: dpdk.h:151
struct rte_eth_conf port_conf
Definition: dpdk.h:193
static clib_error_t * dpdk_init(vlib_main_t *vm)
Definition: init.c:1516
u16 device_class
Definition: pci.h:78
f64 time_last_stats_update
Definition: dpdk.h:215
u32 vlib_sw_if_index
Definition: dpdk.h:154
struct rte_eth_txconf tx_conf
Definition: dpdk.h:194
#define hash_get(h, key)
Definition: hash.h:248
void dpdk_device_start(dpdk_device_t *xd)
Definition: common.c:119
#define clib_bitmap_foreach(i, ai, body)
Macro to iterate across set bits in a bitmap.
Definition: bitmap.h:361
#define pool_elt_at_index(p, i)
Returns pointer to element at given index.
Definition: pool.h:397
#define vec_insert(V, N, M)
Insert N vector elements starting at element M, initialize new elements to zero (no header...
Definition: vec.h:686
vlib_pci_addr_t pci_addr
Definition: dpdk.h:286
static clib_error_t * dpdk_config(vlib_main_t *vm, unformat_input_t *input)
Definition: init.c:810
#define foreach_eal_double_hyphen_arg
Definition: dpdk_priv.h:48
#define ETHERNET_INTERFACE_FLAG_CONFIG_MTU(flags)
Definition: ethernet.h:119
u8 ** eal_init_args
Definition: dpdk.h:305
#define VNET_HW_INTERFACE_FLAG_SPEED_10M
Definition: interface.h:408
#define VNET_SW_INTERFACE_FLAG_BOND_SLAVE
Definition: interface.h:569
#define foreach_eal_single_hyphen_mandatory_arg
Definition: dpdk_priv.h:38
vlib_pci_device_t * pci_devs
Definition: pci.h:116
struct _unformat_input_t unformat_input_t
#define VNET_HW_INTERFACE_FLAG_HALF_DUPLEX
Definition: interface.h:400
#define foreach_dpdk_pmd
Definition: dpdk.h:58
#define VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX
Definition: buffer.h:402
#define ELOG_DATA(em, f)
Definition: elog.h:481
dpdk_port_type_t port_type
Definition: dpdk.h:216
#define VLIB_CONFIG_FUNCTION(x, n,...)
Definition: init.h:119
#define VLIB_FRAME_SIZE
Definition: node.h:329
u16 tx_q_used
Definition: dpdk.h:189
u16 nb_rx_desc
Definition: dpdk.h:191
u32 hw_if_index
Definition: dpdk.h:153
void unformat_init_vector(unformat_input_t *input, u8 *vector_string)
Definition: unformat.c:1031
#define DPDK_DEVICE_FLAG_ADMIN_UP
Definition: dpdk.h:170
u32 ** recycle
Definition: dpdk.h:341
#define VNET_HW_INTERFACE_BOND_INFO_SLAVE
Definition: interface.h:482
u8 bond_port
Definition: dpdk.h:205
#define foreach_eal_single_hyphen_arg
Definition: dpdk_priv.h:42
#define DPDK_NB_RX_DESC_DEFAULT
Definition: dpdk_priv.h:19
#define UNFORMAT_END_OF_INPUT
Definition: format.h:143
svmdb_client_t * c
static clib_error_t * dpdk_device_config(dpdk_config_main_t *conf, vlib_pci_addr_t pci_addr, unformat_input_t *input, u8 is_default)
Definition: init.c:710
dpdk_device_t * devices
Definition: dpdk.h:337
static void dpdk_update_counters(dpdk_device_t *xd, f64 now)
Definition: dpdk_priv.h:78
u8 nchannels_set_manually
Definition: dpdk.h:313
vlib_pci_main_t pci_main
Definition: pci.c:53
#define vec_free(V)
Free vector's memory (no header).
Definition: vec.h:340
volatile u32 ** lockp
Definition: dpdk.h:148
dpdk_device_config_t * dev_confs
Definition: dpdk.h:326
struct rte_mbuf *** rx_vectors
Definition: dpdk.h:161
#define clib_warning(format, args...)
Definition: error.h:59
#define clib_memcpy(a, b, c)
Definition: string.h:69
dpdk_pmd_t pmd
Definition: dpdk.h:166
format_function_t format_dpdk_device_errors
Definition: dpdk.h:442
elog_main_t elog_main
Definition: main.h:141
#define ETHERNET_INTERFACE_FLAG_ACCEPT_ALL
Definition: ethernet.h:113
#define DEFAULT_HUGE_DIR
Definition: init.c:40
u8 coremask_set_manually
Definition: dpdk.h:312
#define ELOG_TYPE_DECLARE(f)
Definition: elog.h:439
vlib_node_t * vlib_get_node_by_name(vlib_main_t *vm, u8 *name)
Definition: node.c:45
#define VNET_HW_INTERFACE_FLAG_SPEED_10G
Definition: interface.h:411
#define vec_validate_ha(V, I, H, A)
Make sure vector is long enough for given index (general version).
Definition: vec.h:416
static void dpdk_device_lock_init(dpdk_device_t *xd)
Definition: init.c:130
void dpdk_device_stop(dpdk_device_t *xd)
Definition: common.c:163
u8 * interface_name_suffix
Definition: dpdk.h:183
#define hash_create(elts, value_bytes)
Definition: hash.h:658
#define VNET_HW_INTERFACE_FLAG_FULL_DUPLEX
Definition: interface.h:401
u32 max_l3_packet_bytes[VLIB_N_RX_TX]
Definition: interface.h:468
#define ASSERT(truth)
void dpdk_device_config_hqos_default(dpdk_device_config_hqos_t *hqos)
Definition: hqos.c:206
format_function_t format_dpdk_device_name
Definition: dpdk.h:440
unsigned int u32
Definition: types.h:88
int hqos_cpu_count
Definition: dpdk.h:372
void vnet_hw_interface_assign_rx_thread(vnet_main_t *vnm, u32 hw_if_index, u16 queue_id, uword thread_index)
Definition: devices.c:122
#define IP_BUFFER_L4_CHECKSUM_COMPUTED
Definition: buffer.h:49
u32 poll_sleep_usec
Definition: dpdk.h:379
static uword dpdk_process(vlib_main_t *vm, vlib_node_runtime_t *rt, vlib_frame_t *f)
Definition: init.c:1351
Bitmaps built as vectors of machine words.
clib_error_t * ethernet_register_interface(vnet_main_t *vnm, u32 dev_class_index, u32 dev_instance, u8 *address, u32 *hw_if_index_return, ethernet_flag_change_function_t flag_change)
Definition: interface.c:246
#define clib_error_report(e)
Definition: error.h:113
#define clib_bitmap_free(v)
Free a bitmap.
Definition: bitmap.h:92
#define DPDK_LINK_POLL_INTERVAL
Definition: dpdk.h:228
dpdk_main_t dpdk_main
Definition: init.c:36
uword * thread_registrations_by_name
Definition: threads.h:261
clib_error_t * dpdk_cli_init(vlib_main_t *vm)
Definition: cli.c:1889
struct rte_eth_link link
Definition: dpdk.h:207
static vlib_main_t * vlib_get_main(void)
Definition: global_funcs.h:23
clib_error_t * dpdk_port_setup_hqos(dpdk_device_t *xd, dpdk_device_config_hqos_t *hqos)
Definition: hqos.c:248
u64 uword
Definition: types.h:112
dpdk_port_type_t
Definition: dpdk.h:89
static uword clib_bitmap_count_set_bits(uword *ai)
Return the number of set bits in a bitmap.
Definition: bitmap.h:441
Definition: defs.h:47
#define DPDK_STATS_POLL_INTERVAL
Definition: dpdk.h:225
#define VNET_HW_INTERFACE_FLAG_SPEED_100M
Definition: interface.h:409
static vlib_node_registration_t dpdk_process_node
(constructor) VLIB_REGISTER_NODE (dpdk_process_node)
Definition: init.c:1507
u32 ethernet_input_node_index
Definition: dpdk.h:353
#define vec_len(v)
Number of elements in vector (rvalue-only, NULL tolerant)
double f64
Definition: types.h:142
unsigned char u8
Definition: types.h:56
uword unformat_vlib_cli_sub_input(unformat_input_t *i, va_list *args)
Definition: cli.c:152
u8 admin_up_down_in_progress
Definition: dpdk.h:366
#define STATIC_ASSERT(truth,...)
static struct rte_eth_conf port_conf_template
Definition: init.c:45
static uword unformat_bitmap_list(unformat_input_t *input, va_list *va)
unformat a list of bit ranges into a bitmap (eg "0-3,5-7,11" )
Definition: bitmap.h:693
static void unformat_free(unformat_input_t *i)
Definition: format.h:161
#define DPDK_DEVICE_FLAG_BOND_SLAVE
Definition: dpdk.h:177
static clib_error_t * dpdk_lib_init(dpdk_main_t *dm)
Definition: init.c:143
#define hash_get_mem(h, key)
Definition: hash.h:268
u32 buffer_flags_template
Definition: dpdk.h:347
static void * clib_mem_alloc_aligned(uword size, uword align)
Definition: mem.h:117
#define VLIB_BUFFER_EXT_HDR_VALID
Definition: buffer.h:93
static void vlib_buffer_init_for_free_list(vlib_buffer_t *dst, vlib_buffer_free_list_t *fl)
Definition: buffer_funcs.h:777
#define vnet_buffer(b)
Definition: buffer.h:304
static u32 random_u32(u32 *seed)
32-bit random number generator
Definition: random.h:69
#define VNET_HW_INTERFACE_FLAG_SPEED_40G
Definition: interface.h:412
u8 * format_unformat_error(u8 *s, va_list *va)
Definition: unformat.c:91
u32 vlib_buffer_free_list_index
Definition: dpdk.h:350
#define VLIB_REGISTER_NODE(x,...)
Definition: node.h:144
int hqos_cpu_first_index
Definition: dpdk.h:371
static vlib_thread_main_t * vlib_get_thread_main()
Definition: global_funcs.h:32
#define ETHERNET_MAX_PACKET_BYTES
Definition: ethernet.h:106
#define vec_foreach(var, vec)
Vector iterator.
i8 cpu_socket
Definition: dpdk.h:167
#define ETHERNET_INTERFACE_FLAG_CONFIG_PROMISC(flags)
Definition: ethernet.h:114
uword * cpu_socket_bitmap
Definition: threads.h:296
vhost_vring_addr_t addr
Definition: vhost-user.h:82
static vlib_buffer_free_list_t * vlib_buffer_get_free_list(vlib_main_t *vm, u32 free_list_index)
Definition: buffer_funcs.h:385
u8 * uio_driver_name
Definition: dpdk.h:307
vlib_thread_registration_t ** registrations
Definition: threads.h:259
u32 flags
Definition: vhost-user.h:76
#define CLIB_CACHE_LINE_BYTES
Definition: cache.h:67
unformat_function_t unformat_dpdk_log_level
Definition: dpdk.h:447
ethernet_interface_t * interfaces
Definition: ethernet.h:243
u32 flags
buffer flags: VLIB_BUFFER_IS_TRACED: trace this buffer.
Definition: buffer.h:74
vnet_main_t * vnet_main
Definition: dpdk.h:383
u16 nb_tx_desc
Definition: dpdk.h:180
clib_error_t * unformat_hqos(unformat_input_t *input, dpdk_device_config_hqos_t *hqos)
Definition: format.c:767
void vlib_cli_output(vlib_main_t *vm, char *fmt,...)
Definition: cli.c:680
uword * device_config_index_by_pci_addr
Definition: dpdk.h:327
uword unformat_skip_white_space(unformat_input_t *input)
Definition: unformat.c:815
static uword vnet_hw_interface_is_link_up(vnet_main_t *vnm, u32 hw_if_index)
volatile u32 worker_thread_release
Definition: threads.h:302
static void vnet_hw_interface_set_input_node(vnet_main_t *vnm, u32 hw_if_index, u32 node_index)
Definition: devices.h:78
vnet_device_main_t vnet_device_main
Definition: devices.c:22
uword unformat(unformat_input_t *i, const char *fmt,...)
Definition: unformat.c:972
Definition: defs.h:46
f64 link_state_poll_interval
Definition: dpdk.h:375
CLIB vectors are ubiquitous dynamically resized arrays with by user defined "headers".
static uword unformat_check_input(unformat_input_t *i)
Definition: format.h:169
dpdk_config_main_t * conf
Definition: dpdk.h:384
vlib_main_t * vlib_main
Definition: dpdk.h:382