FD.io VPP v16.06 (Vector Packet Processing)
device.c
/*
 * Copyright (c) 2015 Cisco and/or its affiliates.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include <vnet/vnet.h>
#include <vppinfra/vec.h>
#include <vppinfra/format.h>
#include <vlib/unix/cj.h>
#include <assert.h>

#include <vnet/ethernet/ethernet.h>
#include <vnet/devices/dpdk/dpdk.h>

#include "dpdk_priv.h"
#include <vppinfra/error.h>

#define foreach_dpdk_tx_func_error                      \
  _(BAD_RETVAL, "DPDK tx function returned an error")   \
  _(RING_FULL, "Tx packet drops (ring full)")           \
  _(PKT_DROP, "Tx packet drops (dpdk tx failure)")      \
  _(REPL_FAIL, "Tx packet drops (replication failure)")

typedef enum {
#define _(f,s) DPDK_TX_FUNC_ERROR_##f,
  foreach_dpdk_tx_func_error
#undef _
  DPDK_TX_FUNC_N_ERROR,
} dpdk_tx_func_error_t;

static char * dpdk_tx_func_error_strings[] = {
#define _(n,s) s,
  foreach_dpdk_tx_func_error
#undef _
};
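
/*
 * Illustrative note (not part of the original source): expanding the X-macro
 * by hand shows how the enum and the string table stay in lockstep. The
 * first entry becomes:
 *
 *   DPDK_TX_FUNC_ERROR_BAD_RETVAL,         // in dpdk_tx_func_error_t
 *   "DPDK tx function returned an error",  // in dpdk_tx_func_error_strings[]
 *
 * so a counter bumped with a DPDK_TX_FUNC_ERROR_* index always reports the
 * matching string.
 */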

clib_error_t *
dpdk_set_mac_address (vnet_hw_interface_t * hi, char * address)
{
  int error;
  dpdk_main_t * dm = &dpdk_main;
  dpdk_device_t * xd = vec_elt_at_index (dm->devices, hi->dev_instance);

  error = rte_eth_dev_default_mac_addr_set (xd->device_index,
                                            (struct ether_addr *) address);

  if (error) {
    return clib_error_return (0, "mac address set failed: %d", error);
  } else {
    return NULL;
  }
}

clib_error_t *
dpdk_set_mc_filter (vnet_hw_interface_t * hi,
                    struct ether_addr mc_addr_vec[], int naddr)
{
  int error;
  dpdk_main_t * dm = &dpdk_main;
  dpdk_device_t * xd = vec_elt_at_index (dm->devices, hi->dev_instance);

  error = rte_eth_dev_set_mc_addr_list (xd->device_index, mc_addr_vec, naddr);

  if (error) {
    return clib_error_return (0, "mc addr list failed: %d", error);
  } else {
    return NULL;
  }
}

struct rte_mbuf * dpdk_replicate_packet_mb (vlib_buffer_t * b)
{
  vlib_main_t * vm = vlib_get_main();
  vlib_buffer_main_t * bm = vm->buffer_main;
  struct rte_mbuf * first_mb = 0, * new_mb, * pkt_mb, ** prev_mb_next = 0;
  u8 nb_segs, nb_segs_left;
  u32 copy_bytes;
  unsigned socket_id = rte_socket_id();

  ASSERT (bm->pktmbuf_pools[socket_id]);
  pkt_mb = rte_mbuf_from_vlib_buffer(b);
  nb_segs = pkt_mb->nb_segs;
  for (nb_segs_left = nb_segs; nb_segs_left; nb_segs_left--)
    {
      if (PREDICT_FALSE(pkt_mb == 0))
        {
          clib_warning ("Missing %d mbuf chain segment(s): "
                        "(nb_segs = %d, nb_segs_left = %d)!",
                        nb_segs - nb_segs_left, nb_segs, nb_segs_left);
          if (first_mb)
            rte_pktmbuf_free(first_mb);
          return NULL;
        }
      new_mb = rte_pktmbuf_alloc (bm->pktmbuf_pools[socket_id]);
      if (PREDICT_FALSE(new_mb == 0))
        {
          if (first_mb)
            rte_pktmbuf_free(first_mb);
          return NULL;
        }

      /*
       * Copy packet info into 1st segment.
       */
      if (first_mb == 0)
        {
          first_mb = new_mb;
          rte_pktmbuf_pkt_len (first_mb) = pkt_mb->pkt_len;
          first_mb->nb_segs = pkt_mb->nb_segs;
          first_mb->port = pkt_mb->port;
#ifdef DAW_FIXME // TX Offload support TBD
          first_mb->vlan_macip = pkt_mb->vlan_macip;
          first_mb->hash = pkt_mb->hash;
          first_mb->ol_flags = pkt_mb->ol_flags
#endif
        }
      else
        {
          ASSERT(prev_mb_next != 0);
          *prev_mb_next = new_mb;
        }

      /*
       * Copy packet segment data into new mbuf segment.
       */
      rte_pktmbuf_data_len (new_mb) = pkt_mb->data_len;
      copy_bytes = pkt_mb->data_len + RTE_PKTMBUF_HEADROOM;
      ASSERT(copy_bytes <= pkt_mb->buf_len);
      clib_memcpy(new_mb->buf_addr, pkt_mb->buf_addr, copy_bytes);

      prev_mb_next = &new_mb->next;
      pkt_mb = pkt_mb->next;
    }

  ASSERT(pkt_mb == 0);
  __rte_mbuf_sanity_check(first_mb, 1);

  return first_mb;
}
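
/*
 * Illustrative note (not part of the original source): the function above is
 * a deep copy; each segment's payload (including headroom) is memcpy'd into
 * freshly allocated mbufs. The dpdk_zerocopy_replicate_packet_mb() variant
 * below builds the same chain with rte_pktmbuf_clone(), which shares the
 * payload via mbuf reference counting instead of copying it:
 *
 *   struct rte_mbuf * copy  = dpdk_replicate_packet_mb (b);          // deep copy
 *   struct rte_mbuf * clone = dpdk_zerocopy_replicate_packet_mb (b); // refcounted
 */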

struct rte_mbuf * dpdk_zerocopy_replicate_packet_mb (vlib_buffer_t * b)
{
  vlib_main_t * vm = vlib_get_main();
  vlib_buffer_main_t * bm = vm->buffer_main;
  struct rte_mbuf * first_mb = 0, * new_mb, * pkt_mb, ** prev_mb_next = 0;
  u8 nb_segs, nb_segs_left;
  unsigned socket_id = rte_socket_id();

  ASSERT (bm->pktmbuf_pools[socket_id]);
  pkt_mb = rte_mbuf_from_vlib_buffer(b);
  nb_segs = pkt_mb->nb_segs;
  for (nb_segs_left = nb_segs; nb_segs_left; nb_segs_left--)
    {
      if (PREDICT_FALSE(pkt_mb == 0))
        {
          clib_warning ("Missing %d mbuf chain segment(s): "
                        "(nb_segs = %d, nb_segs_left = %d)!",
                        nb_segs - nb_segs_left, nb_segs, nb_segs_left);
          if (first_mb)
            rte_pktmbuf_free(first_mb);
          return NULL;
        }
      new_mb = rte_pktmbuf_clone(pkt_mb, bm->pktmbuf_pools[socket_id]);
      if (PREDICT_FALSE(new_mb == 0))
        {
          if (first_mb)
            rte_pktmbuf_free(first_mb);
          return NULL;
        }

      /*
       * Copy packet info into 1st segment.
       */
      if (first_mb == 0)
        {
          first_mb = new_mb;
          rte_pktmbuf_pkt_len (first_mb) = pkt_mb->pkt_len;
          first_mb->nb_segs = pkt_mb->nb_segs;
          first_mb->port = pkt_mb->port;
#ifdef DAW_FIXME // TX Offload support TBD
          first_mb->vlan_macip = pkt_mb->vlan_macip;
          first_mb->hash = pkt_mb->hash;
          first_mb->ol_flags = pkt_mb->ol_flags
#endif
        }
      else
        {
          ASSERT(prev_mb_next != 0);
          *prev_mb_next = new_mb;
        }

      /*
       * Copy packet segment data into new mbuf segment.
       */
      rte_pktmbuf_data_len (new_mb) = pkt_mb->data_len;

      prev_mb_next = &new_mb->next;
      pkt_mb = pkt_mb->next;
    }

  ASSERT(pkt_mb == 0);
  __rte_mbuf_sanity_check(first_mb, 1);

  return first_mb;
}

static void
dpdk_tx_trace_buffer (dpdk_main_t * dm,
                      vlib_node_runtime_t * node,
                      dpdk_device_t * xd,
                      u16 queue_id,
                      u32 buffer_index,
                      vlib_buffer_t * buffer)
{
  vlib_main_t * vm = vlib_get_main();
  dpdk_tx_dma_trace_t * t0;
  struct rte_mbuf * mb;

  mb = rte_mbuf_from_vlib_buffer(buffer);

  t0 = vlib_add_trace (vm, node, buffer, sizeof (t0[0]));
  t0->queue_index = queue_id;
  t0->device_index = xd->device_index;
  t0->buffer_index = buffer_index;
  clib_memcpy (&t0->mb, mb, sizeof (t0->mb));
  clib_memcpy (&t0->buffer, buffer, sizeof (buffer[0]) - sizeof (buffer->pre_data));
  clib_memcpy (t0->buffer.pre_data, buffer->data + buffer->current_data,
               sizeof (t0->buffer.pre_data));
}

/*
 * This function calls DPDK's tx_burst function to transmit the packets
 * on the tx_vector. It manages a lock per-device if the device does not
 * support multiple queues. It returns the number of packets untransmitted
 * on the tx_vector. If all packets are transmitted (the normal case), the
 * function returns 0.
 *
 * The tx_burst function may not be able to transmit all packets because the
 * dpdk ring is full. If a flowcontrol callback function has been configured
 * then the function simply returns. If no callback has been configured, the
 * function will retry calling tx_burst with the remaining packets. This will
 * continue until all packets are transmitted or tx_burst indicates no packets
 * could be transmitted. (The caller can drop the remaining packets.)
 *
 * The function assumes there is at least one packet on the tx_vector.
 */
static_always_inline
u32 tx_burst_vector_internal (vlib_main_t * vm,
                              dpdk_device_t * xd,
                              struct rte_mbuf ** tx_vector)
{
  dpdk_main_t * dm = &dpdk_main;
  u32 n_packets;
  u32 tx_head;
  u32 tx_tail;
  u32 n_retry;
  int rv;
  int queue_id;
  tx_ring_hdr_t *ring;

  ring = vec_header(tx_vector, sizeof(*ring));

  n_packets = ring->tx_head - ring->tx_tail;

  tx_head = ring->tx_head % DPDK_TX_RING_SIZE;

  /*
   * Ensure rte_eth_tx_burst is not called with 0 packets, which can lead to
   * unpredictable results.
   */
  ASSERT(n_packets > 0);

  /*
   * Check for tx_vector overflow. If this fails it is a system configuration
   * error. The ring should be sized big enough to handle the largest un-flowed
   * off burst from a traffic manager. A larger size also helps performance
   * a bit because it decreases the probability of having to issue two tx_burst
   * calls due to a ring wrap.
   */
  ASSERT(n_packets < DPDK_TX_RING_SIZE);

  /*
   * If there is no flowcontrol callback, there is only temporary buffering
   * on the tx_vector and so the tail should always be 0.
   */
  ASSERT(dm->flowcontrol_callback || ring->tx_tail == 0);

  /*
   * If there is a flowcontrol callback, don't retry any incomplete tx_bursts.
   * Apply backpressure instead. If there is no callback, keep retrying until
   * a tx_burst sends no packets. n_retry of 255 essentially means no retry
   * limit.
   */
  n_retry = dm->flowcontrol_callback ? 0 : 255;

  queue_id = vm->cpu_index;

  do {
      /* start the burst at the tail */
      tx_tail = ring->tx_tail % DPDK_TX_RING_SIZE;

      /*
       * This device only supports one TX queue,
       * and we're running multi-threaded...
       */
      if (PREDICT_FALSE(xd->dev_type != VNET_DPDK_DEV_VHOST_USER &&
        xd->lockp != 0))
        {
          queue_id = queue_id % xd->tx_q_used;
          while (__sync_lock_test_and_set (xd->lockp[queue_id], 1))
            /* zzzz */
            queue_id = (queue_id + 1) % xd->tx_q_used;
        }

      if (PREDICT_TRUE(xd->dev_type == VNET_DPDK_DEV_ETH))
        {
          if (PREDICT_TRUE(tx_head > tx_tail))
            {
              /* no wrap, transmit in one burst */
              rv = rte_eth_tx_burst(xd->device_index,
                                    (uint16_t) queue_id,
                                    &tx_vector[tx_tail],
                                    (uint16_t) (tx_head-tx_tail));
            }
          else
            {
              /*
               * This can only happen if there is a flowcontrol callback.
               * We need to split the transmit into two calls: one for
               * the packets up to the wrap point, and one to continue
               * at the start of the ring.
               * Transmit pkts up to the wrap point.
               */
              rv = rte_eth_tx_burst(xd->device_index,
                                    (uint16_t) queue_id,
                                    &tx_vector[tx_tail],
                                    (uint16_t) (DPDK_TX_RING_SIZE - tx_tail));

              /*
               * If we transmitted everything we wanted, then allow 1 retry
               * so we can try to transmit the rest. If we didn't transmit
               * everything, stop now.
               */
              n_retry = (rv == DPDK_TX_RING_SIZE - tx_tail) ? 1 : 0;
            }
        }
      else if (xd->dev_type == VNET_DPDK_DEV_VHOST_USER)
        {
          u32 offset = 0;
          if (xd->need_txlock) {
            queue_id = 0;
            while (__sync_lock_test_and_set (xd->lockp[queue_id], 1));
          }
#if RTE_VERSION >= RTE_VERSION_NUM(2, 2, 0, 0)
          else {
            dpdk_device_and_queue_t * dq;
            vec_foreach (dq, dm->devices_by_cpu[vm->cpu_index])
            {
              if (xd->device_index == dq->device)
                break;
            }
            assert (dq);
            offset = dq->queue_id * VIRTIO_QNUM;
          }
#endif
          if (PREDICT_TRUE(tx_head > tx_tail))
            {
              int i; u32 bytes = 0;
              struct rte_mbuf **pkts = &tx_vector[tx_tail];
              for (i = 0; i < (tx_head - tx_tail); i++) {
                  struct rte_mbuf *buff = pkts[i];
                  bytes += rte_pktmbuf_data_len(buff);
              }

              /* no wrap, transmit in one burst */
              rv = rte_vhost_enqueue_burst(&xd->vu_vhost_dev, offset + VIRTIO_RXQ,
                                           &tx_vector[tx_tail],
                                           (uint16_t) (tx_head-tx_tail));
              if (PREDICT_TRUE(rv > 0))
                {
                  dpdk_vu_vring *vring = &(xd->vu_intf->vrings[offset + VIRTIO_TXQ]);
                  vring->packets += rv;
                  vring->bytes += bytes;

                  if (dpdk_vhost_user_want_interrupt(xd, offset + VIRTIO_RXQ)) {
                    vring = &(xd->vu_intf->vrings[offset + VIRTIO_RXQ]);
                    vring->n_since_last_int += rv;

                    f64 now = vlib_time_now (vm);
                    if (vring->int_deadline < now ||
                        vring->n_since_last_int > dm->vhost_coalesce_frames)
                      dpdk_vhost_user_send_interrupt(vm, xd, offset + VIRTIO_RXQ);
                  }

                  int c = rv;
                  while(c--)
                    rte_pktmbuf_free (tx_vector[tx_tail+c]);
                }
            }
          else
            {
              /*
               * If we transmitted everything we wanted, then allow 1 retry
               * so we can try to transmit the rest. If we didn't transmit
               * everything, stop now.
               */
              int i; u32 bytes = 0;
              struct rte_mbuf **pkts = &tx_vector[tx_tail];
              for (i = 0; i < (DPDK_TX_RING_SIZE - tx_tail); i++) {
                  struct rte_mbuf *buff = pkts[i];
                  bytes += rte_pktmbuf_data_len(buff);
              }
              rv = rte_vhost_enqueue_burst(&xd->vu_vhost_dev, offset + VIRTIO_RXQ,
                                           &tx_vector[tx_tail],
                                           (uint16_t) (DPDK_TX_RING_SIZE - tx_tail));

              if (PREDICT_TRUE(rv > 0))
                {
                  dpdk_vu_vring *vring = &(xd->vu_intf->vrings[offset + VIRTIO_TXQ]);
                  vring->packets += rv;
                  vring->bytes += bytes;

                  if (dpdk_vhost_user_want_interrupt(xd, offset + VIRTIO_RXQ)) {
                    vring = &(xd->vu_intf->vrings[offset + VIRTIO_RXQ]);
                    vring->n_since_last_int += rv;

                    f64 now = vlib_time_now (vm);
                    if (vring->int_deadline < now ||
                        vring->n_since_last_int > dm->vhost_coalesce_frames)
                      dpdk_vhost_user_send_interrupt(vm, xd, offset + VIRTIO_RXQ);
                  }

                  int c = rv;
                  while(c--)
                    rte_pktmbuf_free (tx_vector[tx_tail+c]);
                }

              n_retry = (rv == DPDK_TX_RING_SIZE - tx_tail) ? 1 : 0;
            }

          if (xd->need_txlock)
            *xd->lockp[queue_id] = 0;
        }
#if RTE_LIBRTE_KNI
      else if (xd->dev_type == VNET_DPDK_DEV_KNI)
        {
          if (PREDICT_TRUE(tx_head > tx_tail))
            {
              /* no wrap, transmit in one burst */
              rv = rte_kni_tx_burst(xd->kni,
                                    &tx_vector[tx_tail],
                                    (uint16_t) (tx_head-tx_tail));
            }
          else
            {
              /*
               * This can only happen if there is a flowcontrol callback.
               * We need to split the transmit into two calls: one for
               * the packets up to the wrap point, and one to continue
               * at the start of the ring.
               * Transmit pkts up to the wrap point.
               */
              rv = rte_kni_tx_burst(xd->kni,
                                    &tx_vector[tx_tail],
                                    (uint16_t) (DPDK_TX_RING_SIZE - tx_tail));

              /*
               * If we transmitted everything we wanted, then allow 1 retry
               * so we can try to transmit the rest. If we didn't transmit
               * everything, stop now.
               */
              n_retry = (rv == DPDK_TX_RING_SIZE - tx_tail) ? 1 : 0;
            }
        }
#endif
      else
        {
          ASSERT(0);
          rv = 0;
        }

      if (PREDICT_FALSE(xd->dev_type != VNET_DPDK_DEV_VHOST_USER &&
            xd->lockp != 0))
          *xd->lockp[queue_id] = 0;

      if (PREDICT_FALSE(rv < 0))
        {
          // emit non-fatal message, bump counter
          vnet_main_t * vnm = dm->vnet_main;
          vnet_interface_main_t * im = &vnm->interface_main;
          u32 node_index;

          node_index = vec_elt_at_index(im->hw_interfaces,
                                        xd->vlib_hw_if_index)->tx_node_index;

          vlib_error_count (vm, node_index, DPDK_TX_FUNC_ERROR_BAD_RETVAL, 1);
          clib_warning ("rte_eth_tx_burst[%d]: error %d", xd->device_index, rv);
          return n_packets; // untransmitted packets
        }
      ring->tx_tail += (u16)rv;
      n_packets -= (uint16_t) rv;
  } while (rv && n_packets && (n_retry>0));

  return n_packets;
}
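
/*
 * Illustrative sketch (not part of the original source): ring->tx_head and
 * ring->tx_tail are free-running u64 counters, so head - tail is the ring
 * occupancy even after the counters pass DPDK_TX_RING_SIZE; only array
 * indices are reduced modulo the ring size. For example, if DPDK_TX_RING_SIZE
 * were 4096:
 *
 *   u64 head = 5000, tail = 4997;
 *   u32 n_packets = head - tail;   // 3 packets outstanding
 *   u32 slot = tail % 4096;        // burst starts at slot 901
 *
 * A burst whose slots would run past the end of the array is what forces the
 * two-call split in the wrap branch above (tx_head <= tx_tail after modulo).
 */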

/*
 * This function transmits any packets on the interface's tx_vector and returns
 * the number of packets untransmitted on the tx_vector. If the tx_vector is
 * empty the function simply returns 0.
 *
 * It is intended to be called by a traffic manager which has flowed-off an
 * interface to see if the interface can be flowed-on again.
 */
u32 dpdk_interface_tx_vector (vlib_main_t * vm, u32 dev_instance)
{
  dpdk_main_t * dm = &dpdk_main;
  dpdk_device_t * xd;
  int queue_id;
  struct rte_mbuf ** tx_vector;
  tx_ring_hdr_t *ring;

  /* param is dev_instance and not hw_if_index to save another lookup */
  xd = vec_elt_at_index (dm->devices, dev_instance);

  queue_id = vm->cpu_index;
  tx_vector = xd->tx_vectors[queue_id];

  /* If no packets on the ring, don't bother calling tx function */
  ring = vec_header(tx_vector, sizeof(*ring));
  if (ring->tx_head == ring->tx_tail)
    {
      return 0;
    }

  return tx_burst_vector_internal (vm, xd, tx_vector);
}
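
/*
 * Illustrative sketch (not part of the original source): a traffic manager
 * that has flowed-off an interface might poll it like this before resuming
 * dequeues. The helper name and polling policy are hypothetical.
 *
 *   static int tm_try_flow_on (vlib_main_t * vm, u32 dev_instance)
 *   {
 *     u32 remaining = dpdk_interface_tx_vector (vm, dev_instance);
 *     return remaining == 0;   // flow the interface back on once drained
 *   }
 */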

/*
 * Transmits the packets on the frame to the interface associated with the
 * node. It first copies packets on the frame to a tx_vector containing the
 * rte_mbuf pointers. It then passes this vector to tx_burst_vector_internal
 * which calls the dpdk tx_burst function.
 *
 * The tx_vector is treated slightly differently depending on whether or
 * not a flowcontrol callback function has been configured. If there is no
 * callback, the tx_vector is a temporary array of rte_mbuf packet pointers.
 * Its entries are written and consumed before the function exits.
 *
 * If there is a callback then the transmit is being invoked in the presence
 * of a traffic manager. Here the tx_vector is treated like a ring of rte_mbuf
 * pointers. If not all packets can be transmitted, the untransmitted packets
 * stay on the tx_vector until the next call. The callback allows the traffic
 * manager to flow-off dequeues to the interface. The companion function
 * dpdk_interface_tx_vector() allows the traffic manager to detect when
 * it should flow-on the interface again.
 */
static uword
dpdk_interface_tx (vlib_main_t * vm,
                   vlib_node_runtime_t * node,
                   vlib_frame_t * f)
{
  dpdk_main_t * dm = &dpdk_main;
  vnet_interface_output_runtime_t * rd = (void *) node->runtime_data;
  dpdk_device_t * xd = vec_elt_at_index (dm->devices, rd->dev_instance);
  u32 n_packets = f->n_vectors;
  u32 n_left;
  u32 * from;
  struct rte_mbuf ** tx_vector;
  int i;
  int queue_id;
  u32 my_cpu;
  u32 tx_pkts = 0;
  tx_ring_hdr_t *ring;
  u32 n_on_ring;

  my_cpu = vm->cpu_index;

  queue_id = my_cpu;

  tx_vector = xd->tx_vectors[queue_id];
  ring = vec_header(tx_vector, sizeof(*ring));

  n_on_ring = ring->tx_head - ring->tx_tail;
  from = vlib_frame_vector_args (f);

  ASSERT(n_packets <= VLIB_FRAME_SIZE);

  if (PREDICT_FALSE(n_on_ring + n_packets > DPDK_TX_RING_SIZE))
    {
      /*
       * Overflowing the ring should never happen.
       * If it does then drop the whole frame.
       */
      vlib_error_count (vm, node->node_index, DPDK_TX_FUNC_ERROR_RING_FULL,
                        n_packets);

      while (n_packets--)
        {
          u32 bi0 = from[n_packets];
          vlib_buffer_t *b0 = vlib_get_buffer (vm, bi0);
          struct rte_mbuf *mb0 = rte_mbuf_from_vlib_buffer(b0);
          rte_pktmbuf_free (mb0);
        }
      return n_on_ring;
    }

  if (PREDICT_FALSE(dm->tx_pcap_enable))
    {
      n_left = n_packets;
      while (n_left > 0)
        {
          u32 bi0 = from[0];
          vlib_buffer_t * b0 = vlib_get_buffer (vm, bi0);
          if (dm->pcap_sw_if_index == 0 ||
              dm->pcap_sw_if_index == vnet_buffer(b0)->sw_if_index [VLIB_TX])
              pcap_add_buffer (&dm->pcap_main, vm, bi0, 512);
          from++;
          n_left--;
        }
    }

  from = vlib_frame_vector_args (f);
  n_left = n_packets;
  i = ring->tx_head % DPDK_TX_RING_SIZE;

  while (n_left >= 4)
    {
      u32 bi0, bi1;
      u32 pi0, pi1;
      struct rte_mbuf * mb0, * mb1;
      struct rte_mbuf * prefmb0, * prefmb1;
      vlib_buffer_t * b0, * b1;
      vlib_buffer_t * pref0, * pref1;
      i16 delta0, delta1;
      u16 new_data_len0, new_data_len1;
      u16 new_pkt_len0, new_pkt_len1;
      u32 any_clone;

      pi0 = from[2];
      pi1 = from[3];
      pref0 = vlib_get_buffer (vm, pi0);
      pref1 = vlib_get_buffer (vm, pi1);

      prefmb0 = rte_mbuf_from_vlib_buffer(pref0);
      prefmb1 = rte_mbuf_from_vlib_buffer(pref1);

      CLIB_PREFETCH(prefmb0, CLIB_CACHE_LINE_BYTES, LOAD);
      CLIB_PREFETCH(pref0, CLIB_CACHE_LINE_BYTES, LOAD);
      CLIB_PREFETCH(prefmb1, CLIB_CACHE_LINE_BYTES, LOAD);
      CLIB_PREFETCH(pref1, CLIB_CACHE_LINE_BYTES, LOAD);

      bi0 = from[0];
      bi1 = from[1];
      from += 2;

      b0 = vlib_get_buffer (vm, bi0);
      b1 = vlib_get_buffer (vm, bi1);

      mb0 = rte_mbuf_from_vlib_buffer(b0);
      mb1 = rte_mbuf_from_vlib_buffer(b1);

      any_clone = b0->clone_count | b1->clone_count;
      if (PREDICT_FALSE(any_clone != 0))
        {
          if (PREDICT_FALSE(b0->clone_count != 0))
            {
              struct rte_mbuf * mb0_new = dpdk_replicate_packet_mb (b0);
              if (PREDICT_FALSE(mb0_new == 0))
                {
                  vlib_error_count (vm, node->node_index,
                                    DPDK_TX_FUNC_ERROR_REPL_FAIL, 1);
                  b0->flags |= VLIB_BUFFER_REPL_FAIL;
                }
              else
                mb0 = mb0_new;
              vec_add1 (dm->recycle[my_cpu], bi0);
            }
          if (PREDICT_FALSE(b1->clone_count != 0))
            {
              struct rte_mbuf * mb1_new = dpdk_replicate_packet_mb (b1);
              if (PREDICT_FALSE(mb1_new == 0))
                {
                  vlib_error_count (vm, node->node_index,
                                    DPDK_TX_FUNC_ERROR_REPL_FAIL, 1);
                  b1->flags |= VLIB_BUFFER_REPL_FAIL;
                }
              else
                mb1 = mb1_new;
              vec_add1 (dm->recycle[my_cpu], bi1);
            }
        }

      delta0 = PREDICT_FALSE(b0->flags & VLIB_BUFFER_REPL_FAIL) ? 0 :
        vlib_buffer_length_in_chain (vm, b0) - (i16) mb0->pkt_len;
      delta1 = PREDICT_FALSE(b1->flags & VLIB_BUFFER_REPL_FAIL) ? 0 :
        vlib_buffer_length_in_chain (vm, b1) - (i16) mb1->pkt_len;

      new_data_len0 = (u16)((i16) mb0->data_len + delta0);
      new_data_len1 = (u16)((i16) mb1->data_len + delta1);
      new_pkt_len0 = (u16)((i16) mb0->pkt_len + delta0);
      new_pkt_len1 = (u16)((i16) mb1->pkt_len + delta1);

      b0->current_length = new_data_len0;
      b1->current_length = new_data_len1;
      mb0->data_len = new_data_len0;
      mb1->data_len = new_data_len1;
      mb0->pkt_len = new_pkt_len0;
      mb1->pkt_len = new_pkt_len1;

      mb0->data_off = (PREDICT_FALSE(b0->flags & VLIB_BUFFER_REPL_FAIL)) ?
          mb0->data_off : (u16)(RTE_PKTMBUF_HEADROOM + b0->current_data);
      mb1->data_off = (PREDICT_FALSE(b1->flags & VLIB_BUFFER_REPL_FAIL)) ?
          mb1->data_off : (u16)(RTE_PKTMBUF_HEADROOM + b1->current_data);

      if (PREDICT_FALSE(node->flags & VLIB_NODE_FLAG_TRACE))
        {
          if (b0->flags & VLIB_BUFFER_IS_TRACED)
              dpdk_tx_trace_buffer (dm, node, xd, queue_id, bi0, b0);
          if (b1->flags & VLIB_BUFFER_IS_TRACED)
              dpdk_tx_trace_buffer (dm, node, xd, queue_id, bi1, b1);
        }

      if (PREDICT_TRUE(any_clone == 0))
        {
          tx_vector[i % DPDK_TX_RING_SIZE] = mb0;
          i++;
          tx_vector[i % DPDK_TX_RING_SIZE] = mb1;
          i++;
        }
      else
        {
          /* cloning was done, need to check for failure */
          if (PREDICT_TRUE((b0->flags & VLIB_BUFFER_REPL_FAIL) == 0))
            {
              tx_vector[i % DPDK_TX_RING_SIZE] = mb0;
              i++;
            }
          if (PREDICT_TRUE((b1->flags & VLIB_BUFFER_REPL_FAIL) == 0))
            {
              tx_vector[i % DPDK_TX_RING_SIZE] = mb1;
              i++;
            }
        }

      n_left -= 2;
    }
  while (n_left > 0)
    {
      u32 bi0;
      struct rte_mbuf * mb0;
      vlib_buffer_t * b0;
      i16 delta0;
      u16 new_data_len0;
      u16 new_pkt_len0;

      bi0 = from[0];
      from++;

      b0 = vlib_get_buffer (vm, bi0);

      mb0 = rte_mbuf_from_vlib_buffer(b0);
      if (PREDICT_FALSE(b0->clone_count != 0))
        {
          struct rte_mbuf * mb0_new = dpdk_replicate_packet_mb (b0);
          if (PREDICT_FALSE(mb0_new == 0))
            {
              vlib_error_count (vm, node->node_index,
                                DPDK_TX_FUNC_ERROR_REPL_FAIL, 1);
              b0->flags |= VLIB_BUFFER_REPL_FAIL;
            }
          else
            mb0 = mb0_new;
          vec_add1 (dm->recycle[my_cpu], bi0);
        }

      delta0 = PREDICT_FALSE(b0->flags & VLIB_BUFFER_REPL_FAIL) ? 0 :
        vlib_buffer_length_in_chain (vm, b0) - (i16) mb0->pkt_len;

      new_data_len0 = (u16)((i16) mb0->data_len + delta0);
      new_pkt_len0 = (u16)((i16) mb0->pkt_len + delta0);

      b0->current_length = new_data_len0;
      mb0->data_len = new_data_len0;
      mb0->pkt_len = new_pkt_len0;
      mb0->data_off = (PREDICT_FALSE(b0->flags & VLIB_BUFFER_REPL_FAIL)) ?
          mb0->data_off : (u16)(RTE_PKTMBUF_HEADROOM + b0->current_data);

      if (PREDICT_FALSE(node->flags & VLIB_NODE_FLAG_TRACE))
          if (b0->flags & VLIB_BUFFER_IS_TRACED)
              dpdk_tx_trace_buffer (dm, node, xd, queue_id, bi0, b0);

      if (PREDICT_TRUE((b0->flags & VLIB_BUFFER_REPL_FAIL) == 0))
        {
          tx_vector[i % DPDK_TX_RING_SIZE] = mb0;
          i++;
        }
      n_left--;
    }

  /* account for additional packets in the ring */
  ring->tx_head += n_packets;
  n_on_ring = ring->tx_head - ring->tx_tail;

  /* transmit as many packets as possible */
  n_packets = tx_burst_vector_internal (vm, xd, tx_vector);

  /*
   * tx_pkts is the number of packets successfully transmitted
   * This is the number originally on ring minus the number remaining on ring
   */
  tx_pkts = n_on_ring - n_packets;

  if (PREDICT_FALSE(dm->flowcontrol_callback != 0))
    {
      if (PREDICT_FALSE(n_packets))
        {
          /* Callback may want to enable flowcontrol */
          dm->flowcontrol_callback(vm, xd->vlib_hw_if_index, ring->tx_head - ring->tx_tail);
        }
      else
        {
          /* Reset head/tail to avoid unnecessary wrap */
          ring->tx_head = 0;
          ring->tx_tail = 0;
        }
    }
  else
    {
      /* If there is no callback then drop any non-transmitted packets */
      if (PREDICT_FALSE(n_packets))
        {
          vlib_simple_counter_main_t * cm;
          vnet_main_t * vnm = vnet_get_main();

          cm = vec_elt_at_index (vnm->interface_main.sw_if_counters,
                                 VNET_INTERFACE_COUNTER_TX_ERROR);

          vlib_increment_simple_counter (cm, my_cpu, xd->vlib_sw_if_index, n_packets);

          vlib_error_count (vm, node->node_index, DPDK_TX_FUNC_ERROR_PKT_DROP,
                            n_packets);

          while (n_packets--)
            rte_pktmbuf_free (tx_vector[ring->tx_tail + n_packets]);
        }

      /* Reset head/tail to avoid unnecessary wrap */
      ring->tx_head = 0;
      ring->tx_tail = 0;
    }

  /* Recycle replicated buffers */
  if (PREDICT_FALSE(vec_len(dm->recycle[my_cpu])))
    {
      vlib_buffer_free (vm, dm->recycle[my_cpu], vec_len(dm->recycle[my_cpu]));
      _vec_len(dm->recycle[my_cpu]) = 0;
    }

  ASSERT(ring->tx_head >= ring->tx_tail);

  return tx_pkts;
}

static int dpdk_device_renumber (vnet_hw_interface_t * hi,
                                 u32 new_dev_instance)
{
  dpdk_main_t * dm = &dpdk_main;
  dpdk_device_t * xd = vec_elt_at_index (dm->devices, hi->dev_instance);

  if (!xd || xd->dev_type != VNET_DPDK_DEV_VHOST_USER) {
      clib_warning("cannot renumber non-vhost-user interface (sw_if_index: %d)",
                   hi->sw_if_index);
      return 0;
  }

  xd->vu_if_id = new_dev_instance;
  return 0;
}

static void dpdk_clear_hw_interface_counters (u32 instance)
{
  dpdk_main_t * dm = &dpdk_main;
  dpdk_device_t * xd = vec_elt_at_index (dm->devices, instance);

  /*
   * DAW-FIXME: VMXNET3 device stop/start doesn't work,
   * therefore fake the stop in the dpdk driver by
   * silently dropping all of the incoming pkts instead of
   * stopping the driver / hardware.
   */
  if (xd->admin_up != 0xff)
    {
      /*
       * Set the "last_cleared_stats" to the current stats, so that
       * things appear to clear from a display perspective.
       */
      dpdk_update_counters (xd, vlib_time_now (dm->vlib_main));

      clib_memcpy (&xd->last_cleared_stats, &xd->stats, sizeof(xd->stats));
      clib_memcpy (xd->last_cleared_xstats, xd->xstats,
                   vec_len(xd->last_cleared_xstats) *
                   sizeof(xd->last_cleared_xstats[0]));
    }
  else
    {
      /*
       * Internally rte_eth_xstats_reset() is calling rte_eth_stats_reset(),
       * so we're only calling xstats_reset() here.
       */
      rte_eth_xstats_reset (xd->device_index);
      memset (&xd->stats, 0, sizeof(xd->stats));
      memset (&xd->last_stats, 0, sizeof (xd->last_stats));
    }

  if (PREDICT_FALSE(xd->dev_type == VNET_DPDK_DEV_VHOST_USER)) {
    int i;
    for (i = 0; i < xd->rx_q_used * VIRTIO_QNUM; i++) {
      xd->vu_intf->vrings[i].packets = 0;
      xd->vu_intf->vrings[i].bytes = 0;
    }
  }
}

#ifdef RTE_LIBRTE_KNI
static int
kni_config_network_if(u8 port_id, u8 if_up)
{
  vnet_main_t * vnm = vnet_get_main();
  dpdk_main_t * dm = &dpdk_main;
  dpdk_device_t * xd;
  uword *p;

  p = hash_get (dm->dpdk_device_by_kni_port_id, port_id);
  if (p == 0) {
    clib_warning("unknown interface");
    return 0;
  } else {
    xd = vec_elt_at_index (dm->devices, p[0]);
  }

  vnet_hw_interface_set_flags (vnm, xd->vlib_hw_if_index,
                               if_up ? VNET_HW_INTERFACE_FLAG_LINK_UP |
                               ETH_LINK_FULL_DUPLEX : 0);
  return 0;
}

static int
kni_change_mtu(u8 port_id, unsigned new_mtu)
{
  vnet_main_t * vnm = vnet_get_main();
  dpdk_main_t * dm = &dpdk_main;
  dpdk_device_t * xd;
  uword *p;
  vnet_hw_interface_t * hif;

  p = hash_get (dm->dpdk_device_by_kni_port_id, port_id);
  if (p == 0) {
    clib_warning("unknown interface");
    return 0;
  } else {
    xd = vec_elt_at_index (dm->devices, p[0]);
  }
  hif = vnet_get_hw_interface (vnm, xd->vlib_hw_if_index);

  hif->max_packet_bytes = new_mtu;

  return 0;
}
#endif

static clib_error_t *
dpdk_interface_admin_up_down (vnet_main_t * vnm, u32 hw_if_index, u32 flags)
{
  vnet_hw_interface_t * hif = vnet_get_hw_interface (vnm, hw_if_index);
  uword is_up = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) != 0;
  dpdk_main_t * dm = &dpdk_main;
  dpdk_device_t * xd = vec_elt_at_index (dm->devices, hif->dev_instance);
  int rv = 0;

#ifdef RTE_LIBRTE_KNI
  if (xd->dev_type == VNET_DPDK_DEV_KNI)
  {
      if (is_up)
      {
          struct rte_kni_conf conf;
          struct rte_kni_ops ops;
          vlib_main_t * vm = vlib_get_main();
          vlib_buffer_main_t * bm = vm->buffer_main;
          memset(&conf, 0, sizeof(conf));
          snprintf(conf.name, RTE_KNI_NAMESIZE, "vpp%u", xd->kni_port_id);
          conf.mbuf_size = VLIB_BUFFER_DATA_SIZE;
          memset(&ops, 0, sizeof(ops));
          ops.port_id = xd->kni_port_id;
          ops.change_mtu = kni_change_mtu;
          ops.config_network_if = kni_config_network_if;

          xd->kni = rte_kni_alloc(bm->pktmbuf_pools[rte_socket_id()], &conf, &ops);
          if (!xd->kni)
          {
            clib_warning("failed to allocate kni interface");
          }
          else
          {
            hif->max_packet_bytes = 1500; /* kni interface default value */
            xd->admin_up = 1;
          }
      }
      else
      {
        xd->admin_up = 0;
        rte_kni_release(xd->kni);
      }
      return 0;
  }
#endif
  if (xd->dev_type == VNET_DPDK_DEV_VHOST_USER)
    {
      if (is_up)
        {
          if (xd->vu_is_running)
            vnet_hw_interface_set_flags (vnm, xd->vlib_hw_if_index,
                                         VNET_HW_INTERFACE_FLAG_LINK_UP |
                                         ETH_LINK_FULL_DUPLEX );
          xd->admin_up = 1;
        }
      else
        {
          vnet_hw_interface_set_flags (vnm, xd->vlib_hw_if_index, 0);
          xd->admin_up = 0;
        }

      return 0;
    }


  if (is_up)
    {
      f64 now = vlib_time_now (dm->vlib_main);

      /*
       * DAW-FIXME: VMXNET3 device stop/start doesn't work,
       * therefore fake the stop in the dpdk driver by
       * silently dropping all of the incoming pkts instead of
       * stopping the driver / hardware.
       */
      if (xd->admin_up == 0)
        rv = rte_eth_dev_start (xd->device_index);

      if (xd->promisc)
          rte_eth_promiscuous_enable(xd->device_index);
      else
          rte_eth_promiscuous_disable(xd->device_index);

      rte_eth_allmulticast_enable (xd->device_index);
      xd->admin_up = 1;
      dpdk_update_counters (xd, now);
      dpdk_update_link_state (xd, now);
    }
  else
    {
      /*
       * DAW-FIXME: VMXNET3 device stop/start doesn't work,
       * therefore fake the stop in the dpdk driver by
       * silently dropping all of the incoming pkts instead of
       * stopping the driver / hardware.
       */
      if (xd->pmd != VNET_DPDK_PMD_VMXNET3)
        xd->admin_up = 0;
      else
        xd->admin_up = ~0;

      rte_eth_allmulticast_disable (xd->device_index);
      vnet_hw_interface_set_flags (vnm, xd->vlib_hw_if_index, 0);

      /*
       * DAW-FIXME: VMXNET3 device stop/start doesn't work,
       * therefore fake the stop in the dpdk driver by
       * silently dropping all of the incoming pkts instead of
       * stopping the driver / hardware.
       */
      if (xd->pmd != VNET_DPDK_PMD_VMXNET3)
          rte_eth_dev_stop (xd->device_index);
    }

  if (rv < 0)
    clib_warning ("rte_eth_dev_%s error: %d", is_up ? "start" : "stop",
                  rv);

  return /* no error */ 0;
}

/*
 * Dynamically redirect all pkts from a specific interface
 * to the specified node
 */
static void dpdk_set_interface_next_node (vnet_main_t *vnm, u32 hw_if_index,
                                          u32 node_index)
{
  dpdk_main_t * xm = &dpdk_main;
  vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, hw_if_index);
  dpdk_device_t * xd = vec_elt_at_index (xm->devices, hw->dev_instance);

  /* Shut off redirection */
  if (node_index == ~0)
    {
      xd->per_interface_next_index = node_index;
      return;
    }

  xd->per_interface_next_index =
    vlib_node_add_next (xm->vlib_main, dpdk_input_node.index, node_index);
}


static clib_error_t *
dpdk_subif_add_del_function (vnet_main_t * vnm,
                             u32 hw_if_index,
                             struct vnet_sw_interface_t * st,
                             int is_add)
{
  dpdk_main_t * xm = &dpdk_main;
  vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, hw_if_index);
  dpdk_device_t * xd = vec_elt_at_index (xm->devices, hw->dev_instance);
  vnet_sw_interface_t * t = (vnet_sw_interface_t *) st;
  int r, vlan_offload;
  u32 prev_subifs = xd->vlan_subifs;

  if (is_add) xd->vlan_subifs++;
  else if (xd->vlan_subifs) xd->vlan_subifs--;

  if (xd->dev_type != VNET_DPDK_DEV_ETH)
        return 0;

  /* currently we program VLANS only for IXGBE VF and I40E VF */
  if ((xd->pmd != VNET_DPDK_PMD_IXGBEVF) &&
      (xd->pmd != VNET_DPDK_PMD_I40EVF))
        return 0;

  if (t->sub.eth.flags.no_tags == 1)
        return 0;

  if ((t->sub.eth.flags.one_tag != 1) || (t->sub.eth.flags.exact_match != 1 )) {
        xd->vlan_subifs = prev_subifs;
        return clib_error_return (0, "unsupported VLAN setup");
  }

  vlan_offload = rte_eth_dev_get_vlan_offload(xd->device_index);
  vlan_offload |= ETH_VLAN_FILTER_OFFLOAD;

  if ((r = rte_eth_dev_set_vlan_offload(xd->device_index, vlan_offload))) {
        xd->vlan_subifs = prev_subifs;
        return clib_error_return (0, "rte_eth_dev_set_vlan_offload[%d]: err %d",
                                  xd->device_index, r);
  }


  if ((r = rte_eth_dev_vlan_filter(xd->device_index, t->sub.eth.outer_vlan_id, is_add))) {
        xd->vlan_subifs = prev_subifs;
        return clib_error_return (0, "rte_eth_dev_vlan_filter[%d]: err %d",
                                  xd->device_index, r);
  }

  return 0;
}

VNET_DEVICE_CLASS (dpdk_device_class) = {
  .name = "dpdk",
  .tx_function = dpdk_interface_tx,
  .tx_function_n_errors = DPDK_TX_FUNC_N_ERROR,
  .tx_function_error_strings = dpdk_tx_func_error_strings,
  .format_device_name = format_dpdk_device_name,
  .format_device = format_dpdk_device,
  .format_tx_trace = format_dpdk_tx_dma_trace,
  .clear_counters = dpdk_clear_hw_interface_counters,
  .admin_up_down_function = dpdk_interface_admin_up_down,
  .subif_add_del_function = dpdk_subif_add_del_function,
  .rx_redirect_to_node = dpdk_set_interface_next_node,
  .no_flatten_output_chains = 1,
  .name_renumber = dpdk_device_renumber,
};

void dpdk_set_flowcontrol_callback (vlib_main_t *vm,
                                    dpdk_flowcontrol_callback_t callback)
{
  dpdk_main.flowcontrol_callback = callback;
}
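
/*
 * Illustrative sketch (not part of the original source): a traffic manager
 * could register for backpressure like this. The callback name and its
 * bookkeeping are hypothetical; the callback receives the hw_if_index and
 * the number of packets still queued on the tx ring.
 *
 *   static void my_tm_flowcontrol (vlib_main_t * vm, u32 hw_if_index,
 *                                  u32 n_packets)
 *   {
 *     // flow-off this interface; poll dpdk_interface_tx_vector() later
 *     // to decide when to flow it back on
 *   }
 *
 *   dpdk_set_flowcontrol_callback (vm, my_tm_flowcontrol);
 */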

#define UP_DOWN_FLAG_EVENT 1


u32 dpdk_get_admin_up_down_in_progress (void)
{
  return dpdk_main.admin_up_down_in_progress;
}

static uword
admin_up_down_process (vlib_main_t * vm,
                       vlib_node_runtime_t * rt,
                       vlib_frame_t * f)
{
  clib_error_t * error = 0;
  uword event_type;
  uword *event_data = 0;
  u32 index;
  u32 sw_if_index;
  u32 flags;

  while (1)
    {
      vlib_process_wait_for_event (vm);

      event_type = vlib_process_get_events (vm, &event_data);

      dpdk_main.admin_up_down_in_progress = 1;

      for (index=0; index<vec_len(event_data); index++)
        {
          sw_if_index = event_data[index] >> 32;
          flags = (u32) event_data[index];

          switch (event_type) {
          case UP_DOWN_FLAG_EVENT:
            error = vnet_sw_interface_set_flags (vnet_get_main(), sw_if_index, flags);
            clib_error_report(error);
            break;
          }
        }

      vec_reset_length (event_data);

      dpdk_main.admin_up_down_in_progress = 0;

    }
  return 0; /* or not */
}

VLIB_REGISTER_NODE (admin_up_down_process_node,static) = {
    .function = admin_up_down_process,
    .type = VLIB_NODE_TYPE_PROCESS,
    .name = "admin-up-down-process",
    .process_log2_n_stack_bytes = 17,  // 256KB
};

/*
 * Asynchronously invoke vnet_sw_interface_set_flags via the admin_up_down
 * process. Useful for avoiding long blocking delays (>150ms) in the dpdk
 * drivers.
 * WARNING: when posting this event, no other interface-related calls should
 * be made (e.g. vnet_create_sw_interface()) while the event is being
 * processed (admin_up_down_in_progress). This is required in order to avoid
 * race conditions in manipulating interface data structures.
 */
void post_sw_interface_set_flags (vlib_main_t *vm, u32 sw_if_index, u32 flags)
{
  vlib_process_signal_event
      (vm, admin_up_down_process_node.index,
       UP_DOWN_FLAG_EVENT,
       (((uword)sw_if_index << 32) | flags));
}
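
/*
 * Illustrative note (not part of the original source): the event word packs
 * sw_if_index into the upper 32 bits and the interface flags into the lower
 * 32 bits, so one uword carries both through the process event queue:
 *
 *   uword ev = ((uword)sw_if_index << 32) | flags;
 *   u32 recovered_sw_if_index = ev >> 32;   // upper half
 *   u32 recovered_flags = (u32) ev;         // lower half, truncating cast
 *
 * This matches the decode in admin_up_down_process() above.
 */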

/*
 * Called by the dpdk driver's rte_delay_us() function.
 * Return 0 to have the dpdk do a regular delay loop.
 * Return 1 to skip the delay loop because we are suspending
 * the calling vlib process instead.
 */
int rte_delay_us_override (unsigned us) {
  vlib_main_t * vm;

  /* Don't bother intercepting for short delays */
  if (us < 10) return 0;

  /*
   * Only intercept if we are in a vlib process.
   * If we are called from a vlib worker thread or the vlib main
   * thread then do not intercept. (Must not be called from an
   * independent pthread).
   */
  if (os_get_cpu_number() == 0)
    {
      /*
       * We're in the vlib main thread or a vlib process. Make sure
       * the process is running and we're not still initializing.
       */
      vm = vlib_get_main();
      if (vlib_in_process_context(vm))
        {
          /* Only suspend for the admin_down_process */
          vlib_process_t * proc = vlib_get_current_process (vm);
          if (!(proc->flags & VLIB_PROCESS_IS_RUNNING) ||
              proc->node_runtime.function != admin_up_down_process)
                return 0;

          f64 delay = 1e-6 * us;
          vlib_process_suspend(vm, delay);
          return 1;
        }
    }
  return 0; // no override
}

/*
 * Return a copy of the DPDK port stats in dest.
 */
clib_error_t*
dpdk_get_hw_interface_stats (u32 hw_if_index, struct rte_eth_stats* dest)
{
  dpdk_main_t * dm = &dpdk_main;
  vnet_main_t * vnm = vnet_get_main();
  vnet_hw_interface_t * hi = vnet_get_hw_interface (vnm, hw_if_index);
  dpdk_device_t * xd = vec_elt_at_index (dm->devices, hi->dev_instance);

  if (!dest) {
     return clib_error_return (0, "Missing or NULL argument");
  }
  if (!xd) {
     return clib_error_return (0, "Unable to get DPDK device from HW interface");
  }

  dpdk_update_counters (xd, vlib_time_now (dm->vlib_main));

  clib_memcpy(dest, &xd->stats, sizeof(xd->stats));
  return (0);
}

/*
 * Return the number of dpdk mbufs
 */
u32 dpdk_num_mbufs (void)
{
  dpdk_main_t * dm = &dpdk_main;

  return dm->num_mbufs;
}

/*
 * Return the io_thread_release
 */
int dpdk_io_thread_release (void)
{
  dpdk_main_t * dm = &dpdk_main;

  return dm->io_thread_release;
}

/*
 * Return the pmd type for a given hardware interface
 */
dpdk_pmd_t dpdk_get_pmd_type (vnet_hw_interface_t *hi)
{
  dpdk_main_t * dm = &dpdk_main;
  dpdk_device_t * xd;

  assert (hi);

  xd = vec_elt_at_index (dm->devices, hi->dev_instance);

  assert (xd);

  return xd->pmd;
}

/*
 * Return the cpu socket for a given hardware interface
 */
i8 dpdk_get_cpu_socket (vnet_hw_interface_t *hi)
{
  dpdk_main_t * dm = &dpdk_main;
  dpdk_device_t * xd;

  assert (hi);

  xd = vec_elt_at_index(dm->devices, hi->dev_instance);

  assert (xd);

  return xd->cpu_socket;
}