FD.io VPP  v16.09
Vector Packet Processing
vhost_user.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2015 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  * http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 #include <assert.h>
16 #include <sys/socket.h>
17 #include <sys/un.h>
18 #include <sys/stat.h>
19 #include <sys/vfs.h>
20 
21 #include <vlib/vlib.h>
22 #include <vlib/unix/unix.h>
23 
24 #include <vnet/vnet.h>
25 #include <vppinfra/vec.h>
26 #include <vppinfra/error.h>
27 #include <vppinfra/format.h>
28 
29 #include <vnet/ethernet/ethernet.h>
30 #include <vnet/devices/dpdk/dpdk.h>
31 
33 
34 #define VHOST_USER_DEBUG_SOCKET 0
35 
36 #if VHOST_USER_DEBUG_SOCKET == 1
37 #define DBG_SOCK(args...) clib_warning(args);
38 #else
39 #define DBG_SOCK(args...)
40 #endif
41 
42 #if DPDK_VHOST_USER
43 
44 /* *INDENT-OFF* */
/*
 * Printable names for the vhost-user request codes, indexed by request
 * value through designated initializers. Marked unused because the only
 * reference visible in this file is inside DBG_SOCK, which expands to
 * nothing unless VHOST_USER_DEBUG_SOCKET is 1.
 */
45 static const char *vhost_message_str[] __attribute__ ((unused)) =
46 {
47  [VHOST_USER_NONE] = "VHOST_USER_NONE",
48  [VHOST_USER_GET_FEATURES] = "VHOST_USER_GET_FEATURES",
49  [VHOST_USER_SET_FEATURES] = "VHOST_USER_SET_FEATURES",
50  [VHOST_USER_SET_OWNER] = "VHOST_USER_SET_OWNER",
51  [VHOST_USER_RESET_OWNER] = "VHOST_USER_RESET_OWNER",
52  [VHOST_USER_SET_MEM_TABLE] = "VHOST_USER_SET_MEM_TABLE",
53  [VHOST_USER_SET_LOG_BASE] = "VHOST_USER_SET_LOG_BASE",
54  [VHOST_USER_SET_LOG_FD] = "VHOST_USER_SET_LOG_FD",
55  [VHOST_USER_SET_VRING_NUM] = "VHOST_USER_SET_VRING_NUM",
56  [VHOST_USER_SET_VRING_ADDR] = "VHOST_USER_SET_VRING_ADDR",
57  [VHOST_USER_SET_VRING_BASE] = "VHOST_USER_SET_VRING_BASE",
58  [VHOST_USER_GET_VRING_BASE] = "VHOST_USER_GET_VRING_BASE",
59  [VHOST_USER_SET_VRING_KICK] = "VHOST_USER_SET_VRING_KICK",
60  [VHOST_USER_SET_VRING_CALL] = "VHOST_USER_SET_VRING_CALL",
61  [VHOST_USER_SET_VRING_ERR] = "VHOST_USER_SET_VRING_ERR",
62  [VHOST_USER_GET_PROTOCOL_FEATURES] = "VHOST_USER_GET_PROTOCOL_FEATURES",
63  [VHOST_USER_SET_PROTOCOL_FEATURES] = "VHOST_USER_SET_PROTOCOL_FEATURES",
64  [VHOST_USER_GET_QUEUE_NUM] = "VHOST_USER_GET_QUEUE_NUM",
65  [VHOST_USER_SET_VRING_ENABLE] = "VHOST_USER_SET_VRING_ENABLE",
66 };
67 /* *INDENT-ON* */
68 
69 static int dpdk_vhost_user_set_vring_enable (u32 hw_if_index,
70  u8 idx, int enable);
71 
72 /*
73  * DPDK vhost-user functions
74  */
75 
76 /* portions taken from dpdk
77  * BSD LICENSE
78  *
79  * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
80  * All rights reserved.
81  *
82  * Redistribution and use in source and binary forms, with or without
83  * modification, are permitted provided that the following conditions
84  * are met:
85  *
86  * * Redistributions of source code must retain the above copyright
87  * notice, this list of conditions and the following disclaimer.
88  * * Redistributions in binary form must reproduce the above copyright
89  * notice, this list of conditions and the following disclaimer in
90  * the documentation and/or other materials provided with the
91  * distribution.
92  * * Neither the name of Intel Corporation nor the names of its
93  * contributors may be used to endorse or promote products derived
94  * from this software without specific prior written permission.
95  *
96  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
97  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
98  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
99  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
100  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
101  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
102  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
103  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
104  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
105  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
106  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
107  */
108 
109 
110 static uword
111 qva_to_vva (struct virtio_net *dev, uword qemu_va)
112 {
113  struct virtio_memory_regions *region;
114  uword vhost_va = 0;
115  uint32_t regionidx = 0;
116 
117  /* Find the region where the address lives. */
118  for (regionidx = 0; regionidx < dev->mem->nregions; regionidx++)
119  {
120  region = &dev->mem->regions[regionidx];
121  if ((qemu_va >= region->userspace_address) &&
122  (qemu_va <= region->userspace_address + region->memory_size))
123  {
124  vhost_va = qemu_va + region->guest_phys_address +
125  region->address_offset - region->userspace_address;
126  break;
127  }
128  }
129  return vhost_va;
130 }
131 
/*
 * Resolve a hardware interface index to its dpdk_device_t. Callers in this
 * file treat a 0 return as "not a vhost-user interface".
 *
 * NOTE(review): the embedded listing numbers skip 138 and 140, so this copy
 * is missing statements (the definition of xd and, presumably, the
 * condition guarding "return 0"). Restore them from the upstream file
 * before relying on or modifying this block.
 */
132 static dpdk_device_t *
133 dpdk_vhost_user_device_from_hw_if_index (u32 hw_if_index)
134 {
135  vnet_main_t *vnm = vnet_get_main ();
136  dpdk_main_t *dm = &dpdk_main;
137  vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm, hw_if_index);
139 
141  return 0;
142 
143  return xd;
144 }
145 
/*
 * Resolve a software interface index to its dpdk_device_t by looking up the
 * software interface and delegating to the hw_if_index variant with
 * sw->hw_if_index.
 *
 * NOTE(review): listing line 151 is absent from this copy; one statement
 * between the lookup and the return may be missing -- confirm upstream.
 */
146 static dpdk_device_t *
147 dpdk_vhost_user_device_from_sw_if_index (u32 sw_if_index)
148 {
149  vnet_main_t *vnm = vnet_get_main ();
150  vnet_sw_interface_t *sw = vnet_get_sw_interface (vnm, sw_if_index);
152 
153  return dpdk_vhost_user_device_from_hw_if_index (sw->hw_if_index);
154 }
155 
156 static void
157 stop_processing_packets (u32 hw_if_index, u8 idx)
158 {
159  dpdk_device_t *xd = dpdk_vhost_user_device_from_hw_if_index (hw_if_index);
160  assert (xd);
161  xd->vu_vhost_dev.virtqueue[idx]->enabled = 0;
162 }
163 
164 static void
165 disable_interface (dpdk_device_t * xd)
166 {
167  u8 idx;
168  int numqs = xd->vu_vhost_dev.virt_qp_nb * VIRTIO_QNUM;
169  for (idx = 0; idx < numqs; idx++)
170  xd->vu_vhost_dev.virtqueue[idx]->enabled = 0;
171 
172  xd->vu_is_running = 0;
173 }
174 
175 static inline void *
176 map_guest_mem (dpdk_device_t * xd, uword addr)
177 {
178  dpdk_vu_intf_t *vui = xd->vu_intf;
179  struct virtio_memory *mem = xd->vu_vhost_dev.mem;
180  int i;
181  for (i = 0; i < mem->nregions; i++)
182  {
183  if ((mem->regions[i].guest_phys_address <= addr) &&
184  ((mem->regions[i].guest_phys_address +
185  mem->regions[i].memory_size) > addr))
186  {
187  return (void *) ((uword) vui->region_addr[i] + addr -
188  (uword) mem->regions[i].guest_phys_address);
189  }
190  }
191  DBG_SOCK ("failed to map guest mem addr %lx", addr);
192  return 0;
193 }
194 
/*
 * Create a vhost-user device, or recycle the last entry of
 * dm->vu_inactive_interfaces_device_index, and report its hardware
 * interface index through *hw_if_index.
 *
 * if_id:  (u32) ~0 means "pick the next id" for a new device and "keep the
 *         current id" for a recycled one; any other value is stored in
 *         xd->vu_if_id.
 * hwaddr: 6-byte MAC to use, or NULL to generate one (02:fe:<random>).
 *
 * Returns the value of `error` when it is non-zero, otherwise 0.
 *
 * NOTE(review): the embedded listing numbers have many gaps (e.g. 200-201,
 * 203, 213, 224, 232, 263-264, 285-286, 374, 382, 389, 397, 408, 412, 417),
 * so this copy lacks declarations (tm, sw, dq), conditions and calls that
 * the remaining code depends on. Restore them from the upstream file before
 * changing this function.
 */
195 static clib_error_t *
196 dpdk_create_vhost_user_if_internal (u32 * hw_if_index, u32 if_id, u8 * hwaddr)
197 {
198  dpdk_main_t *dm = &dpdk_main;
199  vlib_main_t *vm = vlib_get_main ();
202  clib_error_t *error;
204  int num_qpairs = 1;
205  dpdk_vu_intf_t *vui = NULL;
206 
 /* One queue pair unless dm->use_rss is at least 1, then one per vlib main. */
207  num_qpairs = dm->use_rss < 1 ? 1 : tm->n_vlib_mains;
208 
209  dpdk_device_t *xd = NULL;
210  u8 addr[6];
211  int j;
212 
214 
215  int inactive_cnt = vec_len (dm->vu_inactive_interfaces_device_index);
216  // if there are any inactive ifaces
217  if (inactive_cnt > 0)
218  {
219  // take last
220  u32 vui_idx = dm->vu_inactive_interfaces_device_index[inactive_cnt - 1];
221  if (vec_len (dm->devices) > vui_idx)
222  {
223  xd = vec_elt_at_index (dm->devices, vui_idx);
 /* NOTE(review): the condition selecting between the two branches below
  * (listing line 224) is missing from this copy. */
225  {
226  DBG_SOCK
227  ("reusing inactive vhost-user interface sw_if_index %d",
228  xd->vlib_sw_if_index);
229  }
230  else
231  {
233  ("error: inactive vhost-user interface sw_if_index %d not VHOST_USER type!",
234  xd->vlib_sw_if_index);
235  // reset so new interface is created
236  xd = NULL;
237  }
238  }
239  // "remove" from inactive list
240  _vec_len (dm->vu_inactive_interfaces_device_index) -= 1;
241  }
242 
243  if (xd)
244  {
245  // existing interface used - do not overwrite if_id if not needed
246  if (if_id != (u32) ~ 0)
247  xd->vu_if_id = if_id;
248 
249  // reset virtqueues
250  vui = xd->vu_intf;
251  for (j = 0; j < num_qpairs * VIRTIO_QNUM; j++)
252  {
253  memset (xd->vu_vhost_dev.virtqueue[j], 0,
254  sizeof (struct vhost_virtqueue));
255  xd->vu_vhost_dev.virtqueue[j]->kickfd = -1;
256  xd->vu_vhost_dev.virtqueue[j]->callfd = -1;
257  xd->vu_vhost_dev.virtqueue[j]->backend = -1;
258  vui->vrings[j].packets = 0;
259  vui->vrings[j].bytes = 0;
260  }
261 
262  // reset lockp
265 
266  // reset tx vectors
267  for (j = 0; j < tm->n_vlib_mains; j++)
268  {
271  vec_reset_length (xd->tx_vectors[j]);
272  }
273 
274  // reset rx vector
275  for (j = 0; j < xd->rx_q_used; j++)
276  {
279  vec_reset_length (xd->rx_vectors[j]);
280  }
281  }
282  else
283  {
284  // vui was not retrieved from inactive ifaces - create new
 /* NOTE(review): the statements that obtain a fresh xd (listing lines
  * 285-286) are missing here; xd is NULL on entry to this branch. */
287  xd->rx_q_used = num_qpairs;
288  xd->tx_q_used = num_qpairs;
289  xd->vu_vhost_dev.virt_qp_nb = num_qpairs;
290 
293 
294  if (if_id == (u32) ~ 0)
295  xd->vu_if_id = dm->next_vu_if_id++;
296  else
297  xd->vu_if_id = if_id;
298 
299  xd->device_index = xd - dm->devices;
300  xd->per_interface_next_index = ~0;
301  xd->vu_intf = clib_mem_alloc (sizeof (*(xd->vu_intf)));
302 
303  xd->vu_vhost_dev.mem = clib_mem_alloc (sizeof (struct virtio_memory) +
305  sizeof (struct
306  virtio_memory_regions));
307 
308  /* Will be set when guest sends VHOST_USER_SET_MEM_TABLE cmd */
309  xd->vu_vhost_dev.mem->nregions = 0;
310 
311  /*
312  * New virtqueue structure is an array of VHOST_MAX_QUEUE_PAIRS * 2
313  * We need to allocate numq pairs.
314  */
315  vui = xd->vu_intf;
316  for (j = 0; j < num_qpairs * VIRTIO_QNUM; j++)
317  {
318  xd->vu_vhost_dev.virtqueue[j] =
319  clib_mem_alloc (sizeof (struct vhost_virtqueue));
320  memset (xd->vu_vhost_dev.virtqueue[j], 0,
321  sizeof (struct vhost_virtqueue));
322  xd->vu_vhost_dev.virtqueue[j]->kickfd = -1;
323  xd->vu_vhost_dev.virtqueue[j]->callfd = -1;
324  xd->vu_vhost_dev.virtqueue[j]->backend = -1;
325  vui->vrings[j].packets = 0;
326  vui->vrings[j].bytes = 0;
327  }
328 
330 
331  DBG_SOCK
332  ("tm->n_vlib_mains: %d. TX %d, RX: %d, num_qpairs: %d, Lock: %p",
333  tm->n_vlib_mains, xd->tx_q_used, xd->rx_q_used, num_qpairs,
334  xd->lockp);
335 
338 
339  for (j = 0; j < tm->n_vlib_mains; j++)
340  {
343  vec_reset_length (xd->tx_vectors[j]);
344  }
345 
346  // reset rx vector
347  for (j = 0; j < xd->rx_q_used; j++)
348  {
351  vec_reset_length (xd->rx_vectors[j]);
352  }
353 
354  }
355  /*
356  * Generate random MAC address for the interface
357  */
358  if (hwaddr)
359  {
360  clib_memcpy (addr, hwaddr, sizeof (addr));
361  }
362  else
363  {
 /* Seed from the current time in microseconds; the 4 random bytes fill
  * addr[2..5] and the first two bytes are fixed to 02:fe. */
364  f64 now = vlib_time_now (vm);
365  u32 rnd;
366  rnd = (u32) (now * 1e6);
367  rnd = random_u32 (&rnd);
368 
369  clib_memcpy (addr + 2, &rnd, sizeof (rnd));
370  addr[0] = 2;
371  addr[1] = 0xfe;
372  }
373 
 /* NOTE(review): the head of this call, which assigns `error` (listing
  * line 374), is missing from this copy. */
375  (dm->vnet_main, dpdk_device_class.index, xd->device_index,
376  /* ethernet address */ addr,
377  &xd->vlib_hw_if_index, 0);
378 
379  if (error)
380  return error;
381 
383  xd->vlib_sw_if_index = sw->sw_if_index;
384 
385  *hw_if_index = xd->vlib_hw_if_index;
386 
387  DBG_SOCK ("xd->device_index: %d, dm->input_cpu_count: %d, "
388  "dm->input_cpu_first_index: %d\n", xd->device_index,
390 
 /* Spread the queue pairs round-robin over the input CPUs, starting at
  * dm->input_cpu_first_index. */
391  int q, next_cpu = 0;
392  for (q = 0; q < num_qpairs; q++)
393  {
394  int cpu = dm->input_cpu_first_index + (next_cpu % dm->input_cpu_count);
395 
396  unsigned lcore = vlib_worker_threads[cpu].dpdk_lcore_id;
398  xd->cpu_socket_id_by_queue[q] = rte_lcore_to_socket_id (lcore);
399 
400  vec_add2 (dm->devices_by_cpu[cpu], dq, 1);
401  dq->device = xd->device_index;
402  dq->queue_id = q;
403  DBG_SOCK ("CPU for %d = %d. QID: %d", *hw_if_index, cpu, dq->queue_id);
404 
405  // start polling if it was not started yet (because of no phys ifaces)
406  if (tm->n_vlib_mains == 1
407  && dpdk_input_node.state != VLIB_NODE_STATE_POLLING)
409  VLIB_NODE_STATE_POLLING);
410 
411  if (tm->n_vlib_mains > 1)
413  VLIB_NODE_STATE_POLLING);
414  next_cpu++;
415  }
416 
418  return 0;
419 }
420 
421 #if RTE_VERSION >= RTE_VERSION_NUM(16, 4, 0, 0)
/*
 * Return the block size that fstatfs() reports for the filesystem backing
 * fd. The function name indicates callers pass hugetlbfs-backed
 * descriptors, where that value is the huge page size.
 *
 * If fstatfs() fails, the statfs buffer is never filled in, so the regular
 * system page size is returned instead of an indeterminate value.
 */
static long
get_huge_page_size (int fd)
{
  struct statfs s;

  if (fstatfs (fd, &s) != 0)
    return sysconf (_SC_PAGESIZE);

  return s.f_bsize;
}
429 #endif
430 
431 static clib_error_t *
432 dpdk_vhost_user_set_protocol_features (u32 hw_if_index, u64 prot_features)
433 {
434  dpdk_device_t *xd;
435  xd = dpdk_vhost_user_device_from_hw_if_index (hw_if_index);
436  assert (xd);
437  xd->vu_vhost_dev.protocol_features = prot_features;
438  return 0;
439 }
440 
441 static clib_error_t *
442 dpdk_vhost_user_get_features (u32 hw_if_index, u64 * features)
443 {
444  *features = rte_vhost_feature_get ();
445 
446 #if RTE_VERSION >= RTE_VERSION_NUM(16, 4, 0, 0)
447 #define OFFLOAD_FEATURES ((1ULL << VIRTIO_NET_F_HOST_TSO4) | \
448  (1ULL << VIRTIO_NET_F_HOST_TSO6) | \
449  (1ULL << VIRTIO_NET_F_CSUM) | \
450  (1ULL << VIRTIO_NET_F_GUEST_CSUM) | \
451  (1ULL << VIRTIO_NET_F_GUEST_TSO4) | \
452  (1ULL << VIRTIO_NET_F_GUEST_TSO6))
453 
454  /* These are not suppoted as bridging/tunneling VHOST
455  * interfaces with hardware interfaces/drivers that does
456  * not support offloading breaks L4 traffic.
457  */
458  *features &= (~OFFLOAD_FEATURES);
459 #endif
460 
461  DBG_SOCK ("supported features: 0x%lx", *features);
462  return 0;
463 }
464 
465 static clib_error_t *
466 dpdk_vhost_user_set_features (u32 hw_if_index, u64 features)
467 {
468  dpdk_device_t *xd;
469  u16 hdr_len = sizeof (struct virtio_net_hdr);
470 
471 
472  if (!(xd = dpdk_vhost_user_device_from_hw_if_index (hw_if_index)))
473  {
474  clib_warning ("not a vhost-user interface");
475  return 0;
476  }
477 
478  xd->vu_vhost_dev.features = features;
479 
480  if (xd->vu_vhost_dev.features & (1 << VIRTIO_NET_F_MRG_RXBUF))
481  hdr_len = sizeof (struct virtio_net_hdr_mrg_rxbuf);
482 
483  int numqs = VIRTIO_QNUM;
484  u8 idx;
485  int prot_feature = features & (1ULL << VHOST_USER_F_PROTOCOL_FEATURES);
486  numqs = xd->vu_vhost_dev.virt_qp_nb * VIRTIO_QNUM;
487  for (idx = 0; idx < numqs; idx++)
488  {
489  xd->vu_vhost_dev.virtqueue[idx]->vhost_hlen = hdr_len;
490  /*
491  * Spec says, if F_PROTOCOL_FEATURE is not set by the
492  * slave, then all the vrings should start off as
493  * enabled. If slave negotiates F_PROTOCOL_FEATURE, then
494  * slave is responsible to enable it.
495  */
496  if (!prot_feature)
497  dpdk_vhost_user_set_vring_enable (hw_if_index, idx, 1);
498  }
499 
500  return 0;
501 }
502 
/*
 * Install a new guest memory table: copy each region description from vum
 * into the device's virtio_memory, mmap() the matching descriptor from fd[]
 * and remember address/fd/offset per region in the vui. Finishes by
 * disabling the interface via disable_interface().
 *
 * A failed device lookup or a failed mmap() is reported with a warning;
 * the function returns 0 in every case.
 *
 * NOTE(review): listing line 529 (the right-hand side of the
 * guest_phys_address_end assignment) is missing from this copy.
 * NOTE(review): on mmap failure regions mapped in earlier iterations stay
 * mapped and mem->nregions already holds the new count -- confirm whether
 * callers rely on that.
 */
503 static clib_error_t *
504 dpdk_vhost_user_set_mem_table (u32 hw_if_index, vhost_user_memory_t * vum,
505  int fd[])
506 {
507  struct virtio_memory *mem;
508  int i;
509  dpdk_device_t *xd;
510  dpdk_vu_intf_t *vui;
511 
512  if (!(xd = dpdk_vhost_user_device_from_hw_if_index (hw_if_index)))
513  {
514  clib_warning ("not a vhost-user interface");
515  return 0;
516  }
517 
518  vui = xd->vu_intf;
519  mem = xd->vu_vhost_dev.mem;
520 
521  mem->nregions = vum->nregions;
522 
523  for (i = 0; i < mem->nregions; i++)
524  {
525  u64 mapped_size, mapped_address;
526 
527  mem->regions[i].guest_phys_address = vum->regions[i].guest_phys_addr;
528  mem->regions[i].guest_phys_address_end =
530  mem->regions[i].memory_size = vum->regions[i].memory_size;
531  mem->regions[i].userspace_address = vum->regions[i].userspace_addr;
532 
 /* The mapping starts at file offset 0, so it must cover mmap_offset
  * bytes in front of the region as well. */
533  mapped_size = mem->regions[i].memory_size + vum->regions[i].mmap_offset;
534  mapped_address =
535  pointer_to_uword (mmap
536  (NULL, mapped_size, PROT_READ | PROT_WRITE,
537  MAP_SHARED, fd[i], 0));
538 
539  if (uword_to_pointer (mapped_address, void *) == MAP_FAILED)
540  {
541  clib_warning ("mmap error");
542  return 0;
543  }
544 
 /* From here on mapped_address points at the region itself, past the
  * mmap_offset bytes at the start of the mapping. */
545  mapped_address += vum->regions[i].mmap_offset;
546  vui->region_addr[i] = mapped_address;
547  vui->region_fd[i] = fd[i];
548  vui->region_offset[i] = vum->regions[i].mmap_offset;
549  mem->regions[i].address_offset =
550  mapped_address - mem->regions[i].guest_phys_address;
551 
552  DBG_SOCK ("map memory region %d addr 0x%lx off 0x%lx len 0x%lx",
553  i, vui->region_addr[i], vui->region_offset[i], mapped_size);
554 
 /* The region at guest physical address 0 supplies the table-wide
  * base_address / mapped_address. */
555  if (vum->regions[i].guest_phys_addr == 0)
556  {
557  mem->base_address = vum->regions[i].userspace_addr;
558  mem->mapped_address = mem->regions[i].address_offset;
559  }
560  }
561 
562  disable_interface (xd);
563  return 0;
564 }
565 
566 static clib_error_t *
567 dpdk_vhost_user_set_vring_num (u32 hw_if_index, u8 idx, u32 num)
568 {
569  dpdk_device_t *xd;
570  struct vhost_virtqueue *vq;
571 
572  DBG_SOCK ("idx %u num %u", idx, num);
573 
574  if (!(xd = dpdk_vhost_user_device_from_hw_if_index (hw_if_index)))
575  {
576  clib_warning ("not a vhost-user interface");
577  return 0;
578  }
579  vq = xd->vu_vhost_dev.virtqueue[idx];
580  vq->size = num;
581 
582  stop_processing_packets (hw_if_index, idx);
583 
584  return 0;
585 }
586 
587 static clib_error_t *
588 dpdk_vhost_user_set_vring_addr (u32 hw_if_index, u8 idx, uword desc,
589  uword used, uword avail, uword log)
590 {
591  dpdk_device_t *xd;
592  struct vhost_virtqueue *vq;
593 
594  DBG_SOCK ("idx %u desc 0x%lx used 0x%lx avail 0x%lx log 0x%lx",
595  idx, desc, used, avail, log);
596 
597  if (!(xd = dpdk_vhost_user_device_from_hw_if_index (hw_if_index)))
598  {
599  clib_warning ("not a vhost-user interface");
600  return 0;
601  }
602  vq = xd->vu_vhost_dev.virtqueue[idx];
603 
604  vq->desc = (struct vring_desc *) qva_to_vva (&xd->vu_vhost_dev, desc);
605  vq->used = (struct vring_used *) qva_to_vva (&xd->vu_vhost_dev, used);
606  vq->avail = (struct vring_avail *) qva_to_vva (&xd->vu_vhost_dev, avail);
607 #if RTE_VERSION >= RTE_VERSION_NUM(16, 4, 0, 0)
608  vq->log_guest_addr = log;
609 #endif
610 
611  if (!(vq->desc && vq->used && vq->avail))
612  {
613  clib_warning ("falied to set vring addr");
614  }
615 
616  if (vq->last_used_idx != vq->used->idx)
617  {
618  clib_warning ("last_used_idx (%u) and vq->used->idx (%u) mismatches; "
619  "some packets maybe resent for Tx and dropped for Rx",
620  vq->last_used_idx, vq->used->idx);
621  vq->last_used_idx = vq->used->idx;
622  vq->last_used_idx_res = vq->used->idx;
623  }
624 
625  /*
626  * Inform the guest that there is no need to inform (kick) the
627  * host when it adds buffers. kick results in vmexit and will
628  * incur performance degradation.
629  *
630  * The below function sets a flag in used table. Therefore,
631  * should be initialized after initializing vq->used.
632  */
633  rte_vhost_enable_guest_notification (&xd->vu_vhost_dev, idx, 0);
634  stop_processing_packets (hw_if_index, idx);
635 
636  return 0;
637 }
638 
639 static clib_error_t *
640 dpdk_vhost_user_get_vring_base (u32 hw_if_index, u8 idx, u32 * num)
641 {
642  dpdk_device_t *xd;
643  struct vhost_virtqueue *vq;
644 
645  if (!(xd = dpdk_vhost_user_device_from_hw_if_index (hw_if_index)))
646  {
647  clib_warning ("not a vhost-user interface");
648  return 0;
649  }
650 
651  vq = xd->vu_vhost_dev.virtqueue[idx];
652  *num = vq->last_used_idx;
653 
654 /*
655  * From spec:
656  * Client must start ring upon receiving a kick
657  * (that is, detecting that file descriptor is readable)
658  * on the descriptor specified by VHOST_USER_SET_VRING_KICK,
659  * and stop ring upon receiving VHOST_USER_GET_VRING_BASE.
660  */
661  DBG_SOCK ("Stopping vring Q %u of device %d", idx, hw_if_index);
662  dpdk_vu_intf_t *vui = xd->vu_intf;
663  vui->vrings[idx].enabled = 0; /* Reset local copy */
664  vui->vrings[idx].callfd = -1; /* Reset FD */
665  vq->enabled = 0;
666  vq->desc = NULL;
667  vq->used = NULL;
668  vq->avail = NULL;
669 #if RTE_VERSION >= RTE_VERSION_NUM(16, 4, 0, 0)
670  vq->log_guest_addr = 0;
671 #endif
672 
673  /* Check if all Qs are disabled */
674  int numqs = xd->vu_vhost_dev.virt_qp_nb * VIRTIO_QNUM;
675  for (idx = 0; idx < numqs; idx++)
676  {
677  if (xd->vu_vhost_dev.virtqueue[idx]->enabled)
678  break;
679  }
680 
681  /* If all vrings are disabed then disable device */
682  if (idx == numqs)
683  {
684  DBG_SOCK ("Device %d disabled", hw_if_index);
685  xd->vu_is_running = 0;
686  }
687 
688  return 0;
689 }
690 
691 static clib_error_t *
692 dpdk_vhost_user_set_vring_base (u32 hw_if_index, u8 idx, u32 num)
693 {
694  dpdk_device_t *xd;
695  struct vhost_virtqueue *vq;
696 
697  DBG_SOCK ("idx %u num %u", idx, num);
698 
699  if (!(xd = dpdk_vhost_user_device_from_hw_if_index (hw_if_index)))
700  {
701  clib_warning ("not a vhost-user interface");
702  return 0;
703  }
704 
705  vq = xd->vu_vhost_dev.virtqueue[idx];
706  vq->last_used_idx = num;
707  vq->last_used_idx_res = num;
708 
709  stop_processing_packets (hw_if_index, idx);
710 
711  return 0;
712 }
713 
/*
 * Store the kick fd for virtqueue idx, enable the queue if its ring
 * addresses are set and the locally saved enable flag is on, then
 * recompute xd->vu_is_running from the state of all queue pairs.
 *
 * A failed device lookup is reported with a warning. Always returns 0.
 *
 * NOTE(review): listing lines 758 and 760 are missing from this copy, so
 * the call made when the device is running and admin-up is incomplete.
 */
714 static clib_error_t *
715 dpdk_vhost_user_set_vring_kick (u32 hw_if_index, u8 idx, int fd)
716 {
717  dpdk_main_t *dm = &dpdk_main;
718  dpdk_device_t *xd;
719  dpdk_vu_vring *vring;
720  struct vhost_virtqueue *vq0, *vq1, *vq;
721  int index, vu_is_running = 0;
722 
723  if (!(xd = dpdk_vhost_user_device_from_hw_if_index (hw_if_index)))
724  {
725  clib_warning ("not a vhost-user interface");
726  return 0;
727  }
728 
729  vq = xd->vu_vhost_dev.virtqueue[idx];
730  vq->kickfd = fd;
731 
 /* vring->enabled is the copy saved by dpdk_vhost_user_set_vring_enable;
  * it only takes effect once all three ring pointers are valid. */
732  vring = &xd->vu_intf->vrings[idx];
733  vq->enabled = (vq->desc && vq->avail && vq->used && vring->enabled) ? 1 : 0;
734 
735  /*
736  * Set xd->vu_is_running if at least one pair of
737  * RX/TX queues are enabled.
738  */
739  int numqs = VIRTIO_QNUM;
740  numqs = xd->vu_vhost_dev.virt_qp_nb * VIRTIO_QNUM;
741 
742  for (index = 0; index < numqs; index += 2)
743  {
744  vq0 = xd->vu_vhost_dev.virtqueue[index]; /* RX */
745  vq1 = xd->vu_vhost_dev.virtqueue[index + 1]; /* TX */
746  if (vq0->enabled && vq1->enabled)
747  {
748  vu_is_running = 1;
749  break;
750  }
751  }
752  DBG_SOCK ("SET_VRING_KICK - idx %d, running %d, fd: %d",
753  idx, vu_is_running, fd);
754 
755  xd->vu_is_running = vu_is_running;
756  if (xd->vu_is_running && xd->admin_up)
757  {
759  xd->vlib_hw_if_index,
761  ETH_LINK_FULL_DUPLEX);
762  }
763 
764  return 0;
765 }
766 
767 static int
768 dpdk_vhost_user_set_vring_enable (u32 hw_if_index, u8 idx, int enable)
769 {
770  dpdk_device_t *xd;
771  struct vhost_virtqueue *vq;
772  dpdk_vu_intf_t *vui;
773 
774  if (!(xd = dpdk_vhost_user_device_from_hw_if_index (hw_if_index)))
775  {
776  clib_warning ("not a vhost-user interface");
777  return 0;
778  }
779 
780  vui = xd->vu_intf;
781  /*
782  * Guest vhost driver wrongly enables queue before
783  * setting the vring address. Therefore, save a
784  * local copy. Reflect it in vq structure if addresses
785  * are set. If not, vq will be enabled when vring
786  * is kicked.
787  */
788  vui->vrings[idx].enabled = enable; /* Save local copy */
789 
790  int numqs = xd->vu_vhost_dev.virt_qp_nb * VIRTIO_QNUM;
791  while (numqs--)
792  {
793  if (!vui->vrings[numqs].enabled)
794  break;
795  }
796 
797  if (numqs == -1) /* All Qs are enabled */
798  xd->need_txlock = 0;
799  else
800  xd->need_txlock = 1;
801 
802  vq = xd->vu_vhost_dev.virtqueue[idx];
803  if (vq->desc && vq->avail && vq->used)
804  xd->vu_vhost_dev.virtqueue[idx]->enabled = enable;
805 
806  return 0;
807 }
808 
809 static clib_error_t *
810 dpdk_vhost_user_callfd_read_ready (unix_file_t * uf)
811 {
812  __attribute__ ((unused)) int n;
813  u8 buff[8];
814  n = read (uf->file_descriptor, ((char *) &buff), 8);
815  return 0;
816 }
817 
/*
 * Install fd as the call descriptor of vring idx: any previously registered
 * call fd is removed from unix_main, the new one is saved in the vui and
 * registered with dpdk_vhost_user_callfd_read_ready as its read handler.
 * The virtqueue's own callfd is set to -1 because the locally saved copy
 * is the one used.
 *
 * A failed device lookup is reported with a warning. Always returns 0.
 *
 * NOTE(review): listing line 838 (the definition of `uf`) is missing from
 * this copy.
 * NOTE(review): fd is registered with unix_file_add() as given, without a
 * check for a negative value -- confirm whether callers can pass -1.
 */
818 static clib_error_t *
819 dpdk_vhost_user_set_vring_call (u32 hw_if_index, u8 idx, int fd)
820 {
821  dpdk_device_t *xd;
822  struct vhost_virtqueue *vq;
823  unix_file_t template = { 0 };
824 
825  DBG_SOCK ("SET_VRING_CALL - idx %d, fd %d", idx, fd);
826 
827  if (!(xd = dpdk_vhost_user_device_from_hw_if_index (hw_if_index)))
828  {
829  clib_warning ("not a vhost-user interface");
830  return 0;
831  }
832 
833  dpdk_vu_intf_t *vui = xd->vu_intf;
834 
835  /* if there is old fd, delete it */
836  if (vui->vrings[idx].callfd > 0)
837  {
839  vui->vrings[idx].callfd_idx);
840  unix_file_del (&unix_main, uf);
841  }
842  vui->vrings[idx].callfd = fd;
843  template.read_function = dpdk_vhost_user_callfd_read_ready;
844  template.file_descriptor = fd;
845  vui->vrings[idx].callfd_idx = unix_file_add (&unix_main, &template);
846 
847  vq = xd->vu_vhost_dev.virtqueue[idx];
848  vq->callfd = -1; /* We use locally saved vring->callfd; */
849 
850  return 0;
851 }
852 
853 u8
854 dpdk_vhost_user_want_interrupt (dpdk_device_t * xd, int idx)
855 {
856  dpdk_vu_intf_t *vui = xd->vu_intf;
857  ASSERT (vui != NULL);
858 
859  if (PREDICT_FALSE (vui->num_vrings <= 0))
860  return 0;
861 
862  dpdk_vu_vring *vring = &(vui->vrings[idx]);
863  struct vhost_virtqueue *vq = xd->vu_vhost_dev.virtqueue[idx];
864 
865  /* return if vm is interested in interrupts */
866  return (vring->callfd > 0)
867  && !(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT);
868 }
869 
/*
 * Signal the peer on vring idx if it wants interrupts: when the vring has a
 * call fd (> 0) and the avail ring does not carry
 * VRING_AVAIL_F_NO_INTERRUPT, write 1 to the call eventfd and reset the
 * vring's n_since_last_int counter. Does nothing when the interface has no
 * vrings.
 *
 * NOTE(review): listing line 889 (the right-hand side of the int_deadline
 * assignment) is missing from this copy.
 */
870 void
871 dpdk_vhost_user_send_interrupt (vlib_main_t * vm, dpdk_device_t * xd, int idx)
872 {
873  dpdk_main_t *dm = &dpdk_main;
874  dpdk_vu_intf_t *vui = xd->vu_intf;
875  ASSERT (vui != NULL);
876 
877  if (PREDICT_FALSE (vui->num_vrings <= 0))
878  return;
879 
880  dpdk_vu_vring *vring = &(vui->vrings[idx]);
881  struct vhost_virtqueue *vq = xd->vu_vhost_dev.virtqueue[idx];
882 
883  /* if vm is interested in interrupts */
884  if ((vring->callfd > 0) && !(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT))
885  {
886  eventfd_write (vring->callfd, (eventfd_t) 1);
887  vring->n_since_last_int = 0;
888  vring->int_deadline =
890  }
891 }
892 
893 /*
894  * vhost-user interface management functions
895  */
896 
897 // initialize vui with specified attributes
/*
 * Reset xd->vu_intf to zero and fill in its socket attributes: fd, server
 * flag, socket file name and feature mask. The vui is marked active and
 * not up, with no unix file registered (unix_file_index = ~0).
 * If sw_if_index is non-NULL it receives xd->vlib_sw_if_index.
 *
 * NOTE(review): listing line 919 is missing from this copy; vnm is not
 * referenced by the remaining lines, so the missing statement presumably
 * used it -- confirm upstream.
 */
898 static void
899 dpdk_vhost_user_vui_init (vnet_main_t * vnm,
900  dpdk_device_t * xd, int sockfd,
901  const char *sock_filename,
902  u8 is_server, u64 feature_mask, u32 * sw_if_index)
903 {
904  dpdk_vu_intf_t *vui = xd->vu_intf;
905  memset (vui, 0, sizeof (*vui));
906 
907  vui->unix_fd = sockfd;
908  vui->num_vrings = xd->vu_vhost_dev.virt_qp_nb * VIRTIO_QNUM;
909  DBG_SOCK ("dpdk_vhost_user_vui_init VRINGS: %d", vui->num_vrings);
910  vui->sock_is_server = is_server;
 /* The memset above zeroed the buffer and at most ARRAY_LEN - 1 bytes are
  * copied, so the stored name is always NUL-terminated (and silently
  * truncated if sock_filename is longer). */
911  strncpy (vui->sock_filename, sock_filename,
912  ARRAY_LEN (vui->sock_filename) - 1);
913  vui->sock_errno = 0;
914  vui->is_up = 0;
915  vui->feature_mask = feature_mask;
916  vui->active = 1;
917  vui->unix_file_index = ~0;
918 
920 
921  if (sw_if_index)
922  *sw_if_index = xd->vlib_sw_if_index;
923 }
924 
925 // register vui and start polling on it
926 static void
927 dpdk_vhost_user_vui_register (vlib_main_t * vm, dpdk_device_t * xd)
928 {
929  dpdk_main_t *dm = &dpdk_main;
930  dpdk_vu_intf_t *vui = xd->vu_intf;
931 
932  hash_set (dm->vu_sw_if_index_by_listener_fd, vui->unix_fd,
933  xd->vlib_sw_if_index);
934 }
935 
936 static void
937 dpdk_unmap_all_mem_regions (dpdk_device_t * xd)
938 {
939  int i, r;
940  dpdk_vu_intf_t *vui = xd->vu_intf;
941  struct virtio_memory *mem = xd->vu_vhost_dev.mem;
942 
943  for (i = 0; i < mem->nregions; i++)
944  {
945  if (vui->region_addr[i] != -1)
946  {
947 
948  long page_sz = get_huge_page_size (vui->region_fd[i]);
949 
950  ssize_t map_sz = RTE_ALIGN_CEIL (mem->regions[i].memory_size +
951  vui->region_offset[i], page_sz);
952 
953  r =
954  munmap ((void *) (vui->region_addr[i] - vui->region_offset[i]),
955  map_sz);
956 
957  DBG_SOCK
958  ("unmap memory region %d addr 0x%lx off 0x%lx len 0x%lx page_sz 0x%x",
959  i, vui->region_addr[i], vui->region_offset[i], map_sz, page_sz);
960 
961  vui->region_addr[i] = -1;
962 
963  if (r == -1)
964  {
965  clib_unix_warning ("failed to unmap memory region");
966  }
967  close (vui->region_fd[i]);
968  }
969  }
970  mem->nregions = 0;
971 }
972 
/*
 * Tear down the vhost-user connection of a device: mark it admin-down,
 * unregister and close its socket, remove it from both fd lookup tables,
 * reset every vring and virtqueue, clear vu_is_running and unmap all guest
 * memory regions.
 *
 * NOTE(review): listing line 983 is missing from this copy; vnm is not
 * referenced by the remaining lines, so the missing statement presumably
 * used it -- confirm upstream.
 */
973 static inline void
974 dpdk_vhost_user_if_disconnect (dpdk_device_t * xd)
975 {
976  dpdk_vu_intf_t *vui = xd->vu_intf;
977  vnet_main_t *vnm = vnet_get_main ();
978  dpdk_main_t *dm = &dpdk_main;
979  struct vhost_virtqueue *vq;
980  int q;
981 
982  xd->admin_up = 0;
984 
 /* ~0 marks "no unix file registered" (see dpdk_vhost_user_vui_init). */
985  if (vui->unix_file_index != ~0)
986  {
987  unix_file_del (&unix_main, unix_main.file_pool + vui->unix_file_index);
988  vui->unix_file_index = ~0;
989  }
990 
991  hash_unset (dm->vu_sw_if_index_by_sock_fd, vui->unix_fd);
992  hash_unset (dm->vu_sw_if_index_by_listener_fd, vui->unix_fd);
993  close (vui->unix_fd);
994  vui->unix_fd = -1;
995  vui->is_up = 0;
996 
997  for (q = 0; q < vui->num_vrings; q++)
998  {
999  vq = xd->vu_vhost_dev.virtqueue[q];
1000  vui->vrings[q].enabled = 0; /* Reset local copy */
1001  vui->vrings[q].callfd = -1; /* Reset FD */
1002  vq->enabled = 0;
1003 #if RTE_VERSION >= RTE_VERSION_NUM(16, 4, 0, 0)
1004  vq->log_guest_addr = 0;
1005 #endif
1006  vq->desc = NULL;
1007  vq->used = NULL;
1008  vq->avail = NULL;
1009  }
1010  xd->vu_is_running = 0;
1011 
1012  dpdk_unmap_all_mem_regions (xd);
1013  DBG_SOCK ("interface ifindex %d disconnected", xd->vlib_sw_if_index);
1014 }
1015 
1016 static clib_error_t *
1017 dpdk_vhost_user_socket_read (unix_file_t * uf)
1018 {
1019  int n;
1020  int fd, number_of_fds = 0;
1021  int fds[VHOST_MEMORY_MAX_NREGIONS];
1022  vhost_user_msg_t msg;
1023  struct msghdr mh;
1024  struct iovec iov[1];
1025  dpdk_main_t *dm = &dpdk_main;
1026  dpdk_device_t *xd;
1027  dpdk_vu_intf_t *vui;
1028  struct cmsghdr *cmsg;
1029  uword *p;
1030  u8 q;
1031  vnet_main_t *vnm = vnet_get_main ();
1032 
1034  if (p == 0)
1035  {
1036  DBG_SOCK ("FD %d doesn't belong to any interface", uf->file_descriptor);
1037  return 0;
1038  }
1039  else
1040  xd = dpdk_vhost_user_device_from_sw_if_index (p[0]);
1041 
1042  ASSERT (xd != NULL);
1043  vui = xd->vu_intf;
1044 
1045  char control[CMSG_SPACE (VHOST_MEMORY_MAX_NREGIONS * sizeof (int))];
1046 
1047  memset (&mh, 0, sizeof (mh));
1048  memset (control, 0, sizeof (control));
1049 
1050  /* set the payload */
1051  iov[0].iov_base = (void *) &msg;
1052  iov[0].iov_len = VHOST_USER_MSG_HDR_SZ;
1053 
1054  mh.msg_iov = iov;
1055  mh.msg_iovlen = 1;
1056  mh.msg_control = control;
1057  mh.msg_controllen = sizeof (control);
1058 
1059  n = recvmsg (uf->file_descriptor, &mh, 0);
1060 
1061  if (n != VHOST_USER_MSG_HDR_SZ)
1062  goto close_socket;
1063 
1064  if (mh.msg_flags & MSG_CTRUNC)
1065  {
1066  goto close_socket;
1067  }
1068 
1069  cmsg = CMSG_FIRSTHDR (&mh);
1070 
1071  if (cmsg && (cmsg->cmsg_len > 0) && (cmsg->cmsg_level == SOL_SOCKET) &&
1072  (cmsg->cmsg_type == SCM_RIGHTS) &&
1073  (cmsg->cmsg_len - CMSG_LEN (0) <=
1074  VHOST_MEMORY_MAX_NREGIONS * sizeof (int)))
1075  {
1076  number_of_fds = (cmsg->cmsg_len - CMSG_LEN (0)) / sizeof (int);
1077  clib_memcpy (fds, CMSG_DATA (cmsg), number_of_fds * sizeof (int));
1078  }
1079 
1080  /* version 1, no reply bit set */
1081  if ((msg.flags & 7) != 1)
1082  {
1083  DBG_SOCK ("malformed message received. closing socket");
1084  goto close_socket;
1085  }
1086 
1087  {
1088  int rv __attribute__ ((unused));
1089  /* $$$$ pay attention to rv */
1090  rv = read (uf->file_descriptor, ((char *) &msg) + n, msg.size);
1091  }
1092 
1093  DBG_SOCK ("VPP VHOST message %s", vhost_message_str[msg.request]);
1094  switch (msg.request)
1095  {
1097  DBG_SOCK ("if %d msg VHOST_USER_GET_FEATURES", xd->vlib_hw_if_index);
1098 
1099  msg.flags |= VHOST_USER_REPLY_MASK;
1100 
1101  dpdk_vhost_user_get_features (xd->vlib_hw_if_index, &msg.u64);
1102  msg.u64 &= vui->feature_mask;
1103  msg.size = sizeof (msg.u64);
1104  break;
1105 
1107  DBG_SOCK ("if %d msg VHOST_USER_SET_FEATURES features 0x%016lx",
1108  xd->vlib_hw_if_index, msg.u64);
1109 
1110  dpdk_vhost_user_set_features (xd->vlib_hw_if_index, msg.u64);
1111  break;
1112 
1114  DBG_SOCK ("if %d msg VHOST_USER_SET_MEM_TABLE nregions %d",
1115  xd->vlib_hw_if_index, msg.memory.nregions);
1116 
1117  if ((msg.memory.nregions < 1) ||
1118  (msg.memory.nregions > VHOST_MEMORY_MAX_NREGIONS))
1119  {
1120 
1121  DBG_SOCK ("number of mem regions must be between 1 and %i",
1123 
1124  goto close_socket;
1125  }
1126 
1127  if (msg.memory.nregions != number_of_fds)
1128  {
1129  DBG_SOCK ("each memory region must have FD");
1130  goto close_socket;
1131  }
1132 
1133  /* Unmap previously configured memory if necessary */
1134  dpdk_unmap_all_mem_regions (xd);
1135 
1136  dpdk_vhost_user_set_mem_table (xd->vlib_hw_if_index, &msg.memory, fds);
1137  break;
1138 
1140  DBG_SOCK ("if %d msg VHOST_USER_SET_VRING_NUM idx %d num %d",
1141  xd->vlib_hw_if_index, msg.state.index, msg.state.num);
1142 
1143  if ((msg.state.num > 32768) || /* maximum ring size is 32768 */
1144  (msg.state.num == 0) || /* it cannot be zero */
1145  (msg.state.num % 2)) /* must be power of 2 */
1146  goto close_socket;
1147 
1148  dpdk_vhost_user_set_vring_num (xd->vlib_hw_if_index, msg.state.index,
1149  msg.state.num);
1150  break;
1151 
1153  DBG_SOCK ("if %d msg VHOST_USER_SET_VRING_ADDR idx %d",
1154  xd->vlib_hw_if_index, msg.state.index);
1155 
1156  dpdk_vhost_user_set_vring_addr (xd->vlib_hw_if_index, msg.state.index,
1157  msg.addr.desc_user_addr,
1158  msg.addr.used_user_addr,
1159  msg.addr.avail_user_addr,
1160  msg.addr.log_guest_addr);
1161  break;
1162 
1163  case VHOST_USER_SET_OWNER:
1164  DBG_SOCK ("if %d msg VHOST_USER_SET_OWNER", xd->vlib_hw_if_index);
1165  break;
1166 
1168  DBG_SOCK ("if %d msg VHOST_USER_RESET_OWNER", xd->vlib_hw_if_index);
1169  break;
1170 
1172  q = (u8) (msg.u64 & 0xFF);
1173 
1174  DBG_SOCK ("if %d msg VHOST_USER_SET_VRING_CALL u64 %lx, idx: %d",
1175  xd->vlib_hw_if_index, msg.u64, q);
1176 
1177  if (!(msg.u64 & 0x100))
1178  {
1179  if (number_of_fds != 1)
1180  goto close_socket;
1181  fd = fds[0];
1182  }
1183  else
1184  {
1185  fd = -1;
1186  }
1187  dpdk_vhost_user_set_vring_call (xd->vlib_hw_if_index, q, fd);
1188 
1189  break;
1190 
1192 
1193  q = (u8) (msg.u64 & 0xFF);
1194 
1195  DBG_SOCK ("if %d msg VHOST_USER_SET_VRING_KICK u64 %lx, idx: %d",
1196  xd->vlib_hw_if_index, msg.u64, q);
1197 
1198  if (!(msg.u64 & 0x100))
1199  {
1200  if (number_of_fds != 1)
1201  goto close_socket;
1202 
1203  vui->vrings[q].kickfd = fds[0];
1204  }
1205  else
1206  vui->vrings[q].kickfd = -1;
1207 
1208  dpdk_vhost_user_set_vring_kick (xd->vlib_hw_if_index, q,
1209  vui->vrings[q].kickfd);
1210  break;
1211 
1213 
1214  q = (u8) (msg.u64 & 0xFF);
1215 
1216  DBG_SOCK ("if %d msg VHOST_USER_SET_VRING_ERR u64 %lx, idx: %d",
1217  xd->vlib_hw_if_index, msg.u64, q);
1218 
1219  if (!(msg.u64 & 0x100))
1220  {
1221  if (number_of_fds != 1)
1222  goto close_socket;
1223 
1224  fd = fds[0];
1225  }
1226  else
1227  fd = -1;
1228 
1229  vui->vrings[q].errfd = fd;
1230  break;
1231 
1233  DBG_SOCK ("if %d msg VHOST_USER_SET_VRING_BASE idx %d num %d",
1234  xd->vlib_hw_if_index, msg.state.index, msg.state.num);
1235 
1236  dpdk_vhost_user_set_vring_base (xd->vlib_hw_if_index, msg.state.index,
1237  msg.state.num);
1238  break;
1239 
1241  DBG_SOCK ("if %d msg VHOST_USER_GET_VRING_BASE idx %d num %d",
1242  xd->vlib_hw_if_index, msg.state.index, msg.state.num);
1243 
1244  msg.flags |= VHOST_USER_REPLY_MASK;
1245  msg.size = sizeof (msg.state);
1246 
1247  dpdk_vhost_user_get_vring_base (xd->vlib_hw_if_index, msg.state.index,
1248  &msg.state.num);
1249  break;
1250 
1251  case VHOST_USER_NONE:
1252  DBG_SOCK ("if %d msg VHOST_USER_NONE", xd->vlib_hw_if_index);
1253  break;
1254 
1256 #if RTE_VERSION >= RTE_VERSION_NUM(16, 4, 0, 0)
1257  DBG_SOCK ("if %d msg VHOST_USER_SET_LOG_BASE", xd->vlib_hw_if_index);
1258 
1259  if (msg.size != sizeof (msg.log))
1260  {
1261  DBG_SOCK
1262  ("invalid msg size for VHOST_USER_SET_LOG_BASE: %u instead of %lu",
1263  msg.size, sizeof (msg.log));
1264  goto close_socket;
1265  }
1266 
1267  if (!
1268  (xd->vu_vhost_dev.protocol_features & (1 <<
1270  {
1271  DBG_SOCK
1272  ("VHOST_USER_PROTOCOL_F_LOG_SHMFD not set but VHOST_USER_SET_LOG_BASE received");
1273  goto close_socket;
1274  }
1275 
1276  fd = fds[0];
1277  /* align size to 2M page */
1278  long page_sz = get_huge_page_size (fd);
1279  ssize_t map_sz =
1280  RTE_ALIGN_CEIL (msg.log.size + msg.log.offset, page_sz);
1281 
1282  void *addr = mmap (0, map_sz, PROT_READ | PROT_WRITE,
1283  MAP_SHARED, fd, 0);
1284 
1285  DBG_SOCK ("map log region addr 0 len 0x%lx off 0x%lx fd %d mapped %p",
1286  map_sz, msg.log.offset, fd, addr);
1287 
1288  if (addr == MAP_FAILED)
1289  {
1290  clib_warning ("failed to map memory. errno is %d", errno);
1291  goto close_socket;
1292  }
1293 
1294  xd->vu_vhost_dev.log_base += pointer_to_uword (addr) + msg.log.offset;
1295  xd->vu_vhost_dev.log_size = msg.log.size;
1296  msg.flags |= VHOST_USER_REPLY_MASK;
1297  msg.size = sizeof (msg.u64);
1298 #else
1299  DBG_SOCK ("if %d msg VHOST_USER_SET_LOG_BASE Not-Implemented",
1300  xd->vlib_hw_if_index);
1301 #endif
1302  break;
1303 
1304  case VHOST_USER_SET_LOG_FD:
1305  DBG_SOCK ("if %d msg VHOST_USER_SET_LOG_FD", xd->vlib_hw_if_index);
1306  break;
1307 
1309  DBG_SOCK ("if %d msg VHOST_USER_GET_PROTOCOL_FEATURES",
1310  xd->vlib_hw_if_index);
1311 
1312  msg.flags |= VHOST_USER_REPLY_MASK;
1313  msg.u64 = VHOST_USER_PROTOCOL_FEATURES;
1314  DBG_SOCK ("VHOST_USER_PROTOCOL_FEATURES: %llx",
1316  msg.size = sizeof (msg.u64);
1317  break;
1318 
1320  DBG_SOCK ("if %d msg VHOST_USER_SET_PROTOCOL_FEATURES",
1321  xd->vlib_hw_if_index);
1322 
1323  DBG_SOCK ("VHOST_USER_SET_PROTOCOL_FEATURES: 0x%lx", msg.u64);
1324  dpdk_vhost_user_set_protocol_features (xd->vlib_hw_if_index, msg.u64);
1325  break;
1326 
1328  DBG_SOCK ("%d VPP VHOST_USER_SET_VRING_ENABLE IDX: %d, Enable: %d",
1329  xd->vlib_hw_if_index, msg.state.index, msg.state.num);
1330  dpdk_vhost_user_set_vring_enable
1331  (xd->vlib_hw_if_index, msg.state.index, msg.state.num);
1332  break;
1333 
1335  DBG_SOCK ("if %d msg VHOST_USER_GET_QUEUE_NUM:", xd->vlib_hw_if_index);
1336 
1337  msg.flags |= VHOST_USER_REPLY_MASK;
1338  msg.u64 = xd->vu_vhost_dev.virt_qp_nb;
1339  msg.size = sizeof (msg.u64);
1340  break;
1341 
1342  default:
1343  DBG_SOCK ("unknown vhost-user message %d received. closing socket",
1344  msg.request);
1345  goto close_socket;
1346  }
1347 
1348  /* if we have pointers to descriptor table, go up */
1349  if (!vui->is_up &&
1350  xd->vu_vhost_dev.virtqueue[VHOST_NET_VRING_IDX_TX]->desc &&
1351  xd->vu_vhost_dev.virtqueue[VHOST_NET_VRING_IDX_RX]->desc)
1352  {
1353 
1354  DBG_SOCK ("interface %d connected", xd->vlib_sw_if_index);
1355 
1358  vui->is_up = 1;
1359  xd->admin_up = 1;
1360  }
1361 
1362  /* if we need to reply */
1363  if (msg.flags & VHOST_USER_REPLY_MASK)
1364  {
1365  n =
1366  send (uf->file_descriptor, &msg, VHOST_USER_MSG_HDR_SZ + msg.size, 0);
1367  if (n != (msg.size + VHOST_USER_MSG_HDR_SZ))
1368  goto close_socket;
1369  }
1370 
1371  return 0;
1372 
1373 close_socket:
1374  DBG_SOCK ("error: close_socket");
1375  dpdk_vhost_user_if_disconnect (xd);
1376  return 0;
1377 }
1378 
1379 static clib_error_t *
1380 dpdk_vhost_user_socket_error (unix_file_t * uf)
1381 {
1382  dpdk_main_t *dm = &dpdk_main;
1383  dpdk_device_t *xd;
1384  uword *p;
1385 
1387  if (p == 0)
1388  {
1389  DBG_SOCK ("FD %d doesn't belong to any interface", uf->file_descriptor);
1390  return 0;
1391  }
1392  else
1393  xd = dpdk_vhost_user_device_from_sw_if_index (p[0]);
1394 
1395  dpdk_vhost_user_if_disconnect (xd);
1396  return 0;
1397 }
1398 
1399 static clib_error_t *
1400 dpdk_vhost_user_socksvr_accept_ready (unix_file_t * uf)
1401 {
1402  int client_fd, client_len;
1403  struct sockaddr_un client;
1404  unix_file_t template = { 0 };
1405  dpdk_main_t *dm = &dpdk_main;
1406  dpdk_device_t *xd = NULL;
1407  dpdk_vu_intf_t *vui;
1408  uword *p;
1409 
1411  if (p == 0)
1412  {
1413  DBG_SOCK ("fd %d doesn't belong to any interface", uf->file_descriptor);
1414  return 0;
1415  }
1416 
1417  xd = dpdk_vhost_user_device_from_sw_if_index (p[0]);
1418  ASSERT (xd != NULL);
1419  vui = xd->vu_intf;
1420 
1421  client_len = sizeof (client);
1422  client_fd = accept (uf->file_descriptor,
1423  (struct sockaddr *) &client,
1424  (socklen_t *) & client_len);
1425 
1426  if (client_fd < 0)
1427  return clib_error_return_unix (0, "accept");
1428 
1429  template.read_function = dpdk_vhost_user_socket_read;
1430  template.error_function = dpdk_vhost_user_socket_error;
1431  template.file_descriptor = client_fd;
1432  vui->unix_file_index = unix_file_add (&unix_main, &template);
1433 
1434  vui->client_fd = client_fd;
1435  hash_set (dm->vu_sw_if_index_by_sock_fd, vui->client_fd,
1436  xd->vlib_sw_if_index);
1437 
1438  return 0;
1439 }
1440 
1441 // init server socket on specified sock_filename
1442 static int
1443 dpdk_vhost_user_init_server_sock (const char *sock_filename, int *sockfd)
1444 {
1445  int rv = 0;
1446  struct sockaddr_un un = { };
1447  int fd;
1448  /* create listening socket */
1449  fd = socket (AF_UNIX, SOCK_STREAM, 0);
1450 
1451  if (fd < 0)
1452  {
1453  return VNET_API_ERROR_SYSCALL_ERROR_1;
1454  }
1455 
1456  un.sun_family = AF_UNIX;
1457  strcpy ((char *) un.sun_path, (char *) sock_filename);
1458 
1459  /* remove if exists */
1460  unlink ((char *) sock_filename);
1461 
1462  if (bind (fd, (struct sockaddr *) &un, sizeof (un)) == -1)
1463  {
1464  rv = VNET_API_ERROR_SYSCALL_ERROR_2;
1465  goto error;
1466  }
1467 
1468  if (listen (fd, 1) == -1)
1469  {
1470  rv = VNET_API_ERROR_SYSCALL_ERROR_3;
1471  goto error;
1472  }
1473 
1474  unix_file_t template = { 0 };
1475  template.read_function = dpdk_vhost_user_socksvr_accept_ready;
1476  template.file_descriptor = fd;
1477  unix_file_add (&unix_main, &template);
1478  *sockfd = fd;
1479  return rv;
1480 
1481 error:
1482  close (fd);
1483  return rv;
1484 }
1485 
1486 /*
1487  * vhost-user interface control functions used from vpe api
1488  */
1489 
1490 int
1491 dpdk_vhost_user_create_if (vnet_main_t * vnm, vlib_main_t * vm,
1492  const char *sock_filename,
1493  u8 is_server,
1494  u32 * sw_if_index,
1495  u64 feature_mask,
1496  u8 renumber, u32 custom_dev_instance, u8 * hwaddr)
1497 {
1498  dpdk_main_t *dm = &dpdk_main;
1499  dpdk_device_t *xd;
1500  u32 hw_if_idx = ~0;
1501  int sockfd = -1;
1502  int rv = 0;
1503 
1504  // using virtio vhost user?
1505  if (dm->conf->use_virtio_vhost)
1506  {
1507  return vhost_user_create_if (vnm, vm, sock_filename, is_server,
1508  sw_if_index, feature_mask, renumber,
1509  custom_dev_instance, hwaddr);
1510  }
1511 
1512  if (is_server)
1513  {
1514  if ((rv =
1515  dpdk_vhost_user_init_server_sock (sock_filename, &sockfd)) != 0)
1516  {
1517  return rv;
1518  }
1519  }
1520 
1521  if (renumber)
1522  {
1523  // set next vhost-user if id if custom one is higher or equal
1524  if (custom_dev_instance >= dm->next_vu_if_id)
1525  dm->next_vu_if_id = custom_dev_instance + 1;
1526 
1527  dpdk_create_vhost_user_if_internal (&hw_if_idx, custom_dev_instance,
1528  hwaddr);
1529  }
1530  else
1531  dpdk_create_vhost_user_if_internal (&hw_if_idx, (u32) ~ 0, hwaddr);
1532  DBG_SOCK ("dpdk vhost-user interface created hw_if_index %d", hw_if_idx);
1533 
1534  xd = dpdk_vhost_user_device_from_hw_if_index (hw_if_idx);
1535  ASSERT (xd != NULL);
1536 
1537  dpdk_vhost_user_vui_init (vnm, xd, sockfd, sock_filename, is_server,
1538  feature_mask, sw_if_index);
1539 
1540  dpdk_vhost_user_vui_register (vm, xd);
1541  return rv;
1542 }
1543 
1544 int
1545 dpdk_vhost_user_modify_if (vnet_main_t * vnm, vlib_main_t * vm,
1546  const char *sock_filename,
1547  u8 is_server,
1548  u32 sw_if_index,
1549  u64 feature_mask,
1550  u8 renumber, u32 custom_dev_instance)
1551 {
1552  dpdk_main_t *dm = &dpdk_main;
1553  dpdk_device_t *xd;
1554  dpdk_vu_intf_t *vui = NULL;
1555  u32 sw_if_idx = ~0;
1556  int sockfd = -1;
1557  int rv = 0;
1558 
1559  // using virtio vhost user?
1560  if (dm->conf->use_virtio_vhost)
1561  {
1562  return vhost_user_modify_if (vnm, vm, sock_filename, is_server,
1563  sw_if_index, feature_mask, renumber,
1564  custom_dev_instance);
1565  }
1566 
1567  xd = dpdk_vhost_user_device_from_sw_if_index (sw_if_index);
1568 
1569  if (xd == NULL)
1570  return VNET_API_ERROR_INVALID_SW_IF_INDEX;
1571 
1572  vui = xd->vu_intf;
1573 
1574  // interface is inactive
1575  vui->active = 0;
1576  // disconnect interface sockets
1577  dpdk_vhost_user_if_disconnect (xd);
1578 
1579  if (is_server)
1580  {
1581  if ((rv =
1582  dpdk_vhost_user_init_server_sock (sock_filename, &sockfd)) != 0)
1583  {
1584  return rv;
1585  }
1586  }
1587 
1588  dpdk_vhost_user_vui_init (vnm, xd, sockfd, sock_filename, is_server,
1589  feature_mask, &sw_if_idx);
1590 
1591  if (renumber)
1592  {
1593  vnet_interface_name_renumber (sw_if_idx, custom_dev_instance);
1594  }
1595 
1596  dpdk_vhost_user_vui_register (vm, xd);
1597 
1598  return rv;
1599 }
1600 
/*
 * Delete (deactivate) a vhost-user interface.
 *
 * When dm->conf->use_virtio_vhost is set the request is forwarded to
 * vhost_user_delete_if().  Otherwise the interface is marked inactive and
 * its sockets are disconnected.
 *
 * Returns 0 on success or VNET_API_ERROR_INVALID_SW_IF_INDEX when
 * sw_if_index does not resolve to a DPDK vhost-user device.
 */
1601 int
1602 dpdk_vhost_user_delete_if (vnet_main_t * vnm, vlib_main_t * vm,
1603  u32 sw_if_index)
1604 {
1605  dpdk_main_t *dm = &dpdk_main;
1606  dpdk_device_t *xd = NULL;
1607  dpdk_vu_intf_t *vui;
1608  int rv = 0;
1609 
1610  // using virtio vhost user?
1611  if (dm->conf->use_virtio_vhost)
1612  {
1613  return vhost_user_delete_if (vnm, vm, sw_if_index);
1614  }
1615 
1616  xd = dpdk_vhost_user_device_from_sw_if_index (sw_if_index);
1617 
1618  if (xd == NULL)
1619  return VNET_API_ERROR_INVALID_SW_IF_INDEX;
1620 
1621  vui = xd->vu_intf;
1622 
1623  // interface is inactive
1624  vui->active = 0;
1625  // disconnect interface sockets
1626  dpdk_vhost_user_if_disconnect (xd);
1627  // add to inactive interface list
/* NOTE(review): listing lines 1628 and 1630 are absent from this copy, so
 * the statements that implement the "add to inactive interface list" step
 * are not visible here - restore them from the upstream file. */
1629 
1631  DBG_SOCK ("deleted (deactivated) vhost-user interface sw_if_index %d",
1632  sw_if_index);
1633 
1634  return rv;
1635 }
1636 
1637 int
1638 dpdk_vhost_user_dump_ifs (vnet_main_t * vnm, vlib_main_t * vm,
1639  vhost_user_intf_details_t ** out_vuids)
1640 {
1641  int rv = 0;
1642  dpdk_main_t *dm = &dpdk_main;
1643  dpdk_device_t *xd;
1644  dpdk_vu_intf_t *vui;
1645  struct virtio_net *vhost_dev;
1646  vhost_user_intf_details_t *r_vuids = NULL;
1648  u32 *hw_if_indices = 0;
1650  u8 *s = NULL;
1651  int i;
1652 
1653  if (!out_vuids)
1654  return -1;
1655 
1656  // using virtio vhost user?
1657  if (dm->conf->use_virtio_vhost)
1658  {
1659  return vhost_user_dump_ifs (vnm, vm, out_vuids);
1660  }
1661 
1662  vec_foreach (xd, dm->devices)
1663  {
1664  if (xd->dev_type == VNET_DPDK_DEV_VHOST_USER && xd->vu_intf->active)
1665  vec_add1 (hw_if_indices, xd->vlib_hw_if_index);
1666  }
1667 
1668  for (i = 0; i < vec_len (hw_if_indices); i++)
1669  {
1670  hi = vnet_get_hw_interface (vnm, hw_if_indices[i]);
1671  xd = dpdk_vhost_user_device_from_hw_if_index (hw_if_indices[i]);
1672  if (!xd)
1673  {
1674  clib_warning ("invalid vhost-user interface hw_if_index %d",
1675  hw_if_indices[i]);
1676  continue;
1677  }
1678 
1679  vui = xd->vu_intf;
1680  ASSERT (vui != NULL);
1681  vhost_dev = &xd->vu_vhost_dev;
1682  u32 virtio_net_hdr_sz = (vui->num_vrings > 0 ?
1683  vhost_dev->virtqueue[0]->vhost_hlen : 0);
1684 
1685  vec_add2 (r_vuids, vuid, 1);
1686  vuid->sw_if_index = xd->vlib_sw_if_index;
1687  vuid->virtio_net_hdr_sz = virtio_net_hdr_sz;
1688  vuid->features = vhost_dev->features;
1689  vuid->is_server = vui->sock_is_server;
1690  vuid->num_regions =
1691  (vhost_dev->mem != NULL ? vhost_dev->mem->nregions : 0);
1692  vuid->sock_errno = vui->sock_errno;
1693  strncpy ((char *) vuid->sock_filename, (char *) vui->sock_filename,
1694  ARRAY_LEN (vuid->sock_filename) - 1);
1695 
1696  s = format (s, "%v%c", hi->name, 0);
1697 
1698  strncpy ((char *) vuid->if_name, (char *) s,
1699  ARRAY_LEN (vuid->if_name) - 1);
1700  _vec_len (s) = 0;
1701  }
1702 
1703  vec_free (s);
1704  vec_free (hw_if_indices);
1705 
1706  *out_vuids = r_vuids;
1707 
1708  return rv;
1709 }
1710 
1711 /*
1712  * Processing functions called from dpdk process fn
1713  */
1714 
/*
 * Per-process scratch state for the client-mode connection logic; it is
 * allocated by dpdk_vhost_user_process_init() and handed back as the
 * opaque ctx pointer to dpdk_vhost_user_process_if() / _cleanup().
 */
1715 typedef struct
1716 {
/* address passed to connect(); sun_path is refilled for each attempt */
1717  struct sockaddr_un sun;
/* spare, not yet connected AF_UNIX socket used for the next attempt */
1718  int sockfd;
/* unix_file_t template carrying the socket read/error callbacks */
1719  unix_file_t template;
/* set to 0 at init; not otherwise used in the code visible here */
1720  uword *event_data;
1721 } dpdk_vu_process_state;
1722 
1723 void
1724 dpdk_vhost_user_process_init (void **ctx)
1725 {
1726  dpdk_vu_process_state *state =
1727  clib_mem_alloc (sizeof (dpdk_vu_process_state));
1728  memset (state, 0, sizeof (*state));
1729  state->sockfd = socket (AF_UNIX, SOCK_STREAM, 0);
1730  state->sun.sun_family = AF_UNIX;
1731  state->template.read_function = dpdk_vhost_user_socket_read;
1732  state->template.error_function = dpdk_vhost_user_socket_error;
1733  state->event_data = 0;
1734  *ctx = state;
1735 }
1736 
1737 void
1738 dpdk_vhost_user_process_cleanup (void *ctx)
1739 {
1740  clib_mem_free (ctx);
1741 }
1742 
1743 uword
1744 dpdk_vhost_user_process_if (vlib_main_t * vm, dpdk_device_t * xd, void *ctx)
1745 {
1746  dpdk_main_t *dm = &dpdk_main;
1747  dpdk_vu_process_state *state = (dpdk_vu_process_state *) ctx;
1748  dpdk_vu_intf_t *vui = xd->vu_intf;
1749 
1750  if (vui->sock_is_server || !vui->active)
1751  return 0;
1752 
1753  if (vui->unix_fd == -1)
1754  {
1755  /* try to connect */
1756  strncpy (state->sun.sun_path, (char *) vui->sock_filename,
1757  sizeof (state->sun.sun_path) - 1);
1758 
1759  if (connect
1760  (state->sockfd, (struct sockaddr *) &(state->sun),
1761  sizeof (struct sockaddr_un)) == 0)
1762  {
1763  vui->sock_errno = 0;
1764  vui->unix_fd = state->sockfd;
1765  state->template.file_descriptor = state->sockfd;
1766  vui->unix_file_index =
1767  unix_file_add (&unix_main, &(state->template));
1768  hash_set (dm->vu_sw_if_index_by_sock_fd, state->sockfd,
1769  xd->vlib_sw_if_index);
1770 
1771  state->sockfd = socket (AF_UNIX, SOCK_STREAM, 0);
1772  if (state->sockfd < 0)
1773  return -1;
1774  }
1775  else
1776  {
1777  vui->sock_errno = errno;
1778  }
1779  }
1780  else
1781  {
1782  /* check if socket is alive */
1783  int error = 0;
1784  socklen_t len = sizeof (error);
1785  int retval =
1786  getsockopt (vui->unix_fd, SOL_SOCKET, SO_ERROR, &error, &len);
1787 
1788  if (retval)
1789  dpdk_vhost_user_if_disconnect (xd);
1790  }
1791  return 0;
1792 }
1793 #endif
1794 
1795 /*
1796  * CLI functions
1797  */
1798 
1799 static clib_error_t *
1801  unformat_input_t * input,
1802  vlib_cli_command_t * cmd)
1803 {
1804 #if DPDK_VHOST_USER
1805  dpdk_main_t *dm = &dpdk_main;
1806  unformat_input_t _line_input, *line_input = &_line_input;
1807  u8 *sock_filename = NULL;
1808  u32 sw_if_index;
1809  u8 is_server = 0;
1810  u64 feature_mask = (u64) ~ 0;
1811  u8 renumber = 0;
1812  u32 custom_dev_instance = ~0;
1813  u8 hwaddr[6];
1814  u8 *hw = NULL;
1815 
1816  if (dm->conf->use_virtio_vhost)
1817  {
1818 #endif
1819  return vhost_user_connect_command_fn (vm, input, cmd);
1820 #if DPDK_VHOST_USER
1821  }
1822 
1823  /* Get a line of input. */
1824  if (!unformat_user (input, unformat_line_input, line_input))
1825  return 0;
1826 
1827  while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
1828  {
1829  if (unformat (line_input, "socket %s", &sock_filename))
1830  ;
1831  else if (unformat (line_input, "server"))
1832  is_server = 1;
1833  else if (unformat (line_input, "feature-mask 0x%llx", &feature_mask))
1834  ;
1835  else
1836  if (unformat
1837  (line_input, "hwaddr %U", unformat_ethernet_address, hwaddr))
1838  hw = hwaddr;
1839  else if (unformat (line_input, "renumber %d", &custom_dev_instance))
1840  {
1841  renumber = 1;
1842  }
1843  else
1844  return clib_error_return (0, "unknown input `%U'",
1845  format_unformat_error, input);
1846  }
1847  unformat_free (line_input);
1848 
1849  vnet_main_t *vnm = vnet_get_main ();
1850  if (sock_filename == NULL)
1851  return clib_error_return (0, "missing socket file");
1852 
1853  dpdk_vhost_user_create_if (vnm, vm, (char *) sock_filename,
1854  is_server, &sw_if_index, feature_mask,
1855  renumber, custom_dev_instance, hw);
1856 
1857  vec_free (sock_filename);
1859  sw_if_index);
1860  return 0;
1861 #endif
1862 }
1863 
1864 /* *INDENT-OFF* */
1865 VLIB_CLI_COMMAND (dpdk_vhost_user_connect_command, static) = {
1866  .path = "create vhost-user",
1867  .short_help = "create vhost-user socket <socket-filename> [server] [feature-mask <hex>] [renumber <dev_instance>]",
1869 };
1870 /* *INDENT-ON* */
1871 
1872 static clib_error_t *
1874  unformat_input_t * input,
1875  vlib_cli_command_t * cmd)
1876 {
1877  dpdk_main_t *dm = &dpdk_main;
1878  clib_error_t *error = 0;
1879  unformat_input_t _line_input, *line_input = &_line_input;
1880  u32 sw_if_index = ~0;
1881 
1882  if (dm->conf->use_virtio_vhost)
1883  {
1884  return vhost_user_delete_command_fn (vm, input, cmd);
1885  }
1886 
1887  /* Get a line of input. */
1888  if (!unformat_user (input, unformat_line_input, line_input))
1889  return 0;
1890 
1891  while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
1892  {
1893  if (unformat (line_input, "sw_if_index %d", &sw_if_index))
1894  ;
1895  else
1896  return clib_error_return (0, "unknown input `%U'",
1897  format_unformat_error, input);
1898  }
1899  unformat_free (line_input);
1900 
1901  if (sw_if_index == ~0)
1902  {
1903  error = clib_error_return (0, "invalid sw_if_index",
1904  format_unformat_error, input);
1905  return error;
1906  }
1907 
1908  vnet_main_t *vnm = vnet_get_main ();
1909 
1910 #if DPDK_VHOST_USER
1911  dpdk_vhost_user_delete_if (vnm, vm, sw_if_index);
1912 #else
1913  vhost_user_delete_if (vnm, vm, sw_if_index);
1914 #endif
1915 
1916  return 0;
1917 }
1918 
1919 /* *INDENT-OFF* */
1920 VLIB_CLI_COMMAND (dpdk_vhost_user_delete_command, static) = {
1921  .path = "delete vhost-user",
1922  .short_help = "delete vhost-user sw_if_index <nn>",
1924 };
1925 /* *INDENT-ON* */
1926 
/*
 * List of virtio-net feature names, each wrapped in _().  A user defines
 * _() before expanding the list and undefines it afterwards; presumably
 * this feeds the feature table in show_dpdk_vhost_user_command_fn
 * (NOTE(review): the expansion line is not visible in this copy - confirm).
 */
1927 #define foreach_dpdk_vhost_feature \
1928  _ (VIRTIO_NET_F_MRG_RXBUF) \
1929  _ (VIRTIO_NET_F_CTRL_VQ) \
1930  _ (VIRTIO_NET_F_CTRL_RX)
1931 
1932 static clib_error_t *
1934  unformat_input_t * input,
1935  vlib_cli_command_t * cmd)
1936 {
1937 #if DPDK_VHOST_USER
1938  clib_error_t *error = 0;
1939  dpdk_main_t *dm = &dpdk_main;
1940  vnet_main_t *vnm = vnet_get_main ();
1941  dpdk_device_t *xd;
1942  dpdk_vu_intf_t *vui;
1943  struct virtio_net *vhost_dev;
1944  u32 hw_if_index, *hw_if_indices = 0;
1946  int i, j, q;
1947  int show_descr = 0;
1948  struct virtio_memory *mem;
1949  struct feat_struct
1950  {
1951  u8 bit;
1952  char *str;
1953  };
1954  struct feat_struct *feat_entry;
1955 
1956  static struct feat_struct feat_array[] = {
1957 #define _(f) { .str = #f, .bit = f, },
1959 #undef _
1960  {.str = NULL}
1961  };
1962 
1963  if (dm->conf->use_virtio_vhost)
1964  {
1965 #endif
1966  return show_vhost_user_command_fn (vm, input, cmd);
1967 #if DPDK_VHOST_USER
1968  }
1969 
1970  while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
1971  {
1972  if (unformat
1973  (input, "%U", unformat_vnet_hw_interface, vnm, &hw_if_index))
1974  {
1975  vec_add1 (hw_if_indices, hw_if_index);
1976  vlib_cli_output (vm, "add %d", hw_if_index);
1977  }
1978  else if (unformat (input, "descriptors") || unformat (input, "desc"))
1979  show_descr = 1;
1980  else
1981  {
1982  error = clib_error_return (0, "unknown input `%U'",
1983  format_unformat_error, input);
1984  goto done;
1985  }
1986  }
1987  if (vec_len (hw_if_indices) == 0)
1988  {
1989  vec_foreach (xd, dm->devices)
1990  {
1991  if (xd->dev_type == VNET_DPDK_DEV_VHOST_USER && xd->vu_intf->active)
1992  vec_add1 (hw_if_indices, xd->vlib_hw_if_index);
1993  }
1994  }
1995 
1996  vlib_cli_output (vm, "DPDK vhost-user interfaces");
1997  vlib_cli_output (vm, "Global:\n coalesce frames %d time %e\n\n",
1999  dm->conf->vhost_coalesce_time);
2000 
2001  for (i = 0; i < vec_len (hw_if_indices); i++)
2002  {
2003  hi = vnet_get_hw_interface (vnm, hw_if_indices[i]);
2004 
2005  if (!(xd = dpdk_vhost_user_device_from_hw_if_index (hw_if_indices[i])))
2006  {
2007  error = clib_error_return (0, "not dpdk vhost-user interface: '%s'",
2008  hi->name);
2009  goto done;
2010  }
2011  vui = xd->vu_intf;
2012  vhost_dev = &xd->vu_vhost_dev;
2013  mem = vhost_dev->mem;
2014  u32 virtio_net_hdr_sz = (vui->num_vrings > 0 ?
2015  vhost_dev->virtqueue[0]->vhost_hlen : 0);
2016 
2017  vlib_cli_output (vm, "Interface: %v (ifindex %d)",
2018  hi->name, hw_if_indices[i]);
2019 
2020  vlib_cli_output (vm, "virtio_net_hdr_sz %d\n features (0x%llx): \n",
2021  virtio_net_hdr_sz, xd->vu_vhost_dev.features);
2022 
2023  feat_entry = (struct feat_struct *) &feat_array;
2024  while (feat_entry->str)
2025  {
2026  if (xd->vu_vhost_dev.features & (1 << feat_entry->bit))
2027  vlib_cli_output (vm, " %s (%d)", feat_entry->str,
2028  feat_entry->bit);
2029  feat_entry++;
2030  }
2031 
2032  vlib_cli_output (vm, "\n");
2033 
2034  vlib_cli_output (vm, " socket filename %s type %s errno \"%s\"\n\n",
2035  vui->sock_filename,
2036  vui->sock_is_server ? "server" : "client",
2037  strerror (vui->sock_errno));
2038 
2039  vlib_cli_output (vm, " Memory regions (total %d)\n", mem->nregions);
2040 
2041  if (mem->nregions)
2042  {
2043  vlib_cli_output (vm,
2044  " region fd guest_phys_addr memory_size userspace_addr mmap_offset mmap_addr\n");
2045  vlib_cli_output (vm,
2046  " ====== ===== ================== ================== ================== ================== ==================\n");
2047  }
2048  for (j = 0; j < mem->nregions; j++)
2049  {
2050  vlib_cli_output (vm,
2051  " %d %-5d 0x%016lx 0x%016lx 0x%016lx 0x%016lx 0x%016lx\n",
2052  j, vui->region_fd[j],
2053  mem->regions[j].guest_phys_address,
2054  mem->regions[j].memory_size,
2055  mem->regions[j].userspace_address,
2056  mem->regions[j].address_offset,
2057  vui->region_addr[j]);
2058  }
2059  for (q = 0; q < vui->num_vrings; q++)
2060  {
2061  struct vhost_virtqueue *vq = vhost_dev->virtqueue[q];
2062  const char *qtype = (q & 1) ? "TX" : "RX";
2063 
2064  vlib_cli_output (vm, "\n Virtqueue %d (%s)\n", q / 2, qtype);
2065 
2066  vlib_cli_output (vm,
2067  " qsz %d last_used_idx %d last_used_idx_res %d\n",
2068  vq->size, vq->last_used_idx,
2069  vq->last_used_idx_res);
2070 
2071  if (vq->avail && vq->used)
2072  vlib_cli_output (vm,
2073  " avail.flags %x avail.idx %d used.flags %x used.idx %d\n",
2074  vq->avail->flags, vq->avail->idx,
2075  vq->used->flags, vq->used->idx);
2076 
2077  vlib_cli_output (vm, " kickfd %d callfd %d errfd %d enabled %d\n",
2078  vq->kickfd, vq->callfd, vui->vrings[q].errfd,
2079  vq->enabled);
2080 
2081  if (show_descr && vq->enabled)
2082  {
2083  vlib_cli_output (vm, "\n descriptor table:\n");
2084  vlib_cli_output (vm,
2085  " id addr len flags next user_addr\n");
2086  vlib_cli_output (vm,
2087  " ===== ================== ===== ====== ===== ==================\n");
2088  for (j = 0; j < vq->size; j++)
2089  {
2090  vlib_cli_output (vm,
2091  " %-5d 0x%016lx %-5d 0x%04x %-5d 0x%016lx\n",
2092  j, vq->desc[j].addr, vq->desc[j].len,
2093  vq->desc[j].flags, vq->desc[j].next,
2095  (xd, vq->desc[j].addr)));
2096  }
2097  }
2098  }
2099  vlib_cli_output (vm, "\n");
2100  }
2101 done:
2102  vec_free (hw_if_indices);
2103  return error;
2104 #endif
2105 }
2106 
2107 /* *INDENT-OFF* */
2108 VLIB_CLI_COMMAND (show_vhost_user_command, static) = {
2109  .path = "show vhost-user",
2110  .short_help = "show vhost-user interface",
2111  .function = show_dpdk_vhost_user_command_fn,
2112 };
2113 /* *INDENT-ON* */
2114 
2115 /*
2116  * fd.io coding-style-patch-verification: ON
2117  *
2118  * Local Variables:
2119  * eval: (c-set-style "gnu")
2120  * End:
2121  */
unformat_function_t unformat_vnet_hw_interface
#define vec_validate(V, I)
Make sure vector is long enough for given index (no header, unspecified alignment) ...
Definition: vec.h:396
unix_file_t * file_pool
Definition: unix.h:89
void dpdk_device_lock_free(dpdk_device_t *xd)
Definition: init.c:224
vmrglw vmrglh hi
#define hash_set(h, key, value)
Definition: hash.h:254
sll srl srl sll sra u16x4 i
Definition: vector_sse2.h:343
uword unformat(unformat_input_t *i, char *fmt,...)
Definition: unformat.c:966
clib_error_t * vnet_hw_interface_set_flags(vnet_main_t *vnm, u32 hw_if_index, u32 flags)
Definition: interface.c:513
unix_file_function_t * read_function
Definition: unix.h:62
#define hash_unset(h, key)
Definition: hash.h:260
void ethernet_delete_interface(vnet_main_t *vnm, u32 hw_if_index)
Definition: interface.c:230
dpdk_main_t dpdk_main
Definition: dpdk.h:443
static vlib_main_t * vlib_get_main(void)
Definition: global_funcs.h:23
static void * map_guest_mem(vhost_user_intf_t *vui, uword addr)
Definition: vhost-user.c:150
u8 use_rss
Definition: dpdk.h:424
unsigned int uint32_t
Definition: fix_types.h:29
vnet_device_class_t dpdk_device_class
u32 vhost_coalesce_frames
Definition: dpdk.h:362
u8 need_txlock
Definition: dpdk.h:269
#define UNFORMAT_END_OF_INPUT
Definition: format.h:143
#define NULL
Definition: clib.h:55
static f64 vlib_time_now(vlib_main_t *vm)
Definition: main.h:182
#define vec_add2_aligned(V, P, N, A)
Add N elements to end of vector V, return pointer to new elements in P.
Definition: vec.h:533
static vnet_hw_interface_t * vnet_get_hw_interface(vnet_main_t *vnm, u32 hw_if_index)
#define vec_add1(V, E)
Add 1 element to end of vector (unspecified alignment).
Definition: vec.h:482
int vnet_interface_name_renumber(u32 sw_if_index, u32 new_show_dev_instance)
Definition: interface.c:1151
#define VHOST_USER_MSG_HDR_SZ
Definition: vhost-user.h:20
#define vec_add2(V, P, N)
Add N elements to end of vector V, return pointer to new elements in P.
Definition: vec.h:521
u32 per_interface_next_index
Definition: dpdk.h:205
vlib_worker_thread_t * vlib_worker_threads
Definition: threads.h:110
static vnet_sw_interface_t * vnet_get_sw_interface(vnet_main_t *vnm, u32 sw_if_index)
#define VHOST_USER_PROTOCOL_FEATURES
Definition: vhost-user.h:39
clib_error_t * show_vhost_user_command_fn(vlib_main_t *vm, unformat_input_t *input, vlib_cli_command_t *cmd)
Definition: vhost-user.c:2090
u32 next_vu_if_id
Definition: dpdk.h:413
#define VNET_HW_INTERFACE_FLAG_LINK_UP
Definition: interface.h:273
#define vec_validate_aligned(V, I, A)
Make sure vector is long enough for given index (no header, specified alignment)
Definition: vec.h:407
format_function_t format_vnet_sw_if_index_name
static uword unix_file_add(unix_main_t *um, unix_file_t *template)
Definition: unix.h:136
#define VHOST_USER_REPLY_MASK
Definition: vhost-user.h:27
#define vec_reset_length(v)
Reset vector length to zero NULL-pointer tolerant.
u8 admin_up
Definition: dpdk.h:221
static vnet_sw_interface_t * vnet_get_hw_sw_interface(vnet_main_t *vnm, u32 hw_if_index)
vnet_main_t * vnet_get_main(void)
Definition: misc.c:45
struct rte_mbuf *** tx_vectors
Definition: dpdk.h:208
f64 vhost_coalesce_time
Definition: dpdk.h:363
vlib_node_registration_t dpdk_input_node
(constructor) VLIB_REGISTER_NODE (dpdk_input_node)
Definition: node.c:787
int input_cpu_first_index
Definition: dpdk.h:427
#define vec_elt_at_index(v, i)
Get vector value at index i checking that i is in bounds.
u16 rx_q_used
Definition: dpdk.h:230
static void unformat_free(unformat_input_t *i)
Definition: format.h:161
#define foreach_dpdk_vhost_feature
Definition: vhost_user.c:1927
#define clib_warning(format, args...)
Definition: error.h:59
unsigned long u64
Definition: types.h:89
uword unformat_user(unformat_input_t *input, unformat_function_t *func,...)
Definition: unformat.c:977
vhost_user_log_t log
Definition: vhost-user.h:84
u32 device_index
Definition: dpdk.h:199
static uword pointer_to_uword(const void *p)
Definition: types.h:131
vlib_main_t ** vlib_mains
Definition: dpdk_buffer.c:157
u32 vlib_sw_if_index
Definition: dpdk.h:202
static void unix_file_del(unix_main_t *um, unix_file_t *f)
Definition: unix.h:146
#define hash_get(h, key)
Definition: hash.h:248
#define pool_elt_at_index(p, i)
Returns pointer to element at given index.
Definition: pool.h:369
u32 file_descriptor
Definition: unix.h:52
dpdk_device_and_queue_t ** devices_by_cpu
Definition: dpdk.h:379
int vhost_user_delete_if(vnet_main_t *vnm, vlib_main_t *vm, u32 sw_if_index)
Definition: vhost-user.c:1632
u32 vlib_hw_if_index
Definition: dpdk.h:201
#define clib_error_return_unix(e, args...)
Definition: error.h:114
#define PREDICT_FALSE(x)
Definition: clib.h:97
int input_cpu_count
Definition: dpdk.h:428
#define VLIB_FRAME_SIZE
Definition: node.h:328
u16 tx_q_used
Definition: dpdk.h:229
void vlib_cli_output(vlib_main_t *vm, char *fmt,...)
Definition: cli.c:575
int vhost_user_dump_ifs(vnet_main_t *vnm, vlib_main_t *vm, vhost_user_intf_details_t **out_vuids)
Definition: vhost-user.c:2037
#define uword_to_pointer(u, type)
Definition: types.h:136
int vhost_user_create_if(vnet_main_t *vnm, vlib_main_t *vm, const char *sock_filename, u8 is_server, u32 *sw_if_index, u64 feature_mask, u8 renumber, u32 custom_dev_instance, u8 *hwaddr)
Definition: vhost-user.c:1859
dpdk_device_t * devices
Definition: dpdk.h:378
u16 * cpu_socket_id_by_queue
Definition: dpdk.h:233
#define vec_free(V)
Free vector's memory (no header).
Definition: vec.h:300
volatile u32 ** lockp
Definition: dpdk.h:196
vhost_user_memory_region_t regions[VHOST_MEMORY_MAX_NREGIONS]
Definition: vhost-user.h:89
static vlib_thread_main_t * vlib_get_thread_main()
Definition: global_funcs.h:32
struct rte_mbuf *** rx_vectors
Definition: dpdk.h:209
int vhost_user_modify_if(vnet_main_t *vnm, vlib_main_t *vm, const char *sock_filename, u8 is_server, u32 sw_if_index, u64 feature_mask, u8 renumber, u32 custom_dev_instance)
Definition: vhost-user.c:1900
#define clib_memcpy(a, b, c)
Definition: string.h:63
#define VHOST_MEMORY_MAX_NREGIONS
Definition: vhost-user.h:19
#define clib_unix_warning(format, args...)
Definition: error.h:68
static clib_error_t * dpdk_vhost_user_connect_command_fn(vlib_main_t *vm, unformat_input_t *input, vlib_cli_command_t *cmd)
Definition: vhost_user.c:1800
void vlib_worker_thread_barrier_sync(vlib_main_t *vm)
Definition: threads.c:1144
#define ARRAY_LEN(x)
Definition: clib.h:59
static clib_error_t * show_dpdk_vhost_user_command_fn(vlib_main_t *vm, unformat_input_t *input, vlib_cli_command_t *cmd)
Definition: vhost_user.c:1933
#define VHOST_USER_PROTOCOL_F_LOG_SHMFD
Definition: vhost-user.h:30
#define vec_validate_ha(V, I, H, A)
Make sure vector is long enough for given index (general version).
Definition: vec.h:376
uword unformat_ethernet_address(unformat_input_t *input, va_list *args)
Definition: format.c:206
#define DPDK_TX_RING_SIZE
Definition: dpdk.h:273
#define ASSERT(truth)
unsigned int u32
Definition: types.h:88
u32 * vu_inactive_interfaces_device_index
Definition: dpdk.h:411
u8 * format_unformat_error(u8 *s, va_list *va)
Definition: unformat.c:91
u8 * format(u8 *s, char *fmt,...)
Definition: format.c:418
static long get_huge_page_size(int fd)
Definition: vhost-user.c:185
vhost_vring_state_t state
Definition: vhost-user.h:81
clib_error_t * vhost_user_delete_command_fn(vlib_main_t *vm, unformat_input_t *input, vlib_cli_command_t *cmd)
Definition: vhost-user.c:2008
static void clib_mem_free(void *p)
Definition: mem.h:154
void dpdk_device_lock_init(dpdk_device_t *xd)
Definition: init.c:210
clib_error_t * ethernet_register_interface(vnet_main_t *vnm, u32 dev_class_index, u32 dev_instance, u8 *address, u32 *hw_if_index_return, ethernet_flag_change_function_t flag_change)
Definition: interface.c:181
u8 use_virtio_vhost
Definition: dpdk.h:359
static void vlib_node_set_state(vlib_main_t *vm, u32 node_index, vlib_node_state_t new_state)
Set node dispatch state.
Definition: node_funcs.h:144
unix_main_t unix_main
Definition: main.c:57
static void * clib_mem_alloc(uword size)
Definition: mem.h:107
u64 uword
Definition: types.h:112
unsigned short u16
Definition: types.h:57
VLIB_CLI_COMMAND(set_interface_ip_source_and_port_range_check_command, static)
#define vec_len(v)
Number of elements in vector (rvalue-only, NULL tolerant)
double f64
Definition: types.h:142
dpdk_device_type_t dev_type
Definition: dpdk.h:217
unsigned char u8
Definition: types.h:56
#define VHOST_NET_VRING_IDX_RX
Definition: vhost-user.h:22
Definition: unix.h:49
#define VHOST_USER_F_PROTOCOL_FEATURES
Definition: vhost-user.h:33
static uword unformat_check_input(unformat_input_t *i)
Definition: format.h:169
static u32 random_u32(u32 *seed)
32-bit random number generator
Definition: random.h:69
#define VHOST_NET_VRING_IDX_TX
Definition: vhost-user.h:23
void vlib_worker_thread_barrier_release(vlib_main_t *vm)
Definition: threads.c:1176
vnet_sw_interface_type_t type
Definition: interface.h:410
#define vec_foreach(var, vec)
Vector iterator.
#define clib_error_return(e, args...)
Definition: error.h:111
struct _unformat_input_t unformat_input_t
#define CLIB_CACHE_LINE_BYTES
Definition: cache.h:67
vnet_main_t * vnet_main
Definition: dpdk.h:439
unformat_function_t unformat_line_input
Definition: format.h:281
static clib_error_t * dpdk_vhost_user_delete_command_fn(vlib_main_t *vm, unformat_input_t *input, vlib_cli_command_t *cmd)
Definition: vhost_user.c:1873
clib_error_t * vhost_user_connect_command_fn(vlib_main_t *vm, unformat_input_t *input, vlib_cli_command_t *cmd)
Definition: vhost-user.c:1950
#define DBG_SOCK(args...)
Definition: vhost_user.c:39
uword * vu_sw_if_index_by_listener_fd
Definition: dpdk.h:409
CLIB vectors are ubiquitous, dynamically resized arrays with user-defined "headers".
dpdk_config_main_t * conf
Definition: dpdk.h:440
uword * vu_sw_if_index_by_sock_fd
Definition: dpdk.h:410