FD.io VPP  v16.09
Vector Packet Processing
vhost-user.c
Go to the documentation of this file.
1 /*
2  *------------------------------------------------------------------
3  * vhost.c - vhost-user
4  *
5  * Copyright (c) 2014 Cisco and/or its affiliates.
6  * Licensed under the Apache License, Version 2.0 (the "License");
7  * you may not use this file except in compliance with the License.
8  * You may obtain a copy of the License at:
9  *
10  * http://www.apache.org/licenses/LICENSE-2.0
11  *
12  * Unless required by applicable law or agreed to in writing, software
13  * distributed under the License is distributed on an "AS IS" BASIS,
14  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  * See the License for the specific language governing permissions and
16  * limitations under the License.
17  *------------------------------------------------------------------
18  */
19 
20 #include <fcntl.h> /* for open */
21 #include <sys/ioctl.h>
22 #include <sys/socket.h>
23 #include <sys/un.h>
24 #include <sys/stat.h>
25 #include <sys/types.h>
26 #include <sys/uio.h> /* for iovec */
27 #include <netinet/in.h>
28 #include <sys/vfs.h>
29 
30 #include <linux/if_arp.h>
31 #include <linux/if_tun.h>
32 
33 #include <vlib/vlib.h>
34 #include <vlib/unix/unix.h>
35 
36 #include <vnet/ip/ip.h>
37 
38 #include <vnet/ethernet/ethernet.h>
39 
41 
/* Compile-time debug switches.  When a switch is 1 the matching DBG_* macro
   forwards its arguments to clib_warning(); otherwise the macro expands to
   nothing and the call site compiles away. */
42 #define VHOST_USER_DEBUG_SOCKET 0
43 #define VHOST_USER_DEBUG_VQ 0
44 
45 /* Set to get virtio_net_hdr in buffer pre-data
46  details will be shown in packet trace */
47 #define VHOST_USER_COPY_TX_HDR 0
48 
/* Socket / control-path debug output.
   NOTE(review): the enabled expansion already ends in ';', so a call written
   as "DBG_SOCK (...);" yields an extra empty statement; this would break an
   unbraced if/else around the call. */
49 #if VHOST_USER_DEBUG_SOCKET == 1
50 #define DBG_SOCK(args...) clib_warning(args);
51 #else
52 #define DBG_SOCK(args...)
53 #endif
54 
/* Virtqueue / data-path debug output; same expansion shape as DBG_SOCK. */
55 #if VHOST_USER_DEBUG_VQ == 1
56 #define DBG_VQ(args...) clib_warning(args);
57 #else
58 #define DBG_VQ(args...)
59 #endif
60 
62 
/* X-macro list of TX-side error entries.  Each entry is _(symbol, text); the
   list is expanded with different definitions of _() to produce the
   VHOST_USER_TX_FUNC_ERROR_<symbol> enumerators and the matching strings. */
63 #define foreach_vhost_user_tx_func_error \
64  _(NONE, "no error") \
65  _(NOT_READY, "vhost user state error") \
66  _(PKT_DROP_NOBUF, "tx packet drops (no available descriptors)") \
67  _(MMAP_FAIL, "mmap failure")
68 
69 typedef enum
70 {
71 #define _(f,s) VHOST_USER_TX_FUNC_ERROR_##f,
73 #undef _
76 
78 #define _(n,s) s,
80 #undef _
81 };
82 
/* X-macro list of input (RX node) error entries.  Each entry is
   _(symbol, text); the list is expanded with different definitions of _() to
   produce the VHOST_USER_INPUT_FUNC_ERROR_<symbol> enumerators and the
   matching strings. */
83 #define foreach_vhost_user_input_func_error \
84  _(NO_ERROR, "no error") \
85  _(NO_BUFFER, "no available buffer") \
86  _(MMAP_FAIL, "mmap failure") \
87  _(UNDERSIZED_FRAME, "undersized ethernet frame received (< 14 bytes)") \
88  _(FULL_RX_QUEUE, "full rx queue (possible driver tx drop)")
89 
90 typedef enum
91 {
92 #define _(f,s) VHOST_USER_INPUT_FUNC_ERROR_##f,
94 #undef _
97 
99 #define _(n,s) s,
101 #undef _
102 };
103 
104 /* *INDENT-OFF* */
/* File-scope vhost-user state.  Only mtu_bytes is given an explicit initial
   value here; every other member starts out zero-initialized.
   NOTE(review): 1518 is presumably the maximum Ethernet frame size - confirm. */
105 static vhost_user_main_t vhost_user_main = {
106  .mtu_bytes = 1518,
107 };
108 
/* Hardware interface class for vhost-user interfaces; only the class name
   is set. */
109 VNET_HW_INTERFACE_CLASS (vhost_interface_class, static) = {
110  .name = "vhost-user",
111 };
112 /* *INDENT-ON* */
113 
114 static u8 *
116 {
117  u32 i = va_arg (*args, u32);
118  u32 show_dev_instance = ~0;
120 
122  show_dev_instance = vum->show_dev_instance_by_real_dev_instance[i];
123 
124  if (show_dev_instance != ~0)
125  i = show_dev_instance;
126 
127  s = format (s, "VirtualEthernet0/0/%d", i);
128  return s;
129 }
130 
131 static int
133 {
135 
137  hi->dev_instance, ~0);
138 
140  new_dev_instance;
141 
142  DBG_SOCK ("renumbered vhost-user interface dev_instance %d to %d",
143  hi->dev_instance, new_dev_instance);
144 
145  return 0;
146 }
147 
148 
/*
 * Translate a guest-physical address into a pointer usable by this process.
 *
 * Walks vui->regions[0 .. nregions-1] and picks the first region for which
 *   guest_phys_addr <= addr < guest_phys_addr + memory_size.
 * Returns region_mmap_addr[i] + (addr - guest_phys_addr) for that region,
 * or 0 when no region contains addr (a DBG_VQ message is emitted then).
 *
 * NOTE(review): the declarator line is absent from this listing; call sites
 * use map_guest_mem (vui, addr) - confirm against the full file.
 */
149 static inline void *
151 {
152  int i;
153  for (i = 0; i < vui->nregions; i++)
154  {
 /* half-open range check: start inclusive, end exclusive */
155  if ((vui->regions[i].guest_phys_addr <= addr) &&
156  ((vui->regions[i].guest_phys_addr + vui->regions[i].memory_size) >
157  addr))
158  {
159  return (void *) (vui->region_mmap_addr[i] + addr -
160  vui->regions[i].guest_phys_addr);
161  }
162  }
163  DBG_VQ ("failed to map guest mem addr %llx", addr);
164  return 0;
165 }
166 
/*
 * Translate an address expressed in the peer's userspace address space into
 * a pointer usable by this process.
 *
 * Same lookup as the guest-physical variant, but matches on
 * regions[i].userspace_addr:
 *   userspace_addr <= addr < userspace_addr + memory_size.
 * Returns region_mmap_addr[i] + (addr - userspace_addr), or 0 when no region
 * contains addr.  No debug message is emitted on failure.
 *
 * NOTE(review): the declarator line is absent from this listing; call sites
 * use map_user_mem (vui, addr) - confirm against the full file.
 */
167 static inline void *
169 {
170  int i;
171  for (i = 0; i < vui->nregions; i++)
172  {
173  if ((vui->regions[i].userspace_addr <= addr) &&
174  ((vui->regions[i].userspace_addr + vui->regions[i].memory_size) >
175  addr))
176  {
177  return (void *) (vui->region_mmap_addr[i] + addr -
178  vui->regions[i].userspace_addr);
179  }
180  }
181  return 0;
182 }
183 
/*
 * Return the f_bsize value that fstatfs() reports for the filesystem backing
 * fd.  Callers use the result as the page size when rounding mmap/munmap
 * lengths.
 *
 * NOTE(review): the return value of fstatfs() is not checked; if the call
 * fails, s is uninitialized and the value returned is indeterminate.
 * NOTE(review): the declarator line is absent from this listing; call sites
 * use get_huge_page_size (fd).
 */
184 static long
186 {
187  struct statfs s;
188  fstatfs (fd, &s);
189  return s.f_bsize;
190 }
191 
/*
 * Unmap every memory region currently recorded in vui and close its fd.
 *
 * A region is considered mapped when region_mmap_addr[i] != (void *) -1.
 * For each such region the mapping length is recomputed from memory_size,
 * mmap_offset and the page size of the region's fd, the mapping is removed
 * with munmap(), the slot is marked unmapped and the fd is closed.
 * vui->nregions is reset to 0 at the end.
 *
 * NOTE(review): the declarator line is absent from this listing; call sites
 * use unmap_all_mem_regions (vui).
 */
192 static void
194 {
195  int i, r;
196  for (i = 0; i < vui->nregions; i++)
197  {
198  if (vui->region_mmap_addr[i] != (void *) -1)
199  {
200 
201  long page_sz = get_huge_page_size (vui->region_mmap_fd[i]);
202 
 /* adds a full page before masking, so an already aligned length
    still grows by one page */
203  ssize_t map_sz = (vui->regions[i].memory_size +
204  vui->regions[i].mmap_offset +
205  page_sz) & ~(page_sz - 1);
206 
 /* region_mmap_addr includes mmap_offset; subtract it to get the
    base address that is handed to munmap() */
207  r =
208  munmap (vui->region_mmap_addr[i] - vui->regions[i].mmap_offset,
209  map_sz);
210 
211  DBG_SOCK
212  ("unmap memory region %d addr 0x%lx len 0x%lx page_sz 0x%x", i,
213  vui->region_mmap_addr[i], map_sz, page_sz);
214 
 /* mark the slot unmapped whether or not munmap() succeeded */
215  vui->region_mmap_addr[i] = (void *) -1;
216 
217  if (r == -1)
218  {
219  clib_warning ("failed to unmap memory region (errno %d)",
220  errno);
221  }
222  close (vui->region_mmap_fd[i]);
223  }
224  }
225  vui->nregions = 0;
226 }
227 
228 
/*
 * Read-ready handler: reads up to 8 bytes from uf->file_descriptor into a
 * scratch buffer and discards them.  The result of read() is deliberately
 * ignored and the function always returns 0 (no error).
 *
 * NOTE(review): the declarator line is absent from this listing; the handler
 * installed for call fds elsewhere in the file is
 * vhost_user_callfd_read_ready - confirm this is that function.
 */
229 static clib_error_t *
231 {
232  __attribute__ ((unused)) int n;
233  u8 buff[8];
234  n = read (uf->file_descriptor, ((char *) &buff), 8);
235  return 0;
236 }
237 
238 static inline void
240 {
242  vnet_main_t *vnm = vnet_get_main ();
243  int q;
244 
246 
247  if (vui->unix_file_index != ~0)
248  {
250  vui->unix_file_index = ~0;
251  }
252 
255  close (vui->unix_fd);
256  vui->unix_fd = -1;
257  vui->is_up = 0;
258  for (q = 0; q < vui->num_vrings; q++)
259  {
260  vui->vrings[q].desc = NULL;
261  vui->vrings[q].avail = NULL;
262  vui->vrings[q].used = NULL;
263  vui->vrings[q].log_guest_addr = 0;
264  vui->vrings[q].log_used = 0;
265  }
266 
267  unmap_all_mem_regions (vui);
268  DBG_SOCK ("interface ifindex %d disconnected", vui->sw_if_index);
269 }
270 
/* Granularity of the dirty log: one bit per VHOST_LOG_PAGE bytes. */
271 #define VHOST_LOG_PAGE 0x1000
/*
 * Mark the pages covering [addr, addr + len) as dirty in the log bitmap at
 * vui->log_base_addr.
 *
 * Does nothing when no log is configured (log_base_addr == 0) or when the
 * FEAT_VHOST_F_LOG_ALL bit is not set in vui->features.  Also does nothing
 * (apart from a debug message) when the byte index of the last page to be
 * marked would be >= vui->log_size.
 *
 * NOTE(review): the declarator line and one statement between the range
 * check and the loop are absent from this listing.
 */
272 always_inline void
274 {
275  if (PREDICT_TRUE (vui->log_base_addr == 0
276  || !(vui->features & (1 << FEAT_VHOST_F_LOG_ALL))))
277  {
278  return;
279  }
 /* last page number / 8 is the index of the last bitmap byte touched */
280  if (PREDICT_FALSE ((addr + len - 1) / VHOST_LOG_PAGE / 8 >= vui->log_size))
281  {
282  DBG_SOCK ("vhost_user_log_dirty_pages(): out of range\n");
283  return;
284  }
285 
287  u64 page = addr / VHOST_LOG_PAGE;
288  while (page * VHOST_LOG_PAGE < addr + len)
289  {
 /* '%' binds tighter than '<<': this sets bit (page % 8) of byte page / 8 */
290  ((u8 *) vui->log_base_addr)[page / 8] |= 1 << page % 8;
291  page++;
292  }
293 }
294 
/*
 * Log a write to one member of a virtqueue's used ring.
 *
 * When vq->log_used is set, marks as dirty the range that starts at
 * vq->log_guest_addr + offsetof (vring_used_t, member) and is
 * sizeof (vq->used->member) bytes long.
 *
 * NOTE(review): the expansion is a bare "if (...) { ... }" rather than a
 * do { } while (0) wrapper, so it is only safe as a full statement and not
 * as the body of an unbraced if/else.
 */
295 #define vhost_user_log_dirty_ring(vui, vq, member) \
296  if (PREDICT_FALSE(vq->log_used)) { \
297  vhost_user_log_dirty_pages(vui, vq->log_guest_addr + STRUCT_OFFSET_OF(vring_used_t, member), \
298  sizeof(vq->used->member)); \
299  }
300 
301 static clib_error_t *
303 {
304  int n, i;
305  int fd, number_of_fds = 0;
306  int fds[VHOST_MEMORY_MAX_NREGIONS];
307  vhost_user_msg_t msg;
308  struct msghdr mh;
309  struct iovec iov[1];
311  vhost_user_intf_t *vui;
312  struct cmsghdr *cmsg;
313  uword *p;
314  u8 q;
315  unix_file_t template = { 0 };
316  vnet_main_t *vnm = vnet_get_main ();
317 
318  p = hash_get (vum->vhost_user_interface_index_by_sock_fd,
319  uf->file_descriptor);
320  if (p == 0)
321  {
322  DBG_SOCK ("FD %d doesn't belong to any interface", uf->file_descriptor);
323  return 0;
324  }
325  else
326  vui = vec_elt_at_index (vum->vhost_user_interfaces, p[0]);
327 
328  char control[CMSG_SPACE (VHOST_MEMORY_MAX_NREGIONS * sizeof (int))];
329 
330  memset (&mh, 0, sizeof (mh));
331  memset (control, 0, sizeof (control));
332 
333  for (i = 0; i < VHOST_MEMORY_MAX_NREGIONS; i++)
334  fds[i] = -1;
335 
336  /* set the payload */
337  iov[0].iov_base = (void *) &msg;
338  iov[0].iov_len = VHOST_USER_MSG_HDR_SZ;
339 
340  mh.msg_iov = iov;
341  mh.msg_iovlen = 1;
342  mh.msg_control = control;
343  mh.msg_controllen = sizeof (control);
344 
345  n = recvmsg (uf->file_descriptor, &mh, 0);
346 
347  /* Stop workers to avoid end of the world */
349 
350  if (n != VHOST_USER_MSG_HDR_SZ)
351  goto close_socket;
352 
353  if (mh.msg_flags & MSG_CTRUNC)
354  {
355  goto close_socket;
356  }
357 
358  cmsg = CMSG_FIRSTHDR (&mh);
359 
360  if (cmsg && (cmsg->cmsg_len > 0) && (cmsg->cmsg_level == SOL_SOCKET) &&
361  (cmsg->cmsg_type == SCM_RIGHTS) &&
362  (cmsg->cmsg_len - CMSG_LEN (0) <=
363  VHOST_MEMORY_MAX_NREGIONS * sizeof (int)))
364  {
365  number_of_fds = (cmsg->cmsg_len - CMSG_LEN (0)) / sizeof (int);
366  clib_memcpy (fds, CMSG_DATA (cmsg), number_of_fds * sizeof (int));
367  }
368 
369  /* version 1, no reply bit set */
370  if ((msg.flags & 7) != 1)
371  {
372  DBG_SOCK ("malformed message received. closing socket");
373  goto close_socket;
374  }
375 
376  {
377  int rv __attribute__ ((unused));
378  /* $$$$ pay attention to rv */
379  rv = read (uf->file_descriptor, ((char *) &msg) + n, msg.size);
380  }
381 
382  switch (msg.request)
383  {
385  DBG_SOCK ("if %d msg VHOST_USER_GET_FEATURES", vui->hw_if_index);
386 
387  msg.flags |= 4;
388  msg.u64 = (1 << FEAT_VIRTIO_NET_F_MRG_RXBUF) |
389  (1 << FEAT_VIRTIO_F_ANY_LAYOUT) |
390  (1 << FEAT_VHOST_F_LOG_ALL) |
391  (1 << FEAT_VIRTIO_NET_F_GUEST_ANNOUNCE) |
392  (1 << FEAT_VHOST_USER_F_PROTOCOL_FEATURES);
393  msg.u64 &= vui->feature_mask;
394 
395  msg.size = sizeof (msg.u64);
396  break;
397 
399  DBG_SOCK ("if %d msg VHOST_USER_SET_FEATURES features 0x%016llx",
400  vui->hw_if_index, msg.u64);
401 
402  vui->features = msg.u64;
403 
404  if (vui->features & (1 << FEAT_VIRTIO_NET_F_MRG_RXBUF))
405  vui->virtio_net_hdr_sz = 12;
406  else
407  vui->virtio_net_hdr_sz = 10;
408 
409  vui->is_any_layout =
410  (vui->features & (1 << FEAT_VIRTIO_F_ANY_LAYOUT)) ? 1 : 0;
411 
414  vui->is_up = 0;
415 
416  for (q = 0; q < 2; q++)
417  {
418  vui->vrings[q].desc = 0;
419  vui->vrings[q].avail = 0;
420  vui->vrings[q].used = 0;
421  vui->vrings[q].log_guest_addr = 0;
422  vui->vrings[q].log_used = 0;
423  }
424 
425  DBG_SOCK ("interface %d disconnected", vui->sw_if_index);
426 
427  break;
428 
430  DBG_SOCK ("if %d msg VHOST_USER_SET_MEM_TABLE nregions %d",
431  vui->hw_if_index, msg.memory.nregions);
432 
433  if ((msg.memory.nregions < 1) ||
434  (msg.memory.nregions > VHOST_MEMORY_MAX_NREGIONS))
435  {
436 
437  DBG_SOCK ("number of mem regions must be between 1 and %i",
438  VHOST_MEMORY_MAX_NREGIONS);
439 
440  goto close_socket;
441  }
442 
443  if (msg.memory.nregions != number_of_fds)
444  {
445  DBG_SOCK ("each memory region must have FD");
446  goto close_socket;
447  }
448  unmap_all_mem_regions (vui);
449  for (i = 0; i < msg.memory.nregions; i++)
450  {
451  clib_memcpy (&(vui->regions[i]), &msg.memory.regions[i],
452  sizeof (vhost_user_memory_region_t));
453 
454  long page_sz = get_huge_page_size (fds[i]);
455 
456  /* align size to the page size reported for this fd (not necessarily 2M) */
457  ssize_t map_sz = (vui->regions[i].memory_size +
458  vui->regions[i].mmap_offset +
459  page_sz) & ~(page_sz - 1);
460 
461  vui->region_mmap_addr[i] = mmap (0, map_sz, PROT_READ | PROT_WRITE,
462  MAP_SHARED, fds[i], 0);
463 
464  DBG_SOCK
465  ("map memory region %d addr 0 len 0x%lx fd %d mapped 0x%lx "
466  "page_sz 0x%x", i, map_sz, fds[i], vui->region_mmap_addr[i],
467  page_sz);
468 
469  if (vui->region_mmap_addr[i] == MAP_FAILED)
470  {
471  clib_warning ("failed to map memory. errno is %d", errno);
472  goto close_socket;
473  }
474  vui->region_mmap_addr[i] += vui->regions[i].mmap_offset;
475  vui->region_mmap_fd[i] = fds[i];
476  }
477  vui->nregions = msg.memory.nregions;
478  break;
479 
481  DBG_SOCK ("if %d msg VHOST_USER_SET_VRING_NUM idx %d num %d",
482  vui->hw_if_index, msg.state.index, msg.state.num);
483 
484  if ((msg.state.num > 32768) || /* maximum ring size is 32768 */
485  (msg.state.num == 0) || /* it cannot be zero */
486  (msg.state.num % 2)) /* must be power of 2 */
487  goto close_socket;
488  vui->vrings[msg.state.index].qsz = msg.state.num;
489  break;
490 
492  DBG_SOCK ("if %d msg VHOST_USER_SET_VRING_ADDR idx %d",
493  vui->hw_if_index, msg.state.index);
494 
495  vui->vrings[msg.state.index].desc = (vring_desc_t *)
496  map_user_mem (vui, msg.addr.desc_user_addr);
497  vui->vrings[msg.state.index].used = (vring_used_t *)
498  map_user_mem (vui, msg.addr.used_user_addr);
499  vui->vrings[msg.state.index].avail = (vring_avail_t *)
500  map_user_mem (vui, msg.addr.avail_user_addr);
501 
502  if ((vui->vrings[msg.state.index].desc == NULL) ||
503  (vui->vrings[msg.state.index].used == NULL) ||
504  (vui->vrings[msg.state.index].avail == NULL))
505  {
506  DBG_SOCK ("failed to map user memory for hw_if_index %d",
507  vui->hw_if_index);
508  goto close_socket;
509  }
510 
511  vui->vrings[msg.state.index].log_guest_addr = msg.addr.log_guest_addr;
512  vui->vrings[msg.state.index].log_used =
513  (msg.addr.flags & (1 << VHOST_VRING_F_LOG)) ? 1 : 0;
514 
515  /* Spec says: If VHOST_USER_F_PROTOCOL_FEATURES has not been negotiated,
516  the ring is initialized in an enabled state. */
517 
518  if (!(vui->features & (1 << FEAT_VHOST_USER_F_PROTOCOL_FEATURES)))
519  {
520  vui->vrings[msg.state.index].enabled = 1;
521  }
522 
523  vui->vrings[msg.state.index].last_used_idx =
524  vui->vrings[msg.state.index].used->idx;
525 
526  /* tell driver that we don't want interrupts */
527  vui->vrings[msg.state.index].used->flags |= 1;
528  break;
529 
531  DBG_SOCK ("if %d msg VHOST_USER_SET_OWNER", vui->hw_if_index);
532  break;
533 
535  DBG_SOCK ("if %d msg VHOST_USER_RESET_OWNER", vui->hw_if_index);
536  break;
537 
539  DBG_SOCK ("if %d msg VHOST_USER_SET_VRING_CALL u64 %d",
540  vui->hw_if_index, msg.u64);
541 
542  q = (u8) (msg.u64 & 0xFF);
543 
544  if (!(msg.u64 & 0x100))
545  {
546  if (number_of_fds != 1)
547  goto close_socket;
548 
549  /* if there is old fd, delete it */
550  if (vui->vrings[q].callfd)
551  {
553  vui->vrings[q].callfd_idx);
554  unix_file_del (&unix_main, uf);
555  }
556  vui->vrings[q].callfd = fds[0];
557  template.read_function = vhost_user_callfd_read_ready;
558  template.file_descriptor = fds[0];
559  vui->vrings[q].callfd_idx = unix_file_add (&unix_main, &template);
560  }
561  else
562  vui->vrings[q].callfd = -1;
563  break;
564 
566  DBG_SOCK ("if %d msg VHOST_USER_SET_VRING_KICK u64 %d",
567  vui->hw_if_index, msg.u64);
568 
569  q = (u8) (msg.u64 & 0xFF);
570 
571  if (!(msg.u64 & 0x100))
572  {
573  if (number_of_fds != 1)
574  goto close_socket;
575 
576  vui->vrings[q].kickfd = fds[0];
577  }
578  else
579  vui->vrings[q].kickfd = -1;
580  break;
581 
583  DBG_SOCK ("if %d msg VHOST_USER_SET_VRING_ERR u64 %d",
584  vui->hw_if_index, msg.u64);
585 
586  q = (u8) (msg.u64 & 0xFF);
587 
588  if (!(msg.u64 & 0x100))
589  {
590  if (number_of_fds != 1)
591  goto close_socket;
592 
593  fd = fds[0];
594  }
595  else
596  fd = -1;
597 
598  vui->vrings[q].errfd = fd;
599  break;
600 
602  DBG_SOCK ("if %d msg VHOST_USER_SET_VRING_BASE idx %d num %d",
603  vui->hw_if_index, msg.state.index, msg.state.num);
604 
605  vui->vrings[msg.state.index].last_avail_idx = msg.state.num;
606  break;
607 
609  DBG_SOCK ("if %d msg VHOST_USER_GET_VRING_BASE idx %d num %d",
610  vui->hw_if_index, msg.state.index, msg.state.num);
611 
612  /* Spec says: Client must [...] stop ring upon receiving VHOST_USER_GET_VRING_BASE. */
613  vui->vrings[msg.state.index].enabled = 0;
614 
615  msg.state.num = vui->vrings[msg.state.index].last_avail_idx;
616  msg.flags |= 4;
617  msg.size = sizeof (msg.state);
618  break;
619 
620  case VHOST_USER_NONE:
621  DBG_SOCK ("if %d msg VHOST_USER_NONE", vui->hw_if_index);
622 
623  break;
624 
626  {
627  DBG_SOCK ("if %d msg VHOST_USER_SET_LOG_BASE", vui->hw_if_index);
628 
629  if (msg.size != sizeof (msg.log))
630  {
631  DBG_SOCK
632  ("invalid msg size for VHOST_USER_SET_LOG_BASE: %d instead of %d",
633  msg.size, sizeof (msg.log));
634  goto close_socket;
635  }
636 
637  if (!
639  {
640  DBG_SOCK
641  ("VHOST_USER_PROTOCOL_F_LOG_SHMFD not set but VHOST_USER_SET_LOG_BASE received");
642  goto close_socket;
643  }
644 
645  fd = fds[0];
646  /* align size to the page size reported for this fd (not necessarily 2M) */
647  long page_sz = get_huge_page_size (fd);
648  ssize_t map_sz =
649  (msg.log.size + msg.log.offset + page_sz) & ~(page_sz - 1);
650 
651  vui->log_base_addr = mmap (0, map_sz, PROT_READ | PROT_WRITE,
652  MAP_SHARED, fd, 0);
653 
654  DBG_SOCK
655  ("map log region addr 0 len 0x%lx off 0x%lx fd %d mapped 0x%lx",
656  map_sz, msg.log.offset, fd, vui->log_base_addr);
657 
658  if (vui->log_base_addr == MAP_FAILED)
659  {
660  clib_warning ("failed to map memory. errno is %d", errno);
661  goto close_socket;
662  }
663 
664  vui->log_base_addr += msg.log.offset;
665  vui->log_size = msg.log.size;
666 
667  msg.flags |= 4;
668  msg.size = sizeof (msg.u64);
669 
670  break;
671  }
672 
674  DBG_SOCK ("if %d msg VHOST_USER_SET_LOG_FD", vui->hw_if_index);
675 
676  break;
677 
679  DBG_SOCK ("if %d msg VHOST_USER_GET_PROTOCOL_FEATURES",
680  vui->hw_if_index);
681 
682  msg.flags |= 4;
683  msg.u64 = (1 << VHOST_USER_PROTOCOL_F_LOG_SHMFD);
684  msg.size = sizeof (msg.u64);
685  break;
686 
688  DBG_SOCK ("if %d msg VHOST_USER_SET_PROTOCOL_FEATURES features 0x%lx",
689  vui->hw_if_index, msg.u64);
690 
691  vui->protocol_features = msg.u64;
692 
693  break;
694 
696  DBG_SOCK ("if %d VHOST_USER_SET_VRING_ENABLE, enable: %d",
697  vui->hw_if_index, msg.state.num);
698  vui->vrings[msg.state.index].enabled = msg.state.num;
699  break;
700 
701  default:
702  DBG_SOCK ("unknown vhost-user message %d received. closing socket",
703  msg.request);
704  goto close_socket;
705  }
706 
707  /* if we have pointers to descriptor table, go up */
708  if (!vui->is_up &&
711  {
712 
713  DBG_SOCK ("interface %d connected", vui->sw_if_index);
714 
717  vui->is_up = 1;
718 
719  }
720 
721  /* if we need to reply */
722  if (msg.flags & 4)
723  {
724  n =
725  send (uf->file_descriptor, &msg, VHOST_USER_MSG_HDR_SZ + msg.size, 0);
726  if (n != (msg.size + VHOST_USER_MSG_HDR_SZ))
727  goto close_socket;
728  }
729 
731  return 0;
732 
733 close_socket:
736  return 0;
737 }
738 
739 static clib_error_t *
741 {
743  vhost_user_intf_t *vui;
744  uword *p;
745 
747  uf->file_descriptor);
748  if (p == 0)
749  {
750  DBG_SOCK ("fd %d doesn't belong to any interface", uf->file_descriptor);
751  return 0;
752  }
753  else
754  vui = vec_elt_at_index (vum->vhost_user_interfaces, p[0]);
755 
757  return 0;
758 }
759 
760 static clib_error_t *
762 {
763  int client_fd, client_len;
764  struct sockaddr_un client;
765  unix_file_t template = { 0 };
767  vhost_user_intf_t *vui;
768  uword *p;
769 
771  uf->file_descriptor);
772  if (p == 0)
773  {
774  DBG_SOCK ("fd %d doesn't belong to any interface", uf->file_descriptor);
775  return 0;
776  }
777  else
778  vui = vec_elt_at_index (vum->vhost_user_interfaces, p[0]);
779 
780  client_len = sizeof (client);
781  client_fd = accept (uf->file_descriptor,
782  (struct sockaddr *) &client,
783  (socklen_t *) & client_len);
784 
785  if (client_fd < 0)
786  return clib_error_return_unix (0, "accept");
787 
788  template.read_function = vhost_user_socket_read;
789  template.error_function = vhost_user_socket_error;
790  template.file_descriptor = client_fd;
791  vui->unix_file_index = unix_file_add (&unix_main, &template);
792 
793  vui->client_fd = client_fd;
795  vui - vum->vhost_user_interfaces);
796 
797  return 0;
798 }
799 
800 static clib_error_t *
802 {
803  clib_error_t *error;
806 
807  error = vlib_call_init_function (vm, ip4_init);
808  if (error)
809  return error;
810 
812  hash_create (0, sizeof (uword));
814  hash_create (0, sizeof (uword));
816  hash_create (0, sizeof (uword));
817  vum->coalesce_frames = 32;
818  vum->coalesce_time = 1e-3;
819 
822 
823  return 0;
824 }
825 
827 
828 static clib_error_t *
830 {
831  /* TODO cleanup */
832  return 0;
833 }
834 
836 
837 enum
838 {
842 };
843 
844 
845 typedef struct
846 {
849 #if VHOST_USER_COPY_TX_HDR == 1
850  virtio_net_hdr_t hdr;
851 #endif
853 
854 static u8 *
856 {
857  CLIB_UNUSED (vlib_main_t * vm) = va_arg (*va, vlib_main_t *);
858  CLIB_UNUSED (vlib_node_t * node) = va_arg (*va, vlib_node_t *);
863  t->device_index);
864 
866 
867 #if VHOST_USER_COPY_TX_HDR == 1
868  uword indent = format_get_indent (s);
869 #endif
870 
871  s = format (s, "%U virtqueue %d",
873 
874 #if VHOST_USER_COPY_TX_HDR == 1
875  s = format (s, "\n%Uvirtio_net_hdr flags 0x%02x gso_type %u hdr_len %u",
876  format_white_space, indent,
877  t->hdr.flags, t->hdr.gso_type, t->hdr.hdr_len);
878 #endif
879 
880  return s;
881 }
882 
883 void
885  vlib_node_runtime_t * node,
886  vhost_user_intf_t * vui, i16 virtqueue)
887 {
888  u32 *b, n_left;
890 
892 
893  n_left = vec_len (vui->d_trace_buffers);
894  b = vui->d_trace_buffers;
895 
896  while (n_left >= 1)
897  {
898  u32 bi0;
899  vlib_buffer_t *b0;
901 
902  bi0 = b[0];
903  n_left -= 1;
904 
905  b0 = vlib_get_buffer (vm, bi0);
906  vlib_trace_buffer (vm, node, next_index, b0, /* follow_chain */ 0);
907  t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
908  t0->virtqueue = virtqueue;
909  t0->device_index = vui - vum->vhost_user_interfaces;
910 #if VHOST_USER_COPY_TX_HDR == 1
911  clib_memcpy (&t0->hdr, b0->pre_data, sizeof (virtio_net_hdr_t));
912 #endif
913 
914  b += 1;
915  }
916 }
917 
/*
 * Signal the peer on a virtqueue's call fd and restart interrupt coalescing.
 *
 * Writes the 8-byte value 1 to vq->callfd, clears vq->n_since_last_int and
 * sets vq->int_deadline to the current time plus vum->coalesce_time.
 * The result of write() is stored but not acted upon.
 *
 * NOTE(review): the declarator line and the declaration of vum are absent
 * from this listing; call sites use vhost_user_send_call (vm, vq).
 */
918 static inline void
920 {
922  u64 x = 1;
923  int rv __attribute__ ((unused));
924  /* $$$$ pay attention to rv */
925  rv = write (vq->callfd, &x, sizeof (x));
926  vq->n_since_last_int = 0;
927  vq->int_deadline = vlib_time_now (vm) + vum->coalesce_time;
928 }
929 
930 
931 static u32
933  vhost_user_main_t * vum,
935 {
938  uword n_rx_packets = 0, n_rx_bytes = 0;
939  uword n_left;
940  u32 n_left_to_next, *to_next;
941  u32 next_index = 0;
942  u32 next0;
943  uword n_trace = vlib_get_trace_count (vm, node);
944  u16 qsz_mask;
945  u32 cpu_index, rx_len, drops, flush;
946  f64 now = vlib_time_now (vm);
947 
949 
950  /* no descriptor ptr - bail out */
951  if (PREDICT_FALSE (!txvq->desc || !txvq->avail || !txvq->enabled))
952  return 0;
953 
954  /* do we have pending interrupts ? */
955  if ((txvq->n_since_last_int) && (txvq->int_deadline < now))
956  vhost_user_send_call (vm, txvq);
957 
958  if ((rxvq->n_since_last_int) && (rxvq->int_deadline < now))
959  vhost_user_send_call (vm, rxvq);
960 
961  /* only bit 0 of avail.flags is used so we don't want to deal with this
962  interface if any other bit is set */
963  if (PREDICT_FALSE (txvq->avail->flags & 0xFFFE))
964  return 0;
965 
966  n_left = (u16) (txvq->avail->idx - txvq->last_avail_idx);
967 
968  /* nothing to do */
969  if (PREDICT_FALSE (n_left == 0))
970  return 0;
971 
972  if (PREDICT_FALSE (n_left == txvq->qsz))
973  {
974  //Informational error logging when VPP is not receiving packets fast enough
975  vlib_error_count (vm, node->node_index,
976  VHOST_USER_INPUT_FUNC_ERROR_FULL_RX_QUEUE, 1);
977  }
978 
979  if (PREDICT_FALSE (!vui->admin_up))
980  {
981  /* if intf is admin down, just drop all packets waiting in the ring */
982  txvq->last_avail_idx = txvq->last_used_idx = txvq->avail->idx;
984  txvq->used->idx = txvq->last_used_idx;
985  vhost_user_log_dirty_ring (vui, txvq, idx);
986  vhost_user_send_call (vm, txvq);
987  return 0;
988  }
989 
990  qsz_mask = txvq->qsz - 1;
991  cpu_index = os_get_cpu_number ();
992  drops = 0;
993  flush = 0;
994 
995  if (n_left > VLIB_FRAME_SIZE)
996  n_left = VLIB_FRAME_SIZE;
997 
998  /* Allocate some buffers.
999  * Note that buffers that are chained for jumbo
1000  * frames are allocated separately using a slower path.
1001  * The idea is to be certain to have enough buffers at least
1002  * to cycle through the descriptors without having to check for errors.
1003  * For jumbo frames, the bottleneck is memory copy anyway.
1004  */
1005  if (PREDICT_FALSE (!vum->rx_buffers[cpu_index]))
1006  {
1007  vec_alloc (vum->rx_buffers[cpu_index], 2 * VLIB_FRAME_SIZE);
1008 
1009  if (PREDICT_FALSE (!vum->rx_buffers[cpu_index]))
1010  flush = n_left; //Drop all input
1011  }
1012 
1013  if (PREDICT_FALSE (_vec_len (vum->rx_buffers[cpu_index]) < n_left))
1014  {
1015  u32 curr_len = _vec_len (vum->rx_buffers[cpu_index]);
1016  _vec_len (vum->rx_buffers[cpu_index]) +=
1018  vum->rx_buffers[cpu_index] +
1019  curr_len,
1020  2 * VLIB_FRAME_SIZE - curr_len,
1022 
1023  if (PREDICT_FALSE (n_left > _vec_len (vum->rx_buffers[cpu_index])))
1024  flush = n_left - _vec_len (vum->rx_buffers[cpu_index]);
1025  }
1026 
1027  if (PREDICT_FALSE (flush))
1028  {
1029  //Remove some input buffers
1030  drops += flush;
1031  n_left -= flush;
1033  VHOST_USER_INPUT_FUNC_ERROR_NO_BUFFER, flush);
1034  while (flush)
1035  {
1036  u16 desc_chain_head =
1037  txvq->avail->ring[txvq->last_avail_idx & qsz_mask];
1038  txvq->last_avail_idx++;
1039  txvq->used->ring[txvq->last_used_idx & qsz_mask].id =
1040  desc_chain_head;
1041  txvq->used->ring[txvq->last_used_idx & qsz_mask].len = 0;
1042  vhost_user_log_dirty_ring (vui, txvq,
1043  ring[txvq->last_used_idx & qsz_mask]);
1044  txvq->last_used_idx++;
1045  flush--;
1046  }
1047  }
1048 
1049  rx_len = vec_len (vum->rx_buffers[cpu_index]); //vector might be null
1050  while (n_left > 0)
1051  {
1052  vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
1053 
1054  while (n_left > 0 && n_left_to_next > 0)
1055  {
1056  vlib_buffer_t *b_head, *b_current;
1057  u32 bi_head, bi_current;
1058  u16 desc_chain_head, desc_current;
1059  u8 error = VHOST_USER_INPUT_FUNC_ERROR_NO_ERROR;
1060 
1061  desc_chain_head = desc_current =
1062  txvq->avail->ring[txvq->last_avail_idx & qsz_mask];
1063  bi_head = bi_current = vum->rx_buffers[cpu_index][--rx_len];
1064  b_head = b_current = vlib_get_buffer (vm, bi_head);
1065  vlib_buffer_chain_init (b_head);
1066 
1067  uword offset;
1068  if (PREDICT_TRUE (vui->is_any_layout) ||
1069  !(txvq->desc[desc_current].flags & VIRTQ_DESC_F_NEXT))
1070  {
1071  /* ANYLAYOUT or single buffer */
1072  offset = vui->virtio_net_hdr_sz;
1073  }
1074  else
1075  {
1076  /* CSR case without ANYLAYOUT, skip 1st buffer */
1077  offset = txvq->desc[desc_current].len;
1078  }
1079 
1080  while (1)
1081  {
1082  void *buffer_addr =
1083  map_guest_mem (vui, txvq->desc[desc_current].addr);
1084  if (PREDICT_FALSE (buffer_addr == 0))
1085  {
1086  error = VHOST_USER_INPUT_FUNC_ERROR_MMAP_FAIL;
1087  break;
1088  }
1089 
1090 #if VHOST_USER_COPY_TX_HDR == 1
1091  if (PREDICT_TRUE (offset))
1092  clib_memcpy (b->pre_data, buffer_addr, sizeof (virtio_net_hdr_t)); /* 12 byte hdr is not used on tx */
1093 #endif
1094 
1095  if (txvq->desc[desc_current].len > offset)
1096  {
1097  u16 len = txvq->desc[desc_current].len - offset;
1100  b_head,
1101  &b_current,
1102  buffer_addr
1103  +
1104  offset,
1105  len);
1106 
1107  if (copied != len)
1108  {
1109  error = VHOST_USER_INPUT_FUNC_ERROR_NO_BUFFER;
1110  break;
1111  }
1112  }
1113  offset = 0;
1114 
1115  /* if next flag is set, take next desc in the chain */
1116  if (txvq->desc[desc_current].flags & VIRTQ_DESC_F_NEXT)
1117  desc_current = txvq->desc[desc_current].next;
1118  else
1119  break;
1120  }
1121 
1122  /* consume the descriptor and return it as used */
1123  txvq->last_avail_idx++;
1124  txvq->used->ring[txvq->last_used_idx & qsz_mask].id =
1125  desc_chain_head;
1126  txvq->used->ring[txvq->last_used_idx & qsz_mask].len = 0;
1127  vhost_user_log_dirty_ring (vui, txvq,
1128  ring[txvq->last_used_idx & qsz_mask]);
1129  txvq->last_used_idx++;
1130 
1131  if (PREDICT_FALSE (b_head->current_length < 14 &&
1132  error == VHOST_USER_INPUT_FUNC_ERROR_NO_ERROR))
1133  {
1134  error = VHOST_USER_INPUT_FUNC_ERROR_UNDERSIZED_FRAME;
1135  }
1136 
1138 
1139  vnet_buffer (b_head)->sw_if_index[VLIB_RX] = vui->sw_if_index;
1140  vnet_buffer (b_head)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1141  b_head->error = node->errors[error];
1142 
1143  if (PREDICT_FALSE (n_trace > n_rx_packets))
1144  vec_add1 (vui->d_trace_buffers, bi_head);
1145 
1146  if (PREDICT_FALSE (error))
1147  {
1148  drops++;
1149  next0 = VHOST_USER_RX_NEXT_DROP;
1150  }
1151  else
1152  {
1153  n_rx_bytes +=
1154  b_head->current_length +
1156  n_rx_packets++;
1158  }
1159 
1160  to_next[0] = bi_head;
1161  to_next++;
1162  n_left_to_next--;
1163  vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
1164  to_next, n_left_to_next,
1165  bi_head, next0);
1166  n_left--;
1167  }
1168 
1169  vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1170  }
1171 
1172  if (PREDICT_TRUE (vum->rx_buffers[cpu_index] != 0))
1173  _vec_len (vum->rx_buffers[cpu_index]) = rx_len;
1174 
1175  /* give buffers back to driver */
1177  txvq->used->idx = txvq->last_used_idx;
1178  vhost_user_log_dirty_ring (vui, txvq, idx);
1179 
1180  if (PREDICT_FALSE (vec_len (vui->d_trace_buffers) > 0))
1181  {
1183  vlib_set_trace_count (vm, node,
1184  n_trace - vec_len (vui->d_trace_buffers));
1185  }
1186 
1187  /* interrupt (call) handling */
1188  if ((txvq->callfd > 0) && !(txvq->avail->flags & 1))
1189  {
1190  txvq->n_since_last_int += n_rx_packets;
1191 
1192  if (txvq->n_since_last_int > vum->coalesce_frames)
1193  vhost_user_send_call (vm, txvq);
1194  }
1195 
1196  if (PREDICT_FALSE (drops))
1197  {
1201  vui->sw_if_index, drops);
1202  }
1203 
1204  /* increase rx counters */
1208  os_get_cpu_number (), vui->sw_if_index, n_rx_packets, n_rx_bytes);
1209 
1210  return n_rx_packets;
1211 }
1212 
/*
 * Input node function: poll every vhost-user interface that is up.
 *
 * Iterates over vum->vhost_user_interfaces and calls vhost_user_if_input()
 * for each interface with is_up set, summing the per-interface results.
 * When built with DPDK > 0, an interface at index i is polled only if
 * i % input_cpu_count equals this cpu's index relative to
 * input_cpu_first_index.
 *
 * Returns the accumulated n_rx_packets.
 *
 * NOTE(review): the first declarator line and the declaration of vum are
 * absent from this listing; the node registration refers to this function
 * as vhost_user_input.
 */
1213 static uword
1215  vlib_node_runtime_t * node, vlib_frame_t * f)
1216 {
1218 #if DPDK > 0
1219  dpdk_main_t *dm = &dpdk_main;
1220  u32 cpu_index = os_get_cpu_number ();
1221 #endif
1222  vhost_user_intf_t *vui;
1223  uword n_rx_packets = 0;
1224  int i;
1225 
1226  for (i = 0; i < vec_len (vum->vhost_user_interfaces); i++)
1227  {
1228  vui = vec_elt_at_index (vum->vhost_user_interfaces, i);
1229  if (vui->is_up)
1230  {
 /* with DPDK, the following if guards the single call statement below */
1231 #if DPDK > 0
1232  if ((i % dm->input_cpu_count) ==
1233  (cpu_index - dm->input_cpu_first_index))
1234 #endif
1235  n_rx_packets += vhost_user_if_input (vm, vum, vui, node);
1236  }
1237  }
1238  return n_rx_packets;
1239 }
1240 
1241 /* *INDENT-OFF* */
1243  .function = vhost_user_input,
1244  .type = VLIB_NODE_TYPE_INPUT,
1245  .name = "vhost-user-input",
1246 
1247  /* Will be enabled if/when hardware is detected. */
1248  .state = VLIB_NODE_STATE_DISABLED,
1249 
1250  .format_buffer = format_ethernet_header_with_length,
1251  .format_trace = format_vhost_user_input_trace,
1252 
1253  .n_errors = VHOST_USER_INPUT_FUNC_N_ERROR,
1254  .error_strings = vhost_user_input_func_error_strings,
1255 
1256  .n_next_nodes = VHOST_USER_RX_N_NEXT,
1257  .next_nodes = {
1258  [VHOST_USER_RX_NEXT_DROP] = "error-drop",
1259  [VHOST_USER_RX_NEXT_ETHERNET_INPUT] = "ethernet-input",
1260  },
1261 };
1262 
1264 /* *INDENT-ON* */
1265 
/*
 * Device-class TX function (registered as .tx_function of
 * vhost_user_dev_class).
 *
 * Copies every buffer chain of `frame` into guest memory described by the
 * descriptors the guest posted on the avail ring of
 * vrings[VHOST_NET_VRING_IDX_RX], records each consumed descriptor chain in
 * the used ring, publishes the new used index and, when enough packets have
 * accumulated, signals the guest through vhost_user_send_call().
 *
 * All buffers of the frame are freed before returning, whether or not they
 * were delivered; packets left undelivered after an error are accounted with
 * vlib_error_count().  The return value is always frame->n_vectors.
 *
 * Locking: when vui->lockp is set, the function spins on it before touching
 * the rings.  Exits taken BEFORE the lock is acquired must not clear it
 * (that would release a lock owned by another thread), so they jump to
 * `done3`, which sits after the unlock.  Exits taken once the lock is held
 * use `done` / `done2`.
 */
1266 static uword
1268  vlib_node_runtime_t * node, vlib_frame_t * frame)
1269 {
1270  u32 *buffers = vlib_frame_args (frame);
1271  u32 n_left = 0;
1272  u16 used_index;
1274  uword n_packets = 0;
1275  vnet_interface_output_runtime_t *rd = (void *) node->runtime_data;
1276  vhost_user_intf_t *vui =
1278  vhost_user_vring_t *rxvq = &vui->vrings[VHOST_NET_VRING_IDX_RX];
1279  u16 qsz_mask;
1280  u8 error = VHOST_USER_TX_FUNC_ERROR_NONE;
1281
1282  n_left = n_packets = frame->n_vectors;
1283
 /* Lock not taken yet: leave through done3 so it is not released here. */
1284  if (PREDICT_FALSE (!vui->is_up))
1285  goto done3;
1286
1287  if (PREDICT_FALSE
1288  (!rxvq->desc || !rxvq->avail || vui->sock_errno != 0 || !rxvq->enabled))
1289  {
1290  error = VHOST_USER_TX_FUNC_ERROR_NOT_READY;
 /* Lock not taken yet: leave through done3 so it is not released here. */
1291  goto done3;
1292  }
1293
 /* Spin until the per-interface lock is ours (only when a lock exists). */
1294  if (PREDICT_FALSE (vui->lockp != 0))
1295  {
1296  while (__sync_lock_test_and_set (vui->lockp, 1))
1297  ;
1298  }
1299
1300  /* only bit 0 of avail.flags is used so we don't want to deal with this
1301  interface if any other bit is set */
1302  if (PREDICT_FALSE (rxvq->avail->flags & 0xFFFE))
1303  {
1304  error = VHOST_USER_TX_FUNC_ERROR_NOT_READY;
1305  goto done2;
1306  }
1307
 /* The guest has not posted any descriptor: nothing can be delivered. */
1308  if (PREDICT_FALSE ((rxvq->avail->idx == rxvq->last_avail_idx)))
1309  {
1310  error = VHOST_USER_TX_FUNC_ERROR_PKT_DROP_NOBUF;
1311  goto done2;
1312  }
1313
1314  used_index = rxvq->used->idx;
1315  qsz_mask = rxvq->qsz - 1; /* qsz is always power of 2 */
1316
 /* One iteration per packet (buffer chain) of the frame. */
1317  while (n_left > 0)
1318  {
1319  vlib_buffer_t *b0, *current_b0;
1320  u16 desc_chain_head, desc_current, desc_len;
1321  void *buffer_addr;
1322  uword offset;
1323
1324  if (n_left >= 2)
1325  vlib_prefetch_buffer_with_index (vm, buffers[1], LOAD);
1326
1327  b0 = vlib_get_buffer (vm, buffers[0]);
1328  buffers++;
1329  n_left--;
1330
1331  if (PREDICT_FALSE (rxvq->last_avail_idx == rxvq->avail->idx))
1332  {
1333  error = VHOST_USER_TX_FUNC_ERROR_PKT_DROP_NOBUF;
1334  goto done;
1335  }
1336
 /* Take the next available descriptor chain; the virtio-net header
    occupies the first virtio_net_hdr_sz bytes of its first descriptor. */
1337  desc_current = desc_chain_head =
1338  rxvq->avail->ring[rxvq->last_avail_idx & qsz_mask];
1339  offset = vui->virtio_net_hdr_sz;
1340  desc_len = offset;
1341  if (PREDICT_FALSE
1342  (!(buffer_addr =
1343  map_guest_mem (vui, rxvq->desc[desc_current].addr))))
1344  {
1345  error = VHOST_USER_TX_FUNC_ERROR_MMAP_FAIL;
1346  goto done;
1347  }
1348  CLIB_PREFETCH (buffer_addr, clib_min (rxvq->desc[desc_current].len,
1349  4 * CLIB_CACHE_LINE_BYTES),
1350  STORE);
1351
1352  virtio_net_hdr_mrg_rxbuf_t *hdr =
1353  (virtio_net_hdr_mrg_rxbuf_t *) buffer_addr;
1354  hdr->hdr.flags = 0;
1355  hdr->hdr.gso_type = 0;
1356
1357  vhost_user_log_dirty_pages (vui, rxvq->desc[desc_current].addr,
1358  vui->virtio_net_hdr_sz);
1359
 /* A 12-byte header is the mergeable-rx-buffer layout, which carries
    the num_buffers field. */
1360  if (vui->virtio_net_hdr_sz == 12)
1361  hdr->num_buffers = 1;
1362
1363  u16 bytes_left = b0->current_length;
1364  buffer_addr += offset;
1365  current_b0 = b0;
1366
1367  //FIXME: This was in the code but I don't think it is valid
1368  /*if (PREDICT_FALSE(!vui->is_any_layout && (rxvq->desc[desc_current].flags & VIRTQ_DESC_F_NEXT))) {
1369  rxvq->desc[desc_current].len = vui->virtio_net_hdr_sz;
1370  } */
1371
 /* Copy loop: alternately refill the input side (next buffer of the
    chain) and the output side (next descriptor) until the packet ends. */
1372  while (1)
1373  {
1374  if (!bytes_left)
1375  { //Get new input
1376  if (current_b0->flags & VLIB_BUFFER_NEXT_PRESENT)
1377  {
1378  current_b0 = vlib_get_buffer (vm, current_b0->next_buffer);
1379  bytes_left = current_b0->current_length;
1380  }
1381  else
1382  {
1383  //End of packet
1384  break;
1385  }
1386  }
1387
1388  if (rxvq->desc[desc_current].len <= offset)
1389  { //Get new output
1390  if (rxvq->desc[desc_current].flags & VIRTQ_DESC_F_NEXT)
1391  {
1392  offset = 0;
1393  desc_current = rxvq->desc[desc_current].next;
1394  if (PREDICT_FALSE
1395  (!(buffer_addr =
1396  map_guest_mem (vui, rxvq->desc[desc_current].addr))))
1397  {
1398  used_index -= hdr->num_buffers - 1;
1399  rxvq->last_avail_idx -= hdr->num_buffers - 1;
1400  error = VHOST_USER_TX_FUNC_ERROR_MMAP_FAIL;
1401  goto done;
1402  }
1403  }
1404  else if (vui->virtio_net_hdr_sz == 12)
1405  { //MRG is available
1406
1407  //Move from available to used buffer
1408  rxvq->used->ring[used_index & qsz_mask].id =
1409  desc_chain_head;
1410  rxvq->used->ring[used_index & qsz_mask].len = desc_len;
1411  vhost_user_log_dirty_ring (vui, rxvq,
1412  ring[used_index & qsz_mask]);
1413  rxvq->last_avail_idx++;
1414  used_index++;
1415  hdr->num_buffers++;
1416
1417  if (PREDICT_FALSE
1418  (rxvq->last_avail_idx == rxvq->avail->idx))
1419  {
1420  //Dequeue queued descriptors for this packet
1421  used_index -= hdr->num_buffers - 1;
1422  rxvq->last_avail_idx -= hdr->num_buffers - 1;
1423  error = VHOST_USER_TX_FUNC_ERROR_PKT_DROP_NOBUF;
1424  goto done;
1425  }
1426
1427  //Look at next one
1428  desc_chain_head =
1429  rxvq->avail->ring[rxvq->last_avail_idx & qsz_mask];
1430  desc_current = desc_chain_head;
1431  desc_len = 0;
1432  offset = 0;
1433  if (PREDICT_FALSE
1434  (!(buffer_addr =
1435  map_guest_mem (vui, rxvq->desc[desc_current].addr))))
1436  {
1437  //Dequeue queued descriptors for this packet
1438  used_index -= hdr->num_buffers - 1;
1439  rxvq->last_avail_idx -= hdr->num_buffers - 1;
1440  error = VHOST_USER_TX_FUNC_ERROR_MMAP_FAIL;
1441  goto done;
1442  }
1443  }
1444  else
1445  {
 /* No chained descriptor and no mergeable buffers: the
    packet does not fit. */
1446  error = VHOST_USER_TX_FUNC_ERROR_PKT_DROP_NOBUF;
1447  goto done;
1448  }
1449  }
1450
 /* Copy as much as fits in what is left of the current descriptor. */
1451  u16 bytes_to_copy =
1452  bytes_left >
1453  (rxvq->desc[desc_current].len -
1454  offset) ? (rxvq->desc[desc_current].len - offset) : bytes_left;
1455  clib_memcpy (buffer_addr,
1456  vlib_buffer_get_current (current_b0) +
1457  current_b0->current_length - bytes_left,
1458  bytes_to_copy);
1459
1461  rxvq->desc[desc_current].addr + offset,
1462  bytes_to_copy);
1463  bytes_left -= bytes_to_copy;
1464  offset += bytes_to_copy;
1465  buffer_addr += bytes_to_copy;
1466  desc_len += bytes_to_copy;
1467  }
1468
1469  //Move from available to used ring
1470  rxvq->used->ring[used_index & qsz_mask].id = desc_chain_head;
1471  rxvq->used->ring[used_index & qsz_mask].len = desc_len;
1472  vhost_user_log_dirty_ring (vui, rxvq, ring[used_index & qsz_mask]);
1473
1474  rxvq->last_avail_idx++;
1475  used_index++;
1476  }
1477
1478 done:
 /* Publish everything that was placed in the used ring. */
1480  rxvq->used->idx = used_index;
1481  vhost_user_log_dirty_ring (vui, rxvq, idx);
1482
1483  /* interrupt (call) handling */
1484  if ((rxvq->callfd > 0) && !(rxvq->avail->flags & 1))
1485  {
1486  rxvq->n_since_last_int += n_packets - n_left;
1487
1488  if (rxvq->n_since_last_int > vum->coalesce_frames)
1489  vhost_user_send_call (vm, rxvq);
1490  }
1491
1492 done2:
1493
 /* Release the lock acquired above. */
1494  if (PREDICT_FALSE (vui->lockp != 0))
1495  *vui->lockp = 0;
1496
done3:
 /* Entry point for exits taken before the lock was acquired. */
1497  if (PREDICT_FALSE (n_left && error != VHOST_USER_TX_FUNC_ERROR_NONE))
1498  {
1499  vlib_error_count (vm, node->node_index, error, n_left);
1503  os_get_cpu_number (), vui->sw_if_index, n_left);
1504  }
1505
 /* The whole frame is consumed regardless of how many packets were sent. */
1506  vlib_buffer_free (vm, vlib_frame_args (frame), frame->n_vectors);
1507  return frame->n_vectors;
1508 }
1509 
/*
 * Admin up/down callback of the vhost-user device class (wired through
 * .admin_up_down_function of vhost_user_dev_class).
 *
 * Records in vui->admin_up whether VNET_SW_INTERFACE_FLAG_ADMIN_UP is set in
 * `flags` and always returns 0 (no error).
 *
 * NOTE(review): several lines of this function (including the statement
 * controlled by `if (is_up)`) are absent from this listing - confirm against
 * the full source before relying on this description.
 */
1510 static clib_error_t *
1512  u32 flags)
1513 {
1514  vnet_hw_interface_t *hif = vnet_get_hw_interface (vnm, hw_if_index);
 /* Non-zero when the admin-up flag is present in `flags`. */
1515  uword is_up = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) != 0;
1517  vhost_user_intf_t *vui =
1519
1520  vui->admin_up = is_up;
1521
1522  if (is_up)
1525
1526  return /* no error */ 0;
1527 }
1528 
1529 /* *INDENT-OFF* */
/* Device class "vhost-user": binds the TX function, its error counters and
   strings, the interface-name formatter, the renumber hook and the admin
   up/down callback defined in this file. */
1530 VNET_DEVICE_CLASS (vhost_user_dev_class,static) = {
1531  .name = "vhost-user",
1532  .tx_function = vhost_user_intfc_tx,
1533  .tx_function_n_errors = VHOST_USER_TX_FUNC_N_ERROR,
1534  .tx_function_error_strings = vhost_user_tx_func_error_strings,
1535  .format_device_name = format_vhost_user_interface_name,
1536  .name_renumber = vhost_user_name_renumber,
1537  .admin_up_down_function = vhost_user_interface_admin_up_down,
/* NOTE(review): presumably lets the TX function receive chained buffers
   as-is (it walks next_buffer itself) - confirm against vnet interface code. */
1538  .no_flatten_output_chains = 1,
1539 };
1540 
1543 /* *INDENT-ON* */
1544 
/*
 * Background process node that maintains client-mode vhost-user sockets.
 *
 * On every wake-up it walks the interfaces and, for each active interface
 * that is not in server mode:
 *   - if no socket is attached (unix_fd == -1), tries to connect the spare
 *     socket to vui->sock_filename; on success the socket is handed to the
 *     interface and registered with unix_file_add(), and a new spare socket
 *     is created; on failure errno is stored in vui->sock_errno;
 *   - otherwise probes the attached socket with getsockopt(SO_ERROR).
 *
 * Returns 0 (and thereby ends the process) only if a spare socket cannot be
 * created.
 *
 * NOTE(review): the wait call, the interface iteration header and the action
 * taken when the probe fails are absent from this listing.
 */
1545 static uword
1546 vhost_user_process (vlib_main_t * vm,
1548 {
1550  vhost_user_intf_t *vui;
 /* Zero-initialised so that sun_path stays NUL-terminated even when the
    strncpy() below copies the maximum sizeof (sun_path) - 1 bytes. */
1551  struct sockaddr_un sun = { 0 };
1552  int sockfd;
1553  unix_file_t template = { 0 };
1554  f64 timeout = 3153600000.0 /* 100 years */ ;
1555  uword *event_data = 0;
1556
1557  sockfd = socket (AF_UNIX, SOCK_STREAM, 0);
1558  sun.sun_family = AF_UNIX;
1559  template.read_function = vhost_user_socket_read;
1560  template.error_function = vhost_user_socket_error;
1561
1562
1563  if (sockfd < 0)
1564  return 0;
1565
1566  while (1)
1567  {
1569  vlib_process_get_events (vm, &event_data);
1570  vec_reset_length (event_data);
1571
 /* After the first wake-up, use a 3 second period. */
1572  timeout = 3.0;
1573
1575  {
1576
 /* Only active client-mode interfaces are handled here. */
1577  if (vui->sock_is_server || !vui->active)
1578  continue;
1579
1580  if (vui->unix_fd == -1)
1581  {
1582  /* try to connect */
1583
1584  strncpy (sun.sun_path, (char *) vui->sock_filename,
1585  sizeof (sun.sun_path) - 1);
1586
1587  if (connect
1588  (sockfd, (struct sockaddr *) &sun,
1589  sizeof (struct sockaddr_un)) == 0)
1590  {
1591  vui->sock_errno = 0;
1592  vui->unix_fd = sockfd;
1593  template.file_descriptor = sockfd;
1594  vui->unix_file_index = unix_file_add (&unix_main, &template);
1596  vui - vum->vhost_user_interfaces);
1597
 /* The connected socket now belongs to the interface;
    allocate a fresh one for the next connection attempt. */
1598  sockfd = socket (AF_UNIX, SOCK_STREAM, 0);
1599  if (sockfd < 0)
1600  return 0;
1601  }
1602  else
1603  {
1604  vui->sock_errno = errno;
1605  }
1606  }
1607  else
1608  {
1609  /* check if socket is alive */
1610  int error = 0;
1611  socklen_t len = sizeof (error);
1612  int retval =
1613  getsockopt (vui->unix_fd, SOL_SOCKET, SO_ERROR, &error, &len);
1614
1615  if (retval)
1617  }
1618  }
1619  }
1620  return 0;
1621 }
1622 
1623 /* *INDENT-OFF* */
/* Field initializers for the "vhost-user-process" node: a process-type node
   whose body is vhost_user_process.  (The opening registration line is not
   part of this listing.) */
1625  .function = vhost_user_process,
1626  .type = VLIB_NODE_TYPE_PROCESS,
1627  .name = "vhost-user-process",
1628 };
1629 /* *INDENT-ON* */
1630 
/*
 * Deactivate the vhost-user interface identified by sw_if_index.
 *
 * Returns VNET_API_ERROR_INVALID_SW_IF_INDEX when sw_if_index is not present
 * in vhost_user_interface_index_by_sw_if_index, otherwise rv (0 in the
 * visible code) after marking the interface inactive.
 *
 * NOTE(review): the signature and the statements announced by the
 * "disconnect", "add to inactive interface list" and "reset renumbered
 * iface" comments are absent from this listing.
 */
1631 int
1633 {
1635  vhost_user_intf_t *vui;
1636  uword *p = NULL;
1637  int rv = 0;
1638
 /* Translate sw_if_index into an index in vhost_user_interfaces. */
1639  p = hash_get (vum->vhost_user_interface_index_by_sw_if_index, sw_if_index);
1640  if (p == 0)
1641  {
1642  return VNET_API_ERROR_INVALID_SW_IF_INDEX;
1643  }
1644  else
1645  {
1646  vui = vec_elt_at_index (vum->vhost_user_interfaces, p[0]);
1647  }
1648
1649  // interface is inactive
1650  vui->active = 0;
1651  // disconnect interface sockets
1653  // add to inactive interface list
1655
1656  // reset renumbered iface
1659
1661  DBG_SOCK ("deleted (deactivated) vhost-user interface instance %d", p[0]);
1662
1663  return rv;
1664 }
1665 
1666 // init server socket on specified sock_filename
/*
 * Create a listening AF_UNIX stream socket bound to sock_filename and
 * register it with unix_file_add().
 *
 * sock_filename: path of the socket; any existing file with that name is
 *                unlinked first.
 * sockfd:        out parameter, receives the listening descriptor on success.
 *
 * Returns 0 on success, VNET_API_ERROR_SYSCALL_ERROR_1 if socket() fails,
 * VNET_API_ERROR_SYSCALL_ERROR_2 if bind() fails and
 * VNET_API_ERROR_SYSCALL_ERROR_3 if listen() fails.  On bind/listen failure
 * the descriptor is closed and *sockfd is left untouched.
 *
 * NOTE(review): the bind address is truncated to sizeof (un.sun_path) - 1
 * bytes while unlink() receives the untruncated name, so an over-long path
 * would unlink one file and bind another - confirm callers bound the length.
 */
1667 static int
1668 vhost_user_init_server_sock (const char *sock_filename, int *sockfd)
1669 {
1670  int rv = 0;
1671  struct sockaddr_un un = { };
1672  int fd;
1673  /* create listening socket */
1674  fd = socket (AF_UNIX, SOCK_STREAM, 0);
1675
1676  if (fd < 0)
1677  {
1678  return VNET_API_ERROR_SYSCALL_ERROR_1;
1679  }
1680
1681  un.sun_family = AF_UNIX;
 /* `un` is zero-initialised, so copying at most size - 1 bytes keeps the
    path NUL-terminated. */
1682  strncpy ((char *) un.sun_path, (char *) sock_filename,
1683  sizeof (un.sun_path) - 1);
1684
1685  /* remove if exists */
1686  unlink ((char *) sock_filename);
1687
1688  if (bind (fd, (struct sockaddr *) &un, sizeof (un)) == -1)
1689  {
1690  rv = VNET_API_ERROR_SYSCALL_ERROR_2;
1691  goto error;
1692  }
1693
 /* Backlog of 1 pending connection. */
1694  if (listen (fd, 1) == -1)
1695  {
1696  rv = VNET_API_ERROR_SYSCALL_ERROR_3;
1697  goto error;
1698  }
1699
 /* Hand the listening descriptor to the unix file poller.
    NOTE(review): one line of the template setup is absent from this
    listing. */
1700  unix_file_t template = { 0 };
1702  template.file_descriptor = fd;
1703  unix_file_add (&unix_main, &template);
1704  *sockfd = fd;
1705  return rv;
1706
1707 error:
1708  close (fd);
1709  return rv;
1710 }
1711 
1712 // get new vhost_user_intf_t from inactive interfaces or create new one
/*
 * Obtain a vhost_user_intf_t slot.
 *
 * If the inactive-interface index list is non-empty, its last entry is
 * popped and, when that index is within vhost_user_interfaces, the
 * corresponding element is reused.  Otherwise (or if the popped index is out
 * of range) a new element is appended to vhost_user_interfaces.
 *
 * Returns a pointer to the reused or newly added element.
 */
1713 static vhost_user_intf_t *
1715 {
1717  vhost_user_intf_t *vui = NULL;
1718  int inactive_cnt = vec_len (vum->vhost_user_inactive_interfaces_index);
1719  // if there are any inactive ifaces
1720  if (inactive_cnt > 0)
1721  {
1722  // take last
1723  u32 vui_idx =
1724  vum->vhost_user_inactive_interfaces_index[inactive_cnt - 1];
 /* Guard against a stale index that no longer fits the vector. */
1725  if (vec_len (vum->vhost_user_interfaces) > vui_idx)
1726  {
1727  vui = vec_elt_at_index (vum->vhost_user_interfaces, vui_idx);
1728  DBG_SOCK ("reusing inactive vhost-user interface index %d",
1729  vui_idx);
1730  }
1731  // "remove" from inactive list
 /* The entry is dropped even when it could not be reused. */
1732  _vec_len (vum->vhost_user_inactive_interfaces_index) -= 1;
1733  }
1734
1735  // vui was not retrieved from inactive ifaces - create new
1736  if (!vui)
1737  vec_add2 (vum->vhost_user_interfaces, vui, 1);
1738  return vui;
1739 }
1740 
1741 // create ethernet interface for vhost user intf
/*
 * Create the ethernet hardware interface backing `vui`.
 *
 * hwaddress: optional 6-byte MAC address.  When NULL, an address is
 *            generated: bytes 0-1 are 02:fe and bytes 2-5 come from
 *            random_u32() seeded with the current time in microseconds.
 *
 * The resulting hardware interface index is stored in vui->hw_if_index and
 * the device instance is the position of vui in vhost_user_interfaces.  A
 * registration error is reported with clib_error_report().
 *
 * NOTE(review): the registration call name and the trailing statements of
 * the function are absent from this listing.
 */
1742 static void
1744  vhost_user_intf_t * vui, u8 * hwaddress)
1745 {
1747  u8 hwaddr[6];
1748  clib_error_t *error;
1749
1750  /* create hw and sw interface */
1751  if (hwaddress)
1752  {
1753  clib_memcpy (hwaddr, hwaddress, 6);
1754  }
1755  else
1756  {
 /* Derive a pseudo-random address from the current time. */
1757  f64 now = vlib_time_now (vm);
1758  u32 rnd;
1759  rnd = (u32) (now * 1e6);
1760  rnd = random_u32 (&rnd);
1761
1762  clib_memcpy (hwaddr + 2, &rnd, sizeof (rnd));
1763  hwaddr[0] = 2;
1764  hwaddr[1] = 0xfe;
1765  }
1766
1768  (vnm,
1769  vhost_user_dev_class.index,
1770  vui - vum->vhost_user_interfaces /* device instance */ ,
1771  hwaddr /* ethernet address */ ,
1772  &vui->hw_if_index, 0 /* flag change */ );
1773  if (error)
1774  clib_error_report (error);
1775
1778 }
1779 
1780 // initialize vui with specified attributes
/*
 * (Re)initialise the state of `vui`.
 *
 * sockfd:        descriptor stored in vui->unix_fd (callers in this file
 *                pass -1 in client mode).
 * sock_filename: socket path, copied into vui->sock_filename (at most
 *                ARRAY_LEN - 1 bytes).
 * is_server:     stored in vui->sock_is_server.
 * feature_mask:  stored in vui->feature_mask.
 * sw_if_index:   optional out parameter, receives vui->sw_if_index.
 *
 * The interface is marked active but not up, both vrings are disabled, and
 * when more than one vlib main exists the lock area pointed to by vui->lockp
 * is cleared.
 *
 * NOTE(review): strncpy() does not write a terminator when the source fills
 * the limit; this relies on the last byte of vui->sock_filename already being
 * zero - confirm for reused interfaces.  Some lines (including the allocation
 * of vui->lockp) are absent from this listing.
 */
1781 static void
1783  vhost_user_intf_t * vui, int sockfd,
1784  const char *sock_filename,
1785  u8 is_server, u64 feature_mask, u32 * sw_if_index)
1786 {
1787  vnet_sw_interface_t *sw;
1788  sw = vnet_get_hw_sw_interface (vnm, vui->hw_if_index);
1790  int q;
1791
1792  vui->unix_fd = sockfd;
1793  vui->sw_if_index = sw->sw_if_index;
1794  vui->num_vrings = 2;
1795  vui->sock_is_server = is_server;
1796  strncpy (vui->sock_filename, sock_filename,
1797  ARRAY_LEN (vui->sock_filename) - 1);
1798  vui->sock_errno = 0;
1799  vui->is_up = 0;
1800  vui->feature_mask = feature_mask;
1801  vui->active = 1;
 /* ~0 marks "no unix file registered". */
1802  vui->unix_file_index = ~0;
1803  vui->log_base_addr = 0;
1804
 /* Both virtqueues start disabled. */
1805  for (q = 0; q < 2; q++)
1806  {
1807  vui->vrings[q].enabled = 0;
1808  }
1809
1811
1812  if (sw_if_index)
1813  *sw_if_index = vui->sw_if_index;
1814
 /* A lock is only set up when several vlib mains exist. */
1815  if (tm->n_vlib_mains > 1)
1816  {
1819  memset ((void *) vui->lockp, 0, CLIB_CACHE_LINE_BYTES);
1820  }
1821 }
1822 
1823 // register vui and start polling on it
/*
 * Register `vui` and start polling on it.
 *
 * With DPDK enabled, a cpu index is derived from the interface's position in
 * vhost_user_interfaces (input_cpu_first_index plus position modulo
 * input_cpu_count) and a different call is made depending on whether a
 * single vlib main exists; in every visible variant the state requested is
 * VLIB_NODE_STATE_POLLING.
 *
 * NOTE(review): most call names in this function are absent from this
 * listing (only their trailing arguments remain), so the exact calls must be
 * checked in the full source.
 */
1824 static void
1826 {
1828 #if DPDK > 0
1829  dpdk_main_t *dm = &dpdk_main;
1830  int cpu_index;
1832 #endif
1833
1835  vui - vum->vhost_user_interfaces);
1837  vui - vum->vhost_user_interfaces);
1838
1839  /* start polling */
1840 #if DPDK > 0
 /* Spread interfaces over the available input cpus. */
1841  cpu_index = dm->input_cpu_first_index +
1842  (vui - vum->vhost_user_interfaces) % dm->input_cpu_count;
1843
1844  if (tm->n_vlib_mains == 1)
1845 #endif
1847  VLIB_NODE_STATE_POLLING);
1848 #if DPDK > 0
1849  else
1851  VLIB_NODE_STATE_POLLING);
1852 #endif
1853
1854  /* tell process to start polling for sockets */
1856 }
1857 
/*
 * Create a vhost-user interface.
 *
 * sock_filename:       path of the vhost-user unix socket.
 * is_server:           non-zero to create and listen on the socket here;
 *                      otherwise sockfd stays -1 when passed to
 *                      vhost_user_vui_init().
 * sw_if_index:         optional out parameter, receives the software
 *                      interface index of the new interface.
 * feature_mask:        passed through to vhost_user_vui_init().
 * renumber,
 * custom_dev_instance: when renumber is non-zero the interface name is
 *                      renumbered to custom_dev_instance.
 * hwaddr:              optional MAC address (NULL lets
 *                      vhost_user_create_ethernet() generate one).
 *
 * Returns 0 on success or the error returned by
 * vhost_user_init_server_sock().
 */
1858 int
1860  const char *sock_filename,
1861  u8 is_server,
1862  u32 * sw_if_index,
1863  u64 feature_mask,
1864  u8 renumber, u32 custom_dev_instance, u8 * hwaddr)
1865 {
1866  vhost_user_intf_t *vui = NULL;
1867  u32 sw_if_idx = ~0;
1868  int sockfd = -1;
1869  int rv = 0;
1870
 /* In server mode the listening socket must exist before anything else
    is allocated, so that a failure leaves no interface behind. */
1871  if (is_server)
1872  {
1873  if ((rv = vhost_user_init_server_sock (sock_filename, &sockfd)) != 0)
1874  {
1875  return rv;
1876  }
1877  }
1878
1879  vui = vhost_user_vui_new ();
1880  ASSERT (vui != NULL);
1881
1882  vhost_user_create_ethernet (vnm, vm, vui, hwaddr);
1883  vhost_user_vui_init (vnm, vui, sockfd, sock_filename, is_server,
1884  feature_mask, &sw_if_idx);
1885
1886  if (renumber)
1887  {
1888  vnet_interface_name_renumber (sw_if_idx, custom_dev_instance);
1889  }
1890
1891  vhost_user_vui_register (vm, vui);
1892
1893  if (sw_if_index)
1894  *sw_if_index = sw_if_idx;
1895
1896  return rv;
1897 }
1898 
/*
 * Reconfigure an existing vhost-user interface identified by sw_if_index.
 *
 * The interface is marked inactive, re-initialised with the new socket
 * name / mode / feature mask through vhost_user_vui_init(), optionally
 * renumbered, and registered again.
 *
 * Returns VNET_API_ERROR_INVALID_SW_IF_INDEX when sw_if_index is unknown,
 * the error from vhost_user_init_server_sock() in server mode, or 0.
 *
 * NOTE(review): when the server socket cannot be created the function
 * returns with vui->active already cleared.  The statement announced by the
 * "disconnect interface sockets" comment is absent from this listing.
 */
1899 int
1901  const char *sock_filename,
1902  u8 is_server,
1903  u32 sw_if_index,
1904  u64 feature_mask, u8 renumber, u32 custom_dev_instance)
1905 {
1907  vhost_user_intf_t *vui = NULL;
1908  u32 sw_if_idx = ~0;
1909  int sockfd = -1;
1910  int rv = 0;
1911  uword *p = NULL;
1912
 /* Translate sw_if_index into an index in vhost_user_interfaces. */
1913  p = hash_get (vum->vhost_user_interface_index_by_sw_if_index, sw_if_index);
1914  if (p == 0)
1915  {
1916  return VNET_API_ERROR_INVALID_SW_IF_INDEX;
1917  }
1918  else
1919  {
1920  vui = vec_elt_at_index (vum->vhost_user_interfaces, p[0]);
1921  }
1922
1923  // interface is inactive
1924  vui->active = 0;
1925  // disconnect interface sockets
1927
1928  if (is_server)
1929  {
1930  if ((rv = vhost_user_init_server_sock (sock_filename, &sockfd)) != 0)
1931  {
1932  return rv;
1933  }
1934  }
1935
1936  vhost_user_vui_init (vnm, vui, sockfd, sock_filename, is_server,
1937  feature_mask, &sw_if_idx);
1938
1939  if (renumber)
1940  {
1941  vnet_interface_name_renumber (sw_if_idx, custom_dev_instance);
1942  }
1943
1944  vhost_user_vui_register (vm, vui);
1945
1946  return rv;
1947 }
1948 
/*
 * CLI handler for "create vhost-user".
 *
 * Accepted arguments (any order):
 *   socket <filename>     unix socket path (required)
 *   server                act as the socket server
 *   feature-mask 0x<hex>  feature mask handed to vhost_user_create_if()
 *   hwaddr <mac>          ethernet address of the new interface
 *   renumber <n>          custom device instance
 *
 * Returns 0 on success (or when no line of input is available), otherwise a
 * clib error describing the unknown argument, the missing socket name or the
 * failure code of vhost_user_create_if().  Everything parsed so far is
 * released on every exit path.
 */
1949 clib_error_t *
1951  unformat_input_t * input,
1952  vlib_cli_command_t * cmd)
1953 {
1954  unformat_input_t _line_input, *line_input = &_line_input;
1955  u8 *sock_filename = NULL;
1956  u32 sw_if_index;
1957  u8 is_server = 0;
1958  u64 feature_mask = (u64) ~ 0;
1959  u8 renumber = 0;
1960  u32 custom_dev_instance = ~0;
1961  u8 hwaddr[6];
1962  u8 *hw = NULL;
1963
1964  /* Get a line of input. */
1965  if (!unformat_user (input, unformat_line_input, line_input))
1966  return 0;
1967
1968  while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
1969  {
1970  if (unformat (line_input, "socket %s", &sock_filename))
1971  ;
1972  else if (unformat (line_input, "server"))
1973  is_server = 1;
1974  else if (unformat (line_input, "feature-mask 0x%llx", &feature_mask))
1975  ;
1976  else
1977  if (unformat
1978  (line_input, "hwaddr %U", unformat_ethernet_address, hwaddr))
1979  hw = hwaddr;
1980  else if (unformat (line_input, "renumber %d", &custom_dev_instance))
1981  {
1982  renumber = 1;
1983  }
1984  else
 {
 /* Build the error first, then release what was parsed so far so
    that neither the line input nor the socket name leaks. */
 clib_error_t *parse_error =
 clib_error_return (0, "unknown input `%U'",
 format_unformat_error, input);
 vec_free (sock_filename);
 unformat_free (line_input);
 return parse_error;
 }
1987  }
1988  unformat_free (line_input);
1989
 /* The socket name is later copied with strncpy(); a NULL name must not
    reach that point. */
 if (sock_filename == NULL)
 return clib_error_return (0, "missing socket filename");

1990  vnet_main_t *vnm = vnet_get_main ();
1991
1992  int rv;
1993  if ((rv = vhost_user_create_if (vnm, vm, (char *) sock_filename,
1994  is_server, &sw_if_index, feature_mask,
1995  renumber, custom_dev_instance, hw)))
1996  {
1997  vec_free (sock_filename);
1998  return clib_error_return (0, "vhost_user_create_if returned %d", rv);
1999  }
2000
2001  vec_free (sock_filename);
2003  sw_if_index);
2004  return 0;
2005 }
2006 
/*
 * CLI handler for "delete vhost-user sw_if_index <nn>".
 *
 * Parses the software interface index and calls vhost_user_delete_if().
 * Returns 0 on success (or when no line of input is available) and a clib
 * error for an unknown argument; the line input is released on every exit
 * path.  The result of vhost_user_delete_if() is not reported.
 */
2007 clib_error_t *
2009  unformat_input_t * input,
2010  vlib_cli_command_t * cmd)
2011 {
2012  unformat_input_t _line_input, *line_input = &_line_input;
2013  u32 sw_if_index = ~0;
2014
2015  /* Get a line of input. */
2016  if (!unformat_user (input, unformat_line_input, line_input))
2017  return 0;
2018
2019  while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
2020  {
2021  if (unformat (line_input, "sw_if_index %d", &sw_if_index))
2022  ;
2023  else
 {
 /* Build the error first, then release the line input so that it
    does not leak on this early return. */
 clib_error_t *parse_error =
 clib_error_return (0, "unknown input `%U'",
 format_unformat_error, input);
 unformat_free (line_input);
 return parse_error;
 }
2026  }
2027  unformat_free (line_input);
2028
2029  vnet_main_t *vnm = vnet_get_main ();
2030
2031  vhost_user_delete_if (vnm, vm, sw_if_index);
2032
2033  return 0;
2034 }
2035 
/*
 * Build a vector describing every active vhost-user interface.
 *
 * out_vuids: out parameter; receives a vector of vhost_user_intf_details_t
 *            (NULL when there is no active interface).
 *
 * Returns -1 when out_vuids is NULL, otherwise rv, which the visible code
 * never changes from 0.
 *
 * NOTE(review): the signature, the interface iteration header and the line
 * that selects `vui` inside the second loop are absent from this listing.
 */
2036 int
2038  vhost_user_intf_details_t ** out_vuids)
2039 {
2040  int rv = 0;
2042  vhost_user_intf_t *vui;
2043  vhost_user_intf_details_t *r_vuids = NULL;
2045  u32 *hw_if_indices = 0;
2047  u8 *s = NULL;
2048  int i;
2049
2050  if (!out_vuids)
2051  return -1;
2052
 /* First pass: collect the hardware indices of active interfaces. */
2054  {
2055  if (vui->active)
2056  vec_add1 (hw_if_indices, vui->hw_if_index);
2057  }
2058
 /* Second pass: one details record per collected interface. */
2059  for (i = 0; i < vec_len (hw_if_indices); i++)
2060  {
2061  hi = vnet_get_hw_interface (vnm, hw_if_indices[i]);
2063
2064  vec_add2 (r_vuids, vuid, 1);
2065  vuid->sw_if_index = vui->sw_if_index;
2066  vuid->virtio_net_hdr_sz = vui->virtio_net_hdr_sz;
2067  vuid->features = vui->features;
2068  vuid->is_server = vui->sock_is_server;
2069  vuid->num_regions = vui->nregions;
2070  vuid->sock_errno = vui->sock_errno;
2071  strncpy ((char *) vuid->sock_filename, (char *) vui->sock_filename,
2072  ARRAY_LEN (vuid->sock_filename) - 1);
2073
 /* Render the interface name as a NUL-terminated string in the scratch
    vector `s`, copy it, then reset `s` for the next iteration. */
2074  s = format (s, "%v%c", hi->name, 0);
2075
2076  strncpy ((char *) vuid->if_name, (char *) s,
2077  ARRAY_LEN (vuid->if_name) - 1);
2078  _vec_len (s) = 0;
2079  }
2080
2081  vec_free (s);
2082  vec_free (hw_if_indices);
2083
2084  *out_vuids = r_vuids;
2085
2086  return rv;
2087 }
2088 
/*
 * CLI handler for "show vhost-user".
 *
 * Arguments: zero or more hardware interface names, and optionally
 * "descriptors" (or "desc") to also dump each virtqueue's descriptor table.
 * Without interface names, every active vhost-user interface is shown.
 *
 * For each selected interface the output lists the virtio header size, the
 * negotiated feature bits, the socket state, the memory regions and the
 * state of each virtqueue.
 *
 * Returns 0 on success or a clib error for an unknown argument.
 */
2089 clib_error_t *
2091  unformat_input_t * input,
2092  vlib_cli_command_t * cmd)
2093 {
2094  clib_error_t *error = 0;
2095  vnet_main_t *vnm = vnet_get_main ();
2097  vhost_user_intf_t *vui;
2098  u32 hw_if_index, *hw_if_indices = 0;
2100  int i, j, q;
2101  int show_descr = 0;
2102  struct feat_struct
2103  {
2104  u8 bit;
2105  char *str;
2106  };
2107  struct feat_struct *feat_entry;
2108
 /* Table of known feature names and bit positions, terminated by an
    entry whose str is NULL. */
2109  static struct feat_struct feat_array[] = {
2110 #define _(s,b) { .str = #s, .bit = b, },
2112 #undef _
2113  {.str = NULL}
2114  };
2115
2116  while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2117  {
2118  if (unformat
2119  (input, "%U", unformat_vnet_hw_interface, vnm, &hw_if_index))
2120  {
2121  vec_add1 (hw_if_indices, hw_if_index);
2122  vlib_cli_output (vm, "add %d", hw_if_index);
2123  }
2124  else if (unformat (input, "descriptors") || unformat (input, "desc"))
2125  show_descr = 1;
2126  else
2127  {
2128  error = clib_error_return (0, "unknown input `%U'",
2129  format_unformat_error, input);
2130  goto done;
2131  }
2132  }
 /* No interface named on the command line: show all active ones. */
2133  if (vec_len (hw_if_indices) == 0)
2134  {
2136  {
2137  if (vui->active)
2138  vec_add1 (hw_if_indices, vui->hw_if_index);
2139  }
2140  }
2141  vlib_cli_output (vm, "Virtio vhost-user interfaces");
2142  vlib_cli_output (vm, "Global:\n coalesce frames %d time %e\n\n",
2143  vum->coalesce_frames, vum->coalesce_time);
2144
2145  for (i = 0; i < vec_len (hw_if_indices); i++)
2146  {
2147  hi = vnet_get_hw_interface (vnm, hw_if_indices[i]);
2149  vlib_cli_output (vm, "Interface: %s (ifindex %d)",
2150  hi->name, hw_if_indices[i]);
2151
2152  vlib_cli_output (vm, "virtio_net_hdr_sz %d\n features (0x%llx): \n",
2153  vui->virtio_net_hdr_sz, vui->features);
2154
2155  feat_entry = (struct feat_struct *) &feat_array;
2156  while (feat_entry->str)
2157  {
 /* 64-bit shift: the feature word is 64 bits wide, and shifting
    a plain int by 31 or more is undefined / sign-extends. */
2158  if (vui->features & (1ULL << feat_entry->bit))
2159  vlib_cli_output (vm, " %s (%d)", feat_entry->str,
2160  feat_entry->bit);
2161  feat_entry++;
2162  }
2163
2164  vlib_cli_output (vm, "\n");
2165
2166
2167  vlib_cli_output (vm, " socket filename %s type %s errno \"%s\"\n\n",
2168  vui->sock_filename,
2169  vui->sock_is_server ? "server" : "client",
2170  strerror (vui->sock_errno));
2171
2172  vlib_cli_output (vm, " Memory regions (total %d)\n", vui->nregions);
2173
2174  if (vui->nregions)
2175  {
2176  vlib_cli_output (vm,
2177  " region fd guest_phys_addr memory_size userspace_addr mmap_offset mmap_addr\n");
2178  vlib_cli_output (vm,
2179  " ====== ===== ================== ================== ================== ================== ==================\n");
2180  }
2181  for (j = 0; j < vui->nregions; j++)
2182  {
2183  vlib_cli_output (vm,
2184  " %d %-5d 0x%016lx 0x%016lx 0x%016lx 0x%016lx 0x%016lx\n",
2185  j, vui->region_mmap_fd[j],
2186  vui->regions[j].guest_phys_addr,
2187  vui->regions[j].memory_size,
2188  vui->regions[j].userspace_addr,
2189  vui->regions[j].mmap_offset,
2191  }
2192  for (q = 0; q < vui->num_vrings; q++)
2193  {
2194  vlib_cli_output (vm, "\n Virtqueue %d\n", q);
2195
2196  vlib_cli_output (vm,
2197  " qsz %d last_avail_idx %d last_used_idx %d\n",
2198  vui->vrings[q].qsz, vui->vrings[q].last_avail_idx,
2199  vui->vrings[q].last_used_idx);
2200
 /* Ring contents are only printed once both rings are mapped. */
2201  if (vui->vrings[q].avail && vui->vrings[q].used)
2202  vlib_cli_output (vm,
2203  " avail.flags %x avail.idx %d used.flags %x used.idx %d\n",
2204  vui->vrings[q].avail->flags,
2205  vui->vrings[q].avail->idx,
2206  vui->vrings[q].used->flags,
2207  vui->vrings[q].used->idx);
2208
2209  vlib_cli_output (vm, " kickfd %d callfd %d errfd %d\n",
2210  vui->vrings[q].kickfd,
2211  vui->vrings[q].callfd, vui->vrings[q].errfd);
2212
2213  if (show_descr)
2214  {
2215  vlib_cli_output (vm, "\n descriptor table:\n");
2216  vlib_cli_output (vm,
2217  " id addr len flags next user_addr\n");
2218  vlib_cli_output (vm,
2219  " ===== ================== ===== ====== ===== ==================\n");
2220  for (j = 0; j < vui->vrings[q].qsz; j++)
2221  {
2222  vlib_cli_output (vm,
2223  " %-5d 0x%016lx %-5d 0x%04x %-5d 0x%016lx\n",
2224  j, vui->vrings[q].desc[j].addr,
2225  vui->vrings[q].desc[j].len,
2226  vui->vrings[q].desc[j].flags,
2227  vui->vrings[q].desc[j].next,
2229  (vui,
2230  vui->vrings[q].
2231  desc[j].addr)));
2232  }
2233  }
2234  }
2235  vlib_cli_output (vm, "\n");
2236  }
2237 done:
2238  vec_free (hw_if_indices);
2239  return error;
2240 }
2241 
2242 /*
2243  * CLI functions
2244  */
2245 
/* The vhost-user CLI commands are only registered here when DPDK is not
   compiled in.  Each short_help lists every argument its handler parses. */
2246 #if DPDK == 0
2247 /* *INDENT-OFF* */
2248 VLIB_CLI_COMMAND (vhost_user_connect_command, static) = {
2249  .path = "create vhost-user",
2250  .short_help = "create vhost-user socket <socket-filename> [server] [feature-mask <hex>] [hwaddr <mac-addr>] [renumber <dev_instance>]",
2251  .function = vhost_user_connect_command_fn,
2252 };
2253
2254 VLIB_CLI_COMMAND (vhost_user_delete_command, static) = {
2255  .path = "delete vhost-user",
2256  .short_help = "delete vhost-user sw_if_index <nn>",
2257  .function = vhost_user_delete_command_fn,
2258 };
2259
2260 VLIB_CLI_COMMAND (show_vhost_user_command, static) = {
2261  .path = "show vhost-user",
2262  .short_help = "show vhost-user [<interface> [<interface> [..]]] [descriptors]",
2263  .function = show_vhost_user_command_fn,
2264 };
2265 /* *INDENT-ON* */
2266 #endif
2267 
/*
 * Parser for the "vhost-user { ... }" startup configuration section.
 *
 * Recognised entries:
 *   coalesce-frames <n>  stored in vum->coalesce_frames
 *   coalesce-time <f>    stored in vum->coalesce_time
 *   dont-dump-memory     sets vum->dont_dump_vhost_user_memory
 *
 * Returns 0 on success or a clib error for an unknown entry.
 */
2268 static clib_error_t *
2270 {
2272
2273  while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2274  {
2275  if (unformat (input, "coalesce-frames %d", &vum->coalesce_frames))
2276  ;
2277  else if (unformat (input, "coalesce-time %f", &vum->coalesce_time))
2278  ;
2279  else if (unformat (input, "dont-dump-memory"))
2280  vum->dont_dump_vhost_user_memory = 1;
2281  else
2282  return clib_error_return (0, "unknown input `%U'",
2283  format_unformat_error, input);
2284  }
2285
2286  return 0;
2287 }
2288
2289 /* vhost-user { ... } configuration. */
2290 VLIB_CONFIG_FUNCTION (vhost_user_config, "vhost-user");
2291 
/*
 * When the "dont-dump-memory" configuration flag is set, unmap the guest
 * memory regions of the interfaces by calling unmap_all_mem_regions() on
 * each of them; otherwise do nothing.
 *
 * NOTE(review): the function name and the interface iteration header are
 * absent from this listing; presumably this is invoked before a core dump so
 * guest memory is left out of it - confirm against the caller.
 */
2292 void
2294 {
2296  vhost_user_intf_t *vui;
2297
2298  if (vum->dont_dump_vhost_user_memory)
2299  {
2301  {
2302  unmap_all_mem_regions (vui);
2303  }
2304  }
2305 }
2306 
2307 /*
2308  * fd.io coding-style-patch-verification: ON
2309  *
2310  * Local Variables:
2311  * eval: (c-set-style "gnu")
2312  * End:
2313  */
unformat_function_t unformat_vnet_hw_interface
static clib_error_t * vhost_user_init(vlib_main_t *vm)
Definition: vhost-user.c:801
unix_file_t * file_pool
Definition: unix.h:89
static void vhost_user_vui_init(vnet_main_t *vnm, vhost_user_intf_t *vui, int sockfd, const char *sock_filename, u8 is_server, u64 feature_mask, u32 *sw_if_index)
Definition: vhost-user.c:1782
void vlib_put_next_frame(vlib_main_t *vm, vlib_node_runtime_t *r, u32 next_index, u32 n_vectors_left)
Release pointer to next frame vector data.
Definition: main.c:457
static void vlib_increment_simple_counter(vlib_simple_counter_main_t *cm, u32 cpu_index, u32 index, u32 increment)
Increment a simple counter.
Definition: counter.h:78
vmrglw vmrglh hi
static void vhost_user_if_disconnect(vhost_user_intf_t *vui)
Definition: vhost-user.c:239
#define hash_set(h, key, value)
Definition: hash.h:254
static uword vhost_user_intfc_tx(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
Definition: vhost-user.c:1267
sll srl srl sll sra u16x4 i
Definition: vector_sse2.h:343
#define clib_min(x, y)
Definition: clib.h:326
vring_desc_t * desc
Definition: vhost-user.h:196
#define CLIB_UNUSED(x)
Definition: clib.h:79
uword unformat(unformat_input_t *i, char *fmt,...)
Definition: unformat.c:966
clib_error_t * vnet_hw_interface_set_flags(vnet_main_t *vnm, u32 hw_if_index, u32 flags)
Definition: interface.c:513
static u32 vlib_get_trace_count(vlib_main_t *vm, vlib_node_runtime_t *rt)
Definition: trace_funcs.h:143
static f64 vlib_process_wait_for_event_or_clock(vlib_main_t *vm, f64 dt)
Suspend a cooperative multi-tasking thread Waits for an event, or for the indicated number of seconds...
Definition: node_funcs.h:682
unix_file_function_t * read_function
Definition: unix.h:62
#define hash_unset(h, key)
Definition: hash.h:260
static void vhost_user_create_ethernet(vnet_main_t *vnm, vlib_main_t *vm, vhost_user_intf_t *vui, u8 *hwaddress)
Definition: vhost-user.c:1743
void ethernet_delete_interface(vnet_main_t *vnm, u32 hw_if_index)
Definition: interface.c:230
dpdk_main_t dpdk_main
Definition: dpdk.h:443
static vlib_main_t * vlib_get_main(void)
Definition: global_funcs.h:23
static clib_error_t * vhost_user_socket_error(unix_file_t *uf)
Definition: vhost-user.c:740
static void * map_guest_mem(vhost_user_intf_t *vui, uword addr)
Definition: vhost-user.c:150
uword * vhost_user_interface_index_by_sock_fd
Definition: vhost-user.h:249
vnet_interface_main_t interface_main
Definition: vnet.h:64
#define VLIB_BUFFER_TRACE_TRAJECTORY_INIT(b)
Definition: buffer.h:399
static vhost_user_intf_t * vhost_user_vui_new()
Definition: vhost-user.c:1714
#define PREDICT_TRUE(x)
Definition: clib.h:98
static void vlib_error_count(vlib_main_t *vm, uword node_index, uword counter, uword increment)
Definition: error_funcs.h:55
#define UNFORMAT_END_OF_INPUT
Definition: format.h:143
#define NULL
Definition: clib.h:55
u32 vlib_buffer_alloc_from_free_list(vlib_main_t *vm, u32 *buffers, u32 n_buffers, u32 free_list_index)
Allocate buffers from specific freelist into supplied array.
Definition: dpdk_buffer.c:655
static f64 vlib_time_now(vlib_main_t *vm)
Definition: main.h:182
vring_avail_t * avail
Definition: vhost-user.h:197
static uword vhost_user_input(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *f)
Definition: vhost-user.c:1214
static vnet_hw_interface_t * vnet_get_hw_interface(vnet_main_t *vnm, u32 hw_if_index)
#define vec_add1(V, E)
Add 1 element to end of vector (unspecified alignment).
Definition: vec.h:482
int vnet_interface_name_renumber(u32 sw_if_index, u32 new_show_dev_instance)
Definition: interface.c:1151
static u32 vhost_user_if_input(vlib_main_t *vm, vhost_user_main_t *vum, vhost_user_intf_t *vui, vlib_node_runtime_t *node)
Definition: vhost-user.c:932
static u8 * format_vhost_user_input_trace(u8 *s, va_list *va)
Definition: vhost-user.c:855
struct _vlib_node_registration vlib_node_registration_t
#define VHOST_USER_MSG_HDR_SZ
Definition: vhost-user.h:20
static clib_error_t * vhost_user_interface_admin_up_down(vnet_main_t *vnm, u32 hw_if_index, u32 flags)
Definition: vhost-user.c:1511
#define vec_add2(V, P, N)
Add N elements to end of vector V, return pointer to new elements in P.
Definition: vec.h:521
static void vhost_user_vui_register(vlib_main_t *vm, vhost_user_intf_t *vui)
Definition: vhost-user.c:1825
static vnet_sw_interface_t * vnet_get_sw_interface(vnet_main_t *vnm, u32 sw_if_index)
clib_error_t * show_vhost_user_command_fn(vlib_main_t *vm, unformat_input_t *input, vlib_cli_command_t *cmd)
Definition: vhost-user.c:2090
#define clib_error_report(e)
Definition: error.h:125
#define VNET_HW_INTERFACE_FLAG_LINK_UP
Definition: interface.h:273
#define vec_validate_aligned(V, I, A)
Make sure vector is long enough for given index (no header, specified alignment)
Definition: vec.h:407
vlib_error_t * errors
Definition: node.h:418
static char * vhost_user_input_func_error_strings[]
Definition: vhost-user.c:98
static char * vhost_user_tx_func_error_strings[]
Definition: vhost-user.c:77
vring_used_t * used
Definition: vhost-user.h:198
#define vec_alloc(V, N)
Allocate space for N more elements (no header, unspecified alignment)
Definition: vec.h:239
format_function_t format_vnet_sw_if_index_name
static uword unix_file_add(unix_main_t *um, unix_file_t *template)
Definition: unix.h:136
#define vec_reset_length(v)
Reset vector length to zero NULL-pointer tolerant.
static int vhost_user_name_renumber(vnet_hw_interface_t *hi, u32 new_dev_instance)
Definition: vhost-user.c:132
static vnet_sw_interface_t * vnet_get_hw_sw_interface(vnet_main_t *vnm, u32 hw_if_index)
static void vlib_trace_buffer(vlib_main_t *vm, vlib_node_runtime_t *r, u32 next_index, vlib_buffer_t *b, int follow_chain)
Definition: trace_funcs.h:104
#define VHOST_VRING_F_LOG
Definition: vhost-user.h:31
vnet_main_t * vnet_get_main(void)
Definition: misc.c:45
vhost_user_vring_t vrings[2]
Definition: vhost-user.h:233
VNET_DEVICE_CLASS(vhost_user_dev_class, static)
static u8 * format_vhost_user_interface_name(u8 *s, va_list *args)
Definition: vhost-user.c:115
#define vlib_prefetch_buffer_with_index(vm, bi, type)
Prefetch buffer metadata by buffer index The first 64 bytes of buffer contains most header informatio...
Definition: buffer_funcs.h:182
#define VLIB_INIT_FUNCTION(x)
Definition: init.h:111
static uword vlib_process_get_events(vlib_main_t *vm, uword **data_vector)
Return the first event type which has occurred and a vector of per-event data of that type...
Definition: node_funcs.h:525
static clib_error_t * ip4_init(vlib_main_t *vm)
Definition: ip4_input.c:431
#define always_inline
Definition: clib.h:84
static void * vlib_buffer_get_current(vlib_buffer_t *b)
Get pointer to current data to process.
Definition: buffer.h:187
vlib_combined_counter_main_t * combined_sw_if_counters
Definition: interface.h:501
u8 * format_white_space(u8 *s, va_list *va)
Definition: std-formats.c:113
void * log_base_addr
Definition: vhost-user.h:238
int input_cpu_first_index
Definition: dpdk.h:427
#define vec_elt_at_index(v, i)
Get vector value at index i checking that i is in bounds.
void vhost_user_rx_trace(vlib_main_t *vm, vlib_node_runtime_t *node, vhost_user_intf_t *vui, i16 virtqueue)
Definition: vhost-user.c:884
u8 pre_data[VLIB_BUFFER_PRE_DATA_SIZE]
Space for inserting data before buffer start.
Definition: buffer.h:143
static void unformat_free(unformat_input_t *i)
Definition: format.h:161
vhost_user_tx_func_error_t
Definition: vhost-user.c:69
#define clib_warning(format, args...)
Definition: error.h:59
unsigned long u64
Definition: types.h:89
static void unmap_all_mem_regions(vhost_user_intf_t *vui)
Definition: vhost-user.c:193
uword unformat_user(unformat_input_t *input, unformat_function_t *func,...)
Definition: unformat.c:977
VLIB_DEVICE_TX_FUNCTION_MULTIARCH(vhost_user_dev_class, vhost_user_intfc_tx)
Definition: vhost-user.c:1541
vhost_user_input_func_error_t
Definition: vhost-user.c:90
#define vlib_call_init_function(vm, x)
Definition: init.h:161
static clib_error_t * vhost_user_socket_read(unix_file_t *uf)
Definition: vhost-user.c:302
static uword pointer_to_uword(const void *p)
Definition: types.h:131
#define VLIB_BUFFER_NEXT_PRESENT
Definition: buffer.h:95
vlib_main_t ** vlib_mains
Definition: dpdk_buffer.c:157
#define VLIB_BUFFER_PRE_DATA_SIZE
Definition: buffer.h:52
static void unix_file_del(unix_main_t *um, unix_file_t *f)
Definition: unix.h:146
#define hash_get(h, key)
Definition: hash.h:248
format_function_t format_vnet_sw_interface_name
#define pool_elt_at_index(p, i)
Returns pointer to element at given index.
Definition: pool.h:369
static uword format_get_indent(u8 *s)
Definition: format.h:72
u32 file_descriptor
Definition: unix.h:52
u16 current_length
Nbytes between current data and the end of this buffer.
Definition: buffer.h:82
static void vlib_process_signal_event(vlib_main_t *vm, uword node_index, uword type_opaque, uword data)
Definition: node_funcs.h:929
int vhost_user_delete_if(vnet_main_t *vnm, vlib_main_t *vm, u32 sw_if_index)
Definition: vhost-user.c:1632
static void * map_user_mem(vhost_user_intf_t *vui, uword addr)
Definition: vhost-user.c:168
uword os_get_cpu_number(void)
Definition: unix-misc.c:224
#define clib_error_return_unix(e, args...)
Definition: error.h:114
#define PREDICT_FALSE(x)
Definition: clib.h:97
#define VLIB_CONFIG_FUNCTION(x, n,...)
Definition: init.h:118
#define vhost_user_log_dirty_ring(vui, vq, member)
Definition: vhost-user.c:295
static vlib_node_registration_t vhost_user_process_node
(constructor) VLIB_REGISTER_NODE (vhost_user_process_node)
Definition: vhost-user.c:1624
void vhost_user_unmap_all(void)
Definition: vhost-user.c:2293
char sock_filename[256]
Definition: vhost-user.h:219
vnet_main_t vnet_main
Definition: misc.c:42
int input_cpu_count
Definition: dpdk.h:428
#define VLIB_FRAME_SIZE
Definition: node.h:328
vlib_simple_counter_main_t * sw_if_counters
Definition: interface.h:500
u32 region_mmap_fd[VHOST_MEMORY_MAX_NREGIONS]
Definition: vhost-user.h:232
static void vhost_user_send_call(vlib_main_t *vm, vhost_user_vring_t *vq)
Definition: vhost-user.c:919
vhost_user_memory_region_t regions[VHOST_MEMORY_MAX_NREGIONS]
Definition: vhost-user.h:230
#define vlib_validate_buffer_enqueue_x1(vm, node, next_index, to_next, n_left_to_next, bi0, next0)
Finish enqueueing one buffer forward in the graph.
Definition: buffer_node.h:130
#define vlib_get_next_frame(vm, node, next_index, vectors, n_vectors_left)
Get pointer to next frame vector data by (vlib_node_runtime_t, next_index).
Definition: node_funcs.h:348
void vlib_cli_output(vlib_main_t *vm, char *fmt,...)
Definition: cli.c:575
int vhost_user_dump_ifs(vnet_main_t *vnm, vlib_main_t *vm, vhost_user_intf_details_t **out_vuids)
Definition: vhost-user.c:2037
vlib_error_t error
Error code for buffers to be enqueued to error handler.
Definition: buffer.h:118
static void vhost_user_log_dirty_pages(vhost_user_intf_t *vui, u64 addr, u64 len)
Definition: vhost-user.c:273
static void vlib_buffer_chain_init(vlib_buffer_t *first)
Definition: buffer_funcs.h:432
static clib_error_t * vhost_user_exit(vlib_main_t *vm)
Definition: vhost-user.c:829
u8 * format_ethernet_header_with_length(u8 *s, va_list *args)
Definition: format.c:91
u32 * show_dev_instance_by_real_dev_instance
Definition: vhost-user.h:251
int vhost_user_create_if(vnet_main_t *vnm, vlib_main_t *vm, const char *sock_filename, u8 is_server, u32 *sw_if_index, u64 feature_mask, u8 renumber, u32 custom_dev_instance, u8 *hwaddr)
Definition: vhost-user.c:1859
vhost_user_intf_t * vhost_user_interfaces
Definition: vhost-user.h:246
u16 n_vectors
Definition: node.h:344
#define CLIB_PREFETCH(addr, size, type)
Definition: cache.h:82
#define vec_free(V)
Free vector's memory (no header).
Definition: vec.h:300
static vlib_thread_main_t * vlib_get_thread_main()
Definition: global_funcs.h:32
#define VLIB_MAIN_LOOP_EXIT_FUNCTION(x)
Definition: init.h:115
int vhost_user_modify_if(vnet_main_t *vnm, vlib_main_t *vm, const char *sock_filename, u8 is_server, u32 sw_if_index, u64 feature_mask, u8 renumber, u32 custom_dev_instance)
Definition: vhost-user.c:1900
#define clib_memcpy(a, b, c)
Definition: string.h:63
#define VHOST_MEMORY_MAX_NREGIONS
Definition: vhost-user.h:19
void vlib_worker_thread_barrier_sync(vlib_main_t *vm)
Definition: threads.c:1144
#define ARRAY_LEN(x)
Definition: clib.h:59
#define VHOST_USER_PROTOCOL_F_LOG_SHMFD
Definition: vhost-user.h:30
#define VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX
Definition: buffer.h:303
u32 * vhost_user_inactive_interfaces_index
Definition: vhost-user.h:247
#define hash_create(elts, value_bytes)
Definition: hash.h:647
#define VNET_SW_INTERFACE_FLAG_ADMIN_UP
Definition: interface.h:415
u32 max_l3_packet_bytes[VLIB_N_RX_TX]
Definition: interface.h:345
uword unformat_ethernet_address(unformat_input_t *input, va_list *args)
Definition: format.c:206
static void vlib_increment_combined_counter(vlib_combined_counter_main_t *cm, u32 cpu_index, u32 index, u32 packet_increment, u32 byte_increment)
Increment a combined counter.
Definition: counter.h:241
#define ASSERT(truth)
unsigned int u32
Definition: types.h:88
u8 * format_unformat_error(u8 *s, va_list *va)
Definition: unformat.c:91
#define vnet_buffer(b)
Definition: buffer.h:335
u8 * format(u8 *s, char *fmt,...)
Definition: format.c:418
void vlib_buffer_free(vlib_main_t *vm, u32 *buffers, u32 n_buffers)
Free buffers. Frees the entire buffer chain for each buffer.
Definition: dpdk_buffer.c:766
static long get_huge_page_size(int fd)
Definition: vhost-user.c:185
u32 next_buffer
Next buffer for this linked-list of buffers.
Definition: buffer.h:114
clib_error_t * vhost_user_delete_command_fn(vlib_main_t *vm, unformat_input_t *input, vlib_cli_command_t *cmd)
Definition: vhost-user.c:2008
u16 vlib_buffer_chain_append_data_with_alloc(vlib_main_t *vm, u32 free_list_index, vlib_buffer_t *first, vlib_buffer_t **last, void *data, u16 data_len)
Definition: dpdk_buffer.c:908
#define VIRTQ_DESC_F_NEXT
Definition: vhost-user.h:26
clib_error_t * ethernet_register_interface(vnet_main_t *vnm, u32 dev_class_index, u32 dev_instance, u8 *address, u32 *hw_if_index_return, ethernet_flag_change_function_t flag_change)
Definition: interface.c:181
static void * vlib_frame_args(vlib_frame_t *f)
Get pointer to frame scalar data.
Definition: node_funcs.h:268
static void vlib_node_set_state(vlib_main_t *vm, u32 node_index, vlib_node_state_t new_state)
Set node dispatch state.
Definition: node_funcs.h:144
unix_main_t unix_main
Definition: main.c:57
u64 uword
Definition: types.h:112
static void * vlib_add_trace(vlib_main_t *vm, vlib_node_runtime_t *r, vlib_buffer_t *b, u32 n_data_bytes)
Definition: trace_funcs.h:55
volatile u32 * lockp
Definition: vhost-user.h:213
u32 total_length_not_including_first_buffer
Only valid for first buffer in chain.
Definition: buffer.h:109
#define foreach_vhost_user_tx_func_error
Definition: vhost-user.c:63
void * region_mmap_addr[VHOST_MEMORY_MAX_NREGIONS]
Definition: vhost-user.h:231
Definition: defs.h:47
unsigned short u16
Definition: types.h:57
VLIB_CLI_COMMAND(set_interface_ip_source_and_port_range_check_command, static)
static clib_error_t * vhost_user_socksvr_accept_ready(unix_file_t *uf)
Definition: vhost-user.c:761
static clib_error_t * vhost_user_config(vlib_main_t *vm, unformat_input_t *input)
Definition: vhost-user.c:2269
#define vec_len(v)
Number of elements in vector (rvalue-only, NULL tolerant)
double f64
Definition: types.h:142
unsigned char u8
Definition: types.h:56
static int vhost_user_init_server_sock(const char *sock_filename, int *sockfd)
Definition: vhost-user.c:1668
#define VHOST_NET_VRING_IDX_RX
Definition: vhost-user.h:22
uword * vhost_user_interface_index_by_listener_fd
Definition: vhost-user.h:248
Definition: unix.h:49
vlib_node_registration_t vhost_user_input_node
(constructor) VLIB_REGISTER_NODE (vhost_user_input_node)
Definition: vhost-user.c:61
#define DBG_SOCK(args...)
Definition: vhost-user.c:52
struct clib_bihash_value offset
template key/value backing page structure
static vhost_user_main_t vhost_user_main
Definition: vhost-user.c:105
static void * clib_mem_alloc_aligned(uword size, uword align)
Definition: mem.h:114
short i16
Definition: types.h:46
#define DBG_VQ(args...)
Definition: vhost-user.c:58
#define VLIB_NODE_FUNCTION_MULTIARCH(node, fn)
Definition: node.h:158
static uword unformat_check_input(unformat_input_t *i)
Definition: format.h:169
static u32 random_u32(u32 *seed)
32-bit random number generator
Definition: random.h:69
#define VHOST_NET_VRING_IDX_TX
Definition: vhost-user.h:23
void vlib_worker_thread_barrier_release(vlib_main_t *vm)
Definition: threads.c:1176
#define VLIB_REGISTER_NODE(x,...)
Definition: node.h:143
static clib_error_t * vhost_user_callfd_read_ready(unix_file_t *uf)
Definition: vhost-user.c:230
#define vec_foreach(var, vec)
Vector iterator.
#define foreach_vhost_user_input_func_error
Definition: vhost-user.c:83
#define CLIB_MEMORY_BARRIER()
Definition: clib.h:101
vhost_vring_addr_t addr
Definition: vhost-user.h:82
#define clib_error_return(e, args...)
Definition: error.h:111
struct _unformat_input_t unformat_input_t
static void vlib_set_trace_count(vlib_main_t *vm, vlib_node_runtime_t *rt, u32 count)
Definition: trace_funcs.h:159
u32 flags
Definition: vhost-user.h:76
#define vec_validate_init_empty(V, I, INIT)
Make sure vector is long enough for given index and initialize empty space (no header, unspecified alignment)
Definition: vec.h:445
#define CLIB_CACHE_LINE_BYTES
Definition: cache.h:67
u32 flags
buffer flags: VLIB_BUFFER_IS_TRACED: trace this buffer.
Definition: buffer.h:85
uword * vhost_user_interface_index_by_sw_if_index
Definition: vhost-user.h:250
unformat_function_t unformat_line_input
Definition: format.h:281
uword runtime_data[(128-1 *sizeof(vlib_node_function_t *)-1 *sizeof(vlib_error_t *)-11 *sizeof(u32)-5 *sizeof(u16))/sizeof(uword)]
Definition: node.h:472
#define VHOST_LOG_PAGE
Definition: vhost-user.c:271
static vlib_buffer_t * vlib_get_buffer(vlib_main_t *vm, u32 buffer_index)
Translate buffer index into buffer pointer.
Definition: buffer_funcs.h:69
clib_error_t * vhost_user_connect_command_fn(vlib_main_t *vm, unformat_input_t *input, vlib_cli_command_t *cmd)
Definition: vhost-user.c:1950
VNET_HW_INTERFACE_CLASS(vhost_interface_class, static)
Definition: defs.h:46
int dont_dump_vhost_user_memory
Definition: vhost-user.h:254