FD.io VPP  v16.06
Vector Packet Processing
vhost-user.c
Go to the documentation of this file.
1 /*
2  *------------------------------------------------------------------
3  * vhost-user.c - vhost-user
4  *
5  * Copyright (c) 2014 Cisco and/or its affiliates.
6  * Licensed under the Apache License, Version 2.0 (the "License");
7  * you may not use this file except in compliance with the License.
8  * You may obtain a copy of the License at:
9  *
10  * http://www.apache.org/licenses/LICENSE-2.0
11  *
12  * Unless required by applicable law or agreed to in writing, software
13  * distributed under the License is distributed on an "AS IS" BASIS,
14  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  * See the License for the specific language governing permissions and
16  * limitations under the License.
17  *------------------------------------------------------------------
18  */
19 
20 #include <fcntl.h> /* for open */
21 #include <sys/ioctl.h>
22 #include <sys/socket.h>
23 #include <sys/un.h>
24 #include <sys/stat.h>
25 #include <sys/types.h>
26 #include <sys/uio.h> /* for iovec */
27 #include <netinet/in.h>
28 #include <sys/vfs.h>
29 
30 #include <linux/if_arp.h>
31 #include <linux/if_tun.h>
32 
33 #include <vlib/vlib.h>
34 #include <vlib/unix/unix.h>
35 
36 #include <vnet/ip/ip.h>
37 
38 #include <vnet/ethernet/ethernet.h>
39 
41 
42 #define VHOST_USER_DEBUG_SOCKET 0
43 #define VHOST_USER_DEBUG_VQ 0
44 
45 /* Set to get virtio_net_hdr in buffer pre-data
46  details will be shown in packet trace */
47 #define VHOST_USER_COPY_TX_HDR 0
48 
/*
 * Debug logging helpers.
 *
 * DBG_SOCK() and DBG_VQ() forward their arguments to clib_warning() when the
 * matching VHOST_USER_DEBUG_* switch is set to 1, and expand to an empty
 * statement otherwise.
 *
 * Both variants are wrapped in do { } while (0) and carry no trailing
 * semicolon of their own, so "DBG_SOCK(...);" is exactly one statement and is
 * safe as the body of an un-braced if/else.
 */
#if VHOST_USER_DEBUG_SOCKET == 1
#define DBG_SOCK(args...) do { clib_warning(args); } while (0)
#else
#define DBG_SOCK(args...) do { } while (0)
#endif

#if VHOST_USER_DEBUG_VQ == 1
#define DBG_VQ(args...) do { clib_warning(args); } while (0)
#else
#define DBG_VQ(args...) do { } while (0)
#endif
60 
62 
/*
 * Table of TX-path error counters, one _(SYMBOL, "description") entry each.
 * The user defines _() before expanding the table, e.g. to build the
 * VHOST_USER_TX_FUNC_ERROR_<SYMBOL> enumerators or the list of strings.
 */
#define foreach_vhost_user_tx_func_error                            \
  _(NONE, "no error")                                               \
  _(NOT_READY, "vhost user state error")                            \
  _(PKT_DROP_NOBUF, "tx packet drops (no available descriptors)")   \
  _(MMAP_FAIL, "mmap failure")
68 
69 typedef enum {
70 #define _(f,s) VHOST_USER_TX_FUNC_ERROR_##f,
72 #undef _
75 
77 #define _(n,s) s,
79 #undef _
80 };
81 
/*
 * Table of input-path error counters, one _(SYMBOL, "description") entry
 * each.  The user defines _() before expanding the table, e.g. to build the
 * VHOST_USER_INPUT_FUNC_ERROR_<SYMBOL> enumerators or the list of strings.
 */
#define foreach_vhost_user_input_func_error                                 \
  _(NO_ERROR, "no error")                                                   \
  _(NO_BUFFER, "no available buffer")                                       \
  _(MMAP_FAIL, "mmap failure")                                              \
  _(UNDERSIZED_FRAME, "undersized ethernet frame received (< 14 bytes)")
87 
88 typedef enum {
89 #define _(f,s) VHOST_USER_INPUT_FUNC_ERROR_##f,
91 #undef _
94 
96 #define _(n,s) s,
98 #undef _
99 };
100 
101 static vhost_user_main_t vhost_user_main = {
102  .mtu_bytes = 1518,
103 };
104 
105 VNET_HW_INTERFACE_CLASS (vhost_interface_class, static) = {
106  .name = "vhost-user",
107 };
108 
109 static u8 * format_vhost_user_interface_name (u8 * s, va_list * args)
110 {
111  u32 i = va_arg (*args, u32);
112  u32 show_dev_instance = ~0;
114 
116  show_dev_instance = vum->show_dev_instance_by_real_dev_instance[i];
117 
118  if (show_dev_instance != ~0)
119  i = show_dev_instance;
120 
121  s = format (s, "VirtualEthernet0/0/%d", i);
122  return s;
123 }
124 
126  u32 new_dev_instance)
127 {
129 
131  hi->dev_instance, ~0);
132 
134  new_dev_instance;
135 
136  DBG_SOCK("renumbered vhost-user interface dev_instance %d to %d",
137  hi->dev_instance, new_dev_instance);
138 
139  return 0;
140 }
141 
142 
143 static inline void * map_guest_mem(vhost_user_intf_t * vui, u64 addr)
144 {
145  int i;
146  for (i=0; i<vui->nregions; i++) {
147  if ((vui->regions[i].guest_phys_addr <= addr) &&
148  ((vui->regions[i].guest_phys_addr + vui->regions[i].memory_size) > addr)) {
149  return (void *) (vui->region_mmap_addr[i] + addr - vui->regions[i].guest_phys_addr);
150  }
151  }
152  DBG_VQ("failed to map guest mem addr %llx", addr);
153  return 0;
154 }
155 
156 static inline void * map_user_mem(vhost_user_intf_t * vui, u64 addr)
157 {
158  int i;
159  for (i=0; i<vui->nregions; i++) {
160  if ((vui->regions[i].userspace_addr <= addr) &&
161  ((vui->regions[i].userspace_addr + vui->regions[i].memory_size) > addr)) {
162  return (void *) (vui->region_mmap_addr[i] + addr - vui->regions[i].userspace_addr);
163  }
164  }
165  return 0;
166 }
167 
/*
 * Return the block size (f_bsize) of the filesystem that backs fd, which the
 * callers in this file use as the page size when sizing mmap()/munmap()
 * ranges.
 *
 * Returns 0 when fstatfs() fails (for example on an invalid descriptor).
 * Without this check a failed call would return f_bsize from an
 * uninitialized struct statfs.
 */
static long get_huge_page_size(int fd)
{
  struct statfs s;

  if (fstatfs(fd, &s) != 0)
    return 0;

  return s.f_bsize;
}
174 
176 {
177  int i,r;
178  for (i=0; i<vui->nregions; i++) {
179  if (vui->region_mmap_addr[i] != (void *) -1) {
180 
181  long page_sz = get_huge_page_size(vui->region_mmap_fd[i]);
182 
183  ssize_t map_sz = (vui->regions[i].memory_size +
184  vui->regions[i].mmap_offset + page_sz) & ~(page_sz - 1);
185 
186  r = munmap(vui->region_mmap_addr[i] - vui->regions[i].mmap_offset, map_sz);
187 
188  DBG_SOCK("unmap memory region %d addr 0x%lx len 0x%lx page_sz 0x%x", i,
189  vui->region_mmap_addr[i], map_sz, page_sz);
190 
191  vui->region_mmap_addr[i]= (void *) -1;
192 
193  if (r == -1) {
194  clib_warning("failed to unmap memory region (errno %d)", errno);
195  }
196  close(vui->region_mmap_fd[i]);
197  }
198  }
199  vui->nregions = 0;
200 }
201 
202 
204 {
205  __attribute__((unused)) int n;
206  u8 buff[8];
207  n = read(uf->file_descriptor, ((char*)&buff), 8);
208  return 0;
209 }
210 
212 {
214  vnet_main_t * vnm = vnet_get_main();
215  int q;
216 
218 
219  if (vui->unix_file_index != ~0) {
221  vui->unix_file_index = ~0;
222  }
223 
226  close(vui->unix_fd);
227  vui->unix_fd = -1;
228  vui->is_up = 0;
229  for (q = 0; q < vui->num_vrings; q++) {
230  vui->vrings[q].desc = NULL;
231  vui->vrings[q].avail = NULL;
232  vui->vrings[q].used = NULL;
233  vui->vrings[q].log_guest_addr = 0;
234  vui->vrings[q].log_used = 0;
235  }
236 
238  DBG_SOCK("interface ifindex %d disconnected", vui->sw_if_index);
239 }
240 
241 #define VHOST_LOG_PAGE 0x1000
243  u64 addr, u64 len)
244 {
245  if (PREDICT_TRUE(vui->log_base_addr == 0
246  || !(vui->features & (1 << FEAT_VHOST_F_LOG_ALL)))) {
247  return;
248  }
249  if (PREDICT_FALSE((addr + len - 1) / VHOST_LOG_PAGE / 8 >= vui->log_size)) {
250  DBG_SOCK("vhost_user_log_dirty_pages(): out of range\n");
251  return;
252  }
253 
255  u64 page = addr / VHOST_LOG_PAGE;
256  while (page * VHOST_LOG_PAGE < addr + len) {
257  ((u8*)vui->log_base_addr)[page / 8] |= 1 << page % 8;
258  page++;
259  }
260 }
261 
/*
 * Mark the bytes of one member of the used ring as dirty.
 *
 * When vq->log_used is set, calls vhost_user_log_dirty_pages() for the range
 * that starts at vq->log_guest_addr + offsetof(vring_used_t, member) and is
 * sizeof(vq->used->member) bytes long.  Does nothing otherwise.
 *
 * Wrapped in do { } while (0) so that the macro is a single statement and an
 * "else" following an un-braced use cannot bind to the macro's internal "if".
 * The vui and vq arguments are parenthesized; member is spliced in verbatim
 * because it is a member designator, not an expression.
 */
#define vhost_user_log_dirty_ring(vui, vq, member)                          \
  do {                                                                      \
    if (PREDICT_FALSE((vq)->log_used)) {                                    \
      vhost_user_log_dirty_pages((vui),                                     \
                                 (vq)->log_guest_addr +                     \
                                   offsetof(vring_used_t, member),          \
                                 sizeof((vq)->used->member));               \
    }                                                                       \
  } while (0)
267 
269 {
270  int n, i;
271  int fd, number_of_fds = 0;
272  int fds[VHOST_MEMORY_MAX_NREGIONS];
273  vhost_user_msg_t msg;
274  struct msghdr mh;
275  struct iovec iov[1];
277  vhost_user_intf_t * vui;
278  struct cmsghdr *cmsg;
279  uword * p;
280  u8 q;
281  unix_file_t template = {0};
282  vnet_main_t * vnm = vnet_get_main();
283 
284  p = hash_get (vum->vhost_user_interface_index_by_sock_fd,
285  uf->file_descriptor);
286  if (p == 0) {
287  DBG_SOCK ("FD %d doesn't belong to any interface",
288  uf->file_descriptor);
289  return 0;
290  }
291  else
292  vui = vec_elt_at_index (vum->vhost_user_interfaces, p[0]);
293 
294  char control[CMSG_SPACE(VHOST_MEMORY_MAX_NREGIONS * sizeof(int))];
295 
296  memset(&mh, 0, sizeof(mh));
297  memset(control, 0, sizeof(control));
298 
299  /* set the payload */
300  iov[0].iov_base = (void *) &msg;
301  iov[0].iov_len = VHOST_USER_MSG_HDR_SZ;
302 
303  mh.msg_iov = iov;
304  mh.msg_iovlen = 1;
305  mh.msg_control = control;
306  mh.msg_controllen = sizeof(control);
307 
308  n = recvmsg(uf->file_descriptor, &mh, 0);
309 
310  if (n != VHOST_USER_MSG_HDR_SZ)
311  goto close_socket;
312 
313  if (mh.msg_flags & MSG_CTRUNC) {
314  goto close_socket;
315  }
316 
317  cmsg = CMSG_FIRSTHDR(&mh);
318 
319  if (cmsg && (cmsg->cmsg_len > 0) && (cmsg->cmsg_level == SOL_SOCKET) &&
320  (cmsg->cmsg_type == SCM_RIGHTS) &&
321  (cmsg->cmsg_len - CMSG_LEN(0) <= VHOST_MEMORY_MAX_NREGIONS * sizeof(int))) {
322  number_of_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
323  clib_memcpy(fds, CMSG_DATA(cmsg), number_of_fds * sizeof(int));
324  }
325 
326  /* version 1, no reply bit set*/
327  if ((msg.flags & 7) != 1) {
328  DBG_SOCK("malformed message received. closing socket");
329  goto close_socket;
330  }
331 
332  {
333  int rv __attribute__((unused));
334  /* $$$$ pay attention to rv */
335  rv = read(uf->file_descriptor, ((char*)&msg) + n, msg.size);
336  }
337 
338  switch (msg.request) {
340  DBG_SOCK("if %d msg VHOST_USER_GET_FEATURES",
341  vui->hw_if_index);
342 
343  msg.flags |= 4;
344  msg.u64 = (1 << FEAT_VIRTIO_NET_F_MRG_RXBUF) |
345  (1 << FEAT_VIRTIO_F_ANY_LAYOUT) |
346  (1 << FEAT_VHOST_F_LOG_ALL) |
347  (1 << FEAT_VIRTIO_NET_F_GUEST_ANNOUNCE) |
348  (1 << FEAT_VHOST_USER_F_PROTOCOL_FEATURES);
349  msg.u64 &= vui->feature_mask;
350 
351  msg.size = sizeof(msg.u64);
352  break;
353 
355  DBG_SOCK("if %d msg VHOST_USER_SET_FEATURES features 0x%016llx",
356  vui->hw_if_index, msg.u64);
357 
358  vui->features = msg.u64;
359 
360  if (vui->features & (1 << FEAT_VIRTIO_NET_F_MRG_RXBUF))
361  vui->virtio_net_hdr_sz = 12;
362  else
363  vui->virtio_net_hdr_sz = 10;
364 
365  vui->is_any_layout = (vui->features & (1 << FEAT_VIRTIO_F_ANY_LAYOUT)) ? 1 : 0;
366 
369  vui->is_up = 0;
370 
371  for (q = 0; q < 2; q++) {
372  vui->vrings[q].desc = 0;
373  vui->vrings[q].avail = 0;
374  vui->vrings[q].used = 0;
375  vui->vrings[q].log_guest_addr = 0;
376  vui->vrings[q].log_used = 0;
377  }
378 
379  DBG_SOCK("interface %d disconnected", vui->sw_if_index);
380 
381  break;
382 
384  DBG_SOCK("if %d msg VHOST_USER_SET_MEM_TABLE nregions %d",
385  vui->hw_if_index, msg.memory.nregions);
386 
387  if ((msg.memory.nregions < 1) ||
388  (msg.memory.nregions > VHOST_MEMORY_MAX_NREGIONS)) {
389 
390  DBG_SOCK("number of mem regions must be between 1 and %i",
392 
393  goto close_socket;
394  }
395 
396  if (msg.memory.nregions != number_of_fds) {
397  DBG_SOCK("each memory region must have FD");
398  goto close_socket;
399  }
401  for(i=0; i < msg.memory.nregions; i++) {
402  clib_memcpy(&(vui->regions[i]), &msg.memory.regions[i],
404 
405  long page_sz = get_huge_page_size(fds[i]);
406 
407  /* align size to the page size reported for fds[i] (not necessarily 2M) */
408  ssize_t map_sz = (vui->regions[i].memory_size +
409  vui->regions[i].mmap_offset + page_sz) & ~(page_sz - 1);
410 
411  vui->region_mmap_addr[i] = mmap(0, map_sz, PROT_READ | PROT_WRITE,
412  MAP_SHARED, fds[i], 0);
413 
414  DBG_SOCK("map memory region %d addr 0 len 0x%lx fd %d mapped 0x%lx "
415  "page_sz 0x%x", i, map_sz, fds[i], vui->region_mmap_addr[i], page_sz);
416 
417  if (vui->region_mmap_addr[i] == MAP_FAILED) {
418  clib_warning("failed to map memory. errno is %d", errno);
419  goto close_socket;
420  }
421  vui->region_mmap_addr[i] += vui->regions[i].mmap_offset;
422  vui->region_mmap_fd[i] = fds[i];
423  }
424  vui->nregions = msg.memory.nregions;
425  break;
426 
428  DBG_SOCK("if %d msg VHOST_USER_SET_VRING_NUM idx %d num %d",
429  vui->hw_if_index, msg.state.index, msg.state.num);
430 
431  if ((msg.state.num > 32768) || /* maximum ring size is 32768 */
432  (msg.state.num == 0) || /* it cannot be zero */
433  (msg.state.num % 2)) /* must be power of 2 */
434  goto close_socket;
435  vui->vrings[msg.state.index].qsz = msg.state.num;
436  break;
437 
439  DBG_SOCK("if %d msg VHOST_USER_SET_VRING_ADDR idx %d",
440  vui->hw_if_index, msg.state.index);
441 
442  vui->vrings[msg.state.index].desc = (vring_desc_t *)
443  map_user_mem(vui, msg.addr.desc_user_addr);
444  vui->vrings[msg.state.index].used = (vring_used_t *)
445  map_user_mem(vui, msg.addr.used_user_addr);
446  vui->vrings[msg.state.index].avail = (vring_avail_t *)
447  map_user_mem(vui, msg.addr.avail_user_addr);
448 
449  if ((vui->vrings[msg.state.index].desc == NULL) ||
450  (vui->vrings[msg.state.index].used == NULL) ||
451  (vui->vrings[msg.state.index].avail == NULL)) {
452  DBG_SOCK("failed to map user memory for hw_if_index %d",
453  vui->hw_if_index);
454  goto close_socket;
455  }
456 
457  vui->vrings[msg.state.index].log_guest_addr = msg.addr.log_guest_addr;
458  vui->vrings[msg.state.index].log_used =
459  (msg.addr.flags & (1 << VHOST_VRING_F_LOG)) ? 1 : 0;
460 
461  /* Spec says: If VHOST_USER_F_PROTOCOL_FEATURES has not been negotiated,
462  the ring is initialized in an enabled state. */
463 
464  if (!(vui->features & (1 << FEAT_VHOST_USER_F_PROTOCOL_FEATURES))) {
465  vui->vrings[msg.state.index].enabled = 1;
466  }
467 
468  vui->vrings[msg.state.index].last_used_idx =
469  vui->vrings[msg.state.index].used->idx;
470 
471  /* tell driver that we don't want interrupts */
472  vui->vrings[msg.state.index].used->flags |= 1;
473  break;
474 
476  DBG_SOCK("if %d msg VHOST_USER_SET_OWNER",
477  vui->hw_if_index);
478  break;
479 
481  DBG_SOCK("if %d msg VHOST_USER_RESET_OWNER",
482  vui->hw_if_index);
483  break;
484 
486  DBG_SOCK("if %d msg VHOST_USER_SET_VRING_CALL u64 %d",
487  vui->hw_if_index, msg.u64);
488 
489  q = (u8) (msg.u64 & 0xFF);
490 
491  if (!(msg.u64 & 0x100))
492  {
493  if (number_of_fds != 1)
494  goto close_socket;
495 
496  /* if there is old fd, delete it */
497  if (vui->vrings[q].callfd) {
499  vui->vrings[q].callfd_idx);
500  unix_file_del (&unix_main, uf);
501  }
502  vui->vrings[q].callfd = fds[0];
503  template.read_function = vhost_user_callfd_read_ready;
504  template.file_descriptor = fds[0];
505  vui->vrings[q].callfd_idx = unix_file_add (&unix_main, &template);
506  }
507  else
508  vui->vrings[q].callfd = -1;
509  break;
510 
512  DBG_SOCK("if %d msg VHOST_USER_SET_VRING_KICK u64 %d",
513  vui->hw_if_index, msg.u64);
514 
515  q = (u8) (msg.u64 & 0xFF);
516 
517  if (!(msg.u64 & 0x100))
518  {
519  if (number_of_fds != 1)
520  goto close_socket;
521 
522  vui->vrings[q].kickfd = fds[0];
523  }
524  else
525  vui->vrings[q].kickfd = -1;
526  break;
527 
529  DBG_SOCK("if %d msg VHOST_USER_SET_VRING_ERR u64 %d",
530  vui->hw_if_index, msg.u64);
531 
532  q = (u8) (msg.u64 & 0xFF);
533 
534  if (!(msg.u64 & 0x100))
535  {
536  if (number_of_fds != 1)
537  goto close_socket;
538 
539  fd = fds[0];
540  }
541  else
542  fd = -1;
543 
544  vui->vrings[q].errfd = fd;
545  break;
546 
548  DBG_SOCK("if %d msg VHOST_USER_SET_VRING_BASE idx %d num %d",
549  vui->hw_if_index, msg.state.index, msg.state.num);
550 
551  vui->vrings[msg.state.index].last_avail_idx = msg.state.num;
552  break;
553 
555  DBG_SOCK("if %d msg VHOST_USER_GET_VRING_BASE idx %d num %d",
556  vui->hw_if_index, msg.state.index, msg.state.num);
557 
558  /* Spec says: Client must [...] stop ring upon receiving VHOST_USER_GET_VRING_BASE. */
559  vui->vrings[msg.state.index].enabled = 0;
560 
561  msg.state.num = vui->vrings[msg.state.index].last_avail_idx;
562  msg.flags |= 4;
563  msg.size = sizeof(msg.state);
564  break;
565 
566  case VHOST_USER_NONE:
567  DBG_SOCK("if %d msg VHOST_USER_NONE",
568  vui->hw_if_index);
569 
570  break;
571 
573  {
574  DBG_SOCK("if %d msg VHOST_USER_SET_LOG_BASE",
575  vui->hw_if_index);
576 
577  if (msg.size != sizeof(msg.log)) {
578  DBG_SOCK("invalid msg size for VHOST_USER_SET_LOG_BASE: %d instead of %d",
579  msg.size, sizeof(msg.log));
580  goto close_socket;
581  }
582 
583  if (!(vui->protocol_features & (1 << VHOST_USER_PROTOCOL_F_LOG_SHMFD))) {
584  DBG_SOCK("VHOST_USER_PROTOCOL_F_LOG_SHMFD not set but VHOST_USER_SET_LOG_BASE received");
585  goto close_socket;
586  }
587 
588  fd = fds[0];
589  /* align size to the page size reported for fd (not necessarily 2M) */
590  long page_sz = get_huge_page_size(fd);
591  ssize_t map_sz = (msg.log.size + msg.log.offset + page_sz) & ~(page_sz - 1);
592 
593  vui->log_base_addr = mmap(0, map_sz, PROT_READ | PROT_WRITE,
594  MAP_SHARED, fd, 0);
595 
596  DBG_SOCK("map log region addr 0 len 0x%lx off 0x%lx fd %d mapped 0x%lx",
597  map_sz, msg.log.offset, fd, vui->log_base_addr);
598 
599  if (vui->log_base_addr == MAP_FAILED) {
600  clib_warning("failed to map memory. errno is %d", errno);
601  goto close_socket;
602  }
603 
604  vui->log_base_addr += msg.log.offset;
605  vui->log_size = msg.log.size;
606 
607  msg.flags |= 4;
608  msg.size = sizeof(msg.u64);
609 
610  break;
611  }
612 
614  DBG_SOCK("if %d msg VHOST_USER_SET_LOG_FD",
615  vui->hw_if_index);
616 
617  break;
618 
620  DBG_SOCK("if %d msg VHOST_USER_GET_PROTOCOL_FEATURES", vui->hw_if_index);
621 
622  msg.flags |= 4;
623  msg.u64 = (1 << VHOST_USER_PROTOCOL_F_LOG_SHMFD);
624  msg.size = sizeof(msg.u64);
625  break;
626 
628  DBG_SOCK("if %d msg VHOST_USER_SET_PROTOCOL_FEATURES features 0x%lx",
629  vui->hw_if_index, msg.u64);
630 
631  vui->protocol_features = msg.u64;
632 
633  break;
634 
636  DBG_SOCK("if %d VHOST_USER_SET_VRING_ENABLE, enable: %d",
637  vui->hw_if_index, msg.state.num);
638  vui->vrings[msg.state.index].enabled = msg.state.num;
639  break;
640 
641  default:
642  DBG_SOCK("unknown vhost-user message %d received. closing socket",
643  msg.request);
644  goto close_socket;
645  }
646 
647  /* if we have pointers to descriptor table, go up*/
648  if (!vui->is_up &&
651 
652  DBG_SOCK("interface %d connected", vui->sw_if_index);
653 
655  vui->is_up = 1;
656 
657  }
658 
659  /* if we need to reply */
660  if (msg.flags & 4)
661  {
662  n = send(uf->file_descriptor, &msg, VHOST_USER_MSG_HDR_SZ + msg.size, 0);
663  if (n != (msg.size + VHOST_USER_MSG_HDR_SZ))
664  goto close_socket;
665  }
666 
667  return 0;
668 
669 close_socket:
671  return 0;
672 }
673 
675 {
677  vhost_user_intf_t * vui;
678  uword * p;
679 
681  uf->file_descriptor);
682  if (p == 0) {
683  DBG_SOCK ("fd %d doesn't belong to any interface",
684  uf->file_descriptor);
685  return 0;
686  }
687  else
688  vui = vec_elt_at_index (vum->vhost_user_interfaces, p[0]);
689 
691  return 0;
692 }
693 
695 {
696  int client_fd, client_len;
697  struct sockaddr_un client;
698  unix_file_t template = {0};
700  vhost_user_intf_t * vui;
701  uword * p;
702 
704  uf->file_descriptor);
705  if (p == 0) {
706  DBG_SOCK ("fd %d doesn't belong to any interface",
707  uf->file_descriptor);
708  return 0;
709  }
710  else
711  vui = vec_elt_at_index (vum->vhost_user_interfaces, p[0]);
712 
713  client_len = sizeof(client);
714  client_fd = accept (uf->file_descriptor,
715  (struct sockaddr *)&client,
716  (socklen_t *)&client_len);
717 
718  if (client_fd < 0)
719  return clib_error_return_unix (0, "accept");
720 
721  template.read_function = vhost_user_socket_read;
722  template.error_function = vhost_user_socket_error;
723  template.file_descriptor = client_fd;
724  vui->unix_file_index = unix_file_add (&unix_main, &template);
725 
726  vui->client_fd = client_fd;
728  vui - vum->vhost_user_interfaces);
729 
730  return 0;
731 }
732 
733 static clib_error_t *
735 {
736  clib_error_t * error;
739 
740  error = vlib_call_init_function (vm, ip4_init);
741  if (error)
742  return error;
743 
747  vum->coalesce_frames = 32;
748  vum->coalesce_time = 1e-3;
749 
752 
753  return 0;
754 }
755 
757 
758 static clib_error_t *
760 {
761  /* TODO cleanup */
762  return 0;
763 }
764 
766 
767 enum {
771 };
772 
773 
774 typedef struct {
777 #if VHOST_USER_COPY_TX_HDR == 1
778  virtio_net_hdr_t hdr;
779 #endif
781 
782 static u8 * format_vhost_user_input_trace (u8 * s, va_list * va)
783 {
784  CLIB_UNUSED (vlib_main_t * vm) = va_arg (*va, vlib_main_t *);
785  CLIB_UNUSED (vlib_node_t * node) = va_arg (*va, vlib_node_t *);
790  t->device_index);
791 
793 
794 #if VHOST_USER_COPY_TX_HDR == 1
795  uword indent = format_get_indent (s);
796 #endif
797 
798  s = format (s, "%U virtqueue %d",
800  t->virtqueue);
801 
802 #if VHOST_USER_COPY_TX_HDR == 1
803  s = format (s, "\n%Uvirtio_net_hdr flags 0x%02x gso_type %u hdr_len %u",
804  format_white_space, indent,
805  t->hdr.flags,
806  t->hdr.gso_type,
807  t->hdr.hdr_len);
808 #endif
809 
810  return s;
811 }
812 
814  vlib_node_runtime_t * node,
815  vhost_user_intf_t *vui,
816  i16 virtqueue)
817 {
818  u32 * b, n_left;
820 
822 
823  n_left = vec_len(vui->d_trace_buffers);
824  b = vui->d_trace_buffers;
825 
826  while (n_left >= 1)
827  {
828  u32 bi0;
829  vlib_buffer_t * b0;
831 
832  bi0 = b[0];
833  n_left -= 1;
834 
835  b0 = vlib_get_buffer (vm, bi0);
836  vlib_trace_buffer (vm, node, next_index, b0, /* follow_chain */ 0);
837  t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
838  t0->virtqueue = virtqueue;
839  t0->device_index = vui - vum->vhost_user_interfaces;
840 #if VHOST_USER_COPY_TX_HDR == 1
841  clib_memcpy(&t0->hdr, b0->pre_data, sizeof(virtio_net_hdr_t));
842 #endif
843 
844  b+=1;
845  }
846 }
847 
849 {
851  u64 x = 1;
852  int rv __attribute__((unused));
853  /* $$$$ pay attention to rv */
854  rv = write(vq->callfd, &x, sizeof(x));
855  vq->n_since_last_int = 0;
856  vq->int_deadline = vlib_time_now(vm) + vum->coalesce_time;
857 }
858 
859 
861  vhost_user_main_t * vum,
862  vhost_user_intf_t * vui,
863  vlib_node_runtime_t * node)
864 {
867  uword n_rx_packets = 0, n_rx_bytes = 0;
868  uword n_left;
869  u32 n_left_to_next, * to_next;
870  u32 next_index = 0;
871  u32 next0;
872  uword n_trace = vlib_get_trace_count (vm, node);
873  u16 qsz_mask;
874  u32 cpu_index, rx_len, drops, flush;
875  f64 now = vlib_time_now (vm);
876 
878 
879  /* no descriptor ptr - bail out */
880  if (PREDICT_FALSE(!txvq->desc || !txvq->avail || !txvq->enabled))
881  return 0;
882 
883  /* do we have pending interrupts ? */
884  if ((txvq->n_since_last_int) && (txvq->int_deadline < now))
885  vhost_user_send_call(vm, txvq);
886 
887  if ((rxvq->n_since_last_int) && (rxvq->int_deadline < now))
888  vhost_user_send_call(vm, rxvq);
889 
890  /* only bit 0 of avail.flags is used so we don't want to deal with this
891  interface if any other bit is set */
892  if (PREDICT_FALSE(txvq->avail->flags & 0xFFFE))
893  return 0;
894 
895  /* nothing to do */
896  if (txvq->avail->idx == txvq->last_avail_idx)
897  return 0;
898 
899  if (PREDICT_TRUE(txvq->avail->idx > txvq->last_avail_idx))
900  n_left = txvq->avail->idx - txvq->last_avail_idx;
901  else /* wrapped */
902  n_left = (u16) -1 - txvq->last_avail_idx + txvq->avail->idx;
903 
904  if (PREDICT_FALSE(!vui->admin_up)) {
905  /* if intf is admin down, just drop all packets waiting in the ring */
906  txvq->last_avail_idx = txvq->last_used_idx = txvq->avail->idx;
908  txvq->used->idx = txvq->last_used_idx;
909  vhost_user_log_dirty_ring(vui, txvq, idx);
910  vhost_user_send_call(vm, txvq);
911  return 0;
912  }
913 
914  if (PREDICT_FALSE(n_left > txvq->qsz))
915  return 0;
916 
917  qsz_mask = txvq->qsz - 1;
918  cpu_index = os_get_cpu_number();
919  drops = 0;
920  flush = 0;
921 
922  if (n_left > VLIB_FRAME_SIZE)
923  n_left = VLIB_FRAME_SIZE;
924 
925  /* Allocate some buffers.
926  * Note that buffers that are chained for jumbo
927  * frames are allocated separately using a slower path.
928  * The idea is to be certain to have enough buffers at least
929  * to cycle through the descriptors without having to check for errors.
930  * For jumbo frames, the bottleneck is memory copy anyway.
931  */
932  if (PREDICT_FALSE(!vum->rx_buffers[cpu_index])) {
933  vec_alloc (vum->rx_buffers[cpu_index], VLIB_FRAME_SIZE);
934 
935  if (PREDICT_FALSE(!vum->rx_buffers[cpu_index]))
936  flush = n_left; //Drop all input
937  }
938 
939  if (PREDICT_FALSE(_vec_len(vum->rx_buffers[cpu_index]) < n_left)) {
940  _vec_len(vum->rx_buffers[cpu_index]) +=
941  vlib_buffer_alloc_from_free_list(vm, vum->rx_buffers[cpu_index] + _vec_len(vum->rx_buffers[cpu_index]),
942  VLIB_FRAME_SIZE - _vec_len(vum->rx_buffers[cpu_index]),
944 
945  if (PREDICT_FALSE(n_left > _vec_len(vum->rx_buffers[cpu_index])))
946  flush = n_left - _vec_len(vum->rx_buffers[cpu_index]);
947  }
948 
949  if (PREDICT_FALSE(flush)) {
950  //Remove some input buffers
951  drops += flush;
952  n_left -= flush;
954  VHOST_USER_INPUT_FUNC_ERROR_NO_BUFFER, flush);
955  while (flush) {
956  u16 desc_chain_head = txvq->avail->ring[txvq->last_avail_idx & qsz_mask];
957  txvq->last_avail_idx++;
958  txvq->used->ring[txvq->last_used_idx & qsz_mask].id = desc_chain_head;
959  txvq->used->ring[txvq->last_used_idx & qsz_mask].len = 0;
960  vhost_user_log_dirty_ring(vui, txvq, ring[txvq->last_used_idx & qsz_mask]);
961  txvq->last_used_idx++;
962  flush--;
963  }
964  }
965 
966  rx_len = vec_len(vum->rx_buffers[cpu_index]); //vector might be null
967  while (n_left > 0) {
968  vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
969 
970  while (n_left > 0 && n_left_to_next > 0) {
971  vlib_buffer_t *b_head, *b_current;
972  u32 bi_head, bi_current;
973  u16 desc_chain_head, desc_current;
974  u8 error = VHOST_USER_INPUT_FUNC_ERROR_NO_ERROR;
975 
976  desc_chain_head = desc_current = txvq->avail->ring[txvq->last_avail_idx & qsz_mask];
977  bi_head = bi_current = vum->rx_buffers[cpu_index][--rx_len];
978  b_head = b_current = vlib_get_buffer (vm, bi_head);
979  vlib_buffer_chain_init(b_head);
980 
981  uword offset;
982  if (PREDICT_TRUE(vui->is_any_layout) ||
983  !(txvq->desc[desc_current].flags & VIRTQ_DESC_F_NEXT)) {
984  /* ANYLAYOUT or single buffer */
985  offset = vui->virtio_net_hdr_sz;
986  } else {
987  /* CSR case without ANYLAYOUT, skip 1st buffer */
988  offset = txvq->desc[desc_current].len;
989  }
990 
991  while(1) {
992  void * buffer_addr = map_guest_mem(vui, txvq->desc[desc_current].addr);
993  if (PREDICT_FALSE(buffer_addr == 0)) {
994  error = VHOST_USER_INPUT_FUNC_ERROR_MMAP_FAIL;
995  break;
996  }
997 
998 #if VHOST_USER_COPY_TX_HDR == 1
999  if (PREDICT_TRUE(offset))
1000  clib_memcpy(b->pre_data, buffer_addr, sizeof(virtio_net_hdr_t)); /* 12 byte hdr is not used on tx */
1001 #endif
1002 
1003  if (txvq->desc[desc_current].len > offset) {
1004  u16 len = txvq->desc[desc_current].len - offset;
1006  b_head, &b_current, buffer_addr + offset, len);
1007 
1008  if (copied != len) {
1009  error = VHOST_USER_INPUT_FUNC_ERROR_NO_BUFFER;
1010  break;
1011  }
1012  }
1013  offset = 0;
1014 
1015  /* if next flag is set, take next desc in the chain */
1016  if (txvq->desc[desc_current].flags & VIRTQ_DESC_F_NEXT )
1017  desc_current = txvq->desc[desc_current].next;
1018  else
1019  break;
1020  }
1021 
1022  /* consume the descriptor and return it as used */
1023  txvq->last_avail_idx++;
1024  txvq->used->ring[txvq->last_used_idx & qsz_mask].id = desc_chain_head;
1025  txvq->used->ring[txvq->last_used_idx & qsz_mask].len = 0;
1026  vhost_user_log_dirty_ring(vui, txvq, ring[txvq->last_used_idx & qsz_mask]);
1027  txvq->last_used_idx++;
1028 
1029  if(PREDICT_FALSE(b_head->current_length < 14 &&
1030  error == VHOST_USER_INPUT_FUNC_ERROR_NO_ERROR)) {
1031  error = VHOST_USER_INPUT_FUNC_ERROR_UNDERSIZED_FRAME;
1032  }
1033 
1035 
1036  vnet_buffer (b_head)->sw_if_index[VLIB_RX] = vui->sw_if_index;
1037  vnet_buffer (b_head)->sw_if_index[VLIB_TX] = (u32)~0;
1038  b_head->error = node->errors[error];
1039 
1040  if (PREDICT_FALSE (n_trace > n_rx_packets))
1041  vec_add1 (vui->d_trace_buffers, bi_head);
1042 
1043  if (PREDICT_FALSE(error)) {
1044  drops++;
1045  next0 = VHOST_USER_RX_NEXT_DROP;
1046  } else {
1047  n_rx_bytes += b_head->current_length + b_head->total_length_not_including_first_buffer;
1048  n_rx_packets++;
1050  }
1051 
1052  to_next[0] = bi_head;
1053  to_next++;
1054  n_left_to_next--;
1055  vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
1056  to_next, n_left_to_next,
1057  bi_head, next0);
1058  n_left--;
1059  }
1060 
1061  vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1062  }
1063 
1064  if (PREDICT_TRUE(vum->rx_buffers[cpu_index] != 0))
1065  _vec_len(vum->rx_buffers[cpu_index]) = rx_len;
1066 
1067  /* give buffers back to driver */
1069  txvq->used->idx = txvq->last_used_idx;
1070  vhost_user_log_dirty_ring(vui, txvq, idx);
1071 
1072  if (PREDICT_FALSE (vec_len (vui->d_trace_buffers) > 0))
1073  {
1075  vlib_set_trace_count (vm, node, n_trace - vec_len (vui->d_trace_buffers));
1076  }
1077 
1078  /* interrupt (call) handling */
1079  if((txvq->callfd > 0) && !(txvq->avail->flags & 1)) {
1080  txvq->n_since_last_int += n_rx_packets;
1081 
1082  if(txvq->n_since_last_int > vum->coalesce_frames)
1083  vhost_user_send_call(vm, txvq);
1084  }
1085 
1086  if (PREDICT_FALSE(drops)) {
1090  vui->sw_if_index, drops);
1091  }
1092 
1093  /* increase rx counters */
1098  vui->sw_if_index,
1099  n_rx_packets, n_rx_bytes);
1100 
1101  return n_rx_packets;
1102 }
1103 
1104 static uword
1106  vlib_node_runtime_t * node,
1107  vlib_frame_t * f)
1108 {
1110  dpdk_main_t * dm = &dpdk_main;
1111  vhost_user_intf_t * vui;
1112  uword n_rx_packets = 0;
1113  u32 cpu_index = os_get_cpu_number();
1114  int i;
1115 
1116  for(i = 0; i < vec_len(vum->vhost_user_interfaces); i++ )
1117  {
1118  vui = vec_elt_at_index(vum->vhost_user_interfaces, i);
1119  if (vui->is_up &&
1120  (i % dm->input_cpu_count) == (cpu_index - dm->input_cpu_first_index))
1121  n_rx_packets += vhost_user_if_input (vm, vum, vui, node);
1122  }
1123  return n_rx_packets;
1124 }
1125 
1127  .function = vhost_user_input,
1128  .type = VLIB_NODE_TYPE_INPUT,
1129  .name = "vhost-user-input",
1130 
1131  /* Will be enabled if/when hardware is detected. */
1132  .state = VLIB_NODE_STATE_DISABLED,
1133 
1134  .format_buffer = format_ethernet_header_with_length,
1135  .format_trace = format_vhost_user_input_trace,
1136 
1137  .n_errors = VHOST_USER_INPUT_FUNC_N_ERROR,
1138  .error_strings = vhost_user_input_func_error_strings,
1139 
1140  .n_next_nodes = VHOST_USER_RX_N_NEXT,
1141  .next_nodes = {
1142  [VHOST_USER_RX_NEXT_DROP] = "error-drop",
1143  [VHOST_USER_RX_NEXT_ETHERNET_INPUT] = "ethernet-input",
1144  },
1145 };
1146 
1147 static uword
1149  vlib_node_runtime_t * node,
1150  vlib_frame_t * frame)
1151 {
1152  u32 * buffers = vlib_frame_args (frame);
1153  u32 n_left = 0;
1154  u16 used_index;
1156  uword n_packets = 0;
1157  vnet_interface_output_runtime_t * rd = (void *) node->runtime_data;
1159  vhost_user_vring_t * rxvq = &vui->vrings[VHOST_NET_VRING_IDX_RX];
1160  u16 qsz_mask;
1161  u8 error = VHOST_USER_TX_FUNC_ERROR_NONE;
1162 
1163  if (PREDICT_FALSE(!vui->is_up))
1164  goto done2;
1165 
1166  if (PREDICT_FALSE(!rxvq->desc || !rxvq->avail || vui->sock_errno != 0 || !rxvq->enabled)) {
1167  error = VHOST_USER_TX_FUNC_ERROR_NOT_READY;
1168  goto done2;
1169  }
1170 
1171  if (PREDICT_FALSE(vui->lockp != 0))
1172  {
1173  while (__sync_lock_test_and_set (vui->lockp, 1))
1174  ;
1175  }
1176 
1177  /* only bit 0 of avail.flags is used so we don't want to deal with this
1178  interface if any other bit is set */
1179  if (PREDICT_FALSE(rxvq->avail->flags & 0xFFFE)) {
1180  error = VHOST_USER_TX_FUNC_ERROR_NOT_READY;
1181  goto done2;
1182  }
1183 
1184  if (PREDICT_FALSE((rxvq->avail->idx == rxvq->last_avail_idx))) {
1185  error = VHOST_USER_TX_FUNC_ERROR_PKT_DROP_NOBUF;
1186  goto done2;
1187  }
1188 
1189  n_left = n_packets = frame->n_vectors;
1190  used_index = rxvq->used->idx;
1191  qsz_mask = rxvq->qsz - 1; /* qsz is always power of 2 */
1192 
1193  while (n_left > 0)
1194  {
1195  vlib_buffer_t *b0, *current_b0;
1196  u16 desc_chain_head, desc_current, desc_len;
1197  void *buffer_addr;
1198  uword offset;
1199 
1200  if (n_left >= 2)
1201  vlib_prefetch_buffer_with_index (vm, buffers[1], LOAD);
1202 
1203  b0 = vlib_get_buffer (vm, buffers[0]);
1204  buffers++;
1205  n_left--;
1206 
1207  if (PREDICT_FALSE(rxvq->last_avail_idx == rxvq->avail->idx)) {
1208  error = VHOST_USER_TX_FUNC_ERROR_PKT_DROP_NOBUF;
1209  goto done;
1210  }
1211 
1212  desc_current = desc_chain_head = rxvq->avail->ring[rxvq->last_avail_idx & qsz_mask];
1213  offset = vui->virtio_net_hdr_sz;
1214  desc_len = offset;
1215  if (PREDICT_FALSE(!(buffer_addr = map_guest_mem(vui, rxvq->desc[desc_current].addr)))) {
1216  error = VHOST_USER_TX_FUNC_ERROR_MMAP_FAIL;
1217  goto done;
1218  }
1219  CLIB_PREFETCH(buffer_addr, clib_min(rxvq->desc[desc_current].len,
1220  4*CLIB_CACHE_LINE_BYTES), STORE);
1221 
1222  virtio_net_hdr_mrg_rxbuf_t * hdr = (virtio_net_hdr_mrg_rxbuf_t *) buffer_addr;
1223  hdr->hdr.flags = 0;
1224  hdr->hdr.gso_type = 0;
1225 
1226  vhost_user_log_dirty_pages(vui, rxvq->desc[desc_current].addr, vui->virtio_net_hdr_sz);
1227 
1228  if (vui->virtio_net_hdr_sz == 12)
1229  hdr->num_buffers = 1;
1230 
1231  u16 bytes_left = b0->current_length;
1232  buffer_addr += offset;
1233  current_b0 = b0;
1234 
1235  //FIXME: This was in the code but I don't think it is valid
1236  /*if (PREDICT_FALSE(!vui->is_any_layout && (rxvq->desc[desc_current].flags & VIRTQ_DESC_F_NEXT))) {
1237  rxvq->desc[desc_current].len = vui->virtio_net_hdr_sz;
1238  }*/
1239 
1240  while(1) {
1241  if (!bytes_left) { //Get new input
1242  if (current_b0->flags & VLIB_BUFFER_NEXT_PRESENT) {
1243  current_b0 = vlib_get_buffer(vm, current_b0->next_buffer);
1244  bytes_left = current_b0->current_length;
1245  } else {
1246  //End of packet
1247  break;
1248  }
1249  }
1250 
1251  if (rxvq->desc[desc_current].len <= offset) { //Get new output
1252  if (rxvq->desc[desc_current].flags & VIRTQ_DESC_F_NEXT) {
1253  offset = 0;
1254  desc_current = rxvq->desc[desc_current].next;
1255  if (PREDICT_FALSE(!(buffer_addr = map_guest_mem(vui, rxvq->desc[desc_current].addr)))) {
1256  used_index -= hdr->num_buffers - 1;
1257  rxvq->last_avail_idx -= hdr->num_buffers - 1;
1258  error = VHOST_USER_TX_FUNC_ERROR_MMAP_FAIL;
1259  goto done;
1260  }
1261  } else if (vui->virtio_net_hdr_sz == 12) { //MRG is available
1262 
1263  //Move from available to used buffer
1264  rxvq->used->ring[used_index & qsz_mask].id = desc_chain_head;
1265  rxvq->used->ring[used_index & qsz_mask].len = desc_len;
1266  vhost_user_log_dirty_ring(vui, rxvq, ring[used_index & qsz_mask]);
1267  rxvq->last_avail_idx++;
1268  used_index++;
1269  hdr->num_buffers++;
1270 
1271  if (PREDICT_FALSE(rxvq->last_avail_idx == rxvq->avail->idx)) {
1272  //Dequeue queued descriptors for this packet
1273  used_index -= hdr->num_buffers - 1;
1274  rxvq->last_avail_idx -= hdr->num_buffers - 1;
1275  error = VHOST_USER_TX_FUNC_ERROR_PKT_DROP_NOBUF;
1276  goto done;
1277  }
1278 
1279  //Look at next one
1280  desc_chain_head = rxvq->avail->ring[rxvq->last_avail_idx & qsz_mask];
1281  desc_current = desc_chain_head;
1282  desc_len = 0;
1283  offset = 0;
1284  if (PREDICT_FALSE(!(buffer_addr = map_guest_mem(vui, rxvq->desc[desc_current].addr)))) {
1285  //Dequeue queued descriptors for this packet
1286  used_index -= hdr->num_buffers - 1;
1287  rxvq->last_avail_idx -= hdr->num_buffers - 1;
1288  error = VHOST_USER_TX_FUNC_ERROR_MMAP_FAIL;
1289  goto done;
1290  }
1291  } else {
1292  error = VHOST_USER_TX_FUNC_ERROR_PKT_DROP_NOBUF;
1293  goto done;
1294  }
1295  }
1296 
1297  u16 bytes_to_copy = bytes_left > (rxvq->desc[desc_current].len - offset) ? (rxvq->desc[desc_current].len - offset) : bytes_left;
1298  clib_memcpy(buffer_addr, vlib_buffer_get_current (current_b0) + current_b0->current_length - bytes_left, bytes_to_copy);
1299 
1300  vhost_user_log_dirty_pages(vui, rxvq->desc[desc_current].addr + offset, bytes_to_copy);
1301  bytes_left -= bytes_to_copy;
1302  offset += bytes_to_copy;
1303  buffer_addr += bytes_to_copy;
1304  desc_len += bytes_to_copy;
1305  }
1306 
1307  //Move from available to used ring
1308  rxvq->used->ring[used_index & qsz_mask].id = desc_chain_head;
1309  rxvq->used->ring[used_index & qsz_mask].len = desc_len;
1310  vhost_user_log_dirty_ring(vui, rxvq, ring[used_index & qsz_mask]);
1311 
1312  rxvq->last_avail_idx++;
1313  used_index++;
1314  }
1315 
1316 done:
1318  rxvq->used->idx = used_index;
1319  vhost_user_log_dirty_ring(vui, rxvq, idx);
1320 
1321  /* interrupt (call) handling */
1322  if((rxvq->callfd > 0) && !(rxvq->avail->flags & 1)) {
1323  rxvq->n_since_last_int += n_packets - n_left;
1324 
1325  if(rxvq->n_since_last_int > vum->coalesce_frames)
1326  vhost_user_send_call(vm, rxvq);
1327  }
1328 
1329 done2:
1330 
1331  if (PREDICT_FALSE(vui->lockp != 0))
1332  *vui->lockp = 0;
1333 
1334  if (PREDICT_FALSE(n_left && error != VHOST_USER_TX_FUNC_ERROR_NONE)) {
1335  vlib_error_count(vm, node->node_index, error, n_left);
1340  vui->sw_if_index,
1341  n_left);
1342  }
1343 
1344  vlib_buffer_free (vm, vlib_frame_args (frame), frame->n_vectors);
1345  return frame->n_vectors;
1346 }
1347 
/*
 * Admin up/down handler for vhost-user interfaces: derives is_up from
 * VNET_SW_INTERFACE_FLAG_ADMIN_UP in `flags`, stores it in vui->admin_up
 * and returns 0 (no error).
 *
 * NOTE(review): this listing drops original lines 1349, 1353-1354 and
 * 1359-1360, i.e. the declarator, the code that obtains `vui`, and the
 * statement guarded by `if (is_up)`. Restore them from the real source
 * before changing this function.
 */
1348 static clib_error_t *
1350 {
1351  vnet_hw_interface_t * hif = vnet_get_hw_interface (vnm, hw_if_index);
  /* Only the admin-up bit of `flags` is looked at. */
1352  uword is_up = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) != 0;
1355 
1356  vui->admin_up = is_up;
1357 
  /* NOTE(review): the guarded statement (original lines 1359-1360) is not
     visible here. */
1358  if (is_up)
1361 
1362  return /* no error */ 0;
1363 }
1364 
1365 VNET_DEVICE_CLASS (vhost_user_dev_class,static) = {
1366  .name = "vhost-user",
1367  .tx_function = vhost_user_intfc_tx,
1368  .tx_function_n_errors = VHOST_USER_TX_FUNC_N_ERROR,
1369  .tx_function_error_strings = vhost_user_tx_func_error_strings,
1370  .format_device_name = format_vhost_user_interface_name,
1371  .name_renumber = vhost_user_name_renumber,
1372  .admin_up_down_function = vhost_user_interface_admin_up_down,
1373  .no_flatten_output_chains = 1,
1374 };
1375 
/*
 * Process node body for client-mode vhost-user interfaces.
 *
 * Each pass of the loop walks vum->vhost_user_interfaces, skips server-mode
 * and inactive entries, and then either
 *  - tries to connect() a spare socket to the interface's sock_filename
 *    (when unix_fd == -1), or
 *  - probes the existing descriptor with getsockopt(SO_ERROR).
 *
 * On a successful connect the socket is handed to the interface and
 * registered through unix_file_add() with vhost_user_socket_read /
 * vhost_user_socket_error as callbacks, then a new spare socket is created.
 * A failed connect stores errno in vui->sock_errno.
 *
 * Returns 0 when socket() fails; the loop has no other exit.
 *
 * NOTE(review): this listing drops original lines 1377, 1381, 1399, 1420
 * and 1436 (the declarator, presumably the declaration of `vum`, the
 * statement in front of vlib_process_get_events, one statement after
 * unix_file_add, and the body of `if (retval)`).
 */
1376 static uword
1378  vlib_node_runtime_t * rt,
1379  vlib_frame_t * f)
1380 {
1382  vhost_user_intf_t * vui;
  /* NOTE(review): `sun` is never zeroed and the strncpy() below copies at
     most sizeof(sun_path) - 1 bytes without writing a terminator itself, so
     a long sock_filename can leave sun_path unterminated. */
1383  struct sockaddr_un sun;
1384  int sockfd;
1385  unix_file_t template = {0};
1386  f64 timeout = 3153600000.0 /* 100 years */;
1387  uword *event_data = 0;
1388 
  /* Spare socket used for the next connect attempt. */
1389  sockfd = socket(AF_UNIX, SOCK_STREAM, 0);
1390  sun.sun_family = AF_UNIX;
1391  template.read_function = vhost_user_socket_read;
1392  template.error_function = vhost_user_socket_error;
1393 
1394 
1395  if (sockfd < 0)
1396  return 0;
1397 
1398  while (1) {
  /* NOTE(review): original line 1399 is missing here; it is presumably the
     place where `timeout` is consumed - confirm against the real source. */
1400  vlib_process_get_events (vm, &event_data);
1401  vec_reset_length (event_data);
1402 
  /* After the first pass the timeout drops from "100 years" to 3.0. */
1403  timeout = 3.0;
1404 
1405  vec_foreach (vui, vum->vhost_user_interfaces) {
1406 
  /* Only active client-mode interfaces are handled here. */
1407  if (vui->sock_is_server || !vui->active)
1408  continue;
1409 
1410  if (vui->unix_fd == -1) {
1411  /* try to connect */
1412 
1413  strncpy(sun.sun_path, (char *) vui->sock_filename, sizeof(sun.sun_path) - 1);
1414 
1415  if (connect(sockfd, (struct sockaddr *) &sun, sizeof(struct sockaddr_un)) == 0) {
1416  vui->sock_errno = 0;
1417  vui->unix_fd = sockfd;
1418  template.file_descriptor = sockfd;
1419  vui->unix_file_index = unix_file_add (&unix_main, &template);
1421 
  /* The connected socket now belongs to the interface; get a new spare. */
1422  sockfd = socket(AF_UNIX, SOCK_STREAM, 0);
1423  if (sockfd < 0)
1424  return 0;
1425  }
1426  else {
  /* Remember why the connect failed; the same spare socket is retried. */
1427  vui->sock_errno = errno;
1428  }
1429  } else {
1430  /* check if socket is alive */
1431  int error = 0;
1432  socklen_t len = sizeof (error);
1433  int retval = getsockopt(vui->unix_fd, SOL_SOCKET, SO_ERROR, &error, &len);
1434 
  /* NOTE(review): the statement executed when getsockopt() fails (original
     line 1436) is missing from this listing; `error` itself is not
     inspected in the visible code. */
1435  if (retval)
1437  }
1438  }
1439  }
1440  return 0;
1441 }
1442 
/*
 * Registration of the "vhost-user-process" node: a VLIB_NODE_TYPE_PROCESS
 * node whose function is vhost_user_process.
 * NOTE(review): the opening line of this initializer (original line 1443)
 * is missing from this listing.
 */
1444  .function = vhost_user_process,
1445  .type = VLIB_NODE_TYPE_PROCESS,
1446  .name = "vhost-user-process",
1447 };
1448 
/*
 * Deactivate the vhost-user interface identified by sw_if_index.
 *
 * Returns VNET_API_ERROR_INVALID_SW_IF_INDEX when the lookup yields no
 * entry, otherwise rv (which stays 0 in the visible code).
 *
 * NOTE(review): this listing drops original lines 1449, 1452, 1457, 1468,
 * 1470, 1473-1474 and 1476. Of the disconnect / inactive-list / renumber
 * steps only the describing comments remain, so the statements themselves
 * must be taken from the real source.
 */
1450  u32 sw_if_index)
1451 {
1453  vhost_user_intf_t * vui;
1454  uword *p = NULL;
1455  int rv = 0;
1456 
  /* NOTE(review): the start of the lookup that assigns `p` (original line
     1457) is not visible; only its last argument is. */
1458  sw_if_index);
1459  if (p == 0) {
1460  return VNET_API_ERROR_INVALID_SW_IF_INDEX;
1461  } else {
  /* p[0] is the index into vum->vhost_user_interfaces. */
1462  vui = vec_elt_at_index (vum->vhost_user_interfaces, p[0]);
1463  }
1464 
1465  // interface is inactive
1466  vui->active = 0;
1467  // disconnect interface sockets
1469  // add to inactive interface list
1471 
1472  // reset renumbered iface
1475 
1477  DBG_SOCK ("deleted (deactivated) vhost-user interface instance %d", p[0]);
1478 
1479  return rv;
1480 }
1481 
1482 // init server socket on specified sock_filename
1483 static int vhost_user_init_server_sock(const char * sock_filename, int *sockfd)
1484 {
1485  int rv = 0, len;
1486  struct sockaddr_un un;
1487  int fd;
1488  /* create listening socket */
1489  fd = socket(AF_UNIX, SOCK_STREAM, 0);
1490 
1491  if (fd < 0) {
1492  return VNET_API_ERROR_SYSCALL_ERROR_1;
1493  }
1494 
1495  un.sun_family = AF_UNIX;
1496  strcpy((char *) un.sun_path, (char *) sock_filename);
1497 
1498  /* remove if exists */
1499  unlink( (char *) sock_filename);
1500 
1501  len = strlen((char *) un.sun_path) + strlen((char *) sock_filename);
1502 
1503  if (bind(fd, (struct sockaddr *) &un, len) == -1) {
1504  rv = VNET_API_ERROR_SYSCALL_ERROR_2;
1505  goto error;
1506  }
1507 
1508  if (listen(fd, 1) == -1) {
1509  rv = VNET_API_ERROR_SYSCALL_ERROR_3;
1510  goto error;
1511  }
1512 
1513  unix_file_t template = {0};
1515  template.file_descriptor = fd;
1516  unix_file_add (&unix_main, &template);
1517  *sockfd = fd;
1518  return rv;
1519 
1520 error:
1521  close(fd);
1522  return rv;
1523 }
1524 
1525 // get new vhost_user_intf_t from inactive interfaces or create new one
/*
 * Returns a pointer to an interface slot: the slot named by the last entry
 * of vum->vhost_user_inactive_interfaces_index when that vector is
 * non-empty and the index is in range, otherwise a new element appended to
 * vum->vhost_user_interfaces.
 *
 * NOTE(review): original lines 1526 and 1528 (the declarator and,
 * presumably, the declaration of `vum`) are missing from this listing.
 */
1527 {
1529  vhost_user_intf_t * vui = NULL;
1530  int inactive_cnt = vec_len(vum->vhost_user_inactive_interfaces_index);
1531  // if there are any inactive ifaces
1532  if (inactive_cnt > 0) {
1533  // take last
1534  u32 vui_idx = vum->vhost_user_inactive_interfaces_index[inactive_cnt - 1];
  /* Reuse the slot only if the stored index is within the vector. */
1535  if (vec_len(vum->vhost_user_interfaces) > vui_idx) {
1536  vui = vec_elt_at_index (vum->vhost_user_interfaces, vui_idx);
1537  DBG_SOCK("reusing inactive vhost-user interface index %d", vui_idx);
1538  }
1539  // "remove" from inactive list
  /* The entry is popped even when the index above was out of range. */
1540  _vec_len(vum->vhost_user_inactive_interfaces_index) -= 1;
1541  }
1542 
1543  // vui was not retrieved from inactive ifaces - create new
1544  if (!vui)
1545  vec_add2 (vum->vhost_user_interfaces, vui, 1);
1546  return vui;
1547 }
1548 
1549 // create ethernet interface for vhost user intf
/*
 * Picks the MAC address for the new interface and registers it; the
 * resulting hardware interface index is written to vui->hw_if_index and a
 * registration error is passed to clib_error_report().
 *
 * hwaddress: 6-byte MAC to use, or NULL to generate one.
 *
 * NOTE(review): original lines 1550, 1553, 1571 and 1581-1582 are missing
 * from this listing (the declarator, presumably the declaration of `vum`,
 * the name of the registration call, and the statements after the error
 * report).
 */
1551  vhost_user_intf_t *vui, u8 *hwaddress)
1552 {
1554  u8 hwaddr[6];
1555  clib_error_t * error;
1556 
1557  /* create hw and sw interface */
1558  if (hwaddress) {
1559  clib_memcpy(hwaddr, hwaddress, 6);
1560  } else {
  /* No address supplied: build 02:fe:xx:xx:xx:xx, the last four bytes
     coming from random_u32() seeded with the current time in microseconds. */
1561  f64 now = vlib_time_now(vm);
1562  u32 rnd;
1563  rnd = (u32) (now * 1e6);
1564  rnd = random_u32 (&rnd);
1565 
1566  clib_memcpy (hwaddr+2, &rnd, sizeof(rnd));
1567  hwaddr[0] = 2;
1568  hwaddr[1] = 0xfe;
1569  }
1570 
  /* The device instance is the element's position in vhost_user_interfaces. */
1572  (vnm,
1573  vhost_user_dev_class.index,
1574  vui - vum->vhost_user_interfaces /* device instance */,
1575  hwaddr /* ethernet address */,
1576  &vui->hw_if_index,
1577  0 /* flag change */);
1578  if (error)
1579  clib_error_report (error);
1580 
1583 }
1584 
1585 // initialize vui with specified attributes
/*
 * Resets the per-interface state of `vui`: stores the socket descriptor,
 * socket path, server/client mode and feature mask, clears sock_errno,
 * is_up and log_base_addr, marks the interface active with two disabled
 * vrings, and sets unix_file_index to ~0.
 *
 * sw_if_index: optional out parameter receiving vui->sw_if_index.
 *
 * NOTE(review): original lines 1586, 1594, 1613 and 1620-1621 are missing
 * from this listing (the declarator, presumably the declaration of `tm`,
 * one statement after the vring loop, and the code that sets vui->lockp
 * before it is zeroed).
 */
1587  vhost_user_intf_t *vui, int sockfd,
1588  const char * sock_filename,
1589  u8 is_server, u64 feature_mask,
1590  u32 * sw_if_index)
1591 {
1592  vnet_sw_interface_t * sw;
1593  sw = vnet_get_hw_sw_interface (vnm, vui->hw_if_index);
1595  int q;
1596 
1597  vui->unix_fd = sockfd;
1598  vui->sw_if_index = sw->sw_if_index;
1599  vui->num_vrings = 2;
1600  vui->sock_is_server = is_server;
  /* NOTE(review): strncpy() with size - 1 does not write a terminator for
     over-long names, and sock_filename is used without a NULL check. */
1601  strncpy(vui->sock_filename, sock_filename, ARRAY_LEN(vui->sock_filename)-1);
1602  vui->sock_errno = 0;
1603  vui->is_up = 0;
1604  vui->feature_mask = feature_mask;
1605  vui->active = 1;
1606  vui->unix_file_index = ~0;
1607  vui->log_base_addr = 0;
1608 
  /* Both vrings start out disabled. */
1609  for (q = 0; q < 2; q++) {
1610  vui->vrings[q].enabled = 0;
1611  }
1612 
1614 
1615  if (sw_if_index)
1616  *sw_if_index = vui->sw_if_index;
1617 
  /* With more than one vlib main, the memory behind vui->lockp is cleared
     for one cache line. */
1618  if (tm->n_vlib_mains > 1)
1619  {
1622  memset ((void *) vui->lockp, 0, CLIB_CACHE_LINE_BYTES);
1623  }
1624 }
1625 
1626 // register vui and start polling on it
/*
 * NOTE(review): most statements of this function are missing from this
 * listing (original lines 1627, 1629, 1632, 1634, 1636, 1644, 1647 and
 * 1651). What remains shows that the interface's position in
 * vum->vhost_user_interfaces is used as a key/value, that a cpu index is
 * derived from it, and that a node is put into VLIB_NODE_STATE_POLLING
 * either when there is a single vlib main or when there are no io threads.
 */
1628 {
1630  dpdk_main_t * dm = &dpdk_main;
1631  int cpu_index;
1633 
1635  vui - vum->vhost_user_interfaces);
1637  vui - vum->vhost_user_interfaces);
1638 
1639  /* start polling */
  /* Spread interfaces over the input cpus by interface position.
     NOTE(review): the modulo assumes dm->input_cpu_count is non-zero -
     confirm. */
1640  cpu_index = dm->input_cpu_first_index +
1641  (vui - vum->vhost_user_interfaces) % dm->input_cpu_count;
1642 
1643  if (tm->n_vlib_mains == 1)
1645  VLIB_NODE_STATE_POLLING);
1646  else if (!dm->have_io_threads)
1648  VLIB_NODE_STATE_POLLING);
1649 
1650  /* tell process to start polling for sockets */
1652 }
1653 
/*
 * Create a vhost-user interface.
 *
 * Steps visible here: refuse when running with several vlib mains and io
 * threads; in server mode open the listening socket; obtain an interface
 * slot; create the ethernet interface; initialize the slot; optionally
 * renumber the interface name; register the interface.
 *
 * sw_if_index: optional out parameter receiving the new sw_if_index.
 * Returns 0 on success, -1 for the unsupported io-thread configuration, or
 * the error code of vhost_user_init_server_sock().
 *
 * NOTE(review): original lines 1654 and 1664 (the declarator and,
 * presumably, the declaration of `tm`) are missing from this listing.
 * NOTE(review): sock_filename is forwarded without a NULL check and the
 * results of the create/init/renumber steps are not examined.
 */
1655  const char * sock_filename,
1656  u8 is_server,
1657  u32 * sw_if_index,
1658  u64 feature_mask,
1659  u8 renumber, u32 custom_dev_instance,
1660  u8 *hwaddr)
1661 {
1662  vhost_user_intf_t * vui = NULL;
1663  dpdk_main_t * dm = &dpdk_main;
1665  u32 sw_if_idx = ~0;
1666  int sockfd = -1;
1667  int rv = 0;
1668 
1669  if (tm->n_vlib_mains > 1 && dm->have_io_threads)
1670  {
1671  clib_warning("vhost-user interfaces are not supported with multiple io threads");
1672  return -1;
1673  }
1674 
  /* Client-mode interfaces keep sockfd == -1 here. */
1675  if (is_server) {
1676  if ((rv = vhost_user_init_server_sock (sock_filename, &sockfd)) != 0) {
1677  return rv;
1678  }
1679  }
1680 
1681  vui = vhost_user_vui_new ();
1682  ASSERT(vui != NULL);
1683 
1684  vhost_user_create_ethernet (vnm, vm, vui, hwaddr);
1685  vhost_user_vui_init (vnm, vui, sockfd, sock_filename, is_server,
1686  feature_mask, &sw_if_idx);
1687 
1688  if (renumber) {
1689  vnet_interface_name_renumber (sw_if_idx, custom_dev_instance);
1690  }
1691 
1692  vhost_user_vui_register (vm, vui);
1693 
1694  if (sw_if_index)
1695  *sw_if_index = sw_if_idx;
1696 
1697  return rv;
1698 }
1699 
/*
 * Re-configure an existing vhost-user interface: look it up by sw_if_index,
 * mark it inactive, (in server mode) open a new listening socket, then
 * re-initialize and re-register it with the new parameters.
 *
 * Returns VNET_API_ERROR_INVALID_SW_IF_INDEX when the lookup yields no
 * entry, the error code of vhost_user_init_server_sock() when that fails,
 * and rv (0) otherwise.
 *
 * NOTE(review): original lines 1700, 1707, 1714 and 1725 are missing from
 * this listing (the declarator, presumably the declaration of `vum`, the
 * start of the lookup, and the disconnect statement).
 * NOTE(review): when vhost_user_init_server_sock() fails the function
 * returns with vui->active still 0, because only vhost_user_vui_init()
 * sets it back to 1.
 */
1701  const char * sock_filename,
1702  u8 is_server,
1703  u32 sw_if_index,
1704  u64 feature_mask,
1705  u8 renumber, u32 custom_dev_instance)
1706 {
1708  vhost_user_intf_t * vui = NULL;
1709  u32 sw_if_idx = ~0;
1710  int sockfd = -1;
1711  int rv = 0;
1712  uword *p = NULL;
1713 
1715  sw_if_index);
1716  if (p == 0) {
1717  return VNET_API_ERROR_INVALID_SW_IF_INDEX;
1718  } else {
1719  vui = vec_elt_at_index (vum->vhost_user_interfaces, p[0]);
1720  }
1721 
1722  // interface is inactive
1723  vui->active = 0;
1724  // disconnect interface sockets
1726 
1727  if (is_server) {
1728  if ((rv = vhost_user_init_server_sock (sock_filename, &sockfd)) != 0) {
1729  return rv;
1730  }
1731  }
1732 
1733  vhost_user_vui_init (vnm, vui, sockfd, sock_filename, is_server,
1734  feature_mask, &sw_if_idx);
1735 
1736  if (renumber) {
1737  vnet_interface_name_renumber (sw_if_idx, custom_dev_instance);
1738  }
1739 
1740  vhost_user_vui_register (vm, vui);
1741 
1742  return rv;
1743 }
1744 
/*
 * CLI handler that creates a vhost-user interface from one line of input.
 *
 * Accepted tokens: "socket <path>", "server", "feature-mask 0x<hex>",
 * "hwaddr <mac>" and "renumber <n>". Anything else produces an
 * "unknown input" error. The parsed values are passed to
 * vhost_user_create_if(); the handler then returns 0.
 *
 * NOTE(review): the line carrying the function name (original line 1746)
 * is missing from this listing.
 * NOTE(review): defects to fix once the full source is at hand -
 *  - the "unknown input" return skips unformat_free (line_input) and
 *    vec_free (sock_filename);
 *  - sock_filename stays NULL when no "socket" token is given and is passed
 *    on as is;
 *  - the result of vhost_user_create_if() is ignored, so failures are
 *    reported as success;
 *  - sw_if_index is written by the callee but never used.
 */
1745 clib_error_t *
1747  unformat_input_t * input,
1748  vlib_cli_command_t * cmd)
1749 {
1750  unformat_input_t _line_input, * line_input = &_line_input;
1751  u8 * sock_filename = NULL;
1752  u32 sw_if_index;
1753  u8 is_server = 0;
  /* Default: all feature bits allowed. */
1754  u64 feature_mask = (u64)~0;
1755  u8 renumber = 0;
1756  u32 custom_dev_instance = ~0;
1757  u8 hwaddr[6];
  /* hw stays NULL unless a "hwaddr" token was parsed. */
1758  u8 *hw = NULL;
1759 
1760  /* Get a line of input. */
1761  if (! unformat_user (input, unformat_line_input, line_input))
1762  return 0;
1763 
1764  while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) {
1765  if (unformat (line_input, "socket %s", &sock_filename))
1766  ;
1767  else if (unformat (line_input, "server"))
1768  is_server = 1;
1769  else if (unformat (line_input, "feature-mask 0x%llx", &feature_mask))
1770  ;
1771  else if (unformat (line_input, "hwaddr %U", unformat_ethernet_address, hwaddr))
1772  hw = hwaddr;
1773  else if (unformat (line_input, "renumber %d", &custom_dev_instance)) {
1774  renumber = 1;
1775  }
1776  else
1777  return clib_error_return (0, "unknown input `%U'",
1778  format_unformat_error, input);
1779  }
1780  unformat_free (line_input);
1781 
1782  vnet_main_t *vnm = vnet_get_main();
1783 
1784  vhost_user_create_if(vnm, vm, (char *)sock_filename,
1785  is_server, &sw_if_index, feature_mask,
1786  renumber, custom_dev_instance, hw);
1787 
1788  vec_free(sock_filename);
1789 
1790  return 0;
1791 }
1792 
/*
 * CLI handler that deletes a vhost-user interface. The only accepted token
 * is "sw_if_index <n>"; the parsed index is passed to
 * vhost_user_delete_if() and the handler returns 0.
 *
 * NOTE(review): the line carrying the function name (original line 1794)
 * is missing from this listing.
 * NOTE(review): the "unknown input" return skips unformat_free
 * (line_input); when no index is given ~0 is passed on; the result of
 * vhost_user_delete_if() is ignored.
 */
1793 clib_error_t *
1795  unformat_input_t * input,
1796  vlib_cli_command_t * cmd)
1797 {
1798  unformat_input_t _line_input, * line_input = &_line_input;
1799  u32 sw_if_index = ~0;
1800 
1801  /* Get a line of input. */
1802  if (! unformat_user (input, unformat_line_input, line_input))
1803  return 0;
1804 
1805  while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) {
1806  if (unformat (line_input, "sw_if_index %d", &sw_if_index))
1807  ;
1808  else
1809  return clib_error_return (0, "unknown input `%U'",
1810  format_unformat_error, input);
1811  }
1812  unformat_free (line_input);
1813 
1814  vnet_main_t *vnm = vnet_get_main();
1815 
1816  vhost_user_delete_if(vnm, vm, sw_if_index);
1817 
1818  return 0;
1819 }
1820 
/*
 * Build a vector of vhost_user_intf_details_t, one element per active
 * vhost-user interface, and store it in *out_vuids.
 *
 * Returns -1 when out_vuids is NULL, otherwise rv (0). The result vector
 * is handed to the caller; the temporary vectors are freed here.
 *
 * NOTE(review): original lines 1821, 1824, 1827, 1829 and 1843 are missing
 * from this listing (the declarator, presumably the declarations of `vum`,
 * `vuid` and `hi`, and the statement that points `vui` at the interface
 * belonging to hw_if_indices[i]).
 */
1822 {
1823  int rv = 0;
1825  vhost_user_intf_t * vui;
1826  vhost_user_intf_details_t * r_vuids = NULL;
1828  u32 * hw_if_indices = 0;
1830  u8 *s = NULL;
1831  int i;
1832 
1833  if (!out_vuids)
1834  return -1;
1835 
  /* Pass 1: collect the hw_if_index of every active interface. */
1836  vec_foreach (vui, vum->vhost_user_interfaces) {
1837  if (vui->active)
1838  vec_add1(hw_if_indices, vui->hw_if_index);
1839  }
1840 
  /* Pass 2: fill one details record per collected interface. */
1841  for (i = 0; i < vec_len (hw_if_indices); i++) {
1842  hi = vnet_get_hw_interface (vnm, hw_if_indices[i]);
1844 
1845  vec_add2(r_vuids, vuid, 1);
1846  vuid->sw_if_index = vui->sw_if_index;
1847  vuid->virtio_net_hdr_sz = vui->virtio_net_hdr_sz;
1848  vuid->features = vui->features;
1849  vuid->is_server = vui->sock_is_server;
1850  vuid->num_regions = vui->nregions;
1851  vuid->sock_errno = vui->sock_errno;
  /* NOTE(review): both strncpy() calls copy at most size - 1 bytes and do
     not terminate on their own; termination presumably relies on the new
     element being zero-filled - confirm. */
1852  strncpy((char *)vuid->sock_filename, (char *)vui->sock_filename,
1853  ARRAY_LEN(vuid->sock_filename)-1);
1854 
  /* Render the interface name as a NUL-terminated string in scratch
     vector s, which is reset (not freed) after each use. */
1855  s = format (s, "%v%c", hi->name, 0);
1856 
1857  strncpy((char *)vuid->if_name, (char *)s,
1858  ARRAY_LEN(vuid->if_name)-1);
1859  _vec_len(s) = 0;
1860  }
1861 
1862  vec_free (s);
1863  vec_free (hw_if_indices);
1864 
1865  *out_vuids = r_vuids;
1866 
1867  return rv;
1868 }
1869 
/*
 * CLI handler that prints the state of vhost-user interfaces.
 *
 * Input: any number of hardware interface names, plus the optional keyword
 * "descriptors" / "desc" to also dump each virtqueue's descriptor table.
 * Without interface names every active vhost-user interface is shown.
 *
 * For each interface it prints the header size and feature bits, the
 * socket name/mode/errno, the memory regions and, per virtqueue, the ring
 * indices and file descriptors.
 *
 * Returns 0, or an "unknown input" error; hw_if_indices is freed on both
 * paths.
 *
 * NOTE(review): original lines 1871, 1877, 1880, 1888 and 1918 are missing
 * from this listing (the declarator, presumably the declarations of `vum`
 * and `hi`, the macro that fills feat_array, and the statement that points
 * `vui` at the interface being shown).
 */
1870 clib_error_t *
1872  unformat_input_t * input,
1873  vlib_cli_command_t * cmd)
1874 {
1875  clib_error_t * error = 0;
1876  vnet_main_t * vnm = vnet_get_main();
1878  vhost_user_intf_t * vui;
1879  u32 hw_if_index, * hw_if_indices = 0;
1881  int i, j, q;
1882  int show_descr = 0;
1883  struct feat_struct { u8 bit; char *str;};
1884  struct feat_struct *feat_entry;
1885 
  /* Name/bit table of feature flags, terminated by an entry whose str is
     NULL. */
1886  static struct feat_struct feat_array[] = {
1887 #define _(s,b) { .str = #s, .bit = b, },
1889 #undef _
1890  { .str = NULL }
1891  };
1892 
1893  while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) {
1894  if (unformat (input, "%U", unformat_vnet_hw_interface, vnm, &hw_if_index)) {
1895  vec_add1 (hw_if_indices, hw_if_index);
1896  vlib_cli_output(vm, "add %d", hw_if_index);
1897  }
1898  else if (unformat (input, "descriptors") || unformat (input, "desc") )
1899  show_descr = 1;
1900  else {
1901  error = clib_error_return (0, "unknown input `%U'",
1902  format_unformat_error, input);
1903  goto done;
1904  }
1905  }
  /* No interface named: default to all active vhost-user interfaces. */
1906  if (vec_len (hw_if_indices) == 0) {
1907  vec_foreach (vui, vum->vhost_user_interfaces) {
1908  if (vui->active)
1909  vec_add1(hw_if_indices, vui->hw_if_index);
1910  }
1911  }
1912  vlib_cli_output (vm, "Virtio vhost-user interfaces");
1913  vlib_cli_output (vm, "Global:\n coalesce frames %d time %e\n\n",
1914  vum->coalesce_frames, vum->coalesce_time);
1915 
1916  for (i = 0; i < vec_len (hw_if_indices); i++) {
1917  hi = vnet_get_hw_interface (vnm, hw_if_indices[i]);
1919  vlib_cli_output (vm, "Interface: %s (ifindex %d)",
1920  hi->name, hw_if_indices[i]);
1921 
1922  vlib_cli_output (vm, "virtio_net_hdr_sz %d\n features (0x%llx): \n",
1923  vui->virtio_net_hdr_sz, vui->features);
1924 
1925  feat_entry = (struct feat_struct *) &feat_array;
1926  while(feat_entry->str) {
  /* NOTE(review): `1 << bit` is an int shift while features is printed as
     a 64-bit value above; a bit number of 31 or more would not be tested
     correctly. A 64-bit constant (1ULL) looks intended - confirm. */
1927  if (vui->features & (1 << feat_entry->bit))
1928  vlib_cli_output (vm, " %s (%d)", feat_entry->str, feat_entry->bit);
1929  feat_entry++;
1930  }
1931 
1932  vlib_cli_output (vm, "\n");
1933 
1934 
1935  vlib_cli_output (vm, " socket filename %s type %s errno \"%s\"\n\n",
1936  vui->sock_filename, vui->sock_is_server ? "server" : "client",
1937  strerror(vui->sock_errno));
1938 
1939  vlib_cli_output (vm, " Memory regions (total %d)\n", vui->nregions);
1940 
1941  if (vui->nregions){
1942  vlib_cli_output(vm, " region fd guest_phys_addr memory_size userspace_addr mmap_offset mmap_addr\n");
1943  vlib_cli_output(vm, " ====== ===== ================== ================== ================== ================== ==================\n");
1944  }
1945  for (j = 0; j < vui->nregions; j++) {
1946  vlib_cli_output(vm, " %d %-5d 0x%016lx 0x%016lx 0x%016lx 0x%016lx 0x%016lx\n", j,
1947  vui->region_mmap_fd[j],
1948  vui->regions[j].guest_phys_addr,
1949  vui->regions[j].memory_size,
1950  vui->regions[j].userspace_addr,
1951  vui->regions[j].mmap_offset,
1952  (u64) vui->region_mmap_addr[j]);
1953  }
1954  for (q = 0; q < vui->num_vrings; q++) {
1955  vlib_cli_output(vm, "\n Virtqueue %d\n", q);
1956 
1957  vlib_cli_output(vm, " qsz %d last_avail_idx %d last_used_idx %d\n",
1958  vui->vrings[q].qsz,
1959  vui->vrings[q].last_avail_idx,
1960  vui->vrings[q].last_used_idx);
1961 
  /* Ring contents are printed only once both rings are set. */
1962  if (vui->vrings[q].avail && vui->vrings[q].used)
1963  vlib_cli_output(vm, " avail.flags %x avail.idx %d used.flags %x used.idx %d\n",
1964  vui->vrings[q].avail->flags,
1965  vui->vrings[q].avail->idx,
1966  vui->vrings[q].used->flags,
1967  vui->vrings[q].used->idx);
1968 
1969  vlib_cli_output(vm, " kickfd %d callfd %d errfd %d\n",
1970  vui->vrings[q].kickfd,
1971  vui->vrings[q].callfd,
1972  vui->vrings[q].errfd);
1973 
  /* NOTE(review): unlike avail/used above, vrings[q].desc is dereferenced
     here without a NULL check. */
1974  if (show_descr) {
1975  vlib_cli_output(vm, "\n descriptor table:\n");
1976  vlib_cli_output(vm, " id addr len flags next user_addr\n");
1977  vlib_cli_output(vm, " ===== ================== ===== ====== ===== ==================\n");
1978  for(j = 0; j < vui->vrings[q].qsz; j++) {
1979  vlib_cli_output(vm, " %-5d 0x%016lx %-5d 0x%04x %-5d 0x%016lx\n",
1980  j,
1981  vui->vrings[q].desc[j].addr,
1982  vui->vrings[q].desc[j].len,
1983  vui->vrings[q].desc[j].flags,
1984  vui->vrings[q].desc[j].next,
1985  (u64) map_guest_mem(vui, vui->vrings[q].desc[j].addr));}
1986  }
1987  }
1988  vlib_cli_output (vm, "\n");
1989  }
1990 done:
1991  vec_free (hw_if_indices);
1992  return error;
1993 }
1994 
/*
 * Parser for the "vhost-user { ... }" startup configuration section.
 *
 * Recognized settings:
 *   coalesce-frames <n>  - stored in vum->coalesce_frames
 *   coalesce-time <f>    - stored in vum->coalesce_time
 *   dont-dump-memory     - sets vum->dont_dump_vhost_user_memory
 * Any other token yields an "unknown input" error; otherwise 0 is returned.
 *
 * NOTE(review): original lines 1996 and 1998 (the declarator and,
 * presumably, the declaration of `vum`) are missing from this listing.
 */
1995 static clib_error_t *
1997 {
1999 
2000  while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2001  {
2002  if (unformat (input, "coalesce-frames %d", &vum->coalesce_frames))
2003  ;
2004  else if (unformat (input, "coalesce-time %f", &vum->coalesce_time))
2005  ;
2006  else if (unformat (input, "dont-dump-memory"))
2007  vum->dont_dump_vhost_user_memory = 1;
2008  else
2009  return clib_error_return (0, "unknown input `%U'",
2010  format_unformat_error, input);
2011  }
2012 
2013  return 0;
2014 }
2015 
2016 /* vhost-user { ... } configuration. */
2017 VLIB_CONFIG_FUNCTION (vhost_user_config, "vhost-user");
2018 
/*
 * When vum->dont_dump_vhost_user_memory is set, calls
 * unmap_all_mem_regions() for interfaces; otherwise does nothing.
 *
 * NOTE(review): original lines 2020, 2022 and 2027 are missing from this
 * listing (the declarator, presumably the declaration of `vum`, and the
 * loop header that selects which interfaces `vui` visits).
 */
2019 void
2021 {
2023  vhost_user_intf_t * vui;
2024 
2025  if (vum->dont_dump_vhost_user_memory)
2026  {
2028  {
2029  unmap_all_mem_regions(vui);
2030  }
2031  }
2032 }
unformat_function_t unformat_vnet_hw_interface
static clib_error_t * vhost_user_init(vlib_main_t *vm)
Definition: vhost-user.c:734
unix_file_t * file_pool
Definition: unix.h:85
static void vhost_user_vui_init(vnet_main_t *vnm, vhost_user_intf_t *vui, int sockfd, const char *sock_filename, u8 is_server, u64 feature_mask, u32 *sw_if_index)
Definition: vhost-user.c:1586
void vlib_put_next_frame(vlib_main_t *vm, vlib_node_runtime_t *r, u32 next_index, u32 n_vectors_left)
Definition: main.c:459
static void * map_user_mem(vhost_user_intf_t *vui, u64 addr)
Definition: vhost-user.c:156
vmrglw vmrglh hi
static void vhost_user_if_disconnect(vhost_user_intf_t *vui)
Definition: vhost-user.c:211
always_inline void vlib_error_count(vlib_main_t *vm, uword node_index, uword counter, uword increment)
Definition: error_funcs.h:54
static void * map_guest_mem(vhost_user_intf_t *vui, u64 addr)
Definition: vhost-user.c:143
#define hash_set(h, key, value)
Definition: hash.h:237
static uword vhost_user_intfc_tx(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
Definition: vhost-user.c:1148
always_inline vlib_thread_main_t * vlib_get_thread_main()
Definition: global_funcs.h:32
sll srl srl sll sra u16x4 i
Definition: vector_sse2.h:267
#define clib_min(x, y)
Definition: clib.h:295
vring_desc_t * desc
Definition: vhost-user.h:179
#define CLIB_UNUSED(x)
Definition: clib.h:79
uword unformat(unformat_input_t *i, char *fmt,...)
Definition: unformat.c:942
clib_error_t * vnet_hw_interface_set_flags(vnet_main_t *vnm, u32 hw_if_index, u32 flags)
Definition: interface.c:454
always_inline uword vlib_process_get_events(vlib_main_t *vm, uword **data_vector)
Definition: node_funcs.h:410
unix_file_function_t * read_function
Definition: unix.h:61
#define hash_unset(h, key)
Definition: hash.h:243
static void vhost_user_create_ethernet(vnet_main_t *vnm, vlib_main_t *vm, vhost_user_intf_t *vui, u8 *hwaddress)
Definition: vhost-user.c:1550
void ethernet_delete_interface(vnet_main_t *vnm, u32 hw_if_index)
Definition: interface.c:205
dpdk_main_t dpdk_main
Definition: dpdk.h:415
static clib_error_t * vhost_user_socket_error(unix_file_t *uf)
Definition: vhost-user.c:674
uword * vhost_user_interface_index_by_sock_fd
Definition: vhost-user.h:230
vnet_interface_main_t interface_main
Definition: vnet.h:62
#define VLIB_BUFFER_TRACE_TRAJECTORY_INIT(b)
Definition: buffer.h:383
static vhost_user_intf_t * vhost_user_vui_new()
Definition: vhost-user.c:1526
#define PREDICT_TRUE(x)
Definition: clib.h:98
always_inline void unformat_free(unformat_input_t *i)
Definition: format.h:160
#define UNFORMAT_END_OF_INPUT
Definition: format.h:142
#define NULL
Definition: clib.h:55
vring_avail_t * avail
Definition: vhost-user.h:180
static uword vhost_user_input(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *f)
Definition: vhost-user.c:1105
always_inline uword unix_file_add(unix_main_t *um, unix_file_t *template)
Definition: unix.h:131
always_inline void vlib_set_trace_count(vlib_main_t *vm, vlib_node_runtime_t *rt, u32 count)
Definition: trace_funcs.h:160
#define vec_add1(V, E)
Add 1 element to end of vector (unspecified alignment).
Definition: vec.h:480
int vnet_interface_name_renumber(u32 sw_if_index, u32 new_show_dev_instance)
Definition: interface.c:1028
static u32 vhost_user_if_input(vlib_main_t *vm, vhost_user_main_t *vum, vhost_user_intf_t *vui, vlib_node_runtime_t *node)
Definition: vhost-user.c:860
static u8 * format_vhost_user_input_trace(u8 *s, va_list *va)
Definition: vhost-user.c:782
struct _vlib_node_registration vlib_node_registration_t
#define VHOST_USER_MSG_HDR_SZ
Definition: vhost-user.h:20
static clib_error_t * vhost_user_interface_admin_up_down(vnet_main_t *vnm, u32 hw_if_index, u32 flags)
Definition: vhost-user.c:1349
#define vec_add2(V, P, N)
Add N elements to end of vector V, return pointer to new elements in P.
Definition: vec.h:519
always_inline void vlib_process_signal_event(vlib_main_t *vm, uword node_index, uword type_opaque, uword data)
Definition: node_funcs.h:789
always_inline void vhost_user_log_dirty_pages(vhost_user_intf_t *vui, u64 addr, u64 len)
Definition: vhost-user.c:242
static void vhost_user_vui_register(vlib_main_t *vm, vhost_user_intf_t *vui)
Definition: vhost-user.c:1627
clib_error_t * show_vhost_user_command_fn(vlib_main_t *vm, unformat_input_t *input, vlib_cli_command_t *cmd)
Definition: vhost-user.c:1871
#define clib_error_report(e)
Definition: error.h:126
#define VNET_HW_INTERFACE_FLAG_LINK_UP
Definition: interface.h:241
#define vec_validate_aligned(V, I, A)
Make sure vector is long enough for given index (no header, specified alignment)
Definition: vec.h:405
vlib_error_t * errors
Definition: node.h:378
static char * vhost_user_input_func_error_strings[]
Definition: vhost-user.c:95
static char * vhost_user_tx_func_error_strings[]
Definition: vhost-user.c:76
vring_used_t * used
Definition: vhost-user.h:181
#define vec_alloc(V, N)
Allocate space for N more elements (no header, unspecified alignment)
Definition: vec.h:237
always_inline void * vlib_buffer_get_current(vlib_buffer_t *b)
Get pointer to current data to process.
Definition: buffer.h:184
#define vec_reset_length(v)
Reset vector length to zero NULL-pointer tolerant.
static int vhost_user_name_renumber(vnet_hw_interface_t *hi, u32 new_dev_instance)
Definition: vhost-user.c:125
always_inline uword unformat_check_input(unformat_input_t *i)
Definition: format.h:168
#define VHOST_VRING_F_LOG
Definition: vhost-user.h:31
vnet_main_t * vnet_get_main(void)
Definition: misc.c:45
vhost_user_vring_t vrings[2]
Definition: vhost-user.h:215
VNET_DEVICE_CLASS(vhost_user_dev_class, static)
static u8 * format_vhost_user_interface_name(u8 *s, va_list *args)
Definition: vhost-user.c:109
always_inline u32 vlib_get_trace_count(vlib_main_t *vm, vlib_node_runtime_t *rt)
Definition: trace_funcs.h:144
#define vlib_prefetch_buffer_with_index(vm, bi, type)
Prefetch buffer metadata by buffer index The first 64 bytes of buffer contains most header informatio...
Definition: buffer_funcs.h:181
#define VLIB_INIT_FUNCTION(x)
Definition: init.h:109
static clib_error_t * ip4_init(vlib_main_t *vm)
Definition: ip4_input.c:392
#define always_inline
Definition: clib.h:84
vlib_combined_counter_main_t * combined_sw_if_counters
Definition: interface.h:458
u8 * format_white_space(u8 *s, va_list *va)
Definition: std-formats.c:107
void * log_base_addr
Definition: vhost-user.h:220
always_inline void vlib_increment_combined_counter(vlib_combined_counter_main_t *cm, u32 cpu_index, u32 index, u32 packet_increment, u32 byte_increment)
Definition: counter.h:210
int input_cpu_first_index
Definition: dpdk.h:399
#define vec_elt_at_index(v, i)
Get vector value at index i checking that i is in bounds.
always_inline void vlib_node_set_state(vlib_main_t *vm, u32 node_index, vlib_node_state_t new_state)
Definition: node_funcs.h:100
void vhost_user_rx_trace(vlib_main_t *vm, vlib_node_runtime_t *node, vhost_user_intf_t *vui, i16 virtqueue)
Definition: vhost-user.c:813
u8 pre_data[VLIB_BUFFER_PRE_DATA_SIZE]
Space for inserting data before buffer start.
Definition: buffer.h:142
vhost_user_tx_func_error_t
Definition: vhost-user.c:69
#define clib_warning(format, args...)
Definition: error.h:59
unsigned long u64
Definition: types.h:89
static void unmap_all_mem_regions(vhost_user_intf_t *vui)
Definition: vhost-user.c:175
uword unformat_user(unformat_input_t *input, unformat_function_t *func,...)
Definition: unformat.c:953
vhost_user_input_func_error_t
Definition: vhost-user.c:88
#define vlib_call_init_function(vm, x)
Definition: init.h:159
always_inline u32 random_u32(u32 *seed)
32-bit random number generator
Definition: random.h:68
static clib_error_t * vhost_user_socket_read(unix_file_t *uf)
Definition: vhost-user.c:268
static uword vhost_user_process(vlib_main_t *vm, vlib_node_runtime_t *rt, vlib_frame_t *f)
Definition: vhost-user.c:1377
#define VLIB_BUFFER_NEXT_PRESENT
Definition: buffer.h:93
always_inline void * clib_mem_alloc_aligned(uword size, uword align)
Definition: mem.h:113
#define VLIB_BUFFER_PRE_DATA_SIZE
Definition: buffer.h:56
#define hash_get(h, key)
Definition: hash.h:231
format_function_t format_vnet_sw_interface_name
#define pool_elt_at_index(p, i)
Definition: pool.h:346
u32 file_descriptor
Definition: unix.h:51
void vlib_buffer_free(vlib_main_t *vm, u32 *buffers, u32 n_buffers)
Free buffers Frees the entire buffer chain for each buffer.
Definition: buffer.c:1060
u16 current_length
Nbytes between current data and the end of this buffer.
Definition: buffer.h:81
int vhost_user_delete_if(vnet_main_t *vnm, vlib_main_t *vm, u32 sw_if_index)
Definition: vhost-user.c:1449
always_inline void vlib_increment_simple_counter(vlib_simple_counter_main_t *cm, u32 cpu_index, u32 index, u32 increment)
Definition: counter.h:70
uword os_get_cpu_number(void)
Definition: unix-misc.c:206
#define clib_error_return_unix(e, args...)
Definition: error.h:115
always_inline f64 vlib_process_wait_for_event_or_clock(vlib_main_t *vm, f64 dt)
Definition: node_funcs.h:551
u32 vlib_buffer_alloc_from_free_list(vlib_main_t *vm, u32 *buffers, u32 n_buffers, u32 free_list_index)
Allocate buffers from specific freelist into supplied array.
Definition: buffer.c:782
#define PREDICT_FALSE(x)
Definition: clib.h:97
#define VLIB_CONFIG_FUNCTION(x, n,...)
Definition: init.h:116
#define vhost_user_log_dirty_ring(vui, vq, member)
Definition: vhost-user.c:262
vlib_node_registration_t vhost_user_process_node
(constructor) VLIB_REGISTER_NODE (vhost_user_process_node)
Definition: vhost-user.c:1443
void vhost_user_unmap_all(void)
Definition: vhost-user.c:2020
char sock_filename[256]
Definition: vhost-user.h:201
vnet_main_t vnet_main
Definition: misc.c:42
int input_cpu_count
Definition: dpdk.h:400
#define VLIB_FRAME_SIZE
Definition: node.h:292
vlib_simple_counter_main_t * sw_if_counters
Definition: interface.h:457
u8 have_io_threads
Definition: dpdk.h:396
u32 region_mmap_fd[VHOST_MEMORY_MAX_NREGIONS]
Definition: vhost-user.h:214
static void vhost_user_send_call(vlib_main_t *vm, vhost_user_vring_t *vq)
Definition: vhost-user.c:848
vhost_user_memory_region_t regions[VHOST_MEMORY_MAX_NREGIONS]
Definition: vhost-user.h:212
#define vlib_validate_buffer_enqueue_x1(vm, node, next_index, to_next, n_left_to_next, bi0, next0)
Definition: buffer_node.h:83
#define vlib_get_next_frame(vm, node, next_index, vectors, n_vectors_left)
Definition: node_funcs.h:265
void vlib_cli_output(vlib_main_t *vm, char *fmt,...)
Definition: cli.c:538
always_inline void * vlib_frame_args(vlib_frame_t *f)
Definition: node_funcs.h:209
int vhost_user_dump_ifs(vnet_main_t *vnm, vlib_main_t *vm, vhost_user_intf_details_t **out_vuids)
Definition: vhost-user.c:1821
vlib_error_t error
Error code for buffers to be enqueued to error handler.
Definition: buffer.h:129
static clib_error_t * vhost_user_exit(vlib_main_t *vm)
Definition: vhost-user.c:759
u16 vlib_buffer_chain_append_data_with_alloc(vlib_main_t *vm, u32 free_list_index, vlib_buffer_t *first, vlib_buffer_t **last, void *data, u16 data_len)
Definition: buffer.c:1224
u8 * format_ethernet_header_with_length(u8 *s, va_list *args)
Definition: format.c:70
u32 * show_dev_instance_by_real_dev_instance
Definition: vhost-user.h:232
int vhost_user_create_if(vnet_main_t *vnm, vlib_main_t *vm, const char *sock_filename, u8 is_server, u32 *sw_if_index, u64 feature_mask, u8 renumber, u32 custom_dev_instance, u8 *hwaddr)
Definition: vhost-user.c:1654
vhost_user_intf_t * vhost_user_interfaces
Definition: vhost-user.h:227
u16 n_vectors
Definition: node.h:307
#define CLIB_PREFETCH(addr, size, type)
Definition: cache.h:82
#define vec_free(V)
Free vector's memory (no header).
Definition: vec.h:298
#define VLIB_MAIN_LOOP_EXIT_FUNCTION(x)
Definition: init.h:113
int vhost_user_modify_if(vnet_main_t *vnm, vlib_main_t *vm, const char *sock_filename, u8 is_server, u32 sw_if_index, u64 feature_mask, u8 renumber, u32 custom_dev_instance)
Definition: vhost-user.c:1700
#define clib_memcpy(a, b, c)
Definition: string.h:63
#define VHOST_MEMORY_MAX_NREGIONS
Definition: vhost-user.h:19
#define ARRAY_LEN(x)
Definition: clib.h:59
always_inline vnet_hw_interface_t * vnet_get_hw_interface(vnet_main_t *vnm, u32 hw_if_index)
#define VHOST_USER_PROTOCOL_F_LOG_SHMFD
Definition: vhost-user.h:30
#define VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX
Definition: buffer.h:296
u32 * vhost_user_inactive_interfaces_index
Definition: vhost-user.h:228
#define hash_create(elts, value_bytes)
Definition: hash.h:615
#define VNET_SW_INTERFACE_FLAG_ADMIN_UP
Definition: interface.h:373
always_inline void unix_file_del(unix_main_t *um, unix_file_t *f)
Definition: unix.h:141
u32 max_l3_packet_bytes[VLIB_N_RX_TX]
Definition: interface.h:313
uword unformat_ethernet_address(unformat_input_t *input, va_list *args)
Definition: format.c:187
#define ASSERT(truth)
unsigned int u32
Definition: types.h:88
u8 * format_unformat_error(u8 *s, va_list *va)
Definition: unformat.c:87
#define vnet_buffer(b)
Definition: buffer.h:300
u8 * format(u8 *s, char *fmt,...)
Definition: format.c:405
static long get_huge_page_size(int fd)
Definition: vhost-user.c:168
u32 next_buffer
Next buffer for this linked-list of buffers.
Definition: buffer.h:112
clib_error_t * vhost_user_delete_command_fn(vlib_main_t *vm, unformat_input_t *input, vlib_cli_command_t *cmd)
Definition: vhost-user.c:1794
#define VIRTQ_DESC_F_NEXT
Definition: vhost-user.h:26
clib_error_t * ethernet_register_interface(vnet_main_t *vnm, u32 dev_class_index, u32 dev_instance, u8 *address, u32 *hw_if_index_return, ethernet_flag_change_function_t flag_change)
Definition: interface.c:157
unix_main_t unix_main
Definition: main.c:57
always_inline uword format_get_indent(u8 *s)
Definition: format.h:72
u64 uword
Definition: types.h:112
volatile u32 * lockp
Definition: vhost-user.h:195
u32 total_length_not_including_first_buffer
Only valid for first buffer in chain.
Definition: buffer.h:106
#define foreach_vhost_user_tx_func_error
Definition: vhost-user.c:63
void * region_mmap_addr[VHOST_MEMORY_MAX_NREGIONS]
Definition: vhost-user.h:213
Definition: defs.h:46
unsigned short u16
Definition: types.h:57
static clib_error_t * vhost_user_socksvr_accept_ready(unix_file_t *uf)
Definition: vhost-user.c:694
static clib_error_t * vhost_user_config(vlib_main_t *vm, unformat_input_t *input)
Definition: vhost-user.c:1996
#define vec_len(v)
Number of elements in vector (rvalue-only, NULL tolerant)
double f64
Definition: types.h:140
unsigned char u8
Definition: types.h:56
always_inline vnet_sw_interface_t * vnet_get_sw_interface(vnet_main_t *vnm, u32 sw_if_index)
static int vhost_user_init_server_sock(const char *sock_filename, int *sockfd)
Definition: vhost-user.c:1483
#define VHOST_NET_VRING_IDX_RX
Definition: vhost-user.h:22
uword * vhost_user_interface_index_by_listener_fd
Definition: vhost-user.h:229
Definition: unix.h:49
vlib_node_registration_t vhost_user_input_node
(constructor) VLIB_REGISTER_NODE (vhost_user_input_node)
Definition: vhost-user.c:61
always_inline void * vlib_add_trace(vlib_main_t *vm, vlib_node_runtime_t *r, vlib_buffer_t *b, u32 n_data_bytes)
Definition: trace_funcs.h:55
#define DBG_SOCK(args...)
Definition: vhost-user.c:52
static vhost_user_main_t vhost_user_main
Definition: vhost-user.c:101
short i16
Definition: types.h:46
#define DBG_VQ(args...)
Definition: vhost-user.c:58
#define VHOST_NET_VRING_IDX_TX
Definition: vhost-user.h:23
#define VLIB_REGISTER_NODE(x,...)
Definition: node.h:140
static clib_error_t * vhost_user_callfd_read_ready(unix_file_t *uf)
Definition: vhost-user.c:203
#define vec_foreach(var, vec)
Vector iterator.
#define foreach_vhost_user_input_func_error
Definition: vhost-user.c:82
always_inline f64 vlib_time_now(vlib_main_t *vm)
Definition: main.h:182
#define CLIB_MEMORY_BARRIER()
Definition: clib.h:101
vhost_vring_addr_t addr
Definition: vhost-user.h:78
#define clib_error_return(e, args...)
Definition: error.h:112
struct _unformat_input_t unformat_input_t
u32 flags
Definition: vhost-user.h:73
#define vec_validate_init_empty(V, I, INIT)
Make sure vector is long enough for given index and initialize empty space (no header, unspecified alignment)
Definition: vec.h:443
#define CLIB_CACHE_LINE_BYTES
Definition: cache.h:67
u32 flags
buffer flags: VLIB_BUFFER_IS_TRACED: trace this buffer.
Definition: buffer.h:84
always_inline void vlib_buffer_chain_init(vlib_buffer_t *first)
Definition: buffer_funcs.h:420
uword * vhost_user_interface_index_by_sw_if_index
Definition: vhost-user.h:231
always_inline void vlib_trace_buffer(vlib_main_t *vm, vlib_node_runtime_t *r, u32 next_index, vlib_buffer_t *b, int follow_chain)
Definition: trace_funcs.h:106
unformat_function_t unformat_line_input
Definition: format.h:279
uword runtime_data[(128-1 *sizeof(vlib_node_function_t *)-1 *sizeof(vlib_error_t *)-11 *sizeof(u32)-5 *sizeof(u16))/sizeof(uword)]
Definition: node.h:432
always_inline vlib_buffer_t * vlib_get_buffer(vlib_main_t *vm, u32 buffer_index)
Translate buffer index into buffer pointer.
Definition: buffer_funcs.h:69
#define VHOST_LOG_PAGE
Definition: vhost-user.c:241
clib_error_t * vhost_user_connect_command_fn(vlib_main_t *vm, unformat_input_t *input, vlib_cli_command_t *cmd)
Definition: vhost-user.c:1746
VNET_HW_INTERFACE_CLASS(vhost_interface_class, static)
vlib_main_t ** vlib_mains
Definition: buffer.c:244
Definition: defs.h:45
int dont_dump_vhost_user_memory
Definition: vhost-user.h:235
always_inline vnet_sw_interface_t * vnet_get_hw_sw_interface(vnet_main_t *vnm, u32 hw_if_index)