21 #include <sys/ioctl.h> 22 #include <sys/socket.h> 25 #include <sys/types.h> 27 #include <netinet/in.h> 30 #include <linux/if_arp.h> 31 #include <linux/if_tun.h> 52 #define VHOST_USER_DEBUG_SOCKET 0 53 #define VHOST_DEBUG_VQ 0 55 #if VHOST_USER_DEBUG_SOCKET == 1 56 #define DBG_SOCK(args...) clib_warning(args); 58 #define DBG_SOCK(args...) 61 #if VHOST_DEBUG_VQ == 1 62 #define DBG_VQ(args...) clib_warning(args); 64 #define DBG_VQ(args...) 72 #define VHOST_USER_DOWN_DISCARD_COUNT 256 78 #define VHOST_USER_RX_BUFFER_STARVATION 32 88 #define VHOST_USER_RX_COPY_THRESHOLD 64 90 #define UNIX_GET_FD(unixfd_idx) \ 91 (unixfd_idx != ~0) ? \ 92 pool_elt_at_index (unix_main.file_pool, \ 93 unixfd_idx)->file_descriptor : -1; 95 #define foreach_virtio_trace_flags \ 96 _ (SIMPLE_CHAINED, 0, "Simple descriptor chaining") \ 97 _ (SINGLE_DESC, 1, "Single descriptor packet") \ 98 _ (INDIRECT, 2, "Indirect descriptor") \ 99 _ (MAP_ERROR, 4, "Memory mapping error") 103 #define _(n,i,s) VIRTIO_TRACE_F_##n, 110 #define foreach_vhost_user_tx_func_error \ 111 _(NONE, "no error") \ 112 _(NOT_READY, "vhost vring not ready") \ 113 _(DOWN, "vhost interface is down") \ 114 _(PKT_DROP_NOBUF, "tx packet drops (no available descriptors)") \ 115 _(PKT_DROP_NOMRG, "tx packet drops (cannot merge descriptors)") \ 116 _(MMAP_FAIL, "mmap failure") \ 117 _(INDIRECT_OVERFLOW, "indirect descriptor table overflow") 121 #define _(f,s) VHOST_USER_TX_FUNC_ERROR_##f, 133 #define foreach_vhost_user_input_func_error \ 134 _(NO_ERROR, "no error") \ 135 _(NO_BUFFER, "no available buffer") \ 136 _(MMAP_FAIL, "mmap failure") \ 137 _(INDIRECT_OVERFLOW, "indirect descriptor overflows table") \ 138 _(UNDERSIZED_FRAME, "undersized ethernet frame received (< 14 bytes)") \ 139 _(FULL_RX_QUEUE, "full rx queue (possible driver tx drop)") 143 #define _(f,s) VHOST_USER_INPUT_FUNC_ERROR_##f, 161 .name =
"vhost-user",
169 u32 show_dev_instance = ~0;
175 if (show_dev_instance != ~0)
176 i = show_dev_instance;
178 s =
format (s,
"VirtualEthernet0/0/%d", i);
193 DBG_SOCK (
"renumbered vhost-user interface dev_instance %d to %d",
204 ((vui->
regions[i].guest_phys_addr +
205 vui->
regions[i].memory_size) > addr)))
211 __m128i rl, rh, al, ah, r;
212 al = _mm_set1_epi64x (addr + 1);
213 ah = _mm_set1_epi64x (addr);
216 rl = _mm_cmpgt_epi64 (al, rl);
218 rh = _mm_cmpgt_epi64 (rh, ah);
219 r = _mm_and_si128 (rl, rh);
222 rl = _mm_cmpgt_epi64 (al, rl);
224 rh = _mm_cmpgt_epi64 (rh, ah);
225 r = _mm_blend_epi16 (r, _mm_and_si128 (rl, rh), 0x22);
228 rl = _mm_cmpgt_epi64 (al, rl);
230 rh = _mm_cmpgt_epi64 (rh, ah);
231 r = _mm_blend_epi16 (r, _mm_and_si128 (rl, rh), 0x44);
234 rl = _mm_cmpgt_epi64 (al, rl);
236 rh = _mm_cmpgt_epi64 (rh, ah);
237 r = _mm_blend_epi16 (r, _mm_and_si128 (rl, rh), 0x88);
239 r = _mm_shuffle_epi8 (r, _mm_set_epi64x (0, 0x0e060c040a020800));
240 i = __builtin_ctzll (_mm_movemask_epi8 (r));
252 if ((vui->
regions[i].guest_phys_addr <= addr) &&
262 DBG_VQ (
"failed to map guest mem addr %llx", addr);
273 if ((vui->
regions[i].userspace_addr <= addr) &&
303 ssize_t map_sz = (vui->
regions[
i].memory_size +
305 page_sz) & ~(page_sz - 1);
312 (
"unmap memory region %d addr 0x%lx len 0x%lx page_sz 0x%x", i,
319 clib_warning (
"failed to unmap memory region (errno %d)",
378 VLIB_NODE_STATE_DISABLED);
391 u32 *vui_workers = vec_len (vui->workers) ? vui->workers : workers;
393 for (qid = 0; qid < VHOST_VRING_MAX_N / 2; qid++)
395 vhost_user_vring_t *txvq =
396 &vui->vrings[VHOST_VRING_IDX_TX (qid)];
400 i %= vec_len (vui_workers);
401 u32 cpu_index = vui_workers[i];
403 vhc = &vum->cpus[cpu_index];
406 iaq.vhost_iface_index = vui - vum->vhost_user_interfaces;
407 vec_add1 (vhc->rx_queues, iaq);
408 vlib_node_set_state (vlib_mains ? vlib_mains[cpu_index] :
409 &vlib_global_main, vhost_user_input_node.index,
410 VLIB_NODE_STATE_POLLING);
423 if (worker_thread_index < vum->input_cpu_first_index ||
424 worker_thread_index >=
435 if (*w == worker_thread_index)
448 else if (found == ~0)
461 int i, found[2] = { };
467 return found[0] && found[1];
475 if (is_up != vui->
is_up)
478 is_up ?
"ready" :
"down");
491 __attribute__ ((unused))
int n;
500 __attribute__ ((unused))
int n;
523 return __sync_lock_test_and_set (vui->
vring_locks[qid], 1);
549 memset (vring, 0,
sizeof (*vring));
563 if (qid == 0 || qid == 1)
585 if (vring->
errfd != -1)
586 close (vring->
errfd);
613 #define VHOST_LOG_PAGE 0x1000 619 || !(vui->
features & (1 << FEAT_VHOST_F_LOG_ALL))))
629 DBG_SOCK (
"vhost_user_log_dirty_pages(): out of range\n");
648 #define vhost_user_log_dirty_ring(vui, vq, member) \ 649 if (PREDICT_FALSE(vq->log_used)) { \ 650 vhost_user_log_dirty_pages(vui, vq->log_guest_addr + STRUCT_OFFSET_OF(vring_used_t, member), \ 651 sizeof(vq->used->member)); \ 658 int fd, number_of_fds = 0;
660 vhost_user_msg_t msg;
665 struct cmsghdr *cmsg;
674 memset (&mh, 0,
sizeof (mh));
675 memset (control, 0,
sizeof (control));
681 iov[0].iov_base = (
void *) &msg;
686 mh.msg_control = control;
687 mh.msg_controllen =
sizeof (control);
698 DBG_SOCK (
"recvmsg returned error %d %s", errno, strerror (errno));
702 DBG_SOCK (
"n (%d) != VHOST_USER_MSG_HDR_SZ (%d)",
708 if (mh.msg_flags & MSG_CTRUNC)
714 cmsg = CMSG_FIRSTHDR (&mh);
716 if (cmsg && (cmsg->cmsg_len > 0) && (cmsg->cmsg_level == SOL_SOCKET) &&
717 (cmsg->cmsg_type == SCM_RIGHTS) &&
718 (cmsg->cmsg_len - CMSG_LEN (0) <=
719 VHOST_MEMORY_MAX_NREGIONS *
sizeof (
int)))
721 number_of_fds = (cmsg->cmsg_len - CMSG_LEN (0)) /
sizeof (
int);
722 clib_memcpy (fds, CMSG_DATA (cmsg), number_of_fds *
sizeof (
int));
726 if ((msg.flags & 7) != 1)
728 DBG_SOCK (
"malformed message received. closing socket");
739 DBG_SOCK (
"read failed %s", strerror (errno));
742 else if (rv != msg.size)
744 DBG_SOCK (
"message too short (read %dB should be %dB)", rv, msg.size);
753 msg.u64 = (1ULL << FEAT_VIRTIO_NET_F_MRG_RXBUF) |
754 (1ULL << FEAT_VIRTIO_NET_F_CTRL_VQ) |
755 (1ULL << FEAT_VIRTIO_F_ANY_LAYOUT) |
756 (1ULL << FEAT_VIRTIO_F_INDIRECT_DESC) |
757 (1ULL << FEAT_VHOST_F_LOG_ALL) |
758 (1ULL << FEAT_VIRTIO_NET_F_GUEST_ANNOUNCE) |
759 (1ULL << FEAT_VIRTIO_NET_F_MQ) |
760 (1ULL << FEAT_VHOST_USER_F_PROTOCOL_FEATURES) |
761 (1ULL << FEAT_VIRTIO_F_VERSION_1);
763 msg.size =
sizeof (msg.u64);
764 DBG_SOCK (
"if %d msg VHOST_USER_GET_FEATURES - reply 0x%016llx",
769 DBG_SOCK (
"if %d msg VHOST_USER_SET_FEATURES features 0x%016llx",
775 ((1 << FEAT_VIRTIO_NET_F_MRG_RXBUF) |
776 (1ULL << FEAT_VIRTIO_F_VERSION_1)))
782 (vui->
features & (1 << FEAT_VIRTIO_F_ANY_LAYOUT)) ? 1 : 0;
794 DBG_SOCK (
"if %d msg VHOST_USER_SET_MEM_TABLE nregions %d",
797 if ((msg.memory.nregions < 1) ||
798 (msg.memory.nregions > VHOST_MEMORY_MAX_NREGIONS))
801 DBG_SOCK (
"number of mem regions must be between 1 and %i",
802 VHOST_MEMORY_MAX_NREGIONS);
807 if (msg.memory.nregions != number_of_fds)
809 DBG_SOCK (
"each memory region must have FD");
813 for (i = 0; i < msg.memory.nregions; i++)
816 sizeof (vhost_user_memory_region_t));
821 ssize_t map_sz = (vui->
regions[
i].memory_size +
823 page_sz) & ~(page_sz - 1);
826 MAP_SHARED, fds[i], 0);
832 (
"map memory region %d addr 0 len 0x%lx fd %d mapped 0x%lx " 838 clib_warning (
"failed to map memory. errno is %d", errno);
844 vui->
nregions = msg.memory.nregions;
848 DBG_SOCK (
"if %d msg VHOST_USER_SET_VRING_NUM idx %d num %d",
851 if ((msg.state.num > 32768) ||
852 (msg.state.num == 0) ||
853 ((msg.state.num - 1) & msg.state.num))
855 vui->
vrings[msg.state.index].
qsz = msg.state.num;
859 DBG_SOCK (
"if %d msg VHOST_USER_SET_VRING_ADDR idx %d",
864 DBG_SOCK (
"invalid vring index VHOST_USER_SET_VRING_ADDR:" 869 if (msg.size < sizeof (msg.addr))
871 DBG_SOCK (
"vhost message is too short (%d < %d)",
872 msg.size, sizeof (msg.addr));
876 vui->
vrings[msg.state.index].
desc = (vring_desc_t *)
878 vui->
vrings[msg.state.index].
used = (vring_used_t *)
880 vui->
vrings[msg.state.index].
avail = (vring_avail_t *)
887 DBG_SOCK (
"failed to map user memory for hw_if_index %d",
898 if (!(vui->
features & (1 << FEAT_VHOST_USER_F_PROTOCOL_FEATURES)))
920 DBG_SOCK (
"if %d msg VHOST_USER_SET_VRING_CALL u64 %d",
923 q = (
u8) (msg.u64 & 0xFF);
934 if (!(msg.u64 & 0x100))
936 if (number_of_fds != 1)
938 DBG_SOCK (
"More than one fd received !");
943 template.file_descriptor = fds[0];
944 template.private_data =
953 DBG_SOCK (
"if %d msg VHOST_USER_SET_VRING_KICK u64 %d",
956 q = (
u8) (msg.u64 & 0xFF);
966 if (!(msg.u64 & 0x100))
968 if (number_of_fds != 1)
970 DBG_SOCK (
"More than one fd received !");
975 template.file_descriptor = fds[0];
976 template.private_data =
991 DBG_SOCK (
"if %d msg VHOST_USER_SET_VRING_ERR u64 %d",
994 q = (
u8) (msg.u64 & 0xFF);
999 if (!(msg.u64 & 0x100))
1001 if (number_of_fds != 1)
1012 DBG_SOCK (
"if %d msg VHOST_USER_SET_VRING_BASE idx %d num %d",
1013 vui->
hw_if_index, msg.state.index, msg.state.num);
1019 DBG_SOCK (
"if %d msg VHOST_USER_GET_VRING_BASE idx %d num %d",
1020 vui->
hw_if_index, msg.state.index, msg.state.num);
1024 DBG_SOCK (
"invalid vring index VHOST_USER_GET_VRING_BASE:" 1034 msg.size =
sizeof (msg.state);
1046 if (msg.size != sizeof (msg.log))
1049 (
"invalid msg size for VHOST_USER_SET_LOG_BASE: %d instead of %d",
1050 msg.size, sizeof (msg.log));
1058 (
"VHOST_USER_PROTOCOL_F_LOG_SHMFD not set but VHOST_USER_SET_LOG_BASE received");
1066 (msg.log.size + msg.log.offset + page_sz) & ~(page_sz - 1);
1068 vui->
log_base_addr = mmap (0, map_sz, PROT_READ | PROT_WRITE,
1072 (
"map log region addr 0 len 0x%lx off 0x%lx fd %d mapped 0x%lx",
1077 clib_warning (
"failed to map memory. errno is %d", errno);
1085 msg.size =
sizeof (msg.u64);
1096 DBG_SOCK (
"if %d msg VHOST_USER_GET_PROTOCOL_FEATURES",
1102 msg.size =
sizeof (msg.u64);
1106 DBG_SOCK (
"if %d msg VHOST_USER_SET_PROTOCOL_FEATURES features 0x%lx",
1117 msg.size =
sizeof (msg.u64);
1121 DBG_SOCK (
"if %d VHOST_USER_SET_VRING_ENABLE: %s queue %d",
1122 vui->
hw_if_index, msg.state.num ?
"enable" :
"disable",
1126 DBG_SOCK (
"invalid vring index VHOST_USER_SET_VRING_ENABLE:" 1135 DBG_SOCK (
"unknown vhost-user message %d received. closing socket",
1147 DBG_SOCK (
"could not send message response");
1182 int client_fd, client_len;
1183 struct sockaddr_un client;
1190 client_len =
sizeof (client);
1192 (
struct sockaddr *) &client,
1193 (socklen_t *) & client_len);
1201 template.file_descriptor = client_fd;
1239 if (tr && tr->
count > 0)
1281 if (t->virtio_ring_flags & (1 << VIRTIO_TRACE_F_##n)) \ 1282 s = format (s, "%U %s %s\n", format_white_space, indent, #n, st); 1285 s =
format (s,
"%U virtio_net_hdr first_desc_len %u\n",
1288 s =
format (s,
"%U flags 0x%02x gso_type %u\n",
1290 t->
hdr.hdr.flags, t->
hdr.hdr.gso_type);
1293 s =
format (s,
"%U num_buff %u",
1305 u32 qsz_mask = txvq->
qsz - 1;
1307 u32 desc_current = txvq->
avail->ring[last_avail_idx & qsz_mask];
1308 vring_desc_t *hdr_desc = 0;
1309 virtio_net_hdr_mrg_rxbuf_t *hdr;
1312 memset (t, 0,
sizeof (*t));
1316 hdr_desc = &txvq->
desc[desc_current];
1327 if (!(txvq->
desc[desc_current].flags & VIRTQ_DESC_F_NEXT) &&
1328 !(txvq->
desc[desc_current].flags & VIRTQ_DESC_F_INDIRECT))
1335 if (!hdr_desc || !(hdr =
map_guest_mem (vui, hdr_desc->addr, &hint)))
1342 memcpy (&t->
hdr, hdr, len > hdr_desc->len ? hdr_desc->len : len);
1352 int rv __attribute__ ((unused));
1354 rv = write (fd, &x,
sizeof (x));
1361 u16 copy_len,
u32 * map_hint)
1363 void *src0, *src1, *src2, *src3;
1386 clib_memcpy ((
void *) cpy[0].dst, src0, cpy[0].len);
1387 clib_memcpy ((
void *) cpy[1].dst, src1, cpy[1].len);
1417 u32 discarded_packets = 0;
1419 u16 qsz_mask = txvq->
qsz - 1;
1420 while (discarded_packets != discard_max)
1425 u16 desc_chain_head =
1433 discarded_packets++;
1440 return discarded_packets;
1453 b_current->
flags = 0;
1454 while (b_current != b_head)
1460 b_current->
flags = 0;
1471 u16 n_rx_packets = 0;
1474 u32 n_left_to_next, *to_next;
1524 VHOST_USER_INPUT_FUNC_ERROR_FULL_RX_QUEUE, 1);
1527 qsz_mask = txvq->
qsz - 1;
1561 interface_main.sw_if_counters +
1567 VHOST_USER_INPUT_FUNC_ERROR_NO_BUFFER, flush);
1575 while (n_left > 0 && n_left_to_next > 0)
1580 u32 desc_data_offset;
1581 vring_desc_t *desc_table = txvq->
desc;
1598 to_next[0] = bi_current;
1604 [vum->
cpus[cpu_index].
1605 rx_buffers_len - 1], LOAD);
1653 desc_data_offset = desc_table[desc_current].len;
1659 if (desc_data_offset == desc_table[desc_current].len)
1664 desc_current = desc_table[desc_current].next;
1665 desc_data_offset = 0;
1692 &vum->
cpus[cpu_index],
1705 bi_current = bi_next;
1713 desc_table[desc_current].
len - desc_data_offset;
1715 cpy->
len = (cpy->
len > desc_data_l) ? desc_data_l : cpy->
len;
1717 cpy->
src = desc_table[desc_current].addr + desc_data_offset;
1719 desc_data_offset += cpy->
len;
1751 u32 bi = to_next[-1];
1753 to_next, n_left_to_next,
1768 copy_len, &map_hint)))
1771 (
"Memory mapping error on interface hw_if_index=%d " 1772 "(Shutting down - Switch interface down and up to restart)",
1793 copy_len, &map_hint)))
1795 clib_warning (
"Memory mapping error on interface hw_if_index=%d " 1796 "(Shutting down - Switch interface down and up to restart)",
1821 return n_rx_packets;
1829 uword n_rx_packets = 0;
1841 return n_rx_packets;
1848 .name =
"vhost-user-input",
1849 .sibling_of =
"device-input",
1852 .state = VLIB_NODE_STATE_DISABLED,
1871 u32 qsz_mask = rxvq->
qsz - 1;
1873 u32 desc_current = rxvq->
avail->ring[last_avail_idx & qsz_mask];
1874 vring_desc_t *hdr_desc = 0;
1877 memset (t, 0,
sizeof (*t));
1881 hdr_desc = &rxvq->
desc[desc_current];
1892 if (!(rxvq->
desc[desc_current].flags & VIRTQ_DESC_F_NEXT) &&
1893 !(rxvq->
desc[desc_current].flags & VIRTQ_DESC_F_INDIRECT))
1903 u16 copy_len,
u32 * map_hint)
1905 void *dst0, *dst1, *dst2, *dst3;
1927 clib_memcpy (dst0, (
void *) cpy[0].src, cpy[0].len);
1928 clib_memcpy (dst1, (
void *) cpy[1].src, cpy[1].len);
1971 error = VHOST_USER_TX_FUNC_ERROR_DOWN;
1977 error = VHOST_USER_TX_FUNC_ERROR_NOT_READY;
1984 rxvq = &vui->vrings[qid];
1988 qsz_mask = rxvq->
qsz - 1;
1991 error = VHOST_USER_TX_FUNC_ERROR_NONE;
1997 u16 desc_head, desc_index, desc_len;
1998 vring_desc_t *desc_table;
1999 uword buffer_map_addr;
2014 vui, qid / 2, b0, rxvq);
2019 error = VHOST_USER_TX_FUNC_ERROR_PKT_DROP_NOBUF;
2023 desc_table = rxvq->
desc;
2024 desc_head = desc_index =
2032 (rxvq->
desc[desc_head].len < sizeof (vring_desc_t)))
2034 error = VHOST_USER_TX_FUNC_ERROR_INDIRECT_OVERFLOW;
2042 error = VHOST_USER_TX_FUNC_ERROR_MMAP_FAIL;
2048 desc_len = vui->virtio_net_hdr_sz;
2049 buffer_map_addr = desc_table[desc_index].addr;
2050 buffer_len = desc_table[desc_index].len;
2054 virtio_net_hdr_mrg_rxbuf_t *hdr =
2058 hdr->hdr.gso_type = 0;
2059 hdr->num_buffers = 1;
2064 cpy->
len = vui->virtio_net_hdr_sz;
2065 cpy->
dst = buffer_map_addr;
2069 buffer_map_addr += vui->virtio_net_hdr_sz;
2070 buffer_len -= vui->virtio_net_hdr_sz;
2075 if (buffer_len == 0)
2080 desc_index = desc_table[desc_index].next;
2081 buffer_map_addr = desc_table[desc_index].addr;
2082 buffer_len = desc_table[desc_index].len;
2084 else if (vui->virtio_net_hdr_sz == 12)
2086 virtio_net_hdr_mrg_rxbuf_t *hdr =
2109 error = VHOST_USER_TX_FUNC_ERROR_PKT_DROP_NOBUF;
2113 desc_table = rxvq->
desc;
2114 desc_head = desc_index =
2117 (rxvq->
desc[desc_head].flags & VIRTQ_DESC_F_INDIRECT))
2122 (rxvq->
desc[desc_head].len < sizeof (vring_desc_t)))
2124 error = VHOST_USER_TX_FUNC_ERROR_INDIRECT_OVERFLOW;
2130 rxvq->
desc[desc_index].addr,
2133 error = VHOST_USER_TX_FUNC_ERROR_MMAP_FAIL;
2138 buffer_map_addr = desc_table[desc_index].addr;
2139 buffer_len = desc_table[desc_index].len;
2143 error = VHOST_USER_TX_FUNC_ERROR_PKT_DROP_NOMRG;
2151 cpy->
len = bytes_left;
2152 cpy->
len = (cpy->
len > buffer_len) ? buffer_len : cpy->
len;
2153 cpy->
dst = buffer_map_addr;
2157 bytes_left -= cpy->
len;
2158 buffer_len -= cpy->
len;
2159 buffer_map_addr += cpy->
len;
2160 desc_len += cpy->
len;
2204 copy_len, &map_hint)))
2206 clib_warning (
"Memory mapping error on interface hw_if_index=%d " 2207 "(Shutting down - Switch interface down and up to restart)",
2228 if (n_left && (error == VHOST_USER_TX_FUNC_ERROR_PKT_DROP_NOBUF) && retry)
2246 if (
PREDICT_FALSE (n_left && error != VHOST_USER_TX_FUNC_ERROR_NONE))
2280 .name =
"vhost-user",
2300 struct sockaddr_un sun;
2303 f64 timeout = 3153600000.0 ;
2304 uword *event_data = 0;
2306 sockfd = socket (AF_UNIX, SOCK_STREAM, 0);
2307 sun.sun_family = AF_UNIX;
2325 if (vui->unix_server_index == ~0) {
2326 if (vui->unix_file_index == ~0)
2329 strncpy (sun.sun_path, (char *) vui->sock_filename,
2330 sizeof (sun.sun_path) - 1);
2333 fcntl(sockfd, F_SETFL, O_NONBLOCK);
2334 if (connect (sockfd, (struct sockaddr *) &sun,
2335 sizeof (struct sockaddr_un)) == 0)
2338 fcntl(sockfd, F_SETFL, 0);
2339 vui->sock_errno = 0;
2340 template.file_descriptor = sockfd;
2341 template.private_data =
2342 vui - vhost_user_main.vhost_user_interfaces;
2343 vui->unix_file_index = unix_file_add (&unix_main, &template);
2346 if ((sockfd = socket (AF_UNIX, SOCK_STREAM, 0)) < 0) {
2347 clib_warning(
"Critical: Could not open unix socket");
2353 vui->sock_errno = errno;
2360 socklen_t len = sizeof (error);
2361 int fd = UNIX_GET_FD(vui->unix_file_index);
2363 getsockopt (fd, SOL_SOCKET, SO_ERROR, &error, &len);
2367 DBG_SOCK (
"getsockopt returned %d", retval);
2368 vhost_user_if_disconnect (vui);
2380 .function = vhost_user_process,
2382 .name =
"vhost-user-process",
2419 return VNET_API_ERROR_INVALID_SW_IF_INDEX;
2421 DBG_SOCK (
"Deleting vhost-user interface %s (instance %d)",
2449 struct sockaddr_un un = { };
2452 if ((fd = socket (AF_UNIX, SOCK_STREAM, 0)) < 0)
2453 return VNET_API_ERROR_SYSCALL_ERROR_1;
2455 un.sun_family = AF_UNIX;
2456 strncpy ((
char *) un.sun_path, (
char *) sock_filename,
2457 sizeof (un.sun_path) - 1);
2460 unlink ((
char *) sock_filename);
2462 if (bind (fd, (
struct sockaddr *) &un,
sizeof (un)) == -1)
2464 rv = VNET_API_ERROR_SYSCALL_ERROR_2;
2468 if (listen (fd, 1) == -1)
2470 rv = VNET_API_ERROR_SYSCALL_ERROR_3;
2508 vhost_user_dev_class.index,
2527 const char *sock_filename,
2528 u64 feature_mask,
u32 * sw_if_index)
2534 if (server_sock_fd != -1)
2538 template.file_descriptor = server_sock_fd;
2578 const char *sock_filename,
2582 u8 renumber,
u32 custom_dev_instance,
u8 * hwaddr)
2587 int server_sock_fd = -1;
2602 feature_mask, &sw_if_idx);
2608 *sw_if_index = sw_if_idx;
2617 const char *sock_filename,
2620 u64 feature_mask,
u8 renumber,
u32 custom_dev_instance)
2625 int server_sock_fd = -1;
2631 return VNET_API_ERROR_INVALID_SW_IF_INDEX;
2638 &server_sock_fd)) != 0)
2643 sock_filename, feature_mask, &sw_if_idx);
2659 u8 *sock_filename =
NULL;
2662 u64 feature_mask = (
u64) ~ (0ULL);
2664 u32 custom_dev_instance = ~0;
2674 if (
unformat (line_input,
"socket %s", &sock_filename))
2676 else if (
unformat (line_input,
"server"))
2678 else if (
unformat (line_input,
"feature-mask 0x%llx", &feature_mask))
2684 else if (
unformat (line_input,
"renumber %d", &custom_dev_instance))
2698 is_server, &sw_if_index, feature_mask,
2699 renumber, custom_dev_instance, hw)))
2717 u32 sw_if_index = ~0;
2726 if (
unformat (line_input,
"sw_if_index %d", &sw_if_index))
2756 u32 *hw_if_indices = 0;
2768 for (i = 0; i <
vec_len (hw_if_indices); i++)
2784 strncpy ((
char *) vuid->
if_name, (
char *) s,
2792 *out_vuids = r_vuids;
2806 u32 hw_if_index, *hw_if_indices = 0;
2819 struct feat_struct *feat_entry;
2821 static struct feat_struct feat_array[] = {
2822 #define _(s,b) { .str = #s, .bit = b, }, 2828 #define foreach_protocol_feature \ 2829 _(VHOST_USER_PROTOCOL_F_MQ) \ 2830 _(VHOST_USER_PROTOCOL_F_LOG_SHMFD) 2832 static struct feat_struct proto_feat_array[] = {
2833 #define _(s) { .str = #s, .bit = s}, 2844 vec_add1 (hw_if_indices, hw_if_index);
2855 if (
vec_len (hw_if_indices) == 0)
2865 for (i = 0; i <
vec_len (hw_if_indices); i++)
2870 hi->
name, hw_if_indices[i]);
2873 " features mask (0x%llx): \n" 2874 " features (0x%llx): \n",
2878 feat_entry = (
struct feat_struct *) &feat_array;
2879 while (feat_entry->str)
2881 if (vui->
features & (1ULL << feat_entry->bit))
2889 feat_entry = (
struct feat_struct *) &proto_feat_array;
2890 while (feat_entry->str)
2932 " region fd guest_phys_addr memory_size userspace_addr mmap_offset mmap_addr\n");
2934 " ====== ===== ================== ================== ================== ================== ==================\n");
2936 for (j = 0; j < vui->
nregions; j++)
2939 " %d %-5d 0x%016lx 0x%016lx 0x%016lx 0x%016lx 0x%016lx\n",
2941 vui->
regions[j].guest_phys_addr,
2943 vui->
regions[j].userspace_addr,
2953 (q & 1) ?
"RX" :
"TX",
2957 " qsz %d last_avail_idx %d last_used_idx %d\n",
2963 " avail.flags %x avail.idx %d used.flags %x used.idx %d\n",
2978 " id addr len flags next user_addr\n");
2980 " ===== ================== ===== ====== ===== ==================\n");
2981 for (j = 0; j < vui->
vrings[q].
qsz; j++)
2985 " %-5d 0x%016lx %-5d 0x%04x %-5d 0x%016lx\n",
3057 .path =
"create vhost-user",
3058 .short_help =
"create vhost-user socket <socket-filename> [server] [feature-mask <hex>] [hwaddr <mac-addr>] [renumber <dev_instance>]",
3077 .path =
"delete vhost-user",
3078 .short_help =
"delete vhost-user {<interface> | sw_if_index <sw_idx>}",
3217 .path =
"show vhost-user",
3218 .short_help =
"show vhost-user [<interface> [<interface> [..]]] [descriptors]",
3234 else if (
unformat (input,
"dont-dump-memory"))
3266 u32 worker_thread_index;
3277 &sw_if_index, &worker_thread_index))
3310 .path =
"vhost thread",
3311 .short_help =
"vhost thread <iface> <worker-index> [del]",
unformat_function_t unformat_vnet_hw_interface
#define vec_validate(V, I)
Make sure vector is long enough for given index (no header, unspecified alignment) ...
static clib_error_t * vhost_user_init(vlib_main_t *vm)
void vlib_put_next_frame(vlib_main_t *vm, vlib_node_runtime_t *r, u32 next_index, u32 n_vectors_left)
Release pointer to next frame vector data.
static void vlib_increment_simple_counter(vlib_simple_counter_main_t *cm, u32 cpu_index, u32 index, u32 increment)
Increment a simple counter.
static void vhost_user_vring_close(vhost_user_intf_t *vui, u32 qid)
static void vhost_user_if_disconnect(vhost_user_intf_t *vui)
#define vec_foreach_index(var, v)
Iterate over vector indices.
sll srl srl sll sra u16x4 i
u32 virtio_ring_flags
The device index.
virtio_net_hdr_mrg_rxbuf_t hdr
Length of the first data descriptor.
static uword random_default_seed(void)
Default random seed (unix/linux user-mode)
clib_error_t * vnet_hw_interface_set_flags(vnet_main_t *vnm, u32 hw_if_index, u32 flags)
static u32 vlib_get_trace_count(vlib_main_t *vm, vlib_node_runtime_t *rt)
static f64 vlib_process_wait_for_event_or_clock(vlib_main_t *vm, f64 dt)
Suspend a cooperative multi-tasking thread Waits for an event, or for the indicated number of seconds...
unix_file_function_t * read_function
vhost_cpu_t * cpus
Per-CPU data for vhost-user.
static void vhost_user_create_ethernet(vnet_main_t *vnm, vlib_main_t *vm, vhost_user_intf_t *vui, u8 *hwaddress)
Create ethernet interface for vhost user interface.
#define VHOST_USER_DOWN_DISCARD_COUNT
void ethernet_delete_interface(vnet_main_t *vnm, u32 hw_if_index)
#define VHOST_VRING_IDX_TX(qid)
static vlib_main_t * vlib_get_main(void)
static vnet_hw_interface_t * vnet_get_sup_hw_interface(vnet_main_t *vnm, u32 sw_if_index)
static clib_error_t * vhost_user_socket_error(unix_file_t *uf)
void vhost_user_rx_trace(vhost_trace_t *t, vhost_user_intf_t *vui, u16 qid, vlib_buffer_t *b, vhost_user_vring_t *txvq)
u64 region_guest_addr_hi[VHOST_MEMORY_MAX_NREGIONS]
vnet_interface_main_t interface_main
#define VLIB_BUFFER_TRACE_TRAJECTORY_INIT(b)
static void vlib_error_count(vlib_main_t *vm, uword node_index, uword counter, uword increment)
u32 vlib_buffer_alloc_from_free_list(vlib_main_t *vm, u32 *buffers, u32 n_buffers, u32 free_list_index)
Allocate buffers from specific freelist into supplied array.
static f64 vlib_time_now(vlib_main_t *vm)
#define foreach_virtio_trace_flags
vhost_copy_t copy[VHOST_USER_COPY_ARRAY_N]
static void vhost_user_term_if(vhost_user_intf_t *vui)
Disables and reset interface structure.
static uword vhost_user_input(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *f)
static vnet_hw_interface_t * vnet_get_hw_interface(vnet_main_t *vnm, u32 hw_if_index)
static_always_inline void vnet_feature_start_device_input_x1(u32 sw_if_index, u32 *next0, vlib_buffer_t *b0, u16 buffer_advanced0)
#define vec_add1(V, E)
Add 1 element to end of vector (unspecified alignment).
int vnet_interface_name_renumber(u32 sw_if_index, u32 new_show_dev_instance)
struct _vlib_node_registration vlib_node_registration_t
static_always_inline u32 vhost_user_input_copy(vhost_user_intf_t *vui, vhost_copy_t *cpy, u16 copy_len, u32 *map_hint)
#define VHOST_USER_MSG_HDR_SZ
static clib_error_t * vhost_user_interface_admin_up_down(vnet_main_t *vnm, u32 hw_if_index, u32 flags)
#define vec_add2(V, P, N)
Add N elements to end of vector V, return pointer to new elements in P.
static vnet_sw_interface_t * vnet_get_sw_interface(vnet_main_t *vnm, u32 sw_if_index)
static clib_error_t * vhost_thread_command_fn(vlib_main_t *vm, unformat_input_t *input, vlib_cli_command_t *cmd)
clib_error_t * show_vhost_user_command_fn(vlib_main_t *vm, unformat_input_t *input, vlib_cli_command_t *cmd)
unformat_function_t unformat_vnet_sw_interface
#define clib_error_report(e)
#define VNET_HW_INTERFACE_FLAG_LINK_UP
static char * vhost_user_input_func_error_strings[]
static char * vhost_user_tx_func_error_strings[]
#define pool_get(P, E)
Allocate an object E from a pool P (unspecified alignment).
format_function_t format_vnet_sw_if_index_name
vhost_trace_t * current_trace
static uword unix_file_add(unix_main_t *um, unix_file_t *template)
#define vec_reset_length(v)
Reset vector length to zero NULL-pointer tolerant.
static int vhost_user_name_renumber(vnet_hw_interface_t *hi, u32 new_dev_instance)
static void vhost_user_vui_init(vnet_main_t *vnm, vhost_user_intf_t *vui, int server_sock_fd, const char *sock_filename, u64 feature_mask, u32 *sw_if_index)
static vnet_sw_interface_t * vnet_get_hw_sw_interface(vnet_main_t *vnm, u32 hw_if_index)
static void vlib_trace_buffer(vlib_main_t *vm, vlib_node_runtime_t *r, u32 next_index, vlib_buffer_t *b, int follow_chain)
#define VHOST_VRING_F_LOG
vnet_main_t * vnet_get_main(void)
VNET_DEVICE_CLASS(vhost_user_dev_class, static)
static u8 * format_vhost_user_interface_name(u8 *s, va_list *args)
#define static_always_inline
#define pool_foreach(VAR, POOL, BODY)
Iterate through pool.
#define vlib_prefetch_buffer_with_index(vm, bi, type)
Prefetch buffer metadata by buffer index The first 64 bytes of buffer contains most header informatio...
#define VLIB_INIT_FUNCTION(x)
static uword vlib_process_get_events(vlib_main_t *vm, uword **data_vector)
Return the first event type which has occurred and a vector of per-event data of that type...
static void * vlib_buffer_get_current(vlib_buffer_t *b)
Get pointer to current data to process.
vlib_combined_counter_main_t * combined_sw_if_counters
#define foreach_protocol_feature
#define vec_elt_at_index(v, i)
Get vector value at index i checking that i is in bounds.
vhost_user_tx_func_error_t
#define clib_warning(format, args...)
static void unmap_all_mem_regions(vhost_user_intf_t *vui)
vhost_iface_and_queue_t * rx_queues
vhost_user_input_func_error_t
#define vlib_call_init_function(vm, x)
static clib_error_t * vhost_user_socket_read(unix_file_t *uf)
static uword pointer_to_uword(const void *p)
#define VLIB_BUFFER_NEXT_PRESENT
#define UNIX_GET_FD(unixfd_idx)
#define VLIB_BUFFER_PRE_DATA_SIZE
static int vhost_user_init_server_sock(const char *sock_filename, int *sock_fd)
Open server unix socket on specified sock_filename.
static void unix_file_del(unix_main_t *um, unix_file_t *f)
VLIB_DEVICE_TX_FUNCTION_MULTIARCH(vhost_user_dev_class, vhost_user_tx)
static void vhost_user_vring_unlock(vhost_user_intf_t *vui, u32 qid)
Unlock the vring lock.
format_function_t format_vnet_sw_interface_name
#define pool_elt_at_index(p, i)
Returns pointer to element at given index.
u16 current_length
Nbytes between current data and the end of this buffer.
static void vlib_process_signal_event(vlib_main_t *vm, uword node_index, uword type_opaque, uword data)
int vhost_user_delete_if(vnet_main_t *vnm, vlib_main_t *vm, u32 sw_if_index)
static void * map_user_mem(vhost_user_intf_t *vui, uword addr)
u32 random
Pseudo random iterator.
static_always_inline void vhost_user_log_dirty_pages(vhost_user_intf_t *vui, u64 addr, u64 len)
uword os_get_cpu_number(void)
#define VIRTQ_DESC_F_INDIRECT
#define clib_error_return_unix(e, args...)
#define pool_put(P, E)
Free an object E in pool P.
void vhost_user_tx_trace(vhost_trace_t *t, vhost_user_intf_t *vui, u16 qid, vlib_buffer_t *b, vhost_user_vring_t *rxvq)
#define VLIB_CONFIG_FUNCTION(x, n,...)
#define vhost_user_log_dirty_ring(vui, vq, member)
static vlib_node_registration_t vhost_user_process_node
(constructor) VLIB_REGISTER_NODE (vhost_user_process_node)
void vhost_user_unmap_all(void)
#define vec_del1(v, i)
Delete the element at index I.
vlib_simple_counter_main_t * sw_if_counters
u32 region_mmap_fd[VHOST_MEMORY_MAX_NREGIONS]
static void vhost_user_send_call(vlib_main_t *vm, vhost_user_vring_t *vq)
vhost_user_memory_region_t regions[VHOST_MEMORY_MAX_NREGIONS]
#define vlib_validate_buffer_enqueue_x1(vm, node, next_index, to_next, n_left_to_next, bi0, next0)
Finish enqueueing one buffer forward in the graph.
#define vlib_get_next_frame(vm, node, next_index, vectors, n_vectors_left)
Get pointer to next frame vector data by (vlib_node_runtime_t, next_index).
void vlib_cli_output(vlib_main_t *vm, char *fmt,...)
int vhost_user_dump_ifs(vnet_main_t *vnm, vlib_main_t *vm, vhost_user_intf_details_t **out_vuids)
vlib_error_t error
Error code for buffers to be enqueued to error handler.
static clib_error_t * vhost_user_exit(vlib_main_t *vm)
static void vhost_user_tx_thread_placement(vhost_user_intf_t *vui)
static void vhost_user_vring_init(vhost_user_intf_t *vui, u32 qid)
u8 * format_ethernet_header_with_length(u8 *s, va_list *args)
u32 * show_dev_instance_by_real_dev_instance
int vhost_user_create_if(vnet_main_t *vnm, vlib_main_t *vm, const char *sock_filename, u8 is_server, u32 *sw_if_index, u64 feature_mask, u8 renumber, u32 custom_dev_instance, u8 *hwaddr)
u16 device_index
The interface queue index (Not the virtio vring idx)
vhost_user_intf_t * vhost_user_interfaces
static clib_error_t * vhost_user_kickfd_read_ready(unix_file_t *uf)
#define CLIB_PREFETCH(addr, size, type)
static_always_inline void vhost_user_log_dirty_pages_2(vhost_user_intf_t *vui, u64 addr, u64 len, u8 is_host_address)
#define vec_free(V)
Free vector's memory (no header).
static vlib_thread_main_t * vlib_get_thread_main()
static int vhost_user_vring_try_lock(vhost_user_intf_t *vui, u32 qid)
Try once to lock the vring.
#define VLIB_MAIN_LOOP_EXIT_FUNCTION(x)
int vhost_user_modify_if(vnet_main_t *vnm, vlib_main_t *vm, const char *sock_filename, u8 is_server, u32 sw_if_index, u64 feature_mask, u8 renumber, u32 custom_dev_instance)
#define clib_memcpy(a, b, c)
#define VHOST_MEMORY_MAX_NREGIONS
static_always_inline void * map_guest_mem(vhost_user_intf_t *vui, uword addr, u32 *hint)
#define VLIB_BUFFER_TOTAL_LENGTH_VALID
vlib_main_t vlib_global_main
void vlib_worker_thread_barrier_sync(vlib_main_t *vm)
static uword vhost_user_tx(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
u16 first_desc_len
Runtime queue flags.
#define VHOST_USER_PROTOCOL_F_LOG_SHMFD
static void vhost_user_input_rewind_buffers(vlib_main_t *vm, vhost_cpu_t *cpu, vlib_buffer_t *b_head)
#define VLIB_CLI_COMMAND(x,...)
#define VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX
#define VNET_SW_INTERFACE_FLAG_ADMIN_UP
u32 max_l3_packet_bytes[VLIB_N_RX_TX]
u32 rx_buffers[VHOST_USER_RX_BUFFERS_N]
uword unformat_ethernet_address(unformat_input_t *input, va_list *args)
static void vlib_increment_combined_counter(vlib_combined_counter_main_t *cm, u32 cpu_index, u32 index, u32 packet_increment, u32 byte_increment)
Increment a combined counter.
#define VHOST_USER_RX_BUFFER_STARVATION
void vlib_buffer_free(vlib_main_t *vm, u32 *buffers, u32 n_buffers)
Free buffers. Frees the entire buffer chain for each buffer.
static long get_huge_page_size(int fd)
u32 next_buffer
Next buffer for this linked-list of buffers.
clib_error_t * vhost_user_delete_command_fn(vlib_main_t *vm, unformat_input_t *input, vlib_cli_command_t *cmd)
static int vhost_user_thread_placement(u32 sw_if_index, u32 worker_thread_index, u8 del)
#define VIRTQ_DESC_F_NEXT
volatile u32 * vring_locks[VHOST_VRING_MAX_N]
clib_error_t * ethernet_register_interface(vnet_main_t *vnm, u32 dev_class_index, u32 dev_instance, u8 *address, u32 *hw_if_index_return, ethernet_flag_change_function_t flag_change)
static void * vlib_frame_args(vlib_frame_t *f)
Get pointer to frame scalar data.
static void vlib_node_set_state(vlib_main_t *vm, u32 node_index, vlib_node_state_t new_state)
Set node dispatch state.
uword * thread_registrations_by_name
static u8 * format_vhost_trace(u8 *s, va_list *va)
#define VHOST_USER_RX_COPY_THRESHOLD
#define VLIB_BUFFER_IS_TRACED
static void vhost_user_vring_lock(vhost_user_intf_t *vui, u32 qid)
Spin until the vring is successfully locked.
static void vhost_user_rx_thread_placement()
static void * vlib_add_trace(vlib_main_t *vm, vlib_node_runtime_t *r, vlib_buffer_t *b, u32 n_data_bytes)
static void vhost_user_update_iface_state(vhost_user_intf_t *vui)
u32 total_length_not_including_first_buffer
Only valid for first buffer in chain.
#define foreach_vhost_user_tx_func_error
void * region_mmap_addr[VHOST_MEMORY_MAX_NREGIONS]
static clib_error_t * vhost_user_socksvr_accept_ready(unix_file_t *uf)
static clib_error_t * vhost_user_config(vlib_main_t *vm, unformat_input_t *input)
u32 input_cpu_count
total cpu count
#define vec_len(v)
Number of elements in vector (rvalue-only, NULL tolerant)
#define VRING_USED_F_NO_NOTIFY
#define VHOST_USER_RX_BUFFERS_N
int vhost_user_intf_ready(vhost_user_intf_t *vui)
Returns whether at least one TX and one RX vring are enabled.
vhost_user_vring_t vrings[VHOST_VRING_MAX_N]
u32 input_cpu_first_index
first cpu index
#define VHOST_VRING_MAX_N
vlib_main_t ** vlib_mains
vlib_node_registration_t vhost_user_input_node
(constructor) VLIB_REGISTER_NODE (vhost_user_input_node)
#define VLIB_BUFFER_DATA_SIZE
#define DBG_SOCK(args...)
#define hash_get_mem(h, key)
u32 vhost_user_rx_discard_packet(vlib_main_t *vm, vhost_user_intf_t *vui, vhost_user_vring_t *txvq, u32 discard_max)
Try to discard packets from the tx ring (VPP RX path).
static vhost_user_main_t vhost_user_main
static void * clib_mem_alloc_aligned(uword size, uword align)
#define VLIB_NODE_FUNCTION_MULTIARCH(node, fn)
static u32 random_u32(u32 *seed)
32-bit random number generator
void vlib_worker_thread_barrier_release(vlib_main_t *vm)
#define VLIB_REGISTER_NODE(x,...)
u64 region_guest_addr_lo[VHOST_MEMORY_MAX_NREGIONS]
static clib_error_t * vhost_user_callfd_read_ready(unix_file_t *uf)
static u32 vhost_user_if_input(vlib_main_t *vm, vhost_user_main_t *vum, vhost_user_intf_t *vui, u16 qid, vlib_node_runtime_t *node)
#define vec_foreach(var, vec)
Vector iterator.
#define foreach_vhost_user_input_func_error
#define CLIB_MEMORY_BARRIER()
virtio_net_hdr_mrg_rxbuf_t tx_headers[VLIB_FRAME_SIZE]
#define clib_error_return(e, args...)
static void vlib_set_trace_count(vlib_main_t *vm, vlib_node_runtime_t *rt, u32 count)
#define vec_validate_init_empty(V, I, INIT)
Make sure vector is long enough for given index and initialize empty space (no header, unspecified alignment)
#define CLIB_CACHE_LINE_BYTES
u32 flags
Buffer flags. VLIB_BUFFER_IS_TRACED: trace this buffer.
static_always_inline u32 vhost_user_tx_copy(vhost_user_intf_t *vui, vhost_copy_t *cpy, u16 copy_len, u32 *map_hint)
#define VHOST_USER_PROTOCOL_F_MQ
static vlib_buffer_t * vlib_get_buffer(vlib_main_t *vm, u32 buffer_index)
Translate buffer index into buffer pointer.
clib_error_t * vhost_user_connect_command_fn(vlib_main_t *vm, unformat_input_t *input, vlib_cli_command_t *cmd)
VNET_HW_INTERFACE_CLASS(vhost_interface_class, static)
int dont_dump_vhost_user_memory
#define VHOST_VRING_IDX_RX(qid)