21 #include <sys/ioctl.h>
22 #include <sys/socket.h>
25 #include <sys/types.h>
27 #include <netinet/in.h>
30 #include <linux/if_arp.h>
31 #include <linux/if_tun.h>
/*
 * Numeric tuning constants for the vhost-user input path.
 *
 * NOTE(review): the sites that consume these values are not visible in
 * this excerpt, so the descriptions below are inferred from the names
 * only -- confirm against the callers before relying on them.
 *
 * VHOST_USER_DOWN_DISCARD_COUNT   - presumably the maximum number of
 *   packets discarded per call while the interface is not ready/down.
 * VHOST_USER_RX_BUFFER_STARVATION - presumably a headroom/threshold (in
 *   buffers) used when refilling the per-thread rx buffer cache.
 * VHOST_USER_RX_COPY_THRESHOLD    - presumably the number of queued copy
 *   operations after which pending copies are flushed.
 */
53 #define VHOST_USER_DOWN_DISCARD_COUNT 256
59 #define VHOST_USER_RX_BUFFER_STARVATION 32
69 #define VHOST_USER_RX_COPY_THRESHOLD 64
/*
 * X-macro list of the error conditions reported by the vhost-user input
 * code.  Each entry is _(SYMBOL, "description"); the list expands to
 * nothing useful until the includer defines _(f,s).
 *
 * In this file _(f,s) is defined to emit VHOST_USER_INPUT_FUNC_ERROR_##f,
 * so the entries become the VHOST_USER_INPUT_FUNC_ERROR_* names used when
 * errors are counted (e.g. ..._NO_BUFFER, ..._MMAP_FAIL, ..._NOT_READY,
 * ..._FULL_RX_QUEUE) and NO_ERROR serves as the success return code of the
 * helper functions.
 *
 * NOTE(review): the description strings are presumably expanded into a
 * parallel table of counter names elsewhere -- that expansion is not
 * visible here.  Keep the entry order stable, since the enumerator values
 * follow list order.
 */
73 #define foreach_vhost_user_input_func_error \
74 _(NO_ERROR, "no error") \
75 _(NO_BUFFER, "no available buffer") \
76 _(MMAP_FAIL, "mmap failure") \
77 _(INDIRECT_OVERFLOW, "indirect descriptor overflows table") \
78 _(UNDERSIZED_FRAME, "undersized ethernet frame received (< 14 bytes)") \
79 _(NOT_READY, "vhost interface not ready or down") \
80 _(FULL_RX_QUEUE, "full rx queue (possible driver tx drop)")
84 #define _(f,s) VHOST_USER_INPUT_FUNC_ERROR_##f,
105 virtio_net_hdr_mrg_rxbuf_t *hdr;
112 hdr_desc = &txvq->
desc[desc_current];
144 u16 copy_len,
u32 * map_hint)
146 void *src0, *src1, *src2, *src3;
200 u32 discarded_packets = 0;
205 while (discarded_packets != discard_max)
207 if (avail_idx == last_avail_idx)
225 return discarded_packets;
238 b_current->
flags = 0;
239 while (b_current != b_head)
245 b_current->
flags = 0;
252 virtio_net_hdr_t * hdr)
257 u16 ethertype = clib_net_to_host_u16 (eh->
type);
265 ethertype = clib_net_to_host_u16 (vlan->
type);
266 l2hdr_sz +=
sizeof (*vlan);
267 if (ethertype == ETHERNET_TYPE_VLAN)
270 ethertype = clib_net_to_host_u16 (vlan->
type);
271 l2hdr_sz +=
sizeof (*vlan);
277 b0->
flags |= (VNET_BUFFER_F_L2_HDR_OFFSET_VALID |
278 VNET_BUFFER_F_L3_HDR_OFFSET_VALID |
279 VNET_BUFFER_F_L4_HDR_OFFSET_VALID);
284 l4_proto =
ip4->protocol;
285 b0->
flags |= VNET_BUFFER_F_IS_IP4;
286 oflags |= VNET_BUFFER_OFFLOAD_F_IP_CKSUM;
291 l4_proto =
ip6->protocol;
292 b0->
flags |= VNET_BUFFER_F_IS_IP6;
295 if (l4_proto == IP_PROTOCOL_TCP)
300 oflags |= VNET_BUFFER_OFFLOAD_F_TCP_CKSUM;
302 else if (l4_proto == IP_PROTOCOL_UDP)
305 oflags |= VNET_BUFFER_OFFLOAD_F_UDP_CKSUM;
312 b0->
flags |= VNET_BUFFER_F_GSO;
318 b0->
flags |= (VNET_BUFFER_F_GSO | VNET_BUFFER_F_IS_IP4);
324 b0->
flags |= (VNET_BUFFER_F_GSO | VNET_BUFFER_F_IS_IP6);
349 u32 ** to_next,
u32 * n_left_to_next)
390 u16 n_rx_packets = 0;
393 u32 n_left_to_next, *to_next;
401 u32 current_config_index = ~(
u32) 0;
464 VHOST_USER_INPUT_FUNC_ERROR_FULL_RX_QUEUE, 1);
497 interface_main.sw_if_counters +
503 VHOST_USER_INPUT_FUNC_ERROR_NO_BUFFER, flush);
518 u32 desc_data_offset;
535 to_next[0] = bi_current;
549 b_head->
flags |= VLIB_BUFFER_TOTAL_LENGTH_VALID;
573 VHOST_USER_INPUT_FUNC_ERROR_MMAP_FAIL, 1);
582 virtio_net_hdr_mrg_rxbuf_t *hdr;
590 VHOST_USER_INPUT_FUNC_ERROR_MMAP_FAIL, 1);
595 if ((desc_data_offset == desc_table[desc_current].
len) &&
598 current = desc_table[desc_current].
next;
604 VHOST_USER_INPUT_FUNC_ERROR_MMAP_FAIL,
610 b_data = (
u8 *) hdr + desc_data_offset;
619 if (desc_data_offset == desc_table[desc_current].
len)
624 desc_current = desc_table[desc_current].
next;
625 desc_data_offset = 0;
658 b_current->
flags |= VLIB_BUFFER_NEXT_PRESENT;
659 bi_current = bi_next;
667 u32 desc_data_l = desc_table[desc_current].
len - desc_data_offset;
669 cpy->
len = (cpy->
len > desc_data_l) ? desc_data_l : cpy->
len;
672 cpy->
src = desc_table[desc_current].
addr + desc_data_offset;
674 desc_data_offset += cpy->
len;
696 if (current_config_index != ~(
u32) 0)
699 vnet_buffer (b_head)->feature_arc_index = feature_arc_idx;
712 copy_len, &map_hint)))
715 VHOST_USER_INPUT_FUNC_ERROR_MMAP_FAIL, 1);
721 txvq->
used->
idx = last_used_idx;
736 VHOST_USER_INPUT_FUNC_ERROR_MMAP_FAIL, 1);
758 n_rx_packets, n_rx_bytes);
769 u16 n_descs_processed)
771 vring_packed_desc_t *desc_table = txvq->
packed_desc;
775 for (desc_idx = 0; desc_idx < n_descs_processed; desc_idx++)
778 desc_table[(desc_head + desc_idx) &
mask].
flags |=
781 desc_table[(desc_head + desc_idx) &
mask].
flags &=
793 vring_packed_desc_t *hdr_desc;
794 virtio_net_hdr_mrg_rxbuf_t *hdr;
818 if (!hdr_desc || !(hdr =
map_guest_mem (vui, hdr_desc->addr, &hint)))
824 len > hdr_desc->len ? hdr_desc->len :
len);
834 u32 discarded_packets = 0;
836 u16 desc_current, desc_head;
845 while ((discarded_packets != discard_max) &&
850 desc_current = (desc_current + 1) &
mask;
855 return (discarded_packets);
860 u16 copy_len,
u32 * map_hint)
862 void *src0, *src1, *src2, *src3, *src4, *src5, *src6, *src7;
864 u32 rc = VHOST_USER_INPUT_FUNC_ERROR_NO_ERROR;
872 bad = (src4 == 0) + (src5 == 0) + (src6 == 0) + (src7 == 0);
891 bad = (src4 == 0) + (src5 == 0) + (src6 == 0) + (src7 == 0);
914 rc = VHOST_USER_INPUT_FUNC_ERROR_MMAP_FAIL;
926 vring_packed_desc_t * desc_table,
u16 desc_current,
929 u32 rc = VHOST_USER_INPUT_FUNC_ERROR_NO_ERROR;
930 virtio_net_hdr_mrg_rxbuf_t *hdr;
936 rc = VHOST_USER_INPUT_FUNC_ERROR_MMAP_FAIL;
939 if (desc_data_offset == desc_table[desc_current].
len)
941 desc_current = (desc_current + 1) &
mask;
945 rc = VHOST_USER_INPUT_FUNC_ERROR_MMAP_FAIL;
951 b_data = (
u8 *) hdr + desc_data_offset;
963 u32 buffers_required;
967 buffers_required = desc_len >> 11;
968 if ((desc_len & 2047) != 0)
970 return (buffers_required);
973 if (desc_len < buffer_data_size)
976 result = div (desc_len, buffer_data_size);
978 buffers_required = result.quot + 1;
980 buffers_required = result.quot;
982 return (buffers_required);
988 u32 buffer_data_size,
u16 desc_current,
991 vring_packed_desc_t *desc_table = txvq->
packed_desc;
994 u16 desc_idx = desc_current;
997 n_descs = desc_table[desc_idx].len >> 4;
1002 for (desc_idx = 0; desc_idx < n_descs; desc_idx++)
1003 desc_len += desc_table[desc_idx].
len;
1006 desc_len -= desc_data_offset;
1014 u32 buffer_data_size,
u16 * current,
1017 vring_packed_desc_t *desc_table = txvq->
packed_desc;
1023 desc_len += desc_table[*current].len;
1025 *current = (*current + 1) &
mask;
1028 desc_len += desc_table[*current].len;
1030 *current = (*current + 1) &
mask;
1045 u32 * buffers_used,
u32 buffers_required,
1046 u32 * desc_data_offset,
u32 buffer_data_size,
1051 while (*desc_data_offset < desc_table[*desc_idx].
len)
1054 if (
PREDICT_FALSE ((*b_current)->current_length == buffer_data_size))
1059 (*b_current)->next_buffer = bi_next;
1060 (*b_current)->flags |= VLIB_BUFFER_NEXT_PRESENT;
1061 *bi_current = bi_next;
1065 ASSERT (*buffers_used <= buffers_required);
1072 desc_data_l = desc_table[*desc_idx].
len - *desc_data_offset;
1073 cpy->
len = buffer_data_size - (*b_current)->current_length;
1074 cpy->
len = (cpy->
len > desc_data_l) ? desc_data_l : cpy->
len;
1076 (*b_current)->current_length);
1077 cpy->
src = desc_table[*desc_idx].addr + *desc_data_offset;
1079 *desc_data_offset += cpy->
len;
1081 (*b_current)->current_length += cpy->
len;
1084 *desc_idx = (*desc_idx + 1) &
mask;;
1085 *desc_data_offset = 0;
1096 u16 n_rx_packets = 0;
1099 u32 buffers_required = 0;
1100 u32 n_left_to_next, *to_next;
1107 u32 current_config_index = ~0;
1109 u16 desc_current, desc_head, last_used_idx;
1110 vring_packed_desc_t *desc_table = 0;
1111 u32 n_descs_processed = 0;
1115 u32 buffers_used = 0;
1116 u16 current, n_descs_to_process;
1143 txvq->
used_event->flags = VRING_EVENT_F_DISABLE;
1147 desc_head = desc_current = last_used_idx;
1164 VHOST_USER_INPUT_FUNC_ERROR_NOT_READY,
rv);
1175 current = desc_current;
1183 current, &map_hint);
1185 current = (current + 1) &
mask;
1197 if (
PREDICT_FALSE ((buffers_required == 0) || (buffers_required > 10000)))
1201 VHOST_USER_INPUT_FUNC_ERROR_NO_BUFFER,
rv);
1212 VHOST_USER_INPUT_FUNC_ERROR_NO_BUFFER,
rv);
1220 n_descs_processed =
n_left;
1226 u32 desc_data_offset;
1227 u16 desc_idx = desc_current;
1231 to_next[0] = bi_current =
next[0];
1232 b_head = b_current =
b[0];
1235 ASSERT (buffers_used <= buffers_required);
1242 b_head->
flags |= VLIB_BUFFER_TOTAL_LENGTH_VALID;
1244 n_descs_to_process = 1;
1248 n_descs = desc_table[desc_idx].len >> 4;
1256 (vui, desc_table, desc_idx,
mask, b_head,
1257 &map_hint) != VHOST_USER_INPUT_FUNC_ERROR_NO_ERROR))))
1260 VHOST_USER_INPUT_FUNC_ERROR_MMAP_FAIL, 1);
1271 &b_current, &
next, &
b, &bi_current,
1272 cpu, ©_len, &buffers_used,
1273 buffers_required, &desc_data_offset,
1274 buffer_data_size,
mask);
1299 n_descs_to_process = 1;
1303 &b_current, &
next, &
b, &bi_current,
1304 cpu, ©_len, &buffers_used,
1305 buffers_required, &desc_data_offset,
1306 buffer_data_size,
mask);
1307 n_descs_to_process++;
1310 &b_current, &
next, &
b, &bi_current,
1311 cpu, ©_len, &buffers_used,
1312 buffers_required, &desc_data_offset,
1313 buffer_data_size,
mask);
1326 if (current_config_index != ~0)
1329 vnet_buffer (b_head)->feature_arc_index = feature_arc_idx;
1334 n_left -= n_descs_to_process;
1337 desc_current = (desc_current + n_descs_to_process) &
mask;
1366 while (n_trace &&
left)
1375 last_used_idx = (last_used_idx + 1) &
mask;
1391 (txvq->
avail_event->flags != VRING_EVENT_F_DISABLE))
1402 n_rx_packets, n_rx_bytes);
1410 return n_rx_packets;
1418 uword n_rx_packets = 0;
1446 return n_rx_packets;
1452 .name =
"vhost-user-input",
1453 .sibling_of =
"device-input",
1457 .state = VLIB_NODE_STATE_DISABLED,