21 #include <sys/ioctl.h>
22 #include <sys/socket.h>
25 #include <sys/types.h>
27 #include <netinet/in.h>
30 #include <linux/if_arp.h>
31 #include <linux/if_tun.h>
/*
 * Numeric limits for the vhost-user RX path.  The code that consumes them is
 * not part of this excerpt, so the per-constant notes below are inferred from
 * the names only and should be confirmed against the users.
 */
/* NOTE(review): presumably the cap on packets discarded per call while the
 * interface is down (see the discard_max loops) -- confirm. */
50 #define VHOST_USER_DOWN_DISCARD_COUNT 256
/* NOTE(review): presumably a low-water mark on available RX buffers -- confirm. */
56 #define VHOST_USER_RX_BUFFER_STARVATION 32
/* NOTE(review): presumably a threshold on batched copy operations -- confirm. */
66 #define VHOST_USER_RX_COPY_THRESHOLD 64
/*
 * X-macro list of the vhost-user input error conditions.  Each entry is
 * _(NAME, "description"): NAME is an identifier suffix and the string is its
 * human-readable text.  The user defines _() before expanding this list; for
 * example, with _(f,s) defined as VHOST_USER_INPUT_FUNC_ERROR_##f, every entry
 * expands to a VHOST_USER_INPUT_FUNC_ERROR_<NAME> identifier.
 * The order of entries determines the order of whatever is generated from
 * the list, so new entries should be appended rather than inserted.
 */
70 #define foreach_vhost_user_input_func_error \
71 _(NO_ERROR, "no error") \
72 _(NO_BUFFER, "no available buffer") \
73 _(MMAP_FAIL, "mmap failure") \
74 _(INDIRECT_OVERFLOW, "indirect descriptor overflows table") \
75 _(UNDERSIZED_FRAME, "undersized ethernet frame received (< 14 bytes)") \
76 _(NOT_READY, "vhost interface not ready or down") \
77 _(FULL_RX_QUEUE, "full rx queue (possible driver tx drop)")
81 #define _(f,s) VHOST_USER_INPUT_FUNC_ERROR_##f,
102 virtio_net_hdr_mrg_rxbuf_t *hdr;
109 hdr_desc = &txvq->
desc[desc_current];
141 u16 copy_len,
u32 * map_hint)
143 void *src0, *src1, *src2, *src3;
197 u32 discarded_packets = 0;
202 while (discarded_packets != discard_max)
204 if (avail_idx == last_avail_idx)
222 return discarded_packets;
235 b_current->
flags = 0;
236 while (b_current != b_head)
242 b_current->
flags = 0;
249 virtio_net_hdr_t * hdr)
254 u16 ethertype = clib_net_to_host_u16 (eh->
type);
262 ethertype = clib_net_to_host_u16 (vlan->
type);
263 l2hdr_sz +=
sizeof (*vlan);
264 if (ethertype == ETHERNET_TYPE_VLAN)
267 ethertype = clib_net_to_host_u16 (vlan->
type);
268 l2hdr_sz +=
sizeof (*vlan);
274 b0->
flags |= (VNET_BUFFER_F_L2_HDR_OFFSET_VALID |
275 VNET_BUFFER_F_L3_HDR_OFFSET_VALID |
276 VNET_BUFFER_F_L4_HDR_OFFSET_VALID);
281 l4_proto =
ip4->protocol;
282 b0->
flags |= VNET_BUFFER_F_IS_IP4;
283 oflags |= VNET_BUFFER_OFFLOAD_F_IP_CKSUM;
288 l4_proto =
ip6->protocol;
289 b0->
flags |= VNET_BUFFER_F_IS_IP6;
292 if (l4_proto == IP_PROTOCOL_TCP)
297 oflags |= VNET_BUFFER_OFFLOAD_F_TCP_CKSUM;
299 else if (l4_proto == IP_PROTOCOL_UDP)
302 oflags |= VNET_BUFFER_OFFLOAD_F_UDP_CKSUM;
309 b0->
flags |= VNET_BUFFER_F_GSO;
315 b0->
flags |= (VNET_BUFFER_F_GSO | VNET_BUFFER_F_IS_IP4);
321 b0->
flags |= (VNET_BUFFER_F_GSO | VNET_BUFFER_F_IS_IP6);
346 u32 ** to_next,
u32 * n_left_to_next)
387 u16 n_rx_packets = 0;
390 u32 n_left_to_next, *to_next;
398 u32 current_config_index = ~(
u32) 0;
461 VHOST_USER_INPUT_FUNC_ERROR_FULL_RX_QUEUE, 1);
494 interface_main.sw_if_counters +
500 VHOST_USER_INPUT_FUNC_ERROR_NO_BUFFER, flush);
515 u32 desc_data_offset;
532 to_next[0] = bi_current;
546 b_head->
flags |= VLIB_BUFFER_TOTAL_LENGTH_VALID;
570 VHOST_USER_INPUT_FUNC_ERROR_MMAP_FAIL, 1);
579 virtio_net_hdr_mrg_rxbuf_t *hdr;
587 VHOST_USER_INPUT_FUNC_ERROR_MMAP_FAIL, 1);
592 if ((desc_data_offset == desc_table[desc_current].
len) &&
595 current = desc_table[desc_current].
next;
601 VHOST_USER_INPUT_FUNC_ERROR_MMAP_FAIL,
607 b_data = (
u8 *) hdr + desc_data_offset;
616 if (desc_data_offset == desc_table[desc_current].
len)
621 desc_current = desc_table[desc_current].
next;
622 desc_data_offset = 0;
655 b_current->
flags |= VLIB_BUFFER_NEXT_PRESENT;
656 bi_current = bi_next;
664 u32 desc_data_l = desc_table[desc_current].
len - desc_data_offset;
666 cpy->
len = (cpy->
len > desc_data_l) ? desc_data_l : cpy->
len;
669 cpy->
src = desc_table[desc_current].
addr + desc_data_offset;
671 desc_data_offset += cpy->
len;
693 if (current_config_index != ~(
u32) 0)
696 vnet_buffer (b_head)->feature_arc_index = feature_arc_idx;
709 copy_len, &map_hint)))
712 VHOST_USER_INPUT_FUNC_ERROR_MMAP_FAIL, 1);
718 txvq->
used->
idx = last_used_idx;
733 VHOST_USER_INPUT_FUNC_ERROR_MMAP_FAIL, 1);
755 n_rx_packets, n_rx_bytes);
766 u16 n_descs_processed)
768 vring_packed_desc_t *desc_table = txvq->
packed_desc;
772 for (desc_idx = 0; desc_idx < n_descs_processed; desc_idx++)
775 desc_table[(desc_head + desc_idx) &
mask].
flags |=
778 desc_table[(desc_head + desc_idx) &
mask].
flags &=
790 vring_packed_desc_t *hdr_desc;
791 virtio_net_hdr_mrg_rxbuf_t *hdr;
815 if (!hdr_desc || !(hdr =
map_guest_mem (vui, hdr_desc->addr, &hint)))
821 len > hdr_desc->len ? hdr_desc->len :
len);
831 u32 discarded_packets = 0;
833 u16 desc_current, desc_head;
842 while ((discarded_packets != discard_max) &&
847 desc_current = (desc_current + 1) &
mask;
852 return (discarded_packets);
857 u16 copy_len,
u32 * map_hint)
859 void *src0, *src1, *src2, *src3, *src4, *src5, *src6, *src7;
861 u32 rc = VHOST_USER_INPUT_FUNC_ERROR_NO_ERROR;
869 bad = (src4 == 0) + (src5 == 0) + (src6 == 0) + (src7 == 0);
888 bad = (src4 == 0) + (src5 == 0) + (src6 == 0) + (src7 == 0);
911 rc = VHOST_USER_INPUT_FUNC_ERROR_MMAP_FAIL;
923 vring_packed_desc_t * desc_table,
u16 desc_current,
926 u32 rc = VHOST_USER_INPUT_FUNC_ERROR_NO_ERROR;
927 virtio_net_hdr_mrg_rxbuf_t *hdr;
933 rc = VHOST_USER_INPUT_FUNC_ERROR_MMAP_FAIL;
936 if (desc_data_offset == desc_table[desc_current].
len)
938 desc_current = (desc_current + 1) &
mask;
942 rc = VHOST_USER_INPUT_FUNC_ERROR_MMAP_FAIL;
948 b_data = (
u8 *) hdr + desc_data_offset;
960 u32 buffers_required;
964 buffers_required = desc_len >> 11;
965 if ((desc_len & 2047) != 0)
967 return (buffers_required);
970 if (desc_len < buffer_data_size)
973 result = div (desc_len, buffer_data_size);
975 buffers_required = result.quot + 1;
977 buffers_required = result.quot;
979 return (buffers_required);
985 u32 buffer_data_size,
u16 desc_current,
988 vring_packed_desc_t *desc_table = txvq->
packed_desc;
991 u16 desc_idx = desc_current;
994 n_descs = desc_table[desc_idx].len >> 4;
999 for (desc_idx = 0; desc_idx < n_descs; desc_idx++)
1000 desc_len += desc_table[desc_idx].
len;
1003 desc_len -= desc_data_offset;
1011 u32 buffer_data_size,
u16 * current,
1014 vring_packed_desc_t *desc_table = txvq->
packed_desc;
1020 desc_len += desc_table[*current].len;
1022 *current = (*current + 1) &
mask;
1025 desc_len += desc_table[*current].len;
1027 *current = (*current + 1) &
mask;
1042 u32 * buffers_used,
u32 buffers_required,
1043 u32 * desc_data_offset,
u32 buffer_data_size,
1048 while (*desc_data_offset < desc_table[*desc_idx].
len)
1051 if (
PREDICT_FALSE ((*b_current)->current_length == buffer_data_size))
1056 (*b_current)->next_buffer = bi_next;
1057 (*b_current)->flags |= VLIB_BUFFER_NEXT_PRESENT;
1058 *bi_current = bi_next;
1062 ASSERT (*buffers_used <= buffers_required);
1069 desc_data_l = desc_table[*desc_idx].
len - *desc_data_offset;
1070 cpy->
len = buffer_data_size - (*b_current)->current_length;
1071 cpy->
len = (cpy->
len > desc_data_l) ? desc_data_l : cpy->
len;
1073 (*b_current)->current_length);
1074 cpy->
src = desc_table[*desc_idx].addr + *desc_data_offset;
1076 *desc_data_offset += cpy->
len;
1078 (*b_current)->current_length += cpy->
len;
1081 *desc_idx = (*desc_idx + 1) &
mask;;
1082 *desc_data_offset = 0;
1093 u16 n_rx_packets = 0;
1096 u32 buffers_required = 0;
1097 u32 n_left_to_next, *to_next;
1104 u32 current_config_index = ~0;
1106 u16 desc_current, desc_head, last_used_idx;
1107 vring_packed_desc_t *desc_table = 0;
1108 u32 n_descs_processed = 0;
1112 u32 buffers_used = 0;
1113 u16 current, n_descs_to_process;
1140 txvq->
used_event->flags = VRING_EVENT_F_DISABLE;
1144 desc_head = desc_current = last_used_idx;
1161 VHOST_USER_INPUT_FUNC_ERROR_NOT_READY,
rv);
1172 current = desc_current;
1180 current, &map_hint);
1182 current = (current + 1) &
mask;
1194 if (
PREDICT_FALSE ((buffers_required == 0) || (buffers_required > 10000)))
1198 VHOST_USER_INPUT_FUNC_ERROR_NO_BUFFER,
rv);
1209 VHOST_USER_INPUT_FUNC_ERROR_NO_BUFFER,
rv);
1217 n_descs_processed =
n_left;
1223 u32 desc_data_offset;
1224 u16 desc_idx = desc_current;
1228 to_next[0] = bi_current =
next[0];
1229 b_head = b_current =
b[0];
1232 ASSERT (buffers_used <= buffers_required);
1239 b_head->
flags |= VLIB_BUFFER_TOTAL_LENGTH_VALID;
1241 n_descs_to_process = 1;
1245 n_descs = desc_table[desc_idx].len >> 4;
1253 (vui, desc_table, desc_idx,
mask, b_head,
1254 &map_hint) != VHOST_USER_INPUT_FUNC_ERROR_NO_ERROR))))
1257 VHOST_USER_INPUT_FUNC_ERROR_MMAP_FAIL, 1);
1268 &b_current, &
next, &
b, &bi_current,
1269 cpu, &copy_len, &buffers_used,
1270 buffers_required, &desc_data_offset,
1271 buffer_data_size,
mask);
1296 n_descs_to_process = 1;
1300 &b_current, &
next, &
b, &bi_current,
1301 cpu, &copy_len, &buffers_used,
1302 buffers_required, &desc_data_offset,
1303 buffer_data_size,
mask);
1304 n_descs_to_process++;
1307 &b_current, &
next, &
b, &bi_current,
1308 cpu, &copy_len, &buffers_used,
1309 buffers_required, &desc_data_offset,
1310 buffer_data_size,
mask);
1323 if (current_config_index != ~0)
1326 vnet_buffer (b_head)->feature_arc_index = feature_arc_idx;
1331 n_left -= n_descs_to_process;
1334 desc_current = (desc_current + n_descs_to_process) &
mask;
1363 while (n_trace &&
left)
1372 last_used_idx = (last_used_idx + 1) &
mask;
1388 (txvq->
avail_event->flags != VRING_EVENT_F_DISABLE))
1399 n_rx_packets, n_rx_bytes);
1407 return n_rx_packets;
1415 uword n_rx_packets = 0;
1443 return n_rx_packets;
1449 .name =
"vhost-user-input",
1450 .sibling_of =
"device-input",
1454 .state = VLIB_NODE_STATE_DISABLED,