21 #include <sys/ioctl.h> 22 #include <sys/socket.h> 25 #include <sys/types.h> 27 #include <netinet/in.h> 30 #include <linux/if_arp.h> 31 #include <linux/if_tun.h> 49 #define VHOST_USER_DOWN_DISCARD_COUNT 256 55 #define VHOST_USER_RX_BUFFER_STARVATION 32 65 #define VHOST_USER_RX_COPY_THRESHOLD 64 69 #define foreach_vhost_user_input_func_error \ 70 _(NO_ERROR, "no error") \ 71 _(NO_BUFFER, "no available buffer") \ 72 _(MMAP_FAIL, "mmap failure") \ 73 _(INDIRECT_OVERFLOW, "indirect descriptor overflows table") \ 74 _(UNDERSIZED_FRAME, "undersized ethernet frame received (< 14 bytes)") \ 75 _(NOT_READY, "vhost interface not ready or down") \ 76 _(FULL_RX_QUEUE, "full rx queue (possible driver tx drop)") 80 #define _(f,s) VHOST_USER_INPUT_FUNC_ERROR_##f, 101 virtio_net_hdr_mrg_rxbuf_t *hdr;
108 hdr_desc = &txvq->
desc[desc_current];
119 if (!(txvq->
desc[desc_current].
flags & VRING_DESC_F_NEXT) &&
120 !(txvq->
desc[desc_current].
flags & VRING_DESC_F_INDIRECT))
134 memcpy (&t->
hdr, hdr, len > hdr_desc->
len ? hdr_desc->
len : len);
140 u16 copy_len,
u32 * map_hint)
142 void *src0, *src1, *src2, *src3;
196 u32 discarded_packets = 0;
201 while (discarded_packets != discard_max)
203 if (avail_idx == last_avail_idx)
221 return discarded_packets;
234 b_current->
flags = 0;
235 while (b_current != b_head)
241 b_current->
flags = 0;
248 virtio_net_hdr_t * hdr)
253 u16 ethertype = clib_net_to_host_u16 (eh->
type);
260 ethertype = clib_net_to_host_u16 (vlan->
type);
261 l2hdr_sz +=
sizeof (*vlan);
262 if (ethertype == ETHERNET_TYPE_VLAN)
265 ethertype = clib_net_to_host_u16 (vlan->
type);
266 l2hdr_sz +=
sizeof (*vlan);
272 b0->
flags |= (VNET_BUFFER_F_L2_HDR_OFFSET_VALID |
273 VNET_BUFFER_F_L3_HDR_OFFSET_VALID |
274 VNET_BUFFER_F_L4_HDR_OFFSET_VALID);
280 b0->
flags |= VNET_BUFFER_F_IS_IP4 | VNET_BUFFER_F_OFFLOAD_IP_CKSUM;
286 b0->
flags |= VNET_BUFFER_F_IS_IP6;
289 if (l4_proto == IP_PROTOCOL_TCP)
294 b0->
flags |= VNET_BUFFER_F_OFFLOAD_TCP_CKSUM;
296 else if (l4_proto == IP_PROTOCOL_UDP)
299 b0->
flags |= VNET_BUFFER_F_OFFLOAD_UDP_CKSUM;
306 b0->
flags |= VNET_BUFFER_F_GSO;
312 b0->
flags |= (VNET_BUFFER_F_GSO | VNET_BUFFER_F_IS_IP4);
318 b0->
flags |= (VNET_BUFFER_F_GSO | VNET_BUFFER_F_IS_IP6);
338 u32 * current_config_index,
u32 * next_index,
339 u32 ** to_next,
u32 * n_left_to_next)
382 u16 n_rx_packets = 0;
385 u32 n_left_to_next, *to_next;
393 u32 current_config_index = ~(
u32) 0;
456 VHOST_USER_INPUT_FUNC_ERROR_FULL_RX_QUEUE, 1);
489 interface_main.sw_if_counters +
495 VHOST_USER_INPUT_FUNC_ERROR_NO_BUFFER, flush);
500 &next_index, &to_next, &n_left_to_next);
510 u32 desc_data_offset;
527 to_next[0] = bi_current;
541 b_head->
flags |= VLIB_BUFFER_TOTAL_LENGTH_VALID;
565 VHOST_USER_INPUT_FUNC_ERROR_MMAP_FAIL, 1);
574 virtio_net_hdr_mrg_rxbuf_t *hdr;
582 VHOST_USER_INPUT_FUNC_ERROR_MMAP_FAIL, 1);
587 if ((desc_data_offset == desc_table[desc_current].
len) &&
590 current = desc_table[desc_current].
next;
596 VHOST_USER_INPUT_FUNC_ERROR_MMAP_FAIL,
602 b_data = (
u8 *) hdr + desc_data_offset;
611 if (desc_data_offset == desc_table[desc_current].
len)
616 desc_current = desc_table[desc_current].
next;
617 desc_data_offset = 0;
650 b_current->
flags |= VLIB_BUFFER_NEXT_PRESENT;
651 bi_current = bi_next;
659 u32 desc_data_l = desc_table[desc_current].
len - desc_data_offset;
661 cpy->
len = (cpy->
len > desc_data_l) ? desc_data_l : cpy->
len;
664 cpy->
src = desc_table[desc_current].
addr + desc_data_offset;
666 desc_data_offset += cpy->
len;
690 if (current_config_index != ~(
u32) 0)
693 vnet_buffer (b_head)->feature_arc_index = feature_arc_idx;
706 copy_len, &map_hint)))
709 VHOST_USER_INPUT_FUNC_ERROR_MMAP_FAIL, 1);
715 txvq->
used->
idx = last_used_idx;
730 VHOST_USER_INPUT_FUNC_ERROR_MMAP_FAIL, 1);
752 n_rx_packets, n_rx_bytes);
763 u16 n_descs_processed)
765 vring_packed_desc_t *desc_table = txvq->
packed_desc;
769 for (desc_idx = 0; desc_idx < n_descs_processed; desc_idx++)
772 desc_table[(desc_head + desc_idx) & mask].
flags |=
775 desc_table[(desc_head + desc_idx) & mask].
flags &=
787 vring_packed_desc_t *hdr_desc;
788 virtio_net_hdr_mrg_rxbuf_t *hdr;
806 if (!(txvq->
packed_desc[desc_current].flags & VRING_DESC_F_NEXT) &&
807 !(txvq->
packed_desc[desc_current].flags & VRING_DESC_F_INDIRECT))
812 if (!hdr_desc || !(hdr =
map_guest_mem (vui, hdr_desc->addr, &hint)))
818 len > hdr_desc->len ? hdr_desc->len : len);
828 u32 discarded_packets = 0;
830 u16 desc_current, desc_head;
839 while ((discarded_packets != discard_max) &&
844 desc_current = (desc_current + 1) & mask;
849 return (discarded_packets);
854 u16 copy_len,
u32 * map_hint)
856 void *src0, *src1, *src2, *src3, *src4, *src5, *src6, *src7;
858 u32 rc = VHOST_USER_INPUT_FUNC_ERROR_NO_ERROR;
866 bad = (src4 == 0) + (src5 == 0) + (src6 == 0) + (src7 == 0);
885 bad = (src4 == 0) + (src5 == 0) + (src6 == 0) + (src7 == 0);
908 rc = VHOST_USER_INPUT_FUNC_ERROR_MMAP_FAIL;
920 vring_packed_desc_t * desc_table,
u16 desc_current,
923 u32 rc = VHOST_USER_INPUT_FUNC_ERROR_NO_ERROR;
924 virtio_net_hdr_mrg_rxbuf_t *hdr;
930 rc = VHOST_USER_INPUT_FUNC_ERROR_MMAP_FAIL;
933 if (desc_data_offset == desc_table[desc_current].
len)
935 desc_current = (desc_current + 1) & mask;
937 map_guest_mem (vui, desc_table[desc_current].addr, map_hint);
939 rc = VHOST_USER_INPUT_FUNC_ERROR_MMAP_FAIL;
945 b_data = (
u8 *) hdr + desc_data_offset;
957 u32 buffers_required;
961 buffers_required = desc_len >> 11;
962 if ((desc_len & 2047) != 0)
964 return (buffers_required);
967 if (desc_len < buffer_data_size)
970 result = div (desc_len, buffer_data_size);
972 buffers_required = result.quot + 1;
974 buffers_required = result.quot;
976 return (buffers_required);
982 u32 buffer_data_size,
u16 desc_current,
985 vring_packed_desc_t *desc_table = txvq->
packed_desc;
988 u16 desc_idx = desc_current;
991 n_descs = desc_table[desc_idx].len >> 4;
996 for (desc_idx = 0; desc_idx < n_descs; desc_idx++)
997 desc_len += desc_table[desc_idx].
len;
1000 desc_len -= desc_data_offset;
1008 u32 buffer_data_size,
u16 * current,
1011 vring_packed_desc_t *desc_table = txvq->
packed_desc;
1017 desc_len += desc_table[*current].len;
1019 *current = (*current + 1) & mask;
1022 desc_len += desc_table[*current].len;
1024 *current = (*current + 1) & mask;
1039 u32 * buffers_used,
u32 buffers_required,
1040 u32 * desc_data_offset,
u32 buffer_data_size,
1045 while (*desc_data_offset < desc_table[*desc_idx].
len)
1048 if (
PREDICT_FALSE ((*b_current)->current_length == buffer_data_size))
1051 u32 bi_next = **next;
1053 (*b_current)->next_buffer = bi_next;
1054 (*b_current)->flags |= VLIB_BUFFER_NEXT_PRESENT;
1055 *bi_current = bi_next;
1059 ASSERT (*buffers_used <= buffers_required);
1066 desc_data_l = desc_table[*desc_idx].
len - *desc_data_offset;
1067 cpy->
len = buffer_data_size - (*b_current)->current_length;
1068 cpy->
len = (cpy->
len > desc_data_l) ? desc_data_l : cpy->
len;
1070 (*b_current)->current_length);
1071 cpy->
src = desc_table[*desc_idx].addr + *desc_data_offset;
1073 *desc_data_offset += cpy->
len;
1075 (*b_current)->current_length += cpy->
len;
1078 *desc_idx = (*desc_idx + 1) & mask;;
1079 *desc_data_offset = 0;
1091 u16 n_rx_packets = 0;
1094 u32 buffers_required = 0;
1095 u32 n_left_to_next, *to_next;
1102 u32 current_config_index = ~0;
1104 u16 desc_current, desc_head, last_used_idx;
1105 vring_packed_desc_t *desc_table = 0;
1106 u32 n_descs_processed = 0;
1110 u32 buffers_used = 0;
1111 u16 current, n_descs_to_process;
1142 desc_head = desc_current = last_used_idx;
1159 VHOST_USER_INPUT_FUNC_ERROR_NOT_READY, rv);
1164 &next_index, &to_next, &n_left_to_next);
1170 current = desc_current;
1178 current, &map_hint);
1180 current = (current + 1) & mask;
1192 if (
PREDICT_FALSE ((buffers_required == 0) || (buffers_required > 10000)))
1196 VHOST_USER_INPUT_FUNC_ERROR_NO_BUFFER, rv);
1207 VHOST_USER_INPUT_FUNC_ERROR_NO_BUFFER, rv);
1215 n_descs_processed = n_left;
1221 u32 desc_data_offset;
1222 u16 desc_idx = desc_current;
1226 to_next[0] = bi_current = next[0];
1227 b_head = b_current = b[0];
1230 ASSERT (buffers_used <= buffers_required);
1237 b_head->
flags |= VLIB_BUFFER_TOTAL_LENGTH_VALID;
1239 n_descs_to_process = 1;
1243 n_descs = desc_table[desc_idx].len >> 4;
1251 (vui, desc_table, desc_idx, mask, b_head,
1252 &map_hint) != VHOST_USER_INPUT_FUNC_ERROR_NO_ERROR))))
1255 VHOST_USER_INPUT_FUNC_ERROR_MMAP_FAIL, 1);
1266 &b_current, &next, &b, &bi_current,
1267 cpu, ©_len, &buffers_used,
1268 buffers_required, &desc_data_offset,
1269 buffer_data_size, mask);
1279 if (
PREDICT_FALSE (rv != VHOST_USER_INPUT_FUNC_ERROR_NO_ERROR))
1294 n_descs_to_process = 1;
1298 &b_current, &next, &b, &bi_current,
1299 cpu, ©_len, &buffers_used,
1300 buffers_required, &desc_data_offset,
1301 buffer_data_size, mask);
1302 n_descs_to_process++;
1305 &b_current, &next, &b, &bi_current,
1306 cpu, ©_len, &buffers_used,
1307 buffers_required, &desc_data_offset,
1308 buffer_data_size, mask);
1323 if (current_config_index != ~0)
1326 vnet_buffer (b_head)->feature_arc_index = feature_arc_idx;
1330 ASSERT (n_left >= n_descs_to_process);
1331 n_left -= n_descs_to_process;
1334 desc_current = (desc_current + n_descs_to_process) & mask;
1345 if (
PREDICT_FALSE (rv != VHOST_USER_INPUT_FUNC_ERROR_NO_ERROR))
1354 if (
PREDICT_FALSE (rv != VHOST_USER_INPUT_FUNC_ERROR_NO_ERROR))
1363 while (n_trace && left)
1367 (vm, node, next_index, b[0], 0)))
1372 last_used_idx = (last_used_idx + 1) & mask;
1399 n_rx_packets, n_rx_bytes);
1407 return n_rx_packets;
1415 uword n_rx_packets = 0;
1423 if ((
node->state == VLIB_NODE_STATE_POLLING) ||
1451 return n_rx_packets;
1457 .name =
"vhost-user-input",
1458 .sibling_of =
"device-input",
1462 .state = VLIB_NODE_STATE_DISABLED,
vnet_config_main_t config_main
#define vec_validate(V, I)
Make sure vector is long enough for given index (no header, unspecified alignment) ...
#define VRING_EVENT_F_DISABLE
u32 flags
buffer flags: VLIB_BUFFER_FREE_LIST_INDEX_MASK: bits used to store free list index, VLIB_BUFFER_IS_TRACED: trace this buffer.
static void vnet_device_increment_rx_packets(u32 thread_index, u64 count)
u32 virtio_ring_flags
The device index.
virtio_net_hdr_mrg_rxbuf_t hdr
Length of the first data descriptor.
static u32 vlib_get_trace_count(vlib_main_t *vm, vlib_node_runtime_t *rt)
vhost_cpu_t * cpus
Per-CPU data for vhost-user.
vl_api_wireguard_peer_flags_t flags
static void vlib_increment_combined_counter(vlib_combined_counter_main_t *cm, u32 thread_index, u32 index, u64 n_packets, u64 n_bytes)
Increment a combined counter.
static void vlib_buffer_free(vlib_main_t *vm, u32 *buffers, u32 n_buffers)
Free buffers Frees the entire buffer chain for each buffer.
vring_desc_event_t * used_event
#define VIRTIO_NET_HDR_F_NEEDS_CSUM
vnet_interface_main_t interface_main
#define CLIB_MEMORY_STORE_BARRIER()
static void vlib_error_count(vlib_main_t *vm, uword node_index, uword counter, uword increment)
#define clib_memcpy_fast(a, b, c)
vring_used_elem_t ring[0]
clib_memset(h->entries, 0, sizeof(h->entries[0]) *entries)
#define VLIB_NODE_FLAG_TRACE_SUPPORTED
static f64 vlib_time_now(vlib_main_t *vm)
vring_packed_desc_t * packed_desc
vhost_copy_t copy[VHOST_USER_COPY_ARRAY_N]
u32 * config_index_by_sw_if_index
u16 current_length
Nbytes between current data and the end of this buffer.
static vlib_frame_t * vlib_get_frame(vlib_main_t *vm, vlib_frame_t *f)
static void vlib_increment_simple_counter(vlib_simple_counter_main_t *cm, u32 thread_index, u32 index, u64 increment)
Increment a simple counter.
static_always_inline void vhost_user_advance_last_avail_idx(vhost_user_vring_t *vring)
static_always_inline int vnet_have_features(u8 arc, u32 sw_if_index)
#define VLIB_NODE_FN(node)
struct _tcp_header tcp_header_t
#define VIRTIO_FEATURE(X)
#define VRING_AVAIL_F_NO_INTERRUPT
#define static_always_inline
#define vlib_prefetch_buffer_with_index(vm, bi, type)
Prefetch buffer metadata by buffer index The first 64 bytes of buffer contains most header informatio...
#define ETH_INPUT_FRAME_F_SINGLE_SW_IF_IDX
vlib_combined_counter_main_t * combined_sw_if_counters
static_always_inline void * map_guest_mem(vhost_user_intf_t *vui, uword addr, u32 *hint)
#define VHOST_VRING_IDX_TX(qid)
#define vlib_get_new_next_frame(vm, node, next_index, vectors, n_vectors_left)
#define VLIB_NODE_FLAG_SWITCH_FROM_INTERRUPT_TO_POLLING_MODE
static_always_inline u8 * format_vhost_trace(u8 *s, va_list *va)
static vlib_next_frame_t * vlib_node_runtime_get_next_frame(vlib_main_t *vm, vlib_node_runtime_t *n, u32 next_index)
vlib_error_t error
Error code for buffers to be enqueued to error handler.
#define VRING_USED_F_NO_NOTIFY
#define VHOST_USER_COPY_ARRAY_N
#define pool_elt_at_index(p, i)
Returns pointer to element at given index.
static __clib_warn_unused_result u32 vlib_buffer_alloc(vlib_main_t *vm, u32 *buffers, u32 n_buffers)
Allocate buffers into supplied array.
static __clib_warn_unused_result int vlib_trace_buffer(vlib_main_t *vm, vlib_node_runtime_t *r, u32 next_index, vlib_buffer_t *b, int follow_chain)
static void * vlib_buffer_get_current(vlib_buffer_t *b)
Get pointer to current data to process.
static void * vnet_get_config_data(vnet_config_main_t *cm, u32 *config_index, u32 *next_index, u32 n_data_bytes)
vhost_user_main_t vhost_user_main
u32 node_index
Node index.
#define VIRTIO_NET_HDR_GSO_TCPV4
vl_api_tunnel_mode_t mode
static_always_inline u32 vlib_buffer_get_default_data_size(vlib_main_t *vm)
#define VHOST_VRING_IDX_RX(qid)
u8 * format_ethernet_header_with_length(u8 *s, va_list *args)
u16 device_index
The interface queue index (Not the virtio vring idx)
vhost_user_intf_t * vhost_user_interfaces
#define VLIB_REGISTER_NODE(x,...)
#define CLIB_PREFETCH(addr, size, type)
static_always_inline u64 vhost_user_is_packed_ring_supported(vhost_user_intf_t *vui)
static void * vlib_frame_scalar_args(vlib_frame_t *f)
Get pointer to frame scalar data.
u32 current_config_index
Used by feature subgraph arcs to visit enabled feature nodes.
void vlib_put_next_frame(vlib_main_t *vm, vlib_node_runtime_t *r, u32 next_index, u32 n_vectors_left)
Release pointer to next frame vector data.
u16 first_desc_len
Runtime queue flags.
static_always_inline u8 vhost_user_packed_desc_available(vhost_user_vring_t *vring, u16 idx)
vlib_main_t vlib_node_runtime_t * node
u32 rx_buffers[VHOST_USER_RX_BUFFERS_N]
#define VHOST_USER_RX_BUFFERS_N
#define clib_atomic_swap_acq_n(a, b)
static_always_inline void vhost_user_send_call(vlib_main_t *vm, vhost_user_vring_t *vq)
vlib_buffer_t ** rx_buffers_pdesc
static_always_inline int ethernet_frame_is_tagged(u16 type)
vring_desc_event_t * avail_event
#define VRING_DESC_F_NEXT
#define vec_elt(v, i)
Get vector value at index i.
u8 device_input_feature_arc_index
Feature arc index for device-input.
struct _vlib_node_registration vlib_node_registration_t
u32 next_buffer
Next buffer for this linked-list of buffers.
vhost_user_vring_t vrings[VHOST_VRING_MAX_N]
vlib_main_t vlib_node_runtime_t vlib_frame_t * frame
#define VLIB_BUFFER_TRACE_TRAJECTORY_INIT(b)
VLIB buffer representation.
#define vhost_user_log_dirty_ring(vui, vq, member)
static int tcp_header_bytes(tcp_header_t *t)
#define VRING_DESC_F_USED
#define vec_foreach(var, vec)
Vector iterator.
u16 flags
Copy of main node flags.
void * vlib_add_trace(vlib_main_t *vm, vlib_node_runtime_t *r, vlib_buffer_t *b, u32 n_data_bytes)
static_always_inline void vhost_user_advance_last_used_idx(vhost_user_vring_t *vring)
static void vlib_frame_no_append(vlib_frame_t *f)
static void vlib_set_trace_count(vlib_main_t *vm, vlib_node_runtime_t *rt, u32 count)
static_always_inline void vlib_get_buffers(vlib_main_t *vm, u32 *bi, vlib_buffer_t **b, int count)
Translate array of buffer indices into buffer pointers.
vnet_feature_config_main_t * feature_config_mains
feature config main objects
u32 total_length_not_including_first_buffer
Only valid for first buffer in chain.
#define VIRTIO_NET_HDR_GSO_UDP
vnet_feature_main_t feature_main
static vlib_buffer_t * vlib_get_buffer(vlib_main_t *vm, u32 buffer_index)
Translate buffer index into buffer pointer.
#define VLIB_NODE_FLAG_SWITCH_FROM_POLLING_TO_INTERRUPT_MODE
#define VRING_DESC_F_INDIRECT
#define VIRTIO_NET_HDR_GSO_TCPV6
#define VRING_DESC_F_AVAIL