#include <sys/ioctl.h>
#include <sys/socket.h>
#include <sys/types.h>
#include <netinet/in.h>
#include <linux/if_arp.h>
#include <linux/if_tun.h>

/* When an RX queue is down but active, up to this many received packets
   are discarded during each round. */
#define VHOST_USER_DOWN_DISCARD_COUNT 256
/* When the number of available buffers falls under this threshold,
   the RX node starts discarding packets. */
#define VHOST_USER_RX_BUFFER_STARVATION 32
/* Number of copy operations stacked before the batch is executed and
   descriptors are given back to the guest. */
#define VHOST_USER_RX_COPY_THRESHOLD 64

#define foreach_vhost_user_input_func_error \
  _(NO_ERROR, "no error") \
  _(NO_BUFFER, "no available buffer") \
  _(MMAP_FAIL, "mmap failure") \
  _(INDIRECT_OVERFLOW, "indirect descriptor overflows table") \
  _(UNDERSIZED_FRAME, "undersized ethernet frame received (< 14 bytes)") \
  _(NOT_READY, "vhost interface not ready or down") \
  _(FULL_RX_QUEUE, "full rx queue (possible driver tx drop)")

typedef enum
{
#define _(f,s) VHOST_USER_INPUT_FUNC_ERROR_##f,
  foreach_vhost_user_input_func_error
#undef _
    VHOST_USER_INPUT_FUNC_N_ERROR,
} vhost_user_input_func_error_t;

static char *vhost_user_input_func_error_strings[] = {
#define _(n,s) s,
  foreach_vhost_user_input_func_error
#undef _
};
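/*
 * vhost_user_rx_trace () -- fragment. Captures the first descriptor and
 * the virtio net header of a packet being received so the packet tracer
 * (format_vhost_trace) can display them; chained and indirect
 * descriptors are followed to locate the header.
 */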
  virtio_net_hdr_mrg_rxbuf_t *hdr;
  /* ... */
  hdr_desc = &txvq->desc[desc_current];
  /* ... */
  if (!(txvq->desc[desc_current].flags & VRING_DESC_F_NEXT) &&
      !(txvq->desc[desc_current].flags & VRING_DESC_F_INDIRECT))
    {
      /* single-descriptor packet */
      /* ... */
    }
  /* ... */
  memcpy (&t->hdr, hdr, len > hdr_desc->len ? hdr_desc->len : len);
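/*
 * vhost_user_input_copy () -- executes the copy orders accumulated in the
 * per-thread copy[] array for the split ring, four at a time. Each guest
 * source address is translated with map_guest_mem (); a failed
 * translation aborts the batch with MMAP_FAIL.
 */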
static_always_inline u32
vhost_user_input_copy (vhost_user_intf_t * vui, vhost_copy_t * cpy,
                       u16 copy_len, u32 * map_hint)
{
  void *src0, *src1, *src2, *src3;
  /* ... */
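/*
 * vhost_user_rx_discard_packet () -- drops up to discard_max packets from
 * the guest's TX ring without copying them, used while the interface
 * cannot receive (e.g. it is down). Returns the number discarded.
 */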
  u32 discarded_packets = 0;
  /* ... */
  while (discarded_packets != discard_max)
    {
      if (avail_idx == last_avail_idx)
        goto out;               /* ring is empty */
      /* ... */
    }
out:
  /* ... */
  return discarded_packets;
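/*
 * vhost_user_input_rewind_buffers () -- error path: walks the
 * partially-filled buffer chain back from the current buffer to the
 * packet head, clearing buffer flags so the buffers can be reused for
 * the next packet.
 */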
  b_current->flags = 0;
  while (b_current != b_head)
    {
      /* ... */
      b_current->flags = 0;
    }
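/*
 * vhost_user_handle_rx_offload () -- translates the virtio_net_hdr
 * supplied by the guest into vlib offload flags. It walks the Ethernet
 * header (including up to two VLAN tags) to find the L3 ethertype, marks
 * the L2/L3/L4 offsets valid, then requests IP/TCP/UDP checksum offload
 * and GSO according to the header's csum and gso fields.
 */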
static_always_inline void
vhost_user_handle_rx_offload (vlib_buffer_t * b0, u8 * b0_data,
                              virtio_net_hdr_t * hdr)
{
  /* ... */
  u16 ethertype = clib_net_to_host_u16 (eh->type);
  /* ... */
  if (ethernet_frame_is_tagged (ethertype))
    {
      ethernet_vlan_header_t *vlan = (ethernet_vlan_header_t *) (eh + 1);

      ethertype = clib_net_to_host_u16 (vlan->type);
      l2hdr_sz += sizeof (*vlan);
      if (ethertype == ETHERNET_TYPE_VLAN)
        {
          vlan++;
          ethertype = clib_net_to_host_u16 (vlan->type);
          l2hdr_sz += sizeof (*vlan);
        }
    }
  /* ... */
  b0->flags |= (VNET_BUFFER_F_L2_HDR_OFFSET_VALID |
                VNET_BUFFER_F_L3_HDR_OFFSET_VALID |
                VNET_BUFFER_F_L4_HDR_OFFSET_VALID);

  if (ethertype == ETHERNET_TYPE_IP4)
    {
      /* ... */
      b0->flags |= VNET_BUFFER_F_IS_IP4 | VNET_BUFFER_F_OFFLOAD_IP_CKSUM;
    }
  else if (ethertype == ETHERNET_TYPE_IP6)
    {
      /* ... */
      b0->flags |= VNET_BUFFER_F_IS_IP6;
    }

  if (l4_proto == IP_PROTOCOL_TCP)
    {
      /* ... */
      b0->flags |= VNET_BUFFER_F_OFFLOAD_TCP_CKSUM;
    }
  else if (l4_proto == IP_PROTOCOL_UDP)
    {
      /* ... */
      b0->flags |= VNET_BUFFER_F_OFFLOAD_UDP_CKSUM;
    }

  if (hdr->gso_type == VIRTIO_NET_HDR_GSO_UDP)
    {
      /* ... */
      b0->flags |= VNET_BUFFER_F_GSO;
    }
  else if (hdr->gso_type == VIRTIO_NET_HDR_GSO_TCPV4)
    {
      /* ... */
      b0->flags |= (VNET_BUFFER_F_GSO | VNET_BUFFER_F_IS_IP4);
    }
  else if (hdr->gso_type == VIRTIO_NET_HDR_GSO_TCPV6)
    {
      /* ... */
      b0->flags |= (VNET_BUFFER_F_GSO | VNET_BUFFER_F_IS_IP6);
    }
}
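/*
 * vhost_user_input_setup_frame () -- prepares the next frame for the
 * device-input feature arc: resolves the configured feature chain (if
 * any) for the interface, fetches a new next-frame, and, when the next
 * node is ethernet-input, fills in the frame scalar hints
 * (ETH_INPUT_FRAME_F_SINGLE_SW_IF_IDX).
 */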
static_always_inline void
vhost_user_input_setup_frame (vlib_main_t * vm, vlib_node_runtime_t * node,
                              vhost_user_intf_t * vui,
                              u32 * current_config_index, u32 * next_index,
                              u32 ** to_next, u32 * n_left_to_next)
{
  /* ... */
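/*
 * vhost_user_if_input () -- split-ring receive path for one queue. It
 * polls the avail ring, allocates vlib buffers, queues copy orders for
 * the descriptor payloads, and hands complete packets to the next node.
 * A completely full ring is reported as FULL_RX_QUEUE, which usually
 * means the driver is dropping on its TX side.
 */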
  u16 n_rx_packets = 0;
  u32 n_rx_bytes = 0;
  u32 n_left_to_next, *to_next;
  /* ... */
  u32 current_config_index = ~(u32) 0;
  /* ... */
  /* the ring is completely full: possible driver tx drop */
  vlib_error_count (vm, node->node_index,
                    VHOST_USER_INPUT_FUNC_ERROR_FULL_RX_QUEUE, 1);
  /* ... */
  /* buffer starvation: discard a batch and account for the drops */
  vlib_increment_simple_counter (vnet_main.interface_main.sw_if_counters +
                                 VNET_INTERFACE_COUNTER_DROP,
                                 vm->thread_index, vui->sw_if_index, flush);
  vlib_error_count (vm, vhost_user_input_node.index,
                    VHOST_USER_INPUT_FUNC_ERROR_NO_BUFFER, flush);
  /* ... */
  vhost_user_input_setup_frame (vm, node, vui, &current_config_index,
                                &next_index, &to_next, &n_left_to_next);
  /* ... per-packet loop ... */
  u32 desc_data_offset;
  /* ... */
  to_next[0] = bi_current;
  /* ... */
  b_head->flags |= VLIB_BUFFER_TOTAL_LENGTH_VALID;
  /* ... */
  /* the indirect descriptor table could not be mapped: drop */
  vlib_error_count (vm, node->node_index,
                    VHOST_USER_INPUT_FUNC_ERROR_MMAP_FAIL, 1);
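/*
 * Each packet starts with a virtio_net_hdr_mrg_rxbuf_t in guest memory.
 * The header is mapped and, when checksum offload is negotiated, handed
 * to vhost_user_handle_rx_offload (); the payload that follows it is cut
 * into copy orders, chaining additional vlib buffers as needed.
 */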
      virtio_net_hdr_mrg_rxbuf_t *hdr;
      /* ... */
      /* the virtio net header could not be mapped: drop */
      vlib_error_count (vm, node->node_index,
                        VHOST_USER_INPUT_FUNC_ERROR_MMAP_FAIL, 1);
      /* ... */
      if ((desc_data_offset == desc_table[desc_current].len) &&
          (desc_table[desc_current].flags & VRING_DESC_F_NEXT))
        {
          current = desc_table[desc_current].next;
          /* ... */
          vlib_error_count (vm, node->node_index,
                            VHOST_USER_INPUT_FUNC_ERROR_MMAP_FAIL, 1);
        }
      /* ... */
      b_data = (u8 *) hdr + desc_data_offset;
      /* ... */
      /* exhausted this descriptor: follow the chain */
      if (desc_data_offset == desc_table[desc_current].len)
        {
          /* ... */
          desc_current = desc_table[desc_current].next;
          desc_data_offset = 0;
          /* ... */
        }
      /* ... chain a fresh vlib buffer when the current one is full ... */
      b_current->flags |= VLIB_BUFFER_NEXT_PRESENT;
      bi_current = bi_next;
      /* ... */
      /* queue one copy order for this descriptor's data */
      u32 desc_data_l = desc_table[desc_current].len - desc_data_offset;
      cpy->len = (cpy->len > desc_data_l) ? desc_data_l : cpy->len;
      /* ... */
      cpy->src = desc_table[desc_current].addr + desc_data_offset;
      /* ... */
      desc_data_offset += cpy->len;
      /* ... */
      if (current_config_index != ~(u32) 0)
        {
          b_head->current_config_index = current_config_index;
          vnet_buffer (b_head)->feature_arc_index = feature_arc_idx;
        }
      /* ... */
  /* execute the batched copy orders */
  if (PREDICT_FALSE (vhost_user_input_copy (vui, cpu->copy,
                                            copy_len, &map_hint)))
    {
      vlib_error_count (vm, node->node_index,
                        VHOST_USER_INPUT_FUNC_ERROR_MMAP_FAIL, 1);
    }
  /* ... give the descriptors back to the guest ... */
  txvq->used->idx = last_used_idx;
  /* ... */
  vlib_error_count (vm, node->node_index,
                    VHOST_USER_INPUT_FUNC_ERROR_MMAP_FAIL, 1);
  /* ... */
  vlib_increment_combined_counter (vnet_main.interface_main.
                                   combined_sw_if_counters +
                                   VNET_INTERFACE_COUNTER_RX,
                                   vm->thread_index, vui->sw_if_index,
                                   n_rx_packets, n_rx_bytes);
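/*
 * Packed-ring support follows. vhost_user_mark_desc_consumed () returns
 * descriptors to the guest by toggling the AVAIL/USED flag bits of each
 * processed descriptor according to the ring's current wrap counter.
 */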
static_always_inline void
vhost_user_mark_desc_consumed (vhost_user_intf_t * vui,
                               vhost_user_vring_t * txvq, u16 desc_head,
                               u16 n_descs_processed)
{
  vring_packed_desc_t *desc_table = txvq->packed_desc;
  /* ... */
  if (txvq->used_wrap_counter)
    for (desc_idx = 0; desc_idx < n_descs_processed; desc_idx++)
      desc_table[(desc_head + desc_idx) & mask].flags |=
        (VRING_DESC_F_AVAIL | VRING_DESC_F_USED);
  else
    for (desc_idx = 0; desc_idx < n_descs_processed; desc_idx++)
      desc_table[(desc_head + desc_idx) & mask].flags &=
        ~(VRING_DESC_F_AVAIL | VRING_DESC_F_USED);
}
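/*
 * vhost_user_rx_trace_packed () -- packed-ring variant of the trace
 * helper: locates the virtio net header, following a chained or indirect
 * first descriptor when necessary, and snapshots it into the trace entry.
 */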
  vring_packed_desc_t *hdr_desc;
  virtio_net_hdr_mrg_rxbuf_t *hdr;
  /* ... */
  if (!(txvq->packed_desc[desc_current].flags & VRING_DESC_F_NEXT) &&
      !(txvq->packed_desc[desc_current].flags & VRING_DESC_F_INDIRECT))
    {
      /* single-descriptor packet */
      /* ... */
    }
  /* ... */
  if (!hdr_desc || !(hdr = map_guest_mem (vui, hdr_desc->addr, &hint)))
    return;
  /* ... */
  clib_memcpy_fast (&t->hdr, hdr,
                    len > hdr_desc->len ? hdr_desc->len : len);
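/*
 * vhost_user_rx_discard_packet_packed () -- packed-ring discard: walks
 * forward through available descriptors, advancing the ring position,
 * until discard_max packets are dropped or no descriptor is available.
 */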
  u32 discarded_packets = 0;
  u16 desc_current, desc_head;
  /* ... */
  while ((discarded_packets != discard_max) &&
         vhost_user_packed_desc_available (txvq, desc_current))
    {
      /* ... */
      desc_current = (desc_current + 1) & mask;
      /* ... */
    }
  /* ... */
  return (discarded_packets);
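/*
 * vhost_user_input_copy_packed () -- same batched-copy idea as the split
 * ring, but unrolled eight-wide: it maps eight guest sources at a time,
 * counts failed mappings, and falls back to smaller batches near the end
 * of the array. Any unmappable source yields MMAP_FAIL.
 */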
static_always_inline u32
vhost_user_input_copy_packed (vhost_user_intf_t * vui, vhost_copy_t * cpy,
                              u16 copy_len, u32 * map_hint)
{
  void *src0, *src1, *src2, *src3, *src4, *src5, *src6, *src7;
  /* ... */
  u32 rc = VHOST_USER_INPUT_FUNC_ERROR_NO_ERROR;
  /* ... */
  bad = (src4 == 0) + (src5 == 0) + (src6 == 0) + (src7 == 0);
  /* ... prefetch-ahead check for the next batch ... */
  bad = (src4 == 0) + (src5 == 0) + (src6 == 0) + (src7 == 0);
  /* ... */
  rc = VHOST_USER_INPUT_FUNC_ERROR_MMAP_FAIL;
  /* ... */
  return rc;
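/*
 * vhost_user_do_offload () -- packed-ring helper that maps the virtio
 * net header of a packet, advancing to the next descriptor when the
 * header exactly fills the first one, and applies
 * vhost_user_handle_rx_offload () when the header carries
 * VIRTIO_NET_HDR_F_NEEDS_CSUM.
 */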
static_always_inline u32
vhost_user_do_offload (vhost_user_intf_t * vui,
                       vring_packed_desc_t * desc_table, u16 desc_current,
                       u16 mask, vlib_buffer_t * b_head, u32 * map_hint)
{
  u32 rc = VHOST_USER_INPUT_FUNC_ERROR_NO_ERROR;
  virtio_net_hdr_mrg_rxbuf_t *hdr;
  /* ... */
  hdr = map_guest_mem (vui, desc_table[desc_current].addr, map_hint);
  if (PREDICT_FALSE (hdr == 0))
    rc = VHOST_USER_INPUT_FUNC_ERROR_MMAP_FAIL;
  /* ... the header fills its descriptor: data starts in the next one */
  else if (desc_data_offset == desc_table[desc_current].len)
    {
      desc_current = (desc_current + 1) & mask;
      b_data =
        map_guest_mem (vui, desc_table[desc_current].addr, map_hint);
      if (PREDICT_FALSE (b_data == 0))
        rc = VHOST_USER_INPUT_FUNC_ERROR_MMAP_FAIL;
      /* ... */
    }
  else
    b_data = (u8 *) hdr + desc_data_offset;
  /* ... */
  return rc;
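/*
 * vhost_user_compute_buffers_required () -- number of vlib buffers needed
 * to hold desc_len bytes. The common 2048-byte buffer size is special-
 * cased as a shift: e.g. desc_len = 5000 gives (5000 >> 11) = 2 plus one
 * for the 904-byte remainder (5000 & 2047), i.e. 3 buffers. Other sizes
 * use div ().
 */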
static_always_inline u32
vhost_user_compute_buffers_required (u32 desc_len, u32 buffer_data_size)
{
  u32 buffers_required;

  if (PREDICT_TRUE (buffer_data_size == 2048))
    {
      buffers_required = desc_len >> 11;
      if ((desc_len & 2047) != 0)
        buffers_required++;
      return (buffers_required);
    }

  if (desc_len < buffer_data_size)
    return 1;

  div_t result = div (desc_len, buffer_data_size);
  if (result.rem)
    buffers_required = result.quot + 1;
  else
    buffers_required = result.quot;
  return (buffers_required);
}
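/*
 * vhost_user_compute_indirect_desc_len () -- maps an indirect descriptor
 * table and sums the lengths of its entries. n_descs is the table length
 * divided by sizeof (vring_packed_desc_t), i.e. len >> 4. The virtio net
 * header is subtracted before converting bytes to a buffer count.
 */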
static_always_inline u32
vhost_user_compute_indirect_desc_len (vhost_user_intf_t * vui,
                                      vhost_user_vring_t * txvq,
                                      u32 buffer_data_size, u16 desc_current,
                                      u32 * map_hint)
{
  vring_packed_desc_t *desc_table = txvq->packed_desc;
  u32 desc_len = 0;
  u16 desc_idx = desc_current;
  u32 n_descs;

  /* each packed descriptor entry is 16 bytes */
  n_descs = desc_table[desc_idx].len >> 4;
  /* ... map the indirect table, then sum the entry lengths ... */
  for (desc_idx = 0; desc_idx < n_descs; desc_idx++)
    desc_len += desc_table[desc_idx].len;
  /* do not count the virtio net header */
  desc_len -= desc_data_offset;
  return vhost_user_compute_buffers_required (desc_len, buffer_data_size);
}
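/*
 * vhost_user_compute_chained_desc_len () -- walks a chain of packed
 * descriptors (VRING_DESC_F_NEXT), summing lengths and advancing the
 * caller's ring position, then converts the total to a buffer count.
 */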
static_always_inline u32
vhost_user_compute_chained_desc_len (vhost_user_intf_t * vui,
                                     vhost_user_vring_t * txvq,
                                     u32 buffer_data_size, u16 * current,
                                     u16 * n_left)
{
  vring_packed_desc_t *desc_table = txvq->packed_desc;
  u32 desc_len = 0;
  /* ... */
  while (desc_table[*current].flags & VRING_DESC_F_NEXT)
    {
      desc_len += desc_table[*current].len;
      (*n_left)++;
      *current = (*current + 1) & mask;
      vhost_user_advance_last_avail_idx (txvq);
    }
  /* last descriptor of the chain */
  desc_len += desc_table[*current].len;
  (*n_left)++;
  *current = (*current + 1) & mask;
  /* ... */
  return vhost_user_compute_buffers_required (desc_len, buffer_data_size);
}
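/*
 * vhost_user_assemble_packet () -- copies one descriptor's payload into
 * the vlib buffer chain: whenever the current buffer fills up, the next
 * pre-allocated buffer is linked in, and one vhost_copy_t order is queued
 * per contiguous piece (executed later by the batched copy routine).
 */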
static_always_inline void
vhost_user_assemble_packet (vring_packed_desc_t * desc_table,
                            u16 * desc_idx, vlib_buffer_t * b_head,
                            vlib_buffer_t ** b_current, u32 ** next,
                            vlib_buffer_t *** b, u32 * bi_current,
                            vhost_cpu_t * cpu, u16 * copy_len,
                            u32 * buffers_used, u32 buffers_required,
                            u32 * desc_data_offset, u32 buffer_data_size,
                            u16 mask)
{
  u32 desc_data_l;

  while (*desc_data_offset < desc_table[*desc_idx].len)
    {
      /* the current buffer is full: chain the next pre-allocated one */
      if (PREDICT_FALSE ((*b_current)->current_length == buffer_data_size))
        {
          u32 bi_next = **next;
          (*next)++;
          (*b_current)->next_buffer = bi_next;
          (*b_current)->flags |= VLIB_BUFFER_NEXT_PRESENT;
          *bi_current = bi_next;
          *b_current = **b;
          (*b)++;
          (*buffers_used)++;
          ASSERT (*buffers_used <= buffers_required);
        }

      /* queue a copy order for this contiguous piece */
      vhost_copy_t *cpy = &cpu->copy[*copy_len];
      (*copy_len)++;
      desc_data_l = desc_table[*desc_idx].len - *desc_data_offset;
      cpy->len = buffer_data_size - (*b_current)->current_length;
      cpy->len = (cpy->len > desc_data_l) ? desc_data_l : cpy->len;
      cpy->dst = (uword) (vlib_buffer_get_current (*b_current) +
                          (*b_current)->current_length);
      cpy->src = desc_table[*desc_idx].addr + *desc_data_offset;

      *desc_data_offset += cpy->len;
      (*b_current)->current_length += cpy->len;
      b_head->total_length_not_including_first_buffer += cpy->len;
    }
  *desc_idx = (*desc_idx + 1) & mask;
  *desc_data_offset = 0;
}
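/*
 * vhost_user_if_input_packed () -- packed-ring receive path for one
 * queue: counts the descriptors and buffers required, pre-allocates the
 * vlib buffers, assembles packets (handling single, chained and indirect
 * descriptors), flushes the copy batch, then marks the descriptors
 * consumed and credits the interface RX counters.
 */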
  u16 n_rx_packets = 0;
  u32 n_rx_bytes = 0;
  u32 buffers_required = 0;
  u32 n_left_to_next, *to_next;
  /* ... */
  u32 current_config_index = ~0;
  /* ... */
  u16 desc_current, desc_head, last_used_idx;
  vring_packed_desc_t *desc_table = 0;
  u32 n_descs_processed = 0;
  /* ... */
  u32 buffers_used = 0;
  u16 current, n_descs_to_process;
  /* ... */
  desc_head = desc_current = last_used_idx;
  /* ... */
  /* interface or ring not ready: count the discarded packets */
  vlib_error_count (vm, node->node_index,
                    VHOST_USER_INPUT_FUNC_ERROR_NOT_READY, rv);
  /* ... */
  vhost_user_input_setup_frame (vm, node, vui, &current_config_index,
                                &next_index, &to_next, &n_left_to_next);

  /* pass 1: count the vlib buffers needed for the available descriptors */
  current = desc_current;
  /* ... indirect chains ... */
  buffers_required +=
    vhost_user_compute_indirect_desc_len (vui, txvq, buffer_data_size,
                                          current, &map_hint);
  /* ... */
  current = (current + 1) & mask;
  /* ... */
  if (PREDICT_FALSE ((buffers_required == 0) || (buffers_required > 10000)))
    {
      /* bogus request: discard the batch */
      vlib_error_count (vm, node->node_index,
                        VHOST_USER_INPUT_FUNC_ERROR_NO_BUFFER, rv);
      /* ... */
    }
  /* ... buffer allocation fell short: discard and retry later ... */
  vlib_error_count (vm, node->node_index,
                    VHOST_USER_INPUT_FUNC_ERROR_NO_BUFFER, rv);
  /* ... */
  n_descs_processed = n_left;
  while (n_left > 0)
    {
      vlib_buffer_t *b_head, *b_current;
      u32 bi_current;
      u32 desc_data_offset;
      u16 desc_idx = desc_current;
      u32 n_descs;
      /* ... */
      to_next[0] = bi_current = next[0];
      b_head = b_current = b[0];
      b++;
      buffers_used++;
      ASSERT (buffers_used <= buffers_required);
      /* ... */
      b_head->flags |= VLIB_BUFFER_TOTAL_LENGTH_VALID;
      /* ... */
      n_descs_to_process = 1;

      if (desc_table[desc_idx].flags & VRING_DESC_F_INDIRECT)
        {
          /* indirect table: one 16-byte entry per descriptor */
          n_descs = desc_table[desc_idx].len >> 4;
          /* ... map the indirect table, check the offload header ... */
          if (PREDICT_FALSE
              (vhost_user_do_offload
               (vui, desc_table, desc_idx, mask, b_head,
                &map_hint) != VHOST_USER_INPUT_FUNC_ERROR_NO_ERROR))
            {
              vlib_error_count (vm, node->node_index,
                                VHOST_USER_INPUT_FUNC_ERROR_MMAP_FAIL, 1);
              /* ... */
            }
          /* ... */
          vhost_user_assemble_packet (desc_table, &desc_idx, b_head,
                                      &b_current, &next, &b, &bi_current,
                                      cpu, &copy_len, &buffers_used,
                                      buffers_required, &desc_data_offset,
                                      buffer_data_size, mask);
        }
      else
        {
          /* ... offload check on the first descriptor ... */
          if (PREDICT_FALSE (rv != VHOST_USER_INPUT_FUNC_ERROR_NO_ERROR))
            {
              /* ... */
            }
          n_descs_to_process = 1;
          /* chained descriptors: assemble one per iteration */
          vhost_user_assemble_packet (desc_table, &desc_idx, b_head,
                                      &b_current, &next, &b, &bi_current,
                                      cpu, &copy_len, &buffers_used,
                                      buffers_required, &desc_data_offset,
                                      buffer_data_size, mask);
          n_descs_to_process++;
          /* ... last descriptor of the chain ... */
          vhost_user_assemble_packet (desc_table, &desc_idx, b_head,
                                      &b_current, &next, &b, &bi_current,
                                      cpu, &copy_len, &buffers_used,
                                      buffers_required, &desc_data_offset,
                                      buffer_data_size, mask);
        }
      if (current_config_index != ~0)
        {
          b_head->current_config_index = current_config_index;
          vnet_buffer (b_head)->feature_arc_index = feature_arc_idx;
        }
      /* ... */
      ASSERT (n_left >= n_descs_to_process);
      n_left -= n_descs_to_process;
      /* move on to the next packet */
      desc_current = (desc_current + n_descs_to_process) & mask;
      /* ... */
    }

  /* execute the batched copy orders */
  if (PREDICT_FALSE (rv != VHOST_USER_INPUT_FUNC_ERROR_NO_ERROR))
    {
      /* ... count MMAP_FAIL ... */
    }
  /* ... */
  if (PREDICT_FALSE (rv != VHOST_USER_INPUT_FUNC_ERROR_NO_ERROR))
    {
      /* ... */
    }
  /* ... per-packet tracing ... */
  while (n_trace && left)
    {
      /* ... */
      last_used_idx = (last_used_idx + 1) & mask;
    }
  /* ... */
  vlib_increment_combined_counter (vnet_main.interface_main.
                                   combined_sw_if_counters +
                                   VNET_INTERFACE_COUNTER_RX,
                                   vm->thread_index, vui->sw_if_index,
                                   n_rx_packets, n_rx_bytes);
  /* ... */
  return n_rx_packets;
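/*
 * Node dispatch function: for every ready vhost-user interface and queue
 * assigned to this thread, invokes the packed- or split-ring receive
 * path, in polling mode or on an interrupt kick. In the sketch below,
 * queue_was_kicked is a hypothetical stand-in for the elided second
 * operand of the condition.
 */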
VLIB_NODE_FN (vhost_user_input_node) (vlib_main_t * vm,
                                      vlib_node_runtime_t * node,
                                      vlib_frame_t * frame)
{
  uword n_rx_packets = 0;
  vhost_user_main_t *vum = &vhost_user_main;
  /* ... for each queue assigned to this worker thread ... */
  if ((node->state == VLIB_NODE_STATE_POLLING) ||
      queue_was_kicked /* hypothetical name; original expression elided */)
    {
      /* ... run the packed- or split-ring input path ... */
    }
  /* ... */
  return n_rx_packets;
}
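/*
 * Node registration: "vhost-user-input" shares its next-node table with
 * "device-input" and starts out disabled; it is enabled on a worker
 * when a vhost-user interface queue is assigned to it.
 */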
VLIB_REGISTER_NODE (vhost_user_input_node) = {
  .type = VLIB_NODE_TYPE_INPUT,
  .name = "vhost-user-input",
  .sibling_of = "device-input",
  /* will be enabled if/when hardware is detected */
  .state = VLIB_NODE_STATE_DISABLED,
  .format_trace = format_vhost_trace,
  .n_errors = VHOST_USER_INPUT_FUNC_N_ERROR,
  .error_strings = vhost_user_input_func_error_strings,
};