#define foreach_rdma_input_error \
  _(BUFFER_ALLOC, "buffer alloc error")

#define _(f,s) RDMA_INPUT_ERROR_##f,

  /* ... */
  s[0].length = data_size;
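/* Sketch (assumed, not verbatim from this file): applying the X-macro above
   to the error list yields an error enum and a matching string table roughly
   like the following; RDMA_INPUT_N_ERROR and the rdma_input_error_strings
   name follow the usual VPP convention and are assumptions here. */
typedef enum
{
  RDMA_INPUT_ERROR_BUFFER_ALLOC,
  RDMA_INPUT_N_ERROR,
} rdma_input_error_t;

static char *rdma_input_error_strings[] = {
  "buffer alloc error",
};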
/* ... */ u32 first_slot, u32 n_alloc)
{
  /* ... */
  for (i = 0; i < n_alloc; i++)
    {
      /* ... */
      current_wqe = rxq->wqes + ((first_slot + i) << log_wqe_sz);
      /* ... */
      for (int j = 0; j < chain_sz - 1; j++)
        {
          /* ... */
          bufs[j]->flags |= VLIB_BUFFER_NEXT_PRESENT;
        }
      /* ... */
      if (chain_sz < rxq->n_ds_per_wqe - 1)
        {
          /* ... */
          bufs[chain_sz - 1]->flags |= VLIB_BUFFER_NEXT_PRESENT;
        }
      else
        bufs[chain_sz - 1]->flags &= ~VLIB_BUFFER_NEXT_PRESENT;

      for (int j = 0; j < chain_sz; j++)
        {
          /* ... */
          current_wqe[j + 1].addr = clib_host_to_net_u64 (addr);
        }
      /* ... */
    }
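/* The loop above appears to complete partially-built buffer chains for the
   legacy (non-striding) RQ: the buffers are linked together via
   VLIB_BUFFER_NEXT_PRESENT, the chain is either spliced onto the existing
   tail or terminated depending on chain_sz, and each buffer address is
   written in network byte order into the remaining data segments of the
   corresponding WQE. */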
/* ... */ const int is_mlx5dv, const int is_striding)
{
  /* ... */
  struct ibv_recv_wr wr[VLIB_FRAME_SIZE], *w = wr;
  struct ibv_sge sge[VLIB_FRAME_SIZE], *s = sge;
  /* ... */
  rxq->tail += n_completed;
  if (n_completed != n_incomplete)
    /* ... */

  u32 alloc_multiple = 1 << (clib_max (3, log_stride_per_wqe));
  /* ... */
  n_alloc = clib_min (VLIB_FRAME_SIZE, ring_space);

  if (n_alloc < 2 * alloc_multiple)
    /* ... */

  n_alloc &= ~(alloc_multiple - 1);
  /* ... */
  if (n < alloc_multiple)
    /* ... */

  n_free = n & (alloc_multiple - 1);
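/* alloc_multiple forces refills to a power-of-two batch that is at least 8
   buffers and at least one full WQE worth of strides
   (1 << log_stride_per_wqe).  n_alloc is capped at VLIB_FRAME_SIZE and the
   available ring space, then rounded down to that multiple (the
   2 * alloc_multiple test guards against refilling tiny batches), and a
   partial buffer allocation is likewise trimmed to a whole multiple.
   Example: with log_stride_per_wqe = 5, alloc_multiple = 32, so a request
   for 100 buffers is rounded down to 96. */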
  u64 __clib_aligned (32) va[8];
  /* ... */
  u32 wqes_slot = slot << (log_wqe_sz - log_stride_per_wqe);
  /* ... */
  const int wqe_sz = 1 << log_wqe_sz;
  const int stride_per_wqe = 1 << log_stride_per_wqe;
  int current_data_seg = 0;
  /* ... */
  const int log_skip_wqe = is_striding ? 0 : log_wqe_sz;
  /* ... */
#ifdef CLIB_HAVE_VEC256
  /* ... */
#else
  for (int i = 0; i < 8; i++)
    va[i] = clib_host_to_net_u64 (va[i]);
#endif
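/* Buffer virtual addresses are stored in the WQE data segments in
   big-endian (network) byte order, either with a vectorized byte swap when
   CLIB_HAVE_VEC256 is available or with the scalar clib_host_to_net_u64()
   loop above. */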
  if (is_striding && !(current_data_seg & (wqe_sz - 1)))
    /* ... */
      .next_wqe_index = clib_host_to_net_u16 (((wqes_slot >> log_wqe_sz) + 1) & (wqe_cnt - 1)),
    /* ... */

  if (!is_striding || !(current_data_seg & ~(stride_per_wqe - 1)))
    {
      wqe[(0 << log_skip_wqe) + is_striding].addr = va[0];
      wqe[(1 << log_skip_wqe) + is_striding].addr = va[1];
      wqe[(2 << log_skip_wqe) + is_striding].addr = va[2];
      wqe[(3 << log_skip_wqe) + is_striding].addr = va[3];
      wqe[(4 << log_skip_wqe) + is_striding].addr = va[4];
      wqe[(5 << log_skip_wqe) + is_striding].addr = va[5];
      wqe[(6 << log_skip_wqe) + is_striding].addr = va[6];
      wqe[(7 << log_skip_wqe) + is_striding].addr = va[7];
    }

  /* ... */
  wqe += 8 << log_skip_wqe;
  wqes_slot += 8 << log_skip_wqe;
  current_data_seg += 8;
  current_data_seg &= wqe_sz - 1;
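/* In striding-RQ mode the initializer above appears to write the WQE's SRQ
   "next WQE index" header at each WQE boundary, and consecutive data
   segments of the same WQE are then filled (log_skip_wqe == 0, with the
   +is_striding term skipping that header segment).  In legacy mode every
   buffer gets its own WQE, so the index advances by a whole WQE per buffer
   (log_skip_wqe == log_wqe_sz).  current_data_seg wraps at wqe_sz. */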
  /* ... */
  int first_slot = slot - n_alloc;
  /* ... */
  rxq->tail += n_alloc;
  /* ... */
  rxq->wq_db[MLX5_RCV_DBR] =
    /* ... */;
  /* ... */
  rxq->wq_db[MLX5_RCV_DBR] = clib_host_to_net_u32 (rxq->tail);
  /* ... */
  if (ibv_post_wq_recv (rxq->wq, wr, &w) != 0)
    /* ... */
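/* On the mlx5dv direct-verbs path the refill appears to finish by advancing
   rxq->tail and writing the new producer index, in network byte order, to
   the receive doorbell record (wq_db[MLX5_RCV_DBR]); on the plain ibverbs
   path the prepared work-request list is posted with ibv_post_wq_recv(),
   and a non-zero return sets w to the first work request that was not
   posted. */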
/* ... */ const u32 * bi, u32 next_index, u16 * cqe_flags, /* ... */)
{
  /* ... */
  while (n_trace && n_left)
    {
      /* ... */
      tr->cqe_flags = is_mlx5dv ? clib_net_to_host_u16 (cqe_flags[0]) : 0;
      /* ... */
    }
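/* Packet-trace helper: while trace slots remain (n_trace && n_left), each
   traced buffer gets a trace record whose cqe_flags field is taken from the
   CQE (converted from network byte order) on the mlx5dv path and set to 0
   on the plain verbs path. */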
  /* ... */
  while (n_left_from >= 4)
    {
      /* ... */
    }

  while (n_left_from >= 1)
    {
      /* ... */
    }
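/* The pair of loops above is the usual VPP unrolling pattern: a
   4-packets-at-a-time loop followed by a scalar remainder loop.  In this
   (apparently plain-ibverbs) receive path it is presumably used to
   initialise the received buffers from the work completions and to
   accumulate the rx byte count. */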
  u32 mcqe_array_index = (cq_ci + 1) & mask;
  /* ... */
  mcqe_array_index = cq_ci;
  /* ... */
  mcqe_array_index = (mcqe_array_index + n) & mask;
  /* ... */
  for (int i = 0; i < n; i++)
    byte_cnt[i] = mcqe[skip + i].byte_count;
  mcqe_array_index = (mcqe_array_index + 8) & mask;
  /* ... */
  for (int i = 0; i < 8; i++)
    byte_cnt[i] = mcqe[i].byte_count;
  /* ... */
  mcqe_array_index = (mcqe_array_index + 8) & mask;
  /* ... */
  for (int i = 0; i < n_left; i++)
    byte_cnt[i] = mcqe[i].byte_count;
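/* This fragment walks an array of compressed "mini CQEs", copying each
   entry's byte_count into byte_cnt[] while stepping mcqe_array_index around
   the CQ ring modulo its size; the skip handling at the top appears to deal
   with a mini-CQE array that was already partially consumed by a previous
   frame. */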
  /* ... */
  offset = cq_ci & mask;
  owner = 0xf0 | ((cq_ci >> log2_cq_size) & 1);

  if (offset + n_mini_cqes < cq_size)
    /* ... */
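/* This appears to reset ownership of the CQ entries that were covered by a
   compressed CQE: each is rewritten with an invalid opcode nibble (the 0xf0
   part) plus the owner bit expected on the current pass over the CQ ring
   (the (cq_ci >> log2_cq_size) & 1 term); the if above splits the reset in
   two when the range wraps past the end of the ring. */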
/* ... */ u32 * byte_cnt, u16 * cqe_flags)
{
  u32 n_rx_packets = 0;
  /* ... */
  n_mini_cqes_left, cq_ci, mask, byte_cnt);
  /* ... */
  n_rx_packets = n_mini_cqes_left;
  byte_cnt += n_mini_cqes_left;
  cqe_flags += n_mini_cqes_left;
  /* ... */
  rxq->cq_ci = cq_ci = cq_ci + n_mini_cqes;
  u8 cqe_last_byte, owner;
  /* ... */
  owner = (cq_ci >> log2_cq_size) & 1;
  /* ... */
  if ((cqe_last_byte & 0x1) != owner)
    break;

  cqe_last_byte &= 0xfc;

  if (cqe_last_byte == 0x2c)
    {
      /* ... */
      if (n_left >= n_mini_cqes)
        {
          /* ... */
          n_rx_packets += n_mini_cqes;
          byte_cnt += n_mini_cqes;
          cqe_flags += n_mini_cqes;
          cq_ci += n_mini_cqes;
        }
      /* ... */
    }

  if (cqe_last_byte == 0x20)
    {
      /* ... */
      cqe_flags[0] = cqe->flags;
      /* ... */
    }

  /* ... */
  rd->flags |= RDMA_DEVICE_F_ERROR;
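/* The poll loop claims a CQE only when its owner bit matches the current
   pass over the ring ((cq_ci >> log2_cq_size) & 1).  With the owner and
   solicited bits masked off (& 0xfc), an opcode byte of 0x2c is a
   compressed CQE whose mini-CQE array is expanded as above (when the whole
   array fits in the space left in the frame), 0x20 is a normal receive
   completion whose byte count and flags are copied out directly, and
   anything else marks the device with RDMA_DEVICE_F_ERROR. */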
  for (int i = 0; i < n_rx_packets; i++)
    {
      /* ... */
    }
  /* ... */
  return n_rx_packets != *n_rx_segs || filler;
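/* For the striding RQ each byte count also encodes the number of strides
   consumed and a "filler" indication; the loop above appears to accumulate
   those into *n_rx_segs, and the return value reports whether the slow path
   is needed, i.e. whether segments and packets differ in number or a filler
   CQE was seen. */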
#if defined CLIB_HAVE_VEC256
  u32x8 thresh8 = u32x8_splat (buf_sz);
  for (int i = 0; i < n_rx_packets; i += 8)
    if (!u32x8_is_all_zero (*(u32x8 *) (bc + i) > thresh8))
      return 1;
#elif defined CLIB_HAVE_VEC128
  u32x4 thresh4 = u32x4_splat (buf_sz);
  for (int i = 0; i < n_rx_packets; i += 4)
    if (!u32x4_is_all_zero (*(u32x4 *) (bc + i) > thresh4))
      return 1;
#else
  /* ... */
#endif
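/* Legacy-RQ fast/slow path test: if any completion's byte count exceeds the
   single-buffer size buf_sz, at least one packet spans several buffers and
   the chain-fixing slow path is required.  The check runs 8 or 4 byte
   counts per iteration with u32x8/u32x4 comparisons when the vector
   extensions are available. */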
/* ... */ * ptd, int n_rx_packets, u32 * bc)
{
  /* ... */
  int skip_ip4_cksum = 1;

#if defined CLIB_HAVE_VEC256
  u16x16 mask16 = u16x16_splat (mask);
  u16x16 match16 = u16x16_splat (match);
  /* ... */
  for (int i = 0; i * 16 < n_rx_packets; i++)
    /* ... */
  if (!u16x16_is_all_zero (r))
    skip_ip4_cksum = 0;

  for (int i = 0; i < n_rx_packets; i += 8)
    /* ... */
#elif defined CLIB_HAVE_VEC128
  u16x8 mask8 = u16x8_splat (mask);
  u16x8 match8 = u16x8_splat (match);
  /* ... */
  for (int i = 0; i * 8 < n_rx_packets; i++)
    /* ... */
  if (!u16x8_is_all_zero (r))
    skip_ip4_cksum = 0;

  for (int i = 0; i < n_rx_packets; i += 4)
    /* ... */
#else
  for (int i = 0; i < n_rx_packets; i++)
    /* ... */
  for (int i = 0; i < n_rx_packets; i++)
    bc[i] = clib_net_to_host_u32 (bc[i]);
#endif
  return skip_ip4_cksum;
}
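/* This routine returns whether the ip4 checksum check can be skipped
   downstream: it verifies that every CQE flags word matches "IPv4 with L3
   checksum OK", clearing skip_ip4_cksum otherwise, and in the same pass
   converts the byte counts in bc[] from network to host byte order
   (vector byte swaps in the VEC256/VEC128 variants, the scalar
   clib_net_to_host_u32() loop otherwise). */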
  u32 n_left = n_rx_segs;
  /* ... */
  rxq->head += n_rx_segs;
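/* Fast path: the n_rx_segs buffer indices appear to be taken directly from
   the receive ring, rxq->head is advanced by the same amount, and the
   buffers are handed to the next node without any per-packet chain
   rewriting. */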
  u32 n_wrap_around = (slot + n) & (qs_mask + 1) ? (slot + n) & qs_mask : 0;
  /* ... */
  if (total_length > buf_sz)
    {
      /* ... */
      u8 current_chain_sz = 0;
      /* ... */
      total_length -= buf_sz;
      /* ... */
      current_buf->flags |= VLIB_BUFFER_NEXT_PRESENT;
      /* ... */
      do
        {
          /* ... */
        }
      while (total_length > 0);
      current_buf->flags &= ~VLIB_BUFFER_NEXT_PRESENT;
      /* ... */
      n_used_per_chain[0] = current_chain_sz;
    }
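/* Legacy-RQ chain fix-up: when a completion is longer than one buffer, the
   do/while above appears to walk the pre-linked spare buffers, assign each
   segment's length, terminate the chain by clearing
   VLIB_BUFFER_NEXT_PRESENT on the last segment, and record the number of
   spare buffers consumed in n_used_per_chain[] so the refill code can
   replenish exactly that many; n_wrap_around covers a batch that wraps past
   the end of the ring. */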
/* ... */ int n_rx_segs, int *n_rx_packets, u32 * bc, int slow_path_needed)
{
  /* ... */
  uword n_segs_remaining = 0;
  u32 n_bytes_remaining = 0;
  u32 *next_in_frame = to_next;
  /* ... */
  do
    {
      /* ... */
      if (!n_segs_remaining)
        {
          /* ... */
        }
      else
        {
          pkt_prev->flags |= VLIB_BUFFER_NEXT_PRESENT;
          pkt[0]->flags &= ~VLIB_BUFFER_TOTAL_LENGTH_VALID;

          if (n_segs_remaining == 1)
            {
              /* ... */
              (next_to_free++)[0] = pkt_head_idx;
              /* ... */
              (next_in_frame++)[0] = pkt_head_idx;
              /* ... */
            }
        }
      /* ... */
    }
  while (n_rx_segs > 0);
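/* Striding-RQ slow path: receive segments are stitched into packet chains
   as they are walked.  A new head starts when no segments remain from the
   previous packet; continuation buffers are linked with
   VLIB_BUFFER_NEXT_PRESENT and the head's TOTAL_LENGTH_VALID flag is
   cleared.  When the last segment of a packet is reached its head index is
   either queued for freeing or appended to the frame under construction
   (next_in_frame). */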
/* ... */ u16 qid, const int use_mlx5dv)
{
  /* ... */
  struct ibv_wc wc[VLIB_FRAME_SIZE];
  /* ... */
  u32 next_index, *to_next, n_left_to_next, n_rx_bytes = 0;
  int n_rx_packets, skip_ip4_cksum = 0;
  /* ... */
  const int is_striding = ! !(rd->flags & RDMA_DEVICE_F_STRIDING_RQ);
  /* ... */
  n_rx_packets = ibv_poll_cq (rxq->cq, VLIB_FRAME_SIZE, wc);
  /* ... */
  int slow_path_needed;
  /* ... */
  n_rx_packets, bc, ~0);
  /* ... */
  rxq->size, n_rx_packets);
  /* ... */
  rxq->head += n_rx_packets;
  /* ... */
  next_index, ptd->cqe_flags, use_mlx5dv);
  /* ... */
  combined_sw_if_counters +
  /* ... */
  n_rx_packets, n_rx_bytes);
  /* ... */
  return n_rx_packets;
}
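/* This is the per-queue input routine: it polls the completion queue
   (through the mlx5dv direct path or, as shown, ibv_poll_cq() on the plain
   verbs path), converts the completions into a frame of buffers using the
   helpers above, traces packets when tracing is enabled, increments the
   interface's combined rx packet/byte counters, appears to refill the
   receive ring before returning, and returns the number of packets handed
   to the next node. */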
VLIB_REGISTER_NODE (rdma_input_node) = {
  .name = "rdma-input",
  /* ... */
  .sibling_of = "device-input",
  /* ... */
  .state = VLIB_NODE_STATE_DISABLED,
  /* ... */
};