FD.io VPP  v21.10.1-2-g0a485f517
Vector Packet Processing
rdma.h
Go to the documentation of this file.
1 /*
2  *------------------------------------------------------------------
3  * Copyright (c) 2018 Cisco and/or its affiliates.
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at:
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  *------------------------------------------------------------------
16  */
17 
18 #ifndef _RDMA_H_
19 #define _RDMA_H_
20 
21 #include <infiniband/verbs.h>
22 #include <vlib/log.h>
23 #include <vlib/pci/pci.h>
24 #include <vnet/interface.h>
26 #include <rdma/rdma_mlx5dv.h>
27 
28 #define foreach_rdma_device_flags \
29  _(0, ERROR, "error") \
30  _(1, ADMIN_UP, "admin-up") \
31  _(2, LINK_UP, "link-up") \
32  _(3, PROMISC, "promiscuous") \
33  _(4, MLX5DV, "mlx5dv") \
34  _(5, STRIDING_RQ, "striding-rq") /* each entry is _(bit position, flag name suffix, string); the enum below expands it to RDMA_DEVICE_F_<name> = (1 << bit) */
35 
36 enum
37 {
38 #define _(a, b, c) RDMA_DEVICE_F_##b = (1 << a),
40 #undef _
41 };
42 
43 #ifndef MLX5_ETH_L2_INLINE_HEADER_SIZE
44 #define MLX5_ETH_L2_INLINE_HEADER_SIZE 18 /* fallback value, used only when no earlier header defines it */
45 #endif
46 
47 typedef struct
48 {
49  CLIB_ALIGN_MARK (align0, MLX5_SEND_WQE_BB);
50  union
51  {
52  struct mlx5_wqe_ctrl_seg ctrl;
53  struct
54  {
59  };
60  };
61  struct mlx5_wqe_eth_seg eseg;
62  struct mlx5_wqe_data_seg dseg;
64 #define RDMA_MLX5_WQE_SZ sizeof(rdma_mlx5_wqe_t) /* size of one WQE, in bytes */
65 #define RDMA_MLX5_WQE_DS (RDMA_MLX5_WQE_SZ/sizeof(struct mlx5_wqe_data_seg)) /* size of one WQE, in data-segment units */
66 STATIC_ASSERT (RDMA_MLX5_WQE_SZ == MLX5_SEND_WQE_BB &&
67  RDMA_MLX5_WQE_SZ % sizeof (struct mlx5_wqe_data_seg) == 0,
68  "bad size"); /* a WQE must be exactly MLX5_SEND_WQE_BB bytes and a whole number of data segments */
69 
70 typedef struct
71 {
72  CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
73  struct ibv_cq *cq;
74  struct ibv_wq *wq;
86  CLIB_CACHE_LINE_ALIGN_MARK (cacheline1);
87  volatile u32 *wq_db;
88  volatile u32 *cq_db;
94  union /* overlays the Striding RQ fields with the Legacy RQ fields */
95  {
96  struct
97  {
98  u32 striding_wqe_tail; /* Striding RQ: number of released whole WQE */
99  u8 log_stride_per_wqe; /* Striding RQ: number of strides in a single WQE (presumably stored as log2, per the field name -- TODO confirm) */
100  };
101 
102  struct
103  {
104  u8 *n_used_per_chain; /* Legacy RQ: for each buffer chain, how many additional segments are needed */
105 
106  u32 *second_bufs; /* Legacy RQ: ring of second buffers of each chain */
107  u32 incomplete_tail; /* Legacy RQ: tail index in bufs,
108  corresponds to buffer chains with recycled valid head buffer,
109  but whose other buffers are not yet recycled (due to pool exhaustion). */
111  u8 n_ds_per_wqe; /* Legacy RQ: number of nonnull data segs per WQE */
112  };
113  };
114  u8 log_wqe_sz; /* log-size of a single WQE (in data segments) */
115 } rdma_rxq_t;
116 
117 typedef struct
118 {
119  CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
120 
121  /* following fields are accessed in datapath */
123 
124  union /* overlays the ibverb datapath fields with the direct verbs datapath fields */
125  {
126  struct
127  {
128  /* ibverb datapath. Cache of cq, qp below */
129  struct ibv_cq *ibv_cq;
130  struct ibv_qp *ibv_qp;
131  };
132  struct
133  {
134  /* direct verbs datapath */
136  volatile u32 *dv_sq_dbrec;
137  volatile u64 *dv_sq_db;
138  struct mlx5_cqe64 *dv_cq_cqes;
139  volatile u32 *dv_cq_dbrec;
140  };
141  };
142 
143  u32 *bufs; /* vlib_buffer ring buffer */
146  u16 dv_cq_idx; /* monotonic CQE index (valid only for direct verbs) */
147  u8 bufs_log2sz; /* log2 vlib_buffer entries */
148  u8 dv_sq_log2sz:4; /* log2 SQ WQE entries (valid only for direct verbs) */
149  u8 dv_cq_log2sz:4; /* log2 CQ CQE entries (valid only for direct verbs) */
150  STRUCT_MARK (cacheline1); /* end of 1st 64-bytes cacheline; offset is checked by the static assert following this struct */
151 
152  /* WQE template (valid only for direct verbs) */
153  u8 dv_wqe_tmpl[64];
154 
155  /* end of 2nd 64-bytes cacheline (or 1st 128-bytes cacheline) */
156  STRUCT_MARK (cacheline2);
157 
158  /* fields below are not accessed in datapath */
159  struct ibv_cq *cq;
160  struct ibv_qp *qp;
161 
162 } rdma_txq_t;
163 STATIC_ASSERT_OFFSET_OF (rdma_txq_t, cacheline1, 64); /* compile-time check: the cacheline1 mark sits at byte offset 64 */
164 STATIC_ASSERT_OFFSET_OF (rdma_txq_t, cacheline2, 128); /* compile-time check: the cacheline2 mark sits at byte offset 128 */
165 
166 #define RDMA_TXQ_DV_INVALID_ID 0xffffffff
167 
168 #define RDMA_TXQ_BUF_SZ(txq) (1U << (txq)->bufs_log2sz) /* 2^bufs_log2sz */
169 #define RDMA_TXQ_DV_SQ_SZ(txq) (1U << (txq)->dv_sq_log2sz) /* 2^dv_sq_log2sz */
170 #define RDMA_TXQ_DV_CQ_SZ(txq) (1U << (txq)->dv_cq_log2sz) /* 2^dv_cq_log2sz */
171 
172 #define RDMA_TXQ_USED_SZ(head, tail) ((u16)((u16)(tail) - (u16)(head))) /* (tail - head) modulo 2^16, so the result stays correct when the indices wrap */
173 #define RDMA_TXQ_AVAIL_SZ(txq, head, tail) ((u16)(RDMA_TXQ_BUF_SZ (txq) - RDMA_TXQ_USED_SZ (head, tail))) /* buffer ring size minus used entries, truncated to u16 */
174 #define RDMA_RXQ_MAX_CHAIN_LOG_SZ 3 /* This should NOT be lower than 3! */
175 #define RDMA_RXQ_MAX_CHAIN_SZ (1U << RDMA_RXQ_MAX_CHAIN_LOG_SZ) /* 2^RDMA_RXQ_MAX_CHAIN_LOG_SZ */
176 #define RDMA_RXQ_LEGACY_MODE_MAX_CHAIN_SZ 5
177 
178 typedef enum
179 {
184 } rdma_rss4_t;
185 
186 typedef enum
187 {
192 } rdma_rss6_t;
193 
194 typedef struct
195 {
196  CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
197 
198  /* following fields are accessed in datapath */
205  u32 lkey; /* cache of mr->lkey */
206  u8 pool; /* buffer pool index */
207 
208  /* fields below are not accessed in datapath */
217 
218  struct ibv_context *ctx;
219  struct ibv_pd *pd;
220  struct ibv_mr *mr;
221  struct ibv_qp *rx_qp4;
222  struct ibv_qp *rx_qp6;
223  struct ibv_rwq_ind_table *rx_rwq_ind_tbl;
224  struct ibv_flow *flow_ucast4;
225  struct ibv_flow *flow_mcast4;
226  struct ibv_flow *flow_ucast6;
227  struct ibv_flow *flow_mcast6;
228 
230 } rdma_device_t; /* layout: datapath-accessed fields first (from cacheline0), then fields not accessed in datapath */
231 
232 typedef struct
233 {
234  CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
235  union
236  {
237  u16 cqe_flags[VLIB_FRAME_SIZE];
238  u16x8 cqe_flags8[VLIB_FRAME_SIZE / 8];
239  u16x16 cqe_flags16[VLIB_FRAME_SIZE / 16];
240  };
241  union
242  {
243  struct
244  {
245  u32 current_segs[VLIB_FRAME_SIZE];
246  u32 to_free_buffers[VLIB_FRAME_SIZE];
247  }; /* Specific to STRIDING RQ mode */
248  struct
249  {
252  }; /* Specific to LEGACY RQ mode */
253  };
254 
257 
258 typedef struct
259 {
264 } rdma_main_t;
265 
266 extern rdma_main_t rdma_main;
267 
268 typedef enum
269 {
273 } rdma_mode_t;
274 
275 typedef struct
276 {
288 
289  /* return */
290  int rv;
294 
297 
300 
306 
307 typedef struct
308 {
313 
314 #define foreach_rdma_tx_func_error \
315 _(SEGMENT_SIZE_EXCEEDED, "segment size exceeded") \
316 _(NO_FREE_SLOTS, "no free tx slots") \
317 _(SUBMISSION, "tx submission errors") \
318 _(COMPLETION, "tx completion errors") /* each entry is _(error name suffix, string); the enum below expands it to RDMA_TX_ERROR_<name> */
319 
320 typedef enum
321 {
322 #define _(f,s) RDMA_TX_ERROR_##f,
324 #undef _
327 
328 #endif /* _RDMA_H_ */
329 
330 /*
331  * fd.io coding-style-patch-verification: ON
332  *
333  * Local Variables:
334  * eval: (c-set-style "gnu")
335  * End:
336  */
rdma_rxq_t::wqes
mlx5dv_wqe_ds_t * wqes
Definition: rdma.h:85
rdma_txq_t::lock
clib_spinlock_t lock
Definition: rdma.h:122
rdma_main_t::devices
rdma_device_t * devices
Definition: rdma.h:261
RDMA_RSS6_AUTO
@ RDMA_RSS6_AUTO
Definition: rdma.h:188
rdma_create_if_args_t::max_pktlen
u16 max_pktlen
Definition: rdma.h:285
rdma_txq_t
Definition: rdma.h:117
rdma_device_t::flow_mcast4
struct ibv_flow * flow_mcast4
Definition: rdma.h:225
rdma_main
rdma_main_t rdma_main
Definition: device.c:47
unformat_rdma_create_if_args
unformat_function_t unformat_rdma_create_if_args
Definition: rdma.h:305
vnet_device_class_t
struct _vnet_device_class vnet_device_class_t
foreach_rdma_tx_func_error
#define foreach_rdma_tx_func_error
Definition: rdma.h:314
rdma_txq_t::dv_sq_db
volatile u64 * dv_sq_db
Definition: rdma.h:137
rdma_rss6_t
rdma_rss6_t
Definition: rdma.h:186
rdma_txq_t::bufs_log2sz
u8 bufs_log2sz
Definition: rdma.h:147
rdma_create_if_args_t::rxq_num
u32 rxq_num
Definition: rdma.h:281
rdma_mlx5_wqe_t::opcode
u8 opcode
Definition: rdma.h:58
rdma_rxq_t::cq_ci
u32 cq_ci
Definition: rdma.h:79
rdma_device_t::rss6
rdma_rss6_t rss6
Definition: rdma.h:216
rdma_device_t::rx_rwq_ind_tbl
struct ibv_rwq_ind_table * rx_rwq_ind_tbl
Definition: rdma.h:223
CLIB_CACHE_LINE_ALIGN_MARK
#define CLIB_CACHE_LINE_ALIGN_MARK(mark)
Definition: cache.h:60
vlib_log_class_t
u32 vlib_log_class_t
Definition: vlib.h:52
rdma_rxq_t::n_mini_cqes
u16 n_mini_cqes
Definition: rdma.h:81
rdma_txq_t::dv_cq_log2sz
u8 dv_cq_log2sz
Definition: rdma.h:149
rdma_rxq_t
Definition: rdma.h:70
rdma_txq_t::dv_sq_log2sz
u8 dv_sq_log2sz
Definition: rdma.h:148
rdma_device_class
vnet_device_class_t rdma_device_class
rdma_txq_t::ibv_qp
struct ibv_qp * ibv_qp
Definition: rdma.h:130
rdma_mlx5dv.h
VLIB_FRAME_SIZE
#define VLIB_FRAME_SIZE
Definition: node.h:368
rdma_rxq_t::queue_index
u32 queue_index
Definition: rdma.h:93
foreach_rdma_device_flags
#define foreach_rdma_device_flags
Definition: rdma.h:28
rdma_device_t::mr
struct ibv_mr * mr
Definition: rdma.h:220
rdma_txq_t::dv_sq_dbrec
volatile u32 * dv_sq_dbrec
Definition: rdma.h:136
rdma_txq_t::dv_cq_idx
u16 dv_cq_idx
Definition: rdma.h:146
RDMA_RSS4_IP_TCP
@ RDMA_RSS4_IP_TCP
Definition: rdma.h:183
u16
unsigned short u16
Definition: types.h:57
rdma_delete_if
void rdma_delete_if(vlib_main_t *vm, rdma_device_t *rd)
Definition: device.c:1052
rdma_device_t::flags
u32 flags
Definition: rdma.h:201
rdma_create_if_args_t::error
clib_error_t * error
Definition: rdma.h:292
vm
vlib_main_t * vm
X-connect all packets from the HOST to the PHY.
Definition: nat44_ei.c:3047
rdma_input_trace_t::cqe_flags
u16 cqe_flags
Definition: rdma.h:311
rdma_device_t::flow_mcast6
struct ibv_flow * flow_mcast6
Definition: rdma.h:227
rdma_mlx5_wqe_t
Definition: rdma.h:47
rdma_rxq_t::buf_sz
u32 buf_sz
Definition: rdma.h:92
rdma_device_t::rx_qp6
struct ibv_qp * rx_qp6
Definition: rdma.h:222
rdma_rxq_t::striding_wqe_tail
u32 striding_wqe_tail
Definition: rdma.h:98
rdma_device_t::name
u8 * name
Definition: rdma.h:210
RDMA_RSS4_IP_UDP
@ RDMA_RSS4_IP_UDP
Definition: rdma.h:182
rdma_main_t::per_thread_data
rdma_per_thread_data_t * per_thread_data
Definition: rdma.h:260
rdma_create_if
void rdma_create_if(vlib_main_t *vm, rdma_create_if_args_t *args)
Definition: device.c:881
RDMA_RSS4_AUTO
@ RDMA_RSS4_AUTO
Definition: rdma.h:180
CLIB_ALIGN_MARK
#define CLIB_ALIGN_MARK(name, alignment)
Definition: clib.h:93
rdma_txq_t::cq
struct ibv_cq * cq
Definition: rdma.h:159
rdma_rxq_t::wqe_cnt
u32 wqe_cnt
Definition: rdma.h:90
rdma_device_t::ctx
struct ibv_context * ctx
Definition: rdma.h:218
vlib_pci_device_info
Definition: pci.h:60
rdma_txq_t::dv_cq_dbrec
volatile u32 * dv_cq_dbrec
Definition: rdma.h:139
rdma_mlx5_wqe_t::wqe_index_hi
u8 wqe_index_hi
Definition: rdma.h:56
rdma_rxq_t::last_cqe_flags
u16 last_cqe_flags
Definition: rdma.h:83
rdma_rxq_t::n_ds_per_wqe
u8 n_ds_per_wqe
Definition: rdma.h:111
rdma_create_if_args_t::mode
rdma_mode_t mode
Definition: rdma.h:282
rdma_create_if_args_t::txq_size
u32 txq_size
Definition: rdma.h:280
rdma_create_if_args_t::disable_striding_rq
u8 disable_striding_rq
Definition: rdma.h:284
format_rdma_device_name
format_function_t format_rdma_device_name
Definition: rdma.h:302
rdma_device_t::pci
vlib_pci_device_info_t * pci
Definition: rdma.h:209
rdma_main_t::log_class
vlib_log_class_t log_class
Definition: rdma.h:262
rdma_txq_t::bufs
u32 * bufs
Definition: rdma.h:143
mlx5dv_wqe_ds_t
Definition: rdma_mlx5dv.h:75
rdma_per_thread_data_t
Definition: rdma.h:232
mlx5dv_cqe_t
Definition: rdma_mlx5dv.h:44
rdma_device_t::linux_ifname
u8 * linux_ifname
Definition: rdma.h:211
RDMA_MODE_AUTO
@ RDMA_MODE_AUTO
Definition: rdma.h:270
mac_address.h
rdma_rxq_t::log2_cq_size
u16 log2_cq_size
Definition: rdma.h:80
rdma_main_t
Definition: rdma.h:258
log.h
rdma_device_t::lkey
u32 lkey
Definition: rdma.h:205
clib_spinlock_s
Definition: lock.h:51
rdma_create_if_args_t::rv
int rv
Definition: rdma.h:290
rdma_rxq_t::tail
u32 tail
Definition: rdma.h:78
rdma_rxq_t::wq_stride
u32 wq_stride
Definition: rdma.h:91
format_rdma_input_trace
format_function_t format_rdma_input_trace
Definition: rdma.h:303
rdma_create_if_args_t::rxq_size
u32 rxq_size
Definition: rdma.h:279
rdma_mlx5_wqe_t::wqe_index_lo
u8 wqe_index_lo
Definition: rdma.h:57
rdma_create_if_args_t
Definition: rdma.h:275
rdma_input_node
vlib_node_registration_t rdma_input_node
(constructor) VLIB_REGISTER_NODE (rdma_input_node)
Definition: input.c:1045
rdma_device_t
Definition: rdma.h:194
rdma_txq_t::ibv_cq
struct ibv_cq * ibv_cq
Definition: rdma.h:129
interface.h
RDMA_TX_N_ERROR
@ RDMA_TX_N_ERROR
Definition: rdma.h:325
rdma_device_t::sw_if_index
u32 sw_if_index
Definition: rdma.h:203
rdma_txq_t::tail
u16 tail
Definition: rdma.h:145
rdma_create_if_args_t::no_multi_seg
u8 no_multi_seg
Definition: rdma.h:283
rdma_rss4_t
rdma_rss4_t
Definition: rdma.h:178
rdma_create_if_args_t::rss6
rdma_rss6_t rss6
Definition: rdma.h:287
RDMA_RSS6_IP_UDP
@ RDMA_RSS6_IP_UDP
Definition: rdma.h:190
rdma_device_t::async_event_clib_file_index
u32 async_event_clib_file_index
Definition: rdma.h:213
RDMA_RSS6_IP_TCP
@ RDMA_RSS6_IP_TCP
Definition: rdma.h:191
rdma_rxq_t::wq
struct ibv_wq * wq
Definition: rdma.h:74
rdma_rxq_t::bufs
u32 * bufs
Definition: rdma.h:75
rdma_device_t::rx_qp4
struct ibv_qp * rx_qp4
Definition: rdma.h:221
vlib_node_registration_t
struct _vlib_node_registration vlib_node_registration_t
rdma_input_trace_t
Definition: rdma.h:307
format_function_t
u8 *() format_function_t(u8 *s, va_list *args)
Definition: format.h:48
RDMA_RSS6_IP
@ RDMA_RSS6_IP
Definition: rdma.h:189
u16x8
_mm_packus_epi16 u16x8
Definition: vector_sse42.h:159
rdma_txq_t::dv_sq_wqes
rdma_mlx5_wqe_t * dv_sq_wqes
Definition: rdma.h:135
rdma_rxq_t::n_used_per_chain
u8 * n_used_per_chain
Definition: rdma.h:104
RDMA_RSS4_IP
@ RDMA_RSS4_IP
Definition: rdma.h:181
rdma_input_trace_t::next_index
u32 next_index
Definition: rdma.h:309
u64
unsigned long u64
Definition: types.h:89
format_rdma_rxq
format_function_t format_rdma_rxq
Definition: rdma.h:304
RDMA_MODE_IBV
@ RDMA_MODE_IBV
Definition: rdma.h:271
rdma_device_t::pd
struct ibv_pd * pd
Definition: rdma.h:219
rdma_create_if_args_t::name
u8 * name
Definition: rdma.h:278
rdma_rxq_t::cq
struct ibv_cq * cq
Definition: rdma.h:73
rdma_device_t::hwaddr
mac_address_t hwaddr
Definition: rdma.h:212
u32
unsigned int u32
Definition: types.h:88
rdma_rxq_t::n_mini_cqes_left
u16 n_mini_cqes_left
Definition: rdma.h:82
RDMA_MLX5_WQE_SZ
#define RDMA_MLX5_WQE_SZ
Definition: rdma.h:64
rdma_device_t::per_interface_next_index
u32 per_interface_next_index
Definition: rdma.h:202
rdma_create_if_args_t::rss4
rdma_rss4_t rss4
Definition: rdma.h:286
rdma_rxq_t::cq_db
volatile u32 * cq_db
Definition: rdma.h:88
rdma_rxq_t::cqn
u32 cqn
Definition: rdma.h:89
rdma_input_trace_t::hw_if_index
u32 hw_if_index
Definition: rdma.h:310
pci.h
rdma_rxq_t::size
u32 size
Definition: rdma.h:76
rdma_mode_t
rdma_mode_t
Definition: rdma.h:268
unformat_function_t
uword() unformat_function_t(unformat_input_t *input, va_list *args)
Definition: format.h:225
rdma_create_if_args_t::ifname
u8 * ifname
Definition: rdma.h:277
rdma_device_t::flow_ucast4
struct ibv_flow * flow_ucast4
Definition: rdma.h:224
rdma_per_thread_data_t::buffer_template
vlib_buffer_t buffer_template
Definition: rdma.h:255
mac_address_t_
Definition: mac_address.h:21
rdma_rxq_t::log_stride_per_wqe
u8 log_stride_per_wqe
Definition: rdma.h:99
STRUCT_MARK
#define STRUCT_MARK(mark)
Definition: clib.h:78
vlib_main_t
Definition: main.h:102
u8
unsigned char u8
Definition: types.h:56
clib_error_t
Definition: clib_error.h:21
rdma_device_t::hw_if_index
u32 hw_if_index
Definition: rdma.h:204
rdma_device_t::error
clib_error_t * error
Definition: rdma.h:229
rdma_rxq_t::n_total_additional_segs
u16 n_total_additional_segs
Definition: rdma.h:110
rdma_rxq_t::log_wqe_sz
u8 log_wqe_sz
Definition: rdma.h:114
STATIC_ASSERT
STATIC_ASSERT(RDMA_MLX5_WQE_SZ==MLX5_SEND_WQE_BB &&RDMA_MLX5_WQE_SZ % sizeof(struct mlx5_wqe_data_seg)==0, "bad size")
rdma_rxq_t::wq_db
volatile u32 * wq_db
Definition: rdma.h:87
rdma_txq_t::dv_cq_cqes
struct mlx5_cqe64 * dv_cq_cqes
Definition: rdma.h:138
rdma_mlx5_wqe_t::opc_mod
u8 opc_mod
Definition: rdma.h:55
rdma_rxq_t::second_bufs
u32 * second_bufs
Definition: rdma.h:106
rdma_device_t::dev_instance
u32 dev_instance
Definition: rdma.h:214
RDMA_MODE_DV
@ RDMA_MODE_DV
Definition: rdma.h:272
rdma_rxq_t::head
u32 head
Definition: rdma.h:77
rdma_device_t::pool
u8 pool
Definition: rdma.h:206
rdma_device_t::rss4
rdma_rss4_t rss4
Definition: rdma.h:215
rdma_device_t::flow_ucast6
struct ibv_flow * flow_ucast6
Definition: rdma.h:226
STATIC_ASSERT_OFFSET_OF
STATIC_ASSERT_OFFSET_OF(rdma_txq_t, cacheline1, 64)
rdma_tx_func_error_t
rdma_tx_func_error_t
Definition: rdma.h:320
rdma_txq_t::qp
struct ibv_qp * qp
Definition: rdma.h:160
rdma_create_if_args_t::sw_if_index
u32 sw_if_index
Definition: rdma.h:291
rdma_rxq_t::cqes
mlx5dv_cqe_t * cqes
Definition: rdma.h:84
format_rdma_device
format_function_t format_rdma_device
Definition: rdma.h:301
rdma_device_t::txqs
rdma_txq_t * txqs
Definition: rdma.h:200
u16x16
_mm256_packus_epi16 u16x16
Definition: vector_avx2.h:118
vlib_buffer_t
VLIB buffer representation.
Definition: buffer.h:111
rdma_main_t::msg_id_base
u16 msg_id_base
Definition: rdma.h:263
rdma_rxq_t::incomplete_tail
u32 incomplete_tail
Definition: rdma.h:107
rdma_txq_t::head
u16 head
Definition: rdma.h:144
rdma_device_t::rxqs
rdma_rxq_t * rxqs
Definition: rdma.h:199