FD.io VPP  v21.10.1-2-g0a485f517
Vector Packet Processing
output.c
Go to the documentation of this file.
1 /*
2  *------------------------------------------------------------------
3  * Copyright (c) 2018 Cisco and/or its affiliates.
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at:
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  *------------------------------------------------------------------
16  */
17 
18 #include <vlib/vlib.h>
19 #include <vlib/unix/unix.h>
20 #include <vlib/pci/pci.h>
21 #include <vppinfra/ring.h>
22 #include <vnet/ethernet/ethernet.h>
23 #include <vnet/devices/devices.h>
24 #include <rdma/rdma.h>
25 
26 #define RDMA_TX_RETRIES 5
27 
28 #define RDMA_TXQ_DV_DSEG_SZ(txq) (RDMA_MLX5_WQE_DS * RDMA_TXQ_DV_SQ_SZ(txq))
29 #define RDMA_TXQ_DV_DSEG2WQE(d) (((d) + RDMA_MLX5_WQE_DS - 1) / RDMA_MLX5_WQE_DS)
30 
31 /*
32  * MLX5 direct verbs tx/free functions
33  */
34 
/*
 * rdma_device_output_free_mlx5 (name taken from the cross-reference index
 * of this listing; listing lines 35-36 with the return type and the name
 * are absent from this extract).
 *
 * Consumes completion queue entries starting at txq->dv_cq_idx, then uses
 * the last consumed entry to locate the WQE whose ctrl.imm field holds the
 * new ring head.  Buffers between the old head and that value are freed
 * from txq->bufs, txq->head is advanced and the CQ doorbell record is
 * written with the new consumer index.
 *
 * vm   - vlib main, used for error counting and buffer freeing
 * node - node runtime, supplies node_index for the error counter
 * txq  - transmit queue whose CQ/SQ state is read and updated
 */
37  const vlib_node_runtime_t * node,
38  rdma_txq_t * txq)
39 {
40  u16 idx = txq->dv_cq_idx;
41  u32 cq_mask = pow2_mask (txq->dv_cq_log2sz);
42  u32 sq_mask = pow2_mask (txq->dv_sq_log2sz);
43  u32 mask = pow2_mask (txq->bufs_log2sz);
44  u32 buf_sz = RDMA_TXQ_BUF_SZ (txq);
45  u32 log2_cq_sz = txq->dv_cq_log2sz;
46  struct mlx5_cqe64 *cqes = txq->dv_cq_cqes, *cur = cqes + (idx & cq_mask);
47  u8 op_own, saved;
48  const rdma_mlx5_wqe_t *wqe;
49 
/*
 * Walk the CQ: stop as soon as the owner bit of the entry differs from the
 * parity of the current pass (idx >> log2_cq_sz) or its opcode (upper
 * nibble of op_own) is MLX5_CQE_INVALID.
 */
50  for (;;)
51  {
52  op_own = *(volatile u8 *) &cur->op_own;
53  if (((idx >> log2_cq_sz) & MLX5_CQE_OWNER_MASK) !=
54  (op_own & MLX5_CQE_OWNER_MASK) || (op_own >> 4) == MLX5_CQE_INVALID)
55  break;
/*
 * Any opcode other than MLX5_CQE_REQ is counted as a completion error.
 * NOTE(review): PREDICT_FALSE closes around (op_own >> 4) only, so the
 * comparison with MLX5_CQE_REQ sits outside the branch hint - confirm
 * whether the parenthesis was meant to enclose the whole comparison.
 */
56  if (PREDICT_FALSE ((op_own >> 4)) != MLX5_CQE_REQ)
57  vlib_error_count (vm, node->node_index, RDMA_TX_ERROR_COMPLETION, 1);
58  idx++;
59  cur = cqes + (idx & cq_mask);
60  }
61 
62  if (idx == txq->dv_cq_idx)
63  return; /* nothing to do */
64 
/*
 * Step back to the last consumed CQE.  Its op_own is read into 'saved',
 * which is then discarded by the (void) cast, and the field is overwritten
 * with 0xf0.
 * NOTE(review): 0xf0 >> 4 is 0xf, presumably the MLX5_CQE_INVALID opcode -
 * confirm against the mlx5 headers; 'saved' has no other use here.
 */
65  cur = cqes + ((idx - 1) & cq_mask);
66  saved = cur->op_own;
67  (void) saved;
68  cur->op_own = 0xf0;
69  txq->dv_cq_idx = idx;
70 
71  /* retrieve original WQE; its ctrl.imm carries the new ring head */
72  wqe = txq->dv_sq_wqes + (be16toh (cur->wqe_counter) & sq_mask);
73  if (PREDICT_FALSE (wqe->ctrl.imm == RDMA_TXQ_DV_INVALID_ID))
74  return; /* can happen if CQE reports error for an intermediate WQE */
75 
76  ASSERT (RDMA_TXQ_USED_SZ (txq->head, wqe->ctrl.imm) <= buf_sz &&
77  RDMA_TXQ_USED_SZ (wqe->ctrl.imm, txq->tail) < buf_sz);
78 
79  /* free sent buffers and update txq head */
80  vlib_buffer_free_from_ring (vm, txq->bufs, txq->head & mask, buf_sz,
81  RDMA_TXQ_USED_SZ (txq->head, wqe->ctrl.imm));
82  txq->head = wqe->ctrl.imm;
83 
84  /* ring doorbell */
/*
 * NOTE(review): listing line 85 is missing from this extract; the index
 * references CLIB_MEMORY_STORE_BARRIER, presumably the dropped statement -
 * confirm against the original file.
 */
86  txq->dv_cq_dbrec[0] = htobe32 (idx);
87 }
88 
/*
 * rdma_device_output_tx_mlx5_doorbell (name and full parameter list
 * "rdma_txq_t *txq, rdma_mlx5_wqe_t *last, const u16 tail, u32 sq_mask"
 * come from the cross-reference index; listing lines 89-90 are absent
 * from this extract).
 *
 * Finalises the last WQE of a batch and notifies the device: the new tail
 * is stored in last->ctrl.imm, a CQE is requested for that WQE, the send
 * doorbell record is written with the tail and the first 8 bytes of the
 * WQE are written to the doorbell register.  sq_mask is not referenced in
 * the visible body.
 */
91  const u16 tail, u32 sq_mask)
92 {
93  last->ctrl.imm = tail; /* register item to free */
94  last->ctrl.fm_ce_se = MLX5_WQE_CTRL_CQ_UPDATE; /* generate a CQE so we can free buffers */
95 
/* the batch must be non-empty and must fit in the space left in the ring */
96  ASSERT (tail != txq->tail &&
97  RDMA_TXQ_AVAIL_SZ (txq, txq->head, txq->tail) >=
98  RDMA_TXQ_USED_SZ (txq->tail, tail));
99 
/*
 * NOTE(review): listing lines 100 and 102 are missing from this extract.
 * The index references CLIB_MEMORY_STORE_BARRIER and CLIB_COMPILER_BARRIER,
 * presumably the statements dropped before and between the two writes
 * below - confirm against the original file.
 */
101  txq->dv_sq_dbrec[MLX5_SND_DBR] = htobe32 (tail);
103  txq->dv_sq_db[0] = *(u64 *) last;
104 }
105 
/*
 * Fill one send WQE for buffer b.
 *
 * The WQE is first overwritten with the RDMA_MLX5_WQE_SZ-byte template,
 * packet bytes are copied into the inline header area of the ethernet
 * segment, the 16-bit tail is split into wqe_index_lo / wqe_index_hi, and
 * then either the data segment (byte count and address past the inline
 * header) or the inline header size is set.
 *
 * wqe  - WQE to initialise
 * tmpl - template copied verbatim into the WQE
 * b    - buffer to transmit; only its current data and length are read
 * tail - ring position recorded in the WQE index fields
 *
 * NOTE(review): listing line 106 (return type / qualifiers) is missing
 * from this extract.
 */
107 rdma_mlx5_wqe_init (rdma_mlx5_wqe_t * wqe, const void *tmpl,
108  vlib_buffer_t * b, const u16 tail)
109 {
110  u16 sz = b->current_length;
111  const void *cur = vlib_buffer_get_current (b);
112  uword addr = pointer_to_uword (cur);
113 
114  clib_memcpy_fast (wqe, tmpl, RDMA_MLX5_WQE_SZ);
115  /* speculatively copy at least MLX5_ETH_L2_INLINE_HEADER_SIZE (18-bytes) */
116  STATIC_ASSERT (STRUCT_SIZE_OF (struct mlx5_wqe_eth_seg, inline_hdr_start) +
117  STRUCT_SIZE_OF (struct mlx5_wqe_eth_seg,
118  inline_hdr) >=
119  MLX5_ETH_L2_INLINE_HEADER_SIZE, "wrong size");
/*
 * NOTE(review): listing line 121 (the length argument closing this call)
 * is missing from this extract.
 */
120  clib_memcpy_fast (wqe->eseg.inline_hdr_start, cur,
122 
/* low byte and high byte of the 16-bit tail */
123  wqe->wqe_index_lo = tail;
124  wqe->wqe_index_hi = tail >> 8;
/*
 * NOTE(review): listing line 125 (the condition guarding the block below)
 * is missing from this extract; judging by the two branches it presumably
 * tests sz against MLX5_ETH_L2_INLINE_HEADER_SIZE - confirm.
 */
126  {
127  /* inline_hdr_sz is set to MLX5_ETH_L2_INLINE_HEADER_SIZE
128  in the template */
129  wqe->dseg.byte_count = htobe32 (sz - MLX5_ETH_L2_INLINE_HEADER_SIZE);
130  wqe->dseg.addr = htobe64 (addr + MLX5_ETH_L2_INLINE_HEADER_SIZE);
131  }
132  else
133  {
134  /* dseg.byte_count and dseg.addr are set to 0 in the template */
135  wqe->eseg.inline_hdr_sz = htobe16 (sz);
136  }
137 }
138 
139 /*
140  * specific data path for chained buffers, supporting ring wrap-around
141  * contrary to the normal path - otherwise we may fail to enqueue chained
142  * buffers because we are close to the end of the ring while we still have
143  * plenty of descriptors available
144  */
/*
 * rdma_device_output_tx_mlx5_chained (name from the cross-reference index;
 * listing lines 145-146 with the return type, name and first parameter are
 * absent from this extract).
 *
 * Slow path entered with the tail the caller has already reached.  Packets
 * are enqueued one at a time; for a chained packet every follow-on buffer
 * gets its own data segment, written with wrap-around over the send queue.
 *
 * n_left_from - number of packets in the caller's batch
 * bi          - buffer indices of the batch, from its start
 * b           - buffer pointers, already advanced by the caller
 * tail        - ring position reached by the caller
 *
 * Returns n_left_from minus the number of packets left unprocessed; also
 * stores the final tail in txq->tail.
 */
147  const vlib_node_runtime_t *node,
148  const rdma_device_t *rd, rdma_txq_t *txq,
149  const u32 n_left_from, const u32 *bi,
150  vlib_buffer_t **b, u16 tail)
151 {
152  u32 wqe_n = RDMA_TXQ_AVAIL_SZ (txq, txq->head, tail);
153  u32 sq_mask = pow2_mask (txq->dv_sq_log2sz);
154  u32 mask = pow2_mask (txq->bufs_log2sz);
155  u32 dseg_mask = RDMA_TXQ_DV_DSEG_SZ (txq) - 1;
156  const u32 lkey = clib_host_to_net_u32 (rd->lkey);
/* entries the caller prepared between txq->tail and tail */
157  const u32 done = RDMA_TXQ_USED_SZ (txq->tail, tail);
158  u32 n = n_left_from - done;
159  rdma_mlx5_wqe_t *last = txq->dv_sq_wqes + (tail & sq_mask);
160 
/* bi still points at the start of the batch: skip what is already done */
161  bi += done;
162 
163  while (n >= 1 && wqe_n >= 1)
164  {
165  u32 *bufs = txq->bufs + (tail & mask);
166  rdma_mlx5_wqe_t *wqe = txq->dv_sq_wqes + (tail & sq_mask);
167 
168  /* setup the head WQE */
169  rdma_mlx5_wqe_init (wqe, txq->dv_wqe_tmpl, b[0], tail);
170 
171  bufs[0] = bi[0];
172 
173  if (b[0]->flags & VLIB_BUFFER_NEXT_PRESENT)
174  {
175  /*
176  * max number of available dseg:
177  * - 4 dseg per WQEBB available
178  * - max 32 dseg per WQE (5-bits length field in WQE ctrl)
179  */
180 #define RDMA_MLX5_WQE_DS_MAX (1 << 5)
/*
 * NOTE(review): listing line 182 (the initialiser of dseg_max) is missing
 * from this extract.
 */
181  const u32 dseg_max =
183  vlib_buffer_t *chained_b = b[0];
184  u32 chained_n = 0;
185 
186  /* there are exactly 4 dseg per WQEBB and we rely on that */
/*
 * NOTE(review): listing line 187 (the opening of this static assertion)
 * is missing from this extract.
 */
188  sizeof (struct mlx5_wqe_data_seg) ==
189  MLX5_SEND_WQE_BB, "wrong size");
190 
191  /*
192  * iterate over fragments, supporting ring wrap-around contrary to
193  * the normal path - otherwise we may fail to enqueue chained
194  * buffers because we are close to the end of the ring while we
195  * still have plenty of descriptors available
196  */
197  while (chained_n < dseg_max
198  && chained_b->flags & VLIB_BUFFER_NEXT_PRESENT)
199  {
/* address the queue as a flat array of data segments, wrapping by mask */
200  struct mlx5_wqe_data_seg *dseg = (void *) txq->dv_sq_wqes;
201  dseg += ((tail + 1) * RDMA_MLX5_WQE_DS + chained_n) & dseg_mask;
202  if (((clib_address_t) dseg & (MLX5_SEND_WQE_BB - 1)) == 0)
203  {
204  /*
205  * start of new WQEBB
206  * head/tail are shared between buffers and descriptor
207  * In order to maintain 1:1 correspondence between
208  * buffer index and descriptor index, we build
209  * 4-fragments chains and save the head
210  */
211  chained_b->flags &= ~(VLIB_BUFFER_NEXT_PRESENT |
212  VLIB_BUFFER_TOTAL_LENGTH_VALID);
213  u32 idx = tail + 1 + RDMA_TXQ_DV_DSEG2WQE (chained_n);
214  idx &= mask;
215  txq->bufs[idx] = chained_b->next_buffer;
216  }
217 
218  chained_b = vlib_get_buffer (vm, chained_b->next_buffer);
219  dseg->byte_count = htobe32 (chained_b->current_length);
220  dseg->lkey = lkey;
221  dseg->addr = htobe64 (vlib_buffer_get_current_va (chained_b));
222 
223  chained_n += 1;
224  }
225 
/* the loop stopped at dseg_max with fragments still pending */
226  if (chained_b->flags & VLIB_BUFFER_NEXT_PRESENT)
227  {
228  /*
229  * no descriptors left: drop the chain including 1st WQE
230  * skip the problematic packet and continue
231  */
232  vlib_buffer_free_from_ring (vm, txq->bufs, tail & mask,
233  RDMA_TXQ_BUF_SZ (txq), 1 +
234  RDMA_TXQ_DV_DSEG2WQE (chained_n));
235  vlib_error_count (vm, node->node_index,
236  dseg_max == chained_n ?
237  RDMA_TX_ERROR_SEGMENT_SIZE_EXCEEDED :
238  RDMA_TX_ERROR_NO_FREE_SLOTS, 1);
239 
240  /* fixup tail to overwrite wqe head with next packet */
/*
 * cancels the unconditional "tail += 1" at the bottom of the loop.
 * NOTE(review): wqe_n is still decremented there for the dropped packet.
 */
241  tail -= 1;
242  }
243  else
244  {
245  /* update WQE descriptor with new dseg number */
246  ((u8 *) & wqe[0].ctrl.qpn_ds)[3] = RDMA_MLX5_WQE_DS + chained_n;
247 
/* account for the extra WQE slots taken by the chained data segments */
248  tail += RDMA_TXQ_DV_DSEG2WQE (chained_n);
249  wqe_n -= RDMA_TXQ_DV_DSEG2WQE (chained_n);
250 
251  last = wqe;
252  }
253  }
254  else
255  {
256  /* not chained */
257  last = wqe;
258  }
259 
260  tail += 1;
261  bi += 1;
262  b += 1;
263  wqe_n -= 1;
264  n -= 1;
265  }
266 
/* ring the doorbell unless nothing at all was consumed from the batch */
267  if (n != n_left_from)
268  rdma_device_output_tx_mlx5_doorbell (txq, last, tail, sq_mask);
269 
270  txq->tail = tail;
271  return n_left_from - n;
272 }
273 
/*
 * rdma_device_output_tx_mlx5 (name from the cross-reference index; listing
 * lines 274-275 with the return type, name and first parameters are absent
 * from this extract).
 *
 * Fast path for the mlx5 direct-verbs queue: initialises one WQE per
 * buffer, in two passes when the batch crosses the end of the buffer ring,
 * then rings the doorbell once for the whole batch.  As soon as a chained
 * buffer is seen the remaining work is handed to
 * rdma_device_output_tx_mlx5_chained together with the tail reached so
 * far.  bi is not advanced here; it is passed through unchanged.
 *
 * Returns n_left_from on the fast path, otherwise the value returned by
 * the chained path.
 */
276  const rdma_device_t *rd, rdma_txq_t *txq,
277  const u32 n_left_from, const u32 *bi,
278  vlib_buffer_t **b)
279 {
280 
281  u32 sq_mask = pow2_mask (txq->dv_sq_log2sz);
282  u32 mask = pow2_mask (txq->bufs_log2sz);
283  rdma_mlx5_wqe_t *wqe;
284  u32 n, n_wrap;
285  u16 tail = txq->tail;
286 
287  ASSERT (RDMA_TXQ_BUF_SZ (txq) <= RDMA_TXQ_DV_SQ_SZ (txq));
288 
289  /* avoid wrap-around logic in core loop */
/* n: entries up to the end of the buffer ring; n_wrap: the remainder */
290  n = clib_min (n_left_from, RDMA_TXQ_BUF_SZ (txq) - (tail & mask));
291  n_wrap = n_left_from - n;
292 
293 wrap_around:
294  wqe = txq->dv_sq_wqes + (tail & sq_mask);
295 
/* 4 packets per iteration; needs 8 left because b[4..7] are prefetched */
296  while (n >= 8)
297  {
298  u32 flags = b[0]->flags | b[1]->flags | b[2]->flags | b[3]->flags;
299  if (PREDICT_FALSE (flags & VLIB_BUFFER_NEXT_PRESENT))
300  return rdma_device_output_tx_mlx5_chained (vm, node, rd, txq,
301  n_left_from, bi, b, tail);
302 
303  vlib_prefetch_buffer_header (b[4], LOAD);
304  rdma_mlx5_wqe_init (wqe + 0, txq->dv_wqe_tmpl, b[0], tail + 0);
305 
306  vlib_prefetch_buffer_header (b[5], LOAD);
307  rdma_mlx5_wqe_init (wqe + 1, txq->dv_wqe_tmpl, b[1], tail + 1);
308 
309  vlib_prefetch_buffer_header (b[6], LOAD);
310  rdma_mlx5_wqe_init (wqe + 2, txq->dv_wqe_tmpl, b[2], tail + 2);
311 
312  vlib_prefetch_buffer_header (b[7], LOAD);
313  rdma_mlx5_wqe_init (wqe + 3, txq->dv_wqe_tmpl, b[3], tail + 3);
314 
315  b += 4;
316  tail += 4;
317  wqe += 4;
318  n -= 4;
319  }
320 
/* remaining packets, one at a time */
321  while (n >= 1)
322  {
323  if (PREDICT_FALSE (b[0]->flags & VLIB_BUFFER_NEXT_PRESENT))
324  return rdma_device_output_tx_mlx5_chained (vm, node, rd, txq,
325  n_left_from, bi, b, tail);
326 
327  rdma_mlx5_wqe_init (wqe, txq->dv_wqe_tmpl, b[0], tail);
328 
329  b += 1;
330  tail += 1;
331  wqe += 1;
332  n -= 1;
333  }
334 
/* second pass for the part of the batch that wrapped; runs at most once */
335  if (n_wrap)
336  {
337  n = n_wrap;
338  n_wrap = 0;
339  goto wrap_around;
340  }
341 
/* wqe points one past the last WQE written, hence wqe[-1] */
342  rdma_device_output_tx_mlx5_doorbell (txq, &wqe[-1], tail, sq_mask);
343  txq->tail = tail;
344  return n_left_from;
345 }
346 
347 /*
348  * standard ibverb tx/free functions
349  */
350 
/*
 * rdma_device_output_free_ibverb (name from the cross-reference index;
 * listing lines 351-352 with the return type, name and first parameter are
 * absent from this extract).
 *
 * Polls up to VLIB_FRAME_SIZE work completions from txq->ibv_cq.  The
 * wr_id of the last successful completion is taken as the new ring head:
 * buffers between the old head and that value are freed from txq->bufs.
 * A negative poll result and every trailing unsuccessful completion are
 * counted as RDMA_TX_ERROR_COMPLETION.
 */
353  const vlib_node_runtime_t * node,
354  rdma_txq_t * txq)
355 {
356  struct ibv_wc wc[VLIB_FRAME_SIZE];
357  u32 mask = pow2_mask (txq->bufs_log2sz);
358  u16 tail;
359  int n;
360 
361  n = ibv_poll_cq (txq->ibv_cq, VLIB_FRAME_SIZE, wc);
362  if (n <= 0)
363  {
364  if (PREDICT_FALSE (n < 0))
365  vlib_error_count (vm, node->node_index, RDMA_TX_ERROR_COMPLETION, 1);
366  return;
367  }
368 
/*
 * scan backwards from the newest completion, skipping failed ones; only
 * failures at the end of the array are examined
 */
369  while (PREDICT_FALSE (IBV_WC_SUCCESS != wc[n - 1].status))
370  {
371  vlib_error_count (vm, node->node_index, RDMA_TX_ERROR_COMPLETION, 1);
372  n--;
373  if (0 == n)
374  return;
375  }
376 
/* wr_id carries the ring position stored when the work request was built */
377  tail = wc[n - 1].wr_id;
378  vlib_buffer_free_from_ring (vm, txq->bufs, txq->head & mask,
379  RDMA_TXQ_BUF_SZ (txq),
380  RDMA_TXQ_USED_SZ (txq->head, tail));
381  txq->head = tail;
382 }
383 
/*
 * rdma_device_output_tx_ibverb (name from the cross-reference index;
 * listing lines 384-385 with the return type, name and first parameter are
 * absent from this extract).
 *
 * Builds one send work request with a single scatter/gather entry per
 * buffer, links them through their next pointers, marks only the last one
 * as signaled and posts the whole list with ibv_post_send.  txq->tail is
 * advanced by the number of work requests accounted as posted, which is
 * also the return value.
 *
 * bi is not referenced in the visible body, and buffer flags are never
 * inspected: every work request uses num_sge = 1.
 */
386  const vlib_node_runtime_t * node,
387  const rdma_device_t * rd, rdma_txq_t * txq,
388  u32 n_left_from, u32 * bi, vlib_buffer_t ** b)
389 {
390  struct ibv_send_wr wr[VLIB_FRAME_SIZE], *w = wr;
391  struct ibv_sge sge[VLIB_FRAME_SIZE], *s = sge;
392  u32 n = n_left_from;
393 
/* 4 packets per iteration; needs 8 left because b[4..7] are prefetched */
394  while (n >= 8)
395  {
396  vlib_prefetch_buffer_header (b[4], LOAD);
397  s[0].addr = vlib_buffer_get_current_va (b[0]);
398  s[0].length = b[0]->current_length;
399  s[0].lkey = rd->lkey;
400 
401  vlib_prefetch_buffer_header (b[5], LOAD);
402  s[1].addr = vlib_buffer_get_current_va (b[1]);
403  s[1].length = b[1]->current_length;
404  s[1].lkey = rd->lkey;
405 
406  vlib_prefetch_buffer_header (b[6], LOAD);
407  s[2].addr = vlib_buffer_get_current_va (b[2]);
408  s[2].length = b[2]->current_length;
409  s[2].lkey = rd->lkey;
410 
411  vlib_prefetch_buffer_header (b[7], LOAD);
412  s[3].addr = vlib_buffer_get_current_va (b[3]);
413  s[3].length = b[3]->current_length;
414  s[3].lkey = rd->lkey;
415 
/* each WR is zeroed, then chained to the next array element */
416  clib_memset_u8 (&w[0], 0, sizeof (w[0]));
417  w[0].next = &w[0] + 1;
418  w[0].sg_list = &s[0];
419  w[0].num_sge = 1;
420  w[0].opcode = IBV_WR_SEND;
421 
422  clib_memset_u8 (&w[1], 0, sizeof (w[1]));
423  w[1].next = &w[1] + 1;
424  w[1].sg_list = &s[1];
425  w[1].num_sge = 1;
426  w[1].opcode = IBV_WR_SEND;
427 
428  clib_memset_u8 (&w[2], 0, sizeof (w[2]));
429  w[2].next = &w[2] + 1;
430  w[2].sg_list = &s[2];
431  w[2].num_sge = 1;
432  w[2].opcode = IBV_WR_SEND;
433 
434  clib_memset_u8 (&w[3], 0, sizeof (w[3]));
435  w[3].next = &w[3] + 1;
436  w[3].sg_list = &s[3];
437  w[3].num_sge = 1;
438  w[3].opcode = IBV_WR_SEND;
439 
440  s += 4;
441  w += 4;
442  b += 4;
443  n -= 4;
444  }
445 
/* remaining packets, one at a time */
446  while (n >= 1)
447  {
448  s[0].addr = vlib_buffer_get_current_va (b[0]);
449  s[0].length = b[0]->current_length;
450  s[0].lkey = rd->lkey;
451 
452  clib_memset_u8 (&w[0], 0, sizeof (w[0]));
453  w[0].next = &w[0] + 1;
454  w[0].sg_list = &s[0];
455  w[0].num_sge = 1;
456  w[0].opcode = IBV_WR_SEND;
457 
458  s += 1;
459  w += 1;
460  b += 1;
461  n -= 1;
462  }
463 
/*
 * w points one past the last WR built, hence w[-1].
 * NOTE(review): wr_id is the tail value before it is advanced below, i.e.
 * the start of this batch - confirm that is the intended free position.
 */
464  w[-1].wr_id = txq->tail; /* register item to free */
465  w[-1].next = 0; /* fix next pointer in WR linked-list */
466  w[-1].send_flags = IBV_SEND_SIGNALED; /* generate a CQE so we can free buffers */
467 
/*
 * &w is handed to ibv_post_send as its third argument; on failure the code
 * treats (w - wr) as the number of WRs accepted and counts the rest as
 * submission errors
 */
468  w = wr;
469  if (PREDICT_FALSE (0 != ibv_post_send (txq->ibv_qp, w, &w)))
470  {
471  vlib_error_count (vm, node->node_index, RDMA_TX_ERROR_SUBMISSION,
472  n_left_from - (w - wr));
473  n_left_from = w - wr;
474  }
475  txq->tail += n_left_from;
476  return n_left_from;
477 }
478 
479 /*
480  * common tx/free functions
481  */
482 
/*
 * rdma_device_output_free (name and parameter list
 * "vlib_main_t *vm, const vlib_node_runtime_t *node,
 * const rdma_device_t *rd, rdma_txq_t *txq" come from the cross-reference
 * index; listing line 484 is absent from this extract).
 *
 * Branches on the RDMA_DEVICE_F_MLX5DV device flag, with the flag-set case
 * hinted as the likely one.
 *
 * NOTE(review): listing lines 488 and 490 (the two branch bodies) are
 * missing from this extract; presumably they call
 * rdma_device_output_free_mlx5 and rdma_device_output_free_ibverb
 * respectively - confirm against the original file.
 */
483 static void
485  const rdma_device_t *rd, rdma_txq_t *txq)
486 {
487  if (PREDICT_TRUE (rd->flags & RDMA_DEVICE_F_MLX5DV))
489  else
491 }
492 
/*
 * rdma_device_output_tx_try (name and parameter list
 * "vlib_main_t *vm, const vlib_node_runtime_t *node,
 * const rdma_device_t *rd, rdma_txq_t *txq, u32 n_left_from, u32 *bi"
 * come from the cross-reference index; listing line 494 is absent from
 * this extract).
 *
 * One enqueue attempt: returns 0 when n_left_from is 0 after the
 * ring-space step, otherwise dispatches on the RDMA_DEVICE_F_MLX5DV flag
 * and returns the count reported by the selected transmit routine.
 *
 * NOTE(review): listing lines 498, 502, 509, 510, 512 and 519 are missing
 * from this extract (the declaration of b, the ring-space clamp, the
 * buffer-index copy, the buffer-pointer lookup and the ibverb call are
 * not visible); the comments below describe the visible remainder only.
 */
493 static u32
495  const rdma_device_t *rd, rdma_txq_t *txq,
496  u32 n_left_from, u32 *bi)
497 {
499  const u32 mask = pow2_mask (txq->bufs_log2sz);
500 
501  /* do not enqueue more packets than ring space */
/* tail of a statement whose first line (502) is missing from the listing */
503  txq->tail));
504  /* if ring is full, do nothing */
505  if (PREDICT_FALSE (n_left_from == 0))
506  return 0;
507 
508  /* speculatively copy buffer indices */
511 
513 
/* direct-verbs devices take the mlx5 path, hinted as the likely case */
514  if (PREDICT_TRUE (rd->flags & RDMA_DEVICE_F_MLX5DV))
515  n_left_from =
516  rdma_device_output_tx_mlx5 (vm, node, rd, txq, n_left_from, bi, b);
517  else
518  n_left_from =
520 
521  return n_left_from;
522 }
523 
/*
 * rdma_device_output_tx (name and parameter list
 * "vlib_main_t *vm, vlib_node_runtime_t *node, rdma_device_t *rd,
 * rdma_txq_t *txq, u32 *from, u32 n_left_from" come from the
 * cross-reference index; listing lines 525 and 527 are absent from this
 * extract).
 *
 * Makes at most RDMA_TX_RETRIES attempts to enqueue the batch.  Each
 * attempt first reclaims completed buffers with rdma_device_output_free
 * and then calls rdma_device_output_tx_try, advancing 'from' by the number
 * of packets it enqueued.
 *
 * Returns the number of packets that are still not enqueued.
 */
524 static uword
526  rdma_device_t *rd, rdma_txq_t *txq, u32 *from,
528 {
529  int i;
530 
/* stop early once everything has been enqueued */
531  for (i = 0; i < RDMA_TX_RETRIES && n_left_from > 0; i++)
532  {
533  u32 n_enq;
534  rdma_device_output_free (vm, node, rd, txq);
535  n_enq = rdma_device_output_tx_try (vm, node, rd, txq, n_left_from, from);
536  n_left_from -= n_enq;
537  from += n_enq;
538  }
539 
540  return n_left_from;
541 }
542 
/*
 * Device-class TX function: per the cross-reference index this is
 * VNET_DEVICE_CLASS_TX_FN (rdma_device_class) taking
 * (vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame); the
 * header lines 543-545 are absent from this extract.
 *
 * The visible code selects the transmit queue by
 * vm->thread_index modulo the number of queues in rd->txqs, counts
 * RDMA_TX_ERROR_NO_FREE_SLOTS for packets left over in n_left and returns
 * n_buffers - n_left.
 *
 * NOTE(review): listing lines 549, 552, 554, 556, 559, 561, 563 and 567
 * are missing from this extract (the declarations of rd, from, n_buffers
 * and n_left and the statements that set them are not visible).  The index
 * references clib_spinlock_lock_if_init / clib_spinlock_unlock_if_init,
 * rdma_device_output_tx and vlib_buffer_free, presumably called from the
 * dropped lines - confirm against the original file.
 */
546 {
547  rdma_main_t *rm = &rdma_main;
548  vnet_interface_output_runtime_t *ord = (void *) node->runtime_data;
/* queues are shared between threads when there are fewer queues than threads */
550  rdma_txq_t *txq =
551  vec_elt_at_index (rd->txqs, vm->thread_index % vec_len (rd->txqs));
553 
555 
557  n_buffers = frame->n_vectors;
558 
560 
562 
564 
/* packets that could not be enqueued are accounted as "no free slots" */
565  if (PREDICT_FALSE (n_left))
566  {
568  vlib_error_count (vm, node->node_index, RDMA_TX_ERROR_NO_FREE_SLOTS,
569  n_left);
570  }
571 
572  return n_buffers - n_left;
573 }
574 
575 /*
576  * fd.io coding-style-patch-verification: ON
577  *
578  * Local Variables:
579  * eval: (c-set-style "gnu")
580  * End:
581  */
rdma_device_class
VNET_DEVICE_CLASS_TX_FN() rdma_device_class(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
Definition: output.c:543
vlib.h
rdma_device_output_free
static void rdma_device_output_free(vlib_main_t *vm, const vlib_node_runtime_t *node, const rdma_device_t *rd, rdma_txq_t *txq)
Definition: output.c:484
vlib_buffer_t::next_buffer
u32 next_buffer
Next buffer for this linked-list of buffers.
Definition: buffer.h:149
rdma_txq_t::lock
clib_spinlock_t lock
Definition: rdma.h:122
rdma_main_t::devices
rdma_device_t * devices
Definition: rdma.h:261
rdma_device_output_tx_mlx5_doorbell
static_always_inline void rdma_device_output_tx_mlx5_doorbell(rdma_txq_t *txq, rdma_mlx5_wqe_t *last, const u16 tail, u32 sq_mask)
Definition: output.c:90
vlib_buffer_free
static void vlib_buffer_free(vlib_main_t *vm, u32 *buffers, u32 n_buffers)
Free buffers Frees the entire buffer chain for each buffer.
Definition: buffer_funcs.h:979
rdma_txq_t
Definition: rdma.h:117
bufs
vlib_buffer_t * bufs[VLIB_FRAME_SIZE]
Definition: nat44_ei_out2in.c:717
frame
vlib_main_t vlib_node_runtime_t vlib_frame_t * frame
Definition: nat44_ei.c:3048
vlib_prefetch_buffer_header
#define vlib_prefetch_buffer_header(b, type)
Prefetch buffer metadata.
Definition: buffer.h:231
n_buffers
u32 n_buffers
Definition: interface_output.c:421
RDMA_MLX5_WQE_DS_MAX
#define RDMA_MLX5_WQE_DS_MAX
rdma_txq_t::dv_sq_db
volatile u64 * dv_sq_db
Definition: rdma.h:137
rdma_txq_t::bufs_log2sz
u8 bufs_log2sz
Definition: rdma.h:147
RDMA_TXQ_DV_DSEG_SZ
#define RDMA_TXQ_DV_DSEG_SZ(txq)
Definition: output.c:28
clib_spinlock_lock_if_init
static_always_inline void clib_spinlock_lock_if_init(clib_spinlock_t *p)
Definition: lock.h:106
vlib_get_buffer
static vlib_buffer_t * vlib_get_buffer(vlib_main_t *vm, u32 buffer_index)
Translate buffer index into buffer pointer.
Definition: buffer_funcs.h:111
pow2_mask
static uword pow2_mask(uword x)
Definition: clib.h:252
pointer_to_uword
static uword pointer_to_uword(const void *p)
Definition: types.h:131
pool_elt_at_index
#define pool_elt_at_index(p, i)
Returns pointer to element at given index.
Definition: pool.h:549
rdma_txq_t::dv_cq_log2sz
u8 dv_cq_log2sz
Definition: rdma.h:149
vlib_get_buffers
vlib_get_buffers(vm, from, b, n_left_from)
rdma_txq_t::dv_sq_log2sz
u8 dv_sq_log2sz
Definition: rdma.h:148
rdma_txq_t::ibv_qp
struct ibv_qp * ibv_qp
Definition: rdma.h:130
VLIB_FRAME_SIZE
#define VLIB_FRAME_SIZE
Definition: node.h:368
node
vlib_main_t vlib_node_runtime_t * node
Definition: nat44_ei.c:3047
vlib_buffer_copy_indices_to_ring
static void vlib_buffer_copy_indices_to_ring(u32 *ring, u32 *src, u32 start, u32 ring_size, u32 n_buffers)
Definition: buffer_funcs.h:152
rdma_txq_t::dv_sq_dbrec
volatile u32 * dv_sq_dbrec
Definition: rdma.h:136
rdma_txq_t::dv_cq_idx
u16 dv_cq_idx
Definition: rdma.h:146
u16
unsigned short u16
Definition: types.h:57
rdma_device_t::flags
u32 flags
Definition: rdma.h:201
vm
vlib_main_t * vm
X-connect all packets from the HOST to the PHY.
Definition: nat44_ei.c:3047
rdma_mlx5_wqe_t::dseg
struct mlx5_wqe_data_seg dseg
Definition: rdma.h:62
rdma_mlx5_wqe_t
Definition: rdma.h:47
RDMA_TXQ_AVAIL_SZ
#define RDMA_TXQ_AVAIL_SZ(txq, head, tail)
Definition: rdma.h:173
addr
vhost_vring_addr_t addr
Definition: vhost_user.h:130
vlib_error_count
static void vlib_error_count(vlib_main_t *vm, uword node_index, uword counter, uword increment)
Definition: error_funcs.h:57
vlib_frame_t
Definition: node.h:372
RDMA_TXQ_DV_DSEG2WQE
#define RDMA_TXQ_DV_DSEG2WQE(d)
Definition: output.c:29
clib_memcpy_fast
static_always_inline void * clib_memcpy_fast(void *restrict dst, const void *restrict src, size_t n)
Definition: string.h:92
RDMA_MLX5_WQE_DS
#define RDMA_MLX5_WQE_DS
Definition: rdma.h:65
ethernet.h
VNET_DEVICE_CLASS_TX_FN
#define VNET_DEVICE_CLASS_TX_FN(devclass)
Definition: interface.h:317
rdma_txq_t::dv_cq_dbrec
volatile u32 * dv_cq_dbrec
Definition: rdma.h:139
rdma_mlx5_wqe_t::wqe_index_hi
u8 wqe_index_hi
Definition: rdma.h:56
RDMA_TXQ_USED_SZ
#define RDMA_TXQ_USED_SZ(head, tail)
Definition: rdma.h:172
vlib_buffer_free_from_ring
static void vlib_buffer_free_from_ring(vlib_main_t *vm, u32 *ring, u32 start, u32 ring_size, u32 n_buffers)
Free buffers from ring.
Definition: buffer_funcs.h:1026
CLIB_COMPILER_BARRIER
#define CLIB_COMPILER_BARRIER()
Definition: clib.h:134
vec_len
#define vec_len(v)
Number of elements in vector (rvalue-only, NULL tolerant)
Definition: vec_bootstrap.h:142
clib_address_t
u64 clib_address_t
Definition: types.h:121
CLIB_MEMORY_STORE_BARRIER
#define CLIB_MEMORY_STORE_BARRIER()
Definition: clib.h:140
vnet_interface_output_runtime_t::dev_instance
u32 dev_instance
Definition: interface_funcs.h:479
RDMA_TXQ_BUF_SZ
#define RDMA_TXQ_BUF_SZ(txq)
Definition: rdma.h:168
rdma_main
rdma_main_t rdma_main
Definition: device.c:47
rdma_txq_t::bufs
u32 * bufs
Definition: rdma.h:143
vec_elt_at_index
#define vec_elt_at_index(v, i)
Get vector value at index i checking that i is in bounds.
Definition: vec_bootstrap.h:203
PREDICT_FALSE
#define PREDICT_FALSE(x)
Definition: clib.h:124
vlib_frame_vector_args
static void * vlib_frame_vector_args(vlib_frame_t *f)
Get pointer to frame vector data.
Definition: node_funcs.h:301
rdma_main_t
Definition: rdma.h:258
rdma_device_t::lkey
u32 lkey
Definition: rdma.h:205
static_always_inline
#define static_always_inline
Definition: clib.h:112
uword
u64 uword
Definition: types.h:112
last
static heap_elt_t * last(heap_header_t *h)
Definition: heap.c:53
if
if(node->flags &VLIB_NODE_FLAG_TRACE) vnet_interface_output_trace(vm
rdma_mlx5_wqe_init
static_always_inline void rdma_mlx5_wqe_init(rdma_mlx5_wqe_t *wqe, const void *tmpl, vlib_buffer_t *b, const u16 tail)
Definition: output.c:107
rdma_mlx5_wqe_t::wqe_index_lo
u8 wqe_index_lo
Definition: rdma.h:57
vlib_main_t::thread_index
u32 thread_index
Definition: main.h:215
rdma_device_t
Definition: rdma.h:194
rdma_txq_t::ibv_cq
struct ibv_cq * ibv_cq
Definition: rdma.h:129
rdma_txq_t::tail
u16 tail
Definition: rdma.h:145
mask
vl_api_pnat_mask_t mask
Definition: pnat.api:45
rdma_device_output_tx_mlx5
static_always_inline u32 rdma_device_output_tx_mlx5(vlib_main_t *vm, const vlib_node_runtime_t *node, const rdma_device_t *rd, rdma_txq_t *txq, const u32 n_left_from, const u32 *bi, vlib_buffer_t **b)
Definition: output.c:275
clib_min
#define clib_min(x, y)
Definition: clib.h:342
STATIC_ASSERT
#define STATIC_ASSERT(truth,...)
Definition: error_bootstrap.h:111
rdma_device_output_free_ibverb
static_always_inline void rdma_device_output_free_ibverb(vlib_main_t *vm, const vlib_node_runtime_t *node, rdma_txq_t *txq)
Definition: output.c:352
clib_memset_u8
static_always_inline void clib_memset_u8(void *p, u8 val, uword count)
Definition: string.h:441
rdma_device_output_free_mlx5
static_always_inline void rdma_device_output_free_mlx5(vlib_main_t *vm, const vlib_node_runtime_t *node, rdma_txq_t *txq)
Definition: output.c:36
vlib_buffer_t::current_length
u16 current_length
Nbytes between current data and the end of this buffer.
Definition: buffer.h:122
rdma_txq_t::dv_wqe_tmpl
u8 dv_wqe_tmpl[64]
Definition: rdma.h:153
rdma_mlx5_wqe_t::eseg
struct mlx5_wqe_eth_seg eseg
Definition: rdma.h:61
MLX5_ETH_L2_INLINE_HEADER_SIZE
#define MLX5_ETH_L2_INLINE_HEADER_SIZE
Definition: rdma.h:44
rdma_txq_t::dv_sq_wqes
rdma_mlx5_wqe_t * dv_sq_wqes
Definition: rdma.h:135
RDMA_TXQ_DV_SQ_SZ
#define RDMA_TXQ_DV_SQ_SZ(txq)
Definition: rdma.h:169
u64
unsigned long u64
Definition: types.h:89
ring.h
ASSERT
#define ASSERT(truth)
Definition: error_bootstrap.h:69
u32
unsigned int u32
Definition: types.h:88
rdma_device_output_tx_ibverb
static_always_inline u32 rdma_device_output_tx_ibverb(vlib_main_t *vm, const vlib_node_runtime_t *node, const rdma_device_t *rd, rdma_txq_t *txq, u32 n_left_from, u32 *bi, vlib_buffer_t **b)
Definition: output.c:385
RDMA_MLX5_WQE_SZ
#define RDMA_MLX5_WQE_SZ
Definition: rdma.h:64
pci.h
rdma_device_output_tx
static uword rdma_device_output_tx(vlib_main_t *vm, vlib_node_runtime_t *node, rdma_device_t *rd, rdma_txq_t *txq, u32 *from, u32 n_left_from)
Definition: output.c:525
n_left
u32 n_left
Definition: interface_output.c:1096
vlib_main_t
Definition: main.h:102
vlib_buffer_get_current_va
static uword vlib_buffer_get_current_va(vlib_buffer_t *b)
Definition: buffer.h:265
b
vlib_buffer_t ** b
Definition: nat44_ei_out2in.c:717
u8
unsigned char u8
Definition: types.h:56
unix.h
vlib_buffer_get_current
static void * vlib_buffer_get_current(vlib_buffer_t *b)
Get pointer to current data to process.
Definition: buffer.h:257
rdma_mlx5_wqe_t::ctrl
struct mlx5_wqe_ctrl_seg ctrl
Definition: rdma.h:52
i
int i
Definition: flowhash_template.h:376
devices.h
rdma_txq_t::dv_cq_cqes
struct mlx5_cqe64 * dv_cq_cqes
Definition: rdma.h:138
rdma_device_output_tx_mlx5_chained
static_always_inline u32 rdma_device_output_tx_mlx5_chained(vlib_main_t *vm, const vlib_node_runtime_t *node, const rdma_device_t *rd, rdma_txq_t *txq, const u32 n_left_from, const u32 *bi, vlib_buffer_t **b, u16 tail)
Definition: output.c:146
vlib_node_runtime_t
Definition: node.h:454
clib_spinlock_unlock_if_init
static_always_inline void clib_spinlock_unlock_if_init(clib_spinlock_t *p)
Definition: lock.h:129
RDMA_TXQ_DV_INVALID_ID
#define RDMA_TXQ_DV_INVALID_ID
Definition: rdma.h:166
from
from
Definition: nat44_ei_hairpinning.c:415
PREDICT_TRUE
#define PREDICT_TRUE(x)
Definition: clib.h:125
STRUCT_SIZE_OF
#define STRUCT_SIZE_OF(t, f)
Definition: clib.h:75
n_left_from
n_left_from
Definition: nat44_ei_hairpinning.c:416
rdma_device_output_tx_try
static u32 rdma_device_output_tx_try(vlib_main_t *vm, const vlib_node_runtime_t *node, const rdma_device_t *rd, rdma_txq_t *txq, u32 n_left_from, u32 *bi)
Definition: output.c:494
rdma.h
vlib_buffer_t::flags
u32 flags
buffer flags: VLIB_BUFFER_FREE_LIST_INDEX_MASK: bits used to store free list index,...
Definition: buffer.h:133
vlib_buffer_t
VLIB buffer representation.
Definition: buffer.h:111
vnet_interface_output_runtime_t
Definition: interface_funcs.h:475
rdma_txq_t::head
u16 head
Definition: rdma.h:144
flags
vl_api_wireguard_peer_flags_t flags
Definition: wireguard.api:105