FD.io VPP  v20.01-48-g3e0dafb74
Vector Packet Processing
output.c
Go to the documentation of this file.
1 /*
2  *------------------------------------------------------------------
3  * Copyright (c) 2018 Cisco and/or its affiliates.
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at:
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  *------------------------------------------------------------------
16  */
17 
18 #include <vlib/vlib.h>
19 #include <vlib/unix/unix.h>
20 #include <vlib/pci/pci.h>
21 #include <vppinfra/ring.h>
22 #include <vnet/ethernet/ethernet.h>
23 #include <vnet/devices/devices.h>
24 
25 #include <avf/avf.h>
26 
29 {
30  return d->qword[1] & 0x0f;
31 }
32 
35  u32 * buffers, u32 n_packets, int use_va_dma)
36 {
37  u16 next = txq->next;
39  u16 n_desc = 0;
40  u16 *slot, n_desc_left, n_packets_left = n_packets;
41  u16 mask = txq->size - 1;
42  vlib_buffer_t *b[4];
43  avf_tx_desc_t *d = txq->descs + next;
44  u16 n_desc_needed;
45  vlib_buffer_t *b0;
46 
47  /* avoid ring wrap */
48  n_desc_left = txq->size - clib_max (txq->next, txq->n_enqueued + 8);
49 
50  if (n_desc_left == 0)
51  return 0;
52 
53  /* Fast path, no ring wrap */
54  while (n_packets_left && n_desc_left)
55  {
56  u32 or_flags;
57  if (n_packets_left < 8 || n_desc_left < 4)
58  goto one_by_one;
59 
60  vlib_prefetch_buffer_with_index (vm, buffers[4], LOAD);
61  vlib_prefetch_buffer_with_index (vm, buffers[5], LOAD);
62  vlib_prefetch_buffer_with_index (vm, buffers[6], LOAD);
63  vlib_prefetch_buffer_with_index (vm, buffers[7], LOAD);
64 
65  b[0] = vlib_get_buffer (vm, buffers[0]);
66  b[1] = vlib_get_buffer (vm, buffers[1]);
67  b[2] = vlib_get_buffer (vm, buffers[2]);
68  b[3] = vlib_get_buffer (vm, buffers[3]);
69 
70  or_flags = b[0]->flags | b[1]->flags | b[2]->flags | b[3]->flags;
71 
72  if (or_flags & VLIB_BUFFER_NEXT_PRESENT)
73  goto one_by_one;
74 
75  vlib_buffer_copy_indices (txq->bufs + next, buffers, 4);
76 
77  if (use_va_dma)
78  {
79  d[0].qword[0] = vlib_buffer_get_current_va (b[0]);
80  d[1].qword[0] = vlib_buffer_get_current_va (b[1]);
81  d[2].qword[0] = vlib_buffer_get_current_va (b[2]);
82  d[3].qword[0] = vlib_buffer_get_current_va (b[3]);
83  }
84  else
85  {
86  d[0].qword[0] = vlib_buffer_get_current_pa (vm, b[0]);
87  d[1].qword[0] = vlib_buffer_get_current_pa (vm, b[1]);
88  d[2].qword[0] = vlib_buffer_get_current_pa (vm, b[2]);
89  d[3].qword[0] = vlib_buffer_get_current_pa (vm, b[3]);
90  }
91 
92  d[0].qword[1] = ((u64) b[0]->current_length) << 34 | bits;
93  d[1].qword[1] = ((u64) b[1]->current_length) << 34 | bits;
94  d[2].qword[1] = ((u64) b[2]->current_length) << 34 | bits;
95  d[3].qword[1] = ((u64) b[3]->current_length) << 34 | bits;
96 
97  next += 4;
98  n_desc += 4;
99  buffers += 4;
100  n_packets_left -= 4;
101  n_desc_left -= 4;
102  d += 4;
103  continue;
104 
105  one_by_one:
106  txq->bufs[next] = buffers[0];
107  b[0] = vlib_get_buffer (vm, buffers[0]);
108 
109  /* Deal with chain buffer if present */
110  if (b[0]->flags & VLIB_BUFFER_NEXT_PRESENT)
111  {
112  n_desc_needed = 1;
113  b0 = b[0];
114 
115  /* Wish there were a buffer count for chain buffer */
116  while (b0->flags & VLIB_BUFFER_NEXT_PRESENT)
117  {
118  b0 = vlib_get_buffer (vm, b0->next_buffer);
119  n_desc_needed++;
120  }
121 
122  /* spec says data descriptor is limited to 8 segments */
123  if (PREDICT_FALSE (n_desc_needed > 8))
124  {
125  vlib_buffer_free_one (vm, buffers[0]);
126  vlib_error_count (vm, node->node_index,
127  AVF_TX_ERROR_SEGMENT_SIZE_EXCEEDED, 1);
128  n_packets_left -= 1;
129  buffers += 1;
130  continue;
131  }
132 
133  if (PREDICT_FALSE (n_desc_left < n_desc_needed))
134  /*
135  * Slow path may be able to deal with this since it can handle
136  * ring wrap
137  */
138  break;
139 
140  while (b[0]->flags & VLIB_BUFFER_NEXT_PRESENT)
141  {
142  if (use_va_dma)
143  d[0].qword[0] = vlib_buffer_get_current_va (b[0]);
144  else
145  d[0].qword[0] = vlib_buffer_get_current_pa (vm, b[0]);
146 
147  d[0].qword[1] = (((u64) b[0]->current_length) << 34) |
149 
150  next += 1;
151  n_desc += 1;
152  n_desc_left -= 1;
153  d += 1;
154 
155  txq->bufs[next] = b[0]->next_buffer;
156  b[0] = vlib_get_buffer (vm, b[0]->next_buffer);
157  }
158  }
159 
160  if (use_va_dma)
161  d[0].qword[0] = vlib_buffer_get_current_va (b[0]);
162  else
163  d[0].qword[0] = vlib_buffer_get_current_pa (vm, b[0]);
164 
165  d[0].qword[1] = (((u64) b[0]->current_length) << 34) | bits;
166 
167  next += 1;
168  n_desc += 1;
169  buffers += 1;
170  n_packets_left -= 1;
171  n_desc_left -= 1;
172  d += 1;
173  }
174 
175  /* Slow path to support ring wrap */
176  if (PREDICT_FALSE (n_packets_left))
177  {
178  txq->n_enqueued += n_desc;
179 
180  n_desc = 0;
181  d = txq->descs + (next & mask);
182 
183  /* +8 to be consistent with fast path */
184  n_desc_left = txq->size - (txq->n_enqueued + 8);
185 
186  while (n_packets_left && n_desc_left)
187  {
188  txq->bufs[next & mask] = buffers[0];
189  b[0] = vlib_get_buffer (vm, buffers[0]);
190 
191  /* Deal with chain buffer if present */
192  if (b[0]->flags & VLIB_BUFFER_NEXT_PRESENT)
193  {
194  n_desc_needed = 1;
195  b0 = b[0];
196 
197  while (b0->flags & VLIB_BUFFER_NEXT_PRESENT)
198  {
199  b0 = vlib_get_buffer (vm, b0->next_buffer);
200  n_desc_needed++;
201  }
202 
203  /* Spec says data descriptor is limited to 8 segments */
204  if (PREDICT_FALSE (n_desc_needed > 8))
205  {
206  vlib_buffer_free_one (vm, buffers[0]);
207  vlib_error_count (vm, node->node_index,
208  AVF_TX_ERROR_SEGMENT_SIZE_EXCEEDED, 1);
209  n_packets_left -= 1;
210  buffers += 1;
211  continue;
212  }
213 
214  if (PREDICT_FALSE (n_desc_left < n_desc_needed))
215  break;
216 
217  while (b[0]->flags & VLIB_BUFFER_NEXT_PRESENT)
218  {
219  if (use_va_dma)
220  d[0].qword[0] = vlib_buffer_get_current_va (b[0]);
221  else
222  d[0].qword[0] = vlib_buffer_get_current_pa (vm, b[0]);
223 
224  d[0].qword[1] = (((u64) b[0]->current_length) << 34) |
226 
227  next += 1;
228  n_desc += 1;
229  n_desc_left -= 1;
230  d = txq->descs + (next & mask);
231 
232  txq->bufs[next & mask] = b[0]->next_buffer;
233  b[0] = vlib_get_buffer (vm, b[0]->next_buffer);
234  }
235  }
236 
237  if (use_va_dma)
238  d[0].qword[0] = vlib_buffer_get_current_va (b[0]);
239  else
240  d[0].qword[0] = vlib_buffer_get_current_pa (vm, b[0]);
241 
242  d[0].qword[1] = (((u64) b[0]->current_length) << 34) | bits;
243 
244  next += 1;
245  n_desc += 1;
246  buffers += 1;
247  n_packets_left -= 1;
248  n_desc_left -= 1;
249  d = txq->descs + (next & mask);
250  }
251  }
252 
253  if ((slot = clib_ring_enq (txq->rs_slots)))
254  {
255  u16 rs_slot = slot[0] = (next - 1) & mask;
256  d = txq->descs + rs_slot;
257  d[0].qword[1] |= AVF_TXD_CMD_RS;
258  }
259 
261  *(txq->qtx_tail) = txq->next = next & mask;
262  txq->n_enqueued += n_desc;
263  return n_packets - n_packets_left;
264 }
265 
269 {
270  avf_main_t *am = &avf_main;
271  vnet_interface_output_runtime_t *rd = (void *) node->runtime_data;
273  u32 thread_index = vm->thread_index;
274  u8 qid = thread_index;
275  avf_txq_t *txq = vec_elt_at_index (ad->txqs, qid % ad->num_queue_pairs);
276  u32 *buffers = vlib_frame_vector_args (frame);
277  u16 n_enq, n_left;
278  u16 n_retry = 2;
279 
281 
282  n_left = frame->n_vectors;
283 
284 retry:
285  /* release consumed bufs */
286  if (txq->n_enqueued)
287  {
288  i32 complete_slot = -1;
289  while (1)
290  {
292 
293  if (slot == 0)
294  break;
295 
296  if (avf_tx_desc_get_dtyp (txq->descs + slot[0]) != 0x0F)
297  break;
298 
299  complete_slot = slot[0];
300 
301  clib_ring_deq (txq->rs_slots);
302  }
303 
304  if (complete_slot >= 0)
305  {
306  u16 first, mask, n_free;
307  mask = txq->size - 1;
308  first = (txq->next - txq->n_enqueued) & mask;
309  n_free = (complete_slot + 1 - first) & mask;
310 
311  txq->n_enqueued -= n_free;
312  vlib_buffer_free_from_ring_no_next (vm, txq->bufs, first, txq->size,
313  n_free);
314  }
315  }
316 
317  if (ad->flags & AVF_DEVICE_F_VA_DMA)
318  n_enq = avf_tx_enqueue (vm, node, txq, buffers, n_left, 1);
319  else
320  n_enq = avf_tx_enqueue (vm, node, txq, buffers, n_left, 0);
321 
322  n_left -= n_enq;
323 
324  if (n_left)
325  {
326  buffers += n_enq;
327 
328  if (n_retry--)
329  goto retry;
330 
331  vlib_buffer_free (vm, buffers, n_left);
332  vlib_error_count (vm, node->node_index,
333  AVF_TX_ERROR_NO_FREE_SLOTS, n_left);
334  }
335 
337 
338  return frame->n_vectors - n_left;
339 }
340 
341 /*
342  * fd.io coding-style-patch-verification: ON
343  *
344  * Local Variables:
345  * eval: (c-set-style "gnu")
346  * End:
347  */
u32 flags
Buffer flags. VLIB_BUFFER_FREE_LIST_INDEX_MASK: bits used to store the free list index; VLIB_BUFFER_IS_TRACED: trace this buffer.
Definition: buffer.h:124
static uword vlib_buffer_get_current_pa(vlib_main_t *vm, vlib_buffer_t *b)
Definition: buffer_funcs.h:427
static uword vlib_buffer_get_current_va(vlib_buffer_t *b)
Definition: buffer.h:237
static void vlib_buffer_free(vlib_main_t *vm, u32 *buffers, u32 n_buffers)
Free buffers. Frees the entire buffer chain for each buffer.
Definition: buffer_funcs.h:890
unsigned long u64
Definition: types.h:89
static void vlib_error_count(vlib_main_t *vm, uword node_index, uword counter, uword increment)
Definition: error_funcs.h:57
static_always_inline void clib_spinlock_unlock_if_init(clib_spinlock_t *p)
Definition: lock.h:110
u32 thread_index
Definition: main.h:218
avf_device_t * devices
Definition: avf.h:223
volatile u32 * qtx_tail
Definition: avf.h:135
unsigned char u8
Definition: types.h:56
u16 * rs_slots
Definition: avf.h:142
#define static_always_inline
Definition: clib.h:99
#define AVF_TXD_CMD_RSV
Definition: avf.h:48
#define vlib_prefetch_buffer_with_index(vm, bi, type)
Prefetch buffer metadata by buffer index. The first 64 bytes of the buffer contain most header information...
Definition: buffer_funcs.h:440
clib_spinlock_t lock
Definition: avf.h:138
#define vec_elt_at_index(v, i)
Get vector value at index i checking that i is in bounds.
VNET_DEVICE_CLASS_TX_FN() avf_device_class(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
Definition: output.c:266
#define VNET_DEVICE_CLASS_TX_FN(devclass)
Definition: interface.h:305
unsigned int u32
Definition: types.h:88
static heap_elt_t * first(heap_header_t *h)
Definition: heap.c:59
#define pool_elt_at_index(p, i)
Returns pointer to element at given index.
Definition: pool.h:519
#define clib_ring_deq(ring)
Definition: ring.h:120
#define AVF_TXD_CMD_EOP
Definition: avf.h:46
unsigned short u16
Definition: types.h:57
#define clib_ring_get_first(ring)
Definition: ring.h:123
#define PREDICT_FALSE(x)
Definition: clib.h:111
u32 node_index
Node index.
Definition: node.h:496
vlib_main_t * vm
Definition: in2out_ed.c:1810
u8 slot
Definition: pci_types.api:22
static_always_inline u16 avf_tx_enqueue(vlib_main_t *vm, vlib_node_runtime_t *node, avf_txq_t *txq, u32 *buffers, u32 n_packets, int use_va_dma)
Definition: output.c:34
u32 flags
Definition: vhost_user.h:141
#define clib_ring_enq(ring)
Definition: ring.h:94
avf_tx_desc_t * descs
Definition: avf.h:139
vlib_main_t vlib_node_runtime_t * node
Definition: in2out_ed.c:1810
static_always_inline u8 avf_tx_desc_get_dtyp(avf_tx_desc_t *d)
Definition: output.c:28
u32 * bufs
Definition: avf.h:140
signed int i32
Definition: types.h:77
Definition: avf.h:132
u16 n_enqueued
Definition: avf.h:141
avf_main_t avf_main
Definition: device.c:37
#define clib_max(x, y)
Definition: clib.h:288
#define AVF_TXD_CMD_RS
Definition: avf.h:47
u16 next
Definition: avf.h:136
static_always_inline void vlib_buffer_copy_indices(u32 *dst, u32 *src, u32 n_indices)
Definition: buffer_funcs.h:102
u32 next_buffer
Next buffer for this linked-list of buffers.
Definition: buffer.h:140
VLIB buffer representation.
Definition: buffer.h:102
u64 qword[2]
Definition: avf.h:110
static void * vlib_frame_vector_args(vlib_frame_t *f)
Get pointer to frame vector data.
Definition: node_funcs.h:244
u16 size
Definition: avf.h:137
vlib_main_t vlib_node_runtime_t vlib_frame_t * frame
Definition: in2out_ed.c:1811
#define CLIB_MEMORY_BARRIER()
Definition: clib.h:115
static void vlib_buffer_free_one(vlib_main_t *vm, u32 buffer_index)
Free one buffer. Shorthand to free a single buffer chain.
Definition: buffer_funcs.h:923
static void vlib_buffer_free_from_ring_no_next(vlib_main_t *vm, u32 *ring, u32 start, u32 ring_size, u32 n_buffers)
Free buffers from ring without freeing tail buffers.
Definition: buffer_funcs.h:962
static_always_inline void clib_spinlock_lock_if_init(clib_spinlock_t *p)
Definition: lock.h:95
static vlib_buffer_t * vlib_get_buffer(vlib_main_t *vm, u32 buffer_index)
Translate buffer index into buffer pointer.
Definition: buffer_funcs.h:85