FD.io VPP  v21.10.1-2-g0a485f517
Vector Packet Processing
buffer_funcs.c
Go to the documentation of this file.
1 /* SPDX-License-Identifier: Apache-2.0
2  * Copyright(c) 2021 Cisco Systems, Inc.
3  */
4 
#include <vppinfra/clib.h>
#include <vlib/vlib.h>
#include <vppinfra/vector/mask_compare.h>
#include <vppinfra/vector/compress.h>
9 
12  u16 next_index, u32 *buffers, u16 *nexts, u32 n_buffers,
13  u32 n_left, u32 *tmp)
14 {
15  u64 match_bmp[VLIB_FRAME_SIZE / 64];
16  vlib_frame_t *f;
17  u32 n_extracted, n_free;
18  u32 *to;
19 
21 
23 
24  /* if frame contains enough space for worst case scenario, we can avoid
25  * use of tmp */
26  if (n_free >= n_left)
28  else
29  to = tmp;
30 
32 
33  n_extracted = clib_compress_u32 (to, buffers, match_bmp, n_buffers);
34 
35  for (int i = 0; i < ARRAY_LEN (match_bmp); i++)
36  used_elt_bmp[i] |= match_bmp[i];
37 
38  if (to != tmp)
39  {
40  /* indices already written to frame, just close it */
41  vlib_put_next_frame (vm, node, next_index, n_free - n_extracted);
42  }
43  else if (n_free >= n_extracted)
44  {
45  /* enough space in the existing frame */
47  vlib_buffer_copy_indices (to, tmp, n_extracted);
48  vlib_put_next_frame (vm, node, next_index, n_free - n_extracted);
49  }
50  else
51  {
52  /* full frame */
56 
57  /* second frame */
58  u32 n_2nd_frame = n_extracted - n_free;
61  vlib_buffer_copy_indices (to, tmp + n_free, n_2nd_frame);
63  VLIB_FRAME_SIZE - n_2nd_frame);
64  }
65 
66  return n_left - n_extracted;
67 }
68 
69 void __clib_section (".vlib_buffer_enqueue_to_next_fn")
70 CLIB_MULTIARCH_FN (vlib_buffer_enqueue_to_next_fn)
72  uword count)
73 {
75  u32 n_left;
77 
78  while (count >= VLIB_FRAME_SIZE)
79  {
80  u64 used_elt_bmp[VLIB_FRAME_SIZE / 64] = {};
82  u32 off = 0;
83 
84  next_index = nexts[0];
85  n_left = enqueue_one (vm, node, used_elt_bmp, next_index, buffers, nexts,
87 
88  while (n_left)
89  {
90  while (PREDICT_FALSE (used_elt_bmp[off] == ~0))
91  {
92  off++;
93  ASSERT (off < ARRAY_LEN (used_elt_bmp));
94  }
95 
96  next_index =
97  nexts[off * 64 + count_trailing_zeros (~used_elt_bmp[off])];
98  n_left = enqueue_one (vm, node, used_elt_bmp, next_index, buffers,
100  }
101 
102  buffers += VLIB_FRAME_SIZE;
105  }
106 
107  if (count)
108  {
109  u64 used_elt_bmp[VLIB_FRAME_SIZE / 64] = {};
110  next_index = nexts[0];
111  n_left = count;
112  u32 off = 0;
113 
114  n_left = enqueue_one (vm, node, used_elt_bmp, next_index, buffers, nexts,
115  count, n_left, tmp);
116 
117  while (n_left)
118  {
119  while (PREDICT_FALSE (used_elt_bmp[off] == ~0))
120  {
121  off++;
122  ASSERT (off < ARRAY_LEN (used_elt_bmp));
123  }
124 
125  next_index =
126  nexts[off * 64 + count_trailing_zeros (~used_elt_bmp[off])];
127  n_left = enqueue_one (vm, node, used_elt_bmp, next_index, buffers,
128  nexts, count, n_left, tmp);
129  }
130  }
131 }
132 
133 CLIB_MARCH_FN_REGISTRATION (vlib_buffer_enqueue_to_next_fn);
134 
135 void __clib_section (".vlib_buffer_enqueue_to_single_next_fn")
136 CLIB_MULTIARCH_FN (vlib_buffer_enqueue_to_single_next_fn)
138  u32 count)
139 {
140  u32 *to_next, n_left_to_next, n_enq;
141 
142  vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
143 
144  if (PREDICT_TRUE (n_left_to_next >= count))
145  {
146  vlib_buffer_copy_indices (to_next, buffers, count);
147  n_left_to_next -= count;
148  vlib_put_next_frame (vm, node, next_index, n_left_to_next);
149  return;
150  }
151 
152  n_enq = n_left_to_next;
153 next:
154  vlib_buffer_copy_indices (to_next, buffers, n_enq);
155  n_left_to_next -= n_enq;
156 
157  if (PREDICT_FALSE (count > n_enq))
158  {
159  count -= n_enq;
160  buffers += n_enq;
161 
162  vlib_put_next_frame (vm, node, next_index, n_left_to_next);
163  vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
164  n_enq = clib_min (n_left_to_next, count);
165  goto next;
166  }
167  vlib_put_next_frame (vm, node, next_index, n_left_to_next);
168 }
169 CLIB_MARCH_FN_REGISTRATION (vlib_buffer_enqueue_to_single_next_fn);
170 
171 static inline vlib_frame_queue_elt_t *
173  int dont_wait)
174 {
175  vlib_frame_queue_t *fq;
176  u64 nelts, tail, new_tail;
177 
178  fq = fqm->vlib_frame_queues[index];
179  ASSERT (fq);
180  nelts = fq->nelts;
181 
182 retry:
183  tail = __atomic_load_n (&fq->tail, __ATOMIC_ACQUIRE);
184  new_tail = tail + 1;
185 
186  if (new_tail >= fq->head + nelts)
187  {
188  if (dont_wait)
189  return 0;
190 
191  /* Wait until a ring slot is available */
192  while (new_tail >= fq->head + nelts)
194  }
195 
196  if (!__atomic_compare_exchange_n (&fq->tail, &tail, new_tail, 0 /* weak */,
197  __ATOMIC_RELAXED, __ATOMIC_RELAXED))
198  goto retry;
199 
200  return fq->elts + (new_tail & (nelts - 1));
201 }
202 
207  u32 *buffer_indices, u16 *thread_indices,
208  u32 n_packets, int drop_on_congestion)
209 {
210  u32 drop_list[VLIB_FRAME_SIZE], n_drop = 0;
211  u64 used_elts[VLIB_FRAME_SIZE / 64] = {};
212  u64 mask[VLIB_FRAME_SIZE / 64];
213  vlib_frame_queue_elt_t *hf = 0;
215  u32 n_comp, off = 0, n_left = n_packets;
216 
217  thread_index = thread_indices[0];
218 
219 more:
220  clib_mask_compare_u16 (thread_index, thread_indices, mask, n_packets);
221  hf = vlib_get_frame_queue_elt (fqm, thread_index, drop_on_congestion);
222 
223  n_comp = clib_compress_u32 (hf ? hf->buffer_index : drop_list + n_drop,
224  buffer_indices, mask, n_packets);
225 
226  if (hf)
227  {
228  if (node->flags & VLIB_NODE_FLAG_TRACE)
229  hf->maybe_trace = 1;
230  hf->n_vectors = n_comp;
231  __atomic_store_n (&hf->valid, 1, __ATOMIC_RELEASE);
233  }
234  else
235  n_drop += n_comp;
236 
237  n_left -= n_comp;
238 
239  if (n_left)
240  {
241  for (int i = 0; i < ARRAY_LEN (used_elts); i++)
242  used_elts[i] |= mask[i];
243 
244  while (PREDICT_FALSE (used_elts[off] == ~0))
245  {
246  off++;
248  }
249 
250  thread_index =
251  thread_indices[off * 64 + count_trailing_zeros (~used_elts[off])];
252  goto more;
253  }
254 
255  if (drop_on_congestion && n_drop)
256  vlib_buffer_free (vm, drop_list, n_drop);
257 
258  return n_packets - n_drop;
259 }
260 
261 u32 __clib_section (".vlib_buffer_enqueue_to_thread_fn")
262 CLIB_MULTIARCH_FN (vlib_buffer_enqueue_to_thread_fn)
263 (vlib_main_t *vm, vlib_node_runtime_t *node, u32 frame_queue_index,
264  u32 *buffer_indices, u16 *thread_indices, u32 n_packets,
265  int drop_on_congestion)
266 {
269  u32 n_enq = 0;
270 
271  fqm = vec_elt_at_index (tm->frame_queue_mains, frame_queue_index);
272 
273  while (n_packets >= VLIB_FRAME_SIZE)
274  {
276  vm, node, fqm, buffer_indices, thread_indices, VLIB_FRAME_SIZE,
277  drop_on_congestion);
278  buffer_indices += VLIB_FRAME_SIZE;
279  thread_indices += VLIB_FRAME_SIZE;
280  n_packets -= VLIB_FRAME_SIZE;
281  }
282 
283  if (n_packets == 0)
284  return n_enq;
285 
286  n_enq += vlib_buffer_enqueue_to_thread_inline (vm, node, fqm, buffer_indices,
287  thread_indices, n_packets,
288  drop_on_congestion);
289 
290  return n_enq;
291 }
292 
293 CLIB_MARCH_FN_REGISTRATION (vlib_buffer_enqueue_to_thread_fn);
294 
295 u32 __clib_section (".vlib_frame_queue_dequeue_fn")
296 CLIB_MULTIARCH_FN (vlib_frame_queue_dequeue_fn)
298 {
299  u32 thread_id = vm->thread_index;
300  vlib_frame_queue_t *fq = fqm->vlib_frame_queues[thread_id];
301  u32 mask = fq->nelts - 1;
303  u32 n_free, n_copy, *from, *to = 0, processed = 0, vectors = 0;
304  vlib_frame_t *f = 0;
305 
306  ASSERT (fq);
307  ASSERT (vm == vlib_global_main.vlib_mains[thread_id]);
308 
309  if (PREDICT_FALSE (fqm->node_index == ~0))
310  return 0;
311  /*
312  * Gather trace data for frame queues
313  */
314  if (PREDICT_FALSE (fq->trace))
315  {
316  frame_queue_trace_t *fqt;
318  u32 elix;
319 
320  fqt = &fqm->frame_queue_traces[thread_id];
321 
322  fqt->nelts = fq->nelts;
323  fqt->head = fq->head;
324  fqt->tail = fq->tail;
325  fqt->threshold = fq->vector_threshold;
326  fqt->n_in_use = fqt->tail - fqt->head;
327  if (fqt->n_in_use >= fqt->nelts)
328  {
329  // if beyond max then use max
330  fqt->n_in_use = fqt->nelts - 1;
331  }
332 
333  /* Record the number of elements in use in the histogram */
334  fqh = &fqm->frame_queue_histogram[thread_id];
335  fqh->count[fqt->n_in_use]++;
336 
337  /* Record a snapshot of the elements in use */
338  for (elix = 0; elix < fqt->nelts; elix++)
339  {
340  elt = fq->elts + ((fq->head + 1 + elix) & (mask));
341  if (1 || elt->valid)
342  {
343  fqt->n_vectors[elix] = elt->n_vectors;
344  }
345  }
346  fqt->written = 1;
347  }
348 
349  while (1)
350  {
351  if (fq->head == fq->tail)
352  break;
353 
354  elt = fq->elts + ((fq->head + 1) & mask);
355 
356  if (!__atomic_load_n (&elt->valid, __ATOMIC_ACQUIRE))
357  break;
358 
359  from = elt->buffer_index + elt->offset;
360 
361  ASSERT (elt->offset + elt->n_vectors <= VLIB_FRAME_SIZE);
362 
363  if (f == 0)
364  {
365  f = vlib_get_frame_to_node (vm, fqm->node_index);
368  }
369 
370  if (elt->maybe_trace)
372 
373  n_copy = clib_min (n_free, elt->n_vectors);
374 
375  vlib_buffer_copy_indices (to, from, n_copy);
376  to += n_copy;
377  n_free -= n_copy;
378  vectors += n_copy;
379 
380  if (n_free == 0)
381  {
383  vlib_put_frame_to_node (vm, fqm->node_index, f);
384  f = 0;
385  }
386 
387  if (n_copy < elt->n_vectors)
388  {
389  /* not empty - leave it on the ring */
390  elt->n_vectors -= n_copy;
391  elt->offset += n_copy;
392  }
393  else
394  {
395  /* empty - reset and bump head */
396  u32 sz = STRUCT_OFFSET_OF (vlib_frame_queue_elt_t, end_of_reset);
397  clib_memset (elt, 0, sz);
398  __atomic_store_n (&fq->head, fq->head + 1, __ATOMIC_RELEASE);
399  processed++;
400  }
401 
402  /* Limit the number of packets pushed into the graph */
403  if (vectors >= fq->vector_threshold)
404  break;
405  }
406 
407  if (f)
408  {
410  vlib_put_frame_to_node (vm, fqm->node_index, f);
411  }
412 
413  return processed;
414 }
415 
416 CLIB_MARCH_FN_REGISTRATION (vlib_frame_queue_dequeue_fn);
417 
418 #ifndef CLIB_MARCH_VARIANT
420 
421 static clib_error_t *
423 {
426  CLIB_MARCH_FN_POINTER (vlib_buffer_enqueue_to_next_fn);
428  CLIB_MARCH_FN_POINTER (vlib_buffer_enqueue_to_single_next_fn);
430  CLIB_MARCH_FN_POINTER (vlib_buffer_enqueue_to_thread_fn);
432  CLIB_MARCH_FN_POINTER (vlib_frame_queue_dequeue_fn);
433  return 0;
434 }
435 
437 #endif
vlib.h
to
u32 * to
Definition: interface_output.c:1096
tmp
u32 * tmp
Definition: interface_output.c:1096
vlib_frame_t::n_vectors
u16 n_vectors
Definition: node.h:387
clib_mask_compare_u16
static_always_inline void clib_mask_compare_u16(u16 v, u16 *a, u64 *mask, u32 n_elts)
Compare 16-bit elemments with provied value and return bitmap.
Definition: mask_compare.h:67
vlib_buffer_free
static void vlib_buffer_free(vlib_main_t *vm, u32 *buffers, u32 n_buffers)
Free buffers Frees the entire buffer chain for each buffer.
Definition: buffer_funcs.h:979
count_trailing_zeros
#define count_trailing_zeros(x)
Definition: clib.h:161
thread_index
u32 thread_index
Definition: nat44_ei_hairpinning.c:495
compress.h
vlib_buffer_func_main_t::buffer_enqueue_to_thread_fn
vlib_buffer_enqueue_to_thread_fn_t * buffer_enqueue_to_thread_fn
Definition: buffer_funcs.h:74
n_comp
u32 n_comp
Definition: interface_output.c:1096
n_buffers
u32 n_buffers
Definition: interface_output.c:421
next_index
nat44_ei_hairpin_src_next_t next_index
Definition: nat44_ei_hairpinning.c:412
vlib_frame_queue_t::tail
volatile u64 tail
Definition: threads.h:125
vlib_buffer_copy_indices
vlib_buffer_copy_indices(to, tmp, n_free)
f
vlib_frame_t * f
Definition: interface_output.c:1098
vlib_frame_queue_t::vector_threshold
u64 vector_threshold
Definition: threads.h:119
frame_queue_trace_t::n_in_use
u32 n_in_use
Definition: node.h:768
vlib_worker_thread_barrier_check
static void vlib_worker_thread_barrier_check(void)
Definition: threads.h:357
clib.h
next
u16 * next
Definition: nat44_ei_out2in.c:718
VLIB_FRAME_SIZE
#define VLIB_FRAME_SIZE
Definition: node.h:368
node
vlib_main_t vlib_node_runtime_t * node
Definition: nat44_ei.c:3047
u16
unsigned short u16
Definition: types.h:57
CLIB_MULTIARCH_FN
#define CLIB_MULTIARCH_FN(fn)
Definition: cpu.h:53
frame_queue_trace_t::written
u32 written
Definition: node.h:770
vm
vlib_main_t * vm
X-connect all packets from the HOST to the PHY.
Definition: nat44_ei.c:3047
vlib_get_frame_queue_elt
static vlib_frame_queue_elt_t * vlib_get_frame_queue_elt(vlib_frame_queue_main_t *fqm, u32 index, int dont_wait)
Definition: buffer_funcs.c:172
vlib_frame_queue_t::trace
u64 trace
Definition: threads.h:120
vlib_frame_t::frame_flags
u16 frame_flags
Definition: node.h:375
vlib_frame_t
Definition: node.h:372
vlib_get_frame_to_node
vlib_frame_t * vlib_get_frame_to_node(vlib_main_t *vm, u32 to_node_index)
Definition: main.c:184
vlib_main_t::check_frame_queues
volatile uword check_frame_queues
Definition: main.h:259
vlib_buffer_func_main_t::buffer_enqueue_to_next_fn
vlib_buffer_enqueue_to_next_fn_t * buffer_enqueue_to_next_fn
Definition: buffer_funcs.h:72
vlib_buffer_func_main_t::buffer_enqueue_to_single_next_fn
vlib_buffer_enqueue_to_single_next_fn_t * buffer_enqueue_to_single_next_fn
Definition: buffer_funcs.h:73
vlib_frame_queue_elt_t
Definition: threads.h:67
STRUCT_OFFSET_OF
#define STRUCT_OFFSET_OF(t, f)
Definition: clib.h:73
vlib_put_frame_to_node
void vlib_put_frame_to_node(vlib_main_t *vm, u32 to_node_index, vlib_frame_t *f)
Definition: main.c:218
enqueue_one
static_always_inline u32 enqueue_one(vlib_main_t *vm, vlib_node_runtime_t *node, u64 *used_elt_bmp, u16 next_index, u32 *buffers, u16 *nexts, u32 n_buffers, u32 n_left, u32 *tmp)
Definition: buffer_funcs.c:11
count
u8 count
Definition: dhcp.api:208
vlib_buffer_func_main
vlib_buffer_func_main_t vlib_buffer_func_main
Definition: buffer_funcs.c:419
vlib_frame_queue_elt_t::buffer_index
u32 buffer_index[VLIB_FRAME_SIZE]
Definition: threads.h:77
vlib_frame_queue_elt_t::maybe_trace
u32 maybe_trace
Definition: threads.h:71
vec_elt_at_index
#define vec_elt_at_index(v, i)
Get vector value at index i checking that i is in bounds.
Definition: vec_bootstrap.h:203
VLIB_NODE_FLAG_TRACE
#define VLIB_NODE_FLAG_TRACE
Definition: node.h:291
PREDICT_FALSE
#define PREDICT_FALSE(x)
Definition: clib.h:124
ARRAY_LEN
#define ARRAY_LEN(x)
Definition: clib.h:70
vlib_frame_vector_args
static void * vlib_frame_vector_args(vlib_frame_t *f)
Get pointer to frame vector data.
Definition: node_funcs.h:301
vlib_frame_queue_elt_t::n_vectors
u32 n_vectors
Definition: threads.h:72
vlib_global_main
vlib_global_main_t vlib_global_main
Definition: main.c:1786
static_always_inline
#define static_always_inline
Definition: clib.h:112
frame_queue_trace_t::threshold
u32 threshold
Definition: node.h:771
uword
u64 uword
Definition: types.h:112
frame_queue_trace_t::nelts
u32 nelts
Definition: node.h:769
CLIB_MARCH_FN_REGISTRATION
CLIB_MARCH_FN_REGISTRATION(vlib_buffer_enqueue_to_next_fn)
vlib_main_t::thread_index
u32 thread_index
Definition: main.h:215
vlib_buffer_enqueue_to_thread_inline
static_always_inline u32 vlib_buffer_enqueue_to_thread_inline(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_queue_main_t *fqm, u32 *buffer_indices, u16 *thread_indices, u32 n_packets, int drop_on_congestion)
Definition: buffer_funcs.c:204
vlib_frame_queue_main_t::vlib_frame_queues
vlib_frame_queue_t ** vlib_frame_queues
Definition: threads.h:138
frame_queue_trace_t::head
u64 head
Definition: node.h:766
mask
vl_api_pnat_mask_t mask
Definition: pnat.api:45
vlib_buffer_func_main_t
Definition: buffer_funcs.h:70
clib_min
#define clib_min(x, y)
Definition: clib.h:342
clib_compress_u32
static_always_inline u32 clib_compress_u32(u32 *dst, u32 *src, u64 *mask, u32 n_elts)
Compress array of 32-bit elemments into destination array based on mask.
Definition: compress.h:117
frame_queue_trace_t::n_vectors
i32 n_vectors[FRAME_QUEUE_MAX_NELTS]
Definition: node.h:772
index
u32 index
Definition: flow_types.api:221
vlib_buffer_funcs_init
static clib_error_t * vlib_buffer_funcs_init(vlib_main_t *vm)
Definition: buffer_funcs.c:422
mask_compare.h
u64
unsigned long u64
Definition: types.h:89
ASSERT
#define ASSERT(truth)
Definition: error_bootstrap.h:69
n_free
u32 n_free
Definition: interface_output.c:1096
off
u32 off
Definition: interface_output.c:1096
vlib_put_next_frame
vlib_put_next_frame(vm, node, next_index, 0)
u32
unsigned int u32
Definition: types.h:88
VLIB_INIT_FUNCTION
#define VLIB_INIT_FUNCTION(x)
Definition: init.h:172
vlib_thread_main_t
Definition: threads.h:243
n_left
u32 n_left
Definition: interface_output.c:1096
vlib_get_main_by_index
static vlib_main_t * vlib_get_main_by_index(u32 thread_index)
Definition: global_funcs.h:29
n_vectors
return frame n_vectors
Definition: nat44_ei_hairpinning.c:488
elt
app_rx_mq_elt_t * elt
Definition: application.c:488
frame_queue_nelt_counter_t::count
u64 count[FRAME_QUEUE_MAX_NELTS]
Definition: node.h:777
clib_memset
clib_memset(h->entries, 0, sizeof(h->entries[0]) *entries)
vlib_main_t
Definition: main.h:102
vlib_frame_queue_t::nelts
u32 nelts
Definition: threads.h:121
vlib_frame_queue_t::elts
vlib_frame_queue_elt_t * elts
Definition: threads.h:118
clib_error_t
Definition: clib_error.h:21
frame_queue_trace_t
Definition: node.h:763
vlib_init_function_t
clib_error_t *() vlib_init_function_t(struct vlib_main_t *vm)
Definition: init.h:51
i
int i
Definition: flowhash_template.h:376
nexts
u16 nexts[VLIB_FRAME_SIZE]
Definition: nat44_ei_out2in.c:718
vlib_buffer_func_main_t::frame_queue_dequeue_fn
vlib_frame_queue_dequeue_fn_t * frame_queue_dequeue_fn
Definition: buffer_funcs.h:75
vlib_frame_queue_t
Definition: threads.h:114
vlib_node_runtime_t
Definition: node.h:454
CLIB_MARCH_FN_POINTER
#define CLIB_MARCH_FN_POINTER(fn)
Definition: cpu.h:84
from
from
Definition: nat44_ei_hairpinning.c:415
PREDICT_TRUE
#define PREDICT_TRUE(x)
Definition: clib.h:125
vlib_get_thread_main
static vlib_thread_main_t * vlib_get_thread_main()
Definition: global_funcs.h:56
vlib_global_main_t::vlib_mains
vlib_main_t ** vlib_mains
Definition: main.h:281
vlib_get_next_frame
#define vlib_get_next_frame(vm, node, next_index, vectors, n_vectors_left)
Get pointer to next frame vector data by (vlib_node_runtime_t, next_index).
Definition: node_funcs.h:395
vlib_thread_main_t::frame_queue_mains
vlib_frame_queue_main_t * frame_queue_mains
Definition: threads.h:289
vlib_frame_queue_elt_t::valid
volatile u32 valid
Definition: threads.h:70
vlib_frame_queue_main_t
Definition: threads.h:133
used_elts
u64 used_elts[VLIB_FRAME_SIZE/64]
Definition: interface_output.c:1094
vlib_get_next_frame_internal
vlib_frame_t * vlib_get_next_frame_internal(vlib_main_t *vm, vlib_node_runtime_t *node, u32 next_index, u32 allocate_new_next_frame)
Definition: main.c:384
vlib_frame_queue_t::head
volatile u64 head
Definition: threads.h:129
frame_queue_trace_t::tail
u64 tail
Definition: node.h:767
frame_queue_nelt_counter_t
Definition: node.h:775