FD.io VPP  v21.10.1-2-g0a485f517
Vector Packet Processing
input.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2015 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  * http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 /*
16  * input.c: Unix file input
17  *
18  * Copyright (c) 2008 Eliot Dresselhaus
19  *
20  * Permission is hereby granted, free of charge, to any person obtaining
21  * a copy of this software and associated documentation files (the
22  * "Software"), to deal in the Software without restriction, including
23  * without limitation the rights to use, copy, modify, merge, publish,
24  * distribute, sublicense, and/or sell copies of the Software, and to
25  * permit persons to whom the Software is furnished to do so, subject to
26  * the following conditions:
27  *
28  * The above copyright notice and this permission notice shall be
29  * included in all copies or substantial portions of the Software.
30  *
31  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
32  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
33  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
34  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
35  * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
36  * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
37  * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
38  */
39 
40 #include <vlib/vlib.h>
41 #include <vlib/unix/unix.h>
42 #include <signal.h>
43 #include <unistd.h>
45 
46 /* FIXME: epoll support is hard-coded below; it should be detected by autoconf. */
47 #define HAVE_LINUX_EPOLL
48 
49 #ifdef HAVE_LINUX_EPOLL
50 
51 #include <sys/epoll.h>
52 
/*
 * epoll bookkeeping used by the functions in this file; it appears to be
 * kept once per thread (TODO confirm -- the allocation is not shown in this
 * listing, and neither are some of the member lines).
 */
53 typedef struct
54 {
55  CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
56  int epoll_fd; /* epoll instance descriptor; -1 while no instance is open */
57  struct epoll_event *epoll_events; /* event buffer handed to epoll_pwait()/epoll_wait() */
59
60  /* Statistics: counters updated after each epoll wait that did not fail. */
64 
66 
/*
 * Add, modify or remove the descriptor of file f in an epoll instance,
 * depending on update_type.  linux_epoll_input_init installs this function
 * as fm->file_update.
 *
 * Failures of epoll_create()/epoll_ctl() are only logged; the function then
 * returns without touching the descriptor count.
 *
 * (The signature line, some declarations, the case labels and the
 * conditions guarding EPOLLOUT/EPOLLET are not shown in this listing.)
 */
67 static void
69 {
72  f->polling_thread_index);
73  struct epoll_event e = { 0 };
74  int op, add_del = 0; /* add_del: change to apply to em->n_epoll_fds on success */
75
76  e.events = EPOLLIN; /* read readiness is always requested */
78  e.events |= EPOLLOUT;
80  e.events |= EPOLLET;
 /* The event payload is the file's index in fm->file_pool. */
81  e.data.u32 = f - fm->file_pool;
82
83  op = -1;
84
85  switch (update_type)
86  {
88  op = EPOLL_CTL_ADD;
89  add_del = 1;
90  break;
91
93  op = EPOLL_CTL_MOD; /* registration count is unchanged */
94  break;
95
97  op = EPOLL_CTL_DEL;
98  add_del = -1;
99  break;
100
101  default:
102  clib_warning ("unknown update_type %d", update_type);
103  return;
104  }
105
106  /* Worker threads open an epoll fd only if needed: create it on the first ADD. */
107  if (update_type == UNIX_FILE_UPDATE_ADD && em->epoll_fd == -1)
108  {
109  em->epoll_fd = epoll_create (1);
110  if (em->epoll_fd < 0)
111  {
112  clib_unix_warning ("epoll_create");
113  return;
114  }
115  em->n_epoll_fds = 0;
116  }
117
118  if (epoll_ctl (em->epoll_fd, op, f->file_descriptor, &e) < 0)
119  {
120  clib_unix_warning ("epoll_ctl");
121  return;
122  }
123
124  em->n_epoll_fds += add_del;
125
 /*
  * No descriptors remain registered: close the epoll instance and mark it
  * as absent, so that a later ADD creates a fresh one (see above).
  */
126  if (em->n_epoll_fds == 0)
127  {
128  close (em->epoll_fd);
129  em->epoll_fd = -1;
130  }
131 }
132 
/*
 * Body of linux_epoll_input_inline: choose how long this thread may sleep,
 * wait for epoll events (or nap in 100us steps when there is no epoll fd),
 * then run the read/write/error callback of every ready file and save the
 * errors they return.  Always returns 0.
 *
 * (The signature line and the declarations of em, fm and nm are not shown
 * in this listing.)
 */
136 {
137  unix_main_t *um = &unix_main;
140  struct epoll_event *e;
141  int n_fds_ready;
142  int is_main = (thread_index == 0);
143
144  {
146  u32 ticks_until_expiration;
147  f64 timeout;
148  f64 now;
149  int timeout_ms = 0, max_timeout_ms = 10;
150  f64 vector_rate = vlib_last_vectors_per_main_loop (vm);
151
 /* now is only read on the worker (is_main == 0) paths below. */
152  if (is_main == 0)
153  now = vlib_time_now (vm);
154
155  /*
156  * If we've been asked for a fixed-sleep between main loop polls,
157  * do so right away.
158  */
159  if (PREDICT_FALSE (is_main && um->poll_sleep_usec))
160  {
161  struct timespec ts, tsrem;
162  timeout = 0;
163  timeout_ms = 0;
164  node->input_main_loops_per_call = 0;
165  ts.tv_sec = 0;
166  ts.tv_nsec = 1000 * um->poll_sleep_usec; /* microseconds -> nanoseconds */
 /*
  * NOTE(review): nanosleep() rejects tv_nsec values of one second or
  * more; in that case it fails without (per its specification)
  * filling in tsrem, and the retry loop below would copy an
  * uninitialized tsrem.  Confirm poll_sleep_usec is bounded below
  * 1000000 where it is configured.
  */
167
 /* Retry with the remaining time whenever the sleep is cut short. */
168  while (nanosleep (&ts, &tsrem) < 0)
169  {
170  ts = tsrem;
171  }
172  }
173  /* If we're not working very hard, decide how long to sleep */
174  else if (is_main && vector_rate < 2 && vm->api_queue_nonempty == 0
175  && nm->input_node_counts_by_state[VLIB_NODE_STATE_POLLING] == 0)
176  {
177  ticks_until_expiration = TW (tw_timer_first_expires_in_ticks)
178  ((TWT (tw_timer_wheel) *) nm->timing_wheel);
179
180  /* Nothing on the fast wheel, sleep 10ms */
181  if (ticks_until_expiration == TW_SLOTS_PER_RING)
182  {
183  timeout = 10e-3;
184  timeout_ms = max_timeout_ms;
185  }
186  else
187  {
 /* Presumably one timer-wheel tick is 10us -- TODO confirm. */
188  timeout = (f64) ticks_until_expiration *1e-5;
189  if (timeout < 1e-3)
190  timeout_ms = 0; /* less than 1ms away: poll without blocking */
191  else
192  {
193  timeout_ms = timeout * 1e3;
194  /* Must be between 1 and 10 ms. */
195  timeout_ms = clib_max (1, timeout_ms);
196  timeout_ms = clib_min (max_timeout_ms, timeout_ms);
197  }
198  }
199  node->input_main_loops_per_call = 0;
200  }
 /*
  * Idle worker: same 10ms sleep, but only once more than 0.5s has
  * passed since the first main's last barrier release.
  */
201  else if (is_main == 0 && vector_rate < 2 &&
202  (vlib_get_first_main ()->time_last_barrier_release + 0.5 < now) &&
203  nm->input_node_counts_by_state[VLIB_NODE_STATE_POLLING] == 0)
204  {
205  timeout = 10e-3;
206  timeout_ms = max_timeout_ms;
207  node->input_main_loops_per_call = 0;
208  }
209  else /* busy */
210  {
211  /* Don't come back for a respectable number of dispatch cycles */
212  node->input_main_loops_per_call = 1024;
213  }
214
215  /* Allow any signal to wake up our sleep. */
216  if (is_main || em->epoll_fd != -1)
217  {
 /* static, hence zero-initialized; passed as the signal mask for the wait. */
218  static sigset_t unblock_all_signals;
219  n_fds_ready = epoll_pwait (em->epoll_fd,
220  em->epoll_events,
221  vec_len (em->epoll_events),
222  timeout_ms, &unblock_all_signals);
223
224  /* This kludge is necessary to run over absurdly old kernels that lack epoll_pwait */
225  if (n_fds_ready < 0 && errno == ENOSYS)
226  {
227  n_fds_ready = epoll_wait (em->epoll_fd,
228  em->epoll_events,
229  vec_len (em->epoll_events), timeout_ms);
230  }
231
232  }
233  else
234  {
235  /*
236  * Worker thread, no epoll fds: sleep for 100us at a time
237  * and check for a barrier sync request
238  */
239  if (timeout_ms)
240  {
241  struct timespec ts, tsrem;
242  f64 limit = now + (f64) timeout_ms * 1e-3; /* absolute end of the nap, in seconds */
243
244  while (vlib_time_now (vm) < limit)
245  {
246  /* Sleep for 100us at a time */
247  ts.tv_sec = 0;
248  ts.tv_nsec = 1000 * 100;
249
250  while (nanosleep (&ts, &tsrem) < 0)
251  ts = tsrem;
253  *nm->pending_interrupts)
254  goto done;
255  }
256  }
 /* No epoll fd on this thread, so there are no events to dispatch. */
257  goto done;
258  }
259  }
260
261  if (n_fds_ready < 0)
262  {
263  if (unix_error_is_fatal (errno))
264  vlib_panic_with_error (vm, clib_error_return_unix (0, "epoll_wait"));
265
266  /* non fatal error (e.g. EINTR). */
267  goto done;
268  }
269
270  em->epoll_waits += 1;
271  em->epoll_files_ready += n_fds_ready;
272
273  for (e = em->epoll_events; e < em->epoll_events + n_fds_ready; e++)
274  {
275  u32 i = e->data.u32; /* file pool index stored by linux_epoll_file_update */
276  clib_file_t *f;
277  clib_error_t *errors[4];
278  int n_errors = 0;
279
280  /*
281  * Under rare scenarios, epoll may still post us events for the
282  * deleted file descriptor. We just deal with it and throw away the
283  * events for the corresponding file descriptor.
284  */
285  f = fm->file_pool + i;
286  if (PREDICT_FALSE (pool_is_free (fm->file_pool, f)))
287  {
288  if (e->events & EPOLLIN)
289  {
290  errors[n_errors] =
291  clib_error_return (0, "epoll event EPOLLIN dropped due "
292  "to free index %u", i);
293  n_errors++;
294  }
295  if (e->events & EPOLLOUT)
296  {
297  errors[n_errors] =
298  clib_error_return (0, "epoll event EPOLLOUT dropped due "
299  "to free index %u", i);
300  n_errors++;
301  }
302  if (e->events & EPOLLERR)
303  {
304  errors[n_errors] =
305  clib_error_return (0, "epoll event EPOLLERR dropped due "
306  "to free index %u", i);
307  n_errors++;
308  }
309  }
310  else if (PREDICT_TRUE (!(e->events & EPOLLERR)))
311  {
312  if (e->events & EPOLLIN)
313  {
314  f->read_events++;
315  errors[n_errors] = f->read_function (f);
316  /* Make sure f is valid if the file pool moves */
 /*
  * NOTE(review): if the entry is now free, the continue below
  * also skips unix_save_error for whatever read_function just
  * returned -- confirm that dropping it is intended.
  */
317  if (pool_is_free_index (fm->file_pool, i))
318  continue;
319  f = pool_elt_at_index (fm->file_pool, i);
320  n_errors += errors[n_errors] != 0; /* keep the slot only for a non-null error */
321  }
322  if (e->events & EPOLLOUT)
323  {
324  f->write_events++;
325  errors[n_errors] = f->write_function (f);
326  n_errors += errors[n_errors] != 0;
327  }
328  }
329  else
330  {
 /* EPOLLERR: use the file's error callback if it has one, else close the descriptor. */
331  if (f->error_function)
332  {
333  f->error_events++;
334  errors[n_errors] = f->error_function (f);
335  n_errors += errors[n_errors] != 0;
336  }
337  else
338  close (f->file_descriptor);
339  }
340
 /*
  * At most three entries are stored per event (the free-index path
  * above), so the four-slot array is never overrun.
  */
341  ASSERT (n_errors < ARRAY_LEN (errors));
 /* i is reused as a plain loop counter; the pool index is no longer needed. */
342  for (i = 0; i < n_errors; i++)
343  {
344  unix_save_error (um, errors[i]);
345  }
346  }
347
348 done:
350  {
353  }
354
355  return 0;
356 }
357 
/*
 * Entry point used as the node's .function: forwards to
 * linux_epoll_input_inline.  The main-thread call passes a literal 0 as the
 * thread index -- presumably so the always-inline body can be specialized
 * for that case (TODO confirm).
 *
 * (The signature line, the thread_index declaration and the worker-thread
 * call are not shown in this listing.)
 */
358 static uword
361 {
363
364  if (thread_index == 0)
365  return linux_epoll_input_inline (vm, node, frame, 0);
366  else
368 }
369 
370 /* *INDENT-OFF* */
372  .function = linux_epoll_input, /* forwards to linux_epoll_input_inline */
373  .type = VLIB_NODE_TYPE_PRE_INPUT,
374  .name = "unix-epoll-input", /* node name string given to the registration */
375 };
376 /* *INDENT-ON* */
377 
/*
 * Initialize the epoll state and install linux_epoll_file_update as the
 * file-update hook.  Returns 0 on success, or a unix error if
 * epoll_create() fails.
 *
 * (The signature line, several declarations and the loop header over the
 * linux_epoll_mains entries are not shown in this listing.)
 */
378 clib_error_t *
380 {
381  linux_epoll_main_t *em;
384
385
388
390  {
391  /* Allocate some events. */
393
 /*
  * Only the first entry gets an epoll instance right away; every other
  * entry starts at -1 and linux_epoll_file_update creates one on its
  * first ADD.
  */
394  if (linux_epoll_mains == em)
395  {
396  em->epoll_fd = epoll_create (1);
397  if (em->epoll_fd < 0)
398  return clib_error_return_unix (0, "epoll_create");
399  }
400  else
401  em->epoll_fd = -1;
402  }
403
 /* Route file add/modify/delete notifications to the epoll backend. */
404  fm->file_update = linux_epoll_file_update;
405
406  return 0;
407 }
408 
410 
411 #endif /* HAVE_LINUX_EPOLL */
412 
/*
 * Init function that performs no work and always reports success by
 * returning 0.  (The signature line is not shown in this listing.)
 */
413 static clib_error_t *
415 {
416  return 0;
417 }
418 
419 /* *INDENT-OFF* */
421 {
 /* Presumably orders this init function ahead of linux_epoll_input_init -- TODO confirm. */
422  .runs_before = VLIB_INITS ("linux_epoll_input_init"),
423 };
424 /* *INDENT-ON* */
425 
426 /*
427  * fd.io coding-style-patch-verification: ON
428  *
429  * Local Variables:
430  * eval: (c-set-style "gnu")
431  * End:
432  */
vlib.h
linux_epoll_main_t::epoll_fd
int epoll_fd
Definition: input.c:56
linux_epoll_input
static uword linux_epoll_input(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
Definition: input.c:359
vlib_main_t::cpu_id
u32 cpu_id
Definition: main.h:216
file_main
clib_file_main_t file_main
Definition: main.c:63
linux_epoll_main_t::epoll_events
struct epoll_event * epoll_events
Definition: input.c:57
thread_index
u32 thread_index
Definition: nat44_ei_hairpinning.c:495
frame
vlib_main_t vlib_node_runtime_t vlib_frame_t * frame
Definition: nat44_ei.c:3048
linux_epoll_main_t::epoll_files_ready
u64 epoll_files_ready
Definition: input.c:61
linux_epoll_file_update
static void linux_epoll_file_update(clib_file_t *f, clib_file_update_type_t update_type)
Definition: input.c:68
VLIB_NODE_TYPE_PRE_INPUT
@ VLIB_NODE_TYPE_PRE_INPUT
Definition: node.h:81
clib_max
#define clib_max(x, y)
Definition: clib.h:335
f
vlib_frame_t * f
Definition: interface_output.c:1098
pool_elt_at_index
#define pool_elt_at_index(p, i)
Returns pointer to element at given index.
Definition: pool.h:549
CLIB_CACHE_LINE_ALIGN_MARK
#define CLIB_CACHE_LINE_ALIGN_MARK(mark)
Definition: cache.h:60
unix_main
unix_main_t unix_main
Definition: main.c:62
unix_error_is_fatal
static word unix_error_is_fatal(word error)
Definition: error.h:118
VLIB_FRAME_SIZE
#define VLIB_FRAME_SIZE
Definition: node.h:368
linux_epoll_input_node
static vlib_node_registration_t linux_epoll_input_node
(constructor) VLIB_REGISTER_NODE (linux_epoll_input_node)
Definition: input.c:371
node
vlib_main_t vlib_node_runtime_t * node
Definition: nat44_ei.c:3047
linux_epoll_mains
static linux_epoll_main_t * linux_epoll_mains
Definition: input.c:65
clib_error_return
#define clib_error_return(e, args...)
Definition: error.h:99
linux_epoll_main_t
Definition: input.c:53
unix_main_t
Definition: unix.h:53
linux_epoll_input_init
clib_error_t * linux_epoll_input_init(vlib_main_t *vm)
Definition: input.c:379
vlib_main_t::node_main
vlib_node_main_t node_main
Definition: main.h:173
vm
vlib_main_t * vm
X-connect all packets from the HOST to the PHY.
Definition: nat44_ei.c:3047
fm
vnet_feature_main_t * fm
Definition: nat44_ei_hairpinning.c:592
clib_get_current_cpu_id
__clib_export u32 clib_get_current_cpu_id()
Definition: cpu.c:226
clib_file_main_t
Definition: file.h:85
UNIX_FILE_UPDATE_ADD
@ UNIX_FILE_UPDATE_ADD
Definition: file.h:80
vlib_frame_t
Definition: node.h:372
vlib_worker_threads
vlib_worker_thread_t * vlib_worker_threads
Definition: threads.c:35
unix_save_error
static void unix_save_error(unix_main_t *um, clib_error_t *error)
Definition: unix.h:127
clib_unix_warning
#define clib_unix_warning(format, args...)
Definition: error.h:68
vlib_panic_with_error
static void vlib_panic_with_error(vlib_main_t *vm, clib_error_t *error)
Definition: main.h:379
pool_is_free_index
#define pool_is_free_index(P, I)
Use free bitmap to query whether given index is free.
Definition: pool.h:302
vlib_worker_thread_t::wait_at_barrier
volatile u32 * wait_at_barrier
Definition: threads.h:85
vlib_thread_main_t::n_vlib_mains
u32 n_vlib_mains
Definition: threads.h:262
vec_len
#define vec_len(v)
Number of elements in vector (rvalue-only, NULL tolerant)
Definition: vec_bootstrap.h:142
UNIX_FILE_EVENT_EDGE_TRIGGERED
#define UNIX_FILE_EVENT_EDGE_TRIGGERED
Definition: file.h:58
UNIX_FILE_DATA_AVAILABLE_TO_WRITE
#define UNIX_FILE_DATA_AVAILABLE_TO_WRITE
Definition: file.h:57
linux_epoll_main_t::n_epoll_fds
int n_epoll_fds
Definition: input.c:58
vlib_last_vectors_per_main_loop
static u32 vlib_last_vectors_per_main_loop(vlib_main_t *vm)
Definition: main.h:431
vec_elt_at_index
#define vec_elt_at_index(v, i)
Get vector value at index i checking that i is in bounds.
Definition: vec_bootstrap.h:203
clib_file
Definition: file.h:51
PREDICT_FALSE
#define PREDICT_FALSE(x)
Definition: clib.h:124
vlib_get_thread_index
static_always_inline uword vlib_get_thread_index(void)
Definition: threads.h:187
ARRAY_LEN
#define ARRAY_LEN(x)
Definition: clib.h:70
vec_validate_aligned
#define vec_validate_aligned(V, I, A)
Make sure vector is long enough for given index (no header, specified alignment)
Definition: vec.h:534
static_always_inline
#define static_always_inline
Definition: clib.h:112
pool_is_free
#define pool_is_free(P, E)
Use free bitmap to query whether given element is free.
Definition: pool.h:294
uword
u64 uword
Definition: types.h:112
unix_main_t::poll_sleep_usec
u32 poll_sleep_usec
Definition: unix.h:110
TW_SLOTS_PER_RING
#define TW_SLOTS_PER_RING
Definition: tcp_types.h:464
f64
double f64
Definition: types.h:142
TWT
#define TWT(a)
Definition: tw_timer_template.h:27
clib_get_current_numa_node
__clib_export u32 clib_get_current_numa_node()
Definition: cpu.c:234
clib_min
#define clib_min(x, y)
Definition: clib.h:342
CLIB_CACHE_LINE_BYTES
#define CLIB_CACHE_LINE_BYTES
Definition: cache.h:58
vlib_node_registration_t
struct _vlib_node_registration vlib_node_registration_t
UNIX_FILE_UPDATE_DELETE
@ UNIX_FILE_UPDATE_DELETE
Definition: file.h:82
vlib_main_t::numa_node
u32 numa_node
Definition: main.h:217
u64
unsigned long u64
Definition: types.h:89
ASSERT
#define ASSERT(truth)
Definition: error_bootstrap.h:69
vlib_frame_t::flags
u16 flags
Definition: node.h:378
u32
unsigned int u32
Definition: types.h:88
VLIB_INIT_FUNCTION
#define VLIB_INIT_FUNCTION(x)
Definition: init.h:172
vlib_thread_main_t
Definition: threads.h:243
vlib_get_first_main
static vlib_main_t * vlib_get_first_main(void)
Definition: global_funcs.h:44
vec_foreach
#define vec_foreach(var, vec)
Vector iterator.
Definition: vec_bootstrap.h:213
UNIX_FILE_UPDATE_MODIFY
@ UNIX_FILE_UPDATE_MODIFY
Definition: file.h:81
clib_error_return_unix
#define clib_error_return_unix(e, args...)
Definition: error.h:102
unix_input_init
static clib_error_t * unix_input_init(vlib_main_t *vm)
Definition: input.c:414
nm
nat44_ei_main_t * nm
Definition: nat44_ei_hairpinning.c:413
vec_resize
#define vec_resize(V, N)
Resize a vector (no header, unspecified alignment) Add N elements to end of given vector V,...
Definition: vec.h:296
now
f64 now
Definition: nat44_ei_out2in.c:710
vlib_main_t
Definition: main.h:102
tw_timer_1t_3w_1024sl_ov.h
linux_epoll_input_inline
static_always_inline uword linux_epoll_input_inline(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame, u32 thread_index)
Definition: input.c:134
VLIB_INITS
#define VLIB_INITS(...)
Definition: init.h:352
linux_epoll_main_t::epoll_waits
u64 epoll_waits
Definition: input.c:62
clib_error_t
Definition: clib_error.h:21
unix.h
clib_file_update_type_t
clib_file_update_type_t
Definition: file.h:78
vlib_init_function_t
clib_error_t *() vlib_init_function_t(struct vlib_main_t *vm)
Definition: init.h:51
i
int i
Definition: flowhash_template.h:376
clib_warning
#define clib_warning(format, args...)
Definition: error.h:59
TW
#define TW(a)
Definition: tw_timer_template.h:31
vlib_time_now
static f64 vlib_time_now(vlib_main_t *vm)
Definition: main.h:327
vlib_node_main_t
Definition: node.h:665
vlib_node_runtime_t
Definition: node.h:454
PREDICT_TRUE
#define PREDICT_TRUE(x)
Definition: clib.h:125
vlib_get_thread_main
static vlib_thread_main_t * vlib_get_thread_main()
Definition: global_funcs.h:56
VLIB_REGISTER_NODE
#define VLIB_REGISTER_NODE(x,...)
Definition: node.h:169