27 #define FRAME_QUEUE_NELTS 32 45 #ifdef BARRIER_TRACING 49 #ifdef BARRIER_TRACING_ELOG 65 name_copy =
format (0,
"%s%c", msg_name, 0);
83 .format =
"barrier <%d#%s(O:%dus:%dus)(%dus)",
84 .format_args =
"i4T4i4i4i4",
89 u32 count, caller, t_entry, t_open, t_closed;
93 ed->count = (int) vlib_worker_threads[0].barrier_sync_count;
95 ed->t_entry = (int) (1000000.0 * t_entry);
96 ed->t_open = (int) (1000000.0 * t_open);
97 ed->t_closed = (int) (1000000.0 * t_closed);
106 .format =
"barrier <%d(%dus)%s",
107 .format_args =
"i4i4T4",
112 u32 depth, t_entry, caller;
116 ed->depth = (int) vlib_worker_threads[0].recursion_level - 1;
117 ed->t_entry = (int) (1000000.0 * t_entry);
127 .format =
"barrier (%dus)%d>",
128 .format_args =
"i4i4",
137 ed->t_entry = (int) (1000000.0 * t_entry);
138 ed->depth = (int) vlib_worker_threads[0].recursion_level;
147 .format =
"barrier (%dus){%d}(C:%dus)#%d>",
148 .format_args =
"i4i4i4i4",
153 u32 t_entry, t_update_main, t_closed_total,
count;
157 ed->t_entry = (int) (1000000.0 * t_entry);
158 ed->t_update_main = (int) (1000000.0 * t_update_main);
159 ed->t_closed_total = (int) (1000000.0 * t_closed_total);
160 ed->count = (int) vlib_worker_threads[0].barrier_sync_count;
163 vlib_worker_threads[0].barrier_context =
NULL;
166 char barrier_trace[65536];
167 char *btp = barrier_trace;
178 btp += sprintf (btp,
"<%u#%s",
179 (
unsigned int) vlib_worker_threads[0].barrier_sync_count,
180 vlib_worker_threads[0].barrier_caller);
182 if (vlib_worker_threads[0].barrier_context)
184 btp += sprintf (btp,
"[%s]", vlib_worker_threads[0].barrier_context);
188 btp += sprintf (btp,
"(O:%dus:%dus)(%dus):",
189 (
int) (1000000.0 * t_entry),
190 (
int) (1000000.0 * t_open), (
int) (1000000.0 * t_closed));
197 btp += sprintf (btp,
"<%u(%dus)%s:",
198 (
int) vlib_worker_threads[0].recursion_level - 1,
199 (
int) (1000000.0 * t_entry),
200 vlib_worker_threads[0].barrier_caller);
206 btp += sprintf (btp,
":(%dus)%u>", (
int) (1000000.0 * t_entry),
207 (
int) vlib_worker_threads[0].recursion_level);
214 btp += sprintf (btp,
":(%dus)", (
int) (1000000.0 * t_entry));
215 if (t_update_main > 0)
217 btp += sprintf (btp,
"{%dus}", (
int) (1000000.0 * t_update_main));
220 btp += sprintf (btp,
"(C:%dus)#%u>",
221 (
int) (1000000.0 * t_closed_total),
222 (
int) vlib_worker_threads[0].barrier_sync_count);
225 fformat (stderr,
"BTRC %s\n", barrier_trace);
227 vlib_worker_threads[0].barrier_context =
NULL;
269 int pthread_setname_np (pthread_t __target_thread,
const char *__name);
271 pthread_t
thread = pthread_self ();
275 rv = pthread_setname_np (thread, name);
287 return ((
i32) ((*tr0)->no_data_structure_clone)
288 - ((
i32) ((*tr1)->no_data_structure_clone)));
297 fp = fopen (filename,
"r");
303 if (fgets ((
char *) buffer, 256, fp))
307 strlen ((
char *) buffer));
308 if (
unformat (&in,
"%U", unformat_bitmap_list, &r) != 1)
327 u32 n_vlib_mains = 1;
384 pthread_setaffinity_np (pthread_self (),
sizeof (cpu_set_t), &cpuset);
392 _vec_len (vlib_worker_threads) = 1;
397 w->
lwp = syscall (SYS_gettid);
403 struct sched_param sched_param;
404 if (!sched_getparam (w->
lwp, &sched_param))
428 first_index += tr->
count;
440 if (clib_bitmap_get(avail_cpu, c) == 0)
441 return clib_error_return (0,
"cpu %u is not available to be used" 442 " for the '%s' thread",c, tr->name);
444 avail_cpu = clib_bitmap_set(avail_cpu, c, 0);
451 for (j = 0; j < tr->
count; j++)
456 "no available cpus to be used for" 457 " the '%s' thread", tr->
name);
481 memset (fq, 0,
sizeof (*fq));
489 fformat (stderr,
"WARNING: fq->tail unaligned\n");
491 fformat (stderr,
"WARNING: fq->head unaligned\n");
493 fformat (stderr,
"WARNING: fq->elts unaligned\n");
496 fformat (stderr,
"WARNING: fq->elts[0] size %d\n",
497 sizeof (fq->
elts[0]));
498 if (nelts & (nelts - 1))
500 fformat (stderr,
"FATAL: nelts MUST be a power of 2\n");
517 vlib_frame_queue_dequeue_internal (
int thread_id,
525 u32 node_runtime_index;
545 node_runtime_index = elt->node_runtime_index;
550 case VLIB_FRAME_QUEUE_ELT_FREE_BUFFERS:
553 case VLIB_FRAME_QUEUE_ELT_FREE_FRAME:
566 case VLIB_FRAME_QUEUE_ELT_API_MSG:
570 clib_warning (
"bogus frame queue message, type %d", msg_type);
588 return vlib_frame_queue_dequeue_internal (thread_id, vm, nm);
604 new_tail = __sync_add_and_fetch (&fq->
tail, 1);
615 elt = fq->
elts + (new_tail & (fq->
nelts - 1));
626 elt->node_runtime_index = node_runtime_index;
679 w->
lwp = syscall (SYS_gettid);
695 void *(*fp_arg) (
void *) = fp;
705 CPU_SET (lcore_id, &cpuset);
707 if (pthread_create (&worker,
NULL , fp_arg, (
void *) w))
710 if (pthread_setaffinity_np (worker,
sizeof (cpu_set_t), &cpuset))
728 u32 worker_thread_index;
755 if (n_vlib_mains > 1)
785 worker_thread_index = 1;
799 for (k = 0; k < tr->
count; k++)
803 vec_add2 (vlib_worker_threads, w, 1);
818 (
char *)
format (0,
"%s %d", tr->
name, k + 1);
850 u32 save_node_runtime_index;
857 nf->
flags = save_flags;
926 (
vlib_mains[0]->error_main.counters_last_clear,
938 ASSERT (fl_orig - orig_freelist_pool
941 fl_clone[0] = fl_orig[0];
947 worker_thread_index++;
958 for (j = 0; j < tr->
count; j++)
960 vec_add2 (vlib_worker_threads, w, 1);
972 (
char *)
format (0,
"%s %d", tr->
name, j + 1);
980 worker_thread_index = 1;
991 for (j = 0; j < tr->
count; j++)
993 w = vlib_worker_threads + worker_thread_index++;
1005 w = vlib_worker_threads + worker_thread_index++;
1006 err = vlib_launch_thread_int (vlib_worker_thread_bootstrap_fn,
1009 clib_error_report (err);
1063 n = nm_clone->
nodes[j];
1111 u32 save_node_runtime_index;
1118 nf->
flags = save_flags;
1121 old_nodes_clone = nm_clone->
nodes;
1122 nm_clone->
nodes = 0;
1134 new_n = nm->
nodes[j];
1135 old_n_clone = old_nodes_clone[j];
1137 clib_memcpy (new_n_clone, new_n,
sizeof (*new_n));
1141 if (j >= vec_len (old_nodes_clone))
1188 for (j = 0; j <
vec_len (old_rt); j++)
1215 for (j = 0; j <
vec_len (old_rt); j++)
1242 u32 *r = va_arg (*args,
u32 *);
1245 #define _(v,f,s) else if (unformat (input, s)) *r = SCHED_POLICY_##f; 1281 if (
unformat (input,
"use-pthreads"))
1289 else if (
unformat (input,
"coremask-%s %llx", &name, &coremask))
1299 "coremask cannot be set for '%s' threads",
1306 else if (
unformat (input,
"corelist-%s %U", &name, unformat_bitmap_list,
1317 "corelist cannot be set for '%s' threads",
1330 else if (
unformat (input,
"%s %u", &name, &count))
1339 (0,
"number of %s threads not configurable", tr->
name);
1361 "scheduling priority (%d) is not allowed for `normal` scheduling policy",
1383 #if !defined (__x86_64__) && !defined (__i386__) && !defined (__aarch64__) && !defined (__powerpc64__) && !defined(__arm__) 1385 __sync_fetch_and_add_8 (
void)
1387 fformat (stderr,
"%s called\n", __FUNCTION__);
1392 __sync_add_and_fetch_8 (
void)
1394 fformat (stderr,
"%s called\n", __FUNCTION__);
1435 #ifndef BARRIER_MINIMUM_OPEN_LIMIT 1436 #define BARRIER_MINIMUM_OPEN_LIMIT 0.001 1439 #ifndef BARRIER_MINIMUM_OPEN_FACTOR 1440 #define BARRIER_MINIMUM_OPEN_FACTOR 3 1451 f64 max_vector_rate;
1467 if (++vlib_worker_threads[0].recursion_level > 1)
1477 max_vector_rate = 0.0;
1492 if (max_vector_rate > 10.0)
1504 (
"clock change: would have waited for %.4f seconds",
1521 fformat (stderr,
"%s: worker thread deadlock\n", __FUNCTION__);
1552 f64 t_update_main = 0.0;
1553 int refork_needed = 0;
1564 if (--vlib_worker_threads[0].recursion_level > 0)
1608 fformat (stderr,
"%s: worker thread deadlock\n", __FUNCTION__);
1624 fformat (stderr,
"%s: worker thread refork deadlock\n",
1701 for (elix = 0; elix < fqt->
nelts; elix++)
1704 if (1 || elt->
valid)
1740 while (n_left_to_node >= 4)
1748 n_left_to_node -= 4;
1751 while (n_left_to_node > 0)
1825 if (frame_queue_nelts == 0)
1873 (*fp) (callback, args, arg_size);
1876 clib_warning (
"BUG: rpc_call_main_thread_cb_fn NULL!");
1920 .path =
"show clock",
1921 .short_help =
"show clock",
_vlib_init_function_list_elt_t * worker_init_function_registrations
#define vec_validate(V, I)
Make sure vector is long enough for given index (no header, unspecified alignment) ...
static void barrier_trace_release_rec(f64 t_entry)
static void barrier_trace_sync(f64 t_entry, f64 t_open, f64 t_closed)
static f64 vlib_last_vectors_per_main_loop_as_f64(vlib_main_t *vm)
uword * pending_rpc_requests
vlib_main_t vlib_global_main
never_inline void vlib_node_runtime_sync_stats(vlib_main_t *vm, vlib_node_runtime_t *r, uword n_calls, uword n_vectors, uword n_clocks)
clib_error_t *(* vlib_thread_set_lcore_cb)(u32 thread, u16 lcore)
vlib_process_t ** processes
#define VLIB_PENDING_FRAME_NO_NEXT_FRAME
static void vlib_buffer_free(vlib_main_t *vm, u32 *buffers, u32 n_buffers)
Free buffers Frees the entire buffer chain for each buffer.
u8 runtime_data[0]
Function dependent node-runtime data.
int vlib_frame_queue_enqueue(vlib_main_t *vm, u32 node_runtime_index, u32 frame_queue_index, vlib_frame_t *frame, vlib_frame_queue_msg_type_t type)
#define VLIB_MAIN_LOOP_ENTER_FUNCTION(x)
word elog_track_register(elog_main_t *em, elog_track_t *t)
register an event track
void * mheap_alloc(void *memory, uword size)
#define CLIB_MEMORY_STORE_BARRIER()
static f64 vlib_time_now(vlib_main_t *vm)
#define vec_add2_aligned(V, P, N, A)
Add N elements to end of vector V, return pointer to new elements in P.
clib_error_t * threads_init(vlib_main_t *vm)
u32 vlib_frame_queue_main_init(u32 node_index, u32 frame_queue_nelts)
void * thread_function_arg
#define vec_add1(V, E)
Add 1 element to end of vector (unspecified alignment).
static int sort_registrations_by_no_clone(void *a0, void *a1)
static u64 clib_cpu_time_now(void)
frame_queue_trace_t * frame_queue_traces
void vlib_process_signal_event_mt_helper(vlib_process_signal_event_mt_args_t *args)
#define vec_add2(V, P, N)
Add N elements to end of vector V, return pointer to new elements in P.
static mheap_t * mheap_header(u8 *v)
void vnet_main_fixup(vlib_fork_fixup_t which)
static uword * clib_bitmap_set(uword *ai, uword i, uword value)
Sets the ith bit of a bitmap to new_value Removes trailing zeros from the bitmap. ...
#define hash_set_mem(h, key, value)
void vlib_worker_thread_fn(void *arg)
u32 unformat_sched_policy(unformat_input_t *input, va_list *args)
#define vec_validate_aligned(V, I, A)
Make sure vector is long enough for given index (no header, specified alignment)
struct vlib_thread_registration_ * next
#define MHEAP_FLAG_THREAD_SAFE
u32 buffer_index[VLIB_FRAME_SIZE]
vlib_main_t ** vlib_mains
#define clib_bitmap_dup(v)
Duplicate a bitmap.
#define vec_reset_length(v)
Reset vector length to zero NULL-pointer tolerant.
pthread_t thread[MAX_CONNS]
u16 thread_index
thread this node runs on
#define vlib_worker_thread_barrier_sync(X)
u64 * counters_last_clear
static void vlib_worker_thread_barrier_check(void)
vlib_thread_registration_t * next
#define vec_add1_aligned(V, E, A)
Add 1 element to end of vector (alignment specified).
#define VLIB_NODE_RUNTIME_DATA_SIZE
vlib_node_stats_t stats_last_clear
#define clib_smp_atomic_add(addr, increment)
#define pool_foreach(VAR, POOL, BODY)
Iterate through pool.
void vlib_worker_thread_node_runtime_update(void)
u64 count[FRAME_QUEUE_MAX_NELTS]
#define VLIB_INIT_FUNCTION(x)
#define VLIB_INVALID_NODE_INDEX
vlib_frame_queue_msg_type_t
#define vec_elt_at_index(v, i)
Get vector value at index i checking that i is in bounds.
vlib_frame_t * vlib_get_frame_to_node(vlib_main_t *vm, u32 to_node_index)
#define clib_error_return(e, args...)
uword * lock
SMP lock, non-zero means locking required.
#define BARRIER_MINIMUM_OPEN_FACTOR
clib_error_t * vlib_call_init_exit_functions(vlib_main_t *vm, _vlib_init_function_list_elt_t *head, int call_once)
vlib_frame_queue_elt_t * elts
f64 time_last_barrier_release
vlib_node_runtime_t * nodes_by_type[VLIB_N_NODE_TYPE]
void vlib_set_thread_name(char *name)
void vl_msg_api_handler_no_free(void *)
#define hash_create_string(elts, value_bytes)
#define BARRIER_SYNC_TIMEOUT
void * rpc_call_main_thread_cb_fn
VLIB_REGISTER_THREAD(worker_thread_reg, static)
vlib_worker_thread_t * vlib_worker_threads
#define clib_bitmap_foreach(i, ai, body)
Macro to iterate across set bits in a bitmap.
vlib_node_stats_t stats_total
u16 state
Input node state.
static void vlib_process_signal_event(vlib_main_t *vm, uword node_index, uword type_opaque, uword data)
static uword clib_bitmap_first_set(uword *ai)
Return the lowest numbered set bit in a bitmap.
u8 * vlib_thread_stack_init(uword thread_index)
static void vlib_next_frame_init(vlib_next_frame_t *nf)
vlib_error_main_t error_main
static u32 vlib_frame_index(vlib_main_t *vm, vlib_frame_t *f)
vlib_thread_callbacks_t cb
#define VLIB_FRAME_NO_FREE_AFTER_DISPATCH
int vlib_thread_cb_register(struct vlib_main_t *vm, vlib_thread_callbacks_t *cb)
char * name
Track name vector.
#define clib_error_return_unix(e, args...)
void vlib_put_frame_to_node(vlib_main_t *vm, u32 to_node_index, vlib_frame_t *f)
static void * clib_mem_get_per_cpu_heap(void)
void vlib_frame_free(vlib_main_t *vm, vlib_node_runtime_t *r, vlib_frame_t *f)
static clib_error_t * vlib_launch_thread_int(void *fp, vlib_worker_thread_t *w, unsigned lcore_id)
void vlib_worker_thread_node_refork(void)
vlib_buffer_free_list_t * buffer_free_list_pool
u32 node_index
Node index.
uword * init_functions_called
void clib_time_init(clib_time_t *c)
vlib_thread_main_t vlib_thread_main
void(* thread_function)(void *)
static clib_error_t * cpu_config(vlib_main_t *vm, unformat_input_t *input)
#define pool_get_aligned(P, E, A)
Allocate an object E from a pool P (general version).
i32 n_vectors[FRAME_QUEUE_MAX_NELTS]
vlib_frame_queue_t * vlib_frame_queue_alloc(int nelts)
#define clib_mem_alloc_no_fail(size)
#define VLIB_EARLY_CONFIG_FUNCTION(x, n,...)
vlib_frame_queue_t ** vlib_frame_queues
static_always_inline uword vlib_get_thread_index(void)
vlib_pending_frame_t * pending_frames
vlib_thread_function_t * function
int vlib_frame_queue_dequeue(vlib_main_t *vm, vlib_frame_queue_main_t *fqm)
#define vec_free(V)
Free vector's memory (no header).
static void * clib_mem_set_heap(void *heap)
#define clib_warning(format, args...)
static vlib_node_runtime_t * vlib_node_get_runtime(vlib_main_t *vm, u32 node_index)
Get node runtime by node index.
#define clib_memcpy(a, b, c)
frame_queue_nelt_counter_t * frame_queue_histogram
static uword * clib_bitmap_set_multiple(uword *bitmap, uword i, uword value, uword n_bits)
sets the ith through ith + n_bits bits in a bitmap
#define VLIB_FRAME_PENDING
static uword clib_bitmap_get(uword *ai, uword i)
Gets the ith bit value from a bitmap.
#define ELOG_TYPE_DECLARE(f)
void vlib_worker_thread_init(vlib_worker_thread_t *w)
uword os_get_nthreads(void)
static void * clib_mem_get_heap(void)
volatile u32 * wait_at_barrier
#define VLIB_CLI_COMMAND(x,...)
#define FRAME_QUEUE_NELTS
void vlib_stat_segment_unlock(void)
#define hash_create(elts, value_bytes)
static clib_error_t * show_clock_command_fn(vlib_main_t *vm, unformat_input_t *input, vlib_cli_command_t *cmd)
static void barrier_trace_sync_rec(f64 t_entry)
vlib_frame_queue_main_t * frame_queue_mains
static u32 elog_id_for_msg_name(mc_main_t *m, char *msg_name)
static void clib_mem_free(void *p)
#define clib_error_report(e)
#define clib_bitmap_free(v)
Free a bitmap.
void vlib_worker_thread_barrier_sync_int(vlib_main_t *vm)
int need_vlib_worker_thread_node_runtime_update
uword * thread_registrations_by_name
#define BARRIER_MINIMUM_OPEN_LIMIT
volatile u32 * node_reforks_required
static vlib_main_t * vlib_get_main(void)
void vlib_node_sync_stats(vlib_main_t *vm, vlib_node_t *n)
static uword clib_bitmap_count_set_bits(uword *ai)
Return the number of set bits in a bitmap.
static void barrier_trace_release(f64 t_entry, f64 t_closed_total, f64 t_update_main)
void vlib_worker_loop(vlib_main_t *vm)
#define vec_dup_aligned(V, A)
Return copy of vector (no header, alignment specified).
u32 elog_string(elog_main_t *em, char *fmt,...)
add a string to the event-log string table
f64 barrier_no_close_before
static clib_error_t * start_workers(vlib_main_t *vm)
#define vec_len(v)
Number of elements in vector (rvalue-only, NULL tolerant)
void vlib_rpc_call_main_thread(void *callback, u8 *args, u32 arg_size)
vlib_node_main_t node_main
vlib_next_frame_t * next_frames
#define vec_sort_with_function(vec, f)
Sort a vector using the supplied element comparison function.
int no_data_structure_clone
static void * vlib_frame_vector_args(vlib_frame_t *f)
Get pointer to frame vector data.
#define VLIB_THREAD_STACK_SIZE
vlib_frame_size_t * frame_sizes
clib_error_t *(* vlib_launch_thread_cb)(void *fp, vlib_worker_thread_t *w, unsigned lcore_id)
#define hash_get_mem(h, key)
static void worker_thread_node_runtime_update_internal(void)
static void * clib_mem_alloc_aligned(uword size, uword align)
volatile u32 * workers_at_barrier
uword clib_calljmp(uword(*func)(uword func_arg), uword func_arg, void *stack)
static uword * clib_sysfs_list_to_bitmap(char *filename)
void vlib_worker_thread_barrier_release(vlib_main_t *vm)
static vlib_thread_main_t * vlib_get_thread_main()
static f64 vlib_time_now_ticks(vlib_main_t *vm, u64 n)
static vlib_node_t * vlib_get_node(vlib_main_t *vm, u32 i)
Get vlib node by index.
void vlib_stat_segment_lock(void)
#define vec_foreach(var, vec)
Vector iterator.
void * vlib_worker_thread_bootstrap_fn(void *arg)
#define CLIB_MEMORY_BARRIER()
uword * cpu_socket_bitmap
#define foreach_sched_policy
vlib_thread_registration_t ** registrations
#define CLIB_CACHE_LINE_BYTES
vlib_thread_registration_t * registration
void vlib_cli_output(vlib_main_t *vm, char *fmt,...)
volatile u32 worker_thread_release
void vlib_worker_thread_fork_fixup(vlib_fork_fixup_t which)
clib_random_buffer_t random_buffer
#define VLIB_FRAME_FREE_AFTER_DISPATCH
clib_error_t * vlib_thread_init(vlib_main_t *vm)