25 #include <vpp/app/version.h> 26 #include <linux/limits.h> 48 #if defined(__x86_64__) 50 asm volatile (
"mov $1, %%eax; cpuid; mov %%eax, %0":
"=r" (cpuid)::
"%eax",
51 "%edx",
"%ecx",
"%rbx");
60 u32 n_models,
u8 model,
u8 stepping)
63 for (i = 0; i < n_models; i++)
65 if (mt[i].model != model)
68 if (mt[i].has_stepping)
70 if (mt[i].stepping != stepping)
81 u8 model,
u8 stepping)
121 model = ((cpuid >> 12) & 0xf0) | ((cpuid >> 4) & 0xf);
122 stepping = cpuid & 0xf;
153 .version = VPP_BUILD_VER,
154 .description =
"Performance Monitor",
155 #if !defined(__x86_64__) 156 .default_disabled = 1,
222 if (
unformat (line_input,
"timeout %u", &timeout_seconds))
224 else if (
unformat (line_input,
"instructions-per-clock"))
226 ec.
name =
"instructions";
227 ec.
pe_type = PERF_TYPE_HARDWARE;
228 ec.
pe_config = PERF_COUNT_HW_INSTRUCTIONS;
231 ec.
name =
"cpu-cycles";
232 ec.
pe_type = PERF_TYPE_HARDWARE;
236 else if (
unformat (line_input,
"branch-mispredict-rate"))
238 ec.
name =
"branch-misses";
239 ec.
pe_type = PERF_TYPE_HARDWARE;
240 ec.
pe_config = PERF_COUNT_HW_BRANCH_MISSES;
243 ec.
name =
"branches";
244 ec.
pe_type = PERF_TYPE_HARDWARE;
245 ec.
pe_config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS;
248 else if (
unformat (line_input,
"threads %U",
251 else if (
unformat (line_input,
"thread %U",
258 #define _(type,event,str) \ 259 else if (unformat (line_input, str)) \ 263 ec.pe_config = event; \ 264 vec_add1 (pm->single_events_to_collect, ec); \ 280 if (last_set != ~0 && last_set >= num_threads)
302 vlib_cli_output (vm,
"Start collection for %d events, wait %.2f seconds",
331 .short_help =
"set pmc [threads n,n1-n2] c1... [see \"show pmc events\"]",
352 int verbose __attribute__ ((unused)) = va_arg (*args,
int);
358 s =
format (s,
"%=40s%=20s%=16s%=16s%=16s",
359 "Name",
"Counter",
"Count",
"Pkts",
"Counts/Pkt");
387 s =
format (s,
"%-40s%+20s%+16llu%+16llu%+16.2e\n",
388 name,
"instructions-per-clock",
403 mispredict_rate = 0.0;
405 s =
format (s,
"%-40s%+20s%+16llu%+16llu%+16.2e\n",
406 name,
"branch-mispredict-rate",
418 s =
format (s,
"%-40s%+20s%+16llu%+16llu%+16.2e",
429 int verbose = va_arg (*args,
int);
431 #define _(type,config,name) \ 433 s = format (s, "\n %s", name); \ 435 s = format (s, "\n %s (%d, %d)", name, type, config); 453 return strcmp ((
char *) nvp1->
name, (
char *) nvp2->
name);
477 int verbose = va_arg (*args,
int);
486 vec_add2 (sort_nvps, sn, 1);
523 else if (
unformat (input,
"verbose"))
557 vec_add1 (captures, *c);
566 for (i = 0; i <
vec_len (captures); i++)
582 .short_help =
"show pmc [verbose]",
620 .short_help =
"clear the performance monitor counters",
vlib_log_class_t vlib_log_register_class(char *class, char *subclass)
perfmon_capture_t * capture_pool
#define vec_validate(V, I)
Make sure vector is long enough for given index (no header, unspecified alignment) ...
static clib_error_t * set_pmc_command_fn(vlib_main_t *vm, unformat_input_t *input, vlib_cli_command_t *cmd)
u64 * vectors_this_counter
perfmon_event_config_t * paired_events_to_collect
static int capture_name_sort(void *a1, void *a2)
vnet_main_t * vnet_get_main(void)
vlib_node_registration_t perfmon_periodic_node
(constructor) VLIB_REGISTER_NODE (perfmon_periodic_node)
#define vec_add1(V, E)
Add 1 element to end of vector (unspecified alignment).
u32 mispredict_event_index
#define hash_set_mem(h, key, value)
#define hash_get_pair_mem(h, key)
#define clib_bitmap_zero(v)
Clear a bitmap.
#define vec_reset_length(v)
Reset vector length to zero (NULL-pointer tolerant).
static uword vlib_process_suspend(vlib_main_t *vm, f64 dt)
Suspend a vlib cooperative multi-tasking thread for a period of time.
#define pool_foreach(VAR, POOL, BODY)
Iterate through pool.
#define VLIB_INIT_FUNCTION(x)
#define clib_error_return(e, args...)
static clib_error_t * show_pmc_command_fn(vlib_main_t *vm, unformat_input_t *input, vlib_cli_command_t *cmd)
u8 * thread_and_node_name
static uword unformat_processor_event(unformat_input_t *input, va_list *args)
#define hash_create_string(elts, value_bytes)
static u8 * format_pmc_event(u8 *s, va_list *args)
perfmon_intel_pmc_event_t * events
static perfmon_intel_pmc_event_t * perfmon_find_table_by_model_stepping(perfmon_main_t *pm, u8 model, u8 stepping)
static void vlib_process_signal_event(vlib_main_t *vm, uword node_index, uword type_opaque, uword data)
static uword clib_bitmap_last_set(uword *ai)
Return the highest numbered set bit in a bitmap.
uword * pmc_event_by_name
perfmon_intel_pmc_cpu_model_t * models
perfmon_intel_pmc_event_t * perfmon_table
static u8 * format_capture(u8 *s, va_list *args)
#define hash_foreach_mem(key_var, value_var, h, body)
vlib_log_class_t log_class
#define pool_free(p)
Free a pool.
static u32 get_cpuid(void)
static clib_error_t * clear_pmc_command_fn(vlib_main_t *vm, unformat_input_t *input, vlib_cli_command_t *cmd)
sll srl srl sll sra u16x4 i
#define vec_free(V)
Free vector's memory (no header).
static int sort_nvps_by_name(void *a1, void *a2)
#define VLIB_CLI_COMMAND(x,...)
void vlib_cli_output(vlib_main_t *vm, char *fmt,...)
perfmon_event_config_t * single_events_to_collect
#define vec_append(v1, v2)
Append v2 after v1.
static u8 * format_processor_events(u8 *s, va_list *args)
static clib_error_t * perfmon_init(vlib_main_t *vm)
perfmon_main_t perfmon_main
void perfmon_register_intel_pmc(perfmon_intel_pmc_cpu_model_t *m, int n_models, perfmon_intel_pmc_event_t *e, int n_events)
#define foreach_perfmon_event
uword * capture_by_thread_and_node_name
static u8 * format_generic_events(u8 *s, va_list *args)
#define vec_len(v)
Number of elements in vector (rvalue-only, NULL tolerant)
perfmon_intel_pmc_registration_t * perfmon_tables
#define vec_sort_with_function(vec, f)
Sort a vector using the supplied element comparison function.
static vlib_thread_main_t * vlib_get_thread_main()
#define vec_foreach(var, vec)
Vector iterator.
#define vlib_log_err(...)
static int perfmon_cpu_model_matches(perfmon_intel_pmc_cpu_model_t *mt, u32 n_models, u8 model, u8 stepping)
static uword pool_elts(void *v)
Number of active elements in a pool.