8 CTX_BIT_KERNEL = 1 << 1,
10 CTX_BIT_HOST = 1 << 3,
11 CTX_BIT_IDLE = 1 << 4,
/* Size of the context dimension of the per-context tables below. */
15 #define NUM_CTX CTX_BIT_MAX
/* Fed from the SW_TASK_CLOCK event; indexed by CPU only (no context dimension). */
17 static struct stats runtime_nsecs_stats[MAX_NR_CPUS];
/*
 * Per-event accumulators, indexed as [ctx][cpu]. The ctx index is the
 * value returned by evsel_context() for the event being recorded or
 * printed.
 */
18 static struct stats runtime_cycles_stats[NUM_CTX][MAX_NR_CPUS];
19 static struct stats runtime_stalled_cycles_front_stats[NUM_CTX][MAX_NR_CPUS];
20 static struct stats runtime_stalled_cycles_back_stats[NUM_CTX][MAX_NR_CPUS];
21 static struct stats runtime_branches_stats[NUM_CTX][MAX_NR_CPUS];
22 static struct stats runtime_cacherefs_stats[NUM_CTX][MAX_NR_CPUS];
23 static struct stats runtime_l1_dcache_stats[NUM_CTX][MAX_NR_CPUS];
24 static struct stats runtime_l1_icache_stats[NUM_CTX][MAX_NR_CPUS];
25 static struct stats runtime_ll_cache_stats[NUM_CTX][MAX_NR_CPUS];
26 static struct stats runtime_itlb_cache_stats[NUM_CTX][MAX_NR_CPUS];
27 static struct stats runtime_dtlb_cache_stats[NUM_CTX][MAX_NR_CPUS];
28 static struct stats runtime_cycles_in_tx_stats[NUM_CTX][MAX_NR_CPUS];
29 static struct stats runtime_transaction_stats[NUM_CTX][MAX_NR_CPUS];
30 static struct stats runtime_elision_stats[NUM_CTX][MAX_NR_CPUS];
/*
 * Not static, so it has external linkage; it is zeroed together with the
 * tables above by perf_stat__reset_shadow_stats().
 */
32 struct stats walltime_nsecs_stats;
/*
 * Derive the context index of an event from the exclude_* bits of its
 * attributes. Callers use the result as the first index into the
 * [NUM_CTX][MAX_NR_CPUS] stats tables.
 *
 * @evsel: event whose attr.exclude_{kernel,user,hv,host,idle} bits are read
 *
 * NOTE(review): this listing omits several lines of the body: the
 * declaration of 'ctx', the statements guarded by four of the five tests
 * below, and the return. Presumably each test ORs the matching CTX_BIT_*
 * flag into 'ctx', as the exclude_kernel one does -- confirm against the
 * full file.
 */
34 static int evsel_context(struct perf_evsel *evsel)
38 if (evsel->attr.exclude_kernel)
39 ctx |= CTX_BIT_KERNEL;
40 if (evsel->attr.exclude_user)
42 if (evsel->attr.exclude_hv)
44 if (evsel->attr.exclude_host)
46 if (evsel->attr.exclude_idle)
52 void perf_stat__reset_shadow_stats(void)
54 memset(runtime_nsecs_stats, 0, sizeof(runtime_nsecs_stats));
55 memset(runtime_cycles_stats, 0, sizeof(runtime_cycles_stats));
56 memset(runtime_stalled_cycles_front_stats, 0, sizeof(runtime_stalled_cycles_front_stats));
57 memset(runtime_stalled_cycles_back_stats, 0, sizeof(runtime_stalled_cycles_back_stats));
58 memset(runtime_branches_stats, 0, sizeof(runtime_branches_stats));
59 memset(runtime_cacherefs_stats, 0, sizeof(runtime_cacherefs_stats));
60 memset(runtime_l1_dcache_stats, 0, sizeof(runtime_l1_dcache_stats));
61 memset(runtime_l1_icache_stats, 0, sizeof(runtime_l1_icache_stats));
62 memset(runtime_ll_cache_stats, 0, sizeof(runtime_ll_cache_stats));
63 memset(runtime_itlb_cache_stats, 0, sizeof(runtime_itlb_cache_stats));
64 memset(runtime_dtlb_cache_stats, 0, sizeof(runtime_dtlb_cache_stats));
65 memset(runtime_cycles_in_tx_stats, 0,
66 sizeof(runtime_cycles_in_tx_stats));
67 memset(runtime_transaction_stats, 0,
68 sizeof(runtime_transaction_stats));
69 memset(runtime_elision_stats, 0, sizeof(runtime_elision_stats));
70 memset(&walltime_nsecs_stats, 0, sizeof(walltime_nsecs_stats));
74 * Update various tracking values we maintain to print
75 * more semantic information such as miss/hit ratios,
76 * instruction rates, etc:
78 void perf_stat__update_shadow_stats(struct perf_evsel *counter, u64 *count,
81 int ctx = evsel_context(counter);
83 if (perf_evsel__match(counter, SOFTWARE, SW_TASK_CLOCK))
84 update_stats(&runtime_nsecs_stats[cpu], count[0]);
85 else if (perf_evsel__match(counter, HARDWARE, HW_CPU_CYCLES))
86 update_stats(&runtime_cycles_stats[ctx][cpu], count[0]);
87 else if (perf_stat_evsel__is(counter, CYCLES_IN_TX))
88 update_stats(&runtime_cycles_in_tx_stats[ctx][cpu], count[0]);
89 else if (perf_stat_evsel__is(counter, TRANSACTION_START))
90 update_stats(&runtime_transaction_stats[ctx][cpu], count[0]);
91 else if (perf_stat_evsel__is(counter, ELISION_START))
92 update_stats(&runtime_elision_stats[ctx][cpu], count[0]);
93 else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_FRONTEND))
94 update_stats(&runtime_stalled_cycles_front_stats[ctx][cpu], count[0]);
95 else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_BACKEND))
96 update_stats(&runtime_stalled_cycles_back_stats[ctx][cpu], count[0]);
97 else if (perf_evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS))
98 update_stats(&runtime_branches_stats[ctx][cpu], count[0]);
99 else if (perf_evsel__match(counter, HARDWARE, HW_CACHE_REFERENCES))
100 update_stats(&runtime_cacherefs_stats[ctx][cpu], count[0]);
101 else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1D))
102 update_stats(&runtime_l1_dcache_stats[ctx][cpu], count[0]);
103 else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1I))
104 update_stats(&runtime_ll_cache_stats[ctx][cpu], count[0]);
105 else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_LL))
106 update_stats(&runtime_ll_cache_stats[ctx][cpu], count[0]);
107 else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_DTLB))
108 update_stats(&runtime_dtlb_cache_stats[ctx][cpu], count[0]);
109 else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_ITLB))
110 update_stats(&runtime_itlb_cache_stats[ctx][cpu], count[0]);
113 /*
 * Type selector for get_ratio_color(): each enumerator is used there as a
 * row index into the threshold table.
 * NOTE(review): the enum header and remaining enumerators are not visible
 * in this listing.
 */
115 GRC_STALLED_CYCLES_FE,
116 GRC_STALLED_CYCLES_BE,
121 static const char *get_ratio_color(enum grc_type type, double ratio)
123 static const double grc_table[GRC_MAX_NR][3] = {
124 [GRC_STALLED_CYCLES_FE] = { 50.0, 30.0, 10.0 },
125 [GRC_STALLED_CYCLES_BE] = { 75.0, 50.0, 20.0 },
126 [GRC_CACHE_MISSES] = { 20.0, 10.0, 5.0 },
128 const char *color = PERF_COLOR_NORMAL;
130 if (ratio > grc_table[type][0])
131 color = PERF_COLOR_RED;
132 else if (ratio > grc_table[type][1])
133 color = PERF_COLOR_MAGENTA;
134 else if (ratio > grc_table[type][2])
135 color = PERF_COLOR_YELLOW;
/*
 * Print the front-end stalled-cycle value as a coloured percentage of the
 * cycles figure held for the same context and CPU.
 *
 * @out:   stream the annotation is written to
 * @cpu:   second index into runtime_cycles_stats
 * @evsel: event; evsel_context() maps it to the first table index. It is
 *         tagged __maybe_unused although it is read below.
 * @avg:   stalled-cycle value used as the numerator
 *
 * NOTE(review): this listing appears to omit some lines of the body (at
 * least the declaration of 'color', and possibly a guard on 'total');
 * confirm against the full file before editing.
 */
140 static void print_stalled_cycles_frontend(FILE *out, int cpu,
141 struct perf_evsel *evsel
142 __maybe_unused, double avg)
144 double total, ratio = 0.0;
146 int ctx = evsel_context(evsel);
/* Denominator: avg_stats() of the cycles table for this context/CPU. */
148 total = avg_stats(&runtime_cycles_stats[ctx][cpu]);
151 ratio = avg / total * 100.0;
/* Colour is chosen from the front-end stall thresholds. */
153 color = get_ratio_color(GRC_STALLED_CYCLES_FE, ratio);
156 color_fprintf(out, color, "%6.2f%%", ratio);
157 fprintf(out, " frontend cycles idle ");
/*
 * Print the back-end stalled-cycle value as a coloured percentage of the
 * cycles figure held for the same context and CPU.
 *
 * @out:   stream the annotation is written to
 * @cpu:   second index into runtime_cycles_stats
 * @evsel: event; evsel_context() maps it to the first table index. It is
 *         tagged __maybe_unused although it is read below.
 * @avg:   stalled-cycle value used as the numerator
 *
 * NOTE(review): this listing appears to omit some lines of the body (at
 * least the declaration of 'color', and possibly a guard on 'total');
 * confirm against the full file before editing.
 */
160 static void print_stalled_cycles_backend(FILE *out, int cpu,
161 struct perf_evsel *evsel
162 __maybe_unused, double avg)
164 double total, ratio = 0.0;
166 int ctx = evsel_context(evsel);
/* Denominator: avg_stats() of the cycles table for this context/CPU. */
168 total = avg_stats(&runtime_cycles_stats[ctx][cpu]);
171 ratio = avg / total * 100.0;
/* Colour is chosen from the back-end stall thresholds. */
173 color = get_ratio_color(GRC_STALLED_CYCLES_BE, ratio);
176 color_fprintf(out, color, "%6.2f%%", ratio);
177 fprintf(out, " backend cycles idle ");
/*
 * Print a branch-miss value as a coloured percentage of the branch figure
 * held for the same context and CPU.
 *
 * @out:   stream the annotation is written to
 * @cpu:   second index into runtime_branches_stats
 * @evsel: event; evsel_context() maps it to the first table index. It is
 *         tagged __maybe_unused although it is read below.
 *
 * 'avg', the numerator, is used below but its declaration is not visible.
 * NOTE(review): this listing omits the end of the parameter list, the
 * declaration of 'color' and possibly a guard on 'total'; confirm against
 * the full file before editing.
 */
180 static void print_branch_misses(FILE *out, int cpu,
181 struct perf_evsel *evsel __maybe_unused,
184 double total, ratio = 0.0;
186 int ctx = evsel_context(evsel);
/* Denominator: avg_stats() of the branches table for this context/CPU. */
188 total = avg_stats(&runtime_branches_stats[ctx][cpu]);
191 ratio = avg / total * 100.0;
/* Branch misses are coloured with the cache-miss thresholds. */
193 color = get_ratio_color(GRC_CACHE_MISSES, ratio);
196 color_fprintf(out, color, "%6.2f%%", ratio);
197 fprintf(out, " of all branches ");
/*
 * Print an L1 data-cache miss value as a coloured percentage of the
 * L1-dcache figure held for the same context and CPU.
 *
 * @out:   stream the annotation is written to
 * @cpu:   second index into runtime_l1_dcache_stats
 * @evsel: event; evsel_context() maps it to the first table index. It is
 *         tagged __maybe_unused although it is read below.
 *
 * 'avg', the numerator, is used below but its declaration is not visible.
 * NOTE(review): this listing omits the end of the parameter list, the
 * declaration of 'color' and possibly a guard on 'total'; confirm against
 * the full file before editing.
 */
200 static void print_l1_dcache_misses(FILE *out, int cpu,
201 struct perf_evsel *evsel __maybe_unused,
204 double total, ratio = 0.0;
206 int ctx = evsel_context(evsel);
/* Denominator: avg_stats() of the L1-dcache table for this context/CPU. */
208 total = avg_stats(&runtime_l1_dcache_stats[ctx][cpu]);
211 ratio = avg / total * 100.0;
213 color = get_ratio_color(GRC_CACHE_MISSES, ratio);
216 color_fprintf(out, color, "%6.2f%%", ratio);
217 fprintf(out, " of all L1-dcache hits ");
/*
 * Print an L1 instruction-cache miss value as a coloured percentage of
 * the L1-icache figure held for the same context and CPU.
 *
 * @out:   stream the annotation is written to
 * @cpu:   second index into runtime_l1_icache_stats
 * @evsel: event; evsel_context() maps it to the first table index. It is
 *         tagged __maybe_unused although it is read below.
 *
 * 'avg', the numerator, is used below but its declaration is not visible.
 * NOTE(review): this listing omits the end of the parameter list, the
 * declaration of 'color' and possibly a guard on 'total'; confirm against
 * the full file before editing.
 */
220 static void print_l1_icache_misses(FILE *out, int cpu,
221 struct perf_evsel *evsel __maybe_unused,
224 double total, ratio = 0.0;
226 int ctx = evsel_context(evsel);
/* Denominator: avg_stats() of the L1-icache table for this context/CPU. */
228 total = avg_stats(&runtime_l1_icache_stats[ctx][cpu]);
231 ratio = avg / total * 100.0;
233 color = get_ratio_color(GRC_CACHE_MISSES, ratio);
236 color_fprintf(out, color, "%6.2f%%", ratio);
237 fprintf(out, " of all L1-icache hits ");
/*
 * Print a data-TLB miss value as a coloured percentage of the dTLB figure
 * held for the same context and CPU.
 *
 * @out:   stream the annotation is written to
 * @cpu:   second index into runtime_dtlb_cache_stats
 * @evsel: event; evsel_context() maps it to the first table index. It is
 *         tagged __maybe_unused although it is read below.
 *
 * 'avg', the numerator, is used below but its declaration is not visible.
 * NOTE(review): this listing omits the end of the parameter list, the
 * declaration of 'color' and possibly a guard on 'total'; confirm against
 * the full file before editing.
 */
240 static void print_dtlb_cache_misses(FILE *out, int cpu,
241 struct perf_evsel *evsel __maybe_unused,
244 double total, ratio = 0.0;
246 int ctx = evsel_context(evsel);
/* Denominator: avg_stats() of the dTLB table for this context/CPU. */
248 total = avg_stats(&runtime_dtlb_cache_stats[ctx][cpu]);
251 ratio = avg / total * 100.0;
253 color = get_ratio_color(GRC_CACHE_MISSES, ratio);
256 color_fprintf(out, color, "%6.2f%%", ratio);
257 fprintf(out, " of all dTLB cache hits ");
/*
 * Print an instruction-TLB miss value as a coloured percentage of the
 * iTLB figure held for the same context and CPU.
 *
 * @out:   stream the annotation is written to
 * @cpu:   second index into runtime_itlb_cache_stats
 * @evsel: event; evsel_context() maps it to the first table index. It is
 *         tagged __maybe_unused although it is read below.
 *
 * 'avg', the numerator, is used below but its declaration is not visible.
 * NOTE(review): this listing omits the end of the parameter list, the
 * declaration of 'color' and possibly a guard on 'total'; confirm against
 * the full file before editing.
 */
260 static void print_itlb_cache_misses(FILE *out, int cpu,
261 struct perf_evsel *evsel __maybe_unused,
264 double total, ratio = 0.0;
266 int ctx = evsel_context(evsel);
/* Denominator: avg_stats() of the iTLB table for this context/CPU. */
268 total = avg_stats(&runtime_itlb_cache_stats[ctx][cpu]);
271 ratio = avg / total * 100.0;
273 color = get_ratio_color(GRC_CACHE_MISSES, ratio);
276 color_fprintf(out, color, "%6.2f%%", ratio);
277 fprintf(out, " of all iTLB cache hits ");
/*
 * Print a last-level-cache miss value as a coloured percentage of the
 * LL-cache figure held for the same context and CPU.
 *
 * @out:   stream the annotation is written to
 * @cpu:   second index into runtime_ll_cache_stats
 * @evsel: event; evsel_context() maps it to the first table index. It is
 *         tagged __maybe_unused although it is read below.
 *
 * 'avg', the numerator, is used below but its declaration is not visible.
 * NOTE(review): this listing omits the end of the parameter list, the
 * declaration of 'color' and possibly a guard on 'total'; confirm against
 * the full file before editing.
 */
280 static void print_ll_cache_misses(FILE *out, int cpu,
281 struct perf_evsel *evsel __maybe_unused,
284 double total, ratio = 0.0;
286 int ctx = evsel_context(evsel);
/* Denominator: avg_stats() of the LL-cache table for this context/CPU. */
288 total = avg_stats(&runtime_ll_cache_stats[ctx][cpu]);
291 ratio = avg / total * 100.0;
293 color = get_ratio_color(GRC_CACHE_MISSES, ratio);
296 color_fprintf(out, color, "%6.2f%%", ratio);
297 fprintf(out, " of all LL-cache hits ");
300 void perf_stat__print_shadow_stats(FILE *out, struct perf_evsel *evsel,
301 double avg, int cpu, enum aggr_mode aggr)
303 double total, ratio = 0.0, total2;
304 int ctx = evsel_context(evsel);
306 if (perf_evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS)) {
307 total = avg_stats(&runtime_cycles_stats[ctx][cpu]);
310 fprintf(out, " # %5.2f insns per cycle ", ratio);
314 total = avg_stats(&runtime_stalled_cycles_front_stats[ctx][cpu]);
315 total = max(total, avg_stats(&runtime_stalled_cycles_back_stats[ctx][cpu]));
320 if (aggr == AGGR_NONE)
322 fprintf(out, " # %5.2f stalled cycles per insn", ratio);
325 } else if (perf_evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES) &&
326 runtime_branches_stats[ctx][cpu].n != 0) {
327 print_branch_misses(out, cpu, evsel, avg);
329 evsel->attr.type == PERF_TYPE_HW_CACHE &&
330 evsel->attr.config == ( PERF_COUNT_HW_CACHE_L1D |
331 ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
332 ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
333 runtime_l1_dcache_stats[ctx][cpu].n != 0) {
334 print_l1_dcache_misses(out, cpu, evsel, avg);
336 evsel->attr.type == PERF_TYPE_HW_CACHE &&
337 evsel->attr.config == ( PERF_COUNT_HW_CACHE_L1I |
338 ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
339 ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
340 runtime_l1_icache_stats[ctx][cpu].n != 0) {
341 print_l1_icache_misses(out, cpu, evsel, avg);
343 evsel->attr.type == PERF_TYPE_HW_CACHE &&
344 evsel->attr.config == ( PERF_COUNT_HW_CACHE_DTLB |
345 ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
346 ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
347 runtime_dtlb_cache_stats[ctx][cpu].n != 0) {
348 print_dtlb_cache_misses(out, cpu, evsel, avg);
350 evsel->attr.type == PERF_TYPE_HW_CACHE &&
351 evsel->attr.config == ( PERF_COUNT_HW_CACHE_ITLB |
352 ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
353 ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
354 runtime_itlb_cache_stats[ctx][cpu].n != 0) {
355 print_itlb_cache_misses(out, cpu, evsel, avg);
357 evsel->attr.type == PERF_TYPE_HW_CACHE &&
358 evsel->attr.config == ( PERF_COUNT_HW_CACHE_LL |
359 ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
360 ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
361 runtime_ll_cache_stats[ctx][cpu].n != 0) {
362 print_ll_cache_misses(out, cpu, evsel, avg);
363 } else if (perf_evsel__match(evsel, HARDWARE, HW_CACHE_MISSES) &&
364 runtime_cacherefs_stats[ctx][cpu].n != 0) {
365 total = avg_stats(&runtime_cacherefs_stats[ctx][cpu]);
368 ratio = avg * 100 / total;
370 fprintf(out, " # %8.3f %% of all cache refs ", ratio);
372 } else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) {
373 print_stalled_cycles_frontend(out, cpu, evsel, avg);
374 } else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_BACKEND)) {
375 print_stalled_cycles_backend(out, cpu, evsel, avg);
376 } else if (perf_evsel__match(evsel, HARDWARE, HW_CPU_CYCLES)) {
377 total = avg_stats(&runtime_nsecs_stats[cpu]);
381 fprintf(out, " # %8.3f GHz ", ratio);
385 } else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX)) {
386 total = avg_stats(&runtime_cycles_stats[ctx][cpu]);
389 " # %5.2f%% transactional cycles ",
390 100.0 * (avg / total));
391 } else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX_CP)) {
392 total = avg_stats(&runtime_cycles_stats[ctx][cpu]);
393 total2 = avg_stats(&runtime_cycles_in_tx_stats[ctx][cpu]);
398 " # %5.2f%% aborted cycles ",
399 100.0 * ((total2-avg) / total));
400 } else if (perf_stat_evsel__is(evsel, TRANSACTION_START) &&
401 runtime_cycles_in_tx_stats[ctx][cpu].n != 0) {
402 total = avg_stats(&runtime_cycles_in_tx_stats[ctx][cpu]);
407 fprintf(out, " # %8.0f cycles / transaction ", ratio);
408 } else if (perf_stat_evsel__is(evsel, ELISION_START) &&
409 runtime_cycles_in_tx_stats[ctx][cpu].n != 0) {
410 total = avg_stats(&runtime_cycles_in_tx_stats[ctx][cpu]);
415 fprintf(out, " # %8.0f cycles / elision ", ratio);
416 } else if (runtime_nsecs_stats[cpu].n != 0) {
419 total = avg_stats(&runtime_nsecs_stats[cpu]);
422 ratio = 1000.0 * avg / total;
428 fprintf(out, " # %8.3f %c/sec ", ratio, unit);