4 #include "util/evlist.h"
5 #include "util/evsel.h"
7 #include "util/cache.h"
8 #include "util/symbol.h"
9 #include "util/thread.h"
10 #include "util/header.h"
11 #include "util/session.h"
12 #include "util/tool.h"
14 #include "util/parse-options.h"
15 #include "util/trace-event.h"
16 #include "util/data.h"
17 #include "util/cpumap.h"
19 #include "util/debug.h"
21 #include <linux/rbtree.h>
22 #include <linux/string.h>
/* Comparator type used for both sorting and lookup of alloc_stat records. */
26 typedef int (*sort_fn_t)(struct alloc_stat *, struct alloc_stat *);
/*
 * Option-order counters: the --caller and --alloc callbacks each set their
 * flag to the other flag plus one, so the larger value marks the option that
 * was seen most recently on the command line.
 */
28 static int alloc_flag;
29 static int caller_flag;
/*
 * Row limits handed to __print_result(); the --line callback overwrites them.
 * NOTE(review): -1 looks like "no limit" - confirm against the print loop.
 */
31 static int alloc_lines = -1;
32 static int caller_lines = -1;
/*
 * root_alloc_stat is keyed by allocation pointer and root_caller_stat by
 * call site; sort_result() moves their nodes into the *_sorted trees.
 */
49 static struct rb_root root_alloc_stat;
50 static struct rb_root root_alloc_sorted;
51 static struct rb_root root_caller_stat;
52 static struct rb_root root_caller_sorted;
/* Running byte totals, accumulated by perf_evsel__process_alloc_event(). */
54 static unsigned long total_requested, total_allocated;
/*
 * Printed as "Cross CPU allocations: x/y" by print_summary().
 * NOTE(review): the code that increments these is not visible in this excerpt.
 */
55 static unsigned long nr_allocs, nr_cross_allocs;
/*
 * insert_alloc_stat - record one allocation event in the per-pointer tree.
 *
 * Descends root_alloc_stat comparing @ptr with each node's ptr. If a node
 * with the same ptr exists, @bytes_req and @bytes_alloc are added to its
 * totals; otherwise a new alloc_stat is malloc()ed, seeded with the byte
 * counts and linked into the tree. @call_site and @cpu are stored on the
 * record as well.
 *
 * NOTE(review): only part of the body is visible in this excerpt; the loop
 * header, the remaining field updates and the return statements are not
 * shown, so the return convention should be confirmed against the full file.
 */
57 static int insert_alloc_stat(unsigned long call_site, unsigned long ptr,
58 int bytes_req, int bytes_alloc, int cpu)
60 struct rb_node **node = &root_alloc_stat.rb_node;
61 struct rb_node *parent = NULL;
62 struct alloc_stat *data = NULL;
66 data = rb_entry(*node, struct alloc_stat, node);
/* the condition selecting the right child is on a line not shown here */
69 node = &(*node)->rb_right;
/* smaller pointers descend to the left */
70 else if (ptr < data->ptr)
71 node = &(*node)->rb_left;
/* this pointer is already tracked: accumulate into the existing record */
76 if (data && data->ptr == ptr) {
78 data->bytes_req += bytes_req;
79 data->bytes_alloc += bytes_alloc;
/* first time this pointer is seen: create a record for it */
81 data = malloc(sizeof(*data));
83 pr_err("%s: malloc failed\n", __func__);
89 data->bytes_req = bytes_req;
90 data->bytes_alloc = bytes_alloc;
/* hook the new node in at the slot where the descent ended, then rebalance */
92 rb_link_node(&data->node, parent, node);
93 rb_insert_color(&data->node, &root_alloc_stat);
/*
 * Remember the allocating call site and CPU; the free handler compares
 * alloc_cpu with the freeing CPU and uses call_site to find the caller.
 * NOTE(review): presumably executed on both the update and insert paths -
 * the closing brace between is not visible, confirm.
 */
95 data->call_site = call_site;
96 data->alloc_cpu = cpu;
/*
 * insert_caller_stat - record one allocation event in the per-call-site tree.
 *
 * Descends root_caller_stat comparing @call_site with each node's call_site.
 * An existing node gets @bytes_req and @bytes_alloc added to its totals;
 * otherwise a new alloc_stat is malloc()ed, filled in and linked into the
 * tree.
 *
 * NOTE(review): the loop header, some field updates and the return
 * statements are not visible in this excerpt.
 */
100 static int insert_caller_stat(unsigned long call_site,
101 int bytes_req, int bytes_alloc)
103 struct rb_node **node = &root_caller_stat.rb_node;
104 struct rb_node *parent = NULL;
105 struct alloc_stat *data = NULL;
109 data = rb_entry(*node, struct alloc_stat, node);
/* larger call sites go right, smaller go left; equality ends the descent */
111 if (call_site > data->call_site)
112 node = &(*node)->rb_right;
113 else if (call_site < data->call_site)
114 node = &(*node)->rb_left;
/* call site already known: accumulate into its record */
119 if (data && data->call_site == call_site) {
121 data->bytes_req += bytes_req;
122 data->bytes_alloc += bytes_alloc;
/* new call site: allocate and initialise a record */
124 data = malloc(sizeof(*data));
126 pr_err("%s: malloc failed\n", __func__);
129 data->call_site = call_site;
132 data->bytes_req = bytes_req;
133 data->bytes_alloc = bytes_alloc;
/* link at the slot found by the descent and rebalance the tree */
135 rb_link_node(&data->node, parent, node);
136 rb_insert_color(&data->node, &root_caller_stat);
/*
 * Tracepoint handler for allocation events (registered for kmem:kmalloc and
 * kmem:kmem_cache_alloc in __cmd_kmem()).
 *
 * Pulls the "ptr", "call_site", "bytes_req" and "bytes_alloc" fields out of
 * the sample, feeds them to both the per-pointer and the per-call-site
 * trees, and adds the byte counts to the global totals.
 *
 * NOTE(review): the branch taken when either insert returns non-zero, and
 * the function's return statements, are not visible in this excerpt.
 */
142 static int perf_evsel__process_alloc_event(struct perf_evsel *evsel,
143 struct perf_sample *sample)
145 unsigned long ptr = perf_evsel__intval(evsel, sample, "ptr"),
146 call_site = perf_evsel__intval(evsel, sample, "call_site");
/* byte counts are narrowed to int here */
147 int bytes_req = perf_evsel__intval(evsel, sample, "bytes_req"),
148 bytes_alloc = perf_evsel__intval(evsel, sample, "bytes_alloc");
/* short-circuit: the caller tree is only updated if the pointer tree insert returned 0 */
150 if (insert_alloc_stat(call_site, ptr, bytes_req, bytes_alloc, sample->cpu) ||
151 insert_caller_stat(call_site, bytes_req, bytes_alloc))
154 total_requested += bytes_req;
155 total_allocated += bytes_alloc;
/*
 * Tracepoint handler for the *_node allocation events (registered for
 * kmem:kmalloc_node and kmem:kmem_cache_alloc_node in __cmd_kmem()).
 *
 * Runs the plain allocation handler first, then looks up two NUMA node ids:
 * the node of the CPU the sample was taken on and the "node" field carried
 * by the event.
 *
 * NOTE(review): what is done with node1/node2 is not visible in this
 * excerpt - presumably they are compared to count cross-node allocations;
 * confirm against the full file.
 */
161 static int perf_evsel__process_alloc_node_event(struct perf_evsel *evsel,
162 struct perf_sample *sample)
164 int ret = perf_evsel__process_alloc_event(evsel, sample);
167 int node1 = cpu__get_node(sample->cpu),
168 node2 = perf_evsel__intval(evsel, sample, "node");
/*
 * Forward declarations: the free-event handler passes these comparators to
 * search_alloc_stat() before their definitions appear further down.
 */
177 static int ptr_cmp(struct alloc_stat *, struct alloc_stat *);
178 static int callsite_cmp(struct alloc_stat *, struct alloc_stat *);
/*
 * search_alloc_stat - look up a record in @root using a comparator.
 *
 * Builds a temporary key holding @ptr and @call_site and walks the tree,
 * calling sort_fn(&key, node_data) at each node and descending left or
 * right depending on the result.
 *
 * NOTE(review): the last parameter (sort_fn), the loop header, the tests on
 * cmp and the return statements are not visible in this excerpt. Callers
 * store the result in a struct alloc_stat pointer; the not-found value is
 * presumably NULL - confirm.
 */
180 static struct alloc_stat *search_alloc_stat(unsigned long ptr,
181 unsigned long call_site,
182 struct rb_root *root,
185 struct rb_node *node = root->rb_node;
/* only the fields the chosen comparator inspects need to be meaningful */
186 struct alloc_stat key = { .ptr = ptr, .call_site = call_site };
189 struct alloc_stat *data;
192 data = rb_entry(node, struct alloc_stat, node);
194 cmp = sort_fn(&key, data);
196 node = node->rb_left;
198 node = node->rb_right;
/*
 * Tracepoint handler for free events (registered for kmem:kfree and
 * kmem:kmem_cache_free in __cmd_kmem()).
 *
 * Looks up the freed pointer in the per-pointer tree. When the CPU doing
 * the free differs from the CPU recorded at allocation time, the matching
 * call-site record is looked up and its pingpong counter is incremented.
 * Finally the record's alloc_cpu is reset to -1.
 *
 * NOTE(review): no NULL checks on s_alloc / s_caller are visible in this
 * excerpt, and neither are the return statements; confirm both lookups are
 * guarded in the full file.
 */
205 static int perf_evsel__process_free_event(struct perf_evsel *evsel,
206 struct perf_sample *sample)
208 unsigned long ptr = perf_evsel__intval(evsel, sample, "ptr");
209 struct alloc_stat *s_alloc, *s_caller;
/* call_site is irrelevant for a ptr_cmp lookup, hence 0 */
211 s_alloc = search_alloc_stat(ptr, 0, &root_alloc_stat, ptr_cmp);
/* NOTE(review): the (short) cast suggests alloc_cpu is a short field - confirm */
215 if ((short)sample->cpu != s_alloc->alloc_cpu) {
/* ptr is irrelevant for a callsite_cmp lookup, hence 0 */
218 s_caller = search_alloc_stat(0, s_alloc->call_site,
219 &root_caller_stat, callsite_cmp);
222 s_caller->pingpong++;
/* -1 replaces the recorded allocating CPU once the free has been processed */
224 s_alloc->alloc_cpu = -1;
/*
 * Signature of the per-tracepoint handlers; process_sample_event() reads
 * evsel->handler into a variable of this type and calls it.
 */
229 typedef int (*tracepoint_handler)(struct perf_evsel *evsel,
230 struct perf_sample *sample);
/*
 * Sample callback installed in perf_kmem.
 *
 * Resolves (or creates) the thread for the sample, emits a dump line naming
 * it, and then forwards the sample to the handler attached to the event
 * selector, returning that handler's result.
 *
 * NOTE(review): the remaining machine__findnew_thread() arguments, the
 * return taken when the thread cannot be found, and the return used when
 * no handler is set are not visible in this excerpt.
 */
232 static int process_sample_event(struct perf_tool *tool __maybe_unused,
233 union perf_event *event,
234 struct perf_sample *sample,
235 struct perf_evsel *evsel,
236 struct machine *machine)
238 struct thread *thread = machine__findnew_thread(machine, sample->pid,
241 if (thread == NULL) {
242 pr_debug("problem processing %d event, skipping it.\n",
247 dump_printf(" ... thread: %s:%d\n", thread__comm_str(thread), thread->tid);
/* events without a registered handler are not dispatched */
249 if (evsel->handler != NULL) {
250 tracepoint_handler f = evsel->handler;
251 return f(evsel, sample);
/*
 * Tool callbacks handed to perf_session__new() in cmd_kmem(): samples go to
 * process_sample_event(), comm/mmap/mmap2 events to the perf_event__process_*
 * handlers, and ordered_events is enabled.
 */
257 static struct perf_tool perf_kmem = {
258 .sample = process_sample_event,
259 .comm = perf_event__process_comm,
260 .mmap = perf_event__process_mmap,
261 .mmap2 = perf_event__process_mmap2,
262 .ordered_events = true,
/*
 * fragmentation - percentage of allocated bytes that were not requested.
 *
 * The visible return computes 100 - 100 * n_req / n_alloc in floating point
 * (the 100.0 literal promotes the integer operands to double).
 *
 * NOTE(review): the expression divides by @n_alloc; the lines between the
 * signature and this return are not visible in this excerpt - confirm that
 * n_alloc == 0 is handled before reaching it.
 */
265 static double fragmentation(unsigned long n_req, unsigned long n_alloc)
270 return 100.0 - (100.0 * n_req / n_alloc);
/*
 * __print_result - print one statistics table.
 *
 * @root:      tree to walk from rb_first() via rb_next()
 * @session:   supplies the host machine used for kernel symbol lookup
 * @n_lines:   row budget; the loop stops when it counts down to zero
 * @is_caller: selects the "Callsite" heading instead of "Alloc Ptr"
 *
 * Each row shows total and per-hit allocated bytes, total and per-hit
 * requested bytes, hit count, ping-pong count and fragmentation.
 *
 * NOTE(review): several lines (local declarations, the conditions choosing
 * between the symbol and raw-address forms, and the condition guarding the
 * " ... " row) are not visible in this excerpt.
 */
273 static void __print_result(struct rb_root *root, struct perf_session *session,
274 int n_lines, int is_caller)
276 struct rb_node *next;
277 struct machine *machine = &session->machines.host;
/* table header */
279 printf("%.105s\n", graph_dotted_line);
280 printf(" %-34s |", is_caller ? "Callsite": "Alloc Ptr");
281 printf(" Total_alloc/Per | Total_req/Per | Hit | Ping-pong | Frag\n");
282 printf("%.105s\n", graph_dotted_line);
284 next = rb_first(root);
/* n_lines is post-decremented, so a budget of N prints at most N rows */
286 while (next && n_lines--) {
287 struct alloc_stat *data = rb_entry(next, struct alloc_stat,
289 struct symbol *sym = NULL;
295 addr = data->call_site;
297 sym = machine__find_kernel_function(machine, addr, &map, NULL);
/* symbolised form: name plus hexadecimal offset from the symbol start */
302 snprintf(buf, sizeof(buf), "%s+%" PRIx64 "", sym->name,
303 addr - map->unmap_ip(map, sym->start));
/* fallback form: the raw address in hexadecimal */
305 snprintf(buf, sizeof(buf), "%#" PRIx64 "", addr);
306 printf(" %-34s |", buf);
/*
 * The "/Per" columns are totals divided by the hit count; the cast applies
 * to the byte total before the division.
 */
308 printf(" %9llu/%-5lu | %9llu/%-5lu | %8lu | %9lu | %6.3f%%\n",
309 (unsigned long long)data->bytes_alloc,
310 (unsigned long)data->bytes_alloc / data->hit,
311 (unsigned long long)data->bytes_req,
312 (unsigned long)data->bytes_req / data->hit,
313 (unsigned long)data->hit,
314 (unsigned long)data->pingpong,
315 fragmentation(data->bytes_req, data->bytes_alloc));
317 next = rb_next(next);
/* placeholder row; NOTE(review): presumably shown only when rows were cut off - confirm */
321 printf(" ... | ... | ... | ... | ... | ... \n");
323 printf("%.105s\n", graph_dotted_line);
/*
 * print_summary - print the global totals gathered while processing events.
 *
 * The %'lu conversions use the printf apostrophe flag, which groups digits
 * according to the current locale; cmd_kmem() calls setlocale(LC_ALL, "")
 * before reporting.
 */
326 static void print_summary(void)
328 printf("\nSUMMARY\n=======\n");
329 printf("Total bytes requested: %'lu\n", total_requested);
330 printf("Total bytes allocated: %'lu\n", total_allocated);
/* unsigned subtraction: relies on allocated >= requested */
331 printf("Total bytes wasted on internal fragmentation: %'lu\n",
332 total_allocated - total_requested);
333 printf("Internal fragmentation: %f%%\n",
334 fragmentation(total_requested, total_allocated));
335 printf("Cross CPU allocations: %'lu/%'lu\n", nr_cross_allocs, nr_allocs);
/*
 * print_result - print the per-call-site table and the per-allocation table
 * from their sorted trees, each limited by its own line budget.
 *
 * NOTE(review): any conditions guarding the two calls, and any further
 * output, are not visible in this excerpt.
 */
338 static void print_result(struct perf_session *session)
341 __print_result(&root_caller_sorted, session, caller_lines, 1);
343 __print_result(&root_alloc_sorted, session, alloc_lines, 0);
/*
 * One sort key. Instances are chained through @list onto caller_sort or
 * alloc_sort.
 * NOTE(review): other members are not visible in this excerpt; code
 * elsewhere in the file reads ->name and calls ->cmp on these objects.
 */
347 struct sort_dimension {
350 struct list_head list;
/*
 * Ordered sort-key lists for the per-call-site and per-allocation tables;
 * filled by setup_sorting() and consumed by sort_result().
 */
353 static LIST_HEAD(caller_sort);
354 static LIST_HEAD(alloc_sort);
/*
 * sort_insert - insert @data into @root ordered by the keys in @sort_list.
 *
 * At each tree node the sort dimensions are consulted in list order through
 * their cmp callbacks, and the descent goes left or right accordingly
 * before the node is linked and the tree rebalanced.
 *
 * NOTE(review): the loop header, the handling of cmp inside the list walk
 * and the condition choosing left versus right are not visible in this
 * excerpt, so the resulting sort direction cannot be stated from here.
 */
356 static void sort_insert(struct rb_root *root, struct alloc_stat *data,
357 struct list_head *sort_list)
359 struct rb_node **new = &(root->rb_node);
360 struct rb_node *parent = NULL;
361 struct sort_dimension *sort;
364 struct alloc_stat *this;
367 this = rb_entry(*new, struct alloc_stat, node);
/* compare the new record against the current node, key by key */
370 list_for_each_entry(sort, sort_list, list) {
371 cmp = sort->cmp(data, this);
377 new = &((*new)->rb_left);
379 new = &((*new)->rb_right);
/* attach at the empty slot reached by the descent */
382 rb_link_node(&data->node, parent, new);
383 rb_insert_color(&data->node, root);
/*
 * __sort_result - move records from @root into @root_sorted.
 *
 * Takes the first node of @root, erases it from that tree and re-inserts
 * the containing alloc_stat into @root_sorted using @sort_list.
 *
 * NOTE(review): the enclosing loop and its exit test are not visible in
 * this excerpt; presumably this repeats until @root is empty.
 */
386 static void __sort_result(struct rb_root *root, struct rb_root *root_sorted,
387 struct list_head *sort_list)
389 struct rb_node *node;
390 struct alloc_stat *data;
393 node = rb_first(root);
/* the same embedded rb_node is reused, so it must leave @root before being re-linked */
397 rb_erase(node, root);
398 data = rb_entry(node, struct alloc_stat, node);
399 sort_insert(root_sorted, data, sort_list);
/*
 * sort_result - re-sort both statistics trees, each with its own key list:
 * per-pointer records by alloc_sort, per-call-site records by caller_sort.
 */
403 static void sort_result(void)
405 __sort_result(&root_alloc_stat, &root_alloc_sorted, &alloc_sort);
406 __sort_result(&root_caller_stat, &root_caller_sorted, &caller_sort);
/*
 * __cmd_kmem - process a recorded session and print the report.
 *
 * Checks that the session contains trace data, attaches a handler to each
 * kmem tracepoint, processes all events and prints the result.
 *
 * NOTE(review): the error paths, any sorting step between event processing
 * and printing, and the return statements are not visible in this excerpt.
 */
409 static int __cmd_kmem(struct perf_session *session)
/* allocation events, their NUMA-node variants, and free events */
412 const struct perf_evsel_str_handler kmem_tracepoints[] = {
413 { "kmem:kmalloc", perf_evsel__process_alloc_event, },
414 { "kmem:kmem_cache_alloc", perf_evsel__process_alloc_event, },
415 { "kmem:kmalloc_node", perf_evsel__process_alloc_node_event, },
416 { "kmem:kmem_cache_alloc_node", perf_evsel__process_alloc_node_event, },
417 { "kmem:kfree", perf_evsel__process_free_event, },
418 { "kmem:kmem_cache_free", perf_evsel__process_free_event, },
421 if (!perf_session__has_traces(session, "kmem record"))
424 if (perf_session__set_tracepoints_handlers(session, kmem_tracepoints)) {
425 pr_err("Initializing perf session tracepoint handlers failed\n");
430 err = perf_session__process_events(session);
434 print_result(session);
/*
 * ptr_cmp - order two records by their ptr field.
 * NOTE(review): the first comparison and all return values are not visible
 * in this excerpt.
 */
439 static int ptr_cmp(struct alloc_stat *l, struct alloc_stat *r)
443 else if (l->ptr > r->ptr)
/* Sort key descriptor for ptr; NOTE(review): its initializer fields are not visible in this excerpt. */
448 static struct sort_dimension ptr_sort_dimension = {
/*
 * callsite_cmp - order two records by their call_site field.
 * NOTE(review): the return values for each branch are not visible in this
 * excerpt.
 */
453 static int callsite_cmp(struct alloc_stat *l, struct alloc_stat *r)
455 if (l->call_site < r->call_site)
457 else if (l->call_site > r->call_site)
/* Sort key descriptor for call site; NOTE(review): its initializer fields are not visible in this excerpt. */
462 static struct sort_dimension callsite_sort_dimension = {
/*
 * hit_cmp - order two records by their hit count.
 * NOTE(review): the first comparison and all return values are not visible
 * in this excerpt.
 */
467 static int hit_cmp(struct alloc_stat *l, struct alloc_stat *r)
471 else if (l->hit > r->hit)
/* Sort key descriptor for hit count; NOTE(review): its initializer fields are not visible in this excerpt. */
476 static struct sort_dimension hit_sort_dimension = {
/*
 * bytes_cmp - order two records by their bytes_alloc total (bytes_req is
 * not consulted in the visible comparisons).
 * NOTE(review): the return values for each branch are not visible in this
 * excerpt.
 */
481 static int bytes_cmp(struct alloc_stat *l, struct alloc_stat *r)
483 if (l->bytes_alloc < r->bytes_alloc)
485 else if (l->bytes_alloc > r->bytes_alloc)
/* Sort key descriptor for allocated bytes; NOTE(review): its initializer fields are not visible in this excerpt. */
490 static struct sort_dimension bytes_sort_dimension = {
/*
 * frag_cmp - order two records by their fragmentation percentage, computed
 * with fragmentation() from each record's requested and allocated bytes.
 * NOTE(review): the declarations of x and y and the comparison/return
 * statements are not visible in this excerpt.
 */
495 static int frag_cmp(struct alloc_stat *l, struct alloc_stat *r)
499 x = fragmentation(l->bytes_req, l->bytes_alloc);
500 y = fragmentation(r->bytes_req, r->bytes_alloc);
/* Sort key descriptor for fragmentation; NOTE(review): its initializer fields are not visible in this excerpt. */
509 static struct sort_dimension frag_sort_dimension = {
/*
 * pingpong_cmp - order two records by their pingpong counter (incremented
 * by the free handler when the freeing CPU differs from the allocating one).
 * NOTE(review): the return values for each branch are not visible in this
 * excerpt.
 */
514 static int pingpong_cmp(struct alloc_stat *l, struct alloc_stat *r)
516 if (l->pingpong < r->pingpong)
518 else if (l->pingpong > r->pingpong)
/* Sort key descriptor for ping-pong count; NOTE(review): its initializer fields are not visible in this excerpt. */
523 static struct sort_dimension pingpong_sort_dimension = {
/*
 * Table of sort keys that sort_dimension__add() matches --sort tokens
 * against by name.
 * NOTE(review): some entries are not visible in this excerpt.
 */
528 static struct sort_dimension *avail_sorts[] = {
530 &callsite_sort_dimension,
532 &bytes_sort_dimension,
533 &frag_sort_dimension,
534 &pingpong_sort_dimension,
/* Entry count of avail_sorts, cast to int for the loop in sort_dimension__add(). */
537 #define NUM_AVAIL_SORTS ((int)ARRAY_SIZE(avail_sorts))
/*
 * sort_dimension__add - append the sort key named @tok to @list.
 *
 * Scans avail_sorts for an entry whose name equals @tok, duplicates it with
 * memdup() so the same key can be linked on more than one list, and adds
 * the copy at the tail of @list.
 *
 * NOTE(review): the return statements are not visible in this excerpt;
 * setup_sorting() treats a negative return as failure.
 */
539 static int sort_dimension__add(const char *tok, struct list_head *list)
541 struct sort_dimension *sort;
544 for (i = 0; i < NUM_AVAIL_SORTS; i++) {
545 if (!strcmp(avail_sorts[i]->name, tok)) {
546 sort = memdup(avail_sorts[i], sizeof(*avail_sorts[i]));
548 pr_err("%s: memdup failed\n", __func__);
/* tail insertion keeps keys in the order they were given */
551 list_add_tail(&sort->list, list);
/*
 * setup_sorting - parse a comma-separated key list into @sort_list.
 *
 * Works on a strdup()ed copy of @arg because strsep() modifies the string
 * it tokenises; each token is passed to sort_dimension__add().
 *
 * NOTE(review): the loop structure, the release of the duplicated string
 * and the return statements are not visible in this excerpt. Also note the
 * "Unknown --sort key" message is printed for any negative return from
 * sort_dimension__add(), which may include its allocation failure - confirm.
 */
559 static int setup_sorting(struct list_head *sort_list, const char *arg)
562 char *str = strdup(arg);
566 pr_err("%s: strdup failed\n", __func__);
571 tok = strsep(&pos, ",");
574 if (sort_dimension__add(tok, sort_list) < 0) {
575 error("Unknown --sort key: '%s'", tok);
/*
 * parse_sort_opt - option callback for -s/--sort.
 *
 * Applies the key list to the caller table when --caller was the more
 * recent of --caller/--alloc (caller_flag > alloc_flag), otherwise to the
 * allocation table.
 *
 * NOTE(review): lines before the comparison are not visible in this excerpt.
 */
585 static int parse_sort_opt(const struct option *opt __maybe_unused,
586 const char *arg, int unset __maybe_unused)
591 if (caller_flag > alloc_flag)
592 return setup_sorting(&caller_sort, arg);
594 return setup_sorting(&alloc_sort, arg);
/*
 * parse_caller_opt - option callback for --caller.
 *
 * Raises caller_flag above alloc_flag so that later -s/-l options apply to
 * the per-call-site table. The return statement is not visible here.
 */
599 static int parse_caller_opt(const struct option *opt __maybe_unused,
600 const char *arg __maybe_unused,
601 int unset __maybe_unused)
603 caller_flag = (alloc_flag + 1);
/*
 * parse_alloc_opt - option callback for --alloc.
 *
 * Raises alloc_flag above caller_flag so that later -s/-l options apply to
 * the per-allocation table. The return statement is not visible here.
 */
607 static int parse_alloc_opt(const struct option *opt __maybe_unused,
608 const char *arg __maybe_unused,
609 int unset __maybe_unused)
611 alloc_flag = (caller_flag + 1);
/*
 * parse_line_opt - option callback for -l/--line.
 *
 * Converts @arg to a number and stores it as the row limit of whichever
 * table was selected most recently with --caller/--alloc.
 *
 * NOTE(review): strtoul() is called with a NULL end pointer, so this call
 * by itself cannot detect trailing garbage or non-numeric input. The other
 * branch (presumably assigning alloc_lines) and the return statements are
 * not visible in this excerpt.
 */
615 static int parse_line_opt(const struct option *opt __maybe_unused,
616 const char *arg, int unset __maybe_unused)
623 lines = strtoul(arg, NULL, 10);
625 if (caller_flag > alloc_flag)
626 caller_lines = lines;
/*
 * __cmd_record - run "perf record" preconfigured for the kmem tracepoints.
 *
 * Builds a new argument vector made of the fixed record_args followed by
 * the caller's arguments (argv[0] is skipped), then hands it to
 * cmd_record().
 *
 * NOTE(review): some record_args entries and the allocation-failure return
 * are not visible in this excerpt.
 */
633 static int __cmd_record(int argc, const char **argv)
635 const char * const record_args[] = {
636 "record", "-a", "-R", "-c", "1",
637 "-e", "kmem:kmalloc",
638 "-e", "kmem:kmalloc_node",
640 "-e", "kmem:kmem_cache_alloc",
641 "-e", "kmem:kmem_cache_alloc_node",
642 "-e", "kmem:kmem_cache_free",
644 unsigned int rec_argc, i, j;
645 const char **rec_argv;
/* fixed arguments plus the user's arguments minus argv[0] */
647 rec_argc = ARRAY_SIZE(record_args) + argc - 1;
/* one extra zeroed slot leaves a NULL terminator after the last argument */
648 rec_argv = calloc(rec_argc + 1, sizeof(char *));
650 if (rec_argv == NULL)
/*
 * NOTE(review): the strdup() result is stored without a check on this line,
 * and no free of rec_argv or the duplicated strings is visible in this
 * excerpt.
 */
653 for (i = 0; i < ARRAY_SIZE(record_args); i++)
654 rec_argv[i] = strdup(record_args[i]);
/* i keeps counting from the end of the fixed arguments */
656 for (j = 1; j < (unsigned int)argc; j++, i++)
657 rec_argv[i] = argv[j];
/* i now equals the number of arguments placed in rec_argv */
659 return cmd_record(i, rec_argv, NULL);
/*
 * cmd_kmem - entry point of the kmem command.
 *
 * Parses options and the subcommand. A subcommand starting with "rec" is
 * forwarded to __cmd_record(). Otherwise a read-mode session is opened
 * with the perf_kmem callbacks; for "stat" the locale and CPU-to-node map
 * are set up, default sort keys are installed for any table the user did
 * not configure, and __cmd_kmem() produces the report. The session is
 * deleted before returning.
 *
 * NOTE(review): several lines are not visible in this excerpt, including
 * the remaining option/usage table entries, the error checks after
 * perf_session__new(), and the final return. On the lines shown, the return
 * values of symbol__init() and of the two default setup_sorting() calls are
 * not used.
 */
662 int cmd_kmem(int argc, const char **argv, const char *prefix __maybe_unused)
664 const char * const default_sort_order = "frag,hit,bytes";
665 const struct option kmem_options[] = {
666 OPT_STRING('i', "input", &input_name, "file", "input file name"),
667 OPT_INCR('v', "verbose", &verbose,
668 "be more verbose (show symbol address, etc)"),
669 OPT_CALLBACK_NOOPT(0, "caller", NULL, NULL,
670 "show per-callsite statistics", parse_caller_opt),
671 OPT_CALLBACK_NOOPT(0, "alloc", NULL, NULL,
672 "show per-allocation statistics", parse_alloc_opt),
673 OPT_CALLBACK('s', "sort", NULL, "key[,key2...]",
674 "sort by keys: ptr, call_site, bytes, hit, pingpong, frag",
676 OPT_CALLBACK('l', "line", NULL, "num", "show n lines", parse_line_opt),
677 OPT_BOOLEAN(0, "raw-ip", &raw_ip, "show raw ip instead of symbol"),
680 const char *const kmem_subcommands[] = { "record", "stat", NULL };
681 const char *kmem_usage[] = {
685 struct perf_session *session;
686 struct perf_data_file file = {
688 .mode = PERF_DATA_MODE_READ,
692 argc = parse_options_subcommand(argc, argv, kmem_options,
693 kmem_subcommands, kmem_usage, 0);
696 usage_with_options(kmem_usage, kmem_options);
/* prefix match: any subcommand beginning with "rec" selects recording */
698 if (!strncmp(argv[0], "rec", 3)) {
700 return __cmd_record(argc, argv);
703 session = perf_session__new(&file, false, &perf_kmem);
707 symbol__init(&session->header.env);
709 if (!strcmp(argv[0], "stat")) {
/* enables locale-aware digit grouping for the %'lu conversions in print_summary() */
710 setlocale(LC_ALL, "");
712 if (cpu__setup_cpunode_map())
/* fall back to the default key order for tables without a user-supplied --sort */
715 if (list_empty(&caller_sort))
716 setup_sorting(&caller_sort, default_sort_order);
717 if (list_empty(&alloc_sort))
718 setup_sorting(&alloc_sort, default_sort_order);
720 ret = __cmd_kmem(session);
722 usage_with_options(kmem_usage, kmem_options);
725 perf_session__delete(session);