1 #include <traceevent/event-parse.h>
3 #include "util/color.h"
4 #include "util/debug.h"
5 #include "util/evlist.h"
6 #include "util/machine.h"
7 #include "util/session.h"
8 #include "util/thread.h"
9 #include "util/parse-options.h"
10 #include "util/strlist.h"
11 #include "util/intlist.h"
12 #include "util/thread_map.h"
17 #include <linux/futex.h>
19 /* For older distros: */
21 # define MAP_STACK 0x20000
25 # define MADV_HWPOISON 100
28 #ifndef MADV_MERGEABLE
29 # define MADV_MERGEABLE 12
32 #ifndef MADV_UNMERGEABLE
33 # define MADV_UNMERGEABLE 13
48 #define DEFINE_STRARRAY(array) struct strarray strarray__##array = { \
49 .nr_entries = ARRAY_SIZE(array), \
53 static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size,
54 struct syscall_arg *arg)
57 struct strarray *sa = arg->parm;
59 if (idx < 0 || idx >= sa->nr_entries)
60 return scnprintf(bf, size, "%d", idx);
62 return scnprintf(bf, size, "%s", sa->entries[idx]);
65 #define SCA_STRARRAY syscall_arg__scnprintf_strarray
67 static size_t syscall_arg__scnprintf_hex(char *bf, size_t size,
68 struct syscall_arg *arg)
70 return scnprintf(bf, size, "%#lx", arg->val);
73 #define SCA_HEX syscall_arg__scnprintf_hex
75 static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size,
76 struct syscall_arg *arg)
78 int printed = 0, prot = arg->val;
80 if (prot == PROT_NONE)
81 return scnprintf(bf, size, "NONE");
82 #define P_MMAP_PROT(n) \
83 if (prot & PROT_##n) { \
84 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
94 P_MMAP_PROT(GROWSDOWN);
99 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", prot);
104 #define SCA_MMAP_PROT syscall_arg__scnprintf_mmap_prot
106 static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size,
107 struct syscall_arg *arg)
109 int printed = 0, flags = arg->val;
111 #define P_MMAP_FLAG(n) \
112 if (flags & MAP_##n) { \
113 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
118 P_MMAP_FLAG(PRIVATE);
122 P_MMAP_FLAG(ANONYMOUS);
123 P_MMAP_FLAG(DENYWRITE);
124 P_MMAP_FLAG(EXECUTABLE);
127 P_MMAP_FLAG(GROWSDOWN);
129 P_MMAP_FLAG(HUGETLB);
132 P_MMAP_FLAG(NONBLOCK);
133 P_MMAP_FLAG(NORESERVE);
134 P_MMAP_FLAG(POPULATE);
136 #ifdef MAP_UNINITIALIZED
137 P_MMAP_FLAG(UNINITIALIZED);
142 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
147 #define SCA_MMAP_FLAGS syscall_arg__scnprintf_mmap_flags
149 static size_t syscall_arg__scnprintf_madvise_behavior(char *bf, size_t size,
150 struct syscall_arg *arg)
152 int behavior = arg->val;
155 #define P_MADV_BHV(n) case MADV_##n: return scnprintf(bf, size, #n)
158 P_MADV_BHV(SEQUENTIAL);
159 P_MADV_BHV(WILLNEED);
160 P_MADV_BHV(DONTNEED);
162 P_MADV_BHV(DONTFORK);
164 P_MADV_BHV(HWPOISON);
165 #ifdef MADV_SOFT_OFFLINE
166 P_MADV_BHV(SOFT_OFFLINE);
168 P_MADV_BHV(MERGEABLE);
169 P_MADV_BHV(UNMERGEABLE);
171 P_MADV_BHV(HUGEPAGE);
173 #ifdef MADV_NOHUGEPAGE
174 P_MADV_BHV(NOHUGEPAGE);
177 P_MADV_BHV(DONTDUMP);
186 return scnprintf(bf, size, "%#x", behavior);
189 #define SCA_MADV_BHV syscall_arg__scnprintf_madvise_behavior
191 static size_t syscall_arg__scnprintf_futex_op(char *bf, size_t size, struct syscall_arg *arg)
193 enum syscall_futex_args {
194 SCF_UADDR = (1 << 0),
197 SCF_TIMEOUT = (1 << 3),
198 SCF_UADDR2 = (1 << 4),
202 int cmd = op & FUTEX_CMD_MASK;
206 #define P_FUTEX_OP(n) case FUTEX_##n: printed = scnprintf(bf, size, #n);
207 P_FUTEX_OP(WAIT); arg->mask |= SCF_VAL3|SCF_UADDR2; break;
208 P_FUTEX_OP(WAKE); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
209 P_FUTEX_OP(FD); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
210 P_FUTEX_OP(REQUEUE); arg->mask |= SCF_VAL3|SCF_TIMEOUT; break;
211 P_FUTEX_OP(CMP_REQUEUE); arg->mask |= SCF_TIMEOUT; break;
212 P_FUTEX_OP(CMP_REQUEUE_PI); arg->mask |= SCF_TIMEOUT; break;
213 P_FUTEX_OP(WAKE_OP); break;
214 P_FUTEX_OP(LOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
215 P_FUTEX_OP(UNLOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
216 P_FUTEX_OP(TRYLOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2; break;
217 P_FUTEX_OP(WAIT_BITSET); arg->mask |= SCF_UADDR2; break;
218 P_FUTEX_OP(WAKE_BITSET); arg->mask |= SCF_UADDR2; break;
219 P_FUTEX_OP(WAIT_REQUEUE_PI); break;
220 default: printed = scnprintf(bf, size, "%#x", cmd); break;
223 if (op & FUTEX_PRIVATE_FLAG)
224 printed += scnprintf(bf + printed, size - printed, "|PRIV");
226 if (op & FUTEX_CLOCK_REALTIME)
227 printed += scnprintf(bf + printed, size - printed, "|CLKRT");
232 #define SCA_FUTEX_OP syscall_arg__scnprintf_futex_op
234 static const char *itimers[] = { "REAL", "VIRTUAL", "PROF", };
235 static DEFINE_STRARRAY(itimers);
237 static const char *whences[] = { "SET", "CUR", "END",
245 static DEFINE_STRARRAY(whences);
247 static const char *fcntl_cmds[] = {
248 "DUPFD", "GETFD", "SETFD", "GETFL", "SETFL", "GETLK", "SETLK",
249 "SETLKW", "SETOWN", "GETOWN", "SETSIG", "GETSIG", "F_GETLK64",
250 "F_SETLK64", "F_SETLKW64", "F_SETOWN_EX", "F_GETOWN_EX",
253 static DEFINE_STRARRAY(fcntl_cmds);
255 static const char *sighow[] = { "BLOCK", "UNBLOCK", "SETMASK", };
256 static DEFINE_STRARRAY(sighow);
258 static const char *socket_families[] = {
259 "UNSPEC", "LOCAL", "INET", "AX25", "IPX", "APPLETALK", "NETROM",
260 "BRIDGE", "ATMPVC", "X25", "INET6", "ROSE", "DECnet", "NETBEUI",
261 "SECURITY", "KEY", "NETLINK", "PACKET", "ASH", "ECONET", "ATMSVC",
262 "RDS", "SNA", "IRDA", "PPPOX", "WANPIPE", "LLC", "IB", "CAN", "TIPC",
263 "BLUETOOTH", "IUCV", "RXRPC", "ISDN", "PHONET", "IEEE802154", "CAIF",
264 "ALG", "NFC", "VSOCK",
266 static DEFINE_STRARRAY(socket_families);
268 static size_t syscall_arg__scnprintf_open_flags(char *bf, size_t size,
269 struct syscall_arg *arg)
271 int printed = 0, flags = arg->val;
273 if (!(flags & O_CREAT))
274 arg->mask |= 1 << (arg->idx + 1); /* Mask the mode parm */
277 return scnprintf(bf, size, "RDONLY");
279 if (flags & O_##n) { \
280 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
304 if ((flags & O_SYNC) == O_SYNC)
305 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", "SYNC");
317 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
322 #define SCA_OPEN_FLAGS syscall_arg__scnprintf_open_flags
324 static size_t syscall_arg__scnprintf_signum(char *bf, size_t size, struct syscall_arg *arg)
329 #define P_SIGNUM(n) case SIG##n: return scnprintf(bf, size, #n)
364 return scnprintf(bf, size, "%#x", sig);
367 #define SCA_SIGNUM syscall_arg__scnprintf_signum
369 static struct syscall_fmt {
372 size_t (*arg_scnprintf[6])(char *bf, size_t size, struct syscall_arg *arg);
378 { .name = "access", .errmsg = true, },
379 { .name = "arch_prctl", .errmsg = true, .alias = "prctl", },
380 { .name = "brk", .hexret = true,
381 .arg_scnprintf = { [0] = SCA_HEX, /* brk */ }, },
382 { .name = "mmap", .hexret = true, },
383 { .name = "connect", .errmsg = true, },
384 { .name = "fcntl", .errmsg = true,
385 .arg_scnprintf = { [1] = SCA_STRARRAY, /* cmd */ },
386 .arg_parm = { [1] = &strarray__fcntl_cmds, /* cmd */ }, },
387 { .name = "fstat", .errmsg = true, .alias = "newfstat", },
388 { .name = "fstatat", .errmsg = true, .alias = "newfstatat", },
389 { .name = "futex", .errmsg = true,
390 .arg_scnprintf = { [1] = SCA_FUTEX_OP, /* op */ }, },
391 { .name = "getitimer", .errmsg = true,
392 .arg_scnprintf = { [0] = SCA_STRARRAY, /* which */ },
393 .arg_parm = { [0] = &strarray__itimers, /* which */ }, },
394 { .name = "ioctl", .errmsg = true,
395 .arg_scnprintf = { [2] = SCA_HEX, /* arg */ }, },
396 { .name = "kill", .errmsg = true,
397 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
398 { .name = "lseek", .errmsg = true,
399 .arg_scnprintf = { [2] = SCA_STRARRAY, /* whence */ },
400 .arg_parm = { [2] = &strarray__whences, /* whence */ }, },
401 { .name = "lstat", .errmsg = true, .alias = "newlstat", },
402 { .name = "madvise", .errmsg = true,
403 .arg_scnprintf = { [0] = SCA_HEX, /* start */
404 [2] = SCA_MADV_BHV, /* behavior */ }, },
405 { .name = "mmap", .hexret = true,
406 .arg_scnprintf = { [0] = SCA_HEX, /* addr */
407 [2] = SCA_MMAP_PROT, /* prot */
408 [3] = SCA_MMAP_FLAGS, /* flags */ }, },
409 { .name = "mprotect", .errmsg = true,
410 .arg_scnprintf = { [0] = SCA_HEX, /* start */
411 [2] = SCA_MMAP_PROT, /* prot */ }, },
412 { .name = "mremap", .hexret = true,
413 .arg_scnprintf = { [0] = SCA_HEX, /* addr */
414 [4] = SCA_HEX, /* new_addr */ }, },
415 { .name = "munmap", .errmsg = true,
416 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
417 { .name = "open", .errmsg = true,
418 .arg_scnprintf = { [1] = SCA_OPEN_FLAGS, /* flags */ }, },
419 { .name = "open_by_handle_at", .errmsg = true,
420 .arg_scnprintf = { [2] = SCA_OPEN_FLAGS, /* flags */ }, },
421 { .name = "openat", .errmsg = true,
422 .arg_scnprintf = { [2] = SCA_OPEN_FLAGS, /* flags */ }, },
423 { .name = "poll", .errmsg = true, .timeout = true, },
424 { .name = "ppoll", .errmsg = true, .timeout = true, },
425 { .name = "pread", .errmsg = true, .alias = "pread64", },
426 { .name = "pwrite", .errmsg = true, .alias = "pwrite64", },
427 { .name = "read", .errmsg = true, },
428 { .name = "recvfrom", .errmsg = true, },
429 { .name = "rt_sigaction", .errmsg = true,
430 .arg_scnprintf = { [0] = SCA_SIGNUM, /* sig */ }, },
431 { .name = "rt_sigprocmask", .errmsg = true,
432 .arg_scnprintf = { [0] = SCA_STRARRAY, /* how */ },
433 .arg_parm = { [0] = &strarray__sighow, /* how */ }, },
434 { .name = "rt_sigqueueinfo", .errmsg = true,
435 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
436 { .name = "rt_tgsigqueueinfo", .errmsg = true,
437 .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
438 { .name = "select", .errmsg = true, .timeout = true, },
439 { .name = "setitimer", .errmsg = true,
440 .arg_scnprintf = { [0] = SCA_STRARRAY, /* which */ },
441 .arg_parm = { [0] = &strarray__itimers, /* which */ }, },
442 { .name = "socket", .errmsg = true,
443 .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */ },
444 .arg_parm = { [0] = &strarray__socket_families, /* family */ }, },
445 { .name = "stat", .errmsg = true, .alias = "newstat", },
446 { .name = "tgkill", .errmsg = true,
447 .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
448 { .name = "tkill", .errmsg = true,
449 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
450 { .name = "uname", .errmsg = true, .alias = "newuname", },
453 static int syscall_fmt__cmp(const void *name, const void *fmtp)
455 const struct syscall_fmt *fmt = fmtp;
456 return strcmp(name, fmt->name);
459 static struct syscall_fmt *syscall_fmt__find(const char *name)
461 const int nmemb = ARRAY_SIZE(syscall_fmts);
462 return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp);
466 struct event_format *tp_format;
469 struct syscall_fmt *fmt;
470 size_t (**arg_scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
474 static size_t fprintf_duration(unsigned long t, FILE *fp)
476 double duration = (double)t / NSEC_PER_MSEC;
477 size_t printed = fprintf(fp, "(");
480 printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration);
481 else if (duration >= 0.01)
482 printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration);
484 printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration);
485 return printed + fprintf(fp, "): ");
488 struct thread_trace {
492 unsigned long nr_events;
497 static struct thread_trace *thread_trace__new(void)
499 return zalloc(sizeof(struct thread_trace));
502 static struct thread_trace *thread__trace(struct thread *thread, FILE *fp)
504 struct thread_trace *ttrace;
509 if (thread->priv == NULL)
510 thread->priv = thread_trace__new();
512 if (thread->priv == NULL)
515 ttrace = thread->priv;
520 color_fprintf(fp, PERF_COLOR_RED,
521 "WARNING: not enough memory, dropping samples!\n");
526 struct perf_tool tool;
530 struct syscall *table;
532 struct perf_record_opts opts;
536 unsigned long nr_events;
537 struct strlist *ev_qualifier;
538 bool not_ev_qualifier;
539 struct intlist *tid_list;
540 struct intlist *pid_list;
542 bool multiple_threads;
543 double duration_filter;
547 static bool trace__filter_duration(struct trace *trace, double t)
549 return t < (trace->duration_filter * NSEC_PER_MSEC);
552 static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
554 double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC;
556 return fprintf(fp, "%10.3f ", ts);
559 static bool done = false;
561 static void sig_handler(int sig __maybe_unused)
566 static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread,
567 u64 duration, u64 tstamp, FILE *fp)
569 size_t printed = trace__fprintf_tstamp(trace, tstamp, fp);
570 printed += fprintf_duration(duration, fp);
572 if (trace->multiple_threads)
573 printed += fprintf(fp, "%d ", thread->tid);
578 static int trace__process_event(struct trace *trace, struct machine *machine,
579 union perf_event *event)
583 switch (event->header.type) {
584 case PERF_RECORD_LOST:
585 color_fprintf(trace->output, PERF_COLOR_RED,
586 "LOST %" PRIu64 " events!\n", event->lost.lost);
587 ret = machine__process_lost_event(machine, event);
589 ret = machine__process_event(machine, event);
596 static int trace__tool_process(struct perf_tool *tool,
597 union perf_event *event,
598 struct perf_sample *sample __maybe_unused,
599 struct machine *machine)
601 struct trace *trace = container_of(tool, struct trace, tool);
602 return trace__process_event(trace, machine, event);
605 static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
607 int err = symbol__init();
612 machine__init(&trace->host, "", HOST_KERNEL_ID);
613 machine__create_kernel_maps(&trace->host);
615 if (perf_target__has_task(&trace->opts.target)) {
616 err = perf_event__synthesize_thread_map(&trace->tool, evlist->threads,
620 err = perf_event__synthesize_threads(&trace->tool, trace__tool_process,
630 static int syscall__set_arg_fmts(struct syscall *sc)
632 struct format_field *field;
635 sc->arg_scnprintf = calloc(sc->tp_format->format.nr_fields - 1, sizeof(void *));
636 if (sc->arg_scnprintf == NULL)
640 sc->arg_parm = sc->fmt->arg_parm;
642 for (field = sc->tp_format->format.fields->next; field; field = field->next) {
643 if (sc->fmt && sc->fmt->arg_scnprintf[idx])
644 sc->arg_scnprintf[idx] = sc->fmt->arg_scnprintf[idx];
645 else if (field->flags & FIELD_IS_POINTER)
646 sc->arg_scnprintf[idx] = syscall_arg__scnprintf_hex;
653 static int trace__read_syscall_info(struct trace *trace, int id)
657 const char *name = audit_syscall_to_name(id, trace->audit_machine);
662 if (id > trace->syscalls.max) {
663 struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc));
665 if (nsyscalls == NULL)
668 if (trace->syscalls.max != -1) {
669 memset(nsyscalls + trace->syscalls.max + 1, 0,
670 (id - trace->syscalls.max) * sizeof(*sc));
672 memset(nsyscalls, 0, (id + 1) * sizeof(*sc));
675 trace->syscalls.table = nsyscalls;
676 trace->syscalls.max = id;
679 sc = trace->syscalls.table + id;
682 if (trace->ev_qualifier) {
683 bool in = strlist__find(trace->ev_qualifier, name) != NULL;
685 if (!(in ^ trace->not_ev_qualifier)) {
688 * No need to do read tracepoint information since this will be
695 sc->fmt = syscall_fmt__find(sc->name);
697 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name);
698 sc->tp_format = event_format__new("syscalls", tp_name);
700 if (sc->tp_format == NULL && sc->fmt && sc->fmt->alias) {
701 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias);
702 sc->tp_format = event_format__new("syscalls", tp_name);
705 if (sc->tp_format == NULL)
708 return syscall__set_arg_fmts(sc);
711 static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
716 if (sc->tp_format != NULL) {
717 struct format_field *field;
719 struct syscall_arg arg = {
724 for (field = sc->tp_format->format.fields->next; field;
725 field = field->next, ++arg.idx, bit <<= 1) {
729 printed += scnprintf(bf + printed, size - printed,
730 "%s%s: ", printed ? ", " : "", field->name);
731 if (sc->arg_scnprintf && sc->arg_scnprintf[arg.idx]) {
732 arg.val = args[arg.idx];
734 arg.parm = sc->arg_parm[arg.idx];
735 printed += sc->arg_scnprintf[arg.idx](bf + printed,
736 size - printed, &arg);
738 printed += scnprintf(bf + printed, size - printed,
739 "%ld", args[arg.idx]);
746 printed += scnprintf(bf + printed, size - printed,
748 printed ? ", " : "", i, args[i]);
756 typedef int (*tracepoint_handler)(struct trace *trace, struct perf_evsel *evsel,
757 struct perf_sample *sample);
759 static struct syscall *trace__syscall_info(struct trace *trace,
760 struct perf_evsel *evsel,
761 struct perf_sample *sample)
763 int id = perf_evsel__intval(evsel, sample, "id");
768 * XXX: Noticed on x86_64, reproduced as far back as 3.0.36, haven't tried
769 * before that, leaving at a higher verbosity level till that is
770 * explained. Reproduced with plain ftrace with:
772 * echo 1 > /t/events/raw_syscalls/sys_exit/enable
773 * grep "NR -1 " /t/trace_pipe
775 * After generating some load on the machine.
779 fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n",
780 id, perf_evsel__name(evsel), ++n);
785 if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) &&
786 trace__read_syscall_info(trace, id))
789 if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL))
792 return &trace->syscalls.table[id];
796 fprintf(trace->output, "Problems reading syscall %d", id);
797 if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL)
798 fprintf(trace->output, "(%s)", trace->syscalls.table[id].name);
799 fputs(" information\n", trace->output);
804 static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
805 struct perf_sample *sample)
810 struct thread *thread;
811 struct syscall *sc = trace__syscall_info(trace, evsel, sample);
812 struct thread_trace *ttrace;
820 thread = machine__findnew_thread(&trace->host, sample->pid,
822 ttrace = thread__trace(thread, trace->output);
826 args = perf_evsel__rawptr(evsel, sample, "args");
828 fprintf(trace->output, "Problems reading syscall arguments\n");
832 ttrace = thread->priv;
834 if (ttrace->entry_str == NULL) {
835 ttrace->entry_str = malloc(1024);
836 if (!ttrace->entry_str)
840 ttrace->entry_time = sample->time;
841 msg = ttrace->entry_str;
842 printed += scnprintf(msg + printed, 1024 - printed, "%s(", sc->name);
844 printed += syscall__scnprintf_args(sc, msg + printed, 1024 - printed, args);
846 if (!strcmp(sc->name, "exit_group") || !strcmp(sc->name, "exit")) {
847 if (!trace->duration_filter) {
848 trace__fprintf_entry_head(trace, thread, 1, sample->time, trace->output);
849 fprintf(trace->output, "%-70s\n", ttrace->entry_str);
852 ttrace->entry_pending = true;
857 static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
858 struct perf_sample *sample)
862 struct thread *thread;
863 struct syscall *sc = trace__syscall_info(trace, evsel, sample);
864 struct thread_trace *ttrace;
872 thread = machine__findnew_thread(&trace->host, sample->pid,
874 ttrace = thread__trace(thread, trace->output);
878 ret = perf_evsel__intval(evsel, sample, "ret");
880 ttrace = thread->priv;
882 ttrace->exit_time = sample->time;
884 if (ttrace->entry_time) {
885 duration = sample->time - ttrace->entry_time;
886 if (trace__filter_duration(trace, duration))
888 } else if (trace->duration_filter)
891 trace__fprintf_entry_head(trace, thread, duration, sample->time, trace->output);
893 if (ttrace->entry_pending) {
894 fprintf(trace->output, "%-70s", ttrace->entry_str);
896 fprintf(trace->output, " ... [");
897 color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued");
898 fprintf(trace->output, "]: %s()", sc->name);
901 if (sc->fmt == NULL) {
903 fprintf(trace->output, ") = %d", ret);
904 } else if (ret < 0 && sc->fmt->errmsg) {
906 const char *emsg = strerror_r(-ret, bf, sizeof(bf)),
907 *e = audit_errno_to_name(-ret);
909 fprintf(trace->output, ") = -1 %s %s", e, emsg);
910 } else if (ret == 0 && sc->fmt->timeout)
911 fprintf(trace->output, ") = 0 Timeout");
912 else if (sc->fmt->hexret)
913 fprintf(trace->output, ") = %#x", ret);
917 fputc('\n', trace->output);
919 ttrace->entry_pending = false;
924 static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel,
925 struct perf_sample *sample)
927 u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
928 double runtime_ms = (double)runtime / NSEC_PER_MSEC;
929 struct thread *thread = machine__findnew_thread(&trace->host,
932 struct thread_trace *ttrace = thread__trace(thread, trace->output);
937 ttrace->runtime_ms += runtime_ms;
938 trace->runtime_ms += runtime_ms;
942 fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n",
944 perf_evsel__strval(evsel, sample, "comm"),
945 (pid_t)perf_evsel__intval(evsel, sample, "pid"),
947 perf_evsel__intval(evsel, sample, "vruntime"));
951 static bool skip_sample(struct trace *trace, struct perf_sample *sample)
953 if ((trace->pid_list && intlist__find(trace->pid_list, sample->pid)) ||
954 (trace->tid_list && intlist__find(trace->tid_list, sample->tid)))
957 if (trace->pid_list || trace->tid_list)
963 static int trace__process_sample(struct perf_tool *tool,
964 union perf_event *event __maybe_unused,
965 struct perf_sample *sample,
966 struct perf_evsel *evsel,
967 struct machine *machine __maybe_unused)
969 struct trace *trace = container_of(tool, struct trace, tool);
972 tracepoint_handler handler = evsel->handler.func;
974 if (skip_sample(trace, sample))
977 if (trace->base_time == 0)
978 trace->base_time = sample->time;
981 handler(trace, evsel, sample);
987 perf_session__has_tp(struct perf_session *session, const char *name)
989 struct perf_evsel *evsel;
991 evsel = perf_evlist__find_tracepoint_by_name(session->evlist, name);
993 return evsel != NULL;
996 static int parse_target_str(struct trace *trace)
998 if (trace->opts.target.pid) {
999 trace->pid_list = intlist__new(trace->opts.target.pid);
1000 if (trace->pid_list == NULL) {
1001 pr_err("Error parsing process id string\n");
1006 if (trace->opts.target.tid) {
1007 trace->tid_list = intlist__new(trace->opts.target.tid);
1008 if (trace->tid_list == NULL) {
1009 pr_err("Error parsing thread id string\n");
1017 static int trace__run(struct trace *trace, int argc, const char **argv)
1019 struct perf_evlist *evlist = perf_evlist__new();
1020 struct perf_evsel *evsel;
1022 unsigned long before;
1023 const bool forks = argc > 0;
1025 if (evlist == NULL) {
1026 fprintf(trace->output, "Not enough memory to run!\n");
1030 if (perf_evlist__add_newtp(evlist, "raw_syscalls", "sys_enter", trace__sys_enter) ||
1031 perf_evlist__add_newtp(evlist, "raw_syscalls", "sys_exit", trace__sys_exit)) {
1032 fprintf(trace->output, "Couldn't read the raw_syscalls tracepoints information!\n");
1033 goto out_delete_evlist;
1037 perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
1038 trace__sched_stat_runtime)) {
1039 fprintf(trace->output, "Couldn't read the sched_stat_runtime tracepoint information!\n");
1040 goto out_delete_evlist;
1043 err = perf_evlist__create_maps(evlist, &trace->opts.target);
1045 fprintf(trace->output, "Problems parsing the target to trace, check your options!\n");
1046 goto out_delete_evlist;
1049 err = trace__symbols_init(trace, evlist);
1051 fprintf(trace->output, "Problems initializing symbol libraries!\n");
1052 goto out_delete_maps;
1055 perf_evlist__config(evlist, &trace->opts);
1057 signal(SIGCHLD, sig_handler);
1058 signal(SIGINT, sig_handler);
1061 err = perf_evlist__prepare_workload(evlist, &trace->opts.target,
1062 argv, false, false);
1064 fprintf(trace->output, "Couldn't run the workload!\n");
1065 goto out_delete_maps;
1069 err = perf_evlist__open(evlist);
1071 fprintf(trace->output, "Couldn't create the events: %s\n", strerror(errno));
1072 goto out_delete_maps;
1075 err = perf_evlist__mmap(evlist, UINT_MAX, false);
1077 fprintf(trace->output, "Couldn't mmap the events: %s\n", strerror(errno));
1078 goto out_close_evlist;
1081 perf_evlist__enable(evlist);
1084 perf_evlist__start_workload(evlist);
1086 trace->multiple_threads = evlist->threads->map[0] == -1 || evlist->threads->nr > 1;
1088 before = trace->nr_events;
1090 for (i = 0; i < evlist->nr_mmaps; i++) {
1091 union perf_event *event;
1093 while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) {
1094 const u32 type = event->header.type;
1095 tracepoint_handler handler;
1096 struct perf_sample sample;
1100 err = perf_evlist__parse_sample(evlist, event, &sample);
1102 fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err);
1106 if (trace->base_time == 0)
1107 trace->base_time = sample.time;
1109 if (type != PERF_RECORD_SAMPLE) {
1110 trace__process_event(trace, &trace->host, event);
1114 evsel = perf_evlist__id2evsel(evlist, sample.id);
1115 if (evsel == NULL) {
1116 fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample.id);
1120 if (sample.raw_data == NULL) {
1121 fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
1122 perf_evsel__name(evsel), sample.tid,
1123 sample.cpu, sample.raw_size);
1127 handler = evsel->handler.func;
1128 handler(trace, evsel, &sample);
1131 goto out_unmap_evlist;
1135 if (trace->nr_events == before) {
1137 goto out_unmap_evlist;
1139 poll(evlist->pollfd, evlist->nr_fds, -1);
1143 perf_evlist__disable(evlist);
1148 perf_evlist__munmap(evlist);
1150 perf_evlist__close(evlist);
1152 perf_evlist__delete_maps(evlist);
1154 perf_evlist__delete(evlist);
1159 static int trace__replay(struct trace *trace)
1161 const struct perf_evsel_str_handler handlers[] = {
1162 { "raw_syscalls:sys_enter", trace__sys_enter, },
1163 { "raw_syscalls:sys_exit", trace__sys_exit, },
1166 struct perf_session *session;
1169 trace->tool.sample = trace__process_sample;
1170 trace->tool.mmap = perf_event__process_mmap;
1171 trace->tool.mmap2 = perf_event__process_mmap2;
1172 trace->tool.comm = perf_event__process_comm;
1173 trace->tool.exit = perf_event__process_exit;
1174 trace->tool.fork = perf_event__process_fork;
1175 trace->tool.attr = perf_event__process_attr;
1176 trace->tool.tracing_data = perf_event__process_tracing_data;
1177 trace->tool.build_id = perf_event__process_build_id;
1179 trace->tool.ordered_samples = true;
1180 trace->tool.ordering_requires_timestamps = true;
1182 /* add tid to output */
1183 trace->multiple_threads = true;
1185 if (symbol__init() < 0)
1188 session = perf_session__new(input_name, O_RDONLY, 0, false,
1190 if (session == NULL)
1193 err = perf_session__set_tracepoints_handlers(session, handlers);
1197 if (!perf_session__has_tp(session, "raw_syscalls:sys_enter")) {
1198 pr_err("Data file does not have raw_syscalls:sys_enter events\n");
1202 if (!perf_session__has_tp(session, "raw_syscalls:sys_exit")) {
1203 pr_err("Data file does not have raw_syscalls:sys_exit events\n");
1207 err = parse_target_str(trace);
1213 err = perf_session__process_events(session, &trace->tool);
1215 pr_err("Failed to process events, error %d", err);
1218 perf_session__delete(session);
1223 static size_t trace__fprintf_threads_header(FILE *fp)
1227 printed = fprintf(fp, "\n _____________________________________________________________________\n");
1228 printed += fprintf(fp," __) Summary of events (__\n\n");
1229 printed += fprintf(fp," [ task - pid ] [ events ] [ ratio ] [ runtime ]\n");
1230 printed += fprintf(fp," _____________________________________________________________________\n\n");
1235 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
1237 size_t printed = trace__fprintf_threads_header(fp);
1240 for (nd = rb_first(&trace->host.threads); nd; nd = rb_next(nd)) {
1241 struct thread *thread = rb_entry(nd, struct thread, rb_node);
1242 struct thread_trace *ttrace = thread->priv;
1249 ratio = (double)ttrace->nr_events / trace->nr_events * 100.0;
1251 color = PERF_COLOR_NORMAL;
1253 color = PERF_COLOR_RED;
1254 else if (ratio > 25.0)
1255 color = PERF_COLOR_GREEN;
1256 else if (ratio > 5.0)
1257 color = PERF_COLOR_YELLOW;
1259 printed += color_fprintf(fp, color, "%20s", thread->comm);
1260 printed += fprintf(fp, " - %-5d :%11lu [", thread->tid, ttrace->nr_events);
1261 printed += color_fprintf(fp, color, "%5.1f%%", ratio);
1262 printed += fprintf(fp, " ] %10.3f ms\n", ttrace->runtime_ms);
1268 static int trace__set_duration(const struct option *opt, const char *str,
1269 int unset __maybe_unused)
1271 struct trace *trace = opt->value;
1273 trace->duration_filter = atof(str);
1277 static int trace__open_output(struct trace *trace, const char *filename)
1281 if (!stat(filename, &st) && st.st_size) {
1282 char oldname[PATH_MAX];
1284 scnprintf(oldname, sizeof(oldname), "%s.old", filename);
1286 rename(filename, oldname);
1289 trace->output = fopen(filename, "w");
1291 return trace->output == NULL ? -errno : 0;
1294 int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
1296 const char * const trace_usage[] = {
1297 "perf trace [<options>] [<command>]",
1298 "perf trace [<options>] -- <command> [<options>]",
1301 struct trace trace = {
1302 .audit_machine = audit_detect_machine(),
1311 .user_freq = UINT_MAX,
1312 .user_interval = ULLONG_MAX,
1318 const char *output_name = NULL;
1319 const char *ev_qualifier_str = NULL;
1320 const struct option trace_options[] = {
1321 OPT_STRING('e', "expr", &ev_qualifier_str, "expr",
1322 "list of events to trace"),
1323 OPT_STRING('o', "output", &output_name, "file", "output file name"),
1324 OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"),
1325 OPT_STRING('p', "pid", &trace.opts.target.pid, "pid",
1326 "trace events on existing process id"),
1327 OPT_STRING('t', "tid", &trace.opts.target.tid, "tid",
1328 "trace events on existing thread id"),
1329 OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide,
1330 "system-wide collection from all CPUs"),
1331 OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu",
1332 "list of cpus to monitor"),
1333 OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit,
1334 "child tasks do not inherit counters"),
1335 OPT_UINTEGER('m', "mmap-pages", &trace.opts.mmap_pages,
1336 "number of mmap data pages"),
1337 OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user",
1339 OPT_CALLBACK(0, "duration", &trace, "float",
1340 "show only events with duration > N.M ms",
1341 trace__set_duration),
1342 OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"),
1343 OPT_INCR('v', "verbose", &verbose, "be more verbose"),
1349 argc = parse_options(argc, argv, trace_options, trace_usage, 0);
1351 if (output_name != NULL) {
1352 err = trace__open_output(&trace, output_name);
1354 perror("failed to create output file");
1359 if (ev_qualifier_str != NULL) {
1360 const char *s = ev_qualifier_str;
1362 trace.not_ev_qualifier = *s == '!';
1363 if (trace.not_ev_qualifier)
1365 trace.ev_qualifier = strlist__new(true, s);
1366 if (trace.ev_qualifier == NULL) {
1367 fputs("Not enough memory to parse event qualifier",
1374 err = perf_target__validate(&trace.opts.target);
1376 perf_target__strerror(&trace.opts.target, err, bf, sizeof(bf));
1377 fprintf(trace.output, "%s", bf);
1381 err = perf_target__parse_uid(&trace.opts.target);
1383 perf_target__strerror(&trace.opts.target, err, bf, sizeof(bf));
1384 fprintf(trace.output, "%s", bf);
1388 if (!argc && perf_target__none(&trace.opts.target))
1389 trace.opts.target.system_wide = true;
1392 err = trace__replay(&trace);
1394 err = trace__run(&trace, argc, argv);
1396 if (trace.sched && !err)
1397 trace__fprintf_thread_summary(&trace, trace.output);
1400 if (output_name != NULL)
1401 fclose(trace.output);