1 #include <traceevent/event-parse.h>
3 #include "util/color.h"
4 #include "util/debug.h"
5 #include "util/evlist.h"
6 #include "util/machine.h"
7 #include "util/session.h"
8 #include "util/thread.h"
9 #include "util/parse-options.h"
10 #include "util/strlist.h"
11 #include "util/intlist.h"
12 #include "util/thread_map.h"
17 #include <linux/futex.h>
19 /* For older distros: */
21 # define MAP_STACK 0x20000
25 # define MADV_HWPOISON 100
28 #ifndef MADV_MERGEABLE
29 # define MADV_MERGEABLE 12
32 #ifndef MADV_UNMERGEABLE
33 # define MADV_UNMERGEABLE 13
48 #define DEFINE_STRARRAY(array) struct strarray strarray__##array = { \
49 .nr_entries = ARRAY_SIZE(array), \
53 static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size,
54 struct syscall_arg *arg)
57 struct strarray *sa = arg->parm;
59 if (idx < 0 || idx >= sa->nr_entries)
60 return scnprintf(bf, size, "%d", idx);
62 return scnprintf(bf, size, "%s", sa->entries[idx]);
65 #define SCA_STRARRAY syscall_arg__scnprintf_strarray
67 static size_t syscall_arg__scnprintf_hex(char *bf, size_t size,
68 struct syscall_arg *arg)
70 return scnprintf(bf, size, "%#lx", arg->val);
73 #define SCA_HEX syscall_arg__scnprintf_hex
75 static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size,
76 struct syscall_arg *arg)
78 int printed = 0, prot = arg->val;
80 if (prot == PROT_NONE)
81 return scnprintf(bf, size, "NONE");
82 #define P_MMAP_PROT(n) \
83 if (prot & PROT_##n) { \
84 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
94 P_MMAP_PROT(GROWSDOWN);
99 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", prot);
104 #define SCA_MMAP_PROT syscall_arg__scnprintf_mmap_prot
106 static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size,
107 struct syscall_arg *arg)
109 int printed = 0, flags = arg->val;
111 #define P_MMAP_FLAG(n) \
112 if (flags & MAP_##n) { \
113 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
118 P_MMAP_FLAG(PRIVATE);
122 P_MMAP_FLAG(ANONYMOUS);
123 P_MMAP_FLAG(DENYWRITE);
124 P_MMAP_FLAG(EXECUTABLE);
127 P_MMAP_FLAG(GROWSDOWN);
129 P_MMAP_FLAG(HUGETLB);
132 P_MMAP_FLAG(NONBLOCK);
133 P_MMAP_FLAG(NORESERVE);
134 P_MMAP_FLAG(POPULATE);
136 #ifdef MAP_UNINITIALIZED
137 P_MMAP_FLAG(UNINITIALIZED);
142 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
147 #define SCA_MMAP_FLAGS syscall_arg__scnprintf_mmap_flags
149 static size_t syscall_arg__scnprintf_madvise_behavior(char *bf, size_t size,
150 struct syscall_arg *arg)
152 int behavior = arg->val;
155 #define P_MADV_BHV(n) case MADV_##n: return scnprintf(bf, size, #n)
158 P_MADV_BHV(SEQUENTIAL);
159 P_MADV_BHV(WILLNEED);
160 P_MADV_BHV(DONTNEED);
162 P_MADV_BHV(DONTFORK);
164 P_MADV_BHV(HWPOISON);
165 #ifdef MADV_SOFT_OFFLINE
166 P_MADV_BHV(SOFT_OFFLINE);
168 P_MADV_BHV(MERGEABLE);
169 P_MADV_BHV(UNMERGEABLE);
171 P_MADV_BHV(HUGEPAGE);
173 #ifdef MADV_NOHUGEPAGE
174 P_MADV_BHV(NOHUGEPAGE);
177 P_MADV_BHV(DONTDUMP);
186 return scnprintf(bf, size, "%#x", behavior);
189 #define SCA_MADV_BHV syscall_arg__scnprintf_madvise_behavior
191 static size_t syscall_arg__scnprintf_futex_op(char *bf, size_t size, struct syscall_arg *arg)
193 enum syscall_futex_args {
194 SCF_UADDR = (1 << 0),
197 SCF_TIMEOUT = (1 << 3),
198 SCF_UADDR2 = (1 << 4),
202 int cmd = op & FUTEX_CMD_MASK;
206 #define P_FUTEX_OP(n) case FUTEX_##n: printed = scnprintf(bf, size, #n);
207 P_FUTEX_OP(WAIT); arg->mask |= SCF_VAL3|SCF_UADDR2; break;
208 P_FUTEX_OP(WAKE); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
209 P_FUTEX_OP(FD); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
210 P_FUTEX_OP(REQUEUE); arg->mask |= SCF_VAL3|SCF_TIMEOUT; break;
211 P_FUTEX_OP(CMP_REQUEUE); arg->mask |= SCF_TIMEOUT; break;
212 P_FUTEX_OP(CMP_REQUEUE_PI); arg->mask |= SCF_TIMEOUT; break;
213 P_FUTEX_OP(WAKE_OP); break;
214 P_FUTEX_OP(LOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
215 P_FUTEX_OP(UNLOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
216 P_FUTEX_OP(TRYLOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2; break;
217 P_FUTEX_OP(WAIT_BITSET); arg->mask |= SCF_UADDR2; break;
218 P_FUTEX_OP(WAKE_BITSET); arg->mask |= SCF_UADDR2; break;
219 P_FUTEX_OP(WAIT_REQUEUE_PI); break;
220 default: printed = scnprintf(bf, size, "%#x", cmd); break;
223 if (op & FUTEX_PRIVATE_FLAG)
224 printed += scnprintf(bf + printed, size - printed, "|PRIV");
226 if (op & FUTEX_CLOCK_REALTIME)
227 printed += scnprintf(bf + printed, size - printed, "|CLKRT");
232 #define SCA_FUTEX_OP syscall_arg__scnprintf_futex_op
234 static const char *itimers[] = { "REAL", "VIRTUAL", "PROF", };
235 static DEFINE_STRARRAY(itimers);
237 static const char *whences[] = { "SET", "CUR", "END",
245 static DEFINE_STRARRAY(whences);
247 static const char *fcntl_cmds[] = {
248 "DUPFD", "GETFD", "SETFD", "GETFL", "SETFL", "GETLK", "SETLK",
249 "SETLKW", "SETOWN", "GETOWN", "SETSIG", "GETSIG", "F_GETLK64",
250 "F_SETLK64", "F_SETLKW64", "F_SETOWN_EX", "F_GETOWN_EX",
253 static DEFINE_STRARRAY(fcntl_cmds);
255 static const char *rlimit_resources[] = {
256 "CPU", "FSIZE", "DATA", "STACK", "CORE", "RSS", "NPROC", "NOFILE",
257 "MEMLOCK", "AS", "LOCKS", "SIGPENDING", "MSGQUEUE", "NICE", "RTPRIO",
260 static DEFINE_STRARRAY(rlimit_resources);
262 static const char *sighow[] = { "BLOCK", "UNBLOCK", "SETMASK", };
263 static DEFINE_STRARRAY(sighow);
265 static const char *socket_families[] = {
266 "UNSPEC", "LOCAL", "INET", "AX25", "IPX", "APPLETALK", "NETROM",
267 "BRIDGE", "ATMPVC", "X25", "INET6", "ROSE", "DECnet", "NETBEUI",
268 "SECURITY", "KEY", "NETLINK", "PACKET", "ASH", "ECONET", "ATMSVC",
269 "RDS", "SNA", "IRDA", "PPPOX", "WANPIPE", "LLC", "IB", "CAN", "TIPC",
270 "BLUETOOTH", "IUCV", "RXRPC", "ISDN", "PHONET", "IEEE802154", "CAIF",
271 "ALG", "NFC", "VSOCK",
273 static DEFINE_STRARRAY(socket_families);
275 #ifndef SOCK_TYPE_MASK
276 #define SOCK_TYPE_MASK 0xf
279 static size_t syscall_arg__scnprintf_socket_type(char *bf, size_t size,
280 struct syscall_arg *arg)
284 flags = type & ~SOCK_TYPE_MASK;
286 type &= SOCK_TYPE_MASK;
288 * Can't use a strarray, MIPS may override for ABI reasons.
291 #define P_SK_TYPE(n) case SOCK_##n: printed = scnprintf(bf, size, #n); break;
296 P_SK_TYPE(SEQPACKET);
301 printed = scnprintf(bf, size, "%#x", type);
304 #define P_SK_FLAG(n) \
305 if (flags & SOCK_##n) { \
306 printed += scnprintf(bf + printed, size - printed, "|%s", #n); \
307 flags &= ~SOCK_##n; \
315 printed += scnprintf(bf + printed, size - printed, "|%#x", flags);
320 #define SCA_SK_TYPE syscall_arg__scnprintf_socket_type
322 static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size,
323 struct syscall_arg *arg)
328 if (mode == F_OK) /* 0 */
329 return scnprintf(bf, size, "F");
331 if (mode & n##_OK) { \
332 printed += scnprintf(bf + printed, size - printed, "%s", #n); \
342 printed += scnprintf(bf + printed, size - printed, "|%#x", mode);
347 #define SCA_ACCMODE syscall_arg__scnprintf_access_mode
349 static size_t syscall_arg__scnprintf_open_flags(char *bf, size_t size,
350 struct syscall_arg *arg)
352 int printed = 0, flags = arg->val;
354 if (!(flags & O_CREAT))
355 arg->mask |= 1 << (arg->idx + 1); /* Mask the mode parm */
358 return scnprintf(bf, size, "RDONLY");
360 if (flags & O_##n) { \
361 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
385 if ((flags & O_SYNC) == O_SYNC)
386 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", "SYNC");
398 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
403 #define SCA_OPEN_FLAGS syscall_arg__scnprintf_open_flags
405 static size_t syscall_arg__scnprintf_signum(char *bf, size_t size, struct syscall_arg *arg)
410 #define P_SIGNUM(n) case SIG##n: return scnprintf(bf, size, #n)
445 return scnprintf(bf, size, "%#x", sig);
448 #define SCA_SIGNUM syscall_arg__scnprintf_signum
450 static struct syscall_fmt {
453 size_t (*arg_scnprintf[6])(char *bf, size_t size, struct syscall_arg *arg);
459 { .name = "access", .errmsg = true,
460 .arg_scnprintf = { [1] = SCA_ACCMODE, /* mode */ }, },
461 { .name = "arch_prctl", .errmsg = true, .alias = "prctl", },
462 { .name = "brk", .hexret = true,
463 .arg_scnprintf = { [0] = SCA_HEX, /* brk */ }, },
464 { .name = "connect", .errmsg = true, },
465 { .name = "fcntl", .errmsg = true,
466 .arg_scnprintf = { [1] = SCA_STRARRAY, /* cmd */ },
467 .arg_parm = { [1] = &strarray__fcntl_cmds, /* cmd */ }, },
468 { .name = "fstat", .errmsg = true, .alias = "newfstat", },
469 { .name = "fstatat", .errmsg = true, .alias = "newfstatat", },
470 { .name = "futex", .errmsg = true,
471 .arg_scnprintf = { [1] = SCA_FUTEX_OP, /* op */ }, },
472 { .name = "getitimer", .errmsg = true,
473 .arg_scnprintf = { [0] = SCA_STRARRAY, /* which */ },
474 .arg_parm = { [0] = &strarray__itimers, /* which */ }, },
475 { .name = "getrlimit", .errmsg = true,
476 .arg_scnprintf = { [0] = SCA_STRARRAY, /* resource */ },
477 .arg_parm = { [0] = &strarray__rlimit_resources, /* resource */ }, },
478 { .name = "ioctl", .errmsg = true,
479 .arg_scnprintf = { [2] = SCA_HEX, /* arg */ }, },
480 { .name = "kill", .errmsg = true,
481 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
482 { .name = "lseek", .errmsg = true,
483 .arg_scnprintf = { [2] = SCA_STRARRAY, /* whence */ },
484 .arg_parm = { [2] = &strarray__whences, /* whence */ }, },
485 { .name = "lstat", .errmsg = true, .alias = "newlstat", },
486 { .name = "madvise", .errmsg = true,
487 .arg_scnprintf = { [0] = SCA_HEX, /* start */
488 [2] = SCA_MADV_BHV, /* behavior */ }, },
489 { .name = "mmap", .hexret = true,
490 .arg_scnprintf = { [0] = SCA_HEX, /* addr */
491 [2] = SCA_MMAP_PROT, /* prot */
492 [3] = SCA_MMAP_FLAGS, /* flags */ }, },
493 { .name = "mprotect", .errmsg = true,
494 .arg_scnprintf = { [0] = SCA_HEX, /* start */
495 [2] = SCA_MMAP_PROT, /* prot */ }, },
496 { .name = "mremap", .hexret = true,
497 .arg_scnprintf = { [0] = SCA_HEX, /* addr */
498 [4] = SCA_HEX, /* new_addr */ }, },
499 { .name = "munmap", .errmsg = true,
500 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
501 { .name = "open", .errmsg = true,
502 .arg_scnprintf = { [1] = SCA_OPEN_FLAGS, /* flags */ }, },
503 { .name = "open_by_handle_at", .errmsg = true,
504 .arg_scnprintf = { [2] = SCA_OPEN_FLAGS, /* flags */ }, },
505 { .name = "openat", .errmsg = true,
506 .arg_scnprintf = { [2] = SCA_OPEN_FLAGS, /* flags */ }, },
507 { .name = "poll", .errmsg = true, .timeout = true, },
508 { .name = "ppoll", .errmsg = true, .timeout = true, },
509 { .name = "pread", .errmsg = true, .alias = "pread64", },
510 { .name = "prlimit64", .errmsg = true,
511 .arg_scnprintf = { [1] = SCA_STRARRAY, /* resource */ },
512 .arg_parm = { [1] = &strarray__rlimit_resources, /* resource */ }, },
513 { .name = "pwrite", .errmsg = true, .alias = "pwrite64", },
514 { .name = "read", .errmsg = true, },
515 { .name = "recvfrom", .errmsg = true, },
516 { .name = "rt_sigaction", .errmsg = true,
517 .arg_scnprintf = { [0] = SCA_SIGNUM, /* sig */ }, },
518 { .name = "rt_sigprocmask", .errmsg = true,
519 .arg_scnprintf = { [0] = SCA_STRARRAY, /* how */ },
520 .arg_parm = { [0] = &strarray__sighow, /* how */ }, },
521 { .name = "rt_sigqueueinfo", .errmsg = true,
522 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
523 { .name = "rt_tgsigqueueinfo", .errmsg = true,
524 .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
525 { .name = "select", .errmsg = true, .timeout = true, },
526 { .name = "setitimer", .errmsg = true,
527 .arg_scnprintf = { [0] = SCA_STRARRAY, /* which */ },
528 .arg_parm = { [0] = &strarray__itimers, /* which */ }, },
529 { .name = "setrlimit", .errmsg = true,
530 .arg_scnprintf = { [0] = SCA_STRARRAY, /* resource */ },
531 .arg_parm = { [0] = &strarray__rlimit_resources, /* resource */ }, },
532 { .name = "socket", .errmsg = true,
533 .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
534 [1] = SCA_SK_TYPE, /* type */ },
535 .arg_parm = { [0] = &strarray__socket_families, /* family */ }, },
536 { .name = "stat", .errmsg = true, .alias = "newstat", },
537 { .name = "tgkill", .errmsg = true,
538 .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
539 { .name = "tkill", .errmsg = true,
540 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
541 { .name = "uname", .errmsg = true, .alias = "newuname", },
544 static int syscall_fmt__cmp(const void *name, const void *fmtp)
546 const struct syscall_fmt *fmt = fmtp;
547 return strcmp(name, fmt->name);
550 static struct syscall_fmt *syscall_fmt__find(const char *name)
552 const int nmemb = ARRAY_SIZE(syscall_fmts);
553 return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp);
557 struct event_format *tp_format;
560 struct syscall_fmt *fmt;
561 size_t (**arg_scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
565 static size_t fprintf_duration(unsigned long t, FILE *fp)
567 double duration = (double)t / NSEC_PER_MSEC;
568 size_t printed = fprintf(fp, "(");
571 printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration);
572 else if (duration >= 0.01)
573 printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration);
575 printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration);
576 return printed + fprintf(fp, "): ");
579 struct thread_trace {
583 unsigned long nr_events;
588 static struct thread_trace *thread_trace__new(void)
590 return zalloc(sizeof(struct thread_trace));
593 static struct thread_trace *thread__trace(struct thread *thread, FILE *fp)
595 struct thread_trace *ttrace;
600 if (thread->priv == NULL)
601 thread->priv = thread_trace__new();
603 if (thread->priv == NULL)
606 ttrace = thread->priv;
611 color_fprintf(fp, PERF_COLOR_RED,
612 "WARNING: not enough memory, dropping samples!\n");
617 struct perf_tool tool;
621 struct syscall *table;
623 struct perf_record_opts opts;
628 unsigned long nr_events;
629 struct strlist *ev_qualifier;
630 bool not_ev_qualifier;
631 struct intlist *tid_list;
632 struct intlist *pid_list;
634 bool multiple_threads;
635 double duration_filter;
639 static bool trace__filter_duration(struct trace *trace, double t)
641 return t < (trace->duration_filter * NSEC_PER_MSEC);
644 static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
646 double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC;
648 return fprintf(fp, "%10.3f ", ts);
651 static bool done = false;
653 static void sig_handler(int sig __maybe_unused)
658 static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread,
659 u64 duration, u64 tstamp, FILE *fp)
661 size_t printed = trace__fprintf_tstamp(trace, tstamp, fp);
662 printed += fprintf_duration(duration, fp);
664 if (trace->multiple_threads)
665 printed += fprintf(fp, "%d ", thread->tid);
670 static int trace__process_event(struct trace *trace, struct machine *machine,
671 union perf_event *event)
675 switch (event->header.type) {
676 case PERF_RECORD_LOST:
677 color_fprintf(trace->output, PERF_COLOR_RED,
678 "LOST %" PRIu64 " events!\n", event->lost.lost);
679 ret = machine__process_lost_event(machine, event);
681 ret = machine__process_event(machine, event);
688 static int trace__tool_process(struct perf_tool *tool,
689 union perf_event *event,
690 struct perf_sample *sample __maybe_unused,
691 struct machine *machine)
693 struct trace *trace = container_of(tool, struct trace, tool);
694 return trace__process_event(trace, machine, event);
697 static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
699 int err = symbol__init();
704 machine__init(&trace->host, "", HOST_KERNEL_ID);
705 machine__create_kernel_maps(&trace->host);
707 if (perf_target__has_task(&trace->opts.target)) {
708 err = perf_event__synthesize_thread_map(&trace->tool, evlist->threads,
712 err = perf_event__synthesize_threads(&trace->tool, trace__tool_process,
722 static int syscall__set_arg_fmts(struct syscall *sc)
724 struct format_field *field;
727 sc->arg_scnprintf = calloc(sc->tp_format->format.nr_fields - 1, sizeof(void *));
728 if (sc->arg_scnprintf == NULL)
732 sc->arg_parm = sc->fmt->arg_parm;
734 for (field = sc->tp_format->format.fields->next; field; field = field->next) {
735 if (sc->fmt && sc->fmt->arg_scnprintf[idx])
736 sc->arg_scnprintf[idx] = sc->fmt->arg_scnprintf[idx];
737 else if (field->flags & FIELD_IS_POINTER)
738 sc->arg_scnprintf[idx] = syscall_arg__scnprintf_hex;
745 static int trace__read_syscall_info(struct trace *trace, int id)
749 const char *name = audit_syscall_to_name(id, trace->audit_machine);
754 if (id > trace->syscalls.max) {
755 struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc));
757 if (nsyscalls == NULL)
760 if (trace->syscalls.max != -1) {
761 memset(nsyscalls + trace->syscalls.max + 1, 0,
762 (id - trace->syscalls.max) * sizeof(*sc));
764 memset(nsyscalls, 0, (id + 1) * sizeof(*sc));
767 trace->syscalls.table = nsyscalls;
768 trace->syscalls.max = id;
771 sc = trace->syscalls.table + id;
774 if (trace->ev_qualifier) {
775 bool in = strlist__find(trace->ev_qualifier, name) != NULL;
777 if (!(in ^ trace->not_ev_qualifier)) {
780 * No need to do read tracepoint information since this will be
787 sc->fmt = syscall_fmt__find(sc->name);
789 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name);
790 sc->tp_format = event_format__new("syscalls", tp_name);
792 if (sc->tp_format == NULL && sc->fmt && sc->fmt->alias) {
793 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias);
794 sc->tp_format = event_format__new("syscalls", tp_name);
797 if (sc->tp_format == NULL)
800 return syscall__set_arg_fmts(sc);
803 static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
808 if (sc->tp_format != NULL) {
809 struct format_field *field;
811 struct syscall_arg arg = {
816 for (field = sc->tp_format->format.fields->next; field;
817 field = field->next, ++arg.idx, bit <<= 1) {
821 printed += scnprintf(bf + printed, size - printed,
822 "%s%s: ", printed ? ", " : "", field->name);
823 if (sc->arg_scnprintf && sc->arg_scnprintf[arg.idx]) {
824 arg.val = args[arg.idx];
826 arg.parm = sc->arg_parm[arg.idx];
827 printed += sc->arg_scnprintf[arg.idx](bf + printed,
828 size - printed, &arg);
830 printed += scnprintf(bf + printed, size - printed,
831 "%ld", args[arg.idx]);
838 printed += scnprintf(bf + printed, size - printed,
840 printed ? ", " : "", i, args[i]);
848 typedef int (*tracepoint_handler)(struct trace *trace, struct perf_evsel *evsel,
849 struct perf_sample *sample);
851 static struct syscall *trace__syscall_info(struct trace *trace,
852 struct perf_evsel *evsel,
853 struct perf_sample *sample)
855 int id = perf_evsel__intval(evsel, sample, "id");
860 * XXX: Noticed on x86_64, reproduced as far back as 3.0.36, haven't tried
861 * before that, leaving at a higher verbosity level till that is
862 * explained. Reproduced with plain ftrace with:
864 * echo 1 > /t/events/raw_syscalls/sys_exit/enable
865 * grep "NR -1 " /t/trace_pipe
867 * After generating some load on the machine.
871 fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n",
872 id, perf_evsel__name(evsel), ++n);
877 if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) &&
878 trace__read_syscall_info(trace, id))
881 if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL))
884 return &trace->syscalls.table[id];
888 fprintf(trace->output, "Problems reading syscall %d", id);
889 if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL)
890 fprintf(trace->output, "(%s)", trace->syscalls.table[id].name);
891 fputs(" information\n", trace->output);
896 static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
897 struct perf_sample *sample)
902 struct thread *thread;
903 struct syscall *sc = trace__syscall_info(trace, evsel, sample);
904 struct thread_trace *ttrace;
912 thread = machine__findnew_thread(&trace->host, sample->pid,
914 ttrace = thread__trace(thread, trace->output);
918 args = perf_evsel__rawptr(evsel, sample, "args");
920 fprintf(trace->output, "Problems reading syscall arguments\n");
924 ttrace = thread->priv;
926 if (ttrace->entry_str == NULL) {
927 ttrace->entry_str = malloc(1024);
928 if (!ttrace->entry_str)
932 ttrace->entry_time = sample->time;
933 msg = ttrace->entry_str;
934 printed += scnprintf(msg + printed, 1024 - printed, "%s(", sc->name);
936 printed += syscall__scnprintf_args(sc, msg + printed, 1024 - printed, args);
938 if (!strcmp(sc->name, "exit_group") || !strcmp(sc->name, "exit")) {
939 if (!trace->duration_filter) {
940 trace__fprintf_entry_head(trace, thread, 1, sample->time, trace->output);
941 fprintf(trace->output, "%-70s\n", ttrace->entry_str);
944 ttrace->entry_pending = true;
949 static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
950 struct perf_sample *sample)
954 struct thread *thread;
955 struct syscall *sc = trace__syscall_info(trace, evsel, sample);
956 struct thread_trace *ttrace;
964 thread = machine__findnew_thread(&trace->host, sample->pid,
966 ttrace = thread__trace(thread, trace->output);
970 ret = perf_evsel__intval(evsel, sample, "ret");
972 ttrace = thread->priv;
974 ttrace->exit_time = sample->time;
976 if (ttrace->entry_time) {
977 duration = sample->time - ttrace->entry_time;
978 if (trace__filter_duration(trace, duration))
980 } else if (trace->duration_filter)
983 trace__fprintf_entry_head(trace, thread, duration, sample->time, trace->output);
985 if (ttrace->entry_pending) {
986 fprintf(trace->output, "%-70s", ttrace->entry_str);
988 fprintf(trace->output, " ... [");
989 color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued");
990 fprintf(trace->output, "]: %s()", sc->name);
993 if (sc->fmt == NULL) {
995 fprintf(trace->output, ") = %d", ret);
996 } else if (ret < 0 && sc->fmt->errmsg) {
998 const char *emsg = strerror_r(-ret, bf, sizeof(bf)),
999 *e = audit_errno_to_name(-ret);
1001 fprintf(trace->output, ") = -1 %s %s", e, emsg);
1002 } else if (ret == 0 && sc->fmt->timeout)
1003 fprintf(trace->output, ") = 0 Timeout");
1004 else if (sc->fmt->hexret)
1005 fprintf(trace->output, ") = %#x", ret);
1009 fputc('\n', trace->output);
1011 ttrace->entry_pending = false;
1016 static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel,
1017 struct perf_sample *sample)
1019 u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
1020 double runtime_ms = (double)runtime / NSEC_PER_MSEC;
1021 struct thread *thread = machine__findnew_thread(&trace->host,
1024 struct thread_trace *ttrace = thread__trace(thread, trace->output);
1029 ttrace->runtime_ms += runtime_ms;
1030 trace->runtime_ms += runtime_ms;
1034 fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n",
1036 perf_evsel__strval(evsel, sample, "comm"),
1037 (pid_t)perf_evsel__intval(evsel, sample, "pid"),
1039 perf_evsel__intval(evsel, sample, "vruntime"));
1043 static bool skip_sample(struct trace *trace, struct perf_sample *sample)
1045 if ((trace->pid_list && intlist__find(trace->pid_list, sample->pid)) ||
1046 (trace->tid_list && intlist__find(trace->tid_list, sample->tid)))
1049 if (trace->pid_list || trace->tid_list)
1055 static int trace__process_sample(struct perf_tool *tool,
1056 union perf_event *event __maybe_unused,
1057 struct perf_sample *sample,
1058 struct perf_evsel *evsel,
1059 struct machine *machine __maybe_unused)
1061 struct trace *trace = container_of(tool, struct trace, tool);
1064 tracepoint_handler handler = evsel->handler.func;
1066 if (skip_sample(trace, sample))
1069 if (!trace->full_time && trace->base_time == 0)
1070 trace->base_time = sample->time;
1073 handler(trace, evsel, sample);
1079 perf_session__has_tp(struct perf_session *session, const char *name)
1081 struct perf_evsel *evsel;
1083 evsel = perf_evlist__find_tracepoint_by_name(session->evlist, name);
1085 return evsel != NULL;
1088 static int parse_target_str(struct trace *trace)
1090 if (trace->opts.target.pid) {
1091 trace->pid_list = intlist__new(trace->opts.target.pid);
1092 if (trace->pid_list == NULL) {
1093 pr_err("Error parsing process id string\n");
1098 if (trace->opts.target.tid) {
1099 trace->tid_list = intlist__new(trace->opts.target.tid);
1100 if (trace->tid_list == NULL) {
1101 pr_err("Error parsing thread id string\n");
1109 static int trace__run(struct trace *trace, int argc, const char **argv)
1111 struct perf_evlist *evlist = perf_evlist__new();
1112 struct perf_evsel *evsel;
1114 unsigned long before;
1115 const bool forks = argc > 0;
1117 if (evlist == NULL) {
1118 fprintf(trace->output, "Not enough memory to run!\n");
1122 if (perf_evlist__add_newtp(evlist, "raw_syscalls", "sys_enter", trace__sys_enter) ||
1123 perf_evlist__add_newtp(evlist, "raw_syscalls", "sys_exit", trace__sys_exit)) {
1124 fprintf(trace->output, "Couldn't read the raw_syscalls tracepoints information!\n");
1125 goto out_delete_evlist;
1129 perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
1130 trace__sched_stat_runtime)) {
1131 fprintf(trace->output, "Couldn't read the sched_stat_runtime tracepoint information!\n");
1132 goto out_delete_evlist;
1135 err = perf_evlist__create_maps(evlist, &trace->opts.target);
1137 fprintf(trace->output, "Problems parsing the target to trace, check your options!\n");
1138 goto out_delete_evlist;
1141 err = trace__symbols_init(trace, evlist);
1143 fprintf(trace->output, "Problems initializing symbol libraries!\n");
1144 goto out_delete_maps;
1147 perf_evlist__config(evlist, &trace->opts);
1149 signal(SIGCHLD, sig_handler);
1150 signal(SIGINT, sig_handler);
1153 err = perf_evlist__prepare_workload(evlist, &trace->opts.target,
1154 argv, false, false);
1156 fprintf(trace->output, "Couldn't run the workload!\n");
1157 goto out_delete_maps;
1161 err = perf_evlist__open(evlist);
1163 fprintf(trace->output, "Couldn't create the events: %s\n", strerror(errno));
1164 goto out_delete_maps;
1167 err = perf_evlist__mmap(evlist, UINT_MAX, false);
1169 fprintf(trace->output, "Couldn't mmap the events: %s\n", strerror(errno));
1170 goto out_close_evlist;
1173 perf_evlist__enable(evlist);
1176 perf_evlist__start_workload(evlist);
1178 trace->multiple_threads = evlist->threads->map[0] == -1 || evlist->threads->nr > 1;
1180 before = trace->nr_events;
1182 for (i = 0; i < evlist->nr_mmaps; i++) {
1183 union perf_event *event;
1185 while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) {
1186 const u32 type = event->header.type;
1187 tracepoint_handler handler;
1188 struct perf_sample sample;
1192 err = perf_evlist__parse_sample(evlist, event, &sample);
1194 fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err);
1198 if (!trace->full_time && trace->base_time == 0)
1199 trace->base_time = sample.time;
1201 if (type != PERF_RECORD_SAMPLE) {
1202 trace__process_event(trace, &trace->host, event);
1206 evsel = perf_evlist__id2evsel(evlist, sample.id);
1207 if (evsel == NULL) {
1208 fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample.id);
1212 if (sample.raw_data == NULL) {
1213 fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
1214 perf_evsel__name(evsel), sample.tid,
1215 sample.cpu, sample.raw_size);
1219 handler = evsel->handler.func;
1220 handler(trace, evsel, &sample);
1223 goto out_unmap_evlist;
1227 if (trace->nr_events == before) {
1229 goto out_unmap_evlist;
1231 poll(evlist->pollfd, evlist->nr_fds, -1);
1235 perf_evlist__disable(evlist);
1240 perf_evlist__munmap(evlist);
1242 perf_evlist__close(evlist);
1244 perf_evlist__delete_maps(evlist);
1246 perf_evlist__delete(evlist);
1251 static int trace__replay(struct trace *trace)
1253 const struct perf_evsel_str_handler handlers[] = {
1254 { "raw_syscalls:sys_enter", trace__sys_enter, },
1255 { "raw_syscalls:sys_exit", trace__sys_exit, },
1258 struct perf_session *session;
1261 trace->tool.sample = trace__process_sample;
1262 trace->tool.mmap = perf_event__process_mmap;
1263 trace->tool.mmap2 = perf_event__process_mmap2;
1264 trace->tool.comm = perf_event__process_comm;
1265 trace->tool.exit = perf_event__process_exit;
1266 trace->tool.fork = perf_event__process_fork;
1267 trace->tool.attr = perf_event__process_attr;
1268 trace->tool.tracing_data = perf_event__process_tracing_data;
1269 trace->tool.build_id = perf_event__process_build_id;
1271 trace->tool.ordered_samples = true;
1272 trace->tool.ordering_requires_timestamps = true;
1274 /* add tid to output */
1275 trace->multiple_threads = true;
1277 if (symbol__init() < 0)
1280 session = perf_session__new(input_name, O_RDONLY, 0, false,
1282 if (session == NULL)
1285 err = perf_session__set_tracepoints_handlers(session, handlers);
1289 if (!perf_session__has_tp(session, "raw_syscalls:sys_enter")) {
1290 pr_err("Data file does not have raw_syscalls:sys_enter events\n");
1294 if (!perf_session__has_tp(session, "raw_syscalls:sys_exit")) {
1295 pr_err("Data file does not have raw_syscalls:sys_exit events\n");
1299 err = parse_target_str(trace);
1305 err = perf_session__process_events(session, &trace->tool);
1307 pr_err("Failed to process events, error %d", err);
1310 perf_session__delete(session);
1315 static size_t trace__fprintf_threads_header(FILE *fp)
1319 printed = fprintf(fp, "\n _____________________________________________________________________\n");
1320 printed += fprintf(fp," __) Summary of events (__\n\n");
1321 printed += fprintf(fp," [ task - pid ] [ events ] [ ratio ] [ runtime ]\n");
1322 printed += fprintf(fp," _____________________________________________________________________\n\n");
1327 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
1329 size_t printed = trace__fprintf_threads_header(fp);
1332 for (nd = rb_first(&trace->host.threads); nd; nd = rb_next(nd)) {
1333 struct thread *thread = rb_entry(nd, struct thread, rb_node);
1334 struct thread_trace *ttrace = thread->priv;
1341 ratio = (double)ttrace->nr_events / trace->nr_events * 100.0;
1343 color = PERF_COLOR_NORMAL;
1345 color = PERF_COLOR_RED;
1346 else if (ratio > 25.0)
1347 color = PERF_COLOR_GREEN;
1348 else if (ratio > 5.0)
1349 color = PERF_COLOR_YELLOW;
1351 printed += color_fprintf(fp, color, "%20s", thread->comm);
1352 printed += fprintf(fp, " - %-5d :%11lu [", thread->tid, ttrace->nr_events);
1353 printed += color_fprintf(fp, color, "%5.1f%%", ratio);
1354 printed += fprintf(fp, " ] %10.3f ms\n", ttrace->runtime_ms);
1360 static int trace__set_duration(const struct option *opt, const char *str,
1361 int unset __maybe_unused)
1363 struct trace *trace = opt->value;
1365 trace->duration_filter = atof(str);
1369 static int trace__open_output(struct trace *trace, const char *filename)
1373 if (!stat(filename, &st) && st.st_size) {
1374 char oldname[PATH_MAX];
1376 scnprintf(oldname, sizeof(oldname), "%s.old", filename);
1378 rename(filename, oldname);
1381 trace->output = fopen(filename, "w");
1383 return trace->output == NULL ? -errno : 0;
1386 int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
1388 const char * const trace_usage[] = {
1389 "perf trace [<options>] [<command>]",
1390 "perf trace [<options>] -- <command> [<options>]",
1393 struct trace trace = {
1394 .audit_machine = audit_detect_machine(),
1403 .user_freq = UINT_MAX,
1404 .user_interval = ULLONG_MAX,
1410 const char *output_name = NULL;
1411 const char *ev_qualifier_str = NULL;
1412 const struct option trace_options[] = {
1413 OPT_STRING('e', "expr", &ev_qualifier_str, "expr",
1414 "list of events to trace"),
1415 OPT_STRING('o', "output", &output_name, "file", "output file name"),
1416 OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"),
1417 OPT_STRING('p', "pid", &trace.opts.target.pid, "pid",
1418 "trace events on existing process id"),
1419 OPT_STRING('t', "tid", &trace.opts.target.tid, "tid",
1420 "trace events on existing thread id"),
1421 OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide,
1422 "system-wide collection from all CPUs"),
1423 OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu",
1424 "list of cpus to monitor"),
1425 OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit,
1426 "child tasks do not inherit counters"),
1427 OPT_UINTEGER('m', "mmap-pages", &trace.opts.mmap_pages,
1428 "number of mmap data pages"),
1429 OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user",
1431 OPT_CALLBACK(0, "duration", &trace, "float",
1432 "show only events with duration > N.M ms",
1433 trace__set_duration),
1434 OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"),
1435 OPT_INCR('v', "verbose", &verbose, "be more verbose"),
1436 OPT_BOOLEAN('T', "time", &trace.full_time,
1437 "Show full timestamp, not time relative to first start"),
1443 argc = parse_options(argc, argv, trace_options, trace_usage, 0);
1445 if (output_name != NULL) {
1446 err = trace__open_output(&trace, output_name);
1448 perror("failed to create output file");
1453 if (ev_qualifier_str != NULL) {
1454 const char *s = ev_qualifier_str;
1456 trace.not_ev_qualifier = *s == '!';
1457 if (trace.not_ev_qualifier)
1459 trace.ev_qualifier = strlist__new(true, s);
1460 if (trace.ev_qualifier == NULL) {
1461 fputs("Not enough memory to parse event qualifier",
1468 err = perf_target__validate(&trace.opts.target);
1470 perf_target__strerror(&trace.opts.target, err, bf, sizeof(bf));
1471 fprintf(trace.output, "%s", bf);
1475 err = perf_target__parse_uid(&trace.opts.target);
1477 perf_target__strerror(&trace.opts.target, err, bf, sizeof(bf));
1478 fprintf(trace.output, "%s", bf);
1482 if (!argc && perf_target__none(&trace.opts.target))
1483 trace.opts.target.system_wide = true;
1486 err = trace__replay(&trace);
1488 err = trace__run(&trace, argc, argv);
1490 if (trace.sched && !err)
1491 trace__fprintf_thread_summary(&trace, trace.output);
1494 if (output_name != NULL)
1495 fclose(trace.output);