1 #include <traceevent/event-parse.h>
3 #include "util/color.h"
4 #include "util/debug.h"
5 #include "util/evlist.h"
6 #include "util/machine.h"
7 #include "util/session.h"
8 #include "util/thread.h"
9 #include "util/parse-options.h"
10 #include "util/strlist.h"
11 #include "util/intlist.h"
12 #include "util/thread_map.h"
17 #include <linux/futex.h>
19 /* For older distros: */
21 # define MAP_STACK 0x20000
25 # define MADV_HWPOISON 100
28 #ifndef MADV_MERGEABLE
29 # define MADV_MERGEABLE 12
32 #ifndef MADV_UNMERGEABLE
33 # define MADV_UNMERGEABLE 13
48 #define DEFINE_STRARRAY(array) struct strarray strarray__##array = { \
49 .nr_entries = ARRAY_SIZE(array), \
53 static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size,
54 struct syscall_arg *arg)
57 struct strarray *sa = arg->parm;
59 if (idx < 0 || idx >= sa->nr_entries)
60 return scnprintf(bf, size, "%d", idx);
62 return scnprintf(bf, size, "%s", sa->entries[idx]);
65 #define SCA_STRARRAY syscall_arg__scnprintf_strarray
67 static size_t syscall_arg__scnprintf_hex(char *bf, size_t size,
68 struct syscall_arg *arg)
70 return scnprintf(bf, size, "%#lx", arg->val);
73 #define SCA_HEX syscall_arg__scnprintf_hex
75 static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size,
76 struct syscall_arg *arg)
78 int printed = 0, prot = arg->val;
80 if (prot == PROT_NONE)
81 return scnprintf(bf, size, "NONE");
82 #define P_MMAP_PROT(n) \
83 if (prot & PROT_##n) { \
84 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
94 P_MMAP_PROT(GROWSDOWN);
99 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", prot);
104 #define SCA_MMAP_PROT syscall_arg__scnprintf_mmap_prot
106 static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size,
107 struct syscall_arg *arg)
109 int printed = 0, flags = arg->val;
111 #define P_MMAP_FLAG(n) \
112 if (flags & MAP_##n) { \
113 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
118 P_MMAP_FLAG(PRIVATE);
122 P_MMAP_FLAG(ANONYMOUS);
123 P_MMAP_FLAG(DENYWRITE);
124 P_MMAP_FLAG(EXECUTABLE);
127 P_MMAP_FLAG(GROWSDOWN);
129 P_MMAP_FLAG(HUGETLB);
132 P_MMAP_FLAG(NONBLOCK);
133 P_MMAP_FLAG(NORESERVE);
134 P_MMAP_FLAG(POPULATE);
136 #ifdef MAP_UNINITIALIZED
137 P_MMAP_FLAG(UNINITIALIZED);
142 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
147 #define SCA_MMAP_FLAGS syscall_arg__scnprintf_mmap_flags
149 static size_t syscall_arg__scnprintf_madvise_behavior(char *bf, size_t size,
150 struct syscall_arg *arg)
152 int behavior = arg->val;
155 #define P_MADV_BHV(n) case MADV_##n: return scnprintf(bf, size, #n)
158 P_MADV_BHV(SEQUENTIAL);
159 P_MADV_BHV(WILLNEED);
160 P_MADV_BHV(DONTNEED);
162 P_MADV_BHV(DONTFORK);
164 P_MADV_BHV(HWPOISON);
165 #ifdef MADV_SOFT_OFFLINE
166 P_MADV_BHV(SOFT_OFFLINE);
168 P_MADV_BHV(MERGEABLE);
169 P_MADV_BHV(UNMERGEABLE);
171 P_MADV_BHV(HUGEPAGE);
173 #ifdef MADV_NOHUGEPAGE
174 P_MADV_BHV(NOHUGEPAGE);
177 P_MADV_BHV(DONTDUMP);
186 return scnprintf(bf, size, "%#x", behavior);
189 #define SCA_MADV_BHV syscall_arg__scnprintf_madvise_behavior
191 static size_t syscall_arg__scnprintf_futex_op(char *bf, size_t size, struct syscall_arg *arg)
193 enum syscall_futex_args {
194 SCF_UADDR = (1 << 0),
197 SCF_TIMEOUT = (1 << 3),
198 SCF_UADDR2 = (1 << 4),
202 int cmd = op & FUTEX_CMD_MASK;
206 #define P_FUTEX_OP(n) case FUTEX_##n: printed = scnprintf(bf, size, #n);
207 P_FUTEX_OP(WAIT); arg->mask |= SCF_VAL3|SCF_UADDR2; break;
208 P_FUTEX_OP(WAKE); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
209 P_FUTEX_OP(FD); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
210 P_FUTEX_OP(REQUEUE); arg->mask |= SCF_VAL3|SCF_TIMEOUT; break;
211 P_FUTEX_OP(CMP_REQUEUE); arg->mask |= SCF_TIMEOUT; break;
212 P_FUTEX_OP(CMP_REQUEUE_PI); arg->mask |= SCF_TIMEOUT; break;
213 P_FUTEX_OP(WAKE_OP); break;
214 P_FUTEX_OP(LOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
215 P_FUTEX_OP(UNLOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
216 P_FUTEX_OP(TRYLOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2; break;
217 P_FUTEX_OP(WAIT_BITSET); arg->mask |= SCF_UADDR2; break;
218 P_FUTEX_OP(WAKE_BITSET); arg->mask |= SCF_UADDR2; break;
219 P_FUTEX_OP(WAIT_REQUEUE_PI); break;
220 default: printed = scnprintf(bf, size, "%#x", cmd); break;
223 if (op & FUTEX_PRIVATE_FLAG)
224 printed += scnprintf(bf + printed, size - printed, "|PRIV");
226 if (op & FUTEX_CLOCK_REALTIME)
227 printed += scnprintf(bf + printed, size - printed, "|CLKRT");
232 #define SCA_FUTEX_OP syscall_arg__scnprintf_futex_op
234 static const char *itimers[] = { "REAL", "VIRTUAL", "PROF", };
235 static DEFINE_STRARRAY(itimers);
237 static const char *whences[] = { "SET", "CUR", "END",
245 static DEFINE_STRARRAY(whences);
247 static const char *fcntl_cmds[] = {
248 "DUPFD", "GETFD", "SETFD", "GETFL", "SETFL", "GETLK", "SETLK",
249 "SETLKW", "SETOWN", "GETOWN", "SETSIG", "GETSIG", "F_GETLK64",
250 "F_SETLK64", "F_SETLKW64", "F_SETOWN_EX", "F_GETOWN_EX",
253 static DEFINE_STRARRAY(fcntl_cmds);
255 static const char *rlimit_resources[] = {
256 "CPU", "FSIZE", "DATA", "STACK", "CORE", "RSS", "NPROC", "NOFILE",
257 "MEMLOCK", "AS", "LOCKS", "SIGPENDING", "MSGQUEUE", "NICE", "RTPRIO",
260 static DEFINE_STRARRAY(rlimit_resources);
262 static const char *sighow[] = { "BLOCK", "UNBLOCK", "SETMASK", };
263 static DEFINE_STRARRAY(sighow);
265 static const char *socket_families[] = {
266 "UNSPEC", "LOCAL", "INET", "AX25", "IPX", "APPLETALK", "NETROM",
267 "BRIDGE", "ATMPVC", "X25", "INET6", "ROSE", "DECnet", "NETBEUI",
268 "SECURITY", "KEY", "NETLINK", "PACKET", "ASH", "ECONET", "ATMSVC",
269 "RDS", "SNA", "IRDA", "PPPOX", "WANPIPE", "LLC", "IB", "CAN", "TIPC",
270 "BLUETOOTH", "IUCV", "RXRPC", "ISDN", "PHONET", "IEEE802154", "CAIF",
271 "ALG", "NFC", "VSOCK",
273 static DEFINE_STRARRAY(socket_families);
275 #ifndef SOCK_TYPE_MASK
276 #define SOCK_TYPE_MASK 0xf
279 static size_t syscall_arg__scnprintf_socket_type(char *bf, size_t size,
280 struct syscall_arg *arg)
284 flags = type & ~SOCK_TYPE_MASK;
286 type &= SOCK_TYPE_MASK;
288 * Can't use a strarray, MIPS may override for ABI reasons.
291 #define P_SK_TYPE(n) case SOCK_##n: printed = scnprintf(bf, size, #n); break;
296 P_SK_TYPE(SEQPACKET);
301 printed = scnprintf(bf, size, "%#x", type);
304 #define P_SK_FLAG(n) \
305 if (flags & SOCK_##n) { \
306 printed += scnprintf(bf + printed, size - printed, "|%s", #n); \
307 flags &= ~SOCK_##n; \
315 printed += scnprintf(bf + printed, size - printed, "|%#x", flags);
320 #define SCA_SK_TYPE syscall_arg__scnprintf_socket_type
322 static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size,
323 struct syscall_arg *arg)
328 if (mode == F_OK) /* 0 */
329 return scnprintf(bf, size, "F");
331 if (mode & n##_OK) { \
332 printed += scnprintf(bf + printed, size - printed, "%s", #n); \
342 printed += scnprintf(bf + printed, size - printed, "|%#x", mode);
347 #define SCA_ACCMODE syscall_arg__scnprintf_access_mode
349 static size_t syscall_arg__scnprintf_open_flags(char *bf, size_t size,
350 struct syscall_arg *arg)
352 int printed = 0, flags = arg->val;
354 if (!(flags & O_CREAT))
355 arg->mask |= 1 << (arg->idx + 1); /* Mask the mode parm */
358 return scnprintf(bf, size, "RDONLY");
360 if (flags & O_##n) { \
361 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
385 if ((flags & O_SYNC) == O_SYNC)
386 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", "SYNC");
398 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
403 #define SCA_OPEN_FLAGS syscall_arg__scnprintf_open_flags
405 static size_t syscall_arg__scnprintf_signum(char *bf, size_t size, struct syscall_arg *arg)
410 #define P_SIGNUM(n) case SIG##n: return scnprintf(bf, size, #n)
445 return scnprintf(bf, size, "%#x", sig);
448 #define SCA_SIGNUM syscall_arg__scnprintf_signum
450 static struct syscall_fmt {
453 size_t (*arg_scnprintf[6])(char *bf, size_t size, struct syscall_arg *arg);
459 { .name = "access", .errmsg = true,
460 .arg_scnprintf = { [1] = SCA_ACCMODE, /* mode */ }, },
461 { .name = "arch_prctl", .errmsg = true, .alias = "prctl", },
462 { .name = "brk", .hexret = true,
463 .arg_scnprintf = { [0] = SCA_HEX, /* brk */ }, },
464 { .name = "mmap", .hexret = true, },
465 { .name = "connect", .errmsg = true, },
466 { .name = "fcntl", .errmsg = true,
467 .arg_scnprintf = { [1] = SCA_STRARRAY, /* cmd */ },
468 .arg_parm = { [1] = &strarray__fcntl_cmds, /* cmd */ }, },
469 { .name = "fstat", .errmsg = true, .alias = "newfstat", },
470 { .name = "fstatat", .errmsg = true, .alias = "newfstatat", },
471 { .name = "futex", .errmsg = true,
472 .arg_scnprintf = { [1] = SCA_FUTEX_OP, /* op */ }, },
473 { .name = "getitimer", .errmsg = true,
474 .arg_scnprintf = { [0] = SCA_STRARRAY, /* which */ },
475 .arg_parm = { [0] = &strarray__itimers, /* which */ }, },
476 { .name = "getrlimit", .errmsg = true,
477 .arg_scnprintf = { [0] = SCA_STRARRAY, /* resource */ },
478 .arg_parm = { [0] = &strarray__rlimit_resources, /* resource */ }, },
479 { .name = "ioctl", .errmsg = true,
480 .arg_scnprintf = { [2] = SCA_HEX, /* arg */ }, },
481 { .name = "kill", .errmsg = true,
482 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
483 { .name = "lseek", .errmsg = true,
484 .arg_scnprintf = { [2] = SCA_STRARRAY, /* whence */ },
485 .arg_parm = { [2] = &strarray__whences, /* whence */ }, },
486 { .name = "lstat", .errmsg = true, .alias = "newlstat", },
487 { .name = "madvise", .errmsg = true,
488 .arg_scnprintf = { [0] = SCA_HEX, /* start */
489 [2] = SCA_MADV_BHV, /* behavior */ }, },
490 { .name = "mmap", .hexret = true,
491 .arg_scnprintf = { [0] = SCA_HEX, /* addr */
492 [2] = SCA_MMAP_PROT, /* prot */
493 [3] = SCA_MMAP_FLAGS, /* flags */ }, },
494 { .name = "mprotect", .errmsg = true,
495 .arg_scnprintf = { [0] = SCA_HEX, /* start */
496 [2] = SCA_MMAP_PROT, /* prot */ }, },
497 { .name = "mremap", .hexret = true,
498 .arg_scnprintf = { [0] = SCA_HEX, /* addr */
499 [4] = SCA_HEX, /* new_addr */ }, },
500 { .name = "munmap", .errmsg = true,
501 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
502 { .name = "open", .errmsg = true,
503 .arg_scnprintf = { [1] = SCA_OPEN_FLAGS, /* flags */ }, },
504 { .name = "open_by_handle_at", .errmsg = true,
505 .arg_scnprintf = { [2] = SCA_OPEN_FLAGS, /* flags */ }, },
506 { .name = "openat", .errmsg = true,
507 .arg_scnprintf = { [2] = SCA_OPEN_FLAGS, /* flags */ }, },
508 { .name = "poll", .errmsg = true, .timeout = true, },
509 { .name = "ppoll", .errmsg = true, .timeout = true, },
510 { .name = "pread", .errmsg = true, .alias = "pread64", },
511 { .name = "prlimit64", .errmsg = true,
512 .arg_scnprintf = { [1] = SCA_STRARRAY, /* resource */ },
513 .arg_parm = { [1] = &strarray__rlimit_resources, /* resource */ }, },
514 { .name = "pwrite", .errmsg = true, .alias = "pwrite64", },
515 { .name = "read", .errmsg = true, },
516 { .name = "recvfrom", .errmsg = true, },
517 { .name = "rt_sigaction", .errmsg = true,
518 .arg_scnprintf = { [0] = SCA_SIGNUM, /* sig */ }, },
519 { .name = "rt_sigprocmask", .errmsg = true,
520 .arg_scnprintf = { [0] = SCA_STRARRAY, /* how */ },
521 .arg_parm = { [0] = &strarray__sighow, /* how */ }, },
522 { .name = "rt_sigqueueinfo", .errmsg = true,
523 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
524 { .name = "rt_tgsigqueueinfo", .errmsg = true,
525 .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
526 { .name = "select", .errmsg = true, .timeout = true, },
527 { .name = "setitimer", .errmsg = true,
528 .arg_scnprintf = { [0] = SCA_STRARRAY, /* which */ },
529 .arg_parm = { [0] = &strarray__itimers, /* which */ }, },
530 { .name = "setrlimit", .errmsg = true,
531 .arg_scnprintf = { [0] = SCA_STRARRAY, /* resource */ },
532 .arg_parm = { [0] = &strarray__rlimit_resources, /* resource */ }, },
533 { .name = "socket", .errmsg = true,
534 .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
535 [1] = SCA_SK_TYPE, /* type */ },
536 .arg_parm = { [0] = &strarray__socket_families, /* family */ }, },
537 { .name = "stat", .errmsg = true, .alias = "newstat", },
538 { .name = "tgkill", .errmsg = true,
539 .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
540 { .name = "tkill", .errmsg = true,
541 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
542 { .name = "uname", .errmsg = true, .alias = "newuname", },
545 static int syscall_fmt__cmp(const void *name, const void *fmtp)
547 const struct syscall_fmt *fmt = fmtp;
548 return strcmp(name, fmt->name);
551 static struct syscall_fmt *syscall_fmt__find(const char *name)
553 const int nmemb = ARRAY_SIZE(syscall_fmts);
554 return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp);
558 struct event_format *tp_format;
561 struct syscall_fmt *fmt;
562 size_t (**arg_scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
566 static size_t fprintf_duration(unsigned long t, FILE *fp)
568 double duration = (double)t / NSEC_PER_MSEC;
569 size_t printed = fprintf(fp, "(");
572 printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration);
573 else if (duration >= 0.01)
574 printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration);
576 printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration);
577 return printed + fprintf(fp, "): ");
580 struct thread_trace {
584 unsigned long nr_events;
589 static struct thread_trace *thread_trace__new(void)
591 return zalloc(sizeof(struct thread_trace));
594 static struct thread_trace *thread__trace(struct thread *thread, FILE *fp)
596 struct thread_trace *ttrace;
601 if (thread->priv == NULL)
602 thread->priv = thread_trace__new();
604 if (thread->priv == NULL)
607 ttrace = thread->priv;
612 color_fprintf(fp, PERF_COLOR_RED,
613 "WARNING: not enough memory, dropping samples!\n");
618 struct perf_tool tool;
622 struct syscall *table;
624 struct perf_record_opts opts;
629 unsigned long nr_events;
630 struct strlist *ev_qualifier;
631 bool not_ev_qualifier;
632 struct intlist *tid_list;
633 struct intlist *pid_list;
635 bool multiple_threads;
636 double duration_filter;
640 static bool trace__filter_duration(struct trace *trace, double t)
642 return t < (trace->duration_filter * NSEC_PER_MSEC);
645 static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
647 double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC;
649 return fprintf(fp, "%10.3f ", ts);
652 static bool done = false;
654 static void sig_handler(int sig __maybe_unused)
659 static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread,
660 u64 duration, u64 tstamp, FILE *fp)
662 size_t printed = trace__fprintf_tstamp(trace, tstamp, fp);
663 printed += fprintf_duration(duration, fp);
665 if (trace->multiple_threads)
666 printed += fprintf(fp, "%d ", thread->tid);
671 static int trace__process_event(struct trace *trace, struct machine *machine,
672 union perf_event *event)
676 switch (event->header.type) {
677 case PERF_RECORD_LOST:
678 color_fprintf(trace->output, PERF_COLOR_RED,
679 "LOST %" PRIu64 " events!\n", event->lost.lost);
680 ret = machine__process_lost_event(machine, event);
682 ret = machine__process_event(machine, event);
689 static int trace__tool_process(struct perf_tool *tool,
690 union perf_event *event,
691 struct perf_sample *sample __maybe_unused,
692 struct machine *machine)
694 struct trace *trace = container_of(tool, struct trace, tool);
695 return trace__process_event(trace, machine, event);
698 static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
700 int err = symbol__init();
705 machine__init(&trace->host, "", HOST_KERNEL_ID);
706 machine__create_kernel_maps(&trace->host);
708 if (perf_target__has_task(&trace->opts.target)) {
709 err = perf_event__synthesize_thread_map(&trace->tool, evlist->threads,
713 err = perf_event__synthesize_threads(&trace->tool, trace__tool_process,
723 static int syscall__set_arg_fmts(struct syscall *sc)
725 struct format_field *field;
728 sc->arg_scnprintf = calloc(sc->tp_format->format.nr_fields - 1, sizeof(void *));
729 if (sc->arg_scnprintf == NULL)
733 sc->arg_parm = sc->fmt->arg_parm;
735 for (field = sc->tp_format->format.fields->next; field; field = field->next) {
736 if (sc->fmt && sc->fmt->arg_scnprintf[idx])
737 sc->arg_scnprintf[idx] = sc->fmt->arg_scnprintf[idx];
738 else if (field->flags & FIELD_IS_POINTER)
739 sc->arg_scnprintf[idx] = syscall_arg__scnprintf_hex;
746 static int trace__read_syscall_info(struct trace *trace, int id)
750 const char *name = audit_syscall_to_name(id, trace->audit_machine);
755 if (id > trace->syscalls.max) {
756 struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc));
758 if (nsyscalls == NULL)
761 if (trace->syscalls.max != -1) {
762 memset(nsyscalls + trace->syscalls.max + 1, 0,
763 (id - trace->syscalls.max) * sizeof(*sc));
765 memset(nsyscalls, 0, (id + 1) * sizeof(*sc));
768 trace->syscalls.table = nsyscalls;
769 trace->syscalls.max = id;
772 sc = trace->syscalls.table + id;
775 if (trace->ev_qualifier) {
776 bool in = strlist__find(trace->ev_qualifier, name) != NULL;
778 if (!(in ^ trace->not_ev_qualifier)) {
781 * No need to do read tracepoint information since this will be
788 sc->fmt = syscall_fmt__find(sc->name);
790 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name);
791 sc->tp_format = event_format__new("syscalls", tp_name);
793 if (sc->tp_format == NULL && sc->fmt && sc->fmt->alias) {
794 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias);
795 sc->tp_format = event_format__new("syscalls", tp_name);
798 if (sc->tp_format == NULL)
801 return syscall__set_arg_fmts(sc);
804 static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
809 if (sc->tp_format != NULL) {
810 struct format_field *field;
812 struct syscall_arg arg = {
817 for (field = sc->tp_format->format.fields->next; field;
818 field = field->next, ++arg.idx, bit <<= 1) {
822 printed += scnprintf(bf + printed, size - printed,
823 "%s%s: ", printed ? ", " : "", field->name);
824 if (sc->arg_scnprintf && sc->arg_scnprintf[arg.idx]) {
825 arg.val = args[arg.idx];
827 arg.parm = sc->arg_parm[arg.idx];
828 printed += sc->arg_scnprintf[arg.idx](bf + printed,
829 size - printed, &arg);
831 printed += scnprintf(bf + printed, size - printed,
832 "%ld", args[arg.idx]);
839 printed += scnprintf(bf + printed, size - printed,
841 printed ? ", " : "", i, args[i]);
849 typedef int (*tracepoint_handler)(struct trace *trace, struct perf_evsel *evsel,
850 struct perf_sample *sample);
852 static struct syscall *trace__syscall_info(struct trace *trace,
853 struct perf_evsel *evsel,
854 struct perf_sample *sample)
856 int id = perf_evsel__intval(evsel, sample, "id");
861 * XXX: Noticed on x86_64, reproduced as far back as 3.0.36, haven't tried
862 * before that, leaving at a higher verbosity level till that is
863 * explained. Reproduced with plain ftrace with:
865 * echo 1 > /t/events/raw_syscalls/sys_exit/enable
866 * grep "NR -1 " /t/trace_pipe
868 * After generating some load on the machine.
872 fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n",
873 id, perf_evsel__name(evsel), ++n);
878 if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) &&
879 trace__read_syscall_info(trace, id))
882 if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL))
885 return &trace->syscalls.table[id];
889 fprintf(trace->output, "Problems reading syscall %d", id);
890 if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL)
891 fprintf(trace->output, "(%s)", trace->syscalls.table[id].name);
892 fputs(" information\n", trace->output);
897 static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
898 struct perf_sample *sample)
903 struct thread *thread;
904 struct syscall *sc = trace__syscall_info(trace, evsel, sample);
905 struct thread_trace *ttrace;
913 thread = machine__findnew_thread(&trace->host, sample->pid,
915 ttrace = thread__trace(thread, trace->output);
919 args = perf_evsel__rawptr(evsel, sample, "args");
921 fprintf(trace->output, "Problems reading syscall arguments\n");
925 ttrace = thread->priv;
927 if (ttrace->entry_str == NULL) {
928 ttrace->entry_str = malloc(1024);
929 if (!ttrace->entry_str)
933 ttrace->entry_time = sample->time;
934 msg = ttrace->entry_str;
935 printed += scnprintf(msg + printed, 1024 - printed, "%s(", sc->name);
937 printed += syscall__scnprintf_args(sc, msg + printed, 1024 - printed, args);
939 if (!strcmp(sc->name, "exit_group") || !strcmp(sc->name, "exit")) {
940 if (!trace->duration_filter) {
941 trace__fprintf_entry_head(trace, thread, 1, sample->time, trace->output);
942 fprintf(trace->output, "%-70s\n", ttrace->entry_str);
945 ttrace->entry_pending = true;
950 static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
951 struct perf_sample *sample)
955 struct thread *thread;
956 struct syscall *sc = trace__syscall_info(trace, evsel, sample);
957 struct thread_trace *ttrace;
965 thread = machine__findnew_thread(&trace->host, sample->pid,
967 ttrace = thread__trace(thread, trace->output);
971 ret = perf_evsel__intval(evsel, sample, "ret");
973 ttrace = thread->priv;
975 ttrace->exit_time = sample->time;
977 if (ttrace->entry_time) {
978 duration = sample->time - ttrace->entry_time;
979 if (trace__filter_duration(trace, duration))
981 } else if (trace->duration_filter)
984 trace__fprintf_entry_head(trace, thread, duration, sample->time, trace->output);
986 if (ttrace->entry_pending) {
987 fprintf(trace->output, "%-70s", ttrace->entry_str);
989 fprintf(trace->output, " ... [");
990 color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued");
991 fprintf(trace->output, "]: %s()", sc->name);
994 if (sc->fmt == NULL) {
996 fprintf(trace->output, ") = %d", ret);
997 } else if (ret < 0 && sc->fmt->errmsg) {
999 const char *emsg = strerror_r(-ret, bf, sizeof(bf)),
1000 *e = audit_errno_to_name(-ret);
1002 fprintf(trace->output, ") = -1 %s %s", e, emsg);
1003 } else if (ret == 0 && sc->fmt->timeout)
1004 fprintf(trace->output, ") = 0 Timeout");
1005 else if (sc->fmt->hexret)
1006 fprintf(trace->output, ") = %#x", ret);
1010 fputc('\n', trace->output);
1012 ttrace->entry_pending = false;
1017 static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel,
1018 struct perf_sample *sample)
1020 u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
1021 double runtime_ms = (double)runtime / NSEC_PER_MSEC;
1022 struct thread *thread = machine__findnew_thread(&trace->host,
1025 struct thread_trace *ttrace = thread__trace(thread, trace->output);
1030 ttrace->runtime_ms += runtime_ms;
1031 trace->runtime_ms += runtime_ms;
1035 fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n",
1037 perf_evsel__strval(evsel, sample, "comm"),
1038 (pid_t)perf_evsel__intval(evsel, sample, "pid"),
1040 perf_evsel__intval(evsel, sample, "vruntime"));
1044 static bool skip_sample(struct trace *trace, struct perf_sample *sample)
1046 if ((trace->pid_list && intlist__find(trace->pid_list, sample->pid)) ||
1047 (trace->tid_list && intlist__find(trace->tid_list, sample->tid)))
1050 if (trace->pid_list || trace->tid_list)
1056 static int trace__process_sample(struct perf_tool *tool,
1057 union perf_event *event __maybe_unused,
1058 struct perf_sample *sample,
1059 struct perf_evsel *evsel,
1060 struct machine *machine __maybe_unused)
1062 struct trace *trace = container_of(tool, struct trace, tool);
1065 tracepoint_handler handler = evsel->handler.func;
1067 if (skip_sample(trace, sample))
1070 if (!trace->full_time && trace->base_time == 0)
1071 trace->base_time = sample->time;
1074 handler(trace, evsel, sample);
1080 perf_session__has_tp(struct perf_session *session, const char *name)
1082 struct perf_evsel *evsel;
1084 evsel = perf_evlist__find_tracepoint_by_name(session->evlist, name);
1086 return evsel != NULL;
1089 static int parse_target_str(struct trace *trace)
1091 if (trace->opts.target.pid) {
1092 trace->pid_list = intlist__new(trace->opts.target.pid);
1093 if (trace->pid_list == NULL) {
1094 pr_err("Error parsing process id string\n");
1099 if (trace->opts.target.tid) {
1100 trace->tid_list = intlist__new(trace->opts.target.tid);
1101 if (trace->tid_list == NULL) {
1102 pr_err("Error parsing thread id string\n");
1110 static int trace__run(struct trace *trace, int argc, const char **argv)
1112 struct perf_evlist *evlist = perf_evlist__new();
1113 struct perf_evsel *evsel;
1115 unsigned long before;
1116 const bool forks = argc > 0;
1118 if (evlist == NULL) {
1119 fprintf(trace->output, "Not enough memory to run!\n");
1123 if (perf_evlist__add_newtp(evlist, "raw_syscalls", "sys_enter", trace__sys_enter) ||
1124 perf_evlist__add_newtp(evlist, "raw_syscalls", "sys_exit", trace__sys_exit)) {
1125 fprintf(trace->output, "Couldn't read the raw_syscalls tracepoints information!\n");
1126 goto out_delete_evlist;
1130 perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
1131 trace__sched_stat_runtime)) {
1132 fprintf(trace->output, "Couldn't read the sched_stat_runtime tracepoint information!\n");
1133 goto out_delete_evlist;
1136 err = perf_evlist__create_maps(evlist, &trace->opts.target);
1138 fprintf(trace->output, "Problems parsing the target to trace, check your options!\n");
1139 goto out_delete_evlist;
1142 err = trace__symbols_init(trace, evlist);
1144 fprintf(trace->output, "Problems initializing symbol libraries!\n");
1145 goto out_delete_maps;
1148 perf_evlist__config(evlist, &trace->opts);
1150 signal(SIGCHLD, sig_handler);
1151 signal(SIGINT, sig_handler);
1154 err = perf_evlist__prepare_workload(evlist, &trace->opts.target,
1155 argv, false, false);
1157 fprintf(trace->output, "Couldn't run the workload!\n");
1158 goto out_delete_maps;
1162 err = perf_evlist__open(evlist);
1164 fprintf(trace->output, "Couldn't create the events: %s\n", strerror(errno));
1165 goto out_delete_maps;
1168 err = perf_evlist__mmap(evlist, UINT_MAX, false);
1170 fprintf(trace->output, "Couldn't mmap the events: %s\n", strerror(errno));
1171 goto out_close_evlist;
1174 perf_evlist__enable(evlist);
1177 perf_evlist__start_workload(evlist);
1179 trace->multiple_threads = evlist->threads->map[0] == -1 || evlist->threads->nr > 1;
1181 before = trace->nr_events;
1183 for (i = 0; i < evlist->nr_mmaps; i++) {
1184 union perf_event *event;
1186 while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) {
1187 const u32 type = event->header.type;
1188 tracepoint_handler handler;
1189 struct perf_sample sample;
1193 err = perf_evlist__parse_sample(evlist, event, &sample);
1195 fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err);
1199 if (!trace->full_time && trace->base_time == 0)
1200 trace->base_time = sample.time;
1202 if (type != PERF_RECORD_SAMPLE) {
1203 trace__process_event(trace, &trace->host, event);
1207 evsel = perf_evlist__id2evsel(evlist, sample.id);
1208 if (evsel == NULL) {
1209 fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample.id);
1213 if (sample.raw_data == NULL) {
1214 fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
1215 perf_evsel__name(evsel), sample.tid,
1216 sample.cpu, sample.raw_size);
1220 handler = evsel->handler.func;
1221 handler(trace, evsel, &sample);
1224 goto out_unmap_evlist;
1228 if (trace->nr_events == before) {
1230 goto out_unmap_evlist;
1232 poll(evlist->pollfd, evlist->nr_fds, -1);
1236 perf_evlist__disable(evlist);
1241 perf_evlist__munmap(evlist);
1243 perf_evlist__close(evlist);
1245 perf_evlist__delete_maps(evlist);
1247 perf_evlist__delete(evlist);
1252 static int trace__replay(struct trace *trace)
1254 const struct perf_evsel_str_handler handlers[] = {
1255 { "raw_syscalls:sys_enter", trace__sys_enter, },
1256 { "raw_syscalls:sys_exit", trace__sys_exit, },
1259 struct perf_session *session;
1262 trace->tool.sample = trace__process_sample;
1263 trace->tool.mmap = perf_event__process_mmap;
1264 trace->tool.mmap2 = perf_event__process_mmap2;
1265 trace->tool.comm = perf_event__process_comm;
1266 trace->tool.exit = perf_event__process_exit;
1267 trace->tool.fork = perf_event__process_fork;
1268 trace->tool.attr = perf_event__process_attr;
1269 trace->tool.tracing_data = perf_event__process_tracing_data;
1270 trace->tool.build_id = perf_event__process_build_id;
1272 trace->tool.ordered_samples = true;
1273 trace->tool.ordering_requires_timestamps = true;
1275 /* add tid to output */
1276 trace->multiple_threads = true;
1278 if (symbol__init() < 0)
1281 session = perf_session__new(input_name, O_RDONLY, 0, false,
1283 if (session == NULL)
1286 err = perf_session__set_tracepoints_handlers(session, handlers);
1290 if (!perf_session__has_tp(session, "raw_syscalls:sys_enter")) {
1291 pr_err("Data file does not have raw_syscalls:sys_enter events\n");
1295 if (!perf_session__has_tp(session, "raw_syscalls:sys_exit")) {
1296 pr_err("Data file does not have raw_syscalls:sys_exit events\n");
1300 err = parse_target_str(trace);
1306 err = perf_session__process_events(session, &trace->tool);
1308 pr_err("Failed to process events, error %d", err);
1311 perf_session__delete(session);
1316 static size_t trace__fprintf_threads_header(FILE *fp)
1320 printed = fprintf(fp, "\n _____________________________________________________________________\n");
1321 printed += fprintf(fp," __) Summary of events (__\n\n");
1322 printed += fprintf(fp," [ task - pid ] [ events ] [ ratio ] [ runtime ]\n");
1323 printed += fprintf(fp," _____________________________________________________________________\n\n");
1328 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
1330 size_t printed = trace__fprintf_threads_header(fp);
1333 for (nd = rb_first(&trace->host.threads); nd; nd = rb_next(nd)) {
1334 struct thread *thread = rb_entry(nd, struct thread, rb_node);
1335 struct thread_trace *ttrace = thread->priv;
1342 ratio = (double)ttrace->nr_events / trace->nr_events * 100.0;
1344 color = PERF_COLOR_NORMAL;
1346 color = PERF_COLOR_RED;
1347 else if (ratio > 25.0)
1348 color = PERF_COLOR_GREEN;
1349 else if (ratio > 5.0)
1350 color = PERF_COLOR_YELLOW;
1352 printed += color_fprintf(fp, color, "%20s", thread->comm);
1353 printed += fprintf(fp, " - %-5d :%11lu [", thread->tid, ttrace->nr_events);
1354 printed += color_fprintf(fp, color, "%5.1f%%", ratio);
1355 printed += fprintf(fp, " ] %10.3f ms\n", ttrace->runtime_ms);
1361 static int trace__set_duration(const struct option *opt, const char *str,
1362 int unset __maybe_unused)
1364 struct trace *trace = opt->value;
1366 trace->duration_filter = atof(str);
1370 static int trace__open_output(struct trace *trace, const char *filename)
1374 if (!stat(filename, &st) && st.st_size) {
1375 char oldname[PATH_MAX];
1377 scnprintf(oldname, sizeof(oldname), "%s.old", filename);
1379 rename(filename, oldname);
1382 trace->output = fopen(filename, "w");
1384 return trace->output == NULL ? -errno : 0;
1387 int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
1389 const char * const trace_usage[] = {
1390 "perf trace [<options>] [<command>]",
1391 "perf trace [<options>] -- <command> [<options>]",
1394 struct trace trace = {
1395 .audit_machine = audit_detect_machine(),
1404 .user_freq = UINT_MAX,
1405 .user_interval = ULLONG_MAX,
1411 const char *output_name = NULL;
1412 const char *ev_qualifier_str = NULL;
1413 const struct option trace_options[] = {
1414 OPT_STRING('e', "expr", &ev_qualifier_str, "expr",
1415 "list of events to trace"),
1416 OPT_STRING('o', "output", &output_name, "file", "output file name"),
1417 OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"),
1418 OPT_STRING('p', "pid", &trace.opts.target.pid, "pid",
1419 "trace events on existing process id"),
1420 OPT_STRING('t', "tid", &trace.opts.target.tid, "tid",
1421 "trace events on existing thread id"),
1422 OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide,
1423 "system-wide collection from all CPUs"),
1424 OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu",
1425 "list of cpus to monitor"),
1426 OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit,
1427 "child tasks do not inherit counters"),
1428 OPT_UINTEGER('m', "mmap-pages", &trace.opts.mmap_pages,
1429 "number of mmap data pages"),
1430 OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user",
1432 OPT_CALLBACK(0, "duration", &trace, "float",
1433 "show only events with duration > N.M ms",
1434 trace__set_duration),
1435 OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"),
1436 OPT_INCR('v', "verbose", &verbose, "be more verbose"),
1437 OPT_BOOLEAN('T', "time", &trace.full_time,
1438 "Show full timestamp, not time relative to first start"),
1444 argc = parse_options(argc, argv, trace_options, trace_usage, 0);
1446 if (output_name != NULL) {
1447 err = trace__open_output(&trace, output_name);
1449 perror("failed to create output file");
1454 if (ev_qualifier_str != NULL) {
1455 const char *s = ev_qualifier_str;
1457 trace.not_ev_qualifier = *s == '!';
1458 if (trace.not_ev_qualifier)
1460 trace.ev_qualifier = strlist__new(true, s);
1461 if (trace.ev_qualifier == NULL) {
1462 fputs("Not enough memory to parse event qualifier",
1469 err = perf_target__validate(&trace.opts.target);
1471 perf_target__strerror(&trace.opts.target, err, bf, sizeof(bf));
1472 fprintf(trace.output, "%s", bf);
1476 err = perf_target__parse_uid(&trace.opts.target);
1478 perf_target__strerror(&trace.opts.target, err, bf, sizeof(bf));
1479 fprintf(trace.output, "%s", bf);
1483 if (!argc && perf_target__none(&trace.opts.target))
1484 trace.opts.target.system_wide = true;
1487 err = trace__replay(&trace);
1489 err = trace__run(&trace, argc, argv);
1491 if (trace.sched && !err)
1492 trace__fprintf_thread_summary(&trace, trace.output);
1495 if (output_name != NULL)
1496 fclose(trace.output);