1 #include <traceevent/event-parse.h>
3 #include "util/color.h"
4 #include "util/debug.h"
5 #include "util/evlist.h"
6 #include "util/machine.h"
7 #include "util/session.h"
8 #include "util/thread.h"
9 #include "util/parse-options.h"
10 #include "util/strlist.h"
11 #include "util/intlist.h"
12 #include "util/thread_map.h"
16 #include <sys/eventfd.h>
18 #include <linux/futex.h>
20 /* For older distros: */
22 # define MAP_STACK 0x20000
26 # define MADV_HWPOISON 100
29 #ifndef MADV_MERGEABLE
30 # define MADV_MERGEABLE 12
33 #ifndef MADV_UNMERGEABLE
34 # define MADV_UNMERGEABLE 13
49 #define DEFINE_STRARRAY(array) struct strarray strarray__##array = { \
50 .nr_entries = ARRAY_SIZE(array), \
/*
 * Print a syscall argument as a symbolic name taken from a struct strarray.
 *
 * The table is passed in through arg->parm. An index outside
 * [0, sa->nr_entries) is printed as a plain decimal number; otherwise the
 * matching entry is printed with "%s".
 *
 * NOTE(review): the assignment of idx is not visible in this excerpt --
 * presumably it is arg->val; confirm against the full file.
 *
 * NOTE(review): tables built with designated initializers (e.g.
 * epoll_ctl_ops starts at [1]) contain NULL entries inside the valid range.
 * Such an index passes the range check and reaches the "%s" conversion with
 * a NULL pointer -- worth confirming whether that can happen in practice.
 *
 * Returns the value returned by scnprintf().
 */
54 static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size,
55 struct syscall_arg *arg)
58 struct strarray *sa = arg->parm;
60 if (idx < 0 || idx >= sa->nr_entries)
61 return scnprintf(bf, size, "%d", idx);
63 return scnprintf(bf, size, "%s", sa->entries[idx]);
66 #define SCA_STRARRAY syscall_arg__scnprintf_strarray
/*
 * Print the raw argument value (arg->val) in alternate-form hexadecimal
 * ("%#lx") into bf. Returns the value returned by scnprintf().
 */
68 static size_t syscall_arg__scnprintf_hex(char *bf, size_t size,
69 struct syscall_arg *arg)
71 return scnprintf(bf, size, "%#lx", arg->val);
74 #define SCA_HEX syscall_arg__scnprintf_hex
76 static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size,
77 struct syscall_arg *arg)
79 int printed = 0, prot = arg->val;
81 if (prot == PROT_NONE)
82 return scnprintf(bf, size, "NONE");
83 #define P_MMAP_PROT(n) \
84 if (prot & PROT_##n) { \
85 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
95 P_MMAP_PROT(GROWSDOWN);
100 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", prot);
105 #define SCA_MMAP_PROT syscall_arg__scnprintf_mmap_prot
107 static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size,
108 struct syscall_arg *arg)
110 int printed = 0, flags = arg->val;
112 #define P_MMAP_FLAG(n) \
113 if (flags & MAP_##n) { \
114 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
119 P_MMAP_FLAG(PRIVATE);
123 P_MMAP_FLAG(ANONYMOUS);
124 P_MMAP_FLAG(DENYWRITE);
125 P_MMAP_FLAG(EXECUTABLE);
128 P_MMAP_FLAG(GROWSDOWN);
130 P_MMAP_FLAG(HUGETLB);
133 P_MMAP_FLAG(NONBLOCK);
134 P_MMAP_FLAG(NORESERVE);
135 P_MMAP_FLAG(POPULATE);
137 #ifdef MAP_UNINITIALIZED
138 P_MMAP_FLAG(UNINITIALIZED);
143 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
148 #define SCA_MMAP_FLAGS syscall_arg__scnprintf_mmap_flags
150 static size_t syscall_arg__scnprintf_madvise_behavior(char *bf, size_t size,
151 struct syscall_arg *arg)
153 int behavior = arg->val;
156 #define P_MADV_BHV(n) case MADV_##n: return scnprintf(bf, size, #n)
159 P_MADV_BHV(SEQUENTIAL);
160 P_MADV_BHV(WILLNEED);
161 P_MADV_BHV(DONTNEED);
163 P_MADV_BHV(DONTFORK);
165 P_MADV_BHV(HWPOISON);
166 #ifdef MADV_SOFT_OFFLINE
167 P_MADV_BHV(SOFT_OFFLINE);
169 P_MADV_BHV(MERGEABLE);
170 P_MADV_BHV(UNMERGEABLE);
172 P_MADV_BHV(HUGEPAGE);
174 #ifdef MADV_NOHUGEPAGE
175 P_MADV_BHV(NOHUGEPAGE);
178 P_MADV_BHV(DONTDUMP);
187 return scnprintf(bf, size, "%#x", behavior);
190 #define SCA_MADV_BHV syscall_arg__scnprintf_madvise_behavior
192 static size_t syscall_arg__scnprintf_flock(char *bf, size_t size,
193 struct syscall_arg *arg)
195 int printed = 0, op = arg->val;
198 return scnprintf(bf, size, "NONE");
200 if ((op & LOCK_##cmd) == LOCK_##cmd) { \
201 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #cmd); \
216 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", op);
221 #define SCA_FLOCK syscall_arg__scnprintf_flock
/*
 * Print the futex "op" argument.
 *
 * The command (op & FUTEX_CMD_MASK) is printed by name, or in hex when it
 * is not one of the known FUTEX_* commands. "|PRIV" and "|CLKRT" are
 * appended when FUTEX_PRIVATE_FLAG / FUTEX_CLOCK_REALTIME are set in op.
 *
 * Side effect: each command ORs the SCF_* bits listed on its case line into
 * arg->mask. NOTE(review): presumably these mark the futex() arguments that
 * the command does not use, so that they are left out of the output -- the
 * code that consumes arg->mask is not visible in this excerpt; confirm.
 */
223 static size_t syscall_arg__scnprintf_futex_op(char *bf, size_t size, struct syscall_arg *arg)
225 enum syscall_futex_args {
226 SCF_UADDR = (1 << 0),
229 SCF_TIMEOUT = (1 << 3),
230 SCF_UADDR2 = (1 << 4),
234 int cmd = op & FUTEX_CMD_MASK;
/*
 * Expands to the case label plus the statement printing the command name;
 * the mask update and the break are written out at each use below.
 */
238 #define P_FUTEX_OP(n) case FUTEX_##n: printed = scnprintf(bf, size, #n);
239 P_FUTEX_OP(WAIT); arg->mask |= SCF_VAL3|SCF_UADDR2; break;
240 P_FUTEX_OP(WAKE); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
241 P_FUTEX_OP(FD); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
242 P_FUTEX_OP(REQUEUE); arg->mask |= SCF_VAL3|SCF_TIMEOUT; break;
243 P_FUTEX_OP(CMP_REQUEUE); arg->mask |= SCF_TIMEOUT; break;
244 P_FUTEX_OP(CMP_REQUEUE_PI); arg->mask |= SCF_TIMEOUT; break;
245 P_FUTEX_OP(WAKE_OP); break;
246 P_FUTEX_OP(LOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
247 P_FUTEX_OP(UNLOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
248 P_FUTEX_OP(TRYLOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2; break;
249 P_FUTEX_OP(WAIT_BITSET); arg->mask |= SCF_UADDR2; break;
250 P_FUTEX_OP(WAKE_BITSET); arg->mask |= SCF_UADDR2; break;
251 P_FUTEX_OP(WAIT_REQUEUE_PI); break;
252 default: printed = scnprintf(bf, size, "%#x", cmd); break;
/* Modifier bits that live outside FUTEX_CMD_MASK are appended as suffixes. */
255 if (op & FUTEX_PRIVATE_FLAG)
256 printed += scnprintf(bf + printed, size - printed, "|PRIV");
258 if (op & FUTEX_CLOCK_REALTIME)
259 printed += scnprintf(bf + printed, size - printed, "|CLKRT");
264 #define SCA_FUTEX_OP syscall_arg__scnprintf_futex_op
266 static const char *epoll_ctl_ops[] = { [1] = "ADD", "DEL", "MOD", };
267 static DEFINE_STRARRAY(epoll_ctl_ops);
269 static const char *itimers[] = { "REAL", "VIRTUAL", "PROF", };
270 static DEFINE_STRARRAY(itimers);
272 static const char *whences[] = { "SET", "CUR", "END",
280 static DEFINE_STRARRAY(whences);
282 static const char *fcntl_cmds[] = {
283 "DUPFD", "GETFD", "SETFD", "GETFL", "SETFL", "GETLK", "SETLK",
284 "SETLKW", "SETOWN", "GETOWN", "SETSIG", "GETSIG", "F_GETLK64",
285 "F_SETLK64", "F_SETLKW64", "F_SETOWN_EX", "F_GETOWN_EX",
288 static DEFINE_STRARRAY(fcntl_cmds);
290 static const char *rlimit_resources[] = {
291 "CPU", "FSIZE", "DATA", "STACK", "CORE", "RSS", "NPROC", "NOFILE",
292 "MEMLOCK", "AS", "LOCKS", "SIGPENDING", "MSGQUEUE", "NICE", "RTPRIO",
295 static DEFINE_STRARRAY(rlimit_resources);
297 static const char *sighow[] = { "BLOCK", "UNBLOCK", "SETMASK", };
298 static DEFINE_STRARRAY(sighow);
300 static const char *socket_families[] = {
301 "UNSPEC", "LOCAL", "INET", "AX25", "IPX", "APPLETALK", "NETROM",
302 "BRIDGE", "ATMPVC", "X25", "INET6", "ROSE", "DECnet", "NETBEUI",
303 "SECURITY", "KEY", "NETLINK", "PACKET", "ASH", "ECONET", "ATMSVC",
304 "RDS", "SNA", "IRDA", "PPPOX", "WANPIPE", "LLC", "IB", "CAN", "TIPC",
305 "BLUETOOTH", "IUCV", "RXRPC", "ISDN", "PHONET", "IEEE802154", "CAIF",
306 "ALG", "NFC", "VSOCK",
308 static DEFINE_STRARRAY(socket_families);
310 #ifndef SOCK_TYPE_MASK
311 #define SOCK_TYPE_MASK 0xf
314 static size_t syscall_arg__scnprintf_socket_type(char *bf, size_t size,
315 struct syscall_arg *arg)
319 flags = type & ~SOCK_TYPE_MASK;
321 type &= SOCK_TYPE_MASK;
323 * Can't use a strarray, MIPS may override for ABI reasons.
326 #define P_SK_TYPE(n) case SOCK_##n: printed = scnprintf(bf, size, #n); break;
331 P_SK_TYPE(SEQPACKET);
336 printed = scnprintf(bf, size, "%#x", type);
339 #define P_SK_FLAG(n) \
340 if (flags & SOCK_##n) { \
341 printed += scnprintf(bf + printed, size - printed, "|%s", #n); \
342 flags &= ~SOCK_##n; \
350 printed += scnprintf(bf + printed, size - printed, "|%#x", flags);
355 #define SCA_SK_TYPE syscall_arg__scnprintf_socket_type
358 #define MSG_PROBE 0x10
360 #ifndef MSG_SENDPAGE_NOTLAST
361 #define MSG_SENDPAGE_NOTLAST 0x20000
364 #define MSG_FASTOPEN 0x20000000
367 static size_t syscall_arg__scnprintf_msg_flags(char *bf, size_t size,
368 struct syscall_arg *arg)
370 int printed = 0, flags = arg->val;
373 return scnprintf(bf, size, "NONE");
374 #define P_MSG_FLAG(n) \
375 if (flags & MSG_##n) { \
376 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
382 P_MSG_FLAG(DONTROUTE);
387 P_MSG_FLAG(DONTWAIT);
394 P_MSG_FLAG(ERRQUEUE);
395 P_MSG_FLAG(NOSIGNAL);
397 P_MSG_FLAG(WAITFORONE);
398 P_MSG_FLAG(SENDPAGE_NOTLAST);
399 P_MSG_FLAG(FASTOPEN);
400 P_MSG_FLAG(CMSG_CLOEXEC);
404 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
409 #define SCA_MSG_FLAGS syscall_arg__scnprintf_msg_flags
411 static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size,
412 struct syscall_arg *arg)
417 if (mode == F_OK) /* 0 */
418 return scnprintf(bf, size, "F");
420 if (mode & n##_OK) { \
421 printed += scnprintf(bf + printed, size - printed, "%s", #n); \
431 printed += scnprintf(bf + printed, size - printed, "|%#x", mode);
436 #define SCA_ACCMODE syscall_arg__scnprintf_access_mode
/*
 * Print open()-style flags as a '|'-separated list of flag names, each
 * written without its "O_" prefix.
 *
 * Side effect: when O_CREAT is not set, the bit for the following argument
 * (arg->idx + 1, the mode parameter per the inline comment) is set in
 * arg->mask.
 *
 * O_SYNC is matched with (flags & O_SYNC) == O_SYNC, i.e. only when every
 * bit of O_SYNC is present rather than on any overlapping bit.
 *
 * NOTE(review): the condition guarding the "RDONLY" return and the list of
 * individual flags handled by the helper macro are not visible in this
 * excerpt.
 */
438 static size_t syscall_arg__scnprintf_open_flags(char *bf, size_t size,
439 struct syscall_arg *arg)
441 int printed = 0, flags = arg->val;
443 if (!(flags & O_CREAT))
444 arg->mask |= 1 << (arg->idx + 1); /* Mask the mode parm */
447 return scnprintf(bf, size, "RDONLY");
449 if (flags & O_##n) { \
450 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
474 if ((flags & O_SYNC) == O_SYNC)
475 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", "SYNC");
487 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
492 #define SCA_OPEN_FLAGS syscall_arg__scnprintf_open_flags
494 static size_t syscall_arg__scnprintf_eventfd_flags(char *bf, size_t size,
495 struct syscall_arg *arg)
497 int printed = 0, flags = arg->val;
500 return scnprintf(bf, size, "NONE");
502 if (flags & EFD_##n) { \
503 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
513 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
518 #define SCA_EFD_FLAGS syscall_arg__scnprintf_eventfd_flags
520 static size_t syscall_arg__scnprintf_pipe_flags(char *bf, size_t size,
521 struct syscall_arg *arg)
523 int printed = 0, flags = arg->val;
526 if (flags & O_##n) { \
527 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
536 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
541 #define SCA_PIPE_FLAGS syscall_arg__scnprintf_pipe_flags
543 static size_t syscall_arg__scnprintf_signum(char *bf, size_t size, struct syscall_arg *arg)
548 #define P_SIGNUM(n) case SIG##n: return scnprintf(bf, size, #n)
583 return scnprintf(bf, size, "%#x", sig);
586 #define SCA_SIGNUM syscall_arg__scnprintf_signum
588 #define STRARRAY(arg, name, array) \
589 .arg_scnprintf = { [arg] = SCA_STRARRAY, }, \
590 .arg_parm = { [arg] = &strarray__##array, }
592 static struct syscall_fmt {
595 size_t (*arg_scnprintf[6])(char *bf, size_t size, struct syscall_arg *arg);
601 { .name = "access", .errmsg = true,
602 .arg_scnprintf = { [1] = SCA_ACCMODE, /* mode */ }, },
603 { .name = "arch_prctl", .errmsg = true, .alias = "prctl", },
604 { .name = "brk", .hexret = true,
605 .arg_scnprintf = { [0] = SCA_HEX, /* brk */ }, },
606 { .name = "connect", .errmsg = true, },
607 { .name = "epoll_ctl", .errmsg = true, STRARRAY(1, op, epoll_ctl_ops), },
608 { .name = "eventfd2", .errmsg = true,
609 .arg_scnprintf = { [1] = SCA_EFD_FLAGS, /* flags */ }, },
610 { .name = "fcntl", .errmsg = true, STRARRAY(1, cmd, fcntl_cmds), },
611 { .name = "flock", .errmsg = true,
612 .arg_scnprintf = { [1] = SCA_FLOCK, /* cmd */ }, },
613 { .name = "fstat", .errmsg = true, .alias = "newfstat", },
614 { .name = "fstatat", .errmsg = true, .alias = "newfstatat", },
615 { .name = "futex", .errmsg = true,
616 .arg_scnprintf = { [1] = SCA_FUTEX_OP, /* op */ }, },
617 { .name = "getitimer", .errmsg = true, STRARRAY(0, which, itimers), },
618 { .name = "getrlimit", .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
619 { .name = "ioctl", .errmsg = true,
620 .arg_scnprintf = { [2] = SCA_HEX, /* arg */ }, },
621 { .name = "kill", .errmsg = true,
622 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
623 { .name = "lseek", .errmsg = true, STRARRAY(2, whence, whences), },
624 { .name = "lstat", .errmsg = true, .alias = "newlstat", },
625 { .name = "madvise", .errmsg = true,
626 .arg_scnprintf = { [0] = SCA_HEX, /* start */
627 [2] = SCA_MADV_BHV, /* behavior */ }, },
628 { .name = "mmap", .hexret = true,
629 .arg_scnprintf = { [0] = SCA_HEX, /* addr */
630 [2] = SCA_MMAP_PROT, /* prot */
631 [3] = SCA_MMAP_FLAGS, /* flags */ }, },
632 { .name = "mprotect", .errmsg = true,
633 .arg_scnprintf = { [0] = SCA_HEX, /* start */
634 [2] = SCA_MMAP_PROT, /* prot */ }, },
635 { .name = "mremap", .hexret = true,
636 .arg_scnprintf = { [0] = SCA_HEX, /* addr */
637 [4] = SCA_HEX, /* new_addr */ }, },
638 { .name = "munmap", .errmsg = true,
639 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
640 { .name = "open", .errmsg = true,
641 .arg_scnprintf = { [1] = SCA_OPEN_FLAGS, /* flags */ }, },
642 { .name = "open_by_handle_at", .errmsg = true,
643 .arg_scnprintf = { [2] = SCA_OPEN_FLAGS, /* flags */ }, },
644 { .name = "openat", .errmsg = true,
645 .arg_scnprintf = { [2] = SCA_OPEN_FLAGS, /* flags */ }, },
646 { .name = "pipe2", .errmsg = true,
647 .arg_scnprintf = { [1] = SCA_PIPE_FLAGS, /* flags */ }, },
648 { .name = "poll", .errmsg = true, .timeout = true, },
649 { .name = "ppoll", .errmsg = true, .timeout = true, },
650 { .name = "pread", .errmsg = true, .alias = "pread64", },
651 { .name = "prlimit64", .errmsg = true, STRARRAY(1, resource, rlimit_resources), },
652 { .name = "pwrite", .errmsg = true, .alias = "pwrite64", },
653 { .name = "read", .errmsg = true, },
654 { .name = "recvfrom", .errmsg = true,
655 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
656 { .name = "recvmmsg", .errmsg = true,
657 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
658 { .name = "recvmsg", .errmsg = true,
659 .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
660 { .name = "rt_sigaction", .errmsg = true,
661 .arg_scnprintf = { [0] = SCA_SIGNUM, /* sig */ }, },
662 { .name = "rt_sigprocmask", .errmsg = true, STRARRAY(0, how, sighow), },
663 { .name = "rt_sigqueueinfo", .errmsg = true,
664 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
665 { .name = "rt_tgsigqueueinfo", .errmsg = true,
666 .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
667 { .name = "select", .errmsg = true, .timeout = true, },
668 { .name = "sendmmsg", .errmsg = true,
669 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
670 { .name = "sendmsg", .errmsg = true,
671 .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
672 { .name = "sendto", .errmsg = true,
673 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
674 { .name = "setitimer", .errmsg = true, STRARRAY(0, which, itimers), },
675 { .name = "setrlimit", .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
676 { .name = "socket", .errmsg = true,
677 .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
678 [1] = SCA_SK_TYPE, /* type */ },
679 .arg_parm = { [0] = &strarray__socket_families, /* family */ }, },
680 { .name = "socketpair", .errmsg = true,
681 .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
682 [1] = SCA_SK_TYPE, /* type */ },
683 .arg_parm = { [0] = &strarray__socket_families, /* family */ }, },
684 { .name = "stat", .errmsg = true, .alias = "newstat", },
685 { .name = "tgkill", .errmsg = true,
686 .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
687 { .name = "tkill", .errmsg = true,
688 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
689 { .name = "uname", .errmsg = true, .alias = "newuname", },
/*
 * bsearch() comparator: `name` is the key (a syscall name string) and `fmtp`
 * points at a struct syscall_fmt element; they are ordered by strcmp() of
 * the key against fmt->name.
 */
692 static int syscall_fmt__cmp(const void *name, const void *fmtp)
694 const struct syscall_fmt *fmt = fmtp;
695 return strcmp(name, fmt->name);
/*
 * Look up the formatting descriptor for syscall `name` in syscall_fmts.
 *
 * The lookup is a bsearch() with syscall_fmt__cmp, so syscall_fmts must be
 * kept sorted by .name in strcmp() order -- an entry added out of order
 * may never be found. Returns bsearch()'s result, which is NULL when no
 * entry matches.
 */
698 static struct syscall_fmt *syscall_fmt__find(const char *name)
700 const int nmemb = ARRAY_SIZE(syscall_fmts);
701 return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp);
705 struct event_format *tp_format;
708 struct syscall_fmt *fmt;
709 size_t (**arg_scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
/*
 * Print a duration to fp as "(<value> ms): ".
 *
 * t is divided by NSEC_PER_MSEC and printed with "%6.3f". The number is
 * colored by magnitude: red, yellow (>= 0.01 when the red test fails) or
 * the normal color. NOTE(review): the condition that selects red is not
 * visible in this excerpt.
 *
 * Returns the sum of the counts reported by the individual print calls.
 */
713 static size_t fprintf_duration(unsigned long t, FILE *fp)
715 double duration = (double)t / NSEC_PER_MSEC;
716 size_t printed = fprintf(fp, "(");
719 printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration);
720 else if (duration >= 0.01)
721 printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration);
723 printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration);
724 return printed + fprintf(fp, "): ");
727 struct thread_trace {
731 unsigned long nr_events;
/*
 * Allocate a struct thread_trace with zalloc(). The result is returned
 * unchecked, so callers must handle a NULL return themselves.
 */
736 static struct thread_trace *thread_trace__new(void)
738 return zalloc(sizeof(struct thread_trace));
741 static struct thread_trace *thread__trace(struct thread *thread, FILE *fp)
743 struct thread_trace *ttrace;
748 if (thread->priv == NULL)
749 thread->priv = thread_trace__new();
751 if (thread->priv == NULL)
754 ttrace = thread->priv;
759 color_fprintf(fp, PERF_COLOR_RED,
760 "WARNING: not enough memory, dropping samples!\n");
765 struct perf_tool tool;
769 struct syscall *table;
771 struct perf_record_opts opts;
776 unsigned long nr_events;
777 struct strlist *ev_qualifier;
778 bool not_ev_qualifier;
779 struct intlist *tid_list;
780 struct intlist *pid_list;
782 bool multiple_threads;
784 double duration_filter;
/*
 * Return true when t is below the configured duration filter.
 *
 * trace->duration_filter is multiplied by NSEC_PER_MSEC before the
 * comparison; judging by that constant's name, the filter is kept in
 * milliseconds and t is expected in nanoseconds.
 */
788 static bool trace__filter_duration(struct trace *trace, double t)
790 return t < (trace->duration_filter * NSEC_PER_MSEC);
/*
 * Print a timestamp relative to trace->base_time.
 *
 * The difference (tstamp - base_time) is divided by NSEC_PER_MSEC and
 * written as "%10.3f " (note the trailing space). Returns fprintf()'s result.
 */
793 static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
795 double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC;
797 return fprintf(fp, "%10.3f ", ts);
800 static bool done = false;
802 static void sig_handler(int sig __maybe_unused)
/*
 * Print the leading part of an output line: the relative timestamp, the
 * duration and, when trace->multiple_threads is set, the thread id --
 * preceded by the thread's comm (at most 14 characters, then '/') when
 * trace->show_comm is also set.
 *
 * The count of printed characters is accumulated in `printed`.
 */
807 static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread,
808 u64 duration, u64 tstamp, FILE *fp)
810 size_t printed = trace__fprintf_tstamp(trace, tstamp, fp);
811 printed += fprintf_duration(duration, fp);
813 if (trace->multiple_threads) {
814 if (trace->show_comm)
815 printed += fprintf(fp, "%.14s/", thread->comm);
816 printed += fprintf(fp, "%d ", thread->tid);
/*
 * Handle a non-sample perf event.
 *
 * PERF_RECORD_LOST is reported in red on trace->output (with the number of
 * lost events) and then passed to machine__process_lost_event(); the other
 * visible path hands the event to machine__process_event().
 */
822 static int trace__process_event(struct trace *trace, struct machine *machine,
823 union perf_event *event)
827 switch (event->header.type) {
828 case PERF_RECORD_LOST:
829 color_fprintf(trace->output, PERF_COLOR_RED,
830 "LOST %" PRIu64 " events!\n", event->lost.lost);
831 ret = machine__process_lost_event(machine, event);
833 ret = machine__process_event(machine, event);
/*
 * perf_tool callback adapter: recovers the enclosing struct trace from the
 * embedded `tool` member via container_of() and forwards the event to
 * trace__process_event(). The sample argument is unused.
 */
840 static int trace__tool_process(struct perf_tool *tool,
841 union perf_event *event,
842 struct perf_sample *sample __maybe_unused,
843 struct machine *machine)
845 struct trace *trace = container_of(tool, struct trace, tool);
846 return trace__process_event(trace, machine, event);
849 static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
851 int err = symbol__init();
856 machine__init(&trace->host, "", HOST_KERNEL_ID);
857 machine__create_kernel_maps(&trace->host);
859 if (perf_target__has_task(&trace->opts.target)) {
860 err = perf_event__synthesize_thread_map(&trace->tool, evlist->threads,
864 err = perf_event__synthesize_threads(&trace->tool, trace__tool_process,
/*
 * Build sc->arg_scnprintf, the per-argument printer table of a syscall.
 *
 * The table gets one slot per tracepoint field except the first one
 * (nr_fields - 1 slots; the loop likewise starts at fields->next). For each
 * argument a printer supplied by sc->fmt takes precedence; otherwise fields
 * flagged FIELD_IS_POINTER are printed in hex. Slots not assigned by the
 * visible branches stay zero-filled from calloc().
 *
 * NOTE(review): the guard around the arg_parm assignment, the idx update
 * and the return statements are not visible in this excerpt.
 */
874 static int syscall__set_arg_fmts(struct syscall *sc)
876 struct format_field *field;
879 sc->arg_scnprintf = calloc(sc->tp_format->format.nr_fields - 1, sizeof(void *));
880 if (sc->arg_scnprintf == NULL)
884 sc->arg_parm = sc->fmt->arg_parm;
886 for (field = sc->tp_format->format.fields->next; field; field = field->next) {
887 if (sc->fmt && sc->fmt->arg_scnprintf[idx])
888 sc->arg_scnprintf[idx] = sc->fmt->arg_scnprintf[idx];
889 else if (field->flags & FIELD_IS_POINTER)
890 sc->arg_scnprintf[idx] = syscall_arg__scnprintf_hex;
/*
 * Fill in trace->syscalls.table[id] for syscall number `id`.
 *
 * The name is resolved with audit_syscall_to_name(), the table is grown
 * when id lies beyond syscalls.max, and then the syscall_fmt entry and the
 * "syscalls:sys_enter_<name>" tracepoint format are attached (retrying with
 * fmt->alias when the first tracepoint lookup fails). On the path visible
 * here the function returns the result of syscall__set_arg_fmts().
 */
897 static int trace__read_syscall_info(struct trace *trace, int id)
901 const char *name = audit_syscall_to_name(id, trace->audit_machine);
/*
 * Grow the table so that index `id` becomes valid. The realloc() result is
 * kept in a temporary and checked before the table pointer is replaced.
 */
906 if (id > trace->syscalls.max) {
907 struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc));
909 if (nsyscalls == NULL)
/*
 * realloc() does not zero new memory: clear just the entries past the old
 * maximum, or the whole table when nothing had been allocated before
 * (max == -1).
 */
912 if (trace->syscalls.max != -1) {
913 memset(nsyscalls + trace->syscalls.max + 1, 0,
914 (id - trace->syscalls.max) * sizeof(*sc));
916 memset(nsyscalls, 0, (id + 1) * sizeof(*sc));
919 trace->syscalls.table = nsyscalls;
920 trace->syscalls.max = id;
923 sc = trace->syscalls.table + id;
926 if (trace->ev_qualifier) {
927 bool in = strlist__find(trace->ev_qualifier, name) != NULL;
/*
 * True when `in` equals not_ev_qualifier: the name is absent from a
 * positive list, or present in a negated one.
 */
929 if (!(in ^ trace->not_ev_qualifier)) {
932 * No need to do read tracepoint information since this will be
939 sc->fmt = syscall_fmt__find(sc->name);
941 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name);
942 sc->tp_format = event_format__new("syscalls", tp_name);
/* Some syscalls expose their tracepoint under an alias name; retry with it. */
944 if (sc->tp_format == NULL && sc->fmt && sc->fmt->alias) {
945 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias);
946 sc->tp_format = event_format__new("syscalls", tp_name);
949 if (sc->tp_format == NULL)
952 return syscall__set_arg_fmts(sc);
955 static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
960 if (sc->tp_format != NULL) {
961 struct format_field *field;
963 struct syscall_arg arg = {
968 for (field = sc->tp_format->format.fields->next; field;
969 field = field->next, ++arg.idx, bit <<= 1) {
973 * Suppress this argument if its value is zero and
974 * we don't have a string associated in an
977 if (args[arg.idx] == 0 &&
978 !(sc->arg_scnprintf &&
979 sc->arg_scnprintf[arg.idx] == SCA_STRARRAY &&
980 sc->arg_parm[arg.idx]))
983 printed += scnprintf(bf + printed, size - printed,
984 "%s%s: ", printed ? ", " : "", field->name);
985 if (sc->arg_scnprintf && sc->arg_scnprintf[arg.idx]) {
986 arg.val = args[arg.idx];
988 arg.parm = sc->arg_parm[arg.idx];
989 printed += sc->arg_scnprintf[arg.idx](bf + printed,
990 size - printed, &arg);
992 printed += scnprintf(bf + printed, size - printed,
993 "%ld", args[arg.idx]);
1000 printed += scnprintf(bf + printed, size - printed,
1002 printed ? ", " : "", i, args[i]);
1010 typedef int (*tracepoint_handler)(struct trace *trace, struct perf_evsel *evsel,
1011 struct perf_sample *sample);
1013 static struct syscall *trace__syscall_info(struct trace *trace,
1014 struct perf_evsel *evsel,
1015 struct perf_sample *sample)
1017 int id = perf_evsel__intval(evsel, sample, "id");
1022 * XXX: Noticed on x86_64, reproduced as far back as 3.0.36, haven't tried
1023 * before that, leaving at a higher verbosity level till that is
1024 * explained. Reproduced with plain ftrace with:
1026 * echo 1 > /t/events/raw_syscalls/sys_exit/enable
1027 * grep "NR -1 " /t/trace_pipe
1029 * After generating some load on the machine.
1033 fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n",
1034 id, perf_evsel__name(evsel), ++n);
1039 if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) &&
1040 trace__read_syscall_info(trace, id))
1043 if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL))
1046 return &trace->syscalls.table[id];
1050 fprintf(trace->output, "Problems reading syscall %d", id);
1051 if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL)
1052 fprintf(trace->output, "(%s)", trace->syscalls.table[id].name);
1053 fputs(" information\n", trace->output);
1058 static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
1059 struct perf_sample *sample)
1064 struct thread *thread;
1065 struct syscall *sc = trace__syscall_info(trace, evsel, sample);
1066 struct thread_trace *ttrace;
1074 thread = machine__findnew_thread(&trace->host, sample->pid,
1076 ttrace = thread__trace(thread, trace->output);
1080 args = perf_evsel__rawptr(evsel, sample, "args");
1082 fprintf(trace->output, "Problems reading syscall arguments\n");
1086 ttrace = thread->priv;
1088 if (ttrace->entry_str == NULL) {
1089 ttrace->entry_str = malloc(1024);
1090 if (!ttrace->entry_str)
1094 ttrace->entry_time = sample->time;
1095 msg = ttrace->entry_str;
1096 printed += scnprintf(msg + printed, 1024 - printed, "%s(", sc->name);
1098 printed += syscall__scnprintf_args(sc, msg + printed, 1024 - printed, args);
1100 if (!strcmp(sc->name, "exit_group") || !strcmp(sc->name, "exit")) {
1101 if (!trace->duration_filter) {
1102 trace__fprintf_entry_head(trace, thread, 1, sample->time, trace->output);
1103 fprintf(trace->output, "%-70s\n", ttrace->entry_str);
1106 ttrace->entry_pending = true;
1111 static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
1112 struct perf_sample *sample)
1116 struct thread *thread;
1117 struct syscall *sc = trace__syscall_info(trace, evsel, sample);
1118 struct thread_trace *ttrace;
1126 thread = machine__findnew_thread(&trace->host, sample->pid,
1128 ttrace = thread__trace(thread, trace->output);
1132 ret = perf_evsel__intval(evsel, sample, "ret");
1134 ttrace = thread->priv;
1136 ttrace->exit_time = sample->time;
1138 if (ttrace->entry_time) {
1139 duration = sample->time - ttrace->entry_time;
1140 if (trace__filter_duration(trace, duration))
1142 } else if (trace->duration_filter)
1145 trace__fprintf_entry_head(trace, thread, duration, sample->time, trace->output);
1147 if (ttrace->entry_pending) {
1148 fprintf(trace->output, "%-70s", ttrace->entry_str);
1150 fprintf(trace->output, " ... [");
1151 color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued");
1152 fprintf(trace->output, "]: %s()", sc->name);
1155 if (sc->fmt == NULL) {
1157 fprintf(trace->output, ") = %d", ret);
1158 } else if (ret < 0 && sc->fmt->errmsg) {
1160 const char *emsg = strerror_r(-ret, bf, sizeof(bf)),
1161 *e = audit_errno_to_name(-ret);
1163 fprintf(trace->output, ") = -1 %s %s", e, emsg);
1164 } else if (ret == 0 && sc->fmt->timeout)
1165 fprintf(trace->output, ") = 0 Timeout");
1166 else if (sc->fmt->hexret)
1167 fprintf(trace->output, ") = %#x", ret);
1171 fputc('\n', trace->output);
1173 ttrace->entry_pending = false;
1178 static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel,
1179 struct perf_sample *sample)
1181 u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
1182 double runtime_ms = (double)runtime / NSEC_PER_MSEC;
1183 struct thread *thread = machine__findnew_thread(&trace->host,
1186 struct thread_trace *ttrace = thread__trace(thread, trace->output);
1191 ttrace->runtime_ms += runtime_ms;
1192 trace->runtime_ms += runtime_ms;
1196 fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n",
1198 perf_evsel__strval(evsel, sample, "comm"),
1199 (pid_t)perf_evsel__intval(evsel, sample, "pid"),
1201 perf_evsel__intval(evsel, sample, "vruntime"));
/*
 * Decide whether a sample should be dropped according to the pid/tid lists.
 *
 * The first test matches samples whose pid or tid is found in a configured
 * list; the second test is only reached otherwise and checks whether any
 * list is configured at all.
 *
 * NOTE(review): the return statements are not visible in this excerpt --
 * presumably "keep" for a match, "skip" when lists exist but nothing
 * matched, and "keep" when no list is configured; confirm.
 */
1205 static bool skip_sample(struct trace *trace, struct perf_sample *sample)
1207 if ((trace->pid_list && intlist__find(trace->pid_list, sample->pid)) ||
1208 (trace->tid_list && intlist__find(trace->tid_list, sample->tid)))
1211 if (trace->pid_list || trace->tid_list)
1217 static int trace__process_sample(struct perf_tool *tool,
1218 union perf_event *event __maybe_unused,
1219 struct perf_sample *sample,
1220 struct perf_evsel *evsel,
1221 struct machine *machine __maybe_unused)
1223 struct trace *trace = container_of(tool, struct trace, tool);
1226 tracepoint_handler handler = evsel->handler.func;
1228 if (skip_sample(trace, sample))
1231 if (!trace->full_time && trace->base_time == 0)
1232 trace->base_time = sample->time;
1235 handler(trace, evsel, sample);
1241 perf_session__has_tp(struct perf_session *session, const char *name)
1243 struct perf_evsel *evsel;
1245 evsel = perf_evlist__find_tracepoint_by_name(session->evlist, name);
1247 return evsel != NULL;
/*
 * Build trace->pid_list and trace->tid_list from the target pid / tid
 * option strings (trace->opts.target) using intlist__new().
 *
 * A list is only created when the corresponding string is set; an error is
 * logged with pr_err() when intlist__new() returns NULL.
 */
1250 static int parse_target_str(struct trace *trace)
1252 if (trace->opts.target.pid) {
1253 trace->pid_list = intlist__new(trace->opts.target.pid);
1254 if (trace->pid_list == NULL) {
1255 pr_err("Error parsing process id string\n");
1260 if (trace->opts.target.tid) {
1261 trace->tid_list = intlist__new(trace->opts.target.tid);
1262 if (trace->tid_list == NULL) {
1263 pr_err("Error parsing thread id string\n");
1271 static int trace__run(struct trace *trace, int argc, const char **argv)
1273 struct perf_evlist *evlist = perf_evlist__new();
1274 struct perf_evsel *evsel;
1276 unsigned long before;
1277 const bool forks = argc > 0;
1279 if (evlist == NULL) {
1280 fprintf(trace->output, "Not enough memory to run!\n");
1284 if (perf_evlist__add_newtp(evlist, "raw_syscalls", "sys_enter", trace__sys_enter) ||
1285 perf_evlist__add_newtp(evlist, "raw_syscalls", "sys_exit", trace__sys_exit)) {
1286 fprintf(trace->output, "Couldn't read the raw_syscalls tracepoints information!\n");
1287 goto out_delete_evlist;
1291 perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
1292 trace__sched_stat_runtime)) {
1293 fprintf(trace->output, "Couldn't read the sched_stat_runtime tracepoint information!\n");
1294 goto out_delete_evlist;
1297 err = perf_evlist__create_maps(evlist, &trace->opts.target);
1299 fprintf(trace->output, "Problems parsing the target to trace, check your options!\n");
1300 goto out_delete_evlist;
1303 err = trace__symbols_init(trace, evlist);
1305 fprintf(trace->output, "Problems initializing symbol libraries!\n");
1306 goto out_delete_maps;
1309 perf_evlist__config(evlist, &trace->opts);
1311 signal(SIGCHLD, sig_handler);
1312 signal(SIGINT, sig_handler);
1315 err = perf_evlist__prepare_workload(evlist, &trace->opts.target,
1316 argv, false, false);
1318 fprintf(trace->output, "Couldn't run the workload!\n");
1319 goto out_delete_maps;
1323 err = perf_evlist__open(evlist);
1325 fprintf(trace->output, "Couldn't create the events: %s\n", strerror(errno));
1326 goto out_delete_maps;
1329 err = perf_evlist__mmap(evlist, UINT_MAX, false);
1331 fprintf(trace->output, "Couldn't mmap the events: %s\n", strerror(errno));
1332 goto out_close_evlist;
1335 perf_evlist__enable(evlist);
1338 perf_evlist__start_workload(evlist);
1340 trace->multiple_threads = evlist->threads->map[0] == -1 || evlist->threads->nr > 1;
1342 before = trace->nr_events;
1344 for (i = 0; i < evlist->nr_mmaps; i++) {
1345 union perf_event *event;
1347 while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) {
1348 const u32 type = event->header.type;
1349 tracepoint_handler handler;
1350 struct perf_sample sample;
1354 err = perf_evlist__parse_sample(evlist, event, &sample);
1356 fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err);
1360 if (!trace->full_time && trace->base_time == 0)
1361 trace->base_time = sample.time;
1363 if (type != PERF_RECORD_SAMPLE) {
1364 trace__process_event(trace, &trace->host, event);
1368 evsel = perf_evlist__id2evsel(evlist, sample.id);
1369 if (evsel == NULL) {
1370 fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample.id);
1374 if (sample.raw_data == NULL) {
1375 fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
1376 perf_evsel__name(evsel), sample.tid,
1377 sample.cpu, sample.raw_size);
1381 handler = evsel->handler.func;
1382 handler(trace, evsel, &sample);
1385 goto out_unmap_evlist;
1389 if (trace->nr_events == before) {
1391 goto out_unmap_evlist;
1393 poll(evlist->pollfd, evlist->nr_fds, -1);
1397 perf_evlist__disable(evlist);
1402 perf_evlist__munmap(evlist);
1404 perf_evlist__close(evlist);
1406 perf_evlist__delete_maps(evlist);
1408 perf_evlist__delete(evlist);
/*
 * Process the events of a perf session opened read-only on input_name.
 *
 * Steps visible here: install trace__process_sample and the
 * perf_event__process_* callbacks on trace->tool, request ordered samples,
 * initialize symbols, open the session, register the sys_enter / sys_exit
 * handlers, check that both raw_syscalls tracepoints are present in the
 * data, parse the pid/tid filters, process all events and finally delete
 * the session.
 *
 * NOTE(review): the "Failed to process events" message has no trailing
 * newline, unlike the other pr_err() calls in this function.
 */
1413 static int trace__replay(struct trace *trace)
1415 const struct perf_evsel_str_handler handlers[] = {
1416 { "raw_syscalls:sys_enter", trace__sys_enter, },
1417 { "raw_syscalls:sys_exit", trace__sys_exit, },
1420 struct perf_session *session;
1423 trace->tool.sample = trace__process_sample;
1424 trace->tool.mmap = perf_event__process_mmap;
1425 trace->tool.mmap2 = perf_event__process_mmap2;
1426 trace->tool.comm = perf_event__process_comm;
1427 trace->tool.exit = perf_event__process_exit;
1428 trace->tool.fork = perf_event__process_fork;
1429 trace->tool.attr = perf_event__process_attr;
1430 trace->tool.tracing_data = perf_event__process_tracing_data;
1431 trace->tool.build_id = perf_event__process_build_id;
1433 trace->tool.ordered_samples = true;
1434 trace->tool.ordering_requires_timestamps = true;
1436 /* add tid to output */
1437 trace->multiple_threads = true;
1439 if (symbol__init() < 0)
1442 session = perf_session__new(input_name, O_RDONLY, 0, false,
1444 if (session == NULL)
1447 err = perf_session__set_tracepoints_handlers(session, handlers);
/* Both halves of a syscall are needed to print a complete line. */
1451 if (!perf_session__has_tp(session, "raw_syscalls:sys_enter")) {
1452 pr_err("Data file does not have raw_syscalls:sys_enter events\n");
1456 if (!perf_session__has_tp(session, "raw_syscalls:sys_exit")) {
1457 pr_err("Data file does not have raw_syscalls:sys_exit events\n");
1461 err = parse_target_str(trace);
1467 err = perf_session__process_events(session, &trace->tool);
1469 pr_err("Failed to process events, error %d", err);
1472 perf_session__delete(session);
/*
 * trace__fprintf_threads_header - write the banner and the column headings
 * of the per-thread summary table to fp.
 *
 * The results of the four fprintf() calls are summed in 'printed'.
 * NOTE(review): the declaration of 'printed' and the return statement are
 * not part of this listing; the function is declared to return size_t.
 */
1477 static size_t trace__fprintf_threads_header(FILE *fp)
1481 printed = fprintf(fp, "\n _____________________________________________________________________\n");
1482 printed += fprintf(fp," __) Summary of events (__\n\n");
1483 printed += fprintf(fp," [ task - pid ] [ events ] [ ratio ] [ runtime ]\n");
1484 printed += fprintf(fp," _____________________________________________________________________\n\n");
/*
 * trace__fprintf_thread_summary - print one summary row per thread to fp.
 *
 * Writes the table header, then walks the rb-tree at trace->host.threads.
 * For each thread it reads the thread_trace stored in thread->priv, computes
 * that thread's share of trace->nr_events as a percentage, and prints the
 * comm, tid, event count, percentage and runtime_ms.  The comm and the
 * percentage are printed in a color chosen from the percentage.
 *
 * 'printed' accumulates the values returned by the print calls.
 * NOTE(review): several lines are missing from this listing (local
 * declarations, whatever precedes the ratio computation, the first color
 * threshold and the return), so they are not described here.
 */
1489 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
1491 size_t printed = trace__fprintf_threads_header(fp);
1494 for (nd = rb_first(&trace->host.threads); nd; nd = rb_next(nd)) {
1495 struct thread *thread = rb_entry(nd, struct thread, rb_node);
1496 struct thread_trace *ttrace = thread->priv;
/* Share of all counted events that belongs to this thread, in percent. */
/* NOTE(review): no guard against trace->nr_events == 0 is visible here - confirm one exists. */
1503 ratio = (double)ttrace->nr_events / trace->nr_events * 100.0;
/*
 * Default color first, then a highlight for larger shares.  The test that
 * selects PERF_COLOR_RED is not shown in this listing.
 */
1505 color = PERF_COLOR_NORMAL;
1507 color = PERF_COLOR_RED;
1508 else if (ratio > 25.0)
1509 color = PERF_COLOR_GREEN;
1510 else if (ratio > 5.0)
1511 color = PERF_COLOR_YELLOW;
/* Row layout: comm - tid : event count [ percentage ] runtime in ms. */
1513 printed += color_fprintf(fp, color, "%20s", thread->comm);
1514 printed += fprintf(fp, " - %-5d :%11lu [", thread->tid, ttrace->nr_events);
1515 printed += color_fprintf(fp, color, "%5.1f%%", ratio);
1516 printed += fprintf(fp, " ] %10.3f ms\n", ttrace->runtime_ms);
/*
 * trace__set_duration - option callback that records the duration filter.
 *
 * opt->value is used as the struct trace to update; str is the option's
 * argument and is converted with atof(); unset is not used.
 *
 * NOTE(review): atof() cannot report a failed conversion, so an argument
 * that is not a number is stored as 0.0 without any diagnostic; strtod()
 * would allow the input to be validated.  The return statement is not part
 * of this listing.
 */
1522 static int trace__set_duration(const struct option *opt, const char *str,
1523 int unset __maybe_unused)
1525 struct trace *trace = opt->value;
1527 trace->duration_filter = atof(str);
/*
 * trace__open_output - open filename for writing and store the stream in
 * trace->output.
 *
 * When filename already exists and is not empty, it is first renamed to
 * "<filename>.old".
 *
 * Returns 0 when fopen() succeeds and -errno when it fails.
 * NOTE(review): one line between building oldname and the rename() call is
 * missing from this listing.
 */
1531 static int trace__open_output(struct trace *trace, const char *filename)
/* stat() returns 0 on success, so this reads "the file exists and has data". */
1535 if (!stat(filename, &st) && st.st_size) {
1536 char oldname[PATH_MAX];
/* NOTE(review): the write is bounded by sizeof(oldname); presumably a filename near PATH_MAX gets a truncated backup name - confirm. */
1538 scnprintf(oldname, sizeof(oldname), "%s.old", filename);
/* The result of rename() is not checked. */
1540 rename(filename, oldname);
/* Mode "w" creates the file or truncates an existing one. */
1543 trace->output = fopen(filename, "w");
1545 return trace->output == NULL ? -errno : 0;
/*
 * cmd_trace - entry point of the "perf trace" command.
 *
 * Steps visible in this listing:
 *  - declare the usage strings, a struct trace with its initial settings
 *    and the command line option table;
 *  - parse the command line with parse_options();
 *  - when an output file name was given, open it with trace__open_output();
 *  - when an event expression was given, build trace.ev_qualifier from it,
 *    recording a leading '!' in trace.not_ev_qualifier;
 *  - validate the target and parse its uid; messages produced by
 *    perf_target__strerror() are written to trace.output;
 *  - switch to system-wide tracing when neither a command nor a target was
 *    supplied;
 *  - call trace__replay() or trace__run();
 *  - print the per-thread summary when trace.sched is set and err is 0;
 *  - close trace.output when an output file name was given.
 *
 * prefix is not used.
 *
 * NOTE(review): many lines of this function are missing from the listing
 * (several initializers, most error checks and jumps, and the end of the
 * function), so error handling and the return value are not described.
 */
1548 int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
1550 const char * const trace_usage[] = {
1551 "perf trace [<options>] [<command>]",
1552 "perf trace [<options>] -- <command> [<options>]",
/* Initial settings; only some of the initializers appear in this listing. */
1555 struct trace trace = {
1556 .audit_machine = audit_detect_machine(),
/* NOTE(review): UINT_MAX / ULLONG_MAX look like "not set by the user" markers - confirm. */
1565 .user_freq = UINT_MAX,
1566 .user_interval = ULLONG_MAX,
1573 const char *output_name = NULL;
1574 const char *ev_qualifier_str = NULL;
/*
 * Option table: each entry stores into 'trace', into one of the locals
 * above, or into a variable declared outside this function (input_name,
 * verbose).
 */
1575 const struct option trace_options[] = {
1576 OPT_BOOLEAN(0, "comm", &trace.show_comm,
1577 "show the thread COMM next to its id"),
1578 OPT_STRING('e', "expr", &ev_qualifier_str, "expr",
1579 "list of events to trace"),
1580 OPT_STRING('o', "output", &output_name, "file", "output file name"),
1581 OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"),
1582 OPT_STRING('p', "pid", &trace.opts.target.pid, "pid",
1583 "trace events on existing process id"),
1584 OPT_STRING('t', "tid", &trace.opts.target.tid, "tid",
1585 "trace events on existing thread id"),
1586 OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide,
1587 "system-wide collection from all CPUs"),
1588 OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu",
1589 "list of cpus to monitor"),
1590 OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit,
1591 "child tasks do not inherit counters"),
1592 OPT_CALLBACK('m', "mmap-pages", &trace.opts.mmap_pages, "pages",
1593 "number of mmap data pages",
1594 perf_evlist__parse_mmap_pages),
1595 OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user",
1597 OPT_CALLBACK(0, "duration", &trace, "float",
1598 "show only events with duration > N.M ms",
1599 trace__set_duration),
1600 OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"),
1601 OPT_INCR('v', "verbose", &verbose, "be more verbose"),
1602 OPT_BOOLEAN('T', "time", &trace.full_time,
1603 "Show full timestamp, not time relative to first start"),
/* argc is replaced by the value parse_options() returns. */
1609 argc = parse_options(argc, argv, trace_options, trace_usage, 0);
1611 if (output_name != NULL) {
1612 err = trace__open_output(&trace, output_name);
1614 perror("failed to create output file");
1619 if (ev_qualifier_str != NULL) {
1620 const char *s = ev_qualifier_str;
/* A leading '!' marks the qualifier list as negated. */
1622 trace.not_ev_qualifier = *s == '!';
1623 if (trace.not_ev_qualifier)
1625 trace.ev_qualifier = strlist__new(true, s);
1626 if (trace.ev_qualifier == NULL) {
1627 fputs("Not enough memory to parse event qualifier",
1634 err = perf_target__validate(&trace.opts.target);
1636 perf_target__strerror(&trace.opts.target, err, bf, sizeof(bf));
1637 fprintf(trace.output, "%s", bf);
1641 err = perf_target__parse_uid(&trace.opts.target);
1643 perf_target__strerror(&trace.opts.target, err, bf, sizeof(bf));
1644 fprintf(trace.output, "%s", bf);
/* No command to run and no target selected: trace the whole system. */
1648 if (!argc && perf_target__none(&trace.opts.target))
1649 trace.opts.target.system_wide = true;
/* The condition choosing between replay and live tracing is not shown in this listing. */
1652 err = trace__replay(&trace);
1654 err = trace__run(&trace, argc, argv);
/* The thread summary is printed only for --sched runs that finished without error. */
1656 if (trace.sched && !err)
1657 trace__fprintf_thread_summary(&trace, trace.output);
/* This function opened trace.output itself only when an output name was given. */
1660 if (output_name != NULL)
1661 fclose(trace.output);