1 #include <traceevent/event-parse.h>
3 #include "util/color.h"
4 #include "util/debug.h"
5 #include "util/evlist.h"
6 #include "util/machine.h"
7 #include "util/session.h"
8 #include "util/thread.h"
9 #include "util/parse-options.h"
10 #include "util/strlist.h"
11 #include "util/intlist.h"
12 #include "util/thread_map.h"
16 #include <sys/eventfd.h>
18 #include <linux/futex.h>
20 /* For older distros: */
22 # define MAP_STACK 0x20000
26 # define MADV_HWPOISON 100
29 #ifndef MADV_MERGEABLE
30 # define MADV_MERGEABLE 12
33 #ifndef MADV_UNMERGEABLE
34 # define MADV_UNMERGEABLE 13
49 #define DEFINE_STRARRAY(array) struct strarray strarray__##array = { \
50 .nr_entries = ARRAY_SIZE(array), \
54 static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size,
55 struct syscall_arg *arg)
58 struct strarray *sa = arg->parm;
60 if (idx < 0 || idx >= sa->nr_entries)
61 return scnprintf(bf, size, "%d", idx);
63 return scnprintf(bf, size, "%s", sa->entries[idx]);
66 #define SCA_STRARRAY syscall_arg__scnprintf_strarray
68 static size_t syscall_arg__scnprintf_hex(char *bf, size_t size,
69 struct syscall_arg *arg)
71 return scnprintf(bf, size, "%#lx", arg->val);
74 #define SCA_HEX syscall_arg__scnprintf_hex
76 static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size,
77 struct syscall_arg *arg)
79 int printed = 0, prot = arg->val;
81 if (prot == PROT_NONE)
82 return scnprintf(bf, size, "NONE");
83 #define P_MMAP_PROT(n) \
84 if (prot & PROT_##n) { \
85 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
95 P_MMAP_PROT(GROWSDOWN);
100 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", prot);
105 #define SCA_MMAP_PROT syscall_arg__scnprintf_mmap_prot
107 static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size,
108 struct syscall_arg *arg)
110 int printed = 0, flags = arg->val;
112 #define P_MMAP_FLAG(n) \
113 if (flags & MAP_##n) { \
114 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
119 P_MMAP_FLAG(PRIVATE);
123 P_MMAP_FLAG(ANONYMOUS);
124 P_MMAP_FLAG(DENYWRITE);
125 P_MMAP_FLAG(EXECUTABLE);
128 P_MMAP_FLAG(GROWSDOWN);
130 P_MMAP_FLAG(HUGETLB);
133 P_MMAP_FLAG(NONBLOCK);
134 P_MMAP_FLAG(NORESERVE);
135 P_MMAP_FLAG(POPULATE);
137 #ifdef MAP_UNINITIALIZED
138 P_MMAP_FLAG(UNINITIALIZED);
143 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
148 #define SCA_MMAP_FLAGS syscall_arg__scnprintf_mmap_flags
150 static size_t syscall_arg__scnprintf_madvise_behavior(char *bf, size_t size,
151 struct syscall_arg *arg)
153 int behavior = arg->val;
156 #define P_MADV_BHV(n) case MADV_##n: return scnprintf(bf, size, #n)
159 P_MADV_BHV(SEQUENTIAL);
160 P_MADV_BHV(WILLNEED);
161 P_MADV_BHV(DONTNEED);
163 P_MADV_BHV(DONTFORK);
165 P_MADV_BHV(HWPOISON);
166 #ifdef MADV_SOFT_OFFLINE
167 P_MADV_BHV(SOFT_OFFLINE);
169 P_MADV_BHV(MERGEABLE);
170 P_MADV_BHV(UNMERGEABLE);
172 P_MADV_BHV(HUGEPAGE);
174 #ifdef MADV_NOHUGEPAGE
175 P_MADV_BHV(NOHUGEPAGE);
178 P_MADV_BHV(DONTDUMP);
187 return scnprintf(bf, size, "%#x", behavior);
190 #define SCA_MADV_BHV syscall_arg__scnprintf_madvise_behavior
192 static size_t syscall_arg__scnprintf_flock(char *bf, size_t size,
193 struct syscall_arg *arg)
195 int printed = 0, op = arg->val;
198 return scnprintf(bf, size, "NONE");
200 if ((op & LOCK_##cmd) == LOCK_##cmd) { \
201 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #cmd); \
216 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", op);
221 #define SCA_FLOCK syscall_arg__scnprintf_flock
223 static size_t syscall_arg__scnprintf_futex_op(char *bf, size_t size, struct syscall_arg *arg)
225 enum syscall_futex_args {
226 SCF_UADDR = (1 << 0),
229 SCF_TIMEOUT = (1 << 3),
230 SCF_UADDR2 = (1 << 4),
234 int cmd = op & FUTEX_CMD_MASK;
238 #define P_FUTEX_OP(n) case FUTEX_##n: printed = scnprintf(bf, size, #n);
239 P_FUTEX_OP(WAIT); arg->mask |= SCF_VAL3|SCF_UADDR2; break;
240 P_FUTEX_OP(WAKE); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
241 P_FUTEX_OP(FD); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
242 P_FUTEX_OP(REQUEUE); arg->mask |= SCF_VAL3|SCF_TIMEOUT; break;
243 P_FUTEX_OP(CMP_REQUEUE); arg->mask |= SCF_TIMEOUT; break;
244 P_FUTEX_OP(CMP_REQUEUE_PI); arg->mask |= SCF_TIMEOUT; break;
245 P_FUTEX_OP(WAKE_OP); break;
246 P_FUTEX_OP(LOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
247 P_FUTEX_OP(UNLOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
248 P_FUTEX_OP(TRYLOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2; break;
249 P_FUTEX_OP(WAIT_BITSET); arg->mask |= SCF_UADDR2; break;
250 P_FUTEX_OP(WAKE_BITSET); arg->mask |= SCF_UADDR2; break;
251 P_FUTEX_OP(WAIT_REQUEUE_PI); break;
252 default: printed = scnprintf(bf, size, "%#x", cmd); break;
255 if (op & FUTEX_PRIVATE_FLAG)
256 printed += scnprintf(bf + printed, size - printed, "|PRIV");
258 if (op & FUTEX_CLOCK_REALTIME)
259 printed += scnprintf(bf + printed, size - printed, "|CLKRT");
264 #define SCA_FUTEX_OP syscall_arg__scnprintf_futex_op
/*
 * Names for the epoll_ctl "op" argument, indexed by the raw op value.
 * The designated initializer starts at [1], so slot 0 holds no name (NULL).
 * DEFINE_STRARRAY wraps the table as strarray__epoll_ctl_ops.
 */
266 static const char *epoll_ctl_ops[] = { [1] = "ADD", "DEL", "MOD", };
267 static DEFINE_STRARRAY(epoll_ctl_ops);
/*
 * Names for the "which" argument of getitimer/setitimer, indexed by value.
 * DEFINE_STRARRAY wraps the table as strarray__itimers.
 */
269 static const char *itimers[] = { "REAL", "VIRTUAL", "PROF", };
270 static DEFINE_STRARRAY(itimers);
272 static const char *whences[] = { "SET", "CUR", "END",
280 static DEFINE_STRARRAY(whences);
282 static const char *fcntl_cmds[] = {
283 "DUPFD", "GETFD", "SETFD", "GETFL", "SETFL", "GETLK", "SETLK",
284 "SETLKW", "SETOWN", "GETOWN", "SETSIG", "GETSIG", "F_GETLK64",
285 "F_SETLK64", "F_SETLKW64", "F_SETOWN_EX", "F_GETOWN_EX",
288 static DEFINE_STRARRAY(fcntl_cmds);
290 static const char *rlimit_resources[] = {
291 "CPU", "FSIZE", "DATA", "STACK", "CORE", "RSS", "NPROC", "NOFILE",
292 "MEMLOCK", "AS", "LOCKS", "SIGPENDING", "MSGQUEUE", "NICE", "RTPRIO",
295 static DEFINE_STRARRAY(rlimit_resources);
/*
 * Names for the "how" argument of rt_sigprocmask, indexed by value.
 * DEFINE_STRARRAY wraps the table as strarray__sighow.
 */
297 static const char *sighow[] = { "BLOCK", "UNBLOCK", "SETMASK", };
298 static DEFINE_STRARRAY(sighow);
300 static const char *clockid[] = {
301 "REALTIME", "MONOTONIC", "PROCESS_CPUTIME_ID", "THREAD_CPUTIME_ID",
302 "MONOTONIC_RAW", "REALTIME_COARSE", "MONOTONIC_COARSE",
304 static DEFINE_STRARRAY(clockid);
306 static const char *socket_families[] = {
307 "UNSPEC", "LOCAL", "INET", "AX25", "IPX", "APPLETALK", "NETROM",
308 "BRIDGE", "ATMPVC", "X25", "INET6", "ROSE", "DECnet", "NETBEUI",
309 "SECURITY", "KEY", "NETLINK", "PACKET", "ASH", "ECONET", "ATMSVC",
310 "RDS", "SNA", "IRDA", "PPPOX", "WANPIPE", "LLC", "IB", "CAN", "TIPC",
311 "BLUETOOTH", "IUCV", "RXRPC", "ISDN", "PHONET", "IEEE802154", "CAIF",
312 "ALG", "NFC", "VSOCK",
314 static DEFINE_STRARRAY(socket_families);
316 #ifndef SOCK_TYPE_MASK
317 #define SOCK_TYPE_MASK 0xf
320 static size_t syscall_arg__scnprintf_socket_type(char *bf, size_t size,
321 struct syscall_arg *arg)
325 flags = type & ~SOCK_TYPE_MASK;
327 type &= SOCK_TYPE_MASK;
329 * Can't use a strarray, MIPS may override for ABI reasons.
332 #define P_SK_TYPE(n) case SOCK_##n: printed = scnprintf(bf, size, #n); break;
337 P_SK_TYPE(SEQPACKET);
342 printed = scnprintf(bf, size, "%#x", type);
345 #define P_SK_FLAG(n) \
346 if (flags & SOCK_##n) { \
347 printed += scnprintf(bf + printed, size - printed, "|%s", #n); \
348 flags &= ~SOCK_##n; \
356 printed += scnprintf(bf + printed, size - printed, "|%#x", flags);
361 #define SCA_SK_TYPE syscall_arg__scnprintf_socket_type
364 #define MSG_PROBE 0x10
366 #ifndef MSG_WAITFORONE
367 #define MSG_WAITFORONE 0x10000
369 #ifndef MSG_SENDPAGE_NOTLAST
370 #define MSG_SENDPAGE_NOTLAST 0x20000
373 #define MSG_FASTOPEN 0x20000000
376 static size_t syscall_arg__scnprintf_msg_flags(char *bf, size_t size,
377 struct syscall_arg *arg)
379 int printed = 0, flags = arg->val;
382 return scnprintf(bf, size, "NONE");
383 #define P_MSG_FLAG(n) \
384 if (flags & MSG_##n) { \
385 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
391 P_MSG_FLAG(DONTROUTE);
396 P_MSG_FLAG(DONTWAIT);
403 P_MSG_FLAG(ERRQUEUE);
404 P_MSG_FLAG(NOSIGNAL);
406 P_MSG_FLAG(WAITFORONE);
407 P_MSG_FLAG(SENDPAGE_NOTLAST);
408 P_MSG_FLAG(FASTOPEN);
409 P_MSG_FLAG(CMSG_CLOEXEC);
413 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
418 #define SCA_MSG_FLAGS syscall_arg__scnprintf_msg_flags
420 static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size,
421 struct syscall_arg *arg)
426 if (mode == F_OK) /* 0 */
427 return scnprintf(bf, size, "F");
429 if (mode & n##_OK) { \
430 printed += scnprintf(bf + printed, size - printed, "%s", #n); \
440 printed += scnprintf(bf + printed, size - printed, "|%#x", mode);
445 #define SCA_ACCMODE syscall_arg__scnprintf_access_mode
447 static size_t syscall_arg__scnprintf_open_flags(char *bf, size_t size,
448 struct syscall_arg *arg)
450 int printed = 0, flags = arg->val;
452 if (!(flags & O_CREAT))
453 arg->mask |= 1 << (arg->idx + 1); /* Mask the mode parm */
456 return scnprintf(bf, size, "RDONLY");
458 if (flags & O_##n) { \
459 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
483 if ((flags & O_SYNC) == O_SYNC)
484 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", "SYNC");
496 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
501 #define SCA_OPEN_FLAGS syscall_arg__scnprintf_open_flags
503 static size_t syscall_arg__scnprintf_eventfd_flags(char *bf, size_t size,
504 struct syscall_arg *arg)
506 int printed = 0, flags = arg->val;
509 return scnprintf(bf, size, "NONE");
511 if (flags & EFD_##n) { \
512 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
522 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
527 #define SCA_EFD_FLAGS syscall_arg__scnprintf_eventfd_flags
529 static size_t syscall_arg__scnprintf_pipe_flags(char *bf, size_t size,
530 struct syscall_arg *arg)
532 int printed = 0, flags = arg->val;
535 if (flags & O_##n) { \
536 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
545 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
550 #define SCA_PIPE_FLAGS syscall_arg__scnprintf_pipe_flags
552 static size_t syscall_arg__scnprintf_signum(char *bf, size_t size, struct syscall_arg *arg)
557 #define P_SIGNUM(n) case SIG##n: return scnprintf(bf, size, #n)
592 return scnprintf(bf, size, "%#x", sig);
595 #define SCA_SIGNUM syscall_arg__scnprintf_signum
/*
 * Initializer fragment for a struct syscall_fmt entry: argument number
 * 'arg' is printed through SCA_STRARRAY using strarray__<array> as its
 * lookup table.  'name' does not appear in the expansion; it only labels
 * the argument at the use site.
 */
597 #define STRARRAY(arg, name, array) \
598 .arg_scnprintf = { [arg] = SCA_STRARRAY, }, \
599 .arg_parm = { [arg] = &strarray__##array, }
601 static struct syscall_fmt {
604 size_t (*arg_scnprintf[6])(char *bf, size_t size, struct syscall_arg *arg);
610 { .name = "access", .errmsg = true,
611 .arg_scnprintf = { [1] = SCA_ACCMODE, /* mode */ }, },
612 { .name = "arch_prctl", .errmsg = true, .alias = "prctl", },
613 { .name = "brk", .hexret = true,
614 .arg_scnprintf = { [0] = SCA_HEX, /* brk */ }, },
615 { .name = "clock_gettime", .errmsg = true, STRARRAY(0, clk_id, clockid), },
616 { .name = "connect", .errmsg = true, },
617 { .name = "epoll_ctl", .errmsg = true, STRARRAY(1, op, epoll_ctl_ops), },
618 { .name = "eventfd2", .errmsg = true,
619 .arg_scnprintf = { [1] = SCA_EFD_FLAGS, /* flags */ }, },
620 { .name = "fcntl", .errmsg = true, STRARRAY(1, cmd, fcntl_cmds), },
621 { .name = "flock", .errmsg = true,
622 .arg_scnprintf = { [1] = SCA_FLOCK, /* cmd */ }, },
623 { .name = "fstat", .errmsg = true, .alias = "newfstat", },
624 { .name = "fstatat", .errmsg = true, .alias = "newfstatat", },
625 { .name = "futex", .errmsg = true,
626 .arg_scnprintf = { [1] = SCA_FUTEX_OP, /* op */ }, },
627 { .name = "getitimer", .errmsg = true, STRARRAY(0, which, itimers), },
628 { .name = "getrlimit", .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
629 { .name = "ioctl", .errmsg = true,
630 .arg_scnprintf = { [2] = SCA_HEX, /* arg */ }, },
631 { .name = "kill", .errmsg = true,
632 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
633 { .name = "lseek", .errmsg = true, STRARRAY(2, whence, whences), },
634 { .name = "lstat", .errmsg = true, .alias = "newlstat", },
635 { .name = "madvise", .errmsg = true,
636 .arg_scnprintf = { [0] = SCA_HEX, /* start */
637 [2] = SCA_MADV_BHV, /* behavior */ }, },
638 { .name = "mmap", .hexret = true,
639 .arg_scnprintf = { [0] = SCA_HEX, /* addr */
640 [2] = SCA_MMAP_PROT, /* prot */
641 [3] = SCA_MMAP_FLAGS, /* flags */ }, },
642 { .name = "mprotect", .errmsg = true,
643 .arg_scnprintf = { [0] = SCA_HEX, /* start */
644 [2] = SCA_MMAP_PROT, /* prot */ }, },
645 { .name = "mremap", .hexret = true,
646 .arg_scnprintf = { [0] = SCA_HEX, /* addr */
647 [4] = SCA_HEX, /* new_addr */ }, },
648 { .name = "munmap", .errmsg = true,
649 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
650 { .name = "open", .errmsg = true,
651 .arg_scnprintf = { [1] = SCA_OPEN_FLAGS, /* flags */ }, },
652 { .name = "open_by_handle_at", .errmsg = true,
653 .arg_scnprintf = { [2] = SCA_OPEN_FLAGS, /* flags */ }, },
654 { .name = "openat", .errmsg = true,
655 .arg_scnprintf = { [2] = SCA_OPEN_FLAGS, /* flags */ }, },
656 { .name = "pipe2", .errmsg = true,
657 .arg_scnprintf = { [1] = SCA_PIPE_FLAGS, /* flags */ }, },
658 { .name = "poll", .errmsg = true, .timeout = true, },
659 { .name = "ppoll", .errmsg = true, .timeout = true, },
660 { .name = "pread", .errmsg = true, .alias = "pread64", },
661 { .name = "prlimit64", .errmsg = true, STRARRAY(1, resource, rlimit_resources), },
662 { .name = "pwrite", .errmsg = true, .alias = "pwrite64", },
663 { .name = "read", .errmsg = true, },
664 { .name = "recvfrom", .errmsg = true,
665 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
666 { .name = "recvmmsg", .errmsg = true,
667 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
668 { .name = "recvmsg", .errmsg = true,
669 .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
670 { .name = "rt_sigaction", .errmsg = true,
671 .arg_scnprintf = { [0] = SCA_SIGNUM, /* sig */ }, },
672 { .name = "rt_sigprocmask", .errmsg = true, STRARRAY(0, how, sighow), },
673 { .name = "rt_sigqueueinfo", .errmsg = true,
674 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
675 { .name = "rt_tgsigqueueinfo", .errmsg = true,
676 .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
677 { .name = "select", .errmsg = true, .timeout = true, },
678 { .name = "sendmmsg", .errmsg = true,
679 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
680 { .name = "sendmsg", .errmsg = true,
681 .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
682 { .name = "sendto", .errmsg = true,
683 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
684 { .name = "setitimer", .errmsg = true, STRARRAY(0, which, itimers), },
685 { .name = "setrlimit", .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
686 { .name = "socket", .errmsg = true,
687 .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
688 [1] = SCA_SK_TYPE, /* type */ },
689 .arg_parm = { [0] = &strarray__socket_families, /* family */ }, },
690 { .name = "socketpair", .errmsg = true,
691 .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
692 [1] = SCA_SK_TYPE, /* type */ },
693 .arg_parm = { [0] = &strarray__socket_families, /* family */ }, },
694 { .name = "stat", .errmsg = true, .alias = "newstat", },
695 { .name = "tgkill", .errmsg = true,
696 .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
697 { .name = "tkill", .errmsg = true,
698 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
699 { .name = "uname", .errmsg = true, .alias = "newuname", },
/*
 * bsearch() comparator: 'name' is the key string being looked up and
 * 'fmtp' points at a struct syscall_fmt element; ordering is strcmp()
 * of the key against the element's name.
 */
702 static int syscall_fmt__cmp(const void *name, const void *fmtp)
704 const struct syscall_fmt *fmt = fmtp;
705 return strcmp(name, fmt->name);
/*
 * Look up the formatting entry for syscall 'name' in syscall_fmts.
 * Returns the matching entry, or NULL when bsearch() finds none.
 * Because this is a binary search, syscall_fmts must be kept sorted by
 * .name in strcmp() order.
 */
708 static struct syscall_fmt *syscall_fmt__find(const char *name)
710 const int nmemb = ARRAY_SIZE(syscall_fmts);
711 return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp);
715 struct event_format *tp_format;
718 struct syscall_fmt *fmt;
719 size_t (**arg_scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
723 static size_t fprintf_duration(unsigned long t, FILE *fp)
725 double duration = (double)t / NSEC_PER_MSEC;
726 size_t printed = fprintf(fp, "(");
729 printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration);
730 else if (duration >= 0.01)
731 printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration);
733 printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration);
734 return printed + fprintf(fp, "): ");
737 struct thread_trace {
741 unsigned long nr_events;
/*
 * Allocate a struct thread_trace with zalloc() and return the result.
 * NOTE(review): zalloc() is defined elsewhere; presumably it returns
 * zero-filled memory or NULL on failure - confirm.
 */
746 static struct thread_trace *thread_trace__new(void)
748 return zalloc(sizeof(struct thread_trace));
751 static struct thread_trace *thread__trace(struct thread *thread, FILE *fp)
753 struct thread_trace *ttrace;
758 if (thread->priv == NULL)
759 thread->priv = thread_trace__new();
761 if (thread->priv == NULL)
764 ttrace = thread->priv;
769 color_fprintf(fp, PERF_COLOR_RED,
770 "WARNING: not enough memory, dropping samples!\n");
775 struct perf_tool tool;
779 struct syscall *table;
781 struct perf_record_opts opts;
786 unsigned long nr_events;
787 struct strlist *ev_qualifier;
788 bool not_ev_qualifier;
789 struct intlist *tid_list;
790 struct intlist *pid_list;
792 bool multiple_threads;
794 double duration_filter;
/*
 * Returns true when 't' is below trace->duration_filter scaled by
 * NSEC_PER_MSEC.
 * NOTE(review): the scaling suggests 't' is in nanoseconds and
 * duration_filter in milliseconds - confirm against the callers.
 */
798 static bool trace__filter_duration(struct trace *trace, double t)
800 return t < (trace->duration_filter * NSEC_PER_MSEC);
/*
 * Print 'tstamp' to 'fp' as an offset from trace->base_time, divided by
 * NSEC_PER_MSEC and formatted as "%10.3f ".  Returns fprintf()'s result.
 */
803 static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
805 double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC;
807 return fprintf(fp, "%10.3f ", ts);
810 static bool done = false;
812 static void sig_handler(int sig __maybe_unused)
817 static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread,
818 u64 duration, u64 tstamp, FILE *fp)
820 size_t printed = trace__fprintf_tstamp(trace, tstamp, fp);
821 printed += fprintf_duration(duration, fp);
823 if (trace->multiple_threads) {
824 if (trace->show_comm)
825 printed += fprintf(fp, "%.14s/", thread->comm);
826 printed += fprintf(fp, "%d ", thread->tid);
832 static int trace__process_event(struct trace *trace, struct machine *machine,
833 union perf_event *event)
837 switch (event->header.type) {
838 case PERF_RECORD_LOST:
839 color_fprintf(trace->output, PERF_COLOR_RED,
840 "LOST %" PRIu64 " events!\n", event->lost.lost);
841 ret = machine__process_lost_event(machine, event);
843 ret = machine__process_event(machine, event);
/*
 * perf_tool-style event callback: recovers the enclosing struct trace
 * from 'tool' with container_of() and forwards 'event' and 'machine' to
 * trace__process_event().  'sample' is not used.
 */
850 static int trace__tool_process(struct perf_tool *tool,
851 union perf_event *event,
852 struct perf_sample *sample __maybe_unused,
853 struct machine *machine)
855 struct trace *trace = container_of(tool, struct trace, tool);
856 return trace__process_event(trace, machine, event);
859 static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
861 int err = symbol__init();
866 machine__init(&trace->host, "", HOST_KERNEL_ID);
867 machine__create_kernel_maps(&trace->host);
869 if (perf_target__has_task(&trace->opts.target)) {
870 err = perf_event__synthesize_thread_map(&trace->tool, evlist->threads,
874 err = perf_event__synthesize_threads(&trace->tool, trace__tool_process,
884 static int syscall__set_arg_fmts(struct syscall *sc)
886 struct format_field *field;
889 sc->arg_scnprintf = calloc(sc->tp_format->format.nr_fields - 1, sizeof(void *));
890 if (sc->arg_scnprintf == NULL)
894 sc->arg_parm = sc->fmt->arg_parm;
896 for (field = sc->tp_format->format.fields->next; field; field = field->next) {
897 if (sc->fmt && sc->fmt->arg_scnprintf[idx])
898 sc->arg_scnprintf[idx] = sc->fmt->arg_scnprintf[idx];
899 else if (field->flags & FIELD_IS_POINTER)
900 sc->arg_scnprintf[idx] = syscall_arg__scnprintf_hex;
907 static int trace__read_syscall_info(struct trace *trace, int id)
911 const char *name = audit_syscall_to_name(id, trace->audit_machine);
916 if (id > trace->syscalls.max) {
917 struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc));
919 if (nsyscalls == NULL)
922 if (trace->syscalls.max != -1) {
923 memset(nsyscalls + trace->syscalls.max + 1, 0,
924 (id - trace->syscalls.max) * sizeof(*sc));
926 memset(nsyscalls, 0, (id + 1) * sizeof(*sc));
929 trace->syscalls.table = nsyscalls;
930 trace->syscalls.max = id;
933 sc = trace->syscalls.table + id;
936 if (trace->ev_qualifier) {
937 bool in = strlist__find(trace->ev_qualifier, name) != NULL;
939 if (!(in ^ trace->not_ev_qualifier)) {
942 * No need to read tracepoint information since this will be
949 sc->fmt = syscall_fmt__find(sc->name);
951 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name);
952 sc->tp_format = event_format__new("syscalls", tp_name);
954 if (sc->tp_format == NULL && sc->fmt && sc->fmt->alias) {
955 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias);
956 sc->tp_format = event_format__new("syscalls", tp_name);
959 if (sc->tp_format == NULL)
962 return syscall__set_arg_fmts(sc);
965 static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
970 if (sc->tp_format != NULL) {
971 struct format_field *field;
973 struct syscall_arg arg = {
978 for (field = sc->tp_format->format.fields->next; field;
979 field = field->next, ++arg.idx, bit <<= 1) {
983 * Suppress this argument if its value is zero and
984 * we don't have a string associated in an
987 if (args[arg.idx] == 0 &&
988 !(sc->arg_scnprintf &&
989 sc->arg_scnprintf[arg.idx] == SCA_STRARRAY &&
990 sc->arg_parm[arg.idx]))
993 printed += scnprintf(bf + printed, size - printed,
994 "%s%s: ", printed ? ", " : "", field->name);
995 if (sc->arg_scnprintf && sc->arg_scnprintf[arg.idx]) {
996 arg.val = args[arg.idx];
998 arg.parm = sc->arg_parm[arg.idx];
999 printed += sc->arg_scnprintf[arg.idx](bf + printed,
1000 size - printed, &arg);
1002 printed += scnprintf(bf + printed, size - printed,
1003 "%ld", args[arg.idx]);
1010 printed += scnprintf(bf + printed, size - printed,
1012 printed ? ", " : "", i, args[i]);
1020 typedef int (*tracepoint_handler)(struct trace *trace, struct perf_evsel *evsel,
1021 struct perf_sample *sample);
1023 static struct syscall *trace__syscall_info(struct trace *trace,
1024 struct perf_evsel *evsel,
1025 struct perf_sample *sample)
1027 int id = perf_evsel__intval(evsel, sample, "id");
1032 * XXX: Noticed on x86_64, reproduced as far back as 3.0.36, haven't tried
1033 * before that, leaving at a higher verbosity level till that is
1034 * explained. Reproduced with plain ftrace with:
1036 * echo 1 > /t/events/raw_syscalls/sys_exit/enable
1037 * grep "NR -1 " /t/trace_pipe
1039 * After generating some load on the machine.
1043 fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n",
1044 id, perf_evsel__name(evsel), ++n);
1049 if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) &&
1050 trace__read_syscall_info(trace, id))
1053 if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL))
1056 return &trace->syscalls.table[id];
1060 fprintf(trace->output, "Problems reading syscall %d", id);
1061 if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL)
1062 fprintf(trace->output, "(%s)", trace->syscalls.table[id].name);
1063 fputs(" information\n", trace->output);
1068 static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
1069 struct perf_sample *sample)
1074 struct thread *thread;
1075 struct syscall *sc = trace__syscall_info(trace, evsel, sample);
1076 struct thread_trace *ttrace;
1084 thread = machine__findnew_thread(&trace->host, sample->pid,
1086 ttrace = thread__trace(thread, trace->output);
1090 args = perf_evsel__rawptr(evsel, sample, "args");
1092 fprintf(trace->output, "Problems reading syscall arguments\n");
1096 ttrace = thread->priv;
1098 if (ttrace->entry_str == NULL) {
1099 ttrace->entry_str = malloc(1024);
1100 if (!ttrace->entry_str)
1104 ttrace->entry_time = sample->time;
1105 msg = ttrace->entry_str;
1106 printed += scnprintf(msg + printed, 1024 - printed, "%s(", sc->name);
1108 printed += syscall__scnprintf_args(sc, msg + printed, 1024 - printed, args);
1110 if (!strcmp(sc->name, "exit_group") || !strcmp(sc->name, "exit")) {
1111 if (!trace->duration_filter) {
1112 trace__fprintf_entry_head(trace, thread, 1, sample->time, trace->output);
1113 fprintf(trace->output, "%-70s\n", ttrace->entry_str);
1116 ttrace->entry_pending = true;
1121 static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
1122 struct perf_sample *sample)
1126 struct thread *thread;
1127 struct syscall *sc = trace__syscall_info(trace, evsel, sample);
1128 struct thread_trace *ttrace;
1136 thread = machine__findnew_thread(&trace->host, sample->pid,
1138 ttrace = thread__trace(thread, trace->output);
1142 ret = perf_evsel__intval(evsel, sample, "ret");
1144 ttrace = thread->priv;
1146 ttrace->exit_time = sample->time;
1148 if (ttrace->entry_time) {
1149 duration = sample->time - ttrace->entry_time;
1150 if (trace__filter_duration(trace, duration))
1152 } else if (trace->duration_filter)
1155 trace__fprintf_entry_head(trace, thread, duration, sample->time, trace->output);
1157 if (ttrace->entry_pending) {
1158 fprintf(trace->output, "%-70s", ttrace->entry_str);
1160 fprintf(trace->output, " ... [");
1161 color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued");
1162 fprintf(trace->output, "]: %s()", sc->name);
1165 if (sc->fmt == NULL) {
1167 fprintf(trace->output, ") = %d", ret);
1168 } else if (ret < 0 && sc->fmt->errmsg) {
1170 const char *emsg = strerror_r(-ret, bf, sizeof(bf)),
1171 *e = audit_errno_to_name(-ret);
1173 fprintf(trace->output, ") = -1 %s %s", e, emsg);
1174 } else if (ret == 0 && sc->fmt->timeout)
1175 fprintf(trace->output, ") = 0 Timeout");
1176 else if (sc->fmt->hexret)
1177 fprintf(trace->output, ") = %#x", ret);
1181 fputc('\n', trace->output);
1183 ttrace->entry_pending = false;
1188 static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel,
1189 struct perf_sample *sample)
1191 u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
1192 double runtime_ms = (double)runtime / NSEC_PER_MSEC;
1193 struct thread *thread = machine__findnew_thread(&trace->host,
1196 struct thread_trace *ttrace = thread__trace(thread, trace->output);
1201 ttrace->runtime_ms += runtime_ms;
1202 trace->runtime_ms += runtime_ms;
1206 fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n",
1208 perf_evsel__strval(evsel, sample, "comm"),
1209 (pid_t)perf_evsel__intval(evsel, sample, "pid"),
1211 perf_evsel__intval(evsel, sample, "vruntime"));
1215 static bool skip_sample(struct trace *trace, struct perf_sample *sample)
1217 if ((trace->pid_list && intlist__find(trace->pid_list, sample->pid)) ||
1218 (trace->tid_list && intlist__find(trace->tid_list, sample->tid)))
1221 if (trace->pid_list || trace->tid_list)
1227 static int trace__process_sample(struct perf_tool *tool,
1228 union perf_event *event __maybe_unused,
1229 struct perf_sample *sample,
1230 struct perf_evsel *evsel,
1231 struct machine *machine __maybe_unused)
1233 struct trace *trace = container_of(tool, struct trace, tool);
1236 tracepoint_handler handler = evsel->handler.func;
1238 if (skip_sample(trace, sample))
1241 if (!trace->full_time && trace->base_time == 0)
1242 trace->base_time = sample->time;
1245 handler(trace, evsel, sample);
1251 perf_session__has_tp(struct perf_session *session, const char *name)
1253 struct perf_evsel *evsel;
1255 evsel = perf_evlist__find_tracepoint_by_name(session->evlist, name);
1257 return evsel != NULL;
1260 static int parse_target_str(struct trace *trace)
1262 if (trace->opts.target.pid) {
1263 trace->pid_list = intlist__new(trace->opts.target.pid);
1264 if (trace->pid_list == NULL) {
1265 pr_err("Error parsing process id string\n");
1270 if (trace->opts.target.tid) {
1271 trace->tid_list = intlist__new(trace->opts.target.tid);
1272 if (trace->tid_list == NULL) {
1273 pr_err("Error parsing thread id string\n");
1281 static int trace__run(struct trace *trace, int argc, const char **argv)
1283 struct perf_evlist *evlist = perf_evlist__new();
1284 struct perf_evsel *evsel;
1286 unsigned long before;
1287 const bool forks = argc > 0;
1289 if (evlist == NULL) {
1290 fprintf(trace->output, "Not enough memory to run!\n");
1294 if (perf_evlist__add_newtp(evlist, "raw_syscalls", "sys_enter", trace__sys_enter) ||
1295 perf_evlist__add_newtp(evlist, "raw_syscalls", "sys_exit", trace__sys_exit)) {
1296 fprintf(trace->output, "Couldn't read the raw_syscalls tracepoints information!\n");
1297 goto out_delete_evlist;
1301 perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
1302 trace__sched_stat_runtime)) {
1303 fprintf(trace->output, "Couldn't read the sched_stat_runtime tracepoint information!\n");
1304 goto out_delete_evlist;
1307 err = perf_evlist__create_maps(evlist, &trace->opts.target);
1309 fprintf(trace->output, "Problems parsing the target to trace, check your options!\n");
1310 goto out_delete_evlist;
1313 err = trace__symbols_init(trace, evlist);
1315 fprintf(trace->output, "Problems initializing symbol libraries!\n");
1316 goto out_delete_maps;
1319 perf_evlist__config(evlist, &trace->opts);
1321 signal(SIGCHLD, sig_handler);
1322 signal(SIGINT, sig_handler);
1325 err = perf_evlist__prepare_workload(evlist, &trace->opts.target,
1326 argv, false, false);
1328 fprintf(trace->output, "Couldn't run the workload!\n");
1329 goto out_delete_maps;
1333 err = perf_evlist__open(evlist);
1335 fprintf(trace->output, "Couldn't create the events: %s\n", strerror(errno));
1336 goto out_delete_maps;
1339 err = perf_evlist__mmap(evlist, UINT_MAX, false);
1341 fprintf(trace->output, "Couldn't mmap the events: %s\n", strerror(errno));
1342 goto out_close_evlist;
1345 perf_evlist__enable(evlist);
1348 perf_evlist__start_workload(evlist);
1350 trace->multiple_threads = evlist->threads->map[0] == -1 || evlist->threads->nr > 1;
1352 before = trace->nr_events;
1354 for (i = 0; i < evlist->nr_mmaps; i++) {
1355 union perf_event *event;
1357 while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) {
1358 const u32 type = event->header.type;
1359 tracepoint_handler handler;
1360 struct perf_sample sample;
1364 err = perf_evlist__parse_sample(evlist, event, &sample);
1366 fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err);
1370 if (!trace->full_time && trace->base_time == 0)
1371 trace->base_time = sample.time;
1373 if (type != PERF_RECORD_SAMPLE) {
1374 trace__process_event(trace, &trace->host, event);
1378 evsel = perf_evlist__id2evsel(evlist, sample.id);
1379 if (evsel == NULL) {
1380 fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample.id);
1384 if (sample.raw_data == NULL) {
1385 fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
1386 perf_evsel__name(evsel), sample.tid,
1387 sample.cpu, sample.raw_size);
1391 handler = evsel->handler.func;
1392 handler(trace, evsel, &sample);
1395 goto out_unmap_evlist;
1399 if (trace->nr_events == before) {
1401 goto out_unmap_evlist;
1403 poll(evlist->pollfd, evlist->nr_fds, -1);
1407 perf_evlist__disable(evlist);
1412 perf_evlist__munmap(evlist);
1414 perf_evlist__close(evlist);
1416 perf_evlist__delete_maps(evlist);
1418 perf_evlist__delete(evlist);
1423 static int trace__replay(struct trace *trace)
1425 const struct perf_evsel_str_handler handlers[] = {
1426 { "raw_syscalls:sys_enter", trace__sys_enter, },
1427 { "raw_syscalls:sys_exit", trace__sys_exit, },
1430 struct perf_session *session;
1433 trace->tool.sample = trace__process_sample;
1434 trace->tool.mmap = perf_event__process_mmap;
1435 trace->tool.mmap2 = perf_event__process_mmap2;
1436 trace->tool.comm = perf_event__process_comm;
1437 trace->tool.exit = perf_event__process_exit;
1438 trace->tool.fork = perf_event__process_fork;
1439 trace->tool.attr = perf_event__process_attr;
1440 trace->tool.tracing_data = perf_event__process_tracing_data;
1441 trace->tool.build_id = perf_event__process_build_id;
1443 trace->tool.ordered_samples = true;
1444 trace->tool.ordering_requires_timestamps = true;
1446 /* add tid to output */
1447 trace->multiple_threads = true;
1449 if (symbol__init() < 0)
1452 session = perf_session__new(input_name, O_RDONLY, 0, false,
1454 if (session == NULL)
1457 err = perf_session__set_tracepoints_handlers(session, handlers);
1461 if (!perf_session__has_tp(session, "raw_syscalls:sys_enter")) {
1462 pr_err("Data file does not have raw_syscalls:sys_enter events\n");
1466 if (!perf_session__has_tp(session, "raw_syscalls:sys_exit")) {
1467 pr_err("Data file does not have raw_syscalls:sys_exit events\n");
1471 err = parse_target_str(trace);
1477 err = perf_session__process_events(session, &trace->tool);
1479 pr_err("Failed to process events, error %d", err);
1482 perf_session__delete(session);
/*
 * Print the banner and the column headings that precede the per-thread
 * summary table.
 *
 * Returns the sum of the fprintf() results for the four header lines.
 */
static size_t trace__fprintf_threads_header(FILE *fp)
{
	static const char * const banner[] = {
		"\n _____________________________________________________________________\n",
		" __) Summary of events (__\n\n",
		" [ task - pid ] [ events ] [ ratio ] [ runtime ]\n",
		" _____________________________________________________________________\n\n",
	};
	size_t total = 0;
	size_t i;

	/* One fprintf() per line, in order, exactly as many calls as lines. */
	for (i = 0; i < sizeof(banner) / sizeof(banner[0]); i++)
		total += fprintf(fp, "%s", banner[i]);

	return total;
}
1499 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
1501 size_t printed = trace__fprintf_threads_header(fp);
1504 for (nd = rb_first(&trace->host.threads); nd; nd = rb_next(nd)) {
1505 struct thread *thread = rb_entry(nd, struct thread, rb_node);
1506 struct thread_trace *ttrace = thread->priv;
1513 ratio = (double)ttrace->nr_events / trace->nr_events * 100.0;
1515 color = PERF_COLOR_NORMAL;
1517 color = PERF_COLOR_RED;
1518 else if (ratio > 25.0)
1519 color = PERF_COLOR_GREEN;
1520 else if (ratio > 5.0)
1521 color = PERF_COLOR_YELLOW;
1523 printed += color_fprintf(fp, color, "%20s", thread->comm);
1524 printed += fprintf(fp, " - %-5d :%11lu [", thread->tid, ttrace->nr_events);
1525 printed += color_fprintf(fp, color, "%5.1f%%", ratio);
1526 printed += fprintf(fp, " ] %10.3f ms\n", ttrace->runtime_ms);
1532 static int trace__set_duration(const struct option *opt, const char *str,
1533 int unset __maybe_unused)
1535 struct trace *trace = opt->value;
1537 trace->duration_filter = atof(str);
1541 static int trace__open_output(struct trace *trace, const char *filename)
1545 if (!stat(filename, &st) && st.st_size) {
1546 char oldname[PATH_MAX];
1548 scnprintf(oldname, sizeof(oldname), "%s.old", filename);
1550 rename(filename, oldname);
1553 trace->output = fopen(filename, "w");
1555 return trace->output == NULL ? -errno : 0;
/*
 * cmd_trace - entry point for `perf trace` (see the usage strings below).
 *
 * Visible flow: parse the command line, open the -o output file if one was
 * named, build the -e event qualifier list, validate the target and its uid,
 * default to system-wide when there is neither a command nor a target, run
 * trace__replay() or trace__run(), optionally print the thread summary, and
 * close the output file if this function opened it.
 *
 * NOTE(review): short lines (braces, `if (err)`, `goto`, `return`, some
 * initializers) are not present in this copy of the function, so the exact
 * error paths and the replay-vs-run condition cannot be read from here.
 */
1558 int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
1560 const char * const trace_usage[] = {
1561 "perf trace [<options>] [<command>]",
1562 "perf trace [<options>] -- <command> [<options>]",
/* Defaults for the tracing session; command line options below override them. */
1565 struct trace trace = {
1566 .audit_machine = audit_detect_machine(),
1575 .user_freq = UINT_MAX,
1576 .user_interval = ULLONG_MAX,
1583 const char *output_name = NULL;
1584 const char *ev_qualifier_str = NULL;
/* Option table: most entries write straight into fields of `trace`. */
1585 const struct option trace_options[] = {
1586 OPT_BOOLEAN(0, "comm", &trace.show_comm,
1587 "show the thread COMM next to its id"),
1588 OPT_STRING('e', "expr", &ev_qualifier_str, "expr",
1589 "list of events to trace"),
1590 OPT_STRING('o', "output", &output_name, "file", "output file name"),
1591 OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"),
1592 OPT_STRING('p', "pid", &trace.opts.target.pid, "pid",
1593 "trace events on existing process id"),
1594 OPT_STRING('t', "tid", &trace.opts.target.tid, "tid",
1595 "trace events on existing thread id"),
1596 OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide,
1597 "system-wide collection from all CPUs"),
1598 OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu",
1599 "list of cpus to monitor"),
1600 OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit,
1601 "child tasks do not inherit counters"),
1602 OPT_CALLBACK('m', "mmap-pages", &trace.opts.mmap_pages, "pages",
1603 "number of mmap data pages",
1604 perf_evlist__parse_mmap_pages),
1605 OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user",
/* --duration receives the whole struct trace; trace__set_duration() fills in the filter. */
1607 OPT_CALLBACK(0, "duration", &trace, "float",
1608 "show only events with duration > N.M ms",
1609 trace__set_duration),
1610 OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"),
1611 OPT_INCR('v', "verbose", &verbose, "be more verbose"),
1612 OPT_BOOLEAN('T', "time", &trace.full_time,
1613 "Show full timestamp, not time relative to first start"),
/* argc is replaced by the value parse_options() returns; it is tested against zero further down. */
1619 argc = parse_options(argc, argv, trace_options, trace_usage, 0);
/* -o given: trace__open_output() sets trace.output to the named file. */
1621 if (output_name != NULL) {
1622 err = trace__open_output(&trace, output_name);
1624 perror("failed to create output file");
/*
 * -e given: a leading '!' sets not_ev_qualifier; the names are then turned
 * into a strlist.  NOTE(review): the statement that steps past the '!' is
 * not visible here - confirm `s` is advanced before strlist__new().
 */
1629 if (ev_qualifier_str != NULL) {
1630 const char *s = ev_qualifier_str;
1632 trace.not_ev_qualifier = *s == '!';
1633 if (trace.not_ev_qualifier)
1635 trace.ev_qualifier = strlist__new(true, s);
1636 if (trace.ev_qualifier == NULL) {
1637 fputs("Not enough memory to parse event qualifier",
/* Target validation: the message for a non-zero err is formatted into bf and written to trace.output. */
1644 err = perf_target__validate(&trace.opts.target);
1646 perf_target__strerror(&trace.opts.target, err, bf, sizeof(bf));
1647 fprintf(trace.output, "%s", bf);
/* Same reporting scheme for the uid given with -u. */
1651 err = perf_target__parse_uid(&trace.opts.target);
1653 perf_target__strerror(&trace.opts.target, err, bf, sizeof(bf));
1654 fprintf(trace.output, "%s", bf);
/* argc is zero and no target was selected: trace the whole system. */
1658 if (!argc && perf_target__none(&trace.opts.target))
1659 trace.opts.target.system_wide = true;
/* Replay a recorded file or trace live; presumably chosen on whether -i set input_name - TODO confirm. */
1662 err = trace__replay(&trace);
1664 err = trace__run(&trace, argc, argv);
/* The per-thread summary is printed only when --sched was given and err is 0. */
1666 if (trace.sched && !err)
1667 trace__fprintf_thread_summary(&trace, trace.output);
/* Close the output only when -o was given, i.e. when it was opened above. */
1670 if (output_name != NULL)
1671 fclose(trace.output);