1 #include <traceevent/event-parse.h>
3 #include "util/color.h"
4 #include "util/debug.h"
5 #include "util/evlist.h"
6 #include "util/machine.h"
7 #include "util/session.h"
8 #include "util/thread.h"
9 #include "util/parse-options.h"
10 #include "util/strlist.h"
11 #include "util/intlist.h"
12 #include "util/thread_map.h"
16 #include <sys/eventfd.h>
18 #include <linux/futex.h>
20 /* For older distros: */
22 # define MAP_STACK 0x20000
26 # define MADV_HWPOISON 100
29 #ifndef MADV_MERGEABLE
30 # define MADV_MERGEABLE 12
33 #ifndef MADV_UNMERGEABLE
34 # define MADV_UNMERGEABLE 13
/*
 * DEFINE_STRARRAY(array) - define a 'struct strarray' called strarray__<array>
 * describing the string table 'array'; .nr_entries is set to the element
 * count of that table. Only part of the macro body is visible in this
 * excerpt.
 */
49 #define DEFINE_STRARRAY(array) struct strarray strarray__##array = { \
50 .nr_entries = ARRAY_SIZE(array), \
/*
 * Print a syscall argument as a name taken from a string table.
 *
 * The table is the 'struct strarray' passed in arg->parm. When idx (its
 * initialisation is not visible in this excerpt) is negative or not below
 * sa->nr_entries, it is printed as a plain decimal number; otherwise
 * sa->entries[idx] is printed.
 *
 * Returns the scnprintf() result for the text written to bf.
 * SCA_STRARRAY is the short alias used in the syscall_fmts table.
 */
54 static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size,
55 struct syscall_arg *arg)
58 struct strarray *sa = arg->parm;
60 if (idx < 0 || idx >= sa->nr_entries)
61 return scnprintf(bf, size, "%d", idx);
63 return scnprintf(bf, size, "%s", sa->entries[idx]);
66 #define SCA_STRARRAY syscall_arg__scnprintf_strarray
/*
 * Print arg->val in hexadecimal with a "0x" prefix ("%#lx").
 * Returns the scnprintf() result. SCA_HEX is the alias used in syscall_fmts.
 */
68 static size_t syscall_arg__scnprintf_hex(char *bf, size_t size,
69 struct syscall_arg *arg)
71 return scnprintf(bf, size, "%#lx", arg->val);
74 #define SCA_HEX syscall_arg__scnprintf_hex
/*
 * Decode a memory protection mask (arg->val) into '|'-separated PROT_* names.
 *
 * A value equal to PROT_NONE is printed as "NONE". P_MMAP_PROT(n) appends
 * the name n when PROT_##n is set in prot, inserting "|" if something was
 * already printed. The closing "%#x" print emits prot itself; presumably it
 * is only reached with unrecognised bits left over -- the guarding lines are
 * not visible in this excerpt.
 *
 * SCA_MMAP_PROT is the alias used in syscall_fmts.
 */
76 static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size,
77 struct syscall_arg *arg)
79 int printed = 0, prot = arg->val;
81 if (prot == PROT_NONE)
82 return scnprintf(bf, size, "NONE");
83 #define P_MMAP_PROT(n) \
84 if (prot & PROT_##n) { \
85 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
95 P_MMAP_PROT(GROWSDOWN);
100 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", prot);
105 #define SCA_MMAP_PROT syscall_arg__scnprintf_mmap_prot
/*
 * Decode an mmap() flags word (arg->val) into '|'-separated MAP_* names.
 *
 * P_MMAP_FLAG(n) appends the name n when MAP_##n is set in flags.
 * MAP_UNINITIALIZED is only decoded when the headers define it. The closing
 * "%#x" print emits flags itself; presumably only leftover bits reach it --
 * the guarding lines are not visible in this excerpt.
 *
 * SCA_MMAP_FLAGS is the alias used in syscall_fmts.
 */
107 static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size,
108 struct syscall_arg *arg)
110 int printed = 0, flags = arg->val;
112 #define P_MMAP_FLAG(n) \
113 if (flags & MAP_##n) { \
114 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
119 P_MMAP_FLAG(PRIVATE);
123 P_MMAP_FLAG(ANONYMOUS);
124 P_MMAP_FLAG(DENYWRITE);
125 P_MMAP_FLAG(EXECUTABLE);
128 P_MMAP_FLAG(GROWSDOWN);
130 P_MMAP_FLAG(HUGETLB);
133 P_MMAP_FLAG(NONBLOCK);
134 P_MMAP_FLAG(NORESERVE);
135 P_MMAP_FLAG(POPULATE);
137 #ifdef MAP_UNINITIALIZED
138 P_MMAP_FLAG(UNINITIALIZED);
143 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
148 #define SCA_MMAP_FLAGS syscall_arg__scnprintf_mmap_flags
/*
 * Print the madvise() 'behavior' argument (arg->val) by name.
 *
 * P_MADV_BHV(n) expands to a case label for MADV_##n that returns after
 * printing n. MADV_SOFT_OFFLINE and MADV_NOHUGEPAGE are only handled when
 * the headers define them. The final return prints behavior in hexadecimal.
 *
 * SCA_MADV_BHV is the alias used in syscall_fmts.
 */
150 static size_t syscall_arg__scnprintf_madvise_behavior(char *bf, size_t size,
151 struct syscall_arg *arg)
153 int behavior = arg->val;
156 #define P_MADV_BHV(n) case MADV_##n: return scnprintf(bf, size, #n)
159 P_MADV_BHV(SEQUENTIAL);
160 P_MADV_BHV(WILLNEED);
161 P_MADV_BHV(DONTNEED);
163 P_MADV_BHV(DONTFORK);
165 P_MADV_BHV(HWPOISON);
166 #ifdef MADV_SOFT_OFFLINE
167 P_MADV_BHV(SOFT_OFFLINE);
169 P_MADV_BHV(MERGEABLE);
170 P_MADV_BHV(UNMERGEABLE);
172 P_MADV_BHV(HUGEPAGE);
174 #ifdef MADV_NOHUGEPAGE
175 P_MADV_BHV(NOHUGEPAGE);
178 P_MADV_BHV(DONTDUMP);
187 return scnprintf(bf, size, "%#x", behavior);
190 #define SCA_MADV_BHV syscall_arg__scnprintf_madvise_behavior
/*
 * Decode the futex() 'op' argument.
 *
 * The command is op & FUTEX_CMD_MASK and is printed by name through
 * P_FUTEX_OP(n), which opens a case for FUTEX_##n; unknown commands are
 * printed in hexadecimal. Most cases also OR SCF_* bits into arg->mask.
 * NOTE(review): presumably those bits mark the futex arguments the command
 * does not use so they are left out of the output -- confirm against
 * syscall__scnprintf_args().
 *
 * After the command, "|PRIV" is appended when FUTEX_PRIVATE_FLAG is set and
 * "|CLKRT" when FUTEX_CLOCK_REALTIME is set.
 *
 * SCA_FUTEX_OP is the alias used in syscall_fmts.
 */
192 static size_t syscall_arg__scnprintf_futex_op(char *bf, size_t size, struct syscall_arg *arg)
194 enum syscall_futex_args {
195 SCF_UADDR = (1 << 0),
198 SCF_TIMEOUT = (1 << 3),
199 SCF_UADDR2 = (1 << 4),
203 int cmd = op & FUTEX_CMD_MASK;
207 #define P_FUTEX_OP(n) case FUTEX_##n: printed = scnprintf(bf, size, #n);
208 P_FUTEX_OP(WAIT); arg->mask |= SCF_VAL3|SCF_UADDR2; break;
209 P_FUTEX_OP(WAKE); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
210 P_FUTEX_OP(FD); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
211 P_FUTEX_OP(REQUEUE); arg->mask |= SCF_VAL3|SCF_TIMEOUT; break;
212 P_FUTEX_OP(CMP_REQUEUE); arg->mask |= SCF_TIMEOUT; break;
213 P_FUTEX_OP(CMP_REQUEUE_PI); arg->mask |= SCF_TIMEOUT; break;
214 P_FUTEX_OP(WAKE_OP); break;
215 P_FUTEX_OP(LOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
216 P_FUTEX_OP(UNLOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
217 P_FUTEX_OP(TRYLOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2; break;
218 P_FUTEX_OP(WAIT_BITSET); arg->mask |= SCF_UADDR2; break;
219 P_FUTEX_OP(WAKE_BITSET); arg->mask |= SCF_UADDR2; break;
220 P_FUTEX_OP(WAIT_REQUEUE_PI); break;
221 default: printed = scnprintf(bf, size, "%#x", cmd); break;
224 if (op & FUTEX_PRIVATE_FLAG)
225 printed += scnprintf(bf + printed, size - printed, "|PRIV");
227 if (op & FUTEX_CLOCK_REALTIME)
228 printed += scnprintf(bf + printed, size - printed, "|CLKRT");
233 #define SCA_FUTEX_OP syscall_arg__scnprintf_futex_op
/*
 * String tables used with SCA_STRARRAY: the numeric value of a syscall
 * argument is the index into the table. Each table is wrapped by
 * DEFINE_STRARRAY() into a strarray__<name> object that the syscall_fmts
 * entries reference through .arg_parm.
 *
 *   itimers          - getitimer/setitimer 'which'
 *   whences          - lseek 'whence'
 *   fcntl_cmds       - fcntl 'cmd'
 *   rlimit_resources - getrlimit/setrlimit/prlimit64 'resource'
 *   sighow           - rt_sigprocmask 'how'
 *   socket_families  - socket 'family'
 *
 * Some of the initialisers are only partially visible in this excerpt.
 */
235 static const char *itimers[] = { "REAL", "VIRTUAL", "PROF", };
236 static DEFINE_STRARRAY(itimers);
238 static const char *whences[] = { "SET", "CUR", "END",
246 static DEFINE_STRARRAY(whences);
248 static const char *fcntl_cmds[] = {
249 "DUPFD", "GETFD", "SETFD", "GETFL", "SETFL", "GETLK", "SETLK",
250 "SETLKW", "SETOWN", "GETOWN", "SETSIG", "GETSIG", "F_GETLK64",
251 "F_SETLK64", "F_SETLKW64", "F_SETOWN_EX", "F_GETOWN_EX",
254 static DEFINE_STRARRAY(fcntl_cmds);
256 static const char *rlimit_resources[] = {
257 "CPU", "FSIZE", "DATA", "STACK", "CORE", "RSS", "NPROC", "NOFILE",
258 "MEMLOCK", "AS", "LOCKS", "SIGPENDING", "MSGQUEUE", "NICE", "RTPRIO",
261 static DEFINE_STRARRAY(rlimit_resources);
263 static const char *sighow[] = { "BLOCK", "UNBLOCK", "SETMASK", };
264 static DEFINE_STRARRAY(sighow);
266 static const char *socket_families[] = {
267 "UNSPEC", "LOCAL", "INET", "AX25", "IPX", "APPLETALK", "NETROM",
268 "BRIDGE", "ATMPVC", "X25", "INET6", "ROSE", "DECnet", "NETBEUI",
269 "SECURITY", "KEY", "NETLINK", "PACKET", "ASH", "ECONET", "ATMSVC",
270 "RDS", "SNA", "IRDA", "PPPOX", "WANPIPE", "LLC", "IB", "CAN", "TIPC",
271 "BLUETOOTH", "IUCV", "RXRPC", "ISDN", "PHONET", "IEEE802154", "CAIF",
272 "ALG", "NFC", "VSOCK",
274 static DEFINE_STRARRAY(socket_families);
/*
 * Decode the socket() 'type' argument.
 *
 * The value is split in two: the bits under SOCK_TYPE_MASK (0xf when the
 * headers do not define it) select the socket type, the remaining bits are
 * flags. P_SK_TYPE(n) prints the name for SOCK_##n; an unknown type is
 * printed in hexadecimal. P_SK_FLAG(n) appends "|n" when SOCK_##n is set and
 * clears that bit from flags. The closing "|%#x" print emits flags itself;
 * presumably only leftover bits reach it -- the guarding line is not visible
 * in this excerpt.
 *
 * SCA_SK_TYPE is the alias used in syscall_fmts.
 */
276 #ifndef SOCK_TYPE_MASK
277 #define SOCK_TYPE_MASK 0xf
280 static size_t syscall_arg__scnprintf_socket_type(char *bf, size_t size,
281 struct syscall_arg *arg)
285 flags = type & ~SOCK_TYPE_MASK;
287 type &= SOCK_TYPE_MASK;
289 * Can't use a strarray, MIPS may override for ABI reasons.
292 #define P_SK_TYPE(n) case SOCK_##n: printed = scnprintf(bf, size, #n); break;
297 P_SK_TYPE(SEQPACKET);
302 printed = scnprintf(bf, size, "%#x", type);
305 #define P_SK_FLAG(n) \
306 if (flags & SOCK_##n) { \
307 printed += scnprintf(bf + printed, size - printed, "|%s", #n); \
308 flags &= ~SOCK_##n; \
316 printed += scnprintf(bf + printed, size - printed, "|%#x", flags);
321 #define SCA_SK_TYPE syscall_arg__scnprintf_socket_type
/*
 * Decode a send/recv 'flags' argument (arg->val) into '|'-separated MSG_*
 * names.
 *
 * The #defines ahead of the function supply values for MSG_* constants;
 * MSG_SENDPAGE_NOTLAST is visibly guarded by #ifndef, the guards of the
 * others are not visible in this excerpt. "NONE" is printed by an early
 * return whose condition is not visible here. P_MSG_FLAG(n) appends n when
 * MSG_##n is set. The closing "%#x" print emits flags itself; presumably
 * only leftover bits reach it.
 *
 * SCA_MSG_FLAGS is the alias used in syscall_fmts.
 */
324 #define MSG_PROBE 0x10
326 #ifndef MSG_SENDPAGE_NOTLAST
327 #define MSG_SENDPAGE_NOTLAST 0x20000
330 #define MSG_FASTOPEN 0x20000000
333 static size_t syscall_arg__scnprintf_msg_flags(char *bf, size_t size,
334 struct syscall_arg *arg)
336 int printed = 0, flags = arg->val;
339 return scnprintf(bf, size, "NONE");
340 #define P_MSG_FLAG(n) \
341 if (flags & MSG_##n) { \
342 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
348 P_MSG_FLAG(DONTROUTE);
353 P_MSG_FLAG(DONTWAIT);
360 P_MSG_FLAG(ERRQUEUE);
361 P_MSG_FLAG(NOSIGNAL);
363 P_MSG_FLAG(WAITFORONE);
364 P_MSG_FLAG(SENDPAGE_NOTLAST);
365 P_MSG_FLAG(FASTOPEN);
366 P_MSG_FLAG(CMSG_CLOEXEC);
370 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
375 #define SCA_MSG_FLAGS syscall_arg__scnprintf_msg_flags
/*
 * Decode the access() 'mode' argument.
 *
 * A mode equal to F_OK is printed as "F". Otherwise each <n>_OK bit that is
 * set appends its letter n, with no separator between letters. The closing
 * "|%#x" print emits mode itself; presumably only leftover bits reach it --
 * the guarding line is not visible in this excerpt.
 *
 * SCA_ACCMODE is the alias used in syscall_fmts.
 */
377 static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size,
378 struct syscall_arg *arg)
383 if (mode == F_OK) /* 0 */
384 return scnprintf(bf, size, "F");
386 if (mode & n##_OK) { \
387 printed += scnprintf(bf + printed, size - printed, "%s", #n); \
397 printed += scnprintf(bf + printed, size - printed, "|%#x", mode);
402 #define SCA_ACCMODE syscall_arg__scnprintf_access_mode
/*
 * Decode an open()-style 'flags' argument (arg->val) into '|'-separated O_*
 * names.
 *
 * When O_CREAT is not set, the bit for the following argument
 * (arg->idx + 1, the mode) is set in arg->mask so that argument is masked.
 * "RDONLY" is printed by an early return whose condition is not visible in
 * this excerpt. "SYNC" is only printed when every bit of O_SYNC is set,
 * hence the equality test rather than a plain AND. The closing "%#x" print
 * emits flags itself; presumably only leftover bits reach it.
 *
 * SCA_OPEN_FLAGS is the alias used in syscall_fmts.
 */
404 static size_t syscall_arg__scnprintf_open_flags(char *bf, size_t size,
405 struct syscall_arg *arg)
407 int printed = 0, flags = arg->val;
409 if (!(flags & O_CREAT))
410 arg->mask |= 1 << (arg->idx + 1); /* Mask the mode parm */
413 return scnprintf(bf, size, "RDONLY");
415 if (flags & O_##n) { \
416 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
440 if ((flags & O_SYNC) == O_SYNC)
441 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", "SYNC");
453 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
458 #define SCA_OPEN_FLAGS syscall_arg__scnprintf_open_flags
/*
 * Decode the eventfd2() 'flags' argument (arg->val) into '|'-separated EFD_*
 * names.
 *
 * "NONE" is printed by an early return whose condition is not visible in
 * this excerpt. Each EFD_##n bit that is set appends its name n. The closing
 * "%#x" print emits flags itself; presumably only leftover bits reach it.
 *
 * SCA_EFD_FLAGS is the alias used in syscall_fmts.
 */
460 static size_t syscall_arg__scnprintf_eventfd_flags(char *bf, size_t size,
461 struct syscall_arg *arg)
463 int printed = 0, flags = arg->val;
466 return scnprintf(bf, size, "NONE");
468 if (flags & EFD_##n) { \
469 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
479 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
484 #define SCA_EFD_FLAGS syscall_arg__scnprintf_eventfd_flags
/*
 * Print a signal number argument by name.
 *
 * P_SIGNUM(n) expands to a case label for SIG##n that returns after printing
 * n (the name without the "SIG" prefix); the list of cases is not visible in
 * this excerpt. The final return prints sig in hexadecimal.
 *
 * SCA_SIGNUM is the alias used in syscall_fmts.
 */
486 static size_t syscall_arg__scnprintf_signum(char *bf, size_t size, struct syscall_arg *arg)
491 #define P_SIGNUM(n) case SIG##n: return scnprintf(bf, size, #n)
526 return scnprintf(bf, size, "%#x", sig);
529 #define SCA_SIGNUM syscall_arg__scnprintf_signum
/*
 * Per-syscall formatting hints, looked up by syscall name.
 *
 * Members used by the entries below:
 *   .name          - syscall name, the lookup key
 *   .alias         - alternative name tried for the sys_enter_<name>
 *                    tracepoint when the one built from .name is not found
 *   .arg_scnprintf - pretty printer per argument position (up to 6)
 *   .arg_parm      - opaque parameter per argument position, handed to the
 *                    printer through arg->parm (e.g. a strarray)
 *   .errmsg        - on a negative return print the errno name and message
 *   .timeout       - print "Timeout" when the syscall returns 0
 *   .hexret        - print the return value in hexadecimal
 *
 * The entries are kept in strcmp() order of .name: syscall_fmt__find() uses
 * bsearch() on this table, so new entries must preserve that order.
 */
531 static struct syscall_fmt {
534 size_t (*arg_scnprintf[6])(char *bf, size_t size, struct syscall_arg *arg);
540 { .name = "access", .errmsg = true,
541 .arg_scnprintf = { [1] = SCA_ACCMODE, /* mode */ }, },
542 { .name = "arch_prctl", .errmsg = true, .alias = "prctl", },
543 { .name = "brk", .hexret = true,
544 .arg_scnprintf = { [0] = SCA_HEX, /* brk */ }, },
545 { .name = "connect", .errmsg = true, },
546 { .name = "eventfd2", .errmsg = true,
547 .arg_scnprintf = { [1] = SCA_EFD_FLAGS, /* flags */ }, },
548 { .name = "fcntl", .errmsg = true,
549 .arg_scnprintf = { [1] = SCA_STRARRAY, /* cmd */ },
550 .arg_parm = { [1] = &strarray__fcntl_cmds, /* cmd */ }, },
551 { .name = "fstat", .errmsg = true, .alias = "newfstat", },
552 { .name = "fstatat", .errmsg = true, .alias = "newfstatat", },
553 { .name = "futex", .errmsg = true,
554 .arg_scnprintf = { [1] = SCA_FUTEX_OP, /* op */ }, },
555 { .name = "getitimer", .errmsg = true,
556 .arg_scnprintf = { [0] = SCA_STRARRAY, /* which */ },
557 .arg_parm = { [0] = &strarray__itimers, /* which */ }, },
558 { .name = "getrlimit", .errmsg = true,
559 .arg_scnprintf = { [0] = SCA_STRARRAY, /* resource */ },
560 .arg_parm = { [0] = &strarray__rlimit_resources, /* resource */ }, },
561 { .name = "ioctl", .errmsg = true,
562 .arg_scnprintf = { [2] = SCA_HEX, /* arg */ }, },
563 { .name = "kill", .errmsg = true,
564 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
565 { .name = "lseek", .errmsg = true,
566 .arg_scnprintf = { [2] = SCA_STRARRAY, /* whence */ },
567 .arg_parm = { [2] = &strarray__whences, /* whence */ }, },
568 { .name = "lstat", .errmsg = true, .alias = "newlstat", },
569 { .name = "madvise", .errmsg = true,
570 .arg_scnprintf = { [0] = SCA_HEX, /* start */
571 [2] = SCA_MADV_BHV, /* behavior */ }, },
572 { .name = "mmap", .hexret = true,
573 .arg_scnprintf = { [0] = SCA_HEX, /* addr */
574 [2] = SCA_MMAP_PROT, /* prot */
575 [3] = SCA_MMAP_FLAGS, /* flags */ }, },
576 { .name = "mprotect", .errmsg = true,
577 .arg_scnprintf = { [0] = SCA_HEX, /* start */
578 [2] = SCA_MMAP_PROT, /* prot */ }, },
579 { .name = "mremap", .hexret = true,
580 .arg_scnprintf = { [0] = SCA_HEX, /* addr */
581 [4] = SCA_HEX, /* new_addr */ }, },
582 { .name = "munmap", .errmsg = true,
583 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
584 { .name = "open", .errmsg = true,
585 .arg_scnprintf = { [1] = SCA_OPEN_FLAGS, /* flags */ }, },
586 { .name = "open_by_handle_at", .errmsg = true,
587 .arg_scnprintf = { [2] = SCA_OPEN_FLAGS, /* flags */ }, },
588 { .name = "openat", .errmsg = true,
589 .arg_scnprintf = { [2] = SCA_OPEN_FLAGS, /* flags */ }, },
590 { .name = "poll", .errmsg = true, .timeout = true, },
591 { .name = "ppoll", .errmsg = true, .timeout = true, },
592 { .name = "pread", .errmsg = true, .alias = "pread64", },
593 { .name = "prlimit64", .errmsg = true,
594 .arg_scnprintf = { [1] = SCA_STRARRAY, /* resource */ },
595 .arg_parm = { [1] = &strarray__rlimit_resources, /* resource */ }, },
596 { .name = "pwrite", .errmsg = true, .alias = "pwrite64", },
597 { .name = "read", .errmsg = true, },
598 { .name = "recvfrom", .errmsg = true,
599 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
600 { .name = "recvmmsg", .errmsg = true,
601 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
602 { .name = "recvmsg", .errmsg = true,
603 .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
604 { .name = "rt_sigaction", .errmsg = true,
605 .arg_scnprintf = { [0] = SCA_SIGNUM, /* sig */ }, },
606 { .name = "rt_sigprocmask", .errmsg = true,
607 .arg_scnprintf = { [0] = SCA_STRARRAY, /* how */ },
608 .arg_parm = { [0] = &strarray__sighow, /* how */ }, },
609 { .name = "rt_sigqueueinfo", .errmsg = true,
610 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
611 { .name = "rt_tgsigqueueinfo", .errmsg = true,
612 .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
613 { .name = "select", .errmsg = true, .timeout = true, },
614 { .name = "sendmmsg", .errmsg = true,
615 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
616 { .name = "sendmsg", .errmsg = true,
617 .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
618 { .name = "sendto", .errmsg = true,
619 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
620 { .name = "setitimer", .errmsg = true,
621 .arg_scnprintf = { [0] = SCA_STRARRAY, /* which */ },
622 .arg_parm = { [0] = &strarray__itimers, /* which */ }, },
623 { .name = "setrlimit", .errmsg = true,
624 .arg_scnprintf = { [0] = SCA_STRARRAY, /* resource */ },
625 .arg_parm = { [0] = &strarray__rlimit_resources, /* resource */ }, },
626 { .name = "socket", .errmsg = true,
627 .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
628 [1] = SCA_SK_TYPE, /* type */ },
629 .arg_parm = { [0] = &strarray__socket_families, /* family */ }, },
630 { .name = "stat", .errmsg = true, .alias = "newstat", },
631 { .name = "tgkill", .errmsg = true,
632 .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
633 { .name = "tkill", .errmsg = true,
634 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
635 { .name = "uname", .errmsg = true, .alias = "newuname", },
/*
 * bsearch() comparison callback: 'name' is the key (a C string), 'fmtp'
 * points at a struct syscall_fmt table element. Returns the strcmp() result
 * of the key against the element's .name.
 */
638 static int syscall_fmt__cmp(const void *name, const void *fmtp)
640 const struct syscall_fmt *fmt = fmtp;
641 return strcmp(name, fmt->name);
/*
 * Look up the formatting hints for syscall 'name' in syscall_fmts.
 *
 * Uses bsearch(), so it relies on syscall_fmts being sorted by .name in
 * strcmp() order. Returns the matching entry, or NULL when there is none.
 */
644 static struct syscall_fmt *syscall_fmt__find(const char *name)
646 const int nmemb = ARRAY_SIZE(syscall_fmts);
647 return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp);
/*
 * Members of the per-syscall descriptor (struct syscall; its opening line
 * and some members are not visible in this excerpt):
 *   tp_format     - tracepoint format obtained for syscalls:sys_enter_<name>
 *   fmt           - optional formatting hints from syscall_fmts, may be NULL
 *   arg_scnprintf - per-argument printers chosen by syscall__set_arg_fmts()
 */
651 struct event_format *tp_format;
654 struct syscall_fmt *fmt;
655 size_t (**arg_scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
/*
 * Print "(<duration> ms): " to fp, where the duration is t divided by
 * NSEC_PER_MSEC, shown with three decimals.
 *
 * The number is coloured by magnitude: red for the first class (its
 * threshold is not visible in this excerpt), yellow from 0.01 ms up, normal
 * colour otherwise. Returns the number of characters printed.
 */
659 static size_t fprintf_duration(unsigned long t, FILE *fp)
661 double duration = (double)t / NSEC_PER_MSEC;
662 size_t printed = fprintf(fp, "(");
665 printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration);
666 else if (duration >= 0.01)
667 printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration);
669 printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration);
670 return printed + fprintf(fp, "): ");
/*
 * Per-thread tracing state, stored in thread->priv (see thread__trace()).
 * Only the nr_events member is visible in this excerpt; other code in the
 * file also uses entry_time, exit_time, entry_str, entry_pending and
 * runtime_ms.
 */
673 struct thread_trace {
677 unsigned long nr_events;
/*
 * Allocate a struct thread_trace with zalloc().
 * Returns zalloc()'s result unchecked, so callers must handle NULL.
 */
682 static struct thread_trace *thread_trace__new(void)
684 return zalloc(sizeof(struct thread_trace));
/*
 * Get the thread_trace attached to 'thread', creating it on first use.
 *
 * thread->priv is filled in with thread_trace__new() when it is still NULL;
 * a second NULL check catches allocation failure. A red "not enough memory"
 * warning is written to fp on the failure path (the jump to it and the
 * return statements are not visible in this excerpt).
 */
687 static struct thread_trace *thread__trace(struct thread *thread, FILE *fp)
689 struct thread_trace *ttrace;
694 if (thread->priv == NULL)
695 thread->priv = thread_trace__new();
697 if (thread->priv == NULL)
700 ttrace = thread->priv;
705 color_fprintf(fp, PERF_COLOR_RED,
706 "WARNING: not enough memory, dropping samples!\n");
/*
 * Members of the tool-wide state (struct trace; its opening line and several
 * members are not visible in this excerpt):
 *   tool             - perf_tool callbacks; embedded so container_of() can
 *                      recover the struct trace from a perf_tool pointer
 *   table            - syscall descriptors, indexed by syscall id
 *   opts             - record options, including the target
 *   nr_events        - event counter used by the summary and the main loop
 *   ev_qualifier     - optional list of syscall names to filter on
 *   not_ev_qualifier - inverts the sense of ev_qualifier
 *   tid_list/pid_list - optional thread/process id filters
 *   multiple_threads - print the thread id in front of each line
 *   duration_filter  - duration threshold, in milliseconds
 */
711 struct perf_tool tool;
715 struct syscall *table;
717 struct perf_record_opts opts;
722 unsigned long nr_events;
723 struct strlist *ev_qualifier;
724 bool not_ev_qualifier;
725 struct intlist *tid_list;
726 struct intlist *pid_list;
728 bool multiple_threads;
730 double duration_filter;
/*
 * Return true when t is below trace->duration_filter scaled by
 * NSEC_PER_MSEC, i.e. when the duration is shorter than the configured
 * threshold.
 */
734 static bool trace__filter_duration(struct trace *trace, double t)
736 return t < (trace->duration_filter * NSEC_PER_MSEC);
/*
 * Print a timestamp to fp as milliseconds relative to trace->base_time,
 * in a 10 character wide field with three decimals followed by a space.
 * Returns the fprintf() result.
 */
739 static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
741 double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC;
743 return fprintf(fp, "%10.3f ", ts);
/*
 * 'done' is a file-scope stop flag and sig_handler() is the handler that
 * trace__run() installs for SIGCHLD and SIGINT. The handler body is not
 * visible in this excerpt; presumably it sets 'done'.
 * NOTE(review): if the flag is written from the signal handler it should be
 * declared 'volatile sig_atomic_t' rather than plain bool -- confirm.
 */
746 static bool done = false;
748 static void sig_handler(int sig __maybe_unused)
/*
 * Print the common prefix of an output line: the relative timestamp, the
 * duration in parentheses and, when trace->multiple_threads is set, the
 * thread id -- preceded by up to 14 characters of the thread's comm and a
 * '/' when trace->show_comm is set.
 * 'printed' accumulates the number of characters written.
 */
753 static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread,
754 u64 duration, u64 tstamp, FILE *fp)
756 size_t printed = trace__fprintf_tstamp(trace, tstamp, fp);
757 printed += fprintf_duration(duration, fp);
759 if (trace->multiple_threads) {
760 if (trace->show_comm)
761 printed += fprintf(fp, "%.14s/", thread->comm);
762 printed += fprintf(fp, "%d ", thread->tid);
/*
 * Handle a non-sample perf event.
 *
 * PERF_RECORD_LOST is reported in red on trace->output with the number of
 * lost events and then passed to machine__process_lost_event(). The other
 * visible path hands the event to machine__process_event(). 'ret' holds the
 * result of whichever call ran.
 */
768 static int trace__process_event(struct trace *trace, struct machine *machine,
769 union perf_event *event)
773 switch (event->header.type) {
774 case PERF_RECORD_LOST:
775 color_fprintf(trace->output, PERF_COLOR_RED,
776 "LOST %" PRIu64 " events!\n", event->lost.lost);
777 ret = machine__process_lost_event(machine, event);
779 ret = machine__process_event(machine, event);
/*
 * perf_tool event callback: recover the enclosing struct trace from 'tool'
 * with container_of() and forward the event to trace__process_event().
 * The sample argument is unused.
 */
786 static int trace__tool_process(struct perf_tool *tool,
787 union perf_event *event,
788 struct perf_sample *sample __maybe_unused,
789 struct machine *machine)
791 struct trace *trace = container_of(tool, struct trace, tool);
792 return trace__process_event(trace, machine, event);
/*
 * Initialise the symbol library and the host machine state.
 *
 * After symbol__init(), the host machine is initialised and its kernel maps
 * are created. Existing threads are then synthesized: from the evlist's
 * thread map when the target names specific tasks, otherwise for all
 * threads. 'err' carries the result of the last step that ran.
 */
795 static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
797 int err = symbol__init();
802 machine__init(&trace->host, "", HOST_KERNEL_ID);
803 machine__create_kernel_maps(&trace->host);
805 if (perf_target__has_task(&trace->opts.target)) {
806 err = perf_event__synthesize_thread_map(&trace->tool, evlist->threads,
810 err = perf_event__synthesize_threads(&trace->tool, trace__tool_process,
/*
 * Choose a printer for every argument of syscall 'sc'.
 *
 * An array with one slot per tracepoint field except the first is allocated
 * with calloc() (hence nr_fields - 1) and the walk starts at fields->next.
 * For each argument the printer from sc->fmt is used when one is set;
 * otherwise pointer fields (FIELD_IS_POINTER) get the hexadecimal printer.
 * Slots that match neither case stay NULL from calloc().
 * sc->arg_parm is taken from sc->fmt (the guarding condition is not visible
 * in this excerpt).
 */
820 static int syscall__set_arg_fmts(struct syscall *sc)
822 struct format_field *field;
825 sc->arg_scnprintf = calloc(sc->tp_format->format.nr_fields - 1, sizeof(void *));
826 if (sc->arg_scnprintf == NULL)
830 sc->arg_parm = sc->fmt->arg_parm;
832 for (field = sc->tp_format->format.fields->next; field; field = field->next) {
833 if (sc->fmt && sc->fmt->arg_scnprintf[idx])
834 sc->arg_scnprintf[idx] = sc->fmt->arg_scnprintf[idx];
835 else if (field->flags & FIELD_IS_POINTER)
836 sc->arg_scnprintf[idx] = syscall_arg__scnprintf_hex;
/*
 * Fill in the descriptor for syscall number 'id'.
 *
 * The name comes from audit_syscall_to_name(). When id is beyond the current
 * table, the table is grown with realloc() to id + 1 entries and the new
 * entries are zeroed: only the tail when a table already existed
 * (syscalls.max != -1), the whole table on first allocation.
 *
 * With an event qualifier list in place, strlist membership XOR
 * not_ev_qualifier decides whether the syscall is filtered.
 *
 * The formatting hints are looked up by name, then the tracepoint format
 * syscalls:sys_enter_<name> is read, falling back to sys_enter_<alias> when
 * the hints provide an alias. On success the result of
 * syscall__set_arg_fmts() is returned.
 */
843 static int trace__read_syscall_info(struct trace *trace, int id)
847 const char *name = audit_syscall_to_name(id, trace->audit_machine);
852 if (id > trace->syscalls.max) {
853 struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc));
855 if (nsyscalls == NULL)
858 if (trace->syscalls.max != -1) {
859 memset(nsyscalls + trace->syscalls.max + 1, 0,
860 (id - trace->syscalls.max) * sizeof(*sc));
862 memset(nsyscalls, 0, (id + 1) * sizeof(*sc));
865 trace->syscalls.table = nsyscalls;
866 trace->syscalls.max = id;
869 sc = trace->syscalls.table + id;
872 if (trace->ev_qualifier) {
873 bool in = strlist__find(trace->ev_qualifier, name) != NULL;
875 if (!(in ^ trace->not_ev_qualifier)) {
878 * No need to do read tracepoint information since this will be
885 sc->fmt = syscall_fmt__find(sc->name);
887 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name);
888 sc->tp_format = event_format__new("syscalls", tp_name);
890 if (sc->tp_format == NULL && sc->fmt && sc->fmt->alias) {
891 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias);
892 sc->tp_format = event_format__new("syscalls", tp_name);
895 if (sc->tp_format == NULL)
898 return syscall__set_arg_fmts(sc);
/*
 * Format the arguments of syscall 'sc' into bf as "name: value" pairs
 * separated by ", ".
 *
 * With a tracepoint format available, the fields are walked starting at
 * fields->next while arg.idx and the mask bit advance together. An argument
 * with a dedicated printer is formatted through it with arg.val/arg.parm
 * filled in; any other argument is printed as a signed decimal ("%ld").
 * There is a check for zero-valued arguments whose consequence is not
 * visible in this excerpt. The last visible print statement belongs to a
 * path that prints args[i] by position; its condition and format string are
 * not visible either.
 */
901 static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
906 if (sc->tp_format != NULL) {
907 struct format_field *field;
909 struct syscall_arg arg = {
914 for (field = sc->tp_format->format.fields->next; field;
915 field = field->next, ++arg.idx, bit <<= 1) {
919 if (args[arg.idx] == 0)
922 printed += scnprintf(bf + printed, size - printed,
923 "%s%s: ", printed ? ", " : "", field->name);
924 if (sc->arg_scnprintf && sc->arg_scnprintf[arg.idx]) {
925 arg.val = args[arg.idx];
927 arg.parm = sc->arg_parm[arg.idx];
928 printed += sc->arg_scnprintf[arg.idx](bf + printed,
929 size - printed, &arg);
931 printed += scnprintf(bf + printed, size - printed,
932 "%ld", args[arg.idx]);
939 printed += scnprintf(bf + printed, size - printed,
941 printed ? ", " : "", i, args[i]);
/*
 * Signature of the per-tracepoint handlers (trace__sys_enter, trace__sys_exit,
 * trace__sched_stat_runtime) that are stored in evsel->handler.func.
 */
949 typedef int (*tracepoint_handler)(struct trace *trace, struct perf_evsel *evsel,
950 struct perf_sample *sample);
/*
 * Return the descriptor for the syscall named by the sample's "id" field.
 *
 * Invalid ids are reported with a running counter and skipped (the test that
 * selects this path is not visible in this excerpt). A descriptor that is
 * missing, either because id is beyond the table or the slot has no name, is
 * read on demand with trace__read_syscall_info(). When it is still missing
 * afterwards the lookup fails. The failure path prints
 * "Problems reading syscall <id>[(<name>)] information" to trace->output.
 */
952 static struct syscall *trace__syscall_info(struct trace *trace,
953 struct perf_evsel *evsel,
954 struct perf_sample *sample)
956 int id = perf_evsel__intval(evsel, sample, "id");
961 * XXX: Noticed on x86_64, reproduced as far back as 3.0.36, haven't tried
962 * before that, leaving at a higher verbosity level till that is
963 * explained. Reproduced with plain ftrace with:
965 * echo 1 > /t/events/raw_syscalls/sys_exit/enable
966 * grep "NR -1 " /t/trace_pipe
968 * After generating some load on the machine.
972 fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n",
973 id, perf_evsel__name(evsel), ++n);
978 if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) &&
979 trace__read_syscall_info(trace, id))
982 if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL))
985 return &trace->syscalls.table[id];
989 fprintf(trace->output, "Problems reading syscall %d", id);
990 if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL)
991 fprintf(trace->output, "(%s)", trace->syscalls.table[id].name);
992 fputs(" information\n", trace->output);
/*
 * Handler for raw_syscalls:sys_enter.
 *
 * Looks up the syscall descriptor and the thread's thread_trace, reads the
 * raw "args" payload and formats "<name>(<args>" into the per-thread
 * entry_str buffer, a 1024 byte buffer allocated on first use. The entry
 * timestamp is remembered for the duration computed at sys_exit.
 *
 * exit_group and exit are printed at once, and only when no duration filter
 * is set. The other visible path marks the entry as pending, to be printed
 * by trace__sys_exit().
 */
997 static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
998 struct perf_sample *sample)
1003 struct thread *thread;
1004 struct syscall *sc = trace__syscall_info(trace, evsel, sample);
1005 struct thread_trace *ttrace;
1013 thread = machine__findnew_thread(&trace->host, sample->pid,
1015 ttrace = thread__trace(thread, trace->output);
1019 args = perf_evsel__rawptr(evsel, sample, "args");
1021 fprintf(trace->output, "Problems reading syscall arguments\n");
1025 ttrace = thread->priv;
1027 if (ttrace->entry_str == NULL) {
1028 ttrace->entry_str = malloc(1024);
1029 if (!ttrace->entry_str)
1033 ttrace->entry_time = sample->time;
1034 msg = ttrace->entry_str;
1035 printed += scnprintf(msg + printed, 1024 - printed, "%s(", sc->name);
1037 printed += syscall__scnprintf_args(sc, msg + printed, 1024 - printed, args);
1039 if (!strcmp(sc->name, "exit_group") || !strcmp(sc->name, "exit")) {
1040 if (!trace->duration_filter) {
1041 trace__fprintf_entry_head(trace, thread, 1, sample->time, trace->output);
1042 fprintf(trace->output, "%-70s\n", ttrace->entry_str);
1045 ttrace->entry_pending = true;
/*
 * Handler for raw_syscalls:sys_exit.
 *
 * Reads the "ret" field, records the exit time and, when an entry time is
 * known, computes the duration and applies the duration filter. The line
 * prefix is printed, followed by the pending entry string padded to 70
 * columns, or by a " ... [continued]: <name>()" marker.
 *
 * The return value is then printed according to the formatting hints:
 *   - no hints: plain decimal
 *   - negative with .errmsg: "-1" plus the errno name and message
 *   - zero with .timeout: "0 Timeout"
 *   - .hexret: hexadecimal
 * Finally the line is terminated and entry_pending is cleared.
 * NOTE(review): the hexadecimal path prints 'ret' with "%#x"; its
 * declaration is not visible here -- if it is an int, pointer-sized return
 * values (mmap, brk, mremap) would be truncated. Confirm.
 */
1050 static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
1051 struct perf_sample *sample)
1055 struct thread *thread;
1056 struct syscall *sc = trace__syscall_info(trace, evsel, sample);
1057 struct thread_trace *ttrace;
1065 thread = machine__findnew_thread(&trace->host, sample->pid,
1067 ttrace = thread__trace(thread, trace->output);
1071 ret = perf_evsel__intval(evsel, sample, "ret");
1073 ttrace = thread->priv;
1075 ttrace->exit_time = sample->time;
1077 if (ttrace->entry_time) {
1078 duration = sample->time - ttrace->entry_time;
1079 if (trace__filter_duration(trace, duration))
1081 } else if (trace->duration_filter)
1084 trace__fprintf_entry_head(trace, thread, duration, sample->time, trace->output);
1086 if (ttrace->entry_pending) {
1087 fprintf(trace->output, "%-70s", ttrace->entry_str);
1089 fprintf(trace->output, " ... [");
1090 color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued");
1091 fprintf(trace->output, "]: %s()", sc->name);
1094 if (sc->fmt == NULL) {
1096 fprintf(trace->output, ") = %d", ret);
1097 } else if (ret < 0 && sc->fmt->errmsg) {
1099 const char *emsg = strerror_r(-ret, bf, sizeof(bf)),
1100 *e = audit_errno_to_name(-ret);
1102 fprintf(trace->output, ") = -1 %s %s", e, emsg);
1103 } else if (ret == 0 && sc->fmt->timeout)
1104 fprintf(trace->output, ") = 0 Timeout");
1105 else if (sc->fmt->hexret)
1106 fprintf(trace->output, ") = %#x", ret);
1110 fputc('\n', trace->output);
1112 ttrace->entry_pending = false;
/*
 * Handler for sched:sched_stat_runtime.
 *
 * Converts the "runtime" field to milliseconds (runtime / NSEC_PER_MSEC) and
 * adds it to both the per-thread and the global runtime_ms totals. The
 * fprintf() prints the raw event fields (comm, pid, runtime, vruntime); it
 * presumably sits on the path taken when the thread_trace could not be
 * obtained -- the control flow around it is not visible in this excerpt.
 */
1117 static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel,
1118 struct perf_sample *sample)
1120 u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
1121 double runtime_ms = (double)runtime / NSEC_PER_MSEC;
1122 struct thread *thread = machine__findnew_thread(&trace->host,
1125 struct thread_trace *ttrace = thread__trace(thread, trace->output);
1130 ttrace->runtime_ms += runtime_ms;
1131 trace->runtime_ms += runtime_ms;
1135 fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n",
1137 perf_evsel__strval(evsel, sample, "comm"),
1138 (pid_t)perf_evsel__intval(evsel, sample, "pid"),
1140 perf_evsel__intval(evsel, sample, "vruntime"));
/*
 * Decide whether a sample is filtered out by the pid/tid lists.
 *
 * The first test matches samples whose pid is in pid_list or whose tid is in
 * tid_list; the second test covers the case where at least one list exists
 * but the sample matched neither. The return statements are not visible in
 * this excerpt; presumably a match means "keep" and a miss with a list
 * present means "skip".
 */
1144 static bool skip_sample(struct trace *trace, struct perf_sample *sample)
1146 if ((trace->pid_list && intlist__find(trace->pid_list, sample->pid)) ||
1147 (trace->tid_list && intlist__find(trace->tid_list, sample->tid)))
1150 if (trace->pid_list || trace->tid_list)
/*
 * perf_tool sample callback, installed by trace__replay().
 *
 * Recovers the struct trace from 'tool', applies the pid/tid filter, latches
 * the first sample time as base_time unless full timestamps were requested,
 * and calls the tracepoint handler stored in evsel->handler.func.
 */
1156 static int trace__process_sample(struct perf_tool *tool,
1157 union perf_event *event __maybe_unused,
1158 struct perf_sample *sample,
1159 struct perf_evsel *evsel,
1160 struct machine *machine __maybe_unused)
1162 struct trace *trace = container_of(tool, struct trace, tool);
1165 tracepoint_handler handler = evsel->handler.func;
1167 if (skip_sample(trace, sample))
1170 if (!trace->full_time && trace->base_time == 0)
1171 trace->base_time = sample->time;
1174 handler(trace, evsel, sample);
/*
 * Return whether the session's event list contains the tracepoint 'name'
 * (e.g. "raw_syscalls:sys_enter").
 */
1180 perf_session__has_tp(struct perf_session *session, const char *name)
1182 struct perf_evsel *evsel;
1184 evsel = perf_evlist__find_tracepoint_by_name(session->evlist, name);
1186 return evsel != NULL;
/*
 * Turn the target pid/tid option strings into integer lists.
 *
 * trace->pid_list and trace->tid_list are built with intlist__new() from
 * opts.target.pid and opts.target.tid when those strings are set. A NULL
 * result is reported with pr_err(). The return statements are not visible in
 * this excerpt.
 */
1189 static int parse_target_str(struct trace *trace)
1191 if (trace->opts.target.pid) {
1192 trace->pid_list = intlist__new(trace->opts.target.pid);
1193 if (trace->pid_list == NULL) {
1194 pr_err("Error parsing process id string\n");
1199 if (trace->opts.target.tid) {
1200 trace->tid_list = intlist__new(trace->opts.target.tid);
1201 if (trace->tid_list == NULL) {
1202 pr_err("Error parsing thread id string\n");
/*
 * Live tracing: set up the events, optionally start a workload, and consume
 * the ring buffers until told to stop.
 *
 * Visible steps, in order:
 *   1. create the evlist and add the raw_syscalls:sys_enter and sys_exit
 *      tracepoints with their handlers, plus sched:sched_stat_runtime
 *      (the condition guarding the latter is not visible in this excerpt);
 *   2. create the cpu/thread maps for the target, initialise symbols and
 *      apply the record options;
 *   3. install sig_handler for SIGCHLD and SIGINT;
 *   4. prepare the workload ('forks' is true when a command was given), then
 *      open, mmap and enable the events and start the workload;
 *   5. loop over every mmap, parsing each event: non-sample events go to
 *      trace__process_event(), samples are mapped back to their evsel by
 *      sample id and passed to that evsel's handler. Samples without a known
 *      evsel or without a raw payload are reported and skipped;
 *   6. when a pass over the buffers saw no new events
 *      (nr_events == before), either leave the loop or block in poll();
 *   7. tear down in reverse order of setup: disable, munmap, close, delete
 *      maps, delete evlist. The error paths jump into this sequence via the
 *      out_* labels, which are not visible in this excerpt.
 *
 * multiple_threads is enabled when the thread map is system wide
 * (map[0] == -1) or holds more than one thread.
 */
1210 static int trace__run(struct trace *trace, int argc, const char **argv)
1212 struct perf_evlist *evlist = perf_evlist__new();
1213 struct perf_evsel *evsel;
1215 unsigned long before;
1216 const bool forks = argc > 0;
1218 if (evlist == NULL) {
1219 fprintf(trace->output, "Not enough memory to run!\n");
1223 if (perf_evlist__add_newtp(evlist, "raw_syscalls", "sys_enter", trace__sys_enter) ||
1224 perf_evlist__add_newtp(evlist, "raw_syscalls", "sys_exit", trace__sys_exit)) {
1225 fprintf(trace->output, "Couldn't read the raw_syscalls tracepoints information!\n");
1226 goto out_delete_evlist;
1230 perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
1231 trace__sched_stat_runtime)) {
1232 fprintf(trace->output, "Couldn't read the sched_stat_runtime tracepoint information!\n");
1233 goto out_delete_evlist;
1236 err = perf_evlist__create_maps(evlist, &trace->opts.target);
1238 fprintf(trace->output, "Problems parsing the target to trace, check your options!\n");
1239 goto out_delete_evlist;
1242 err = trace__symbols_init(trace, evlist);
1244 fprintf(trace->output, "Problems initializing symbol libraries!\n");
1245 goto out_delete_maps;
1248 perf_evlist__config(evlist, &trace->opts);
1250 signal(SIGCHLD, sig_handler);
1251 signal(SIGINT, sig_handler);
1254 err = perf_evlist__prepare_workload(evlist, &trace->opts.target,
1255 argv, false, false);
1257 fprintf(trace->output, "Couldn't run the workload!\n");
1258 goto out_delete_maps;
1262 err = perf_evlist__open(evlist);
1264 fprintf(trace->output, "Couldn't create the events: %s\n", strerror(errno));
1265 goto out_delete_maps;
1268 err = perf_evlist__mmap(evlist, UINT_MAX, false);
1270 fprintf(trace->output, "Couldn't mmap the events: %s\n", strerror(errno));
1271 goto out_close_evlist;
1274 perf_evlist__enable(evlist);
1277 perf_evlist__start_workload(evlist);
1279 trace->multiple_threads = evlist->threads->map[0] == -1 || evlist->threads->nr > 1;
1281 before = trace->nr_events;
1283 for (i = 0; i < evlist->nr_mmaps; i++) {
1284 union perf_event *event;
1286 while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) {
1287 const u32 type = event->header.type;
1288 tracepoint_handler handler;
1289 struct perf_sample sample;
1293 err = perf_evlist__parse_sample(evlist, event, &sample);
1295 fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err);
1299 if (!trace->full_time && trace->base_time == 0)
1300 trace->base_time = sample.time;
1302 if (type != PERF_RECORD_SAMPLE) {
1303 trace__process_event(trace, &trace->host, event);
1307 evsel = perf_evlist__id2evsel(evlist, sample.id);
1308 if (evsel == NULL) {
1309 fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample.id);
1313 if (sample.raw_data == NULL) {
1314 fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
1315 perf_evsel__name(evsel), sample.tid,
1316 sample.cpu, sample.raw_size);
1320 handler = evsel->handler.func;
1321 handler(trace, evsel, &sample);
1324 goto out_unmap_evlist;
1328 if (trace->nr_events == before) {
1330 goto out_unmap_evlist;
1332 poll(evlist->pollfd, evlist->nr_fds, -1);
1336 perf_evlist__disable(evlist);
1341 perf_evlist__munmap(evlist);
1343 perf_evlist__close(evlist);
1345 perf_evlist__delete_maps(evlist);
1347 perf_evlist__delete(evlist);
/*
 * Replay mode: process syscall events from a previously recorded data file.
 *
 * Wires the perf_tool callbacks (samples go to trace__process_sample, the
 * bookkeeping events to the stock perf_event__process_* helpers), asks for
 * time-ordered samples, and always prints the thread id because a recording
 * can contain any number of threads.
 *
 * A session is then opened on input_name, the sys_enter/sys_exit handlers
 * are attached, and the file is checked for both raw_syscalls tracepoints
 * before the pid/tid filters are parsed and the events are processed. The
 * session is deleted before returning.
 *
 * The "Failed to process events" message now ends in a newline, like every
 * other pr_err() message in this file, so it no longer runs into whatever is
 * printed next.
 */
1352 static int trace__replay(struct trace *trace)
1354 const struct perf_evsel_str_handler handlers[] = {
1355 { "raw_syscalls:sys_enter", trace__sys_enter, },
1356 { "raw_syscalls:sys_exit", trace__sys_exit, },
1359 struct perf_session *session;
1362 trace->tool.sample = trace__process_sample;
1363 trace->tool.mmap = perf_event__process_mmap;
1364 trace->tool.mmap2 = perf_event__process_mmap2;
1365 trace->tool.comm = perf_event__process_comm;
1366 trace->tool.exit = perf_event__process_exit;
1367 trace->tool.fork = perf_event__process_fork;
1368 trace->tool.attr = perf_event__process_attr;
1369 trace->tool.tracing_data = perf_event__process_tracing_data;
1370 trace->tool.build_id = perf_event__process_build_id;
1372 trace->tool.ordered_samples = true;
1373 trace->tool.ordering_requires_timestamps = true;
1375 /* add tid to output */
1376 trace->multiple_threads = true;
1378 if (symbol__init() < 0)
1381 session = perf_session__new(input_name, O_RDONLY, 0, false,
1383 if (session == NULL)
1386 err = perf_session__set_tracepoints_handlers(session, handlers);
1390 if (!perf_session__has_tp(session, "raw_syscalls:sys_enter")) {
1391 pr_err("Data file does not have raw_syscalls:sys_enter events\n");
1395 if (!perf_session__has_tp(session, "raw_syscalls:sys_exit")) {
1396 pr_err("Data file does not have raw_syscalls:sys_exit events\n");
1400 err = parse_target_str(trace);
1406 err = perf_session__process_events(session, &trace->tool);
1408 pr_err("Failed to process events, error %d\n", err);
1411 perf_session__delete(session);
/*
 * Print the banner and column headings of the per-thread summary to fp.
 * 'printed' accumulates the fprintf() results.
 */
1416 static size_t trace__fprintf_threads_header(FILE *fp)
1420 printed = fprintf(fp, "\n _____________________________________________________________________\n");
1421 printed += fprintf(fp," __) Summary of events (__\n\n");
1422 printed += fprintf(fp," [ task - pid ] [ events ] [ ratio ] [ runtime ]\n");
1423 printed += fprintf(fp," _____________________________________________________________________\n\n");
/*
 * Print one summary line per thread known to the host machine.
 *
 * After the header, the rb-tree of threads is walked. For each thread its
 * share of all events is computed as a percentage
 * (ttrace->nr_events / trace->nr_events * 100) and mapped to a colour:
 * normal by default, green above 25%, yellow above 5%, red for the highest
 * class (its threshold is not visible in this excerpt). The line shows
 * comm, tid, event count, ratio and accumulated runtime in milliseconds.
 */
1428 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
1430 size_t printed = trace__fprintf_threads_header(fp);
1433 for (nd = rb_first(&trace->host.threads); nd; nd = rb_next(nd)) {
1434 struct thread *thread = rb_entry(nd, struct thread, rb_node);
1435 struct thread_trace *ttrace = thread->priv;
1442 ratio = (double)ttrace->nr_events / trace->nr_events * 100.0;
1444 color = PERF_COLOR_NORMAL;
1446 color = PERF_COLOR_RED;
1447 else if (ratio > 25.0)
1448 color = PERF_COLOR_GREEN;
1449 else if (ratio > 5.0)
1450 color = PERF_COLOR_YELLOW;
1452 printed += color_fprintf(fp, color, "%20s", thread->comm);
1453 printed += fprintf(fp, " - %-5d :%11lu [", thread->tid, ttrace->nr_events);
1454 printed += color_fprintf(fp, color, "%5.1f%%", ratio);
1455 printed += fprintf(fp, " ] %10.3f ms\n", ttrace->runtime_ms);
/*
 * Option callback for "--duration": converts @str with atof() and stores
 * the result in the duration_filter field of the struct trace passed via
 * opt->value.
 *
 * @unset is unused.
 *
 * NOTE(review): atof() gives no indication of a failed conversion and no
 * validation of @str is visible in this excerpt; the return statement is
 * also not visible.
 */
1461 static int trace__set_duration(const struct option *opt, const char *str,
1462 int unset __maybe_unused)
1464 struct trace *trace = opt->value;
1466 trace->duration_filter = atof(str);
/*
 * Open @filename for writing and store the stream in trace->output.
 *
 * If @filename already exists and is non-empty it is first renamed to
 * "<filename>.old" so the previous contents are kept.
 *
 * Returns 0 on success, or -errno if fopen() failed.
 */
1470 static int trace__open_output(struct trace *trace, const char *filename)
/* stat() returns 0 when the file exists; only back up non-empty files. */
1474 if (!stat(filename, &st) && st.st_size) {
1475 char oldname[PATH_MAX];
1477 scnprintf(oldname, sizeof(oldname), "%s.old", filename);
/* The result of rename() is not checked here. */
1479 rename(filename, oldname);
/* "w" creates the file or truncates it to zero length. */
1482 trace->output = fopen(filename, "w");
1484 return trace->output == NULL ? -errno : 0;
/*
 * Entry point for the "perf trace" sub-command.
 *
 * Visible flow: parse the command line into 'trace' and a few local
 * strings, optionally open the -o output file, build the -e event
 * qualifier list, call perf_target__validate() and perf_target__parse_uid()
 * on trace.opts.target, then hand off to either trace__replay() or
 * trace__run(). When --sched was given and err is 0, a per-thread summary
 * is printed to trace.output; a file opened via -o is closed at the end.
 *
 * @prefix is unused.
 *
 * NOTE(review): this excerpt omits lines (error branches, labels, the
 * final return), so the exact error paths cannot be described from here.
 */
1487 int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
1489 const char * const trace_usage[] = {
1490 "perf trace [<options>] [<command>]",
1491 "perf trace [<options>] -- <command> [<options>]",
1494 struct trace trace = {
1495 .audit_machine = audit_detect_machine(),
1504 .user_freq = UINT_MAX,
1505 .user_interval = ULLONG_MAX,
1512 const char *output_name = NULL;
1513 const char *ev_qualifier_str = NULL;
/* Command-line options; most write straight into fields of 'trace'. */
1514 const struct option trace_options[] = {
1515 OPT_BOOLEAN(0, "comm", &trace.show_comm,
1516 "show the thread COMM next to its id"),
1517 OPT_STRING('e', "expr", &ev_qualifier_str, "expr",
1518 "list of events to trace"),
1519 OPT_STRING('o', "output", &output_name, "file", "output file name"),
1520 OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"),
1521 OPT_STRING('p', "pid", &trace.opts.target.pid, "pid",
1522 "trace events on existing process id"),
1523 OPT_STRING('t', "tid", &trace.opts.target.tid, "tid",
1524 "trace events on existing thread id"),
1525 OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide,
1526 "system-wide collection from all CPUs"),
1527 OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu",
1528 "list of cpus to monitor"),
1529 OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit,
1530 "child tasks do not inherit counters"),
1531 OPT_CALLBACK('m', "mmap-pages", &trace.opts.mmap_pages, "pages",
1532 "number of mmap data pages",
1533 perf_evlist__parse_mmap_pages),
1534 OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user",
/* --duration is parsed by trace__set_duration(), which receives &trace. */
1536 OPT_CALLBACK(0, "duration", &trace, "float",
1537 "show only events with duration > N.M ms",
1538 trace__set_duration),
1539 OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"),
1540 OPT_INCR('v', "verbose", &verbose, "be more verbose"),
1541 OPT_BOOLEAN('T', "time", &trace.full_time,
1542 "Show full timestamp, not time relative to first start"),
/* argc is reassigned from the parse_options() result. */
1548 argc = parse_options(argc, argv, trace_options, trace_usage, 0);
/* -o given: open the output file via trace__open_output(). */
1550 if (output_name != NULL) {
1551 err = trace__open_output(&trace, output_name);
1553 perror("failed to create output file");
/*
 * -e given: a leading '!' sets not_ev_qualifier.
 * NOTE(review): the statement guarded by the not_ev_qualifier test is not
 * visible here; presumably it steps past the '!' -- confirm.
 */
1558 if (ev_qualifier_str != NULL) {
1559 const char *s = ev_qualifier_str;
1561 trace.not_ev_qualifier = *s == '!';
1562 if (trace.not_ev_qualifier)
1564 trace.ev_qualifier = strlist__new(true, s);
1565 if (trace.ev_qualifier == NULL) {
1566 fputs("Not enough memory to parse event qualifier",
/*
 * Target checks: messages are formatted into 'bf' and written to
 * trace.output.
 * NOTE(review): the conditions guarding these messages are not visible;
 * presumably they run only when err is non-zero -- confirm.
 */
1573 err = perf_target__validate(&trace.opts.target);
1575 perf_target__strerror(&trace.opts.target, err, bf, sizeof(bf));
1576 fprintf(trace.output, "%s", bf);
1580 err = perf_target__parse_uid(&trace.opts.target);
1582 perf_target__strerror(&trace.opts.target, err, bf, sizeof(bf));
1583 fprintf(trace.output, "%s", bf);
/* argc is zero and perf_target__none() is true: fall back to system-wide. */
1587 if (!argc && perf_target__none(&trace.opts.target))
1588 trace.opts.target.system_wide = true;
/*
 * NOTE(review): the condition choosing between replay and run is not
 * visible here; presumably replay is used when -i/--input names a file
 * -- confirm.
 */
1591 err = trace__replay(&trace);
1593 err = trace__run(&trace, argc, argv);
/* Summary only when --sched was requested and err is 0. */
1595 if (trace.sched && !err)
1596 trace__fprintf_thread_summary(&trace, trace.output);
/* Close trace.output only when -o named a file (see the open above). */
1599 if (output_name != NULL)
1600 fclose(trace.output);