1 #include <traceevent/event-parse.h>
3 #include "util/color.h"
4 #include "util/debug.h"
5 #include "util/evlist.h"
6 #include "util/machine.h"
7 #include "util/session.h"
8 #include "util/thread.h"
9 #include "util/parse-options.h"
10 #include "util/strlist.h"
11 #include "util/intlist.h"
12 #include "util/thread_map.h"
16 #include <sys/eventfd.h>
18 #include <linux/futex.h>
20 /* For older distros: */
22 # define MAP_STACK 0x20000
26 # define MADV_HWPOISON 100
29 #ifndef MADV_MERGEABLE
30 # define MADV_MERGEABLE 12
33 #ifndef MADV_UNMERGEABLE
34 # define MADV_UNMERGEABLE 13
/*
 * DEFINE_STRARRAY(array) opens the definition of a 'struct strarray' object
 * named strarray__<array> and sets .nr_entries from ARRAY_SIZE(array).
 *
 * syscall_arg__scnprintf_strarray() prints the name sa->entries[idx] into bf,
 * where sa is the struct strarray passed through arg->parm.  An idx that is
 * negative or not below sa->nr_entries is printed as a plain decimal number.
 * Returns the value returned by scnprintf().
 *
 * NOTE(review): parts of the macro and of the function body (including the
 * declaration of idx) fall outside the lines shown here.
 */
49 #define DEFINE_STRARRAY(array) struct strarray strarray__##array = { \
50 .nr_entries = ARRAY_SIZE(array), \
54 static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size,
55 struct syscall_arg *arg)
58 struct strarray *sa = arg->parm;
60 if (idx < 0 || idx >= sa->nr_entries)
61 return scnprintf(bf, size, "%d", idx);
63 return scnprintf(bf, size, "%s", sa->entries[idx]);
66 #define SCA_STRARRAY syscall_arg__scnprintf_strarray
/*
 * Prints arg->val into bf as hexadecimal with a "0x" prefix ("%#lx") and
 * returns scnprintf()'s result.  SCA_HEX is the short alias used in the
 * syscall_fmt table.
 */
68 static size_t syscall_arg__scnprintf_hex(char *bf, size_t size,
69 struct syscall_arg *arg)
71 return scnprintf(bf, size, "%#lx", arg->val);
74 #define SCA_HEX syscall_arg__scnprintf_hex
/*
 * Decodes arg->val as a set of PROT_* bits.
 *
 * A value equal to PROT_NONE is printed as "NONE".  Otherwise each
 * P_MMAP_PROT(n) use tests PROT_n and, when set, appends the stringified
 * name n, with '|' placed between names already printed.  A final "%#x"
 * print of prot is also present in the visible code.
 *
 * NOTE(review): the tail of the P_MMAP_PROT macro, most of its uses and the
 * condition guarding the final hex print are outside this excerpt.
 */
76 static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size,
77 struct syscall_arg *arg)
79 int printed = 0, prot = arg->val;
81 if (prot == PROT_NONE)
82 return scnprintf(bf, size, "NONE");
83 #define P_MMAP_PROT(n) \
84 if (prot & PROT_##n) { \
85 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
95 P_MMAP_PROT(GROWSDOWN);
100 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", prot);
105 #define SCA_MMAP_PROT syscall_arg__scnprintf_mmap_prot
/*
 * Decodes arg->val as a set of MAP_* bits.
 *
 * Each P_MMAP_FLAG(n) use tests MAP_n and, when set, appends the stringified
 * name n, '|'-separated from anything printed before it.  MAP_UNINITIALIZED
 * is only decoded when the headers define it (#ifdef).  A final "%#x" print
 * of flags is also present in the visible code.
 *
 * NOTE(review): the tail of the macro, some of its uses and the condition
 * guarding the final hex print are outside this excerpt.
 */
107 static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size,
108 struct syscall_arg *arg)
110 int printed = 0, flags = arg->val;
112 #define P_MMAP_FLAG(n) \
113 if (flags & MAP_##n) { \
114 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
119 P_MMAP_FLAG(PRIVATE);
123 P_MMAP_FLAG(ANONYMOUS);
124 P_MMAP_FLAG(DENYWRITE);
125 P_MMAP_FLAG(EXECUTABLE);
128 P_MMAP_FLAG(GROWSDOWN);
130 P_MMAP_FLAG(HUGETLB);
133 P_MMAP_FLAG(NONBLOCK);
134 P_MMAP_FLAG(NORESERVE);
135 P_MMAP_FLAG(POPULATE);
137 #ifdef MAP_UNINITIALIZED
138 P_MMAP_FLAG(UNINITIALIZED);
143 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
148 #define SCA_MMAP_FLAGS syscall_arg__scnprintf_mmap_flags
/*
 * Translates the madvise() behavior value in arg->val into its name.
 *
 * P_MADV_BHV(n) expands to a 'case MADV_n:' label that immediately returns
 * the stringified name n, so every use is one switch case.  Values with no
 * matching case reach the final return, which prints behavior as "%#x".
 * SOFT_OFFLINE and NOHUGEPAGE are only decoded when the headers define the
 * corresponding MADV_* constant.
 *
 * NOTE(review): the switch statement itself and several cases are outside
 * this excerpt.
 */
150 static size_t syscall_arg__scnprintf_madvise_behavior(char *bf, size_t size,
151 struct syscall_arg *arg)
153 int behavior = arg->val;
156 #define P_MADV_BHV(n) case MADV_##n: return scnprintf(bf, size, #n)
159 P_MADV_BHV(SEQUENTIAL);
160 P_MADV_BHV(WILLNEED);
161 P_MADV_BHV(DONTNEED);
163 P_MADV_BHV(DONTFORK);
165 P_MADV_BHV(HWPOISON);
166 #ifdef MADV_SOFT_OFFLINE
167 P_MADV_BHV(SOFT_OFFLINE);
169 P_MADV_BHV(MERGEABLE);
170 P_MADV_BHV(UNMERGEABLE);
172 P_MADV_BHV(HUGEPAGE);
174 #ifdef MADV_NOHUGEPAGE
175 P_MADV_BHV(NOHUGEPAGE);
178 P_MADV_BHV(DONTDUMP);
187 return scnprintf(bf, size, "%#x", behavior);
190 #define SCA_MADV_BHV syscall_arg__scnprintf_madvise_behavior
/*
 * Decodes the futex() op argument.
 *
 * The command is op & FUTEX_CMD_MASK.  P_FUTEX_OP(n) expands to a
 * 'case FUTEX_n:' label that prints the name n and deliberately has no
 * 'break': each use is followed on the same line by the arg->mask update for
 * that command and then by the break.  Unknown commands are printed as "%#x".
 * After the command, "|PRIV" is appended when FUTEX_PRIVATE_FLAG is set and
 * "|CLKRT" when FUTEX_CLOCK_REALTIME is set.
 *
 * The SCF_* enumerators are single bits (SCF_UADDR is bit 0, SCF_TIMEOUT
 * bit 3, SCF_UADDR2 bit 4).
 * NOTE(review): presumably each bit stands for one futex() argument position
 * and setting it in arg->mask suppresses printing of that argument for
 * commands that ignore it; the code consuming arg->mask is not visible here.
 */
192 static size_t syscall_arg__scnprintf_futex_op(char *bf, size_t size, struct syscall_arg *arg)
194 enum syscall_futex_args {
195 SCF_UADDR = (1 << 0),
198 SCF_TIMEOUT = (1 << 3),
199 SCF_UADDR2 = (1 << 4),
203 int cmd = op & FUTEX_CMD_MASK;
207 #define P_FUTEX_OP(n) case FUTEX_##n: printed = scnprintf(bf, size, #n);
208 P_FUTEX_OP(WAIT); arg->mask |= SCF_VAL3|SCF_UADDR2; break;
209 P_FUTEX_OP(WAKE); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
210 P_FUTEX_OP(FD); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
211 P_FUTEX_OP(REQUEUE); arg->mask |= SCF_VAL3|SCF_TIMEOUT; break;
212 P_FUTEX_OP(CMP_REQUEUE); arg->mask |= SCF_TIMEOUT; break;
213 P_FUTEX_OP(CMP_REQUEUE_PI); arg->mask |= SCF_TIMEOUT; break;
214 P_FUTEX_OP(WAKE_OP); break;
215 P_FUTEX_OP(LOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
216 P_FUTEX_OP(UNLOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
217 P_FUTEX_OP(TRYLOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2; break;
218 P_FUTEX_OP(WAIT_BITSET); arg->mask |= SCF_UADDR2; break;
219 P_FUTEX_OP(WAKE_BITSET); arg->mask |= SCF_UADDR2; break;
220 P_FUTEX_OP(WAIT_REQUEUE_PI); break;
221 default: printed = scnprintf(bf, size, "%#x", cmd); break;
224 if (op & FUTEX_PRIVATE_FLAG)
225 printed += scnprintf(bf + printed, size - printed, "|PRIV");
227 if (op & FUTEX_CLOCK_REALTIME)
228 printed += scnprintf(bf + printed, size - printed, "|CLKRT");
233 #define SCA_FUTEX_OP syscall_arg__scnprintf_futex_op
/*
 * Name tables for arguments whose raw value is a small integer.  Each table
 * is indexed directly by the argument value and is wrapped into a
 * strarray__<name> object by DEFINE_STRARRAY so it can be handed to
 * SCA_STRARRAY through the .arg_parm slots of the syscall_fmt table.
 *
 * NOTE(review): several of the longer tables continue on lines that are not
 * part of this excerpt.
 */

/* Index 0 is left without a name; "ADD", "DEL" and "MOD" occupy 1..3. */
235 static const char *epoll_ctl_ops[] = { [1] = "ADD", "DEL", "MOD", };
236 static DEFINE_STRARRAY(epoll_ctl_ops);
/* 'which' argument of getitimer/setitimer. */
238 static const char *itimers[] = { "REAL", "VIRTUAL", "PROF", };
239 static DEFINE_STRARRAY(itimers);
/* 'whence' argument of lseek. */
241 static const char *whences[] = { "SET", "CUR", "END",
249 static DEFINE_STRARRAY(whences);
/* 'cmd' argument of fcntl. */
251 static const char *fcntl_cmds[] = {
252 "DUPFD", "GETFD", "SETFD", "GETFL", "SETFL", "GETLK", "SETLK",
253 "SETLKW", "SETOWN", "GETOWN", "SETSIG", "GETSIG", "F_GETLK64",
254 "F_SETLK64", "F_SETLKW64", "F_SETOWN_EX", "F_GETOWN_EX",
257 static DEFINE_STRARRAY(fcntl_cmds);
/* 'resource' argument of getrlimit/setrlimit/prlimit64. */
259 static const char *rlimit_resources[] = {
260 "CPU", "FSIZE", "DATA", "STACK", "CORE", "RSS", "NPROC", "NOFILE",
261 "MEMLOCK", "AS", "LOCKS", "SIGPENDING", "MSGQUEUE", "NICE", "RTPRIO",
264 static DEFINE_STRARRAY(rlimit_resources);
/* 'how' argument of rt_sigprocmask. */
266 static const char *sighow[] = { "BLOCK", "UNBLOCK", "SETMASK", };
267 static DEFINE_STRARRAY(sighow);
/* 'family' argument of socket. */
269 static const char *socket_families[] = {
270 "UNSPEC", "LOCAL", "INET", "AX25", "IPX", "APPLETALK", "NETROM",
271 "BRIDGE", "ATMPVC", "X25", "INET6", "ROSE", "DECnet", "NETBEUI",
272 "SECURITY", "KEY", "NETLINK", "PACKET", "ASH", "ECONET", "ATMSVC",
273 "RDS", "SNA", "IRDA", "PPPOX", "WANPIPE", "LLC", "IB", "CAN", "TIPC",
274 "BLUETOOTH", "IUCV", "RXRPC", "ISDN", "PHONET", "IEEE802154", "CAIF",
275 "ALG", "NFC", "VSOCK",
277 static DEFINE_STRARRAY(socket_families);
/* Fallback for headers that do not provide SOCK_TYPE_MASK. */
279 #ifndef SOCK_TYPE_MASK
280 #define SOCK_TYPE_MASK 0xf
/*
 * Decodes the 'type' argument of socket().
 *
 * The value is split in two: the bits outside SOCK_TYPE_MASK are kept in
 * flags, the bits inside it remain in type.  P_SK_TYPE(n) expands to a
 * 'case SOCK_n:' that prints the name n; a type without a case is printed
 * as "%#x".  P_SK_FLAG(n) appends "|n" for each SOCK_n flag that is set and
 * clears that bit from flags, so the final "|%#x" print only covers bits
 * that were not recognised.
 *
 * NOTE(review): the declarations, the switch, most macro uses and the
 * condition guarding the final print are outside this excerpt.
 */
283 static size_t syscall_arg__scnprintf_socket_type(char *bf, size_t size,
284 struct syscall_arg *arg)
288 flags = type & ~SOCK_TYPE_MASK;
290 type &= SOCK_TYPE_MASK;
292 * Can't use a strarray, MIPS may override for ABI reasons.
295 #define P_SK_TYPE(n) case SOCK_##n: printed = scnprintf(bf, size, #n); break;
300 P_SK_TYPE(SEQPACKET);
305 printed = scnprintf(bf, size, "%#x", type);
308 #define P_SK_FLAG(n) \
309 if (flags & SOCK_##n) { \
310 printed += scnprintf(bf + printed, size - printed, "|%s", #n); \
311 flags &= ~SOCK_##n; \
319 printed += scnprintf(bf + printed, size - printed, "|%#x", flags);
324 #define SCA_SK_TYPE syscall_arg__scnprintf_socket_type
/*
 * Local definitions of MSG_* constants used by the decoder below.
 * NOTE(review): only the guard for MSG_SENDPAGE_NOTLAST is visible; the other
 * two are presumably wrapped in similar #ifndef guards — confirm.
 */
327 #define MSG_PROBE 0x10
329 #ifndef MSG_SENDPAGE_NOTLAST
330 #define MSG_SENDPAGE_NOTLAST 0x20000
333 #define MSG_FASTOPEN 0x20000000
/*
 * Decodes arg->val as a set of MSG_* bits for the send and recv family of
 * syscalls.
 *
 * Each P_MSG_FLAG(n) use tests MSG_n and, when set, appends the stringified
 * name n, '|'-separated from anything printed before it.  The visible code
 * also has an early return printing "NONE" and a final "%#x" print of flags.
 *
 * NOTE(review): the conditions guarding the "NONE" return and the final hex
 * print, the macro tail and several macro uses are outside this excerpt.
 */
336 static size_t syscall_arg__scnprintf_msg_flags(char *bf, size_t size,
337 struct syscall_arg *arg)
339 int printed = 0, flags = arg->val;
342 return scnprintf(bf, size, "NONE");
343 #define P_MSG_FLAG(n) \
344 if (flags & MSG_##n) { \
345 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
351 P_MSG_FLAG(DONTROUTE);
356 P_MSG_FLAG(DONTWAIT);
363 P_MSG_FLAG(ERRQUEUE);
364 P_MSG_FLAG(NOSIGNAL);
366 P_MSG_FLAG(WAITFORONE);
367 P_MSG_FLAG(SENDPAGE_NOTLAST);
368 P_MSG_FLAG(FASTOPEN);
369 P_MSG_FLAG(CMSG_CLOEXEC);
373 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
378 #define SCA_MSG_FLAGS syscall_arg__scnprintf_msg_flags
/*
 * Decodes the 'mode' argument of access().
 *
 * A mode equal to F_OK (0) is printed as "F".  Otherwise a helper macro
 * tests each n_OK bit and appends the stringified letter n with no separator
 * between letters.  A final "|%#x" print of mode is also present.
 *
 * NOTE(review): the macro's #define line, its uses, and the condition
 * guarding the final hex print are outside this excerpt.
 */
380 static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size,
381 struct syscall_arg *arg)
386 if (mode == F_OK) /* 0 */
387 return scnprintf(bf, size, "F");
389 if (mode & n##_OK) { \
390 printed += scnprintf(bf + printed, size - printed, "%s", #n); \
400 printed += scnprintf(bf + printed, size - printed, "|%#x", mode);
405 #define SCA_ACCMODE syscall_arg__scnprintf_access_mode
/*
 * Decodes the 'flags' argument of the open() family.
 *
 * When O_CREAT is not set, the bit for the argument that follows this one
 * (arg->idx + 1, the mode) is set in arg->mask.  The visible code has an
 * early return printing "RDONLY", a helper macro that appends the name of
 * each O_n bit that is set ('|'-separated), and a final "%#x" print of flags.
 *
 * O_SYNC is tested with a full-mask comparison, (flags & O_SYNC) == O_SYNC,
 * rather than a plain bit test.
 * NOTE(review): presumably because O_SYNC can span more than one bit —
 * confirm against the platform headers.
 *
 * NOTE(review): the condition for the "RDONLY" return, the macro's #define
 * line and uses, and the guard on the final print are outside this excerpt.
 */
407 static size_t syscall_arg__scnprintf_open_flags(char *bf, size_t size,
408 struct syscall_arg *arg)
410 int printed = 0, flags = arg->val;
412 if (!(flags & O_CREAT))
413 arg->mask |= 1 << (arg->idx + 1); /* Mask the mode parm */
416 return scnprintf(bf, size, "RDONLY");
418 if (flags & O_##n) { \
419 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
443 if ((flags & O_SYNC) == O_SYNC)
444 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", "SYNC");
456 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
461 #define SCA_OPEN_FLAGS syscall_arg__scnprintf_open_flags
/*
 * Decodes arg->val as a set of EFD_* bits (eventfd2 flags).
 *
 * A helper macro appends the stringified name n for each EFD_n bit that is
 * set, '|'-separated.  The visible code also has an early return printing
 * "NONE" and a final "%#x" print of flags.
 *
 * NOTE(review): the condition for the "NONE" return, the macro's #define
 * line and uses, and the guard on the final print are outside this excerpt.
 */
463 static size_t syscall_arg__scnprintf_eventfd_flags(char *bf, size_t size,
464 struct syscall_arg *arg)
466 int printed = 0, flags = arg->val;
469 return scnprintf(bf, size, "NONE");
471 if (flags & EFD_##n) { \
472 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
482 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
487 #define SCA_EFD_FLAGS syscall_arg__scnprintf_eventfd_flags
/*
 * Translates a signal number into its name.
 *
 * P_SIGNUM(n) expands to a 'case SIGn:' label that returns the stringified
 * name n (without the "SIG" prefix).  A value with no matching case is
 * printed as "%#x".
 *
 * NOTE(review): the declaration of sig, the switch and the individual
 * P_SIGNUM uses are outside this excerpt.
 */
489 static size_t syscall_arg__scnprintf_signum(char *bf, size_t size, struct syscall_arg *arg)
494 #define P_SIGNUM(n) case SIG##n: return scnprintf(bf, size, #n)
529 return scnprintf(bf, size, "%#x", sig);
532 #define SCA_SIGNUM syscall_arg__scnprintf_signum
/*
 * Per-syscall formatting overrides.
 *
 * Fields used by the entries below:
 *   .name          syscall name, the lookup key.
 *   .alias         alternative name tried for the "sys_enter_<alias>"
 *                  tracepoint format when "sys_enter_<name>" is not found.
 *   .arg_scnprintf formatter for the argument at that index (up to 6).
 *   .arg_parm      extra pointer handed to the formatter at that index as
 *                  arg->parm, e.g. the strarray for SCA_STRARRAY.
 *   .errmsg        a negative return is printed as "-1 <errno name> <text>".
 *   .timeout       a return of 0 is printed as "0 Timeout".
 *   .hexret        the return value is printed in hexadecimal.
 *
 * The entries are in ascending strcmp() order of .name.
 * NOTE(review): presumably this is the syscall_fmts array that
 * syscall_fmt__find() searches with bsearch(), in which case that ordering
 * must be preserved when adding entries; the array declarator and the
 * remaining struct members are outside this excerpt.
 */
534 static struct syscall_fmt {
537 size_t (*arg_scnprintf[6])(char *bf, size_t size, struct syscall_arg *arg);
543 { .name = "access", .errmsg = true,
544 .arg_scnprintf = { [1] = SCA_ACCMODE, /* mode */ }, },
545 { .name = "arch_prctl", .errmsg = true, .alias = "prctl", },
546 { .name = "brk", .hexret = true,
547 .arg_scnprintf = { [0] = SCA_HEX, /* brk */ }, },
548 { .name = "connect", .errmsg = true, },
549 { .name = "epoll_ctl", .errmsg = true,
550 .arg_scnprintf = { [1] = SCA_STRARRAY, /* op */ },
551 .arg_parm = { [1] = &strarray__epoll_ctl_ops, /* op */ }, },
552 { .name = "eventfd2", .errmsg = true,
553 .arg_scnprintf = { [1] = SCA_EFD_FLAGS, /* flags */ }, },
554 { .name = "fcntl", .errmsg = true,
555 .arg_scnprintf = { [1] = SCA_STRARRAY, /* cmd */ },
556 .arg_parm = { [1] = &strarray__fcntl_cmds, /* cmd */ }, },
557 { .name = "fstat", .errmsg = true, .alias = "newfstat", },
558 { .name = "fstatat", .errmsg = true, .alias = "newfstatat", },
559 { .name = "futex", .errmsg = true,
560 .arg_scnprintf = { [1] = SCA_FUTEX_OP, /* op */ }, },
561 { .name = "getitimer", .errmsg = true,
562 .arg_scnprintf = { [0] = SCA_STRARRAY, /* which */ },
563 .arg_parm = { [0] = &strarray__itimers, /* which */ }, },
564 { .name = "getrlimit", .errmsg = true,
565 .arg_scnprintf = { [0] = SCA_STRARRAY, /* resource */ },
566 .arg_parm = { [0] = &strarray__rlimit_resources, /* resource */ }, },
567 { .name = "ioctl", .errmsg = true,
568 .arg_scnprintf = { [2] = SCA_HEX, /* arg */ }, },
569 { .name = "kill", .errmsg = true,
570 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
571 { .name = "lseek", .errmsg = true,
572 .arg_scnprintf = { [2] = SCA_STRARRAY, /* whence */ },
573 .arg_parm = { [2] = &strarray__whences, /* whence */ }, },
574 { .name = "lstat", .errmsg = true, .alias = "newlstat", },
575 { .name = "madvise", .errmsg = true,
576 .arg_scnprintf = { [0] = SCA_HEX, /* start */
577 [2] = SCA_MADV_BHV, /* behavior */ }, },
578 { .name = "mmap", .hexret = true,
579 .arg_scnprintf = { [0] = SCA_HEX, /* addr */
580 [2] = SCA_MMAP_PROT, /* prot */
581 [3] = SCA_MMAP_FLAGS, /* flags */ }, },
582 { .name = "mprotect", .errmsg = true,
583 .arg_scnprintf = { [0] = SCA_HEX, /* start */
584 [2] = SCA_MMAP_PROT, /* prot */ }, },
585 { .name = "mremap", .hexret = true,
586 .arg_scnprintf = { [0] = SCA_HEX, /* addr */
587 [4] = SCA_HEX, /* new_addr */ }, },
588 { .name = "munmap", .errmsg = true,
589 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
590 { .name = "open", .errmsg = true,
591 .arg_scnprintf = { [1] = SCA_OPEN_FLAGS, /* flags */ }, },
592 { .name = "open_by_handle_at", .errmsg = true,
593 .arg_scnprintf = { [2] = SCA_OPEN_FLAGS, /* flags */ }, },
594 { .name = "openat", .errmsg = true,
595 .arg_scnprintf = { [2] = SCA_OPEN_FLAGS, /* flags */ }, },
596 { .name = "poll", .errmsg = true, .timeout = true, },
597 { .name = "ppoll", .errmsg = true, .timeout = true, },
598 { .name = "pread", .errmsg = true, .alias = "pread64", },
599 { .name = "prlimit64", .errmsg = true,
600 .arg_scnprintf = { [1] = SCA_STRARRAY, /* resource */ },
601 .arg_parm = { [1] = &strarray__rlimit_resources, /* resource */ }, },
602 { .name = "pwrite", .errmsg = true, .alias = "pwrite64", },
603 { .name = "read", .errmsg = true, },
604 { .name = "recvfrom", .errmsg = true,
605 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
606 { .name = "recvmmsg", .errmsg = true,
607 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
608 { .name = "recvmsg", .errmsg = true,
609 .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
610 { .name = "rt_sigaction", .errmsg = true,
611 .arg_scnprintf = { [0] = SCA_SIGNUM, /* sig */ }, },
612 { .name = "rt_sigprocmask", .errmsg = true,
613 .arg_scnprintf = { [0] = SCA_STRARRAY, /* how */ },
614 .arg_parm = { [0] = &strarray__sighow, /* how */ }, },
615 { .name = "rt_sigqueueinfo", .errmsg = true,
616 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
617 { .name = "rt_tgsigqueueinfo", .errmsg = true,
618 .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
619 { .name = "select", .errmsg = true, .timeout = true, },
620 { .name = "sendmmsg", .errmsg = true,
621 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
622 { .name = "sendmsg", .errmsg = true,
623 .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
624 { .name = "sendto", .errmsg = true,
625 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
626 { .name = "setitimer", .errmsg = true,
627 .arg_scnprintf = { [0] = SCA_STRARRAY, /* which */ },
628 .arg_parm = { [0] = &strarray__itimers, /* which */ }, },
629 { .name = "setrlimit", .errmsg = true,
630 .arg_scnprintf = { [0] = SCA_STRARRAY, /* resource */ },
631 .arg_parm = { [0] = &strarray__rlimit_resources, /* resource */ }, },
632 { .name = "socket", .errmsg = true,
633 .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
634 [1] = SCA_SK_TYPE, /* type */ },
635 .arg_parm = { [0] = &strarray__socket_families, /* family */ }, },
636 { .name = "stat", .errmsg = true, .alias = "newstat", },
637 { .name = "tgkill", .errmsg = true,
638 .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
639 { .name = "tkill", .errmsg = true,
640 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
641 { .name = "uname", .errmsg = true, .alias = "newuname", },
/*
 * bsearch() comparison callback: 'name' is the key string being looked up
 * and 'fmtp' points at one struct syscall_fmt element.  Returns the strcmp()
 * ordering of the key against that element's name.
 */
644 static int syscall_fmt__cmp(const void *name, const void *fmtp)
646 const struct syscall_fmt *fmt = fmtp;
647 return strcmp(name, fmt->name);
/*
 * Looks up the formatting overrides for syscall 'name' with a binary search
 * over syscall_fmts.  Returns the matching entry, or NULL when bsearch()
 * finds none.  The search is only correct while syscall_fmts stays sorted in
 * strcmp() order of .name.
 */
650 static struct syscall_fmt *syscall_fmt__find(const char *name)
652 const int nmemb = ARRAY_SIZE(syscall_fmts);
653 return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp);
/*
 * NOTE(review): these are members of a struct whose opening line and other
 * members are outside this excerpt; elsewhere in this file they are reached
 * through a 'struct syscall *sc'.
 */
/* Tracepoint format loaded with event_format__new("syscalls", "sys_enter_<name>"). */
657 struct event_format *tp_format;
/* Result of syscall_fmt__find() for this syscall; NULL when it has no overrides. */
660 struct syscall_fmt *fmt;
/* Per-argument formatter array allocated by syscall__set_arg_fmts(). */
661 size_t (**arg_scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
/*
 * Prints a duration to fp in the form "(<value> ms): ".
 *
 * t is divided by NSEC_PER_MSEC to obtain milliseconds.  The number is
 * printed with "%6.3f" in one of three colours: red, yellow (the visible test
 * is duration >= 0.01) or normal.  Returns the sum of the values returned by
 * the print calls.
 *
 * NOTE(review): the condition selecting red and the final 'else' are outside
 * this excerpt.
 */
665 static size_t fprintf_duration(unsigned long t, FILE *fp)
667 double duration = (double)t / NSEC_PER_MSEC;
668 size_t printed = fprintf(fp, "(");
671 printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration);
672 else if (duration >= 0.01)
673 printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration);
675 printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration);
676 return printed + fprintf(fp, "): ");
/*
 * Per-thread tracing state; an instance is stored in thread->priv by
 * thread__trace().
 * NOTE(review): only one member is visible in this excerpt.
 */
679 struct thread_trace {
/* Compared against trace->nr_events for the ratio column of the summary. */
683 unsigned long nr_events;
/*
 * Allocates a struct thread_trace with zalloc() and returns the result
 * unchanged; the caller (thread__trace) checks it for NULL.
 */
688 static struct thread_trace *thread_trace__new(void)
690 return zalloc(sizeof(struct thread_trace));
/*
 * Gets the struct thread_trace attached to 'thread', creating it with
 * thread_trace__new() the first time thread->priv is found to be NULL.
 * The visible failure path prints a red "not enough memory" warning to fp.
 *
 * NOTE(review): the checks made before the allocation, the updates applied
 * to ttrace and the return statements are outside this excerpt.
 */
693 static struct thread_trace *thread__trace(struct thread *thread, FILE *fp)
695 struct thread_trace *ttrace;
700 if (thread->priv == NULL)
701 thread->priv = thread_trace__new();
703 if (thread->priv == NULL)
706 ttrace = thread->priv;
711 color_fprintf(fp, PERF_COLOR_RED,
712 "WARNING: not enough memory, dropping samples!\n");
/*
 * NOTE(review): these are members of a struct whose opening line and several
 * other members are outside this excerpt; the rest of the file uses them
 * through 'struct trace *trace'.
 */
/* Embedded tool; callbacks get back to the trace with container_of(). */
717 struct perf_tool tool;
/* Syscall table indexed by syscall id, grown in trace__read_syscall_info(). */
721 struct syscall *table;
723 struct perf_record_opts opts;
/* Denominator of the per-thread ratio in the summary. */
728 unsigned long nr_events;
/* Syscall names to match; not_ev_qualifier inverts the match. */
729 struct strlist *ev_qualifier;
730 bool not_ev_qualifier;
/* Filters built by parse_target_str() from the target tid/pid strings. */
731 struct intlist *tid_list;
732 struct intlist *pid_list;
/* When set, trace__fprintf_entry_head() prints the tid on each line. */
734 bool multiple_threads;
/* Threshold in milliseconds, set by trace__set_duration(). */
736 double duration_filter;
/*
 * Returns true when t is below trace->duration_filter.  The filter is held
 * in milliseconds and is scaled by NSEC_PER_MSEC before the comparison, so t
 * is expected in nanoseconds.
 */
740 static bool trace__filter_duration(struct trace *trace, double t)
742 return t < (trace->duration_filter * NSEC_PER_MSEC);
/*
 * Prints tstamp relative to trace->base_time, converted to milliseconds by
 * dividing by NSEC_PER_MSEC, using "%10.3f " and returns fprintf()'s result.
 */
745 static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
747 double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC;
749 return fprintf(fp, "%10.3f ", ts);
/*
 * File-scope flag, initially false.
 * NOTE(review): presumably set by sig_handler() to stop the main loop in
 * trace__run(); if so it should be declared 'volatile sig_atomic_t' to be
 * safely written from a signal handler — the handler body is outside this
 * excerpt, confirm.
 */
752 static bool done = false;
/* Signal handler installed for SIGCHLD and SIGINT in trace__run(); the signal number is unused. */
754 static void sig_handler(int sig __maybe_unused)
/*
 * Prints the prefix of an output line to fp: the timestamp
 * (trace__fprintf_tstamp), then the duration (fprintf_duration).  When
 * trace->multiple_threads is set it also prints the thread id, preceded by
 * the thread's comm (at most 14 characters, followed by '/') if
 * trace->show_comm is set.  The return values of the print calls are summed
 * in 'printed'.
 *
 * NOTE(review): the return statement is outside this excerpt.
 */
759 static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread,
760 u64 duration, u64 tstamp, FILE *fp)
762 size_t printed = trace__fprintf_tstamp(trace, tstamp, fp);
763 printed += fprintf_duration(duration, fp);
765 if (trace->multiple_threads) {
766 if (trace->show_comm)
767 printed += fprintf(fp, "%.14s/", thread->comm);
768 printed += fprintf(fp, "%d ", thread->tid);
/*
 * Handles a non-sample perf event for 'machine', switching on the record
 * type.  PERF_RECORD_LOST prints a red "LOST <n> events!" line to
 * trace->output and is passed to machine__process_lost_event(); the other
 * visible path passes the event to machine__process_event().
 *
 * NOTE(review): the 'default' label, the breaks and the return statement are
 * outside this excerpt.
 */
774 static int trace__process_event(struct trace *trace, struct machine *machine,
775 union perf_event *event)
779 switch (event->header.type) {
780 case PERF_RECORD_LOST:
781 color_fprintf(trace->output, PERF_COLOR_RED,
782 "LOST %" PRIu64 " events!\n", event->lost.lost);
783 ret = machine__process_lost_event(machine, event);
785 ret = machine__process_event(machine, event);
/*
 * perf_tool-style callback adapter: recovers the enclosing struct trace from
 * 'tool' with container_of() and forwards the event to
 * trace__process_event().  The sample argument is unused.
 */
792 static int trace__tool_process(struct perf_tool *tool,
793 union perf_event *event,
794 struct perf_sample *sample __maybe_unused,
795 struct machine *machine)
797 struct trace *trace = container_of(tool, struct trace, tool);
798 return trace__process_event(trace, machine, event);
/*
 * Prepares symbol and thread information before tracing starts: calls
 * symbol__init(), initialises trace->host as the host machine
 * (HOST_KERNEL_ID) and creates its kernel maps, then synthesizes events for
 * already-running threads: from evlist->threads when the target has a task,
 * otherwise through perf_event__synthesize_threads().
 *
 * NOTE(review): the error checks, the remaining call arguments and the
 * return statement are outside this excerpt.
 */
801 static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
803 int err = symbol__init();
808 machine__init(&trace->host, "", HOST_KERNEL_ID);
809 machine__create_kernel_maps(&trace->host);
811 if (perf_target__has_task(&trace->opts.target)) {
812 err = perf_event__synthesize_thread_map(&trace->tool, evlist->threads,
816 err = perf_event__synthesize_threads(&trace->tool, trace__tool_process,
/*
 * Builds sc->arg_scnprintf, the per-argument formatter array.
 *
 * The array has nr_fields - 1 slots and the loop starts at fields->next, so
 * the first tracepoint field is not treated as an argument.  For each
 * remaining field the formatter comes from sc->fmt->arg_scnprintf[idx] when
 * sc->fmt has one for that index; otherwise pointer fields
 * (FIELD_IS_POINTER) get the hex formatter and other fields keep the zeroed
 * slot left by calloc().  sc->arg_parm is taken from sc->fmt->arg_parm.
 *
 * NOTE(review): the declaration and increment of idx, the guard around the
 * arg_parm assignment and the return statements are outside this excerpt.
 */
826 static int syscall__set_arg_fmts(struct syscall *sc)
828 struct format_field *field;
831 sc->arg_scnprintf = calloc(sc->tp_format->format.nr_fields - 1, sizeof(void *));
832 if (sc->arg_scnprintf == NULL)
836 sc->arg_parm = sc->fmt->arg_parm;
838 for (field = sc->tp_format->format.fields->next; field; field = field->next) {
839 if (sc->fmt && sc->fmt->arg_scnprintf[idx])
840 sc->arg_scnprintf[idx] = sc->fmt->arg_scnprintf[idx];
841 else if (field->flags & FIELD_IS_POINTER)
842 sc->arg_scnprintf[idx] = syscall_arg__scnprintf_hex;
/*
 * Fills in the trace->syscalls.table entry for syscall 'id'.
 *
 * Steps visible here:
 *  - resolve the name with audit_syscall_to_name();
 *  - when id is beyond trace->syscalls.max, grow the table with realloc()
 *    to id + 1 entries and zero the new tail (or the whole table when max is
 *    -1, i.e. nothing was allocated before); the old pointer is only
 *    replaced after realloc() succeeds;
 *  - when an event qualifier list is set, look the name up in it and combine
 *    the result with not_ev_qualifier to decide whether the syscall is
 *    filtered;
 *  - look up formatting overrides with syscall_fmt__find();
 *  - load the "sys_enter_<name>" tracepoint format, retrying with the alias
 *    from sc->fmt when the first attempt fails;
 *  - finish with syscall__set_arg_fmts(), whose result is returned.
 *
 * NOTE(review): the local declarations, the assignment of sc->name, what is
 * done for a filtered syscall and the early returns are outside this
 * excerpt.
 */
849 static int trace__read_syscall_info(struct trace *trace, int id)
853 const char *name = audit_syscall_to_name(id, trace->audit_machine);
858 if (id > trace->syscalls.max) {
859 struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc));
861 if (nsyscalls == NULL)
864 if (trace->syscalls.max != -1) {
865 memset(nsyscalls + trace->syscalls.max + 1, 0,
866 (id - trace->syscalls.max) * sizeof(*sc));
868 memset(nsyscalls, 0, (id + 1) * sizeof(*sc));
871 trace->syscalls.table = nsyscalls;
872 trace->syscalls.max = id;
875 sc = trace->syscalls.table + id;
878 if (trace->ev_qualifier) {
879 bool in = strlist__find(trace->ev_qualifier, name) != NULL;
881 if (!(in ^ trace->not_ev_qualifier)) {
884 * No need to do read tracepoint information since this will be
891 sc->fmt = syscall_fmt__find(sc->name);
893 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name);
894 sc->tp_format = event_format__new("syscalls", tp_name);
896 if (sc->tp_format == NULL && sc->fmt && sc->fmt->alias) {
897 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias);
898 sc->tp_format = event_format__new("syscalls", tp_name);
901 if (sc->tp_format == NULL)
904 return syscall__set_arg_fmts(sc);
/*
 * Formats the arguments of one syscall invocation into bf.
 *
 * With a tracepoint format available, the fields after the first are walked
 * in step with arg.idx and a single-bit 'bit' that is shifted left each
 * iteration.  Each printed argument is emitted as "<field name>: <value>",
 * comma-separated; the value comes from the per-argument formatter in
 * sc->arg_scnprintf when one is set (it receives the raw value and
 * sc->arg_parm[idx]), otherwise it is printed with "%ld".  A test for an
 * argument value of 0 is visible.
 * NOTE(review): presumably zero-valued arguments are skipped — confirm.
 *
 * Without a tracepoint format the visible fallback prints args[i] by index.
 *
 * NOTE(review): the remaining parameters, the use of 'bit' against
 * arg.mask, the fallback loop header and the return statement are outside
 * this excerpt.
 */
907 static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
912 if (sc->tp_format != NULL) {
913 struct format_field *field;
915 struct syscall_arg arg = {
920 for (field = sc->tp_format->format.fields->next; field;
921 field = field->next, ++arg.idx, bit <<= 1) {
925 if (args[arg.idx] == 0)
928 printed += scnprintf(bf + printed, size - printed,
929 "%s%s: ", printed ? ", " : "", field->name);
930 if (sc->arg_scnprintf && sc->arg_scnprintf[arg.idx]) {
931 arg.val = args[arg.idx];
933 arg.parm = sc->arg_parm[arg.idx];
934 printed += sc->arg_scnprintf[arg.idx](bf + printed,
935 size - printed, &arg);
937 printed += scnprintf(bf + printed, size - printed,
938 "%ld", args[arg.idx]);
945 printed += scnprintf(bf + printed, size - printed,
947 printed ? ", " : "", i, args[i]);
/*
 * Signature shared by the per-tracepoint handlers (trace__sys_enter,
 * trace__sys_exit, trace__sched_stat_runtime).  The handler is stored in
 * evsel->handler.func and called with the trace, the originating evsel and
 * the parsed sample.
 */
955 typedef int (*tracepoint_handler)(struct trace *trace, struct perf_evsel *evsel,
956 struct perf_sample *sample);
/*
 * Returns the syscall table entry for the sample's "id" field, loading it on
 * demand.
 *
 * An entry is loaded through trace__read_syscall_info() when id is beyond
 * trace->syscalls.max or its slot has no name yet; the same condition is
 * re-checked afterwards before &trace->syscalls.table[id] is returned.
 * The visible diagnostics are "Invalid syscall <id> id, skipping ..." (with a
 * running counter n) and "Problems reading syscall <id>[(<name>)]
 * information", both written to trace->output.
 *
 * NOTE(review): the condition that classifies an id as invalid, the goto
 * targets and the failure return value are outside this excerpt.
 */
958 static struct syscall *trace__syscall_info(struct trace *trace,
959 struct perf_evsel *evsel,
960 struct perf_sample *sample)
962 int id = perf_evsel__intval(evsel, sample, "id");
967 * XXX: Noticed on x86_64, reproduced as far back as 3.0.36, haven't tried
968 * before that, leaving at a higher verbosity level till that is
969 * explained. Reproduced with plain ftrace with:
971 * echo 1 > /t/events/raw_syscalls/sys_exit/enable
972 * grep "NR -1 " /t/trace_pipe
974 * After generating some load on the machine.
978 fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n",
979 id, perf_evsel__name(evsel), ++n);
984 if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) &&
985 trace__read_syscall_info(trace, id))
988 if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL))
991 return &trace->syscalls.table[id];
995 fprintf(trace->output, "Problems reading syscall %d", id);
996 if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL)
997 fprintf(trace->output, "(%s)", trace->syscalls.table[id].name);
998 fputs(" information\n", trace->output);
/*
 * Handler for the raw_syscalls:sys_enter tracepoint.
 *
 * Resolves the syscall (trace__syscall_info) and the thread, reads the raw
 * "args" pointer from the sample and formats "<name>(<args>" into the
 * thread's entry_str, a 1024-byte buffer allocated on first use.  The entry
 * timestamp is saved in ttrace->entry_time.
 *
 * "exit" and "exit_group" get special handling: when no duration filter is
 * set, the line is printed right away with a duration of 1.  The visible
 * code also sets ttrace->entry_pending.
 * NOTE(review): presumably entry_pending is set for every other syscall so
 * that trace__sys_exit() prints the saved text — confirm.
 *
 * NOTE(review): the NULL checks, their return values and the 'else' that
 * separates the two cases are outside this excerpt.
 */
1003 static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
1004 struct perf_sample *sample)
1009 struct thread *thread;
1010 struct syscall *sc = trace__syscall_info(trace, evsel, sample);
1011 struct thread_trace *ttrace;
1019 thread = machine__findnew_thread(&trace->host, sample->pid,
1021 ttrace = thread__trace(thread, trace->output);
1025 args = perf_evsel__rawptr(evsel, sample, "args");
1027 fprintf(trace->output, "Problems reading syscall arguments\n");
1031 ttrace = thread->priv;
1033 if (ttrace->entry_str == NULL) {
1034 ttrace->entry_str = malloc(1024);
1035 if (!ttrace->entry_str)
1039 ttrace->entry_time = sample->time;
1040 msg = ttrace->entry_str;
1041 printed += scnprintf(msg + printed, 1024 - printed, "%s(", sc->name);
1043 printed += syscall__scnprintf_args(sc, msg + printed, 1024 - printed, args);
1045 if (!strcmp(sc->name, "exit_group") || !strcmp(sc->name, "exit")) {
1046 if (!trace->duration_filter) {
1047 trace__fprintf_entry_head(trace, thread, 1, sample->time, trace->output);
1048 fprintf(trace->output, "%-70s\n", ttrace->entry_str);
1051 ttrace->entry_pending = true;
/*
 * Handler for the raw_syscalls:sys_exit tracepoint; completes the line
 * started by trace__sys_enter().
 *
 * Reads the "ret" field, records the exit time, and when an entry time is
 * known computes the duration and applies trace__filter_duration().  It then
 * prints the line head followed by either the pending entry text (padded to
 * 70 columns) or " ... [continued]: <name>()" when no entry is pending.
 *
 * The return value is rendered according to sc->fmt:
 *  - no fmt: ") = <ret>" in decimal;
 *  - ret < 0 with .errmsg: ") = -1 <errno name> <strerror text>";
 *  - ret == 0 with .timeout: ") = 0 Timeout";
 *  - .hexret: ") = <ret>" in hexadecimal.
 * Finally a newline is written and entry_pending is cleared.
 *
 * NOTE(review): the NULL checks, the goto taken when the sample is filtered,
 * the final decimal fallback and the return statement are outside this
 * excerpt.
 */
1056 static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
1057 struct perf_sample *sample)
1061 struct thread *thread;
1062 struct syscall *sc = trace__syscall_info(trace, evsel, sample);
1063 struct thread_trace *ttrace;
1071 thread = machine__findnew_thread(&trace->host, sample->pid,
1073 ttrace = thread__trace(thread, trace->output);
1077 ret = perf_evsel__intval(evsel, sample, "ret");
1079 ttrace = thread->priv;
1081 ttrace->exit_time = sample->time;
1083 if (ttrace->entry_time) {
1084 duration = sample->time - ttrace->entry_time;
1085 if (trace__filter_duration(trace, duration))
1087 } else if (trace->duration_filter)
1090 trace__fprintf_entry_head(trace, thread, duration, sample->time, trace->output);
1092 if (ttrace->entry_pending) {
1093 fprintf(trace->output, "%-70s", ttrace->entry_str);
1095 fprintf(trace->output, " ... [");
1096 color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued");
1097 fprintf(trace->output, "]: %s()", sc->name);
1100 if (sc->fmt == NULL) {
1102 fprintf(trace->output, ") = %d", ret);
1103 } else if (ret < 0 && sc->fmt->errmsg) {
1105 const char *emsg = strerror_r(-ret, bf, sizeof(bf)),
1106 *e = audit_errno_to_name(-ret);
1108 fprintf(trace->output, ") = -1 %s %s", e, emsg);
1109 } else if (ret == 0 && sc->fmt->timeout)
1110 fprintf(trace->output, ") = 0 Timeout");
1111 else if (sc->fmt->hexret)
1112 fprintf(trace->output, ") = %#x", ret);
1116 fputc('\n', trace->output);
1118 ttrace->entry_pending = false;
/*
 * Handler for the sched:sched_stat_runtime tracepoint.
 *
 * Reads the "runtime" field, converts it to milliseconds by dividing by
 * NSEC_PER_MSEC and adds it to both the per-thread total
 * (ttrace->runtime_ms) and the global total (trace->runtime_ms).
 * The visible fprintf() dumps the event's comm, pid, runtime and vruntime to
 * trace->output.
 * NOTE(review): presumably that dump is the path taken when no thread_trace
 * could be obtained — confirm; the branch structure and return statements
 * are outside this excerpt.
 */
1123 static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel,
1124 struct perf_sample *sample)
1126 u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
1127 double runtime_ms = (double)runtime / NSEC_PER_MSEC;
1128 struct thread *thread = machine__findnew_thread(&trace->host,
1131 struct thread_trace *ttrace = thread__trace(thread, trace->output);
1136 ttrace->runtime_ms += runtime_ms;
1137 trace->runtime_ms += runtime_ms;
1141 fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n",
1143 perf_evsel__strval(evsel, sample, "comm"),
1144 (pid_t)perf_evsel__intval(evsel, sample, "pid"),
1146 perf_evsel__intval(evsel, sample, "vruntime"));
/*
 * Decides whether a sample is excluded by the pid/tid filters.
 *
 * The first test matches when sample->pid is in trace->pid_list or
 * sample->tid is in trace->tid_list; the second test only checks whether
 * either list exists at all.
 * NOTE(review): presumably a match means "do not skip" and an existing list
 * without a match means "skip" — the return statements are outside this
 * excerpt, confirm.
 */
1150 static bool skip_sample(struct trace *trace, struct perf_sample *sample)
1152 if ((trace->pid_list && intlist__find(trace->pid_list, sample->pid)) ||
1153 (trace->tid_list && intlist__find(trace->tid_list, sample->tid)))
1156 if (trace->pid_list || trace->tid_list)
/*
 * perf_tool sample callback, installed as trace->tool.sample by
 * trace__replay().
 *
 * Recovers the struct trace from 'tool', consults skip_sample(), latches the
 * first sample's timestamp as trace->base_time unless full_time is set, and
 * dispatches to the handler stored in evsel->handler.func.  The event and
 * machine arguments are unused.
 *
 * NOTE(review): what is returned for a skipped sample, any check on
 * 'handler' and the final return are outside this excerpt.
 */
1162 static int trace__process_sample(struct perf_tool *tool,
1163 union perf_event *event __maybe_unused,
1164 struct perf_sample *sample,
1165 struct perf_evsel *evsel,
1166 struct machine *machine __maybe_unused)
1168 struct trace *trace = container_of(tool, struct trace, tool);
1171 tracepoint_handler handler = evsel->handler.func;
1173 if (skip_sample(trace, sample))
1176 if (!trace->full_time && trace->base_time == 0)
1177 trace->base_time = sample->time;
1180 handler(trace, evsel, sample);
/*
 * Returns true when the session's evlist contains a tracepoint called
 * 'name', as reported by perf_evlist__find_tracepoint_by_name().
 */
1186 perf_session__has_tp(struct perf_session *session, const char *name)
1188 struct perf_evsel *evsel;
1190 evsel = perf_evlist__find_tracepoint_by_name(session->evlist, name);
1192 return evsel != NULL;
/*
 * Converts the target pid and tid option strings into integer lists.
 *
 * For each of trace->opts.target.pid and .tid that is set, intlist__new()
 * parses the string into trace->pid_list / trace->tid_list; a NULL result is
 * reported with pr_err().
 *
 * NOTE(review): the return values for the failure and success paths are
 * outside this excerpt.
 */
1195 static int parse_target_str(struct trace *trace)
1197 if (trace->opts.target.pid) {
1198 trace->pid_list = intlist__new(trace->opts.target.pid);
1199 if (trace->pid_list == NULL) {
1200 pr_err("Error parsing process id string\n");
1205 if (trace->opts.target.tid) {
1206 trace->tid_list = intlist__new(trace->opts.target.tid);
1207 if (trace->tid_list == NULL) {
1208 pr_err("Error parsing thread id string\n");
/*
 * Live tracing entry point.
 *
 * Setup, in the order visible here: create an evlist; add the
 * raw_syscalls:sys_enter and raw_syscalls:sys_exit tracepoints with their
 * handlers (sched:sched_stat_runtime is added under a condition that is not
 * visible); create the target maps; initialise symbols; configure the
 * evlist; install sig_handler for SIGCHLD and SIGINT; prepare the workload
 * from argv ('forks' is argc > 0); open, mmap and enable the events; start
 * the workload.  Each failing step prints a message to trace->output and
 * jumps to a cleanup label.
 *
 * Main loop: every mmap ring is drained with perf_evlist__mmap_read().  Each
 * event is parsed into a sample; the first timestamp becomes base_time
 * unless full_time is set; non-sample records go to trace__process_event();
 * samples are mapped to their evsel by id, rejected when they carry no raw
 * payload, and otherwise dispatched to evsel->handler.func.  When a pass
 * leaves trace->nr_events unchanged the code either leaves the loop or
 * blocks in poll() on the evlist file descriptors.
 *
 * Teardown is visible as disable, munmap, close, delete_maps, delete.
 * NOTE(review): presumably the out_* labels enter that sequence at different
 * points; the labels, several conditions, the loop's outer structure and the
 * return statement are outside this excerpt.
 */
1216 static int trace__run(struct trace *trace, int argc, const char **argv)
1218 struct perf_evlist *evlist = perf_evlist__new();
1219 struct perf_evsel *evsel;
1221 unsigned long before;
1222 const bool forks = argc > 0;
1224 if (evlist == NULL) {
1225 fprintf(trace->output, "Not enough memory to run!\n");
1229 if (perf_evlist__add_newtp(evlist, "raw_syscalls", "sys_enter", trace__sys_enter) ||
1230 perf_evlist__add_newtp(evlist, "raw_syscalls", "sys_exit", trace__sys_exit)) {
1231 fprintf(trace->output, "Couldn't read the raw_syscalls tracepoints information!\n");
1232 goto out_delete_evlist;
1236 perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
1237 trace__sched_stat_runtime)) {
1238 fprintf(trace->output, "Couldn't read the sched_stat_runtime tracepoint information!\n");
1239 goto out_delete_evlist;
1242 err = perf_evlist__create_maps(evlist, &trace->opts.target);
1244 fprintf(trace->output, "Problems parsing the target to trace, check your options!\n");
1245 goto out_delete_evlist;
1248 err = trace__symbols_init(trace, evlist);
1250 fprintf(trace->output, "Problems initializing symbol libraries!\n");
1251 goto out_delete_maps;
1254 perf_evlist__config(evlist, &trace->opts);
1256 signal(SIGCHLD, sig_handler);
1257 signal(SIGINT, sig_handler);
1260 err = perf_evlist__prepare_workload(evlist, &trace->opts.target,
1261 argv, false, false);
1263 fprintf(trace->output, "Couldn't run the workload!\n");
1264 goto out_delete_maps;
1268 err = perf_evlist__open(evlist);
1270 fprintf(trace->output, "Couldn't create the events: %s\n", strerror(errno));
1271 goto out_delete_maps;
1274 err = perf_evlist__mmap(evlist, UINT_MAX, false);
1276 fprintf(trace->output, "Couldn't mmap the events: %s\n", strerror(errno));
1277 goto out_close_evlist;
1280 perf_evlist__enable(evlist);
1283 perf_evlist__start_workload(evlist);
1285 trace->multiple_threads = evlist->threads->map[0] == -1 || evlist->threads->nr > 1;
1287 before = trace->nr_events;
1289 for (i = 0; i < evlist->nr_mmaps; i++) {
1290 union perf_event *event;
1292 while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) {
1293 const u32 type = event->header.type;
1294 tracepoint_handler handler;
1295 struct perf_sample sample;
1299 err = perf_evlist__parse_sample(evlist, event, &sample);
1301 fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err);
1305 if (!trace->full_time && trace->base_time == 0)
1306 trace->base_time = sample.time;
1308 if (type != PERF_RECORD_SAMPLE) {
1309 trace__process_event(trace, &trace->host, event);
1313 evsel = perf_evlist__id2evsel(evlist, sample.id);
1314 if (evsel == NULL) {
1315 fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample.id);
1319 if (sample.raw_data == NULL) {
1320 fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
1321 perf_evsel__name(evsel), sample.tid,
1322 sample.cpu, sample.raw_size);
1326 handler = evsel->handler.func;
1327 handler(trace, evsel, &sample);
1330 goto out_unmap_evlist;
1334 if (trace->nr_events == before) {
1336 goto out_unmap_evlist;
1338 poll(evlist->pollfd, evlist->nr_fds, -1);
1342 perf_evlist__disable(evlist);
1347 perf_evlist__munmap(evlist);
1349 perf_evlist__close(evlist);
1351 perf_evlist__delete_maps(evlist);
1353 perf_evlist__delete(evlist);
/*
 * Replay mode: processes a previously recorded data file instead of tracing
 * live.
 *
 * Installs the perf_tool callbacks (samples go to trace__process_sample, the
 * other record types to the generic perf_event__process_* helpers) and
 * requests ordered sample delivery that requires timestamps.  The tid is
 * always shown in the output.  After symbol__init(), input_name is opened as
 * a read-only session, trace__sys_enter and trace__sys_exit are bound to the
 * raw_syscalls tracepoints, and the data file is checked to contain both of
 * them.  The pid/tid filters are then parsed and all events are processed;
 * a failure there is reported with pr_err().  The session is deleted at the
 * end of the visible code.
 *
 * NOTE(review): the error returns, goto targets and final return statement
 * are outside this excerpt.
 */
1358 static int trace__replay(struct trace *trace)
1360 const struct perf_evsel_str_handler handlers[] = {
1361 { "raw_syscalls:sys_enter", trace__sys_enter, },
1362 { "raw_syscalls:sys_exit", trace__sys_exit, },
1365 struct perf_session *session;
1368 trace->tool.sample = trace__process_sample;
1369 trace->tool.mmap = perf_event__process_mmap;
1370 trace->tool.mmap2 = perf_event__process_mmap2;
1371 trace->tool.comm = perf_event__process_comm;
1372 trace->tool.exit = perf_event__process_exit;
1373 trace->tool.fork = perf_event__process_fork;
1374 trace->tool.attr = perf_event__process_attr;
1375 trace->tool.tracing_data = perf_event__process_tracing_data;
1376 trace->tool.build_id = perf_event__process_build_id;
1378 trace->tool.ordered_samples = true;
1379 trace->tool.ordering_requires_timestamps = true;
1381 /* add tid to output */
1382 trace->multiple_threads = true;
1384 if (symbol__init() < 0)
1387 session = perf_session__new(input_name, O_RDONLY, 0, false,
1389 if (session == NULL)
1392 err = perf_session__set_tracepoints_handlers(session, handlers);
1396 if (!perf_session__has_tp(session, "raw_syscalls:sys_enter")) {
1397 pr_err("Data file does not have raw_syscalls:sys_enter events\n");
1401 if (!perf_session__has_tp(session, "raw_syscalls:sys_exit")) {
1402 pr_err("Data file does not have raw_syscalls:sys_exit events\n");
1406 err = parse_target_str(trace);
1412 err = perf_session__process_events(session, &trace->tool);
1414 pr_err("Failed to process events, error %d", err);
1417 perf_session__delete(session);
/*
 * Prints the banner and column headings of the per-thread summary to fp,
 * accumulating the fprintf() return values in 'printed'.
 *
 * NOTE(review): the declaration of 'printed' and the return statement are
 * outside this excerpt.
 */
1422 static size_t trace__fprintf_threads_header(FILE *fp)
1426 printed = fprintf(fp, "\n _____________________________________________________________________\n");
1427 printed += fprintf(fp," __) Summary of events (__\n\n");
1428 printed += fprintf(fp," [ task - pid ] [ events ] [ ratio ] [ runtime ]\n");
1429 printed += fprintf(fp," _____________________________________________________________________\n\n");
/*
 * Print the summary header to fp, then one line per thread found in
 * trace->host.threads: comm, tid, number of events, that thread's
 * percentage of all events, and its runtime in milliseconds.
 * The output sizes are accumulated in printed.
 */
1434 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
1436 size_t printed = trace__fprintf_threads_header(fp);
/* Walk every node of the host threads rb-tree. */
1439 for (nd = rb_first(&trace->host.threads); nd; nd = rb_next(nd)) {
1440 struct thread *thread = rb_entry(nd, struct thread, rb_node);
/* Per-thread trace state is kept in the thread's priv pointer. */
1441 struct thread_trace *ttrace = thread->priv;
/* This thread's share of all traced events, as a percentage. */
1448 ratio = (double)ttrace->nr_events / trace->nr_events * 100.0;
/* Choose the highlight color from the ratio. */
1450 color = PERF_COLOR_NORMAL;
1452 color = PERF_COLOR_RED;
1453 else if (ratio > 25.0)
1454 color = PERF_COLOR_GREEN;
1455 else if (ratio > 5.0)
1456 color = PERF_COLOR_YELLOW;
/* Only the comm and the ratio are colored; the rest is plain fprintf(). */
1458 printed += color_fprintf(fp, color, "%20s", thread->comm);
1459 printed += fprintf(fp, " - %-5d :%11lu [", thread->tid, ttrace->nr_events);
1460 printed += color_fprintf(fp, color, "%5.1f%%", ratio);
1461 printed += fprintf(fp, " ] %10.3f ms\n", ttrace->runtime_ms);
/*
 * Option callback used by the "duration" option: converts str to a
 * floating-point value and stores it in trace->duration_filter.
 *
 * opt->value is the struct trace to update; unset is not used.
 */
1467 static int trace__set_duration(const struct option *opt, const char *str,
1468 int unset __maybe_unused)
1470 struct trace *trace = opt->value;
/* NOTE(review): atof() gives no error indication; non-numeric input becomes 0. */
1472 trace->duration_filter = atof(str);
/*
 * Open filename for writing and store the stream in trace->output.
 *
 * If filename already exists and is not empty, it is first renamed to
 * "<filename>.old". Returns 0 on success, or -errno when fopen() fails.
 */
1476 static int trace__open_output(struct trace *trace, const char *filename)
/* Only keep a backup when the existing file has content. */
1480 if (!stat(filename, &st) && st.st_size) {
1481 char oldname[PATH_MAX];
1483 scnprintf(oldname, sizeof(oldname), "%s.old", filename);
/* NOTE(review): the result of rename() is not checked. */
1485 rename(filename, oldname);
/* "w" creates the file or truncates it if it still exists. */
1488 trace->output = fopen(filename, "w");
1490 return trace->output == NULL ? -errno : 0;
1493 int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
1495 const char * const trace_usage[] = {
1496 "perf trace [<options>] [<command>]",
1497 "perf trace [<options>] -- <command> [<options>]",
1500 struct trace trace = {
1501 .audit_machine = audit_detect_machine(),
1510 .user_freq = UINT_MAX,
1511 .user_interval = ULLONG_MAX,
1518 const char *output_name = NULL;
1519 const char *ev_qualifier_str = NULL;
1520 const struct option trace_options[] = {
1521 OPT_BOOLEAN(0, "comm", &trace.show_comm,
1522 "show the thread COMM next to its id"),
1523 OPT_STRING('e', "expr", &ev_qualifier_str, "expr",
1524 "list of events to trace"),
1525 OPT_STRING('o', "output", &output_name, "file", "output file name"),
1526 OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"),
1527 OPT_STRING('p', "pid", &trace.opts.target.pid, "pid",
1528 "trace events on existing process id"),
1529 OPT_STRING('t', "tid", &trace.opts.target.tid, "tid",
1530 "trace events on existing thread id"),
1531 OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide,
1532 "system-wide collection from all CPUs"),
1533 OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu",
1534 "list of cpus to monitor"),
1535 OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit,
1536 "child tasks do not inherit counters"),
1537 OPT_CALLBACK('m', "mmap-pages", &trace.opts.mmap_pages, "pages",
1538 "number of mmap data pages",
1539 perf_evlist__parse_mmap_pages),
1540 OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user",
1542 OPT_CALLBACK(0, "duration", &trace, "float",
1543 "show only events with duration > N.M ms",
1544 trace__set_duration),
1545 OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"),
1546 OPT_INCR('v', "verbose", &verbose, "be more verbose"),
1547 OPT_BOOLEAN('T', "time", &trace.full_time,
1548 "Show full timestamp, not time relative to first start"),
1554 argc = parse_options(argc, argv, trace_options, trace_usage, 0);
1556 if (output_name != NULL) {
1557 err = trace__open_output(&trace, output_name);
1559 perror("failed to create output file");
1564 if (ev_qualifier_str != NULL) {
1565 const char *s = ev_qualifier_str;
1567 trace.not_ev_qualifier = *s == '!';
1568 if (trace.not_ev_qualifier)
1570 trace.ev_qualifier = strlist__new(true, s);
1571 if (trace.ev_qualifier == NULL) {
1572 fputs("Not enough memory to parse event qualifier",
1579 err = perf_target__validate(&trace.opts.target);
1581 perf_target__strerror(&trace.opts.target, err, bf, sizeof(bf));
1582 fprintf(trace.output, "%s", bf);
1586 err = perf_target__parse_uid(&trace.opts.target);
1588 perf_target__strerror(&trace.opts.target, err, bf, sizeof(bf));
1589 fprintf(trace.output, "%s", bf);
1593 if (!argc && perf_target__none(&trace.opts.target))
1594 trace.opts.target.system_wide = true;
1597 err = trace__replay(&trace);
1599 err = trace__run(&trace, argc, argv);
1601 if (trace.sched && !err)
1602 trace__fprintf_thread_summary(&trace, trace.output);
1605 if (output_name != NULL)
1606 fclose(trace.output);