1 #include <traceevent/event-parse.h>
3 #include "util/color.h"
4 #include "util/debug.h"
5 #include "util/evlist.h"
6 #include "util/machine.h"
7 #include "util/session.h"
8 #include "util/thread.h"
9 #include "util/parse-options.h"
10 #include "util/strlist.h"
11 #include "util/intlist.h"
12 #include "util/thread_map.h"
13 #include "util/stat.h"
14 #include "trace-event.h"
15 #include "util/parse-events.h"
19 #include <sys/eventfd.h>
21 #include <linux/futex.h>
23 /* For older distros: */
25 # define MAP_STACK 0x20000
29 # define MADV_HWPOISON 100
32 #ifndef MADV_MERGEABLE
33 # define MADV_MERGEABLE 12
36 #ifndef MADV_UNMERGEABLE
37 # define MADV_UNMERGEABLE 13
/*
 * Reader callbacks of a tracepoint field; both receive the field descriptor
 * and the sample being decoded.  The enclosing declaration is not shown in
 * this excerpt.
 */
/* Returns the field's value widened to u64. */
43 u64 (*integer)(struct tp_field *field, struct perf_sample *sample);
/* Returns a pointer for the field (tp_field__ptr() points into the sample's raw data). */
44 void *(*pointer)(struct tp_field *field, struct perf_sample *sample);
/*
 * Generators for the integer readers installed by tp_field__init_uint().
 *
 * TP_UINT_FIELD(bits) emits tp_field__u<bits>(), which loads an unsigned
 * <bits>-wide value from sample->raw_data at field->offset and returns it
 * as u64.
 *
 * TP_UINT_FIELD__SWAPPED(bits) emits tp_field__swapped_u<bits>(), which
 * performs the same load and passes the value through bswap_<bits>()
 * before returning it; it is instantiated below for 16, 32 and 64 bits.
 * The instantiations of TP_UINT_FIELD are not shown in this excerpt.
 */
48 #define TP_UINT_FIELD(bits) \
49 static u64 tp_field__u##bits(struct tp_field *field, struct perf_sample *sample) \
51 return *(u##bits *)(sample->raw_data + field->offset); \
59 #define TP_UINT_FIELD__SWAPPED(bits) \
60 static u64 tp_field__swapped_u##bits(struct tp_field *field, struct perf_sample *sample) \
62 u##bits value = *(u##bits *)(sample->raw_data + field->offset); \
63 return bswap_##bits(value);\
66 TP_UINT_FIELD__SWAPPED(16);
67 TP_UINT_FIELD__SWAPPED(32);
68 TP_UINT_FIELD__SWAPPED(64);
/*
 * Prepare @field to read the unsigned integer described by @format_field:
 * record the field's offset and install the reader that matches its size.
 * For the 16-, 32- and 64-bit readers the byte-swapping variant is chosen
 * when needs_swap is set.  The case labels and the return statements are
 * not shown in this excerpt.
 */
70 static int tp_field__init_uint(struct tp_field *field,
71 struct format_field *format_field,
74 field->offset = format_field->offset;
/* Select the reader by field width. */
76 switch (format_field->size) {
/* The 8-bit reader has no swapped variant. */
78 field->integer = tp_field__u8;
81 field->integer = needs_swap ? tp_field__swapped_u16 : tp_field__u16;
84 field->integer = needs_swap ? tp_field__swapped_u32 : tp_field__u32;
87 field->integer = needs_swap ? tp_field__swapped_u64 : tp_field__u64;
/* Pointer reader: returns the address of the field inside the sample's raw data. */
96 static void *tp_field__ptr(struct tp_field *field, struct perf_sample *sample)
98 return sample->raw_data + field->offset;
/*
 * Prepare @field for pointer access: record the offset taken from
 * @format_field and install tp_field__ptr() as the pointer reader.
 * The return statement is not shown in this excerpt.
 */
101 static int tp_field__init_ptr(struct tp_field *field, struct format_field *format_field)
103 field->offset = format_field->offset;
104 field->pointer = tp_field__ptr;
/*
 * Payload fields of the syscall tracepoints: this file sets up 'args' as a
 * pointer field on the sys_enter event and 'ret' as an integer field on the
 * sys_exit event (see perf_evlist__add_syscall_newtp()).
 */
111 struct tp_field args, ret;
/*
 * Look up the tracepoint field called 'name' on @evsel and set @field up
 * as an unsigned-integer reader for it, honouring evsel->needs_swap.
 * The value returned when the field does not exist is not shown in this
 * excerpt.
 */
115 static int perf_evsel__init_tp_uint_field(struct perf_evsel *evsel,
116 struct tp_field *field,
119 struct format_field *format_field = perf_evsel__field(evsel, name);
/* The event has no field with that name. */
121 if (format_field == NULL)
124 return tp_field__init_uint(field, format_field, evsel->needs_swap);
/*
 * Initialise member 'name' of the struct syscall_tp stored in evsel->priv
 * as an integer field, using the tracepoint field of the same name
 * (#name turns the member name into the lookup string).
 */
127 #define perf_evsel__init_sc_tp_uint_field(evsel, name) \
128 ({ struct syscall_tp *sc = evsel->priv;\
129 perf_evsel__init_tp_uint_field(evsel, &sc->name, #name); })
/*
 * Look up the tracepoint field called 'name' on @evsel and set @field up
 * for pointer access to it.  The value returned when the field does not
 * exist is not shown in this excerpt.
 */
131 static int perf_evsel__init_tp_ptr_field(struct perf_evsel *evsel,
132 struct tp_field *field,
135 struct format_field *format_field = perf_evsel__field(evsel, name);
/* The event has no field with that name. */
137 if (format_field == NULL)
140 return tp_field__init_ptr(field, format_field);
/*
 * Pointer-field counterpart of perf_evsel__init_sc_tp_uint_field():
 * initialise member 'name' of the struct syscall_tp in evsel->priv from
 * the tracepoint field of the same name.
 */
143 #define perf_evsel__init_sc_tp_ptr_field(evsel, name) \
144 ({ struct syscall_tp *sc = evsel->priv;\
145 perf_evsel__init_tp_ptr_field(evsel, &sc->name, #name); })
/*
 * Destroy @evsel through perf_evsel__delete().
 * NOTE(review): the name suggests evsel->priv is released first, but that
 * statement is not visible in this excerpt - confirm.
 */
147 static void perf_evsel__delete_priv(struct perf_evsel *evsel)
150 perf_evsel__delete(evsel);
/*
 * Allocate the struct syscall_tp private area of @evsel, initialise its
 * 'id' integer field and install @handler as the event handler.
 * The error paths and return values are not shown in this excerpt.
 */
153 static int perf_evsel__init_syscall_tp(struct perf_evsel *evsel, void *handler)
155 evsel->priv = malloc(sizeof(struct syscall_tp));
156 if (evsel->priv != NULL) {
/* Taken when setting up the 'id' field reports a non-zero result. */
157 if (perf_evsel__init_sc_tp_uint_field(evsel, id))
160 evsel->handler = handler;
/*
 * Create the evsel for the syscall tracepoint @direction (the callers in
 * this file pass "sys_enter" and "sys_exit"), first in the "raw_syscalls"
 * subsystem and then in "syscalls", and attach the syscall_tp private data
 * and @handler to it.  The conditions guarding the fallback and the
 * cleanup path are not shown in this excerpt.
 */
171 static struct perf_evsel *perf_evsel__syscall_newtp(const char *direction, void *handler)
173 struct perf_evsel *evsel = perf_evsel__newtp("raw_syscalls", direction);
175 /* Older kernels (e.g., RHEL6) use syscalls:{enter,exit} */
177 evsel = perf_evsel__newtp("syscalls", direction);
180 if (perf_evsel__init_syscall_tp(evsel, handler))
187 perf_evsel__delete_priv(evsel);
/*
 * Read member 'name' of the struct syscall_tp in evsel->priv from @sample
 * through the integer reader installed on that field.
 */
191 #define perf_evsel__sc_tp_uint(evsel, name, sample) \
192 ({ struct syscall_tp *fields = evsel->priv; \
193 fields->name.integer(&fields->name, sample); })
/* Same as above, but through the field's pointer reader. */
195 #define perf_evsel__sc_tp_ptr(evsel, name, sample) \
196 ({ struct syscall_tp *fields = evsel->priv; \
197 fields->name.pointer(&fields->name, sample); })
/*
 * Create the sys_enter and sys_exit syscall tracepoint evsels, set up the
 * 'args' pointer field on the former and the 'ret' integer field on the
 * latter, and add both to @evlist.  When a step fails, the evsels created
 * so far are destroyed through the out_delete_* labels.  The return
 * statements are not shown in this excerpt.
 */
199 static int perf_evlist__add_syscall_newtp(struct perf_evlist *evlist,
200 void *sys_enter_handler,
201 void *sys_exit_handler)
204 struct perf_evsel *sys_enter, *sys_exit;
206 sys_enter = perf_evsel__syscall_newtp("sys_enter", sys_enter_handler);
207 if (sys_enter == NULL)
/* sys_enter carries the syscall arguments, accessed by pointer. */
210 if (perf_evsel__init_sc_tp_ptr_field(sys_enter, args))
211 goto out_delete_sys_enter;
213 sys_exit = perf_evsel__syscall_newtp("sys_exit", sys_exit_handler);
214 if (sys_exit == NULL)
215 goto out_delete_sys_enter;
/* sys_exit carries the return value, read as an integer. */
217 if (perf_evsel__init_sc_tp_uint_field(sys_exit, ret))
218 goto out_delete_sys_exit;
220 perf_evlist__add(evlist, sys_enter);
221 perf_evlist__add(evlist, sys_exit);
/* Error unwinding: the sys_exit cleanup falls through into the sys_enter cleanup. */
228 perf_evsel__delete_priv(sys_exit);
229 out_delete_sys_enter:
230 perf_evsel__delete_priv(sys_enter);
/*
 * Thread the argument being formatted belongs to; the fd formatters read
 * it as arg->thread.  Presumably a member of struct syscall_arg, whose
 * declaration is not shown in this excerpt.
 */
237 struct thread *thread;
/*
 * String-table support.  The lookup code in this file reads a strarray
 * through ->entries (the names), ->nr_entries (their count) and ->offset
 * (subtracted from the raw value before indexing).
 *
 * DEFINE_STRARRAY(array) and DEFINE_STRARRAY_OFFSET(array, off) declare
 * strarray__<array> for an existing string array and size it with
 * ARRAY_SIZE(array).  The remaining initialisers are not shown in this
 * excerpt.
 */
247 const char **entries;
250 #define DEFINE_STRARRAY(array) struct strarray strarray__##array = { \
251 .nr_entries = ARRAY_SIZE(array), \
255 #define DEFINE_STRARRAY_OFFSET(array, off) struct strarray strarray__##array = { \
257 .nr_entries = ARRAY_SIZE(array), \
261 static size_t __syscall_arg__scnprintf_strarray(char *bf, size_t size,
263 struct syscall_arg *arg)
265 struct strarray *sa = arg->parm;
266 int idx = arg->val - sa->offset;
268 if (idx < 0 || idx >= sa->nr_entries)
269 return scnprintf(bf, size, intfmt, arg->val);
271 return scnprintf(bf, size, "%s", sa->entries[idx]);
/*
 * Print a string-table argument; values without a name are printed with
 * the "%d" format.
 */
274 static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size,
275 struct syscall_arg *arg)
277 return __syscall_arg__scnprintf_strarray(bf, size, "%d", arg);
/* Short alias used in the per-syscall formatter table. */
280 #define SCA_STRARRAY syscall_arg__scnprintf_strarray
/*
 * Print a string-table argument; values without a name are printed with
 * the "%#x" format.
 */
282 static size_t syscall_arg__scnprintf_strhexarray(char *bf, size_t size,
283 struct syscall_arg *arg)
285 return __syscall_arg__scnprintf_strarray(bf, size, "%#x", arg);
/* Short alias used in the per-syscall formatter table. */
288 #define SCA_STRHEXARRAY syscall_arg__scnprintf_strhexarray
/* Forward declaration; the definition appears further down in this file. */
290 static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
291 struct syscall_arg *arg);
293 #define SCA_FD syscall_arg__scnprintf_fd
/*
 * Formatter for the directory descriptor of the *at() syscalls: prints
 * "CWD" in one case and otherwise formats the argument like a regular fd.
 * NOTE(review): the test selecting the "CWD" branch is not shown in this
 * excerpt; presumably it compares the value with AT_FDCWD - confirm.
 */
295 static size_t syscall_arg__scnprintf_fd_at(char *bf, size_t size,
296 struct syscall_arg *arg)
301 return scnprintf(bf, size, "CWD");
303 return syscall_arg__scnprintf_fd(bf, size, arg);
306 #define SCA_FDAT syscall_arg__scnprintf_fd_at
/* Forward declaration; the definition appears further down in this file. */
308 static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
309 struct syscall_arg *arg);
311 #define SCA_CLOSE_FD syscall_arg__scnprintf_close_fd
/* Print the argument value in hexadecimal using the "%#lx" format. */
313 static size_t syscall_arg__scnprintf_hex(char *bf, size_t size,
314 struct syscall_arg *arg)
316 return scnprintf(bf, size, "%#lx", arg->val);
319 #define SCA_HEX syscall_arg__scnprintf_hex
/*
 * Format a memory protection mask: "NONE" for PROT_NONE, otherwise the
 * names of the PROT_ bits that are set, joined with '|'.  A final hex
 * field is appended at the end; its guard is not shown in this excerpt
 * (presumably it covers only the bits that were not named - confirm).
 */
321 static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size,
322 struct syscall_arg *arg)
324 int printed = 0, prot = arg->val;
326 if (prot == PROT_NONE)
327 return scnprintf(bf, size, "NONE");
/* P_MMAP_PROT(n): when PROT_<n> is set, append its name, '|'-separated from earlier output. */
328 #define P_MMAP_PROT(n) \
329 if (prot & PROT_##n) { \
330 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
340 P_MMAP_PROT(GROWSDOWN);
341 P_MMAP_PROT(GROWSUP);
/* Numeric tail, separated with '|' when something was already printed. */
345 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", prot);
350 #define SCA_MMAP_PROT syscall_arg__scnprintf_mmap_prot
/*
 * Format the mmap flags argument as the names of the MAP_ bits that are
 * set, joined with '|'.  A final hex field is appended at the end; its
 * guard is not shown in this excerpt (presumably it covers only the bits
 * that were not named - confirm).
 */
352 static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size,
353 struct syscall_arg *arg)
355 int printed = 0, flags = arg->val;
/* P_MMAP_FLAG(n): when MAP_<n> is set, append its name, '|'-separated from earlier output. */
357 #define P_MMAP_FLAG(n) \
358 if (flags & MAP_##n) { \
359 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
364 P_MMAP_FLAG(PRIVATE);
368 P_MMAP_FLAG(ANONYMOUS);
369 P_MMAP_FLAG(DENYWRITE);
370 P_MMAP_FLAG(EXECUTABLE);
373 P_MMAP_FLAG(GROWSDOWN);
375 P_MMAP_FLAG(HUGETLB);
378 P_MMAP_FLAG(NONBLOCK);
379 P_MMAP_FLAG(NORESERVE);
380 P_MMAP_FLAG(POPULATE);
/* Only decoded when the headers in use define this flag. */
382 #ifdef MAP_UNINITIALIZED
383 P_MMAP_FLAG(UNINITIALIZED);
/* Numeric tail, separated with '|' when something was already printed. */
388 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
393 #define SCA_MMAP_FLAGS syscall_arg__scnprintf_mmap_flags
/*
 * Format the madvise behavior argument: the MADV_ constant's name when the
 * value is one of the cases listed, otherwise the value in hex.
 */
395 static size_t syscall_arg__scnprintf_madvise_behavior(char *bf, size_t size,
396 struct syscall_arg *arg)
398 int behavior = arg->val;
/* P_MADV_BHV(n): switch case that prints the name n for MADV_<n> and returns. */
401 #define P_MADV_BHV(n) case MADV_##n: return scnprintf(bf, size, #n)
404 P_MADV_BHV(SEQUENTIAL);
405 P_MADV_BHV(WILLNEED);
406 P_MADV_BHV(DONTNEED);
408 P_MADV_BHV(DONTFORK);
410 P_MADV_BHV(HWPOISON);
/* Only decoded when the headers in use define this constant. */
411 #ifdef MADV_SOFT_OFFLINE
412 P_MADV_BHV(SOFT_OFFLINE);
414 P_MADV_BHV(MERGEABLE);
415 P_MADV_BHV(UNMERGEABLE);
417 P_MADV_BHV(HUGEPAGE);
419 #ifdef MADV_NOHUGEPAGE
420 P_MADV_BHV(NOHUGEPAGE);
423 P_MADV_BHV(DONTDUMP);
/* No case matched: print the raw value. */
432 return scnprintf(bf, size, "%#x", behavior);
435 #define SCA_MADV_BHV syscall_arg__scnprintf_madvise_behavior
/*
 * Format the flock operation argument as LOCK_ names joined with '|'.
 * A name is printed only when all bits of the corresponding LOCK_ constant
 * are set.  "NONE" is returned in one case whose condition is not shown in
 * this excerpt (presumably op == 0 - confirm), and a final hex field is
 * appended at the end under a guard that is not shown either.
 */
437 static size_t syscall_arg__scnprintf_flock(char *bf, size_t size,
438 struct syscall_arg *arg)
440 int printed = 0, op = arg->val;
443 return scnprintf(bf, size, "NONE");
445 if ((op & LOCK_##cmd) == LOCK_##cmd) { \
446 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #cmd); \
461 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", op);
466 #define SCA_FLOCK syscall_arg__scnprintf_flock
/*
 * Format the futex 'op' argument: the command name (or the command in hex
 * when it is not one of the cases listed), followed by "|PRIV" and
 * "|CLKRT" when FUTEX_PRIVATE_FLAG and FUTEX_CLOCK_REALTIME are set.
 *
 * Side effect: depending on the command, bits are set in arg->mask.  The
 * SCF_ constants are one bit per syscall argument position; presumably the
 * bits mark arguments the command does not use so that they are not
 * printed - confirm against the code consuming arg->mask.
 */
468 static size_t syscall_arg__scnprintf_futex_op(char *bf, size_t size, struct syscall_arg *arg)
470 enum syscall_futex_args {
471 SCF_UADDR = (1 << 0),
474 SCF_TIMEOUT = (1 << 3),
475 SCF_UADDR2 = (1 << 4),
/* Strip the flag bits to obtain the command proper. */
479 int cmd = op & FUTEX_CMD_MASK;
/* P_FUTEX_OP(n): case label that prints the command name; the mask update and break follow on the same line. */
483 #define P_FUTEX_OP(n) case FUTEX_##n: printed = scnprintf(bf, size, #n);
484 P_FUTEX_OP(WAIT); arg->mask |= SCF_VAL3|SCF_UADDR2; break;
485 P_FUTEX_OP(WAKE); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
486 P_FUTEX_OP(FD); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
487 P_FUTEX_OP(REQUEUE); arg->mask |= SCF_VAL3|SCF_TIMEOUT; break;
488 P_FUTEX_OP(CMP_REQUEUE); arg->mask |= SCF_TIMEOUT; break;
489 P_FUTEX_OP(CMP_REQUEUE_PI); arg->mask |= SCF_TIMEOUT; break;
490 P_FUTEX_OP(WAKE_OP); break;
491 P_FUTEX_OP(LOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
492 P_FUTEX_OP(UNLOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
493 P_FUTEX_OP(TRYLOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2; break;
494 P_FUTEX_OP(WAIT_BITSET); arg->mask |= SCF_UADDR2; break;
495 P_FUTEX_OP(WAKE_BITSET); arg->mask |= SCF_UADDR2; break;
496 P_FUTEX_OP(WAIT_REQUEUE_PI); break;
497 default: printed = scnprintf(bf, size, "%#x", cmd); break;
/* Flag bits are taken from the unmasked op value. */
500 if (op & FUTEX_PRIVATE_FLAG)
501 printed += scnprintf(bf + printed, size - printed, "|PRIV");
503 if (op & FUTEX_CLOCK_REALTIME)
504 printed += scnprintf(bf + printed, size - printed, "|CLKRT");
509 #define SCA_FUTEX_OP syscall_arg__scnprintf_futex_op
/*
 * String tables for arguments printed through the strarray formatters.
 * Each array is wrapped in a strarray__<name> object; a table entry is
 * selected by (raw value - offset), so the order of the names matters.
 * Some initialiser lines of the longer tables are not shown in this
 * excerpt.
 */
/* epoll_ctl ops; offset 1, so the first name corresponds to raw value 1. */
511 static const char *epoll_ctl_ops[] = { "ADD", "DEL", "MOD", };
512 static DEFINE_STRARRAY_OFFSET(epoll_ctl_ops, 1);
/* Interval timer selectors, used for getitimer/setitimer. */
514 static const char *itimers[] = { "REAL", "VIRTUAL", "PROF", };
515 static DEFINE_STRARRAY(itimers);
/* lseek whence values. */
517 static const char *whences[] = { "SET", "CUR", "END",
525 static DEFINE_STRARRAY(whences);
/* fcntl commands. */
527 static const char *fcntl_cmds[] = {
528 "DUPFD", "GETFD", "SETFD", "GETFL", "SETFL", "GETLK", "SETLK",
529 "SETLKW", "SETOWN", "GETOWN", "SETSIG", "GETSIG", "F_GETLK64",
530 "F_SETLK64", "F_SETLKW64", "F_SETOWN_EX", "F_GETOWN_EX",
533 static DEFINE_STRARRAY(fcntl_cmds);
/* Resource names for getrlimit/setrlimit/prlimit64. */
535 static const char *rlimit_resources[] = {
536 "CPU", "FSIZE", "DATA", "STACK", "CORE", "RSS", "NPROC", "NOFILE",
537 "MEMLOCK", "AS", "LOCKS", "SIGPENDING", "MSGQUEUE", "NICE", "RTPRIO",
540 static DEFINE_STRARRAY(rlimit_resources);
/* 'how' argument of rt_sigprocmask. */
542 static const char *sighow[] = { "BLOCK", "UNBLOCK", "SETMASK", };
543 static DEFINE_STRARRAY(sighow);
/* Clock ids, used for clock_gettime. */
545 static const char *clockid[] = {
546 "REALTIME", "MONOTONIC", "PROCESS_CPUTIME_ID", "THREAD_CPUTIME_ID",
547 "MONOTONIC_RAW", "REALTIME_COARSE", "MONOTONIC_COARSE",
549 static DEFINE_STRARRAY(clockid);
/* Socket address families, used for socket/socketpair. */
551 static const char *socket_families[] = {
552 "UNSPEC", "LOCAL", "INET", "AX25", "IPX", "APPLETALK", "NETROM",
553 "BRIDGE", "ATMPVC", "X25", "INET6", "ROSE", "DECnet", "NETBEUI",
554 "SECURITY", "KEY", "NETLINK", "PACKET", "ASH", "ECONET", "ATMSVC",
555 "RDS", "SNA", "IRDA", "PPPOX", "WANPIPE", "LLC", "IB", "CAN", "TIPC",
556 "BLUETOOTH", "IUCV", "RXRPC", "ISDN", "PHONET", "IEEE802154", "CAIF",
557 "ALG", "NFC", "VSOCK",
559 static DEFINE_STRARRAY(socket_families);
/* Fallback for headers that do not define SOCK_TYPE_MASK. */
561 #ifndef SOCK_TYPE_MASK
562 #define SOCK_TYPE_MASK 0xf
/*
 * Format the socket 'type' argument.  The value is split into the type
 * proper (the bits inside SOCK_TYPE_MASK) and the flag bits above it.  The
 * type is printed by name, or in hex when it is not one of the cases
 * listed; each known flag is appended as "|NAME" and cleared, and a final
 * "|<hex>" field is appended at the end under a guard that is not shown in
 * this excerpt (presumably only when unknown flag bits remain - confirm).
 */
565 static size_t syscall_arg__scnprintf_socket_type(char *bf, size_t size,
566 struct syscall_arg *arg)
570 flags = type & ~SOCK_TYPE_MASK;
572 type &= SOCK_TYPE_MASK;
574 * Can't use a strarray, MIPS may override for ABI reasons.
577 #define P_SK_TYPE(n) case SOCK_##n: printed = scnprintf(bf, size, #n); break;
582 P_SK_TYPE(SEQPACKET);
587 printed = scnprintf(bf, size, "%#x", type);
/* P_SK_FLAG(n): append "|n" when SOCK_<n> is set and clear that bit from flags. */
590 #define P_SK_FLAG(n) \
591 if (flags & SOCK_##n) { \
592 printed += scnprintf(bf + printed, size - printed, "|%s", #n); \
593 flags &= ~SOCK_##n; \
601 printed += scnprintf(bf + printed, size - printed, "|%#x", flags);
606 #define SCA_SK_TYPE syscall_arg__scnprintf_socket_type
/*
 * Fallback values for MSG_ constants that the headers in use may lack;
 * some of the surrounding preprocessor guards are not shown in this
 * excerpt.
 */
609 #define MSG_PROBE 0x10
611 #ifndef MSG_WAITFORONE
612 #define MSG_WAITFORONE 0x10000
614 #ifndef MSG_SENDPAGE_NOTLAST
615 #define MSG_SENDPAGE_NOTLAST 0x20000
618 #define MSG_FASTOPEN 0x20000000
/*
 * Format the flags argument of the send/recv family as the names of the
 * MSG_ bits that are set, joined with '|'.  "NONE" is returned in one case
 * whose condition is not shown in this excerpt (presumably flags == 0 -
 * confirm); a final hex field is appended at the end under a guard that is
 * not shown either.
 */
621 static size_t syscall_arg__scnprintf_msg_flags(char *bf, size_t size,
622 struct syscall_arg *arg)
624 int printed = 0, flags = arg->val;
627 return scnprintf(bf, size, "NONE");
/* P_MSG_FLAG(n): when MSG_<n> is set, append its name, '|'-separated from earlier output. */
628 #define P_MSG_FLAG(n) \
629 if (flags & MSG_##n) { \
630 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
636 P_MSG_FLAG(DONTROUTE);
641 P_MSG_FLAG(DONTWAIT);
648 P_MSG_FLAG(ERRQUEUE);
649 P_MSG_FLAG(NOSIGNAL);
651 P_MSG_FLAG(WAITFORONE);
652 P_MSG_FLAG(SENDPAGE_NOTLAST);
653 P_MSG_FLAG(FASTOPEN);
654 P_MSG_FLAG(CMSG_CLOEXEC);
/* Numeric tail, separated with '|' when something was already printed. */
658 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
663 #define SCA_MSG_FLAGS syscall_arg__scnprintf_msg_flags
/*
 * Format the access() mode argument: "F" for F_OK, otherwise the letters
 * of the <n>_OK bits that are set, concatenated without a separator.  A
 * final "|<hex>" field is appended at the end under a guard that is not
 * shown in this excerpt (presumably only for unknown bits - confirm).
 */
665 static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size,
666 struct syscall_arg *arg)
671 if (mode == F_OK) /* 0 */
672 return scnprintf(bf, size, "F");
674 if (mode & n##_OK) { \
675 printed += scnprintf(bf + printed, size - printed, "%s", #n); \
685 printed += scnprintf(bf + printed, size - printed, "|%#x", mode);
690 #define SCA_ACCMODE syscall_arg__scnprintf_access_mode
/*
 * Format the open flags argument as O_ names joined with '|'.
 *
 * Side effect: when O_CREAT is absent, the bit for the argument that
 * follows the flags (the mode) is set in arg->mask.
 *
 * "RDONLY" is returned in one case whose condition is not shown in this
 * excerpt (presumably flags == 0 - confirm).  "SYNC" is printed only when
 * every bit of O_SYNC is set.  A final hex field is appended at the end
 * under a guard that is not shown either.
 */
692 static size_t syscall_arg__scnprintf_open_flags(char *bf, size_t size,
693 struct syscall_arg *arg)
695 int printed = 0, flags = arg->val;
697 if (!(flags & O_CREAT))
698 arg->mask |= 1 << (arg->idx + 1); /* Mask the mode parm */
701 return scnprintf(bf, size, "RDONLY");
703 if (flags & O_##n) { \
704 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
728 if ((flags & O_SYNC) == O_SYNC)
729 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", "SYNC");
741 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
746 #define SCA_OPEN_FLAGS syscall_arg__scnprintf_open_flags
/*
 * Format the eventfd2 flags argument as the names of the EFD_ bits that
 * are set, joined with '|'.  "NONE" is returned in one case whose
 * condition is not shown in this excerpt (presumably flags == 0 -
 * confirm); a final hex field is appended at the end under a guard that is
 * not shown either.
 */
748 static size_t syscall_arg__scnprintf_eventfd_flags(char *bf, size_t size,
749 struct syscall_arg *arg)
751 int printed = 0, flags = arg->val;
754 return scnprintf(bf, size, "NONE");
756 if (flags & EFD_##n) { \
757 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
767 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
772 #define SCA_EFD_FLAGS syscall_arg__scnprintf_eventfd_flags
/*
 * Format the pipe2 flags argument as the names of the O_ bits that are
 * set, joined with '|'.  A final hex field is appended at the end under a
 * guard that is not shown in this excerpt.
 */
774 static size_t syscall_arg__scnprintf_pipe_flags(char *bf, size_t size,
775 struct syscall_arg *arg)
777 int printed = 0, flags = arg->val;
780 if (flags & O_##n) { \
781 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
790 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
795 #define SCA_PIPE_FLAGS syscall_arg__scnprintf_pipe_flags
/*
 * Format a signal number argument: the signal's name without the "SIG"
 * prefix for the cases listed (not shown in this excerpt), otherwise the
 * value in hex.
 */
797 static size_t syscall_arg__scnprintf_signum(char *bf, size_t size, struct syscall_arg *arg)
/* P_SIGNUM(n): switch case that prints the name n for SIG<n> and returns. */
802 #define P_SIGNUM(n) case SIG##n: return scnprintf(bf, size, #n)
837 return scnprintf(bf, size, "%#x", sig);
840 #define SCA_SIGNUM syscall_arg__scnprintf_signum
842 #define TCGETS 0x5401
/*
 * Names of the tty ioctl requests.
 *
 * The table is looked up with the index (request - 0x5401), because the
 * strarray built on it is declared with offset 0x5401 (TCGETS).  Entries
 * placed with a designator must therefore use that rebased index, not the
 * low byte of the request number: TIOCSBRK (0x5427) lives at 0x26, not at
 * 0x27.  TIOCTL_IDX() performs the rebasing so that the request number can
 * be written as it appears in the ioctl headers.
 *
 * Slots skipped by a designator are NULL.
 */
#define TIOCTL_IDX(request) ((request) - 0x5401)
static const char *tioctls[] = {
	"TCGETS", "TCSETS", "TCSETSW", "TCSETSF", "TCGETA", "TCSETA", "TCSETAW",
	"TCSETAF", "TCSBRK", "TCXONC", "TCFLSH", "TIOCEXCL", "TIOCNXCL",
	"TIOCSCTTY", "TIOCGPGRP", "TIOCSPGRP", "TIOCOUTQ", "TIOCSTI",
	"TIOCGWINSZ", "TIOCSWINSZ", "TIOCMGET", "TIOCMBIS", "TIOCMBIC",
	"TIOCMSET", "TIOCGSOFTCAR", "TIOCSSOFTCAR", "FIONREAD", "TIOCLINUX",
	"TIOCCONS", "TIOCGSERIAL", "TIOCSSERIAL", "TIOCPKT", "FIONBIO",
	"TIOCNOTTY", "TIOCSETD", "TIOCGETD", "TCSBRKP",
	[TIOCTL_IDX(0x5427)] = "TIOCSBRK",
	"TIOCCBRK", "TIOCGSID", "TCGETS2", "TCSETS2", "TCSETSW2", "TCSETSF2",
	"TIOCGRS485", "TIOCSRS485", "TIOCGPTN", "TIOCSPTLCK",
	"TIOCGDEV||TCGETX", "TCSETX", "TCSETXF", "TCSETXW", "TIOCSIG",
	"TIOCVHANGUP", "TIOCGPKT", "TIOCGPTLCK", "TIOCGEXCL",
	[TIOCTL_IDX(0x5450)] = "FIONCLEX", "FIOCLEX", "FIOASYNC", "TIOCSERCONFIG",
	"TIOCSERGWILD", "TIOCSERSWILD", "TIOCGLCKTRMIOS", "TIOCSLCKTRMIOS",
	"TIOCSERGSTRUCT", "TIOCSERGETLSR", "TIOCSERGETMULTI", "TIOCSERSETMULTI",
	"TIOCMIWAIT", "TIOCGICOUNT",
	[TIOCTL_IDX(0x5460)] = "FIOQSIZE",
};
#undef TIOCTL_IDX
/*
 * strarray over tioctls; with offset 0x5401 (the TCGETS value) the first
 * table entry names request 0x5401 and entry i names request 0x5401 + i.
 */
862 static DEFINE_STRARRAY_OFFSET(tioctls, 0x5401);
/*
 * Table-row helper: format argument number 'arg' with SCA_STRARRAY, using
 * strarray__<array> as its string table.  The 'name' parameter is not used
 * in the expansion; at the call sites it carries the argument's name as
 * documentation.
 */
864 #define STRARRAY(arg, name, array) \
865 .arg_scnprintf = { [arg] = SCA_STRARRAY, }, \
866 .arg_parm = { [arg] = &strarray__##array, }
/*
 * Per-syscall formatting overrides.  The rows below set, per syscall name:
 * arg_scnprintf[i], the formatter for argument i; arg_parm[i], extra data
 * for that formatter (a strarray for SCA_STRARRAY / SCA_STRHEXARRAY); an
 * optional alias name; and the errmsg / timeout / hexret flags.
 *
 * NOTE(review): the remaining struct members and the array declarator are
 * not shown in this excerpt.  If these rows initialise syscall_fmts, they
 * must stay sorted by .name in strcmp() order, because syscall_fmt__find()
 * locates entries with bsearch().
 */
868 static struct syscall_fmt {
/* One optional formatter per syscall argument, six slots. */
871 size_t (*arg_scnprintf[6])(char *bf, size_t size, struct syscall_arg *arg);
877 { .name = "access", .errmsg = true,
878 .arg_scnprintf = { [1] = SCA_ACCMODE, /* mode */ }, },
879 { .name = "arch_prctl", .errmsg = true, .alias = "prctl", },
880 { .name = "brk", .hexret = true,
881 .arg_scnprintf = { [0] = SCA_HEX, /* brk */ }, },
882 { .name = "clock_gettime", .errmsg = true, STRARRAY(0, clk_id, clockid), },
883 { .name = "close", .errmsg = true,
884 .arg_scnprintf = { [0] = SCA_CLOSE_FD, /* fd */ }, },
885 { .name = "connect", .errmsg = true, },
886 { .name = "dup", .errmsg = true,
887 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
888 { .name = "dup2", .errmsg = true,
889 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
890 { .name = "dup3", .errmsg = true,
891 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
892 { .name = "epoll_ctl", .errmsg = true, STRARRAY(1, op, epoll_ctl_ops), },
893 { .name = "eventfd2", .errmsg = true,
894 .arg_scnprintf = { [1] = SCA_EFD_FLAGS, /* flags */ }, },
895 { .name = "faccessat", .errmsg = true,
896 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
897 { .name = "fadvise64", .errmsg = true,
898 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
899 { .name = "fallocate", .errmsg = true,
900 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
901 { .name = "fchdir", .errmsg = true,
902 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
903 { .name = "fchmod", .errmsg = true,
904 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
905 { .name = "fchmodat", .errmsg = true,
906 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
907 { .name = "fchown", .errmsg = true,
908 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
909 { .name = "fchownat", .errmsg = true,
910 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
911 { .name = "fcntl", .errmsg = true,
912 .arg_scnprintf = { [0] = SCA_FD, /* fd */
913 [1] = SCA_STRARRAY, /* cmd */ },
914 .arg_parm = { [1] = &strarray__fcntl_cmds, /* cmd */ }, },
915 { .name = "fdatasync", .errmsg = true,
916 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
917 { .name = "flock", .errmsg = true,
918 .arg_scnprintf = { [0] = SCA_FD, /* fd */
919 [1] = SCA_FLOCK, /* cmd */ }, },
920 { .name = "fsetxattr", .errmsg = true,
921 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
922 { .name = "fstat", .errmsg = true, .alias = "newfstat",
923 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
924 { .name = "fstatat", .errmsg = true, .alias = "newfstatat",
925 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
926 { .name = "fstatfs", .errmsg = true,
927 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
928 { .name = "fsync", .errmsg = true,
929 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
930 { .name = "ftruncate", .errmsg = true,
931 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
932 { .name = "futex", .errmsg = true,
933 .arg_scnprintf = { [1] = SCA_FUTEX_OP, /* op */ }, },
934 { .name = "futimesat", .errmsg = true,
935 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
936 { .name = "getdents", .errmsg = true,
937 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
938 { .name = "getdents64", .errmsg = true,
939 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
940 { .name = "getitimer", .errmsg = true, STRARRAY(0, which, itimers), },
941 { .name = "getrlimit", .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
942 { .name = "ioctl", .errmsg = true,
943 .arg_scnprintf = { [0] = SCA_FD, /* fd */
944 [1] = SCA_STRHEXARRAY, /* cmd */
945 [2] = SCA_HEX, /* arg */ },
946 .arg_parm = { [1] = &strarray__tioctls, /* cmd */ }, },
947 { .name = "kill", .errmsg = true,
948 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
949 { .name = "linkat", .errmsg = true,
950 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
951 { .name = "lseek", .errmsg = true,
952 .arg_scnprintf = { [0] = SCA_FD, /* fd */
953 [2] = SCA_STRARRAY, /* whence */ },
954 .arg_parm = { [2] = &strarray__whences, /* whence */ }, },
955 { .name = "lstat", .errmsg = true, .alias = "newlstat", },
956 { .name = "madvise", .errmsg = true,
957 .arg_scnprintf = { [0] = SCA_HEX, /* start */
958 [2] = SCA_MADV_BHV, /* behavior */ }, },
959 { .name = "mkdirat", .errmsg = true,
960 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
961 { .name = "mknodat", .errmsg = true,
962 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
963 { .name = "mlock", .errmsg = true,
964 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
/* NOTE(review): mlockall(2) takes a single 'int flags' argument; the "addr" label below looks copied from mlock - confirm. */
965 { .name = "mlockall", .errmsg = true,
966 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
967 { .name = "mmap", .hexret = true,
968 .arg_scnprintf = { [0] = SCA_HEX, /* addr */
969 [2] = SCA_MMAP_PROT, /* prot */
970 [3] = SCA_MMAP_FLAGS, /* flags */
971 [4] = SCA_FD, /* fd */ }, },
972 { .name = "mprotect", .errmsg = true,
973 .arg_scnprintf = { [0] = SCA_HEX, /* start */
974 [2] = SCA_MMAP_PROT, /* prot */ }, },
975 { .name = "mremap", .hexret = true,
976 .arg_scnprintf = { [0] = SCA_HEX, /* addr */
977 [4] = SCA_HEX, /* new_addr */ }, },
978 { .name = "munlock", .errmsg = true,
979 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
980 { .name = "munmap", .errmsg = true,
981 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
982 { .name = "name_to_handle_at", .errmsg = true,
983 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
984 { .name = "newfstatat", .errmsg = true,
985 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
986 { .name = "open", .errmsg = true,
987 .arg_scnprintf = { [1] = SCA_OPEN_FLAGS, /* flags */ }, },
988 { .name = "open_by_handle_at", .errmsg = true,
989 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
990 [2] = SCA_OPEN_FLAGS, /* flags */ }, },
991 { .name = "openat", .errmsg = true,
992 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
993 [2] = SCA_OPEN_FLAGS, /* flags */ }, },
994 { .name = "pipe2", .errmsg = true,
995 .arg_scnprintf = { [1] = SCA_PIPE_FLAGS, /* flags */ }, },
996 { .name = "poll", .errmsg = true, .timeout = true, },
997 { .name = "ppoll", .errmsg = true, .timeout = true, },
998 { .name = "pread", .errmsg = true, .alias = "pread64",
999 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
/* NOTE(review): the alias "pread" on the preadv row names a different syscall - confirm it is intended. */
1000 { .name = "preadv", .errmsg = true, .alias = "pread",
1001 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1002 { .name = "prlimit64", .errmsg = true, STRARRAY(1, resource, rlimit_resources), },
1003 { .name = "pwrite", .errmsg = true, .alias = "pwrite64",
1004 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1005 { .name = "pwritev", .errmsg = true,
1006 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1007 { .name = "read", .errmsg = true,
1008 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1009 { .name = "readlinkat", .errmsg = true,
1010 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1011 { .name = "readv", .errmsg = true,
1012 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1013 { .name = "recvfrom", .errmsg = true,
1014 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1015 { .name = "recvmmsg", .errmsg = true,
1016 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1017 { .name = "recvmsg", .errmsg = true,
1018 .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
1019 { .name = "renameat", .errmsg = true,
1020 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1021 { .name = "rt_sigaction", .errmsg = true,
1022 .arg_scnprintf = { [0] = SCA_SIGNUM, /* sig */ }, },
1023 { .name = "rt_sigprocmask", .errmsg = true, STRARRAY(0, how, sighow), },
1024 { .name = "rt_sigqueueinfo", .errmsg = true,
1025 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1026 { .name = "rt_tgsigqueueinfo", .errmsg = true,
1027 .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
1028 { .name = "select", .errmsg = true, .timeout = true, },
1029 { .name = "sendmmsg", .errmsg = true,
1030 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1031 { .name = "sendmsg", .errmsg = true,
1032 .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
1033 { .name = "sendto", .errmsg = true,
1034 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1035 { .name = "setitimer", .errmsg = true, STRARRAY(0, which, itimers), },
1036 { .name = "setrlimit", .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
1037 { .name = "shutdown", .errmsg = true,
1038 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1039 { .name = "socket", .errmsg = true,
1040 .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
1041 [1] = SCA_SK_TYPE, /* type */ },
1042 .arg_parm = { [0] = &strarray__socket_families, /* family */ }, },
1043 { .name = "socketpair", .errmsg = true,
1044 .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
1045 [1] = SCA_SK_TYPE, /* type */ },
1046 .arg_parm = { [0] = &strarray__socket_families, /* family */ }, },
1047 { .name = "stat", .errmsg = true, .alias = "newstat", },
1048 { .name = "symlinkat", .errmsg = true,
1049 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1050 { .name = "tgkill", .errmsg = true,
1051 .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
1052 { .name = "tkill", .errmsg = true,
1053 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1054 { .name = "uname", .errmsg = true, .alias = "newuname", },
1055 { .name = "unlinkat", .errmsg = true,
1056 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1057 { .name = "utimensat", .errmsg = true,
1058 .arg_scnprintf = { [0] = SCA_FDAT, /* dirfd */ }, },
1059 { .name = "write", .errmsg = true,
1060 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1061 { .name = "writev", .errmsg = true,
1062 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
/*
 * bsearch() comparator: @name is the syscall name being searched for (the
 * key), @fmtp the table entry it is compared against; ordering is that of
 * strcmp() on the entry's name.
 */
1065 static int syscall_fmt__cmp(const void *name, const void *fmtp)
1067 const struct syscall_fmt *fmt = fmtp;
1068 return strcmp(name, fmt->name);
/*
 * Find the formatting entry for the syscall called @name in syscall_fmts.
 * Returns what bsearch() returns: the matching entry, or NULL when there
 * is none.  The binary search requires syscall_fmts to be sorted by name
 * in strcmp() order.
 */
1071 static struct syscall_fmt *syscall_fmt__find(const char *name)
1073 const int nmemb = ARRAY_SIZE(syscall_fmts);
1074 return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp);
/*
 * Members read through 'struct syscall *' in this file; the enclosing
 * declaration is not shown in this excerpt.
 */
/* Tracepoint format whose field list drives syscall__set_arg_fmts(). */
1078 struct event_format *tp_format;
/* Formatting overrides for this syscall; tested against NULL before use in syscall__set_arg_fmts(). */
1081 struct syscall_fmt *fmt;
/* Array of per-argument formatters allocated by syscall__set_arg_fmts(). */
1082 size_t (**arg_scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
/*
 * Print "(<duration> ms): " to @fp, where the duration is @t divided by
 * NSEC_PER_MSEC, shown with three decimals.  The number is printed in red
 * from 1.0 ms up, in yellow from 0.01 ms up, and in the normal colour
 * otherwise.  Returns the sum of the values returned by the print calls.
 */
1086 static size_t fprintf_duration(unsigned long t, FILE *fp)
1088 double duration = (double)t / NSEC_PER_MSEC;
1089 size_t printed = fprintf(fp, "(");
1091 if (duration >= 1.0)
1092 printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration);
1093 else if (duration >= 0.01)
1094 printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration);
1096 printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration);
1097 return printed + fprintf(fp, "): ");
/*
 * Per-thread tracing state, stored in thread->priv.  Only some members are
 * shown in this excerpt.
 */
1100 struct thread_trace {
/* Incremented by thread__trace() on every call for this thread. */
1104 unsigned long nr_events;
/* Created with intlist__new() in thread_trace__new(). */
1112 struct intlist *syscall_stats;
/*
 * Allocate a zero-filled struct thread_trace, mark its fd -> path table as
 * empty and create its syscall_stats list.  The allocation check and the
 * return statement are not shown in this excerpt.
 */
1115 static struct thread_trace *thread_trace__new(void)
1117 struct thread_trace *ttrace = zalloc(sizeof(struct thread_trace));
/* -1 is the "no entries yet" marker tested by trace__set_fd_pathname(). */
1120 ttrace->paths.max = -1;
1122 ttrace->syscall_stats = intlist__new(NULL);
/*
 * Return the tracing state of @thread, creating it on first use, and count
 * one more event for the thread.  When the state cannot be allocated, a
 * warning is printed to @fp.  The control flow joining these steps and the
 * return statements are not shown in this excerpt.
 */
1127 static struct thread_trace *thread__trace(struct thread *thread, FILE *fp)
1129 struct thread_trace *ttrace;
/* Lazily create the per-thread state. */
1134 if (thread->priv == NULL)
1135 thread->priv = thread_trace__new();
/* Still NULL: the allocation above failed. */
1137 if (thread->priv == NULL)
1140 ttrace = thread->priv;
1141 ++ttrace->nr_events;
1145 color_fprintf(fp, PERF_COLOR_RED,
1146 "WARNING: not enough memory, dropping samples!\n");
/*
 * Members of the tool-wide state accessed through 'struct trace *' in this
 * file; the enclosing declaration and several members are not shown in
 * this excerpt.
 */
/* Embedded tool; trace__tool_process() recovers the struct trace from it with container_of(). */
1151 struct perf_tool tool;
1158 struct syscall *table;
1160 struct record_opts opts;
/* Host machine, created in trace__symbols_init(). */
1161 struct machine *host;
1164 unsigned long nr_events;
/* Syscall name list consulted together with not_ev_qualifier when syscall info is read. */
1165 struct strlist *ev_qualifier;
1166 const char *last_vfs_getname;
1167 struct intlist *tid_list;
1168 struct intlist *pid_list;
/* Threshold used by trace__filter_duration(), where it is scaled by NSEC_PER_MSEC. */
1169 double duration_filter;
1175 bool not_ev_qualifier;
/* When set, trace__fprintf_entry_head() prefixes each line with the thread id. */
1179 bool multiple_threads;
1183 bool show_tool_stats;
/*
 * Remember a copy of @pathname as the path of descriptor @fd of @thread.
 * The per-thread table is indexed by fd and grown on demand, with the new
 * slots zero-filled.  Returns 0 on success and -1 when the copy of the
 * path could not be allocated; the handling of a failed realloc() is not
 * shown in this excerpt.
 *
 * NOTE(review): the slot is overwritten without releasing a previous
 * string, which would leak if the fd already had a cached path - confirm
 * that callers only get here with an empty slot.
 */
1186 static int trace__set_fd_pathname(struct thread *thread, int fd, const char *pathname)
1188 struct thread_trace *ttrace = thread->priv;
/* Grow the table so that it can be indexed by fd. */
1190 if (fd > ttrace->paths.max) {
1191 char **npath = realloc(ttrace->paths.table, (fd + 1) * sizeof(char *));
/* Existing table: clear only the slots added after the old maximum. */
1196 if (ttrace->paths.max != -1) {
1197 memset(npath + ttrace->paths.max + 1, 0,
1198 (fd - ttrace->paths.max) * sizeof(char *));
/* First allocation: clear the whole table. */
1200 memset(npath, 0, (fd + 1) * sizeof(char *));
1203 ttrace->paths.table = npath;
1204 ttrace->paths.max = fd;
1207 ttrace->paths.table[fd] = strdup(pathname);
1209 return ttrace->paths.table[fd] != NULL ? 0 : -1;
/*
 * Resolve descriptor @fd of @thread to a path by reading the matching
 * symlink under /proc, and cache the result with trace__set_fd_pathname().
 * Returns that function's result on success; the values returned on the
 * failure paths are not shown in this excerpt.
 */
1212 static int thread__read_fd_path(struct thread *thread, int fd)
1214 char linkname[PATH_MAX], pathname[PATH_MAX];
/* Use /proc/<pid>/fd when pid and tid are equal, the per-task directory otherwise. */
1218 if (thread->pid_ == thread->tid) {
1219 scnprintf(linkname, sizeof(linkname),
1220 "/proc/%d/fd/%d", thread->pid_, fd);
1222 scnprintf(linkname, sizeof(linkname),
1223 "/proc/%d/task/%d/fd/%d", thread->pid_, thread->tid, fd);
/* Reject links whose reported size plus the terminator would not fit in pathname. */
1226 if (lstat(linkname, &st) < 0 || st.st_size + 1 > (off_t)sizeof(pathname))
1229 ret = readlink(linkname, pathname, sizeof(pathname));
/* Also reject a link target longer than the size lstat() reported. */
1231 if (ret < 0 || ret > st.st_size)
/* readlink() does not terminate the string. */
1234 pathname[ret] = '\0';
1235 return trace__set_fd_pathname(thread, fd, pathname);
/*
 * Return the cached path of descriptor @fd of @thread, consulting /proc
 * through thread__read_fd_path() and counting the lookup in
 * trace->stats.proc_getname.  Several statements, including the early
 * returns, are not shown in this excerpt.
 *
 * NOTE(review): the "not cached yet" test below ends without an opening
 * brace, so only the single statement that follows it is conditional.  If
 * that statement is not the /proc lookup, the counter increment and the
 * lookup run even when a path is already cached - confirm against the full
 * source.
 */
1238 static const char *thread__fd_path(struct thread *thread, int fd,
1239 struct trace *trace)
1241 struct thread_trace *ttrace = thread->priv;
1249 if ((fd > ttrace->paths.max || ttrace->paths.table[fd] == NULL))
1252 ++trace->stats.proc_getname;
1253 if (thread__read_fd_path(thread, fd)) {
1257 return ttrace->paths.table[fd];
/*
 * Format a file descriptor argument as its decimal value, followed by
 * "<path>" built from the path thread__fd_path() returns for it.  The
 * guard on the path and the return statement are not shown in this
 * excerpt.
 */
1260 static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
1261 struct syscall_arg *arg)
1264 size_t printed = scnprintf(bf, size, "%d", fd);
1265 const char *path = thread__fd_path(arg->thread, fd, arg->trace);
1268 printed += scnprintf(bf + printed, size - printed, "<%s>", path);
/*
 * Formatter for the descriptor passed to close(): prints it like any other
 * fd and then drops the cached path of that descriptor, if the thread has
 * per-thread state and the fd lies inside the table.
 */
1273 static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
1274 struct syscall_arg *arg)
/* Format first, while the cached path is still available. */
1277 size_t printed = syscall_arg__scnprintf_fd(bf, size, arg);
1278 struct thread_trace *ttrace = arg->thread->priv;
1280 if (ttrace && fd >= 0 && fd <= ttrace->paths.max)
1281 zfree(&ttrace->paths.table[fd]);
/*
 * Returns true when @t is below the configured duration filter, i.e. below
 * trace->duration_filter scaled by NSEC_PER_MSEC.
 */
1286 static bool trace__filter_duration(struct trace *trace, double t)
1288 return t < (trace->duration_filter * NSEC_PER_MSEC);
/*
 * Print @tstamp relative to trace->base_time, divided by NSEC_PER_MSEC,
 * with three decimals and a trailing space.  Returns fprintf()'s result.
 */
1291 static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
1293 double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC;
1295 return fprintf(fp, "%10.3f ", ts);
/*
 * State updated from the signal handler below.
 * NOTE(review): objects written by a signal handler and read elsewhere are
 * normally declared 'volatile sig_atomic_t' - consider changing the type.
 */
1298 static bool done = false;
1299 static bool interrupted = false;
/*
 * Signal handler: records whether the signal received was SIGINT.  Other
 * statements of the handler are not shown in this excerpt.
 */
1301 static void sig_handler(int sig)
1304 interrupted = sig == SIGINT;
/*
 * Print the prefix of an output line: the relative timestamp, the
 * duration in parentheses and, when several threads are traced, the
 * thread id, preceded by the command name (at most 14 characters) when
 * trace->show_comm is set.  The return statement is not shown in this
 * excerpt.
 */
1307 static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread,
1308 u64 duration, u64 tstamp, FILE *fp)
1310 size_t printed = trace__fprintf_tstamp(trace, tstamp, fp);
1311 printed += fprintf_duration(duration, fp);
1313 if (trace->multiple_threads) {
1314 if (trace->show_comm)
1315 printed += fprintf(fp, "%.14s/", thread__comm_str(thread));
1316 printed += fprintf(fp, "%d ", thread->tid);
/*
 * Process a non-sample event.  PERF_RECORD_LOST events are reported in red
 * on trace->output and passed to machine__process_lost_event(); the other
 * call shown below hands the event to machine__process_event().  The
 * remaining case labels and the return statement are not shown in this
 * excerpt.
 */
1322 static int trace__process_event(struct trace *trace, struct machine *machine,
1323 union perf_event *event, struct perf_sample *sample)
1327 switch (event->header.type) {
1328 case PERF_RECORD_LOST:
1329 color_fprintf(trace->output, PERF_COLOR_RED,
1330 "LOST %" PRIu64 " events!\n", event->lost.lost);
1331 ret = machine__process_lost_event(machine, event, sample);
1333 ret = machine__process_event(machine, event, sample);
1340 static int trace__tool_process(struct perf_tool *tool,
1341 union perf_event *event,
1342 struct perf_sample *sample,
1343 struct machine *machine)
1345 struct trace *trace = container_of(tool, struct trace, tool);
1346 return trace__process_event(trace, machine, event, sample);
/*
 * Set up symbol handling and the host machine object: call symbol__init(),
 * store a new host machine in trace->host (a NULL result is checked), then
 * call __machine__synthesize_threads() for evlist->threads with
 * trace__tool_process as the event callback.  `err` carries the status of
 * the init and synthesize calls.
 */
1349 static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
1351 int err = symbol__init();
1356 trace->host = machine__new_host();
1357 if (trace->host == NULL)
1360 err = __machine__synthesize_threads(trace->host, &trace->tool, &trace->opts.target,
1361 evlist->threads, trace__tool_process, false);
/*
 * Choose a formatter for each argument of syscall @sc.
 *
 * sc->arg_scnprintf is allocated zeroed with one slot per tracepoint field
 * minus one, matching the walk below which starts at fields->next (i.e. the
 * first field is not treated as an argument).  For each argument, a formatter
 * supplied by sc->fmt wins; otherwise pointer fields get
 * syscall_arg__scnprintf_hex.  sc->arg_parm is taken from sc->fmt.
 */
1368 static int syscall__set_arg_fmts(struct syscall *sc)
1370 struct format_field *field;
1373 sc->arg_scnprintf = calloc(sc->tp_format->format.nr_fields - 1, sizeof(void *));
1374 if (sc->arg_scnprintf == NULL)
1378 sc->arg_parm = sc->fmt->arg_parm;
1380 for (field = sc->tp_format->format.fields->next; field; field = field->next) {
1381 if (sc->fmt && sc->fmt->arg_scnprintf[idx])
1382 sc->arg_scnprintf[idx] = sc->fmt->arg_scnprintf[idx];
1383 else if (field->flags & FIELD_IS_POINTER)
1384 sc->arg_scnprintf[idx] = syscall_arg__scnprintf_hex;
/*
 * Fill in trace->syscalls.table[id] for syscall number @id.
 *
 * Steps visible here:
 *  - resolve the syscall name with audit_syscall_to_name();
 *  - when @id is beyond syscalls.max, grow the table with realloc() (into a
 *    temporary, so the old table survives a failure) and zero the new slots:
 *    only the tail when a table already existed (max != -1), otherwise all
 *    id + 1 entries;
 *  - when an event qualifier list is set, mark the syscall as filtered if
 *    its presence in the list equals trace->not_ev_qualifier (not listed in
 *    a positive list, or listed in a negated one);
 *  - look up the syscall_fmt entry and the "syscalls:sys_enter_<name>"
 *    tracepoint format, retrying with sc->fmt->alias when the first lookup
 *    fails;
 *  - finish with syscall__set_arg_fmts().
 */
1391 static int trace__read_syscall_info(struct trace *trace, int id)
1395 const char *name = audit_syscall_to_name(id, trace->audit.machine);
1400 if (id > trace->syscalls.max) {
1401 struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc));
1403 if (nsyscalls == NULL)
1406 if (trace->syscalls.max != -1) {
1407 memset(nsyscalls + trace->syscalls.max + 1, 0,
1408 (id - trace->syscalls.max) * sizeof(*sc));
1410 memset(nsyscalls, 0, (id + 1) * sizeof(*sc));
1413 trace->syscalls.table = nsyscalls;
1414 trace->syscalls.max = id;
1417 sc = trace->syscalls.table + id;
1420 if (trace->ev_qualifier) {
1421 bool in = strlist__find(trace->ev_qualifier, name) != NULL;
1423 if (!(in ^ trace->not_ev_qualifier)) {
1424 sc->filtered = true;
1426 * No need to do read tracepoint information since this will be
1433 sc->fmt = syscall_fmt__find(sc->name);
1435 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name);
1436 sc->tp_format = trace_event__tp_format("syscalls", tp_name);
1438 if (sc->tp_format == NULL && sc->fmt && sc->fmt->alias) {
1439 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias);
1440 sc->tp_format = trace_event__tp_format("syscalls", tp_name);
1443 if (sc->tp_format == NULL)
1446 return syscall__set_arg_fmts(sc);
/*
 * Format the arguments of one syscall invocation into @bf (at most @size
 * bytes), accumulating the length in `printed`.
 *
 * With a tracepoint format available, the fields are walked starting at
 * fields->next, advancing arg.idx and a one-bit mask (`bit`) in step.  Each
 * printed argument is emitted as "name: value", separated by ", ".  The
 * value comes from the per-argument formatter in sc->arg_scnprintf when one
 * is set (it receives arg.val and arg.parm), else it is printed with "%ld".
 * A zero-valued argument is tested for suppression unless its formatter is
 * SCA_STRARRAY with a parameter table.
 *
 * The last scnprintf() below prints arguments by index `i`.
 * NOTE(review): presumably this is the fallback for sc->tp_format == NULL;
 * confirm against the surrounding control flow.
 */
1449 static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
1450 unsigned long *args, struct trace *trace,
1451 struct thread *thread)
1455 if (sc->tp_format != NULL) {
1456 struct format_field *field;
1458 struct syscall_arg arg = {
1465 for (field = sc->tp_format->format.fields->next; field;
1466 field = field->next, ++arg.idx, bit <<= 1) {
1470 * Suppress this argument if its value is zero and
1471 * and we don't have a string associated in an
1474 if (args[arg.idx] == 0 &&
1475 !(sc->arg_scnprintf &&
1476 sc->arg_scnprintf[arg.idx] == SCA_STRARRAY &&
1477 sc->arg_parm[arg.idx]))
1480 printed += scnprintf(bf + printed, size - printed,
1481 "%s%s: ", printed ? ", " : "", field->name);
1482 if (sc->arg_scnprintf && sc->arg_scnprintf[arg.idx]) {
1483 arg.val = args[arg.idx];
1485 arg.parm = sc->arg_parm[arg.idx];
1486 printed += sc->arg_scnprintf[arg.idx](bf + printed,
1487 size - printed, &arg);
1489 printed += scnprintf(bf + printed, size - printed,
1490 "%ld", args[arg.idx]);
1497 printed += scnprintf(bf + printed, size - printed,
1499 printed ? ", " : "", i, args[i]);
/*
 * Signature of the per-tracepoint callbacks stored in evsel->handler and
 * invoked as handler(trace, evsel, sample) by the event loops in this file.
 */
1507 typedef int (*tracepoint_handler)(struct trace *trace, struct perf_evsel *evsel,
1508 struct perf_sample *sample);
/*
 * Return the syscall table entry for @id, loading it on first use.
 *
 * An entry counts as missing when @id is beyond syscalls.max or its name is
 * still NULL; in that case trace__read_syscall_info() is called and the
 * entry is checked again before &trace->syscalls.table[id] is returned.
 * Invalid ids are reported with an "Invalid syscall" message that carries a
 * running counter `n`; read failures are reported as "Problems reading
 * syscall <id>[(name)] information" on trace->output.
 */
1510 static struct syscall *trace__syscall_info(struct trace *trace,
1511 struct perf_evsel *evsel, int id)
1517 * XXX: Noticed on x86_64, reproduced as far back as 3.0.36, haven't tried
1518 * before that, leaving at a higher verbosity level till that is
1519 * explained. Reproduced with plain ftrace with:
1521 * echo 1 > /t/events/raw_syscalls/sys_exit/enable
1522 * grep "NR -1 " /t/trace_pipe
1524 * After generating some load on the machine.
1528 fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n",
1529 id, perf_evsel__name(evsel), ++n);
1534 if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) &&
1535 trace__read_syscall_info(trace, id))
1538 if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL))
1541 return &trace->syscalls.table[id];
1545 fprintf(trace->output, "Problems reading syscall %d", id);
1546 if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL)
1547 fprintf(trace->output, "(%s)", trace->syscalls.table[id].name);
1548 fputs(" information\n", trace->output);
/*
 * Account one occurrence of syscall @id in the per-thread statistics.
 *
 * The intlist node for @id is found or created in ttrace->syscall_stats and
 * its struct stats (kept in inode->priv) is allocated on first use.  The
 * duration fed to update_stats() is sample->time - ttrace->entry_time, and
 * is only computed when an entry time was recorded and lies before the
 * sample time.
 */
1553 static void thread__update_stats(struct thread_trace *ttrace,
1554 int id, struct perf_sample *sample)
1556 struct int_node *inode;
1557 struct stats *stats;
1560 inode = intlist__findnew(ttrace->syscall_stats, id);
1564 stats = inode->priv;
1565 if (stats == NULL) {
1566 stats = malloc(sizeof(struct stats));
1570 inode->priv = stats;
1573 if (ttrace->entry_time && sample->time > ttrace->entry_time)
1574 duration = sample->time - ttrace->entry_time;
1576 update_stats(stats, duration);
/*
 * Tracepoint handler for syscall entry.
 *
 * Looks up the syscall description for the id carried in the sample and the
 * thread (and its trace state) the sample belongs to, then formats
 * "<name>(<args>" into the per-thread buffer ttrace->entry_str, which is a
 * 1024-byte buffer allocated on first use.  The entry timestamp is saved in
 * ttrace->entry_time for the matching exit.
 *
 * exit_group/exit are printed right here (with a duration of 1) unless a
 * duration filter or summary-only mode is active; ttrace->entry_pending is
 * set so that trace__sys_exit() can print the buffered entry text.
 */
1579 static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
1580 struct perf_sample *sample)
1585 struct thread *thread;
1586 int id = perf_evsel__sc_tp_uint(evsel, id, sample);
1587 struct syscall *sc = trace__syscall_info(trace, evsel, id);
1588 struct thread_trace *ttrace;
1596 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1597 ttrace = thread__trace(thread, trace->output);
1601 args = perf_evsel__sc_tp_ptr(evsel, args, sample);
1602 ttrace = thread->priv;
1604 if (ttrace->entry_str == NULL) {
1605 ttrace->entry_str = malloc(1024);
1606 if (!ttrace->entry_str)
1610 ttrace->entry_time = sample->time;
1611 msg = ttrace->entry_str;
1612 printed += scnprintf(msg + printed, 1024 - printed, "%s(", sc->name);
1614 printed += syscall__scnprintf_args(sc, msg + printed, 1024 - printed,
1615 args, trace, thread);
1617 if (!strcmp(sc->name, "exit_group") || !strcmp(sc->name, "exit")) {
1618 if (!trace->duration_filter && !trace->summary_only) {
1619 trace__fprintf_entry_head(trace, thread, 1, sample->time, trace->output);
1620 fprintf(trace->output, "%-70s\n", ttrace->entry_str);
1623 ttrace->entry_pending = true;
/*
 * Tracepoint handler for syscall exit.
 *
 * Resolves the syscall and thread like trace__sys_enter(), feeds
 * thread__update_stats(), and reads the return value from the sample.  When
 * the syscall is open (trace->audit.open_id), succeeded, and a pathname was
 * captured by trace__vfs_getname(), that pathname is associated with the
 * returned fd and the capture is consumed.
 *
 * The duration is sample->time - ttrace->entry_time when an entry time is
 * known; it is checked against trace__filter_duration().  Output consists
 * of the line prefix, then either the pending entry text (padded to 70
 * columns) or a " ... [continued]: name()" marker, then the return value:
 *   - no syscall_fmt:             ") = %d"
 *   - negative with fmt->errmsg:  ") = -1 <errno name> <strerror text>"
 *   - zero with fmt->timeout:     ") = 0 Timeout"
 *   - fmt->hexret:                ") = %#x"
 * and a newline.  entry_pending is cleared afterwards.
 */
1628 static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
1629 struct perf_sample *sample)
1633 struct thread *thread;
1634 int id = perf_evsel__sc_tp_uint(evsel, id, sample);
1635 struct syscall *sc = trace__syscall_info(trace, evsel, id);
1636 struct thread_trace *ttrace;
1644 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1645 ttrace = thread__trace(thread, trace->output);
1650 thread__update_stats(ttrace, id, sample);
1652 ret = perf_evsel__sc_tp_uint(evsel, ret, sample);
1654 if (id == trace->audit.open_id && ret >= 0 && trace->last_vfs_getname) {
1655 trace__set_fd_pathname(thread, ret, trace->last_vfs_getname);
1656 trace->last_vfs_getname = NULL;
1657 ++trace->stats.vfs_getname;
1660 ttrace = thread->priv;
1662 ttrace->exit_time = sample->time;
1664 if (ttrace->entry_time) {
1665 duration = sample->time - ttrace->entry_time;
1666 if (trace__filter_duration(trace, duration))
1668 } else if (trace->duration_filter)
1671 if (trace->summary_only)
1674 trace__fprintf_entry_head(trace, thread, duration, sample->time, trace->output);
1676 if (ttrace->entry_pending) {
1677 fprintf(trace->output, "%-70s", ttrace->entry_str);
1679 fprintf(trace->output, " ... [");
1680 color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued");
1681 fprintf(trace->output, "]: %s()", sc->name);
1684 if (sc->fmt == NULL) {
1686 fprintf(trace->output, ") = %d", ret);
1687 } else if (ret < 0 && sc->fmt->errmsg) {
1689 const char *emsg = strerror_r(-ret, bf, sizeof(bf)),
1690 *e = audit_errno_to_name(-ret);
1692 fprintf(trace->output, ") = -1 %s %s", e, emsg);
1693 } else if (ret == 0 && sc->fmt->timeout)
1694 fprintf(trace->output, ") = 0 Timeout");
1695 else if (sc->fmt->hexret)
1696 fprintf(trace->output, ") = %#x", ret);
1700 fputc('\n', trace->output);
1702 ttrace->entry_pending = false;
/*
 * Handler for the vfs_getname probe: remember a pointer to the sample's
 * "pathname" field in trace->last_vfs_getname.  trace__sys_exit() consumes
 * it when the next successful open returns.
 */
1707 static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel,
1708 struct perf_sample *sample)
1710 trace->last_vfs_getname = perf_evsel__rawptr(evsel, sample, "pathname");
/*
 * Handler for sched:sched_stat_runtime: read the "runtime" field, convert
 * it to milliseconds (divide by NSEC_PER_MSEC) and add it to both the
 * per-thread and the global runtime_ms totals.
 *
 * The fprintf() at the end dumps the raw event fields (comm, pid, runtime,
 * vruntime) to trace->output.
 * NOTE(review): its format string ends in ")" with no matching "(" -- looks
 * like a stray character; confirm before changing the output format.
 */
1714 static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel,
1715 struct perf_sample *sample)
1717 u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
1718 double runtime_ms = (double)runtime / NSEC_PER_MSEC;
1719 struct thread *thread = machine__findnew_thread(trace->host,
1722 struct thread_trace *ttrace = thread__trace(thread, trace->output);
1727 ttrace->runtime_ms += runtime_ms;
1728 trace->runtime_ms += runtime_ms;
1732 fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n",
1734 perf_evsel__strval(evsel, sample, "comm"),
1735 (pid_t)perf_evsel__intval(evsel, sample, "pid"),
1737 perf_evsel__intval(evsel, sample, "vruntime"));
/*
 * Decide whether @sample should be ignored based on the pid/tid lists.
 *
 * The first test checks whether the sample's pid is in trace->pid_list or
 * its tid is in trace->tid_list; the second checks whether any list was
 * configured at all.
 * NOTE(review): judging by the name, a list match presumably means "keep"
 * and a miss with a list configured means "skip" -- confirm against the
 * return statements.
 */
1741 static bool skip_sample(struct trace *trace, struct perf_sample *sample)
1743 if ((trace->pid_list && intlist__find(trace->pid_list, sample->pid)) ||
1744 (trace->tid_list && intlist__find(trace->tid_list, sample->tid)))
1747 if (trace->pid_list || trace->tid_list)
/*
 * perf_tool sample callback (installed by trace__replay()).
 *
 * Recovers the struct trace embedding @tool, applies skip_sample(), latches
 * the first sample time as trace->base_time unless full timestamps were
 * requested, and dispatches to the tracepoint handler stored in
 * evsel->handler.  @event and @machine are unused.
 */
1753 static int trace__process_sample(struct perf_tool *tool,
1754 union perf_event *event __maybe_unused,
1755 struct perf_sample *sample,
1756 struct perf_evsel *evsel,
1757 struct machine *machine __maybe_unused)
1759 struct trace *trace = container_of(tool, struct trace, tool);
1762 tracepoint_handler handler = evsel->handler;
1764 if (skip_sample(trace, sample))
/* base_time == 0 doubles as the "not yet set" marker */
1767 if (!trace->full_time && trace->base_time == 0)
1768 trace->base_time = sample->time;
1772 handler(trace, evsel, sample);
/*
 * Turn the textual pid/tid target options into integer lists.
 *
 * For each of trace->opts.target.pid and .tid that is set, build an intlist
 * with intlist__new() and store it in trace->pid_list / trace->tid_list.
 * A NULL result is reported with pr_err() as a parse error.
 */
1778 static int parse_target_str(struct trace *trace)
1780 if (trace->opts.target.pid) {
1781 trace->pid_list = intlist__new(trace->opts.target.pid);
1782 if (trace->pid_list == NULL) {
1783 pr_err("Error parsing process id string\n");
1788 if (trace->opts.target.tid) {
1789 trace->tid_list = intlist__new(trace->opts.target.tid);
1790 if (trace->tid_list == NULL) {
1791 pr_err("Error parsing thread id string\n");
/*
 * Implement "perf trace record": build an argument vector for cmd_record()
 * and run it.
 *
 * The vector holds the fixed record_args, one event string selecting the
 * syscall enter/exit tracepoints, and then the caller's @argv; it is
 * allocated with one extra zeroed slot.  The event string prefers the
 * raw_syscalls tracepoints and falls back to the syscalls ones; if neither
 * is valid an error is printed.
 * NOTE(review): no free(rec_argv) is visible on the pr_err() path or after
 * cmd_record() -- confirm whether the allocation is intentionally left to
 * process exit.
 */
1799 static int trace__record(int argc, const char **argv)
1801 unsigned int rec_argc, i, j;
1802 const char **rec_argv;
1803 const char * const record_args[] = {
1811 /* +1 is for the event string below */
1812 rec_argc = ARRAY_SIZE(record_args) + 1 + argc;
1813 rec_argv = calloc(rec_argc + 1, sizeof(char *));
1815 if (rec_argv == NULL)
1818 for (i = 0; i < ARRAY_SIZE(record_args); i++)
1819 rec_argv[i] = record_args[i];
1821 /* event string may be different for older kernels - e.g., RHEL6 */
1822 if (is_valid_tracepoint("raw_syscalls:sys_enter"))
1823 rec_argv[i] = "raw_syscalls:sys_enter,raw_syscalls:sys_exit";
1824 else if (is_valid_tracepoint("syscalls:sys_enter"))
1825 rec_argv[i] = "syscalls:sys_enter,syscalls:sys_exit";
1827 pr_err("Neither raw_syscalls nor syscalls events exist.\n");
/* append the caller's arguments; i ends up as the final argument count */
1832 for (j = 0; j < (unsigned int)argc; j++, i++)
1833 rec_argv[i] = argv[j];
1835 return cmd_record(i, rec_argv, NULL);
/* Forward declaration: used by trace__run() before its definition below. */
1838 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp);
/*
 * Add the probe:vfs_getname tracepoint to @evlist, with trace__vfs_getname
 * as its handler.  The evsel is deleted again when the tracepoint has no
 * "pathname" field, since that is the field the handler reads.
 */
1840 static void perf_evlist__add_vfs_getname(struct perf_evlist *evlist)
1842 struct perf_evsel *evsel = perf_evsel__newtp("probe", "vfs_getname");
1846 if (perf_evsel__field(evsel, "pathname") == NULL) {
1847 perf_evsel__delete(evsel);
1851 evsel->handler = trace__vfs_getname;
1852 perf_evlist__add(evlist, evsel);
/*
 * Live tracing: set up the events, optionally start a workload, and print
 * syscalls as they are read from the ring buffers.
 *
 * Sequence visible in this function:
 *  1. create the evlist and add the syscall enter/exit tracepoints, the
 *     vfs_getname probe and sched:sched_stat_runtime;
 *  2. create the cpu/thread maps for the target, initialise symbols and the
 *     host machine, and apply the record options to the evlist;
 *  3. install sig_handler for SIGCHLD and SIGINT;
 *  4. prepare the workload (`forks` is true when a command was given, i.e.
 *     argc > 0), open and mmap the events, enable them and start the
 *     workload;
 *  5. loop over all mmaps: parse each event, latch base_time, hand
 *     non-sample records to trace__process_event(), and dispatch samples to
 *     the handler of the evsel that sample.id maps to;
 *  6. when a pass produced no new events, wait in poll();
 *  7. disable the events, print the thread summary and tool statistics,
 *     and delete the evlist.
 *
 * Open failures are turned into a message with perf_evlist__strerror_tp() /
 * perf_evlist__strerror_open() and printed to trace->output.
 */
1855 static int trace__run(struct trace *trace, int argc, const char **argv)
1857 struct perf_evlist *evlist = perf_evlist__new();
1858 struct perf_evsel *evsel;
1860 unsigned long before;
1861 const bool forks = argc > 0;
1865 if (evlist == NULL) {
1866 fprintf(trace->output, "Not enough memory to run!\n");
1870 if (perf_evlist__add_syscall_newtp(evlist, trace__sys_enter, trace__sys_exit))
1873 perf_evlist__add_vfs_getname(evlist);
1876 perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
1877 trace__sched_stat_runtime))
1880 err = perf_evlist__create_maps(evlist, &trace->opts.target);
1882 fprintf(trace->output, "Problems parsing the target to trace, check your options!\n");
1883 goto out_delete_evlist;
1886 err = trace__symbols_init(trace, evlist);
1888 fprintf(trace->output, "Problems initializing symbol libraries!\n");
1889 goto out_delete_evlist;
1892 perf_evlist__config(evlist, &trace->opts);
/* sig_handler records the signal in the flags consulted by the loop below */
1894 signal(SIGCHLD, sig_handler);
1895 signal(SIGINT, sig_handler);
1898 err = perf_evlist__prepare_workload(evlist, &trace->opts.target,
1901 fprintf(trace->output, "Couldn't run the workload!\n");
1902 goto out_delete_evlist;
1906 err = perf_evlist__open(evlist);
1908 goto out_error_open;
1910 err = perf_evlist__mmap(evlist, trace->opts.mmap_pages, false);
1912 fprintf(trace->output, "Couldn't mmap the events: %s\n", strerror(errno));
1913 goto out_delete_evlist;
1916 perf_evlist__enable(evlist);
1919 perf_evlist__start_workload(evlist);
/* show tids when the first thread map entry is -1 or more than one thread is traced */
1921 trace->multiple_threads = evlist->threads->map[0] == -1 || evlist->threads->nr > 1;
/* snapshot of the event counter, compared after the pass to detect an idle round */
1923 before = trace->nr_events;
1925 for (i = 0; i < evlist->nr_mmaps; i++) {
1926 union perf_event *event;
1928 while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) {
1929 const u32 type = event->header.type;
1930 tracepoint_handler handler;
1931 struct perf_sample sample;
1935 err = perf_evlist__parse_sample(evlist, event, &sample);
1937 fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err);
1941 if (!trace->full_time && trace->base_time == 0)
1942 trace->base_time = sample.time;
1944 if (type != PERF_RECORD_SAMPLE) {
1945 trace__process_event(trace, trace->host, event, &sample);
1949 evsel = perf_evlist__id2evsel(evlist, sample.id);
1950 if (evsel == NULL) {
1951 fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample.id);
1955 if (sample.raw_data == NULL) {
1956 fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
1957 perf_evsel__name(evsel), sample.tid,
1958 sample.cpu, sample.raw_size);
1962 handler = evsel->handler;
1963 handler(trace, evsel, &sample);
1965 perf_evlist__mmap_consume(evlist, i);
/*
 * Nothing new was read: wait for more data.  Once `done` is set the wait
 * is bounded to 100ms; before that poll() blocks indefinitely (-1).
 */
1972 if (trace->nr_events == before) {
1973 int timeout = done ? 100 : -1;
1975 if (poll(evlist->pollfd, evlist->nr_fds, timeout) > 0)
1982 perf_evlist__disable(evlist);
1986 trace__fprintf_thread_summary(trace, trace->output);
1988 if (trace->show_tool_stats) {
1989 fprintf(trace->output, "Stats:\n "
1990 " vfs_getname : %" PRIu64 "\n"
1991 " proc_getname: %" PRIu64 "\n",
1992 trace->stats.vfs_getname,
1993 trace->stats.proc_getname);
1998 perf_evlist__delete(evlist);
2000 trace->live = false;
2003 char errbuf[BUFSIZ];
2006 perf_evlist__strerror_tp(evlist, errno, errbuf, sizeof(errbuf));
2010 perf_evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf));
2013 fprintf(trace->output, "%s\n", errbuf);
2014 goto out_delete_evlist;
/*
 * Replay mode: process events from a perf data file instead of tracing
 * live.
 *
 * The perf_tool callbacks are pointed at the generic perf_event__process_*
 * helpers plus trace__process_sample() for samples, with ordered samples
 * (requiring timestamps) enabled.  Thread ids are always shown
 * (multiple_threads = true).  After creating the session, trace->host is set
 * to the session's host machine and the vfs_getname handler is registered.
 *
 * The sys_enter and sys_exit evsels are looked up under the raw_syscalls
 * name first and, for older kernels, under the syscalls name; each is
 * initialised as a syscall tracepoint with its handler and its "args" /
 * "ret" field accessor.  A missing or uninitialisable event is reported
 * with pr_err().
 *
 * Finally the pid/tid filters are parsed, the events are processed, and the
 * thread summary is printed when trace->summary is set and processing
 * succeeded.  The session is deleted before returning.
 */
2018 static int trace__replay(struct trace *trace)
2020 const struct perf_evsel_str_handler handlers[] = {
2021 { "probe:vfs_getname", trace__vfs_getname, },
2023 struct perf_data_file file = {
2025 .mode = PERF_DATA_MODE_READ,
2027 struct perf_session *session;
2028 struct perf_evsel *evsel;
2031 trace->tool.sample = trace__process_sample;
2032 trace->tool.mmap = perf_event__process_mmap;
2033 trace->tool.mmap2 = perf_event__process_mmap2;
2034 trace->tool.comm = perf_event__process_comm;
2035 trace->tool.exit = perf_event__process_exit;
2036 trace->tool.fork = perf_event__process_fork;
2037 trace->tool.attr = perf_event__process_attr;
2038 trace->tool.tracing_data = perf_event__process_tracing_data;
2039 trace->tool.build_id = perf_event__process_build_id;
2041 trace->tool.ordered_samples = true;
2042 trace->tool.ordering_requires_timestamps = true;
2044 /* add tid to output */
2045 trace->multiple_threads = true;
2047 if (symbol__init() < 0)
2050 session = perf_session__new(&file, false, &trace->tool);
2051 if (session == NULL)
2054 trace->host = &session->machines.host;
2056 err = perf_session__set_tracepoints_handlers(session, handlers);
2060 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2061 "raw_syscalls:sys_enter");
2062 /* older kernels have syscalls tp versus raw_syscalls */
2064 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2065 "syscalls:sys_enter");
2066 if (evsel == NULL) {
2067 pr_err("Data file does not have raw_syscalls:sys_enter event\n");
2071 if (perf_evsel__init_syscall_tp(evsel, trace__sys_enter) < 0 ||
2072 perf_evsel__init_sc_tp_ptr_field(evsel, args)) {
2073 pr_err("Error during initialize raw_syscalls:sys_enter event\n");
2077 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2078 "raw_syscalls:sys_exit");
2080 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2081 "syscalls:sys_exit");
2082 if (evsel == NULL) {
2083 pr_err("Data file does not have raw_syscalls:sys_exit event\n");
2087 if (perf_evsel__init_syscall_tp(evsel, trace__sys_exit) < 0 ||
2088 perf_evsel__init_sc_tp_uint_field(evsel, ret)) {
2089 pr_err("Error during initialize raw_syscalls:sys_exit event\n");
2093 err = parse_target_str(trace);
2099 err = perf_session__process_events(session, &trace->tool);
2101 pr_err("Failed to process events, error %d", err);
2103 else if (trace->summary)
2104 trace__fprintf_thread_summary(trace, trace->output);
2107 perf_session__delete(session);
/*
 * Print the heading of the per-thread summary to @fp; `printed` holds the
 * number of characters fprintf() reported.
 */
2112 static size_t trace__fprintf_threads_header(FILE *fp)
2116 printed = fprintf(fp, "\n Summary of events:\n\n");
/*
 * Print the per-syscall statistics table of one thread to @fp.
 *
 * After the column headers, the nodes of ttrace->syscall_stats are walked
 * in intlist order; each node's key (inode->i) is the syscall id, used to
 * index trace->syscalls.table for the name, and its priv pointer is the
 * struct stats filled by thread__update_stats().  min, max and avg are
 * divided by NSEC_PER_MSEC for display; the last column is the standard
 * deviation as a percentage of the average (0 when the average is 0, which
 * also avoids a division by zero).  `printed` sums all fprintf() results.
 */
2121 static size_t thread__dump_stats(struct thread_trace *ttrace,
2122 struct trace *trace, FILE *fp)
2124 struct stats *stats;
2127 struct int_node *inode = intlist__first(ttrace->syscall_stats);
2132 printed += fprintf(fp, "\n");
2134 printed += fprintf(fp, " syscall calls min avg max stddev\n");
2135 printed += fprintf(fp, " (msec) (msec) (msec) (%%)\n");
2136 printed += fprintf(fp, " --------------- -------- --------- --------- --------- ------\n");
2138 /* each int_node is a syscall */
2140 stats = inode->priv;
2142 double min = (double)(stats->min) / NSEC_PER_MSEC;
2143 double max = (double)(stats->max) / NSEC_PER_MSEC;
2144 double avg = avg_stats(stats);
2146 u64 n = (u64) stats->n;
/* pct is computed before avg is rescaled below */
2148 pct = avg ? 100.0 * stddev_stats(stats)/avg : 0.0;
2149 avg /= NSEC_PER_MSEC;
2151 sc = &trace->syscalls.table[inode->i];
2152 printed += fprintf(fp, " %-15s", sc->name);
2153 printed += fprintf(fp, " %8" PRIu64 " %9.3f %9.3f",
2155 printed += fprintf(fp, " %9.3f %9.2f%%\n", max, pct);
2158 inode = intlist__next(inode);
2161 printed += fprintf(fp, "\n\n");
/*
 * Context handed to trace__fprintf_one_thread() through the void *priv
 * argument of machine__for_each_thread(); that callback reads ->fp,
 * ->trace and ->printed and adds its output length to ->printed.
 */
2166 /* struct used to pass data to per-thread function */
2167 struct summary_data {
2169 struct trace *trace;
/*
 * Per-thread callback for machine__for_each_thread(): print one summary
 * entry for @thread.  @priv is a struct summary_data.
 *
 * The entry shows the comm and tid, the thread's event count, its share of
 * all events as a percentage (ttrace->nr_events / trace->nr_events * 100),
 * the accumulated runtime in msec, and then the per-syscall table from
 * thread__dump_stats().
 * NOTE(review): `printed` starts from data->printed and is then added back
 * to data->printed, so earlier output appears to be counted twice -- confirm
 * whether callers rely on the total.
 */
2173 static int trace__fprintf_one_thread(struct thread *thread, void *priv)
2175 struct summary_data *data = priv;
2176 FILE *fp = data->fp;
2177 size_t printed = data->printed;
2178 struct trace *trace = data->trace;
2179 struct thread_trace *ttrace = thread->priv;
2185 ratio = (double)ttrace->nr_events / trace->nr_events * 100.0;
2187 printed += fprintf(fp, " %s (%d), ", thread__comm_str(thread), thread->tid);
2188 printed += fprintf(fp, "%lu events, ", ttrace->nr_events);
2189 printed += fprintf(fp, "%.1f%%", ratio);
2190 printed += fprintf(fp, ", %.3f msec\n", ttrace->runtime_ms);
2191 printed += thread__dump_stats(ttrace, trace, fp);
2193 data->printed += printed;
/*
 * Print the summary for all threads of trace->host to @fp: the header
 * first, then one entry per thread via trace__fprintf_one_thread().
 * Returns the character count accumulated in data.printed.
 */
2198 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
2200 struct summary_data data = {
2204 data.printed = trace__fprintf_threads_header(fp);
2206 machine__for_each_thread(trace->host, trace__fprintf_one_thread, &data);
2208 return data.printed;
2211 static int trace__set_duration(const struct option *opt, const char *str,
2212 int unset __maybe_unused)
2214 struct trace *trace = opt->value;
2216 trace->duration_filter = atof(str);
/*
 * Open @filename for writing as trace->output.
 *
 * If the file already exists and is not empty, it is first renamed to
 * "<filename>.old" so the previous contents are kept.  Returns 0 on
 * success or -errno when fopen() fails.
 * NOTE(review): the result of rename() is not checked; if it fails the
 * existing file is truncated by fopen(..., "w").
 */
2220 static int trace__open_output(struct trace *trace, const char *filename)
2224 if (!stat(filename, &st) && st.st_size) {
2225 char oldname[PATH_MAX];
2227 scnprintf(oldname, sizeof(oldname), "%s.old", filename);
2229 rename(filename, oldname);
2232 trace->output = fopen(filename, "w");
2234 return trace->output == NULL ? -errno : 0;
2237 int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
2239 const char * const trace_usage[] = {
2240 "perf trace [<options>] [<command>]",
2241 "perf trace [<options>] -- <command> [<options>]",
2242 "perf trace record [<options>] [<command>]",
2243 "perf trace record [<options>] -- <command> [<options>]",
2246 struct trace trace = {
2248 .machine = audit_detect_machine(),
2249 .open_id = audit_name_to_syscall("open", trace.audit.machine),
2259 .user_freq = UINT_MAX,
2260 .user_interval = ULLONG_MAX,
2261 .no_buffering = true,
2267 const char *output_name = NULL;
2268 const char *ev_qualifier_str = NULL;
2269 const struct option trace_options[] = {
2270 OPT_BOOLEAN(0, "comm", &trace.show_comm,
2271 "show the thread COMM next to its id"),
2272 OPT_BOOLEAN(0, "tool_stats", &trace.show_tool_stats, "show tool stats"),
2273 OPT_STRING('e', "expr", &ev_qualifier_str, "expr",
2274 "list of events to trace"),
2275 OPT_STRING('o', "output", &output_name, "file", "output file name"),
2276 OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"),
2277 OPT_STRING('p', "pid", &trace.opts.target.pid, "pid",
2278 "trace events on existing process id"),
2279 OPT_STRING('t', "tid", &trace.opts.target.tid, "tid",
2280 "trace events on existing thread id"),
2281 OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide,
2282 "system-wide collection from all CPUs"),
2283 OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu",
2284 "list of cpus to monitor"),
2285 OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit,
2286 "child tasks do not inherit counters"),
2287 OPT_CALLBACK('m', "mmap-pages", &trace.opts.mmap_pages, "pages",
2288 "number of mmap data pages",
2289 perf_evlist__parse_mmap_pages),
2290 OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user",
2292 OPT_CALLBACK(0, "duration", &trace, "float",
2293 "show only events with duration > N.M ms",
2294 trace__set_duration),
2295 OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"),
2296 OPT_INCR('v', "verbose", &verbose, "be more verbose"),
2297 OPT_BOOLEAN('T', "time", &trace.full_time,
2298 "Show full timestamp, not time relative to first start"),
2299 OPT_BOOLEAN('s', "summary", &trace.summary_only,
2300 "Show only syscall summary with statistics"),
2301 OPT_BOOLEAN('S', "with-summary", &trace.summary,
2302 "Show all syscalls and summary with statistics"),
2308 if ((argc > 1) && (strcmp(argv[1], "record") == 0))
2309 return trace__record(argc-2, &argv[2]);
2311 argc = parse_options(argc, argv, trace_options, trace_usage, 0);
2313 /* summary_only implies summary option, but don't overwrite summary if set */
2314 if (trace.summary_only)
2315 trace.summary = trace.summary_only;
2317 if (output_name != NULL) {
2318 err = trace__open_output(&trace, output_name);
2320 perror("failed to create output file");
2325 if (ev_qualifier_str != NULL) {
2326 const char *s = ev_qualifier_str;
2328 trace.not_ev_qualifier = *s == '!';
2329 if (trace.not_ev_qualifier)
2331 trace.ev_qualifier = strlist__new(true, s);
2332 if (trace.ev_qualifier == NULL) {
2333 fputs("Not enough memory to parse event qualifier",
2340 err = target__validate(&trace.opts.target);
2342 target__strerror(&trace.opts.target, err, bf, sizeof(bf));
2343 fprintf(trace.output, "%s", bf);
2347 err = target__parse_uid(&trace.opts.target);
2349 target__strerror(&trace.opts.target, err, bf, sizeof(bf));
2350 fprintf(trace.output, "%s", bf);
2354 if (!argc && target__none(&trace.opts.target))
2355 trace.opts.target.system_wide = true;
2358 err = trace__replay(&trace);
2360 err = trace__run(&trace, argc, argv);
2363 if (output_name != NULL)
2364 fclose(trace.output);