1 #include <traceevent/event-parse.h>
3 #include "util/color.h"
4 #include "util/debug.h"
5 #include "util/evlist.h"
6 #include "util/machine.h"
7 #include "util/session.h"
8 #include "util/thread.h"
9 #include "util/parse-options.h"
10 #include "util/strlist.h"
11 #include "util/intlist.h"
12 #include "util/thread_map.h"
13 #include "util/stat.h"
17 #include <sys/eventfd.h>
19 #include <linux/futex.h>
/*
 * Fallback values for MAP_ and MADV_ constants. The MADV_MERGEABLE and
 * MADV_UNMERGEABLE fallbacks are wrapped in #ifndef so an existing
 * definition is left alone.
 * NOTE(review): the guards for MAP_STACK and MADV_HWPOISON and all closing
 * #endif lines are elided from this listing -- confirm they exist.
 */
21 /* For older distros: */
23 # define MAP_STACK 0x20000
27 # define MADV_HWPOISON 100
30 #ifndef MADV_MERGEABLE
31 # define MADV_MERGEABLE 12
34 #ifndef MADV_UNMERGEABLE
35 # define MADV_UNMERGEABLE 13
/*
 * NOTE(review): lone member of a struct whose remaining lines are elided
 * from this listing; the fd formatters below read it as arg->thread.
 */
40 struct thread *thread;
/*
 * DEFINE_STRARRAY(array) emits the head of a `struct strarray` definition
 * named strarray__<array>, with .nr_entries computed by ARRAY_SIZE(array).
 * DEFINE_STRARRAY_OFFSET(array, off) does the same with an extra `off`
 * argument.
 * NOTE(review): the remaining initializers are elided here; presumably
 * .entries = array and, for the second form, .offset = off (the strarray
 * formatter subtracts sa->offset from the value) -- confirm.
 */
53 #define DEFINE_STRARRAY(array) struct strarray strarray__##array = { \
54 .nr_entries = ARRAY_SIZE(array), \
58 #define DEFINE_STRARRAY_OFFSET(array, off) struct strarray strarray__##array = { \
60 .nr_entries = ARRAY_SIZE(array), \
/*
 * Print arg->val as a symbolic name taken from the strarray in arg->parm.
 * The table index is arg->val minus sa->offset; an index outside
 * [0, nr_entries) is printed numerically with the intfmt format instead.
 * Returns the scnprintf() result.
 * NOTE(review): intfmt is used below but its parameter line is elided from
 * this listing.
 */
64 static size_t __syscall_arg__scnprintf_strarray(char *bf, size_t size,
66 struct syscall_arg *arg)
68 struct strarray *sa = arg->parm;
69 int idx = arg->val - sa->offset;
/* Out-of-table values fall back to the caller-chosen numeric format. */
71 if (idx < 0 || idx >= sa->nr_entries)
72 return scnprintf(bf, size, intfmt, arg->val);
74 return scnprintf(bf, size, "%s", sa->entries[idx]);
/*
 * Strarray formatter that prints values missing from the table in decimal
 * ("%d"). Referenced from the format table through SCA_STRARRAY.
 */
77 static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size,
78 struct syscall_arg *arg)
80 return __syscall_arg__scnprintf_strarray(bf, size, "%d", arg);
83 #define SCA_STRARRAY syscall_arg__scnprintf_strarray
/*
 * Strarray formatter that prints values missing from the table in hex
 * ("%#x"). Referenced from the format table through SCA_STRHEXARRAY.
 */
85 static size_t syscall_arg__scnprintf_strhexarray(char *bf, size_t size,
86 struct syscall_arg *arg)
88 return __syscall_arg__scnprintf_strarray(bf, size, "%#x", arg);
91 #define SCA_STRHEXARRAY syscall_arg__scnprintf_strhexarray
/*
 * Forward declaration of the file descriptor formatter; the definition
 * appears further down in this file. SCA_FD is the name used in the
 * format table.
 */
93 static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
94 struct syscall_arg *arg);
96 #define SCA_FD syscall_arg__scnprintf_fd
/*
 * fd formatter used for the directory-fd argument of the *at() entries in
 * the format table: prints "CWD" in one case and otherwise defers to the
 * regular fd formatter.
 * NOTE(review): the condition guarding the "CWD" return is elided from
 * this listing -- presumably a comparison against AT_FDCWD; confirm.
 */
98 static size_t syscall_arg__scnprintf_fd_at(char *bf, size_t size,
99 struct syscall_arg *arg)
104 return scnprintf(bf, size, "CWD");
106 return syscall_arg__scnprintf_fd(bf, size, arg);
109 #define SCA_FDAT syscall_arg__scnprintf_fd_at
/*
 * Forward declaration of the fd formatter used for close(); defined
 * further down in this file. SCA_CLOSE_FD is the name used in the format
 * table.
 */
111 static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
112 struct syscall_arg *arg);
114 #define SCA_CLOSE_FD syscall_arg__scnprintf_close_fd
/*
 * Print arg->val in hexadecimal with a 0x prefix ("%#lx"). Used for
 * address-like arguments and, by default, for pointer fields.
 */
116 static size_t syscall_arg__scnprintf_hex(char *bf, size_t size,
117 struct syscall_arg *arg)
119 return scnprintf(bf, size, "%#lx", arg->val);
122 #define SCA_HEX syscall_arg__scnprintf_hex
/*
 * Format a memory protection value (arg->val) as '|'-separated PROT_
 * names. PROT_NONE is printed as "NONE". P_MMAP_PROT(n) appends the name
 * n when the PROT_<n> bit is set, inserting '|' if something was already
 * printed. The final scnprintf emits the remaining value of prot in hex.
 * NOTE(review): the rest of the macro body (presumably clearing the bit
 * from prot), most P_MMAP_PROT() uses, the guard on the hex fallback and
 * the return statement are elided from this listing -- confirm.
 */
124 static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size,
125 struct syscall_arg *arg)
127 int printed = 0, prot = arg->val;
129 if (prot == PROT_NONE)
130 return scnprintf(bf, size, "NONE");
131 #define P_MMAP_PROT(n) \
132 if (prot & PROT_##n) { \
133 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
143 P_MMAP_PROT(GROWSDOWN);
144 P_MMAP_PROT(GROWSUP);
148 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", prot);
153 #define SCA_MMAP_PROT syscall_arg__scnprintf_mmap_prot
/*
 * Format mmap flags (arg->val) as '|'-separated MAP_ names.
 * P_MMAP_FLAG(n) appends the name n when the MAP_<n> bit is set.
 * MAP_UNINITIALIZED is only handled when the macro is defined. The final
 * scnprintf emits the remaining value of flags in hex.
 * NOTE(review): the rest of the macro body, several P_MMAP_FLAG() uses,
 * the #endif, the guard on the hex fallback and the return statement are
 * elided from this listing -- confirm.
 */
155 static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size,
156 struct syscall_arg *arg)
158 int printed = 0, flags = arg->val;
160 #define P_MMAP_FLAG(n) \
161 if (flags & MAP_##n) { \
162 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
167 P_MMAP_FLAG(PRIVATE);
171 P_MMAP_FLAG(ANONYMOUS);
172 P_MMAP_FLAG(DENYWRITE);
173 P_MMAP_FLAG(EXECUTABLE);
176 P_MMAP_FLAG(GROWSDOWN);
178 P_MMAP_FLAG(HUGETLB);
181 P_MMAP_FLAG(NONBLOCK);
182 P_MMAP_FLAG(NORESERVE);
183 P_MMAP_FLAG(POPULATE);
185 #ifdef MAP_UNINITIALIZED
186 P_MMAP_FLAG(UNINITIALIZED);
191 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
196 #define SCA_MMAP_FLAGS syscall_arg__scnprintf_mmap_flags
/*
 * Format the madvise() behavior argument. P_MADV_BHV(n) expands to a
 * `case MADV_<n>:` label that returns after printing the name n, so the
 * uses below are case labels of a switch whose opening line is elided.
 * Values without a case are printed in hex by the final scnprintf.
 * SOFT_OFFLINE and NOHUGEPAGE are only handled when the corresponding
 * MADV_ macro is defined.
 * NOTE(review): the switch statement, some cases and the matching #endif
 * lines are elided from this listing.
 */
198 static size_t syscall_arg__scnprintf_madvise_behavior(char *bf, size_t size,
199 struct syscall_arg *arg)
201 int behavior = arg->val;
204 #define P_MADV_BHV(n) case MADV_##n: return scnprintf(bf, size, #n)
207 P_MADV_BHV(SEQUENTIAL);
208 P_MADV_BHV(WILLNEED);
209 P_MADV_BHV(DONTNEED);
211 P_MADV_BHV(DONTFORK);
213 P_MADV_BHV(HWPOISON);
214 #ifdef MADV_SOFT_OFFLINE
215 P_MADV_BHV(SOFT_OFFLINE);
217 P_MADV_BHV(MERGEABLE);
218 P_MADV_BHV(UNMERGEABLE);
220 P_MADV_BHV(HUGEPAGE);
222 #ifdef MADV_NOHUGEPAGE
223 P_MADV_BHV(NOHUGEPAGE);
226 P_MADV_BHV(DONTDUMP);
235 return scnprintf(bf, size, "%#x", behavior);
238 #define SCA_MADV_BHV syscall_arg__scnprintf_madvise_behavior
/*
 * Format the flock() operation argument (arg->val) as '|'-separated LOCK_
 * names. A command is printed only when all bits of LOCK_<cmd> are set in
 * op (full-mask comparison). The last scnprintf prints op in hex.
 * NOTE(review): the condition for the "NONE" return, the macro's #define
 * line and remaining body, its uses, the guard on the hex fallback and the
 * return statement are elided from this listing.
 */
240 static size_t syscall_arg__scnprintf_flock(char *bf, size_t size,
241 struct syscall_arg *arg)
243 int printed = 0, op = arg->val;
246 return scnprintf(bf, size, "NONE");
248 if ((op & LOCK_##cmd) == LOCK_##cmd) { \
249 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #cmd); \
264 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", op);
269 #define SCA_FLOCK syscall_arg__scnprintf_flock
/*
 * Format the futex() op argument. The command is op & FUTEX_CMD_MASK and
 * is printed by name (hex for unknown commands); "|PRIV" and "|CLKRT" are
 * appended when FUTEX_PRIVATE_FLAG / FUTEX_CLOCK_REALTIME are set in op.
 * Each command also ORs SCF_ bits into arg->mask; the enum gives one bit
 * per futex argument.
 * NOTE(review): presumably a bit set in arg->mask makes the caller skip
 * printing that argument for this command -- confirm against the argument
 * printing loop. The declarations of op and printed, the switch line, some
 * enum members and the return are elided from this listing.
 */
271 static size_t syscall_arg__scnprintf_futex_op(char *bf, size_t size, struct syscall_arg *arg)
273 enum syscall_futex_args {
274 SCF_UADDR = (1 << 0),
277 SCF_TIMEOUT = (1 << 3),
278 SCF_UADDR2 = (1 << 4),
282 int cmd = op & FUTEX_CMD_MASK;
/* P_FUTEX_OP(n) opens a case: it prints the name; the mask update and break follow at each use. */
286 #define P_FUTEX_OP(n) case FUTEX_##n: printed = scnprintf(bf, size, #n);
287 P_FUTEX_OP(WAIT); arg->mask |= SCF_VAL3|SCF_UADDR2; break;
288 P_FUTEX_OP(WAKE); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
289 P_FUTEX_OP(FD); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
290 P_FUTEX_OP(REQUEUE); arg->mask |= SCF_VAL3|SCF_TIMEOUT; break;
291 P_FUTEX_OP(CMP_REQUEUE); arg->mask |= SCF_TIMEOUT; break;
292 P_FUTEX_OP(CMP_REQUEUE_PI); arg->mask |= SCF_TIMEOUT; break;
293 P_FUTEX_OP(WAKE_OP); break;
294 P_FUTEX_OP(LOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
295 P_FUTEX_OP(UNLOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
296 P_FUTEX_OP(TRYLOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2; break;
297 P_FUTEX_OP(WAIT_BITSET); arg->mask |= SCF_UADDR2; break;
298 P_FUTEX_OP(WAKE_BITSET); arg->mask |= SCF_UADDR2; break;
299 P_FUTEX_OP(WAIT_REQUEUE_PI); break;
300 default: printed = scnprintf(bf, size, "%#x", cmd); break;
/* Modifier flags are carried in op outside the command bits. */
303 if (op & FUTEX_PRIVATE_FLAG)
304 printed += scnprintf(bf + printed, size - printed, "|PRIV");
306 if (op & FUTEX_CLOCK_REALTIME)
307 printed += scnprintf(bf + printed, size - printed, "|CLKRT");
312 #define SCA_FUTEX_OP syscall_arg__scnprintf_futex_op
/*
 * Name tables for enumerated syscall arguments. Each table is wrapped by
 * DEFINE_STRARRAY / DEFINE_STRARRAY_OFFSET into a strarray__<name> object
 * that the format table points to; the strarray formatter indexes a table
 * with the argument value minus the table's offset. epoll_ctl_ops is
 * declared with offset 1; the others use the offset-less form.
 * NOTE(review): several tables have elided continuation and closing lines
 * in this listing, so the entry lists shown may be incomplete.
 */
314 static const char *epoll_ctl_ops[] = { "ADD", "DEL", "MOD", };
315 static DEFINE_STRARRAY_OFFSET(epoll_ctl_ops, 1);
317 static const char *itimers[] = { "REAL", "VIRTUAL", "PROF", };
318 static DEFINE_STRARRAY(itimers);
320 static const char *whences[] = { "SET", "CUR", "END",
328 static DEFINE_STRARRAY(whences);
330 static const char *fcntl_cmds[] = {
331 "DUPFD", "GETFD", "SETFD", "GETFL", "SETFL", "GETLK", "SETLK",
332 "SETLKW", "SETOWN", "GETOWN", "SETSIG", "GETSIG", "F_GETLK64",
333 "F_SETLK64", "F_SETLKW64", "F_SETOWN_EX", "F_GETOWN_EX",
336 static DEFINE_STRARRAY(fcntl_cmds);
338 static const char *rlimit_resources[] = {
339 "CPU", "FSIZE", "DATA", "STACK", "CORE", "RSS", "NPROC", "NOFILE",
340 "MEMLOCK", "AS", "LOCKS", "SIGPENDING", "MSGQUEUE", "NICE", "RTPRIO",
343 static DEFINE_STRARRAY(rlimit_resources);
345 static const char *sighow[] = { "BLOCK", "UNBLOCK", "SETMASK", };
346 static DEFINE_STRARRAY(sighow);
348 static const char *clockid[] = {
349 "REALTIME", "MONOTONIC", "PROCESS_CPUTIME_ID", "THREAD_CPUTIME_ID",
350 "MONOTONIC_RAW", "REALTIME_COARSE", "MONOTONIC_COARSE",
352 static DEFINE_STRARRAY(clockid);
354 static const char *socket_families[] = {
355 "UNSPEC", "LOCAL", "INET", "AX25", "IPX", "APPLETALK", "NETROM",
356 "BRIDGE", "ATMPVC", "X25", "INET6", "ROSE", "DECnet", "NETBEUI",
357 "SECURITY", "KEY", "NETLINK", "PACKET", "ASH", "ECONET", "ATMSVC",
358 "RDS", "SNA", "IRDA", "PPPOX", "WANPIPE", "LLC", "IB", "CAN", "TIPC",
359 "BLUETOOTH", "IUCV", "RXRPC", "ISDN", "PHONET", "IEEE802154", "CAIF",
360 "ALG", "NFC", "VSOCK",
362 static DEFINE_STRARRAY(socket_families);
/*
 * Format the socket() type argument. The value is split into the socket
 * type proper (bits within SOCK_TYPE_MASK, 0xf when not already defined)
 * and the flag bits above it. The type is printed by name through
 * P_SK_TYPE cases, or in hex when unknown. P_SK_FLAG(n) appends "|<n>" and
 * clears the SOCK_<n> bit, and the last scnprintf appends what is left of
 * flags in hex.
 * NOTE(review): the #endif, local declarations, the switch line, most
 * case/flag uses, the guard on the trailing hex output and the return are
 * elided from this listing.
 */
364 #ifndef SOCK_TYPE_MASK
365 #define SOCK_TYPE_MASK 0xf
368 static size_t syscall_arg__scnprintf_socket_type(char *bf, size_t size,
369 struct syscall_arg *arg)
373 flags = type & ~SOCK_TYPE_MASK;
375 type &= SOCK_TYPE_MASK;
377 * Can't use a strarray, MIPS may override for ABI reasons.
380 #define P_SK_TYPE(n) case SOCK_##n: printed = scnprintf(bf, size, #n); break;
385 P_SK_TYPE(SEQPACKET);
390 printed = scnprintf(bf, size, "%#x", type);
393 #define P_SK_FLAG(n) \
394 if (flags & SOCK_##n) { \
395 printed += scnprintf(bf + printed, size - printed, "|%s", #n); \
396 flags &= ~SOCK_##n; \
404 printed += scnprintf(bf + printed, size - printed, "|%#x", flags);
409 #define SCA_SK_TYPE syscall_arg__scnprintf_socket_type
/*
 * Fallback MSG_ constant definitions followed by the formatter for the
 * flags argument of the send/recv family. Flags are printed as
 * '|'-separated MSG_ names; the last scnprintf prints flags in hex.
 * NOTE(review): the guards for MSG_PROBE and MSG_FASTOPEN, the #endif
 * lines, the condition for the "NONE" return, the rest of the macro body,
 * several P_MSG_FLAG() uses, the guard on the hex fallback and the return
 * are elided from this listing.
 */
412 #define MSG_PROBE 0x10
414 #ifndef MSG_WAITFORONE
415 #define MSG_WAITFORONE 0x10000
417 #ifndef MSG_SENDPAGE_NOTLAST
418 #define MSG_SENDPAGE_NOTLAST 0x20000
421 #define MSG_FASTOPEN 0x20000000
424 static size_t syscall_arg__scnprintf_msg_flags(char *bf, size_t size,
425 struct syscall_arg *arg)
427 int printed = 0, flags = arg->val;
430 return scnprintf(bf, size, "NONE");
431 #define P_MSG_FLAG(n) \
432 if (flags & MSG_##n) { \
433 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
439 P_MSG_FLAG(DONTROUTE);
444 P_MSG_FLAG(DONTWAIT);
451 P_MSG_FLAG(ERRQUEUE);
452 P_MSG_FLAG(NOSIGNAL);
454 P_MSG_FLAG(WAITFORONE);
455 P_MSG_FLAG(SENDPAGE_NOTLAST);
456 P_MSG_FLAG(FASTOPEN);
457 P_MSG_FLAG(CMSG_CLOEXEC);
461 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
466 #define SCA_MSG_FLAGS syscall_arg__scnprintf_msg_flags
/*
 * Format the access() mode argument. F_OK (0) prints as "F"; otherwise the
 * name of each set <n>_OK bit is appended with no separator, and the last
 * scnprintf appends "|" plus mode in hex.
 * NOTE(review): local declarations, the macro's #define line and uses,
 * the guard on the hex output and the return are elided from this
 * listing.
 */
468 static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size,
469 struct syscall_arg *arg)
474 if (mode == F_OK) /* 0 */
475 return scnprintf(bf, size, "F");
477 if (mode & n##_OK) { \
478 printed += scnprintf(bf + printed, size - printed, "%s", #n); \
488 printed += scnprintf(bf + printed, size - printed, "|%#x", mode);
493 #define SCA_ACCMODE syscall_arg__scnprintf_access_mode
/*
 * Format open()-style flags as '|'-separated O_ names. When O_CREAT is not
 * set, the bit for the following argument (arg->idx + 1, the mode
 * parameter) is set in arg->mask. O_SYNC is matched on all of its bits,
 * not just any one of them. The last scnprintf prints flags in hex.
 * NOTE(review): the condition for the "RDONLY" return, the macro's
 * #define line and uses, the guard on the hex fallback and the return are
 * elided from this listing.
 */
495 static size_t syscall_arg__scnprintf_open_flags(char *bf, size_t size,
496 struct syscall_arg *arg)
498 int printed = 0, flags = arg->val;
500 if (!(flags & O_CREAT))
501 arg->mask |= 1 << (arg->idx + 1); /* Mask the mode parm */
504 return scnprintf(bf, size, "RDONLY");
506 if (flags & O_##n) { \
507 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
531 if ((flags & O_SYNC) == O_SYNC)
532 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", "SYNC");
544 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
549 #define SCA_OPEN_FLAGS syscall_arg__scnprintf_open_flags
/*
 * Format eventfd2() flags as '|'-separated EFD_ names; the last scnprintf
 * prints flags in hex.
 * NOTE(review): the condition for the "NONE" return, the macro's #define
 * line and uses, the guard on the hex fallback and the return are elided
 * from this listing.
 */
551 static size_t syscall_arg__scnprintf_eventfd_flags(char *bf, size_t size,
552 struct syscall_arg *arg)
554 int printed = 0, flags = arg->val;
557 return scnprintf(bf, size, "NONE");
559 if (flags & EFD_##n) { \
560 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
570 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
575 #define SCA_EFD_FLAGS syscall_arg__scnprintf_eventfd_flags
/*
 * Format pipe2() flags as '|'-separated O_ names; the last scnprintf
 * prints flags in hex.
 * NOTE(review): the macro's #define line and uses, the guard on the hex
 * fallback and the return are elided from this listing.
 */
577 static size_t syscall_arg__scnprintf_pipe_flags(char *bf, size_t size,
578 struct syscall_arg *arg)
580 int printed = 0, flags = arg->val;
583 if (flags & O_##n) { \
584 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
593 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
598 #define SCA_PIPE_FLAGS syscall_arg__scnprintf_pipe_flags
/*
 * Format a signal number. P_SIGNUM(n) expands to a `case SIG<n>:` label
 * that returns after printing the name n without the SIG prefix; numbers
 * without a case are printed in hex.
 * NOTE(review): the declaration of sig, the switch line and every
 * P_SIGNUM() use are elided from this listing.
 */
600 static size_t syscall_arg__scnprintf_signum(char *bf, size_t size, struct syscall_arg *arg)
605 #define P_SIGNUM(n) case SIG##n: return scnprintf(bf, size, #n)
640 return scnprintf(bf, size, "%#x", sig);
643 #define SCA_SIGNUM syscall_arg__scnprintf_signum
/*
 * Names of terminal ioctl commands, wrapped in a strarray with offset
 * 0x5401 (the TCGETS value defined here), so index 0 corresponds to
 * command 0x5401. Designated initializers ([0x27], [0x50], [0x60]) jump
 * over gaps in the table; skipped slots are implicitly NULL.
 * NOTE(review): the strarray formatter prints sa->entries[idx] with "%s"
 * after only a range check, so a NULL gap entry would be passed to "%s" --
 * confirm whether that is acceptable. The guard around the TCGETS define
 * and the closing line of the array are elided from this listing.
 */
645 #define TCGETS 0x5401
647 static const char *tioctls[] = {
648 "TCGETS", "TCSETS", "TCSETSW", "TCSETSF", "TCGETA", "TCSETA", "TCSETAW",
649 "TCSETAF", "TCSBRK", "TCXONC", "TCFLSH", "TIOCEXCL", "TIOCNXCL",
650 "TIOCSCTTY", "TIOCGPGRP", "TIOCSPGRP", "TIOCOUTQ", "TIOCSTI",
651 "TIOCGWINSZ", "TIOCSWINSZ", "TIOCMGET", "TIOCMBIS", "TIOCMBIC",
652 "TIOCMSET", "TIOCGSOFTCAR", "TIOCSSOFTCAR", "FIONREAD", "TIOCLINUX",
653 "TIOCCONS", "TIOCGSERIAL", "TIOCSSERIAL", "TIOCPKT", "FIONBIO",
654 "TIOCNOTTY", "TIOCSETD", "TIOCGETD", "TCSBRKP", [0x27] = "TIOCSBRK",
655 "TIOCCBRK", "TIOCGSID", "TCGETS2", "TCSETS2", "TCSETSW2", "TCSETSF2",
656 "TIOCGRS485", "TIOCSRS485", "TIOCGPTN", "TIOCSPTLCK",
657 "TIOCGDEV||TCGETX", "TCSETX", "TCSETXF", "TCSETXW", "TIOCSIG",
658 "TIOCVHANGUP", "TIOCGPKT", "TIOCGPTLCK", "TIOCGEXCL",
659 [0x50] = "FIONCLEX", "FIOCLEX", "FIOASYNC", "TIOCSERCONFIG",
660 "TIOCSERGWILD", "TIOCSERSWILD", "TIOCGLCKTRMIOS", "TIOCSLCKTRMIOS",
661 "TIOCSERGSTRUCT", "TIOCSERGETLSR", "TIOCSERGETMULTI", "TIOCSERSETMULTI",
662 "TIOCMIWAIT", "TIOCGICOUNT", [0x60] = "FIOQSIZE",
665 static DEFINE_STRARRAY_OFFSET(tioctls, 0x5401);
/*
 * Shorthand for a format table entry whose argument number `arg` is
 * printed through the strarray formatter using strarray__<array>.
 * The `name` parameter is not used in the expansion; at the call sites it
 * only labels the argument for the reader.
 */
667 #define STRARRAY(arg, name, array) \
668 .arg_scnprintf = { [arg] = SCA_STRARRAY, }, \
669 .arg_parm = { [arg] = &strarray__##array, }
/*
 * Per-syscall formatting overrides. Entries are located by name with
 * bsearch() and a strcmp() comparator, so the table must stay sorted by
 * .name in strcmp() order. Fields used by the entries below:
 *   .name          syscall name (search key)
 *   .alias         alternative name tried for the sys_enter_ tracepoint
 *                  when the primary name yields no format
 *   .arg_scnprintf per-argument formatter, indexed by argument position
 *   .arg_parm      per-argument formatter parameter (e.g. a strarray)
 *   .errmsg / .timeout / .hexret  boolean flags; their consumers are not
 *                  visible in this listing
 * NOTE(review): most struct member declarations and the closing lines of
 * the struct and the array are elided from this listing.
 */
671 static struct syscall_fmt {
674 size_t (*arg_scnprintf[6])(char *bf, size_t size, struct syscall_arg *arg);
680 { .name = "access", .errmsg = true,
681 .arg_scnprintf = { [1] = SCA_ACCMODE, /* mode */ }, },
682 { .name = "arch_prctl", .errmsg = true, .alias = "prctl", },
683 { .name = "brk", .hexret = true,
684 .arg_scnprintf = { [0] = SCA_HEX, /* brk */ }, },
685 { .name = "clock_gettime", .errmsg = true, STRARRAY(0, clk_id, clockid), },
686 { .name = "close", .errmsg = true,
687 .arg_scnprintf = { [0] = SCA_CLOSE_FD, /* fd */ }, },
688 { .name = "connect", .errmsg = true, },
689 { .name = "dup", .errmsg = true,
690 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
691 { .name = "dup2", .errmsg = true,
692 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
693 { .name = "dup3", .errmsg = true,
694 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
695 { .name = "epoll_ctl", .errmsg = true, STRARRAY(1, op, epoll_ctl_ops), },
696 { .name = "eventfd2", .errmsg = true,
697 .arg_scnprintf = { [1] = SCA_EFD_FLAGS, /* flags */ }, },
698 { .name = "faccessat", .errmsg = true,
699 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
700 { .name = "fadvise64", .errmsg = true,
701 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
702 { .name = "fallocate", .errmsg = true,
703 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
704 { .name = "fchdir", .errmsg = true,
705 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
706 { .name = "fchmod", .errmsg = true,
707 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
708 { .name = "fchmodat", .errmsg = true,
709 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
710 { .name = "fchown", .errmsg = true,
711 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
712 { .name = "fchownat", .errmsg = true,
713 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
714 { .name = "fcntl", .errmsg = true,
715 .arg_scnprintf = { [0] = SCA_FD, /* fd */
716 [1] = SCA_STRARRAY, /* cmd */ },
717 .arg_parm = { [1] = &strarray__fcntl_cmds, /* cmd */ }, },
718 { .name = "fdatasync", .errmsg = true,
719 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
720 { .name = "flock", .errmsg = true,
721 .arg_scnprintf = { [0] = SCA_FD, /* fd */
722 [1] = SCA_FLOCK, /* cmd */ }, },
723 { .name = "fsetxattr", .errmsg = true,
724 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
725 { .name = "fstat", .errmsg = true, .alias = "newfstat",
726 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
727 { .name = "fstatat", .errmsg = true, .alias = "newfstatat",
728 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
729 { .name = "fstatfs", .errmsg = true,
730 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
731 { .name = "fsync", .errmsg = true,
732 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
733 { .name = "ftruncate", .errmsg = true,
734 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
735 { .name = "futex", .errmsg = true,
736 .arg_scnprintf = { [1] = SCA_FUTEX_OP, /* op */ }, },
737 { .name = "futimesat", .errmsg = true,
738 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
739 { .name = "getdents", .errmsg = true,
740 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
741 { .name = "getdents64", .errmsg = true,
742 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
743 { .name = "getitimer", .errmsg = true, STRARRAY(0, which, itimers), },
744 { .name = "getrlimit", .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
745 { .name = "ioctl", .errmsg = true,
746 .arg_scnprintf = { [0] = SCA_FD, /* fd */
747 [1] = SCA_STRHEXARRAY, /* cmd */
748 [2] = SCA_HEX, /* arg */ },
749 .arg_parm = { [1] = &strarray__tioctls, /* cmd */ }, },
750 { .name = "kill", .errmsg = true,
751 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
752 { .name = "linkat", .errmsg = true,
753 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
754 { .name = "lseek", .errmsg = true,
755 .arg_scnprintf = { [0] = SCA_FD, /* fd */
756 [2] = SCA_STRARRAY, /* whence */ },
757 .arg_parm = { [2] = &strarray__whences, /* whence */ }, },
758 { .name = "lstat", .errmsg = true, .alias = "newlstat", },
759 { .name = "madvise", .errmsg = true,
760 .arg_scnprintf = { [0] = SCA_HEX, /* start */
761 [2] = SCA_MADV_BHV, /* behavior */ }, },
762 { .name = "mkdirat", .errmsg = true,
763 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
764 { .name = "mknodat", .errmsg = true,
765 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
766 { .name = "mlock", .errmsg = true,
767 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
/* NOTE(review): mlockall(2) takes a flags argument, not an address; the "addr" label below looks copied from mlock -- confirm. */
768 { .name = "mlockall", .errmsg = true,
769 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
770 { .name = "mmap", .hexret = true,
771 .arg_scnprintf = { [0] = SCA_HEX, /* addr */
772 [2] = SCA_MMAP_PROT, /* prot */
773 [3] = SCA_MMAP_FLAGS, /* flags */ }, },
774 { .name = "mprotect", .errmsg = true,
775 .arg_scnprintf = { [0] = SCA_HEX, /* start */
776 [2] = SCA_MMAP_PROT, /* prot */ }, },
777 { .name = "mremap", .hexret = true,
778 .arg_scnprintf = { [0] = SCA_HEX, /* addr */
779 [4] = SCA_HEX, /* new_addr */ }, },
780 { .name = "munlock", .errmsg = true,
781 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
782 { .name = "munmap", .errmsg = true,
783 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
784 { .name = "name_to_handle_at", .errmsg = true,
785 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
786 { .name = "newfstatat", .errmsg = true,
787 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
788 { .name = "open", .errmsg = true,
789 .arg_scnprintf = { [1] = SCA_OPEN_FLAGS, /* flags */ }, },
790 { .name = "open_by_handle_at", .errmsg = true,
791 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
792 [2] = SCA_OPEN_FLAGS, /* flags */ }, },
793 { .name = "openat", .errmsg = true,
794 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
795 [2] = SCA_OPEN_FLAGS, /* flags */ }, },
796 { .name = "pipe2", .errmsg = true,
797 .arg_scnprintf = { [1] = SCA_PIPE_FLAGS, /* flags */ }, },
798 { .name = "poll", .errmsg = true, .timeout = true, },
799 { .name = "ppoll", .errmsg = true, .timeout = true, },
800 { .name = "pread", .errmsg = true, .alias = "pread64",
801 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
/* NOTE(review): the alias "pread" would make a failed preadv lookup fall back to sys_enter_pread -- looks suspicious; confirm. */
802 { .name = "preadv", .errmsg = true, .alias = "pread",
803 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
804 { .name = "prlimit64", .errmsg = true, STRARRAY(1, resource, rlimit_resources), },
805 { .name = "pwrite", .errmsg = true, .alias = "pwrite64",
806 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
807 { .name = "pwritev", .errmsg = true,
808 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
809 { .name = "read", .errmsg = true,
810 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
811 { .name = "readlinkat", .errmsg = true,
812 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
813 { .name = "readv", .errmsg = true,
814 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
815 { .name = "recvfrom", .errmsg = true,
816 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
817 { .name = "recvmmsg", .errmsg = true,
818 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
819 { .name = "recvmsg", .errmsg = true,
820 .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
821 { .name = "renameat", .errmsg = true,
822 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
823 { .name = "rt_sigaction", .errmsg = true,
824 .arg_scnprintf = { [0] = SCA_SIGNUM, /* sig */ }, },
825 { .name = "rt_sigprocmask", .errmsg = true, STRARRAY(0, how, sighow), },
826 { .name = "rt_sigqueueinfo", .errmsg = true,
827 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
828 { .name = "rt_tgsigqueueinfo", .errmsg = true,
829 .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
830 { .name = "select", .errmsg = true, .timeout = true, },
831 { .name = "sendmmsg", .errmsg = true,
832 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
833 { .name = "sendmsg", .errmsg = true,
834 .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
835 { .name = "sendto", .errmsg = true,
836 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
837 { .name = "setitimer", .errmsg = true, STRARRAY(0, which, itimers), },
838 { .name = "setrlimit", .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
839 { .name = "shutdown", .errmsg = true,
840 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
841 { .name = "socket", .errmsg = true,
842 .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
843 [1] = SCA_SK_TYPE, /* type */ },
844 .arg_parm = { [0] = &strarray__socket_families, /* family */ }, },
845 { .name = "socketpair", .errmsg = true,
846 .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
847 [1] = SCA_SK_TYPE, /* type */ },
848 .arg_parm = { [0] = &strarray__socket_families, /* family */ }, },
849 { .name = "stat", .errmsg = true, .alias = "newstat", },
850 { .name = "symlinkat", .errmsg = true,
851 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
852 { .name = "tgkill", .errmsg = true,
853 .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
854 { .name = "tkill", .errmsg = true,
855 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
856 { .name = "uname", .errmsg = true, .alias = "newuname", },
857 { .name = "unlinkat", .errmsg = true,
858 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
859 { .name = "utimensat", .errmsg = true,
860 .arg_scnprintf = { [0] = SCA_FDAT, /* dirfd */ }, },
861 { .name = "write", .errmsg = true,
862 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
863 { .name = "writev", .errmsg = true,
864 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
/*
 * bsearch() comparator: `name` is the key (a C string) and `fmtp` points
 * at a table element; the ordering is strcmp() on the element's .name.
 */
867 static int syscall_fmt__cmp(const void *name, const void *fmtp)
869 const struct syscall_fmt *fmt = fmtp;
870 return strcmp(name, fmt->name);
/*
 * Look up the formatting entry for syscall `name` with a binary search
 * over syscall_fmts. Returns the entry, or NULL (bsearch() result) when
 * the syscall has no entry. Relies on the table being sorted by name.
 */
873 static struct syscall_fmt *syscall_fmt__find(const char *name)
875 const int nmemb = ARRAY_SIZE(syscall_fmts);
876 return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp);
/*
 * NOTE(review): members of a struct whose opening line and other members
 * are elided from this listing; later code accesses them through
 * `struct syscall *sc`.
 *   tp_format     tracepoint format obtained for sys_enter_<name>
 *   fmt           matching formatting-table entry, or NULL
 *   arg_scnprintf array of per-argument formatters
 */
880 struct event_format *tp_format;
883 struct syscall_fmt *fmt;
884 size_t (**arg_scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
/*
 * Print a duration to fp as "(<value> ms): ", where the value is
 * t / NSEC_PER_MSEC shown with three decimals. The value is colored: red
 * in one case, yellow when it is at least 0.01 ms, and normal in the
 * remaining case. Returns the number of characters printed.
 * NOTE(review): the condition selecting red and the `else` before the
 * normal-color call are elided from this listing.
 */
888 static size_t fprintf_duration(unsigned long t, FILE *fp)
890 double duration = (double)t / NSEC_PER_MSEC;
891 size_t printed = fprintf(fp, "(");
894 printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration);
895 else if (duration >= 0.01)
896 printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration);
898 printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration);
899 return printed + fprintf(fp, "): ");
/*
 * Per-thread tracing state, stored in thread->priv.
 * NOTE(review): most members are elided from this listing; later code also
 * uses entry_time, entry_str and paths.{max,table}.
 */
902 struct thread_trace {
906 unsigned long nr_events;
/* Keyed by syscall id; each node's priv holds a struct stats. */
914 struct intlist *syscall_stats;
/*
 * Allocate a zeroed thread_trace, mark its fd path table as empty
 * (paths.max = -1) and create the per-syscall stats list.
 * NOTE(review): the NULL check on the allocation and the return statement
 * are elided from this listing; the intlist__new() result is not checked
 * in the visible lines.
 */
917 static struct thread_trace *thread_trace__new(void)
919 struct thread_trace *ttrace = zalloc(sizeof(struct thread_trace));
922 ttrace->paths.max = -1;
924 ttrace->syscall_stats = intlist__new(NULL);
/*
 * Return the thread_trace attached to `thread`, creating it on first use.
 * A warning about dropped samples is printed in red to fp on the failure
 * path.
 * NOTE(review): the failure gotos/labels, any event counting and the
 * return statements are elided from this listing.
 */
929 static struct thread_trace *thread__trace(struct thread *thread, FILE *fp)
931 struct thread_trace *ttrace;
/* Lazily attach the per-thread state. */
936 if (thread->priv == NULL)
937 thread->priv = thread_trace__new();
939 if (thread->priv == NULL)
942 ttrace = thread->priv;
947 color_fprintf(fp, PERF_COLOR_RED,
948 "WARNING: not enough memory, dropping samples!\n");
/*
 * NOTE(review): members of a struct whose opening line and several members
 * are elided from this listing; later code accesses them through
 * `struct trace *trace` (and recovers the struct from `tool` with
 * container_of).
 */
953 struct perf_tool tool;
/* Syscall descriptions indexed by syscall id, grown on demand. */
960 struct syscall *table;
962 struct perf_record_opts opts;
963 struct machine *host;
967 unsigned long nr_events;
/* Optional syscall name filter; not_ev_qualifier is combined with list membership to decide which syscalls are filtered. */
968 struct strlist *ev_qualifier;
969 bool not_ev_qualifier;
971 const char *last_vfs_getname;
972 struct intlist *tid_list;
973 struct intlist *pid_list;
975 bool multiple_threads;
978 bool show_tool_stats;
/* Compared against durations after scaling by NSEC_PER_MSEC. */
979 double duration_filter;
982 u64 vfs_getname, proc_getname;
/*
 * Remember `pathname` as the path behind `fd` for this thread. The
 * per-thread table is grown with realloc() to fd + 1 slots when fd is
 * beyond the current maximum, and the new slots are zeroed. The path is
 * stored as a strdup() copy. Returns 0 on success, -1 if the copy failed.
 * NOTE(review): the realloc() failure check and the `else` between the two
 * memset calls are elided from this listing. In the visible lines the slot
 * is overwritten without freeing a previous string -- confirm callers only
 * store into empty slots. A negative fd is not rejected in the visible
 * lines.
 */
986 static int trace__set_fd_pathname(struct thread *thread, int fd, const char *pathname)
988 struct thread_trace *ttrace = thread->priv;
990 if (fd > ttrace->paths.max) {
991 char **npath = realloc(ttrace->paths.table, (fd + 1) * sizeof(char *));
/* Zero only the slots added beyond the old maximum, or the whole table when it was empty. */
996 if (ttrace->paths.max != -1) {
997 memset(npath + ttrace->paths.max + 1, 0,
998 (fd - ttrace->paths.max) * sizeof(char *));
1000 memset(npath, 0, (fd + 1) * sizeof(char *));
1003 ttrace->paths.table = npath;
1004 ttrace->paths.max = fd;
1007 ttrace->paths.table[fd] = strdup(pathname);
1009 return ttrace->paths.table[fd] != NULL ? 0 : -1;
/*
 * Resolve the path behind `fd` by reading the /proc fd symlink and store
 * it in the thread's path table. The link is /proc/<pid>/fd/<fd> when the
 * thread's pid equals its tid, /proc/<pid>/task/<tid>/fd/<fd> otherwise.
 * lstat() supplies the link length, which must fit in the local buffer
 * with room for the terminator; readlink() does not NUL-terminate, so the
 * terminator is written explicitly. Returns the result of
 * trace__set_fd_pathname().
 * NOTE(review): the declarations of st and ret, the `else` between the two
 * scnprintf calls and the failure return statements are elided from this
 * listing.
 */
1012 static int thread__read_fd_path(struct thread *thread, int fd)
1014 char linkname[PATH_MAX], pathname[PATH_MAX];
1018 if (thread->pid_ == thread->tid) {
1019 scnprintf(linkname, sizeof(linkname),
1020 "/proc/%d/fd/%d", thread->pid_, fd);
1022 scnprintf(linkname, sizeof(linkname),
1023 "/proc/%d/task/%d/fd/%d", thread->pid_, thread->tid, fd);
1026 if (lstat(linkname, &st) < 0 || st.st_size + 1 > (off_t)sizeof(pathname))
1029 ret = readlink(linkname, pathname, sizeof(pathname));
/* Reject a result longer than the size lstat() reported. */
1031 if (ret < 0 || ret > st.st_size)
1034 pathname[ret] = '\0';
1035 return trace__set_fd_pathname(thread, fd, pathname);
/*
 * Return the cached path for `fd` of `thread`. When the table has no entry
 * for fd, the proc_getname statistic is incremented and the path is read
 * through thread__read_fd_path().
 * NOTE(review): the early-return checks (presumably NULL ttrace and
 * negative fd), any condition nested under the cache-miss test and the
 * failure return are elided from this listing.
 */
1038 static const char *thread__fd_path(struct thread *thread, int fd,
1039 struct trace *trace)
1041 struct thread_trace *ttrace = thread->priv;
1049 if ((fd > ttrace->paths.max || ttrace->paths.table[fd] == NULL))
1052 ++trace->stats.proc_getname;
1053 if (thread__read_fd_path(thread, fd)) {
1057 return ttrace->paths.table[fd];
/*
 * Print a file descriptor argument as the decimal number, followed by
 * "<path>" using the path looked up for that fd in the calling thread.
 * NOTE(review): the declaration of fd, the check that path is non-NULL and
 * the return statement are elided from this listing.
 */
1060 static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
1061 struct syscall_arg *arg)
1064 size_t printed = scnprintf(bf, size, "%d", fd);
1065 const char *path = thread__fd_path(arg->thread, fd, arg->trace);
1068 printed += scnprintf(bf + printed, size - printed, "<%s>", path);
/*
 * fd formatter for close(): prints the fd like the regular fd formatter,
 * then frees the cached path for that fd and resets the slot to NULL, so
 * the cached name is dropped once the descriptor is closed.
 * NOTE(review): the declaration of fd and the return statement are elided
 * from this listing.
 */
1073 static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
1074 struct syscall_arg *arg)
1077 size_t printed = syscall_arg__scnprintf_fd(bf, size, arg);
1078 struct thread_trace *ttrace = arg->thread->priv;
/* Bounds-check before touching the table; the slot is cleared after free to avoid a dangling pointer. */
1080 if (ttrace && fd >= 0 && fd <= ttrace->paths.max) {
1081 free(ttrace->paths.table[fd]);
1082 ttrace->paths.table[fd] = NULL;
/*
 * Return true when duration `t` is below the configured duration filter.
 * duration_filter is multiplied by NSEC_PER_MSEC before the comparison,
 * i.e. the filter is expressed in milliseconds relative to t's unit.
 */
1088 static bool trace__filter_duration(struct trace *trace, double t)
1090 return t < (trace->duration_filter * NSEC_PER_MSEC);
/*
 * Print `tstamp` relative to trace->base_time, divided by NSEC_PER_MSEC,
 * with three decimals and a trailing space. Returns the fprintf() result.
 */
1093 static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
1095 double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC;
1097 return fprintf(fp, "%10.3f ", ts);
/*
 * `done` flag and the signal handler declared next to it.
 * NOTE(review): the handler body is elided from this listing. If it writes
 * `done`, the portable type for a flag shared with a signal handler is
 * `volatile sig_atomic_t` rather than plain bool -- confirm.
 */
1100 static bool done = false;
1102 static void sig_handler(int sig __maybe_unused)
/*
 * Print the common prefix of an output line: the relative timestamp, the
 * duration, and -- only when several threads are being traced -- the
 * thread's comm (at most 14 characters, if show_comm is set) followed by
 * its tid.
 * NOTE(review): the closing brace and return statement are elided from
 * this listing; presumably the accumulated character count is returned.
 */
1107 static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread,
1108 u64 duration, u64 tstamp, FILE *fp)
1110 size_t printed = trace__fprintf_tstamp(trace, tstamp, fp);
1111 printed += fprintf_duration(duration, fp);
1113 if (trace->multiple_threads) {
1114 if (trace->show_comm)
1115 printed += fprintf(fp, "%.14s/", thread->comm);
1116 printed += fprintf(fp, "%d ", thread->tid);
/*
 * Feed a non-sample perf event to the machine layer. PERF_RECORD_LOST is
 * additionally reported in red on the trace output, with the number of
 * lost events, before being handed to machine__process_lost_event(); the
 * other visible path goes to machine__process_event().
 * NOTE(review): the declaration of ret, the `default:` label, the breaks
 * and the return are elided from this listing.
 */
1122 static int trace__process_event(struct trace *trace, struct machine *machine,
1123 union perf_event *event)
1127 switch (event->header.type) {
1128 case PERF_RECORD_LOST:
1129 color_fprintf(trace->output, PERF_COLOR_RED,
1130 "LOST %" PRIu64 " events!\n", event->lost.lost);
1131 ret = machine__process_lost_event(machine, event);
1133 ret = machine__process_event(machine, event);
/*
 * perf_tool callback adapter: recovers the enclosing struct trace from the
 * embedded `tool` member and forwards the event to trace__process_event().
 * The sample is unused.
 */
1140 static int trace__tool_process(struct perf_tool *tool,
1141 union perf_event *event,
1142 struct perf_sample *sample __maybe_unused,
1143 struct machine *machine)
1145 struct trace *trace = container_of(tool, struct trace, tool);
1146 return trace__process_event(trace, machine, event);
/*
 * Initialize symbol handling, create the host machine object and
 * synthesize events for already-running threads: from the evlist's thread
 * map when the target names specific tasks, otherwise for all threads.
 * Synthesized events are delivered through trace__tool_process().
 * NOTE(review): the error checks, the `else`, the remaining call arguments
 * and the return statements are elided from this listing.
 */
1149 static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
1151 int err = symbol__init();
1156 trace->host = machine__new_host();
1157 if (trace->host == NULL)
1160 if (perf_target__has_task(&trace->opts.target)) {
1161 err = perf_event__synthesize_thread_map(&trace->tool, evlist->threads,
1162 trace__tool_process,
1165 err = perf_event__synthesize_threads(&trace->tool, trace__tool_process,
/*
 * Build sc->arg_scnprintf, the per-argument formatter array for a syscall.
 * The array has nr_fields - 1 slots because the field walk starts at
 * fields->next, skipping the first tracepoint field. For each argument the
 * formatter from the format-table entry is used when one is set;
 * otherwise fields flagged FIELD_IS_POINTER get the hex formatter and the
 * rest stay NULL (calloc zeroes the array).
 * NOTE(review): the declaration and increment of idx, the `if (sc->fmt)`
 * guard that presumably precedes the arg_parm assignment, and the return
 * statements are elided from this listing.
 */
1175 static int syscall__set_arg_fmts(struct syscall *sc)
1177 struct format_field *field;
1180 sc->arg_scnprintf = calloc(sc->tp_format->format.nr_fields - 1, sizeof(void *));
1181 if (sc->arg_scnprintf == NULL)
1185 sc->arg_parm = sc->fmt->arg_parm;
1187 for (field = sc->tp_format->format.fields->next; field; field = field->next) {
1188 if (sc->fmt && sc->fmt->arg_scnprintf[idx])
1189 sc->arg_scnprintf[idx] = sc->fmt->arg_scnprintf[idx];
1190 else if (field->flags & FIELD_IS_POINTER)
1191 sc->arg_scnprintf[idx] = syscall_arg__scnprintf_hex;
/*
 * Populate the syscall table slot for syscall `id`:
 *  - resolve the name through audit_syscall_to_name();
 *  - grow trace->syscalls.table to id + 1 entries if needed, zeroing the
 *    new slots;
 *  - when a name qualifier list is active, mark the syscall filtered
 *    unless list membership XOR not_ev_qualifier is true;
 *  - find the optional formatting entry and load the sys_enter_<name>
 *    tracepoint format, retrying with the entry's alias when the first
 *    lookup fails;
 *  - finish by building the per-argument formatter array.
 * NOTE(review): declarations (tp_name, sc), the NULL-name check, the `else`
 * between the memset calls, the sc->name assignment and all failure
 * returns are elided from this listing.
 */
1198 static int trace__read_syscall_info(struct trace *trace, int id)
1202 const char *name = audit_syscall_to_name(id, trace->audit.machine);
1207 if (id > trace->syscalls.max) {
1208 struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc));
1210 if (nsyscalls == NULL)
1213 if (trace->syscalls.max != -1) {
1214 memset(nsyscalls + trace->syscalls.max + 1, 0,
1215 (id - trace->syscalls.max) * sizeof(*sc));
1217 memset(nsyscalls, 0, (id + 1) * sizeof(*sc));
1220 trace->syscalls.table = nsyscalls;
1221 trace->syscalls.max = id;
1224 sc = trace->syscalls.table + id;
1227 if (trace->ev_qualifier) {
1228 bool in = strlist__find(trace->ev_qualifier, name) != NULL;
1230 if (!(in ^ trace->not_ev_qualifier)) {
1231 sc->filtered = true;
1233 * No need to do read tracepoint information since this will be
1240 sc->fmt = syscall_fmt__find(sc->name);
1242 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name);
1243 sc->tp_format = event_format__new("syscalls", tp_name);
/* Some syscalls are exposed under a different tracepoint name; retry with the alias. */
1245 if (sc->tp_format == NULL && sc->fmt && sc->fmt->alias) {
1246 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias);
1247 sc->tp_format = event_format__new("syscalls", tp_name);
1250 if (sc->tp_format == NULL)
1253 return syscall__set_arg_fmts(sc);
/*
 * Format the arguments of a syscall into bf as "name: value" pairs
 * separated by ", ". With a tracepoint format available, the fields after
 * the first one are walked in step with arg.idx and a one-bit mask `bit`.
 * An argument whose value is zero is skipped unless it is formatted by the
 * strarray formatter with a table attached. Values are printed by the
 * per-argument formatter when one is set, else as "%ld". A second
 * formatting path, used when there is no tracepoint format, prints
 * arguments by index i.
 * NOTE(review): the declarations of printed/bit/i, the syscall_arg
 * initializer, the test of `bit` against arg.mask, the `continue`
 * statements, the `else` lines, the fallback loop header and its format
 * string, and the return are elided from this listing.
 */
1256 static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
1257 unsigned long *args, struct trace *trace,
1258 struct thread *thread)
1262 if (sc->tp_format != NULL) {
1263 struct format_field *field;
1265 struct syscall_arg arg = {
1272 for (field = sc->tp_format->format.fields->next; field;
1273 field = field->next, ++arg.idx, bit <<= 1) {
1277 * Suppress this argument if its value is zero and
1278 * and we don't have a string associated in an
1281 if (args[arg.idx] == 0 &&
1282 !(sc->arg_scnprintf &&
1283 sc->arg_scnprintf[arg.idx] == SCA_STRARRAY &&
1284 sc->arg_parm[arg.idx]))
1287 printed += scnprintf(bf + printed, size - printed,
1288 "%s%s: ", printed ? ", " : "", field->name);
1289 if (sc->arg_scnprintf && sc->arg_scnprintf[arg.idx]) {
1290 arg.val = args[arg.idx];
1292 arg.parm = sc->arg_parm[arg.idx];
1293 printed += sc->arg_scnprintf[arg.idx](bf + printed,
1294 size - printed, &arg);
1296 printed += scnprintf(bf + printed, size - printed,
1297 "%ld", args[arg.idx]);
1304 printed += scnprintf(bf + printed, size - printed,
1306 printed ? ", " : "", i, args[i]);
/*
 * Signature shared by the per-tracepoint sample handlers in this file
 * (e.g. trace__sys_enter): they receive the trace session, the event
 * selector the sample came from, and the sample itself.
 */
1314 typedef int (*tracepoint_handler)(struct trace *trace, struct perf_evsel *evsel,
1315 struct perf_sample *sample);
/*
 * Return the syscall table entry for `id`, reading its description on
 * first use through trace__read_syscall_info(). An invalid id is reported
 * on the trace output together with the evsel name and a running counter.
 * When the entry cannot be obtained, a "Problems reading syscall <id>
 * (<name>) information" message is printed, including the name only if
 * the table already has one.
 * NOTE(review): the test for an invalid id, the verbosity guards, the
 * counter declaration, the goto/label lines and the NULL returns are
 * elided from this listing.
 */
1317 static struct syscall *trace__syscall_info(struct trace *trace,
1318 struct perf_evsel *evsel, int id)
1324 * XXX: Noticed on x86_64, reproduced as far back as 3.0.36, haven't tried
1325 * before that, leaving at a higher verbosity level till that is
1326 * explained. Reproduced with plain ftrace with:
1328 * echo 1 > /t/events/raw_syscalls/sys_exit/enable
1329 * grep "NR -1 " /t/trace_pipe
1331 * After generating some load on the machine.
1335 fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n",
1336 id, perf_evsel__name(evsel), ++n);
/* Read the description only when the slot is missing or still unnamed. */
1341 if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) &&
1342 trace__read_syscall_info(trace, id))
/* Re-check: the read may have succeeded without producing a usable entry. */
1345 if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL))
1348 return &trace->syscalls.table[id];
1352 fprintf(trace->output, "Problems reading syscall %d", id);
1353 if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL)
1354 fprintf(trace->output, "(%s)", trace->syscalls.table[id].name);
1355 fputs(" information\n", trace->output);
/*
 * Account one completed syscall 'id' in the per-thread statistics.
 *
 * The per-syscall 'struct stats' lives in the priv pointer of the int_node
 * keyed by 'id' in ttrace->syscall_stats and is allocated on first use.
 * The sample fed to update_stats() is the time elapsed since
 * ttrace->entry_time, computed only when an entry time was recorded and the
 * exit sample is later than it.
 *
 * NOTE(review): the initial value of 'duration' is declared outside the
 * lines shown here - presumably 0; confirm.
 */
1360 static void thread__update_stats(struct thread_trace *ttrace,
1361 int id, struct perf_sample *sample)
1363 struct int_node *inode;
1364 struct stats *stats;
1367 inode = intlist__findnew(ttrace->syscall_stats, id);
1371 stats = inode->priv;
1372 if (stats == NULL) {
1373 stats = malloc(sizeof(struct stats));
1377 inode->priv = stats;
1380 if (ttrace->entry_time && sample->time > ttrace->entry_time)
1381 duration = sample->time - ttrace->entry_time;
1383 update_stats(stats, duration);
/*
 * Handler for raw_syscalls:sys_enter samples.
 *
 * Resolves the syscall from the sample's "id" field and the thread from
 * sample->pid/tid, then formats "name(args" into the thread's entry_str
 * buffer (1024 bytes, allocated on first use) and records the entry
 * timestamp in ttrace->entry_time.
 *
 * "exit_group" and "exit" are printed right away, unless a duration filter
 * is active.  Otherwise the formatted entry is flagged with entry_pending
 * so that trace__sys_exit() can complete the line.
 *
 * NOTE(review): the buffer size 1024 is repeated as a literal in the
 * malloc() and both scnprintf() bounds; they must be kept in sync.
 */
1386 static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
1387 struct perf_sample *sample)
1392 struct thread *thread;
1393 int id = perf_evsel__intval(evsel, sample, "id");
1394 struct syscall *sc = trace__syscall_info(trace, evsel, id);
1395 struct thread_trace *ttrace;
1403 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1404 ttrace = thread__trace(thread, trace->output);
1408 args = perf_evsel__rawptr(evsel, sample, "args");
1410 fprintf(trace->output, "Problems reading syscall arguments\n");
1414 ttrace = thread->priv;
1416 if (ttrace->entry_str == NULL) {
1417 ttrace->entry_str = malloc(1024);
1418 if (!ttrace->entry_str)
1422 ttrace->entry_time = sample->time;
1423 msg = ttrace->entry_str;
1424 printed += scnprintf(msg + printed, 1024 - printed, "%s(", sc->name);
1426 printed += syscall__scnprintf_args(sc, msg + printed, 1024 - printed,
1427 args, trace, thread);
1429 if (!strcmp(sc->name, "exit_group") || !strcmp(sc->name, "exit")) {
1430 if (!trace->duration_filter) {
1431 trace__fprintf_entry_head(trace, thread, 1, sample->time, trace->output);
1432 fprintf(trace->output, "%-70s\n", ttrace->entry_str);
1435 ttrace->entry_pending = true;
/*
 * Handler for raw_syscalls:sys_exit samples.
 *
 * Resolves the syscall and thread like trace__sys_enter(), updates the
 * per-thread syscall statistics and reads the "ret" field.  When the
 * syscall is the "open" one (trace->audit.open_id), it succeeded and a
 * pathname was captured by trace__vfs_getname(), the returned fd is
 * associated with that pathname and the captured pointer is cleared.
 *
 * The duration is the distance from ttrace->entry_time and is passed
 * through trace__filter_duration().  The output line is either the pending
 * entry string padded to 70 columns, or a " ... [continued]: name()"
 * marker, followed by the return value formatted according to sc->fmt:
 *   no fmt           -> ") = %d"
 *   ret < 0, errmsg  -> ") = -1 <errno name> <strerror text>"
 *   ret == 0, timeout-> ") = 0 Timeout"
 *   hexret           -> ") = %#x"
 *
 * NOTE(review): the result of strerror_r() is used as a string pointer,
 * i.e. this relies on the GNU variant of that function.
 */
1440 static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
1441 struct perf_sample *sample)
1445 struct thread *thread;
1446 int id = perf_evsel__intval(evsel, sample, "id");
1447 struct syscall *sc = trace__syscall_info(trace, evsel, id);
1448 struct thread_trace *ttrace;
1456 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1457 ttrace = thread__trace(thread, trace->output);
1462 thread__update_stats(ttrace, id, sample);
1464 ret = perf_evsel__intval(evsel, sample, "ret");
1466 if (id == trace->audit.open_id && ret >= 0 && trace->last_vfs_getname) {
1467 trace__set_fd_pathname(thread, ret, trace->last_vfs_getname);
1468 trace->last_vfs_getname = NULL;
1469 ++trace->stats.vfs_getname;
1472 ttrace = thread->priv;
1474 ttrace->exit_time = sample->time;
1476 if (ttrace->entry_time) {
1477 duration = sample->time - ttrace->entry_time;
1478 if (trace__filter_duration(trace, duration))
1480 } else if (trace->duration_filter)
1483 trace__fprintf_entry_head(trace, thread, duration, sample->time, trace->output);
1485 if (ttrace->entry_pending) {
1486 fprintf(trace->output, "%-70s", ttrace->entry_str);
1488 fprintf(trace->output, " ... [");
1489 color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued");
1490 fprintf(trace->output, "]: %s()", sc->name);
1493 if (sc->fmt == NULL) {
1495 fprintf(trace->output, ") = %d", ret);
1496 } else if (ret < 0 && sc->fmt->errmsg) {
1498 const char *emsg = strerror_r(-ret, bf, sizeof(bf)),
1499 *e = audit_errno_to_name(-ret);
1501 fprintf(trace->output, ") = -1 %s %s", e, emsg);
1502 } else if (ret == 0 && sc->fmt->timeout)
1503 fprintf(trace->output, ") = 0 Timeout");
1504 else if (sc->fmt->hexret)
1505 fprintf(trace->output, ") = %#x", ret);
1509 fputc('\n', trace->output);
1511 ttrace->entry_pending = false;
/*
 * Handler for the probe:vfs_getname tracepoint: remember a pointer to the
 * sample's "pathname" field in trace->last_vfs_getname, where
 * trace__sys_exit() picks it up when an open syscall returns an fd.
 */
1516 static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel,
1517 struct perf_sample *sample)
1519 trace->last_vfs_getname = perf_evsel__rawptr(evsel, sample, "pathname");
/*
 * Handler for sched:sched_stat_runtime samples.
 *
 * Converts the sample's "runtime" field from nanoseconds to milliseconds
 * and adds it to both the per-thread (ttrace->runtime_ms) and the global
 * (trace->runtime_ms) accumulators.
 *
 * The trailing fprintf() dumps the raw event fields (comm, pid, runtime,
 * vruntime); NOTE(review): it is presumably reached only when the thread
 * state could not be obtained - confirm against the full source.  The dump
 * format used to end in an unmatched ')' which has been dropped.
 */
1523 static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel,
1524 struct perf_sample *sample)
1526 u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
1527 double runtime_ms = (double)runtime / NSEC_PER_MSEC;
1528 struct thread *thread = machine__findnew_thread(trace->host,
1531 struct thread_trace *ttrace = thread__trace(thread, trace->output);
1536 ttrace->runtime_ms += runtime_ms;
1537 trace->runtime_ms += runtime_ms;
1541 fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 "\n",
1543 perf_evsel__strval(evsel, sample, "comm"),
1544 (pid_t)perf_evsel__intval(evsel, sample, "pid"),
1546 perf_evsel__intval(evsel, sample, "vruntime"));
/*
 * Decide whether 'sample' should be ignored, based on the optional
 * trace->pid_list and trace->tid_list filters: first the sample's pid/tid
 * are looked up in whichever list exists, then the mere presence of a list
 * is tested.
 *
 * NOTE(review): the return statements are not part of the lines shown
 * here; presumably a match means "keep" and a non-match with a filter set
 * means "skip" - confirm.
 */
1550 static bool skip_sample(struct trace *trace, struct perf_sample *sample)
1552 if ((trace->pid_list && intlist__find(trace->pid_list, sample->pid)) ||
1553 (trace->tid_list && intlist__find(trace->tid_list, sample->tid)))
1556 if (trace->pid_list || trace->tid_list)
/*
 * perf_tool 'sample' callback used when replaying a recorded session.
 *
 * Recovers the enclosing struct trace from 'tool', applies the pid/tid
 * filter via skip_sample(), latches the first sample's timestamp as
 * trace->base_time unless full timestamps were requested, and dispatches
 * the sample to the handler stored in evsel->handler.func.
 */
1562 static int trace__process_sample(struct perf_tool *tool,
1563 union perf_event *event __maybe_unused,
1564 struct perf_sample *sample,
1565 struct perf_evsel *evsel,
1566 struct machine *machine __maybe_unused)
1568 struct trace *trace = container_of(tool, struct trace, tool);
1571 tracepoint_handler handler = evsel->handler.func;
1573 if (skip_sample(trace, sample))
1576 if (!trace->full_time && trace->base_time == 0)
1577 trace->base_time = sample->time;
1580 handler(trace, evsel, sample);
/*
 * Tell whether the session's event list contains a tracepoint called
 * 'name' (e.g. "raw_syscalls:sys_enter").
 */
1586 perf_session__has_tp(struct perf_session *session, const char *name)
1588 struct perf_evsel *evsel;
1590 evsel = perf_evlist__find_tracepoint_by_name(session->evlist, name);
1592 return evsel != NULL;
/*
 * Turn the pid and tid target strings given on the command line
 * (trace->opts.target.pid / .tid) into the intlists trace->pid_list and
 * trace->tid_list.  Each list is built only when its string is set; a
 * failure of intlist__new() is reported with pr_err().
 */
1595 static int parse_target_str(struct trace *trace)
1597 if (trace->opts.target.pid) {
1598 trace->pid_list = intlist__new(trace->opts.target.pid);
1599 if (trace->pid_list == NULL) {
1600 pr_err("Error parsing process id string\n");
1605 if (trace->opts.target.tid) {
1606 trace->tid_list = intlist__new(trace->opts.target.tid);
1607 if (trace->tid_list == NULL) {
1608 pr_err("Error parsing thread id string\n");
1616 static int trace__record(int argc, const char **argv)
1616 static int trace__record(int argc, const char **argv)
/*
 * Body of trace__record(): implements "perf trace record" by delegating to
 * cmd_record().
 *
 * The argument vector handed over is the fixed record_args[] prefix (which
 * selects the raw_syscalls:sys_enter and raw_syscalls:sys_exit events)
 * followed by the caller's own arguments.  It is allocated with one spare
 * slot, which calloc() leaves NULL.
 *
 * NOTE(review): rec_argv is not freed in the lines shown here; cmd_record()
 * is returned from directly.
 */
1618 unsigned int rec_argc, i, j;
1619 const char **rec_argv;
1620 const char * const record_args[] = {
1625 "-e", "raw_syscalls:sys_enter,raw_syscalls:sys_exit",
1628 rec_argc = ARRAY_SIZE(record_args) + argc;
1629 rec_argv = calloc(rec_argc + 1, sizeof(char *));
1631 if (rec_argv == NULL)
1634 for (i = 0; i < ARRAY_SIZE(record_args); i++)
1635 rec_argv[i] = record_args[i];
1637 for (j = 0; j < (unsigned int)argc; j++, i++)
1638 rec_argv[i] = argv[j];
1640 return cmd_record(i, rec_argv, NULL);
1643 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp);
/*
 * Try to add the "probe:vfs_getname" tracepoint to 'evlist' so that
 * pathnames can be captured by trace__vfs_getname().
 *
 * The evsel is only kept when the probe exposes a "pathname" field;
 * otherwise it is deleted again and the evlist is left untouched.
 */
1645 static void perf_evlist__add_vfs_getname(struct perf_evlist *evlist)
1647 struct perf_evsel *evsel = perf_evsel__newtp("probe", "vfs_getname",
1648 evlist->nr_entries);
1652 if (perf_evsel__field(evsel, "pathname") == NULL) {
1653 perf_evsel__delete(evsel);
1657 evsel->handler.func = trace__vfs_getname;
1658 perf_evlist__add(evlist, evsel);
/*
 * Live tracing mode.
 *
 * Sets up an evlist with the raw_syscalls:sys_enter and
 * raw_syscalls:sys_exit tracepoints (plus probe:vfs_getname and
 * sched:sched_stat_runtime), creates the cpu/thread maps for the requested
 * target, optionally prepares a workload ('forks' is true when a command
 * was given), then opens, mmaps and enables the events.
 *
 * The main loop drains every mmap ring buffer: non-sample records go to
 * trace__process_event(), samples are mapped back to their evsel by id and
 * dispatched to the handler stored in evsel->handler.func.  A pass that
 * produced no new events (trace->nr_events == before) ends up in poll().
 *
 * On the way out the events are disabled, the per-thread summary and the
 * tool statistics are printed, and the evlist is torn down in reverse
 * order of its setup.  The trailing blocks print hints for the common
 * setup failures (debugfs missing, no permissions, other errno).
 */
1661 static int trace__run(struct trace *trace, int argc, const char **argv)
1663 struct perf_evlist *evlist = perf_evlist__new();
1664 struct perf_evsel *evsel;
1666 unsigned long before;
1667 const bool forks = argc > 0;
1671 if (evlist == NULL) {
1672 fprintf(trace->output, "Not enough memory to run!\n");
1676 if (perf_evlist__add_newtp(evlist, "raw_syscalls", "sys_enter", trace__sys_enter) ||
1677 perf_evlist__add_newtp(evlist, "raw_syscalls", "sys_exit", trace__sys_exit))
1680 perf_evlist__add_vfs_getname(evlist);
1683 perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
1684 trace__sched_stat_runtime))
1687 err = perf_evlist__create_maps(evlist, &trace->opts.target);
1689 fprintf(trace->output, "Problems parsing the target to trace, check your options!\n");
1690 goto out_delete_evlist;
1693 err = trace__symbols_init(trace, evlist);
1695 fprintf(trace->output, "Problems initializing symbol libraries!\n");
1696 goto out_delete_maps;
1699 perf_evlist__config(evlist, &trace->opts);
1701 signal(SIGCHLD, sig_handler);
1702 signal(SIGINT, sig_handler);
1705 err = perf_evlist__prepare_workload(evlist, &trace->opts.target,
1706 argv, false, false);
1708 fprintf(trace->output, "Couldn't run the workload!\n");
1709 goto out_delete_maps;
1713 err = perf_evlist__open(evlist);
1715 fprintf(trace->output, "Couldn't create the events: %s\n", strerror(errno));
1716 goto out_delete_maps;
1719 err = perf_evlist__mmap(evlist, UINT_MAX, false);
1721 fprintf(trace->output, "Couldn't mmap the events: %s\n", strerror(errno));
1722 goto out_close_evlist;
1725 perf_evlist__enable(evlist);
1728 perf_evlist__start_workload(evlist);
/* Show the tid column when tracing more than a single known thread. */
1730 trace->multiple_threads = evlist->threads->map[0] == -1 || evlist->threads->nr > 1;
1732 before = trace->nr_events;
/* Drain every mmap'ed ring buffer. */
1734 for (i = 0; i < evlist->nr_mmaps; i++) {
1735 union perf_event *event;
1737 while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) {
1738 const u32 type = event->header.type;
1739 tracepoint_handler handler;
1740 struct perf_sample sample;
1744 err = perf_evlist__parse_sample(evlist, event, &sample);
1746 fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err);
1750 if (!trace->full_time && trace->base_time == 0)
1751 trace->base_time = sample.time;
1753 if (type != PERF_RECORD_SAMPLE) {
1754 trace__process_event(trace, trace->host, event);
1758 evsel = perf_evlist__id2evsel(evlist, sample.id);
1759 if (evsel == NULL) {
1760 fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample.id);
1764 if (sample.raw_data == NULL) {
1765 fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
1766 perf_evsel__name(evsel), sample.tid,
1767 sample.cpu, sample.raw_size);
1771 handler = evsel->handler.func;
1772 handler(trace, evsel, &sample);
1775 goto out_unmap_evlist;
1779 if (trace->nr_events == before) {
1781 goto out_unmap_evlist;
1783 poll(evlist->pollfd, evlist->nr_fds, -1);
1787 perf_evlist__disable(evlist);
1794 trace__fprintf_thread_summary(trace, trace->output);
1796 if (trace->show_tool_stats) {
1797 fprintf(trace->output, "Stats:\n "
1798 " vfs_getname : %" PRIu64 "\n"
1799 " proc_getname: %" PRIu64 "\n",
1800 trace->stats.vfs_getname,
1801 trace->stats.proc_getname);
/* Teardown, in reverse order of the setup above. */
1805 perf_evlist__munmap(evlist);
1807 perf_evlist__close(evlist);
1809 perf_evlist__delete_maps(evlist);
1811 perf_evlist__delete(evlist);
1813 trace->live = false;
/* Hints for the usual reasons the tracepoints could not be set up. */
1818 fputs("Error:\tUnable to find debugfs\n"
1819 "Hint:\tWas your kernel compiled with debugfs support?\n"
1820 "Hint:\tIs the debugfs filesystem mounted?\n"
1821 "Hint:\tTry 'sudo mount -t debugfs nodev /sys/kernel/debug'\n",
1825 fprintf(trace->output,
1826 "Error:\tNo permissions to read %s/tracing/events/raw_syscalls\n"
1827 "Hint:\tTry 'sudo mount -o remount,mode=755 %s'\n",
1828 debugfs_mountpoint, debugfs_mountpoint);
1832 fprintf(trace->output, "Can't trace: %s\n",
1833 strerror_r(errno, bf, sizeof(bf)));
1837 goto out_delete_evlist;
/*
 * Replay mode: process a previously recorded perf.data file instead of
 * tracing live.
 *
 * Wires the generic perf_event__process_*() callbacks and
 * trace__process_sample() into trace->tool, requests time-ordered sample
 * delivery, opens the session for 'input_name' and installs the
 * sys_enter / sys_exit / vfs_getname handlers on the matching tracepoints.
 * The data file must contain both raw_syscalls:sys_enter and
 * raw_syscalls:sys_exit events.  After the pid/tid filters are parsed all
 * events are processed, and the per-thread summary is printed when
 * requested and processing succeeded.
 *
 * The "Failed to process events" message is newline terminated like every
 * other pr_err() in this file.
 */
1840 static int trace__replay(struct trace *trace)
1842 const struct perf_evsel_str_handler handlers[] = {
1843 { "raw_syscalls:sys_enter", trace__sys_enter, },
1844 { "raw_syscalls:sys_exit", trace__sys_exit, },
1845 { "probe:vfs_getname", trace__vfs_getname, },
1848 struct perf_session *session;
1851 trace->tool.sample = trace__process_sample;
1852 trace->tool.mmap = perf_event__process_mmap;
1853 trace->tool.mmap2 = perf_event__process_mmap2;
1854 trace->tool.comm = perf_event__process_comm;
1855 trace->tool.exit = perf_event__process_exit;
1856 trace->tool.fork = perf_event__process_fork;
1857 trace->tool.attr = perf_event__process_attr;
1858 trace->tool.tracing_data = perf_event__process_tracing_data;
1859 trace->tool.build_id = perf_event__process_build_id;
1861 trace->tool.ordered_samples = true;
1862 trace->tool.ordering_requires_timestamps = true;
1864 /* add tid to output */
1865 trace->multiple_threads = true;
1867 if (symbol__init() < 0)
1870 session = perf_session__new(input_name, O_RDONLY, 0, false,
1872 if (session == NULL)
1875 trace->host = &session->machines.host;
1877 err = perf_session__set_tracepoints_handlers(session, handlers);
1881 if (!perf_session__has_tp(session, "raw_syscalls:sys_enter")) {
1882 pr_err("Data file does not have raw_syscalls:sys_enter events\n");
1886 if (!perf_session__has_tp(session, "raw_syscalls:sys_exit")) {
1887 pr_err("Data file does not have raw_syscalls:sys_exit events\n");
1891 err = parse_target_str(trace);
1897 err = perf_session__process_events(session, &trace->tool);
1899 pr_err("Failed to process events, error %d\n", err);
1901 else if (trace->summary)
1902 trace__fprintf_thread_summary(trace, trace->output);
1905 perf_session__delete(session);
/*
 * Print the banner and column titles of the per-thread summary to 'fp',
 * accumulating the fprintf() results in 'printed'.
 */
1910 static size_t trace__fprintf_threads_header(FILE *fp)
1914 printed = fprintf(fp, "\n _____________________________________________________________________________\n");
1915 printed += fprintf(fp, " __) Summary of events (__\n\n");
1916 printed += fprintf(fp, " [ task - pid ] [ events ] [ ratio ] [ runtime ]\n");
1917 printed += fprintf(fp, " syscall count min max avg stddev\n");
1918 printed += fprintf(fp, " msec msec msec %%\n");
1919 printed += fprintf(fp, " _____________________________________________________________________________\n\n");
/*
 * Print one line of statistics per syscall recorded for a thread.
 *
 * Walks the int_nodes of ttrace->syscall_stats (each node is keyed by a
 * syscall id, its priv pointer holds the 'struct stats').  For every entry
 * the syscall name is taken from trace->syscalls.table[] and the call
 * count, min, max and average are printed, the times converted from
 * nanoseconds to milliseconds.  The last column is the standard deviation
 * expressed as a percentage of the average (0 when the average is 0).
 */
1924 static size_t thread__dump_stats(struct thread_trace *ttrace,
1925 struct trace *trace, FILE *fp)
1927 struct stats *stats;
1930 struct int_node *inode = intlist__first(ttrace->syscall_stats);
1935 printed += fprintf(fp, "\n");
1937 /* each int_node is a syscall */
1939 stats = inode->priv;
1941 double min = (double)(stats->min) / NSEC_PER_MSEC;
1942 double max = (double)(stats->max) / NSEC_PER_MSEC;
1943 double avg = avg_stats(stats);
1945 u64 n = (u64) stats->n;
1947 pct = avg ? 100.0 * stddev_stats(stats)/avg : 0.0;
1948 avg /= NSEC_PER_MSEC;
1950 sc = &trace->syscalls.table[inode->i];
1951 printed += fprintf(fp, "%24s %14s : ", "", sc->name);
1952 printed += fprintf(fp, "%5" PRIu64 " %8.3f %8.3f",
1954 printed += fprintf(fp, " %8.3f %6.2f\n", avg, pct);
1957 inode = intlist__next(inode);
1960 printed += fprintf(fp, "\n\n");
/*
 * Context handed to trace__fprintf_one_thread() through the 'priv' pointer
 * of machine__for_each_thread(): the trace being summarised, plus the
 * output stream and the running count of printed characters.
 */
1965 /* struct used to pass data to per-thread function */
1966 struct summary_data {
1968 struct trace *trace;
/*
 * machine__for_each_thread() callback: print the summary line of one
 * thread followed by its per-syscall statistics.
 *
 * 'priv' is a struct summary_data.  The line shows the thread's comm and
 * tid, its number of events, the share of all trace events it accounts for
 * (coloured by how large that share is) and its accumulated runtime in
 * milliseconds.
 */
1972 static int trace__fprintf_one_thread(struct thread *thread, void *priv)
1974 struct summary_data *data = priv;
1975 FILE *fp = data->fp;
1976 size_t printed = data->printed;
1977 struct trace *trace = data->trace;
1978 struct thread_trace *ttrace = thread->priv;
1985 ratio = (double)ttrace->nr_events / trace->nr_events * 100.0;
1987 color = PERF_COLOR_NORMAL;
1989 color = PERF_COLOR_RED;
1990 else if (ratio > 25.0)
1991 color = PERF_COLOR_GREEN;
1992 else if (ratio > 5.0)
1993 color = PERF_COLOR_YELLOW;
1995 printed += color_fprintf(fp, color, "%20s", thread->comm);
1996 printed += fprintf(fp, " - %-5d :%11lu [", thread->tid, ttrace->nr_events);
1997 printed += color_fprintf(fp, color, "%5.1f%%", ratio);
1998 printed += fprintf(fp, " ] %10.3f ms\n", ttrace->runtime_ms);
1999 printed += thread__dump_stats(ttrace, trace, fp);
/*
 * 'printed' was seeded with data->printed above, so it already is the new
 * running total: store it, adding it would count earlier output twice.
 */
2001 data->printed = printed;
/*
 * Print the summary header to 'fp' and then one block per thread of
 * trace->host by running trace__fprintf_one_thread() over all of them.
 * Returns the character count accumulated in data.printed.
 */
2006 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
2008 struct summary_data data = {
2012 data.printed = trace__fprintf_threads_header(fp);
2014 machine__for_each_thread(trace->host, trace__fprintf_one_thread, &data);
2016 return data.printed;
/*
 * Option callback for --duration: store the threshold given in 'str' in
 * the struct trace passed via opt->value.
 *
 * NOTE(review): atof() cannot report malformed input; a non-numeric
 * argument silently yields 0.
 */
2019 static int trace__set_duration(const struct option *opt, const char *str,
2020 int unset __maybe_unused)
2022 struct trace *trace = opt->value;
2024 trace->duration_filter = atof(str);
/*
 * Open 'filename' for writing as trace->output.
 *
 * An existing, non-empty file of that name is first moved out of the way
 * to "<filename>.old".  Returns 0 on success or -errno when fopen() fails.
 *
 * NOTE(review): the result of rename() is not checked; if it fails the
 * previous contents are truncated by the fopen() below.
 */
2028 static int trace__open_output(struct trace *trace, const char *filename)
2032 if (!stat(filename, &st) && st.st_size) {
2033 char oldname[PATH_MAX];
2035 scnprintf(oldname, sizeof(oldname), "%s.old", filename);
2037 rename(filename, oldname);
2040 trace->output = fopen(filename, "w");
2042 return trace->output == NULL ? -errno : 0;
2045 int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
2047 const char * const trace_usage[] = {
2048 "perf trace [<options>] [<command>]",
2049 "perf trace [<options>] -- <command> [<options>]",
2050 "perf trace record [<options>] [<command>]",
2051 "perf trace record [<options>] -- <command> [<options>]",
2054 struct trace trace = {
2056 .machine = audit_detect_machine(),
2057 .open_id = audit_name_to_syscall("open", trace.audit.machine),
2067 .user_freq = UINT_MAX,
2068 .user_interval = ULLONG_MAX,
2075 const char *output_name = NULL;
2076 const char *ev_qualifier_str = NULL;
2077 const struct option trace_options[] = {
2078 OPT_BOOLEAN(0, "comm", &trace.show_comm,
2079 "show the thread COMM next to its id"),
2080 OPT_BOOLEAN(0, "tool_stats", &trace.show_tool_stats, "show tool stats"),
2081 OPT_STRING('e', "expr", &ev_qualifier_str, "expr",
2082 "list of events to trace"),
2083 OPT_STRING('o', "output", &output_name, "file", "output file name"),
2084 OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"),
2085 OPT_STRING('p', "pid", &trace.opts.target.pid, "pid",
2086 "trace events on existing process id"),
2087 OPT_STRING('t', "tid", &trace.opts.target.tid, "tid",
2088 "trace events on existing thread id"),
2089 OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide,
2090 "system-wide collection from all CPUs"),
2091 OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu",
2092 "list of cpus to monitor"),
2093 OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit,
2094 "child tasks do not inherit counters"),
2095 OPT_CALLBACK('m', "mmap-pages", &trace.opts.mmap_pages, "pages",
2096 "number of mmap data pages",
2097 perf_evlist__parse_mmap_pages),
2098 OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user",
2100 OPT_CALLBACK(0, "duration", &trace, "float",
2101 "show only events with duration > N.M ms",
2102 trace__set_duration),
2103 OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"),
2104 OPT_INCR('v', "verbose", &verbose, "be more verbose"),
2105 OPT_BOOLEAN('T', "time", &trace.full_time,
2106 "Show full timestamp, not time relative to first start"),
2107 OPT_BOOLEAN(0, "summary", &trace.summary,
2108 "Show syscall summary with statistics"),
2114 if ((argc > 1) && (strcmp(argv[1], "record") == 0))
2115 return trace__record(argc-2, &argv[2]);
2117 argc = parse_options(argc, argv, trace_options, trace_usage, 0);
2119 if (output_name != NULL) {
2120 err = trace__open_output(&trace, output_name);
2122 perror("failed to create output file");
2127 if (ev_qualifier_str != NULL) {
2128 const char *s = ev_qualifier_str;
2130 trace.not_ev_qualifier = *s == '!';
2131 if (trace.not_ev_qualifier)
2133 trace.ev_qualifier = strlist__new(true, s);
2134 if (trace.ev_qualifier == NULL) {
2135 fputs("Not enough memory to parse event qualifier",
2142 err = perf_target__validate(&trace.opts.target);
2144 perf_target__strerror(&trace.opts.target, err, bf, sizeof(bf));
2145 fprintf(trace.output, "%s", bf);
2149 err = perf_target__parse_uid(&trace.opts.target);
2151 perf_target__strerror(&trace.opts.target, err, bf, sizeof(bf));
2152 fprintf(trace.output, "%s", bf);
2156 if (!argc && perf_target__none(&trace.opts.target))
2157 trace.opts.target.system_wide = true;
2160 err = trace__replay(&trace);
2162 err = trace__run(&trace, argc, argv);
2165 if (output_name != NULL)
2166 fclose(trace.output);