1 #include <traceevent/event-parse.h>
3 #include "util/color.h"
4 #include "util/debug.h"
5 #include "util/evlist.h"
6 #include "util/machine.h"
7 #include "util/session.h"
8 #include "util/thread.h"
9 #include "util/parse-options.h"
10 #include "util/strlist.h"
11 #include "util/intlist.h"
12 #include "util/thread_map.h"
13 #include "util/stat.h"
17 #include <sys/eventfd.h>
19 #include <linux/futex.h>
21 /* For older distros: */
23 # define MAP_STACK 0x20000
27 # define MADV_HWPOISON 100
30 #ifndef MADV_MERGEABLE
31 # define MADV_MERGEABLE 12
34 #ifndef MADV_UNMERGEABLE
35 # define MADV_UNMERGEABLE 13
40 struct thread *thread;
53 #define DEFINE_STRARRAY(array) struct strarray strarray__##array = { \
54 .nr_entries = ARRAY_SIZE(array), \
58 #define DEFINE_STRARRAY_OFFSET(array, off) struct strarray strarray__##array = { \
60 .nr_entries = ARRAY_SIZE(array), \
64 static size_t __syscall_arg__scnprintf_strarray(char *bf, size_t size,
66 struct syscall_arg *arg)
68 struct strarray *sa = arg->parm;
69 int idx = arg->val - sa->offset;
71 if (idx < 0 || idx >= sa->nr_entries)
72 return scnprintf(bf, size, intfmt, arg->val);
74 return scnprintf(bf, size, "%s", sa->entries[idx]);
/*
 * Name-table formatter whose numeric fallback is printed in decimal ("%d").
 */
static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size,
					      struct syscall_arg *arg)
{
	size_t printed;

	printed = __syscall_arg__scnprintf_strarray(bf, size, "%d", arg);
	return printed;
}
83 #define SCA_STRARRAY syscall_arg__scnprintf_strarray
/*
 * Name-table formatter whose numeric fallback is printed in hex ("%#x").
 */
static size_t syscall_arg__scnprintf_strhexarray(char *bf, size_t size,
						 struct syscall_arg *arg)
{
	size_t printed;

	printed = __syscall_arg__scnprintf_strarray(bf, size, "%#x", arg);
	return printed;
}
91 #define SCA_STRHEXARRAY syscall_arg__scnprintf_strhexarray
93 static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
94 struct syscall_arg *arg);
96 #define SCA_FD syscall_arg__scnprintf_fd
98 static size_t syscall_arg__scnprintf_fd_at(char *bf, size_t size,
99 struct syscall_arg *arg)
104 return scnprintf(bf, size, "CWD");
106 return syscall_arg__scnprintf_fd(bf, size, arg);
109 #define SCA_FDAT syscall_arg__scnprintf_fd_at
111 static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
112 struct syscall_arg *arg);
114 #define SCA_CLOSE_FD syscall_arg__scnprintf_close_fd
116 static size_t syscall_arg__scnprintf_hex(char *bf, size_t size,
117 struct syscall_arg *arg)
119 return scnprintf(bf, size, "%#lx", arg->val);
122 #define SCA_HEX syscall_arg__scnprintf_hex
124 static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size,
125 struct syscall_arg *arg)
127 int printed = 0, prot = arg->val;
129 if (prot == PROT_NONE)
130 return scnprintf(bf, size, "NONE");
131 #define P_MMAP_PROT(n) \
132 if (prot & PROT_##n) { \
133 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
143 P_MMAP_PROT(GROWSDOWN);
144 P_MMAP_PROT(GROWSUP);
148 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", prot);
153 #define SCA_MMAP_PROT syscall_arg__scnprintf_mmap_prot
155 static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size,
156 struct syscall_arg *arg)
158 int printed = 0, flags = arg->val;
160 #define P_MMAP_FLAG(n) \
161 if (flags & MAP_##n) { \
162 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
167 P_MMAP_FLAG(PRIVATE);
171 P_MMAP_FLAG(ANONYMOUS);
172 P_MMAP_FLAG(DENYWRITE);
173 P_MMAP_FLAG(EXECUTABLE);
176 P_MMAP_FLAG(GROWSDOWN);
178 P_MMAP_FLAG(HUGETLB);
181 P_MMAP_FLAG(NONBLOCK);
182 P_MMAP_FLAG(NORESERVE);
183 P_MMAP_FLAG(POPULATE);
185 #ifdef MAP_UNINITIALIZED
186 P_MMAP_FLAG(UNINITIALIZED);
191 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
196 #define SCA_MMAP_FLAGS syscall_arg__scnprintf_mmap_flags
198 static size_t syscall_arg__scnprintf_madvise_behavior(char *bf, size_t size,
199 struct syscall_arg *arg)
201 int behavior = arg->val;
204 #define P_MADV_BHV(n) case MADV_##n: return scnprintf(bf, size, #n)
207 P_MADV_BHV(SEQUENTIAL);
208 P_MADV_BHV(WILLNEED);
209 P_MADV_BHV(DONTNEED);
211 P_MADV_BHV(DONTFORK);
213 P_MADV_BHV(HWPOISON);
214 #ifdef MADV_SOFT_OFFLINE
215 P_MADV_BHV(SOFT_OFFLINE);
217 P_MADV_BHV(MERGEABLE);
218 P_MADV_BHV(UNMERGEABLE);
220 P_MADV_BHV(HUGEPAGE);
222 #ifdef MADV_NOHUGEPAGE
223 P_MADV_BHV(NOHUGEPAGE);
226 P_MADV_BHV(DONTDUMP);
235 return scnprintf(bf, size, "%#x", behavior);
238 #define SCA_MADV_BHV syscall_arg__scnprintf_madvise_behavior
240 static size_t syscall_arg__scnprintf_flock(char *bf, size_t size,
241 struct syscall_arg *arg)
243 int printed = 0, op = arg->val;
246 return scnprintf(bf, size, "NONE");
248 if ((op & LOCK_##cmd) == LOCK_##cmd) { \
249 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #cmd); \
264 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", op);
269 #define SCA_FLOCK syscall_arg__scnprintf_flock
271 static size_t syscall_arg__scnprintf_futex_op(char *bf, size_t size, struct syscall_arg *arg)
273 enum syscall_futex_args {
274 SCF_UADDR = (1 << 0),
277 SCF_TIMEOUT = (1 << 3),
278 SCF_UADDR2 = (1 << 4),
282 int cmd = op & FUTEX_CMD_MASK;
286 #define P_FUTEX_OP(n) case FUTEX_##n: printed = scnprintf(bf, size, #n);
287 P_FUTEX_OP(WAIT); arg->mask |= SCF_VAL3|SCF_UADDR2; break;
288 P_FUTEX_OP(WAKE); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
289 P_FUTEX_OP(FD); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
290 P_FUTEX_OP(REQUEUE); arg->mask |= SCF_VAL3|SCF_TIMEOUT; break;
291 P_FUTEX_OP(CMP_REQUEUE); arg->mask |= SCF_TIMEOUT; break;
292 P_FUTEX_OP(CMP_REQUEUE_PI); arg->mask |= SCF_TIMEOUT; break;
293 P_FUTEX_OP(WAKE_OP); break;
294 P_FUTEX_OP(LOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
295 P_FUTEX_OP(UNLOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
296 P_FUTEX_OP(TRYLOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2; break;
297 P_FUTEX_OP(WAIT_BITSET); arg->mask |= SCF_UADDR2; break;
298 P_FUTEX_OP(WAKE_BITSET); arg->mask |= SCF_UADDR2; break;
299 P_FUTEX_OP(WAIT_REQUEUE_PI); break;
300 default: printed = scnprintf(bf, size, "%#x", cmd); break;
303 if (op & FUTEX_PRIVATE_FLAG)
304 printed += scnprintf(bf + printed, size - printed, "|PRIV");
306 if (op & FUTEX_CLOCK_REALTIME)
307 printed += scnprintf(bf + printed, size - printed, "|CLKRT");
312 #define SCA_FUTEX_OP syscall_arg__scnprintf_futex_op
314 static const char *epoll_ctl_ops[] = { "ADD", "DEL", "MOD", };
315 static DEFINE_STRARRAY_OFFSET(epoll_ctl_ops, 1);
/*
 * Names for the 'which' argument of getitimer and setitimer, indexed by the
 * argument value.
 */
static const char *itimers[] = {
	"REAL",
	"VIRTUAL",
	"PROF",
};
static DEFINE_STRARRAY(itimers);
320 static const char *whences[] = { "SET", "CUR", "END",
328 static DEFINE_STRARRAY(whences);
330 static const char *fcntl_cmds[] = {
331 "DUPFD", "GETFD", "SETFD", "GETFL", "SETFL", "GETLK", "SETLK",
332 "SETLKW", "SETOWN", "GETOWN", "SETSIG", "GETSIG", "F_GETLK64",
333 "F_SETLK64", "F_SETLKW64", "F_SETOWN_EX", "F_GETOWN_EX",
336 static DEFINE_STRARRAY(fcntl_cmds);
338 static const char *rlimit_resources[] = {
339 "CPU", "FSIZE", "DATA", "STACK", "CORE", "RSS", "NPROC", "NOFILE",
340 "MEMLOCK", "AS", "LOCKS", "SIGPENDING", "MSGQUEUE", "NICE", "RTPRIO",
343 static DEFINE_STRARRAY(rlimit_resources);
/*
 * Names for the 'how' argument of rt_sigprocmask, indexed by the argument
 * value.
 */
static const char *sighow[] = {
	"BLOCK",
	"UNBLOCK",
	"SETMASK",
};
static DEFINE_STRARRAY(sighow);
348 static const char *clockid[] = {
349 "REALTIME", "MONOTONIC", "PROCESS_CPUTIME_ID", "THREAD_CPUTIME_ID",
350 "MONOTONIC_RAW", "REALTIME_COARSE", "MONOTONIC_COARSE",
352 static DEFINE_STRARRAY(clockid);
354 static const char *socket_families[] = {
355 "UNSPEC", "LOCAL", "INET", "AX25", "IPX", "APPLETALK", "NETROM",
356 "BRIDGE", "ATMPVC", "X25", "INET6", "ROSE", "DECnet", "NETBEUI",
357 "SECURITY", "KEY", "NETLINK", "PACKET", "ASH", "ECONET", "ATMSVC",
358 "RDS", "SNA", "IRDA", "PPPOX", "WANPIPE", "LLC", "IB", "CAN", "TIPC",
359 "BLUETOOTH", "IUCV", "RXRPC", "ISDN", "PHONET", "IEEE802154", "CAIF",
360 "ALG", "NFC", "VSOCK",
362 static DEFINE_STRARRAY(socket_families);
364 #ifndef SOCK_TYPE_MASK
365 #define SOCK_TYPE_MASK 0xf
368 static size_t syscall_arg__scnprintf_socket_type(char *bf, size_t size,
369 struct syscall_arg *arg)
373 flags = type & ~SOCK_TYPE_MASK;
375 type &= SOCK_TYPE_MASK;
377 * Can't use a strarray, MIPS may override for ABI reasons.
380 #define P_SK_TYPE(n) case SOCK_##n: printed = scnprintf(bf, size, #n); break;
385 P_SK_TYPE(SEQPACKET);
390 printed = scnprintf(bf, size, "%#x", type);
393 #define P_SK_FLAG(n) \
394 if (flags & SOCK_##n) { \
395 printed += scnprintf(bf + printed, size - printed, "|%s", #n); \
396 flags &= ~SOCK_##n; \
404 printed += scnprintf(bf + printed, size - printed, "|%#x", flags);
409 #define SCA_SK_TYPE syscall_arg__scnprintf_socket_type
412 #define MSG_PROBE 0x10
414 #ifndef MSG_WAITFORONE
415 #define MSG_WAITFORONE 0x10000
417 #ifndef MSG_SENDPAGE_NOTLAST
418 #define MSG_SENDPAGE_NOTLAST 0x20000
421 #define MSG_FASTOPEN 0x20000000
424 static size_t syscall_arg__scnprintf_msg_flags(char *bf, size_t size,
425 struct syscall_arg *arg)
427 int printed = 0, flags = arg->val;
430 return scnprintf(bf, size, "NONE");
431 #define P_MSG_FLAG(n) \
432 if (flags & MSG_##n) { \
433 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
439 P_MSG_FLAG(DONTROUTE);
444 P_MSG_FLAG(DONTWAIT);
451 P_MSG_FLAG(ERRQUEUE);
452 P_MSG_FLAG(NOSIGNAL);
454 P_MSG_FLAG(WAITFORONE);
455 P_MSG_FLAG(SENDPAGE_NOTLAST);
456 P_MSG_FLAG(FASTOPEN);
457 P_MSG_FLAG(CMSG_CLOEXEC);
461 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
466 #define SCA_MSG_FLAGS syscall_arg__scnprintf_msg_flags
468 static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size,
469 struct syscall_arg *arg)
474 if (mode == F_OK) /* 0 */
475 return scnprintf(bf, size, "F");
477 if (mode & n##_OK) { \
478 printed += scnprintf(bf + printed, size - printed, "%s", #n); \
488 printed += scnprintf(bf + printed, size - printed, "|%#x", mode);
493 #define SCA_ACCMODE syscall_arg__scnprintf_access_mode
495 static size_t syscall_arg__scnprintf_open_flags(char *bf, size_t size,
496 struct syscall_arg *arg)
498 int printed = 0, flags = arg->val;
500 if (!(flags & O_CREAT))
501 arg->mask |= 1 << (arg->idx + 1); /* Mask the mode parm */
504 return scnprintf(bf, size, "RDONLY");
506 if (flags & O_##n) { \
507 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
531 if ((flags & O_SYNC) == O_SYNC)
532 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", "SYNC");
544 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
549 #define SCA_OPEN_FLAGS syscall_arg__scnprintf_open_flags
551 static size_t syscall_arg__scnprintf_eventfd_flags(char *bf, size_t size,
552 struct syscall_arg *arg)
554 int printed = 0, flags = arg->val;
557 return scnprintf(bf, size, "NONE");
559 if (flags & EFD_##n) { \
560 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
570 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
575 #define SCA_EFD_FLAGS syscall_arg__scnprintf_eventfd_flags
577 static size_t syscall_arg__scnprintf_pipe_flags(char *bf, size_t size,
578 struct syscall_arg *arg)
580 int printed = 0, flags = arg->val;
583 if (flags & O_##n) { \
584 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
593 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
598 #define SCA_PIPE_FLAGS syscall_arg__scnprintf_pipe_flags
600 static size_t syscall_arg__scnprintf_signum(char *bf, size_t size, struct syscall_arg *arg)
605 #define P_SIGNUM(n) case SIG##n: return scnprintf(bf, size, #n)
640 return scnprintf(bf, size, "%#x", sig);
643 #define SCA_SIGNUM syscall_arg__scnprintf_signum
645 #define TCGETS 0x5401
647 static const char *tioctls[] = {
648 "TCGETS", "TCSETS", "TCSETSW", "TCSETSF", "TCGETA", "TCSETA", "TCSETAW",
649 "TCSETAF", "TCSBRK", "TCXONC", "TCFLSH", "TIOCEXCL", "TIOCNXCL",
650 "TIOCSCTTY", "TIOCGPGRP", "TIOCSPGRP", "TIOCOUTQ", "TIOCSTI",
651 "TIOCGWINSZ", "TIOCSWINSZ", "TIOCMGET", "TIOCMBIS", "TIOCMBIC",
652 "TIOCMSET", "TIOCGSOFTCAR", "TIOCSSOFTCAR", "FIONREAD", "TIOCLINUX",
653 "TIOCCONS", "TIOCGSERIAL", "TIOCSSERIAL", "TIOCPKT", "FIONBIO",
654 "TIOCNOTTY", "TIOCSETD", "TIOCGETD", "TCSBRKP", [0x27] = "TIOCSBRK",
655 "TIOCCBRK", "TIOCGSID", "TCGETS2", "TCSETS2", "TCSETSW2", "TCSETSF2",
656 "TIOCGRS485", "TIOCSRS485", "TIOCGPTN", "TIOCSPTLCK",
657 "TIOCGDEV||TCGETX", "TCSETX", "TCSETXF", "TCSETXW", "TIOCSIG",
658 "TIOCVHANGUP", "TIOCGPKT", "TIOCGPTLCK", "TIOCGEXCL",
659 [0x50] = "FIONCLEX", "FIOCLEX", "FIOASYNC", "TIOCSERCONFIG",
660 "TIOCSERGWILD", "TIOCSERSWILD", "TIOCGLCKTRMIOS", "TIOCSLCKTRMIOS",
661 "TIOCSERGSTRUCT", "TIOCSERGETLSR", "TIOCSERGETMULTI", "TIOCSERSETMULTI",
662 "TIOCMIWAIT", "TIOCGICOUNT", [0x60] = "FIOQSIZE",
665 static DEFINE_STRARRAY_OFFSET(tioctls, 0x5401);
667 #define STRARRAY(arg, name, array) \
668 .arg_scnprintf = { [arg] = SCA_STRARRAY, }, \
669 .arg_parm = { [arg] = &strarray__##array, }
671 static struct syscall_fmt {
674 size_t (*arg_scnprintf[6])(char *bf, size_t size, struct syscall_arg *arg);
680 { .name = "access", .errmsg = true,
681 .arg_scnprintf = { [1] = SCA_ACCMODE, /* mode */ }, },
682 { .name = "arch_prctl", .errmsg = true, .alias = "prctl", },
683 { .name = "brk", .hexret = true,
684 .arg_scnprintf = { [0] = SCA_HEX, /* brk */ }, },
685 { .name = "clock_gettime", .errmsg = true, STRARRAY(0, clk_id, clockid), },
686 { .name = "close", .errmsg = true,
687 .arg_scnprintf = { [0] = SCA_CLOSE_FD, /* fd */ }, },
688 { .name = "connect", .errmsg = true, },
689 { .name = "dup", .errmsg = true,
690 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
691 { .name = "dup2", .errmsg = true,
692 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
693 { .name = "dup3", .errmsg = true,
694 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
695 { .name = "epoll_ctl", .errmsg = true, STRARRAY(1, op, epoll_ctl_ops), },
696 { .name = "eventfd2", .errmsg = true,
697 .arg_scnprintf = { [1] = SCA_EFD_FLAGS, /* flags */ }, },
698 { .name = "faccessat", .errmsg = true,
699 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
700 { .name = "fadvise64", .errmsg = true,
701 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
702 { .name = "fallocate", .errmsg = true,
703 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
704 { .name = "fchdir", .errmsg = true,
705 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
706 { .name = "fchmod", .errmsg = true,
707 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
708 { .name = "fchmodat", .errmsg = true,
709 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
710 { .name = "fchown", .errmsg = true,
711 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
712 { .name = "fchownat", .errmsg = true,
713 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
714 { .name = "fcntl", .errmsg = true,
715 .arg_scnprintf = { [0] = SCA_FD, /* fd */
716 [1] = SCA_STRARRAY, /* cmd */ },
717 .arg_parm = { [1] = &strarray__fcntl_cmds, /* cmd */ }, },
718 { .name = "fdatasync", .errmsg = true,
719 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
720 { .name = "flock", .errmsg = true,
721 .arg_scnprintf = { [0] = SCA_FD, /* fd */
722 [1] = SCA_FLOCK, /* cmd */ }, },
723 { .name = "fsetxattr", .errmsg = true,
724 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
725 { .name = "fstat", .errmsg = true, .alias = "newfstat",
726 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
727 { .name = "fstatat", .errmsg = true, .alias = "newfstatat",
728 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
729 { .name = "fstatfs", .errmsg = true,
730 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
731 { .name = "fsync", .errmsg = true,
732 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
733 { .name = "ftruncate", .errmsg = true,
734 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
735 { .name = "futex", .errmsg = true,
736 .arg_scnprintf = { [1] = SCA_FUTEX_OP, /* op */ }, },
737 { .name = "futimesat", .errmsg = true,
738 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
739 { .name = "getdents", .errmsg = true,
740 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
741 { .name = "getdents64", .errmsg = true,
742 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
743 { .name = "getitimer", .errmsg = true, STRARRAY(0, which, itimers), },
744 { .name = "getrlimit", .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
745 { .name = "ioctl", .errmsg = true,
746 .arg_scnprintf = { [0] = SCA_FD, /* fd */
747 [1] = SCA_STRHEXARRAY, /* cmd */
748 [2] = SCA_HEX, /* arg */ },
749 .arg_parm = { [1] = &strarray__tioctls, /* cmd */ }, },
750 { .name = "kill", .errmsg = true,
751 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
752 { .name = "linkat", .errmsg = true,
753 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
754 { .name = "lseek", .errmsg = true,
755 .arg_scnprintf = { [0] = SCA_FD, /* fd */
756 [2] = SCA_STRARRAY, /* whence */ },
757 .arg_parm = { [2] = &strarray__whences, /* whence */ }, },
758 { .name = "lstat", .errmsg = true, .alias = "newlstat", },
759 { .name = "madvise", .errmsg = true,
760 .arg_scnprintf = { [0] = SCA_HEX, /* start */
761 [2] = SCA_MADV_BHV, /* behavior */ }, },
762 { .name = "mkdirat", .errmsg = true,
763 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
764 { .name = "mknodat", .errmsg = true,
765 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
766 { .name = "mlock", .errmsg = true,
767 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
768 { .name = "mlockall", .errmsg = true,
769 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
770 { .name = "mmap", .hexret = true,
771 .arg_scnprintf = { [0] = SCA_HEX, /* addr */
772 [2] = SCA_MMAP_PROT, /* prot */
773 [3] = SCA_MMAP_FLAGS, /* flags */ }, },
774 { .name = "mprotect", .errmsg = true,
775 .arg_scnprintf = { [0] = SCA_HEX, /* start */
776 [2] = SCA_MMAP_PROT, /* prot */ }, },
777 { .name = "mremap", .hexret = true,
778 .arg_scnprintf = { [0] = SCA_HEX, /* addr */
779 [4] = SCA_HEX, /* new_addr */ }, },
780 { .name = "munlock", .errmsg = true,
781 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
782 { .name = "munmap", .errmsg = true,
783 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
784 { .name = "name_to_handle_at", .errmsg = true,
785 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
786 { .name = "newfstatat", .errmsg = true,
787 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
788 { .name = "open", .errmsg = true,
789 .arg_scnprintf = { [1] = SCA_OPEN_FLAGS, /* flags */ }, },
790 { .name = "open_by_handle_at", .errmsg = true,
791 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
792 [2] = SCA_OPEN_FLAGS, /* flags */ }, },
793 { .name = "openat", .errmsg = true,
794 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
795 [2] = SCA_OPEN_FLAGS, /* flags */ }, },
796 { .name = "pipe2", .errmsg = true,
797 .arg_scnprintf = { [1] = SCA_PIPE_FLAGS, /* flags */ }, },
798 { .name = "poll", .errmsg = true, .timeout = true, },
799 { .name = "ppoll", .errmsg = true, .timeout = true, },
800 { .name = "pread", .errmsg = true, .alias = "pread64",
801 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
802 { .name = "preadv", .errmsg = true, .alias = "pread",
803 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
804 { .name = "prlimit64", .errmsg = true, STRARRAY(1, resource, rlimit_resources), },
805 { .name = "pwrite", .errmsg = true, .alias = "pwrite64",
806 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
807 { .name = "pwritev", .errmsg = true,
808 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
809 { .name = "read", .errmsg = true,
810 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
811 { .name = "readlinkat", .errmsg = true,
812 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
813 { .name = "readv", .errmsg = true,
814 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
815 { .name = "recvfrom", .errmsg = true,
816 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
817 { .name = "recvmmsg", .errmsg = true,
818 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
819 { .name = "recvmsg", .errmsg = true,
820 .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
821 { .name = "renameat", .errmsg = true,
822 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
823 { .name = "rt_sigaction", .errmsg = true,
824 .arg_scnprintf = { [0] = SCA_SIGNUM, /* sig */ }, },
825 { .name = "rt_sigprocmask", .errmsg = true, STRARRAY(0, how, sighow), },
826 { .name = "rt_sigqueueinfo", .errmsg = true,
827 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
828 { .name = "rt_tgsigqueueinfo", .errmsg = true,
829 .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
830 { .name = "select", .errmsg = true, .timeout = true, },
831 { .name = "sendmmsg", .errmsg = true,
832 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
833 { .name = "sendmsg", .errmsg = true,
834 .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
835 { .name = "sendto", .errmsg = true,
836 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
837 { .name = "setitimer", .errmsg = true, STRARRAY(0, which, itimers), },
838 { .name = "setrlimit", .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
839 { .name = "shutdown", .errmsg = true,
840 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
841 { .name = "socket", .errmsg = true,
842 .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
843 [1] = SCA_SK_TYPE, /* type */ },
844 .arg_parm = { [0] = &strarray__socket_families, /* family */ }, },
845 { .name = "socketpair", .errmsg = true,
846 .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
847 [1] = SCA_SK_TYPE, /* type */ },
848 .arg_parm = { [0] = &strarray__socket_families, /* family */ }, },
849 { .name = "stat", .errmsg = true, .alias = "newstat", },
850 { .name = "symlinkat", .errmsg = true,
851 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
852 { .name = "tgkill", .errmsg = true,
853 .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
854 { .name = "tkill", .errmsg = true,
855 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
856 { .name = "uname", .errmsg = true, .alias = "newuname", },
857 { .name = "unlinkat", .errmsg = true,
858 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
859 { .name = "utimensat", .errmsg = true,
860 .arg_scnprintf = { [0] = SCA_FDAT, /* dirfd */ }, },
861 { .name = "write", .errmsg = true,
862 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
863 { .name = "writev", .errmsg = true,
864 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
867 static int syscall_fmt__cmp(const void *name, const void *fmtp)
869 const struct syscall_fmt *fmt = fmtp;
870 return strcmp(name, fmt->name);
873 static struct syscall_fmt *syscall_fmt__find(const char *name)
875 const int nmemb = ARRAY_SIZE(syscall_fmts);
876 return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp);
880 struct event_format *tp_format;
883 struct syscall_fmt *fmt;
884 size_t (**arg_scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
888 static size_t fprintf_duration(unsigned long t, FILE *fp)
890 double duration = (double)t / NSEC_PER_MSEC;
891 size_t printed = fprintf(fp, "(");
894 printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration);
895 else if (duration >= 0.01)
896 printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration);
898 printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration);
899 return printed + fprintf(fp, "): ");
902 struct thread_trace {
906 unsigned long nr_events;
914 struct intlist *syscall_stats;
917 static struct thread_trace *thread_trace__new(void)
919 struct thread_trace *ttrace = zalloc(sizeof(struct thread_trace));
922 ttrace->paths.max = -1;
924 ttrace->syscall_stats = intlist__new(NULL);
929 static struct thread_trace *thread__trace(struct thread *thread, FILE *fp)
931 struct thread_trace *ttrace;
936 if (thread->priv == NULL)
937 thread->priv = thread_trace__new();
939 if (thread->priv == NULL)
942 ttrace = thread->priv;
947 color_fprintf(fp, PERF_COLOR_RED,
948 "WARNING: not enough memory, dropping samples!\n");
953 struct perf_tool tool;
960 struct syscall *table;
962 struct perf_record_opts opts;
963 struct machine *host;
967 unsigned long nr_events;
968 struct strlist *ev_qualifier;
969 bool not_ev_qualifier;
971 const char *last_vfs_getname;
972 struct intlist *tid_list;
973 struct intlist *pid_list;
975 bool multiple_threads;
978 bool show_tool_stats;
979 double duration_filter;
982 u64 vfs_getname, proc_getname;
986 static int trace__set_fd_pathname(struct thread *thread, int fd, const char *pathname)
988 struct thread_trace *ttrace = thread->priv;
990 if (fd > ttrace->paths.max) {
991 char **npath = realloc(ttrace->paths.table, (fd + 1) * sizeof(char *));
996 if (ttrace->paths.max != -1) {
997 memset(npath + ttrace->paths.max + 1, 0,
998 (fd - ttrace->paths.max) * sizeof(char *));
1000 memset(npath, 0, (fd + 1) * sizeof(char *));
1003 ttrace->paths.table = npath;
1004 ttrace->paths.max = fd;
1007 ttrace->paths.table[fd] = strdup(pathname);
1009 return ttrace->paths.table[fd] != NULL ? 0 : -1;
1012 static int thread__read_fd_path(struct thread *thread, int fd)
1014 char linkname[PATH_MAX], pathname[PATH_MAX];
1018 if (thread->pid_ == thread->tid) {
1019 scnprintf(linkname, sizeof(linkname),
1020 "/proc/%d/fd/%d", thread->pid_, fd);
1022 scnprintf(linkname, sizeof(linkname),
1023 "/proc/%d/task/%d/fd/%d", thread->pid_, thread->tid, fd);
1026 if (lstat(linkname, &st) < 0 || st.st_size + 1 > (off_t)sizeof(pathname))
1029 ret = readlink(linkname, pathname, sizeof(pathname));
1031 if (ret < 0 || ret > st.st_size)
1034 pathname[ret] = '\0';
1035 return trace__set_fd_pathname(thread, fd, pathname);
1038 static const char *thread__fd_path(struct thread *thread, int fd,
1039 struct trace *trace)
1041 struct thread_trace *ttrace = thread->priv;
1049 if ((fd > ttrace->paths.max || ttrace->paths.table[fd] == NULL))
1052 ++trace->stats.proc_getname;
1053 if (thread__read_fd_path(thread, fd)) {
1057 return ttrace->paths.table[fd];
1060 static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
1061 struct syscall_arg *arg)
1064 size_t printed = scnprintf(bf, size, "%d", fd);
1065 const char *path = thread__fd_path(arg->thread, fd, arg->trace);
1068 printed += scnprintf(bf + printed, size - printed, "<%s>", path);
1073 static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
1074 struct syscall_arg *arg)
1077 size_t printed = syscall_arg__scnprintf_fd(bf, size, arg);
1078 struct thread_trace *ttrace = arg->thread->priv;
1080 if (ttrace && fd >= 0 && fd <= ttrace->paths.max) {
1081 free(ttrace->paths.table[fd]);
1082 ttrace->paths.table[fd] = NULL;
1088 static bool trace__filter_duration(struct trace *trace, double t)
1090 return t < (trace->duration_filter * NSEC_PER_MSEC);
1093 static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
1095 double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC;
1097 return fprintf(fp, "%10.3f ", ts);
/*
 * Stop/interrupt flags.  sig_handler() stores to 'interrupted' asynchronously,
 * so the flags are volatile: every reader has to reload them from memory
 * instead of reusing a value it cached before the signal arrived.
 * NOTE(review): 'done' is presumably set from the same handler -- confirm.
 */
static volatile bool done = false;
static volatile bool interrupted = false;
1103 static void sig_handler(int sig)
1106 interrupted = sig == SIGINT;
1109 static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread,
1110 u64 duration, u64 tstamp, FILE *fp)
1112 size_t printed = trace__fprintf_tstamp(trace, tstamp, fp);
1113 printed += fprintf_duration(duration, fp);
1115 if (trace->multiple_threads) {
1116 if (trace->show_comm)
1117 printed += fprintf(fp, "%.14s/", thread->comm);
1118 printed += fprintf(fp, "%d ", thread->tid);
1124 static int trace__process_event(struct trace *trace, struct machine *machine,
1125 union perf_event *event)
1129 switch (event->header.type) {
1130 case PERF_RECORD_LOST:
1131 color_fprintf(trace->output, PERF_COLOR_RED,
1132 "LOST %" PRIu64 " events!\n", event->lost.lost);
1133 ret = machine__process_lost_event(machine, event);
1135 ret = machine__process_event(machine, event);
1142 static int trace__tool_process(struct perf_tool *tool,
1143 union perf_event *event,
1144 struct perf_sample *sample __maybe_unused,
1145 struct machine *machine)
1147 struct trace *trace = container_of(tool, struct trace, tool);
1148 return trace__process_event(trace, machine, event);
1151 static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
1153 int err = symbol__init();
1158 trace->host = machine__new_host();
1159 if (trace->host == NULL)
1162 if (perf_target__has_task(&trace->opts.target)) {
1163 err = perf_event__synthesize_thread_map(&trace->tool, evlist->threads,
1164 trace__tool_process,
1167 err = perf_event__synthesize_threads(&trace->tool, trace__tool_process,
1177 static int syscall__set_arg_fmts(struct syscall *sc)
1179 struct format_field *field;
1182 sc->arg_scnprintf = calloc(sc->tp_format->format.nr_fields - 1, sizeof(void *));
1183 if (sc->arg_scnprintf == NULL)
1187 sc->arg_parm = sc->fmt->arg_parm;
1189 for (field = sc->tp_format->format.fields->next; field; field = field->next) {
1190 if (sc->fmt && sc->fmt->arg_scnprintf[idx])
1191 sc->arg_scnprintf[idx] = sc->fmt->arg_scnprintf[idx];
1192 else if (field->flags & FIELD_IS_POINTER)
1193 sc->arg_scnprintf[idx] = syscall_arg__scnprintf_hex;
1200 static int trace__read_syscall_info(struct trace *trace, int id)
1204 const char *name = audit_syscall_to_name(id, trace->audit.machine);
1209 if (id > trace->syscalls.max) {
1210 struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc));
1212 if (nsyscalls == NULL)
1215 if (trace->syscalls.max != -1) {
1216 memset(nsyscalls + trace->syscalls.max + 1, 0,
1217 (id - trace->syscalls.max) * sizeof(*sc));
1219 memset(nsyscalls, 0, (id + 1) * sizeof(*sc));
1222 trace->syscalls.table = nsyscalls;
1223 trace->syscalls.max = id;
1226 sc = trace->syscalls.table + id;
1229 if (trace->ev_qualifier) {
1230 bool in = strlist__find(trace->ev_qualifier, name) != NULL;
1232 if (!(in ^ trace->not_ev_qualifier)) {
1233 sc->filtered = true;
1235 * No need to do read tracepoint information since this will be
1242 sc->fmt = syscall_fmt__find(sc->name);
1244 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name);
1245 sc->tp_format = event_format__new("syscalls", tp_name);
1247 if (sc->tp_format == NULL && sc->fmt && sc->fmt->alias) {
1248 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias);
1249 sc->tp_format = event_format__new("syscalls", tp_name);
1252 if (sc->tp_format == NULL)
1255 return syscall__set_arg_fmts(sc);
1258 static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
1259 unsigned long *args, struct trace *trace,
1260 struct thread *thread)
1264 if (sc->tp_format != NULL) {
1265 struct format_field *field;
1267 struct syscall_arg arg = {
1274 for (field = sc->tp_format->format.fields->next; field;
1275 field = field->next, ++arg.idx, bit <<= 1) {
1279 * Suppress this argument if its value is zero and
1280 * and we don't have a string associated in an
1283 if (args[arg.idx] == 0 &&
1284 !(sc->arg_scnprintf &&
1285 sc->arg_scnprintf[arg.idx] == SCA_STRARRAY &&
1286 sc->arg_parm[arg.idx]))
1289 printed += scnprintf(bf + printed, size - printed,
1290 "%s%s: ", printed ? ", " : "", field->name);
1291 if (sc->arg_scnprintf && sc->arg_scnprintf[arg.idx]) {
1292 arg.val = args[arg.idx];
1294 arg.parm = sc->arg_parm[arg.idx];
1295 printed += sc->arg_scnprintf[arg.idx](bf + printed,
1296 size - printed, &arg);
1298 printed += scnprintf(bf + printed, size - printed,
1299 "%ld", args[arg.idx]);
1306 printed += scnprintf(bf + printed, size - printed,
1308 printed ? ", " : "", i, args[i]);
1316 typedef int (*tracepoint_handler)(struct trace *trace, struct perf_evsel *evsel,
1317 struct perf_sample *sample);
/*
 * Return the syscall table entry for 'id', loading it on demand.
 *
 * An entry counts as missing when 'id' is beyond trace->syscalls.max or its
 * name is still NULL; in that case trace__read_syscall_info() is asked to
 * fill it in. On success a pointer to trace->syscalls.table[id] is returned.
 * Diagnostics are written to trace->output; the value returned on the
 * failure paths is outside this excerpt.
 */
1319 static struct syscall *trace__syscall_info(struct trace *trace,
1320 struct perf_evsel *evsel, int id)
1326 * XXX: Noticed on x86_64, reproduced as far back as 3.0.36, haven't tried
1327 * before that, leaving at a higher verbosity level till that is
1328 * explained. Reproduced with plain ftrace with:
1330 * echo 1 > /t/events/raw_syscalls/sys_exit/enable
1331 * grep "NR -1 " /t/trace_pipe
1333 * After generating some load on the machine.
/* 'n' counts how many times this message was reached. */
1337 fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n",
1338 id, perf_evsel__name(evsel), ++n);
/*
 * Entry not loaded yet: try to read it. The range test comes first so that
 * table[id] is only touched for ids inside the table.
 */
1343 if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) &&
1344 trace__read_syscall_info(trace, id))
/* Check again after the read attempt. */
1347 if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL))
1350 return &trace->syscalls.table[id];
/* Failure report; the syscall name is appended when it is known. */
1354 fprintf(trace->output, "Problems reading syscall %d", id);
1355 if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL)
1356 fprintf(trace->output, "(%s)", trace->syscalls.table[id].name);
1357 fputs(" information\n", trace->output);
/*
 * Feed one syscall duration into the per-thread, per-syscall statistics.
 *
 * The stats live in the 'priv' pointer of an intlist node keyed by the
 * syscall 'id' inside ttrace->syscall_stats; the node is found or created
 * and the struct stats is allocated the first time the syscall is seen.
 */
1362 static void thread__update_stats(struct thread_trace *ttrace,
1363 int id, struct perf_sample *sample)
1365 struct int_node *inode;
1366 struct stats *stats;
1369 inode = intlist__findnew(ttrace->syscall_stats, id);
1373 stats = inode->priv;
/* First occurrence of this syscall for the thread: allocate its stats. */
1374 if (stats == NULL) {
1375 stats = malloc(sizeof(struct stats));
1379 inode->priv = stats;
/*
 * Only compute a duration when an entry timestamp was recorded and it is
 * earlier than this sample, so the subtraction cannot wrap.
 */
1382 if (ttrace->entry_time && sample->time > ttrace->entry_time)
1383 duration = sample->time - ttrace->entry_time;
1385 update_stats(stats, duration);
/*
 * Handler registered for raw_syscalls:sys_enter samples.
 *
 * Resolves the syscall from the sample's "id" field, then formats
 * "name(args" into the per-thread buffer ttrace->entry_str (1024 bytes,
 * allocated on first use) and records sample->time as the entry time.
 * The line is normally completed by trace__sys_exit(), which checks
 * entry_pending; exit_group/exit are printed right here instead, unless a
 * duration filter is active.
 */
1388 static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
1389 struct perf_sample *sample)
1394 struct thread *thread;
1395 int id = perf_evsel__intval(evsel, sample, "id");
1396 struct syscall *sc = trace__syscall_info(trace, evsel, id);
1397 struct thread_trace *ttrace;
1405 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1406 ttrace = thread__trace(thread, trace->output);
/* Raw pointer to the "args" field of the tracepoint payload. */
1410 args = perf_evsel__rawptr(evsel, sample, "args");
1412 fprintf(trace->output, "Problems reading syscall arguments\n");
1416 ttrace = thread->priv;
/* The entry string buffer is created once and then reused for the thread. */
1418 if (ttrace->entry_str == NULL) {
1419 ttrace->entry_str = malloc(1024);
1420 if (!ttrace->entry_str)
1424 ttrace->entry_time = sample->time;
1425 msg = ttrace->entry_str;
/* The 1024 used below must stay in sync with the allocation size above. */
1426 printed += scnprintf(msg + printed, 1024 - printed, "%s(", sc->name);
1428 printed += syscall__scnprintf_args(sc, msg + printed, 1024 - printed,
1429 args, trace, thread);
/* exit_group/exit: print the entry now instead of leaving it to sys_exit. */
1431 if (!strcmp(sc->name, "exit_group") || !strcmp(sc->name, "exit")) {
1432 if (!trace->duration_filter) {
1433 trace__fprintf_entry_head(trace, thread, 1, sample->time, trace->output);
1434 fprintf(trace->output, "%-70s\n", ttrace->entry_str);
/* Tell trace__sys_exit() that entry_str holds the start of the line. */
1437 ttrace->entry_pending = true;
/*
 * Handler registered for raw_syscalls:sys_exit samples.
 *
 * Resolves the syscall from the "id" field, updates the per-thread stats,
 * reads the "ret" field and prints the completed line: the entry head, the
 * text saved by trace__sys_enter() (or a "continued" marker when no entry
 * is pending) and the formatted return value. Lines are dropped when the
 * duration filter rejects them.
 */
1442 static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
1443 struct perf_sample *sample)
1447 struct thread *thread;
1448 int id = perf_evsel__intval(evsel, sample, "id");
1449 struct syscall *sc = trace__syscall_info(trace, evsel, id);
1450 struct thread_trace *ttrace;
1458 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1459 ttrace = thread__trace(thread, trace->output);
1464 thread__update_stats(ttrace, id, sample);
1466 ret = perf_evsel__intval(evsel, sample, "ret");
/*
 * Successful open: bind the pathname captured by the vfs_getname handler
 * to the returned fd, then consume it so it is not reused.
 */
1468 if (id == trace->audit.open_id && ret >= 0 && trace->last_vfs_getname) {
1469 trace__set_fd_pathname(thread, ret, trace->last_vfs_getname);
1470 trace->last_vfs_getname = NULL;
1471 ++trace->stats.vfs_getname;
1474 ttrace = thread->priv;
1476 ttrace->exit_time = sample->time;
/*
 * A duration can only be computed when the matching entry was seen; with no
 * entry time, an active duration filter takes the other branch.
 */
1478 if (ttrace->entry_time) {
1479 duration = sample->time - ttrace->entry_time;
1480 if (trace__filter_duration(trace, duration))
1482 } else if (trace->duration_filter)
1485 trace__fprintf_entry_head(trace, thread, duration, sample->time, trace->output);
/*
 * Print the text prepared at sys_enter, or mark the line as "continued"
 * when no entry is pending for this thread.
 */
1487 if (ttrace->entry_pending) {
1488 fprintf(trace->output, "%-70s", ttrace->entry_str);
1490 fprintf(trace->output, " ... [");
1491 color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued");
1492 fprintf(trace->output, "]: %s()", sc->name);
/*
 * Return value formatting, driven by the optional per-syscall format:
 * errno name and message, "Timeout" for zero, hex, or plain decimal.
 *
 * NOTE(review): 'ret' is printed with %d and %#x, i.e. as an int; if the
 * "ret" field is wider (e.g. an address returned by mmap) the value would
 * be truncated - confirm the declaration of 'ret'.
 * NOTE(review): the result of strerror_r() is used as a string, which
 * matches the GNU variant returning char *; the XSI variant returns int -
 * confirm which one is selected for this build.
 */
1495 if (sc->fmt == NULL) {
1497 fprintf(trace->output, ") = %d", ret);
1498 } else if (ret < 0 && sc->fmt->errmsg) {
1500 const char *emsg = strerror_r(-ret, bf, sizeof(bf)),
1501 *e = audit_errno_to_name(-ret);
1503 fprintf(trace->output, ") = -1 %s %s", e, emsg);
1504 } else if (ret == 0 && sc->fmt->timeout)
1505 fprintf(trace->output, ") = 0 Timeout");
1506 else if (sc->fmt->hexret)
1507 fprintf(trace->output, ") = %#x", ret);
1511 fputc('\n', trace->output);
/* The saved entry text has been dealt with for this syscall. */
1513 ttrace->entry_pending = false;
/*
 * Handler for the probe:vfs_getname tracepoint: remembers the "pathname"
 * payload so that trace__sys_exit() can associate it with the fd returned
 * by a following open.
 *
 * NOTE(review): the stored pointer comes from perf_evsel__rawptr() on this
 * sample; presumably it is only valid while the sample data is - confirm.
 */
1518 static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel,
1519 struct perf_sample *sample)
1521 trace->last_vfs_getname = perf_evsel__rawptr(evsel, sample, "pathname");
/*
 * Handler for sched:sched_stat_runtime samples: converts the "runtime"
 * field to milliseconds (dividing by NSEC_PER_MSEC) and adds it to both the
 * per-thread and the global runtime totals. The fprintf below dumps the raw
 * event fields to trace->output; the condition under which it runs is
 * outside this excerpt.
 */
1525 static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel,
1526 struct perf_sample *sample)
1528 u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
1529 double runtime_ms = (double)runtime / NSEC_PER_MSEC;
1530 struct thread *thread = machine__findnew_thread(trace->host,
1533 struct thread_trace *ttrace = thread__trace(thread, trace->output);
1538 ttrace->runtime_ms += runtime_ms;
1539 trace->runtime_ms += runtime_ms;
1543 fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n",
1545 perf_evsel__strval(evsel, sample, "comm"),
1546 (pid_t)perf_evsel__intval(evsel, sample, "pid"),
1548 perf_evsel__intval(evsel, sample, "vruntime"));
/*
 * Decide whether a sample is to be ignored, based on the optional pid and
 * tid lists built by parse_target_str().
 *
 * The first test matches a sample whose pid or tid is in a configured list;
 * the second one fires when at least one list is configured.
 * NOTE(review): the return statements are outside this excerpt; presumably
 * a match means "do not skip" and a configured list without a match means
 * "skip" - confirm.
 */
1552 static bool skip_sample(struct trace *trace, struct perf_sample *sample)
1554 if ((trace->pid_list && intlist__find(trace->pid_list, sample->pid)) ||
1555 (trace->tid_list && intlist__find(trace->tid_list, sample->tid)))
1558 if (trace->pid_list || trace->tid_list)
/*
 * perf_tool 'sample' callback, installed by trace__replay().
 *
 * Recovers the enclosing struct trace from the embedded tool, filters the
 * sample through skip_sample(), latches the first sample time as base_time
 * when relative timestamps are in use, and dispatches to the handler stored
 * in evsel->handler.func.
 */
1564 static int trace__process_sample(struct perf_tool *tool,
1565 union perf_event *event __maybe_unused,
1566 struct perf_sample *sample,
1567 struct perf_evsel *evsel,
1568 struct machine *machine __maybe_unused)
1570 struct trace *trace = container_of(tool, struct trace, tool);
1573 tracepoint_handler handler = evsel->handler.func;
1575 if (skip_sample(trace, sample))
/* base_time == 0 means "not set yet"; only needed without full_time. */
1578 if (!trace->full_time && trace->base_time == 0)
1579 trace->base_time = sample->time;
1582 handler(trace, evsel, sample);
/*
 * Report whether the session's evlist contains a tracepoint event called
 * 'name' (e.g. "raw_syscalls:sys_enter").
 */
1588 perf_session__has_tp(struct perf_session *session, const char *name)
1590 struct perf_evsel *evsel;
1592 evsel = perf_evlist__find_tracepoint_by_name(session->evlist, name);
1594 return evsel != NULL;
/*
 * Turn the pid/tid option strings of the target into integer lists
 * (trace->pid_list, trace->tid_list), which skip_sample() consults.
 * A list that cannot be created is reported through pr_err(); each list is
 * only built when the corresponding option string is set.
 */
1597 static int parse_target_str(struct trace *trace)
1599 if (trace->opts.target.pid) {
1600 trace->pid_list = intlist__new(trace->opts.target.pid);
1601 if (trace->pid_list == NULL) {
1602 pr_err("Error parsing process id string\n");
1607 if (trace->opts.target.tid) {
1608 trace->tid_list = intlist__new(trace->opts.target.tid);
1609 if (trace->tid_list == NULL) {
1610 pr_err("Error parsing thread id string\n");
/*
 * Implement "perf trace record": build an argument vector made of the fixed
 * record_args followed by the caller's argv and hand it to cmd_record().
 *
 * The vector is calloc()ed with one extra slot, so it stays NULL terminated
 * after the copies. The return value is whatever cmd_record() returns.
 * NOTE(review): rec_argv is not released in the visible lines - confirm
 * whether that is intentional (the result is returned straight away).
 */
1618 static int trace__record(int argc, const char **argv)
1620 unsigned int rec_argc, i, j;
1621 const char **rec_argv;
1622 const char * const record_args[] = {
1627 "-e", "raw_syscalls:sys_enter,raw_syscalls:sys_exit",
1630 rec_argc = ARRAY_SIZE(record_args) + argc;
1631 rec_argv = calloc(rec_argc + 1, sizeof(char *));
1633 if (rec_argv == NULL)
/* Fixed arguments first ... */
1636 for (i = 0; i < ARRAY_SIZE(record_args); i++)
1637 rec_argv[i] = record_args[i];
/* ... then the user supplied ones; 'i' keeps counting the total. */
1639 for (j = 0; j < (unsigned int)argc; j++, i++)
1640 rec_argv[i] = argv[j];
1642 return cmd_record(i, rec_argv, NULL);
/* Forward declaration: used by trace__run() before its definition. */
1645 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp);
/*
 * Add the probe:vfs_getname tracepoint to 'evlist', with
 * trace__vfs_getname() as its handler. The evsel is deleted again when it
 * has no "pathname" field; nothing is reported to the caller (void).
 */
1647 static void perf_evlist__add_vfs_getname(struct perf_evlist *evlist)
1649 struct perf_evsel *evsel = perf_evsel__newtp("probe", "vfs_getname",
1650 evlist->nr_entries);
/* The handler needs the "pathname" field; drop the evsel without it. */
1654 if (perf_evsel__field(evsel, "pathname") == NULL) {
1655 perf_evsel__delete(evsel);
1659 evsel->handler.func = trace__vfs_getname;
1660 perf_evlist__add(evlist, evsel);
/*
 * Live tracing.
 *
 * Builds an evlist with the raw_syscalls:sys_enter/sys_exit tracepoints
 * (plus the vfs_getname probe and sched:sched_stat_runtime), creates the
 * maps for the target, prepares the workload described by argv, opens and
 * mmaps the events and then drains the ring buffers, dispatching every
 * sample to the handler of its evsel. When a full pass over the mmaps saw
 * no new events it waits in poll(). On the way out the events are disabled,
 * the summary and tool statistics are printed and the evlist resources are
 * released in reverse order of acquisition.
 *
 * Errors are reported on trace->output. Several guards (e.g. around the
 * workload and the sched tracepoint) are outside this excerpt.
 */
1663 static int trace__run(struct trace *trace, int argc, const char **argv)
1665 struct perf_evlist *evlist = perf_evlist__new();
1666 struct perf_evsel *evsel;
1668 unsigned long before;
/* Remaining command line arguments describe a workload to start. */
1669 const bool forks = argc > 0;
1673 if (evlist == NULL) {
1674 fprintf(trace->output, "Not enough memory to run!\n");
1678 if (perf_evlist__add_newtp(evlist, "raw_syscalls", "sys_enter", trace__sys_enter) ||
1679 perf_evlist__add_newtp(evlist, "raw_syscalls", "sys_exit", trace__sys_exit))
1682 perf_evlist__add_vfs_getname(evlist);
1685 perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
1686 trace__sched_stat_runtime))
1689 err = perf_evlist__create_maps(evlist, &trace->opts.target);
1691 fprintf(trace->output, "Problems parsing the target to trace, check your options!\n");
1692 goto out_delete_evlist;
1695 err = trace__symbols_init(trace, evlist);
1697 fprintf(trace->output, "Problems initializing symbol libraries!\n");
1698 goto out_delete_maps;
1701 perf_evlist__config(evlist, &trace->opts);
1703 signal(SIGCHLD, sig_handler);
1704 signal(SIGINT, sig_handler);
1707 err = perf_evlist__prepare_workload(evlist, &trace->opts.target,
1708 argv, false, false);
1710 fprintf(trace->output, "Couldn't run the workload!\n");
1711 goto out_delete_maps;
1715 err = perf_evlist__open(evlist);
1717 goto out_error_open;
1719 err = perf_evlist__mmap(evlist, UINT_MAX, false);
1721 fprintf(trace->output, "Couldn't mmap the events: %s\n", strerror(errno));
1722 goto out_close_evlist;
1725 perf_evlist__enable(evlist);
1728 perf_evlist__start_workload(evlist);
/* Thread ids are shown when more than one thread can produce events. */
1730 trace->multiple_threads = evlist->threads->map[0] == -1 || evlist->threads->nr > 1;
/* Snapshot of the event count, compared after the pass over the mmaps. */
1732 before = trace->nr_events;
1734 for (i = 0; i < evlist->nr_mmaps; i++) {
1735 union perf_event *event;
1737 while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) {
1738 const u32 type = event->header.type;
1739 tracepoint_handler handler;
1740 struct perf_sample sample;
1744 err = perf_evlist__parse_sample(evlist, event, &sample);
1746 fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err);
1750 if (!trace->full_time && trace->base_time == 0)
1751 trace->base_time = sample.time;
/* Non-sample records are handed to trace__process_event(). */
1753 if (type != PERF_RECORD_SAMPLE) {
1754 trace__process_event(trace, trace->host, event);
1758 evsel = perf_evlist__id2evsel(evlist, sample.id);
1759 if (evsel == NULL) {
1760 fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample.id);
/* The handlers read tracepoint fields from the raw payload. */
1764 if (sample.raw_data == NULL) {
1765 fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
1766 perf_evsel__name(evsel), sample.tid,
1767 sample.cpu, sample.raw_size);
1771 handler = evsel->handler.func;
1772 handler(trace, evsel, &sample);
/*
 * Nothing new in this pass: block in poll(), without a timeout while still
 * running and for 100 ms once 'done' is set.
 */
1779 if (trace->nr_events == before) {
1780 int timeout = done ? 100 : -1;
1782 if (poll(evlist->pollfd, evlist->nr_fds, timeout) > 0)
1789 perf_evlist__disable(evlist);
1793 trace__fprintf_thread_summary(trace, trace->output);
1795 if (trace->show_tool_stats) {
1796 fprintf(trace->output, "Stats:\n "
1797 " vfs_getname : %" PRIu64 "\n"
1798 " proc_getname: %" PRIu64 "\n",
1799 trace->stats.vfs_getname,
1800 trace->stats.proc_getname);
/* Teardown, in reverse order of setup. */
1804 perf_evlist__munmap(evlist);
1806 perf_evlist__close(evlist);
1808 perf_evlist__delete_maps(evlist);
1810 perf_evlist__delete(evlist);
1812 trace->live = false;
/* Error reporting for tracepoint setup and open failures. */
1815 char errbuf[BUFSIZ];
1818 perf_evlist__strerror_tp(evlist, errno, errbuf, sizeof(errbuf));
1822 perf_evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf));
1825 fprintf(trace->output, "%s\n", errbuf);
1826 goto out_delete_evlist;
/*
 * Replay mode: process events from the file named by input_name instead of
 * tracing live.
 *
 * Installs the perf_tool callbacks, opens the session read-only, attaches
 * the sys_enter/sys_exit/vfs_getname handlers to the matching tracepoints
 * and refuses data files lacking raw_syscalls:sys_enter or
 * raw_syscalls:sys_exit. After the pid/tid filters are parsed the events
 * are processed; the thread summary is printed when requested and
 * processing did not fail. The session is deleted before returning.
 */
1830 static int trace__replay(struct trace *trace)
1832 const struct perf_evsel_str_handler handlers[] = {
1833 { "raw_syscalls:sys_enter", trace__sys_enter, },
1834 { "raw_syscalls:sys_exit", trace__sys_exit, },
1835 { "probe:vfs_getname", trace__vfs_getname, },
1838 struct perf_session *session;
/* Samples go to trace__process_sample(), the rest to generic helpers. */
1841 trace->tool.sample = trace__process_sample;
1842 trace->tool.mmap = perf_event__process_mmap;
1843 trace->tool.mmap2 = perf_event__process_mmap2;
1844 trace->tool.comm = perf_event__process_comm;
1845 trace->tool.exit = perf_event__process_exit;
1846 trace->tool.fork = perf_event__process_fork;
1847 trace->tool.attr = perf_event__process_attr;
1848 trace->tool.tracing_data = perf_event__process_tracing_data;
1849 trace->tool.build_id = perf_event__process_build_id;
1851 trace->tool.ordered_samples = true;
1852 trace->tool.ordering_requires_timestamps = true;
1854 /* add tid to output */
1855 trace->multiple_threads = true;
1857 if (symbol__init() < 0)
1860 session = perf_session__new(input_name, O_RDONLY, 0, false,
1862 if (session == NULL)
/* Threads are looked up in the host machine recorded in the session. */
1865 trace->host = &session->machines.host;
1867 err = perf_session__set_tracepoints_handlers(session, handlers);
/* Both syscall tracepoints are required to reconstruct syscalls. */
1871 if (!perf_session__has_tp(session, "raw_syscalls:sys_enter")) {
1872 pr_err("Data file does not have raw_syscalls:sys_enter events\n");
1876 if (!perf_session__has_tp(session, "raw_syscalls:sys_exit")) {
1877 pr_err("Data file does not have raw_syscalls:sys_exit events\n");
1881 err = parse_target_str(trace);
1887 err = perf_session__process_events(session, &trace->tool);
1889 pr_err("Failed to process events, error %d", err);
1891 else if (trace->summary)
1892 trace__fprintf_thread_summary(trace, trace->output);
1895 perf_session__delete(session);
/*
 * Print the banner and column headings of the per-thread summary to 'fp',
 * accumulating the fprintf() return values in 'printed'.
 */
1900 static size_t trace__fprintf_threads_header(FILE *fp)
1904 printed = fprintf(fp, "\n _____________________________________________________________________________\n");
1905 printed += fprintf(fp, " __) Summary of events (__\n\n");
1906 printed += fprintf(fp, " [ task - pid ] [ events ] [ ratio ] [ runtime ]\n");
1907 printed += fprintf(fp, " syscall count min max avg stddev\n");
1908 printed += fprintf(fp, " msec msec msec %%\n");
1909 printed += fprintf(fp, " _____________________________________________________________________________\n\n");
/*
 * Print the per-syscall statistics of one thread to 'fp': one row per node
 * of ttrace->syscall_stats with the syscall name, the number of calls, the
 * min/max/avg durations in milliseconds and the standard deviation as a
 * percentage of the average. The fprintf() results are accumulated in
 * 'printed'.
 */
1914 static size_t thread__dump_stats(struct thread_trace *ttrace,
1915 struct trace *trace, FILE *fp)
1917 struct stats *stats;
1920 struct int_node *inode = intlist__first(ttrace->syscall_stats);
1925 printed += fprintf(fp, "\n");
1927 /* each int_node is a syscall */
1929 stats = inode->priv;
/* Stats are divided by NSEC_PER_MSEC to print milliseconds. */
1931 double min = (double)(stats->min) / NSEC_PER_MSEC;
1932 double max = (double)(stats->max) / NSEC_PER_MSEC;
1933 double avg = avg_stats(stats);
1935 u64 n = (u64) stats->n;
/*
 * The relative deviation is computed before 'avg' is scaled, and guarded
 * against a zero average.
 */
1937 pct = avg ? 100.0 * stddev_stats(stats)/avg : 0.0;
1938 avg /= NSEC_PER_MSEC;
/* The node key (inode->i) is the syscall id, used as table index. */
1940 sc = &trace->syscalls.table[inode->i];
1941 printed += fprintf(fp, "%24s %14s : ", "", sc->name);
1942 printed += fprintf(fp, "%5" PRIu64 " %8.3f %8.3f",
1944 printed += fprintf(fp, " %8.3f %6.2f\n", avg, pct);
1947 inode = intlist__next(inode);
1950 printed += fprintf(fp, "\n\n");
/*
 * Handed as the opaque 'priv' argument to trace__fprintf_one_thread() via
 * machine__for_each_thread(); the callback also reads 'fp' and 'printed'
 * members, which are outside this excerpt.
 */
1955 /* struct used to pass data to per-thread function */
1956 struct summary_data {
1958 struct trace *trace;
/*
 * Per-thread callback of the summary: prints one line with the thread's
 * comm, tid, number of events, share of all events (in percent) and
 * runtime in ms, followed by its per-syscall statistics.
 *
 * 'priv' is a struct summary_data carrying the output stream, the trace
 * state and the running character count. The line is colored by the event
 * share: green above 25%, yellow above 5%; the threshold selecting red is
 * outside this excerpt.
 *
 * NOTE(review): 'printed' starts from data->printed and is added back to
 * data->printed at the end, so the previous total looks to be counted
 * twice - confirm whether 'printed' should start at 0.
 */
1962 static int trace__fprintf_one_thread(struct thread *thread, void *priv)
1964 struct summary_data *data = priv;
1965 FILE *fp = data->fp;
1966 size_t printed = data->printed;
1967 struct trace *trace = data->trace;
1968 struct thread_trace *ttrace = thread->priv;
/* Share of this thread in the total number of events, in percent. */
1975 ratio = (double)ttrace->nr_events / trace->nr_events * 100.0;
1977 color = PERF_COLOR_NORMAL;
1979 color = PERF_COLOR_RED;
1980 else if (ratio > 25.0)
1981 color = PERF_COLOR_GREEN;
1982 else if (ratio > 5.0)
1983 color = PERF_COLOR_YELLOW;
1985 printed += color_fprintf(fp, color, "%20s", thread->comm);
1986 printed += fprintf(fp, " - %-5d :%11lu [", thread->tid, ttrace->nr_events);
1987 printed += color_fprintf(fp, color, "%5.1f%%", ratio);
1988 printed += fprintf(fp, " ] %10.3f ms\n", ttrace->runtime_ms);
1989 printed += thread__dump_stats(ttrace, trace, fp);
1991 data->printed += printed;
/*
 * Print the summary header followed by one entry per thread of
 * trace->host, and return the character count accumulated in
 * data.printed.
 */
1996 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
1998 struct summary_data data = {
2002 data.printed = trace__fprintf_threads_header(fp);
2004 machine__for_each_thread(trace->host, trace__fprintf_one_thread, &data);
2006 return data.printed;
/*
 * Option callback for --duration: stores the parsed value in
 * trace->duration_filter ('opt->value' is the struct trace).
 *
 * NOTE(review): atof() cannot signal malformed input, so a non-numeric
 * string silently becomes 0; strtod() with end-pointer checking would allow
 * rejecting it - confirm the desired behaviour.
 */
2009 static int trace__set_duration(const struct option *opt, const char *str,
2010 int unset __maybe_unused)
2012 struct trace *trace = opt->value;
2014 trace->duration_filter = atof(str);
/*
 * Open 'filename' for writing as trace->output. An existing, non-empty
 * file is first renamed to "<filename>.old".
 *
 * Returns 0 on success or -errno when fopen() fails.
 * NOTE(review): the result of rename() is ignored, so when the rename
 * fails the old contents are truncated by the fopen() below - confirm this
 * best-effort behaviour is intended.
 */
2018 static int trace__open_output(struct trace *trace, const char *filename)
2022 if (!stat(filename, &st) && st.st_size) {
2023 char oldname[PATH_MAX];
2025 scnprintf(oldname, sizeof(oldname), "%s.old", filename);
2027 rename(filename, oldname);
2030 trace->output = fopen(filename, "w");
2032 return trace->output == NULL ? -errno : 0;
2035 int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
2037 const char * const trace_usage[] = {
2038 "perf trace [<options>] [<command>]",
2039 "perf trace [<options>] -- <command> [<options>]",
2040 "perf trace record [<options>] [<command>]",
2041 "perf trace record [<options>] -- <command> [<options>]",
2044 struct trace trace = {
2046 .machine = audit_detect_machine(),
2047 .open_id = audit_name_to_syscall("open", trace.audit.machine),
2057 .user_freq = UINT_MAX,
2058 .user_interval = ULLONG_MAX,
2065 const char *output_name = NULL;
2066 const char *ev_qualifier_str = NULL;
2067 const struct option trace_options[] = {
2068 OPT_BOOLEAN(0, "comm", &trace.show_comm,
2069 "show the thread COMM next to its id"),
2070 OPT_BOOLEAN(0, "tool_stats", &trace.show_tool_stats, "show tool stats"),
2071 OPT_STRING('e', "expr", &ev_qualifier_str, "expr",
2072 "list of events to trace"),
2073 OPT_STRING('o', "output", &output_name, "file", "output file name"),
2074 OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"),
2075 OPT_STRING('p', "pid", &trace.opts.target.pid, "pid",
2076 "trace events on existing process id"),
2077 OPT_STRING('t', "tid", &trace.opts.target.tid, "tid",
2078 "trace events on existing thread id"),
2079 OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide,
2080 "system-wide collection from all CPUs"),
2081 OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu",
2082 "list of cpus to monitor"),
2083 OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit,
2084 "child tasks do not inherit counters"),
2085 OPT_CALLBACK('m', "mmap-pages", &trace.opts.mmap_pages, "pages",
2086 "number of mmap data pages",
2087 perf_evlist__parse_mmap_pages),
2088 OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user",
2090 OPT_CALLBACK(0, "duration", &trace, "float",
2091 "show only events with duration > N.M ms",
2092 trace__set_duration),
2093 OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"),
2094 OPT_INCR('v', "verbose", &verbose, "be more verbose"),
2095 OPT_BOOLEAN('T', "time", &trace.full_time,
2096 "Show full timestamp, not time relative to first start"),
2097 OPT_BOOLEAN(0, "summary", &trace.summary,
2098 "Show syscall summary with statistics"),
2104 if ((argc > 1) && (strcmp(argv[1], "record") == 0))
2105 return trace__record(argc-2, &argv[2]);
2107 argc = parse_options(argc, argv, trace_options, trace_usage, 0);
2109 if (output_name != NULL) {
2110 err = trace__open_output(&trace, output_name);
2112 perror("failed to create output file");
2117 if (ev_qualifier_str != NULL) {
2118 const char *s = ev_qualifier_str;
2120 trace.not_ev_qualifier = *s == '!';
2121 if (trace.not_ev_qualifier)
2123 trace.ev_qualifier = strlist__new(true, s);
2124 if (trace.ev_qualifier == NULL) {
2125 fputs("Not enough memory to parse event qualifier",
2132 err = perf_target__validate(&trace.opts.target);
2134 perf_target__strerror(&trace.opts.target, err, bf, sizeof(bf));
2135 fprintf(trace.output, "%s", bf);
2139 err = perf_target__parse_uid(&trace.opts.target);
2141 perf_target__strerror(&trace.opts.target, err, bf, sizeof(bf));
2142 fprintf(trace.output, "%s", bf);
2146 if (!argc && perf_target__none(&trace.opts.target))
2147 trace.opts.target.system_wide = true;
2150 err = trace__replay(&trace);
2152 err = trace__run(&trace, argc, argv);
2155 if (output_name != NULL)
2156 fclose(trace.output);