1 #include <traceevent/event-parse.h>
3 #include "util/color.h"
4 #include "util/debug.h"
5 #include "util/evlist.h"
6 #include "util/machine.h"
7 #include "util/session.h"
8 #include "util/thread.h"
9 #include "util/parse-options.h"
10 #include "util/strlist.h"
11 #include "util/intlist.h"
12 #include "util/thread_map.h"
13 #include "util/stat.h"
17 #include <sys/eventfd.h>
19 #include <linux/futex.h>
21 /* For older distros: */
23 # define MAP_STACK 0x20000
27 # define MADV_HWPOISON 100
30 #ifndef MADV_MERGEABLE
31 # define MADV_MERGEABLE 12
34 #ifndef MADV_UNMERGEABLE
35 # define MADV_UNMERGEABLE 13
40 struct thread *thread;
53 #define DEFINE_STRARRAY(array) struct strarray strarray__##array = { \
54 .nr_entries = ARRAY_SIZE(array), \
58 #define DEFINE_STRARRAY_OFFSET(array, off) struct strarray strarray__##array = { \
60 .nr_entries = ARRAY_SIZE(array), \
64 static size_t __syscall_arg__scnprintf_strarray(char *bf, size_t size,
66 struct syscall_arg *arg)
68 struct strarray *sa = arg->parm;
69 int idx = arg->val - sa->offset;
71 if (idx < 0 || idx >= sa->nr_entries)
72 return scnprintf(bf, size, intfmt, arg->val);
74 return scnprintf(bf, size, "%s", sa->entries[idx]);
77 static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size,
78 struct syscall_arg *arg)
80 return __syscall_arg__scnprintf_strarray(bf, size, "%d", arg);
83 #define SCA_STRARRAY syscall_arg__scnprintf_strarray
85 static size_t syscall_arg__scnprintf_strhexarray(char *bf, size_t size,
86 struct syscall_arg *arg)
88 return __syscall_arg__scnprintf_strarray(bf, size, "%#x", arg);
91 #define SCA_STRHEXARRAY syscall_arg__scnprintf_strhexarray
/*
 * Forward declaration: the definition appears further down the file, next to
 * thread__fd_path() which it calls; syscall_arg__scnprintf_fd_at() uses it
 * before that point.
 */
93 static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
94 struct syscall_arg *arg);
96 #define SCA_FD syscall_arg__scnprintf_fd
98 static size_t syscall_arg__scnprintf_fd_at(char *bf, size_t size,
99 struct syscall_arg *arg)
104 return scnprintf(bf, size, "CWD");
106 return syscall_arg__scnprintf_fd(bf, size, arg);
109 #define SCA_FDAT syscall_arg__scnprintf_fd_at
111 static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
112 struct syscall_arg *arg);
114 #define SCA_CLOSE_FD syscall_arg__scnprintf_close_fd
116 static size_t syscall_arg__scnprintf_hex(char *bf, size_t size,
117 struct syscall_arg *arg)
119 return scnprintf(bf, size, "%#lx", arg->val);
122 #define SCA_HEX syscall_arg__scnprintf_hex
124 static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size,
125 struct syscall_arg *arg)
127 int printed = 0, prot = arg->val;
129 if (prot == PROT_NONE)
130 return scnprintf(bf, size, "NONE");
131 #define P_MMAP_PROT(n) \
132 if (prot & PROT_##n) { \
133 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
143 P_MMAP_PROT(GROWSDOWN);
144 P_MMAP_PROT(GROWSUP);
148 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", prot);
153 #define SCA_MMAP_PROT syscall_arg__scnprintf_mmap_prot
155 static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size,
156 struct syscall_arg *arg)
158 int printed = 0, flags = arg->val;
160 #define P_MMAP_FLAG(n) \
161 if (flags & MAP_##n) { \
162 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
167 P_MMAP_FLAG(PRIVATE);
171 P_MMAP_FLAG(ANONYMOUS);
172 P_MMAP_FLAG(DENYWRITE);
173 P_MMAP_FLAG(EXECUTABLE);
176 P_MMAP_FLAG(GROWSDOWN);
178 P_MMAP_FLAG(HUGETLB);
181 P_MMAP_FLAG(NONBLOCK);
182 P_MMAP_FLAG(NORESERVE);
183 P_MMAP_FLAG(POPULATE);
185 #ifdef MAP_UNINITIALIZED
186 P_MMAP_FLAG(UNINITIALIZED);
191 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
196 #define SCA_MMAP_FLAGS syscall_arg__scnprintf_mmap_flags
198 static size_t syscall_arg__scnprintf_madvise_behavior(char *bf, size_t size,
199 struct syscall_arg *arg)
201 int behavior = arg->val;
204 #define P_MADV_BHV(n) case MADV_##n: return scnprintf(bf, size, #n)
207 P_MADV_BHV(SEQUENTIAL);
208 P_MADV_BHV(WILLNEED);
209 P_MADV_BHV(DONTNEED);
211 P_MADV_BHV(DONTFORK);
213 P_MADV_BHV(HWPOISON);
214 #ifdef MADV_SOFT_OFFLINE
215 P_MADV_BHV(SOFT_OFFLINE);
217 P_MADV_BHV(MERGEABLE);
218 P_MADV_BHV(UNMERGEABLE);
220 P_MADV_BHV(HUGEPAGE);
222 #ifdef MADV_NOHUGEPAGE
223 P_MADV_BHV(NOHUGEPAGE);
226 P_MADV_BHV(DONTDUMP);
235 return scnprintf(bf, size, "%#x", behavior);
238 #define SCA_MADV_BHV syscall_arg__scnprintf_madvise_behavior
240 static size_t syscall_arg__scnprintf_flock(char *bf, size_t size,
241 struct syscall_arg *arg)
243 int printed = 0, op = arg->val;
246 return scnprintf(bf, size, "NONE");
248 if ((op & LOCK_##cmd) == LOCK_##cmd) { \
249 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #cmd); \
264 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", op);
269 #define SCA_FLOCK syscall_arg__scnprintf_flock
271 static size_t syscall_arg__scnprintf_futex_op(char *bf, size_t size, struct syscall_arg *arg)
273 enum syscall_futex_args {
274 SCF_UADDR = (1 << 0),
277 SCF_TIMEOUT = (1 << 3),
278 SCF_UADDR2 = (1 << 4),
282 int cmd = op & FUTEX_CMD_MASK;
286 #define P_FUTEX_OP(n) case FUTEX_##n: printed = scnprintf(bf, size, #n);
287 P_FUTEX_OP(WAIT); arg->mask |= SCF_VAL3|SCF_UADDR2; break;
288 P_FUTEX_OP(WAKE); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
289 P_FUTEX_OP(FD); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
290 P_FUTEX_OP(REQUEUE); arg->mask |= SCF_VAL3|SCF_TIMEOUT; break;
291 P_FUTEX_OP(CMP_REQUEUE); arg->mask |= SCF_TIMEOUT; break;
292 P_FUTEX_OP(CMP_REQUEUE_PI); arg->mask |= SCF_TIMEOUT; break;
293 P_FUTEX_OP(WAKE_OP); break;
294 P_FUTEX_OP(LOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
295 P_FUTEX_OP(UNLOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
296 P_FUTEX_OP(TRYLOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2; break;
297 P_FUTEX_OP(WAIT_BITSET); arg->mask |= SCF_UADDR2; break;
298 P_FUTEX_OP(WAKE_BITSET); arg->mask |= SCF_UADDR2; break;
299 P_FUTEX_OP(WAIT_REQUEUE_PI); break;
300 default: printed = scnprintf(bf, size, "%#x", cmd); break;
303 if (op & FUTEX_PRIVATE_FLAG)
304 printed += scnprintf(bf + printed, size - printed, "|PRIV");
306 if (op & FUTEX_CLOCK_REALTIME)
307 printed += scnprintf(bf + printed, size - printed, "|CLKRT");
312 #define SCA_FUTEX_OP syscall_arg__scnprintf_futex_op
/*
 * Printable names for the "op" argument of epoll_ctl (see the "epoll_ctl"
 * entry in the syscall format table). The strarray is declared with an
 * offset of 1, and the strarray printer subtracts the strarray offset from
 * the argument value before indexing, so presumably op 1 prints as "ADD" --
 * confirm against DEFINE_STRARRAY_OFFSET.
 */
314 static const char *epoll_ctl_ops[] = { "ADD", "DEL", "MOD", };
315 static DEFINE_STRARRAY_OFFSET(epoll_ctl_ops, 1);
/*
 * Printable names for the "which" argument of getitimer/setitimer (both
 * format-table entries reference this strarray for argument 0).
 */
317 static const char *itimers[] = { "REAL", "VIRTUAL", "PROF", };
318 static DEFINE_STRARRAY(itimers);
320 static const char *whences[] = { "SET", "CUR", "END",
328 static DEFINE_STRARRAY(whences);
330 static const char *fcntl_cmds[] = {
331 "DUPFD", "GETFD", "SETFD", "GETFL", "SETFL", "GETLK", "SETLK",
332 "SETLKW", "SETOWN", "GETOWN", "SETSIG", "GETSIG", "F_GETLK64",
333 "F_SETLK64", "F_SETLKW64", "F_SETOWN_EX", "F_GETOWN_EX",
336 static DEFINE_STRARRAY(fcntl_cmds);
338 static const char *rlimit_resources[] = {
339 "CPU", "FSIZE", "DATA", "STACK", "CORE", "RSS", "NPROC", "NOFILE",
340 "MEMLOCK", "AS", "LOCKS", "SIGPENDING", "MSGQUEUE", "NICE", "RTPRIO",
343 static DEFINE_STRARRAY(rlimit_resources);
/*
 * Printable names for the "how" argument of rt_sigprocmask (argument 0 of
 * its format-table entry).
 */
345 static const char *sighow[] = { "BLOCK", "UNBLOCK", "SETMASK", };
346 static DEFINE_STRARRAY(sighow);
348 static const char *clockid[] = {
349 "REALTIME", "MONOTONIC", "PROCESS_CPUTIME_ID", "THREAD_CPUTIME_ID",
350 "MONOTONIC_RAW", "REALTIME_COARSE", "MONOTONIC_COARSE",
352 static DEFINE_STRARRAY(clockid);
354 static const char *socket_families[] = {
355 "UNSPEC", "LOCAL", "INET", "AX25", "IPX", "APPLETALK", "NETROM",
356 "BRIDGE", "ATMPVC", "X25", "INET6", "ROSE", "DECnet", "NETBEUI",
357 "SECURITY", "KEY", "NETLINK", "PACKET", "ASH", "ECONET", "ATMSVC",
358 "RDS", "SNA", "IRDA", "PPPOX", "WANPIPE", "LLC", "IB", "CAN", "TIPC",
359 "BLUETOOTH", "IUCV", "RXRPC", "ISDN", "PHONET", "IEEE802154", "CAIF",
360 "ALG", "NFC", "VSOCK",
362 static DEFINE_STRARRAY(socket_families);
364 #ifndef SOCK_TYPE_MASK
365 #define SOCK_TYPE_MASK 0xf
368 static size_t syscall_arg__scnprintf_socket_type(char *bf, size_t size,
369 struct syscall_arg *arg)
373 flags = type & ~SOCK_TYPE_MASK;
375 type &= SOCK_TYPE_MASK;
377 * Can't use a strarray, MIPS may override for ABI reasons.
380 #define P_SK_TYPE(n) case SOCK_##n: printed = scnprintf(bf, size, #n); break;
385 P_SK_TYPE(SEQPACKET);
390 printed = scnprintf(bf, size, "%#x", type);
393 #define P_SK_FLAG(n) \
394 if (flags & SOCK_##n) { \
395 printed += scnprintf(bf + printed, size - printed, "|%s", #n); \
396 flags &= ~SOCK_##n; \
404 printed += scnprintf(bf + printed, size - printed, "|%#x", flags);
409 #define SCA_SK_TYPE syscall_arg__scnprintf_socket_type
412 #define MSG_PROBE 0x10
414 #ifndef MSG_WAITFORONE
415 #define MSG_WAITFORONE 0x10000
417 #ifndef MSG_SENDPAGE_NOTLAST
418 #define MSG_SENDPAGE_NOTLAST 0x20000
421 #define MSG_FASTOPEN 0x20000000
424 static size_t syscall_arg__scnprintf_msg_flags(char *bf, size_t size,
425 struct syscall_arg *arg)
427 int printed = 0, flags = arg->val;
430 return scnprintf(bf, size, "NONE");
431 #define P_MSG_FLAG(n) \
432 if (flags & MSG_##n) { \
433 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
439 P_MSG_FLAG(DONTROUTE);
444 P_MSG_FLAG(DONTWAIT);
451 P_MSG_FLAG(ERRQUEUE);
452 P_MSG_FLAG(NOSIGNAL);
454 P_MSG_FLAG(WAITFORONE);
455 P_MSG_FLAG(SENDPAGE_NOTLAST);
456 P_MSG_FLAG(FASTOPEN);
457 P_MSG_FLAG(CMSG_CLOEXEC);
461 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
466 #define SCA_MSG_FLAGS syscall_arg__scnprintf_msg_flags
468 static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size,
469 struct syscall_arg *arg)
474 if (mode == F_OK) /* 0 */
475 return scnprintf(bf, size, "F");
477 if (mode & n##_OK) { \
478 printed += scnprintf(bf + printed, size - printed, "%s", #n); \
488 printed += scnprintf(bf + printed, size - printed, "|%#x", mode);
493 #define SCA_ACCMODE syscall_arg__scnprintf_access_mode
495 static size_t syscall_arg__scnprintf_open_flags(char *bf, size_t size,
496 struct syscall_arg *arg)
498 int printed = 0, flags = arg->val;
500 if (!(flags & O_CREAT))
501 arg->mask |= 1 << (arg->idx + 1); /* Mask the mode parm */
504 return scnprintf(bf, size, "RDONLY");
506 if (flags & O_##n) { \
507 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
531 if ((flags & O_SYNC) == O_SYNC)
532 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", "SYNC");
544 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
549 #define SCA_OPEN_FLAGS syscall_arg__scnprintf_open_flags
551 static size_t syscall_arg__scnprintf_eventfd_flags(char *bf, size_t size,
552 struct syscall_arg *arg)
554 int printed = 0, flags = arg->val;
557 return scnprintf(bf, size, "NONE");
559 if (flags & EFD_##n) { \
560 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
570 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
575 #define SCA_EFD_FLAGS syscall_arg__scnprintf_eventfd_flags
577 static size_t syscall_arg__scnprintf_pipe_flags(char *bf, size_t size,
578 struct syscall_arg *arg)
580 int printed = 0, flags = arg->val;
583 if (flags & O_##n) { \
584 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
593 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
598 #define SCA_PIPE_FLAGS syscall_arg__scnprintf_pipe_flags
600 static size_t syscall_arg__scnprintf_signum(char *bf, size_t size, struct syscall_arg *arg)
605 #define P_SIGNUM(n) case SIG##n: return scnprintf(bf, size, #n)
640 return scnprintf(bf, size, "%#x", sig);
643 #define SCA_SIGNUM syscall_arg__scnprintf_signum
645 #define TCGETS 0x5401
647 static const char *tioctls[] = {
648 "TCGETS", "TCSETS", "TCSETSW", "TCSETSF", "TCGETA", "TCSETA", "TCSETAW",
649 "TCSETAF", "TCSBRK", "TCXONC", "TCFLSH", "TIOCEXCL", "TIOCNXCL",
650 "TIOCSCTTY", "TIOCGPGRP", "TIOCSPGRP", "TIOCOUTQ", "TIOCSTI",
651 "TIOCGWINSZ", "TIOCSWINSZ", "TIOCMGET", "TIOCMBIS", "TIOCMBIC",
652 "TIOCMSET", "TIOCGSOFTCAR", "TIOCSSOFTCAR", "FIONREAD", "TIOCLINUX",
653 "TIOCCONS", "TIOCGSERIAL", "TIOCSSERIAL", "TIOCPKT", "FIONBIO",
654 "TIOCNOTTY", "TIOCSETD", "TIOCGETD", "TCSBRKP", [0x27] = "TIOCSBRK",
655 "TIOCCBRK", "TIOCGSID", "TCGETS2", "TCSETS2", "TCSETSW2", "TCSETSF2",
656 "TIOCGRS485", "TIOCSRS485", "TIOCGPTN", "TIOCSPTLCK",
657 "TIOCGDEV||TCGETX", "TCSETX", "TCSETXF", "TCSETXW", "TIOCSIG",
658 "TIOCVHANGUP", "TIOCGPKT", "TIOCGPTLCK", "TIOCGEXCL",
659 [0x50] = "FIONCLEX", "FIOCLEX", "FIOASYNC", "TIOCSERCONFIG",
660 "TIOCSERGWILD", "TIOCSERSWILD", "TIOCGLCKTRMIOS", "TIOCSLCKTRMIOS",
661 "TIOCSERGSTRUCT", "TIOCSERGETLSR", "TIOCSERGETMULTI", "TIOCSERSETMULTI",
662 "TIOCMIWAIT", "TIOCGICOUNT", [0x60] = "FIOQSIZE",
665 static DEFINE_STRARRAY_OFFSET(tioctls, 0x5401);
/*
 * Shorthand for syscall_fmt initializers: print argument number 'arg' with
 * SCA_STRARRAY, using strarray__<array> as its lookup table. 'name' does not
 * appear in the expansion; it only labels the argument at the use site.
 */
667 #define STRARRAY(arg, name, array) \
668 .arg_scnprintf = { [arg] = SCA_STRARRAY, }, \
669 .arg_parm = { [arg] = &strarray__##array, }
671 static struct syscall_fmt {
674 size_t (*arg_scnprintf[6])(char *bf, size_t size, struct syscall_arg *arg);
/*
 * Keep the entries of this table sorted by .name in strcmp() order:
 * syscall_fmt__find() looks names up with bsearch() using a strcmp()-based
 * comparator, which only works on a sorted array.
 */
680 { .name = "access", .errmsg = true,
681 .arg_scnprintf = { [1] = SCA_ACCMODE, /* mode */ }, },
682 { .name = "arch_prctl", .errmsg = true, .alias = "prctl", },
683 { .name = "brk", .hexret = true,
684 .arg_scnprintf = { [0] = SCA_HEX, /* brk */ }, },
685 { .name = "clock_gettime", .errmsg = true, STRARRAY(0, clk_id, clockid), },
686 { .name = "close", .errmsg = true,
687 .arg_scnprintf = { [0] = SCA_CLOSE_FD, /* fd */ }, },
688 { .name = "connect", .errmsg = true, },
689 { .name = "dup", .errmsg = true,
690 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
691 { .name = "dup2", .errmsg = true,
692 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
693 { .name = "dup3", .errmsg = true,
694 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
695 { .name = "epoll_ctl", .errmsg = true, STRARRAY(1, op, epoll_ctl_ops), },
696 { .name = "eventfd2", .errmsg = true,
697 .arg_scnprintf = { [1] = SCA_EFD_FLAGS, /* flags */ }, },
698 { .name = "faccessat", .errmsg = true,
699 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
700 { .name = "fadvise64", .errmsg = true,
701 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
702 { .name = "fallocate", .errmsg = true,
703 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
704 { .name = "fchdir", .errmsg = true,
705 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
706 { .name = "fchmod", .errmsg = true,
707 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
708 { .name = "fchmodat", .errmsg = true,
709 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
710 { .name = "fchown", .errmsg = true,
711 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
712 { .name = "fchownat", .errmsg = true,
713 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
714 { .name = "fcntl", .errmsg = true,
715 .arg_scnprintf = { [0] = SCA_FD, /* fd */
716 [1] = SCA_STRARRAY, /* cmd */ },
717 .arg_parm = { [1] = &strarray__fcntl_cmds, /* cmd */ }, },
718 { .name = "fdatasync", .errmsg = true,
719 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
720 { .name = "flock", .errmsg = true,
721 .arg_scnprintf = { [0] = SCA_FD, /* fd */
722 [1] = SCA_FLOCK, /* cmd */ }, },
723 { .name = "fsetxattr", .errmsg = true,
724 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
725 { .name = "fstat", .errmsg = true, .alias = "newfstat",
726 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
727 { .name = "fstatat", .errmsg = true, .alias = "newfstatat",
728 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
729 { .name = "fstatfs", .errmsg = true,
730 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
731 { .name = "fsync", .errmsg = true,
732 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
733 { .name = "ftruncate", .errmsg = true,
734 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
735 { .name = "futex", .errmsg = true,
736 .arg_scnprintf = { [1] = SCA_FUTEX_OP, /* op */ }, },
737 { .name = "futimesat", .errmsg = true,
738 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
739 { .name = "getdents", .errmsg = true,
740 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
741 { .name = "getdents64", .errmsg = true,
742 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
743 { .name = "getitimer", .errmsg = true, STRARRAY(0, which, itimers), },
744 { .name = "getrlimit", .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
745 { .name = "ioctl", .errmsg = true,
746 .arg_scnprintf = { [0] = SCA_FD, /* fd */
747 [1] = SCA_STRHEXARRAY, /* cmd */
748 [2] = SCA_HEX, /* arg */ },
749 .arg_parm = { [1] = &strarray__tioctls, /* cmd */ }, },
750 { .name = "kill", .errmsg = true,
751 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
752 { .name = "linkat", .errmsg = true,
753 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
754 { .name = "lseek", .errmsg = true,
755 .arg_scnprintf = { [0] = SCA_FD, /* fd */
756 [2] = SCA_STRARRAY, /* whence */ },
757 .arg_parm = { [2] = &strarray__whences, /* whence */ }, },
758 { .name = "lstat", .errmsg = true, .alias = "newlstat", },
759 { .name = "madvise", .errmsg = true,
760 .arg_scnprintf = { [0] = SCA_HEX, /* start */
761 [2] = SCA_MADV_BHV, /* behavior */ }, },
762 { .name = "mkdirat", .errmsg = true,
763 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
764 { .name = "mknodat", .errmsg = true,
765 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
766 { .name = "mlock", .errmsg = true,
767 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
768 { .name = "mlockall", .errmsg = true,
769 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
770 { .name = "mmap", .hexret = true,
771 .arg_scnprintf = { [0] = SCA_HEX, /* addr */
772 [2] = SCA_MMAP_PROT, /* prot */
773 [3] = SCA_MMAP_FLAGS, /* flags */ }, },
774 { .name = "mprotect", .errmsg = true,
775 .arg_scnprintf = { [0] = SCA_HEX, /* start */
776 [2] = SCA_MMAP_PROT, /* prot */ }, },
777 { .name = "mremap", .hexret = true,
778 .arg_scnprintf = { [0] = SCA_HEX, /* addr */
779 [4] = SCA_HEX, /* new_addr */ }, },
780 { .name = "munlock", .errmsg = true,
781 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
782 { .name = "munmap", .errmsg = true,
783 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
784 { .name = "name_to_handle_at", .errmsg = true,
785 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
786 { .name = "newfstatat", .errmsg = true,
787 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
788 { .name = "open", .errmsg = true,
789 .arg_scnprintf = { [1] = SCA_OPEN_FLAGS, /* flags */ }, },
790 { .name = "open_by_handle_at", .errmsg = true,
791 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
792 [2] = SCA_OPEN_FLAGS, /* flags */ }, },
793 { .name = "openat", .errmsg = true,
794 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
795 [2] = SCA_OPEN_FLAGS, /* flags */ }, },
796 { .name = "pipe2", .errmsg = true,
797 .arg_scnprintf = { [1] = SCA_PIPE_FLAGS, /* flags */ }, },
798 { .name = "poll", .errmsg = true, .timeout = true, },
799 { .name = "ppoll", .errmsg = true, .timeout = true, },
800 { .name = "pread", .errmsg = true, .alias = "pread64",
801 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
802 { .name = "preadv", .errmsg = true, .alias = "pread",
803 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
804 { .name = "prlimit64", .errmsg = true, STRARRAY(1, resource, rlimit_resources), },
805 { .name = "pwrite", .errmsg = true, .alias = "pwrite64",
806 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
807 { .name = "pwritev", .errmsg = true,
808 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
809 { .name = "read", .errmsg = true,
810 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
811 { .name = "readlinkat", .errmsg = true,
812 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
813 { .name = "readv", .errmsg = true,
814 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
815 { .name = "recvfrom", .errmsg = true,
816 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
817 { .name = "recvmmsg", .errmsg = true,
818 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
819 { .name = "recvmsg", .errmsg = true,
820 .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
821 { .name = "renameat", .errmsg = true,
822 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
823 { .name = "rt_sigaction", .errmsg = true,
824 .arg_scnprintf = { [0] = SCA_SIGNUM, /* sig */ }, },
825 { .name = "rt_sigprocmask", .errmsg = true, STRARRAY(0, how, sighow), },
826 { .name = "rt_sigqueueinfo", .errmsg = true,
827 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
828 { .name = "rt_tgsigqueueinfo", .errmsg = true,
829 .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
830 { .name = "select", .errmsg = true, .timeout = true, },
831 { .name = "sendmmsg", .errmsg = true,
832 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
833 { .name = "sendmsg", .errmsg = true,
834 .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
835 { .name = "sendto", .errmsg = true,
836 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
837 { .name = "setitimer", .errmsg = true, STRARRAY(0, which, itimers), },
838 { .name = "setrlimit", .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
839 { .name = "shutdown", .errmsg = true,
840 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
841 { .name = "socket", .errmsg = true,
842 .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
843 [1] = SCA_SK_TYPE, /* type */ },
844 .arg_parm = { [0] = &strarray__socket_families, /* family */ }, },
845 { .name = "socketpair", .errmsg = true,
846 .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
847 [1] = SCA_SK_TYPE, /* type */ },
848 .arg_parm = { [0] = &strarray__socket_families, /* family */ }, },
849 { .name = "stat", .errmsg = true, .alias = "newstat", },
850 { .name = "symlinkat", .errmsg = true,
851 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
852 { .name = "tgkill", .errmsg = true,
853 .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
854 { .name = "tkill", .errmsg = true,
855 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
856 { .name = "uname", .errmsg = true, .alias = "newuname", },
857 { .name = "unlinkat", .errmsg = true,
858 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
859 { .name = "utimensat", .errmsg = true,
860 .arg_scnprintf = { [0] = SCA_FDAT, /* dirfd */ }, },
861 { .name = "write", .errmsg = true,
862 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
863 { .name = "writev", .errmsg = true,
864 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
867 static int syscall_fmt__cmp(const void *name, const void *fmtp)
869 const struct syscall_fmt *fmt = fmtp;
870 return strcmp(name, fmt->name);
873 static struct syscall_fmt *syscall_fmt__find(const char *name)
875 const int nmemb = ARRAY_SIZE(syscall_fmts);
876 return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp);
880 struct event_format *tp_format;
883 struct syscall_fmt *fmt;
884 size_t (**arg_scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
888 static size_t fprintf_duration(unsigned long t, FILE *fp)
890 double duration = (double)t / NSEC_PER_MSEC;
891 size_t printed = fprintf(fp, "(");
894 printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration);
895 else if (duration >= 0.01)
896 printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration);
898 printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration);
899 return printed + fprintf(fp, "): ");
902 struct thread_trace {
906 unsigned long nr_events;
914 struct intlist *syscall_stats;
917 static struct thread_trace *thread_trace__new(void)
919 struct thread_trace *ttrace = zalloc(sizeof(struct thread_trace));
922 ttrace->paths.max = -1;
924 ttrace->syscall_stats = intlist__new(NULL);
929 static struct thread_trace *thread__trace(struct thread *thread, FILE *fp)
931 struct thread_trace *ttrace;
936 if (thread->priv == NULL)
937 thread->priv = thread_trace__new();
939 if (thread->priv == NULL)
942 ttrace = thread->priv;
947 color_fprintf(fp, PERF_COLOR_RED,
948 "WARNING: not enough memory, dropping samples!\n");
953 struct perf_tool tool;
960 struct syscall *table;
962 struct perf_record_opts opts;
963 struct machine *host;
967 unsigned long nr_events;
968 struct strlist *ev_qualifier;
969 bool not_ev_qualifier;
971 const char *last_vfs_getname;
972 struct intlist *tid_list;
973 struct intlist *pid_list;
975 bool multiple_threads;
978 bool show_tool_stats;
979 double duration_filter;
982 u64 vfs_getname, proc_getname;
986 static int trace__set_fd_pathname(struct thread *thread, int fd, const char *pathname)
988 struct thread_trace *ttrace = thread->priv;
990 if (fd > ttrace->paths.max) {
991 char **npath = realloc(ttrace->paths.table, (fd + 1) * sizeof(char *));
996 if (ttrace->paths.max != -1) {
997 memset(npath + ttrace->paths.max + 1, 0,
998 (fd - ttrace->paths.max) * sizeof(char *));
1000 memset(npath, 0, (fd + 1) * sizeof(char *));
1003 ttrace->paths.table = npath;
1004 ttrace->paths.max = fd;
1007 ttrace->paths.table[fd] = strdup(pathname);
1009 return ttrace->paths.table[fd] != NULL ? 0 : -1;
1012 static int thread__read_fd_path(struct thread *thread, int fd)
1014 char linkname[PATH_MAX], pathname[PATH_MAX];
1018 if (thread->pid_ == thread->tid) {
1019 scnprintf(linkname, sizeof(linkname),
1020 "/proc/%d/fd/%d", thread->pid_, fd);
1022 scnprintf(linkname, sizeof(linkname),
1023 "/proc/%d/task/%d/fd/%d", thread->pid_, thread->tid, fd);
1026 if (lstat(linkname, &st) < 0 || st.st_size + 1 > (off_t)sizeof(pathname))
1029 ret = readlink(linkname, pathname, sizeof(pathname));
1031 if (ret < 0 || ret > st.st_size)
1034 pathname[ret] = '\0';
1035 return trace__set_fd_pathname(thread, fd, pathname);
1038 static const char *thread__fd_path(struct thread *thread, int fd,
1039 struct trace *trace)
1041 struct thread_trace *ttrace = thread->priv;
1049 if ((fd > ttrace->paths.max || ttrace->paths.table[fd] == NULL))
1052 ++trace->stats.proc_getname;
1053 if (thread__read_fd_path(thread, fd)) {
1057 return ttrace->paths.table[fd];
1060 static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
1061 struct syscall_arg *arg)
1064 size_t printed = scnprintf(bf, size, "%d", fd);
1065 const char *path = thread__fd_path(arg->thread, fd, arg->trace);
1068 printed += scnprintf(bf + printed, size - printed, "<%s>", path);
1073 static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
1074 struct syscall_arg *arg)
1077 size_t printed = syscall_arg__scnprintf_fd(bf, size, arg);
1078 struct thread_trace *ttrace = arg->thread->priv;
1080 if (ttrace && fd >= 0 && fd <= ttrace->paths.max) {
1081 free(ttrace->paths.table[fd]);
1082 ttrace->paths.table[fd] = NULL;
1088 static bool trace__filter_duration(struct trace *trace, double t)
1090 return t < (trace->duration_filter * NSEC_PER_MSEC);
1093 static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
1095 double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC;
1097 return fprintf(fp, "%10.3f ", ts);
/*
 * File-scope flags. 'interrupted' is written by sig_handler(), which sets it
 * to true when the delivered signal is SIGINT.
 * NOTE(review): objects written from a signal handler are conventionally
 * declared volatile sig_atomic_t -- confirm whether plain bool is intended.
 */
1100 static bool done = false;
1101 static bool interrupted = false;
1103 static void sig_handler(int sig)
1106 interrupted = sig == SIGINT;
1109 static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread,
1110 u64 duration, u64 tstamp, FILE *fp)
1112 size_t printed = trace__fprintf_tstamp(trace, tstamp, fp);
1113 printed += fprintf_duration(duration, fp);
1115 if (trace->multiple_threads) {
1116 if (trace->show_comm)
1117 printed += fprintf(fp, "%.14s/", thread->comm);
1118 printed += fprintf(fp, "%d ", thread->tid);
1124 static int trace__process_event(struct trace *trace, struct machine *machine,
1125 union perf_event *event)
1129 switch (event->header.type) {
1130 case PERF_RECORD_LOST:
1131 color_fprintf(trace->output, PERF_COLOR_RED,
1132 "LOST %" PRIu64 " events!\n", event->lost.lost);
1133 ret = machine__process_lost_event(machine, event);
1135 ret = machine__process_event(machine, event);
1142 static int trace__tool_process(struct perf_tool *tool,
1143 union perf_event *event,
1144 struct perf_sample *sample __maybe_unused,
1145 struct machine *machine)
1147 struct trace *trace = container_of(tool, struct trace, tool);
1148 return trace__process_event(trace, machine, event);
1151 static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
1153 int err = symbol__init();
1158 trace->host = machine__new_host();
1159 if (trace->host == NULL)
1162 if (perf_target__has_task(&trace->opts.target)) {
1163 err = perf_event__synthesize_thread_map(&trace->tool, evlist->threads,
1164 trace__tool_process,
1167 err = perf_event__synthesize_threads(&trace->tool, trace__tool_process,
1177 static int syscall__set_arg_fmts(struct syscall *sc)
1179 struct format_field *field;
1182 sc->arg_scnprintf = calloc(sc->tp_format->format.nr_fields - 1, sizeof(void *));
1183 if (sc->arg_scnprintf == NULL)
1187 sc->arg_parm = sc->fmt->arg_parm;
1189 for (field = sc->tp_format->format.fields->next; field; field = field->next) {
1190 if (sc->fmt && sc->fmt->arg_scnprintf[idx])
1191 sc->arg_scnprintf[idx] = sc->fmt->arg_scnprintf[idx];
1192 else if (field->flags & FIELD_IS_POINTER)
1193 sc->arg_scnprintf[idx] = syscall_arg__scnprintf_hex;
1200 static int trace__read_syscall_info(struct trace *trace, int id)
1204 const char *name = audit_syscall_to_name(id, trace->audit.machine);
1209 if (id > trace->syscalls.max) {
1210 struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc));
1212 if (nsyscalls == NULL)
1215 if (trace->syscalls.max != -1) {
1216 memset(nsyscalls + trace->syscalls.max + 1, 0,
1217 (id - trace->syscalls.max) * sizeof(*sc));
1219 memset(nsyscalls, 0, (id + 1) * sizeof(*sc));
1222 trace->syscalls.table = nsyscalls;
1223 trace->syscalls.max = id;
1226 sc = trace->syscalls.table + id;
1229 if (trace->ev_qualifier) {
1230 bool in = strlist__find(trace->ev_qualifier, name) != NULL;
1232 if (!(in ^ trace->not_ev_qualifier)) {
1233 sc->filtered = true;
1235 * No need to read tracepoint information since this will be
1242 sc->fmt = syscall_fmt__find(sc->name);
1244 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name);
1245 sc->tp_format = event_format__new("syscalls", tp_name);
1247 if (sc->tp_format == NULL && sc->fmt && sc->fmt->alias) {
1248 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias);
1249 sc->tp_format = event_format__new("syscalls", tp_name);
1252 if (sc->tp_format == NULL)
1255 return syscall__set_arg_fmts(sc);
1258 static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
1259 unsigned long *args, struct trace *trace,
1260 struct thread *thread)
1264 if (sc->tp_format != NULL) {
1265 struct format_field *field;
1267 struct syscall_arg arg = {
1274 for (field = sc->tp_format->format.fields->next; field;
1275 field = field->next, ++arg.idx, bit <<= 1) {
1279 * Suppress this argument if its value is zero and
1280 * we don't have a string associated in an
1283 if (args[arg.idx] == 0 &&
1284 !(sc->arg_scnprintf &&
1285 sc->arg_scnprintf[arg.idx] == SCA_STRARRAY &&
1286 sc->arg_parm[arg.idx]))
1289 printed += scnprintf(bf + printed, size - printed,
1290 "%s%s: ", printed ? ", " : "", field->name);
1291 if (sc->arg_scnprintf && sc->arg_scnprintf[arg.idx]) {
1292 arg.val = args[arg.idx];
1294 arg.parm = sc->arg_parm[arg.idx];
1295 printed += sc->arg_scnprintf[arg.idx](bf + printed,
1296 size - printed, &arg);
1298 printed += scnprintf(bf + printed, size - printed,
1299 "%ld", args[arg.idx]);
1306 printed += scnprintf(bf + printed, size - printed,
1308 printed ? ", " : "", i, args[i]);
1316 typedef int (*tracepoint_handler)(struct trace *trace, struct perf_evsel *evsel,
1317 struct perf_sample *sample);
/*
 * Return the struct syscall entry for syscall number `id` from
 * trace->syscalls.table.  If the id lies beyond syscalls.max or its entry
 * has no name yet, trace__read_syscall_info() is called first to fill it in.
 *
 * On the success path a pointer into the table is returned.  On failure a
 * "Problems reading syscall ..." diagnostic is written to trace->output.
 * NOTE(review): the failure return value is not visible here; presumably
 * NULL -- confirm.
 */
1319 static struct syscall *trace__syscall_info(struct trace *trace,
1320 struct perf_evsel *evsel, int id)
1326 * XXX: Noticed on x86_64, reproduced as far back as 3.0.36, haven't tried
1327 * before that, leaving at a higher verbosity level till that is
1328 * explained. Reproduced with plain ftrace with:
1330 * echo 1 > /t/events/raw_syscalls/sys_exit/enable
1331 * grep "NR -1 " /t/trace_pipe
1333 * After generating some load on the machine.
/*
 * Report an invalid syscall id together with the event name; n is
 * pre-incremented so each report carries a running count.
 * NOTE(review): the guarding condition (presumably id < 0 plus a verbosity
 * check) is not visible -- confirm.
 */
1337 fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n",
1338 id, perf_evsel__name(evsel), ++n);
/* Populate the table entry on first use; a non-zero result means failure. */
1343 if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) &&
1344 trace__read_syscall_info(trace, id))
/* Re-check: the entry must exist and be named after the read attempt. */
1347 if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL))
1350 return &trace->syscalls.table[id];
/*
 * Failure report, assembled in three writes so the syscall name can be
 * included in parentheses when the table already knows it.
 */
1354 fprintf(trace->output, "Problems reading syscall %d", id);
1355 if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL)
1356 fprintf(trace->output, "(%s)", trace->syscalls.table[id].name);
1357 fputs(" information\n", trace->output);
/*
 * Account one completed syscall in the per-thread, per-syscall statistics.
 *
 * ttrace->syscall_stats is an intlist keyed by syscall id; each node's
 * ->priv holds a struct stats that is allocated the first time the id is
 * seen.  The sample fed to update_stats() is the syscall duration, i.e. the
 * difference between sample->time and the entry timestamp recorded in
 * ttrace->entry_time.
 */
1362 static void thread__update_stats(struct thread_trace *ttrace,
1363 int id, struct perf_sample *sample)
1365 struct int_node *inode;
1366 struct stats *stats;
/* Find the node for this syscall id, creating it if needed. */
1369 inode = intlist__findnew(ttrace->syscall_stats, id);
1373 stats = inode->priv;
/*
 * First occurrence of this syscall for the thread: allocate its stats.
 * NOTE(review): the malloc failure check and the stats initialisation are
 * not visible here -- confirm they exist.
 */
1374 if (stats == NULL) {
1375 stats = malloc(sizeof(struct stats));
1379 inode->priv = stats;
/*
 * Only compute a duration when an entry timestamp exists and time moved
 * forward; otherwise `duration` keeps whatever value it was declared with
 * (declaration not visible, presumably 0).
 */
1382 if (ttrace->entry_time && sample->time > ttrace->entry_time)
1383 duration = sample->time - ttrace->entry_time;
1385 update_stats(stats, duration);
/*
 * Handler for raw_syscalls:sys_enter samples.
 *
 * Looks up the syscall description for the sample's "id" field, finds (or
 * creates) the thread the sample belongs to, and formats "name(args" into
 * the per-thread buffer ttrace->entry_str (1024 bytes, allocated on first
 * use).  The entry timestamp is stored in ttrace->entry_time so the exit
 * handler can compute the duration.
 *
 * exit_group and exit are printed right here when no duration filter is
 * set; for other syscalls the entry is flagged as pending.
 */
1388 static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
1389 struct perf_sample *sample)
1394 struct thread *thread;
1395 int id = perf_evsel__intval(evsel, sample, "id");
1396 struct syscall *sc = trace__syscall_info(trace, evsel, id);
1397 struct thread_trace *ttrace;
/* Resolve the thread and its trace-private state. */
1405 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1406 ttrace = thread__trace(thread, trace->output);
/* Raw pointer to the syscall argument array inside the sample payload. */
1410 args = perf_evsel__rawptr(evsel, sample, "args");
1412 fprintf(trace->output, "Problems reading syscall arguments\n");
1416 ttrace = thread->priv;
/* Allocate the per-thread entry buffer on first use. */
1418 if (ttrace->entry_str == NULL) {
1419 ttrace->entry_str = malloc(1024);
1420 if (!ttrace->entry_str)
1424 ttrace->entry_time = sample->time;
1425 msg = ttrace->entry_str;
/* Build "name(" followed by the formatted arguments; 1024 is the buffer size. */
1426 printed += scnprintf(msg + printed, 1024 - printed, "%s(", sc->name);
1428 printed += syscall__scnprintf_args(sc, msg + printed, 1024 - printed,
1429 args, trace, thread);
/* exit_group/exit are printed here, unless a duration filter is active. */
1431 if (!strcmp(sc->name, "exit_group") || !strcmp(sc->name, "exit")) {
1432 if (!trace->duration_filter) {
1433 trace__fprintf_entry_head(trace, thread, 1, sample->time, trace->output);
1434 fprintf(trace->output, "%-70s\n", ttrace->entry_str);
/*
 * NOTE(review): presumably the else-branch of the exit/exit_group test --
 * the entry line is held back until the matching sys_exit arrives.
 */
1437 ttrace->entry_pending = true;
/*
 * Handler for raw_syscalls:sys_exit samples.
 *
 * Updates the per-syscall statistics, associates a pathname captured by the
 * vfs_getname probe with the fd returned by a successful open, applies the
 * duration filter, and prints the line for the syscall: the entry text that
 * was held back by the sys_enter handler (or a "continued" marker when no
 * entry is pending) followed by ") = <result>".
 *
 * The result formatting depends on sc->fmt: plain "%d" when there is no
 * format description, errno name plus message for negative returns with
 * errmsg set, "Timeout" for a zero return with timeout set, and hex when
 * hexret is set.
 */
1442 static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
1443 struct perf_sample *sample)
1447 struct thread *thread;
1448 int id = perf_evsel__intval(evsel, sample, "id");
1449 struct syscall *sc = trace__syscall_info(trace, evsel, id);
1450 struct thread_trace *ttrace;
1458 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1459 ttrace = thread__trace(thread, trace->output);
/* NOTE(review): presumably guarded by a summary option; guard not visible. */
1464 thread__update_stats(ttrace, id, sample);
1466 ret = perf_evsel__intval(evsel, sample, "ret");
/*
 * A successful open: bind the pathname last seen by the vfs_getname
 * handler to the returned fd, then clear it so it is used only once.
 */
1468 if (id == trace->audit.open_id && ret >= 0 && trace->last_vfs_getname) {
1469 trace__set_fd_pathname(thread, ret, trace->last_vfs_getname);
1470 trace->last_vfs_getname = NULL;
1471 ++trace->stats.vfs_getname;
1474 ttrace = thread->priv;
1476 ttrace->exit_time = sample->time;
/*
 * Duration is only known when the matching entry was seen.  Without an
 * entry timestamp, an active duration filter takes the else-branch.
 * NOTE(review): the actions of both branches are not visible; presumably
 * they jump to the common exit and skip printing -- confirm.
 */
1478 if (ttrace->entry_time) {
1479 duration = sample->time - ttrace->entry_time;
1480 if (trace__filter_duration(trace, duration))
1482 } else if (trace->duration_filter)
1485 trace__fprintf_entry_head(trace, thread, duration, sample->time, trace->output);
/* Print the held-back entry text, or a marker that the entry was not pending. */
1487 if (ttrace->entry_pending) {
1488 fprintf(trace->output, "%-70s", ttrace->entry_str);
1490 fprintf(trace->output, " ... [");
1491 color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued");
1492 fprintf(trace->output, "]: %s()", sc->name);
/* Result formatting, selected by the syscall's format description. */
1495 if (sc->fmt == NULL) {
1497 fprintf(trace->output, ") = %d", ret);
1498 } else if (ret < 0 && sc->fmt->errmsg) {
/*
 * The result of strerror_r() is used as a string pointer here.
 * NOTE(review): presumably this relies on the GNU variant, which returns
 * char * -- confirm.
 */
1500 const char *emsg = strerror_r(-ret, bf, sizeof(bf)),
1501 *e = audit_errno_to_name(-ret);
1503 fprintf(trace->output, ") = -1 %s %s", e, emsg);
1504 } else if (ret == 0 && sc->fmt->timeout)
1505 fprintf(trace->output, ") = 0 Timeout");
1506 else if (sc->fmt->hexret)
1507 fprintf(trace->output, ") = %#x", ret);
1511 fputc('\n', trace->output);
/* The entry, if any, has now been consumed. */
1513 ttrace->entry_pending = false;
/*
 * Handler for the probe:vfs_getname tracepoint: remember a pointer to the
 * sample's "pathname" field in trace->last_vfs_getname.  trace__sys_exit()
 * consumes and clears it when an open syscall returns a non-negative value.
 */
1518 static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel,
1519 struct perf_sample *sample)
1521 trace->last_vfs_getname = perf_evsel__rawptr(evsel, sample, "pathname");
/*
 * Handler for sched:sched_stat_runtime samples: convert the "runtime" field
 * to milliseconds (dividing by NSEC_PER_MSEC) and add it to both the
 * per-thread total (ttrace->runtime_ms) and the global total
 * (trace->runtime_ms).
 */
1525 static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel,
1526 struct perf_sample *sample)
1528 u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
1529 double runtime_ms = (double)runtime / NSEC_PER_MSEC;
1530 struct thread *thread = machine__findnew_thread(trace->host,
1533 struct thread_trace *ttrace = thread__trace(thread, trace->output);
1538 ttrace->runtime_ms += runtime_ms;
1539 trace->runtime_ms += runtime_ms;
/*
 * Dump of the raw sample fields.
 * NOTE(review): presumably only reached when the thread_trace lookup
 * fails; the label/guard is not visible.  The format string ends in ')'
 * without a matching '(' -- looks like a typo, confirm before changing
 * the output.
 */
1543 fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n",
1545 perf_evsel__strval(evsel, sample, "comm"),
1546 (pid_t)perf_evsel__intval(evsel, sample, "pid"),
1548 perf_evsel__intval(evsel, sample, "vruntime"));
/*
 * Decide whether a sample should be filtered out according to the pid and
 * tid lists configured on the trace (used by trace__process_sample()).
 *
 * The first test matches when the sample's pid or tid is found in a
 * configured list; the second matches when any list is configured at all.
 * NOTE(review): the return statements are not visible.  Presumably a listed
 * sample is kept (false), an unlisted sample is skipped (true) while a
 * filter is active, and everything is kept when no list is set -- confirm.
 */
1552 static bool skip_sample(struct trace *trace, struct perf_sample *sample)
1554 if ((trace->pid_list && intlist__find(trace->pid_list, sample->pid)) ||
1555 (trace->tid_list && intlist__find(trace->tid_list, sample->tid)))
1558 if (trace->pid_list || trace->tid_list)
/*
 * perf_tool sample callback (installed as trace->tool.sample by
 * trace__replay()).  Recovers the enclosing struct trace from the tool
 * pointer, applies the pid/tid filter, latches the first sample time as the
 * base for relative timestamps, and dispatches to the handler stored on the
 * event selector.
 */
1564 static int trace__process_sample(struct perf_tool *tool,
1565 union perf_event *event __maybe_unused,
1566 struct perf_sample *sample,
1567 struct perf_evsel *evsel,
1568 struct machine *machine __maybe_unused)
/* `tool` is embedded in struct trace; step back to the container. */
1570 struct trace *trace = container_of(tool, struct trace, tool);
1573 tracepoint_handler handler = evsel->handler.func;
1575 if (skip_sample(trace, sample))
/* Unless full timestamps were requested, time is shown relative to the first sample. */
1578 if (!trace->full_time && trace->base_time == 0)
1579 trace->base_time = sample->time;
/* NOTE(review): a NULL check on `handler` may precede this call; not visible. */
1582 handler(trace, evsel, sample);
/*
 * Report whether the session's event list contains a tracepoint with the
 * given name: true when perf_evlist__find_tracepoint_by_name() finds one.
 */
1588 perf_session__has_tp(struct perf_session *session, const char *name)
1590 struct perf_evsel *evsel;
1592 evsel = perf_evlist__find_tracepoint_by_name(session->evlist, name);
1594 return evsel != NULL;
/*
 * Turn the pid and tid target strings from the command line
 * (trace->opts.target.pid / .tid) into integer lists stored in
 * trace->pid_list and trace->tid_list.  A string that is not set is left
 * alone.  A failure to build either list is reported with pr_err().
 * NOTE(review): the return values are not visible; presumably non-zero on
 * parse failure and 0 otherwise -- confirm.
 */
1597 static int parse_target_str(struct trace *trace)
1599 if (trace->opts.target.pid) {
1600 trace->pid_list = intlist__new(trace->opts.target.pid);
1601 if (trace->pid_list == NULL) {
1602 pr_err("Error parsing process id string\n");
1607 if (trace->opts.target.tid) {
1608 trace->tid_list = intlist__new(trace->opts.target.tid);
1609 if (trace->tid_list == NULL) {
1610 pr_err("Error parsing thread id string\n");
/*
 * Implement "perf trace record": build an argument vector consisting of the
 * fixed record_args[] (which selects the raw_syscalls:sys_enter and
 * raw_syscalls:sys_exit events) followed by the caller's argv, and hand it
 * to cmd_record().
 *
 * The vector is allocated with one extra zeroed slot, so it is
 * NULL-terminated.  It is not freed here.
 */
1618 static int trace__record(int argc, const char **argv)
1620 unsigned int rec_argc, i, j;
1621 const char **rec_argv;
1622 const char * const record_args[] = {
1627 "-e", "raw_syscalls:sys_enter,raw_syscalls:sys_exit",
1630 rec_argc = ARRAY_SIZE(record_args) + argc;
/* calloc() zero-fills, so the extra slot acts as the terminating NULL. */
1631 rec_argv = calloc(rec_argc + 1, sizeof(char *));
1633 if (rec_argv == NULL)
/* Fixed arguments first ... */
1636 for (i = 0; i < ARRAY_SIZE(record_args); i++)
1637 rec_argv[i] = record_args[i];
/* ... then the user's arguments; i keeps counting so it ends as the total. */
1639 for (j = 0; j < (unsigned int)argc; j++, i++)
1640 rec_argv[i] = argv[j];
1642 return cmd_record(i, rec_argv, NULL);
1645 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp);
/*
 * Add the probe:vfs_getname tracepoint to the event list, with
 * trace__vfs_getname as its handler.  The function returns void, so the
 * caller is not told whether the event was added.  The event is dropped
 * again when it has no "pathname" field, since that is the field the
 * handler reads.
 */
1647 static void perf_evlist__add_vfs_getname(struct perf_evlist *evlist)
1649 struct perf_evsel *evsel = perf_evsel__newtp("probe", "vfs_getname",
1650 evlist->nr_entries);
/* NOTE(review): a NULL check on evsel presumably precedes this; not visible. */
1654 if (perf_evsel__field(evsel, "pathname") == NULL) {
1655 perf_evsel__delete(evsel);
1659 evsel->handler.func = trace__vfs_getname;
1660 perf_evlist__add(evlist, evsel);
/*
 * Live tracing: set up the event list, optionally start a workload, then
 * read and dispatch samples from the mmap'ed ring buffers.
 *
 * Steps visible here:
 *   - create the evlist and add raw_syscalls:sys_enter/sys_exit, the
 *     vfs_getname probe and the sched_stat_runtime tracepoint;
 *   - create the cpu/thread maps for the target, initialise symbols and
 *     configure the events;
 *   - install sig_handler for SIGCHLD and SIGINT, prepare the workload,
 *     open and mmap the events, enable them and start the workload;
 *   - loop over all mmaps, parsing each event and calling the handler of the
 *     evsel it belongs to, polling when no new events arrived;
 *   - on the way out: disable the events, print the thread summary and tool
 *     statistics, and tear down in reverse order of setup (munmap, close,
 *     delete maps, delete evlist).
 *
 * Errors are reported on trace->output and unwind through the
 * out_close_evlist / out_delete_maps / out_delete_evlist labels.
 */
1663 static int trace__run(struct trace *trace, int argc, const char **argv)
1665 struct perf_evlist *evlist = perf_evlist__new();
1666 struct perf_evsel *evsel;
1668 unsigned long before;
/* A command on the command line means a workload is to be started. */
1669 const bool forks = argc > 0;
1673 if (evlist == NULL) {
1674 fprintf(trace->output, "Not enough memory to run!\n");
/* The two syscall tracepoints are mandatory; either failing is an error. */
1678 if (perf_evlist__add_newtp(evlist, "raw_syscalls", "sys_enter", trace__sys_enter) ||
1679 perf_evlist__add_newtp(evlist, "raw_syscalls", "sys_exit", trace__sys_exit))
/*
 * Add the vfs_getname probe; the helper returns void, so no result is
 * checked here.  NOTE(review): the sched_stat_runtime tracepoint that
 * follows is presumably only added when scheduler events were requested;
 * that guard is not visible.
 */
1682 perf_evlist__add_vfs_getname(evlist);
1685 perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
1686 trace__sched_stat_runtime))
1689 err = perf_evlist__create_maps(evlist, &trace->opts.target);
1691 fprintf(trace->output, "Problems parsing the target to trace, check your options!\n");
1692 goto out_delete_evlist;
1695 err = trace__symbols_init(trace, evlist);
1697 fprintf(trace->output, "Problems initializing symbol libraries!\n");
1698 goto out_delete_maps;
1701 perf_evlist__config(evlist, &trace->opts);
/* Both signals go to sig_handler; the main loop consults the `done` flag. */
1703 signal(SIGCHLD, sig_handler);
1704 signal(SIGINT, sig_handler);
/* NOTE(review): presumably only executed when `forks` is true; guard not visible. */
1707 err = perf_evlist__prepare_workload(evlist, &trace->opts.target,
1708 argv, false, false);
1710 fprintf(trace->output, "Couldn't run the workload!\n");
1711 goto out_delete_maps;
1715 err = perf_evlist__open(evlist);
1717 fprintf(trace->output, "Couldn't create the events: %s\n", strerror(errno));
1718 goto out_delete_maps;
1721 err = perf_evlist__mmap(evlist, UINT_MAX, false);
1723 fprintf(trace->output, "Couldn't mmap the events: %s\n", strerror(errno));
1724 goto out_close_evlist;
1727 perf_evlist__enable(evlist);
1730 perf_evlist__start_workload(evlist);
/* Show thread ids when tracing more than one thread (map[0] == -1 or nr > 1). */
1732 trace->multiple_threads = evlist->threads->map[0] == -1 || evlist->threads->nr > 1;
/* Snapshot the event count so the end of the pass can tell whether anything arrived. */
1734 before = trace->nr_events;
1736 for (i = 0; i < evlist->nr_mmaps; i++) {
1737 union perf_event *event;
/* Drain every event currently available in this ring buffer. */
1739 while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) {
1740 const u32 type = event->header.type;
1741 tracepoint_handler handler;
1742 struct perf_sample sample;
1746 err = perf_evlist__parse_sample(evlist, event, &sample);
1748 fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err);
1752 if (!trace->full_time && trace->base_time == 0)
1753 trace->base_time = sample.time;
/* Non-sample records go to the generic event processing. */
1755 if (type != PERF_RECORD_SAMPLE) {
1756 trace__process_event(trace, trace->host, event);
/* Map the sample id back to the evsel that owns it. */
1760 evsel = perf_evlist__id2evsel(evlist, sample.id);
1761 if (evsel == NULL) {
1762 fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample.id);
/* The handlers read fields from the raw payload, so it must be present. */
1766 if (sample.raw_data == NULL) {
1767 fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
1768 perf_evsel__name(evsel), sample.tid,
1769 sample.cpu, sample.raw_size);
1773 handler = evsel->handler.func;
1774 handler(trace, evsel, &sample);
/*
 * Nothing new in this pass: wait for more data.  Once `done` is set the
 * wait is bounded to 100 ms, otherwise poll() blocks indefinitely.
 * NOTE(review): the action after a positive poll() result is not visible;
 * presumably it jumps back to rescan the buffers.
 */
1781 if (trace->nr_events == before) {
1782 int timeout = done ? 100 : -1;
1784 if (poll(evlist->pollfd, evlist->nr_fds, timeout) > 0)
1791 perf_evlist__disable(evlist);
/* NOTE(review): presumably guarded by the summary option; guard not visible. */
1795 trace__fprintf_thread_summary(trace, trace->output);
1797 if (trace->show_tool_stats) {
1798 fprintf(trace->output, "Stats:\n "
1799 " vfs_getname : %" PRIu64 "\n"
1800 " proc_getname: %" PRIu64 "\n",
1801 trace->stats.vfs_getname,
1802 trace->stats.proc_getname);
/* Teardown, in reverse order of setup. */
1806 perf_evlist__munmap(evlist);
1808 perf_evlist__close(evlist);
1810 perf_evlist__delete_maps(evlist);
1812 perf_evlist__delete(evlist);
1814 trace->live = false;
/*
 * Error path that prints a tracepoint-specific message derived from errno.
 * NOTE(review): presumably the target of the tracepoint-add failures above;
 * the label is not visible.
 */
1818 char errbuf[BUFSIZ];
1819 perf_evlist__strerror_tp(evlist, errno, errbuf, sizeof(errbuf));
1820 fprintf(trace->output, "%s\n", errbuf);
1822 goto out_delete_evlist;
/*
 * Replay mode: process a previously recorded data file instead of tracing
 * live.
 *
 * Installs the perf_tool callbacks on trace->tool, opens the input file as
 * a perf session, attaches the sys_enter/sys_exit/vfs_getname handlers to
 * the matching tracepoints, checks that the file actually contains both
 * raw_syscalls events, parses the pid/tid filters and processes all events.
 * On success the thread summary is printed when requested.
 *
 * The session is deleted before returning.
 * NOTE(review): presumably returns `err` (negative on failure); the return
 * statements are not visible here -- confirm.
 */
1825 static int trace__replay(struct trace *trace)
/* Tracepoint name -> handler mapping applied to the session's evlist. */
1827 const struct perf_evsel_str_handler handlers[] = {
1828 { "raw_syscalls:sys_enter", trace__sys_enter, },
1829 { "raw_syscalls:sys_exit", trace__sys_exit, },
1830 { "probe:vfs_getname", trace__vfs_getname, },
1833 struct perf_session *session;
/* Samples go to our own dispatcher; other record types use the stock handlers. */
1836 trace->tool.sample = trace__process_sample;
1837 trace->tool.mmap = perf_event__process_mmap;
1838 trace->tool.mmap2 = perf_event__process_mmap2;
1839 trace->tool.comm = perf_event__process_comm;
1840 trace->tool.exit = perf_event__process_exit;
1841 trace->tool.fork = perf_event__process_fork;
1842 trace->tool.attr = perf_event__process_attr;
1843 trace->tool.tracing_data = perf_event__process_tracing_data;
1844 trace->tool.build_id = perf_event__process_build_id;
/* Ask the session layer for time-ordered delivery of samples. */
1846 trace->tool.ordered_samples = true;
1847 trace->tool.ordering_requires_timestamps = true;
1849 /* add tid to output */
1850 trace->multiple_threads = true;
1852 if (symbol__init() < 0)
1855 session = perf_session__new(input_name, O_RDONLY, 0, false,
1857 if (session == NULL)
1860 trace->host = &session->machines.host;
1862 err = perf_session__set_tracepoints_handlers(session, handlers);
/* Both syscall tracepoints must be present for the replay to make sense. */
1866 if (!perf_session__has_tp(session, "raw_syscalls:sys_enter")) {
1867 pr_err("Data file does not have raw_syscalls:sys_enter events\n");
1871 if (!perf_session__has_tp(session, "raw_syscalls:sys_exit")) {
1872 pr_err("Data file does not have raw_syscalls:sys_exit events\n");
1876 err = parse_target_str(trace);
1882 err = perf_session__process_events(session, &trace->tool);
/* Terminate the message with a newline, like every other pr_err() here. */
1884 pr_err("Failed to process events, error %d\n", err);
1886 else if (trace->summary)
1887 trace__fprintf_thread_summary(trace, trace->output);
1890 perf_session__delete(session);
/*
 * Print the banner and column headings of the per-thread summary to fp.
 * The return values of the individual fprintf() calls are summed in
 * `printed`.
 * NOTE(review): presumably returns `printed`; the return statement is not
 * visible here.
 */
1895 static size_t trace__fprintf_threads_header(FILE *fp)
1899 printed = fprintf(fp, "\n _____________________________________________________________________________\n");
1900 printed += fprintf(fp, " __) Summary of events (__\n\n");
1901 printed += fprintf(fp, " [ task - pid ] [ events ] [ ratio ] [ runtime ]\n");
1902 printed += fprintf(fp, " syscall count min max avg stddev\n");
1903 printed += fprintf(fp, " msec msec msec %%\n");
1904 printed += fprintf(fp, " _____________________________________________________________________________\n\n");
/*
 * Print one line of statistics per syscall recorded for a thread.
 *
 * Walks the ttrace->syscall_stats intlist; each node is keyed by syscall id
 * (inode->i) and carries a struct stats in ->priv.  For each one the
 * syscall name, call count, min/max/avg in milliseconds and the standard
 * deviation as a percentage of the average are written to fp.
 *
 * The fprintf() return values are summed in `printed`.
 * NOTE(review): presumably returns `printed`; not visible here.
 */
1909 static size_t thread__dump_stats(struct thread_trace *ttrace,
1910 struct trace *trace, FILE *fp)
1912 struct stats *stats;
1915 struct int_node *inode = intlist__first(ttrace->syscall_stats);
1920 printed += fprintf(fp, "\n");
1922 /* each int_node is a syscall */
1924 stats = inode->priv;
/* Convert from nanoseconds to milliseconds (division by NSEC_PER_MSEC). */
1926 double min = (double)(stats->min) / NSEC_PER_MSEC;
1927 double max = (double)(stats->max) / NSEC_PER_MSEC;
1928 double avg = avg_stats(stats);
1930 u64 n = (u64) stats->n;
/*
 * Relative standard deviation in percent, computed before avg is scaled
 * so both operands are in the same unit; 0.0 when the average is zero to
 * avoid dividing by it.
 */
1932 pct = avg ? 100.0 * stddev_stats(stats)/avg : 0.0;
1933 avg /= NSEC_PER_MSEC;
/* The intlist key is the syscall id, used to index the syscall table. */
1935 sc = &trace->syscalls.table[inode->i];
1936 printed += fprintf(fp, "%24s %14s : ", "", sc->name);
1937 printed += fprintf(fp, "%5" PRIu64 " %8.3f %8.3f",
1939 printed += fprintf(fp, " %8.3f %6.2f\n", avg, pct);
1942 inode = intlist__next(inode);
1945 printed += fprintf(fp, "\n\n");
/*
 * Context handed to trace__fprintf_one_thread() through the void *priv
 * argument of machine__for_each_thread().  Besides the trace pointer, the
 * callback reads ->fp (output stream) and reads and updates ->printed
 * (running character count).
 */
1950 /* struct used to pass data to per-thread function */
1951 struct summary_data {
1953 struct trace *trace;
/*
 * Per-thread callback for the summary: print one header line for the thread
 * (comm, tid, event count, share of all events, accumulated runtime in ms)
 * followed by its per-syscall statistics.
 *
 * `priv` is a struct summary_data.  The share of events is colour-coded by
 * threshold, and the number of characters written is added to
 * data->printed.
 */
1957 static int trace__fprintf_one_thread(struct thread *thread, void *priv)
1959 struct summary_data *data = priv;
1960 FILE *fp = data->fp;
1961 size_t printed = data->printed;
1962 struct trace *trace = data->trace;
1963 struct thread_trace *ttrace = thread->priv;
/* This thread's share of all events, in percent. */
1970 ratio = (double)ttrace->nr_events / trace->nr_events * 100.0;
/*
 * Colour by share: normal by default, green above 25%, yellow above 5%.
 * NOTE(review): the condition selecting red is not visible; presumably a
 * higher threshold checked first.
 */
1972 color = PERF_COLOR_NORMAL;
1974 color = PERF_COLOR_RED;
1975 else if (ratio > 25.0)
1976 color = PERF_COLOR_GREEN;
1977 else if (ratio > 5.0)
1978 color = PERF_COLOR_YELLOW;
1980 printed += color_fprintf(fp, color, "%20s", thread->comm);
1981 printed += fprintf(fp, " - %-5d :%11lu [", thread->tid, ttrace->nr_events);
1982 printed += color_fprintf(fp, color, "%5.1f%%", ratio);
1983 printed += fprintf(fp, " ] %10.3f ms\n", ttrace->runtime_ms);
1984 printed += thread__dump_stats(ttrace, trace, fp);
/*
 * NOTE(review): `printed` was initialised from data->printed above, so
 * this adds the previous total a second time -- confirm whether plain
 * assignment was intended.
 */
1986 data->printed += printed;
/*
 * Print the complete per-thread summary to fp: the header first, then one
 * section per thread known to trace->host, produced by
 * trace__fprintf_one_thread().  Returns the character count accumulated in
 * data.printed.
 */
1991 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
1993 struct summary_data data = {
1997 data.printed = trace__fprintf_threads_header(fp);
1999 machine__for_each_thread(trace->host, trace__fprintf_one_thread, &data);
2001 return data.printed;
/*
 * Option callback for --duration: store the argument, converted with
 * atof(), in trace->duration_filter.  opt->value is the struct trace that
 * was registered with the option.
 *
 * NOTE(review): atof() reports no errors, so a non-numeric argument is
 * presumably accepted silently as 0 -- consider strtod() with validation.
 */
2004 static int trace__set_duration(const struct option *opt, const char *str,
2005 int unset __maybe_unused)
2007 struct trace *trace = opt->value;
2009 trace->duration_filter = atof(str);
/*
 * Open `filename` for writing and make it the trace output stream.
 *
 * If the file already exists and is non-empty it is first renamed to
 * "<filename>.old" so the previous output is kept.  Returns 0 on success or
 * the negated errno when fopen() fails.
 */
2013 static int trace__open_output(struct trace *trace, const char *filename)
/* stat() returns 0 on success, so this is "exists and has content". */
2017 if (!stat(filename, &st) && st.st_size) {
2018 char oldname[PATH_MAX];
2020 scnprintf(oldname, sizeof(oldname), "%s.old", filename);
/* The result of rename() is not checked; a failure here is ignored. */
2022 rename(filename, oldname);
2025 trace->output = fopen(filename, "w");
2027 return trace->output == NULL ? -errno : 0;
/*
 * Entry point of "perf trace".
 *
 * Handles the "record" sub-command by delegating to trace__record(),
 * otherwise parses the command line options, opens the output file when one
 * was requested, builds the event qualifier list from -e (a leading '!'
 * negates it), validates the target and its uid, defaults to system-wide
 * tracing when neither a command nor a target was given, and finally runs
 * either trace__replay() or trace__run().
 */
2030 int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
2032 const char * const trace_usage[] = {
2033 "perf trace [<options>] [<command>]",
2034 "perf trace [<options>] -- <command> [<options>]",
2035 "perf trace record [<options>] [<command>]",
2036 "perf trace record [<options>] -- <command> [<options>]",
2039 struct trace trace = {
/*
 * NOTE(review): the open_id initializer reads trace.audit.machine from the
 * object that is being initialised -- confirm that the initializer
 * evaluation order makes this reliable.
 */
2041 .machine = audit_detect_machine(),
2042 .open_id = audit_name_to_syscall("open", trace.audit.machine),
2052 .user_freq = UINT_MAX,
2053 .user_interval = ULLONG_MAX,
2060 const char *output_name = NULL;
2061 const char *ev_qualifier_str = NULL;
/* Command line options; most store directly into fields of `trace`. */
2062 const struct option trace_options[] = {
2063 OPT_BOOLEAN(0, "comm", &trace.show_comm,
2064 "show the thread COMM next to its id"),
2065 OPT_BOOLEAN(0, "tool_stats", &trace.show_tool_stats, "show tool stats"),
2066 OPT_STRING('e', "expr", &ev_qualifier_str, "expr",
2067 "list of events to trace"),
2068 OPT_STRING('o', "output", &output_name, "file", "output file name"),
2069 OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"),
2070 OPT_STRING('p', "pid", &trace.opts.target.pid, "pid",
2071 "trace events on existing process id"),
2072 OPT_STRING('t', "tid", &trace.opts.target.tid, "tid",
2073 "trace events on existing thread id"),
2074 OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide,
2075 "system-wide collection from all CPUs"),
2076 OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu",
2077 "list of cpus to monitor"),
2078 OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit,
2079 "child tasks do not inherit counters"),
2080 OPT_CALLBACK('m', "mmap-pages", &trace.opts.mmap_pages, "pages",
2081 "number of mmap data pages",
2082 perf_evlist__parse_mmap_pages),
2083 OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user",
2085 OPT_CALLBACK(0, "duration", &trace, "float",
2086 "show only events with duration > N.M ms",
2087 trace__set_duration),
2088 OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"),
2089 OPT_INCR('v', "verbose", &verbose, "be more verbose"),
2090 OPT_BOOLEAN('T', "time", &trace.full_time,
2091 "Show full timestamp, not time relative to first start"),
2092 OPT_BOOLEAN(0, "summary", &trace.summary,
2093 "Show syscall summary with statistics"),
/* "perf trace record ...": strip the two leading words and delegate. */
2099 if ((argc > 1) && (strcmp(argv[1], "record") == 0))
2100 return trace__record(argc-2, &argv[2]);
/* After parsing, argc/argv describe the workload command, if any. */
2102 argc = parse_options(argc, argv, trace_options, trace_usage, 0);
2104 if (output_name != NULL) {
2105 err = trace__open_output(&trace, output_name);
2107 perror("failed to create output file");
/* -e list; a leading '!' sets not_ev_qualifier, i.e. negates the list. */
2112 if (ev_qualifier_str != NULL) {
2113 const char *s = ev_qualifier_str;
2115 trace.not_ev_qualifier = *s == '!';
2116 if (trace.not_ev_qualifier)
2118 trace.ev_qualifier = strlist__new(true, s);
2119 if (trace.ev_qualifier == NULL) {
2120 fputs("Not enough memory to parse event qualifier",
2127 err = perf_target__validate(&trace.opts.target);
2129 perf_target__strerror(&trace.opts.target, err, bf, sizeof(bf));
2130 fprintf(trace.output, "%s", bf);
2134 err = perf_target__parse_uid(&trace.opts.target);
2136 perf_target__strerror(&trace.opts.target, err, bf, sizeof(bf));
2137 fprintf(trace.output, "%s", bf);
/* No command and no explicit target: trace the whole system. */
2141 if (!argc && perf_target__none(&trace.opts.target))
2142 trace.opts.target.system_wide = true;
/*
 * NOTE(review): the condition choosing between replay and live mode is not
 * visible; presumably replay is used when an input file was given.
 */
2145 err = trace__replay(&trace);
2147 err = trace__run(&trace, argc, argv);
/* Only close the stream when this function opened it. */
2150 if (output_name != NULL)
2151 fclose(trace.output);