1 #include <traceevent/event-parse.h>
3 #include "util/color.h"
4 #include "util/debug.h"
5 #include "util/evlist.h"
6 #include "util/machine.h"
7 #include "util/session.h"
8 #include "util/thread.h"
9 #include "util/parse-options.h"
10 #include "util/strlist.h"
11 #include "util/intlist.h"
12 #include "util/thread_map.h"
13 #include "util/stat.h"
17 #include <sys/eventfd.h>
19 #include <linux/futex.h>
21 /* For older distros: */
23 # define MAP_STACK 0x20000
27 # define MADV_HWPOISON 100
30 #ifndef MADV_MERGEABLE
31 # define MADV_MERGEABLE 12
34 #ifndef MADV_UNMERGEABLE
35 # define MADV_UNMERGEABLE 13
40 struct thread *thread;
53 #define DEFINE_STRARRAY(array) struct strarray strarray__##array = { \
54 .nr_entries = ARRAY_SIZE(array), \
58 #define DEFINE_STRARRAY_OFFSET(array, off) struct strarray strarray__##array = { \
60 .nr_entries = ARRAY_SIZE(array), \
64 static size_t __syscall_arg__scnprintf_strarray(char *bf, size_t size,
66 struct syscall_arg *arg)
68 struct strarray *sa = arg->parm;
69 int idx = arg->val - sa->offset;
71 if (idx < 0 || idx >= sa->nr_entries)
72 return scnprintf(bf, size, intfmt, arg->val);
74 return scnprintf(bf, size, "%s", sa->entries[idx]);
/*
 * Formatter for arguments described by a strarray: prints the symbolic name
 * looked up through arg->parm; values the table has no name for are printed
 * with "%d".
 */
static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size,
					      struct syscall_arg *arg)
{
	const char *fallback_fmt = "%d";

	return __syscall_arg__scnprintf_strarray(bf, size, fallback_fmt, arg);
}
83 #define SCA_STRARRAY syscall_arg__scnprintf_strarray
/*
 * Same as the plain strarray formatter, except that values the table has no
 * name for are printed with "%#x" rather than "%d".
 */
static size_t syscall_arg__scnprintf_strhexarray(char *bf, size_t size,
						 struct syscall_arg *arg)
{
	const char *fallback_fmt = "%#x";

	return __syscall_arg__scnprintf_strarray(bf, size, fallback_fmt, arg);
}
91 #define SCA_STRHEXARRAY syscall_arg__scnprintf_strhexarray
93 static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
94 struct syscall_arg *arg);
96 #define SCA_FD syscall_arg__scnprintf_fd
98 static size_t syscall_arg__scnprintf_fd_at(char *bf, size_t size,
99 struct syscall_arg *arg)
104 return scnprintf(bf, size, "CWD");
106 return syscall_arg__scnprintf_fd(bf, size, arg);
109 #define SCA_FDAT syscall_arg__scnprintf_fd_at
111 static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
112 struct syscall_arg *arg);
114 #define SCA_CLOSE_FD syscall_arg__scnprintf_close_fd
116 static size_t syscall_arg__scnprintf_hex(char *bf, size_t size,
117 struct syscall_arg *arg)
119 return scnprintf(bf, size, "%#lx", arg->val);
122 #define SCA_HEX syscall_arg__scnprintf_hex
124 static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size,
125 struct syscall_arg *arg)
127 int printed = 0, prot = arg->val;
129 if (prot == PROT_NONE)
130 return scnprintf(bf, size, "NONE");
131 #define P_MMAP_PROT(n) \
132 if (prot & PROT_##n) { \
133 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
143 P_MMAP_PROT(GROWSDOWN);
144 P_MMAP_PROT(GROWSUP);
148 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", prot);
153 #define SCA_MMAP_PROT syscall_arg__scnprintf_mmap_prot
155 static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size,
156 struct syscall_arg *arg)
158 int printed = 0, flags = arg->val;
160 #define P_MMAP_FLAG(n) \
161 if (flags & MAP_##n) { \
162 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
167 P_MMAP_FLAG(PRIVATE);
171 P_MMAP_FLAG(ANONYMOUS);
172 P_MMAP_FLAG(DENYWRITE);
173 P_MMAP_FLAG(EXECUTABLE);
176 P_MMAP_FLAG(GROWSDOWN);
178 P_MMAP_FLAG(HUGETLB);
181 P_MMAP_FLAG(NONBLOCK);
182 P_MMAP_FLAG(NORESERVE);
183 P_MMAP_FLAG(POPULATE);
185 #ifdef MAP_UNINITIALIZED
186 P_MMAP_FLAG(UNINITIALIZED);
191 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
196 #define SCA_MMAP_FLAGS syscall_arg__scnprintf_mmap_flags
198 static size_t syscall_arg__scnprintf_madvise_behavior(char *bf, size_t size,
199 struct syscall_arg *arg)
201 int behavior = arg->val;
204 #define P_MADV_BHV(n) case MADV_##n: return scnprintf(bf, size, #n)
207 P_MADV_BHV(SEQUENTIAL);
208 P_MADV_BHV(WILLNEED);
209 P_MADV_BHV(DONTNEED);
211 P_MADV_BHV(DONTFORK);
213 P_MADV_BHV(HWPOISON);
214 #ifdef MADV_SOFT_OFFLINE
215 P_MADV_BHV(SOFT_OFFLINE);
217 P_MADV_BHV(MERGEABLE);
218 P_MADV_BHV(UNMERGEABLE);
220 P_MADV_BHV(HUGEPAGE);
222 #ifdef MADV_NOHUGEPAGE
223 P_MADV_BHV(NOHUGEPAGE);
226 P_MADV_BHV(DONTDUMP);
235 return scnprintf(bf, size, "%#x", behavior);
238 #define SCA_MADV_BHV syscall_arg__scnprintf_madvise_behavior
240 static size_t syscall_arg__scnprintf_flock(char *bf, size_t size,
241 struct syscall_arg *arg)
243 int printed = 0, op = arg->val;
246 return scnprintf(bf, size, "NONE");
248 if ((op & LOCK_##cmd) == LOCK_##cmd) { \
249 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #cmd); \
264 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", op);
269 #define SCA_FLOCK syscall_arg__scnprintf_flock
271 static size_t syscall_arg__scnprintf_futex_op(char *bf, size_t size, struct syscall_arg *arg)
273 enum syscall_futex_args {
274 SCF_UADDR = (1 << 0),
277 SCF_TIMEOUT = (1 << 3),
278 SCF_UADDR2 = (1 << 4),
282 int cmd = op & FUTEX_CMD_MASK;
286 #define P_FUTEX_OP(n) case FUTEX_##n: printed = scnprintf(bf, size, #n);
287 P_FUTEX_OP(WAIT); arg->mask |= SCF_VAL3|SCF_UADDR2; break;
288 P_FUTEX_OP(WAKE); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
289 P_FUTEX_OP(FD); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
290 P_FUTEX_OP(REQUEUE); arg->mask |= SCF_VAL3|SCF_TIMEOUT; break;
291 P_FUTEX_OP(CMP_REQUEUE); arg->mask |= SCF_TIMEOUT; break;
292 P_FUTEX_OP(CMP_REQUEUE_PI); arg->mask |= SCF_TIMEOUT; break;
293 P_FUTEX_OP(WAKE_OP); break;
294 P_FUTEX_OP(LOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
295 P_FUTEX_OP(UNLOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
296 P_FUTEX_OP(TRYLOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2; break;
297 P_FUTEX_OP(WAIT_BITSET); arg->mask |= SCF_UADDR2; break;
298 P_FUTEX_OP(WAKE_BITSET); arg->mask |= SCF_UADDR2; break;
299 P_FUTEX_OP(WAIT_REQUEUE_PI); break;
300 default: printed = scnprintf(bf, size, "%#x", cmd); break;
303 if (op & FUTEX_PRIVATE_FLAG)
304 printed += scnprintf(bf + printed, size - printed, "|PRIV");
306 if (op & FUTEX_CLOCK_REALTIME)
307 printed += scnprintf(bf + printed, size - printed, "|CLKRT");
312 #define SCA_FUTEX_OP syscall_arg__scnprintf_futex_op
314 static const char *epoll_ctl_ops[] = { "ADD", "DEL", "MOD", };
315 static DEFINE_STRARRAY_OFFSET(epoll_ctl_ops, 1);
317 static const char *itimers[] = { "REAL", "VIRTUAL", "PROF", };
318 static DEFINE_STRARRAY(itimers);
320 static const char *whences[] = { "SET", "CUR", "END",
328 static DEFINE_STRARRAY(whences);
330 static const char *fcntl_cmds[] = {
331 "DUPFD", "GETFD", "SETFD", "GETFL", "SETFL", "GETLK", "SETLK",
332 "SETLKW", "SETOWN", "GETOWN", "SETSIG", "GETSIG", "F_GETLK64",
333 "F_SETLK64", "F_SETLKW64", "F_SETOWN_EX", "F_GETOWN_EX",
336 static DEFINE_STRARRAY(fcntl_cmds);
338 static const char *rlimit_resources[] = {
339 "CPU", "FSIZE", "DATA", "STACK", "CORE", "RSS", "NPROC", "NOFILE",
340 "MEMLOCK", "AS", "LOCKS", "SIGPENDING", "MSGQUEUE", "NICE", "RTPRIO",
343 static DEFINE_STRARRAY(rlimit_resources);
345 static const char *sighow[] = { "BLOCK", "UNBLOCK", "SETMASK", };
346 static DEFINE_STRARRAY(sighow);
348 static const char *clockid[] = {
349 "REALTIME", "MONOTONIC", "PROCESS_CPUTIME_ID", "THREAD_CPUTIME_ID",
350 "MONOTONIC_RAW", "REALTIME_COARSE", "MONOTONIC_COARSE",
352 static DEFINE_STRARRAY(clockid);
354 static const char *socket_families[] = {
355 "UNSPEC", "LOCAL", "INET", "AX25", "IPX", "APPLETALK", "NETROM",
356 "BRIDGE", "ATMPVC", "X25", "INET6", "ROSE", "DECnet", "NETBEUI",
357 "SECURITY", "KEY", "NETLINK", "PACKET", "ASH", "ECONET", "ATMSVC",
358 "RDS", "SNA", "IRDA", "PPPOX", "WANPIPE", "LLC", "IB", "CAN", "TIPC",
359 "BLUETOOTH", "IUCV", "RXRPC", "ISDN", "PHONET", "IEEE802154", "CAIF",
360 "ALG", "NFC", "VSOCK",
362 static DEFINE_STRARRAY(socket_families);
364 #ifndef SOCK_TYPE_MASK
365 #define SOCK_TYPE_MASK 0xf
368 static size_t syscall_arg__scnprintf_socket_type(char *bf, size_t size,
369 struct syscall_arg *arg)
373 flags = type & ~SOCK_TYPE_MASK;
375 type &= SOCK_TYPE_MASK;
377 * Can't use a strarray, MIPS may override for ABI reasons.
380 #define P_SK_TYPE(n) case SOCK_##n: printed = scnprintf(bf, size, #n); break;
385 P_SK_TYPE(SEQPACKET);
390 printed = scnprintf(bf, size, "%#x", type);
393 #define P_SK_FLAG(n) \
394 if (flags & SOCK_##n) { \
395 printed += scnprintf(bf + printed, size - printed, "|%s", #n); \
396 flags &= ~SOCK_##n; \
404 printed += scnprintf(bf + printed, size - printed, "|%#x", flags);
409 #define SCA_SK_TYPE syscall_arg__scnprintf_socket_type
412 #define MSG_PROBE 0x10
414 #ifndef MSG_WAITFORONE
415 #define MSG_WAITFORONE 0x10000
417 #ifndef MSG_SENDPAGE_NOTLAST
418 #define MSG_SENDPAGE_NOTLAST 0x20000
421 #define MSG_FASTOPEN 0x20000000
424 static size_t syscall_arg__scnprintf_msg_flags(char *bf, size_t size,
425 struct syscall_arg *arg)
427 int printed = 0, flags = arg->val;
430 return scnprintf(bf, size, "NONE");
431 #define P_MSG_FLAG(n) \
432 if (flags & MSG_##n) { \
433 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
439 P_MSG_FLAG(DONTROUTE);
444 P_MSG_FLAG(DONTWAIT);
451 P_MSG_FLAG(ERRQUEUE);
452 P_MSG_FLAG(NOSIGNAL);
454 P_MSG_FLAG(WAITFORONE);
455 P_MSG_FLAG(SENDPAGE_NOTLAST);
456 P_MSG_FLAG(FASTOPEN);
457 P_MSG_FLAG(CMSG_CLOEXEC);
461 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
466 #define SCA_MSG_FLAGS syscall_arg__scnprintf_msg_flags
468 static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size,
469 struct syscall_arg *arg)
474 if (mode == F_OK) /* 0 */
475 return scnprintf(bf, size, "F");
477 if (mode & n##_OK) { \
478 printed += scnprintf(bf + printed, size - printed, "%s", #n); \
488 printed += scnprintf(bf + printed, size - printed, "|%#x", mode);
493 #define SCA_ACCMODE syscall_arg__scnprintf_access_mode
495 static size_t syscall_arg__scnprintf_open_flags(char *bf, size_t size,
496 struct syscall_arg *arg)
498 int printed = 0, flags = arg->val;
500 if (!(flags & O_CREAT))
501 arg->mask |= 1 << (arg->idx + 1); /* Mask the mode parm */
504 return scnprintf(bf, size, "RDONLY");
506 if (flags & O_##n) { \
507 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
531 if ((flags & O_SYNC) == O_SYNC)
532 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", "SYNC");
544 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
549 #define SCA_OPEN_FLAGS syscall_arg__scnprintf_open_flags
551 static size_t syscall_arg__scnprintf_eventfd_flags(char *bf, size_t size,
552 struct syscall_arg *arg)
554 int printed = 0, flags = arg->val;
557 return scnprintf(bf, size, "NONE");
559 if (flags & EFD_##n) { \
560 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
570 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
575 #define SCA_EFD_FLAGS syscall_arg__scnprintf_eventfd_flags
577 static size_t syscall_arg__scnprintf_pipe_flags(char *bf, size_t size,
578 struct syscall_arg *arg)
580 int printed = 0, flags = arg->val;
583 if (flags & O_##n) { \
584 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
593 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
598 #define SCA_PIPE_FLAGS syscall_arg__scnprintf_pipe_flags
600 static size_t syscall_arg__scnprintf_signum(char *bf, size_t size, struct syscall_arg *arg)
605 #define P_SIGNUM(n) case SIG##n: return scnprintf(bf, size, #n)
640 return scnprintf(bf, size, "%#x", sig);
643 #define SCA_SIGNUM syscall_arg__scnprintf_signum
645 #define TCGETS 0x5401
647 static const char *tioctls[] = {
648 "TCGETS", "TCSETS", "TCSETSW", "TCSETSF", "TCGETA", "TCSETA", "TCSETAW",
649 "TCSETAF", "TCSBRK", "TCXONC", "TCFLSH", "TIOCEXCL", "TIOCNXCL",
650 "TIOCSCTTY", "TIOCGPGRP", "TIOCSPGRP", "TIOCOUTQ", "TIOCSTI",
651 "TIOCGWINSZ", "TIOCSWINSZ", "TIOCMGET", "TIOCMBIS", "TIOCMBIC",
652 "TIOCMSET", "TIOCGSOFTCAR", "TIOCSSOFTCAR", "FIONREAD", "TIOCLINUX",
653 "TIOCCONS", "TIOCGSERIAL", "TIOCSSERIAL", "TIOCPKT", "FIONBIO",
654 "TIOCNOTTY", "TIOCSETD", "TIOCGETD", "TCSBRKP", [0x27] = "TIOCSBRK",
655 "TIOCCBRK", "TIOCGSID", "TCGETS2", "TCSETS2", "TCSETSW2", "TCSETSF2",
656 "TIOCGRS485", "TIOCSRS485", "TIOCGPTN", "TIOCSPTLCK",
657 "TIOCGDEV||TCGETX", "TCSETX", "TCSETXF", "TCSETXW", "TIOCSIG",
658 "TIOCVHANGUP", "TIOCGPKT", "TIOCGPTLCK", "TIOCGEXCL",
659 [0x50] = "FIONCLEX", "FIOCLEX", "FIOASYNC", "TIOCSERCONFIG",
660 "TIOCSERGWILD", "TIOCSERSWILD", "TIOCGLCKTRMIOS", "TIOCSLCKTRMIOS",
661 "TIOCSERGSTRUCT", "TIOCSERGETLSR", "TIOCSERGETMULTI", "TIOCSERSETMULTI",
662 "TIOCMIWAIT", "TIOCGICOUNT", [0x60] = "FIOQSIZE",
665 static DEFINE_STRARRAY_OFFSET(tioctls, 0x5401);
667 #define STRARRAY(arg, name, array) \
668 .arg_scnprintf = { [arg] = SCA_STRARRAY, }, \
669 .arg_parm = { [arg] = &strarray__##array, }
671 static struct syscall_fmt {
674 size_t (*arg_scnprintf[6])(char *bf, size_t size, struct syscall_arg *arg);
680 { .name = "access", .errmsg = true,
681 .arg_scnprintf = { [1] = SCA_ACCMODE, /* mode */ }, },
682 { .name = "arch_prctl", .errmsg = true, .alias = "prctl", },
683 { .name = "brk", .hexret = true,
684 .arg_scnprintf = { [0] = SCA_HEX, /* brk */ }, },
685 { .name = "clock_gettime", .errmsg = true, STRARRAY(0, clk_id, clockid), },
686 { .name = "close", .errmsg = true,
687 .arg_scnprintf = { [0] = SCA_CLOSE_FD, /* fd */ }, },
688 { .name = "connect", .errmsg = true, },
689 { .name = "dup", .errmsg = true,
690 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
691 { .name = "dup2", .errmsg = true,
692 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
693 { .name = "dup3", .errmsg = true,
694 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
695 { .name = "epoll_ctl", .errmsg = true, STRARRAY(1, op, epoll_ctl_ops), },
696 { .name = "eventfd2", .errmsg = true,
697 .arg_scnprintf = { [1] = SCA_EFD_FLAGS, /* flags */ }, },
698 { .name = "faccessat", .errmsg = true,
699 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
700 { .name = "fadvise64", .errmsg = true,
701 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
702 { .name = "fallocate", .errmsg = true,
703 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
704 { .name = "fchdir", .errmsg = true,
705 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
706 { .name = "fchmod", .errmsg = true,
707 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
708 { .name = "fchmodat", .errmsg = true,
709 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
710 { .name = "fchown", .errmsg = true,
711 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
712 { .name = "fchownat", .errmsg = true,
713 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
714 { .name = "fcntl", .errmsg = true,
715 .arg_scnprintf = { [0] = SCA_FD, /* fd */
716 [1] = SCA_STRARRAY, /* cmd */ },
717 .arg_parm = { [1] = &strarray__fcntl_cmds, /* cmd */ }, },
718 { .name = "fdatasync", .errmsg = true,
719 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
720 { .name = "flock", .errmsg = true,
721 .arg_scnprintf = { [0] = SCA_FD, /* fd */
722 [1] = SCA_FLOCK, /* cmd */ }, },
723 { .name = "fsetxattr", .errmsg = true,
724 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
725 { .name = "fstat", .errmsg = true, .alias = "newfstat",
726 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
727 { .name = "fstatat", .errmsg = true, .alias = "newfstatat",
728 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
729 { .name = "fstatfs", .errmsg = true,
730 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
731 { .name = "fsync", .errmsg = true,
732 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
733 { .name = "ftruncate", .errmsg = true,
734 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
735 { .name = "futex", .errmsg = true,
736 .arg_scnprintf = { [1] = SCA_FUTEX_OP, /* op */ }, },
737 { .name = "futimesat", .errmsg = true,
738 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
739 { .name = "getdents", .errmsg = true,
740 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
741 { .name = "getdents64", .errmsg = true,
742 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
743 { .name = "getitimer", .errmsg = true, STRARRAY(0, which, itimers), },
744 { .name = "getrlimit", .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
745 { .name = "ioctl", .errmsg = true,
746 .arg_scnprintf = { [0] = SCA_FD, /* fd */
747 [1] = SCA_STRHEXARRAY, /* cmd */
748 [2] = SCA_HEX, /* arg */ },
749 .arg_parm = { [1] = &strarray__tioctls, /* cmd */ }, },
750 { .name = "kill", .errmsg = true,
751 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
752 { .name = "linkat", .errmsg = true,
753 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
754 { .name = "lseek", .errmsg = true,
755 .arg_scnprintf = { [0] = SCA_FD, /* fd */
756 [2] = SCA_STRARRAY, /* whence */ },
757 .arg_parm = { [2] = &strarray__whences, /* whence */ }, },
758 { .name = "lstat", .errmsg = true, .alias = "newlstat", },
759 { .name = "madvise", .errmsg = true,
760 .arg_scnprintf = { [0] = SCA_HEX, /* start */
761 [2] = SCA_MADV_BHV, /* behavior */ }, },
762 { .name = "mkdirat", .errmsg = true,
763 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
764 { .name = "mknodat", .errmsg = true,
765 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
766 { .name = "mlock", .errmsg = true,
767 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
768 { .name = "mlockall", .errmsg = true,
769 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
770 { .name = "mmap", .hexret = true,
771 .arg_scnprintf = { [0] = SCA_HEX, /* addr */
772 [2] = SCA_MMAP_PROT, /* prot */
773 [3] = SCA_MMAP_FLAGS, /* flags */ }, },
774 { .name = "mprotect", .errmsg = true,
775 .arg_scnprintf = { [0] = SCA_HEX, /* start */
776 [2] = SCA_MMAP_PROT, /* prot */ }, },
777 { .name = "mremap", .hexret = true,
778 .arg_scnprintf = { [0] = SCA_HEX, /* addr */
779 [4] = SCA_HEX, /* new_addr */ }, },
780 { .name = "munlock", .errmsg = true,
781 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
782 { .name = "munmap", .errmsg = true,
783 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
784 { .name = "name_to_handle_at", .errmsg = true,
785 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
786 { .name = "newfstatat", .errmsg = true,
787 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
788 { .name = "open", .errmsg = true,
789 .arg_scnprintf = { [1] = SCA_OPEN_FLAGS, /* flags */ }, },
790 { .name = "open_by_handle_at", .errmsg = true,
791 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
792 [2] = SCA_OPEN_FLAGS, /* flags */ }, },
793 { .name = "openat", .errmsg = true,
794 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
795 [2] = SCA_OPEN_FLAGS, /* flags */ }, },
796 { .name = "pipe2", .errmsg = true,
797 .arg_scnprintf = { [1] = SCA_PIPE_FLAGS, /* flags */ }, },
798 { .name = "poll", .errmsg = true, .timeout = true, },
799 { .name = "ppoll", .errmsg = true, .timeout = true, },
800 { .name = "pread", .errmsg = true, .alias = "pread64",
801 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
802 { .name = "preadv", .errmsg = true, .alias = "pread",
803 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
804 { .name = "prlimit64", .errmsg = true, STRARRAY(1, resource, rlimit_resources), },
805 { .name = "pwrite", .errmsg = true, .alias = "pwrite64",
806 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
807 { .name = "pwritev", .errmsg = true,
808 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
809 { .name = "read", .errmsg = true,
810 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
811 { .name = "readlinkat", .errmsg = true,
812 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
813 { .name = "readv", .errmsg = true,
814 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
815 { .name = "recvfrom", .errmsg = true,
816 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
817 { .name = "recvmmsg", .errmsg = true,
818 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
819 { .name = "recvmsg", .errmsg = true,
820 .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
821 { .name = "renameat", .errmsg = true,
822 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
823 { .name = "rt_sigaction", .errmsg = true,
824 .arg_scnprintf = { [0] = SCA_SIGNUM, /* sig */ }, },
825 { .name = "rt_sigprocmask", .errmsg = true, STRARRAY(0, how, sighow), },
826 { .name = "rt_sigqueueinfo", .errmsg = true,
827 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
828 { .name = "rt_tgsigqueueinfo", .errmsg = true,
829 .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
830 { .name = "select", .errmsg = true, .timeout = true, },
831 { .name = "sendmmsg", .errmsg = true,
832 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
833 { .name = "sendmsg", .errmsg = true,
834 .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
835 { .name = "sendto", .errmsg = true,
836 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
837 { .name = "setitimer", .errmsg = true, STRARRAY(0, which, itimers), },
838 { .name = "setrlimit", .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
839 { .name = "shutdown", .errmsg = true,
840 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
841 { .name = "socket", .errmsg = true,
842 .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
843 [1] = SCA_SK_TYPE, /* type */ },
844 .arg_parm = { [0] = &strarray__socket_families, /* family */ }, },
845 { .name = "socketpair", .errmsg = true,
846 .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
847 [1] = SCA_SK_TYPE, /* type */ },
848 .arg_parm = { [0] = &strarray__socket_families, /* family */ }, },
849 { .name = "stat", .errmsg = true, .alias = "newstat", },
850 { .name = "symlinkat", .errmsg = true,
851 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
852 { .name = "tgkill", .errmsg = true,
853 .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
854 { .name = "tkill", .errmsg = true,
855 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
856 { .name = "uname", .errmsg = true, .alias = "newuname", },
857 { .name = "unlinkat", .errmsg = true,
858 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
859 { .name = "utimensat", .errmsg = true,
860 .arg_scnprintf = { [0] = SCA_FDAT, /* dirfd */ }, },
861 { .name = "write", .errmsg = true,
862 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
863 { .name = "writev", .errmsg = true,
864 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
867 static int syscall_fmt__cmp(const void *name, const void *fmtp)
869 const struct syscall_fmt *fmt = fmtp;
870 return strcmp(name, fmt->name);
873 static struct syscall_fmt *syscall_fmt__find(const char *name)
875 const int nmemb = ARRAY_SIZE(syscall_fmts);
876 return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp);
880 struct event_format *tp_format;
883 struct syscall_fmt *fmt;
884 size_t (**arg_scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
888 static size_t fprintf_duration(unsigned long t, FILE *fp)
890 double duration = (double)t / NSEC_PER_MSEC;
891 size_t printed = fprintf(fp, "(");
894 printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration);
895 else if (duration >= 0.01)
896 printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration);
898 printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration);
899 return printed + fprintf(fp, "): ");
902 struct thread_trace {
906 unsigned long nr_events;
914 struct intlist *syscall_stats;
917 static struct thread_trace *thread_trace__new(void)
919 struct thread_trace *ttrace = zalloc(sizeof(struct thread_trace));
922 ttrace->paths.max = -1;
924 ttrace->syscall_stats = intlist__new(NULL);
929 static struct thread_trace *thread__trace(struct thread *thread, FILE *fp)
931 struct thread_trace *ttrace;
936 if (thread->priv == NULL)
937 thread->priv = thread_trace__new();
939 if (thread->priv == NULL)
942 ttrace = thread->priv;
947 color_fprintf(fp, PERF_COLOR_RED,
948 "WARNING: not enough memory, dropping samples!\n");
953 struct perf_tool tool;
957 struct syscall *table;
959 struct perf_record_opts opts;
960 struct machine *host;
964 unsigned long nr_events;
965 struct strlist *ev_qualifier;
966 bool not_ev_qualifier;
968 struct intlist *tid_list;
969 struct intlist *pid_list;
971 bool multiple_threads;
974 double duration_filter;
978 static int thread__read_fd_path(struct thread *thread, int fd)
980 struct thread_trace *ttrace = thread->priv;
981 char linkname[PATH_MAX], pathname[PATH_MAX];
985 if (thread->pid_ == thread->tid) {
986 scnprintf(linkname, sizeof(linkname),
987 "/proc/%d/fd/%d", thread->pid_, fd);
989 scnprintf(linkname, sizeof(linkname),
990 "/proc/%d/task/%d/fd/%d", thread->pid_, thread->tid, fd);
993 if (lstat(linkname, &st) < 0 || st.st_size + 1 > (off_t)sizeof(pathname))
996 ret = readlink(linkname, pathname, sizeof(pathname));
998 if (ret < 0 || ret > st.st_size)
1001 pathname[ret] = '\0';
1003 if (fd > ttrace->paths.max) {
1004 char **npath = realloc(ttrace->paths.table, (fd + 1) * sizeof(char *));
1009 if (ttrace->paths.max != -1) {
1010 memset(npath + ttrace->paths.max + 1, 0,
1011 (fd - ttrace->paths.max) * sizeof(char *));
1013 memset(npath, 0, (fd + 1) * sizeof(char *));
1016 ttrace->paths.table = npath;
1017 ttrace->paths.max = fd;
1020 ttrace->paths.table[fd] = strdup(pathname);
1022 return ttrace->paths.table[fd] != NULL ? 0 : -1;
1025 static const char *thread__fd_path(struct thread *thread, int fd, bool live)
1027 struct thread_trace *ttrace = thread->priv;
1035 if ((fd > ttrace->paths.max || ttrace->paths.table[fd] == NULL) &&
1036 (!live || thread__read_fd_path(thread, fd)))
1039 return ttrace->paths.table[fd];
1042 static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
1043 struct syscall_arg *arg)
1046 size_t printed = scnprintf(bf, size, "%d", fd);
1047 const char *path = thread__fd_path(arg->thread, fd, arg->trace->live);
1050 printed += scnprintf(bf + printed, size - printed, "<%s>", path);
1055 static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
1056 struct syscall_arg *arg)
1059 size_t printed = syscall_arg__scnprintf_fd(bf, size, arg);
1060 struct thread_trace *ttrace = arg->thread->priv;
1062 if (ttrace && fd >= 0 && fd <= ttrace->paths.max) {
1063 free(ttrace->paths.table[fd]);
1064 ttrace->paths.table[fd] = NULL;
1070 static bool trace__filter_duration(struct trace *trace, double t)
1072 return t < (trace->duration_filter * NSEC_PER_MSEC);
1075 static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
1077 double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC;
1079 return fprintf(fp, "%10.3f ", ts);
1082 static bool done = false;
1084 static void sig_handler(int sig __maybe_unused)
1089 static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread,
1090 u64 duration, u64 tstamp, FILE *fp)
1092 size_t printed = trace__fprintf_tstamp(trace, tstamp, fp);
1093 printed += fprintf_duration(duration, fp);
1095 if (trace->multiple_threads) {
1096 if (trace->show_comm)
1097 printed += fprintf(fp, "%.14s/", thread->comm);
1098 printed += fprintf(fp, "%d ", thread->tid);
1104 static int trace__process_event(struct trace *trace, struct machine *machine,
1105 union perf_event *event)
1109 switch (event->header.type) {
1110 case PERF_RECORD_LOST:
1111 color_fprintf(trace->output, PERF_COLOR_RED,
1112 "LOST %" PRIu64 " events!\n", event->lost.lost);
1113 ret = machine__process_lost_event(machine, event);
1115 ret = machine__process_event(machine, event);
1122 static int trace__tool_process(struct perf_tool *tool,
1123 union perf_event *event,
1124 struct perf_sample *sample __maybe_unused,
1125 struct machine *machine)
1127 struct trace *trace = container_of(tool, struct trace, tool);
1128 return trace__process_event(trace, machine, event);
1131 static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
1133 int err = symbol__init();
1138 trace->host = machine__new_host();
1139 if (trace->host == NULL)
1142 if (perf_target__has_task(&trace->opts.target)) {
1143 err = perf_event__synthesize_thread_map(&trace->tool, evlist->threads,
1144 trace__tool_process,
1147 err = perf_event__synthesize_threads(&trace->tool, trace__tool_process,
1157 static int syscall__set_arg_fmts(struct syscall *sc)
1159 struct format_field *field;
1162 sc->arg_scnprintf = calloc(sc->tp_format->format.nr_fields - 1, sizeof(void *));
1163 if (sc->arg_scnprintf == NULL)
1167 sc->arg_parm = sc->fmt->arg_parm;
1169 for (field = sc->tp_format->format.fields->next; field; field = field->next) {
1170 if (sc->fmt && sc->fmt->arg_scnprintf[idx])
1171 sc->arg_scnprintf[idx] = sc->fmt->arg_scnprintf[idx];
1172 else if (field->flags & FIELD_IS_POINTER)
1173 sc->arg_scnprintf[idx] = syscall_arg__scnprintf_hex;
1180 static int trace__read_syscall_info(struct trace *trace, int id)
1184 const char *name = audit_syscall_to_name(id, trace->audit_machine);
1189 if (id > trace->syscalls.max) {
1190 struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc));
1192 if (nsyscalls == NULL)
1195 if (trace->syscalls.max != -1) {
1196 memset(nsyscalls + trace->syscalls.max + 1, 0,
1197 (id - trace->syscalls.max) * sizeof(*sc));
1199 memset(nsyscalls, 0, (id + 1) * sizeof(*sc));
1202 trace->syscalls.table = nsyscalls;
1203 trace->syscalls.max = id;
1206 sc = trace->syscalls.table + id;
1209 if (trace->ev_qualifier) {
1210 bool in = strlist__find(trace->ev_qualifier, name) != NULL;
1212 if (!(in ^ trace->not_ev_qualifier)) {
1213 sc->filtered = true;
1215 * No need to do read tracepoint information since this will be
1222 sc->fmt = syscall_fmt__find(sc->name);
1224 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name);
1225 sc->tp_format = event_format__new("syscalls", tp_name);
1227 if (sc->tp_format == NULL && sc->fmt && sc->fmt->alias) {
1228 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias);
1229 sc->tp_format = event_format__new("syscalls", tp_name);
1232 if (sc->tp_format == NULL)
1235 return syscall__set_arg_fmts(sc);
1238 static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
1239 unsigned long *args, struct trace *trace,
1240 struct thread *thread)
1244 if (sc->tp_format != NULL) {
1245 struct format_field *field;
1247 struct syscall_arg arg = {
1254 for (field = sc->tp_format->format.fields->next; field;
1255 field = field->next, ++arg.idx, bit <<= 1) {
1259 * Suppress this argument if its value is zero and
1260 * we don't have a string associated in an
1263 if (args[arg.idx] == 0 &&
1264 !(sc->arg_scnprintf &&
1265 sc->arg_scnprintf[arg.idx] == SCA_STRARRAY &&
1266 sc->arg_parm[arg.idx]))
1269 printed += scnprintf(bf + printed, size - printed,
1270 "%s%s: ", printed ? ", " : "", field->name);
1271 if (sc->arg_scnprintf && sc->arg_scnprintf[arg.idx]) {
1272 arg.val = args[arg.idx];
1274 arg.parm = sc->arg_parm[arg.idx];
1275 printed += sc->arg_scnprintf[arg.idx](bf + printed,
1276 size - printed, &arg);
1278 printed += scnprintf(bf + printed, size - printed,
1279 "%ld", args[arg.idx]);
1286 printed += scnprintf(bf + printed, size - printed,
1288 printed ? ", " : "", i, args[i]);
1296 typedef int (*tracepoint_handler)(struct trace *trace, struct perf_evsel *evsel,
1297 struct perf_sample *sample);
/*
 * Return the struct syscall describing syscall number id, i.e. the entry
 * &trace->syscalls.table[id].
 *
 * If id is beyond trace->syscalls.max or its table entry has no name yet,
 * trace__read_syscall_info() is called first to populate it; the same
 * "out of range or unnamed" test is then repeated before the entry is
 * returned.
 *
 * Two diagnostic paths write to trace->output: an "Invalid syscall" message
 * that also bumps a counter n, and a "Problems reading syscall" message.
 * The conditions guarding them and the values returned on those paths are
 * not visible in this excerpt (presumably NULL is returned; TODO confirm).
 */
1299 static struct syscall *trace__syscall_info(struct trace *trace,
1300 struct perf_evsel *evsel, int id)
1306 * XXX: Noticed on x86_64, reproduced as far back as 3.0.36, haven't tried
1307 * before that, leaving at a higher verbosity level till that is
1308 * explained. Reproduced with plain ftrace with:
1310 * echo 1 > /t/events/raw_syscalls/sys_exit/enable
1311 * grep "NR -1 " /t/trace_pipe
1313 * After generating some load on the machine.
1317 fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n",
1318 id, perf_evsel__name(evsel), ++n);
/* Lazily load the table entry the first time this id is seen. */
1323 if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) &&
1324 trace__read_syscall_info(trace, id))
/* Re-check: the read may have succeeded without producing a usable entry. */
1327 if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL))
1330 return &trace->syscalls.table[id];
/* Error report; the syscall name is appended only when the table has one. */
1334 fprintf(trace->output, "Problems reading syscall %d", id);
1335 if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL)
1336 fprintf(trace->output, "(%s)", trace->syscalls.table[id].name);
1337 fputs(" information\n", trace->output);
/*
 * Fold the duration of the syscall that just finished into the per-thread,
 * per-syscall statistics.
 *
 * ttrace: per-thread trace state; syscall_stats is an intlist keyed by
 *         syscall id whose nodes carry a struct stats in ->priv
 * id:     syscall number
 * sample: the sample whose time marks the end of the syscall
 *
 * The declaration of 'duration' and any NULL checks after intlist__findnew()
 * and malloc() are not visible in this excerpt.
 */
1342 static void thread__update_stats(struct thread_trace *ttrace,
1343 int id, struct perf_sample *sample)
1345 struct int_node *inode;
1346 struct stats *stats;
1349 inode = intlist__findnew(ttrace->syscall_stats, id);
/* The stats object is allocated on first use and parked in the node. */
1353 stats = inode->priv;
1354 if (stats == NULL) {
1355 stats = malloc(sizeof(struct stats));
1359 inode->priv = stats;
/*
 * Only compute a duration when an entry timestamp exists and the sample is
 * strictly later than it; otherwise 'duration' keeps its earlier value.
 */
1362 if (ttrace->entry_time && sample->time > ttrace->entry_time)
1363 duration = sample->time - ttrace->entry_time;
1365 update_stats(stats, duration);
/*
 * Handler for raw_syscalls:sys_enter samples (registered under that name in
 * trace__run() and trace__replay()).
 *
 * Builds the "name(args" text for the syscall being entered into the
 * per-thread buffer ttrace->entry_str and records the entry timestamp, so
 * that trace__sys_exit() can print the complete line later.
 *
 * Early-exit checks (for example a failed syscall lookup) and the return
 * statements are not visible in this excerpt.
 */
1368 static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
1369 struct perf_sample *sample)
1374 struct thread *thread;
1375 int id = perf_evsel__intval(evsel, sample, "id");
1376 struct syscall *sc = trace__syscall_info(trace, evsel, id);
1377 struct thread_trace *ttrace;
1385 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1386 ttrace = thread__trace(thread, trace->output);
1390 args = perf_evsel__rawptr(evsel, sample, "args");
1392 fprintf(trace->output, "Problems reading syscall arguments\n");
1396 ttrace = thread->priv;
/*
 * The entry buffer is allocated once per thread and reused afterwards.
 * NOTE(review): the size 1024 is repeated literally below; a named constant
 * would keep the allocation and the scnprintf bounds in sync.
 */
1398 if (ttrace->entry_str == NULL) {
1399 ttrace->entry_str = malloc(1024);
1400 if (!ttrace->entry_str)
1404 ttrace->entry_time = sample->time;
1405 msg = ttrace->entry_str;
1406 printed += scnprintf(msg + printed, 1024 - printed, "%s(", sc->name);
1408 printed += syscall__scnprintf_args(sc, msg + printed, 1024 - printed,
1409 args, trace, thread);
/*
 * exit_group and exit are printed right away (unless a duration filter is
 * active) -- presumably because no matching sys_exit will arrive for them.
 * For other syscalls the entry is marked pending (the 'else' line is not
 * visible in this excerpt).
 */
1411 if (!strcmp(sc->name, "exit_group") || !strcmp(sc->name, "exit")) {
1412 if (!trace->duration_filter) {
1413 trace__fprintf_entry_head(trace, thread, 1, sample->time, trace->output);
1414 fprintf(trace->output, "%-70s\n", ttrace->entry_str);
1417 ttrace->entry_pending = true;
/*
 * Handler for raw_syscalls:sys_exit samples (registered under that name in
 * trace__run() and trace__replay()).
 *
 * Completes the line started by trace__sys_enter(): prints the entry head,
 * the pending entry text (or a "continued" marker), and the return value
 * formatted according to sc->fmt, then clears entry_pending.
 *
 * Early-exit checks, the 'goto'/'return' statements behind the duration
 * filter tests and the final return are not visible in this excerpt.
 */
1422 static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
1423 struct perf_sample *sample)
1427 struct thread *thread;
1428 int id = perf_evsel__intval(evsel, sample, "id");
1429 struct syscall *sc = trace__syscall_info(trace, evsel, id);
1430 struct thread_trace *ttrace;
1438 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1439 ttrace = thread__trace(thread, trace->output);
/* Any condition guarding this statistics update is not visible here. */
1444 thread__update_stats(ttrace, id, sample);
1446 ret = perf_evsel__intval(evsel, sample, "ret");
1448 ttrace = thread->priv;
1450 ttrace->exit_time = sample->time;
/*
 * With a known entry time the duration is computed and run through the
 * duration filter; without one, an active duration filter is tested instead
 * (the action taken in either case is on lines not visible here).
 */
1452 if (ttrace->entry_time) {
1453 duration = sample->time - ttrace->entry_time;
1454 if (trace__filter_duration(trace, duration))
1456 } else if (trace->duration_filter)
1459 trace__fprintf_entry_head(trace, thread, duration, sample->time, trace->output);
/*
 * Either replay the text saved at sys_enter or, when nothing is pending,
 * print a yellow "continued" marker followed by the bare syscall name.
 */
1461 if (ttrace->entry_pending) {
1462 fprintf(trace->output, "%-70s", ttrace->entry_str);
1464 fprintf(trace->output, " ... [");
1465 color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued");
1466 fprintf(trace->output, "]: %s()", sc->name);
/*
 * Return value formatting: plain "%d" without a format description;
 * otherwise errno name plus message for negative returns when fmt->errmsg,
 * "Timeout" for a zero return when fmt->timeout, hex when fmt->hexret.
 */
1469 if (sc->fmt == NULL) {
1471 fprintf(trace->output, ") = %d", ret);
1472 } else if (ret < 0 && sc->fmt->errmsg) {
/*
 * NOTE(review): the result of strerror_r() is used as a string pointer,
 * which matches the GNU variant (char * return) rather than the XSI one
 * (int return) -- confirm the build always selects the GNU variant.
 */
1474 const char *emsg = strerror_r(-ret, bf, sizeof(bf)),
1475 *e = audit_errno_to_name(-ret);
1477 fprintf(trace->output, ") = -1 %s %s", e, emsg);
1478 } else if (ret == 0 && sc->fmt->timeout)
1479 fprintf(trace->output, ") = 0 Timeout");
1480 else if (sc->fmt->hexret)
1481 fprintf(trace->output, ") = %#x", ret);
1485 fputc('\n', trace->output);
1487 ttrace->entry_pending = false;
/*
 * Handler for sched:sched_stat_runtime samples (registered in trace__run()).
 *
 * Reads the "runtime" field, converts it to milliseconds by dividing by
 * NSEC_PER_MSEC, and adds it to both the per-thread and the global
 * runtime_ms accumulators.
 *
 * The fprintf below dumps the raw event fields; the condition or label under
 * which it runs is not visible in this excerpt (presumably an error path
 * taken when the per-thread state is unavailable; TODO confirm).
 */
1492 static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel,
1493 struct perf_sample *sample)
1495 u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
1496 double runtime_ms = (double)runtime / NSEC_PER_MSEC;
1497 struct thread *thread = machine__findnew_thread(trace->host,
1500 struct thread_trace *ttrace = thread__trace(thread, trace->output);
1505 ttrace->runtime_ms += runtime_ms;
1506 trace->runtime_ms += runtime_ms;
1510 fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n",
1512 perf_evsel__strval(evsel, sample, "comm"),
1513 (pid_t)perf_evsel__intval(evsel, sample, "pid"),
1515 perf_evsel__intval(evsel, sample, "vruntime"));
/*
 * Decide whether a sample should be ignored, based on the optional pid and
 * tid filter lists in trace.
 *
 * The first test matches when the sample's pid is in pid_list or its tid is
 * in tid_list; the second matches when at least one list exists at all.
 * The return statements are not visible in this excerpt; presumably a match
 * means "do not skip" and a configured-but-unmatched filter means "skip"
 * (TODO confirm).
 */
1519 static bool skip_sample(struct trace *trace, struct perf_sample *sample)
1521 if ((trace->pid_list && intlist__find(trace->pid_list, sample->pid)) ||
1522 (trace->tid_list && intlist__find(trace->tid_list, sample->tid)))
1525 if (trace->pid_list || trace->tid_list)
/*
 * perf_tool sample callback (installed as trace->tool.sample in
 * trace__replay()).
 *
 * Recovers the enclosing struct trace from the tool pointer, applies the
 * pid/tid filter via skip_sample(), latches the first sample time as
 * base_time when relative timestamps are in use, and dispatches to the
 * handler stored in evsel->handler.func.
 *
 * Any NULL check on the handler and the return statements are not visible in
 * this excerpt.
 */
1531 static int trace__process_sample(struct perf_tool *tool,
1532 union perf_event *event __maybe_unused,
1533 struct perf_sample *sample,
1534 struct perf_evsel *evsel,
1535 struct machine *machine __maybe_unused)
1537 struct trace *trace = container_of(tool, struct trace, tool);
1540 tracepoint_handler handler = evsel->handler.func;
1542 if (skip_sample(trace, sample))
/* base_time == 0 means "not set yet"; full_time disables relative stamps. */
1545 if (!trace->full_time && trace->base_time == 0)
1546 trace->base_time = sample->time;
1549 handler(trace, evsel, sample);
/*
 * Report whether the session's event list contains the tracepoint called
 * name: the result is non-zero exactly when
 * perf_evlist__find_tracepoint_by_name() finds an evsel.
 */
1555 perf_session__has_tp(struct perf_session *session, const char *name)
1557 struct perf_evsel *evsel;
1559 evsel = perf_evlist__find_tracepoint_by_name(session->evlist, name);
1561 return evsel != NULL;
/*
 * Turn the textual pid/tid targets in trace->opts.target into the intlists
 * trace->pid_list and trace->tid_list (the lists consulted by
 * skip_sample()).
 *
 * Each list is built only when the corresponding string is set; a NULL
 * result from intlist__new() is reported with pr_err(). The return
 * statements are not visible in this excerpt.
 */
1564 static int parse_target_str(struct trace *trace)
1566 if (trace->opts.target.pid) {
1567 trace->pid_list = intlist__new(trace->opts.target.pid);
1568 if (trace->pid_list == NULL) {
1569 pr_err("Error parsing process id string\n");
1574 if (trace->opts.target.tid) {
1575 trace->tid_list = intlist__new(trace->opts.target.tid);
1576 if (trace->tid_list == NULL) {
1577 pr_err("Error parsing thread id string\n");
/*
 * Implement "perf trace record": build an argument vector consisting of the
 * fixed record_args prefix (which selects the raw_syscalls sys_enter and
 * sys_exit events) followed by the caller's argv, and hand it to
 * cmd_record().
 *
 * argc, argv: the arguments that followed "record" on the command line
 *
 * Returns whatever cmd_record() returns; the value returned when the
 * allocation fails is not visible in this excerpt.
 *
 * NOTE(review): rec_argv is not freed -- the result of cmd_record() is
 * returned directly, so the vector lives until process exit.
 */
1585 static int trace__record(int argc, const char **argv)
1587 unsigned int rec_argc, i, j;
1588 const char **rec_argv;
1589 const char * const record_args[] = {
1594 "-e", "raw_syscalls:sys_enter,raw_syscalls:sys_exit",
/* One extra slot is allocated; calloc leaves it NULL as a terminator. */
1597 rec_argc = ARRAY_SIZE(record_args) + argc;
1598 rec_argv = calloc(rec_argc + 1, sizeof(char *));
1600 if (rec_argv == NULL)
1603 for (i = 0; i < ARRAY_SIZE(record_args); i++)
1604 rec_argv[i] = record_args[i];
/* i keeps counting across both loops, so it ends as the total arg count. */
1606 for (j = 0; j < (unsigned int)argc; j++, i++)
1607 rec_argv[i] = argv[j];
1609 return cmd_record(i, rec_argv, NULL);
/* Forward declaration: called by trace__run() and trace__replay() ahead of its definition. */
1612 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp);
/*
 * Live tracing mode: set up the tracepoint events, optionally start the
 * workload given by argv, and consume samples from the mmap'ed ring buffers,
 * dispatching each one to its tracepoint handler.
 *
 * trace: tool state (options, output stream, counters)
 * argc, argv: workload command line; argc > 0 means a workload is forked
 *
 * Visible sequence: create evlist -> add sys_enter/sys_exit (and
 * sched_stat_runtime) tracepoints -> create maps -> init symbols -> config
 * -> install signal handlers -> prepare workload -> open -> mmap -> enable
 * -> start workload -> read loop -> disable -> optional summary -> teardown.
 *
 * Many lines (error tests, labels, the loop-exit logic and the final return)
 * are not visible in this excerpt; the comments below describe only what the
 * visible lines show.
 */
1614 static int trace__run(struct trace *trace, int argc, const char **argv)
1616 struct perf_evlist *evlist = perf_evlist__new();
1617 struct perf_evsel *evsel;
1619 unsigned long before;
1620 const bool forks = argc > 0;
1624 if (evlist == NULL) {
1625 fprintf(trace->output, "Not enough memory to run!\n");
/* Both raw syscall tracepoints are required; either failing is an error. */
1629 if (perf_evlist__add_newtp(evlist, "raw_syscalls", "sys_enter", trace__sys_enter) ||
1630 perf_evlist__add_newtp(evlist, "raw_syscalls", "sys_exit", trace__sys_exit))
/* The condition under which the scheduler tracepoint is added is not visible here. */
1634 perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
1635 trace__sched_stat_runtime))
1638 err = perf_evlist__create_maps(evlist, &trace->opts.target);
1640 fprintf(trace->output, "Problems parsing the target to trace, check your options!\n");
1641 goto out_delete_evlist;
1644 err = trace__symbols_init(trace, evlist);
1646 fprintf(trace->output, "Problems initializing symbol libraries!\n");
1647 goto out_delete_maps;
1650 perf_evlist__config(evlist, &trace->opts);
1652 signal(SIGCHLD, sig_handler);
1653 signal(SIGINT, sig_handler);
1656 err = perf_evlist__prepare_workload(evlist, &trace->opts.target,
1657 argv, false, false);
1659 fprintf(trace->output, "Couldn't run the workload!\n");
1660 goto out_delete_maps;
1664 err = perf_evlist__open(evlist);
1666 fprintf(trace->output, "Couldn't create the events: %s\n", strerror(errno));
1667 goto out_delete_maps;
1670 err = perf_evlist__mmap(evlist, UINT_MAX, false);
1672 fprintf(trace->output, "Couldn't mmap the events: %s\n", strerror(errno));
1673 goto out_close_evlist;
1676 perf_evlist__enable(evlist);
1679 perf_evlist__start_workload(evlist);
/* Thread ids are shown when tracing everything (map[0] == -1) or more than one thread. */
1681 trace->multiple_threads = evlist->threads->map[0] == -1 || evlist->threads->nr > 1;
/* Snapshot the event counter so the pass below can tell whether it read anything. */
1683 before = trace->nr_events;
1685 for (i = 0; i < evlist->nr_mmaps; i++) {
1686 union perf_event *event;
1688 while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) {
1689 const u32 type = event->header.type;
1690 tracepoint_handler handler;
1691 struct perf_sample sample;
1695 err = perf_evlist__parse_sample(evlist, event, &sample);
1697 fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err);
1701 if (!trace->full_time && trace->base_time == 0)
1702 trace->base_time = sample.time;
/* Non-sample records are routed to trace__process_event(). */
1704 if (type != PERF_RECORD_SAMPLE) {
1705 trace__process_event(trace, trace->host, event);
1709 evsel = perf_evlist__id2evsel(evlist, sample.id);
1710 if (evsel == NULL) {
1711 fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample.id);
1715 if (sample.raw_data == NULL) {
1716 fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
1717 perf_evsel__name(evsel), sample.tid,
1718 sample.cpu, sample.raw_size);
1722 handler = evsel->handler.func;
1723 handler(trace, evsel, &sample);
1726 goto out_unmap_evlist;
/*
 * No new events in this pass: the visible lines show a jump to teardown
 * (its guarding condition is not visible) and otherwise a blocking poll on
 * the evlist file descriptors.
 */
1730 if (trace->nr_events == before) {
1732 goto out_unmap_evlist;
1734 poll(evlist->pollfd, evlist->nr_fds, -1);
1738 perf_evlist__disable(evlist);
1743 if (!err && trace->summary)
1744 trace__fprintf_thread_summary(trace, trace->output);
/* Teardown runs in reverse order of setup; the goto labels themselves are not visible here. */
1746 perf_evlist__munmap(evlist);
1748 perf_evlist__close(evlist);
1750 perf_evlist__delete_maps(evlist);
1752 perf_evlist__delete(evlist);
1754 trace->live = false;
/*
 * Diagnostics for failing to set up the tracepoints; the conditions that
 * select between them are not visible in this excerpt.
 * NOTE(review): the first hint reads "Was your kernel was compiled" -- the
 * second "was" looks like a grammar slip in the user-facing text.
 */
1759 fputs("Error:\tUnable to find debugfs\n"
1760 "Hint:\tWas your kernel was compiled with debugfs support?\n"
1761 "Hint:\tIs the debugfs filesystem mounted?\n"
1762 "Hint:\tTry 'sudo mount -t debugfs nodev /sys/kernel/debug'\n",
1766 fprintf(trace->output,
1767 "Error:\tNo permissions to read %s/tracing/events/raw_syscalls\n"
1768 "Hint:\tTry 'sudo mount -o remount,mode=755 %s'\n",
1769 debugfs_mountpoint, debugfs_mountpoint);
1773 fprintf(trace->output, "Can't trace: %s\n",
1774 strerror_r(errno, bf, sizeof(bf)));
1778 goto out_delete_evlist;
/*
 * Replay mode: process a previously recorded perf data file (input_name)
 * instead of tracing live.
 *
 * Installs the perf_tool callbacks, opens the session, binds the
 * raw_syscalls handlers, verifies that both sys_enter and sys_exit events
 * are present in the file, parses the pid/tid filters and then processes all
 * events, printing the per-thread summary afterwards when requested.
 *
 * Error tests, goto labels and return statements are not visible in this
 * excerpt.
 */
1781 static int trace__replay(struct trace *trace)
1783 const struct perf_evsel_str_handler handlers[] = {
1784 { "raw_syscalls:sys_enter", trace__sys_enter, },
1785 { "raw_syscalls:sys_exit", trace__sys_exit, },
1788 struct perf_session *session;
/* Samples go to trace__process_sample; the rest use the stock perf_event__process_* handlers. */
1791 trace->tool.sample = trace__process_sample;
1792 trace->tool.mmap = perf_event__process_mmap;
1793 trace->tool.mmap2 = perf_event__process_mmap2;
1794 trace->tool.comm = perf_event__process_comm;
1795 trace->tool.exit = perf_event__process_exit;
1796 trace->tool.fork = perf_event__process_fork;
1797 trace->tool.attr = perf_event__process_attr;
1798 trace->tool.tracing_data = perf_event__process_tracing_data;
1799 trace->tool.build_id = perf_event__process_build_id;
1801 trace->tool.ordered_samples = true;
1802 trace->tool.ordering_requires_timestamps = true;
1804 /* add tid to output */
1805 trace->multiple_threads = true;
1807 if (symbol__init() < 0)
1810 session = perf_session__new(input_name, O_RDONLY, 0, false,
1812 if (session == NULL)
/* In replay mode the "host" machine is the one stored in the session. */
1815 trace->host = &session->machines.host;
1817 err = perf_session__set_tracepoints_handlers(session, handlers);
1821 if (!perf_session__has_tp(session, "raw_syscalls:sys_enter")) {
1822 pr_err("Data file does not have raw_syscalls:sys_enter events\n");
1826 if (!perf_session__has_tp(session, "raw_syscalls:sys_exit")) {
1827 pr_err("Data file does not have raw_syscalls:sys_exit events\n");
1831 err = parse_target_str(trace);
1837 err = perf_session__process_events(session, &trace->tool);
/*
 * NOTE(review): unlike the other pr_err() calls in this function, this
 * message has no trailing "\n".
 */
1839 pr_err("Failed to process events, error %d", err);
1841 else if (trace->summary)
1842 trace__fprintf_thread_summary(trace, trace->output);
1845 perf_session__delete(session);
/*
 * Print the banner and column headings of the per-thread summary to fp.
 *
 * The fprintf return values are accumulated in 'printed'; its declaration
 * and the return statement are not visible in this excerpt (presumably the
 * total is returned, since the caller stores the result as a byte count).
 */
1850 static size_t trace__fprintf_threads_header(FILE *fp)
1854 printed = fprintf(fp, "\n _____________________________________________________________________________\n");
1855 printed += fprintf(fp, " __) Summary of events (__\n\n");
1856 printed += fprintf(fp, " [ task - pid ] [ events ] [ ratio ] [ runtime ]\n");
1857 printed += fprintf(fp, " syscall count min max avg stddev\n");
1858 printed += fprintf(fp, " msec msec msec %%\n");
1859 printed += fprintf(fp, " _____________________________________________________________________________\n\n");
/*
 * Print one line per syscall recorded in ttrace->syscall_stats: the syscall
 * name, call count, min/max/avg duration in milliseconds and the relative
 * standard deviation in percent.
 *
 * ttrace: per-thread state holding the intlist of per-syscall stats
 * trace:  used to map the syscall id (inode->i) to its name
 * fp:     output stream
 *
 * The loop construct, NULL checks, some declarations and the return
 * statement are not visible in this excerpt.
 */
1864 static size_t thread__dump_stats(struct thread_trace *ttrace,
1865 struct trace *trace, FILE *fp)
1867 struct stats *stats;
1870 struct int_node *inode = intlist__first(ttrace->syscall_stats);
1875 printed += fprintf(fp, "\n");
1877 /* each int_node is a syscall */
1879 stats = inode->priv;
/* min and max are divided by NSEC_PER_MSEC to express them in milliseconds. */
1881 double min = (double)(stats->min) / NSEC_PER_MSEC;
1882 double max = (double)(stats->max) / NSEC_PER_MSEC;
1883 double avg = avg_stats(stats);
1885 u64 n = (u64) stats->n;
/*
 * The percentage is computed while avg is still in the original unit (same
 * unit as stddev_stats()); only afterwards is avg scaled for display. A
 * zero average yields 0.0 instead of dividing by zero.
 */
1887 pct = avg ? 100.0 * stddev_stats(stats)/avg : 0.0;
1888 avg /= NSEC_PER_MSEC;
1890 sc = &trace->syscalls.table[inode->i];
1891 printed += fprintf(fp, "%24s %14s : ", "", sc->name);
1892 printed += fprintf(fp, "%5" PRIu64 " %8.3f %8.3f",
1894 printed += fprintf(fp, " %8.3f %6.2f\n", avg, pct);
1897 inode = intlist__next(inode);
1900 printed += fprintf(fp, "\n\n");
1905 /* struct used to pass data to per-thread function */
/*
 * The per-thread callback trace__fprintf_one_thread() reads the members fp,
 * printed and trace from this struct; only 'trace' is visible in this
 * excerpt of the definition.
 */
1906 struct summary_data {
1908 struct trace *trace;
/*
 * Per-thread callback passed to machine__for_each_thread() by
 * trace__fprintf_thread_summary(): prints one summary line for the thread
 * (comm, tid, event count, share of all events, runtime) followed by its
 * per-syscall statistics.
 *
 * thread: the thread to report; its ->priv is the struct thread_trace
 * priv:   a struct summary_data carrying fp, the running byte count and trace
 *
 * Guard checks (for example a NULL ttrace), some declarations and the return
 * statement are not visible in this excerpt.
 *
 * NOTE(review): 'printed' starts from data->printed and is then added back
 * into data->printed at the end, so the earlier total appears to be counted
 * twice -- confirm whether 'printed' should start at 0.
 */
1912 static int trace__fprintf_one_thread(struct thread *thread, void *priv)
1914 struct summary_data *data = priv;
1915 FILE *fp = data->fp;
1916 size_t printed = data->printed;
1917 struct trace *trace = data->trace;
1918 struct thread_trace *ttrace = thread->priv;
/* Share of all traced events that belong to this thread, in percent. */
1925 ratio = (double)ttrace->nr_events / trace->nr_events * 100.0;
/*
 * Colour by share: green above 25%, yellow above 5%, normal otherwise; the
 * threshold that selects red is on a line not visible in this excerpt.
 */
1927 color = PERF_COLOR_NORMAL;
1929 color = PERF_COLOR_RED;
1930 else if (ratio > 25.0)
1931 color = PERF_COLOR_GREEN;
1932 else if (ratio > 5.0)
1933 color = PERF_COLOR_YELLOW;
1935 printed += color_fprintf(fp, color, "%20s", thread->comm);
1936 printed += fprintf(fp, " - %-5d :%11lu [", thread->tid, ttrace->nr_events);
1937 printed += color_fprintf(fp, color, "%5.1f%%", ratio);
1938 printed += fprintf(fp, " ] %10.3f ms\n", ttrace->runtime_ms);
1939 printed += thread__dump_stats(ttrace, trace, fp);
1941 data->printed += printed;
/*
 * Print the whole per-thread summary to fp: the header first, then one entry
 * per thread of trace->host via machine__for_each_thread() with
 * trace__fprintf_one_thread() as the callback.
 *
 * Returns data.printed, the byte count accumulated by the header and the
 * callbacks. The initializer of 'data' is not visible in this excerpt.
 */
1946 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
1948 struct summary_data data = {
1952 data.printed = trace__fprintf_threads_header(fp);
1954 machine__for_each_thread(trace->host, trace__fprintf_one_thread, &data);
1956 return data.printed;
/*
 * Option callback for --duration: stores the numeric value of str in
 * trace->duration_filter, where the struct trace is reached via opt->value.
 *
 * NOTE(review): atof() has no error reporting, so malformed input silently
 * becomes 0.0; strtod() with an end-pointer check would allow rejecting it.
 * The return statement is not visible in this excerpt.
 */
1959 static int trace__set_duration(const struct option *opt, const char *str,
1960 int unset __maybe_unused)
1962 struct trace *trace = opt->value;
1964 trace->duration_filter = atof(str);
/*
 * Open filename for writing and store the stream in trace->output.
 *
 * If the file already exists with a non-zero size it is first renamed to
 * "<filename>.old" so the previous contents are kept.
 *
 * Returns 0 on success, or -errno when fopen() fails.
 *
 * NOTE(review): the result of rename() is ignored, so a failed rename means
 * the existing file is truncated by fopen(..., "w").
 */
1968 static int trace__open_output(struct trace *trace, const char *filename)
1972 if (!stat(filename, &st) && st.st_size) {
1973 char oldname[PATH_MAX];
1975 scnprintf(oldname, sizeof(oldname), "%s.old", filename);
1977 rename(filename, oldname);
1980 trace->output = fopen(filename, "w");
1982 return trace->output == NULL ? -errno : 0;
1985 int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
1987 const char * const trace_usage[] = {
1988 "perf trace [<options>] [<command>]",
1989 "perf trace [<options>] -- <command> [<options>]",
1990 "perf trace record [<options>] [<command>]",
1991 "perf trace record [<options>] -- <command> [<options>]",
1994 struct trace trace = {
1995 .audit_machine = audit_detect_machine(),
2004 .user_freq = UINT_MAX,
2005 .user_interval = ULLONG_MAX,
2012 const char *output_name = NULL;
2013 const char *ev_qualifier_str = NULL;
2014 const struct option trace_options[] = {
2015 OPT_BOOLEAN(0, "comm", &trace.show_comm,
2016 "show the thread COMM next to its id"),
2017 OPT_STRING('e', "expr", &ev_qualifier_str, "expr",
2018 "list of events to trace"),
2019 OPT_STRING('o', "output", &output_name, "file", "output file name"),
2020 OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"),
2021 OPT_STRING('p', "pid", &trace.opts.target.pid, "pid",
2022 "trace events on existing process id"),
2023 OPT_STRING('t', "tid", &trace.opts.target.tid, "tid",
2024 "trace events on existing thread id"),
2025 OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide,
2026 "system-wide collection from all CPUs"),
2027 OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu",
2028 "list of cpus to monitor"),
2029 OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit,
2030 "child tasks do not inherit counters"),
2031 OPT_CALLBACK('m', "mmap-pages", &trace.opts.mmap_pages, "pages",
2032 "number of mmap data pages",
2033 perf_evlist__parse_mmap_pages),
2034 OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user",
2036 OPT_CALLBACK(0, "duration", &trace, "float",
2037 "show only events with duration > N.M ms",
2038 trace__set_duration),
2039 OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"),
2040 OPT_INCR('v', "verbose", &verbose, "be more verbose"),
2041 OPT_BOOLEAN('T', "time", &trace.full_time,
2042 "Show full timestamp, not time relative to first start"),
2043 OPT_BOOLEAN(0, "summary", &trace.summary,
2044 "Show syscall summary with statistics"),
2050 if ((argc > 1) && (strcmp(argv[1], "record") == 0))
2051 return trace__record(argc-2, &argv[2]);
2053 argc = parse_options(argc, argv, trace_options, trace_usage, 0);
2055 if (output_name != NULL) {
2056 err = trace__open_output(&trace, output_name);
2058 perror("failed to create output file");
2063 if (ev_qualifier_str != NULL) {
2064 const char *s = ev_qualifier_str;
2066 trace.not_ev_qualifier = *s == '!';
2067 if (trace.not_ev_qualifier)
2069 trace.ev_qualifier = strlist__new(true, s);
2070 if (trace.ev_qualifier == NULL) {
2071 fputs("Not enough memory to parse event qualifier",
2078 err = perf_target__validate(&trace.opts.target);
2080 perf_target__strerror(&trace.opts.target, err, bf, sizeof(bf));
2081 fprintf(trace.output, "%s", bf);
2085 err = perf_target__parse_uid(&trace.opts.target);
2087 perf_target__strerror(&trace.opts.target, err, bf, sizeof(bf));
2088 fprintf(trace.output, "%s", bf);
2092 if (!argc && perf_target__none(&trace.opts.target))
2093 trace.opts.target.system_wide = true;
2096 err = trace__replay(&trace);
2098 err = trace__run(&trace, argc, argv);
2101 if (output_name != NULL)
2102 fclose(trace.output);