1 #include <traceevent/event-parse.h>
3 #include "util/color.h"
4 #include "util/debug.h"
5 #include "util/evlist.h"
6 #include "util/machine.h"
7 #include "util/session.h"
8 #include "util/thread.h"
9 #include "util/parse-options.h"
10 #include "util/strlist.h"
11 #include "util/intlist.h"
12 #include "util/thread_map.h"
13 #include "util/stat.h"
17 #include <sys/eventfd.h>
19 #include <linux/futex.h>
21 /* For older distros: */
23 # define MAP_STACK 0x20000
27 # define MADV_HWPOISON 100
30 #ifndef MADV_MERGEABLE
31 # define MADV_MERGEABLE 12
34 #ifndef MADV_UNMERGEABLE
35 # define MADV_UNMERGEABLE 13
41 u64 (*integer)(struct tp_field *field, struct perf_sample *sample);
42 void *(*pointer)(struct tp_field *field, struct perf_sample *sample);
46 #define TP_UINT_FIELD(bits) \
47 static u64 tp_field__u##bits(struct tp_field *field, struct perf_sample *sample) \
49 return *(u##bits *)(sample->raw_data + field->offset); \
57 #define TP_UINT_FIELD__SWAPPED(bits) \
58 static u64 tp_field__swapped_u##bits(struct tp_field *field, struct perf_sample *sample) \
60 u##bits value = *(u##bits *)(sample->raw_data + field->offset); \
61 return bswap_##bits(value);\
64 TP_UINT_FIELD__SWAPPED(16);
65 TP_UINT_FIELD__SWAPPED(32);
66 TP_UINT_FIELD__SWAPPED(64);
68 static int tp_field__init_uint(struct tp_field *field,
69 struct format_field *format_field,
72 field->offset = format_field->offset;
74 switch (format_field->size) {
76 field->integer = tp_field__u8;
79 field->integer = needs_swap ? tp_field__swapped_u16 : tp_field__u16;
82 field->integer = needs_swap ? tp_field__swapped_u32 : tp_field__u32;
85 field->integer = needs_swap ? tp_field__swapped_u64 : tp_field__u64;
94 static void *tp_field__ptr(struct tp_field *field, struct perf_sample *sample)
96 return sample->raw_data + field->offset;
99 static int tp_field__init_ptr(struct tp_field *field, struct format_field *format_field)
101 field->offset = format_field->offset;
102 field->pointer = tp_field__ptr;
109 struct tp_field args, ret;
113 static int perf_evsel__init_tp_uint_field(struct perf_evsel *evsel,
114 struct tp_field *field,
117 struct format_field *format_field = perf_evsel__field(evsel, name);
119 if (format_field == NULL)
122 return tp_field__init_uint(field, format_field, evsel->needs_swap);
125 #define perf_evsel__init_sc_tp_uint_field(evsel, name) \
126 ({ struct syscall_tp *sc = evsel->priv;\
127 perf_evsel__init_tp_uint_field(evsel, &sc->name, #name); })
/*
 * Look up the tracepoint field called @name in @evsel and set @field up as a
 * pointer accessor for it.
 *
 * Returns -1 when the field does not exist, otherwise whatever
 * tp_field__init_ptr() returns.
 */
static int perf_evsel__init_tp_ptr_field(struct perf_evsel *evsel,
					 struct tp_field *field,
					 const char *name)
{
	struct format_field *ff = perf_evsel__field(evsel, name);

	return ff != NULL ? tp_field__init_ptr(field, ff) : -1;
}
141 #define perf_evsel__init_sc_tp_ptr_field(evsel, name) \
142 ({ struct syscall_tp *sc = evsel->priv;\
143 perf_evsel__init_tp_ptr_field(evsel, &sc->name, #name); })
145 static void perf_evsel__delete_priv(struct perf_evsel *evsel)
149 perf_evsel__delete(evsel);
152 static int perf_evsel__init_syscall_tp(struct perf_evsel *evsel, void *handler)
154 evsel->priv = malloc(sizeof(struct syscall_tp));
155 if (evsel->priv != NULL) {
156 if (perf_evsel__init_sc_tp_uint_field(evsel, id))
159 evsel->handler = handler;
/*
 * Create the raw_syscalls:<direction> tracepoint evsel and initialise its
 * syscall_tp private data with @handler.
 *
 * Returns the new evsel, or NULL if it could not be created or initialised
 * (a partially set up evsel is destroyed before returning).
 */
static struct perf_evsel *perf_evsel__syscall_newtp(const char *direction, void *handler)
{
	struct perf_evsel *evsel = perf_evsel__newtp("raw_syscalls", direction);

	if (evsel != NULL && perf_evsel__init_syscall_tp(evsel, handler)) {
		perf_evsel__delete_priv(evsel);
		return NULL;
	}

	return evsel;
}
187 #define perf_evsel__sc_tp_uint(evsel, name, sample) \
188 ({ struct syscall_tp *fields = evsel->priv; \
189 fields->name.integer(&fields->name, sample); })
191 #define perf_evsel__sc_tp_ptr(evsel, name, sample) \
192 ({ struct syscall_tp *fields = evsel->priv; \
193 fields->name.pointer(&fields->name, sample); })
195 static int perf_evlist__add_syscall_newtp(struct perf_evlist *evlist,
196 void *sys_enter_handler,
197 void *sys_exit_handler)
200 struct perf_evsel *sys_enter, *sys_exit;
202 sys_enter = perf_evsel__syscall_newtp("sys_enter", sys_enter_handler);
203 if (sys_enter == NULL)
206 if (perf_evsel__init_sc_tp_ptr_field(sys_enter, args))
207 goto out_delete_sys_enter;
209 sys_exit = perf_evsel__syscall_newtp("sys_exit", sys_exit_handler);
210 if (sys_exit == NULL)
211 goto out_delete_sys_enter;
213 if (perf_evsel__init_sc_tp_uint_field(sys_exit, ret))
214 goto out_delete_sys_exit;
216 perf_evlist__add(evlist, sys_enter);
217 perf_evlist__add(evlist, sys_exit);
224 perf_evsel__delete_priv(sys_exit);
225 out_delete_sys_enter:
226 perf_evsel__delete_priv(sys_enter);
233 struct thread *thread;
243 const char **entries;
246 #define DEFINE_STRARRAY(array) struct strarray strarray__##array = { \
247 .nr_entries = ARRAY_SIZE(array), \
251 #define DEFINE_STRARRAY_OFFSET(array, off) struct strarray strarray__##array = { \
253 .nr_entries = ARRAY_SIZE(array), \
257 static size_t __syscall_arg__scnprintf_strarray(char *bf, size_t size,
259 struct syscall_arg *arg)
261 struct strarray *sa = arg->parm;
262 int idx = arg->val - sa->offset;
264 if (idx < 0 || idx >= sa->nr_entries)
265 return scnprintf(bf, size, intfmt, arg->val);
267 return scnprintf(bf, size, "%s", sa->entries[idx]);
270 static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size,
271 struct syscall_arg *arg)
273 return __syscall_arg__scnprintf_strarray(bf, size, "%d", arg);
276 #define SCA_STRARRAY syscall_arg__scnprintf_strarray
278 static size_t syscall_arg__scnprintf_strhexarray(char *bf, size_t size,
279 struct syscall_arg *arg)
281 return __syscall_arg__scnprintf_strarray(bf, size, "%#x", arg);
284 #define SCA_STRHEXARRAY syscall_arg__scnprintf_strhexarray
286 static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
287 struct syscall_arg *arg);
289 #define SCA_FD syscall_arg__scnprintf_fd
291 static size_t syscall_arg__scnprintf_fd_at(char *bf, size_t size,
292 struct syscall_arg *arg)
297 return scnprintf(bf, size, "CWD");
299 return syscall_arg__scnprintf_fd(bf, size, arg);
302 #define SCA_FDAT syscall_arg__scnprintf_fd_at
304 static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
305 struct syscall_arg *arg);
307 #define SCA_CLOSE_FD syscall_arg__scnprintf_close_fd
309 static size_t syscall_arg__scnprintf_hex(char *bf, size_t size,
310 struct syscall_arg *arg)
312 return scnprintf(bf, size, "%#lx", arg->val);
315 #define SCA_HEX syscall_arg__scnprintf_hex
317 static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size,
318 struct syscall_arg *arg)
320 int printed = 0, prot = arg->val;
322 if (prot == PROT_NONE)
323 return scnprintf(bf, size, "NONE");
324 #define P_MMAP_PROT(n) \
325 if (prot & PROT_##n) { \
326 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
336 P_MMAP_PROT(GROWSDOWN);
337 P_MMAP_PROT(GROWSUP);
341 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", prot);
346 #define SCA_MMAP_PROT syscall_arg__scnprintf_mmap_prot
348 static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size,
349 struct syscall_arg *arg)
351 int printed = 0, flags = arg->val;
353 #define P_MMAP_FLAG(n) \
354 if (flags & MAP_##n) { \
355 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
360 P_MMAP_FLAG(PRIVATE);
364 P_MMAP_FLAG(ANONYMOUS);
365 P_MMAP_FLAG(DENYWRITE);
366 P_MMAP_FLAG(EXECUTABLE);
369 P_MMAP_FLAG(GROWSDOWN);
371 P_MMAP_FLAG(HUGETLB);
374 P_MMAP_FLAG(NONBLOCK);
375 P_MMAP_FLAG(NORESERVE);
376 P_MMAP_FLAG(POPULATE);
378 #ifdef MAP_UNINITIALIZED
379 P_MMAP_FLAG(UNINITIALIZED);
384 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
389 #define SCA_MMAP_FLAGS syscall_arg__scnprintf_mmap_flags
391 static size_t syscall_arg__scnprintf_madvise_behavior(char *bf, size_t size,
392 struct syscall_arg *arg)
394 int behavior = arg->val;
397 #define P_MADV_BHV(n) case MADV_##n: return scnprintf(bf, size, #n)
400 P_MADV_BHV(SEQUENTIAL);
401 P_MADV_BHV(WILLNEED);
402 P_MADV_BHV(DONTNEED);
404 P_MADV_BHV(DONTFORK);
406 P_MADV_BHV(HWPOISON);
407 #ifdef MADV_SOFT_OFFLINE
408 P_MADV_BHV(SOFT_OFFLINE);
410 P_MADV_BHV(MERGEABLE);
411 P_MADV_BHV(UNMERGEABLE);
413 P_MADV_BHV(HUGEPAGE);
415 #ifdef MADV_NOHUGEPAGE
416 P_MADV_BHV(NOHUGEPAGE);
419 P_MADV_BHV(DONTDUMP);
428 return scnprintf(bf, size, "%#x", behavior);
431 #define SCA_MADV_BHV syscall_arg__scnprintf_madvise_behavior
433 static size_t syscall_arg__scnprintf_flock(char *bf, size_t size,
434 struct syscall_arg *arg)
436 int printed = 0, op = arg->val;
439 return scnprintf(bf, size, "NONE");
441 if ((op & LOCK_##cmd) == LOCK_##cmd) { \
442 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #cmd); \
457 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", op);
462 #define SCA_FLOCK syscall_arg__scnprintf_flock
464 static size_t syscall_arg__scnprintf_futex_op(char *bf, size_t size, struct syscall_arg *arg)
466 enum syscall_futex_args {
467 SCF_UADDR = (1 << 0),
470 SCF_TIMEOUT = (1 << 3),
471 SCF_UADDR2 = (1 << 4),
475 int cmd = op & FUTEX_CMD_MASK;
479 #define P_FUTEX_OP(n) case FUTEX_##n: printed = scnprintf(bf, size, #n);
480 P_FUTEX_OP(WAIT); arg->mask |= SCF_VAL3|SCF_UADDR2; break;
481 P_FUTEX_OP(WAKE); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
482 P_FUTEX_OP(FD); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
483 P_FUTEX_OP(REQUEUE); arg->mask |= SCF_VAL3|SCF_TIMEOUT; break;
484 P_FUTEX_OP(CMP_REQUEUE); arg->mask |= SCF_TIMEOUT; break;
485 P_FUTEX_OP(CMP_REQUEUE_PI); arg->mask |= SCF_TIMEOUT; break;
486 P_FUTEX_OP(WAKE_OP); break;
487 P_FUTEX_OP(LOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
488 P_FUTEX_OP(UNLOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
489 P_FUTEX_OP(TRYLOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2; break;
490 P_FUTEX_OP(WAIT_BITSET); arg->mask |= SCF_UADDR2; break;
491 P_FUTEX_OP(WAKE_BITSET); arg->mask |= SCF_UADDR2; break;
492 P_FUTEX_OP(WAIT_REQUEUE_PI); break;
493 default: printed = scnprintf(bf, size, "%#x", cmd); break;
496 if (op & FUTEX_PRIVATE_FLAG)
497 printed += scnprintf(bf + printed, size - printed, "|PRIV");
499 if (op & FUTEX_CLOCK_REALTIME)
500 printed += scnprintf(bf + printed, size - printed, "|CLKRT");
505 #define SCA_FUTEX_OP syscall_arg__scnprintf_futex_op
507 static const char *epoll_ctl_ops[] = { "ADD", "DEL", "MOD", };
508 static DEFINE_STRARRAY_OFFSET(epoll_ctl_ops, 1);
510 static const char *itimers[] = { "REAL", "VIRTUAL", "PROF", };
511 static DEFINE_STRARRAY(itimers);
513 static const char *whences[] = { "SET", "CUR", "END",
521 static DEFINE_STRARRAY(whences);
523 static const char *fcntl_cmds[] = {
524 "DUPFD", "GETFD", "SETFD", "GETFL", "SETFL", "GETLK", "SETLK",
525 "SETLKW", "SETOWN", "GETOWN", "SETSIG", "GETSIG", "F_GETLK64",
526 "F_SETLK64", "F_SETLKW64", "F_SETOWN_EX", "F_GETOWN_EX",
529 static DEFINE_STRARRAY(fcntl_cmds);
531 static const char *rlimit_resources[] = {
532 "CPU", "FSIZE", "DATA", "STACK", "CORE", "RSS", "NPROC", "NOFILE",
533 "MEMLOCK", "AS", "LOCKS", "SIGPENDING", "MSGQUEUE", "NICE", "RTPRIO",
536 static DEFINE_STRARRAY(rlimit_resources);
538 static const char *sighow[] = { "BLOCK", "UNBLOCK", "SETMASK", };
539 static DEFINE_STRARRAY(sighow);
541 static const char *clockid[] = {
542 "REALTIME", "MONOTONIC", "PROCESS_CPUTIME_ID", "THREAD_CPUTIME_ID",
543 "MONOTONIC_RAW", "REALTIME_COARSE", "MONOTONIC_COARSE",
545 static DEFINE_STRARRAY(clockid);
547 static const char *socket_families[] = {
548 "UNSPEC", "LOCAL", "INET", "AX25", "IPX", "APPLETALK", "NETROM",
549 "BRIDGE", "ATMPVC", "X25", "INET6", "ROSE", "DECnet", "NETBEUI",
550 "SECURITY", "KEY", "NETLINK", "PACKET", "ASH", "ECONET", "ATMSVC",
551 "RDS", "SNA", "IRDA", "PPPOX", "WANPIPE", "LLC", "IB", "CAN", "TIPC",
552 "BLUETOOTH", "IUCV", "RXRPC", "ISDN", "PHONET", "IEEE802154", "CAIF",
553 "ALG", "NFC", "VSOCK",
555 static DEFINE_STRARRAY(socket_families);
557 #ifndef SOCK_TYPE_MASK
558 #define SOCK_TYPE_MASK 0xf
561 static size_t syscall_arg__scnprintf_socket_type(char *bf, size_t size,
562 struct syscall_arg *arg)
566 flags = type & ~SOCK_TYPE_MASK;
568 type &= SOCK_TYPE_MASK;
570 * Can't use a strarray, MIPS may override for ABI reasons.
573 #define P_SK_TYPE(n) case SOCK_##n: printed = scnprintf(bf, size, #n); break;
578 P_SK_TYPE(SEQPACKET);
583 printed = scnprintf(bf, size, "%#x", type);
586 #define P_SK_FLAG(n) \
587 if (flags & SOCK_##n) { \
588 printed += scnprintf(bf + printed, size - printed, "|%s", #n); \
589 flags &= ~SOCK_##n; \
597 printed += scnprintf(bf + printed, size - printed, "|%#x", flags);
602 #define SCA_SK_TYPE syscall_arg__scnprintf_socket_type
605 #define MSG_PROBE 0x10
607 #ifndef MSG_WAITFORONE
608 #define MSG_WAITFORONE 0x10000
610 #ifndef MSG_SENDPAGE_NOTLAST
611 #define MSG_SENDPAGE_NOTLAST 0x20000
614 #define MSG_FASTOPEN 0x20000000
617 static size_t syscall_arg__scnprintf_msg_flags(char *bf, size_t size,
618 struct syscall_arg *arg)
620 int printed = 0, flags = arg->val;
623 return scnprintf(bf, size, "NONE");
624 #define P_MSG_FLAG(n) \
625 if (flags & MSG_##n) { \
626 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
632 P_MSG_FLAG(DONTROUTE);
637 P_MSG_FLAG(DONTWAIT);
644 P_MSG_FLAG(ERRQUEUE);
645 P_MSG_FLAG(NOSIGNAL);
647 P_MSG_FLAG(WAITFORONE);
648 P_MSG_FLAG(SENDPAGE_NOTLAST);
649 P_MSG_FLAG(FASTOPEN);
650 P_MSG_FLAG(CMSG_CLOEXEC);
654 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
659 #define SCA_MSG_FLAGS syscall_arg__scnprintf_msg_flags
661 static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size,
662 struct syscall_arg *arg)
667 if (mode == F_OK) /* 0 */
668 return scnprintf(bf, size, "F");
670 if (mode & n##_OK) { \
671 printed += scnprintf(bf + printed, size - printed, "%s", #n); \
681 printed += scnprintf(bf + printed, size - printed, "|%#x", mode);
686 #define SCA_ACCMODE syscall_arg__scnprintf_access_mode
688 static size_t syscall_arg__scnprintf_open_flags(char *bf, size_t size,
689 struct syscall_arg *arg)
691 int printed = 0, flags = arg->val;
693 if (!(flags & O_CREAT))
694 arg->mask |= 1 << (arg->idx + 1); /* Mask the mode parm */
697 return scnprintf(bf, size, "RDONLY");
699 if (flags & O_##n) { \
700 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
724 if ((flags & O_SYNC) == O_SYNC)
725 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", "SYNC");
737 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
742 #define SCA_OPEN_FLAGS syscall_arg__scnprintf_open_flags
744 static size_t syscall_arg__scnprintf_eventfd_flags(char *bf, size_t size,
745 struct syscall_arg *arg)
747 int printed = 0, flags = arg->val;
750 return scnprintf(bf, size, "NONE");
752 if (flags & EFD_##n) { \
753 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
763 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
768 #define SCA_EFD_FLAGS syscall_arg__scnprintf_eventfd_flags
770 static size_t syscall_arg__scnprintf_pipe_flags(char *bf, size_t size,
771 struct syscall_arg *arg)
773 int printed = 0, flags = arg->val;
776 if (flags & O_##n) { \
777 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
786 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
791 #define SCA_PIPE_FLAGS syscall_arg__scnprintf_pipe_flags
793 static size_t syscall_arg__scnprintf_signum(char *bf, size_t size, struct syscall_arg *arg)
798 #define P_SIGNUM(n) case SIG##n: return scnprintf(bf, size, #n)
833 return scnprintf(bf, size, "%#x", sig);
836 #define SCA_SIGNUM syscall_arg__scnprintf_signum
838 #define TCGETS 0x5401
840 static const char *tioctls[] = {
841 "TCGETS", "TCSETS", "TCSETSW", "TCSETSF", "TCGETA", "TCSETA", "TCSETAW",
842 "TCSETAF", "TCSBRK", "TCXONC", "TCFLSH", "TIOCEXCL", "TIOCNXCL",
843 "TIOCSCTTY", "TIOCGPGRP", "TIOCSPGRP", "TIOCOUTQ", "TIOCSTI",
844 "TIOCGWINSZ", "TIOCSWINSZ", "TIOCMGET", "TIOCMBIS", "TIOCMBIC",
845 "TIOCMSET", "TIOCGSOFTCAR", "TIOCSSOFTCAR", "FIONREAD", "TIOCLINUX",
846 "TIOCCONS", "TIOCGSERIAL", "TIOCSSERIAL", "TIOCPKT", "FIONBIO",
847 "TIOCNOTTY", "TIOCSETD", "TIOCGETD", "TCSBRKP", [0x27] = "TIOCSBRK",
848 "TIOCCBRK", "TIOCGSID", "TCGETS2", "TCSETS2", "TCSETSW2", "TCSETSF2",
849 "TIOCGRS485", "TIOCSRS485", "TIOCGPTN", "TIOCSPTLCK",
850 "TIOCGDEV||TCGETX", "TCSETX", "TCSETXF", "TCSETXW", "TIOCSIG",
851 "TIOCVHANGUP", "TIOCGPKT", "TIOCGPTLCK", "TIOCGEXCL",
852 [0x50] = "FIONCLEX", "FIOCLEX", "FIOASYNC", "TIOCSERCONFIG",
853 "TIOCSERGWILD", "TIOCSERSWILD", "TIOCGLCKTRMIOS", "TIOCSLCKTRMIOS",
854 "TIOCSERGSTRUCT", "TIOCSERGETLSR", "TIOCSERGETMULTI", "TIOCSERSETMULTI",
855 "TIOCMIWAIT", "TIOCGICOUNT", [0x60] = "FIOQSIZE",
858 static DEFINE_STRARRAY_OFFSET(tioctls, 0x5401);
860 #define STRARRAY(arg, name, array) \
861 .arg_scnprintf = { [arg] = SCA_STRARRAY, }, \
862 .arg_parm = { [arg] = &strarray__##array, }
864 static struct syscall_fmt {
867 size_t (*arg_scnprintf[6])(char *bf, size_t size, struct syscall_arg *arg);
873 { .name = "access", .errmsg = true,
874 .arg_scnprintf = { [1] = SCA_ACCMODE, /* mode */ }, },
875 { .name = "arch_prctl", .errmsg = true, .alias = "prctl", },
876 { .name = "brk", .hexret = true,
877 .arg_scnprintf = { [0] = SCA_HEX, /* brk */ }, },
878 { .name = "clock_gettime", .errmsg = true, STRARRAY(0, clk_id, clockid), },
879 { .name = "close", .errmsg = true,
880 .arg_scnprintf = { [0] = SCA_CLOSE_FD, /* fd */ }, },
881 { .name = "connect", .errmsg = true, },
882 { .name = "dup", .errmsg = true,
883 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
884 { .name = "dup2", .errmsg = true,
885 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
886 { .name = "dup3", .errmsg = true,
887 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
888 { .name = "epoll_ctl", .errmsg = true, STRARRAY(1, op, epoll_ctl_ops), },
889 { .name = "eventfd2", .errmsg = true,
890 .arg_scnprintf = { [1] = SCA_EFD_FLAGS, /* flags */ }, },
891 { .name = "faccessat", .errmsg = true,
892 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
893 { .name = "fadvise64", .errmsg = true,
894 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
895 { .name = "fallocate", .errmsg = true,
896 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
897 { .name = "fchdir", .errmsg = true,
898 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
899 { .name = "fchmod", .errmsg = true,
900 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
901 { .name = "fchmodat", .errmsg = true,
902 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
903 { .name = "fchown", .errmsg = true,
904 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
905 { .name = "fchownat", .errmsg = true,
906 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
907 { .name = "fcntl", .errmsg = true,
908 .arg_scnprintf = { [0] = SCA_FD, /* fd */
909 [1] = SCA_STRARRAY, /* cmd */ },
910 .arg_parm = { [1] = &strarray__fcntl_cmds, /* cmd */ }, },
911 { .name = "fdatasync", .errmsg = true,
912 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
913 { .name = "flock", .errmsg = true,
914 .arg_scnprintf = { [0] = SCA_FD, /* fd */
915 [1] = SCA_FLOCK, /* cmd */ }, },
916 { .name = "fsetxattr", .errmsg = true,
917 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
918 { .name = "fstat", .errmsg = true, .alias = "newfstat",
919 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
920 { .name = "fstatat", .errmsg = true, .alias = "newfstatat",
921 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
922 { .name = "fstatfs", .errmsg = true,
923 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
924 { .name = "fsync", .errmsg = true,
925 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
926 { .name = "ftruncate", .errmsg = true,
927 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
928 { .name = "futex", .errmsg = true,
929 .arg_scnprintf = { [1] = SCA_FUTEX_OP, /* op */ }, },
930 { .name = "futimesat", .errmsg = true,
931 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
932 { .name = "getdents", .errmsg = true,
933 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
934 { .name = "getdents64", .errmsg = true,
935 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
936 { .name = "getitimer", .errmsg = true, STRARRAY(0, which, itimers), },
937 { .name = "getrlimit", .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
938 { .name = "ioctl", .errmsg = true,
939 .arg_scnprintf = { [0] = SCA_FD, /* fd */
940 [1] = SCA_STRHEXARRAY, /* cmd */
941 [2] = SCA_HEX, /* arg */ },
942 .arg_parm = { [1] = &strarray__tioctls, /* cmd */ }, },
943 { .name = "kill", .errmsg = true,
944 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
945 { .name = "linkat", .errmsg = true,
946 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
947 { .name = "lseek", .errmsg = true,
948 .arg_scnprintf = { [0] = SCA_FD, /* fd */
949 [2] = SCA_STRARRAY, /* whence */ },
950 .arg_parm = { [2] = &strarray__whences, /* whence */ }, },
951 { .name = "lstat", .errmsg = true, .alias = "newlstat", },
952 { .name = "madvise", .errmsg = true,
953 .arg_scnprintf = { [0] = SCA_HEX, /* start */
954 [2] = SCA_MADV_BHV, /* behavior */ }, },
955 { .name = "mkdirat", .errmsg = true,
956 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
957 { .name = "mknodat", .errmsg = true,
958 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
959 { .name = "mlock", .errmsg = true,
960 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
961 { .name = "mlockall", .errmsg = true,
962 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
963 { .name = "mmap", .hexret = true,
964 .arg_scnprintf = { [0] = SCA_HEX, /* addr */
965 [2] = SCA_MMAP_PROT, /* prot */
966 [3] = SCA_MMAP_FLAGS, /* flags */
967 [4] = SCA_FD, /* fd */ }, },
968 { .name = "mprotect", .errmsg = true,
969 .arg_scnprintf = { [0] = SCA_HEX, /* start */
970 [2] = SCA_MMAP_PROT, /* prot */ }, },
971 { .name = "mremap", .hexret = true,
972 .arg_scnprintf = { [0] = SCA_HEX, /* addr */
973 [4] = SCA_HEX, /* new_addr */ }, },
974 { .name = "munlock", .errmsg = true,
975 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
976 { .name = "munmap", .errmsg = true,
977 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
978 { .name = "name_to_handle_at", .errmsg = true,
979 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
980 { .name = "newfstatat", .errmsg = true,
981 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
982 { .name = "open", .errmsg = true,
983 .arg_scnprintf = { [1] = SCA_OPEN_FLAGS, /* flags */ }, },
984 { .name = "open_by_handle_at", .errmsg = true,
985 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
986 [2] = SCA_OPEN_FLAGS, /* flags */ }, },
987 { .name = "openat", .errmsg = true,
988 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
989 [2] = SCA_OPEN_FLAGS, /* flags */ }, },
990 { .name = "pipe2", .errmsg = true,
991 .arg_scnprintf = { [1] = SCA_PIPE_FLAGS, /* flags */ }, },
992 { .name = "poll", .errmsg = true, .timeout = true, },
993 { .name = "ppoll", .errmsg = true, .timeout = true, },
994 { .name = "pread", .errmsg = true, .alias = "pread64",
995 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
996 { .name = "preadv", .errmsg = true, .alias = "pread",
997 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
998 { .name = "prlimit64", .errmsg = true, STRARRAY(1, resource, rlimit_resources), },
999 { .name = "pwrite", .errmsg = true, .alias = "pwrite64",
1000 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1001 { .name = "pwritev", .errmsg = true,
1002 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1003 { .name = "read", .errmsg = true,
1004 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1005 { .name = "readlinkat", .errmsg = true,
1006 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1007 { .name = "readv", .errmsg = true,
1008 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1009 { .name = "recvfrom", .errmsg = true,
1010 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1011 { .name = "recvmmsg", .errmsg = true,
1012 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1013 { .name = "recvmsg", .errmsg = true,
1014 .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
1015 { .name = "renameat", .errmsg = true,
1016 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1017 { .name = "rt_sigaction", .errmsg = true,
1018 .arg_scnprintf = { [0] = SCA_SIGNUM, /* sig */ }, },
1019 { .name = "rt_sigprocmask", .errmsg = true, STRARRAY(0, how, sighow), },
1020 { .name = "rt_sigqueueinfo", .errmsg = true,
1021 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1022 { .name = "rt_tgsigqueueinfo", .errmsg = true,
1023 .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
1024 { .name = "select", .errmsg = true, .timeout = true, },
1025 { .name = "sendmmsg", .errmsg = true,
1026 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1027 { .name = "sendmsg", .errmsg = true,
1028 .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
1029 { .name = "sendto", .errmsg = true,
1030 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1031 { .name = "setitimer", .errmsg = true, STRARRAY(0, which, itimers), },
1032 { .name = "setrlimit", .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
1033 { .name = "shutdown", .errmsg = true,
1034 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1035 { .name = "socket", .errmsg = true,
1036 .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
1037 [1] = SCA_SK_TYPE, /* type */ },
1038 .arg_parm = { [0] = &strarray__socket_families, /* family */ }, },
1039 { .name = "socketpair", .errmsg = true,
1040 .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
1041 [1] = SCA_SK_TYPE, /* type */ },
1042 .arg_parm = { [0] = &strarray__socket_families, /* family */ }, },
1043 { .name = "stat", .errmsg = true, .alias = "newstat", },
1044 { .name = "symlinkat", .errmsg = true,
1045 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1046 { .name = "tgkill", .errmsg = true,
1047 .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
1048 { .name = "tkill", .errmsg = true,
1049 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1050 { .name = "uname", .errmsg = true, .alias = "newuname", },
1051 { .name = "unlinkat", .errmsg = true,
1052 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1053 { .name = "utimensat", .errmsg = true,
1054 .arg_scnprintf = { [0] = SCA_FDAT, /* dirfd */ }, },
1055 { .name = "write", .errmsg = true,
1056 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1057 { .name = "writev", .errmsg = true,
1058 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1061 static int syscall_fmt__cmp(const void *name, const void *fmtp)
1063 const struct syscall_fmt *fmt = fmtp;
1064 return strcmp(name, fmt->name);
1067 static struct syscall_fmt *syscall_fmt__find(const char *name)
1069 const int nmemb = ARRAY_SIZE(syscall_fmts);
1070 return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp);
1074 struct event_format *tp_format;
1077 struct syscall_fmt *fmt;
1078 size_t (**arg_scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
1082 static size_t fprintf_duration(unsigned long t, FILE *fp)
1084 double duration = (double)t / NSEC_PER_MSEC;
1085 size_t printed = fprintf(fp, "(");
1087 if (duration >= 1.0)
1088 printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration);
1089 else if (duration >= 0.01)
1090 printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration);
1092 printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration);
1093 return printed + fprintf(fp, "): ");
1096 struct thread_trace {
1100 unsigned long nr_events;
1108 struct intlist *syscall_stats;
1111 static struct thread_trace *thread_trace__new(void)
1113 struct thread_trace *ttrace = zalloc(sizeof(struct thread_trace));
1116 ttrace->paths.max = -1;
1118 ttrace->syscall_stats = intlist__new(NULL);
1123 static struct thread_trace *thread__trace(struct thread *thread, FILE *fp)
1125 struct thread_trace *ttrace;
1130 if (thread->priv == NULL)
1131 thread->priv = thread_trace__new();
1133 if (thread->priv == NULL)
1136 ttrace = thread->priv;
1137 ++ttrace->nr_events;
1141 color_fprintf(fp, PERF_COLOR_RED,
1142 "WARNING: not enough memory, dropping samples!\n");
1147 struct perf_tool tool;
1154 struct syscall *table;
1156 struct perf_record_opts opts;
1157 struct machine *host;
1161 unsigned long nr_events;
1162 struct strlist *ev_qualifier;
1163 bool not_ev_qualifier;
1165 const char *last_vfs_getname;
1166 struct intlist *tid_list;
1167 struct intlist *pid_list;
1169 bool multiple_threads;
1172 bool show_tool_stats;
1173 double duration_filter;
1176 u64 vfs_getname, proc_getname;
1180 static int trace__set_fd_pathname(struct thread *thread, int fd, const char *pathname)
1182 struct thread_trace *ttrace = thread->priv;
1184 if (fd > ttrace->paths.max) {
1185 char **npath = realloc(ttrace->paths.table, (fd + 1) * sizeof(char *));
1190 if (ttrace->paths.max != -1) {
1191 memset(npath + ttrace->paths.max + 1, 0,
1192 (fd - ttrace->paths.max) * sizeof(char *));
1194 memset(npath, 0, (fd + 1) * sizeof(char *));
1197 ttrace->paths.table = npath;
1198 ttrace->paths.max = fd;
1201 ttrace->paths.table[fd] = strdup(pathname);
1203 return ttrace->paths.table[fd] != NULL ? 0 : -1;
1206 static int thread__read_fd_path(struct thread *thread, int fd)
1208 char linkname[PATH_MAX], pathname[PATH_MAX];
1212 if (thread->pid_ == thread->tid) {
1213 scnprintf(linkname, sizeof(linkname),
1214 "/proc/%d/fd/%d", thread->pid_, fd);
1216 scnprintf(linkname, sizeof(linkname),
1217 "/proc/%d/task/%d/fd/%d", thread->pid_, thread->tid, fd);
1220 if (lstat(linkname, &st) < 0 || st.st_size + 1 > (off_t)sizeof(pathname))
1223 ret = readlink(linkname, pathname, sizeof(pathname));
1225 if (ret < 0 || ret > st.st_size)
1228 pathname[ret] = '\0';
1229 return trace__set_fd_pathname(thread, fd, pathname);
1232 static const char *thread__fd_path(struct thread *thread, int fd,
1233 struct trace *trace)
1235 struct thread_trace *ttrace = thread->priv;
1243 if ((fd > ttrace->paths.max || ttrace->paths.table[fd] == NULL))
1246 ++trace->stats.proc_getname;
1247 if (thread__read_fd_path(thread, fd)) {
1251 return ttrace->paths.table[fd];
1254 static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
1255 struct syscall_arg *arg)
1258 size_t printed = scnprintf(bf, size, "%d", fd);
1259 const char *path = thread__fd_path(arg->thread, fd, arg->trace);
1262 printed += scnprintf(bf + printed, size - printed, "<%s>", path);
1267 static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
1268 struct syscall_arg *arg)
1271 size_t printed = syscall_arg__scnprintf_fd(bf, size, arg);
1272 struct thread_trace *ttrace = arg->thread->priv;
1274 if (ttrace && fd >= 0 && fd <= ttrace->paths.max) {
1275 free(ttrace->paths.table[fd]);
1276 ttrace->paths.table[fd] = NULL;
1282 static bool trace__filter_duration(struct trace *trace, double t)
1284 return t < (trace->duration_filter * NSEC_PER_MSEC);
1287 static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
1289 double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC;
1291 return fprintf(fp, "%10.3f ", ts);
/*
 * Flags written from the signal handler and polled elsewhere.  Objects
 * shared with a signal handler have to be volatile sig_atomic_t for the
 * access to be well defined.
 */
static volatile sig_atomic_t done = 0;
static volatile sig_atomic_t interrupted = 0;

/*
 * Signal handler: request termination and remember whether the request came
 * from SIGINT.
 */
static void sig_handler(int sig)
{
	done = 1;
	interrupted = (sig == SIGINT);
}
1303 static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread,
1304 u64 duration, u64 tstamp, FILE *fp)
1306 size_t printed = trace__fprintf_tstamp(trace, tstamp, fp);
1307 printed += fprintf_duration(duration, fp);
1309 if (trace->multiple_threads) {
1310 if (trace->show_comm)
1311 printed += fprintf(fp, "%.14s/", thread__comm_str(thread));
1312 printed += fprintf(fp, "%d ", thread->tid);
/*
 * Handle a perf event according to event->header.type.
 * PERF_RECORD_LOST is reported in red on trace->output together with the
 * number of lost events, then passed to machine__process_lost_event().
 * The other visible path (presumably the default case) passes the event to
 * machine__process_event().  'ret' holds the result of whichever call ran.
 */
1318 static int trace__process_event(struct trace *trace, struct machine *machine,
1319 union perf_event *event, struct perf_sample *sample)
1323 switch (event->header.type) {
1324 case PERF_RECORD_LOST:
1325 color_fprintf(trace->output, PERF_COLOR_RED,
1326 "LOST %" PRIu64 " events!\n", event->lost.lost);
1327 ret = machine__process_lost_event(machine, event, sample);
1329 ret = machine__process_event(machine, event, sample);
/*
 * perf_tool-style callback adapter: recovers the enclosing struct trace
 * from its embedded 'tool' member and forwards the event to
 * trace__process_event(), returning its result.
 */
1336 static int trace__tool_process(struct perf_tool *tool,
1337 union perf_event *event,
1338 struct perf_sample *sample,
1339 struct machine *machine)
1341 struct trace *trace = container_of(tool, struct trace, tool);
1342 return trace__process_event(trace, machine, event, sample);
/*
 * Prepare symbol/thread bookkeeping for a live session: calls
 * symbol__init(), creates the host machine object in trace->host and
 * synthesizes thread information for evlist->threads, with the synthesized
 * events delivered through trace__tool_process().
 */
1345 static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
1347 int err = symbol__init();
1352 trace->host = machine__new_host();
/* machine__new_host() can fail; a NULL host is checked before use. */
1353 if (trace->host == NULL)
1356 err = __machine__synthesize_threads(trace->host, &trace->tool, &trace->opts.target,
1357 evlist->threads, trace__tool_process, false);
/*
 * Build sc->arg_scnprintf, the per-argument formatter table of a syscall.
 * The table gets one zeroed slot per tracepoint field except the first,
 * which the loop also skips by starting at fields->next.  A formatter
 * supplied by sc->fmt takes precedence; otherwise fields flagged
 * FIELD_IS_POINTER are printed with syscall_arg__scnprintf_hex.
 */
1364 static int syscall__set_arg_fmts(struct syscall *sc)
1366 struct format_field *field;
1369 sc->arg_scnprintf = calloc(sc->tp_format->format.nr_fields - 1, sizeof(void *));
1370 if (sc->arg_scnprintf == NULL)
/*
 * NOTE(review): this dereferences sc->fmt, which the loop below treats as
 * possibly NULL -- confirm the assignment is guarded.
 */
1374 sc->arg_parm = sc->fmt->arg_parm;
1376 for (field = sc->tp_format->format.fields->next; field; field = field->next) {
1377 if (sc->fmt && sc->fmt->arg_scnprintf[idx])
1378 sc->arg_scnprintf[idx] = sc->fmt->arg_scnprintf[idx];
1379 else if (field->flags & FIELD_IS_POINTER)
1380 sc->arg_scnprintf[idx] = syscall_arg__scnprintf_hex;
/*
 * Fill in the syscall table entry for 'id': resolve its name through
 * libaudit, grow trace->syscalls.table when id is beyond the current
 * maximum, apply the event qualifier filter, look up the pretty-printing
 * format and load the "syscalls:sys_enter_<name>" tracepoint format
 * (retrying with the format's alias), then set up the argument formatters.
 * Returns the result of syscall__set_arg_fmts() on the success path.
 */
1387 static int trace__read_syscall_info(struct trace *trace, int id)
1391 const char *name = audit_syscall_to_name(id, trace->audit.machine);
1396 if (id > trace->syscalls.max) {
/* realloc() into a temporary so the old table survives a failure. */
1397 struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc));
1399 if (nsyscalls == NULL)
/*
 * Zero only the newly added tail when a table already existed
 * (max != -1); otherwise zero the whole, freshly allocated table.
 */
1402 if (trace->syscalls.max != -1) {
1403 memset(nsyscalls + trace->syscalls.max + 1, 0,
1404 (id - trace->syscalls.max) * sizeof(*sc));
1406 memset(nsyscalls, 0, (id + 1) * sizeof(*sc));
1409 trace->syscalls.table = nsyscalls;
1410 trace->syscalls.max = id;
1413 sc = trace->syscalls.table + id;
1416 if (trace->ev_qualifier) {
1417 bool in = strlist__find(trace->ev_qualifier, name) != NULL;
/*
 * Filter when list membership equals the negation flag: not listed
 * with a plain qualifier, or listed with a negated one.
 */
1419 if (!(in ^ trace->not_ev_qualifier)) {
1420 sc->filtered = true;
1422 * No need to do read tracepoint information since this will be
1429 sc->fmt = syscall_fmt__find(sc->name);
1431 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name);
1432 sc->tp_format = event_format__new("syscalls", tp_name);
/* Some syscalls are exposed under another tracepoint name: try the alias. */
1434 if (sc->tp_format == NULL && sc->fmt && sc->fmt->alias) {
1435 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias);
1436 sc->tp_format = event_format__new("syscalls", tp_name);
1439 if (sc->tp_format == NULL)
1442 return syscall__set_arg_fmts(sc);
/*
 * Format the raw syscall arguments in 'args' into bf (at most size bytes).
 * When the tracepoint format is known, every field after the first is
 * emitted as "name: value", separated by ", "; the value goes through the
 * per-argument formatter when one is set and is printed with "%ld"
 * otherwise.  Without a tracepoint format the arguments are printed by
 * index instead (the last scnprintf() below).
 */
1445 static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
1446 unsigned long *args, struct trace *trace,
1447 struct thread *thread)
1451 if (sc->tp_format != NULL) {
1452 struct format_field *field;
1454 struct syscall_arg arg = {
/* arg.idx and the 'bit' mask advance in lock step with the field list. */
1461 for (field = sc->tp_format->format.fields->next; field;
1462 field = field->next, ++arg.idx, bit <<= 1) {
1466 * Suppress this argument if its value is zero and
1467 * and we don't have a string associated in an
1470 if (args[arg.idx] == 0 &&
1471 !(sc->arg_scnprintf &&
1472 sc->arg_scnprintf[arg.idx] == SCA_STRARRAY &&
1473 sc->arg_parm[arg.idx]))
/* The ", " separator is only emitted once something has been printed. */
1476 printed += scnprintf(bf + printed, size - printed,
1477 "%s%s: ", printed ? ", " : "", field->name);
1478 if (sc->arg_scnprintf && sc->arg_scnprintf[arg.idx]) {
1479 arg.val = args[arg.idx];
1481 arg.parm = sc->arg_parm[arg.idx];
1482 printed += sc->arg_scnprintf[arg.idx](bf + printed,
1483 size - printed, &arg);
1485 printed += scnprintf(bf + printed, size - printed,
1486 "%ld", args[arg.idx]);
1493 printed += scnprintf(bf + printed, size - printed,
1495 printed ? ", " : "", i, args[i]);
/*
 * Signature of the per-tracepoint handlers in this file (trace__sys_enter,
 * trace__sys_exit, ...).  A handler is read back from evsel->handler and
 * called with the owning trace, the evsel and the parsed sample.
 */
1503 typedef int (*tracepoint_handler)(struct trace *trace, struct perf_evsel *evsel,
1504 struct perf_sample *sample);
/*
 * Return the syscall table entry for 'id', reading its information on
 * first use through trace__read_syscall_info().  An id that still has no
 * named table entry afterwards is not returned; the tail of the function
 * reports "Problems reading syscall <id>[(<name>)] information" on
 * trace->output.  An "Invalid syscall" diagnostic with a running count is
 * printed for ids rejected up front (the rejecting condition is presumably
 * id < 0 -- TODO confirm).
 */
1506 static struct syscall *trace__syscall_info(struct trace *trace,
1507 struct perf_evsel *evsel, int id)
1513 * XXX: Noticed on x86_64, reproduced as far back as 3.0.36, haven't tried
1514 * before that, leaving at a higher verbosity level till that is
1515 * explained. Reproduced with plain ftrace with:
1517 * echo 1 > /t/events/raw_syscalls/sys_exit/enable
1518 * grep "NR -1 " /t/trace_pipe
1520 * After generating some load on the machine.
1524 fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n",
1525 id, perf_evsel__name(evsel), ++n);
/* Lazily populate the entry: table too small, or slot not yet named. */
1530 if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) &&
1531 trace__read_syscall_info(trace, id))
/* Re-check: the read may have succeeded without producing a name. */
1534 if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL))
1537 return &trace->syscalls.table[id];
1541 fprintf(trace->output, "Problems reading syscall %d", id);
1542 if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL)
1543 fprintf(trace->output, "(%s)", trace->syscalls.table[id].name);
1544 fputs(" information\n", trace->output);
/*
 * Account one completed syscall 'id' in the thread's per-syscall
 * statistics.  The stats object hangs off the int_node for 'id' in
 * ttrace->syscall_stats and is allocated the first time the syscall is
 * seen.  The duration fed to update_stats() is sample->time minus
 * ttrace->entry_time, computed only when an entry time exists and the
 * sample is later than it.
 */
1549 static void thread__update_stats(struct thread_trace *ttrace,
1550 int id, struct perf_sample *sample)
1552 struct int_node *inode;
1553 struct stats *stats;
1556 inode = intlist__findnew(ttrace->syscall_stats, id);
1560 stats = inode->priv;
/* First occurrence of this syscall for the thread: allocate its stats. */
1561 if (stats == NULL) {
1562 stats = malloc(sizeof(struct stats));
1566 inode->priv = stats;
1569 if (ttrace->entry_time && sample->time > ttrace->entry_time)
1570 duration = sample->time - ttrace->entry_time;
1572 update_stats(stats, duration);
/*
 * Handler for the syscall entry tracepoint.  Formats "<name>(<args>" into
 * the thread's entry_str buffer (a 1024-byte buffer allocated on first
 * use) and records the entry timestamp.  For exit and exit_group the line
 * is printed right away unless a duration filter is set -- presumably
 * because no matching exit event will follow; otherwise the entry is
 * marked pending so that trace__sys_exit() can complete the line.
 */
1575 static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
1576 struct perf_sample *sample)
1581 struct thread *thread;
1582 int id = perf_evsel__sc_tp_uint(evsel, id, sample);
1583 struct syscall *sc = trace__syscall_info(trace, evsel, id);
1584 struct thread_trace *ttrace;
1592 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1593 ttrace = thread__trace(thread, trace->output);
1597 args = perf_evsel__sc_tp_ptr(evsel, args, sample);
1598 ttrace = thread->priv;
/* The entry buffer is allocated once per thread and then reused. */
1600 if (ttrace->entry_str == NULL) {
1601 ttrace->entry_str = malloc(1024);
1602 if (!ttrace->entry_str)
1606 ttrace->entry_time = sample->time;
1607 msg = ttrace->entry_str;
/* 1024 below must match the size passed to malloc() above. */
1608 printed += scnprintf(msg + printed, 1024 - printed, "%s(", sc->name);
1610 printed += syscall__scnprintf_args(sc, msg + printed, 1024 - printed,
1611 args, trace, thread);
1613 if (!strcmp(sc->name, "exit_group") || !strcmp(sc->name, "exit")) {
1614 if (!trace->duration_filter) {
1615 trace__fprintf_entry_head(trace, thread, 1, sample->time, trace->output);
1616 fprintf(trace->output, "%-70s\n", ttrace->entry_str);
1619 ttrace->entry_pending = true;
/*
 * Handler for the syscall exit tracepoint.  Updates the per-thread syscall
 * statistics, binds a pathname captured by trace__vfs_getname() to the fd
 * returned by a successful open, computes the syscall duration, applies
 * the duration filter and prints the trace line: the pending entry text
 * (or a "continued" marker) followed by the formatted return value.
 */
1624 static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
1625 struct perf_sample *sample)
1629 struct thread *thread;
1630 int id = perf_evsel__sc_tp_uint(evsel, id, sample);
1631 struct syscall *sc = trace__syscall_info(trace, evsel, id);
1632 struct thread_trace *ttrace;
1640 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1641 ttrace = thread__trace(thread, trace->output);
1646 thread__update_stats(ttrace, id, sample);
1648 ret = perf_evsel__sc_tp_uint(evsel, ret, sample);
/*
 * A successful open whose pathname was seen by the vfs_getname probe:
 * remember fd -> path, then consume the pending pathname.
 */
1650 if (id == trace->audit.open_id && ret >= 0 && trace->last_vfs_getname) {
1651 trace__set_fd_pathname(thread, ret, trace->last_vfs_getname);
1652 trace->last_vfs_getname = NULL;
1653 ++trace->stats.vfs_getname;
1656 ttrace = thread->priv;
1658 ttrace->exit_time = sample->time;
/*
 * With a known entry time the duration decides whether the line is
 * filtered; without one, an active duration filter is checked instead.
 */
1660 if (ttrace->entry_time) {
1661 duration = sample->time - ttrace->entry_time;
1662 if (trace__filter_duration(trace, duration))
1664 } else if (trace->duration_filter)
1667 trace__fprintf_entry_head(trace, thread, duration, sample->time, trace->output);
1669 if (ttrace->entry_pending) {
1670 fprintf(trace->output, "%-70s", ttrace->entry_str);
1672 fprintf(trace->output, " ... [");
1673 color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued");
1674 fprintf(trace->output, "]: %s()", sc->name);
/*
 * Return value formatting: plain decimal when the syscall has no format
 * entry; "-1 <ERRNO NAME> <message>" for failures when the format asks
 * for it; "0 Timeout" for timeout-style syscalls; hex when requested.
 */
1677 if (sc->fmt == NULL) {
1679 fprintf(trace->output, ") = %d", ret);
1680 } else if (ret < 0 && sc->fmt->errmsg) {
/*
 * NOTE(review): the result of strerror_r() is used as a string pointer,
 * which relies on the GNU variant rather than the XSI one.
 */
1682 const char *emsg = strerror_r(-ret, bf, sizeof(bf)),
1683 *e = audit_errno_to_name(-ret);
1685 fprintf(trace->output, ") = -1 %s %s", e, emsg);
1686 } else if (ret == 0 && sc->fmt->timeout)
1687 fprintf(trace->output, ") = 0 Timeout");
1688 else if (sc->fmt->hexret)
1689 fprintf(trace->output, ") = %#x", ret);
1693 fputc('\n', trace->output);
/* The entry text has been consumed (or superseded) by this exit. */
1695 ttrace->entry_pending = false;
/*
 * Handler for the vfs_getname probe: remembers a pointer to the sample's
 * "pathname" field in trace->last_vfs_getname, where trace__sys_exit()
 * picks it up for the next successful open.
 */
1700 static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel,
1701 struct perf_sample *sample)
1703 trace->last_vfs_getname = perf_evsel__rawptr(evsel, sample, "pathname");
/*
 * Handler for sched:sched_stat_runtime: reads the "runtime" field, divides
 * it by NSEC_PER_MSEC and adds the result to both the thread's and the
 * trace-wide runtime_ms totals.  The fprintf() at the end dumps the raw
 * event fields on trace->output (presumably only on a failure path --
 * TODO confirm).
 */
1707 static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel,
1708 struct perf_sample *sample)
1710 u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
1711 double runtime_ms = (double)runtime / NSEC_PER_MSEC;
1712 struct thread *thread = machine__findnew_thread(trace->host,
1715 struct thread_trace *ttrace = thread__trace(thread, trace->output);
1720 ttrace->runtime_ms += runtime_ms;
1721 trace->runtime_ms += runtime_ms;
/* NOTE(review): the format ends in ")" without a matching "(". */
1725 fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n",
1727 perf_evsel__strval(evsel, sample, "comm"),
1728 (pid_t)perf_evsel__intval(evsel, sample, "pid"),
1730 perf_evsel__intval(evsel, sample, "vruntime"));
/*
 * Decide whether a sample should be ignored based on the pid/tid lists.
 * The first test matches a sample whose pid is in trace->pid_list or whose
 * tid is in trace->tid_list; the second test is reached otherwise and
 * fires when at least one of the two lists is in use.  Presumably matches
 * are kept and non-matches skipped only when a list was given -- TODO
 * confirm against the return statements.
 */
1734 static bool skip_sample(struct trace *trace, struct perf_sample *sample)
1736 if ((trace->pid_list && intlist__find(trace->pid_list, sample->pid)) ||
1737 (trace->tid_list && intlist__find(trace->tid_list, sample->tid)))
1740 if (trace->pid_list || trace->tid_list)
/*
 * perf_tool sample callback (installed by trace__replay()).  Samples
 * rejected by skip_sample() are ignored.  Unless full timestamps were
 * requested, the first sample seen while base_time is still 0 provides the
 * base time.  The sample is then dispatched to the handler stored in its
 * evsel.
 */
1746 static int trace__process_sample(struct perf_tool *tool,
1747 union perf_event *event __maybe_unused,
1748 struct perf_sample *sample,
1749 struct perf_evsel *evsel,
1750 struct machine *machine __maybe_unused)
1752 struct trace *trace = container_of(tool, struct trace, tool);
1755 tracepoint_handler handler = evsel->handler;
1757 if (skip_sample(trace, sample))
1760 if (!trace->full_time && trace->base_time == 0)
1761 trace->base_time = sample->time;
1764 handler(trace, evsel, sample);
/*
 * Return whether the session's event list contains a tracepoint event
 * called 'name' (e.g. "raw_syscalls:sys_enter").
 */
1770 perf_session__has_tp(struct perf_session *session, const char *name)
1772 struct perf_evsel *evsel;
1774 evsel = perf_evlist__find_tracepoint_by_name(session->evlist, name);
1776 return evsel != NULL;
/*
 * Turn the --pid / --tid target strings into integer lists
 * (trace->pid_list, trace->tid_list), which skip_sample() consults.
 * A list is only built when the corresponding string was given; a failed
 * intlist__new() is reported with pr_err().
 */
1779 static int parse_target_str(struct trace *trace)
1781 if (trace->opts.target.pid) {
1782 trace->pid_list = intlist__new(trace->opts.target.pid);
1783 if (trace->pid_list == NULL) {
1784 pr_err("Error parsing process id string\n");
1789 if (trace->opts.target.tid) {
1790 trace->tid_list = intlist__new(trace->opts.target.tid);
1791 if (trace->tid_list == NULL) {
1792 pr_err("Error parsing thread id string\n");
/*
 * Implement "perf trace record": build an argument vector made of the
 * fixed record_args (which select the raw_syscalls enter/exit tracepoints)
 * followed by the caller's argv, and hand it to cmd_record().
 *
 * NOTE(review): rec_argv is not freed in the lines visible here -- confirm
 * whether that is intentional.
 */
1800 static int trace__record(int argc, const char **argv)
1802 unsigned int rec_argc, i, j;
1803 const char **rec_argv;
1804 const char * const record_args[] = {
1809 "-e", "raw_syscalls:sys_enter,raw_syscalls:sys_exit",
1812 rec_argc = ARRAY_SIZE(record_args) + argc;
/* One extra zeroed slot so the vector ends with a NULL pointer. */
1813 rec_argv = calloc(rec_argc + 1, sizeof(char *));
1815 if (rec_argv == NULL)
1818 for (i = 0; i < ARRAY_SIZE(record_args); i++)
1819 rec_argv[i] = record_args[i];
/* i keeps counting, so the user arguments land after the fixed ones. */
1821 for (j = 0; j < (unsigned int)argc; j++, i++)
1822 rec_argv[i] = argv[j];
1824 return cmd_record(i, rec_argv, NULL);
/* Forward declaration: trace__run() uses it before its definition. */
1827 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp);
/*
 * Add the "probe:vfs_getname" tracepoint to evlist with
 * trace__vfs_getname() as its handler.  An event that lacks a "pathname"
 * field is deleted instead of being added.  The function returns void, so
 * callers are not told whether the probe was added.
 */
1829 static void perf_evlist__add_vfs_getname(struct perf_evlist *evlist)
1831 struct perf_evsel *evsel = perf_evsel__newtp("probe", "vfs_getname");
1835 if (perf_evsel__field(evsel, "pathname") == NULL) {
1836 perf_evsel__delete(evsel);
1840 evsel->handler = trace__vfs_getname;
1841 perf_evlist__add(evlist, evsel);
/*
 * Live tracing mode.  Sets up an event list with the syscall enter/exit
 * tracepoints, the vfs_getname probe and the sched_stat_runtime
 * tracepoint, creates the target maps, initialises symbols, prepares the
 * workload described by argv, then opens, mmaps and enables the events.
 * The main loop drains every mmap ring, dispatching samples to their evsel
 * handler and other events to trace__process_event(), and polls when a
 * pass produced nothing.  On the way out it prints the summary and tool
 * stats and tears the event list down in the reverse order of its setup.
 *
 * NOTE(review): the conditions guarding the optional steps (sched
 * tracepoint, workload, summary) are not visible here -- confirm.
 */
1844 static int trace__run(struct trace *trace, int argc, const char **argv)
1846 struct perf_evlist *evlist = perf_evlist__new();
1847 struct perf_evsel *evsel;
1849 unsigned long before;
/* Remaining command line arguments mean there is a workload to fork. */
1850 const bool forks = argc > 0;
1854 if (evlist == NULL) {
1855 fprintf(trace->output, "Not enough memory to run!\n");
1859 if (perf_evlist__add_syscall_newtp(evlist, trace__sys_enter, trace__sys_exit))
1862 perf_evlist__add_vfs_getname(evlist);
1865 perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
1866 trace__sched_stat_runtime))
1869 err = perf_evlist__create_maps(evlist, &trace->opts.target);
1871 fprintf(trace->output, "Problems parsing the target to trace, check your options!\n");
1872 goto out_delete_evlist;
1875 err = trace__symbols_init(trace, evlist);
1877 fprintf(trace->output, "Problems initializing symbol libraries!\n");
1878 goto out_delete_maps;
1881 perf_evlist__config(evlist, &trace->opts);
/* Both signals share sig_handler(), which tells SIGINT apart. */
1883 signal(SIGCHLD, sig_handler);
1884 signal(SIGINT, sig_handler);
1887 err = perf_evlist__prepare_workload(evlist, &trace->opts.target,
1888 argv, false, false);
1890 fprintf(trace->output, "Couldn't run the workload!\n");
1891 goto out_delete_maps;
1895 err = perf_evlist__open(evlist);
1897 goto out_error_open;
1899 err = perf_evlist__mmap(evlist, UINT_MAX, false);
1901 fprintf(trace->output, "Couldn't mmap the events: %s\n", strerror(errno));
1902 goto out_close_evlist;
1905 perf_evlist__enable(evlist);
1908 perf_evlist__start_workload(evlist);
/* Show thread ids when tracing more than one thread or "any" (-1). */
1910 trace->multiple_threads = evlist->threads->map[0] == -1 || evlist->threads->nr > 1;
/* Snapshot the event count to detect a pass that read nothing. */
1912 before = trace->nr_events;
1914 for (i = 0; i < evlist->nr_mmaps; i++) {
1915 union perf_event *event;
1917 while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) {
1918 const u32 type = event->header.type;
1919 tracepoint_handler handler;
1920 struct perf_sample sample;
1924 err = perf_evlist__parse_sample(evlist, event, &sample);
1926 fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err);
/* The first timestamp becomes the base for relative timestamps. */
1930 if (!trace->full_time && trace->base_time == 0)
1931 trace->base_time = sample.time;
1933 if (type != PERF_RECORD_SAMPLE) {
1934 trace__process_event(trace, trace->host, event, &sample);
1938 evsel = perf_evlist__id2evsel(evlist, sample.id);
1939 if (evsel == NULL) {
1940 fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample.id);
1944 if (sample.raw_data == NULL) {
1945 fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
1946 perf_evsel__name(evsel), sample.tid,
1947 sample.cpu, sample.raw_size);
1951 handler = evsel->handler;
1952 handler(trace, evsel, &sample);
/* Release the ring buffer slot now that the event has been handled. */
1954 perf_evlist__mmap_consume(evlist, i);
/*
 * Nothing new in this pass: wait for more data.  Once 'done' is set
 * the wait is bounded to 100 ms; before that poll() blocks (-1).
 */
1961 if (trace->nr_events == before) {
1962 int timeout = done ? 100 : -1;
1964 if (poll(evlist->pollfd, evlist->nr_fds, timeout) > 0)
1971 perf_evlist__disable(evlist);
1975 trace__fprintf_thread_summary(trace, trace->output);
1977 if (trace->show_tool_stats) {
1978 fprintf(trace->output, "Stats:\n "
1979 " vfs_getname : %" PRIu64 "\n"
1980 " proc_getname: %" PRIu64 "\n",
1981 trace->stats.vfs_getname,
1982 trace->stats.proc_getname);
/* Teardown mirrors setup: munmap, close, delete maps, delete evlist. */
1986 perf_evlist__munmap(evlist);
1988 perf_evlist__close(evlist);
1990 perf_evlist__delete_maps(evlist);
1992 perf_evlist__delete(evlist);
1994 trace->live = false;
/*
 * Error reporting: build a message for the tracepoint or open failure
 * in errbuf, print it and join the common teardown path.
 */
1997 char errbuf[BUFSIZ];
2000 perf_evlist__strerror_tp(evlist, errno, errbuf, sizeof(errbuf));
2004 perf_evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf));
2007 fprintf(trace->output, "%s\n", errbuf);
2008 goto out_delete_evlist;
/*
 * Replay mode: process a previously recorded perf data file instead of
 * tracing live.  Installs the perf_tool callbacks, opens the session,
 * attaches the syscall/vfs_getname handlers to the recorded tracepoints,
 * checks that both raw_syscalls events are present, builds the pid/tid
 * filter lists and processes all events.  The per-thread summary is
 * printed afterwards when requested and processing succeeded.
 */
2012 static int trace__replay(struct trace *trace)
2014 const struct perf_evsel_str_handler handlers[] = {
2015 { "raw_syscalls:sys_enter", trace__sys_enter, },
2016 { "raw_syscalls:sys_exit", trace__sys_exit, },
2017 { "probe:vfs_getname", trace__vfs_getname, },
2019 struct perf_data_file file = {
2021 .mode = PERF_DATA_MODE_READ,
2023 struct perf_session *session;
/* Samples go to this file's handler; the rest use generic processors. */
2026 trace->tool.sample = trace__process_sample;
2027 trace->tool.mmap = perf_event__process_mmap;
2028 trace->tool.mmap2 = perf_event__process_mmap2;
2029 trace->tool.comm = perf_event__process_comm;
2030 trace->tool.exit = perf_event__process_exit;
2031 trace->tool.fork = perf_event__process_fork;
2032 trace->tool.attr = perf_event__process_attr;
2033 trace->tool.tracing_data = perf_event__process_tracing_data;
2034 trace->tool.build_id = perf_event__process_build_id;
/* Ask the session layer to deliver samples in timestamp order. */
2036 trace->tool.ordered_samples = true;
2037 trace->tool.ordering_requires_timestamps = true;
2039 /* add tid to output */
2040 trace->multiple_threads = true;
2042 if (symbol__init() < 0)
2045 session = perf_session__new(&file, false, &trace->tool);
2046 if (session == NULL)
/* In replay mode the host machine belongs to the session. */
2049 trace->host = &session->machines.host;
2051 err = perf_session__set_tracepoints_handlers(session, handlers);
2055 if (!perf_session__has_tp(session, "raw_syscalls:sys_enter")) {
2056 pr_err("Data file does not have raw_syscalls:sys_enter events\n");
2060 if (!perf_session__has_tp(session, "raw_syscalls:sys_exit")) {
2061 pr_err("Data file does not have raw_syscalls:sys_exit events\n");
2065 err = parse_target_str(trace);
2071 err = perf_session__process_events(session, &trace->tool);
/* NOTE(review): unlike the pr_err() calls above, no trailing newline. */
2073 pr_err("Failed to process events, error %d", err);
2075 else if (trace->summary)
2076 trace__fprintf_thread_summary(trace, trace->output);
2079 perf_session__delete(session);
/*
 * Print the banner and column headings of the per-thread summary on fp.
 * 'printed' accumulates the value returned by each fprintf() call.
 */
2084 static size_t trace__fprintf_threads_header(FILE *fp)
2088 printed = fprintf(fp, "\n _____________________________________________________________________________\n");
2089 printed += fprintf(fp, " __) Summary of events (__\n\n");
2090 printed += fprintf(fp, " [ task - pid ] [ events ] [ ratio ] [ runtime ]\n");
2091 printed += fprintf(fp, " syscall count min max avg stddev\n");
2092 printed += fprintf(fp, " msec msec msec %%\n");
2093 printed += fprintf(fp, " _____________________________________________________________________________\n\n");
/*
 * Print one line of statistics per syscall recorded for a thread: name,
 * call count, minimum, maximum and average duration scaled by
 * NSEC_PER_MSEC, plus the standard deviation as a percentage of the
 * average.  Walks ttrace->syscall_stats from its first node, using
 * intlist__next() to advance.
 */
2098 static size_t thread__dump_stats(struct thread_trace *ttrace,
2099 struct trace *trace, FILE *fp)
2101 struct stats *stats;
2104 struct int_node *inode = intlist__first(ttrace->syscall_stats);
2109 printed += fprintf(fp, "\n");
2111 /* each int_node is a syscall */
2113 stats = inode->priv;
2115 double min = (double)(stats->min) / NSEC_PER_MSEC;
2116 double max = (double)(stats->max) / NSEC_PER_MSEC;
2117 double avg = avg_stats(stats);
2119 u64 n = (u64) stats->n;
/*
 * The percentage uses the unscaled average, so it is computed before
 * avg is scaled; a zero average yields 0.0 rather than a division
 * by zero.
 */
2121 pct = avg ? 100.0 * stddev_stats(stats)/avg : 0.0;
2122 avg /= NSEC_PER_MSEC;
/* The node's integer key is the syscall id, i.e. the table index. */
2124 sc = &trace->syscalls.table[inode->i];
2125 printed += fprintf(fp, "%24s %14s : ", "", sc->name);
2126 printed += fprintf(fp, "%5" PRIu64 " %8.3f %8.3f",
2128 printed += fprintf(fp, " %8.3f %6.2f\n", avg, pct);
2131 inode = intlist__next(inode);
2134 printed += fprintf(fp, "\n\n");
/*
 * Context handed to trace__fprintf_one_thread() through
 * machine__for_each_thread(): the owning trace plus, as used by that
 * callback, the output stream (fp) and a running character count
 * (printed).
 */
2139 /* struct used to pass data to per-thread function */
2140 struct summary_data {
2142 struct trace *trace;
/*
 * machine__for_each_thread() callback printing the summary line of one
 * thread: comm, tid, event count, share of all events in percent
 * (colour-coded by magnitude), accumulated runtime in ms, and then the
 * per-syscall statistics.  'priv' is a struct summary_data.
 */
2146 static int trace__fprintf_one_thread(struct thread *thread, void *priv)
2148 struct summary_data *data = priv;
2149 FILE *fp = data->fp;
2150 size_t printed = data->printed;
2151 struct trace *trace = data->trace;
2152 struct thread_trace *ttrace = thread->priv;
/* Share of all events, in percent, that belong to this thread. */
2159 ratio = (double)ttrace->nr_events / trace->nr_events * 100.0;
/* Colour by share; thresholds are checked from highest to lowest. */
2161 color = PERF_COLOR_NORMAL;
2163 color = PERF_COLOR_RED;
2164 else if (ratio > 25.0)
2165 color = PERF_COLOR_GREEN;
2166 else if (ratio > 5.0)
2167 color = PERF_COLOR_YELLOW;
2169 printed += color_fprintf(fp, color, "%20s", thread__comm_str(thread));
2170 printed += fprintf(fp, " - %-5d :%11lu [", thread->tid, ttrace->nr_events);
2171 printed += color_fprintf(fp, color, "%5.1f%%", ratio);
2172 printed += fprintf(fp, " ] %10.3f ms\n", ttrace->runtime_ms);
2173 printed += thread__dump_stats(ttrace, trace, fp);
/*
 * NOTE(review): 'printed' was seeded with data->printed above, so adding
 * it back here appears to count the previous total twice -- confirm
 * whether plain assignment was intended.
 */
2175 data->printed += printed;
/*
 * Print the whole per-thread summary on fp: the header, then one entry per
 * thread known to trace->host, produced by trace__fprintf_one_thread().
 * Returns the character count accumulated in the summary_data context.
 */
2180 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
2182 struct summary_data data = {
2186 data.printed = trace__fprintf_threads_header(fp);
2188 machine__for_each_thread(trace->host, trace__fprintf_one_thread, &data);
2190 return data.printed;
/*
 * Option callback for --duration: stores the parsed value in
 * trace->duration_filter (opt->value points at the struct trace).
 *
 * NOTE(review): atof() reports no errors, so a malformed argument
 * silently becomes 0, which disables the filter.
 */
2193 static int trace__set_duration(const struct option *opt, const char *str,
2194 int unset __maybe_unused)
2196 struct trace *trace = opt->value;
2198 trace->duration_filter = atof(str);
/*
 * Open 'filename' for writing as trace->output.  An existing, non-empty
 * file is first renamed to "<filename>.old" so it is not overwritten.
 * Returns 0 on success or -errno when fopen() fails.
 *
 * NOTE(review): the result of rename() is ignored; if it fails the old
 * file is truncated by fopen().
 */
2202 static int trace__open_output(struct trace *trace, const char *filename)
2206 if (!stat(filename, &st) && st.st_size) {
2207 char oldname[PATH_MAX];
2209 scnprintf(oldname, sizeof(oldname), "%s.old", filename);
2211 rename(filename, oldname);
2214 trace->output = fopen(filename, "w");
2216 return trace->output == NULL ? -errno : 0;
/*
 * Entry point of "perf trace".  "perf trace record ..." is dispatched to
 * trace__record() before any option parsing.  Otherwise the options are
 * parsed, the output file and the event qualifier list are set up, the
 * target is validated, and the tool runs either in replay mode
 * (trace__replay()) or live (trace__run()) -- presumably chosen by whether
 * an input file was given; TODO confirm.
 */
2219 int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
2221 const char * const trace_usage[] = {
2222 "perf trace [<options>] [<command>]",
2223 "perf trace [<options>] -- <command> [<options>]",
2224 "perf trace record [<options>] [<command>]",
2225 "perf trace record [<options>] -- <command> [<options>]",
2228 struct trace trace = {
2230 .machine = audit_detect_machine(),
/*
 * NOTE(review): this reads trace.audit.machine inside trace's own
 * initializer, so it depends on the order in which the initializers
 * are evaluated -- confirm this is reliable.
 */
2231 .open_id = audit_name_to_syscall("open", trace.audit.machine),
2241 .user_freq = UINT_MAX,
2242 .user_interval = ULLONG_MAX,
2249 const char *output_name = NULL;
2250 const char *ev_qualifier_str = NULL;
2251 const struct option trace_options[] = {
2252 OPT_BOOLEAN(0, "comm", &trace.show_comm,
2253 "show the thread COMM next to its id"),
2254 OPT_BOOLEAN(0, "tool_stats", &trace.show_tool_stats, "show tool stats"),
2255 OPT_STRING('e', "expr", &ev_qualifier_str, "expr",
2256 "list of events to trace"),
2257 OPT_STRING('o', "output", &output_name, "file", "output file name"),
2258 OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"),
2259 OPT_STRING('p', "pid", &trace.opts.target.pid, "pid",
2260 "trace events on existing process id"),
2261 OPT_STRING('t', "tid", &trace.opts.target.tid, "tid",
2262 "trace events on existing thread id"),
2263 OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide,
2264 "system-wide collection from all CPUs"),
2265 OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu",
2266 "list of cpus to monitor"),
2267 OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit,
2268 "child tasks do not inherit counters"),
2269 OPT_CALLBACK('m', "mmap-pages", &trace.opts.mmap_pages, "pages",
2270 "number of mmap data pages",
2271 perf_evlist__parse_mmap_pages),
2272 OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user",
2274 OPT_CALLBACK(0, "duration", &trace, "float",
2275 "show only events with duration > N.M ms",
2276 trace__set_duration),
2277 OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"),
2278 OPT_INCR('v', "verbose", &verbose, "be more verbose"),
2279 OPT_BOOLEAN('T', "time", &trace.full_time,
2280 "Show full timestamp, not time relative to first start"),
2281 OPT_BOOLEAN(0, "summary", &trace.summary,
2282 "Show syscall summary with statistics"),
/* "record" is a sub-command: strip "trace record" and delegate. */
2288 if ((argc > 1) && (strcmp(argv[1], "record") == 0))
2289 return trace__record(argc-2, &argv[2]);
2291 argc = parse_options(argc, argv, trace_options, trace_usage, 0);
2293 if (output_name != NULL) {
2294 err = trace__open_output(&trace, output_name);
2296 perror("failed to create output file");
2301 if (ev_qualifier_str != NULL) {
2302 const char *s = ev_qualifier_str;
/* A leading '!' turns the list into "everything except these". */
2304 trace.not_ev_qualifier = *s == '!';
2305 if (trace.not_ev_qualifier)
2307 trace.ev_qualifier = strlist__new(true, s);
2308 if (trace.ev_qualifier == NULL) {
2309 fputs("Not enough memory to parse event qualifier",
2316 err = perf_target__validate(&trace.opts.target);
2318 perf_target__strerror(&trace.opts.target, err, bf, sizeof(bf));
2319 fprintf(trace.output, "%s", bf);
2323 err = perf_target__parse_uid(&trace.opts.target);
2325 perf_target__strerror(&trace.opts.target, err, bf, sizeof(bf));
2326 fprintf(trace.output, "%s", bf);
/* No workload and no explicit target: trace the whole system. */
2330 if (!argc && perf_target__none(&trace.opts.target))
2331 trace.opts.target.system_wide = true;
2334 err = trace__replay(&trace);
2336 err = trace__run(&trace, argc, argv);
/* Only close the stream when this function opened it. */
2339 if (output_name != NULL)
2340 fclose(trace.output);