1 #include <traceevent/event-parse.h>
3 #include "util/color.h"
4 #include "util/debug.h"
5 #include "util/evlist.h"
6 #include "util/machine.h"
7 #include "util/session.h"
8 #include "util/thread.h"
9 #include "util/parse-options.h"
10 #include "util/strlist.h"
11 #include "util/intlist.h"
12 #include "util/thread_map.h"
13 #include "util/stat.h"
17 #include <sys/eventfd.h>
19 #include <linux/futex.h>
21 /* For older distros: */
/* Fallback values for constants that the system headers may not provide. */
23 # define MAP_STACK 0x20000
27 # define MADV_HWPOISON 100
30 #ifndef MADV_MERGEABLE
31 # define MADV_MERGEABLE 12
34 #ifndef MADV_UNMERGEABLE
35 # define MADV_UNMERGEABLE 13
/*
 * Reader callbacks of a tracepoint field: fetch the field from a sample
 * either as an integer widened to u64 or as a pointer into the sample.
 */
41 u64 (*integer)(struct tp_field *field, struct perf_sample *sample);
42 void *(*pointer)(struct tp_field *field, struct perf_sample *sample);
/*
 * TP_UINT_FIELD(bits) - define tp_field__u<bits>(), which reads the unsigned
 * <bits>-wide integer found field->offset bytes into the sample's raw data
 * and returns it widened to u64.
 *
 * The value is copied out with memcpy() instead of being loaded through a
 * casted pointer: nothing guarantees that raw_data + offset is suitably
 * aligned for u<bits>, and a misaligned load is undefined behaviour that
 * faults on strict-alignment architectures.
 */
#define TP_UINT_FIELD(bits) \
static u64 tp_field__u##bits(struct tp_field *field, struct perf_sample *sample) \
{ \
	u##bits value; \
	memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
	return value; \
}
57 #define TP_UINT_FIELD__SWAPPED(bits) \
58 static u64 tp_field__swapped_u##bits(struct tp_field *field, struct perf_sample *sample) \
60 u##bits value = *(u##bits *)(sample->raw_data + field->offset); \
61 return bswap_##bits(value);\
64 TP_UINT_FIELD__SWAPPED(16);
65 TP_UINT_FIELD__SWAPPED(32);
66 TP_UINT_FIELD__SWAPPED(64);
/*
 * Set @field up as an unsigned-integer reader for @format_field.
 *
 * The field's offset is recorded and the reader is picked according to
 * format_field->size. Multi-byte widths use the byte-swapping variant when
 * needs_swap is set; the single-byte reader needs no swapping.
 */
68 static int tp_field__init_uint(struct tp_field *field,
69 struct format_field *format_field,
72 field->offset = format_field->offset;
74 switch (format_field->size) {
76 field->integer = tp_field__u8;
79 field->integer = needs_swap ? tp_field__swapped_u16 : tp_field__u16;
82 field->integer = needs_swap ? tp_field__swapped_u32 : tp_field__u32;
85 field->integer = needs_swap ? tp_field__swapped_u64 : tp_field__u64;
94 static void *tp_field__ptr(struct tp_field *field, struct perf_sample *sample)
96 return sample->raw_data + field->offset;
/*
 * Set @field up as a pointer reader for @format_field: record the field's
 * offset and install tp_field__ptr() as the accessor.
 */
99 static int tp_field__init_ptr(struct tp_field *field, struct format_field *format_field)
101 field->offset = format_field->offset;
102 field->pointer = tp_field__ptr;
/*
 * Accessors for the "args" field (set up on sys_enter) and the "ret" field
 * (set up on sys_exit) of the raw_syscalls tracepoints.
 */
109 struct tp_field args, ret;
/*
 * Look up the tracepoint field called @name on @evsel and set @field up as
 * an unsigned-integer reader for it, honouring evsel->needs_swap. The lookup
 * result is checked for NULL before it is used.
 */
113 static int perf_evsel__init_tp_uint_field(struct perf_evsel *evsel,
114 struct tp_field *field,
117 struct format_field *format_field = perf_evsel__field(evsel, name);
119 if (format_field == NULL)
122 return tp_field__init_uint(field, format_field, evsel->needs_swap);
/*
 * Initialise member @name of the struct syscall_tp stored in evsel->priv as
 * an unsigned-integer reader for the tracepoint field of the same name.
 * Evaluates to the result of perf_evsel__init_tp_uint_field().
 */
#define perf_evsel__init_sc_tp_uint_field(evsel, name) \
	({ struct syscall_tp *tp_priv = evsel->priv; \
	   perf_evsel__init_tp_uint_field(evsel, &tp_priv->name, #name); })
/*
 * Look up the tracepoint field called @name on @evsel and set @field up as
 * a pointer reader for it. The lookup result is checked for NULL before it
 * is used.
 */
129 static int perf_evsel__init_tp_ptr_field(struct perf_evsel *evsel,
130 struct tp_field *field,
133 struct format_field *format_field = perf_evsel__field(evsel, name);
135 if (format_field == NULL)
138 return tp_field__init_ptr(field, format_field);
/*
 * Initialise member @name of the struct syscall_tp stored in evsel->priv as
 * a pointer reader for the tracepoint field of the same name.
 * Evaluates to the result of perf_evsel__init_tp_ptr_field().
 */
#define perf_evsel__init_sc_tp_ptr_field(evsel, name) \
	({ struct syscall_tp *tp_priv = evsel->priv; \
	   perf_evsel__init_tp_ptr_field(evsel, &tp_priv->name, #name); })
/*
 * Tear down an evsel created by perf_evsel__syscall_newtp(), finishing with
 * perf_evsel__delete().
 */
145 static void perf_evsel__delete_priv(struct perf_evsel *evsel)
149 perf_evsel__delete(evsel);
/*
 * Create an evsel for the raw_syscalls:<direction> tracepoint.
 *
 * A struct syscall_tp is malloc'ed and stored in evsel->priv, its "id"
 * field reader is initialised and @handler is installed as evsel->handler.
 * perf_evsel__delete_priv() is called on the evsel in the cleanup path.
 */
152 static struct perf_evsel *perf_evsel__syscall_newtp(const char *direction,
153 void *handler, int idx)
155 struct perf_evsel *evsel = perf_evsel__newtp("raw_syscalls", direction, idx);
158 evsel->priv = malloc(sizeof(struct syscall_tp));
160 if (evsel->priv == NULL)
163 if (perf_evsel__init_sc_tp_uint_field(evsel, id))
166 evsel->handler = handler;
172 perf_evsel__delete_priv(evsel);
/*
 * Read the unsigned-integer tracepoint field @name from @sample through the
 * reader kept in the struct syscall_tp stored in evsel->priv.
 */
#define perf_evsel__sc_tp_uint(evsel, name, sample) \
	({ struct syscall_tp *sc_tp = evsel->priv; \
	   sc_tp->name.integer(&sc_tp->name, sample); })
/*
 * Get a pointer to the tracepoint field @name inside @sample through the
 * reader kept in the struct syscall_tp stored in evsel->priv.
 */
#define perf_evsel__sc_tp_ptr(evsel, name, sample) \
	({ struct syscall_tp *sc_tp = evsel->priv; \
	   sc_tp->name.pointer(&sc_tp->name, sample); })
/*
 * Create the raw_syscalls sys_enter and sys_exit evsels and add them to
 * @evlist.
 *
 * The new evsels take consecutive indices starting at evlist->nr_entries.
 * sys_enter gets a pointer reader for its "args" field and sys_exit an
 * unsigned-integer reader for its "ret" field. When a step fails, the
 * evsels created so far are released through the out_delete_* labels.
 */
184 static int perf_evlist__add_syscall_newtp(struct perf_evlist *evlist,
185 void *sys_enter_handler,
186 void *sys_exit_handler)
189 int idx = evlist->nr_entries;
190 struct perf_evsel *sys_enter, *sys_exit;
192 sys_enter = perf_evsel__syscall_newtp("sys_enter", sys_enter_handler, idx++);
193 if (sys_enter == NULL)
196 if (perf_evsel__init_sc_tp_ptr_field(sys_enter, args))
197 goto out_delete_sys_enter;
199 sys_exit = perf_evsel__syscall_newtp("sys_exit", sys_exit_handler, idx++);
200 if (sys_exit == NULL)
201 goto out_delete_sys_enter;
203 if (perf_evsel__init_sc_tp_uint_field(sys_exit, ret))
204 goto out_delete_sys_exit;
206 perf_evlist__add(evlist, sys_enter);
207 perf_evlist__add(evlist, sys_exit);
214 perf_evsel__delete_priv(sys_exit);
215 out_delete_sys_enter:
216 perf_evsel__delete_priv(sys_enter);
/*
 * NOTE(review): enclosing struct not visible here; presumably the member the
 * fd formatters read as arg->thread to resolve fd pathnames - confirm.
 */
223 struct thread *thread;
/*
 * NOTE(review): enclosing struct not visible here; presumably the string
 * table that the strarray formatter indexes with (value - offset) - confirm.
 */
233 const char **entries;
/*
 * Define strarray__<array>, a struct strarray describing the string table
 * @array; nr_entries is taken from the array's size.
 */
236 #define DEFINE_STRARRAY(array) struct strarray strarray__##array = { \
237 .nr_entries = ARRAY_SIZE(array), \
/*
 * Like DEFINE_STRARRAY() but with an extra @off argument, for tables whose
 * first entry does not correspond to value 0.
 */
241 #define DEFINE_STRARRAY_OFFSET(array, off) struct strarray strarray__##array = { \
243 .nr_entries = ARRAY_SIZE(array), \
247 static size_t __syscall_arg__scnprintf_strarray(char *bf, size_t size,
249 struct syscall_arg *arg)
251 struct strarray *sa = arg->parm;
252 int idx = arg->val - sa->offset;
254 if (idx < 0 || idx >= sa->nr_entries)
255 return scnprintf(bf, size, intfmt, arg->val);
257 return scnprintf(bf, size, "%s", sa->entries[idx]);
/* String-table formatter that prints values without a name in decimal. */
static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size,
					      struct syscall_arg *arg)
{
	const char *fallback_fmt = "%d";

	return __syscall_arg__scnprintf_strarray(bf, size, fallback_fmt, arg);
}

#define SCA_STRARRAY syscall_arg__scnprintf_strarray
/* String-table formatter that prints values without a name in hex. */
static size_t syscall_arg__scnprintf_strhexarray(char *bf, size_t size,
						 struct syscall_arg *arg)
{
	const char *fallback_fmt = "%#x";

	return __syscall_arg__scnprintf_strarray(bf, size, fallback_fmt, arg);
}

#define SCA_STRHEXARRAY syscall_arg__scnprintf_strhexarray
/* Forward declaration; the fd formatter is defined further down in this file. */
276 static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
277 struct syscall_arg *arg);
279 #define SCA_FD syscall_arg__scnprintf_fd
/*
 * Formatter for the directory fd of *at() syscalls: prints "CWD" for one
 * special value and otherwise defers to the plain fd formatter.
 * NOTE(review): the test guarding "CWD" is not visible here; presumably it
 * compares the fd against AT_FDCWD - confirm.
 */
281 static size_t syscall_arg__scnprintf_fd_at(char *bf, size_t size,
282 struct syscall_arg *arg)
287 return scnprintf(bf, size, "CWD");
289 return syscall_arg__scnprintf_fd(bf, size, arg);
292 #define SCA_FDAT syscall_arg__scnprintf_fd_at
/* Forward declaration; the close() fd formatter is defined further down in this file. */
294 static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
295 struct syscall_arg *arg);
297 #define SCA_CLOSE_FD syscall_arg__scnprintf_close_fd
299 static size_t syscall_arg__scnprintf_hex(char *bf, size_t size,
300 struct syscall_arg *arg)
302 return scnprintf(bf, size, "%#lx", arg->val);
305 #define SCA_HEX syscall_arg__scnprintf_hex
/*
 * Format a memory protection mask (arg->val) symbolically.
 *
 * PROT_NONE prints as "NONE". Otherwise the name of every PROT_* bit that is
 * tested and set is appended, separated by '|'. The final scnprintf() emits
 * the value of prot in hex, presumably only for bits left without a name.
 */
307 static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size,
308 struct syscall_arg *arg)
310 int printed = 0, prot = arg->val;
312 if (prot == PROT_NONE)
313 return scnprintf(bf, size, "NONE");
314 #define P_MMAP_PROT(n) \
315 if (prot & PROT_##n) { \
316 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
326 P_MMAP_PROT(GROWSDOWN);
327 P_MMAP_PROT(GROWSUP);
331 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", prot);
336 #define SCA_MMAP_PROT syscall_arg__scnprintf_mmap_prot
/*
 * Format mmap() flags (arg->val) symbolically: the name of every MAP_* bit
 * that is tested and set is appended, separated by '|'. MAP_UNINITIALIZED is
 * only handled when the headers define it. The final scnprintf() emits the
 * value of flags in hex, presumably only for bits left without a name.
 */
338 static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size,
339 struct syscall_arg *arg)
341 int printed = 0, flags = arg->val;
343 #define P_MMAP_FLAG(n) \
344 if (flags & MAP_##n) { \
345 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
350 P_MMAP_FLAG(PRIVATE);
354 P_MMAP_FLAG(ANONYMOUS);
355 P_MMAP_FLAG(DENYWRITE);
356 P_MMAP_FLAG(EXECUTABLE);
359 P_MMAP_FLAG(GROWSDOWN);
361 P_MMAP_FLAG(HUGETLB);
364 P_MMAP_FLAG(NONBLOCK);
365 P_MMAP_FLAG(NORESERVE);
366 P_MMAP_FLAG(POPULATE);
368 #ifdef MAP_UNINITIALIZED
369 P_MMAP_FLAG(UNINITIALIZED);
374 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
379 #define SCA_MMAP_FLAGS syscall_arg__scnprintf_mmap_flags
/*
 * Format the madvise() behavior argument (arg->val): a known MADV_* value
 * prints as its name without the MADV_ prefix, anything else prints in hex.
 * Some values are only handled when the headers define them.
 */
381 static size_t syscall_arg__scnprintf_madvise_behavior(char *bf, size_t size,
382 struct syscall_arg *arg)
384 int behavior = arg->val;
387 #define P_MADV_BHV(n) case MADV_##n: return scnprintf(bf, size, #n)
390 P_MADV_BHV(SEQUENTIAL);
391 P_MADV_BHV(WILLNEED);
392 P_MADV_BHV(DONTNEED);
394 P_MADV_BHV(DONTFORK);
396 P_MADV_BHV(HWPOISON);
397 #ifdef MADV_SOFT_OFFLINE
398 P_MADV_BHV(SOFT_OFFLINE);
400 P_MADV_BHV(MERGEABLE);
401 P_MADV_BHV(UNMERGEABLE);
403 P_MADV_BHV(HUGEPAGE);
405 #ifdef MADV_NOHUGEPAGE
406 P_MADV_BHV(NOHUGEPAGE);
409 P_MADV_BHV(DONTDUMP);
418 return scnprintf(bf, size, "%#x", behavior);
421 #define SCA_MADV_BHV syscall_arg__scnprintf_madvise_behavior
/*
 * Format the flock() operation argument (arg->val): "NONE" in one early-exit
 * case, otherwise the names of the LOCK_* commands whose bits are all set,
 * separated by '|'. The final scnprintf() emits the value of op in hex,
 * presumably only for bits left without a name.
 */
423 static size_t syscall_arg__scnprintf_flock(char *bf, size_t size,
424 struct syscall_arg *arg)
426 int printed = 0, op = arg->val;
429 return scnprintf(bf, size, "NONE");
431 if ((op & LOCK_##cmd) == LOCK_##cmd) { \
432 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #cmd); \
447 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", op);
452 #define SCA_FLOCK syscall_arg__scnprintf_flock
/*
 * Format the futex() op argument.
 *
 * The command (op & FUTEX_CMD_MASK) prints as its name without the FUTEX_
 * prefix, or in hex when unknown, followed by "|PRIV" and/or "|CLKRT" when
 * FUTEX_PRIVATE_FLAG / FUTEX_CLOCK_REALTIME are set.
 *
 * Each command also ORs a set of SCF_* bits into arg->mask. The SCF_* values
 * are one bit per syscall argument position; presumably a set bit hides an
 * argument the command does not use (the open flags formatter uses arg->mask
 * the same way to mask the mode parameter) - confirm against the consumer of
 * arg->mask.
 */
454 static size_t syscall_arg__scnprintf_futex_op(char *bf, size_t size, struct syscall_arg *arg)
456 enum syscall_futex_args {
457 SCF_UADDR = (1 << 0),
460 SCF_TIMEOUT = (1 << 3),
461 SCF_UADDR2 = (1 << 4),
465 int cmd = op & FUTEX_CMD_MASK;
469 #define P_FUTEX_OP(n) case FUTEX_##n: printed = scnprintf(bf, size, #n);
470 P_FUTEX_OP(WAIT); arg->mask |= SCF_VAL3|SCF_UADDR2; break;
471 P_FUTEX_OP(WAKE); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
472 P_FUTEX_OP(FD); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
473 P_FUTEX_OP(REQUEUE); arg->mask |= SCF_VAL3|SCF_TIMEOUT; break;
474 P_FUTEX_OP(CMP_REQUEUE); arg->mask |= SCF_TIMEOUT; break;
475 P_FUTEX_OP(CMP_REQUEUE_PI); arg->mask |= SCF_TIMEOUT; break;
476 P_FUTEX_OP(WAKE_OP); break;
477 P_FUTEX_OP(LOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
478 P_FUTEX_OP(UNLOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
479 P_FUTEX_OP(TRYLOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2; break;
480 P_FUTEX_OP(WAIT_BITSET); arg->mask |= SCF_UADDR2; break;
481 P_FUTEX_OP(WAKE_BITSET); arg->mask |= SCF_UADDR2; break;
482 P_FUTEX_OP(WAIT_REQUEUE_PI); break;
483 default: printed = scnprintf(bf, size, "%#x", cmd); break;
486 if (op & FUTEX_PRIVATE_FLAG)
487 printed += scnprintf(bf + printed, size - printed, "|PRIV");
489 if (op & FUTEX_CLOCK_REALTIME)
490 printed += scnprintf(bf + printed, size - printed, "|CLKRT");
495 #define SCA_FUTEX_OP syscall_arg__scnprintf_futex_op
/*
 * String tables for the strarray formatters. Each table maps consecutive
 * argument values to names and is wrapped in a struct strarray named
 * strarray__<table>.
 */
/* epoll_ctl() ops; the table is declared with offset 1. */
497 static const char *epoll_ctl_ops[] = { "ADD", "DEL", "MOD", };
498 static DEFINE_STRARRAY_OFFSET(epoll_ctl_ops, 1);
/* Interval timer selectors (getitimer/setitimer "which"). */
500 static const char *itimers[] = { "REAL", "VIRTUAL", "PROF", };
501 static DEFINE_STRARRAY(itimers);
/* lseek() whence values. */
503 static const char *whences[] = { "SET", "CUR", "END",
511 static DEFINE_STRARRAY(whences);
/* fcntl() commands. */
513 static const char *fcntl_cmds[] = {
514 "DUPFD", "GETFD", "SETFD", "GETFL", "SETFL", "GETLK", "SETLK",
515 "SETLKW", "SETOWN", "GETOWN", "SETSIG", "GETSIG", "F_GETLK64",
516 "F_SETLK64", "F_SETLKW64", "F_SETOWN_EX", "F_GETOWN_EX",
519 static DEFINE_STRARRAY(fcntl_cmds);
/* Resource names for getrlimit/setrlimit/prlimit64. */
521 static const char *rlimit_resources[] = {
522 "CPU", "FSIZE", "DATA", "STACK", "CORE", "RSS", "NPROC", "NOFILE",
523 "MEMLOCK", "AS", "LOCKS", "SIGPENDING", "MSGQUEUE", "NICE", "RTPRIO",
526 static DEFINE_STRARRAY(rlimit_resources);
/* rt_sigprocmask() "how" values. */
528 static const char *sighow[] = { "BLOCK", "UNBLOCK", "SETMASK", };
529 static DEFINE_STRARRAY(sighow);
/* Clock ids (clock_gettime "clk_id"). */
531 static const char *clockid[] = {
532 "REALTIME", "MONOTONIC", "PROCESS_CPUTIME_ID", "THREAD_CPUTIME_ID",
533 "MONOTONIC_RAW", "REALTIME_COARSE", "MONOTONIC_COARSE",
535 static DEFINE_STRARRAY(clockid);
/* Socket address/protocol families (socket/socketpair "family"). */
537 static const char *socket_families[] = {
538 "UNSPEC", "LOCAL", "INET", "AX25", "IPX", "APPLETALK", "NETROM",
539 "BRIDGE", "ATMPVC", "X25", "INET6", "ROSE", "DECnet", "NETBEUI",
540 "SECURITY", "KEY", "NETLINK", "PACKET", "ASH", "ECONET", "ATMSVC",
541 "RDS", "SNA", "IRDA", "PPPOX", "WANPIPE", "LLC", "IB", "CAN", "TIPC",
542 "BLUETOOTH", "IUCV", "RXRPC", "ISDN", "PHONET", "IEEE802154", "CAIF",
543 "ALG", "NFC", "VSOCK",
545 static DEFINE_STRARRAY(socket_families);
/*
 * Format the socket() type argument. The value is split with SOCK_TYPE_MASK
 * (given a fallback of 0xf when the headers lack it) into the socket type
 * proper and the flags OR'ed into it. The type prints by name, or in hex
 * when unknown; each recognised SOCK_* flag is appended as "|NAME" and
 * cleared from flags, and the final scnprintf() emits flags in hex.
 */
547 #ifndef SOCK_TYPE_MASK
548 #define SOCK_TYPE_MASK 0xf
551 static size_t syscall_arg__scnprintf_socket_type(char *bf, size_t size,
552 struct syscall_arg *arg)
556 flags = type & ~SOCK_TYPE_MASK;
558 type &= SOCK_TYPE_MASK;
560 * Can't use a strarray, MIPS may override for ABI reasons.
563 #define P_SK_TYPE(n) case SOCK_##n: printed = scnprintf(bf, size, #n); break;
568 P_SK_TYPE(SEQPACKET);
573 printed = scnprintf(bf, size, "%#x", type);
576 #define P_SK_FLAG(n) \
577 if (flags & SOCK_##n) { \
578 printed += scnprintf(bf + printed, size - printed, "|%s", #n); \
579 flags &= ~SOCK_##n; \
587 printed += scnprintf(bf + printed, size - printed, "|%#x", flags);
592 #define SCA_SK_TYPE syscall_arg__scnprintf_socket_type
/*
 * Fallback MSG_* values for headers that lack them, followed by the
 * formatter for send/recv flags: "NONE" in one early-exit case, otherwise
 * the names of the set MSG_* bits separated by '|'. The final scnprintf()
 * emits flags in hex, presumably only for bits left without a name.
 */
595 #define MSG_PROBE 0x10
597 #ifndef MSG_WAITFORONE
598 #define MSG_WAITFORONE 0x10000
600 #ifndef MSG_SENDPAGE_NOTLAST
601 #define MSG_SENDPAGE_NOTLAST 0x20000
604 #define MSG_FASTOPEN 0x20000000
607 static size_t syscall_arg__scnprintf_msg_flags(char *bf, size_t size,
608 struct syscall_arg *arg)
610 int printed = 0, flags = arg->val;
613 return scnprintf(bf, size, "NONE");
614 #define P_MSG_FLAG(n) \
615 if (flags & MSG_##n) { \
616 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
622 P_MSG_FLAG(DONTROUTE);
627 P_MSG_FLAG(DONTWAIT);
634 P_MSG_FLAG(ERRQUEUE);
635 P_MSG_FLAG(NOSIGNAL);
637 P_MSG_FLAG(WAITFORONE);
638 P_MSG_FLAG(SENDPAGE_NOTLAST);
639 P_MSG_FLAG(FASTOPEN);
640 P_MSG_FLAG(CMSG_CLOEXEC);
644 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
649 #define SCA_MSG_FLAGS syscall_arg__scnprintf_msg_flags
/*
 * Format the access() mode argument: F_OK prints as "F"; otherwise the
 * letter of every <n>_OK bit that is set is appended with no separator, and
 * the final scnprintf() emits mode in hex after a '|'.
 */
651 static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size,
652 struct syscall_arg *arg)
657 if (mode == F_OK) /* 0 */
658 return scnprintf(bf, size, "F");
660 if (mode & n##_OK) { \
661 printed += scnprintf(bf + printed, size - printed, "%s", #n); \
671 printed += scnprintf(bf + printed, size - printed, "|%#x", mode);
676 #define SCA_ACCMODE syscall_arg__scnprintf_access_mode
/*
 * Format open() flags symbolically.
 *
 * When O_CREAT is absent the bit for the following argument (the mode) is
 * set in arg->mask. "RDONLY" is printed in one early-exit case; otherwise
 * the names of the set O_* bits are joined with '|'. SYNC is only printed
 * when every bit of O_SYNC is set. The final scnprintf() emits flags in hex,
 * presumably only for bits left without a name.
 */
678 static size_t syscall_arg__scnprintf_open_flags(char *bf, size_t size,
679 struct syscall_arg *arg)
681 int printed = 0, flags = arg->val;
683 if (!(flags & O_CREAT))
684 arg->mask |= 1 << (arg->idx + 1); /* Mask the mode parm */
687 return scnprintf(bf, size, "RDONLY");
689 if (flags & O_##n) { \
690 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
714 if ((flags & O_SYNC) == O_SYNC)
715 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", "SYNC");
727 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
732 #define SCA_OPEN_FLAGS syscall_arg__scnprintf_open_flags
/*
 * Format eventfd2() flags: "NONE" in one early-exit case, otherwise the
 * names of the set EFD_* bits separated by '|'. The final scnprintf() emits
 * flags in hex, presumably only for bits left without a name.
 */
734 static size_t syscall_arg__scnprintf_eventfd_flags(char *bf, size_t size,
735 struct syscall_arg *arg)
737 int printed = 0, flags = arg->val;
740 return scnprintf(bf, size, "NONE");
742 if (flags & EFD_##n) { \
743 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
753 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
758 #define SCA_EFD_FLAGS syscall_arg__scnprintf_eventfd_flags
/*
 * Format pipe2() flags: the names of the set O_* bits separated by '|'. The
 * final scnprintf() emits flags in hex, presumably only for bits left
 * without a name.
 */
760 static size_t syscall_arg__scnprintf_pipe_flags(char *bf, size_t size,
761 struct syscall_arg *arg)
763 int printed = 0, flags = arg->val;
766 if (flags & O_##n) { \
767 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
776 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
781 #define SCA_PIPE_FLAGS syscall_arg__scnprintf_pipe_flags
/*
 * Format a signal number: a known signal prints as its name without the SIG
 * prefix, anything else prints in hex.
 */
783 static size_t syscall_arg__scnprintf_signum(char *bf, size_t size, struct syscall_arg *arg)
788 #define P_SIGNUM(n) case SIG##n: return scnprintf(bf, size, #n)
823 return scnprintf(bf, size, "%#x", sig);
826 #define SCA_SIGNUM syscall_arg__scnprintf_signum
/*
 * Names of the tty ioctl commands, indexed from 0x5401 (TCGETS). The
 * designated initializers ([0x27], [0x50], [0x60]) skip over indexes, so the
 * table is sparse and the skipped slots are NULL.
 */
828 #define TCGETS 0x5401
830 static const char *tioctls[] = {
831 "TCGETS", "TCSETS", "TCSETSW", "TCSETSF", "TCGETA", "TCSETA", "TCSETAW",
832 "TCSETAF", "TCSBRK", "TCXONC", "TCFLSH", "TIOCEXCL", "TIOCNXCL",
833 "TIOCSCTTY", "TIOCGPGRP", "TIOCSPGRP", "TIOCOUTQ", "TIOCSTI",
834 "TIOCGWINSZ", "TIOCSWINSZ", "TIOCMGET", "TIOCMBIS", "TIOCMBIC",
835 "TIOCMSET", "TIOCGSOFTCAR", "TIOCSSOFTCAR", "FIONREAD", "TIOCLINUX",
836 "TIOCCONS", "TIOCGSERIAL", "TIOCSSERIAL", "TIOCPKT", "FIONBIO",
837 "TIOCNOTTY", "TIOCSETD", "TIOCGETD", "TCSBRKP", [0x27] = "TIOCSBRK",
838 "TIOCCBRK", "TIOCGSID", "TCGETS2", "TCSETS2", "TCSETSW2", "TCSETSF2",
839 "TIOCGRS485", "TIOCSRS485", "TIOCGPTN", "TIOCSPTLCK",
840 "TIOCGDEV||TCGETX", "TCSETX", "TCSETXF", "TCSETXW", "TIOCSIG",
841 "TIOCVHANGUP", "TIOCGPKT", "TIOCGPTLCK", "TIOCGEXCL",
842 [0x50] = "FIONCLEX", "FIOCLEX", "FIOASYNC", "TIOCSERCONFIG",
843 "TIOCSERGWILD", "TIOCSERSWILD", "TIOCGLCKTRMIOS", "TIOCSLCKTRMIOS",
844 "TIOCSERGSTRUCT", "TIOCSERGETLSR", "TIOCSERGETMULTI", "TIOCSERSETMULTI",
845 "TIOCMIWAIT", "TIOCGICOUNT", [0x60] = "FIOQSIZE",
848 static DEFINE_STRARRAY_OFFSET(tioctls, 0x5401);
/*
 * Initializer fragment for a struct syscall_fmt entry: format argument
 * number @arg with the string-table formatter, using strarray__<array> as
 * its table. @name is not expanded; it only labels the argument at the
 * point of use.
 */
#define STRARRAY(arg, name, array) \
	  .arg_scnprintf = { [arg] = syscall_arg__scnprintf_strarray, }, \
	  .arg_parm	 = { [arg] = &strarray__##array, }
/*
 * Per-syscall formatting overrides.
 *
 * Each entry names a syscall and may supply per-argument formatters
 * (arg_scnprintf, indexed by argument position) together with the parameter
 * each formatter receives (arg_parm), plus flags such as errmsg, hexret and
 * timeout and an optional alias.
 *
 * The entries MUST stay sorted by .name in strcmp() order:
 * syscall_fmt__find() looks them up with bsearch().
 */
854 static struct syscall_fmt {
857 size_t (*arg_scnprintf[6])(char *bf, size_t size, struct syscall_arg *arg);
863 { .name = "access", .errmsg = true,
864 .arg_scnprintf = { [1] = SCA_ACCMODE, /* mode */ }, },
865 { .name = "arch_prctl", .errmsg = true, .alias = "prctl", },
866 { .name = "brk", .hexret = true,
867 .arg_scnprintf = { [0] = SCA_HEX, /* brk */ }, },
868 { .name = "clock_gettime", .errmsg = true, STRARRAY(0, clk_id, clockid), },
869 { .name = "close", .errmsg = true,
870 .arg_scnprintf = { [0] = SCA_CLOSE_FD, /* fd */ }, },
871 { .name = "connect", .errmsg = true, },
872 { .name = "dup", .errmsg = true,
873 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
874 { .name = "dup2", .errmsg = true,
875 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
876 { .name = "dup3", .errmsg = true,
877 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
878 { .name = "epoll_ctl", .errmsg = true, STRARRAY(1, op, epoll_ctl_ops), },
879 { .name = "eventfd2", .errmsg = true,
880 .arg_scnprintf = { [1] = SCA_EFD_FLAGS, /* flags */ }, },
881 { .name = "faccessat", .errmsg = true,
882 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
883 { .name = "fadvise64", .errmsg = true,
884 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
885 { .name = "fallocate", .errmsg = true,
886 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
887 { .name = "fchdir", .errmsg = true,
888 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
889 { .name = "fchmod", .errmsg = true,
890 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
891 { .name = "fchmodat", .errmsg = true,
892 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
893 { .name = "fchown", .errmsg = true,
894 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
895 { .name = "fchownat", .errmsg = true,
896 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
897 { .name = "fcntl", .errmsg = true,
898 .arg_scnprintf = { [0] = SCA_FD, /* fd */
899 [1] = SCA_STRARRAY, /* cmd */ },
900 .arg_parm = { [1] = &strarray__fcntl_cmds, /* cmd */ }, },
901 { .name = "fdatasync", .errmsg = true,
902 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
903 { .name = "flock", .errmsg = true,
904 .arg_scnprintf = { [0] = SCA_FD, /* fd */
905 [1] = SCA_FLOCK, /* cmd */ }, },
906 { .name = "fsetxattr", .errmsg = true,
907 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
908 { .name = "fstat", .errmsg = true, .alias = "newfstat",
909 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
910 { .name = "fstatat", .errmsg = true, .alias = "newfstatat",
911 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
912 { .name = "fstatfs", .errmsg = true,
913 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
914 { .name = "fsync", .errmsg = true,
915 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
916 { .name = "ftruncate", .errmsg = true,
917 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
918 { .name = "futex", .errmsg = true,
919 .arg_scnprintf = { [1] = SCA_FUTEX_OP, /* op */ }, },
920 { .name = "futimesat", .errmsg = true,
921 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
922 { .name = "getdents", .errmsg = true,
923 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
924 { .name = "getdents64", .errmsg = true,
925 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
926 { .name = "getitimer", .errmsg = true, STRARRAY(0, which, itimers), },
927 { .name = "getrlimit", .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
928 { .name = "ioctl", .errmsg = true,
929 .arg_scnprintf = { [0] = SCA_FD, /* fd */
930 [1] = SCA_STRHEXARRAY, /* cmd */
931 [2] = SCA_HEX, /* arg */ },
932 .arg_parm = { [1] = &strarray__tioctls, /* cmd */ }, },
933 { .name = "kill", .errmsg = true,
934 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
935 { .name = "linkat", .errmsg = true,
936 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
937 { .name = "lseek", .errmsg = true,
938 .arg_scnprintf = { [0] = SCA_FD, /* fd */
939 [2] = SCA_STRARRAY, /* whence */ },
940 .arg_parm = { [2] = &strarray__whences, /* whence */ }, },
941 { .name = "lstat", .errmsg = true, .alias = "newlstat", },
942 { .name = "madvise", .errmsg = true,
943 .arg_scnprintf = { [0] = SCA_HEX, /* start */
944 [2] = SCA_MADV_BHV, /* behavior */ }, },
945 { .name = "mkdirat", .errmsg = true,
946 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
947 { .name = "mknodat", .errmsg = true,
948 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
949 { .name = "mlock", .errmsg = true,
950 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
951 { .name = "mlockall", .errmsg = true,
952 .arg_scnprintf = { [0] = SCA_HEX, /* flags (mlockall() takes flags, not an address) */ }, },
953 { .name = "mmap", .hexret = true,
954 .arg_scnprintf = { [0] = SCA_HEX, /* addr */
955 [2] = SCA_MMAP_PROT, /* prot */
956 [3] = SCA_MMAP_FLAGS, /* flags */ }, },
957 { .name = "mprotect", .errmsg = true,
958 .arg_scnprintf = { [0] = SCA_HEX, /* start */
959 [2] = SCA_MMAP_PROT, /* prot */ }, },
960 { .name = "mremap", .hexret = true,
961 .arg_scnprintf = { [0] = SCA_HEX, /* addr */
962 [4] = SCA_HEX, /* new_addr */ }, },
963 { .name = "munlock", .errmsg = true,
964 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
965 { .name = "munmap", .errmsg = true,
966 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
967 { .name = "name_to_handle_at", .errmsg = true,
968 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
969 { .name = "newfstatat", .errmsg = true,
970 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
971 { .name = "open", .errmsg = true,
972 .arg_scnprintf = { [1] = SCA_OPEN_FLAGS, /* flags */ }, },
973 { .name = "open_by_handle_at", .errmsg = true,
974 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
975 [2] = SCA_OPEN_FLAGS, /* flags */ }, },
976 { .name = "openat", .errmsg = true,
977 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
978 [2] = SCA_OPEN_FLAGS, /* flags */ }, },
979 { .name = "pipe2", .errmsg = true,
980 .arg_scnprintf = { [1] = SCA_PIPE_FLAGS, /* flags */ }, },
981 { .name = "poll", .errmsg = true, .timeout = true, },
982 { .name = "ppoll", .errmsg = true, .timeout = true, },
983 { .name = "pread", .errmsg = true, .alias = "pread64",
984 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
/* NOTE(review): "pread" as the alias of preadv looks suspicious - confirm it is intended. */
985 { .name = "preadv", .errmsg = true, .alias = "pread",
986 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
987 { .name = "prlimit64", .errmsg = true, STRARRAY(1, resource, rlimit_resources), },
988 { .name = "pwrite", .errmsg = true, .alias = "pwrite64",
989 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
990 { .name = "pwritev", .errmsg = true,
991 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
992 { .name = "read", .errmsg = true,
993 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
994 { .name = "readlinkat", .errmsg = true,
995 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
996 { .name = "readv", .errmsg = true,
997 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
998 { .name = "recvfrom", .errmsg = true,
999 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1000 { .name = "recvmmsg", .errmsg = true,
1001 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1002 { .name = "recvmsg", .errmsg = true,
1003 .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
1004 { .name = "renameat", .errmsg = true,
1005 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1006 { .name = "rt_sigaction", .errmsg = true,
1007 .arg_scnprintf = { [0] = SCA_SIGNUM, /* sig */ }, },
1008 { .name = "rt_sigprocmask", .errmsg = true, STRARRAY(0, how, sighow), },
1009 { .name = "rt_sigqueueinfo", .errmsg = true,
1010 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1011 { .name = "rt_tgsigqueueinfo", .errmsg = true,
1012 .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
1013 { .name = "select", .errmsg = true, .timeout = true, },
1014 { .name = "sendmmsg", .errmsg = true,
1015 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1016 { .name = "sendmsg", .errmsg = true,
1017 .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
1018 { .name = "sendto", .errmsg = true,
1019 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1020 { .name = "setitimer", .errmsg = true, STRARRAY(0, which, itimers), },
1021 { .name = "setrlimit", .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
1022 { .name = "shutdown", .errmsg = true,
1023 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1024 { .name = "socket", .errmsg = true,
1025 .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
1026 [1] = SCA_SK_TYPE, /* type */ },
1027 .arg_parm = { [0] = &strarray__socket_families, /* family */ }, },
1028 { .name = "socketpair", .errmsg = true,
1029 .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
1030 [1] = SCA_SK_TYPE, /* type */ },
1031 .arg_parm = { [0] = &strarray__socket_families, /* family */ }, },
1032 { .name = "stat", .errmsg = true, .alias = "newstat", },
1033 { .name = "symlinkat", .errmsg = true,
1034 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1035 { .name = "tgkill", .errmsg = true,
1036 .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
1037 { .name = "tkill", .errmsg = true,
1038 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1039 { .name = "uname", .errmsg = true, .alias = "newuname", },
1040 { .name = "unlinkat", .errmsg = true,
1041 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1042 { .name = "utimensat", .errmsg = true,
1043 .arg_scnprintf = { [0] = SCA_FDAT, /* dirfd */ }, },
1044 { .name = "write", .errmsg = true,
1045 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1046 { .name = "writev", .errmsg = true,
1047 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1050 static int syscall_fmt__cmp(const void *name, const void *fmtp)
1052 const struct syscall_fmt *fmt = fmtp;
1053 return strcmp(name, fmt->name);
1056 static struct syscall_fmt *syscall_fmt__find(const char *name)
1058 const int nmemb = ARRAY_SIZE(syscall_fmts);
1059 return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp);
/* Tracepoint format whose field list describes the syscall's arguments. */
1063 struct event_format *tp_format;
/* Optional formatting overrides for this syscall; users test it for NULL. */
1066 struct syscall_fmt *fmt;
/* Per-argument formatters, allocated by syscall__set_arg_fmts(). */
1067 size_t (**arg_scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
1071 static size_t fprintf_duration(unsigned long t, FILE *fp)
1073 double duration = (double)t / NSEC_PER_MSEC;
1074 size_t printed = fprintf(fp, "(");
1076 if (duration >= 1.0)
1077 printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration);
1078 else if (duration >= 0.01)
1079 printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration);
1081 printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration);
1082 return printed + fprintf(fp, "): ");
/* Per-thread tracing state; an instance is stored in thread->priv. */
1085 struct thread_trace {
/* Incremented by thread__trace() on every call for the thread. */
1089 unsigned long nr_events;
/* Created with intlist__new() when the thread_trace is allocated. */
1097 struct intlist *syscall_stats;
1100 static struct thread_trace *thread_trace__new(void)
1102 struct thread_trace *ttrace = zalloc(sizeof(struct thread_trace));
1105 ttrace->paths.max = -1;
1107 ttrace->syscall_stats = intlist__new(NULL);
/*
 * Get the tracing state attached to @thread, creating it with
 * thread_trace__new() on first use, and count one more event for it.
 * When no state is available a red out-of-memory warning is written to @fp.
 */
1112 static struct thread_trace *thread__trace(struct thread *thread, FILE *fp)
1114 struct thread_trace *ttrace;
1119 if (thread->priv == NULL)
1120 thread->priv = thread_trace__new();
1122 if (thread->priv == NULL)
1125 ttrace = thread->priv;
1126 ++ttrace->nr_events;
1130 color_fprintf(fp, PERF_COLOR_RED,
1131 "WARNING: not enough memory, dropping samples!\n");
/* Embedded tool; trace__tool_process() recovers the trace via container_of(). */
1136 struct perf_tool tool;
/* Syscall descriptions indexed by syscall id, grown on demand. */
1143 struct syscall *table;
1145 struct perf_record_opts opts;
/* Host machine, created by trace__symbols_init(). */
1146 struct machine *host;
1150 unsigned long nr_events;
/* Optional list of syscall names; combined with not_ev_qualifier to filter. */
1151 struct strlist *ev_qualifier;
1152 bool not_ev_qualifier;
1154 const char *last_vfs_getname;
1155 struct intlist *tid_list;
1156 struct intlist *pid_list;
/* When set, entry lines also carry the thread id (and optionally its comm). */
1158 bool multiple_threads;
1161 bool show_tool_stats;
/* Duration threshold in milliseconds, see trace__filter_duration(). */
1162 double duration_filter;
/* Counters; proc_getname is bumped for each fd path lookup through /proc. */
1165 u64 vfs_getname, proc_getname;
1169 static int trace__set_fd_pathname(struct thread *thread, int fd, const char *pathname)
1171 struct thread_trace *ttrace = thread->priv;
1173 if (fd > ttrace->paths.max) {
1174 char **npath = realloc(ttrace->paths.table, (fd + 1) * sizeof(char *));
1179 if (ttrace->paths.max != -1) {
1180 memset(npath + ttrace->paths.max + 1, 0,
1181 (fd - ttrace->paths.max) * sizeof(char *));
1183 memset(npath, 0, (fd + 1) * sizeof(char *));
1186 ttrace->paths.table = npath;
1187 ttrace->paths.max = fd;
1190 ttrace->paths.table[fd] = strdup(pathname);
1192 return ttrace->paths.table[fd] != NULL ? 0 : -1;
/*
 * Resolve @fd of @thread by reading the matching /proc symlink and cache the
 * result with trace__set_fd_pathname().
 *
 * The link lives under /proc/<pid>/fd/ when the thread is the group leader
 * (pid_ == tid) and under /proc/<pid>/task/<tid>/fd/ otherwise. The link
 * size reported by lstat() is checked against the buffer, and a readlink()
 * result longer than that size is rejected, before the path is
 * NUL-terminated.
 *
 * NOTE(review): st_size of procfs symlinks may not reflect the target
 * length, in which case the "ret > st.st_size" test would drop long paths -
 * confirm on the kernels of interest.
 */
1195 static int thread__read_fd_path(struct thread *thread, int fd)
1197 char linkname[PATH_MAX], pathname[PATH_MAX];
1201 if (thread->pid_ == thread->tid) {
1202 scnprintf(linkname, sizeof(linkname),
1203 "/proc/%d/fd/%d", thread->pid_, fd);
1205 scnprintf(linkname, sizeof(linkname),
1206 "/proc/%d/task/%d/fd/%d", thread->pid_, thread->tid, fd);
1209 if (lstat(linkname, &st) < 0 || st.st_size + 1 > (off_t)sizeof(pathname))
1212 ret = readlink(linkname, pathname, sizeof(pathname));
1214 if (ret < 0 || ret > st.st_size)
1217 pathname[ret] = '\0';
1218 return trace__set_fd_pathname(thread, fd, pathname);
/*
 * Return the cached pathname for @fd of @thread.
 *
 * When the fd is beyond the table or has no cached entry, the path is read
 * from /proc with thread__read_fd_path(), and the lookup is counted in
 * trace->stats.proc_getname.
 */
1221 static const char *thread__fd_path(struct thread *thread, int fd,
1222 struct trace *trace)
1224 struct thread_trace *ttrace = thread->priv;
1232 if ((fd > ttrace->paths.max || ttrace->paths.table[fd] == NULL))
1235 ++trace->stats.proc_getname;
1236 if (thread__read_fd_path(thread, fd)) {
1240 return ttrace->paths.table[fd];
/*
 * Format a file descriptor argument as its number, followed by "<path>"
 * using the pathname obtained from thread__fd_path().
 */
1243 static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
1244 struct syscall_arg *arg)
1247 size_t printed = scnprintf(bf, size, "%d", fd);
1248 const char *path = thread__fd_path(arg->thread, fd, arg->trace);
1251 printed += scnprintf(bf + printed, size - printed, "<%s>", path);
/*
 * Formatter for the fd argument of close(): print it like any other fd,
 * then drop the cached pathname for that descriptor, as the number may be
 * reused for a different file afterwards.
 */
1256 static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
1257 struct syscall_arg *arg)
1260 size_t printed = syscall_arg__scnprintf_fd(bf, size, arg);
1261 struct thread_trace *ttrace = arg->thread->priv;
1263 if (ttrace && fd >= 0 && fd <= ttrace->paths.max) {
1264 free(ttrace->paths.table[fd]);
1265 ttrace->paths.table[fd] = NULL;
1271 static bool trace__filter_duration(struct trace *trace, double t)
1273 return t < (trace->duration_filter * NSEC_PER_MSEC);
1276 static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
1278 double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC;
1280 return fprintf(fp, "%10.3f ", ts);
/*
 * Flags shared between the signal handler and the main loop.
 *
 * They are written asynchronously from sig_handler(), so they must be
 * volatile sig_atomic_t: that is the only kind of static object a signal
 * handler may portably store to, and volatile keeps the compiler from
 * caching the value in a loop that polls it.
 */
static volatile sig_atomic_t done = false;
static volatile sig_atomic_t interrupted = false;

/*
 * Signal handler: request termination of the main loop and record whether
 * the request came from SIGINT.
 */
static void sig_handler(int sig)
{
	done = true;
	interrupted = sig == SIGINT;
}
/*
 * Print the prefix of a trace line to @fp: the timestamp and the duration,
 * and, when several threads are being traced, the thread id, preceded by
 * the comm (at most 14 characters) if trace->show_comm is set.
 */
1292 static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread,
1293 u64 duration, u64 tstamp, FILE *fp)
1295 size_t printed = trace__fprintf_tstamp(trace, tstamp, fp);
1296 printed += fprintf_duration(duration, fp);
1298 if (trace->multiple_threads) {
1299 if (trace->show_comm)
1300 printed += fprintf(fp, "%.14s/", thread__comm_str(thread));
1301 printed += fprintf(fp, "%d ", thread->tid);
/*
 * Process one non-sample event. PERF_RECORD_LOST is reported in red on
 * trace->output and handed to machine__process_lost_event(); the other
 * visible path forwards the event to machine__process_event().
 */
1307 static int trace__process_event(struct trace *trace, struct machine *machine,
1308 union perf_event *event, struct perf_sample *sample)
1312 switch (event->header.type) {
1313 case PERF_RECORD_LOST:
1314 color_fprintf(trace->output, PERF_COLOR_RED,
1315 "LOST %" PRIu64 " events!\n", event->lost.lost);
1316 ret = machine__process_lost_event(machine, event, sample);
1318 ret = machine__process_event(machine, event, sample);
/*
 * perf_tool callback adapter: recovers the enclosing struct trace from the
 * embedded tool member and forwards the event to trace__process_event().
 */
1325 static int trace__tool_process(struct perf_tool *tool,
1326 union perf_event *event,
1327 struct perf_sample *sample,
1328 struct machine *machine)
1330 struct trace *trace = container_of(tool, struct trace, tool);
1331 return trace__process_event(trace, machine, event, sample);
/*
 * Initialise symbol handling and the host machine, then synthesize events
 * for already-running threads through trace__tool_process().
 * NOTE(review): error checks, the else keyword and the return are on lines
 * not shown; the two synthesize calls look like alternative branches.
 */
1334 static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
1336 int err = symbol__init();
1341 trace->host = machine__new_host();
1342 if (trace->host == NULL)
/* With a task target only the threads in the evlist thread map are synthesized. */
1345 if (perf_target__has_task(&trace->opts.target)) {
1346 err = perf_event__synthesize_thread_map(&trace->tool, evlist->threads,
1347 trace__tool_process,
1350 err = perf_event__synthesize_threads(&trace->tool, trace__tool_process,
/*
 * Build the per-argument formatter table for a syscall.
 * Allocates sc->arg_scnprintf with one slot fewer than the number of
 * tracepoint fields, then walks the fields starting at the second one.
 * Each slot gets the formatter from sc->fmt when one is set; otherwise
 * pointer fields get the hex formatter.
 * NOTE(review): the idx declaration/increment, the guard around the
 * arg_parm assignment and the return values are on lines not shown.
 */
1360 static int syscall__set_arg_fmts(struct syscall *sc)
1362 struct format_field *field;
1365 sc->arg_scnprintf = calloc(sc->tp_format->format.nr_fields - 1, sizeof(void *));
1366 if (sc->arg_scnprintf == NULL)
1370 sc->arg_parm = sc->fmt->arg_parm;
/* fields->next: the first field is skipped. */
1372 for (field = sc->tp_format->format.fields->next; field; field = field->next) {
1373 if (sc->fmt && sc->fmt->arg_scnprintf[idx])
1374 sc->arg_scnprintf[idx] = sc->fmt->arg_scnprintf[idx];
1375 else if (field->flags & FIELD_IS_POINTER)
1376 sc->arg_scnprintf[idx] = syscall_arg__scnprintf_hex;
/*
 * Populate trace->syscalls.table[id] on first use.
 * Steps visible here:
 *  - resolve the syscall name with audit_syscall_to_name();
 *  - grow the table to id + 1 entries when id exceeds syscalls.max,
 *    zeroing the newly added entries;
 *  - apply the event qualifier list, marking the entry as filtered;
 *  - look up the static format description and the sys_enter tracepoint
 *    format, retrying with the alias name when the first lookup fails;
 *  - finish by installing argument formatters.
 * NOTE(review): the error returns and the assignment of sc->name are on
 * lines not shown.
 */
1383 static int trace__read_syscall_info(struct trace *trace, int id)
1387 const char *name = audit_syscall_to_name(id, trace->audit.machine);
1392 if (id > trace->syscalls.max) {
/* The result goes into a temporary so the old table survives a failed realloc. */
1393 struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc));
1395 if (nsyscalls == NULL)
/* max == -1 is treated as an empty table: the whole new allocation is zeroed. */
1398 if (trace->syscalls.max != -1) {
1399 memset(nsyscalls + trace->syscalls.max + 1, 0,
1400 (id - trace->syscalls.max) * sizeof(*sc));
1402 memset(nsyscalls, 0, (id + 1) * sizeof(*sc));
1405 trace->syscalls.table = nsyscalls;
1406 trace->syscalls.max = id;
1409 sc = trace->syscalls.table + id;
1412 if (trace->ev_qualifier) {
1413 bool in = strlist__find(trace->ev_qualifier, name) != NULL;
/* Filtered when list membership and the negation flag are equal. */
1415 if (!(in ^ trace->not_ev_qualifier)) {
1416 sc->filtered = true;
1418 * No need to do read tracepoint information since this will be
1425 sc->fmt = syscall_fmt__find(sc->name);
1427 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name);
1428 sc->tp_format = event_format__new("syscalls", tp_name);
/* Retry under the alias recorded in the format table. */
1430 if (sc->tp_format == NULL && sc->fmt && sc->fmt->alias) {
1431 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias);
1432 sc->tp_format = event_format__new("syscalls", tp_name);
1435 if (sc->tp_format == NULL)
1438 return syscall__set_arg_fmts(sc);
/*
 * Format the syscall arguments in args into bf.
 * When the tracepoint format is known, fields are walked from the second
 * one on, with arg.idx and bit advanced together. Each printed argument is
 * emitted as a name followed by its value, comma separated. A zero-valued
 * argument is suppressed unless its formatter is SCA_STRARRAY and it has an
 * associated arg_parm entry. The value is rendered by the per-argument
 * formatter when one is installed, otherwise as a signed long.
 * The last visible scnprintf call prints arguments by index i.
 * NOTE(review): presumably that is the fallback for a missing tp_format; the
 * loop header, format string and return are on lines not shown.
 */
1441 static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
1442 unsigned long *args, struct trace *trace,
1443 struct thread *thread)
1447 if (sc->tp_format != NULL) {
1448 struct format_field *field;
1450 struct syscall_arg arg = {
1457 for (field = sc->tp_format->format.fields->next; field;
1458 field = field->next, ++arg.idx, bit <<= 1) {
1462 * Suppress this argument if its value is zero and
1463 * and we don't have a string associated in an
1466 if (args[arg.idx] == 0 &&
1467 !(sc->arg_scnprintf &&
1468 sc->arg_scnprintf[arg.idx] == SCA_STRARRAY &&
1469 sc->arg_parm[arg.idx]))
1472 printed += scnprintf(bf + printed, size - printed,
1473 "%s%s: ", printed ? ", " : "", field->name);
1474 if (sc->arg_scnprintf && sc->arg_scnprintf[arg.idx]) {
1475 arg.val = args[arg.idx];
1477 arg.parm = sc->arg_parm[arg.idx];
1478 printed += sc->arg_scnprintf[arg.idx](bf + printed,
1479 size - printed, &arg);
1481 printed += scnprintf(bf + printed, size - printed,
1482 "%ld", args[arg.idx]);
1489 printed += scnprintf(bf + printed, size - printed,
1491 printed ? ", " : "", i, args[i]);
/*
 * Signature of the per-tracepoint callbacks stored in evsel->handler and
 * invoked with the trace state, the originating evsel and the parsed sample.
 */
1499 typedef int (*tracepoint_handler)(struct trace *trace, struct perf_evsel *evsel,
1500 struct perf_sample *sample);
/*
 * Return the table entry describing syscall id, loading it on demand.
 * An entry is read with trace__read_syscall_info() when id is beyond
 * syscalls.max or the slot has no name yet; the same condition is checked
 * again afterwards before the entry address is returned.
 * Two diagnostics are written to trace->output: one for an invalid id, with
 * a running count n, and one for problems reading syscall information.
 * NOTE(review): the conditions guarding the diagnostics, the jump labels
 * and the failure return value are on lines not shown.
 */
1502 static struct syscall *trace__syscall_info(struct trace *trace,
1503 struct perf_evsel *evsel, int id)
1509 * XXX: Noticed on x86_64, reproduced as far back as 3.0.36, haven't tried
1510 * before that, leaving at a higher verbosity level till that is
1511 * explained. Reproduced with plain ftrace with:
1513 * echo 1 > /t/events/raw_syscalls/sys_exit/enable
1514 * grep "NR -1 " /t/trace_pipe
1516 * After generating some load on the machine.
1520 fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n",
1521 id, perf_evsel__name(evsel), ++n);
1526 if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) &&
1527 trace__read_syscall_info(trace, id))
1530 if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL))
1533 return &trace->syscalls.table[id];
1537 fprintf(trace->output, "Problems reading syscall %d", id);
1538 if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL)
1539 fprintf(trace->output, "(%s)", trace->syscalls.table[id].name);
1540 fputs(" information\n", trace->output);
/*
 * Fold the duration of one completed syscall into the per-thread, per-id
 * statistics. The stats object lives in the priv pointer of the intlist
 * node keyed by id and is allocated on first use.
 * NOTE(review): the NULL checks after intlist__findnew()/malloc(), the
 * initialisation of the new stats object and the initial value of duration
 * are on lines not shown.
 */
1545 static void thread__update_stats(struct thread_trace *ttrace,
1546 int id, struct perf_sample *sample)
1548 struct int_node *inode;
1549 struct stats *stats;
1552 inode = intlist__findnew(ttrace->syscall_stats, id);
1556 stats = inode->priv;
1557 if (stats == NULL) {
1558 stats = malloc(sizeof(struct stats));
1562 inode->priv = stats;
/* A duration is computed only when an entry time exists and the sample is later. */
1565 if (ttrace->entry_time && sample->time > ttrace->entry_time)
1566 duration = sample->time - ttrace->entry_time;
1568 update_stats(stats, duration);
/*
 * Tracepoint handler for syscall entry.
 * Looks up the syscall description and the per-thread trace state, records
 * the entry timestamp, and formats the syscall name, an opening parenthesis
 * and the arguments into the per-thread entry_str buffer. That buffer is
 * 1024 bytes and allocated on first use.
 * For exit_group and exit the line is printed immediately when no duration
 * filter is active; the other visible path marks the entry as pending.
 * NOTE(review): NULL/filter checks, the else keyword and the returns are on
 * lines not shown.
 */
1571 static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
1572 struct perf_sample *sample)
1577 struct thread *thread;
1578 int id = perf_evsel__sc_tp_uint(evsel, id, sample);
1579 struct syscall *sc = trace__syscall_info(trace, evsel, id);
1580 struct thread_trace *ttrace;
1588 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1589 ttrace = thread__trace(thread, trace->output);
1593 args = perf_evsel__sc_tp_ptr(evsel, args, sample);
1594 ttrace = thread->priv;
1596 if (ttrace->entry_str == NULL) {
1597 ttrace->entry_str = malloc(1024);
1598 if (!ttrace->entry_str)
1602 ttrace->entry_time = sample->time;
1603 msg = ttrace->entry_str;
1604 printed += scnprintf(msg + printed, 1024 - printed, "%s(", sc->name);
1606 printed += syscall__scnprintf_args(sc, msg + printed, 1024 - printed,
1607 args, trace, thread);
/* exit and exit_group get their line printed here, at entry. */
1609 if (!strcmp(sc->name, "exit_group") || !strcmp(sc->name, "exit")) {
1610 if (!trace->duration_filter) {
1611 trace__fprintf_entry_head(trace, thread, 1, sample->time, trace->output);
1612 fprintf(trace->output, "%-70s\n", ttrace->entry_str);
1615 ttrace->entry_pending = true;
/*
 * Tracepoint handler for syscall exit.
 * Updates the per-thread statistics, reads the return value, and - when the
 * syscall is the one recorded in audit.open_id, the result is non-negative
 * and a pathname was captured - associates that pathname with the returned
 * fd. It then records the exit time, computes the duration and applies the
 * duration filter.
 * The output line is the entry head, then either the pending entry string
 * or a continued marker, then the return value. The value is rendered as
 * plain decimal, as errno name plus message, as a timeout, or as hex,
 * depending on sc->fmt. Finally entry_pending is cleared.
 * NOTE(review): several guards, else keywords, jump targets and one output
 * branch are on lines not shown.
 */
1620 static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
1621 struct perf_sample *sample)
1625 struct thread *thread;
1626 int id = perf_evsel__sc_tp_uint(evsel, id, sample);
1627 struct syscall *sc = trace__syscall_info(trace, evsel, id);
1628 struct thread_trace *ttrace;
1636 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1637 ttrace = thread__trace(thread, trace->output);
1642 thread__update_stats(ttrace, id, sample);
1644 ret = perf_evsel__sc_tp_uint(evsel, ret, sample);
/* Consume the pathname captured by trace__vfs_getname() exactly once. */
1646 if (id == trace->audit.open_id && ret >= 0 && trace->last_vfs_getname) {
1647 trace__set_fd_pathname(thread, ret, trace->last_vfs_getname);
1648 trace->last_vfs_getname = NULL;
1649 ++trace->stats.vfs_getname;
1652 ttrace = thread->priv;
1654 ttrace->exit_time = sample->time;
1656 if (ttrace->entry_time) {
1657 duration = sample->time - ttrace->entry_time;
1658 if (trace__filter_duration(trace, duration))
1660 } else if (trace->duration_filter)
1663 trace__fprintf_entry_head(trace, thread, duration, sample->time, trace->output);
1665 if (ttrace->entry_pending) {
1666 fprintf(trace->output, "%-70s", ttrace->entry_str);
1668 fprintf(trace->output, " ... [");
1669 color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued");
1670 fprintf(trace->output, "]: %s()", sc->name);
1673 if (sc->fmt == NULL) {
1675 fprintf(trace->output, ") = %d", ret);
1676 } else if (ret < 0 && sc->fmt->errmsg) {
1678 const char *emsg = strerror_r(-ret, bf, sizeof(bf)),
1679 *e = audit_errno_to_name(-ret);
1681 fprintf(trace->output, ") = -1 %s %s", e, emsg);
1682 } else if (ret == 0 && sc->fmt->timeout)
1683 fprintf(trace->output, ") = 0 Timeout");
1684 else if (sc->fmt->hexret)
1685 fprintf(trace->output, ") = %#x", ret);
1689 fputc('\n', trace->output);
1691 ttrace->entry_pending = false;
/*
 * Handler for the vfs_getname probe: remembers the raw pathname field of
 * the sample in trace->last_vfs_getname, which trace__sys_exit() later
 * reads and clears.
 */
1696 static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel,
1697 struct perf_sample *sample)
1699 trace->last_vfs_getname = perf_evsel__rawptr(evsel, sample, "pathname");
/*
 * Handler for sched_stat_runtime: reads the runtime field, divides it by
 * NSEC_PER_MSEC and adds the result to both the per-thread and the global
 * runtime_ms counters.
 * The trailing fprintf dumps the raw event fields to trace->output.
 * NOTE(review): presumably that is the path taken when no thread trace
 * state is available; the guard and labels are on lines not shown.
 */
1703 static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel,
1704 struct perf_sample *sample)
1706 u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
1707 double runtime_ms = (double)runtime / NSEC_PER_MSEC;
1708 struct thread *thread = machine__findnew_thread(trace->host,
1711 struct thread_trace *ttrace = thread__trace(thread, trace->output);
1716 ttrace->runtime_ms += runtime_ms;
1717 trace->runtime_ms += runtime_ms;
1721 fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n",
1723 perf_evsel__strval(evsel, sample, "comm"),
1724 (pid_t)perf_evsel__intval(evsel, sample, "pid"),
1726 perf_evsel__intval(evsel, sample, "vruntime"));
/*
 * Decide whether a sample should be ignored based on the pid/tid lists.
 * The first test matches when the sample pid or tid is in its list; the
 * second test checks whether any list was configured at all.
 * NOTE(review): the return statements are on lines not shown. Presumably a
 * match means keep, and configured lists without a match mean skip.
 */
1730 static bool skip_sample(struct trace *trace, struct perf_sample *sample)
1732 if ((trace->pid_list && intlist__find(trace->pid_list, sample->pid)) ||
1733 (trace->tid_list && intlist__find(trace->tid_list, sample->tid)))
1736 if (trace->pid_list || trace->tid_list)
/*
 * perf_tool sample callback, installed by trace__replay().
 * Recovers the struct trace from the tool, consults skip_sample(), latches
 * the first sample time as base_time unless full timestamps were requested,
 * and dispatches to the handler attached to the evsel.
 * NOTE(review): any NULL check on handler and the returns are on lines not
 * shown.
 */
1742 static int trace__process_sample(struct perf_tool *tool,
1743 union perf_event *event __maybe_unused,
1744 struct perf_sample *sample,
1745 struct perf_evsel *evsel,
1746 struct machine *machine __maybe_unused)
1748 struct trace *trace = container_of(tool, struct trace, tool);
1751 tracepoint_handler handler = evsel->handler;
1753 if (skip_sample(trace, sample))
1756 if (!trace->full_time && trace->base_time == 0)
1757 trace->base_time = sample->time;
1760 handler(trace, evsel, sample);
/*
 * Returns whether the session evlist contains a tracepoint event with the
 * given name.
 */
1766 perf_session__has_tp(struct perf_session *session, const char *name)
1768 struct perf_evsel *evsel;
1770 evsel = perf_evlist__find_tracepoint_by_name(session->evlist, name);
1772 return evsel != NULL;
/*
 * Build trace->pid_list and trace->tid_list from the pid and tid target
 * strings, when they are set. An intlist__new() failure is reported with
 * pr_err().
 * NOTE(review): the return values are on lines not shown.
 */
1775 static int parse_target_str(struct trace *trace)
1777 if (trace->opts.target.pid) {
1778 trace->pid_list = intlist__new(trace->opts.target.pid);
1779 if (trace->pid_list == NULL) {
1780 pr_err("Error parsing process id string\n");
1785 if (trace->opts.target.tid) {
1786 trace->tid_list = intlist__new(trace->opts.target.tid);
1787 if (trace->tid_list == NULL) {
1788 pr_err("Error parsing thread id string\n");
/*
 * Implement the record sub-command by delegating to cmd_record().
 * The argument vector is the fixed record_args array, which selects the
 * raw_syscalls sys_enter and sys_exit events, followed by the argc
 * arguments passed in by the caller.
 * NOTE(review): rec_argv is not freed in the visible lines - confirm
 * whether that is intentional.
 */
1796 static int trace__record(int argc, const char **argv)
1798 unsigned int rec_argc, i, j;
1799 const char **rec_argv;
1800 const char * const record_args[] = {
1805 "-e", "raw_syscalls:sys_enter,raw_syscalls:sys_exit",
1808 rec_argc = ARRAY_SIZE(record_args) + argc;
/* One extra zeroed slot is allocated beyond rec_argc. */
1809 rec_argv = calloc(rec_argc + 1, sizeof(char *));
1811 if (rec_argv == NULL)
1814 for (i = 0; i < ARRAY_SIZE(record_args); i++)
1815 rec_argv[i] = record_args[i];
/* i keeps counting so the caller arguments land after the fixed ones. */
1817 for (j = 0; j < (unsigned int)argc; j++, i++)
1818 rec_argv[i] = argv[j];
1820 return cmd_record(i, rec_argv, NULL);
/* Forward declaration: called from trace__run(), defined further down. */
1823 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp);
/*
 * Try to add the probe:vfs_getname tracepoint to the evlist with
 * trace__vfs_getname() as its handler. The evsel is deleted instead when it
 * has no pathname field. The function returns nothing, so failures are not
 * reported to the caller.
 * NOTE(review): the NULL check on evsel and the early return are on lines
 * not shown.
 */
1825 static void perf_evlist__add_vfs_getname(struct perf_evlist *evlist)
1827 struct perf_evsel *evsel = perf_evsel__newtp("probe", "vfs_getname",
1828 evlist->nr_entries);
1832 if (perf_evsel__field(evsel, "pathname") == NULL) {
1833 perf_evsel__delete(evsel);
1837 evsel->handler = trace__vfs_getname;
1838 perf_evlist__add(evlist, evsel);
/*
 * Live tracing entry point.
 * Visible sequence:
 *  1. Create an evlist and add the syscall tracepoints, the optional
 *     vfs_getname probe and the sched_stat_runtime tracepoint.
 *  2. Create the cpu/thread maps, initialise symbols and configure the
 *     evlist.
 *  3. Install signal handlers, prepare the workload, then open and mmap
 *     the events.
 *  4. Enable the events, start the workload and drain every mmap ring.
 *     Non-sample records go to trace__process_event(); samples go to the
 *     handler of the evsel they belong to.
 *  5. When a pass produced no new events, wait in poll().
 *  6. Disable the events, print the thread summary and tool statistics,
 *     and tear the evlist down in reverse order of construction.
 * NOTE(review): the goto labels, most error checks, the loop-back jumps and
 * the conditions guarding several of these steps are on lines not shown.
 */
1841 static int trace__run(struct trace *trace, int argc, const char **argv)
1843 struct perf_evlist *evlist = perf_evlist__new();
1844 struct perf_evsel *evsel;
1846 unsigned long before;
/* A command given on the command line means a workload will be run. */
1847 const bool forks = argc > 0;
1851 if (evlist == NULL) {
1852 fprintf(trace->output, "Not enough memory to run!\n");
1856 if (perf_evlist__add_syscall_newtp(evlist, trace__sys_enter, trace__sys_exit))
1859 perf_evlist__add_vfs_getname(evlist);
1862 perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
1863 trace__sched_stat_runtime))
1866 err = perf_evlist__create_maps(evlist, &trace->opts.target);
1868 fprintf(trace->output, "Problems parsing the target to trace, check your options!\n");
1869 goto out_delete_evlist;
1872 err = trace__symbols_init(trace, evlist);
1874 fprintf(trace->output, "Problems initializing symbol libraries!\n");
1875 goto out_delete_maps;
1878 perf_evlist__config(evlist, &trace->opts);
1880 signal(SIGCHLD, sig_handler);
1881 signal(SIGINT, sig_handler);
1884 err = perf_evlist__prepare_workload(evlist, &trace->opts.target,
1885 argv, false, false);
1887 fprintf(trace->output, "Couldn't run the workload!\n");
1888 goto out_delete_maps;
1892 err = perf_evlist__open(evlist);
1894 goto out_error_open;
1896 err = perf_evlist__mmap(evlist, UINT_MAX, false);
1898 fprintf(trace->output, "Couldn't mmap the events: %s\n", strerror(errno));
1899 goto out_close_evlist;
1902 perf_evlist__enable(evlist);
1905 perf_evlist__start_workload(evlist);
/* Thread ids are printed when the map is a wildcard (-1) or has several entries. */
1907 trace->multiple_threads = evlist->threads->map[0] == -1 || evlist->threads->nr > 1;
/* Snapshot the event count to detect an idle pass over the rings. */
1909 before = trace->nr_events;
1911 for (i = 0; i < evlist->nr_mmaps; i++) {
1912 union perf_event *event;
1914 while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) {
1915 const u32 type = event->header.type;
1916 tracepoint_handler handler;
1917 struct perf_sample sample;
1921 err = perf_evlist__parse_sample(evlist, event, &sample);
1923 fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err);
1927 if (!trace->full_time && trace->base_time == 0)
1928 trace->base_time = sample.time;
1930 if (type != PERF_RECORD_SAMPLE) {
1931 trace__process_event(trace, trace->host, event, &sample);
1935 evsel = perf_evlist__id2evsel(evlist, sample.id);
1936 if (evsel == NULL) {
1937 fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample.id);
1941 if (sample.raw_data == NULL) {
1942 fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
1943 perf_evsel__name(evsel), sample.tid,
1944 sample.cpu, sample.raw_size);
1948 handler = evsel->handler;
1949 handler(trace, evsel, &sample);
1951 perf_evlist__mmap_consume(evlist, i);
1958 if (trace->nr_events == before) {
/* Once done is set, poll with a 100 ms timeout instead of blocking. */
1959 int timeout = done ? 100 : -1;
1961 if (poll(evlist->pollfd, evlist->nr_fds, timeout) > 0)
1968 perf_evlist__disable(evlist);
1972 trace__fprintf_thread_summary(trace, trace->output);
1974 if (trace->show_tool_stats) {
1975 fprintf(trace->output, "Stats:\n "
1976 " vfs_getname : %" PRIu64 "\n"
1977 " proc_getname: %" PRIu64 "\n",
1978 trace->stats.vfs_getname,
1979 trace->stats.proc_getname);
1983 perf_evlist__munmap(evlist);
1985 perf_evlist__close(evlist);
1987 perf_evlist__delete_maps(evlist);
1989 perf_evlist__delete(evlist);
1991 trace->live = false;
/* Error reporting for tracepoint setup and open failures. */
1994 char errbuf[BUFSIZ];
1997 perf_evlist__strerror_tp(evlist, errno, errbuf, sizeof(errbuf));
2001 perf_evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf));
2004 fprintf(trace->output, "%s\n", errbuf);
2005 goto out_delete_evlist;
/*
 * Replay entry point: process a previously recorded perf data file.
 * Wires the perf_tool callbacks, opens a session on the data file in read
 * mode, and attaches the syscall and vfs_getname handlers to the matching
 * tracepoints. It requires both raw_syscalls events to be present, parses
 * the pid/tid filters, and then processes all events. The thread summary is
 * printed afterwards when requested and processing did not fail.
 * NOTE(review): the error returns, the goto labels and the path member of
 * the data file initialiser are on lines not shown.
 */
2009 static int trace__replay(struct trace *trace)
2011 const struct perf_evsel_str_handler handlers[] = {
2012 { "raw_syscalls:sys_enter", trace__sys_enter, },
2013 { "raw_syscalls:sys_exit", trace__sys_exit, },
2014 { "probe:vfs_getname", trace__vfs_getname, },
2016 struct perf_data_file file = {
2018 .mode = PERF_DATA_MODE_READ,
2020 struct perf_session *session;
2023 trace->tool.sample = trace__process_sample;
2024 trace->tool.mmap = perf_event__process_mmap;
2025 trace->tool.mmap2 = perf_event__process_mmap2;
2026 trace->tool.comm = perf_event__process_comm;
2027 trace->tool.exit = perf_event__process_exit;
2028 trace->tool.fork = perf_event__process_fork;
2029 trace->tool.attr = perf_event__process_attr;
2030 trace->tool.tracing_data = perf_event__process_tracing_data;
2031 trace->tool.build_id = perf_event__process_build_id;
2033 trace->tool.ordered_samples = true;
2034 trace->tool.ordering_requires_timestamps = true;
2036 /* add tid to output */
2037 trace->multiple_threads = true;
2039 if (symbol__init() < 0)
2042 session = perf_session__new(&file, false, &trace->tool);
2043 if (session == NULL)
/* In replay mode the host machine is the one owned by the session. */
2046 trace->host = &session->machines.host;
2048 err = perf_session__set_tracepoints_handlers(session, handlers);
2052 if (!perf_session__has_tp(session, "raw_syscalls:sys_enter")) {
2053 pr_err("Data file does not have raw_syscalls:sys_enter events\n");
2057 if (!perf_session__has_tp(session, "raw_syscalls:sys_exit")) {
2058 pr_err("Data file does not have raw_syscalls:sys_exit events\n");
2062 err = parse_target_str(trace);
2068 err = perf_session__process_events(session, &trace->tool);
2070 pr_err("Failed to process events, error %d", err);
2072 else if (trace->summary)
2073 trace__fprintf_thread_summary(trace, trace->output);
2076 perf_session__delete(session);
/*
 * Print the banner and column headings of the per-thread summary to fp.
 * The length reported by each fprintf() call is accumulated in printed.
 */
2081 static size_t trace__fprintf_threads_header(FILE *fp)
2085 printed = fprintf(fp, "\n _____________________________________________________________________________\n");
2086 printed += fprintf(fp, " __) Summary of events (__\n\n");
2087 printed += fprintf(fp, " [ task - pid ] [ events ] [ ratio ] [ runtime ]\n");
2088 printed += fprintf(fp, " syscall count min max avg stddev\n");
2089 printed += fprintf(fp, " msec msec msec %%\n");
2090 printed += fprintf(fp, " _____________________________________________________________________________\n\n");
/*
 * Print one row per syscall recorded in ttrace->syscall_stats: the syscall
 * name, the call count, min/max/avg durations divided by NSEC_PER_MSEC,
 * and the standard deviation as a percentage of the average.
 * NOTE(review): the loop header, the NULL checks on inode and stats, and
 * the arguments of the count/min/max fprintf are on lines not shown.
 */
2095 static size_t thread__dump_stats(struct thread_trace *ttrace,
2096 struct trace *trace, FILE *fp)
2098 struct stats *stats;
2101 struct int_node *inode = intlist__first(ttrace->syscall_stats);
2106 printed += fprintf(fp, "\n");
2108 /* each int_node is a syscall */
2110 stats = inode->priv;
2112 double min = (double)(stats->min) / NSEC_PER_MSEC;
2113 double max = (double)(stats->max) / NSEC_PER_MSEC;
2114 double avg = avg_stats(stats);
2116 u64 n = (u64) stats->n;
/* A zero average yields 0.0 instead of dividing by zero. */
2118 pct = avg ? 100.0 * stddev_stats(stats)/avg : 0.0;
/* avg is scaled only after the percentage has been computed. */
2119 avg /= NSEC_PER_MSEC;
/* The intlist key is the syscall id, used to index the syscall table. */
2121 sc = &trace->syscalls.table[inode->i];
2122 printed += fprintf(fp, "%24s %14s : ", "", sc->name);
2123 printed += fprintf(fp, "%5" PRIu64 " %8.3f %8.3f",
2125 printed += fprintf(fp, " %8.3f %6.2f\n", avg, pct);
2128 inode = intlist__next(inode);
2131 printed += fprintf(fp, "\n\n");
2136 /* struct used to pass data to per-thread function */
/*
 * An instance is handed to machine__for_each_thread() and received by
 * trace__fprintf_one_thread() as its priv argument.
 */
2137 struct summary_data {
2139 struct trace *trace;
/*
 * Per-thread callback for the summary: prints the thread comm, tid, event
 * count, share of all events and accumulated runtime, followed by the
 * per-syscall statistics. The printed length is added to data->printed.
 * The ratio is the thread event count as a percentage of trace->nr_events,
 * and the colour is chosen from that ratio.
 * NOTE(review): the NULL check on ttrace, the threshold selecting red and
 * the return are on lines not shown.
 */
2143 static int trace__fprintf_one_thread(struct thread *thread, void *priv)
2145 struct summary_data *data = priv;
2146 FILE *fp = data->fp;
2147 size_t printed = data->printed;
2148 struct trace *trace = data->trace;
2149 struct thread_trace *ttrace = thread->priv;
2156 ratio = (double)ttrace->nr_events / trace->nr_events * 100.0;
2158 color = PERF_COLOR_NORMAL;
2160 color = PERF_COLOR_RED;
2161 else if (ratio > 25.0)
2162 color = PERF_COLOR_GREEN;
2163 else if (ratio > 5.0)
2164 color = PERF_COLOR_YELLOW;
2166 printed += color_fprintf(fp, color, "%20s", thread__comm_str(thread));
2167 printed += fprintf(fp, " - %-5d :%11lu [", thread->tid, ttrace->nr_events);
2168 printed += color_fprintf(fp, color, "%5.1f%%", ratio);
2169 printed += fprintf(fp, " ] %10.3f ms\n", ttrace->runtime_ms);
2170 printed += thread__dump_stats(ttrace, trace, fp);
/*
 * NOTE(review): printed was seeded from data->printed above, so this adds
 * the earlier total a second time - confirm whether that is intended.
 */
2172 data->printed += printed;
/*
 * Print the summary header and then one section per thread known to the
 * host machine, via trace__fprintf_one_thread().
 * Returns the printed length accumulated in data.printed.
 */
2177 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
2179 struct summary_data data = {
2183 data.printed = trace__fprintf_threads_header(fp);
2185 machine__for_each_thread(trace->host, trace__fprintf_one_thread, &data);
2187 return data.printed;
/*
 * Option callback for the duration option: converts str with atof() and
 * stores the result in trace->duration_filter. opt->value carries the
 * struct trace pointer.
 * NOTE(review): atof() does not report parse errors, so malformed input is
 * not detected here.
 */
2190 static int trace__set_duration(const struct option *opt, const char *str,
2191 int unset __maybe_unused)
2193 struct trace *trace = opt->value;
2195 trace->duration_filter = atof(str);
/*
 * Open filename for writing as the trace output stream.
 * A file that already exists with a non-zero size is first renamed to the
 * same name with an .old suffix.
 * Returns 0 on success or the negated errno when fopen() fails.
 */
2199 static int trace__open_output(struct trace *trace, const char *filename)
2203 if (!stat(filename, &st) && st.st_size) {
2204 char oldname[PATH_MAX];
2206 scnprintf(oldname, sizeof(oldname), "%s.old", filename);
/* The result of rename() is not checked. */
2208 rename(filename, oldname);
2211 trace->output = fopen(filename, "w");
2213 return trace->output == NULL ? -errno : 0;
2216 int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
2218 const char * const trace_usage[] = {
2219 "perf trace [<options>] [<command>]",
2220 "perf trace [<options>] -- <command> [<options>]",
2221 "perf trace record [<options>] [<command>]",
2222 "perf trace record [<options>] -- <command> [<options>]",
2225 struct trace trace = {
2227 .machine = audit_detect_machine(),
2228 .open_id = audit_name_to_syscall("open", trace.audit.machine),
2238 .user_freq = UINT_MAX,
2239 .user_interval = ULLONG_MAX,
2246 const char *output_name = NULL;
2247 const char *ev_qualifier_str = NULL;
2248 const struct option trace_options[] = {
2249 OPT_BOOLEAN(0, "comm", &trace.show_comm,
2250 "show the thread COMM next to its id"),
2251 OPT_BOOLEAN(0, "tool_stats", &trace.show_tool_stats, "show tool stats"),
2252 OPT_STRING('e', "expr", &ev_qualifier_str, "expr",
2253 "list of events to trace"),
2254 OPT_STRING('o', "output", &output_name, "file", "output file name"),
2255 OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"),
2256 OPT_STRING('p', "pid", &trace.opts.target.pid, "pid",
2257 "trace events on existing process id"),
2258 OPT_STRING('t', "tid", &trace.opts.target.tid, "tid",
2259 "trace events on existing thread id"),
2260 OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide,
2261 "system-wide collection from all CPUs"),
2262 OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu",
2263 "list of cpus to monitor"),
2264 OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit,
2265 "child tasks do not inherit counters"),
2266 OPT_CALLBACK('m', "mmap-pages", &trace.opts.mmap_pages, "pages",
2267 "number of mmap data pages",
2268 perf_evlist__parse_mmap_pages),
2269 OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user",
2271 OPT_CALLBACK(0, "duration", &trace, "float",
2272 "show only events with duration > N.M ms",
2273 trace__set_duration),
2274 OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"),
2275 OPT_INCR('v', "verbose", &verbose, "be more verbose"),
2276 OPT_BOOLEAN('T', "time", &trace.full_time,
2277 "Show full timestamp, not time relative to first start"),
2278 OPT_BOOLEAN(0, "summary", &trace.summary,
2279 "Show syscall summary with statistics"),
2285 if ((argc > 1) && (strcmp(argv[1], "record") == 0))
2286 return trace__record(argc-2, &argv[2]);
2288 argc = parse_options(argc, argv, trace_options, trace_usage, 0);
2290 if (output_name != NULL) {
2291 err = trace__open_output(&trace, output_name);
2293 perror("failed to create output file");
2298 if (ev_qualifier_str != NULL) {
2299 const char *s = ev_qualifier_str;
2301 trace.not_ev_qualifier = *s == '!';
2302 if (trace.not_ev_qualifier)
2304 trace.ev_qualifier = strlist__new(true, s);
2305 if (trace.ev_qualifier == NULL) {
2306 fputs("Not enough memory to parse event qualifier",
2313 err = perf_target__validate(&trace.opts.target);
2315 perf_target__strerror(&trace.opts.target, err, bf, sizeof(bf));
2316 fprintf(trace.output, "%s", bf);
2320 err = perf_target__parse_uid(&trace.opts.target);
2322 perf_target__strerror(&trace.opts.target, err, bf, sizeof(bf));
2323 fprintf(trace.output, "%s", bf);
2327 if (!argc && perf_target__none(&trace.opts.target))
2328 trace.opts.target.system_wide = true;
2331 err = trace__replay(&trace);
2333 err = trace__run(&trace, argc, argv);
2336 if (output_name != NULL)
2337 fclose(trace.output);