1 #include <traceevent/event-parse.h>
3 #include "util/color.h"
4 #include "util/debug.h"
5 #include "util/evlist.h"
6 #include "util/machine.h"
7 #include "util/session.h"
8 #include "util/thread.h"
9 #include "util/parse-options.h"
10 #include "util/strlist.h"
11 #include "util/intlist.h"
12 #include "util/thread_map.h"
13 #include "util/stat.h"
14 #include "trace-event.h"
18 #include <sys/eventfd.h>
20 #include <linux/futex.h>
22 /* For older distros: */
24 # define MAP_STACK 0x20000
28 # define MADV_HWPOISON 100
31 #ifndef MADV_MERGEABLE
32 # define MADV_MERGEABLE 12
35 #ifndef MADV_UNMERGEABLE
36 # define MADV_UNMERGEABLE 13
42 u64 (*integer)(struct tp_field *field, struct perf_sample *sample);
43 void *(*pointer)(struct tp_field *field, struct perf_sample *sample);
/*
 * TP_UINT_FIELD(bits) - define tp_field__u<bits>(), which reads an unsigned
 * integer of the given width from the sample's raw data at field->offset
 * and returns it widened to u64.
 *
 * The value is copied out with memcpy() instead of being loaded through a
 * cast pointer: nothing guarantees that raw_data + offset is suitably
 * aligned for u<bits>, and the cast also sidesteps the aliasing rules.
 */
#define TP_UINT_FIELD(bits) \
static u64 tp_field__u##bits(struct tp_field *field, struct perf_sample *sample) \
{ \
	u##bits value; \
	memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
	return value; \
}
/*
 * TP_UINT_FIELD__SWAPPED(bits) - define tp_field__swapped_u<bits>(), which
 * reads an unsigned integer of the given width from the sample's raw data
 * at field->offset and returns it byte-swapped with bswap_<bits>().
 *
 * The value is copied out with memcpy() rather than loaded through a cast
 * pointer, so a misaligned field offset cannot cause an unaligned access.
 */
#define TP_UINT_FIELD__SWAPPED(bits) \
static u64 tp_field__swapped_u##bits(struct tp_field *field, struct perf_sample *sample) \
{ \
	u##bits value; \
	memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
	return bswap_##bits(value);\
}
65 TP_UINT_FIELD__SWAPPED(16);
66 TP_UINT_FIELD__SWAPPED(32);
67 TP_UINT_FIELD__SWAPPED(64);
69 static int tp_field__init_uint(struct tp_field *field,
70 struct format_field *format_field,
73 field->offset = format_field->offset;
75 switch (format_field->size) {
77 field->integer = tp_field__u8;
80 field->integer = needs_swap ? tp_field__swapped_u16 : tp_field__u16;
83 field->integer = needs_swap ? tp_field__swapped_u32 : tp_field__u32;
86 field->integer = needs_swap ? tp_field__swapped_u64 : tp_field__u64;
95 static void *tp_field__ptr(struct tp_field *field, struct perf_sample *sample)
97 return sample->raw_data + field->offset;
100 static int tp_field__init_ptr(struct tp_field *field, struct format_field *format_field)
102 field->offset = format_field->offset;
103 field->pointer = tp_field__ptr;
110 struct tp_field args, ret;
114 static int perf_evsel__init_tp_uint_field(struct perf_evsel *evsel,
115 struct tp_field *field,
118 struct format_field *format_field = perf_evsel__field(evsel, name);
120 if (format_field == NULL)
123 return tp_field__init_uint(field, format_field, evsel->needs_swap);
/*
 * Initialise the integer accessor for member 'name' of the struct
 * syscall_tp stored in evsel->priv, looking up the tracepoint field whose
 * name is the stringified 'name'.  Evaluates to the return value of
 * perf_evsel__init_tp_uint_field().
 *
 * 'evsel' is expanded twice, so pass an expression without side effects.
 */
#define perf_evsel__init_sc_tp_uint_field(evsel, name) \
	({ struct syscall_tp *sc = (evsel)->priv;\
	   perf_evsel__init_tp_uint_field(evsel, &sc->name, #name); })
130 static int perf_evsel__init_tp_ptr_field(struct perf_evsel *evsel,
131 struct tp_field *field,
134 struct format_field *format_field = perf_evsel__field(evsel, name);
136 if (format_field == NULL)
139 return tp_field__init_ptr(field, format_field);
/*
 * Initialise the pointer accessor for member 'name' of the struct
 * syscall_tp stored in evsel->priv, looking up the tracepoint field whose
 * name is the stringified 'name'.  Evaluates to the return value of
 * perf_evsel__init_tp_ptr_field().
 *
 * 'evsel' is expanded twice, so pass an expression without side effects.
 */
#define perf_evsel__init_sc_tp_ptr_field(evsel, name) \
	({ struct syscall_tp *sc = (evsel)->priv;\
	   perf_evsel__init_tp_ptr_field(evsel, &sc->name, #name); })
146 static void perf_evsel__delete_priv(struct perf_evsel *evsel)
150 perf_evsel__delete(evsel);
153 static int perf_evsel__init_syscall_tp(struct perf_evsel *evsel, void *handler)
155 evsel->priv = malloc(sizeof(struct syscall_tp));
156 if (evsel->priv != NULL) {
157 if (perf_evsel__init_sc_tp_uint_field(evsel, id))
160 evsel->handler = handler;
172 static struct perf_evsel *perf_evsel__syscall_newtp(const char *direction, void *handler)
174 struct perf_evsel *evsel = perf_evsel__newtp("raw_syscalls", direction);
177 if (perf_evsel__init_syscall_tp(evsel, handler))
184 perf_evsel__delete_priv(evsel);
/*
 * Read member 'name' of the struct syscall_tp stored in evsel->priv from
 * 'sample' by calling that member's ->integer() accessor.
 */
#define perf_evsel__sc_tp_uint(evsel, name, sample) \
	({ struct syscall_tp *fields = (evsel)->priv; \
	   fields->name.integer(&fields->name, sample); })
/*
 * Get a pointer to member 'name' of the struct syscall_tp stored in
 * evsel->priv inside 'sample' by calling that member's ->pointer()
 * accessor.
 */
#define perf_evsel__sc_tp_ptr(evsel, name, sample) \
	({ struct syscall_tp *fields = (evsel)->priv; \
	   fields->name.pointer(&fields->name, sample); })
196 static int perf_evlist__add_syscall_newtp(struct perf_evlist *evlist,
197 void *sys_enter_handler,
198 void *sys_exit_handler)
201 struct perf_evsel *sys_enter, *sys_exit;
203 sys_enter = perf_evsel__syscall_newtp("sys_enter", sys_enter_handler);
204 if (sys_enter == NULL)
207 if (perf_evsel__init_sc_tp_ptr_field(sys_enter, args))
208 goto out_delete_sys_enter;
210 sys_exit = perf_evsel__syscall_newtp("sys_exit", sys_exit_handler);
211 if (sys_exit == NULL)
212 goto out_delete_sys_enter;
214 if (perf_evsel__init_sc_tp_uint_field(sys_exit, ret))
215 goto out_delete_sys_exit;
217 perf_evlist__add(evlist, sys_enter);
218 perf_evlist__add(evlist, sys_exit);
225 perf_evsel__delete_priv(sys_exit);
226 out_delete_sys_enter:
227 perf_evsel__delete_priv(sys_enter);
234 struct thread *thread;
244 const char **entries;
247 #define DEFINE_STRARRAY(array) struct strarray strarray__##array = { \
248 .nr_entries = ARRAY_SIZE(array), \
252 #define DEFINE_STRARRAY_OFFSET(array, off) struct strarray strarray__##array = { \
254 .nr_entries = ARRAY_SIZE(array), \
258 static size_t __syscall_arg__scnprintf_strarray(char *bf, size_t size,
260 struct syscall_arg *arg)
262 struct strarray *sa = arg->parm;
263 int idx = arg->val - sa->offset;
265 if (idx < 0 || idx >= sa->nr_entries)
266 return scnprintf(bf, size, intfmt, arg->val);
268 return scnprintf(bf, size, "%s", sa->entries[idx]);
/*
 * strarray formatter whose numeric fallback is signed decimal ("%d").
 */
static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size,
					      struct syscall_arg *arg)
{
	const char *fallback_fmt = "%d";

	return __syscall_arg__scnprintf_strarray(bf, size, fallback_fmt, arg);
}
277 #define SCA_STRARRAY syscall_arg__scnprintf_strarray
/*
 * strarray formatter whose numeric fallback is prefixed hexadecimal
 * ("%#x").
 */
static size_t syscall_arg__scnprintf_strhexarray(char *bf, size_t size,
						 struct syscall_arg *arg)
{
	const char *fallback_fmt = "%#x";

	return __syscall_arg__scnprintf_strarray(bf, size, fallback_fmt, arg);
}
285 #define SCA_STRHEXARRAY syscall_arg__scnprintf_strhexarray
287 static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
288 struct syscall_arg *arg);
290 #define SCA_FD syscall_arg__scnprintf_fd
292 static size_t syscall_arg__scnprintf_fd_at(char *bf, size_t size,
293 struct syscall_arg *arg)
298 return scnprintf(bf, size, "CWD");
300 return syscall_arg__scnprintf_fd(bf, size, arg);
303 #define SCA_FDAT syscall_arg__scnprintf_fd_at
305 static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
306 struct syscall_arg *arg);
308 #define SCA_CLOSE_FD syscall_arg__scnprintf_close_fd
310 static size_t syscall_arg__scnprintf_hex(char *bf, size_t size,
311 struct syscall_arg *arg)
313 return scnprintf(bf, size, "%#lx", arg->val);
316 #define SCA_HEX syscall_arg__scnprintf_hex
318 static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size,
319 struct syscall_arg *arg)
321 int printed = 0, prot = arg->val;
323 if (prot == PROT_NONE)
324 return scnprintf(bf, size, "NONE");
325 #define P_MMAP_PROT(n) \
326 if (prot & PROT_##n) { \
327 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
337 P_MMAP_PROT(GROWSDOWN);
338 P_MMAP_PROT(GROWSUP);
342 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", prot);
347 #define SCA_MMAP_PROT syscall_arg__scnprintf_mmap_prot
349 static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size,
350 struct syscall_arg *arg)
352 int printed = 0, flags = arg->val;
354 #define P_MMAP_FLAG(n) \
355 if (flags & MAP_##n) { \
356 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
361 P_MMAP_FLAG(PRIVATE);
365 P_MMAP_FLAG(ANONYMOUS);
366 P_MMAP_FLAG(DENYWRITE);
367 P_MMAP_FLAG(EXECUTABLE);
370 P_MMAP_FLAG(GROWSDOWN);
372 P_MMAP_FLAG(HUGETLB);
375 P_MMAP_FLAG(NONBLOCK);
376 P_MMAP_FLAG(NORESERVE);
377 P_MMAP_FLAG(POPULATE);
379 #ifdef MAP_UNINITIALIZED
380 P_MMAP_FLAG(UNINITIALIZED);
385 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
390 #define SCA_MMAP_FLAGS syscall_arg__scnprintf_mmap_flags
392 static size_t syscall_arg__scnprintf_madvise_behavior(char *bf, size_t size,
393 struct syscall_arg *arg)
395 int behavior = arg->val;
398 #define P_MADV_BHV(n) case MADV_##n: return scnprintf(bf, size, #n)
401 P_MADV_BHV(SEQUENTIAL);
402 P_MADV_BHV(WILLNEED);
403 P_MADV_BHV(DONTNEED);
405 P_MADV_BHV(DONTFORK);
407 P_MADV_BHV(HWPOISON);
408 #ifdef MADV_SOFT_OFFLINE
409 P_MADV_BHV(SOFT_OFFLINE);
411 P_MADV_BHV(MERGEABLE);
412 P_MADV_BHV(UNMERGEABLE);
414 P_MADV_BHV(HUGEPAGE);
416 #ifdef MADV_NOHUGEPAGE
417 P_MADV_BHV(NOHUGEPAGE);
420 P_MADV_BHV(DONTDUMP);
429 return scnprintf(bf, size, "%#x", behavior);
432 #define SCA_MADV_BHV syscall_arg__scnprintf_madvise_behavior
434 static size_t syscall_arg__scnprintf_flock(char *bf, size_t size,
435 struct syscall_arg *arg)
437 int printed = 0, op = arg->val;
440 return scnprintf(bf, size, "NONE");
442 if ((op & LOCK_##cmd) == LOCK_##cmd) { \
443 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #cmd); \
458 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", op);
463 #define SCA_FLOCK syscall_arg__scnprintf_flock
465 static size_t syscall_arg__scnprintf_futex_op(char *bf, size_t size, struct syscall_arg *arg)
467 enum syscall_futex_args {
468 SCF_UADDR = (1 << 0),
471 SCF_TIMEOUT = (1 << 3),
472 SCF_UADDR2 = (1 << 4),
476 int cmd = op & FUTEX_CMD_MASK;
480 #define P_FUTEX_OP(n) case FUTEX_##n: printed = scnprintf(bf, size, #n);
481 P_FUTEX_OP(WAIT); arg->mask |= SCF_VAL3|SCF_UADDR2; break;
482 P_FUTEX_OP(WAKE); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
483 P_FUTEX_OP(FD); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
484 P_FUTEX_OP(REQUEUE); arg->mask |= SCF_VAL3|SCF_TIMEOUT; break;
485 P_FUTEX_OP(CMP_REQUEUE); arg->mask |= SCF_TIMEOUT; break;
486 P_FUTEX_OP(CMP_REQUEUE_PI); arg->mask |= SCF_TIMEOUT; break;
487 P_FUTEX_OP(WAKE_OP); break;
488 P_FUTEX_OP(LOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
489 P_FUTEX_OP(UNLOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
490 P_FUTEX_OP(TRYLOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2; break;
491 P_FUTEX_OP(WAIT_BITSET); arg->mask |= SCF_UADDR2; break;
492 P_FUTEX_OP(WAKE_BITSET); arg->mask |= SCF_UADDR2; break;
493 P_FUTEX_OP(WAIT_REQUEUE_PI); break;
494 default: printed = scnprintf(bf, size, "%#x", cmd); break;
497 if (op & FUTEX_PRIVATE_FLAG)
498 printed += scnprintf(bf + printed, size - printed, "|PRIV");
500 if (op & FUTEX_CLOCK_REALTIME)
501 printed += scnprintf(bf + printed, size - printed, "|CLKRT");
506 #define SCA_FUTEX_OP syscall_arg__scnprintf_futex_op
/*
 * Names for the epoll_ctl() 'op' argument.  The table is declared with an
 * offset of 1, so the first entry is presumably meant for op value 1
 * (assumes DEFINE_STRARRAY_OFFSET stores 'off' as the table offset).
 */
static const char *epoll_ctl_ops[] = { "ADD", "DEL", "MOD", };
static DEFINE_STRARRAY_OFFSET(epoll_ctl_ops, 1);
/* Names for the 'which' argument of getitimer()/setitimer(). */
static const char *itimers[] = { "REAL", "VIRTUAL", "PROF", };
static DEFINE_STRARRAY(itimers);
514 static const char *whences[] = { "SET", "CUR", "END",
522 static DEFINE_STRARRAY(whences);
524 static const char *fcntl_cmds[] = {
525 "DUPFD", "GETFD", "SETFD", "GETFL", "SETFL", "GETLK", "SETLK",
526 "SETLKW", "SETOWN", "GETOWN", "SETSIG", "GETSIG", "F_GETLK64",
527 "F_SETLK64", "F_SETLKW64", "F_SETOWN_EX", "F_GETOWN_EX",
530 static DEFINE_STRARRAY(fcntl_cmds);
532 static const char *rlimit_resources[] = {
533 "CPU", "FSIZE", "DATA", "STACK", "CORE", "RSS", "NPROC", "NOFILE",
534 "MEMLOCK", "AS", "LOCKS", "SIGPENDING", "MSGQUEUE", "NICE", "RTPRIO",
537 static DEFINE_STRARRAY(rlimit_resources);
/* Names for the 'how' argument of rt_sigprocmask(). */
static const char *sighow[] = { "BLOCK", "UNBLOCK", "SETMASK", };
static DEFINE_STRARRAY(sighow);
542 static const char *clockid[] = {
543 "REALTIME", "MONOTONIC", "PROCESS_CPUTIME_ID", "THREAD_CPUTIME_ID",
544 "MONOTONIC_RAW", "REALTIME_COARSE", "MONOTONIC_COARSE",
546 static DEFINE_STRARRAY(clockid);
548 static const char *socket_families[] = {
549 "UNSPEC", "LOCAL", "INET", "AX25", "IPX", "APPLETALK", "NETROM",
550 "BRIDGE", "ATMPVC", "X25", "INET6", "ROSE", "DECnet", "NETBEUI",
551 "SECURITY", "KEY", "NETLINK", "PACKET", "ASH", "ECONET", "ATMSVC",
552 "RDS", "SNA", "IRDA", "PPPOX", "WANPIPE", "LLC", "IB", "CAN", "TIPC",
553 "BLUETOOTH", "IUCV", "RXRPC", "ISDN", "PHONET", "IEEE802154", "CAIF",
554 "ALG", "NFC", "VSOCK",
556 static DEFINE_STRARRAY(socket_families);
558 #ifndef SOCK_TYPE_MASK
559 #define SOCK_TYPE_MASK 0xf
562 static size_t syscall_arg__scnprintf_socket_type(char *bf, size_t size,
563 struct syscall_arg *arg)
567 flags = type & ~SOCK_TYPE_MASK;
569 type &= SOCK_TYPE_MASK;
571 * Can't use a strarray, MIPS may override for ABI reasons.
574 #define P_SK_TYPE(n) case SOCK_##n: printed = scnprintf(bf, size, #n); break;
579 P_SK_TYPE(SEQPACKET);
584 printed = scnprintf(bf, size, "%#x", type);
587 #define P_SK_FLAG(n) \
588 if (flags & SOCK_##n) { \
589 printed += scnprintf(bf + printed, size - printed, "|%s", #n); \
590 flags &= ~SOCK_##n; \
598 printed += scnprintf(bf + printed, size - printed, "|%#x", flags);
603 #define SCA_SK_TYPE syscall_arg__scnprintf_socket_type
606 #define MSG_PROBE 0x10
608 #ifndef MSG_WAITFORONE
609 #define MSG_WAITFORONE 0x10000
611 #ifndef MSG_SENDPAGE_NOTLAST
612 #define MSG_SENDPAGE_NOTLAST 0x20000
615 #define MSG_FASTOPEN 0x20000000
618 static size_t syscall_arg__scnprintf_msg_flags(char *bf, size_t size,
619 struct syscall_arg *arg)
621 int printed = 0, flags = arg->val;
624 return scnprintf(bf, size, "NONE");
625 #define P_MSG_FLAG(n) \
626 if (flags & MSG_##n) { \
627 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
633 P_MSG_FLAG(DONTROUTE);
638 P_MSG_FLAG(DONTWAIT);
645 P_MSG_FLAG(ERRQUEUE);
646 P_MSG_FLAG(NOSIGNAL);
648 P_MSG_FLAG(WAITFORONE);
649 P_MSG_FLAG(SENDPAGE_NOTLAST);
650 P_MSG_FLAG(FASTOPEN);
651 P_MSG_FLAG(CMSG_CLOEXEC);
655 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
660 #define SCA_MSG_FLAGS syscall_arg__scnprintf_msg_flags
662 static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size,
663 struct syscall_arg *arg)
668 if (mode == F_OK) /* 0 */
669 return scnprintf(bf, size, "F");
671 if (mode & n##_OK) { \
672 printed += scnprintf(bf + printed, size - printed, "%s", #n); \
682 printed += scnprintf(bf + printed, size - printed, "|%#x", mode);
687 #define SCA_ACCMODE syscall_arg__scnprintf_access_mode
689 static size_t syscall_arg__scnprintf_open_flags(char *bf, size_t size,
690 struct syscall_arg *arg)
692 int printed = 0, flags = arg->val;
694 if (!(flags & O_CREAT))
695 arg->mask |= 1 << (arg->idx + 1); /* Mask the mode parm */
698 return scnprintf(bf, size, "RDONLY");
700 if (flags & O_##n) { \
701 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
725 if ((flags & O_SYNC) == O_SYNC)
726 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", "SYNC");
738 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
743 #define SCA_OPEN_FLAGS syscall_arg__scnprintf_open_flags
745 static size_t syscall_arg__scnprintf_eventfd_flags(char *bf, size_t size,
746 struct syscall_arg *arg)
748 int printed = 0, flags = arg->val;
751 return scnprintf(bf, size, "NONE");
753 if (flags & EFD_##n) { \
754 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
764 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
769 #define SCA_EFD_FLAGS syscall_arg__scnprintf_eventfd_flags
771 static size_t syscall_arg__scnprintf_pipe_flags(char *bf, size_t size,
772 struct syscall_arg *arg)
774 int printed = 0, flags = arg->val;
777 if (flags & O_##n) { \
778 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
787 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
792 #define SCA_PIPE_FLAGS syscall_arg__scnprintf_pipe_flags
794 static size_t syscall_arg__scnprintf_signum(char *bf, size_t size, struct syscall_arg *arg)
799 #define P_SIGNUM(n) case SIG##n: return scnprintf(bf, size, #n)
834 return scnprintf(bf, size, "%#x", sig);
837 #define SCA_SIGNUM syscall_arg__scnprintf_signum
839 #define TCGETS 0x5401
841 static const char *tioctls[] = {
842 "TCGETS", "TCSETS", "TCSETSW", "TCSETSF", "TCGETA", "TCSETA", "TCSETAW",
843 "TCSETAF", "TCSBRK", "TCXONC", "TCFLSH", "TIOCEXCL", "TIOCNXCL",
844 "TIOCSCTTY", "TIOCGPGRP", "TIOCSPGRP", "TIOCOUTQ", "TIOCSTI",
845 "TIOCGWINSZ", "TIOCSWINSZ", "TIOCMGET", "TIOCMBIS", "TIOCMBIC",
846 "TIOCMSET", "TIOCGSOFTCAR", "TIOCSSOFTCAR", "FIONREAD", "TIOCLINUX",
847 "TIOCCONS", "TIOCGSERIAL", "TIOCSSERIAL", "TIOCPKT", "FIONBIO",
848 "TIOCNOTTY", "TIOCSETD", "TIOCGETD", "TCSBRKP", [0x27] = "TIOCSBRK",
849 "TIOCCBRK", "TIOCGSID", "TCGETS2", "TCSETS2", "TCSETSW2", "TCSETSF2",
850 "TIOCGRS485", "TIOCSRS485", "TIOCGPTN", "TIOCSPTLCK",
851 "TIOCGDEV||TCGETX", "TCSETX", "TCSETXF", "TCSETXW", "TIOCSIG",
852 "TIOCVHANGUP", "TIOCGPKT", "TIOCGPTLCK", "TIOCGEXCL",
853 [0x50] = "FIONCLEX", "FIOCLEX", "FIOASYNC", "TIOCSERCONFIG",
854 "TIOCSERGWILD", "TIOCSERSWILD", "TIOCGLCKTRMIOS", "TIOCSLCKTRMIOS",
855 "TIOCSERGSTRUCT", "TIOCSERGETLSR", "TIOCSERGETMULTI", "TIOCSERSETMULTI",
856 "TIOCMIWAIT", "TIOCGICOUNT", [0x60] = "FIOQSIZE",
859 static DEFINE_STRARRAY_OFFSET(tioctls, 0x5401);
/*
 * Initializer fragment for a syscall_fmt entry: argument number 'arg' is
 * formatted by SCA_STRARRAY using the table strarray__<array>.
 * 'name' does not appear in the expansion; it only labels the argument at
 * the point of use.
 */
#define STRARRAY(arg, name, array) \
	  .arg_scnprintf = { [arg] = SCA_STRARRAY, }, \
	  .arg_parm	 = { [arg] = &strarray__##array, }
865 static struct syscall_fmt {
868 size_t (*arg_scnprintf[6])(char *bf, size_t size, struct syscall_arg *arg);
874 { .name = "access", .errmsg = true,
875 .arg_scnprintf = { [1] = SCA_ACCMODE, /* mode */ }, },
876 { .name = "arch_prctl", .errmsg = true, .alias = "prctl", },
877 { .name = "brk", .hexret = true,
878 .arg_scnprintf = { [0] = SCA_HEX, /* brk */ }, },
879 { .name = "clock_gettime", .errmsg = true, STRARRAY(0, clk_id, clockid), },
880 { .name = "close", .errmsg = true,
881 .arg_scnprintf = { [0] = SCA_CLOSE_FD, /* fd */ }, },
882 { .name = "connect", .errmsg = true, },
883 { .name = "dup", .errmsg = true,
884 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
885 { .name = "dup2", .errmsg = true,
886 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
887 { .name = "dup3", .errmsg = true,
888 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
889 { .name = "epoll_ctl", .errmsg = true, STRARRAY(1, op, epoll_ctl_ops), },
890 { .name = "eventfd2", .errmsg = true,
891 .arg_scnprintf = { [1] = SCA_EFD_FLAGS, /* flags */ }, },
892 { .name = "faccessat", .errmsg = true,
893 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
894 { .name = "fadvise64", .errmsg = true,
895 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
896 { .name = "fallocate", .errmsg = true,
897 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
898 { .name = "fchdir", .errmsg = true,
899 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
900 { .name = "fchmod", .errmsg = true,
901 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
902 { .name = "fchmodat", .errmsg = true,
903 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
904 { .name = "fchown", .errmsg = true,
905 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
906 { .name = "fchownat", .errmsg = true,
907 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
908 { .name = "fcntl", .errmsg = true,
909 .arg_scnprintf = { [0] = SCA_FD, /* fd */
910 [1] = SCA_STRARRAY, /* cmd */ },
911 .arg_parm = { [1] = &strarray__fcntl_cmds, /* cmd */ }, },
912 { .name = "fdatasync", .errmsg = true,
913 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
914 { .name = "flock", .errmsg = true,
915 .arg_scnprintf = { [0] = SCA_FD, /* fd */
916 [1] = SCA_FLOCK, /* cmd */ }, },
917 { .name = "fsetxattr", .errmsg = true,
918 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
919 { .name = "fstat", .errmsg = true, .alias = "newfstat",
920 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
921 { .name = "fstatat", .errmsg = true, .alias = "newfstatat",
922 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
923 { .name = "fstatfs", .errmsg = true,
924 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
925 { .name = "fsync", .errmsg = true,
926 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
927 { .name = "ftruncate", .errmsg = true,
928 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
929 { .name = "futex", .errmsg = true,
930 .arg_scnprintf = { [1] = SCA_FUTEX_OP, /* op */ }, },
931 { .name = "futimesat", .errmsg = true,
932 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
933 { .name = "getdents", .errmsg = true,
934 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
935 { .name = "getdents64", .errmsg = true,
936 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
937 { .name = "getitimer", .errmsg = true, STRARRAY(0, which, itimers), },
938 { .name = "getrlimit", .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
939 { .name = "ioctl", .errmsg = true,
940 .arg_scnprintf = { [0] = SCA_FD, /* fd */
941 [1] = SCA_STRHEXARRAY, /* cmd */
942 [2] = SCA_HEX, /* arg */ },
943 .arg_parm = { [1] = &strarray__tioctls, /* cmd */ }, },
944 { .name = "kill", .errmsg = true,
945 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
946 { .name = "linkat", .errmsg = true,
947 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
948 { .name = "lseek", .errmsg = true,
949 .arg_scnprintf = { [0] = SCA_FD, /* fd */
950 [2] = SCA_STRARRAY, /* whence */ },
951 .arg_parm = { [2] = &strarray__whences, /* whence */ }, },
952 { .name = "lstat", .errmsg = true, .alias = "newlstat", },
953 { .name = "madvise", .errmsg = true,
954 .arg_scnprintf = { [0] = SCA_HEX, /* start */
955 [2] = SCA_MADV_BHV, /* behavior */ }, },
956 { .name = "mkdirat", .errmsg = true,
957 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
958 { .name = "mknodat", .errmsg = true,
959 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
960 { .name = "mlock", .errmsg = true,
961 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
962 { .name = "mlockall", .errmsg = true,
963 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
964 { .name = "mmap", .hexret = true,
965 .arg_scnprintf = { [0] = SCA_HEX, /* addr */
966 [2] = SCA_MMAP_PROT, /* prot */
967 [3] = SCA_MMAP_FLAGS, /* flags */
968 [4] = SCA_FD, /* fd */ }, },
969 { .name = "mprotect", .errmsg = true,
970 .arg_scnprintf = { [0] = SCA_HEX, /* start */
971 [2] = SCA_MMAP_PROT, /* prot */ }, },
972 { .name = "mremap", .hexret = true,
973 .arg_scnprintf = { [0] = SCA_HEX, /* addr */
974 [4] = SCA_HEX, /* new_addr */ }, },
975 { .name = "munlock", .errmsg = true,
976 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
977 { .name = "munmap", .errmsg = true,
978 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
979 { .name = "name_to_handle_at", .errmsg = true,
980 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
981 { .name = "newfstatat", .errmsg = true,
982 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
983 { .name = "open", .errmsg = true,
984 .arg_scnprintf = { [1] = SCA_OPEN_FLAGS, /* flags */ }, },
985 { .name = "open_by_handle_at", .errmsg = true,
986 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
987 [2] = SCA_OPEN_FLAGS, /* flags */ }, },
988 { .name = "openat", .errmsg = true,
989 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
990 [2] = SCA_OPEN_FLAGS, /* flags */ }, },
991 { .name = "pipe2", .errmsg = true,
992 .arg_scnprintf = { [1] = SCA_PIPE_FLAGS, /* flags */ }, },
993 { .name = "poll", .errmsg = true, .timeout = true, },
994 { .name = "ppoll", .errmsg = true, .timeout = true, },
995 { .name = "pread", .errmsg = true, .alias = "pread64",
996 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
997 { .name = "preadv", .errmsg = true, .alias = "pread",
998 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
999 { .name = "prlimit64", .errmsg = true, STRARRAY(1, resource, rlimit_resources), },
1000 { .name = "pwrite", .errmsg = true, .alias = "pwrite64",
1001 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1002 { .name = "pwritev", .errmsg = true,
1003 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1004 { .name = "read", .errmsg = true,
1005 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1006 { .name = "readlinkat", .errmsg = true,
1007 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1008 { .name = "readv", .errmsg = true,
1009 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1010 { .name = "recvfrom", .errmsg = true,
1011 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1012 { .name = "recvmmsg", .errmsg = true,
1013 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1014 { .name = "recvmsg", .errmsg = true,
1015 .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
1016 { .name = "renameat", .errmsg = true,
1017 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1018 { .name = "rt_sigaction", .errmsg = true,
1019 .arg_scnprintf = { [0] = SCA_SIGNUM, /* sig */ }, },
1020 { .name = "rt_sigprocmask", .errmsg = true, STRARRAY(0, how, sighow), },
1021 { .name = "rt_sigqueueinfo", .errmsg = true,
1022 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1023 { .name = "rt_tgsigqueueinfo", .errmsg = true,
1024 .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
1025 { .name = "select", .errmsg = true, .timeout = true, },
1026 { .name = "sendmmsg", .errmsg = true,
1027 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1028 { .name = "sendmsg", .errmsg = true,
1029 .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
1030 { .name = "sendto", .errmsg = true,
1031 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1032 { .name = "setitimer", .errmsg = true, STRARRAY(0, which, itimers), },
1033 { .name = "setrlimit", .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
1034 { .name = "shutdown", .errmsg = true,
1035 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1036 { .name = "socket", .errmsg = true,
1037 .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
1038 [1] = SCA_SK_TYPE, /* type */ },
1039 .arg_parm = { [0] = &strarray__socket_families, /* family */ }, },
1040 { .name = "socketpair", .errmsg = true,
1041 .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
1042 [1] = SCA_SK_TYPE, /* type */ },
1043 .arg_parm = { [0] = &strarray__socket_families, /* family */ }, },
1044 { .name = "stat", .errmsg = true, .alias = "newstat", },
1045 { .name = "symlinkat", .errmsg = true,
1046 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1047 { .name = "tgkill", .errmsg = true,
1048 .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
1049 { .name = "tkill", .errmsg = true,
1050 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1051 { .name = "uname", .errmsg = true, .alias = "newuname", },
1052 { .name = "unlinkat", .errmsg = true,
1053 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1054 { .name = "utimensat", .errmsg = true,
1055 .arg_scnprintf = { [0] = SCA_FDAT, /* dirfd */ }, },
1056 { .name = "write", .errmsg = true,
1057 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1058 { .name = "writev", .errmsg = true,
1059 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1062 static int syscall_fmt__cmp(const void *name, const void *fmtp)
1064 const struct syscall_fmt *fmt = fmtp;
1065 return strcmp(name, fmt->name);
1068 static struct syscall_fmt *syscall_fmt__find(const char *name)
1070 const int nmemb = ARRAY_SIZE(syscall_fmts);
1071 return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp);
1075 struct event_format *tp_format;
1078 struct syscall_fmt *fmt;
1079 size_t (**arg_scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
1083 static size_t fprintf_duration(unsigned long t, FILE *fp)
1085 double duration = (double)t / NSEC_PER_MSEC;
1086 size_t printed = fprintf(fp, "(");
1088 if (duration >= 1.0)
1089 printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration);
1090 else if (duration >= 0.01)
1091 printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration);
1093 printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration);
1094 return printed + fprintf(fp, "): ");
1097 struct thread_trace {
1101 unsigned long nr_events;
1109 struct intlist *syscall_stats;
1112 static struct thread_trace *thread_trace__new(void)
1114 struct thread_trace *ttrace = zalloc(sizeof(struct thread_trace));
1117 ttrace->paths.max = -1;
1119 ttrace->syscall_stats = intlist__new(NULL);
1124 static struct thread_trace *thread__trace(struct thread *thread, FILE *fp)
1126 struct thread_trace *ttrace;
1131 if (thread->priv == NULL)
1132 thread->priv = thread_trace__new();
1134 if (thread->priv == NULL)
1137 ttrace = thread->priv;
1138 ++ttrace->nr_events;
1142 color_fprintf(fp, PERF_COLOR_RED,
1143 "WARNING: not enough memory, dropping samples!\n");
1148 struct perf_tool tool;
1155 struct syscall *table;
1157 struct perf_record_opts opts;
1158 struct machine *host;
1162 unsigned long nr_events;
1163 struct strlist *ev_qualifier;
1164 bool not_ev_qualifier;
1166 const char *last_vfs_getname;
1167 struct intlist *tid_list;
1168 struct intlist *pid_list;
1170 bool multiple_threads;
1174 bool show_tool_stats;
1175 double duration_filter;
1178 u64 vfs_getname, proc_getname;
1182 static int trace__set_fd_pathname(struct thread *thread, int fd, const char *pathname)
1184 struct thread_trace *ttrace = thread->priv;
1186 if (fd > ttrace->paths.max) {
1187 char **npath = realloc(ttrace->paths.table, (fd + 1) * sizeof(char *));
1192 if (ttrace->paths.max != -1) {
1193 memset(npath + ttrace->paths.max + 1, 0,
1194 (fd - ttrace->paths.max) * sizeof(char *));
1196 memset(npath, 0, (fd + 1) * sizeof(char *));
1199 ttrace->paths.table = npath;
1200 ttrace->paths.max = fd;
1203 ttrace->paths.table[fd] = strdup(pathname);
1205 return ttrace->paths.table[fd] != NULL ? 0 : -1;
1208 static int thread__read_fd_path(struct thread *thread, int fd)
1210 char linkname[PATH_MAX], pathname[PATH_MAX];
1214 if (thread->pid_ == thread->tid) {
1215 scnprintf(linkname, sizeof(linkname),
1216 "/proc/%d/fd/%d", thread->pid_, fd);
1218 scnprintf(linkname, sizeof(linkname),
1219 "/proc/%d/task/%d/fd/%d", thread->pid_, thread->tid, fd);
1222 if (lstat(linkname, &st) < 0 || st.st_size + 1 > (off_t)sizeof(pathname))
1225 ret = readlink(linkname, pathname, sizeof(pathname));
1227 if (ret < 0 || ret > st.st_size)
1230 pathname[ret] = '\0';
1231 return trace__set_fd_pathname(thread, fd, pathname);
1234 static const char *thread__fd_path(struct thread *thread, int fd,
1235 struct trace *trace)
1237 struct thread_trace *ttrace = thread->priv;
1245 if ((fd > ttrace->paths.max || ttrace->paths.table[fd] == NULL))
1248 ++trace->stats.proc_getname;
1249 if (thread__read_fd_path(thread, fd)) {
1253 return ttrace->paths.table[fd];
1256 static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
1257 struct syscall_arg *arg)
1260 size_t printed = scnprintf(bf, size, "%d", fd);
1261 const char *path = thread__fd_path(arg->thread, fd, arg->trace);
1264 printed += scnprintf(bf + printed, size - printed, "<%s>", path);
1269 static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
1270 struct syscall_arg *arg)
1273 size_t printed = syscall_arg__scnprintf_fd(bf, size, arg);
1274 struct thread_trace *ttrace = arg->thread->priv;
1276 if (ttrace && fd >= 0 && fd <= ttrace->paths.max) {
1277 free(ttrace->paths.table[fd]);
1278 ttrace->paths.table[fd] = NULL;
1284 static bool trace__filter_duration(struct trace *trace, double t)
1286 return t < (trace->duration_filter * NSEC_PER_MSEC);
1289 static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
1291 double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC;
1293 return fprintf(fp, "%10.3f ", ts);
1296 static bool done = false;
1297 static bool interrupted = false;
1299 static void sig_handler(int sig)
1302 interrupted = sig == SIGINT;
1305 static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread,
1306 u64 duration, u64 tstamp, FILE *fp)
1308 size_t printed = trace__fprintf_tstamp(trace, tstamp, fp);
1309 printed += fprintf_duration(duration, fp);
1311 if (trace->multiple_threads) {
1312 if (trace->show_comm)
1313 printed += fprintf(fp, "%.14s/", thread__comm_str(thread));
1314 printed += fprintf(fp, "%d ", thread->tid);
/*
 * Handle a perf event that is not a sample. PERF_RECORD_LOST is reported on
 * trace->output in red before being handed to machine__process_lost_event();
 * the other visible path forwards the event to machine__process_event().
 *
 * NOTE(review): the 'default:' / 'break;' / 'return' lines are not visible in
 * this listing.
 */
1320 static int trace__process_event(struct trace *trace, struct machine *machine,
1321 union perf_event *event, struct perf_sample *sample)
1325 switch (event->header.type) {
1326 case PERF_RECORD_LOST:
1327 color_fprintf(trace->output, PERF_COLOR_RED,
1328 "LOST %" PRIu64 " events!\n", event->lost.lost);
1329 ret = machine__process_lost_event(machine, event, sample);
1331 ret = machine__process_event(machine, event, sample);
/*
 * perf_tool callback adapter: recovers the enclosing struct trace from
 * 'tool' with container_of() and delegates to trace__process_event().
 */
1338 static int trace__tool_process(struct perf_tool *tool,
1339 union perf_event *event,
1340 struct perf_sample *sample,
1341 struct machine *machine)
1343 struct trace *trace = container_of(tool, struct trace, tool);
1344 return trace__process_event(trace, machine, event, sample);
/*
 * Set up symbol handling for a live session: call symbol__init(), create the
 * host machine object (stored in trace->host) and synthesize the threads of
 * the target through __machine__synthesize_threads(), with
 * trace__tool_process() as the event callback.
 *
 * NOTE(review): the error-handling lines after each step are not visible in
 * this listing.
 */
1347 static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
1349 int err = symbol__init();
1354 trace->host = machine__new_host();
1355 if (trace->host == NULL)
1358 err = __machine__synthesize_threads(trace->host, &trace->tool, &trace->opts.target,
1359 evlist->threads, trace__tool_process, false);
/*
 * Allocate and fill sc->arg_scnprintf, the per-argument formatter table.
 *
 * The table gets nr_fields - 1 slots and the walk starts at the second
 * tracepoint field (fields->next), so the first field is not treated as a
 * syscall argument. A formatter supplied by sc->fmt takes precedence;
 * otherwise fields flagged FIELD_IS_POINTER are printed with
 * syscall_arg__scnprintf_hex. sc->arg_parm is taken from sc->fmt.
 *
 * NOTE(review): the guard around the arg_parm assignment, the index
 * increment and the return statements are on lines not visible here.
 */
1366 static int syscall__set_arg_fmts(struct syscall *sc)
1368 struct format_field *field;
1371 sc->arg_scnprintf = calloc(sc->tp_format->format.nr_fields - 1, sizeof(void *));
1372 if (sc->arg_scnprintf == NULL)
1376 sc->arg_parm = sc->fmt->arg_parm;
1378 for (field = sc->tp_format->format.fields->next; field; field = field->next) {
1379 if (sc->fmt && sc->fmt->arg_scnprintf[idx])
1380 sc->arg_scnprintf[idx] = sc->fmt->arg_scnprintf[idx];
1381 else if (field->flags & FIELD_IS_POINTER)
1382 sc->arg_scnprintf[idx] = syscall_arg__scnprintf_hex;
/*
 * Populate the syscall table entry for 'id': resolve its name through
 * libaudit, grow trace->syscalls.table when needed, apply the -e qualifier
 * list, look up the formatting description and the "syscalls:sys_enter_NAME"
 * tracepoint format, and finally build the per-argument formatter table via
 * syscall__set_arg_fmts(), whose result is returned on success.
 *
 * NOTE(review): the failure returns are on lines not visible in this listing.
 */
1389 static int trace__read_syscall_info(struct trace *trace, int id)
1393 const char *name = audit_syscall_to_name(id, trace->audit.machine);
/* Grow the table so that index 'id' becomes valid. */
1398 if (id > trace->syscalls.max) {
/* realloc() into a temporary: the old table stays valid if it fails. */
1399 struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc));
1401 if (nsyscalls == NULL)
/*
 * Zero only the newly added tail (old max + 1 .. id) when the table already
 * had entries; a max of -1 means it was empty, so the whole array is cleared.
 */
1404 if (trace->syscalls.max != -1) {
1405 memset(nsyscalls + trace->syscalls.max + 1, 0,
1406 (id - trace->syscalls.max) * sizeof(*sc));
1408 memset(nsyscalls, 0, (id + 1) * sizeof(*sc));
1411 trace->syscalls.table = nsyscalls;
1412 trace->syscalls.max = id;
1415 sc = trace->syscalls.table + id;
1418 if (trace->ev_qualifier) {
1419 bool in = strlist__find(trace->ev_qualifier, name) != NULL;
/*
 * Filter the syscall when list membership equals the negation flag: not
 * listed in a plain list, or listed in a '!'-negated list.
 */
1421 if (!(in ^ trace->not_ev_qualifier)) {
1422 sc->filtered = true;
1424 * No need to do read tracepoint information since this will be
1431 sc->fmt = syscall_fmt__find(sc->name);
1433 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name);
1434 sc->tp_format = trace_event__tp_format("syscalls", tp_name);
/* No tracepoint under the audit name: retry with the alias from sc->fmt. */
1436 if (sc->tp_format == NULL && sc->fmt && sc->fmt->alias) {
1437 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias);
1438 sc->tp_format = trace_event__tp_format("syscalls", tp_name);
1441 if (sc->tp_format == NULL)
1444 return syscall__set_arg_fmts(sc);
/*
 * Render the arguments of a syscall into 'bf' (at most 'size' bytes).
 *
 * With a tracepoint format available, the fields are walked starting at the
 * second one and each argument is emitted as "name: value", separated by
 * ", ". The value is produced by the per-argument formatter in
 * sc->arg_scnprintf when one is set, and as "%ld" otherwise. Without a
 * tracepoint format the trailing scnprintf() prints the raw args by index.
 *
 * The character counts are accumulated in 'printed'.
 *
 * NOTE(review): the initialiser of 'arg', the 'continue' for suppressed
 * arguments and the fallback loop header are on lines not visible here.
 */
1447 static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
1448 unsigned long *args, struct trace *trace,
1449 struct thread *thread)
1453 if (sc->tp_format != NULL) {
1454 struct format_field *field;
1456 struct syscall_arg arg = {
/* arg.idx and the 'bit' mask advance in lock-step with the field list. */
1463 for (field = sc->tp_format->format.fields->next; field;
1464 field = field->next, ++arg.idx, bit <<= 1) {
1468 * Suppress this argument if its value is zero and
1469 * and we don't have a string associated in an
1472 if (args[arg.idx] == 0 &&
1473 !(sc->arg_scnprintf &&
1474 sc->arg_scnprintf[arg.idx] == SCA_STRARRAY &&
1475 sc->arg_parm[arg.idx]))
/* The ", " separator is only emitted once something was already printed. */
1478 printed += scnprintf(bf + printed, size - printed,
1479 "%s%s: ", printed ? ", " : "", field->name);
1480 if (sc->arg_scnprintf && sc->arg_scnprintf[arg.idx]) {
1481 arg.val = args[arg.idx];
1483 arg.parm = sc->arg_parm[arg.idx];
1484 printed += sc->arg_scnprintf[arg.idx](bf + printed,
1485 size - printed, &arg);
1487 printed += scnprintf(bf + printed, size - printed,
1488 "%ld", args[arg.idx]);
1495 printed += scnprintf(bf + printed, size - printed,
1497 printed ? ", " : "", i, args[i]);
/*
 * Signature shared by the per-tracepoint sample handlers in this file; the
 * handler is stored in evsel->handler and invoked with the owning trace
 * session, the event selector and the parsed sample.
 */
1505 typedef int (*tracepoint_handler)(struct trace *trace, struct perf_evsel *evsel,
1506 struct perf_sample *sample);
/*
 * Return the syscall table entry for 'id', loading it on first use.
 *
 * An invalid id is reported with "Invalid syscall ..." (see the XXX note
 * kept below). If the entry cannot be read, a "Problems reading syscall"
 * diagnostic is written to trace->output, including the syscall name when
 * the table already knows it.
 *
 * NOTE(review): the conditions guarding the diagnostics, the labels and the
 * failure returns are on lines not visible in this listing.
 */
1508 static struct syscall *trace__syscall_info(struct trace *trace,
1509 struct perf_evsel *evsel, int id)
1515 * XXX: Noticed on x86_64, reproduced as far back as 3.0.36, haven't tried
1516 * before that, leaving at a higher verbosity level till that is
1517 * explained. Reproduced with plain ftrace with:
1519 * echo 1 > /t/events/raw_syscalls/sys_exit/enable
1520 * grep "NR -1 " /t/trace_pipe
1522 * After generating some load on the machine.
1526 fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n",
1527 id, perf_evsel__name(evsel), ++n);
/* Entry missing (beyond the table or never named): try to read it now. */
1532 if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) &&
1533 trace__read_syscall_info(trace, id))
/* Re-check: the read may have succeeded without producing a usable entry. */
1536 if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL))
1539 return &trace->syscalls.table[id];
1543 fprintf(trace->output, "Problems reading syscall %d", id);
1544 if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL)
1545 fprintf(trace->output, "(%s)", trace->syscalls.table[id].name);
1546 fputs(" information\n", trace->output);
/*
 * Account one completed syscall in the per-thread statistics: find or create
 * the intlist node keyed by the syscall id, lazily allocate the struct stats
 * hanging off inode->priv, and feed it the time elapsed between
 * ttrace->entry_time and sample->time.
 *
 * NOTE(review): the allocation-failure handling, the stats initialisation
 * and the initial value of 'duration' are on lines not visible here.
 */
1551 static void thread__update_stats(struct thread_trace *ttrace,
1552 int id, struct perf_sample *sample)
1554 struct int_node *inode;
1555 struct stats *stats;
1558 inode = intlist__findnew(ttrace->syscall_stats, id);
1562 stats = inode->priv;
1563 if (stats == NULL) {
1564 stats = malloc(sizeof(struct stats));
1568 inode->priv = stats;
/* Only compute a duration when an entry was seen and time moved forward. */
1571 if (ttrace->entry_time && sample->time > ttrace->entry_time)
1572 duration = sample->time - ttrace->entry_time;
1574 update_stats(stats, duration);
/*
 * Handler for the syscall entry tracepoint.
 *
 * Formats "name(args" into the per-thread entry_str buffer (1024 bytes,
 * allocated on first use) and records the entry timestamp. For "exit" and
 * "exit_group" the line is printed right away, unless a duration filter or
 * summary-only mode is active; entry_pending is set so that
 * trace__sys_exit() can complete the line later.
 *
 * NOTE(review): presumably the entry_pending assignment sits in an 'else'
 * branch on a line not visible here -- confirm against the full file.
 * NOTE(review): 'ttrace' is assigned from thread__trace() and then again
 * from thread->priv; the second load looks redundant.
 */
1577 static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
1578 struct perf_sample *sample)
1583 struct thread *thread;
1584 int id = perf_evsel__sc_tp_uint(evsel, id, sample);
1585 struct syscall *sc = trace__syscall_info(trace, evsel, id);
1586 struct thread_trace *ttrace;
1594 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1595 ttrace = thread__trace(thread, trace->output);
1599 args = perf_evsel__sc_tp_ptr(evsel, args, sample);
1600 ttrace = thread->priv;
/* The entry buffer is allocated once per thread and then reused. */
1602 if (ttrace->entry_str == NULL) {
1603 ttrace->entry_str = malloc(1024);
1604 if (!ttrace->entry_str)
1608 ttrace->entry_time = sample->time;
1609 msg = ttrace->entry_str;
1610 printed += scnprintf(msg + printed, 1024 - printed, "%s(", sc->name);
1612 printed += syscall__scnprintf_args(sc, msg + printed, 1024 - printed,
1613 args, trace, thread);
1615 if (!strcmp(sc->name, "exit_group") || !strcmp(sc->name, "exit")) {
1616 if (!trace->duration_filter && !trace->summary_only) {
1617 trace__fprintf_entry_head(trace, thread, 1, sample->time, trace->output);
1618 fprintf(trace->output, "%-70s\n", ttrace->entry_str);
1621 ttrace->entry_pending = true;
/*
 * Handler for the syscall exit tracepoint.
 *
 * Updates the per-thread statistics, associates the last pathname seen by
 * the vfs_getname probe with the fd returned by a successful open, computes
 * the syscall duration and, unless filtered out, prints the line: the entry
 * text saved by trace__sys_enter() (or a "... [continued]" marker) followed
 * by the return value, formatted according to the syscall's sc->fmt flags.
 *
 * NOTE(review): the early-exit statements for filtered events, the 'else'
 * keywords and the default return-value print are on lines not visible here.
 */
1626 static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
1627 struct perf_sample *sample)
1631 struct thread *thread;
1632 int id = perf_evsel__sc_tp_uint(evsel, id, sample);
1633 struct syscall *sc = trace__syscall_info(trace, evsel, id);
1634 struct thread_trace *ttrace;
1642 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1643 ttrace = thread__trace(thread, trace->output);
1648 thread__update_stats(ttrace, id, sample);
1650 ret = perf_evsel__sc_tp_uint(evsel, ret, sample);
/*
 * A successful open: bind the pathname captured by trace__vfs_getname() to
 * the returned fd and clear it so it is not reused for a later syscall.
 */
1652 if (id == trace->audit.open_id && ret >= 0 && trace->last_vfs_getname) {
1653 trace__set_fd_pathname(thread, ret, trace->last_vfs_getname);
1654 trace->last_vfs_getname = NULL;
1655 ++trace->stats.vfs_getname;
1658 ttrace = thread->priv;
1660 ttrace->exit_time = sample->time;
/* Without an entry timestamp no duration is known for this syscall. */
1662 if (ttrace->entry_time) {
1663 duration = sample->time - ttrace->entry_time;
1664 if (trace__filter_duration(trace, duration))
1666 } else if (trace->duration_filter)
1669 if (trace->summary_only)
1672 trace__fprintf_entry_head(trace, thread, duration, sample->time, trace->output);
1674 if (ttrace->entry_pending) {
1675 fprintf(trace->output, "%-70s", ttrace->entry_str);
1677 fprintf(trace->output, " ... [");
1678 color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued");
1679 fprintf(trace->output, "]: %s()", sc->name);
/* Return value formatting, driven by the optional per-syscall description. */
1682 if (sc->fmt == NULL) {
1684 fprintf(trace->output, ") = %d", ret);
1685 } else if (ret < 0 && sc->fmt->errmsg) {
1687 const char *emsg = strerror_r(-ret, bf, sizeof(bf)),
1688 *e = audit_errno_to_name(-ret);
1690 fprintf(trace->output, ") = -1 %s %s", e, emsg);
1691 } else if (ret == 0 && sc->fmt->timeout)
1692 fprintf(trace->output, ") = 0 Timeout");
1693 else if (sc->fmt->hexret)
1694 fprintf(trace->output, ") = %#x", ret);
1698 fputc('\n', trace->output);
1700 ttrace->entry_pending = false;
/*
 * Handler for the probe:vfs_getname tracepoint: stores the pointer that
 * perf_evsel__rawptr() returns for the "pathname" field in
 * trace->last_vfs_getname, where trace__sys_exit() picks it up.
 */
1705 static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel,
1706 struct perf_sample *sample)
1708 trace->last_vfs_getname = perf_evsel__rawptr(evsel, sample, "pathname");
/*
 * Handler for sched:sched_stat_runtime: converts the "runtime" field with
 * NSEC_PER_MSEC and adds it to both the per-thread and the session-wide
 * runtime_ms totals. The trailing fprintf() dumps the raw event fields.
 *
 * NOTE(review): the condition leading to the dump is on lines not visible
 * here.
 * NOTE(review): the dump's format string ends in ")" without a matching
 * "(" -- looks like a typo in the message.
 */
1712 static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel,
1713 struct perf_sample *sample)
1715 u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
1716 double runtime_ms = (double)runtime / NSEC_PER_MSEC;
1717 struct thread *thread = machine__findnew_thread(trace->host,
1720 struct thread_trace *ttrace = thread__trace(thread, trace->output);
1725 ttrace->runtime_ms += runtime_ms;
1726 trace->runtime_ms += runtime_ms;
1730 fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n",
1732 perf_evsel__strval(evsel, sample, "comm"),
1733 (pid_t)perf_evsel__intval(evsel, sample, "pid"),
1735 perf_evsel__intval(evsel, sample, "vruntime"));
/*
 * Decide whether a sample should be ignored, based on the optional pid and
 * tid lists. The first test matches samples whose pid or tid is present in
 * a list; the second is reached when at least one list exists but nothing
 * matched.
 *
 * NOTE(review): the return statements are on lines not visible here;
 * presumably a match yields false and "lists set, no match" yields true.
 */
1739 static bool skip_sample(struct trace *trace, struct perf_sample *sample)
1741 if ((trace->pid_list && intlist__find(trace->pid_list, sample->pid)) ||
1742 (trace->tid_list && intlist__find(trace->tid_list, sample->tid)))
1745 if (trace->pid_list || trace->tid_list)
/*
 * perf_tool sample callback: recovers the trace session from 'tool', drops
 * samples rejected by skip_sample(), latches the first sample time as
 * base_time when relative timestamps are in use, and dispatches to the
 * handler stored in evsel->handler.
 *
 * NOTE(review): any guard around the handler call is on lines not visible
 * in this listing.
 */
1751 static int trace__process_sample(struct perf_tool *tool,
1752 union perf_event *event __maybe_unused,
1753 struct perf_sample *sample,
1754 struct perf_evsel *evsel,
1755 struct machine *machine __maybe_unused)
1757 struct trace *trace = container_of(tool, struct trace, tool);
1760 tracepoint_handler handler = evsel->handler;
1762 if (skip_sample(trace, sample))
/* base_time == 0 means "not set yet"; the first sample defines time zero. */
1765 if (!trace->full_time && trace->base_time == 0)
1766 trace->base_time = sample->time;
1769 handler(trace, evsel, sample);
/*
 * Turn the pid and tid target strings, when given, into the integer lists
 * trace->pid_list and trace->tid_list consulted by skip_sample(). A list
 * that cannot be created is reported with pr_err().
 */
1774 static int parse_target_str(struct trace *trace)
1776 if (trace->opts.target.pid) {
1777 trace->pid_list = intlist__new(trace->opts.target.pid);
1778 if (trace->pid_list == NULL) {
1779 pr_err("Error parsing process id string\n");
1784 if (trace->opts.target.tid) {
1785 trace->tid_list = intlist__new(trace->opts.target.tid);
1786 if (trace->tid_list == NULL) {
1787 pr_err("Error parsing thread id string\n");
/*
 * Implement "perf trace record": build an argument vector consisting of the
 * fixed record_args entries followed by the caller's argv, and hand it to
 * cmd_record().
 *
 * NOTE(review): only the "-e" entry of record_args is visible in this
 * listing.
 * NOTE(review): rec_argv is not freed in the visible code after
 * cmd_record() returns.
 */
1795 static int trace__record(int argc, const char **argv)
1797 unsigned int rec_argc, i, j;
1798 const char **rec_argv;
1799 const char * const record_args[] = {
1804 "-e", "raw_syscalls:sys_enter,raw_syscalls:sys_exit",
1807 rec_argc = ARRAY_SIZE(record_args) + argc;
/* One extra zeroed slot so the vector is NULL-terminated. */
1808 rec_argv = calloc(rec_argc + 1, sizeof(char *));
1810 if (rec_argv == NULL)
1813 for (i = 0; i < ARRAY_SIZE(record_args); i++)
1814 rec_argv[i] = record_args[i];
/* 'i' keeps counting, so it ends up as the total argument count. */
1816 for (j = 0; j < (unsigned int)argc; j++, i++)
1817 rec_argv[i] = argv[j];
1819 return cmd_record(i, rec_argv, NULL);
/* Forward declaration: defined further down, but already used by trace__run(). */
1822 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp);
/*
 * Add the "probe:vfs_getname" tracepoint to the event list, with
 * trace__vfs_getname() as its handler. The event selector is discarded
 * again when it has no "pathname" field.
 *
 * NOTE(review): the handling of a failed perf_evsel__newtp() is on lines
 * not visible in this listing.
 */
1824 static void perf_evlist__add_vfs_getname(struct perf_evlist *evlist)
1826 struct perf_evsel *evsel = perf_evsel__newtp("probe", "vfs_getname");
1830 if (perf_evsel__field(evsel, "pathname") == NULL) {
1831 perf_evsel__delete(evsel);
1835 evsel->handler = trace__vfs_getname;
1836 perf_evlist__add(evlist, evsel);
/*
 * Run a live trace session.
 *
 * Visible sequence: create the event list and add the syscall enter/exit
 * tracepoints, the vfs_getname probe and the sched_stat_runtime tracepoint;
 * create the target maps; initialise symbols; configure the events; install
 * sig_handler() for SIGCHLD and SIGINT; prepare the workload; open and mmap
 * the events; enable them and start the workload. The main loop then drains
 * every mmap ring, dispatching samples to the handler stored in their evsel
 * and other records to trace__process_event(). Afterwards the events are
 * disabled, the optional summaries are printed and the resources are torn
 * down in reverse order of acquisition.
 *
 * NOTE(review): most conditions ('if (err < 0)', 'if (forks)', ...), all
 * labels and the loop-back jumps are on lines not visible in this listing,
 * so the exact control flow has to be confirmed against the full file.
 */
1839 static int trace__run(struct trace *trace, int argc, const char **argv)
1841 struct perf_evlist *evlist = perf_evlist__new();
1842 struct perf_evsel *evsel;
1844 unsigned long before;
/* A command on the command line means a workload has to be forked. */
1845 const bool forks = argc > 0;
1849 if (evlist == NULL) {
1850 fprintf(trace->output, "Not enough memory to run!\n");
1854 if (perf_evlist__add_syscall_newtp(evlist, trace__sys_enter, trace__sys_exit))
1857 perf_evlist__add_vfs_getname(evlist);
1860 perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
1861 trace__sched_stat_runtime))
1864 err = perf_evlist__create_maps(evlist, &trace->opts.target);
1866 fprintf(trace->output, "Problems parsing the target to trace, check your options!\n");
1867 goto out_delete_evlist;
1870 err = trace__symbols_init(trace, evlist);
1872 fprintf(trace->output, "Problems initializing symbol libraries!\n");
1873 goto out_delete_maps;
1876 perf_evlist__config(evlist, &trace->opts);
/* Both signals end the session; sig_handler() records which one arrived. */
1878 signal(SIGCHLD, sig_handler);
1879 signal(SIGINT, sig_handler);
1882 err = perf_evlist__prepare_workload(evlist, &trace->opts.target,
1883 argv, false, false);
1885 fprintf(trace->output, "Couldn't run the workload!\n");
1886 goto out_delete_maps;
1890 err = perf_evlist__open(evlist);
1892 goto out_error_open;
1894 err = perf_evlist__mmap(evlist, trace->opts.mmap_pages, false);
1896 fprintf(trace->output, "Couldn't mmap the events: %s\n", strerror(errno));
1897 goto out_close_evlist;
1900 perf_evlist__enable(evlist);
1903 perf_evlist__start_workload(evlist);
/* Show thread ids when tracing more than one thread, or a map entry of -1. */
1905 trace->multiple_threads = evlist->threads->map[0] == -1 || evlist->threads->nr > 1;
/* Snapshot of the event counter, used below to detect an idle pass. */
1907 before = trace->nr_events;
1909 for (i = 0; i < evlist->nr_mmaps; i++) {
1910 union perf_event *event;
1912 while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) {
1913 const u32 type = event->header.type;
1914 tracepoint_handler handler;
1915 struct perf_sample sample;
1919 err = perf_evlist__parse_sample(evlist, event, &sample);
1921 fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err);
/* The first sample defines time zero unless full timestamps were requested. */
1925 if (!trace->full_time && trace->base_time == 0)
1926 trace->base_time = sample.time;
1928 if (type != PERF_RECORD_SAMPLE) {
1929 trace__process_event(trace, trace->host, event, &sample);
1933 evsel = perf_evlist__id2evsel(evlist, sample.id);
1934 if (evsel == NULL) {
1935 fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample.id);
1939 if (sample.raw_data == NULL) {
1940 fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
1941 perf_evsel__name(evsel), sample.tid,
1942 sample.cpu, sample.raw_size);
1946 handler = evsel->handler;
1947 handler(trace, evsel, &sample);
1949 perf_evlist__mmap_consume(evlist, i);
/*
 * Nothing new in this pass: wait in poll(). Once 'done' is set the wait is
 * bounded to 100 ms instead of blocking indefinitely.
 */
1956 if (trace->nr_events == before) {
1957 int timeout = done ? 100 : -1;
1959 if (poll(evlist->pollfd, evlist->nr_fds, timeout) > 0)
1966 perf_evlist__disable(evlist);
1970 trace__fprintf_thread_summary(trace, trace->output);
1972 if (trace->show_tool_stats) {
1973 fprintf(trace->output, "Stats:\n "
1974 " vfs_getname : %" PRIu64 "\n"
1975 " proc_getname: %" PRIu64 "\n",
1976 trace->stats.vfs_getname,
1977 trace->stats.proc_getname);
/* Teardown, in reverse order of acquisition. */
1981 perf_evlist__munmap(evlist);
1983 perf_evlist__close(evlist);
1985 perf_evlist__delete_maps(evlist);
1987 perf_evlist__delete(evlist);
1989 trace->live = false;
/* Error reporting for failures while setting up or opening the events. */
1992 char errbuf[BUFSIZ];
1995 perf_evlist__strerror_tp(evlist, errno, errbuf, sizeof(errbuf));
1999 perf_evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf));
2002 fprintf(trace->output, "%s\n", errbuf);
2003 goto out_delete_evlist;
/*
 * Replay a previously recorded session from a perf data file.
 *
 * Installs the perf_tool callbacks (samples go to trace__process_sample(),
 * the remaining records to the generic perf_event__process_* helpers) and
 * requests time-ordered delivery. It then opens the session, wires
 * trace__sys_enter() / trace__sys_exit() to the raw_syscalls tracepoints
 * found in the file, parses the pid/tid filters and processes all events.
 * The thread summary is printed on success when trace->summary is set.
 *
 * NOTE(review): the error returns and 'goto' targets are on lines not
 * visible in this listing.
 * NOTE(review): the "Failed to process events" message has no trailing
 * newline, unlike the other pr_err() calls here.
 */
2007 static int trace__replay(struct trace *trace)
2009 const struct perf_evsel_str_handler handlers[] = {
2010 { "probe:vfs_getname", trace__vfs_getname, },
2012 struct perf_data_file file = {
2014 .mode = PERF_DATA_MODE_READ,
2016 struct perf_session *session;
2017 struct perf_evsel *evsel;
2020 trace->tool.sample = trace__process_sample;
2021 trace->tool.mmap = perf_event__process_mmap;
2022 trace->tool.mmap2 = perf_event__process_mmap2;
2023 trace->tool.comm = perf_event__process_comm;
2024 trace->tool.exit = perf_event__process_exit;
2025 trace->tool.fork = perf_event__process_fork;
2026 trace->tool.attr = perf_event__process_attr;
2027 trace->tool.tracing_data = perf_event__process_tracing_data;
2028 trace->tool.build_id = perf_event__process_build_id;
2030 trace->tool.ordered_samples = true;
2031 trace->tool.ordering_requires_timestamps = true;
2033 /* add tid to output */
2034 trace->multiple_threads = true;
2036 if (symbol__init() < 0)
2039 session = perf_session__new(&file, false, &trace->tool);
2040 if (session == NULL)
/* In replay mode the host machine is the one owned by the session. */
2043 trace->host = &session->machines.host;
2045 err = perf_session__set_tracepoints_handlers(session, handlers);
2049 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2050 "raw_syscalls:sys_enter");
2051 if (evsel == NULL) {
2052 pr_err("Data file does not have raw_syscalls:sys_enter event\n");
2056 if (perf_evsel__init_syscall_tp(evsel, trace__sys_enter) < 0 ||
2057 perf_evsel__init_sc_tp_ptr_field(evsel, args)) {
2058 pr_err("Error during initialize raw_syscalls:sys_enter event\n");
2062 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2063 "raw_syscalls:sys_exit");
2064 if (evsel == NULL) {
2065 pr_err("Data file does not have raw_syscalls:sys_exit event\n");
2069 if (perf_evsel__init_syscall_tp(evsel, trace__sys_exit) < 0 ||
2070 perf_evsel__init_sc_tp_uint_field(evsel, ret)) {
2071 pr_err("Error during initialize raw_syscalls:sys_exit event\n");
2075 err = parse_target_str(trace);
2081 err = perf_session__process_events(session, &trace->tool);
2083 pr_err("Failed to process events, error %d", err);
2085 else if (trace->summary)
2086 trace__fprintf_thread_summary(trace, trace->output);
2089 perf_session__delete(session);
/* Print the banner that introduces the per-thread summary section. */
2094 static size_t trace__fprintf_threads_header(FILE *fp)
2098 printed = fprintf(fp, "\n Summary of events:\n\n");
/*
 * Print the per-syscall statistics table of one thread: a header, then one
 * row per node of ttrace->syscall_stats showing the syscall name, the number
 * of calls, min/avg/max (each divided by NSEC_PER_MSEC for the msec
 * columns) and the standard deviation as a percentage of the average.
 *
 * The character counts are accumulated in 'printed'.
 *
 * NOTE(review): the loop header, the guards for an empty list or missing
 * stats and the argument line of the middle fprintf() are not visible here.
 */
2103 static size_t thread__dump_stats(struct thread_trace *ttrace,
2104 struct trace *trace, FILE *fp)
2106 struct stats *stats;
2109 struct int_node *inode = intlist__first(ttrace->syscall_stats);
2114 printed += fprintf(fp, "\n");
2116 printed += fprintf(fp, " syscall calls min avg max stddev\n");
2117 printed += fprintf(fp, " (msec) (msec) (msec) (%%)\n");
2118 printed += fprintf(fp, " --------------- -------- --------- --------- --------- ------\n");
2120 /* each int_node is a syscall */
2122 stats = inode->priv;
2124 double min = (double)(stats->min) / NSEC_PER_MSEC;
2125 double max = (double)(stats->max) / NSEC_PER_MSEC;
2126 double avg = avg_stats(stats);
2128 u64 n = (u64) stats->n;
/* Relative stddev is computed before 'avg' is rescaled; 0 avoids a divide by zero. */
2130 pct = avg ? 100.0 * stddev_stats(stats)/avg : 0.0;
2131 avg /= NSEC_PER_MSEC;
/* The node key (inode->i) is the syscall id, i.e. the index into the table. */
2133 sc = &trace->syscalls.table[inode->i];
2134 printed += fprintf(fp, " %-15s", sc->name);
2135 printed += fprintf(fp, " %8" PRIu64 " %9.3f %9.3f",
2137 printed += fprintf(fp, " %9.3f %9.2f%%\n", max, pct);
2140 inode = intlist__next(inode);
2143 printed += fprintf(fp, "\n\n");
/*
 * Context handed to trace__fprintf_one_thread() through the 'priv' pointer
 * of machine__for_each_thread(). Besides the trace session visible below,
 * the callback also reads 'fp' and updates 'printed' in this struct.
 */
2148 /* struct used to pass data to per-thread function */
2149 struct summary_data {
2151 struct trace *trace;
/*
 * machine__for_each_thread() callback: print the summary of one thread, i.e.
 * its comm and tid, its event count, the share of all events that count
 * represents, its accumulated runtime in msec and, via thread__dump_stats(),
 * its per-syscall statistics table.
 *
 * NOTE(review): the guard for threads without trace state is on lines not
 * visible here.
 * NOTE(review): 'ratio' divides by trace->nr_events; no zero check is
 * visible.
 * NOTE(review): 'printed' starts from data->printed and is then added back
 * to data->printed, so the earlier total appears to be counted twice.
 */
2155 static int trace__fprintf_one_thread(struct thread *thread, void *priv)
2157 struct summary_data *data = priv;
2158 FILE *fp = data->fp;
2159 size_t printed = data->printed;
2160 struct trace *trace = data->trace;
2161 struct thread_trace *ttrace = thread->priv;
2167 ratio = (double)ttrace->nr_events / trace->nr_events * 100.0;
2169 printed += fprintf(fp, " %s (%d), ", thread__comm_str(thread), thread->tid);
2170 printed += fprintf(fp, "%lu events, ", ttrace->nr_events);
2171 printed += fprintf(fp, "%.1f%%", ratio);
2172 printed += fprintf(fp, ", %.3f msec\n", ttrace->runtime_ms);
2173 printed += thread__dump_stats(ttrace, trace, fp);
2175 data->printed += printed;
/*
 * Print the summary section: the header, followed by one entry per thread
 * of trace->host, produced by trace__fprintf_one_thread(). Returns the
 * character count collected in data.printed.
 */
2180 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
2182 struct summary_data data = {
2186 data.printed = trace__fprintf_threads_header(fp);
2188 machine__for_each_thread(trace->host, trace__fprintf_one_thread, &data);
2190 return data.printed;
2193 static int trace__set_duration(const struct option *opt, const char *str,
2194 int unset __maybe_unused)
2196 struct trace *trace = opt->value;
2198 trace->duration_filter = atof(str);
/*
 * Open 'filename' for writing as the trace output stream. An existing,
 * non-empty file of that name is first renamed to "<filename>.old" so it is
 * not overwritten.
 *
 * Returns 0 on success, or -errno when fopen() fails.
 *
 * NOTE(review): the result of rename() is not checked.
 */
2202 static int trace__open_output(struct trace *trace, const char *filename)
2206 if (!stat(filename, &st) && st.st_size) {
2207 char oldname[PATH_MAX];
2209 scnprintf(oldname, sizeof(oldname), "%s.old", filename);
2211 rename(filename, oldname);
2214 trace->output = fopen(filename, "w");
2216 return trace->output == NULL ? -errno : 0;
/*
 * Entry point of "perf trace".
 *
 * "perf trace record ..." is forwarded to trace__record() before any option
 * parsing. Otherwise the options are parsed, the output file is opened when
 * -o was given, the -e syscall qualifier list is built (a leading '!'
 * negates it), and the target is validated. The session is then either
 * replayed with trace__replay() or run live with trace__run(). An output
 * file opened here is closed again at the end.
 *
 * NOTE(review): the condition choosing between replay and live mode, the
 * error labels and the end of the function are not visible in this listing.
 */
2219 int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
2221 const char * const trace_usage[] = {
2222 "perf trace [<options>] [<command>]",
2223 "perf trace [<options>] -- <command> [<options>]",
2224 "perf trace record [<options>] [<command>]",
2225 "perf trace record [<options>] -- <command> [<options>]",
2228 struct trace trace = {
2230 .machine = audit_detect_machine(),
2231 .open_id = audit_name_to_syscall("open", trace.audit.machine),
2241 .user_freq = UINT_MAX,
2242 .user_interval = ULLONG_MAX,
2249 const char *output_name = NULL;
2250 const char *ev_qualifier_str = NULL;
2251 const struct option trace_options[] = {
2252 OPT_BOOLEAN(0, "comm", &trace.show_comm,
2253 "show the thread COMM next to its id"),
2254 OPT_BOOLEAN(0, "tool_stats", &trace.show_tool_stats, "show tool stats"),
2255 OPT_STRING('e', "expr", &ev_qualifier_str, "expr",
2256 "list of events to trace"),
2257 OPT_STRING('o', "output", &output_name, "file", "output file name"),
2258 OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"),
2259 OPT_STRING('p', "pid", &trace.opts.target.pid, "pid",
2260 "trace events on existing process id"),
2261 OPT_STRING('t', "tid", &trace.opts.target.tid, "tid",
2262 "trace events on existing thread id"),
2263 OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide,
2264 "system-wide collection from all CPUs"),
2265 OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu",
2266 "list of cpus to monitor"),
2267 OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit,
2268 "child tasks do not inherit counters"),
2269 OPT_CALLBACK('m', "mmap-pages", &trace.opts.mmap_pages, "pages",
2270 "number of mmap data pages",
2271 perf_evlist__parse_mmap_pages),
2272 OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user",
2274 OPT_CALLBACK(0, "duration", &trace, "float",
2275 "show only events with duration > N.M ms",
2276 trace__set_duration),
2277 OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"),
2278 OPT_INCR('v', "verbose", &verbose, "be more verbose"),
2279 OPT_BOOLEAN('T', "time", &trace.full_time,
2280 "Show full timestamp, not time relative to first start"),
2281 OPT_BOOLEAN('s', "summary", &trace.summary_only,
2282 "Show only syscall summary with statistics"),
2283 OPT_BOOLEAN('S', "with-summary", &trace.summary,
2284 "Show all syscalls and summary with statistics"),
/* The "record" sub-command bypasses option parsing; its argv is passed on. */
2290 if ((argc > 1) && (strcmp(argv[1], "record") == 0))
2291 return trace__record(argc-2, &argv[2]);
2293 argc = parse_options(argc, argv, trace_options, trace_usage, 0);
2295 /* summary_only implies summary option, but don't overwrite summary if set */
2296 if (trace.summary_only)
2297 trace.summary = trace.summary_only;
2299 if (output_name != NULL) {
2300 err = trace__open_output(&trace, output_name);
2302 perror("failed to create output file");
2307 if (ev_qualifier_str != NULL) {
2308 const char *s = ev_qualifier_str;
/* A leading '!' turns the list into "everything except these syscalls". */
2310 trace.not_ev_qualifier = *s == '!';
2311 if (trace.not_ev_qualifier)
2313 trace.ev_qualifier = strlist__new(true, s);
2314 if (trace.ev_qualifier == NULL) {
2315 fputs("Not enough memory to parse event qualifier",
2322 err = target__validate(&trace.opts.target);
2324 target__strerror(&trace.opts.target, err, bf, sizeof(bf));
2325 fprintf(trace.output, "%s", bf);
2329 err = target__parse_uid(&trace.opts.target);
2331 target__strerror(&trace.opts.target, err, bf, sizeof(bf));
2332 fprintf(trace.output, "%s", bf);
/* No command and no explicit target: fall back to tracing the whole system. */
2336 if (!argc && target__none(&trace.opts.target))
2337 trace.opts.target.system_wide = true;
2340 err = trace__replay(&trace);
2342 err = trace__run(&trace, argc, argv);
2345 if (output_name != NULL)
2346 fclose(trace.output);