1 #include <traceevent/event-parse.h>
3 #include "util/color.h"
4 #include "util/debug.h"
5 #include "util/evlist.h"
6 #include "util/machine.h"
7 #include "util/session.h"
8 #include "util/thread.h"
9 #include "util/parse-options.h"
10 #include "util/strlist.h"
11 #include "util/intlist.h"
12 #include "util/thread_map.h"
13 #include "util/stat.h"
14 #include "trace-event.h"
15 #include "util/parse-events.h"
19 #include <sys/eventfd.h>
21 #include <linux/futex.h>
23 /* For older distros: */
25 # define MAP_STACK 0x20000
29 # define MADV_HWPOISON 100
32 #ifndef MADV_MERGEABLE
33 # define MADV_MERGEABLE 12
36 #ifndef MADV_UNMERGEABLE
37 # define MADV_UNMERGEABLE 13
43 u64 (*integer)(struct tp_field *field, struct perf_sample *sample);
44 void *(*pointer)(struct tp_field *field, struct perf_sample *sample);
/*
 * TP_UINT_FIELD(bits) - generate tp_field__u<bits>(), an accessor that reads
 * the u<bits> value found field->offset bytes into sample->raw_data and
 * returns it as a u64.
 *
 * NOTE(review): the value is fetched by dereferencing a cast pointer, which
 * assumes raw_data + offset is suitably aligned for u<bits> - TODO confirm,
 * or copy the bytes out with memcpy() instead.
 */
48 #define TP_UINT_FIELD(bits) \
49 static u64 tp_field__u##bits(struct tp_field *field, struct perf_sample *sample) \
51 return *(u##bits *)(sample->raw_data + field->offset); \
/*
 * TP_UINT_FIELD__SWAPPED(bits) - generate tp_field__swapped_u<bits>(), which
 * reads the u<bits> value at sample->raw_data + field->offset and returns it
 * byte-swapped through bswap_<bits>(). Instantiated for 16, 32 and 64 bits.
 */
59 #define TP_UINT_FIELD__SWAPPED(bits) \
60 static u64 tp_field__swapped_u##bits(struct tp_field *field, struct perf_sample *sample) \
62 u##bits value = *(u##bits *)(sample->raw_data + field->offset); \
63 return bswap_##bits(value);\
66 TP_UINT_FIELD__SWAPPED(16);
67 TP_UINT_FIELD__SWAPPED(32);
68 TP_UINT_FIELD__SWAPPED(64);
/*
 * Bind @field to the tracepoint field described by @format_field as an
 * integer: remember the field's offset and pick the accessor that matches
 * format_field->size. The 16/32/64-bit accessors come in a byte-swapping
 * flavour that is selected when needs_swap is set; the 8-bit accessor has no
 * swapped variant.
 */
70 static int tp_field__init_uint(struct tp_field *field,
71 struct format_field *format_field,
74 field->offset = format_field->offset;
76 switch (format_field->size) {
78 field->integer = tp_field__u8;
81 field->integer = needs_swap ? tp_field__swapped_u16 : tp_field__u16;
84 field->integer = needs_swap ? tp_field__swapped_u32 : tp_field__u32;
87 field->integer = needs_swap ? tp_field__swapped_u64 : tp_field__u64;
/*
 * Pointer accessor: return the address of the field inside the sample's raw
 * data (raw_data + field->offset). Nothing is copied.
 */
96 static void *tp_field__ptr(struct tp_field *field, struct perf_sample *sample)
98 return sample->raw_data + field->offset;
/*
 * Bind @field to @format_field as a pointer field: remember the field's
 * offset and install tp_field__ptr() as its pointer accessor.
 */
101 static int tp_field__init_ptr(struct tp_field *field, struct format_field *format_field)
103 field->offset = format_field->offset;
104 field->pointer = tp_field__ptr;
111 struct tp_field args, ret;
/*
 * Look up the tracepoint field called @name on @evsel with
 * perf_evsel__field() and, when it exists, set @field up as an integer
 * accessor for it, byte-swapping according to evsel->needs_swap.
 * Returns the result of tp_field__init_uint() in that case.
 */
115 static int perf_evsel__init_tp_uint_field(struct perf_evsel *evsel,
116 struct tp_field *field,
119 struct format_field *format_field = perf_evsel__field(evsel, name);
121 if (format_field == NULL)
124 return tp_field__init_uint(field, format_field, evsel->needs_swap);
/*
 * Initialise the integer member @name of the struct syscall_tp kept in
 * evsel->priv from the tracepoint field with the same name (#name turns the
 * member name into the field-name string). Evaluates to the result of
 * perf_evsel__init_tp_uint_field().
 */
127 #define perf_evsel__init_sc_tp_uint_field(evsel, name) \
128 ({ struct syscall_tp *sc = evsel->priv;\
129 perf_evsel__init_tp_uint_field(evsel, &sc->name, #name); })
/*
 * Look up the tracepoint field called @name on @evsel with
 * perf_evsel__field() and, when it exists, set @field up as a pointer
 * accessor for it. Returns the result of tp_field__init_ptr() in that case.
 */
131 static int perf_evsel__init_tp_ptr_field(struct perf_evsel *evsel,
132 struct tp_field *field,
135 struct format_field *format_field = perf_evsel__field(evsel, name);
137 if (format_field == NULL)
140 return tp_field__init_ptr(field, format_field);
/*
 * Initialise the pointer member @name of the struct syscall_tp kept in
 * evsel->priv from the tracepoint field with the same name (#name turns the
 * member name into the field-name string). Evaluates to the result of
 * perf_evsel__init_tp_ptr_field().
 */
143 #define perf_evsel__init_sc_tp_ptr_field(evsel, name) \
144 ({ struct syscall_tp *sc = evsel->priv;\
145 perf_evsel__init_tp_ptr_field(evsel, &sc->name, #name); })
/*
 * Destroy @evsel through perf_evsel__delete().
 * NOTE(review): going by the name this presumably also releases evsel->priv
 * before the delete - TODO confirm.
 */
147 static void perf_evsel__delete_priv(struct perf_evsel *evsel)
151 perf_evsel__delete(evsel);
/*
 * Give @evsel its syscall tracepoint state: allocate a struct syscall_tp as
 * evsel->priv, bind its "id" integer field, and once that worked install
 * @handler as evsel->handler.
 */
154 static int perf_evsel__init_syscall_tp(struct perf_evsel *evsel, void *handler)
156 evsel->priv = malloc(sizeof(struct syscall_tp));
157 if (evsel->priv != NULL) {
158 if (perf_evsel__init_sc_tp_uint_field(evsel, id))
161 evsel->handler = handler;
/*
 * Create the evsel for the syscall tracepoint named @direction, trying the
 * "raw_syscalls" subsystem first and the "syscalls" one as the alternative,
 * then initialise its syscall_tp state with @handler. When that
 * initialisation reports an error the evsel is disposed of with
 * perf_evsel__delete_priv().
 */
173 static struct perf_evsel *perf_evsel__syscall_newtp(const char *direction, void *handler)
175 struct perf_evsel *evsel = perf_evsel__newtp("raw_syscalls", direction);
177 /* older kernels (e.g., RHEL6) use syscalls:{enter,exit} */
179 evsel = perf_evsel__newtp("syscalls", direction);
182 if (perf_evsel__init_syscall_tp(evsel, handler))
189 perf_evsel__delete_priv(evsel);
/*
 * Read integer member @name of the struct syscall_tp in evsel->priv from
 * @sample by calling the accessor stored in that member.
 */
193 #define perf_evsel__sc_tp_uint(evsel, name, sample) \
194 ({ struct syscall_tp *fields = evsel->priv; \
195 fields->name.integer(&fields->name, sample); })
/*
 * Get a pointer to member @name of the struct syscall_tp in evsel->priv
 * inside @sample by calling the pointer accessor stored in that member.
 */
197 #define perf_evsel__sc_tp_ptr(evsel, name, sample) \
198 ({ struct syscall_tp *fields = evsel->priv; \
199 fields->name.pointer(&fields->name, sample); })
/*
 * Create the sys_enter and sys_exit syscall tracepoint events and add both
 * to @evlist.
 *
 * sys_enter gets @sys_enter_handler and its "args" pointer field bound;
 * sys_exit gets @sys_exit_handler and its "ret" integer field bound. Nothing
 * is added to the list until both events are fully set up; a failure at any
 * step jumps to the cleanup labels, which delete whatever had been created.
 */
201 static int perf_evlist__add_syscall_newtp(struct perf_evlist *evlist,
202 void *sys_enter_handler,
203 void *sys_exit_handler)
206 struct perf_evsel *sys_enter, *sys_exit;
208 sys_enter = perf_evsel__syscall_newtp("sys_enter", sys_enter_handler);
209 if (sys_enter == NULL)
212 if (perf_evsel__init_sc_tp_ptr_field(sys_enter, args))
213 goto out_delete_sys_enter;
215 sys_exit = perf_evsel__syscall_newtp("sys_exit", sys_exit_handler);
216 if (sys_exit == NULL)
217 goto out_delete_sys_enter;
219 if (perf_evsel__init_sc_tp_uint_field(sys_exit, ret))
220 goto out_delete_sys_exit;
222 perf_evlist__add(evlist, sys_enter);
223 perf_evlist__add(evlist, sys_exit);
230 perf_evsel__delete_priv(sys_exit);
231 out_delete_sys_enter:
232 perf_evsel__delete_priv(sys_enter);
239 struct thread *thread;
249 const char **entries;
/*
 * DEFINE_STRARRAY(array) and DEFINE_STRARRAY_OFFSET(array, off) define
 * strarray__<array>, the struct strarray describing the string table @array.
 * nr_entries is computed with ARRAY_SIZE(), so @array has to be a real array
 * (not a pointer) visible at the point of use. The _OFFSET form presumably
 * records @off as the value that maps to entry 0 - TODO confirm.
 */
252 #define DEFINE_STRARRAY(array) struct strarray strarray__##array = { \
253 .nr_entries = ARRAY_SIZE(array), \
257 #define DEFINE_STRARRAY_OFFSET(array, off) struct strarray strarray__##array = { \
259 .nr_entries = ARRAY_SIZE(array), \
263 static size_t __syscall_arg__scnprintf_strarray(char *bf, size_t size,
265 struct syscall_arg *arg)
267 struct strarray *sa = arg->parm;
268 int idx = arg->val - sa->offset;
270 if (idx < 0 || idx >= sa->nr_entries)
271 return scnprintf(bf, size, intfmt, arg->val);
273 return scnprintf(bf, size, "%s", sa->entries[idx]);
/*
 * String-table formatter whose numeric fallback is decimal ("%d").
 */
276 static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size,
277 struct syscall_arg *arg)
279 return __syscall_arg__scnprintf_strarray(bf, size, "%d", arg);
282 #define SCA_STRARRAY syscall_arg__scnprintf_strarray
/*
 * String-table formatter whose numeric fallback is hexadecimal ("%#x").
 */
284 static size_t syscall_arg__scnprintf_strhexarray(char *bf, size_t size,
285 struct syscall_arg *arg)
287 return __syscall_arg__scnprintf_strarray(bf, size, "%#x", arg);
290 #define SCA_STRHEXARRAY syscall_arg__scnprintf_strhexarray
292 static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
293 struct syscall_arg *arg);
295 #define SCA_FD syscall_arg__scnprintf_fd
/*
 * Format the directory fd of an *at() syscall: one special value is shown as
 * "CWD" (presumably AT_FDCWD - TODO confirm), everything else is handed to
 * the regular fd formatter.
 */
297 static size_t syscall_arg__scnprintf_fd_at(char *bf, size_t size,
298 struct syscall_arg *arg)
303 return scnprintf(bf, size, "CWD");
305 return syscall_arg__scnprintf_fd(bf, size, arg);
308 #define SCA_FDAT syscall_arg__scnprintf_fd_at
310 static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
311 struct syscall_arg *arg);
313 #define SCA_CLOSE_FD syscall_arg__scnprintf_close_fd
/*
 * Format arg->val in hexadecimal with a 0x prefix ("%#lx").
 */
315 static size_t syscall_arg__scnprintf_hex(char *bf, size_t size,
316 struct syscall_arg *arg)
318 return scnprintf(bf, size, "%#lx", arg->val);
321 #define SCA_HEX syscall_arg__scnprintf_hex
/*
 * Format a memory protection mask (used for mmap and mprotect): "NONE" for
 * PROT_NONE, otherwise the names of the PROT_* bits that are set, joined
 * with '|'. The last scnprintf() appends prot itself in hex, presumably only
 * for bits that none of the names covered - TODO confirm.
 * Returns the number of characters placed in @bf.
 */
323 static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size,
324 struct syscall_arg *arg)
326 int printed = 0, prot = arg->val;
328 if (prot == PROT_NONE)
329 return scnprintf(bf, size, "NONE");
330 #define P_MMAP_PROT(n) \
331 if (prot & PROT_##n) { \
332 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
342 P_MMAP_PROT(GROWSDOWN);
343 P_MMAP_PROT(GROWSUP);
347 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", prot);
352 #define SCA_MMAP_PROT syscall_arg__scnprintf_mmap_prot
/*
 * Format mmap flags as the names of the MAP_* bits that are set, joined with
 * '|'. MAP_UNINITIALIZED is only decoded when the headers define it. The
 * last scnprintf() appends flags in hex, presumably only for bits that none
 * of the names covered - TODO confirm.
 * Returns the number of characters placed in @bf.
 */
354 static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size,
355 struct syscall_arg *arg)
357 int printed = 0, flags = arg->val;
359 #define P_MMAP_FLAG(n) \
360 if (flags & MAP_##n) { \
361 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
366 P_MMAP_FLAG(PRIVATE);
370 P_MMAP_FLAG(ANONYMOUS);
371 P_MMAP_FLAG(DENYWRITE);
372 P_MMAP_FLAG(EXECUTABLE);
375 P_MMAP_FLAG(GROWSDOWN);
377 P_MMAP_FLAG(HUGETLB);
380 P_MMAP_FLAG(NONBLOCK);
381 P_MMAP_FLAG(NORESERVE);
382 P_MMAP_FLAG(POPULATE);
384 #ifdef MAP_UNINITIALIZED
385 P_MMAP_FLAG(UNINITIALIZED);
390 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
395 #define SCA_MMAP_FLAGS syscall_arg__scnprintf_mmap_flags
/*
 * Format the madvise() behavior argument: a known MADV_* value is printed by
 * name (each P_MADV_BHV() expands to a case that returns immediately), any
 * other value is printed in hex. Values such as SOFT_OFFLINE and NOHUGEPAGE
 * are only decoded when the headers define them.
 */
397 static size_t syscall_arg__scnprintf_madvise_behavior(char *bf, size_t size,
398 struct syscall_arg *arg)
400 int behavior = arg->val;
403 #define P_MADV_BHV(n) case MADV_##n: return scnprintf(bf, size, #n)
406 P_MADV_BHV(SEQUENTIAL);
407 P_MADV_BHV(WILLNEED);
408 P_MADV_BHV(DONTNEED);
410 P_MADV_BHV(DONTFORK);
412 P_MADV_BHV(HWPOISON);
413 #ifdef MADV_SOFT_OFFLINE
414 P_MADV_BHV(SOFT_OFFLINE);
416 P_MADV_BHV(MERGEABLE);
417 P_MADV_BHV(UNMERGEABLE);
419 P_MADV_BHV(HUGEPAGE);
421 #ifdef MADV_NOHUGEPAGE
422 P_MADV_BHV(NOHUGEPAGE);
425 P_MADV_BHV(DONTDUMP);
434 return scnprintf(bf, size, "%#x", behavior);
437 #define SCA_MADV_BHV syscall_arg__scnprintf_madvise_behavior
/*
 * Format the flock() operation argument as '|'-joined LOCK_* names. A name
 * is printed only when all bits of that LOCK_* constant are present in op.
 * One value is rendered as "NONE" (presumably op == 0 - TODO confirm), and
 * the last scnprintf() appends op in hex, presumably only for bits that none
 * of the names covered - TODO confirm.
 */
439 static size_t syscall_arg__scnprintf_flock(char *bf, size_t size,
440 struct syscall_arg *arg)
442 int printed = 0, op = arg->val;
445 return scnprintf(bf, size, "NONE");
447 if ((op & LOCK_##cmd) == LOCK_##cmd) { \
448 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #cmd); \
463 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", op);
468 #define SCA_FLOCK syscall_arg__scnprintf_flock
/*
 * Format the futex() op argument.
 *
 * The command part (op & FUTEX_CMD_MASK) is printed by name, or in hex when
 * it is not one of the known FUTEX_* commands; "|PRIV" and "|CLKRT" are
 * appended when FUTEX_PRIVATE_FLAG and FUTEX_CLOCK_REALTIME are set.
 *
 * Each known command also ORs a set of SCF_* bits into arg->mask. The SCF_*
 * values are one bit per argument position; presumably a set bit marks an
 * argument that the command does not use so that it can be left out of the
 * output - TODO confirm against the code that consumes arg->mask.
 */
470 static size_t syscall_arg__scnprintf_futex_op(char *bf, size_t size, struct syscall_arg *arg)
472 enum syscall_futex_args {
473 SCF_UADDR = (1 << 0),
476 SCF_TIMEOUT = (1 << 3),
477 SCF_UADDR2 = (1 << 4),
481 int cmd = op & FUTEX_CMD_MASK;
485 #define P_FUTEX_OP(n) case FUTEX_##n: printed = scnprintf(bf, size, #n);
486 P_FUTEX_OP(WAIT); arg->mask |= SCF_VAL3|SCF_UADDR2; break;
487 P_FUTEX_OP(WAKE); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
488 P_FUTEX_OP(FD); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
489 P_FUTEX_OP(REQUEUE); arg->mask |= SCF_VAL3|SCF_TIMEOUT; break;
490 P_FUTEX_OP(CMP_REQUEUE); arg->mask |= SCF_TIMEOUT; break;
491 P_FUTEX_OP(CMP_REQUEUE_PI); arg->mask |= SCF_TIMEOUT; break;
492 P_FUTEX_OP(WAKE_OP); break;
493 P_FUTEX_OP(LOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
494 P_FUTEX_OP(UNLOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
495 P_FUTEX_OP(TRYLOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2; break;
496 P_FUTEX_OP(WAIT_BITSET); arg->mask |= SCF_UADDR2; break;
497 P_FUTEX_OP(WAKE_BITSET); arg->mask |= SCF_UADDR2; break;
498 P_FUTEX_OP(WAIT_REQUEUE_PI); break;
499 default: printed = scnprintf(bf, size, "%#x", cmd); break;
502 if (op & FUTEX_PRIVATE_FLAG)
503 printed += scnprintf(bf + printed, size - printed, "|PRIV");
505 if (op & FUTEX_CLOCK_REALTIME)
506 printed += scnprintf(bf + printed, size - printed, "|CLKRT");
511 #define SCA_FUTEX_OP syscall_arg__scnprintf_futex_op
513 static const char *epoll_ctl_ops[] = { "ADD", "DEL", "MOD", };
514 static DEFINE_STRARRAY_OFFSET(epoll_ctl_ops, 1);
516 static const char *itimers[] = { "REAL", "VIRTUAL", "PROF", };
517 static DEFINE_STRARRAY(itimers);
519 static const char *whences[] = { "SET", "CUR", "END",
527 static DEFINE_STRARRAY(whences);
529 static const char *fcntl_cmds[] = {
530 "DUPFD", "GETFD", "SETFD", "GETFL", "SETFL", "GETLK", "SETLK",
531 "SETLKW", "SETOWN", "GETOWN", "SETSIG", "GETSIG", "F_GETLK64",
532 "F_SETLK64", "F_SETLKW64", "F_SETOWN_EX", "F_GETOWN_EX",
535 static DEFINE_STRARRAY(fcntl_cmds);
537 static const char *rlimit_resources[] = {
538 "CPU", "FSIZE", "DATA", "STACK", "CORE", "RSS", "NPROC", "NOFILE",
539 "MEMLOCK", "AS", "LOCKS", "SIGPENDING", "MSGQUEUE", "NICE", "RTPRIO",
542 static DEFINE_STRARRAY(rlimit_resources);
544 static const char *sighow[] = { "BLOCK", "UNBLOCK", "SETMASK", };
545 static DEFINE_STRARRAY(sighow);
547 static const char *clockid[] = {
548 "REALTIME", "MONOTONIC", "PROCESS_CPUTIME_ID", "THREAD_CPUTIME_ID",
549 "MONOTONIC_RAW", "REALTIME_COARSE", "MONOTONIC_COARSE",
551 static DEFINE_STRARRAY(clockid);
553 static const char *socket_families[] = {
554 "UNSPEC", "LOCAL", "INET", "AX25", "IPX", "APPLETALK", "NETROM",
555 "BRIDGE", "ATMPVC", "X25", "INET6", "ROSE", "DECnet", "NETBEUI",
556 "SECURITY", "KEY", "NETLINK", "PACKET", "ASH", "ECONET", "ATMSVC",
557 "RDS", "SNA", "IRDA", "PPPOX", "WANPIPE", "LLC", "IB", "CAN", "TIPC",
558 "BLUETOOTH", "IUCV", "RXRPC", "ISDN", "PHONET", "IEEE802154", "CAIF",
559 "ALG", "NFC", "VSOCK",
561 static DEFINE_STRARRAY(socket_families);
563 #ifndef SOCK_TYPE_MASK
564 #define SOCK_TYPE_MASK 0xf
/*
 * Format the socket() type argument. The value is split with SOCK_TYPE_MASK:
 * the masked part is the socket type, printed by name or in hex when
 * unknown; the remaining bits are flags, each appended as "|NAME" and
 * cleared once printed, with any bits still left appended in hex.
 */
567 static size_t syscall_arg__scnprintf_socket_type(char *bf, size_t size,
568 struct syscall_arg *arg)
572 flags = type & ~SOCK_TYPE_MASK;
574 type &= SOCK_TYPE_MASK;
576 * Can't use a strarray, MIPS may override for ABI reasons.
579 #define P_SK_TYPE(n) case SOCK_##n: printed = scnprintf(bf, size, #n); break;
584 P_SK_TYPE(SEQPACKET);
589 printed = scnprintf(bf, size, "%#x", type);
592 #define P_SK_FLAG(n) \
593 if (flags & SOCK_##n) { \
594 printed += scnprintf(bf + printed, size - printed, "|%s", #n); \
595 flags &= ~SOCK_##n; \
603 printed += scnprintf(bf + printed, size - printed, "|%#x", flags);
608 #define SCA_SK_TYPE syscall_arg__scnprintf_socket_type
611 #define MSG_PROBE 0x10
613 #ifndef MSG_WAITFORONE
614 #define MSG_WAITFORONE 0x10000
616 #ifndef MSG_SENDPAGE_NOTLAST
617 #define MSG_SENDPAGE_NOTLAST 0x20000
620 #define MSG_FASTOPEN 0x20000000
/*
 * Format send/recv message flags as '|'-joined MSG_* names. One value is
 * rendered as "NONE" (presumably flags == 0 - TODO confirm), and the last
 * scnprintf() appends flags in hex, presumably only for bits that none of
 * the names covered - TODO confirm.
 */
623 static size_t syscall_arg__scnprintf_msg_flags(char *bf, size_t size,
624 struct syscall_arg *arg)
626 int printed = 0, flags = arg->val;
629 return scnprintf(bf, size, "NONE");
630 #define P_MSG_FLAG(n) \
631 if (flags & MSG_##n) { \
632 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
638 P_MSG_FLAG(DONTROUTE);
643 P_MSG_FLAG(DONTWAIT);
650 P_MSG_FLAG(ERRQUEUE);
651 P_MSG_FLAG(NOSIGNAL);
653 P_MSG_FLAG(WAITFORONE);
654 P_MSG_FLAG(SENDPAGE_NOTLAST);
655 P_MSG_FLAG(FASTOPEN);
656 P_MSG_FLAG(CMSG_CLOEXEC);
660 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
665 #define SCA_MSG_FLAGS syscall_arg__scnprintf_msg_flags
/*
 * Format the access() mode argument: "F" for F_OK, otherwise one letter per
 * <n>_OK bit that is set, written back to back with no separator. The last
 * scnprintf() appends mode in hex after a '|', presumably only for bits that
 * none of the letters covered - TODO confirm.
 */
667 static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size,
668 struct syscall_arg *arg)
673 if (mode == F_OK) /* 0 */
674 return scnprintf(bf, size, "F");
676 if (mode & n##_OK) { \
677 printed += scnprintf(bf + printed, size - printed, "%s", #n); \
687 printed += scnprintf(bf + printed, size - printed, "|%#x", mode);
692 #define SCA_ACCMODE syscall_arg__scnprintf_access_mode
/*
 * Format open() flags as '|'-joined O_* names.
 *
 * When O_CREAT is absent the bit for the argument right after this one (the
 * mode) is set in arg->mask. One value is rendered as "RDONLY" (presumably
 * flags == 0 - TODO confirm). SYNC is printed only when every bit of O_SYNC
 * is present, not merely some of them. The last scnprintf() appends flags in
 * hex, presumably only for bits that none of the names covered - TODO
 * confirm.
 */
694 static size_t syscall_arg__scnprintf_open_flags(char *bf, size_t size,
695 struct syscall_arg *arg)
697 int printed = 0, flags = arg->val;
699 if (!(flags & O_CREAT))
700 arg->mask |= 1 << (arg->idx + 1); /* Mask the mode parm */
703 return scnprintf(bf, size, "RDONLY");
705 if (flags & O_##n) { \
706 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
730 if ((flags & O_SYNC) == O_SYNC)
731 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", "SYNC");
743 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
748 #define SCA_OPEN_FLAGS syscall_arg__scnprintf_open_flags
/*
 * Format eventfd flags as '|'-joined EFD_* names. One value is rendered as
 * "NONE" (presumably flags == 0 - TODO confirm), and the last scnprintf()
 * appends flags in hex, presumably only for bits that none of the names
 * covered - TODO confirm.
 */
750 static size_t syscall_arg__scnprintf_eventfd_flags(char *bf, size_t size,
751 struct syscall_arg *arg)
753 int printed = 0, flags = arg->val;
756 return scnprintf(bf, size, "NONE");
758 if (flags & EFD_##n) { \
759 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
769 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
774 #define SCA_EFD_FLAGS syscall_arg__scnprintf_eventfd_flags
/*
 * Format pipe2() flags as '|'-joined O_* names. The last scnprintf() appends
 * flags in hex, presumably only for bits that none of the names covered -
 * TODO confirm.
 */
776 static size_t syscall_arg__scnprintf_pipe_flags(char *bf, size_t size,
777 struct syscall_arg *arg)
779 int printed = 0, flags = arg->val;
782 if (flags & O_##n) { \
783 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
792 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
797 #define SCA_PIPE_FLAGS syscall_arg__scnprintf_pipe_flags
/*
 * Format a signal number: a known SIG<n> value is printed by name without
 * the "SIG" prefix (each P_SIGNUM() expands to a case that returns
 * immediately), any other value is printed in hex.
 */
799 static size_t syscall_arg__scnprintf_signum(char *bf, size_t size, struct syscall_arg *arg)
804 #define P_SIGNUM(n) case SIG##n: return scnprintf(bf, size, #n)
839 return scnprintf(bf, size, "%#x", sig);
842 #define SCA_SIGNUM syscall_arg__scnprintf_signum
844 #define TCGETS 0x5401
/*
 * Names of terminal ioctl requests, indexed by request number minus 0x5401
 * (the offset handed to DEFINE_STRARRAY_OFFSET below).
 *
 * The [0x27], [0x50] and [0x60] designators jump ahead in the array, so the
 * slots they skip over have no initializer and are NULL: consumers of this
 * table must be ready for a NULL entry at an in-range index.
 */
846 static const char *tioctls[] = {
847 "TCGETS", "TCSETS", "TCSETSW", "TCSETSF", "TCGETA", "TCSETA", "TCSETAW",
848 "TCSETAF", "TCSBRK", "TCXONC", "TCFLSH", "TIOCEXCL", "TIOCNXCL",
849 "TIOCSCTTY", "TIOCGPGRP", "TIOCSPGRP", "TIOCOUTQ", "TIOCSTI",
850 "TIOCGWINSZ", "TIOCSWINSZ", "TIOCMGET", "TIOCMBIS", "TIOCMBIC",
851 "TIOCMSET", "TIOCGSOFTCAR", "TIOCSSOFTCAR", "FIONREAD", "TIOCLINUX",
852 "TIOCCONS", "TIOCGSERIAL", "TIOCSSERIAL", "TIOCPKT", "FIONBIO",
853 "TIOCNOTTY", "TIOCSETD", "TIOCGETD", "TCSBRKP", [0x27] = "TIOCSBRK",
854 "TIOCCBRK", "TIOCGSID", "TCGETS2", "TCSETS2", "TCSETSW2", "TCSETSF2",
855 "TIOCGRS485", "TIOCSRS485", "TIOCGPTN", "TIOCSPTLCK",
856 "TIOCGDEV||TCGETX", "TCSETX", "TCSETXF", "TCSETXW", "TIOCSIG",
857 "TIOCVHANGUP", "TIOCGPKT", "TIOCGPTLCK", "TIOCGEXCL",
858 [0x50] = "FIONCLEX", "FIOCLEX", "FIOASYNC", "TIOCSERCONFIG",
859 "TIOCSERGWILD", "TIOCSERSWILD", "TIOCGLCKTRMIOS", "TIOCSLCKTRMIOS",
860 "TIOCSERGSTRUCT", "TIOCSERGETLSR", "TIOCSERGETMULTI", "TIOCSERSETMULTI",
861 "TIOCMIWAIT", "TIOCGICOUNT", [0x60] = "FIOQSIZE",
864 static DEFINE_STRARRAY_OFFSET(tioctls, 0x5401);
866 #define STRARRAY(arg, name, array) \
867 .arg_scnprintf = { [arg] = SCA_STRARRAY, }, \
868 .arg_parm = { [arg] = &strarray__##array, }
870 static struct syscall_fmt {
873 size_t (*arg_scnprintf[6])(char *bf, size_t size, struct syscall_arg *arg);
879 { .name = "access", .errmsg = true,
880 .arg_scnprintf = { [1] = SCA_ACCMODE, /* mode */ }, },
881 { .name = "arch_prctl", .errmsg = true, .alias = "prctl", },
882 { .name = "brk", .hexret = true,
883 .arg_scnprintf = { [0] = SCA_HEX, /* brk */ }, },
884 { .name = "clock_gettime", .errmsg = true, STRARRAY(0, clk_id, clockid), },
885 { .name = "close", .errmsg = true,
886 .arg_scnprintf = { [0] = SCA_CLOSE_FD, /* fd */ }, },
887 { .name = "connect", .errmsg = true, },
888 { .name = "dup", .errmsg = true,
889 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
890 { .name = "dup2", .errmsg = true,
891 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
892 { .name = "dup3", .errmsg = true,
893 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
894 { .name = "epoll_ctl", .errmsg = true, STRARRAY(1, op, epoll_ctl_ops), },
895 { .name = "eventfd2", .errmsg = true,
896 .arg_scnprintf = { [1] = SCA_EFD_FLAGS, /* flags */ }, },
897 { .name = "faccessat", .errmsg = true,
898 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
899 { .name = "fadvise64", .errmsg = true,
900 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
901 { .name = "fallocate", .errmsg = true,
902 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
903 { .name = "fchdir", .errmsg = true,
904 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
905 { .name = "fchmod", .errmsg = true,
906 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
907 { .name = "fchmodat", .errmsg = true,
908 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
909 { .name = "fchown", .errmsg = true,
910 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
911 { .name = "fchownat", .errmsg = true,
912 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
913 { .name = "fcntl", .errmsg = true,
914 .arg_scnprintf = { [0] = SCA_FD, /* fd */
915 [1] = SCA_STRARRAY, /* cmd */ },
916 .arg_parm = { [1] = &strarray__fcntl_cmds, /* cmd */ }, },
917 { .name = "fdatasync", .errmsg = true,
918 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
919 { .name = "flock", .errmsg = true,
920 .arg_scnprintf = { [0] = SCA_FD, /* fd */
921 [1] = SCA_FLOCK, /* cmd */ }, },
922 { .name = "fsetxattr", .errmsg = true,
923 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
924 { .name = "fstat", .errmsg = true, .alias = "newfstat",
925 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
926 { .name = "fstatat", .errmsg = true, .alias = "newfstatat",
927 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
928 { .name = "fstatfs", .errmsg = true,
929 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
930 { .name = "fsync", .errmsg = true,
931 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
932 { .name = "ftruncate", .errmsg = true,
933 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
934 { .name = "futex", .errmsg = true,
935 .arg_scnprintf = { [1] = SCA_FUTEX_OP, /* op */ }, },
936 { .name = "futimesat", .errmsg = true,
937 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
938 { .name = "getdents", .errmsg = true,
939 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
940 { .name = "getdents64", .errmsg = true,
941 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
942 { .name = "getitimer", .errmsg = true, STRARRAY(0, which, itimers), },
943 { .name = "getrlimit", .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
944 { .name = "ioctl", .errmsg = true,
945 .arg_scnprintf = { [0] = SCA_FD, /* fd */
946 [1] = SCA_STRHEXARRAY, /* cmd */
947 [2] = SCA_HEX, /* arg */ },
948 .arg_parm = { [1] = &strarray__tioctls, /* cmd */ }, },
949 { .name = "kill", .errmsg = true,
950 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
951 { .name = "linkat", .errmsg = true,
952 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
953 { .name = "lseek", .errmsg = true,
954 .arg_scnprintf = { [0] = SCA_FD, /* fd */
955 [2] = SCA_STRARRAY, /* whence */ },
956 .arg_parm = { [2] = &strarray__whences, /* whence */ }, },
957 { .name = "lstat", .errmsg = true, .alias = "newlstat", },
958 { .name = "madvise", .errmsg = true,
959 .arg_scnprintf = { [0] = SCA_HEX, /* start */
960 [2] = SCA_MADV_BHV, /* behavior */ }, },
961 { .name = "mkdirat", .errmsg = true,
962 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
963 { .name = "mknodat", .errmsg = true,
964 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
965 { .name = "mlock", .errmsg = true,
966 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
967 { .name = "mlockall", .errmsg = true,
968 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
969 { .name = "mmap", .hexret = true,
970 .arg_scnprintf = { [0] = SCA_HEX, /* addr */
971 [2] = SCA_MMAP_PROT, /* prot */
972 [3] = SCA_MMAP_FLAGS, /* flags */
973 [4] = SCA_FD, /* fd */ }, },
974 { .name = "mprotect", .errmsg = true,
975 .arg_scnprintf = { [0] = SCA_HEX, /* start */
976 [2] = SCA_MMAP_PROT, /* prot */ }, },
977 { .name = "mremap", .hexret = true,
978 .arg_scnprintf = { [0] = SCA_HEX, /* addr */
979 [4] = SCA_HEX, /* new_addr */ }, },
980 { .name = "munlock", .errmsg = true,
981 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
982 { .name = "munmap", .errmsg = true,
983 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
984 { .name = "name_to_handle_at", .errmsg = true,
985 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
986 { .name = "newfstatat", .errmsg = true,
987 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
988 { .name = "open", .errmsg = true,
989 .arg_scnprintf = { [1] = SCA_OPEN_FLAGS, /* flags */ }, },
990 { .name = "open_by_handle_at", .errmsg = true,
991 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
992 [2] = SCA_OPEN_FLAGS, /* flags */ }, },
993 { .name = "openat", .errmsg = true,
994 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
995 [2] = SCA_OPEN_FLAGS, /* flags */ }, },
996 { .name = "pipe2", .errmsg = true,
997 .arg_scnprintf = { [1] = SCA_PIPE_FLAGS, /* flags */ }, },
998 { .name = "poll", .errmsg = true, .timeout = true, },
999 { .name = "ppoll", .errmsg = true, .timeout = true, },
1000 { .name = "pread", .errmsg = true, .alias = "pread64",
1001 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1002 { .name = "preadv", .errmsg = true, .alias = "pread",
1003 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1004 { .name = "prlimit64", .errmsg = true, STRARRAY(1, resource, rlimit_resources), },
1005 { .name = "pwrite", .errmsg = true, .alias = "pwrite64",
1006 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1007 { .name = "pwritev", .errmsg = true,
1008 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1009 { .name = "read", .errmsg = true,
1010 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1011 { .name = "readlinkat", .errmsg = true,
1012 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1013 { .name = "readv", .errmsg = true,
1014 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1015 { .name = "recvfrom", .errmsg = true,
1016 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1017 { .name = "recvmmsg", .errmsg = true,
1018 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1019 { .name = "recvmsg", .errmsg = true,
1020 .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
1021 { .name = "renameat", .errmsg = true,
1022 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1023 { .name = "rt_sigaction", .errmsg = true,
1024 .arg_scnprintf = { [0] = SCA_SIGNUM, /* sig */ }, },
1025 { .name = "rt_sigprocmask", .errmsg = true, STRARRAY(0, how, sighow), },
1026 { .name = "rt_sigqueueinfo", .errmsg = true,
1027 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1028 { .name = "rt_tgsigqueueinfo", .errmsg = true,
1029 .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
1030 { .name = "select", .errmsg = true, .timeout = true, },
1031 { .name = "sendmmsg", .errmsg = true,
1032 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1033 { .name = "sendmsg", .errmsg = true,
1034 .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
1035 { .name = "sendto", .errmsg = true,
1036 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1037 { .name = "setitimer", .errmsg = true, STRARRAY(0, which, itimers), },
1038 { .name = "setrlimit", .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
1039 { .name = "shutdown", .errmsg = true,
1040 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1041 { .name = "socket", .errmsg = true,
1042 .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
1043 [1] = SCA_SK_TYPE, /* type */ },
1044 .arg_parm = { [0] = &strarray__socket_families, /* family */ }, },
1045 { .name = "socketpair", .errmsg = true,
1046 .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
1047 [1] = SCA_SK_TYPE, /* type */ },
1048 .arg_parm = { [0] = &strarray__socket_families, /* family */ }, },
1049 { .name = "stat", .errmsg = true, .alias = "newstat", },
1050 { .name = "symlinkat", .errmsg = true,
1051 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1052 { .name = "tgkill", .errmsg = true,
1053 .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
1054 { .name = "tkill", .errmsg = true,
1055 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1056 { .name = "uname", .errmsg = true, .alias = "newuname", },
1057 { .name = "unlinkat", .errmsg = true,
1058 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1059 { .name = "utimensat", .errmsg = true,
1060 .arg_scnprintf = { [0] = SCA_FDAT, /* dirfd */ }, },
1061 { .name = "write", .errmsg = true,
1062 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1063 { .name = "writev", .errmsg = true,
1064 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
/*
 * bsearch() comparator for the syscall_fmts table: @name is the key (a
 * syscall name string) and @fmtp the table element it is compared with.
 */
1067 static int syscall_fmt__cmp(const void *name, const void *fmtp)
1069 const struct syscall_fmt *fmt = fmtp;
1070 return strcmp(name, fmt->name);
/*
 * Find the formatting description for syscall @name, or NULL when the table
 * has none. The lookup is a bsearch() ordered by strcmp() on .name, so the
 * entries of syscall_fmts[] must be kept sorted by name in strcmp() order.
 */
1073 static struct syscall_fmt *syscall_fmt__find(const char *name)
1075 const int nmemb = ARRAY_SIZE(syscall_fmts);
1076 return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp);
1080 struct event_format *tp_format;
1083 struct syscall_fmt *fmt;
1084 size_t (**arg_scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
/*
 * Print a duration to @fp as "(<value> ms): ".
 *
 * @t is divided by NSEC_PER_MSEC to get milliseconds (so presumably it is in
 * nanoseconds). The value is coloured by magnitude: red from 1 ms upwards,
 * yellow from 0.01 ms upwards, normal colour below that.
 * Returns the sum of what the individual print calls reported.
 */
1088 static size_t fprintf_duration(unsigned long t, FILE *fp)
1090 double duration = (double)t / NSEC_PER_MSEC;
1091 size_t printed = fprintf(fp, "(");
1093 if (duration >= 1.0)
1094 printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration);
1095 else if (duration >= 0.01)
1096 printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration);
1098 printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration);
1099 return printed + fprintf(fp, "): ");
1102 struct thread_trace {
1106 unsigned long nr_events;
1114 struct intlist *syscall_stats;
/*
 * Allocate a zeroed struct thread_trace. paths.max starts at -1, the value
 * the fd path table code treats as "no table yet", and syscall_stats is set
 * to a fresh intlist.
 */
1117 static struct thread_trace *thread_trace__new(void)
1119 struct thread_trace *ttrace = zalloc(sizeof(struct thread_trace));
1122 ttrace->paths.max = -1;
1124 ttrace->syscall_stats = intlist__new(NULL);
/*
 * Get the per-thread trace state kept in thread->priv, creating it on first
 * use, and count one more event for the thread. A red "not enough memory"
 * warning is written to @fp on the failure path (presumably when the state
 * could not be allocated - TODO confirm).
 */
1129 static struct thread_trace *thread__trace(struct thread *thread, FILE *fp)
1131 struct thread_trace *ttrace;
1136 if (thread->priv == NULL)
1137 thread->priv = thread_trace__new();
1139 if (thread->priv == NULL)
1142 ttrace = thread->priv;
1143 ++ttrace->nr_events;
1147 color_fprintf(fp, PERF_COLOR_RED,
1148 "WARNING: not enough memory, dropping samples!\n");
1153 struct perf_tool tool;
1160 struct syscall *table;
1162 struct perf_record_opts opts;
1163 struct machine *host;
1167 unsigned long nr_events;
1168 struct strlist *ev_qualifier;
1169 bool not_ev_qualifier;
1171 const char *last_vfs_getname;
1172 struct intlist *tid_list;
1173 struct intlist *pid_list;
1175 bool multiple_threads;
1179 bool show_tool_stats;
1180 double duration_filter;
1183 u64 vfs_getname, proc_getname;
/*
 * Remember @pathname as the path behind @fd for @thread.
 *
 * The per-thread table is indexed by fd. When @fd lies beyond the current
 * maximum the table is grown to fd + 1 slots and the newly added slots are
 * zeroed (the whole table on first use, when paths.max is still -1). The
 * table stores its own strdup() copy of @pathname.
 * Returns 0 on success, -1 when the copy could not be allocated.
 *
 * NOTE(review): thread->priv is dereferenced without a NULL check, @fd is
 * assumed to be non-negative, and a path already stored in the slot is
 * overwritten without being freed - confirm that callers rule these out.
 */
1187 static int trace__set_fd_pathname(struct thread *thread, int fd, const char *pathname)
1189 struct thread_trace *ttrace = thread->priv;
1191 if (fd > ttrace->paths.max) {
1192 char **npath = realloc(ttrace->paths.table, (fd + 1) * sizeof(char *));
1197 if (ttrace->paths.max != -1) {
1198 memset(npath + ttrace->paths.max + 1, 0,
1199 (fd - ttrace->paths.max) * sizeof(char *));
1201 memset(npath, 0, (fd + 1) * sizeof(char *));
1204 ttrace->paths.table = npath;
1205 ttrace->paths.max = fd;
1208 ttrace->paths.table[fd] = strdup(pathname);
1210 return ttrace->paths.table[fd] != NULL ? 0 : -1;
/*
 * Resolve @fd of @thread to a path by reading its symlink under /proc and
 * store the result with trace__set_fd_pathname().
 *
 * The link is /proc/<pid>/fd/<fd> when the thread is the group leader
 * (pid_ == tid) and /proc/<pid>/task/<tid>/fd/<fd> otherwise. The link is
 * lstat()ed first and rejected unless its size plus a terminator fits in the
 * local buffer; a readlink() result longer than that size is rejected too.
 */
1213 static int thread__read_fd_path(struct thread *thread, int fd)
1215 char linkname[PATH_MAX], pathname[PATH_MAX];
1219 if (thread->pid_ == thread->tid) {
1220 scnprintf(linkname, sizeof(linkname),
1221 "/proc/%d/fd/%d", thread->pid_, fd);
1223 scnprintf(linkname, sizeof(linkname),
1224 "/proc/%d/task/%d/fd/%d", thread->pid_, thread->tid, fd);
1227 if (lstat(linkname, &st) < 0 || st.st_size + 1 > (off_t)sizeof(pathname))
1230 ret = readlink(linkname, pathname, sizeof(pathname));
1232 if (ret < 0 || ret > st.st_size)
/* readlink() does not terminate the string; the checks above keep ret inside pathname */
1235 pathname[ret] = '\0';
1236 return trace__set_fd_pathname(thread, fd, pathname);
/*
 * Return the cached path for @fd of @thread. When the fd lies beyond the
 * table or has no cached path yet, the path is read from /proc via
 * thread__read_fd_path() and trace->stats.proc_getname counts the attempt.
 *
 * NOTE(review): the "fd > paths.max || table[fd] == NULL" condition carries
 * no opening brace of its own, while the thread__read_fd_path() test further
 * down opens one. Confirm that the counter bump and the /proc read are
 * really guarded by that condition and not executed on every call.
 */
1239 static const char *thread__fd_path(struct thread *thread, int fd,
1240 struct trace *trace)
1242 struct thread_trace *ttrace = thread->priv;
1250 if ((fd > ttrace->paths.max || ttrace->paths.table[fd] == NULL))
1253 ++trace->stats.proc_getname;
1254 if (thread__read_fd_path(thread, fd)) {
1258 return ttrace->paths.table[fd];
/*
 * Format a file descriptor argument as its decimal number, followed by the
 * path looked up with thread__fd_path() in angle brackets, e.g. "3<path>".
 * Returns the number of characters placed in @bf.
 */
1261 static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
1262 struct syscall_arg *arg)
1265 size_t printed = scnprintf(bf, size, "%d", fd);
1266 const char *path = thread__fd_path(arg->thread, fd, arg->trace);
1269 printed += scnprintf(bf + printed, size - printed, "<%s>", path);
/*
 * Format the fd argument of close() like any other fd, then drop the cached
 * path for it: the string is freed and the slot reset to NULL, so the path
 * is looked up afresh the next time this fd number is seen.
 */
1274 static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
1275 struct syscall_arg *arg)
1278 size_t printed = syscall_arg__scnprintf_fd(bf, size, arg);
1279 struct thread_trace *ttrace = arg->thread->priv;
1281 if (ttrace && fd >= 0 && fd <= ttrace->paths.max) {
1282 free(ttrace->paths.table[fd]);
1283 ttrace->paths.table[fd] = NULL;
/*
 * Return true when @t is below the configured duration filter.
 * trace->duration_filter is scaled by NSEC_PER_MSEC before the comparison,
 * so the filter is in milliseconds and @t presumably in nanoseconds.
 */
1289 static bool trace__filter_duration(struct trace *trace, double t)
1291 return t < (trace->duration_filter * NSEC_PER_MSEC);
/*
 * Print @tstamp to @fp relative to trace->base_time, converted to
 * milliseconds with three decimals and followed by a space.
 * Returns what fprintf() returned.
 */
1294 static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
1296 double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC;
1298 return fprintf(fp, "%10.3f ", ts);
/*
 * Flags shared with the signal handler below. "interrupted" records whether
 * the signal that arrived was SIGINT.
 *
 * NOTE(review): both flags are plain bool objects written from a signal
 * handler; volatile sig_atomic_t is the type the C standard guarantees for
 * that use - consider switching.
 */
1301 static bool done = false;
1302 static bool interrupted = false;
1304 static void sig_handler(int sig)
1307 interrupted = sig == SIGINT;
/*
 * Print the leading part of a trace line to @fp: the relative timestamp and
 * the duration, and, when several threads are being traced, the thread id,
 * preceded by the command name (at most 14 characters) and a '/' when
 * trace->show_comm is set.
 */
1310 static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread,
1311 u64 duration, u64 tstamp, FILE *fp)
1313 size_t printed = trace__fprintf_tstamp(trace, tstamp, fp);
1314 printed += fprintf_duration(duration, fp);
1316 if (trace->multiple_threads) {
1317 if (trace->show_comm)
1318 printed += fprintf(fp, "%.14s/", thread__comm_str(thread));
1319 printed += fprintf(fp, "%d ", thread->tid);
/*
 * Process one non-sample perf event. PERF_RECORD_LOST is reported in red on
 * trace->output with the number of lost events and then passed to
 * machine__process_lost_event(); events are otherwise passed to
 * machine__process_event().
 */
1325 static int trace__process_event(struct trace *trace, struct machine *machine,
1326 union perf_event *event, struct perf_sample *sample)
1330 switch (event->header.type) {
1331 case PERF_RECORD_LOST:
1332 color_fprintf(trace->output, PERF_COLOR_RED,
1333 "LOST %" PRIu64 " events!\n", event->lost.lost);
1334 ret = machine__process_lost_event(machine, event, sample);
1336 ret = machine__process_event(machine, event, sample);
/*
 * perf_tool callback adapter: recover the struct trace that embeds tool
 * (via container_of) and forward the event to trace__process_event().
 * Returns whatever trace__process_event() returns.
 */
1343 static int trace__tool_process(struct perf_tool *tool,
1344 union perf_event *event,
1345 struct perf_sample *sample,
1346 struct machine *machine)
1348 struct trace *trace = container_of(tool, struct trace, tool);
1349 return trace__process_event(trace, machine, event, sample);
/*
 * Prepare symbol handling for a live trace: call symbol__init(), create
 * the host machine in trace->host, then synthesize thread events for the
 * threads in evlist->threads, delivering them through
 * trace__tool_process().
 * NOTE(review): the error checks after symbol__init() and after the
 * synthesize call, and the return statements, are on lines not present in
 * this listing.
 */
1352 static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
1354 int err = symbol__init();
1359 trace->host = machine__new_host();
1360 if (trace->host == NULL)
1363 err = __machine__synthesize_threads(trace->host, &trace->tool, &trace->opts.target,
1364 evlist->threads, trace__tool_process, false);
/*
 * Build the per-argument formatter table sc->arg_scnprintf for a syscall.
 * The table has one slot per tracepoint field except the first
 * (nr_fields - 1), and the walk accordingly starts at fields->next.
 * For each field the formatter from sc->fmt is used when one is set;
 * otherwise fields flagged FIELD_IS_POINTER get the hex formatter.
 * NOTE(review): the declaration and increment of idx, the guard in front
 * of the sc->arg_parm assignment and the return statements are on lines
 * not present in this listing.
 */
1371 static int syscall__set_arg_fmts(struct syscall *sc)
1373 struct format_field *field;
1376 sc->arg_scnprintf = calloc(sc->tp_format->format.nr_fields - 1, sizeof(void *));
1377 if (sc->arg_scnprintf == NULL)
1381 sc->arg_parm = sc->fmt->arg_parm;
1383 for (field = sc->tp_format->format.fields->next; field; field = field->next) {
1384 if (sc->fmt && sc->fmt->arg_scnprintf[idx])
1385 sc->arg_scnprintf[idx] = sc->fmt->arg_scnprintf[idx];
1386 else if (field->flags & FIELD_IS_POINTER)
1387 sc->arg_scnprintf[idx] = syscall_arg__scnprintf_hex;
/*
 * Fill in trace->syscalls.table[id] for syscall number id.
 *
 * Steps visible in this listing:
 *  - resolve the name with audit_syscall_to_name();
 *  - when id is beyond syscalls.max, grow the table with realloc() to
 *    id + 1 entries and zero the newly added entries (the whole table when
 *    max is still -1), then record the new table and max;
 *  - when an event qualifier list exists, mark the syscall filtered if its
 *    list membership equals the not_ev_qualifier flag;
 *  - look up the syscall_fmt entry and the "sys_enter_<name>" tracepoint
 *    format in the "syscalls" subsystem, retrying with the fmt alias when
 *    the first lookup fails;
 *  - finish by returning syscall__set_arg_fmts().
 * NOTE(review): local declarations, the assignment of sc->name and the
 * values returned from the failure checks are on lines not present in
 * this listing.
 */
1394 static int trace__read_syscall_info(struct trace *trace, int id)
1398 const char *name = audit_syscall_to_name(id, trace->audit.machine);
1403 if (id > trace->syscalls.max) {
1404 struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc));
1406 if (nsyscalls == NULL)
1409 if (trace->syscalls.max != -1) {
1410 memset(nsyscalls + trace->syscalls.max + 1, 0,
1411 (id - trace->syscalls.max) * sizeof(*sc));
1413 memset(nsyscalls, 0, (id + 1) * sizeof(*sc));
1416 trace->syscalls.table = nsyscalls;
1417 trace->syscalls.max = id;
1420 sc = trace->syscalls.table + id;
1423 if (trace->ev_qualifier) {
1424 bool in = strlist__find(trace->ev_qualifier, name) != NULL;
1426 if (!(in ^ trace->not_ev_qualifier)) {
1427 sc->filtered = true;
1429 * No need to do read tracepoint information since this will be
1436 sc->fmt = syscall_fmt__find(sc->name);
1438 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name);
1439 sc->tp_format = trace_event__tp_format("syscalls", tp_name);
1441 if (sc->tp_format == NULL && sc->fmt && sc->fmt->alias) {
1442 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias);
1443 sc->tp_format = trace_event__tp_format("syscalls", tp_name);
1446 if (sc->tp_format == NULL)
1449 return syscall__set_arg_fmts(sc);
/*
 * Format the arguments of one syscall invocation into bf (at most size
 * bytes).
 *
 * When the syscall has a tracepoint format, the fields after the first one
 * are walked in order, with arg.idx advancing and bit shifting left once
 * per field.  An argument whose value is zero is tested against the
 * SCA_STRARRAY formatter and its arg_parm entry before printing.  Each
 * printed argument is emitted as "name: value", separated by ", " once
 * something has been printed.  The value goes through the per-argument
 * formatter when one is set and is printed with "%ld" otherwise.
 *
 * The last visible statement prints from args[i] and belongs to a
 * separate path.
 * NOTE(review): declarations of printed, bit and i, the initializer of
 * arg, the action taken on a suppressed argument, the format string of the
 * last scnprintf() and the return statement are on lines not present in
 * this listing.
 */
1452 static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
1453 unsigned long *args, struct trace *trace,
1454 struct thread *thread)
1458 if (sc->tp_format != NULL) {
1459 struct format_field *field;
1461 struct syscall_arg arg = {
1468 for (field = sc->tp_format->format.fields->next; field;
1469 field = field->next, ++arg.idx, bit <<= 1) {
1473 * Suppress this argument if its value is zero and
1474 * and we don't have a string associated in an
1477 if (args[arg.idx] == 0 &&
1478 !(sc->arg_scnprintf &&
1479 sc->arg_scnprintf[arg.idx] == SCA_STRARRAY &&
1480 sc->arg_parm[arg.idx]))
1483 printed += scnprintf(bf + printed, size - printed,
1484 "%s%s: ", printed ? ", " : "", field->name);
1485 if (sc->arg_scnprintf && sc->arg_scnprintf[arg.idx]) {
1486 arg.val = args[arg.idx];
1488 arg.parm = sc->arg_parm[arg.idx];
1489 printed += sc->arg_scnprintf[arg.idx](bf + printed,
1490 size - printed, &arg);
1492 printed += scnprintf(bf + printed, size - printed,
1493 "%ld", args[arg.idx]);
1500 printed += scnprintf(bf + printed, size - printed,
1502 printed ? ", " : "", i, args[i]);
/*
 * Signature of a per-tracepoint sample handler.  Handlers of this type are
 * taken from evsel->handler and invoked by trace__process_sample() and
 * trace__run().
 */
1510 typedef int (*tracepoint_handler)(struct trace *trace, struct perf_evsel *evsel,
1511 struct perf_sample *sample);
/*
 * Return the syscall table entry for id, loading it on demand.
 *
 * One visible path reports "Invalid syscall <id> id, skipping" together
 * with the evsel name and a running counter n.  Otherwise, when id is
 * beyond syscalls.max or the entry has no name yet,
 * trace__read_syscall_info() is called; the same test is repeated
 * afterwards before returning a pointer into trace->syscalls.table.
 * The tail prints "Problems reading syscall <id>", adds the name in
 * parentheses when the entry has one, and ends with " information".
 * NOTE(review): the condition guarding the invalid-id message, the
 * declaration of n, the jump targets and the failure return value are on
 * lines not present in this listing.
 */
1513 static struct syscall *trace__syscall_info(struct trace *trace,
1514 struct perf_evsel *evsel, int id)
1520 * XXX: Noticed on x86_64, reproduced as far back as 3.0.36, haven't tried
1521 * before that, leaving at a higher verbosity level till that is
1522 * explained. Reproduced with plain ftrace with:
1524 * echo 1 > /t/events/raw_syscalls/sys_exit/enable
1525 * grep "NR -1 " /t/trace_pipe
1527 * After generating some load on the machine.
1531 fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n",
1532 id, perf_evsel__name(evsel), ++n);
1537 if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) &&
1538 trace__read_syscall_info(trace, id))
1541 if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL))
1544 return &trace->syscalls.table[id];
1548 fprintf(trace->output, "Problems reading syscall %d", id);
1549 if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL)
1550 fprintf(trace->output, "(%s)", trace->syscalls.table[id].name);
1551 fputs(" information\n", trace->output);
/*
 * Account one completed syscall in the per-thread statistics.
 *
 * The stats record for syscall id lives in the priv pointer of an intlist
 * node keyed by id in ttrace->syscall_stats; it is allocated with malloc()
 * the first time the syscall is seen.  The duration fed to update_stats()
 * is sample->time minus ttrace->entry_time, computed only when an entry
 * time exists and the sample is later than it.
 * NOTE(review): the declaration and default value of duration, the NULL
 * checks after intlist__findnew() and malloc(), and any initialisation of
 * a fresh stats record are on lines not present in this listing.
 */
1556 static void thread__update_stats(struct thread_trace *ttrace,
1557 int id, struct perf_sample *sample)
1559 struct int_node *inode;
1560 struct stats *stats;
1563 inode = intlist__findnew(ttrace->syscall_stats, id);
1567 stats = inode->priv;
1568 if (stats == NULL) {
1569 stats = malloc(sizeof(struct stats));
1573 inode->priv = stats;
1576 if (ttrace->entry_time && sample->time > ttrace->entry_time)
1577 duration = sample->time - ttrace->entry_time;
1579 update_stats(stats, duration);
/*
 * Handler for the syscall-enter tracepoint.
 *
 * Reads the syscall id from the sample, resolves its table entry, finds
 * (or creates) the thread and its trace state, and formats
 * "<name>(<args>" into the per-thread entry_str buffer.  That buffer is a
 * 1024-byte allocation made on first use.  The sample time is saved in
 * ttrace->entry_time for the matching exit.
 *
 * exit_group and exit are special-cased: the entry line is printed right
 * away when neither a duration filter nor summary-only mode is active
 * (presumably because no matching exit event is expected).  The visible
 * alternative marks the entry as pending.
 * NOTE(review): local declarations, the NULL and filter checks and the
 * return statements are on lines not present in this listing.
 */
1582 static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
1583 struct perf_sample *sample)
1588 struct thread *thread;
1589 int id = perf_evsel__sc_tp_uint(evsel, id, sample);
1590 struct syscall *sc = trace__syscall_info(trace, evsel, id);
1591 struct thread_trace *ttrace;
1599 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1600 ttrace = thread__trace(thread, trace->output);
1604 args = perf_evsel__sc_tp_ptr(evsel, args, sample);
1605 ttrace = thread->priv;
1607 if (ttrace->entry_str == NULL) {
1608 ttrace->entry_str = malloc(1024);
1609 if (!ttrace->entry_str)
1613 ttrace->entry_time = sample->time;
1614 msg = ttrace->entry_str;
1615 printed += scnprintf(msg + printed, 1024 - printed, "%s(", sc->name);
1617 printed += syscall__scnprintf_args(sc, msg + printed, 1024 - printed,
1618 args, trace, thread);
1620 if (!strcmp(sc->name, "exit_group") || !strcmp(sc->name, "exit")) {
1621 if (!trace->duration_filter && !trace->summary_only) {
1622 trace__fprintf_entry_head(trace, thread, 1, sample->time, trace->output);
1623 fprintf(trace->output, "%-70s\n", ttrace->entry_str);
1626 ttrace->entry_pending = true;
/*
 * Handler for the syscall-exit tracepoint.
 *
 * Visible steps:
 *  - resolve the syscall entry and the thread state, then update the
 *    per-thread statistics;
 *  - read the return value from the sample; for the open syscall with a
 *    non-negative result and a pending vfs_getname pathname, remember the
 *    fd to pathname mapping and count it in stats.vfs_getname;
 *  - compute the duration from entry_time and apply the duration filter
 *    and the summary-only check;
 *  - print the line head, then either the pending entry string padded to
 *    70 columns or a " ... [continued]: name()" marker;
 *  - print the result: plain "%d" when there is no fmt entry, "-1 NAME
 *    message" for errors when fmt->errmsg is set, "0 Timeout" when
 *    fmt->timeout is set and ret is zero, hex when fmt->hexret is set;
 *  - end the line and clear entry_pending.
 * The strerror_r() result is assigned to a const char pointer, so this
 * relies on the variant of strerror_r() that returns a string pointer.
 * NOTE(review): local declarations, the jump targets taken by the filter
 * checks, the final else branch of the result printing and the return
 * statements are on lines not present in this listing.
 */
1631 static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
1632 struct perf_sample *sample)
1636 struct thread *thread;
1637 int id = perf_evsel__sc_tp_uint(evsel, id, sample);
1638 struct syscall *sc = trace__syscall_info(trace, evsel, id);
1639 struct thread_trace *ttrace;
1647 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1648 ttrace = thread__trace(thread, trace->output);
1653 thread__update_stats(ttrace, id, sample);
1655 ret = perf_evsel__sc_tp_uint(evsel, ret, sample);
1657 if (id == trace->audit.open_id && ret >= 0 && trace->last_vfs_getname) {
1658 trace__set_fd_pathname(thread, ret, trace->last_vfs_getname);
1659 trace->last_vfs_getname = NULL;
1660 ++trace->stats.vfs_getname;
1663 ttrace = thread->priv;
1665 ttrace->exit_time = sample->time;
1667 if (ttrace->entry_time) {
1668 duration = sample->time - ttrace->entry_time;
1669 if (trace__filter_duration(trace, duration))
1671 } else if (trace->duration_filter)
1674 if (trace->summary_only)
1677 trace__fprintf_entry_head(trace, thread, duration, sample->time, trace->output);
1679 if (ttrace->entry_pending) {
1680 fprintf(trace->output, "%-70s", ttrace->entry_str);
1682 fprintf(trace->output, " ... [");
1683 color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued");
1684 fprintf(trace->output, "]: %s()", sc->name);
1687 if (sc->fmt == NULL) {
1689 fprintf(trace->output, ") = %d", ret);
1690 } else if (ret < 0 && sc->fmt->errmsg) {
1692 const char *emsg = strerror_r(-ret, bf, sizeof(bf)),
1693 *e = audit_errno_to_name(-ret);
1695 fprintf(trace->output, ") = -1 %s %s", e, emsg);
1696 } else if (ret == 0 && sc->fmt->timeout)
1697 fprintf(trace->output, ") = 0 Timeout");
1698 else if (sc->fmt->hexret)
1699 fprintf(trace->output, ") = %#x", ret);
1703 fputc('\n', trace->output);
1705 ttrace->entry_pending = false;
/*
 * Handler for the vfs_getname probe: remember a pointer to the "pathname"
 * field of the sample in trace->last_vfs_getname.  trace__sys_exit()
 * consumes and clears it when the open syscall returns.
 * NOTE(review): the return statement is on a line not present in this
 * listing.
 */
1710 static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel,
1711 struct perf_sample *sample)
1713 trace->last_vfs_getname = perf_evsel__rawptr(evsel, sample, "pathname");
/*
 * Handler for sched:sched_stat_runtime: read the "runtime" field, divide
 * it by NSEC_PER_MSEC and add the result to both the per-thread and the
 * global runtime_ms totals.
 * The trailing fprintf() dumps the event name, comm, pid, runtime and
 * vruntime to trace->output.
 * NOTE(review): the arguments of machine__findnew_thread(), the check on
 * ttrace, the label or condition leading to the fprintf() and the return
 * statements are on lines not present in this listing.
 */
1717 static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel,
1718 struct perf_sample *sample)
1720 u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
1721 double runtime_ms = (double)runtime / NSEC_PER_MSEC;
1722 struct thread *thread = machine__findnew_thread(trace->host,
1725 struct thread_trace *ttrace = thread__trace(thread, trace->output);
1730 ttrace->runtime_ms += runtime_ms;
1731 trace->runtime_ms += runtime_ms;
1735 fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n",
1737 perf_evsel__strval(evsel, sample, "comm"),
1738 (pid_t)perf_evsel__intval(evsel, sample, "pid"),
1740 perf_evsel__intval(evsel, sample, "vruntime"));
/*
 * Decide whether a sample is filtered out by the pid/tid lists.
 * The first test matches when the sample pid is in pid_list or the sample
 * tid is in tid_list; the second test matches when either list is
 * configured at all.
 * NOTE(review): the value returned for each test and the default return
 * are on lines not present in this listing.
 */
1744 static bool skip_sample(struct trace *trace, struct perf_sample *sample)
1746 if ((trace->pid_list && intlist__find(trace->pid_list, sample->pid)) ||
1747 (trace->tid_list && intlist__find(trace->tid_list, sample->tid)))
1750 if (trace->pid_list || trace->tid_list)
/*
 * perf_tool sample callback used in replay mode (trace__replay() installs
 * it as tool.sample).  Recovers the struct trace from tool, applies
 * skip_sample(), latches the first sample time into trace->base_time
 * unless full_time is set, and invokes the handler stored in the evsel.
 * NOTE(review): any guard on handler and the return statements are on
 * lines not present in this listing.
 */
1756 static int trace__process_sample(struct perf_tool *tool,
1757 union perf_event *event __maybe_unused,
1758 struct perf_sample *sample,
1759 struct perf_evsel *evsel,
1760 struct machine *machine __maybe_unused)
1762 struct trace *trace = container_of(tool, struct trace, tool);
1765 tracepoint_handler handler = evsel->handler;
1767 if (skip_sample(trace, sample))
1770 if (!trace->full_time && trace->base_time == 0)
1771 trace->base_time = sample->time;
1775 handler(trace, evsel, sample);
/*
 * Turn the pid and tid target strings from the options into integer lists
 * (trace->pid_list and trace->tid_list) with intlist__new().  Each list is
 * only built when the corresponding string is set, and a failure is
 * reported with pr_err().
 * NOTE(review): the return statements are on lines not present in this
 * listing.
 */
1781 static int parse_target_str(struct trace *trace)
1783 if (trace->opts.target.pid) {
1784 trace->pid_list = intlist__new(trace->opts.target.pid);
1785 if (trace->pid_list == NULL) {
1786 pr_err("Error parsing process id string\n");
1791 if (trace->opts.target.tid) {
1792 trace->tid_list = intlist__new(trace->opts.target.tid);
1793 if (trace->tid_list == NULL) {
1794 pr_err("Error parsing thread id string\n");
/*
 * Implement the record subcommand by building an argument vector for
 * cmd_record() and calling it.
 *
 * The vector is laid out as: the fixed record_args entries, one event
 * string, then the caller-supplied argv.  The event string names the
 * raw_syscalls enter/exit tracepoints when they exist and the syscalls
 * ones otherwise; when neither exists an error is reported.
 * The allocation holds rec_argc + 1 pointers and is zeroed by calloc().
 * NOTE(review): the contents of record_args, the statements following the
 * two error checks and the increment of i after the event string are on
 * lines not present in this listing.
 */
1802 static int trace__record(int argc, const char **argv)
1804 unsigned int rec_argc, i, j;
1805 const char **rec_argv;
1806 const char * const record_args[] = {
1814 /* +1 is for the event string below */
1815 rec_argc = ARRAY_SIZE(record_args) + 1 + argc;
1816 rec_argv = calloc(rec_argc + 1, sizeof(char *));
1818 if (rec_argv == NULL)
1821 for (i = 0; i < ARRAY_SIZE(record_args); i++)
1822 rec_argv[i] = record_args[i];
1824 /* event string may be different for older kernels - e.g., RHEL6 */
1825 if (is_valid_tracepoint("raw_syscalls:sys_enter"))
1826 rec_argv[i] = "raw_syscalls:sys_enter,raw_syscalls:sys_exit";
1827 else if (is_valid_tracepoint("syscalls:sys_enter"))
1828 rec_argv[i] = "syscalls:sys_enter,syscalls:sys_exit";
1830 pr_err("Neither raw_syscalls nor syscalls events exist.\n");
1835 for (j = 0; j < (unsigned int)argc; j++, i++)
1836 rec_argv[i] = argv[j];
1838 return cmd_record(i, rec_argv, NULL);
/* Forward declaration: used by trace__run() before its definition further down. */
1841 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp);
/*
 * Try to add the probe:vfs_getname tracepoint to evlist.
 * The evsel is discarded when it has no "pathname" field; otherwise
 * trace__vfs_getname() becomes its handler and it is appended to evlist.
 * NOTE(review): the check on the perf_evsel__newtp() result and the early
 * return after the delete are on lines not present in this listing.
 */
1843 static void perf_evlist__add_vfs_getname(struct perf_evlist *evlist)
1845 struct perf_evsel *evsel = perf_evsel__newtp("probe", "vfs_getname");
1849 if (perf_evsel__field(evsel, "pathname") == NULL) {
1850 perf_evsel__delete(evsel);
1854 evsel->handler = trace__vfs_getname;
1855 perf_evlist__add(evlist, evsel);
/*
 * Live tracing mode.
 *
 * Setup visible in this listing: create an evlist, add the syscall
 * enter/exit tracepoints with trace__sys_enter() and trace__sys_exit() as
 * handlers, try to add the vfs_getname probe, add sched_stat_runtime
 * (under a condition not shown), create the target maps, initialise
 * symbols, configure the evlist, install sig_handler() for SIGCHLD and
 * SIGINT, prepare the workload given in argv, then open, mmap and enable
 * the events and start the workload.
 *
 * Main loop: every mmap ring is drained.  Each event is parsed into a
 * sample; non-sample records go to trace__process_event(), samples are
 * mapped to their evsel by id and passed to the handler stored in that
 * evsel.  Samples without an evsel or without raw data are reported and
 * skipped.  When a pass produced no new events the code polls the event
 * descriptors, with a 100 ms timeout once done is set and without a
 * timeout otherwise.
 *
 * Teardown: disable the events, print the thread summary and (when
 * show_tool_stats is set) the tool statistics, then munmap, close, delete
 * the maps and delete the evlist.  The tail formats an open error into
 * errbuf, prints it and jumps back to the evlist deletion.
 * NOTE(review): many lines are absent from this listing, including the
 * loop labels, the conditions around the workload and summary calls, the
 * error tests after each err assignment and the return statement.
 */
1858 static int trace__run(struct trace *trace, int argc, const char **argv)
1860 struct perf_evlist *evlist = perf_evlist__new();
1861 struct perf_evsel *evsel;
1863 unsigned long before;
1864 const bool forks = argc > 0;
1868 if (evlist == NULL) {
1869 fprintf(trace->output, "Not enough memory to run!\n");
1873 if (perf_evlist__add_syscall_newtp(evlist, trace__sys_enter, trace__sys_exit))
1876 perf_evlist__add_vfs_getname(evlist);
1879 perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
1880 trace__sched_stat_runtime))
1883 err = perf_evlist__create_maps(evlist, &trace->opts.target);
1885 fprintf(trace->output, "Problems parsing the target to trace, check your options!\n");
1886 goto out_delete_evlist;
1889 err = trace__symbols_init(trace, evlist);
1891 fprintf(trace->output, "Problems initializing symbol libraries!\n");
1892 goto out_delete_maps;
1895 perf_evlist__config(evlist, &trace->opts);
1897 signal(SIGCHLD, sig_handler);
1898 signal(SIGINT, sig_handler);
1901 err = perf_evlist__prepare_workload(evlist, &trace->opts.target,
1902 argv, false, false);
1904 fprintf(trace->output, "Couldn't run the workload!\n");
1905 goto out_delete_maps;
1909 err = perf_evlist__open(evlist);
1911 goto out_error_open;
1913 err = perf_evlist__mmap(evlist, trace->opts.mmap_pages, false);
1915 fprintf(trace->output, "Couldn't mmap the events: %s\n", strerror(errno));
1916 goto out_close_evlist;
1919 perf_evlist__enable(evlist);
1922 perf_evlist__start_workload(evlist);
1924 trace->multiple_threads = evlist->threads->map[0] == -1 || evlist->threads->nr > 1;
1926 before = trace->nr_events;
1928 for (i = 0; i < evlist->nr_mmaps; i++) {
1929 union perf_event *event;
1931 while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) {
1932 const u32 type = event->header.type;
1933 tracepoint_handler handler;
1934 struct perf_sample sample;
1938 err = perf_evlist__parse_sample(evlist, event, &sample);
1940 fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err);
1944 if (!trace->full_time && trace->base_time == 0)
1945 trace->base_time = sample.time;
1947 if (type != PERF_RECORD_SAMPLE) {
1948 trace__process_event(trace, trace->host, event, &sample);
1952 evsel = perf_evlist__id2evsel(evlist, sample.id);
1953 if (evsel == NULL) {
1954 fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample.id);
1958 if (sample.raw_data == NULL) {
1959 fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
1960 perf_evsel__name(evsel), sample.tid,
1961 sample.cpu, sample.raw_size);
1965 handler = evsel->handler;
1966 handler(trace, evsel, &sample);
1968 perf_evlist__mmap_consume(evlist, i);
1975 if (trace->nr_events == before) {
1976 int timeout = done ? 100 : -1;
1978 if (poll(evlist->pollfd, evlist->nr_fds, timeout) > 0)
1985 perf_evlist__disable(evlist);
1989 trace__fprintf_thread_summary(trace, trace->output);
1991 if (trace->show_tool_stats) {
1992 fprintf(trace->output, "Stats:\n "
1993 " vfs_getname : %" PRIu64 "\n"
1994 " proc_getname: %" PRIu64 "\n",
1995 trace->stats.vfs_getname,
1996 trace->stats.proc_getname);
2000 perf_evlist__munmap(evlist);
2002 perf_evlist__close(evlist);
2004 perf_evlist__delete_maps(evlist);
2006 perf_evlist__delete(evlist);
2008 trace->live = false;
2011 char errbuf[BUFSIZ];
2014 perf_evlist__strerror_tp(evlist, errno, errbuf, sizeof(errbuf));
2018 perf_evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf));
2021 fprintf(trace->output, "%s\n", errbuf);
2022 goto out_delete_evlist;
/*
 * Replay mode: process previously recorded events from a data file.
 *
 * Installs the perf_tool callbacks (samples go to
 * trace__process_sample(), the rest to the generic perf_event__process_*
 * helpers), requests time-ordered samples, and forces multiple_threads so
 * that tids appear in the output.  After symbol__init() a read-mode
 * session is created and its host machine becomes trace->host.
 *
 * The vfs_getname probe handler is registered through the handlers table.
 * The sys_enter and sys_exit tracepoints are looked up under raw_syscalls
 * first, with syscalls as the name used by older kernels; they are bound
 * to trace__sys_enter() with its args field and trace__sys_exit() with its
 * ret field respectively.
 *
 * Finally the pid/tid filters are parsed, the session events are
 * processed, and the thread summary is printed when processing succeeded
 * and trace->summary is set.  The session is deleted at the end.
 * NOTE(review): the remaining initializers, the NULL tests before the
 * fallback lookups, the error tests after each err assignment, the jump
 * targets and the return statement are on lines not present in this
 * listing.
 */
2026 static int trace__replay(struct trace *trace)
2028 const struct perf_evsel_str_handler handlers[] = {
2029 { "probe:vfs_getname", trace__vfs_getname, },
2031 struct perf_data_file file = {
2033 .mode = PERF_DATA_MODE_READ,
2035 struct perf_session *session;
2036 struct perf_evsel *evsel;
2039 trace->tool.sample = trace__process_sample;
2040 trace->tool.mmap = perf_event__process_mmap;
2041 trace->tool.mmap2 = perf_event__process_mmap2;
2042 trace->tool.comm = perf_event__process_comm;
2043 trace->tool.exit = perf_event__process_exit;
2044 trace->tool.fork = perf_event__process_fork;
2045 trace->tool.attr = perf_event__process_attr;
2046 trace->tool.tracing_data = perf_event__process_tracing_data;
2047 trace->tool.build_id = perf_event__process_build_id;
2049 trace->tool.ordered_samples = true;
2050 trace->tool.ordering_requires_timestamps = true;
2052 /* add tid to output */
2053 trace->multiple_threads = true;
2055 if (symbol__init() < 0)
2058 session = perf_session__new(&file, false, &trace->tool);
2059 if (session == NULL)
2062 trace->host = &session->machines.host;
2064 err = perf_session__set_tracepoints_handlers(session, handlers);
2068 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2069 "raw_syscalls:sys_enter");
2070 /* older kernels have syscalls tp versus raw_syscalls */
2072 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2073 "syscalls:sys_enter");
2074 if (evsel == NULL) {
2075 pr_err("Data file does not have raw_syscalls:sys_enter event\n");
2079 if (perf_evsel__init_syscall_tp(evsel, trace__sys_enter) < 0 ||
2080 perf_evsel__init_sc_tp_ptr_field(evsel, args)) {
2081 pr_err("Error during initialize raw_syscalls:sys_enter event\n");
2085 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2086 "raw_syscalls:sys_exit");
2088 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2089 "syscalls:sys_exit");
2090 if (evsel == NULL) {
2091 pr_err("Data file does not have raw_syscalls:sys_exit event\n");
2095 if (perf_evsel__init_syscall_tp(evsel, trace__sys_exit) < 0 ||
2096 perf_evsel__init_sc_tp_uint_field(evsel, ret)) {
2097 pr_err("Error during initialize raw_syscalls:sys_exit event\n");
2101 err = parse_target_str(trace);
2107 err = perf_session__process_events(session, &trace->tool);
2109 pr_err("Failed to process events, error %d", err);
2111 else if (trace->summary)
2112 trace__fprintf_thread_summary(trace, trace->output);
2115 perf_session__delete(session);
/*
 * Print the heading of the per-thread summary section to fp and keep the
 * fprintf() result in printed.
 * NOTE(review): the declaration of printed and the return statement are
 * on lines not present in this listing.
 */
2120 static size_t trace__fprintf_threads_header(FILE *fp)
2124 printed = fprintf(fp, "\n Summary of events:\n\n");
/*
 * Print the per-syscall statistics table of one thread to fp.
 *
 * After the column headers, the nodes of ttrace->syscall_stats are walked
 * with intlist__first() and intlist__next(); each node index selects the
 * entry in trace->syscalls.table that supplies the name.  min, max and avg
 * are divided by NSEC_PER_MSEC for display, and the last column is the
 * standard deviation as a percentage of the average (0 when the average
 * is 0).  Note that pct is computed before avg is rescaled.
 * printed accumulates the fprintf() results.
 * NOTE(review): the loop header, the guards on inode and stats, several
 * declarations, the arguments of the middle fprintf() and the return
 * statement are on lines not present in this listing.
 */
2129 static size_t thread__dump_stats(struct thread_trace *ttrace,
2130 struct trace *trace, FILE *fp)
2132 struct stats *stats;
2135 struct int_node *inode = intlist__first(ttrace->syscall_stats);
2140 printed += fprintf(fp, "\n");
2142 printed += fprintf(fp, " syscall calls min avg max stddev\n");
2143 printed += fprintf(fp, " (msec) (msec) (msec) (%%)\n");
2144 printed += fprintf(fp, " --------------- -------- --------- --------- --------- ------\n");
2146 /* each int_node is a syscall */
2148 stats = inode->priv;
2150 double min = (double)(stats->min) / NSEC_PER_MSEC;
2151 double max = (double)(stats->max) / NSEC_PER_MSEC;
2152 double avg = avg_stats(stats);
2154 u64 n = (u64) stats->n;
2156 pct = avg ? 100.0 * stddev_stats(stats)/avg : 0.0;
2157 avg /= NSEC_PER_MSEC;
2159 sc = &trace->syscalls.table[inode->i];
2160 printed += fprintf(fp, " %-15s", sc->name);
2161 printed += fprintf(fp, " %8" PRIu64 " %9.3f %9.3f",
2163 printed += fprintf(fp, " %9.3f %9.2f%%\n", max, pct);
2166 inode = intlist__next(inode);
2169 printed += fprintf(fp, "\n\n");
/*
 * Context handed to trace__fprintf_one_thread() through the priv pointer
 * of machine__for_each_thread().  That function also reads fp and printed
 * members, which are declared on lines not present in this listing.
 */
2174 /* struct used to pass data to per-thread function */
2175 struct summary_data {
2177 struct trace *trace;
/*
 * Per-thread callback for the summary: print one thread header line
 * followed by its syscall statistics table.
 *
 * priv is a struct summary_data.  The header line shows the comm and tid,
 * the number of events of the thread, its share of trace->nr_events as a
 * percentage, and the accumulated runtime in msec.
 * NOTE(review): printed starts from data->printed and is then added back
 * to data->printed, so the earlier total is counted twice; confirm whether
 * that is intended.  The guard on ttrace, the declaration of ratio and the
 * return statements are on lines not present in this listing.
 */
2181 static int trace__fprintf_one_thread(struct thread *thread, void *priv)
2183 struct summary_data *data = priv;
2184 FILE *fp = data->fp;
2185 size_t printed = data->printed;
2186 struct trace *trace = data->trace;
2187 struct thread_trace *ttrace = thread->priv;
2193 ratio = (double)ttrace->nr_events / trace->nr_events * 100.0;
2195 printed += fprintf(fp, " %s (%d), ", thread__comm_str(thread), thread->tid);
2196 printed += fprintf(fp, "%lu events, ", ttrace->nr_events);
2197 printed += fprintf(fp, "%.1f%%", ratio);
2198 printed += fprintf(fp, ", %.3f msec\n", ttrace->runtime_ms);
2199 printed += thread__dump_stats(ttrace, trace, fp);
2201 data->printed += printed;
/*
 * Print the summary for every thread of trace->host to fp: first the
 * section header, then one entry per thread via
 * trace__fprintf_one_thread().  Returns the accumulated data.printed.
 * NOTE(review): the initializer of data is on lines not present in this
 * listing.
 */
2206 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
2208 struct summary_data data = {
2212 data.printed = trace__fprintf_threads_header(fp);
2214 machine__for_each_thread(trace->host, trace__fprintf_one_thread, &data);
2216 return data.printed;
/*
 * Option callback for --duration: convert str with atof() and store the
 * result in the duration_filter of the struct trace passed via opt->value.
 * NOTE(review): atof() reports no conversion errors, so malformed input
 * is not detected here.  The return statement is on a line not present in
 * this listing.
 */
2219 static int trace__set_duration(const struct option *opt, const char *str,
2220 int unset __maybe_unused)
2222 struct trace *trace = opt->value;
2224 trace->duration_filter = atof(str);
/*
 * Open filename for writing as trace->output.
 * An existing non-empty file is first renamed to "<filename>.old"; the
 * result of rename() is not checked.  Returns 0 on success or -errno when
 * fopen() fails.
 * NOTE(review): the declaration of st is on a line not present in this
 * listing.
 */
2228 static int trace__open_output(struct trace *trace, const char *filename)
2232 if (!stat(filename, &st) && st.st_size) {
2233 char oldname[PATH_MAX];
2235 scnprintf(oldname, sizeof(oldname), "%s.old", filename);
2237 rename(filename, oldname);
2240 trace->output = fopen(filename, "w");
2242 return trace->output == NULL ? -errno : 0;
/*
 * Entry point of the trace command.
 *
 * Visible flow: declare the usage strings, a struct trace with its
 * defaults (audit machine type and the id of the open syscall among them)
 * and the option table; hand off to trace__record() when the first
 * argument is "record"; otherwise parse the options.  summary_only also
 * turns on summary.  An output file is opened when -o was given, and the
 * -e expression is parsed into trace.ev_qualifier, with a leading !
 * selecting negated matching.  The target is validated and its uid
 * parsed, with failures reported through target__strerror().  When no
 * command was given and no target was selected, tracing becomes
 * system-wide.  Finally either trace__replay() or trace__run() is called,
 * and the output file is closed if one was opened here.
 * NOTE(review): many initializers, the error tests after each err
 * assignment, the condition choosing replay over run and the end of the
 * function are on lines not present in this listing.
 */
2245 int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
2247 const char * const trace_usage[] = {
2248 "perf trace [<options>] [<command>]",
2249 "perf trace [<options>] -- <command> [<options>]",
2250 "perf trace record [<options>] [<command>]",
2251 "perf trace record [<options>] -- <command> [<options>]",
2254 struct trace trace = {
2256 .machine = audit_detect_machine(),
2257 .open_id = audit_name_to_syscall("open", trace.audit.machine),
2267 .user_freq = UINT_MAX,
2268 .user_interval = ULLONG_MAX,
2275 const char *output_name = NULL;
2276 const char *ev_qualifier_str = NULL;
2277 const struct option trace_options[] = {
2278 OPT_BOOLEAN(0, "comm", &trace.show_comm,
2279 "show the thread COMM next to its id"),
2280 OPT_BOOLEAN(0, "tool_stats", &trace.show_tool_stats, "show tool stats"),
2281 OPT_STRING('e', "expr", &ev_qualifier_str, "expr",
2282 "list of events to trace"),
2283 OPT_STRING('o', "output", &output_name, "file", "output file name"),
2284 OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"),
2285 OPT_STRING('p', "pid", &trace.opts.target.pid, "pid",
2286 "trace events on existing process id"),
2287 OPT_STRING('t', "tid", &trace.opts.target.tid, "tid",
2288 "trace events on existing thread id"),
2289 OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide,
2290 "system-wide collection from all CPUs"),
2291 OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu",
2292 "list of cpus to monitor"),
2293 OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit,
2294 "child tasks do not inherit counters"),
2295 OPT_CALLBACK('m', "mmap-pages", &trace.opts.mmap_pages, "pages",
2296 "number of mmap data pages",
2297 perf_evlist__parse_mmap_pages),
2298 OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user",
2300 OPT_CALLBACK(0, "duration", &trace, "float",
2301 "show only events with duration > N.M ms",
2302 trace__set_duration),
2303 OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"),
2304 OPT_INCR('v', "verbose", &verbose, "be more verbose"),
2305 OPT_BOOLEAN('T', "time", &trace.full_time,
2306 "Show full timestamp, not time relative to first start"),
2307 OPT_BOOLEAN('s', "summary", &trace.summary_only,
2308 "Show only syscall summary with statistics"),
2309 OPT_BOOLEAN('S', "with-summary", &trace.summary,
2310 "Show all syscalls and summary with statistics"),
2316 if ((argc > 1) && (strcmp(argv[1], "record") == 0))
2317 return trace__record(argc-2, &argv[2]);
2319 argc = parse_options(argc, argv, trace_options, trace_usage, 0);
2321 /* summary_only implies summary option, but don't overwrite summary if set */
2322 if (trace.summary_only)
2323 trace.summary = trace.summary_only;
2325 if (output_name != NULL) {
2326 err = trace__open_output(&trace, output_name);
2328 perror("failed to create output file");
2333 if (ev_qualifier_str != NULL) {
2334 const char *s = ev_qualifier_str;
2336 trace.not_ev_qualifier = *s == '!';
2337 if (trace.not_ev_qualifier)
2339 trace.ev_qualifier = strlist__new(true, s);
2340 if (trace.ev_qualifier == NULL) {
2341 fputs("Not enough memory to parse event qualifier",
2348 err = target__validate(&trace.opts.target);
2350 target__strerror(&trace.opts.target, err, bf, sizeof(bf));
2351 fprintf(trace.output, "%s", bf);
2355 err = target__parse_uid(&trace.opts.target);
2357 target__strerror(&trace.opts.target, err, bf, sizeof(bf));
2358 fprintf(trace.output, "%s", bf);
2362 if (!argc && target__none(&trace.opts.target))
2363 trace.opts.target.system_wide = true;
2366 err = trace__replay(&trace);
2368 err = trace__run(&trace, argc, argv);
2371 if (output_name != NULL)
2372 fclose(trace.output);