1 #include <traceevent/event-parse.h>
3 #include "util/color.h"
4 #include "util/debug.h"
5 #include "util/evlist.h"
6 #include "util/machine.h"
7 #include "util/session.h"
8 #include "util/thread.h"
9 #include "util/parse-options.h"
10 #include "util/strlist.h"
11 #include "util/intlist.h"
12 #include "util/thread_map.h"
13 #include "util/stat.h"
14 #include "trace-event.h"
15 #include "util/parse-events.h"
20 #include <linux/futex.h>
22 /* For older distros: fallbacks for constants their headers may not define. */
24 # define MAP_STACK 0x20000
28 # define MADV_HWPOISON 100
31 #ifndef MADV_MERGEABLE
32 # define MADV_MERGEABLE 12
35 #ifndef MADV_UNMERGEABLE
36 # define MADV_UNMERGEABLE 13
40 # define EFD_SEMAPHORE 1
44 # define EFD_NONBLOCK 00004000
48 # define EFD_CLOEXEC 02000000
52 # define O_CLOEXEC 02000000
60 # define SOCK_CLOEXEC 02000000
64 # define SOCK_NONBLOCK 00004000
67 #ifndef MSG_CMSG_CLOEXEC
68 # define MSG_CMSG_CLOEXEC 0x40000000
71 #ifndef PERF_FLAG_FD_NO_GROUP
72 # define PERF_FLAG_FD_NO_GROUP (1UL << 0)
75 #ifndef PERF_FLAG_FD_OUTPUT
76 # define PERF_FLAG_FD_OUTPUT (1UL << 1)
79 #ifndef PERF_FLAG_PID_CGROUP
80 # define PERF_FLAG_PID_CGROUP (1UL << 2) /* pid=cgroup id, per-cpu mode only */
83 #ifndef PERF_FLAG_FD_CLOEXEC
84 # define PERF_FLAG_FD_CLOEXEC (1UL << 3) /* O_CLOEXEC */
91 u64 (*integer)(struct tp_field *field, struct perf_sample *sample);
92 void *(*pointer)(struct tp_field *field, struct perf_sample *sample);
/*
 * Accessor generators for unsigned tracepoint fields.
 *
 * TP_UINT_FIELD(bits) emits tp_field__u<bits>(), which memcpy()s
 * sizeof(value) bytes from sample->raw_data + field->offset into a local.
 * TP_UINT_FIELD__SWAPPED(bits) emits tp_field__swapped_u<bits>(), which does
 * the same copy and returns bswap_<bits>() of the value.  The swapped
 * variants are instantiated below for 16, 32 and 64 bits.
 */
96 #define TP_UINT_FIELD(bits) \
97 static u64 tp_field__u##bits(struct tp_field *field, struct perf_sample *sample) \
100 memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
109 #define TP_UINT_FIELD__SWAPPED(bits) \
110 static u64 tp_field__swapped_u##bits(struct tp_field *field, struct perf_sample *sample) \
113 memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
114 return bswap_##bits(value);\
117 TP_UINT_FIELD__SWAPPED(16);
118 TP_UINT_FIELD__SWAPPED(32);
119 TP_UINT_FIELD__SWAPPED(64);
/*
 * Prepare @field to read the unsigned integer described by @format_field.
 *
 * Records format_field->offset and selects the integer accessor according to
 * format_field->size.  The single-byte accessor is used as is; for the wider
 * sizes the byte-swapping accessor is chosen when needs_swap is set.
 */
121 static int tp_field__init_uint(struct tp_field *field,
122 struct format_field *format_field,
125 field->offset = format_field->offset;
127 switch (format_field->size) {
129 field->integer = tp_field__u8;
132 field->integer = needs_swap ? tp_field__swapped_u16 : tp_field__u16;
135 field->integer = needs_swap ? tp_field__swapped_u32 : tp_field__u32;
138 field->integer = needs_swap ? tp_field__swapped_u64 : tp_field__u64;
/*
 * Pointer accessor: returns the address of the field inside the sample's raw
 * payload (raw_data + field->offset).  Nothing is copied.
 */
147 static void *tp_field__ptr(struct tp_field *field, struct perf_sample *sample)
149 return sample->raw_data + field->offset;
/*
 * Prepare @field for pointer access: record the offset taken from
 * @format_field and install tp_field__ptr() as the pointer accessor.
 */
152 static int tp_field__init_ptr(struct tp_field *field, struct format_field *format_field)
154 field->offset = format_field->offset;
155 field->pointer = tp_field__ptr;
162 struct tp_field args, ret;
/*
 * Look up the tracepoint field @name of @evsel with perf_evsel__field() and
 * initialise @field as an unsigned-integer accessor for it, passing along
 * evsel->needs_swap.  A NULL lookup result is handled separately (the value
 * returned in that case is not visible in this excerpt).
 */
166 static int perf_evsel__init_tp_uint_field(struct perf_evsel *evsel,
167 struct tp_field *field,
170 struct format_field *format_field = perf_evsel__field(evsel, name);
172 if (format_field == NULL)
175 return tp_field__init_uint(field, format_field, evsel->needs_swap);
/*
 * Convenience wrapper: initialises member @name of the struct syscall_tp kept
 * in evsel->priv, using the member name itself (#name) as the tracepoint
 * field name.
 */
178 #define perf_evsel__init_sc_tp_uint_field(evsel, name) \
179 ({ struct syscall_tp *sc = evsel->priv;\
180 perf_evsel__init_tp_uint_field(evsel, &sc->name, #name); })
/*
 * Look up the tracepoint field @name of @evsel with perf_evsel__field() and
 * initialise @field as a pointer accessor for it.  A NULL lookup result is
 * handled separately (the value returned in that case is not visible in this
 * excerpt).
 */
182 static int perf_evsel__init_tp_ptr_field(struct perf_evsel *evsel,
183 struct tp_field *field,
186 struct format_field *format_field = perf_evsel__field(evsel, name);
188 if (format_field == NULL)
191 return tp_field__init_ptr(field, format_field);
/*
 * Convenience wrapper: initialises member @name of the struct syscall_tp kept
 * in evsel->priv, using the member name itself (#name) as the tracepoint
 * field name.
 */
194 #define perf_evsel__init_sc_tp_ptr_field(evsel, name) \
195 ({ struct syscall_tp *sc = evsel->priv;\
196 perf_evsel__init_tp_ptr_field(evsel, &sc->name, #name); })
/*
 * Destroy @evsel through perf_evsel__delete().
 * NOTE(review): going by the name, evsel->priv is presumably released before
 * the delete call; that statement is not visible in this excerpt - confirm.
 */
198 static void perf_evsel__delete_priv(struct perf_evsel *evsel)
201 perf_evsel__delete(evsel);
/*
 * Attach syscall tracepoint state to @evsel.
 *
 * Allocates a struct syscall_tp into evsel->priv; when the allocation
 * succeeds, the "id" field accessor is initialised and @handler is installed
 * as evsel->handler.  The error paths and return values are not visible in
 * this excerpt.
 */
204 static int perf_evsel__init_syscall_tp(struct perf_evsel *evsel, void *handler)
206 evsel->priv = malloc(sizeof(struct syscall_tp));
207 if (evsel->priv != NULL) {
208 if (perf_evsel__init_sc_tp_uint_field(evsel, id))
211 evsel->handler = handler;
/*
 * Create the syscall tracepoint evsel for @direction.
 *
 * Tries the "raw_syscalls" tracepoint system first and has a second attempt
 * with "syscalls", then initialises the result with
 * perf_evsel__init_syscall_tp().  The perf_evsel__delete_priv() call
 * presumably belongs to the failure path of that initialisation - the
 * surrounding control flow is not visible in this excerpt.
 */
222 static struct perf_evsel *perf_evsel__syscall_newtp(const char *direction, void *handler)
224 struct perf_evsel *evsel = perf_evsel__newtp("raw_syscalls", direction);
226 /* older kernels (e.g., RHEL6) use syscalls:{enter,exit} */
228 evsel = perf_evsel__newtp("syscalls", direction);
231 if (perf_evsel__init_syscall_tp(evsel, handler))
238 perf_evsel__delete_priv(evsel);
/*
 * Read member @name of the struct syscall_tp stored in evsel->priv from
 * @sample, through the integer accessor installed on that member.
 */
242 #define perf_evsel__sc_tp_uint(evsel, name, sample) \
243 ({ struct syscall_tp *fields = evsel->priv; \
244 fields->name.integer(&fields->name, sample); })
/* Same as above, but through the member's pointer accessor. */
246 #define perf_evsel__sc_tp_ptr(evsel, name, sample) \
247 ({ struct syscall_tp *fields = evsel->priv; \
248 fields->name.pointer(&fields->name, sample); })
252 struct thread *thread;
262 const char **entries;
/*
 * DEFINE_STRARRAY(array) defines a struct strarray named strarray__<array>
 * whose nr_entries is ARRAY_SIZE(array).  DEFINE_STRARRAY_OFFSET() takes an
 * additional @off argument (presumably stored as the strarray's offset; that
 * initialiser line is not visible in this excerpt).
 */
265 #define DEFINE_STRARRAY(array) struct strarray strarray__##array = { \
266 .nr_entries = ARRAY_SIZE(array), \
270 #define DEFINE_STRARRAY_OFFSET(array, off) struct strarray strarray__##array = { \
272 .nr_entries = ARRAY_SIZE(array), \
/*
 * Format arg->val as a name taken from the struct strarray in arg->parm.
 *
 * The table index is arg->val minus the strarray's offset.  When that index
 * falls outside [0, nr_entries) the raw value is printed with @intfmt
 * instead of a name.
 */
276 static size_t __syscall_arg__scnprintf_strarray(char *bf, size_t size,
278 struct syscall_arg *arg)
280 struct strarray *sa = arg->parm;
281 int idx = arg->val - sa->offset;
283 if (idx < 0 || idx >= sa->nr_entries)
284 return scnprintf(bf, size, intfmt, arg->val);
286 return scnprintf(bf, size, "%s", sa->entries[idx]);
/* strarray formatter that prints values without a table entry using "%d". */
289 static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size,
290 struct syscall_arg *arg)
292 return __syscall_arg__scnprintf_strarray(bf, size, "%d", arg);
295 #define SCA_STRARRAY syscall_arg__scnprintf_strarray
/*
 * strarray formatter that prints values without a table entry using "%#x".
 * Only compiled for i386 and x86_64.
 */
297 #if defined(__i386__) || defined(__x86_64__)
299 * FIXME: Make this available to all arches as soon as the ioctl beautifier
300 * gets rewritten to support all arches.
302 static size_t syscall_arg__scnprintf_strhexarray(char *bf, size_t size,
303 struct syscall_arg *arg)
305 return __syscall_arg__scnprintf_strarray(bf, size, "%#x", arg);
308 #define SCA_STRHEXARRAY syscall_arg__scnprintf_strhexarray
309 #endif /* defined(__i386__) || defined(__x86_64__) */
/* Forward declaration; the definition comes later in this file. */
311 static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
312 struct syscall_arg *arg);
314 #define SCA_FD syscall_arg__scnprintf_fd
/*
 * Formatter for directory-fd style arguments: prints "CWD" for one special
 * case (the condition, presumably fd == AT_FDCWD, is not visible in this
 * excerpt) and otherwise defers to syscall_arg__scnprintf_fd().
 */
316 static size_t syscall_arg__scnprintf_fd_at(char *bf, size_t size,
317 struct syscall_arg *arg)
322 return scnprintf(bf, size, "CWD");
324 return syscall_arg__scnprintf_fd(bf, size, arg);
327 #define SCA_FDAT syscall_arg__scnprintf_fd_at
/* Forward declaration; the definition comes later in this file. */
329 static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
330 struct syscall_arg *arg);
332 #define SCA_CLOSE_FD syscall_arg__scnprintf_close_fd
/* Print arg->val in hexadecimal with a "0x" prefix ("%#lx"). */
334 static size_t syscall_arg__scnprintf_hex(char *bf, size_t size,
335 struct syscall_arg *arg)
337 return scnprintf(bf, size, "%#lx", arg->val);
340 #define SCA_HEX syscall_arg__scnprintf_hex
/*
 * Print arg->val as a signed decimal ("%d").
 * NOTE(review): the hex formatter prints the same field with "%#lx"; confirm
 * that "%d" matches the declared type of arg->val.
 */
342 static size_t syscall_arg__scnprintf_int(char *bf, size_t size,
343 struct syscall_arg *arg)
345 return scnprintf(bf, size, "%d", arg->val);
348 #define SCA_INT syscall_arg__scnprintf_int
/*
 * Render a memory protection argument symbolically.
 *
 * PROT_NONE prints as "NONE".  Otherwise each recognised PROT_* bit that is
 * set contributes its name, with '|' between names.  A final hex item is
 * appended for the value left in prot (the guard around it, and whether the
 * recognised bits are cleared first, is not visible in this excerpt).
 */
350 static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size,
351 struct syscall_arg *arg)
353 int printed = 0, prot = arg->val;
355 if (prot == PROT_NONE)
356 return scnprintf(bf, size, "NONE");
357 #define P_MMAP_PROT(n) \
358 if (prot & PROT_##n) { \
359 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
369 P_MMAP_PROT(GROWSDOWN);
370 P_MMAP_PROT(GROWSUP);
374 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", prot);
379 #define SCA_MMAP_PROT syscall_arg__scnprintf_mmap_prot
/*
 * Render mmap() flags symbolically: each recognised MAP_* bit that is set
 * contributes its name, with '|' between names; a final hex item is appended
 * for the value left in flags (its guard is not visible in this excerpt).
 * UNINITIALIZED is only handled when the headers define MAP_UNINITIALIZED.
 */
381 static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size,
382 struct syscall_arg *arg)
384 int printed = 0, flags = arg->val;
386 #define P_MMAP_FLAG(n) \
387 if (flags & MAP_##n) { \
388 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
393 P_MMAP_FLAG(PRIVATE);
397 P_MMAP_FLAG(ANONYMOUS);
398 P_MMAP_FLAG(DENYWRITE);
399 P_MMAP_FLAG(EXECUTABLE);
402 P_MMAP_FLAG(GROWSDOWN);
404 P_MMAP_FLAG(HUGETLB);
407 P_MMAP_FLAG(NONBLOCK);
408 P_MMAP_FLAG(NORESERVE);
409 P_MMAP_FLAG(POPULATE);
411 #ifdef MAP_UNINITIALIZED
412 P_MMAP_FLAG(UNINITIALIZED);
417 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
422 #define SCA_MMAP_FLAGS syscall_arg__scnprintf_mmap_flags
/*
 * Render mremap() flags symbolically.  Every recognised MREMAP_* bit prints
 * its name ('|' separated) and is cleared from flags, so the hex item at the
 * end only shows bits that were not recognised (the guard around that last
 * print is not visible in this excerpt).
 */
424 static size_t syscall_arg__scnprintf_mremap_flags(char *bf, size_t size,
425 struct syscall_arg *arg)
427 int printed = 0, flags = arg->val;
429 #define P_MREMAP_FLAG(n) \
430 if (flags & MREMAP_##n) { \
431 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
432 flags &= ~MREMAP_##n; \
435 P_MREMAP_FLAG(MAYMOVE);
437 P_MREMAP_FLAG(FIXED);
442 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
447 #define SCA_MREMAP_FLAGS syscall_arg__scnprintf_mremap_flags
/*
 * Render the madvise() behavior argument: each known MADV_* value returns
 * its name (without the MADV_ prefix); anything else is printed in hex.
 * SOFT_OFFLINE and NOHUGEPAGE are only handled when the headers define the
 * corresponding constants.
 */
449 static size_t syscall_arg__scnprintf_madvise_behavior(char *bf, size_t size,
450 struct syscall_arg *arg)
452 int behavior = arg->val;
455 #define P_MADV_BHV(n) case MADV_##n: return scnprintf(bf, size, #n)
458 P_MADV_BHV(SEQUENTIAL);
459 P_MADV_BHV(WILLNEED);
460 P_MADV_BHV(DONTNEED);
462 P_MADV_BHV(DONTFORK);
464 P_MADV_BHV(HWPOISON);
465 #ifdef MADV_SOFT_OFFLINE
466 P_MADV_BHV(SOFT_OFFLINE);
468 P_MADV_BHV(MERGEABLE);
469 P_MADV_BHV(UNMERGEABLE);
471 P_MADV_BHV(HUGEPAGE);
473 #ifdef MADV_NOHUGEPAGE
474 P_MADV_BHV(NOHUGEPAGE);
477 P_MADV_BHV(DONTDUMP);
486 return scnprintf(bf, size, "%#x", behavior);
489 #define SCA_MADV_BHV syscall_arg__scnprintf_madvise_behavior
/*
 * Render the flock() operation symbolically.  One case prints "NONE" (its
 * condition is not visible in this excerpt); otherwise a LOCK_* name is
 * printed only when all bits of that constant are set in op, names being
 * '|' separated, followed by a hex item for the value left in op.
 */
491 static size_t syscall_arg__scnprintf_flock(char *bf, size_t size,
492 struct syscall_arg *arg)
494 int printed = 0, op = arg->val;
497 return scnprintf(bf, size, "NONE");
499 if ((op & LOCK_##cmd) == LOCK_##cmd) { \
500 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #cmd); \
515 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", op);
520 #define SCA_FLOCK syscall_arg__scnprintf_flock
/*
 * Render the futex() op argument.
 *
 * The command is op & FUTEX_CMD_MASK; known commands print their name
 * (without the FUTEX_ prefix), unknown ones print in hex.  Each command also
 * ORs a set of SCF_* bits into arg->mask - presumably marking the futex
 * arguments that command does not use so they are left out of the output
 * (how the mask is consumed is not visible in this excerpt).  "|PRIV" and
 * "|CLKRT" are appended when FUTEX_PRIVATE_FLAG and FUTEX_CLOCK_REALTIME are
 * set in op.
 */
522 static size_t syscall_arg__scnprintf_futex_op(char *bf, size_t size, struct syscall_arg *arg)
524 enum syscall_futex_args {
525 SCF_UADDR = (1 << 0),
528 SCF_TIMEOUT = (1 << 3),
529 SCF_UADDR2 = (1 << 4),
533 int cmd = op & FUTEX_CMD_MASK;
537 #define P_FUTEX_OP(n) case FUTEX_##n: printed = scnprintf(bf, size, #n);
538 P_FUTEX_OP(WAIT); arg->mask |= SCF_VAL3|SCF_UADDR2; break;
539 P_FUTEX_OP(WAKE); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
540 P_FUTEX_OP(FD); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
541 P_FUTEX_OP(REQUEUE); arg->mask |= SCF_VAL3|SCF_TIMEOUT; break;
542 P_FUTEX_OP(CMP_REQUEUE); arg->mask |= SCF_TIMEOUT; break;
543 P_FUTEX_OP(CMP_REQUEUE_PI); arg->mask |= SCF_TIMEOUT; break;
544 P_FUTEX_OP(WAKE_OP); break;
545 P_FUTEX_OP(LOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
546 P_FUTEX_OP(UNLOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
547 P_FUTEX_OP(TRYLOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2; break;
548 P_FUTEX_OP(WAIT_BITSET); arg->mask |= SCF_UADDR2; break;
549 P_FUTEX_OP(WAKE_BITSET); arg->mask |= SCF_UADDR2; break;
550 P_FUTEX_OP(WAIT_REQUEUE_PI); break;
551 default: printed = scnprintf(bf, size, "%#x", cmd); break;
554 if (op & FUTEX_PRIVATE_FLAG)
555 printed += scnprintf(bf + printed, size - printed, "|PRIV");
557 if (op & FUTEX_CLOCK_REALTIME)
558 printed += scnprintf(bf + printed, size - printed, "|CLKRT");
563 #define SCA_FUTEX_OP syscall_arg__scnprintf_futex_op
/*
 * Name tables for enumerated syscall arguments.  Each table is wrapped in a
 * struct strarray (strarray__<table>) and indexed by the raw argument value
 * minus the strarray's offset.
 */
/* Declared with offset 1, so argument value 1 selects "ADD". */
565 static const char *epoll_ctl_ops[] = { "ADD", "DEL", "MOD", };
566 static DEFINE_STRARRAY_OFFSET(epoll_ctl_ops, 1);
568 static const char *itimers[] = { "REAL", "VIRTUAL", "PROF", };
569 static DEFINE_STRARRAY(itimers);
571 static const char *whences[] = { "SET", "CUR", "END",
579 static DEFINE_STRARRAY(whences);
581 static const char *fcntl_cmds[] = {
582 "DUPFD", "GETFD", "SETFD", "GETFL", "SETFL", "GETLK", "SETLK",
583 "SETLKW", "SETOWN", "GETOWN", "SETSIG", "GETSIG", "F_GETLK64",
584 "F_SETLK64", "F_SETLKW64", "F_SETOWN_EX", "F_GETOWN_EX",
587 static DEFINE_STRARRAY(fcntl_cmds);
589 static const char *rlimit_resources[] = {
590 "CPU", "FSIZE", "DATA", "STACK", "CORE", "RSS", "NPROC", "NOFILE",
591 "MEMLOCK", "AS", "LOCKS", "SIGPENDING", "MSGQUEUE", "NICE", "RTPRIO",
594 static DEFINE_STRARRAY(rlimit_resources);
596 static const char *sighow[] = { "BLOCK", "UNBLOCK", "SETMASK", };
597 static DEFINE_STRARRAY(sighow);
599 static const char *clockid[] = {
600 "REALTIME", "MONOTONIC", "PROCESS_CPUTIME_ID", "THREAD_CPUTIME_ID",
601 "MONOTONIC_RAW", "REALTIME_COARSE", "MONOTONIC_COARSE",
603 static DEFINE_STRARRAY(clockid);
605 static const char *socket_families[] = {
606 "UNSPEC", "LOCAL", "INET", "AX25", "IPX", "APPLETALK", "NETROM",
607 "BRIDGE", "ATMPVC", "X25", "INET6", "ROSE", "DECnet", "NETBEUI",
608 "SECURITY", "KEY", "NETLINK", "PACKET", "ASH", "ECONET", "ATMSVC",
609 "RDS", "SNA", "IRDA", "PPPOX", "WANPIPE", "LLC", "IB", "CAN", "TIPC",
610 "BLUETOOTH", "IUCV", "RXRPC", "ISDN", "PHONET", "IEEE802154", "CAIF",
611 "ALG", "NFC", "VSOCK",
613 static DEFINE_STRARRAY(socket_families);
/*
 * Render the socket type argument.  The value is split into the base type
 * (type & SOCK_TYPE_MASK) and the remaining flag bits.  The base type prints
 * as its SOCK_* name, or in hex when unknown; each recognised flag appends
 * "|NAME" and is cleared, and a last "|0x..." item covers what remains in
 * flags (its guard is not visible in this excerpt).
 */
615 #ifndef SOCK_TYPE_MASK
616 #define SOCK_TYPE_MASK 0xf
619 static size_t syscall_arg__scnprintf_socket_type(char *bf, size_t size,
620 struct syscall_arg *arg)
624 flags = type & ~SOCK_TYPE_MASK;
626 type &= SOCK_TYPE_MASK;
628 * Can't use a strarray, MIPS may override for ABI reasons.
631 #define P_SK_TYPE(n) case SOCK_##n: printed = scnprintf(bf, size, #n); break;
636 P_SK_TYPE(SEQPACKET);
641 printed = scnprintf(bf, size, "%#x", type);
644 #define P_SK_FLAG(n) \
645 if (flags & SOCK_##n) { \
646 printed += scnprintf(bf + printed, size - printed, "|%s", #n); \
647 flags &= ~SOCK_##n; \
655 printed += scnprintf(bf + printed, size - printed, "|%#x", flags);
660 #define SCA_SK_TYPE syscall_arg__scnprintf_socket_type
/*
 * Fallback MSG_* definitions followed by the send/recv flags formatter.
 * One case prints "NONE" (its condition is not visible in this excerpt);
 * otherwise each recognised MSG_* bit that is set contributes its name,
 * '|' separated, followed by a hex item for the value left in flags.
 */
663 #define MSG_PROBE 0x10
665 #ifndef MSG_WAITFORONE
666 #define MSG_WAITFORONE 0x10000
668 #ifndef MSG_SENDPAGE_NOTLAST
669 #define MSG_SENDPAGE_NOTLAST 0x20000
672 #define MSG_FASTOPEN 0x20000000
675 static size_t syscall_arg__scnprintf_msg_flags(char *bf, size_t size,
676 struct syscall_arg *arg)
678 int printed = 0, flags = arg->val;
681 return scnprintf(bf, size, "NONE");
682 #define P_MSG_FLAG(n) \
683 if (flags & MSG_##n) { \
684 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
690 P_MSG_FLAG(DONTROUTE);
695 P_MSG_FLAG(DONTWAIT);
702 P_MSG_FLAG(ERRQUEUE);
703 P_MSG_FLAG(NOSIGNAL);
705 P_MSG_FLAG(WAITFORONE);
706 P_MSG_FLAG(SENDPAGE_NOTLAST);
707 P_MSG_FLAG(FASTOPEN);
708 P_MSG_FLAG(CMSG_CLOEXEC);
712 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
717 #define SCA_MSG_FLAGS syscall_arg__scnprintf_msg_flags
/*
 * Render the access() mode argument: F_OK prints as "F"; otherwise the
 * letter of every <n>_OK bit that is set is appended with no separator, and
 * a final "|0x..." item shows the value left in mode (its guard is not
 * visible in this excerpt).
 */
719 static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size,
720 struct syscall_arg *arg)
725 if (mode == F_OK) /* 0 */
726 return scnprintf(bf, size, "F");
728 if (mode & n##_OK) { \
729 printed += scnprintf(bf + printed, size - printed, "%s", #n); \
739 printed += scnprintf(bf + printed, size - printed, "|%#x", mode);
744 #define SCA_ACCMODE syscall_arg__scnprintf_access_mode
/*
 * Render open() style flags symbolically.
 *
 * When O_CREAT is not set, the bit for the argument following this one (the
 * mode) is set in arg->mask.  One case prints "RDONLY" (its condition is not
 * visible in this excerpt); otherwise recognised O_* bits print their names,
 * '|' separated.  "SYNC" is printed only when every bit of O_SYNC is set.
 * A final hex item shows the value left in flags.
 */
746 static size_t syscall_arg__scnprintf_open_flags(char *bf, size_t size,
747 struct syscall_arg *arg)
749 int printed = 0, flags = arg->val;
751 if (!(flags & O_CREAT))
752 arg->mask |= 1 << (arg->idx + 1); /* Mask the mode parm */
755 return scnprintf(bf, size, "RDONLY");
757 if (flags & O_##n) { \
758 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
782 if ((flags & O_SYNC) == O_SYNC)
783 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", "SYNC");
795 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
800 #define SCA_OPEN_FLAGS syscall_arg__scnprintf_open_flags
/*
 * Render perf_event_open() flags symbolically: each recognised PERF_FLAG_*
 * bit prints its name ('|' separated) and is cleared from flags, so the hex
 * item at the end only shows bits that were not recognised (the guard around
 * that last print is not visible in this excerpt).
 */
802 static size_t syscall_arg__scnprintf_perf_flags(char *bf, size_t size,
803 struct syscall_arg *arg)
805 int printed = 0, flags = arg->val;
811 if (flags & PERF_FLAG_##n) { \
812 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
813 flags &= ~PERF_FLAG_##n; \
823 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
828 #define SCA_PERF_FLAGS syscall_arg__scnprintf_perf_flags
/*
 * Render eventfd flags symbolically.  One case prints "NONE" (its condition
 * is not visible in this excerpt); otherwise each recognised EFD_* bit that
 * is set contributes its name, '|' separated, followed by a hex item for the
 * value left in flags.
 */
830 static size_t syscall_arg__scnprintf_eventfd_flags(char *bf, size_t size,
831 struct syscall_arg *arg)
833 int printed = 0, flags = arg->val;
836 return scnprintf(bf, size, "NONE");
838 if (flags & EFD_##n) { \
839 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
849 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
854 #define SCA_EFD_FLAGS syscall_arg__scnprintf_eventfd_flags
/*
 * Render pipe2() flags symbolically: each recognised O_* bit that is set
 * contributes its name, '|' separated, followed by a hex item for the value
 * left in flags (its guard is not visible in this excerpt).
 */
856 static size_t syscall_arg__scnprintf_pipe_flags(char *bf, size_t size,
857 struct syscall_arg *arg)
859 int printed = 0, flags = arg->val;
862 if (flags & O_##n) { \
863 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
872 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
877 #define SCA_PIPE_FLAGS syscall_arg__scnprintf_pipe_flags
/*
 * Render a signal number: known SIG* values return their name without the
 * "SIG" prefix; anything else is printed in hex.
 */
879 static size_t syscall_arg__scnprintf_signum(char *bf, size_t size, struct syscall_arg *arg)
884 #define P_SIGNUM(n) case SIG##n: return scnprintf(bf, size, #n)
927 return scnprintf(bf, size, "%#x", sig);
930 #define SCA_SIGNUM syscall_arg__scnprintf_signum
/*
 * Names of terminal ioctl commands, only built for i386 and x86_64.  The
 * strarray is declared with offset 0x5401 (the TCGETS value defined here),
 * so entry 0 corresponds to command 0x5401; designated initialisers
 * ([0x27], [0x50], [0x60]) skip over gaps in the numbering.
 */
932 #if defined(__i386__) || defined(__x86_64__)
934 * FIXME: Make this available to all arches.
936 #define TCGETS 0x5401
938 static const char *tioctls[] = {
939 "TCGETS", "TCSETS", "TCSETSW", "TCSETSF", "TCGETA", "TCSETA", "TCSETAW",
940 "TCSETAF", "TCSBRK", "TCXONC", "TCFLSH", "TIOCEXCL", "TIOCNXCL",
941 "TIOCSCTTY", "TIOCGPGRP", "TIOCSPGRP", "TIOCOUTQ", "TIOCSTI",
942 "TIOCGWINSZ", "TIOCSWINSZ", "TIOCMGET", "TIOCMBIS", "TIOCMBIC",
943 "TIOCMSET", "TIOCGSOFTCAR", "TIOCSSOFTCAR", "FIONREAD", "TIOCLINUX",
944 "TIOCCONS", "TIOCGSERIAL", "TIOCSSERIAL", "TIOCPKT", "FIONBIO",
945 "TIOCNOTTY", "TIOCSETD", "TIOCGETD", "TCSBRKP", [0x27] = "TIOCSBRK",
946 "TIOCCBRK", "TIOCGSID", "TCGETS2", "TCSETS2", "TCSETSW2", "TCSETSF2",
947 "TIOCGRS485", "TIOCSRS485", "TIOCGPTN", "TIOCSPTLCK",
948 "TIOCGDEV||TCGETX", "TCSETX", "TCSETXF", "TCSETXW", "TIOCSIG",
949 "TIOCVHANGUP", "TIOCGPKT", "TIOCGPTLCK", "TIOCGEXCL",
950 [0x50] = "FIONCLEX", "FIOCLEX", "FIOASYNC", "TIOCSERCONFIG",
951 "TIOCSERGWILD", "TIOCSERSWILD", "TIOCGLCKTRMIOS", "TIOCSLCKTRMIOS",
952 "TIOCSERGSTRUCT", "TIOCSERGETLSR", "TIOCSERGETMULTI", "TIOCSERSETMULTI",
953 "TIOCMIWAIT", "TIOCGICOUNT", [0x60] = "FIOQSIZE",
956 static DEFINE_STRARRAY_OFFSET(tioctls, 0x5401);
957 #endif /* defined(__i386__) || defined(__x86_64__) */
/*
 * Initialiser helper for syscall_fmts entries: selects SCA_STRARRAY as the
 * formatter of argument index @arg and points arg_parm[@arg] at
 * strarray__<array>.  @name does not appear in the expansion; it only
 * documents the argument's name at the use site.
 */
959 #define STRARRAY(arg, name, array) \
960 .arg_scnprintf = { [arg] = SCA_STRARRAY, }, \
961 .arg_parm = { [arg] = &strarray__##array, }
/*
 * Per-syscall formatting overrides.
 *
 * Entries are looked up by name with bsearch() and strcmp() in
 * syscall_fmt__find(), so the table must stay sorted by .name in strcmp()
 * order - insert new syscalls at the matching position, not at the end.
 * arg_scnprintf[] holds one optional formatter per argument index and
 * arg_parm[] the matching formatter parameter (e.g. a struct strarray).
 */
963 static struct syscall_fmt {
966 size_t (*arg_scnprintf[6])(char *bf, size_t size, struct syscall_arg *arg);
972 { .name = "access", .errmsg = true,
973 .arg_scnprintf = { [1] = SCA_ACCMODE, /* mode */ }, },
974 { .name = "arch_prctl", .errmsg = true, .alias = "prctl", },
975 { .name = "brk", .hexret = true,
976 .arg_scnprintf = { [0] = SCA_HEX, /* brk */ }, },
977 { .name = "clock_gettime", .errmsg = true, STRARRAY(0, clk_id, clockid), },
978 { .name = "close", .errmsg = true,
979 .arg_scnprintf = { [0] = SCA_CLOSE_FD, /* fd */ }, },
980 { .name = "connect", .errmsg = true, },
981 { .name = "dup", .errmsg = true,
982 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
983 { .name = "dup2", .errmsg = true,
984 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
985 { .name = "dup3", .errmsg = true,
986 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
987 { .name = "epoll_ctl", .errmsg = true, STRARRAY(1, op, epoll_ctl_ops), },
988 { .name = "eventfd2", .errmsg = true,
989 .arg_scnprintf = { [1] = SCA_EFD_FLAGS, /* flags */ }, },
990 { .name = "faccessat", .errmsg = true,
991 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
992 { .name = "fadvise64", .errmsg = true,
993 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
994 { .name = "fallocate", .errmsg = true,
995 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
996 { .name = "fchdir", .errmsg = true,
997 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
998 { .name = "fchmod", .errmsg = true,
999 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1000 { .name = "fchmodat", .errmsg = true,
1001 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
1002 { .name = "fchown", .errmsg = true,
1003 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1004 { .name = "fchownat", .errmsg = true,
1005 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
1006 { .name = "fcntl", .errmsg = true,
1007 .arg_scnprintf = { [0] = SCA_FD, /* fd */
1008 [1] = SCA_STRARRAY, /* cmd */ },
1009 .arg_parm = { [1] = &strarray__fcntl_cmds, /* cmd */ }, },
1010 { .name = "fdatasync", .errmsg = true,
1011 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1012 { .name = "flock", .errmsg = true,
1013 .arg_scnprintf = { [0] = SCA_FD, /* fd */
1014 [1] = SCA_FLOCK, /* cmd */ }, },
1015 { .name = "fsetxattr", .errmsg = true,
1016 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1017 { .name = "fstat", .errmsg = true, .alias = "newfstat",
1018 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1019 { .name = "fstatat", .errmsg = true, .alias = "newfstatat",
1020 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1021 { .name = "fstatfs", .errmsg = true,
1022 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1023 { .name = "fsync", .errmsg = true,
1024 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1025 { .name = "ftruncate", .errmsg = true,
1026 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1027 { .name = "futex", .errmsg = true,
1028 .arg_scnprintf = { [1] = SCA_FUTEX_OP, /* op */ }, },
1029 { .name = "futimesat", .errmsg = true,
1030 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
1031 { .name = "getdents", .errmsg = true,
1032 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1033 { .name = "getdents64", .errmsg = true,
1034 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1035 { .name = "getitimer", .errmsg = true, STRARRAY(0, which, itimers), },
1036 { .name = "getrlimit", .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
1037 { .name = "ioctl", .errmsg = true,
1038 .arg_scnprintf = { [0] = SCA_FD, /* fd */
1039 #if defined(__i386__) || defined(__x86_64__)
1041 * FIXME: Make this available to all arches.
1043 [1] = SCA_STRHEXARRAY, /* cmd */
1044 [2] = SCA_HEX, /* arg */ },
1045 .arg_parm = { [1] = &strarray__tioctls, /* cmd */ }, },
1047 [2] = SCA_HEX, /* arg */ }, },
1049 { .name = "kill", .errmsg = true,
1050 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1051 { .name = "linkat", .errmsg = true,
1052 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
1053 { .name = "lseek", .errmsg = true,
1054 .arg_scnprintf = { [0] = SCA_FD, /* fd */
1055 [2] = SCA_STRARRAY, /* whence */ },
1056 .arg_parm = { [2] = &strarray__whences, /* whence */ }, },
1057 { .name = "lstat", .errmsg = true, .alias = "newlstat", },
1058 { .name = "madvise", .errmsg = true,
1059 .arg_scnprintf = { [0] = SCA_HEX, /* start */
1060 [2] = SCA_MADV_BHV, /* behavior */ }, },
1061 { .name = "mkdirat", .errmsg = true,
1062 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
1063 { .name = "mknodat", .errmsg = true,
1064 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
1065 { .name = "mlock", .errmsg = true,
1066 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1067 { .name = "mlockall", .errmsg = true,
1068 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1069 { .name = "mmap", .hexret = true,
1070 .arg_scnprintf = { [0] = SCA_HEX, /* addr */
1071 [2] = SCA_MMAP_PROT, /* prot */
1072 [3] = SCA_MMAP_FLAGS, /* flags */
1073 [4] = SCA_FD, /* fd */ }, },
1074 { .name = "mprotect", .errmsg = true,
1075 .arg_scnprintf = { [0] = SCA_HEX, /* start */
1076 [2] = SCA_MMAP_PROT, /* prot */ }, },
1077 { .name = "mremap", .hexret = true,
1078 .arg_scnprintf = { [0] = SCA_HEX, /* addr */
1079 [3] = SCA_MREMAP_FLAGS, /* flags */
1080 [4] = SCA_HEX, /* new_addr */ }, },
1081 { .name = "munlock", .errmsg = true,
1082 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1083 { .name = "munmap", .errmsg = true,
1084 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1085 { .name = "name_to_handle_at", .errmsg = true,
1086 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1087 { .name = "newfstatat", .errmsg = true,
1088 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1089 { .name = "open", .errmsg = true,
1090 .arg_scnprintf = { [1] = SCA_OPEN_FLAGS, /* flags */ }, },
1091 { .name = "open_by_handle_at", .errmsg = true,
1092 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1093 [2] = SCA_OPEN_FLAGS, /* flags */ }, },
1094 { .name = "openat", .errmsg = true,
1095 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1096 [2] = SCA_OPEN_FLAGS, /* flags */ }, },
1097 { .name = "perf_event_open", .errmsg = true,
1098 .arg_scnprintf = { [1] = SCA_INT, /* pid */
1099 [2] = SCA_INT, /* cpu */
1100 [3] = SCA_FD, /* group_fd */
1101 [4] = SCA_PERF_FLAGS, /* flags */ }, },
1102 { .name = "pipe2", .errmsg = true,
1103 .arg_scnprintf = { [1] = SCA_PIPE_FLAGS, /* flags */ }, },
1104 { .name = "poll", .errmsg = true, .timeout = true, },
1105 { .name = "ppoll", .errmsg = true, .timeout = true, },
1106 { .name = "pread", .errmsg = true, .alias = "pread64",
1107 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1108 { .name = "preadv", .errmsg = true, .alias = "pread",
1109 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1110 { .name = "prlimit64", .errmsg = true, STRARRAY(1, resource, rlimit_resources), },
1111 { .name = "pwrite", .errmsg = true, .alias = "pwrite64",
1112 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1113 { .name = "pwritev", .errmsg = true,
1114 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1115 { .name = "read", .errmsg = true,
1116 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1117 { .name = "readlinkat", .errmsg = true,
1118 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1119 { .name = "readv", .errmsg = true,
1120 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1121 { .name = "recvfrom", .errmsg = true,
1122 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1123 { .name = "recvmmsg", .errmsg = true,
1124 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1125 { .name = "recvmsg", .errmsg = true,
1126 .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
1127 { .name = "renameat", .errmsg = true,
1128 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1129 { .name = "rt_sigaction", .errmsg = true,
1130 .arg_scnprintf = { [0] = SCA_SIGNUM, /* sig */ }, },
1131 { .name = "rt_sigprocmask", .errmsg = true, STRARRAY(0, how, sighow), },
1132 { .name = "rt_sigqueueinfo", .errmsg = true,
1133 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1134 { .name = "rt_tgsigqueueinfo", .errmsg = true,
1135 .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
1136 { .name = "select", .errmsg = true, .timeout = true, },
1137 { .name = "sendmmsg", .errmsg = true,
1138 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1139 { .name = "sendmsg", .errmsg = true,
1140 .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
1141 { .name = "sendto", .errmsg = true,
1142 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1143 { .name = "setitimer", .errmsg = true, STRARRAY(0, which, itimers), },
1144 { .name = "setrlimit", .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
1145 { .name = "shutdown", .errmsg = true,
1146 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1147 { .name = "socket", .errmsg = true,
1148 .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
1149 [1] = SCA_SK_TYPE, /* type */ },
1150 .arg_parm = { [0] = &strarray__socket_families, /* family */ }, },
1151 { .name = "socketpair", .errmsg = true,
1152 .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
1153 [1] = SCA_SK_TYPE, /* type */ },
1154 .arg_parm = { [0] = &strarray__socket_families, /* family */ }, },
1155 { .name = "stat", .errmsg = true, .alias = "newstat", },
1156 { .name = "symlinkat", .errmsg = true,
1157 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1158 { .name = "tgkill", .errmsg = true,
1159 .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
1160 { .name = "tkill", .errmsg = true,
1161 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1162 { .name = "uname", .errmsg = true, .alias = "newuname", },
1163 { .name = "unlinkat", .errmsg = true,
1164 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1165 { .name = "utimensat", .errmsg = true,
1166 .arg_scnprintf = { [0] = SCA_FDAT, /* dirfd */ }, },
1167 { .name = "write", .errmsg = true,
1168 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1169 { .name = "writev", .errmsg = true,
1170 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
/*
 * bsearch() comparator: @name is the key (a C string) and @fmtp an element
 * of the syscall_fmt table; ordering is strcmp() on the element's name.
 */
1173 static int syscall_fmt__cmp(const void *name, const void *fmtp)
1175 const struct syscall_fmt *fmt = fmtp;
1176 return strcmp(name, fmt->name);
/*
 * Binary-search syscall_fmts for the entry called @name.  Returns the entry,
 * or NULL when bsearch() finds none.  Relies on syscall_fmts being sorted by
 * name in strcmp() order.
 */
1179 static struct syscall_fmt *syscall_fmt__find(const char *name)
1181 const int nmemb = ARRAY_SIZE(syscall_fmts);
1182 return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp);
1186 struct event_format *tp_format;
1188 struct format_field *args;
1191 struct syscall_fmt *fmt;
1192 size_t (**arg_scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
/*
 * Print "(<duration> ms): " to @fp and return the number of characters
 * written.
 *
 * @t is divided by NSEC_PER_MSEC to obtain milliseconds.  The number is
 * coloured red from 1 ms upwards, yellow from 0.01 ms upwards and uses the
 * normal colour below that.
 */
1196 static size_t fprintf_duration(unsigned long t, FILE *fp)
1198 double duration = (double)t / NSEC_PER_MSEC;
1199 size_t printed = fprintf(fp, "(");
1201 if (duration >= 1.0)
1202 printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration);
1203 else if (duration >= 0.01)
1204 printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration);
1206 printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration);
1207 return printed + fprintf(fp, "): ");
/*
 * Per-thread tracing state, kept as the thread's priv pointer (see
 * thread__trace()).  Only some members are visible in this excerpt.
 */
1210 struct thread_trace {
/* Incremented on every thread__trace() call for this thread. */
1214 unsigned long nr_events;
/* presumably major/minor page-fault counters - confirm against the users */
1215 unsigned long pfmaj, pfmin;
/* Created with intlist__new(NULL) in thread_trace__new(). */
1223 struct intlist *syscall_stats;
/*
 * Allocate a zero-filled struct thread_trace.  paths.max starts at -1,
 * meaning no fd path table exists yet (trace__set_fd_pathname() tests for
 * that value), and syscall_stats receives a new intlist.
 *
 * NOTE(review): the NULL check on the zalloc() result is not visible in this
 * excerpt; confirm that the syscall_stats assignment is covered by it too,
 * otherwise an allocation failure dereferences NULL here.
 */
1226 static struct thread_trace *thread_trace__new(void)
1228 struct thread_trace *ttrace = zalloc(sizeof(struct thread_trace));
1231 ttrace->paths.max = -1;
1233 ttrace->syscall_stats = intlist__new(NULL);
/*
 * Fetch the thread_trace attached to @thread, creating it on first use, and
 * count one more event on it.  If the priv pointer is still NULL after the
 * creation attempt, a red "not enough memory" warning is written to @fp
 * (the value returned on that path is not visible in this excerpt).
 */
1238 static struct thread_trace *thread__trace(struct thread *thread, FILE *fp)
1240 struct thread_trace *ttrace;
1245 if (thread__priv(thread) == NULL)
1246 thread__set_priv(thread, thread_trace__new());
1248 if (thread__priv(thread) == NULL)
1251 ttrace = thread__priv(thread);
1252 ++ttrace->nr_events;
1256 color_fprintf(fp, PERF_COLOR_RED,
1257 "WARNING: not enough memory, dropping samples!\n");
1261 #define TRACE_PFMAJ (1 << 0)
1262 #define TRACE_PFMIN (1 << 1)
1265 struct perf_tool tool;
1272 struct syscall *table;
1274 struct perf_evsel *sys_enter,
1278 struct record_opts opts;
1279 struct perf_evlist *evlist;
1280 struct machine *host;
1281 struct thread *current;
1284 unsigned long nr_events;
1285 struct strlist *ev_qualifier;
1290 const char *last_vfs_getname;
1291 struct intlist *tid_list;
1292 struct intlist *pid_list;
1297 double duration_filter;
1303 bool not_ev_qualifier;
1307 bool multiple_threads;
1311 bool show_tool_stats;
1312 bool trace_syscalls;
/*
 * Remember @pathname as the path behind @fd for @thread.
 *
 * When @fd is beyond the current table, the table is grown with realloc() to
 * fd + 1 slots and the newly added slots are zeroed (all of them when the
 * table did not exist yet, i.e. paths.max == -1).  The slot then receives a
 * strdup() copy of @pathname.  Returns 0 when the copy succeeded and -1 when
 * strdup() failed; the realloc() failure path is not visible in this
 * excerpt.
 *
 * NOTE(review): the slot is overwritten without freeing a previous string;
 * confirm callers only fill empty slots.
 */
1317 static int trace__set_fd_pathname(struct thread *thread, int fd, const char *pathname)
1319 struct thread_trace *ttrace = thread__priv(thread);
1321 if (fd > ttrace->paths.max) {
1322 char **npath = realloc(ttrace->paths.table, (fd + 1) * sizeof(char *));
1327 if (ttrace->paths.max != -1) {
1328 memset(npath + ttrace->paths.max + 1, 0,
1329 (fd - ttrace->paths.max) * sizeof(char *));
1331 memset(npath, 0, (fd + 1) * sizeof(char *));
1334 ttrace->paths.table = npath;
1335 ttrace->paths.max = fd;
1338 ttrace->paths.table[fd] = strdup(pathname);
1340 return ttrace->paths.table[fd] != NULL ? 0 : -1;
/*
 * Resolve @fd of @thread to a path by reading its /proc symlink and store
 * the result with trace__set_fd_pathname().
 *
 * The link is /proc/<pid>/fd/<fd> when pid_ equals tid and
 * /proc/<pid>/task/<tid>/fd/<fd> otherwise.  lstat() supplies the link
 * length, which must fit in the local buffer together with the terminating
 * NUL; a readlink() result that is negative or longer than that length is
 * rejected.  The return values of the rejection paths are not visible in
 * this excerpt.
 */
1343 static int thread__read_fd_path(struct thread *thread, int fd)
1345 char linkname[PATH_MAX], pathname[PATH_MAX];
1349 if (thread->pid_ == thread->tid) {
1350 scnprintf(linkname, sizeof(linkname),
1351 "/proc/%d/fd/%d", thread->pid_, fd);
1353 scnprintf(linkname, sizeof(linkname),
1354 "/proc/%d/task/%d/fd/%d", thread->pid_, thread->tid, fd);
1357 if (lstat(linkname, &st) < 0 || st.st_size + 1 > (off_t)sizeof(pathname))
1360 ret = readlink(linkname, pathname, sizeof(pathname));
1362 if (ret < 0 || ret > st.st_size)
1365 pathname[ret] = '\0';
1366 return trace__set_fd_pathname(thread, fd, pathname);
/*
 * Return the cached path for @fd of @thread. When the table has no entry
 * for @fd, a /proc lookup is attempted through thread__read_fd_path() and
 * counted in trace->stats.proc_getname.
 *
 * NOTE(review): the early-exit conditions and the failure return are on
 * lines not visible here - confirm before relying on NULL semantics.
 */
1369 static const char *thread__fd_path(struct thread *thread, int fd,
1370 struct trace *trace)
1372 struct thread_trace *ttrace = thread__priv(thread);
1380 if ((fd > ttrace->paths.max || ttrace->paths.table[fd] == NULL)) {
1383 ++trace->stats.proc_getname;
1384 if (thread__read_fd_path(thread, fd))
1388 return ttrace->paths.table[fd];
/*
 * Argument formatter for file descriptors: writes the descriptor number
 * into @bf and appends the path obtained from thread__fd_path() as "<path>".
 *
 * NOTE(review): the declaration of fd, the guard on @path and the return
 * statement are not visible in this listing.
 */
1391 static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
1392 struct syscall_arg *arg)
1395 size_t printed = scnprintf(bf, size, "%d", fd);
1396 const char *path = thread__fd_path(arg->thread, fd, arg->trace);
1399 printed += scnprintf(bf + printed, size - printed, "<%s>", path);
/*
 * Formatter for the fd argument of close(): formats it exactly like any
 * other descriptor (so the path is still shown) and then releases the
 * cached path entry for that descriptor, provided the thread has trace
 * state and the fd lies inside the table.
 */
1404 static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
1405 struct syscall_arg *arg)
1408 size_t printed = syscall_arg__scnprintf_fd(bf, size, arg);
1409 struct thread_trace *ttrace = thread__priv(arg->thread);
1411 if (ttrace && fd >= 0 && fd <= ttrace->paths.max)
1412 zfree(&ttrace->paths.table[fd]);
/*
 * Return true when duration @t is below the configured threshold.
 * trace->duration_filter is scaled by NSEC_PER_MSEC before the comparison.
 */
1417 static bool trace__filter_duration(struct trace *trace, double t)
1419 return t < (trace->duration_filter * NSEC_PER_MSEC);
/*
 * Print @tstamp relative to trace->base_time, divided by NSEC_PER_MSEC, as
 * a "%10.3f " column on @fp. Returns what fprintf() returned.
 */
1422 static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
1424 double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC;
1426 return fprintf(fp, "%10.3f ", ts);
/*
 * Flags written by sig_handler() and polled by the event loop. Objects that
 * a signal handler stores to must be volatile sig_atomic_t; with a plain
 * bool the compiler may cache the value across loop iterations.
 */
static volatile sig_atomic_t done = false;
static volatile sig_atomic_t interrupted = false;

/*
 * Signal handler installed for SIGCHLD and SIGINT: asks the event loop to
 * stop and records whether the stop was requested by the user (SIGINT).
 */
static void sig_handler(int sig)
{
	done = true;
	interrupted = sig == SIGINT;
}
/*
 * Print the common prefix of an output line on @fp: the timestamp, the
 * duration and, when trace->multiple_threads is set, the thread id -
 * preceded by the comm (at most 14 characters) if trace->show_comm is set.
 * The character counts of the pieces are accumulated in 'printed'.
 */
1438 static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread,
1439 u64 duration, u64 tstamp, FILE *fp)
1441 size_t printed = trace__fprintf_tstamp(trace, tstamp, fp);
1442 printed += fprintf_duration(duration, fp);
1444 if (trace->multiple_threads) {
1445 if (trace->show_comm)
1446 printed += fprintf(fp, "%.14s/", thread__comm_str(thread));
1447 printed += fprintf(fp, "%d ", thread->tid);
/*
 * Handle a non-sample event for @machine. PERF_RECORD_LOST is reported in
 * red on trace->output before being passed to
 * machine__process_lost_event(); the other visible path forwards the event
 * to machine__process_event().
 */
1453 static int trace__process_event(struct trace *trace, struct machine *machine,
1454 union perf_event *event, struct perf_sample *sample)
1458 switch (event->header.type) {
1459 case PERF_RECORD_LOST:
1460 color_fprintf(trace->output, PERF_COLOR_RED,
1461 "LOST %" PRIu64 " events!\n", event->lost.lost);
1462 ret = machine__process_lost_event(machine, event, sample);
1464 ret = machine__process_event(machine, event, sample);
/*
 * perf_tool-style callback: recovers the enclosing struct trace from @tool
 * and forwards the event to trace__process_event().
 */
1471 static int trace__tool_process(struct perf_tool *tool,
1472 union perf_event *event,
1473 struct perf_sample *sample,
1474 struct machine *machine)
1476 struct trace *trace = container_of(tool, struct trace, tool);
1477 return trace__process_event(trace, machine, event, sample);
/*
 * Prepare symbol resolution for live tracing: initialise the symbol layer,
 * create the host machine in trace->host and synthesize the threads listed
 * in evlist->threads, routing the synthesized events through
 * trace__tool_process().
 *
 * NOTE(review): the error returns are on lines not visible here.
 */
1480 static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
1482 int err = symbol__init(NULL);
1487 trace->host = machine__new_host();
1488 if (trace->host == NULL)
1491 err = __machine__synthesize_threads(trace->host, &trace->tool, &trace->opts.target,
1492 evlist->threads, trace__tool_process, false,
1493 trace->opts.proc_map_timeout);
/*
 * Build the per-argument formatter table of @sc. One slot per argument is
 * allocated (zeroed); for each field a formatter supplied by sc->fmt takes
 * precedence, otherwise fields flagged FIELD_IS_POINTER are printed with
 * syscall_arg__scnprintf_hex. Slots left NULL get no special formatter.
 *
 * NOTE(review): the initialisation/increment of idx and the guard around
 * the sc->arg_parm assignment are not visible in this listing.
 */
1500 static int syscall__set_arg_fmts(struct syscall *sc)
1502 struct format_field *field;
1505 sc->arg_scnprintf = calloc(sc->nr_args, sizeof(void *));
1506 if (sc->arg_scnprintf == NULL)
1510 sc->arg_parm = sc->fmt->arg_parm;
1512 for (field = sc->args; field; field = field->next) {
1513 if (sc->fmt && sc->fmt->arg_scnprintf[idx])
1514 sc->arg_scnprintf[idx] = sc->fmt->arg_scnprintf[idx];
1515 else if (field->flags & FIELD_IS_POINTER)
1516 sc->arg_scnprintf[idx] = syscall_arg__scnprintf_hex;
/*
 * Fill in trace->syscalls.table[id] for syscall number @id.
 *
 * Steps visible here: resolve the name through libaudit, grow the table to
 * hold @id (new slots zeroed), look up the formatting description, find the
 * "syscalls:sys_enter_<name>" tracepoint format (retrying with the alias
 * from the formatting description), take the argument list from that
 * format, and finally set up the per-argument formatters.
 */
1523 static int trace__read_syscall_info(struct trace *trace, int id)
1527 const char *name = audit_syscall_to_name(id, trace->audit.machine);
1532 if (id > trace->syscalls.max) {
1533 struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc));
1535 if (nsyscalls == NULL)
/* Zero only the newly added slots; max == -1 means the table was empty. */
1538 if (trace->syscalls.max != -1) {
1539 memset(nsyscalls + trace->syscalls.max + 1, 0,
1540 (id - trace->syscalls.max) * sizeof(*sc));
1542 memset(nsyscalls, 0, (id + 1) * sizeof(*sc));
1545 trace->syscalls.table = nsyscalls;
1546 trace->syscalls.max = id;
1549 sc = trace->syscalls.table + id;
1552 sc->fmt = syscall_fmt__find(sc->name);
1554 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name);
1555 sc->tp_format = trace_event__tp_format("syscalls", tp_name);
/* Some syscalls are exposed under a different tracepoint name. */
1557 if (sc->tp_format == NULL && sc->fmt && sc->fmt->alias) {
1558 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias);
1559 sc->tp_format = trace_event__tp_format("syscalls", tp_name);
1562 if (sc->tp_format == NULL)
1565 sc->args = sc->tp_format->format.fields;
1566 sc->nr_args = sc->tp_format->format.nr_fields;
1567 /* drop nr field - not relevant here; does not exist on older kernels */
1568 if (sc->args && strcmp(sc->args->name, "nr") == 0) {
1569 sc->args = sc->args->next;
/* Remembers whether this syscall is exit or exit_group. */
1573 sc->is_exit = !strcmp(name, "exit_group") || !strcmp(name, "exit");
1575 return syscall__set_arg_fmts(sc);
/*
 * Translate every syscall name in trace->ev_qualifier into its number and
 * store the numbers in trace->ev_qualifier_ids.entries (one slot per list
 * entry).
 *
 * Names that cannot be translated are listed, comma separated, on
 * trace->output after an "Invalid syscall" banner; in that case hints are
 * printed and the ids array is released with its count reset to 0.
 *
 * NOTE(review): the conditions selecting these paths and the return values
 * are on lines not visible in this listing.
 */
1578 static int trace__validate_ev_qualifier(struct trace *trace)
1581 struct str_node *pos;
1583 trace->ev_qualifier_ids.nr = strlist__nr_entries(trace->ev_qualifier);
1584 trace->ev_qualifier_ids.entries = malloc(trace->ev_qualifier_ids.nr *
1585 sizeof(trace->ev_qualifier_ids.entries[0]));
1587 if (trace->ev_qualifier_ids.entries == NULL) {
1588 fputs("Error:\tNot enough memory for allocating events qualifier ids\n",
1596 strlist__for_each(pos, trace->ev_qualifier) {
1597 const char *sc = pos->s;
1598 int id = audit_name_to_syscall(sc, trace->audit.machine);
1602 fputs("Error:\tInvalid syscall ", trace->output);
1605 fputs(", ", trace->output);
1608 fputs(sc, trace->output);
1611 trace->ev_qualifier_ids.entries[i++] = id;
1615 fputs("\nHint:\ttry 'perf list syscalls:sys_enter_*'"
1616 "\nHint:\tand: 'man syscalls'\n", trace->output);
1617 zfree(&trace->ev_qualifier_ids.entries);
1618 trace->ev_qualifier_ids.nr = 0;
1625 * args is to be interpreted as a series of longs but we need to handle
1626 * 8-byte unaligned accesses. args points to raw_data within the event
1627 * and raw_data is guaranteed to be 8-byte unaligned because it is
1628 * preceded by raw_size which is a u32. So we need to copy args to a temp
1629 * variable to read it. Most notably this avoids extended load instructions
1630 * on unaligned addresses
/*
 * Format the arguments of one syscall invocation into @bf.
 *
 * @args is the raw argument area of the sample, read as consecutive
 * unsigned long values; each value is copied out with memcpy() because the
 * area is not suitably aligned for a direct load.
 *
 * When the syscall has a known argument list (sc->args), each argument is
 * printed as "name: value", using the per-argument formatter when one is
 * set. Otherwise the raw values are printed by index (second loop below).
 */
1633 static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
1634 unsigned char *args, struct trace *trace,
1635 struct thread *thread)
1641 if (sc->args != NULL) {
1642 struct format_field *field;
1644 struct syscall_arg arg = {
/* arg.idx tracks the argument position, 'bit' its mask bit. */
1651 for (field = sc->args; field;
1652 field = field->next, ++arg.idx, bit <<= 1) {
1656 /* special care for unaligned accesses */
1657 p = args + sizeof(unsigned long) * arg.idx;
1658 memcpy(&val, p, sizeof(val));
1661 * Suppress this argument if its value is zero and
1662 * and we don't have a string associated in an
1666 !(sc->arg_scnprintf &&
1667 sc->arg_scnprintf[arg.idx] == SCA_STRARRAY &&
1668 sc->arg_parm[arg.idx]))
/* Separate arguments with ", " once something has been printed. */
1671 printed += scnprintf(bf + printed, size - printed,
1672 "%s%s: ", printed ? ", " : "", field->name);
1673 if (sc->arg_scnprintf && sc->arg_scnprintf[arg.idx]) {
1676 arg.parm = sc->arg_parm[arg.idx];
1677 printed += sc->arg_scnprintf[arg.idx](bf + printed,
1678 size - printed, &arg);
1680 printed += scnprintf(bf + printed, size - printed,
1688 /* special care for unaligned accesses */
1689 p = args + sizeof(unsigned long) * i;
1690 memcpy(&val, p, sizeof(val));
1691 printed += scnprintf(bf + printed, size - printed,
1693 printed ? ", " : "", i, val);
/*
 * Signature of the per-event handlers stored in evsel->handler and invoked
 * by trace__process_sample() and trace__handle_event().
 */
1701 typedef int (*tracepoint_handler)(struct trace *trace, struct perf_evsel *evsel,
1702 union perf_event *event,
1703 struct perf_sample *sample);
/*
 * Return the descriptor for syscall @id, loading it on first use through
 * trace__read_syscall_info().
 *
 * An invalid @id is reported on trace->output together with the event name
 * and a running count. When the descriptor cannot be loaded, a "Problems
 * reading syscall" message (with the name, if known) is printed.
 *
 * NOTE(review): the conditions guarding the diagnostics and the failure
 * return value are on lines not visible in this listing.
 */
1705 static struct syscall *trace__syscall_info(struct trace *trace,
1706 struct perf_evsel *evsel, int id)
1712 * XXX: Noticed on x86_64, reproduced as far back as 3.0.36, haven't tried
1713 * before that, leaving at a higher verbosity level till that is
1714 * explained. Reproduced with plain ftrace with:
1716 * echo 1 > /t/events/raw_syscalls/sys_exit/enable
1717 * grep "NR -1 " /t/trace_pipe
1719 * After generating some load on the machine.
1723 fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n",
1724 id, perf_evsel__name(evsel), ++n);
/* Load the descriptor lazily: not in the table yet, or slot still empty. */
1729 if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) &&
1730 trace__read_syscall_info(trace, id))
/* Re-check: the load may have succeeded without producing a name. */
1733 if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL))
1736 return &trace->syscalls.table[id];
1740 fprintf(trace->output, "Problems reading syscall %d", id);
1741 if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL)
1742 fprintf(trace->output, "(%s)", trace->syscalls.table[id].name);
1743 fputs(" information\n", trace->output);
/*
 * Add the duration of one completed syscall @id to the per-thread
 * statistics. The stats object hangs off the int_node for @id in
 * ttrace->syscall_stats and is allocated the first time the syscall is
 * seen. The duration is sample->time - ttrace->entry_time, computed only
 * when an entry time is recorded and lies before the sample time.
 */
1748 static void thread__update_stats(struct thread_trace *ttrace,
1749 int id, struct perf_sample *sample)
1751 struct int_node *inode;
1752 struct stats *stats;
1755 inode = intlist__findnew(ttrace->syscall_stats, id);
1759 stats = inode->priv;
1760 if (stats == NULL) {
1761 stats = malloc(sizeof(struct stats));
1765 inode->priv = stats;
1768 if (ttrace->entry_time && sample->time > ttrace->entry_time)
1769 duration = sample->time - ttrace->entry_time;
1771 update_stats(stats, duration);
/*
 * If the current thread (trace->current) has a syscall entry that was
 * formatted but not yet printed, print it now followed by ") ..." and clear
 * the pending flag. The duration shown is the time from the entry to
 * @sample.
 */
1774 static int trace__printf_interrupted_entry(struct trace *trace, struct perf_sample *sample)
1776 struct thread_trace *ttrace;
1780 if (trace->current == NULL)
1783 ttrace = thread__priv(trace->current);
1785 if (!ttrace->entry_pending)
1788 duration = sample->time - ttrace->entry_time;
1790 printed = trace__fprintf_entry_head(trace, trace->current, duration, sample->time, trace->output);
1791 printed += fprintf(trace->output, "%-70s) ...\n", ttrace->entry_str);
1792 ttrace->entry_pending = false;
/*
 * Handler for the sys_enter tracepoint.
 *
 * Formats "name(args" into the thread's entry_str buffer (allocated on
 * first use, 1024 bytes) and records the entry time. Under the condition
 * at the "!trace->duration_filter" test the line is printed right away;
 * the entry is marked pending so that the matching sys_exit (or an
 * interrupting event) prints it. trace->current is switched to this
 * thread, dropping the reference on the previous one.
 *
 * NOTE(review): the early exits, the is_exit check and the return value are
 * on lines not visible in this listing.
 */
1797 static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
1798 union perf_event *event __maybe_unused,
1799 struct perf_sample *sample)
1804 struct thread *thread;
1805 int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1;
1806 struct syscall *sc = trace__syscall_info(trace, evsel, id);
1807 struct thread_trace *ttrace;
1812 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1813 ttrace = thread__trace(thread, trace->output);
1817 args = perf_evsel__sc_tp_ptr(evsel, args, sample);
1819 if (ttrace->entry_str == NULL) {
1820 ttrace->entry_str = malloc(1024);
1821 if (!ttrace->entry_str)
/* Flush another thread's half-printed entry before starting a new one. */
1825 if (!trace->summary_only)
1826 trace__printf_interrupted_entry(trace, sample);
1828 ttrace->entry_time = sample->time;
1829 msg = ttrace->entry_str;
1830 printed += scnprintf(msg + printed, 1024 - printed, "%s(", sc->name);
1832 printed += syscall__scnprintf_args(sc, msg + printed, 1024 - printed,
1833 args, trace, thread);
1836 if (!trace->duration_filter && !trace->summary_only) {
1837 trace__fprintf_entry_head(trace, thread, 1, sample->time, trace->output);
1838 fprintf(trace->output, "%-70s\n", ttrace->entry_str);
1841 ttrace->entry_pending = true;
1843 if (trace->current != thread) {
1844 thread__put(trace->current);
1845 trace->current = thread__get(thread);
/* Drop the reference taken by machine__findnew_thread(). */
1849 thread__put(thread);
/*
 * Handler for the sys_exit tracepoint.
 *
 * Updates the per-thread statistics, associates the pathname captured by
 * the vfs_getname probe with the fd returned by a successful open, applies
 * the duration filter and then prints the result: either the pending entry
 * string or a "... [continued]: name()" marker, followed by the return
 * value formatted according to sc->fmt (plain, errno name and message,
 * "Timeout", or hexadecimal).
 *
 * NOTE(review): several guards, the default return-value format and the
 * function's return are on lines not visible in this listing.
 */
1853 static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
1854 union perf_event *event __maybe_unused,
1855 struct perf_sample *sample)
1859 struct thread *thread;
1860 int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1;
1861 struct syscall *sc = trace__syscall_info(trace, evsel, id);
1862 struct thread_trace *ttrace;
1867 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1868 ttrace = thread__trace(thread, trace->output);
1873 thread__update_stats(ttrace, id, sample);
1875 ret = perf_evsel__sc_tp_uint(evsel, ret, sample);
/* open() succeeded: remember which path the new descriptor refers to. */
1877 if (id == trace->audit.open_id && ret >= 0 && trace->last_vfs_getname) {
1878 trace__set_fd_pathname(thread, ret, trace->last_vfs_getname);
1879 trace->last_vfs_getname = NULL;
1880 ++trace->stats.vfs_getname;
1883 ttrace->exit_time = sample->time;
1885 if (ttrace->entry_time) {
1886 duration = sample->time - ttrace->entry_time;
1887 if (trace__filter_duration(trace, duration))
1889 } else if (trace->duration_filter)
1892 if (trace->summary_only)
1895 trace__fprintf_entry_head(trace, thread, duration, sample->time, trace->output);
1897 if (ttrace->entry_pending) {
1898 fprintf(trace->output, "%-70s", ttrace->entry_str);
1900 fprintf(trace->output, " ... [");
1901 color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued");
1902 fprintf(trace->output, "]: %s()", sc->name);
1905 if (sc->fmt == NULL) {
1907 fprintf(trace->output, ") = %ld", ret);
1908 } else if (ret < 0 && sc->fmt->errmsg) {
1909 char bf[STRERR_BUFSIZE];
/* The result is used as a string, i.e. the char *-returning strerror_r(). */
1910 const char *emsg = strerror_r(-ret, bf, sizeof(bf)),
1911 *e = audit_errno_to_name(-ret);
1913 fprintf(trace->output, ") = -1 %s %s", e, emsg);
1914 } else if (ret == 0 && sc->fmt->timeout)
1915 fprintf(trace->output, ") = 0 Timeout");
1916 else if (sc->fmt->hexret)
1917 fprintf(trace->output, ") = %#lx", ret);
1921 fputc('\n', trace->output);
1923 ttrace->entry_pending = false;
/* Drop the reference taken by machine__findnew_thread(). */
1926 thread__put(thread);
/*
 * Handler for the vfs_getname probe: remembers the "pathname" field of the
 * sample in trace->last_vfs_getname. The pointer refers into the sample's
 * raw data and is consumed by trace__sys_exit() when an open() returns.
 */
1930 static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel,
1931 union perf_event *event __maybe_unused,
1932 struct perf_sample *sample)
1934 trace->last_vfs_getname = perf_evsel__rawptr(evsel, sample, "pathname");
/*
 * Handler for sched:sched_stat_runtime: converts the "runtime" field to
 * milliseconds and adds it to both the per-thread and the global runtime
 * totals. The trailing fprintf() dumps the raw event fields.
 *
 * NOTE(review): the fprintf() path is presumably the fallback taken when
 * the thread's trace state is unavailable - the branch condition is not
 * visible here; confirm.
 */
1938 static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel,
1939 union perf_event *event __maybe_unused,
1940 struct perf_sample *sample)
1942 u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
1943 double runtime_ms = (double)runtime / NSEC_PER_MSEC;
1944 struct thread *thread = machine__findnew_thread(trace->host,
1947 struct thread_trace *ttrace = thread__trace(thread, trace->output);
1952 ttrace->runtime_ms += runtime_ms;
1953 trace->runtime_ms += runtime_ms;
1954 thread__put(thread);
1958 fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n",
1960 perf_evsel__strval(evsel, sample, "comm"),
1961 (pid_t)perf_evsel__intval(evsel, sample, "pid"),
1963 perf_evsel__intval(evsel, sample, "vruntime"));
1964 thread__put(thread);
/*
 * Generic handler for events other than the syscall tracepoints: flushes
 * any pending syscall entry, prints the timestamp, an empty duration
 * column when syscalls are being traced too, the event name and - when a
 * tracepoint format is available - the formatted payload.
 */
1968 static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel,
1969 union perf_event *event __maybe_unused,
1970 struct perf_sample *sample)
1972 trace__printf_interrupted_entry(trace, sample);
1973 trace__fprintf_tstamp(trace, sample->time, trace->output);
/* Keeps the columns aligned with syscall lines. */
1975 if (trace->trace_syscalls)
1976 fprintf(trace->output, "( ): ");
1978 fprintf(trace->output, "%s:", evsel->name);
1980 if (evsel->tp_format) {
1981 event_format__fprintf(evsel->tp_format, sample->cpu,
1982 sample->raw_data, sample->raw_size,
1986 fprintf(trace->output, ")\n");
/*
 * Print a resolved address on @f. The DSO name (followed by '@') is shown
 * when @print_dso or verbose is set and a map is known; the symbol plus
 * offset when @print_sym or verbose is set and a symbol is known. The raw
 * address forms (al->addr, sample->addr) are the alternatives.
 *
 * NOTE(review): the conditions choosing between the two raw-address forms
 * are on lines not visible in this listing.
 */
1990 static void print_location(FILE *f, struct perf_sample *sample,
1991 struct addr_location *al,
1992 bool print_dso, bool print_sym)
1995 if ((verbose || print_dso) && al->map)
1996 fprintf(f, "%s@", al->map->dso->long_name);
1998 if ((verbose || print_sym) && al->sym)
1999 fprintf(f, "%s+0x%" PRIx64, al->sym->name,
2000 al->addr - al->sym->start);
2002 fprintf(f, "0x%" PRIx64, al->addr);
2004 fprintf(f, "0x%" PRIx64, sample->addr);
/*
 * Handler for the software page-fault events. Prints
 * "<kind>fault [<code location>] => <data location> (<map type><level>)":
 * the first location is resolved as a function address, the second is the
 * faulting address sample->addr, looked up as a variable mapping and as a
 * function mapping.
 *
 * NOTE(review): the per-thread fault counting, the arguments of the first
 * lookups and the conditions between the two data lookups are on lines not
 * visible in this listing.
 */
2007 static int trace__pgfault(struct trace *trace,
2008 struct perf_evsel *evsel,
2009 union perf_event *event,
2010 struct perf_sample *sample)
2012 struct thread *thread;
2013 u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
2014 struct addr_location al;
2015 char map_type = 'd';
2016 struct thread_trace *ttrace;
2019 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
2020 ttrace = thread__trace(thread, trace->output);
2024 if (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ)
2029 if (trace->summary_only)
2032 thread__find_addr_location(thread, cpumode, MAP__FUNCTION,
2035 trace__fprintf_entry_head(trace, thread, 0, sample->time, trace->output);
2037 fprintf(trace->output, "%sfault [",
2038 evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ?
2041 print_location(trace->output, sample, &al, false, true);
2043 fprintf(trace->output, "] => ");
2045 thread__find_addr_location(thread, cpumode, MAP__VARIABLE,
2049 thread__find_addr_location(thread, cpumode,
2050 MAP__FUNCTION, sample->addr, &al);
2058 print_location(trace->output, sample, &al, true, false);
2060 fprintf(trace->output, " (%c%c)\n", map_type, al.level);
/* Drop the reference taken by machine__findnew_thread(). */
2064 thread__put(thread);
/*
 * Decide whether @sample should be ignored given the pid/tid target lists.
 * The first test matches samples whose pid or tid is in a list; the second
 * covers the case where lists exist but the sample matched neither.
 *
 * NOTE(review): the return statements are not visible in this listing.
 */
2068 static bool skip_sample(struct trace *trace, struct perf_sample *sample)
2070 if ((trace->pid_list && intlist__find(trace->pid_list, sample->pid)) ||
2071 (trace->tid_list && intlist__find(trace->tid_list, sample->tid)))
2074 if (trace->pid_list || trace->tid_list)
/*
 * perf_tool sample callback (installed in trace__replay()): drops samples
 * rejected by skip_sample(), latches the first sample time as
 * trace->base_time unless full timestamps were requested, and dispatches to
 * the handler stored in the evsel.
 */
2080 static int trace__process_sample(struct perf_tool *tool,
2081 union perf_event *event,
2082 struct perf_sample *sample,
2083 struct perf_evsel *evsel,
2084 struct machine *machine __maybe_unused)
2086 struct trace *trace = container_of(tool, struct trace, tool);
2089 tracepoint_handler handler = evsel->handler;
2091 if (skip_sample(trace, sample))
2094 if (!trace->full_time && trace->base_time == 0)
2095 trace->base_time = sample->time;
2099 handler(trace, evsel, event, sample);
/*
 * Turn the pid and tid target strings from the record options into
 * intlists (trace->pid_list / trace->tid_list). A string that cannot be
 * parsed is reported with pr_err().
 */
2105 static int parse_target_str(struct trace *trace)
2107 if (trace->opts.target.pid) {
2108 trace->pid_list = intlist__new(trace->opts.target.pid);
2109 if (trace->pid_list == NULL) {
2110 pr_err("Error parsing process id string\n");
2115 if (trace->opts.target.tid) {
2116 trace->tid_list = intlist__new(trace->opts.target.tid);
2117 if (trace->tid_list == NULL) {
2118 pr_err("Error parsing thread id string\n");
/*
 * Implement 'perf trace record': build an argument vector for cmd_record()
 * from the fixed record arguments, the syscall event selection (when
 * syscalls are traced), the page-fault events requested in
 * trace->trace_pgfaults and finally the caller's own @argv.
 *
 * rec_argv is sized for the worst case (every optional group present) plus
 * a terminating NULL slot; the count actually passed on is j.
 *
 * NOTE(review): rec_argv is not freed on any path visible here - check the
 * error return after the "Neither raw_syscalls..." message.
 */
2126 static int trace__record(struct trace *trace, int argc, const char **argv)
2128 unsigned int rec_argc, i, j;
2129 const char **rec_argv;
2130 const char * const record_args[] = {
2137 const char * const sc_args[] = { "-e", };
2138 unsigned int sc_args_nr = ARRAY_SIZE(sc_args);
2139 const char * const majpf_args[] = { "-e", "major-faults" };
2140 unsigned int majpf_args_nr = ARRAY_SIZE(majpf_args);
2141 const char * const minpf_args[] = { "-e", "minor-faults" };
2142 unsigned int minpf_args_nr = ARRAY_SIZE(minpf_args);
2144 /* +1 is for the event string below */
2145 rec_argc = ARRAY_SIZE(record_args) + sc_args_nr + 1 +
2146 majpf_args_nr + minpf_args_nr + argc;
2147 rec_argv = calloc(rec_argc + 1, sizeof(char *));
2149 if (rec_argv == NULL)
2153 for (i = 0; i < ARRAY_SIZE(record_args); i++)
2154 rec_argv[j++] = record_args[i];
2156 if (trace->trace_syscalls) {
2157 for (i = 0; i < sc_args_nr; i++)
2158 rec_argv[j++] = sc_args[i];
2160 /* event string may be different for older kernels - e.g., RHEL6 */
2161 if (is_valid_tracepoint("raw_syscalls:sys_enter"))
2162 rec_argv[j++] = "raw_syscalls:sys_enter,raw_syscalls:sys_exit";
2163 else if (is_valid_tracepoint("syscalls:sys_enter"))
2164 rec_argv[j++] = "syscalls:sys_enter,syscalls:sys_exit";
2166 pr_err("Neither raw_syscalls nor syscalls events exist.\n");
2171 if (trace->trace_pgfaults & TRACE_PFMAJ)
2172 for (i = 0; i < majpf_args_nr; i++)
2173 rec_argv[j++] = majpf_args[i];
2175 if (trace->trace_pgfaults & TRACE_PFMIN)
2176 for (i = 0; i < minpf_args_nr; i++)
2177 rec_argv[j++] = minpf_args[i];
/* The user's command and its arguments go last. */
2179 for (i = 0; i < (unsigned int)argc; i++)
2180 rec_argv[j++] = argv[i];
2182 return cmd_record(j, rec_argv, NULL);
/* Forward declaration: trace__run() uses it before the definition below. */
2185 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp);
/*
 * Add the "probe:vfs_getname" tracepoint to @evlist with
 * trace__vfs_getname() as its handler. The evsel is discarded when it lacks
 * the "pathname" field the handler reads.
 */
2187 static void perf_evlist__add_vfs_getname(struct perf_evlist *evlist)
2189 struct perf_evsel *evsel = perf_evsel__newtp("probe", "vfs_getname");
2193 if (perf_evsel__field(evsel, "pathname") == NULL) {
2194 perf_evsel__delete(evsel);
2198 evsel->handler = trace__vfs_getname;
2199 perf_evlist__add(evlist, evsel);
/*
 * Add a software page-fault event to @evlist. The event uses the given
 * config value, a sample period of 1 and trace__pgfault() as its handler.
 */
2202 static int perf_evlist__add_pgfault(struct perf_evlist *evlist,
2205 struct perf_evsel *evsel;
2206 struct perf_event_attr attr = {
2207 .type = PERF_TYPE_SOFTWARE,
2211 attr.config = config;
2212 attr.sample_period = 1;
2214 event_attr_init(&attr);
2216 evsel = perf_evsel__new(&attr);
2220 evsel->handler = trace__pgfault;
2221 perf_evlist__add(evlist, evsel);
/*
 * Dispatch one event read from the ring buffers during live tracing.
 * Non-sample records go to trace__process_event(). Samples are mapped to
 * their evsel by id and handed to that evsel's handler; unknown ids and
 * tracepoint samples without a raw payload are reported and skipped.
 */
2226 static void trace__handle_event(struct trace *trace, union perf_event *event, struct perf_sample *sample)
2228 const u32 type = event->header.type;
2229 struct perf_evsel *evsel;
/* The first event seen provides the origin for relative timestamps. */
2231 if (!trace->full_time && trace->base_time == 0)
2232 trace->base_time = sample->time;
2234 if (type != PERF_RECORD_SAMPLE) {
2235 trace__process_event(trace, trace->host, event, sample);
2239 evsel = perf_evlist__id2evsel(trace->evlist, sample->id);
2240 if (evsel == NULL) {
2241 fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample->id);
2245 if (evsel->attr.type == PERF_TYPE_TRACEPOINT &&
2246 sample->raw_data == NULL) {
2247 fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
2248 perf_evsel__name(evsel), sample->tid,
2249 sample->cpu, sample->raw_size);
2251 tracepoint_handler handler = evsel->handler;
2252 handler(trace, evsel, event, sample);
/*
 * Create the sys_enter and sys_exit tracepoint evsels, initialise the
 * fields their handlers read ("args" and "ret" respectively), add both to
 * trace->evlist and remember them in trace->syscalls.events.
 *
 * On failure the evsels created so far are deleted in reverse order via the
 * out_delete_* labels.
 */
2256 static int trace__add_syscall_newtp(struct trace *trace)
2259 struct perf_evlist *evlist = trace->evlist;
2260 struct perf_evsel *sys_enter, *sys_exit;
2262 sys_enter = perf_evsel__syscall_newtp("sys_enter", trace__sys_enter);
2263 if (sys_enter == NULL)
2266 if (perf_evsel__init_sc_tp_ptr_field(sys_enter, args))
2267 goto out_delete_sys_enter;
2269 sys_exit = perf_evsel__syscall_newtp("sys_exit", trace__sys_exit);
2270 if (sys_exit == NULL)
2271 goto out_delete_sys_enter;
2273 if (perf_evsel__init_sc_tp_uint_field(sys_exit, ret))
2274 goto out_delete_sys_exit;
2276 perf_evlist__add(evlist, sys_enter);
2277 perf_evlist__add(evlist, sys_exit);
2279 trace->syscalls.events.sys_enter = sys_enter;
2280 trace->syscalls.events.sys_exit = sys_exit;
2286 out_delete_sys_exit:
2287 perf_evsel__delete_priv(sys_exit);
2288 out_delete_sys_enter:
2289 perf_evsel__delete_priv(sys_enter);
/*
 * Build a filter expression on the "id" field from the validated syscall
 * ids and append it, joined with "&&", to the filters of both the sys_enter
 * and the sys_exit evsel. sys_exit is only attempted when appending to
 * sys_enter returned 0.
 */
2293 static int trace__set_ev_qualifier_filter(struct trace *trace)
2296 char *filter = asprintf_expr_inout_ints("id", !trace->not_ev_qualifier,
2297 trace->ev_qualifier_ids.nr,
2298 trace->ev_qualifier_ids.entries);
2303 if (!perf_evsel__append_filter(trace->syscalls.events.sys_enter, "&&", filter))
2304 err = perf_evsel__append_filter(trace->syscalls.events.sys_exit, "&&", filter);
/*
 * Live tracing entry point.
 *
 * Sets up the requested events on trace->evlist (syscall tracepoints,
 * vfs_getname probe, page faults, sched_stat_runtime), creates the
 * thread/cpu maps, optionally prepares the workload given in @argv, opens
 * and mmaps the events, installs pid and syscall-id filters, and then
 * loops reading events from every mmap and dispatching them through
 * trace__handle_event() until 'done' is set and the buffers are drained.
 * Afterwards the thread summary and tool statistics are printed and the
 * evlist is deleted.
 *
 * The labels at the end format a specific error message and then share the
 * out_delete_evlist cleanup.
 *
 * NOTE(review): many guards, the loop labels and the return value are on
 * lines not visible in this listing.
 */
2314 static int trace__run(struct trace *trace, int argc, const char **argv)
2316 struct perf_evlist *evlist = trace->evlist;
2317 struct perf_evsel *evsel;
2319 unsigned long before;
/* A command on the command line means we fork and trace a workload. */
2320 const bool forks = argc > 0;
2321 bool draining = false;
2325 if (trace->trace_syscalls && trace__add_syscall_newtp(trace))
2326 goto out_error_raw_syscalls;
2328 if (trace->trace_syscalls)
2329 perf_evlist__add_vfs_getname(evlist);
2331 if ((trace->trace_pgfaults & TRACE_PFMAJ) &&
2332 perf_evlist__add_pgfault(evlist, PERF_COUNT_SW_PAGE_FAULTS_MAJ)) {
2336 if ((trace->trace_pgfaults & TRACE_PFMIN) &&
2337 perf_evlist__add_pgfault(evlist, PERF_COUNT_SW_PAGE_FAULTS_MIN))
2341 perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
2342 trace__sched_stat_runtime))
2343 goto out_error_sched_stat_runtime;
2345 err = perf_evlist__create_maps(evlist, &trace->opts.target);
2347 fprintf(trace->output, "Problems parsing the target to trace, check your options!\n");
2348 goto out_delete_evlist;
2351 err = trace__symbols_init(trace, evlist);
2353 fprintf(trace->output, "Problems initializing symbol libraries!\n");
2354 goto out_delete_evlist;
2357 perf_evlist__config(evlist, &trace->opts);
/* Both signals end the event loop through the 'done' flag. */
2359 signal(SIGCHLD, sig_handler);
2360 signal(SIGINT, sig_handler);
2363 err = perf_evlist__prepare_workload(evlist, &trace->opts.target,
2366 fprintf(trace->output, "Couldn't run the workload!\n");
2367 goto out_delete_evlist;
2371 err = perf_evlist__open(evlist);
2373 goto out_error_open;
2376 * Better not use !target__has_task() here because we need to cover the
2377 * case where no threads were specified in the command line, but a
2378 * workload was, and in that case we will fill in the thread_map when
2379 * we fork the workload in perf_evlist__prepare_workload.
2381 if (trace->filter_pids.nr > 0)
2382 err = perf_evlist__set_filter_pids(evlist, trace->filter_pids.nr, trace->filter_pids.entries);
/* No specific target: at least keep our own pid out of the trace. */
2383 else if (thread_map__pid(evlist->threads, 0) == -1)
2384 err = perf_evlist__set_filter_pid(evlist, getpid());
2389 if (trace->ev_qualifier_ids.nr > 0) {
2390 err = trace__set_ev_qualifier_filter(trace);
2395 pr_debug("%s\n", trace->syscalls.events.sys_exit->filter);
2397 err = perf_evlist__apply_filters(evlist, &evsel);
2399 goto out_error_apply_filters;
2401 err = perf_evlist__mmap(evlist, trace->opts.mmap_pages, false);
2403 goto out_error_mmap;
2405 if (!target__none(&trace->opts.target))
2406 perf_evlist__enable(evlist);
2409 perf_evlist__start_workload(evlist);
/* Thread ids are only worth printing when more than one can show up. */
2411 trace->multiple_threads = thread_map__pid(evlist->threads, 0) == -1 ||
2412 evlist->threads->nr > 1 ||
2413 perf_evlist__first(evlist)->attr.inherit;
/* Snapshot used below to detect a pass that read no events. */
2415 before = trace->nr_events;
2417 for (i = 0; i < evlist->nr_mmaps; i++) {
2418 union perf_event *event;
2420 while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) {
2421 struct perf_sample sample;
2425 err = perf_evlist__parse_sample(evlist, event, &sample);
2427 fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err);
2431 trace__handle_event(trace, event, &sample);
2433 perf_evlist__mmap_consume(evlist, i);
/* Stop producing new events once, then keep reading what is buffered. */
2438 if (done && !draining) {
2439 perf_evlist__disable(evlist);
/* Nothing new was read: wait for more, with a timeout once 'done'. */
2445 if (trace->nr_events == before) {
2446 int timeout = done ? 100 : -1;
2448 if (!draining && perf_evlist__poll(evlist, timeout) > 0) {
2449 if (perf_evlist__filter_pollfd(evlist, POLLERR | POLLHUP) == 0)
2459 thread__zput(trace->current);
2461 perf_evlist__disable(evlist);
2465 trace__fprintf_thread_summary(trace, trace->output);
2467 if (trace->show_tool_stats) {
2468 fprintf(trace->output, "Stats:\n "
2469 " vfs_getname : %" PRIu64 "\n"
2470 " proc_getname: %" PRIu64 "\n",
2471 trace->stats.vfs_getname,
2472 trace->stats.proc_getname);
2477 perf_evlist__delete(evlist);
2478 trace->evlist = NULL;
2479 trace->live = false;
2482 char errbuf[BUFSIZ];
2484 out_error_sched_stat_runtime:
2485 debugfs__strerror_open_tp(errno, errbuf, sizeof(errbuf), "sched", "sched_stat_runtime");
2488 out_error_raw_syscalls:
2489 debugfs__strerror_open_tp(errno, errbuf, sizeof(errbuf), "raw_syscalls", "sys_(enter|exit)");
2493 perf_evlist__strerror_mmap(evlist, errno, errbuf, sizeof(errbuf));
2497 perf_evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf));
2500 fprintf(trace->output, "%s\n", errbuf);
2501 goto out_delete_evlist;
2503 out_error_apply_filters:
2504 fprintf(trace->output,
2505 "Failed to set filter \"%s\" on event %s with %d (%s)\n",
2506 evsel->filter, perf_evsel__name(evsel), errno,
2507 strerror_r(errno, errbuf, sizeof(errbuf)));
2508 goto out_delete_evlist;
2511 fprintf(trace->output, "Not enough memory to run!\n");
2512 goto out_delete_evlist;
2515 fprintf(trace->output, "errno=%d,%s\n", errno, strerror(errno));
2516 goto out_delete_evlist;
/*
 * Replay mode: process a previously recorded perf data file instead of
 * tracing live.
 *
 * Installs the perf_tool callbacks (samples go to trace__process_sample(),
 * the rest to the stock perf_event__process_* helpers), opens the session,
 * points trace->host at the session's host machine and wires the handlers:
 * vfs_getname through the handler table, sys_enter/sys_exit by looking up
 * the raw_syscalls tracepoints (falling back to the older syscalls ones),
 * and trace__pgfault() for every software page-fault event. The target
 * pid/tid strings are parsed, the events are processed and, if requested,
 * the per-thread summary is printed. The session is deleted before
 * returning.
 */
2519 static int trace__replay(struct trace *trace)
2521 const struct perf_evsel_str_handler handlers[] = {
2522 { "probe:vfs_getname", trace__vfs_getname, },
2524 struct perf_data_file file = {
2526 .mode = PERF_DATA_MODE_READ,
2527 .force = trace->force,
2529 struct perf_session *session;
2530 struct perf_evsel *evsel;
2533 trace->tool.sample = trace__process_sample;
2534 trace->tool.mmap = perf_event__process_mmap;
2535 trace->tool.mmap2 = perf_event__process_mmap2;
2536 trace->tool.comm = perf_event__process_comm;
2537 trace->tool.exit = perf_event__process_exit;
2538 trace->tool.fork = perf_event__process_fork;
2539 trace->tool.attr = perf_event__process_attr;
2540 trace->tool.tracing_data = perf_event__process_tracing_data;
2541 trace->tool.build_id = perf_event__process_build_id;
2543 trace->tool.ordered_events = true;
2544 trace->tool.ordering_requires_timestamps = true;
2546 /* add tid to output */
2547 trace->multiple_threads = true;
2549 session = perf_session__new(&file, false, &trace->tool);
2550 if (session == NULL)
2553 if (symbol__init(&session->header.env) < 0)
2556 trace->host = &session->machines.host;
2558 err = perf_session__set_tracepoints_handlers(session, handlers);
2562 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2563 "raw_syscalls:sys_enter");
2564 /* older kernels have syscalls tp versus raw_syscalls */
2566 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2567 "syscalls:sys_enter");
2570 (perf_evsel__init_syscall_tp(evsel, trace__sys_enter) < 0 ||
2571 perf_evsel__init_sc_tp_ptr_field(evsel, args))) {
2572 pr_err("Error during initialize raw_syscalls:sys_enter event\n");
2576 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2577 "raw_syscalls:sys_exit");
2579 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2580 "syscalls:sys_exit");
2582 (perf_evsel__init_syscall_tp(evsel, trace__sys_exit) < 0 ||
2583 perf_evsel__init_sc_tp_uint_field(evsel, ret))) {
2584 pr_err("Error during initialize raw_syscalls:sys_exit event\n");
/* Page-fault events have no name-based handler table entry. */
2588 evlist__for_each(session->evlist, evsel) {
2589 if (evsel->attr.type == PERF_TYPE_SOFTWARE &&
2590 (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ||
2591 evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MIN ||
2592 evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS))
2593 evsel->handler = trace__pgfault;
2596 err = parse_target_str(trace);
2602 err = perf_session__process_events(session);
/* Terminate the message with a newline like every other pr_err() here. */
2604 pr_err("Failed to process events, error %d\n", err);
2606 else if (trace->summary)
2607 trace__fprintf_thread_summary(trace, trace->output);
2610 perf_session__delete(session);
/* Print the heading that precedes the per-thread summary on @fp. */
2615 static size_t trace__fprintf_threads_header(FILE *fp)
2619 printed = fprintf(fp, "\n Summary of events:\n\n");
/*
 * Print the per-syscall statistics table of one thread on @fp: a header
 * followed by one row per entry in ttrace->syscall_stats showing the
 * syscall name, call count, min/avg/max (values divided by NSEC_PER_MSEC)
 * and the standard deviation as a percentage of the average (0 when the
 * average is 0).
 */
2624 static size_t thread__dump_stats(struct thread_trace *ttrace,
2625 struct trace *trace, FILE *fp)
2627 struct stats *stats;
2630 struct int_node *inode = intlist__first(ttrace->syscall_stats);
2635 printed += fprintf(fp, "\n");
2637 printed += fprintf(fp, " syscall calls min avg max stddev\n");
2638 printed += fprintf(fp, " (msec) (msec) (msec) (%%)\n");
2639 printed += fprintf(fp, " --------------- -------- --------- --------- --------- ------\n");
2641 /* each int_node is a syscall */
2643 stats = inode->priv;
2645 double min = (double)(stats->min) / NSEC_PER_MSEC;
2646 double max = (double)(stats->max) / NSEC_PER_MSEC;
2647 double avg = avg_stats(stats);
2649 u64 n = (u64) stats->n;
/* The percentage is computed before avg is rescaled. */
2651 pct = avg ? 100.0 * stddev_stats(stats)/avg : 0.0;
2652 avg /= NSEC_PER_MSEC;
/* The int_node key is the syscall id, i.e. the table index. */
2654 sc = &trace->syscalls.table[inode->i];
2655 printed += fprintf(fp, " %-15s", sc->name);
2656 printed += fprintf(fp, " %8" PRIu64 " %9.3f %9.3f",
2658 printed += fprintf(fp, " %9.3f %9.2f%%\n", max, pct);
2661 inode = intlist__next(inode);
2664 printed += fprintf(fp, "\n\n");
/*
 * Context handed to trace__fprintf_one_thread() through the void *priv
 * argument of machine__for_each_thread(). Besides the trace pointer shown
 * here, the callback reads and updates 'fp' and 'printed' members.
 */
2669 /* struct used to pass data to per-thread function */
2670 struct summary_data {
2672 struct trace *trace;
/*
 * machine__for_each_thread() callback: print the summary line of one
 * thread (comm, tid, event count, share of all events, fault counts and
 * runtime in msec) followed by its syscall statistics table, and add the
 * number of characters written to data->printed.
 *
 * NOTE(review): 'printed' starts from data->printed and is then added back
 * to data->printed, so earlier output appears to be counted twice - confirm
 * whether callers rely on the total.
 */
2676 static int trace__fprintf_one_thread(struct thread *thread, void *priv)
2678 struct summary_data *data = priv;
2679 FILE *fp = data->fp;
2680 size_t printed = data->printed;
2681 struct trace *trace = data->trace;
2682 struct thread_trace *ttrace = thread__priv(thread);
/* Share of this thread in the total number of events, in percent. */
2688 ratio = (double)ttrace->nr_events / trace->nr_events * 100.0;
2690 printed += fprintf(fp, " %s (%d), ", thread__comm_str(thread), thread->tid);
2691 printed += fprintf(fp, "%lu events, ", ttrace->nr_events);
2692 printed += fprintf(fp, "%.1f%%", ratio);
2694 printed += fprintf(fp, ", %lu majfaults", ttrace->pfmaj);
2696 printed += fprintf(fp, ", %lu minfaults", ttrace->pfmin);
2697 printed += fprintf(fp, ", %.3f msec\n", ttrace->runtime_ms);
2698 printed += thread__dump_stats(ttrace, trace, fp);
2700 data->printed += printed;
/*
 * Print the summary header and then one section per thread known to
 * trace->host. Returns the accumulated character count kept in
 * data.printed.
 */
2705 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
2707 struct summary_data data = {
2711 data.printed = trace__fprintf_threads_header(fp);
2713 machine__for_each_thread(trace->host, trace__fprintf_one_thread, &data);
2715 return data.printed;
/*
 * Option callback: store the argument, converted with atof(), in
 * trace->duration_filter.
 *
 * NOTE(review): atof() reports no errors, so a non-numeric argument
 * silently yields 0 - confirm that this is acceptable for the option.
 */
2718 static int trace__set_duration(const struct option *opt, const char *str,
2719 int unset __maybe_unused)
2721 struct trace *trace = opt->value;
2723 trace->duration_filter = atof(str);
/*
 * Option callback: parse a comma separated pid list from @str into
 * trace->filter_pids. The array has one more slot than the list: slot 0
 * holds our own pid, the listed pids follow from index 1. The temporary
 * intlist is deleted afterwards.
 */
2727 static int trace__set_filter_pids(const struct option *opt, const char *str,
2728 int unset __maybe_unused)
2732 struct trace *trace = opt->value;
2734 * FIXME: introduce a intarray class, plain parse csv and create a
2735 * { int nr, int entries[] } struct...
2737 struct intlist *list = intlist__new(str);
/* +1 leaves room for our own pid in slot 0. */
2742 i = trace->filter_pids.nr = intlist__nr_entries(list) + 1;
2743 trace->filter_pids.entries = calloc(i, sizeof(pid_t));
2745 if (trace->filter_pids.entries == NULL)
2748 trace->filter_pids.entries[0] = getpid();
2750 for (i = 1; i < trace->filter_pids.nr; ++i)
2751 trace->filter_pids.entries[i] = intlist__entry(list, i - 1)->i;
2753 intlist__delete(list);
2759 static int trace__open_output(struct trace *trace, const char *filename)
2763 if (!stat(filename, &st) && st.st_size) {
2764 char oldname[PATH_MAX];
2766 scnprintf(oldname, sizeof(oldname), "%s.old", filename);
2768 rename(filename, oldname);
2771 trace->output = fopen(filename, "w");
2773 return trace->output == NULL ? -errno : 0;
2776 static int parse_pagefaults(const struct option *opt, const char *str,
2777 int unset __maybe_unused)
2779 int *trace_pgfaults = opt->value;
2781 if (strcmp(str, "all") == 0)
2782 *trace_pgfaults |= TRACE_PFMAJ | TRACE_PFMIN;
2783 else if (strcmp(str, "maj") == 0)
2784 *trace_pgfaults |= TRACE_PFMAJ;
2785 else if (strcmp(str, "min") == 0)
2786 *trace_pgfaults |= TRACE_PFMIN;
2793 static void evlist__set_evsel_handler(struct perf_evlist *evlist, void *handler)
2795 struct perf_evsel *evsel;
2797 evlist__for_each(evlist, evsel)
2798 evsel->handler = handler;
2801 int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
2803 const char *trace_usage[] = {
2804 "perf trace [<options>] [<command>]",
2805 "perf trace [<options>] -- <command> [<options>]",
2806 "perf trace record [<options>] [<command>]",
2807 "perf trace record [<options>] -- <command> [<options>]",
2810 struct trace trace = {
2812 .machine = audit_detect_machine(),
2813 .open_id = audit_name_to_syscall("open", trace.audit.machine),
2823 .user_freq = UINT_MAX,
2824 .user_interval = ULLONG_MAX,
2825 .no_buffering = true,
2826 .mmap_pages = UINT_MAX,
2827 .proc_map_timeout = 500,
2831 .trace_syscalls = true,
2833 const char *output_name = NULL;
2834 const char *ev_qualifier_str = NULL;
2835 const struct option trace_options[] = {
2836 OPT_CALLBACK(0, "event", &trace.evlist, "event",
2837 "event selector. use 'perf list' to list available events",
2838 parse_events_option),
2839 OPT_BOOLEAN(0, "comm", &trace.show_comm,
2840 "show the thread COMM next to its id"),
2841 OPT_BOOLEAN(0, "tool_stats", &trace.show_tool_stats, "show tool stats"),
2842 OPT_STRING('e', "expr", &ev_qualifier_str, "expr", "list of syscalls to trace"),
2843 OPT_STRING('o', "output", &output_name, "file", "output file name"),
2844 OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"),
2845 OPT_STRING('p', "pid", &trace.opts.target.pid, "pid",
2846 "trace events on existing process id"),
2847 OPT_STRING('t', "tid", &trace.opts.target.tid, "tid",
2848 "trace events on existing thread id"),
2849 OPT_CALLBACK(0, "filter-pids", &trace, "CSV list of pids",
2850 "pids to filter (by the kernel)", trace__set_filter_pids),
2851 OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide,
2852 "system-wide collection from all CPUs"),
2853 OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu",
2854 "list of cpus to monitor"),
2855 OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit,
2856 "child tasks do not inherit counters"),
2857 OPT_CALLBACK('m', "mmap-pages", &trace.opts.mmap_pages, "pages",
2858 "number of mmap data pages",
2859 perf_evlist__parse_mmap_pages),
2860 OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user",
2862 OPT_CALLBACK(0, "duration", &trace, "float",
2863 "show only events with duration > N.M ms",
2864 trace__set_duration),
2865 OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"),
2866 OPT_INCR('v', "verbose", &verbose, "be more verbose"),
2867 OPT_BOOLEAN('T', "time", &trace.full_time,
2868 "Show full timestamp, not time relative to first start"),
2869 OPT_BOOLEAN('s', "summary", &trace.summary_only,
2870 "Show only syscall summary with statistics"),
2871 OPT_BOOLEAN('S', "with-summary", &trace.summary,
2872 "Show all syscalls and summary with statistics"),
2873 OPT_CALLBACK_DEFAULT('F', "pf", &trace.trace_pgfaults, "all|maj|min",
2874 "Trace pagefaults", parse_pagefaults, "maj"),
2875 OPT_BOOLEAN(0, "syscalls", &trace.trace_syscalls, "Trace syscalls"),
2876 OPT_BOOLEAN('f', "force", &trace.force, "don't complain, do it"),
2877 OPT_UINTEGER(0, "proc-map-timeout", &trace.opts.proc_map_timeout,
2878 "per thread proc mmap processing timeout in ms"),
2881 const char * const trace_subcommands[] = { "record", NULL };
2885 signal(SIGSEGV, sighandler_dump_stack);
2886 signal(SIGFPE, sighandler_dump_stack);
2888 trace.evlist = perf_evlist__new();
2890 if (trace.evlist == NULL) {
2891 pr_err("Not enough memory to run!\n");
2896 argc = parse_options_subcommand(argc, argv, trace_options, trace_subcommands,
2897 trace_usage, PARSE_OPT_STOP_AT_NON_OPTION);
2899 if (trace.trace_pgfaults) {
2900 trace.opts.sample_address = true;
2901 trace.opts.sample_time = true;
2904 if (trace.evlist->nr_entries > 0)
2905 evlist__set_evsel_handler(trace.evlist, trace__event_handler);
2907 if ((argc >= 1) && (strcmp(argv[0], "record") == 0))
2908 return trace__record(&trace, argc-1, &argv[1]);
2910 /* summary_only implies summary option, but don't overwrite summary if set */
2911 if (trace.summary_only)
2912 trace.summary = trace.summary_only;
2914 if (!trace.trace_syscalls && !trace.trace_pgfaults &&
2915 trace.evlist->nr_entries == 0 /* Was --events used? */) {
2916 pr_err("Please specify something to trace.\n");
2920 if (output_name != NULL) {
2921 err = trace__open_output(&trace, output_name);
2923 perror("failed to create output file");
2928 if (ev_qualifier_str != NULL) {
2929 const char *s = ev_qualifier_str;
2931 trace.not_ev_qualifier = *s == '!';
2932 if (trace.not_ev_qualifier)
2934 trace.ev_qualifier = strlist__new(s, NULL);
2935 if (trace.ev_qualifier == NULL) {
2936 fputs("Not enough memory to parse event qualifier",
2942 err = trace__validate_ev_qualifier(&trace);
2947 err = target__validate(&trace.opts.target);
2949 target__strerror(&trace.opts.target, err, bf, sizeof(bf));
2950 fprintf(trace.output, "%s", bf);
2954 err = target__parse_uid(&trace.opts.target);
2956 target__strerror(&trace.opts.target, err, bf, sizeof(bf));
2957 fprintf(trace.output, "%s", bf);
2961 if (!argc && target__none(&trace.opts.target))
2962 trace.opts.target.system_wide = true;
2965 err = trace__replay(&trace);
2967 err = trace__run(&trace, argc, argv);
2970 if (output_name != NULL)
2971 fclose(trace.output);