1 #include <traceevent/event-parse.h>
3 #include "util/color.h"
4 #include "util/debug.h"
5 #include "util/evlist.h"
6 #include "util/exec_cmd.h"
7 #include "util/machine.h"
8 #include "util/session.h"
9 #include "util/thread.h"
10 #include "util/parse-options.h"
11 #include "util/strlist.h"
12 #include "util/intlist.h"
13 #include "util/thread_map.h"
14 #include "util/stat.h"
15 #include "trace-event.h"
16 #include "util/parse-events.h"
21 #include <linux/futex.h>
/*
 * Fallback definitions of flag constants (MAP_*, MADV_*, EFD_*, O_CLOEXEC,
 * SOCK_*, MSG_CMSG_CLOEXEC, PERF_FLAG_*) that the beautifiers below refer to.
 * The guarded ones use #ifndef, so a value already supplied by an included
 * header is left untouched.
 */
23 /* For older distros: */
25 # define MAP_STACK 0x20000
29 # define MADV_HWPOISON 100
32 #ifndef MADV_MERGEABLE
33 # define MADV_MERGEABLE 12
36 #ifndef MADV_UNMERGEABLE
37 # define MADV_UNMERGEABLE 13
41 # define EFD_SEMAPHORE 1
45 # define EFD_NONBLOCK 00004000
49 # define EFD_CLOEXEC 02000000
53 # define O_CLOEXEC 02000000
61 # define SOCK_CLOEXEC 02000000
65 # define SOCK_NONBLOCK 00004000
68 #ifndef MSG_CMSG_CLOEXEC
69 # define MSG_CMSG_CLOEXEC 0x40000000
72 #ifndef PERF_FLAG_FD_NO_GROUP
73 # define PERF_FLAG_FD_NO_GROUP (1UL << 0)
76 #ifndef PERF_FLAG_FD_OUTPUT
77 # define PERF_FLAG_FD_OUTPUT (1UL << 1)
80 #ifndef PERF_FLAG_PID_CGROUP
81 # define PERF_FLAG_PID_CGROUP (1UL << 2) /* pid=cgroup id, per-cpu mode only */
84 #ifndef PERF_FLAG_FD_CLOEXEC
85 # define PERF_FLAG_FD_CLOEXEC (1UL << 3) /* O_CLOEXEC */
/*
 * Accessor callbacks of a tracepoint field descriptor. Both receive the
 * descriptor itself plus the sample to read from.
 * integer: installed by tp_field__init_uint(), returns the value as u64.
 * pointer: installed by tp_field__init_ptr(), returns an address.
 */
92 u64 (*integer)(struct tp_field *field, struct perf_sample *sample);
93 void *(*pointer)(struct tp_field *field, struct perf_sample *sample);
/*
 * Generates tp_field__u<bits>(): an integer accessor that memcpy()s the
 * field out of sample->raw_data at field->offset into a local and hands it
 * back as u64.
 */
97 #define TP_UINT_FIELD(bits) \
98 static u64 tp_field__u##bits(struct tp_field *field, struct perf_sample *sample) \
101 memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
/*
 * Generates tp_field__swapped_u<bits>(): same read as the non-swapped
 * accessor, but the value goes through bswap_<bits>() before being returned.
 * Instantiated below for 16, 32 and 64 bits.
 */
110 #define TP_UINT_FIELD__SWAPPED(bits) \
111 static u64 tp_field__swapped_u##bits(struct tp_field *field, struct perf_sample *sample) \
114 memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
115 return bswap_##bits(value);\
118 TP_UINT_FIELD__SWAPPED(16);
119 TP_UINT_FIELD__SWAPPED(32);
120 TP_UINT_FIELD__SWAPPED(64);
/*
 * Set up @field as an unsigned integer accessor for @format_field.
 *
 * Copies the offset from the format description and picks the integer
 * callback according to format_field->size. For the 16/32/64-bit readers the
 * byte-swapping variant is selected when needs_swap is set; the 8-bit reader
 * has no swapped variant.
 */
122 static int tp_field__init_uint(struct tp_field *field,
123 struct format_field *format_field,
126 field->offset = format_field->offset;
128 switch (format_field->size) {
130 field->integer = tp_field__u8;
133 field->integer = needs_swap ? tp_field__swapped_u16 : tp_field__u16;
136 field->integer = needs_swap ? tp_field__swapped_u32 : tp_field__u32;
139 field->integer = needs_swap ? tp_field__swapped_u64 : tp_field__u64;
/*
 * Pointer accessor: returns the address of the field inside the sample's raw
 * data (raw_data + field->offset). Nothing is copied.
 */
148 static void *tp_field__ptr(struct tp_field *field, struct perf_sample *sample)
150 return sample->raw_data + field->offset;
/*
 * Set up @field as a pointer accessor: record the offset taken from
 * @format_field and install tp_field__ptr() as the pointer callback.
 */
153 static int tp_field__init_ptr(struct tp_field *field, struct format_field *format_field)
155 field->offset = format_field->offset;
156 field->pointer = tp_field__ptr;
163 struct tp_field args, ret;
/*
 * Look up the tracepoint field called @name on @evsel via perf_evsel__field()
 * and, once the lookup result has been checked against NULL, initialize
 * @field as an unsigned integer accessor for it. evsel->needs_swap is passed
 * on so the accessor can byte-swap when required.
 */
167 static int perf_evsel__init_tp_uint_field(struct perf_evsel *evsel,
168 struct tp_field *field,
171 struct format_field *format_field = perf_evsel__field(evsel, name);
173 if (format_field == NULL)
176 return tp_field__init_uint(field, format_field, evsel->needs_swap);
/*
 * Initialize member 'name' of the struct syscall_tp stored in evsel->priv as
 * an unsigned integer field. The tracepoint field is looked up under the
 * same identifier, stringified. Evaluates to the result of
 * perf_evsel__init_tp_uint_field().
 *
 * evsel is parenthesized before being dereferenced so that any
 * pointer-valued expression can be passed; note that it is evaluated twice.
 */
#define perf_evsel__init_sc_tp_uint_field(evsel, name) \
	({ struct syscall_tp *sc = (evsel)->priv;\
	   perf_evsel__init_tp_uint_field(evsel, &sc->name, #name); })
/*
 * Look up the tracepoint field called @name on @evsel via perf_evsel__field()
 * and, once the lookup result has been checked against NULL, initialize
 * @field as a pointer accessor for it.
 */
183 static int perf_evsel__init_tp_ptr_field(struct perf_evsel *evsel,
184 struct tp_field *field,
187 struct format_field *format_field = perf_evsel__field(evsel, name);
189 if (format_field == NULL)
192 return tp_field__init_ptr(field, format_field);
/*
 * Initialize member 'name' of the struct syscall_tp stored in evsel->priv as
 * a pointer field. The tracepoint field is looked up under the same
 * identifier, stringified. Evaluates to the result of
 * perf_evsel__init_tp_ptr_field().
 *
 * evsel is parenthesized before being dereferenced so that any
 * pointer-valued expression can be passed; note that it is evaluated twice.
 */
#define perf_evsel__init_sc_tp_ptr_field(evsel, name) \
	({ struct syscall_tp *sc = (evsel)->priv;\
	   perf_evsel__init_tp_ptr_field(evsel, &sc->name, #name); })
/*
 * Destructor used for evsels that carry private data; hands @evsel to
 * perf_evsel__delete().
 * NOTE(review): presumably evsel->priv is released before that call - confirm.
 */
199 static void perf_evsel__delete_priv(struct perf_evsel *evsel)
202 perf_evsel__delete(evsel);
/*
 * Attach syscall tracepoint state to @evsel: allocate a struct syscall_tp as
 * evsel->priv and, when the allocation succeeded, initialize its 'id' field
 * accessor from the tracepoint format and store @handler in evsel->handler.
 */
205 static int perf_evsel__init_syscall_tp(struct perf_evsel *evsel, void *handler)
207 evsel->priv = malloc(sizeof(struct syscall_tp));
208 if (evsel->priv != NULL) {
209 if (perf_evsel__init_sc_tp_uint_field(evsel, id))
212 evsel->handler = handler;
/*
 * Create the evsel for a syscall tracepoint. @direction is the event name
 * within the "raw_syscalls" subsystem; "syscalls" is the fallback subsystem
 * for older kernels. The new evsel then gets its syscall_tp state and
 * @handler via perf_evsel__init_syscall_tp(); the cleanup path releases it
 * with perf_evsel__delete_priv().
 */
223 static struct perf_evsel *perf_evsel__syscall_newtp(const char *direction, void *handler)
225 struct perf_evsel *evsel = perf_evsel__newtp("raw_syscalls", direction);
227 /* older kernel (e.g., RHEL6) use syscalls:{enter,exit} */
229 evsel = perf_evsel__newtp("syscalls", direction);
232 if (perf_evsel__init_syscall_tp(evsel, handler))
239 perf_evsel__delete_priv(evsel);
/*
 * Read integer field 'name' from sample through the accessor stored in the
 * struct syscall_tp that evsel->priv points to. Evaluates to the u64
 * returned by that accessor.
 *
 * evsel is parenthesized before being dereferenced so that any
 * pointer-valued expression can be passed.
 */
#define perf_evsel__sc_tp_uint(evsel, name, sample) \
	({ struct syscall_tp *fields = (evsel)->priv; \
	   fields->name.integer(&fields->name, sample); })
/*
 * Get the address of field 'name' inside sample through the accessor stored
 * in the struct syscall_tp that evsel->priv points to. Evaluates to the
 * void * returned by that accessor.
 *
 * evsel is parenthesized before being dereferenced so that any
 * pointer-valued expression can be passed.
 */
#define perf_evsel__sc_tp_ptr(evsel, name, sample) \
	({ struct syscall_tp *fields = (evsel)->priv; \
	   fields->name.pointer(&fields->name, sample); })
253 struct thread *thread;
263 const char **entries;
/*
 * Defines 'struct strarray strarray__<array>' describing the string table
 * <array>; nr_entries is taken from ARRAY_SIZE(array), so <array> must be a
 * real array, not a pointer.
 */
266 #define DEFINE_STRARRAY(array) struct strarray strarray__##array = { \
267 .nr_entries = ARRAY_SIZE(array), \
/*
 * Like DEFINE_STRARRAY(), with an extra 'off' argument for tables whose
 * first entry does not correspond to value 0.
 * NOTE(review): presumably 'off' initializes .offset, which the strarray
 * printer subtracts from the value before indexing - confirm.
 */
271 #define DEFINE_STRARRAY_OFFSET(array, off) struct strarray strarray__##array = { \
273 .nr_entries = ARRAY_SIZE(array), \
277 static size_t __syscall_arg__scnprintf_strarray(char *bf, size_t size,
279 struct syscall_arg *arg)
281 struct strarray *sa = arg->parm;
282 int idx = arg->val - sa->offset;
284 if (idx < 0 || idx >= sa->nr_entries)
285 return scnprintf(bf, size, intfmt, arg->val);
287 return scnprintf(bf, size, "%s", sa->entries[idx]);
/*
 * String-table beautifier: values without a table entry are printed with
 * "%d". Referred to as SCA_STRARRAY in the syscall format table.
 */
290 static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size,
291 struct syscall_arg *arg)
293 return __syscall_arg__scnprintf_strarray(bf, size, "%d", arg);
296 #define SCA_STRARRAY syscall_arg__scnprintf_strarray
/*
 * String-table beautifier whose fallback for values without a table entry is
 * "%#x" (hex). Only built on i386/x86_64. Referred to as SCA_STRHEXARRAY.
 */
298 #if defined(__i386__) || defined(__x86_64__)
300 * FIXME: Make this available to all arches as soon as the ioctl beautifier
301 * gets rewritten to support all arches.
303 static size_t syscall_arg__scnprintf_strhexarray(char *bf, size_t size,
304 struct syscall_arg *arg)
306 return __syscall_arg__scnprintf_strarray(bf, size, "%#x", arg);
309 #define SCA_STRHEXARRAY syscall_arg__scnprintf_strhexarray
310 #endif /* defined(__i386__) || defined(__x86_64__) */
/*
 * Forward declaration of the file descriptor beautifier (SCA_FD); it is
 * needed here because syscall_arg__scnprintf_fd_at() and the syscall format
 * table refer to it.
 */
312 static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
313 struct syscall_arg *arg);
315 #define SCA_FD syscall_arg__scnprintf_fd
/*
 * Beautifier for the directory fd argument of the *at() syscalls (SCA_FDAT):
 * one special value is printed as "CWD", anything else is formatted by
 * syscall_arg__scnprintf_fd().
 * NOTE(review): the special value is presumably AT_FDCWD - confirm.
 */
317 static size_t syscall_arg__scnprintf_fd_at(char *bf, size_t size,
318 struct syscall_arg *arg)
323 return scnprintf(bf, size, "CWD");
325 return syscall_arg__scnprintf_fd(bf, size, arg);
328 #define SCA_FDAT syscall_arg__scnprintf_fd_at
/*
 * Forward declaration of the beautifier used for the fd argument of close()
 * (SCA_CLOSE_FD in the syscall format table).
 */
330 static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
331 struct syscall_arg *arg);
333 #define SCA_CLOSE_FD syscall_arg__scnprintf_close_fd
/*
 * Prints arg->val in hexadecimal with a 0x prefix ("%#lx"). Used for
 * addresses and opaque values (SCA_HEX).
 */
335 static size_t syscall_arg__scnprintf_hex(char *bf, size_t size,
336 struct syscall_arg *arg)
338 return scnprintf(bf, size, "%#lx", arg->val);
341 #define SCA_HEX syscall_arg__scnprintf_hex
/*
 * Prints arg->val as a signed decimal ("%d"), so that values such as -1 show
 * up as such (SCA_INT).
 */
343 static size_t syscall_arg__scnprintf_int(char *bf, size_t size,
344 struct syscall_arg *arg)
346 return scnprintf(bf, size, "%d", arg->val);
349 #define SCA_INT syscall_arg__scnprintf_int
/*
 * Beautifier for memory protection bits (SCA_MMAP_PROT).
 *
 * PROT_NONE is printed as "NONE". Otherwise every PROT_<n> bit tested through
 * P_MMAP_PROT() that is set gets its name <n> appended, separated by '|'.
 * The last scnprintf() appends prot itself in hex.
 * NOTE(review): presumably only bits left unnamed reach that hex output -
 * confirm.
 */
351 static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size,
352 struct syscall_arg *arg)
354 int printed = 0, prot = arg->val;
356 if (prot == PROT_NONE)
357 return scnprintf(bf, size, "NONE");
358 #define P_MMAP_PROT(n) \
359 if (prot & PROT_##n) { \
360 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
370 P_MMAP_PROT(GROWSDOWN);
371 P_MMAP_PROT(GROWSUP);
375 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", prot);
380 #define SCA_MMAP_PROT syscall_arg__scnprintf_mmap_prot
/*
 * Beautifier for mmap() flags (SCA_MMAP_FLAGS).
 *
 * Every MAP_<n> bit tested through P_MMAP_FLAG() that is set gets its name
 * <n> appended, separated by '|'. MAP_UNINITIALIZED is only tested when the
 * headers define it. The last scnprintf() appends flags itself in hex.
 * NOTE(review): presumably only bits left unnamed reach that hex output -
 * confirm.
 */
382 static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size,
383 struct syscall_arg *arg)
385 int printed = 0, flags = arg->val;
387 #define P_MMAP_FLAG(n) \
388 if (flags & MAP_##n) { \
389 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
394 P_MMAP_FLAG(PRIVATE);
398 P_MMAP_FLAG(ANONYMOUS);
399 P_MMAP_FLAG(DENYWRITE);
400 P_MMAP_FLAG(EXECUTABLE);
403 P_MMAP_FLAG(GROWSDOWN);
405 P_MMAP_FLAG(HUGETLB);
408 P_MMAP_FLAG(NONBLOCK);
409 P_MMAP_FLAG(NORESERVE);
410 P_MMAP_FLAG(POPULATE);
412 #ifdef MAP_UNINITIALIZED
413 P_MMAP_FLAG(UNINITIALIZED);
418 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
423 #define SCA_MMAP_FLAGS syscall_arg__scnprintf_mmap_flags
/*
 * Beautifier for mremap() flags (SCA_MREMAP_FLAGS).
 *
 * Each MREMAP_<n> bit tested through P_MREMAP_FLAG() that is set gets its
 * name appended ('|' separated) and is then cleared from flags, so the final
 * hex output only shows what is left of flags at that point.
 */
425 static size_t syscall_arg__scnprintf_mremap_flags(char *bf, size_t size,
426 struct syscall_arg *arg)
428 int printed = 0, flags = arg->val;
430 #define P_MREMAP_FLAG(n) \
431 if (flags & MREMAP_##n) { \
432 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
433 flags &= ~MREMAP_##n; \
436 P_MREMAP_FLAG(MAYMOVE);
438 P_MREMAP_FLAG(FIXED);
443 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
448 #define SCA_MREMAP_FLAGS syscall_arg__scnprintf_mremap_flags
/*
 * Beautifier for the madvise() behavior argument (SCA_MADV_BHV).
 *
 * behavior is a single value, not a bit mask: P_MADV_BHV(n) expands to a
 * 'case MADV_<n>:' that returns after printing <n>. Entries wrapped in
 * #ifdef are only compiled when the headers define the constant. A value
 * without a matching case is printed in hex.
 */
450 static size_t syscall_arg__scnprintf_madvise_behavior(char *bf, size_t size,
451 struct syscall_arg *arg)
453 int behavior = arg->val;
456 #define P_MADV_BHV(n) case MADV_##n: return scnprintf(bf, size, #n)
459 P_MADV_BHV(SEQUENTIAL);
460 P_MADV_BHV(WILLNEED);
461 P_MADV_BHV(DONTNEED);
463 P_MADV_BHV(DONTFORK);
465 P_MADV_BHV(HWPOISON);
466 #ifdef MADV_SOFT_OFFLINE
467 P_MADV_BHV(SOFT_OFFLINE);
469 P_MADV_BHV(MERGEABLE);
470 P_MADV_BHV(UNMERGEABLE);
472 P_MADV_BHV(HUGEPAGE);
474 #ifdef MADV_NOHUGEPAGE
475 P_MADV_BHV(NOHUGEPAGE);
478 P_MADV_BHV(DONTDUMP);
487 return scnprintf(bf, size, "%#x", behavior);
490 #define SCA_MADV_BHV syscall_arg__scnprintf_madvise_behavior
/*
 * Beautifier for the flock() operation argument (SCA_FLOCK).
 *
 * One value is printed as "NONE". Otherwise a LOCK_<cmd> name is appended
 * ('|' separated) only when all bits of LOCK_<cmd> are present in op, and
 * the last scnprintf() appends op itself in hex.
 * NOTE(review): presumably "NONE" stands for op == 0 and only unnamed bits
 * reach the hex output - confirm.
 */
492 static size_t syscall_arg__scnprintf_flock(char *bf, size_t size,
493 struct syscall_arg *arg)
495 int printed = 0, op = arg->val;
498 return scnprintf(bf, size, "NONE");
500 if ((op & LOCK_##cmd) == LOCK_##cmd) { \
501 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #cmd); \
516 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", op);
521 #define SCA_FLOCK syscall_arg__scnprintf_flock
/*
 * Beautifier for the futex() op argument (SCA_FUTEX_OP).
 *
 * The command part (op & FUTEX_CMD_MASK) is printed by name, or in hex when
 * unknown. Each known command also ORs a set of SCF_* bits into arg->mask;
 * the SCF_* enum gives one bit per futex() argument.
 * NOTE(review): presumably a set bit suppresses printing of an argument the
 * command does not use - confirm.
 *
 * "|PRIV" and "|CLKRT" are appended when FUTEX_PRIVATE_FLAG respectively
 * FUTEX_CLOCK_REALTIME are set in op.
 */
523 static size_t syscall_arg__scnprintf_futex_op(char *bf, size_t size, struct syscall_arg *arg)
525 enum syscall_futex_args {
526 SCF_UADDR = (1 << 0),
529 SCF_TIMEOUT = (1 << 3),
530 SCF_UADDR2 = (1 << 4),
534 int cmd = op & FUTEX_CMD_MASK;
538 #define P_FUTEX_OP(n) case FUTEX_##n: printed = scnprintf(bf, size, #n);
539 P_FUTEX_OP(WAIT); arg->mask |= SCF_VAL3|SCF_UADDR2; break;
540 P_FUTEX_OP(WAKE); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
541 P_FUTEX_OP(FD); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
542 P_FUTEX_OP(REQUEUE); arg->mask |= SCF_VAL3|SCF_TIMEOUT; break;
543 P_FUTEX_OP(CMP_REQUEUE); arg->mask |= SCF_TIMEOUT; break;
544 P_FUTEX_OP(CMP_REQUEUE_PI); arg->mask |= SCF_TIMEOUT; break;
545 P_FUTEX_OP(WAKE_OP); break;
546 P_FUTEX_OP(LOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
547 P_FUTEX_OP(UNLOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
548 P_FUTEX_OP(TRYLOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2; break;
549 P_FUTEX_OP(WAIT_BITSET); arg->mask |= SCF_UADDR2; break;
550 P_FUTEX_OP(WAKE_BITSET); arg->mask |= SCF_UADDR2; break;
551 P_FUTEX_OP(WAIT_REQUEUE_PI); break;
552 default: printed = scnprintf(bf, size, "%#x", cmd); break;
555 if (op & FUTEX_PRIVATE_FLAG)
556 printed += scnprintf(bf + printed, size - printed, "|PRIV");
558 if (op & FUTEX_CLOCK_REALTIME)
559 printed += scnprintf(bf + printed, size - printed, "|CLKRT");
564 #define SCA_FUTEX_OP syscall_arg__scnprintf_futex_op
/* Names for the epoll_ctl() op argument; the table is declared with offset 1. */
566 static const char *epoll_ctl_ops[] = { "ADD", "DEL", "MOD", };
567 static DEFINE_STRARRAY_OFFSET(epoll_ctl_ops, 1);
/* Names for the 'which' argument of getitimer()/setitimer(), indexed from 0. */
569 static const char *itimers[] = { "REAL", "VIRTUAL", "PROF", };
570 static DEFINE_STRARRAY(itimers);
/* Names for the lseek() whence argument, indexed from 0. */
572 static const char *whences[] = { "SET", "CUR", "END",
580 static DEFINE_STRARRAY(whences);
/* Names for the fcntl() cmd argument, indexed from 0. */
582 static const char *fcntl_cmds[] = {
583 "DUPFD", "GETFD", "SETFD", "GETFL", "SETFL", "GETLK", "SETLK",
584 "SETLKW", "SETOWN", "GETOWN", "SETSIG", "GETSIG", "F_GETLK64",
585 "F_SETLK64", "F_SETLKW64", "F_SETOWN_EX", "F_GETOWN_EX",
588 static DEFINE_STRARRAY(fcntl_cmds);
/*
 * Names for the resource argument of getrlimit()/setrlimit()/prlimit64(),
 * indexed from 0.
 */
590 static const char *rlimit_resources[] = {
591 "CPU", "FSIZE", "DATA", "STACK", "CORE", "RSS", "NPROC", "NOFILE",
592 "MEMLOCK", "AS", "LOCKS", "SIGPENDING", "MSGQUEUE", "NICE", "RTPRIO",
595 static DEFINE_STRARRAY(rlimit_resources);
/* Names for the 'how' argument of rt_sigprocmask(), indexed from 0. */
597 static const char *sighow[] = { "BLOCK", "UNBLOCK", "SETMASK", };
598 static DEFINE_STRARRAY(sighow);
/* Names for the clk_id argument of clock_gettime(), indexed from 0. */
600 static const char *clockid[] = {
601 "REALTIME", "MONOTONIC", "PROCESS_CPUTIME_ID", "THREAD_CPUTIME_ID",
602 "MONOTONIC_RAW", "REALTIME_COARSE", "MONOTONIC_COARSE", "BOOTTIME",
603 "REALTIME_ALARM", "BOOTTIME_ALARM", "SGI_CYCLE", "TAI"
605 static DEFINE_STRARRAY(clockid);
/*
 * Names for the family argument of socket()/socketpair(), indexed from 0
 * ("UNSPEC").
 */
607 static const char *socket_families[] = {
608 "UNSPEC", "LOCAL", "INET", "AX25", "IPX", "APPLETALK", "NETROM",
609 "BRIDGE", "ATMPVC", "X25", "INET6", "ROSE", "DECnet", "NETBEUI",
610 "SECURITY", "KEY", "NETLINK", "PACKET", "ASH", "ECONET", "ATMSVC",
611 "RDS", "SNA", "IRDA", "PPPOX", "WANPIPE", "LLC", "IB", "CAN", "TIPC",
612 "BLUETOOTH", "IUCV", "RXRPC", "ISDN", "PHONET", "IEEE802154", "CAIF",
613 "ALG", "NFC", "VSOCK",
615 static DEFINE_STRARRAY(socket_families);
/*
 * Beautifier for the socket()/socketpair() type argument (SCA_SK_TYPE).
 *
 * The argument is split with SOCK_TYPE_MASK (fallback 0xf when the headers
 * lack it): the masked part is the socket type, printed by name through
 * P_SK_TYPE() or in hex when unknown; the rest are flags. Each SOCK_<n> flag
 * tested through P_SK_FLAG() is appended as "|<n>" and cleared from flags,
 * and what is left of flags is appended as "|0x...".
 */
617 #ifndef SOCK_TYPE_MASK
618 #define SOCK_TYPE_MASK 0xf
621 static size_t syscall_arg__scnprintf_socket_type(char *bf, size_t size,
622 struct syscall_arg *arg)
626 flags = type & ~SOCK_TYPE_MASK;
628 type &= SOCK_TYPE_MASK;
630 * Can't use a strarray, MIPS may override for ABI reasons.
633 #define P_SK_TYPE(n) case SOCK_##n: printed = scnprintf(bf, size, #n); break;
638 P_SK_TYPE(SEQPACKET);
643 printed = scnprintf(bf, size, "%#x", type);
646 #define P_SK_FLAG(n) \
647 if (flags & SOCK_##n) { \
648 printed += scnprintf(bf + printed, size - printed, "|%s", #n); \
649 flags &= ~SOCK_##n; \
657 printed += scnprintf(bf + printed, size - printed, "|%#x", flags);
662 #define SCA_SK_TYPE syscall_arg__scnprintf_socket_type
/*
 * Beautifier for the flags argument of the send/recv syscall family
 * (SCA_MSG_FLAGS), preceded by fallback values for MSG_* constants.
 *
 * One value is printed as "NONE". Otherwise every MSG_<n> bit tested through
 * P_MSG_FLAG() that is set gets its name appended ('|' separated), and the
 * last scnprintf() appends flags itself in hex.
 * NOTE(review): presumably "NONE" stands for flags == 0 and only unnamed
 * bits reach the hex output - confirm.
 */
665 #define MSG_PROBE 0x10
667 #ifndef MSG_WAITFORONE
668 #define MSG_WAITFORONE 0x10000
670 #ifndef MSG_SENDPAGE_NOTLAST
671 #define MSG_SENDPAGE_NOTLAST 0x20000
674 #define MSG_FASTOPEN 0x20000000
677 static size_t syscall_arg__scnprintf_msg_flags(char *bf, size_t size,
678 struct syscall_arg *arg)
680 int printed = 0, flags = arg->val;
683 return scnprintf(bf, size, "NONE");
684 #define P_MSG_FLAG(n) \
685 if (flags & MSG_##n) { \
686 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
692 P_MSG_FLAG(DONTROUTE);
697 P_MSG_FLAG(DONTWAIT);
704 P_MSG_FLAG(ERRQUEUE);
705 P_MSG_FLAG(NOSIGNAL);
707 P_MSG_FLAG(WAITFORONE);
708 P_MSG_FLAG(SENDPAGE_NOTLAST);
709 P_MSG_FLAG(FASTOPEN);
710 P_MSG_FLAG(CMSG_CLOEXEC);
714 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
719 #define SCA_MSG_FLAGS syscall_arg__scnprintf_msg_flags
/*
 * Beautifier for the access() mode argument (SCA_ACCMODE).
 *
 * F_OK (0) is printed as "F". Otherwise the letter <n> is appended, without
 * any separator, for every <n>_OK bit that is set, and the last scnprintf()
 * appends mode as "|0x...".
 * NOTE(review): presumably only unnamed bits reach that hex output - confirm.
 */
721 static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size,
722 struct syscall_arg *arg)
727 if (mode == F_OK) /* 0 */
728 return scnprintf(bf, size, "F");
730 if (mode & n##_OK) { \
731 printed += scnprintf(bf + printed, size - printed, "%s", #n); \
741 printed += scnprintf(bf + printed, size - printed, "|%#x", mode);
746 #define SCA_ACCMODE syscall_arg__scnprintf_access_mode
/*
 * Forward declaration of the pathname beautifier (SCA_FILENAME), needed by
 * the syscall format table.
 */
748 static size_t syscall_arg__scnprintf_filename(char *bf, size_t size,
749 struct syscall_arg *arg);
751 #define SCA_FILENAME syscall_arg__scnprintf_filename
/*
 * Beautifier for open()-style flags (SCA_OPEN_FLAGS).
 *
 * Side effect: when O_CREAT is not set, bit (arg->idx + 1) is set in
 * arg->mask, which masks the argument following the flags (the mode).
 *
 * One value is printed as "RDONLY". Otherwise every O_<n> bit that is set
 * gets its name appended ('|' separated); "SYNC" is only printed when all
 * bits of O_SYNC are present. The last scnprintf() appends flags in hex.
 * NOTE(review): presumably "RDONLY" stands for flags == 0 and only unnamed
 * bits reach the hex output - confirm.
 */
753 static size_t syscall_arg__scnprintf_open_flags(char *bf, size_t size,
754 struct syscall_arg *arg)
756 int printed = 0, flags = arg->val;
758 if (!(flags & O_CREAT))
759 arg->mask |= 1 << (arg->idx + 1); /* Mask the mode parm */
762 return scnprintf(bf, size, "RDONLY");
764 if (flags & O_##n) { \
765 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
789 if ((flags & O_SYNC) == O_SYNC)
790 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", "SYNC");
802 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
807 #define SCA_OPEN_FLAGS syscall_arg__scnprintf_open_flags
/*
 * Beautifier for the perf_event_open() flags argument (SCA_PERF_FLAGS).
 *
 * Each PERF_FLAG_<n> bit that is set gets its name appended ('|' separated)
 * and is cleared from flags, so the final hex output only shows what is left
 * of flags at that point.
 */
809 static size_t syscall_arg__scnprintf_perf_flags(char *bf, size_t size,
810 struct syscall_arg *arg)
812 int printed = 0, flags = arg->val;
818 if (flags & PERF_FLAG_##n) { \
819 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
820 flags &= ~PERF_FLAG_##n; \
830 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
835 #define SCA_PERF_FLAGS syscall_arg__scnprintf_perf_flags
/*
 * Beautifier for the eventfd2() flags argument (SCA_EFD_FLAGS).
 *
 * One value is printed as "NONE". Otherwise every EFD_<n> bit that is set
 * gets its name appended ('|' separated), and the last scnprintf() appends
 * flags itself in hex.
 * NOTE(review): presumably "NONE" stands for flags == 0 and only unnamed
 * bits reach the hex output - confirm.
 */
837 static size_t syscall_arg__scnprintf_eventfd_flags(char *bf, size_t size,
838 struct syscall_arg *arg)
840 int printed = 0, flags = arg->val;
843 return scnprintf(bf, size, "NONE");
845 if (flags & EFD_##n) { \
846 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
856 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
861 #define SCA_EFD_FLAGS syscall_arg__scnprintf_eventfd_flags
/*
 * Beautifier for the pipe2() flags argument (SCA_PIPE_FLAGS).
 *
 * Every O_<n> bit tested that is set gets its name appended ('|'
 * separated), and the last scnprintf() appends flags itself in hex.
 * NOTE(review): presumably only unnamed bits reach that hex output - confirm.
 */
863 static size_t syscall_arg__scnprintf_pipe_flags(char *bf, size_t size,
864 struct syscall_arg *arg)
866 int printed = 0, flags = arg->val;
869 if (flags & O_##n) { \
870 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
879 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
884 #define SCA_PIPE_FLAGS syscall_arg__scnprintf_pipe_flags
/*
 * Beautifier for signal number arguments (SCA_SIGNUM).
 *
 * P_SIGNUM(n) expands to a 'case SIG<n>:' that returns after printing the
 * name without the SIG prefix. A number without a matching case is printed
 * in hex.
 */
886 static size_t syscall_arg__scnprintf_signum(char *bf, size_t size, struct syscall_arg *arg)
891 #define P_SIGNUM(n) case SIG##n: return scnprintf(bf, size, #n)
934 return scnprintf(bf, size, "%#x", sig);
937 #define SCA_SIGNUM syscall_arg__scnprintf_signum
/*
 * Names of terminal ioctl commands, only built on i386/x86_64.
 *
 * The table is declared with offset 0x5401 (the value given to TCGETS here),
 * so entry 0 is "TCGETS". The designated initializers ([0x27], [0x50],
 * [0x60]) jump over unused command numbers, which leaves NULL entries in
 * between.
 */
939 #if defined(__i386__) || defined(__x86_64__)
941 * FIXME: Make this available to all arches.
943 #define TCGETS 0x5401
945 static const char *tioctls[] = {
946 "TCGETS", "TCSETS", "TCSETSW", "TCSETSF", "TCGETA", "TCSETA", "TCSETAW",
947 "TCSETAF", "TCSBRK", "TCXONC", "TCFLSH", "TIOCEXCL", "TIOCNXCL",
948 "TIOCSCTTY", "TIOCGPGRP", "TIOCSPGRP", "TIOCOUTQ", "TIOCSTI",
949 "TIOCGWINSZ", "TIOCSWINSZ", "TIOCMGET", "TIOCMBIS", "TIOCMBIC",
950 "TIOCMSET", "TIOCGSOFTCAR", "TIOCSSOFTCAR", "FIONREAD", "TIOCLINUX",
951 "TIOCCONS", "TIOCGSERIAL", "TIOCSSERIAL", "TIOCPKT", "FIONBIO",
952 "TIOCNOTTY", "TIOCSETD", "TIOCGETD", "TCSBRKP", [0x27] = "TIOCSBRK",
953 "TIOCCBRK", "TIOCGSID", "TCGETS2", "TCSETS2", "TCSETSW2", "TCSETSF2",
954 "TIOCGRS485", "TIOCSRS485", "TIOCGPTN", "TIOCSPTLCK",
955 "TIOCGDEV||TCGETX", "TCSETX", "TCSETXF", "TCSETXW", "TIOCSIG",
956 "TIOCVHANGUP", "TIOCGPKT", "TIOCGPTLCK", "TIOCGEXCL",
957 [0x50] = "FIONCLEX", "FIOCLEX", "FIOASYNC", "TIOCSERCONFIG",
958 "TIOCSERGWILD", "TIOCSERSWILD", "TIOCGLCKTRMIOS", "TIOCSLCKTRMIOS",
959 "TIOCSERGSTRUCT", "TIOCSERGETLSR", "TIOCSERGETMULTI", "TIOCSERSETMULTI",
960 "TIOCMIWAIT", "TIOCGICOUNT", [0x60] = "FIOQSIZE",
963 static DEFINE_STRARRAY_OFFSET(tioctls, 0x5401);
964 #endif /* defined(__i386__) || defined(__x86_64__) */
/*
 * Shorthand for syscall_fmt entries: argument number 'arg' is printed with
 * SCA_STRARRAY using the table strarray__<array>. 'name' is not used in the
 * expansion; it only documents the argument's name at the use site.
 */
966 #define STRARRAY(arg, name, array) \
967 .arg_scnprintf = { [arg] = SCA_STRARRAY, }, \
968 .arg_parm = { [arg] = &strarray__##array, }
/*
 * Per-syscall formatting hints, looked up by name in syscall_fmt__find().
 *
 * Each entry may set flags (.errmsg, .hexret, .timeout), an .alias name, and
 * per-argument beautifiers in .arg_scnprintf (indexed by argument position,
 * at most 6) with optional beautifier data in .arg_parm.
 *
 * The entries MUST be kept sorted by .name in strcmp() order: the lookup
 * uses bsearch() with syscall_fmt__cmp().
 */
970 static struct syscall_fmt {
973 size_t (*arg_scnprintf[6])(char *bf, size_t size, struct syscall_arg *arg);
979 { .name = "access", .errmsg = true,
980 .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */
981 [1] = SCA_ACCMODE, /* mode */ }, },
982 { .name = "arch_prctl", .errmsg = true, .alias = "prctl", },
983 { .name = "brk", .hexret = true,
984 .arg_scnprintf = { [0] = SCA_HEX, /* brk */ }, },
985 { .name = "chdir", .errmsg = true,
986 .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
987 { .name = "chmod", .errmsg = true,
988 .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
989 { .name = "chroot", .errmsg = true,
990 .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
991 { .name = "clock_gettime", .errmsg = true, STRARRAY(0, clk_id, clockid), },
992 { .name = "close", .errmsg = true,
993 .arg_scnprintf = { [0] = SCA_CLOSE_FD, /* fd */ }, },
994 { .name = "connect", .errmsg = true, },
995 { .name = "creat", .errmsg = true,
996 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
997 { .name = "dup", .errmsg = true,
998 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
999 { .name = "dup2", .errmsg = true,
1000 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1001 { .name = "dup3", .errmsg = true,
1002 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1003 { .name = "epoll_ctl", .errmsg = true, STRARRAY(1, op, epoll_ctl_ops), },
1004 { .name = "eventfd2", .errmsg = true,
1005 .arg_scnprintf = { [1] = SCA_EFD_FLAGS, /* flags */ }, },
1006 { .name = "faccessat", .errmsg = true,
1007 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1008 [1] = SCA_FILENAME, /* filename */ }, },
1009 { .name = "fadvise64", .errmsg = true,
1010 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1011 { .name = "fallocate", .errmsg = true,
1012 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1013 { .name = "fchdir", .errmsg = true,
1014 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1015 { .name = "fchmod", .errmsg = true,
1016 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1017 { .name = "fchmodat", .errmsg = true,
1018 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */
1019 [1] = SCA_FILENAME, /* filename */ }, },
1020 { .name = "fchown", .errmsg = true,
1021 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1022 { .name = "fchownat", .errmsg = true,
1023 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */
1024 [1] = SCA_FILENAME, /* filename */ }, },
1025 { .name = "fcntl", .errmsg = true,
1026 .arg_scnprintf = { [0] = SCA_FD, /* fd */
1027 [1] = SCA_STRARRAY, /* cmd */ },
1028 .arg_parm = { [1] = &strarray__fcntl_cmds, /* cmd */ }, },
1029 { .name = "fdatasync", .errmsg = true,
1030 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1031 { .name = "flock", .errmsg = true,
1032 .arg_scnprintf = { [0] = SCA_FD, /* fd */
1033 [1] = SCA_FLOCK, /* cmd */ }, },
1034 { .name = "fsetxattr", .errmsg = true,
1035 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1036 { .name = "fstat", .errmsg = true, .alias = "newfstat",
1037 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1038 { .name = "fstatat", .errmsg = true, .alias = "newfstatat",
1039 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1040 [1] = SCA_FILENAME, /* filename */ }, },
1041 { .name = "fstatfs", .errmsg = true,
1042 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1043 { .name = "fsync", .errmsg = true,
1044 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1045 { .name = "ftruncate", .errmsg = true,
1046 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1047 { .name = "futex", .errmsg = true,
1048 .arg_scnprintf = { [1] = SCA_FUTEX_OP, /* op */ }, },
1049 { .name = "futimesat", .errmsg = true,
1050 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */
1051 [1] = SCA_FILENAME, /* filename */ }, },
1052 { .name = "getdents", .errmsg = true,
1053 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1054 { .name = "getdents64", .errmsg = true,
1055 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1056 { .name = "getitimer", .errmsg = true, STRARRAY(0, which, itimers), },
1057 { .name = "getrlimit", .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
1058 { .name = "getxattr", .errmsg = true,
1059 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1060 { .name = "inotify_add_watch", .errmsg = true,
1061 .arg_scnprintf = { [1] = SCA_FILENAME, /* pathname */ }, },
1062 { .name = "ioctl", .errmsg = true,
1063 .arg_scnprintf = { [0] = SCA_FD, /* fd */
1064 #if defined(__i386__) || defined(__x86_64__)
1066 * FIXME: Make this available to all arches.
1068 [1] = SCA_STRHEXARRAY, /* cmd */
1069 [2] = SCA_HEX, /* arg */ },
1070 .arg_parm = { [1] = &strarray__tioctls, /* cmd */ }, },
1072 [2] = SCA_HEX, /* arg */ }, },
1074 { .name = "kill", .errmsg = true,
1075 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1076 { .name = "lchown", .errmsg = true,
1077 .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1078 { .name = "lgetxattr", .errmsg = true,
1079 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1080 { .name = "linkat", .errmsg = true,
1081 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
1082 { .name = "listxattr", .errmsg = true,
1083 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1084 { .name = "llistxattr", .errmsg = true,
1085 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1086 { .name = "lremovexattr", .errmsg = true,
1087 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1088 { .name = "lseek", .errmsg = true,
1089 .arg_scnprintf = { [0] = SCA_FD, /* fd */
1090 [2] = SCA_STRARRAY, /* whence */ },
1091 .arg_parm = { [2] = &strarray__whences, /* whence */ }, },
1092 { .name = "lsetxattr", .errmsg = true,
1093 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1094 { .name = "lstat", .errmsg = true, .alias = "newlstat",
1095 .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1096 { .name = "lsxattr", .errmsg = true,
1097 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1098 { .name = "madvise", .errmsg = true,
1099 .arg_scnprintf = { [0] = SCA_HEX, /* start */
1100 [2] = SCA_MADV_BHV, /* behavior */ }, },
1101 { .name = "mkdir", .errmsg = true,
1102 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1103 { .name = "mkdirat", .errmsg = true,
1104 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */
1105 [1] = SCA_FILENAME, /* pathname */ }, },
1106 { .name = "mknod", .errmsg = true,
1107 .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1108 { .name = "mknodat", .errmsg = true,
1109 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */
1110 [1] = SCA_FILENAME, /* filename */ }, },
1111 { .name = "mlock", .errmsg = true,
1112 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1113 { .name = "mlockall", .errmsg = true,
1114 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1115 { .name = "mmap", .hexret = true,
1116 .arg_scnprintf = { [0] = SCA_HEX, /* addr */
1117 [2] = SCA_MMAP_PROT, /* prot */
1118 [3] = SCA_MMAP_FLAGS, /* flags */
1119 [4] = SCA_FD, /* fd */ }, },
1120 { .name = "mprotect", .errmsg = true,
1121 .arg_scnprintf = { [0] = SCA_HEX, /* start */
1122 [2] = SCA_MMAP_PROT, /* prot */ }, },
1123 { .name = "mq_unlink", .errmsg = true,
1124 .arg_scnprintf = { [0] = SCA_FILENAME, /* u_name */ }, },
1125 { .name = "mremap", .hexret = true,
1126 .arg_scnprintf = { [0] = SCA_HEX, /* addr */
1127 [3] = SCA_MREMAP_FLAGS, /* flags */
1128 [4] = SCA_HEX, /* new_addr */ }, },
1129 { .name = "munlock", .errmsg = true,
1130 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1131 { .name = "munmap", .errmsg = true,
1132 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1133 { .name = "name_to_handle_at", .errmsg = true,
1134 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1135 { .name = "newfstatat", .errmsg = true,
1136 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1137 [1] = SCA_FILENAME, /* filename */ }, },
1138 { .name = "open", .errmsg = true,
1139 .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */
1140 [1] = SCA_OPEN_FLAGS, /* flags */ }, },
1141 { .name = "open_by_handle_at", .errmsg = true,
1142 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1143 [2] = SCA_OPEN_FLAGS, /* flags */ }, },
1144 { .name = "openat", .errmsg = true,
1145 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1146 [1] = SCA_FILENAME, /* filename */
1147 [2] = SCA_OPEN_FLAGS, /* flags */ }, },
1148 { .name = "perf_event_open", .errmsg = true,
1149 .arg_scnprintf = { [1] = SCA_INT, /* pid */
1150 [2] = SCA_INT, /* cpu */
1151 [3] = SCA_FD, /* group_fd */
1152 [4] = SCA_PERF_FLAGS, /* flags */ }, },
1153 { .name = "pipe2", .errmsg = true,
1154 .arg_scnprintf = { [1] = SCA_PIPE_FLAGS, /* flags */ }, },
1155 { .name = "poll", .errmsg = true, .timeout = true, },
1156 { .name = "ppoll", .errmsg = true, .timeout = true, },
1157 { .name = "pread", .errmsg = true, .alias = "pread64",
1158 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1159 { .name = "preadv", .errmsg = true, .alias = "pread",
1160 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1161 { .name = "prlimit64", .errmsg = true, STRARRAY(1, resource, rlimit_resources), },
1162 { .name = "pwrite", .errmsg = true, .alias = "pwrite64",
1163 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1164 { .name = "pwritev", .errmsg = true,
1165 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1166 { .name = "read", .errmsg = true,
1167 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1168 { .name = "readlink", .errmsg = true,
1169 .arg_scnprintf = { [0] = SCA_FILENAME, /* path */ }, },
1170 { .name = "readlinkat", .errmsg = true,
1171 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1172 [1] = SCA_FILENAME, /* pathname */ }, },
1173 { .name = "readv", .errmsg = true,
1174 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1175 { .name = "recvfrom", .errmsg = true,
1176 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1177 { .name = "recvmmsg", .errmsg = true,
1178 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1179 { .name = "recvmsg", .errmsg = true,
1180 .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
1181 { .name = "removexattr", .errmsg = true,
1182 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1183 { .name = "renameat", .errmsg = true,
1184 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1185 { .name = "rmdir", .errmsg = true,
1186 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1187 { .name = "rt_sigaction", .errmsg = true,
1188 .arg_scnprintf = { [0] = SCA_SIGNUM, /* sig */ }, },
1189 { .name = "rt_sigprocmask", .errmsg = true, STRARRAY(0, how, sighow), },
1190 { .name = "rt_sigqueueinfo", .errmsg = true,
1191 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1192 { .name = "rt_tgsigqueueinfo", .errmsg = true,
1193 .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
1194 { .name = "select", .errmsg = true, .timeout = true, },
1195 { .name = "sendmmsg", .errmsg = true,
1196 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1197 { .name = "sendmsg", .errmsg = true,
1198 .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
1199 { .name = "sendto", .errmsg = true,
1200 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1201 { .name = "setitimer", .errmsg = true, STRARRAY(0, which, itimers), },
1202 { .name = "setrlimit", .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
1203 { .name = "setxattr", .errmsg = true,
1204 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1205 { .name = "shutdown", .errmsg = true,
1206 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1207 { .name = "socket", .errmsg = true,
1208 .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
1209 [1] = SCA_SK_TYPE, /* type */ },
1210 .arg_parm = { [0] = &strarray__socket_families, /* family */ }, },
1211 { .name = "socketpair", .errmsg = true,
1212 .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
1213 [1] = SCA_SK_TYPE, /* type */ },
1214 .arg_parm = { [0] = &strarray__socket_families, /* family */ }, },
1215 { .name = "stat", .errmsg = true, .alias = "newstat",
1216 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1217 { .name = "statfs", .errmsg = true,
1218 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1219 { .name = "swapoff", .errmsg = true,
1220 .arg_scnprintf = { [0] = SCA_FILENAME, /* specialfile */ }, },
1221 { .name = "swapon", .errmsg = true,
1222 .arg_scnprintf = { [0] = SCA_FILENAME, /* specialfile */ }, },
1223 { .name = "symlinkat", .errmsg = true,
1224 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1225 { .name = "tgkill", .errmsg = true,
1226 .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
1227 { .name = "tkill", .errmsg = true,
1228 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1229 { .name = "truncate", .errmsg = true,
1230 .arg_scnprintf = { [0] = SCA_FILENAME, /* path */ }, },
1231 { .name = "uname", .errmsg = true, .alias = "newuname", },
1232 { .name = "unlinkat", .errmsg = true,
1233 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1234 [1] = SCA_FILENAME, /* pathname */ }, },
1235 { .name = "utime", .errmsg = true,
1236 .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1237 { .name = "utimensat", .errmsg = true,
1238 .arg_scnprintf = { [0] = SCA_FDAT, /* dirfd */
1239 [1] = SCA_FILENAME, /* filename */ }, },
1240 { .name = "utimes", .errmsg = true,
1241 .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1242 { .name = "vmsplice", .errmsg = true,
1243 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1244 { .name = "write", .errmsg = true,
1245 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1246 { .name = "writev", .errmsg = true,
1247 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
/*
 * bsearch() comparison callback: @name is the key (a syscall name string),
 * @fmtp points at one struct syscall_fmt element; ordering is strcmp() on
 * the element's ->name.
 */
1250 static int syscall_fmt__cmp(const void *name, const void *fmtp)
1252 const struct syscall_fmt *fmt = fmtp;
1253 return strcmp(name, fmt->name);
/*
 * Look up the formatting entry for syscall @name with a binary search over
 * syscall_fmts[]. Because bsearch() is used with syscall_fmt__cmp(), the
 * table has to be kept sorted by ->name in strcmp() order. Returns the
 * bsearch() result, i.e. NULL when @name has no entry.
 */
1256 static struct syscall_fmt *syscall_fmt__find(const char *name)
1258 const int nmemb = ARRAY_SIZE(syscall_fmts);
1259 return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp);
/* Tracepoint format of the sys_enter_<name> event, see trace__read_syscall_info() */
1263 struct event_format *tp_format;
/* Argument fields taken from tp_format->format.fields (a leading "nr" field is skipped) */
1265 struct format_field *args;
/* Optional entry from syscall_fmts[], NULL when syscall_fmt__find() finds none */
1268 struct syscall_fmt *fmt;
/* Per-argument formatters, one slot per argument, filled by syscall__set_arg_fmts() */
1269 size_t (**arg_scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
/*
 * Print "(<duration> ms): " to @fp.
 *
 * @t is divided by NSEC_PER_MSEC, so it is a duration in nanoseconds that
 * gets shown in milliseconds. The value is colored by magnitude: red for
 * >= 1.0 ms, yellow for >= 0.01 ms, and PERF_COLOR_NORMAL in the remaining
 * color_fprintf() call.
 *
 * Returns the sum of what the individual print calls returned.
 */
1273 static size_t fprintf_duration(unsigned long t, FILE *fp)
1275 double duration = (double)t / NSEC_PER_MSEC;
1276 size_t printed = fprintf(fp, "(");
1278 if (duration >= 1.0)
1279 printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration);
1280 else if (duration >= 0.01)
1281 printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration);
1283 printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration);
1284 return printed + fprintf(fp, "): ");
1288 * filename.ptr: The filename char pointer that will be vfs_getname'd
1289 * filename.entry_str_pos: Where to insert the string translated from
1290 * filename.ptr by the vfs_getname tracepoint/kprobe.
/* Per-thread tracing state, stored in the thread's priv pointer (see thread__trace()) */
1292 struct thread_trace {
/* Incremented once per call to thread__trace() for this thread */
1296 unsigned long nr_events;
/* NOTE(review): presumably major/minor page fault counters - updates are not visible here */
1297 unsigned long pfmaj, pfmin;
/* intlist keyed by syscall id; each node's ->priv is a struct stats (thread__update_stats()) */
1309 struct intlist *syscall_stats;
/*
 * Allocate a zeroed thread_trace. paths.max starts at -1, the value
 * trace__set_fd_pathname() tests for to recognise a not yet populated fd
 * table, and syscall_stats starts as a new intlist.
 */
1312 static struct thread_trace *thread_trace__new(void)
1314 struct thread_trace *ttrace = zalloc(sizeof(struct thread_trace));
1317 ttrace->paths.max = -1;
1319 ttrace->syscall_stats = intlist__new(NULL);
/*
 * Get the thread_trace attached to @thread, creating it with
 * thread_trace__new() and attaching it via thread__set_priv() on first use,
 * and account one more event for the thread.
 *
 * If the priv pointer is still NULL after the attempt to create it, the
 * failure path is taken; the red "not enough memory" warning written to @fp
 * belongs to that path (the branch statements themselves are not part of
 * this listing).
 */
1324 static struct thread_trace *thread__trace(struct thread *thread, FILE *fp)
1326 struct thread_trace *ttrace;
1331 if (thread__priv(thread) == NULL)
1332 thread__set_priv(thread, thread_trace__new());
1334 if (thread__priv(thread) == NULL)
1337 ttrace = thread__priv(thread);
1338 ++ttrace->nr_events;
1342 color_fprintf(fp, PERF_COLOR_RED,
1343 "WARNING: not enough memory, dropping samples!\n");
/* Bits tested against trace->trace_pgfaults to select major and/or minor page fault tracing */
1347 #define TRACE_PFMAJ (1 << 0)
1348 #define TRACE_PFMIN (1 << 1)
/* Size in bytes of the per-thread entry_str buffer in which a syscall entry line is formatted */
1350 static const size_t trace__entry_str_size = 2048;
/* Embedded tool; callbacks recover the enclosing struct trace with container_of() */
1353 struct perf_tool tool;
/* Syscall descriptors indexed by syscall id, grown on demand by trace__read_syscall_info() */
1360 struct syscall *table;
/* Syscall entry tracepoint evsel, set up by trace__add_syscall_newtp() */
1362 struct perf_evsel *sys_enter,
1366 struct record_opts opts;
1367 struct perf_evlist *evlist;
/* Machine that threads are looked up in: machine__new_host() when live, the session host on replay */
1368 struct machine *host;
/* Thread whose sys_enter was seen last; a reference is held via thread__get() */
1369 struct thread *current;
1372 unsigned long nr_events;
/* Syscall names given by the user, translated to ids by trace__validate_ev_qualifier() */
1373 struct strlist *ev_qualifier;
/* "pathname" payload of the most recent vfs_getname sample, consumed at open's sys_exit */
1378 const char *last_vfs_getname;
/* Built by parse_target_str() and consulted by skip_sample() */
1379 struct intlist *tid_list;
1380 struct intlist *pid_list;
/* Threshold in milliseconds, see trace__filter_duration() */
1385 double duration_filter;
/* When set, the qualifier list is negated in the generated tracepoint filter */
1391 bool not_ev_qualifier;
/* When set, each output line is prefixed with the tid (and comm if show_comm) */
1395 bool multiple_threads;
/* Print the vfs_getname/proc_getname counters at the end of trace__run() */
1399 bool show_tool_stats;
/* Whether the syscall enter/exit tracepoints are to be traced */
1400 bool trace_syscalls;
1406 static int trace__set_fd_pathname(struct thread *thread, int fd, const char *pathname)
1408 struct thread_trace *ttrace = thread__priv(thread);
1410 if (fd > ttrace->paths.max) {
1411 char **npath = realloc(ttrace->paths.table, (fd + 1) * sizeof(char *));
1416 if (ttrace->paths.max != -1) {
1417 memset(npath + ttrace->paths.max + 1, 0,
1418 (fd - ttrace->paths.max) * sizeof(char *));
1420 memset(npath, 0, (fd + 1) * sizeof(char *));
1423 ttrace->paths.table = npath;
1424 ttrace->paths.max = fd;
1427 ttrace->paths.table[fd] = strdup(pathname);
1429 return ttrace->paths.table[fd] != NULL ? 0 : -1;
/*
 * Resolve @fd of @thread by reading the corresponding /proc symlink and
 * cache the result with trace__set_fd_pathname().
 *
 * /proc/<pid>/fd/<fd> is used when the thread's pid_ equals its tid,
 * /proc/<pid>/task/<tid>/fd/<fd> otherwise. The link is lstat()ed first and
 * rejected if its size plus the terminating NUL would not fit in the local
 * buffer; the readlink() result is rejected if negative or longer than the
 * size lstat() reported.
 *
 * Returns the result of trace__set_fd_pathname() once the link was read.
 */
1432 static int thread__read_fd_path(struct thread *thread, int fd)
1434 char linkname[PATH_MAX], pathname[PATH_MAX];
1438 if (thread->pid_ == thread->tid) {
1439 scnprintf(linkname, sizeof(linkname),
1440 "/proc/%d/fd/%d", thread->pid_, fd);
1442 scnprintf(linkname, sizeof(linkname),
1443 "/proc/%d/task/%d/fd/%d", thread->pid_, thread->tid, fd);
1446 if (lstat(linkname, &st) < 0 || st.st_size + 1 > (off_t)sizeof(pathname))
1449 ret = readlink(linkname, pathname, sizeof(pathname));
1451 if (ret < 0 || ret > st.st_size)
1454 pathname[ret] = '\0';
1455 return trace__set_fd_pathname(thread, fd, pathname);
/*
 * Return the cached pathname for @fd of @thread.
 *
 * When @fd is beyond the table or its slot is empty, a /proc lookup is
 * counted in trace->stats.proc_getname and thread__read_fd_path() is asked
 * to fill the slot; the early-return statements of this function are not
 * part of this listing.
 */
1458 static const char *thread__fd_path(struct thread *thread, int fd,
1459 struct trace *trace)
1461 struct thread_trace *ttrace = thread__priv(thread);
1469 if ((fd > ttrace->paths.max || ttrace->paths.table[fd] == NULL)) {
1472 ++trace->stats.proc_getname;
1473 if (thread__read_fd_path(thread, fd))
1477 return ttrace->paths.table[fd];
/*
 * Argument formatter for file descriptors: prints the fd in decimal and
 * appends "<path>" using the pathname obtained from thread__fd_path().
 * NOTE(review): the check guarding the "<%s>" print is not visible in this
 * listing - presumably it is skipped when no path is known.
 */
1480 static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
1481 struct syscall_arg *arg)
1484 size_t printed = scnprintf(bf, size, "%d", fd);
1485 const char *path = thread__fd_path(arg->thread, fd, arg->trace);
1488 printed += scnprintf(bf + printed, size - printed, "<%s>", path);
/*
 * Argument formatter for the descriptor passed to close(): format it like
 * any other fd, then drop the cached pathname for that fd (if it is inside
 * the table) so that a later reuse of the number is resolved afresh.
 */
1493 static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
1494 struct syscall_arg *arg)
1497 size_t printed = syscall_arg__scnprintf_fd(bf, size, arg);
1498 struct thread_trace *ttrace = thread__priv(arg->thread);
1500 if (ttrace && fd >= 0 && fd <= ttrace->paths.max)
1501 zfree(&ttrace->paths.table[fd]);
/*
 * Record, in the thread's trace state, the filename pointer being formatted
 * and the offset of @bf inside entry_str, i.e. the position at which
 * trace__vfs_getname() later inserts the resolved string.
 */
1506 static void thread__set_filename_pos(struct thread *thread, const char *bf,
1509 struct thread_trace *ttrace = thread__priv(thread);
1511 ttrace->filename.ptr = ptr;
1512 ttrace->filename.entry_str_pos = bf - ttrace->entry_str;
1515 static size_t syscall_arg__scnprintf_filename(char *bf, size_t size,
1516 struct syscall_arg *arg)
1518 unsigned long ptr = arg->val;
1520 if (!arg->trace->vfs_getname)
1521 return scnprintf(bf, size, "%#x", ptr);
1523 thread__set_filename_pos(arg->thread, bf, ptr);
/*
 * Returns true when @t is below the configured threshold. duration_filter is
 * scaled by NSEC_PER_MSEC before the comparison, so @t is in nanoseconds and
 * the filter is in milliseconds. trace__sys_exit() uses the result to decide
 * whether to skip a syscall.
 */
1527 static bool trace__filter_duration(struct trace *trace, double t)
1529 return t < (trace->duration_filter * NSEC_PER_MSEC);
/*
 * Print @tstamp relative to trace->base_time, converted from nanoseconds to
 * milliseconds, as "%10.3f " on @fp. Returns fprintf()'s result.
 */
1532 static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
1534 double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC;
1536 return fprintf(fp, "%10.3f ", ts);
/* Polled by the event loop in trace__run() to know when to stop */
1539 static bool done = false;
/* Set when the signal that was delivered is SIGINT */
1540 static bool interrupted = false;
/* Handler installed for SIGCHLD and SIGINT by trace__run() */
1542 static void sig_handler(int sig)
1545 interrupted = sig == SIGINT;
/*
 * Print the common prefix of an output line to @fp: the relative timestamp,
 * the duration and, when more than one thread is being traced, the thread
 * identity - the comm limited to 14 characters (only if show_comm is set)
 * followed by the tid.
 */
1548 static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread,
1549 u64 duration, u64 tstamp, FILE *fp)
1551 size_t printed = trace__fprintf_tstamp(trace, tstamp, fp);
1552 printed += fprintf_duration(duration, fp);
1554 if (trace->multiple_threads) {
1555 if (trace->show_comm)
1556 printed += fprintf(fp, "%.14s/", thread__comm_str(thread));
1557 printed += fprintf(fp, "%d ", thread->tid);
/*
 * Handle a non-sample event. PERF_RECORD_LOST is reported in red on the
 * trace output, with the number of lost events, before being handed to
 * machine__process_lost_event(); the other visible path hands the event to
 * machine__process_event().
 */
1563 static int trace__process_event(struct trace *trace, struct machine *machine,
1564 union perf_event *event, struct perf_sample *sample)
1568 switch (event->header.type) {
1569 case PERF_RECORD_LOST:
1570 color_fprintf(trace->output, PERF_COLOR_RED,
1571 "LOST %" PRIu64 " events!\n", event->lost.lost);
1572 ret = machine__process_lost_event(machine, event, sample);
1574 ret = machine__process_event(machine, event, sample);
/*
 * perf_tool style callback: recover the struct trace embedding @tool and
 * forward to trace__process_event(). Passed to
 * __machine__synthesize_threads() in trace__symbols_init().
 */
1581 static int trace__tool_process(struct perf_tool *tool,
1582 union perf_event *event,
1583 struct perf_sample *sample,
1584 struct machine *machine)
1586 struct trace *trace = container_of(tool, struct trace, tool);
1587 return trace__process_event(trace, machine, event, sample);
/*
 * Prepare symbol resolution for a live session: initialise the symbol
 * library, create the host machine, register machine__resolve_kernel_addr
 * as the trace event resolver for it, and synthesize the threads in
 * @evlist->threads through trace__tool_process().
 */
1590 static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
1592 int err = symbol__init(NULL);
1597 trace->host = machine__new_host();
1598 if (trace->host == NULL)
1601 if (trace_event__register_resolver(trace->host, machine__resolve_kernel_addr) < 0)
1604 err = __machine__synthesize_threads(trace->host, &trace->tool, &trace->opts.target,
1605 evlist->threads, trace__tool_process, false,
1606 trace->opts.proc_map_timeout);
/*
 * Build the per-argument formatter array of @sc.
 *
 * One slot per argument is allocated zeroed. Walking the argument fields, a
 * formatter given by the syscall's fmt entry wins; otherwise fields flagged
 * FIELD_IS_POINTER get the hex formatter. Slots that match neither stay
 * NULL. arg_parm is taken from the fmt entry.
 */
1613 static int syscall__set_arg_fmts(struct syscall *sc)
1615 struct format_field *field;
1618 sc->arg_scnprintf = calloc(sc->nr_args, sizeof(void *));
1619 if (sc->arg_scnprintf == NULL)
1623 sc->arg_parm = sc->fmt->arg_parm;
1625 for (field = sc->args; field; field = field->next) {
1626 if (sc->fmt && sc->fmt->arg_scnprintf[idx])
1627 sc->arg_scnprintf[idx] = sc->fmt->arg_scnprintf[idx];
1628 else if (field->flags & FIELD_IS_POINTER)
1629 sc->arg_scnprintf[idx] = syscall_arg__scnprintf_hex;
/*
 * Fill in the descriptor for syscall @id.
 *
 * The name comes from audit_syscall_to_name(). The descriptor table is
 * indexed by id and grown with realloc() when @id is past its end, zeroing
 * the newly added slots. The argument layout is read from the
 * syscalls:sys_enter_<name> tracepoint format, retried with the fmt entry's
 * alias when the primary name has no tracepoint.
 *
 * Returns the result of syscall__set_arg_fmts() when the format was found.
 */
1636 static int trace__read_syscall_info(struct trace *trace, int id)
1640 const char *name = audit_syscall_to_name(id, trace->audit.machine);
1645 if (id > trace->syscalls.max) {
1646 struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc));
1648 if (nsyscalls == NULL)
/* syscalls.max == -1 means nothing was allocated before: zero the whole table */
1651 if (trace->syscalls.max != -1) {
1652 memset(nsyscalls + trace->syscalls.max + 1, 0,
1653 (id - trace->syscalls.max) * sizeof(*sc));
1655 memset(nsyscalls, 0, (id + 1) * sizeof(*sc));
1658 trace->syscalls.table = nsyscalls;
1659 trace->syscalls.max = id;
1662 sc = trace->syscalls.table + id;
1665 sc->fmt = syscall_fmt__find(sc->name);
1667 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name);
1668 sc->tp_format = trace_event__tp_format("syscalls", tp_name);
/* Some syscalls are exposed under another tracepoint name, e.g. stat -> newstat */
1670 if (sc->tp_format == NULL && sc->fmt && sc->fmt->alias) {
1671 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias);
1672 sc->tp_format = trace_event__tp_format("syscalls", tp_name);
1675 if (sc->tp_format == NULL)
1678 sc->args = sc->tp_format->format.fields;
1679 sc->nr_args = sc->tp_format->format.nr_fields;
1680 /* drop nr field - not relevant here; does not exist on older kernels */
1681 if (sc->args && strcmp(sc->args->name, "nr") == 0) {
1682 sc->args = sc->args->next;
1686 sc->is_exit = !strcmp(name, "exit_group") || !strcmp(name, "exit");
1688 return syscall__set_arg_fmts(sc);
/*
 * Translate the syscall names in trace->ev_qualifier into ids.
 *
 * An id array with one slot per list entry is allocated and filled using
 * audit_name_to_syscall(). Names that cannot be translated are written to
 * the trace output as a comma separated "Invalid syscall" list followed by
 * hints; in that case the id array is freed again and its count reset to 0.
 */
1691 static int trace__validate_ev_qualifier(struct trace *trace)
1694 struct str_node *pos;
1696 trace->ev_qualifier_ids.nr = strlist__nr_entries(trace->ev_qualifier);
1697 trace->ev_qualifier_ids.entries = malloc(trace->ev_qualifier_ids.nr *
1698 sizeof(trace->ev_qualifier_ids.entries[0]));
1700 if (trace->ev_qualifier_ids.entries == NULL) {
1701 fputs("Error:\tNot enough memory for allocating events qualifier ids\n",
1709 strlist__for_each(pos, trace->ev_qualifier) {
1710 const char *sc = pos->s;
1711 int id = audit_name_to_syscall(sc, trace->audit.machine);
1715 fputs("Error:\tInvalid syscall ", trace->output);
1718 fputs(", ", trace->output);
1721 fputs(sc, trace->output);
1724 trace->ev_qualifier_ids.entries[i++] = id;
1728 fputs("\nHint:\ttry 'perf list syscalls:sys_enter_*'"
1729 "\nHint:\tand: 'man syscalls'\n", trace->output);
1730 zfree(&trace->ev_qualifier_ids.entries);
1731 trace->ev_qualifier_ids.nr = 0;
1738 * args is to be interpreted as a series of longs but we need to handle
1739 * 8-byte unaligned accesses. args points to raw_data within the event
1740 * and raw_data is guaranteed to be 8-byte unaligned because it is
1741 * preceded by raw_size which is a u32. So we need to copy args to a temp
1742 * variable to read it. Most notably this avoids extended load instructions
1743 * on unaligned addresses
/*
 * Format the arguments of a syscall entry into @bf (at most @size bytes).
 *
 * @args is the raw argument area of the sample, read as consecutive
 * unsigned longs; each value is memcpy()ed into a local first because the
 * area may be unaligned. When the tracepoint format is known (sc->args),
 * every argument is printed as "name: value", using the per-argument
 * formatter if one is set. The second memcpy()/scnprintf() pair, indexed by
 * i, prints the arguments without field names.
 *
 * Returns the number of characters written.
 */
1746 static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
1747 unsigned char *args, struct trace *trace,
1748 struct thread *thread)
1754 if (sc->args != NULL) {
1755 struct format_field *field;
1757 struct syscall_arg arg = {
/* bit is shifted left once per argument, in step with arg.idx */
1764 for (field = sc->args; field;
1765 field = field->next, ++arg.idx, bit <<= 1) {
1769 /* special care for unaligned accesses */
1770 p = args + sizeof(unsigned long) * arg.idx;
1771 memcpy(&val, p, sizeof(val));
1774 * Suppress this argument if its value is zero and
1775 * and we don't have a string associated in an
1779 !(sc->arg_scnprintf &&
1780 sc->arg_scnprintf[arg.idx] == SCA_STRARRAY &&
1781 sc->arg_parm[arg.idx]))
/* Separate arguments with ", " once something has been printed */
1784 printed += scnprintf(bf + printed, size - printed,
1785 "%s%s: ", printed ? ", " : "", field->name);
1786 if (sc->arg_scnprintf && sc->arg_scnprintf[arg.idx]) {
1789 arg.parm = sc->arg_parm[arg.idx];
1790 printed += sc->arg_scnprintf[arg.idx](bf + printed,
1791 size - printed, &arg);
1793 printed += scnprintf(bf + printed, size - printed,
1801 /* special care for unaligned accesses */
1802 p = args + sizeof(unsigned long) * i;
1803 memcpy(&val, p, sizeof(val));
1804 printed += scnprintf(bf + printed, size - printed,
1806 printed ? ", " : "", i, val);
/*
 * Signature of the per-evsel sample handlers stored in evsel->handler and
 * invoked from trace__handle_event() and trace__process_sample().
 */
1814 typedef int (*tracepoint_handler)(struct trace *trace, struct perf_evsel *evsel,
1815 union perf_event *event,
1816 struct perf_sample *sample);
/*
 * Return the descriptor for syscall @id, reading it on first use with
 * trace__read_syscall_info().
 *
 * Invalid ids are reported with the evsel name and a running count.
 * Failures to read the descriptor are reported as "Problems reading syscall
 * <id>(<name>) information", the name being included only when known.
 */
1818 static struct syscall *trace__syscall_info(struct trace *trace,
1819 struct perf_evsel *evsel, int id)
1825 * XXX: Noticed on x86_64, reproduced as far back as 3.0.36, haven't tried
1826 * before that, leaving at a higher verbosity level till that is
1827 * explained. Reproduced with plain ftrace with:
1829 * echo 1 > /t/events/raw_syscalls/sys_exit/enable
1830 * grep "NR -1 " /t/trace_pipe
1832 * After generating some load on the machine.
1836 fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n",
1837 id, perf_evsel__name(evsel), ++n);
/* Not in the table yet, or slot never filled: try to read it now */
1842 if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) &&
1843 trace__read_syscall_info(trace, id))
/* Re-check: a successful read may still have left the slot without a name */
1846 if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL))
1849 return &trace->syscalls.table[id];
1853 fprintf(trace->output, "Problems reading syscall %d", id);
1854 if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL)
1855 fprintf(trace->output, "(%s)", trace->syscalls.table[id].name);
1856 fputs(" information\n", trace->output);
/*
 * Account one completed syscall @id in the thread's per-syscall statistics.
 *
 * The stats object lives in the ->priv of the intlist node for @id and is
 * allocated the first time the syscall is seen. The duration passed to
 * update_stats() is sample->time - entry_time, computed only when an entry
 * time is set and the sample is newer than it.
 */
1861 static void thread__update_stats(struct thread_trace *ttrace,
1862 int id, struct perf_sample *sample)
1864 struct int_node *inode;
1865 struct stats *stats;
1868 inode = intlist__findnew(ttrace->syscall_stats, id);
1872 stats = inode->priv;
1873 if (stats == NULL) {
1874 stats = malloc(sizeof(struct stats));
1878 inode->priv = stats;
1881 if (ttrace->entry_time && sample->time > ttrace->entry_time)
1882 duration = sample->time - ttrace->entry_time;
1884 update_stats(stats, duration);
/*
 * If the current thread has a syscall entry that was formatted but not yet
 * printed (entry_pending), print it now followed by ") ..." and clear the
 * pending flag. The duration shown is the time from that entry to @sample.
 * Called before output for another event is produced.
 */
1887 static int trace__printf_interrupted_entry(struct trace *trace, struct perf_sample *sample)
1889 struct thread_trace *ttrace;
1893 if (trace->current == NULL)
1896 ttrace = thread__priv(trace->current);
1898 if (!ttrace->entry_pending)
1901 duration = sample->time - ttrace->entry_time;
1903 printed = trace__fprintf_entry_head(trace, trace->current, duration, sample->time, trace->output);
1904 printed += fprintf(trace->output, "%-70s) ...\n", ttrace->entry_str);
1905 ttrace->entry_pending = false;
/*
 * Handler for the syscall entry tracepoint.
 *
 * Formats "<name>(<args>" into the thread's entry_str buffer, which is
 * allocated on first use with trace__entry_str_size bytes, and records the
 * entry time. A still pending entry of the previous current thread is
 * flushed first unless only the summary is wanted. The entry line is printed
 * right away in one visible path (no duration filter, not summary only);
 * the other visible path just marks it pending. The thread becomes
 * trace->current, with the reference counts adjusted accordingly.
 */
1910 static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
1911 union perf_event *event __maybe_unused,
1912 struct perf_sample *sample)
1917 struct thread *thread;
1918 int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1;
1919 struct syscall *sc = trace__syscall_info(trace, evsel, id);
1920 struct thread_trace *ttrace;
1925 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1926 ttrace = thread__trace(thread, trace->output);
1930 args = perf_evsel__sc_tp_ptr(evsel, args, sample);
1932 if (ttrace->entry_str == NULL) {
1933 ttrace->entry_str = malloc(trace__entry_str_size);
1934 if (!ttrace->entry_str)
1938 if (!trace->summary_only)
1939 trace__printf_interrupted_entry(trace, sample);
1941 ttrace->entry_time = sample->time;
1942 msg = ttrace->entry_str;
1943 printed += scnprintf(msg + printed, trace__entry_str_size - printed, "%s(", sc->name);
1945 printed += syscall__scnprintf_args(sc, msg + printed, trace__entry_str_size - printed,
1946 args, trace, thread);
1949 if (!trace->duration_filter && !trace->summary_only) {
1950 trace__fprintf_entry_head(trace, thread, 1, sample->time, trace->output);
1951 fprintf(trace->output, "%-70s\n", ttrace->entry_str);
1954 ttrace->entry_pending = true;
/* Swap the reference held on the current thread */
1956 if (trace->current != thread) {
1957 thread__put(trace->current);
1958 trace->current = thread__get(thread);
1962 thread__put(thread);
/*
 * Handler for the syscall exit tracepoint.
 *
 * Updates the per-thread statistics, associates the returned fd with the
 * last vfs_getname pathname when the syscall is open and succeeded, applies
 * the duration filter, and prints the completed line: either the pending
 * entry string or a "... [continued]: name()" marker, followed by the
 * return value. The return value format depends on the syscall's fmt entry:
 * plain "%ld" without one, "-1 <ERRNO> <message>" for errors when errmsg is
 * set, "0 Timeout" when timeout is set and the result is 0, hex when hexret
 * is set.
 */
1966 static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
1967 union perf_event *event __maybe_unused,
1968 struct perf_sample *sample)
1972 struct thread *thread;
1973 int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1;
1974 struct syscall *sc = trace__syscall_info(trace, evsel, id);
1975 struct thread_trace *ttrace;
1980 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1981 ttrace = thread__trace(thread, trace->output);
1986 thread__update_stats(ttrace, id, sample);
1988 ret = perf_evsel__sc_tp_uint(evsel, ret, sample);
/* open() succeeded: ret is the new fd, name it after the path vfs_getname saw */
1990 if (id == trace->audit.open_id && ret >= 0 && trace->last_vfs_getname) {
1991 trace__set_fd_pathname(thread, ret, trace->last_vfs_getname);
1992 trace->last_vfs_getname = NULL;
1993 ++trace->stats.vfs_getname;
1996 ttrace->exit_time = sample->time;
1998 if (ttrace->entry_time) {
1999 duration = sample->time - ttrace->entry_time;
2000 if (trace__filter_duration(trace, duration))
2002 } else if (trace->duration_filter)
2005 if (trace->summary_only)
2008 trace__fprintf_entry_head(trace, thread, duration, sample->time, trace->output);
2010 if (ttrace->entry_pending) {
2011 fprintf(trace->output, "%-70s", ttrace->entry_str);
2013 fprintf(trace->output, " ... [");
2014 color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued");
2015 fprintf(trace->output, "]: %s()", sc->name);
2018 if (sc->fmt == NULL) {
2020 fprintf(trace->output, ") = %ld", ret);
2021 } else if (ret < 0 && sc->fmt->errmsg) {
2022 char bf[STRERR_BUFSIZE];
/* NOTE(review): assigning strerror_r()'s result to a char pointer relies on the GNU variant */
2023 const char *emsg = strerror_r(-ret, bf, sizeof(bf)),
2024 *e = audit_errno_to_name(-ret);
2026 fprintf(trace->output, ") = -1 %s %s", e, emsg);
2027 } else if (ret == 0 && sc->fmt->timeout)
2028 fprintf(trace->output, ") = 0 Timeout");
2029 else if (sc->fmt->hexret)
2030 fprintf(trace->output, ") = %#lx", ret);
2034 fputc('\n', trace->output);
2036 ttrace->entry_pending = false;
2039 thread__put(thread);
/*
 * Handler for the probe:vfs_getname event.
 *
 * Remembers the "pathname" payload in trace->last_vfs_getname and, when the
 * thread has a filename argument waiting (filename.ptr set by
 * thread__set_filename_pos()), inserts the pathname into the entry string at
 * the recorded position: the tail of the string, including its NUL, is
 * moved right and the name copied into the gap. If the name does not fit in
 * the space left in the buffer, only its last remaining_space characters
 * are used.
 *
 * NOTE(review): last_vfs_getname points into the sample's raw data and is
 * read again later from trace__sys_exit() - confirm that memory is still
 * valid at that point.
 */
2043 static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel,
2044 union perf_event *event __maybe_unused,
2045 struct perf_sample *sample)
2047 struct thread *thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
2048 struct thread_trace *ttrace;
2049 size_t filename_len, entry_str_len, to_move;
2050 ssize_t remaining_space;
2052 const char *filename;
2054 trace->last_vfs_getname = perf_evsel__rawptr(evsel, sample, "pathname");
2059 ttrace = thread__priv(thread);
2063 if (!ttrace->filename.ptr)
2066 entry_str_len = strlen(ttrace->entry_str);
2067 remaining_space = trace__entry_str_size - entry_str_len - 1; /* \0 */
2068 if (remaining_space <= 0)
2071 filename = trace->last_vfs_getname;
2072 filename_len = strlen(filename);
/* Too long: keep only the trailing part of the name that still fits */
2073 if (filename_len > (size_t)remaining_space) {
2074 filename += filename_len - remaining_space;
2075 filename_len = remaining_space;
2078 to_move = entry_str_len - ttrace->filename.entry_str_pos + 1; /* \0 */
2079 pos = ttrace->entry_str + ttrace->filename.entry_str_pos;
/* Regions overlap, hence memmove() for opening the gap */
2080 memmove(pos + filename_len, pos, to_move);
2081 memcpy(pos, filename, filename_len);
2083 ttrace->filename.ptr = 0;
2084 ttrace->filename.entry_str_pos = 0;
/*
 * Handler for sched:sched_stat_runtime.
 *
 * Adds the sample's "runtime" field, converted from nanoseconds to
 * milliseconds, to both the per-thread and the global runtime_ms counters.
 * The fprintf() below dumps the raw event fields (comm, pid, runtime,
 * vruntime) on a separate path that also drops the thread reference.
 */
2089 static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel,
2090 union perf_event *event __maybe_unused,
2091 struct perf_sample *sample)
2093 u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
2094 double runtime_ms = (double)runtime / NSEC_PER_MSEC;
2095 struct thread *thread = machine__findnew_thread(trace->host,
2098 struct thread_trace *ttrace = thread__trace(thread, trace->output);
2103 ttrace->runtime_ms += runtime_ms;
2104 trace->runtime_ms += runtime_ms;
2105 thread__put(thread);
2109 fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n",
2111 perf_evsel__strval(evsel, sample, "comm"),
2112 (pid_t)perf_evsel__intval(evsel, sample, "pid"),
2114 perf_evsel__intval(evsel, sample, "vruntime"));
2115 thread__put(thread);
/*
 * Generic handler for non-syscall tracepoint events: flush a pending
 * syscall entry, print the timestamp, a blank duration column when syscalls
 * are traced as well (to keep columns aligned), the event name, and the
 * event payload formatted through its tracepoint format when available.
 */
2119 static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel,
2120 union perf_event *event __maybe_unused,
2121 struct perf_sample *sample)
2123 trace__printf_interrupted_entry(trace, sample);
2124 trace__fprintf_tstamp(trace, sample->time, trace->output);
2126 if (trace->trace_syscalls)
2127 fprintf(trace->output, "( ): ");
2129 fprintf(trace->output, "%s:", evsel->name);
2131 if (evsel->tp_format) {
2132 event_format__fprintf(evsel->tp_format, sample->cpu,
2133 sample->raw_data, sample->raw_size,
2137 fprintf(trace->output, ")\n");
/*
 * Print the location described by @al to @f.
 *
 * The DSO long name followed by '@' is printed when a map is known and
 * either verbose mode or @print_dso is set. The address is printed as
 * "symbol+0xoffset" when a symbol is known and verbose mode or @print_sym is
 * set; the remaining prints fall back to the raw al->addr or sample->addr
 * (their conditions are not part of this listing).
 */
2141 static void print_location(FILE *f, struct perf_sample *sample,
2142 struct addr_location *al,
2143 bool print_dso, bool print_sym)
2146 if ((verbose || print_dso) && al->map)
2147 fprintf(f, "%s@", al->map->dso->long_name);
2149 if ((verbose || print_sym) && al->sym)
2150 fprintf(f, "%s+0x%" PRIx64, al->sym->name,
2151 al->addr - al->sym->start);
2153 fprintf(f, "0x%" PRIx64, al->addr);
2155 fprintf(f, "0x%" PRIx64, sample->addr);
/*
 * Handler for the software page fault events.
 *
 * Prints a line of the form "<kind>fault [<code location>] => <data
 * location> (<map type><level>)": the first location is resolved as a
 * function address, the faulting address sample->addr is looked up as a
 * variable and, in another visible call, as a function. map_type starts
 * out as 'd'. Nothing is printed in summary-only mode.
 */
2158 static int trace__pgfault(struct trace *trace,
2159 struct perf_evsel *evsel,
2160 union perf_event *event,
2161 struct perf_sample *sample)
2163 struct thread *thread;
2164 u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
2165 struct addr_location al;
2166 char map_type = 'd';
2167 struct thread_trace *ttrace;
2170 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
2171 ttrace = thread__trace(thread, trace->output);
2175 if (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ)
2180 if (trace->summary_only)
2183 thread__find_addr_location(thread, cpumode, MAP__FUNCTION,
2186 trace__fprintf_entry_head(trace, thread, 0, sample->time, trace->output);
2188 fprintf(trace->output, "%sfault [",
2189 evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ?
2192 print_location(trace->output, sample, &al, false, true);
2194 fprintf(trace->output, "] => ");
2196 thread__find_addr_location(thread, cpumode, MAP__VARIABLE,
2200 thread__find_addr_location(thread, cpumode,
2201 MAP__FUNCTION, sample->addr, &al);
2209 print_location(trace->output, sample, &al, true, false);
2211 fprintf(trace->output, " (%c%c)\n", map_type, al.level);
2215 thread__put(thread);
/*
 * Decide whether @sample is outside the requested pid/tid set: the first
 * test matches samples whose pid or tid is in the corresponding list, the
 * second applies when at least one list exists but nothing matched.
 */
2219 static bool skip_sample(struct trace *trace, struct perf_sample *sample)
2221 if ((trace->pid_list && intlist__find(trace->pid_list, sample->pid)) ||
2222 (trace->tid_list && intlist__find(trace->tid_list, sample->tid)))
2225 if (trace->pid_list || trace->tid_list)
/*
 * perf_tool sample callback used when replaying a file: applies the pid/tid
 * selection, latches the first timestamp as base_time unless full
 * timestamps were requested, and dispatches to the evsel's handler.
 */
2231 static int trace__process_sample(struct perf_tool *tool,
2232 union perf_event *event,
2233 struct perf_sample *sample,
2234 struct perf_evsel *evsel,
2235 struct machine *machine __maybe_unused)
2237 struct trace *trace = container_of(tool, struct trace, tool);
2240 tracepoint_handler handler = evsel->handler;
2242 if (skip_sample(trace, sample))
2245 if (!trace->full_time && trace->base_time == 0)
2246 trace->base_time = sample->time;
2250 handler(trace, evsel, event, sample);
/*
 * Turn the pid and tid target strings from the options into intlists
 * (trace->pid_list / trace->tid_list), reporting a parse error with
 * pr_err() when a list cannot be created.
 */
2256 static int parse_target_str(struct trace *trace)
2258 if (trace->opts.target.pid) {
2259 trace->pid_list = intlist__new(trace->opts.target.pid);
2260 if (trace->pid_list == NULL) {
2261 pr_err("Error parsing process id string\n");
2266 if (trace->opts.target.tid) {
2267 trace->tid_list = intlist__new(trace->opts.target.tid);
2268 if (trace->tid_list == NULL) {
2269 pr_err("Error parsing thread id string\n");
/*
 * Build an argument vector for cmd_record() and run it.
 *
 * The vector is assembled from the fixed record_args, "-e" plus the syscall
 * enter/exit tracepoints when syscalls are traced (raw_syscalls:* if that
 * tracepoint exists, else syscalls:*), "-e major-faults" / "-e minor-faults"
 * according to trace_pgfaults, and finally the caller's @argv.
 *
 * NOTE(review): no free() of rec_argv is visible in this listing, including
 * after the "Neither raw_syscalls nor syscalls events exist" error - confirm
 * the array is released on every path.
 */
2277 static int trace__record(struct trace *trace, int argc, const char **argv)
2279 unsigned int rec_argc, i, j;
2280 const char **rec_argv;
2281 const char * const record_args[] = {
2288 const char * const sc_args[] = { "-e", };
2289 unsigned int sc_args_nr = ARRAY_SIZE(sc_args);
2290 const char * const majpf_args[] = { "-e", "major-faults" };
2291 unsigned int majpf_args_nr = ARRAY_SIZE(majpf_args);
2292 const char * const minpf_args[] = { "-e", "minor-faults" };
2293 unsigned int minpf_args_nr = ARRAY_SIZE(minpf_args);
2295 /* +1 is for the event string below */
2296 rec_argc = ARRAY_SIZE(record_args) + sc_args_nr + 1 +
2297 majpf_args_nr + minpf_args_nr + argc;
/* One extra zeroed slot keeps the vector NULL terminated */
2298 rec_argv = calloc(rec_argc + 1, sizeof(char *));
2300 if (rec_argv == NULL)
2304 for (i = 0; i < ARRAY_SIZE(record_args); i++)
2305 rec_argv[j++] = record_args[i];
2307 if (trace->trace_syscalls) {
2308 for (i = 0; i < sc_args_nr; i++)
2309 rec_argv[j++] = sc_args[i];
2311 /* event string may be different for older kernels - e.g., RHEL6 */
2312 if (is_valid_tracepoint("raw_syscalls:sys_enter"))
2313 rec_argv[j++] = "raw_syscalls:sys_enter,raw_syscalls:sys_exit";
2314 else if (is_valid_tracepoint("syscalls:sys_enter"))
2315 rec_argv[j++] = "syscalls:sys_enter,syscalls:sys_exit";
2317 pr_err("Neither raw_syscalls nor syscalls events exist.\n");
2322 if (trace->trace_pgfaults & TRACE_PFMAJ)
2323 for (i = 0; i < majpf_args_nr; i++)
2324 rec_argv[j++] = majpf_args[i];
2326 if (trace->trace_pgfaults & TRACE_PFMIN)
2327 for (i = 0; i < minpf_args_nr; i++)
2328 rec_argv[j++] = minpf_args[i];
2330 for (i = 0; i < (unsigned int)argc; i++)
2331 rec_argv[j++] = argv[i];
/* j is the number of slots actually filled, which may be less than rec_argc */
2333 return cmd_record(j, rec_argv, NULL);
/* Forward declaration: called from trace__run(), defined outside this section */
2336 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp);
/*
 * Try to add the probe:vfs_getname tracepoint to @evlist with
 * trace__vfs_getname() as its handler. The evsel is discarded when the
 * probe has no "pathname" field, since the handler depends on it. The
 * result is stored in trace->vfs_getname by trace__run().
 */
2338 static bool perf_evlist__add_vfs_getname(struct perf_evlist *evlist)
2340 struct perf_evsel *evsel = perf_evsel__newtp("probe", "vfs_getname");
2344 if (perf_evsel__field(evsel, "pathname") == NULL) {
2345 perf_evsel__delete(evsel);
2349 evsel->handler = trace__vfs_getname;
2350 perf_evlist__add(evlist, evsel);
/*
 * Add a software page fault event to @evlist, handled by trace__pgfault().
 * The config value selects the event (callers pass
 * PERF_COUNT_SW_PAGE_FAULTS_MAJ or _MIN); sample_period is 1 so that every
 * fault produces a sample.
 */
2354 static int perf_evlist__add_pgfault(struct perf_evlist *evlist,
2357 struct perf_evsel *evsel;
2358 struct perf_event_attr attr = {
2359 .type = PERF_TYPE_SOFTWARE,
2363 attr.config = config;
2364 attr.sample_period = 1;
2366 event_attr_init(&attr);
2368 evsel = perf_evsel__new(&attr);
2372 evsel->handler = trace__pgfault;
2373 perf_evlist__add(evlist, evsel);
/*
 * Dispatch one event read from the ring buffers in live mode.
 *
 * The first timestamp seen becomes base_time unless full timestamps were
 * requested. Non-sample events go to trace__process_event(). Samples are
 * mapped to their evsel by sample id; unknown ids and tracepoint samples
 * without a raw payload are reported on the trace output, everything else
 * is passed to the evsel's handler.
 */
2378 static void trace__handle_event(struct trace *trace, union perf_event *event, struct perf_sample *sample)
2380 const u32 type = event->header.type;
2381 struct perf_evsel *evsel;
2383 if (!trace->full_time && trace->base_time == 0)
2384 trace->base_time = sample->time;
2386 if (type != PERF_RECORD_SAMPLE) {
2387 trace__process_event(trace, trace->host, event, sample);
2391 evsel = perf_evlist__id2evsel(trace->evlist, sample->id);
2392 if (evsel == NULL) {
2393 fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample->id);
2397 if (evsel->attr.type == PERF_TYPE_TRACEPOINT &&
2398 sample->raw_data == NULL) {
2399 fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
2400 perf_evsel__name(evsel), sample->tid,
2401 sample->cpu, sample->raw_size);
2403 tracepoint_handler handler = evsel->handler;
2404 handler(trace, evsel, event, sample);
/*
 * Create the sys_enter and sys_exit syscall tracepoint evsels, set up the
 * field accessors they need (the args pointer for sys_enter, the ret value
 * for sys_exit), add both to the evlist and remember them in
 * trace->syscalls.events. On failure the evsels created so far are deleted
 * through the out_delete_* labels, in reverse order of creation.
 */
2408 static int trace__add_syscall_newtp(struct trace *trace)
2411 struct perf_evlist *evlist = trace->evlist;
2412 struct perf_evsel *sys_enter, *sys_exit;
2414 sys_enter = perf_evsel__syscall_newtp("sys_enter", trace__sys_enter);
2415 if (sys_enter == NULL)
2418 if (perf_evsel__init_sc_tp_ptr_field(sys_enter, args))
2419 goto out_delete_sys_enter;
2421 sys_exit = perf_evsel__syscall_newtp("sys_exit", trace__sys_exit);
2422 if (sys_exit == NULL)
2423 goto out_delete_sys_enter;
2425 if (perf_evsel__init_sc_tp_uint_field(sys_exit, ret))
2426 goto out_delete_sys_exit;
2428 perf_evlist__add(evlist, sys_enter);
2429 perf_evlist__add(evlist, sys_exit);
2431 trace->syscalls.events.sys_enter = sys_enter;
2432 trace->syscalls.events.sys_exit = sys_exit;
2438 out_delete_sys_exit:
2439 perf_evsel__delete_priv(sys_exit);
2440 out_delete_sys_enter:
2441 perf_evsel__delete_priv(sys_enter);
/*
 * Build a tracepoint filter expression on the "id" field from the validated
 * qualifier ids (inclusive or exclusive depending on not_ev_qualifier) and
 * append it with "&&" to the sys_enter filter and, if that worked, to the
 * sys_exit filter.
 */
2445 static int trace__set_ev_qualifier_filter(struct trace *trace)
2448 char *filter = asprintf_expr_inout_ints("id", !trace->not_ev_qualifier,
2449 trace->ev_qualifier_ids.nr,
2450 trace->ev_qualifier_ids.entries);
2455 if (!perf_evsel__append_filter(trace->syscalls.events.sys_enter, "&&", filter))
2456 err = perf_evsel__append_filter(trace->syscalls.events.sys_exit, "&&", filter);
/*
 * Live tracing entry point.
 *
 * Sets up the events (syscall tracepoints, vfs_getname probe, page faults,
 * sched_stat_runtime), creates the target maps, initialises symbols,
 * optionally prepares the workload given in @argv, opens and mmaps the
 * events, installs the pid and syscall id filters, and then loops reading
 * the ring buffers and handing every event to trace__handle_event() until
 * done. Afterwards the thread summary and tool statistics are printed as
 * requested and the evlist is deleted.
 *
 * The labels at the end format an error message for the failing step into
 * errbuf, or print it directly, and then jump to out_delete_evlist.
 */
2466 static int trace__run(struct trace *trace, int argc, const char **argv)
2468 struct perf_evlist *evlist = trace->evlist;
2469 struct perf_evsel *evsel;
2471 unsigned long before;
/* A command line after the options means we start the workload ourselves */
2472 const bool forks = argc > 0;
2473 bool draining = false;
2477 if (trace->trace_syscalls && trace__add_syscall_newtp(trace))
2478 goto out_error_raw_syscalls;
2480 if (trace->trace_syscalls)
2481 trace->vfs_getname = perf_evlist__add_vfs_getname(evlist);
2483 if ((trace->trace_pgfaults & TRACE_PFMAJ) &&
2484 perf_evlist__add_pgfault(evlist, PERF_COUNT_SW_PAGE_FAULTS_MAJ)) {
2488 if ((trace->trace_pgfaults & TRACE_PFMIN) &&
2489 perf_evlist__add_pgfault(evlist, PERF_COUNT_SW_PAGE_FAULTS_MIN))
2493 perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
2494 trace__sched_stat_runtime))
2495 goto out_error_sched_stat_runtime;
2497 err = perf_evlist__create_maps(evlist, &trace->opts.target);
2499 fprintf(trace->output, "Problems parsing the target to trace, check your options!\n");
2500 goto out_delete_evlist;
2503 err = trace__symbols_init(trace, evlist);
2505 fprintf(trace->output, "Problems initializing symbol libraries!\n");
2506 goto out_delete_evlist;
2509 perf_evlist__config(evlist, &trace->opts);
2511 signal(SIGCHLD, sig_handler);
2512 signal(SIGINT, sig_handler);
2515 err = perf_evlist__prepare_workload(evlist, &trace->opts.target,
2518 fprintf(trace->output, "Couldn't run the workload!\n");
2519 goto out_delete_evlist;
2523 err = perf_evlist__open(evlist);
2525 goto out_error_open;
2528 * Better not use !target__has_task() here because we need to cover the
2529 * case where no threads were specified in the command line, but a
2530 * workload was, and in that case we will fill in the thread_map when
2531 * we fork the workload in perf_evlist__prepare_workload.
2533 if (trace->filter_pids.nr > 0)
2534 err = perf_evlist__set_filter_pids(evlist, trace->filter_pids.nr, trace->filter_pids.entries);
/* First thread map entry is -1: filter on our own pid (getpid()) */
2535 else if (thread_map__pid(evlist->threads, 0) == -1)
2536 err = perf_evlist__set_filter_pid(evlist, getpid());
2541 if (trace->ev_qualifier_ids.nr > 0) {
2542 err = trace__set_ev_qualifier_filter(trace);
2546 pr_debug("event qualifier tracepoint filter: %s\n",
2547 trace->syscalls.events.sys_exit->filter);
2550 err = perf_evlist__apply_filters(evlist, &evsel);
2552 goto out_error_apply_filters;
2554 err = perf_evlist__mmap(evlist, trace->opts.mmap_pages, false);
2556 goto out_error_mmap;
2558 if (!target__none(&trace->opts.target))
2559 perf_evlist__enable(evlist);
2562 perf_evlist__start_workload(evlist);
2564 trace->multiple_threads = thread_map__pid(evlist->threads, 0) == -1 ||
2565 evlist->threads->nr > 1 ||
2566 perf_evlist__first(evlist)->attr.inherit;
/* Snapshot the event count so the loop can tell whether this pass read anything */
2568 before = trace->nr_events;
2570 for (i = 0; i < evlist->nr_mmaps; i++) {
2571 union perf_event *event;
2573 while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) {
2574 struct perf_sample sample;
2578 err = perf_evlist__parse_sample(evlist, event, &sample);
2580 fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err);
2584 trace__handle_event(trace, event, &sample);
2586 perf_evlist__mmap_consume(evlist, i);
/* Asked to stop: disable the events once, then keep draining what is buffered */
2591 if (done && !draining) {
2592 perf_evlist__disable(evlist);
2598 if (trace->nr_events == before) {
/* Nothing new: block in poll, but only briefly once we were told to stop */
2599 int timeout = done ? 100 : -1;
2601 if (!draining && perf_evlist__poll(evlist, timeout) > 0) {
2602 if (perf_evlist__filter_pollfd(evlist, POLLERR | POLLHUP) == 0)
2612 thread__zput(trace->current);
2614 perf_evlist__disable(evlist);
2618 trace__fprintf_thread_summary(trace, trace->output);
2620 if (trace->show_tool_stats) {
2621 fprintf(trace->output, "Stats:\n "
2622 " vfs_getname : %" PRIu64 "\n"
2623 " proc_getname: %" PRIu64 "\n",
2624 trace->stats.vfs_getname,
2625 trace->stats.proc_getname);
2630 perf_evlist__delete(evlist);
2631 trace->evlist = NULL;
2632 trace->live = false;
2635 char errbuf[BUFSIZ];
2637 out_error_sched_stat_runtime:
2638 debugfs__strerror_open_tp(errno, errbuf, sizeof(errbuf), "sched", "sched_stat_runtime");
2641 out_error_raw_syscalls:
2642 debugfs__strerror_open_tp(errno, errbuf, sizeof(errbuf), "raw_syscalls", "sys_(enter|exit)");
2646 perf_evlist__strerror_mmap(evlist, errno, errbuf, sizeof(errbuf));
2650 perf_evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf));
2653 fprintf(trace->output, "%s\n", errbuf);
2654 goto out_delete_evlist;
2656 out_error_apply_filters:
2657 fprintf(trace->output,
2658 "Failed to set filter \"%s\" on event %s with %d (%s)\n",
2659 evsel->filter, perf_evsel__name(evsel), errno,
2660 strerror_r(errno, errbuf, sizeof(errbuf)));
2661 goto out_delete_evlist;
2664 fprintf(trace->output, "Not enough memory to run!\n");
2665 goto out_delete_evlist;
2668 fprintf(trace->output, "errno=%d,%s\n", errno, strerror(errno));
2669 goto out_delete_evlist;
/*
 * Replay path: instead of tracing live, run the events of a data file
 * (opened in PERF_DATA_MODE_READ) through a perf session.
 *
 * The function fills in trace->tool with the sample/mmap/comm/exit/fork/attr/
 * tracing_data/build_id callbacks, creates the session, binds the syscall
 * enter/exit and page fault handlers to the matching evsels of the session's
 * evlist and finally calls perf_session__process_events().  The session is
 * deleted before returning.
 */
2672 static int trace__replay(struct trace *trace)
/* Tracepoints that get a handler by name via perf_session__set_tracepoints_handlers(). */
2674 const struct perf_evsel_str_handler handlers[] = {
2675 { "probe:vfs_getname", trace__vfs_getname, },
2677 struct perf_data_file file = {
2679 .mode = PERF_DATA_MODE_READ,
2680 .force = trace->force,
2682 struct perf_session *session;
2683 struct perf_evsel *evsel;
/* Samples go to trace__process_sample(); the rest use the stock perf_event__process_*() helpers. */
2686 trace->tool.sample = trace__process_sample;
2687 trace->tool.mmap = perf_event__process_mmap;
2688 trace->tool.mmap2 = perf_event__process_mmap2;
2689 trace->tool.comm = perf_event__process_comm;
2690 trace->tool.exit = perf_event__process_exit;
2691 trace->tool.fork = perf_event__process_fork;
2692 trace->tool.attr = perf_event__process_attr;
2693 trace->tool.tracing_data = perf_event__process_tracing_data;
2694 trace->tool.build_id = perf_event__process_build_id;
2696 trace->tool.ordered_events = true;
2697 trace->tool.ordering_requires_timestamps = true;
2699 /* add tid to output */
2700 trace->multiple_threads = true;
2702 session = perf_session__new(&file, false, &trace->tool);
2703 if (session == NULL)
2706 if (symbol__init(&session->header.env) < 0)
/* trace__fprintf_thread_summary() later walks the threads of this machine. */
2709 trace->host = &session->machines.host;
2711 err = perf_session__set_tracepoints_handlers(session, handlers);
/* Syscall entry: set up the evsel with trace__sys_enter() and its "args" pointer field. */
2715 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2716 "raw_syscalls:sys_enter");
2717 /* older kernels have syscalls tp versus raw_syscalls */
2719 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2720 "syscalls:sys_enter");
2723 (perf_evsel__init_syscall_tp(evsel, trace__sys_enter) < 0 ||
2724 perf_evsel__init_sc_tp_ptr_field(evsel, args))) {
2725 pr_err("Error during initialize raw_syscalls:sys_enter event\n");
/* Syscall exit: same lookup order, with trace__sys_exit() and the unsigned "ret" field. */
2729 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2730 "raw_syscalls:sys_exit");
2732 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2733 "syscalls:sys_exit");
2735 (perf_evsel__init_syscall_tp(evsel, trace__sys_exit) < 0 ||
2736 perf_evsel__init_sc_tp_uint_field(evsel, ret))) {
2737 pr_err("Error during initialize raw_syscalls:sys_exit event\n");
/* Route the software page fault counters (major, minor and combined) to trace__pgfault(). */
2741 evlist__for_each(session->evlist, evsel) {
2742 if (evsel->attr.type == PERF_TYPE_SOFTWARE &&
2743 (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ||
2744 evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MIN ||
2745 evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS))
2746 evsel->handler = trace__pgfault;
2749 err = parse_target_str(trace);
2755 err = perf_session__process_events(session);
/* NOTE(review): unlike the other pr_err() calls in this function, this message has no trailing newline. */
2757 pr_err("Failed to process events, error %d", err);
/* Otherwise, when a summary was requested, write it to trace->output. */
2759 else if (trace->summary)
2760 trace__fprintf_thread_summary(trace, trace->output);
2763 perf_session__delete(session);
/*
 * Write the heading that introduces the per-thread summary to @fp.
 *
 * Returns fprintf()'s result for the heading, converted to size_t.
 */
static size_t trace__fprintf_threads_header(FILE *fp)
{
	return fprintf(fp, "\n Summary of events:\n\n");
}
/*
 * Print the per-syscall statistics table of one thread to @fp.
 *
 * @ttrace: per-thread trace data; its syscall_stats intlist is walked,
 *          starting at intlist__first() and advancing with intlist__next()
 * @trace:  supplies the syscall table used to turn a syscall id into a name
 * @fp:     output stream
 *
 * The fprintf() results are accumulated in "printed".
 */
2777 static size_t thread__dump_stats(struct thread_trace *ttrace,
2778 struct trace *trace, FILE *fp)
2780 struct stats *stats;
2783 struct int_node *inode = intlist__first(ttrace->syscall_stats);
2788 printed += fprintf(fp, "\n");
/* Column headings, units and ruler for the table rows printed below. */
2790 printed += fprintf(fp, " syscall calls total min avg max stddev\n");
2791 printed += fprintf(fp, " (msec) (msec) (msec) (msec) (%%)\n");
2792 printed += fprintf(fp, " --------------- -------- --------- --------- --------- --------- ------\n");
2794 /* each int_node is a syscall */
/* The node's priv pointer holds the struct stats collected for that syscall. */
2796 stats = inode->priv;
/* min, max and (further down) avg are divided by NSEC_PER_MSEC for display in msec. */
2798 double min = (double)(stats->min) / NSEC_PER_MSEC;
2799 double max = (double)(stats->max) / NSEC_PER_MSEC;
2800 double avg = avg_stats(stats);
2802 u64 n = (u64) stats->n;
/* stddev as a percentage of the average, computed before avg is rescaled; 0 when avg is 0. */
2804 pct = avg ? 100.0 * stddev_stats(stats)/avg : 0.0;
2805 avg /= NSEC_PER_MSEC;
/* inode->i indexes trace->syscalls.table to find the syscall's name. */
2807 sc = &trace->syscalls.table[inode->i];
2808 printed += fprintf(fp, " %-15s", sc->name);
/* The "total" column is derived as avg * n. */
2809 printed += fprintf(fp, " %8" PRIu64 " %9.3f %9.3f %9.3f",
2810 n, avg * n, min, avg);
2811 printed += fprintf(fp, " %9.3f %9.2f%%\n", max, pct);
2814 inode = intlist__next(inode);
2817 printed += fprintf(fp, "\n\n");
2822 /* struct used to pass data to per-thread function: handed to trace__fprintf_one_thread() through its priv argument */
2823 struct summary_data {
/* trace being summarised; the callback reads its nr_events and passes it to thread__dump_stats() */
2825 struct trace *trace;
2829 static int trace__fprintf_one_thread(struct thread *thread, void *priv)
2831 struct summary_data *data = priv;
2832 FILE *fp = data->fp;
2833 size_t printed = data->printed;
2834 struct trace *trace = data->trace;
2835 struct thread_trace *ttrace = thread__priv(thread);
2841 ratio = (double)ttrace->nr_events / trace->nr_events * 100.0;
2843 printed += fprintf(fp, " %s (%d), ", thread__comm_str(thread), thread->tid);
2844 printed += fprintf(fp, "%lu events, ", ttrace->nr_events);
2845 printed += fprintf(fp, "%.1f%%", ratio);
2847 printed += fprintf(fp, ", %lu majfaults", ttrace->pfmaj);
2849 printed += fprintf(fp, ", %lu minfaults", ttrace->pfmin);
2850 printed += fprintf(fp, ", %.3f msec\n", ttrace->runtime_ms);
2851 printed += thread__dump_stats(ttrace, trace, fp);
2853 data->printed += printed;
2858 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
2860 struct summary_data data = {
2864 data.printed = trace__fprintf_threads_header(fp);
2866 machine__for_each_thread(trace->host, trace__fprintf_one_thread, &data);
2868 return data.printed;
2871 static int trace__set_duration(const struct option *opt, const char *str,
2872 int unset __maybe_unused)
2874 struct trace *trace = opt->value;
2876 trace->duration_filter = atof(str);
2880 static int trace__set_filter_pids(const struct option *opt, const char *str,
2881 int unset __maybe_unused)
2885 struct trace *trace = opt->value;
2887 * FIXME: introduce a intarray class, plain parse csv and create a
2888 * { int nr, int entries[] } struct...
2890 struct intlist *list = intlist__new(str);
2895 i = trace->filter_pids.nr = intlist__nr_entries(list) + 1;
2896 trace->filter_pids.entries = calloc(i, sizeof(pid_t));
2898 if (trace->filter_pids.entries == NULL)
2901 trace->filter_pids.entries[0] = getpid();
2903 for (i = 1; i < trace->filter_pids.nr; ++i)
2904 trace->filter_pids.entries[i] = intlist__entry(list, i - 1)->i;
2906 intlist__delete(list);
/*
 * Open @filename for writing and make it the trace output stream.
 *
 * If stat() succeeds and reports a non-empty file, the existing file is
 * first renamed to "<filename>.old".
 *
 * Returns 0 on success, or -errno when fopen() fails.
 */
2912 static int trace__open_output(struct trace *trace, const char *filename)
2916 if (!stat(filename, &st) && st.st_size) {
2917 char oldname[PATH_MAX];
2919 scnprintf(oldname, sizeof(oldname), "%s.old", filename);
/* The result of rename() is not checked. */
2921 rename(filename, oldname);
/* Mode "w" creates the file or truncates whatever is still at @filename. */
2924 trace->output = fopen(filename, "w");
2926 return trace->output == NULL ? -errno : 0;
2929 static int parse_pagefaults(const struct option *opt, const char *str,
2930 int unset __maybe_unused)
2932 int *trace_pgfaults = opt->value;
2934 if (strcmp(str, "all") == 0)
2935 *trace_pgfaults |= TRACE_PFMAJ | TRACE_PFMIN;
2936 else if (strcmp(str, "maj") == 0)
2937 *trace_pgfaults |= TRACE_PFMAJ;
2938 else if (strcmp(str, "min") == 0)
2939 *trace_pgfaults |= TRACE_PFMIN;
2946 static void evlist__set_evsel_handler(struct perf_evlist *evlist, void *handler)
2948 struct perf_evsel *evsel;
2950 evlist__for_each(evlist, evsel)
2951 evsel->handler = handler;
2954 int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
2956 const char *trace_usage[] = {
2957 "perf trace [<options>] [<command>]",
2958 "perf trace [<options>] -- <command> [<options>]",
2959 "perf trace record [<options>] [<command>]",
2960 "perf trace record [<options>] -- <command> [<options>]",
2963 struct trace trace = {
2965 .machine = audit_detect_machine(),
2966 .open_id = audit_name_to_syscall("open", trace.audit.machine),
2976 .user_freq = UINT_MAX,
2977 .user_interval = ULLONG_MAX,
2978 .no_buffering = true,
2979 .mmap_pages = UINT_MAX,
2980 .proc_map_timeout = 500,
2984 .trace_syscalls = true,
2986 const char *output_name = NULL;
2987 const char *ev_qualifier_str = NULL;
2988 const struct option trace_options[] = {
2989 OPT_CALLBACK(0, "event", &trace.evlist, "event",
2990 "event selector. use 'perf list' to list available events",
2991 parse_events_option),
2992 OPT_BOOLEAN(0, "comm", &trace.show_comm,
2993 "show the thread COMM next to its id"),
2994 OPT_BOOLEAN(0, "tool_stats", &trace.show_tool_stats, "show tool stats"),
2995 OPT_STRING('e', "expr", &ev_qualifier_str, "expr", "list of syscalls to trace"),
2996 OPT_STRING('o', "output", &output_name, "file", "output file name"),
2997 OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"),
2998 OPT_STRING('p', "pid", &trace.opts.target.pid, "pid",
2999 "trace events on existing process id"),
3000 OPT_STRING('t', "tid", &trace.opts.target.tid, "tid",
3001 "trace events on existing thread id"),
3002 OPT_CALLBACK(0, "filter-pids", &trace, "CSV list of pids",
3003 "pids to filter (by the kernel)", trace__set_filter_pids),
3004 OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide,
3005 "system-wide collection from all CPUs"),
3006 OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu",
3007 "list of cpus to monitor"),
3008 OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit,
3009 "child tasks do not inherit counters"),
3010 OPT_CALLBACK('m', "mmap-pages", &trace.opts.mmap_pages, "pages",
3011 "number of mmap data pages",
3012 perf_evlist__parse_mmap_pages),
3013 OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user",
3015 OPT_CALLBACK(0, "duration", &trace, "float",
3016 "show only events with duration > N.M ms",
3017 trace__set_duration),
3018 OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"),
3019 OPT_INCR('v', "verbose", &verbose, "be more verbose"),
3020 OPT_BOOLEAN('T', "time", &trace.full_time,
3021 "Show full timestamp, not time relative to first start"),
3022 OPT_BOOLEAN('s', "summary", &trace.summary_only,
3023 "Show only syscall summary with statistics"),
3024 OPT_BOOLEAN('S', "with-summary", &trace.summary,
3025 "Show all syscalls and summary with statistics"),
3026 OPT_CALLBACK_DEFAULT('F', "pf", &trace.trace_pgfaults, "all|maj|min",
3027 "Trace pagefaults", parse_pagefaults, "maj"),
3028 OPT_BOOLEAN(0, "syscalls", &trace.trace_syscalls, "Trace syscalls"),
3029 OPT_BOOLEAN('f', "force", &trace.force, "don't complain, do it"),
3030 OPT_UINTEGER(0, "proc-map-timeout", &trace.opts.proc_map_timeout,
3031 "per thread proc mmap processing timeout in ms"),
3034 const char * const trace_subcommands[] = { "record", NULL };
3038 signal(SIGSEGV, sighandler_dump_stack);
3039 signal(SIGFPE, sighandler_dump_stack);
3041 trace.evlist = perf_evlist__new();
3043 if (trace.evlist == NULL) {
3044 pr_err("Not enough memory to run!\n");
3049 argc = parse_options_subcommand(argc, argv, trace_options, trace_subcommands,
3050 trace_usage, PARSE_OPT_STOP_AT_NON_OPTION);
3052 if (trace.trace_pgfaults) {
3053 trace.opts.sample_address = true;
3054 trace.opts.sample_time = true;
3057 if (trace.evlist->nr_entries > 0)
3058 evlist__set_evsel_handler(trace.evlist, trace__event_handler);
3060 if ((argc >= 1) && (strcmp(argv[0], "record") == 0))
3061 return trace__record(&trace, argc-1, &argv[1]);
3063 /* summary_only implies summary option, but don't overwrite summary if set */
3064 if (trace.summary_only)
3065 trace.summary = trace.summary_only;
3067 if (!trace.trace_syscalls && !trace.trace_pgfaults &&
3068 trace.evlist->nr_entries == 0 /* Was --events used? */) {
3069 pr_err("Please specify something to trace.\n");
3073 if (output_name != NULL) {
3074 err = trace__open_output(&trace, output_name);
3076 perror("failed to create output file");
3081 if (ev_qualifier_str != NULL) {
3082 const char *s = ev_qualifier_str;
3083 struct strlist_config slist_config = {
3084 .dirname = system_path(STRACE_GROUPS_DIR),
3087 trace.not_ev_qualifier = *s == '!';
3088 if (trace.not_ev_qualifier)
3090 trace.ev_qualifier = strlist__new(s, &slist_config);
3091 if (trace.ev_qualifier == NULL) {
3092 fputs("Not enough memory to parse event qualifier",
3098 err = trace__validate_ev_qualifier(&trace);
3103 err = target__validate(&trace.opts.target);
3105 target__strerror(&trace.opts.target, err, bf, sizeof(bf));
3106 fprintf(trace.output, "%s", bf);
3110 err = target__parse_uid(&trace.opts.target);
3112 target__strerror(&trace.opts.target, err, bf, sizeof(bf));
3113 fprintf(trace.output, "%s", bf);
3117 if (!argc && target__none(&trace.opts.target))
3118 trace.opts.target.system_wide = true;
3121 err = trace__replay(&trace);
3123 err = trace__run(&trace, argc, argv);
3126 if (output_name != NULL)
3127 fclose(trace.output);