1 #include <traceevent/event-parse.h>
3 #include "util/color.h"
4 #include "util/debug.h"
5 #include "util/evlist.h"
6 #include "util/exec_cmd.h"
7 #include "util/machine.h"
8 #include "util/session.h"
9 #include "util/thread.h"
10 #include "util/parse-options.h"
11 #include "util/strlist.h"
12 #include "util/intlist.h"
13 #include "util/thread_map.h"
14 #include "util/stat.h"
15 #include "trace-event.h"
16 #include "util/parse-events.h"
21 #include <linux/futex.h>
23 /* For older distros: */
25 # define MAP_STACK 0x20000
29 # define MADV_HWPOISON 100
32 #ifndef MADV_MERGEABLE
33 # define MADV_MERGEABLE 12
36 #ifndef MADV_UNMERGEABLE
37 # define MADV_UNMERGEABLE 13
41 # define EFD_SEMAPHORE 1
45 # define EFD_NONBLOCK 00004000
49 # define EFD_CLOEXEC 02000000
53 # define O_CLOEXEC 02000000
61 # define SOCK_CLOEXEC 02000000
65 # define SOCK_NONBLOCK 00004000
68 #ifndef MSG_CMSG_CLOEXEC
69 # define MSG_CMSG_CLOEXEC 0x40000000
72 #ifndef PERF_FLAG_FD_NO_GROUP
73 # define PERF_FLAG_FD_NO_GROUP (1UL << 0)
76 #ifndef PERF_FLAG_FD_OUTPUT
77 # define PERF_FLAG_FD_OUTPUT (1UL << 1)
80 #ifndef PERF_FLAG_PID_CGROUP
81 # define PERF_FLAG_PID_CGROUP (1UL << 2) /* pid=cgroup id, per-cpu mode only */
84 #ifndef PERF_FLAG_FD_CLOEXEC
85 # define PERF_FLAG_FD_CLOEXEC (1UL << 3) /* O_CLOEXEC */
92 u64 (*integer)(struct tp_field *field, struct perf_sample *sample);
93 void *(*pointer)(struct tp_field *field, struct perf_sample *sample);
97 #define TP_UINT_FIELD(bits) \
98 static u64 tp_field__u##bits(struct tp_field *field, struct perf_sample *sample) \
101 memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
110 #define TP_UINT_FIELD__SWAPPED(bits) \
111 static u64 tp_field__swapped_u##bits(struct tp_field *field, struct perf_sample *sample) \
114 memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
115 return bswap_##bits(value);\
118 TP_UINT_FIELD__SWAPPED(16);
119 TP_UINT_FIELD__SWAPPED(32);
120 TP_UINT_FIELD__SWAPPED(64);
122 static int tp_field__init_uint(struct tp_field *field,
123 struct format_field *format_field,
126 field->offset = format_field->offset;
128 switch (format_field->size) {
130 field->integer = tp_field__u8;
133 field->integer = needs_swap ? tp_field__swapped_u16 : tp_field__u16;
136 field->integer = needs_swap ? tp_field__swapped_u32 : tp_field__u32;
139 field->integer = needs_swap ? tp_field__swapped_u64 : tp_field__u64;
148 static void *tp_field__ptr(struct tp_field *field, struct perf_sample *sample)
150 return sample->raw_data + field->offset;
153 static int tp_field__init_ptr(struct tp_field *field, struct format_field *format_field)
155 field->offset = format_field->offset;
156 field->pointer = tp_field__ptr;
163 struct tp_field args, ret;
167 static int perf_evsel__init_tp_uint_field(struct perf_evsel *evsel,
168 struct tp_field *field,
171 struct format_field *format_field = perf_evsel__field(evsel, name);
173 if (format_field == NULL)
176 return tp_field__init_uint(field, format_field, evsel->needs_swap);
/*
 * Initialise the unsigned-integer member @name of the struct syscall_tp
 * hanging off @evsel->priv, using the tracepoint field of the same name.
 *
 * @evsel is parenthesised before being dereferenced so that any pointer
 * expression can be passed safely. Note that @evsel is evaluated twice, so
 * it must not have side effects. Evaluates to the return value of
 * perf_evsel__init_tp_uint_field().
 */
#define perf_evsel__init_sc_tp_uint_field(evsel, name) \
	({ struct syscall_tp *sc = (evsel)->priv;\
	   perf_evsel__init_tp_uint_field((evsel), &sc->name, #name); })
/*
 * Look up the tracepoint field called @name on @evsel and set up @field to
 * be read as a pointer into the raw sample payload.
 *
 * Returns -1 when the event has no such field, otherwise whatever
 * tp_field__init_ptr() returns.
 */
static int perf_evsel__init_tp_ptr_field(struct perf_evsel *evsel,
					 struct tp_field *field,
					 const char *name)
{
	struct format_field *format_field = perf_evsel__field(evsel, name);

	return format_field ? tp_field__init_ptr(field, format_field) : -1;
}
/*
 * Initialise the pointer member @name of the struct syscall_tp hanging off
 * @evsel->priv, using the tracepoint field of the same name.
 *
 * @evsel is parenthesised before being dereferenced so that any pointer
 * expression can be passed safely. Note that @evsel is evaluated twice, so
 * it must not have side effects. Evaluates to the return value of
 * perf_evsel__init_tp_ptr_field().
 */
#define perf_evsel__init_sc_tp_ptr_field(evsel, name) \
	({ struct syscall_tp *sc = (evsel)->priv;\
	   perf_evsel__init_tp_ptr_field((evsel), &sc->name, #name); })
199 static void perf_evsel__delete_priv(struct perf_evsel *evsel)
202 perf_evsel__delete(evsel);
205 static int perf_evsel__init_syscall_tp(struct perf_evsel *evsel, void *handler)
207 evsel->priv = malloc(sizeof(struct syscall_tp));
208 if (evsel->priv != NULL) {
209 if (perf_evsel__init_sc_tp_uint_field(evsel, id))
212 evsel->handler = handler;
223 static struct perf_evsel *perf_evsel__syscall_newtp(const char *direction, void *handler)
225 struct perf_evsel *evsel = perf_evsel__newtp("raw_syscalls", direction);
227 /* older kernel (e.g., RHEL6) use syscalls:{enter,exit} */
229 evsel = perf_evsel__newtp("syscalls", direction);
232 if (perf_evsel__init_syscall_tp(evsel, handler))
239 perf_evsel__delete_priv(evsel);
/*
 * Read the unsigned-integer syscall tracepoint field @name from @sample,
 * going through the accessor stored in the struct syscall_tp that
 * @evsel->priv points to.
 *
 * @evsel is parenthesised before being dereferenced so that any pointer
 * expression can be passed safely.
 */
#define perf_evsel__sc_tp_uint(evsel, name, sample) \
	({ struct syscall_tp *fields = (evsel)->priv; \
	   fields->name.integer(&fields->name, sample); })
/*
 * Obtain a pointer to the syscall tracepoint field @name inside @sample,
 * going through the accessor stored in the struct syscall_tp that
 * @evsel->priv points to.
 *
 * @evsel is parenthesised before being dereferenced so that any pointer
 * expression can be passed safely.
 */
#define perf_evsel__sc_tp_ptr(evsel, name, sample) \
	({ struct syscall_tp *fields = (evsel)->priv; \
	   fields->name.pointer(&fields->name, sample); })
253 struct thread *thread;
263 const char **entries;
266 #define DEFINE_STRARRAY(array) struct strarray strarray__##array = { \
267 .nr_entries = ARRAY_SIZE(array), \
271 #define DEFINE_STRARRAY_OFFSET(array, off) struct strarray strarray__##array = { \
273 .nr_entries = ARRAY_SIZE(array), \
277 static size_t __syscall_arg__scnprintf_strarray(char *bf, size_t size,
279 struct syscall_arg *arg)
281 struct strarray *sa = arg->parm;
282 int idx = arg->val - sa->offset;
284 if (idx < 0 || idx >= sa->nr_entries)
285 return scnprintf(bf, size, intfmt, arg->val);
287 return scnprintf(bf, size, "%s", sa->entries[idx]);
/*
 * String-table beautifier whose fallback for unknown values is decimal.
 */
static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size,
					      struct syscall_arg *arg)
{
	const char *decimal_fmt = "%d";

	return __syscall_arg__scnprintf_strarray(bf, size, decimal_fmt, arg);
}
296 #define SCA_STRARRAY syscall_arg__scnprintf_strarray
298 #if defined(__i386__) || defined(__x86_64__)
300 * FIXME: Make this available to all arches as soon as the ioctl beautifier
301 * gets rewritten to support all arches.
/*
 * String-table beautifier whose fallback for unknown values is hexadecimal
 * with a 0x prefix.
 */
static size_t syscall_arg__scnprintf_strhexarray(char *bf, size_t size,
						 struct syscall_arg *arg)
{
	const char *hex_fmt = "%#x";

	return __syscall_arg__scnprintf_strarray(bf, size, hex_fmt, arg);
}
309 #define SCA_STRHEXARRAY syscall_arg__scnprintf_strhexarray
310 #endif /* defined(__i386__) || defined(__x86_64__) */
312 static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
313 struct syscall_arg *arg);
315 #define SCA_FD syscall_arg__scnprintf_fd
317 static size_t syscall_arg__scnprintf_fd_at(char *bf, size_t size,
318 struct syscall_arg *arg)
323 return scnprintf(bf, size, "CWD");
325 return syscall_arg__scnprintf_fd(bf, size, arg);
328 #define SCA_FDAT syscall_arg__scnprintf_fd_at
330 static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
331 struct syscall_arg *arg);
333 #define SCA_CLOSE_FD syscall_arg__scnprintf_close_fd
335 static size_t syscall_arg__scnprintf_hex(char *bf, size_t size,
336 struct syscall_arg *arg)
338 return scnprintf(bf, size, "%#lx", arg->val);
341 #define SCA_HEX syscall_arg__scnprintf_hex
343 static size_t syscall_arg__scnprintf_int(char *bf, size_t size,
344 struct syscall_arg *arg)
346 return scnprintf(bf, size, "%d", arg->val);
349 #define SCA_INT syscall_arg__scnprintf_int
351 static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size,
352 struct syscall_arg *arg)
354 int printed = 0, prot = arg->val;
356 if (prot == PROT_NONE)
357 return scnprintf(bf, size, "NONE");
358 #define P_MMAP_PROT(n) \
359 if (prot & PROT_##n) { \
360 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
370 P_MMAP_PROT(GROWSDOWN);
371 P_MMAP_PROT(GROWSUP);
375 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", prot);
380 #define SCA_MMAP_PROT syscall_arg__scnprintf_mmap_prot
382 static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size,
383 struct syscall_arg *arg)
385 int printed = 0, flags = arg->val;
387 #define P_MMAP_FLAG(n) \
388 if (flags & MAP_##n) { \
389 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
394 P_MMAP_FLAG(PRIVATE);
398 P_MMAP_FLAG(ANONYMOUS);
399 P_MMAP_FLAG(DENYWRITE);
400 P_MMAP_FLAG(EXECUTABLE);
403 P_MMAP_FLAG(GROWSDOWN);
405 P_MMAP_FLAG(HUGETLB);
408 P_MMAP_FLAG(NONBLOCK);
409 P_MMAP_FLAG(NORESERVE);
410 P_MMAP_FLAG(POPULATE);
412 #ifdef MAP_UNINITIALIZED
413 P_MMAP_FLAG(UNINITIALIZED);
418 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
423 #define SCA_MMAP_FLAGS syscall_arg__scnprintf_mmap_flags
425 static size_t syscall_arg__scnprintf_mremap_flags(char *bf, size_t size,
426 struct syscall_arg *arg)
428 int printed = 0, flags = arg->val;
430 #define P_MREMAP_FLAG(n) \
431 if (flags & MREMAP_##n) { \
432 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
433 flags &= ~MREMAP_##n; \
436 P_MREMAP_FLAG(MAYMOVE);
438 P_MREMAP_FLAG(FIXED);
443 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
448 #define SCA_MREMAP_FLAGS syscall_arg__scnprintf_mremap_flags
450 static size_t syscall_arg__scnprintf_madvise_behavior(char *bf, size_t size,
451 struct syscall_arg *arg)
453 int behavior = arg->val;
456 #define P_MADV_BHV(n) case MADV_##n: return scnprintf(bf, size, #n)
459 P_MADV_BHV(SEQUENTIAL);
460 P_MADV_BHV(WILLNEED);
461 P_MADV_BHV(DONTNEED);
463 P_MADV_BHV(DONTFORK);
465 P_MADV_BHV(HWPOISON);
466 #ifdef MADV_SOFT_OFFLINE
467 P_MADV_BHV(SOFT_OFFLINE);
469 P_MADV_BHV(MERGEABLE);
470 P_MADV_BHV(UNMERGEABLE);
472 P_MADV_BHV(HUGEPAGE);
474 #ifdef MADV_NOHUGEPAGE
475 P_MADV_BHV(NOHUGEPAGE);
478 P_MADV_BHV(DONTDUMP);
487 return scnprintf(bf, size, "%#x", behavior);
490 #define SCA_MADV_BHV syscall_arg__scnprintf_madvise_behavior
492 static size_t syscall_arg__scnprintf_flock(char *bf, size_t size,
493 struct syscall_arg *arg)
495 int printed = 0, op = arg->val;
498 return scnprintf(bf, size, "NONE");
500 if ((op & LOCK_##cmd) == LOCK_##cmd) { \
501 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #cmd); \
516 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", op);
521 #define SCA_FLOCK syscall_arg__scnprintf_flock
523 static size_t syscall_arg__scnprintf_futex_op(char *bf, size_t size, struct syscall_arg *arg)
525 enum syscall_futex_args {
526 SCF_UADDR = (1 << 0),
529 SCF_TIMEOUT = (1 << 3),
530 SCF_UADDR2 = (1 << 4),
534 int cmd = op & FUTEX_CMD_MASK;
538 #define P_FUTEX_OP(n) case FUTEX_##n: printed = scnprintf(bf, size, #n);
539 P_FUTEX_OP(WAIT); arg->mask |= SCF_VAL3|SCF_UADDR2; break;
540 P_FUTEX_OP(WAKE); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
541 P_FUTEX_OP(FD); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
542 P_FUTEX_OP(REQUEUE); arg->mask |= SCF_VAL3|SCF_TIMEOUT; break;
543 P_FUTEX_OP(CMP_REQUEUE); arg->mask |= SCF_TIMEOUT; break;
544 P_FUTEX_OP(CMP_REQUEUE_PI); arg->mask |= SCF_TIMEOUT; break;
545 P_FUTEX_OP(WAKE_OP); break;
546 P_FUTEX_OP(LOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
547 P_FUTEX_OP(UNLOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
548 P_FUTEX_OP(TRYLOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2; break;
549 P_FUTEX_OP(WAIT_BITSET); arg->mask |= SCF_UADDR2; break;
550 P_FUTEX_OP(WAKE_BITSET); arg->mask |= SCF_UADDR2; break;
551 P_FUTEX_OP(WAIT_REQUEUE_PI); break;
552 default: printed = scnprintf(bf, size, "%#x", cmd); break;
555 if (op & FUTEX_PRIVATE_FLAG)
556 printed += scnprintf(bf + printed, size - printed, "|PRIV");
558 if (op & FUTEX_CLOCK_REALTIME)
559 printed += scnprintf(bf + printed, size - printed, "|CLKRT");
564 #define SCA_FUTEX_OP syscall_arg__scnprintf_futex_op
/*
 * Names for the epoll_ctl() "op" argument. The companion strarray is
 * declared with an offset of 1, so slot 0 names argument value 1.
 */
static const char *epoll_ctl_ops[] = {
	[0] = "ADD",
	[1] = "DEL",
	[2] = "MOD",
};
567 static DEFINE_STRARRAY_OFFSET(epoll_ctl_ops, 1);
/* Names for the "which" argument of getitimer()/setitimer(), indexed by value. */
static const char *itimers[] = {
	[0] = "REAL",
	[1] = "VIRTUAL",
	[2] = "PROF",
};
570 static DEFINE_STRARRAY(itimers);
572 static const char *whences[] = { "SET", "CUR", "END",
580 static DEFINE_STRARRAY(whences);
582 static const char *fcntl_cmds[] = {
583 "DUPFD", "GETFD", "SETFD", "GETFL", "SETFL", "GETLK", "SETLK",
584 "SETLKW", "SETOWN", "GETOWN", "SETSIG", "GETSIG", "F_GETLK64",
585 "F_SETLK64", "F_SETLKW64", "F_SETOWN_EX", "F_GETOWN_EX",
588 static DEFINE_STRARRAY(fcntl_cmds);
590 static const char *rlimit_resources[] = {
591 "CPU", "FSIZE", "DATA", "STACK", "CORE", "RSS", "NPROC", "NOFILE",
592 "MEMLOCK", "AS", "LOCKS", "SIGPENDING", "MSGQUEUE", "NICE", "RTPRIO",
595 static DEFINE_STRARRAY(rlimit_resources);
/* Names for the "how" argument of rt_sigprocmask(), indexed by value. */
static const char *sighow[] = {
	[0] = "BLOCK",
	[1] = "UNBLOCK",
	[2] = "SETMASK",
};
598 static DEFINE_STRARRAY(sighow);
/* Names for clock id arguments (used for clock_gettime's clk_id), indexed by value. */
static const char *clockid[] = {
	[0]  = "REALTIME",
	[1]  = "MONOTONIC",
	[2]  = "PROCESS_CPUTIME_ID",
	[3]  = "THREAD_CPUTIME_ID",
	[4]  = "MONOTONIC_RAW",
	[5]  = "REALTIME_COARSE",
	[6]  = "MONOTONIC_COARSE",
	[7]  = "BOOTTIME",
	[8]  = "REALTIME_ALARM",
	[9]  = "BOOTTIME_ALARM",
	[10] = "SGI_CYCLE",
	[11] = "TAI"
};
605 static DEFINE_STRARRAY(clockid);
607 static const char *socket_families[] = {
608 "UNSPEC", "LOCAL", "INET", "AX25", "IPX", "APPLETALK", "NETROM",
609 "BRIDGE", "ATMPVC", "X25", "INET6", "ROSE", "DECnet", "NETBEUI",
610 "SECURITY", "KEY", "NETLINK", "PACKET", "ASH", "ECONET", "ATMSVC",
611 "RDS", "SNA", "IRDA", "PPPOX", "WANPIPE", "LLC", "IB", "CAN", "TIPC",
612 "BLUETOOTH", "IUCV", "RXRPC", "ISDN", "PHONET", "IEEE802154", "CAIF",
613 "ALG", "NFC", "VSOCK",
615 static DEFINE_STRARRAY(socket_families);
617 #ifndef SOCK_TYPE_MASK
618 #define SOCK_TYPE_MASK 0xf
621 static size_t syscall_arg__scnprintf_socket_type(char *bf, size_t size,
622 struct syscall_arg *arg)
626 flags = type & ~SOCK_TYPE_MASK;
628 type &= SOCK_TYPE_MASK;
630 * Can't use a strarray, MIPS may override for ABI reasons.
633 #define P_SK_TYPE(n) case SOCK_##n: printed = scnprintf(bf, size, #n); break;
638 P_SK_TYPE(SEQPACKET);
643 printed = scnprintf(bf, size, "%#x", type);
646 #define P_SK_FLAG(n) \
647 if (flags & SOCK_##n) { \
648 printed += scnprintf(bf + printed, size - printed, "|%s", #n); \
649 flags &= ~SOCK_##n; \
657 printed += scnprintf(bf + printed, size - printed, "|%#x", flags);
662 #define SCA_SK_TYPE syscall_arg__scnprintf_socket_type
665 #define MSG_PROBE 0x10
667 #ifndef MSG_WAITFORONE
668 #define MSG_WAITFORONE 0x10000
670 #ifndef MSG_SENDPAGE_NOTLAST
671 #define MSG_SENDPAGE_NOTLAST 0x20000
674 #define MSG_FASTOPEN 0x20000000
677 static size_t syscall_arg__scnprintf_msg_flags(char *bf, size_t size,
678 struct syscall_arg *arg)
680 int printed = 0, flags = arg->val;
683 return scnprintf(bf, size, "NONE");
684 #define P_MSG_FLAG(n) \
685 if (flags & MSG_##n) { \
686 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
692 P_MSG_FLAG(DONTROUTE);
697 P_MSG_FLAG(DONTWAIT);
704 P_MSG_FLAG(ERRQUEUE);
705 P_MSG_FLAG(NOSIGNAL);
707 P_MSG_FLAG(WAITFORONE);
708 P_MSG_FLAG(SENDPAGE_NOTLAST);
709 P_MSG_FLAG(FASTOPEN);
710 P_MSG_FLAG(CMSG_CLOEXEC);
714 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
719 #define SCA_MSG_FLAGS syscall_arg__scnprintf_msg_flags
721 static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size,
722 struct syscall_arg *arg)
727 if (mode == F_OK) /* 0 */
728 return scnprintf(bf, size, "F");
730 if (mode & n##_OK) { \
731 printed += scnprintf(bf + printed, size - printed, "%s", #n); \
741 printed += scnprintf(bf + printed, size - printed, "|%#x", mode);
746 #define SCA_ACCMODE syscall_arg__scnprintf_access_mode
748 static size_t syscall_arg__scnprintf_filename(char *bf, size_t size,
749 struct syscall_arg *arg);
751 #define SCA_FILENAME syscall_arg__scnprintf_filename
753 static size_t syscall_arg__scnprintf_open_flags(char *bf, size_t size,
754 struct syscall_arg *arg)
756 int printed = 0, flags = arg->val;
758 if (!(flags & O_CREAT))
759 arg->mask |= 1 << (arg->idx + 1); /* Mask the mode parm */
762 return scnprintf(bf, size, "RDONLY");
764 if (flags & O_##n) { \
765 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
789 if ((flags & O_SYNC) == O_SYNC)
790 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", "SYNC");
802 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
807 #define SCA_OPEN_FLAGS syscall_arg__scnprintf_open_flags
809 static size_t syscall_arg__scnprintf_perf_flags(char *bf, size_t size,
810 struct syscall_arg *arg)
812 int printed = 0, flags = arg->val;
818 if (flags & PERF_FLAG_##n) { \
819 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
820 flags &= ~PERF_FLAG_##n; \
830 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
835 #define SCA_PERF_FLAGS syscall_arg__scnprintf_perf_flags
837 static size_t syscall_arg__scnprintf_eventfd_flags(char *bf, size_t size,
838 struct syscall_arg *arg)
840 int printed = 0, flags = arg->val;
843 return scnprintf(bf, size, "NONE");
845 if (flags & EFD_##n) { \
846 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
856 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
861 #define SCA_EFD_FLAGS syscall_arg__scnprintf_eventfd_flags
863 static size_t syscall_arg__scnprintf_pipe_flags(char *bf, size_t size,
864 struct syscall_arg *arg)
866 int printed = 0, flags = arg->val;
869 if (flags & O_##n) { \
870 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
879 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
884 #define SCA_PIPE_FLAGS syscall_arg__scnprintf_pipe_flags
886 static size_t syscall_arg__scnprintf_signum(char *bf, size_t size, struct syscall_arg *arg)
891 #define P_SIGNUM(n) case SIG##n: return scnprintf(bf, size, #n)
934 return scnprintf(bf, size, "%#x", sig);
937 #define SCA_SIGNUM syscall_arg__scnprintf_signum
939 #if defined(__i386__) || defined(__x86_64__)
941 * FIXME: Make this available to all arches.
943 #define TCGETS 0x5401
945 static const char *tioctls[] = {
946 "TCGETS", "TCSETS", "TCSETSW", "TCSETSF", "TCGETA", "TCSETA", "TCSETAW",
947 "TCSETAF", "TCSBRK", "TCXONC", "TCFLSH", "TIOCEXCL", "TIOCNXCL",
948 "TIOCSCTTY", "TIOCGPGRP", "TIOCSPGRP", "TIOCOUTQ", "TIOCSTI",
949 "TIOCGWINSZ", "TIOCSWINSZ", "TIOCMGET", "TIOCMBIS", "TIOCMBIC",
950 "TIOCMSET", "TIOCGSOFTCAR", "TIOCSSOFTCAR", "FIONREAD", "TIOCLINUX",
951 "TIOCCONS", "TIOCGSERIAL", "TIOCSSERIAL", "TIOCPKT", "FIONBIO",
952 "TIOCNOTTY", "TIOCSETD", "TIOCGETD", "TCSBRKP", [0x27] = "TIOCSBRK",
953 "TIOCCBRK", "TIOCGSID", "TCGETS2", "TCSETS2", "TCSETSW2", "TCSETSF2",
954 "TIOCGRS485", "TIOCSRS485", "TIOCGPTN", "TIOCSPTLCK",
955 "TIOCGDEV||TCGETX", "TCSETX", "TCSETXF", "TCSETXW", "TIOCSIG",
956 "TIOCVHANGUP", "TIOCGPKT", "TIOCGPTLCK", "TIOCGEXCL",
957 [0x50] = "FIONCLEX", "FIOCLEX", "FIOASYNC", "TIOCSERCONFIG",
958 "TIOCSERGWILD", "TIOCSERSWILD", "TIOCGLCKTRMIOS", "TIOCSLCKTRMIOS",
959 "TIOCSERGSTRUCT", "TIOCSERGETLSR", "TIOCSERGETMULTI", "TIOCSERSETMULTI",
960 "TIOCMIWAIT", "TIOCGICOUNT", [0x60] = "FIOQSIZE",
963 static DEFINE_STRARRAY_OFFSET(tioctls, 0x5401);
964 #endif /* defined(__i386__) || defined(__x86_64__) */
966 #define STRARRAY(arg, name, array) \
967 .arg_scnprintf = { [arg] = SCA_STRARRAY, }, \
968 .arg_parm = { [arg] = &strarray__##array, }
970 static struct syscall_fmt {
973 size_t (*arg_scnprintf[6])(char *bf, size_t size, struct syscall_arg *arg);
979 { .name = "access", .errmsg = true,
980 .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */
981 [1] = SCA_ACCMODE, /* mode */ }, },
982 { .name = "arch_prctl", .errmsg = true, .alias = "prctl", },
983 { .name = "brk", .hexret = true,
984 .arg_scnprintf = { [0] = SCA_HEX, /* brk */ }, },
985 { .name = "chdir", .errmsg = true,
986 .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
987 { .name = "chmod", .errmsg = true,
988 .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
989 { .name = "chroot", .errmsg = true,
990 .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
991 { .name = "clock_gettime", .errmsg = true, STRARRAY(0, clk_id, clockid), },
992 { .name = "close", .errmsg = true,
993 .arg_scnprintf = { [0] = SCA_CLOSE_FD, /* fd */ }, },
994 { .name = "connect", .errmsg = true, },
995 { .name = "creat", .errmsg = true,
996 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
997 { .name = "dup", .errmsg = true,
998 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
999 { .name = "dup2", .errmsg = true,
1000 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1001 { .name = "dup3", .errmsg = true,
1002 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1003 { .name = "epoll_ctl", .errmsg = true, STRARRAY(1, op, epoll_ctl_ops), },
1004 { .name = "eventfd2", .errmsg = true,
1005 .arg_scnprintf = { [1] = SCA_EFD_FLAGS, /* flags */ }, },
1006 { .name = "faccessat", .errmsg = true,
1007 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1008 [1] = SCA_FILENAME, /* filename */ }, },
1009 { .name = "fadvise64", .errmsg = true,
1010 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1011 { .name = "fallocate", .errmsg = true,
1012 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1013 { .name = "fchdir", .errmsg = true,
1014 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1015 { .name = "fchmod", .errmsg = true,
1016 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1017 { .name = "fchmodat", .errmsg = true,
1018 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */
1019 [1] = SCA_FILENAME, /* filename */ }, },
1020 { .name = "fchown", .errmsg = true,
1021 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1022 { .name = "fchownat", .errmsg = true,
1023 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */
1024 [1] = SCA_FILENAME, /* filename */ }, },
1025 { .name = "fcntl", .errmsg = true,
1026 .arg_scnprintf = { [0] = SCA_FD, /* fd */
1027 [1] = SCA_STRARRAY, /* cmd */ },
1028 .arg_parm = { [1] = &strarray__fcntl_cmds, /* cmd */ }, },
1029 { .name = "fdatasync", .errmsg = true,
1030 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1031 { .name = "flock", .errmsg = true,
1032 .arg_scnprintf = { [0] = SCA_FD, /* fd */
1033 [1] = SCA_FLOCK, /* cmd */ }, },
1034 { .name = "fsetxattr", .errmsg = true,
1035 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1036 { .name = "fstat", .errmsg = true, .alias = "newfstat",
1037 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1038 { .name = "fstatat", .errmsg = true, .alias = "newfstatat",
1039 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1040 [1] = SCA_FILENAME, /* filename */ }, },
1041 { .name = "fstatfs", .errmsg = true,
1042 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1043 { .name = "fsync", .errmsg = true,
1044 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1045 { .name = "ftruncate", .errmsg = true,
1046 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1047 { .name = "futex", .errmsg = true,
1048 .arg_scnprintf = { [1] = SCA_FUTEX_OP, /* op */ }, },
1049 { .name = "futimesat", .errmsg = true,
1050 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */
1051 [1] = SCA_FILENAME, /* filename */ }, },
1052 { .name = "getdents", .errmsg = true,
1053 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1054 { .name = "getdents64", .errmsg = true,
1055 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1056 { .name = "getitimer", .errmsg = true, STRARRAY(0, which, itimers), },
1057 { .name = "getrlimit", .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
1058 { .name = "getxattr", .errmsg = true,
1059 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1060 { .name = "inotify_add_watch", .errmsg = true,
1061 .arg_scnprintf = { [1] = SCA_FILENAME, /* pathname */ }, },
1062 { .name = "ioctl", .errmsg = true,
1063 .arg_scnprintf = { [0] = SCA_FD, /* fd */
1064 #if defined(__i386__) || defined(__x86_64__)
1066 * FIXME: Make this available to all arches.
1068 [1] = SCA_STRHEXARRAY, /* cmd */
1069 [2] = SCA_HEX, /* arg */ },
1070 .arg_parm = { [1] = &strarray__tioctls, /* cmd */ }, },
1072 [2] = SCA_HEX, /* arg */ }, },
1074 { .name = "kill", .errmsg = true,
1075 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1076 { .name = "lchown", .errmsg = true,
1077 .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1078 { .name = "lgetxattr", .errmsg = true,
1079 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1080 { .name = "linkat", .errmsg = true,
1081 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
1082 { .name = "listxattr", .errmsg = true,
1083 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1084 { .name = "llistxattr", .errmsg = true,
1085 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1086 { .name = "lremovexattr", .errmsg = true,
1087 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1088 { .name = "lseek", .errmsg = true,
1089 .arg_scnprintf = { [0] = SCA_FD, /* fd */
1090 [2] = SCA_STRARRAY, /* whence */ },
1091 .arg_parm = { [2] = &strarray__whences, /* whence */ }, },
1092 { .name = "lsetxattr", .errmsg = true,
1093 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1094 { .name = "lstat", .errmsg = true, .alias = "newlstat",
1095 .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1096 { .name = "lsxattr", .errmsg = true,
1097 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1098 { .name = "madvise", .errmsg = true,
1099 .arg_scnprintf = { [0] = SCA_HEX, /* start */
1100 [2] = SCA_MADV_BHV, /* behavior */ }, },
1101 { .name = "mkdir", .errmsg = true,
1102 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1103 { .name = "mkdirat", .errmsg = true,
1104 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */
1105 [1] = SCA_FILENAME, /* pathname */ }, },
1106 { .name = "mknod", .errmsg = true,
1107 .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1108 { .name = "mknodat", .errmsg = true,
1109 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */
1110 [1] = SCA_FILENAME, /* filename */ }, },
1111 { .name = "mlock", .errmsg = true,
1112 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1113 { .name = "mlockall", .errmsg = true,
1114 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1115 { .name = "mmap", .hexret = true,
1116 .arg_scnprintf = { [0] = SCA_HEX, /* addr */
1117 [2] = SCA_MMAP_PROT, /* prot */
1118 [3] = SCA_MMAP_FLAGS, /* flags */
1119 [4] = SCA_FD, /* fd */ }, },
1120 { .name = "mprotect", .errmsg = true,
1121 .arg_scnprintf = { [0] = SCA_HEX, /* start */
1122 [2] = SCA_MMAP_PROT, /* prot */ }, },
1123 { .name = "mq_unlink", .errmsg = true,
1124 .arg_scnprintf = { [0] = SCA_FILENAME, /* u_name */ }, },
1125 { .name = "mremap", .hexret = true,
1126 .arg_scnprintf = { [0] = SCA_HEX, /* addr */
1127 [3] = SCA_MREMAP_FLAGS, /* flags */
1128 [4] = SCA_HEX, /* new_addr */ }, },
1129 { .name = "munlock", .errmsg = true,
1130 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1131 { .name = "munmap", .errmsg = true,
1132 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1133 { .name = "name_to_handle_at", .errmsg = true,
1134 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1135 { .name = "newfstatat", .errmsg = true,
1136 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1137 [1] = SCA_FILENAME, /* filename */ }, },
1138 { .name = "open", .errmsg = true,
1139 .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */
1140 [1] = SCA_OPEN_FLAGS, /* flags */ }, },
1141 { .name = "open_by_handle_at", .errmsg = true,
1142 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1143 [2] = SCA_OPEN_FLAGS, /* flags */ }, },
1144 { .name = "openat", .errmsg = true,
1145 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1146 [1] = SCA_FILENAME, /* filename */
1147 [2] = SCA_OPEN_FLAGS, /* flags */ }, },
1148 { .name = "perf_event_open", .errmsg = true,
1149 .arg_scnprintf = { [1] = SCA_INT, /* pid */
1150 [2] = SCA_INT, /* cpu */
1151 [3] = SCA_FD, /* group_fd */
1152 [4] = SCA_PERF_FLAGS, /* flags */ }, },
1153 { .name = "pipe2", .errmsg = true,
1154 .arg_scnprintf = { [1] = SCA_PIPE_FLAGS, /* flags */ }, },
1155 { .name = "poll", .errmsg = true, .timeout = true, },
1156 { .name = "ppoll", .errmsg = true, .timeout = true, },
1157 { .name = "pread", .errmsg = true, .alias = "pread64",
1158 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1159 { .name = "preadv", .errmsg = true, .alias = "pread",
1160 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1161 { .name = "prlimit64", .errmsg = true, STRARRAY(1, resource, rlimit_resources), },
1162 { .name = "pwrite", .errmsg = true, .alias = "pwrite64",
1163 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1164 { .name = "pwritev", .errmsg = true,
1165 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1166 { .name = "read", .errmsg = true,
1167 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1168 { .name = "readlink", .errmsg = true,
1169 .arg_scnprintf = { [0] = SCA_FILENAME, /* path */ }, },
1170 { .name = "readlinkat", .errmsg = true,
1171 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1172 [1] = SCA_FILENAME, /* pathname */ }, },
1173 { .name = "readv", .errmsg = true,
1174 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1175 { .name = "recvfrom", .errmsg = true,
1176 .arg_scnprintf = { [0] = SCA_FD, /* fd */
1177 [3] = SCA_MSG_FLAGS, /* flags */ }, },
1178 { .name = "recvmmsg", .errmsg = true,
1179 .arg_scnprintf = { [0] = SCA_FD, /* fd */
1180 [3] = SCA_MSG_FLAGS, /* flags */ }, },
1181 { .name = "recvmsg", .errmsg = true,
1182 .arg_scnprintf = { [0] = SCA_FD, /* fd */
1183 [2] = SCA_MSG_FLAGS, /* flags */ }, },
1184 { .name = "removexattr", .errmsg = true,
1185 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1186 { .name = "renameat", .errmsg = true,
1187 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1188 { .name = "rmdir", .errmsg = true,
1189 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1190 { .name = "rt_sigaction", .errmsg = true,
1191 .arg_scnprintf = { [0] = SCA_SIGNUM, /* sig */ }, },
1192 { .name = "rt_sigprocmask", .errmsg = true, STRARRAY(0, how, sighow), },
1193 { .name = "rt_sigqueueinfo", .errmsg = true,
1194 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1195 { .name = "rt_tgsigqueueinfo", .errmsg = true,
1196 .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
1197 { .name = "select", .errmsg = true, .timeout = true, },
1198 { .name = "sendmmsg", .errmsg = true,
1199 .arg_scnprintf = { [0] = SCA_FD, /* fd */
1200 [3] = SCA_MSG_FLAGS, /* flags */ }, },
1201 { .name = "sendmsg", .errmsg = true,
1202 .arg_scnprintf = { [0] = SCA_FD, /* fd */
1203 [2] = SCA_MSG_FLAGS, /* flags */ }, },
1204 { .name = "sendto", .errmsg = true,
1205 .arg_scnprintf = { [0] = SCA_FD, /* fd */
1206 [3] = SCA_MSG_FLAGS, /* flags */ }, },
1207 { .name = "setitimer", .errmsg = true, STRARRAY(0, which, itimers), },
1208 { .name = "setrlimit", .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
1209 { .name = "setxattr", .errmsg = true,
1210 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1211 { .name = "shutdown", .errmsg = true,
1212 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1213 { .name = "socket", .errmsg = true,
1214 .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
1215 [1] = SCA_SK_TYPE, /* type */ },
1216 .arg_parm = { [0] = &strarray__socket_families, /* family */ }, },
1217 { .name = "socketpair", .errmsg = true,
1218 .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
1219 [1] = SCA_SK_TYPE, /* type */ },
1220 .arg_parm = { [0] = &strarray__socket_families, /* family */ }, },
1221 { .name = "stat", .errmsg = true, .alias = "newstat",
1222 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1223 { .name = "statfs", .errmsg = true,
1224 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1225 { .name = "swapoff", .errmsg = true,
1226 .arg_scnprintf = { [0] = SCA_FILENAME, /* specialfile */ }, },
1227 { .name = "swapon", .errmsg = true,
1228 .arg_scnprintf = { [0] = SCA_FILENAME, /* specialfile */ }, },
1229 { .name = "symlinkat", .errmsg = true,
1230 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1231 { .name = "tgkill", .errmsg = true,
1232 .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
1233 { .name = "tkill", .errmsg = true,
1234 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1235 { .name = "truncate", .errmsg = true,
1236 .arg_scnprintf = { [0] = SCA_FILENAME, /* path */ }, },
1237 { .name = "uname", .errmsg = true, .alias = "newuname", },
1238 { .name = "unlinkat", .errmsg = true,
1239 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1240 [1] = SCA_FILENAME, /* pathname */ }, },
1241 { .name = "utime", .errmsg = true,
1242 .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1243 { .name = "utimensat", .errmsg = true,
1244 .arg_scnprintf = { [0] = SCA_FDAT, /* dirfd */
1245 [1] = SCA_FILENAME, /* filename */ }, },
1246 { .name = "utimes", .errmsg = true,
1247 .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1248 { .name = "vmsplice", .errmsg = true,
1249 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1250 { .name = "write", .errmsg = true,
1251 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1252 { .name = "writev", .errmsg = true,
1253 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1256 static int syscall_fmt__cmp(const void *name, const void *fmtp)
1258 const struct syscall_fmt *fmt = fmtp;
1259 return strcmp(name, fmt->name);
1262 static struct syscall_fmt *syscall_fmt__find(const char *name)
1264 const int nmemb = ARRAY_SIZE(syscall_fmts);
1265 return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp);
1269 struct event_format *tp_format;
1271 struct format_field *args;
1274 struct syscall_fmt *fmt;
1275 size_t (**arg_scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
1279 static size_t fprintf_duration(unsigned long t, FILE *fp)
1281 double duration = (double)t / NSEC_PER_MSEC;
1282 size_t printed = fprintf(fp, "(");
1284 if (duration >= 1.0)
1285 printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration);
1286 else if (duration >= 0.01)
1287 printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration);
1289 printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration);
1290 return printed + fprintf(fp, "): ");
1294 * filename.ptr: The filename char pointer that will be vfs_getname'd
1295 * filename.entry_str_pos: Where to insert the string translated from
1296 * filename.ptr by the vfs_getname tracepoint/kprobe.
1298 struct thread_trace {
1302 unsigned long nr_events;
1303 unsigned long pfmaj, pfmin;
1315 struct intlist *syscall_stats;
1318 static struct thread_trace *thread_trace__new(void)
1320 struct thread_trace *ttrace = zalloc(sizeof(struct thread_trace));
1323 ttrace->paths.max = -1;
1325 ttrace->syscall_stats = intlist__new(NULL);
1330 static struct thread_trace *thread__trace(struct thread *thread, FILE *fp)
1332 struct thread_trace *ttrace;
1337 if (thread__priv(thread) == NULL)
1338 thread__set_priv(thread, thread_trace__new());
1340 if (thread__priv(thread) == NULL)
1343 ttrace = thread__priv(thread);
1344 ++ttrace->nr_events;
1348 color_fprintf(fp, PERF_COLOR_RED,
1349 "WARNING: not enough memory, dropping samples!\n");
1353 #define TRACE_PFMAJ (1 << 0)
1354 #define TRACE_PFMIN (1 << 1)
1356 static const size_t trace__entry_str_size = 2048;
1359 struct perf_tool tool;
1366 struct syscall *table;
1368 struct perf_evsel *sys_enter,
1372 struct record_opts opts;
1373 struct perf_evlist *evlist;
1374 struct machine *host;
1375 struct thread *current;
1378 unsigned long nr_events;
1379 struct strlist *ev_qualifier;
1384 const char *last_vfs_getname;
1385 struct intlist *tid_list;
1386 struct intlist *pid_list;
1391 double duration_filter;
1397 bool not_ev_qualifier;
1401 bool multiple_threads;
1405 bool show_tool_stats;
1406 bool trace_syscalls;
1412 static int trace__set_fd_pathname(struct thread *thread, int fd, const char *pathname)
1414 struct thread_trace *ttrace = thread__priv(thread);
1416 if (fd > ttrace->paths.max) {
1417 char **npath = realloc(ttrace->paths.table, (fd + 1) * sizeof(char *));
1422 if (ttrace->paths.max != -1) {
1423 memset(npath + ttrace->paths.max + 1, 0,
1424 (fd - ttrace->paths.max) * sizeof(char *));
1426 memset(npath, 0, (fd + 1) * sizeof(char *));
1429 ttrace->paths.table = npath;
1430 ttrace->paths.max = fd;
1433 ttrace->paths.table[fd] = strdup(pathname);
1435 return ttrace->paths.table[fd] != NULL ? 0 : -1;
1438 static int thread__read_fd_path(struct thread *thread, int fd)
1440 char linkname[PATH_MAX], pathname[PATH_MAX];
1444 if (thread->pid_ == thread->tid) {
1445 scnprintf(linkname, sizeof(linkname),
1446 "/proc/%d/fd/%d", thread->pid_, fd);
1448 scnprintf(linkname, sizeof(linkname),
1449 "/proc/%d/task/%d/fd/%d", thread->pid_, thread->tid, fd);
1452 if (lstat(linkname, &st) < 0 || st.st_size + 1 > (off_t)sizeof(pathname))
1455 ret = readlink(linkname, pathname, sizeof(pathname));
1457 if (ret < 0 || ret > st.st_size)
1460 pathname[ret] = '\0';
1461 return trace__set_fd_pathname(thread, fd, pathname);
1464 static const char *thread__fd_path(struct thread *thread, int fd,
1465 struct trace *trace)
1467 struct thread_trace *ttrace = thread__priv(thread);
1475 if ((fd > ttrace->paths.max || ttrace->paths.table[fd] == NULL)) {
1478 ++trace->stats.proc_getname;
1479 if (thread__read_fd_path(thread, fd))
1483 return ttrace->paths.table[fd];
1486 static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
1487 struct syscall_arg *arg)
1490 size_t printed = scnprintf(bf, size, "%d", fd);
1491 const char *path = thread__fd_path(arg->thread, fd, arg->trace);
1494 printed += scnprintf(bf + printed, size - printed, "<%s>", path);
1499 static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
1500 struct syscall_arg *arg)
1503 size_t printed = syscall_arg__scnprintf_fd(bf, size, arg);
1504 struct thread_trace *ttrace = thread__priv(arg->thread);
1506 if (ttrace && fd >= 0 && fd <= ttrace->paths.max)
1507 zfree(&ttrace->paths.table[fd]);
1512 static void thread__set_filename_pos(struct thread *thread, const char *bf,
1515 struct thread_trace *ttrace = thread__priv(thread);
1517 ttrace->filename.ptr = ptr;
1518 ttrace->filename.entry_str_pos = bf - ttrace->entry_str;
1521 static size_t syscall_arg__scnprintf_filename(char *bf, size_t size,
1522 struct syscall_arg *arg)
1524 unsigned long ptr = arg->val;
1526 if (!arg->trace->vfs_getname)
1527 return scnprintf(bf, size, "%#x", ptr);
1529 thread__set_filename_pos(arg->thread, bf, ptr);
1533 static bool trace__filter_duration(struct trace *trace, double t)
1535 return t < (trace->duration_filter * NSEC_PER_MSEC);
1538 static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
1540 double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC;
1542 return fprintf(fp, "%10.3f ", ts);
/*
 * Flags shared between sig_handler() and the rest of the file.  They are
 * written from a signal handler, so they are volatile sig_atomic_t rather
 * than plain bool: that is the only object type the C standard lets a
 * handler assign to with defined behavior.
 */
static volatile sig_atomic_t done = false;
static volatile sig_atomic_t interrupted = false;

/*
 * Signal handler: request termination for any handled signal, and note
 * whether the signal was SIGINT.
 */
static void sig_handler(int sig)
{
	done = true;
	interrupted = sig == SIGINT;
}
1554 static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread,
1555 u64 duration, u64 tstamp, FILE *fp)
1557 size_t printed = trace__fprintf_tstamp(trace, tstamp, fp);
1558 printed += fprintf_duration(duration, fp);
1560 if (trace->multiple_threads) {
1561 if (trace->show_comm)
1562 printed += fprintf(fp, "%.14s/", thread__comm_str(thread));
1563 printed += fprintf(fp, "%d ", thread->tid);
1569 static int trace__process_event(struct trace *trace, struct machine *machine,
1570 union perf_event *event, struct perf_sample *sample)
1574 switch (event->header.type) {
1575 case PERF_RECORD_LOST:
1576 color_fprintf(trace->output, PERF_COLOR_RED,
1577 "LOST %" PRIu64 " events!\n", event->lost.lost);
1578 ret = machine__process_lost_event(machine, event, sample);
1580 ret = machine__process_event(machine, event, sample);
1587 static int trace__tool_process(struct perf_tool *tool,
1588 union perf_event *event,
1589 struct perf_sample *sample,
1590 struct machine *machine)
1592 struct trace *trace = container_of(tool, struct trace, tool);
1593 return trace__process_event(trace, machine, event, sample);
1596 static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
1598 int err = symbol__init(NULL);
1603 trace->host = machine__new_host();
1604 if (trace->host == NULL)
1607 if (trace_event__register_resolver(trace->host, machine__resolve_kernel_addr) < 0)
1610 err = __machine__synthesize_threads(trace->host, &trace->tool, &trace->opts.target,
1611 evlist->threads, trace__tool_process, false,
1612 trace->opts.proc_map_timeout);
1619 static int syscall__set_arg_fmts(struct syscall *sc)
1621 struct format_field *field;
1624 sc->arg_scnprintf = calloc(sc->nr_args, sizeof(void *));
1625 if (sc->arg_scnprintf == NULL)
1629 sc->arg_parm = sc->fmt->arg_parm;
1631 for (field = sc->args; field; field = field->next) {
1632 if (sc->fmt && sc->fmt->arg_scnprintf[idx])
1633 sc->arg_scnprintf[idx] = sc->fmt->arg_scnprintf[idx];
1634 else if (field->flags & FIELD_IS_POINTER)
1635 sc->arg_scnprintf[idx] = syscall_arg__scnprintf_hex;
1642 static int trace__read_syscall_info(struct trace *trace, int id)
1646 const char *name = audit_syscall_to_name(id, trace->audit.machine);
1651 if (id > trace->syscalls.max) {
1652 struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc));
1654 if (nsyscalls == NULL)
1657 if (trace->syscalls.max != -1) {
1658 memset(nsyscalls + trace->syscalls.max + 1, 0,
1659 (id - trace->syscalls.max) * sizeof(*sc));
1661 memset(nsyscalls, 0, (id + 1) * sizeof(*sc));
1664 trace->syscalls.table = nsyscalls;
1665 trace->syscalls.max = id;
1668 sc = trace->syscalls.table + id;
1671 sc->fmt = syscall_fmt__find(sc->name);
1673 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name);
1674 sc->tp_format = trace_event__tp_format("syscalls", tp_name);
1676 if (sc->tp_format == NULL && sc->fmt && sc->fmt->alias) {
1677 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias);
1678 sc->tp_format = trace_event__tp_format("syscalls", tp_name);
1681 if (sc->tp_format == NULL)
1684 sc->args = sc->tp_format->format.fields;
1685 sc->nr_args = sc->tp_format->format.nr_fields;
1686 /* drop nr field - not relevant here; does not exist on older kernels */
1687 if (sc->args && strcmp(sc->args->name, "nr") == 0) {
1688 sc->args = sc->args->next;
1692 sc->is_exit = !strcmp(name, "exit_group") || !strcmp(name, "exit");
1694 return syscall__set_arg_fmts(sc);
1697 static int trace__validate_ev_qualifier(struct trace *trace)
1700 struct str_node *pos;
1702 trace->ev_qualifier_ids.nr = strlist__nr_entries(trace->ev_qualifier);
1703 trace->ev_qualifier_ids.entries = malloc(trace->ev_qualifier_ids.nr *
1704 sizeof(trace->ev_qualifier_ids.entries[0]));
1706 if (trace->ev_qualifier_ids.entries == NULL) {
1707 fputs("Error:\tNot enough memory for allocating events qualifier ids\n",
1715 strlist__for_each(pos, trace->ev_qualifier) {
1716 const char *sc = pos->s;
1717 int id = audit_name_to_syscall(sc, trace->audit.machine);
1721 fputs("Error:\tInvalid syscall ", trace->output);
1724 fputs(", ", trace->output);
1727 fputs(sc, trace->output);
1730 trace->ev_qualifier_ids.entries[i++] = id;
1734 fputs("\nHint:\ttry 'perf list syscalls:sys_enter_*'"
1735 "\nHint:\tand: 'man syscalls'\n", trace->output);
1736 zfree(&trace->ev_qualifier_ids.entries);
1737 trace->ev_qualifier_ids.nr = 0;
1744 * args is to be interpreted as a series of longs but we need to handle
1745 * 8-byte unaligned accesses. args points to raw_data within the event
1746 * and raw_data is guaranteed to be 8-byte unaligned because it is
1747 * preceded by raw_size which is a u32. So we need to copy args to a temp
1748 * variable to read it. Most notably this avoids extended load instructions
1749 * on unaligned addresses
1752 static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
1753 unsigned char *args, struct trace *trace,
1754 struct thread *thread)
1760 if (sc->args != NULL) {
1761 struct format_field *field;
1763 struct syscall_arg arg = {
1770 for (field = sc->args; field;
1771 field = field->next, ++arg.idx, bit <<= 1) {
1775 /* special care for unaligned accesses */
1776 p = args + sizeof(unsigned long) * arg.idx;
1777 memcpy(&val, p, sizeof(val));
1780 * Suppress this argument if its value is zero and
1781 * and we don't have a string associated in an
1785 !(sc->arg_scnprintf &&
1786 sc->arg_scnprintf[arg.idx] == SCA_STRARRAY &&
1787 sc->arg_parm[arg.idx]))
1790 printed += scnprintf(bf + printed, size - printed,
1791 "%s%s: ", printed ? ", " : "", field->name);
1792 if (sc->arg_scnprintf && sc->arg_scnprintf[arg.idx]) {
1795 arg.parm = sc->arg_parm[arg.idx];
1796 printed += sc->arg_scnprintf[arg.idx](bf + printed,
1797 size - printed, &arg);
1799 printed += scnprintf(bf + printed, size - printed,
1807 /* special care for unaligned accesses */
1808 p = args + sizeof(unsigned long) * i;
1809 memcpy(&val, p, sizeof(val));
1810 printed += scnprintf(bf + printed, size - printed,
1812 printed ? ", " : "", i, val);
1820 typedef int (*tracepoint_handler)(struct trace *trace, struct perf_evsel *evsel,
1821 union perf_event *event,
1822 struct perf_sample *sample);
1824 static struct syscall *trace__syscall_info(struct trace *trace,
1825 struct perf_evsel *evsel, int id)
1831 * XXX: Noticed on x86_64, reproduced as far back as 3.0.36, haven't tried
1832 * before that, leaving at a higher verbosity level till that is
1833 * explained. Reproduced with plain ftrace with:
1835 * echo 1 > /t/events/raw_syscalls/sys_exit/enable
1836 * grep "NR -1 " /t/trace_pipe
1838 * After generating some load on the machine.
1842 fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n",
1843 id, perf_evsel__name(evsel), ++n);
1848 if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) &&
1849 trace__read_syscall_info(trace, id))
1852 if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL))
1855 return &trace->syscalls.table[id];
1859 fprintf(trace->output, "Problems reading syscall %d", id);
1860 if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL)
1861 fprintf(trace->output, "(%s)", trace->syscalls.table[id].name);
1862 fputs(" information\n", trace->output);
1867 static void thread__update_stats(struct thread_trace *ttrace,
1868 int id, struct perf_sample *sample)
1870 struct int_node *inode;
1871 struct stats *stats;
1874 inode = intlist__findnew(ttrace->syscall_stats, id);
1878 stats = inode->priv;
1879 if (stats == NULL) {
1880 stats = malloc(sizeof(struct stats));
1884 inode->priv = stats;
1887 if (ttrace->entry_time && sample->time > ttrace->entry_time)
1888 duration = sample->time - ttrace->entry_time;
1890 update_stats(stats, duration);
1893 static int trace__printf_interrupted_entry(struct trace *trace, struct perf_sample *sample)
1895 struct thread_trace *ttrace;
1899 if (trace->current == NULL)
1902 ttrace = thread__priv(trace->current);
1904 if (!ttrace->entry_pending)
1907 duration = sample->time - ttrace->entry_time;
1909 printed = trace__fprintf_entry_head(trace, trace->current, duration, sample->time, trace->output);
1910 printed += fprintf(trace->output, "%-70s) ...\n", ttrace->entry_str);
1911 ttrace->entry_pending = false;
1916 static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
1917 union perf_event *event __maybe_unused,
1918 struct perf_sample *sample)
1923 struct thread *thread;
1924 int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1;
1925 struct syscall *sc = trace__syscall_info(trace, evsel, id);
1926 struct thread_trace *ttrace;
1931 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1932 ttrace = thread__trace(thread, trace->output);
1936 args = perf_evsel__sc_tp_ptr(evsel, args, sample);
1938 if (ttrace->entry_str == NULL) {
1939 ttrace->entry_str = malloc(trace__entry_str_size);
1940 if (!ttrace->entry_str)
1944 if (!trace->summary_only)
1945 trace__printf_interrupted_entry(trace, sample);
1947 ttrace->entry_time = sample->time;
1948 msg = ttrace->entry_str;
1949 printed += scnprintf(msg + printed, trace__entry_str_size - printed, "%s(", sc->name);
1951 printed += syscall__scnprintf_args(sc, msg + printed, trace__entry_str_size - printed,
1952 args, trace, thread);
1955 if (!trace->duration_filter && !trace->summary_only) {
1956 trace__fprintf_entry_head(trace, thread, 1, sample->time, trace->output);
1957 fprintf(trace->output, "%-70s\n", ttrace->entry_str);
1960 ttrace->entry_pending = true;
1962 if (trace->current != thread) {
1963 thread__put(trace->current);
1964 trace->current = thread__get(thread);
1968 thread__put(thread);
1972 static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
1973 union perf_event *event __maybe_unused,
1974 struct perf_sample *sample)
1978 struct thread *thread;
1979 int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1;
1980 struct syscall *sc = trace__syscall_info(trace, evsel, id);
1981 struct thread_trace *ttrace;
1986 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1987 ttrace = thread__trace(thread, trace->output);
1992 thread__update_stats(ttrace, id, sample);
1994 ret = perf_evsel__sc_tp_uint(evsel, ret, sample);
1996 if (id == trace->audit.open_id && ret >= 0 && trace->last_vfs_getname) {
1997 trace__set_fd_pathname(thread, ret, trace->last_vfs_getname);
1998 trace->last_vfs_getname = NULL;
1999 ++trace->stats.vfs_getname;
2002 ttrace->exit_time = sample->time;
2004 if (ttrace->entry_time) {
2005 duration = sample->time - ttrace->entry_time;
2006 if (trace__filter_duration(trace, duration))
2008 } else if (trace->duration_filter)
2011 if (trace->summary_only)
2014 trace__fprintf_entry_head(trace, thread, duration, sample->time, trace->output);
2016 if (ttrace->entry_pending) {
2017 fprintf(trace->output, "%-70s", ttrace->entry_str);
2019 fprintf(trace->output, " ... [");
2020 color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued");
2021 fprintf(trace->output, "]: %s()", sc->name);
2024 if (sc->fmt == NULL) {
2026 fprintf(trace->output, ") = %ld", ret);
2027 } else if (ret < 0 && sc->fmt->errmsg) {
2028 char bf[STRERR_BUFSIZE];
2029 const char *emsg = strerror_r(-ret, bf, sizeof(bf)),
2030 *e = audit_errno_to_name(-ret);
2032 fprintf(trace->output, ") = -1 %s %s", e, emsg);
2033 } else if (ret == 0 && sc->fmt->timeout)
2034 fprintf(trace->output, ") = 0 Timeout");
2035 else if (sc->fmt->hexret)
2036 fprintf(trace->output, ") = %#lx", ret);
2040 fputc('\n', trace->output);
2042 ttrace->entry_pending = false;
2045 thread__put(thread);
2049 static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel,
2050 union perf_event *event __maybe_unused,
2051 struct perf_sample *sample)
2053 struct thread *thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
2054 struct thread_trace *ttrace;
2055 size_t filename_len, entry_str_len, to_move;
2056 ssize_t remaining_space;
2058 const char *filename;
2060 trace->last_vfs_getname = perf_evsel__rawptr(evsel, sample, "pathname");
2065 ttrace = thread__priv(thread);
2069 if (!ttrace->filename.ptr)
2072 entry_str_len = strlen(ttrace->entry_str);
2073 remaining_space = trace__entry_str_size - entry_str_len - 1; /* \0 */
2074 if (remaining_space <= 0)
2077 filename = trace->last_vfs_getname;
2078 filename_len = strlen(filename);
2079 if (filename_len > (size_t)remaining_space) {
2080 filename += filename_len - remaining_space;
2081 filename_len = remaining_space;
2084 to_move = entry_str_len - ttrace->filename.entry_str_pos + 1; /* \0 */
2085 pos = ttrace->entry_str + ttrace->filename.entry_str_pos;
2086 memmove(pos + filename_len, pos, to_move);
2087 memcpy(pos, filename, filename_len);
2089 ttrace->filename.ptr = 0;
2090 ttrace->filename.entry_str_pos = 0;
2095 static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel,
2096 union perf_event *event __maybe_unused,
2097 struct perf_sample *sample)
2099 u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
2100 double runtime_ms = (double)runtime / NSEC_PER_MSEC;
2101 struct thread *thread = machine__findnew_thread(trace->host,
2104 struct thread_trace *ttrace = thread__trace(thread, trace->output);
2109 ttrace->runtime_ms += runtime_ms;
2110 trace->runtime_ms += runtime_ms;
2111 thread__put(thread);
2115 fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n",
2117 perf_evsel__strval(evsel, sample, "comm"),
2118 (pid_t)perf_evsel__intval(evsel, sample, "pid"),
2120 perf_evsel__intval(evsel, sample, "vruntime"));
2121 thread__put(thread);
2125 static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel,
2126 union perf_event *event __maybe_unused,
2127 struct perf_sample *sample)
2129 trace__printf_interrupted_entry(trace, sample);
2130 trace__fprintf_tstamp(trace, sample->time, trace->output);
2132 if (trace->trace_syscalls)
2133 fprintf(trace->output, "( ): ");
2135 fprintf(trace->output, "%s:", evsel->name);
2137 if (evsel->tp_format) {
2138 event_format__fprintf(evsel->tp_format, sample->cpu,
2139 sample->raw_data, sample->raw_size,
2143 fprintf(trace->output, ")\n");
2147 static void print_location(FILE *f, struct perf_sample *sample,
2148 struct addr_location *al,
2149 bool print_dso, bool print_sym)
2152 if ((verbose || print_dso) && al->map)
2153 fprintf(f, "%s@", al->map->dso->long_name);
2155 if ((verbose || print_sym) && al->sym)
2156 fprintf(f, "%s+0x%" PRIx64, al->sym->name,
2157 al->addr - al->sym->start);
2159 fprintf(f, "0x%" PRIx64, al->addr);
2161 fprintf(f, "0x%" PRIx64, sample->addr);
2164 static int trace__pgfault(struct trace *trace,
2165 struct perf_evsel *evsel,
2166 union perf_event *event,
2167 struct perf_sample *sample)
2169 struct thread *thread;
2170 u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
2171 struct addr_location al;
2172 char map_type = 'd';
2173 struct thread_trace *ttrace;
2176 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
2177 ttrace = thread__trace(thread, trace->output);
2181 if (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ)
2186 if (trace->summary_only)
2189 thread__find_addr_location(thread, cpumode, MAP__FUNCTION,
2192 trace__fprintf_entry_head(trace, thread, 0, sample->time, trace->output);
2194 fprintf(trace->output, "%sfault [",
2195 evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ?
2198 print_location(trace->output, sample, &al, false, true);
2200 fprintf(trace->output, "] => ");
2202 thread__find_addr_location(thread, cpumode, MAP__VARIABLE,
2206 thread__find_addr_location(thread, cpumode,
2207 MAP__FUNCTION, sample->addr, &al);
2215 print_location(trace->output, sample, &al, true, false);
2217 fprintf(trace->output, " (%c%c)\n", map_type, al.level);
2221 thread__put(thread);
2225 static bool skip_sample(struct trace *trace, struct perf_sample *sample)
2227 if ((trace->pid_list && intlist__find(trace->pid_list, sample->pid)) ||
2228 (trace->tid_list && intlist__find(trace->tid_list, sample->tid)))
2231 if (trace->pid_list || trace->tid_list)
2237 static int trace__process_sample(struct perf_tool *tool,
2238 union perf_event *event,
2239 struct perf_sample *sample,
2240 struct perf_evsel *evsel,
2241 struct machine *machine __maybe_unused)
2243 struct trace *trace = container_of(tool, struct trace, tool);
2246 tracepoint_handler handler = evsel->handler;
2248 if (skip_sample(trace, sample))
2251 if (!trace->full_time && trace->base_time == 0)
2252 trace->base_time = sample->time;
2256 handler(trace, evsel, event, sample);
2262 static int parse_target_str(struct trace *trace)
2264 if (trace->opts.target.pid) {
2265 trace->pid_list = intlist__new(trace->opts.target.pid);
2266 if (trace->pid_list == NULL) {
2267 pr_err("Error parsing process id string\n");
2272 if (trace->opts.target.tid) {
2273 trace->tid_list = intlist__new(trace->opts.target.tid);
2274 if (trace->tid_list == NULL) {
2275 pr_err("Error parsing thread id string\n");
2283 static int trace__record(struct trace *trace, int argc, const char **argv)
2285 unsigned int rec_argc, i, j;
2286 const char **rec_argv;
2287 const char * const record_args[] = {
2294 const char * const sc_args[] = { "-e", };
2295 unsigned int sc_args_nr = ARRAY_SIZE(sc_args);
2296 const char * const majpf_args[] = { "-e", "major-faults" };
2297 unsigned int majpf_args_nr = ARRAY_SIZE(majpf_args);
2298 const char * const minpf_args[] = { "-e", "minor-faults" };
2299 unsigned int minpf_args_nr = ARRAY_SIZE(minpf_args);
2301 /* +1 is for the event string below */
2302 rec_argc = ARRAY_SIZE(record_args) + sc_args_nr + 1 +
2303 majpf_args_nr + minpf_args_nr + argc;
2304 rec_argv = calloc(rec_argc + 1, sizeof(char *));
2306 if (rec_argv == NULL)
2310 for (i = 0; i < ARRAY_SIZE(record_args); i++)
2311 rec_argv[j++] = record_args[i];
2313 if (trace->trace_syscalls) {
2314 for (i = 0; i < sc_args_nr; i++)
2315 rec_argv[j++] = sc_args[i];
2317 /* event string may be different for older kernels - e.g., RHEL6 */
2318 if (is_valid_tracepoint("raw_syscalls:sys_enter"))
2319 rec_argv[j++] = "raw_syscalls:sys_enter,raw_syscalls:sys_exit";
2320 else if (is_valid_tracepoint("syscalls:sys_enter"))
2321 rec_argv[j++] = "syscalls:sys_enter,syscalls:sys_exit";
2323 pr_err("Neither raw_syscalls nor syscalls events exist.\n");
2328 if (trace->trace_pgfaults & TRACE_PFMAJ)
2329 for (i = 0; i < majpf_args_nr; i++)
2330 rec_argv[j++] = majpf_args[i];
2332 if (trace->trace_pgfaults & TRACE_PFMIN)
2333 for (i = 0; i < minpf_args_nr; i++)
2334 rec_argv[j++] = minpf_args[i];
2336 for (i = 0; i < (unsigned int)argc; i++)
2337 rec_argv[j++] = argv[i];
2339 return cmd_record(j, rec_argv, NULL);
2342 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp);
2344 static bool perf_evlist__add_vfs_getname(struct perf_evlist *evlist)
2346 struct perf_evsel *evsel = perf_evsel__newtp("probe", "vfs_getname");
2350 if (perf_evsel__field(evsel, "pathname") == NULL) {
2351 perf_evsel__delete(evsel);
2355 evsel->handler = trace__vfs_getname;
2356 perf_evlist__add(evlist, evsel);
2360 static int perf_evlist__add_pgfault(struct perf_evlist *evlist,
2363 struct perf_evsel *evsel;
2364 struct perf_event_attr attr = {
2365 .type = PERF_TYPE_SOFTWARE,
2369 attr.config = config;
2370 attr.sample_period = 1;
2372 event_attr_init(&attr);
2374 evsel = perf_evsel__new(&attr);
2378 evsel->handler = trace__pgfault;
2379 perf_evlist__add(evlist, evsel);
2384 static void trace__handle_event(struct trace *trace, union perf_event *event, struct perf_sample *sample)
2386 const u32 type = event->header.type;
2387 struct perf_evsel *evsel;
2389 if (!trace->full_time && trace->base_time == 0)
2390 trace->base_time = sample->time;
2392 if (type != PERF_RECORD_SAMPLE) {
2393 trace__process_event(trace, trace->host, event, sample);
2397 evsel = perf_evlist__id2evsel(trace->evlist, sample->id);
2398 if (evsel == NULL) {
2399 fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample->id);
2403 if (evsel->attr.type == PERF_TYPE_TRACEPOINT &&
2404 sample->raw_data == NULL) {
2405 fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
2406 perf_evsel__name(evsel), sample->tid,
2407 sample->cpu, sample->raw_size);
2409 tracepoint_handler handler = evsel->handler;
2410 handler(trace, evsel, event, sample);
2414 static int trace__add_syscall_newtp(struct trace *trace)
2417 struct perf_evlist *evlist = trace->evlist;
2418 struct perf_evsel *sys_enter, *sys_exit;
2420 sys_enter = perf_evsel__syscall_newtp("sys_enter", trace__sys_enter);
2421 if (sys_enter == NULL)
2424 if (perf_evsel__init_sc_tp_ptr_field(sys_enter, args))
2425 goto out_delete_sys_enter;
2427 sys_exit = perf_evsel__syscall_newtp("sys_exit", trace__sys_exit);
2428 if (sys_exit == NULL)
2429 goto out_delete_sys_enter;
2431 if (perf_evsel__init_sc_tp_uint_field(sys_exit, ret))
2432 goto out_delete_sys_exit;
2434 perf_evlist__add(evlist, sys_enter);
2435 perf_evlist__add(evlist, sys_exit);
2437 trace->syscalls.events.sys_enter = sys_enter;
2438 trace->syscalls.events.sys_exit = sys_exit;
2444 out_delete_sys_exit:
2445 perf_evsel__delete_priv(sys_exit);
2446 out_delete_sys_enter:
2447 perf_evsel__delete_priv(sys_enter);
2451 static int trace__set_ev_qualifier_filter(struct trace *trace)
2454 char *filter = asprintf_expr_inout_ints("id", !trace->not_ev_qualifier,
2455 trace->ev_qualifier_ids.nr,
2456 trace->ev_qualifier_ids.entries);
2461 if (!perf_evsel__append_filter(trace->syscalls.events.sys_enter, "&&", filter))
2462 err = perf_evsel__append_filter(trace->syscalls.events.sys_exit, "&&", filter);
2472 static int trace__run(struct trace *trace, int argc, const char **argv)
2474 struct perf_evlist *evlist = trace->evlist;
2475 struct perf_evsel *evsel;
2477 unsigned long before;
2478 const bool forks = argc > 0;
2479 bool draining = false;
2483 if (trace->trace_syscalls && trace__add_syscall_newtp(trace))
2484 goto out_error_raw_syscalls;
2486 if (trace->trace_syscalls)
2487 trace->vfs_getname = perf_evlist__add_vfs_getname(evlist);
2489 if ((trace->trace_pgfaults & TRACE_PFMAJ) &&
2490 perf_evlist__add_pgfault(evlist, PERF_COUNT_SW_PAGE_FAULTS_MAJ)) {
2494 if ((trace->trace_pgfaults & TRACE_PFMIN) &&
2495 perf_evlist__add_pgfault(evlist, PERF_COUNT_SW_PAGE_FAULTS_MIN))
2499 perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
2500 trace__sched_stat_runtime))
2501 goto out_error_sched_stat_runtime;
2503 err = perf_evlist__create_maps(evlist, &trace->opts.target);
2505 fprintf(trace->output, "Problems parsing the target to trace, check your options!\n");
2506 goto out_delete_evlist;
2509 err = trace__symbols_init(trace, evlist);
2511 fprintf(trace->output, "Problems initializing symbol libraries!\n");
2512 goto out_delete_evlist;
2515 perf_evlist__config(evlist, &trace->opts);
2517 signal(SIGCHLD, sig_handler);
2518 signal(SIGINT, sig_handler);
2521 err = perf_evlist__prepare_workload(evlist, &trace->opts.target,
2524 fprintf(trace->output, "Couldn't run the workload!\n");
2525 goto out_delete_evlist;
2529 err = perf_evlist__open(evlist);
2531 goto out_error_open;
2534 * Better not use !target__has_task() here because we need to cover the
2535 * case where no threads were specified in the command line, but a
2536 * workload was, and in that case we will fill in the thread_map when
2537 * we fork the workload in perf_evlist__prepare_workload.
2539 if (trace->filter_pids.nr > 0)
2540 err = perf_evlist__set_filter_pids(evlist, trace->filter_pids.nr, trace->filter_pids.entries);
2541 else if (thread_map__pid(evlist->threads, 0) == -1)
2542 err = perf_evlist__set_filter_pid(evlist, getpid());
2547 if (trace->ev_qualifier_ids.nr > 0) {
2548 err = trace__set_ev_qualifier_filter(trace);
2552 pr_debug("event qualifier tracepoint filter: %s\n",
2553 trace->syscalls.events.sys_exit->filter);
2556 err = perf_evlist__apply_filters(evlist, &evsel);
2558 goto out_error_apply_filters;
2560 err = perf_evlist__mmap(evlist, trace->opts.mmap_pages, false);
2562 goto out_error_mmap;
2564 if (!target__none(&trace->opts.target))
2565 perf_evlist__enable(evlist);
2568 perf_evlist__start_workload(evlist);
2570 trace->multiple_threads = thread_map__pid(evlist->threads, 0) == -1 ||
2571 evlist->threads->nr > 1 ||
2572 perf_evlist__first(evlist)->attr.inherit;
2574 before = trace->nr_events;
2576 for (i = 0; i < evlist->nr_mmaps; i++) {
2577 union perf_event *event;
2579 while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) {
2580 struct perf_sample sample;
2584 err = perf_evlist__parse_sample(evlist, event, &sample);
2586 fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err);
2590 trace__handle_event(trace, event, &sample);
2592 perf_evlist__mmap_consume(evlist, i);
2597 if (done && !draining) {
2598 perf_evlist__disable(evlist);
2604 if (trace->nr_events == before) {
2605 int timeout = done ? 100 : -1;
2607 if (!draining && perf_evlist__poll(evlist, timeout) > 0) {
2608 if (perf_evlist__filter_pollfd(evlist, POLLERR | POLLHUP) == 0)
2618 thread__zput(trace->current);
2620 perf_evlist__disable(evlist);
2624 trace__fprintf_thread_summary(trace, trace->output);
2626 if (trace->show_tool_stats) {
2627 fprintf(trace->output, "Stats:\n "
2628 " vfs_getname : %" PRIu64 "\n"
2629 " proc_getname: %" PRIu64 "\n",
2630 trace->stats.vfs_getname,
2631 trace->stats.proc_getname);
2636 perf_evlist__delete(evlist);
2637 trace->evlist = NULL;
2638 trace->live = false;
2641 char errbuf[BUFSIZ];
2643 out_error_sched_stat_runtime:
2644 debugfs__strerror_open_tp(errno, errbuf, sizeof(errbuf), "sched", "sched_stat_runtime");
2647 out_error_raw_syscalls:
2648 debugfs__strerror_open_tp(errno, errbuf, sizeof(errbuf), "raw_syscalls", "sys_(enter|exit)");
2652 perf_evlist__strerror_mmap(evlist, errno, errbuf, sizeof(errbuf));
2656 perf_evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf));
2659 fprintf(trace->output, "%s\n", errbuf);
2660 goto out_delete_evlist;
2662 out_error_apply_filters:
2663 fprintf(trace->output,
2664 "Failed to set filter \"%s\" on event %s with %d (%s)\n",
2665 evsel->filter, perf_evsel__name(evsel), errno,
2666 strerror_r(errno, errbuf, sizeof(errbuf)));
2667 goto out_delete_evlist;
2670 fprintf(trace->output, "Not enough memory to run!\n");
2671 goto out_delete_evlist;
2674 fprintf(trace->output, "errno=%d,%s\n", errno, strerror(errno));
2675 goto out_delete_evlist;
2678 static int trace__replay(struct trace *trace)
2680 const struct perf_evsel_str_handler handlers[] = {
2681 { "probe:vfs_getname", trace__vfs_getname, },
2683 struct perf_data_file file = {
2685 .mode = PERF_DATA_MODE_READ,
2686 .force = trace->force,
2688 struct perf_session *session;
2689 struct perf_evsel *evsel;
2692 trace->tool.sample = trace__process_sample;
2693 trace->tool.mmap = perf_event__process_mmap;
2694 trace->tool.mmap2 = perf_event__process_mmap2;
2695 trace->tool.comm = perf_event__process_comm;
2696 trace->tool.exit = perf_event__process_exit;
2697 trace->tool.fork = perf_event__process_fork;
2698 trace->tool.attr = perf_event__process_attr;
2699 trace->tool.tracing_data = perf_event__process_tracing_data;
2700 trace->tool.build_id = perf_event__process_build_id;
2702 trace->tool.ordered_events = true;
2703 trace->tool.ordering_requires_timestamps = true;
2705 /* add tid to output */
2706 trace->multiple_threads = true;
2708 session = perf_session__new(&file, false, &trace->tool);
2709 if (session == NULL)
2712 if (symbol__init(&session->header.env) < 0)
2715 trace->host = &session->machines.host;
2717 err = perf_session__set_tracepoints_handlers(session, handlers);
2721 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2722 "raw_syscalls:sys_enter");
2723 /* older kernels have syscalls tp versus raw_syscalls */
2725 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2726 "syscalls:sys_enter");
2729 (perf_evsel__init_syscall_tp(evsel, trace__sys_enter) < 0 ||
2730 perf_evsel__init_sc_tp_ptr_field(evsel, args))) {
2731 pr_err("Error during initialize raw_syscalls:sys_enter event\n");
2735 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2736 "raw_syscalls:sys_exit");
2738 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2739 "syscalls:sys_exit");
2741 (perf_evsel__init_syscall_tp(evsel, trace__sys_exit) < 0 ||
2742 perf_evsel__init_sc_tp_uint_field(evsel, ret))) {
2743 pr_err("Error during initialize raw_syscalls:sys_exit event\n");
2747 evlist__for_each(session->evlist, evsel) {
2748 if (evsel->attr.type == PERF_TYPE_SOFTWARE &&
2749 (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ||
2750 evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MIN ||
2751 evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS))
2752 evsel->handler = trace__pgfault;
2755 err = parse_target_str(trace);
2761 err = perf_session__process_events(session);
2763 pr_err("Failed to process events, error %d", err);
2765 else if (trace->summary)
2766 trace__fprintf_thread_summary(trace, trace->output);
2769 perf_session__delete(session);
/*
 * Write the banner that introduces the per-thread summary to @fp.
 * Returns whatever fprintf() reported for that single write.
 */
static size_t trace__fprintf_threads_header(FILE *fp)
{
	return fprintf(fp, "\n Summary of events:\n\n");
}
2783 static size_t thread__dump_stats(struct thread_trace *ttrace,
2784 struct trace *trace, FILE *fp)
2786 struct stats *stats;
2789 struct int_node *inode = intlist__first(ttrace->syscall_stats);
2794 printed += fprintf(fp, "\n");
2796 printed += fprintf(fp, " syscall calls total min avg max stddev\n");
2797 printed += fprintf(fp, " (msec) (msec) (msec) (msec) (%%)\n");
2798 printed += fprintf(fp, " --------------- -------- --------- --------- --------- --------- ------\n");
2800 /* each int_node is a syscall */
2802 stats = inode->priv;
2804 double min = (double)(stats->min) / NSEC_PER_MSEC;
2805 double max = (double)(stats->max) / NSEC_PER_MSEC;
2806 double avg = avg_stats(stats);
2808 u64 n = (u64) stats->n;
2810 pct = avg ? 100.0 * stddev_stats(stats)/avg : 0.0;
2811 avg /= NSEC_PER_MSEC;
2813 sc = &trace->syscalls.table[inode->i];
2814 printed += fprintf(fp, " %-15s", sc->name);
2815 printed += fprintf(fp, " %8" PRIu64 " %9.3f %9.3f %9.3f",
2816 n, avg * n, min, avg);
2817 printed += fprintf(fp, " %9.3f %9.2f%%\n", max, pct);
2820 inode = intlist__next(inode);
2823 printed += fprintf(fp, "\n\n");
/* Context handed to the per-thread summary callback for every thread visited. */
struct summary_data {
	FILE *fp;		/* stream the summary is written to */
	struct trace *trace;	/* tracer whose event totals are reported */
	size_t printed;		/* running total of what has been written */
};
2835 static int trace__fprintf_one_thread(struct thread *thread, void *priv)
2837 struct summary_data *data = priv;
2838 FILE *fp = data->fp;
2839 size_t printed = data->printed;
2840 struct trace *trace = data->trace;
2841 struct thread_trace *ttrace = thread__priv(thread);
2847 ratio = (double)ttrace->nr_events / trace->nr_events * 100.0;
2849 printed += fprintf(fp, " %s (%d), ", thread__comm_str(thread), thread->tid);
2850 printed += fprintf(fp, "%lu events, ", ttrace->nr_events);
2851 printed += fprintf(fp, "%.1f%%", ratio);
2853 printed += fprintf(fp, ", %lu majfaults", ttrace->pfmaj);
2855 printed += fprintf(fp, ", %lu minfaults", ttrace->pfmin);
2856 printed += fprintf(fp, ", %.3f msec\n", ttrace->runtime_ms);
2857 printed += thread__dump_stats(ttrace, trace, fp);
2859 data->printed += printed;
2864 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
2866 struct summary_data data = {
2870 data.printed = trace__fprintf_threads_header(fp);
2872 machine__for_each_thread(trace->host, trace__fprintf_one_thread, &data);
2874 return data.printed;
2877 static int trace__set_duration(const struct option *opt, const char *str,
2878 int unset __maybe_unused)
2880 struct trace *trace = opt->value;
2882 trace->duration_filter = atof(str);
2886 static int trace__set_filter_pids(const struct option *opt, const char *str,
2887 int unset __maybe_unused)
2891 struct trace *trace = opt->value;
2893 * FIXME: introduce a intarray class, plain parse csv and create a
2894 * { int nr, int entries[] } struct...
2896 struct intlist *list = intlist__new(str);
2901 i = trace->filter_pids.nr = intlist__nr_entries(list) + 1;
2902 trace->filter_pids.entries = calloc(i, sizeof(pid_t));
2904 if (trace->filter_pids.entries == NULL)
2907 trace->filter_pids.entries[0] = getpid();
2909 for (i = 1; i < trace->filter_pids.nr; ++i)
2910 trace->filter_pids.entries[i] = intlist__entry(list, i - 1)->i;
2912 intlist__delete(list);
2918 static int trace__open_output(struct trace *trace, const char *filename)
2922 if (!stat(filename, &st) && st.st_size) {
2923 char oldname[PATH_MAX];
2925 scnprintf(oldname, sizeof(oldname), "%s.old", filename);
2927 rename(filename, oldname);
2930 trace->output = fopen(filename, "w");
2932 return trace->output == NULL ? -errno : 0;
2935 static int parse_pagefaults(const struct option *opt, const char *str,
2936 int unset __maybe_unused)
2938 int *trace_pgfaults = opt->value;
2940 if (strcmp(str, "all") == 0)
2941 *trace_pgfaults |= TRACE_PFMAJ | TRACE_PFMIN;
2942 else if (strcmp(str, "maj") == 0)
2943 *trace_pgfaults |= TRACE_PFMAJ;
2944 else if (strcmp(str, "min") == 0)
2945 *trace_pgfaults |= TRACE_PFMIN;
2952 static void evlist__set_evsel_handler(struct perf_evlist *evlist, void *handler)
2954 struct perf_evsel *evsel;
2956 evlist__for_each(evlist, evsel)
2957 evsel->handler = handler;
2960 int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
2962 const char *trace_usage[] = {
2963 "perf trace [<options>] [<command>]",
2964 "perf trace [<options>] -- <command> [<options>]",
2965 "perf trace record [<options>] [<command>]",
2966 "perf trace record [<options>] -- <command> [<options>]",
2969 struct trace trace = {
2971 .machine = audit_detect_machine(),
2972 .open_id = audit_name_to_syscall("open", trace.audit.machine),
2982 .user_freq = UINT_MAX,
2983 .user_interval = ULLONG_MAX,
2984 .no_buffering = true,
2985 .mmap_pages = UINT_MAX,
2986 .proc_map_timeout = 500,
2990 .trace_syscalls = true,
2992 const char *output_name = NULL;
2993 const char *ev_qualifier_str = NULL;
2994 const struct option trace_options[] = {
2995 OPT_CALLBACK(0, "event", &trace.evlist, "event",
2996 "event selector. use 'perf list' to list available events",
2997 parse_events_option),
2998 OPT_BOOLEAN(0, "comm", &trace.show_comm,
2999 "show the thread COMM next to its id"),
3000 OPT_BOOLEAN(0, "tool_stats", &trace.show_tool_stats, "show tool stats"),
3001 OPT_STRING('e', "expr", &ev_qualifier_str, "expr", "list of syscalls to trace"),
3002 OPT_STRING('o', "output", &output_name, "file", "output file name"),
3003 OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"),
3004 OPT_STRING('p', "pid", &trace.opts.target.pid, "pid",
3005 "trace events on existing process id"),
3006 OPT_STRING('t', "tid", &trace.opts.target.tid, "tid",
3007 "trace events on existing thread id"),
3008 OPT_CALLBACK(0, "filter-pids", &trace, "CSV list of pids",
3009 "pids to filter (by the kernel)", trace__set_filter_pids),
3010 OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide,
3011 "system-wide collection from all CPUs"),
3012 OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu",
3013 "list of cpus to monitor"),
3014 OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit,
3015 "child tasks do not inherit counters"),
3016 OPT_CALLBACK('m', "mmap-pages", &trace.opts.mmap_pages, "pages",
3017 "number of mmap data pages",
3018 perf_evlist__parse_mmap_pages),
3019 OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user",
3021 OPT_CALLBACK(0, "duration", &trace, "float",
3022 "show only events with duration > N.M ms",
3023 trace__set_duration),
3024 OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"),
3025 OPT_INCR('v', "verbose", &verbose, "be more verbose"),
3026 OPT_BOOLEAN('T', "time", &trace.full_time,
3027 "Show full timestamp, not time relative to first start"),
3028 OPT_BOOLEAN('s', "summary", &trace.summary_only,
3029 "Show only syscall summary with statistics"),
3030 OPT_BOOLEAN('S', "with-summary", &trace.summary,
3031 "Show all syscalls and summary with statistics"),
3032 OPT_CALLBACK_DEFAULT('F', "pf", &trace.trace_pgfaults, "all|maj|min",
3033 "Trace pagefaults", parse_pagefaults, "maj"),
3034 OPT_BOOLEAN(0, "syscalls", &trace.trace_syscalls, "Trace syscalls"),
3035 OPT_BOOLEAN('f', "force", &trace.force, "don't complain, do it"),
3036 OPT_UINTEGER(0, "proc-map-timeout", &trace.opts.proc_map_timeout,
3037 "per thread proc mmap processing timeout in ms"),
3040 const char * const trace_subcommands[] = { "record", NULL };
3044 signal(SIGSEGV, sighandler_dump_stack);
3045 signal(SIGFPE, sighandler_dump_stack);
3047 trace.evlist = perf_evlist__new();
3049 if (trace.evlist == NULL) {
3050 pr_err("Not enough memory to run!\n");
3055 argc = parse_options_subcommand(argc, argv, trace_options, trace_subcommands,
3056 trace_usage, PARSE_OPT_STOP_AT_NON_OPTION);
3058 if (trace.trace_pgfaults) {
3059 trace.opts.sample_address = true;
3060 trace.opts.sample_time = true;
3063 if (trace.evlist->nr_entries > 0)
3064 evlist__set_evsel_handler(trace.evlist, trace__event_handler);
3066 if ((argc >= 1) && (strcmp(argv[0], "record") == 0))
3067 return trace__record(&trace, argc-1, &argv[1]);
3069 /* summary_only implies summary option, but don't overwrite summary if set */
3070 if (trace.summary_only)
3071 trace.summary = trace.summary_only;
3073 if (!trace.trace_syscalls && !trace.trace_pgfaults &&
3074 trace.evlist->nr_entries == 0 /* Was --events used? */) {
3075 pr_err("Please specify something to trace.\n");
3079 if (output_name != NULL) {
3080 err = trace__open_output(&trace, output_name);
3082 perror("failed to create output file");
3087 if (ev_qualifier_str != NULL) {
3088 const char *s = ev_qualifier_str;
3089 struct strlist_config slist_config = {
3090 .dirname = system_path(STRACE_GROUPS_DIR),
3093 trace.not_ev_qualifier = *s == '!';
3094 if (trace.not_ev_qualifier)
3096 trace.ev_qualifier = strlist__new(s, &slist_config);
3097 if (trace.ev_qualifier == NULL) {
3098 fputs("Not enough memory to parse event qualifier",
3104 err = trace__validate_ev_qualifier(&trace);
3109 err = target__validate(&trace.opts.target);
3111 target__strerror(&trace.opts.target, err, bf, sizeof(bf));
3112 fprintf(trace.output, "%s", bf);
3116 err = target__parse_uid(&trace.opts.target);
3118 target__strerror(&trace.opts.target, err, bf, sizeof(bf));
3119 fprintf(trace.output, "%s", bf);
3123 if (!argc && target__none(&trace.opts.target))
3124 trace.opts.target.system_wide = true;
3127 err = trace__replay(&trace);
3129 err = trace__run(&trace, argc, argv);
3132 if (output_name != NULL)
3133 fclose(trace.output);