1 #include <traceevent/event-parse.h>
3 #include "util/color.h"
4 #include "util/debug.h"
5 #include "util/evlist.h"
6 #include "util/exec_cmd.h"
7 #include "util/machine.h"
8 #include "util/session.h"
9 #include "util/thread.h"
10 #include "util/parse-options.h"
11 #include "util/strlist.h"
12 #include "util/intlist.h"
13 #include "util/thread_map.h"
14 #include "util/stat.h"
15 #include "trace-event.h"
16 #include "util/parse-events.h"
21 #include <linux/futex.h>
/*
 * Fallback definitions for constants that the system headers may not provide.
 * NOTE(review): several of the #ifndef/#endif guard lines are not visible in
 * this listing; the values below are reproduced unchanged.
 */
23 /* For older distros: */
25 # define MAP_STACK 0x20000
29 # define MADV_HWPOISON 100
32 #ifndef MADV_MERGEABLE
33 # define MADV_MERGEABLE 12
36 #ifndef MADV_UNMERGEABLE
37 # define MADV_UNMERGEABLE 13
41 # define EFD_SEMAPHORE 1
45 # define EFD_NONBLOCK 00004000
49 # define EFD_CLOEXEC 02000000
53 # define O_CLOEXEC 02000000
61 # define SOCK_CLOEXEC 02000000
65 # define SOCK_NONBLOCK 00004000
68 #ifndef MSG_CMSG_CLOEXEC
69 # define MSG_CMSG_CLOEXEC 0x40000000
72 #ifndef PERF_FLAG_FD_NO_GROUP
73 # define PERF_FLAG_FD_NO_GROUP (1UL << 0)
76 #ifndef PERF_FLAG_FD_OUTPUT
77 # define PERF_FLAG_FD_OUTPUT (1UL << 1)
80 #ifndef PERF_FLAG_PID_CGROUP
81 # define PERF_FLAG_PID_CGROUP (1UL << 2) /* pid=cgroup id, per-cpu mode only */
84 #ifndef PERF_FLAG_FD_CLOEXEC
85 # define PERF_FLAG_FD_CLOEXEC (1UL << 3) /* O_CLOEXEC */
92 u64 (*integer)(struct tp_field *field, struct perf_sample *sample);
93 void *(*pointer)(struct tp_field *field, struct perf_sample *sample);
/*
 * Generates tp_field__u<bits>(): copies sizeof(value) bytes out of
 * sample->raw_data at field->offset.
 * NOTE(review): the lines that declare `value`, return it and instantiate the
 * macro are not part of this listing.
 */
97 #define TP_UINT_FIELD(bits) \
98 static u64 tp_field__u##bits(struct tp_field *field, struct perf_sample *sample) \
101 memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
/*
 * Generates tp_field__swapped_u<bits>(): same copy out of the raw sample data
 * as the non-swapped reader, but the result is passed through bswap_<bits>().
 * Instantiated below for 16, 32 and 64 bits.
 */
110 #define TP_UINT_FIELD__SWAPPED(bits) \
111 static u64 tp_field__swapped_u##bits(struct tp_field *field, struct perf_sample *sample) \
114 memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
115 return bswap_##bits(value);\
118 TP_UINT_FIELD__SWAPPED(16);
119 TP_UINT_FIELD__SWAPPED(32);
120 TP_UINT_FIELD__SWAPPED(64);
/*
 * Sets up `field` as an unsigned-integer reader: records the format field's
 * offset and picks the accessor matching format_field->size. For the 16, 32
 * and 64-bit accessors the byte-swapping variant is used when needs_swap is
 * set; the 8-bit accessor has no swapped variant.
 * NOTE(review): the third parameter line, the case labels and the return
 * statements are not visible in this listing.
 */
122 static int tp_field__init_uint(struct tp_field *field,
123 struct format_field *format_field,
126 field->offset = format_field->offset;
128 switch (format_field->size) {
130 field->integer = tp_field__u8;
133 field->integer = needs_swap ? tp_field__swapped_u16 : tp_field__u16;
136 field->integer = needs_swap ? tp_field__swapped_u32 : tp_field__u32;
139 field->integer = needs_swap ? tp_field__swapped_u64 : tp_field__u64;
/* Returns the address of the field inside the sample's raw data (raw_data + field->offset). */
148 static void *tp_field__ptr(struct tp_field *field, struct perf_sample *sample)
150 return sample->raw_data + field->offset;
/*
 * Sets up `field` as a pointer reader: records the format field's offset and
 * installs tp_field__ptr as the accessor.
 * NOTE(review): the return statement is not visible in this listing.
 */
153 static int tp_field__init_ptr(struct tp_field *field, struct format_field *format_field)
155 field->offset = format_field->offset;
156 field->pointer = tp_field__ptr;
163 struct tp_field args, ret;
/*
 * Looks up the tracepoint format field called `name` on `evsel` with
 * perf_evsel__field() and initializes `field` as an unsigned-integer reader,
 * forwarding evsel->needs_swap.
 * NOTE(review): the `name` parameter line and the value returned when the
 * lookup yields NULL are not visible in this listing.
 */
167 static int perf_evsel__init_tp_uint_field(struct perf_evsel *evsel,
168 struct tp_field *field,
171 struct format_field *format_field = perf_evsel__field(evsel, name);
173 if (format_field == NULL)
176 return tp_field__init_uint(field, format_field, evsel->needs_swap);
/*
 * Initializes member `name` of the struct syscall_tp stored in evsel->priv as
 * an unsigned-integer reader; the member name, stringified, is also used as
 * the tracepoint format field name.
 */
179 #define perf_evsel__init_sc_tp_uint_field(evsel, name) \
180 ({ struct syscall_tp *sc = evsel->priv;\
181 perf_evsel__init_tp_uint_field(evsel, &sc->name, #name); })
/*
 * Looks up the tracepoint format field called `name` on `evsel` with
 * perf_evsel__field() and initializes `field` as a pointer reader.
 * NOTE(review): the `name` parameter line and the value returned when the
 * lookup yields NULL are not visible in this listing.
 */
183 static int perf_evsel__init_tp_ptr_field(struct perf_evsel *evsel,
184 struct tp_field *field,
187 struct format_field *format_field = perf_evsel__field(evsel, name);
189 if (format_field == NULL)
192 return tp_field__init_ptr(field, format_field);
/*
 * Initializes member `name` of the struct syscall_tp stored in evsel->priv as
 * a pointer reader; the member name, stringified, is also used as the
 * tracepoint format field name.
 */
195 #define perf_evsel__init_sc_tp_ptr_field(evsel, name) \
196 ({ struct syscall_tp *sc = evsel->priv;\
197 perf_evsel__init_tp_ptr_field(evsel, &sc->name, #name); })
/*
 * Destroys `evsel` through perf_evsel__delete().
 * NOTE(review): a statement between the signature and this call is not
 * visible in this listing; presumably it releases evsel->priv — confirm.
 */
199 static void perf_evsel__delete_priv(struct perf_evsel *evsel)
202 perf_evsel__delete(evsel);
/*
 * Allocates a struct syscall_tp into evsel->priv and, when the allocation
 * succeeded, initializes its `id` field reader and stores `handler` in
 * evsel->handler.
 * NOTE(review): the error paths and return values are not visible in this
 * listing.
 */
205 static int perf_evsel__init_syscall_tp(struct perf_evsel *evsel, void *handler)
207 evsel->priv = malloc(sizeof(struct syscall_tp));
208 if (evsel->priv != NULL) {
209 if (perf_evsel__init_sc_tp_uint_field(evsel, id))
212 evsel->handler = handler;
/*
 * Creates the tracepoint evsel for "raw_syscalls":<direction> and sets it up
 * with perf_evsel__init_syscall_tp(). A second perf_evsel__newtp() call using
 * the "syscalls" subsystem follows the comment about older kernels, and
 * perf_evsel__delete_priv() is called on a cleanup path.
 * NOTE(review): the conditions guarding the second lookup and the cleanup
 * path, and the return statements, are not visible in this listing.
 */
223 static struct perf_evsel *perf_evsel__syscall_newtp(const char *direction, void *handler)
225 struct perf_evsel *evsel = perf_evsel__newtp("raw_syscalls", direction);
227 /* older kernels (e.g., RHEL6) use syscalls:{enter,exit} */
229 evsel = perf_evsel__newtp("syscalls", direction);
232 if (perf_evsel__init_syscall_tp(evsel, handler))
239 perf_evsel__delete_priv(evsel);
/*
 * Read member `name` of the struct syscall_tp stored in evsel->priv out of
 * `sample`, through that member's integer accessor (first macro) or pointer
 * accessor (second macro).
 */
243 #define perf_evsel__sc_tp_uint(evsel, name, sample) \
244 ({ struct syscall_tp *fields = evsel->priv; \
245 fields->name.integer(&fields->name, sample); })
247 #define perf_evsel__sc_tp_ptr(evsel, name, sample) \
248 ({ struct syscall_tp *fields = evsel->priv; \
249 fields->name.pointer(&fields->name, sample); })
253 struct thread *thread;
263 const char **entries;
/*
 * Define a struct strarray called strarray__<array> whose nr_entries is
 * ARRAY_SIZE(array); the _OFFSET variant additionally takes `off`.
 * NOTE(review): the remaining initializer lines of both macros are not
 * visible in this listing.
 */
266 #define DEFINE_STRARRAY(array) struct strarray strarray__##array = { \
267 .nr_entries = ARRAY_SIZE(array), \
271 #define DEFINE_STRARRAY_OFFSET(array, off) struct strarray strarray__##array = { \
273 .nr_entries = ARRAY_SIZE(array), \
/*
 * Prints arg->val as a name taken from the struct strarray passed in
 * arg->parm. The table index is (arg->val - sa->offset); an index outside
 * [0, sa->nr_entries) is printed numerically with `intfmt` instead.
 * NOTE(review): tables built with designated initializers (e.g. tioctls)
 * contain NULL gap entries that would reach the "%s" below — confirm whether
 * a NULL check is needed.
 */
277 static size_t __syscall_arg__scnprintf_strarray(char *bf, size_t size,
279 struct syscall_arg *arg)
281 struct strarray *sa = arg->parm;
282 int idx = arg->val - sa->offset;
284 if (idx < 0 || idx >= sa->nr_entries)
285 return scnprintf(bf, size, intfmt, arg->val);
287 return scnprintf(bf, size, "%s", sa->entries[idx]);
/* Strarray printer whose fallback for out-of-range values is decimal ("%d"). */
290 static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size,
291 struct syscall_arg *arg)
293 return __syscall_arg__scnprintf_strarray(bf, size, "%d", arg);
296 #define SCA_STRARRAY syscall_arg__scnprintf_strarray
/* Built only on i386/x86_64: strarray printer whose fallback for out-of-range values is hexadecimal ("%#x"). */
298 #if defined(__i386__) || defined(__x86_64__)
300 * FIXME: Make this available to all arches as soon as the ioctl beautifier
301 * gets rewritten to support all arches.
303 static size_t syscall_arg__scnprintf_strhexarray(char *bf, size_t size,
304 struct syscall_arg *arg)
306 return __syscall_arg__scnprintf_strarray(bf, size, "%#x", arg);
309 #define SCA_STRHEXARRAY syscall_arg__scnprintf_strhexarray
310 #endif /* defined(__i386__) || defined(__x86_64__) */
/* Forward declaration of the file descriptor printer; its definition is not in this part of the file. */
312 static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
313 struct syscall_arg *arg);
315 #define SCA_FD syscall_arg__scnprintf_fd
/*
 * Printer for the directory fd argument of *at() syscalls: prints "CWD" on one
 * path and otherwise defers to the plain fd printer.
 * NOTE(review): the condition selecting the "CWD" path is not visible in this
 * listing (presumably a comparison against AT_FDCWD — confirm).
 */
317 static size_t syscall_arg__scnprintf_fd_at(char *bf, size_t size,
318 struct syscall_arg *arg)
323 return scnprintf(bf, size, "CWD");
325 return syscall_arg__scnprintf_fd(bf, size, arg);
328 #define SCA_FDAT syscall_arg__scnprintf_fd_at
/* Forward declaration of the printer used for close()'s fd argument; its definition is not in this part of the file. */
330 static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
331 struct syscall_arg *arg);
333 #define SCA_CLOSE_FD syscall_arg__scnprintf_close_fd
/* Prints arg->val in hexadecimal with a 0x prefix ("%#lx"). */
335 static size_t syscall_arg__scnprintf_hex(char *bf, size_t size,
336 struct syscall_arg *arg)
338 return scnprintf(bf, size, "%#lx", arg->val);
341 #define SCA_HEX syscall_arg__scnprintf_hex
/*
 * Prints arg->val as a signed decimal ("%d").
 * NOTE(review): the hex printer formats the same member with "%#lx"; if
 * arg->val is an unsigned long, passing it to "%d" is a format/argument
 * mismatch — confirm and add an (int) cast if so.
 */
343 static size_t syscall_arg__scnprintf_int(char *bf, size_t size,
344 struct syscall_arg *arg)
346 return scnprintf(bf, size, "%d", arg->val);
349 #define SCA_INT syscall_arg__scnprintf_int
/*
 * Prints a memory protection value symbolically: PROT_NONE prints "NONE",
 * otherwise each PROT_* bit tested by P_MMAP_PROT() appends its name, with
 * "|" between names. A final "%#x" print appends a numeric value.
 * NOTE(review): the tail of the P_MMAP_PROT macro, some of its invocations
 * and the condition guarding the numeric print are not visible in this
 * listing.
 */
351 static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size,
352 struct syscall_arg *arg)
354 int printed = 0, prot = arg->val;
356 if (prot == PROT_NONE)
357 return scnprintf(bf, size, "NONE");
358 #define P_MMAP_PROT(n) \
359 if (prot & PROT_##n) { \
360 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
370 P_MMAP_PROT(GROWSDOWN);
371 P_MMAP_PROT(GROWSUP);
375 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", prot);
380 #define SCA_MMAP_PROT syscall_arg__scnprintf_mmap_prot
/*
 * Prints mmap flags symbolically: each MAP_* bit tested by P_MMAP_FLAG()
 * appends its name, with "|" between names. MAP_UNINITIALIZED is only tested
 * when the headers define it. A final "%#x" print appends a numeric value.
 * NOTE(review): the tail of the P_MMAP_FLAG macro, some of its invocations
 * and the condition guarding the numeric print are not visible in this
 * listing.
 */
382 static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size,
383 struct syscall_arg *arg)
385 int printed = 0, flags = arg->val;
387 #define P_MMAP_FLAG(n) \
388 if (flags & MAP_##n) { \
389 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
394 P_MMAP_FLAG(PRIVATE);
398 P_MMAP_FLAG(ANONYMOUS);
399 P_MMAP_FLAG(DENYWRITE);
400 P_MMAP_FLAG(EXECUTABLE);
403 P_MMAP_FLAG(GROWSDOWN);
405 P_MMAP_FLAG(HUGETLB);
408 P_MMAP_FLAG(NONBLOCK);
409 P_MMAP_FLAG(NORESERVE);
410 P_MMAP_FLAG(POPULATE);
412 #ifdef MAP_UNINITIALIZED
413 P_MMAP_FLAG(UNINITIALIZED);
418 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
423 #define SCA_MMAP_FLAGS syscall_arg__scnprintf_mmap_flags
/*
 * Prints mremap flags symbolically: each MREMAP_* bit tested by
 * P_MREMAP_FLAG() appends its name ("|"-separated) and is then cleared from
 * `flags`, so the final "%#x" print shows what is left in `flags`.
 * NOTE(review): the condition guarding the numeric print is not visible in
 * this listing.
 */
425 static size_t syscall_arg__scnprintf_mremap_flags(char *bf, size_t size,
426 struct syscall_arg *arg)
428 int printed = 0, flags = arg->val;
430 #define P_MREMAP_FLAG(n) \
431 if (flags & MREMAP_##n) { \
432 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
433 flags &= ~MREMAP_##n; \
436 P_MREMAP_FLAG(MAYMOVE);
438 P_MREMAP_FLAG(FIXED);
443 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
448 #define SCA_MREMAP_FLAGS syscall_arg__scnprintf_mremap_flags
/*
 * Prints the madvise behavior symbolically: each P_MADV_BHV() expands to a
 * case label that returns the MADV_* name directly; a value matching no case
 * is printed in hexadecimal. SOFT_OFFLINE and NOHUGEPAGE are only handled
 * when the headers define them.
 * NOTE(review): the switch header and some case lines are not visible in this
 * listing.
 */
450 static size_t syscall_arg__scnprintf_madvise_behavior(char *bf, size_t size,
451 struct syscall_arg *arg)
453 int behavior = arg->val;
456 #define P_MADV_BHV(n) case MADV_##n: return scnprintf(bf, size, #n)
459 P_MADV_BHV(SEQUENTIAL);
460 P_MADV_BHV(WILLNEED);
461 P_MADV_BHV(DONTNEED);
463 P_MADV_BHV(DONTFORK);
465 P_MADV_BHV(HWPOISON);
466 #ifdef MADV_SOFT_OFFLINE
467 P_MADV_BHV(SOFT_OFFLINE);
469 P_MADV_BHV(MERGEABLE);
470 P_MADV_BHV(UNMERGEABLE);
472 P_MADV_BHV(HUGEPAGE);
474 #ifdef MADV_NOHUGEPAGE
475 P_MADV_BHV(NOHUGEPAGE);
478 P_MADV_BHV(DONTDUMP);
487 return scnprintf(bf, size, "%#x", behavior);
/*
 * Prints a flock operation symbolically. A LOCK_* command is reported only
 * when all of its bits are set in `op` ((op & LOCK_x) == LOCK_x); names are
 * "|"-separated. "NONE" is returned on an early path and a final "%#x" print
 * appends a numeric value.
 * NOTE(review): the condition for "NONE", the macro header/invocations and
 * the condition guarding the numeric print are not visible in this listing.
 */
492 static size_t syscall_arg__scnprintf_flock(char *bf, size_t size,
493 struct syscall_arg *arg)
495 int printed = 0, op = arg->val;
498 return scnprintf(bf, size, "NONE");
500 if ((op & LOCK_##cmd) == LOCK_##cmd) { \
501 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #cmd); \
516 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", op);
521 #define SCA_FLOCK syscall_arg__scnprintf_flock
/*
 * Prints the futex operation: the command (op & FUTEX_CMD_MASK) by name, or in
 * hexadecimal when unknown, followed by "|PRIV" and/or "|CLKRT" when
 * FUTEX_PRIVATE_FLAG / FUTEX_CLOCK_REALTIME are set in `op`.
 * As a side effect each known command ORs a set of SCF_* bits into arg->mask.
 * NOTE(review): presumably those bits mark the futex arguments that the
 * command does not use so they are left out of the output — confirm against
 * the code that consumes arg->mask. Some enum members and the switch header
 * are not visible in this listing.
 */
523 static size_t syscall_arg__scnprintf_futex_op(char *bf, size_t size, struct syscall_arg *arg)
525 enum syscall_futex_args {
526 SCF_UADDR = (1 << 0),
529 SCF_TIMEOUT = (1 << 3),
530 SCF_UADDR2 = (1 << 4),
534 int cmd = op & FUTEX_CMD_MASK;
538 #define P_FUTEX_OP(n) case FUTEX_##n: printed = scnprintf(bf, size, #n);
539 P_FUTEX_OP(WAIT); arg->mask |= SCF_VAL3|SCF_UADDR2; break;
540 P_FUTEX_OP(WAKE); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
541 P_FUTEX_OP(FD); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
542 P_FUTEX_OP(REQUEUE); arg->mask |= SCF_VAL3|SCF_TIMEOUT; break;
543 P_FUTEX_OP(CMP_REQUEUE); arg->mask |= SCF_TIMEOUT; break;
544 P_FUTEX_OP(CMP_REQUEUE_PI); arg->mask |= SCF_TIMEOUT; break;
545 P_FUTEX_OP(WAKE_OP); break;
546 P_FUTEX_OP(LOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
547 P_FUTEX_OP(UNLOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
548 P_FUTEX_OP(TRYLOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2; break;
549 P_FUTEX_OP(WAIT_BITSET); arg->mask |= SCF_UADDR2; break;
550 P_FUTEX_OP(WAKE_BITSET); arg->mask |= SCF_UADDR2; break;
551 P_FUTEX_OP(WAIT_REQUEUE_PI); break;
552 default: printed = scnprintf(bf, size, "%#x", cmd); break;
555 if (op & FUTEX_PRIVATE_FLAG)
556 printed += scnprintf(bf + printed, size - printed, "|PRIV");
558 if (op & FUTEX_CLOCK_REALTIME)
559 printed += scnprintf(bf + printed, size - printed, "|CLKRT");
564 #define SCA_FUTEX_OP syscall_arg__scnprintf_futex_op
/*
 * Name tables used with the strarray printers. Each table is wrapped in a
 * struct strarray (strarray__<name>) by DEFINE_STRARRAY[_OFFSET]; an entry's
 * position, plus the offset where one is given, is the numeric value it names.
 * NOTE(review): several initializers continue on lines that are not visible
 * in this listing.
 */
/* epoll_ctl operations; the table is declared with offset 1. */
566 static const char *epoll_ctl_ops[] = { "ADD", "DEL", "MOD", };
567 static DEFINE_STRARRAY_OFFSET(epoll_ctl_ops, 1);
/* Interval timer selectors. */
569 static const char *itimers[] = { "REAL", "VIRTUAL", "PROF", };
570 static DEFINE_STRARRAY(itimers);
/* lseek whence values. */
572 static const char *whences[] = { "SET", "CUR", "END",
580 static DEFINE_STRARRAY(whences);
/* fcntl commands. */
582 static const char *fcntl_cmds[] = {
583 "DUPFD", "GETFD", "SETFD", "GETFL", "SETFL", "GETLK", "SETLK",
584 "SETLKW", "SETOWN", "GETOWN", "SETSIG", "GETSIG", "F_GETLK64",
585 "F_SETLK64", "F_SETLKW64", "F_SETOWN_EX", "F_GETOWN_EX",
588 static DEFINE_STRARRAY(fcntl_cmds);
/* Resource limit identifiers. */
590 static const char *rlimit_resources[] = {
591 "CPU", "FSIZE", "DATA", "STACK", "CORE", "RSS", "NPROC", "NOFILE",
592 "MEMLOCK", "AS", "LOCKS", "SIGPENDING", "MSGQUEUE", "NICE", "RTPRIO",
595 static DEFINE_STRARRAY(rlimit_resources);
/* sigprocmask "how" values. */
597 static const char *sighow[] = { "BLOCK", "UNBLOCK", "SETMASK", };
598 static DEFINE_STRARRAY(sighow);
/* Clock identifiers. */
600 static const char *clockid[] = {
601 "REALTIME", "MONOTONIC", "PROCESS_CPUTIME_ID", "THREAD_CPUTIME_ID",
602 "MONOTONIC_RAW", "REALTIME_COARSE", "MONOTONIC_COARSE",
604 static DEFINE_STRARRAY(clockid);
/* Socket address families. */
606 static const char *socket_families[] = {
607 "UNSPEC", "LOCAL", "INET", "AX25", "IPX", "APPLETALK", "NETROM",
608 "BRIDGE", "ATMPVC", "X25", "INET6", "ROSE", "DECnet", "NETBEUI",
609 "SECURITY", "KEY", "NETLINK", "PACKET", "ASH", "ECONET", "ATMSVC",
610 "RDS", "SNA", "IRDA", "PPPOX", "WANPIPE", "LLC", "IB", "CAN", "TIPC",
611 "BLUETOOTH", "IUCV", "RXRPC", "ISDN", "PHONET", "IEEE802154", "CAIF",
612 "ALG", "NFC", "VSOCK",
614 static DEFINE_STRARRAY(socket_families);
/*
 * Prints a socket type argument. The value is split with SOCK_TYPE_MASK: the
 * masked part is the type proper, printed by name through P_SK_TYPE() or in
 * hexadecimal when unknown; the remaining bits are flags, each appended as
 * "|NAME" by P_SK_FLAG() and cleared, with a final "|%#x" print for `flags`.
 * NOTE(review): the switch header, most macro invocations and the condition
 * guarding the final numeric print are not visible in this listing.
 */
616 #ifndef SOCK_TYPE_MASK
617 #define SOCK_TYPE_MASK 0xf
620 static size_t syscall_arg__scnprintf_socket_type(char *bf, size_t size,
621 struct syscall_arg *arg)
625 flags = type & ~SOCK_TYPE_MASK;
627 type &= SOCK_TYPE_MASK;
629 * Can't use a strarray, MIPS may override for ABI reasons.
632 #define P_SK_TYPE(n) case SOCK_##n: printed = scnprintf(bf, size, #n); break;
637 P_SK_TYPE(SEQPACKET);
642 printed = scnprintf(bf, size, "%#x", type);
645 #define P_SK_FLAG(n) \
646 if (flags & SOCK_##n) { \
647 printed += scnprintf(bf + printed, size - printed, "|%s", #n); \
648 flags &= ~SOCK_##n; \
656 printed += scnprintf(bf + printed, size - printed, "|%#x", flags);
661 #define SCA_SK_TYPE syscall_arg__scnprintf_socket_type
/*
 * Fallback MSG_* definitions followed by the printer for send/recv flags:
 * each MSG_* bit tested by P_MSG_FLAG() appends its name, with "|" between
 * names. "NONE" is returned on an early path and a final "%#x" print appends
 * a numeric value.
 * NOTE(review): some #ifndef/#endif guards, the condition for "NONE", the
 * macro tail, several invocations and the condition guarding the numeric
 * print are not visible in this listing.
 */
664 #define MSG_PROBE 0x10
666 #ifndef MSG_WAITFORONE
667 #define MSG_WAITFORONE 0x10000
669 #ifndef MSG_SENDPAGE_NOTLAST
670 #define MSG_SENDPAGE_NOTLAST 0x20000
673 #define MSG_FASTOPEN 0x20000000
676 static size_t syscall_arg__scnprintf_msg_flags(char *bf, size_t size,
677 struct syscall_arg *arg)
679 int printed = 0, flags = arg->val;
682 return scnprintf(bf, size, "NONE");
683 #define P_MSG_FLAG(n) \
684 if (flags & MSG_##n) { \
685 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
691 P_MSG_FLAG(DONTROUTE);
696 P_MSG_FLAG(DONTWAIT);
703 P_MSG_FLAG(ERRQUEUE);
704 P_MSG_FLAG(NOSIGNAL);
706 P_MSG_FLAG(WAITFORONE);
707 P_MSG_FLAG(SENDPAGE_NOTLAST);
708 P_MSG_FLAG(FASTOPEN);
709 P_MSG_FLAG(CMSG_CLOEXEC);
713 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
718 #define SCA_MSG_FLAGS syscall_arg__scnprintf_msg_flags
/*
 * Prints an access() mode: F_OK (0) prints "F"; otherwise each <n>_OK bit that
 * is set appends its letter with no separator between letters. A final
 * "|%#x" print appends a numeric value for `mode`.
 * NOTE(review): the macro header, its invocations and the condition guarding
 * the numeric print are not visible in this listing.
 */
720 static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size,
721 struct syscall_arg *arg)
726 if (mode == F_OK) /* 0 */
727 return scnprintf(bf, size, "F");
729 if (mode & n##_OK) { \
730 printed += scnprintf(bf + printed, size - printed, "%s", #n); \
740 printed += scnprintf(bf + printed, size - printed, "|%#x", mode);
745 #define SCA_ACCMODE syscall_arg__scnprintf_access_mode
/* Forward declaration of the filename printer; its definition is not in this part of the file. */
747 static size_t syscall_arg__scnprintf_filename(char *bf, size_t size,
748 struct syscall_arg *arg);
750 #define SCA_FILENAME syscall_arg__scnprintf_filename
/*
 * Prints open() flags symbolically, "|"-separated. When O_CREAT is not set,
 * bit (arg->idx + 1) is set in arg->mask so that the next argument (the mode)
 * is masked. "RDONLY" is returned on an early path; O_SYNC is reported only
 * when all of its bits are set; a final "%#x" print appends a numeric value.
 * NOTE(review): the condition for "RDONLY", the macro header/invocations and
 * the condition guarding the numeric print are not visible in this listing.
 */
752 static size_t syscall_arg__scnprintf_open_flags(char *bf, size_t size,
753 struct syscall_arg *arg)
755 int printed = 0, flags = arg->val;
757 if (!(flags & O_CREAT))
758 arg->mask |= 1 << (arg->idx + 1); /* Mask the mode parm */
761 return scnprintf(bf, size, "RDONLY");
763 if (flags & O_##n) { \
764 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
788 if ((flags & O_SYNC) == O_SYNC)
789 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", "SYNC");
801 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
806 #define SCA_OPEN_FLAGS syscall_arg__scnprintf_open_flags
/*
 * Prints perf_event_open() flags symbolically: each PERF_FLAG_* bit that is
 * set appends its name ("|"-separated) and is cleared from `flags`, so the
 * final "%#x" print shows what is left in `flags`.
 * NOTE(review): the macro header, its invocations and the condition guarding
 * the numeric print are not visible in this listing.
 */
808 static size_t syscall_arg__scnprintf_perf_flags(char *bf, size_t size,
809 struct syscall_arg *arg)
811 int printed = 0, flags = arg->val;
817 if (flags & PERF_FLAG_##n) { \
818 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
819 flags &= ~PERF_FLAG_##n; \
829 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
834 #define SCA_PERF_FLAGS syscall_arg__scnprintf_perf_flags
/*
 * Prints eventfd flags symbolically: each EFD_* bit that is set appends its
 * name, "|"-separated. "NONE" is returned on an early path and a final "%#x"
 * print appends a numeric value.
 * NOTE(review): the condition for "NONE", the macro header/invocations and
 * the condition guarding the numeric print are not visible in this listing.
 */
836 static size_t syscall_arg__scnprintf_eventfd_flags(char *bf, size_t size,
837 struct syscall_arg *arg)
839 int printed = 0, flags = arg->val;
842 return scnprintf(bf, size, "NONE");
844 if (flags & EFD_##n) { \
845 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
855 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
860 #define SCA_EFD_FLAGS syscall_arg__scnprintf_eventfd_flags
/*
 * Prints pipe2() flags symbolically: each O_* bit that is set appends its
 * name, "|"-separated; a final "%#x" print appends a numeric value.
 * NOTE(review): the macro header, its invocations and the condition guarding
 * the numeric print are not visible in this listing.
 */
862 static size_t syscall_arg__scnprintf_pipe_flags(char *bf, size_t size,
863 struct syscall_arg *arg)
865 int printed = 0, flags = arg->val;
868 if (flags & O_##n) { \
869 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
878 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
883 #define SCA_PIPE_FLAGS syscall_arg__scnprintf_pipe_flags
/*
 * Prints a signal number symbolically: each P_SIGNUM() expands to a case label
 * that returns the name without the "SIG" prefix; a value matching no case is
 * printed in hexadecimal.
 * NOTE(review): the switch header and the P_SIGNUM invocations are not
 * visible in this listing.
 */
885 static size_t syscall_arg__scnprintf_signum(char *bf, size_t size, struct syscall_arg *arg)
890 #define P_SIGNUM(n) case SIG##n: return scnprintf(bf, size, #n)
933 return scnprintf(bf, size, "%#x", sig);
936 #define SCA_SIGNUM syscall_arg__scnprintf_signum
/*
 * Built only on i386/x86_64: names of terminal ioctl commands. The strarray
 * is declared with offset 0x5401, so entry i names command 0x5401 + i.
 * The designated initializers ([0x27], [0x50], [0x60]) skip over indexes, and
 * the skipped entries are left as NULL pointers.
 */
938 #if defined(__i386__) || defined(__x86_64__)
940 * FIXME: Make this available to all arches.
942 #define TCGETS 0x5401
944 static const char *tioctls[] = {
945 "TCGETS", "TCSETS", "TCSETSW", "TCSETSF", "TCGETA", "TCSETA", "TCSETAW",
946 "TCSETAF", "TCSBRK", "TCXONC", "TCFLSH", "TIOCEXCL", "TIOCNXCL",
947 "TIOCSCTTY", "TIOCGPGRP", "TIOCSPGRP", "TIOCOUTQ", "TIOCSTI",
948 "TIOCGWINSZ", "TIOCSWINSZ", "TIOCMGET", "TIOCMBIS", "TIOCMBIC",
949 "TIOCMSET", "TIOCGSOFTCAR", "TIOCSSOFTCAR", "FIONREAD", "TIOCLINUX",
950 "TIOCCONS", "TIOCGSERIAL", "TIOCSSERIAL", "TIOCPKT", "FIONBIO",
951 "TIOCNOTTY", "TIOCSETD", "TIOCGETD", "TCSBRKP", [0x27] = "TIOCSBRK",
952 "TIOCCBRK", "TIOCGSID", "TCGETS2", "TCSETS2", "TCSETSW2", "TCSETSF2",
953 "TIOCGRS485", "TIOCSRS485", "TIOCGPTN", "TIOCSPTLCK",
954 "TIOCGDEV||TCGETX", "TCSETX", "TCSETXF", "TCSETXW", "TIOCSIG",
955 "TIOCVHANGUP", "TIOCGPKT", "TIOCGPTLCK", "TIOCGEXCL",
956 [0x50] = "FIONCLEX", "FIOCLEX", "FIOASYNC", "TIOCSERCONFIG",
957 "TIOCSERGWILD", "TIOCSERSWILD", "TIOCGLCKTRMIOS", "TIOCSLCKTRMIOS",
958 "TIOCSERGSTRUCT", "TIOCSERGETLSR", "TIOCSERGETMULTI", "TIOCSERSETMULTI",
959 "TIOCMIWAIT", "TIOCGICOUNT", [0x60] = "FIOQSIZE",
962 static DEFINE_STRARRAY_OFFSET(tioctls, 0x5401);
963 #endif /* defined(__i386__) || defined(__x86_64__) */
/*
 * Shorthand for a syscall_fmt entry whose argument number `arg` is printed
 * through the strarray printer using strarray__<array>. The `name` parameter
 * is not referenced in the expansion.
 */
965 #define STRARRAY(arg, name, array) \
966 .arg_scnprintf = { [arg] = SCA_STRARRAY, }, \
967 .arg_parm = { [arg] = &strarray__##array, }
/*
 * Per-syscall formatting table. Each entry names a syscall and may attach
 * per-argument printers (arg_scnprintf, indexed by argument position), the
 * parameters for those printers (arg_parm), an alias name, and the errmsg,
 * hexret and timeout flags.
 * Entries are looked up with bsearch() in syscall_fmt__find(), so they must
 * stay sorted by .name in strcmp() order.
 * NOTE(review): most struct member declarations and the closing lines of the
 * struct and of the table are not visible in this listing.
 */
969 static struct syscall_fmt {
972 size_t (*arg_scnprintf[6])(char *bf, size_t size, struct syscall_arg *arg);
978 { .name = "access", .errmsg = true,
979 .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */
980 [1] = SCA_ACCMODE, /* mode */ }, },
981 { .name = "arch_prctl", .errmsg = true, .alias = "prctl", },
982 { .name = "brk", .hexret = true,
983 .arg_scnprintf = { [0] = SCA_HEX, /* brk */ }, },
984 { .name = "chdir", .errmsg = true,
985 .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
986 { .name = "chmod", .errmsg = true,
987 .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
988 { .name = "chroot", .errmsg = true,
989 .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
990 { .name = "clock_gettime", .errmsg = true, STRARRAY(0, clk_id, clockid), },
991 { .name = "close", .errmsg = true,
992 .arg_scnprintf = { [0] = SCA_CLOSE_FD, /* fd */ }, },
993 { .name = "connect", .errmsg = true, },
994 { .name = "creat", .errmsg = true,
995 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
996 { .name = "dup", .errmsg = true,
997 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
998 { .name = "dup2", .errmsg = true,
999 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1000 { .name = "dup3", .errmsg = true,
1001 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1002 { .name = "epoll_ctl", .errmsg = true, STRARRAY(1, op, epoll_ctl_ops), },
1003 { .name = "eventfd2", .errmsg = true,
1004 .arg_scnprintf = { [1] = SCA_EFD_FLAGS, /* flags */ }, },
1005 { .name = "faccessat", .errmsg = true,
1006 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1007 [1] = SCA_FILENAME, /* filename */ }, },
1008 { .name = "fadvise64", .errmsg = true,
1009 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1010 { .name = "fallocate", .errmsg = true,
1011 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1012 { .name = "fchdir", .errmsg = true,
1013 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1014 { .name = "fchmod", .errmsg = true,
1015 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1016 { .name = "fchmodat", .errmsg = true,
1017 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */
1018 [1] = SCA_FILENAME, /* filename */ }, },
1019 { .name = "fchown", .errmsg = true,
1020 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1021 { .name = "fchownat", .errmsg = true,
1022 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */
1023 [1] = SCA_FILENAME, /* filename */ }, },
1024 { .name = "fcntl", .errmsg = true,
1025 .arg_scnprintf = { [0] = SCA_FD, /* fd */
1026 [1] = SCA_STRARRAY, /* cmd */ },
1027 .arg_parm = { [1] = &strarray__fcntl_cmds, /* cmd */ }, },
1028 { .name = "fdatasync", .errmsg = true,
1029 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1030 { .name = "flock", .errmsg = true,
1031 .arg_scnprintf = { [0] = SCA_FD, /* fd */
1032 [1] = SCA_FLOCK, /* cmd */ }, },
1033 { .name = "fsetxattr", .errmsg = true,
1034 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1035 { .name = "fstat", .errmsg = true, .alias = "newfstat",
1036 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1037 { .name = "fstatat", .errmsg = true, .alias = "newfstatat",
1038 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1039 [1] = SCA_FILENAME, /* filename */ }, },
1040 { .name = "fstatfs", .errmsg = true,
1041 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1042 { .name = "fsync", .errmsg = true,
1043 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1044 { .name = "ftruncate", .errmsg = true,
1045 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1046 { .name = "futex", .errmsg = true,
1047 .arg_scnprintf = { [1] = SCA_FUTEX_OP, /* op */ }, },
1048 { .name = "futimesat", .errmsg = true,
1049 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */
1050 [1] = SCA_FILENAME, /* filename */ }, },
1051 { .name = "getdents", .errmsg = true,
1052 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1053 { .name = "getdents64", .errmsg = true,
1054 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1055 { .name = "getitimer", .errmsg = true, STRARRAY(0, which, itimers), },
1056 { .name = "getrlimit", .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
1057 { .name = "getxattr", .errmsg = true,
1058 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1059 { .name = "inotify_add_watch", .errmsg = true,
1060 .arg_scnprintf = { [1] = SCA_FILENAME, /* pathname */ }, },
1061 { .name = "ioctl", .errmsg = true,
1062 .arg_scnprintf = { [0] = SCA_FD, /* fd */
1063 #if defined(__i386__) || defined(__x86_64__)
1065 * FIXME: Make this available to all arches.
1067 [1] = SCA_STRHEXARRAY, /* cmd */
1068 [2] = SCA_HEX, /* arg */ },
1069 .arg_parm = { [1] = &strarray__tioctls, /* cmd */ }, },
1071 [2] = SCA_HEX, /* arg */ }, },
1073 { .name = "kill", .errmsg = true,
1074 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1075 { .name = "lchown", .errmsg = true,
1076 .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1077 { .name = "lgetxattr", .errmsg = true,
1078 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1079 { .name = "linkat", .errmsg = true,
1080 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
1081 { .name = "listxattr", .errmsg = true,
1082 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1083 { .name = "llistxattr", .errmsg = true,
1084 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1085 { .name = "lremovexattr", .errmsg = true,
1086 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1087 { .name = "lseek", .errmsg = true,
1088 .arg_scnprintf = { [0] = SCA_FD, /* fd */
1089 [2] = SCA_STRARRAY, /* whence */ },
1090 .arg_parm = { [2] = &strarray__whences, /* whence */ }, },
1091 { .name = "lsetxattr", .errmsg = true,
1092 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1093 { .name = "lstat", .errmsg = true, .alias = "newlstat",
1094 .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1095 { .name = "lsxattr", .errmsg = true,
1096 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1097 { .name = "madvise", .errmsg = true,
1098 .arg_scnprintf = { [0] = SCA_HEX, /* start */
1099 [2] = SCA_MADV_BHV, /* behavior */ }, },
1100 { .name = "mkdir", .errmsg = true,
1101 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1102 { .name = "mkdirat", .errmsg = true,
1103 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */
1104 [1] = SCA_FILENAME, /* pathname */ }, },
1105 { .name = "mknod", .errmsg = true,
1106 .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1107 { .name = "mknodat", .errmsg = true,
1108 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */
1109 [1] = SCA_FILENAME, /* filename */ }, },
1110 { .name = "mlock", .errmsg = true,
1111 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1112 { .name = "mlockall", .errmsg = true,
1113 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
/* NOTE(review): per mlockall(2) the only argument is a flags value, not an address — confirm the label and formatter above. */
1114 { .name = "mmap", .hexret = true,
1115 .arg_scnprintf = { [0] = SCA_HEX, /* addr */
1116 [2] = SCA_MMAP_PROT, /* prot */
1117 [3] = SCA_MMAP_FLAGS, /* flags */
1118 [4] = SCA_FD, /* fd */ }, },
1119 { .name = "mprotect", .errmsg = true,
1120 .arg_scnprintf = { [0] = SCA_HEX, /* start */
1121 [2] = SCA_MMAP_PROT, /* prot */ }, },
1122 { .name = "mq_unlink", .errmsg = true,
1123 .arg_scnprintf = { [0] = SCA_FILENAME, /* u_name */ }, },
1124 { .name = "mremap", .hexret = true,
1125 .arg_scnprintf = { [0] = SCA_HEX, /* addr */
1126 [3] = SCA_MREMAP_FLAGS, /* flags */
1127 [4] = SCA_HEX, /* new_addr */ }, },
1128 { .name = "munlock", .errmsg = true,
1129 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1130 { .name = "munmap", .errmsg = true,
1131 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1132 { .name = "name_to_handle_at", .errmsg = true,
1133 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1134 { .name = "newfstatat", .errmsg = true,
1135 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1136 [1] = SCA_FILENAME, /* filename */ }, },
1137 { .name = "open", .errmsg = true,
1138 .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */
1139 [1] = SCA_OPEN_FLAGS, /* flags */ }, },
1140 { .name = "open_by_handle_at", .errmsg = true,
1141 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1142 [2] = SCA_OPEN_FLAGS, /* flags */ }, },
1143 { .name = "openat", .errmsg = true,
1144 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1145 [1] = SCA_FILENAME, /* filename */
1146 [2] = SCA_OPEN_FLAGS, /* flags */ }, },
1147 { .name = "perf_event_open", .errmsg = true,
1148 .arg_scnprintf = { [1] = SCA_INT, /* pid */
1149 [2] = SCA_INT, /* cpu */
1150 [3] = SCA_FD, /* group_fd */
1151 [4] = SCA_PERF_FLAGS, /* flags */ }, },
1152 { .name = "pipe2", .errmsg = true,
1153 .arg_scnprintf = { [1] = SCA_PIPE_FLAGS, /* flags */ }, },
1154 { .name = "poll", .errmsg = true, .timeout = true, },
1155 { .name = "ppoll", .errmsg = true, .timeout = true, },
1156 { .name = "pread", .errmsg = true, .alias = "pread64",
1157 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1158 { .name = "preadv", .errmsg = true, .alias = "pread",
1159 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
/* NOTE(review): the alias "pread" on the preadv entry looks like a copy of the entry above it — confirm it is intended. */
1160 { .name = "prlimit64", .errmsg = true, STRARRAY(1, resource, rlimit_resources), },
1161 { .name = "pwrite", .errmsg = true, .alias = "pwrite64",
1162 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1163 { .name = "pwritev", .errmsg = true,
1164 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1165 { .name = "read", .errmsg = true,
1166 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1167 { .name = "readlink", .errmsg = true,
1168 .arg_scnprintf = { [0] = SCA_FILENAME, /* path */ }, },
1169 { .name = "readlinkat", .errmsg = true,
1170 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1171 [1] = SCA_FILENAME, /* pathname */ }, },
1172 { .name = "readv", .errmsg = true,
1173 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1174 { .name = "recvfrom", .errmsg = true,
1175 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1176 { .name = "recvmmsg", .errmsg = true,
1177 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1178 { .name = "recvmsg", .errmsg = true,
1179 .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
1180 { .name = "removexattr", .errmsg = true,
1181 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1182 { .name = "renameat", .errmsg = true,
1183 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1184 { .name = "rmdir", .errmsg = true,
1185 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1186 { .name = "rt_sigaction", .errmsg = true,
1187 .arg_scnprintf = { [0] = SCA_SIGNUM, /* sig */ }, },
1188 { .name = "rt_sigprocmask", .errmsg = true, STRARRAY(0, how, sighow), },
1189 { .name = "rt_sigqueueinfo", .errmsg = true,
1190 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1191 { .name = "rt_tgsigqueueinfo", .errmsg = true,
1192 .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
1193 { .name = "select", .errmsg = true, .timeout = true, },
1194 { .name = "sendmmsg", .errmsg = true,
1195 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1196 { .name = "sendmsg", .errmsg = true,
1197 .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
1198 { .name = "sendto", .errmsg = true,
1199 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1200 { .name = "setitimer", .errmsg = true, STRARRAY(0, which, itimers), },
1201 { .name = "setrlimit", .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
1202 { .name = "setxattr", .errmsg = true,
1203 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1204 { .name = "shutdown", .errmsg = true,
1205 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1206 { .name = "socket", .errmsg = true,
1207 .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
1208 [1] = SCA_SK_TYPE, /* type */ },
1209 .arg_parm = { [0] = &strarray__socket_families, /* family */ }, },
1210 { .name = "socketpair", .errmsg = true,
1211 .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
1212 [1] = SCA_SK_TYPE, /* type */ },
1213 .arg_parm = { [0] = &strarray__socket_families, /* family */ }, },
1214 { .name = "stat", .errmsg = true, .alias = "newstat",
1215 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1216 { .name = "statfs", .errmsg = true,
1217 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1218 { .name = "swapoff", .errmsg = true,
1219 .arg_scnprintf = { [0] = SCA_FILENAME, /* specialfile */ }, },
1220 { .name = "swapon", .errmsg = true,
1221 .arg_scnprintf = { [0] = SCA_FILENAME, /* specialfile */ }, },
1222 { .name = "symlinkat", .errmsg = true,
1223 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1224 { .name = "tgkill", .errmsg = true,
1225 .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
1226 { .name = "tkill", .errmsg = true,
1227 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1228 { .name = "truncate", .errmsg = true,
1229 .arg_scnprintf = { [0] = SCA_FILENAME, /* path */ }, },
1230 { .name = "uname", .errmsg = true, .alias = "newuname", },
1231 { .name = "unlinkat", .errmsg = true,
1232 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1233 [1] = SCA_FILENAME, /* pathname */ }, },
1234 { .name = "utime", .errmsg = true,
1235 .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1236 { .name = "utimensat", .errmsg = true,
1237 .arg_scnprintf = { [0] = SCA_FDAT, /* dirfd */
1238 [1] = SCA_FILENAME, /* filename */ }, },
1239 { .name = "utimes", .errmsg = true,
1240 .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1241 { .name = "vmsplice", .errmsg = true,
1242 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1243 { .name = "write", .errmsg = true,
1244 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1245 { .name = "writev", .errmsg = true,
1246 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1249 static int syscall_fmt__cmp(const void *name, const void *fmtp)
1251 const struct syscall_fmt *fmt = fmtp;
1252 return strcmp(name, fmt->name);
1255 static struct syscall_fmt *syscall_fmt__find(const char *name)
1257 const int nmemb = ARRAY_SIZE(syscall_fmts);
1258 return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp);
/* Tracepoint format found with trace_event__tp_format() in trace__read_syscall_info(). */
1262 struct event_format *tp_format;
/* Head of the argument field list, taken from tp_format->format.fields. */
1264 struct format_field *args;
/* Entry returned by syscall_fmt__find() for this syscall; NULL when there is none. */
1267 struct syscall_fmt *fmt;
/* Per-argument formatter table, allocated in syscall__set_arg_fmts(). */
1268 size_t (**arg_scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
/*
 * Print a duration as "(<value> ms): " on @fp and return the number of
 * characters written.  @t is divided by NSEC_PER_MSEC to get milliseconds.
 * The value is coloured red from 1 ms upwards, yellow from 0.01 ms upwards
 * and is printed in the normal colour otherwise.
 */
1272 static size_t fprintf_duration(unsigned long t, FILE *fp)
1274 double duration = (double)t / NSEC_PER_MSEC;
1275 size_t printed = fprintf(fp, "(");
1277 if (duration >= 1.0)
1278 printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration);
1279 else if (duration >= 0.01)
1280 printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration);
1282 printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration);
1283 return printed + fprintf(fp, "): ");
1287 * filename.ptr: The filename char pointer that will be vfs_getname'd
1288 * filename.entry_str_pos: Where to insert the string translated from
1289 * filename.ptr by the vfs_getname tracepoint/kprobe.
1291 struct thread_trace {
/* Events attributed to this thread; incremented in thread__trace(). */
1295 unsigned long nr_events;
/* NOTE(review): presumably major/minor page-fault counters - the updates are not in the visible lines. */
1296 unsigned long pfmaj, pfmin;
/* Per-syscall statistics: intlist keyed by syscall id, each node's priv holds a struct stats. */
1308 struct intlist *syscall_stats;
/*
 * Allocate a thread_trace with zalloc(), mark its fd->path table as empty
 * and create the per-syscall statistics list.
 */
1311 static struct thread_trace *thread_trace__new(void)
1313 struct thread_trace *ttrace = zalloc(sizeof(struct thread_trace));
/* -1 is the "no table yet" marker that trace__set_fd_pathname() tests for. */
1316 ttrace->paths.max = -1;
1318 ttrace->syscall_stats = intlist__new(NULL);
/*
 * Fetch the thread_trace kept as @thread's private data, creating it the
 * first time the thread is seen, and count one more event for the thread.
 * The out-of-memory warning at the end is written to @fp.
 */
1323 static struct thread_trace *thread__trace(struct thread *thread, FILE *fp)
1325 struct thread_trace *ttrace;
1330 if (thread__priv(thread) == NULL)
1331 thread__set_priv(thread, thread_trace__new());
/* Still NULL here means thread_trace__new() could not allocate. */
1333 if (thread__priv(thread) == NULL)
1336 ttrace = thread__priv(thread);
1337 ++ttrace->nr_events;
1341 color_fprintf(fp, PERF_COLOR_RED,
1342 "WARNING: not enough memory, dropping samples!\n");
/* Bits tested against trace->trace_pgfaults to select major/minor page-fault events. */
1346 #define TRACE_PFMAJ (1 << 0)
1347 #define TRACE_PFMIN (1 << 1)
/* Size in bytes of each thread's entry_str buffer (allocated in trace__sys_enter()). */
1349 static const size_t trace__entry_str_size = 2048;
/* Embedded tool; callbacks recover the enclosing trace with container_of(). */
1352 struct perf_tool tool;
/* Syscall descriptors indexed by syscall id, grown on demand by trace__read_syscall_info(). */
1359 struct syscall *table;
1361 struct perf_evsel *sys_enter,
1365 struct record_opts opts;
1366 struct perf_evlist *evlist;
1367 struct machine *host;
/* Thread whose syscall entry was handled last; a reference is held (see trace__sys_enter()). */
1368 struct thread *current;
1371 unsigned long nr_events;
/* Syscall names selected by the user; converted to ids by trace__validate_ev_qualifier(). */
1372 struct strlist *ev_qualifier;
/* Pathname captured by trace__vfs_getname(); consumed by trace__sys_exit() for open. */
1377 const char *last_vfs_getname;
/* Optional tid/pid lists built by parse_target_str() and consulted by skip_sample(). */
1378 struct intlist *tid_list;
1379 struct intlist *pid_list;
/* Compared against durations after scaling by NSEC_PER_MSEC, i.e. expressed in milliseconds. */
1384 double duration_filter;
/* Passed negated as the "in" flag when building the id filter expression. */
1390 bool not_ev_qualifier;
/* When set, trace__fprintf_entry_head() prints the tid (and optionally comm). */
1394 bool multiple_threads;
1398 bool show_tool_stats;
1399 bool trace_syscalls;
1405 static int trace__set_fd_pathname(struct thread *thread, int fd, const char *pathname)
1407 struct thread_trace *ttrace = thread__priv(thread);
1409 if (fd > ttrace->paths.max) {
1410 char **npath = realloc(ttrace->paths.table, (fd + 1) * sizeof(char *));
1415 if (ttrace->paths.max != -1) {
1416 memset(npath + ttrace->paths.max + 1, 0,
1417 (fd - ttrace->paths.max) * sizeof(char *));
1419 memset(npath, 0, (fd + 1) * sizeof(char *));
1422 ttrace->paths.table = npath;
1423 ttrace->paths.max = fd;
1426 ttrace->paths.table[fd] = strdup(pathname);
1428 return ttrace->paths.table[fd] != NULL ? 0 : -1;
/*
 * Find the path behind @fd by reading the thread's fd symlink under /proc
 * and cache it with trace__set_fd_pathname(), whose result is returned on
 * the success path.
 */
1431 static int thread__read_fd_path(struct thread *thread, int fd)
1433 char linkname[PATH_MAX], pathname[PATH_MAX];
/* When pid and tid match use /proc/<pid>/fd, otherwise go through task/<tid>. */
1437 if (thread->pid_ == thread->tid) {
1438 scnprintf(linkname, sizeof(linkname),
1439 "/proc/%d/fd/%d", thread->pid_, fd);
1441 scnprintf(linkname, sizeof(linkname),
1442 "/proc/%d/task/%d/fd/%d", thread->pid_, thread->tid, fd);
/* Give up if the link cannot be stat'ed or its target plus NUL would not fit in pathname. */
1445 if (lstat(linkname, &st) < 0 || st.st_size + 1 > (off_t)sizeof(pathname))
1448 ret = readlink(linkname, pathname, sizeof(pathname));
/* A target longer than lstat() reported is treated as a failure too. */
1450 if (ret < 0 || ret > st.st_size)
/* readlink() does not NUL-terminate the buffer. */
1453 pathname[ret] = '\0';
1454 return trace__set_fd_pathname(thread, fd, pathname);
/*
 * Return the cached path for @fd of @thread.  On a cache miss the path is
 * read from /proc via thread__read_fd_path() and trace->stats.proc_getname
 * is incremented.
 */
1457 static const char *thread__fd_path(struct thread *thread, int fd,
1458 struct trace *trace)
1460 struct thread_trace *ttrace = thread__priv(thread);
/* Miss: fd is beyond the table or its slot is empty. */
1468 if ((fd > ttrace->paths.max || ttrace->paths.table[fd] == NULL)) {
1471 ++trace->stats.proc_getname;
/* A non-zero result means the path could not be read or stored. */
1472 if (thread__read_fd_path(thread, fd))
1476 return ttrace->paths.table[fd];
/*
 * Format a file descriptor argument into @bf: the number in decimal,
 * followed by the path from thread__fd_path() in angle brackets.
 */
1479 static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
1480 struct syscall_arg *arg)
1483 size_t printed = scnprintf(bf, size, "%d", fd);
1484 const char *path = thread__fd_path(arg->thread, fd, arg->trace);
1487 printed += scnprintf(bf + printed, size - printed, "<%s>", path);
/*
 * Format the fd argument of close like any other fd, then drop the cached
 * path for that descriptor from the thread's table.
 */
1492 static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
1493 struct syscall_arg *arg)
1496 size_t printed = syscall_arg__scnprintf_fd(bf, size, arg);
1497 struct thread_trace *ttrace = thread__priv(arg->thread);
/* Only touch the table when the fd is within its bounds. */
1499 if (ttrace && fd >= 0 && fd <= ttrace->paths.max)
1500 zfree(&ttrace->paths.table[fd]);
/*
 * Record the pending filename pointer for @thread together with the offset
 * of @bf inside the thread's entry_str; trace__vfs_getname() later inserts
 * the resolved name at that offset.
 */
1505 static void thread__set_filename_pos(struct thread *thread, const char *bf,
1508 struct thread_trace *ttrace = thread__priv(thread);
1510 ttrace->filename.ptr = ptr;
1511 ttrace->filename.entry_str_pos = bf - ttrace->entry_str;
1514 static size_t syscall_arg__scnprintf_filename(char *bf, size_t size,
1515 struct syscall_arg *arg)
1517 unsigned long ptr = arg->val;
1519 if (!arg->trace->vfs_getname)
1520 return scnprintf(bf, size, "%#x", ptr);
1522 thread__set_filename_pos(arg->thread, bf, ptr);
1526 static bool trace__filter_duration(struct trace *trace, double t)
1528 return t < (trace->duration_filter * NSEC_PER_MSEC);
1531 static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
1533 double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC;
1535 return fprintf(fp, "%10.3f ", ts);
/* done is polled by the event loop in trace__run(); interrupted records whether the signal was SIGINT. */
1538 static bool done = false;
1539 static bool interrupted = false;
/* Handler installed by trace__run() for SIGCHLD and SIGINT. */
1541 static void sig_handler(int sig)
1544 interrupted = sig == SIGINT;
/*
 * Print the leading part of an output line on @fp: the relative timestamp,
 * the duration and, when several threads are traced, the tid - preceded by
 * up to 14 characters of the comm when trace->show_comm is set.
 */
1547 static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread,
1548 u64 duration, u64 tstamp, FILE *fp)
1550 size_t printed = trace__fprintf_tstamp(trace, tstamp, fp);
1551 printed += fprintf_duration(duration, fp);
1553 if (trace->multiple_threads) {
1554 if (trace->show_comm)
1555 printed += fprintf(fp, "%.14s/", thread__comm_str(thread));
1556 printed += fprintf(fp, "%d ", thread->tid);
/*
 * Hand a non-sample event to @machine.  Lost-event records are also
 * reported in red on trace->output before being processed.
 */
1562 static int trace__process_event(struct trace *trace, struct machine *machine,
1563 union perf_event *event, struct perf_sample *sample)
1567 switch (event->header.type) {
1568 case PERF_RECORD_LOST:
1569 color_fprintf(trace->output, PERF_COLOR_RED,
1570 "LOST %" PRIu64 " events!\n", event->lost.lost);
1571 ret = machine__process_lost_event(machine, event, sample);
/* Every other record type takes the generic path. */
1573 ret = machine__process_event(machine, event, sample);
1580 static int trace__tool_process(struct perf_tool *tool,
1581 union perf_event *event,
1582 struct perf_sample *sample,
1583 struct machine *machine)
1585 struct trace *trace = container_of(tool, struct trace, tool);
1586 return trace__process_event(trace, machine, event, sample);
/*
 * Prepare symbol resolution for a live session: initialise the symbol
 * library, create the host machine, register the kernel address resolver
 * and synthesize thread events for evlist->threads through
 * trace__tool_process().
 */
1589 static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
1591 int err = symbol__init(NULL);
1596 trace->host = machine__new_host();
1597 if (trace->host == NULL)
1600 if (trace_event__register_resolver(trace->host, machine__resolve_kernel_addr) < 0)
1603 err = __machine__synthesize_threads(trace->host, &trace->tool, &trace->opts.target,
1604 evlist->threads, trace__tool_process, false,
1605 trace->opts.proc_map_timeout);
/*
 * Build sc->arg_scnprintf, one formatter slot per argument.  A formatter
 * named in sc->fmt wins; otherwise pointer-typed fields get the hex
 * formatter.
 */
1612 static int syscall__set_arg_fmts(struct syscall *sc)
1614 struct format_field *field;
/* calloc() leaves every slot zeroed until one is assigned below. */
1617 sc->arg_scnprintf = calloc(sc->nr_args, sizeof(void *));
1618 if (sc->arg_scnprintf == NULL)
1622 sc->arg_parm = sc->fmt->arg_parm;
1624 for (field = sc->args; field; field = field->next) {
1625 if (sc->fmt && sc->fmt->arg_scnprintf[idx])
1626 sc->arg_scnprintf[idx] = sc->fmt->arg_scnprintf[idx];
1627 else if (field->flags & FIELD_IS_POINTER)
1628 sc->arg_scnprintf[idx] = syscall_arg__scnprintf_hex;
/*
 * Fill in the descriptor for syscall @id: name, formatting entry,
 * tracepoint format and argument list.  The descriptor table is grown when
 * @id is beyond its current end.
 */
1635 static int trace__read_syscall_info(struct trace *trace, int id)
1639 const char *name = audit_syscall_to_name(id, trace->audit.machine);
/* Grow the table to hold @id, zeroing only the slots that are new. */
1644 if (id > trace->syscalls.max) {
1645 struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc));
1647 if (nsyscalls == NULL)
1650 if (trace->syscalls.max != -1) {
1651 memset(nsyscalls + trace->syscalls.max + 1, 0,
1652 (id - trace->syscalls.max) * sizeof(*sc));
1654 memset(nsyscalls, 0, (id + 1) * sizeof(*sc));
1657 trace->syscalls.table = nsyscalls;
1658 trace->syscalls.max = id;
1661 sc = trace->syscalls.table + id;
1664 sc->fmt = syscall_fmt__find(sc->name);
/* Look up the syscalls:sys_enter_<name> tracepoint format. */
1666 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name);
1667 sc->tp_format = trace_event__tp_format("syscalls", tp_name);
/* Not found under its own name: retry with the alias from the formatting entry. */
1669 if (sc->tp_format == NULL && sc->fmt && sc->fmt->alias) {
1670 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias);
1671 sc->tp_format = trace_event__tp_format("syscalls", tp_name);
1674 if (sc->tp_format == NULL)
1677 sc->args = sc->tp_format->format.fields;
1678 sc->nr_args = sc->tp_format->format.nr_fields;
1679 /* drop nr field - not relevant here; does not exist on older kernels */
1680 if (sc->args && strcmp(sc->args->name, "nr") == 0) {
1681 sc->args = sc->args->next;
/* Flag the two syscalls after which the calling thread is gone. */
1685 sc->is_exit = !strcmp(name, "exit_group") || !strcmp(name, "exit");
1687 return syscall__set_arg_fmts(sc);
/*
 * Translate the syscall names in trace->ev_qualifier into ids stored in
 * trace->ev_qualifier_ids.  Names that cannot be translated are listed on
 * trace->output, followed by hints, and the id array is released.
 */
1690 static int trace__validate_ev_qualifier(struct trace *trace)
1693 struct str_node *pos;
/* One id slot per name in the list. */
1695 trace->ev_qualifier_ids.nr = strlist__nr_entries(trace->ev_qualifier);
1696 trace->ev_qualifier_ids.entries = malloc(trace->ev_qualifier_ids.nr *
1697 sizeof(trace->ev_qualifier_ids.entries[0]));
1699 if (trace->ev_qualifier_ids.entries == NULL) {
1700 fputs("Error:\tNot enough memory for allocating events qualifier ids\n",
1708 strlist__for_each(pos, trace->ev_qualifier) {
1709 const char *sc = pos->s;
1710 int id = audit_name_to_syscall(sc, trace->audit.machine);
/* Invalid names are collected into a single comma-separated error line. */
1714 fputs("Error:\tInvalid syscall ", trace->output);
1717 fputs(", ", trace->output);
1720 fputs(sc, trace->output);
1723 trace->ev_qualifier_ids.entries[i++] = id;
1727 fputs("\nHint:\ttry 'perf list syscalls:sys_enter_*'"
1728 "\nHint:\tand: 'man syscalls'\n", trace->output);
1729 zfree(&trace->ev_qualifier_ids.entries);
1730 trace->ev_qualifier_ids.nr = 0;
1737 * args is to be interpreted as a series of longs but we need to handle
1738 * 8-byte unaligned accesses. args points to raw_data within the event
1739 * and raw_data is guaranteed to be 8-byte unaligned because it is
1740 * preceded by raw_size which is a u32. So we need to copy args to a temp
1741 * variable to read it. Most notably this avoids extended load instructions
1742 * on unaligned addresses
1745 static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
1746 unsigned char *args, struct trace *trace,
1747 struct thread *thread)
/*
 * Formats the raw argument words at @args into @bf.  With a known field
 * list each argument is printed as "name: value", through its formatter
 * from sc->arg_scnprintf when one is set; without one the arguments are
 * printed by index.
 */
1753 if (sc->args != NULL) {
1754 struct format_field *field;
1756 struct syscall_arg arg = {
/* arg.idx and the bit mask advance together with the field list. */
1763 for (field = sc->args; field;
1764 field = field->next, ++arg.idx, bit <<= 1) {
1768 /* special care for unaligned accesses */
1769 p = args + sizeof(unsigned long) * arg.idx;
1770 memcpy(&val, p, sizeof(val));
1773 * Suppress this argument if its value is zero and
1774 * and we don't have a string associated in an
1778 !(sc->arg_scnprintf &&
1779 sc->arg_scnprintf[arg.idx] == SCA_STRARRAY &&
1780 sc->arg_parm[arg.idx]))
1783 printed += scnprintf(bf + printed, size - printed,
1784 "%s%s: ", printed ? ", " : "", field->name);
1785 if (sc->arg_scnprintf && sc->arg_scnprintf[arg.idx]) {
1788 arg.parm = sc->arg_parm[arg.idx];
1789 printed += sc->arg_scnprintf[arg.idx](bf + printed,
1790 size - printed, &arg);
1792 printed += scnprintf(bf + printed, size - printed,
1800 /* special care for unaligned accesses */
1801 p = args + sizeof(unsigned long) * i;
1802 memcpy(&val, p, sizeof(val));
1803 printed += scnprintf(bf + printed, size - printed,
1805 printed ? ", " : "", i, val);
/* Signature of the per-event sample handlers stored in evsel->handler. */
1813 typedef int (*tracepoint_handler)(struct trace *trace, struct perf_evsel *evsel,
1814 union perf_event *event,
1815 struct perf_sample *sample);
1817 static struct syscall *trace__syscall_info(struct trace *trace,
1818 struct perf_evsel *evsel, int id)
1824 * XXX: Noticed on x86_64, reproduced as far back as 3.0.36, haven't tried
1825 * before that, leaving at a higher verbosity level till that is
1826 * explained. Reproduced with plain ftrace with:
1828 * echo 1 > /t/events/raw_syscalls/sys_exit/enable
1829 * grep "NR -1 " /t/trace_pipe
1831 * After generating some load on the machine.
1835 fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n",
1836 id, perf_evsel__name(evsel), ++n);
/* Load the descriptor lazily the first time this id is seen. */
1841 if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) &&
1842 trace__read_syscall_info(trace, id))
/* Re-check: the descriptor may still be missing after the load attempt. */
1845 if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL))
1848 return &trace->syscalls.table[id];
/* Failure report; the syscall name is appended when it is known. */
1852 fprintf(trace->output, "Problems reading syscall %d", id);
1853 if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL)
1854 fprintf(trace->output, "(%s)", trace->syscalls.table[id].name);
1855 fputs(" information\n", trace->output);
/*
 * Account one completed call of syscall @id in the thread's statistics.
 * The stats object for the id is created the first time it is needed.
 */
1860 static void thread__update_stats(struct thread_trace *ttrace,
1861 int id, struct perf_sample *sample)
1863 struct int_node *inode;
1864 struct stats *stats;
1867 inode = intlist__findnew(ttrace->syscall_stats, id);
1871 stats = inode->priv;
1872 if (stats == NULL) {
1873 stats = malloc(sizeof(struct stats));
1877 inode->priv = stats;
/* A duration is only computed when an entry time exists and lies before this sample. */
1880 if (ttrace->entry_time && sample->time > ttrace->entry_time)
1881 duration = sample->time - ttrace->entry_time;
1883 update_stats(stats, duration);
/*
 * If trace->current still has a syscall entry pending, print it now with a
 * trailing ") ..." and clear the pending flag.
 */
1886 static int trace__printf_interrupted_entry(struct trace *trace, struct perf_sample *sample)
1888 struct thread_trace *ttrace;
1892 if (trace->current == NULL)
1895 ttrace = thread__priv(trace->current);
1897 if (!ttrace->entry_pending)
/* Time elapsed between the entry and the sample being handled. */
1900 duration = sample->time - ttrace->entry_time;
1902 printed = trace__fprintf_entry_head(trace, trace->current, duration, sample->time, trace->output);
1903 printed += fprintf(trace->output, "%-70s) ...\n", ttrace->entry_str);
1904 ttrace->entry_pending = false;
/*
 * Handler for the syscall-enter tracepoint: format "name(args" into the
 * thread's entry_str and mark the entry as pending so the exit handler can
 * complete the line.
 */
1909 static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
1910 union perf_event *event __maybe_unused,
1911 struct perf_sample *sample)
1916 struct thread *thread;
1917 int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1;
1918 struct syscall *sc = trace__syscall_info(trace, evsel, id);
1919 struct thread_trace *ttrace;
1924 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1925 ttrace = thread__trace(thread, trace->output);
1929 args = perf_evsel__sc_tp_ptr(evsel, args, sample);
/* The entry buffer is allocated once per thread and then reused. */
1931 if (ttrace->entry_str == NULL) {
1932 ttrace->entry_str = malloc(trace__entry_str_size);
1933 if (!ttrace->entry_str)
/* Flush an entry of the previously current thread that is still waiting for its exit. */
1937 if (!trace->summary_only)
1938 trace__printf_interrupted_entry(trace, sample);
1940 ttrace->entry_time = sample->time;
1941 msg = ttrace->entry_str;
1942 printed += scnprintf(msg + printed, trace__entry_str_size - printed, "%s(", sc->name);
1944 printed += syscall__scnprintf_args(sc, msg + printed, trace__entry_str_size - printed,
1945 args, trace, thread);
/* NOTE(review): the condition selecting between immediate printing and the pending flag is not in the visible lines. */
1948 if (!trace->duration_filter && !trace->summary_only) {
1949 trace__fprintf_entry_head(trace, thread, 1, sample->time, trace->output);
1950 fprintf(trace->output, "%-70s\n", ttrace->entry_str);
1953 ttrace->entry_pending = true;
/* Swap the reference held in trace->current over to this thread. */
1955 if (trace->current != thread) {
1956 thread__put(trace->current);
1957 trace->current = thread__get(thread);
1961 thread__put(thread);
/*
 * Handler for the syscall-exit tracepoint: update statistics, apply the
 * duration filter and print the completed line with its return value.
 */
1965 static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
1966 union perf_event *event __maybe_unused,
1967 struct perf_sample *sample)
1971 struct thread *thread;
1972 int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1;
1973 struct syscall *sc = trace__syscall_info(trace, evsel, id);
1974 struct thread_trace *ttrace;
1979 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1980 ttrace = thread__trace(thread, trace->output);
1985 thread__update_stats(ttrace, id, sample);
1987 ret = perf_evsel__sc_tp_uint(evsel, ret, sample);
/* Successful open: bind the name captured by vfs_getname to the returned fd. */
1989 if (id == trace->audit.open_id && ret >= 0 && trace->last_vfs_getname) {
1990 trace__set_fd_pathname(thread, ret, trace->last_vfs_getname);
1991 trace->last_vfs_getname = NULL;
1992 ++trace->stats.vfs_getname;
1995 ttrace->exit_time = sample->time;
/* The duration filter can only be evaluated when the entry was seen. */
1997 if (ttrace->entry_time) {
1998 duration = sample->time - ttrace->entry_time;
1999 if (trace__filter_duration(trace, duration))
2001 } else if (trace->duration_filter)
2004 if (trace->summary_only)
2007 trace__fprintf_entry_head(trace, thread, duration, sample->time, trace->output);
/* Either complete the pending entry or mark the line as a continuation. */
2009 if (ttrace->entry_pending) {
2010 fprintf(trace->output, "%-70s", ttrace->entry_str);
2012 fprintf(trace->output, " ... [");
2013 color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued");
2014 fprintf(trace->output, "]: %s()", sc->name);
/* Return value: plain, errno name plus message, "Timeout", or hex, as the formatting entry asks. */
2017 if (sc->fmt == NULL) {
2019 fprintf(trace->output, ") = %ld", ret);
2020 } else if (ret < 0 && sc->fmt->errmsg) {
2021 char bf[STRERR_BUFSIZE];
2022 const char *emsg = strerror_r(-ret, bf, sizeof(bf)),
2023 *e = audit_errno_to_name(-ret);
2025 fprintf(trace->output, ") = -1 %s %s", e, emsg);
2026 } else if (ret == 0 && sc->fmt->timeout)
2027 fprintf(trace->output, ") = 0 Timeout");
2028 else if (sc->fmt->hexret)
2029 fprintf(trace->output, ") = %#lx", ret);
2033 fputc('\n', trace->output);
2035 ttrace->entry_pending = false;
2038 thread__put(thread);
2042 static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel,
2043 union perf_event *event __maybe_unused,
2044 struct perf_sample *sample)
2046 struct thread *thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
2047 struct thread_trace *ttrace;
2048 size_t filename_len, entry_str_len, to_move;
2049 ssize_t remaining_space;
2051 const char *filename;
2053 trace->last_vfs_getname = perf_evsel__rawptr(evsel, sample, "pathname");
2058 ttrace = thread__priv(thread);
2062 if (!ttrace->filename.ptr)
2065 entry_str_len = strlen(ttrace->entry_str);
2066 remaining_space = trace__entry_str_size - entry_str_len - 1; /* \0 */
2067 if (remaining_space <= 0)
2070 filename = trace->last_vfs_getname;
2071 filename_len = strlen(filename);
2072 if (filename_len > (size_t)remaining_space) {
2073 filename += filename_len - remaining_space;
2074 filename_len = remaining_space;
2077 to_move = entry_str_len - ttrace->filename.entry_str_pos + 1; /* \0 */
2078 pos = ttrace->entry_str + ttrace->filename.entry_str_pos;
2079 memmove(pos + filename_len, pos, to_move);
2080 memcpy(pos, filename, filename_len);
2082 ttrace->filename.ptr = 0;
2083 ttrace->filename.entry_str_pos = 0;
/*
 * Handler for sched:sched_stat_runtime: add the reported runtime, converted
 * to milliseconds, to both the thread's and the session's totals.  The
 * trailing fprintf() dumps the raw event fields.
 */
2088 static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel,
2089 union perf_event *event __maybe_unused,
2090 struct perf_sample *sample)
2092 u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
2093 double runtime_ms = (double)runtime / NSEC_PER_MSEC;
2094 struct thread *thread = machine__findnew_thread(trace->host,
2097 struct thread_trace *ttrace = thread__trace(thread, trace->output);
2102 ttrace->runtime_ms += runtime_ms;
2103 trace->runtime_ms += runtime_ms;
2104 thread__put(thread);
/* NOTE(review): presumably reached only when no thread_trace could be obtained - the label is not in the visible lines. */
2108 fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n",
2110 perf_evsel__strval(evsel, sample, "comm"),
2111 (pid_t)perf_evsel__intval(evsel, sample, "pid"),
2113 perf_evsel__intval(evsel, sample, "vruntime"));
2114 thread__put(thread);
/*
 * Generic handler for other events: flush a pending syscall entry, then
 * print the timestamp, the event name and, for tracepoints, the payload
 * rendered through event_format__fprintf().
 */
2118 static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel,
2119 union perf_event *event __maybe_unused,
2120 struct perf_sample *sample)
2122 trace__printf_interrupted_entry(trace, sample);
2123 trace__fprintf_tstamp(trace, sample->time, trace->output);
/* Placeholder for the duration column used on syscall lines. */
2125 if (trace->trace_syscalls)
2126 fprintf(trace->output, "( ): ");
2128 fprintf(trace->output, "%s:", evsel->name);
2130 if (evsel->tp_format) {
2131 event_format__fprintf(evsel->tp_format, sample->cpu,
2132 sample->raw_data, sample->raw_size,
2136 fprintf(trace->output, ")\n");
/*
 * Print an address on @f as "dso@" and/or "symbol+0xoffset" when the map
 * and symbol are known (and requested or verbose), falling back to a raw
 * hex address.
 */
2140 static void print_location(FILE *f, struct perf_sample *sample,
2141 struct addr_location *al,
2142 bool print_dso, bool print_sym)
2145 if ((verbose || print_dso) && al->map)
2146 fprintf(f, "%s@", al->map->dso->long_name);
2148 if ((verbose || print_sym) && al->sym)
2149 fprintf(f, "%s+0x%" PRIx64, al->sym->name,
2150 al->addr - al->sym->start);
/* NOTE(review): the conditions choosing between al->addr and sample->addr are not in the visible lines. */
2152 fprintf(f, "0x%" PRIx64, al->addr);
2154 fprintf(f, "0x%" PRIx64, sample->addr);
/*
 * Handler for the software page-fault events: print a line of the form
 * "<kind>fault [<location>] => <location> (<map type><level>)".
 */
2157 static int trace__pgfault(struct trace *trace,
2158 struct perf_evsel *evsel,
2159 union perf_event *event,
2160 struct perf_sample *sample)
2162 struct thread *thread;
2163 u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
2164 struct addr_location al;
/* 'd' is the initial map type printed at the end of the line. */
2165 char map_type = 'd';
2166 struct thread_trace *ttrace;
2169 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
2170 ttrace = thread__trace(thread, trace->output);
2174 if (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ)
2179 if (trace->summary_only)
2182 thread__find_addr_location(thread, cpumode, MAP__FUNCTION,
2185 trace__fprintf_entry_head(trace, thread, 0, sample->time, trace->output);
2187 fprintf(trace->output, "%sfault [",
2188 evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ?
2191 print_location(trace->output, sample, &al, false, true);
2193 fprintf(trace->output, "] => ");
/* Resolve the second location against variable maps first, then function maps. */
2195 thread__find_addr_location(thread, cpumode, MAP__VARIABLE,
2199 thread__find_addr_location(thread, cpumode,
2200 MAP__FUNCTION, sample->addr, &al);
2208 print_location(trace->output, sample, &al, true, false);
2210 fprintf(trace->output, " (%c%c)\n", map_type, al.level);
2214 thread__put(thread);
/*
 * Decide whether @sample should be ignored based on the optional pid/tid
 * lists: a sample found in either list is treated differently from one
 * that misses while a list exists.
 * NOTE(review): the returned values are not in the visible lines.
 */
2218 static bool skip_sample(struct trace *trace, struct perf_sample *sample)
2220 if ((trace->pid_list && intlist__find(trace->pid_list, sample->pid)) ||
2221 (trace->tid_list && intlist__find(trace->tid_list, sample->tid)))
2224 if (trace->pid_list || trace->tid_list)
/*
 * Sample callback used through trace->tool: apply the pid/tid filter, set
 * the base time from the first sample unless full timestamps were
 * requested, and call the handler attached to @evsel.
 */
2230 static int trace__process_sample(struct perf_tool *tool,
2231 union perf_event *event,
2232 struct perf_sample *sample,
2233 struct perf_evsel *evsel,
2234 struct machine *machine __maybe_unused)
2236 struct trace *trace = container_of(tool, struct trace, tool);
2239 tracepoint_handler handler = evsel->handler;
2241 if (skip_sample(trace, sample))
2244 if (!trace->full_time && trace->base_time == 0)
2245 trace->base_time = sample->time;
2249 handler(trace, evsel, event, sample);
/*
 * Build trace->pid_list and trace->tid_list from the target pid/tid
 * strings, reporting an error when a string cannot be parsed.
 */
2255 static int parse_target_str(struct trace *trace)
2257 if (trace->opts.target.pid) {
2258 trace->pid_list = intlist__new(trace->opts.target.pid);
2259 if (trace->pid_list == NULL) {
2260 pr_err("Error parsing process id string\n");
2265 if (trace->opts.target.tid) {
2266 trace->tid_list = intlist__new(trace->opts.target.tid);
2267 if (trace->tid_list == NULL) {
2268 pr_err("Error parsing thread id string\n");
/*
 * Build an argument vector for cmd_record() from the fixed record options,
 * the event selections implied by the trace settings and the caller's own
 * @argv, then run it.
 */
2276 static int trace__record(struct trace *trace, int argc, const char **argv)
2278 unsigned int rec_argc, i, j;
2279 const char **rec_argv;
2280 const char * const record_args[] = {
2287 const char * const sc_args[] = { "-e", };
2288 unsigned int sc_args_nr = ARRAY_SIZE(sc_args);
2289 const char * const majpf_args[] = { "-e", "major-faults" };
2290 unsigned int majpf_args_nr = ARRAY_SIZE(majpf_args);
2291 const char * const minpf_args[] = { "-e", "minor-faults" };
2292 unsigned int minpf_args_nr = ARRAY_SIZE(minpf_args);
2294 /* +1 is for the event string below */
2295 rec_argc = ARRAY_SIZE(record_args) + sc_args_nr + 1 +
2296 majpf_args_nr + minpf_args_nr + argc;
/* One extra zeroed slot keeps the vector NULL-terminated. */
2297 rec_argv = calloc(rec_argc + 1, sizeof(char *));
2299 if (rec_argv == NULL)
2303 for (i = 0; i < ARRAY_SIZE(record_args); i++)
2304 rec_argv[j++] = record_args[i];
2306 if (trace->trace_syscalls) {
2307 for (i = 0; i < sc_args_nr; i++)
2308 rec_argv[j++] = sc_args[i];
2310 /* event string may be different for older kernels - e.g., RHEL6 */
2311 if (is_valid_tracepoint("raw_syscalls:sys_enter"))
2312 rec_argv[j++] = "raw_syscalls:sys_enter,raw_syscalls:sys_exit";
2313 else if (is_valid_tracepoint("syscalls:sys_enter"))
2314 rec_argv[j++] = "syscalls:sys_enter,syscalls:sys_exit";
2316 pr_err("Neither raw_syscalls nor syscalls events exist.\n");
2321 if (trace->trace_pgfaults & TRACE_PFMAJ)
2322 for (i = 0; i < majpf_args_nr; i++)
2323 rec_argv[j++] = majpf_args[i];
2325 if (trace->trace_pgfaults & TRACE_PFMIN)
2326 for (i = 0; i < minpf_args_nr; i++)
2327 rec_argv[j++] = minpf_args[i];
/* The caller's own arguments go last. */
2329 for (i = 0; i < (unsigned int)argc; i++)
2330 rec_argv[j++] = argv[i];
/* j is the number of slots actually filled, which may be below rec_argc. */
2332 return cmd_record(j, rec_argv, NULL);
2335 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp);
/*
 * Try to add the probe:vfs_getname event to @evlist with
 * trace__vfs_getname() as its handler.  The event is discarded when it has
 * no "pathname" field.
 */
2337 static bool perf_evlist__add_vfs_getname(struct perf_evlist *evlist)
2339 struct perf_evsel *evsel = perf_evsel__newtp("probe", "vfs_getname");
2343 if (perf_evsel__field(evsel, "pathname") == NULL) {
2344 perf_evsel__delete(evsel);
2348 evsel->handler = trace__vfs_getname;
2349 perf_evlist__add(evlist, evsel);
/*
 * Add a software page-fault event with the given config to @evlist,
 * sampling every occurrence and handled by trace__pgfault().
 */
2353 static int perf_evlist__add_pgfault(struct perf_evlist *evlist,
2356 struct perf_evsel *evsel;
2357 struct perf_event_attr attr = {
2358 .type = PERF_TYPE_SOFTWARE,
2362 attr.config = config;
/* A period of 1 asks for a sample on every event. */
2363 attr.sample_period = 1;
2365 event_attr_init(&attr);
2367 evsel = perf_evsel__new(&attr);
2371 evsel->handler = trace__pgfault;
2372 perf_evlist__add(evlist, evsel);
/*
 * Dispatch one event read from the ring buffer in live mode: non-sample
 * records go to trace__process_event(), samples go to the handler of the
 * evsel matching the sample id.
 */
2377 static void trace__handle_event(struct trace *trace, union perf_event *event, struct perf_sample *sample)
2379 const u32 type = event->header.type;
2380 struct perf_evsel *evsel;
/* The first event seen defines time zero unless full timestamps were requested. */
2382 if (!trace->full_time && trace->base_time == 0)
2383 trace->base_time = sample->time;
2385 if (type != PERF_RECORD_SAMPLE) {
2386 trace__process_event(trace, trace->host, event, sample);
2390 evsel = perf_evlist__id2evsel(trace->evlist, sample->id);
2391 if (evsel == NULL) {
2392 fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample->id);
/* Tracepoint samples without raw data cannot be decoded and are only reported. */
2396 if (evsel->attr.type == PERF_TYPE_TRACEPOINT &&
2397 sample->raw_data == NULL) {
2398 fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
2399 perf_evsel__name(evsel), sample->tid,
2400 sample->cpu, sample->raw_size);
2402 tracepoint_handler handler = evsel->handler;
2403 handler(trace, evsel, event, sample);
/*
 * Create the sys_enter and sys_exit syscall tracepoint events, initialise
 * the fields they need ("args" and "ret"), add both to trace->evlist and
 * remember them in trace->syscalls.events.  On failure whatever was
 * created is deleted again.
 */
2407 static int trace__add_syscall_newtp(struct trace *trace)
2410 struct perf_evlist *evlist = trace->evlist;
2411 struct perf_evsel *sys_enter, *sys_exit;
2413 sys_enter = perf_evsel__syscall_newtp("sys_enter", trace__sys_enter);
2414 if (sys_enter == NULL)
2417 if (perf_evsel__init_sc_tp_ptr_field(sys_enter, args))
2418 goto out_delete_sys_enter;
2420 sys_exit = perf_evsel__syscall_newtp("sys_exit", trace__sys_exit);
2421 if (sys_exit == NULL)
2422 goto out_delete_sys_enter;
2424 if (perf_evsel__init_sc_tp_uint_field(sys_exit, ret))
2425 goto out_delete_sys_exit;
2427 perf_evlist__add(evlist, sys_enter);
2428 perf_evlist__add(evlist, sys_exit);
2430 trace->syscalls.events.sys_enter = sys_enter;
2431 trace->syscalls.events.sys_exit = sys_exit;
/* Unwind in reverse order of creation. */
2437 out_delete_sys_exit:
2438 perf_evsel__delete_priv(sys_exit);
2439 out_delete_sys_enter:
2440 perf_evsel__delete_priv(sys_enter);
/*
 * Build a filter expression on "id" from the qualifier id list and append
 * it, joined with "&&", to the filters of both syscall tracepoints.
 */
2444 static int trace__set_ev_qualifier_filter(struct trace *trace)
/* The second argument selects inclusion; it is the inverse of not_ev_qualifier. */
2447 char *filter = asprintf_expr_inout_ints("id", !trace->not_ev_qualifier,
2448 trace->ev_qualifier_ids.nr,
2449 trace->ev_qualifier_ids.entries);
/* sys_exit only gets the filter when sys_enter accepted it. */
2454 if (!perf_evsel__append_filter(trace->syscalls.events.sys_enter, "&&", filter))
2455 err = perf_evsel__append_filter(trace->syscalls.events.sys_exit, "&&", filter);
/*
 * Live tracing: register the requested events on trace->evlist, set up the
 * target and filters, then read events from the mmap'ed ring buffers and
 * dispatch them until asked to stop.  The thread summary and tool
 * statistics are printed afterwards and the evlist is deleted.
 */
2465 static int trace__run(struct trace *trace, int argc, const char **argv)
2467 struct perf_evlist *evlist = trace->evlist;
2468 struct perf_evsel *evsel;
2470 unsigned long before;
/* A command on the command line means a workload is run. */
2471 const bool forks = argc > 0;
2472 bool draining = false;
/* Register the events: syscall tracepoints, vfs_getname, page faults, sched_stat_runtime. */
2476 if (trace->trace_syscalls && trace__add_syscall_newtp(trace))
2477 goto out_error_raw_syscalls;
2479 if (trace->trace_syscalls)
2480 trace->vfs_getname = perf_evlist__add_vfs_getname(evlist);
2482 if ((trace->trace_pgfaults & TRACE_PFMAJ) &&
2483 perf_evlist__add_pgfault(evlist, PERF_COUNT_SW_PAGE_FAULTS_MAJ)) {
2487 if ((trace->trace_pgfaults & TRACE_PFMIN) &&
2488 perf_evlist__add_pgfault(evlist, PERF_COUNT_SW_PAGE_FAULTS_MIN))
2492 perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
2493 trace__sched_stat_runtime))
2494 goto out_error_sched_stat_runtime;
2496 err = perf_evlist__create_maps(evlist, &trace->opts.target);
2498 fprintf(trace->output, "Problems parsing the target to trace, check your options!\n");
2499 goto out_delete_evlist;
2502 err = trace__symbols_init(trace, evlist);
2504 fprintf(trace->output, "Problems initializing symbol libraries!\n");
2505 goto out_delete_evlist;
2508 perf_evlist__config(evlist, &trace->opts);
/* sig_handler() records the request to stop for the loop below. */
2510 signal(SIGCHLD, sig_handler);
2511 signal(SIGINT, sig_handler);
2514 err = perf_evlist__prepare_workload(evlist, &trace->opts.target,
2517 fprintf(trace->output, "Couldn't run the workload!\n");
2518 goto out_delete_evlist;
2522 err = perf_evlist__open(evlist);
2524 goto out_error_open;
2527 * Better not use !target__has_task() here because we need to cover the
2528 * case where no threads were specified in the command line, but a
2529 * workload was, and in that case we will fill in the thread_map when
2530 * we fork the workload in perf_evlist__prepare_workload.
2532 if (trace->filter_pids.nr > 0)
2533 err = perf_evlist__set_filter_pids(evlist, trace->filter_pids.nr, trace->filter_pids.entries);
2534 else if (thread_map__pid(evlist->threads, 0) == -1)
2535 err = perf_evlist__set_filter_pid(evlist, getpid());
/* Restrict the syscall tracepoints to the ids the user asked for. */
2540 if (trace->ev_qualifier_ids.nr > 0) {
2541 err = trace__set_ev_qualifier_filter(trace);
2545 pr_debug("event qualifier tracepoint filter: %s\n",
2546 trace->syscalls.events.sys_exit->filter);
2549 err = perf_evlist__apply_filters(evlist, &evsel);
2551 goto out_error_apply_filters;
2553 err = perf_evlist__mmap(evlist, trace->opts.mmap_pages, false);
2555 goto out_error_mmap;
2557 if (!target__none(&trace->opts.target))
2558 perf_evlist__enable(evlist);
2561 perf_evlist__start_workload(evlist);
/* tids are printed when tracing system wide, several threads, or with inheritance. */
2563 trace->multiple_threads = thread_map__pid(evlist->threads, 0) == -1 ||
2564 evlist->threads->nr > 1 ||
2565 perf_evlist__first(evlist)->attr.inherit;
/* Snapshot the event count to detect below whether this pass read anything. */
2567 before = trace->nr_events;
2569 for (i = 0; i < evlist->nr_mmaps; i++) {
2570 union perf_event *event;
2572 while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) {
2573 struct perf_sample sample;
2577 err = perf_evlist__parse_sample(evlist, event, &sample);
2579 fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err);
2583 trace__handle_event(trace, event, &sample);
2585 perf_evlist__mmap_consume(evlist, i);
/* Asked to stop: disable the events; what is already buffered is still read. */
2590 if (done && !draining) {
2591 perf_evlist__disable(evlist);
/* Nothing new was read: wait for data, for at most 100 ms once done is set. */
2597 if (trace->nr_events == before) {
2598 int timeout = done ? 100 : -1;
2600 if (!draining && perf_evlist__poll(evlist, timeout) > 0) {
2601 if (perf_evlist__filter_pollfd(evlist, POLLERR | POLLHUP) == 0)
/* Drop the reference held on the last current thread. */
2611 thread__zput(trace->current);
2613 perf_evlist__disable(evlist);
2617 trace__fprintf_thread_summary(trace, trace->output);
2619 if (trace->show_tool_stats) {
2620 fprintf(trace->output, "Stats:\n "
2621 " vfs_getname : %" PRIu64 "\n"
2622 " proc_getname: %" PRIu64 "\n",
2623 trace->stats.vfs_getname,
2624 trace->stats.proc_getname);
2629 perf_evlist__delete(evlist);
2630 trace->evlist = NULL;
2631 trace->live = false;
/* Error paths: build or print a message, then release the evlist. */
2634 char errbuf[BUFSIZ];
2636 out_error_sched_stat_runtime:
2637 debugfs__strerror_open_tp(errno, errbuf, sizeof(errbuf), "sched", "sched_stat_runtime");
2640 out_error_raw_syscalls:
2641 debugfs__strerror_open_tp(errno, errbuf, sizeof(errbuf), "raw_syscalls", "sys_(enter|exit)");
2645 perf_evlist__strerror_mmap(evlist, errno, errbuf, sizeof(errbuf));
2649 perf_evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf));
2652 fprintf(trace->output, "%s\n", errbuf);
2653 goto out_delete_evlist;
2655 out_error_apply_filters:
2656 fprintf(trace->output,
2657 "Failed to set filter \"%s\" on event %s with %d (%s)\n",
2658 evsel->filter, perf_evsel__name(evsel), errno,
2659 strerror_r(errno, errbuf, sizeof(errbuf)));
2660 goto out_delete_evlist;
2663 fprintf(trace->output, "Not enough memory to run!\n");
2664 goto out_delete_evlist;
2667 fprintf(trace->output, "errno=%d,%s\n", errno, strerror(errno));
2668 goto out_delete_evlist;
/*
 * trace__replay - run recorded events through a perf session.
 *
 * Wires trace->tool with trace__process_sample plus the stock
 * perf_event__process_* callbacks, opens a read-mode session on 'file',
 * attaches handlers to the syscall enter/exit tracepoints and to software
 * page-fault events, then calls perf_session__process_events().
 * The session is released with perf_session__delete() at the end.
 *
 * @trace: tool state; ->tool, ->multiple_threads and ->host are written here,
 *         ->force and ->summary are read.
 */
2671 static int trace__replay(struct trace *trace)
2673 const struct perf_evsel_str_handler handlers[] = {
2674 { "probe:vfs_getname", trace__vfs_getname, },
2676 struct perf_data_file file = {
2678 .mode = PERF_DATA_MODE_READ,
2679 .force = trace->force,
2681 struct perf_session *session;
2682 struct perf_evsel *evsel;
/* Per-record-type callbacks handed to the session through trace->tool. */
2685 trace->tool.sample = trace__process_sample;
2686 trace->tool.mmap = perf_event__process_mmap;
2687 trace->tool.mmap2 = perf_event__process_mmap2;
2688 trace->tool.comm = perf_event__process_comm;
2689 trace->tool.exit = perf_event__process_exit;
2690 trace->tool.fork = perf_event__process_fork;
2691 trace->tool.attr = perf_event__process_attr;
2692 trace->tool.tracing_data = perf_event__process_tracing_data;
2693 trace->tool.build_id = perf_event__process_build_id;
/* Request ordered event delivery; the ordering is flagged as needing timestamps. */
2695 trace->tool.ordered_events = true;
2696 trace->tool.ordering_requires_timestamps = true;
2698 /* add tid to output */
2699 trace->multiple_threads = true;
2701 session = perf_session__new(&file, false, &trace->tool);
2702 if (session == NULL)
2705 if (symbol__init(&session->header.env) < 0)
/* Threads are looked up in the session's host machine from here on. */
2708 trace->host = &session->machines.host;
2710 err = perf_session__set_tracepoints_handlers(session, handlers);
/* Syscall entry: set up the tracepoint with trace__sys_enter and its 'args' pointer field. */
2714 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2715 "raw_syscalls:sys_enter");
2716 /* older kernels have syscalls tp versus raw_syscalls */
2718 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2719 "syscalls:sys_enter");
2722 (perf_evsel__init_syscall_tp(evsel, trace__sys_enter) < 0 ||
2723 perf_evsel__init_sc_tp_ptr_field(evsel, args))) {
2724 pr_err("Error during initialize raw_syscalls:sys_enter event\n");
/* Syscall exit: same two-name lookup, with trace__sys_exit and the 'ret' integer field. */
2728 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2729 "raw_syscalls:sys_exit");
2731 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2732 "syscalls:sys_exit");
2734 (perf_evsel__init_syscall_tp(evsel, trace__sys_exit) < 0 ||
2735 perf_evsel__init_sc_tp_uint_field(evsel, ret))) {
2736 pr_err("Error during initialize raw_syscalls:sys_exit event\n");
/* Route software page-fault events (major, minor or generic) to trace__pgfault. */
2740 evlist__for_each(session->evlist, evsel) {
2741 if (evsel->attr.type == PERF_TYPE_SOFTWARE &&
2742 (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ||
2743 evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MIN ||
2744 evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS))
2745 evsel->handler = trace__pgfault;
2748 err = parse_target_str(trace);
2754 err = perf_session__process_events(session);
/* The thread summary is printed on the non-error branch, and only when trace->summary is set. */
2756 pr_err("Failed to process events, error %d", err);
2758 else if (trace->summary)
2759 trace__fprintf_thread_summary(trace, trace->output);
2762 perf_session__delete(session);
/*
 * Write the "Summary of events" banner to fp.
 * 'printed' receives the value returned by fprintf() for that banner.
 */
2767 static size_t trace__fprintf_threads_header(FILE *fp)
2771 printed = fprintf(fp, "\n Summary of events:\n\n");
/*
 * thread__dump_stats - print the per-syscall statistics table of one thread.
 *
 * Walks the int_nodes of ttrace->syscall_stats; each node's ->priv is a
 * struct stats and its ->i indexes trace->syscalls.table to get the name.
 * For every entry a row is printed with: name, number of calls, total, min,
 * avg and max (in msec) and the standard deviation as a percentage of avg.
 * The fprintf() results are accumulated in 'printed'.
 *
 * @ttrace: per-thread trace data holding the syscall_stats list
 * @trace:  supplies the syscall table used to resolve names
 * @fp:     output stream
 */
2776 static size_t thread__dump_stats(struct thread_trace *ttrace,
2777 struct trace *trace, FILE *fp)
2779 struct stats *stats;
2782 struct int_node *inode = intlist__first(ttrace->syscall_stats);
2787 printed += fprintf(fp, "\n");
/* Three-line table header: column names, units, ruler. */
2789 printed += fprintf(fp, " syscall calls total min avg max stddev\n");
2790 printed += fprintf(fp, " (msec) (msec) (msec) (msec) (%%)\n");
2791 printed += fprintf(fp, " --------------- -------- --------- --------- --------- --------- ------\n");
2793 /* each int_node is a syscall */
2795 stats = inode->priv;
/* min and max are scaled by NSEC_PER_MSEC so they print in msec. */
2797 double min = (double)(stats->min) / NSEC_PER_MSEC;
2798 double max = (double)(stats->max) / NSEC_PER_MSEC;
2799 double avg = avg_stats(stats);
2801 u64 n = (u64) stats->n;
/*
 * pct is the stddev relative to avg, computed before avg is rescaled so
 * both operands share a unit; a zero avg yields 0.0 instead of dividing.
 */
2803 pct = avg ? 100.0 * stddev_stats(stats)/avg : 0.0;
2804 avg /= NSEC_PER_MSEC;
2806 sc = &trace->syscalls.table[inode->i];
2807 printed += fprintf(fp, " %-15s", sc->name);
/* The "total" column is reconstructed as avg * n. */
2808 printed += fprintf(fp, " %8" PRIu64 " %9.3f %9.3f %9.3f",
2809 n, avg * n, min, avg);
2810 printed += fprintf(fp, " %9.3f %9.2f%%\n", max, pct);
2813 inode = intlist__next(inode);
2816 printed += fprintf(fp, "\n\n");
2821 /* struct used to pass data to per-thread function */
/* (received by trace__fprintf_one_thread() through its void *priv argument) */
2822 struct summary_data {
/* trace instance whose event total and syscall table the per-thread printer reads */
2824 struct trace *trace;
/*
 * trace__fprintf_one_thread - print the summary entry for a single thread.
 *
 * Callback handed to machine__for_each_thread() by
 * trace__fprintf_thread_summary().  Prints the thread's comm and tid, its
 * event count, that count as a percentage of trace->nr_events, the
 * major/minor page-fault counters and runtime in msec, followed by the
 * syscall table from thread__dump_stats().
 *
 * @thread: thread to report; its private data is a struct thread_trace
 * @priv:   struct summary_data carrying fp, the running 'printed' total and
 *          the trace instance
 *
 * NOTE(review): 'printed' is seeded from data->printed and is later added
 * back with "data->printed += printed", so the previous total is counted
 * twice.  Confirm whether the local was meant to start at 0.
 */
2828 static int trace__fprintf_one_thread(struct thread *thread, void *priv)
2830 struct summary_data *data = priv;
2831 FILE *fp = data->fp;
2832 size_t printed = data->printed;
2833 struct trace *trace = data->trace;
2834 struct thread_trace *ttrace = thread__priv(thread);
/* Share of all events seen by the tool that belong to this thread, in percent. */
2840 ratio = (double)ttrace->nr_events / trace->nr_events * 100.0;
2842 printed += fprintf(fp, " %s (%d), ", thread__comm_str(thread), thread->tid);
2843 printed += fprintf(fp, "%lu events, ", ttrace->nr_events);
2844 printed += fprintf(fp, "%.1f%%", ratio);
2846 printed += fprintf(fp, ", %lu majfaults", ttrace->pfmaj);
2848 printed += fprintf(fp, ", %lu minfaults", ttrace->pfmin);
2849 printed += fprintf(fp, ", %.3f msec\n", ttrace->runtime_ms);
2850 printed += thread__dump_stats(ttrace, trace, fp);
2852 data->printed += printed;
/*
 * trace__fprintf_thread_summary - print the summary of all threads to fp.
 *
 * Emits the banner via trace__fprintf_threads_header(), then runs
 * trace__fprintf_one_thread() on every thread of trace->host through
 * machine__for_each_thread(), sharing state in a struct summary_data.
 *
 * Returns data.printed, the count accumulated by those calls.
 */
2857 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
2859 struct summary_data data = {
2863 data.printed = trace__fprintf_threads_header(fp);
2865 machine__for_each_thread(trace->host, trace__fprintf_one_thread, &data);
2867 return data.printed;
/*
 * Option callback registered for --duration: stores the numeric value of
 * 'str' in trace->duration_filter, where the trace instance is opt->value.
 *
 * NOTE(review): atof() gives no indication of a failed conversion, so
 * non-numeric text is stored as 0.
 */
2870 static int trace__set_duration(const struct option *opt, const char *str,
2871 int unset __maybe_unused)
2873 struct trace *trace = opt->value;
2875 trace->duration_filter = atof(str);
/*
 * trace__set_filter_pids - option callback registered for --filter-pids.
 *
 * Parses 'str' with intlist__new() and fills trace->filter_pids (the trace
 * instance is opt->value): .nr is the number of parsed entries plus one and
 * .entries is a calloc()ed pid_t array of that size.
 *
 * NOTE(review): intlist__delete(list) is only visible after the copy loop;
 * confirm the allocation-failure path also releases 'list'.
 */
2879 static int trace__set_filter_pids(const struct option *opt, const char *str,
2880 int unset __maybe_unused)
2884 struct trace *trace = opt->value;
2886 * FIXME: introduce a intarray class, plain parse csv and create a
2887 * { int nr, int entries[] } struct...
2889 struct intlist *list = intlist__new(str);
/* One extra slot: entry 0 is reserved for this process's own pid. */
2894 i = trace->filter_pids.nr = intlist__nr_entries(list) + 1;
2895 trace->filter_pids.entries = calloc(i, sizeof(pid_t));
2897 if (trace->filter_pids.entries == NULL)
2900 trace->filter_pids.entries[0] = getpid();
/* Copy the parsed values in list order, shifted by one past the self pid. */
2902 for (i = 1; i < trace->filter_pids.nr; ++i)
2903 trace->filter_pids.entries[i] = intlist__entry(list, i - 1)->i;
2905 intlist__delete(list);
/*
 * trace__open_output - open 'filename' for writing as trace->output.
 *
 * When 'filename' already exists and is not empty it is first renamed to
 * "<filename>.old".  The file is then opened with mode "w".
 *
 * Returns 0 on success, or -errno when fopen() fails.
 */
2911 static int trace__open_output(struct trace *trace, const char *filename)
2915 if (!stat(filename, &st) && st.st_size) {
2916 char oldname[PATH_MAX];
2918 scnprintf(oldname, sizeof(oldname), "%s.old", filename);
/* The result of rename() is not stored, so a failed backup goes unnoticed. */
2920 rename(filename, oldname);
2923 trace->output = fopen(filename, "w");
2925 return trace->output == NULL ? -errno : 0;
/*
 * Option callback registered for -F/--pf.  opt->value points to an int
 * bitmask; the matching bits are ORed in, so earlier bits are kept:
 *   "all" -> TRACE_PFMAJ | TRACE_PFMIN
 *   "maj" -> TRACE_PFMAJ
 *   "min" -> TRACE_PFMIN
 */
2928 static int parse_pagefaults(const struct option *opt, const char *str,
2929 int unset __maybe_unused)
2931 int *trace_pgfaults = opt->value;
2933 if (strcmp(str, "all") == 0)
2934 *trace_pgfaults |= TRACE_PFMAJ | TRACE_PFMIN;
2935 else if (strcmp(str, "maj") == 0)
2936 *trace_pgfaults |= TRACE_PFMAJ;
2937 else if (strcmp(str, "min") == 0)
2938 *trace_pgfaults |= TRACE_PFMIN;
/* Assign 'handler' to the ->handler member of every evsel in 'evlist'. */
2945 static void evlist__set_evsel_handler(struct perf_evlist *evlist, void *handler)
2947 struct perf_evsel *evsel;
2949 evlist__for_each(evlist, evsel)
2950 evsel->handler = handler;
/*
 * cmd_trace - entry point of the 'perf trace' command.
 *
 * Visible flow: install crash-dump signal handlers, allocate the evlist,
 * parse the command line (with "record" as the only subcommand), hand over
 * to trace__record() when the first remaining argument is "record", reject
 * an invocation with nothing to trace, optionally open the -o output file
 * and parse the -e qualifier list, validate the target and uid, and finally
 * call trace__replay() or trace__run().
 *
 * @argc/@argv: command line as passed by the perf front end
 * @prefix:     unused
 */
2953 int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
2955 const char *trace_usage[] = {
2956 "perf trace [<options>] [<command>]",
2957 "perf trace [<options>] -- <command> [<options>]",
2958 "perf trace record [<options>] [<command>]",
2959 "perf trace record [<options>] -- <command> [<options>]",
/* Defaults applied before option parsing. */
2962 struct trace trace = {
2964 .machine = audit_detect_machine(),
2965 .open_id = audit_name_to_syscall("open", trace.audit.machine),
2975 .user_freq = UINT_MAX,
2976 .user_interval = ULLONG_MAX,
2977 .no_buffering = true,
2978 .mmap_pages = UINT_MAX,
2979 .proc_map_timeout = 500,
2983 .trace_syscalls = true,
2985 const char *output_name = NULL;
2986 const char *ev_qualifier_str = NULL;
/* Option table; most entries write straight into 'trace' or the locals above. */
2987 const struct option trace_options[] = {
2988 OPT_CALLBACK(0, "event", &trace.evlist, "event",
2989 "event selector. use 'perf list' to list available events",
2990 parse_events_option),
2991 OPT_BOOLEAN(0, "comm", &trace.show_comm,
2992 "show the thread COMM next to its id"),
2993 OPT_BOOLEAN(0, "tool_stats", &trace.show_tool_stats, "show tool stats"),
2994 OPT_STRING('e', "expr", &ev_qualifier_str, "expr", "list of syscalls to trace"),
2995 OPT_STRING('o', "output", &output_name, "file", "output file name"),
2996 OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"),
2997 OPT_STRING('p', "pid", &trace.opts.target.pid, "pid",
2998 "trace events on existing process id"),
2999 OPT_STRING('t', "tid", &trace.opts.target.tid, "tid",
3000 "trace events on existing thread id"),
3001 OPT_CALLBACK(0, "filter-pids", &trace, "CSV list of pids",
3002 "pids to filter (by the kernel)", trace__set_filter_pids),
3003 OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide,
3004 "system-wide collection from all CPUs"),
3005 OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu",
3006 "list of cpus to monitor"),
3007 OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit,
3008 "child tasks do not inherit counters"),
3009 OPT_CALLBACK('m', "mmap-pages", &trace.opts.mmap_pages, "pages",
3010 "number of mmap data pages",
3011 perf_evlist__parse_mmap_pages),
3012 OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user",
3014 OPT_CALLBACK(0, "duration", &trace, "float",
3015 "show only events with duration > N.M ms",
3016 trace__set_duration),
3017 OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"),
3018 OPT_INCR('v', "verbose", &verbose, "be more verbose"),
3019 OPT_BOOLEAN('T', "time", &trace.full_time,
3020 "Show full timestamp, not time relative to first start"),
3021 OPT_BOOLEAN('s', "summary", &trace.summary_only,
3022 "Show only syscall summary with statistics"),
3023 OPT_BOOLEAN('S', "with-summary", &trace.summary,
3024 "Show all syscalls and summary with statistics"),
3025 OPT_CALLBACK_DEFAULT('F', "pf", &trace.trace_pgfaults, "all|maj|min",
3026 "Trace pagefaults", parse_pagefaults, "maj"),
3027 OPT_BOOLEAN(0, "syscalls", &trace.trace_syscalls, "Trace syscalls"),
3028 OPT_BOOLEAN('f', "force", &trace.force, "don't complain, do it"),
3029 OPT_UINTEGER(0, "proc-map-timeout", &trace.opts.proc_map_timeout,
3030 "per thread proc mmap processing timeout in ms"),
3033 const char * const trace_subcommands[] = { "record", NULL };
/* Route SIGSEGV and SIGFPE to sighandler_dump_stack. */
3037 signal(SIGSEGV, sighandler_dump_stack);
3038 signal(SIGFPE, sighandler_dump_stack);
/* The evlist must exist before parsing: --event stores into trace.evlist. */
3040 trace.evlist = perf_evlist__new();
3042 if (trace.evlist == NULL) {
3043 pr_err("Not enough memory to run!\n");
3048 argc = parse_options_subcommand(argc, argv, trace_options, trace_subcommands,
3049 trace_usage, PARSE_OPT_STOP_AT_NON_OPTION);
/* Page-fault tracing turns on address and time sampling. */
3051 if (trace.trace_pgfaults) {
3052 trace.opts.sample_address = true;
3053 trace.opts.sample_time = true;
/* Events already in the evlist after parsing are dispatched to trace__event_handler. */
3056 if (trace.evlist->nr_entries > 0)
3057 evlist__set_evsel_handler(trace.evlist, trace__event_handler);
/* "record" subcommand: the rest of the command line goes to trace__record(). */
3059 if ((argc >= 1) && (strcmp(argv[0], "record") == 0))
3060 return trace__record(&trace, argc-1, &argv[1]);
3062 /* summary_only implies summary option, but don't overwrite summary if set */
3063 if (trace.summary_only)
3064 trace.summary = trace.summary_only;
/* Nothing selected at all: no syscalls, no page faults, no extra events. */
3066 if (!trace.trace_syscalls && !trace.trace_pgfaults &&
3067 trace.evlist->nr_entries == 0 /* Was --events used? */) {
3068 pr_err("Please specify something to trace.\n");
3072 if (output_name != NULL) {
3073 err = trace__open_output(&trace, output_name);
3075 perror("failed to create output file");
/*
 * -e/--expr list: a leading '!' sets trace.not_ev_qualifier.  The list is
 * parsed with strlist__new(), using STRACE_GROUPS_DIR as the strlist dirname.
 */
3080 if (ev_qualifier_str != NULL) {
3081 const char *s = ev_qualifier_str;
3082 struct strlist_config slist_config = {
3083 .dirname = system_path(STRACE_GROUPS_DIR),
3086 trace.not_ev_qualifier = *s == '!';
3087 if (trace.not_ev_qualifier)
3089 trace.ev_qualifier = strlist__new(s, &slist_config);
3090 if (trace.ev_qualifier == NULL) {
3091 fputs("Not enough memory to parse event qualifier",
3097 err = trace__validate_ev_qualifier(&trace);
/* Target checks: problems are turned into text by target__strerror() and written to trace.output. */
3102 err = target__validate(&trace.opts.target);
3104 target__strerror(&trace.opts.target, err, bf, sizeof(bf));
3105 fprintf(trace.output, "%s", bf);
3109 err = target__parse_uid(&trace.opts.target);
3111 target__strerror(&trace.opts.target, err, bf, sizeof(bf));
3112 fprintf(trace.output, "%s", bf);
/* No command to run and no target selected: trace system wide. */
3116 if (!argc && target__none(&trace.opts.target))
3117 trace.opts.target.system_wide = true;
/* NOTE(review): presumably replay is chosen when an input file was given, otherwise a live run; confirm. */
3120 err = trace__replay(&trace);
3122 err = trace__run(&trace, argc, argv);
/* Only close the stream when it was opened from output_name above. */
3125 if (output_name != NULL)
3126 fclose(trace.output);