1 #include <traceevent/event-parse.h>
3 #include "util/color.h"
4 #include "util/debug.h"
5 #include "util/evlist.h"
6 #include "util/exec_cmd.h"
7 #include "util/machine.h"
8 #include "util/session.h"
9 #include "util/thread.h"
10 #include "util/parse-options.h"
11 #include "util/strlist.h"
12 #include "util/intlist.h"
13 #include "util/thread_map.h"
14 #include "util/stat.h"
15 #include "trace-event.h"
16 #include "util/parse-events.h"
21 #include <linux/futex.h>
23 /* For older distros: */
25 # define MAP_STACK 0x20000
29 # define MADV_HWPOISON 100
32 #ifndef MADV_MERGEABLE
33 # define MADV_MERGEABLE 12
36 #ifndef MADV_UNMERGEABLE
37 # define MADV_UNMERGEABLE 13
41 # define EFD_SEMAPHORE 1
45 # define EFD_NONBLOCK 00004000
49 # define EFD_CLOEXEC 02000000
53 # define O_CLOEXEC 02000000
61 # define SOCK_CLOEXEC 02000000
65 # define SOCK_NONBLOCK 00004000
68 #ifndef MSG_CMSG_CLOEXEC
69 # define MSG_CMSG_CLOEXEC 0x40000000
72 #ifndef PERF_FLAG_FD_NO_GROUP
73 # define PERF_FLAG_FD_NO_GROUP (1UL << 0)
76 #ifndef PERF_FLAG_FD_OUTPUT
77 # define PERF_FLAG_FD_OUTPUT (1UL << 1)
80 #ifndef PERF_FLAG_PID_CGROUP
81 # define PERF_FLAG_PID_CGROUP (1UL << 2) /* pid=cgroup id, per-cpu mode only */
84 #ifndef PERF_FLAG_FD_CLOEXEC
85 # define PERF_FLAG_FD_CLOEXEC (1UL << 3) /* O_CLOEXEC */
92 u64 (*integer)(struct tp_field *field, struct perf_sample *sample);
93 void *(*pointer)(struct tp_field *field, struct perf_sample *sample);
97 #define TP_UINT_FIELD(bits) \
98 static u64 tp_field__u##bits(struct tp_field *field, struct perf_sample *sample) \
101 memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
110 #define TP_UINT_FIELD__SWAPPED(bits) \
111 static u64 tp_field__swapped_u##bits(struct tp_field *field, struct perf_sample *sample) \
114 memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
115 return bswap_##bits(value);\
118 TP_UINT_FIELD__SWAPPED(16);
119 TP_UINT_FIELD__SWAPPED(32);
120 TP_UINT_FIELD__SWAPPED(64);
122 static int tp_field__init_uint(struct tp_field *field,
123 struct format_field *format_field,
126 field->offset = format_field->offset;
128 switch (format_field->size) {
130 field->integer = tp_field__u8;
133 field->integer = needs_swap ? tp_field__swapped_u16 : tp_field__u16;
136 field->integer = needs_swap ? tp_field__swapped_u32 : tp_field__u32;
139 field->integer = needs_swap ? tp_field__swapped_u64 : tp_field__u64;
148 static void *tp_field__ptr(struct tp_field *field, struct perf_sample *sample)
150 return sample->raw_data + field->offset;
153 static int tp_field__init_ptr(struct tp_field *field, struct format_field *format_field)
155 field->offset = format_field->offset;
156 field->pointer = tp_field__ptr;
163 struct tp_field args, ret;
167 static int perf_evsel__init_tp_uint_field(struct perf_evsel *evsel,
168 struct tp_field *field,
171 struct format_field *format_field = perf_evsel__field(evsel, name);
173 if (format_field == NULL)
176 return tp_field__init_uint(field, format_field, evsel->needs_swap);
/*
 * Resolve the tracepoint field called 'name' into the identically named
 * struct tp_field member of the struct syscall_tp stored in evsel->priv.
 * Evaluates to the return value of perf_evsel__init_tp_uint_field().
 *
 * 'evsel' is parenthesised so that any pointer expression can be passed; note
 * that it is evaluated twice.
 */
#define perf_evsel__init_sc_tp_uint_field(evsel, name) \
	({ struct syscall_tp *sc = (evsel)->priv;\
	   perf_evsel__init_tp_uint_field((evsel), &sc->name, #name); })
183 static int perf_evsel__init_tp_ptr_field(struct perf_evsel *evsel,
184 struct tp_field *field,
187 struct format_field *format_field = perf_evsel__field(evsel, name);
189 if (format_field == NULL)
192 return tp_field__init_ptr(field, format_field);
/*
 * Pointer-field counterpart of perf_evsel__init_sc_tp_uint_field(): resolve
 * the tracepoint field called 'name' into the identically named member of the
 * struct syscall_tp stored in evsel->priv.  Evaluates to the return value of
 * perf_evsel__init_tp_ptr_field().
 *
 * 'evsel' is parenthesised so that any pointer expression can be passed; note
 * that it is evaluated twice.
 */
#define perf_evsel__init_sc_tp_ptr_field(evsel, name) \
	({ struct syscall_tp *sc = (evsel)->priv;\
	   perf_evsel__init_tp_ptr_field((evsel), &sc->name, #name); })
199 static void perf_evsel__delete_priv(struct perf_evsel *evsel)
202 perf_evsel__delete(evsel);
205 static int perf_evsel__init_syscall_tp(struct perf_evsel *evsel, void *handler)
207 evsel->priv = malloc(sizeof(struct syscall_tp));
208 if (evsel->priv != NULL) {
209 if (perf_evsel__init_sc_tp_uint_field(evsel, id))
212 evsel->handler = handler;
223 static struct perf_evsel *perf_evsel__syscall_newtp(const char *direction, void *handler)
225 struct perf_evsel *evsel = perf_evsel__newtp("raw_syscalls", direction);
227 /* older kernel (e.g., RHEL6) use syscalls:{enter,exit} */
229 evsel = perf_evsel__newtp("syscalls", direction);
232 if (perf_evsel__init_syscall_tp(evsel, handler))
239 perf_evsel__delete_priv(evsel);
/*
 * Read the integer tracepoint field 'name' from 'sample' through the accessor
 * stored in the struct syscall_tp that hangs off evsel->priv.
 *
 * The pointer arguments are parenthesised so that arbitrary expressions can
 * be passed safely.
 */
#define perf_evsel__sc_tp_uint(evsel, name, sample) \
	({ struct syscall_tp *fields = (evsel)->priv; \
	   fields->name.integer(&fields->name, (sample)); })
/*
 * Obtain a pointer to the tracepoint field 'name' inside 'sample' through the
 * accessor stored in the struct syscall_tp that hangs off evsel->priv.
 *
 * The pointer arguments are parenthesised so that arbitrary expressions can
 * be passed safely.
 */
#define perf_evsel__sc_tp_ptr(evsel, name, sample) \
	({ struct syscall_tp *fields = (evsel)->priv; \
	   fields->name.pointer(&fields->name, (sample)); })
253 struct thread *thread;
263 const char **entries;
266 #define DEFINE_STRARRAY(array) struct strarray strarray__##array = { \
267 .nr_entries = ARRAY_SIZE(array), \
271 #define DEFINE_STRARRAY_OFFSET(array, off) struct strarray strarray__##array = { \
273 .nr_entries = ARRAY_SIZE(array), \
277 static size_t __syscall_arg__scnprintf_strarray(char *bf, size_t size,
279 struct syscall_arg *arg)
281 struct strarray *sa = arg->parm;
282 int idx = arg->val - sa->offset;
284 if (idx < 0 || idx >= sa->nr_entries)
285 return scnprintf(bf, size, intfmt, arg->val);
287 return scnprintf(bf, size, "%s", sa->entries[idx]);
/*
 * String-array beautifier whose numeric fallback is printed in decimal.
 * Thin wrapper around __syscall_arg__scnprintf_strarray().
 */
static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size,
					      struct syscall_arg *arg)
{
	const char *fallback_fmt = "%d";

	return __syscall_arg__scnprintf_strarray(bf, size, fallback_fmt, arg);
}
296 #define SCA_STRARRAY syscall_arg__scnprintf_strarray
298 #if defined(__i386__) || defined(__x86_64__)
300 * FIXME: Make this available to all arches as soon as the ioctl beautifier
301 * gets rewritten to support all arches.
/*
 * String-array beautifier whose numeric fallback is printed in hexadecimal.
 * Thin wrapper around __syscall_arg__scnprintf_strarray().
 */
static size_t syscall_arg__scnprintf_strhexarray(char *bf, size_t size,
						 struct syscall_arg *arg)
{
	const char *fallback_fmt = "%#x";

	return __syscall_arg__scnprintf_strarray(bf, size, fallback_fmt, arg);
}
309 #define SCA_STRHEXARRAY syscall_arg__scnprintf_strhexarray
310 #endif /* defined(__i386__) || defined(__x86_64__) */
312 static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
313 struct syscall_arg *arg);
315 #define SCA_FD syscall_arg__scnprintf_fd
317 static size_t syscall_arg__scnprintf_fd_at(char *bf, size_t size,
318 struct syscall_arg *arg)
323 return scnprintf(bf, size, "CWD");
325 return syscall_arg__scnprintf_fd(bf, size, arg);
328 #define SCA_FDAT syscall_arg__scnprintf_fd_at
330 static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
331 struct syscall_arg *arg);
333 #define SCA_CLOSE_FD syscall_arg__scnprintf_close_fd
335 static size_t syscall_arg__scnprintf_hex(char *bf, size_t size,
336 struct syscall_arg *arg)
338 return scnprintf(bf, size, "%#lx", arg->val);
341 #define SCA_HEX syscall_arg__scnprintf_hex
343 static size_t syscall_arg__scnprintf_int(char *bf, size_t size,
344 struct syscall_arg *arg)
346 return scnprintf(bf, size, "%d", arg->val);
349 #define SCA_INT syscall_arg__scnprintf_int
/*
 * Beautifier for a memory protection argument (arg->val).
 *
 * A value equal to PROT_NONE is printed as "NONE".  Otherwise every PROT_*
 * bit tested through P_MMAP_PROT() and found set is appended by name, the
 * names being separated by '|'.  The last scnprintf() appends the value held
 * in 'prot' in hex, using the same separator.
 */
351 static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size,
352 struct syscall_arg *arg)
354 int printed = 0, prot = arg->val;
356 if (prot == PROT_NONE)
357 return scnprintf(bf, size, "NONE");
358 #define P_MMAP_PROT(n) \
359 if (prot & PROT_##n) { \
360 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
370 P_MMAP_PROT(GROWSDOWN);
371 P_MMAP_PROT(GROWSUP);
375 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", prot);
380 #define SCA_MMAP_PROT syscall_arg__scnprintf_mmap_prot
/*
 * Beautifier for the mmap() 'flags' argument (arg->val).
 *
 * Every MAP_* bit tested through P_MMAP_FLAG() and found set is appended by
 * name, the names being separated by '|'.  MAP_UNINITIALIZED is only tested
 * when the system headers define it.  The last scnprintf() appends the value
 * held in 'flags' in hex, using the same separator.
 */
382 static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size,
383 struct syscall_arg *arg)
385 int printed = 0, flags = arg->val;
387 #define P_MMAP_FLAG(n) \
388 if (flags & MAP_##n) { \
389 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
394 P_MMAP_FLAG(PRIVATE);
398 P_MMAP_FLAG(ANONYMOUS);
399 P_MMAP_FLAG(DENYWRITE);
400 P_MMAP_FLAG(EXECUTABLE);
403 P_MMAP_FLAG(GROWSDOWN);
405 P_MMAP_FLAG(HUGETLB);
408 P_MMAP_FLAG(NONBLOCK);
409 P_MMAP_FLAG(NORESERVE);
410 P_MMAP_FLAG(POPULATE);
412 #ifdef MAP_UNINITIALIZED
413 P_MMAP_FLAG(UNINITIALIZED);
418 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
423 #define SCA_MMAP_FLAGS syscall_arg__scnprintf_mmap_flags
/*
 * Beautifier for the mremap() 'flags' argument (arg->val).
 *
 * Every MREMAP_* bit tested through P_MREMAP_FLAG() and found set is appended
 * by name, separated by '|', and is then cleared from 'flags'.  The last
 * scnprintf() appends what is left in 'flags' in hex, using the same
 * separator.
 */
425 static size_t syscall_arg__scnprintf_mremap_flags(char *bf, size_t size,
426 struct syscall_arg *arg)
428 int printed = 0, flags = arg->val;
430 #define P_MREMAP_FLAG(n) \
431 if (flags & MREMAP_##n) { \
432 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
433 flags &= ~MREMAP_##n; \
436 P_MREMAP_FLAG(MAYMOVE);
438 P_MREMAP_FLAG(FIXED);
443 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
448 #define SCA_MREMAP_FLAGS syscall_arg__scnprintf_mremap_flags
/*
 * Beautifier for the madvise() 'behavior' argument (arg->val).
 *
 * Each P_MADV_BHV(n) expands to a case label for MADV_<n> that prints the
 * name and returns.  MADV_SOFT_OFFLINE and MADV_NOHUGEPAGE are only handled
 * when the system headers define them.  A value with no matching case is
 * printed in hex.
 */
450 static size_t syscall_arg__scnprintf_madvise_behavior(char *bf, size_t size,
451 struct syscall_arg *arg)
453 int behavior = arg->val;
456 #define P_MADV_BHV(n) case MADV_##n: return scnprintf(bf, size, #n)
459 P_MADV_BHV(SEQUENTIAL);
460 P_MADV_BHV(WILLNEED);
461 P_MADV_BHV(DONTNEED);
463 P_MADV_BHV(DONTFORK);
465 P_MADV_BHV(HWPOISON);
466 #ifdef MADV_SOFT_OFFLINE
467 P_MADV_BHV(SOFT_OFFLINE);
469 P_MADV_BHV(MERGEABLE);
470 P_MADV_BHV(UNMERGEABLE);
472 P_MADV_BHV(HUGEPAGE);
474 #ifdef MADV_NOHUGEPAGE
475 P_MADV_BHV(NOHUGEPAGE);
478 P_MADV_BHV(DONTDUMP);
487 return scnprintf(bf, size, "%#x", behavior);
490 #define SCA_MADV_BHV syscall_arg__scnprintf_madvise_behavior
/*
 * Beautifier for the flock() operation argument (arg->val).
 *
 * One path prints "NONE" and returns.  Otherwise a LOCK_* command is appended
 * by name, separated by '|', when all of its bits are present in 'op'.  The
 * last scnprintf() appends the value held in 'op' in hex, using the same
 * separator.
 */
492 static size_t syscall_arg__scnprintf_flock(char *bf, size_t size,
493 struct syscall_arg *arg)
495 int printed = 0, op = arg->val;
498 return scnprintf(bf, size, "NONE");
500 if ((op & LOCK_##cmd) == LOCK_##cmd) { \
501 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #cmd); \
516 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", op);
521 #define SCA_FLOCK syscall_arg__scnprintf_flock
/*
 * Beautifier for the futex() 'op' argument.
 *
 * The command part of the value (op & FUTEX_CMD_MASK) is printed by name, or
 * in hex when it is none of the commands listed below.  "|PRIV" and "|CLKRT"
 * are appended when FUTEX_PRIVATE_FLAG and FUTEX_CLOCK_REALTIME are set.
 *
 * Most commands also OR SCF_* bits into arg->mask.
 * NOTE(review): presumably those bits mark the futex() arguments the command
 * does not use, so that they are left out of the output -- confirm against
 * the code that consumes arg->mask.
 */
523 static size_t syscall_arg__scnprintf_futex_op(char *bf, size_t size, struct syscall_arg *arg)
525 enum syscall_futex_args {
526 SCF_UADDR = (1 << 0),
529 SCF_TIMEOUT = (1 << 3),
530 SCF_UADDR2 = (1 << 4),
534 int cmd = op & FUTEX_CMD_MASK;
538 #define P_FUTEX_OP(n) case FUTEX_##n: printed = scnprintf(bf, size, #n);
539 P_FUTEX_OP(WAIT); arg->mask |= SCF_VAL3|SCF_UADDR2; break;
540 P_FUTEX_OP(WAKE); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
541 P_FUTEX_OP(FD); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
542 P_FUTEX_OP(REQUEUE); arg->mask |= SCF_VAL3|SCF_TIMEOUT; break;
543 P_FUTEX_OP(CMP_REQUEUE); arg->mask |= SCF_TIMEOUT; break;
544 P_FUTEX_OP(CMP_REQUEUE_PI); arg->mask |= SCF_TIMEOUT; break;
545 P_FUTEX_OP(WAKE_OP); break;
546 P_FUTEX_OP(LOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
547 P_FUTEX_OP(UNLOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
548 P_FUTEX_OP(TRYLOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2; break;
549 P_FUTEX_OP(WAIT_BITSET); arg->mask |= SCF_UADDR2; break;
550 P_FUTEX_OP(WAKE_BITSET); arg->mask |= SCF_UADDR2; break;
551 P_FUTEX_OP(WAIT_REQUEUE_PI); break;
552 default: printed = scnprintf(bf, size, "%#x", cmd); break;
555 if (op & FUTEX_PRIVATE_FLAG)
556 printed += scnprintf(bf + printed, size - printed, "|PRIV");
558 if (op & FUTEX_CLOCK_REALTIME)
559 printed += scnprintf(bf + printed, size - printed, "|CLKRT");
564 #define SCA_FUTEX_OP syscall_arg__scnprintf_futex_op
566 static const char *epoll_ctl_ops[] = { "ADD", "DEL", "MOD", };
567 static DEFINE_STRARRAY_OFFSET(epoll_ctl_ops, 1);
569 static const char *itimers[] = { "REAL", "VIRTUAL", "PROF", };
570 static DEFINE_STRARRAY(itimers);
572 static const char *whences[] = { "SET", "CUR", "END",
580 static DEFINE_STRARRAY(whences);
582 static const char *fcntl_cmds[] = {
583 "DUPFD", "GETFD", "SETFD", "GETFL", "SETFL", "GETLK", "SETLK",
584 "SETLKW", "SETOWN", "GETOWN", "SETSIG", "GETSIG", "F_GETLK64",
585 "F_SETLK64", "F_SETLKW64", "F_SETOWN_EX", "F_GETOWN_EX",
588 static DEFINE_STRARRAY(fcntl_cmds);
590 static const char *rlimit_resources[] = {
591 "CPU", "FSIZE", "DATA", "STACK", "CORE", "RSS", "NPROC", "NOFILE",
592 "MEMLOCK", "AS", "LOCKS", "SIGPENDING", "MSGQUEUE", "NICE", "RTPRIO",
595 static DEFINE_STRARRAY(rlimit_resources);
597 static const char *sighow[] = { "BLOCK", "UNBLOCK", "SETMASK", };
598 static DEFINE_STRARRAY(sighow);
600 static const char *clockid[] = {
601 "REALTIME", "MONOTONIC", "PROCESS_CPUTIME_ID", "THREAD_CPUTIME_ID",
602 "MONOTONIC_RAW", "REALTIME_COARSE", "MONOTONIC_COARSE",
604 static DEFINE_STRARRAY(clockid);
606 static const char *socket_families[] = {
607 "UNSPEC", "LOCAL", "INET", "AX25", "IPX", "APPLETALK", "NETROM",
608 "BRIDGE", "ATMPVC", "X25", "INET6", "ROSE", "DECnet", "NETBEUI",
609 "SECURITY", "KEY", "NETLINK", "PACKET", "ASH", "ECONET", "ATMSVC",
610 "RDS", "SNA", "IRDA", "PPPOX", "WANPIPE", "LLC", "IB", "CAN", "TIPC",
611 "BLUETOOTH", "IUCV", "RXRPC", "ISDN", "PHONET", "IEEE802154", "CAIF",
612 "ALG", "NFC", "VSOCK",
614 static DEFINE_STRARRAY(socket_families);
616 #ifndef SOCK_TYPE_MASK
617 #define SOCK_TYPE_MASK 0xf
/*
 * Beautifier for the socket type argument of socket()/socketpair().
 *
 * The value is split in two: the bits covered by SOCK_TYPE_MASK are the
 * socket type, everything else is treated as flags.  The type is printed by
 * name through the P_SK_TYPE() cases, or in hex when no case matches.  Each
 * SOCK_* flag tested through P_SK_FLAG() and found set is appended as
 * "|NAME" and cleared from 'flags'; the last scnprintf() appends what is left
 * in 'flags' as "|" followed by its hex value.
 */
620 static size_t syscall_arg__scnprintf_socket_type(char *bf, size_t size,
621 struct syscall_arg *arg)
625 flags = type & ~SOCK_TYPE_MASK;
627 type &= SOCK_TYPE_MASK;
629 * Can't use a strarray, MIPS may override for ABI reasons.
632 #define P_SK_TYPE(n) case SOCK_##n: printed = scnprintf(bf, size, #n); break;
637 P_SK_TYPE(SEQPACKET);
642 printed = scnprintf(bf, size, "%#x", type);
645 #define P_SK_FLAG(n) \
646 if (flags & SOCK_##n) { \
647 printed += scnprintf(bf + printed, size - printed, "|%s", #n); \
648 flags &= ~SOCK_##n; \
656 printed += scnprintf(bf + printed, size - printed, "|%#x", flags);
661 #define SCA_SK_TYPE syscall_arg__scnprintf_socket_type
664 #define MSG_PROBE 0x10
666 #ifndef MSG_WAITFORONE
667 #define MSG_WAITFORONE 0x10000
669 #ifndef MSG_SENDPAGE_NOTLAST
670 #define MSG_SENDPAGE_NOTLAST 0x20000
673 #define MSG_FASTOPEN 0x20000000
/*
 * Beautifier for the 'flags' argument of the send and receive syscalls
 * (arg->val).
 *
 * One path prints "NONE" and returns.  Otherwise every MSG_* bit tested
 * through P_MSG_FLAG() and found set is appended by name, the names being
 * separated by '|'.  The last scnprintf() appends the value held in 'flags'
 * in hex, using the same separator.
 */
676 static size_t syscall_arg__scnprintf_msg_flags(char *bf, size_t size,
677 struct syscall_arg *arg)
679 int printed = 0, flags = arg->val;
682 return scnprintf(bf, size, "NONE");
683 #define P_MSG_FLAG(n) \
684 if (flags & MSG_##n) { \
685 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
691 P_MSG_FLAG(DONTROUTE);
696 P_MSG_FLAG(DONTWAIT);
703 P_MSG_FLAG(ERRQUEUE);
704 P_MSG_FLAG(NOSIGNAL);
706 P_MSG_FLAG(WAITFORONE);
707 P_MSG_FLAG(SENDPAGE_NOTLAST);
708 P_MSG_FLAG(FASTOPEN);
709 P_MSG_FLAG(CMSG_CLOEXEC);
713 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
718 #define SCA_MSG_FLAGS syscall_arg__scnprintf_msg_flags
/*
 * Beautifier for the access() 'mode' argument.
 *
 * A mode equal to F_OK (0) is printed as "F".  Otherwise, for every <n>_OK
 * bit that is tested and found set, the letter(s) <n> are appended with no
 * separator.  The last scnprintf() appends the value held in 'mode' as "|"
 * followed by its hex value.
 */
720 static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size,
721 struct syscall_arg *arg)
726 if (mode == F_OK) /* 0 */
727 return scnprintf(bf, size, "F");
729 if (mode & n##_OK) { \
730 printed += scnprintf(bf + printed, size - printed, "%s", #n); \
740 printed += scnprintf(bf + printed, size - printed, "|%#x", mode);
745 #define SCA_ACCMODE syscall_arg__scnprintf_access_mode
747 static size_t syscall_arg__scnprintf_filename(char *bf, size_t size,
748 struct syscall_arg *arg);
750 #define SCA_FILENAME syscall_arg__scnprintf_filename
/*
 * Beautifier for the open()-style 'flags' argument (arg->val).
 *
 * When O_CREAT is not set, the bit for the argument that follows this one
 * (arg->idx + 1, the mode parameter) is set in arg->mask.  One path prints
 * "RDONLY" and returns.  Otherwise every O_* bit that is tested and found set
 * is appended by name, separated by '|'; "SYNC" is appended only when all
 * bits of O_SYNC are present.  The last scnprintf() appends the value held in
 * 'flags' in hex, using the same separator.
 */
752 static size_t syscall_arg__scnprintf_open_flags(char *bf, size_t size,
753 struct syscall_arg *arg)
755 int printed = 0, flags = arg->val;
757 if (!(flags & O_CREAT))
758 arg->mask |= 1 << (arg->idx + 1); /* Mask the mode parm */
761 return scnprintf(bf, size, "RDONLY");
763 if (flags & O_##n) { \
764 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
788 if ((flags & O_SYNC) == O_SYNC)
789 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", "SYNC");
801 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
806 #define SCA_OPEN_FLAGS syscall_arg__scnprintf_open_flags
/*
 * Beautifier for the perf_event_open() 'flags' argument (arg->val).
 *
 * Every PERF_FLAG_* bit that is tested and found set is appended by name,
 * separated by '|', and is then cleared from 'flags'.  The last scnprintf()
 * appends what is left in 'flags' in hex, using the same separator.
 */
808 static size_t syscall_arg__scnprintf_perf_flags(char *bf, size_t size,
809 struct syscall_arg *arg)
811 int printed = 0, flags = arg->val;
817 if (flags & PERF_FLAG_##n) { \
818 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
819 flags &= ~PERF_FLAG_##n; \
829 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
834 #define SCA_PERF_FLAGS syscall_arg__scnprintf_perf_flags
/*
 * Beautifier for the eventfd2() 'flags' argument (arg->val).
 *
 * One path prints "NONE" and returns.  Otherwise every EFD_* bit that is
 * tested and found set is appended by name, separated by '|'.  The last
 * scnprintf() appends the value held in 'flags' in hex, using the same
 * separator.
 */
836 static size_t syscall_arg__scnprintf_eventfd_flags(char *bf, size_t size,
837 struct syscall_arg *arg)
839 int printed = 0, flags = arg->val;
842 return scnprintf(bf, size, "NONE");
844 if (flags & EFD_##n) { \
845 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
855 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
860 #define SCA_EFD_FLAGS syscall_arg__scnprintf_eventfd_flags
/*
 * Beautifier for the pipe2() 'flags' argument (arg->val).
 *
 * Every O_* bit that is tested and found set is appended by name, separated
 * by '|'.  The last scnprintf() appends the value held in 'flags' in hex,
 * using the same separator.
 */
862 static size_t syscall_arg__scnprintf_pipe_flags(char *bf, size_t size,
863 struct syscall_arg *arg)
865 int printed = 0, flags = arg->val;
868 if (flags & O_##n) { \
869 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
878 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
883 #define SCA_PIPE_FLAGS syscall_arg__scnprintf_pipe_flags
/*
 * Beautifier for signal number arguments.
 *
 * Each P_SIGNUM(n) expands to a case label for SIG<n> that prints the name
 * without the "SIG" prefix and returns.  A value with no matching case is
 * printed in hex.
 */
885 static size_t syscall_arg__scnprintf_signum(char *bf, size_t size, struct syscall_arg *arg)
890 #define P_SIGNUM(n) case SIG##n: return scnprintf(bf, size, #n)
933 return scnprintf(bf, size, "%#x", sig);
936 #define SCA_SIGNUM syscall_arg__scnprintf_signum
938 #if defined(__i386__) || defined(__x86_64__)
940 * FIXME: Make this available to all arches.
942 #define TCGETS 0x5401
944 static const char *tioctls[] = {
945 "TCGETS", "TCSETS", "TCSETSW", "TCSETSF", "TCGETA", "TCSETA", "TCSETAW",
946 "TCSETAF", "TCSBRK", "TCXONC", "TCFLSH", "TIOCEXCL", "TIOCNXCL",
947 "TIOCSCTTY", "TIOCGPGRP", "TIOCSPGRP", "TIOCOUTQ", "TIOCSTI",
948 "TIOCGWINSZ", "TIOCSWINSZ", "TIOCMGET", "TIOCMBIS", "TIOCMBIC",
949 "TIOCMSET", "TIOCGSOFTCAR", "TIOCSSOFTCAR", "FIONREAD", "TIOCLINUX",
950 "TIOCCONS", "TIOCGSERIAL", "TIOCSSERIAL", "TIOCPKT", "FIONBIO",
951 "TIOCNOTTY", "TIOCSETD", "TIOCGETD", "TCSBRKP", [0x27] = "TIOCSBRK",
952 "TIOCCBRK", "TIOCGSID", "TCGETS2", "TCSETS2", "TCSETSW2", "TCSETSF2",
953 "TIOCGRS485", "TIOCSRS485", "TIOCGPTN", "TIOCSPTLCK",
954 "TIOCGDEV||TCGETX", "TCSETX", "TCSETXF", "TCSETXW", "TIOCSIG",
955 "TIOCVHANGUP", "TIOCGPKT", "TIOCGPTLCK", "TIOCGEXCL",
956 [0x50] = "FIONCLEX", "FIOCLEX", "FIOASYNC", "TIOCSERCONFIG",
957 "TIOCSERGWILD", "TIOCSERSWILD", "TIOCGLCKTRMIOS", "TIOCSLCKTRMIOS",
958 "TIOCSERGSTRUCT", "TIOCSERGETLSR", "TIOCSERGETMULTI", "TIOCSERSETMULTI",
959 "TIOCMIWAIT", "TIOCGICOUNT", [0x60] = "FIOQSIZE",
962 static DEFINE_STRARRAY_OFFSET(tioctls, 0x5401);
963 #endif /* defined(__i386__) || defined(__x86_64__) */
/*
 * Initialiser fragment for a syscall_fmts entry: argument number 'arg' is
 * printed with SCA_STRARRAY, using strarray__<array> as its lookup table.
 * The 'name' parameter is not used by the expansion; at the call sites it
 * carries the name of the syscall argument for the reader's benefit.
 */
965 #define STRARRAY(arg, name, array) \
966 .arg_scnprintf = { [arg] = SCA_STRARRAY, }, \
967 .arg_parm = { [arg] = &strarray__##array, }
969 static struct syscall_fmt {
972 size_t (*arg_scnprintf[6])(char *bf, size_t size, struct syscall_arg *arg);
978 { .name = "access", .errmsg = true,
979 .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */
980 [1] = SCA_ACCMODE, /* mode */ }, },
981 { .name = "arch_prctl", .errmsg = true, .alias = "prctl", },
982 { .name = "brk", .hexret = true,
983 .arg_scnprintf = { [0] = SCA_HEX, /* brk */ }, },
984 { .name = "chdir", .errmsg = true,
985 .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
986 { .name = "chmod", .errmsg = true,
987 .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
988 { .name = "chroot", .errmsg = true,
989 .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
990 { .name = "clock_gettime", .errmsg = true, STRARRAY(0, clk_id, clockid), },
991 { .name = "close", .errmsg = true,
992 .arg_scnprintf = { [0] = SCA_CLOSE_FD, /* fd */ }, },
993 { .name = "connect", .errmsg = true, },
994 { .name = "creat", .errmsg = true,
995 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
996 { .name = "dup", .errmsg = true,
997 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
998 { .name = "dup2", .errmsg = true,
999 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1000 { .name = "dup3", .errmsg = true,
1001 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1002 { .name = "epoll_ctl", .errmsg = true, STRARRAY(1, op, epoll_ctl_ops), },
1003 { .name = "eventfd2", .errmsg = true,
1004 .arg_scnprintf = { [1] = SCA_EFD_FLAGS, /* flags */ }, },
1005 { .name = "faccessat", .errmsg = true,
1006 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1007 [1] = SCA_FILENAME, /* filename */ }, },
1008 { .name = "fadvise64", .errmsg = true,
1009 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1010 { .name = "fallocate", .errmsg = true,
1011 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1012 { .name = "fchdir", .errmsg = true,
1013 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1014 { .name = "fchmod", .errmsg = true,
1015 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1016 { .name = "fchmodat", .errmsg = true,
1017 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
1018 { .name = "fchown", .errmsg = true,
1019 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1020 { .name = "fchownat", .errmsg = true,
1021 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */
1022 [1] = SCA_FILENAME, /* filename */ }, },
1023 { .name = "fcntl", .errmsg = true,
1024 .arg_scnprintf = { [0] = SCA_FD, /* fd */
1025 [1] = SCA_STRARRAY, /* cmd */ },
1026 .arg_parm = { [1] = &strarray__fcntl_cmds, /* cmd */ }, },
1027 { .name = "fdatasync", .errmsg = true,
1028 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1029 { .name = "flock", .errmsg = true,
1030 .arg_scnprintf = { [0] = SCA_FD, /* fd */
1031 [1] = SCA_FLOCK, /* cmd */ }, },
1032 { .name = "fsetxattr", .errmsg = true,
1033 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1034 { .name = "fstat", .errmsg = true, .alias = "newfstat",
1035 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1036 { .name = "fstatat", .errmsg = true, .alias = "newfstatat",
1037 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1038 [1] = SCA_FILENAME, /* filename */ }, },
1039 { .name = "fstatfs", .errmsg = true,
1040 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1041 { .name = "fsync", .errmsg = true,
1042 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1043 { .name = "ftruncate", .errmsg = true,
1044 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1045 { .name = "futex", .errmsg = true,
1046 .arg_scnprintf = { [1] = SCA_FUTEX_OP, /* op */ }, },
1047 { .name = "futimesat", .errmsg = true,
1048 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
1049 { .name = "getdents", .errmsg = true,
1050 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1051 { .name = "getdents64", .errmsg = true,
1052 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1053 { .name = "getitimer", .errmsg = true, STRARRAY(0, which, itimers), },
1054 { .name = "getrlimit", .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
1055 { .name = "getxattr", .errmsg = true,
1056 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1057 { .name = "inotify_add_watch", .errmsg = true,
1058 .arg_scnprintf = { [1] = SCA_FILENAME, /* pathname */ }, },
1059 { .name = "ioctl", .errmsg = true,
1060 .arg_scnprintf = { [0] = SCA_FD, /* fd */
1061 #if defined(__i386__) || defined(__x86_64__)
1063 * FIXME: Make this available to all arches.
1065 [1] = SCA_STRHEXARRAY, /* cmd */
1066 [2] = SCA_HEX, /* arg */ },
1067 .arg_parm = { [1] = &strarray__tioctls, /* cmd */ }, },
1069 [2] = SCA_HEX, /* arg */ }, },
1071 { .name = "kill", .errmsg = true,
1072 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1073 { .name = "lchown", .errmsg = true,
1074 .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1075 { .name = "lgetxattr", .errmsg = true,
1076 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1077 { .name = "linkat", .errmsg = true,
1078 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
1079 { .name = "listxattr", .errmsg = true,
1080 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1081 { .name = "lseek", .errmsg = true,
1082 .arg_scnprintf = { [0] = SCA_FD, /* fd */
1083 [2] = SCA_STRARRAY, /* whence */ },
1084 .arg_parm = { [2] = &strarray__whences, /* whence */ }, },
1085 { .name = "lsetxattr", .errmsg = true,
1086 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1087 { .name = "lstat", .errmsg = true, .alias = "newlstat", },
1088 { .name = "lsxattr", .errmsg = true,
1089 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1090 { .name = "madvise", .errmsg = true,
1091 .arg_scnprintf = { [0] = SCA_HEX, /* start */
1092 [2] = SCA_MADV_BHV, /* behavior */ }, },
1093 { .name = "mkdir", .errmsg = true,
1094 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1095 { .name = "mkdirat", .errmsg = true,
1096 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */
1097 [1] = SCA_FILENAME, /* pathname */ }, },
1098 { .name = "mknod", .errmsg = true,
1099 .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1100 { .name = "mknodat", .errmsg = true,
1101 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
1102 { .name = "mlock", .errmsg = true,
1103 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1104 { .name = "mlockall", .errmsg = true,
1105 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1106 { .name = "mmap", .hexret = true,
1107 .arg_scnprintf = { [0] = SCA_HEX, /* addr */
1108 [2] = SCA_MMAP_PROT, /* prot */
1109 [3] = SCA_MMAP_FLAGS, /* flags */
1110 [4] = SCA_FD, /* fd */ }, },
1111 { .name = "mprotect", .errmsg = true,
1112 .arg_scnprintf = { [0] = SCA_HEX, /* start */
1113 [2] = SCA_MMAP_PROT, /* prot */ }, },
1114 { .name = "mremap", .hexret = true,
1115 .arg_scnprintf = { [0] = SCA_HEX, /* addr */
1116 [3] = SCA_MREMAP_FLAGS, /* flags */
1117 [4] = SCA_HEX, /* new_addr */ }, },
1118 { .name = "munlock", .errmsg = true,
1119 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1120 { .name = "munmap", .errmsg = true,
1121 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1122 { .name = "name_to_handle_at", .errmsg = true,
1123 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1124 { .name = "newfstatat", .errmsg = true,
1125 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1126 [1] = SCA_FILENAME, /* filename */ }, },
1127 { .name = "open", .errmsg = true,
1128 .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */
1129 [1] = SCA_OPEN_FLAGS, /* flags */ }, },
1130 { .name = "open_by_handle_at", .errmsg = true,
1131 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1132 [2] = SCA_OPEN_FLAGS, /* flags */ }, },
1133 { .name = "openat", .errmsg = true,
1134 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1135 [1] = SCA_FILENAME, /* filename */
1136 [2] = SCA_OPEN_FLAGS, /* flags */ }, },
1137 { .name = "perf_event_open", .errmsg = true,
1138 .arg_scnprintf = { [1] = SCA_INT, /* pid */
1139 [2] = SCA_INT, /* cpu */
1140 [3] = SCA_FD, /* group_fd */
1141 [4] = SCA_PERF_FLAGS, /* flags */ }, },
1142 { .name = "pipe2", .errmsg = true,
1143 .arg_scnprintf = { [1] = SCA_PIPE_FLAGS, /* flags */ }, },
1144 { .name = "poll", .errmsg = true, .timeout = true, },
1145 { .name = "ppoll", .errmsg = true, .timeout = true, },
1146 { .name = "pread", .errmsg = true, .alias = "pread64",
1147 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1148 { .name = "preadv", .errmsg = true, .alias = "pread",
1149 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1150 { .name = "prlimit64", .errmsg = true, STRARRAY(1, resource, rlimit_resources), },
1151 { .name = "pwrite", .errmsg = true, .alias = "pwrite64",
1152 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1153 { .name = "pwritev", .errmsg = true,
1154 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1155 { .name = "read", .errmsg = true,
1156 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1157 { .name = "readlink", .errmsg = true,
1158 .arg_scnprintf = { [0] = SCA_FILENAME, /* path */ }, },
1159 { .name = "readlinkat", .errmsg = true,
1160 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1161 [1] = SCA_FILENAME, /* pathname */ }, },
1162 { .name = "readv", .errmsg = true,
1163 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1164 { .name = "recvfrom", .errmsg = true,
1165 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1166 { .name = "recvmmsg", .errmsg = true,
1167 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1168 { .name = "recvmsg", .errmsg = true,
1169 .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
1170 { .name = "removexattr", .errmsg = true,
1171 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1172 { .name = "renameat", .errmsg = true,
1173 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1174 { .name = "rmdir", .errmsg = true,
1175 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1176 { .name = "rt_sigaction", .errmsg = true,
1177 .arg_scnprintf = { [0] = SCA_SIGNUM, /* sig */ }, },
1178 { .name = "rt_sigprocmask", .errmsg = true, STRARRAY(0, how, sighow), },
1179 { .name = "rt_sigqueueinfo", .errmsg = true,
1180 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1181 { .name = "rt_tgsigqueueinfo", .errmsg = true,
1182 .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
1183 { .name = "select", .errmsg = true, .timeout = true, },
1184 { .name = "sendmmsg", .errmsg = true,
1185 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1186 { .name = "sendmsg", .errmsg = true,
1187 .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
1188 { .name = "sendto", .errmsg = true,
1189 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1190 { .name = "setitimer", .errmsg = true, STRARRAY(0, which, itimers), },
1191 { .name = "setrlimit", .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
1192 { .name = "setxattr", .errmsg = true,
1193 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1194 { .name = "shutdown", .errmsg = true,
1195 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1196 { .name = "socket", .errmsg = true,
1197 .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
1198 [1] = SCA_SK_TYPE, /* type */ },
1199 .arg_parm = { [0] = &strarray__socket_families, /* family */ }, },
1200 { .name = "socketpair", .errmsg = true,
1201 .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
1202 [1] = SCA_SK_TYPE, /* type */ },
1203 .arg_parm = { [0] = &strarray__socket_families, /* family */ }, },
1204 { .name = "stat", .errmsg = true, .alias = "newstat", },
1205 { .name = "statfs", .errmsg = true,
1206 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1207 { .name = "swapoff", .errmsg = true,
1208 .arg_scnprintf = { [0] = SCA_FILENAME, /* specialfile */ }, },
1209 { .name = "swapon", .errmsg = true,
1210 .arg_scnprintf = { [0] = SCA_FILENAME, /* specialfile */ }, },
1211 { .name = "symlinkat", .errmsg = true,
1212 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1213 { .name = "tgkill", .errmsg = true,
1214 .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
1215 { .name = "tkill", .errmsg = true,
1216 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1217 { .name = "truncate", .errmsg = true,
1218 .arg_scnprintf = { [0] = SCA_FILENAME, /* path */ }, },
1219 { .name = "uname", .errmsg = true, .alias = "newuname", },
1220 { .name = "unlinkat", .errmsg = true,
1221 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1222 [1] = SCA_FILENAME, /* pathname */ }, },
1223 { .name = "utime", .errmsg = true,
1224 .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1225 { .name = "utimensat", .errmsg = true,
1226 .arg_scnprintf = { [0] = SCA_FDAT, /* dirfd */
1227 [1] = SCA_FILENAME, /* filename */ }, },
1228 { .name = "utimes", .errmsg = true,
1229 .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1230 { .name = "write", .errmsg = true,
1231 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1232 { .name = "writev", .errmsg = true,
1233 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1236 static int syscall_fmt__cmp(const void *name, const void *fmtp)
1238 const struct syscall_fmt *fmt = fmtp;
1239 return strcmp(name, fmt->name);
1242 static struct syscall_fmt *syscall_fmt__find(const char *name)
1244 const int nmemb = ARRAY_SIZE(syscall_fmts);
1245 return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp);
/* Tracepoint format for the syscall, as returned by trace_event__tp_format(). */
1249 struct event_format *tp_format;
/* First argument field; a leading "nr" field is skipped when present. */
1251 struct format_field *args;
/* Entry found by syscall_fmt__find() for this syscall name, if any. */
1254 struct syscall_fmt *fmt;
/* Array of per-argument formatters, indexed by argument position. */
1255 size_t (**arg_scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
/*
 * Print the duration t to fp as "(<value> ms): ".  t is divided by
 * NSEC_PER_MSEC, so it is expected in nanoseconds and shown in
 * milliseconds with three decimals.  Values of at least 1 ms are printed
 * with PERF_COLOR_RED, values of at least 0.01 ms with PERF_COLOR_YELLOW,
 * and PERF_COLOR_NORMAL is used for the remaining case.
 * The return value is the sum of what the individual print calls return.
 */
1259 static size_t fprintf_duration(unsigned long t, FILE *fp)
1261 double duration = (double)t / NSEC_PER_MSEC;
1262 size_t printed = fprintf(fp, "(");
1264 if (duration >= 1.0)
1265 printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration);
1266 else if (duration >= 0.01)
1267 printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration);
1269 printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration);
1270 return printed + fprintf(fp, "): ");
1274 * filename.ptr: The filename char pointer that will be vfs_getname'd
1275 * filename.entry_str_pos: Where to insert the string translated from
1276 * filename.ptr by the vfs_getname tracepoint/kprobe.
/* Per-thread tracing state, kept as the private data of a struct thread. */
1278 struct thread_trace {
/* Number of events seen for the thread; incremented by thread__trace(). */
1282 unsigned long nr_events;
/* NOTE(review): presumably major and minor page fault counts - confirm. */
1283 unsigned long pfmaj, pfmin;
/* Per-syscall statistics, looked up by syscall id in thread__update_stats(). */
1295 struct intlist *syscall_stats;
/*
 * Allocate a zero-filled struct thread_trace.  paths.max is set to -1,
 * the value trace__set_fd_pathname() treats as "no fd table yet", and
 * syscall_stats receives a new, empty intlist.
 */
1298 static struct thread_trace *thread_trace__new(void)
1300 struct thread_trace *ttrace = zalloc(sizeof(struct thread_trace));
1303 ttrace->paths.max = -1;
1305 ttrace->syscall_stats = intlist__new(NULL);
/*
 * Fetch the thread_trace stored as private data of "thread", creating it
 * with thread_trace__new() when none is attached yet, and count one more
 * event for it.  If the private data is still NULL after the creation
 * attempt, the out-of-memory warning below is written to fp.
 */
1310 static struct thread_trace *thread__trace(struct thread *thread, FILE *fp)
1312 struct thread_trace *ttrace;
/* Lazily attach the per-thread state on first use. */
1317 if (thread__priv(thread) == NULL)
1318 thread__set_priv(thread, thread_trace__new());
/* Still NULL means thread_trace__new() could not allocate. */
1320 if (thread__priv(thread) == NULL)
1323 ttrace = thread__priv(thread);
1324 ++ttrace->nr_events;
1328 color_fprintf(fp, PERF_COLOR_RED,
1329 "WARNING: not enough memory, dropping samples!\n");
/* Bit flags tested against trace->trace_pgfaults to select page fault events. */
1333 #define TRACE_PFMAJ (1 << 0)
1334 #define TRACE_PFMIN (1 << 1)
/* Size in bytes of the per-thread entry_str buffer allocated in trace__sys_enter(). */
1336 static const size_t trace__entry_str_size = 2048;
/* Embedded tool; callbacks get back to the struct trace with container_of(). */
1339 struct perf_tool tool;
/* Syscall descriptors indexed by syscall id; grown in trace__read_syscall_info(). */
1346 struct syscall *table;
1348 struct perf_evsel *sys_enter,
1352 struct record_opts opts;
1353 struct perf_evlist *evlist;
1354 struct machine *host;
/* Thread of the latest sys_enter; a reference is taken with thread__get(). */
1355 struct thread *current;
1358 unsigned long nr_events;
/* Syscall names to be translated to ids by trace__validate_ev_qualifier(). */
1359 struct strlist *ev_qualifier;
/* Pathname from the latest vfs_getname sample; consumed by trace__sys_exit(). */
1364 const char *last_vfs_getname;
/* Optional tid and pid lists consulted by skip_sample(). */
1365 struct intlist *tid_list;
1366 struct intlist *pid_list;
/* Threshold in milliseconds used by trace__filter_duration(). */
1371 double duration_filter;
/* Its negation is passed to asprintf_expr_inout_ints() when building the id filter. */
1377 bool not_ev_qualifier;
/* When set, trace__fprintf_entry_head() prints the tid (and optionally comm). */
1381 bool multiple_threads;
/* When set, trace__run() prints the vfs_getname/proc_getname counters at the end. */
1385 bool show_tool_stats;
/* When set, the syscall tracepoints are added in trace__run(). */
1386 bool trace_syscalls;
/*
 * Record "pathname" as the path behind descriptor "fd" of "thread".
 * The per-thread table is indexed by fd.  When fd is larger than
 * paths.max the table is reallocated to hold fd + 1 pointers and the
 * newly added slots are zeroed.  The pathname is stored as a strdup()
 * copy; the result is 0 when that copy was made and -1 when it is NULL.
 */
1392 static int trace__set_fd_pathname(struct thread *thread, int fd, const char *pathname)
1394 struct thread_trace *ttrace = thread__priv(thread);
1396 if (fd > ttrace->paths.max) {
/* realloc() into a temporary so the old table is not lost on failure. */
1397 char **npath = realloc(ttrace->paths.table, (fd + 1) * sizeof(char *));
1402 if (ttrace->paths.max != -1) {
/* A table already existed: clear only the slots past the old maximum. */
1403 memset(npath + ttrace->paths.max + 1, 0,
1404 (fd - ttrace->paths.max) * sizeof(char *));
/* No table existed before (paths.max was -1): clear all fd + 1 slots. */
1406 memset(npath, 0, (fd + 1) * sizeof(char *));
1409 ttrace->paths.table = npath;
1410 ttrace->paths.max = fd;
1413 ttrace->paths.table[fd] = strdup(pathname);
1415 return ttrace->paths.table[fd] != NULL ? 0 : -1;
/*
 * Resolve descriptor "fd" of "thread" to a pathname by reading the
 * matching symlink under /proc, then cache it through
 * trace__set_fd_pathname().  When pid_ equals tid the link is
 * /proc/<pid>/fd/<fd>, otherwise /proc/<pid>/task/<tid>/fd/<fd>.
 */
1418 static int thread__read_fd_path(struct thread *thread, int fd)
1420 char linkname[PATH_MAX], pathname[PATH_MAX];
1424 if (thread->pid_ == thread->tid) {
1425 scnprintf(linkname, sizeof(linkname),
1426 "/proc/%d/fd/%d", thread->pid_, fd);
1428 scnprintf(linkname, sizeof(linkname),
1429 "/proc/%d/task/%d/fd/%d", thread->pid_, thread->tid, fd);
/* Bail out if lstat() fails or the target plus terminator would not fit. */
1432 if (lstat(linkname, &st) < 0 || st.st_size + 1 > (off_t)sizeof(pathname))
1435 ret = readlink(linkname, pathname, sizeof(pathname));
/* Reject a readlink() error or a target longer than lstat() reported. */
1437 if (ret < 0 || ret > st.st_size)
/* readlink() does not terminate the buffer. */
1440 pathname[ret] = '\0';
1441 return trace__set_fd_pathname(thread, fd, pathname);
/*
 * Return the cached pathname for descriptor "fd" of "thread".  On a
 * cache miss (fd beyond paths.max, or an empty slot) the proc_getname
 * counter is incremented and thread__read_fd_path() is called to fill
 * the slot from /proc.
 */
1444 static const char *thread__fd_path(struct thread *thread, int fd,
1445 struct trace *trace)
1447 struct thread_trace *ttrace = thread__priv(thread);
1455 if ((fd > ttrace->paths.max || ttrace->paths.table[fd] == NULL)) {
1458 ++trace->stats.proc_getname;
1459 if (thread__read_fd_path(thread, fd))
1463 return ttrace->paths.table[fd];
/*
 * Format a file descriptor argument into bf: the fd in decimal, followed
 * by the pathname obtained from thread__fd_path() wrapped in angle
 * brackets.
 */
1466 static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
1467 struct syscall_arg *arg)
1470 size_t printed = scnprintf(bf, size, "%d", fd);
1471 const char *path = thread__fd_path(arg->thread, fd, arg->trace);
1474 printed += scnprintf(bf + printed, size - printed, "<%s>", path);
/*
 * Format the fd with syscall_arg__scnprintf_fd() and then free the cached
 * pathname slot for that fd, provided the thread has tracing state and
 * the fd is inside the table.  NOTE(review): presumably this keeps a
 * stale path from being shown once the fd number is reused - confirm.
 */
1479 static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
1480 struct syscall_arg *arg)
1483 size_t printed = syscall_arg__scnprintf_fd(bf, size, arg);
1484 struct thread_trace *ttrace = thread__priv(arg->thread);
1486 if (ttrace && fd >= 0 && fd <= ttrace->paths.max)
1487 zfree(&ttrace->paths.table[fd]);
/*
 * Remember that the filename pointer "ptr" is pending for "thread" and
 * store where bf points as an offset into entry_str, so that
 * trace__vfs_getname() can later insert the resolved string there.
 */
1492 static void thread__set_filename_pos(struct thread *thread, const char *bf,
1495 struct thread_trace *ttrace = thread__priv(thread);
1497 ttrace->filename.ptr = ptr;
1498 ttrace->filename.entry_str_pos = bf - ttrace->entry_str;
/*
 * Format a filename argument, which arrives as a user space pointer in
 * arg->val.  Without the vfs_getname probe the pointer itself is printed
 * in hexadecimal.  With the probe, the position of this argument inside
 * the entry string is recorded through thread__set_filename_pos() so the
 * resolved name can be inserted later.
 */
1501 static size_t syscall_arg__scnprintf_filename(char *bf, size_t size,
1502 struct syscall_arg *arg)
1504 unsigned long ptr = arg->val;
1506 if (!arg->trace->vfs_getname)
/* ptr is an unsigned long: use the l length modifier so it is not truncated. */
1507 return scnprintf(bf, size, "%#lx", ptr);
1509 thread__set_filename_pos(arg->thread, bf, ptr);
/*
 * Return true when t is smaller than trace->duration_filter scaled by
 * NSEC_PER_MSEC, i.e. t is in nanoseconds and the filter in milliseconds.
 */
1513 static bool trace__filter_duration(struct trace *trace, double t)
1515 return t < (trace->duration_filter * NSEC_PER_MSEC);
/*
 * Print tstamp relative to trace->base_time, converted to milliseconds,
 * using a 10 column field with three decimals followed by one space.
 * Returns what fprintf() returns.
 */
1518 static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
1520 double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC;
1522 return fprintf(fp, "%10.3f ", ts);
/*
 * Flags shared between sig_handler() and the main loop in trace__run().
 * They are written from a signal handler, so they are volatile
 * sig_atomic_t, the type the C standard provides for that purpose.
 */
1525 static volatile sig_atomic_t done = false;
1526 static volatile sig_atomic_t interrupted = false;
/*
 * Signal handler; trace__run() installs it for SIGCHLD and SIGINT.
 * "interrupted" records whether the signal received was SIGINT.
 */
1528 static void sig_handler(int sig)
1531 interrupted = sig == SIGINT;
/*
 * Print the prefix common to output lines: the timestamp, the duration
 * and, when trace->multiple_threads is set, the tid, preceded by the
 * comm (at most 14 characters) and a slash if trace->show_comm is set.
 */
1534 static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread,
1535 u64 duration, u64 tstamp, FILE *fp)
1537 size_t printed = trace__fprintf_tstamp(trace, tstamp, fp);
1538 printed += fprintf_duration(duration, fp);
1540 if (trace->multiple_threads) {
1541 if (trace->show_comm)
1542 printed += fprintf(fp, "%.14s/", thread__comm_str(thread));
1543 printed += fprintf(fp, "%d ", thread->tid);
/*
 * Process a non-sample event, dispatching on event->header.type.
 * PERF_RECORD_LOST is reported in red on trace->output with the number of
 * lost events and then handed to machine__process_lost_event(); the other
 * path shown here forwards the event to machine__process_event().
 */
1549 static int trace__process_event(struct trace *trace, struct machine *machine,
1550 union perf_event *event, struct perf_sample *sample)
1554 switch (event->header.type) {
1555 case PERF_RECORD_LOST:
1556 color_fprintf(trace->output, PERF_COLOR_RED,
1557 "LOST %" PRIu64 " events!\n", event->lost.lost);
1558 ret = machine__process_lost_event(machine, event, sample);
1560 ret = machine__process_event(machine, event, sample);
/*
 * perf_tool style callback: recovers the struct trace that embeds "tool"
 * with container_of() and forwards to trace__process_event().
 */
1567 static int trace__tool_process(struct perf_tool *tool,
1568 union perf_event *event,
1569 struct perf_sample *sample,
1570 struct machine *machine)
1572 struct trace *trace = container_of(tool, struct trace, tool);
1573 return trace__process_event(trace, machine, event, sample);
/*
 * Set up symbol resolution for a live session: call symbol__init(),
 * create the host machine, register machine__resolve_kernel_addr as the
 * trace event address resolver and synthesize the threads listed in
 * evlist->threads, with trace__tool_process() as the callback.
 */
1576 static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
1578 int err = symbol__init(NULL);
1583 trace->host = machine__new_host();
1584 if (trace->host == NULL)
1587 if (trace_event__register_resolver(trace->host, machine__resolve_kernel_addr) < 0)
1590 err = __machine__synthesize_threads(trace->host, &trace->tool, &trace->opts.target,
1591 evlist->threads, trace__tool_process, false,
1592 trace->opts.proc_map_timeout);
/*
 * Pick a formatter for each argument of sc.  The array is allocated
 * zeroed with one slot per argument.  For every field, the formatter
 * from the syscall_fmts entry is used when the entry provides one for
 * that index; otherwise fields flagged FIELD_IS_POINTER get
 * syscall_arg__scnprintf_hex.  arg_parm is copied from the fmt entry.
 */
1599 static int syscall__set_arg_fmts(struct syscall *sc)
1601 struct format_field *field;
1604 sc->arg_scnprintf = calloc(sc->nr_args, sizeof(void *));
1605 if (sc->arg_scnprintf == NULL)
1609 sc->arg_parm = sc->fmt->arg_parm;
1611 for (field = sc->args; field; field = field->next) {
1612 if (sc->fmt && sc->fmt->arg_scnprintf[idx])
1613 sc->arg_scnprintf[idx] = sc->fmt->arg_scnprintf[idx];
1614 else if (field->flags & FIELD_IS_POINTER)
1615 sc->arg_scnprintf[idx] = syscall_arg__scnprintf_hex;
/*
 * Populate trace->syscalls.table[id].  The name comes from
 * audit_syscall_to_name().  The table is grown to id + 1 entries when id
 * is beyond syscalls.max, zeroing the entries that were added.  The
 * syscall_fmts entry and the "syscalls:sys_enter_<name>" tracepoint
 * format are then looked up, and the argument formatters are chosen by
 * syscall__set_arg_fmts(), whose result is returned.
 */
1622 static int trace__read_syscall_info(struct trace *trace, int id)
1626 const char *name = audit_syscall_to_name(id, trace->audit.machine);
1631 if (id > trace->syscalls.max) {
1632 struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc));
1634 if (nsyscalls == NULL)
1637 if (trace->syscalls.max != -1) {
/* Existing table: clear only the entries past the old maximum. */
1638 memset(nsyscalls + trace->syscalls.max + 1, 0,
1639 (id - trace->syscalls.max) * sizeof(*sc));
/* No table before (syscalls.max was -1): clear everything. */
1641 memset(nsyscalls, 0, (id + 1) * sizeof(*sc));
1644 trace->syscalls.table = nsyscalls;
1645 trace->syscalls.max = id;
1648 sc = trace->syscalls.table + id;
1651 sc->fmt = syscall_fmt__find(sc->name);
1653 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name);
1654 sc->tp_format = trace_event__tp_format("syscalls", tp_name);
/* No tracepoint under the syscall name: retry with the alias from the fmt entry. */
1656 if (sc->tp_format == NULL && sc->fmt && sc->fmt->alias) {
1657 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias);
1658 sc->tp_format = trace_event__tp_format("syscalls", tp_name);
1661 if (sc->tp_format == NULL)
1664 sc->args = sc->tp_format->format.fields;
1665 sc->nr_args = sc->tp_format->format.nr_fields;
1666 /* drop nr field - not relevant here; does not exist on older kernels */
1667 if (sc->args && strcmp(sc->args->name, "nr") == 0) {
1668 sc->args = sc->args->next;
/* exit and exit_group are flagged through is_exit. */
1672 sc->is_exit = !strcmp(name, "exit_group") || !strcmp(name, "exit");
1674 return syscall__set_arg_fmts(sc);
/*
 * Translate each syscall name in trace->ev_qualifier to its id with
 * audit_name_to_syscall() and store the ids in ev_qualifier_ids.entries,
 * which is allocated with one slot per name.  Names that cannot be
 * translated are listed on trace->output after "Error:\tInvalid syscall ",
 * separated by commas; hints are then printed, the id array is freed and
 * its count reset to 0.
 */
1677 static int trace__validate_ev_qualifier(struct trace *trace)
1680 struct str_node *pos;
1682 trace->ev_qualifier_ids.nr = strlist__nr_entries(trace->ev_qualifier);
1683 trace->ev_qualifier_ids.entries = malloc(trace->ev_qualifier_ids.nr *
1684 sizeof(trace->ev_qualifier_ids.entries[0]));
1686 if (trace->ev_qualifier_ids.entries == NULL) {
1687 fputs("Error:\tNot enough memory for allocating events qualifier ids\n",
1695 strlist__for_each(pos, trace->ev_qualifier) {
1696 const char *sc = pos->s;
1697 int id = audit_name_to_syscall(sc, trace->audit.machine);
1701 fputs("Error:\tInvalid syscall ", trace->output);
1704 fputs(", ", trace->output);
1707 fputs(sc, trace->output);
1710 trace->ev_qualifier_ids.entries[i++] = id;
1714 fputs("\nHint:\ttry 'perf list syscalls:sys_enter_*'"
1715 "\nHint:\tand: 'man syscalls'\n", trace->output);
1716 zfree(&trace->ev_qualifier_ids.entries);
1717 trace->ev_qualifier_ids.nr = 0;
1724 * args is to be interpreted as a series of longs but we need to handle
1725 * 8-byte unaligned accesses. args points to raw_data within the event
1726 * and raw_data is guaranteed to be 8-byte unaligned because it is
1727 * preceded by raw_size which is a u32. So we need to copy args to a temp
1728 * variable to read it. Most notably this avoids extended load instructions
1729 * on unaligned addresses
/*
 * Format the syscall arguments stored at "args" into bf.  When sc has
 * tracepoint field information, each argument is read as an unsigned
 * long at index arg.idx (copied out with memcpy() because the data may
 * be unaligned) and printed as "name: " followed by either the output of
 * the per-argument formatter, when one is set, or a default format.
 * The second copy-and-print sequence near the end does the same by plain
 * index i for the case without field information.
 */
1732 static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
1733 unsigned char *args, struct trace *trace,
1734 struct thread *thread)
1740 if (sc->args != NULL) {
1741 struct format_field *field;
1743 struct syscall_arg arg = {
1750 for (field = sc->args; field;
1751 field = field->next, ++arg.idx, bit <<= 1) {
1755 /* special care for unaligned accesses */
1756 p = args + sizeof(unsigned long) * arg.idx;
1757 memcpy(&val, p, sizeof(val));
1760 * Suppress this argument if its value is zero and
1761 * and we don't have a string associated in an
1765 !(sc->arg_scnprintf &&
1766 sc->arg_scnprintf[arg.idx] == SCA_STRARRAY &&
1767 sc->arg_parm[arg.idx]))
1770 printed += scnprintf(bf + printed, size - printed,
1771 "%s%s: ", printed ? ", " : "", field->name);
1772 if (sc->arg_scnprintf && sc->arg_scnprintf[arg.idx]) {
1775 arg.parm = sc->arg_parm[arg.idx];
1776 printed += sc->arg_scnprintf[arg.idx](bf + printed,
1777 size - printed, &arg);
1779 printed += scnprintf(bf + printed, size - printed,
1787 /* special care for unaligned accesses */
1788 p = args + sizeof(unsigned long) * i;
1789 memcpy(&val, p, sizeof(val));
1790 printed += scnprintf(bf + printed, size - printed,
1792 printed ? ", " : "", i, val);
/*
 * Signature of the per-event sample handlers stored in evsel->handler and
 * invoked from trace__process_sample() and trace__handle_event().
 */
1800 typedef int (*tracepoint_handler)(struct trace *trace, struct perf_evsel *evsel,
1801 union perf_event *event,
1802 struct perf_sample *sample);
/*
 * Return the descriptor for syscall "id" from trace->syscalls.table.
 * An entry that is missing, or has no name yet, is first read with
 * trace__read_syscall_info().  An "Invalid syscall" message (with a
 * running count) and a "Problems reading syscall" message, including the
 * name when it is known, are written to trace->output on the error paths.
 */
1804 static struct syscall *trace__syscall_info(struct trace *trace,
1805 struct perf_evsel *evsel, int id)
1811 * XXX: Noticed on x86_64, reproduced as far back as 3.0.36, haven't tried
1812 * before that, leaving at a higher verbosity level till that is
1813 * explained. Reproduced with plain ftrace with:
1815 * echo 1 > /t/events/raw_syscalls/sys_exit/enable
1816 * grep "NR -1 " /t/trace_pipe
1818 * After generating some load on the machine.
1822 fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n",
1823 id, perf_evsel__name(evsel), ++n);
1828 if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) &&
1829 trace__read_syscall_info(trace, id))
1832 if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL))
1835 return &trace->syscalls.table[id];
1839 fprintf(trace->output, "Problems reading syscall %d", id);
1840 if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL)
1841 fprintf(trace->output, "(%s)", trace->syscalls.table[id].name);
1842 fputs(" information\n", trace->output);
/*
 * Feed the duration of the syscall "id" into the per-thread statistics.
 * The stats object lives in the priv pointer of the intlist node for
 * that id and is allocated the first time the id is seen.  The duration
 * is sample->time - entry_time when an entry time was recorded and is
 * earlier than the sample time.
 */
1847 static void thread__update_stats(struct thread_trace *ttrace,
1848 int id, struct perf_sample *sample)
1850 struct int_node *inode;
1851 struct stats *stats;
1854 inode = intlist__findnew(ttrace->syscall_stats, id);
1858 stats = inode->priv;
1859 if (stats == NULL) {
1860 stats = malloc(sizeof(struct stats));
1864 inode->priv = stats;
1867 if (ttrace->entry_time && sample->time > ttrace->entry_time)
1868 duration = sample->time - ttrace->entry_time;
1870 update_stats(stats, duration);
/*
 * If trace->current has a syscall entry pending, print it now: the
 * usual line prefix with the time elapsed since entry, then the entry
 * string padded to 70 columns and terminated with ") ...", and clear the
 * pending flag.
 */
1873 static int trace__printf_interrupted_entry(struct trace *trace, struct perf_sample *sample)
1875 struct thread_trace *ttrace;
1879 if (trace->current == NULL)
1882 ttrace = thread__priv(trace->current);
1884 if (!ttrace->entry_pending)
1887 duration = sample->time - ttrace->entry_time;
1889 printed = trace__fprintf_entry_head(trace, trace->current, duration, sample->time, trace->output);
1890 printed += fprintf(trace->output, "%-70s) ...\n", ttrace->entry_str);
1891 ttrace->entry_pending = false;
/*
 * sys_enter sample handler.  Looks up the syscall descriptor and the
 * per-thread state, allocates the entry_str buffer on first use, prints
 * any previously interrupted entry (unless summary_only), records the
 * entry time and formats "name(" plus the arguments into entry_str.
 * The thread then becomes trace->current, with the reference on the old
 * current thread dropped and a new one taken.
 */
1896 static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
1897 union perf_event *event __maybe_unused,
1898 struct perf_sample *sample)
1903 struct thread *thread;
1904 int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1;
1905 struct syscall *sc = trace__syscall_info(trace, evsel, id);
1906 struct thread_trace *ttrace;
1911 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1912 ttrace = thread__trace(thread, trace->output);
1916 args = perf_evsel__sc_tp_ptr(evsel, args, sample);
/* The per-thread entry buffer is allocated lazily. */
1918 if (ttrace->entry_str == NULL) {
1919 ttrace->entry_str = malloc(trace__entry_str_size);
1920 if (!ttrace->entry_str)
1924 if (!trace->summary_only)
1925 trace__printf_interrupted_entry(trace, sample);
1927 ttrace->entry_time = sample->time;
1928 msg = ttrace->entry_str;
1929 printed += scnprintf(msg + printed, trace__entry_str_size - printed, "%s(", sc->name);
1931 printed += syscall__scnprintf_args(sc, msg + printed, trace__entry_str_size - printed,
1932 args, trace, thread);
1935 if (!trace->duration_filter && !trace->summary_only) {
1936 trace__fprintf_entry_head(trace, thread, 1, sample->time, trace->output);
1937 fprintf(trace->output, "%-70s\n", ttrace->entry_str);
1940 ttrace->entry_pending = true;
1942 if (trace->current != thread) {
1943 thread__put(trace->current);
1944 trace->current = thread__get(thread);
/* Drop the reference obtained from machine__findnew_thread(). */
1948 thread__put(thread);
/*
 * sys_exit sample handler.  Updates the per-thread syscall statistics,
 * and for a successful open associates the returned fd with the pathname
 * captured by the last vfs_getname sample.  After the duration filter and
 * summary_only checks it prints the line prefix, the pending entry string
 * (or a "continued" marker naming the syscall) and the return value in
 * the form selected by the syscall_fmts entry.
 */
1952 static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
1953 union perf_event *event __maybe_unused,
1954 struct perf_sample *sample)
1958 struct thread *thread;
1959 int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1;
1960 struct syscall *sc = trace__syscall_info(trace, evsel, id);
1961 struct thread_trace *ttrace;
1966 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1967 ttrace = thread__trace(thread, trace->output);
1972 thread__update_stats(ttrace, id, sample);
1974 ret = perf_evsel__sc_tp_uint(evsel, ret, sample);
/* A successful open: ret is the new fd, so cache its pathname. */
1976 if (id == trace->audit.open_id && ret >= 0 && trace->last_vfs_getname) {
1977 trace__set_fd_pathname(thread, ret, trace->last_vfs_getname);
1978 trace->last_vfs_getname = NULL;
1979 ++trace->stats.vfs_getname;
1982 ttrace->exit_time = sample->time;
1984 if (ttrace->entry_time) {
1985 duration = sample->time - ttrace->entry_time;
1986 if (trace__filter_duration(trace, duration))
1988 } else if (trace->duration_filter)
1991 if (trace->summary_only)
1994 trace__fprintf_entry_head(trace, thread, duration, sample->time, trace->output);
1996 if (ttrace->entry_pending) {
1997 fprintf(trace->output, "%-70s", ttrace->entry_str);
1999 fprintf(trace->output, " ... [");
2000 color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued");
2001 fprintf(trace->output, "]: %s()", sc->name);
/* No syscall_fmts entry: print the raw return value. */
2004 if (sc->fmt == NULL) {
2006 fprintf(trace->output, ") = %ld", ret);
/* errmsg entries: show -1 with the errno name and message for -ret. */
2007 } else if (ret < 0 && sc->fmt->errmsg) {
2008 char bf[STRERR_BUFSIZE];
2009 const char *emsg = strerror_r(-ret, bf, sizeof(bf)),
2010 *e = audit_errno_to_name(-ret);
2012 fprintf(trace->output, ") = -1 %s %s", e, emsg);
2013 } else if (ret == 0 && sc->fmt->timeout)
2014 fprintf(trace->output, ") = 0 Timeout");
2015 else if (sc->fmt->hexret)
2016 fprintf(trace->output, ") = %#lx", ret);
2020 fputc('\n', trace->output);
2022 ttrace->entry_pending = false;
/* Drop the reference obtained from machine__findnew_thread(). */
2025 thread__put(thread);
/*
 * probe:vfs_getname sample handler.  Stores the "pathname" field of the
 * sample in trace->last_vfs_getname and, when the thread has a filename
 * argument pending, inserts that pathname into entry_str at the recorded
 * position: the tail of the string (terminator included) is moved up with
 * memmove() and the name copied into the gap.  If the name is longer than
 * the space left in the buffer only its last remaining_space characters
 * are used.  The pending filename state is cleared afterwards.
 */
2029 static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel,
2030 union perf_event *event __maybe_unused,
2031 struct perf_sample *sample)
2033 struct thread *thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
2034 struct thread_trace *ttrace;
2035 size_t filename_len, entry_str_len, to_move;
2036 ssize_t remaining_space;
2038 const char *filename;
2040 trace->last_vfs_getname = perf_evsel__rawptr(evsel, sample, "pathname");
2045 ttrace = thread__priv(thread);
2049 if (!ttrace->filename.ptr)
2052 entry_str_len = strlen(ttrace->entry_str);
2053 remaining_space = trace__entry_str_size - entry_str_len - 1; /* \0 */
2054 if (remaining_space <= 0)
2057 filename = trace->last_vfs_getname;
2058 filename_len = strlen(filename);
/* Too long to fit: keep only the tail of the pathname. */
2059 if (filename_len > (size_t)remaining_space) {
2060 filename += filename_len - remaining_space;
2061 filename_len = remaining_space;
2064 to_move = entry_str_len - ttrace->filename.entry_str_pos + 1; /* \0 */
2065 pos = ttrace->entry_str + ttrace->filename.entry_str_pos;
/* Regions overlap, hence memmove() for the shift and memcpy() for the fill. */
2066 memmove(pos + filename_len, pos, to_move);
2067 memcpy(pos, filename, filename_len);
2069 ttrace->filename.ptr = 0;
2070 ttrace->filename.entry_str_pos = 0;
/*
 * sched:sched_stat_runtime sample handler.  Reads the "runtime" field,
 * converts it to milliseconds and adds it to both the per-thread and the
 * global runtime_ms totals.  The fprintf() below dumps the comm, pid,
 * runtime and vruntime fields of the sample.
 * NOTE(review): that dump looks like the failure path - confirm.
 */
2075 static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel,
2076 union perf_event *event __maybe_unused,
2077 struct perf_sample *sample)
2079 u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
2080 double runtime_ms = (double)runtime / NSEC_PER_MSEC;
2081 struct thread *thread = machine__findnew_thread(trace->host,
2084 struct thread_trace *ttrace = thread__trace(thread, trace->output);
2089 ttrace->runtime_ms += runtime_ms;
2090 trace->runtime_ms += runtime_ms;
2091 thread__put(thread);
2095 fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n",
2097 perf_evsel__strval(evsel, sample, "comm"),
2098 (pid_t)perf_evsel__intval(evsel, sample, "pid"),
2100 perf_evsel__intval(evsel, sample, "vruntime"));
2101 thread__put(thread);
/*
 * Generic sample handler.  Prints any interrupted syscall entry first,
 * then the timestamp, an empty duration column when syscalls are being
 * traced, the event name and, when the evsel has a tracepoint format,
 * the payload formatted by event_format__fprintf().
 */
2105 static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel,
2106 union perf_event *event __maybe_unused,
2107 struct perf_sample *sample)
2109 trace__printf_interrupted_entry(trace, sample);
2110 trace__fprintf_tstamp(trace, sample->time, trace->output);
2112 if (trace->trace_syscalls)
2113 fprintf(trace->output, "( ): ");
2115 fprintf(trace->output, "%s:", evsel->name);
2117 if (evsel->tp_format) {
2118 event_format__fprintf(evsel->tp_format, sample->cpu,
2119 sample->raw_data, sample->raw_size,
2123 fprintf(trace->output, ")\n");
/*
 * Print the location described by al to f.  The dso long name followed
 * by "@" is printed when print_dso or verbose is set and a map is known.
 * The symbol name plus hexadecimal offset is printed when print_sym or
 * verbose is set and a symbol is known.  The two remaining fprintf()
 * calls print al->addr and sample->addr in hexadecimal.
 */
2127 static void print_location(FILE *f, struct perf_sample *sample,
2128 struct addr_location *al,
2129 bool print_dso, bool print_sym)
2132 if ((verbose || print_dso) && al->map)
2133 fprintf(f, "%s@", al->map->dso->long_name);
2135 if ((verbose || print_sym) && al->sym)
2136 fprintf(f, "%s+0x%" PRIx64, al->sym->name,
2137 al->addr - al->sym->start);
2139 fprintf(f, "0x%" PRIx64, al->addr);
2141 fprintf(f, "0x%" PRIx64, sample->addr);
/*
 * Page fault sample handler.  After looking up the thread and its
 * tracing state it prints, unless summary_only is set, a line made of
 * the usual prefix, "<kind>fault [", a first location resolved with
 * MAP__FUNCTION, "] => ", and a second location for which MAP__VARIABLE
 * and MAP__FUNCTION lookups are made, closed by the map type character
 * and al.level in parentheses.  <kind> depends on whether the event is
 * PERF_COUNT_SW_PAGE_FAULTS_MAJ.
 */
2144 static int trace__pgfault(struct trace *trace,
2145 struct perf_evsel *evsel,
2146 union perf_event *event,
2147 struct perf_sample *sample)
2149 struct thread *thread;
2150 u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
2151 struct addr_location al;
2152 char map_type = 'd';
2153 struct thread_trace *ttrace;
2156 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
2157 ttrace = thread__trace(thread, trace->output);
2161 if (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ)
2166 if (trace->summary_only)
2169 thread__find_addr_location(thread, cpumode, MAP__FUNCTION,
2172 trace__fprintf_entry_head(trace, thread, 0, sample->time, trace->output);
2174 fprintf(trace->output, "%sfault [",
2175 evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ?
2178 print_location(trace->output, sample, &al, false, true);
2180 fprintf(trace->output, "] => ");
2182 thread__find_addr_location(thread, cpumode, MAP__VARIABLE,
2186 thread__find_addr_location(thread, cpumode,
2187 MAP__FUNCTION, sample->addr, &al);
2195 print_location(trace->output, sample, &al, true, false);
2197 fprintf(trace->output, " (%c%c)\n", map_type, al.level);
/* Drop the reference obtained from machine__findnew_thread(). */
2201 thread__put(thread);
/*
 * Sample filter used by trace__process_sample().  The first test checks
 * whether sample->pid is in pid_list or sample->tid is in tid_list; the
 * second covers the case where at least one list exists but the first
 * test did not match.
 */
2205 static bool skip_sample(struct trace *trace, struct perf_sample *sample)
2207 if ((trace->pid_list && intlist__find(trace->pid_list, sample->pid)) ||
2208 (trace->tid_list && intlist__find(trace->tid_list, sample->tid)))
2211 if (trace->pid_list || trace->tid_list)
/*
 * perf_tool sample callback (installed by trace__replay()).  Applies
 * skip_sample(), latches the first sample time as base_time unless
 * full_time is set, and calls the handler stored in evsel->handler.
 */
2217 static int trace__process_sample(struct perf_tool *tool,
2218 union perf_event *event,
2219 struct perf_sample *sample,
2220 struct perf_evsel *evsel,
2221 struct machine *machine __maybe_unused)
2223 struct trace *trace = container_of(tool, struct trace, tool);
2226 tracepoint_handler handler = evsel->handler;
2228 if (skip_sample(trace, sample))
2231 if (!trace->full_time && trace->base_time == 0)
2232 trace->base_time = sample->time;
2236 handler(trace, evsel, event, sample);
/*
 * Turn the target pid and tid option strings, when given, into the
 * pid_list and tid_list intlists.  A NULL result from intlist__new() is
 * reported with pr_err() as a parsing error.
 */
2242 static int parse_target_str(struct trace *trace)
2244 if (trace->opts.target.pid) {
2245 trace->pid_list = intlist__new(trace->opts.target.pid);
2246 if (trace->pid_list == NULL) {
2247 pr_err("Error parsing process id string\n");
2252 if (trace->opts.target.tid) {
2253 trace->tid_list = intlist__new(trace->opts.target.tid);
2254 if (trace->tid_list == NULL) {
2255 pr_err("Error parsing thread id string\n");
/*
 * Build an argument vector and hand it to cmd_record().  The vector
 * holds the fixed record_args, then, depending on the options, "-e" with
 * the syscall enter/exit tracepoints (raw_syscalls when that tracepoint
 * is valid, otherwise syscalls), "-e major-faults", "-e minor-faults",
 * and finally the caller supplied argv.  The array is sized for the
 * largest possible combination plus one spare slot.
 */
2263 static int trace__record(struct trace *trace, int argc, const char **argv)
2265 unsigned int rec_argc, i, j;
2266 const char **rec_argv;
2267 const char * const record_args[] = {
2274 const char * const sc_args[] = { "-e", };
2275 unsigned int sc_args_nr = ARRAY_SIZE(sc_args);
2276 const char * const majpf_args[] = { "-e", "major-faults" };
2277 unsigned int majpf_args_nr = ARRAY_SIZE(majpf_args);
2278 const char * const minpf_args[] = { "-e", "minor-faults" };
2279 unsigned int minpf_args_nr = ARRAY_SIZE(minpf_args);
2281 /* +1 is for the event string below */
2282 rec_argc = ARRAY_SIZE(record_args) + sc_args_nr + 1 +
2283 majpf_args_nr + minpf_args_nr + argc;
2284 rec_argv = calloc(rec_argc + 1, sizeof(char *));
2286 if (rec_argv == NULL)
2290 for (i = 0; i < ARRAY_SIZE(record_args); i++)
2291 rec_argv[j++] = record_args[i];
2293 if (trace->trace_syscalls) {
2294 for (i = 0; i < sc_args_nr; i++)
2295 rec_argv[j++] = sc_args[i];
2297 /* event string may be different for older kernels - e.g., RHEL6 */
2298 if (is_valid_tracepoint("raw_syscalls:sys_enter"))
2299 rec_argv[j++] = "raw_syscalls:sys_enter,raw_syscalls:sys_exit";
2300 else if (is_valid_tracepoint("syscalls:sys_enter"))
2301 rec_argv[j++] = "syscalls:sys_enter,syscalls:sys_exit";
2303 pr_err("Neither raw_syscalls nor syscalls events exist.\n");
2308 if (trace->trace_pgfaults & TRACE_PFMAJ)
2309 for (i = 0; i < majpf_args_nr; i++)
2310 rec_argv[j++] = majpf_args[i];
2312 if (trace->trace_pgfaults & TRACE_PFMIN)
2313 for (i = 0; i < minpf_args_nr; i++)
2314 rec_argv[j++] = minpf_args[i];
2316 for (i = 0; i < (unsigned int)argc; i++)
2317 rec_argv[j++] = argv[i];
/* j is the number of slots actually filled, which may be below rec_argc. */
2319 return cmd_record(j, rec_argv, NULL);
/* Forward declaration: trace__run() calls this before its definition appears. */
2322 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp);
/*
 * Try to add the probe:vfs_getname tracepoint to evlist with
 * trace__vfs_getname() as its handler.  The evsel is deleted again when
 * it has no "pathname" field.
 */
2324 static bool perf_evlist__add_vfs_getname(struct perf_evlist *evlist)
2326 struct perf_evsel *evsel = perf_evsel__newtp("probe", "vfs_getname");
2330 if (perf_evsel__field(evsel, "pathname") == NULL) {
2331 perf_evsel__delete(evsel);
2335 evsel->handler = trace__vfs_getname;
2336 perf_evlist__add(evlist, evsel);
/*
 * Create a PERF_TYPE_SOFTWARE event with the given config and a sample
 * period of 1, set trace__pgfault() as its handler and add it to evlist.
 */
2340 static int perf_evlist__add_pgfault(struct perf_evlist *evlist,
2343 struct perf_evsel *evsel;
2344 struct perf_event_attr attr = {
2345 .type = PERF_TYPE_SOFTWARE,
2349 attr.config = config;
2350 attr.sample_period = 1;
2352 event_attr_init(&attr);
2354 evsel = perf_evsel__new(&attr);
2358 evsel->handler = trace__pgfault;
2359 perf_evlist__add(evlist, evsel);
/*
 * Dispatch one event read from the ring buffer in live mode.  The first
 * sample time is latched as base_time unless full_time is set.  Events
 * that are not PERF_RECORD_SAMPLE go to trace__process_event().  Samples
 * are mapped to their evsel through sample->id; unknown ids and
 * tracepoint samples without raw data are reported on trace->output,
 * otherwise the handler stored in the evsel is called.
 */
2364 static void trace__handle_event(struct trace *trace, union perf_event *event, struct perf_sample *sample)
2366 const u32 type = event->header.type;
2367 struct perf_evsel *evsel;
2369 if (!trace->full_time && trace->base_time == 0)
2370 trace->base_time = sample->time;
2372 if (type != PERF_RECORD_SAMPLE) {
2373 trace__process_event(trace, trace->host, event, sample);
2377 evsel = perf_evlist__id2evsel(trace->evlist, sample->id);
2378 if (evsel == NULL) {
2379 fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample->id);
2383 if (evsel->attr.type == PERF_TYPE_TRACEPOINT &&
2384 sample->raw_data == NULL) {
2385 fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
2386 perf_evsel__name(evsel), sample->tid,
2387 sample->cpu, sample->raw_size);
2389 tracepoint_handler handler = evsel->handler;
2390 handler(trace, evsel, event, sample);
/*
 * Create the sys_enter and sys_exit syscall tracepoint evsels with their
 * handlers, initialise the "args" pointer field on sys_enter and the
 * "ret" integer field on sys_exit, add both to the evlist and remember
 * them in trace->syscalls.events.  On failure the evsels created so far
 * are released through the out_delete_* labels.
 */
2394 static int trace__add_syscall_newtp(struct trace *trace)
2397 struct perf_evlist *evlist = trace->evlist;
2398 struct perf_evsel *sys_enter, *sys_exit;
2400 sys_enter = perf_evsel__syscall_newtp("sys_enter", trace__sys_enter);
2401 if (sys_enter == NULL)
2404 if (perf_evsel__init_sc_tp_ptr_field(sys_enter, args))
2405 goto out_delete_sys_enter;
2407 sys_exit = perf_evsel__syscall_newtp("sys_exit", trace__sys_exit);
2408 if (sys_exit == NULL)
2409 goto out_delete_sys_enter;
2411 if (perf_evsel__init_sc_tp_uint_field(sys_exit, ret))
2412 goto out_delete_sys_exit;
2414 perf_evlist__add(evlist, sys_enter);
2415 perf_evlist__add(evlist, sys_exit);
2417 trace->syscalls.events.sys_enter = sys_enter;
2418 trace->syscalls.events.sys_exit = sys_exit;
/* Error unwinding: labels fall through so sys_exit is released before sys_enter. */
2424 out_delete_sys_exit:
2425 perf_evsel__delete_priv(sys_exit);
2426 out_delete_sys_enter:
2427 perf_evsel__delete_priv(sys_enter);
/*
 * Build a tracepoint filter expression on "id" from the qualifier ids
 * with asprintf_expr_inout_ints() and append it, joined with "&&", to the
 * sys_enter evsel and, if that succeeded, to the sys_exit evsel.
 */
2431 static int trace__set_ev_qualifier_filter(struct trace *trace)
2434 char *filter = asprintf_expr_inout_ints("id", !trace->not_ev_qualifier,
2435 trace->ev_qualifier_ids.nr,
2436 trace->ev_qualifier_ids.entries);
2441 if (!perf_evsel__append_filter(trace->syscalls.events.sys_enter, "&&", filter))
2442 err = perf_evsel__append_filter(trace->syscalls.events.sys_exit, "&&", filter);
/*
 * Live tracing entry point.  Adds the requested events (syscall
 * tracepoints, vfs_getname probe, page faults, sched_stat_runtime) to
 * the evlist, creates the target maps, initialises symbols, installs the
 * signal handlers, prepares the workload when a command was given
 * (argc > 0), opens the events, applies pid and syscall id filters and
 * mmaps the ring buffers.  It then loops reading and dispatching events
 * from every mmap until tracing is done, prints the thread summary and
 * tool statistics when requested, and deletes the evlist.  The labels at
 * the end turn the various failures into messages on trace->output.
 */
2452 static int trace__run(struct trace *trace, int argc, const char **argv)
2454 struct perf_evlist *evlist = trace->evlist;
2455 struct perf_evsel *evsel;
2457 unsigned long before;
2458 const bool forks = argc > 0;
2459 bool draining = false;
2463 if (trace->trace_syscalls && trace__add_syscall_newtp(trace))
2464 goto out_error_raw_syscalls;
2466 if (trace->trace_syscalls)
2467 trace->vfs_getname = perf_evlist__add_vfs_getname(evlist);
2469 if ((trace->trace_pgfaults & TRACE_PFMAJ) &&
2470 perf_evlist__add_pgfault(evlist, PERF_COUNT_SW_PAGE_FAULTS_MAJ)) {
2474 if ((trace->trace_pgfaults & TRACE_PFMIN) &&
2475 perf_evlist__add_pgfault(evlist, PERF_COUNT_SW_PAGE_FAULTS_MIN))
2479 perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
2480 trace__sched_stat_runtime))
2481 goto out_error_sched_stat_runtime;
2483 err = perf_evlist__create_maps(evlist, &trace->opts.target);
2485 fprintf(trace->output, "Problems parsing the target to trace, check your options!\n");
2486 goto out_delete_evlist;
2489 err = trace__symbols_init(trace, evlist);
2491 fprintf(trace->output, "Problems initializing symbol libraries!\n");
2492 goto out_delete_evlist;
2495 perf_evlist__config(evlist, &trace->opts);
2497 signal(SIGCHLD, sig_handler);
2498 signal(SIGINT, sig_handler);
2501 err = perf_evlist__prepare_workload(evlist, &trace->opts.target,
2504 fprintf(trace->output, "Couldn't run the workload!\n");
2505 goto out_delete_evlist;
2509 err = perf_evlist__open(evlist);
2511 goto out_error_open;
2514 * Better not use !target__has_task() here because we need to cover the
2515 * case where no threads were specified in the command line, but a
2516 * workload was, and in that case we will fill in the thread_map when
2517 * we fork the workload in perf_evlist__prepare_workload.
2519 if (trace->filter_pids.nr > 0)
2520 err = perf_evlist__set_filter_pids(evlist, trace->filter_pids.nr, trace->filter_pids.entries);
2521 else if (thread_map__pid(evlist->threads, 0) == -1)
2522 err = perf_evlist__set_filter_pid(evlist, getpid());
2527 if (trace->ev_qualifier_ids.nr > 0) {
2528 err = trace__set_ev_qualifier_filter(trace);
2532 pr_debug("event qualifier tracepoint filter: %s\n",
2533 trace->syscalls.events.sys_exit->filter);
2536 err = perf_evlist__apply_filters(evlist, &evsel);
2538 goto out_error_apply_filters;
2540 err = perf_evlist__mmap(evlist, trace->opts.mmap_pages, false);
2542 goto out_error_mmap;
2544 if (!target__none(&trace->opts.target))
2545 perf_evlist__enable(evlist);
2548 perf_evlist__start_workload(evlist);
/* Decides whether trace__fprintf_entry_head() prints the tid column. */
2550 trace->multiple_threads = thread_map__pid(evlist->threads, 0) == -1 ||
2551 evlist->threads->nr > 1 ||
2552 perf_evlist__first(evlist)->attr.inherit;
/* Snapshot of the event count, compared after the pass over the mmaps. */
2554 before = trace->nr_events;
2556 for (i = 0; i < evlist->nr_mmaps; i++) {
2557 union perf_event *event;
2559 while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) {
2560 struct perf_sample sample;
2564 err = perf_evlist__parse_sample(evlist, event, &sample);
2566 fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err);
2570 trace__handle_event(trace, event, &sample);
2572 perf_evlist__mmap_consume(evlist, i);
2577 if (done && !draining) {
2578 perf_evlist__disable(evlist);
/* No new events in this pass: poll, with a 100 ms timeout once done is set. */
2584 if (trace->nr_events == before) {
2585 int timeout = done ? 100 : -1;
2587 if (!draining && perf_evlist__poll(evlist, timeout) > 0) {
2588 if (perf_evlist__filter_pollfd(evlist, POLLERR | POLLHUP) == 0)
2598 thread__zput(trace->current);
2600 perf_evlist__disable(evlist);
2604 trace__fprintf_thread_summary(trace, trace->output);
2606 if (trace->show_tool_stats) {
2607 fprintf(trace->output, "Stats:\n "
2608 " vfs_getname : %" PRIu64 "\n"
2609 " proc_getname: %" PRIu64 "\n",
2610 trace->stats.vfs_getname,
2611 trace->stats.proc_getname);
2616 perf_evlist__delete(evlist);
2617 trace->evlist = NULL;
2618 trace->live = false;
/* Error reporting: each label formats a message, then jumps to out_delete_evlist. */
2621 char errbuf[BUFSIZ];
2623 out_error_sched_stat_runtime:
2624 debugfs__strerror_open_tp(errno, errbuf, sizeof(errbuf), "sched", "sched_stat_runtime");
2627 out_error_raw_syscalls:
2628 debugfs__strerror_open_tp(errno, errbuf, sizeof(errbuf), "raw_syscalls", "sys_(enter|exit)");
2632 perf_evlist__strerror_mmap(evlist, errno, errbuf, sizeof(errbuf));
2636 perf_evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf));
2639 fprintf(trace->output, "%s\n", errbuf);
2640 goto out_delete_evlist;
2642 out_error_apply_filters:
2643 fprintf(trace->output,
2644 "Failed to set filter \"%s\" on event %s with %d (%s)\n",
2645 evsel->filter, perf_evsel__name(evsel), errno,
2646 strerror_r(errno, errbuf, sizeof(errbuf)));
2647 goto out_delete_evlist;
2650 fprintf(trace->output, "Not enough memory to run!\n");
2651 goto out_delete_evlist;
2654 fprintf(trace->output, "errno=%d,%s\n", errno, strerror(errno));
2655 goto out_delete_evlist;
2658 static int trace__replay(struct trace *trace)
2660 const struct perf_evsel_str_handler handlers[] = {
2661 { "probe:vfs_getname", trace__vfs_getname, },
2663 struct perf_data_file file = {
2665 .mode = PERF_DATA_MODE_READ,
2666 .force = trace->force,
2668 struct perf_session *session;
2669 struct perf_evsel *evsel;
2672 trace->tool.sample = trace__process_sample;
2673 trace->tool.mmap = perf_event__process_mmap;
2674 trace->tool.mmap2 = perf_event__process_mmap2;
2675 trace->tool.comm = perf_event__process_comm;
2676 trace->tool.exit = perf_event__process_exit;
2677 trace->tool.fork = perf_event__process_fork;
2678 trace->tool.attr = perf_event__process_attr;
2679 trace->tool.tracing_data = perf_event__process_tracing_data;
2680 trace->tool.build_id = perf_event__process_build_id;
2682 trace->tool.ordered_events = true;
2683 trace->tool.ordering_requires_timestamps = true;
2685 /* add tid to output */
2686 trace->multiple_threads = true;
2688 session = perf_session__new(&file, false, &trace->tool);
2689 if (session == NULL)
2692 if (symbol__init(&session->header.env) < 0)
2695 trace->host = &session->machines.host;
2697 err = perf_session__set_tracepoints_handlers(session, handlers);
2701 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2702 "raw_syscalls:sys_enter");
2703 /* older kernels have syscalls tp versus raw_syscalls */
2705 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2706 "syscalls:sys_enter");
2709 (perf_evsel__init_syscall_tp(evsel, trace__sys_enter) < 0 ||
2710 perf_evsel__init_sc_tp_ptr_field(evsel, args))) {
2711 pr_err("Error during initialize raw_syscalls:sys_enter event\n");
2715 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2716 "raw_syscalls:sys_exit");
2718 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2719 "syscalls:sys_exit");
2721 (perf_evsel__init_syscall_tp(evsel, trace__sys_exit) < 0 ||
2722 perf_evsel__init_sc_tp_uint_field(evsel, ret))) {
2723 pr_err("Error during initialize raw_syscalls:sys_exit event\n");
2727 evlist__for_each(session->evlist, evsel) {
2728 if (evsel->attr.type == PERF_TYPE_SOFTWARE &&
2729 (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ||
2730 evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MIN ||
2731 evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS))
2732 evsel->handler = trace__pgfault;
2735 err = parse_target_str(trace);
2741 err = perf_session__process_events(session);
2743 pr_err("Failed to process events, error %d", err);
2745 else if (trace->summary)
2746 trace__fprintf_thread_summary(trace, trace->output);
2749 perf_session__delete(session);
/*
 * trace__fprintf_threads_header - write the summary banner to @fp.
 *
 * Returns the value fprintf() reported for the banner, converted to size_t.
 */
static size_t trace__fprintf_threads_header(FILE *fp)
{
	size_t printed;

	printed = fprintf(fp, "\n Summary of events:\n\n");

	return printed;
}
2763 static size_t thread__dump_stats(struct thread_trace *ttrace,
2764 struct trace *trace, FILE *fp)
2766 struct stats *stats;
2769 struct int_node *inode = intlist__first(ttrace->syscall_stats);
2774 printed += fprintf(fp, "\n");
2776 printed += fprintf(fp, " syscall calls min avg max stddev\n");
2777 printed += fprintf(fp, " (msec) (msec) (msec) (%%)\n");
2778 printed += fprintf(fp, " --------------- -------- --------- --------- --------- ------\n");
2780 /* each int_node is a syscall */
2782 stats = inode->priv;
2784 double min = (double)(stats->min) / NSEC_PER_MSEC;
2785 double max = (double)(stats->max) / NSEC_PER_MSEC;
2786 double avg = avg_stats(stats);
2788 u64 n = (u64) stats->n;
2790 pct = avg ? 100.0 * stddev_stats(stats)/avg : 0.0;
2791 avg /= NSEC_PER_MSEC;
2793 sc = &trace->syscalls.table[inode->i];
2794 printed += fprintf(fp, " %-15s", sc->name);
2795 printed += fprintf(fp, " %8" PRIu64 " %9.3f %9.3f",
2797 printed += fprintf(fp, " %9.3f %9.2f%%\n", max, pct);
2800 inode = intlist__next(inode);
2803 printed += fprintf(fp, "\n\n");
/*
 * struct used to pass data to per-thread function
 *
 * @fp:      stream the summary is written to
 * @trace:   tracing context the threads belong to
 * @printed: running count of characters written so far
 */
struct summary_data {
	FILE *fp;
	struct trace *trace;
	size_t printed;
};
2815 static int trace__fprintf_one_thread(struct thread *thread, void *priv)
2817 struct summary_data *data = priv;
2818 FILE *fp = data->fp;
2819 size_t printed = data->printed;
2820 struct trace *trace = data->trace;
2821 struct thread_trace *ttrace = thread__priv(thread);
2827 ratio = (double)ttrace->nr_events / trace->nr_events * 100.0;
2829 printed += fprintf(fp, " %s (%d), ", thread__comm_str(thread), thread->tid);
2830 printed += fprintf(fp, "%lu events, ", ttrace->nr_events);
2831 printed += fprintf(fp, "%.1f%%", ratio);
2833 printed += fprintf(fp, ", %lu majfaults", ttrace->pfmaj);
2835 printed += fprintf(fp, ", %lu minfaults", ttrace->pfmin);
2836 printed += fprintf(fp, ", %.3f msec\n", ttrace->runtime_ms);
2837 printed += thread__dump_stats(ttrace, trace, fp);
2839 data->printed += printed;
2844 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
2846 struct summary_data data = {
2850 data.printed = trace__fprintf_threads_header(fp);
2852 machine__for_each_thread(trace->host, trace__fprintf_one_thread, &data);
2854 return data.printed;
2857 static int trace__set_duration(const struct option *opt, const char *str,
2858 int unset __maybe_unused)
2860 struct trace *trace = opt->value;
2862 trace->duration_filter = atof(str);
2866 static int trace__set_filter_pids(const struct option *opt, const char *str,
2867 int unset __maybe_unused)
2871 struct trace *trace = opt->value;
2873 * FIXME: introduce a intarray class, plain parse csv and create a
2874 * { int nr, int entries[] } struct...
2876 struct intlist *list = intlist__new(str);
2881 i = trace->filter_pids.nr = intlist__nr_entries(list) + 1;
2882 trace->filter_pids.entries = calloc(i, sizeof(pid_t));
2884 if (trace->filter_pids.entries == NULL)
2887 trace->filter_pids.entries[0] = getpid();
2889 for (i = 1; i < trace->filter_pids.nr; ++i)
2890 trace->filter_pids.entries[i] = intlist__entry(list, i - 1)->i;
2892 intlist__delete(list);
2898 static int trace__open_output(struct trace *trace, const char *filename)
2902 if (!stat(filename, &st) && st.st_size) {
2903 char oldname[PATH_MAX];
2905 scnprintf(oldname, sizeof(oldname), "%s.old", filename);
2907 rename(filename, oldname);
2910 trace->output = fopen(filename, "w");
2912 return trace->output == NULL ? -errno : 0;
2915 static int parse_pagefaults(const struct option *opt, const char *str,
2916 int unset __maybe_unused)
2918 int *trace_pgfaults = opt->value;
2920 if (strcmp(str, "all") == 0)
2921 *trace_pgfaults |= TRACE_PFMAJ | TRACE_PFMIN;
2922 else if (strcmp(str, "maj") == 0)
2923 *trace_pgfaults |= TRACE_PFMAJ;
2924 else if (strcmp(str, "min") == 0)
2925 *trace_pgfaults |= TRACE_PFMIN;
2932 static void evlist__set_evsel_handler(struct perf_evlist *evlist, void *handler)
2934 struct perf_evsel *evsel;
2936 evlist__for_each(evlist, evsel)
2937 evsel->handler = handler;
2940 int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
2942 const char *trace_usage[] = {
2943 "perf trace [<options>] [<command>]",
2944 "perf trace [<options>] -- <command> [<options>]",
2945 "perf trace record [<options>] [<command>]",
2946 "perf trace record [<options>] -- <command> [<options>]",
2949 struct trace trace = {
2951 .machine = audit_detect_machine(),
2952 .open_id = audit_name_to_syscall("open", trace.audit.machine),
2962 .user_freq = UINT_MAX,
2963 .user_interval = ULLONG_MAX,
2964 .no_buffering = true,
2965 .mmap_pages = UINT_MAX,
2966 .proc_map_timeout = 500,
2970 .trace_syscalls = true,
2972 const char *output_name = NULL;
2973 const char *ev_qualifier_str = NULL;
2974 const struct option trace_options[] = {
2975 OPT_CALLBACK(0, "event", &trace.evlist, "event",
2976 "event selector. use 'perf list' to list available events",
2977 parse_events_option),
2978 OPT_BOOLEAN(0, "comm", &trace.show_comm,
2979 "show the thread COMM next to its id"),
2980 OPT_BOOLEAN(0, "tool_stats", &trace.show_tool_stats, "show tool stats"),
2981 OPT_STRING('e', "expr", &ev_qualifier_str, "expr", "list of syscalls to trace"),
2982 OPT_STRING('o', "output", &output_name, "file", "output file name"),
2983 OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"),
2984 OPT_STRING('p', "pid", &trace.opts.target.pid, "pid",
2985 "trace events on existing process id"),
2986 OPT_STRING('t', "tid", &trace.opts.target.tid, "tid",
2987 "trace events on existing thread id"),
2988 OPT_CALLBACK(0, "filter-pids", &trace, "CSV list of pids",
2989 "pids to filter (by the kernel)", trace__set_filter_pids),
2990 OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide,
2991 "system-wide collection from all CPUs"),
2992 OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu",
2993 "list of cpus to monitor"),
2994 OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit,
2995 "child tasks do not inherit counters"),
2996 OPT_CALLBACK('m', "mmap-pages", &trace.opts.mmap_pages, "pages",
2997 "number of mmap data pages",
2998 perf_evlist__parse_mmap_pages),
2999 OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user",
3001 OPT_CALLBACK(0, "duration", &trace, "float",
3002 "show only events with duration > N.M ms",
3003 trace__set_duration),
3004 OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"),
3005 OPT_INCR('v', "verbose", &verbose, "be more verbose"),
3006 OPT_BOOLEAN('T', "time", &trace.full_time,
3007 "Show full timestamp, not time relative to first start"),
3008 OPT_BOOLEAN('s', "summary", &trace.summary_only,
3009 "Show only syscall summary with statistics"),
3010 OPT_BOOLEAN('S', "with-summary", &trace.summary,
3011 "Show all syscalls and summary with statistics"),
3012 OPT_CALLBACK_DEFAULT('F', "pf", &trace.trace_pgfaults, "all|maj|min",
3013 "Trace pagefaults", parse_pagefaults, "maj"),
3014 OPT_BOOLEAN(0, "syscalls", &trace.trace_syscalls, "Trace syscalls"),
3015 OPT_BOOLEAN('f', "force", &trace.force, "don't complain, do it"),
3016 OPT_UINTEGER(0, "proc-map-timeout", &trace.opts.proc_map_timeout,
3017 "per thread proc mmap processing timeout in ms"),
3020 const char * const trace_subcommands[] = { "record", NULL };
3024 signal(SIGSEGV, sighandler_dump_stack);
3025 signal(SIGFPE, sighandler_dump_stack);
3027 trace.evlist = perf_evlist__new();
3029 if (trace.evlist == NULL) {
3030 pr_err("Not enough memory to run!\n");
3035 argc = parse_options_subcommand(argc, argv, trace_options, trace_subcommands,
3036 trace_usage, PARSE_OPT_STOP_AT_NON_OPTION);
3038 if (trace.trace_pgfaults) {
3039 trace.opts.sample_address = true;
3040 trace.opts.sample_time = true;
3043 if (trace.evlist->nr_entries > 0)
3044 evlist__set_evsel_handler(trace.evlist, trace__event_handler);
3046 if ((argc >= 1) && (strcmp(argv[0], "record") == 0))
3047 return trace__record(&trace, argc-1, &argv[1]);
3049 /* summary_only implies summary option, but don't overwrite summary if set */
3050 if (trace.summary_only)
3051 trace.summary = trace.summary_only;
3053 if (!trace.trace_syscalls && !trace.trace_pgfaults &&
3054 trace.evlist->nr_entries == 0 /* Was --events used? */) {
3055 pr_err("Please specify something to trace.\n");
3059 if (output_name != NULL) {
3060 err = trace__open_output(&trace, output_name);
3062 perror("failed to create output file");
3067 if (ev_qualifier_str != NULL) {
3068 const char *s = ev_qualifier_str;
3069 struct strlist_config slist_config = {
3070 .dirname = system_path(STRACE_GROUPS_DIR),
3073 trace.not_ev_qualifier = *s == '!';
3074 if (trace.not_ev_qualifier)
3076 trace.ev_qualifier = strlist__new(s, &slist_config);
3077 if (trace.ev_qualifier == NULL) {
3078 fputs("Not enough memory to parse event qualifier",
3084 err = trace__validate_ev_qualifier(&trace);
3089 err = target__validate(&trace.opts.target);
3091 target__strerror(&trace.opts.target, err, bf, sizeof(bf));
3092 fprintf(trace.output, "%s", bf);
3096 err = target__parse_uid(&trace.opts.target);
3098 target__strerror(&trace.opts.target, err, bf, sizeof(bf));
3099 fprintf(trace.output, "%s", bf);
3103 if (!argc && target__none(&trace.opts.target))
3104 trace.opts.target.system_wide = true;
3107 err = trace__replay(&trace);
3109 err = trace__run(&trace, argc, argv);
3112 if (output_name != NULL)
3113 fclose(trace.output);