1 #include <traceevent/event-parse.h>
3 #include "util/color.h"
4 #include "util/debug.h"
5 #include "util/evlist.h"
6 #include "util/exec_cmd.h"
7 #include "util/machine.h"
8 #include "util/session.h"
9 #include "util/thread.h"
10 #include "util/parse-options.h"
11 #include "util/strlist.h"
12 #include "util/intlist.h"
13 #include "util/thread_map.h"
14 #include "util/stat.h"
15 #include "trace-event.h"
16 #include "util/parse-events.h"
21 #include <linux/futex.h>
23 /* For older distros: */
25 # define MAP_STACK 0x20000
29 # define MADV_HWPOISON 100
32 #ifndef MADV_MERGEABLE
33 # define MADV_MERGEABLE 12
36 #ifndef MADV_UNMERGEABLE
37 # define MADV_UNMERGEABLE 13
41 # define EFD_SEMAPHORE 1
45 # define EFD_NONBLOCK 00004000
49 # define EFD_CLOEXEC 02000000
53 # define O_CLOEXEC 02000000
61 # define SOCK_CLOEXEC 02000000
65 # define SOCK_NONBLOCK 00004000
68 #ifndef MSG_CMSG_CLOEXEC
69 # define MSG_CMSG_CLOEXEC 0x40000000
72 #ifndef PERF_FLAG_FD_NO_GROUP
73 # define PERF_FLAG_FD_NO_GROUP (1UL << 0)
76 #ifndef PERF_FLAG_FD_OUTPUT
77 # define PERF_FLAG_FD_OUTPUT (1UL << 1)
80 #ifndef PERF_FLAG_PID_CGROUP
81 # define PERF_FLAG_PID_CGROUP (1UL << 2) /* pid=cgroup id, per-cpu mode only */
84 #ifndef PERF_FLAG_FD_CLOEXEC
85 # define PERF_FLAG_FD_CLOEXEC (1UL << 3) /* O_CLOEXEC */
92 u64 (*integer)(struct tp_field *field, struct perf_sample *sample);
93 void *(*pointer)(struct tp_field *field, struct perf_sample *sample);
97 #define TP_UINT_FIELD(bits) \
98 static u64 tp_field__u##bits(struct tp_field *field, struct perf_sample *sample) \
101 memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
110 #define TP_UINT_FIELD__SWAPPED(bits) \
111 static u64 tp_field__swapped_u##bits(struct tp_field *field, struct perf_sample *sample) \
114 memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
115 return bswap_##bits(value);\
118 TP_UINT_FIELD__SWAPPED(16);
119 TP_UINT_FIELD__SWAPPED(32);
120 TP_UINT_FIELD__SWAPPED(64);
122 static int tp_field__init_uint(struct tp_field *field,
123 struct format_field *format_field,
126 field->offset = format_field->offset;
128 switch (format_field->size) {
130 field->integer = tp_field__u8;
133 field->integer = needs_swap ? tp_field__swapped_u16 : tp_field__u16;
136 field->integer = needs_swap ? tp_field__swapped_u32 : tp_field__u32;
139 field->integer = needs_swap ? tp_field__swapped_u64 : tp_field__u64;
148 static void *tp_field__ptr(struct tp_field *field, struct perf_sample *sample)
150 return sample->raw_data + field->offset;
153 static int tp_field__init_ptr(struct tp_field *field, struct format_field *format_field)
155 field->offset = format_field->offset;
156 field->pointer = tp_field__ptr;
163 struct tp_field args, ret;
167 static int perf_evsel__init_tp_uint_field(struct perf_evsel *evsel,
168 struct tp_field *field,
171 struct format_field *format_field = perf_evsel__field(evsel, name);
173 if (format_field == NULL)
176 return tp_field__init_uint(field, format_field, evsel->needs_swap);
/*
 * Set up the integer accessor for the syscall tracepoint field 'name'.
 *
 * 'name' is used twice: as the member of the struct syscall_tp hanging off
 * evsel->priv that receives the accessor, and (stringified) as the name of
 * the tracepoint format field to look up. The expression evaluates to the
 * return value of perf_evsel__init_tp_uint_field().
 *
 * 'evsel' is parenthesised so that any pointer-valued expression can be
 * passed; note that it is still evaluated twice, so it must be free of side
 * effects.
 */
#define perf_evsel__init_sc_tp_uint_field(evsel, name) \
	({ struct syscall_tp *sc = (evsel)->priv;\
	   perf_evsel__init_tp_uint_field((evsel), &sc->name, #name); })
183 static int perf_evsel__init_tp_ptr_field(struct perf_evsel *evsel,
184 struct tp_field *field,
187 struct format_field *format_field = perf_evsel__field(evsel, name);
189 if (format_field == NULL)
192 return tp_field__init_ptr(field, format_field);
/*
 * Set up the pointer accessor for the syscall tracepoint field 'name'.
 *
 * 'name' selects the member of the struct syscall_tp stored in evsel->priv
 * and, stringified, the tracepoint format field to look up. The expression
 * evaluates to the return value of perf_evsel__init_tp_ptr_field().
 *
 * 'evsel' is parenthesised so that any pointer-valued expression can be
 * passed; it is still evaluated twice, so it must be free of side effects.
 */
#define perf_evsel__init_sc_tp_ptr_field(evsel, name) \
	({ struct syscall_tp *sc = (evsel)->priv;\
	   perf_evsel__init_tp_ptr_field((evsel), &sc->name, #name); })
199 static void perf_evsel__delete_priv(struct perf_evsel *evsel)
202 perf_evsel__delete(evsel);
205 static int perf_evsel__init_syscall_tp(struct perf_evsel *evsel, void *handler)
207 evsel->priv = malloc(sizeof(struct syscall_tp));
208 if (evsel->priv != NULL) {
209 if (perf_evsel__init_sc_tp_uint_field(evsel, id))
212 evsel->handler = handler;
223 static struct perf_evsel *perf_evsel__syscall_newtp(const char *direction, void *handler)
225 struct perf_evsel *evsel = perf_evsel__newtp("raw_syscalls", direction);
	/* older kernels (e.g., RHEL6) use syscalls:{enter,exit} */
229 evsel = perf_evsel__newtp("syscalls", direction);
232 if (perf_evsel__init_syscall_tp(evsel, handler))
239 perf_evsel__delete_priv(evsel);
/*
 * Read the integer value of syscall tracepoint field 'name' from 'sample',
 * using the accessor previously stored in the struct syscall_tp that
 * evsel->priv points to.
 *
 * The macro arguments are parenthesised so that arbitrary expressions can be
 * passed for 'evsel' and 'sample'.
 */
#define perf_evsel__sc_tp_uint(evsel, name, sample) \
	({ struct syscall_tp *fields = (evsel)->priv; \
	   fields->name.integer(&fields->name, (sample)); })
/*
 * Obtain a pointer to syscall tracepoint field 'name' inside 'sample', using
 * the accessor previously stored in the struct syscall_tp that evsel->priv
 * points to.
 *
 * The macro arguments are parenthesised so that arbitrary expressions can be
 * passed for 'evsel' and 'sample'.
 */
#define perf_evsel__sc_tp_ptr(evsel, name, sample) \
	({ struct syscall_tp *fields = (evsel)->priv; \
	   fields->name.pointer(&fields->name, (sample)); })
253 struct thread *thread;
263 const char **entries;
266 #define DEFINE_STRARRAY(array) struct strarray strarray__##array = { \
267 .nr_entries = ARRAY_SIZE(array), \
271 #define DEFINE_STRARRAY_OFFSET(array, off) struct strarray strarray__##array = { \
273 .nr_entries = ARRAY_SIZE(array), \
277 static size_t __syscall_arg__scnprintf_strarray(char *bf, size_t size,
279 struct syscall_arg *arg)
281 struct strarray *sa = arg->parm;
282 int idx = arg->val - sa->offset;
284 if (idx < 0 || idx >= sa->nr_entries)
285 return scnprintf(bf, size, intfmt, arg->val);
287 return scnprintf(bf, size, "%s", sa->entries[idx]);
/*
 * Format a syscall argument via the strarray in arg->parm; values outside
 * the array are printed with "%d" by the shared helper.
 */
static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size,
					      struct syscall_arg *arg)
{
	const char *fallback_fmt = "%d";

	return __syscall_arg__scnprintf_strarray(bf, size, fallback_fmt, arg);
}
296 #define SCA_STRARRAY syscall_arg__scnprintf_strarray
298 #if defined(__i386__) || defined(__x86_64__)
300 * FIXME: Make this available to all arches as soon as the ioctl beautifier
301 * gets rewritten to support all arches.
/*
 * Same as the plain strarray formatter, except that values outside the
 * array are printed in hexadecimal ("%#x") by the shared helper.
 */
static size_t syscall_arg__scnprintf_strhexarray(char *bf, size_t size,
						 struct syscall_arg *arg)
{
	const char *fallback_fmt = "%#x";

	return __syscall_arg__scnprintf_strarray(bf, size, fallback_fmt, arg);
}
309 #define SCA_STRHEXARRAY syscall_arg__scnprintf_strhexarray
310 #endif /* defined(__i386__) || defined(__x86_64__) */
312 static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
313 struct syscall_arg *arg);
315 #define SCA_FD syscall_arg__scnprintf_fd
317 static size_t syscall_arg__scnprintf_fd_at(char *bf, size_t size,
318 struct syscall_arg *arg)
323 return scnprintf(bf, size, "CWD");
325 return syscall_arg__scnprintf_fd(bf, size, arg);
328 #define SCA_FDAT syscall_arg__scnprintf_fd_at
330 static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
331 struct syscall_arg *arg);
333 #define SCA_CLOSE_FD syscall_arg__scnprintf_close_fd
335 static size_t syscall_arg__scnprintf_hex(char *bf, size_t size,
336 struct syscall_arg *arg)
338 return scnprintf(bf, size, "%#lx", arg->val);
341 #define SCA_HEX syscall_arg__scnprintf_hex
343 static size_t syscall_arg__scnprintf_int(char *bf, size_t size,
344 struct syscall_arg *arg)
346 return scnprintf(bf, size, "%d", arg->val);
349 #define SCA_INT syscall_arg__scnprintf_int
351 static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size,
352 struct syscall_arg *arg)
354 int printed = 0, prot = arg->val;
356 if (prot == PROT_NONE)
357 return scnprintf(bf, size, "NONE");
358 #define P_MMAP_PROT(n) \
359 if (prot & PROT_##n) { \
360 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
370 P_MMAP_PROT(GROWSDOWN);
371 P_MMAP_PROT(GROWSUP);
375 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", prot);
380 #define SCA_MMAP_PROT syscall_arg__scnprintf_mmap_prot
382 static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size,
383 struct syscall_arg *arg)
385 int printed = 0, flags = arg->val;
387 #define P_MMAP_FLAG(n) \
388 if (flags & MAP_##n) { \
389 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
394 P_MMAP_FLAG(PRIVATE);
398 P_MMAP_FLAG(ANONYMOUS);
399 P_MMAP_FLAG(DENYWRITE);
400 P_MMAP_FLAG(EXECUTABLE);
403 P_MMAP_FLAG(GROWSDOWN);
405 P_MMAP_FLAG(HUGETLB);
408 P_MMAP_FLAG(NONBLOCK);
409 P_MMAP_FLAG(NORESERVE);
410 P_MMAP_FLAG(POPULATE);
412 #ifdef MAP_UNINITIALIZED
413 P_MMAP_FLAG(UNINITIALIZED);
418 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
423 #define SCA_MMAP_FLAGS syscall_arg__scnprintf_mmap_flags
425 static size_t syscall_arg__scnprintf_mremap_flags(char *bf, size_t size,
426 struct syscall_arg *arg)
428 int printed = 0, flags = arg->val;
430 #define P_MREMAP_FLAG(n) \
431 if (flags & MREMAP_##n) { \
432 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
433 flags &= ~MREMAP_##n; \
436 P_MREMAP_FLAG(MAYMOVE);
438 P_MREMAP_FLAG(FIXED);
443 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
448 #define SCA_MREMAP_FLAGS syscall_arg__scnprintf_mremap_flags
450 static size_t syscall_arg__scnprintf_madvise_behavior(char *bf, size_t size,
451 struct syscall_arg *arg)
453 int behavior = arg->val;
456 #define P_MADV_BHV(n) case MADV_##n: return scnprintf(bf, size, #n)
459 P_MADV_BHV(SEQUENTIAL);
460 P_MADV_BHV(WILLNEED);
461 P_MADV_BHV(DONTNEED);
463 P_MADV_BHV(DONTFORK);
465 P_MADV_BHV(HWPOISON);
466 #ifdef MADV_SOFT_OFFLINE
467 P_MADV_BHV(SOFT_OFFLINE);
469 P_MADV_BHV(MERGEABLE);
470 P_MADV_BHV(UNMERGEABLE);
472 P_MADV_BHV(HUGEPAGE);
474 #ifdef MADV_NOHUGEPAGE
475 P_MADV_BHV(NOHUGEPAGE);
478 P_MADV_BHV(DONTDUMP);
487 return scnprintf(bf, size, "%#x", behavior);
490 #define SCA_MADV_BHV syscall_arg__scnprintf_madvise_behavior
492 static size_t syscall_arg__scnprintf_flock(char *bf, size_t size,
493 struct syscall_arg *arg)
495 int printed = 0, op = arg->val;
498 return scnprintf(bf, size, "NONE");
500 if ((op & LOCK_##cmd) == LOCK_##cmd) { \
501 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #cmd); \
516 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", op);
521 #define SCA_FLOCK syscall_arg__scnprintf_flock
523 static size_t syscall_arg__scnprintf_futex_op(char *bf, size_t size, struct syscall_arg *arg)
525 enum syscall_futex_args {
526 SCF_UADDR = (1 << 0),
529 SCF_TIMEOUT = (1 << 3),
530 SCF_UADDR2 = (1 << 4),
534 int cmd = op & FUTEX_CMD_MASK;
538 #define P_FUTEX_OP(n) case FUTEX_##n: printed = scnprintf(bf, size, #n);
539 P_FUTEX_OP(WAIT); arg->mask |= SCF_VAL3|SCF_UADDR2; break;
540 P_FUTEX_OP(WAKE); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
541 P_FUTEX_OP(FD); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
542 P_FUTEX_OP(REQUEUE); arg->mask |= SCF_VAL3|SCF_TIMEOUT; break;
543 P_FUTEX_OP(CMP_REQUEUE); arg->mask |= SCF_TIMEOUT; break;
544 P_FUTEX_OP(CMP_REQUEUE_PI); arg->mask |= SCF_TIMEOUT; break;
545 P_FUTEX_OP(WAKE_OP); break;
546 P_FUTEX_OP(LOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
547 P_FUTEX_OP(UNLOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
548 P_FUTEX_OP(TRYLOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2; break;
549 P_FUTEX_OP(WAIT_BITSET); arg->mask |= SCF_UADDR2; break;
550 P_FUTEX_OP(WAKE_BITSET); arg->mask |= SCF_UADDR2; break;
551 P_FUTEX_OP(WAIT_REQUEUE_PI); break;
552 default: printed = scnprintf(bf, size, "%#x", cmd); break;
555 if (op & FUTEX_PRIVATE_FLAG)
556 printed += scnprintf(bf + printed, size - printed, "|PRIV");
558 if (op & FUTEX_CLOCK_REALTIME)
559 printed += scnprintf(bf + printed, size - printed, "|CLKRT");
564 #define SCA_FUTEX_OP syscall_arg__scnprintf_futex_op
566 static const char *epoll_ctl_ops[] = { "ADD", "DEL", "MOD", };
567 static DEFINE_STRARRAY_OFFSET(epoll_ctl_ops, 1);
/* Names for the 'which' argument of getitimer/setitimer, indexed from 0. */
static const char *itimers[] = {
	"REAL",
	"VIRTUAL",
	"PROF",
};
static DEFINE_STRARRAY(itimers);
572 static const char *whences[] = { "SET", "CUR", "END",
580 static DEFINE_STRARRAY(whences);
582 static const char *fcntl_cmds[] = {
583 "DUPFD", "GETFD", "SETFD", "GETFL", "SETFL", "GETLK", "SETLK",
584 "SETLKW", "SETOWN", "GETOWN", "SETSIG", "GETSIG", "F_GETLK64",
585 "F_SETLK64", "F_SETLKW64", "F_SETOWN_EX", "F_GETOWN_EX",
588 static DEFINE_STRARRAY(fcntl_cmds);
590 static const char *rlimit_resources[] = {
591 "CPU", "FSIZE", "DATA", "STACK", "CORE", "RSS", "NPROC", "NOFILE",
592 "MEMLOCK", "AS", "LOCKS", "SIGPENDING", "MSGQUEUE", "NICE", "RTPRIO",
595 static DEFINE_STRARRAY(rlimit_resources);
/* Names for the 'how' argument of rt_sigprocmask, indexed from 0. */
static const char *sighow[] = {
	"BLOCK",
	"UNBLOCK",
	"SETMASK",
};
static DEFINE_STRARRAY(sighow);
600 static const char *clockid[] = {
601 "REALTIME", "MONOTONIC", "PROCESS_CPUTIME_ID", "THREAD_CPUTIME_ID",
602 "MONOTONIC_RAW", "REALTIME_COARSE", "MONOTONIC_COARSE",
604 static DEFINE_STRARRAY(clockid);
606 static const char *socket_families[] = {
607 "UNSPEC", "LOCAL", "INET", "AX25", "IPX", "APPLETALK", "NETROM",
608 "BRIDGE", "ATMPVC", "X25", "INET6", "ROSE", "DECnet", "NETBEUI",
609 "SECURITY", "KEY", "NETLINK", "PACKET", "ASH", "ECONET", "ATMSVC",
610 "RDS", "SNA", "IRDA", "PPPOX", "WANPIPE", "LLC", "IB", "CAN", "TIPC",
611 "BLUETOOTH", "IUCV", "RXRPC", "ISDN", "PHONET", "IEEE802154", "CAIF",
612 "ALG", "NFC", "VSOCK",
614 static DEFINE_STRARRAY(socket_families);
616 #ifndef SOCK_TYPE_MASK
617 #define SOCK_TYPE_MASK 0xf
620 static size_t syscall_arg__scnprintf_socket_type(char *bf, size_t size,
621 struct syscall_arg *arg)
625 flags = type & ~SOCK_TYPE_MASK;
627 type &= SOCK_TYPE_MASK;
629 * Can't use a strarray, MIPS may override for ABI reasons.
632 #define P_SK_TYPE(n) case SOCK_##n: printed = scnprintf(bf, size, #n); break;
637 P_SK_TYPE(SEQPACKET);
642 printed = scnprintf(bf, size, "%#x", type);
645 #define P_SK_FLAG(n) \
646 if (flags & SOCK_##n) { \
647 printed += scnprintf(bf + printed, size - printed, "|%s", #n); \
648 flags &= ~SOCK_##n; \
656 printed += scnprintf(bf + printed, size - printed, "|%#x", flags);
661 #define SCA_SK_TYPE syscall_arg__scnprintf_socket_type
664 #define MSG_PROBE 0x10
666 #ifndef MSG_WAITFORONE
667 #define MSG_WAITFORONE 0x10000
669 #ifndef MSG_SENDPAGE_NOTLAST
670 #define MSG_SENDPAGE_NOTLAST 0x20000
673 #define MSG_FASTOPEN 0x20000000
676 static size_t syscall_arg__scnprintf_msg_flags(char *bf, size_t size,
677 struct syscall_arg *arg)
679 int printed = 0, flags = arg->val;
682 return scnprintf(bf, size, "NONE");
683 #define P_MSG_FLAG(n) \
684 if (flags & MSG_##n) { \
685 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
691 P_MSG_FLAG(DONTROUTE);
696 P_MSG_FLAG(DONTWAIT);
703 P_MSG_FLAG(ERRQUEUE);
704 P_MSG_FLAG(NOSIGNAL);
706 P_MSG_FLAG(WAITFORONE);
707 P_MSG_FLAG(SENDPAGE_NOTLAST);
708 P_MSG_FLAG(FASTOPEN);
709 P_MSG_FLAG(CMSG_CLOEXEC);
713 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
718 #define SCA_MSG_FLAGS syscall_arg__scnprintf_msg_flags
720 static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size,
721 struct syscall_arg *arg)
726 if (mode == F_OK) /* 0 */
727 return scnprintf(bf, size, "F");
729 if (mode & n##_OK) { \
730 printed += scnprintf(bf + printed, size - printed, "%s", #n); \
740 printed += scnprintf(bf + printed, size - printed, "|%#x", mode);
745 #define SCA_ACCMODE syscall_arg__scnprintf_access_mode
747 static size_t syscall_arg__scnprintf_filename(char *bf, size_t size,
748 struct syscall_arg *arg);
750 #define SCA_FILENAME syscall_arg__scnprintf_filename
752 static size_t syscall_arg__scnprintf_open_flags(char *bf, size_t size,
753 struct syscall_arg *arg)
755 int printed = 0, flags = arg->val;
757 if (!(flags & O_CREAT))
758 arg->mask |= 1 << (arg->idx + 1); /* Mask the mode parm */
761 return scnprintf(bf, size, "RDONLY");
763 if (flags & O_##n) { \
764 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
788 if ((flags & O_SYNC) == O_SYNC)
789 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", "SYNC");
801 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
806 #define SCA_OPEN_FLAGS syscall_arg__scnprintf_open_flags
808 static size_t syscall_arg__scnprintf_perf_flags(char *bf, size_t size,
809 struct syscall_arg *arg)
811 int printed = 0, flags = arg->val;
817 if (flags & PERF_FLAG_##n) { \
818 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
819 flags &= ~PERF_FLAG_##n; \
829 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
834 #define SCA_PERF_FLAGS syscall_arg__scnprintf_perf_flags
836 static size_t syscall_arg__scnprintf_eventfd_flags(char *bf, size_t size,
837 struct syscall_arg *arg)
839 int printed = 0, flags = arg->val;
842 return scnprintf(bf, size, "NONE");
844 if (flags & EFD_##n) { \
845 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
855 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
860 #define SCA_EFD_FLAGS syscall_arg__scnprintf_eventfd_flags
862 static size_t syscall_arg__scnprintf_pipe_flags(char *bf, size_t size,
863 struct syscall_arg *arg)
865 int printed = 0, flags = arg->val;
868 if (flags & O_##n) { \
869 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
878 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
883 #define SCA_PIPE_FLAGS syscall_arg__scnprintf_pipe_flags
885 static size_t syscall_arg__scnprintf_signum(char *bf, size_t size, struct syscall_arg *arg)
890 #define P_SIGNUM(n) case SIG##n: return scnprintf(bf, size, #n)
933 return scnprintf(bf, size, "%#x", sig);
936 #define SCA_SIGNUM syscall_arg__scnprintf_signum
938 #if defined(__i386__) || defined(__x86_64__)
940 * FIXME: Make this available to all arches.
942 #define TCGETS 0x5401
944 static const char *tioctls[] = {
945 "TCGETS", "TCSETS", "TCSETSW", "TCSETSF", "TCGETA", "TCSETA", "TCSETAW",
946 "TCSETAF", "TCSBRK", "TCXONC", "TCFLSH", "TIOCEXCL", "TIOCNXCL",
947 "TIOCSCTTY", "TIOCGPGRP", "TIOCSPGRP", "TIOCOUTQ", "TIOCSTI",
948 "TIOCGWINSZ", "TIOCSWINSZ", "TIOCMGET", "TIOCMBIS", "TIOCMBIC",
949 "TIOCMSET", "TIOCGSOFTCAR", "TIOCSSOFTCAR", "FIONREAD", "TIOCLINUX",
950 "TIOCCONS", "TIOCGSERIAL", "TIOCSSERIAL", "TIOCPKT", "FIONBIO",
951 "TIOCNOTTY", "TIOCSETD", "TIOCGETD", "TCSBRKP", [0x27] = "TIOCSBRK",
952 "TIOCCBRK", "TIOCGSID", "TCGETS2", "TCSETS2", "TCSETSW2", "TCSETSF2",
953 "TIOCGRS485", "TIOCSRS485", "TIOCGPTN", "TIOCSPTLCK",
954 "TIOCGDEV||TCGETX", "TCSETX", "TCSETXF", "TCSETXW", "TIOCSIG",
955 "TIOCVHANGUP", "TIOCGPKT", "TIOCGPTLCK", "TIOCGEXCL",
956 [0x50] = "FIONCLEX", "FIOCLEX", "FIOASYNC", "TIOCSERCONFIG",
957 "TIOCSERGWILD", "TIOCSERSWILD", "TIOCGLCKTRMIOS", "TIOCSLCKTRMIOS",
958 "TIOCSERGSTRUCT", "TIOCSERGETLSR", "TIOCSERGETMULTI", "TIOCSERSETMULTI",
959 "TIOCMIWAIT", "TIOCGICOUNT", [0x60] = "FIOQSIZE",
962 static DEFINE_STRARRAY_OFFSET(tioctls, 0x5401);
963 #endif /* defined(__i386__) || defined(__x86_64__) */
/*
 * Shorthand for a syscall_fmt entry whose argument number 'arg' is printed
 * through the strarray formatter using strarray__<array>.
 *
 * 'name' is not expanded anywhere; it only documents the argument's name at
 * the point of use.
 */
#define STRARRAY(arg, name, array)			\
	.arg_scnprintf = { [arg] = SCA_STRARRAY, },	\
	.arg_parm = { [arg] = &strarray__##array, }
969 static struct syscall_fmt {
972 size_t (*arg_scnprintf[6])(char *bf, size_t size, struct syscall_arg *arg);
978 { .name = "access", .errmsg = true,
979 .arg_scnprintf = { [1] = SCA_ACCMODE, /* mode */ }, },
980 { .name = "arch_prctl", .errmsg = true, .alias = "prctl", },
981 { .name = "brk", .hexret = true,
982 .arg_scnprintf = { [0] = SCA_HEX, /* brk */ }, },
983 { .name = "clock_gettime", .errmsg = true, STRARRAY(0, clk_id, clockid), },
984 { .name = "close", .errmsg = true,
985 .arg_scnprintf = { [0] = SCA_CLOSE_FD, /* fd */ }, },
986 { .name = "connect", .errmsg = true, },
987 { .name = "dup", .errmsg = true,
988 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
989 { .name = "dup2", .errmsg = true,
990 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
991 { .name = "dup3", .errmsg = true,
992 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
993 { .name = "epoll_ctl", .errmsg = true, STRARRAY(1, op, epoll_ctl_ops), },
994 { .name = "eventfd2", .errmsg = true,
995 .arg_scnprintf = { [1] = SCA_EFD_FLAGS, /* flags */ }, },
996 { .name = "faccessat", .errmsg = true,
997 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
998 { .name = "fadvise64", .errmsg = true,
999 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1000 { .name = "fallocate", .errmsg = true,
1001 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1002 { .name = "fchdir", .errmsg = true,
1003 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1004 { .name = "fchmod", .errmsg = true,
1005 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1006 { .name = "fchmodat", .errmsg = true,
1007 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
1008 { .name = "fchown", .errmsg = true,
1009 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1010 { .name = "fchownat", .errmsg = true,
1011 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
1012 { .name = "fcntl", .errmsg = true,
1013 .arg_scnprintf = { [0] = SCA_FD, /* fd */
1014 [1] = SCA_STRARRAY, /* cmd */ },
1015 .arg_parm = { [1] = &strarray__fcntl_cmds, /* cmd */ }, },
1016 { .name = "fdatasync", .errmsg = true,
1017 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1018 { .name = "flock", .errmsg = true,
1019 .arg_scnprintf = { [0] = SCA_FD, /* fd */
1020 [1] = SCA_FLOCK, /* cmd */ }, },
1021 { .name = "fsetxattr", .errmsg = true,
1022 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1023 { .name = "fstat", .errmsg = true, .alias = "newfstat",
1024 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1025 { .name = "fstatat", .errmsg = true, .alias = "newfstatat",
1026 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1027 { .name = "fstatfs", .errmsg = true,
1028 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1029 { .name = "fsync", .errmsg = true,
1030 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1031 { .name = "ftruncate", .errmsg = true,
1032 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1033 { .name = "futex", .errmsg = true,
1034 .arg_scnprintf = { [1] = SCA_FUTEX_OP, /* op */ }, },
1035 { .name = "futimesat", .errmsg = true,
1036 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
1037 { .name = "getdents", .errmsg = true,
1038 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1039 { .name = "getdents64", .errmsg = true,
1040 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1041 { .name = "getitimer", .errmsg = true, STRARRAY(0, which, itimers), },
1042 { .name = "getrlimit", .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
1043 { .name = "ioctl", .errmsg = true,
1044 .arg_scnprintf = { [0] = SCA_FD, /* fd */
1045 #if defined(__i386__) || defined(__x86_64__)
1047 * FIXME: Make this available to all arches.
1049 [1] = SCA_STRHEXARRAY, /* cmd */
1050 [2] = SCA_HEX, /* arg */ },
1051 .arg_parm = { [1] = &strarray__tioctls, /* cmd */ }, },
1053 [2] = SCA_HEX, /* arg */ }, },
1055 { .name = "kill", .errmsg = true,
1056 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1057 { .name = "linkat", .errmsg = true,
1058 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
1059 { .name = "lseek", .errmsg = true,
1060 .arg_scnprintf = { [0] = SCA_FD, /* fd */
1061 [2] = SCA_STRARRAY, /* whence */ },
1062 .arg_parm = { [2] = &strarray__whences, /* whence */ }, },
1063 { .name = "lstat", .errmsg = true, .alias = "newlstat", },
1064 { .name = "madvise", .errmsg = true,
1065 .arg_scnprintf = { [0] = SCA_HEX, /* start */
1066 [2] = SCA_MADV_BHV, /* behavior */ }, },
1067 { .name = "mkdirat", .errmsg = true,
1068 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
1069 { .name = "mknodat", .errmsg = true,
1070 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
1071 { .name = "mlock", .errmsg = true,
1072 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1073 { .name = "mlockall", .errmsg = true,
1074 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1075 { .name = "mmap", .hexret = true,
1076 .arg_scnprintf = { [0] = SCA_HEX, /* addr */
1077 [2] = SCA_MMAP_PROT, /* prot */
1078 [3] = SCA_MMAP_FLAGS, /* flags */
1079 [4] = SCA_FD, /* fd */ }, },
1080 { .name = "mprotect", .errmsg = true,
1081 .arg_scnprintf = { [0] = SCA_HEX, /* start */
1082 [2] = SCA_MMAP_PROT, /* prot */ }, },
1083 { .name = "mremap", .hexret = true,
1084 .arg_scnprintf = { [0] = SCA_HEX, /* addr */
1085 [3] = SCA_MREMAP_FLAGS, /* flags */
1086 [4] = SCA_HEX, /* new_addr */ }, },
1087 { .name = "munlock", .errmsg = true,
1088 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1089 { .name = "munmap", .errmsg = true,
1090 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1091 { .name = "name_to_handle_at", .errmsg = true,
1092 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1093 { .name = "newfstatat", .errmsg = true,
1094 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1095 { .name = "open", .errmsg = true,
1096 .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */
1097 [1] = SCA_OPEN_FLAGS, /* flags */ }, },
1098 { .name = "open_by_handle_at", .errmsg = true,
1099 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1100 [2] = SCA_OPEN_FLAGS, /* flags */ }, },
1101 { .name = "openat", .errmsg = true,
1102 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1103 [2] = SCA_OPEN_FLAGS, /* flags */ }, },
1104 { .name = "perf_event_open", .errmsg = true,
1105 .arg_scnprintf = { [1] = SCA_INT, /* pid */
1106 [2] = SCA_INT, /* cpu */
1107 [3] = SCA_FD, /* group_fd */
1108 [4] = SCA_PERF_FLAGS, /* flags */ }, },
1109 { .name = "pipe2", .errmsg = true,
1110 .arg_scnprintf = { [1] = SCA_PIPE_FLAGS, /* flags */ }, },
1111 { .name = "poll", .errmsg = true, .timeout = true, },
1112 { .name = "ppoll", .errmsg = true, .timeout = true, },
1113 { .name = "pread", .errmsg = true, .alias = "pread64",
1114 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1115 { .name = "preadv", .errmsg = true, .alias = "pread",
1116 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1117 { .name = "prlimit64", .errmsg = true, STRARRAY(1, resource, rlimit_resources), },
1118 { .name = "pwrite", .errmsg = true, .alias = "pwrite64",
1119 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1120 { .name = "pwritev", .errmsg = true,
1121 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1122 { .name = "read", .errmsg = true,
1123 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1124 { .name = "readlinkat", .errmsg = true,
1125 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1126 { .name = "readv", .errmsg = true,
1127 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1128 { .name = "recvfrom", .errmsg = true,
1129 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1130 { .name = "recvmmsg", .errmsg = true,
1131 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1132 { .name = "recvmsg", .errmsg = true,
1133 .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
1134 { .name = "renameat", .errmsg = true,
1135 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1136 { .name = "rt_sigaction", .errmsg = true,
1137 .arg_scnprintf = { [0] = SCA_SIGNUM, /* sig */ }, },
1138 { .name = "rt_sigprocmask", .errmsg = true, STRARRAY(0, how, sighow), },
1139 { .name = "rt_sigqueueinfo", .errmsg = true,
1140 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1141 { .name = "rt_tgsigqueueinfo", .errmsg = true,
1142 .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
1143 { .name = "select", .errmsg = true, .timeout = true, },
1144 { .name = "sendmmsg", .errmsg = true,
1145 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1146 { .name = "sendmsg", .errmsg = true,
1147 .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
1148 { .name = "sendto", .errmsg = true,
1149 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1150 { .name = "setitimer", .errmsg = true, STRARRAY(0, which, itimers), },
1151 { .name = "setrlimit", .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
1152 { .name = "shutdown", .errmsg = true,
1153 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1154 { .name = "socket", .errmsg = true,
1155 .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
1156 [1] = SCA_SK_TYPE, /* type */ },
1157 .arg_parm = { [0] = &strarray__socket_families, /* family */ }, },
1158 { .name = "socketpair", .errmsg = true,
1159 .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
1160 [1] = SCA_SK_TYPE, /* type */ },
1161 .arg_parm = { [0] = &strarray__socket_families, /* family */ }, },
1162 { .name = "stat", .errmsg = true, .alias = "newstat", },
1163 { .name = "symlinkat", .errmsg = true,
1164 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1165 { .name = "tgkill", .errmsg = true,
1166 .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
1167 { .name = "tkill", .errmsg = true,
1168 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1169 { .name = "uname", .errmsg = true, .alias = "newuname", },
1170 { .name = "unlinkat", .errmsg = true,
1171 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1172 { .name = "utimensat", .errmsg = true,
1173 .arg_scnprintf = { [0] = SCA_FDAT, /* dirfd */ }, },
1174 { .name = "write", .errmsg = true,
1175 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1176 { .name = "writev", .errmsg = true,
1177 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1180 static int syscall_fmt__cmp(const void *name, const void *fmtp)
1182 const struct syscall_fmt *fmt = fmtp;
1183 return strcmp(name, fmt->name);
1186 static struct syscall_fmt *syscall_fmt__find(const char *name)
1188 const int nmemb = ARRAY_SIZE(syscall_fmts);
1189 return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp);
1193 struct event_format *tp_format;
1195 struct format_field *args;
1198 struct syscall_fmt *fmt;
1199 size_t (**arg_scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
1203 static size_t fprintf_duration(unsigned long t, FILE *fp)
1205 double duration = (double)t / NSEC_PER_MSEC;
1206 size_t printed = fprintf(fp, "(");
1208 if (duration >= 1.0)
1209 printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration);
1210 else if (duration >= 0.01)
1211 printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration);
1213 printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration);
1214 return printed + fprintf(fp, "): ");
1218 * filename.ptr: The filename char pointer that will be vfs_getname'd
1219 * filename.entry_str_pos: Where to insert the string translated from
1220 * filename.ptr by the vfs_getname tracepoint/kprobe.
1222 struct thread_trace {
1226 unsigned long nr_events;
1227 unsigned long pfmaj, pfmin;
1239 struct intlist *syscall_stats;
1242 static struct thread_trace *thread_trace__new(void)
1244 struct thread_trace *ttrace = zalloc(sizeof(struct thread_trace));
1247 ttrace->paths.max = -1;
1249 ttrace->syscall_stats = intlist__new(NULL);
1254 static struct thread_trace *thread__trace(struct thread *thread, FILE *fp)
1256 struct thread_trace *ttrace;
1261 if (thread__priv(thread) == NULL)
1262 thread__set_priv(thread, thread_trace__new());
1264 if (thread__priv(thread) == NULL)
1267 ttrace = thread__priv(thread);
1268 ++ttrace->nr_events;
1272 color_fprintf(fp, PERF_COLOR_RED,
1273 "WARNING: not enough memory, dropping samples!\n");
1277 #define TRACE_PFMAJ (1 << 0)
1278 #define TRACE_PFMIN (1 << 1)
1280 static const size_t trace__entry_str_size = 2048;
1283 struct perf_tool tool;
1290 struct syscall *table;
1292 struct perf_evsel *sys_enter,
1296 struct record_opts opts;
1297 struct perf_evlist *evlist;
1298 struct machine *host;
1299 struct thread *current;
1302 unsigned long nr_events;
1303 struct strlist *ev_qualifier;
1308 const char *last_vfs_getname;
1309 struct intlist *tid_list;
1310 struct intlist *pid_list;
1315 double duration_filter;
1321 bool not_ev_qualifier;
1325 bool multiple_threads;
1329 bool show_tool_stats;
1330 bool trace_syscalls;
1336 static int trace__set_fd_pathname(struct thread *thread, int fd, const char *pathname)
1338 struct thread_trace *ttrace = thread__priv(thread);
1340 if (fd > ttrace->paths.max) {
1341 char **npath = realloc(ttrace->paths.table, (fd + 1) * sizeof(char *));
1346 if (ttrace->paths.max != -1) {
1347 memset(npath + ttrace->paths.max + 1, 0,
1348 (fd - ttrace->paths.max) * sizeof(char *));
1350 memset(npath, 0, (fd + 1) * sizeof(char *));
1353 ttrace->paths.table = npath;
1354 ttrace->paths.max = fd;
1357 ttrace->paths.table[fd] = strdup(pathname);
1359 return ttrace->paths.table[fd] != NULL ? 0 : -1;
/*
 * Resolve the path behind @fd of @thread by reading its /proc fd symlink and
 * store it through trace__set_fd_pathname(), whose result is returned.
 * The link is /proc/<pid>/fd/<fd> when pid_ equals tid, otherwise
 * /proc/<pid>/task/<tid>/fd/<fd>.
 * NOTE(review): the failure returns are on lines not shown here.
 */
1362 static int thread__read_fd_path(struct thread *thread, int fd)
1364 char linkname[PATH_MAX], pathname[PATH_MAX];
1368 if (thread->pid_ == thread->tid) {
1369 scnprintf(linkname, sizeof(linkname),
1370 "/proc/%d/fd/%d", thread->pid_, fd);
1372 scnprintf(linkname, sizeof(linkname),
1373 "/proc/%d/task/%d/fd/%d", thread->pid_, thread->tid, fd);
/* st_size is the link length; it must fit in pathname together with the NUL. */
1376 if (lstat(linkname, &st) < 0 || st.st_size + 1 > (off_t)sizeof(pathname))
1379 ret = readlink(linkname, pathname, sizeof(pathname));
/* Reject errors and results longer than the length lstat() reported. */
1381 if (ret < 0 || ret > st.st_size)
/* readlink() does not NUL-terminate its output. */
1384 pathname[ret] = '\0';
1385 return trace__set_fd_pathname(thread, fd, pathname);
/*
 * Return the cached pathname for @fd of @thread.
 * When @fd is beyond the table or its slot is empty, the proc_getname
 * counter in @trace is bumped and thread__read_fd_path() is asked to fill
 * the slot from /proc.
 * NOTE(review): any guards on ttrace or on negative @fd, and the failure
 * return, are on lines not shown here.
 */
1388 static const char *thread__fd_path(struct thread *thread, int fd,
1389 struct trace *trace)
1391 struct thread_trace *ttrace = thread__priv(thread);
1399 if ((fd > ttrace->paths.max || ttrace->paths.table[fd] == NULL)) {
1402 ++trace->stats.proc_getname;
1403 if (thread__read_fd_path(thread, fd))
1407 return ttrace->paths.table[fd];
/*
 * Format a file descriptor argument into @bf as its decimal value, followed
 * by "<path>" using the path from thread__fd_path().
 * NOTE(review): the declaration of fd, the condition guarding the "<%s>"
 * append and the return are on lines not shown here.
 */
1410 static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
1411 struct syscall_arg *arg)
1414 size_t printed = scnprintf(bf, size, "%d", fd);
1415 const char *path = thread__fd_path(arg->thread, fd, arg->trace);
1418 printed += scnprintf(bf + printed, size - printed, "<%s>", path);
/*
 * Format the fd argument of a close like syscall_arg__scnprintf_fd() does,
 * then drop the cached pathname for that fd so a later reuse of the number
 * does not show a stale path.
 */
1423 static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
1424 struct syscall_arg *arg)
1427 size_t printed = syscall_arg__scnprintf_fd(bf, size, arg);
1428 struct thread_trace *ttrace = thread__priv(arg->thread);
/* Only touch slots inside the table; zfree() releases the string and clears the slot. */
1430 if (ttrace && fd >= 0 && fd <= ttrace->paths.max)
1431 zfree(&ttrace->paths.table[fd]);
/*
 * Record, in @thread's trace state, the filename pointer value being
 * formatted and the offset of @bf inside entry_str. trace__vfs_getname()
 * later inserts the resolved filename at that offset.
 * Assumes @bf points into ttrace->entry_str.
 */
1436 static void thread__set_filename_pos(struct thread *thread, const char *bf,
1439 struct thread_trace *ttrace = thread__priv(thread);
1441 ttrace->filename.ptr = ptr;
1442 ttrace->filename.entry_str_pos = bf - ttrace->entry_str;
/*
 * Format a filename pointer argument.
 * Without the vfs_getname probe the raw pointer value is printed in hex;
 * with it, only the position is recorded via thread__set_filename_pos() so
 * the name can be filled in later.
 * NOTE(review): "%#x" is paired with an unsigned long argument; "%#lx" is
 * the matching conversion -- confirm and fix.
 */
1445 static size_t syscall_arg__scnprintf_filename(char *bf, size_t size,
1446 struct syscall_arg *arg)
1448 unsigned long ptr = arg->val;
1450 if (!arg->trace->vfs_getname)
1451 return scnprintf(bf, size, "%#x", ptr);
1453 thread__set_filename_pos(arg->thread, bf, ptr);
/*
 * Return true when duration @t is below the configured duration_filter.
 * duration_filter is scaled by NSEC_PER_MSEC, so @t is expected in
 * nanoseconds and the filter in milliseconds.
 */
1457 static bool trace__filter_duration(struct trace *trace, double t)
1459 return t < (trace->duration_filter * NSEC_PER_MSEC);
/*
 * Print @tstamp relative to trace->base_time, converted to milliseconds,
 * as a right-aligned "%10.3f " column. Returns what fprintf() returned.
 */
1462 static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
1464 double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC;
1466 return fprintf(fp, "%10.3f ", ts);
/*
 * Flags shared between the signal handler and the event loop in trace__run().
 * NOTE(review): these are plain bools written from a signal handler; ISO C
 * only guarantees that for volatile sig_atomic_t (or lock-free atomics) --
 * consider changing the type.
 */
1469 static bool done = false;
1470 static bool interrupted = false;
/*
 * Handler installed for SIGCHLD and SIGINT in trace__run(); records whether
 * the signal was SIGINT. The remaining body lines are not shown here.
 */
1472 static void sig_handler(int sig)
1475 interrupted = sig == SIGINT;
/*
 * Print the common prefix of an output line: relative timestamp, duration
 * and, when several threads are traced, the thread id, optionally preceded
 * by the comm truncated to 14 characters.
 * Accumulates the byte counts in printed; the return is on a line not shown.
 */
1478 static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread,
1479 u64 duration, u64 tstamp, FILE *fp)
1481 size_t printed = trace__fprintf_tstamp(trace, tstamp, fp);
1482 printed += fprintf_duration(duration, fp);
1484 if (trace->multiple_threads) {
1485 if (trace->show_comm)
1486 printed += fprintf(fp, "%.14s/", thread__comm_str(thread));
1487 printed += fprintf(fp, "%d ", thread->tid);
/*
 * Handle a non-sample perf event.
 * PERF_RECORD_LOST is reported in red on trace->output and then handed to
 * machine__process_lost_event(); the machine__process_event() call handles
 * the other record types (its case label is on a line not shown here).
 */
1493 static int trace__process_event(struct trace *trace, struct machine *machine,
1494 union perf_event *event, struct perf_sample *sample)
1498 switch (event->header.type) {
1499 case PERF_RECORD_LOST:
1500 color_fprintf(trace->output, PERF_COLOR_RED,
1501 "LOST %" PRIu64 " events!\n", event->lost.lost);
1502 ret = machine__process_lost_event(machine, event, sample);
1504 ret = machine__process_event(machine, event, sample);
/*
 * perf_tool-style callback: recover the enclosing struct trace from @tool
 * and forward the event to trace__process_event().
 */
1511 static int trace__tool_process(struct perf_tool *tool,
1512 union perf_event *event,
1513 struct perf_sample *sample,
1514 struct machine *machine)
1516 struct trace *trace = container_of(tool, struct trace, tool);
1517 return trace__process_event(trace, machine, event, sample);
/*
 * Prepare symbol resolution for live tracing: initialise the symbol layer,
 * create the host machine, register the kernel address resolver and
 * synthesize the threads of @evlist into the host machine.
 * NOTE(review): error returns and cleanup are on lines not shown here.
 */
1520 static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
1522 int err = symbol__init(NULL);
1527 trace->host = machine__new_host();
1528 if (trace->host == NULL)
1531 if (trace_event__register_resolver(trace->host, machine__resolve_kernel_addr) < 0)
1534 err = __machine__synthesize_threads(trace->host, &trace->tool, &trace->opts.target,
1535 evlist->threads, trace__tool_process, false,
1536 trace->opts.proc_map_timeout);
/*
 * Build the per-argument formatter table of @sc.
 * For each tracepoint field, the formatter from the syscall's fmt entry is
 * used when one is set; otherwise pointer fields get the hex formatter.
 * NOTE(review): the idx declaration/increment, the guard around the
 * sc->fmt->arg_parm read and the returns are on lines not shown here.
 */
1543 static int syscall__set_arg_fmts(struct syscall *sc)
1545 struct format_field *field;
1548 sc->arg_scnprintf = calloc(sc->nr_args, sizeof(void *));
1549 if (sc->arg_scnprintf == NULL)
1553 sc->arg_parm = sc->fmt->arg_parm;
1555 for (field = sc->args; field; field = field->next) {
1556 if (sc->fmt && sc->fmt->arg_scnprintf[idx])
1557 sc->arg_scnprintf[idx] = sc->fmt->arg_scnprintf[idx];
1558 else if (field->flags & FIELD_IS_POINTER)
1559 sc->arg_scnprintf[idx] = syscall_arg__scnprintf_hex;
/*
 * Fill the syscall table entry for @id: look up its name, grow the table if
 * needed, find its fmt entry and its syscalls:sys_enter_<name> tracepoint
 * format (retrying with the fmt alias), then set up argument formatters.
 * Returns the result of syscall__set_arg_fmts() on the path shown; earlier
 * failure returns are on lines not shown here.
 */
1566 static int trace__read_syscall_info(struct trace *trace, int id)
1570 const char *name = audit_syscall_to_name(id, trace->audit.machine);
/* Grow the table to cover @id and zero the newly exposed entries. */
1575 if (id > trace->syscalls.max) {
1576 struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc));
1578 if (nsyscalls == NULL)
1581 if (trace->syscalls.max != -1) {
1582 memset(nsyscalls + trace->syscalls.max + 1, 0,
1583 (id - trace->syscalls.max) * sizeof(*sc));
1585 memset(nsyscalls, 0, (id + 1) * sizeof(*sc));
1588 trace->syscalls.table = nsyscalls;
1589 trace->syscalls.max = id;
1592 sc = trace->syscalls.table + id;
1595 sc->fmt = syscall_fmt__find(sc->name);
1597 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name);
1598 sc->tp_format = trace_event__tp_format("syscalls", tp_name);
/* The tracepoint may be registered under the alias name instead. */
1600 if (sc->tp_format == NULL && sc->fmt && sc->fmt->alias) {
1601 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias);
1602 sc->tp_format = trace_event__tp_format("syscalls", tp_name);
1605 if (sc->tp_format == NULL)
1608 sc->args = sc->tp_format->format.fields;
1609 sc->nr_args = sc->tp_format->format.nr_fields;
1610 /* drop nr field - not relevant here; does not exist on older kernels */
1611 if (sc->args && strcmp(sc->args->name, "nr") == 0) {
1612 sc->args = sc->args->next;
/* exit and exit_group are flagged through is_exit. */
1616 sc->is_exit = !strcmp(name, "exit_group") || !strcmp(name, "exit");
1618 return syscall__set_arg_fmts(sc);
/*
 * Translate the syscall names in trace->ev_qualifier into ids stored in
 * trace->ev_qualifier_ids. Names that do not resolve are listed on
 * trace->output after "Error:\tInvalid syscall "; in that case hints are
 * printed and the id array is freed and reset to empty.
 * NOTE(review): the conditions selecting the error branches and the returns
 * are on lines not shown here.
 */
1621 static int trace__validate_ev_qualifier(struct trace *trace)
1624 struct str_node *pos;
/* One id slot per qualifier entry. */
1626 trace->ev_qualifier_ids.nr = strlist__nr_entries(trace->ev_qualifier);
1627 trace->ev_qualifier_ids.entries = malloc(trace->ev_qualifier_ids.nr *
1628 sizeof(trace->ev_qualifier_ids.entries[0]));
1630 if (trace->ev_qualifier_ids.entries == NULL) {
1631 fputs("Error:\tNot enough memory for allocating events qualifier ids\n",
1639 strlist__for_each(pos, trace->ev_qualifier) {
1640 const char *sc = pos->s;
1641 int id = audit_name_to_syscall(sc, trace->audit.machine);
1645 fputs("Error:\tInvalid syscall ", trace->output);
1648 fputs(", ", trace->output);
1651 fputs(sc, trace->output);
1654 trace->ev_qualifier_ids.entries[i++] = id;
1658 fputs("\nHint:\ttry 'perf list syscalls:sys_enter_*'"
1659 "\nHint:\tand: 'man syscalls'\n", trace->output);
1660 zfree(&trace->ev_qualifier_ids.entries);
1661 trace->ev_qualifier_ids.nr = 0;
1668 * args is to be interpreted as a series of longs but we need to handle
1669 * 8-byte unaligned accesses. args points to raw_data within the event
1670 * and raw_data is guaranteed to be 8-byte unaligned because it is
1671 * preceded by raw_size which is a u32. So we need to copy args to a temp
1672 * variable to read it. Most notably this avoids extended load instructions
1673 * on unaligned addresses
/*
 * Format the arguments of syscall @sc into @bf (at most @size bytes).
 * @args points at the raw argument words; each one is copied out with
 * memcpy() before use because the source may be unaligned.
 * With tracepoint field information (sc->args) each value is printed as
 * "name: value" through the per-argument formatter when one exists. The
 * trailing lines print values by index i instead; presumably this is the
 * fallback when no field information is available -- confirm.
 * NOTE(review): the declarations of printed/val/p/bit/i, the else branches
 * and the return are on lines not shown here.
 */
1676 static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
1677 unsigned char *args, struct trace *trace,
1678 struct thread *thread)
1684 if (sc->args != NULL) {
1685 struct format_field *field;
1687 struct syscall_arg arg = {
1694 for (field = sc->args; field;
1695 field = field->next, ++arg.idx, bit <<= 1) {
1699 /* special care for unaligned accesses */
1700 p = args + sizeof(unsigned long) * arg.idx;
1701 memcpy(&val, p, sizeof(val));
1704 * Suppress this argument if its value is zero and
1705 * and we don't have a string associated in an
1709 !(sc->arg_scnprintf &&
1710 sc->arg_scnprintf[arg.idx] == SCA_STRARRAY &&
1711 sc->arg_parm[arg.idx]))
1714 printed += scnprintf(bf + printed, size - printed,
1715 "%s%s: ", printed ? ", " : "", field->name);
1716 if (sc->arg_scnprintf && sc->arg_scnprintf[arg.idx]) {
1719 arg.parm = sc->arg_parm[arg.idx];
1720 printed += sc->arg_scnprintf[arg.idx](bf + printed,
1721 size - printed, &arg);
1723 printed += scnprintf(bf + printed, size - printed,
1731 /* special care for unaligned accesses */
1732 p = args + sizeof(unsigned long) * i;
1733 memcpy(&val, p, sizeof(val));
1734 printed += scnprintf(bf + printed, size - printed,
1736 printed ? ", " : "", i, val);
/*
 * Signature shared by the per-evsel sample handlers in this file (stored in
 * evsel->handler and invoked from trace__handle_event() and
 * trace__process_sample()).
 */
1744 typedef int (*tracepoint_handler)(struct trace *trace, struct perf_evsel *evsel,
1745 union perf_event *event,
1746 struct perf_sample *sample);
/*
 * Return the syscall table entry for @id, loading it on demand with
 * trace__read_syscall_info(). Invalid ids are reported with a running count
 * and skipped; load failures are reported as
 * "Problems reading syscall <id>(<name>) information".
 * NOTE(review): the conditions (e.g. verbosity checks), labels and NULL
 * returns around the diagnostics are on lines not shown here.
 */
1748 static struct syscall *trace__syscall_info(struct trace *trace,
1749 struct perf_evsel *evsel, int id)
1755 * XXX: Noticed on x86_64, reproduced as far back as 3.0.36, haven't tried
1756 * before that, leaving at a higher verbosity level till that is
1757 * explained. Reproduced with plain ftrace with:
1759 * echo 1 > /t/events/raw_syscalls/sys_exit/enable
1760 * grep "NR -1 " /t/trace_pipe
1762 * After generating some load on the machine.
1766 fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n",
1767 id, perf_evsel__name(evsel), ++n);
/* Load the entry when it is outside the table or not yet named. */
1772 if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) &&
1773 trace__read_syscall_info(trace, id))
/* Re-check: the load may have returned 0 without naming the entry. */
1776 if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL))
1779 return &trace->syscalls.table[id];
1783 fprintf(trace->output, "Problems reading syscall %d", id);
1784 if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL)
1785 fprintf(trace->output, "(%s)", trace->syscalls.table[id].name);
1786 fputs(" information\n", trace->output);
/*
 * Fold the duration of the syscall that just exited into the per-thread
 * statistics for syscall @id. The stats object is allocated on first use
 * and hung off the intlist node's priv pointer.
 * The duration is sample->time minus ttrace->entry_time when an entry time
 * is known and precedes the sample; the initial value of duration is set on
 * a line not shown here.
 */
1791 static void thread__update_stats(struct thread_trace *ttrace,
1792 int id, struct perf_sample *sample)
1794 struct int_node *inode;
1795 struct stats *stats;
1798 inode = intlist__findnew(ttrace->syscall_stats, id);
1802 stats = inode->priv;
1803 if (stats == NULL) {
1804 stats = malloc(sizeof(struct stats));
1808 inode->priv = stats;
1811 if (ttrace->entry_time && sample->time > ttrace->entry_time)
1812 duration = sample->time - ttrace->entry_time;
1814 update_stats(stats, duration);
/*
 * If the current thread has a syscall entry that was formatted but not yet
 * printed (entry_pending), print it now followed by ") ..." and clear the
 * pending flag. Used before other output is emitted so lines do not get
 * interleaved.
 * NOTE(review): the early returns and the final return are on lines not
 * shown here.
 */
1817 static int trace__printf_interrupted_entry(struct trace *trace, struct perf_sample *sample)
1819 struct thread_trace *ttrace;
1823 if (trace->current == NULL)
1826 ttrace = thread__priv(trace->current);
1828 if (!ttrace->entry_pending)
1831 duration = sample->time - ttrace->entry_time;
1833 printed = trace__fprintf_entry_head(trace, trace->current, duration, sample->time, trace->output);
1834 printed += fprintf(trace->output, "%-70s) ...\n", ttrace->entry_str);
1835 ttrace->entry_pending = false;
/*
 * Handler for sys_enter samples: format "name(args" into the thread's
 * entry_str buffer and remember the entry time. The line is printed
 * immediately when neither a duration filter nor summary-only mode is
 * active; otherwise it is left pending (entry_pending is set on a line
 * whose guarding condition is not shown). Also tracks the thread in
 * trace->current.
 * NOTE(review): NULL checks on sc/ttrace/args, the else branches and the
 * return are on lines not shown here.
 */
1840 static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
1841 union perf_event *event __maybe_unused,
1842 struct perf_sample *sample)
1847 struct thread *thread;
1848 int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1;
1849 struct syscall *sc = trace__syscall_info(trace, evsel, id);
1850 struct thread_trace *ttrace;
1855 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1856 ttrace = thread__trace(thread, trace->output);
1860 args = perf_evsel__sc_tp_ptr(evsel, args, sample);
/* The formatting buffer is allocated once per thread, on first use. */
1862 if (ttrace->entry_str == NULL) {
1863 ttrace->entry_str = malloc(trace__entry_str_size);
1864 if (!ttrace->entry_str)
/* Flush a previously pending entry before entry_str is overwritten. */
1868 if (!trace->summary_only)
1869 trace__printf_interrupted_entry(trace, sample);
1871 ttrace->entry_time = sample->time;
1872 msg = ttrace->entry_str;
1873 printed += scnprintf(msg + printed, trace__entry_str_size - printed, "%s(", sc->name);
1875 printed += syscall__scnprintf_args(sc, msg + printed, trace__entry_str_size - printed,
1876 args, trace, thread);
1879 if (!trace->duration_filter && !trace->summary_only) {
1880 trace__fprintf_entry_head(trace, thread, 1, sample->time, trace->output);
1881 fprintf(trace->output, "%-70s\n", ttrace->entry_str);
1884 ttrace->entry_pending = true;
/* Swap the reference held in trace->current over to this thread. */
1886 if (trace->current != thread) {
1887 thread__put(trace->current);
1888 trace->current = thread__get(thread);
/* Drop the reference taken by machine__findnew_thread(). */
1892 thread__put(thread);
/*
 * Handler for sys_exit samples: update per-thread statistics, remember the
 * pathname for a successful open, apply the duration filter and print the
 * completed line, i.e. the pending entry (or a "... [continued]: name()"
 * marker) followed by the formatted return value.
 * NOTE(review): NULL checks, the condition guarding the stats update, the
 * goto targets, the final else branch and the return are on lines not shown
 * here.
 */
1896 static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
1897 union perf_event *event __maybe_unused,
1898 struct perf_sample *sample)
1902 struct thread *thread;
1903 int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1;
1904 struct syscall *sc = trace__syscall_info(trace, evsel, id);
1905 struct thread_trace *ttrace;
1910 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1911 ttrace = thread__trace(thread, trace->output);
1916 thread__update_stats(ttrace, id, sample);
1918 ret = perf_evsel__sc_tp_uint(evsel, ret, sample);
/* A successful open: bind the last vfs_getname pathname to the returned fd. */
1920 if (id == trace->audit.open_id && ret >= 0 && trace->last_vfs_getname) {
1921 trace__set_fd_pathname(thread, ret, trace->last_vfs_getname);
1922 trace->last_vfs_getname = NULL;
1923 ++trace->stats.vfs_getname;
1926 ttrace->exit_time = sample->time;
/* Without a recorded entry time the duration cannot be computed. */
1928 if (ttrace->entry_time) {
1929 duration = sample->time - ttrace->entry_time;
1930 if (trace__filter_duration(trace, duration))
1932 } else if (trace->duration_filter)
1935 if (trace->summary_only)
1938 trace__fprintf_entry_head(trace, thread, duration, sample->time, trace->output);
1940 if (ttrace->entry_pending) {
1941 fprintf(trace->output, "%-70s", ttrace->entry_str);
1943 fprintf(trace->output, " ... [");
1944 color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued");
1945 fprintf(trace->output, "]: %s()", sc->name);
/* Return value formatting depends on the syscall's fmt entry. */
1948 if (sc->fmt == NULL) {
1950 fprintf(trace->output, ") = %ld", ret);
1951 } else if (ret < 0 && sc->fmt->errmsg) {
1952 char bf[STRERR_BUFSIZE];
1953 const char *emsg = strerror_r(-ret, bf, sizeof(bf)),
1954 *e = audit_errno_to_name(-ret);
1956 fprintf(trace->output, ") = -1 %s %s", e, emsg);
1957 } else if (ret == 0 && sc->fmt->timeout)
1958 fprintf(trace->output, ") = 0 Timeout");
1959 else if (sc->fmt->hexret)
1960 fprintf(trace->output, ") = %#lx", ret);
1964 fputc('\n', trace->output);
1966 ttrace->entry_pending = false;
1969 thread__put(thread);
/*
 * Handler for the vfs_getname probe: remember the sample's "pathname" in
 * trace->last_vfs_getname and, when the thread has a filename position
 * recorded by thread__set_filename_pos(), insert the pathname into
 * entry_str at that position. If the whole name does not fit, only its tail
 * is inserted.
 * NOTE(review): no thread__put() for the reference returned by
 * machine__findnew_thread() is visible in the shown lines -- confirm it is
 * not leaked. The early exits and the return are on lines not shown here.
 */
1973 static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel,
1974 union perf_event *event __maybe_unused,
1975 struct perf_sample *sample)
1977 struct thread *thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1978 struct thread_trace *ttrace;
1979 size_t filename_len, entry_str_len, to_move;
1980 ssize_t remaining_space;
1982 const char *filename;
1984 trace->last_vfs_getname = perf_evsel__rawptr(evsel, sample, "pathname");
1989 ttrace = thread__priv(thread);
/* Nothing to patch unless a filename argument position was recorded. */
1993 if (!ttrace->filename.ptr)
1996 entry_str_len = strlen(ttrace->entry_str);
1997 remaining_space = trace__entry_str_size - entry_str_len - 1; /* \0 */
1998 if (remaining_space <= 0)
2001 filename = trace->last_vfs_getname;
2002 filename_len = strlen(filename);
/* Keep the tail of the name when it is longer than the free space. */
2003 if (filename_len > (size_t)remaining_space) {
2004 filename += filename_len - remaining_space;
2005 filename_len = remaining_space;
/* Shift the rest of entry_str right to open a gap, then copy the name in. */
2008 to_move = entry_str_len - ttrace->filename.entry_str_pos + 1; /* \0 */
2009 pos = ttrace->entry_str + ttrace->filename.entry_str_pos;
2010 memmove(pos + filename_len, pos, to_move);
2011 memcpy(pos, filename, filename_len);
2013 ttrace->filename.ptr = 0;
2014 ttrace->filename.entry_str_pos = 0;
/*
 * Handler for sched:sched_stat_runtime: add the sample's "runtime",
 * converted to milliseconds, to both the thread's and the global
 * runtime_ms totals.
 * The trailing fprintf dumps the raw event fields; presumably this is the
 * path taken when no thread_trace could be obtained -- confirm (its label
 * and some arguments are on lines not shown here).
 */
2019 static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel,
2020 union perf_event *event __maybe_unused,
2021 struct perf_sample *sample)
2023 u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
2024 double runtime_ms = (double)runtime / NSEC_PER_MSEC;
2025 struct thread *thread = machine__findnew_thread(trace->host,
2028 struct thread_trace *ttrace = thread__trace(thread, trace->output);
2033 ttrace->runtime_ms += runtime_ms;
2034 trace->runtime_ms += runtime_ms;
2035 thread__put(thread);
2039 fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n",
2041 perf_evsel__strval(evsel, sample, "comm"),
2042 (pid_t)perf_evsel__intval(evsel, sample, "pid"),
2044 perf_evsel__intval(evsel, sample, "vruntime"));
2045 thread__put(thread);
/*
 * Generic handler for non-syscall events: flush any pending syscall entry,
 * print the timestamp, a "( ): " placeholder in the duration column when
 * syscalls are also traced, the event name and, when a tracepoint format is
 * available, the formatted payload, closing the line with ")".
 */
2049 static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel,
2050 union perf_event *event __maybe_unused,
2051 struct perf_sample *sample)
2053 trace__printf_interrupted_entry(trace, sample);
2054 trace__fprintf_tstamp(trace, sample->time, trace->output);
2056 if (trace->trace_syscalls)
2057 fprintf(trace->output, "( ): ");
2059 fprintf(trace->output, "%s:", evsel->name);
2061 if (evsel->tp_format) {
2062 event_format__fprintf(evsel->tp_format, sample->cpu,
2063 sample->raw_data, sample->raw_size,
2067 fprintf(trace->output, ")\n");
/*
 * Print a resolved address to @f: optionally "dso@", then "sym+0xoff" when
 * a symbol is known and requested, else a raw hex address (al->addr, or
 * sample->addr). verbose forces both the dso and the symbol on.
 * NOTE(review): the else-if conditions choosing between al->addr and
 * sample->addr are on lines not shown here.
 */
2071 static void print_location(FILE *f, struct perf_sample *sample,
2072 struct addr_location *al,
2073 bool print_dso, bool print_sym)
2076 if ((verbose || print_dso) && al->map)
2077 fprintf(f, "%s@", al->map->dso->long_name);
2079 if ((verbose || print_sym) && al->sym)
2080 fprintf(f, "%s+0x%" PRIx64, al->sym->name,
2081 al->addr - al->sym->start);
2083 fprintf(f, "0x%" PRIx64, al->addr);
2085 fprintf(f, "0x%" PRIx64, sample->addr);
/*
 * Handler for page fault software events: unless in summary-only mode,
 * print "<maj|min>fault [<faulting code location>] => <target location>
 * (<map type><level>)".
 * The target address is first looked up in variable maps and then, per the
 * second lookup shown, in function maps.
 * NOTE(review): the pfmaj/pfmin counter updates, the conditions between the
 * two target lookups, the map_type reassignments and the return are on
 * lines not shown here.
 */
2088 static int trace__pgfault(struct trace *trace,
2089 struct perf_evsel *evsel,
2090 union perf_event *event,
2091 struct perf_sample *sample)
2093 struct thread *thread;
2094 u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
2095 struct addr_location al;
2096 char map_type = 'd';
2097 struct thread_trace *ttrace;
2100 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
2101 ttrace = thread__trace(thread, trace->output);
2105 if (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ)
2110 if (trace->summary_only)
2113 thread__find_addr_location(thread, cpumode, MAP__FUNCTION,
2116 trace__fprintf_entry_head(trace, thread, 0, sample->time, trace->output);
2118 fprintf(trace->output, "%sfault [",
2119 evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ?
2122 print_location(trace->output, sample, &al, false, true);
2124 fprintf(trace->output, "] => ");
2126 thread__find_addr_location(thread, cpumode, MAP__VARIABLE,
2130 thread__find_addr_location(thread, cpumode,
2131 MAP__FUNCTION, sample->addr, &al);
2139 print_location(trace->output, sample, &al, true, false);
2141 fprintf(trace->output, " (%c%c)\n", map_type, al.level);
2145 thread__put(thread);
/*
 * Decide whether @sample should be ignored given the pid/tid filter lists.
 * First test: the sample's pid or tid is found in a configured list.
 * Second test: at least one list is configured (reached when no match).
 * NOTE(review): the return values are on lines not shown here; presumably
 * a match keeps the sample and a configured-but-unmatched list skips it.
 */
2149 static bool skip_sample(struct trace *trace, struct perf_sample *sample)
2151 if ((trace->pid_list && intlist__find(trace->pid_list, sample->pid)) ||
2152 (trace->tid_list && intlist__find(trace->tid_list, sample->tid)))
2155 if (trace->pid_list || trace->tid_list)
/*
 * Sample callback installed as tool.sample by trace__replay(): apply the
 * pid/tid filter, latch the first timestamp as base_time unless full_time
 * is set, and dispatch to the evsel's handler.
 * NOTE(review): the guard before the handler call and the return are on
 * lines not shown here.
 */
2161 static int trace__process_sample(struct perf_tool *tool,
2162 union perf_event *event,
2163 struct perf_sample *sample,
2164 struct perf_evsel *evsel,
2165 struct machine *machine __maybe_unused)
2167 struct trace *trace = container_of(tool, struct trace, tool);
2170 tracepoint_handler handler = evsel->handler;
2172 if (skip_sample(trace, sample))
2175 if (!trace->full_time && trace->base_time == 0)
2176 trace->base_time = sample->time;
2180 handler(trace, evsel, event, sample);
/*
 * Turn the pid and tid target strings from the record options into the
 * intlists used by skip_sample(). A list that fails to build is reported
 * with pr_err(); the return statements are on lines not shown here.
 */
2186 static int parse_target_str(struct trace *trace)
2188 if (trace->opts.target.pid) {
2189 trace->pid_list = intlist__new(trace->opts.target.pid);
2190 if (trace->pid_list == NULL) {
2191 pr_err("Error parsing process id string\n");
2196 if (trace->opts.target.tid) {
2197 trace->tid_list = intlist__new(trace->opts.target.tid);
2198 if (trace->tid_list == NULL) {
2199 pr_err("Error parsing thread id string\n");
/*
 * Build an argument vector for cmd_record() and run it: the fixed
 * record_args, then "-e <syscall tracepoints>" when syscalls are traced,
 * then the major/minor page fault events as requested, then the caller's
 * own @argv.
 * NOTE(review): rec_argv comes from calloc() and no free() is visible in
 * the shown lines, neither after cmd_record() nor on the "Neither
 * raw_syscalls nor syscalls" error path -- confirm it is not leaked.
 */
2207 static int trace__record(struct trace *trace, int argc, const char **argv)
2209 unsigned int rec_argc, i, j;
2210 const char **rec_argv;
2211 const char * const record_args[] = {
2218 const char * const sc_args[] = { "-e", };
2219 unsigned int sc_args_nr = ARRAY_SIZE(sc_args);
2220 const char * const majpf_args[] = { "-e", "major-faults" };
2221 unsigned int majpf_args_nr = ARRAY_SIZE(majpf_args);
2222 const char * const minpf_args[] = { "-e", "minor-faults" };
2223 unsigned int minpf_args_nr = ARRAY_SIZE(minpf_args);
2225 /* +1 is for the event string below */
2226 rec_argc = ARRAY_SIZE(record_args) + sc_args_nr + 1 +
2227 majpf_args_nr + minpf_args_nr + argc;
/* One extra slot beyond rec_argc; calloc() leaves it NULL. */
2228 rec_argv = calloc(rec_argc + 1, sizeof(char *));
2230 if (rec_argv == NULL)
2234 for (i = 0; i < ARRAY_SIZE(record_args); i++)
2235 rec_argv[j++] = record_args[i];
2237 if (trace->trace_syscalls) {
2238 for (i = 0; i < sc_args_nr; i++)
2239 rec_argv[j++] = sc_args[i];
2241 /* event string may be different for older kernels - e.g., RHEL6 */
2242 if (is_valid_tracepoint("raw_syscalls:sys_enter"))
2243 rec_argv[j++] = "raw_syscalls:sys_enter,raw_syscalls:sys_exit";
2244 else if (is_valid_tracepoint("syscalls:sys_enter"))
2245 rec_argv[j++] = "syscalls:sys_enter,syscalls:sys_exit";
2247 pr_err("Neither raw_syscalls nor syscalls events exist.\n");
2252 if (trace->trace_pgfaults & TRACE_PFMAJ)
2253 for (i = 0; i < majpf_args_nr; i++)
2254 rec_argv[j++] = majpf_args[i];
2256 if (trace->trace_pgfaults & TRACE_PFMIN)
2257 for (i = 0; i < minpf_args_nr; i++)
2258 rec_argv[j++] = minpf_args[i];
2260 for (i = 0; i < (unsigned int)argc; i++)
2261 rec_argv[j++] = argv[i];
/* j is the number of slots actually filled, which may be below rec_argc. */
2263 return cmd_record(j, rec_argv, NULL);
/* Forward declaration: trace__run() calls this before its definition further down. */
2266 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp);
/*
 * Try to add the probe:vfs_getname tracepoint to @evlist with
 * trace__vfs_getname() as its handler. The probe is rejected, and the evsel
 * deleted, when it has no "pathname" field.
 * NOTE(review): the NULL check on evsel and the boolean returns are on
 * lines not shown here.
 */
2268 static bool perf_evlist__add_vfs_getname(struct perf_evlist *evlist)
2270 struct perf_evsel *evsel = perf_evsel__newtp("probe", "vfs_getname");
2274 if (perf_evsel__field(evsel, "pathname") == NULL) {
2275 perf_evsel__delete(evsel);
2279 evsel->handler = trace__vfs_getname;
2280 perf_evlist__add(evlist, evsel);
/*
 * Add a software page fault event to @evlist, sampling every occurrence
 * (sample_period = 1) and handled by trace__pgfault(). The config value
 * (assigned from a parameter declared on a line not shown) selects which
 * PERF_COUNT_SW_PAGE_FAULTS_* counter is used.
 * NOTE(review): the NULL check on evsel and the returns are on lines not
 * shown here.
 */
2284 static int perf_evlist__add_pgfault(struct perf_evlist *evlist,
2287 struct perf_evsel *evsel;
2288 struct perf_event_attr attr = {
2289 .type = PERF_TYPE_SOFTWARE,
2293 attr.config = config;
2294 attr.sample_period = 1;
2296 event_attr_init(&attr);
2298 evsel = perf_evsel__new(&attr);
2302 evsel->handler = trace__pgfault;
2303 perf_evlist__add(evlist, evsel);
/*
 * Live-mode dispatcher for one event read from the ring buffer.
 * Non-sample records go to trace__process_event(). Samples are mapped to
 * their evsel by id; tracepoint samples without raw data are reported and
 * skipped, everything else is passed to the evsel's handler.
 * NOTE(review): raw_size is printed with "%d"; if it is an unsigned 32-bit
 * field, "%u" is the matching conversion -- confirm.
 */
2308 static void trace__handle_event(struct trace *trace, union perf_event *event, struct perf_sample *sample)
2310 const u32 type = event->header.type;
2311 struct perf_evsel *evsel;
/* The first timestamp seen becomes the zero point unless absolute times were requested. */
2313 if (!trace->full_time && trace->base_time == 0)
2314 trace->base_time = sample->time;
2316 if (type != PERF_RECORD_SAMPLE) {
2317 trace__process_event(trace, trace->host, event, sample);
2321 evsel = perf_evlist__id2evsel(trace->evlist, sample->id);
2322 if (evsel == NULL) {
2323 fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample->id);
2327 if (evsel->attr.type == PERF_TYPE_TRACEPOINT &&
2328 sample->raw_data == NULL) {
2329 fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
2330 perf_evsel__name(evsel), sample->tid,
2331 sample->cpu, sample->raw_size);
2333 tracepoint_handler handler = evsel->handler;
2334 handler(trace, evsel, event, sample);
/*
 * Create the sys_enter and sys_exit tracepoint evsels, initialise their
 * "args" / "ret" field accessors, add both to the evlist and remember them
 * in trace->syscalls.events.
 * On failure the evsels created so far are released in reverse order
 * through the out_delete_* labels.
 * NOTE(review): the return statements are on lines not shown here.
 */
2338 static int trace__add_syscall_newtp(struct trace *trace)
2341 struct perf_evlist *evlist = trace->evlist;
2342 struct perf_evsel *sys_enter, *sys_exit;
2344 sys_enter = perf_evsel__syscall_newtp("sys_enter", trace__sys_enter);
2345 if (sys_enter == NULL)
2348 if (perf_evsel__init_sc_tp_ptr_field(sys_enter, args))
2349 goto out_delete_sys_enter;
2351 sys_exit = perf_evsel__syscall_newtp("sys_exit", trace__sys_exit);
2352 if (sys_exit == NULL)
2353 goto out_delete_sys_enter;
2355 if (perf_evsel__init_sc_tp_uint_field(sys_exit, ret))
2356 goto out_delete_sys_exit;
2358 perf_evlist__add(evlist, sys_enter);
2359 perf_evlist__add(evlist, sys_exit);
2361 trace->syscalls.events.sys_enter = sys_enter;
2362 trace->syscalls.events.sys_exit = sys_exit;
2368 out_delete_sys_exit:
2369 perf_evsel__delete_priv(sys_exit);
2370 out_delete_sys_enter:
2371 perf_evsel__delete_priv(sys_enter);
/*
 * Build an "id" filter expression from the validated qualifier ids
 * (inverted when not_ev_qualifier is set) and append it with "&&" to the
 * sys_enter filter and, if that returned 0, to the sys_exit filter.
 * NOTE(review): the NULL check on filter, its release and the return are on
 * lines not shown here.
 */
2375 static int trace__set_ev_qualifier_filter(struct trace *trace)
2378 char *filter = asprintf_expr_inout_ints("id", !trace->not_ev_qualifier,
2379 trace->ev_qualifier_ids.nr,
2380 trace->ev_qualifier_ids.entries);
2385 if (!perf_evsel__append_filter(trace->syscalls.events.sys_enter, "&&", filter))
2386 err = perf_evsel__append_filter(trace->syscalls.events.sys_exit, "&&", filter);
/*
 * Live tracing entry point: set up the requested events, open and mmap
 * them, optionally start the workload given by @argv, then read and
 * dispatch events until done, and finally print the summary and tool
 * statistics.
 * @argc/@argv: workload command line; a workload is forked when argc > 0.
 * NOTE(review): many control-flow lines (labels, loop back-edges,
 * conditions, returns) are not shown in this excerpt; comments below
 * describe only the visible statements.
 */
2396 static int trace__run(struct trace *trace, int argc, const char **argv)
2398 struct perf_evlist *evlist = trace->evlist;
2399 struct perf_evsel *evsel;
2401 unsigned long before;
2402 const bool forks = argc > 0;
2403 bool draining = false;
/* Event setup: syscall tracepoints, vfs_getname probe, page faults, sched runtime. */
2407 if (trace->trace_syscalls && trace__add_syscall_newtp(trace))
2408 goto out_error_raw_syscalls;
2410 if (trace->trace_syscalls)
2411 trace->vfs_getname = perf_evlist__add_vfs_getname(evlist);
2413 if ((trace->trace_pgfaults & TRACE_PFMAJ) &&
2414 perf_evlist__add_pgfault(evlist, PERF_COUNT_SW_PAGE_FAULTS_MAJ)) {
2418 if ((trace->trace_pgfaults & TRACE_PFMIN) &&
2419 perf_evlist__add_pgfault(evlist, PERF_COUNT_SW_PAGE_FAULTS_MIN))
2423 perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
2424 trace__sched_stat_runtime))
2425 goto out_error_sched_stat_runtime;
/* Resolve the target into cpu/thread maps and prepare symbol resolution. */
2427 err = perf_evlist__create_maps(evlist, &trace->opts.target);
2429 fprintf(trace->output, "Problems parsing the target to trace, check your options!\n");
2430 goto out_delete_evlist;
2433 err = trace__symbols_init(trace, evlist);
2435 fprintf(trace->output, "Problems initializing symbol libraries!\n");
2436 goto out_delete_evlist;
2439 perf_evlist__config(evlist, &trace->opts);
/* Both signals are routed to sig_handler(). */
2441 signal(SIGCHLD, sig_handler);
2442 signal(SIGINT, sig_handler);
2445 err = perf_evlist__prepare_workload(evlist, &trace->opts.target,
2448 fprintf(trace->output, "Couldn't run the workload!\n");
2449 goto out_delete_evlist;
2453 err = perf_evlist__open(evlist);
2455 goto out_error_open;
2458 * Better not use !target__has_task() here because we need to cover the
2459 * case where no threads were specified in the command line, but a
2460 * workload was, and in that case we will fill in the thread_map when
2461 * we fork the workload in perf_evlist__prepare_workload.
2463 if (trace->filter_pids.nr > 0)
2464 err = perf_evlist__set_filter_pids(evlist, trace->filter_pids.nr, trace->filter_pids.entries);
2465 else if (thread_map__pid(evlist->threads, 0) == -1)
2466 err = perf_evlist__set_filter_pid(evlist, getpid());
2471 if (trace->ev_qualifier_ids.nr > 0) {
2472 err = trace__set_ev_qualifier_filter(trace);
2476 pr_debug("event qualifier tracepoint filter: %s\n",
2477 trace->syscalls.events.sys_exit->filter);
/* On failure evsel is set to the event whose filter was rejected (used in the error message). */
2480 err = perf_evlist__apply_filters(evlist, &evsel);
2482 goto out_error_apply_filters;
2484 err = perf_evlist__mmap(evlist, trace->opts.mmap_pages, false);
2486 goto out_error_mmap;
2488 if (!target__none(&trace->opts.target))
2489 perf_evlist__enable(evlist);
2492 perf_evlist__start_workload(evlist);
/* Print tids when tracing system wide, several threads, or with inheritance. */
2494 trace->multiple_threads = thread_map__pid(evlist->threads, 0) == -1 ||
2495 evlist->threads->nr > 1 ||
2496 perf_evlist__first(evlist)->attr.inherit;
/* Main loop body: drain every mmap ring, then poll if nothing new arrived. */
2498 before = trace->nr_events;
2500 for (i = 0; i < evlist->nr_mmaps; i++) {
2501 union perf_event *event;
2503 while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) {
2504 struct perf_sample sample;
2508 err = perf_evlist__parse_sample(evlist, event, &sample);
2510 fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err);
2514 trace__handle_event(trace, event, &sample);
2516 perf_evlist__mmap_consume(evlist, i);
2521 if (done && !draining) {
2522 perf_evlist__disable(evlist);
2528 if (trace->nr_events == before) {
2529 int timeout = done ? 100 : -1;
2531 if (!draining && perf_evlist__poll(evlist, timeout) > 0) {
2532 if (perf_evlist__filter_pollfd(evlist, POLLERR | POLLHUP) == 0)
/* Teardown: drop the current-thread reference and stop the events. */
2542 thread__zput(trace->current);
2544 perf_evlist__disable(evlist);
2548 trace__fprintf_thread_summary(trace, trace->output);
2550 if (trace->show_tool_stats) {
2551 fprintf(trace->output, "Stats:\n "
2552 " vfs_getname : %" PRIu64 "\n"
2553 " proc_getname: %" PRIu64 "\n",
2554 trace->stats.vfs_getname,
2555 trace->stats.proc_getname);
2560 perf_evlist__delete(evlist);
2561 trace->evlist = NULL;
2562 trace->live = false;
/* Error labels: each formats a message into errbuf or prints directly, then jumps to the evlist cleanup. */
2565 char errbuf[BUFSIZ];
2567 out_error_sched_stat_runtime:
2568 debugfs__strerror_open_tp(errno, errbuf, sizeof(errbuf), "sched", "sched_stat_runtime");
2571 out_error_raw_syscalls:
2572 debugfs__strerror_open_tp(errno, errbuf, sizeof(errbuf), "raw_syscalls", "sys_(enter|exit)");
2576 perf_evlist__strerror_mmap(evlist, errno, errbuf, sizeof(errbuf));
2580 perf_evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf));
2583 fprintf(trace->output, "%s\n", errbuf);
2584 goto out_delete_evlist;
2586 out_error_apply_filters:
2587 fprintf(trace->output,
2588 "Failed to set filter \"%s\" on event %s with %d (%s)\n",
2589 evsel->filter, perf_evsel__name(evsel), errno,
2590 strerror_r(errno, errbuf, sizeof(errbuf)));
2591 goto out_delete_evlist;
2594 fprintf(trace->output, "Not enough memory to run!\n");
2595 goto out_delete_evlist;
2598 fprintf(trace->output, "errno=%d,%s\n", errno, strerror(errno));
2599 goto out_delete_evlist;
/*
 * Replay mode: process a recorded perf data file through the same handlers
 * used for live tracing.
 * Installs the tool callbacks, opens the session, wires the sys_enter /
 * sys_exit / vfs_getname / page fault handlers onto the session's evsels,
 * applies the pid/tid target lists, processes all events and prints the
 * summary when requested. The session is deleted at the end.
 * NOTE(review): the NULL tests that trigger the "syscalls:" fallback
 * lookups, the goto targets and the return are on lines not shown here.
 */
2602 static int trace__replay(struct trace *trace)
2604 const struct perf_evsel_str_handler handlers[] = {
2605 { "probe:vfs_getname", trace__vfs_getname, },
2607 struct perf_data_file file = {
2609 .mode = PERF_DATA_MODE_READ,
2610 .force = trace->force,
2612 struct perf_session *session;
2613 struct perf_evsel *evsel;
/* Samples go to trace__process_sample(); the other callbacks are the stock perf_event__process_* handlers. */
2616 trace->tool.sample = trace__process_sample;
2617 trace->tool.mmap = perf_event__process_mmap;
2618 trace->tool.mmap2 = perf_event__process_mmap2;
2619 trace->tool.comm = perf_event__process_comm;
2620 trace->tool.exit = perf_event__process_exit;
2621 trace->tool.fork = perf_event__process_fork;
2622 trace->tool.attr = perf_event__process_attr;
2623 trace->tool.tracing_data = perf_event__process_tracing_data;
2624 trace->tool.build_id = perf_event__process_build_id;
2626 trace->tool.ordered_events = true;
2627 trace->tool.ordering_requires_timestamps = true;
2629 /* add tid to output */
2630 trace->multiple_threads = true;
2632 session = perf_session__new(&file, false, &trace->tool);
2633 if (session == NULL)
2636 if (symbol__init(&session->header.env) < 0)
/* In replay the "host" machine is the one recorded in the session. */
2639 trace->host = &session->machines.host;
2641 err = perf_session__set_tracepoints_handlers(session, handlers);
2645 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2646 "raw_syscalls:sys_enter");
2647 /* older kernels have syscalls tp versus raw_syscalls */
2649 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2650 "syscalls:sys_enter");
2653 (perf_evsel__init_syscall_tp(evsel, trace__sys_enter) < 0 ||
2654 perf_evsel__init_sc_tp_ptr_field(evsel, args))) {
2655 pr_err("Error during initialize raw_syscalls:sys_enter event\n");
2659 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2660 "raw_syscalls:sys_exit");
2662 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2663 "syscalls:sys_exit");
2665 (perf_evsel__init_syscall_tp(evsel, trace__sys_exit) < 0 ||
2666 perf_evsel__init_sc_tp_uint_field(evsel, ret))) {
2667 pr_err("Error during initialize raw_syscalls:sys_exit event\n");
/* Any software page fault counter in the file is routed to trace__pgfault(). */
2671 evlist__for_each(session->evlist, evsel) {
2672 if (evsel->attr.type == PERF_TYPE_SOFTWARE &&
2673 (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ||
2674 evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MIN ||
2675 evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS))
2676 evsel->handler = trace__pgfault;
2679 err = parse_target_str(trace);
2685 err = perf_session__process_events(session);
2687 pr_err("Failed to process events, error %d", err);
2689 else if (trace->summary)
2690 trace__fprintf_thread_summary(trace, trace->output);
2693 perf_session__delete(session);
/*
 * Print the "Summary of events:" heading to @fp. The return statement is on
 * a line not shown here; presumably it returns the byte count -- confirm.
 */
2698 static size_t trace__fprintf_threads_header(FILE *fp)
2702 printed = fprintf(fp, "\n Summary of events:\n\n");
/*
 * Print the per-syscall statistics table for one thread: for every node in
 * ttrace->syscall_stats, the syscall name, call count, min/avg/max duration
 * in milliseconds and the standard deviation as a percentage of the
 * average.
 * NOTE(review): the loop construct, the NULL checks on inode/stats and the
 * return are on lines not shown here.
 */
2707 static size_t thread__dump_stats(struct thread_trace *ttrace,
2708 struct trace *trace, FILE *fp)
2710 struct stats *stats;
2713 struct int_node *inode = intlist__first(ttrace->syscall_stats);
2718 printed += fprintf(fp, "\n");
2720 printed += fprintf(fp, " syscall calls min avg max stddev\n");
2721 printed += fprintf(fp, " (msec) (msec) (msec) (%%)\n");
2722 printed += fprintf(fp, " --------------- -------- --------- --------- --------- ------\n");
2724 /* each int_node is a syscall */
2726 stats = inode->priv;
2728 double min = (double)(stats->min) / NSEC_PER_MSEC;
2729 double max = (double)(stats->max) / NSEC_PER_MSEC;
2730 double avg = avg_stats(stats);
2732 u64 n = (u64) stats->n;
/* pct is computed before avg is rescaled, so both operands share a unit; a zero avg yields 0. */
2734 pct = avg ? 100.0 * stddev_stats(stats)/avg : 0.0;
2735 avg /= NSEC_PER_MSEC;
/* The node's integer key is the syscall id, used to index the syscall table. */
2737 sc = &trace->syscalls.table[inode->i];
2738 printed += fprintf(fp, " %-15s", sc->name);
2739 printed += fprintf(fp, " %8" PRIu64 " %9.3f %9.3f",
2741 printed += fprintf(fp, " %9.3f %9.2f%%\n", max, pct);
2744 inode = intlist__next(inode);
2747 printed += fprintf(fp, "\n\n");
/*
 * Context handed to the per-thread summary callback through its void *priv
 * argument.
 */
struct summary_data {
	FILE *fp;		/* stream the summary is written to */
	struct trace *trace;	/* trace session being summarised */
	size_t printed;		/* running total of the fprintf() results */
};
2759 static int trace__fprintf_one_thread(struct thread *thread, void *priv)
2761 struct summary_data *data = priv;
2762 FILE *fp = data->fp;
2763 size_t printed = data->printed;
2764 struct trace *trace = data->trace;
2765 struct thread_trace *ttrace = thread__priv(thread);
2771 ratio = (double)ttrace->nr_events / trace->nr_events * 100.0;
2773 printed += fprintf(fp, " %s (%d), ", thread__comm_str(thread), thread->tid);
2774 printed += fprintf(fp, "%lu events, ", ttrace->nr_events);
2775 printed += fprintf(fp, "%.1f%%", ratio);
2777 printed += fprintf(fp, ", %lu majfaults", ttrace->pfmaj);
2779 printed += fprintf(fp, ", %lu minfaults", ttrace->pfmin);
2780 printed += fprintf(fp, ", %.3f msec\n", ttrace->runtime_ms);
2781 printed += thread__dump_stats(ttrace, trace, fp);
2783 data->printed += printed;
2788 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
2790 struct summary_data data = {
2794 data.printed = trace__fprintf_threads_header(fp);
2796 machine__for_each_thread(trace->host, trace__fprintf_one_thread, &data);
2798 return data.printed;
2801 static int trace__set_duration(const struct option *opt, const char *str,
2802 int unset __maybe_unused)
2804 struct trace *trace = opt->value;
2806 trace->duration_filter = atof(str);
2810 static int trace__set_filter_pids(const struct option *opt, const char *str,
2811 int unset __maybe_unused)
2815 struct trace *trace = opt->value;
2817 * FIXME: introduce a intarray class, plain parse csv and create a
2818 * { int nr, int entries[] } struct...
2820 struct intlist *list = intlist__new(str);
2825 i = trace->filter_pids.nr = intlist__nr_entries(list) + 1;
2826 trace->filter_pids.entries = calloc(i, sizeof(pid_t));
2828 if (trace->filter_pids.entries == NULL)
2831 trace->filter_pids.entries[0] = getpid();
2833 for (i = 1; i < trace->filter_pids.nr; ++i)
2834 trace->filter_pids.entries[i] = intlist__entry(list, i - 1)->i;
2836 intlist__delete(list);
2842 static int trace__open_output(struct trace *trace, const char *filename)
2846 if (!stat(filename, &st) && st.st_size) {
2847 char oldname[PATH_MAX];
2849 scnprintf(oldname, sizeof(oldname), "%s.old", filename);
2851 rename(filename, oldname);
2854 trace->output = fopen(filename, "w");
2856 return trace->output == NULL ? -errno : 0;
2859 static int parse_pagefaults(const struct option *opt, const char *str,
2860 int unset __maybe_unused)
2862 int *trace_pgfaults = opt->value;
2864 if (strcmp(str, "all") == 0)
2865 *trace_pgfaults |= TRACE_PFMAJ | TRACE_PFMIN;
2866 else if (strcmp(str, "maj") == 0)
2867 *trace_pgfaults |= TRACE_PFMAJ;
2868 else if (strcmp(str, "min") == 0)
2869 *trace_pgfaults |= TRACE_PFMIN;
2876 static void evlist__set_evsel_handler(struct perf_evlist *evlist, void *handler)
2878 struct perf_evsel *evsel;
2880 evlist__for_each(evlist, evsel)
2881 evsel->handler = handler;
2884 int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
2886 const char *trace_usage[] = {
2887 "perf trace [<options>] [<command>]",
2888 "perf trace [<options>] -- <command> [<options>]",
2889 "perf trace record [<options>] [<command>]",
2890 "perf trace record [<options>] -- <command> [<options>]",
2893 struct trace trace = {
2895 .machine = audit_detect_machine(),
2896 .open_id = audit_name_to_syscall("open", trace.audit.machine),
2906 .user_freq = UINT_MAX,
2907 .user_interval = ULLONG_MAX,
2908 .no_buffering = true,
2909 .mmap_pages = UINT_MAX,
2910 .proc_map_timeout = 500,
2914 .trace_syscalls = true,
2916 const char *output_name = NULL;
2917 const char *ev_qualifier_str = NULL;
2918 const struct option trace_options[] = {
2919 OPT_CALLBACK(0, "event", &trace.evlist, "event",
2920 "event selector. use 'perf list' to list available events",
2921 parse_events_option),
2922 OPT_BOOLEAN(0, "comm", &trace.show_comm,
2923 "show the thread COMM next to its id"),
2924 OPT_BOOLEAN(0, "tool_stats", &trace.show_tool_stats, "show tool stats"),
2925 OPT_STRING('e', "expr", &ev_qualifier_str, "expr", "list of syscalls to trace"),
2926 OPT_STRING('o', "output", &output_name, "file", "output file name"),
2927 OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"),
2928 OPT_STRING('p', "pid", &trace.opts.target.pid, "pid",
2929 "trace events on existing process id"),
2930 OPT_STRING('t', "tid", &trace.opts.target.tid, "tid",
2931 "trace events on existing thread id"),
2932 OPT_CALLBACK(0, "filter-pids", &trace, "CSV list of pids",
2933 "pids to filter (by the kernel)", trace__set_filter_pids),
2934 OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide,
2935 "system-wide collection from all CPUs"),
2936 OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu",
2937 "list of cpus to monitor"),
2938 OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit,
2939 "child tasks do not inherit counters"),
2940 OPT_CALLBACK('m', "mmap-pages", &trace.opts.mmap_pages, "pages",
2941 "number of mmap data pages",
2942 perf_evlist__parse_mmap_pages),
2943 OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user",
2945 OPT_CALLBACK(0, "duration", &trace, "float",
2946 "show only events with duration > N.M ms",
2947 trace__set_duration),
2948 OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"),
2949 OPT_INCR('v', "verbose", &verbose, "be more verbose"),
2950 OPT_BOOLEAN('T', "time", &trace.full_time,
2951 "Show full timestamp, not time relative to first start"),
2952 OPT_BOOLEAN('s', "summary", &trace.summary_only,
2953 "Show only syscall summary with statistics"),
2954 OPT_BOOLEAN('S', "with-summary", &trace.summary,
2955 "Show all syscalls and summary with statistics"),
2956 OPT_CALLBACK_DEFAULT('F', "pf", &trace.trace_pgfaults, "all|maj|min",
2957 "Trace pagefaults", parse_pagefaults, "maj"),
2958 OPT_BOOLEAN(0, "syscalls", &trace.trace_syscalls, "Trace syscalls"),
2959 OPT_BOOLEAN('f', "force", &trace.force, "don't complain, do it"),
2960 OPT_UINTEGER(0, "proc-map-timeout", &trace.opts.proc_map_timeout,
2961 "per thread proc mmap processing timeout in ms"),
2964 const char * const trace_subcommands[] = { "record", NULL };
2968 signal(SIGSEGV, sighandler_dump_stack);
2969 signal(SIGFPE, sighandler_dump_stack);
2971 trace.evlist = perf_evlist__new();
2973 if (trace.evlist == NULL) {
2974 pr_err("Not enough memory to run!\n");
2979 argc = parse_options_subcommand(argc, argv, trace_options, trace_subcommands,
2980 trace_usage, PARSE_OPT_STOP_AT_NON_OPTION);
2982 if (trace.trace_pgfaults) {
2983 trace.opts.sample_address = true;
2984 trace.opts.sample_time = true;
2987 if (trace.evlist->nr_entries > 0)
2988 evlist__set_evsel_handler(trace.evlist, trace__event_handler);
2990 if ((argc >= 1) && (strcmp(argv[0], "record") == 0))
2991 return trace__record(&trace, argc-1, &argv[1]);
2993 /* summary_only implies summary option, but don't overwrite summary if set */
2994 if (trace.summary_only)
2995 trace.summary = trace.summary_only;
2997 if (!trace.trace_syscalls && !trace.trace_pgfaults &&
2998 trace.evlist->nr_entries == 0 /* Was --events used? */) {
2999 pr_err("Please specify something to trace.\n");
3003 if (output_name != NULL) {
3004 err = trace__open_output(&trace, output_name);
3006 perror("failed to create output file");
3011 if (ev_qualifier_str != NULL) {
3012 const char *s = ev_qualifier_str;
3013 struct strlist_config slist_config = {
3014 .dirname = system_path(STRACE_GROUPS_DIR),
3017 trace.not_ev_qualifier = *s == '!';
3018 if (trace.not_ev_qualifier)
3020 trace.ev_qualifier = strlist__new(s, &slist_config);
3021 if (trace.ev_qualifier == NULL) {
3022 fputs("Not enough memory to parse event qualifier",
3028 err = trace__validate_ev_qualifier(&trace);
3033 err = target__validate(&trace.opts.target);
3035 target__strerror(&trace.opts.target, err, bf, sizeof(bf));
3036 fprintf(trace.output, "%s", bf);
3040 err = target__parse_uid(&trace.opts.target);
3042 target__strerror(&trace.opts.target, err, bf, sizeof(bf));
3043 fprintf(trace.output, "%s", bf);
3047 if (!argc && target__none(&trace.opts.target))
3048 trace.opts.target.system_wide = true;
3051 err = trace__replay(&trace);
3053 err = trace__run(&trace, argc, argv);
3056 if (output_name != NULL)
3057 fclose(trace.output);