1 #include <traceevent/event-parse.h>
3 #include "util/color.h"
4 #include "util/debug.h"
5 #include "util/evlist.h"
6 #include "util/exec_cmd.h"
7 #include "util/machine.h"
8 #include "util/session.h"
9 #include "util/thread.h"
10 #include "util/parse-options.h"
11 #include "util/strlist.h"
12 #include "util/intlist.h"
13 #include "util/thread_map.h"
14 #include "util/stat.h"
15 #include "trace-event.h"
16 #include "util/parse-events.h"
21 #include <linux/futex.h>
23 /* For older distros: */
25 # define MAP_STACK 0x20000
29 # define MADV_HWPOISON 100
32 #ifndef MADV_MERGEABLE
33 # define MADV_MERGEABLE 12
36 #ifndef MADV_UNMERGEABLE
37 # define MADV_UNMERGEABLE 13
41 # define EFD_SEMAPHORE 1
45 # define EFD_NONBLOCK 00004000
49 # define EFD_CLOEXEC 02000000
53 # define O_CLOEXEC 02000000
61 # define SOCK_CLOEXEC 02000000
65 # define SOCK_NONBLOCK 00004000
68 #ifndef MSG_CMSG_CLOEXEC
69 # define MSG_CMSG_CLOEXEC 0x40000000
72 #ifndef PERF_FLAG_FD_NO_GROUP
73 # define PERF_FLAG_FD_NO_GROUP (1UL << 0)
76 #ifndef PERF_FLAG_FD_OUTPUT
77 # define PERF_FLAG_FD_OUTPUT (1UL << 1)
80 #ifndef PERF_FLAG_PID_CGROUP
81 # define PERF_FLAG_PID_CGROUP (1UL << 2) /* pid=cgroup id, per-cpu mode only */
84 #ifndef PERF_FLAG_FD_CLOEXEC
85 # define PERF_FLAG_FD_CLOEXEC (1UL << 3) /* O_CLOEXEC */
92 u64 (*integer)(struct tp_field *field, struct perf_sample *sample);
93 void *(*pointer)(struct tp_field *field, struct perf_sample *sample);
97 #define TP_UINT_FIELD(bits) \
98 static u64 tp_field__u##bits(struct tp_field *field, struct perf_sample *sample) \
101 memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
110 #define TP_UINT_FIELD__SWAPPED(bits) \
111 static u64 tp_field__swapped_u##bits(struct tp_field *field, struct perf_sample *sample) \
114 memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
115 return bswap_##bits(value);\
118 TP_UINT_FIELD__SWAPPED(16);
119 TP_UINT_FIELD__SWAPPED(32);
120 TP_UINT_FIELD__SWAPPED(64);
122 static int tp_field__init_uint(struct tp_field *field,
123 struct format_field *format_field,
126 field->offset = format_field->offset;
128 switch (format_field->size) {
130 field->integer = tp_field__u8;
133 field->integer = needs_swap ? tp_field__swapped_u16 : tp_field__u16;
136 field->integer = needs_swap ? tp_field__swapped_u32 : tp_field__u32;
139 field->integer = needs_swap ? tp_field__swapped_u64 : tp_field__u64;
148 static void *tp_field__ptr(struct tp_field *field, struct perf_sample *sample)
150 return sample->raw_data + field->offset;
153 static int tp_field__init_ptr(struct tp_field *field, struct format_field *format_field)
155 field->offset = format_field->offset;
156 field->pointer = tp_field__ptr;
163 struct tp_field args, ret;
167 static int perf_evsel__init_tp_uint_field(struct perf_evsel *evsel,
168 struct tp_field *field,
171 struct format_field *format_field = perf_evsel__field(evsel, name);
173 if (format_field == NULL)
176 return tp_field__init_uint(field, format_field, evsel->needs_swap);
179 #define perf_evsel__init_sc_tp_uint_field(evsel, name) \
180 ({ struct syscall_tp *sc = evsel->priv;\
181 perf_evsel__init_tp_uint_field(evsel, &sc->name, #name); })
183 static int perf_evsel__init_tp_ptr_field(struct perf_evsel *evsel,
184 struct tp_field *field,
187 struct format_field *format_field = perf_evsel__field(evsel, name);
189 if (format_field == NULL)
192 return tp_field__init_ptr(field, format_field);
195 #define perf_evsel__init_sc_tp_ptr_field(evsel, name) \
196 ({ struct syscall_tp *sc = evsel->priv;\
197 perf_evsel__init_tp_ptr_field(evsel, &sc->name, #name); })
199 static void perf_evsel__delete_priv(struct perf_evsel *evsel)
202 perf_evsel__delete(evsel);
205 static int perf_evsel__init_syscall_tp(struct perf_evsel *evsel, void *handler)
207 evsel->priv = malloc(sizeof(struct syscall_tp));
208 if (evsel->priv != NULL) {
209 if (perf_evsel__init_sc_tp_uint_field(evsel, id))
212 evsel->handler = handler;
223 static struct perf_evsel *perf_evsel__syscall_newtp(const char *direction, void *handler)
225 struct perf_evsel *evsel = perf_evsel__newtp("raw_syscalls", direction);
227 /* older kernel (e.g., RHEL6) use syscalls:{enter,exit} */
229 evsel = perf_evsel__newtp("syscalls", direction);
232 if (perf_evsel__init_syscall_tp(evsel, handler))
239 perf_evsel__delete_priv(evsel);
243 #define perf_evsel__sc_tp_uint(evsel, name, sample) \
244 ({ struct syscall_tp *fields = evsel->priv; \
245 fields->name.integer(&fields->name, sample); })
247 #define perf_evsel__sc_tp_ptr(evsel, name, sample) \
248 ({ struct syscall_tp *fields = evsel->priv; \
249 fields->name.pointer(&fields->name, sample); })
253 struct thread *thread;
263 const char **entries;
266 #define DEFINE_STRARRAY(array) struct strarray strarray__##array = { \
267 .nr_entries = ARRAY_SIZE(array), \
271 #define DEFINE_STRARRAY_OFFSET(array, off) struct strarray strarray__##array = { \
273 .nr_entries = ARRAY_SIZE(array), \
277 static size_t __syscall_arg__scnprintf_strarray(char *bf, size_t size,
279 struct syscall_arg *arg)
281 struct strarray *sa = arg->parm;
282 int idx = arg->val - sa->offset;
284 if (idx < 0 || idx >= sa->nr_entries)
285 return scnprintf(bf, size, intfmt, arg->val);
287 return scnprintf(bf, size, "%s", sa->entries[idx]);
/*
 * strarray beautifier that prints values without a symbolic name in
 * decimal ("%d").
 */
static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size,
					      struct syscall_arg *arg)
{
	size_t printed = __syscall_arg__scnprintf_strarray(bf, size, "%d", arg);

	return printed;
}
296 #define SCA_STRARRAY syscall_arg__scnprintf_strarray
298 #if defined(__i386__) || defined(__x86_64__)
300 * FIXME: Make this available to all arches as soon as the ioctl beautifier
301 * gets rewritten to support all arches.
303 static size_t syscall_arg__scnprintf_strhexarray(char *bf, size_t size,
304 struct syscall_arg *arg)
306 return __syscall_arg__scnprintf_strarray(bf, size, "%#x", arg);
309 #define SCA_STRHEXARRAY syscall_arg__scnprintf_strhexarray
310 #endif /* defined(__i386__) || defined(__x86_64__) */
312 static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
313 struct syscall_arg *arg);
315 #define SCA_FD syscall_arg__scnprintf_fd
317 static size_t syscall_arg__scnprintf_fd_at(char *bf, size_t size,
318 struct syscall_arg *arg)
323 return scnprintf(bf, size, "CWD");
325 return syscall_arg__scnprintf_fd(bf, size, arg);
328 #define SCA_FDAT syscall_arg__scnprintf_fd_at
330 static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
331 struct syscall_arg *arg);
333 #define SCA_CLOSE_FD syscall_arg__scnprintf_close_fd
335 static size_t syscall_arg__scnprintf_hex(char *bf, size_t size,
336 struct syscall_arg *arg)
338 return scnprintf(bf, size, "%#lx", arg->val);
341 #define SCA_HEX syscall_arg__scnprintf_hex
343 static size_t syscall_arg__scnprintf_int(char *bf, size_t size,
344 struct syscall_arg *arg)
346 return scnprintf(bf, size, "%d", arg->val);
349 #define SCA_INT syscall_arg__scnprintf_int
351 static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size,
352 struct syscall_arg *arg)
354 int printed = 0, prot = arg->val;
356 if (prot == PROT_NONE)
357 return scnprintf(bf, size, "NONE");
358 #define P_MMAP_PROT(n) \
359 if (prot & PROT_##n) { \
360 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
370 P_MMAP_PROT(GROWSDOWN);
371 P_MMAP_PROT(GROWSUP);
375 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", prot);
380 #define SCA_MMAP_PROT syscall_arg__scnprintf_mmap_prot
382 static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size,
383 struct syscall_arg *arg)
385 int printed = 0, flags = arg->val;
387 #define P_MMAP_FLAG(n) \
388 if (flags & MAP_##n) { \
389 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
394 P_MMAP_FLAG(PRIVATE);
398 P_MMAP_FLAG(ANONYMOUS);
399 P_MMAP_FLAG(DENYWRITE);
400 P_MMAP_FLAG(EXECUTABLE);
403 P_MMAP_FLAG(GROWSDOWN);
405 P_MMAP_FLAG(HUGETLB);
408 P_MMAP_FLAG(NONBLOCK);
409 P_MMAP_FLAG(NORESERVE);
410 P_MMAP_FLAG(POPULATE);
412 #ifdef MAP_UNINITIALIZED
413 P_MMAP_FLAG(UNINITIALIZED);
418 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
423 #define SCA_MMAP_FLAGS syscall_arg__scnprintf_mmap_flags
425 static size_t syscall_arg__scnprintf_mremap_flags(char *bf, size_t size,
426 struct syscall_arg *arg)
428 int printed = 0, flags = arg->val;
430 #define P_MREMAP_FLAG(n) \
431 if (flags & MREMAP_##n) { \
432 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
433 flags &= ~MREMAP_##n; \
436 P_MREMAP_FLAG(MAYMOVE);
438 P_MREMAP_FLAG(FIXED);
443 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
448 #define SCA_MREMAP_FLAGS syscall_arg__scnprintf_mremap_flags
450 static size_t syscall_arg__scnprintf_madvise_behavior(char *bf, size_t size,
451 struct syscall_arg *arg)
453 int behavior = arg->val;
456 #define P_MADV_BHV(n) case MADV_##n: return scnprintf(bf, size, #n)
459 P_MADV_BHV(SEQUENTIAL);
460 P_MADV_BHV(WILLNEED);
461 P_MADV_BHV(DONTNEED);
463 P_MADV_BHV(DONTFORK);
465 P_MADV_BHV(HWPOISON);
466 #ifdef MADV_SOFT_OFFLINE
467 P_MADV_BHV(SOFT_OFFLINE);
469 P_MADV_BHV(MERGEABLE);
470 P_MADV_BHV(UNMERGEABLE);
472 P_MADV_BHV(HUGEPAGE);
474 #ifdef MADV_NOHUGEPAGE
475 P_MADV_BHV(NOHUGEPAGE);
478 P_MADV_BHV(DONTDUMP);
487 return scnprintf(bf, size, "%#x", behavior);
490 #define SCA_MADV_BHV syscall_arg__scnprintf_madvise_behavior
492 static size_t syscall_arg__scnprintf_flock(char *bf, size_t size,
493 struct syscall_arg *arg)
495 int printed = 0, op = arg->val;
498 return scnprintf(bf, size, "NONE");
500 if ((op & LOCK_##cmd) == LOCK_##cmd) { \
501 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #cmd); \
516 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", op);
521 #define SCA_FLOCK syscall_arg__scnprintf_flock
523 static size_t syscall_arg__scnprintf_futex_op(char *bf, size_t size, struct syscall_arg *arg)
525 enum syscall_futex_args {
526 SCF_UADDR = (1 << 0),
529 SCF_TIMEOUT = (1 << 3),
530 SCF_UADDR2 = (1 << 4),
534 int cmd = op & FUTEX_CMD_MASK;
538 #define P_FUTEX_OP(n) case FUTEX_##n: printed = scnprintf(bf, size, #n);
539 P_FUTEX_OP(WAIT); arg->mask |= SCF_VAL3|SCF_UADDR2; break;
540 P_FUTEX_OP(WAKE); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
541 P_FUTEX_OP(FD); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
542 P_FUTEX_OP(REQUEUE); arg->mask |= SCF_VAL3|SCF_TIMEOUT; break;
543 P_FUTEX_OP(CMP_REQUEUE); arg->mask |= SCF_TIMEOUT; break;
544 P_FUTEX_OP(CMP_REQUEUE_PI); arg->mask |= SCF_TIMEOUT; break;
545 P_FUTEX_OP(WAKE_OP); break;
546 P_FUTEX_OP(LOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
547 P_FUTEX_OP(UNLOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
548 P_FUTEX_OP(TRYLOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2; break;
549 P_FUTEX_OP(WAIT_BITSET); arg->mask |= SCF_UADDR2; break;
550 P_FUTEX_OP(WAKE_BITSET); arg->mask |= SCF_UADDR2; break;
551 P_FUTEX_OP(WAIT_REQUEUE_PI); break;
552 default: printed = scnprintf(bf, size, "%#x", cmd); break;
555 if (op & FUTEX_PRIVATE_FLAG)
556 printed += scnprintf(bf + printed, size - printed, "|PRIV");
558 if (op & FUTEX_CLOCK_REALTIME)
559 printed += scnprintf(bf + printed, size - printed, "|CLKRT");
564 #define SCA_FUTEX_OP syscall_arg__scnprintf_futex_op
566 static const char *epoll_ctl_ops[] = { "ADD", "DEL", "MOD", };
567 static DEFINE_STRARRAY_OFFSET(epoll_ctl_ops, 1);
569 static const char *itimers[] = { "REAL", "VIRTUAL", "PROF", };
570 static DEFINE_STRARRAY(itimers);
572 static const char *whences[] = { "SET", "CUR", "END",
580 static DEFINE_STRARRAY(whences);
582 static const char *fcntl_cmds[] = {
583 "DUPFD", "GETFD", "SETFD", "GETFL", "SETFL", "GETLK", "SETLK",
584 "SETLKW", "SETOWN", "GETOWN", "SETSIG", "GETSIG", "F_GETLK64",
585 "F_SETLK64", "F_SETLKW64", "F_SETOWN_EX", "F_GETOWN_EX",
588 static DEFINE_STRARRAY(fcntl_cmds);
590 static const char *rlimit_resources[] = {
591 "CPU", "FSIZE", "DATA", "STACK", "CORE", "RSS", "NPROC", "NOFILE",
592 "MEMLOCK", "AS", "LOCKS", "SIGPENDING", "MSGQUEUE", "NICE", "RTPRIO",
595 static DEFINE_STRARRAY(rlimit_resources);
597 static const char *sighow[] = { "BLOCK", "UNBLOCK", "SETMASK", };
598 static DEFINE_STRARRAY(sighow);
600 static const char *clockid[] = {
601 "REALTIME", "MONOTONIC", "PROCESS_CPUTIME_ID", "THREAD_CPUTIME_ID",
602 "MONOTONIC_RAW", "REALTIME_COARSE", "MONOTONIC_COARSE",
604 static DEFINE_STRARRAY(clockid);
606 static const char *socket_families[] = {
607 "UNSPEC", "LOCAL", "INET", "AX25", "IPX", "APPLETALK", "NETROM",
608 "BRIDGE", "ATMPVC", "X25", "INET6", "ROSE", "DECnet", "NETBEUI",
609 "SECURITY", "KEY", "NETLINK", "PACKET", "ASH", "ECONET", "ATMSVC",
610 "RDS", "SNA", "IRDA", "PPPOX", "WANPIPE", "LLC", "IB", "CAN", "TIPC",
611 "BLUETOOTH", "IUCV", "RXRPC", "ISDN", "PHONET", "IEEE802154", "CAIF",
612 "ALG", "NFC", "VSOCK",
614 static DEFINE_STRARRAY(socket_families);
616 #ifndef SOCK_TYPE_MASK
617 #define SOCK_TYPE_MASK 0xf
620 static size_t syscall_arg__scnprintf_socket_type(char *bf, size_t size,
621 struct syscall_arg *arg)
625 flags = type & ~SOCK_TYPE_MASK;
627 type &= SOCK_TYPE_MASK;
629 * Can't use a strarray, MIPS may override for ABI reasons.
632 #define P_SK_TYPE(n) case SOCK_##n: printed = scnprintf(bf, size, #n); break;
637 P_SK_TYPE(SEQPACKET);
642 printed = scnprintf(bf, size, "%#x", type);
645 #define P_SK_FLAG(n) \
646 if (flags & SOCK_##n) { \
647 printed += scnprintf(bf + printed, size - printed, "|%s", #n); \
648 flags &= ~SOCK_##n; \
656 printed += scnprintf(bf + printed, size - printed, "|%#x", flags);
661 #define SCA_SK_TYPE syscall_arg__scnprintf_socket_type
664 #define MSG_PROBE 0x10
666 #ifndef MSG_WAITFORONE
667 #define MSG_WAITFORONE 0x10000
669 #ifndef MSG_SENDPAGE_NOTLAST
670 #define MSG_SENDPAGE_NOTLAST 0x20000
673 #define MSG_FASTOPEN 0x20000000
676 static size_t syscall_arg__scnprintf_msg_flags(char *bf, size_t size,
677 struct syscall_arg *arg)
679 int printed = 0, flags = arg->val;
682 return scnprintf(bf, size, "NONE");
683 #define P_MSG_FLAG(n) \
684 if (flags & MSG_##n) { \
685 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
691 P_MSG_FLAG(DONTROUTE);
696 P_MSG_FLAG(DONTWAIT);
703 P_MSG_FLAG(ERRQUEUE);
704 P_MSG_FLAG(NOSIGNAL);
706 P_MSG_FLAG(WAITFORONE);
707 P_MSG_FLAG(SENDPAGE_NOTLAST);
708 P_MSG_FLAG(FASTOPEN);
709 P_MSG_FLAG(CMSG_CLOEXEC);
713 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
718 #define SCA_MSG_FLAGS syscall_arg__scnprintf_msg_flags
720 static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size,
721 struct syscall_arg *arg)
726 if (mode == F_OK) /* 0 */
727 return scnprintf(bf, size, "F");
729 if (mode & n##_OK) { \
730 printed += scnprintf(bf + printed, size - printed, "%s", #n); \
740 printed += scnprintf(bf + printed, size - printed, "|%#x", mode);
745 #define SCA_ACCMODE syscall_arg__scnprintf_access_mode
747 static size_t syscall_arg__scnprintf_open_flags(char *bf, size_t size,
748 struct syscall_arg *arg)
750 int printed = 0, flags = arg->val;
752 if (!(flags & O_CREAT))
753 arg->mask |= 1 << (arg->idx + 1); /* Mask the mode parm */
756 return scnprintf(bf, size, "RDONLY");
758 if (flags & O_##n) { \
759 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
783 if ((flags & O_SYNC) == O_SYNC)
784 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", "SYNC");
796 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
801 #define SCA_OPEN_FLAGS syscall_arg__scnprintf_open_flags
803 static size_t syscall_arg__scnprintf_perf_flags(char *bf, size_t size,
804 struct syscall_arg *arg)
806 int printed = 0, flags = arg->val;
812 if (flags & PERF_FLAG_##n) { \
813 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
814 flags &= ~PERF_FLAG_##n; \
824 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
829 #define SCA_PERF_FLAGS syscall_arg__scnprintf_perf_flags
831 static size_t syscall_arg__scnprintf_eventfd_flags(char *bf, size_t size,
832 struct syscall_arg *arg)
834 int printed = 0, flags = arg->val;
837 return scnprintf(bf, size, "NONE");
839 if (flags & EFD_##n) { \
840 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
850 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
855 #define SCA_EFD_FLAGS syscall_arg__scnprintf_eventfd_flags
857 static size_t syscall_arg__scnprintf_pipe_flags(char *bf, size_t size,
858 struct syscall_arg *arg)
860 int printed = 0, flags = arg->val;
863 if (flags & O_##n) { \
864 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
873 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
878 #define SCA_PIPE_FLAGS syscall_arg__scnprintf_pipe_flags
880 static size_t syscall_arg__scnprintf_signum(char *bf, size_t size, struct syscall_arg *arg)
885 #define P_SIGNUM(n) case SIG##n: return scnprintf(bf, size, #n)
928 return scnprintf(bf, size, "%#x", sig);
931 #define SCA_SIGNUM syscall_arg__scnprintf_signum
933 #if defined(__i386__) || defined(__x86_64__)
935 * FIXME: Make this available to all arches.
937 #define TCGETS 0x5401
939 static const char *tioctls[] = {
940 "TCGETS", "TCSETS", "TCSETSW", "TCSETSF", "TCGETA", "TCSETA", "TCSETAW",
941 "TCSETAF", "TCSBRK", "TCXONC", "TCFLSH", "TIOCEXCL", "TIOCNXCL",
942 "TIOCSCTTY", "TIOCGPGRP", "TIOCSPGRP", "TIOCOUTQ", "TIOCSTI",
943 "TIOCGWINSZ", "TIOCSWINSZ", "TIOCMGET", "TIOCMBIS", "TIOCMBIC",
944 "TIOCMSET", "TIOCGSOFTCAR", "TIOCSSOFTCAR", "FIONREAD", "TIOCLINUX",
945 "TIOCCONS", "TIOCGSERIAL", "TIOCSSERIAL", "TIOCPKT", "FIONBIO",
946 "TIOCNOTTY", "TIOCSETD", "TIOCGETD", "TCSBRKP", [0x27] = "TIOCSBRK",
947 "TIOCCBRK", "TIOCGSID", "TCGETS2", "TCSETS2", "TCSETSW2", "TCSETSF2",
948 "TIOCGRS485", "TIOCSRS485", "TIOCGPTN", "TIOCSPTLCK",
949 "TIOCGDEV||TCGETX", "TCSETX", "TCSETXF", "TCSETXW", "TIOCSIG",
950 "TIOCVHANGUP", "TIOCGPKT", "TIOCGPTLCK", "TIOCGEXCL",
951 [0x50] = "FIONCLEX", "FIOCLEX", "FIOASYNC", "TIOCSERCONFIG",
952 "TIOCSERGWILD", "TIOCSERSWILD", "TIOCGLCKTRMIOS", "TIOCSLCKTRMIOS",
953 "TIOCSERGSTRUCT", "TIOCSERGETLSR", "TIOCSERGETMULTI", "TIOCSERSETMULTI",
954 "TIOCMIWAIT", "TIOCGICOUNT", [0x60] = "FIOQSIZE",
957 static DEFINE_STRARRAY_OFFSET(tioctls, 0x5401);
958 #endif /* defined(__i386__) || defined(__x86_64__) */
960 #define STRARRAY(arg, name, array) \
961 .arg_scnprintf = { [arg] = SCA_STRARRAY, }, \
962 .arg_parm = { [arg] = &strarray__##array, }
964 static struct syscall_fmt {
967 size_t (*arg_scnprintf[6])(char *bf, size_t size, struct syscall_arg *arg);
973 { .name = "access", .errmsg = true,
974 .arg_scnprintf = { [1] = SCA_ACCMODE, /* mode */ }, },
975 { .name = "arch_prctl", .errmsg = true, .alias = "prctl", },
976 { .name = "brk", .hexret = true,
977 .arg_scnprintf = { [0] = SCA_HEX, /* brk */ }, },
978 { .name = "clock_gettime", .errmsg = true, STRARRAY(0, clk_id, clockid), },
979 { .name = "close", .errmsg = true,
980 .arg_scnprintf = { [0] = SCA_CLOSE_FD, /* fd */ }, },
981 { .name = "connect", .errmsg = true, },
982 { .name = "dup", .errmsg = true,
983 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
984 { .name = "dup2", .errmsg = true,
985 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
986 { .name = "dup3", .errmsg = true,
987 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
988 { .name = "epoll_ctl", .errmsg = true, STRARRAY(1, op, epoll_ctl_ops), },
989 { .name = "eventfd2", .errmsg = true,
990 .arg_scnprintf = { [1] = SCA_EFD_FLAGS, /* flags */ }, },
991 { .name = "faccessat", .errmsg = true,
992 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
993 { .name = "fadvise64", .errmsg = true,
994 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
995 { .name = "fallocate", .errmsg = true,
996 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
997 { .name = "fchdir", .errmsg = true,
998 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
999 { .name = "fchmod", .errmsg = true,
1000 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1001 { .name = "fchmodat", .errmsg = true,
1002 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
1003 { .name = "fchown", .errmsg = true,
1004 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1005 { .name = "fchownat", .errmsg = true,
1006 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
1007 { .name = "fcntl", .errmsg = true,
1008 .arg_scnprintf = { [0] = SCA_FD, /* fd */
1009 [1] = SCA_STRARRAY, /* cmd */ },
1010 .arg_parm = { [1] = &strarray__fcntl_cmds, /* cmd */ }, },
1011 { .name = "fdatasync", .errmsg = true,
1012 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1013 { .name = "flock", .errmsg = true,
1014 .arg_scnprintf = { [0] = SCA_FD, /* fd */
1015 [1] = SCA_FLOCK, /* cmd */ }, },
1016 { .name = "fsetxattr", .errmsg = true,
1017 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1018 { .name = "fstat", .errmsg = true, .alias = "newfstat",
1019 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1020 { .name = "fstatat", .errmsg = true, .alias = "newfstatat",
1021 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1022 { .name = "fstatfs", .errmsg = true,
1023 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1024 { .name = "fsync", .errmsg = true,
1025 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1026 { .name = "ftruncate", .errmsg = true,
1027 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1028 { .name = "futex", .errmsg = true,
1029 .arg_scnprintf = { [1] = SCA_FUTEX_OP, /* op */ }, },
1030 { .name = "futimesat", .errmsg = true,
1031 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
1032 { .name = "getdents", .errmsg = true,
1033 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1034 { .name = "getdents64", .errmsg = true,
1035 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1036 { .name = "getitimer", .errmsg = true, STRARRAY(0, which, itimers), },
1037 { .name = "getrlimit", .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
1038 { .name = "ioctl", .errmsg = true,
1039 .arg_scnprintf = { [0] = SCA_FD, /* fd */
1040 #if defined(__i386__) || defined(__x86_64__)
1042 * FIXME: Make this available to all arches.
1044 [1] = SCA_STRHEXARRAY, /* cmd */
1045 [2] = SCA_HEX, /* arg */ },
1046 .arg_parm = { [1] = &strarray__tioctls, /* cmd */ }, },
1048 [2] = SCA_HEX, /* arg */ }, },
1050 { .name = "kill", .errmsg = true,
1051 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1052 { .name = "linkat", .errmsg = true,
1053 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
1054 { .name = "lseek", .errmsg = true,
1055 .arg_scnprintf = { [0] = SCA_FD, /* fd */
1056 [2] = SCA_STRARRAY, /* whence */ },
1057 .arg_parm = { [2] = &strarray__whences, /* whence */ }, },
1058 { .name = "lstat", .errmsg = true, .alias = "newlstat", },
1059 { .name = "madvise", .errmsg = true,
1060 .arg_scnprintf = { [0] = SCA_HEX, /* start */
1061 [2] = SCA_MADV_BHV, /* behavior */ }, },
1062 { .name = "mkdirat", .errmsg = true,
1063 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
1064 { .name = "mknodat", .errmsg = true,
1065 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
1066 { .name = "mlock", .errmsg = true,
1067 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1068 { .name = "mlockall", .errmsg = true,
1069 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1070 { .name = "mmap", .hexret = true,
1071 .arg_scnprintf = { [0] = SCA_HEX, /* addr */
1072 [2] = SCA_MMAP_PROT, /* prot */
1073 [3] = SCA_MMAP_FLAGS, /* flags */
1074 [4] = SCA_FD, /* fd */ }, },
1075 { .name = "mprotect", .errmsg = true,
1076 .arg_scnprintf = { [0] = SCA_HEX, /* start */
1077 [2] = SCA_MMAP_PROT, /* prot */ }, },
1078 { .name = "mremap", .hexret = true,
1079 .arg_scnprintf = { [0] = SCA_HEX, /* addr */
1080 [3] = SCA_MREMAP_FLAGS, /* flags */
1081 [4] = SCA_HEX, /* new_addr */ }, },
1082 { .name = "munlock", .errmsg = true,
1083 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1084 { .name = "munmap", .errmsg = true,
1085 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1086 { .name = "name_to_handle_at", .errmsg = true,
1087 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1088 { .name = "newfstatat", .errmsg = true,
1089 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1090 { .name = "open", .errmsg = true,
1091 .arg_scnprintf = { [1] = SCA_OPEN_FLAGS, /* flags */ }, },
1092 { .name = "open_by_handle_at", .errmsg = true,
1093 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1094 [2] = SCA_OPEN_FLAGS, /* flags */ }, },
1095 { .name = "openat", .errmsg = true,
1096 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1097 [2] = SCA_OPEN_FLAGS, /* flags */ }, },
1098 { .name = "perf_event_open", .errmsg = true,
1099 .arg_scnprintf = { [1] = SCA_INT, /* pid */
1100 [2] = SCA_INT, /* cpu */
1101 [3] = SCA_FD, /* group_fd */
1102 [4] = SCA_PERF_FLAGS, /* flags */ }, },
1103 { .name = "pipe2", .errmsg = true,
1104 .arg_scnprintf = { [1] = SCA_PIPE_FLAGS, /* flags */ }, },
1105 { .name = "poll", .errmsg = true, .timeout = true, },
1106 { .name = "ppoll", .errmsg = true, .timeout = true, },
1107 { .name = "pread", .errmsg = true, .alias = "pread64",
1108 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1109 { .name = "preadv", .errmsg = true, .alias = "pread",
1110 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1111 { .name = "prlimit64", .errmsg = true, STRARRAY(1, resource, rlimit_resources), },
1112 { .name = "pwrite", .errmsg = true, .alias = "pwrite64",
1113 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1114 { .name = "pwritev", .errmsg = true,
1115 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1116 { .name = "read", .errmsg = true,
1117 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1118 { .name = "readlinkat", .errmsg = true,
1119 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1120 { .name = "readv", .errmsg = true,
1121 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1122 { .name = "recvfrom", .errmsg = true,
1123 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1124 { .name = "recvmmsg", .errmsg = true,
1125 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1126 { .name = "recvmsg", .errmsg = true,
1127 .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
1128 { .name = "renameat", .errmsg = true,
1129 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1130 { .name = "rt_sigaction", .errmsg = true,
1131 .arg_scnprintf = { [0] = SCA_SIGNUM, /* sig */ }, },
1132 { .name = "rt_sigprocmask", .errmsg = true, STRARRAY(0, how, sighow), },
1133 { .name = "rt_sigqueueinfo", .errmsg = true,
1134 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1135 { .name = "rt_tgsigqueueinfo", .errmsg = true,
1136 .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
1137 { .name = "select", .errmsg = true, .timeout = true, },
1138 { .name = "sendmmsg", .errmsg = true,
1139 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1140 { .name = "sendmsg", .errmsg = true,
1141 .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
1142 { .name = "sendto", .errmsg = true,
1143 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1144 { .name = "setitimer", .errmsg = true, STRARRAY(0, which, itimers), },
1145 { .name = "setrlimit", .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
1146 { .name = "shutdown", .errmsg = true,
1147 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1148 { .name = "socket", .errmsg = true,
1149 .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
1150 [1] = SCA_SK_TYPE, /* type */ },
1151 .arg_parm = { [0] = &strarray__socket_families, /* family */ }, },
1152 { .name = "socketpair", .errmsg = true,
1153 .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
1154 [1] = SCA_SK_TYPE, /* type */ },
1155 .arg_parm = { [0] = &strarray__socket_families, /* family */ }, },
1156 { .name = "stat", .errmsg = true, .alias = "newstat", },
1157 { .name = "symlinkat", .errmsg = true,
1158 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1159 { .name = "tgkill", .errmsg = true,
1160 .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
1161 { .name = "tkill", .errmsg = true,
1162 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1163 { .name = "uname", .errmsg = true, .alias = "newuname", },
1164 { .name = "unlinkat", .errmsg = true,
1165 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1166 { .name = "utimensat", .errmsg = true,
1167 .arg_scnprintf = { [0] = SCA_FDAT, /* dirfd */ }, },
1168 { .name = "write", .errmsg = true,
1169 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1170 { .name = "writev", .errmsg = true,
1171 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1174 static int syscall_fmt__cmp(const void *name, const void *fmtp)
1176 const struct syscall_fmt *fmt = fmtp;
1177 return strcmp(name, fmt->name);
1180 static struct syscall_fmt *syscall_fmt__find(const char *name)
1182 const int nmemb = ARRAY_SIZE(syscall_fmts);
1183 return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp);
1187 struct event_format *tp_format;
1189 struct format_field *args;
1192 struct syscall_fmt *fmt;
1193 size_t (**arg_scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
1197 static size_t fprintf_duration(unsigned long t, FILE *fp)
1199 double duration = (double)t / NSEC_PER_MSEC;
1200 size_t printed = fprintf(fp, "(");
1202 if (duration >= 1.0)
1203 printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration);
1204 else if (duration >= 0.01)
1205 printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration);
1207 printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration);
1208 return printed + fprintf(fp, "): ");
1211 struct thread_trace {
1215 unsigned long nr_events;
1216 unsigned long pfmaj, pfmin;
1224 struct intlist *syscall_stats;
1227 static struct thread_trace *thread_trace__new(void)
1229 struct thread_trace *ttrace = zalloc(sizeof(struct thread_trace));
1232 ttrace->paths.max = -1;
1234 ttrace->syscall_stats = intlist__new(NULL);
1239 static struct thread_trace *thread__trace(struct thread *thread, FILE *fp)
1241 struct thread_trace *ttrace;
1246 if (thread__priv(thread) == NULL)
1247 thread__set_priv(thread, thread_trace__new());
1249 if (thread__priv(thread) == NULL)
1252 ttrace = thread__priv(thread);
1253 ++ttrace->nr_events;
1257 color_fprintf(fp, PERF_COLOR_RED,
1258 "WARNING: not enough memory, dropping samples!\n");
1262 #define TRACE_PFMAJ (1 << 0)
1263 #define TRACE_PFMIN (1 << 1)
1265 static const size_t trace__entry_str_size = 2048;
1268 struct perf_tool tool;
1275 struct syscall *table;
1277 struct perf_evsel *sys_enter,
1281 struct record_opts opts;
1282 struct perf_evlist *evlist;
1283 struct machine *host;
1284 struct thread *current;
1287 unsigned long nr_events;
1288 struct strlist *ev_qualifier;
1293 const char *last_vfs_getname;
1294 struct intlist *tid_list;
1295 struct intlist *pid_list;
1300 double duration_filter;
1306 bool not_ev_qualifier;
1310 bool multiple_threads;
1314 bool show_tool_stats;
1315 bool trace_syscalls;
1321 static int trace__set_fd_pathname(struct thread *thread, int fd, const char *pathname)
1323 struct thread_trace *ttrace = thread__priv(thread);
1325 if (fd > ttrace->paths.max) {
1326 char **npath = realloc(ttrace->paths.table, (fd + 1) * sizeof(char *));
1331 if (ttrace->paths.max != -1) {
1332 memset(npath + ttrace->paths.max + 1, 0,
1333 (fd - ttrace->paths.max) * sizeof(char *));
1335 memset(npath, 0, (fd + 1) * sizeof(char *));
1338 ttrace->paths.table = npath;
1339 ttrace->paths.max = fd;
1342 ttrace->paths.table[fd] = strdup(pathname);
1344 return ttrace->paths.table[fd] != NULL ? 0 : -1;
/*
 * Resolve the path behind descriptor fd by reading the thread's /proc fd
 * symlink, then cache it with trace__set_fd_pathname() and return that
 * function's result.
 */
1347 static int thread__read_fd_path(struct thread *thread, int fd)
1349 char linkname[PATH_MAX], pathname[PATH_MAX];
/* The group leader (pid == tid) uses /proc/<pid>/fd, other threads the per-task directory. */
1353 if (thread->pid_ == thread->tid) {
1354 scnprintf(linkname, sizeof(linkname),
1355 "/proc/%d/fd/%d", thread->pid_, fd);
1357 scnprintf(linkname, sizeof(linkname),
1358 "/proc/%d/task/%d/fd/%d", thread->pid_, thread->tid, fd);
/* Condition: lstat() failed, or the link target plus its NUL would not fit in pathname. */
1361 if (lstat(linkname, &st) < 0 || st.st_size + 1 > (off_t)sizeof(pathname))
1364 ret = readlink(linkname, pathname, sizeof(pathname));
/* Condition: readlink() failed, or returned more bytes than lstat() reported. */
1366 if (ret < 0 || ret > st.st_size)
/* readlink() does not NUL-terminate. */
1369 pathname[ret] = '\0';
1370 return trace__set_fd_pathname(thread, fd, pathname);
/*
 * Return the cached path for fd in this thread. When fd is beyond the table
 * or its slot is empty, the lookup is counted in trace->stats.proc_getname
 * and thread__read_fd_path() is asked to fill the cache from /proc.
 */
1373 static const char *thread__fd_path(struct thread *thread, int fd,
1374 struct trace *trace)
1376 struct thread_trace *ttrace = thread__priv(thread);
1384 if ((fd > ttrace->paths.max || ttrace->paths.table[fd] == NULL)) {
1387 ++trace->stats.proc_getname;
1388 if (thread__read_fd_path(thread, fd))
1392 return ttrace->paths.table[fd];
/*
 * Format a file descriptor argument into bf: the decimal fd, followed by
 * "<path>" from thread__fd_path() (presumably only when a path was found).
 */
1395 static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
1396 struct syscall_arg *arg)
1399 size_t printed = scnprintf(bf, size, "%d", fd);
1400 const char *path = thread__fd_path(arg->thread, fd, arg->trace);
1403 printed += scnprintf(bf + printed, size - printed, "<%s>", path);
/*
 * Format the fd exactly like syscall_arg__scnprintf_fd(), then free and
 * clear the cached path for that fd (when it is within the table), presumably
 * because the descriptor is being closed and the path would become stale.
 */
1408 static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
1409 struct syscall_arg *arg)
1412 size_t printed = syscall_arg__scnprintf_fd(bf, size, arg);
1413 struct thread_trace *ttrace = thread__priv(arg->thread);
1415 if (ttrace && fd >= 0 && fd <= ttrace->paths.max)
1416 zfree(&ttrace->paths.table[fd]);
/*
 * Return true when t is below trace->duration_filter scaled by
 * NSEC_PER_MSEC, i.e. when the duration is shorter than the filter threshold.
 */
1421 static bool trace__filter_duration(struct trace *trace, double t)
1423 return t < (trace->duration_filter * NSEC_PER_MSEC);
/*
 * Print tstamp relative to trace->base_time, divided by NSEC_PER_MSEC, as a
 * 10-wide value with three decimals followed by a space. Returns the
 * fprintf() result.
 */
1426 static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
1428 double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC;
1430 return fprintf(fp, "%10.3f ", ts);
/* Polled by the event loop in trace__run() to begin shutting down. */
1433 static bool done = false;
/* Set when the signal received by sig_handler() was SIGINT. */
1434 static bool interrupted = false;
/*
 * Signal handler installed by trace__run() for SIGCHLD and SIGINT; records
 * whether the signal was SIGINT (and presumably also sets done -- TODO confirm).
 */
1436 static void sig_handler(int sig)
1439 interrupted = sig == SIGINT;
/*
 * Print the common prefix of an output line to fp: the relative timestamp,
 * the duration and, when multiple threads are being traced, the thread's
 * comm (at most 14 characters, only if trace->show_comm) and its tid.
 * The character counts of the individual prints are accumulated in printed.
 */
1442 static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread,
1443 u64 duration, u64 tstamp, FILE *fp)
1445 size_t printed = trace__fprintf_tstamp(trace, tstamp, fp);
1446 printed += fprintf_duration(duration, fp);
1448 if (trace->multiple_threads) {
1449 if (trace->show_comm)
1450 printed += fprintf(fp, "%.14s/", thread__comm_str(thread));
1451 printed += fprintf(fp, "%d ", thread->tid);
/*
 * Hand a non-sample event to the machine layer. PERF_RECORD_LOST events are
 * additionally reported in red on trace->output with the number of lost
 * events before being passed to machine__process_lost_event(); the other
 * visible path goes through machine__process_event().
 */
1457 static int trace__process_event(struct trace *trace, struct machine *machine,
1458 union perf_event *event, struct perf_sample *sample)
1462 switch (event->header.type) {
1463 case PERF_RECORD_LOST:
1464 color_fprintf(trace->output, PERF_COLOR_RED,
1465 "LOST %" PRIu64 " events!\n", event->lost.lost);
1466 ret = machine__process_lost_event(machine, event, sample);
1468 ret = machine__process_event(machine, event, sample);
/*
 * perf_tool-style callback adapter: recover the enclosing struct trace from
 * the embedded tool member and forward to trace__process_event().
 */
1475 static int trace__tool_process(struct perf_tool *tool,
1476 union perf_event *event,
1477 struct perf_sample *sample,
1478 struct machine *machine)
1480 struct trace *trace = container_of(tool, struct trace, tool);
1481 return trace__process_event(trace, machine, event, sample);
/*
 * Prepare symbol resolution for a live trace: initialize the symbol
 * subsystem, create the host machine, register machine__resolve_kernel_addr
 * as the trace-event address resolver, and synthesize events for the threads
 * in evlist->threads, delivering them through trace__tool_process().
 */
1484 static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
1486 int err = symbol__init(NULL);
1491 trace->host = machine__new_host();
1492 if (trace->host == NULL)
1495 if (trace_event__register_resolver(trace->host, machine__resolve_kernel_addr) < 0)
1498 err = __machine__synthesize_threads(trace->host, &trace->tool, &trace->opts.target,
1499 evlist->threads, trace__tool_process, false,
1500 trace->opts.proc_map_timeout);
/*
 * Choose a formatter for every argument of sc. The array is calloc()'d, so
 * slots start out NULL; a slot takes the formatter from the syscall's fmt
 * entry when one is provided, otherwise the hex formatter for fields flagged
 * FIELD_IS_POINTER.
 */
1507 static int syscall__set_arg_fmts(struct syscall *sc)
1509 struct format_field *field;
1512 sc->arg_scnprintf = calloc(sc->nr_args, sizeof(void *));
1513 if (sc->arg_scnprintf == NULL)
1517 sc->arg_parm = sc->fmt->arg_parm;
1519 for (field = sc->args; field; field = field->next) {
1520 if (sc->fmt && sc->fmt->arg_scnprintf[idx])
1521 sc->arg_scnprintf[idx] = sc->fmt->arg_scnprintf[idx];
1522 else if (field->flags & FIELD_IS_POINTER)
1523 sc->arg_scnprintf[idx] = syscall_arg__scnprintf_hex;
/*
 * Fill in trace->syscalls.table[id]. The name comes from
 * audit_syscall_to_name(); the table is grown (and the new part zeroed) when
 * id is beyond its current max; the optional syscall_fmt is looked up by
 * name; and the "syscalls:sys_enter_<name>" tracepoint format supplies the
 * argument fields, with the fmt alias tried when the plain name has no
 * tracepoint. The final visible return is syscall__set_arg_fmts()'s result.
 */
1530 static int trace__read_syscall_info(struct trace *trace, int id)
1534 const char *name = audit_syscall_to_name(id, trace->audit.machine);
1539 if (id > trace->syscalls.max) {
1540 struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc));
1542 if (nsyscalls == NULL)
/* Zero only the new tail when a table already existed, else the whole table. */
1545 if (trace->syscalls.max != -1) {
1546 memset(nsyscalls + trace->syscalls.max + 1, 0,
1547 (id - trace->syscalls.max) * sizeof(*sc));
1549 memset(nsyscalls, 0, (id + 1) * sizeof(*sc));
1552 trace->syscalls.table = nsyscalls;
1553 trace->syscalls.max = id;
1556 sc = trace->syscalls.table + id;
1559 sc->fmt = syscall_fmt__find(sc->name);
1561 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name);
1562 sc->tp_format = trace_event__tp_format("syscalls", tp_name);
/* Retry with the alias from the fmt entry when the primary name has no tracepoint. */
1564 if (sc->tp_format == NULL && sc->fmt && sc->fmt->alias) {
1565 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias);
1566 sc->tp_format = trace_event__tp_format("syscalls", tp_name);
1569 if (sc->tp_format == NULL)
1572 sc->args = sc->tp_format->format.fields;
1573 sc->nr_args = sc->tp_format->format.nr_fields;
1574 /* drop nr field - not relevant here; does not exist on older kernels */
1575 if (sc->args && strcmp(sc->args->name, "nr") == 0) {
1576 sc->args = sc->args->next;
/* exit and exit_group are flagged via is_exit. */
1580 sc->is_exit = !strcmp(name, "exit_group") || !strcmp(name, "exit");
1582 return syscall__set_arg_fmts(sc);
/*
 * Convert the syscall names in trace->ev_qualifier into numeric ids stored
 * in trace->ev_qualifier_ids (one array slot is allocated per name).
 *
 * Names that audit_name_to_syscall() cannot resolve are presumably the ones
 * listed, comma separated, after "Error:\tInvalid syscall " on
 * trace->output; on that path hints are printed and the id array is freed
 * and its count reset to 0.
 */
1585 static int trace__validate_ev_qualifier(struct trace *trace)
1588 struct str_node *pos;
1590 trace->ev_qualifier_ids.nr = strlist__nr_entries(trace->ev_qualifier);
1591 trace->ev_qualifier_ids.entries = malloc(trace->ev_qualifier_ids.nr *
1592 sizeof(trace->ev_qualifier_ids.entries[0]));
1594 if (trace->ev_qualifier_ids.entries == NULL) {
1595 fputs("Error:\tNot enough memory for allocating events qualifier ids\n",
1603 strlist__for_each(pos, trace->ev_qualifier) {
1604 const char *sc = pos->s;
1605 int id = audit_name_to_syscall(sc, trace->audit.machine);
1609 fputs("Error:\tInvalid syscall ", trace->output);
1612 fputs(", ", trace->output);
1615 fputs(sc, trace->output);
1618 trace->ev_qualifier_ids.entries[i++] = id;
1622 fputs("\nHint:\ttry 'perf list syscalls:sys_enter_*'"
1623 "\nHint:\tand: 'man syscalls'\n", trace->output);
1624 zfree(&trace->ev_qualifier_ids.entries);
1625 trace->ev_qualifier_ids.nr = 0;
1632 * args is to be interpreted as a series of longs but we need to handle
1633 * 8-byte unaligned accesses. args points to raw_data within the event
1634 * and raw_data is guaranteed to be 8-byte unaligned because it is
1635 * preceded by raw_size which is a u32. So we need to copy args to a temp
1636 * variable to read it. Most notably this avoids extended load instructions
1637 * on unaligned addresses
/*
 * Format the raw syscall arguments found at args into bf (at most size
 * bytes), accumulating the number of characters produced in printed.
 *
 * When sc->args describes the fields, each argument is emitted as
 * "name: value", using the per-argument formatter from sc->arg_scnprintf
 * when one is set. Every value is copied out of args with memcpy() rather
 * than dereferenced, because the payload may be unaligned.
 */
1640 static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
1641 unsigned char *args, struct trace *trace,
1642 struct thread *thread)
1648 if (sc->args != NULL) {
1649 struct format_field *field;
1651 struct syscall_arg arg = {
1658 for (field = sc->args; field;
1659 field = field->next, ++arg.idx, bit <<= 1) {
1663 /* special care for unaligned accesses */
1664 p = args + sizeof(unsigned long) * arg.idx;
1665 memcpy(&val, p, sizeof(val));
1668 * Suppress this argument if its value is zero and
1669 * and we don't have a string associated in an
1673 !(sc->arg_scnprintf &&
1674 sc->arg_scnprintf[arg.idx] == SCA_STRARRAY &&
1675 sc->arg_parm[arg.idx]))
/* A ", " separator is emitted before every argument except the first one printed. */
1678 printed += scnprintf(bf + printed, size - printed,
1679 "%s%s: ", printed ? ", " : "", field->name);
1680 if (sc->arg_scnprintf && sc->arg_scnprintf[arg.idx]) {
1683 arg.parm = sc->arg_parm[arg.idx];
1684 printed += sc->arg_scnprintf[arg.idx](bf + printed,
1685 size - printed, &arg);
1687 printed += scnprintf(bf + printed, size - printed,
/* Positional printing by index i; presumably the fallback when no field info exists -- TODO confirm. */
1695 /* special care for unaligned accesses */
1696 p = args + sizeof(unsigned long) * i;
1697 memcpy(&val, p, sizeof(val));
1698 printed += scnprintf(bf + printed, size - printed,
1700 printed ? ", " : "", i, val);
/*
 * Signature of the per-event callbacks stored in evsel->handler and invoked
 * by trace__handle_event() and trace__process_sample().
 */
1708 typedef int (*tracepoint_handler)(struct trace *trace, struct perf_evsel *evsel,
1709 union perf_event *event,
1710 struct perf_sample *sample);
/*
 * Return the syscall table entry for id, loading it on first use with
 * trace__read_syscall_info(). Diagnostics for an invalid id and for entries
 * that could not be read are written to trace->output.
 */
1712 static struct syscall *trace__syscall_info(struct trace *trace,
1713 struct perf_evsel *evsel, int id)
1719 * XXX: Noticed on x86_64, reproduced as far back as 3.0.36, haven't tried
1720 * before that, leaving at a higher verbosity level till that is
1721 * explained. Reproduced with plain ftrace with:
1723 * echo 1 > /t/events/raw_syscalls/sys_exit/enable
1724 * grep "NR -1 " /t/trace_pipe
1726 * After generating some load on the machine.
1730 fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n",
1731 id, perf_evsel__name(evsel), ++n);
/* Load lazily: only when id is outside the table or its entry has no name yet. */
1736 if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) &&
1737 trace__read_syscall_info(trace, id))
/* Re-check: the entry may still be missing after the read attempt. */
1740 if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL))
1743 return &trace->syscalls.table[id];
/* Failure report; the syscall name is appended when it is known. */
1747 fprintf(trace->output, "Problems reading syscall %d", id);
1748 if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL)
1749 fprintf(trace->output, "(%s)", trace->syscalls.table[id].name);
1750 fputs(" information\n", trace->output);
/*
 * Fold the duration of the syscall that just finished into the thread's
 * per-syscall statistics. The stats object for id hangs off the intlist
 * node's priv pointer and is allocated the first time the id is seen.
 * The duration is sample->time - entry_time, computed only when an entry
 * time exists and precedes the sample time.
 */
1755 static void thread__update_stats(struct thread_trace *ttrace,
1756 int id, struct perf_sample *sample)
1758 struct int_node *inode;
1759 struct stats *stats;
1762 inode = intlist__findnew(ttrace->syscall_stats, id);
1766 stats = inode->priv;
1767 if (stats == NULL) {
1768 stats = malloc(sizeof(struct stats));
1772 inode->priv = stats;
1775 if (ttrace->entry_time && sample->time > ttrace->entry_time)
1776 duration = sample->time - ttrace->entry_time;
1778 update_stats(stats, duration);
/*
 * If the current thread has a formatted syscall entry still pending, print
 * it now with the time elapsed since entry, ending the line in ") ...",
 * and clear the pending flag. The checks on trace->current and
 * entry_pending guard the cases where there is nothing to print.
 */
1781 static int trace__printf_interrupted_entry(struct trace *trace, struct perf_sample *sample)
1783 struct thread_trace *ttrace;
1787 if (trace->current == NULL)
1790 ttrace = thread__priv(trace->current);
1792 if (!ttrace->entry_pending)
1795 duration = sample->time - ttrace->entry_time;
1797 printed = trace__fprintf_entry_head(trace, trace->current, duration, sample->time, trace->output);
1798 printed += fprintf(trace->output, "%-70s) ...\n", ttrace->entry_str);
1799 ttrace->entry_pending = false;
/*
 * Handler for the syscall-entry tracepoint. Formats "name(args" into the
 * thread's entry_str buffer (allocated on first use) and records the entry
 * time, so that trace__sys_exit() can complete the line. When neither a
 * duration filter nor summary-only mode is active, one visible path prints
 * the entry immediately; the entry is otherwise marked pending. The thread
 * becomes trace->current, and the reference obtained from
 * machine__findnew_thread() is dropped with thread__put().
 */
1804 static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
1805 union perf_event *event __maybe_unused,
1806 struct perf_sample *sample)
1811 struct thread *thread;
1812 int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1;
1813 struct syscall *sc = trace__syscall_info(trace, evsel, id);
1814 struct thread_trace *ttrace;
1819 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1820 ttrace = thread__trace(thread, trace->output);
1824 args = perf_evsel__sc_tp_ptr(evsel, args, sample);
/* The entry buffer is allocated once per thread and reused for every syscall. */
1826 if (ttrace->entry_str == NULL) {
1827 ttrace->entry_str = malloc(trace__entry_str_size);
1828 if (!ttrace->entry_str)
/* Flush any entry that is still pending before starting a new one. */
1832 if (!trace->summary_only)
1833 trace__printf_interrupted_entry(trace, sample);
1835 ttrace->entry_time = sample->time;
1836 msg = ttrace->entry_str;
1837 printed += scnprintf(msg + printed, trace__entry_str_size - printed, "%s(", sc->name);
1839 printed += syscall__scnprintf_args(sc, msg + printed, trace__entry_str_size - printed,
1840 args, trace, thread);
1843 if (!trace->duration_filter && !trace->summary_only) {
1844 trace__fprintf_entry_head(trace, thread, 1, sample->time, trace->output);
1845 fprintf(trace->output, "%-70s\n", ttrace->entry_str);
1848 ttrace->entry_pending = true;
/* Swap the reference held in trace->current over to this thread. */
1850 if (trace->current != thread) {
1851 thread__put(trace->current);
1852 trace->current = thread__get(thread);
1856 thread__put(thread);
/*
 * Handler for the syscall-exit tracepoint. Updates the per-thread syscall
 * statistics, associates the last vfs_getname pathname with the returned fd
 * for the open syscall, applies the duration filter, and prints the
 * completed line: the pending entry text (or a "... [continued]: name()"
 * marker) followed by the return value rendered according to the syscall's
 * fmt entry.
 */
1860 static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
1861 union perf_event *event __maybe_unused,
1862 struct perf_sample *sample)
1866 struct thread *thread;
1867 int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1;
1868 struct syscall *sc = trace__syscall_info(trace, evsel, id);
1869 struct thread_trace *ttrace;
1874 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1875 ttrace = thread__trace(thread, trace->output);
1880 thread__update_stats(ttrace, id, sample);
1882 ret = perf_evsel__sc_tp_uint(evsel, ret, sample);
/* A successful open: remember the pathname captured by the vfs_getname probe for the new fd. */
1884 if (id == trace->audit.open_id && ret >= 0 && trace->last_vfs_getname) {
1885 trace__set_fd_pathname(thread, ret, trace->last_vfs_getname);
1886 trace->last_vfs_getname = NULL;
1887 ++trace->stats.vfs_getname;
1890 ttrace->exit_time = sample->time;
/* The duration is only known when the matching entry was seen. */
1892 if (ttrace->entry_time) {
1893 duration = sample->time - ttrace->entry_time;
1894 if (trace__filter_duration(trace, duration))
1896 } else if (trace->duration_filter)
1899 if (trace->summary_only)
1902 trace__fprintf_entry_head(trace, thread, duration, sample->time, trace->output);
1904 if (ttrace->entry_pending) {
1905 fprintf(trace->output, "%-70s", ttrace->entry_str);
1907 fprintf(trace->output, " ... [");
1908 color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued");
1909 fprintf(trace->output, "]: %s()", sc->name);
/* Return value rendering: plain, errno name + message, "Timeout", or hex. */
1912 if (sc->fmt == NULL) {
1914 fprintf(trace->output, ") = %ld", ret);
1915 } else if (ret < 0 && sc->fmt->errmsg) {
1916 char bf[STRERR_BUFSIZE];
1917 const char *emsg = strerror_r(-ret, bf, sizeof(bf)),
1918 *e = audit_errno_to_name(-ret);
1920 fprintf(trace->output, ") = -1 %s %s", e, emsg);
1921 } else if (ret == 0 && sc->fmt->timeout)
1922 fprintf(trace->output, ") = 0 Timeout");
1923 else if (sc->fmt->hexret)
1924 fprintf(trace->output, ") = %#lx", ret);
1928 fputc('\n', trace->output);
1930 ttrace->entry_pending = false;
1933 thread__put(thread);
/*
 * Handler for the probe:vfs_getname event: remember the "pathname" field of
 * the sample in trace->last_vfs_getname for trace__sys_exit() to pick up.
 */
1937 static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel,
1938 union perf_event *event __maybe_unused,
1939 struct perf_sample *sample)
1941 trace->last_vfs_getname = perf_evsel__rawptr(evsel, sample, "pathname");
/*
 * Handler for sched:sched_stat_runtime: add the event's "runtime" field,
 * divided by NSEC_PER_MSEC, to both the per-thread and the global
 * runtime_ms totals.
 */
1945 static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel,
1946 union perf_event *event __maybe_unused,
1947 struct perf_sample *sample)
1949 u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
1950 double runtime_ms = (double)runtime / NSEC_PER_MSEC;
1951 struct thread *thread = machine__findnew_thread(trace->host,
1954 struct thread_trace *ttrace = thread__trace(thread, trace->output);
1959 ttrace->runtime_ms += runtime_ms;
1960 trace->runtime_ms += runtime_ms;
1961 thread__put(thread);
/*
 * Dump of the event's raw fields, followed by its own thread__put();
 * presumably a separate path used when no per-thread state is available
 * -- TODO confirm.
 */
1965 fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n",
1967 perf_evsel__strval(evsel, sample, "comm"),
1968 (pid_t)perf_evsel__intval(evsel, sample, "pid"),
1970 perf_evsel__intval(evsel, sample, "vruntime"));
1971 thread__put(thread);
/*
 * Generic handler for non-syscall events: flush a pending syscall entry,
 * print the timestamp, an empty "( ): " duration column when syscalls are
 * also being traced, the event name, and, for tracepoints with format
 * information, the formatted payload.
 */
1975 static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel,
1976 union perf_event *event __maybe_unused,
1977 struct perf_sample *sample)
1979 trace__printf_interrupted_entry(trace, sample);
1980 trace__fprintf_tstamp(trace, sample->time, trace->output);
1982 if (trace->trace_syscalls)
1983 fprintf(trace->output, "( ): ");
1985 fprintf(trace->output, "%s:", evsel->name);
1987 if (evsel->tp_format) {
1988 event_format__fprintf(evsel->tp_format, sample->cpu,
1989 sample->raw_data, sample->raw_size,
1993 fprintf(trace->output, ")\n");
/*
 * Print a resolved address to f: "dso@" when a map is known and verbose or
 * print_dso is set, then "symbol+0xoffset" when a symbol is known and verbose
 * or print_sym is set. The remaining prints emit al->addr or the raw
 * sample->addr in hex (presumably as fallbacks when no symbol / no map is
 * available).
 */
1997 static void print_location(FILE *f, struct perf_sample *sample,
1998 struct addr_location *al,
1999 bool print_dso, bool print_sym)
2002 if ((verbose || print_dso) && al->map)
2003 fprintf(f, "%s@", al->map->dso->long_name);
2005 if ((verbose || print_sym) && al->sym)
2006 fprintf(f, "%s+0x%" PRIx64, al->sym->name,
2007 al->addr - al->sym->start);
2009 fprintf(f, "0x%" PRIx64, al->addr);
2011 fprintf(f, "0x%" PRIx64, sample->addr);
/*
 * Handler for the software page-fault events. Unless in summary-only mode,
 * prints a line of the form "<kind>fault [<code location>] => <data
 * location> (<map type><level>)": the first location is resolved through
 * function maps, the faulting address (sample->addr) is looked up in
 * variable maps and also in function maps.
 */
2014 static int trace__pgfault(struct trace *trace,
2015 struct perf_evsel *evsel,
2016 union perf_event *event,
2017 struct perf_sample *sample)
2019 struct thread *thread;
2020 u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
2021 struct addr_location al;
2022 char map_type = 'd';
2023 struct thread_trace *ttrace;
2026 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
2027 ttrace = thread__trace(thread, trace->output);
2031 if (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ)
2036 if (trace->summary_only)
2039 thread__find_addr_location(thread, cpumode, MAP__FUNCTION,
2042 trace__fprintf_entry_head(trace, thread, 0, sample->time, trace->output);
2044 fprintf(trace->output, "%sfault [",
2045 evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ?
2048 print_location(trace->output, sample, &al, false, true);
2050 fprintf(trace->output, "] => ");
2052 thread__find_addr_location(thread, cpumode, MAP__VARIABLE,
2056 thread__find_addr_location(thread, cpumode,
2057 MAP__FUNCTION, sample->addr, &al);
2065 print_location(trace->output, sample, &al, true, false);
2067 fprintf(trace->output, " (%c%c)\n", map_type, al.level);
2071 thread__put(thread);
/*
 * Decide whether a sample should be ignored in replay mode based on the
 * pid/tid lists. The first test matches samples whose pid or tid is in the
 * corresponding list; the second applies when at least one list exists
 * (presumably: keep on a match, skip when lists exist but nothing matched
 * -- TODO confirm).
 */
2075 static bool skip_sample(struct trace *trace, struct perf_sample *sample)
2077 if ((trace->pid_list && intlist__find(trace->pid_list, sample->pid)) ||
2078 (trace->tid_list && intlist__find(trace->tid_list, sample->tid)))
2081 if (trace->pid_list || trace->tid_list)
/*
 * Sample callback used when replaying a recorded session: filters samples
 * with skip_sample(), latches the first timestamp as base_time unless full
 * timestamps were requested, and invokes the handler attached to the evsel.
 */
2087 static int trace__process_sample(struct perf_tool *tool,
2088 union perf_event *event,
2089 struct perf_sample *sample,
2090 struct perf_evsel *evsel,
2091 struct machine *machine __maybe_unused)
2093 struct trace *trace = container_of(tool, struct trace, tool);
2096 tracepoint_handler handler = evsel->handler;
2098 if (skip_sample(trace, sample))
/* The first sample seen defines time zero for relative timestamps. */
2101 if (!trace->full_time && trace->base_time == 0)
2102 trace->base_time = sample->time;
2106 handler(trace, evsel, event, sample);
/*
 * Build trace->pid_list and trace->tid_list from the target pid/tid option
 * strings, when given; a list that cannot be created is reported via
 * pr_err().
 */
2112 static int parse_target_str(struct trace *trace)
2114 if (trace->opts.target.pid) {
2115 trace->pid_list = intlist__new(trace->opts.target.pid);
2116 if (trace->pid_list == NULL) {
2117 pr_err("Error parsing process id string\n");
2122 if (trace->opts.target.tid) {
2123 trace->tid_list = intlist__new(trace->opts.target.tid);
2124 if (trace->tid_list == NULL) {
2125 pr_err("Error parsing thread id string\n");
/*
 * Implement the record mode by building an argument vector and handing it
 * to cmd_record(): the fixed record_args, then "-e" plus the syscall
 * tracepoint pair when syscalls are traced, then "-e major-faults" /
 * "-e minor-faults" as selected by trace->trace_pgfaults, then the caller's
 * own argv.
 */
2133 static int trace__record(struct trace *trace, int argc, const char **argv)
2135 unsigned int rec_argc, i, j;
2136 const char **rec_argv;
2137 const char * const record_args[] = {
2144 const char * const sc_args[] = { "-e", };
2145 unsigned int sc_args_nr = ARRAY_SIZE(sc_args);
2146 const char * const majpf_args[] = { "-e", "major-faults" };
2147 unsigned int majpf_args_nr = ARRAY_SIZE(majpf_args);
2148 const char * const minpf_args[] = { "-e", "minor-faults" };
2149 unsigned int minpf_args_nr = ARRAY_SIZE(minpf_args);
2151 /* +1 is for the event string below */
2152 rec_argc = ARRAY_SIZE(record_args) + sc_args_nr + 1 +
2153 majpf_args_nr + minpf_args_nr + argc;
2154 rec_argv = calloc(rec_argc + 1, sizeof(char *));
2156 if (rec_argv == NULL)
2160 for (i = 0; i < ARRAY_SIZE(record_args); i++)
2161 rec_argv[j++] = record_args[i];
2163 if (trace->trace_syscalls) {
2164 for (i = 0; i < sc_args_nr; i++)
2165 rec_argv[j++] = sc_args[i];
2167 /* event string may be different for older kernels - e.g., RHEL6 */
2168 if (is_valid_tracepoint("raw_syscalls:sys_enter"))
2169 rec_argv[j++] = "raw_syscalls:sys_enter,raw_syscalls:sys_exit";
2170 else if (is_valid_tracepoint("syscalls:sys_enter"))
2171 rec_argv[j++] = "syscalls:sys_enter,syscalls:sys_exit";
2173 pr_err("Neither raw_syscalls nor syscalls events exist.\n");
2178 if (trace->trace_pgfaults & TRACE_PFMAJ)
2179 for (i = 0; i < majpf_args_nr; i++)
2180 rec_argv[j++] = majpf_args[i];
2182 if (trace->trace_pgfaults & TRACE_PFMIN)
2183 for (i = 0; i < minpf_args_nr; i++)
2184 rec_argv[j++] = minpf_args[i];
2186 for (i = 0; i < (unsigned int)argc; i++)
2187 rec_argv[j++] = argv[i];
/* j is the number of arguments actually filled in, which may be below rec_argc. */
2189 return cmd_record(j, rec_argv, NULL);
/* Forward declaration: used by trace__run() and trace__replay(), defined further down. */
2192 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp);
/*
 * Try to add the "probe:vfs_getname" tracepoint to evlist with
 * trace__vfs_getname() as its handler. An evsel lacking the "pathname"
 * field is deleted instead of being added.
 */
2194 static bool perf_evlist__add_vfs_getname(struct perf_evlist *evlist)
2196 struct perf_evsel *evsel = perf_evsel__newtp("probe", "vfs_getname");
2200 if (perf_evsel__field(evsel, "pathname") == NULL) {
2201 perf_evsel__delete(evsel);
2205 evsel->handler = trace__vfs_getname;
2206 perf_evlist__add(evlist, evsel);
/*
 * Create a software event for the given page-fault config, sampling every
 * occurrence (sample_period = 1), attach trace__pgfault() as its handler
 * and add it to evlist.
 */
2210 static int perf_evlist__add_pgfault(struct perf_evlist *evlist,
2213 struct perf_evsel *evsel;
2214 struct perf_event_attr attr = {
2215 .type = PERF_TYPE_SOFTWARE,
2219 attr.config = config;
2220 attr.sample_period = 1;
2222 event_attr_init(&attr);
2224 evsel = perf_evsel__new(&attr);
2228 evsel->handler = trace__pgfault;
2229 perf_evlist__add(evlist, evsel);
/*
 * Dispatch one event read from the ring buffer in live mode. Non-sample
 * events go to trace__process_event(); samples are mapped to their evsel by
 * sample id and passed to that evsel's handler. Samples with an unknown id,
 * and tracepoint samples without a raw payload, are reported on
 * trace->output.
 */
2234 static void trace__handle_event(struct trace *trace, union perf_event *event, struct perf_sample *sample)
2236 const u32 type = event->header.type;
2237 struct perf_evsel *evsel;
/* The first event seen defines time zero for relative timestamps. */
2239 if (!trace->full_time && trace->base_time == 0)
2240 trace->base_time = sample->time;
2242 if (type != PERF_RECORD_SAMPLE) {
2243 trace__process_event(trace, trace->host, event, sample);
2247 evsel = perf_evlist__id2evsel(trace->evlist, sample->id);
2248 if (evsel == NULL) {
2249 fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample->id);
2253 if (evsel->attr.type == PERF_TYPE_TRACEPOINT &&
2254 sample->raw_data == NULL) {
2255 fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
2256 perf_evsel__name(evsel), sample->tid,
2257 sample->cpu, sample->raw_size);
2259 tracepoint_handler handler = evsel->handler;
2260 handler(trace, evsel, event, sample);
/*
 * Create the sys_enter and sys_exit syscall tracepoint events, initialize
 * the "args" pointer field accessor on sys_enter and the "ret" integer field
 * accessor on sys_exit, add both to trace->evlist and remember them in
 * trace->syscalls.events. The labels at the end delete whichever events
 * were already created when a later step fails.
 */
2264 static int trace__add_syscall_newtp(struct trace *trace)
2267 struct perf_evlist *evlist = trace->evlist;
2268 struct perf_evsel *sys_enter, *sys_exit;
2270 sys_enter = perf_evsel__syscall_newtp("sys_enter", trace__sys_enter);
2271 if (sys_enter == NULL)
2274 if (perf_evsel__init_sc_tp_ptr_field(sys_enter, args))
2275 goto out_delete_sys_enter;
2277 sys_exit = perf_evsel__syscall_newtp("sys_exit", trace__sys_exit);
2278 if (sys_exit == NULL)
2279 goto out_delete_sys_enter;
2281 if (perf_evsel__init_sc_tp_uint_field(sys_exit, ret))
2282 goto out_delete_sys_exit;
2284 perf_evlist__add(evlist, sys_enter);
2285 perf_evlist__add(evlist, sys_exit);
2287 trace->syscalls.events.sys_enter = sys_enter;
2288 trace->syscalls.events.sys_exit = sys_exit;
/* Unwind in reverse order of creation. */
2294 out_delete_sys_exit:
2295 perf_evsel__delete_priv(sys_exit);
2296 out_delete_sys_enter:
2297 perf_evsel__delete_priv(sys_enter);
/*
 * Build a tracepoint filter expression on "id" from the qualifier id list
 * (inclusive or exclusive depending on not_ev_qualifier) and append it with
 * "&&" to the sys_enter filter and, if that succeeded, to the sys_exit
 * filter.
 */
2301 static int trace__set_ev_qualifier_filter(struct trace *trace)
2304 char *filter = asprintf_expr_inout_ints("id", !trace->not_ev_qualifier,
2305 trace->ev_qualifier_ids.nr,
2306 trace->ev_qualifier_ids.entries);
2311 if (!perf_evsel__append_filter(trace->syscalls.events.sys_enter, "&&", filter))
2312 err = perf_evsel__append_filter(trace->syscalls.events.sys_exit, "&&", filter);
/*
 * Run a live trace session. In order, the visible steps are: add the
 * syscall, vfs_getname, page-fault and sched_stat_runtime events that were
 * requested; create the cpu/thread maps for the target; initialize symbols;
 * configure the evlist and install signal handlers; prepare the workload
 * (when a command was given, i.e. argc > 0); open the events; install pid
 * and syscall-id filters; mmap the ring buffers; enable events / start the
 * workload; then loop reading, parsing and dispatching events until done.
 * Afterwards the optional summary and tool statistics are printed and the
 * evlist is deleted. The tail of the function holds the error-reporting
 * labels, which all end in out_delete_evlist.
 */
2322 static int trace__run(struct trace *trace, int argc, const char **argv)
2324 struct perf_evlist *evlist = trace->evlist;
2325 struct perf_evsel *evsel;
2327 unsigned long before;
2328 const bool forks = argc > 0;
2329 bool draining = false;
/* Event setup. */
2333 if (trace->trace_syscalls && trace__add_syscall_newtp(trace))
2334 goto out_error_raw_syscalls;
2336 if (trace->trace_syscalls)
2337 trace->vfs_getname = perf_evlist__add_vfs_getname(evlist);
2339 if ((trace->trace_pgfaults & TRACE_PFMAJ) &&
2340 perf_evlist__add_pgfault(evlist, PERF_COUNT_SW_PAGE_FAULTS_MAJ)) {
2344 if ((trace->trace_pgfaults & TRACE_PFMIN) &&
2345 perf_evlist__add_pgfault(evlist, PERF_COUNT_SW_PAGE_FAULTS_MIN))
2349 perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
2350 trace__sched_stat_runtime))
2351 goto out_error_sched_stat_runtime;
/* Target maps and symbol initialization. */
2353 err = perf_evlist__create_maps(evlist, &trace->opts.target);
2355 fprintf(trace->output, "Problems parsing the target to trace, check your options!\n");
2356 goto out_delete_evlist;
2359 err = trace__symbols_init(trace, evlist);
2361 fprintf(trace->output, "Problems initializing symbol libraries!\n");
2362 goto out_delete_evlist;
2365 perf_evlist__config(evlist, &trace->opts);
/* sig_handler() lets the read loop notice SIGCHLD and SIGINT. */
2367 signal(SIGCHLD, sig_handler);
2368 signal(SIGINT, sig_handler);
2371 err = perf_evlist__prepare_workload(evlist, &trace->opts.target,
2374 fprintf(trace->output, "Couldn't run the workload!\n");
2375 goto out_delete_evlist;
2379 err = perf_evlist__open(evlist);
2381 goto out_error_open;
2384 * Better not use !target__has_task() here because we need to cover the
2385 * case where no threads were specified in the command line, but a
2386 * workload was, and in that case we will fill in the thread_map when
2387 * we fork the workload in perf_evlist__prepare_workload.
2389 if (trace->filter_pids.nr > 0)
2390 err = perf_evlist__set_filter_pids(evlist, trace->filter_pids.nr, trace->filter_pids.entries);
2391 else if (thread_map__pid(evlist->threads, 0) == -1)
2392 err = perf_evlist__set_filter_pid(evlist, getpid());
/* Restrict the syscall tracepoints to the requested syscall ids. */
2397 if (trace->ev_qualifier_ids.nr > 0) {
2398 err = trace__set_ev_qualifier_filter(trace);
2402 pr_debug("event qualifier tracepoint filter: %s\n",
2403 trace->syscalls.events.sys_exit->filter);
2406 err = perf_evlist__apply_filters(evlist, &evsel);
2408 goto out_error_apply_filters;
2410 err = perf_evlist__mmap(evlist, trace->opts.mmap_pages, false);
2412 goto out_error_mmap;
2414 if (!target__none(&trace->opts.target))
2415 perf_evlist__enable(evlist);
2418 perf_evlist__start_workload(evlist);
/* Show tids in the output whenever more than one thread may appear. */
2420 trace->multiple_threads = thread_map__pid(evlist->threads, 0) == -1 ||
2421 evlist->threads->nr > 1 ||
2422 perf_evlist__first(evlist)->attr.inherit;
/* Snapshot the event count so the loop can tell whether this pass read anything. */
2424 before = trace->nr_events;
2426 for (i = 0; i < evlist->nr_mmaps; i++) {
2427 union perf_event *event;
2429 while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) {
2430 struct perf_sample sample;
2434 err = perf_evlist__parse_sample(evlist, event, &sample);
2436 fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err);
2440 trace__handle_event(trace, event, &sample);
2442 perf_evlist__mmap_consume(evlist, i);
2447 if (done && !draining) {
2448 perf_evlist__disable(evlist);
/* Nothing new was read in this pass: poll for more data. */
2454 if (trace->nr_events == before) {
2455 int timeout = done ? 100 : -1;
2457 if (!draining && perf_evlist__poll(evlist, timeout) > 0) {
2458 if (perf_evlist__filter_pollfd(evlist, POLLERR | POLLHUP) == 0)
/* Drop the reference held on the last traced thread. */
2468 thread__zput(trace->current);
2470 perf_evlist__disable(evlist);
2474 trace__fprintf_thread_summary(trace, trace->output);
2476 if (trace->show_tool_stats) {
2477 fprintf(trace->output, "Stats:\n "
2478 " vfs_getname : %" PRIu64 "\n"
2479 " proc_getname: %" PRIu64 "\n",
2480 trace->stats.vfs_getname,
2481 trace->stats.proc_getname);
2486 perf_evlist__delete(evlist);
2487 trace->evlist = NULL;
2488 trace->live = false;
/* Error reporting: each path formats a message and ends in out_delete_evlist. */
2491 char errbuf[BUFSIZ];
2493 out_error_sched_stat_runtime:
2494 debugfs__strerror_open_tp(errno, errbuf, sizeof(errbuf), "sched", "sched_stat_runtime");
2497 out_error_raw_syscalls:
2498 debugfs__strerror_open_tp(errno, errbuf, sizeof(errbuf), "raw_syscalls", "sys_(enter|exit)");
2502 perf_evlist__strerror_mmap(evlist, errno, errbuf, sizeof(errbuf));
2506 perf_evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf));
2509 fprintf(trace->output, "%s\n", errbuf);
2510 goto out_delete_evlist;
2512 out_error_apply_filters:
2513 fprintf(trace->output,
2514 "Failed to set filter \"%s\" on event %s with %d (%s)\n",
2515 evsel->filter, perf_evsel__name(evsel), errno,
2516 strerror_r(errno, errbuf, sizeof(errbuf)));
2517 goto out_delete_evlist;
2520 fprintf(trace->output, "Not enough memory to run!\n");
2521 goto out_delete_evlist;
2524 fprintf(trace->output, "errno=%d,%s\n", errno, strerror(errno));
2525 goto out_delete_evlist;
/*
 * Replay a previously recorded perf data file. Installs the tool callbacks
 * (samples go to trace__process_sample(), the rest to the stock
 * perf_event__process_* helpers), opens the session in read mode with
 * ordered events, wires up the syscall enter/exit tracepoints (trying the
 * raw_syscalls names and the older syscalls names) and the page-fault
 * handler, builds the pid/tid lists, processes all events and, on success
 * with trace->summary set, prints the per-thread summary.
 */
2528 static int trace__replay(struct trace *trace)
2530 const struct perf_evsel_str_handler handlers[] = {
2531 { "probe:vfs_getname", trace__vfs_getname, },
2533 struct perf_data_file file = {
2535 .mode = PERF_DATA_MODE_READ,
2536 .force = trace->force,
2538 struct perf_session *session;
2539 struct perf_evsel *evsel;
2542 trace->tool.sample = trace__process_sample;
2543 trace->tool.mmap = perf_event__process_mmap;
2544 trace->tool.mmap2 = perf_event__process_mmap2;
2545 trace->tool.comm = perf_event__process_comm;
2546 trace->tool.exit = perf_event__process_exit;
2547 trace->tool.fork = perf_event__process_fork;
2548 trace->tool.attr = perf_event__process_attr;
2549 trace->tool.tracing_data = perf_event__process_tracing_data;
2550 trace->tool.build_id = perf_event__process_build_id;
2552 trace->tool.ordered_events = true;
2553 trace->tool.ordering_requires_timestamps = true;
2555 /* add tid to output */
2556 trace->multiple_threads = true;
2558 session = perf_session__new(&file, false, &trace->tool);
2559 if (session == NULL)
2562 if (symbol__init(&session->header.env) < 0)
/* In replay mode the host machine is the one owned by the session. */
2565 trace->host = &session->machines.host;
2567 err = perf_session__set_tracepoints_handlers(session, handlers);
2571 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2572 "raw_syscalls:sys_enter");
2573 /* older kernels have syscalls tp versus raw_syscalls */
2575 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2576 "syscalls:sys_enter");
2579 (perf_evsel__init_syscall_tp(evsel, trace__sys_enter) < 0 ||
2580 perf_evsel__init_sc_tp_ptr_field(evsel, args))) {
2581 pr_err("Error during initialize raw_syscalls:sys_enter event\n");
2585 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2586 "raw_syscalls:sys_exit");
2588 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2589 "syscalls:sys_exit");
2591 (perf_evsel__init_syscall_tp(evsel, trace__sys_exit) < 0 ||
2592 perf_evsel__init_sc_tp_uint_field(evsel, ret))) {
2593 pr_err("Error during initialize raw_syscalls:sys_exit event\n");
/* Any recorded software page-fault event (major, minor or generic) is handled by trace__pgfault(). */
2597 evlist__for_each(session->evlist, evsel) {
2598 if (evsel->attr.type == PERF_TYPE_SOFTWARE &&
2599 (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ||
2600 evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MIN ||
2601 evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS))
2602 evsel->handler = trace__pgfault;
2605 err = parse_target_str(trace);
2611 err = perf_session__process_events(session);
2613 pr_err("Failed to process events, error %d", err);
2615 else if (trace->summary)
2616 trace__fprintf_thread_summary(trace, trace->output);
2619 perf_session__delete(session);
/* Print the heading of the per-thread summary to fp; printed holds the fprintf() result. */
2624 static size_t trace__fprintf_threads_header(FILE *fp)
2628 printed = fprintf(fp, "\n Summary of events:\n\n");
/*
 * Print the per-syscall statistics table of one thread to fp: a header,
 * then one row per node of ttrace->syscall_stats with the syscall name,
 * call count, min/avg/max (divided by NSEC_PER_MSEC) and the standard
 * deviation as a percentage of the average. The character counts are
 * accumulated in printed.
 */
2633 static size_t thread__dump_stats(struct thread_trace *ttrace,
2634 struct trace *trace, FILE *fp)
2636 struct stats *stats;
2639 struct int_node *inode = intlist__first(ttrace->syscall_stats);
2644 printed += fprintf(fp, "\n");
2646 printed += fprintf(fp, " syscall calls min avg max stddev\n");
2647 printed += fprintf(fp, " (msec) (msec) (msec) (%%)\n");
2648 printed += fprintf(fp, " --------------- -------- --------- --------- --------- ------\n");
2650 /* each int_node is a syscall */
2652 stats = inode->priv;
2654 double min = (double)(stats->min) / NSEC_PER_MSEC;
2655 double max = (double)(stats->max) / NSEC_PER_MSEC;
2656 double avg = avg_stats(stats);
2658 u64 n = (u64) stats->n;
/* pct is computed while avg is still unscaled; a zero average yields 0.0 instead of dividing by zero. */
2660 pct = avg ? 100.0 * stddev_stats(stats)/avg : 0.0;
2661 avg /= NSEC_PER_MSEC;
/* The node's integer key is the syscall id, used to index the syscall table. */
2663 sc = &trace->syscalls.table[inode->i];
2664 printed += fprintf(fp, " %-15s", sc->name);
2665 printed += fprintf(fp, " %8" PRIu64 " %9.3f %9.3f",
2667 printed += fprintf(fp, " %9.3f %9.2f%%\n", max, pct);
2670 inode = intlist__next(inode);
2673 printed += fprintf(fp, "\n\n");
2678 /* struct used to pass data to per-thread function */
/*
 * Filled in by trace__fprintf_thread_summary() and read back by
 * trace__fprintf_one_thread(), which also uses its fp and printed members.
 */
2679 struct summary_data {
/* Tracer whose global counters and syscall table are used for each thread's summary. */
2681 struct trace *trace;
/*
 * Per-thread callback for machine__for_each_thread(): print one thread's
 * summary line (comm, tid, event count and its share of all events,
 * page-fault counts, runtime in msec) followed by its syscall statistics
 * table. priv points to a struct summary_data.
 *
 * NOTE(review): printed is seeded from data->printed and then added back
 * onto data->printed, so the earlier total appears to be counted twice --
 * confirm intent.
 */
2685 static int trace__fprintf_one_thread(struct thread *thread, void *priv)
2687 struct summary_data *data = priv;
2688 FILE *fp = data->fp;
2689 size_t printed = data->printed;
2690 struct trace *trace = data->trace;
2691 struct thread_trace *ttrace = thread__priv(thread);
/* This thread's share of all events, as a percentage. */
2697 ratio = (double)ttrace->nr_events / trace->nr_events * 100.0;
2699 printed += fprintf(fp, " %s (%d), ", thread__comm_str(thread), thread->tid);
2700 printed += fprintf(fp, "%lu events, ", ttrace->nr_events);
2701 printed += fprintf(fp, "%.1f%%", ratio);
2703 printed += fprintf(fp, ", %lu majfaults", ttrace->pfmaj);
2705 printed += fprintf(fp, ", %lu minfaults", ttrace->pfmin);
2706 printed += fprintf(fp, ", %.3f msec\n", ttrace->runtime_ms);
2707 printed += thread__dump_stats(ttrace, trace, fp);
2709 data->printed += printed;
/*
 * Print the summary heading and then one summary per thread of trace->host,
 * via trace__fprintf_one_thread(). Returns the accumulated character count
 * kept in data.printed.
 */
2714 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
2716 struct summary_data data = {
2720 data.printed = trace__fprintf_threads_header(fp);
2722 machine__for_each_thread(trace->host, trace__fprintf_one_thread, &data);
2724 return data.printed;
2727 static int trace__set_duration(const struct option *opt, const char *str,
2728 int unset __maybe_unused)
2730 struct trace *trace = opt->value;
2732 trace->duration_filter = atof(str);
2736 static int trace__set_filter_pids(const struct option *opt, const char *str,
2737 int unset __maybe_unused)
2741 struct trace *trace = opt->value;
2743 * FIXME: introduce a intarray class, plain parse csv and create a
2744 * { int nr, int entries[] } struct...
2746 struct intlist *list = intlist__new(str);
2751 i = trace->filter_pids.nr = intlist__nr_entries(list) + 1;
2752 trace->filter_pids.entries = calloc(i, sizeof(pid_t));
2754 if (trace->filter_pids.entries == NULL)
2757 trace->filter_pids.entries[0] = getpid();
2759 for (i = 1; i < trace->filter_pids.nr; ++i)
2760 trace->filter_pids.entries[i] = intlist__entry(list, i - 1)->i;
2762 intlist__delete(list);
2768 static int trace__open_output(struct trace *trace, const char *filename)
2772 if (!stat(filename, &st) && st.st_size) {
2773 char oldname[PATH_MAX];
2775 scnprintf(oldname, sizeof(oldname), "%s.old", filename);
2777 rename(filename, oldname);
2780 trace->output = fopen(filename, "w");
2782 return trace->output == NULL ? -errno : 0;
2785 static int parse_pagefaults(const struct option *opt, const char *str,
2786 int unset __maybe_unused)
2788 int *trace_pgfaults = opt->value;
2790 if (strcmp(str, "all") == 0)
2791 *trace_pgfaults |= TRACE_PFMAJ | TRACE_PFMIN;
2792 else if (strcmp(str, "maj") == 0)
2793 *trace_pgfaults |= TRACE_PFMAJ;
2794 else if (strcmp(str, "min") == 0)
2795 *trace_pgfaults |= TRACE_PFMIN;
2802 static void evlist__set_evsel_handler(struct perf_evlist *evlist, void *handler)
2804 struct perf_evsel *evsel;
2806 evlist__for_each(evlist, evsel)
2807 evsel->handler = handler;
2810 int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
2812 const char *trace_usage[] = {
2813 "perf trace [<options>] [<command>]",
2814 "perf trace [<options>] -- <command> [<options>]",
2815 "perf trace record [<options>] [<command>]",
2816 "perf trace record [<options>] -- <command> [<options>]",
2819 struct trace trace = {
2821 .machine = audit_detect_machine(),
2822 .open_id = audit_name_to_syscall("open", trace.audit.machine),
2832 .user_freq = UINT_MAX,
2833 .user_interval = ULLONG_MAX,
2834 .no_buffering = true,
2835 .mmap_pages = UINT_MAX,
2836 .proc_map_timeout = 500,
2840 .trace_syscalls = true,
2842 const char *output_name = NULL;
2843 const char *ev_qualifier_str = NULL;
2844 const struct option trace_options[] = {
2845 OPT_CALLBACK(0, "event", &trace.evlist, "event",
2846 "event selector. use 'perf list' to list available events",
2847 parse_events_option),
2848 OPT_BOOLEAN(0, "comm", &trace.show_comm,
2849 "show the thread COMM next to its id"),
2850 OPT_BOOLEAN(0, "tool_stats", &trace.show_tool_stats, "show tool stats"),
2851 OPT_STRING('e', "expr", &ev_qualifier_str, "expr", "list of syscalls to trace"),
2852 OPT_STRING('o', "output", &output_name, "file", "output file name"),
2853 OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"),
2854 OPT_STRING('p', "pid", &trace.opts.target.pid, "pid",
2855 "trace events on existing process id"),
2856 OPT_STRING('t', "tid", &trace.opts.target.tid, "tid",
2857 "trace events on existing thread id"),
2858 OPT_CALLBACK(0, "filter-pids", &trace, "CSV list of pids",
2859 "pids to filter (by the kernel)", trace__set_filter_pids),
2860 OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide,
2861 "system-wide collection from all CPUs"),
2862 OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu",
2863 "list of cpus to monitor"),
2864 OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit,
2865 "child tasks do not inherit counters"),
2866 OPT_CALLBACK('m', "mmap-pages", &trace.opts.mmap_pages, "pages",
2867 "number of mmap data pages",
2868 perf_evlist__parse_mmap_pages),
2869 OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user",
2871 OPT_CALLBACK(0, "duration", &trace, "float",
2872 "show only events with duration > N.M ms",
2873 trace__set_duration),
2874 OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"),
2875 OPT_INCR('v', "verbose", &verbose, "be more verbose"),
2876 OPT_BOOLEAN('T', "time", &trace.full_time,
2877 "Show full timestamp, not time relative to first start"),
2878 OPT_BOOLEAN('s', "summary", &trace.summary_only,
2879 "Show only syscall summary with statistics"),
2880 OPT_BOOLEAN('S', "with-summary", &trace.summary,
2881 "Show all syscalls and summary with statistics"),
2882 OPT_CALLBACK_DEFAULT('F', "pf", &trace.trace_pgfaults, "all|maj|min",
2883 "Trace pagefaults", parse_pagefaults, "maj"),
2884 OPT_BOOLEAN(0, "syscalls", &trace.trace_syscalls, "Trace syscalls"),
2885 OPT_BOOLEAN('f', "force", &trace.force, "don't complain, do it"),
2886 OPT_UINTEGER(0, "proc-map-timeout", &trace.opts.proc_map_timeout,
2887 "per thread proc mmap processing timeout in ms"),
2890 const char * const trace_subcommands[] = { "record", NULL };
2894 signal(SIGSEGV, sighandler_dump_stack);
2895 signal(SIGFPE, sighandler_dump_stack);
2897 trace.evlist = perf_evlist__new();
2899 if (trace.evlist == NULL) {
2900 pr_err("Not enough memory to run!\n");
2905 argc = parse_options_subcommand(argc, argv, trace_options, trace_subcommands,
2906 trace_usage, PARSE_OPT_STOP_AT_NON_OPTION);
2908 if (trace.trace_pgfaults) {
2909 trace.opts.sample_address = true;
2910 trace.opts.sample_time = true;
2913 if (trace.evlist->nr_entries > 0)
2914 evlist__set_evsel_handler(trace.evlist, trace__event_handler);
2916 if ((argc >= 1) && (strcmp(argv[0], "record") == 0))
2917 return trace__record(&trace, argc-1, &argv[1]);
2919 /* summary_only implies summary option, but don't overwrite summary if set */
2920 if (trace.summary_only)
2921 trace.summary = trace.summary_only;
2923 if (!trace.trace_syscalls && !trace.trace_pgfaults &&
2924 trace.evlist->nr_entries == 0 /* Was --events used? */) {
2925 pr_err("Please specify something to trace.\n");
2929 if (output_name != NULL) {
2930 err = trace__open_output(&trace, output_name);
2932 perror("failed to create output file");
2937 if (ev_qualifier_str != NULL) {
2938 const char *s = ev_qualifier_str;
2939 struct strlist_config slist_config = {
2940 .dirname = system_path(STRACE_GROUPS_DIR),
2943 trace.not_ev_qualifier = *s == '!';
2944 if (trace.not_ev_qualifier)
2946 trace.ev_qualifier = strlist__new(s, &slist_config);
2947 if (trace.ev_qualifier == NULL) {
2948 fputs("Not enough memory to parse event qualifier",
2954 err = trace__validate_ev_qualifier(&trace);
2959 err = target__validate(&trace.opts.target);
2961 target__strerror(&trace.opts.target, err, bf, sizeof(bf));
2962 fprintf(trace.output, "%s", bf);
2966 err = target__parse_uid(&trace.opts.target);
2968 target__strerror(&trace.opts.target, err, bf, sizeof(bf));
2969 fprintf(trace.output, "%s", bf);
2973 if (!argc && target__none(&trace.opts.target))
2974 trace.opts.target.system_wide = true;
2977 err = trace__replay(&trace);
2979 err = trace__run(&trace, argc, argv);
2982 if (output_name != NULL)
2983 fclose(trace.output);