1 #include <traceevent/event-parse.h>
3 #include "util/color.h"
4 #include "util/debug.h"
5 #include "util/evlist.h"
6 #include "util/exec_cmd.h"
7 #include "util/machine.h"
8 #include "util/session.h"
9 #include "util/thread.h"
10 #include "util/parse-options.h"
11 #include "util/strlist.h"
12 #include "util/intlist.h"
13 #include "util/thread_map.h"
14 #include "util/stat.h"
15 #include "trace-event.h"
16 #include "util/parse-events.h"
21 #include <linux/futex.h>
23 /* For older distros: */
25 # define MAP_STACK 0x20000
29 # define MADV_HWPOISON 100
32 #ifndef MADV_MERGEABLE
33 # define MADV_MERGEABLE 12
36 #ifndef MADV_UNMERGEABLE
37 # define MADV_UNMERGEABLE 13
41 # define EFD_SEMAPHORE 1
45 # define EFD_NONBLOCK 00004000
49 # define EFD_CLOEXEC 02000000
53 # define O_CLOEXEC 02000000
61 # define SOCK_CLOEXEC 02000000
65 # define SOCK_NONBLOCK 00004000
68 #ifndef MSG_CMSG_CLOEXEC
69 # define MSG_CMSG_CLOEXEC 0x40000000
72 #ifndef PERF_FLAG_FD_NO_GROUP
73 # define PERF_FLAG_FD_NO_GROUP (1UL << 0)
76 #ifndef PERF_FLAG_FD_OUTPUT
77 # define PERF_FLAG_FD_OUTPUT (1UL << 1)
80 #ifndef PERF_FLAG_PID_CGROUP
81 # define PERF_FLAG_PID_CGROUP (1UL << 2) /* pid=cgroup id, per-cpu mode only */
84 #ifndef PERF_FLAG_FD_CLOEXEC
85 # define PERF_FLAG_FD_CLOEXEC (1UL << 3) /* O_CLOEXEC */
92 u64 (*integer)(struct tp_field *field, struct perf_sample *sample);
93 void *(*pointer)(struct tp_field *field, struct perf_sample *sample);
97 #define TP_UINT_FIELD(bits) \
98 static u64 tp_field__u##bits(struct tp_field *field, struct perf_sample *sample) \
101 memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
110 #define TP_UINT_FIELD__SWAPPED(bits) \
111 static u64 tp_field__swapped_u##bits(struct tp_field *field, struct perf_sample *sample) \
114 memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
115 return bswap_##bits(value);\
118 TP_UINT_FIELD__SWAPPED(16);
119 TP_UINT_FIELD__SWAPPED(32);
120 TP_UINT_FIELD__SWAPPED(64);
122 static int tp_field__init_uint(struct tp_field *field,
123 struct format_field *format_field,
126 field->offset = format_field->offset;
128 switch (format_field->size) {
130 field->integer = tp_field__u8;
133 field->integer = needs_swap ? tp_field__swapped_u16 : tp_field__u16;
136 field->integer = needs_swap ? tp_field__swapped_u32 : tp_field__u32;
139 field->integer = needs_swap ? tp_field__swapped_u64 : tp_field__u64;
148 static void *tp_field__ptr(struct tp_field *field, struct perf_sample *sample)
150 return sample->raw_data + field->offset;
153 static int tp_field__init_ptr(struct tp_field *field, struct format_field *format_field)
155 field->offset = format_field->offset;
156 field->pointer = tp_field__ptr;
163 struct tp_field args, ret;
167 static int perf_evsel__init_tp_uint_field(struct perf_evsel *evsel,
168 struct tp_field *field,
171 struct format_field *format_field = perf_evsel__field(evsel, name);
173 if (format_field == NULL)
176 return tp_field__init_uint(field, format_field, evsel->needs_swap);
179 #define perf_evsel__init_sc_tp_uint_field(evsel, name) \
180 ({ struct syscall_tp *sc = evsel->priv;\
181 perf_evsel__init_tp_uint_field(evsel, &sc->name, #name); })
183 static int perf_evsel__init_tp_ptr_field(struct perf_evsel *evsel,
184 struct tp_field *field,
187 struct format_field *format_field = perf_evsel__field(evsel, name);
189 if (format_field == NULL)
192 return tp_field__init_ptr(field, format_field);
195 #define perf_evsel__init_sc_tp_ptr_field(evsel, name) \
196 ({ struct syscall_tp *sc = evsel->priv;\
197 perf_evsel__init_tp_ptr_field(evsel, &sc->name, #name); })
199 static void perf_evsel__delete_priv(struct perf_evsel *evsel)
202 perf_evsel__delete(evsel);
205 static int perf_evsel__init_syscall_tp(struct perf_evsel *evsel, void *handler)
207 evsel->priv = malloc(sizeof(struct syscall_tp));
208 if (evsel->priv != NULL) {
209 if (perf_evsel__init_sc_tp_uint_field(evsel, id))
212 evsel->handler = handler;
223 static struct perf_evsel *perf_evsel__syscall_newtp(const char *direction, void *handler)
225 struct perf_evsel *evsel = perf_evsel__newtp("raw_syscalls", direction);
227 /* older kernel (e.g., RHEL6) use syscalls:{enter,exit} */
229 evsel = perf_evsel__newtp("syscalls", direction);
232 if (perf_evsel__init_syscall_tp(evsel, handler))
239 perf_evsel__delete_priv(evsel);
243 #define perf_evsel__sc_tp_uint(evsel, name, sample) \
244 ({ struct syscall_tp *fields = evsel->priv; \
245 fields->name.integer(&fields->name, sample); })
247 #define perf_evsel__sc_tp_ptr(evsel, name, sample) \
248 ({ struct syscall_tp *fields = evsel->priv; \
249 fields->name.pointer(&fields->name, sample); })
253 struct thread *thread;
263 const char **entries;
266 #define DEFINE_STRARRAY(array) struct strarray strarray__##array = { \
267 .nr_entries = ARRAY_SIZE(array), \
271 #define DEFINE_STRARRAY_OFFSET(array, off) struct strarray strarray__##array = { \
273 .nr_entries = ARRAY_SIZE(array), \
277 static size_t __syscall_arg__scnprintf_strarray(char *bf, size_t size,
279 struct syscall_arg *arg)
281 struct strarray *sa = arg->parm;
282 int idx = arg->val - sa->offset;
284 if (idx < 0 || idx >= sa->nr_entries)
285 return scnprintf(bf, size, intfmt, arg->val);
287 return scnprintf(bf, size, "%s", sa->entries[idx]);
/*
 * String-table beautifier with a decimal fallback: values that the table
 * cannot name are printed with "%d".
 */
static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size,
					      struct syscall_arg *arg)
{
	const char *fallback_fmt = "%d";

	return __syscall_arg__scnprintf_strarray(bf, size, fallback_fmt, arg);
}
296 #define SCA_STRARRAY syscall_arg__scnprintf_strarray
298 #if defined(__i386__) || defined(__x86_64__)
300 * FIXME: Make this available to all arches as soon as the ioctl beautifier
301 * gets rewritten to support all arches.
/*
 * String-table beautifier with a hexadecimal fallback: values that the table
 * cannot name are printed with "%#x".
 */
static size_t syscall_arg__scnprintf_strhexarray(char *bf, size_t size,
						 struct syscall_arg *arg)
{
	const char *fallback_fmt = "%#x";

	return __syscall_arg__scnprintf_strarray(bf, size, fallback_fmt, arg);
}
309 #define SCA_STRHEXARRAY syscall_arg__scnprintf_strhexarray
310 #endif /* defined(__i386__) || defined(__x86_64__) */
312 static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
313 struct syscall_arg *arg);
315 #define SCA_FD syscall_arg__scnprintf_fd
317 static size_t syscall_arg__scnprintf_fd_at(char *bf, size_t size,
318 struct syscall_arg *arg)
323 return scnprintf(bf, size, "CWD");
325 return syscall_arg__scnprintf_fd(bf, size, arg);
328 #define SCA_FDAT syscall_arg__scnprintf_fd_at
330 static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
331 struct syscall_arg *arg);
333 #define SCA_CLOSE_FD syscall_arg__scnprintf_close_fd
335 static size_t syscall_arg__scnprintf_hex(char *bf, size_t size,
336 struct syscall_arg *arg)
338 return scnprintf(bf, size, "%#lx", arg->val);
341 #define SCA_HEX syscall_arg__scnprintf_hex
343 static size_t syscall_arg__scnprintf_int(char *bf, size_t size,
344 struct syscall_arg *arg)
346 return scnprintf(bf, size, "%d", arg->val);
349 #define SCA_INT syscall_arg__scnprintf_int
351 static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size,
352 struct syscall_arg *arg)
354 int printed = 0, prot = arg->val;
356 if (prot == PROT_NONE)
357 return scnprintf(bf, size, "NONE");
358 #define P_MMAP_PROT(n) \
359 if (prot & PROT_##n) { \
360 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
370 P_MMAP_PROT(GROWSDOWN);
371 P_MMAP_PROT(GROWSUP);
375 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", prot);
380 #define SCA_MMAP_PROT syscall_arg__scnprintf_mmap_prot
382 static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size,
383 struct syscall_arg *arg)
385 int printed = 0, flags = arg->val;
387 #define P_MMAP_FLAG(n) \
388 if (flags & MAP_##n) { \
389 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
394 P_MMAP_FLAG(PRIVATE);
398 P_MMAP_FLAG(ANONYMOUS);
399 P_MMAP_FLAG(DENYWRITE);
400 P_MMAP_FLAG(EXECUTABLE);
403 P_MMAP_FLAG(GROWSDOWN);
405 P_MMAP_FLAG(HUGETLB);
408 P_MMAP_FLAG(NONBLOCK);
409 P_MMAP_FLAG(NORESERVE);
410 P_MMAP_FLAG(POPULATE);
412 #ifdef MAP_UNINITIALIZED
413 P_MMAP_FLAG(UNINITIALIZED);
418 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
423 #define SCA_MMAP_FLAGS syscall_arg__scnprintf_mmap_flags
425 static size_t syscall_arg__scnprintf_mremap_flags(char *bf, size_t size,
426 struct syscall_arg *arg)
428 int printed = 0, flags = arg->val;
430 #define P_MREMAP_FLAG(n) \
431 if (flags & MREMAP_##n) { \
432 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
433 flags &= ~MREMAP_##n; \
436 P_MREMAP_FLAG(MAYMOVE);
438 P_MREMAP_FLAG(FIXED);
443 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
448 #define SCA_MREMAP_FLAGS syscall_arg__scnprintf_mremap_flags
450 static size_t syscall_arg__scnprintf_madvise_behavior(char *bf, size_t size,
451 struct syscall_arg *arg)
453 int behavior = arg->val;
456 #define P_MADV_BHV(n) case MADV_##n: return scnprintf(bf, size, #n)
459 P_MADV_BHV(SEQUENTIAL);
460 P_MADV_BHV(WILLNEED);
461 P_MADV_BHV(DONTNEED);
463 P_MADV_BHV(DONTFORK);
465 P_MADV_BHV(HWPOISON);
466 #ifdef MADV_SOFT_OFFLINE
467 P_MADV_BHV(SOFT_OFFLINE);
469 P_MADV_BHV(MERGEABLE);
470 P_MADV_BHV(UNMERGEABLE);
472 P_MADV_BHV(HUGEPAGE);
474 #ifdef MADV_NOHUGEPAGE
475 P_MADV_BHV(NOHUGEPAGE);
478 P_MADV_BHV(DONTDUMP);
487 return scnprintf(bf, size, "%#x", behavior);
490 #define SCA_MADV_BHV syscall_arg__scnprintf_madvise_behavior
492 static size_t syscall_arg__scnprintf_flock(char *bf, size_t size,
493 struct syscall_arg *arg)
495 int printed = 0, op = arg->val;
498 return scnprintf(bf, size, "NONE");
500 if ((op & LOCK_##cmd) == LOCK_##cmd) { \
501 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #cmd); \
516 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", op);
521 #define SCA_FLOCK syscall_arg__scnprintf_flock
523 static size_t syscall_arg__scnprintf_futex_op(char *bf, size_t size, struct syscall_arg *arg)
525 enum syscall_futex_args {
526 SCF_UADDR = (1 << 0),
529 SCF_TIMEOUT = (1 << 3),
530 SCF_UADDR2 = (1 << 4),
534 int cmd = op & FUTEX_CMD_MASK;
538 #define P_FUTEX_OP(n) case FUTEX_##n: printed = scnprintf(bf, size, #n);
539 P_FUTEX_OP(WAIT); arg->mask |= SCF_VAL3|SCF_UADDR2; break;
540 P_FUTEX_OP(WAKE); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
541 P_FUTEX_OP(FD); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
542 P_FUTEX_OP(REQUEUE); arg->mask |= SCF_VAL3|SCF_TIMEOUT; break;
543 P_FUTEX_OP(CMP_REQUEUE); arg->mask |= SCF_TIMEOUT; break;
544 P_FUTEX_OP(CMP_REQUEUE_PI); arg->mask |= SCF_TIMEOUT; break;
545 P_FUTEX_OP(WAKE_OP); break;
546 P_FUTEX_OP(LOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
547 P_FUTEX_OP(UNLOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
548 P_FUTEX_OP(TRYLOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2; break;
549 P_FUTEX_OP(WAIT_BITSET); arg->mask |= SCF_UADDR2; break;
550 P_FUTEX_OP(WAKE_BITSET); arg->mask |= SCF_UADDR2; break;
551 P_FUTEX_OP(WAIT_REQUEUE_PI); break;
552 default: printed = scnprintf(bf, size, "%#x", cmd); break;
555 if (op & FUTEX_PRIVATE_FLAG)
556 printed += scnprintf(bf + printed, size - printed, "|PRIV");
558 if (op & FUTEX_CLOCK_REALTIME)
559 printed += scnprintf(bf + printed, size - printed, "|CLKRT");
564 #define SCA_FUTEX_OP syscall_arg__scnprintf_futex_op
566 static const char *epoll_ctl_ops[] = { "ADD", "DEL", "MOD", };
567 static DEFINE_STRARRAY_OFFSET(epoll_ctl_ops, 1);
569 static const char *itimers[] = { "REAL", "VIRTUAL", "PROF", };
570 static DEFINE_STRARRAY(itimers);
572 static const char *whences[] = { "SET", "CUR", "END",
580 static DEFINE_STRARRAY(whences);
582 static const char *fcntl_cmds[] = {
583 "DUPFD", "GETFD", "SETFD", "GETFL", "SETFL", "GETLK", "SETLK",
584 "SETLKW", "SETOWN", "GETOWN", "SETSIG", "GETSIG", "F_GETLK64",
585 "F_SETLK64", "F_SETLKW64", "F_SETOWN_EX", "F_GETOWN_EX",
588 static DEFINE_STRARRAY(fcntl_cmds);
590 static const char *rlimit_resources[] = {
591 "CPU", "FSIZE", "DATA", "STACK", "CORE", "RSS", "NPROC", "NOFILE",
592 "MEMLOCK", "AS", "LOCKS", "SIGPENDING", "MSGQUEUE", "NICE", "RTPRIO",
595 static DEFINE_STRARRAY(rlimit_resources);
597 static const char *sighow[] = { "BLOCK", "UNBLOCK", "SETMASK", };
598 static DEFINE_STRARRAY(sighow);
600 static const char *clockid[] = {
601 "REALTIME", "MONOTONIC", "PROCESS_CPUTIME_ID", "THREAD_CPUTIME_ID",
602 "MONOTONIC_RAW", "REALTIME_COARSE", "MONOTONIC_COARSE",
604 static DEFINE_STRARRAY(clockid);
606 static const char *socket_families[] = {
607 "UNSPEC", "LOCAL", "INET", "AX25", "IPX", "APPLETALK", "NETROM",
608 "BRIDGE", "ATMPVC", "X25", "INET6", "ROSE", "DECnet", "NETBEUI",
609 "SECURITY", "KEY", "NETLINK", "PACKET", "ASH", "ECONET", "ATMSVC",
610 "RDS", "SNA", "IRDA", "PPPOX", "WANPIPE", "LLC", "IB", "CAN", "TIPC",
611 "BLUETOOTH", "IUCV", "RXRPC", "ISDN", "PHONET", "IEEE802154", "CAIF",
612 "ALG", "NFC", "VSOCK",
614 static DEFINE_STRARRAY(socket_families);
616 #ifndef SOCK_TYPE_MASK
617 #define SOCK_TYPE_MASK 0xf
620 static size_t syscall_arg__scnprintf_socket_type(char *bf, size_t size,
621 struct syscall_arg *arg)
625 flags = type & ~SOCK_TYPE_MASK;
627 type &= SOCK_TYPE_MASK;
629 * Can't use a strarray, MIPS may override for ABI reasons.
632 #define P_SK_TYPE(n) case SOCK_##n: printed = scnprintf(bf, size, #n); break;
637 P_SK_TYPE(SEQPACKET);
642 printed = scnprintf(bf, size, "%#x", type);
645 #define P_SK_FLAG(n) \
646 if (flags & SOCK_##n) { \
647 printed += scnprintf(bf + printed, size - printed, "|%s", #n); \
648 flags &= ~SOCK_##n; \
656 printed += scnprintf(bf + printed, size - printed, "|%#x", flags);
661 #define SCA_SK_TYPE syscall_arg__scnprintf_socket_type
664 #define MSG_PROBE 0x10
666 #ifndef MSG_WAITFORONE
667 #define MSG_WAITFORONE 0x10000
669 #ifndef MSG_SENDPAGE_NOTLAST
670 #define MSG_SENDPAGE_NOTLAST 0x20000
673 #define MSG_FASTOPEN 0x20000000
676 static size_t syscall_arg__scnprintf_msg_flags(char *bf, size_t size,
677 struct syscall_arg *arg)
679 int printed = 0, flags = arg->val;
682 return scnprintf(bf, size, "NONE");
683 #define P_MSG_FLAG(n) \
684 if (flags & MSG_##n) { \
685 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
691 P_MSG_FLAG(DONTROUTE);
696 P_MSG_FLAG(DONTWAIT);
703 P_MSG_FLAG(ERRQUEUE);
704 P_MSG_FLAG(NOSIGNAL);
706 P_MSG_FLAG(WAITFORONE);
707 P_MSG_FLAG(SENDPAGE_NOTLAST);
708 P_MSG_FLAG(FASTOPEN);
709 P_MSG_FLAG(CMSG_CLOEXEC);
713 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
718 #define SCA_MSG_FLAGS syscall_arg__scnprintf_msg_flags
720 static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size,
721 struct syscall_arg *arg)
726 if (mode == F_OK) /* 0 */
727 return scnprintf(bf, size, "F");
729 if (mode & n##_OK) { \
730 printed += scnprintf(bf + printed, size - printed, "%s", #n); \
740 printed += scnprintf(bf + printed, size - printed, "|%#x", mode);
745 #define SCA_ACCMODE syscall_arg__scnprintf_access_mode
747 static size_t syscall_arg__scnprintf_open_flags(char *bf, size_t size,
748 struct syscall_arg *arg)
750 int printed = 0, flags = arg->val;
752 if (!(flags & O_CREAT))
753 arg->mask |= 1 << (arg->idx + 1); /* Mask the mode parm */
756 return scnprintf(bf, size, "RDONLY");
758 if (flags & O_##n) { \
759 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
783 if ((flags & O_SYNC) == O_SYNC)
784 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", "SYNC");
796 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
801 #define SCA_OPEN_FLAGS syscall_arg__scnprintf_open_flags
803 static size_t syscall_arg__scnprintf_perf_flags(char *bf, size_t size,
804 struct syscall_arg *arg)
806 int printed = 0, flags = arg->val;
812 if (flags & PERF_FLAG_##n) { \
813 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
814 flags &= ~PERF_FLAG_##n; \
824 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
829 #define SCA_PERF_FLAGS syscall_arg__scnprintf_perf_flags
831 static size_t syscall_arg__scnprintf_eventfd_flags(char *bf, size_t size,
832 struct syscall_arg *arg)
834 int printed = 0, flags = arg->val;
837 return scnprintf(bf, size, "NONE");
839 if (flags & EFD_##n) { \
840 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
850 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
855 #define SCA_EFD_FLAGS syscall_arg__scnprintf_eventfd_flags
857 static size_t syscall_arg__scnprintf_pipe_flags(char *bf, size_t size,
858 struct syscall_arg *arg)
860 int printed = 0, flags = arg->val;
863 if (flags & O_##n) { \
864 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
873 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
878 #define SCA_PIPE_FLAGS syscall_arg__scnprintf_pipe_flags
880 static size_t syscall_arg__scnprintf_signum(char *bf, size_t size, struct syscall_arg *arg)
885 #define P_SIGNUM(n) case SIG##n: return scnprintf(bf, size, #n)
928 return scnprintf(bf, size, "%#x", sig);
931 #define SCA_SIGNUM syscall_arg__scnprintf_signum
933 #if defined(__i386__) || defined(__x86_64__)
935 * FIXME: Make this available to all arches.
937 #define TCGETS 0x5401
939 static const char *tioctls[] = {
940 "TCGETS", "TCSETS", "TCSETSW", "TCSETSF", "TCGETA", "TCSETA", "TCSETAW",
941 "TCSETAF", "TCSBRK", "TCXONC", "TCFLSH", "TIOCEXCL", "TIOCNXCL",
942 "TIOCSCTTY", "TIOCGPGRP", "TIOCSPGRP", "TIOCOUTQ", "TIOCSTI",
943 "TIOCGWINSZ", "TIOCSWINSZ", "TIOCMGET", "TIOCMBIS", "TIOCMBIC",
944 "TIOCMSET", "TIOCGSOFTCAR", "TIOCSSOFTCAR", "FIONREAD", "TIOCLINUX",
945 "TIOCCONS", "TIOCGSERIAL", "TIOCSSERIAL", "TIOCPKT", "FIONBIO",
946 "TIOCNOTTY", "TIOCSETD", "TIOCGETD", "TCSBRKP", [0x27] = "TIOCSBRK",
947 "TIOCCBRK", "TIOCGSID", "TCGETS2", "TCSETS2", "TCSETSW2", "TCSETSF2",
948 "TIOCGRS485", "TIOCSRS485", "TIOCGPTN", "TIOCSPTLCK",
949 "TIOCGDEV||TCGETX", "TCSETX", "TCSETXF", "TCSETXW", "TIOCSIG",
950 "TIOCVHANGUP", "TIOCGPKT", "TIOCGPTLCK", "TIOCGEXCL",
951 [0x50] = "FIONCLEX", "FIOCLEX", "FIOASYNC", "TIOCSERCONFIG",
952 "TIOCSERGWILD", "TIOCSERSWILD", "TIOCGLCKTRMIOS", "TIOCSLCKTRMIOS",
953 "TIOCSERGSTRUCT", "TIOCSERGETLSR", "TIOCSERGETMULTI", "TIOCSERSETMULTI",
954 "TIOCMIWAIT", "TIOCGICOUNT", [0x60] = "FIOQSIZE",
957 static DEFINE_STRARRAY_OFFSET(tioctls, 0x5401);
958 #endif /* defined(__i386__) || defined(__x86_64__) */
960 #define STRARRAY(arg, name, array) \
961 .arg_scnprintf = { [arg] = SCA_STRARRAY, }, \
962 .arg_parm = { [arg] = &strarray__##array, }
964 static struct syscall_fmt {
967 size_t (*arg_scnprintf[6])(char *bf, size_t size, struct syscall_arg *arg);
973 { .name = "access", .errmsg = true,
974 .arg_scnprintf = { [1] = SCA_ACCMODE, /* mode */ }, },
975 { .name = "arch_prctl", .errmsg = true, .alias = "prctl", },
976 { .name = "brk", .hexret = true,
977 .arg_scnprintf = { [0] = SCA_HEX, /* brk */ }, },
978 { .name = "clock_gettime", .errmsg = true, STRARRAY(0, clk_id, clockid), },
979 { .name = "close", .errmsg = true,
980 .arg_scnprintf = { [0] = SCA_CLOSE_FD, /* fd */ }, },
981 { .name = "connect", .errmsg = true, },
982 { .name = "dup", .errmsg = true,
983 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
984 { .name = "dup2", .errmsg = true,
985 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
986 { .name = "dup3", .errmsg = true,
987 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
988 { .name = "epoll_ctl", .errmsg = true, STRARRAY(1, op, epoll_ctl_ops), },
989 { .name = "eventfd2", .errmsg = true,
990 .arg_scnprintf = { [1] = SCA_EFD_FLAGS, /* flags */ }, },
991 { .name = "faccessat", .errmsg = true,
992 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
993 { .name = "fadvise64", .errmsg = true,
994 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
995 { .name = "fallocate", .errmsg = true,
996 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
997 { .name = "fchdir", .errmsg = true,
998 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
999 { .name = "fchmod", .errmsg = true,
1000 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1001 { .name = "fchmodat", .errmsg = true,
1002 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
1003 { .name = "fchown", .errmsg = true,
1004 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1005 { .name = "fchownat", .errmsg = true,
1006 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
1007 { .name = "fcntl", .errmsg = true,
1008 .arg_scnprintf = { [0] = SCA_FD, /* fd */
1009 [1] = SCA_STRARRAY, /* cmd */ },
1010 .arg_parm = { [1] = &strarray__fcntl_cmds, /* cmd */ }, },
1011 { .name = "fdatasync", .errmsg = true,
1012 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1013 { .name = "flock", .errmsg = true,
1014 .arg_scnprintf = { [0] = SCA_FD, /* fd */
1015 [1] = SCA_FLOCK, /* cmd */ }, },
1016 { .name = "fsetxattr", .errmsg = true,
1017 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1018 { .name = "fstat", .errmsg = true, .alias = "newfstat",
1019 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1020 { .name = "fstatat", .errmsg = true, .alias = "newfstatat",
1021 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1022 { .name = "fstatfs", .errmsg = true,
1023 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1024 { .name = "fsync", .errmsg = true,
1025 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1026 { .name = "ftruncate", .errmsg = true,
1027 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1028 { .name = "futex", .errmsg = true,
1029 .arg_scnprintf = { [1] = SCA_FUTEX_OP, /* op */ }, },
1030 { .name = "futimesat", .errmsg = true,
1031 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
1032 { .name = "getdents", .errmsg = true,
1033 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1034 { .name = "getdents64", .errmsg = true,
1035 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1036 { .name = "getitimer", .errmsg = true, STRARRAY(0, which, itimers), },
1037 { .name = "getrlimit", .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
1038 { .name = "ioctl", .errmsg = true,
1039 .arg_scnprintf = { [0] = SCA_FD, /* fd */
1040 #if defined(__i386__) || defined(__x86_64__)
1042 * FIXME: Make this available to all arches.
1044 [1] = SCA_STRHEXARRAY, /* cmd */
1045 [2] = SCA_HEX, /* arg */ },
1046 .arg_parm = { [1] = &strarray__tioctls, /* cmd */ }, },
1048 [2] = SCA_HEX, /* arg */ }, },
1050 { .name = "kill", .errmsg = true,
1051 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1052 { .name = "linkat", .errmsg = true,
1053 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
1054 { .name = "lseek", .errmsg = true,
1055 .arg_scnprintf = { [0] = SCA_FD, /* fd */
1056 [2] = SCA_STRARRAY, /* whence */ },
1057 .arg_parm = { [2] = &strarray__whences, /* whence */ }, },
1058 { .name = "lstat", .errmsg = true, .alias = "newlstat", },
1059 { .name = "madvise", .errmsg = true,
1060 .arg_scnprintf = { [0] = SCA_HEX, /* start */
1061 [2] = SCA_MADV_BHV, /* behavior */ }, },
1062 { .name = "mkdirat", .errmsg = true,
1063 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
1064 { .name = "mknodat", .errmsg = true,
1065 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
1066 { .name = "mlock", .errmsg = true,
1067 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1068 { .name = "mlockall", .errmsg = true,
1069 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1070 { .name = "mmap", .hexret = true,
1071 .arg_scnprintf = { [0] = SCA_HEX, /* addr */
1072 [2] = SCA_MMAP_PROT, /* prot */
1073 [3] = SCA_MMAP_FLAGS, /* flags */
1074 [4] = SCA_FD, /* fd */ }, },
1075 { .name = "mprotect", .errmsg = true,
1076 .arg_scnprintf = { [0] = SCA_HEX, /* start */
1077 [2] = SCA_MMAP_PROT, /* prot */ }, },
1078 { .name = "mremap", .hexret = true,
1079 .arg_scnprintf = { [0] = SCA_HEX, /* addr */
1080 [3] = SCA_MREMAP_FLAGS, /* flags */
1081 [4] = SCA_HEX, /* new_addr */ }, },
1082 { .name = "munlock", .errmsg = true,
1083 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1084 { .name = "munmap", .errmsg = true,
1085 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1086 { .name = "name_to_handle_at", .errmsg = true,
1087 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1088 { .name = "newfstatat", .errmsg = true,
1089 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1090 { .name = "open", .errmsg = true,
1091 .arg_scnprintf = { [1] = SCA_OPEN_FLAGS, /* flags */ }, },
1092 { .name = "open_by_handle_at", .errmsg = true,
1093 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1094 [2] = SCA_OPEN_FLAGS, /* flags */ }, },
1095 { .name = "openat", .errmsg = true,
1096 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1097 [2] = SCA_OPEN_FLAGS, /* flags */ }, },
1098 { .name = "perf_event_open", .errmsg = true,
1099 .arg_scnprintf = { [1] = SCA_INT, /* pid */
1100 [2] = SCA_INT, /* cpu */
1101 [3] = SCA_FD, /* group_fd */
1102 [4] = SCA_PERF_FLAGS, /* flags */ }, },
1103 { .name = "pipe2", .errmsg = true,
1104 .arg_scnprintf = { [1] = SCA_PIPE_FLAGS, /* flags */ }, },
1105 { .name = "poll", .errmsg = true, .timeout = true, },
1106 { .name = "ppoll", .errmsg = true, .timeout = true, },
1107 { .name = "pread", .errmsg = true, .alias = "pread64",
1108 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1109 { .name = "preadv", .errmsg = true, .alias = "pread",
1110 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1111 { .name = "prlimit64", .errmsg = true, STRARRAY(1, resource, rlimit_resources), },
1112 { .name = "pwrite", .errmsg = true, .alias = "pwrite64",
1113 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1114 { .name = "pwritev", .errmsg = true,
1115 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1116 { .name = "read", .errmsg = true,
1117 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1118 { .name = "readlinkat", .errmsg = true,
1119 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1120 { .name = "readv", .errmsg = true,
1121 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1122 { .name = "recvfrom", .errmsg = true,
1123 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1124 { .name = "recvmmsg", .errmsg = true,
1125 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1126 { .name = "recvmsg", .errmsg = true,
1127 .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
1128 { .name = "renameat", .errmsg = true,
1129 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1130 { .name = "rt_sigaction", .errmsg = true,
1131 .arg_scnprintf = { [0] = SCA_SIGNUM, /* sig */ }, },
1132 { .name = "rt_sigprocmask", .errmsg = true, STRARRAY(0, how, sighow), },
1133 { .name = "rt_sigqueueinfo", .errmsg = true,
1134 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1135 { .name = "rt_tgsigqueueinfo", .errmsg = true,
1136 .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
1137 { .name = "select", .errmsg = true, .timeout = true, },
1138 { .name = "sendmmsg", .errmsg = true,
1139 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1140 { .name = "sendmsg", .errmsg = true,
1141 .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
1142 { .name = "sendto", .errmsg = true,
1143 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1144 { .name = "setitimer", .errmsg = true, STRARRAY(0, which, itimers), },
1145 { .name = "setrlimit", .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
1146 { .name = "shutdown", .errmsg = true,
1147 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1148 { .name = "socket", .errmsg = true,
1149 .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
1150 [1] = SCA_SK_TYPE, /* type */ },
1151 .arg_parm = { [0] = &strarray__socket_families, /* family */ }, },
1152 { .name = "socketpair", .errmsg = true,
1153 .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
1154 [1] = SCA_SK_TYPE, /* type */ },
1155 .arg_parm = { [0] = &strarray__socket_families, /* family */ }, },
1156 { .name = "stat", .errmsg = true, .alias = "newstat", },
1157 { .name = "symlinkat", .errmsg = true,
1158 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1159 { .name = "tgkill", .errmsg = true,
1160 .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
1161 { .name = "tkill", .errmsg = true,
1162 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1163 { .name = "uname", .errmsg = true, .alias = "newuname", },
1164 { .name = "unlinkat", .errmsg = true,
1165 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1166 { .name = "utimensat", .errmsg = true,
1167 .arg_scnprintf = { [0] = SCA_FDAT, /* dirfd */ }, },
1168 { .name = "write", .errmsg = true,
1169 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1170 { .name = "writev", .errmsg = true,
1171 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1174 static int syscall_fmt__cmp(const void *name, const void *fmtp)
1176 const struct syscall_fmt *fmt = fmtp;
1177 return strcmp(name, fmt->name);
1180 static struct syscall_fmt *syscall_fmt__find(const char *name)
1182 const int nmemb = ARRAY_SIZE(syscall_fmts);
1183 return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp);
1187 struct event_format *tp_format;
1189 struct format_field *args;
1192 struct syscall_fmt *fmt;
1193 size_t (**arg_scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
1197 static size_t fprintf_duration(unsigned long t, FILE *fp)
1199 double duration = (double)t / NSEC_PER_MSEC;
1200 size_t printed = fprintf(fp, "(");
1202 if (duration >= 1.0)
1203 printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration);
1204 else if (duration >= 0.01)
1205 printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration);
1207 printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration);
1208 return printed + fprintf(fp, "): ");
1211 struct thread_trace {
1215 unsigned long nr_events;
1216 unsigned long pfmaj, pfmin;
1224 struct intlist *syscall_stats;
1227 static struct thread_trace *thread_trace__new(void)
1229 struct thread_trace *ttrace = zalloc(sizeof(struct thread_trace));
1232 ttrace->paths.max = -1;
1234 ttrace->syscall_stats = intlist__new(NULL);
1239 static struct thread_trace *thread__trace(struct thread *thread, FILE *fp)
1241 struct thread_trace *ttrace;
1246 if (thread__priv(thread) == NULL)
1247 thread__set_priv(thread, thread_trace__new());
1249 if (thread__priv(thread) == NULL)
1252 ttrace = thread__priv(thread);
1253 ++ttrace->nr_events;
1257 color_fprintf(fp, PERF_COLOR_RED,
1258 "WARNING: not enough memory, dropping samples!\n");
1262 #define TRACE_PFMAJ (1 << 0)
1263 #define TRACE_PFMIN (1 << 1)
1266 struct perf_tool tool;
1273 struct syscall *table;
1275 struct perf_evsel *sys_enter,
1279 struct record_opts opts;
1280 struct perf_evlist *evlist;
1281 struct machine *host;
1282 struct thread *current;
1285 unsigned long nr_events;
1286 struct strlist *ev_qualifier;
1291 const char *last_vfs_getname;
1292 struct intlist *tid_list;
1293 struct intlist *pid_list;
1298 double duration_filter;
1304 bool not_ev_qualifier;
1308 bool multiple_threads;
1312 bool show_tool_stats;
1313 bool trace_syscalls;
1318 static int trace__set_fd_pathname(struct thread *thread, int fd, const char *pathname)
1320 struct thread_trace *ttrace = thread__priv(thread);
1322 if (fd > ttrace->paths.max) {
1323 char **npath = realloc(ttrace->paths.table, (fd + 1) * sizeof(char *));
1328 if (ttrace->paths.max != -1) {
1329 memset(npath + ttrace->paths.max + 1, 0,
1330 (fd - ttrace->paths.max) * sizeof(char *));
1332 memset(npath, 0, (fd + 1) * sizeof(char *));
1335 ttrace->paths.table = npath;
1336 ttrace->paths.max = fd;
1339 ttrace->paths.table[fd] = strdup(pathname);
1341 return ttrace->paths.table[fd] != NULL ? 0 : -1;
/*
 * Resolve @fd of @thread to a pathname by reading its /proc symlink and store
 * the result with trace__set_fd_pathname().
 *
 * The link is /proc/<pid>/fd/<fd> when pid and tid are equal, otherwise
 * /proc/<pid>/task/<tid>/fd/<fd>. lstat() is used first so that a target that
 * would not fit in the local buffer (including its terminator) is rejected,
 * and the readlink() result is checked against the size lstat() reported.
 */
1344 static int thread__read_fd_path(struct thread *thread, int fd)
1346 char linkname[PATH_MAX], pathname[PATH_MAX];
1350 if (thread->pid_ == thread->tid) {
1351 scnprintf(linkname, sizeof(linkname),
1352 "/proc/%d/fd/%d", thread->pid_, fd);
1354 scnprintf(linkname, sizeof(linkname),
1355 "/proc/%d/task/%d/fd/%d", thread->pid_, thread->tid, fd);
1358 if (lstat(linkname, &st) < 0 || st.st_size + 1 > (off_t)sizeof(pathname))
1361 ret = readlink(linkname, pathname, sizeof(pathname));
1363 if (ret < 0 || ret > st.st_size)
/* readlink() does not NUL-terminate its output */
1366 pathname[ret] = '\0';
1367 return trace__set_fd_pathname(thread, fd, pathname);
/*
 * Return the cached pathname for @fd of @thread.
 *
 * When the fd is outside the table or has no entry yet, the lookup falls back
 * to thread__read_fd_path() and counts the attempt in
 * trace->stats.proc_getname.
 */
1370 static const char *thread__fd_path(struct thread *thread, int fd,
1371 struct trace *trace)
1373 struct thread_trace *ttrace = thread__priv(thread);
1381 if ((fd > ttrace->paths.max || ttrace->paths.table[fd] == NULL)) {
1384 ++trace->stats.proc_getname;
1385 if (thread__read_fd_path(thread, fd))
1389 return ttrace->paths.table[fd];
/*
 * Format a file-descriptor argument into @bf: the decimal fd, followed by
 * "<path>" using the path obtained from thread__fd_path(). Output is bounded
 * by @size; the running length is accumulated in "printed".
 */
1392 static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
1393 struct syscall_arg *arg)
1396 size_t printed = scnprintf(bf, size, "%d", fd);
1397 const char *path = thread__fd_path(arg->thread, fd, arg->trace);
1400 printed += scnprintf(bf + printed, size - printed, "<%s>", path);
/*
 * Format an fd argument exactly like syscall_arg__scnprintf_fd() and then
 * drop the cached path for that fd (when the thread has trace state and the
 * fd is within the table), so a later reuse of the number is looked up anew.
 */
1405 static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
1406 struct syscall_arg *arg)
1409 size_t printed = syscall_arg__scnprintf_fd(bf, size, arg);
1410 struct thread_trace *ttrace = thread__priv(arg->thread);
1412 if (ttrace && fd >= 0 && fd <= ttrace->paths.max)
1413 zfree(&ttrace->paths.table[fd]);
/*
 * Return true when @t is shorter than the configured duration filter.
 * duration_filter is scaled by NSEC_PER_MSEC before the comparison, so @t is
 * expected in nanoseconds and the filter in milliseconds.
 */
1418 static bool trace__filter_duration(struct trace *trace, double t)
1420 return t < (trace->duration_filter * NSEC_PER_MSEC);
/*
 * Print @tstamp to @fp as an offset from trace->base_time, divided by
 * NSEC_PER_MSEC, right-aligned in ten columns with three decimals and a
 * trailing space. Returns what fprintf() returned.
 */
1423 static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
1425 double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC;
1427 return fprintf(fp, "%10.3f ", ts);
/*
 * Flags shared between the signal handler and the main loop: "done" is polled
 * by trace__run() to start draining, "interrupted" records whether the signal
 * received was SIGINT.
 *
 * NOTE(review): these are plain bools written from a signal handler; consider
 * whether volatile sig_atomic_t is warranted.
 */
1430 static bool done = false;
1431 static bool interrupted = false;
1433 static void sig_handler(int sig)
1436 interrupted = sig == SIGINT;
/*
 * Print the common prefix of an output line to @fp: the relative timestamp,
 * the duration column and, when more than one thread is being traced, the
 * thread id, preceded by up to 14 characters of the command name if show_comm
 * is set. The character counts of the pieces are summed in "printed".
 */
1439 static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread,
1440 u64 duration, u64 tstamp, FILE *fp)
1442 size_t printed = trace__fprintf_tstamp(trace, tstamp, fp);
1443 printed += fprintf_duration(duration, fp);
1445 if (trace->multiple_threads) {
1446 if (trace->show_comm)
1447 printed += fprintf(fp, "%.14s/", thread__comm_str(thread));
1448 printed += fprintf(fp, "%d ", thread->tid);
/*
 * Feed a non-sample perf event to the machine model.
 *
 * PERF_RECORD_LOST is reported in red on the trace output (with the number of
 * lost events) before being passed to machine__process_lost_event(); the other
 * visible path hands the event to machine__process_event().
 */
1454 static int trace__process_event(struct trace *trace, struct machine *machine,
1455 union perf_event *event, struct perf_sample *sample)
1459 switch (event->header.type) {
1460 case PERF_RECORD_LOST:
1461 color_fprintf(trace->output, PERF_COLOR_RED,
1462 "LOST %" PRIu64 " events!\n", event->lost.lost);
1463 ret = machine__process_lost_event(machine, event, sample);
1465 ret = machine__process_event(machine, event, sample);
/*
 * perf_tool-style callback adapter: recovers the enclosing struct trace from
 * @tool with container_of() and forwards to trace__process_event(). Note the
 * argument order differs between the two signatures.
 */
1472 static int trace__tool_process(struct perf_tool *tool,
1473 union perf_event *event,
1474 struct perf_sample *sample,
1475 struct machine *machine)
1477 struct trace *trace = container_of(tool, struct trace, tool);
1478 return trace__process_event(trace, machine, event, sample);
/*
 * Prepare symbol resolution for a live trace: initialise the symbol layer,
 * create the host machine, register machine__resolve_kernel_addr as the
 * trace-event address resolver for it, and synthesize the already-running
 * threads of @evlist through trace__tool_process().
 */
1481 static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
1483 int err = symbol__init(NULL);
1488 trace->host = machine__new_host();
1489 if (trace->host == NULL)
1492 if (trace_event__register_resolver(trace->host, machine__resolve_kernel_addr) < 0)
1495 err = __machine__synthesize_threads(trace->host, &trace->tool, &trace->opts.target,
1496 evlist->threads, trace__tool_process, false,
1497 trace->opts.proc_map_timeout);
/*
 * Build the per-argument formatter table for syscall @sc.
 *
 * One slot per argument is allocated zeroed. For each tracepoint field the
 * formatter from the syscall's format description wins when one is present;
 * otherwise pointer-typed fields get the hexadecimal formatter. Slots left
 * NULL are handled by the caller's default formatting.
 */
1504 static int syscall__set_arg_fmts(struct syscall *sc)
1506 struct format_field *field;
1509 sc->arg_scnprintf = calloc(sc->nr_args, sizeof(void *));
1510 if (sc->arg_scnprintf == NULL)
1514 sc->arg_parm = sc->fmt->arg_parm;
1516 for (field = sc->args; field; field = field->next) {
1517 if (sc->fmt && sc->fmt->arg_scnprintf[idx])
1518 sc->arg_scnprintf[idx] = sc->fmt->arg_scnprintf[idx];
1519 else if (field->flags & FIELD_IS_POINTER)
1520 sc->arg_scnprintf[idx] = syscall_arg__scnprintf_hex;
/*
 * Populate the syscall table entry for syscall number @id.
 *
 * The name comes from audit_syscall_to_name(). The table is grown (and the new
 * tail zeroed) when @id exceeds syscalls.max. The entry's tracepoint format is
 * looked up as "syscalls:sys_enter_<name>", retrying with the alias from the
 * format description when the first lookup fails. Argument metadata is taken
 * from the tracepoint fields, and the result of syscall__set_arg_fmts() is
 * returned.
 */
1527 static int trace__read_syscall_info(struct trace *trace, int id)
1531 const char *name = audit_syscall_to_name(id, trace->audit.machine);
1536 if (id > trace->syscalls.max) {
1537 struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc));
1539 if (nsyscalls == NULL)
1542 if (trace->syscalls.max != -1) {
/* keep existing entries, clear only the ones past the old maximum */
1543 memset(nsyscalls + trace->syscalls.max + 1, 0,
1544 (id - trace->syscalls.max) * sizeof(*sc));
1546 memset(nsyscalls, 0, (id + 1) * sizeof(*sc));
1549 trace->syscalls.table = nsyscalls;
1550 trace->syscalls.max = id;
1553 sc = trace->syscalls.table + id;
1556 sc->fmt = syscall_fmt__find(sc->name);
1558 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name);
1559 sc->tp_format = trace_event__tp_format("syscalls", tp_name);
1561 if (sc->tp_format == NULL && sc->fmt && sc->fmt->alias) {
1562 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias);
1563 sc->tp_format = trace_event__tp_format("syscalls", tp_name);
1566 if (sc->tp_format == NULL)
1569 sc->args = sc->tp_format->format.fields;
1570 sc->nr_args = sc->tp_format->format.nr_fields;
1571 /* drop nr field - not relevant here; does not exist on older kernels */
1572 if (sc->args && strcmp(sc->args->name, "nr") == 0) {
1573 sc->args = sc->args->next;
/* exit and exit_group are flagged via is_exit so callers can treat them specially */
1577 sc->is_exit = !strcmp(name, "exit_group") || !strcmp(name, "exit");
1579 return syscall__set_arg_fmts(sc);
/*
 * Translate the user-supplied syscall names in trace->ev_qualifier into
 * syscall ids stored in trace->ev_qualifier_ids.
 *
 * An array with one slot per name is allocated up front. Each name is mapped
 * with audit_name_to_syscall(); names that cannot be mapped are listed on the
 * trace output after an "Invalid syscall" banner, comma separated. When the
 * error path is taken, hints are printed and the id array is released and its
 * count reset to zero.
 */
1582 static int trace__validate_ev_qualifier(struct trace *trace)
1585 struct str_node *pos;
1587 trace->ev_qualifier_ids.nr = strlist__nr_entries(trace->ev_qualifier);
1588 trace->ev_qualifier_ids.entries = malloc(trace->ev_qualifier_ids.nr *
1589 sizeof(trace->ev_qualifier_ids.entries[0]));
1591 if (trace->ev_qualifier_ids.entries == NULL) {
1592 fputs("Error:\tNot enough memory for allocating events qualifier ids\n",
1600 strlist__for_each(pos, trace->ev_qualifier) {
1601 const char *sc = pos->s;
1602 int id = audit_name_to_syscall(sc, trace->audit.machine);
1606 fputs("Error:\tInvalid syscall ", trace->output);
1609 fputs(", ", trace->output);
1612 fputs(sc, trace->output);
1615 trace->ev_qualifier_ids.entries[i++] = id;
1619 fputs("\nHint:\ttry 'perf list syscalls:sys_enter_*'"
1620 "\nHint:\tand: 'man syscalls'\n", trace->output);
1621 zfree(&trace->ev_qualifier_ids.entries);
1622 trace->ev_qualifier_ids.nr = 0;
1629 * args is to be interpreted as a series of longs but we need to handle
1630 * 8-byte unaligned accesses. args points to raw_data within the event
1631 * and raw_data is guaranteed to be 8-byte unaligned because it is
1632 * preceded by raw_size which is a u32. So we need to copy args to a temp
1633 * variable to read it. Most notably this avoids extended load instructions
1634 * on unaligned addresses
/*
 * Format the arguments of syscall @sc into @bf (at most @size bytes).
 *
 * @args is the raw argument area of the sample, read as consecutive
 * unsigned long slots; each slot is copied out with memcpy() because the
 * buffer may be unaligned. With tracepoint field metadata available, each
 * argument is emitted as "name: value" using the per-argument formatter when
 * one is set; the other visible path indexes the slots by position. Entries
 * are separated by ", ".
 */
1637 static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
1638 unsigned char *args, struct trace *trace,
1639 struct thread *thread)
1645 if (sc->args != NULL) {
1646 struct format_field *field;
1648 struct syscall_arg arg = {
1655 for (field = sc->args; field;
1656 field = field->next, ++arg.idx, bit <<= 1) {
1660 /* special care for unaligned accesses */
1661 p = args + sizeof(unsigned long) * arg.idx;
1662 memcpy(&val, p, sizeof(val));
1665 * Suppress this argument if its value is zero and
1666 * and we don't have a string associated in an
1670 !(sc->arg_scnprintf &&
1671 sc->arg_scnprintf[arg.idx] == SCA_STRARRAY &&
1672 sc->arg_parm[arg.idx]))
1675 printed += scnprintf(bf + printed, size - printed,
1676 "%s%s: ", printed ? ", " : "", field->name);
1677 if (sc->arg_scnprintf && sc->arg_scnprintf[arg.idx]) {
1680 arg.parm = sc->arg_parm[arg.idx];
1681 printed += sc->arg_scnprintf[arg.idx](bf + printed,
1682 size - printed, &arg);
1684 printed += scnprintf(bf + printed, size - printed,
1692 /* special care for unaligned accesses */
1693 p = args + sizeof(unsigned long) * i;
1694 memcpy(&val, p, sizeof(val));
1695 printed += scnprintf(bf + printed, size - printed,
1697 printed ? ", " : "", i, val);
/*
 * Signature shared by all per-event handlers stored in evsel->handler and
 * invoked from the sample dispatch code.
 */
1705 typedef int (*tracepoint_handler)(struct trace *trace, struct perf_evsel *evsel,
1706 union perf_event *event,
1707 struct perf_sample *sample);
/*
 * Return the syscall table entry for @id, reading its description on demand.
 *
 * Invalid ids are reported (with a running count and the event name) and
 * skipped. An entry that is out of range or has no name triggers
 * trace__read_syscall_info(); if the entry is still unusable afterwards, a
 * "Problems reading syscall" diagnostic is written to the trace output.
 */
1709 static struct syscall *trace__syscall_info(struct trace *trace,
1710 struct perf_evsel *evsel, int id)
1716 * XXX: Noticed on x86_64, reproduced as far back as 3.0.36, haven't tried
1717 * before that, leaving at a higher verbosity level till that is
1718 * explained. Reproduced with plain ftrace with:
1720 * echo 1 > /t/events/raw_syscalls/sys_exit/enable
1721 * grep "NR -1 " /t/trace_pipe
1723 * After generating some load on the machine.
1727 fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n",
1728 id, perf_evsel__name(evsel), ++n);
1733 if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) &&
1734 trace__read_syscall_info(trace, id))
1737 if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL))
1740 return &trace->syscalls.table[id];
1744 fprintf(trace->output, "Problems reading syscall %d", id);
1745 if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL)
1746 fprintf(trace->output, "(%s)", trace->syscalls.table[id].name);
1747 fputs(" information\n", trace->output);
/*
 * Account one completed syscall @id in the thread's per-syscall statistics.
 *
 * The stats object hangs off the int_node for @id in ttrace->syscall_stats
 * and is allocated the first time that syscall is seen. The duration fed to
 * update_stats() is the distance between the sample time and the recorded
 * entry time, computed only when an entry time exists and precedes the sample.
 */
1752 static void thread__update_stats(struct thread_trace *ttrace,
1753 int id, struct perf_sample *sample)
1755 struct int_node *inode;
1756 struct stats *stats;
1759 inode = intlist__findnew(ttrace->syscall_stats, id);
1763 stats = inode->priv;
1764 if (stats == NULL) {
1765 stats = malloc(sizeof(struct stats));
1769 inode->priv = stats;
1772 if (ttrace->entry_time && sample->time > ttrace->entry_time)
1773 duration = sample->time - ttrace->entry_time;
1775 update_stats(stats, duration);
/*
 * Flush a syscall entry line that is still pending on the current thread.
 *
 * When trace->current has an entry whose exit has not been seen yet, its
 * buffered text is printed followed by ") ..." so another event can be shown
 * in between, and the pending flag is cleared.
 */
1778 static int trace__printf_interrupted_entry(struct trace *trace, struct perf_sample *sample)
1780 struct thread_trace *ttrace;
1784 if (trace->current == NULL)
1787 ttrace = thread__priv(trace->current);
1789 if (!ttrace->entry_pending)
1792 duration = sample->time - ttrace->entry_time;
1794 printed = trace__fprintf_entry_head(trace, trace->current, duration, sample->time, trace->output);
1795 printed += fprintf(trace->output, "%-70s) ...\n", ttrace->entry_str);
1796 ttrace->entry_pending = false;
/*
 * Handler for the syscall entry tracepoint.
 *
 * Looks up the syscall description and the thread's trace state, then formats
 * "name(args" into the thread's entry_str buffer (1024 bytes, allocated on
 * first use) and records the entry time. Any interrupted entry of the
 * previously current thread is flushed first unless only a summary is wanted.
 * The line can be printed immediately (only when no duration filter is set and
 * summary-only mode is off); entry_pending marks a line that a later event
 * has to finish. trace->current is switched to this thread, adjusting
 * reference counts, and the reference taken by the thread lookup is dropped
 * before returning.
 */
1801 static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
1802 union perf_event *event __maybe_unused,
1803 struct perf_sample *sample)
1808 struct thread *thread;
1809 int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1;
1810 struct syscall *sc = trace__syscall_info(trace, evsel, id);
1811 struct thread_trace *ttrace;
1816 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1817 ttrace = thread__trace(thread, trace->output);
1821 args = perf_evsel__sc_tp_ptr(evsel, args, sample);
1823 if (ttrace->entry_str == NULL) {
1824 ttrace->entry_str = malloc(1024);
1825 if (!ttrace->entry_str)
1829 if (!trace->summary_only)
1830 trace__printf_interrupted_entry(trace, sample);
1832 ttrace->entry_time = sample->time;
1833 msg = ttrace->entry_str;
1834 printed += scnprintf(msg + printed, 1024 - printed, "%s(", sc->name);
1836 printed += syscall__scnprintf_args(sc, msg + printed, 1024 - printed,
1837 args, trace, thread);
1840 if (!trace->duration_filter && !trace->summary_only) {
1841 trace__fprintf_entry_head(trace, thread, 1, sample->time, trace->output);
1842 fprintf(trace->output, "%-70s\n", ttrace->entry_str);
1845 ttrace->entry_pending = true;
1847 if (trace->current != thread) {
1848 thread__put(trace->current);
1849 trace->current = thread__get(thread);
1853 thread__put(thread);
/*
 * Handler for the syscall exit tracepoint.
 *
 * Steps visible here: per-syscall statistics are fed through
 * thread__update_stats(); a successful return from the syscall whose id equals
 * audit.open_id binds the pathname captured by the vfs_getname probe to the
 * returned fd; the duration since entry is computed and checked against the
 * duration filter; and the line is completed, either by appending to the
 * pending entry text or by printing a "... [continued]: name()" marker.
 *
 * The return value is rendered according to the syscall's format description:
 * plain decimal when there is none, "-1 <ERRNO> <message>" for negative
 * results when errmsg is set, "0 Timeout" for zero when timeout is set, and
 * hexadecimal when hexret is set.
 */
1857 static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
1858 union perf_event *event __maybe_unused,
1859 struct perf_sample *sample)
1863 struct thread *thread;
1864 int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1;
1865 struct syscall *sc = trace__syscall_info(trace, evsel, id);
1866 struct thread_trace *ttrace;
1871 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1872 ttrace = thread__trace(thread, trace->output);
1877 thread__update_stats(ttrace, id, sample);
1879 ret = perf_evsel__sc_tp_uint(evsel, ret, sample);
1881 if (id == trace->audit.open_id && ret >= 0 && trace->last_vfs_getname) {
1882 trace__set_fd_pathname(thread, ret, trace->last_vfs_getname);
/* the captured name is single-use: clear it so it cannot be bound to another fd */
1883 trace->last_vfs_getname = NULL;
1884 ++trace->stats.vfs_getname;
1887 ttrace->exit_time = sample->time;
1889 if (ttrace->entry_time) {
1890 duration = sample->time - ttrace->entry_time;
1891 if (trace__filter_duration(trace, duration))
1893 } else if (trace->duration_filter)
1896 if (trace->summary_only)
1899 trace__fprintf_entry_head(trace, thread, duration, sample->time, trace->output);
1901 if (ttrace->entry_pending) {
1902 fprintf(trace->output, "%-70s", ttrace->entry_str);
1904 fprintf(trace->output, " ... [");
1905 color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued");
1906 fprintf(trace->output, "]: %s()", sc->name);
1909 if (sc->fmt == NULL) {
1911 fprintf(trace->output, ") = %ld", ret);
1912 } else if (ret < 0 && sc->fmt->errmsg) {
1913 char bf[STRERR_BUFSIZE];
1914 const char *emsg = strerror_r(-ret, bf, sizeof(bf)),
1915 *e = audit_errno_to_name(-ret);
1917 fprintf(trace->output, ") = -1 %s %s", e, emsg);
1918 } else if (ret == 0 && sc->fmt->timeout)
1919 fprintf(trace->output, ") = 0 Timeout");
1920 else if (sc->fmt->hexret)
1921 fprintf(trace->output, ") = %#lx", ret);
1925 fputc('\n', trace->output);
1927 ttrace->entry_pending = false;
1930 thread__put(thread);
/*
 * Handler for the vfs_getname probe: remembers the "pathname" field of the
 * sample in trace->last_vfs_getname so the syscall exit handler can associate
 * it with the file descriptor that gets returned.
 *
 * NOTE(review): the stored pointer refers into the sample's raw data; confirm
 * it stays valid until the exit event consumes it.
 */
1934 static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel,
1935 union perf_event *event __maybe_unused,
1936 struct perf_sample *sample)
1938 trace->last_vfs_getname = perf_evsel__rawptr(evsel, sample, "pathname");
/*
 * Handler for sched:sched_stat_runtime: adds the sample's "runtime" field,
 * converted from nanoseconds to milliseconds, to both the thread's and the
 * global runtime totals.
 *
 * A second path dumps the event's comm, pid, runtime and vruntime to the
 * trace output instead. Each path releases the thread reference obtained from
 * the lookup.
 */
1942 static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel,
1943 union perf_event *event __maybe_unused,
1944 struct perf_sample *sample)
1946 u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
1947 double runtime_ms = (double)runtime / NSEC_PER_MSEC;
1948 struct thread *thread = machine__findnew_thread(trace->host,
1951 struct thread_trace *ttrace = thread__trace(thread, trace->output);
1956 ttrace->runtime_ms += runtime_ms;
1957 trace->runtime_ms += runtime_ms;
1958 thread__put(thread);
1962 fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n",
1964 perf_evsel__strval(evsel, sample, "comm"),
1965 (pid_t)perf_evsel__intval(evsel, sample, "pid"),
1967 perf_evsel__intval(evsel, sample, "vruntime"));
1968 thread__put(thread);
/*
 * Generic handler for non-syscall events: flushes any pending syscall entry,
 * prints the timestamp, pads the duration column with "( ): " when syscalls
 * are traced too so columns line up, then prints "<event name>:" followed by
 * the tracepoint payload rendered by event_format__fprintf() when a format is
 * available, and a closing ")".
 */
1972 static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel,
1973 union perf_event *event __maybe_unused,
1974 struct perf_sample *sample)
1976 trace__printf_interrupted_entry(trace, sample);
1977 trace__fprintf_tstamp(trace, sample->time, trace->output);
1979 if (trace->trace_syscalls)
1980 fprintf(trace->output, "( ): ");
1982 fprintf(trace->output, "%s:", evsel->name);
1984 if (evsel->tp_format) {
1985 event_format__fprintf(evsel->tp_format, sample->cpu,
1986 sample->raw_data, sample->raw_size,
1990 fprintf(trace->output, ")\n");
/*
 * Print a resolved address to @f.
 *
 * The DSO long name followed by '@' is printed when a map is known and either
 * verbose mode or @print_dso asks for it. The symbol name plus hexadecimal
 * offset from the symbol start is printed when a symbol is known and either
 * verbose mode or @print_sym asks for it. The remaining branches fall back to
 * a raw hexadecimal address taken from @al or from @sample.
 */
1994 static void print_location(FILE *f, struct perf_sample *sample,
1995 struct addr_location *al,
1996 bool print_dso, bool print_sym)
1999 if ((verbose || print_dso) && al->map)
2000 fprintf(f, "%s@", al->map->dso->long_name);
2002 if ((verbose || print_sym) && al->sym)
2003 fprintf(f, "%s+0x%" PRIx64, al->sym->name,
2004 al->addr - al->sym->start);
2006 fprintf(f, "0x%" PRIx64, al->addr);
2008 fprintf(f, "0x%" PRIx64, sample->addr);
/*
 * Handler for software page-fault events.
 *
 * The output line has the form
 *   "<kind>fault [<code location>] => <data location> (<map type><level>)"
 * where the first location is resolved through the function maps and the
 * second is the faulting address (sample->addr), looked up in the variable
 * maps first and then in the function maps. Major and minor faults are
 * distinguished by the event's attr.config. Nothing is printed in
 * summary-only mode.
 */
2011 static int trace__pgfault(struct trace *trace,
2012 struct perf_evsel *evsel,
2013 union perf_event *event,
2014 struct perf_sample *sample)
2016 struct thread *thread;
2017 u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
2018 struct addr_location al;
2019 char map_type = 'd';
2020 struct thread_trace *ttrace;
2023 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
2024 ttrace = thread__trace(thread, trace->output);
2028 if (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ)
2033 if (trace->summary_only)
2036 thread__find_addr_location(thread, cpumode, MAP__FUNCTION,
2039 trace__fprintf_entry_head(trace, thread, 0, sample->time, trace->output);
2041 fprintf(trace->output, "%sfault [",
2042 evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ?
2045 print_location(trace->output, sample, &al, false, true);
2047 fprintf(trace->output, "] => ");
2049 thread__find_addr_location(thread, cpumode, MAP__VARIABLE,
2053 thread__find_addr_location(thread, cpumode,
2054 MAP__FUNCTION, sample->addr, &al);
2062 print_location(trace->output, sample, &al, true, false);
2064 fprintf(trace->output, " (%c%c)\n", map_type, al.level);
2068 thread__put(thread);
/*
 * Decide whether @sample should be ignored according to the pid/tid lists.
 *
 * The first test matches samples whose pid or tid appears in a configured
 * list; the second is reached only when at least one list exists but neither
 * matched. Presumably the former keeps the sample and the latter skips it
 * (confirm against the return statements).
 */
2072 static bool skip_sample(struct trace *trace, struct perf_sample *sample)
2074 if ((trace->pid_list && intlist__find(trace->pid_list, sample->pid)) ||
2075 (trace->tid_list && intlist__find(trace->tid_list, sample->tid)))
2078 if (trace->pid_list || trace->tid_list)
/*
 * perf_tool sample callback used when replaying a recorded file: filters the
 * sample through skip_sample(), latches the first sample time as base_time
 * (unless full timestamps were requested) and dispatches to the handler
 * attached to the event selector.
 */
2084 static int trace__process_sample(struct perf_tool *tool,
2085 union perf_event *event,
2086 struct perf_sample *sample,
2087 struct perf_evsel *evsel,
2088 struct machine *machine __maybe_unused)
2090 struct trace *trace = container_of(tool, struct trace, tool);
2093 tracepoint_handler handler = evsel->handler;
2095 if (skip_sample(trace, sample))
2098 if (!trace->full_time && trace->base_time == 0)
2099 trace->base_time = sample->time;
2103 handler(trace, evsel, event, sample);
/*
 * Turn the textual pid and tid target options into integer lists
 * (trace->pid_list / trace->tid_list). A list that cannot be built is
 * reported through pr_err().
 */
2109 static int parse_target_str(struct trace *trace)
2111 if (trace->opts.target.pid) {
2112 trace->pid_list = intlist__new(trace->opts.target.pid);
2113 if (trace->pid_list == NULL) {
2114 pr_err("Error parsing process id string\n");
2119 if (trace->opts.target.tid) {
2120 trace->tid_list = intlist__new(trace->opts.target.tid);
2121 if (trace->tid_list == NULL) {
2122 pr_err("Error parsing thread id string\n");
/*
 * Implement "perf trace record" by assembling an argument vector and handing
 * it to cmd_record().
 *
 * The vector is sized for the worst case: the fixed record arguments, the
 * syscall "-e" plus its event string, both page-fault event pairs and the
 * user's own @argv. Only the parts enabled by the trace options are actually
 * appended, so the count passed to cmd_record() is the number filled in (j),
 * not the allocated size.
 */
2130 static int trace__record(struct trace *trace, int argc, const char **argv)
2132 unsigned int rec_argc, i, j;
2133 const char **rec_argv;
2134 const char * const record_args[] = {
2141 const char * const sc_args[] = { "-e", };
2142 unsigned int sc_args_nr = ARRAY_SIZE(sc_args);
2143 const char * const majpf_args[] = { "-e", "major-faults" };
2144 unsigned int majpf_args_nr = ARRAY_SIZE(majpf_args);
2145 const char * const minpf_args[] = { "-e", "minor-faults" };
2146 unsigned int minpf_args_nr = ARRAY_SIZE(minpf_args);
2148 /* +1 is for the event string below */
2149 rec_argc = ARRAY_SIZE(record_args) + sc_args_nr + 1 +
2150 majpf_args_nr + minpf_args_nr + argc;
/* one extra zeroed slot keeps the vector NULL-terminated */
2151 rec_argv = calloc(rec_argc + 1, sizeof(char *));
2153 if (rec_argv == NULL)
2157 for (i = 0; i < ARRAY_SIZE(record_args); i++)
2158 rec_argv[j++] = record_args[i];
2160 if (trace->trace_syscalls) {
2161 for (i = 0; i < sc_args_nr; i++)
2162 rec_argv[j++] = sc_args[i];
2164 /* event string may be different for older kernels - e.g., RHEL6 */
2165 if (is_valid_tracepoint("raw_syscalls:sys_enter"))
2166 rec_argv[j++] = "raw_syscalls:sys_enter,raw_syscalls:sys_exit";
2167 else if (is_valid_tracepoint("syscalls:sys_enter"))
2168 rec_argv[j++] = "syscalls:sys_enter,syscalls:sys_exit";
2170 pr_err("Neither raw_syscalls nor syscalls events exist.\n");
2175 if (trace->trace_pgfaults & TRACE_PFMAJ)
2176 for (i = 0; i < majpf_args_nr; i++)
2177 rec_argv[j++] = majpf_args[i];
2179 if (trace->trace_pgfaults & TRACE_PFMIN)
2180 for (i = 0; i < minpf_args_nr; i++)
2181 rec_argv[j++] = minpf_args[i];
2183 for (i = 0; i < (unsigned int)argc; i++)
2184 rec_argv[j++] = argv[i];
2186 return cmd_record(j, rec_argv, NULL);
/* Forward declaration: trace__run() prints the summary before the definition appears. */
2189 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp);
/*
 * Best-effort: add the "probe:vfs_getname" tracepoint to @evlist with
 * trace__vfs_getname as its handler. The event is discarded when it lacks a
 * "pathname" field, since that is the only field the handler reads.
 */
2191 static void perf_evlist__add_vfs_getname(struct perf_evlist *evlist)
2193 struct perf_evsel *evsel = perf_evsel__newtp("probe", "vfs_getname");
2197 if (perf_evsel__field(evsel, "pathname") == NULL) {
2198 perf_evsel__delete(evsel);
2202 evsel->handler = trace__vfs_getname;
2203 perf_evlist__add(evlist, evsel);
/*
 * Add a software page-fault counter to @evlist, handled by trace__pgfault.
 * The attribute uses the given config value and a sample period of 1, i.e.
 * every fault produces a sample.
 */
2206 static int perf_evlist__add_pgfault(struct perf_evlist *evlist,
2209 struct perf_evsel *evsel;
2210 struct perf_event_attr attr = {
2211 .type = PERF_TYPE_SOFTWARE,
2215 attr.config = config;
2216 attr.sample_period = 1;
2218 event_attr_init(&attr);
2220 evsel = perf_evsel__new(&attr);
2224 evsel->handler = trace__pgfault;
2225 perf_evlist__add(evlist, evsel);
/*
 * Dispatch one event read from the ring buffers during a live trace.
 *
 * The first sample time becomes base_time unless full timestamps were
 * requested. Non-sample records go to trace__process_event(). Samples are
 * mapped back to their event selector by id; unknown ids and tracepoint
 * samples without a raw payload are reported and skipped, everything else is
 * passed to the selector's handler.
 */
2230 static void trace__handle_event(struct trace *trace, union perf_event *event, struct perf_sample *sample)
2232 const u32 type = event->header.type;
2233 struct perf_evsel *evsel;
2235 if (!trace->full_time && trace->base_time == 0)
2236 trace->base_time = sample->time;
2238 if (type != PERF_RECORD_SAMPLE) {
2239 trace__process_event(trace, trace->host, event, sample);
2243 evsel = perf_evlist__id2evsel(trace->evlist, sample->id);
2244 if (evsel == NULL) {
2245 fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample->id);
2249 if (evsel->attr.type == PERF_TYPE_TRACEPOINT &&
2250 sample->raw_data == NULL) {
2251 fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
2252 perf_evsel__name(evsel), sample->tid,
2253 sample->cpu, sample->raw_size);
2255 tracepoint_handler handler = evsel->handler;
2256 handler(trace, evsel, event, sample);
/*
 * Create the sys_enter and sys_exit syscall tracepoint events, initialise the
 * field accessors they need ("args" on enter, "ret" on exit), add both to the
 * event list and remember them in trace->syscalls.events.
 *
 * On failure the labels at the bottom unwind in reverse order of creation, so
 * jumping to out_delete_sys_exit releases both events.
 */
2260 static int trace__add_syscall_newtp(struct trace *trace)
2263 struct perf_evlist *evlist = trace->evlist;
2264 struct perf_evsel *sys_enter, *sys_exit;
2266 sys_enter = perf_evsel__syscall_newtp("sys_enter", trace__sys_enter);
2267 if (sys_enter == NULL)
2270 if (perf_evsel__init_sc_tp_ptr_field(sys_enter, args))
2271 goto out_delete_sys_enter;
2273 sys_exit = perf_evsel__syscall_newtp("sys_exit", trace__sys_exit);
2274 if (sys_exit == NULL)
2275 goto out_delete_sys_enter;
2277 if (perf_evsel__init_sc_tp_uint_field(sys_exit, ret))
2278 goto out_delete_sys_exit;
2280 perf_evlist__add(evlist, sys_enter);
2281 perf_evlist__add(evlist, sys_exit);
2283 trace->syscalls.events.sys_enter = sys_enter;
2284 trace->syscalls.events.sys_exit = sys_exit;
2290 out_delete_sys_exit:
2291 perf_evsel__delete_priv(sys_exit);
2292 out_delete_sys_enter:
2293 perf_evsel__delete_priv(sys_enter);
/*
 * Build a tracepoint filter expression on the "id" field from the validated
 * syscall id list (membership or exclusion depending on not_ev_qualifier) and
 * append it with "&&" to both syscall events. The exit event is only touched
 * when appending to the enter event succeeded.
 */
2297 static int trace__set_ev_qualifier_filter(struct trace *trace)
2300 char *filter = asprintf_expr_inout_ints("id", !trace->not_ev_qualifier,
2301 trace->ev_qualifier_ids.nr,
2302 trace->ev_qualifier_ids.entries);
2307 if (!perf_evsel__append_filter(trace->syscalls.events.sys_enter, "&&", filter))
2308 err = perf_evsel__append_filter(trace->syscalls.events.sys_exit, "&&", filter);
/*
 * Run a live trace, optionally forking the workload given in @argv.
 *
 * Setup: add the requested events (syscall tracepoints plus vfs_getname,
 * major/minor page faults, sched_stat_runtime), create the cpu/thread maps,
 * initialise symbols, configure and open the events, install pid and syscall
 * id filters, and mmap the ring buffers. SIGCHLD and SIGINT are routed to
 * sig_handler().
 *
 * Main loop: every mmap is drained, each event is parsed and passed to
 * trace__handle_event(). Once "done" is set the events are disabled and the
 * remaining buffered data is drained. When a pass produced no new events the
 * loop polls, with a 100 ms timeout while finishing and without a timeout
 * otherwise.
 *
 * Teardown: the current-thread reference is dropped, the optional summary and
 * tool statistics are printed, and the event list is deleted. The labels at
 * the bottom turn setup failures into messages on the trace output before
 * jumping to the common cleanup.
 */
2318 static int trace__run(struct trace *trace, int argc, const char **argv)
2320 struct perf_evlist *evlist = trace->evlist;
2321 struct perf_evsel *evsel;
2323 unsigned long before;
2324 const bool forks = argc > 0;
2325 bool draining = false;
2329 if (trace->trace_syscalls && trace__add_syscall_newtp(trace))
2330 goto out_error_raw_syscalls;
2332 if (trace->trace_syscalls)
2333 perf_evlist__add_vfs_getname(evlist);
2335 if ((trace->trace_pgfaults & TRACE_PFMAJ) &&
2336 perf_evlist__add_pgfault(evlist, PERF_COUNT_SW_PAGE_FAULTS_MAJ)) {
2340 if ((trace->trace_pgfaults & TRACE_PFMIN) &&
2341 perf_evlist__add_pgfault(evlist, PERF_COUNT_SW_PAGE_FAULTS_MIN))
2345 perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
2346 trace__sched_stat_runtime))
2347 goto out_error_sched_stat_runtime;
2349 err = perf_evlist__create_maps(evlist, &trace->opts.target);
2351 fprintf(trace->output, "Problems parsing the target to trace, check your options!\n");
2352 goto out_delete_evlist;
2355 err = trace__symbols_init(trace, evlist);
2357 fprintf(trace->output, "Problems initializing symbol libraries!\n");
2358 goto out_delete_evlist;
2361 perf_evlist__config(evlist, &trace->opts);
2363 signal(SIGCHLD, sig_handler);
2364 signal(SIGINT, sig_handler);
2367 err = perf_evlist__prepare_workload(evlist, &trace->opts.target,
2370 fprintf(trace->output, "Couldn't run the workload!\n");
2371 goto out_delete_evlist;
2375 err = perf_evlist__open(evlist);
2377 goto out_error_open;
2380 * Better not use !target__has_task() here because we need to cover the
2381 * case where no threads were specified in the command line, but a
2382 * workload was, and in that case we will fill in the thread_map when
2383 * we fork the workload in perf_evlist__prepare_workload.
2385 if (trace->filter_pids.nr > 0)
2386 err = perf_evlist__set_filter_pids(evlist, trace->filter_pids.nr, trace->filter_pids.entries);
2387 else if (thread_map__pid(evlist->threads, 0) == -1)
2388 err = perf_evlist__set_filter_pid(evlist, getpid());
2393 if (trace->ev_qualifier_ids.nr > 0) {
2394 err = trace__set_ev_qualifier_filter(trace);
2398 pr_debug("event qualifier tracepoint filter: %s\n",
2399 trace->syscalls.events.sys_exit->filter);
2402 err = perf_evlist__apply_filters(evlist, &evsel);
2404 goto out_error_apply_filters;
2406 err = perf_evlist__mmap(evlist, trace->opts.mmap_pages, false);
2408 goto out_error_mmap;
2410 if (!target__none(&trace->opts.target))
2411 perf_evlist__enable(evlist);
2414 perf_evlist__start_workload(evlist);
/* thread ids are only worth printing when more than one thread can show up */
2416 trace->multiple_threads = thread_map__pid(evlist->threads, 0) == -1 ||
2417 evlist->threads->nr > 1 ||
2418 perf_evlist__first(evlist)->attr.inherit;
/* snapshot used below to detect a pass over the buffers that produced nothing */
2420 before = trace->nr_events;
2422 for (i = 0; i < evlist->nr_mmaps; i++) {
2423 union perf_event *event;
2425 while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) {
2426 struct perf_sample sample;
2430 err = perf_evlist__parse_sample(evlist, event, &sample);
2432 fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err);
2436 trace__handle_event(trace, event, &sample);
2438 perf_evlist__mmap_consume(evlist, i);
2443 if (done && !draining) {
2444 perf_evlist__disable(evlist);
2450 if (trace->nr_events == before) {
2451 int timeout = done ? 100 : -1;
2453 if (!draining && perf_evlist__poll(evlist, timeout) > 0) {
2454 if (perf_evlist__filter_pollfd(evlist, POLLERR | POLLHUP) == 0)
2464 thread__zput(trace->current);
2466 perf_evlist__disable(evlist);
2470 trace__fprintf_thread_summary(trace, trace->output);
2472 if (trace->show_tool_stats) {
2473 fprintf(trace->output, "Stats:\n "
2474 " vfs_getname : %" PRIu64 "\n"
2475 " proc_getname: %" PRIu64 "\n",
2476 trace->stats.vfs_getname,
2477 trace->stats.proc_getname);
2482 perf_evlist__delete(evlist);
2483 trace->evlist = NULL;
2484 trace->live = false;
2487 char errbuf[BUFSIZ];
2489 out_error_sched_stat_runtime:
2490 debugfs__strerror_open_tp(errno, errbuf, sizeof(errbuf), "sched", "sched_stat_runtime");
2493 out_error_raw_syscalls:
2494 debugfs__strerror_open_tp(errno, errbuf, sizeof(errbuf), "raw_syscalls", "sys_(enter|exit)");
2498 perf_evlist__strerror_mmap(evlist, errno, errbuf, sizeof(errbuf));
2502 perf_evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf));
2505 fprintf(trace->output, "%s\n", errbuf);
2506 goto out_delete_evlist;
2508 out_error_apply_filters:
2509 fprintf(trace->output,
2510 "Failed to set filter \"%s\" on event %s with %d (%s)\n",
2511 evsel->filter, perf_evsel__name(evsel), errno,
2512 strerror_r(errno, errbuf, sizeof(errbuf)));
2513 goto out_delete_evlist;
2516 fprintf(trace->output, "Not enough memory to run!\n");
2517 goto out_delete_evlist;
2520 fprintf(trace->output, "errno=%d,%s\n", errno, strerror(errno));
2521 goto out_delete_evlist;
/*
 * Replay a previously recorded perf data file through the same handlers used
 * for live tracing.
 *
 * The perf_tool callbacks are wired up for time-ordered event processing, a
 * session is opened in read mode, and the host machine is taken from the
 * session. Syscall enter/exit events are looked up under "raw_syscalls:" with
 * a fallback to the older "syscalls:" names and initialised with the matching
 * handler and field accessor. Software page-fault events get trace__pgfault as
 * their handler. After the pid/tid target strings are parsed the events are
 * processed, and the per-thread summary is printed when requested and
 * processing did not fail.
 */
2524 static int trace__replay(struct trace *trace)
2526 const struct perf_evsel_str_handler handlers[] = {
2527 { "probe:vfs_getname", trace__vfs_getname, },
2529 struct perf_data_file file = {
2531 .mode = PERF_DATA_MODE_READ,
2532 .force = trace->force,
2534 struct perf_session *session;
2535 struct perf_evsel *evsel;
2538 trace->tool.sample = trace__process_sample;
2539 trace->tool.mmap = perf_event__process_mmap;
2540 trace->tool.mmap2 = perf_event__process_mmap2;
2541 trace->tool.comm = perf_event__process_comm;
2542 trace->tool.exit = perf_event__process_exit;
2543 trace->tool.fork = perf_event__process_fork;
2544 trace->tool.attr = perf_event__process_attr;
2545 trace->tool.tracing_data = perf_event__process_tracing_data;
2546 trace->tool.build_id = perf_event__process_build_id;
2548 trace->tool.ordered_events = true;
2549 trace->tool.ordering_requires_timestamps = true;
2551 /* add tid to output */
2552 trace->multiple_threads = true;
2554 session = perf_session__new(&file, false, &trace->tool);
2555 if (session == NULL)
2558 if (symbol__init(&session->header.env) < 0)
2561 trace->host = &session->machines.host;
2563 err = perf_session__set_tracepoints_handlers(session, handlers);
2567 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2568 "raw_syscalls:sys_enter");
2569 /* older kernels have syscalls tp versus raw_syscalls */
2571 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2572 "syscalls:sys_enter");
2575 (perf_evsel__init_syscall_tp(evsel, trace__sys_enter) < 0 ||
2576 perf_evsel__init_sc_tp_ptr_field(evsel, args))) {
2577 pr_err("Error during initialize raw_syscalls:sys_enter event\n");
2581 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2582 "raw_syscalls:sys_exit");
2584 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2585 "syscalls:sys_exit");
2587 (perf_evsel__init_syscall_tp(evsel, trace__sys_exit) < 0 ||
2588 perf_evsel__init_sc_tp_uint_field(evsel, ret))) {
2589 pr_err("Error during initialize raw_syscalls:sys_exit event\n");
2593 evlist__for_each(session->evlist, evsel) {
2594 if (evsel->attr.type == PERF_TYPE_SOFTWARE &&
2595 (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ||
2596 evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MIN ||
2597 evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS))
2598 evsel->handler = trace__pgfault;
2601 err = parse_target_str(trace);
2607 err = perf_session__process_events(session);
2609 pr_err("Failed to process events, error %d", err);
2611 else if (trace->summary)
2612 trace__fprintf_thread_summary(trace, trace->output);
2615 perf_session__delete(session);
/* Print the heading that introduces the per-thread summary on @fp. */
2620 static size_t trace__fprintf_threads_header(FILE *fp)
2624 printed = fprintf(fp, "\n Summary of events:\n\n");
/*
 * Print the per-syscall statistics table of one thread to @fp.
 *
 * Each node of ttrace->syscall_stats represents one syscall id and carries a
 * struct stats in its priv pointer. For every node the call count, minimum,
 * average and maximum (converted from nanoseconds to milliseconds) and the
 * standard deviation as a percentage of the average are printed. The
 * percentage is reported as 0 when the average is 0 to avoid dividing by
 * zero. The syscall name is taken from the trace-wide syscall table.
 */
2629 static size_t thread__dump_stats(struct thread_trace *ttrace,
2630 struct trace *trace, FILE *fp)
2632 struct stats *stats;
2635 struct int_node *inode = intlist__first(ttrace->syscall_stats);
2640 printed += fprintf(fp, "\n");
2642 printed += fprintf(fp, " syscall calls min avg max stddev\n");
2643 printed += fprintf(fp, " (msec) (msec) (msec) (%%)\n");
2644 printed += fprintf(fp, " --------------- -------- --------- --------- --------- ------\n");
2646 /* each int_node is a syscall */
2648 stats = inode->priv;
2650 double min = (double)(stats->min) / NSEC_PER_MSEC;
2651 double max = (double)(stats->max) / NSEC_PER_MSEC;
2652 double avg = avg_stats(stats);
2654 u64 n = (u64) stats->n;
/* the ratio must be computed before avg is rescaled to milliseconds below */
2656 pct = avg ? 100.0 * stddev_stats(stats)/avg : 0.0;
2657 avg /= NSEC_PER_MSEC;
2659 sc = &trace->syscalls.table[inode->i];
2660 printed += fprintf(fp, " %-15s", sc->name);
2661 printed += fprintf(fp, " %8" PRIu64 " %9.3f %9.3f",
2663 printed += fprintf(fp, " %9.3f %9.2f%%\n", max, pct);
2666 inode = intlist__next(inode);
2669 printed += fprintf(fp, "\n\n");
2674 /* struct used to pass data to per-thread function */
/*
 * Filled in by trace__fprintf_thread_summary() and read back through the
 * opaque priv pointer in trace__fprintf_one_thread().
 */
2675 struct summary_data {
2677 struct trace *trace;
/*
 * Per-thread callback for the summary: prints one thread's command name and
 * tid, its event count and share of all events (as a percentage), its major
 * and minor fault counts, its accumulated runtime in milliseconds, and then
 * the per-syscall table from thread__dump_stats().
 *
 * @priv is the struct summary_data set up by the caller; the number of
 * characters printed is added to its "printed" field.
 *
 * NOTE(review): "printed" starts from data->printed and is then added back to
 * data->printed, so earlier output appears to be counted twice - confirm
 * whether the total is relied upon anywhere.
 */
2681 static int trace__fprintf_one_thread(struct thread *thread, void *priv)
2683 struct summary_data *data = priv;
2684 FILE *fp = data->fp;
2685 size_t printed = data->printed;
2686 struct trace *trace = data->trace;
2687 struct thread_trace *ttrace = thread__priv(thread);
2693 ratio = (double)ttrace->nr_events / trace->nr_events * 100.0;
2695 printed += fprintf(fp, " %s (%d), ", thread__comm_str(thread), thread->tid);
2696 printed += fprintf(fp, "%lu events, ", ttrace->nr_events);
2697 printed += fprintf(fp, "%.1f%%", ratio);
2699 printed += fprintf(fp, ", %lu majfaults", ttrace->pfmaj);
2701 printed += fprintf(fp, ", %lu minfaults", ttrace->pfmin);
2702 printed += fprintf(fp, ", %.3f msec\n", ttrace->runtime_ms);
2703 printed += thread__dump_stats(ttrace, trace, fp);
2705 data->printed += printed;
/*
 * Print the summary header followed by one section per thread known to the
 * host machine, and return the accumulated character count.
 */
2710 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
2712 struct summary_data data = {
2716 data.printed = trace__fprintf_threads_header(fp);
2718 machine__for_each_thread(trace->host, trace__fprintf_one_thread, &data);
2720 return data.printed;
/*
 * Option callback: store the numeric value of @str as the duration filter.
 *
 * NOTE(review): atof() reports no errors, so malformed input silently becomes
 * 0.0, which the rest of the file treats as "no filter".
 */
2723 static int trace__set_duration(const struct option *opt, const char *str,
2724 int unset __maybe_unused)
2726 struct trace *trace = opt->value;
2728 trace->duration_filter = atof(str);
2732 static int trace__set_filter_pids(const struct option *opt, const char *str,
2733 int unset __maybe_unused)
2737 struct trace *trace = opt->value;
2739 * FIXME: introduce a intarray class, plain parse csv and create a
2740 * { int nr, int entries[] } struct...
2742 struct intlist *list = intlist__new(str);
2747 i = trace->filter_pids.nr = intlist__nr_entries(list) + 1;
2748 trace->filter_pids.entries = calloc(i, sizeof(pid_t));
2750 if (trace->filter_pids.entries == NULL)
2753 trace->filter_pids.entries[0] = getpid();
2755 for (i = 1; i < trace->filter_pids.nr; ++i)
2756 trace->filter_pids.entries[i] = intlist__entry(list, i - 1)->i;
2758 intlist__delete(list);
2764 static int trace__open_output(struct trace *trace, const char *filename)
2768 if (!stat(filename, &st) && st.st_size) {
2769 char oldname[PATH_MAX];
2771 scnprintf(oldname, sizeof(oldname), "%s.old", filename);
2773 rename(filename, oldname);
2776 trace->output = fopen(filename, "w");
2778 return trace->output == NULL ? -errno : 0;
2781 static int parse_pagefaults(const struct option *opt, const char *str,
2782 int unset __maybe_unused)
2784 int *trace_pgfaults = opt->value;
2786 if (strcmp(str, "all") == 0)
2787 *trace_pgfaults |= TRACE_PFMAJ | TRACE_PFMIN;
2788 else if (strcmp(str, "maj") == 0)
2789 *trace_pgfaults |= TRACE_PFMAJ;
2790 else if (strcmp(str, "min") == 0)
2791 *trace_pgfaults |= TRACE_PFMIN;
2798 static void evlist__set_evsel_handler(struct perf_evlist *evlist, void *handler)
2800 struct perf_evsel *evsel;
2802 evlist__for_each(evlist, evsel)
2803 evsel->handler = handler;
2806 int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
2808 const char *trace_usage[] = {
2809 "perf trace [<options>] [<command>]",
2810 "perf trace [<options>] -- <command> [<options>]",
2811 "perf trace record [<options>] [<command>]",
2812 "perf trace record [<options>] -- <command> [<options>]",
2815 struct trace trace = {
2817 .machine = audit_detect_machine(),
2818 .open_id = audit_name_to_syscall("open", trace.audit.machine),
2828 .user_freq = UINT_MAX,
2829 .user_interval = ULLONG_MAX,
2830 .no_buffering = true,
2831 .mmap_pages = UINT_MAX,
2832 .proc_map_timeout = 500,
2836 .trace_syscalls = true,
2838 const char *output_name = NULL;
2839 const char *ev_qualifier_str = NULL;
2840 const struct option trace_options[] = {
2841 OPT_CALLBACK(0, "event", &trace.evlist, "event",
2842 "event selector. use 'perf list' to list available events",
2843 parse_events_option),
2844 OPT_BOOLEAN(0, "comm", &trace.show_comm,
2845 "show the thread COMM next to its id"),
2846 OPT_BOOLEAN(0, "tool_stats", &trace.show_tool_stats, "show tool stats"),
2847 OPT_STRING('e', "expr", &ev_qualifier_str, "expr", "list of syscalls to trace"),
2848 OPT_STRING('o', "output", &output_name, "file", "output file name"),
2849 OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"),
2850 OPT_STRING('p', "pid", &trace.opts.target.pid, "pid",
2851 "trace events on existing process id"),
2852 OPT_STRING('t', "tid", &trace.opts.target.tid, "tid",
2853 "trace events on existing thread id"),
2854 OPT_CALLBACK(0, "filter-pids", &trace, "CSV list of pids",
2855 "pids to filter (by the kernel)", trace__set_filter_pids),
2856 OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide,
2857 "system-wide collection from all CPUs"),
2858 OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu",
2859 "list of cpus to monitor"),
2860 OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit,
2861 "child tasks do not inherit counters"),
2862 OPT_CALLBACK('m', "mmap-pages", &trace.opts.mmap_pages, "pages",
2863 "number of mmap data pages",
2864 perf_evlist__parse_mmap_pages),
2865 OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user",
2867 OPT_CALLBACK(0, "duration", &trace, "float",
2868 "show only events with duration > N.M ms",
2869 trace__set_duration),
2870 OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"),
2871 OPT_INCR('v', "verbose", &verbose, "be more verbose"),
2872 OPT_BOOLEAN('T', "time", &trace.full_time,
2873 "Show full timestamp, not time relative to first start"),
2874 OPT_BOOLEAN('s', "summary", &trace.summary_only,
2875 "Show only syscall summary with statistics"),
2876 OPT_BOOLEAN('S', "with-summary", &trace.summary,
2877 "Show all syscalls and summary with statistics"),
2878 OPT_CALLBACK_DEFAULT('F', "pf", &trace.trace_pgfaults, "all|maj|min",
2879 "Trace pagefaults", parse_pagefaults, "maj"),
2880 OPT_BOOLEAN(0, "syscalls", &trace.trace_syscalls, "Trace syscalls"),
2881 OPT_BOOLEAN('f', "force", &trace.force, "don't complain, do it"),
2882 OPT_UINTEGER(0, "proc-map-timeout", &trace.opts.proc_map_timeout,
2883 "per thread proc mmap processing timeout in ms"),
2886 const char * const trace_subcommands[] = { "record", NULL };
2890 signal(SIGSEGV, sighandler_dump_stack);
2891 signal(SIGFPE, sighandler_dump_stack);
2893 trace.evlist = perf_evlist__new();
2895 if (trace.evlist == NULL) {
2896 pr_err("Not enough memory to run!\n");
2901 argc = parse_options_subcommand(argc, argv, trace_options, trace_subcommands,
2902 trace_usage, PARSE_OPT_STOP_AT_NON_OPTION);
2904 if (trace.trace_pgfaults) {
2905 trace.opts.sample_address = true;
2906 trace.opts.sample_time = true;
2909 if (trace.evlist->nr_entries > 0)
2910 evlist__set_evsel_handler(trace.evlist, trace__event_handler);
2912 if ((argc >= 1) && (strcmp(argv[0], "record") == 0))
2913 return trace__record(&trace, argc-1, &argv[1]);
2915 /* summary_only implies summary option, but don't overwrite summary if set */
2916 if (trace.summary_only)
2917 trace.summary = trace.summary_only;
2919 if (!trace.trace_syscalls && !trace.trace_pgfaults &&
2920 trace.evlist->nr_entries == 0 /* Was --events used? */) {
2921 pr_err("Please specify something to trace.\n");
2925 if (output_name != NULL) {
2926 err = trace__open_output(&trace, output_name);
2928 perror("failed to create output file");
2933 if (ev_qualifier_str != NULL) {
2934 const char *s = ev_qualifier_str;
2935 struct strlist_config slist_config = {
2936 .dirname = system_path(STRACE_GROUPS_DIR),
2939 trace.not_ev_qualifier = *s == '!';
2940 if (trace.not_ev_qualifier)
2942 trace.ev_qualifier = strlist__new(s, &slist_config);
2943 if (trace.ev_qualifier == NULL) {
2944 fputs("Not enough memory to parse event qualifier",
2950 err = trace__validate_ev_qualifier(&trace);
2955 err = target__validate(&trace.opts.target);
2957 target__strerror(&trace.opts.target, err, bf, sizeof(bf));
2958 fprintf(trace.output, "%s", bf);
2962 err = target__parse_uid(&trace.opts.target);
2964 target__strerror(&trace.opts.target, err, bf, sizeof(bf));
2965 fprintf(trace.output, "%s", bf);
2969 if (!argc && target__none(&trace.opts.target))
2970 trace.opts.target.system_wide = true;
2973 err = trace__replay(&trace);
2975 err = trace__run(&trace, argc, argv);
2978 if (output_name != NULL)
2979 fclose(trace.output);