1 #include <traceevent/event-parse.h>
3 #include "util/color.h"
4 #include "util/debug.h"
5 #include "util/evlist.h"
6 #include "util/machine.h"
7 #include "util/session.h"
8 #include "util/thread.h"
9 #include "util/parse-options.h"
10 #include "util/strlist.h"
11 #include "util/intlist.h"
12 #include "util/thread_map.h"
13 #include "util/stat.h"
14 #include "trace-event.h"
15 #include "util/parse-events.h"
19 #include <sys/eventfd.h>
21 #include <linux/futex.h>
23 /*
 * For older distros: fallback values for constants that the system headers
 * may not provide.  Some of them (e.g. MADV_MERGEABLE, MADV_HWPOISON) are
 * referenced by the argument formatters further down in this file.
 */
25 # define MAP_STACK 0x20000
29 # define MADV_HWPOISON 100
32 #ifndef MADV_MERGEABLE
33 # define MADV_MERGEABLE 12
36 #ifndef MADV_UNMERGEABLE
37 # define MADV_UNMERGEABLE 13
41 # define EFD_SEMAPHORE 1
47 u64 (*integer)(struct tp_field *field, struct perf_sample *sample);
48 void *(*pointer)(struct tp_field *field, struct perf_sample *sample);
/*
 * TP_UINT_FIELD(bits): generates tp_field__u<bits>(), an accessor taking a
 * tp_field and a perf_sample.  The visible part of the body memcpy()s
 * sizeof(value) bytes out of sample->raw_data at field->offset.
 * NOTE(review): the declaration of the local value and the return statement
 * are not visible in this view; presumably the copied value is returned as u64.
 */
52 #define TP_UINT_FIELD(bits) \
53 static u64 tp_field__u##bits(struct tp_field *field, struct perf_sample *sample) \
56 memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
/*
 * TP_UINT_FIELD__SWAPPED(bits): generates tp_field__swapped_u<bits>(), which
 * copies the raw bytes at field->offset out of the sample and returns them
 * byte-swapped through bswap_<bits>().  Instantiated for 16, 32 and 64 bits.
 */
65 #define TP_UINT_FIELD__SWAPPED(bits) \
66 static u64 tp_field__swapped_u##bits(struct tp_field *field, struct perf_sample *sample) \
69 memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
70 return bswap_##bits(value);\
73 TP_UINT_FIELD__SWAPPED(16);
74 TP_UINT_FIELD__SWAPPED(32);
75 TP_UINT_FIELD__SWAPPED(64);
/*
 * Set up @field to read an unsigned integer described by @format_field.
 *
 * Records the field offset and selects the ->integer accessor according to
 * format_field->size: tp_field__u8 is used as-is, while the 16, 32 and 64 bit
 * accessors are replaced by their byte-swapping variants when needs_swap is
 * true.
 * NOTE(review): the case labels, the default branch and the return values are
 * not visible in this view.
 */
77 static int tp_field__init_uint(struct tp_field *field,
78 struct format_field *format_field,
81 field->offset = format_field->offset;
83 switch (format_field->size) {
85 field->integer = tp_field__u8;
88 field->integer = needs_swap ? tp_field__swapped_u16 : tp_field__u16;
91 field->integer = needs_swap ? tp_field__swapped_u32 : tp_field__u32;
94 field->integer = needs_swap ? tp_field__swapped_u64 : tp_field__u64;
/*
 * Pointer accessor: returns the address of the field payload inside the
 * sample, i.e. sample->raw_data advanced by field->offset.  Nothing is copied.
 */
103 static void *tp_field__ptr(struct tp_field *field, struct perf_sample *sample)
105 return sample->raw_data + field->offset;
/*
 * Set up @field as a pointer field: record the offset taken from
 * @format_field and install tp_field__ptr as the ->pointer accessor.
 * NOTE(review): the return statement is not visible in this view.
 */
108 static int tp_field__init_ptr(struct tp_field *field, struct format_field *format_field)
110 field->offset = format_field->offset;
111 field->pointer = tp_field__ptr;
118 struct tp_field args, ret;
/*
 * Look up the tracepoint field called name in @evsel and initialise @field as
 * an unsigned integer accessor for it, passing evsel->needs_swap along so the
 * right byte order variant is chosen.
 * A NULL lookup result is checked for; NOTE(review): the value returned in
 * that case is not visible in this view.
 */
122 static int perf_evsel__init_tp_uint_field(struct perf_evsel *evsel,
123 struct tp_field *field,
126 struct format_field *format_field = perf_evsel__field(evsel, name);
128 if (format_field == NULL)
131 return tp_field__init_uint(field, format_field, evsel->needs_swap);
/*
 * Initialise the integer accessor of the syscall_tp member @name kept in
 * evsel->priv.  The member name is also stringified and used as the name of
 * the tracepoint field to look up.  Evaluates to the result of
 * perf_evsel__init_tp_uint_field().
 */
134 #define perf_evsel__init_sc_tp_uint_field(evsel, name) \
135 ({ struct syscall_tp *sc = evsel->priv;\
136 perf_evsel__init_tp_uint_field(evsel, &sc->name, #name); })
/*
 * Look up the tracepoint field called name in @evsel and initialise @field as
 * a pointer accessor for it.
 * A NULL lookup result is checked for; NOTE(review): the value returned in
 * that case is not visible in this view.
 */
138 static int perf_evsel__init_tp_ptr_field(struct perf_evsel *evsel,
139 struct tp_field *field,
142 struct format_field *format_field = perf_evsel__field(evsel, name);
144 if (format_field == NULL)
147 return tp_field__init_ptr(field, format_field);
/*
 * Initialise the pointer accessor of the syscall_tp member @name kept in
 * evsel->priv.  The member name is also stringified and used as the name of
 * the tracepoint field to look up.  Evaluates to the result of
 * perf_evsel__init_tp_ptr_field().
 */
150 #define perf_evsel__init_sc_tp_ptr_field(evsel, name) \
151 ({ struct syscall_tp *sc = evsel->priv;\
152 perf_evsel__init_tp_ptr_field(evsel, &sc->name, #name); })
/*
 * Destroy an evsel that carries private syscall_tp state.
 * NOTE(review): only the perf_evsel__delete() call is visible in this view;
 * presumably evsel->priv is released before it -- confirm.
 */
154 static void perf_evsel__delete_priv(struct perf_evsel *evsel)
157 perf_evsel__delete(evsel);
/*
 * Allocate the struct syscall_tp for @evsel (stored in evsel->priv),
 * initialise the accessor for its "id" field and install @handler as the
 * evsel handler.
 * NOTE(review): the error paths (failed allocation, failed field lookup) and
 * the return values are not visible in this view.
 */
160 static int perf_evsel__init_syscall_tp(struct perf_evsel *evsel, void *handler)
162 evsel->priv = malloc(sizeof(struct syscall_tp));
163 if (evsel->priv != NULL) {
164 if (perf_evsel__init_sc_tp_uint_field(evsel, id))
167 evsel->handler = handler;
/*
 * Create the tracepoint evsel raw_syscalls:@direction, with the "syscalls"
 * subsystem as the alternative used on older kernels, and attach the
 * syscall_tp state and @handler to it via perf_evsel__init_syscall_tp().
 * The failure path visible here destroys the evsel with
 * perf_evsel__delete_priv().
 * NOTE(review): the conditions guarding the fallback and the return
 * statements are not visible in this view.
 */
178 static struct perf_evsel *perf_evsel__syscall_newtp(const char *direction, void *handler)
180 struct perf_evsel *evsel = perf_evsel__newtp("raw_syscalls", direction);
182 /* older kernels (e.g., RHEL6) use syscalls:{enter,exit} */
184 evsel = perf_evsel__newtp("syscalls", direction);
187 if (perf_evsel__init_syscall_tp(evsel, handler))
194 perf_evsel__delete_priv(evsel);
/*
 * Read the integer syscall_tp member @name of @evsel out of @sample by calling
 * the ->integer accessor installed for that member.
 */
198 #define perf_evsel__sc_tp_uint(evsel, name, sample) \
199 ({ struct syscall_tp *fields = evsel->priv; \
200 fields->name.integer(&fields->name, sample); })
/*
 * Get a pointer to the syscall_tp member @name of @evsel inside @sample by
 * calling the ->pointer accessor installed for that member.
 */
202 #define perf_evsel__sc_tp_ptr(evsel, name, sample) \
203 ({ struct syscall_tp *fields = evsel->priv; \
204 fields->name.pointer(&fields->name, sample); })
/*
 * Create the sys_enter and sys_exit syscall tracepoint evsels and add both to
 * @evlist.
 *
 * sys_enter gets a pointer accessor for its "args" field and sys_exit an
 * integer accessor for its "ret" field.  When a step fails, the evsels created
 * so far are destroyed through the out_delete_* labels (sys_exit first, then
 * sys_enter).
 * NOTE(review): the return statements are not visible in this view.
 */
206 static int perf_evlist__add_syscall_newtp(struct perf_evlist *evlist,
207 void *sys_enter_handler,
208 void *sys_exit_handler)
211 struct perf_evsel *sys_enter, *sys_exit;
213 sys_enter = perf_evsel__syscall_newtp("sys_enter", sys_enter_handler);
214 if (sys_enter == NULL)
217 if (perf_evsel__init_sc_tp_ptr_field(sys_enter, args))
218 goto out_delete_sys_enter;
220 sys_exit = perf_evsel__syscall_newtp("sys_exit", sys_exit_handler);
221 if (sys_exit == NULL)
222 goto out_delete_sys_enter;
224 if (perf_evsel__init_sc_tp_uint_field(sys_exit, ret))
225 goto out_delete_sys_exit;
227 perf_evlist__add(evlist, sys_enter);
228 perf_evlist__add(evlist, sys_exit);
235 perf_evsel__delete_priv(sys_exit);
236 out_delete_sys_enter:
237 perf_evsel__delete_priv(sys_enter);
244 struct thread *thread;
254 const char **entries;
/*
 * DEFINE_STRARRAY(array) and DEFINE_STRARRAY_OFFSET(array, off) define a
 * struct strarray named strarray__<array> describing the string table @array,
 * with nr_entries taken from ARRAY_SIZE(array).
 * NOTE(review): the remaining initialisers are not visible in this view;
 * presumably .entries points at @array and the OFFSET variant stores @off in
 * the offset member read by __syscall_arg__scnprintf_strarray().
 */
257 #define DEFINE_STRARRAY(array) struct strarray strarray__##array = { \
258 .nr_entries = ARRAY_SIZE(array), \
262 #define DEFINE_STRARRAY_OFFSET(array, off) struct strarray strarray__##array = { \
264 .nr_entries = ARRAY_SIZE(array), \
/*
 * Format arg->val by looking it up in the struct strarray passed in arg->parm.
 *
 * The value is rebased by sa->offset.  When the resulting index is negative or
 * not below sa->nr_entries, the raw value is printed with the intfmt format
 * string instead; otherwise the matching table entry is printed.
 * Returns whatever scnprintf() returns for the chosen output.
 */
268 static size_t __syscall_arg__scnprintf_strarray(char *bf, size_t size,
270 struct syscall_arg *arg)
272 struct strarray *sa = arg->parm;
273 int idx = arg->val - sa->offset;
275 if (idx < 0 || idx >= sa->nr_entries)
276 return scnprintf(bf, size, intfmt, arg->val);
278 return scnprintf(bf, size, "%s", sa->entries[idx]);
/*
 * Strarray formatter that prints values without a table entry in decimal
 * ("%d").  Exposed to the syscall_fmts table as SCA_STRARRAY.
 */
281 static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size,
282 struct syscall_arg *arg)
284 return __syscall_arg__scnprintf_strarray(bf, size, "%d", arg);
287 #define SCA_STRARRAY syscall_arg__scnprintf_strarray
/*
 * x86-only strarray formatter that prints values without a table entry in
 * hex ("%#x").  Exposed as SCA_STRHEXARRAY and used for the ioctl cmd
 * argument in the syscall_fmts table.
 */
289 #if defined(__i386__) || defined(__x86_64__)
291 * FIXME: Make this available to all arches as soon as the ioctl beautifier
292 * gets rewritten to support all arches.
294 static size_t syscall_arg__scnprintf_strhexarray(char *bf, size_t size,
295 struct syscall_arg *arg)
297 return __syscall_arg__scnprintf_strarray(bf, size, "%#x", arg);
300 #define SCA_STRHEXARRAY syscall_arg__scnprintf_strhexarray
301 #endif /* defined(__i386__) || defined(__x86_64__) */
/*
 * Forward declaration of the file descriptor formatter; its definition
 * appears further down in this file.  SCA_FD is the name used for it in the
 * syscall_fmts table.
 */
303 static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
304 struct syscall_arg *arg);
306 #define SCA_FD syscall_arg__scnprintf_fd
/*
 * Formatter for the directory fd argument of syscalls such as faccessat and
 * fchmodat (SCA_FDAT in the table): prints "CWD" for the special case and
 * otherwise formats the value like a regular fd.
 * NOTE(review): the condition selecting "CWD" is not visible in this view;
 * presumably it compares the value against AT_FDCWD.
 */
308 static size_t syscall_arg__scnprintf_fd_at(char *bf, size_t size,
309 struct syscall_arg *arg)
314 return scnprintf(bf, size, "CWD");
316 return syscall_arg__scnprintf_fd(bf, size, arg);
319 #define SCA_FDAT syscall_arg__scnprintf_fd_at
/*
 * Forward declaration of the formatter used for the fd argument of close
 * (SCA_CLOSE_FD in the table); its definition appears further down in this
 * file.
 */
321 static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
322 struct syscall_arg *arg);
324 #define SCA_CLOSE_FD syscall_arg__scnprintf_close_fd
/*
 * Print arg->val in hexadecimal with a 0x prefix ("%#lx").  Used through
 * SCA_HEX for address-like arguments in the syscall_fmts table.
 */
326 static size_t syscall_arg__scnprintf_hex(char *bf, size_t size,
327 struct syscall_arg *arg)
329 return scnprintf(bf, size, "%#lx", arg->val);
332 #define SCA_HEX syscall_arg__scnprintf_hex
/*
 * Decode a protection mask (the prot argument of mmap and mprotect in the
 * table) into PROT_* names joined by "|".
 *
 * PROT_NONE is printed as "NONE".  P_MMAP_PROT(n) appends the name n when
 * PROT_n is set, and a final scnprintf() prints a value in hex.
 * NOTE(review): whether P_MMAP_PROT() clears the bit it printed, and the guard
 * around the final hex output, are not visible in this view; presumably only
 * leftover unknown bits are printed in hex.
 */
334 static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size,
335 struct syscall_arg *arg)
337 int printed = 0, prot = arg->val;
339 if (prot == PROT_NONE)
340 return scnprintf(bf, size, "NONE");
341 #define P_MMAP_PROT(n) \
342 if (prot & PROT_##n) { \
343 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
353 P_MMAP_PROT(GROWSDOWN);
354 P_MMAP_PROT(GROWSUP);
358 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", prot);
363 #define SCA_MMAP_PROT syscall_arg__scnprintf_mmap_prot
/*
 * Decode the mmap flags argument into MAP_* names joined by "|".
 *
 * P_MMAP_FLAG(n) appends the name n when MAP_n is set; MAP_UNINITIALIZED is
 * only handled when the headers define it.  A final scnprintf() prints a
 * value in hex.
 * NOTE(review): whether P_MMAP_FLAG() clears the bit it printed, and the guard
 * around the final hex output, are not visible in this view; presumably only
 * leftover unknown bits are printed in hex.
 */
365 static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size,
366 struct syscall_arg *arg)
368 int printed = 0, flags = arg->val;
370 #define P_MMAP_FLAG(n) \
371 if (flags & MAP_##n) { \
372 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
377 P_MMAP_FLAG(PRIVATE);
381 P_MMAP_FLAG(ANONYMOUS);
382 P_MMAP_FLAG(DENYWRITE);
383 P_MMAP_FLAG(EXECUTABLE);
386 P_MMAP_FLAG(GROWSDOWN);
388 P_MMAP_FLAG(HUGETLB);
391 P_MMAP_FLAG(NONBLOCK);
392 P_MMAP_FLAG(NORESERVE);
393 P_MMAP_FLAG(POPULATE);
395 #ifdef MAP_UNINITIALIZED
396 P_MMAP_FLAG(UNINITIALIZED);
401 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
406 #define SCA_MMAP_FLAGS syscall_arg__scnprintf_mmap_flags
/*
 * Decode the mremap flags argument into MREMAP_* names joined by "|".
 *
 * P_MREMAP_FLAG(n) appends the name n when MREMAP_n is set and clears that bit
 * from the local copy, so the final scnprintf() prints what is left in hex.
 * NOTE(review): the guard around the final hex output is not visible in this
 * view.
 */
408 static size_t syscall_arg__scnprintf_mremap_flags(char *bf, size_t size,
409 struct syscall_arg *arg)
411 int printed = 0, flags = arg->val;
413 #define P_MREMAP_FLAG(n) \
414 if (flags & MREMAP_##n) { \
415 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
416 flags &= ~MREMAP_##n; \
419 P_MREMAP_FLAG(MAYMOVE);
421 P_MREMAP_FLAG(FIXED);
426 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
431 #define SCA_MREMAP_FLAGS syscall_arg__scnprintf_mremap_flags
/*
 * Decode the madvise behavior argument.
 *
 * P_MADV_BHV(n) expands to a case label for MADV_n that prints the name n and
 * returns.  MADV_SOFT_OFFLINE and MADV_NOHUGEPAGE are only handled when the
 * headers define them.  The last statement prints the value in hex.
 * NOTE(review): the switch statement itself is not visible in this view;
 * presumably the hex output is reached only for values without a case.
 */
433 static size_t syscall_arg__scnprintf_madvise_behavior(char *bf, size_t size,
434 struct syscall_arg *arg)
436 int behavior = arg->val;
439 #define P_MADV_BHV(n) case MADV_##n: return scnprintf(bf, size, #n)
442 P_MADV_BHV(SEQUENTIAL);
443 P_MADV_BHV(WILLNEED);
444 P_MADV_BHV(DONTNEED);
446 P_MADV_BHV(DONTFORK);
448 P_MADV_BHV(HWPOISON);
449 #ifdef MADV_SOFT_OFFLINE
450 P_MADV_BHV(SOFT_OFFLINE);
452 P_MADV_BHV(MERGEABLE);
453 P_MADV_BHV(UNMERGEABLE);
455 P_MADV_BHV(HUGEPAGE);
457 #ifdef MADV_NOHUGEPAGE
458 P_MADV_BHV(NOHUGEPAGE);
461 P_MADV_BHV(DONTDUMP);
470 return scnprintf(bf, size, "%#x", behavior);
473 #define SCA_MADV_BHV syscall_arg__scnprintf_madvise_behavior
/*
 * Decode the flock operation argument into LOCK_* names joined by "|".
 *
 * A command name is appended when all bits of LOCK_<cmd> are set in op (full
 * mask comparison rather than a plain bit test).  "NONE" and a hex value are
 * printed on other paths.
 * NOTE(review): the macro header, the condition for "NONE", the list of
 * commands and the guard around the hex output are not visible in this view.
 */
475 static size_t syscall_arg__scnprintf_flock(char *bf, size_t size,
476 struct syscall_arg *arg)
478 int printed = 0, op = arg->val;
481 return scnprintf(bf, size, "NONE");
483 if ((op & LOCK_##cmd) == LOCK_##cmd) { \
484 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #cmd); \
499 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", op);
504 #define SCA_FLOCK syscall_arg__scnprintf_flock
/*
 * Decode the futex op argument.
 *
 * The command part (op & FUTEX_CMD_MASK) is printed by name, or in hex when
 * no case matches.  Most cases also OR SCF_* bits into arg->mask.
 * NOTE(review): presumably those bits mark the futex arguments the command
 * does not use so that they are left out of the output -- confirm against the
 * code that consumes arg->mask, which is outside this view.
 * "|PRIV" and "|CLKRT" are appended when FUTEX_PRIVATE_FLAG and
 * FUTEX_CLOCK_REALTIME are set in op.
 */
506 static size_t syscall_arg__scnprintf_futex_op(char *bf, size_t size, struct syscall_arg *arg)
508 enum syscall_futex_args {
509 SCF_UADDR = (1 << 0),
512 SCF_TIMEOUT = (1 << 3),
513 SCF_UADDR2 = (1 << 4),
517 int cmd = op & FUTEX_CMD_MASK;
521 #define P_FUTEX_OP(n) case FUTEX_##n: printed = scnprintf(bf, size, #n);
522 P_FUTEX_OP(WAIT); arg->mask |= SCF_VAL3|SCF_UADDR2; break;
523 P_FUTEX_OP(WAKE); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
524 P_FUTEX_OP(FD); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
525 P_FUTEX_OP(REQUEUE); arg->mask |= SCF_VAL3|SCF_TIMEOUT; break;
526 P_FUTEX_OP(CMP_REQUEUE); arg->mask |= SCF_TIMEOUT; break;
527 P_FUTEX_OP(CMP_REQUEUE_PI); arg->mask |= SCF_TIMEOUT; break;
528 P_FUTEX_OP(WAKE_OP); break;
529 P_FUTEX_OP(LOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
530 P_FUTEX_OP(UNLOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
531 P_FUTEX_OP(TRYLOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2; break;
532 P_FUTEX_OP(WAIT_BITSET); arg->mask |= SCF_UADDR2; break;
533 P_FUTEX_OP(WAKE_BITSET); arg->mask |= SCF_UADDR2; break;
534 P_FUTEX_OP(WAIT_REQUEUE_PI); break;
535 default: printed = scnprintf(bf, size, "%#x", cmd); break;
538 if (op & FUTEX_PRIVATE_FLAG)
539 printed += scnprintf(bf + printed, size - printed, "|PRIV");
541 if (op & FUTEX_CLOCK_REALTIME)
542 printed += scnprintf(bf + printed, size - printed, "|CLKRT");
547 #define SCA_FUTEX_OP syscall_arg__scnprintf_futex_op
/*
 * String tables for arguments whose values are small consecutive integers.
 * Each table is wrapped in a struct strarray (strarray__<name>) by
 * DEFINE_STRARRAY(), so entry order must match the numeric values being
 * decoded.  epoll_ctl_ops is defined with an offset of 1, i.e. its first
 * entry corresponds to the value 1.
 * NOTE(review): some table entries and the closing braces are not visible in
 * this view.
 */
549 static const char *epoll_ctl_ops[] = { "ADD", "DEL", "MOD", };
550 static DEFINE_STRARRAY_OFFSET(epoll_ctl_ops, 1);
552 static const char *itimers[] = { "REAL", "VIRTUAL", "PROF", };
553 static DEFINE_STRARRAY(itimers);
555 static const char *whences[] = { "SET", "CUR", "END",
563 static DEFINE_STRARRAY(whences);
565 static const char *fcntl_cmds[] = {
566 "DUPFD", "GETFD", "SETFD", "GETFL", "SETFL", "GETLK", "SETLK",
567 "SETLKW", "SETOWN", "GETOWN", "SETSIG", "GETSIG", "F_GETLK64",
568 "F_SETLK64", "F_SETLKW64", "F_SETOWN_EX", "F_GETOWN_EX",
571 static DEFINE_STRARRAY(fcntl_cmds);
573 static const char *rlimit_resources[] = {
574 "CPU", "FSIZE", "DATA", "STACK", "CORE", "RSS", "NPROC", "NOFILE",
575 "MEMLOCK", "AS", "LOCKS", "SIGPENDING", "MSGQUEUE", "NICE", "RTPRIO",
578 static DEFINE_STRARRAY(rlimit_resources);
580 static const char *sighow[] = { "BLOCK", "UNBLOCK", "SETMASK", };
581 static DEFINE_STRARRAY(sighow);
583 static const char *clockid[] = {
584 "REALTIME", "MONOTONIC", "PROCESS_CPUTIME_ID", "THREAD_CPUTIME_ID",
585 "MONOTONIC_RAW", "REALTIME_COARSE", "MONOTONIC_COARSE",
587 static DEFINE_STRARRAY(clockid);
589 static const char *socket_families[] = {
590 "UNSPEC", "LOCAL", "INET", "AX25", "IPX", "APPLETALK", "NETROM",
591 "BRIDGE", "ATMPVC", "X25", "INET6", "ROSE", "DECnet", "NETBEUI",
592 "SECURITY", "KEY", "NETLINK", "PACKET", "ASH", "ECONET", "ATMSVC",
593 "RDS", "SNA", "IRDA", "PPPOX", "WANPIPE", "LLC", "IB", "CAN", "TIPC",
594 "BLUETOOTH", "IUCV", "RXRPC", "ISDN", "PHONET", "IEEE802154", "CAIF",
595 "ALG", "NFC", "VSOCK",
597 static DEFINE_STRARRAY(socket_families);
/*
 * Decode the type argument of socket and socketpair.
 *
 * SOCK_TYPE_MASK (0xf when the headers do not define it) splits the value:
 * the masked bits select the socket type, printed by name through P_SK_TYPE()
 * or in hex; the remaining bits are treated as flags.  P_SK_FLAG(n) appends
 * "|n" for each SOCK_n flag set and clears it, and what is left is appended
 * in hex.
 * NOTE(review): the switch statement, the list of types and flags, and the
 * guard around the final hex output are not visible in this view.
 */
599 #ifndef SOCK_TYPE_MASK
600 #define SOCK_TYPE_MASK 0xf
603 static size_t syscall_arg__scnprintf_socket_type(char *bf, size_t size,
604 struct syscall_arg *arg)
608 flags = type & ~SOCK_TYPE_MASK;
610 type &= SOCK_TYPE_MASK;
612 * Can't use a strarray, MIPS may override for ABI reasons.
615 #define P_SK_TYPE(n) case SOCK_##n: printed = scnprintf(bf, size, #n); break;
620 P_SK_TYPE(SEQPACKET);
625 printed = scnprintf(bf, size, "%#x", type);
628 #define P_SK_FLAG(n) \
629 if (flags & SOCK_##n) { \
630 printed += scnprintf(bf + printed, size - printed, "|%s", #n); \
631 flags &= ~SOCK_##n; \
639 printed += scnprintf(bf + printed, size - printed, "|%#x", flags);
644 #define SCA_SK_TYPE syscall_arg__scnprintf_socket_type
/*
 * Fallback values for MSG_* flags that the system headers may not define;
 * WAITFORONE, SENDPAGE_NOTLAST and FASTOPEN are decoded by the msg flags
 * formatter that follows.
 * NOTE(review): some of the #ifndef and #endif lines are not visible in this
 * view.
 */
647 #define MSG_PROBE 0x10
649 #ifndef MSG_WAITFORONE
650 #define MSG_WAITFORONE 0x10000
652 #ifndef MSG_SENDPAGE_NOTLAST
653 #define MSG_SENDPAGE_NOTLAST 0x20000
656 #define MSG_FASTOPEN 0x20000000
/*
 * Decode the flags argument of the send and recv family of syscalls into
 * MSG_* names joined by "|".
 *
 * P_MSG_FLAG(n) appends the name n when MSG_n is set.  "NONE" and a hex value
 * are printed on other paths.
 * NOTE(review): the condition for "NONE", whether P_MSG_FLAG() clears the bit
 * it printed, and the guard around the final hex output are not visible in
 * this view.
 */
659 static size_t syscall_arg__scnprintf_msg_flags(char *bf, size_t size,
660 struct syscall_arg *arg)
662 int printed = 0, flags = arg->val;
665 return scnprintf(bf, size, "NONE");
666 #define P_MSG_FLAG(n) \
667 if (flags & MSG_##n) { \
668 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
674 P_MSG_FLAG(DONTROUTE);
679 P_MSG_FLAG(DONTWAIT);
686 P_MSG_FLAG(ERRQUEUE);
687 P_MSG_FLAG(NOSIGNAL);
689 P_MSG_FLAG(WAITFORONE);
690 P_MSG_FLAG(SENDPAGE_NOTLAST);
691 P_MSG_FLAG(FASTOPEN);
692 P_MSG_FLAG(CMSG_CLOEXEC);
696 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
701 #define SCA_MSG_FLAGS syscall_arg__scnprintf_msg_flags
/*
 * Decode the mode argument of access.
 *
 * F_OK (0) is printed as "F".  Otherwise the letter n is appended, without a
 * separator, for each n_OK bit that is set, and a value is appended as
 * "|<hex>".
 * NOTE(review): the macro header, the list of letters and the guard around
 * the hex output are not visible in this view.
 */
703 static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size,
704 struct syscall_arg *arg)
709 if (mode == F_OK) /* 0 */
710 return scnprintf(bf, size, "F");
712 if (mode & n##_OK) { \
713 printed += scnprintf(bf + printed, size - printed, "%s", #n); \
723 printed += scnprintf(bf + printed, size - printed, "|%#x", mode);
728 #define SCA_ACCMODE syscall_arg__scnprintf_access_mode
/*
 * Decode the flags argument of open, openat and open_by_handle_at into O_*
 * names joined by "|".
 *
 * When O_CREAT is not set, the bit for the following argument (the mode) is
 * set in arg->mask.  O_SYNC is tested with a full mask comparison, so it is
 * reported only when all of its bits are set.
 * NOTE(review): the condition for "RDONLY", the macro header, the list of
 * flags and the guard around the final hex output are not visible in this
 * view.
 */
730 static size_t syscall_arg__scnprintf_open_flags(char *bf, size_t size,
731 struct syscall_arg *arg)
733 int printed = 0, flags = arg->val;
735 if (!(flags & O_CREAT))
736 arg->mask |= 1 << (arg->idx + 1); /* Mask the mode parm */
739 return scnprintf(bf, size, "RDONLY");
741 if (flags & O_##n) { \
742 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
766 if ((flags & O_SYNC) == O_SYNC)
767 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", "SYNC");
779 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
784 #define SCA_OPEN_FLAGS syscall_arg__scnprintf_open_flags
/*
 * Decode the flags argument of eventfd2 into EFD_* names joined by "|".
 * "NONE" and a hex value are printed on other paths.
 * NOTE(review): the condition for "NONE", the macro header, the list of flags
 * and the guard around the final hex output are not visible in this view.
 */
786 static size_t syscall_arg__scnprintf_eventfd_flags(char *bf, size_t size,
787 struct syscall_arg *arg)
789 int printed = 0, flags = arg->val;
792 return scnprintf(bf, size, "NONE");
794 if (flags & EFD_##n) { \
795 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
805 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
810 #define SCA_EFD_FLAGS syscall_arg__scnprintf_eventfd_flags
/*
 * Decode the flags argument of pipe2 into O_* names joined by "|", with a hex
 * value printed by the final scnprintf().
 * NOTE(review): the macro header, the list of flags and the guard around the
 * final hex output are not visible in this view.
 */
812 static size_t syscall_arg__scnprintf_pipe_flags(char *bf, size_t size,
813 struct syscall_arg *arg)
815 int printed = 0, flags = arg->val;
818 if (flags & O_##n) { \
819 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
828 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
833 #define SCA_PIPE_FLAGS syscall_arg__scnprintf_pipe_flags
/*
 * Decode a signal number argument.  P_SIGNUM(n) expands to a case label for
 * SIGn that prints the name n (without the SIG prefix) and returns; the last
 * statement prints the value in hex.
 * NOTE(review): the switch statement and the list of signals are not visible
 * in this view; presumably the hex output is reached only for values without
 * a case.
 */
835 static size_t syscall_arg__scnprintf_signum(char *bf, size_t size, struct syscall_arg *arg)
840 #define P_SIGNUM(n) case SIG##n: return scnprintf(bf, size, #n)
883 return scnprintf(bf, size, "%#x", sig);
886 #define SCA_SIGNUM syscall_arg__scnprintf_signum
/*
 * x86-only table of terminal ioctl request names.  The strarray is defined
 * with an offset of 0x5401 (the value given to TCGETS here), so entry i names
 * request 0x5401 + i; designated initialisers ([0x27], [0x50], [0x60]) skip
 * over gaps in the numbering.
 */
888 #if defined(__i386__) || defined(__x86_64__)
890 * FIXME: Make this available to all arches.
892 #define TCGETS 0x5401
894 static const char *tioctls[] = {
895 "TCGETS", "TCSETS", "TCSETSW", "TCSETSF", "TCGETA", "TCSETA", "TCSETAW",
896 "TCSETAF", "TCSBRK", "TCXONC", "TCFLSH", "TIOCEXCL", "TIOCNXCL",
897 "TIOCSCTTY", "TIOCGPGRP", "TIOCSPGRP", "TIOCOUTQ", "TIOCSTI",
898 "TIOCGWINSZ", "TIOCSWINSZ", "TIOCMGET", "TIOCMBIS", "TIOCMBIC",
899 "TIOCMSET", "TIOCGSOFTCAR", "TIOCSSOFTCAR", "FIONREAD", "TIOCLINUX",
900 "TIOCCONS", "TIOCGSERIAL", "TIOCSSERIAL", "TIOCPKT", "FIONBIO",
901 "TIOCNOTTY", "TIOCSETD", "TIOCGETD", "TCSBRKP", [0x27] = "TIOCSBRK",
902 "TIOCCBRK", "TIOCGSID", "TCGETS2", "TCSETS2", "TCSETSW2", "TCSETSF2",
903 "TIOCGRS485", "TIOCSRS485", "TIOCGPTN", "TIOCSPTLCK",
904 "TIOCGDEV||TCGETX", "TCSETX", "TCSETXF", "TCSETXW", "TIOCSIG",
905 "TIOCVHANGUP", "TIOCGPKT", "TIOCGPTLCK", "TIOCGEXCL",
906 [0x50] = "FIONCLEX", "FIOCLEX", "FIOASYNC", "TIOCSERCONFIG",
907 "TIOCSERGWILD", "TIOCSERSWILD", "TIOCGLCKTRMIOS", "TIOCSLCKTRMIOS",
908 "TIOCSERGSTRUCT", "TIOCSERGETLSR", "TIOCSERGETMULTI", "TIOCSERSETMULTI",
909 "TIOCMIWAIT", "TIOCGICOUNT", [0x60] = "FIOQSIZE",
912 static DEFINE_STRARRAY_OFFSET(tioctls, 0x5401);
913 #endif /* defined(__i386__) || defined(__x86_64__) */
/*
 * Shorthand for syscall_fmts entries: formats argument number @arg with
 * SCA_STRARRAY, using strarray__<array> as its lookup table.  @name does not
 * appear in the expansion; it only documents the argument at the use site.
 */
915 #define STRARRAY(arg, name, array) \
916 .arg_scnprintf = { [arg] = SCA_STRARRAY, }, \
917 .arg_parm = { [arg] = &strarray__##array, }
/*
 * Per-syscall formatting hints.
 *
 * The table is searched by name with bsearch() in syscall_fmt__find(), using
 * strcmp() as the ordering, so entries MUST stay sorted by .name.
 * .arg_scnprintf holds up to six per-argument formatters indexed by argument
 * position, and .arg_parm carries the extra parameter some of them need (the
 * strarray for SCA_STRARRAY and SCA_STRHEXARRAY).
 * NOTE(review): the remaining struct members (.name, .alias, .errmsg,
 * .hexret, .timeout are used by the entries) and the code consuming them are
 * not visible in this view.
 */
919 static struct syscall_fmt {
922 size_t (*arg_scnprintf[6])(char *bf, size_t size, struct syscall_arg *arg);
928 { .name = "access", .errmsg = true,
929 .arg_scnprintf = { [1] = SCA_ACCMODE, /* mode */ }, },
930 { .name = "arch_prctl", .errmsg = true, .alias = "prctl", },
931 { .name = "brk", .hexret = true,
932 .arg_scnprintf = { [0] = SCA_HEX, /* brk */ }, },
933 { .name = "clock_gettime", .errmsg = true, STRARRAY(0, clk_id, clockid), },
934 { .name = "close", .errmsg = true,
935 .arg_scnprintf = { [0] = SCA_CLOSE_FD, /* fd */ }, },
936 { .name = "connect", .errmsg = true, },
937 { .name = "dup", .errmsg = true,
938 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
939 { .name = "dup2", .errmsg = true,
940 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
941 { .name = "dup3", .errmsg = true,
942 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
943 { .name = "epoll_ctl", .errmsg = true, STRARRAY(1, op, epoll_ctl_ops), },
944 { .name = "eventfd2", .errmsg = true,
945 .arg_scnprintf = { [1] = SCA_EFD_FLAGS, /* flags */ }, },
946 { .name = "faccessat", .errmsg = true,
947 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
948 { .name = "fadvise64", .errmsg = true,
949 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
950 { .name = "fallocate", .errmsg = true,
951 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
952 { .name = "fchdir", .errmsg = true,
953 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
954 { .name = "fchmod", .errmsg = true,
955 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
956 { .name = "fchmodat", .errmsg = true,
957 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
958 { .name = "fchown", .errmsg = true,
959 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
960 { .name = "fchownat", .errmsg = true,
961 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
962 { .name = "fcntl", .errmsg = true,
963 .arg_scnprintf = { [0] = SCA_FD, /* fd */
964 [1] = SCA_STRARRAY, /* cmd */ },
965 .arg_parm = { [1] = &strarray__fcntl_cmds, /* cmd */ }, },
966 { .name = "fdatasync", .errmsg = true,
967 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
968 { .name = "flock", .errmsg = true,
969 .arg_scnprintf = { [0] = SCA_FD, /* fd */
970 [1] = SCA_FLOCK, /* cmd */ }, },
971 { .name = "fsetxattr", .errmsg = true,
972 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
973 { .name = "fstat", .errmsg = true, .alias = "newfstat",
974 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
975 { .name = "fstatat", .errmsg = true, .alias = "newfstatat",
976 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
977 { .name = "fstatfs", .errmsg = true,
978 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
979 { .name = "fsync", .errmsg = true,
980 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
981 { .name = "ftruncate", .errmsg = true,
982 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
983 { .name = "futex", .errmsg = true,
984 .arg_scnprintf = { [1] = SCA_FUTEX_OP, /* op */ }, },
985 { .name = "futimesat", .errmsg = true,
986 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
987 { .name = "getdents", .errmsg = true,
988 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
989 { .name = "getdents64", .errmsg = true,
990 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
991 { .name = "getitimer", .errmsg = true, STRARRAY(0, which, itimers), },
992 { .name = "getrlimit", .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
993 { .name = "ioctl", .errmsg = true,
994 .arg_scnprintf = { [0] = SCA_FD, /* fd */
995 #if defined(__i386__) || defined(__x86_64__)
997 * FIXME: Make this available to all arches.
999 [1] = SCA_STRHEXARRAY, /* cmd */
1000 [2] = SCA_HEX, /* arg */ },
1001 .arg_parm = { [1] = &strarray__tioctls, /* cmd */ }, },
1003 [2] = SCA_HEX, /* arg */ }, },
1005 { .name = "kill", .errmsg = true,
1006 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1007 { .name = "linkat", .errmsg = true,
1008 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
1009 { .name = "lseek", .errmsg = true,
1010 .arg_scnprintf = { [0] = SCA_FD, /* fd */
1011 [2] = SCA_STRARRAY, /* whence */ },
1012 .arg_parm = { [2] = &strarray__whences, /* whence */ }, },
1013 { .name = "lstat", .errmsg = true, .alias = "newlstat", },
1014 { .name = "madvise", .errmsg = true,
1015 .arg_scnprintf = { [0] = SCA_HEX, /* start */
1016 [2] = SCA_MADV_BHV, /* behavior */ }, },
1017 { .name = "mkdirat", .errmsg = true,
1018 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
1019 { .name = "mknodat", .errmsg = true,
1020 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
1021 { .name = "mlock", .errmsg = true,
1022 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
/* NOTE(review): mlockall(2) takes a flags argument rather than an address -- confirm that the hex formatter and the "addr" label below are intended. */
1023 { .name = "mlockall", .errmsg = true,
1024 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1025 { .name = "mmap", .hexret = true,
1026 .arg_scnprintf = { [0] = SCA_HEX, /* addr */
1027 [2] = SCA_MMAP_PROT, /* prot */
1028 [3] = SCA_MMAP_FLAGS, /* flags */
1029 [4] = SCA_FD, /* fd */ }, },
1030 { .name = "mprotect", .errmsg = true,
1031 .arg_scnprintf = { [0] = SCA_HEX, /* start */
1032 [2] = SCA_MMAP_PROT, /* prot */ }, },
1033 { .name = "mremap", .hexret = true,
1034 .arg_scnprintf = { [0] = SCA_HEX, /* addr */
1035 [3] = SCA_MREMAP_FLAGS, /* flags */
1036 [4] = SCA_HEX, /* new_addr */ }, },
1037 { .name = "munlock", .errmsg = true,
1038 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1039 { .name = "munmap", .errmsg = true,
1040 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1041 { .name = "name_to_handle_at", .errmsg = true,
1042 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1043 { .name = "newfstatat", .errmsg = true,
1044 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1045 { .name = "open", .errmsg = true,
1046 .arg_scnprintf = { [1] = SCA_OPEN_FLAGS, /* flags */ }, },
1047 { .name = "open_by_handle_at", .errmsg = true,
1048 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1049 [2] = SCA_OPEN_FLAGS, /* flags */ }, },
1050 { .name = "openat", .errmsg = true,
1051 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1052 [2] = SCA_OPEN_FLAGS, /* flags */ }, },
1053 { .name = "pipe2", .errmsg = true,
1054 .arg_scnprintf = { [1] = SCA_PIPE_FLAGS, /* flags */ }, },
1055 { .name = "poll", .errmsg = true, .timeout = true, },
1056 { .name = "ppoll", .errmsg = true, .timeout = true, },
1057 { .name = "pread", .errmsg = true, .alias = "pread64",
1058 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
/* NOTE(review): the alias "pread" on the preadv entry below looks suspicious (pwritev has no alias) -- confirm it is intended. */
1059 { .name = "preadv", .errmsg = true, .alias = "pread",
1060 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1061 { .name = "prlimit64", .errmsg = true, STRARRAY(1, resource, rlimit_resources), },
1062 { .name = "pwrite", .errmsg = true, .alias = "pwrite64",
1063 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1064 { .name = "pwritev", .errmsg = true,
1065 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1066 { .name = "read", .errmsg = true,
1067 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1068 { .name = "readlinkat", .errmsg = true,
1069 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1070 { .name = "readv", .errmsg = true,
1071 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1072 { .name = "recvfrom", .errmsg = true,
1073 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1074 { .name = "recvmmsg", .errmsg = true,
1075 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1076 { .name = "recvmsg", .errmsg = true,
1077 .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
1078 { .name = "renameat", .errmsg = true,
1079 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1080 { .name = "rt_sigaction", .errmsg = true,
1081 .arg_scnprintf = { [0] = SCA_SIGNUM, /* sig */ }, },
1082 { .name = "rt_sigprocmask", .errmsg = true, STRARRAY(0, how, sighow), },
1083 { .name = "rt_sigqueueinfo", .errmsg = true,
1084 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1085 { .name = "rt_tgsigqueueinfo", .errmsg = true,
1086 .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
1087 { .name = "select", .errmsg = true, .timeout = true, },
1088 { .name = "sendmmsg", .errmsg = true,
1089 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1090 { .name = "sendmsg", .errmsg = true,
1091 .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
1092 { .name = "sendto", .errmsg = true,
1093 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1094 { .name = "setitimer", .errmsg = true, STRARRAY(0, which, itimers), },
1095 { .name = "setrlimit", .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
1096 { .name = "shutdown", .errmsg = true,
1097 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1098 { .name = "socket", .errmsg = true,
1099 .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
1100 [1] = SCA_SK_TYPE, /* type */ },
1101 .arg_parm = { [0] = &strarray__socket_families, /* family */ }, },
1102 { .name = "socketpair", .errmsg = true,
1103 .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
1104 [1] = SCA_SK_TYPE, /* type */ },
1105 .arg_parm = { [0] = &strarray__socket_families, /* family */ }, },
1106 { .name = "stat", .errmsg = true, .alias = "newstat", },
1107 { .name = "symlinkat", .errmsg = true,
1108 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1109 { .name = "tgkill", .errmsg = true,
1110 .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
1111 { .name = "tkill", .errmsg = true,
1112 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1113 { .name = "uname", .errmsg = true, .alias = "newuname", },
1114 { .name = "unlinkat", .errmsg = true,
1115 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1116 { .name = "utimensat", .errmsg = true,
1117 .arg_scnprintf = { [0] = SCA_FDAT, /* dirfd */ }, },
1118 { .name = "write", .errmsg = true,
1119 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1120 { .name = "writev", .errmsg = true,
1121 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
/*
 * bsearch() comparator for syscall_fmts: @name is the key (a C string) and
 * @fmtp points at a table element; ordering is strcmp() on the element name.
 */
1124 static int syscall_fmt__cmp(const void *name, const void *fmtp)
1126 const struct syscall_fmt *fmt = fmtp;
1127 return strcmp(name, fmt->name);
/*
 * Binary-search syscall_fmts for the entry whose name equals @name.
 * Returns the bsearch() result: a pointer to the entry, or NULL when there is
 * no match.  Relies on the table being sorted by name.
 */
1130 static struct syscall_fmt *syscall_fmt__find(const char *name)
1132 const int nmemb = ARRAY_SIZE(syscall_fmts);
1133 return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp);
1137 struct event_format *tp_format;
1139 struct format_field *args;
1143 struct syscall_fmt *fmt;
1144 size_t (**arg_scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
/*
 * Print "(<duration> ms): " to @fp, where the duration is @t divided by
 * NSEC_PER_MSEC.  The number is coloured red from 1 ms upwards, yellow from
 * 0.01 ms upwards and with the normal colour below that.
 * Returns the sum of the values returned by the print calls.
 */
1148 static size_t fprintf_duration(unsigned long t, FILE *fp)
1150 double duration = (double)t / NSEC_PER_MSEC;
1151 size_t printed = fprintf(fp, "(");
1153 if (duration >= 1.0)
1154 printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration);
1155 else if (duration >= 0.01)
1156 printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration);
1158 printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration);
1159 return printed + fprintf(fp, "): ");
/*
 * Per-thread tracing state.  thread__trace() allocates one of these and
 * stores it as the private pointer of the struct thread.
 * NOTE(review): only some members are visible in this view; the fd path cache
 * (paths.table, paths.max) used by the helpers below is declared in lines not
 * shown here.
 */
1162 struct thread_trace {
1166 unsigned long nr_events;
1167 unsigned long pfmaj, pfmin;
1175 struct intlist *syscall_stats;
/*
 * Allocate a zeroed struct thread_trace.  paths.max is set to -1, the value
 * trace__set_fd_pathname() treats as "no fd path table yet", and an empty
 * intlist is created for syscall_stats.
 * NOTE(review): the NULL check on the allocation and the return statement are
 * not visible in this view.
 */
1178 static struct thread_trace *thread_trace__new(void)
1180 struct thread_trace *ttrace = zalloc(sizeof(struct thread_trace));
1183 ttrace->paths.max = -1;
1185 ttrace->syscall_stats = intlist__new(NULL);
/*
 * Get the thread_trace attached to @thread, creating it on first use, and
 * count one more event for the thread.
 *
 * When the private pointer is still NULL after the allocation attempt, the
 * failure path is taken; a red "not enough memory" warning is printed to @fp.
 * NOTE(review): the handling of a NULL @thread, the labels and the return
 * statements are not visible in this view.
 */
1190 static struct thread_trace *thread__trace(struct thread *thread, FILE *fp)
1192 struct thread_trace *ttrace;
1197 if (thread__priv(thread) == NULL)
1198 thread__set_priv(thread, thread_trace__new());
1200 if (thread__priv(thread) == NULL)
1203 ttrace = thread__priv(thread);
1204 ++ttrace->nr_events;
1208 color_fprintf(fp, PERF_COLOR_RED,
1209 "WARNING: not enough memory, dropping samples!\n");
/*
 * Bit flags.  NOTE(review): presumably they select tracing of major and minor
 * page faults; their users are outside this view.
 */
1213 #define TRACE_PFMAJ (1 << 0)
1214 #define TRACE_PFMIN (1 << 1)
1217 struct perf_tool tool;
1224 struct syscall *table;
1226 struct record_opts opts;
1227 struct perf_evlist *evlist;
1228 struct machine *host;
1229 struct thread *current;
1232 unsigned long nr_events;
1233 struct strlist *ev_qualifier;
1234 const char *last_vfs_getname;
1235 struct intlist *tid_list;
1236 struct intlist *pid_list;
1241 double duration_filter;
1247 bool not_ev_qualifier;
1251 bool multiple_threads;
1255 bool show_tool_stats;
1256 bool trace_syscalls;
/*
 * Remember @pathname as the path behind @fd in the per-thread fd path table.
 *
 * When @fd is beyond paths.max the table is grown with realloc() to fd + 1
 * entries and the new slots are zeroed: only the slots after the old maximum
 * when a table already existed, the whole table when paths.max was -1.
 * The path is duplicated with strdup(); returns 0 on success and -1 when the
 * duplication fails.
 * NOTE(review): the handling of a failed realloc() is not visible in this
 * view.  No free of a previous table[fd] entry is visible before it is
 * overwritten -- confirm callers only store into empty slots.
 */
1260 static int trace__set_fd_pathname(struct thread *thread, int fd, const char *pathname)
1262 struct thread_trace *ttrace = thread__priv(thread);
1264 if (fd > ttrace->paths.max) {
1265 char **npath = realloc(ttrace->paths.table, (fd + 1) * sizeof(char *));
1270 if (ttrace->paths.max != -1) {
1271 memset(npath + ttrace->paths.max + 1, 0,
1272 (fd - ttrace->paths.max) * sizeof(char *));
1274 memset(npath, 0, (fd + 1) * sizeof(char *));
1277 ttrace->paths.table = npath;
1278 ttrace->paths.max = fd;
1281 ttrace->paths.table[fd] = strdup(pathname);
1283 return ttrace->paths.table[fd] != NULL ? 0 : -1;
/*
 * Resolve @fd of @thread to a path by reading the symlink under /proc and
 * store the result with trace__set_fd_pathname().
 *
 * The link is /proc/<pid>/fd/<fd> when the thread is the group leader
 * (pid_ == tid) and /proc/<pid>/task/<tid>/fd/<fd> otherwise.  The link is
 * rejected when lstat() fails, when st_size + 1 does not fit in the buffer,
 * or when readlink() fails or returns more than st_size bytes.  readlink()
 * does not NUL-terminate, so the terminator is written explicitly.
 * NOTE(review): the values returned on the rejection paths are not visible in
 * this view.
 */
1286 static int thread__read_fd_path(struct thread *thread, int fd)
1288 char linkname[PATH_MAX], pathname[PATH_MAX];
1292 if (thread->pid_ == thread->tid) {
1293 scnprintf(linkname, sizeof(linkname),
1294 "/proc/%d/fd/%d", thread->pid_, fd);
1296 scnprintf(linkname, sizeof(linkname),
1297 "/proc/%d/task/%d/fd/%d", thread->pid_, thread->tid, fd);
1300 if (lstat(linkname, &st) < 0 || st.st_size + 1 > (off_t)sizeof(pathname))
1303 ret = readlink(linkname, pathname, sizeof(pathname));
1305 if (ret < 0 || ret > st.st_size)
1308 pathname[ret] = '\0';
1309 return trace__set_fd_pathname(thread, fd, pathname);
/*
 * Return the cached path for @fd of @thread.  When the fd is beyond the
 * cached range or has no entry yet, the path is read from /proc through
 * thread__read_fd_path() and trace->stats.proc_getname is incremented.
 * NOTE(review): the early exits (e.g. a missing thread_trace or a negative
 * fd), any condition guarding the /proc lookup and the value returned when
 * the lookup fails are not visible in this view.
 */
1312 static const char *thread__fd_path(struct thread *thread, int fd,
1313 struct trace *trace)
1315 struct thread_trace *ttrace = thread__priv(thread);
1323 if ((fd > ttrace->paths.max || ttrace->paths.table[fd] == NULL)) {
1326 ++trace->stats.proc_getname;
1327 if (thread__read_fd_path(thread, fd))
1331 return ttrace->paths.table[fd];
/*
 * Format a file descriptor argument: the number in decimal, followed by
 * "<path>" with the path obtained from thread__fd_path().
 * NOTE(review): the declaration of fd and the condition guarding the path
 * output are not visible in this view; presumably the path is appended only
 * when it is not NULL.
 */
1334 static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
1335 struct syscall_arg *arg)
1338 size_t printed = scnprintf(bf, size, "%d", fd);
1339 const char *path = thread__fd_path(arg->thread, fd, arg->trace);
1342 printed += scnprintf(bf + printed, size - printed, "<%s>", path);
/*
 * syscall_arg__scnprintf_close_fd() - format the fd argument of a close
 * and drop its cached pathname.
 *
 * The descriptor is formatted with syscall_arg__scnprintf_fd() first; then,
 * if the thread has trace state and @fd is within the path table, the
 * table slot is released with zfree().
 *
 * NOTE(review): the declaration of 'fd' and the return statement are not
 * visible in this listing.
 */
1347 static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
1348 struct syscall_arg *arg)
1351 size_t printed = syscall_arg__scnprintf_fd(bf, size, arg);
1352 struct thread_trace *ttrace = thread__priv(arg->thread);
1354 if (ttrace && fd >= 0 && fd <= ttrace->paths.max)
1355 zfree(&ttrace->paths.table[fd]);
/*
 * trace__filter_duration() - returns true when @t is smaller than
 * trace->duration_filter scaled by NSEC_PER_MSEC, i.e. when @t falls
 * below the configured threshold.
 */
1360 static bool trace__filter_duration(struct trace *trace, double t)
1362 return t < (trace->duration_filter * NSEC_PER_MSEC);
/*
 * trace__fprintf_tstamp() - print @tstamp relative to trace->base_time.
 *
 * The difference is divided by NSEC_PER_MSEC and written to @fp with the
 * "%10.3f " format. Returns whatever fprintf() returned.
 */
1365 static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
1367 double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC;
1369 return fprintf(fp, "%10.3f ", ts);
/*
 * File-scope flags, both initially false. 'done' is consulted by
 * trace__run() when choosing its poll timeout; 'interrupted' is written by
 * sig_handler() below.
 */
1372 static bool done = false;
1373 static bool interrupted = false;
/*
 * sig_handler() - signal handler; records in 'interrupted' whether the
 * delivered signal was SIGINT.
 * NOTE(review): the statement updating 'done' is not visible in this
 * listing.
 */
1375 static void sig_handler(int sig)
1378 interrupted = sig == SIGINT;
/*
 * trace__fprintf_entry_head() - print the prefix of one output line.
 *
 * Emits the relative timestamp (trace__fprintf_tstamp()), then the
 * duration (fprintf_duration()). When trace->multiple_threads is set the
 * thread id is appended, preceded by at most 14 characters of the thread
 * comm and a '/' if trace->show_comm is set. 'printed' accumulates the
 * character counts; the return statement is not visible in this listing.
 */
1381 static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread,
1382 u64 duration, u64 tstamp, FILE *fp)
1384 size_t printed = trace__fprintf_tstamp(trace, tstamp, fp);
1385 printed += fprintf_duration(duration, fp);
1387 if (trace->multiple_threads) {
1388 if (trace->show_comm)
1389 printed += fprintf(fp, "%.14s/", thread__comm_str(thread));
1390 printed += fprintf(fp, "%d ", thread->tid);
/*
 * trace__process_event() - handle a non-sample perf event.
 *
 * PERF_RECORD_LOST events are reported in red on trace->output with the
 * number of lost events and then passed to machine__process_lost_event().
 * The remaining visible call hands the event to machine__process_event()
 * (presumably the default case - the case label is not visible here).
 * 'ret' carries the result; its declaration and return are not visible.
 */
1396 static int trace__process_event(struct trace *trace, struct machine *machine,
1397 union perf_event *event, struct perf_sample *sample)
1401 switch (event->header.type) {
1402 case PERF_RECORD_LOST:
1403 color_fprintf(trace->output, PERF_COLOR_RED,
1404 "LOST %" PRIu64 " events!\n", event->lost.lost);
1405 ret = machine__process_lost_event(machine, event, sample);
1407 ret = machine__process_event(machine, event, sample);
/*
 * trace__tool_process() - perf_tool callback adapter.
 *
 * Recovers the enclosing struct trace from @tool with container_of() and
 * forwards the event to trace__process_event(), returning its result.
 */
1414 static int trace__tool_process(struct perf_tool *tool,
1415 union perf_event *event,
1416 struct perf_sample *sample,
1417 struct machine *machine)
1419 struct trace *trace = container_of(tool, struct trace, tool);
1420 return trace__process_event(trace, machine, event, sample);
/*
 * trace__symbols_init() - set up symbol handling and the host machine.
 *
 * Calls symbol__init(NULL), creates trace->host with machine__new_host()
 * and synthesizes the threads of @evlist for the configured target,
 * using trace__tool_process() as the event callback.
 *
 * NOTE(review): the error returns after symbol__init() and after a NULL
 * host, as well as the final return, are not visible in this listing.
 */
1423 static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
1425 int err = symbol__init(NULL);
1430 trace->host = machine__new_host();
1431 if (trace->host == NULL)
1434 err = __machine__synthesize_threads(trace->host, &trace->tool, &trace->opts.target,
1435 evlist->threads, trace__tool_process, false);
/*
 * syscall__set_arg_fmts() - choose a formatter for every syscall argument.
 *
 * Allocates sc->arg_scnprintf with one slot per argument (sc->nr_args),
 * takes the argument parameters from sc->fmt and walks the field list:
 * a formatter supplied by sc->fmt for that index wins, otherwise pointer
 * fields (FIELD_IS_POINTER) get syscall_arg__scnprintf_hex.
 *
 * NOTE(review): sc->fmt is dereferenced for arg_parm while the loop
 * guards with "sc->fmt &&"; the guarding condition is presumably on a line
 * not visible here - confirm. The initialisation/increment of 'idx' and
 * the return values are also not visible in this listing.
 */
1442 static int syscall__set_arg_fmts(struct syscall *sc)
1444 struct format_field *field;
1447 sc->arg_scnprintf = calloc(sc->nr_args, sizeof(void *));
1448 if (sc->arg_scnprintf == NULL)
1452 sc->arg_parm = sc->fmt->arg_parm;
1454 for (field = sc->args; field; field = field->next) {
1455 if (sc->fmt && sc->fmt->arg_scnprintf[idx])
1456 sc->arg_scnprintf[idx] = sc->fmt->arg_scnprintf[idx];
1457 else if (field->flags & FIELD_IS_POINTER)
1458 sc->arg_scnprintf[idx] = syscall_arg__scnprintf_hex;
/*
 * trace__read_syscall_info() - populate the syscall table entry for @id.
 *
 * Steps visible here:
 *  - look up the syscall name with audit_syscall_to_name();
 *  - grow trace->syscalls.table with realloc() when @id exceeds
 *    syscalls.max, zeroing the newly added entries;
 *  - when an event qualifier list exists, mark the entry filtered
 *    according to list membership and trace->not_ev_qualifier;
 *  - find the optional syscall_fmt and the "syscalls:sys_enter_<name>"
 *    tracepoint format, retrying with the fmt alias if the first lookup
 *    fails;
 *  - take the argument list from the tracepoint format, skipping a leading
 *    "nr" field;
 *  - flag exit/exit_group via sc->is_exit.
 *
 * Returns the result of syscall__set_arg_fmts() on the success path.
 * NOTE(review): the error returns (NULL name, realloc failure, missing
 * tracepoint format), the assignment of sc->name and the nr_args adjustment
 * that presumably accompanies dropping "nr" are not visible in this listing.
 */
1465 static int trace__read_syscall_info(struct trace *trace, int id)
1469 const char *name = audit_syscall_to_name(id, trace->audit.machine);
1474 if (id > trace->syscalls.max) {
1475 struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc));
1477 if (nsyscalls == NULL)
/* Table already had entries: zero only the slots past the old maximum. */
1480 if (trace->syscalls.max != -1) {
1481 memset(nsyscalls + trace->syscalls.max + 1, 0,
1482 (id - trace->syscalls.max) * sizeof(*sc));
1484 memset(nsyscalls, 0, (id + 1) * sizeof(*sc));
1487 trace->syscalls.table = nsyscalls;
1488 trace->syscalls.max = id;
1491 sc = trace->syscalls.table + id;
1494 if (trace->ev_qualifier) {
1495 bool in = strlist__find(trace->ev_qualifier, name) != NULL;
/* Filter when list membership and the negation flag are equal. */
1497 if (!(in ^ trace->not_ev_qualifier)) {
1498 sc->filtered = true;
1500 * No need to do read tracepoint information since this will be
1507 sc->fmt = syscall_fmt__find(sc->name);
1509 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name);
1510 sc->tp_format = trace_event__tp_format("syscalls", tp_name);
/* Fall back to the alias name when the primary tracepoint is absent. */
1512 if (sc->tp_format == NULL && sc->fmt && sc->fmt->alias) {
1513 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias);
1514 sc->tp_format = trace_event__tp_format("syscalls", tp_name);
1517 if (sc->tp_format == NULL)
1520 sc->args = sc->tp_format->format.fields;
1521 sc->nr_args = sc->tp_format->format.nr_fields;
1522 /* drop nr field - not relevant here; does not exist on older kernels */
1523 if (sc->args && strcmp(sc->args->name, "nr") == 0) {
1524 sc->args = sc->args->next;
1528 sc->is_exit = !strcmp(name, "exit_group") || !strcmp(name, "exit");
1530 return syscall__set_arg_fmts(sc);
/*
 * syscall__scnprintf_args() - format the raw syscall arguments into @bf.
 *
 * When the syscall has field descriptions (sc->args), each field's value
 * is copied out of @args with memcpy() (one unsigned long per index) and
 * printed as "name: value", separated by ", ". A per-argument formatter
 * from sc->arg_scnprintf is used when present, after loading its parameter
 * from sc->arg_parm. Part of the condition that suppresses an argument is
 * visible: it exempts arguments whose formatter is SCA_STRARRAY and that
 * have an arg_parm entry.
 *
 * The trailing statements index the arguments with 'i' instead of a field
 * list (presumably the branch taken when no field descriptions exist).
 *
 * NOTE(review): the declarations of 'printed', 'val', 'p', 'bit' and 'i',
 * the default format strings and the return statement are not visible in
 * this listing.
 */
1534 * args is to be interpreted as a series of longs but we need to handle
1535 * 8-byte unaligned accesses. args points to raw_data within the event
1536 * and raw_data is guaranteed to be 8-byte unaligned because it is
1537 * preceded by raw_size which is a u32. So we need to copy args to a temp
1538 * variable to read it. Most notably this avoids extended load instructions
1539 * on unaligned addresses
1542 static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
1543 unsigned char *args, struct trace *trace,
1544 struct thread *thread)
1550 if (sc->args != NULL) {
1551 struct format_field *field;
1553 struct syscall_arg arg = {
1560 for (field = sc->args; field;
1561 field = field->next, ++arg.idx, bit <<= 1) {
1565 /* special care for unaligned accesses */
1566 p = args + sizeof(unsigned long) * arg.idx;
1567 memcpy(&val, p, sizeof(val));
1570 * Suppress this argument if its value is zero and
1571 * and we don't have a string associated in an
1575 !(sc->arg_scnprintf &&
1576 sc->arg_scnprintf[arg.idx] == SCA_STRARRAY &&
1577 sc->arg_parm[arg.idx]))
1580 printed += scnprintf(bf + printed, size - printed,
1581 "%s%s: ", printed ? ", " : "", field->name);
1582 if (sc->arg_scnprintf && sc->arg_scnprintf[arg.idx]) {
1585 arg.parm = sc->arg_parm[arg.idx];
1586 printed += sc->arg_scnprintf[arg.idx](bf + printed,
1587 size - printed, &arg);
1589 printed += scnprintf(bf + printed, size - printed,
1597 /* special care for unaligned accesses */
1598 p = args + sizeof(unsigned long) * i;
1599 memcpy(&val, p, sizeof(val));
1600 printed += scnprintf(bf + printed, size - printed,
1602 printed ? ", " : "", i, val);
/*
 * tracepoint_handler - signature of the per-evsel sample callbacks in this
 * file (stored in evsel->handler and invoked with the trace state, the
 * evsel, the raw event and the parsed sample).
 */
1610 typedef int (*tracepoint_handler)(struct trace *trace, struct perf_evsel *evsel,
1611 union perf_event *event,
1612 struct perf_sample *sample);
/*
 * trace__syscall_info() - return the syscall table entry for @id, reading
 * it on demand.
 *
 * An "Invalid syscall" message, with a running count, is printed for ids
 * rejected by a check that is not visible in this listing (see the XXX
 * note below). Otherwise, if the table has no named entry for @id,
 * trace__read_syscall_info() is called; the entry is returned only if it
 * has a name afterwards. The failure path prints "Problems reading syscall
 * <id>", adding the name in parentheses when one is known.
 *
 * NOTE(review): the condition guarding the "Invalid syscall" message, the
 * jumps to the failure path and the value returned from it are not visible
 * in this listing.
 */
1614 static struct syscall *trace__syscall_info(struct trace *trace,
1615 struct perf_evsel *evsel, int id)
1621 * XXX: Noticed on x86_64, reproduced as far back as 3.0.36, haven't tried
1622 * before that, leaving at a higher verbosity level till that is
1623 * explained. Reproduced with plain ftrace with:
1625 * echo 1 > /t/events/raw_syscalls/sys_exit/enable
1626 * grep "NR -1 " /t/trace_pipe
1628 * After generating some load on the machine.
1632 fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n",
1633 id, perf_evsel__name(evsel), ++n);
1638 if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) &&
1639 trace__read_syscall_info(trace, id))
/* Re-check: the read may have succeeded without producing a name. */
1642 if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL))
1645 return &trace->syscalls.table[id];
1649 fprintf(trace->output, "Problems reading syscall %d", id);
1650 if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL)
1651 fprintf(trace->output, "(%s)", trace->syscalls.table[id].name);
1652 fputs(" information\n", trace->output);
/*
 * thread__update_stats() - account one completed syscall @id for a thread.
 *
 * Finds or creates the int_node for @id in ttrace->syscall_stats, lazily
 * allocating the struct stats hung off inode->priv, then feeds
 * update_stats() with the syscall duration: sample->time minus
 * ttrace->entry_time when an entry time is recorded and the sample is
 * later than it.
 *
 * NOTE(review): the declaration/initial value of 'duration', the NULL
 * checks after intlist__findnew() and malloc(), and the initialisation of
 * a freshly allocated stats object are not visible in this listing.
 */
1657 static void thread__update_stats(struct thread_trace *ttrace,
1658 int id, struct perf_sample *sample)
1660 struct int_node *inode;
1661 struct stats *stats;
1664 inode = intlist__findnew(ttrace->syscall_stats, id);
1668 stats = inode->priv;
1669 if (stats == NULL) {
1670 stats = malloc(sizeof(struct stats));
1674 inode->priv = stats;
1677 if (ttrace->entry_time && sample->time > ttrace->entry_time)
1678 duration = sample->time - ttrace->entry_time;
1680 update_stats(stats, duration);
/*
 * trace__printf_interrupted_entry() - flush a syscall entry that is still
 * pending on trace->current.
 *
 * If there is a current thread with entry_pending set, prints the line
 * head (duration = sample->time - entry_time) followed by the saved entry
 * string and ") ...", then clears entry_pending.
 *
 * NOTE(review): the early returns for the two guard conditions and the
 * final return of 'printed' are not visible in this listing.
 */
1683 static int trace__printf_interrupted_entry(struct trace *trace, struct perf_sample *sample)
1685 struct thread_trace *ttrace;
1689 if (trace->current == NULL)
1692 ttrace = thread__priv(trace->current);
1694 if (!ttrace->entry_pending)
1697 duration = sample->time - ttrace->entry_time;
1699 printed = trace__fprintf_entry_head(trace, trace->current, duration, sample->time, trace->output);
1700 printed += fprintf(trace->output, "%-70s) ...\n", ttrace->entry_str);
1701 ttrace->entry_pending = false;
/*
 * trace__sys_enter() - handler for the syscall entry tracepoint.
 *
 * Resolves the syscall and the thread, lazily allocates the 1024-byte
 * per-thread entry_str buffer, flushes any interrupted entry (unless only
 * the summary is wanted), records entry_time and formats
 * "name(args" into entry_str.
 *
 * The entry line is printed immediately under a condition whose first
 * part is not visible in this listing; the visible part requires no
 * duration filter and !summary_only. The other visible branch sets
 * entry_pending (presumably so the matching exit, or an interruption,
 * completes the line). Finally trace->current is switched to this thread,
 * dropping the reference to the previous one.
 *
 * NOTE(review): the NULL/filtered checks on 'sc' and 'ttrace', the
 * declarations of 'args', 'msg' and 'printed', and the return values are
 * not visible in this listing.
 */
1706 static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
1707 union perf_event *event __maybe_unused,
1708 struct perf_sample *sample)
1713 struct thread *thread;
1714 int id = perf_evsel__sc_tp_uint(evsel, id, sample);
1715 struct syscall *sc = trace__syscall_info(trace, evsel, id);
1716 struct thread_trace *ttrace;
1724 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1725 ttrace = thread__trace(thread, trace->output);
1729 args = perf_evsel__sc_tp_ptr(evsel, args, sample);
1731 if (ttrace->entry_str == NULL) {
1732 ttrace->entry_str = malloc(1024);
1733 if (!ttrace->entry_str)
1737 if (!trace->summary_only)
1738 printed += trace__printf_interrupted_entry(trace, sample);
1740 ttrace->entry_time = sample->time;
1741 msg = ttrace->entry_str;
1742 printed += scnprintf(msg + printed, 1024 - printed, "%s(", sc->name);
1744 printed += syscall__scnprintf_args(sc, msg + printed, 1024 - printed,
1745 args, trace, thread);
1748 if (!trace->duration_filter && !trace->summary_only) {
1749 trace__fprintf_entry_head(trace, thread, 1, sample->time, trace->output);
1750 fprintf(trace->output, "%-70s\n", ttrace->entry_str);
1753 ttrace->entry_pending = true;
/* Keep a reference to the thread whose entry was seen last. */
1755 if (trace->current != thread) {
1756 thread__put(trace->current);
1757 trace->current = thread__get(thread);
/*
 * trace__sys_exit() - handler for the syscall exit tracepoint.
 *
 * Resolves the syscall and thread, updates the per-syscall statistics
 * (thread__update_stats(); any condition guarding that call is not
 * visible here), and reads the return value. A successful open (id ==
 * audit.open_id, ret >= 0) with a pending vfs_getname pathname records
 * that pathname for the returned fd and counts it in stats.vfs_getname.
 *
 * The duration is sample->time - entry_time when an entry time exists and
 * is checked with trace__filter_duration(); nothing is printed in
 * summary-only mode (the jumps taken in those cases are not visible).
 *
 * Output: the line head, then either the pending entry string or a
 * " ... [continued]: name()" marker, then the result: plain "%ld" when
 * the syscall has no fmt, "-1 <errno name> <message>" for negative
 * results of syscalls flagged errmsg, "0 Timeout" for zero results of
 * syscalls flagged timeout, hexadecimal for syscalls flagged hexret.
 *
 * NOTE(review): the declarations of 'ret' and 'duration', the remaining
 * result branch, the skip labels and the return value are not visible in
 * this listing. strerror_r() is used in its char*-returning form.
 */
1763 static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
1764 union perf_event *event __maybe_unused,
1765 struct perf_sample *sample)
1769 struct thread *thread;
1770 int id = perf_evsel__sc_tp_uint(evsel, id, sample);
1771 struct syscall *sc = trace__syscall_info(trace, evsel, id);
1772 struct thread_trace *ttrace;
1780 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1781 ttrace = thread__trace(thread, trace->output);
1786 thread__update_stats(ttrace, id, sample);
1788 ret = perf_evsel__sc_tp_uint(evsel, ret, sample);
/* Associate the name captured by the vfs_getname probe with the new fd. */
1790 if (id == trace->audit.open_id && ret >= 0 && trace->last_vfs_getname) {
1791 trace__set_fd_pathname(thread, ret, trace->last_vfs_getname);
1792 trace->last_vfs_getname = NULL;
1793 ++trace->stats.vfs_getname;
1796 ttrace->exit_time = sample->time;
1798 if (ttrace->entry_time) {
1799 duration = sample->time - ttrace->entry_time;
1800 if (trace__filter_duration(trace, duration))
1802 } else if (trace->duration_filter)
1805 if (trace->summary_only)
1808 trace__fprintf_entry_head(trace, thread, duration, sample->time, trace->output);
1810 if (ttrace->entry_pending) {
1811 fprintf(trace->output, "%-70s", ttrace->entry_str);
1813 fprintf(trace->output, " ... [");
1814 color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued");
1815 fprintf(trace->output, "]: %s()", sc->name);
1818 if (sc->fmt == NULL) {
1820 fprintf(trace->output, ") = %ld", ret);
1821 } else if (ret < 0 && sc->fmt->errmsg) {
1822 char bf[STRERR_BUFSIZE];
1823 const char *emsg = strerror_r(-ret, bf, sizeof(bf)),
1824 *e = audit_errno_to_name(-ret);
1826 fprintf(trace->output, ") = -1 %s %s", e, emsg);
1827 } else if (ret == 0 && sc->fmt->timeout)
1828 fprintf(trace->output, ") = 0 Timeout");
1829 else if (sc->fmt->hexret)
1830 fprintf(trace->output, ") = %#lx", ret);
1834 fputc('\n', trace->output);
1836 ttrace->entry_pending = false;
/*
 * trace__vfs_getname() - handler for the probe:vfs_getname event.
 *
 * Stores a pointer to the sample's "pathname" field in
 * trace->last_vfs_getname; trace__sys_exit() consumes it when an open
 * returns. The return statement is not visible in this listing.
 */
1841 static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel,
1842 union perf_event *event __maybe_unused,
1843 struct perf_sample *sample)
1845 trace->last_vfs_getname = perf_evsel__rawptr(evsel, sample, "pathname");
/*
 * trace__sched_stat_runtime() - handler for sched:sched_stat_runtime.
 *
 * Reads the "runtime" field, divides it by NSEC_PER_MSEC and adds the
 * result to both the per-thread and the global runtime_ms counters.
 *
 * The trailing fprintf() dumps the event's comm, pid, runtime and vruntime
 * fields (presumably the path taken when no thread_trace could be
 * obtained - the label/condition is not visible in this listing, nor are
 * the remaining call arguments and the return values).
 */
1849 static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel,
1850 union perf_event *event __maybe_unused,
1851 struct perf_sample *sample)
1853 u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
1854 double runtime_ms = (double)runtime / NSEC_PER_MSEC;
1855 struct thread *thread = machine__findnew_thread(trace->host,
1858 struct thread_trace *ttrace = thread__trace(thread, trace->output);
1863 ttrace->runtime_ms += runtime_ms;
1864 trace->runtime_ms += runtime_ms;
1868 fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n",
1870 perf_evsel__strval(evsel, sample, "comm"),
1871 (pid_t)perf_evsel__intval(evsel, sample, "pid"),
1873 perf_evsel__intval(evsel, sample, "vruntime"));
/*
 * trace__event_handler() - generic handler for events the user selected.
 *
 * Flushes any interrupted syscall entry, prints the relative timestamp,
 * a "( ): " placeholder when syscalls are being traced too, the evsel
 * name, and - if the evsel has a tracepoint format - the event payload
 * via event_format__fprintf(). The line is closed with ")".
 * The return statement is not visible in this listing.
 */
1877 static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel,
1878 union perf_event *event __maybe_unused,
1879 struct perf_sample *sample)
1881 trace__printf_interrupted_entry(trace, sample);
1882 trace__fprintf_tstamp(trace, sample->time, trace->output);
1884 if (trace->trace_syscalls)
1885 fprintf(trace->output, "( ): ");
1887 fprintf(trace->output, "%s:", evsel->name);
1889 if (evsel->tp_format) {
1890 event_format__fprintf(evsel->tp_format, sample->cpu,
1891 sample->raw_data, sample->raw_size,
1895 fprintf(trace->output, ")\n");
/*
 * print_location() - print a resolved address to @f.
 *
 * With a map available and either 'verbose' or @print_dso set, the DSO
 * long name followed by '@' is printed. With a symbol available and either
 * 'verbose' or @print_sym set, "symbol+0xoffset" is printed, the offset
 * being al->addr - al->sym->start. The two remaining statements print
 * al->addr or sample->addr in hex; the conditions selecting between them
 * are not visible in this listing.
 */
1899 static void print_location(FILE *f, struct perf_sample *sample,
1900 struct addr_location *al,
1901 bool print_dso, bool print_sym)
1904 if ((verbose || print_dso) && al->map)
1905 fprintf(f, "%s@", al->map->dso->long_name);
1907 if ((verbose || print_sym) && al->sym)
1908 fprintf(f, "%s+0x%" PRIx64, al->sym->name,
1909 al->addr - al->sym->start);
1911 fprintf(f, "0x%" PRIx64, al->addr);
1913 fprintf(f, "0x%" PRIx64, sample->addr);
/*
 * trace__pgfault() - handler for the software page-fault events.
 *
 * Looks up the thread and its trace state, distinguishes major from minor
 * faults through evsel->attr.config, and - unless only the summary is
 * wanted - prints one line: the entry head, "<kind>fault [" with the code
 * location resolved as MAP__FUNCTION, "] => " with the faulting data
 * address (resolved as MAP__VARIABLE, with a MAP__FUNCTION lookup of
 * sample->addr also visible, presumably as a fallback), and finally the
 * map type and al.level characters.
 *
 * NOTE(review): the counter updates in the major/minor branch, the
 * arguments of the first two lookups, the code that changes 'map_type'
 * and the return values are not visible in this listing.
 */
1916 static int trace__pgfault(struct trace *trace,
1917 struct perf_evsel *evsel,
1918 union perf_event *event,
1919 struct perf_sample *sample)
1921 struct thread *thread;
1922 u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
1923 struct addr_location al;
1924 char map_type = 'd';
1925 struct thread_trace *ttrace;
1927 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1928 ttrace = thread__trace(thread, trace->output);
1932 if (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ)
1937 if (trace->summary_only)
1940 thread__find_addr_location(thread, cpumode, MAP__FUNCTION,
1943 trace__fprintf_entry_head(trace, thread, 0, sample->time, trace->output);
1945 fprintf(trace->output, "%sfault [",
1946 evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ?
1949 print_location(trace->output, sample, &al, false, true);
1951 fprintf(trace->output, "] => ");
1953 thread__find_addr_location(thread, cpumode, MAP__VARIABLE,
1957 thread__find_addr_location(thread, cpumode,
1958 MAP__FUNCTION, sample->addr, &al);
1966 print_location(trace->output, sample, &al, true, false);
1968 fprintf(trace->output, " (%c%c)\n", map_type, al.level);
/*
 * skip_sample() - decide whether @sample should be ignored based on the
 * pid/tid lists given by the user.
 *
 * The first test matches samples whose pid is in trace->pid_list or whose
 * tid is in trace->tid_list; the second test fires when at least one list
 * exists. NOTE(review): the values returned by each branch are not
 * visible in this listing (presumably: listed samples are kept, unlisted
 * ones skipped when a list exists) - confirm.
 */
1973 static bool skip_sample(struct trace *trace, struct perf_sample *sample)
1975 if ((trace->pid_list && intlist__find(trace->pid_list, sample->pid)) ||
1976 (trace->tid_list && intlist__find(trace->tid_list, sample->tid)))
1979 if (trace->pid_list || trace->tid_list)
/*
 * trace__process_sample() - perf_tool sample callback used in replay mode.
 *
 * Recovers the struct trace from @tool, drops samples rejected by
 * skip_sample(), latches the first sample time as base_time unless full
 * timestamps were requested, and dispatches to the handler stored in
 * evsel->handler.
 *
 * NOTE(review): any NULL check around the handler call and the return
 * values are not visible in this listing.
 */
1985 static int trace__process_sample(struct perf_tool *tool,
1986 union perf_event *event,
1987 struct perf_sample *sample,
1988 struct perf_evsel *evsel,
1989 struct machine *machine __maybe_unused)
1991 struct trace *trace = container_of(tool, struct trace, tool);
1994 tracepoint_handler handler = evsel->handler;
1996 if (skip_sample(trace, sample))
1999 if (!trace->full_time && trace->base_time == 0)
2000 trace->base_time = sample->time;
2004 handler(trace, evsel, event, sample);
/*
 * parse_target_str() - turn the pid/tid target strings into intlists.
 *
 * For each of opts.target.pid and opts.target.tid that is set, builds
 * trace->pid_list / trace->tid_list with intlist__new() and reports a
 * parse error through pr_err() when the list could not be created.
 * The return values are not visible in this listing.
 */
2010 static int parse_target_str(struct trace *trace)
2012 if (trace->opts.target.pid) {
2013 trace->pid_list = intlist__new(trace->opts.target.pid);
2014 if (trace->pid_list == NULL) {
2015 pr_err("Error parsing process id string\n");
2020 if (trace->opts.target.tid) {
2021 trace->tid_list = intlist__new(trace->opts.target.tid);
2022 if (trace->tid_list == NULL) {
2023 pr_err("Error parsing thread id string\n");
/*
 * trace__record() - implement "perf trace record" by building an argv for
 * cmd_record() and calling it.
 *
 * The argument vector is sized for the fixed record_args, the syscall
 * "-e" option plus its event string, both page-fault "-e" pairs and the
 * caller's @argv, then filled in that order. The syscall event string is
 * raw_syscalls:sys_enter/sys_exit when that tracepoint is valid, else the
 * older syscalls:sys_enter/sys_exit; if neither exists an error is logged.
 * Page-fault events are added according to the TRACE_PFMAJ / TRACE_PFMIN
 * bits of trace->trace_pgfaults.
 *
 * Returns the result of cmd_record() on the success path.
 * NOTE(review): the contents of record_args, the initialisation of 'j',
 * the error returns and any release of rec_argv are not visible in this
 * listing - confirm rec_argv is not leaked on the error path.
 */
2031 static int trace__record(struct trace *trace, int argc, const char **argv)
2033 unsigned int rec_argc, i, j;
2034 const char **rec_argv;
2035 const char * const record_args[] = {
2042 const char * const sc_args[] = { "-e", };
2043 unsigned int sc_args_nr = ARRAY_SIZE(sc_args);
2044 const char * const majpf_args[] = { "-e", "major-faults" };
2045 unsigned int majpf_args_nr = ARRAY_SIZE(majpf_args);
2046 const char * const minpf_args[] = { "-e", "minor-faults" };
2047 unsigned int minpf_args_nr = ARRAY_SIZE(minpf_args);
2049 /* +1 is for the event string below */
2050 rec_argc = ARRAY_SIZE(record_args) + sc_args_nr + 1 +
2051 majpf_args_nr + minpf_args_nr + argc;
2052 rec_argv = calloc(rec_argc + 1, sizeof(char *));
2054 if (rec_argv == NULL)
2058 for (i = 0; i < ARRAY_SIZE(record_args); i++)
2059 rec_argv[j++] = record_args[i];
2061 if (trace->trace_syscalls) {
2062 for (i = 0; i < sc_args_nr; i++)
2063 rec_argv[j++] = sc_args[i];
2065 /* event string may be different for older kernels - e.g., RHEL6 */
2066 if (is_valid_tracepoint("raw_syscalls:sys_enter"))
2067 rec_argv[j++] = "raw_syscalls:sys_enter,raw_syscalls:sys_exit";
2068 else if (is_valid_tracepoint("syscalls:sys_enter"))
2069 rec_argv[j++] = "syscalls:sys_enter,syscalls:sys_exit";
2071 pr_err("Neither raw_syscalls nor syscalls events exist.\n");
2076 if (trace->trace_pgfaults & TRACE_PFMAJ)
2077 for (i = 0; i < majpf_args_nr; i++)
2078 rec_argv[j++] = majpf_args[i];
2080 if (trace->trace_pgfaults & TRACE_PFMIN)
2081 for (i = 0; i < minpf_args_nr; i++)
2082 rec_argv[j++] = minpf_args[i];
2084 for (i = 0; i < (unsigned int)argc; i++)
2085 rec_argv[j++] = argv[i];
/* 'j' is the number of slots actually filled, which may be below rec_argc. */
2087 return cmd_record(j, rec_argv, NULL);
/* Forward declaration: trace__run() uses this before its definition further down. */
2090 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp);
/*
 * perf_evlist__add_vfs_getname() - add the probe:vfs_getname tracepoint to
 * @evlist.
 *
 * The evsel is created with perf_evsel__newtp(); it is deleted again when
 * it has no "pathname" field, otherwise trace__vfs_getname() is installed
 * as its handler and it is appended to @evlist.
 * NOTE(review): the check for a NULL evsel and the early returns are not
 * visible in this listing.
 */
2092 static void perf_evlist__add_vfs_getname(struct perf_evlist *evlist)
2094 struct perf_evsel *evsel = perf_evsel__newtp("probe", "vfs_getname");
2098 if (perf_evsel__field(evsel, "pathname") == NULL) {
2099 perf_evsel__delete(evsel);
2103 evsel->handler = trace__vfs_getname;
2104 perf_evlist__add(evlist, evsel);
/*
 * perf_evlist__add_pgfault() - add a software page-fault event to @evlist.
 *
 * Builds a PERF_TYPE_SOFTWARE attr with the given config and a sample
 * period of 1, creates the evsel, installs trace__pgfault() as handler and
 * appends it to @evlist.
 * NOTE(review): the second parameter, the remaining attr initialisers, the
 * NULL check on the evsel and the return values are not visible in this
 * listing.
 */
2107 static int perf_evlist__add_pgfault(struct perf_evlist *evlist,
2110 struct perf_evsel *evsel;
2111 struct perf_event_attr attr = {
2112 .type = PERF_TYPE_SOFTWARE,
2116 attr.config = config;
2117 attr.sample_period = 1;
2119 event_attr_init(&attr);
2121 evsel = perf_evsel__new(&attr);
2125 evsel->handler = trace__pgfault;
2126 perf_evlist__add(evlist, evsel);
/*
 * trace__handle_event() - dispatch one event read from the ring buffers.
 *
 * Latches the first sample time as base_time unless full timestamps were
 * requested. Non-sample events go to trace__process_event(). For samples
 * the evsel is looked up by sample->id; unknown ids and tracepoint samples
 * without a raw payload are reported on trace->output, anything else is
 * passed to the evsel's handler (presumably in the else branch - the
 * returns/else lines are not visible in this listing).
 */
2131 static void trace__handle_event(struct trace *trace, union perf_event *event, struct perf_sample *sample)
2133 const u32 type = event->header.type;
2134 struct perf_evsel *evsel;
2136 if (!trace->full_time && trace->base_time == 0)
2137 trace->base_time = sample->time;
2139 if (type != PERF_RECORD_SAMPLE) {
2140 trace__process_event(trace, trace->host, event, sample);
2144 evsel = perf_evlist__id2evsel(trace->evlist, sample->id);
2145 if (evsel == NULL) {
2146 fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample->id);
2150 if (evsel->attr.type == PERF_TYPE_TRACEPOINT &&
2151 sample->raw_data == NULL) {
2152 fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
2153 perf_evsel__name(evsel), sample->tid,
2154 sample->cpu, sample->raw_size);
2156 tracepoint_handler handler = evsel->handler;
2157 handler(trace, evsel, event, sample);
/*
 * trace__run() - live tracing: set up events, run the workload/attach to
 * the target, and consume events until done.
 *
 * Visible sequence:
 *  1. Add the syscall enter/exit tracepoints and the vfs_getname probe
 *     when syscalls are traced, the major/minor page-fault events
 *     according to trace_pgfaults, and the sched_stat_runtime tracepoint
 *     (its enabling condition is not visible here).
 *  2. Create the cpu/thread maps, initialise symbols, configure the
 *     evlist and install sig_handler() for SIGCHLD and SIGINT.
 *  3. Prepare the workload (a workload exists when argc > 0), open the
 *     events, install pid filters (the user's list, or this process's own
 *     pid when threads->map[0] == -1), mmap the buffers, start the
 *     workload and enable the events.
 *  4. Loop over every mmap, parsing each event and passing it to
 *     trace__handle_event(); when no new events arrived, poll (100 ms
 *     timeout once 'done' is set, otherwise blocking).
 *  5. Drop trace->current, disable the events, print the thread summary
 *     and optional tool stats, delete the evlist and clear trace->live.
 *  The labels at the end format the error message for each failed step.
 *
 * NOTE(review): many conditions, gotos and the return are not visible in
 * this listing. The "err=%d,%s" diagnostic goes to stdout via printf()
 * while every other message uses trace->output - confirm that is intended.
 */
2161 static int trace__run(struct trace *trace, int argc, const char **argv)
2163 struct perf_evlist *evlist = trace->evlist;
2165 unsigned long before;
2166 const bool forks = argc > 0;
2167 bool draining = false;
2171 if (trace->trace_syscalls &&
2172 perf_evlist__add_syscall_newtp(evlist, trace__sys_enter,
2174 goto out_error_raw_syscalls;
2176 if (trace->trace_syscalls)
2177 perf_evlist__add_vfs_getname(evlist);
2179 if ((trace->trace_pgfaults & TRACE_PFMAJ) &&
2180 perf_evlist__add_pgfault(evlist, PERF_COUNT_SW_PAGE_FAULTS_MAJ)) {
2184 if ((trace->trace_pgfaults & TRACE_PFMIN) &&
2185 perf_evlist__add_pgfault(evlist, PERF_COUNT_SW_PAGE_FAULTS_MIN))
2189 perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
2190 trace__sched_stat_runtime))
2191 goto out_error_sched_stat_runtime;
2193 err = perf_evlist__create_maps(evlist, &trace->opts.target);
2195 fprintf(trace->output, "Problems parsing the target to trace, check your options!\n");
2196 goto out_delete_evlist;
2199 err = trace__symbols_init(trace, evlist);
2201 fprintf(trace->output, "Problems initializing symbol libraries!\n");
2202 goto out_delete_evlist;
2205 perf_evlist__config(evlist, &trace->opts);
2207 signal(SIGCHLD, sig_handler);
2208 signal(SIGINT, sig_handler);
2211 err = perf_evlist__prepare_workload(evlist, &trace->opts.target,
2214 fprintf(trace->output, "Couldn't run the workload!\n");
2215 goto out_delete_evlist;
2219 err = perf_evlist__open(evlist);
2221 goto out_error_open;
2224 * Better not use !target__has_task() here because we need to cover the
2225 * case where no threads were specified in the command line, but a
2226 * workload was, and in that case we will fill in the thread_map when
2227 * we fork the workload in perf_evlist__prepare_workload.
2229 if (trace->filter_pids.nr > 0)
2230 err = perf_evlist__set_filter_pids(evlist, trace->filter_pids.nr, trace->filter_pids.entries);
2231 else if (evlist->threads->map[0] == -1)
2232 err = perf_evlist__set_filter_pid(evlist, getpid());
2235 printf("err=%d,%s\n", -err, strerror(-err));
2239 err = perf_evlist__mmap(evlist, trace->opts.mmap_pages, false);
2241 goto out_error_mmap;
2244 perf_evlist__start_workload(evlist);
2246 perf_evlist__enable(evlist);
/* Thread ids are shown when more than one thread can appear in the output. */
2248 trace->multiple_threads = evlist->threads->map[0] == -1 ||
2249 evlist->threads->nr > 1 ||
2250 perf_evlist__first(evlist)->attr.inherit;
/* Snapshot the event count to detect whether this pass read anything. */
2252 before = trace->nr_events;
2254 for (i = 0; i < evlist->nr_mmaps; i++) {
2255 union perf_event *event;
2257 while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) {
2258 struct perf_sample sample;
2262 err = perf_evlist__parse_sample(evlist, event, &sample);
2264 fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err);
2268 trace__handle_event(trace, event, &sample);
2270 perf_evlist__mmap_consume(evlist, i);
2277 if (trace->nr_events == before) {
2278 int timeout = done ? 100 : -1;
2280 if (!draining && perf_evlist__poll(evlist, timeout) > 0) {
2281 if (perf_evlist__filter_pollfd(evlist, POLLERR | POLLHUP) == 0)
2291 thread__zput(trace->current);
2293 perf_evlist__disable(evlist);
2297 trace__fprintf_thread_summary(trace, trace->output);
2299 if (trace->show_tool_stats) {
2300 fprintf(trace->output, "Stats:\n "
2301 " vfs_getname : %" PRIu64 "\n"
2302 " proc_getname: %" PRIu64 "\n",
2303 trace->stats.vfs_getname,
2304 trace->stats.proc_getname);
2309 perf_evlist__delete(evlist);
2310 trace->evlist = NULL;
2311 trace->live = false;
2314 char errbuf[BUFSIZ];
2316 out_error_sched_stat_runtime:
2317 debugfs__strerror_open_tp(errno, errbuf, sizeof(errbuf), "sched", "sched_stat_runtime");
2320 out_error_raw_syscalls:
2321 debugfs__strerror_open_tp(errno, errbuf, sizeof(errbuf), "raw_syscalls", "sys_(enter|exit)");
2325 perf_evlist__strerror_mmap(evlist, errno, errbuf, sizeof(errbuf));
2329 perf_evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf));
2332 fprintf(trace->output, "%s\n", errbuf);
2333 goto out_delete_evlist;
2336 fprintf(trace->output, "Not enough memory to run!\n");
2337 goto out_delete_evlist;
/*
 * trace__replay() - process a previously recorded perf.data file.
 *
 * Installs the perf_tool callbacks (trace__process_sample() for samples,
 * the generic perf_event__process_* helpers for the rest) with ordered,
 * timestamp-based event delivery, forces thread ids into the output, and
 * opens a read-mode session. The session's host machine becomes
 * trace->host.
 *
 * Handlers are then wired up: probe:vfs_getname through the handlers
 * table; the sys_enter and sys_exit tracepoints (raw_syscalls first, the
 * older syscalls names as fallback) through perf_evsel__init_syscall_tp()
 * plus their 'args' / 'ret' field accessors; and trace__pgfault() for any
 * software page-fault evsel. After parse_target_str() the events are
 * processed, and the thread summary is printed when trace->summary is set
 * and processing did not fail.
 *
 * NOTE(review): the error checks/gotos between steps, the remaining
 * initialisers of 'file' and the return value are not visible in this
 * listing. The "Failed to process events" message has no trailing newline.
 */
2340 static int trace__replay(struct trace *trace)
2342 const struct perf_evsel_str_handler handlers[] = {
2343 { "probe:vfs_getname", trace__vfs_getname, },
2345 struct perf_data_file file = {
2347 .mode = PERF_DATA_MODE_READ,
2349 struct perf_session *session;
2350 struct perf_evsel *evsel;
2353 trace->tool.sample = trace__process_sample;
2354 trace->tool.mmap = perf_event__process_mmap;
2355 trace->tool.mmap2 = perf_event__process_mmap2;
2356 trace->tool.comm = perf_event__process_comm;
2357 trace->tool.exit = perf_event__process_exit;
2358 trace->tool.fork = perf_event__process_fork;
2359 trace->tool.attr = perf_event__process_attr;
2360 trace->tool.tracing_data = perf_event__process_tracing_data;
2361 trace->tool.build_id = perf_event__process_build_id;
2363 trace->tool.ordered_events = true;
2364 trace->tool.ordering_requires_timestamps = true;
2366 /* add tid to output */
2367 trace->multiple_threads = true;
2369 session = perf_session__new(&file, false, &trace->tool);
2370 if (session == NULL)
2373 if (symbol__init(&session->header.env) < 0)
2376 trace->host = &session->machines.host;
2378 err = perf_session__set_tracepoints_handlers(session, handlers);
2382 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2383 "raw_syscalls:sys_enter");
2384 /* older kernels have syscalls tp versus raw_syscalls */
2386 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2387 "syscalls:sys_enter");
2390 (perf_evsel__init_syscall_tp(evsel, trace__sys_enter) < 0 ||
2391 perf_evsel__init_sc_tp_ptr_field(evsel, args))) {
2392 pr_err("Error during initialize raw_syscalls:sys_enter event\n");
2396 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2397 "raw_syscalls:sys_exit");
2399 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2400 "syscalls:sys_exit");
2402 (perf_evsel__init_syscall_tp(evsel, trace__sys_exit) < 0 ||
2403 perf_evsel__init_sc_tp_uint_field(evsel, ret))) {
2404 pr_err("Error during initialize raw_syscalls:sys_exit event\n");
/* Route every recorded software page-fault event to trace__pgfault(). */
2408 evlist__for_each(session->evlist, evsel) {
2409 if (evsel->attr.type == PERF_TYPE_SOFTWARE &&
2410 (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ||
2411 evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MIN ||
2412 evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS))
2413 evsel->handler = trace__pgfault;
2416 err = parse_target_str(trace);
2422 err = perf_session__process_events(session);
2424 pr_err("Failed to process events, error %d", err);
2426 else if (trace->summary)
2427 trace__fprintf_thread_summary(trace, trace->output);
2430 perf_session__delete(session);
/*
 * trace__fprintf_threads_header() - print the "Summary of events" banner
 * to @fp; 'printed' holds the fprintf() result (its declaration and the
 * return statement are not visible in this listing).
 */
2435 static size_t trace__fprintf_threads_header(FILE *fp)
2439 printed = fprintf(fp, "\n Summary of events:\n\n");
/*
 * thread__dump_stats() - print the per-syscall statistics table of one
 * thread to @fp.
 *
 * After the column headers, the int_nodes of ttrace->syscall_stats are
 * walked (one node per syscall id). For each node with stats the row
 * shows the syscall name, call count, and min/avg/max converted with
 * NSEC_PER_MSEC, plus the standard deviation as a percentage of the
 * average (0 when the average is 0).
 *
 * NOTE(review): the loop construct, the early return for an empty list,
 * the declarations of 'printed', 'sc' and 'pct', and the arguments of the
 * middle fprintf() are not visible in this listing.
 */
2444 static size_t thread__dump_stats(struct thread_trace *ttrace,
2445 struct trace *trace, FILE *fp)
2447 struct stats *stats;
2450 struct int_node *inode = intlist__first(ttrace->syscall_stats);
2455 printed += fprintf(fp, "\n");
2457 printed += fprintf(fp, " syscall calls min avg max stddev\n");
2458 printed += fprintf(fp, " (msec) (msec) (msec) (%%)\n");
2459 printed += fprintf(fp, " --------------- -------- --------- --------- --------- ------\n");
2461 /* each int_node is a syscall */
2463 stats = inode->priv;
2465 double min = (double)(stats->min) / NSEC_PER_MSEC;
2466 double max = (double)(stats->max) / NSEC_PER_MSEC;
2467 double avg = avg_stats(stats);
2469 u64 n = (u64) stats->n;
/* Relative stddev is computed before avg is rescaled by NSEC_PER_MSEC. */
2471 pct = avg ? 100.0 * stddev_stats(stats)/avg : 0.0;
2472 avg /= NSEC_PER_MSEC;
2474 sc = &trace->syscalls.table[inode->i];
2475 printed += fprintf(fp, " %-15s", sc->name);
2476 printed += fprintf(fp, " %8" PRIu64 " %9.3f %9.3f",
2478 printed += fprintf(fp, " %9.3f %9.2f%%\n", max, pct);
2481 inode = intlist__next(inode);
2484 printed += fprintf(fp, "\n\n");
/*
 * Context handed to trace__fprintf_one_thread() through the void *priv
 * argument; that function reads the 'fp', 'printed' and 'trace' members
 * (only 'trace' is declared on a line visible in this listing).
 */
2489 /* struct used to pass data to per-thread function */
2490 struct summary_data {
2492 struct trace *trace;
/*
 * trace__fprintf_one_thread() - per-thread callback that prints one
 * thread's summary; @priv is a struct summary_data.
 *
 * Prints the thread comm and tid, its event count and its share of all
 * events (nr_events / trace->nr_events * 100), the major/minor fault
 * counters, the accumulated runtime in msec, and finally the per-syscall
 * table via thread__dump_stats(). The printed character count is added to
 * data->printed.
 *
 * NOTE(review): 'printed' starts from data->printed and is then added
 * back into data->printed, so earlier output appears to be counted twice
 * - confirm. The NULL check on 'ttrace', the conditions around the fault
 * counters and the return value are not visible in this listing.
 */
2496 static int trace__fprintf_one_thread(struct thread *thread, void *priv)
2498 struct summary_data *data = priv;
2499 FILE *fp = data->fp;
2500 size_t printed = data->printed;
2501 struct trace *trace = data->trace;
2502 struct thread_trace *ttrace = thread__priv(thread);
2508 ratio = (double)ttrace->nr_events / trace->nr_events * 100.0;
2510 printed += fprintf(fp, " %s (%d), ", thread__comm_str(thread), thread->tid);
2511 printed += fprintf(fp, "%lu events, ", ttrace->nr_events);
2512 printed += fprintf(fp, "%.1f%%", ratio);
2514 printed += fprintf(fp, ", %lu majfaults", ttrace->pfmaj);
2516 printed += fprintf(fp, ", %lu minfaults", ttrace->pfmin);
2517 printed += fprintf(fp, ", %.3f msec\n", ttrace->runtime_ms);
2518 printed += thread__dump_stats(ttrace, trace, fp);
2520 data->printed += printed;
/*
 * trace__fprintf_thread_summary() - print the summary for every thread of
 * trace->host to @fp.
 *
 * Prints the header, then runs trace__fprintf_one_thread() on each thread
 * through machine__for_each_thread(), sharing a struct summary_data.
 * Returns data.printed, the accumulated character count.
 */
2525 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
2527 struct summary_data data = {
2531 data.printed = trace__fprintf_threads_header(fp);
2533 machine__for_each_thread(trace->host, trace__fprintf_one_thread, &data);
2535 return data.printed;
/*
 * trace__set_duration() - option callback storing the duration filter.
 *
 * opt->value is the struct trace; @str is converted with atof() and stored
 * in trace->duration_filter.
 * NOTE(review): atof() cannot report malformed input, so a non-numeric
 * string silently yields 0 - consider strtod() with validation. The return
 * statement is not visible in this listing.
 */
2538 static int trace__set_duration(const struct option *opt, const char *str,
2539 int unset __maybe_unused)
2541 struct trace *trace = opt->value;
2543 trace->duration_filter = atof(str);
/*
 * trace__set_filter_pids() - option callback building the pid filter array.
 *
 * @str is parsed into an intlist; trace->filter_pids.entries is allocated
 * with one more slot than the list has entries. Slot 0 always receives
 * this process's own pid (getpid()), the remaining slots the parsed
 * values in list order. The list is deleted once the values are copied.
 *
 * NOTE(review): the NULL check on 'list', the path taken when calloc()
 * fails (confirm it also deletes 'list') and the return values are not
 * visible in this listing.
 */
2547 static int trace__set_filter_pids(const struct option *opt, const char *str,
2548 int unset __maybe_unused)
2552 struct trace *trace = opt->value;
2554 * FIXME: introduce a intarray class, plain parse csv and create a
2555 * { int nr, int entries[] } struct...
2557 struct intlist *list = intlist__new(str);
2562 i = trace->filter_pids.nr = intlist__nr_entries(list) + 1;
2563 trace->filter_pids.entries = calloc(i, sizeof(pid_t));
2565 if (trace->filter_pids.entries == NULL)
2568 trace->filter_pids.entries[0] = getpid();
2570 for (i = 1; i < trace->filter_pids.nr; ++i)
2571 trace->filter_pids.entries[i] = intlist__entry(list, i - 1)->i;
2573 intlist__delete(list);
/*
 * trace__open_output() - open @filename for writing as trace->output.
 *
 * An existing, non-empty file is first renamed to "<filename>.old" (the
 * rename() result is not checked). Returns 0 on success or -errno when
 * fopen() fails.
 */
2579 static int trace__open_output(struct trace *trace, const char *filename)
2583 if (!stat(filename, &st) && st.st_size) {
2584 char oldname[PATH_MAX];
2586 scnprintf(oldname, sizeof(oldname), "%s.old", filename);
2588 rename(filename, oldname);
2591 trace->output = fopen(filename, "w");
2593 return trace->output == NULL ? -errno : 0;
/*
 * parse_pagefaults() - option callback for the page-fault selector.
 *
 * opt->value points at the int bitmask to update: "all" ORs in both
 * TRACE_PFMAJ and TRACE_PFMIN, "maj" only TRACE_PFMAJ, "min" only
 * TRACE_PFMIN. Handling of any other string and the return values are not
 * visible in this listing.
 */
2596 static int parse_pagefaults(const struct option *opt, const char *str,
2597 int unset __maybe_unused)
2599 int *trace_pgfaults = opt->value;
2601 if (strcmp(str, "all") == 0)
2602 *trace_pgfaults |= TRACE_PFMAJ | TRACE_PFMIN;
2603 else if (strcmp(str, "maj") == 0)
2604 *trace_pgfaults |= TRACE_PFMAJ;
2605 else if (strcmp(str, "min") == 0)
2606 *trace_pgfaults |= TRACE_PFMIN;
/*
 * evlist__set_evsel_handler() - install @handler as the handler of every
 * evsel currently in @evlist.
 */
2613 static void evlist__set_evsel_handler(struct perf_evlist *evlist, void *handler)
2615 struct perf_evsel *evsel;
2617 evlist__for_each(evlist, evsel)
2618 evsel->handler = handler;
2621 int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
2623 const char *trace_usage[] = {
2624 "perf trace [<options>] [<command>]",
2625 "perf trace [<options>] -- <command> [<options>]",
2626 "perf trace record [<options>] [<command>]",
2627 "perf trace record [<options>] -- <command> [<options>]",
2630 struct trace trace = {
2632 .machine = audit_detect_machine(),
2633 .open_id = audit_name_to_syscall("open", trace.audit.machine),
2643 .user_freq = UINT_MAX,
2644 .user_interval = ULLONG_MAX,
2645 .no_buffering = true,
2646 .mmap_pages = UINT_MAX,
2650 .trace_syscalls = true,
2652 const char *output_name = NULL;
2653 const char *ev_qualifier_str = NULL;
2654 const struct option trace_options[] = {
2655 OPT_CALLBACK(0, "event", &trace.evlist, "event",
2656 "event selector. use 'perf list' to list available events",
2657 parse_events_option),
2658 OPT_BOOLEAN(0, "comm", &trace.show_comm,
2659 "show the thread COMM next to its id"),
2660 OPT_BOOLEAN(0, "tool_stats", &trace.show_tool_stats, "show tool stats"),
2661 OPT_STRING('e', "expr", &ev_qualifier_str, "expr",
2662 "list of events to trace"),
2663 OPT_STRING('o', "output", &output_name, "file", "output file name"),
2664 OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"),
2665 OPT_STRING('p', "pid", &trace.opts.target.pid, "pid",
2666 "trace events on existing process id"),
2667 OPT_STRING('t', "tid", &trace.opts.target.tid, "tid",
2668 "trace events on existing thread id"),
2669 OPT_CALLBACK(0, "filter-pids", &trace, "float",
2670 "show only events with duration > N.M ms", trace__set_filter_pids),
2671 OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide,
2672 "system-wide collection from all CPUs"),
2673 OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu",
2674 "list of cpus to monitor"),
2675 OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit,
2676 "child tasks do not inherit counters"),
2677 OPT_CALLBACK('m', "mmap-pages", &trace.opts.mmap_pages, "pages",
2678 "number of mmap data pages",
2679 perf_evlist__parse_mmap_pages),
2680 OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user",
2682 OPT_CALLBACK(0, "duration", &trace, "float",
2683 "show only events with duration > N.M ms",
2684 trace__set_duration),
2685 OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"),
2686 OPT_INCR('v', "verbose", &verbose, "be more verbose"),
2687 OPT_BOOLEAN('T', "time", &trace.full_time,
2688 "Show full timestamp, not time relative to first start"),
2689 OPT_BOOLEAN('s', "summary", &trace.summary_only,
2690 "Show only syscall summary with statistics"),
2691 OPT_BOOLEAN('S', "with-summary", &trace.summary,
2692 "Show all syscalls and summary with statistics"),
2693 OPT_CALLBACK_DEFAULT('F', "pf", &trace.trace_pgfaults, "all|maj|min",
2694 "Trace pagefaults", parse_pagefaults, "maj"),
2695 OPT_BOOLEAN(0, "syscalls", &trace.trace_syscalls, "Trace syscalls"),
2698 const char * const trace_subcommands[] = { "record", NULL };
2702 signal(SIGSEGV, sighandler_dump_stack);
2703 signal(SIGFPE, sighandler_dump_stack);
2705 trace.evlist = perf_evlist__new();
2706 if (trace.evlist == NULL)
2709 if (trace.evlist == NULL) {
2710 pr_err("Not enough memory to run!\n");
2714 argc = parse_options_subcommand(argc, argv, trace_options, trace_subcommands,
2715 trace_usage, PARSE_OPT_STOP_AT_NON_OPTION);
2717 if (trace.trace_pgfaults) {
2718 trace.opts.sample_address = true;
2719 trace.opts.sample_time = true;
2722 if (trace.evlist->nr_entries > 0)
2723 evlist__set_evsel_handler(trace.evlist, trace__event_handler);
2725 if ((argc >= 1) && (strcmp(argv[0], "record") == 0))
2726 return trace__record(&trace, argc-1, &argv[1]);
2728 /* summary_only implies summary option, but don't overwrite summary if set */
2729 if (trace.summary_only)
2730 trace.summary = trace.summary_only;
2732 if (!trace.trace_syscalls && !trace.trace_pgfaults &&
2733 trace.evlist->nr_entries == 0 /* Was --events used? */) {
2734 pr_err("Please specify something to trace.\n");
2738 if (output_name != NULL) {
2739 err = trace__open_output(&trace, output_name);
2741 perror("failed to create output file");
2746 if (ev_qualifier_str != NULL) {
2747 const char *s = ev_qualifier_str;
2749 trace.not_ev_qualifier = *s == '!';
2750 if (trace.not_ev_qualifier)
2752 trace.ev_qualifier = strlist__new(true, s);
2753 if (trace.ev_qualifier == NULL) {
2754 fputs("Not enough memory to parse event qualifier",
2761 err = target__validate(&trace.opts.target);
2763 target__strerror(&trace.opts.target, err, bf, sizeof(bf));
2764 fprintf(trace.output, "%s", bf);
2768 err = target__parse_uid(&trace.opts.target);
2770 target__strerror(&trace.opts.target, err, bf, sizeof(bf));
2771 fprintf(trace.output, "%s", bf);
2775 if (!argc && target__none(&trace.opts.target))
2776 trace.opts.target.system_wide = true;
2779 err = trace__replay(&trace);
2781 err = trace__run(&trace, argc, argv);
2784 if (output_name != NULL)
2785 fclose(trace.output);