1 #include <traceevent/event-parse.h>
3 #include "util/color.h"
4 #include "util/debug.h"
5 #include "util/evlist.h"
6 #include "util/machine.h"
7 #include "util/session.h"
8 #include "util/thread.h"
9 #include "util/parse-options.h"
10 #include "util/strlist.h"
11 #include "util/intlist.h"
12 #include "util/thread_map.h"
13 #include "util/stat.h"
14 #include "trace-event.h"
15 #include "util/parse-events.h"
19 #include <sys/eventfd.h>
21 #include <linux/futex.h>
23 /* For older distros: */
25 # define MAP_STACK 0x20000
29 # define MADV_HWPOISON 100
32 #ifndef MADV_MERGEABLE
33 # define MADV_MERGEABLE 12
36 #ifndef MADV_UNMERGEABLE
37 # define MADV_UNMERGEABLE 13
41 # define EFD_SEMAPHORE 1
47 u64 (*integer)(struct tp_field *field, struct perf_sample *sample);
48 void *(*pointer)(struct tp_field *field, struct perf_sample *sample);
/*
 * TP_UINT_FIELD(bits) defines tp_field__u<bits>(): it reads an unsigned
 * <bits>-wide integer located field->offset bytes into sample->raw_data and
 * returns it widened to u64.
 *
 * TP_UINT_FIELD__SWAPPED(bits) defines tp_field__swapped_u<bits>(), which
 * performs the same read and then byte-swaps the value with bswap_<bits>().
 * It is instantiated below for 16, 32 and 64 bits.
 *
 * NOTE(review): both readers dereference a cast pointer, so they rely on
 * raw_data + offset being suitably aligned for the type - confirm, or copy
 * the bytes out with memcpy() instead.
 */
52 #define TP_UINT_FIELD(bits) \
53 static u64 tp_field__u##bits(struct tp_field *field, struct perf_sample *sample) \
55 return *(u##bits *)(sample->raw_data + field->offset); \
63 #define TP_UINT_FIELD__SWAPPED(bits) \
64 static u64 tp_field__swapped_u##bits(struct tp_field *field, struct perf_sample *sample) \
66 u##bits value = *(u##bits *)(sample->raw_data + field->offset); \
67 return bswap_##bits(value);\
70 TP_UINT_FIELD__SWAPPED(16);
71 TP_UINT_FIELD__SWAPPED(32);
72 TP_UINT_FIELD__SWAPPED(64);
/*
 * Bind @field to the tracepoint field described by @format_field as an
 * unsigned integer: record its offset and select the reader that matches
 * format_field->size.  The 16/32/64-bit readers come in a byte-swapping
 * variant that is chosen when needs_swap is set; the 8-bit reader has no
 * swapped counterpart.
 */
74 static int tp_field__init_uint(struct tp_field *field,
75 struct format_field *format_field,
78 field->offset = format_field->offset;
80 switch (format_field->size) {
82 field->integer = tp_field__u8;
85 field->integer = needs_swap ? tp_field__swapped_u16 : tp_field__u16;
88 field->integer = needs_swap ? tp_field__swapped_u32 : tp_field__u32;
91 field->integer = needs_swap ? tp_field__swapped_u64 : tp_field__u64;
/*
 * Pointer accessor: return the address of the field inside the sample's raw
 * data (raw_data + field->offset).  Nothing is copied.
 */
100 static void *tp_field__ptr(struct tp_field *field, struct perf_sample *sample)
102 return sample->raw_data + field->offset;
/*
 * Bind @field to @format_field as a pointer-type field: record the offset
 * and install tp_field__ptr() as its pointer accessor.
 */
105 static int tp_field__init_ptr(struct tp_field *field, struct format_field *format_field)
107 field->offset = format_field->offset;
108 field->pointer = tp_field__ptr;
115 struct tp_field args, ret;
/*
 * Look up the tracepoint field called `name` in @evsel's format with
 * perf_evsel__field() and bind @field to it as an unsigned integer.
 * evsel->needs_swap decides whether byte-swapping readers are installed.
 * A NULL lookup result is checked before the field is initialised.
 */
119 static int perf_evsel__init_tp_uint_field(struct perf_evsel *evsel,
120 struct tp_field *field,
123 struct format_field *format_field = perf_evsel__field(evsel, name);
125 if (format_field == NULL)
128 return tp_field__init_uint(field, format_field, evsel->needs_swap);
/*
 * Initialise member @name of the struct syscall_tp hanging off evsel->priv
 * as an unsigned-integer field.  The member name itself (#name) is used as
 * the tracepoint field name to look up.
 */
131 #define perf_evsel__init_sc_tp_uint_field(evsel, name) \
132 ({ struct syscall_tp *sc = evsel->priv;\
133 perf_evsel__init_tp_uint_field(evsel, &sc->name, #name); })
/*
 * Look up the tracepoint field called `name` in @evsel's format with
 * perf_evsel__field() and bind @field to it as a pointer-type field.
 * A NULL lookup result is checked before the field is initialised.
 */
135 static int perf_evsel__init_tp_ptr_field(struct perf_evsel *evsel,
136 struct tp_field *field,
139 struct format_field *format_field = perf_evsel__field(evsel, name);
141 if (format_field == NULL)
144 return tp_field__init_ptr(field, format_field);
/*
 * Initialise member @name of the struct syscall_tp hanging off evsel->priv
 * as a pointer-type field.  The member name itself (#name) is used as the
 * tracepoint field name to look up.
 */
147 #define perf_evsel__init_sc_tp_ptr_field(evsel, name) \
148 ({ struct syscall_tp *sc = evsel->priv;\
149 perf_evsel__init_tp_ptr_field(evsel, &sc->name, #name); })
/*
 * Dispose of @evsel through perf_evsel__delete().
 * NOTE(review): going by the name, evsel->priv is presumably released
 * first; that statement is not visible here - confirm.
 */
151 static void perf_evsel__delete_priv(struct perf_evsel *evsel)
154 perf_evsel__delete(evsel);
/*
 * Allocate the struct syscall_tp private area of @evsel, bind its "id"
 * member as an unsigned-integer tracepoint field and install @handler as
 * the evsel's handler.  Callers treat a non-zero return as failure.
 */
157 static int perf_evsel__init_syscall_tp(struct perf_evsel *evsel, void *handler)
159 evsel->priv = malloc(sizeof(struct syscall_tp));
160 if (evsel->priv != NULL) {
161 if (perf_evsel__init_sc_tp_uint_field(evsel, id))
164 evsel->handler = handler;
/*
 * Create the tracepoint evsel "raw_syscalls:<direction>" and set up its
 * syscall_tp state with @handler.  A second attempt uses the "syscalls"
 * subsystem, which the comment below attributes to older kernels.
 * If perf_evsel__init_syscall_tp() fails the evsel is deleted via
 * perf_evsel__delete_priv().  Callers compare the result against NULL.
 */
175 static struct perf_evsel *perf_evsel__syscall_newtp(const char *direction, void *handler)
177 struct perf_evsel *evsel = perf_evsel__newtp("raw_syscalls", direction);
179 /* older kernel (e.g., RHEL6) use syscalls:{enter,exit} */
181 evsel = perf_evsel__newtp("syscalls", direction);
184 if (perf_evsel__init_syscall_tp(evsel, handler))
191 perf_evsel__delete_priv(evsel);
/*
 * Read member @name of the evsel's struct syscall_tp (evsel->priv) out of
 * @sample through the accessor installed at init time: ->integer() for the
 * _uint variant, ->pointer() for the _ptr variant.
 */
195 #define perf_evsel__sc_tp_uint(evsel, name, sample) \
196 ({ struct syscall_tp *fields = evsel->priv; \
197 fields->name.integer(&fields->name, sample); })
199 #define perf_evsel__sc_tp_ptr(evsel, name, sample) \
200 ({ struct syscall_tp *fields = evsel->priv; \
201 fields->name.pointer(&fields->name, sample); })
/*
 * Create the sys_enter and sys_exit syscall evsels, bind sys_enter's "args"
 * as a pointer field and sys_exit's "ret" as an unsigned-integer field, and
 * append both to @evlist.
 *
 * Any failure unwinds through the out_delete_* labels, so every evsel that
 * had already been created is deleted with perf_evsel__delete_priv().
 */
203 static int perf_evlist__add_syscall_newtp(struct perf_evlist *evlist,
204 void *sys_enter_handler,
205 void *sys_exit_handler)
208 struct perf_evsel *sys_enter, *sys_exit;
210 sys_enter = perf_evsel__syscall_newtp("sys_enter", sys_enter_handler);
211 if (sys_enter == NULL)
214 if (perf_evsel__init_sc_tp_ptr_field(sys_enter, args))
215 goto out_delete_sys_enter;
217 sys_exit = perf_evsel__syscall_newtp("sys_exit", sys_exit_handler);
218 if (sys_exit == NULL)
219 goto out_delete_sys_enter;
221 if (perf_evsel__init_sc_tp_uint_field(sys_exit, ret))
222 goto out_delete_sys_exit;
224 perf_evlist__add(evlist, sys_enter);
225 perf_evlist__add(evlist, sys_exit);
232 perf_evsel__delete_priv(sys_exit);
233 out_delete_sys_enter:
234 perf_evsel__delete_priv(sys_enter);
241 struct thread *thread;
251 const char **entries;
/*
 * DEFINE_STRARRAY(array) declares "struct strarray strarray__<array>"
 * describing the string table @array, with nr_entries taken from
 * ARRAY_SIZE(array).
 *
 * DEFINE_STRARRAY_OFFSET(array, off) is the variant that additionally takes
 * @off; the lookup code subtracts sa->offset from the argument value before
 * indexing, so @off is the value the first entry stands for.
 */
254 #define DEFINE_STRARRAY(array) struct strarray strarray__##array = { \
255 .nr_entries = ARRAY_SIZE(array), \
259 #define DEFINE_STRARRAY_OFFSET(array, off) struct strarray strarray__##array = { \
261 .nr_entries = ARRAY_SIZE(array), \
265 static size_t __syscall_arg__scnprintf_strarray(char *bf, size_t size,
267 struct syscall_arg *arg)
269 struct strarray *sa = arg->parm;
270 int idx = arg->val - sa->offset;
272 if (idx < 0 || idx >= sa->nr_entries)
273 return scnprintf(bf, size, intfmt, arg->val);
275 return scnprintf(bf, size, "%s", sa->entries[idx]);
/*
 * Strarray beautifier (SCA_STRARRAY): symbolic name from the table in
 * arg->parm, with values that have no name printed in decimal ("%d").
 */
278 static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size,
279 struct syscall_arg *arg)
281 return __syscall_arg__scnprintf_strarray(bf, size, "%d", arg);
284 #define SCA_STRARRAY syscall_arg__scnprintf_strarray
/*
 * Strarray beautifier (SCA_STRHEXARRAY) that prints values without a name
 * in hexadecimal ("%#x").  Only built for i386/x86_64.
 */
286 #if defined(__i386__) || defined(__x86_64__)
288 * FIXME: Make this available to all arches as soon as the ioctl beautifier
289 * gets rewritten to support all arches.
291 static size_t syscall_arg__scnprintf_strhexarray(char *bf, size_t size,
292 struct syscall_arg *arg)
294 return __syscall_arg__scnprintf_strarray(bf, size, "%#x", arg);
297 #define SCA_STRHEXARRAY syscall_arg__scnprintf_strhexarray
298 #endif /* defined(__i386__) || defined(__x86_64__) */
/* Forward declaration: the fd beautifier is defined further down the file. */
300 static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
301 struct syscall_arg *arg);
303 #define SCA_FD syscall_arg__scnprintf_fd
/*
 * Beautifier for the directory-fd argument of the *at() syscalls
 * (SCA_FDAT): prints "CWD" for the special-case value and otherwise defers
 * to syscall_arg__scnprintf_fd().
 * NOTE(review): the special value is presumably AT_FDCWD - the test itself
 * is not visible here.
 */
305 static size_t syscall_arg__scnprintf_fd_at(char *bf, size_t size,
306 struct syscall_arg *arg)
311 return scnprintf(bf, size, "CWD");
313 return syscall_arg__scnprintf_fd(bf, size, arg);
316 #define SCA_FDAT syscall_arg__scnprintf_fd_at
/* Forward declaration: the close() fd beautifier is defined further down. */
318 static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
319 struct syscall_arg *arg);
321 #define SCA_CLOSE_FD syscall_arg__scnprintf_close_fd
/* Beautifier (SCA_HEX): print arg->val in hexadecimal with a 0x prefix. */
323 static size_t syscall_arg__scnprintf_hex(char *bf, size_t size,
324 struct syscall_arg *arg)
326 return scnprintf(bf, size, "%#lx", arg->val);
329 #define SCA_HEX syscall_arg__scnprintf_hex
/*
 * Beautifier (SCA_MMAP_PROT) for a memory-protection mask: "NONE" for
 * PROT_NONE, otherwise the names of the set PROT_* bits joined by '|'.
 * A final scnprintf() emits prot itself in hex.
 * NOTE(review): presumably only the bits no P_MMAP_PROT() consumed are
 * printed there - the clearing/guard lines are not visible here.
 */
331 static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size,
332 struct syscall_arg *arg)
334 int printed = 0, prot = arg->val;
336 if (prot == PROT_NONE)
337 return scnprintf(bf, size, "NONE");
338 #define P_MMAP_PROT(n) \
339 if (prot & PROT_##n) { \
340 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
350 P_MMAP_PROT(GROWSDOWN);
351 P_MMAP_PROT(GROWSUP);
355 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", prot);
360 #define SCA_MMAP_PROT syscall_arg__scnprintf_mmap_prot
/*
 * Beautifier (SCA_MMAP_FLAGS) for the mmap() flags argument: the names of
 * the set MAP_* bits joined by '|'.  MAP_UNINITIALIZED is only handled when
 * the headers define it.  A final scnprintf() emits flags in hex.
 * NOTE(review): presumably restricted to bits that no P_MMAP_FLAG()
 * consumed - the clearing/guard lines are not visible here.
 */
362 static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size,
363 struct syscall_arg *arg)
365 int printed = 0, flags = arg->val;
367 #define P_MMAP_FLAG(n) \
368 if (flags & MAP_##n) { \
369 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
374 P_MMAP_FLAG(PRIVATE);
378 P_MMAP_FLAG(ANONYMOUS);
379 P_MMAP_FLAG(DENYWRITE);
380 P_MMAP_FLAG(EXECUTABLE);
383 P_MMAP_FLAG(GROWSDOWN);
385 P_MMAP_FLAG(HUGETLB);
388 P_MMAP_FLAG(NONBLOCK);
389 P_MMAP_FLAG(NORESERVE);
390 P_MMAP_FLAG(POPULATE);
392 #ifdef MAP_UNINITIALIZED
393 P_MMAP_FLAG(UNINITIALIZED);
398 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
403 #define SCA_MMAP_FLAGS syscall_arg__scnprintf_mmap_flags
/*
 * Beautifier (SCA_MREMAP_FLAGS) for the mremap() flags argument: the names
 * of the set MREMAP_* bits joined by '|'.  Each recognised bit is cleared
 * from flags as it is printed, so the closing hex output carries what is
 * left over.
 */
405 static size_t syscall_arg__scnprintf_mremap_flags(char *bf, size_t size,
406 struct syscall_arg *arg)
408 int printed = 0, flags = arg->val;
410 #define P_MREMAP_FLAG(n) \
411 if (flags & MREMAP_##n) { \
412 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
413 flags &= ~MREMAP_##n; \
416 P_MREMAP_FLAG(MAYMOVE);
418 P_MREMAP_FLAG(FIXED);
423 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
428 #define SCA_MREMAP_FLAGS syscall_arg__scnprintf_mremap_flags
/*
 * Beautifier (SCA_MADV_BHV) for the madvise() behavior argument: each
 * P_MADV_BHV(n) is a case label that returns the name of MADV_<n>; a value
 * matching no case is printed in hex.  Some cases are only compiled when
 * the corresponding MADV_* constant is defined.
 */
430 static size_t syscall_arg__scnprintf_madvise_behavior(char *bf, size_t size,
431 struct syscall_arg *arg)
433 int behavior = arg->val;
436 #define P_MADV_BHV(n) case MADV_##n: return scnprintf(bf, size, #n)
439 P_MADV_BHV(SEQUENTIAL);
440 P_MADV_BHV(WILLNEED);
441 P_MADV_BHV(DONTNEED);
443 P_MADV_BHV(DONTFORK);
445 P_MADV_BHV(HWPOISON);
446 #ifdef MADV_SOFT_OFFLINE
447 P_MADV_BHV(SOFT_OFFLINE);
449 P_MADV_BHV(MERGEABLE);
450 P_MADV_BHV(UNMERGEABLE);
452 P_MADV_BHV(HUGEPAGE);
454 #ifdef MADV_NOHUGEPAGE
455 P_MADV_BHV(NOHUGEPAGE);
458 P_MADV_BHV(DONTDUMP);
467 return scnprintf(bf, size, "%#x", behavior);
470 #define SCA_MADV_BHV syscall_arg__scnprintf_madvise_behavior
/*
 * Beautifier (SCA_FLOCK) for the flock() operation argument.  A LOCK_<cmd>
 * name is printed only when all of its bits are present in op; names are
 * joined by '|'.  One early exit prints "NONE" and the closing scnprintf()
 * prints op in hex (the conditions guarding both are not visible here).
 */
472 static size_t syscall_arg__scnprintf_flock(char *bf, size_t size,
473 struct syscall_arg *arg)
475 int printed = 0, op = arg->val;
478 return scnprintf(bf, size, "NONE");
480 if ((op & LOCK_##cmd) == LOCK_##cmd) { \
481 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #cmd); \
496 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", op);
501 #define SCA_FLOCK syscall_arg__scnprintf_flock
/*
 * Beautifier (SCA_FUTEX_OP) for the futex() op argument.  The command
 * (op & FUTEX_CMD_MASK) is printed by name, or in hex when unknown, and is
 * followed by "|PRIV" and/or "|CLKRT" when FUTEX_PRIVATE_FLAG /
 * FUTEX_CLOCK_REALTIME are set.
 *
 * Each command also ORs SCF_* bits into arg->mask.
 * NOTE(review): presumably this marks the futex() arguments the command
 * does not use so that they are left out of the output - confirm against
 * the code that consumes arg->mask.
 */
503 static size_t syscall_arg__scnprintf_futex_op(char *bf, size_t size, struct syscall_arg *arg)
505 enum syscall_futex_args {
506 SCF_UADDR = (1 << 0),
509 SCF_TIMEOUT = (1 << 3),
510 SCF_UADDR2 = (1 << 4),
514 int cmd = op & FUTEX_CMD_MASK;
518 #define P_FUTEX_OP(n) case FUTEX_##n: printed = scnprintf(bf, size, #n);
519 P_FUTEX_OP(WAIT); arg->mask |= SCF_VAL3|SCF_UADDR2; break;
520 P_FUTEX_OP(WAKE); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
521 P_FUTEX_OP(FD); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
522 P_FUTEX_OP(REQUEUE); arg->mask |= SCF_VAL3|SCF_TIMEOUT; break;
523 P_FUTEX_OP(CMP_REQUEUE); arg->mask |= SCF_TIMEOUT; break;
524 P_FUTEX_OP(CMP_REQUEUE_PI); arg->mask |= SCF_TIMEOUT; break;
525 P_FUTEX_OP(WAKE_OP); break;
526 P_FUTEX_OP(LOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
527 P_FUTEX_OP(UNLOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
528 P_FUTEX_OP(TRYLOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2; break;
529 P_FUTEX_OP(WAIT_BITSET); arg->mask |= SCF_UADDR2; break;
530 P_FUTEX_OP(WAKE_BITSET); arg->mask |= SCF_UADDR2; break;
531 P_FUTEX_OP(WAIT_REQUEUE_PI); break;
532 default: printed = scnprintf(bf, size, "%#x", cmd); break;
535 if (op & FUTEX_PRIVATE_FLAG)
536 printed += scnprintf(bf + printed, size - printed, "|PRIV");
538 if (op & FUTEX_CLOCK_REALTIME)
539 printed += scnprintf(bf + printed, size - printed, "|CLKRT");
544 #define SCA_FUTEX_OP syscall_arg__scnprintf_futex_op
/*
 * Name tables for enumerated syscall arguments.  Each string array is
 * paired with a DEFINE_STRARRAY()/DEFINE_STRARRAY_OFFSET() instance named
 * strarray__<array>, which the syscall format table passes to the strarray
 * beautifier.  The position of a name in its array is the argument value
 * it stands for (minus the offset for the _OFFSET variant; epoll_ctl_ops
 * uses offset 1).
 */
546 static const char *epoll_ctl_ops[] = { "ADD", "DEL", "MOD", };
547 static DEFINE_STRARRAY_OFFSET(epoll_ctl_ops, 1);
549 static const char *itimers[] = { "REAL", "VIRTUAL", "PROF", };
550 static DEFINE_STRARRAY(itimers);
552 static const char *whences[] = { "SET", "CUR", "END",
560 static DEFINE_STRARRAY(whences);
562 static const char *fcntl_cmds[] = {
563 "DUPFD", "GETFD", "SETFD", "GETFL", "SETFL", "GETLK", "SETLK",
564 "SETLKW", "SETOWN", "GETOWN", "SETSIG", "GETSIG", "F_GETLK64",
565 "F_SETLK64", "F_SETLKW64", "F_SETOWN_EX", "F_GETOWN_EX",
568 static DEFINE_STRARRAY(fcntl_cmds);
570 static const char *rlimit_resources[] = {
571 "CPU", "FSIZE", "DATA", "STACK", "CORE", "RSS", "NPROC", "NOFILE",
572 "MEMLOCK", "AS", "LOCKS", "SIGPENDING", "MSGQUEUE", "NICE", "RTPRIO",
575 static DEFINE_STRARRAY(rlimit_resources);
577 static const char *sighow[] = { "BLOCK", "UNBLOCK", "SETMASK", };
578 static DEFINE_STRARRAY(sighow);
580 static const char *clockid[] = {
581 "REALTIME", "MONOTONIC", "PROCESS_CPUTIME_ID", "THREAD_CPUTIME_ID",
582 "MONOTONIC_RAW", "REALTIME_COARSE", "MONOTONIC_COARSE",
584 static DEFINE_STRARRAY(clockid);
586 static const char *socket_families[] = {
587 "UNSPEC", "LOCAL", "INET", "AX25", "IPX", "APPLETALK", "NETROM",
588 "BRIDGE", "ATMPVC", "X25", "INET6", "ROSE", "DECnet", "NETBEUI",
589 "SECURITY", "KEY", "NETLINK", "PACKET", "ASH", "ECONET", "ATMSVC",
590 "RDS", "SNA", "IRDA", "PPPOX", "WANPIPE", "LLC", "IB", "CAN", "TIPC",
591 "BLUETOOTH", "IUCV", "RXRPC", "ISDN", "PHONET", "IEEE802154", "CAIF",
592 "ALG", "NFC", "VSOCK",
594 static DEFINE_STRARRAY(socket_families);
/*
 * Beautifier (SCA_SK_TYPE) for the socket()/socketpair() type argument.
 * The value is split into the base type (type & SOCK_TYPE_MASK) and the
 * flag bits above it.  The base type is printed by name, or in hex when no
 * case matches; each recognised SOCK_* flag is appended as "|NAME" and
 * cleared, so the closing "|%#x" carries the flag bits left over.
 * SOCK_TYPE_MASK defaults to 0xf when the headers do not provide it.
 */
596 #ifndef SOCK_TYPE_MASK
597 #define SOCK_TYPE_MASK 0xf
600 static size_t syscall_arg__scnprintf_socket_type(char *bf, size_t size,
601 struct syscall_arg *arg)
605 flags = type & ~SOCK_TYPE_MASK;
607 type &= SOCK_TYPE_MASK;
609 * Can't use a strarray, MIPS may override for ABI reasons.
612 #define P_SK_TYPE(n) case SOCK_##n: printed = scnprintf(bf, size, #n); break;
617 P_SK_TYPE(SEQPACKET);
622 printed = scnprintf(bf, size, "%#x", type);
625 #define P_SK_FLAG(n) \
626 if (flags & SOCK_##n) { \
627 printed += scnprintf(bf + printed, size - printed, "|%s", #n); \
628 flags &= ~SOCK_##n; \
636 printed += scnprintf(bf + printed, size - printed, "|%#x", flags);
641 #define SCA_SK_TYPE syscall_arg__scnprintf_socket_type
/*
 * Beautifier (SCA_MSG_FLAGS) for the flags argument of the send and recv
 * syscall families: names of the set MSG_* bits joined by '|', with an
 * early exit that prints "NONE" and a closing hex output of flags (the
 * conditions guarding both are not visible here).  The #defines ahead of
 * the function supply MSG_* constants for builds whose headers lack them.
 */
644 #define MSG_PROBE 0x10
646 #ifndef MSG_WAITFORONE
647 #define MSG_WAITFORONE 0x10000
649 #ifndef MSG_SENDPAGE_NOTLAST
650 #define MSG_SENDPAGE_NOTLAST 0x20000
653 #define MSG_FASTOPEN 0x20000000
656 static size_t syscall_arg__scnprintf_msg_flags(char *bf, size_t size,
657 struct syscall_arg *arg)
659 int printed = 0, flags = arg->val;
662 return scnprintf(bf, size, "NONE");
663 #define P_MSG_FLAG(n) \
664 if (flags & MSG_##n) { \
665 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
671 P_MSG_FLAG(DONTROUTE);
676 P_MSG_FLAG(DONTWAIT);
683 P_MSG_FLAG(ERRQUEUE);
684 P_MSG_FLAG(NOSIGNAL);
686 P_MSG_FLAG(WAITFORONE);
687 P_MSG_FLAG(SENDPAGE_NOTLAST);
688 P_MSG_FLAG(FASTOPEN);
689 P_MSG_FLAG(CMSG_CLOEXEC);
693 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
698 #define SCA_MSG_FLAGS syscall_arg__scnprintf_msg_flags
/*
 * Beautifier (SCA_ACCMODE) for the access() mode argument: "F" for F_OK,
 * otherwise the letters of the set <n>_OK bits written back to back with
 * no separator.  A closing scnprintf() appends mode as "|%#x" (its guard is
 * not visible here).
 */
700 static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size,
701 struct syscall_arg *arg)
706 if (mode == F_OK) /* 0 */
707 return scnprintf(bf, size, "F");
709 if (mode & n##_OK) { \
710 printed += scnprintf(bf + printed, size - printed, "%s", #n); \
720 printed += scnprintf(bf + printed, size - printed, "|%#x", mode);
725 #define SCA_ACCMODE syscall_arg__scnprintf_access_mode
/*
 * Beautifier (SCA_OPEN_FLAGS) for open()-style flags: names of the set O_*
 * bits joined by '|', with an early exit that prints "RDONLY".  O_SYNC is
 * tested with a full-mask compare rather than a plain bit test.
 *
 * Side effect: when O_CREAT is absent, the bit for the argument that
 * follows the flags (arg->idx + 1, the mode) is set in arg->mask.
 */
727 static size_t syscall_arg__scnprintf_open_flags(char *bf, size_t size,
728 struct syscall_arg *arg)
730 int printed = 0, flags = arg->val;
732 if (!(flags & O_CREAT))
733 arg->mask |= 1 << (arg->idx + 1); /* Mask the mode parm */
736 return scnprintf(bf, size, "RDONLY");
738 if (flags & O_##n) { \
739 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
763 if ((flags & O_SYNC) == O_SYNC)
764 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", "SYNC");
776 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
781 #define SCA_OPEN_FLAGS syscall_arg__scnprintf_open_flags
/*
 * Beautifier (SCA_EFD_FLAGS) for the eventfd2() flags argument: names of
 * the set EFD_* bits joined by '|', with an early exit that prints "NONE"
 * and a closing hex output of flags (the guards of both are not visible
 * here).
 */
783 static size_t syscall_arg__scnprintf_eventfd_flags(char *bf, size_t size,
784 struct syscall_arg *arg)
786 int printed = 0, flags = arg->val;
789 return scnprintf(bf, size, "NONE");
791 if (flags & EFD_##n) { \
792 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
802 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
807 #define SCA_EFD_FLAGS syscall_arg__scnprintf_eventfd_flags
/*
 * Beautifier (SCA_PIPE_FLAGS) for the pipe2() flags argument: names of the
 * set O_* bits joined by '|', followed by a hex output of flags (its guard
 * is not visible here).
 */
809 static size_t syscall_arg__scnprintf_pipe_flags(char *bf, size_t size,
810 struct syscall_arg *arg)
812 int printed = 0, flags = arg->val;
815 if (flags & O_##n) { \
816 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
825 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
830 #define SCA_PIPE_FLAGS syscall_arg__scnprintf_pipe_flags
/*
 * Beautifier (SCA_SIGNUM) for signal-number arguments: each P_SIGNUM(n) is
 * a case label returning the name of SIG<n> without the "SIG" prefix; a
 * value matching no case is printed in hex.
 */
832 static size_t syscall_arg__scnprintf_signum(char *bf, size_t size, struct syscall_arg *arg)
837 #define P_SIGNUM(n) case SIG##n: return scnprintf(bf, size, #n)
880 return scnprintf(bf, size, "%#x", sig);
883 #define SCA_SIGNUM syscall_arg__scnprintf_signum
/*
 * Names of the tty ioctl commands, built for i386/x86_64 only.  The table
 * is registered with offset 0x5401 (the value given to TCGETS here), so
 * entry i names command 0x5401 + i.  The designated initializers
 * ([0x27], [0x50], [0x60]) skip indexes, which leaves NULL entries in the
 * gaps.
 */
885 #if defined(__i386__) || defined(__x86_64__)
887 * FIXME: Make this available to all arches.
889 #define TCGETS 0x5401
891 static const char *tioctls[] = {
892 "TCGETS", "TCSETS", "TCSETSW", "TCSETSF", "TCGETA", "TCSETA", "TCSETAW",
893 "TCSETAF", "TCSBRK", "TCXONC", "TCFLSH", "TIOCEXCL", "TIOCNXCL",
894 "TIOCSCTTY", "TIOCGPGRP", "TIOCSPGRP", "TIOCOUTQ", "TIOCSTI",
895 "TIOCGWINSZ", "TIOCSWINSZ", "TIOCMGET", "TIOCMBIS", "TIOCMBIC",
896 "TIOCMSET", "TIOCGSOFTCAR", "TIOCSSOFTCAR", "FIONREAD", "TIOCLINUX",
897 "TIOCCONS", "TIOCGSERIAL", "TIOCSSERIAL", "TIOCPKT", "FIONBIO",
898 "TIOCNOTTY", "TIOCSETD", "TIOCGETD", "TCSBRKP", [0x27] = "TIOCSBRK",
899 "TIOCCBRK", "TIOCGSID", "TCGETS2", "TCSETS2", "TCSETSW2", "TCSETSF2",
900 "TIOCGRS485", "TIOCSRS485", "TIOCGPTN", "TIOCSPTLCK",
901 "TIOCGDEV||TCGETX", "TCSETX", "TCSETXF", "TCSETXW", "TIOCSIG",
902 "TIOCVHANGUP", "TIOCGPKT", "TIOCGPTLCK", "TIOCGEXCL",
903 [0x50] = "FIONCLEX", "FIOCLEX", "FIOASYNC", "TIOCSERCONFIG",
904 "TIOCSERGWILD", "TIOCSERSWILD", "TIOCGLCKTRMIOS", "TIOCSLCKTRMIOS",
905 "TIOCSERGSTRUCT", "TIOCSERGETLSR", "TIOCSERGETMULTI", "TIOCSERSETMULTI",
906 "TIOCMIWAIT", "TIOCGICOUNT", [0x60] = "FIOQSIZE",
909 static DEFINE_STRARRAY_OFFSET(tioctls, 0x5401);
910 #endif /* defined(__i386__) || defined(__x86_64__) */
/*
 * Shorthand for a syscall_fmt entry whose argument number @arg is printed
 * through the strarray beautifier using strarray__<array>.  @name is not
 * used by the expansion; it only labels the argument at the use site.
 */
912 #define STRARRAY(arg, name, array) \
913 .arg_scnprintf = { [arg] = SCA_STRARRAY, }, \
914 .arg_parm = { [arg] = &strarray__##array, }
/*
 * Per-syscall formatting hints: an optional beautifier (arg_scnprintf) for
 * each of up to six arguments, plus the flags set by the initializers
 * below.  The table is searched with bsearch() by .name, so its entries
 * must stay in strcmp() order.
 */
916 static struct syscall_fmt {
919 size_t (*arg_scnprintf[6])(char *bf, size_t size, struct syscall_arg *arg);
925 { .name = "access", .errmsg = true,
926 .arg_scnprintf = { [1] = SCA_ACCMODE, /* mode */ }, },
927 { .name = "arch_prctl", .errmsg = true, .alias = "prctl", },
928 { .name = "brk", .hexret = true,
929 .arg_scnprintf = { [0] = SCA_HEX, /* brk */ }, },
930 { .name = "clock_gettime", .errmsg = true, STRARRAY(0, clk_id, clockid), },
931 { .name = "close", .errmsg = true,
932 .arg_scnprintf = { [0] = SCA_CLOSE_FD, /* fd */ }, },
933 { .name = "connect", .errmsg = true, },
934 { .name = "dup", .errmsg = true,
935 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
936 { .name = "dup2", .errmsg = true,
937 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
938 { .name = "dup3", .errmsg = true,
939 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
940 { .name = "epoll_ctl", .errmsg = true, STRARRAY(1, op, epoll_ctl_ops), },
941 { .name = "eventfd2", .errmsg = true,
942 .arg_scnprintf = { [1] = SCA_EFD_FLAGS, /* flags */ }, },
943 { .name = "faccessat", .errmsg = true,
944 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
945 { .name = "fadvise64", .errmsg = true,
946 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
947 { .name = "fallocate", .errmsg = true,
948 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
949 { .name = "fchdir", .errmsg = true,
950 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
951 { .name = "fchmod", .errmsg = true,
952 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
953 { .name = "fchmodat", .errmsg = true,
954 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
955 { .name = "fchown", .errmsg = true,
956 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
957 { .name = "fchownat", .errmsg = true,
958 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
959 { .name = "fcntl", .errmsg = true,
960 .arg_scnprintf = { [0] = SCA_FD, /* fd */
961 [1] = SCA_STRARRAY, /* cmd */ },
962 .arg_parm = { [1] = &strarray__fcntl_cmds, /* cmd */ }, },
963 { .name = "fdatasync", .errmsg = true,
964 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
965 { .name = "flock", .errmsg = true,
966 .arg_scnprintf = { [0] = SCA_FD, /* fd */
967 [1] = SCA_FLOCK, /* cmd */ }, },
968 { .name = "fsetxattr", .errmsg = true,
969 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
970 { .name = "fstat", .errmsg = true, .alias = "newfstat",
971 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
972 { .name = "fstatat", .errmsg = true, .alias = "newfstatat",
973 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
974 { .name = "fstatfs", .errmsg = true,
975 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
976 { .name = "fsync", .errmsg = true,
977 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
978 { .name = "ftruncate", .errmsg = true,
979 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
980 { .name = "futex", .errmsg = true,
981 .arg_scnprintf = { [1] = SCA_FUTEX_OP, /* op */ }, },
982 { .name = "futimesat", .errmsg = true,
983 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
984 { .name = "getdents", .errmsg = true,
985 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
986 { .name = "getdents64", .errmsg = true,
987 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
988 { .name = "getitimer", .errmsg = true, STRARRAY(0, which, itimers), },
989 { .name = "getrlimit", .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
990 { .name = "ioctl", .errmsg = true,
991 .arg_scnprintf = { [0] = SCA_FD, /* fd */
992 #if defined(__i386__) || defined(__x86_64__)
994 * FIXME: Make this available to all arches.
996 [1] = SCA_STRHEXARRAY, /* cmd */
997 [2] = SCA_HEX, /* arg */ },
998 .arg_parm = { [1] = &strarray__tioctls, /* cmd */ }, },
1000 [2] = SCA_HEX, /* arg */ }, },
1002 { .name = "kill", .errmsg = true,
1003 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1004 { .name = "linkat", .errmsg = true,
1005 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
1006 { .name = "lseek", .errmsg = true,
1007 .arg_scnprintf = { [0] = SCA_FD, /* fd */
1008 [2] = SCA_STRARRAY, /* whence */ },
1009 .arg_parm = { [2] = &strarray__whences, /* whence */ }, },
1010 { .name = "lstat", .errmsg = true, .alias = "newlstat", },
1011 { .name = "madvise", .errmsg = true,
1012 .arg_scnprintf = { [0] = SCA_HEX, /* start */
1013 [2] = SCA_MADV_BHV, /* behavior */ }, },
1014 { .name = "mkdirat", .errmsg = true,
1015 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
1016 { .name = "mknodat", .errmsg = true,
1017 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
1018 { .name = "mlock", .errmsg = true,
1019 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1020 { .name = "mlockall", .errmsg = true,
1021 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1022 { .name = "mmap", .hexret = true,
1023 .arg_scnprintf = { [0] = SCA_HEX, /* addr */
1024 [2] = SCA_MMAP_PROT, /* prot */
1025 [3] = SCA_MMAP_FLAGS, /* flags */
1026 [4] = SCA_FD, /* fd */ }, },
1027 { .name = "mprotect", .errmsg = true,
1028 .arg_scnprintf = { [0] = SCA_HEX, /* start */
1029 [2] = SCA_MMAP_PROT, /* prot */ }, },
1030 { .name = "mremap", .hexret = true,
1031 .arg_scnprintf = { [0] = SCA_HEX, /* addr */
1032 [3] = SCA_MREMAP_FLAGS, /* flags */
1033 [4] = SCA_HEX, /* new_addr */ }, },
1034 { .name = "munlock", .errmsg = true,
1035 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1036 { .name = "munmap", .errmsg = true,
1037 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1038 { .name = "name_to_handle_at", .errmsg = true,
1039 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1040 { .name = "newfstatat", .errmsg = true,
1041 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1042 { .name = "open", .errmsg = true,
1043 .arg_scnprintf = { [1] = SCA_OPEN_FLAGS, /* flags */ }, },
1044 { .name = "open_by_handle_at", .errmsg = true,
1045 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1046 [2] = SCA_OPEN_FLAGS, /* flags */ }, },
1047 { .name = "openat", .errmsg = true,
1048 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1049 [2] = SCA_OPEN_FLAGS, /* flags */ }, },
1050 { .name = "pipe2", .errmsg = true,
1051 .arg_scnprintf = { [1] = SCA_PIPE_FLAGS, /* flags */ }, },
1052 { .name = "poll", .errmsg = true, .timeout = true, },
1053 { .name = "ppoll", .errmsg = true, .timeout = true, },
1054 { .name = "pread", .errmsg = true, .alias = "pread64",
1055 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1056 { .name = "preadv", .errmsg = true, .alias = "pread",
1057 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1058 { .name = "prlimit64", .errmsg = true, STRARRAY(1, resource, rlimit_resources), },
1059 { .name = "pwrite", .errmsg = true, .alias = "pwrite64",
1060 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1061 { .name = "pwritev", .errmsg = true,
1062 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1063 { .name = "read", .errmsg = true,
1064 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1065 { .name = "readlinkat", .errmsg = true,
1066 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1067 { .name = "readv", .errmsg = true,
1068 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1069 { .name = "recvfrom", .errmsg = true,
1070 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1071 { .name = "recvmmsg", .errmsg = true,
1072 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1073 { .name = "recvmsg", .errmsg = true,
1074 .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
1075 { .name = "renameat", .errmsg = true,
1076 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1077 { .name = "rt_sigaction", .errmsg = true,
1078 .arg_scnprintf = { [0] = SCA_SIGNUM, /* sig */ }, },
1079 { .name = "rt_sigprocmask", .errmsg = true, STRARRAY(0, how, sighow), },
1080 { .name = "rt_sigqueueinfo", .errmsg = true,
1081 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1082 { .name = "rt_tgsigqueueinfo", .errmsg = true,
1083 .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
1084 { .name = "select", .errmsg = true, .timeout = true, },
1085 { .name = "sendmmsg", .errmsg = true,
1086 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1087 { .name = "sendmsg", .errmsg = true,
1088 .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
1089 { .name = "sendto", .errmsg = true,
1090 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1091 { .name = "setitimer", .errmsg = true, STRARRAY(0, which, itimers), },
1092 { .name = "setrlimit", .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
1093 { .name = "shutdown", .errmsg = true,
1094 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1095 { .name = "socket", .errmsg = true,
1096 .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
1097 [1] = SCA_SK_TYPE, /* type */ },
1098 .arg_parm = { [0] = &strarray__socket_families, /* family */ }, },
1099 { .name = "socketpair", .errmsg = true,
1100 .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
1101 [1] = SCA_SK_TYPE, /* type */ },
1102 .arg_parm = { [0] = &strarray__socket_families, /* family */ }, },
1103 { .name = "stat", .errmsg = true, .alias = "newstat", },
1104 { .name = "symlinkat", .errmsg = true,
1105 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1106 { .name = "tgkill", .errmsg = true,
1107 .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
1108 { .name = "tkill", .errmsg = true,
1109 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1110 { .name = "uname", .errmsg = true, .alias = "newuname", },
1111 { .name = "unlinkat", .errmsg = true,
1112 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1113 { .name = "utimensat", .errmsg = true,
1114 .arg_scnprintf = { [0] = SCA_FDAT, /* dirfd */ }, },
1115 { .name = "write", .errmsg = true,
1116 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1117 { .name = "writev", .errmsg = true,
1118 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
/*
 * bsearch() comparison callback: @name is the key (a syscall name string),
 * @fmtp points at a struct syscall_fmt element; ordering is strcmp() on
 * the element's name.
 */
1121 static int syscall_fmt__cmp(const void *name, const void *fmtp)
1123 const struct syscall_fmt *fmt = fmtp;
1124 return strcmp(name, fmt->name);
/*
 * Find the formatting entry for syscall @name by binary search over
 * syscall_fmts.  Returns the bsearch() result, i.e. NULL when the syscall
 * has no entry.  Relies on the table being sorted by name.
 */
1127 static struct syscall_fmt *syscall_fmt__find(const char *name)
1129 const int nmemb = ARRAY_SIZE(syscall_fmts);
1130 return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp);
1134 struct event_format *tp_format;
1138 struct syscall_fmt *fmt;
1139 size_t (**arg_scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
/*
 * Print "(<duration> ms): " to @fp, where the duration is @t divided by
 * NSEC_PER_MSEC.  The number is coloured red from 1 ms upwards, yellow from
 * 0.01 ms upwards and in the normal colour below that.
 * Returns the number of characters printed.
 */
1143 static size_t fprintf_duration(unsigned long t, FILE *fp)
1145 double duration = (double)t / NSEC_PER_MSEC;
1146 size_t printed = fprintf(fp, "(");
1148 if (duration >= 1.0)
1149 printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration);
1150 else if (duration >= 0.01)
1151 printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration);
1153 printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration);
1154 return printed + fprintf(fp, "): ");
/*
 * Per-thread tracing state, stored as the thread's private data (see
 * thread__priv()/thread__set_priv() users below).  nr_events is bumped by
 * thread__trace() on every lookup; syscall_stats is created in
 * thread_trace__new().
 */
1157 struct thread_trace {
1161 unsigned long nr_events;
1162 unsigned long pfmaj, pfmin;
1170 struct intlist *syscall_stats;
/*
 * Allocate a zeroed struct thread_trace.  paths.max starts at -1, the value
 * trace__set_fd_pathname() treats as "no fd table yet", and syscall_stats
 * gets a fresh intlist.
 */
1173 static struct thread_trace *thread_trace__new(void)
1175 struct thread_trace *ttrace = zalloc(sizeof(struct thread_trace));
1178 ttrace->paths.max = -1;
1180 ttrace->syscall_stats = intlist__new(NULL);
/*
 * Return the tracing state attached to @thread, creating it on first use,
 * and count the event in ttrace->nr_events.  When the state is still
 * missing after the creation attempt, a red "not enough memory" warning is
 * written to @fp.
 */
1185 static struct thread_trace *thread__trace(struct thread *thread, FILE *fp)
1187 struct thread_trace *ttrace;
1192 if (thread__priv(thread) == NULL)
1193 thread__set_priv(thread, thread_trace__new());
1195 if (thread__priv(thread) == NULL)
1198 ttrace = thread__priv(thread);
1199 ++ttrace->nr_events;
1203 color_fprintf(fp, PERF_COLOR_RED,
1204 "WARNING: not enough memory, dropping samples!\n");
1208 #define TRACE_PFMAJ (1 << 0)
1209 #define TRACE_PFMIN (1 << 1)
1212 struct perf_tool tool;
1219 struct syscall *table;
1221 struct record_opts opts;
1222 struct perf_evlist *evlist;
1223 struct machine *host;
1224 struct thread *current;
1227 unsigned long nr_events;
1228 struct strlist *ev_qualifier;
1229 const char *last_vfs_getname;
1230 struct intlist *tid_list;
1231 struct intlist *pid_list;
1232 double duration_filter;
1238 bool not_ev_qualifier;
1242 bool multiple_threads;
1246 bool show_tool_stats;
1247 bool trace_syscalls;
1251 static int trace__set_fd_pathname(struct thread *thread, int fd, const char *pathname)
1253 struct thread_trace *ttrace = thread__priv(thread);
1255 if (fd > ttrace->paths.max) {
1256 char **npath = realloc(ttrace->paths.table, (fd + 1) * sizeof(char *));
1261 if (ttrace->paths.max != -1) {
1262 memset(npath + ttrace->paths.max + 1, 0,
1263 (fd - ttrace->paths.max) * sizeof(char *));
1265 memset(npath, 0, (fd + 1) * sizeof(char *));
1268 ttrace->paths.table = npath;
1269 ttrace->paths.max = fd;
1272 ttrace->paths.table[fd] = strdup(pathname);
1274 return ttrace->paths.table[fd] != NULL ? 0 : -1;
/*
 * Resolve @fd of @thread by reading the matching /proc symlink and store
 * the result with trace__set_fd_pathname().
 *
 * The link is /proc/<pid>/fd/<fd> when the thread is the group leader
 * (pid_ == tid) and /proc/<pid>/task/<tid>/fd/<fd> otherwise.  lstat() is
 * used first to make sure the target plus its terminator fits in the local
 * buffer; a readlink() result longer than the lstat() size is rejected.
 */
1277 static int thread__read_fd_path(struct thread *thread, int fd)
1279 char linkname[PATH_MAX], pathname[PATH_MAX];
1283 if (thread->pid_ == thread->tid) {
1284 scnprintf(linkname, sizeof(linkname),
1285 "/proc/%d/fd/%d", thread->pid_, fd);
1287 scnprintf(linkname, sizeof(linkname),
1288 "/proc/%d/task/%d/fd/%d", thread->pid_, thread->tid, fd);
1291 if (lstat(linkname, &st) < 0 || st.st_size + 1 > (off_t)sizeof(pathname))
1294 ret = readlink(linkname, pathname, sizeof(pathname));
1296 if (ret < 0 || ret > st.st_size)
1299 pathname[ret] = '\0';
1300 return trace__set_fd_pathname(thread, fd, pathname);
/*
 * Return the cached path for @fd of @thread.  When the fd lies beyond the
 * table or its slot is empty, the path is looked up through /proc with
 * thread__read_fd_path() and trace->stats.proc_getname is incremented.
 * The returned string is owned by the per-thread table.
 */
1303 static const char *thread__fd_path(struct thread *thread, int fd,
1304 struct trace *trace)
1306 struct thread_trace *ttrace = thread__priv(thread);
1314 if ((fd > ttrace->paths.max || ttrace->paths.table[fd] == NULL)) {
1317 ++trace->stats.proc_getname;
1318 if (thread__read_fd_path(thread, fd))
1322 return ttrace->paths.table[fd];
/*
 * Beautifier (SCA_FD) for file-descriptor arguments: prints the number and
 * appends "<path>" using the path obtained from thread__fd_path().
 */
1325 static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
1326 struct syscall_arg *arg)
1329 size_t printed = scnprintf(bf, size, "%d", fd);
1330 const char *path = thread__fd_path(arg->thread, fd, arg->trace);
1333 printed += scnprintf(bf + printed, size - printed, "<%s>", path);
/*
 * Beautifier (SCA_CLOSE_FD) for the fd argument of close(): formats the fd
 * like syscall_arg__scnprintf_fd() and then drops the cached path of that
 * fd, since the number is about to become free for reuse.  zfree() also
 * resets the slot to NULL.
 */
1338 static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
1339 struct syscall_arg *arg)
1342 size_t printed = syscall_arg__scnprintf_fd(bf, size, arg);
1343 struct thread_trace *ttrace = thread__priv(arg->thread);
1345 if (ttrace && fd >= 0 && fd <= ttrace->paths.max)
1346 zfree(&ttrace->paths.table[fd]);
1351 static bool trace__filter_duration(struct trace *trace, double t)
1353 return t < (trace->duration_filter * NSEC_PER_MSEC);
1356 static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
1358 double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC;
1360 return fprintf(fp, "%10.3f ", ts);
/*
 * Flags shared between sig_handler() and the code it interrupts.
 *
 * They are written from a signal handler, so they must be
 * 'volatile sig_atomic_t': a plain bool gives no guarantee that the store is
 * atomic or that a loop polling the flag ever re-reads it.
 *
 * done        - a handled signal arrived; the event loop should wind down.
 * interrupted - the most recent handled signal was SIGINT.
 */
static volatile sig_atomic_t done = false;
static volatile sig_atomic_t interrupted = false;

/*
 * Handler installed for SIGCHLD and SIGINT: record that tracing should stop
 * and whether the stop was requested by SIGINT.
 */
static void sig_handler(int sig)
{
	done = true;
	interrupted = sig == SIGINT;
}
/*
 * Print the common prefix of an output line to @fp: the timestamp, the
 * duration and, when trace->multiple_threads is set, the thread id -
 * preceded by the thread's comm (cut to 14 characters) if trace->show_comm
 * is set.
 *
 * NOTE(review): the return statement is elided from this listing;
 * presumably the accumulated 'printed' count is returned.
 */
1372 static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread,
1373 u64 duration, u64 tstamp, FILE *fp)
1375 size_t printed = trace__fprintf_tstamp(trace, tstamp, fp);
1376 printed += fprintf_duration(duration, fp);
1378 if (trace->multiple_threads) {
1379 if (trace->show_comm)
1380 printed += fprintf(fp, "%.14s/", thread__comm_str(thread));
1381 printed += fprintf(fp, "%d ", thread->tid);
/*
 * Handle a non-sample perf event for @machine, dispatching on the event
 * type. PERF_RECORD_LOST is reported in red on trace->output before being
 * passed to machine__process_lost_event().
 *
 * NOTE(review): the declaration of 'ret', the remaining case labels/breaks
 * and the return are elided from this listing; the machine__process_event()
 * call presumably sits under the default label and 'ret' is returned.
 */
1387 static int trace__process_event(struct trace *trace, struct machine *machine,
1388 union perf_event *event, struct perf_sample *sample)
1392 switch (event->header.type) {
1393 case PERF_RECORD_LOST:
1394 color_fprintf(trace->output, PERF_COLOR_RED,
1395 "LOST %" PRIu64 " events!\n", event->lost.lost);
1396 ret = machine__process_lost_event(machine, event, sample);
1398 ret = machine__process_event(machine, event, sample);
1405 static int trace__tool_process(struct perf_tool *tool,
1406 union perf_event *event,
1407 struct perf_sample *sample,
1408 struct machine *machine)
1410 struct trace *trace = container_of(tool, struct trace, tool);
1411 return trace__process_event(trace, machine, event, sample);
/*
 * Prepare symbol resolution: call symbol__init(), create the host machine
 * in trace->host and synthesize thread events for evlist->threads into it,
 * using trace__tool_process() as the callback.
 *
 * NOTE(review): the error checks after symbol__init() and
 * machine__new_host() and the final return are elided from this listing.
 */
1414 static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
1416 int err = symbol__init(NULL);
1421 trace->host = machine__new_host();
1422 if (trace->host == NULL)
1425 err = __machine__synthesize_threads(trace->host, &trace->tool, &trace->opts.target,
1426 evlist->threads, trace__tool_process, false);
/*
 * Choose a pretty-printer for each argument of syscall @sc.
 *
 * sc->arg_scnprintf gets one slot per tracepoint format field except the
 * first, and the field list is walked starting at its second entry. A
 * formatter supplied by sc->fmt for the index wins; otherwise fields
 * flagged FIELD_IS_POINTER are printed with syscall_arg__scnprintf_hex.
 *
 * NOTE(review): the assignment of sc->arg_parm dereferences sc->fmt while
 * the loop tests sc->fmt for NULL - a guard is presumably on an elided
 * line. The handling of 'idx' and the return statements are elided too.
 */
1433 static int syscall__set_arg_fmts(struct syscall *sc)
1435 struct format_field *field;
1438 sc->arg_scnprintf = calloc(sc->tp_format->format.nr_fields - 1, sizeof(void *));
1439 if (sc->arg_scnprintf == NULL)
1443 sc->arg_parm = sc->fmt->arg_parm;
1445 for (field = sc->tp_format->format.fields->next; field; field = field->next) {
1446 if (sc->fmt && sc->fmt->arg_scnprintf[idx])
1447 sc->arg_scnprintf[idx] = sc->fmt->arg_scnprintf[idx];
1448 else if (field->flags & FIELD_IS_POINTER)
1449 sc->arg_scnprintf[idx] = syscall_arg__scnprintf_hex;
/*
 * Fill in the trace->syscalls.table entry for syscall number @id.
 *
 * The name comes from audit_syscall_to_name(). The table is grown on demand
 * (syscalls.max is the highest valid index, -1 when empty) with new slots
 * zeroed. If an event qualifier list is active and excludes this syscall,
 * the entry is marked filtered. Otherwise the "syscalls:sys_enter_<name>"
 * tracepoint format is looked up, retrying with sc->fmt->alias if the
 * primary name has no tracepoint, and the argument formatters are set up.
 *
 * Returns the result of syscall__set_arg_fmts() on the full path.
 * NOTE(review): declarations of 'sc' and 'tp_name', the assignment of
 * sc->name and all early returns are elided from this listing.
 */
1456 static int trace__read_syscall_info(struct trace *trace, int id)
1460 const char *name = audit_syscall_to_name(id, trace->audit.machine);
/* Grow the table so that index 'id' is valid */
1465 if (id > trace->syscalls.max) {
1466 struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc));
1468 if (nsyscalls == NULL)
1471 if (trace->syscalls.max != -1) {
1472 memset(nsyscalls + trace->syscalls.max + 1, 0,
1473 (id - trace->syscalls.max) * sizeof(*sc));
1475 memset(nsyscalls, 0, (id + 1) * sizeof(*sc));
1478 trace->syscalls.table = nsyscalls;
1479 trace->syscalls.max = id;
1482 sc = trace->syscalls.table + id;
/* Filter when list membership equals the negation flag */
1485 if (trace->ev_qualifier) {
1486 bool in = strlist__find(trace->ev_qualifier, name) != NULL;
1488 if (!(in ^ trace->not_ev_qualifier)) {
1489 sc->filtered = true;
1491 * No need to do read tracepoint information since this will be
1498 sc->fmt = syscall_fmt__find(sc->name);
1500 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name);
1501 sc->tp_format = trace_event__tp_format("syscalls", tp_name);
/* No tracepoint under the primary name: retry with the alias, if any */
1503 if (sc->tp_format == NULL && sc->fmt && sc->fmt->alias) {
1504 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias);
1505 sc->tp_format = trace_event__tp_format("syscalls", tp_name);
1508 if (sc->tp_format == NULL)
1511 sc->is_exit = !strcmp(name, "exit_group") || !strcmp(name, "exit");
1513 return syscall__set_arg_fmts(sc);
/*
 * Format the arguments of syscall @sc, taken from @args, into @bf (at most
 * @size bytes) as a comma separated list.
 *
 * When the tracepoint format is known, each field after the first is
 * printed as "<name>: <value>", using the per-argument formatter in
 * sc->arg_scnprintf if one is set and "%ld" otherwise. Zero-valued
 * arguments are treated specially unless they have a SCA_STRARRAY formatter
 * with a parameter. Without a tracepoint format a fallback loop prints the
 * raw values indexed by 'i'.
 *
 * NOTE(review): the initialisation of 'printed', 'bit', 'arg' and 'i', the
 * statement taken for suppressed arguments, the fallback loop header with
 * its format string, and the return are elided from this listing.
 */
1516 static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
1517 unsigned long *args, struct trace *trace,
1518 struct thread *thread)
1522 if (sc->tp_format != NULL) {
1523 struct format_field *field;
1525 struct syscall_arg arg = {
1532 for (field = sc->tp_format->format.fields->next; field;
1533 field = field->next, ++arg.idx, bit <<= 1) {
1537 * Suppress this argument if its value is zero and
1538 * and we don't have a string associated in an
1541 if (args[arg.idx] == 0 &&
1542 !(sc->arg_scnprintf &&
1543 sc->arg_scnprintf[arg.idx] == SCA_STRARRAY &&
1544 sc->arg_parm[arg.idx]))
1547 printed += scnprintf(bf + printed, size - printed,
1548 "%s%s: ", printed ? ", " : "", field->name);
1549 if (sc->arg_scnprintf && sc->arg_scnprintf[arg.idx]) {
1550 arg.val = args[arg.idx];
1552 arg.parm = sc->arg_parm[arg.idx];
1553 printed += sc->arg_scnprintf[arg.idx](bf + printed,
1554 size - printed, &arg);
1556 printed += scnprintf(bf + printed, size - printed,
1557 "%ld", args[arg.idx]);
1564 printed += scnprintf(bf + printed, size - printed,
1566 printed ? ", " : "", i, args[i]);
/*
 * Signature of the per-event callbacks stored in evsel->handler and invoked
 * with the owning trace, the evsel, the raw event and its parsed sample.
 */
1574 typedef int (*tracepoint_handler)(struct trace *trace, struct perf_evsel *evsel,
1575 union perf_event *event,
1576 struct perf_sample *sample);
/*
 * Return the syscall table entry for @id, reading its description via
 * trace__read_syscall_info() the first time the id is seen (id beyond the
 * table, or entry without a name).
 *
 * An invalid id is reported on trace->output together with the evsel name
 * and a running count; a failure to read the syscall information is
 * reported as "Problems reading syscall <id>[(<name>)] information".
 *
 * NOTE(review): the condition that classifies an id as invalid, the
 * verbosity checks, the goto/label structure and the failure return value
 * are elided from this listing - presumably NULL is returned on failure.
 */
1578 static struct syscall *trace__syscall_info(struct trace *trace,
1579 struct perf_evsel *evsel, int id)
1585 * XXX: Noticed on x86_64, reproduced as far back as 3.0.36, haven't tried
1586 * before that, leaving at a higher verbosity level till that is
1587 * explained. Reproduced with plain ftrace with:
1589 * echo 1 > /t/events/raw_syscalls/sys_exit/enable
1590 * grep "NR -1 " /t/trace_pipe
1592 * After generating some load on the machine.
1596 fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n",
1597 id, perf_evsel__name(evsel), ++n);
/* First sighting of this id: populate its table entry */
1602 if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) &&
1603 trace__read_syscall_info(trace, id))
/* Still no usable entry after the read attempt */
1606 if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL))
1609 return &trace->syscalls.table[id];
1613 fprintf(trace->output, "Problems reading syscall %d", id);
1614 if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL)
1615 fprintf(trace->output, "(%s)", trace->syscalls.table[id].name);
1616 fputs(" information\n", trace->output);
/*
 * Account one completed syscall @id in the per-thread statistics: find or
 * create the int_node for @id in ttrace->syscall_stats, lazily allocate the
 * struct stats hanging off it, and feed it the time elapsed between
 * ttrace->entry_time and sample->time.
 *
 * NOTE(review): the initial value of 'duration', the initialisation of a
 * freshly malloc'ed stats object and the NULL-check returns are elided from
 * this listing.
 */
1621 static void thread__update_stats(struct thread_trace *ttrace,
1622 int id, struct perf_sample *sample)
1624 struct int_node *inode;
1625 struct stats *stats;
1628 inode = intlist__findnew(ttrace->syscall_stats, id);
1632 stats = inode->priv;
1633 if (stats == NULL) {
1634 stats = malloc(sizeof(struct stats));
1638 inode->priv = stats;
/* Only compute a duration when an entry time was recorded and precedes the sample */
1641 if (ttrace->entry_time && sample->time > ttrace->entry_time)
1642 duration = sample->time - ttrace->entry_time;
1644 update_stats(stats, duration);
/*
 * If the thread remembered in trace->current still has a syscall entry line
 * pending, print it now - prefixed by the usual entry head and terminated
 * with ") ..." - and clear the pending flag. The duration shown is
 * sample->time minus the thread's entry time.
 *
 * NOTE(review): declarations of 'duration' and 'printed' and all return
 * statements are elided from this listing.
 */
1647 static int trace__printf_interrupted_entry(struct trace *trace, struct perf_sample *sample)
1649 struct thread_trace *ttrace;
1653 if (trace->current == NULL)
1656 ttrace = thread__priv(trace->current);
1658 if (!ttrace->entry_pending)
1661 duration = sample->time - ttrace->entry_time;
1663 printed = trace__fprintf_entry_head(trace, trace->current, duration, sample->time, trace->output);
1664 printed += fprintf(trace->output, "%-70s) ...\n", ttrace->entry_str);
1665 ttrace->entry_pending = false;
/*
 * tracepoint_handler for syscall entry.
 *
 * Looks up the syscall and the per-thread trace state, lazily allocates the
 * 1024-byte ttrace->entry_str buffer, flushes any entry line still pending
 * for another thread, then formats "<name>(<args>" into entry_str and
 * records the entry time. The line is printed right away only when neither
 * a duration filter nor summary-only mode is active; otherwise it is marked
 * pending. The thread becomes trace->current.
 *
 * NOTE(review): declarations of 'msg', 'args' and 'printed', the NULL and
 * filter checks, the branch structure around the immediate print and the
 * return statements are elided from this listing.
 */
1670 static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
1671 union perf_event *event __maybe_unused,
1672 struct perf_sample *sample)
1677 struct thread *thread;
1678 int id = perf_evsel__sc_tp_uint(evsel, id, sample);
1679 struct syscall *sc = trace__syscall_info(trace, evsel, id);
1680 struct thread_trace *ttrace;
1688 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1689 ttrace = thread__trace(thread, trace->output);
1693 args = perf_evsel__sc_tp_ptr(evsel, args, sample);
/* The entry line buffer is allocated once per thread and reused */
1695 if (ttrace->entry_str == NULL) {
1696 ttrace->entry_str = malloc(1024);
1697 if (!ttrace->entry_str)
1701 printed += trace__printf_interrupted_entry(trace, sample);
1703 ttrace->entry_time = sample->time;
1704 msg = ttrace->entry_str;
1705 printed += scnprintf(msg + printed, 1024 - printed, "%s(", sc->name);
1707 printed += syscall__scnprintf_args(sc, msg + printed, 1024 - printed,
1708 args, trace, thread);
1711 if (!trace->duration_filter && !trace->summary_only) {
1712 trace__fprintf_entry_head(trace, thread, 1, sample->time, trace->output);
1713 fprintf(trace->output, "%-70s\n", ttrace->entry_str);
1716 ttrace->entry_pending = true;
1718 trace->current = thread;
/*
 * tracepoint_handler for syscall exit.
 *
 * Updates the per-thread statistics, reads the return value and, for a
 * successful open with a pathname captured by the vfs_getname probe,
 * caches that pathname for the returned fd. Unless filtered out by duration
 * or by summary-only mode, prints the entry head followed by either the
 * pending entry string or a "... [continued]: <name>()" marker, and then
 * the return value in a form chosen by sc->fmt: plain "%ld" when there is
 * no format, "-1 <ERRNAME> <message>" for negative returns when errmsg is
 * set, "0 Timeout" for zero returns when timeout is set, or hexadecimal
 * when hexret is set.
 *
 * NOTE(review): declarations of 'ret' and 'duration', the NULL/filter
 * checks, several branch bodies (gotos, the final else) and the return
 * statements are elided from this listing.
 */
1723 static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
1724 union perf_event *event __maybe_unused,
1725 struct perf_sample *sample)
1729 struct thread *thread;
1730 int id = perf_evsel__sc_tp_uint(evsel, id, sample);
1731 struct syscall *sc = trace__syscall_info(trace, evsel, id);
1732 struct thread_trace *ttrace;
1740 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1741 ttrace = thread__trace(thread, trace->output);
1746 thread__update_stats(ttrace, id, sample);
1748 ret = perf_evsel__sc_tp_uint(evsel, ret, sample);
/* open() succeeded: 'ret' is the new fd, pair it with the probed pathname */
1750 if (id == trace->audit.open_id && ret >= 0 && trace->last_vfs_getname) {
1751 trace__set_fd_pathname(thread, ret, trace->last_vfs_getname);
1752 trace->last_vfs_getname = NULL;
1753 ++trace->stats.vfs_getname;
1756 ttrace->exit_time = sample->time;
1758 if (ttrace->entry_time) {
1759 duration = sample->time - ttrace->entry_time;
1760 if (trace__filter_duration(trace, duration))
1762 } else if (trace->duration_filter)
1765 if (trace->summary_only)
1768 trace__fprintf_entry_head(trace, thread, duration, sample->time, trace->output);
1770 if (ttrace->entry_pending) {
1771 fprintf(trace->output, "%-70s", ttrace->entry_str);
1773 fprintf(trace->output, " ... [");
1774 color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued");
1775 fprintf(trace->output, "]: %s()", sc->name);
1778 if (sc->fmt == NULL) {
1780 fprintf(trace->output, ") = %ld", ret);
1781 } else if (ret < 0 && sc->fmt->errmsg) {
1782 char bf[STRERR_BUFSIZE];
1783 const char *emsg = strerror_r(-ret, bf, sizeof(bf)),
1784 *e = audit_errno_to_name(-ret);
1786 fprintf(trace->output, ") = -1 %s %s", e, emsg);
1787 } else if (ret == 0 && sc->fmt->timeout)
1788 fprintf(trace->output, ") = 0 Timeout");
1789 else if (sc->fmt->hexret)
1790 fprintf(trace->output, ") = %#lx", ret);
1794 fputc('\n', trace->output);
1796 ttrace->entry_pending = false;
/*
 * tracepoint_handler for the vfs_getname probe: remember the "pathname"
 * field of the sample in trace->last_vfs_getname, where trace__sys_exit()
 * picks it up for a subsequent successful open.
 *
 * NOTE(review): the stored pointer refers into the sample's raw data; its
 * lifetime relative to the later use is not visible here - confirm. The
 * return statement is elided from this listing.
 */
1801 static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel,
1802 union perf_event *event __maybe_unused,
1803 struct perf_sample *sample)
1805 trace->last_vfs_getname = perf_evsel__rawptr(evsel, sample, "pathname");
/*
 * tracepoint_handler for sched:sched_stat_runtime: convert the "runtime"
 * field from nanoseconds to milliseconds and add it to both the per-thread
 * and the global runtime_ms totals. A diagnostic line with the comm, pid,
 * runtime and vruntime fields is printed on another path.
 *
 * NOTE(review): the remaining machine__findnew_thread() arguments, the
 * ttrace NULL check, the label/goto leading to the diagnostic fprintf and
 * the return statements are elided from this listing.
 */
1809 static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel,
1810 union perf_event *event __maybe_unused,
1811 struct perf_sample *sample)
1813 u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
1814 double runtime_ms = (double)runtime / NSEC_PER_MSEC;
1815 struct thread *thread = machine__findnew_thread(trace->host,
1818 struct thread_trace *ttrace = thread__trace(thread, trace->output);
1823 ttrace->runtime_ms += runtime_ms;
1824 trace->runtime_ms += runtime_ms;
1828 fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n",
1830 perf_evsel__strval(evsel, sample, "comm"),
1831 (pid_t)perf_evsel__intval(evsel, sample, "pid"),
1833 perf_evsel__intval(evsel, sample, "vruntime"));
/*
 * Generic tracepoint_handler, installed by cmd_trace() on the events
 * already in the evlist after option parsing. Flushes any pending syscall
 * entry line, then prints the timestamp, a blank duration column, the evsel
 * name and - if the evsel has a tracepoint format - the formatted payload,
 * closing the line with ")".
 *
 * NOTE(review): the last argument(s) of event_format__fprintf() and the
 * return statement are elided from this listing.
 */
1837 static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel,
1838 union perf_event *event __maybe_unused,
1839 struct perf_sample *sample)
1841 trace__printf_interrupted_entry(trace, sample);
1842 trace__fprintf_tstamp(trace, sample->time, trace->output);
1843 fprintf(trace->output, "(%9.9s): %s:", " ", evsel->name);
1845 if (evsel->tp_format) {
1846 event_format__fprintf(evsel->tp_format, sample->cpu,
1847 sample->raw_data, sample->raw_size,
1851 fprintf(trace->output, ")\n");
/*
 * Print a resolved address to @f. When a map is known and either 'verbose'
 * or @print_dso is set, the DSO long name is printed followed by '@'. When
 * a symbol is known and either 'verbose' or @print_sym is set, the output
 * is "<symbol>+0x<offset>"; the other visible forms are the resolved
 * address (al->addr) or the raw sample address, both in hexadecimal.
 *
 * NOTE(review): the else-branches selecting between al->addr and
 * sample->addr are elided from this listing.
 */
1855 static void print_location(FILE *f, struct perf_sample *sample,
1856 struct addr_location *al,
1857 bool print_dso, bool print_sym)
1860 if ((verbose || print_dso) && al->map)
1861 fprintf(f, "%s@", al->map->dso->long_name);
1863 if ((verbose || print_sym) && al->sym)
1864 fprintf(f, "%s+0x%" PRIx64, al->sym->name,
1865 al->addr - al->sym->start);
1867 fprintf(f, "0x%" PRIx64, al->addr);
1869 fprintf(f, "0x%" PRIx64, sample->addr);
/*
 * tracepoint_handler for the software page-fault events.
 *
 * Unless summary-only mode is on, prints one line of the form
 * "<head><kind>fault [<code location>] => <data location> (<type><level>)":
 * the first location is resolved as a function address and printed with
 * its symbol, the second is the faulting sample->addr resolved first as a
 * variable and, on another path, as a function, printed with its DSO.
 *
 * NOTE(review): the per-thread fault counters, the remaining arguments of
 * the first lookups, the "maj"/"min" strings, the map_type updates and all
 * return statements are elided from this listing.
 */
1872 static int trace__pgfault(struct trace *trace,
1873 struct perf_evsel *evsel,
1874 union perf_event *event,
1875 struct perf_sample *sample)
1877 struct thread *thread;
1878 u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
1879 struct addr_location al;
1880 char map_type = 'd';
1881 struct thread_trace *ttrace;
1883 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1884 ttrace = thread__trace(thread, trace->output);
1888 if (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ)
1893 if (trace->summary_only)
1896 thread__find_addr_location(thread, cpumode, MAP__FUNCTION,
1899 trace__fprintf_entry_head(trace, thread, 0, sample->time, trace->output);
1901 fprintf(trace->output, "%sfault [",
1902 evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ?
1905 print_location(trace->output, sample, &al, false, true);
1907 fprintf(trace->output, "] => ");
1909 thread__find_addr_location(thread, cpumode, MAP__VARIABLE,
1913 thread__find_addr_location(thread, cpumode,
1914 MAP__FUNCTION, sample->addr, &al);
1922 print_location(trace->output, sample, &al, true, false);
1924 fprintf(trace->output, " (%c%c)\n", map_type, al.level);
/*
 * Decide whether @sample should be ignored given the pid/tid lists built by
 * parse_target_str(). The first test matches a sample whose pid or tid is
 * on the corresponding list; the second covers the case where a list exists
 * but the sample matched neither.
 *
 * NOTE(review): all return statements are elided from this listing;
 * presumably false (keep) for a match, true (skip) when a list exists
 * without a match, and false when no list is set - confirm.
 */
1929 static bool skip_sample(struct trace *trace, struct perf_sample *sample)
1931 if ((trace->pid_list && intlist__find(trace->pid_list, sample->pid)) ||
1932 (trace->tid_list && intlist__find(trace->tid_list, sample->tid)))
1935 if (trace->pid_list || trace->tid_list)
/*
 * perf_tool sample callback, installed by trace__replay(): filter the
 * sample through skip_sample(), latch the first sample time as
 * trace->base_time unless full timestamps were requested, and dispatch to
 * the handler stored in the evsel.
 *
 * NOTE(review): the handler NULL check, any event accounting and the
 * return statements are elided from this listing.
 */
1941 static int trace__process_sample(struct perf_tool *tool,
1942 union perf_event *event,
1943 struct perf_sample *sample,
1944 struct perf_evsel *evsel,
1945 struct machine *machine __maybe_unused)
1947 struct trace *trace = container_of(tool, struct trace, tool);
1950 tracepoint_handler handler = evsel->handler;
1952 if (skip_sample(trace, sample))
/* Timestamps are printed relative to the first sample seen */
1955 if (!trace->full_time && trace->base_time == 0)
1956 trace->base_time = sample->time;
1960 handler(trace, evsel, event, sample);
/*
 * Turn the pid and tid target strings from the command line into intlists
 * (trace->pid_list / trace->tid_list) so samples can be filtered by
 * skip_sample(). A parse failure is reported through pr_err().
 *
 * NOTE(review): the return statements are elided from this listing.
 */
1966 static int parse_target_str(struct trace *trace)
1968 if (trace->opts.target.pid) {
1969 trace->pid_list = intlist__new(trace->opts.target.pid);
1970 if (trace->pid_list == NULL) {
1971 pr_err("Error parsing process id string\n");
1976 if (trace->opts.target.tid) {
1977 trace->tid_list = intlist__new(trace->opts.target.tid);
1978 if (trace->tid_list == NULL) {
1979 pr_err("Error parsing thread id string\n");
/*
 * Implement "perf trace record": build an argument vector for cmd_record()
 * and run it.
 *
 * The vector is assembled from the fixed record_args, then - depending on
 * what is being traced - "-e" plus the syscall enter/exit tracepoint pair
 * (raw_syscalls, falling back to syscalls), "-e major-faults" and
 * "-e minor-faults", and finally the caller's @argv. The array is sized for
 * the worst case (all optional groups present) plus a terminating NULL.
 *
 * NOTE(review): the contents of record_args, the initialisation of 'j' and
 * the error returns are elided from this listing. rec_argv comes from
 * calloc() and no free() is visible on any path shown here - possible leak
 * on the error path; confirm against the full file.
 */
1987 static int trace__record(struct trace *trace, int argc, const char **argv)
1989 unsigned int rec_argc, i, j;
1990 const char **rec_argv;
1991 const char * const record_args[] = {
1998 const char * const sc_args[] = { "-e", };
1999 unsigned int sc_args_nr = ARRAY_SIZE(sc_args);
2000 const char * const majpf_args[] = { "-e", "major-faults" };
2001 unsigned int majpf_args_nr = ARRAY_SIZE(majpf_args);
2002 const char * const minpf_args[] = { "-e", "minor-faults" };
2003 unsigned int minpf_args_nr = ARRAY_SIZE(minpf_args);
2005 /* +1 is for the event string below */
2006 rec_argc = ARRAY_SIZE(record_args) + sc_args_nr + 1 +
2007 majpf_args_nr + minpf_args_nr + argc;
2008 rec_argv = calloc(rec_argc + 1, sizeof(char *));
2010 if (rec_argv == NULL)
2014 for (i = 0; i < ARRAY_SIZE(record_args); i++)
2015 rec_argv[j++] = record_args[i];
2017 if (trace->trace_syscalls) {
2018 for (i = 0; i < sc_args_nr; i++)
2019 rec_argv[j++] = sc_args[i];
2021 /* event string may be different for older kernels - e.g., RHEL6 */
2022 if (is_valid_tracepoint("raw_syscalls:sys_enter"))
2023 rec_argv[j++] = "raw_syscalls:sys_enter,raw_syscalls:sys_exit";
2024 else if (is_valid_tracepoint("syscalls:sys_enter"))
2025 rec_argv[j++] = "syscalls:sys_enter,syscalls:sys_exit";
2027 pr_err("Neither raw_syscalls nor syscalls events exist.\n");
2032 if (trace->trace_pgfaults & TRACE_PFMAJ)
2033 for (i = 0; i < majpf_args_nr; i++)
2034 rec_argv[j++] = majpf_args[i];
2036 if (trace->trace_pgfaults & TRACE_PFMIN)
2037 for (i = 0; i < minpf_args_nr; i++)
2038 rec_argv[j++] = minpf_args[i];
2040 for (i = 0; i < (unsigned int)argc; i++)
2041 rec_argv[j++] = argv[i];
/* 'j' is the number of arguments actually filled in */
2043 return cmd_record(j, rec_argv, NULL);
/* Forward declaration: trace__run() prints the summary, the definition follows later in the file. */
2046 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp);
/*
 * Add the "probe:vfs_getname" tracepoint to @evlist with
 * trace__vfs_getname() as its handler. An evsel that has no "pathname"
 * field is deleted instead of being added.
 *
 * NOTE(review): the check on the perf_evsel__newtp() result and the early
 * returns are elided from this listing.
 */
2048 static void perf_evlist__add_vfs_getname(struct perf_evlist *evlist)
2050 struct perf_evsel *evsel = perf_evsel__newtp("probe", "vfs_getname");
2054 if (perf_evsel__field(evsel, "pathname") == NULL) {
2055 perf_evsel__delete(evsel);
2059 evsel->handler = trace__vfs_getname;
2060 perf_evlist__add(evlist, evsel);
/*
 * Add a software page-fault event to @evlist, with a sample period of 1 and
 * trace__pgfault() as its handler. The event's config value is taken from
 * the 'config' parameter.
 *
 * NOTE(review): the second parameter's declaration, the remaining attr
 * initialisers, the evsel NULL check and the return statements are elided
 * from this listing.
 */
2063 static int perf_evlist__add_pgfault(struct perf_evlist *evlist,
2066 struct perf_evsel *evsel;
2067 struct perf_event_attr attr = {
2068 .type = PERF_TYPE_SOFTWARE,
2072 attr.config = config;
2073 attr.sample_period = 1;
2075 event_attr_init(&attr);
2077 evsel = perf_evsel__new(&attr);
2081 evsel->handler = trace__pgfault;
2082 perf_evlist__add(evlist, evsel);
/*
 * Live tracing: set up the requested events on trace->evlist, open and mmap
 * them, optionally start the workload given in @argv, then drain the ring
 * buffers - dispatching every sample to its evsel's handler - until tracing
 * is finished. Afterwards the per-thread summary and the tool statistics
 * are printed if requested and the evlist is deleted.
 *
 * Failures are reported on trace->output and end in the common evlist
 * cleanup.
 *
 * NOTE(review): many short lines (declarations of 'err' and 'i', the
 * 'again' style loop labels, gotos, 'continue's, conditions on 'forks',
 * 'trace->sched' and 'trace->summary', and the final return) are elided
 * from this listing; the comments below describe only what is visible.
 */
2087 static int trace__run(struct trace *trace, int argc, const char **argv)
2089 struct perf_evlist *evlist = trace->evlist;
2090 struct perf_evsel *evsel;
2092 unsigned long before;
2093 const bool forks = argc > 0;
2094 bool draining = false;
/* Register the events to trace: syscalls, vfs_getname probe, page faults, scheduler runtime */
2098 if (trace->trace_syscalls &&
2099 perf_evlist__add_syscall_newtp(evlist, trace__sys_enter,
2101 goto out_error_raw_syscalls;
2103 if (trace->trace_syscalls)
2104 perf_evlist__add_vfs_getname(evlist);
2106 if ((trace->trace_pgfaults & TRACE_PFMAJ) &&
2107 perf_evlist__add_pgfault(evlist, PERF_COUNT_SW_PAGE_FAULTS_MAJ)) {
2111 if ((trace->trace_pgfaults & TRACE_PFMIN) &&
2112 perf_evlist__add_pgfault(evlist, PERF_COUNT_SW_PAGE_FAULTS_MIN))
2116 perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
2117 trace__sched_stat_runtime))
2118 goto out_error_sched_stat_runtime;
2120 err = perf_evlist__create_maps(evlist, &trace->opts.target);
2122 fprintf(trace->output, "Problems parsing the target to trace, check your options!\n");
2123 goto out_delete_evlist;
2126 err = trace__symbols_init(trace, evlist);
2128 fprintf(trace->output, "Problems initializing symbol libraries!\n");
2129 goto out_delete_evlist;
2132 perf_evlist__config(evlist, &trace->opts);
/* sig_handler() raises the flags polled further down */
2134 signal(SIGCHLD, sig_handler);
2135 signal(SIGINT, sig_handler);
2138 err = perf_evlist__prepare_workload(evlist, &trace->opts.target,
2141 fprintf(trace->output, "Couldn't run the workload!\n");
2142 goto out_delete_evlist;
2146 err = perf_evlist__open(evlist);
2148 goto out_error_open;
2150 err = perf_evlist__mmap(evlist, trace->opts.mmap_pages, false);
2152 goto out_error_mmap;
2155 perf_evlist__start_workload(evlist);
2157 perf_evlist__enable(evlist);
/* Show thread ids when the first thread-map entry is -1, several threads are mapped, or children are inherited */
2159 trace->multiple_threads = evlist->threads->map[0] == -1 ||
2160 evlist->threads->nr > 1 ||
2161 perf_evlist__first(evlist)->attr.inherit;
/* Snapshot the event count so the pass below can tell whether it consumed anything */
2163 before = trace->nr_events;
/* Drain every mmap'ed ring buffer */
2165 for (i = 0; i < evlist->nr_mmaps; i++) {
2166 union perf_event *event;
2168 while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) {
2169 const u32 type = event->header.type;
2170 tracepoint_handler handler;
2171 struct perf_sample sample;
2175 err = perf_evlist__parse_sample(evlist, event, &sample);
2177 fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err);
/* Timestamps are printed relative to the first sample seen */
2181 if (!trace->full_time && trace->base_time == 0)
2182 trace->base_time = sample.time;
/* Non-sample events go to trace__process_event() */
2184 if (type != PERF_RECORD_SAMPLE) {
2185 trace__process_event(trace, trace->host, event, &sample);
2189 evsel = perf_evlist__id2evsel(evlist, sample.id);
2190 if (evsel == NULL) {
2191 fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample.id);
2195 if (evsel->attr.type == PERF_TYPE_TRACEPOINT &&
2196 sample.raw_data == NULL) {
2197 fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
2198 perf_evsel__name(evsel), sample.tid,
2199 sample.cpu, sample.raw_size);
2203 handler = evsel->handler;
2204 handler(trace, evsel, event, &sample);
2206 perf_evlist__mmap_consume(evlist, i);
/* Nothing new in this pass: poll, with a finite timeout once 'done' is set */
2213 if (trace->nr_events == before) {
2214 int timeout = done ? 100 : -1;
2216 if (!draining && perf_evlist__poll(evlist, timeout) > 0) {
2217 if (perf_evlist__filter_pollfd(evlist, POLLERR | POLLHUP) == 0)
2227 perf_evlist__disable(evlist);
2231 trace__fprintf_thread_summary(trace, trace->output);
2233 if (trace->show_tool_stats) {
2234 fprintf(trace->output, "Stats:\n "
2235 " vfs_getname : %" PRIu64 "\n"
2236 " proc_getname: %" PRIu64 "\n",
2237 trace->stats.vfs_getname,
2238 trace->stats.proc_getname);
/* Common cleanup */
2243 perf_evlist__delete(evlist);
2244 trace->evlist = NULL;
2245 trace->live = false;
/* Error exits: build a message in errbuf, print it, then join the common cleanup */
2248 char errbuf[BUFSIZ];
2250 out_error_sched_stat_runtime:
2251 debugfs__strerror_open_tp(errno, errbuf, sizeof(errbuf), "sched", "sched_stat_runtime");
2254 out_error_raw_syscalls:
2255 debugfs__strerror_open_tp(errno, errbuf, sizeof(errbuf), "raw_syscalls", "sys_(enter|exit)");
2259 perf_evlist__strerror_mmap(evlist, errno, errbuf, sizeof(errbuf));
2263 perf_evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf));
2266 fprintf(trace->output, "%s\n", errbuf);
2267 goto out_delete_evlist;
2270 fprintf(trace->output, "Not enough memory to run!\n");
2271 goto out_delete_evlist;
/*
 * Replay a previously recorded perf data file through the same handlers
 * used for live tracing.
 *
 * Wires the perf_tool callbacks (samples go to trace__process_sample(), the
 * rest to the stock perf_event__process_*() helpers), requests time-ordered
 * event delivery, opens the session, and attaches handlers: the vfs_getname
 * probe via the handlers table, syscall enter/exit on the raw_syscalls
 * tracepoints with a second lookup under the older "syscalls:" names, and
 * trace__pgfault() on every software page-fault evsel. Then the pid/tid
 * filters are parsed and the events processed; the thread summary is
 * printed afterwards when trace->summary is set and processing succeeded.
 *
 * NOTE(review): the file path initialiser, the declaration of 'err', the
 * 'evsel == NULL' style conditions around the fallback lookups, the gotos
 * and the return statements are elided from this listing.
 */
2274 static int trace__replay(struct trace *trace)
2276 const struct perf_evsel_str_handler handlers[] = {
2277 { "probe:vfs_getname", trace__vfs_getname, },
2279 struct perf_data_file file = {
2281 .mode = PERF_DATA_MODE_READ,
2283 struct perf_session *session;
2284 struct perf_evsel *evsel;
2287 trace->tool.sample = trace__process_sample;
2288 trace->tool.mmap = perf_event__process_mmap;
2289 trace->tool.mmap2 = perf_event__process_mmap2;
2290 trace->tool.comm = perf_event__process_comm;
2291 trace->tool.exit = perf_event__process_exit;
2292 trace->tool.fork = perf_event__process_fork;
2293 trace->tool.attr = perf_event__process_attr;
2294 trace->tool.tracing_data = perf_event__process_tracing_data;
2295 trace->tool.build_id = perf_event__process_build_id;
2297 trace->tool.ordered_events = true;
2298 trace->tool.ordering_requires_timestamps = true;
2300 /* add tid to output */
2301 trace->multiple_threads = true;
2303 session = perf_session__new(&file, false, &trace->tool);
2304 if (session == NULL)
2307 if (symbol__init(&session->header.env) < 0)
/* The machine state comes from the recorded session, not from the running system */
2310 trace->host = &session->machines.host;
2312 err = perf_session__set_tracepoints_handlers(session, handlers);
2316 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2317 "raw_syscalls:sys_enter");
2318 /* older kernels have syscalls tp versus raw_syscalls */
2320 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2321 "syscalls:sys_enter");
2324 (perf_evsel__init_syscall_tp(evsel, trace__sys_enter) < 0 ||
2325 perf_evsel__init_sc_tp_ptr_field(evsel, args))) {
2326 pr_err("Error during initialize raw_syscalls:sys_enter event\n");
2330 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2331 "raw_syscalls:sys_exit");
2333 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2334 "syscalls:sys_exit");
2336 (perf_evsel__init_syscall_tp(evsel, trace__sys_exit) < 0 ||
2337 perf_evsel__init_sc_tp_uint_field(evsel, ret))) {
2338 pr_err("Error during initialize raw_syscalls:sys_exit event\n");
/* Any recorded software page-fault event is handled by trace__pgfault() */
2342 evlist__for_each(session->evlist, evsel) {
2343 if (evsel->attr.type == PERF_TYPE_SOFTWARE &&
2344 (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ||
2345 evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MIN ||
2346 evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS))
2347 evsel->handler = trace__pgfault;
2350 err = parse_target_str(trace);
2356 err = perf_session__process_events(session, &trace->tool);
2358 pr_err("Failed to process events, error %d", err);
2360 else if (trace->summary)
2361 trace__fprintf_thread_summary(trace, trace->output);
2364 perf_session__delete(session);
/*
 * Print the "Summary of events:" banner that precedes the per-thread
 * summary on @fp.
 *
 * NOTE(review): the declaration of 'printed' and the return statement are
 * elided from this listing; presumably the fprintf() result is returned.
 */
2369 static size_t trace__fprintf_threads_header(FILE *fp)
2373 printed = fprintf(fp, "\n Summary of events:\n\n");
/*
 * Print the per-syscall statistics table of one thread to @fp: a header,
 * then for every int_node in ttrace->syscall_stats a row with the syscall
 * name, number of calls, min/avg/max duration in milliseconds (the stored
 * values are divided by NSEC_PER_MSEC) and the standard deviation as a
 * percentage of the average (0 when the average is 0).
 *
 * NOTE(review): declarations of 'printed', 'sc' and 'pct', the empty-list
 * check, the loop header, the NULL test on 'stats', the remaining fprintf()
 * arguments and the return statement are elided from this listing.
 */
2378 static size_t thread__dump_stats(struct thread_trace *ttrace,
2379 struct trace *trace, FILE *fp)
2381 struct stats *stats;
2384 struct int_node *inode = intlist__first(ttrace->syscall_stats);
2389 printed += fprintf(fp, "\n");
2391 printed += fprintf(fp, " syscall calls min avg max stddev\n");
2392 printed += fprintf(fp, " (msec) (msec) (msec) (%%)\n");
2393 printed += fprintf(fp, " --------------- -------- --------- --------- --------- ------\n");
2395 /* each int_node is a syscall */
2397 stats = inode->priv;
2399 double min = (double)(stats->min) / NSEC_PER_MSEC;
2400 double max = (double)(stats->max) / NSEC_PER_MSEC;
2401 double avg = avg_stats(stats);
2403 u64 n = (u64) stats->n;
/* Relative stddev is computed before avg is converted to msec */
2405 pct = avg ? 100.0 * stddev_stats(stats)/avg : 0.0;
2406 avg /= NSEC_PER_MSEC;
/* The node's key (inode->i) is the syscall id, used to index the syscall table */
2408 sc = &trace->syscalls.table[inode->i];
2409 printed += fprintf(fp, " %-15s", sc->name);
2410 printed += fprintf(fp, " %8" PRIu64 " %9.3f %9.3f",
2412 printed += fprintf(fp, " %9.3f %9.2f%%\n", max, pct);
2415 inode = intlist__next(inode);
2418 printed += fprintf(fp, "\n\n");
/*
 * Context handed to trace__fprintf_one_thread() through
 * machine__for_each_thread(); that callback reads the 'fp', 'printed' and
 * 'trace' members.
 * NOTE(review): only the 'trace' member is visible in this listing.
 */
2423 /* struct used to pass data to per-thread function */
2424 struct summary_data {
2426 struct trace *trace;
/*
 * machine__for_each_thread() callback: print the summary line of one
 * thread - comm, tid, number of events and its share of all events, major
 * and minor fault counts, accumulated runtime in msec - followed by its
 * per-syscall statistics, and add the characters printed to data->printed.
 *
 * @priv is the struct summary_data set up by
 * trace__fprintf_thread_summary().
 *
 * NOTE(review): 'printed' starts from data->printed and is then added back
 * onto data->printed, which looks like it counts earlier output twice -
 * confirm. The declaration of 'ratio', the ttrace NULL check, the
 * conditions guarding the fault counters and the return statement are
 * elided from this listing.
 */
2430 static int trace__fprintf_one_thread(struct thread *thread, void *priv)
2432 struct summary_data *data = priv;
2433 FILE *fp = data->fp;
2434 size_t printed = data->printed;
2435 struct trace *trace = data->trace;
2436 struct thread_trace *ttrace = thread__priv(thread);
/* Share of this thread in the total number of events, in percent */
2442 ratio = (double)ttrace->nr_events / trace->nr_events * 100.0;
2444 printed += fprintf(fp, " %s (%d), ", thread__comm_str(thread), thread->tid);
2445 printed += fprintf(fp, "%lu events, ", ttrace->nr_events);
2446 printed += fprintf(fp, "%.1f%%", ratio);
2448 printed += fprintf(fp, ", %lu majfaults", ttrace->pfmaj);
2450 printed += fprintf(fp, ", %lu minfaults", ttrace->pfmin);
2451 printed += fprintf(fp, ", %.3f msec\n", ttrace->runtime_ms);
2452 printed += thread__dump_stats(ttrace, trace, fp);
2454 data->printed += printed;
/*
 * Print the summary for every thread known to trace->host on @fp: the
 * header first, then one trace__fprintf_one_thread() call per thread.
 *
 * Returns the character count accumulated in data.printed.
 * NOTE(review): the initialisers of 'data' are elided from this listing.
 */
2459 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
2461 struct summary_data data = {
2465 data.printed = trace__fprintf_threads_header(fp);
2467 machine__for_each_thread(trace->host, trace__fprintf_one_thread, &data);
2469 return data.printed;
2472 static int trace__set_duration(const struct option *opt, const char *str,
2473 int unset __maybe_unused)
2475 struct trace *trace = opt->value;
2477 trace->duration_filter = atof(str);
/*
 * Open @filename for writing as trace->output. If a non-empty file with
 * that name already exists it is first renamed to "<filename>.old".
 *
 * Returns 0 on success or -errno if fopen() failed.
 * NOTE(review): the result of rename() is not checked in the visible lines;
 * the declaration of 'st' is elided from this listing.
 */
2481 static int trace__open_output(struct trace *trace, const char *filename)
2485 if (!stat(filename, &st) && st.st_size) {
2486 char oldname[PATH_MAX];
2488 scnprintf(oldname, sizeof(oldname), "%s.old", filename);
2490 rename(filename, oldname);
2493 trace->output = fopen(filename, "w");
2495 return trace->output == NULL ? -errno : 0;
/*
 * Option callback for -F/--pf: OR the page-fault classes named by @str into
 * the int that opt->value points to - "all" selects major and minor faults,
 * "maj" only major, "min" only minor.
 *
 * NOTE(review): the handling of any other string and the return statements
 * are elided from this listing; presumably an unknown value is rejected.
 */
2498 static int parse_pagefaults(const struct option *opt, const char *str,
2499 int unset __maybe_unused)
2501 int *trace_pgfaults = opt->value;
2503 if (strcmp(str, "all") == 0)
2504 *trace_pgfaults |= TRACE_PFMAJ | TRACE_PFMIN;
2505 else if (strcmp(str, "maj") == 0)
2506 *trace_pgfaults |= TRACE_PFMAJ;
2507 else if (strcmp(str, "min") == 0)
2508 *trace_pgfaults |= TRACE_PFMIN;
2515 static void evlist__set_evsel_handler(struct perf_evlist *evlist, void *handler)
2517 struct perf_evsel *evsel;
2519 evlist__for_each(evlist, evsel)
2520 evsel->handler = handler;
/*
 * Entry point of "perf trace".
 *
 * Sets up a struct trace with its defaults, parses the command line, and
 * then: hands off to trace__record() when the first remaining argument is
 * "record"; otherwise opens the output file if one was named, builds the
 * syscall qualifier list from -e (a leading '!' negates it), validates the
 * target, and finally runs either trace__replay() or trace__run().
 *
 * NOTE(review): many short lines are elided from this listing (parts of the
 * struct trace initialiser, declarations of 'err' and 'bf', gotos/labels,
 * the condition choosing between replay and live mode, and the final
 * return); the comments below describe only what is visible.
 */
2523 int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
2525 const char * const trace_usage[] = {
2526 "perf trace [<options>] [<command>]",
2527 "perf trace [<options>] -- <command> [<options>]",
2528 "perf trace record [<options>] [<command>]",
2529 "perf trace record [<options>] -- <command> [<options>]",
2532 struct trace trace = {
2534 .machine = audit_detect_machine(),
2535 .open_id = audit_name_to_syscall("open", trace.audit.machine),
2545 .user_freq = UINT_MAX,
2546 .user_interval = ULLONG_MAX,
2547 .no_buffering = true,
2548 .mmap_pages = UINT_MAX,
2552 .trace_syscalls = true,
2554 const char *output_name = NULL;
2555 const char *ev_qualifier_str = NULL;
2556 const struct option trace_options[] = {
2557 OPT_CALLBACK(0, "event", &trace.evlist, "event",
2558 "event selector. use 'perf list' to list available events",
2559 parse_events_option),
2560 OPT_BOOLEAN(0, "comm", &trace.show_comm,
2561 "show the thread COMM next to its id"),
2562 OPT_BOOLEAN(0, "tool_stats", &trace.show_tool_stats, "show tool stats"),
2563 OPT_STRING('e', "expr", &ev_qualifier_str, "expr",
2564 "list of events to trace"),
2565 OPT_STRING('o', "output", &output_name, "file", "output file name"),
2566 OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"),
2567 OPT_STRING('p', "pid", &trace.opts.target.pid, "pid",
2568 "trace events on existing process id"),
2569 OPT_STRING('t', "tid", &trace.opts.target.tid, "tid",
2570 "trace events on existing thread id"),
2571 OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide,
2572 "system-wide collection from all CPUs"),
2573 OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu",
2574 "list of cpus to monitor"),
2575 OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit,
2576 "child tasks do not inherit counters"),
2577 OPT_CALLBACK('m', "mmap-pages", &trace.opts.mmap_pages, "pages",
2578 "number of mmap data pages",
2579 perf_evlist__parse_mmap_pages),
2580 OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user",
2582 OPT_CALLBACK(0, "duration", &trace, "float",
2583 "show only events with duration > N.M ms",
2584 trace__set_duration),
2585 OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"),
2586 OPT_INCR('v', "verbose", &verbose, "be more verbose"),
2587 OPT_BOOLEAN('T', "time", &trace.full_time,
2588 "Show full timestamp, not time relative to first start"),
2589 OPT_BOOLEAN('s', "summary", &trace.summary_only,
2590 "Show only syscall summary with statistics"),
2591 OPT_BOOLEAN('S', "with-summary", &trace.summary,
2592 "Show all syscalls and summary with statistics"),
2593 OPT_CALLBACK_DEFAULT('F', "pf", &trace.trace_pgfaults, "all|maj|min",
2594 "Trace pagefaults", parse_pagefaults, "maj"),
2595 OPT_BOOLEAN(0, "syscalls", &trace.trace_syscalls, "Trace syscalls"),
/* The evlist must exist before option parsing because --event adds to it */
/* NOTE(review): the two NULL checks on trace.evlist below look redundant - confirm */
2601 trace.evlist = perf_evlist__new();
2602 if (trace.evlist == NULL)
2605 if (trace.evlist == NULL) {
2606 pr_err("Not enough memory to run!\n");
2610 argc = parse_options(argc, argv, trace_options, trace_usage,
2611 PARSE_OPT_STOP_AT_NON_OPTION);
/* Page-fault tracing needs the faulting address and a timestamp in every sample */
2613 if (trace.trace_pgfaults) {
2614 trace.opts.sample_address = true;
2615 trace.opts.sample_time = true;
/* Events added through --event are printed by the generic handler */
2618 if (trace.evlist->nr_entries > 0)
2619 evlist__set_evsel_handler(trace.evlist, trace__event_handler);
2621 if ((argc >= 1) && (strcmp(argv[0], "record") == 0))
2622 return trace__record(&trace, argc-1, &argv[1]);
2624 /* summary_only implies summary option, but don't overwrite summary if set */
2625 if (trace.summary_only)
2626 trace.summary = trace.summary_only;
2628 if (!trace.trace_syscalls && !trace.trace_pgfaults &&
2629 trace.evlist->nr_entries == 0 /* Was --events used? */) {
2630 pr_err("Please specify something to trace.\n");
2634 if (output_name != NULL) {
2635 err = trace__open_output(&trace, output_name);
2637 perror("failed to create output file");
/* -e takes a list of syscall names; a leading '!' turns it into an exclusion list */
2642 if (ev_qualifier_str != NULL) {
2643 const char *s = ev_qualifier_str;
2645 trace.not_ev_qualifier = *s == '!';
2646 if (trace.not_ev_qualifier)
2648 trace.ev_qualifier = strlist__new(true, s);
2649 if (trace.ev_qualifier == NULL) {
2650 fputs("Not enough memory to parse event qualifier",
2657 err = target__validate(&trace.opts.target);
2659 target__strerror(&trace.opts.target, err, bf, sizeof(bf));
2660 fprintf(trace.output, "%s", bf);
2664 err = target__parse_uid(&trace.opts.target);
2666 target__strerror(&trace.opts.target, err, bf, sizeof(bf));
2667 fprintf(trace.output, "%s", bf);
/* No workload and no explicit target: trace the whole system */
2671 if (!argc && target__none(&trace.opts.target))
2672 trace.opts.target.system_wide = true;
2675 err = trace__replay(&trace);
2677 err = trace__run(&trace, argc, argv);
/* Only a file opened through -o is closed here */
2680 if (output_name != NULL)
2681 fclose(trace.output);