1 #include <traceevent/event-parse.h>
3 #include "util/color.h"
4 #include "util/debug.h"
5 #include "util/evlist.h"
6 #include "util/machine.h"
7 #include "util/session.h"
8 #include "util/thread.h"
9 #include "util/parse-options.h"
10 #include "util/strlist.h"
11 #include "util/intlist.h"
12 #include "util/thread_map.h"
13 #include "util/stat.h"
14 #include "trace-event.h"
15 #include "util/parse-events.h"
19 #include <sys/eventfd.h>
21 #include <linux/futex.h>
23 /* For older distros: */
25 # define MAP_STACK 0x20000
29 # define MADV_HWPOISON 100
32 #ifndef MADV_MERGEABLE
33 # define MADV_MERGEABLE 12
36 #ifndef MADV_UNMERGEABLE
37 # define MADV_UNMERGEABLE 13
41 # define EFD_SEMAPHORE 1
47 u64 (*integer)(struct tp_field *field, struct perf_sample *sample);
48 void *(*pointer)(struct tp_field *field, struct perf_sample *sample);
/*
 * Generate tp_field__u<bits>(): fetch the unsigned <bits>-wide integer that
 * sits field->offset bytes into the sample's raw payload and return it
 * widened to u64.
 *
 * The bytes are copied out with memcpy() instead of dereferencing a cast
 * pointer: nothing here guarantees that raw_data + offset is suitably aligned
 * for u<bits>, and a direct load through the cast pointer is undefined
 * behaviour in that case (and traps on strict-alignment machines).
 */
#define TP_UINT_FIELD(bits) \
static u64 tp_field__u##bits(struct tp_field *field, struct perf_sample *sample) \
{ \
	u##bits value; \
	memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
	return value; \
}
/*
 * Generate tp_field__swapped_u<bits>(): like the plain reader, but the value
 * is passed through bswap_<bits>() before being returned, for samples whose
 * byte order differs from the host's.
 *
 * The raw bytes are copied out with memcpy() rather than loaded through a
 * cast pointer, since raw_data + offset is not known to be aligned for
 * u<bits>.
 */
#define TP_UINT_FIELD__SWAPPED(bits) \
static u64 tp_field__swapped_u##bits(struct tp_field *field, struct perf_sample *sample) \
{ \
	u##bits value; \
	memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
	return bswap_##bits(value); \
}
70 TP_UINT_FIELD__SWAPPED(16);
71 TP_UINT_FIELD__SWAPPED(32);
72 TP_UINT_FIELD__SWAPPED(64);
74 static int tp_field__init_uint(struct tp_field *field,
75 struct format_field *format_field,
78 field->offset = format_field->offset;
80 switch (format_field->size) {
82 field->integer = tp_field__u8;
85 field->integer = needs_swap ? tp_field__swapped_u16 : tp_field__u16;
88 field->integer = needs_swap ? tp_field__swapped_u32 : tp_field__u32;
91 field->integer = needs_swap ? tp_field__swapped_u64 : tp_field__u64;
100 static void *tp_field__ptr(struct tp_field *field, struct perf_sample *sample)
102 return sample->raw_data + field->offset;
105 static int tp_field__init_ptr(struct tp_field *field, struct format_field *format_field)
107 field->offset = format_field->offset;
108 field->pointer = tp_field__ptr;
115 struct tp_field args, ret;
119 static int perf_evsel__init_tp_uint_field(struct perf_evsel *evsel,
120 struct tp_field *field,
123 struct format_field *format_field = perf_evsel__field(evsel, name);
125 if (format_field == NULL)
128 return tp_field__init_uint(field, format_field, evsel->needs_swap);
/*
 * Bind the tracepoint field called 'name' to the like-named member of the
 * struct syscall_tp hanging off evsel->priv, as an unsigned integer accessor.
 * Evaluates to the return value of perf_evsel__init_tp_uint_field().
 *
 * 'evsel' is parenthesized so that any pointer-valued expression can be
 * passed; note that it is still evaluated twice.
 */
#define perf_evsel__init_sc_tp_uint_field(evsel, name) \
	({ struct syscall_tp *sc = (evsel)->priv;\
	   perf_evsel__init_tp_uint_field((evsel), &sc->name, #name); })
135 static int perf_evsel__init_tp_ptr_field(struct perf_evsel *evsel,
136 struct tp_field *field,
139 struct format_field *format_field = perf_evsel__field(evsel, name);
141 if (format_field == NULL)
144 return tp_field__init_ptr(field, format_field);
/*
 * Bind the tracepoint field called 'name' to the like-named member of the
 * struct syscall_tp hanging off evsel->priv, as a pointer accessor.
 * Evaluates to the return value of perf_evsel__init_tp_ptr_field().
 *
 * 'evsel' is parenthesized so that any pointer-valued expression can be
 * passed; note that it is still evaluated twice.
 */
#define perf_evsel__init_sc_tp_ptr_field(evsel, name) \
	({ struct syscall_tp *sc = (evsel)->priv;\
	   perf_evsel__init_tp_ptr_field((evsel), &sc->name, #name); })
151 static void perf_evsel__delete_priv(struct perf_evsel *evsel)
154 perf_evsel__delete(evsel);
157 static int perf_evsel__init_syscall_tp(struct perf_evsel *evsel, void *handler)
159 evsel->priv = malloc(sizeof(struct syscall_tp));
160 if (evsel->priv != NULL) {
161 if (perf_evsel__init_sc_tp_uint_field(evsel, id))
164 evsel->handler = handler;
175 static struct perf_evsel *perf_evsel__syscall_newtp(const char *direction, void *handler)
177 struct perf_evsel *evsel = perf_evsel__newtp("raw_syscalls", direction);
179 /* older kernel (e.g., RHEL6) use syscalls:{enter,exit} */
181 evsel = perf_evsel__newtp("syscalls", direction);
184 if (perf_evsel__init_syscall_tp(evsel, handler))
191 perf_evsel__delete_priv(evsel);
/*
 * Read the integer tracepoint field 'name' from 'sample' through the accessor
 * stored in the struct syscall_tp that evsel->priv points to.
 *
 * Both 'evsel' and 'sample' are parenthesized so arbitrary expressions can be
 * passed safely.
 */
#define perf_evsel__sc_tp_uint(evsel, name, sample) \
	({ struct syscall_tp *fields = (evsel)->priv; \
	   fields->name.integer(&fields->name, (sample)); })
/*
 * Obtain a pointer to the tracepoint field 'name' inside 'sample' through the
 * accessor stored in the struct syscall_tp that evsel->priv points to.
 *
 * Both 'evsel' and 'sample' are parenthesized so arbitrary expressions can be
 * passed safely.
 */
#define perf_evsel__sc_tp_ptr(evsel, name, sample) \
	({ struct syscall_tp *fields = (evsel)->priv; \
	   fields->name.pointer(&fields->name, (sample)); })
203 static int perf_evlist__add_syscall_newtp(struct perf_evlist *evlist,
204 void *sys_enter_handler,
205 void *sys_exit_handler)
208 struct perf_evsel *sys_enter, *sys_exit;
210 sys_enter = perf_evsel__syscall_newtp("sys_enter", sys_enter_handler);
211 if (sys_enter == NULL)
214 if (perf_evsel__init_sc_tp_ptr_field(sys_enter, args))
215 goto out_delete_sys_enter;
217 sys_exit = perf_evsel__syscall_newtp("sys_exit", sys_exit_handler);
218 if (sys_exit == NULL)
219 goto out_delete_sys_enter;
221 if (perf_evsel__init_sc_tp_uint_field(sys_exit, ret))
222 goto out_delete_sys_exit;
224 perf_evlist__add(evlist, sys_enter);
225 perf_evlist__add(evlist, sys_exit);
232 perf_evsel__delete_priv(sys_exit);
233 out_delete_sys_enter:
234 perf_evsel__delete_priv(sys_enter);
241 struct thread *thread;
251 const char **entries;
254 #define DEFINE_STRARRAY(array) struct strarray strarray__##array = { \
255 .nr_entries = ARRAY_SIZE(array), \
259 #define DEFINE_STRARRAY_OFFSET(array, off) struct strarray strarray__##array = { \
261 .nr_entries = ARRAY_SIZE(array), \
265 static size_t __syscall_arg__scnprintf_strarray(char *bf, size_t size,
267 struct syscall_arg *arg)
269 struct strarray *sa = arg->parm;
270 int idx = arg->val - sa->offset;
272 if (idx < 0 || idx >= sa->nr_entries)
273 return scnprintf(bf, size, intfmt, arg->val);
275 return scnprintf(bf, size, "%s", sa->entries[idx]);
/*
 * strarray beautifier whose fallback for values without a name is a plain
 * decimal number.
 */
static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size,
					      struct syscall_arg *arg)
{
	const char *fallback_fmt = "%d";

	return __syscall_arg__scnprintf_strarray(bf, size, fallback_fmt, arg);
}
284 #define SCA_STRARRAY syscall_arg__scnprintf_strarray
286 #if defined(__i386__) || defined(__x86_64__)
288 * FIXME: Make this available to all arches as soon as the ioctl beautifier
289 * gets rewritten to support all arches.
/*
 * strarray beautifier whose fallback for values without a name is a
 * 0x-prefixed hexadecimal number.
 */
static size_t syscall_arg__scnprintf_strhexarray(char *bf, size_t size,
						 struct syscall_arg *arg)
{
	const char *fallback_fmt = "%#x";

	return __syscall_arg__scnprintf_strarray(bf, size, fallback_fmt, arg);
}
297 #define SCA_STRHEXARRAY syscall_arg__scnprintf_strhexarray
298 #endif /* defined(__i386__) || defined(__x86_64__) */
300 static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
301 struct syscall_arg *arg);
303 #define SCA_FD syscall_arg__scnprintf_fd
305 static size_t syscall_arg__scnprintf_fd_at(char *bf, size_t size,
306 struct syscall_arg *arg)
311 return scnprintf(bf, size, "CWD");
313 return syscall_arg__scnprintf_fd(bf, size, arg);
316 #define SCA_FDAT syscall_arg__scnprintf_fd_at
318 static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
319 struct syscall_arg *arg);
321 #define SCA_CLOSE_FD syscall_arg__scnprintf_close_fd
323 static size_t syscall_arg__scnprintf_hex(char *bf, size_t size,
324 struct syscall_arg *arg)
326 return scnprintf(bf, size, "%#lx", arg->val);
329 #define SCA_HEX syscall_arg__scnprintf_hex
331 static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size,
332 struct syscall_arg *arg)
334 int printed = 0, prot = arg->val;
336 if (prot == PROT_NONE)
337 return scnprintf(bf, size, "NONE");
338 #define P_MMAP_PROT(n) \
339 if (prot & PROT_##n) { \
340 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
350 P_MMAP_PROT(GROWSDOWN);
351 P_MMAP_PROT(GROWSUP);
355 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", prot);
360 #define SCA_MMAP_PROT syscall_arg__scnprintf_mmap_prot
362 static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size,
363 struct syscall_arg *arg)
365 int printed = 0, flags = arg->val;
367 #define P_MMAP_FLAG(n) \
368 if (flags & MAP_##n) { \
369 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
374 P_MMAP_FLAG(PRIVATE);
378 P_MMAP_FLAG(ANONYMOUS);
379 P_MMAP_FLAG(DENYWRITE);
380 P_MMAP_FLAG(EXECUTABLE);
383 P_MMAP_FLAG(GROWSDOWN);
385 P_MMAP_FLAG(HUGETLB);
388 P_MMAP_FLAG(NONBLOCK);
389 P_MMAP_FLAG(NORESERVE);
390 P_MMAP_FLAG(POPULATE);
392 #ifdef MAP_UNINITIALIZED
393 P_MMAP_FLAG(UNINITIALIZED);
398 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
403 #define SCA_MMAP_FLAGS syscall_arg__scnprintf_mmap_flags
405 static size_t syscall_arg__scnprintf_mremap_flags(char *bf, size_t size,
406 struct syscall_arg *arg)
408 int printed = 0, flags = arg->val;
410 #define P_MREMAP_FLAG(n) \
411 if (flags & MREMAP_##n) { \
412 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
413 flags &= ~MREMAP_##n; \
416 P_MREMAP_FLAG(MAYMOVE);
418 P_MREMAP_FLAG(FIXED);
423 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
428 #define SCA_MREMAP_FLAGS syscall_arg__scnprintf_mremap_flags
430 static size_t syscall_arg__scnprintf_madvise_behavior(char *bf, size_t size,
431 struct syscall_arg *arg)
433 int behavior = arg->val;
436 #define P_MADV_BHV(n) case MADV_##n: return scnprintf(bf, size, #n)
439 P_MADV_BHV(SEQUENTIAL);
440 P_MADV_BHV(WILLNEED);
441 P_MADV_BHV(DONTNEED);
443 P_MADV_BHV(DONTFORK);
445 P_MADV_BHV(HWPOISON);
446 #ifdef MADV_SOFT_OFFLINE
447 P_MADV_BHV(SOFT_OFFLINE);
449 P_MADV_BHV(MERGEABLE);
450 P_MADV_BHV(UNMERGEABLE);
452 P_MADV_BHV(HUGEPAGE);
454 #ifdef MADV_NOHUGEPAGE
455 P_MADV_BHV(NOHUGEPAGE);
458 P_MADV_BHV(DONTDUMP);
467 return scnprintf(bf, size, "%#x", behavior);
470 #define SCA_MADV_BHV syscall_arg__scnprintf_madvise_behavior
472 static size_t syscall_arg__scnprintf_flock(char *bf, size_t size,
473 struct syscall_arg *arg)
475 int printed = 0, op = arg->val;
478 return scnprintf(bf, size, "NONE");
480 if ((op & LOCK_##cmd) == LOCK_##cmd) { \
481 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #cmd); \
496 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", op);
501 #define SCA_FLOCK syscall_arg__scnprintf_flock
503 static size_t syscall_arg__scnprintf_futex_op(char *bf, size_t size, struct syscall_arg *arg)
505 enum syscall_futex_args {
506 SCF_UADDR = (1 << 0),
509 SCF_TIMEOUT = (1 << 3),
510 SCF_UADDR2 = (1 << 4),
514 int cmd = op & FUTEX_CMD_MASK;
518 #define P_FUTEX_OP(n) case FUTEX_##n: printed = scnprintf(bf, size, #n);
519 P_FUTEX_OP(WAIT); arg->mask |= SCF_VAL3|SCF_UADDR2; break;
520 P_FUTEX_OP(WAKE); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
521 P_FUTEX_OP(FD); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
522 P_FUTEX_OP(REQUEUE); arg->mask |= SCF_VAL3|SCF_TIMEOUT; break;
523 P_FUTEX_OP(CMP_REQUEUE); arg->mask |= SCF_TIMEOUT; break;
524 P_FUTEX_OP(CMP_REQUEUE_PI); arg->mask |= SCF_TIMEOUT; break;
525 P_FUTEX_OP(WAKE_OP); break;
526 P_FUTEX_OP(LOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
527 P_FUTEX_OP(UNLOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
528 P_FUTEX_OP(TRYLOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2; break;
529 P_FUTEX_OP(WAIT_BITSET); arg->mask |= SCF_UADDR2; break;
530 P_FUTEX_OP(WAKE_BITSET); arg->mask |= SCF_UADDR2; break;
531 P_FUTEX_OP(WAIT_REQUEUE_PI); break;
532 default: printed = scnprintf(bf, size, "%#x", cmd); break;
535 if (op & FUTEX_PRIVATE_FLAG)
536 printed += scnprintf(bf + printed, size - printed, "|PRIV");
538 if (op & FUTEX_CLOCK_REALTIME)
539 printed += scnprintf(bf + printed, size - printed, "|CLKRT");
544 #define SCA_FUTEX_OP syscall_arg__scnprintf_futex_op
546 static const char *epoll_ctl_ops[] = { "ADD", "DEL", "MOD", };
547 static DEFINE_STRARRAY_OFFSET(epoll_ctl_ops, 1);
549 static const char *itimers[] = { "REAL", "VIRTUAL", "PROF", };
550 static DEFINE_STRARRAY(itimers);
552 static const char *whences[] = { "SET", "CUR", "END",
560 static DEFINE_STRARRAY(whences);
562 static const char *fcntl_cmds[] = {
563 "DUPFD", "GETFD", "SETFD", "GETFL", "SETFL", "GETLK", "SETLK",
564 "SETLKW", "SETOWN", "GETOWN", "SETSIG", "GETSIG", "F_GETLK64",
565 "F_SETLK64", "F_SETLKW64", "F_SETOWN_EX", "F_GETOWN_EX",
568 static DEFINE_STRARRAY(fcntl_cmds);
570 static const char *rlimit_resources[] = {
571 "CPU", "FSIZE", "DATA", "STACK", "CORE", "RSS", "NPROC", "NOFILE",
572 "MEMLOCK", "AS", "LOCKS", "SIGPENDING", "MSGQUEUE", "NICE", "RTPRIO",
575 static DEFINE_STRARRAY(rlimit_resources);
577 static const char *sighow[] = { "BLOCK", "UNBLOCK", "SETMASK", };
578 static DEFINE_STRARRAY(sighow);
580 static const char *clockid[] = {
581 "REALTIME", "MONOTONIC", "PROCESS_CPUTIME_ID", "THREAD_CPUTIME_ID",
582 "MONOTONIC_RAW", "REALTIME_COARSE", "MONOTONIC_COARSE",
584 static DEFINE_STRARRAY(clockid);
586 static const char *socket_families[] = {
587 "UNSPEC", "LOCAL", "INET", "AX25", "IPX", "APPLETALK", "NETROM",
588 "BRIDGE", "ATMPVC", "X25", "INET6", "ROSE", "DECnet", "NETBEUI",
589 "SECURITY", "KEY", "NETLINK", "PACKET", "ASH", "ECONET", "ATMSVC",
590 "RDS", "SNA", "IRDA", "PPPOX", "WANPIPE", "LLC", "IB", "CAN", "TIPC",
591 "BLUETOOTH", "IUCV", "RXRPC", "ISDN", "PHONET", "IEEE802154", "CAIF",
592 "ALG", "NFC", "VSOCK",
594 static DEFINE_STRARRAY(socket_families);
596 #ifndef SOCK_TYPE_MASK
597 #define SOCK_TYPE_MASK 0xf
600 static size_t syscall_arg__scnprintf_socket_type(char *bf, size_t size,
601 struct syscall_arg *arg)
605 flags = type & ~SOCK_TYPE_MASK;
607 type &= SOCK_TYPE_MASK;
609 * Can't use a strarray, MIPS may override for ABI reasons.
612 #define P_SK_TYPE(n) case SOCK_##n: printed = scnprintf(bf, size, #n); break;
617 P_SK_TYPE(SEQPACKET);
622 printed = scnprintf(bf, size, "%#x", type);
625 #define P_SK_FLAG(n) \
626 if (flags & SOCK_##n) { \
627 printed += scnprintf(bf + printed, size - printed, "|%s", #n); \
628 flags &= ~SOCK_##n; \
636 printed += scnprintf(bf + printed, size - printed, "|%#x", flags);
641 #define SCA_SK_TYPE syscall_arg__scnprintf_socket_type
644 #define MSG_PROBE 0x10
646 #ifndef MSG_WAITFORONE
647 #define MSG_WAITFORONE 0x10000
649 #ifndef MSG_SENDPAGE_NOTLAST
650 #define MSG_SENDPAGE_NOTLAST 0x20000
653 #define MSG_FASTOPEN 0x20000000
656 static size_t syscall_arg__scnprintf_msg_flags(char *bf, size_t size,
657 struct syscall_arg *arg)
659 int printed = 0, flags = arg->val;
662 return scnprintf(bf, size, "NONE");
663 #define P_MSG_FLAG(n) \
664 if (flags & MSG_##n) { \
665 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
671 P_MSG_FLAG(DONTROUTE);
676 P_MSG_FLAG(DONTWAIT);
683 P_MSG_FLAG(ERRQUEUE);
684 P_MSG_FLAG(NOSIGNAL);
686 P_MSG_FLAG(WAITFORONE);
687 P_MSG_FLAG(SENDPAGE_NOTLAST);
688 P_MSG_FLAG(FASTOPEN);
689 P_MSG_FLAG(CMSG_CLOEXEC);
693 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
698 #define SCA_MSG_FLAGS syscall_arg__scnprintf_msg_flags
700 static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size,
701 struct syscall_arg *arg)
706 if (mode == F_OK) /* 0 */
707 return scnprintf(bf, size, "F");
709 if (mode & n##_OK) { \
710 printed += scnprintf(bf + printed, size - printed, "%s", #n); \
720 printed += scnprintf(bf + printed, size - printed, "|%#x", mode);
725 #define SCA_ACCMODE syscall_arg__scnprintf_access_mode
727 static size_t syscall_arg__scnprintf_open_flags(char *bf, size_t size,
728 struct syscall_arg *arg)
730 int printed = 0, flags = arg->val;
732 if (!(flags & O_CREAT))
733 arg->mask |= 1 << (arg->idx + 1); /* Mask the mode parm */
736 return scnprintf(bf, size, "RDONLY");
738 if (flags & O_##n) { \
739 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
763 if ((flags & O_SYNC) == O_SYNC)
764 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", "SYNC");
776 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
781 #define SCA_OPEN_FLAGS syscall_arg__scnprintf_open_flags
783 static size_t syscall_arg__scnprintf_eventfd_flags(char *bf, size_t size,
784 struct syscall_arg *arg)
786 int printed = 0, flags = arg->val;
789 return scnprintf(bf, size, "NONE");
791 if (flags & EFD_##n) { \
792 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
802 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
807 #define SCA_EFD_FLAGS syscall_arg__scnprintf_eventfd_flags
809 static size_t syscall_arg__scnprintf_pipe_flags(char *bf, size_t size,
810 struct syscall_arg *arg)
812 int printed = 0, flags = arg->val;
815 if (flags & O_##n) { \
816 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
825 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
830 #define SCA_PIPE_FLAGS syscall_arg__scnprintf_pipe_flags
832 static size_t syscall_arg__scnprintf_signum(char *bf, size_t size, struct syscall_arg *arg)
837 #define P_SIGNUM(n) case SIG##n: return scnprintf(bf, size, #n)
880 return scnprintf(bf, size, "%#x", sig);
883 #define SCA_SIGNUM syscall_arg__scnprintf_signum
885 #if defined(__i386__) || defined(__x86_64__)
887 * FIXME: Make this available to all arches.
889 #define TCGETS 0x5401
891 static const char *tioctls[] = {
892 "TCGETS", "TCSETS", "TCSETSW", "TCSETSF", "TCGETA", "TCSETA", "TCSETAW",
893 "TCSETAF", "TCSBRK", "TCXONC", "TCFLSH", "TIOCEXCL", "TIOCNXCL",
894 "TIOCSCTTY", "TIOCGPGRP", "TIOCSPGRP", "TIOCOUTQ", "TIOCSTI",
895 "TIOCGWINSZ", "TIOCSWINSZ", "TIOCMGET", "TIOCMBIS", "TIOCMBIC",
896 "TIOCMSET", "TIOCGSOFTCAR", "TIOCSSOFTCAR", "FIONREAD", "TIOCLINUX",
897 "TIOCCONS", "TIOCGSERIAL", "TIOCSSERIAL", "TIOCPKT", "FIONBIO",
898 "TIOCNOTTY", "TIOCSETD", "TIOCGETD", "TCSBRKP", [0x27] = "TIOCSBRK",
899 "TIOCCBRK", "TIOCGSID", "TCGETS2", "TCSETS2", "TCSETSW2", "TCSETSF2",
900 "TIOCGRS485", "TIOCSRS485", "TIOCGPTN", "TIOCSPTLCK",
901 "TIOCGDEV||TCGETX", "TCSETX", "TCSETXF", "TCSETXW", "TIOCSIG",
902 "TIOCVHANGUP", "TIOCGPKT", "TIOCGPTLCK", "TIOCGEXCL",
903 [0x50] = "FIONCLEX", "FIOCLEX", "FIOASYNC", "TIOCSERCONFIG",
904 "TIOCSERGWILD", "TIOCSERSWILD", "TIOCGLCKTRMIOS", "TIOCSLCKTRMIOS",
905 "TIOCSERGSTRUCT", "TIOCSERGETLSR", "TIOCSERGETMULTI", "TIOCSERSETMULTI",
906 "TIOCMIWAIT", "TIOCGICOUNT", [0x60] = "FIOQSIZE",
909 static DEFINE_STRARRAY_OFFSET(tioctls, 0x5401);
910 #endif /* defined(__i386__) || defined(__x86_64__) */
/*
 * Designated-initializer fragment for a syscall_fmt entry: argument number
 * 'arg' is formatted by SCA_STRARRAY using the table strarray__<array>.
 * 'name' does not appear in the expansion; it only labels the argument at
 * the point of use.
 */
#define STRARRAY(arg, name, array) \
	.arg_scnprintf = { [arg] = SCA_STRARRAY, }, \
	.arg_parm      = { [arg] = &strarray__##array, }
916 static struct syscall_fmt {
919 size_t (*arg_scnprintf[6])(char *bf, size_t size, struct syscall_arg *arg);
925 { .name = "access", .errmsg = true,
926 .arg_scnprintf = { [1] = SCA_ACCMODE, /* mode */ }, },
927 { .name = "arch_prctl", .errmsg = true, .alias = "prctl", },
928 { .name = "brk", .hexret = true,
929 .arg_scnprintf = { [0] = SCA_HEX, /* brk */ }, },
930 { .name = "clock_gettime", .errmsg = true, STRARRAY(0, clk_id, clockid), },
931 { .name = "close", .errmsg = true,
932 .arg_scnprintf = { [0] = SCA_CLOSE_FD, /* fd */ }, },
933 { .name = "connect", .errmsg = true, },
934 { .name = "dup", .errmsg = true,
935 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
936 { .name = "dup2", .errmsg = true,
937 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
938 { .name = "dup3", .errmsg = true,
939 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
940 { .name = "epoll_ctl", .errmsg = true, STRARRAY(1, op, epoll_ctl_ops), },
941 { .name = "eventfd2", .errmsg = true,
942 .arg_scnprintf = { [1] = SCA_EFD_FLAGS, /* flags */ }, },
943 { .name = "faccessat", .errmsg = true,
944 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
945 { .name = "fadvise64", .errmsg = true,
946 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
947 { .name = "fallocate", .errmsg = true,
948 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
949 { .name = "fchdir", .errmsg = true,
950 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
951 { .name = "fchmod", .errmsg = true,
952 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
953 { .name = "fchmodat", .errmsg = true,
954 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
955 { .name = "fchown", .errmsg = true,
956 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
957 { .name = "fchownat", .errmsg = true,
958 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
959 { .name = "fcntl", .errmsg = true,
960 .arg_scnprintf = { [0] = SCA_FD, /* fd */
961 [1] = SCA_STRARRAY, /* cmd */ },
962 .arg_parm = { [1] = &strarray__fcntl_cmds, /* cmd */ }, },
963 { .name = "fdatasync", .errmsg = true,
964 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
965 { .name = "flock", .errmsg = true,
966 .arg_scnprintf = { [0] = SCA_FD, /* fd */
967 [1] = SCA_FLOCK, /* cmd */ }, },
968 { .name = "fsetxattr", .errmsg = true,
969 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
970 { .name = "fstat", .errmsg = true, .alias = "newfstat",
971 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
972 { .name = "fstatat", .errmsg = true, .alias = "newfstatat",
973 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
974 { .name = "fstatfs", .errmsg = true,
975 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
976 { .name = "fsync", .errmsg = true,
977 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
978 { .name = "ftruncate", .errmsg = true,
979 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
980 { .name = "futex", .errmsg = true,
981 .arg_scnprintf = { [1] = SCA_FUTEX_OP, /* op */ }, },
982 { .name = "futimesat", .errmsg = true,
983 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
984 { .name = "getdents", .errmsg = true,
985 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
986 { .name = "getdents64", .errmsg = true,
987 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
988 { .name = "getitimer", .errmsg = true, STRARRAY(0, which, itimers), },
989 { .name = "getrlimit", .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
990 { .name = "ioctl", .errmsg = true,
991 .arg_scnprintf = { [0] = SCA_FD, /* fd */
992 #if defined(__i386__) || defined(__x86_64__)
994 * FIXME: Make this available to all arches.
996 [1] = SCA_STRHEXARRAY, /* cmd */
997 [2] = SCA_HEX, /* arg */ },
998 .arg_parm = { [1] = &strarray__tioctls, /* cmd */ }, },
1000 [2] = SCA_HEX, /* arg */ }, },
1002 { .name = "kill", .errmsg = true,
1003 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1004 { .name = "linkat", .errmsg = true,
1005 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
1006 { .name = "lseek", .errmsg = true,
1007 .arg_scnprintf = { [0] = SCA_FD, /* fd */
1008 [2] = SCA_STRARRAY, /* whence */ },
1009 .arg_parm = { [2] = &strarray__whences, /* whence */ }, },
1010 { .name = "lstat", .errmsg = true, .alias = "newlstat", },
1011 { .name = "madvise", .errmsg = true,
1012 .arg_scnprintf = { [0] = SCA_HEX, /* start */
1013 [2] = SCA_MADV_BHV, /* behavior */ }, },
1014 { .name = "mkdirat", .errmsg = true,
1015 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
1016 { .name = "mknodat", .errmsg = true,
1017 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
1018 { .name = "mlock", .errmsg = true,
1019 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1020 { .name = "mlockall", .errmsg = true,
1021 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1022 { .name = "mmap", .hexret = true,
1023 .arg_scnprintf = { [0] = SCA_HEX, /* addr */
1024 [2] = SCA_MMAP_PROT, /* prot */
1025 [3] = SCA_MMAP_FLAGS, /* flags */
1026 [4] = SCA_FD, /* fd */ }, },
1027 { .name = "mprotect", .errmsg = true,
1028 .arg_scnprintf = { [0] = SCA_HEX, /* start */
1029 [2] = SCA_MMAP_PROT, /* prot */ }, },
1030 { .name = "mremap", .hexret = true,
1031 .arg_scnprintf = { [0] = SCA_HEX, /* addr */
1032 [3] = SCA_MREMAP_FLAGS, /* flags */
1033 [4] = SCA_HEX, /* new_addr */ }, },
1034 { .name = "munlock", .errmsg = true,
1035 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1036 { .name = "munmap", .errmsg = true,
1037 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1038 { .name = "name_to_handle_at", .errmsg = true,
1039 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1040 { .name = "newfstatat", .errmsg = true,
1041 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1042 { .name = "open", .errmsg = true,
1043 .arg_scnprintf = { [1] = SCA_OPEN_FLAGS, /* flags */ }, },
1044 { .name = "open_by_handle_at", .errmsg = true,
1045 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1046 [2] = SCA_OPEN_FLAGS, /* flags */ }, },
1047 { .name = "openat", .errmsg = true,
1048 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1049 [2] = SCA_OPEN_FLAGS, /* flags */ }, },
1050 { .name = "pipe2", .errmsg = true,
1051 .arg_scnprintf = { [1] = SCA_PIPE_FLAGS, /* flags */ }, },
1052 { .name = "poll", .errmsg = true, .timeout = true, },
1053 { .name = "ppoll", .errmsg = true, .timeout = true, },
1054 { .name = "pread", .errmsg = true, .alias = "pread64",
1055 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1056 { .name = "preadv", .errmsg = true, .alias = "pread",
1057 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1058 { .name = "prlimit64", .errmsg = true, STRARRAY(1, resource, rlimit_resources), },
1059 { .name = "pwrite", .errmsg = true, .alias = "pwrite64",
1060 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1061 { .name = "pwritev", .errmsg = true,
1062 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1063 { .name = "read", .errmsg = true,
1064 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1065 { .name = "readlinkat", .errmsg = true,
1066 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1067 { .name = "readv", .errmsg = true,
1068 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1069 { .name = "recvfrom", .errmsg = true,
1070 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1071 { .name = "recvmmsg", .errmsg = true,
1072 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1073 { .name = "recvmsg", .errmsg = true,
1074 .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
1075 { .name = "renameat", .errmsg = true,
1076 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1077 { .name = "rt_sigaction", .errmsg = true,
1078 .arg_scnprintf = { [0] = SCA_SIGNUM, /* sig */ }, },
1079 { .name = "rt_sigprocmask", .errmsg = true, STRARRAY(0, how, sighow), },
1080 { .name = "rt_sigqueueinfo", .errmsg = true,
1081 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1082 { .name = "rt_tgsigqueueinfo", .errmsg = true,
1083 .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
1084 { .name = "select", .errmsg = true, .timeout = true, },
1085 { .name = "sendmmsg", .errmsg = true,
1086 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1087 { .name = "sendmsg", .errmsg = true,
1088 .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
1089 { .name = "sendto", .errmsg = true,
1090 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1091 { .name = "setitimer", .errmsg = true, STRARRAY(0, which, itimers), },
1092 { .name = "setrlimit", .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
1093 { .name = "shutdown", .errmsg = true,
1094 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1095 { .name = "socket", .errmsg = true,
1096 .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
1097 [1] = SCA_SK_TYPE, /* type */ },
1098 .arg_parm = { [0] = &strarray__socket_families, /* family */ }, },
1099 { .name = "socketpair", .errmsg = true,
1100 .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
1101 [1] = SCA_SK_TYPE, /* type */ },
1102 .arg_parm = { [0] = &strarray__socket_families, /* family */ }, },
1103 { .name = "stat", .errmsg = true, .alias = "newstat", },
1104 { .name = "symlinkat", .errmsg = true,
1105 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1106 { .name = "tgkill", .errmsg = true,
1107 .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
1108 { .name = "tkill", .errmsg = true,
1109 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1110 { .name = "uname", .errmsg = true, .alias = "newuname", },
1111 { .name = "unlinkat", .errmsg = true,
1112 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1113 { .name = "utimensat", .errmsg = true,
1114 .arg_scnprintf = { [0] = SCA_FDAT, /* dirfd */ }, },
1115 { .name = "write", .errmsg = true,
1116 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1117 { .name = "writev", .errmsg = true,
1118 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1121 static int syscall_fmt__cmp(const void *name, const void *fmtp)
1123 const struct syscall_fmt *fmt = fmtp;
1124 return strcmp(name, fmt->name);
1127 static struct syscall_fmt *syscall_fmt__find(const char *name)
1129 const int nmemb = ARRAY_SIZE(syscall_fmts);
1130 return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp);
1134 struct event_format *tp_format;
1138 struct syscall_fmt *fmt;
1139 size_t (**arg_scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
1143 static size_t fprintf_duration(unsigned long t, FILE *fp)
1145 double duration = (double)t / NSEC_PER_MSEC;
1146 size_t printed = fprintf(fp, "(");
1148 if (duration >= 1.0)
1149 printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration);
1150 else if (duration >= 0.01)
1151 printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration);
1153 printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration);
1154 return printed + fprintf(fp, "): ");
1157 struct thread_trace {
1161 unsigned long nr_events;
1162 unsigned long pfmaj, pfmin;
1170 struct intlist *syscall_stats;
1173 static struct thread_trace *thread_trace__new(void)
1175 struct thread_trace *ttrace = zalloc(sizeof(struct thread_trace));
1178 ttrace->paths.max = -1;
1180 ttrace->syscall_stats = intlist__new(NULL);
1185 static struct thread_trace *thread__trace(struct thread *thread, FILE *fp)
1187 struct thread_trace *ttrace;
1192 if (thread__priv(thread) == NULL)
1193 thread__set_priv(thread, thread_trace__new());
1195 if (thread__priv(thread) == NULL)
1198 ttrace = thread__priv(thread);
1199 ++ttrace->nr_events;
1203 color_fprintf(fp, PERF_COLOR_RED,
1204 "WARNING: not enough memory, dropping samples!\n");
1208 #define TRACE_PFMAJ (1 << 0)
1209 #define TRACE_PFMIN (1 << 1)
1212 struct perf_tool tool;
1219 struct syscall *table;
1221 struct record_opts opts;
1222 struct perf_evlist *evlist;
1223 struct machine *host;
1224 struct thread *current;
1227 unsigned long nr_events;
1228 struct strlist *ev_qualifier;
1229 const char *last_vfs_getname;
1230 struct intlist *tid_list;
1231 struct intlist *pid_list;
1236 double duration_filter;
1242 bool not_ev_qualifier;
1246 bool multiple_threads;
1250 bool show_tool_stats;
1251 bool trace_syscalls;
1255 static int trace__set_fd_pathname(struct thread *thread, int fd, const char *pathname)
1257 struct thread_trace *ttrace = thread__priv(thread);
1259 if (fd > ttrace->paths.max) {
1260 char **npath = realloc(ttrace->paths.table, (fd + 1) * sizeof(char *));
1265 if (ttrace->paths.max != -1) {
1266 memset(npath + ttrace->paths.max + 1, 0,
1267 (fd - ttrace->paths.max) * sizeof(char *));
1269 memset(npath, 0, (fd + 1) * sizeof(char *));
1272 ttrace->paths.table = npath;
1273 ttrace->paths.max = fd;
1276 ttrace->paths.table[fd] = strdup(pathname);
1278 return ttrace->paths.table[fd] != NULL ? 0 : -1;
1281 static int thread__read_fd_path(struct thread *thread, int fd)
1283 char linkname[PATH_MAX], pathname[PATH_MAX];
1287 if (thread->pid_ == thread->tid) {
1288 scnprintf(linkname, sizeof(linkname),
1289 "/proc/%d/fd/%d", thread->pid_, fd);
1291 scnprintf(linkname, sizeof(linkname),
1292 "/proc/%d/task/%d/fd/%d", thread->pid_, thread->tid, fd);
1295 if (lstat(linkname, &st) < 0 || st.st_size + 1 > (off_t)sizeof(pathname))
1298 ret = readlink(linkname, pathname, sizeof(pathname));
1300 if (ret < 0 || ret > st.st_size)
1303 pathname[ret] = '\0';
1304 return trace__set_fd_pathname(thread, fd, pathname);
1307 static const char *thread__fd_path(struct thread *thread, int fd,
1308 struct trace *trace)
1310 struct thread_trace *ttrace = thread__priv(thread);
1318 if ((fd > ttrace->paths.max || ttrace->paths.table[fd] == NULL)) {
1321 ++trace->stats.proc_getname;
1322 if (thread__read_fd_path(thread, fd))
1326 return ttrace->paths.table[fd];
1329 static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
1330 struct syscall_arg *arg)
1333 size_t printed = scnprintf(bf, size, "%d", fd);
1334 const char *path = thread__fd_path(arg->thread, fd, arg->trace);
1337 printed += scnprintf(bf + printed, size - printed, "<%s>", path);
/*
 * Format the fd argument like syscall_arg__scnprintf_fd() and then forget the
 * path recorded for that fd.
 */
1342 static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
1343 struct syscall_arg *arg)
1346 size_t printed = syscall_arg__scnprintf_fd(bf, size, arg);
1347 struct thread_trace *ttrace = thread__priv(arg->thread);
/* Only touch slots that exist in the table. */
1349 if (ttrace && fd >= 0 && fd <= ttrace->paths.max)
1350 zfree(&ttrace->paths.table[fd]);
1355 static bool trace__filter_duration(struct trace *trace, double t)
1357 return t < (trace->duration_filter * NSEC_PER_MSEC);
1360 static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
1362 double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC;
1364 return fprintf(fp, "%10.3f ", ts);
/*
 * Flags written by sig_handler() and read by the main loop.  They are
 * volatile sig_atomic_t because that is the object type the C standard
 * allows an asynchronous signal handler to write.
 */
static volatile sig_atomic_t done = false;
static volatile sig_atomic_t interrupted = false;

/* Request termination and record whether the signal was SIGINT. */
static void sig_handler(int sig)
{
	done = true;
	interrupted = sig == SIGINT;
}
/*
 * Print the leading part of an output line to @fp: timestamp, duration and,
 * when several threads are being traced, the thread id (preceded by the comm
 * when show_comm is set).
 */
1376 static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread,
1377 u64 duration, u64 tstamp, FILE *fp)
1379 size_t printed = trace__fprintf_tstamp(trace, tstamp, fp);
1380 printed += fprintf_duration(duration, fp);
1382 if (trace->multiple_threads) {
1383 if (trace->show_comm)
/* The comm is truncated to 14 characters. */
1384 printed += fprintf(fp, "%.14s/", thread__comm_str(thread));
1385 printed += fprintf(fp, "%d ", thread->tid);
1391 static int trace__process_event(struct trace *trace, struct machine *machine,
1392 union perf_event *event, struct perf_sample *sample)
1396 switch (event->header.type) {
1397 case PERF_RECORD_LOST:
1398 color_fprintf(trace->output, PERF_COLOR_RED,
1399 "LOST %" PRIu64 " events!\n", event->lost.lost);
1400 ret = machine__process_lost_event(machine, event, sample);
1402 ret = machine__process_event(machine, event, sample);
1409 static int trace__tool_process(struct perf_tool *tool,
1410 union perf_event *event,
1411 struct perf_sample *sample,
1412 struct machine *machine)
1414 struct trace *trace = container_of(tool, struct trace, tool);
1415 return trace__process_event(trace, machine, event, sample);
/*
 * Initialise the symbol library, create the host machine and synthesize the
 * threads of the traced target into it.
 * NOTE(review): the error handling between these steps is not visible here.
 */
1418 static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
1420 int err = symbol__init(NULL);
1425 trace->host = machine__new_host();
1426 if (trace->host == NULL)
1429 err = __machine__synthesize_threads(trace->host, &trace->tool, &trace->opts.target,
1430 evlist->threads, trace__tool_process, false);
/*
 * Choose a formatter for every argument of @sc: the one from the syscall's
 * format table entry when present, otherwise hex for pointer fields.
 */
1437 static int syscall__set_arg_fmts(struct syscall *sc)
1439 struct format_field *field;
/* One slot per tracepoint field except the first one. */
1442 sc->arg_scnprintf = calloc(sc->tp_format->format.nr_fields - 1, sizeof(void *));
1443 if (sc->arg_scnprintf == NULL)
1447 sc->arg_parm = sc->fmt->arg_parm;
/* The walk starts at fields->next, i.e. the first field is skipped. */
1449 for (field = sc->tp_format->format.fields->next; field; field = field->next) {
1450 if (sc->fmt && sc->fmt->arg_scnprintf[idx])
1451 sc->arg_scnprintf[idx] = sc->fmt->arg_scnprintf[idx];
1452 else if (field->flags & FIELD_IS_POINTER)
1453 sc->arg_scnprintf[idx] = syscall_arg__scnprintf_hex;
/*
 * Fill in the syscall table entry for syscall number @id: name, filter
 * state, format table entry, tracepoint format and argument formatters.
 * The table is grown (zero-filled) when @id is beyond its current end.
 */
1460 static int trace__read_syscall_info(struct trace *trace, int id)
1464 const char *name = audit_syscall_to_name(id, trace->audit.machine);
1469 if (id > trace->syscalls.max) {
1470 struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc));
1472 if (nsyscalls == NULL)
/* syscalls.max == -1 is treated as "table empty": clear everything. */
1475 if (trace->syscalls.max != -1) {
1476 memset(nsyscalls + trace->syscalls.max + 1, 0,
1477 (id - trace->syscalls.max) * sizeof(*sc));
1479 memset(nsyscalls, 0, (id + 1) * sizeof(*sc));
1482 trace->syscalls.table = nsyscalls;
1483 trace->syscalls.max = id;
1486 sc = trace->syscalls.table + id;
1489 if (trace->ev_qualifier) {
1490 bool in = strlist__find(trace->ev_qualifier, name) != NULL;
/* Filtered when list membership and the negation flag agree. */
1492 if (!(in ^ trace->not_ev_qualifier)) {
1493 sc->filtered = true;
1495 * No need to do read tracepoint information since this will be
1502 sc->fmt = syscall_fmt__find(sc->name);
1504 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name);
1505 sc->tp_format = trace_event__tp_format("syscalls", tp_name);
/* Retry under the alias name from the format table, if there is one. */
1507 if (sc->tp_format == NULL && sc->fmt && sc->fmt->alias) {
1508 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias);
1509 sc->tp_format = trace_event__tp_format("syscalls", tp_name);
1512 if (sc->tp_format == NULL)
1515 sc->is_exit = !strcmp(name, "exit_group") || !strcmp(name, "exit");
1517 return syscall__set_arg_fmts(sc);
/*
 * Format the arguments of syscall @sc into @bf as "name: value" pairs
 * separated by ", ".  With a tracepoint format the field names and the
 * per-argument formatters are used; otherwise a generic fallback prints
 * index/value pairs.
 */
1520 static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
1521 unsigned long *args, struct trace *trace,
1522 struct thread *thread)
1526 if (sc->tp_format != NULL) {
1527 struct format_field *field;
1529 struct syscall_arg arg = {
/* Starts at fields->next; arg.idx and bit advance together with the field. */
1536 for (field = sc->tp_format->format.fields->next; field;
1537 field = field->next, ++arg.idx, bit <<= 1) {
1541 * Suppress this argument if its value is zero and
1542 * and we don't have a string associated in an
1545 if (args[arg.idx] == 0 &&
1546 !(sc->arg_scnprintf &&
1547 sc->arg_scnprintf[arg.idx] == SCA_STRARRAY &&
1548 sc->arg_parm[arg.idx]))
1551 printed += scnprintf(bf + printed, size - printed,
1552 "%s%s: ", printed ? ", " : "", field->name);
/* Use the dedicated formatter when one was selected for this argument. */
1553 if (sc->arg_scnprintf && sc->arg_scnprintf[arg.idx]) {
1554 arg.val = args[arg.idx];
1556 arg.parm = sc->arg_parm[arg.idx];
1557 printed += sc->arg_scnprintf[arg.idx](bf + printed,
1558 size - printed, &arg);
1560 printed += scnprintf(bf + printed, size - printed,
1561 "%ld", args[arg.idx]);
1568 printed += scnprintf(bf + printed, size - printed,
1570 printed ? ", " : "", i, args[i]);
/*
 * Signature of the per-event handlers stored in evsel->handler and invoked
 * for each sample of that event.
 */
1578 typedef int (*tracepoint_handler)(struct trace *trace, struct perf_evsel *evsel,
1579 union perf_event *event,
1580 struct perf_sample *sample);
/*
 * Return the syscall table entry for @id, reading its description on first
 * use via trace__read_syscall_info().  Problems are reported on the trace
 * output.
 */
1582 static struct syscall *trace__syscall_info(struct trace *trace,
1583 struct perf_evsel *evsel, int id)
1589 * XXX: Noticed on x86_64, reproduced as far back as 3.0.36, haven't tried
1590 * before that, leaving at a higher verbosity level till that is
1591 * explained. Reproduced with plain ftrace with:
1593 * echo 1 > /t/events/raw_syscalls/sys_exit/enable
1594 * grep "NR -1 " /t/trace_pipe
1596 * After generating some load on the machine.
1600 fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n",
1601 id, perf_evsel__name(evsel), ++n);
/* Entry missing or not yet named: try to read it now. */
1606 if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) &&
1607 trace__read_syscall_info(trace, id))
/* Re-check: the read may have succeeded without producing a usable entry. */
1610 if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL))
1613 return &trace->syscalls.table[id];
1617 fprintf(trace->output, "Problems reading syscall %d", id);
1618 if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL)
1619 fprintf(trace->output, "(%s)", trace->syscalls.table[id].name);
1620 fputs(" information\n", trace->output);
/*
 * Account one completed syscall @id in the per-thread statistics, using the
 * time elapsed since the recorded entry time as the duration.
 */
1625 static void thread__update_stats(struct thread_trace *ttrace,
1626 int id, struct perf_sample *sample)
1628 struct int_node *inode;
1629 struct stats *stats;
/* One list node per syscall id; its priv pointer carries the struct stats. */
1632 inode = intlist__findnew(ttrace->syscall_stats, id);
1636 stats = inode->priv;
1637 if (stats == NULL) {
1638 stats = malloc(sizeof(struct stats));
1642 inode->priv = stats;
/* Only compute a duration when an entry time exists and lies in the past. */
1645 if (ttrace->entry_time && sample->time > ttrace->entry_time)
1646 duration = sample->time - ttrace->entry_time;
1648 update_stats(stats, duration);
/*
 * If the thread in trace->current has a syscall entry that was formatted but
 * not yet printed, print it now followed by ") ..." and clear the pending
 * flag.  The output goes to trace->output.
 */
1651 static int trace__printf_interrupted_entry(struct trace *trace, struct perf_sample *sample)
1653 struct thread_trace *ttrace;
1657 if (trace->current == NULL)
1660 ttrace = thread__priv(trace->current);
1662 if (!ttrace->entry_pending)
1665 duration = sample->time - ttrace->entry_time;
1667 printed = trace__fprintf_entry_head(trace, trace->current, duration, sample->time, trace->output);
1668 printed += fprintf(trace->output, "%-70s) ...\n", ttrace->entry_str);
1669 ttrace->entry_pending = false;
1674 static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
1675 union perf_event *event __maybe_unused,
1676 struct perf_sample *sample)
1681 struct thread *thread;
1682 int id = perf_evsel__sc_tp_uint(evsel, id, sample);
1683 struct syscall *sc = trace__syscall_info(trace, evsel, id);
1684 struct thread_trace *ttrace;
1692 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1693 ttrace = thread__trace(thread, trace->output);
1697 args = perf_evsel__sc_tp_ptr(evsel, args, sample);
1699 if (ttrace->entry_str == NULL) {
1700 ttrace->entry_str = malloc(1024);
1701 if (!ttrace->entry_str)
1705 printed += trace__printf_interrupted_entry(trace, sample);
1707 ttrace->entry_time = sample->time;
1708 msg = ttrace->entry_str;
1709 printed += scnprintf(msg + printed, 1024 - printed, "%s(", sc->name);
1711 printed += syscall__scnprintf_args(sc, msg + printed, 1024 - printed,
1712 args, trace, thread);
1715 if (!trace->duration_filter && !trace->summary_only) {
1716 trace__fprintf_entry_head(trace, thread, 1, sample->time, trace->output);
1717 fprintf(trace->output, "%-70s\n", ttrace->entry_str);
1720 ttrace->entry_pending = true;
1722 trace->current = thread;
/*
 * sys_exit handler: update statistics, associate a pending vfs_getname path
 * with the fd returned by open, apply the duration/summary filters and print
 * the end of the syscall line with its return value.
 */
1727 static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
1728 union perf_event *event __maybe_unused,
1729 struct perf_sample *sample)
1733 struct thread *thread;
1734 int id = perf_evsel__sc_tp_uint(evsel, id, sample);
1735 struct syscall *sc = trace__syscall_info(trace, evsel, id);
1736 struct thread_trace *ttrace;
1744 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1745 ttrace = thread__trace(thread, trace->output);
1750 thread__update_stats(ttrace, id, sample);
1752 ret = perf_evsel__sc_tp_uint(evsel, ret, sample);
/* Successful open: the returned fd maps to the last path seen by vfs_getname. */
1754 if (id == trace->audit.open_id && ret >= 0 && trace->last_vfs_getname) {
1755 trace__set_fd_pathname(thread, ret, trace->last_vfs_getname);
1756 trace->last_vfs_getname = NULL;
1757 ++trace->stats.vfs_getname;
1760 ttrace->exit_time = sample->time;
1762 if (ttrace->entry_time) {
1763 duration = sample->time - ttrace->entry_time;
1764 if (trace__filter_duration(trace, duration))
1766 } else if (trace->duration_filter)
1769 if (trace->summary_only)
1772 trace__fprintf_entry_head(trace, thread, duration, sample->time, trace->output);
/* Entry not printed yet: print it now; otherwise mark the line as a continuation. */
1774 if (ttrace->entry_pending) {
1775 fprintf(trace->output, "%-70s", ttrace->entry_str);
1777 fprintf(trace->output, " ... [");
1778 color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued");
1779 fprintf(trace->output, "]: %s()", sc->name);
/* Return value formatting depends on the syscall's format table entry. */
1782 if (sc->fmt == NULL) {
1784 fprintf(trace->output, ") = %ld", ret);
1785 } else if (ret < 0 && sc->fmt->errmsg) {
1786 char bf[STRERR_BUFSIZE];
1787 const char *emsg = strerror_r(-ret, bf, sizeof(bf)),
1788 *e = audit_errno_to_name(-ret);
1790 fprintf(trace->output, ") = -1 %s %s", e, emsg);
1791 } else if (ret == 0 && sc->fmt->timeout)
1792 fprintf(trace->output, ") = 0 Timeout");
1793 else if (sc->fmt->hexret)
1794 fprintf(trace->output, ") = %#lx", ret);
1798 fputc('\n', trace->output);
1800 ttrace->entry_pending = false;
/*
 * probe:vfs_getname handler: remember the "pathname" field of the sample in
 * trace->last_vfs_getname, where trace__sys_exit() picks it up.
 * NOTE(review): the stored pointer comes straight from perf_evsel__rawptr();
 * confirm how long the underlying sample data stays valid.
 */
1805 static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel,
1806 union perf_event *event __maybe_unused,
1807 struct perf_sample *sample)
1809 trace->last_vfs_getname = perf_evsel__rawptr(evsel, sample, "pathname");
/*
 * sched:sched_stat_runtime handler: add the event's "runtime" field,
 * converted with NSEC_PER_MSEC, to the per-thread and global runtime totals.
 * The trailing fprintf dumps the raw event fields to the trace output.
 */
1813 static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel,
1814 union perf_event *event __maybe_unused,
1815 struct perf_sample *sample)
1817 u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
1818 double runtime_ms = (double)runtime / NSEC_PER_MSEC;
1819 struct thread *thread = machine__findnew_thread(trace->host,
1822 struct thread_trace *ttrace = thread__trace(thread, trace->output);
1827 ttrace->runtime_ms += runtime_ms;
1828 trace->runtime_ms += runtime_ms;
1832 fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n",
1834 perf_evsel__strval(evsel, sample, "comm"),
1835 (pid_t)perf_evsel__intval(evsel, sample, "pid"),
1837 perf_evsel__intval(evsel, sample, "vruntime"));
/*
 * Generic handler for events selected with --event: flush a pending syscall
 * entry, print the timestamp and event name, then the tracepoint payload
 * when the evsel has a tracepoint format.
 */
1841 static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel,
1842 union perf_event *event __maybe_unused,
1843 struct perf_sample *sample)
1845 trace__printf_interrupted_entry(trace, sample);
1846 trace__fprintf_tstamp(trace, sample->time, trace->output);
/* Placeholder printed only when syscalls are traced as well. */
1848 if (trace->trace_syscalls)
1849 fprintf(trace->output, "( ): ");
1851 fprintf(trace->output, "%s:", evsel->name);
1853 if (evsel->tp_format) {
1854 event_format__fprintf(evsel->tp_format, sample->cpu,
1855 sample->raw_data, sample->raw_size,
1859 fprintf(trace->output, ")\n");
/*
 * Print an address location to @f: optionally "dso@", then "symbol+offset"
 * when a symbol is known and requested, otherwise a raw hex address.
 * 'verbose' forces both the dso and the symbol to be shown.
 */
1863 static void print_location(FILE *f, struct perf_sample *sample,
1864 struct addr_location *al,
1865 bool print_dso, bool print_sym)
1868 if ((verbose || print_dso) && al->map)
1869 fprintf(f, "%s@", al->map->dso->long_name);
1871 if ((verbose || print_sym) && al->sym)
1872 fprintf(f, "%s+0x%" PRIx64, al->sym->name,
1873 al->addr - al->sym->start);
1875 fprintf(f, "0x%" PRIx64, al->addr);
1877 fprintf(f, "0x%" PRIx64, sample->addr);
/*
 * Page fault handler: unless only a summary is wanted, print one line of the
 * form "<kind>fault [<location>] => <location> (<map type><level>)" built
 * from two address lookups.
 */
1880 static int trace__pgfault(struct trace *trace,
1881 struct perf_evsel *evsel,
1882 union perf_event *event,
1883 struct perf_sample *sample)
1885 struct thread *thread;
1886 u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
1887 struct addr_location al;
1888 char map_type = 'd';
1889 struct thread_trace *ttrace;
1891 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1892 ttrace = thread__trace(thread, trace->output);
1896 if (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ)
1901 if (trace->summary_only)
1904 thread__find_addr_location(thread, cpumode, MAP__FUNCTION,
1907 trace__fprintf_entry_head(trace, thread, 0, sample->time, trace->output);
1909 fprintf(trace->output, "%sfault [",
1910 evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ?
1913 print_location(trace->output, sample, &al, false, true);
1915 fprintf(trace->output, "] => ");
/* Second lookup is for sample->addr: variable maps first, then function maps. */
1917 thread__find_addr_location(thread, cpumode, MAP__VARIABLE,
1921 thread__find_addr_location(thread, cpumode,
1922 MAP__FUNCTION, sample->addr, &al);
1930 print_location(trace->output, sample, &al, true, false);
1932 fprintf(trace->output, " (%c%c)\n", map_type, al.level);
/*
 * Decide whether @sample should be ignored, based on the pid/tid lists built
 * by parse_target_str().
 * NOTE(review): the return statements are not visible here; presumably a
 * sample matching a list is kept and, when any list exists, others are
 * skipped - confirm.
 */
1937 static bool skip_sample(struct trace *trace, struct perf_sample *sample)
1939 if ((trace->pid_list && intlist__find(trace->pid_list, sample->pid)) ||
1940 (trace->tid_list && intlist__find(trace->tid_list, sample->tid)))
1943 if (trace->pid_list || trace->tid_list)
/*
 * perf_tool sample callback (installed in trace__replay()): drop samples
 * rejected by skip_sample(), latch the first timestamp as base_time unless
 * full timestamps were requested, and call the evsel's handler.
 */
1949 static int trace__process_sample(struct perf_tool *tool,
1950 union perf_event *event,
1951 struct perf_sample *sample,
1952 struct perf_evsel *evsel,
1953 struct machine *machine __maybe_unused)
1955 struct trace *trace = container_of(tool, struct trace, tool);
1958 tracepoint_handler handler = evsel->handler;
1960 if (skip_sample(trace, sample))
1963 if (!trace->full_time && trace->base_time == 0)
1964 trace->base_time = sample->time;
1968 handler(trace, evsel, event, sample);
/*
 * Turn the pid and tid target strings into integer lists (trace->pid_list,
 * trace->tid_list), reporting a parse error for either of them.
 */
1974 static int parse_target_str(struct trace *trace)
1976 if (trace->opts.target.pid) {
1977 trace->pid_list = intlist__new(trace->opts.target.pid);
1978 if (trace->pid_list == NULL) {
1979 pr_err("Error parsing process id string\n");
1984 if (trace->opts.target.tid) {
1985 trace->tid_list = intlist__new(trace->opts.target.tid);
1986 if (trace->tid_list == NULL) {
1987 pr_err("Error parsing thread id string\n");
/*
 * Implement "perf trace record": build an argument vector for cmd_record()
 * from the fixed record arguments, the syscall tracepoints, the requested
 * page fault events and the caller's own @argv, then run it.
 * NOTE(review): no free(rec_argv) is visible, in particular on the
 * "Neither raw_syscalls nor syscalls events exist." path - confirm whether
 * the vector leaks there.
 */
1995 static int trace__record(struct trace *trace, int argc, const char **argv)
1997 unsigned int rec_argc, i, j;
1998 const char **rec_argv;
1999 const char * const record_args[] = {
2006 const char * const sc_args[] = { "-e", };
2007 unsigned int sc_args_nr = ARRAY_SIZE(sc_args);
2008 const char * const majpf_args[] = { "-e", "major-faults" };
2009 unsigned int majpf_args_nr = ARRAY_SIZE(majpf_args);
2010 const char * const minpf_args[] = { "-e", "minor-faults" };
2011 unsigned int minpf_args_nr = ARRAY_SIZE(minpf_args);
2013 /* +1 is for the event string below */
2014 rec_argc = ARRAY_SIZE(record_args) + sc_args_nr + 1 +
2015 majpf_args_nr + minpf_args_nr + argc;
/* One extra zeroed slot is allocated beyond rec_argc. */
2016 rec_argv = calloc(rec_argc + 1, sizeof(char *));
2018 if (rec_argv == NULL)
2022 for (i = 0; i < ARRAY_SIZE(record_args); i++)
2023 rec_argv[j++] = record_args[i];
2025 if (trace->trace_syscalls) {
2026 for (i = 0; i < sc_args_nr; i++)
2027 rec_argv[j++] = sc_args[i];
2029 /* event string may be different for older kernels - e.g., RHEL6 */
2030 if (is_valid_tracepoint("raw_syscalls:sys_enter"))
2031 rec_argv[j++] = "raw_syscalls:sys_enter,raw_syscalls:sys_exit";
2032 else if (is_valid_tracepoint("syscalls:sys_enter"))
2033 rec_argv[j++] = "syscalls:sys_enter,syscalls:sys_exit";
2035 pr_err("Neither raw_syscalls nor syscalls events exist.\n");
2040 if (trace->trace_pgfaults & TRACE_PFMAJ)
2041 for (i = 0; i < majpf_args_nr; i++)
2042 rec_argv[j++] = majpf_args[i];
2044 if (trace->trace_pgfaults & TRACE_PFMIN)
2045 for (i = 0; i < minpf_args_nr; i++)
2046 rec_argv[j++] = minpf_args[i];
2048 for (i = 0; i < (unsigned int)argc; i++)
2049 rec_argv[j++] = argv[i];
/* j is the number of slots actually filled, which may be less than rec_argc. */
2051 return cmd_record(j, rec_argv, NULL);
/* Forward declaration: used by trace__run() before its definition. */
2054 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp);
/*
 * Add the probe:vfs_getname tracepoint to @evlist with trace__vfs_getname()
 * as its handler.  The evsel is deleted again when it has no "pathname"
 * field.
 */
2056 static void perf_evlist__add_vfs_getname(struct perf_evlist *evlist)
2058 struct perf_evsel *evsel = perf_evsel__newtp("probe", "vfs_getname");
2062 if (perf_evsel__field(evsel, "pathname") == NULL) {
2063 perf_evsel__delete(evsel);
2067 evsel->handler = trace__vfs_getname;
2068 perf_evlist__add(evlist, evsel);
/*
 * Add a software page fault event (selected by the config argument) to
 * @evlist, sampling every occurrence, with trace__pgfault() as its handler.
 */
2071 static int perf_evlist__add_pgfault(struct perf_evlist *evlist,
2074 struct perf_evsel *evsel;
2075 struct perf_event_attr attr = {
2076 .type = PERF_TYPE_SOFTWARE,
2080 attr.config = config;
/* A period of 1 asks for a sample on every event. */
2081 attr.sample_period = 1;
2083 event_attr_init(&attr);
2085 evsel = perf_evsel__new(&attr);
2089 evsel->handler = trace__pgfault;
2090 perf_evlist__add(evlist, evsel);
/*
 * Dispatch one event read from the ring buffers in live mode: non-sample
 * events go to trace__process_event(), samples go to the handler of the
 * evsel matching sample->id.  Unknown ids and tracepoint samples without a
 * payload are reported and skipped.
 */
2095 static void trace__handle_event(struct trace *trace, union perf_event *event, struct perf_sample *sample)
2097 const u32 type = event->header.type;
2098 struct perf_evsel *evsel;
/* The first timestamp seen becomes the base for relative times. */
2100 if (!trace->full_time && trace->base_time == 0)
2101 trace->base_time = sample->time;
2103 if (type != PERF_RECORD_SAMPLE) {
2104 trace__process_event(trace, trace->host, event, sample);
2108 evsel = perf_evlist__id2evsel(trace->evlist, sample->id);
2109 if (evsel == NULL) {
2110 fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample->id);
2114 if (evsel->attr.type == PERF_TYPE_TRACEPOINT &&
2115 sample->raw_data == NULL) {
2116 fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
2117 perf_evsel__name(evsel), sample->tid,
2118 sample->cpu, sample->raw_size);
2120 tracepoint_handler handler = evsel->handler;
2121 handler(trace, evsel, event, sample);
/*
 * Live tracing: set up the requested events, maps, symbols and optional
 * workload, then read and dispatch events from the mmap ring buffers until
 * done, and finally print the summary/tool statistics and tear the evlist
 * down.  The labels at the end translate setup failures into messages on
 * the trace output.
 */
2125 static int trace__run(struct trace *trace, int argc, const char **argv)
2127 struct perf_evlist *evlist = trace->evlist;
2129 unsigned long before;
/* A command on the command line means a workload is forked. */
2130 const bool forks = argc > 0;
2131 bool draining = false;
2135 if (trace->trace_syscalls &&
2136 perf_evlist__add_syscall_newtp(evlist, trace__sys_enter,
2138 goto out_error_raw_syscalls;
2140 if (trace->trace_syscalls)
2141 perf_evlist__add_vfs_getname(evlist);
2143 if ((trace->trace_pgfaults & TRACE_PFMAJ) &&
2144 perf_evlist__add_pgfault(evlist, PERF_COUNT_SW_PAGE_FAULTS_MAJ)) {
2148 if ((trace->trace_pgfaults & TRACE_PFMIN) &&
2149 perf_evlist__add_pgfault(evlist, PERF_COUNT_SW_PAGE_FAULTS_MIN))
2153 perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
2154 trace__sched_stat_runtime))
2155 goto out_error_sched_stat_runtime;
2157 err = perf_evlist__create_maps(evlist, &trace->opts.target);
2159 fprintf(trace->output, "Problems parsing the target to trace, check your options!\n");
2160 goto out_delete_evlist;
2163 err = trace__symbols_init(trace, evlist);
2165 fprintf(trace->output, "Problems initializing symbol libraries!\n");
2166 goto out_delete_evlist;
2169 perf_evlist__config(evlist, &trace->opts);
/* Both signals are routed to sig_handler(). */
2171 signal(SIGCHLD, sig_handler);
2172 signal(SIGINT, sig_handler);
2175 err = perf_evlist__prepare_workload(evlist, &trace->opts.target,
2178 fprintf(trace->output, "Couldn't run the workload!\n");
2179 goto out_delete_evlist;
2183 err = perf_evlist__open(evlist);
2185 goto out_error_open;
2188 * Better not use !target__has_task() here because we need to cover the
2189 * case where no threads were specified in the command line, but a
2190 * workload was, and in that case we will fill in the thread_map when
2191 * we fork the workload in perf_evlist__prepare_workload.
2193 if (trace->filter_pids.nr > 0)
2194 err = perf_evlist__set_filter_pids(evlist, trace->filter_pids.nr, trace->filter_pids.entries);
2195 else if (evlist->threads->map[0] == -1)
2196 err = perf_evlist__set_filter_pid(evlist, getpid());
/* NOTE(review): this diagnostic goes to stdout rather than trace->output - confirm intent. */
2199 printf("err=%d,%s\n", -err, strerror(-err));
2203 err = perf_evlist__mmap(evlist, trace->opts.mmap_pages, false);
2205 goto out_error_mmap;
2208 perf_evlist__start_workload(evlist);
2210 perf_evlist__enable(evlist);
2212 trace->multiple_threads = evlist->threads->map[0] == -1 ||
2213 evlist->threads->nr > 1 ||
2214 perf_evlist__first(evlist)->attr.inherit;
/* Snapshot of the event counter, compared below to detect an idle pass. */
2216 before = trace->nr_events;
2218 for (i = 0; i < evlist->nr_mmaps; i++) {
2219 union perf_event *event;
2221 while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) {
2222 struct perf_sample sample;
2226 err = perf_evlist__parse_sample(evlist, event, &sample);
2228 fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err);
2232 trace__handle_event(trace, event, &sample);
2234 perf_evlist__mmap_consume(evlist, i);
/* Nothing new was read: poll, with a 100 ms timeout once done is set. */
2241 if (trace->nr_events == before) {
2242 int timeout = done ? 100 : -1;
2244 if (!draining && perf_evlist__poll(evlist, timeout) > 0) {
2245 if (perf_evlist__filter_pollfd(evlist, POLLERR | POLLHUP) == 0)
2255 perf_evlist__disable(evlist);
2259 trace__fprintf_thread_summary(trace, trace->output);
2261 if (trace->show_tool_stats) {
2262 fprintf(trace->output, "Stats:\n "
2263 " vfs_getname : %" PRIu64 "\n"
2264 " proc_getname: %" PRIu64 "\n",
2265 trace->stats.vfs_getname,
2266 trace->stats.proc_getname);
2271 perf_evlist__delete(evlist);
2272 trace->evlist = NULL;
2273 trace->live = false;
2276 char errbuf[BUFSIZ];
2278 out_error_sched_stat_runtime:
2279 debugfs__strerror_open_tp(errno, errbuf, sizeof(errbuf), "sched", "sched_stat_runtime");
2282 out_error_raw_syscalls:
2283 debugfs__strerror_open_tp(errno, errbuf, sizeof(errbuf), "raw_syscalls", "sys_(enter|exit)");
2287 perf_evlist__strerror_mmap(evlist, errno, errbuf, sizeof(errbuf));
2291 perf_evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf));
2294 fprintf(trace->output, "%s\n", errbuf);
2295 goto out_delete_evlist;
2298 fprintf(trace->output, "Not enough memory to run!\n");
2299 goto out_delete_evlist;
/*
 * Replay mode: open a recorded perf data file as a session, wire the
 * syscall enter/exit, vfs_getname and page fault handlers to the events
 * found in it, process all events and print the summary when requested.
 */
2302 static int trace__replay(struct trace *trace)
2304 const struct perf_evsel_str_handler handlers[] = {
2305 { "probe:vfs_getname", trace__vfs_getname, },
2307 struct perf_data_file file = {
2309 .mode = PERF_DATA_MODE_READ,
2311 struct perf_session *session;
2312 struct perf_evsel *evsel;
/* Callbacks used by the session while processing the file. */
2315 trace->tool.sample = trace__process_sample;
2316 trace->tool.mmap = perf_event__process_mmap;
2317 trace->tool.mmap2 = perf_event__process_mmap2;
2318 trace->tool.comm = perf_event__process_comm;
2319 trace->tool.exit = perf_event__process_exit;
2320 trace->tool.fork = perf_event__process_fork;
2321 trace->tool.attr = perf_event__process_attr;
2322 trace->tool.tracing_data = perf_event__process_tracing_data;
2323 trace->tool.build_id = perf_event__process_build_id;
2325 trace->tool.ordered_events = true;
2326 trace->tool.ordering_requires_timestamps = true;
2328 /* add tid to output */
2329 trace->multiple_threads = true;
2331 session = perf_session__new(&file, false, &trace->tool);
2332 if (session == NULL)
2335 if (symbol__init(&session->header.env) < 0)
/* In replay mode the host machine is the one owned by the session. */
2338 trace->host = &session->machines.host;
2340 err = perf_session__set_tracepoints_handlers(session, handlers);
2344 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2345 "raw_syscalls:sys_enter");
2346 /* older kernels have syscalls tp versus raw_syscalls */
2348 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2349 "syscalls:sys_enter");
2352 (perf_evsel__init_syscall_tp(evsel, trace__sys_enter) < 0 ||
2353 perf_evsel__init_sc_tp_ptr_field(evsel, args))) {
2354 pr_err("Error during initialize raw_syscalls:sys_enter event\n");
2358 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2359 "raw_syscalls:sys_exit");
2361 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2362 "syscalls:sys_exit");
2364 (perf_evsel__init_syscall_tp(evsel, trace__sys_exit) < 0 ||
2365 perf_evsel__init_sc_tp_uint_field(evsel, ret))) {
2366 pr_err("Error during initialize raw_syscalls:sys_exit event\n");
/* Every software page fault event in the file is handled by trace__pgfault(). */
2370 evlist__for_each(session->evlist, evsel) {
2371 if (evsel->attr.type == PERF_TYPE_SOFTWARE &&
2372 (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ||
2373 evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MIN ||
2374 evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS))
2375 evsel->handler = trace__pgfault;
2378 err = parse_target_str(trace);
2384 err = perf_session__process_events(session, &trace->tool);
2386 pr_err("Failed to process events, error %d", err);
2388 else if (trace->summary)
2389 trace__fprintf_thread_summary(trace, trace->output);
2392 perf_session__delete(session);
/* Print the heading of the per-thread summary; returns the fprintf() result. */
static size_t trace__fprintf_threads_header(FILE *fp)
{
	size_t nr_chars = fprintf(fp, "\n Summary of events:\n\n");

	return nr_chars;
}
/*
 * Print the per-syscall statistics table of one thread to @fp: call count,
 * min/avg/max (converted with NSEC_PER_MSEC) and the standard deviation as
 * a percentage of the average.
 */
2406 static size_t thread__dump_stats(struct thread_trace *ttrace,
2407 struct trace *trace, FILE *fp)
2409 struct stats *stats;
2412 struct int_node *inode = intlist__first(ttrace->syscall_stats);
2417 printed += fprintf(fp, "\n");
2419 printed += fprintf(fp, " syscall calls min avg max stddev\n");
2420 printed += fprintf(fp, " (msec) (msec) (msec) (%%)\n");
2421 printed += fprintf(fp, " --------------- -------- --------- --------- --------- ------\n");
2423 /* each int_node is a syscall */
2425 stats = inode->priv;
2427 double min = (double)(stats->min) / NSEC_PER_MSEC;
2428 double max = (double)(stats->max) / NSEC_PER_MSEC;
2429 double avg = avg_stats(stats);
2431 u64 n = (u64) stats->n;
/* Computed before avg is rescaled; a zero average yields 0%. */
2433 pct = avg ? 100.0 * stddev_stats(stats)/avg : 0.0;
2434 avg /= NSEC_PER_MSEC;
/* The node's integer key is the syscall id, used to look up the name. */
2436 sc = &trace->syscalls.table[inode->i];
2437 printed += fprintf(fp, " %-15s", sc->name);
2438 printed += fprintf(fp, " %8" PRIu64 " %9.3f %9.3f",
2440 printed += fprintf(fp, " %9.3f %9.2f%%\n", max, pct);
2443 inode = intlist__next(inode);
2446 printed += fprintf(fp, "\n\n");
2451 /* struct used to pass data to per-thread function */
/* Filled in by trace__fprintf_thread_summary(), read by trace__fprintf_one_thread(). */
2452 struct summary_data {
2454 struct trace *trace;
/*
 * Per-thread callback for machine__for_each_thread(): print one thread's
 * summary line (comm, tid, event count and share, fault counts, runtime)
 * followed by its syscall statistics.  @priv is a struct summary_data.
 */
2458 static int trace__fprintf_one_thread(struct thread *thread, void *priv)
2460 struct summary_data *data = priv;
2461 FILE *fp = data->fp;
2462 size_t printed = data->printed;
2463 struct trace *trace = data->trace;
2464 struct thread_trace *ttrace = thread__priv(thread);
/* This thread's share of all events, in percent. */
2470 ratio = (double)ttrace->nr_events / trace->nr_events * 100.0;
2472 printed += fprintf(fp, " %s (%d), ", thread__comm_str(thread), thread->tid);
2473 printed += fprintf(fp, "%lu events, ", ttrace->nr_events);
2474 printed += fprintf(fp, "%.1f%%", ratio);
2476 printed += fprintf(fp, ", %lu majfaults", ttrace->pfmaj);
2478 printed += fprintf(fp, ", %lu minfaults", ttrace->pfmin);
2479 printed += fprintf(fp, ", %.3f msec\n", ttrace->runtime_ms);
2480 printed += thread__dump_stats(ttrace, trace, fp);
/* NOTE(review): printed started from data->printed and is added back onto it here - confirm the total is intended. */
2482 data->printed += printed;
/*
 * Print the summary header and then one section per thread of the host
 * machine to @fp; returns the accumulated character count.
 */
2487 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
2489 struct summary_data data = {
2493 data.printed = trace__fprintf_threads_header(fp);
2495 machine__for_each_thread(trace->host, trace__fprintf_one_thread, &data);
2497 return data.printed;
2500 static int trace__set_duration(const struct option *opt, const char *str,
2501 int unset __maybe_unused)
2503 struct trace *trace = opt->value;
2505 trace->duration_filter = atof(str);
2509 static int trace__set_filter_pids(const struct option *opt, const char *str,
2510 int unset __maybe_unused)
2514 struct trace *trace = opt->value;
2516 * FIXME: introduce a intarray class, plain parse csv and create a
2517 * { int nr, int entries[] } struct...
2519 struct intlist *list = intlist__new(str);
2524 i = trace->filter_pids.nr = intlist__nr_entries(list) + 1;
2525 trace->filter_pids.entries = calloc(i, sizeof(pid_t));
2527 if (trace->filter_pids.entries == NULL)
2530 trace->filter_pids.entries[0] = getpid();
2532 for (i = 1; i < trace->filter_pids.nr; ++i)
2533 trace->filter_pids.entries[i] = intlist__entry(list, i - 1)->i;
2535 intlist__delete(list);
/*
 * Open @filename for writing as the trace output.  An existing non-empty
 * file is first renamed to "<filename>.old".  Returns 0 or -errno.
 */
2541 static int trace__open_output(struct trace *trace, const char *filename)
2545 if (!stat(filename, &st) && st.st_size) {
2546 char oldname[PATH_MAX];
2548 scnprintf(oldname, sizeof(oldname), "%s.old", filename);
/* The result of rename() is not checked. */
2550 rename(filename, oldname);
2553 trace->output = fopen(filename, "w");
2555 return trace->output == NULL ? -errno : 0;
/*
 * Option callback for --pf: OR the TRACE_PFMAJ/TRACE_PFMIN bits selected by
 * @str ("all", "maj" or "min") into the int pointed to by opt->value.
 */
2558 static int parse_pagefaults(const struct option *opt, const char *str,
2559 int unset __maybe_unused)
2561 int *trace_pgfaults = opt->value;
2563 if (strcmp(str, "all") == 0)
2564 *trace_pgfaults |= TRACE_PFMAJ | TRACE_PFMIN;
2565 else if (strcmp(str, "maj") == 0)
2566 *trace_pgfaults |= TRACE_PFMAJ;
2567 else if (strcmp(str, "min") == 0)
2568 *trace_pgfaults |= TRACE_PFMIN;
2575 static void evlist__set_evsel_handler(struct perf_evlist *evlist, void *handler)
2577 struct perf_evsel *evsel;
2579 evlist__for_each(evlist, evsel)
2580 evsel->handler = handler;
2583 int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
2585 const char * const trace_usage[] = {
2586 "perf trace [<options>] [<command>]",
2587 "perf trace [<options>] -- <command> [<options>]",
2588 "perf trace record [<options>] [<command>]",
2589 "perf trace record [<options>] -- <command> [<options>]",
2592 struct trace trace = {
2594 .machine = audit_detect_machine(),
2595 .open_id = audit_name_to_syscall("open", trace.audit.machine),
2605 .user_freq = UINT_MAX,
2606 .user_interval = ULLONG_MAX,
2607 .no_buffering = true,
2608 .mmap_pages = UINT_MAX,
2612 .trace_syscalls = true,
2614 const char *output_name = NULL;
2615 const char *ev_qualifier_str = NULL;
2616 const struct option trace_options[] = {
2617 OPT_CALLBACK(0, "event", &trace.evlist, "event",
2618 "event selector. use 'perf list' to list available events",
2619 parse_events_option),
2620 OPT_BOOLEAN(0, "comm", &trace.show_comm,
2621 "show the thread COMM next to its id"),
2622 OPT_BOOLEAN(0, "tool_stats", &trace.show_tool_stats, "show tool stats"),
2623 OPT_STRING('e', "expr", &ev_qualifier_str, "expr",
2624 "list of events to trace"),
2625 OPT_STRING('o', "output", &output_name, "file", "output file name"),
2626 OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"),
2627 OPT_STRING('p', "pid", &trace.opts.target.pid, "pid",
2628 "trace events on existing process id"),
2629 OPT_STRING('t', "tid", &trace.opts.target.tid, "tid",
2630 "trace events on existing thread id"),
2631 OPT_CALLBACK(0, "filter-pids", &trace, "float",
2632 "show only events with duration > N.M ms", trace__set_filter_pids),
2633 OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide,
2634 "system-wide collection from all CPUs"),
2635 OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu",
2636 "list of cpus to monitor"),
2637 OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit,
2638 "child tasks do not inherit counters"),
2639 OPT_CALLBACK('m', "mmap-pages", &trace.opts.mmap_pages, "pages",
2640 "number of mmap data pages",
2641 perf_evlist__parse_mmap_pages),
2642 OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user",
2644 OPT_CALLBACK(0, "duration", &trace, "float",
2645 "show only events with duration > N.M ms",
2646 trace__set_duration),
2647 OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"),
2648 OPT_INCR('v', "verbose", &verbose, "be more verbose"),
2649 OPT_BOOLEAN('T', "time", &trace.full_time,
2650 "Show full timestamp, not time relative to first start"),
2651 OPT_BOOLEAN('s', "summary", &trace.summary_only,
2652 "Show only syscall summary with statistics"),
2653 OPT_BOOLEAN('S', "with-summary", &trace.summary,
2654 "Show all syscalls and summary with statistics"),
2655 OPT_CALLBACK_DEFAULT('F', "pf", &trace.trace_pgfaults, "all|maj|min",
2656 "Trace pagefaults", parse_pagefaults, "maj"),
2657 OPT_BOOLEAN(0, "syscalls", &trace.trace_syscalls, "Trace syscalls"),
2663 signal(SIGSEGV, sighandler_dump_stack);
2664 signal(SIGFPE, sighandler_dump_stack);
2666 trace.evlist = perf_evlist__new();
2667 if (trace.evlist == NULL)
2670 if (trace.evlist == NULL) {
2671 pr_err("Not enough memory to run!\n");
2675 argc = parse_options(argc, argv, trace_options, trace_usage,
2676 PARSE_OPT_STOP_AT_NON_OPTION);
2678 if (trace.trace_pgfaults) {
2679 trace.opts.sample_address = true;
2680 trace.opts.sample_time = true;
2683 if (trace.evlist->nr_entries > 0)
2684 evlist__set_evsel_handler(trace.evlist, trace__event_handler);
2686 if ((argc >= 1) && (strcmp(argv[0], "record") == 0))
2687 return trace__record(&trace, argc-1, &argv[1]);
2689 /* summary_only implies summary option, but don't overwrite summary if set */
2690 if (trace.summary_only)
2691 trace.summary = trace.summary_only;
2693 if (!trace.trace_syscalls && !trace.trace_pgfaults &&
2694 trace.evlist->nr_entries == 0 /* Was --events used? */) {
2695 pr_err("Please specify something to trace.\n");
2699 if (output_name != NULL) {
2700 err = trace__open_output(&trace, output_name);
2702 perror("failed to create output file");
2707 if (ev_qualifier_str != NULL) {
2708 const char *s = ev_qualifier_str;
2710 trace.not_ev_qualifier = *s == '!';
2711 if (trace.not_ev_qualifier)
2713 trace.ev_qualifier = strlist__new(true, s);
2714 if (trace.ev_qualifier == NULL) {
2715 fputs("Not enough memory to parse event qualifier",
2722 err = target__validate(&trace.opts.target);
2724 target__strerror(&trace.opts.target, err, bf, sizeof(bf));
2725 fprintf(trace.output, "%s", bf);
2729 err = target__parse_uid(&trace.opts.target);
2731 target__strerror(&trace.opts.target, err, bf, sizeof(bf));
2732 fprintf(trace.output, "%s", bf);
2736 if (!argc && target__none(&trace.opts.target))
2737 trace.opts.target.system_wide = true;
2740 err = trace__replay(&trace);
2742 err = trace__run(&trace, argc, argv);
2745 if (output_name != NULL)
2746 fclose(trace.output);