perf trace: Beautify epoll_ctl 'op' arg
[firefly-linux-kernel-4.4.55.git] / tools / perf / builtin-trace.c
#include <traceevent/event-parse.h>
#include "builtin.h"
#include "util/color.h"
#include "util/debug.h"
#include "util/evlist.h"
#include "util/machine.h"
#include "util/session.h"
#include "util/thread.h"
#include "util/parse-options.h"
#include "util/strlist.h"
#include "util/intlist.h"
#include "util/thread_map.h"

#include <libaudit.h>
#include <signal.h>
#include <stdlib.h>
#include <sys/eventfd.h>
#include <sys/mman.h>
#include <linux/futex.h>
19
/*
 * Fallback values for constants that the libc headers of older distros
 * do not provide yet.
 */
#if !defined(MAP_STACK)
# define MAP_STACK		0x20000
#endif

#if !defined(MADV_HWPOISON)
# define MADV_HWPOISON		100
#endif

#if !defined(MADV_MERGEABLE)
# define MADV_MERGEABLE		12
#endif

#if !defined(MADV_UNMERGEABLE)
# define MADV_UNMERGEABLE	13
#endif
36
37 struct syscall_arg {
38         unsigned long val;
39         void          *parm;
40         u8            idx;
41         u8            mask;
42 };
43
/*
 * Table of names indexed by an integer argument value, consumed by
 * syscall_arg__scnprintf_strarray().
 */
struct strarray {
	int	    nr_entries;	/* number of slots in entries[] */
	const char **entries;	/* entries[value] is the name printed for value */
};

/*
 * Declares 'strarray__<array>' wrapping the string table 'array', which must
 * be a real array (ARRAY_SIZE() is applied to it).  Callers prepend the
 * storage class, e.g. 'static DEFINE_STRARRAY(foo);'.
 */
#define DEFINE_STRARRAY(array)				\
	struct strarray strarray__##array = {		\
		.entries    = array,			\
		.nr_entries = ARRAY_SIZE(array),	\
	}
53
54 static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size,
55                                               struct syscall_arg *arg)
56 {
57         int idx = arg->val;
58         struct strarray *sa = arg->parm;
59
60         if (idx < 0 || idx >= sa->nr_entries)
61                 return scnprintf(bf, size, "%d", idx);
62
63         return scnprintf(bf, size, "%s", sa->entries[idx]);
64 }
65
66 #define SCA_STRARRAY syscall_arg__scnprintf_strarray
67
68 static size_t syscall_arg__scnprintf_hex(char *bf, size_t size,
69                                          struct syscall_arg *arg)
70 {
71         return scnprintf(bf, size, "%#lx", arg->val);
72 }
73
74 #define SCA_HEX syscall_arg__scnprintf_hex
75
76 static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size,
77                                                struct syscall_arg *arg)
78 {
79         int printed = 0, prot = arg->val;
80
81         if (prot == PROT_NONE)
82                 return scnprintf(bf, size, "NONE");
83 #define P_MMAP_PROT(n) \
84         if (prot & PROT_##n) { \
85                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
86                 prot &= ~PROT_##n; \
87         }
88
89         P_MMAP_PROT(EXEC);
90         P_MMAP_PROT(READ);
91         P_MMAP_PROT(WRITE);
92 #ifdef PROT_SEM
93         P_MMAP_PROT(SEM);
94 #endif
95         P_MMAP_PROT(GROWSDOWN);
96         P_MMAP_PROT(GROWSUP);
97 #undef P_MMAP_PROT
98
99         if (prot)
100                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", prot);
101
102         return printed;
103 }
104
105 #define SCA_MMAP_PROT syscall_arg__scnprintf_mmap_prot
106
107 static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size,
108                                                 struct syscall_arg *arg)
109 {
110         int printed = 0, flags = arg->val;
111
112 #define P_MMAP_FLAG(n) \
113         if (flags & MAP_##n) { \
114                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
115                 flags &= ~MAP_##n; \
116         }
117
118         P_MMAP_FLAG(SHARED);
119         P_MMAP_FLAG(PRIVATE);
120 #ifdef MAP_32BIT
121         P_MMAP_FLAG(32BIT);
122 #endif
123         P_MMAP_FLAG(ANONYMOUS);
124         P_MMAP_FLAG(DENYWRITE);
125         P_MMAP_FLAG(EXECUTABLE);
126         P_MMAP_FLAG(FILE);
127         P_MMAP_FLAG(FIXED);
128         P_MMAP_FLAG(GROWSDOWN);
129 #ifdef MAP_HUGETLB
130         P_MMAP_FLAG(HUGETLB);
131 #endif
132         P_MMAP_FLAG(LOCKED);
133         P_MMAP_FLAG(NONBLOCK);
134         P_MMAP_FLAG(NORESERVE);
135         P_MMAP_FLAG(POPULATE);
136         P_MMAP_FLAG(STACK);
137 #ifdef MAP_UNINITIALIZED
138         P_MMAP_FLAG(UNINITIALIZED);
139 #endif
140 #undef P_MMAP_FLAG
141
142         if (flags)
143                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
144
145         return printed;
146 }
147
148 #define SCA_MMAP_FLAGS syscall_arg__scnprintf_mmap_flags
149
150 static size_t syscall_arg__scnprintf_madvise_behavior(char *bf, size_t size,
151                                                       struct syscall_arg *arg)
152 {
153         int behavior = arg->val;
154
155         switch (behavior) {
156 #define P_MADV_BHV(n) case MADV_##n: return scnprintf(bf, size, #n)
157         P_MADV_BHV(NORMAL);
158         P_MADV_BHV(RANDOM);
159         P_MADV_BHV(SEQUENTIAL);
160         P_MADV_BHV(WILLNEED);
161         P_MADV_BHV(DONTNEED);
162         P_MADV_BHV(REMOVE);
163         P_MADV_BHV(DONTFORK);
164         P_MADV_BHV(DOFORK);
165         P_MADV_BHV(HWPOISON);
166 #ifdef MADV_SOFT_OFFLINE
167         P_MADV_BHV(SOFT_OFFLINE);
168 #endif
169         P_MADV_BHV(MERGEABLE);
170         P_MADV_BHV(UNMERGEABLE);
171 #ifdef MADV_HUGEPAGE
172         P_MADV_BHV(HUGEPAGE);
173 #endif
174 #ifdef MADV_NOHUGEPAGE
175         P_MADV_BHV(NOHUGEPAGE);
176 #endif
177 #ifdef MADV_DONTDUMP
178         P_MADV_BHV(DONTDUMP);
179 #endif
180 #ifdef MADV_DODUMP
181         P_MADV_BHV(DODUMP);
182 #endif
183 #undef P_MADV_PHV
184         default: break;
185         }
186
187         return scnprintf(bf, size, "%#x", behavior);
188 }
189
190 #define SCA_MADV_BHV syscall_arg__scnprintf_madvise_behavior
191
192 static size_t syscall_arg__scnprintf_futex_op(char *bf, size_t size, struct syscall_arg *arg)
193 {
194         enum syscall_futex_args {
195                 SCF_UADDR   = (1 << 0),
196                 SCF_OP      = (1 << 1),
197                 SCF_VAL     = (1 << 2),
198                 SCF_TIMEOUT = (1 << 3),
199                 SCF_UADDR2  = (1 << 4),
200                 SCF_VAL3    = (1 << 5),
201         };
202         int op = arg->val;
203         int cmd = op & FUTEX_CMD_MASK;
204         size_t printed = 0;
205
206         switch (cmd) {
207 #define P_FUTEX_OP(n) case FUTEX_##n: printed = scnprintf(bf, size, #n);
208         P_FUTEX_OP(WAIT);           arg->mask |= SCF_VAL3|SCF_UADDR2;             break;
209         P_FUTEX_OP(WAKE);           arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
210         P_FUTEX_OP(FD);             arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
211         P_FUTEX_OP(REQUEUE);        arg->mask |= SCF_VAL3|SCF_TIMEOUT;            break;
212         P_FUTEX_OP(CMP_REQUEUE);    arg->mask |= SCF_TIMEOUT;                     break;
213         P_FUTEX_OP(CMP_REQUEUE_PI); arg->mask |= SCF_TIMEOUT;                     break;
214         P_FUTEX_OP(WAKE_OP);                                                      break;
215         P_FUTEX_OP(LOCK_PI);        arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
216         P_FUTEX_OP(UNLOCK_PI);      arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
217         P_FUTEX_OP(TRYLOCK_PI);     arg->mask |= SCF_VAL3|SCF_UADDR2;             break;
218         P_FUTEX_OP(WAIT_BITSET);    arg->mask |= SCF_UADDR2;                      break;
219         P_FUTEX_OP(WAKE_BITSET);    arg->mask |= SCF_UADDR2;                      break;
220         P_FUTEX_OP(WAIT_REQUEUE_PI);                                              break;
221         default: printed = scnprintf(bf, size, "%#x", cmd);                       break;
222         }
223
224         if (op & FUTEX_PRIVATE_FLAG)
225                 printed += scnprintf(bf + printed, size - printed, "|PRIV");
226
227         if (op & FUTEX_CLOCK_REALTIME)
228                 printed += scnprintf(bf + printed, size - printed, "|CLKRT");
229
230         return printed;
231 }
232
233 #define SCA_FUTEX_OP  syscall_arg__scnprintf_futex_op
234
/* epoll_ctl() 'op' names; slot 0 is intentionally left unset. */
static const char *epoll_ctl_ops[] = {
	[1] = "ADD",
	[2] = "DEL",
	[3] = "MOD",
};
static DEFINE_STRARRAY(epoll_ctl_ops);

/* getitimer()/setitimer() 'which' names. */
static const char *itimers[] = {
	"REAL",
	"VIRTUAL",
	"PROF",
};
static DEFINE_STRARRAY(itimers);

/* lseek() 'whence' names; the last two only when the libc headers know them. */
static const char *whences[] = {
	"SET",
	"CUR",
	"END",
#ifdef SEEK_DATA
	"DATA",
#endif
#ifdef SEEK_HOLE
	"HOLE",
#endif
};
static DEFINE_STRARRAY(whences);
250
/*
 * fcntl() 'cmd' names, indexed by command value.  Like every other table in
 * this file the names are given without their common prefix (F_ here), so
 * all commands are printed in the same style.
 */
static const char *fcntl_cmds[] = {
	"DUPFD", "GETFD", "SETFD", "GETFL", "SETFL", "GETLK", "SETLK",
	"SETLKW", "SETOWN", "GETOWN", "SETSIG", "GETSIG", "GETLK64",
	"SETLK64", "SETLKW64", "SETOWN_EX", "GETOWN_EX",
	"GETOWNER_UIDS",
};
static DEFINE_STRARRAY(fcntl_cmds);
258
/* getrlimit()/setrlimit()/prlimit64() 'resource' names, indexed by resource value. */
static const char *rlimit_resources[] = {
	"CPU",
	"FSIZE",
	"DATA",
	"STACK",
	"CORE",
	"RSS",
	"NPROC",
	"NOFILE",
	"MEMLOCK",
	"AS",
	"LOCKS",
	"SIGPENDING",
	"MSGQUEUE",
	"NICE",
	"RTPRIO",
	"RTTIME",
};
static DEFINE_STRARRAY(rlimit_resources);

/* rt_sigprocmask() 'how' names. */
static const char *sighow[] = {
	"BLOCK",
	"UNBLOCK",
	"SETMASK",
};
static DEFINE_STRARRAY(sighow);
268
/*
 * socket() 'family' names, indexed by the AF_* value.
 *
 * The table is positional, so every value needs a slot: 28 (AF_MPLS) has to
 * be present or "CAN" (29) and every later family is printed under the
 * wrong number.
 */
static const char *socket_families[] = {
	"UNSPEC", "LOCAL", "INET", "AX25", "IPX", "APPLETALK", "NETROM",
	"BRIDGE", "ATMPVC", "X25", "INET6", "ROSE", "DECnet", "NETBEUI",
	"SECURITY", "KEY", "NETLINK", "PACKET", "ASH", "ECONET", "ATMSVC",
	"RDS", "SNA", "IRDA", "PPPOX", "WANPIPE", "LLC", "IB", "MPLS", "CAN",
	"TIPC", "BLUETOOTH", "IUCV", "RXRPC", "ISDN", "PHONET", "IEEE802154",
	"CAIF", "ALG", "NFC", "VSOCK",
};
static DEFINE_STRARRAY(socket_families);
278
279 #ifndef SOCK_TYPE_MASK
280 #define SOCK_TYPE_MASK 0xf
281 #endif
282
283 static size_t syscall_arg__scnprintf_socket_type(char *bf, size_t size,
284                                                       struct syscall_arg *arg)
285 {
286         size_t printed;
287         int type = arg->val,
288             flags = type & ~SOCK_TYPE_MASK;
289
290         type &= SOCK_TYPE_MASK;
291         /*
292          * Can't use a strarray, MIPS may override for ABI reasons.
293          */
294         switch (type) {
295 #define P_SK_TYPE(n) case SOCK_##n: printed = scnprintf(bf, size, #n); break;
296         P_SK_TYPE(STREAM);
297         P_SK_TYPE(DGRAM);
298         P_SK_TYPE(RAW);
299         P_SK_TYPE(RDM);
300         P_SK_TYPE(SEQPACKET);
301         P_SK_TYPE(DCCP);
302         P_SK_TYPE(PACKET);
303 #undef P_SK_TYPE
304         default:
305                 printed = scnprintf(bf, size, "%#x", type);
306         }
307
308 #define P_SK_FLAG(n) \
309         if (flags & SOCK_##n) { \
310                 printed += scnprintf(bf + printed, size - printed, "|%s", #n); \
311                 flags &= ~SOCK_##n; \
312         }
313
314         P_SK_FLAG(CLOEXEC);
315         P_SK_FLAG(NONBLOCK);
316 #undef P_SK_FLAG
317
318         if (flags)
319                 printed += scnprintf(bf + printed, size - printed, "|%#x", flags);
320
321         return printed;
322 }
323
324 #define SCA_SK_TYPE syscall_arg__scnprintf_socket_type
325
326 #ifndef MSG_PROBE
327 #define MSG_PROBE            0x10
328 #endif
329 #ifndef MSG_SENDPAGE_NOTLAST
330 #define MSG_SENDPAGE_NOTLAST 0x20000
331 #endif
332 #ifndef MSG_FASTOPEN
333 #define MSG_FASTOPEN         0x20000000
334 #endif
335
336 static size_t syscall_arg__scnprintf_msg_flags(char *bf, size_t size,
337                                                struct syscall_arg *arg)
338 {
339         int printed = 0, flags = arg->val;
340
341         if (flags == 0)
342                 return scnprintf(bf, size, "NONE");
343 #define P_MSG_FLAG(n) \
344         if (flags & MSG_##n) { \
345                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
346                 flags &= ~MSG_##n; \
347         }
348
349         P_MSG_FLAG(OOB);
350         P_MSG_FLAG(PEEK);
351         P_MSG_FLAG(DONTROUTE);
352         P_MSG_FLAG(TRYHARD);
353         P_MSG_FLAG(CTRUNC);
354         P_MSG_FLAG(PROBE);
355         P_MSG_FLAG(TRUNC);
356         P_MSG_FLAG(DONTWAIT);
357         P_MSG_FLAG(EOR);
358         P_MSG_FLAG(WAITALL);
359         P_MSG_FLAG(FIN);
360         P_MSG_FLAG(SYN);
361         P_MSG_FLAG(CONFIRM);
362         P_MSG_FLAG(RST);
363         P_MSG_FLAG(ERRQUEUE);
364         P_MSG_FLAG(NOSIGNAL);
365         P_MSG_FLAG(MORE);
366         P_MSG_FLAG(WAITFORONE);
367         P_MSG_FLAG(SENDPAGE_NOTLAST);
368         P_MSG_FLAG(FASTOPEN);
369         P_MSG_FLAG(CMSG_CLOEXEC);
370 #undef P_MSG_FLAG
371
372         if (flags)
373                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
374
375         return printed;
376 }
377
378 #define SCA_MSG_FLAGS syscall_arg__scnprintf_msg_flags
379
380 static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size,
381                                                  struct syscall_arg *arg)
382 {
383         size_t printed = 0;
384         int mode = arg->val;
385
386         if (mode == F_OK) /* 0 */
387                 return scnprintf(bf, size, "F");
388 #define P_MODE(n) \
389         if (mode & n##_OK) { \
390                 printed += scnprintf(bf + printed, size - printed, "%s", #n); \
391                 mode &= ~n##_OK; \
392         }
393
394         P_MODE(R);
395         P_MODE(W);
396         P_MODE(X);
397 #undef P_MODE
398
399         if (mode)
400                 printed += scnprintf(bf + printed, size - printed, "|%#x", mode);
401
402         return printed;
403 }
404
405 #define SCA_ACCMODE syscall_arg__scnprintf_access_mode
406
407 static size_t syscall_arg__scnprintf_open_flags(char *bf, size_t size,
408                                                struct syscall_arg *arg)
409 {
410         int printed = 0, flags = arg->val;
411
412         if (!(flags & O_CREAT))
413                 arg->mask |= 1 << (arg->idx + 1); /* Mask the mode parm */
414
415         if (flags == 0)
416                 return scnprintf(bf, size, "RDONLY");
417 #define P_FLAG(n) \
418         if (flags & O_##n) { \
419                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
420                 flags &= ~O_##n; \
421         }
422
423         P_FLAG(APPEND);
424         P_FLAG(ASYNC);
425         P_FLAG(CLOEXEC);
426         P_FLAG(CREAT);
427         P_FLAG(DIRECT);
428         P_FLAG(DIRECTORY);
429         P_FLAG(EXCL);
430         P_FLAG(LARGEFILE);
431         P_FLAG(NOATIME);
432         P_FLAG(NOCTTY);
433 #ifdef O_NONBLOCK
434         P_FLAG(NONBLOCK);
435 #elif O_NDELAY
436         P_FLAG(NDELAY);
437 #endif
438 #ifdef O_PATH
439         P_FLAG(PATH);
440 #endif
441         P_FLAG(RDWR);
442 #ifdef O_DSYNC
443         if ((flags & O_SYNC) == O_SYNC)
444                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", "SYNC");
445         else {
446                 P_FLAG(DSYNC);
447         }
448 #else
449         P_FLAG(SYNC);
450 #endif
451         P_FLAG(TRUNC);
452         P_FLAG(WRONLY);
453 #undef P_FLAG
454
455         if (flags)
456                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
457
458         return printed;
459 }
460
461 #define SCA_OPEN_FLAGS syscall_arg__scnprintf_open_flags
462
463 static size_t syscall_arg__scnprintf_eventfd_flags(char *bf, size_t size,
464                                                    struct syscall_arg *arg)
465 {
466         int printed = 0, flags = arg->val;
467
468         if (flags == 0)
469                 return scnprintf(bf, size, "NONE");
470 #define P_FLAG(n) \
471         if (flags & EFD_##n) { \
472                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
473                 flags &= ~EFD_##n; \
474         }
475
476         P_FLAG(SEMAPHORE);
477         P_FLAG(CLOEXEC);
478         P_FLAG(NONBLOCK);
479 #undef P_FLAG
480
481         if (flags)
482                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
483
484         return printed;
485 }
486
487 #define SCA_EFD_FLAGS syscall_arg__scnprintf_eventfd_flags
488
489 static size_t syscall_arg__scnprintf_signum(char *bf, size_t size, struct syscall_arg *arg)
490 {
491         int sig = arg->val;
492
493         switch (sig) {
494 #define P_SIGNUM(n) case SIG##n: return scnprintf(bf, size, #n)
495         P_SIGNUM(HUP);
496         P_SIGNUM(INT);
497         P_SIGNUM(QUIT);
498         P_SIGNUM(ILL);
499         P_SIGNUM(TRAP);
500         P_SIGNUM(ABRT);
501         P_SIGNUM(BUS);
502         P_SIGNUM(FPE);
503         P_SIGNUM(KILL);
504         P_SIGNUM(USR1);
505         P_SIGNUM(SEGV);
506         P_SIGNUM(USR2);
507         P_SIGNUM(PIPE);
508         P_SIGNUM(ALRM);
509         P_SIGNUM(TERM);
510         P_SIGNUM(STKFLT);
511         P_SIGNUM(CHLD);
512         P_SIGNUM(CONT);
513         P_SIGNUM(STOP);
514         P_SIGNUM(TSTP);
515         P_SIGNUM(TTIN);
516         P_SIGNUM(TTOU);
517         P_SIGNUM(URG);
518         P_SIGNUM(XCPU);
519         P_SIGNUM(XFSZ);
520         P_SIGNUM(VTALRM);
521         P_SIGNUM(PROF);
522         P_SIGNUM(WINCH);
523         P_SIGNUM(IO);
524         P_SIGNUM(PWR);
525         P_SIGNUM(SYS);
526         default: break;
527         }
528
529         return scnprintf(bf, size, "%#x", sig);
530 }
531
532 #define SCA_SIGNUM syscall_arg__scnprintf_signum
533
534 static struct syscall_fmt {
535         const char *name;
536         const char *alias;
537         size_t     (*arg_scnprintf[6])(char *bf, size_t size, struct syscall_arg *arg);
538         void       *arg_parm[6];
539         bool       errmsg;
540         bool       timeout;
541         bool       hexret;
542 } syscall_fmts[] = {
543         { .name     = "access",     .errmsg = true,
544           .arg_scnprintf = { [1] = SCA_ACCMODE, /* mode */ }, },
545         { .name     = "arch_prctl", .errmsg = true, .alias = "prctl", },
546         { .name     = "brk",        .hexret = true,
547           .arg_scnprintf = { [0] = SCA_HEX, /* brk */ }, },
548         { .name     = "connect",    .errmsg = true, },
549         { .name     = "epoll_ctl",  .errmsg = true,
550           .arg_scnprintf = { [1] = SCA_STRARRAY, /* op */ },
551           .arg_parm      = { [1] = &strarray__epoll_ctl_ops, /* op */ }, },
552         { .name     = "eventfd2",   .errmsg = true,
553           .arg_scnprintf = { [1] = SCA_EFD_FLAGS, /* flags */ }, },
554         { .name     = "fcntl",      .errmsg = true,
555           .arg_scnprintf = { [1] = SCA_STRARRAY, /* cmd */ },
556           .arg_parm      = { [1] = &strarray__fcntl_cmds, /* cmd */ }, },
557         { .name     = "fstat",      .errmsg = true, .alias = "newfstat", },
558         { .name     = "fstatat",    .errmsg = true, .alias = "newfstatat", },
559         { .name     = "futex",      .errmsg = true,
560           .arg_scnprintf = { [1] = SCA_FUTEX_OP, /* op */ }, },
561         { .name     = "getitimer",  .errmsg = true,
562           .arg_scnprintf = { [0] = SCA_STRARRAY, /* which */ },
563           .arg_parm      = { [0] = &strarray__itimers, /* which */ }, },
564         { .name     = "getrlimit",  .errmsg = true,
565           .arg_scnprintf = { [0] = SCA_STRARRAY, /* resource */ },
566           .arg_parm      = { [0] = &strarray__rlimit_resources, /* resource */ }, },
567         { .name     = "ioctl",      .errmsg = true,
568           .arg_scnprintf = { [2] = SCA_HEX, /* arg */ }, },
569         { .name     = "kill",       .errmsg = true,
570           .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
571         { .name     = "lseek",      .errmsg = true,
572           .arg_scnprintf = { [2] = SCA_STRARRAY, /* whence */ },
573           .arg_parm      = { [2] = &strarray__whences, /* whence */ }, },
574         { .name     = "lstat",      .errmsg = true, .alias = "newlstat", },
575         { .name     = "madvise",    .errmsg = true,
576           .arg_scnprintf = { [0] = SCA_HEX,      /* start */
577                              [2] = SCA_MADV_BHV, /* behavior */ }, },
578         { .name     = "mmap",       .hexret = true,
579           .arg_scnprintf = { [0] = SCA_HEX,       /* addr */
580                              [2] = SCA_MMAP_PROT, /* prot */
581                              [3] = SCA_MMAP_FLAGS, /* flags */ }, },
582         { .name     = "mprotect",   .errmsg = true,
583           .arg_scnprintf = { [0] = SCA_HEX, /* start */
584                              [2] = SCA_MMAP_PROT, /* prot */ }, },
585         { .name     = "mremap",     .hexret = true,
586           .arg_scnprintf = { [0] = SCA_HEX, /* addr */
587                              [4] = SCA_HEX, /* new_addr */ }, },
588         { .name     = "munmap",     .errmsg = true,
589           .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
590         { .name     = "open",       .errmsg = true,
591           .arg_scnprintf = { [1] = SCA_OPEN_FLAGS, /* flags */ }, },
592         { .name     = "open_by_handle_at", .errmsg = true,
593           .arg_scnprintf = { [2] = SCA_OPEN_FLAGS, /* flags */ }, },
594         { .name     = "openat",     .errmsg = true,
595           .arg_scnprintf = { [2] = SCA_OPEN_FLAGS, /* flags */ }, },
596         { .name     = "poll",       .errmsg = true, .timeout = true, },
597         { .name     = "ppoll",      .errmsg = true, .timeout = true, },
598         { .name     = "pread",      .errmsg = true, .alias = "pread64", },
599         { .name     = "prlimit64",  .errmsg = true,
600           .arg_scnprintf = { [1] = SCA_STRARRAY, /* resource */ },
601           .arg_parm      = { [1] = &strarray__rlimit_resources, /* resource */ }, },
602         { .name     = "pwrite",     .errmsg = true, .alias = "pwrite64", },
603         { .name     = "read",       .errmsg = true, },
604         { .name     = "recvfrom",   .errmsg = true,
605           .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
606         { .name     = "recvmmsg",   .errmsg = true,
607           .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
608         { .name     = "recvmsg",    .errmsg = true,
609           .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
610         { .name     = "rt_sigaction", .errmsg = true,
611           .arg_scnprintf = { [0] = SCA_SIGNUM, /* sig */ }, },
612         { .name     = "rt_sigprocmask", .errmsg = true,
613           .arg_scnprintf = { [0] = SCA_STRARRAY, /* how */ },
614           .arg_parm      = { [0] = &strarray__sighow, /* how */ }, },
615         { .name     = "rt_sigqueueinfo", .errmsg = true,
616           .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
617         { .name     = "rt_tgsigqueueinfo", .errmsg = true,
618           .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
619         { .name     = "select",     .errmsg = true, .timeout = true, },
620         { .name     = "sendmmsg",    .errmsg = true,
621           .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
622         { .name     = "sendmsg",    .errmsg = true,
623           .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
624         { .name     = "sendto",     .errmsg = true,
625           .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
626         { .name     = "setitimer",  .errmsg = true,
627           .arg_scnprintf = { [0] = SCA_STRARRAY, /* which */ },
628           .arg_parm      = { [0] = &strarray__itimers, /* which */ }, },
629         { .name     = "setrlimit",  .errmsg = true,
630           .arg_scnprintf = { [0] = SCA_STRARRAY, /* resource */ },
631           .arg_parm      = { [0] = &strarray__rlimit_resources, /* resource */ }, },
632         { .name     = "socket",     .errmsg = true,
633           .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
634                              [1] = SCA_SK_TYPE, /* type */ },
635           .arg_parm      = { [0] = &strarray__socket_families, /* family */ }, },
636         { .name     = "stat",       .errmsg = true, .alias = "newstat", },
637         { .name     = "tgkill",     .errmsg = true,
638           .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
639         { .name     = "tkill",      .errmsg = true,
640           .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
641         { .name     = "uname",      .errmsg = true, .alias = "newuname", },
642 };
643
644 static int syscall_fmt__cmp(const void *name, const void *fmtp)
645 {
646         const struct syscall_fmt *fmt = fmtp;
647         return strcmp(name, fmt->name);
648 }
649
650 static struct syscall_fmt *syscall_fmt__find(const char *name)
651 {
652         const int nmemb = ARRAY_SIZE(syscall_fmts);
653         return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp);
654 }
655
/*
 * What is known about one syscall id; filled in by trace__read_syscall_info().
 */
struct syscall {
	/* Format of the "syscalls:sys_enter_<name>" tracepoint. */
	struct event_format *tp_format;
	/* Name as returned by audit_syscall_to_name(). */
	const char	    *name;
	/* Set when the event qualifier excludes this syscall. */
	bool		    filtered;
	/* Matching syscall_fmts[] entry, if any. */
	struct syscall_fmt  *fmt;
	/* Per-argument beautifiers, allocated by syscall__set_arg_fmts(). */
	size_t		    (**arg_scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
	/* Per-argument data; points at fmt->arg_parm when fmt is set. */
	void		    **arg_parm;
};
664
665 static size_t fprintf_duration(unsigned long t, FILE *fp)
666 {
667         double duration = (double)t / NSEC_PER_MSEC;
668         size_t printed = fprintf(fp, "(");
669
670         if (duration >= 1.0)
671                 printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration);
672         else if (duration >= 0.01)
673                 printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration);
674         else
675                 printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration);
676         return printed + fprintf(fp, "): ");
677 }
678
679 struct thread_trace {
680         u64               entry_time;
681         u64               exit_time;
682         bool              entry_pending;
683         unsigned long     nr_events;
684         char              *entry_str;
685         double            runtime_ms;
686 };
687
688 static struct thread_trace *thread_trace__new(void)
689 {
690         return zalloc(sizeof(struct thread_trace));
691 }
692
693 static struct thread_trace *thread__trace(struct thread *thread, FILE *fp)
694 {
695         struct thread_trace *ttrace;
696
697         if (thread == NULL)
698                 goto fail;
699
700         if (thread->priv == NULL)
701                 thread->priv = thread_trace__new();
702                 
703         if (thread->priv == NULL)
704                 goto fail;
705
706         ttrace = thread->priv;
707         ++ttrace->nr_events;
708
709         return ttrace;
710 fail:
711         color_fprintf(fp, PERF_COLOR_RED,
712                       "WARNING: not enough memory, dropping samples!\n");
713         return NULL;
714 }
715
716 struct trace {
717         struct perf_tool        tool;
718         int                     audit_machine;
719         struct {
720                 int             max;
721                 struct syscall  *table;
722         } syscalls;
723         struct perf_record_opts opts;
724         struct machine          host;
725         u64                     base_time;
726         bool                    full_time;
727         FILE                    *output;
728         unsigned long           nr_events;
729         struct strlist          *ev_qualifier;
730         bool                    not_ev_qualifier;
731         struct intlist          *tid_list;
732         struct intlist          *pid_list;
733         bool                    sched;
734         bool                    multiple_threads;
735         bool                    show_comm;
736         double                  duration_filter;
737         double                  runtime_ms;
738 };
739
740 static bool trace__filter_duration(struct trace *trace, double t)
741 {
742         return t < (trace->duration_filter * NSEC_PER_MSEC);
743 }
744
745 static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
746 {
747         double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC;
748
749         return fprintf(fp, "%10.3f ", ts);
750 }
751
752 static bool done = false;
753
754 static void sig_handler(int sig __maybe_unused)
755 {
756         done = true;
757 }
758
759 static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread,
760                                         u64 duration, u64 tstamp, FILE *fp)
761 {
762         size_t printed = trace__fprintf_tstamp(trace, tstamp, fp);
763         printed += fprintf_duration(duration, fp);
764
765         if (trace->multiple_threads) {
766                 if (trace->show_comm)
767                         printed += fprintf(fp, "%.14s/", thread->comm);
768                 printed += fprintf(fp, "%d ", thread->tid);
769         }
770
771         return printed;
772 }
773
774 static int trace__process_event(struct trace *trace, struct machine *machine,
775                                 union perf_event *event)
776 {
777         int ret = 0;
778
779         switch (event->header.type) {
780         case PERF_RECORD_LOST:
781                 color_fprintf(trace->output, PERF_COLOR_RED,
782                               "LOST %" PRIu64 " events!\n", event->lost.lost);
783                 ret = machine__process_lost_event(machine, event);
784         default:
785                 ret = machine__process_event(machine, event);
786                 break;
787         }
788
789         return ret;
790 }
791
792 static int trace__tool_process(struct perf_tool *tool,
793                                union perf_event *event,
794                                struct perf_sample *sample __maybe_unused,
795                                struct machine *machine)
796 {
797         struct trace *trace = container_of(tool, struct trace, tool);
798         return trace__process_event(trace, machine, event);
799 }
800
801 static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
802 {
803         int err = symbol__init();
804
805         if (err)
806                 return err;
807
808         machine__init(&trace->host, "", HOST_KERNEL_ID);
809         machine__create_kernel_maps(&trace->host);
810
811         if (perf_target__has_task(&trace->opts.target)) {
812                 err = perf_event__synthesize_thread_map(&trace->tool, evlist->threads,
813                                                         trace__tool_process,
814                                                         &trace->host);
815         } else {
816                 err = perf_event__synthesize_threads(&trace->tool, trace__tool_process,
817                                                      &trace->host);
818         }
819
820         if (err)
821                 symbol__exit();
822
823         return err;
824 }
825
826 static int syscall__set_arg_fmts(struct syscall *sc)
827 {
828         struct format_field *field;
829         int idx = 0;
830
831         sc->arg_scnprintf = calloc(sc->tp_format->format.nr_fields - 1, sizeof(void *));
832         if (sc->arg_scnprintf == NULL)
833                 return -1;
834
835         if (sc->fmt)
836                 sc->arg_parm = sc->fmt->arg_parm;
837
838         for (field = sc->tp_format->format.fields->next; field; field = field->next) {
839                 if (sc->fmt && sc->fmt->arg_scnprintf[idx])
840                         sc->arg_scnprintf[idx] = sc->fmt->arg_scnprintf[idx];
841                 else if (field->flags & FIELD_IS_POINTER)
842                         sc->arg_scnprintf[idx] = syscall_arg__scnprintf_hex;
843                 ++idx;
844         }
845
846         return 0;
847 }
848
849 static int trace__read_syscall_info(struct trace *trace, int id)
850 {
851         char tp_name[128];
852         struct syscall *sc;
853         const char *name = audit_syscall_to_name(id, trace->audit_machine);
854
855         if (name == NULL)
856                 return -1;
857
858         if (id > trace->syscalls.max) {
859                 struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc));
860
861                 if (nsyscalls == NULL)
862                         return -1;
863
864                 if (trace->syscalls.max != -1) {
865                         memset(nsyscalls + trace->syscalls.max + 1, 0,
866                                (id - trace->syscalls.max) * sizeof(*sc));
867                 } else {
868                         memset(nsyscalls, 0, (id + 1) * sizeof(*sc));
869                 }
870
871                 trace->syscalls.table = nsyscalls;
872                 trace->syscalls.max   = id;
873         }
874
875         sc = trace->syscalls.table + id;
876         sc->name = name;
877
878         if (trace->ev_qualifier) {
879                 bool in = strlist__find(trace->ev_qualifier, name) != NULL;
880
881                 if (!(in ^ trace->not_ev_qualifier)) {
882                         sc->filtered = true;
883                         /*
884                          * No need to do read tracepoint information since this will be
885                          * filtered out.
886                          */
887                         return 0;
888                 }
889         }
890
891         sc->fmt  = syscall_fmt__find(sc->name);
892
893         snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name);
894         sc->tp_format = event_format__new("syscalls", tp_name);
895
896         if (sc->tp_format == NULL && sc->fmt && sc->fmt->alias) {
897                 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias);
898                 sc->tp_format = event_format__new("syscalls", tp_name);
899         }
900
901         if (sc->tp_format == NULL)
902                 return -1;
903
904         return syscall__set_arg_fmts(sc);
905 }
906
907 static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
908                                       unsigned long *args)
909 {
910         size_t printed = 0;
911
912         if (sc->tp_format != NULL) {
913                 struct format_field *field;
914                 u8 bit = 1;
915                 struct syscall_arg arg = {
916                         .idx  = 0,
917                         .mask = 0,
918                 };
919
920                 for (field = sc->tp_format->format.fields->next; field;
921                      field = field->next, ++arg.idx, bit <<= 1) {
922                         if (arg.mask & bit)
923                                 continue;
924
925                         if (args[arg.idx] == 0)
926                                 continue;
927
928                         printed += scnprintf(bf + printed, size - printed,
929                                              "%s%s: ", printed ? ", " : "", field->name);
930                         if (sc->arg_scnprintf && sc->arg_scnprintf[arg.idx]) {
931                                 arg.val = args[arg.idx];
932                                 if (sc->arg_parm)
933                                         arg.parm = sc->arg_parm[arg.idx];
934                                 printed += sc->arg_scnprintf[arg.idx](bf + printed,
935                                                                       size - printed, &arg);
936                         } else {
937                                 printed += scnprintf(bf + printed, size - printed,
938                                                      "%ld", args[arg.idx]);
939                         }
940                 }
941         } else {
942                 int i = 0;
943
944                 while (i < 6) {
945                         printed += scnprintf(bf + printed, size - printed,
946                                              "%sarg%d: %ld",
947                                              printed ? ", " : "", i, args[i]);
948                         ++i;
949                 }
950         }
951
952         return printed;
953 }
954
/*
 * Signature of the per-tracepoint sample handlers in this file, such as
 * trace__sys_enter(), trace__sys_exit() and trace__sched_stat_runtime().
 */
typedef int (*tracepoint_handler)(struct trace *trace,
                                  struct perf_evsel *evsel,
                                  struct perf_sample *sample);
957
958 static struct syscall *trace__syscall_info(struct trace *trace,
959                                            struct perf_evsel *evsel,
960                                            struct perf_sample *sample)
961 {
962         int id = perf_evsel__intval(evsel, sample, "id");
963
964         if (id < 0) {
965
966                 /*
967                  * XXX: Noticed on x86_64, reproduced as far back as 3.0.36, haven't tried
968                  * before that, leaving at a higher verbosity level till that is
969                  * explained. Reproduced with plain ftrace with:
970                  *
971                  * echo 1 > /t/events/raw_syscalls/sys_exit/enable
972                  * grep "NR -1 " /t/trace_pipe
973                  *
974                  * After generating some load on the machine.
975                  */
976                 if (verbose > 1) {
977                         static u64 n;
978                         fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n",
979                                 id, perf_evsel__name(evsel), ++n);
980                 }
981                 return NULL;
982         }
983
984         if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) &&
985             trace__read_syscall_info(trace, id))
986                 goto out_cant_read;
987
988         if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL))
989                 goto out_cant_read;
990
991         return &trace->syscalls.table[id];
992
993 out_cant_read:
994         if (verbose) {
995                 fprintf(trace->output, "Problems reading syscall %d", id);
996                 if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL)
997                         fprintf(trace->output, "(%s)", trace->syscalls.table[id].name);
998                 fputs(" information\n", trace->output);
999         }
1000         return NULL;
1001 }
1002
1003 static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
1004                             struct perf_sample *sample)
1005 {
1006         char *msg;
1007         void *args;
1008         size_t printed = 0;
1009         struct thread *thread;
1010         struct syscall *sc = trace__syscall_info(trace, evsel, sample);
1011         struct thread_trace *ttrace;
1012
1013         if (sc == NULL)
1014                 return -1;
1015
1016         if (sc->filtered)
1017                 return 0;
1018
1019         thread = machine__findnew_thread(&trace->host, sample->pid,
1020                                          sample->tid);
1021         ttrace = thread__trace(thread, trace->output);
1022         if (ttrace == NULL)
1023                 return -1;
1024
1025         args = perf_evsel__rawptr(evsel, sample, "args");
1026         if (args == NULL) {
1027                 fprintf(trace->output, "Problems reading syscall arguments\n");
1028                 return -1;
1029         }
1030
1031         ttrace = thread->priv;
1032
1033         if (ttrace->entry_str == NULL) {
1034                 ttrace->entry_str = malloc(1024);
1035                 if (!ttrace->entry_str)
1036                         return -1;
1037         }
1038
1039         ttrace->entry_time = sample->time;
1040         msg = ttrace->entry_str;
1041         printed += scnprintf(msg + printed, 1024 - printed, "%s(", sc->name);
1042
1043         printed += syscall__scnprintf_args(sc, msg + printed, 1024 - printed,  args);
1044
1045         if (!strcmp(sc->name, "exit_group") || !strcmp(sc->name, "exit")) {
1046                 if (!trace->duration_filter) {
1047                         trace__fprintf_entry_head(trace, thread, 1, sample->time, trace->output);
1048                         fprintf(trace->output, "%-70s\n", ttrace->entry_str);
1049                 }
1050         } else
1051                 ttrace->entry_pending = true;
1052
1053         return 0;
1054 }
1055
1056 static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
1057                            struct perf_sample *sample)
1058 {
1059         int ret;
1060         u64 duration = 0;
1061         struct thread *thread;
1062         struct syscall *sc = trace__syscall_info(trace, evsel, sample);
1063         struct thread_trace *ttrace;
1064
1065         if (sc == NULL)
1066                 return -1;
1067
1068         if (sc->filtered)
1069                 return 0;
1070
1071         thread = machine__findnew_thread(&trace->host, sample->pid,
1072                                          sample->tid);
1073         ttrace = thread__trace(thread, trace->output);
1074         if (ttrace == NULL)
1075                 return -1;
1076
1077         ret = perf_evsel__intval(evsel, sample, "ret");
1078
1079         ttrace = thread->priv;
1080
1081         ttrace->exit_time = sample->time;
1082
1083         if (ttrace->entry_time) {
1084                 duration = sample->time - ttrace->entry_time;
1085                 if (trace__filter_duration(trace, duration))
1086                         goto out;
1087         } else if (trace->duration_filter)
1088                 goto out;
1089
1090         trace__fprintf_entry_head(trace, thread, duration, sample->time, trace->output);
1091
1092         if (ttrace->entry_pending) {
1093                 fprintf(trace->output, "%-70s", ttrace->entry_str);
1094         } else {
1095                 fprintf(trace->output, " ... [");
1096                 color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued");
1097                 fprintf(trace->output, "]: %s()", sc->name);
1098         }
1099
1100         if (sc->fmt == NULL) {
1101 signed_print:
1102                 fprintf(trace->output, ") = %d", ret);
1103         } else if (ret < 0 && sc->fmt->errmsg) {
1104                 char bf[256];
1105                 const char *emsg = strerror_r(-ret, bf, sizeof(bf)),
1106                            *e = audit_errno_to_name(-ret);
1107
1108                 fprintf(trace->output, ") = -1 %s %s", e, emsg);
1109         } else if (ret == 0 && sc->fmt->timeout)
1110                 fprintf(trace->output, ") = 0 Timeout");
1111         else if (sc->fmt->hexret)
1112                 fprintf(trace->output, ") = %#x", ret);
1113         else
1114                 goto signed_print;
1115
1116         fputc('\n', trace->output);
1117 out:
1118         ttrace->entry_pending = false;
1119
1120         return 0;
1121 }
1122
1123 static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel,
1124                                      struct perf_sample *sample)
1125 {
1126         u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
1127         double runtime_ms = (double)runtime / NSEC_PER_MSEC;
1128         struct thread *thread = machine__findnew_thread(&trace->host,
1129                                                         sample->pid,
1130                                                         sample->tid);
1131         struct thread_trace *ttrace = thread__trace(thread, trace->output);
1132
1133         if (ttrace == NULL)
1134                 goto out_dump;
1135
1136         ttrace->runtime_ms += runtime_ms;
1137         trace->runtime_ms += runtime_ms;
1138         return 0;
1139
1140 out_dump:
1141         fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n",
1142                evsel->name,
1143                perf_evsel__strval(evsel, sample, "comm"),
1144                (pid_t)perf_evsel__intval(evsel, sample, "pid"),
1145                runtime,
1146                perf_evsel__intval(evsel, sample, "vruntime"));
1147         return 0;
1148 }
1149
1150 static bool skip_sample(struct trace *trace, struct perf_sample *sample)
1151 {
1152         if ((trace->pid_list && intlist__find(trace->pid_list, sample->pid)) ||
1153             (trace->tid_list && intlist__find(trace->tid_list, sample->tid)))
1154                 return false;
1155
1156         if (trace->pid_list || trace->tid_list)
1157                 return true;
1158
1159         return false;
1160 }
1161
1162 static int trace__process_sample(struct perf_tool *tool,
1163                                  union perf_event *event __maybe_unused,
1164                                  struct perf_sample *sample,
1165                                  struct perf_evsel *evsel,
1166                                  struct machine *machine __maybe_unused)
1167 {
1168         struct trace *trace = container_of(tool, struct trace, tool);
1169         int err = 0;
1170
1171         tracepoint_handler handler = evsel->handler.func;
1172
1173         if (skip_sample(trace, sample))
1174                 return 0;
1175
1176         if (!trace->full_time && trace->base_time == 0)
1177                 trace->base_time = sample->time;
1178
1179         if (handler)
1180                 handler(trace, evsel, sample);
1181
1182         return err;
1183 }
1184
1185 static bool
1186 perf_session__has_tp(struct perf_session *session, const char *name)
1187 {
1188         struct perf_evsel *evsel;
1189
1190         evsel = perf_evlist__find_tracepoint_by_name(session->evlist, name);
1191
1192         return evsel != NULL;
1193 }
1194
1195 static int parse_target_str(struct trace *trace)
1196 {
1197         if (trace->opts.target.pid) {
1198                 trace->pid_list = intlist__new(trace->opts.target.pid);
1199                 if (trace->pid_list == NULL) {
1200                         pr_err("Error parsing process id string\n");
1201                         return -EINVAL;
1202                 }
1203         }
1204
1205         if (trace->opts.target.tid) {
1206                 trace->tid_list = intlist__new(trace->opts.target.tid);
1207                 if (trace->tid_list == NULL) {
1208                         pr_err("Error parsing thread id string\n");
1209                         return -EINVAL;
1210                 }
1211         }
1212
1213         return 0;
1214 }
1215
1216 static int trace__run(struct trace *trace, int argc, const char **argv)
1217 {
1218         struct perf_evlist *evlist = perf_evlist__new();
1219         struct perf_evsel *evsel;
1220         int err = -1, i;
1221         unsigned long before;
1222         const bool forks = argc > 0;
1223
1224         if (evlist == NULL) {
1225                 fprintf(trace->output, "Not enough memory to run!\n");
1226                 goto out;
1227         }
1228
1229         if (perf_evlist__add_newtp(evlist, "raw_syscalls", "sys_enter", trace__sys_enter) ||
1230             perf_evlist__add_newtp(evlist, "raw_syscalls", "sys_exit", trace__sys_exit)) {
1231                 fprintf(trace->output, "Couldn't read the raw_syscalls tracepoints information!\n");
1232                 goto out_delete_evlist;
1233         }
1234
1235         if (trace->sched &&
1236             perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
1237                                    trace__sched_stat_runtime)) {
1238                 fprintf(trace->output, "Couldn't read the sched_stat_runtime tracepoint information!\n");
1239                 goto out_delete_evlist;
1240         }
1241
1242         err = perf_evlist__create_maps(evlist, &trace->opts.target);
1243         if (err < 0) {
1244                 fprintf(trace->output, "Problems parsing the target to trace, check your options!\n");
1245                 goto out_delete_evlist;
1246         }
1247
1248         err = trace__symbols_init(trace, evlist);
1249         if (err < 0) {
1250                 fprintf(trace->output, "Problems initializing symbol libraries!\n");
1251                 goto out_delete_maps;
1252         }
1253
1254         perf_evlist__config(evlist, &trace->opts);
1255
1256         signal(SIGCHLD, sig_handler);
1257         signal(SIGINT, sig_handler);
1258
1259         if (forks) {
1260                 err = perf_evlist__prepare_workload(evlist, &trace->opts.target,
1261                                                     argv, false, false);
1262                 if (err < 0) {
1263                         fprintf(trace->output, "Couldn't run the workload!\n");
1264                         goto out_delete_maps;
1265                 }
1266         }
1267
1268         err = perf_evlist__open(evlist);
1269         if (err < 0) {
1270                 fprintf(trace->output, "Couldn't create the events: %s\n", strerror(errno));
1271                 goto out_delete_maps;
1272         }
1273
1274         err = perf_evlist__mmap(evlist, UINT_MAX, false);
1275         if (err < 0) {
1276                 fprintf(trace->output, "Couldn't mmap the events: %s\n", strerror(errno));
1277                 goto out_close_evlist;
1278         }
1279
1280         perf_evlist__enable(evlist);
1281
1282         if (forks)
1283                 perf_evlist__start_workload(evlist);
1284
1285         trace->multiple_threads = evlist->threads->map[0] == -1 || evlist->threads->nr > 1;
1286 again:
1287         before = trace->nr_events;
1288
1289         for (i = 0; i < evlist->nr_mmaps; i++) {
1290                 union perf_event *event;
1291
1292                 while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) {
1293                         const u32 type = event->header.type;
1294                         tracepoint_handler handler;
1295                         struct perf_sample sample;
1296
1297                         ++trace->nr_events;
1298
1299                         err = perf_evlist__parse_sample(evlist, event, &sample);
1300                         if (err) {
1301                                 fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err);
1302                                 continue;
1303                         }
1304
1305                         if (!trace->full_time && trace->base_time == 0)
1306                                 trace->base_time = sample.time;
1307
1308                         if (type != PERF_RECORD_SAMPLE) {
1309                                 trace__process_event(trace, &trace->host, event);
1310                                 continue;
1311                         }
1312
1313                         evsel = perf_evlist__id2evsel(evlist, sample.id);
1314                         if (evsel == NULL) {
1315                                 fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample.id);
1316                                 continue;
1317                         }
1318
1319                         if (sample.raw_data == NULL) {
1320                                 fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
1321                                        perf_evsel__name(evsel), sample.tid,
1322                                        sample.cpu, sample.raw_size);
1323                                 continue;
1324                         }
1325
1326                         handler = evsel->handler.func;
1327                         handler(trace, evsel, &sample);
1328
1329                         if (done)
1330                                 goto out_unmap_evlist;
1331                 }
1332         }
1333
1334         if (trace->nr_events == before) {
1335                 if (done)
1336                         goto out_unmap_evlist;
1337
1338                 poll(evlist->pollfd, evlist->nr_fds, -1);
1339         }
1340
1341         if (done)
1342                 perf_evlist__disable(evlist);
1343
1344         goto again;
1345
1346 out_unmap_evlist:
1347         perf_evlist__munmap(evlist);
1348 out_close_evlist:
1349         perf_evlist__close(evlist);
1350 out_delete_maps:
1351         perf_evlist__delete_maps(evlist);
1352 out_delete_evlist:
1353         perf_evlist__delete(evlist);
1354 out:
1355         return err;
1356 }
1357
1358 static int trace__replay(struct trace *trace)
1359 {
1360         const struct perf_evsel_str_handler handlers[] = {
1361                 { "raw_syscalls:sys_enter",  trace__sys_enter, },
1362                 { "raw_syscalls:sys_exit",   trace__sys_exit, },
1363         };
1364
1365         struct perf_session *session;
1366         int err = -1;
1367
1368         trace->tool.sample        = trace__process_sample;
1369         trace->tool.mmap          = perf_event__process_mmap;
1370         trace->tool.mmap2         = perf_event__process_mmap2;
1371         trace->tool.comm          = perf_event__process_comm;
1372         trace->tool.exit          = perf_event__process_exit;
1373         trace->tool.fork          = perf_event__process_fork;
1374         trace->tool.attr          = perf_event__process_attr;
1375         trace->tool.tracing_data = perf_event__process_tracing_data;
1376         trace->tool.build_id      = perf_event__process_build_id;
1377
1378         trace->tool.ordered_samples = true;
1379         trace->tool.ordering_requires_timestamps = true;
1380
1381         /* add tid to output */
1382         trace->multiple_threads = true;
1383
1384         if (symbol__init() < 0)
1385                 return -1;
1386
1387         session = perf_session__new(input_name, O_RDONLY, 0, false,
1388                                     &trace->tool);
1389         if (session == NULL)
1390                 return -ENOMEM;
1391
1392         err = perf_session__set_tracepoints_handlers(session, handlers);
1393         if (err)
1394                 goto out;
1395
1396         if (!perf_session__has_tp(session, "raw_syscalls:sys_enter")) {
1397                 pr_err("Data file does not have raw_syscalls:sys_enter events\n");
1398                 goto out;
1399         }
1400
1401         if (!perf_session__has_tp(session, "raw_syscalls:sys_exit")) {
1402                 pr_err("Data file does not have raw_syscalls:sys_exit events\n");
1403                 goto out;
1404         }
1405
1406         err = parse_target_str(trace);
1407         if (err != 0)
1408                 goto out;
1409
1410         setup_pager();
1411
1412         err = perf_session__process_events(session, &trace->tool);
1413         if (err)
1414                 pr_err("Failed to process events, error %d", err);
1415
1416 out:
1417         perf_session__delete(session);
1418
1419         return err;
1420 }
1421
/*
 * Print the banner and column titles of the per-thread summary to 'fp'.
 * Returns the sum of the fprintf() return values.
 */
static size_t trace__fprintf_threads_header(FILE *fp)
{
        static const char * const header[] = {
                "\n _____________________________________________________________________\n",
                " __)    Summary of events    (__\n\n",
                "              [ task - pid ]     [ events ] [ ratio ]  [ runtime ]\n",
                " _____________________________________________________________________\n\n",
        };
        size_t printed = 0;
        size_t i;

        for (i = 0; i < sizeof(header) / sizeof(header[0]); i++)
                printed += fprintf(fp, "%s", header[i]);

        return printed;
}
1433
1434 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
1435 {
1436         size_t printed = trace__fprintf_threads_header(fp);
1437         struct rb_node *nd;
1438
1439         for (nd = rb_first(&trace->host.threads); nd; nd = rb_next(nd)) {
1440                 struct thread *thread = rb_entry(nd, struct thread, rb_node);
1441                 struct thread_trace *ttrace = thread->priv;
1442                 const char *color;
1443                 double ratio;
1444
1445                 if (ttrace == NULL)
1446                         continue;
1447
1448                 ratio = (double)ttrace->nr_events / trace->nr_events * 100.0;
1449
1450                 color = PERF_COLOR_NORMAL;
1451                 if (ratio > 50.0)
1452                         color = PERF_COLOR_RED;
1453                 else if (ratio > 25.0)
1454                         color = PERF_COLOR_GREEN;
1455                 else if (ratio > 5.0)
1456                         color = PERF_COLOR_YELLOW;
1457
1458                 printed += color_fprintf(fp, color, "%20s", thread->comm);
1459                 printed += fprintf(fp, " - %-5d :%11lu   [", thread->tid, ttrace->nr_events);
1460                 printed += color_fprintf(fp, color, "%5.1f%%", ratio);
1461                 printed += fprintf(fp, " ] %10.3f ms\n", ttrace->runtime_ms);
1462         }
1463
1464         return printed;
1465 }
1466
1467 static int trace__set_duration(const struct option *opt, const char *str,
1468                                int unset __maybe_unused)
1469 {
1470         struct trace *trace = opt->value;
1471
1472         trace->duration_filter = atof(str);
1473         return 0;
1474 }
1475
1476 static int trace__open_output(struct trace *trace, const char *filename)
1477 {
1478         struct stat st;
1479
1480         if (!stat(filename, &st) && st.st_size) {
1481                 char oldname[PATH_MAX];
1482
1483                 scnprintf(oldname, sizeof(oldname), "%s.old", filename);
1484                 unlink(oldname);
1485                 rename(filename, oldname);
1486         }
1487
1488         trace->output = fopen(filename, "w");
1489
1490         return trace->output == NULL ? -errno : 0;
1491 }
1492
1493 int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
1494 {
1495         const char * const trace_usage[] = {
1496                 "perf trace [<options>] [<command>]",
1497                 "perf trace [<options>] -- <command> [<options>]",
1498                 NULL
1499         };
1500         struct trace trace = {
1501                 .audit_machine = audit_detect_machine(),
1502                 .syscalls = {
1503                         . max = -1,
1504                 },
1505                 .opts = {
1506                         .target = {
1507                                 .uid       = UINT_MAX,
1508                                 .uses_mmap = true,
1509                         },
1510                         .user_freq     = UINT_MAX,
1511                         .user_interval = ULLONG_MAX,
1512                         .no_delay      = true,
1513                         .mmap_pages    = 1024,
1514                 },
1515                 .output = stdout,
1516                 .show_comm = true,
1517         };
1518         const char *output_name = NULL;
1519         const char *ev_qualifier_str = NULL;
1520         const struct option trace_options[] = {
1521         OPT_BOOLEAN(0, "comm", &trace.show_comm,
1522                     "show the thread COMM next to its id"),
1523         OPT_STRING('e', "expr", &ev_qualifier_str, "expr",
1524                     "list of events to trace"),
1525         OPT_STRING('o', "output", &output_name, "file", "output file name"),
1526         OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"),
1527         OPT_STRING('p', "pid", &trace.opts.target.pid, "pid",
1528                     "trace events on existing process id"),
1529         OPT_STRING('t', "tid", &trace.opts.target.tid, "tid",
1530                     "trace events on existing thread id"),
1531         OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide,
1532                     "system-wide collection from all CPUs"),
1533         OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu",
1534                     "list of cpus to monitor"),
1535         OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit,
1536                     "child tasks do not inherit counters"),
1537         OPT_CALLBACK('m', "mmap-pages", &trace.opts.mmap_pages, "pages",
1538                      "number of mmap data pages",
1539                      perf_evlist__parse_mmap_pages),
1540         OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user",
1541                    "user to profile"),
1542         OPT_CALLBACK(0, "duration", &trace, "float",
1543                      "show only events with duration > N.M ms",
1544                      trace__set_duration),
1545         OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"),
1546         OPT_INCR('v', "verbose", &verbose, "be more verbose"),
1547         OPT_BOOLEAN('T', "time", &trace.full_time,
1548                     "Show full timestamp, not time relative to first start"),
1549         OPT_END()
1550         };
1551         int err;
1552         char bf[BUFSIZ];
1553
1554         argc = parse_options(argc, argv, trace_options, trace_usage, 0);
1555
1556         if (output_name != NULL) {
1557                 err = trace__open_output(&trace, output_name);
1558                 if (err < 0) {
1559                         perror("failed to create output file");
1560                         goto out;
1561                 }
1562         }
1563
1564         if (ev_qualifier_str != NULL) {
1565                 const char *s = ev_qualifier_str;
1566
1567                 trace.not_ev_qualifier = *s == '!';
1568                 if (trace.not_ev_qualifier)
1569                         ++s;
1570                 trace.ev_qualifier = strlist__new(true, s);
1571                 if (trace.ev_qualifier == NULL) {
1572                         fputs("Not enough memory to parse event qualifier",
1573                               trace.output);
1574                         err = -ENOMEM;
1575                         goto out_close;
1576                 }
1577         }
1578
1579         err = perf_target__validate(&trace.opts.target);
1580         if (err) {
1581                 perf_target__strerror(&trace.opts.target, err, bf, sizeof(bf));
1582                 fprintf(trace.output, "%s", bf);
1583                 goto out_close;
1584         }
1585
1586         err = perf_target__parse_uid(&trace.opts.target);
1587         if (err) {
1588                 perf_target__strerror(&trace.opts.target, err, bf, sizeof(bf));
1589                 fprintf(trace.output, "%s", bf);
1590                 goto out_close;
1591         }
1592
1593         if (!argc && perf_target__none(&trace.opts.target))
1594                 trace.opts.target.system_wide = true;
1595
1596         if (input_name)
1597                 err = trace__replay(&trace);
1598         else
1599                 err = trace__run(&trace, argc, argv);
1600
1601         if (trace.sched && !err)
1602                 trace__fprintf_thread_summary(&trace, trace.output);
1603
1604 out_close:
1605         if (output_name != NULL)
1606                 fclose(trace.output);
1607 out:
1608         return err;
1609 }