3ca6a856ba62d5031b64a3cf8b4430b19429fb06
[firefly-linux-kernel-4.4.55.git] / tools / perf / builtin-trace.c
1 #include <traceevent/event-parse.h>
2 #include "builtin.h"
3 #include "util/color.h"
4 #include "util/debug.h"
5 #include "util/evlist.h"
6 #include "util/machine.h"
7 #include "util/session.h"
8 #include "util/thread.h"
9 #include "util/parse-options.h"
10 #include "util/strlist.h"
11 #include "util/intlist.h"
12 #include "util/thread_map.h"
13
14 #include <libaudit.h>
15 #include <stdlib.h>
16 #include <sys/eventfd.h>
17 #include <sys/mman.h>
18 #include <linux/futex.h>
19
/*
 * Fallback values for constants that the headers of older distros may not
 * provide.  Each one is defined only when the system headers left it out.
 */
#if !defined(MAP_STACK)
# define MAP_STACK              0x20000
#endif

#if !defined(MADV_HWPOISON)
# define MADV_HWPOISON          100
#endif

#if !defined(MADV_MERGEABLE)
# define MADV_MERGEABLE         12
#endif

#if !defined(MADV_UNMERGEABLE)
# define MADV_UNMERGEABLE       13
#endif
36
37 struct syscall_arg {
38         unsigned long val;
39         void          *parm;
40         u8            idx;
41         u8            mask;
42 };
43
/* A table of printable names, looked up by an integer argument value. */
struct strarray {
        /* Number of slots in entries[]. */
        int         nr_entries;
        /* The names themselves; indexed by the argument value. */
        const char **entries;
};

/*
 * Define 'strarray__<array>', a struct strarray describing the string
 * table 'array'.  'array' has to be a real array (not a pointer) so that
 * ARRAY_SIZE() can count its elements.
 */
#define DEFINE_STRARRAY(array) \
        struct strarray strarray__##array = { \
                .nr_entries = ARRAY_SIZE(array), \
                .entries    = array, \
        }
53
54 static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size,
55                                               struct syscall_arg *arg)
56 {
57         int idx = arg->val;
58         struct strarray *sa = arg->parm;
59
60         if (idx < 0 || idx >= sa->nr_entries)
61                 return scnprintf(bf, size, "%d", idx);
62
63         return scnprintf(bf, size, "%s", sa->entries[idx]);
64 }
65
66 #define SCA_STRARRAY syscall_arg__scnprintf_strarray
67
68 static size_t syscall_arg__scnprintf_hex(char *bf, size_t size,
69                                          struct syscall_arg *arg)
70 {
71         return scnprintf(bf, size, "%#lx", arg->val);
72 }
73
74 #define SCA_HEX syscall_arg__scnprintf_hex
75
76 static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size,
77                                                struct syscall_arg *arg)
78 {
79         int printed = 0, prot = arg->val;
80
81         if (prot == PROT_NONE)
82                 return scnprintf(bf, size, "NONE");
83 #define P_MMAP_PROT(n) \
84         if (prot & PROT_##n) { \
85                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
86                 prot &= ~PROT_##n; \
87         }
88
89         P_MMAP_PROT(EXEC);
90         P_MMAP_PROT(READ);
91         P_MMAP_PROT(WRITE);
92 #ifdef PROT_SEM
93         P_MMAP_PROT(SEM);
94 #endif
95         P_MMAP_PROT(GROWSDOWN);
96         P_MMAP_PROT(GROWSUP);
97 #undef P_MMAP_PROT
98
99         if (prot)
100                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", prot);
101
102         return printed;
103 }
104
105 #define SCA_MMAP_PROT syscall_arg__scnprintf_mmap_prot
106
107 static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size,
108                                                 struct syscall_arg *arg)
109 {
110         int printed = 0, flags = arg->val;
111
112 #define P_MMAP_FLAG(n) \
113         if (flags & MAP_##n) { \
114                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
115                 flags &= ~MAP_##n; \
116         }
117
118         P_MMAP_FLAG(SHARED);
119         P_MMAP_FLAG(PRIVATE);
120 #ifdef MAP_32BIT
121         P_MMAP_FLAG(32BIT);
122 #endif
123         P_MMAP_FLAG(ANONYMOUS);
124         P_MMAP_FLAG(DENYWRITE);
125         P_MMAP_FLAG(EXECUTABLE);
126         P_MMAP_FLAG(FILE);
127         P_MMAP_FLAG(FIXED);
128         P_MMAP_FLAG(GROWSDOWN);
129 #ifdef MAP_HUGETLB
130         P_MMAP_FLAG(HUGETLB);
131 #endif
132         P_MMAP_FLAG(LOCKED);
133         P_MMAP_FLAG(NONBLOCK);
134         P_MMAP_FLAG(NORESERVE);
135         P_MMAP_FLAG(POPULATE);
136         P_MMAP_FLAG(STACK);
137 #ifdef MAP_UNINITIALIZED
138         P_MMAP_FLAG(UNINITIALIZED);
139 #endif
140 #undef P_MMAP_FLAG
141
142         if (flags)
143                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
144
145         return printed;
146 }
147
148 #define SCA_MMAP_FLAGS syscall_arg__scnprintf_mmap_flags
149
150 static size_t syscall_arg__scnprintf_madvise_behavior(char *bf, size_t size,
151                                                       struct syscall_arg *arg)
152 {
153         int behavior = arg->val;
154
155         switch (behavior) {
156 #define P_MADV_BHV(n) case MADV_##n: return scnprintf(bf, size, #n)
157         P_MADV_BHV(NORMAL);
158         P_MADV_BHV(RANDOM);
159         P_MADV_BHV(SEQUENTIAL);
160         P_MADV_BHV(WILLNEED);
161         P_MADV_BHV(DONTNEED);
162         P_MADV_BHV(REMOVE);
163         P_MADV_BHV(DONTFORK);
164         P_MADV_BHV(DOFORK);
165         P_MADV_BHV(HWPOISON);
166 #ifdef MADV_SOFT_OFFLINE
167         P_MADV_BHV(SOFT_OFFLINE);
168 #endif
169         P_MADV_BHV(MERGEABLE);
170         P_MADV_BHV(UNMERGEABLE);
171 #ifdef MADV_HUGEPAGE
172         P_MADV_BHV(HUGEPAGE);
173 #endif
174 #ifdef MADV_NOHUGEPAGE
175         P_MADV_BHV(NOHUGEPAGE);
176 #endif
177 #ifdef MADV_DONTDUMP
178         P_MADV_BHV(DONTDUMP);
179 #endif
180 #ifdef MADV_DODUMP
181         P_MADV_BHV(DODUMP);
182 #endif
183 #undef P_MADV_PHV
184         default: break;
185         }
186
187         return scnprintf(bf, size, "%#x", behavior);
188 }
189
190 #define SCA_MADV_BHV syscall_arg__scnprintf_madvise_behavior
191
192 static size_t syscall_arg__scnprintf_flock(char *bf, size_t size,
193                                            struct syscall_arg *arg)
194 {
195         int printed = 0, op = arg->val;
196
197         if (op == 0)
198                 return scnprintf(bf, size, "NONE");
199 #define P_CMD(cmd) \
200         if ((op & LOCK_##cmd) == LOCK_##cmd) { \
201                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #cmd); \
202                 op &= ~LOCK_##cmd; \
203         }
204
205         P_CMD(SH);
206         P_CMD(EX);
207         P_CMD(NB);
208         P_CMD(UN);
209         P_CMD(MAND);
210         P_CMD(RW);
211         P_CMD(READ);
212         P_CMD(WRITE);
213 #undef P_OP
214
215         if (op)
216                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", op);
217
218         return printed;
219 }
220
221 #define SCA_FLOCK syscall_arg__scnprintf_flock
222
223 static size_t syscall_arg__scnprintf_futex_op(char *bf, size_t size, struct syscall_arg *arg)
224 {
225         enum syscall_futex_args {
226                 SCF_UADDR   = (1 << 0),
227                 SCF_OP      = (1 << 1),
228                 SCF_VAL     = (1 << 2),
229                 SCF_TIMEOUT = (1 << 3),
230                 SCF_UADDR2  = (1 << 4),
231                 SCF_VAL3    = (1 << 5),
232         };
233         int op = arg->val;
234         int cmd = op & FUTEX_CMD_MASK;
235         size_t printed = 0;
236
237         switch (cmd) {
238 #define P_FUTEX_OP(n) case FUTEX_##n: printed = scnprintf(bf, size, #n);
239         P_FUTEX_OP(WAIT);           arg->mask |= SCF_VAL3|SCF_UADDR2;             break;
240         P_FUTEX_OP(WAKE);           arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
241         P_FUTEX_OP(FD);             arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
242         P_FUTEX_OP(REQUEUE);        arg->mask |= SCF_VAL3|SCF_TIMEOUT;            break;
243         P_FUTEX_OP(CMP_REQUEUE);    arg->mask |= SCF_TIMEOUT;                     break;
244         P_FUTEX_OP(CMP_REQUEUE_PI); arg->mask |= SCF_TIMEOUT;                     break;
245         P_FUTEX_OP(WAKE_OP);                                                      break;
246         P_FUTEX_OP(LOCK_PI);        arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
247         P_FUTEX_OP(UNLOCK_PI);      arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
248         P_FUTEX_OP(TRYLOCK_PI);     arg->mask |= SCF_VAL3|SCF_UADDR2;             break;
249         P_FUTEX_OP(WAIT_BITSET);    arg->mask |= SCF_UADDR2;                      break;
250         P_FUTEX_OP(WAKE_BITSET);    arg->mask |= SCF_UADDR2;                      break;
251         P_FUTEX_OP(WAIT_REQUEUE_PI);                                              break;
252         default: printed = scnprintf(bf, size, "%#x", cmd);                       break;
253         }
254
255         if (op & FUTEX_PRIVATE_FLAG)
256                 printed += scnprintf(bf + printed, size - printed, "|PRIV");
257
258         if (op & FUTEX_CLOCK_REALTIME)
259                 printed += scnprintf(bf + printed, size - printed, "|CLKRT");
260
261         return printed;
262 }
263
264 #define SCA_FUTEX_OP  syscall_arg__scnprintf_futex_op
265
/* Names for the 'op' argument of epoll_ctl(); slot 0 is left unset. */
static const char *epoll_ctl_ops[] = {
        [1] = "ADD",
        [2] = "DEL",
        [3] = "MOD",
};
static DEFINE_STRARRAY(epoll_ctl_ops);

/* Names for the 'which' argument of getitimer()/setitimer(). */
static const char *itimers[] = {
        "REAL",
        "VIRTUAL",
        "PROF",
};
static DEFINE_STRARRAY(itimers);

/*
 * Names for the 'whence' argument of lseek().  DATA and HOLE are only
 * present when the headers define SEEK_DATA / SEEK_HOLE.
 */
static const char *whences[] = {
        "SET",
        "CUR",
        "END",
#ifdef SEEK_DATA
        "DATA",
#endif
#ifdef SEEK_HOLE
        "HOLE",
#endif
};
static DEFINE_STRARRAY(whences);
281
/* Names for the 'cmd' argument of fcntl(), indexed by command number. */
static const char *fcntl_cmds[] = {
        "DUPFD",
        "GETFD",
        "SETFD",
        "GETFL",
        "SETFL",
        "GETLK",
        "SETLK",
        "SETLKW",
        "SETOWN",
        "GETOWN",
        "SETSIG",
        "GETSIG",
        "F_GETLK64",
        "F_SETLK64",
        "F_SETLKW64",
        "F_SETOWN_EX",
        "F_GETOWN_EX",
        "F_GETOWNER_UIDS",
};
static DEFINE_STRARRAY(fcntl_cmds);

/* Names for the 'resource' argument of the *rlimit* syscalls. */
static const char *rlimit_resources[] = {
        "CPU",
        "FSIZE",
        "DATA",
        "STACK",
        "CORE",
        "RSS",
        "NPROC",
        "NOFILE",
        "MEMLOCK",
        "AS",
        "LOCKS",
        "SIGPENDING",
        "MSGQUEUE",
        "NICE",
        "RTPRIO",
        "RTTIME",
};
static DEFINE_STRARRAY(rlimit_resources);
296
/* Names for the 'how' argument of rt_sigprocmask(). */
static const char *sighow[] = {
        "BLOCK",
        "UNBLOCK",
        "SETMASK",
};
static DEFINE_STRARRAY(sighow);

/* Names for the clock id argument of clock_gettime(). */
static const char *clockid[] = {
        "REALTIME",
        "MONOTONIC",
        "PROCESS_CPUTIME_ID",
        "THREAD_CPUTIME_ID",
        "MONOTONIC_RAW",
        "REALTIME_COARSE",
        "MONOTONIC_COARSE",
};
static DEFINE_STRARRAY(clockid);
305
/*
 * Names for the address family argument of socket()/socketpair(), indexed
 * by the numeric AF_* value.
 *
 * The table is positional, so every family number up to the last one needs
 * a slot.  Family 28 (AF_MPLS) must be present: without it every name from
 * "CAN" onwards sits one slot too low (AF_CAN is 29, AF_BLUETOOTH is 31,
 * AF_VSOCK is 40).
 */
static const char *socket_families[] = {
        "UNSPEC", "LOCAL", "INET", "AX25", "IPX", "APPLETALK", "NETROM",
        "BRIDGE", "ATMPVC", "X25", "INET6", "ROSE", "DECnet", "NETBEUI",
        "SECURITY", "KEY", "NETLINK", "PACKET", "ASH", "ECONET", "ATMSVC",
        "RDS", "SNA", "IRDA", "PPPOX", "WANPIPE", "LLC", "IB", "MPLS", "CAN",
        "TIPC", "BLUETOOTH", "IUCV", "RXRPC", "ISDN", "PHONET", "IEEE802154",
        "CAIF", "ALG", "NFC", "VSOCK",
};
314 static DEFINE_STRARRAY(socket_families);
315
316 #ifndef SOCK_TYPE_MASK
317 #define SOCK_TYPE_MASK 0xf
318 #endif
319
320 static size_t syscall_arg__scnprintf_socket_type(char *bf, size_t size,
321                                                       struct syscall_arg *arg)
322 {
323         size_t printed;
324         int type = arg->val,
325             flags = type & ~SOCK_TYPE_MASK;
326
327         type &= SOCK_TYPE_MASK;
328         /*
329          * Can't use a strarray, MIPS may override for ABI reasons.
330          */
331         switch (type) {
332 #define P_SK_TYPE(n) case SOCK_##n: printed = scnprintf(bf, size, #n); break;
333         P_SK_TYPE(STREAM);
334         P_SK_TYPE(DGRAM);
335         P_SK_TYPE(RAW);
336         P_SK_TYPE(RDM);
337         P_SK_TYPE(SEQPACKET);
338         P_SK_TYPE(DCCP);
339         P_SK_TYPE(PACKET);
340 #undef P_SK_TYPE
341         default:
342                 printed = scnprintf(bf, size, "%#x", type);
343         }
344
345 #define P_SK_FLAG(n) \
346         if (flags & SOCK_##n) { \
347                 printed += scnprintf(bf + printed, size - printed, "|%s", #n); \
348                 flags &= ~SOCK_##n; \
349         }
350
351         P_SK_FLAG(CLOEXEC);
352         P_SK_FLAG(NONBLOCK);
353 #undef P_SK_FLAG
354
355         if (flags)
356                 printed += scnprintf(bf + printed, size - printed, "|%#x", flags);
357
358         return printed;
359 }
360
361 #define SCA_SK_TYPE syscall_arg__scnprintf_socket_type
362
363 #ifndef MSG_PROBE
364 #define MSG_PROBE            0x10
365 #endif
366 #ifndef MSG_WAITFORONE
367 #define MSG_WAITFORONE  0x10000
368 #endif
369 #ifndef MSG_SENDPAGE_NOTLAST
370 #define MSG_SENDPAGE_NOTLAST 0x20000
371 #endif
372 #ifndef MSG_FASTOPEN
373 #define MSG_FASTOPEN         0x20000000
374 #endif
375
376 static size_t syscall_arg__scnprintf_msg_flags(char *bf, size_t size,
377                                                struct syscall_arg *arg)
378 {
379         int printed = 0, flags = arg->val;
380
381         if (flags == 0)
382                 return scnprintf(bf, size, "NONE");
383 #define P_MSG_FLAG(n) \
384         if (flags & MSG_##n) { \
385                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
386                 flags &= ~MSG_##n; \
387         }
388
389         P_MSG_FLAG(OOB);
390         P_MSG_FLAG(PEEK);
391         P_MSG_FLAG(DONTROUTE);
392         P_MSG_FLAG(TRYHARD);
393         P_MSG_FLAG(CTRUNC);
394         P_MSG_FLAG(PROBE);
395         P_MSG_FLAG(TRUNC);
396         P_MSG_FLAG(DONTWAIT);
397         P_MSG_FLAG(EOR);
398         P_MSG_FLAG(WAITALL);
399         P_MSG_FLAG(FIN);
400         P_MSG_FLAG(SYN);
401         P_MSG_FLAG(CONFIRM);
402         P_MSG_FLAG(RST);
403         P_MSG_FLAG(ERRQUEUE);
404         P_MSG_FLAG(NOSIGNAL);
405         P_MSG_FLAG(MORE);
406         P_MSG_FLAG(WAITFORONE);
407         P_MSG_FLAG(SENDPAGE_NOTLAST);
408         P_MSG_FLAG(FASTOPEN);
409         P_MSG_FLAG(CMSG_CLOEXEC);
410 #undef P_MSG_FLAG
411
412         if (flags)
413                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
414
415         return printed;
416 }
417
418 #define SCA_MSG_FLAGS syscall_arg__scnprintf_msg_flags
419
420 static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size,
421                                                  struct syscall_arg *arg)
422 {
423         size_t printed = 0;
424         int mode = arg->val;
425
426         if (mode == F_OK) /* 0 */
427                 return scnprintf(bf, size, "F");
428 #define P_MODE(n) \
429         if (mode & n##_OK) { \
430                 printed += scnprintf(bf + printed, size - printed, "%s", #n); \
431                 mode &= ~n##_OK; \
432         }
433
434         P_MODE(R);
435         P_MODE(W);
436         P_MODE(X);
437 #undef P_MODE
438
439         if (mode)
440                 printed += scnprintf(bf + printed, size - printed, "|%#x", mode);
441
442         return printed;
443 }
444
445 #define SCA_ACCMODE syscall_arg__scnprintf_access_mode
446
447 static size_t syscall_arg__scnprintf_open_flags(char *bf, size_t size,
448                                                struct syscall_arg *arg)
449 {
450         int printed = 0, flags = arg->val;
451
452         if (!(flags & O_CREAT))
453                 arg->mask |= 1 << (arg->idx + 1); /* Mask the mode parm */
454
455         if (flags == 0)
456                 return scnprintf(bf, size, "RDONLY");
457 #define P_FLAG(n) \
458         if (flags & O_##n) { \
459                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
460                 flags &= ~O_##n; \
461         }
462
463         P_FLAG(APPEND);
464         P_FLAG(ASYNC);
465         P_FLAG(CLOEXEC);
466         P_FLAG(CREAT);
467         P_FLAG(DIRECT);
468         P_FLAG(DIRECTORY);
469         P_FLAG(EXCL);
470         P_FLAG(LARGEFILE);
471         P_FLAG(NOATIME);
472         P_FLAG(NOCTTY);
473 #ifdef O_NONBLOCK
474         P_FLAG(NONBLOCK);
475 #elif O_NDELAY
476         P_FLAG(NDELAY);
477 #endif
478 #ifdef O_PATH
479         P_FLAG(PATH);
480 #endif
481         P_FLAG(RDWR);
482 #ifdef O_DSYNC
483         if ((flags & O_SYNC) == O_SYNC)
484                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", "SYNC");
485         else {
486                 P_FLAG(DSYNC);
487         }
488 #else
489         P_FLAG(SYNC);
490 #endif
491         P_FLAG(TRUNC);
492         P_FLAG(WRONLY);
493 #undef P_FLAG
494
495         if (flags)
496                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
497
498         return printed;
499 }
500
501 #define SCA_OPEN_FLAGS syscall_arg__scnprintf_open_flags
502
503 static size_t syscall_arg__scnprintf_eventfd_flags(char *bf, size_t size,
504                                                    struct syscall_arg *arg)
505 {
506         int printed = 0, flags = arg->val;
507
508         if (flags == 0)
509                 return scnprintf(bf, size, "NONE");
510 #define P_FLAG(n) \
511         if (flags & EFD_##n) { \
512                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
513                 flags &= ~EFD_##n; \
514         }
515
516         P_FLAG(SEMAPHORE);
517         P_FLAG(CLOEXEC);
518         P_FLAG(NONBLOCK);
519 #undef P_FLAG
520
521         if (flags)
522                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
523
524         return printed;
525 }
526
527 #define SCA_EFD_FLAGS syscall_arg__scnprintf_eventfd_flags
528
529 static size_t syscall_arg__scnprintf_pipe_flags(char *bf, size_t size,
530                                                 struct syscall_arg *arg)
531 {
532         int printed = 0, flags = arg->val;
533
534 #define P_FLAG(n) \
535         if (flags & O_##n) { \
536                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
537                 flags &= ~O_##n; \
538         }
539
540         P_FLAG(CLOEXEC);
541         P_FLAG(NONBLOCK);
542 #undef P_FLAG
543
544         if (flags)
545                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
546
547         return printed;
548 }
549
550 #define SCA_PIPE_FLAGS syscall_arg__scnprintf_pipe_flags
551
552 static size_t syscall_arg__scnprintf_signum(char *bf, size_t size, struct syscall_arg *arg)
553 {
554         int sig = arg->val;
555
556         switch (sig) {
557 #define P_SIGNUM(n) case SIG##n: return scnprintf(bf, size, #n)
558         P_SIGNUM(HUP);
559         P_SIGNUM(INT);
560         P_SIGNUM(QUIT);
561         P_SIGNUM(ILL);
562         P_SIGNUM(TRAP);
563         P_SIGNUM(ABRT);
564         P_SIGNUM(BUS);
565         P_SIGNUM(FPE);
566         P_SIGNUM(KILL);
567         P_SIGNUM(USR1);
568         P_SIGNUM(SEGV);
569         P_SIGNUM(USR2);
570         P_SIGNUM(PIPE);
571         P_SIGNUM(ALRM);
572         P_SIGNUM(TERM);
573         P_SIGNUM(STKFLT);
574         P_SIGNUM(CHLD);
575         P_SIGNUM(CONT);
576         P_SIGNUM(STOP);
577         P_SIGNUM(TSTP);
578         P_SIGNUM(TTIN);
579         P_SIGNUM(TTOU);
580         P_SIGNUM(URG);
581         P_SIGNUM(XCPU);
582         P_SIGNUM(XFSZ);
583         P_SIGNUM(VTALRM);
584         P_SIGNUM(PROF);
585         P_SIGNUM(WINCH);
586         P_SIGNUM(IO);
587         P_SIGNUM(PWR);
588         P_SIGNUM(SYS);
589         default: break;
590         }
591
592         return scnprintf(bf, size, "%#x", sig);
593 }
594
595 #define SCA_SIGNUM syscall_arg__scnprintf_signum
596
597 #define STRARRAY(arg, name, array) \
598           .arg_scnprintf = { [arg] = SCA_STRARRAY, }, \
599           .arg_parm      = { [arg] = &strarray__##array, }
600
601 static struct syscall_fmt {
602         const char *name;
603         const char *alias;
604         size_t     (*arg_scnprintf[6])(char *bf, size_t size, struct syscall_arg *arg);
605         void       *arg_parm[6];
606         bool       errmsg;
607         bool       timeout;
608         bool       hexret;
609 } syscall_fmts[] = {
610         { .name     = "access",     .errmsg = true,
611           .arg_scnprintf = { [1] = SCA_ACCMODE, /* mode */ }, },
612         { .name     = "arch_prctl", .errmsg = true, .alias = "prctl", },
613         { .name     = "brk",        .hexret = true,
614           .arg_scnprintf = { [0] = SCA_HEX, /* brk */ }, },
615         { .name     = "clock_gettime",  .errmsg = true, STRARRAY(0, clk_id, clockid), },
616         { .name     = "connect",    .errmsg = true, },
617         { .name     = "epoll_ctl",  .errmsg = true, STRARRAY(1, op, epoll_ctl_ops), },
618         { .name     = "eventfd2",   .errmsg = true,
619           .arg_scnprintf = { [1] = SCA_EFD_FLAGS, /* flags */ }, },
620         { .name     = "fcntl",      .errmsg = true, STRARRAY(1, cmd, fcntl_cmds), },
621         { .name     = "flock",      .errmsg = true,
622           .arg_scnprintf = { [1] = SCA_FLOCK, /* cmd */ }, },
623         { .name     = "fstat",      .errmsg = true, .alias = "newfstat", },
624         { .name     = "fstatat",    .errmsg = true, .alias = "newfstatat", },
625         { .name     = "futex",      .errmsg = true,
626           .arg_scnprintf = { [1] = SCA_FUTEX_OP, /* op */ }, },
627         { .name     = "getitimer",  .errmsg = true, STRARRAY(0, which, itimers), },
628         { .name     = "getrlimit",  .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
629         { .name     = "ioctl",      .errmsg = true,
630           .arg_scnprintf = { [2] = SCA_HEX, /* arg */ }, },
631         { .name     = "kill",       .errmsg = true,
632           .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
633         { .name     = "lseek",      .errmsg = true, STRARRAY(2, whence, whences), },
634         { .name     = "lstat",      .errmsg = true, .alias = "newlstat", },
635         { .name     = "madvise",    .errmsg = true,
636           .arg_scnprintf = { [0] = SCA_HEX,      /* start */
637                              [2] = SCA_MADV_BHV, /* behavior */ }, },
638         { .name     = "mmap",       .hexret = true,
639           .arg_scnprintf = { [0] = SCA_HEX,       /* addr */
640                              [2] = SCA_MMAP_PROT, /* prot */
641                              [3] = SCA_MMAP_FLAGS, /* flags */ }, },
642         { .name     = "mprotect",   .errmsg = true,
643           .arg_scnprintf = { [0] = SCA_HEX, /* start */
644                              [2] = SCA_MMAP_PROT, /* prot */ }, },
645         { .name     = "mremap",     .hexret = true,
646           .arg_scnprintf = { [0] = SCA_HEX, /* addr */
647                              [4] = SCA_HEX, /* new_addr */ }, },
648         { .name     = "munmap",     .errmsg = true,
649           .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
650         { .name     = "open",       .errmsg = true,
651           .arg_scnprintf = { [1] = SCA_OPEN_FLAGS, /* flags */ }, },
652         { .name     = "open_by_handle_at", .errmsg = true,
653           .arg_scnprintf = { [2] = SCA_OPEN_FLAGS, /* flags */ }, },
654         { .name     = "openat",     .errmsg = true,
655           .arg_scnprintf = { [2] = SCA_OPEN_FLAGS, /* flags */ }, },
656         { .name     = "pipe2",      .errmsg = true,
657           .arg_scnprintf = { [1] = SCA_PIPE_FLAGS, /* flags */ }, },
658         { .name     = "poll",       .errmsg = true, .timeout = true, },
659         { .name     = "ppoll",      .errmsg = true, .timeout = true, },
660         { .name     = "pread",      .errmsg = true, .alias = "pread64", },
661         { .name     = "prlimit64",  .errmsg = true, STRARRAY(1, resource, rlimit_resources), },
662         { .name     = "pwrite",     .errmsg = true, .alias = "pwrite64", },
663         { .name     = "read",       .errmsg = true, },
664         { .name     = "recvfrom",   .errmsg = true,
665           .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
666         { .name     = "recvmmsg",   .errmsg = true,
667           .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
668         { .name     = "recvmsg",    .errmsg = true,
669           .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
670         { .name     = "rt_sigaction", .errmsg = true,
671           .arg_scnprintf = { [0] = SCA_SIGNUM, /* sig */ }, },
672         { .name     = "rt_sigprocmask",  .errmsg = true, STRARRAY(0, how, sighow), },
673         { .name     = "rt_sigqueueinfo", .errmsg = true,
674           .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
675         { .name     = "rt_tgsigqueueinfo", .errmsg = true,
676           .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
677         { .name     = "select",     .errmsg = true, .timeout = true, },
678         { .name     = "sendmmsg",    .errmsg = true,
679           .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
680         { .name     = "sendmsg",    .errmsg = true,
681           .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
682         { .name     = "sendto",     .errmsg = true,
683           .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
684         { .name     = "setitimer",  .errmsg = true, STRARRAY(0, which, itimers), },
685         { .name     = "setrlimit",  .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
686         { .name     = "socket",     .errmsg = true,
687           .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
688                              [1] = SCA_SK_TYPE, /* type */ },
689           .arg_parm      = { [0] = &strarray__socket_families, /* family */ }, },
690         { .name     = "socketpair", .errmsg = true,
691           .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
692                              [1] = SCA_SK_TYPE, /* type */ },
693           .arg_parm      = { [0] = &strarray__socket_families, /* family */ }, },
694         { .name     = "stat",       .errmsg = true, .alias = "newstat", },
695         { .name     = "tgkill",     .errmsg = true,
696           .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
697         { .name     = "tkill",      .errmsg = true,
698           .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
699         { .name     = "uname",      .errmsg = true, .alias = "newuname", },
700 };
701
702 static int syscall_fmt__cmp(const void *name, const void *fmtp)
703 {
704         const struct syscall_fmt *fmt = fmtp;
705         return strcmp(name, fmt->name);
706 }
707
708 static struct syscall_fmt *syscall_fmt__find(const char *name)
709 {
710         const int nmemb = ARRAY_SIZE(syscall_fmts);
711         return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp);
712 }
713
/* What is known about one syscall. */
struct syscall {
        /* Tracepoint format; its field list describes the arguments. */
        struct event_format *tp_format;
        const char          *name;
        bool                filtered;
        /* Entry in syscall_fmts[] for this syscall, or NULL when absent. */
        struct syscall_fmt  *fmt;
        /* One formatter slot per argument; allocated by syscall__set_arg_fmts(). */
        size_t              (**arg_scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
        /* Points at fmt->arg_parm when fmt is set. */
        void                **arg_parm;
};
722
723 static size_t fprintf_duration(unsigned long t, FILE *fp)
724 {
725         double duration = (double)t / NSEC_PER_MSEC;
726         size_t printed = fprintf(fp, "(");
727
728         if (duration >= 1.0)
729                 printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration);
730         else if (duration >= 0.01)
731                 printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration);
732         else
733                 printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration);
734         return printed + fprintf(fp, "): ");
735 }
736
737 struct thread_trace {
738         u64               entry_time;
739         u64               exit_time;
740         bool              entry_pending;
741         unsigned long     nr_events;
742         char              *entry_str;
743         double            runtime_ms;
744 };
745
746 static struct thread_trace *thread_trace__new(void)
747 {
748         return zalloc(sizeof(struct thread_trace));
749 }
750
751 static struct thread_trace *thread__trace(struct thread *thread, FILE *fp)
752 {
753         struct thread_trace *ttrace;
754
755         if (thread == NULL)
756                 goto fail;
757
758         if (thread->priv == NULL)
759                 thread->priv = thread_trace__new();
760                 
761         if (thread->priv == NULL)
762                 goto fail;
763
764         ttrace = thread->priv;
765         ++ttrace->nr_events;
766
767         return ttrace;
768 fail:
769         color_fprintf(fp, PERF_COLOR_RED,
770                       "WARNING: not enough memory, dropping samples!\n");
771         return NULL;
772 }
773
774 struct trace {
775         struct perf_tool        tool;
776         int                     audit_machine;
777         struct {
778                 int             max;
779                 struct syscall  *table;
780         } syscalls;
781         struct perf_record_opts opts;
782         struct machine          host;
783         u64                     base_time;
784         bool                    full_time;
785         FILE                    *output;
786         unsigned long           nr_events;
787         struct strlist          *ev_qualifier;
788         bool                    not_ev_qualifier;
789         struct intlist          *tid_list;
790         struct intlist          *pid_list;
791         bool                    sched;
792         bool                    multiple_threads;
793         bool                    show_comm;
794         double                  duration_filter;
795         double                  runtime_ms;
796 };
797
798 static bool trace__filter_duration(struct trace *trace, double t)
799 {
800         return t < (trace->duration_filter * NSEC_PER_MSEC);
801 }
802
803 static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
804 {
805         double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC;
806
807         return fprintf(fp, "%10.3f ", ts);
808 }
809
810 static bool done = false;
811
812 static void sig_handler(int sig __maybe_unused)
813 {
814         done = true;
815 }
816
817 static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread,
818                                         u64 duration, u64 tstamp, FILE *fp)
819 {
820         size_t printed = trace__fprintf_tstamp(trace, tstamp, fp);
821         printed += fprintf_duration(duration, fp);
822
823         if (trace->multiple_threads) {
824                 if (trace->show_comm)
825                         printed += fprintf(fp, "%.14s/", thread->comm);
826                 printed += fprintf(fp, "%d ", thread->tid);
827         }
828
829         return printed;
830 }
831
832 static int trace__process_event(struct trace *trace, struct machine *machine,
833                                 union perf_event *event)
834 {
835         int ret = 0;
836
837         switch (event->header.type) {
838         case PERF_RECORD_LOST:
839                 color_fprintf(trace->output, PERF_COLOR_RED,
840                               "LOST %" PRIu64 " events!\n", event->lost.lost);
841                 ret = machine__process_lost_event(machine, event);
842         default:
843                 ret = machine__process_event(machine, event);
844                 break;
845         }
846
847         return ret;
848 }
849
850 static int trace__tool_process(struct perf_tool *tool,
851                                union perf_event *event,
852                                struct perf_sample *sample __maybe_unused,
853                                struct machine *machine)
854 {
855         struct trace *trace = container_of(tool, struct trace, tool);
856         return trace__process_event(trace, machine, event);
857 }
858
859 static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
860 {
861         int err = symbol__init();
862
863         if (err)
864                 return err;
865
866         machine__init(&trace->host, "", HOST_KERNEL_ID);
867         machine__create_kernel_maps(&trace->host);
868
869         if (perf_target__has_task(&trace->opts.target)) {
870                 err = perf_event__synthesize_thread_map(&trace->tool, evlist->threads,
871                                                         trace__tool_process,
872                                                         &trace->host);
873         } else {
874                 err = perf_event__synthesize_threads(&trace->tool, trace__tool_process,
875                                                      &trace->host);
876         }
877
878         if (err)
879                 symbol__exit();
880
881         return err;
882 }
883
884 static int syscall__set_arg_fmts(struct syscall *sc)
885 {
886         struct format_field *field;
887         int idx = 0;
888
889         sc->arg_scnprintf = calloc(sc->tp_format->format.nr_fields - 1, sizeof(void *));
890         if (sc->arg_scnprintf == NULL)
891                 return -1;
892
893         if (sc->fmt)
894                 sc->arg_parm = sc->fmt->arg_parm;
895
896         for (field = sc->tp_format->format.fields->next; field; field = field->next) {
897                 if (sc->fmt && sc->fmt->arg_scnprintf[idx])
898                         sc->arg_scnprintf[idx] = sc->fmt->arg_scnprintf[idx];
899                 else if (field->flags & FIELD_IS_POINTER)
900                         sc->arg_scnprintf[idx] = syscall_arg__scnprintf_hex;
901                 ++idx;
902         }
903
904         return 0;
905 }
906
907 static int trace__read_syscall_info(struct trace *trace, int id)
908 {
909         char tp_name[128];
910         struct syscall *sc;
911         const char *name = audit_syscall_to_name(id, trace->audit_machine);
912
913         if (name == NULL)
914                 return -1;
915
916         if (id > trace->syscalls.max) {
917                 struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc));
918
919                 if (nsyscalls == NULL)
920                         return -1;
921
922                 if (trace->syscalls.max != -1) {
923                         memset(nsyscalls + trace->syscalls.max + 1, 0,
924                                (id - trace->syscalls.max) * sizeof(*sc));
925                 } else {
926                         memset(nsyscalls, 0, (id + 1) * sizeof(*sc));
927                 }
928
929                 trace->syscalls.table = nsyscalls;
930                 trace->syscalls.max   = id;
931         }
932
933         sc = trace->syscalls.table + id;
934         sc->name = name;
935
936         if (trace->ev_qualifier) {
937                 bool in = strlist__find(trace->ev_qualifier, name) != NULL;
938
939                 if (!(in ^ trace->not_ev_qualifier)) {
940                         sc->filtered = true;
941                         /*
942                          * No need to do read tracepoint information since this will be
943                          * filtered out.
944                          */
945                         return 0;
946                 }
947         }
948
949         sc->fmt  = syscall_fmt__find(sc->name);
950
951         snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name);
952         sc->tp_format = event_format__new("syscalls", tp_name);
953
954         if (sc->tp_format == NULL && sc->fmt && sc->fmt->alias) {
955                 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias);
956                 sc->tp_format = event_format__new("syscalls", tp_name);
957         }
958
959         if (sc->tp_format == NULL)
960                 return -1;
961
962         return syscall__set_arg_fmts(sc);
963 }
964
965 static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
966                                       unsigned long *args)
967 {
968         size_t printed = 0;
969
970         if (sc->tp_format != NULL) {
971                 struct format_field *field;
972                 u8 bit = 1;
973                 struct syscall_arg arg = {
974                         .idx  = 0,
975                         .mask = 0,
976                 };
977
978                 for (field = sc->tp_format->format.fields->next; field;
979                      field = field->next, ++arg.idx, bit <<= 1) {
980                         if (arg.mask & bit)
981                                 continue;
982                         /*
983                          * Suppress this argument if its value is zero and
984                          * and we don't have a string associated in an
985                          * strarray for it.
986                          */
987                         if (args[arg.idx] == 0 &&
988                             !(sc->arg_scnprintf &&
989                               sc->arg_scnprintf[arg.idx] == SCA_STRARRAY &&
990                               sc->arg_parm[arg.idx]))
991                                 continue;
992
993                         printed += scnprintf(bf + printed, size - printed,
994                                              "%s%s: ", printed ? ", " : "", field->name);
995                         if (sc->arg_scnprintf && sc->arg_scnprintf[arg.idx]) {
996                                 arg.val = args[arg.idx];
997                                 if (sc->arg_parm)
998                                         arg.parm = sc->arg_parm[arg.idx];
999                                 printed += sc->arg_scnprintf[arg.idx](bf + printed,
1000                                                                       size - printed, &arg);
1001                         } else {
1002                                 printed += scnprintf(bf + printed, size - printed,
1003                                                      "%ld", args[arg.idx]);
1004                         }
1005                 }
1006         } else {
1007                 int i = 0;
1008
1009                 while (i < 6) {
1010                         printed += scnprintf(bf + printed, size - printed,
1011                                              "%sarg%d: %ld",
1012                                              printed ? ", " : "", i, args[i]);
1013                         ++i;
1014                 }
1015         }
1016
1017         return printed;
1018 }
1019
/*
 * Signature of the per-tracepoint callbacks defined in this file
 * (trace__sys_enter, trace__sys_exit, trace__sched_stat_runtime).
 */
typedef int (*tracepoint_handler)(struct trace *trace,
                                  struct perf_evsel *evsel,
                                  struct perf_sample *sample);
1022
1023 static struct syscall *trace__syscall_info(struct trace *trace,
1024                                            struct perf_evsel *evsel,
1025                                            struct perf_sample *sample)
1026 {
1027         int id = perf_evsel__intval(evsel, sample, "id");
1028
1029         if (id < 0) {
1030
1031                 /*
1032                  * XXX: Noticed on x86_64, reproduced as far back as 3.0.36, haven't tried
1033                  * before that, leaving at a higher verbosity level till that is
1034                  * explained. Reproduced with plain ftrace with:
1035                  *
1036                  * echo 1 > /t/events/raw_syscalls/sys_exit/enable
1037                  * grep "NR -1 " /t/trace_pipe
1038                  *
1039                  * After generating some load on the machine.
1040                  */
1041                 if (verbose > 1) {
1042                         static u64 n;
1043                         fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n",
1044                                 id, perf_evsel__name(evsel), ++n);
1045                 }
1046                 return NULL;
1047         }
1048
1049         if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) &&
1050             trace__read_syscall_info(trace, id))
1051                 goto out_cant_read;
1052
1053         if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL))
1054                 goto out_cant_read;
1055
1056         return &trace->syscalls.table[id];
1057
1058 out_cant_read:
1059         if (verbose) {
1060                 fprintf(trace->output, "Problems reading syscall %d", id);
1061                 if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL)
1062                         fprintf(trace->output, "(%s)", trace->syscalls.table[id].name);
1063                 fputs(" information\n", trace->output);
1064         }
1065         return NULL;
1066 }
1067
1068 static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
1069                             struct perf_sample *sample)
1070 {
1071         char *msg;
1072         void *args;
1073         size_t printed = 0;
1074         struct thread *thread;
1075         struct syscall *sc = trace__syscall_info(trace, evsel, sample);
1076         struct thread_trace *ttrace;
1077
1078         if (sc == NULL)
1079                 return -1;
1080
1081         if (sc->filtered)
1082                 return 0;
1083
1084         thread = machine__findnew_thread(&trace->host, sample->pid,
1085                                          sample->tid);
1086         ttrace = thread__trace(thread, trace->output);
1087         if (ttrace == NULL)
1088                 return -1;
1089
1090         args = perf_evsel__rawptr(evsel, sample, "args");
1091         if (args == NULL) {
1092                 fprintf(trace->output, "Problems reading syscall arguments\n");
1093                 return -1;
1094         }
1095
1096         ttrace = thread->priv;
1097
1098         if (ttrace->entry_str == NULL) {
1099                 ttrace->entry_str = malloc(1024);
1100                 if (!ttrace->entry_str)
1101                         return -1;
1102         }
1103
1104         ttrace->entry_time = sample->time;
1105         msg = ttrace->entry_str;
1106         printed += scnprintf(msg + printed, 1024 - printed, "%s(", sc->name);
1107
1108         printed += syscall__scnprintf_args(sc, msg + printed, 1024 - printed,  args);
1109
1110         if (!strcmp(sc->name, "exit_group") || !strcmp(sc->name, "exit")) {
1111                 if (!trace->duration_filter) {
1112                         trace__fprintf_entry_head(trace, thread, 1, sample->time, trace->output);
1113                         fprintf(trace->output, "%-70s\n", ttrace->entry_str);
1114                 }
1115         } else
1116                 ttrace->entry_pending = true;
1117
1118         return 0;
1119 }
1120
1121 static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
1122                            struct perf_sample *sample)
1123 {
1124         int ret;
1125         u64 duration = 0;
1126         struct thread *thread;
1127         struct syscall *sc = trace__syscall_info(trace, evsel, sample);
1128         struct thread_trace *ttrace;
1129
1130         if (sc == NULL)
1131                 return -1;
1132
1133         if (sc->filtered)
1134                 return 0;
1135
1136         thread = machine__findnew_thread(&trace->host, sample->pid,
1137                                          sample->tid);
1138         ttrace = thread__trace(thread, trace->output);
1139         if (ttrace == NULL)
1140                 return -1;
1141
1142         ret = perf_evsel__intval(evsel, sample, "ret");
1143
1144         ttrace = thread->priv;
1145
1146         ttrace->exit_time = sample->time;
1147
1148         if (ttrace->entry_time) {
1149                 duration = sample->time - ttrace->entry_time;
1150                 if (trace__filter_duration(trace, duration))
1151                         goto out;
1152         } else if (trace->duration_filter)
1153                 goto out;
1154
1155         trace__fprintf_entry_head(trace, thread, duration, sample->time, trace->output);
1156
1157         if (ttrace->entry_pending) {
1158                 fprintf(trace->output, "%-70s", ttrace->entry_str);
1159         } else {
1160                 fprintf(trace->output, " ... [");
1161                 color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued");
1162                 fprintf(trace->output, "]: %s()", sc->name);
1163         }
1164
1165         if (sc->fmt == NULL) {
1166 signed_print:
1167                 fprintf(trace->output, ") = %d", ret);
1168         } else if (ret < 0 && sc->fmt->errmsg) {
1169                 char bf[256];
1170                 const char *emsg = strerror_r(-ret, bf, sizeof(bf)),
1171                            *e = audit_errno_to_name(-ret);
1172
1173                 fprintf(trace->output, ") = -1 %s %s", e, emsg);
1174         } else if (ret == 0 && sc->fmt->timeout)
1175                 fprintf(trace->output, ") = 0 Timeout");
1176         else if (sc->fmt->hexret)
1177                 fprintf(trace->output, ") = %#x", ret);
1178         else
1179                 goto signed_print;
1180
1181         fputc('\n', trace->output);
1182 out:
1183         ttrace->entry_pending = false;
1184
1185         return 0;
1186 }
1187
1188 static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel,
1189                                      struct perf_sample *sample)
1190 {
1191         u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
1192         double runtime_ms = (double)runtime / NSEC_PER_MSEC;
1193         struct thread *thread = machine__findnew_thread(&trace->host,
1194                                                         sample->pid,
1195                                                         sample->tid);
1196         struct thread_trace *ttrace = thread__trace(thread, trace->output);
1197
1198         if (ttrace == NULL)
1199                 goto out_dump;
1200
1201         ttrace->runtime_ms += runtime_ms;
1202         trace->runtime_ms += runtime_ms;
1203         return 0;
1204
1205 out_dump:
1206         fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n",
1207                evsel->name,
1208                perf_evsel__strval(evsel, sample, "comm"),
1209                (pid_t)perf_evsel__intval(evsel, sample, "pid"),
1210                runtime,
1211                perf_evsel__intval(evsel, sample, "vruntime"));
1212         return 0;
1213 }
1214
1215 static bool skip_sample(struct trace *trace, struct perf_sample *sample)
1216 {
1217         if ((trace->pid_list && intlist__find(trace->pid_list, sample->pid)) ||
1218             (trace->tid_list && intlist__find(trace->tid_list, sample->tid)))
1219                 return false;
1220
1221         if (trace->pid_list || trace->tid_list)
1222                 return true;
1223
1224         return false;
1225 }
1226
1227 static int trace__process_sample(struct perf_tool *tool,
1228                                  union perf_event *event __maybe_unused,
1229                                  struct perf_sample *sample,
1230                                  struct perf_evsel *evsel,
1231                                  struct machine *machine __maybe_unused)
1232 {
1233         struct trace *trace = container_of(tool, struct trace, tool);
1234         int err = 0;
1235
1236         tracepoint_handler handler = evsel->handler.func;
1237
1238         if (skip_sample(trace, sample))
1239                 return 0;
1240
1241         if (!trace->full_time && trace->base_time == 0)
1242                 trace->base_time = sample->time;
1243
1244         if (handler)
1245                 handler(trace, evsel, sample);
1246
1247         return err;
1248 }
1249
1250 static bool
1251 perf_session__has_tp(struct perf_session *session, const char *name)
1252 {
1253         struct perf_evsel *evsel;
1254
1255         evsel = perf_evlist__find_tracepoint_by_name(session->evlist, name);
1256
1257         return evsel != NULL;
1258 }
1259
1260 static int parse_target_str(struct trace *trace)
1261 {
1262         if (trace->opts.target.pid) {
1263                 trace->pid_list = intlist__new(trace->opts.target.pid);
1264                 if (trace->pid_list == NULL) {
1265                         pr_err("Error parsing process id string\n");
1266                         return -EINVAL;
1267                 }
1268         }
1269
1270         if (trace->opts.target.tid) {
1271                 trace->tid_list = intlist__new(trace->opts.target.tid);
1272                 if (trace->tid_list == NULL) {
1273                         pr_err("Error parsing thread id string\n");
1274                         return -EINVAL;
1275                 }
1276         }
1277
1278         return 0;
1279 }
1280
1281 static int trace__run(struct trace *trace, int argc, const char **argv)
1282 {
1283         struct perf_evlist *evlist = perf_evlist__new();
1284         struct perf_evsel *evsel;
1285         int err = -1, i;
1286         unsigned long before;
1287         const bool forks = argc > 0;
1288
1289         if (evlist == NULL) {
1290                 fprintf(trace->output, "Not enough memory to run!\n");
1291                 goto out;
1292         }
1293
1294         if (perf_evlist__add_newtp(evlist, "raw_syscalls", "sys_enter", trace__sys_enter) ||
1295             perf_evlist__add_newtp(evlist, "raw_syscalls", "sys_exit", trace__sys_exit)) {
1296                 fprintf(trace->output, "Couldn't read the raw_syscalls tracepoints information!\n");
1297                 goto out_delete_evlist;
1298         }
1299
1300         if (trace->sched &&
1301             perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
1302                                    trace__sched_stat_runtime)) {
1303                 fprintf(trace->output, "Couldn't read the sched_stat_runtime tracepoint information!\n");
1304                 goto out_delete_evlist;
1305         }
1306
1307         err = perf_evlist__create_maps(evlist, &trace->opts.target);
1308         if (err < 0) {
1309                 fprintf(trace->output, "Problems parsing the target to trace, check your options!\n");
1310                 goto out_delete_evlist;
1311         }
1312
1313         err = trace__symbols_init(trace, evlist);
1314         if (err < 0) {
1315                 fprintf(trace->output, "Problems initializing symbol libraries!\n");
1316                 goto out_delete_maps;
1317         }
1318
1319         perf_evlist__config(evlist, &trace->opts);
1320
1321         signal(SIGCHLD, sig_handler);
1322         signal(SIGINT, sig_handler);
1323
1324         if (forks) {
1325                 err = perf_evlist__prepare_workload(evlist, &trace->opts.target,
1326                                                     argv, false, false);
1327                 if (err < 0) {
1328                         fprintf(trace->output, "Couldn't run the workload!\n");
1329                         goto out_delete_maps;
1330                 }
1331         }
1332
1333         err = perf_evlist__open(evlist);
1334         if (err < 0) {
1335                 fprintf(trace->output, "Couldn't create the events: %s\n", strerror(errno));
1336                 goto out_delete_maps;
1337         }
1338
1339         err = perf_evlist__mmap(evlist, UINT_MAX, false);
1340         if (err < 0) {
1341                 fprintf(trace->output, "Couldn't mmap the events: %s\n", strerror(errno));
1342                 goto out_close_evlist;
1343         }
1344
1345         perf_evlist__enable(evlist);
1346
1347         if (forks)
1348                 perf_evlist__start_workload(evlist);
1349
1350         trace->multiple_threads = evlist->threads->map[0] == -1 || evlist->threads->nr > 1;
1351 again:
1352         before = trace->nr_events;
1353
1354         for (i = 0; i < evlist->nr_mmaps; i++) {
1355                 union perf_event *event;
1356
1357                 while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) {
1358                         const u32 type = event->header.type;
1359                         tracepoint_handler handler;
1360                         struct perf_sample sample;
1361
1362                         ++trace->nr_events;
1363
1364                         err = perf_evlist__parse_sample(evlist, event, &sample);
1365                         if (err) {
1366                                 fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err);
1367                                 continue;
1368                         }
1369
1370                         if (!trace->full_time && trace->base_time == 0)
1371                                 trace->base_time = sample.time;
1372
1373                         if (type != PERF_RECORD_SAMPLE) {
1374                                 trace__process_event(trace, &trace->host, event);
1375                                 continue;
1376                         }
1377
1378                         evsel = perf_evlist__id2evsel(evlist, sample.id);
1379                         if (evsel == NULL) {
1380                                 fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample.id);
1381                                 continue;
1382                         }
1383
1384                         if (sample.raw_data == NULL) {
1385                                 fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
1386                                        perf_evsel__name(evsel), sample.tid,
1387                                        sample.cpu, sample.raw_size);
1388                                 continue;
1389                         }
1390
1391                         handler = evsel->handler.func;
1392                         handler(trace, evsel, &sample);
1393
1394                         if (done)
1395                                 goto out_unmap_evlist;
1396                 }
1397         }
1398
1399         if (trace->nr_events == before) {
1400                 if (done)
1401                         goto out_unmap_evlist;
1402
1403                 poll(evlist->pollfd, evlist->nr_fds, -1);
1404         }
1405
1406         if (done)
1407                 perf_evlist__disable(evlist);
1408
1409         goto again;
1410
1411 out_unmap_evlist:
1412         perf_evlist__munmap(evlist);
1413 out_close_evlist:
1414         perf_evlist__close(evlist);
1415 out_delete_maps:
1416         perf_evlist__delete_maps(evlist);
1417 out_delete_evlist:
1418         perf_evlist__delete(evlist);
1419 out:
1420         return err;
1421 }
1422
1423 static int trace__replay(struct trace *trace)
1424 {
1425         const struct perf_evsel_str_handler handlers[] = {
1426                 { "raw_syscalls:sys_enter",  trace__sys_enter, },
1427                 { "raw_syscalls:sys_exit",   trace__sys_exit, },
1428         };
1429
1430         struct perf_session *session;
1431         int err = -1;
1432
1433         trace->tool.sample        = trace__process_sample;
1434         trace->tool.mmap          = perf_event__process_mmap;
1435         trace->tool.mmap2         = perf_event__process_mmap2;
1436         trace->tool.comm          = perf_event__process_comm;
1437         trace->tool.exit          = perf_event__process_exit;
1438         trace->tool.fork          = perf_event__process_fork;
1439         trace->tool.attr          = perf_event__process_attr;
1440         trace->tool.tracing_data = perf_event__process_tracing_data;
1441         trace->tool.build_id      = perf_event__process_build_id;
1442
1443         trace->tool.ordered_samples = true;
1444         trace->tool.ordering_requires_timestamps = true;
1445
1446         /* add tid to output */
1447         trace->multiple_threads = true;
1448
1449         if (symbol__init() < 0)
1450                 return -1;
1451
1452         session = perf_session__new(input_name, O_RDONLY, 0, false,
1453                                     &trace->tool);
1454         if (session == NULL)
1455                 return -ENOMEM;
1456
1457         err = perf_session__set_tracepoints_handlers(session, handlers);
1458         if (err)
1459                 goto out;
1460
1461         if (!perf_session__has_tp(session, "raw_syscalls:sys_enter")) {
1462                 pr_err("Data file does not have raw_syscalls:sys_enter events\n");
1463                 goto out;
1464         }
1465
1466         if (!perf_session__has_tp(session, "raw_syscalls:sys_exit")) {
1467                 pr_err("Data file does not have raw_syscalls:sys_exit events\n");
1468                 goto out;
1469         }
1470
1471         err = parse_target_str(trace);
1472         if (err != 0)
1473                 goto out;
1474
1475         setup_pager();
1476
1477         err = perf_session__process_events(session, &trace->tool);
1478         if (err)
1479                 pr_err("Failed to process events, error %d", err);
1480
1481 out:
1482         perf_session__delete(session);
1483
1484         return err;
1485 }
1486
/*
 * Print the banner that precedes the per-thread summary to @fp.
 *
 * Returns the sum of the fprintf() return values.
 */
static size_t trace__fprintf_threads_header(FILE *fp)
{
        static const char *const banner[] = {
                "\n _____________________________________________________________________\n",
                " __)    Summary of events    (__\n\n",
                "              [ task - pid ]     [ events ] [ ratio ]  [ runtime ]\n",
                " _____________________________________________________________________\n\n",
        };
        size_t printed = 0;
        size_t i;

        for (i = 0; i < sizeof(banner) / sizeof(banner[0]); i++)
                printed += fprintf(fp, "%s", banner[i]);

        return printed;
}
1498
1499 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
1500 {
1501         size_t printed = trace__fprintf_threads_header(fp);
1502         struct rb_node *nd;
1503
1504         for (nd = rb_first(&trace->host.threads); nd; nd = rb_next(nd)) {
1505                 struct thread *thread = rb_entry(nd, struct thread, rb_node);
1506                 struct thread_trace *ttrace = thread->priv;
1507                 const char *color;
1508                 double ratio;
1509
1510                 if (ttrace == NULL)
1511                         continue;
1512
1513                 ratio = (double)ttrace->nr_events / trace->nr_events * 100.0;
1514
1515                 color = PERF_COLOR_NORMAL;
1516                 if (ratio > 50.0)
1517                         color = PERF_COLOR_RED;
1518                 else if (ratio > 25.0)
1519                         color = PERF_COLOR_GREEN;
1520                 else if (ratio > 5.0)
1521                         color = PERF_COLOR_YELLOW;
1522
1523                 printed += color_fprintf(fp, color, "%20s", thread->comm);
1524                 printed += fprintf(fp, " - %-5d :%11lu   [", thread->tid, ttrace->nr_events);
1525                 printed += color_fprintf(fp, color, "%5.1f%%", ratio);
1526                 printed += fprintf(fp, " ] %10.3f ms\n", ttrace->runtime_ms);
1527         }
1528
1529         return printed;
1530 }
1531
1532 static int trace__set_duration(const struct option *opt, const char *str,
1533                                int unset __maybe_unused)
1534 {
1535         struct trace *trace = opt->value;
1536
1537         trace->duration_filter = atof(str);
1538         return 0;
1539 }
1540
1541 static int trace__open_output(struct trace *trace, const char *filename)
1542 {
1543         struct stat st;
1544
1545         if (!stat(filename, &st) && st.st_size) {
1546                 char oldname[PATH_MAX];
1547
1548                 scnprintf(oldname, sizeof(oldname), "%s.old", filename);
1549                 unlink(oldname);
1550                 rename(filename, oldname);
1551         }
1552
1553         trace->output = fopen(filename, "w");
1554
1555         return trace->output == NULL ? -errno : 0;
1556 }
1557
1558 int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
1559 {
1560         const char * const trace_usage[] = {
1561                 "perf trace [<options>] [<command>]",
1562                 "perf trace [<options>] -- <command> [<options>]",
1563                 NULL
1564         };
1565         struct trace trace = {
1566                 .audit_machine = audit_detect_machine(),
1567                 .syscalls = {
1568                         . max = -1,
1569                 },
1570                 .opts = {
1571                         .target = {
1572                                 .uid       = UINT_MAX,
1573                                 .uses_mmap = true,
1574                         },
1575                         .user_freq     = UINT_MAX,
1576                         .user_interval = ULLONG_MAX,
1577                         .no_delay      = true,
1578                         .mmap_pages    = 1024,
1579                 },
1580                 .output = stdout,
1581                 .show_comm = true,
1582         };
1583         const char *output_name = NULL;
1584         const char *ev_qualifier_str = NULL;
1585         const struct option trace_options[] = {
1586         OPT_BOOLEAN(0, "comm", &trace.show_comm,
1587                     "show the thread COMM next to its id"),
1588         OPT_STRING('e', "expr", &ev_qualifier_str, "expr",
1589                     "list of events to trace"),
1590         OPT_STRING('o', "output", &output_name, "file", "output file name"),
1591         OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"),
1592         OPT_STRING('p', "pid", &trace.opts.target.pid, "pid",
1593                     "trace events on existing process id"),
1594         OPT_STRING('t', "tid", &trace.opts.target.tid, "tid",
1595                     "trace events on existing thread id"),
1596         OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide,
1597                     "system-wide collection from all CPUs"),
1598         OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu",
1599                     "list of cpus to monitor"),
1600         OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit,
1601                     "child tasks do not inherit counters"),
1602         OPT_CALLBACK('m', "mmap-pages", &trace.opts.mmap_pages, "pages",
1603                      "number of mmap data pages",
1604                      perf_evlist__parse_mmap_pages),
1605         OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user",
1606                    "user to profile"),
1607         OPT_CALLBACK(0, "duration", &trace, "float",
1608                      "show only events with duration > N.M ms",
1609                      trace__set_duration),
1610         OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"),
1611         OPT_INCR('v', "verbose", &verbose, "be more verbose"),
1612         OPT_BOOLEAN('T', "time", &trace.full_time,
1613                     "Show full timestamp, not time relative to first start"),
1614         OPT_END()
1615         };
1616         int err;
1617         char bf[BUFSIZ];
1618
1619         argc = parse_options(argc, argv, trace_options, trace_usage, 0);
1620
1621         if (output_name != NULL) {
1622                 err = trace__open_output(&trace, output_name);
1623                 if (err < 0) {
1624                         perror("failed to create output file");
1625                         goto out;
1626                 }
1627         }
1628
1629         if (ev_qualifier_str != NULL) {
1630                 const char *s = ev_qualifier_str;
1631
1632                 trace.not_ev_qualifier = *s == '!';
1633                 if (trace.not_ev_qualifier)
1634                         ++s;
1635                 trace.ev_qualifier = strlist__new(true, s);
1636                 if (trace.ev_qualifier == NULL) {
1637                         fputs("Not enough memory to parse event qualifier",
1638                               trace.output);
1639                         err = -ENOMEM;
1640                         goto out_close;
1641                 }
1642         }
1643
1644         err = perf_target__validate(&trace.opts.target);
1645         if (err) {
1646                 perf_target__strerror(&trace.opts.target, err, bf, sizeof(bf));
1647                 fprintf(trace.output, "%s", bf);
1648                 goto out_close;
1649         }
1650
1651         err = perf_target__parse_uid(&trace.opts.target);
1652         if (err) {
1653                 perf_target__strerror(&trace.opts.target, err, bf, sizeof(bf));
1654                 fprintf(trace.output, "%s", bf);
1655                 goto out_close;
1656         }
1657
1658         if (!argc && perf_target__none(&trace.opts.target))
1659                 trace.opts.target.system_wide = true;
1660
1661         if (input_name)
1662                 err = trace__replay(&trace);
1663         else
1664                 err = trace__run(&trace, argc, argv);
1665
1666         if (trace.sched && !err)
1667                 trace__fprintf_thread_summary(&trace, trace.output);
1668
1669 out_close:
1670         if (output_name != NULL)
1671                 fclose(trace.output);
1672 out:
1673         return err;
1674 }