perf trace: Beautify mlock & friends 'addr' arg
[firefly-linux-kernel-4.4.55.git] / tools / perf / builtin-trace.c
1 #include <traceevent/event-parse.h>
2 #include "builtin.h"
3 #include "util/color.h"
4 #include "util/debug.h"
5 #include "util/evlist.h"
6 #include "util/machine.h"
7 #include "util/session.h"
8 #include "util/thread.h"
9 #include "util/parse-options.h"
10 #include "util/strlist.h"
11 #include "util/intlist.h"
12 #include "util/thread_map.h"
13
14 #include <libaudit.h>
15 #include <stdlib.h>
16 #include <sys/eventfd.h>
17 #include <sys/mman.h>
18 #include <linux/futex.h>
19
20 /* For older distros: */
21 #ifndef MAP_STACK
22 # define MAP_STACK              0x20000
23 #endif
24
25 #ifndef MADV_HWPOISON
26 # define MADV_HWPOISON          100
27 #endif
28
29 #ifndef MADV_MERGEABLE
30 # define MADV_MERGEABLE         12
31 #endif
32
33 #ifndef MADV_UNMERGEABLE
34 # define MADV_UNMERGEABLE       13
35 #endif
36
37 struct syscall_arg {
38         unsigned long val;
39         void          *parm;
40         u8            idx;
41         u8            mask;
42 };
43
/* A table of strings together with its length, for index -> name lookups. */
struct strarray {
	int	   nr_entries;
	const char **entries;
};

/*
 * Define the variable 'strarray__<array>' describing the string table
 * 'array'.  Whatever storage class precedes the macro invocation applies
 * to the new variable.
 */
#define DEFINE_STRARRAY(array)				\
	struct strarray strarray__##array = {		\
		.entries    = array,			\
		.nr_entries = ARRAY_SIZE(array),	\
	}
53
54 static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size,
55                                               struct syscall_arg *arg)
56 {
57         int idx = arg->val;
58         struct strarray *sa = arg->parm;
59
60         if (idx < 0 || idx >= sa->nr_entries)
61                 return scnprintf(bf, size, "%d", idx);
62
63         return scnprintf(bf, size, "%s", sa->entries[idx]);
64 }
65
66 #define SCA_STRARRAY syscall_arg__scnprintf_strarray
67
68 static size_t syscall_arg__scnprintf_hex(char *bf, size_t size,
69                                          struct syscall_arg *arg)
70 {
71         return scnprintf(bf, size, "%#lx", arg->val);
72 }
73
74 #define SCA_HEX syscall_arg__scnprintf_hex
75
76 static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size,
77                                                struct syscall_arg *arg)
78 {
79         int printed = 0, prot = arg->val;
80
81         if (prot == PROT_NONE)
82                 return scnprintf(bf, size, "NONE");
83 #define P_MMAP_PROT(n) \
84         if (prot & PROT_##n) { \
85                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
86                 prot &= ~PROT_##n; \
87         }
88
89         P_MMAP_PROT(EXEC);
90         P_MMAP_PROT(READ);
91         P_MMAP_PROT(WRITE);
92 #ifdef PROT_SEM
93         P_MMAP_PROT(SEM);
94 #endif
95         P_MMAP_PROT(GROWSDOWN);
96         P_MMAP_PROT(GROWSUP);
97 #undef P_MMAP_PROT
98
99         if (prot)
100                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", prot);
101
102         return printed;
103 }
104
105 #define SCA_MMAP_PROT syscall_arg__scnprintf_mmap_prot
106
107 static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size,
108                                                 struct syscall_arg *arg)
109 {
110         int printed = 0, flags = arg->val;
111
112 #define P_MMAP_FLAG(n) \
113         if (flags & MAP_##n) { \
114                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
115                 flags &= ~MAP_##n; \
116         }
117
118         P_MMAP_FLAG(SHARED);
119         P_MMAP_FLAG(PRIVATE);
120 #ifdef MAP_32BIT
121         P_MMAP_FLAG(32BIT);
122 #endif
123         P_MMAP_FLAG(ANONYMOUS);
124         P_MMAP_FLAG(DENYWRITE);
125         P_MMAP_FLAG(EXECUTABLE);
126         P_MMAP_FLAG(FILE);
127         P_MMAP_FLAG(FIXED);
128         P_MMAP_FLAG(GROWSDOWN);
129 #ifdef MAP_HUGETLB
130         P_MMAP_FLAG(HUGETLB);
131 #endif
132         P_MMAP_FLAG(LOCKED);
133         P_MMAP_FLAG(NONBLOCK);
134         P_MMAP_FLAG(NORESERVE);
135         P_MMAP_FLAG(POPULATE);
136         P_MMAP_FLAG(STACK);
137 #ifdef MAP_UNINITIALIZED
138         P_MMAP_FLAG(UNINITIALIZED);
139 #endif
140 #undef P_MMAP_FLAG
141
142         if (flags)
143                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
144
145         return printed;
146 }
147
148 #define SCA_MMAP_FLAGS syscall_arg__scnprintf_mmap_flags
149
150 static size_t syscall_arg__scnprintf_madvise_behavior(char *bf, size_t size,
151                                                       struct syscall_arg *arg)
152 {
153         int behavior = arg->val;
154
155         switch (behavior) {
156 #define P_MADV_BHV(n) case MADV_##n: return scnprintf(bf, size, #n)
157         P_MADV_BHV(NORMAL);
158         P_MADV_BHV(RANDOM);
159         P_MADV_BHV(SEQUENTIAL);
160         P_MADV_BHV(WILLNEED);
161         P_MADV_BHV(DONTNEED);
162         P_MADV_BHV(REMOVE);
163         P_MADV_BHV(DONTFORK);
164         P_MADV_BHV(DOFORK);
165         P_MADV_BHV(HWPOISON);
166 #ifdef MADV_SOFT_OFFLINE
167         P_MADV_BHV(SOFT_OFFLINE);
168 #endif
169         P_MADV_BHV(MERGEABLE);
170         P_MADV_BHV(UNMERGEABLE);
171 #ifdef MADV_HUGEPAGE
172         P_MADV_BHV(HUGEPAGE);
173 #endif
174 #ifdef MADV_NOHUGEPAGE
175         P_MADV_BHV(NOHUGEPAGE);
176 #endif
177 #ifdef MADV_DONTDUMP
178         P_MADV_BHV(DONTDUMP);
179 #endif
180 #ifdef MADV_DODUMP
181         P_MADV_BHV(DODUMP);
182 #endif
183 #undef P_MADV_PHV
184         default: break;
185         }
186
187         return scnprintf(bf, size, "%#x", behavior);
188 }
189
190 #define SCA_MADV_BHV syscall_arg__scnprintf_madvise_behavior
191
192 static size_t syscall_arg__scnprintf_flock(char *bf, size_t size,
193                                            struct syscall_arg *arg)
194 {
195         int printed = 0, op = arg->val;
196
197         if (op == 0)
198                 return scnprintf(bf, size, "NONE");
199 #define P_CMD(cmd) \
200         if ((op & LOCK_##cmd) == LOCK_##cmd) { \
201                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #cmd); \
202                 op &= ~LOCK_##cmd; \
203         }
204
205         P_CMD(SH);
206         P_CMD(EX);
207         P_CMD(NB);
208         P_CMD(UN);
209         P_CMD(MAND);
210         P_CMD(RW);
211         P_CMD(READ);
212         P_CMD(WRITE);
213 #undef P_OP
214
215         if (op)
216                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", op);
217
218         return printed;
219 }
220
221 #define SCA_FLOCK syscall_arg__scnprintf_flock
222
223 static size_t syscall_arg__scnprintf_futex_op(char *bf, size_t size, struct syscall_arg *arg)
224 {
225         enum syscall_futex_args {
226                 SCF_UADDR   = (1 << 0),
227                 SCF_OP      = (1 << 1),
228                 SCF_VAL     = (1 << 2),
229                 SCF_TIMEOUT = (1 << 3),
230                 SCF_UADDR2  = (1 << 4),
231                 SCF_VAL3    = (1 << 5),
232         };
233         int op = arg->val;
234         int cmd = op & FUTEX_CMD_MASK;
235         size_t printed = 0;
236
237         switch (cmd) {
238 #define P_FUTEX_OP(n) case FUTEX_##n: printed = scnprintf(bf, size, #n);
239         P_FUTEX_OP(WAIT);           arg->mask |= SCF_VAL3|SCF_UADDR2;             break;
240         P_FUTEX_OP(WAKE);           arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
241         P_FUTEX_OP(FD);             arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
242         P_FUTEX_OP(REQUEUE);        arg->mask |= SCF_VAL3|SCF_TIMEOUT;            break;
243         P_FUTEX_OP(CMP_REQUEUE);    arg->mask |= SCF_TIMEOUT;                     break;
244         P_FUTEX_OP(CMP_REQUEUE_PI); arg->mask |= SCF_TIMEOUT;                     break;
245         P_FUTEX_OP(WAKE_OP);                                                      break;
246         P_FUTEX_OP(LOCK_PI);        arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
247         P_FUTEX_OP(UNLOCK_PI);      arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
248         P_FUTEX_OP(TRYLOCK_PI);     arg->mask |= SCF_VAL3|SCF_UADDR2;             break;
249         P_FUTEX_OP(WAIT_BITSET);    arg->mask |= SCF_UADDR2;                      break;
250         P_FUTEX_OP(WAKE_BITSET);    arg->mask |= SCF_UADDR2;                      break;
251         P_FUTEX_OP(WAIT_REQUEUE_PI);                                              break;
252         default: printed = scnprintf(bf, size, "%#x", cmd);                       break;
253         }
254
255         if (op & FUTEX_PRIVATE_FLAG)
256                 printed += scnprintf(bf + printed, size - printed, "|PRIV");
257
258         if (op & FUTEX_CLOCK_REALTIME)
259                 printed += scnprintf(bf + printed, size - printed, "|CLKRT");
260
261         return printed;
262 }
263
264 #define SCA_FUTEX_OP  syscall_arg__scnprintf_futex_op
265
/*
 * Name tables for arguments printed through SCA_STRARRAY: the argument value
 * is used directly as the index into the table.
 */

/* The first name sits at index 1; slot 0 is left empty. */
static const char *epoll_ctl_ops[] = {
	[1] = "ADD",
	"DEL",
	"MOD",
};
static DEFINE_STRARRAY(epoll_ctl_ops);

static const char *itimers[] = {
	"REAL",
	"VIRTUAL",
	"PROF",
};
static DEFINE_STRARRAY(itimers);

static const char *whences[] = {
	"SET",
	"CUR",
	"END",
#ifdef SEEK_DATA
	"DATA",
#endif
#ifdef SEEK_HOLE
	"HOLE",
#endif
};
static DEFINE_STRARRAY(whences);

static const char *fcntl_cmds[] = {
	"DUPFD",
	"GETFD",
	"SETFD",
	"GETFL",
	"SETFL",
	"GETLK",
	"SETLK",
	"SETLKW",
	"SETOWN",
	"GETOWN",
	"SETSIG",
	"GETSIG",
	"F_GETLK64",
	"F_SETLK64",
	"F_SETLKW64",
	"F_SETOWN_EX",
	"F_GETOWN_EX",
	"F_GETOWNER_UIDS",
};
static DEFINE_STRARRAY(fcntl_cmds);

static const char *rlimit_resources[] = {
	"CPU",
	"FSIZE",
	"DATA",
	"STACK",
	"CORE",
	"RSS",
	"NPROC",
	"NOFILE",
	"MEMLOCK",
	"AS",
	"LOCKS",
	"SIGPENDING",
	"MSGQUEUE",
	"NICE",
	"RTPRIO",
	"RTTIME",
};
static DEFINE_STRARRAY(rlimit_resources);

static const char *sighow[] = {
	"BLOCK",
	"UNBLOCK",
	"SETMASK",
};
static DEFINE_STRARRAY(sighow);

static const char *clockid[] = {
	"REALTIME",
	"MONOTONIC",
	"PROCESS_CPUTIME_ID",
	"THREAD_CPUTIME_ID",
	"MONOTONIC_RAW",
	"REALTIME_COARSE",
	"MONOTONIC_COARSE",
};
static DEFINE_STRARRAY(clockid);

static const char *socket_families[] = {
	"UNSPEC",
	"LOCAL",
	"INET",
	"AX25",
	"IPX",
	"APPLETALK",
	"NETROM",
	"BRIDGE",
	"ATMPVC",
	"X25",
	"INET6",
	"ROSE",
	"DECnet",
	"NETBEUI",
	"SECURITY",
	"KEY",
	"NETLINK",
	"PACKET",
	"ASH",
	"ECONET",
	"ATMSVC",
	"RDS",
	"SNA",
	"IRDA",
	"PPPOX",
	"WANPIPE",
	"LLC",
	"IB",
	"CAN",
	"TIPC",
	"BLUETOOTH",
	"IUCV",
	"RXRPC",
	"ISDN",
	"PHONET",
	"IEEE802154",
	"CAIF",
	"ALG",
	"NFC",
	"VSOCK",
};
static DEFINE_STRARRAY(socket_families);
315
316 #ifndef SOCK_TYPE_MASK
317 #define SOCK_TYPE_MASK 0xf
318 #endif
319
320 static size_t syscall_arg__scnprintf_socket_type(char *bf, size_t size,
321                                                       struct syscall_arg *arg)
322 {
323         size_t printed;
324         int type = arg->val,
325             flags = type & ~SOCK_TYPE_MASK;
326
327         type &= SOCK_TYPE_MASK;
328         /*
329          * Can't use a strarray, MIPS may override for ABI reasons.
330          */
331         switch (type) {
332 #define P_SK_TYPE(n) case SOCK_##n: printed = scnprintf(bf, size, #n); break;
333         P_SK_TYPE(STREAM);
334         P_SK_TYPE(DGRAM);
335         P_SK_TYPE(RAW);
336         P_SK_TYPE(RDM);
337         P_SK_TYPE(SEQPACKET);
338         P_SK_TYPE(DCCP);
339         P_SK_TYPE(PACKET);
340 #undef P_SK_TYPE
341         default:
342                 printed = scnprintf(bf, size, "%#x", type);
343         }
344
345 #define P_SK_FLAG(n) \
346         if (flags & SOCK_##n) { \
347                 printed += scnprintf(bf + printed, size - printed, "|%s", #n); \
348                 flags &= ~SOCK_##n; \
349         }
350
351         P_SK_FLAG(CLOEXEC);
352         P_SK_FLAG(NONBLOCK);
353 #undef P_SK_FLAG
354
355         if (flags)
356                 printed += scnprintf(bf + printed, size - printed, "|%#x", flags);
357
358         return printed;
359 }
360
361 #define SCA_SK_TYPE syscall_arg__scnprintf_socket_type
362
363 #ifndef MSG_PROBE
364 #define MSG_PROBE            0x10
365 #endif
366 #ifndef MSG_WAITFORONE
367 #define MSG_WAITFORONE  0x10000
368 #endif
369 #ifndef MSG_SENDPAGE_NOTLAST
370 #define MSG_SENDPAGE_NOTLAST 0x20000
371 #endif
372 #ifndef MSG_FASTOPEN
373 #define MSG_FASTOPEN         0x20000000
374 #endif
375
376 static size_t syscall_arg__scnprintf_msg_flags(char *bf, size_t size,
377                                                struct syscall_arg *arg)
378 {
379         int printed = 0, flags = arg->val;
380
381         if (flags == 0)
382                 return scnprintf(bf, size, "NONE");
383 #define P_MSG_FLAG(n) \
384         if (flags & MSG_##n) { \
385                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
386                 flags &= ~MSG_##n; \
387         }
388
389         P_MSG_FLAG(OOB);
390         P_MSG_FLAG(PEEK);
391         P_MSG_FLAG(DONTROUTE);
392         P_MSG_FLAG(TRYHARD);
393         P_MSG_FLAG(CTRUNC);
394         P_MSG_FLAG(PROBE);
395         P_MSG_FLAG(TRUNC);
396         P_MSG_FLAG(DONTWAIT);
397         P_MSG_FLAG(EOR);
398         P_MSG_FLAG(WAITALL);
399         P_MSG_FLAG(FIN);
400         P_MSG_FLAG(SYN);
401         P_MSG_FLAG(CONFIRM);
402         P_MSG_FLAG(RST);
403         P_MSG_FLAG(ERRQUEUE);
404         P_MSG_FLAG(NOSIGNAL);
405         P_MSG_FLAG(MORE);
406         P_MSG_FLAG(WAITFORONE);
407         P_MSG_FLAG(SENDPAGE_NOTLAST);
408         P_MSG_FLAG(FASTOPEN);
409         P_MSG_FLAG(CMSG_CLOEXEC);
410 #undef P_MSG_FLAG
411
412         if (flags)
413                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
414
415         return printed;
416 }
417
418 #define SCA_MSG_FLAGS syscall_arg__scnprintf_msg_flags
419
420 static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size,
421                                                  struct syscall_arg *arg)
422 {
423         size_t printed = 0;
424         int mode = arg->val;
425
426         if (mode == F_OK) /* 0 */
427                 return scnprintf(bf, size, "F");
428 #define P_MODE(n) \
429         if (mode & n##_OK) { \
430                 printed += scnprintf(bf + printed, size - printed, "%s", #n); \
431                 mode &= ~n##_OK; \
432         }
433
434         P_MODE(R);
435         P_MODE(W);
436         P_MODE(X);
437 #undef P_MODE
438
439         if (mode)
440                 printed += scnprintf(bf + printed, size - printed, "|%#x", mode);
441
442         return printed;
443 }
444
445 #define SCA_ACCMODE syscall_arg__scnprintf_access_mode
446
447 static size_t syscall_arg__scnprintf_open_flags(char *bf, size_t size,
448                                                struct syscall_arg *arg)
449 {
450         int printed = 0, flags = arg->val;
451
452         if (!(flags & O_CREAT))
453                 arg->mask |= 1 << (arg->idx + 1); /* Mask the mode parm */
454
455         if (flags == 0)
456                 return scnprintf(bf, size, "RDONLY");
457 #define P_FLAG(n) \
458         if (flags & O_##n) { \
459                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
460                 flags &= ~O_##n; \
461         }
462
463         P_FLAG(APPEND);
464         P_FLAG(ASYNC);
465         P_FLAG(CLOEXEC);
466         P_FLAG(CREAT);
467         P_FLAG(DIRECT);
468         P_FLAG(DIRECTORY);
469         P_FLAG(EXCL);
470         P_FLAG(LARGEFILE);
471         P_FLAG(NOATIME);
472         P_FLAG(NOCTTY);
473 #ifdef O_NONBLOCK
474         P_FLAG(NONBLOCK);
475 #elif O_NDELAY
476         P_FLAG(NDELAY);
477 #endif
478 #ifdef O_PATH
479         P_FLAG(PATH);
480 #endif
481         P_FLAG(RDWR);
482 #ifdef O_DSYNC
483         if ((flags & O_SYNC) == O_SYNC)
484                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", "SYNC");
485         else {
486                 P_FLAG(DSYNC);
487         }
488 #else
489         P_FLAG(SYNC);
490 #endif
491         P_FLAG(TRUNC);
492         P_FLAG(WRONLY);
493 #undef P_FLAG
494
495         if (flags)
496                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
497
498         return printed;
499 }
500
501 #define SCA_OPEN_FLAGS syscall_arg__scnprintf_open_flags
502
503 static size_t syscall_arg__scnprintf_eventfd_flags(char *bf, size_t size,
504                                                    struct syscall_arg *arg)
505 {
506         int printed = 0, flags = arg->val;
507
508         if (flags == 0)
509                 return scnprintf(bf, size, "NONE");
510 #define P_FLAG(n) \
511         if (flags & EFD_##n) { \
512                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
513                 flags &= ~EFD_##n; \
514         }
515
516         P_FLAG(SEMAPHORE);
517         P_FLAG(CLOEXEC);
518         P_FLAG(NONBLOCK);
519 #undef P_FLAG
520
521         if (flags)
522                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
523
524         return printed;
525 }
526
527 #define SCA_EFD_FLAGS syscall_arg__scnprintf_eventfd_flags
528
529 static size_t syscall_arg__scnprintf_pipe_flags(char *bf, size_t size,
530                                                 struct syscall_arg *arg)
531 {
532         int printed = 0, flags = arg->val;
533
534 #define P_FLAG(n) \
535         if (flags & O_##n) { \
536                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
537                 flags &= ~O_##n; \
538         }
539
540         P_FLAG(CLOEXEC);
541         P_FLAG(NONBLOCK);
542 #undef P_FLAG
543
544         if (flags)
545                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
546
547         return printed;
548 }
549
550 #define SCA_PIPE_FLAGS syscall_arg__scnprintf_pipe_flags
551
552 static size_t syscall_arg__scnprintf_signum(char *bf, size_t size, struct syscall_arg *arg)
553 {
554         int sig = arg->val;
555
556         switch (sig) {
557 #define P_SIGNUM(n) case SIG##n: return scnprintf(bf, size, #n)
558         P_SIGNUM(HUP);
559         P_SIGNUM(INT);
560         P_SIGNUM(QUIT);
561         P_SIGNUM(ILL);
562         P_SIGNUM(TRAP);
563         P_SIGNUM(ABRT);
564         P_SIGNUM(BUS);
565         P_SIGNUM(FPE);
566         P_SIGNUM(KILL);
567         P_SIGNUM(USR1);
568         P_SIGNUM(SEGV);
569         P_SIGNUM(USR2);
570         P_SIGNUM(PIPE);
571         P_SIGNUM(ALRM);
572         P_SIGNUM(TERM);
573         P_SIGNUM(STKFLT);
574         P_SIGNUM(CHLD);
575         P_SIGNUM(CONT);
576         P_SIGNUM(STOP);
577         P_SIGNUM(TSTP);
578         P_SIGNUM(TTIN);
579         P_SIGNUM(TTOU);
580         P_SIGNUM(URG);
581         P_SIGNUM(XCPU);
582         P_SIGNUM(XFSZ);
583         P_SIGNUM(VTALRM);
584         P_SIGNUM(PROF);
585         P_SIGNUM(WINCH);
586         P_SIGNUM(IO);
587         P_SIGNUM(PWR);
588         P_SIGNUM(SYS);
589         default: break;
590         }
591
592         return scnprintf(bf, size, "%#x", sig);
593 }
594
595 #define SCA_SIGNUM syscall_arg__scnprintf_signum
596
597 #define STRARRAY(arg, name, array) \
598           .arg_scnprintf = { [arg] = SCA_STRARRAY, }, \
599           .arg_parm      = { [arg] = &strarray__##array, }
600
601 static struct syscall_fmt {
602         const char *name;
603         const char *alias;
604         size_t     (*arg_scnprintf[6])(char *bf, size_t size, struct syscall_arg *arg);
605         void       *arg_parm[6];
606         bool       errmsg;
607         bool       timeout;
608         bool       hexret;
609 } syscall_fmts[] = {
610         { .name     = "access",     .errmsg = true,
611           .arg_scnprintf = { [1] = SCA_ACCMODE, /* mode */ }, },
612         { .name     = "arch_prctl", .errmsg = true, .alias = "prctl", },
613         { .name     = "brk",        .hexret = true,
614           .arg_scnprintf = { [0] = SCA_HEX, /* brk */ }, },
615         { .name     = "clock_gettime",  .errmsg = true, STRARRAY(0, clk_id, clockid), },
616         { .name     = "connect",    .errmsg = true, },
617         { .name     = "epoll_ctl",  .errmsg = true, STRARRAY(1, op, epoll_ctl_ops), },
618         { .name     = "eventfd2",   .errmsg = true,
619           .arg_scnprintf = { [1] = SCA_EFD_FLAGS, /* flags */ }, },
620         { .name     = "fcntl",      .errmsg = true, STRARRAY(1, cmd, fcntl_cmds), },
621         { .name     = "flock",      .errmsg = true,
622           .arg_scnprintf = { [1] = SCA_FLOCK, /* cmd */ }, },
623         { .name     = "fstat",      .errmsg = true, .alias = "newfstat", },
624         { .name     = "fstatat",    .errmsg = true, .alias = "newfstatat", },
625         { .name     = "futex",      .errmsg = true,
626           .arg_scnprintf = { [1] = SCA_FUTEX_OP, /* op */ }, },
627         { .name     = "getitimer",  .errmsg = true, STRARRAY(0, which, itimers), },
628         { .name     = "getrlimit",  .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
629         { .name     = "ioctl",      .errmsg = true,
630           .arg_scnprintf = { [2] = SCA_HEX, /* arg */ }, },
631         { .name     = "kill",       .errmsg = true,
632           .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
633         { .name     = "lseek",      .errmsg = true, STRARRAY(2, whence, whences), },
634         { .name     = "lstat",      .errmsg = true, .alias = "newlstat", },
635         { .name     = "madvise",    .errmsg = true,
636           .arg_scnprintf = { [0] = SCA_HEX,      /* start */
637                              [2] = SCA_MADV_BHV, /* behavior */ }, },
638         { .name     = "mlock",      .errmsg = true,
639           .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
640         { .name     = "mlockall",   .errmsg = true,
641           .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
642         { .name     = "mmap",       .hexret = true,
643           .arg_scnprintf = { [0] = SCA_HEX,       /* addr */
644                              [2] = SCA_MMAP_PROT, /* prot */
645                              [3] = SCA_MMAP_FLAGS, /* flags */ }, },
646         { .name     = "mprotect",   .errmsg = true,
647           .arg_scnprintf = { [0] = SCA_HEX, /* start */
648                              [2] = SCA_MMAP_PROT, /* prot */ }, },
649         { .name     = "mremap",     .hexret = true,
650           .arg_scnprintf = { [0] = SCA_HEX, /* addr */
651                              [4] = SCA_HEX, /* new_addr */ }, },
652         { .name     = "munlock",    .errmsg = true,
653           .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
654         { .name     = "munmap",     .errmsg = true,
655           .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
656         { .name     = "open",       .errmsg = true,
657           .arg_scnprintf = { [1] = SCA_OPEN_FLAGS, /* flags */ }, },
658         { .name     = "open_by_handle_at", .errmsg = true,
659           .arg_scnprintf = { [2] = SCA_OPEN_FLAGS, /* flags */ }, },
660         { .name     = "openat",     .errmsg = true,
661           .arg_scnprintf = { [2] = SCA_OPEN_FLAGS, /* flags */ }, },
662         { .name     = "pipe2",      .errmsg = true,
663           .arg_scnprintf = { [1] = SCA_PIPE_FLAGS, /* flags */ }, },
664         { .name     = "poll",       .errmsg = true, .timeout = true, },
665         { .name     = "ppoll",      .errmsg = true, .timeout = true, },
666         { .name     = "pread",      .errmsg = true, .alias = "pread64", },
667         { .name     = "prlimit64",  .errmsg = true, STRARRAY(1, resource, rlimit_resources), },
668         { .name     = "pwrite",     .errmsg = true, .alias = "pwrite64", },
669         { .name     = "read",       .errmsg = true, },
670         { .name     = "recvfrom",   .errmsg = true,
671           .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
672         { .name     = "recvmmsg",   .errmsg = true,
673           .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
674         { .name     = "recvmsg",    .errmsg = true,
675           .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
676         { .name     = "rt_sigaction", .errmsg = true,
677           .arg_scnprintf = { [0] = SCA_SIGNUM, /* sig */ }, },
678         { .name     = "rt_sigprocmask",  .errmsg = true, STRARRAY(0, how, sighow), },
679         { .name     = "rt_sigqueueinfo", .errmsg = true,
680           .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
681         { .name     = "rt_tgsigqueueinfo", .errmsg = true,
682           .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
683         { .name     = "select",     .errmsg = true, .timeout = true, },
684         { .name     = "sendmmsg",    .errmsg = true,
685           .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
686         { .name     = "sendmsg",    .errmsg = true,
687           .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
688         { .name     = "sendto",     .errmsg = true,
689           .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
690         { .name     = "setitimer",  .errmsg = true, STRARRAY(0, which, itimers), },
691         { .name     = "setrlimit",  .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
692         { .name     = "socket",     .errmsg = true,
693           .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
694                              [1] = SCA_SK_TYPE, /* type */ },
695           .arg_parm      = { [0] = &strarray__socket_families, /* family */ }, },
696         { .name     = "socketpair", .errmsg = true,
697           .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
698                              [1] = SCA_SK_TYPE, /* type */ },
699           .arg_parm      = { [0] = &strarray__socket_families, /* family */ }, },
700         { .name     = "stat",       .errmsg = true, .alias = "newstat", },
701         { .name     = "tgkill",     .errmsg = true,
702           .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
703         { .name     = "tkill",      .errmsg = true,
704           .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
705         { .name     = "uname",      .errmsg = true, .alias = "newuname", },
706 };
707
708 static int syscall_fmt__cmp(const void *name, const void *fmtp)
709 {
710         const struct syscall_fmt *fmt = fmtp;
711         return strcmp(name, fmt->name);
712 }
713
714 static struct syscall_fmt *syscall_fmt__find(const char *name)
715 {
716         const int nmemb = ARRAY_SIZE(syscall_fmts);
717         return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp);
718 }
719
/* What is known about one syscall. */
struct syscall {
	struct event_format *tp_format;	/* tracepoint format; its field list describes the args */
	const char	    *name;
	bool		    filtered;
	struct syscall_fmt  *fmt;	/* formatting override; tested against NULL before use */
	/* per-argument beautifiers, allocated in syscall__set_arg_fmts() */
	size_t		    (**arg_scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
	void		    **arg_parm;	/* set to fmt->arg_parm when fmt is present */
};
728
729 static size_t fprintf_duration(unsigned long t, FILE *fp)
730 {
731         double duration = (double)t / NSEC_PER_MSEC;
732         size_t printed = fprintf(fp, "(");
733
734         if (duration >= 1.0)
735                 printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration);
736         else if (duration >= 0.01)
737                 printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration);
738         else
739                 printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration);
740         return printed + fprintf(fp, "): ");
741 }
742
743 struct thread_trace {
744         u64               entry_time;
745         u64               exit_time;
746         bool              entry_pending;
747         unsigned long     nr_events;
748         char              *entry_str;
749         double            runtime_ms;
750 };
751
752 static struct thread_trace *thread_trace__new(void)
753 {
754         return zalloc(sizeof(struct thread_trace));
755 }
756
757 static struct thread_trace *thread__trace(struct thread *thread, FILE *fp)
758 {
759         struct thread_trace *ttrace;
760
761         if (thread == NULL)
762                 goto fail;
763
764         if (thread->priv == NULL)
765                 thread->priv = thread_trace__new();
766                 
767         if (thread->priv == NULL)
768                 goto fail;
769
770         ttrace = thread->priv;
771         ++ttrace->nr_events;
772
773         return ttrace;
774 fail:
775         color_fprintf(fp, PERF_COLOR_RED,
776                       "WARNING: not enough memory, dropping samples!\n");
777         return NULL;
778 }
779
780 struct trace {
781         struct perf_tool        tool;
782         int                     audit_machine;
783         struct {
784                 int             max;
785                 struct syscall  *table;
786         } syscalls;
787         struct perf_record_opts opts;
788         struct machine          host;
789         u64                     base_time;
790         bool                    full_time;
791         FILE                    *output;
792         unsigned long           nr_events;
793         struct strlist          *ev_qualifier;
794         bool                    not_ev_qualifier;
795         struct intlist          *tid_list;
796         struct intlist          *pid_list;
797         bool                    sched;
798         bool                    multiple_threads;
799         bool                    show_comm;
800         double                  duration_filter;
801         double                  runtime_ms;
802 };
803
804 static bool trace__filter_duration(struct trace *trace, double t)
805 {
806         return t < (trace->duration_filter * NSEC_PER_MSEC);
807 }
808
809 static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
810 {
811         double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC;
812
813         return fprintf(fp, "%10.3f ", ts);
814 }
815
816 static bool done = false;
817
818 static void sig_handler(int sig __maybe_unused)
819 {
820         done = true;
821 }
822
823 static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread,
824                                         u64 duration, u64 tstamp, FILE *fp)
825 {
826         size_t printed = trace__fprintf_tstamp(trace, tstamp, fp);
827         printed += fprintf_duration(duration, fp);
828
829         if (trace->multiple_threads) {
830                 if (trace->show_comm)
831                         printed += fprintf(fp, "%.14s/", thread->comm);
832                 printed += fprintf(fp, "%d ", thread->tid);
833         }
834
835         return printed;
836 }
837
838 static int trace__process_event(struct trace *trace, struct machine *machine,
839                                 union perf_event *event)
840 {
841         int ret = 0;
842
843         switch (event->header.type) {
844         case PERF_RECORD_LOST:
845                 color_fprintf(trace->output, PERF_COLOR_RED,
846                               "LOST %" PRIu64 " events!\n", event->lost.lost);
847                 ret = machine__process_lost_event(machine, event);
848         default:
849                 ret = machine__process_event(machine, event);
850                 break;
851         }
852
853         return ret;
854 }
855
856 static int trace__tool_process(struct perf_tool *tool,
857                                union perf_event *event,
858                                struct perf_sample *sample __maybe_unused,
859                                struct machine *machine)
860 {
861         struct trace *trace = container_of(tool, struct trace, tool);
862         return trace__process_event(trace, machine, event);
863 }
864
865 static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
866 {
867         int err = symbol__init();
868
869         if (err)
870                 return err;
871
872         machine__init(&trace->host, "", HOST_KERNEL_ID);
873         machine__create_kernel_maps(&trace->host);
874
875         if (perf_target__has_task(&trace->opts.target)) {
876                 err = perf_event__synthesize_thread_map(&trace->tool, evlist->threads,
877                                                         trace__tool_process,
878                                                         &trace->host);
879         } else {
880                 err = perf_event__synthesize_threads(&trace->tool, trace__tool_process,
881                                                      &trace->host);
882         }
883
884         if (err)
885                 symbol__exit();
886
887         return err;
888 }
889
890 static int syscall__set_arg_fmts(struct syscall *sc)
891 {
892         struct format_field *field;
893         int idx = 0;
894
895         sc->arg_scnprintf = calloc(sc->tp_format->format.nr_fields - 1, sizeof(void *));
896         if (sc->arg_scnprintf == NULL)
897                 return -1;
898
899         if (sc->fmt)
900                 sc->arg_parm = sc->fmt->arg_parm;
901
902         for (field = sc->tp_format->format.fields->next; field; field = field->next) {
903                 if (sc->fmt && sc->fmt->arg_scnprintf[idx])
904                         sc->arg_scnprintf[idx] = sc->fmt->arg_scnprintf[idx];
905                 else if (field->flags & FIELD_IS_POINTER)
906                         sc->arg_scnprintf[idx] = syscall_arg__scnprintf_hex;
907                 ++idx;
908         }
909
910         return 0;
911 }
912
913 static int trace__read_syscall_info(struct trace *trace, int id)
914 {
915         char tp_name[128];
916         struct syscall *sc;
917         const char *name = audit_syscall_to_name(id, trace->audit_machine);
918
919         if (name == NULL)
920                 return -1;
921
922         if (id > trace->syscalls.max) {
923                 struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc));
924
925                 if (nsyscalls == NULL)
926                         return -1;
927
928                 if (trace->syscalls.max != -1) {
929                         memset(nsyscalls + trace->syscalls.max + 1, 0,
930                                (id - trace->syscalls.max) * sizeof(*sc));
931                 } else {
932                         memset(nsyscalls, 0, (id + 1) * sizeof(*sc));
933                 }
934
935                 trace->syscalls.table = nsyscalls;
936                 trace->syscalls.max   = id;
937         }
938
939         sc = trace->syscalls.table + id;
940         sc->name = name;
941
942         if (trace->ev_qualifier) {
943                 bool in = strlist__find(trace->ev_qualifier, name) != NULL;
944
945                 if (!(in ^ trace->not_ev_qualifier)) {
946                         sc->filtered = true;
947                         /*
948                          * No need to do read tracepoint information since this will be
949                          * filtered out.
950                          */
951                         return 0;
952                 }
953         }
954
955         sc->fmt  = syscall_fmt__find(sc->name);
956
957         snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name);
958         sc->tp_format = event_format__new("syscalls", tp_name);
959
960         if (sc->tp_format == NULL && sc->fmt && sc->fmt->alias) {
961                 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias);
962                 sc->tp_format = event_format__new("syscalls", tp_name);
963         }
964
965         if (sc->tp_format == NULL)
966                 return -1;
967
968         return syscall__set_arg_fmts(sc);
969 }
970
971 static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
972                                       unsigned long *args)
973 {
974         size_t printed = 0;
975
976         if (sc->tp_format != NULL) {
977                 struct format_field *field;
978                 u8 bit = 1;
979                 struct syscall_arg arg = {
980                         .idx  = 0,
981                         .mask = 0,
982                 };
983
984                 for (field = sc->tp_format->format.fields->next; field;
985                      field = field->next, ++arg.idx, bit <<= 1) {
986                         if (arg.mask & bit)
987                                 continue;
988                         /*
989                          * Suppress this argument if its value is zero and
990                          * and we don't have a string associated in an
991                          * strarray for it.
992                          */
993                         if (args[arg.idx] == 0 &&
994                             !(sc->arg_scnprintf &&
995                               sc->arg_scnprintf[arg.idx] == SCA_STRARRAY &&
996                               sc->arg_parm[arg.idx]))
997                                 continue;
998
999                         printed += scnprintf(bf + printed, size - printed,
1000                                              "%s%s: ", printed ? ", " : "", field->name);
1001                         if (sc->arg_scnprintf && sc->arg_scnprintf[arg.idx]) {
1002                                 arg.val = args[arg.idx];
1003                                 if (sc->arg_parm)
1004                                         arg.parm = sc->arg_parm[arg.idx];
1005                                 printed += sc->arg_scnprintf[arg.idx](bf + printed,
1006                                                                       size - printed, &arg);
1007                         } else {
1008                                 printed += scnprintf(bf + printed, size - printed,
1009                                                      "%ld", args[arg.idx]);
1010                         }
1011                 }
1012         } else {
1013                 int i = 0;
1014
1015                 while (i < 6) {
1016                         printed += scnprintf(bf + printed, size - printed,
1017                                              "%sarg%d: %ld",
1018                                              printed ? ", " : "", i, args[i]);
1019                         ++i;
1020                 }
1021         }
1022
1023         return printed;
1024 }
1025
/*
 * Signature of the per tracepoint callbacks stored in evsel->handler.func and
 * invoked for each sample (see trace__sys_enter() and friends).
 */
typedef int (*tracepoint_handler)(struct trace *trace,
				  struct perf_evsel *evsel,
				  struct perf_sample *sample);
1028
1029 static struct syscall *trace__syscall_info(struct trace *trace,
1030                                            struct perf_evsel *evsel,
1031                                            struct perf_sample *sample)
1032 {
1033         int id = perf_evsel__intval(evsel, sample, "id");
1034
1035         if (id < 0) {
1036
1037                 /*
1038                  * XXX: Noticed on x86_64, reproduced as far back as 3.0.36, haven't tried
1039                  * before that, leaving at a higher verbosity level till that is
1040                  * explained. Reproduced with plain ftrace with:
1041                  *
1042                  * echo 1 > /t/events/raw_syscalls/sys_exit/enable
1043                  * grep "NR -1 " /t/trace_pipe
1044                  *
1045                  * After generating some load on the machine.
1046                  */
1047                 if (verbose > 1) {
1048                         static u64 n;
1049                         fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n",
1050                                 id, perf_evsel__name(evsel), ++n);
1051                 }
1052                 return NULL;
1053         }
1054
1055         if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) &&
1056             trace__read_syscall_info(trace, id))
1057                 goto out_cant_read;
1058
1059         if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL))
1060                 goto out_cant_read;
1061
1062         return &trace->syscalls.table[id];
1063
1064 out_cant_read:
1065         if (verbose) {
1066                 fprintf(trace->output, "Problems reading syscall %d", id);
1067                 if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL)
1068                         fprintf(trace->output, "(%s)", trace->syscalls.table[id].name);
1069                 fputs(" information\n", trace->output);
1070         }
1071         return NULL;
1072 }
1073
1074 static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
1075                             struct perf_sample *sample)
1076 {
1077         char *msg;
1078         void *args;
1079         size_t printed = 0;
1080         struct thread *thread;
1081         struct syscall *sc = trace__syscall_info(trace, evsel, sample);
1082         struct thread_trace *ttrace;
1083
1084         if (sc == NULL)
1085                 return -1;
1086
1087         if (sc->filtered)
1088                 return 0;
1089
1090         thread = machine__findnew_thread(&trace->host, sample->pid,
1091                                          sample->tid);
1092         ttrace = thread__trace(thread, trace->output);
1093         if (ttrace == NULL)
1094                 return -1;
1095
1096         args = perf_evsel__rawptr(evsel, sample, "args");
1097         if (args == NULL) {
1098                 fprintf(trace->output, "Problems reading syscall arguments\n");
1099                 return -1;
1100         }
1101
1102         ttrace = thread->priv;
1103
1104         if (ttrace->entry_str == NULL) {
1105                 ttrace->entry_str = malloc(1024);
1106                 if (!ttrace->entry_str)
1107                         return -1;
1108         }
1109
1110         ttrace->entry_time = sample->time;
1111         msg = ttrace->entry_str;
1112         printed += scnprintf(msg + printed, 1024 - printed, "%s(", sc->name);
1113
1114         printed += syscall__scnprintf_args(sc, msg + printed, 1024 - printed,  args);
1115
1116         if (!strcmp(sc->name, "exit_group") || !strcmp(sc->name, "exit")) {
1117                 if (!trace->duration_filter) {
1118                         trace__fprintf_entry_head(trace, thread, 1, sample->time, trace->output);
1119                         fprintf(trace->output, "%-70s\n", ttrace->entry_str);
1120                 }
1121         } else
1122                 ttrace->entry_pending = true;
1123
1124         return 0;
1125 }
1126
1127 static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
1128                            struct perf_sample *sample)
1129 {
1130         int ret;
1131         u64 duration = 0;
1132         struct thread *thread;
1133         struct syscall *sc = trace__syscall_info(trace, evsel, sample);
1134         struct thread_trace *ttrace;
1135
1136         if (sc == NULL)
1137                 return -1;
1138
1139         if (sc->filtered)
1140                 return 0;
1141
1142         thread = machine__findnew_thread(&trace->host, sample->pid,
1143                                          sample->tid);
1144         ttrace = thread__trace(thread, trace->output);
1145         if (ttrace == NULL)
1146                 return -1;
1147
1148         ret = perf_evsel__intval(evsel, sample, "ret");
1149
1150         ttrace = thread->priv;
1151
1152         ttrace->exit_time = sample->time;
1153
1154         if (ttrace->entry_time) {
1155                 duration = sample->time - ttrace->entry_time;
1156                 if (trace__filter_duration(trace, duration))
1157                         goto out;
1158         } else if (trace->duration_filter)
1159                 goto out;
1160
1161         trace__fprintf_entry_head(trace, thread, duration, sample->time, trace->output);
1162
1163         if (ttrace->entry_pending) {
1164                 fprintf(trace->output, "%-70s", ttrace->entry_str);
1165         } else {
1166                 fprintf(trace->output, " ... [");
1167                 color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued");
1168                 fprintf(trace->output, "]: %s()", sc->name);
1169         }
1170
1171         if (sc->fmt == NULL) {
1172 signed_print:
1173                 fprintf(trace->output, ") = %d", ret);
1174         } else if (ret < 0 && sc->fmt->errmsg) {
1175                 char bf[256];
1176                 const char *emsg = strerror_r(-ret, bf, sizeof(bf)),
1177                            *e = audit_errno_to_name(-ret);
1178
1179                 fprintf(trace->output, ") = -1 %s %s", e, emsg);
1180         } else if (ret == 0 && sc->fmt->timeout)
1181                 fprintf(trace->output, ") = 0 Timeout");
1182         else if (sc->fmt->hexret)
1183                 fprintf(trace->output, ") = %#x", ret);
1184         else
1185                 goto signed_print;
1186
1187         fputc('\n', trace->output);
1188 out:
1189         ttrace->entry_pending = false;
1190
1191         return 0;
1192 }
1193
1194 static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel,
1195                                      struct perf_sample *sample)
1196 {
1197         u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
1198         double runtime_ms = (double)runtime / NSEC_PER_MSEC;
1199         struct thread *thread = machine__findnew_thread(&trace->host,
1200                                                         sample->pid,
1201                                                         sample->tid);
1202         struct thread_trace *ttrace = thread__trace(thread, trace->output);
1203
1204         if (ttrace == NULL)
1205                 goto out_dump;
1206
1207         ttrace->runtime_ms += runtime_ms;
1208         trace->runtime_ms += runtime_ms;
1209         return 0;
1210
1211 out_dump:
1212         fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n",
1213                evsel->name,
1214                perf_evsel__strval(evsel, sample, "comm"),
1215                (pid_t)perf_evsel__intval(evsel, sample, "pid"),
1216                runtime,
1217                perf_evsel__intval(evsel, sample, "vruntime"));
1218         return 0;
1219 }
1220
1221 static bool skip_sample(struct trace *trace, struct perf_sample *sample)
1222 {
1223         if ((trace->pid_list && intlist__find(trace->pid_list, sample->pid)) ||
1224             (trace->tid_list && intlist__find(trace->tid_list, sample->tid)))
1225                 return false;
1226
1227         if (trace->pid_list || trace->tid_list)
1228                 return true;
1229
1230         return false;
1231 }
1232
1233 static int trace__process_sample(struct perf_tool *tool,
1234                                  union perf_event *event __maybe_unused,
1235                                  struct perf_sample *sample,
1236                                  struct perf_evsel *evsel,
1237                                  struct machine *machine __maybe_unused)
1238 {
1239         struct trace *trace = container_of(tool, struct trace, tool);
1240         int err = 0;
1241
1242         tracepoint_handler handler = evsel->handler.func;
1243
1244         if (skip_sample(trace, sample))
1245                 return 0;
1246
1247         if (!trace->full_time && trace->base_time == 0)
1248                 trace->base_time = sample->time;
1249
1250         if (handler)
1251                 handler(trace, evsel, sample);
1252
1253         return err;
1254 }
1255
1256 static bool
1257 perf_session__has_tp(struct perf_session *session, const char *name)
1258 {
1259         struct perf_evsel *evsel;
1260
1261         evsel = perf_evlist__find_tracepoint_by_name(session->evlist, name);
1262
1263         return evsel != NULL;
1264 }
1265
1266 static int parse_target_str(struct trace *trace)
1267 {
1268         if (trace->opts.target.pid) {
1269                 trace->pid_list = intlist__new(trace->opts.target.pid);
1270                 if (trace->pid_list == NULL) {
1271                         pr_err("Error parsing process id string\n");
1272                         return -EINVAL;
1273                 }
1274         }
1275
1276         if (trace->opts.target.tid) {
1277                 trace->tid_list = intlist__new(trace->opts.target.tid);
1278                 if (trace->tid_list == NULL) {
1279                         pr_err("Error parsing thread id string\n");
1280                         return -EINVAL;
1281                 }
1282         }
1283
1284         return 0;
1285 }
1286
1287 static int trace__run(struct trace *trace, int argc, const char **argv)
1288 {
1289         struct perf_evlist *evlist = perf_evlist__new();
1290         struct perf_evsel *evsel;
1291         int err = -1, i;
1292         unsigned long before;
1293         const bool forks = argc > 0;
1294
1295         if (evlist == NULL) {
1296                 fprintf(trace->output, "Not enough memory to run!\n");
1297                 goto out;
1298         }
1299
1300         if (perf_evlist__add_newtp(evlist, "raw_syscalls", "sys_enter", trace__sys_enter) ||
1301             perf_evlist__add_newtp(evlist, "raw_syscalls", "sys_exit", trace__sys_exit)) {
1302                 fprintf(trace->output, "Couldn't read the raw_syscalls tracepoints information!\n");
1303                 goto out_delete_evlist;
1304         }
1305
1306         if (trace->sched &&
1307             perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
1308                                    trace__sched_stat_runtime)) {
1309                 fprintf(trace->output, "Couldn't read the sched_stat_runtime tracepoint information!\n");
1310                 goto out_delete_evlist;
1311         }
1312
1313         err = perf_evlist__create_maps(evlist, &trace->opts.target);
1314         if (err < 0) {
1315                 fprintf(trace->output, "Problems parsing the target to trace, check your options!\n");
1316                 goto out_delete_evlist;
1317         }
1318
1319         err = trace__symbols_init(trace, evlist);
1320         if (err < 0) {
1321                 fprintf(trace->output, "Problems initializing symbol libraries!\n");
1322                 goto out_delete_maps;
1323         }
1324
1325         perf_evlist__config(evlist, &trace->opts);
1326
1327         signal(SIGCHLD, sig_handler);
1328         signal(SIGINT, sig_handler);
1329
1330         if (forks) {
1331                 err = perf_evlist__prepare_workload(evlist, &trace->opts.target,
1332                                                     argv, false, false);
1333                 if (err < 0) {
1334                         fprintf(trace->output, "Couldn't run the workload!\n");
1335                         goto out_delete_maps;
1336                 }
1337         }
1338
1339         err = perf_evlist__open(evlist);
1340         if (err < 0) {
1341                 fprintf(trace->output, "Couldn't create the events: %s\n", strerror(errno));
1342                 goto out_delete_maps;
1343         }
1344
1345         err = perf_evlist__mmap(evlist, UINT_MAX, false);
1346         if (err < 0) {
1347                 fprintf(trace->output, "Couldn't mmap the events: %s\n", strerror(errno));
1348                 goto out_close_evlist;
1349         }
1350
1351         perf_evlist__enable(evlist);
1352
1353         if (forks)
1354                 perf_evlist__start_workload(evlist);
1355
1356         trace->multiple_threads = evlist->threads->map[0] == -1 || evlist->threads->nr > 1;
1357 again:
1358         before = trace->nr_events;
1359
1360         for (i = 0; i < evlist->nr_mmaps; i++) {
1361                 union perf_event *event;
1362
1363                 while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) {
1364                         const u32 type = event->header.type;
1365                         tracepoint_handler handler;
1366                         struct perf_sample sample;
1367
1368                         ++trace->nr_events;
1369
1370                         err = perf_evlist__parse_sample(evlist, event, &sample);
1371                         if (err) {
1372                                 fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err);
1373                                 continue;
1374                         }
1375
1376                         if (!trace->full_time && trace->base_time == 0)
1377                                 trace->base_time = sample.time;
1378
1379                         if (type != PERF_RECORD_SAMPLE) {
1380                                 trace__process_event(trace, &trace->host, event);
1381                                 continue;
1382                         }
1383
1384                         evsel = perf_evlist__id2evsel(evlist, sample.id);
1385                         if (evsel == NULL) {
1386                                 fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample.id);
1387                                 continue;
1388                         }
1389
1390                         if (sample.raw_data == NULL) {
1391                                 fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
1392                                        perf_evsel__name(evsel), sample.tid,
1393                                        sample.cpu, sample.raw_size);
1394                                 continue;
1395                         }
1396
1397                         handler = evsel->handler.func;
1398                         handler(trace, evsel, &sample);
1399
1400                         if (done)
1401                                 goto out_unmap_evlist;
1402                 }
1403         }
1404
1405         if (trace->nr_events == before) {
1406                 if (done)
1407                         goto out_unmap_evlist;
1408
1409                 poll(evlist->pollfd, evlist->nr_fds, -1);
1410         }
1411
1412         if (done)
1413                 perf_evlist__disable(evlist);
1414
1415         goto again;
1416
1417 out_unmap_evlist:
1418         perf_evlist__munmap(evlist);
1419 out_close_evlist:
1420         perf_evlist__close(evlist);
1421 out_delete_maps:
1422         perf_evlist__delete_maps(evlist);
1423 out_delete_evlist:
1424         perf_evlist__delete(evlist);
1425 out:
1426         return err;
1427 }
1428
1429 static int trace__replay(struct trace *trace)
1430 {
1431         const struct perf_evsel_str_handler handlers[] = {
1432                 { "raw_syscalls:sys_enter",  trace__sys_enter, },
1433                 { "raw_syscalls:sys_exit",   trace__sys_exit, },
1434         };
1435
1436         struct perf_session *session;
1437         int err = -1;
1438
1439         trace->tool.sample        = trace__process_sample;
1440         trace->tool.mmap          = perf_event__process_mmap;
1441         trace->tool.mmap2         = perf_event__process_mmap2;
1442         trace->tool.comm          = perf_event__process_comm;
1443         trace->tool.exit          = perf_event__process_exit;
1444         trace->tool.fork          = perf_event__process_fork;
1445         trace->tool.attr          = perf_event__process_attr;
1446         trace->tool.tracing_data = perf_event__process_tracing_data;
1447         trace->tool.build_id      = perf_event__process_build_id;
1448
1449         trace->tool.ordered_samples = true;
1450         trace->tool.ordering_requires_timestamps = true;
1451
1452         /* add tid to output */
1453         trace->multiple_threads = true;
1454
1455         if (symbol__init() < 0)
1456                 return -1;
1457
1458         session = perf_session__new(input_name, O_RDONLY, 0, false,
1459                                     &trace->tool);
1460         if (session == NULL)
1461                 return -ENOMEM;
1462
1463         err = perf_session__set_tracepoints_handlers(session, handlers);
1464         if (err)
1465                 goto out;
1466
1467         if (!perf_session__has_tp(session, "raw_syscalls:sys_enter")) {
1468                 pr_err("Data file does not have raw_syscalls:sys_enter events\n");
1469                 goto out;
1470         }
1471
1472         if (!perf_session__has_tp(session, "raw_syscalls:sys_exit")) {
1473                 pr_err("Data file does not have raw_syscalls:sys_exit events\n");
1474                 goto out;
1475         }
1476
1477         err = parse_target_str(trace);
1478         if (err != 0)
1479                 goto out;
1480
1481         setup_pager();
1482
1483         err = perf_session__process_events(session, &trace->tool);
1484         if (err)
1485                 pr_err("Failed to process events, error %d", err);
1486
1487 out:
1488         perf_session__delete(session);
1489
1490         return err;
1491 }
1492
/*
 * Print the banner and column titles that precede the per thread summary.
 *
 * Returns the sum of the fprintf() return values, i.e. the number of
 * characters written when no output error occurs.
 */
static size_t trace__fprintf_threads_header(FILE *fp)
{
	static const char * const lines[] = {
		"\n _____________________________________________________________________\n",
		" __)    Summary of events    (__\n\n",
		"              [ task - pid ]     [ events ] [ ratio ]  [ runtime ]\n",
		" _____________________________________________________________________\n\n",
	};
	size_t printed = 0;
	size_t i;

	for (i = 0; i < sizeof(lines) / sizeof(lines[0]); i++)
		printed += fprintf(fp, "%s", lines[i]);

	return printed;
}
1504
1505 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
1506 {
1507         size_t printed = trace__fprintf_threads_header(fp);
1508         struct rb_node *nd;
1509
1510         for (nd = rb_first(&trace->host.threads); nd; nd = rb_next(nd)) {
1511                 struct thread *thread = rb_entry(nd, struct thread, rb_node);
1512                 struct thread_trace *ttrace = thread->priv;
1513                 const char *color;
1514                 double ratio;
1515
1516                 if (ttrace == NULL)
1517                         continue;
1518
1519                 ratio = (double)ttrace->nr_events / trace->nr_events * 100.0;
1520
1521                 color = PERF_COLOR_NORMAL;
1522                 if (ratio > 50.0)
1523                         color = PERF_COLOR_RED;
1524                 else if (ratio > 25.0)
1525                         color = PERF_COLOR_GREEN;
1526                 else if (ratio > 5.0)
1527                         color = PERF_COLOR_YELLOW;
1528
1529                 printed += color_fprintf(fp, color, "%20s", thread->comm);
1530                 printed += fprintf(fp, " - %-5d :%11lu   [", thread->tid, ttrace->nr_events);
1531                 printed += color_fprintf(fp, color, "%5.1f%%", ratio);
1532                 printed += fprintf(fp, " ] %10.3f ms\n", ttrace->runtime_ms);
1533         }
1534
1535         return printed;
1536 }
1537
1538 static int trace__set_duration(const struct option *opt, const char *str,
1539                                int unset __maybe_unused)
1540 {
1541         struct trace *trace = opt->value;
1542
1543         trace->duration_filter = atof(str);
1544         return 0;
1545 }
1546
1547 static int trace__open_output(struct trace *trace, const char *filename)
1548 {
1549         struct stat st;
1550
1551         if (!stat(filename, &st) && st.st_size) {
1552                 char oldname[PATH_MAX];
1553
1554                 scnprintf(oldname, sizeof(oldname), "%s.old", filename);
1555                 unlink(oldname);
1556                 rename(filename, oldname);
1557         }
1558
1559         trace->output = fopen(filename, "w");
1560
1561         return trace->output == NULL ? -errno : 0;
1562 }
1563
1564 int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
1565 {
1566         const char * const trace_usage[] = {
1567                 "perf trace [<options>] [<command>]",
1568                 "perf trace [<options>] -- <command> [<options>]",
1569                 NULL
1570         };
1571         struct trace trace = {
1572                 .audit_machine = audit_detect_machine(),
1573                 .syscalls = {
1574                         . max = -1,
1575                 },
1576                 .opts = {
1577                         .target = {
1578                                 .uid       = UINT_MAX,
1579                                 .uses_mmap = true,
1580                         },
1581                         .user_freq     = UINT_MAX,
1582                         .user_interval = ULLONG_MAX,
1583                         .no_delay      = true,
1584                         .mmap_pages    = 1024,
1585                 },
1586                 .output = stdout,
1587                 .show_comm = true,
1588         };
1589         const char *output_name = NULL;
1590         const char *ev_qualifier_str = NULL;
1591         const struct option trace_options[] = {
1592         OPT_BOOLEAN(0, "comm", &trace.show_comm,
1593                     "show the thread COMM next to its id"),
1594         OPT_STRING('e', "expr", &ev_qualifier_str, "expr",
1595                     "list of events to trace"),
1596         OPT_STRING('o', "output", &output_name, "file", "output file name"),
1597         OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"),
1598         OPT_STRING('p', "pid", &trace.opts.target.pid, "pid",
1599                     "trace events on existing process id"),
1600         OPT_STRING('t', "tid", &trace.opts.target.tid, "tid",
1601                     "trace events on existing thread id"),
1602         OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide,
1603                     "system-wide collection from all CPUs"),
1604         OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu",
1605                     "list of cpus to monitor"),
1606         OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit,
1607                     "child tasks do not inherit counters"),
1608         OPT_CALLBACK('m', "mmap-pages", &trace.opts.mmap_pages, "pages",
1609                      "number of mmap data pages",
1610                      perf_evlist__parse_mmap_pages),
1611         OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user",
1612                    "user to profile"),
1613         OPT_CALLBACK(0, "duration", &trace, "float",
1614                      "show only events with duration > N.M ms",
1615                      trace__set_duration),
1616         OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"),
1617         OPT_INCR('v', "verbose", &verbose, "be more verbose"),
1618         OPT_BOOLEAN('T', "time", &trace.full_time,
1619                     "Show full timestamp, not time relative to first start"),
1620         OPT_END()
1621         };
1622         int err;
1623         char bf[BUFSIZ];
1624
1625         argc = parse_options(argc, argv, trace_options, trace_usage, 0);
1626
1627         if (output_name != NULL) {
1628                 err = trace__open_output(&trace, output_name);
1629                 if (err < 0) {
1630                         perror("failed to create output file");
1631                         goto out;
1632                 }
1633         }
1634
1635         if (ev_qualifier_str != NULL) {
1636                 const char *s = ev_qualifier_str;
1637
1638                 trace.not_ev_qualifier = *s == '!';
1639                 if (trace.not_ev_qualifier)
1640                         ++s;
1641                 trace.ev_qualifier = strlist__new(true, s);
1642                 if (trace.ev_qualifier == NULL) {
1643                         fputs("Not enough memory to parse event qualifier",
1644                               trace.output);
1645                         err = -ENOMEM;
1646                         goto out_close;
1647                 }
1648         }
1649
1650         err = perf_target__validate(&trace.opts.target);
1651         if (err) {
1652                 perf_target__strerror(&trace.opts.target, err, bf, sizeof(bf));
1653                 fprintf(trace.output, "%s", bf);
1654                 goto out_close;
1655         }
1656
1657         err = perf_target__parse_uid(&trace.opts.target);
1658         if (err) {
1659                 perf_target__strerror(&trace.opts.target, err, bf, sizeof(bf));
1660                 fprintf(trace.output, "%s", bf);
1661                 goto out_close;
1662         }
1663
1664         if (!argc && perf_target__none(&trace.opts.target))
1665                 trace.opts.target.system_wide = true;
1666
1667         if (input_name)
1668                 err = trace__replay(&trace);
1669         else
1670                 err = trace__run(&trace, argc, argv);
1671
1672         if (trace.sched && !err)
1673                 trace__fprintf_thread_summary(&trace, trace.output);
1674
1675 out_close:
1676         if (output_name != NULL)
1677                 fclose(trace.output);
1678 out:
1679         return err;
1680 }