perf trace: Beautify pipe2 'flags' arg
[firefly-linux-kernel-4.4.55.git] / tools / perf / builtin-trace.c
#include <traceevent/event-parse.h>
#include "builtin.h"
#include "util/color.h"
#include "util/debug.h"
#include "util/evlist.h"
#include "util/machine.h"
#include "util/session.h"
#include "util/thread.h"
#include "util/parse-options.h"
#include "util/strlist.h"
#include "util/intlist.h"
#include "util/thread_map.h"

#include <libaudit.h>
#include <signal.h>
#include <stdlib.h>
#include <sys/eventfd.h>
#include <sys/mman.h>
#include <linux/futex.h>
19
/*
 * For older distros: supply fallback values for constants the system
 * headers may not define yet, so that the mmap/madvise beautifiers in this
 * file still build.
 */
#ifndef MAP_STACK
# define MAP_STACK              0x20000
#endif

#ifndef MADV_HWPOISON
# define MADV_HWPOISON          100
#endif

#ifndef MADV_MERGEABLE
# define MADV_MERGEABLE         12
#endif

#ifndef MADV_UNMERGEABLE
# define MADV_UNMERGEABLE       13
#endif
36
/*
 * One syscall argument as handed to the syscall_arg__scnprintf_*()
 * beautifiers in this file.
 */
struct syscall_arg {
        unsigned long val;      /* raw argument value to be formatted */
        void          *parm;    /* beautifier specific data, e.g. a struct strarray */
        u8            idx;      /* position of this argument; open_flags uses idx + 1 to address the next one */
        u8            mask;     /* bit N set by a beautifier marks argument N as masked (presumably not printed by the caller) */
};
43
/*
 * A table of names indexed by an integer argument value, consumed by
 * syscall_arg__scnprintf_strarray().
 */
struct strarray {
        int         nr_entries; /* number of slots in entries[] */
        const char **entries;   /* names, indexed by argument value */
};

/*
 * Define 'strarray__<array>' wrapping the string table 'array', with
 * nr_entries taken from ARRAY_SIZE(array).
 */
#define DEFINE_STRARRAY(array) struct strarray strarray__##array = { \
        .nr_entries = ARRAY_SIZE(array), \
        .entries = array, \
}
53
54 static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size,
55                                               struct syscall_arg *arg)
56 {
57         int idx = arg->val;
58         struct strarray *sa = arg->parm;
59
60         if (idx < 0 || idx >= sa->nr_entries)
61                 return scnprintf(bf, size, "%d", idx);
62
63         return scnprintf(bf, size, "%s", sa->entries[idx]);
64 }
65
66 #define SCA_STRARRAY syscall_arg__scnprintf_strarray
67
68 static size_t syscall_arg__scnprintf_hex(char *bf, size_t size,
69                                          struct syscall_arg *arg)
70 {
71         return scnprintf(bf, size, "%#lx", arg->val);
72 }
73
74 #define SCA_HEX syscall_arg__scnprintf_hex
75
76 static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size,
77                                                struct syscall_arg *arg)
78 {
79         int printed = 0, prot = arg->val;
80
81         if (prot == PROT_NONE)
82                 return scnprintf(bf, size, "NONE");
83 #define P_MMAP_PROT(n) \
84         if (prot & PROT_##n) { \
85                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
86                 prot &= ~PROT_##n; \
87         }
88
89         P_MMAP_PROT(EXEC);
90         P_MMAP_PROT(READ);
91         P_MMAP_PROT(WRITE);
92 #ifdef PROT_SEM
93         P_MMAP_PROT(SEM);
94 #endif
95         P_MMAP_PROT(GROWSDOWN);
96         P_MMAP_PROT(GROWSUP);
97 #undef P_MMAP_PROT
98
99         if (prot)
100                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", prot);
101
102         return printed;
103 }
104
105 #define SCA_MMAP_PROT syscall_arg__scnprintf_mmap_prot
106
107 static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size,
108                                                 struct syscall_arg *arg)
109 {
110         int printed = 0, flags = arg->val;
111
112 #define P_MMAP_FLAG(n) \
113         if (flags & MAP_##n) { \
114                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
115                 flags &= ~MAP_##n; \
116         }
117
118         P_MMAP_FLAG(SHARED);
119         P_MMAP_FLAG(PRIVATE);
120 #ifdef MAP_32BIT
121         P_MMAP_FLAG(32BIT);
122 #endif
123         P_MMAP_FLAG(ANONYMOUS);
124         P_MMAP_FLAG(DENYWRITE);
125         P_MMAP_FLAG(EXECUTABLE);
126         P_MMAP_FLAG(FILE);
127         P_MMAP_FLAG(FIXED);
128         P_MMAP_FLAG(GROWSDOWN);
129 #ifdef MAP_HUGETLB
130         P_MMAP_FLAG(HUGETLB);
131 #endif
132         P_MMAP_FLAG(LOCKED);
133         P_MMAP_FLAG(NONBLOCK);
134         P_MMAP_FLAG(NORESERVE);
135         P_MMAP_FLAG(POPULATE);
136         P_MMAP_FLAG(STACK);
137 #ifdef MAP_UNINITIALIZED
138         P_MMAP_FLAG(UNINITIALIZED);
139 #endif
140 #undef P_MMAP_FLAG
141
142         if (flags)
143                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
144
145         return printed;
146 }
147
148 #define SCA_MMAP_FLAGS syscall_arg__scnprintf_mmap_flags
149
150 static size_t syscall_arg__scnprintf_madvise_behavior(char *bf, size_t size,
151                                                       struct syscall_arg *arg)
152 {
153         int behavior = arg->val;
154
155         switch (behavior) {
156 #define P_MADV_BHV(n) case MADV_##n: return scnprintf(bf, size, #n)
157         P_MADV_BHV(NORMAL);
158         P_MADV_BHV(RANDOM);
159         P_MADV_BHV(SEQUENTIAL);
160         P_MADV_BHV(WILLNEED);
161         P_MADV_BHV(DONTNEED);
162         P_MADV_BHV(REMOVE);
163         P_MADV_BHV(DONTFORK);
164         P_MADV_BHV(DOFORK);
165         P_MADV_BHV(HWPOISON);
166 #ifdef MADV_SOFT_OFFLINE
167         P_MADV_BHV(SOFT_OFFLINE);
168 #endif
169         P_MADV_BHV(MERGEABLE);
170         P_MADV_BHV(UNMERGEABLE);
171 #ifdef MADV_HUGEPAGE
172         P_MADV_BHV(HUGEPAGE);
173 #endif
174 #ifdef MADV_NOHUGEPAGE
175         P_MADV_BHV(NOHUGEPAGE);
176 #endif
177 #ifdef MADV_DONTDUMP
178         P_MADV_BHV(DONTDUMP);
179 #endif
180 #ifdef MADV_DODUMP
181         P_MADV_BHV(DODUMP);
182 #endif
183 #undef P_MADV_PHV
184         default: break;
185         }
186
187         return scnprintf(bf, size, "%#x", behavior);
188 }
189
190 #define SCA_MADV_BHV syscall_arg__scnprintf_madvise_behavior
191
192 static size_t syscall_arg__scnprintf_flock(char *bf, size_t size,
193                                            struct syscall_arg *arg)
194 {
195         int printed = 0, op = arg->val;
196
197         if (op == 0)
198                 return scnprintf(bf, size, "NONE");
199 #define P_CMD(cmd) \
200         if ((op & LOCK_##cmd) == LOCK_##cmd) { \
201                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #cmd); \
202                 op &= ~LOCK_##cmd; \
203         }
204
205         P_CMD(SH);
206         P_CMD(EX);
207         P_CMD(NB);
208         P_CMD(UN);
209         P_CMD(MAND);
210         P_CMD(RW);
211         P_CMD(READ);
212         P_CMD(WRITE);
213 #undef P_OP
214
215         if (op)
216                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", op);
217
218         return printed;
219 }
220
221 #define SCA_FLOCK syscall_arg__scnprintf_flock
222
223 static size_t syscall_arg__scnprintf_futex_op(char *bf, size_t size, struct syscall_arg *arg)
224 {
225         enum syscall_futex_args {
226                 SCF_UADDR   = (1 << 0),
227                 SCF_OP      = (1 << 1),
228                 SCF_VAL     = (1 << 2),
229                 SCF_TIMEOUT = (1 << 3),
230                 SCF_UADDR2  = (1 << 4),
231                 SCF_VAL3    = (1 << 5),
232         };
233         int op = arg->val;
234         int cmd = op & FUTEX_CMD_MASK;
235         size_t printed = 0;
236
237         switch (cmd) {
238 #define P_FUTEX_OP(n) case FUTEX_##n: printed = scnprintf(bf, size, #n);
239         P_FUTEX_OP(WAIT);           arg->mask |= SCF_VAL3|SCF_UADDR2;             break;
240         P_FUTEX_OP(WAKE);           arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
241         P_FUTEX_OP(FD);             arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
242         P_FUTEX_OP(REQUEUE);        arg->mask |= SCF_VAL3|SCF_TIMEOUT;            break;
243         P_FUTEX_OP(CMP_REQUEUE);    arg->mask |= SCF_TIMEOUT;                     break;
244         P_FUTEX_OP(CMP_REQUEUE_PI); arg->mask |= SCF_TIMEOUT;                     break;
245         P_FUTEX_OP(WAKE_OP);                                                      break;
246         P_FUTEX_OP(LOCK_PI);        arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
247         P_FUTEX_OP(UNLOCK_PI);      arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
248         P_FUTEX_OP(TRYLOCK_PI);     arg->mask |= SCF_VAL3|SCF_UADDR2;             break;
249         P_FUTEX_OP(WAIT_BITSET);    arg->mask |= SCF_UADDR2;                      break;
250         P_FUTEX_OP(WAKE_BITSET);    arg->mask |= SCF_UADDR2;                      break;
251         P_FUTEX_OP(WAIT_REQUEUE_PI);                                              break;
252         default: printed = scnprintf(bf, size, "%#x", cmd);                       break;
253         }
254
255         if (op & FUTEX_PRIVATE_FLAG)
256                 printed += scnprintf(bf + printed, size - printed, "|PRIV");
257
258         if (op & FUTEX_CLOCK_REALTIME)
259                 printed += scnprintf(bf + printed, size - printed, "|CLKRT");
260
261         return printed;
262 }
263
264 #define SCA_FUTEX_OP  syscall_arg__scnprintf_futex_op
265
/*
 * Names for the epoll_ctl() 'op' argument, indexed by its value. The table
 * starts at [1]; entry [0] is left NULL by the designated initializer.
 */
static const char *epoll_ctl_ops[] = { [1] = "ADD", "DEL", "MOD", };
static DEFINE_STRARRAY(epoll_ctl_ops);
268
/* Names for the 'which' argument of getitimer()/setitimer(), indexed by value. */
static const char *itimers[] = { "REAL", "VIRTUAL", "PROF", };
static DEFINE_STRARRAY(itimers);
271
/*
 * Names for the lseek() 'whence' argument, indexed by value. "DATA" and
 * "HOLE" are only included when the headers define SEEK_DATA/SEEK_HOLE.
 */
static const char *whences[] = { "SET", "CUR", "END",
#ifdef SEEK_DATA
"DATA",
#endif
#ifdef SEEK_HOLE
"HOLE",
#endif
};
static DEFINE_STRARRAY(whences);
281
/*
 * Names for the fcntl() 'cmd' argument, indexed by value. All names are
 * spelled without the F_ prefix, so the commands past GETSIG read the same
 * way as the ones before it.
 */
static const char *fcntl_cmds[] = {
        "DUPFD", "GETFD", "SETFD", "GETFL", "SETFL", "GETLK", "SETLK",
        "SETLKW", "SETOWN", "GETOWN", "SETSIG", "GETSIG", "GETLK64",
        "SETLK64", "SETLKW64", "SETOWN_EX", "GETOWN_EX",
        "GETOWNER_UIDS",
};
static DEFINE_STRARRAY(fcntl_cmds);
289
/*
 * Names for the 'resource' argument of getrlimit()/setrlimit()/prlimit64(),
 * indexed by value.
 */
static const char *rlimit_resources[] = {
        "CPU", "FSIZE", "DATA", "STACK", "CORE", "RSS", "NPROC", "NOFILE",
        "MEMLOCK", "AS", "LOCKS", "SIGPENDING", "MSGQUEUE", "NICE", "RTPRIO",
        "RTTIME",
};
static DEFINE_STRARRAY(rlimit_resources);
296
/* Names for the rt_sigprocmask() 'how' argument, indexed by value. */
static const char *sighow[] = { "BLOCK", "UNBLOCK", "SETMASK", };
static DEFINE_STRARRAY(sighow);
299
/*
 * Names for the socket()/socketpair() 'family' argument, indexed by the
 * AF_* value. Slot 28 (AF_MPLS) must be present: without it every family
 * from AF_CAN (29) upwards is printed with the name of its successor.
 */
static const char *socket_families[] = {
        "UNSPEC", "LOCAL", "INET", "AX25", "IPX", "APPLETALK", "NETROM",
        "BRIDGE", "ATMPVC", "X25", "INET6", "ROSE", "DECnet", "NETBEUI",
        "SECURITY", "KEY", "NETLINK", "PACKET", "ASH", "ECONET", "ATMSVC",
        "RDS", "SNA", "IRDA", "PPPOX", "WANPIPE", "LLC", "IB", "MPLS", "CAN",
        "TIPC", "BLUETOOTH", "IUCV", "RXRPC", "ISDN", "PHONET", "IEEE802154",
        "CAIF", "ALG", "NFC", "VSOCK",
};
/* strarray wrapper used for the 'family' argument of socket()/socketpair(). */
static DEFINE_STRARRAY(socket_families);

/* Fallback for headers that do not define SOCK_TYPE_MASK. */
#ifndef SOCK_TYPE_MASK
#define SOCK_TYPE_MASK 0xf
#endif
313
314 static size_t syscall_arg__scnprintf_socket_type(char *bf, size_t size,
315                                                       struct syscall_arg *arg)
316 {
317         size_t printed;
318         int type = arg->val,
319             flags = type & ~SOCK_TYPE_MASK;
320
321         type &= SOCK_TYPE_MASK;
322         /*
323          * Can't use a strarray, MIPS may override for ABI reasons.
324          */
325         switch (type) {
326 #define P_SK_TYPE(n) case SOCK_##n: printed = scnprintf(bf, size, #n); break;
327         P_SK_TYPE(STREAM);
328         P_SK_TYPE(DGRAM);
329         P_SK_TYPE(RAW);
330         P_SK_TYPE(RDM);
331         P_SK_TYPE(SEQPACKET);
332         P_SK_TYPE(DCCP);
333         P_SK_TYPE(PACKET);
334 #undef P_SK_TYPE
335         default:
336                 printed = scnprintf(bf, size, "%#x", type);
337         }
338
339 #define P_SK_FLAG(n) \
340         if (flags & SOCK_##n) { \
341                 printed += scnprintf(bf + printed, size - printed, "|%s", #n); \
342                 flags &= ~SOCK_##n; \
343         }
344
345         P_SK_FLAG(CLOEXEC);
346         P_SK_FLAG(NONBLOCK);
347 #undef P_SK_FLAG
348
349         if (flags)
350                 printed += scnprintf(bf + printed, size - printed, "|%#x", flags);
351
352         return printed;
353 }
354
355 #define SCA_SK_TYPE syscall_arg__scnprintf_socket_type
356
357 #ifndef MSG_PROBE
358 #define MSG_PROBE            0x10
359 #endif
360 #ifndef MSG_SENDPAGE_NOTLAST
361 #define MSG_SENDPAGE_NOTLAST 0x20000
362 #endif
363 #ifndef MSG_FASTOPEN
364 #define MSG_FASTOPEN         0x20000000
365 #endif
366
367 static size_t syscall_arg__scnprintf_msg_flags(char *bf, size_t size,
368                                                struct syscall_arg *arg)
369 {
370         int printed = 0, flags = arg->val;
371
372         if (flags == 0)
373                 return scnprintf(bf, size, "NONE");
374 #define P_MSG_FLAG(n) \
375         if (flags & MSG_##n) { \
376                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
377                 flags &= ~MSG_##n; \
378         }
379
380         P_MSG_FLAG(OOB);
381         P_MSG_FLAG(PEEK);
382         P_MSG_FLAG(DONTROUTE);
383         P_MSG_FLAG(TRYHARD);
384         P_MSG_FLAG(CTRUNC);
385         P_MSG_FLAG(PROBE);
386         P_MSG_FLAG(TRUNC);
387         P_MSG_FLAG(DONTWAIT);
388         P_MSG_FLAG(EOR);
389         P_MSG_FLAG(WAITALL);
390         P_MSG_FLAG(FIN);
391         P_MSG_FLAG(SYN);
392         P_MSG_FLAG(CONFIRM);
393         P_MSG_FLAG(RST);
394         P_MSG_FLAG(ERRQUEUE);
395         P_MSG_FLAG(NOSIGNAL);
396         P_MSG_FLAG(MORE);
397         P_MSG_FLAG(WAITFORONE);
398         P_MSG_FLAG(SENDPAGE_NOTLAST);
399         P_MSG_FLAG(FASTOPEN);
400         P_MSG_FLAG(CMSG_CLOEXEC);
401 #undef P_MSG_FLAG
402
403         if (flags)
404                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
405
406         return printed;
407 }
408
409 #define SCA_MSG_FLAGS syscall_arg__scnprintf_msg_flags
410
411 static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size,
412                                                  struct syscall_arg *arg)
413 {
414         size_t printed = 0;
415         int mode = arg->val;
416
417         if (mode == F_OK) /* 0 */
418                 return scnprintf(bf, size, "F");
419 #define P_MODE(n) \
420         if (mode & n##_OK) { \
421                 printed += scnprintf(bf + printed, size - printed, "%s", #n); \
422                 mode &= ~n##_OK; \
423         }
424
425         P_MODE(R);
426         P_MODE(W);
427         P_MODE(X);
428 #undef P_MODE
429
430         if (mode)
431                 printed += scnprintf(bf + printed, size - printed, "|%#x", mode);
432
433         return printed;
434 }
435
436 #define SCA_ACCMODE syscall_arg__scnprintf_access_mode
437
438 static size_t syscall_arg__scnprintf_open_flags(char *bf, size_t size,
439                                                struct syscall_arg *arg)
440 {
441         int printed = 0, flags = arg->val;
442
443         if (!(flags & O_CREAT))
444                 arg->mask |= 1 << (arg->idx + 1); /* Mask the mode parm */
445
446         if (flags == 0)
447                 return scnprintf(bf, size, "RDONLY");
448 #define P_FLAG(n) \
449         if (flags & O_##n) { \
450                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
451                 flags &= ~O_##n; \
452         }
453
454         P_FLAG(APPEND);
455         P_FLAG(ASYNC);
456         P_FLAG(CLOEXEC);
457         P_FLAG(CREAT);
458         P_FLAG(DIRECT);
459         P_FLAG(DIRECTORY);
460         P_FLAG(EXCL);
461         P_FLAG(LARGEFILE);
462         P_FLAG(NOATIME);
463         P_FLAG(NOCTTY);
464 #ifdef O_NONBLOCK
465         P_FLAG(NONBLOCK);
466 #elif O_NDELAY
467         P_FLAG(NDELAY);
468 #endif
469 #ifdef O_PATH
470         P_FLAG(PATH);
471 #endif
472         P_FLAG(RDWR);
473 #ifdef O_DSYNC
474         if ((flags & O_SYNC) == O_SYNC)
475                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", "SYNC");
476         else {
477                 P_FLAG(DSYNC);
478         }
479 #else
480         P_FLAG(SYNC);
481 #endif
482         P_FLAG(TRUNC);
483         P_FLAG(WRONLY);
484 #undef P_FLAG
485
486         if (flags)
487                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
488
489         return printed;
490 }
491
492 #define SCA_OPEN_FLAGS syscall_arg__scnprintf_open_flags
493
494 static size_t syscall_arg__scnprintf_eventfd_flags(char *bf, size_t size,
495                                                    struct syscall_arg *arg)
496 {
497         int printed = 0, flags = arg->val;
498
499         if (flags == 0)
500                 return scnprintf(bf, size, "NONE");
501 #define P_FLAG(n) \
502         if (flags & EFD_##n) { \
503                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
504                 flags &= ~EFD_##n; \
505         }
506
507         P_FLAG(SEMAPHORE);
508         P_FLAG(CLOEXEC);
509         P_FLAG(NONBLOCK);
510 #undef P_FLAG
511
512         if (flags)
513                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
514
515         return printed;
516 }
517
518 #define SCA_EFD_FLAGS syscall_arg__scnprintf_eventfd_flags
519
520 static size_t syscall_arg__scnprintf_pipe_flags(char *bf, size_t size,
521                                                 struct syscall_arg *arg)
522 {
523         int printed = 0, flags = arg->val;
524
525 #define P_FLAG(n) \
526         if (flags & O_##n) { \
527                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
528                 flags &= ~O_##n; \
529         }
530
531         P_FLAG(CLOEXEC);
532         P_FLAG(NONBLOCK);
533 #undef P_FLAG
534
535         if (flags)
536                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
537
538         return printed;
539 }
540
541 #define SCA_PIPE_FLAGS syscall_arg__scnprintf_pipe_flags
542
543 static size_t syscall_arg__scnprintf_signum(char *bf, size_t size, struct syscall_arg *arg)
544 {
545         int sig = arg->val;
546
547         switch (sig) {
548 #define P_SIGNUM(n) case SIG##n: return scnprintf(bf, size, #n)
549         P_SIGNUM(HUP);
550         P_SIGNUM(INT);
551         P_SIGNUM(QUIT);
552         P_SIGNUM(ILL);
553         P_SIGNUM(TRAP);
554         P_SIGNUM(ABRT);
555         P_SIGNUM(BUS);
556         P_SIGNUM(FPE);
557         P_SIGNUM(KILL);
558         P_SIGNUM(USR1);
559         P_SIGNUM(SEGV);
560         P_SIGNUM(USR2);
561         P_SIGNUM(PIPE);
562         P_SIGNUM(ALRM);
563         P_SIGNUM(TERM);
564         P_SIGNUM(STKFLT);
565         P_SIGNUM(CHLD);
566         P_SIGNUM(CONT);
567         P_SIGNUM(STOP);
568         P_SIGNUM(TSTP);
569         P_SIGNUM(TTIN);
570         P_SIGNUM(TTOU);
571         P_SIGNUM(URG);
572         P_SIGNUM(XCPU);
573         P_SIGNUM(XFSZ);
574         P_SIGNUM(VTALRM);
575         P_SIGNUM(PROF);
576         P_SIGNUM(WINCH);
577         P_SIGNUM(IO);
578         P_SIGNUM(PWR);
579         P_SIGNUM(SYS);
580         default: break;
581         }
582
583         return scnprintf(bf, size, "%#x", sig);
584 }
585
586 #define SCA_SIGNUM syscall_arg__scnprintf_signum
587
/*
 * Initializer fragment for a struct syscall_fmt entry: argument number
 * 'arg' is printed through SCA_STRARRAY using the table strarray__<array>.
 * 'name' is not used by the expansion; it only labels the argument at the
 * point of use.
 */
#define STRARRAY(arg, name, array) \
          .arg_scnprintf = { [arg] = SCA_STRARRAY, }, \
          .arg_parm      = { [arg] = &strarray__##array, }
591
/*
 * Per-syscall formatting hints.
 *
 * syscall_fmts[] is looked up with bsearch() by syscall_fmt__find(), so the
 * entries MUST stay sorted by .name in strcmp() order.
 */
static struct syscall_fmt {
        const char *name;       /* lookup key, compared with strcmp() */
        const char *alias;      /* alternative name; NOTE(review): presumably the tracepoint name when it differs -- confirm at the users */
        /* per-argument beautifiers, indexed by argument position; NULL means none */
        size_t     (*arg_scnprintf[6])(char *bf, size_t size, struct syscall_arg *arg);
        void       *arg_parm[6];        /* per-argument data handed to the beautifier via syscall_arg::parm */
        bool       errmsg;      /* NOTE(review): users not visible here; presumably "decode a negative return as an errno" */
        bool       timeout;     /* NOTE(review): users not visible here; presumably "a zero return means timeout" */
        bool       hexret;      /* NOTE(review): users not visible here; presumably "print the return value in hex" */
} syscall_fmts[] = {
        { .name     = "access",     .errmsg = true,
          .arg_scnprintf = { [1] = SCA_ACCMODE, /* mode */ }, },
        { .name     = "arch_prctl", .errmsg = true, .alias = "prctl", },
        { .name     = "brk",        .hexret = true,
          .arg_scnprintf = { [0] = SCA_HEX, /* brk */ }, },
        { .name     = "connect",    .errmsg = true, },
        { .name     = "epoll_ctl",  .errmsg = true, STRARRAY(1, op, epoll_ctl_ops), },
        { .name     = "eventfd2",   .errmsg = true,
          .arg_scnprintf = { [1] = SCA_EFD_FLAGS, /* flags */ }, },
        { .name     = "fcntl",      .errmsg = true, STRARRAY(1, cmd, fcntl_cmds), },
        { .name     = "flock",      .errmsg = true,
          .arg_scnprintf = { [1] = SCA_FLOCK, /* cmd */ }, },
        { .name     = "fstat",      .errmsg = true, .alias = "newfstat", },
        { .name     = "fstatat",    .errmsg = true, .alias = "newfstatat", },
        { .name     = "futex",      .errmsg = true,
          .arg_scnprintf = { [1] = SCA_FUTEX_OP, /* op */ }, },
        { .name     = "getitimer",  .errmsg = true, STRARRAY(0, which, itimers), },
        { .name     = "getrlimit",  .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
        { .name     = "ioctl",      .errmsg = true,
          .arg_scnprintf = { [2] = SCA_HEX, /* arg */ }, },
        { .name     = "kill",       .errmsg = true,
          .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
        { .name     = "lseek",      .errmsg = true, STRARRAY(2, whence, whences), },
        { .name     = "lstat",      .errmsg = true, .alias = "newlstat", },
        { .name     = "madvise",    .errmsg = true,
          .arg_scnprintf = { [0] = SCA_HEX,      /* start */
                             [2] = SCA_MADV_BHV, /* behavior */ }, },
        { .name     = "mmap",       .hexret = true,
          .arg_scnprintf = { [0] = SCA_HEX,       /* addr */
                             [2] = SCA_MMAP_PROT, /* prot */
                             [3] = SCA_MMAP_FLAGS, /* flags */ }, },
        { .name     = "mprotect",   .errmsg = true,
          .arg_scnprintf = { [0] = SCA_HEX, /* start */
                             [2] = SCA_MMAP_PROT, /* prot */ }, },
        { .name     = "mremap",     .hexret = true,
          .arg_scnprintf = { [0] = SCA_HEX, /* addr */
                             [4] = SCA_HEX, /* new_addr */ }, },
        { .name     = "munmap",     .errmsg = true,
          .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
        { .name     = "open",       .errmsg = true,
          .arg_scnprintf = { [1] = SCA_OPEN_FLAGS, /* flags */ }, },
        { .name     = "open_by_handle_at", .errmsg = true,
          .arg_scnprintf = { [2] = SCA_OPEN_FLAGS, /* flags */ }, },
        { .name     = "openat",     .errmsg = true,
          .arg_scnprintf = { [2] = SCA_OPEN_FLAGS, /* flags */ }, },
        { .name     = "pipe2",      .errmsg = true,
          .arg_scnprintf = { [1] = SCA_PIPE_FLAGS, /* flags */ }, },
        { .name     = "poll",       .errmsg = true, .timeout = true, },
        { .name     = "ppoll",      .errmsg = true, .timeout = true, },
        { .name     = "pread",      .errmsg = true, .alias = "pread64", },
        { .name     = "prlimit64",  .errmsg = true, STRARRAY(1, resource, rlimit_resources), },
        { .name     = "pwrite",     .errmsg = true, .alias = "pwrite64", },
        { .name     = "read",       .errmsg = true, },
        { .name     = "recvfrom",   .errmsg = true,
          .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
        { .name     = "recvmmsg",   .errmsg = true,
          .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
        { .name     = "recvmsg",    .errmsg = true,
          .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
        { .name     = "rt_sigaction", .errmsg = true,
          .arg_scnprintf = { [0] = SCA_SIGNUM, /* sig */ }, },
        { .name     = "rt_sigprocmask",  .errmsg = true, STRARRAY(0, how, sighow), },
        { .name     = "rt_sigqueueinfo", .errmsg = true,
          .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
        { .name     = "rt_tgsigqueueinfo", .errmsg = true,
          .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
        { .name     = "select",     .errmsg = true, .timeout = true, },
        { .name     = "sendmmsg",    .errmsg = true,
          .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
        { .name     = "sendmsg",    .errmsg = true,
          .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
        { .name     = "sendto",     .errmsg = true,
          .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
        { .name     = "setitimer",  .errmsg = true, STRARRAY(0, which, itimers), },
        { .name     = "setrlimit",  .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
        { .name     = "socket",     .errmsg = true,
          .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
                             [1] = SCA_SK_TYPE, /* type */ },
          .arg_parm      = { [0] = &strarray__socket_families, /* family */ }, },
        { .name     = "socketpair", .errmsg = true,
          .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
                             [1] = SCA_SK_TYPE, /* type */ },
          .arg_parm      = { [0] = &strarray__socket_families, /* family */ }, },
        { .name     = "stat",       .errmsg = true, .alias = "newstat", },
        { .name     = "tgkill",     .errmsg = true,
          .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
        { .name     = "tkill",      .errmsg = true,
          .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
        { .name     = "uname",      .errmsg = true, .alias = "newuname", },
};
691
692 static int syscall_fmt__cmp(const void *name, const void *fmtp)
693 {
694         const struct syscall_fmt *fmt = fmtp;
695         return strcmp(name, fmt->name);
696 }
697
698 static struct syscall_fmt *syscall_fmt__find(const char *name)
699 {
700         const int nmemb = ARRAY_SIZE(syscall_fmts);
701         return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp);
702 }
703
/* Per-syscall state; struct trace keeps these in its syscalls.table. */
struct syscall {
        struct event_format *tp_format; /* tracepoint format; its field list drives syscall__set_arg_fmts() */
        const char          *name;
        bool                filtered;
        struct syscall_fmt  *fmt;       /* formatting hints, may be NULL */
        /* one beautifier slot per argument, allocated by syscall__set_arg_fmts() */
        size_t              (**arg_scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
        void                **arg_parm; /* set to fmt->arg_parm when fmt is not NULL */
};
712
713 static size_t fprintf_duration(unsigned long t, FILE *fp)
714 {
715         double duration = (double)t / NSEC_PER_MSEC;
716         size_t printed = fprintf(fp, "(");
717
718         if (duration >= 1.0)
719                 printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration);
720         else if (duration >= 0.01)
721                 printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration);
722         else
723                 printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration);
724         return printed + fprintf(fp, "): ");
725 }
726
/*
 * Per-thread tracing state, hung off thread->priv by thread__trace().
 * NOTE(review): only nr_events is used in the code visible here; the other
 * fields are consumed further down the file.
 */
struct thread_trace {
        u64               entry_time;
        u64               exit_time;
        bool              entry_pending;
        unsigned long     nr_events;    /* incremented on every thread__trace() call */
        char              *entry_str;
        double            runtime_ms;
};
735
736 static struct thread_trace *thread_trace__new(void)
737 {
738         return zalloc(sizeof(struct thread_trace));
739 }
740
741 static struct thread_trace *thread__trace(struct thread *thread, FILE *fp)
742 {
743         struct thread_trace *ttrace;
744
745         if (thread == NULL)
746                 goto fail;
747
748         if (thread->priv == NULL)
749                 thread->priv = thread_trace__new();
750                 
751         if (thread->priv == NULL)
752                 goto fail;
753
754         ttrace = thread->priv;
755         ++ttrace->nr_events;
756
757         return ttrace;
758 fail:
759         color_fprintf(fp, PERF_COLOR_RED,
760                       "WARNING: not enough memory, dropping samples!\n");
761         return NULL;
762 }
763
/*
 * State of one 'perf trace' session.
 * NOTE(review): fields without a comment are not used in the part of the
 * file visible here.
 */
struct trace {
        struct perf_tool        tool;           /* embedded; trace__tool_process() gets back here via container_of() */
        int                     audit_machine;  /* passed to audit_syscall_to_name() */
        struct {
                int             max;            /* highest syscall id table[] has room for */
                struct syscall  *table;         /* indexed by syscall id, grown on demand */
        } syscalls;
        struct perf_record_opts opts;
        struct machine          host;           /* initialized by trace__symbols_init() */
        u64                     base_time;      /* subtracted from timestamps before printing */
        bool                    full_time;
        FILE                    *output;        /* where the LOST events warning is printed */
        unsigned long           nr_events;
        struct strlist          *ev_qualifier;
        bool                    not_ev_qualifier;
        struct intlist          *tid_list;
        struct intlist          *pid_list;
        bool                    sched;
        bool                    multiple_threads;       /* print the tid with each entry */
        bool                    show_comm;              /* also print the comm before the tid */
        double                  duration_filter;        /* multiplied by NSEC_PER_MSEC in trace__filter_duration() */
        double                  runtime_ms;
};
787
788 static bool trace__filter_duration(struct trace *trace, double t)
789 {
790         return t < (trace->duration_filter * NSEC_PER_MSEC);
791 }
792
793 static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
794 {
795         double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC;
796
797         return fprintf(fp, "%10.3f ", ts);
798 }
799
800 static bool done = false;
801
802 static void sig_handler(int sig __maybe_unused)
803 {
804         done = true;
805 }
806
807 static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread,
808                                         u64 duration, u64 tstamp, FILE *fp)
809 {
810         size_t printed = trace__fprintf_tstamp(trace, tstamp, fp);
811         printed += fprintf_duration(duration, fp);
812
813         if (trace->multiple_threads) {
814                 if (trace->show_comm)
815                         printed += fprintf(fp, "%.14s/", thread->comm);
816                 printed += fprintf(fp, "%d ", thread->tid);
817         }
818
819         return printed;
820 }
821
822 static int trace__process_event(struct trace *trace, struct machine *machine,
823                                 union perf_event *event)
824 {
825         int ret = 0;
826
827         switch (event->header.type) {
828         case PERF_RECORD_LOST:
829                 color_fprintf(trace->output, PERF_COLOR_RED,
830                               "LOST %" PRIu64 " events!\n", event->lost.lost);
831                 ret = machine__process_lost_event(machine, event);
832         default:
833                 ret = machine__process_event(machine, event);
834                 break;
835         }
836
837         return ret;
838 }
839
840 static int trace__tool_process(struct perf_tool *tool,
841                                union perf_event *event,
842                                struct perf_sample *sample __maybe_unused,
843                                struct machine *machine)
844 {
845         struct trace *trace = container_of(tool, struct trace, tool);
846         return trace__process_event(trace, machine, event);
847 }
848
849 static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
850 {
851         int err = symbol__init();
852
853         if (err)
854                 return err;
855
856         machine__init(&trace->host, "", HOST_KERNEL_ID);
857         machine__create_kernel_maps(&trace->host);
858
859         if (perf_target__has_task(&trace->opts.target)) {
860                 err = perf_event__synthesize_thread_map(&trace->tool, evlist->threads,
861                                                         trace__tool_process,
862                                                         &trace->host);
863         } else {
864                 err = perf_event__synthesize_threads(&trace->tool, trace__tool_process,
865                                                      &trace->host);
866         }
867
868         if (err)
869                 symbol__exit();
870
871         return err;
872 }
873
874 static int syscall__set_arg_fmts(struct syscall *sc)
875 {
876         struct format_field *field;
877         int idx = 0;
878
879         sc->arg_scnprintf = calloc(sc->tp_format->format.nr_fields - 1, sizeof(void *));
880         if (sc->arg_scnprintf == NULL)
881                 return -1;
882
883         if (sc->fmt)
884                 sc->arg_parm = sc->fmt->arg_parm;
885
886         for (field = sc->tp_format->format.fields->next; field; field = field->next) {
887                 if (sc->fmt && sc->fmt->arg_scnprintf[idx])
888                         sc->arg_scnprintf[idx] = sc->fmt->arg_scnprintf[idx];
889                 else if (field->flags & FIELD_IS_POINTER)
890                         sc->arg_scnprintf[idx] = syscall_arg__scnprintf_hex;
891                 ++idx;
892         }
893
894         return 0;
895 }
896
897 static int trace__read_syscall_info(struct trace *trace, int id)
898 {
899         char tp_name[128];
900         struct syscall *sc;
901         const char *name = audit_syscall_to_name(id, trace->audit_machine);
902
903         if (name == NULL)
904                 return -1;
905
906         if (id > trace->syscalls.max) {
907                 struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc));
908
909                 if (nsyscalls == NULL)
910                         return -1;
911
912                 if (trace->syscalls.max != -1) {
913                         memset(nsyscalls + trace->syscalls.max + 1, 0,
914                                (id - trace->syscalls.max) * sizeof(*sc));
915                 } else {
916                         memset(nsyscalls, 0, (id + 1) * sizeof(*sc));
917                 }
918
919                 trace->syscalls.table = nsyscalls;
920                 trace->syscalls.max   = id;
921         }
922
923         sc = trace->syscalls.table + id;
924         sc->name = name;
925
926         if (trace->ev_qualifier) {
927                 bool in = strlist__find(trace->ev_qualifier, name) != NULL;
928
929                 if (!(in ^ trace->not_ev_qualifier)) {
930                         sc->filtered = true;
931                         /*
932                          * No need to do read tracepoint information since this will be
933                          * filtered out.
934                          */
935                         return 0;
936                 }
937         }
938
939         sc->fmt  = syscall_fmt__find(sc->name);
940
941         snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name);
942         sc->tp_format = event_format__new("syscalls", tp_name);
943
944         if (sc->tp_format == NULL && sc->fmt && sc->fmt->alias) {
945                 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias);
946                 sc->tp_format = event_format__new("syscalls", tp_name);
947         }
948
949         if (sc->tp_format == NULL)
950                 return -1;
951
952         return syscall__set_arg_fmts(sc);
953 }
954
955 static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
956                                       unsigned long *args)
957 {
958         size_t printed = 0;
959
960         if (sc->tp_format != NULL) {
961                 struct format_field *field;
962                 u8 bit = 1;
963                 struct syscall_arg arg = {
964                         .idx  = 0,
965                         .mask = 0,
966                 };
967
968                 for (field = sc->tp_format->format.fields->next; field;
969                      field = field->next, ++arg.idx, bit <<= 1) {
970                         if (arg.mask & bit)
971                                 continue;
972                         /*
973                          * Suppress this argument if its value is zero and
974                          * and we don't have a string associated in an
975                          * strarray for it.
976                          */
977                         if (args[arg.idx] == 0 &&
978                             !(sc->arg_scnprintf &&
979                               sc->arg_scnprintf[arg.idx] == SCA_STRARRAY &&
980                               sc->arg_parm[arg.idx]))
981                                 continue;
982
983                         printed += scnprintf(bf + printed, size - printed,
984                                              "%s%s: ", printed ? ", " : "", field->name);
985                         if (sc->arg_scnprintf && sc->arg_scnprintf[arg.idx]) {
986                                 arg.val = args[arg.idx];
987                                 if (sc->arg_parm)
988                                         arg.parm = sc->arg_parm[arg.idx];
989                                 printed += sc->arg_scnprintf[arg.idx](bf + printed,
990                                                                       size - printed, &arg);
991                         } else {
992                                 printed += scnprintf(bf + printed, size - printed,
993                                                      "%ld", args[arg.idx]);
994                         }
995                 }
996         } else {
997                 int i = 0;
998
999                 while (i < 6) {
1000                         printed += scnprintf(bf + printed, size - printed,
1001                                              "%sarg%d: %ld",
1002                                              printed ? ", " : "", i, args[i]);
1003                         ++i;
1004                 }
1005         }
1006
1007         return printed;
1008 }
1009
/*
 * Signature shared by the per-tracepoint callbacks in this file
 * (trace__sys_enter, trace__sys_exit, trace__sched_stat_runtime).
 */
typedef int (*tracepoint_handler)(struct trace *trace, struct perf_evsel *evsel,
                                  struct perf_sample *sample);
1012
1013 static struct syscall *trace__syscall_info(struct trace *trace,
1014                                            struct perf_evsel *evsel,
1015                                            struct perf_sample *sample)
1016 {
1017         int id = perf_evsel__intval(evsel, sample, "id");
1018
1019         if (id < 0) {
1020
1021                 /*
1022                  * XXX: Noticed on x86_64, reproduced as far back as 3.0.36, haven't tried
1023                  * before that, leaving at a higher verbosity level till that is
1024                  * explained. Reproduced with plain ftrace with:
1025                  *
1026                  * echo 1 > /t/events/raw_syscalls/sys_exit/enable
1027                  * grep "NR -1 " /t/trace_pipe
1028                  *
1029                  * After generating some load on the machine.
1030                  */
1031                 if (verbose > 1) {
1032                         static u64 n;
1033                         fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n",
1034                                 id, perf_evsel__name(evsel), ++n);
1035                 }
1036                 return NULL;
1037         }
1038
1039         if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) &&
1040             trace__read_syscall_info(trace, id))
1041                 goto out_cant_read;
1042
1043         if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL))
1044                 goto out_cant_read;
1045
1046         return &trace->syscalls.table[id];
1047
1048 out_cant_read:
1049         if (verbose) {
1050                 fprintf(trace->output, "Problems reading syscall %d", id);
1051                 if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL)
1052                         fprintf(trace->output, "(%s)", trace->syscalls.table[id].name);
1053                 fputs(" information\n", trace->output);
1054         }
1055         return NULL;
1056 }
1057
1058 static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
1059                             struct perf_sample *sample)
1060 {
1061         char *msg;
1062         void *args;
1063         size_t printed = 0;
1064         struct thread *thread;
1065         struct syscall *sc = trace__syscall_info(trace, evsel, sample);
1066         struct thread_trace *ttrace;
1067
1068         if (sc == NULL)
1069                 return -1;
1070
1071         if (sc->filtered)
1072                 return 0;
1073
1074         thread = machine__findnew_thread(&trace->host, sample->pid,
1075                                          sample->tid);
1076         ttrace = thread__trace(thread, trace->output);
1077         if (ttrace == NULL)
1078                 return -1;
1079
1080         args = perf_evsel__rawptr(evsel, sample, "args");
1081         if (args == NULL) {
1082                 fprintf(trace->output, "Problems reading syscall arguments\n");
1083                 return -1;
1084         }
1085
1086         ttrace = thread->priv;
1087
1088         if (ttrace->entry_str == NULL) {
1089                 ttrace->entry_str = malloc(1024);
1090                 if (!ttrace->entry_str)
1091                         return -1;
1092         }
1093
1094         ttrace->entry_time = sample->time;
1095         msg = ttrace->entry_str;
1096         printed += scnprintf(msg + printed, 1024 - printed, "%s(", sc->name);
1097
1098         printed += syscall__scnprintf_args(sc, msg + printed, 1024 - printed,  args);
1099
1100         if (!strcmp(sc->name, "exit_group") || !strcmp(sc->name, "exit")) {
1101                 if (!trace->duration_filter) {
1102                         trace__fprintf_entry_head(trace, thread, 1, sample->time, trace->output);
1103                         fprintf(trace->output, "%-70s\n", ttrace->entry_str);
1104                 }
1105         } else
1106                 ttrace->entry_pending = true;
1107
1108         return 0;
1109 }
1110
1111 static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
1112                            struct perf_sample *sample)
1113 {
1114         int ret;
1115         u64 duration = 0;
1116         struct thread *thread;
1117         struct syscall *sc = trace__syscall_info(trace, evsel, sample);
1118         struct thread_trace *ttrace;
1119
1120         if (sc == NULL)
1121                 return -1;
1122
1123         if (sc->filtered)
1124                 return 0;
1125
1126         thread = machine__findnew_thread(&trace->host, sample->pid,
1127                                          sample->tid);
1128         ttrace = thread__trace(thread, trace->output);
1129         if (ttrace == NULL)
1130                 return -1;
1131
1132         ret = perf_evsel__intval(evsel, sample, "ret");
1133
1134         ttrace = thread->priv;
1135
1136         ttrace->exit_time = sample->time;
1137
1138         if (ttrace->entry_time) {
1139                 duration = sample->time - ttrace->entry_time;
1140                 if (trace__filter_duration(trace, duration))
1141                         goto out;
1142         } else if (trace->duration_filter)
1143                 goto out;
1144
1145         trace__fprintf_entry_head(trace, thread, duration, sample->time, trace->output);
1146
1147         if (ttrace->entry_pending) {
1148                 fprintf(trace->output, "%-70s", ttrace->entry_str);
1149         } else {
1150                 fprintf(trace->output, " ... [");
1151                 color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued");
1152                 fprintf(trace->output, "]: %s()", sc->name);
1153         }
1154
1155         if (sc->fmt == NULL) {
1156 signed_print:
1157                 fprintf(trace->output, ") = %d", ret);
1158         } else if (ret < 0 && sc->fmt->errmsg) {
1159                 char bf[256];
1160                 const char *emsg = strerror_r(-ret, bf, sizeof(bf)),
1161                            *e = audit_errno_to_name(-ret);
1162
1163                 fprintf(trace->output, ") = -1 %s %s", e, emsg);
1164         } else if (ret == 0 && sc->fmt->timeout)
1165                 fprintf(trace->output, ") = 0 Timeout");
1166         else if (sc->fmt->hexret)
1167                 fprintf(trace->output, ") = %#x", ret);
1168         else
1169                 goto signed_print;
1170
1171         fputc('\n', trace->output);
1172 out:
1173         ttrace->entry_pending = false;
1174
1175         return 0;
1176 }
1177
1178 static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel,
1179                                      struct perf_sample *sample)
1180 {
1181         u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
1182         double runtime_ms = (double)runtime / NSEC_PER_MSEC;
1183         struct thread *thread = machine__findnew_thread(&trace->host,
1184                                                         sample->pid,
1185                                                         sample->tid);
1186         struct thread_trace *ttrace = thread__trace(thread, trace->output);
1187
1188         if (ttrace == NULL)
1189                 goto out_dump;
1190
1191         ttrace->runtime_ms += runtime_ms;
1192         trace->runtime_ms += runtime_ms;
1193         return 0;
1194
1195 out_dump:
1196         fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n",
1197                evsel->name,
1198                perf_evsel__strval(evsel, sample, "comm"),
1199                (pid_t)perf_evsel__intval(evsel, sample, "pid"),
1200                runtime,
1201                perf_evsel__intval(evsel, sample, "vruntime"));
1202         return 0;
1203 }
1204
1205 static bool skip_sample(struct trace *trace, struct perf_sample *sample)
1206 {
1207         if ((trace->pid_list && intlist__find(trace->pid_list, sample->pid)) ||
1208             (trace->tid_list && intlist__find(trace->tid_list, sample->tid)))
1209                 return false;
1210
1211         if (trace->pid_list || trace->tid_list)
1212                 return true;
1213
1214         return false;
1215 }
1216
1217 static int trace__process_sample(struct perf_tool *tool,
1218                                  union perf_event *event __maybe_unused,
1219                                  struct perf_sample *sample,
1220                                  struct perf_evsel *evsel,
1221                                  struct machine *machine __maybe_unused)
1222 {
1223         struct trace *trace = container_of(tool, struct trace, tool);
1224         int err = 0;
1225
1226         tracepoint_handler handler = evsel->handler.func;
1227
1228         if (skip_sample(trace, sample))
1229                 return 0;
1230
1231         if (!trace->full_time && trace->base_time == 0)
1232                 trace->base_time = sample->time;
1233
1234         if (handler)
1235                 handler(trace, evsel, sample);
1236
1237         return err;
1238 }
1239
1240 static bool
1241 perf_session__has_tp(struct perf_session *session, const char *name)
1242 {
1243         struct perf_evsel *evsel;
1244
1245         evsel = perf_evlist__find_tracepoint_by_name(session->evlist, name);
1246
1247         return evsel != NULL;
1248 }
1249
1250 static int parse_target_str(struct trace *trace)
1251 {
1252         if (trace->opts.target.pid) {
1253                 trace->pid_list = intlist__new(trace->opts.target.pid);
1254                 if (trace->pid_list == NULL) {
1255                         pr_err("Error parsing process id string\n");
1256                         return -EINVAL;
1257                 }
1258         }
1259
1260         if (trace->opts.target.tid) {
1261                 trace->tid_list = intlist__new(trace->opts.target.tid);
1262                 if (trace->tid_list == NULL) {
1263                         pr_err("Error parsing thread id string\n");
1264                         return -EINVAL;
1265                 }
1266         }
1267
1268         return 0;
1269 }
1270
/*
 * Live tracing: set up the raw_syscalls (and optionally sched) tracepoints,
 * start the workload when a command line was given, then read events from
 * the mmap'ed ring buffers and dispatch each sample to its tracepoint
 * handler until 'done' is set.
 *
 * argc/argv describe the workload to fork; argc == 0 means no workload.
 *
 * Returns the value of 'err' at the time the cleanup labels are reached:
 * -1 for early setup failures, otherwise the result of the last call that
 * assigned it.
 */
static int trace__run(struct trace *trace, int argc, const char **argv)
{
        struct perf_evlist *evlist = perf_evlist__new();
        struct perf_evsel *evsel;
        int err = -1, i;
        unsigned long before;
        const bool forks = argc > 0;

        if (evlist == NULL) {
                fprintf(trace->output, "Not enough memory to run!\n");
                goto out;
        }

        /* The two tracepoints every run needs: syscall entry and exit. */
        if (perf_evlist__add_newtp(evlist, "raw_syscalls", "sys_enter", trace__sys_enter) ||
            perf_evlist__add_newtp(evlist, "raw_syscalls", "sys_exit", trace__sys_exit)) {
                fprintf(trace->output, "Couldn't read the raw_syscalls tracepoints information!\n");
                goto out_delete_evlist;
        }

        /* Scheduler runtime accounting is only added on request (--sched). */
        if (trace->sched &&
            perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
                                   trace__sched_stat_runtime)) {
                fprintf(trace->output, "Couldn't read the sched_stat_runtime tracepoint information!\n");
                goto out_delete_evlist;
        }

        err = perf_evlist__create_maps(evlist, &trace->opts.target);
        if (err < 0) {
                fprintf(trace->output, "Problems parsing the target to trace, check your options!\n");
                goto out_delete_evlist;
        }

        err = trace__symbols_init(trace, evlist);
        if (err < 0) {
                fprintf(trace->output, "Problems initializing symbol libraries!\n");
                goto out_delete_maps;
        }

        perf_evlist__config(evlist, &trace->opts);

        signal(SIGCHLD, sig_handler);
        signal(SIGINT, sig_handler);

        if (forks) {
                err = perf_evlist__prepare_workload(evlist, &trace->opts.target,
                                                    argv, false, false);
                if (err < 0) {
                        fprintf(trace->output, "Couldn't run the workload!\n");
                        goto out_delete_maps;
                }
        }

        err = perf_evlist__open(evlist);
        if (err < 0) {
                fprintf(trace->output, "Couldn't create the events: %s\n", strerror(errno));
                goto out_delete_maps;
        }

        err = perf_evlist__mmap(evlist, UINT_MAX, false);
        if (err < 0) {
                fprintf(trace->output, "Couldn't mmap the events: %s\n", strerror(errno));
                goto out_close_evlist;
        }

        perf_evlist__enable(evlist);

        /* Events are enabled before the workload is started. */
        if (forks)
                perf_evlist__start_workload(evlist);

        /* Thread ids are only worth printing when more than one thread may show up. */
        trace->multiple_threads = evlist->threads->map[0] == -1 || evlist->threads->nr > 1;
again:
        /* Remember the event count so we can tell whether this pass read anything. */
        before = trace->nr_events;

        for (i = 0; i < evlist->nr_mmaps; i++) {
                union perf_event *event;

                while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) {
                        const u32 type = event->header.type;
                        tracepoint_handler handler;
                        struct perf_sample sample;

                        ++trace->nr_events;

                        err = perf_evlist__parse_sample(evlist, event, &sample);
                        if (err) {
                                fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err);
                                continue;
                        }

                        /* First timestamp seen becomes the origin for relative times. */
                        if (!trace->full_time && trace->base_time == 0)
                                trace->base_time = sample.time;

                        /* Non-sample records go to the generic event path. */
                        if (type != PERF_RECORD_SAMPLE) {
                                trace__process_event(trace, &trace->host, event);
                                continue;
                        }

                        evsel = perf_evlist__id2evsel(evlist, sample.id);
                        if (evsel == NULL) {
                                fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample.id);
                                continue;
                        }

                        if (sample.raw_data == NULL) {
                                fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
                                       perf_evsel__name(evsel), sample.tid,
                                       sample.cpu, sample.raw_size);
                                continue;
                        }

                        /* The handler's return value is not checked here. */
                        handler = evsel->handler.func;
                        handler(trace, evsel, &sample);

                        if (done)
                                goto out_unmap_evlist;
                }
        }

        /* Nothing was read on this pass: leave if finished, else block until there is data. */
        if (trace->nr_events == before) {
                if (done)
                        goto out_unmap_evlist;

                poll(evlist->pollfd, evlist->nr_fds, -1);
        }

        /* Once done, stop the events; the next pass reads what is left, then exits above. */
        if (done)
                perf_evlist__disable(evlist);

        goto again;

        /* Cleanup ladder: each label undoes one setup step, then falls into the next. */
out_unmap_evlist:
        perf_evlist__munmap(evlist);
out_close_evlist:
        perf_evlist__close(evlist);
out_delete_maps:
        perf_evlist__delete_maps(evlist);
out_delete_evlist:
        perf_evlist__delete(evlist);
out:
        return err;
}
1412
1413 static int trace__replay(struct trace *trace)
1414 {
1415         const struct perf_evsel_str_handler handlers[] = {
1416                 { "raw_syscalls:sys_enter",  trace__sys_enter, },
1417                 { "raw_syscalls:sys_exit",   trace__sys_exit, },
1418         };
1419
1420         struct perf_session *session;
1421         int err = -1;
1422
1423         trace->tool.sample        = trace__process_sample;
1424         trace->tool.mmap          = perf_event__process_mmap;
1425         trace->tool.mmap2         = perf_event__process_mmap2;
1426         trace->tool.comm          = perf_event__process_comm;
1427         trace->tool.exit          = perf_event__process_exit;
1428         trace->tool.fork          = perf_event__process_fork;
1429         trace->tool.attr          = perf_event__process_attr;
1430         trace->tool.tracing_data = perf_event__process_tracing_data;
1431         trace->tool.build_id      = perf_event__process_build_id;
1432
1433         trace->tool.ordered_samples = true;
1434         trace->tool.ordering_requires_timestamps = true;
1435
1436         /* add tid to output */
1437         trace->multiple_threads = true;
1438
1439         if (symbol__init() < 0)
1440                 return -1;
1441
1442         session = perf_session__new(input_name, O_RDONLY, 0, false,
1443                                     &trace->tool);
1444         if (session == NULL)
1445                 return -ENOMEM;
1446
1447         err = perf_session__set_tracepoints_handlers(session, handlers);
1448         if (err)
1449                 goto out;
1450
1451         if (!perf_session__has_tp(session, "raw_syscalls:sys_enter")) {
1452                 pr_err("Data file does not have raw_syscalls:sys_enter events\n");
1453                 goto out;
1454         }
1455
1456         if (!perf_session__has_tp(session, "raw_syscalls:sys_exit")) {
1457                 pr_err("Data file does not have raw_syscalls:sys_exit events\n");
1458                 goto out;
1459         }
1460
1461         err = parse_target_str(trace);
1462         if (err != 0)
1463                 goto out;
1464
1465         setup_pager();
1466
1467         err = perf_session__process_events(session, &trace->tool);
1468         if (err)
1469                 pr_err("Failed to process events, error %d", err);
1470
1471 out:
1472         perf_session__delete(session);
1473
1474         return err;
1475 }
1476
/*
 * Print the banner and column titles of the per-thread summary to 'fp'.
 *
 * Returns the sum of the fprintf() return values, i.e. the number of
 * characters written when no call fails.
 */
static size_t trace__fprintf_threads_header(FILE *fp)
{
        static const char *const lines[] = {
                "\n _____________________________________________________________________\n",
                " __)    Summary of events    (__\n\n",
                "              [ task - pid ]     [ events ] [ ratio ]  [ runtime ]\n",
                " _____________________________________________________________________\n\n",
        };
        size_t printed = 0;
        size_t i;

        for (i = 0; i < sizeof(lines) / sizeof(lines[0]); i++)
                printed += fprintf(fp, "%s", lines[i]);

        return printed;
}
1488
1489 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
1490 {
1491         size_t printed = trace__fprintf_threads_header(fp);
1492         struct rb_node *nd;
1493
1494         for (nd = rb_first(&trace->host.threads); nd; nd = rb_next(nd)) {
1495                 struct thread *thread = rb_entry(nd, struct thread, rb_node);
1496                 struct thread_trace *ttrace = thread->priv;
1497                 const char *color;
1498                 double ratio;
1499
1500                 if (ttrace == NULL)
1501                         continue;
1502
1503                 ratio = (double)ttrace->nr_events / trace->nr_events * 100.0;
1504
1505                 color = PERF_COLOR_NORMAL;
1506                 if (ratio > 50.0)
1507                         color = PERF_COLOR_RED;
1508                 else if (ratio > 25.0)
1509                         color = PERF_COLOR_GREEN;
1510                 else if (ratio > 5.0)
1511                         color = PERF_COLOR_YELLOW;
1512
1513                 printed += color_fprintf(fp, color, "%20s", thread->comm);
1514                 printed += fprintf(fp, " - %-5d :%11lu   [", thread->tid, ttrace->nr_events);
1515                 printed += color_fprintf(fp, color, "%5.1f%%", ratio);
1516                 printed += fprintf(fp, " ] %10.3f ms\n", ttrace->runtime_ms);
1517         }
1518
1519         return printed;
1520 }
1521
1522 static int trace__set_duration(const struct option *opt, const char *str,
1523                                int unset __maybe_unused)
1524 {
1525         struct trace *trace = opt->value;
1526
1527         trace->duration_filter = atof(str);
1528         return 0;
1529 }
1530
1531 static int trace__open_output(struct trace *trace, const char *filename)
1532 {
1533         struct stat st;
1534
1535         if (!stat(filename, &st) && st.st_size) {
1536                 char oldname[PATH_MAX];
1537
1538                 scnprintf(oldname, sizeof(oldname), "%s.old", filename);
1539                 unlink(oldname);
1540                 rename(filename, oldname);
1541         }
1542
1543         trace->output = fopen(filename, "w");
1544
1545         return trace->output == NULL ? -errno : 0;
1546 }
1547
/*
 * Entry point of 'perf trace'.
 *
 * Parses the command line, optionally redirects output to a file, builds the
 * event qualifier list from -e/--expr, validates the target, and then either
 * replays an input file (when input_name is set) or traces live.  With
 * --sched and a successful run, a per-thread summary is printed at the end.
 *
 * Returns the error code of the step that failed, or the result of the
 * replay/run.
 */
int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
{
        const char * const trace_usage[] = {
                "perf trace [<options>] [<command>]",
                "perf trace [<options>] -- <command> [<options>]",
                NULL
        };
        /* Defaults; several of these can be overridden by the options below. */
        struct trace trace = {
                .audit_machine = audit_detect_machine(),
                .syscalls = {
                        /* -1 marks the syscall table as still empty. */
                        . max = -1,
                },
                .opts = {
                        .target = {
                                .uid       = UINT_MAX,
                                .uses_mmap = true,
                        },
                        .user_freq     = UINT_MAX,
                        .user_interval = ULLONG_MAX,
                        .no_delay      = true,
                        .mmap_pages    = 1024,
                },
                .output = stdout,
                .show_comm = true,
        };
        const char *output_name = NULL;
        const char *ev_qualifier_str = NULL;
        const struct option trace_options[] = {
        OPT_BOOLEAN(0, "comm", &trace.show_comm,
                    "show the thread COMM next to its id"),
        OPT_STRING('e', "expr", &ev_qualifier_str, "expr",
                    "list of events to trace"),
        OPT_STRING('o', "output", &output_name, "file", "output file name"),
        OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"),
        OPT_STRING('p', "pid", &trace.opts.target.pid, "pid",
                    "trace events on existing process id"),
        OPT_STRING('t', "tid", &trace.opts.target.tid, "tid",
                    "trace events on existing thread id"),
        OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide,
                    "system-wide collection from all CPUs"),
        OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu",
                    "list of cpus to monitor"),
        OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit,
                    "child tasks do not inherit counters"),
        OPT_CALLBACK('m', "mmap-pages", &trace.opts.mmap_pages, "pages",
                     "number of mmap data pages",
                     perf_evlist__parse_mmap_pages),
        OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user",
                   "user to profile"),
        OPT_CALLBACK(0, "duration", &trace, "float",
                     "show only events with duration > N.M ms",
                     trace__set_duration),
        OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"),
        OPT_INCR('v', "verbose", &verbose, "be more verbose"),
        OPT_BOOLEAN('T', "time", &trace.full_time,
                    "Show full timestamp, not time relative to first start"),
        OPT_END()
        };
        int err;
        char bf[BUFSIZ];

        /* Whatever is left in argc/argv afterwards is the workload command. */
        argc = parse_options(argc, argv, trace_options, trace_usage, 0);

        if (output_name != NULL) {
                err = trace__open_output(&trace, output_name);
                if (err < 0) {
                        perror("failed to create output file");
                        goto out;
                }
        }

        if (ev_qualifier_str != NULL) {
                const char *s = ev_qualifier_str;

                /* A leading '!' inverts the list: trace everything except these. */
                trace.not_ev_qualifier = *s == '!';
                if (trace.not_ev_qualifier)
                        ++s;
                trace.ev_qualifier = strlist__new(true, s);
                if (trace.ev_qualifier == NULL) {
                        fputs("Not enough memory to parse event qualifier",
                              trace.output);
                        err = -ENOMEM;
                        goto out_close;
                }
        }

        err = perf_target__validate(&trace.opts.target);
        if (err) {
                perf_target__strerror(&trace.opts.target, err, bf, sizeof(bf));
                fprintf(trace.output, "%s", bf);
                goto out_close;
        }

        err = perf_target__parse_uid(&trace.opts.target);
        if (err) {
                perf_target__strerror(&trace.opts.target, err, bf, sizeof(bf));
                fprintf(trace.output, "%s", bf);
                goto out_close;
        }

        /* No workload and no explicit target: fall back to system-wide tracing. */
        if (!argc && perf_target__none(&trace.opts.target))
                trace.opts.target.system_wide = true;

        if (input_name)
                err = trace__replay(&trace);
        else
                err = trace__run(&trace, argc, argv);

        if (trace.sched && !err)
                trace__fprintf_thread_summary(&trace, trace.output);

out_close:
        /* Only close the stream we opened ourselves, never stdout. */
        if (output_name != NULL)
                fclose(trace.output);
out:
        return err;
}