perf trace: Use socket's beautifiers in socketpair
[firefly-linux-kernel-4.4.55.git] / tools / perf / builtin-trace.c
1 #include <traceevent/event-parse.h>
2 #include "builtin.h"
3 #include "util/color.h"
4 #include "util/debug.h"
5 #include "util/evlist.h"
6 #include "util/machine.h"
7 #include "util/session.h"
8 #include "util/thread.h"
9 #include "util/parse-options.h"
10 #include "util/strlist.h"
11 #include "util/intlist.h"
12 #include "util/thread_map.h"
13
14 #include <libaudit.h>
15 #include <stdlib.h>
16 #include <sys/eventfd.h>
17 #include <sys/mman.h>
18 #include <linux/futex.h>
19
/*
 * Fallbacks for constants that the headers of older distros may lack.
 * Each value is only supplied when the system headers left it undefined.
 */
#if !defined(MAP_STACK)
# define MAP_STACK		0x20000
#endif

#if !defined(MADV_HWPOISON)
# define MADV_HWPOISON		100
#endif

#if !defined(MADV_MERGEABLE)
# define MADV_MERGEABLE		12
#endif

#if !defined(MADV_UNMERGEABLE)
# define MADV_UNMERGEABLE	13
#endif
36
37 struct syscall_arg {
38         unsigned long val;
39         void          *parm;
40         u8            idx;
41         u8            mask;
42 };
43
/* A string table together with its number of slots. */
struct strarray {
	/* Number of slots in entries[]. */
	int	    nr_entries;
	/* entries[value] is the name printed for value. */
	const char **entries;
};

/*
 * Defines strarray__<array>, a struct strarray describing the string table
 * <array>. <array> has to be a real array, its size comes from ARRAY_SIZE().
 */
#define DEFINE_STRARRAY(array)				\
	struct strarray strarray__##array = {		\
		.nr_entries = ARRAY_SIZE(array),	\
		.entries    = array,			\
	}
53
54 static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size,
55                                               struct syscall_arg *arg)
56 {
57         int idx = arg->val;
58         struct strarray *sa = arg->parm;
59
60         if (idx < 0 || idx >= sa->nr_entries)
61                 return scnprintf(bf, size, "%d", idx);
62
63         return scnprintf(bf, size, "%s", sa->entries[idx]);
64 }
65
66 #define SCA_STRARRAY syscall_arg__scnprintf_strarray
67
68 static size_t syscall_arg__scnprintf_hex(char *bf, size_t size,
69                                          struct syscall_arg *arg)
70 {
71         return scnprintf(bf, size, "%#lx", arg->val);
72 }
73
74 #define SCA_HEX syscall_arg__scnprintf_hex
75
76 static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size,
77                                                struct syscall_arg *arg)
78 {
79         int printed = 0, prot = arg->val;
80
81         if (prot == PROT_NONE)
82                 return scnprintf(bf, size, "NONE");
83 #define P_MMAP_PROT(n) \
84         if (prot & PROT_##n) { \
85                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
86                 prot &= ~PROT_##n; \
87         }
88
89         P_MMAP_PROT(EXEC);
90         P_MMAP_PROT(READ);
91         P_MMAP_PROT(WRITE);
92 #ifdef PROT_SEM
93         P_MMAP_PROT(SEM);
94 #endif
95         P_MMAP_PROT(GROWSDOWN);
96         P_MMAP_PROT(GROWSUP);
97 #undef P_MMAP_PROT
98
99         if (prot)
100                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", prot);
101
102         return printed;
103 }
104
105 #define SCA_MMAP_PROT syscall_arg__scnprintf_mmap_prot
106
107 static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size,
108                                                 struct syscall_arg *arg)
109 {
110         int printed = 0, flags = arg->val;
111
112 #define P_MMAP_FLAG(n) \
113         if (flags & MAP_##n) { \
114                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
115                 flags &= ~MAP_##n; \
116         }
117
118         P_MMAP_FLAG(SHARED);
119         P_MMAP_FLAG(PRIVATE);
120 #ifdef MAP_32BIT
121         P_MMAP_FLAG(32BIT);
122 #endif
123         P_MMAP_FLAG(ANONYMOUS);
124         P_MMAP_FLAG(DENYWRITE);
125         P_MMAP_FLAG(EXECUTABLE);
126         P_MMAP_FLAG(FILE);
127         P_MMAP_FLAG(FIXED);
128         P_MMAP_FLAG(GROWSDOWN);
129 #ifdef MAP_HUGETLB
130         P_MMAP_FLAG(HUGETLB);
131 #endif
132         P_MMAP_FLAG(LOCKED);
133         P_MMAP_FLAG(NONBLOCK);
134         P_MMAP_FLAG(NORESERVE);
135         P_MMAP_FLAG(POPULATE);
136         P_MMAP_FLAG(STACK);
137 #ifdef MAP_UNINITIALIZED
138         P_MMAP_FLAG(UNINITIALIZED);
139 #endif
140 #undef P_MMAP_FLAG
141
142         if (flags)
143                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
144
145         return printed;
146 }
147
148 #define SCA_MMAP_FLAGS syscall_arg__scnprintf_mmap_flags
149
150 static size_t syscall_arg__scnprintf_madvise_behavior(char *bf, size_t size,
151                                                       struct syscall_arg *arg)
152 {
153         int behavior = arg->val;
154
155         switch (behavior) {
156 #define P_MADV_BHV(n) case MADV_##n: return scnprintf(bf, size, #n)
157         P_MADV_BHV(NORMAL);
158         P_MADV_BHV(RANDOM);
159         P_MADV_BHV(SEQUENTIAL);
160         P_MADV_BHV(WILLNEED);
161         P_MADV_BHV(DONTNEED);
162         P_MADV_BHV(REMOVE);
163         P_MADV_BHV(DONTFORK);
164         P_MADV_BHV(DOFORK);
165         P_MADV_BHV(HWPOISON);
166 #ifdef MADV_SOFT_OFFLINE
167         P_MADV_BHV(SOFT_OFFLINE);
168 #endif
169         P_MADV_BHV(MERGEABLE);
170         P_MADV_BHV(UNMERGEABLE);
171 #ifdef MADV_HUGEPAGE
172         P_MADV_BHV(HUGEPAGE);
173 #endif
174 #ifdef MADV_NOHUGEPAGE
175         P_MADV_BHV(NOHUGEPAGE);
176 #endif
177 #ifdef MADV_DONTDUMP
178         P_MADV_BHV(DONTDUMP);
179 #endif
180 #ifdef MADV_DODUMP
181         P_MADV_BHV(DODUMP);
182 #endif
183 #undef P_MADV_PHV
184         default: break;
185         }
186
187         return scnprintf(bf, size, "%#x", behavior);
188 }
189
190 #define SCA_MADV_BHV syscall_arg__scnprintf_madvise_behavior
191
192 static size_t syscall_arg__scnprintf_flock(char *bf, size_t size,
193                                            struct syscall_arg *arg)
194 {
195         int printed = 0, op = arg->val;
196
197         if (op == 0)
198                 return scnprintf(bf, size, "NONE");
199 #define P_CMD(cmd) \
200         if ((op & LOCK_##cmd) == LOCK_##cmd) { \
201                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #cmd); \
202                 op &= ~LOCK_##cmd; \
203         }
204
205         P_CMD(SH);
206         P_CMD(EX);
207         P_CMD(NB);
208         P_CMD(UN);
209         P_CMD(MAND);
210         P_CMD(RW);
211         P_CMD(READ);
212         P_CMD(WRITE);
213 #undef P_OP
214
215         if (op)
216                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", op);
217
218         return printed;
219 }
220
221 #define SCA_FLOCK syscall_arg__scnprintf_flock
222
223 static size_t syscall_arg__scnprintf_futex_op(char *bf, size_t size, struct syscall_arg *arg)
224 {
225         enum syscall_futex_args {
226                 SCF_UADDR   = (1 << 0),
227                 SCF_OP      = (1 << 1),
228                 SCF_VAL     = (1 << 2),
229                 SCF_TIMEOUT = (1 << 3),
230                 SCF_UADDR2  = (1 << 4),
231                 SCF_VAL3    = (1 << 5),
232         };
233         int op = arg->val;
234         int cmd = op & FUTEX_CMD_MASK;
235         size_t printed = 0;
236
237         switch (cmd) {
238 #define P_FUTEX_OP(n) case FUTEX_##n: printed = scnprintf(bf, size, #n);
239         P_FUTEX_OP(WAIT);           arg->mask |= SCF_VAL3|SCF_UADDR2;             break;
240         P_FUTEX_OP(WAKE);           arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
241         P_FUTEX_OP(FD);             arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
242         P_FUTEX_OP(REQUEUE);        arg->mask |= SCF_VAL3|SCF_TIMEOUT;            break;
243         P_FUTEX_OP(CMP_REQUEUE);    arg->mask |= SCF_TIMEOUT;                     break;
244         P_FUTEX_OP(CMP_REQUEUE_PI); arg->mask |= SCF_TIMEOUT;                     break;
245         P_FUTEX_OP(WAKE_OP);                                                      break;
246         P_FUTEX_OP(LOCK_PI);        arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
247         P_FUTEX_OP(UNLOCK_PI);      arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
248         P_FUTEX_OP(TRYLOCK_PI);     arg->mask |= SCF_VAL3|SCF_UADDR2;             break;
249         P_FUTEX_OP(WAIT_BITSET);    arg->mask |= SCF_UADDR2;                      break;
250         P_FUTEX_OP(WAKE_BITSET);    arg->mask |= SCF_UADDR2;                      break;
251         P_FUTEX_OP(WAIT_REQUEUE_PI);                                              break;
252         default: printed = scnprintf(bf, size, "%#x", cmd);                       break;
253         }
254
255         if (op & FUTEX_PRIVATE_FLAG)
256                 printed += scnprintf(bf + printed, size - printed, "|PRIV");
257
258         if (op & FUTEX_CLOCK_REALTIME)
259                 printed += scnprintf(bf + printed, size - printed, "|CLKRT");
260
261         return printed;
262 }
263
264 #define SCA_FUTEX_OP  syscall_arg__scnprintf_futex_op
265
/*
 * String tables for the strarray beautifier: the position of a name is the
 * argument value it stands for.
 */

/* 'op' of epoll_ctl(); there is no name for 0. */
static const char *epoll_ctl_ops[] = {
	[1] = "ADD",
	[2] = "DEL",
	[3] = "MOD",
};
static DEFINE_STRARRAY(epoll_ctl_ops);

/* 'which' of getitimer()/setitimer(). */
static const char *itimers[] = {
	[0] = "REAL",
	[1] = "VIRTUAL",
	[2] = "PROF",
};
static DEFINE_STRARRAY(itimers);

/* 'whence' of lseek(). */
static const char *whences[] = {
	"SET",
	"CUR",
	"END",
#ifdef SEEK_DATA
	"DATA",
#endif
#ifdef SEEK_HOLE
	"HOLE",
#endif
};
static DEFINE_STRARRAY(whences);

/* 'cmd' of fcntl(). */
static const char *fcntl_cmds[] = {
	[0]  = "DUPFD",
	[1]  = "GETFD",
	[2]  = "SETFD",
	[3]  = "GETFL",
	[4]  = "SETFL",
	[5]  = "GETLK",
	[6]  = "SETLK",
	[7]  = "SETLKW",
	[8]  = "SETOWN",
	[9]  = "GETOWN",
	[10] = "SETSIG",
	[11] = "GETSIG",
	[12] = "F_GETLK64",
	[13] = "F_SETLK64",
	[14] = "F_SETLKW64",
	[15] = "F_SETOWN_EX",
	[16] = "F_GETOWN_EX",
	[17] = "F_GETOWNER_UIDS",
};
static DEFINE_STRARRAY(fcntl_cmds);

/* 'resource' of getrlimit()/setrlimit()/prlimit64(). */
static const char *rlimit_resources[] = {
	[0]  = "CPU",
	[1]  = "FSIZE",
	[2]  = "DATA",
	[3]  = "STACK",
	[4]  = "CORE",
	[5]  = "RSS",
	[6]  = "NPROC",
	[7]  = "NOFILE",
	[8]  = "MEMLOCK",
	[9]  = "AS",
	[10] = "LOCKS",
	[11] = "SIGPENDING",
	[12] = "MSGQUEUE",
	[13] = "NICE",
	[14] = "RTPRIO",
	[15] = "RTTIME",
};
static DEFINE_STRARRAY(rlimit_resources);

/* 'how' of rt_sigprocmask(). */
static const char *sighow[] = {
	[0] = "BLOCK",
	[1] = "UNBLOCK",
	[2] = "SETMASK",
};
static DEFINE_STRARRAY(sighow);
299
/*
 * Names of the socket address families, indexed by the AF_ value passed as
 * the 'family' argument of socket() and socketpair().
 *
 * The position of each name has to match the kernel's AF_ numbering. Slot 28
 * is AF_MPLS; without an entry for it every name from "CAN" (AF_CAN is 29)
 * onwards would be attributed to the family one below the right one.
 */
static const char *socket_families[] = {
	/*  0 */ "UNSPEC", "LOCAL", "INET", "AX25", "IPX", "APPLETALK", "NETROM",
	/*  7 */ "BRIDGE", "ATMPVC", "X25", "INET6", "ROSE", "DECnet", "NETBEUI",
	/* 14 */ "SECURITY", "KEY", "NETLINK", "PACKET", "ASH", "ECONET", "ATMSVC",
	/* 21 */ "RDS", "SNA", "IRDA", "PPPOX", "WANPIPE", "LLC", "IB",
	/* 28 */ "MPLS", "CAN", "TIPC", "BLUETOOTH", "IUCV", "RXRPC", "ISDN",
	/* 35 */ "PHONET", "IEEE802154", "CAIF", "ALG", "NFC", "VSOCK",
};
static DEFINE_STRARRAY(socket_families);
309
310 #ifndef SOCK_TYPE_MASK
311 #define SOCK_TYPE_MASK 0xf
312 #endif
313
314 static size_t syscall_arg__scnprintf_socket_type(char *bf, size_t size,
315                                                       struct syscall_arg *arg)
316 {
317         size_t printed;
318         int type = arg->val,
319             flags = type & ~SOCK_TYPE_MASK;
320
321         type &= SOCK_TYPE_MASK;
322         /*
323          * Can't use a strarray, MIPS may override for ABI reasons.
324          */
325         switch (type) {
326 #define P_SK_TYPE(n) case SOCK_##n: printed = scnprintf(bf, size, #n); break;
327         P_SK_TYPE(STREAM);
328         P_SK_TYPE(DGRAM);
329         P_SK_TYPE(RAW);
330         P_SK_TYPE(RDM);
331         P_SK_TYPE(SEQPACKET);
332         P_SK_TYPE(DCCP);
333         P_SK_TYPE(PACKET);
334 #undef P_SK_TYPE
335         default:
336                 printed = scnprintf(bf, size, "%#x", type);
337         }
338
339 #define P_SK_FLAG(n) \
340         if (flags & SOCK_##n) { \
341                 printed += scnprintf(bf + printed, size - printed, "|%s", #n); \
342                 flags &= ~SOCK_##n; \
343         }
344
345         P_SK_FLAG(CLOEXEC);
346         P_SK_FLAG(NONBLOCK);
347 #undef P_SK_FLAG
348
349         if (flags)
350                 printed += scnprintf(bf + printed, size - printed, "|%#x", flags);
351
352         return printed;
353 }
354
355 #define SCA_SK_TYPE syscall_arg__scnprintf_socket_type
356
357 #ifndef MSG_PROBE
358 #define MSG_PROBE            0x10
359 #endif
360 #ifndef MSG_SENDPAGE_NOTLAST
361 #define MSG_SENDPAGE_NOTLAST 0x20000
362 #endif
363 #ifndef MSG_FASTOPEN
364 #define MSG_FASTOPEN         0x20000000
365 #endif
366
367 static size_t syscall_arg__scnprintf_msg_flags(char *bf, size_t size,
368                                                struct syscall_arg *arg)
369 {
370         int printed = 0, flags = arg->val;
371
372         if (flags == 0)
373                 return scnprintf(bf, size, "NONE");
374 #define P_MSG_FLAG(n) \
375         if (flags & MSG_##n) { \
376                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
377                 flags &= ~MSG_##n; \
378         }
379
380         P_MSG_FLAG(OOB);
381         P_MSG_FLAG(PEEK);
382         P_MSG_FLAG(DONTROUTE);
383         P_MSG_FLAG(TRYHARD);
384         P_MSG_FLAG(CTRUNC);
385         P_MSG_FLAG(PROBE);
386         P_MSG_FLAG(TRUNC);
387         P_MSG_FLAG(DONTWAIT);
388         P_MSG_FLAG(EOR);
389         P_MSG_FLAG(WAITALL);
390         P_MSG_FLAG(FIN);
391         P_MSG_FLAG(SYN);
392         P_MSG_FLAG(CONFIRM);
393         P_MSG_FLAG(RST);
394         P_MSG_FLAG(ERRQUEUE);
395         P_MSG_FLAG(NOSIGNAL);
396         P_MSG_FLAG(MORE);
397         P_MSG_FLAG(WAITFORONE);
398         P_MSG_FLAG(SENDPAGE_NOTLAST);
399         P_MSG_FLAG(FASTOPEN);
400         P_MSG_FLAG(CMSG_CLOEXEC);
401 #undef P_MSG_FLAG
402
403         if (flags)
404                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
405
406         return printed;
407 }
408
409 #define SCA_MSG_FLAGS syscall_arg__scnprintf_msg_flags
410
411 static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size,
412                                                  struct syscall_arg *arg)
413 {
414         size_t printed = 0;
415         int mode = arg->val;
416
417         if (mode == F_OK) /* 0 */
418                 return scnprintf(bf, size, "F");
419 #define P_MODE(n) \
420         if (mode & n##_OK) { \
421                 printed += scnprintf(bf + printed, size - printed, "%s", #n); \
422                 mode &= ~n##_OK; \
423         }
424
425         P_MODE(R);
426         P_MODE(W);
427         P_MODE(X);
428 #undef P_MODE
429
430         if (mode)
431                 printed += scnprintf(bf + printed, size - printed, "|%#x", mode);
432
433         return printed;
434 }
435
436 #define SCA_ACCMODE syscall_arg__scnprintf_access_mode
437
438 static size_t syscall_arg__scnprintf_open_flags(char *bf, size_t size,
439                                                struct syscall_arg *arg)
440 {
441         int printed = 0, flags = arg->val;
442
443         if (!(flags & O_CREAT))
444                 arg->mask |= 1 << (arg->idx + 1); /* Mask the mode parm */
445
446         if (flags == 0)
447                 return scnprintf(bf, size, "RDONLY");
448 #define P_FLAG(n) \
449         if (flags & O_##n) { \
450                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
451                 flags &= ~O_##n; \
452         }
453
454         P_FLAG(APPEND);
455         P_FLAG(ASYNC);
456         P_FLAG(CLOEXEC);
457         P_FLAG(CREAT);
458         P_FLAG(DIRECT);
459         P_FLAG(DIRECTORY);
460         P_FLAG(EXCL);
461         P_FLAG(LARGEFILE);
462         P_FLAG(NOATIME);
463         P_FLAG(NOCTTY);
464 #ifdef O_NONBLOCK
465         P_FLAG(NONBLOCK);
466 #elif O_NDELAY
467         P_FLAG(NDELAY);
468 #endif
469 #ifdef O_PATH
470         P_FLAG(PATH);
471 #endif
472         P_FLAG(RDWR);
473 #ifdef O_DSYNC
474         if ((flags & O_SYNC) == O_SYNC)
475                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", "SYNC");
476         else {
477                 P_FLAG(DSYNC);
478         }
479 #else
480         P_FLAG(SYNC);
481 #endif
482         P_FLAG(TRUNC);
483         P_FLAG(WRONLY);
484 #undef P_FLAG
485
486         if (flags)
487                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
488
489         return printed;
490 }
491
492 #define SCA_OPEN_FLAGS syscall_arg__scnprintf_open_flags
493
494 static size_t syscall_arg__scnprintf_eventfd_flags(char *bf, size_t size,
495                                                    struct syscall_arg *arg)
496 {
497         int printed = 0, flags = arg->val;
498
499         if (flags == 0)
500                 return scnprintf(bf, size, "NONE");
501 #define P_FLAG(n) \
502         if (flags & EFD_##n) { \
503                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
504                 flags &= ~EFD_##n; \
505         }
506
507         P_FLAG(SEMAPHORE);
508         P_FLAG(CLOEXEC);
509         P_FLAG(NONBLOCK);
510 #undef P_FLAG
511
512         if (flags)
513                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
514
515         return printed;
516 }
517
518 #define SCA_EFD_FLAGS syscall_arg__scnprintf_eventfd_flags
519
520 static size_t syscall_arg__scnprintf_signum(char *bf, size_t size, struct syscall_arg *arg)
521 {
522         int sig = arg->val;
523
524         switch (sig) {
525 #define P_SIGNUM(n) case SIG##n: return scnprintf(bf, size, #n)
526         P_SIGNUM(HUP);
527         P_SIGNUM(INT);
528         P_SIGNUM(QUIT);
529         P_SIGNUM(ILL);
530         P_SIGNUM(TRAP);
531         P_SIGNUM(ABRT);
532         P_SIGNUM(BUS);
533         P_SIGNUM(FPE);
534         P_SIGNUM(KILL);
535         P_SIGNUM(USR1);
536         P_SIGNUM(SEGV);
537         P_SIGNUM(USR2);
538         P_SIGNUM(PIPE);
539         P_SIGNUM(ALRM);
540         P_SIGNUM(TERM);
541         P_SIGNUM(STKFLT);
542         P_SIGNUM(CHLD);
543         P_SIGNUM(CONT);
544         P_SIGNUM(STOP);
545         P_SIGNUM(TSTP);
546         P_SIGNUM(TTIN);
547         P_SIGNUM(TTOU);
548         P_SIGNUM(URG);
549         P_SIGNUM(XCPU);
550         P_SIGNUM(XFSZ);
551         P_SIGNUM(VTALRM);
552         P_SIGNUM(PROF);
553         P_SIGNUM(WINCH);
554         P_SIGNUM(IO);
555         P_SIGNUM(PWR);
556         P_SIGNUM(SYS);
557         default: break;
558         }
559
560         return scnprintf(bf, size, "%#x", sig);
561 }
562
563 #define SCA_SIGNUM syscall_arg__scnprintf_signum
564
565 #define STRARRAY(arg, name, array) \
566           .arg_scnprintf = { [arg] = SCA_STRARRAY, }, \
567           .arg_parm      = { [arg] = &strarray__##array, }
568
569 static struct syscall_fmt {
570         const char *name;
571         const char *alias;
572         size_t     (*arg_scnprintf[6])(char *bf, size_t size, struct syscall_arg *arg);
573         void       *arg_parm[6];
574         bool       errmsg;
575         bool       timeout;
576         bool       hexret;
577 } syscall_fmts[] = {
578         { .name     = "access",     .errmsg = true,
579           .arg_scnprintf = { [1] = SCA_ACCMODE, /* mode */ }, },
580         { .name     = "arch_prctl", .errmsg = true, .alias = "prctl", },
581         { .name     = "brk",        .hexret = true,
582           .arg_scnprintf = { [0] = SCA_HEX, /* brk */ }, },
583         { .name     = "connect",    .errmsg = true, },
584         { .name     = "epoll_ctl",  .errmsg = true, STRARRAY(1, op, epoll_ctl_ops), },
585         { .name     = "eventfd2",   .errmsg = true,
586           .arg_scnprintf = { [1] = SCA_EFD_FLAGS, /* flags */ }, },
587         { .name     = "fcntl",      .errmsg = true, STRARRAY(1, cmd, fcntl_cmds), },
588         { .name     = "flock",      .errmsg = true,
589           .arg_scnprintf = { [1] = SCA_FLOCK, /* cmd */ }, },
590         { .name     = "fstat",      .errmsg = true, .alias = "newfstat", },
591         { .name     = "fstatat",    .errmsg = true, .alias = "newfstatat", },
592         { .name     = "futex",      .errmsg = true,
593           .arg_scnprintf = { [1] = SCA_FUTEX_OP, /* op */ }, },
594         { .name     = "getitimer",  .errmsg = true, STRARRAY(0, which, itimers), },
595         { .name     = "getrlimit",  .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
596         { .name     = "ioctl",      .errmsg = true,
597           .arg_scnprintf = { [2] = SCA_HEX, /* arg */ }, },
598         { .name     = "kill",       .errmsg = true,
599           .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
600         { .name     = "lseek",      .errmsg = true, STRARRAY(2, whence, whences), },
601         { .name     = "lstat",      .errmsg = true, .alias = "newlstat", },
602         { .name     = "madvise",    .errmsg = true,
603           .arg_scnprintf = { [0] = SCA_HEX,      /* start */
604                              [2] = SCA_MADV_BHV, /* behavior */ }, },
605         { .name     = "mmap",       .hexret = true,
606           .arg_scnprintf = { [0] = SCA_HEX,       /* addr */
607                              [2] = SCA_MMAP_PROT, /* prot */
608                              [3] = SCA_MMAP_FLAGS, /* flags */ }, },
609         { .name     = "mprotect",   .errmsg = true,
610           .arg_scnprintf = { [0] = SCA_HEX, /* start */
611                              [2] = SCA_MMAP_PROT, /* prot */ }, },
612         { .name     = "mremap",     .hexret = true,
613           .arg_scnprintf = { [0] = SCA_HEX, /* addr */
614                              [4] = SCA_HEX, /* new_addr */ }, },
615         { .name     = "munmap",     .errmsg = true,
616           .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
617         { .name     = "open",       .errmsg = true,
618           .arg_scnprintf = { [1] = SCA_OPEN_FLAGS, /* flags */ }, },
619         { .name     = "open_by_handle_at", .errmsg = true,
620           .arg_scnprintf = { [2] = SCA_OPEN_FLAGS, /* flags */ }, },
621         { .name     = "openat",     .errmsg = true,
622           .arg_scnprintf = { [2] = SCA_OPEN_FLAGS, /* flags */ }, },
623         { .name     = "poll",       .errmsg = true, .timeout = true, },
624         { .name     = "ppoll",      .errmsg = true, .timeout = true, },
625         { .name     = "pread",      .errmsg = true, .alias = "pread64", },
626         { .name     = "prlimit64",  .errmsg = true, STRARRAY(1, resource, rlimit_resources), },
627         { .name     = "pwrite",     .errmsg = true, .alias = "pwrite64", },
628         { .name     = "read",       .errmsg = true, },
629         { .name     = "recvfrom",   .errmsg = true,
630           .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
631         { .name     = "recvmmsg",   .errmsg = true,
632           .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
633         { .name     = "recvmsg",    .errmsg = true,
634           .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
635         { .name     = "rt_sigaction", .errmsg = true,
636           .arg_scnprintf = { [0] = SCA_SIGNUM, /* sig */ }, },
637         { .name     = "rt_sigprocmask",  .errmsg = true, STRARRAY(0, how, sighow), },
638         { .name     = "rt_sigqueueinfo", .errmsg = true,
639           .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
640         { .name     = "rt_tgsigqueueinfo", .errmsg = true,
641           .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
642         { .name     = "select",     .errmsg = true, .timeout = true, },
643         { .name     = "sendmmsg",    .errmsg = true,
644           .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
645         { .name     = "sendmsg",    .errmsg = true,
646           .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
647         { .name     = "sendto",     .errmsg = true,
648           .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
649         { .name     = "setitimer",  .errmsg = true, STRARRAY(0, which, itimers), },
650         { .name     = "setrlimit",  .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
651         { .name     = "socket",     .errmsg = true,
652           .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
653                              [1] = SCA_SK_TYPE, /* type */ },
654           .arg_parm      = { [0] = &strarray__socket_families, /* family */ }, },
655         { .name     = "socketpair", .errmsg = true,
656           .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
657                              [1] = SCA_SK_TYPE, /* type */ },
658           .arg_parm      = { [0] = &strarray__socket_families, /* family */ }, },
659         { .name     = "stat",       .errmsg = true, .alias = "newstat", },
660         { .name     = "tgkill",     .errmsg = true,
661           .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
662         { .name     = "tkill",      .errmsg = true,
663           .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
664         { .name     = "uname",      .errmsg = true, .alias = "newuname", },
665 };
666
667 static int syscall_fmt__cmp(const void *name, const void *fmtp)
668 {
669         const struct syscall_fmt *fmt = fmtp;
670         return strcmp(name, fmt->name);
671 }
672
673 static struct syscall_fmt *syscall_fmt__find(const char *name)
674 {
675         const int nmemb = ARRAY_SIZE(syscall_fmts);
676         return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp);
677 }
678
/*
 * What is known about one syscall, one entry per id in trace->syscalls.table.
 */
struct syscall {
	/* Format of the "syscalls" tracepoint looked up for this syscall. */
	struct event_format *tp_format;
	/* Name obtained from audit_syscall_to_name(). */
	const char	    *name;
	/* Set when the event qualifier excludes this syscall. */
	bool		    filtered;
	/* Entry of syscall_fmts[] for this syscall, NULL if there is none. */
	struct syscall_fmt  *fmt;
	/* One beautifier slot per argument, see syscall__set_arg_fmts(). */
	size_t		    (**arg_scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
	/* Beautifier data per argument, taken from fmt->arg_parm. */
	void		    **arg_parm;
};
687
688 static size_t fprintf_duration(unsigned long t, FILE *fp)
689 {
690         double duration = (double)t / NSEC_PER_MSEC;
691         size_t printed = fprintf(fp, "(");
692
693         if (duration >= 1.0)
694                 printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration);
695         else if (duration >= 0.01)
696                 printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration);
697         else
698                 printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration);
699         return printed + fprintf(fp, "): ");
700 }
701
702 struct thread_trace {
703         u64               entry_time;
704         u64               exit_time;
705         bool              entry_pending;
706         unsigned long     nr_events;
707         char              *entry_str;
708         double            runtime_ms;
709 };
710
711 static struct thread_trace *thread_trace__new(void)
712 {
713         return zalloc(sizeof(struct thread_trace));
714 }
715
716 static struct thread_trace *thread__trace(struct thread *thread, FILE *fp)
717 {
718         struct thread_trace *ttrace;
719
720         if (thread == NULL)
721                 goto fail;
722
723         if (thread->priv == NULL)
724                 thread->priv = thread_trace__new();
725                 
726         if (thread->priv == NULL)
727                 goto fail;
728
729         ttrace = thread->priv;
730         ++ttrace->nr_events;
731
732         return ttrace;
733 fail:
734         color_fprintf(fp, PERF_COLOR_RED,
735                       "WARNING: not enough memory, dropping samples!\n");
736         return NULL;
737 }
738
739 struct trace {
740         struct perf_tool        tool;
741         int                     audit_machine;
742         struct {
743                 int             max;
744                 struct syscall  *table;
745         } syscalls;
746         struct perf_record_opts opts;
747         struct machine          host;
748         u64                     base_time;
749         bool                    full_time;
750         FILE                    *output;
751         unsigned long           nr_events;
752         struct strlist          *ev_qualifier;
753         bool                    not_ev_qualifier;
754         struct intlist          *tid_list;
755         struct intlist          *pid_list;
756         bool                    sched;
757         bool                    multiple_threads;
758         bool                    show_comm;
759         double                  duration_filter;
760         double                  runtime_ms;
761 };
762
763 static bool trace__filter_duration(struct trace *trace, double t)
764 {
765         return t < (trace->duration_filter * NSEC_PER_MSEC);
766 }
767
768 static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
769 {
770         double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC;
771
772         return fprintf(fp, "%10.3f ", ts);
773 }
774
775 static bool done = false;
776
777 static void sig_handler(int sig __maybe_unused)
778 {
779         done = true;
780 }
781
782 static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread,
783                                         u64 duration, u64 tstamp, FILE *fp)
784 {
785         size_t printed = trace__fprintf_tstamp(trace, tstamp, fp);
786         printed += fprintf_duration(duration, fp);
787
788         if (trace->multiple_threads) {
789                 if (trace->show_comm)
790                         printed += fprintf(fp, "%.14s/", thread->comm);
791                 printed += fprintf(fp, "%d ", thread->tid);
792         }
793
794         return printed;
795 }
796
797 static int trace__process_event(struct trace *trace, struct machine *machine,
798                                 union perf_event *event)
799 {
800         int ret = 0;
801
802         switch (event->header.type) {
803         case PERF_RECORD_LOST:
804                 color_fprintf(trace->output, PERF_COLOR_RED,
805                               "LOST %" PRIu64 " events!\n", event->lost.lost);
806                 ret = machine__process_lost_event(machine, event);
807         default:
808                 ret = machine__process_event(machine, event);
809                 break;
810         }
811
812         return ret;
813 }
814
815 static int trace__tool_process(struct perf_tool *tool,
816                                union perf_event *event,
817                                struct perf_sample *sample __maybe_unused,
818                                struct machine *machine)
819 {
820         struct trace *trace = container_of(tool, struct trace, tool);
821         return trace__process_event(trace, machine, event);
822 }
823
824 static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
825 {
826         int err = symbol__init();
827
828         if (err)
829                 return err;
830
831         machine__init(&trace->host, "", HOST_KERNEL_ID);
832         machine__create_kernel_maps(&trace->host);
833
834         if (perf_target__has_task(&trace->opts.target)) {
835                 err = perf_event__synthesize_thread_map(&trace->tool, evlist->threads,
836                                                         trace__tool_process,
837                                                         &trace->host);
838         } else {
839                 err = perf_event__synthesize_threads(&trace->tool, trace__tool_process,
840                                                      &trace->host);
841         }
842
843         if (err)
844                 symbol__exit();
845
846         return err;
847 }
848
849 static int syscall__set_arg_fmts(struct syscall *sc)
850 {
851         struct format_field *field;
852         int idx = 0;
853
854         sc->arg_scnprintf = calloc(sc->tp_format->format.nr_fields - 1, sizeof(void *));
855         if (sc->arg_scnprintf == NULL)
856                 return -1;
857
858         if (sc->fmt)
859                 sc->arg_parm = sc->fmt->arg_parm;
860
861         for (field = sc->tp_format->format.fields->next; field; field = field->next) {
862                 if (sc->fmt && sc->fmt->arg_scnprintf[idx])
863                         sc->arg_scnprintf[idx] = sc->fmt->arg_scnprintf[idx];
864                 else if (field->flags & FIELD_IS_POINTER)
865                         sc->arg_scnprintf[idx] = syscall_arg__scnprintf_hex;
866                 ++idx;
867         }
868
869         return 0;
870 }
871
872 static int trace__read_syscall_info(struct trace *trace, int id)
873 {
874         char tp_name[128];
875         struct syscall *sc;
876         const char *name = audit_syscall_to_name(id, trace->audit_machine);
877
878         if (name == NULL)
879                 return -1;
880
881         if (id > trace->syscalls.max) {
882                 struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc));
883
884                 if (nsyscalls == NULL)
885                         return -1;
886
887                 if (trace->syscalls.max != -1) {
888                         memset(nsyscalls + trace->syscalls.max + 1, 0,
889                                (id - trace->syscalls.max) * sizeof(*sc));
890                 } else {
891                         memset(nsyscalls, 0, (id + 1) * sizeof(*sc));
892                 }
893
894                 trace->syscalls.table = nsyscalls;
895                 trace->syscalls.max   = id;
896         }
897
898         sc = trace->syscalls.table + id;
899         sc->name = name;
900
901         if (trace->ev_qualifier) {
902                 bool in = strlist__find(trace->ev_qualifier, name) != NULL;
903
904                 if (!(in ^ trace->not_ev_qualifier)) {
905                         sc->filtered = true;
906                         /*
907                          * No need to do read tracepoint information since this will be
908                          * filtered out.
909                          */
910                         return 0;
911                 }
912         }
913
914         sc->fmt  = syscall_fmt__find(sc->name);
915
916         snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name);
917         sc->tp_format = event_format__new("syscalls", tp_name);
918
919         if (sc->tp_format == NULL && sc->fmt && sc->fmt->alias) {
920                 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias);
921                 sc->tp_format = event_format__new("syscalls", tp_name);
922         }
923
924         if (sc->tp_format == NULL)
925                 return -1;
926
927         return syscall__set_arg_fmts(sc);
928 }
929
930 static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
931                                       unsigned long *args)
932 {
933         size_t printed = 0;
934
935         if (sc->tp_format != NULL) {
936                 struct format_field *field;
937                 u8 bit = 1;
938                 struct syscall_arg arg = {
939                         .idx  = 0,
940                         .mask = 0,
941                 };
942
943                 for (field = sc->tp_format->format.fields->next; field;
944                      field = field->next, ++arg.idx, bit <<= 1) {
945                         if (arg.mask & bit)
946                                 continue;
947                         /*
948                          * Suppress this argument if its value is zero and
949                          * and we don't have a string associated in an
950                          * strarray for it.
951                          */
952                         if (args[arg.idx] == 0 &&
953                             !(sc->arg_scnprintf &&
954                               sc->arg_scnprintf[arg.idx] == SCA_STRARRAY &&
955                               sc->arg_parm[arg.idx]))
956                                 continue;
957
958                         printed += scnprintf(bf + printed, size - printed,
959                                              "%s%s: ", printed ? ", " : "", field->name);
960                         if (sc->arg_scnprintf && sc->arg_scnprintf[arg.idx]) {
961                                 arg.val = args[arg.idx];
962                                 if (sc->arg_parm)
963                                         arg.parm = sc->arg_parm[arg.idx];
964                                 printed += sc->arg_scnprintf[arg.idx](bf + printed,
965                                                                       size - printed, &arg);
966                         } else {
967                                 printed += scnprintf(bf + printed, size - printed,
968                                                      "%ld", args[arg.idx]);
969                         }
970                 }
971         } else {
972                 int i = 0;
973
974                 while (i < 6) {
975                         printed += scnprintf(bf + printed, size - printed,
976                                              "%sarg%d: %ld",
977                                              printed ? ", " : "", i, args[i]);
978                         ++i;
979                 }
980         }
981
982         return printed;
983 }
984
985 typedef int (*tracepoint_handler)(struct trace *trace, struct perf_evsel *evsel,
986                                   struct perf_sample *sample);
987
988 static struct syscall *trace__syscall_info(struct trace *trace,
989                                            struct perf_evsel *evsel,
990                                            struct perf_sample *sample)
991 {
992         int id = perf_evsel__intval(evsel, sample, "id");
993
994         if (id < 0) {
995
996                 /*
997                  * XXX: Noticed on x86_64, reproduced as far back as 3.0.36, haven't tried
998                  * before that, leaving at a higher verbosity level till that is
999                  * explained. Reproduced with plain ftrace with:
1000                  *
1001                  * echo 1 > /t/events/raw_syscalls/sys_exit/enable
1002                  * grep "NR -1 " /t/trace_pipe
1003                  *
1004                  * After generating some load on the machine.
1005                  */
1006                 if (verbose > 1) {
1007                         static u64 n;
1008                         fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n",
1009                                 id, perf_evsel__name(evsel), ++n);
1010                 }
1011                 return NULL;
1012         }
1013
1014         if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) &&
1015             trace__read_syscall_info(trace, id))
1016                 goto out_cant_read;
1017
1018         if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL))
1019                 goto out_cant_read;
1020
1021         return &trace->syscalls.table[id];
1022
1023 out_cant_read:
1024         if (verbose) {
1025                 fprintf(trace->output, "Problems reading syscall %d", id);
1026                 if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL)
1027                         fprintf(trace->output, "(%s)", trace->syscalls.table[id].name);
1028                 fputs(" information\n", trace->output);
1029         }
1030         return NULL;
1031 }
1032
1033 static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
1034                             struct perf_sample *sample)
1035 {
1036         char *msg;
1037         void *args;
1038         size_t printed = 0;
1039         struct thread *thread;
1040         struct syscall *sc = trace__syscall_info(trace, evsel, sample);
1041         struct thread_trace *ttrace;
1042
1043         if (sc == NULL)
1044                 return -1;
1045
1046         if (sc->filtered)
1047                 return 0;
1048
1049         thread = machine__findnew_thread(&trace->host, sample->pid,
1050                                          sample->tid);
1051         ttrace = thread__trace(thread, trace->output);
1052         if (ttrace == NULL)
1053                 return -1;
1054
1055         args = perf_evsel__rawptr(evsel, sample, "args");
1056         if (args == NULL) {
1057                 fprintf(trace->output, "Problems reading syscall arguments\n");
1058                 return -1;
1059         }
1060
1061         ttrace = thread->priv;
1062
1063         if (ttrace->entry_str == NULL) {
1064                 ttrace->entry_str = malloc(1024);
1065                 if (!ttrace->entry_str)
1066                         return -1;
1067         }
1068
1069         ttrace->entry_time = sample->time;
1070         msg = ttrace->entry_str;
1071         printed += scnprintf(msg + printed, 1024 - printed, "%s(", sc->name);
1072
1073         printed += syscall__scnprintf_args(sc, msg + printed, 1024 - printed,  args);
1074
1075         if (!strcmp(sc->name, "exit_group") || !strcmp(sc->name, "exit")) {
1076                 if (!trace->duration_filter) {
1077                         trace__fprintf_entry_head(trace, thread, 1, sample->time, trace->output);
1078                         fprintf(trace->output, "%-70s\n", ttrace->entry_str);
1079                 }
1080         } else
1081                 ttrace->entry_pending = true;
1082
1083         return 0;
1084 }
1085
1086 static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
1087                            struct perf_sample *sample)
1088 {
1089         int ret;
1090         u64 duration = 0;
1091         struct thread *thread;
1092         struct syscall *sc = trace__syscall_info(trace, evsel, sample);
1093         struct thread_trace *ttrace;
1094
1095         if (sc == NULL)
1096                 return -1;
1097
1098         if (sc->filtered)
1099                 return 0;
1100
1101         thread = machine__findnew_thread(&trace->host, sample->pid,
1102                                          sample->tid);
1103         ttrace = thread__trace(thread, trace->output);
1104         if (ttrace == NULL)
1105                 return -1;
1106
1107         ret = perf_evsel__intval(evsel, sample, "ret");
1108
1109         ttrace = thread->priv;
1110
1111         ttrace->exit_time = sample->time;
1112
1113         if (ttrace->entry_time) {
1114                 duration = sample->time - ttrace->entry_time;
1115                 if (trace__filter_duration(trace, duration))
1116                         goto out;
1117         } else if (trace->duration_filter)
1118                 goto out;
1119
1120         trace__fprintf_entry_head(trace, thread, duration, sample->time, trace->output);
1121
1122         if (ttrace->entry_pending) {
1123                 fprintf(trace->output, "%-70s", ttrace->entry_str);
1124         } else {
1125                 fprintf(trace->output, " ... [");
1126                 color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued");
1127                 fprintf(trace->output, "]: %s()", sc->name);
1128         }
1129
1130         if (sc->fmt == NULL) {
1131 signed_print:
1132                 fprintf(trace->output, ") = %d", ret);
1133         } else if (ret < 0 && sc->fmt->errmsg) {
1134                 char bf[256];
1135                 const char *emsg = strerror_r(-ret, bf, sizeof(bf)),
1136                            *e = audit_errno_to_name(-ret);
1137
1138                 fprintf(trace->output, ") = -1 %s %s", e, emsg);
1139         } else if (ret == 0 && sc->fmt->timeout)
1140                 fprintf(trace->output, ") = 0 Timeout");
1141         else if (sc->fmt->hexret)
1142                 fprintf(trace->output, ") = %#x", ret);
1143         else
1144                 goto signed_print;
1145
1146         fputc('\n', trace->output);
1147 out:
1148         ttrace->entry_pending = false;
1149
1150         return 0;
1151 }
1152
1153 static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel,
1154                                      struct perf_sample *sample)
1155 {
1156         u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
1157         double runtime_ms = (double)runtime / NSEC_PER_MSEC;
1158         struct thread *thread = machine__findnew_thread(&trace->host,
1159                                                         sample->pid,
1160                                                         sample->tid);
1161         struct thread_trace *ttrace = thread__trace(thread, trace->output);
1162
1163         if (ttrace == NULL)
1164                 goto out_dump;
1165
1166         ttrace->runtime_ms += runtime_ms;
1167         trace->runtime_ms += runtime_ms;
1168         return 0;
1169
1170 out_dump:
1171         fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n",
1172                evsel->name,
1173                perf_evsel__strval(evsel, sample, "comm"),
1174                (pid_t)perf_evsel__intval(evsel, sample, "pid"),
1175                runtime,
1176                perf_evsel__intval(evsel, sample, "vruntime"));
1177         return 0;
1178 }
1179
1180 static bool skip_sample(struct trace *trace, struct perf_sample *sample)
1181 {
1182         if ((trace->pid_list && intlist__find(trace->pid_list, sample->pid)) ||
1183             (trace->tid_list && intlist__find(trace->tid_list, sample->tid)))
1184                 return false;
1185
1186         if (trace->pid_list || trace->tid_list)
1187                 return true;
1188
1189         return false;
1190 }
1191
1192 static int trace__process_sample(struct perf_tool *tool,
1193                                  union perf_event *event __maybe_unused,
1194                                  struct perf_sample *sample,
1195                                  struct perf_evsel *evsel,
1196                                  struct machine *machine __maybe_unused)
1197 {
1198         struct trace *trace = container_of(tool, struct trace, tool);
1199         int err = 0;
1200
1201         tracepoint_handler handler = evsel->handler.func;
1202
1203         if (skip_sample(trace, sample))
1204                 return 0;
1205
1206         if (!trace->full_time && trace->base_time == 0)
1207                 trace->base_time = sample->time;
1208
1209         if (handler)
1210                 handler(trace, evsel, sample);
1211
1212         return err;
1213 }
1214
1215 static bool
1216 perf_session__has_tp(struct perf_session *session, const char *name)
1217 {
1218         struct perf_evsel *evsel;
1219
1220         evsel = perf_evlist__find_tracepoint_by_name(session->evlist, name);
1221
1222         return evsel != NULL;
1223 }
1224
1225 static int parse_target_str(struct trace *trace)
1226 {
1227         if (trace->opts.target.pid) {
1228                 trace->pid_list = intlist__new(trace->opts.target.pid);
1229                 if (trace->pid_list == NULL) {
1230                         pr_err("Error parsing process id string\n");
1231                         return -EINVAL;
1232                 }
1233         }
1234
1235         if (trace->opts.target.tid) {
1236                 trace->tid_list = intlist__new(trace->opts.target.tid);
1237                 if (trace->tid_list == NULL) {
1238                         pr_err("Error parsing thread id string\n");
1239                         return -EINVAL;
1240                 }
1241         }
1242
1243         return 0;
1244 }
1245
1246 static int trace__run(struct trace *trace, int argc, const char **argv)
1247 {
1248         struct perf_evlist *evlist = perf_evlist__new();
1249         struct perf_evsel *evsel;
1250         int err = -1, i;
1251         unsigned long before;
1252         const bool forks = argc > 0;
1253
1254         if (evlist == NULL) {
1255                 fprintf(trace->output, "Not enough memory to run!\n");
1256                 goto out;
1257         }
1258
1259         if (perf_evlist__add_newtp(evlist, "raw_syscalls", "sys_enter", trace__sys_enter) ||
1260             perf_evlist__add_newtp(evlist, "raw_syscalls", "sys_exit", trace__sys_exit)) {
1261                 fprintf(trace->output, "Couldn't read the raw_syscalls tracepoints information!\n");
1262                 goto out_delete_evlist;
1263         }
1264
1265         if (trace->sched &&
1266             perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
1267                                    trace__sched_stat_runtime)) {
1268                 fprintf(trace->output, "Couldn't read the sched_stat_runtime tracepoint information!\n");
1269                 goto out_delete_evlist;
1270         }
1271
1272         err = perf_evlist__create_maps(evlist, &trace->opts.target);
1273         if (err < 0) {
1274                 fprintf(trace->output, "Problems parsing the target to trace, check your options!\n");
1275                 goto out_delete_evlist;
1276         }
1277
1278         err = trace__symbols_init(trace, evlist);
1279         if (err < 0) {
1280                 fprintf(trace->output, "Problems initializing symbol libraries!\n");
1281                 goto out_delete_maps;
1282         }
1283
1284         perf_evlist__config(evlist, &trace->opts);
1285
1286         signal(SIGCHLD, sig_handler);
1287         signal(SIGINT, sig_handler);
1288
1289         if (forks) {
1290                 err = perf_evlist__prepare_workload(evlist, &trace->opts.target,
1291                                                     argv, false, false);
1292                 if (err < 0) {
1293                         fprintf(trace->output, "Couldn't run the workload!\n");
1294                         goto out_delete_maps;
1295                 }
1296         }
1297
1298         err = perf_evlist__open(evlist);
1299         if (err < 0) {
1300                 fprintf(trace->output, "Couldn't create the events: %s\n", strerror(errno));
1301                 goto out_delete_maps;
1302         }
1303
1304         err = perf_evlist__mmap(evlist, UINT_MAX, false);
1305         if (err < 0) {
1306                 fprintf(trace->output, "Couldn't mmap the events: %s\n", strerror(errno));
1307                 goto out_close_evlist;
1308         }
1309
1310         perf_evlist__enable(evlist);
1311
1312         if (forks)
1313                 perf_evlist__start_workload(evlist);
1314
1315         trace->multiple_threads = evlist->threads->map[0] == -1 || evlist->threads->nr > 1;
1316 again:
1317         before = trace->nr_events;
1318
1319         for (i = 0; i < evlist->nr_mmaps; i++) {
1320                 union perf_event *event;
1321
1322                 while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) {
1323                         const u32 type = event->header.type;
1324                         tracepoint_handler handler;
1325                         struct perf_sample sample;
1326
1327                         ++trace->nr_events;
1328
1329                         err = perf_evlist__parse_sample(evlist, event, &sample);
1330                         if (err) {
1331                                 fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err);
1332                                 continue;
1333                         }
1334
1335                         if (!trace->full_time && trace->base_time == 0)
1336                                 trace->base_time = sample.time;
1337
1338                         if (type != PERF_RECORD_SAMPLE) {
1339                                 trace__process_event(trace, &trace->host, event);
1340                                 continue;
1341                         }
1342
1343                         evsel = perf_evlist__id2evsel(evlist, sample.id);
1344                         if (evsel == NULL) {
1345                                 fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample.id);
1346                                 continue;
1347                         }
1348
1349                         if (sample.raw_data == NULL) {
1350                                 fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
1351                                        perf_evsel__name(evsel), sample.tid,
1352                                        sample.cpu, sample.raw_size);
1353                                 continue;
1354                         }
1355
1356                         handler = evsel->handler.func;
1357                         handler(trace, evsel, &sample);
1358
1359                         if (done)
1360                                 goto out_unmap_evlist;
1361                 }
1362         }
1363
1364         if (trace->nr_events == before) {
1365                 if (done)
1366                         goto out_unmap_evlist;
1367
1368                 poll(evlist->pollfd, evlist->nr_fds, -1);
1369         }
1370
1371         if (done)
1372                 perf_evlist__disable(evlist);
1373
1374         goto again;
1375
1376 out_unmap_evlist:
1377         perf_evlist__munmap(evlist);
1378 out_close_evlist:
1379         perf_evlist__close(evlist);
1380 out_delete_maps:
1381         perf_evlist__delete_maps(evlist);
1382 out_delete_evlist:
1383         perf_evlist__delete(evlist);
1384 out:
1385         return err;
1386 }
1387
1388 static int trace__replay(struct trace *trace)
1389 {
1390         const struct perf_evsel_str_handler handlers[] = {
1391                 { "raw_syscalls:sys_enter",  trace__sys_enter, },
1392                 { "raw_syscalls:sys_exit",   trace__sys_exit, },
1393         };
1394
1395         struct perf_session *session;
1396         int err = -1;
1397
1398         trace->tool.sample        = trace__process_sample;
1399         trace->tool.mmap          = perf_event__process_mmap;
1400         trace->tool.mmap2         = perf_event__process_mmap2;
1401         trace->tool.comm          = perf_event__process_comm;
1402         trace->tool.exit          = perf_event__process_exit;
1403         trace->tool.fork          = perf_event__process_fork;
1404         trace->tool.attr          = perf_event__process_attr;
1405         trace->tool.tracing_data = perf_event__process_tracing_data;
1406         trace->tool.build_id      = perf_event__process_build_id;
1407
1408         trace->tool.ordered_samples = true;
1409         trace->tool.ordering_requires_timestamps = true;
1410
1411         /* add tid to output */
1412         trace->multiple_threads = true;
1413
1414         if (symbol__init() < 0)
1415                 return -1;
1416
1417         session = perf_session__new(input_name, O_RDONLY, 0, false,
1418                                     &trace->tool);
1419         if (session == NULL)
1420                 return -ENOMEM;
1421
1422         err = perf_session__set_tracepoints_handlers(session, handlers);
1423         if (err)
1424                 goto out;
1425
1426         if (!perf_session__has_tp(session, "raw_syscalls:sys_enter")) {
1427                 pr_err("Data file does not have raw_syscalls:sys_enter events\n");
1428                 goto out;
1429         }
1430
1431         if (!perf_session__has_tp(session, "raw_syscalls:sys_exit")) {
1432                 pr_err("Data file does not have raw_syscalls:sys_exit events\n");
1433                 goto out;
1434         }
1435
1436         err = parse_target_str(trace);
1437         if (err != 0)
1438                 goto out;
1439
1440         setup_pager();
1441
1442         err = perf_session__process_events(session, &trace->tool);
1443         if (err)
1444                 pr_err("Failed to process events, error %d", err);
1445
1446 out:
1447         perf_session__delete(session);
1448
1449         return err;
1450 }
1451
/*
 * Print the banner that precedes the per-thread summary to @fp.
 *
 * Returns the sum of the fprintf() results for the banner lines.
 */
static size_t trace__fprintf_threads_header(FILE *fp)
{
        static const char * const banner[] = {
                "\n _____________________________________________________________________\n",
                " __)    Summary of events    (__\n\n",
                "              [ task - pid ]     [ events ] [ ratio ]  [ runtime ]\n",
                " _____________________________________________________________________\n\n",
        };
        size_t total = 0;
        size_t i;

        for (i = 0; i < sizeof(banner) / sizeof(banner[0]); i++)
                total += fprintf(fp, "%s", banner[i]);

        return total;
}
1463
1464 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
1465 {
1466         size_t printed = trace__fprintf_threads_header(fp);
1467         struct rb_node *nd;
1468
1469         for (nd = rb_first(&trace->host.threads); nd; nd = rb_next(nd)) {
1470                 struct thread *thread = rb_entry(nd, struct thread, rb_node);
1471                 struct thread_trace *ttrace = thread->priv;
1472                 const char *color;
1473                 double ratio;
1474
1475                 if (ttrace == NULL)
1476                         continue;
1477
1478                 ratio = (double)ttrace->nr_events / trace->nr_events * 100.0;
1479
1480                 color = PERF_COLOR_NORMAL;
1481                 if (ratio > 50.0)
1482                         color = PERF_COLOR_RED;
1483                 else if (ratio > 25.0)
1484                         color = PERF_COLOR_GREEN;
1485                 else if (ratio > 5.0)
1486                         color = PERF_COLOR_YELLOW;
1487
1488                 printed += color_fprintf(fp, color, "%20s", thread->comm);
1489                 printed += fprintf(fp, " - %-5d :%11lu   [", thread->tid, ttrace->nr_events);
1490                 printed += color_fprintf(fp, color, "%5.1f%%", ratio);
1491                 printed += fprintf(fp, " ] %10.3f ms\n", ttrace->runtime_ms);
1492         }
1493
1494         return printed;
1495 }
1496
1497 static int trace__set_duration(const struct option *opt, const char *str,
1498                                int unset __maybe_unused)
1499 {
1500         struct trace *trace = opt->value;
1501
1502         trace->duration_filter = atof(str);
1503         return 0;
1504 }
1505
1506 static int trace__open_output(struct trace *trace, const char *filename)
1507 {
1508         struct stat st;
1509
1510         if (!stat(filename, &st) && st.st_size) {
1511                 char oldname[PATH_MAX];
1512
1513                 scnprintf(oldname, sizeof(oldname), "%s.old", filename);
1514                 unlink(oldname);
1515                 rename(filename, oldname);
1516         }
1517
1518         trace->output = fopen(filename, "w");
1519
1520         return trace->output == NULL ? -errno : 0;
1521 }
1522
1523 int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
1524 {
1525         const char * const trace_usage[] = {
1526                 "perf trace [<options>] [<command>]",
1527                 "perf trace [<options>] -- <command> [<options>]",
1528                 NULL
1529         };
1530         struct trace trace = {
1531                 .audit_machine = audit_detect_machine(),
1532                 .syscalls = {
1533                         . max = -1,
1534                 },
1535                 .opts = {
1536                         .target = {
1537                                 .uid       = UINT_MAX,
1538                                 .uses_mmap = true,
1539                         },
1540                         .user_freq     = UINT_MAX,
1541                         .user_interval = ULLONG_MAX,
1542                         .no_delay      = true,
1543                         .mmap_pages    = 1024,
1544                 },
1545                 .output = stdout,
1546                 .show_comm = true,
1547         };
1548         const char *output_name = NULL;
1549         const char *ev_qualifier_str = NULL;
1550         const struct option trace_options[] = {
1551         OPT_BOOLEAN(0, "comm", &trace.show_comm,
1552                     "show the thread COMM next to its id"),
1553         OPT_STRING('e', "expr", &ev_qualifier_str, "expr",
1554                     "list of events to trace"),
1555         OPT_STRING('o', "output", &output_name, "file", "output file name"),
1556         OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"),
1557         OPT_STRING('p', "pid", &trace.opts.target.pid, "pid",
1558                     "trace events on existing process id"),
1559         OPT_STRING('t', "tid", &trace.opts.target.tid, "tid",
1560                     "trace events on existing thread id"),
1561         OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide,
1562                     "system-wide collection from all CPUs"),
1563         OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu",
1564                     "list of cpus to monitor"),
1565         OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit,
1566                     "child tasks do not inherit counters"),
1567         OPT_CALLBACK('m', "mmap-pages", &trace.opts.mmap_pages, "pages",
1568                      "number of mmap data pages",
1569                      perf_evlist__parse_mmap_pages),
1570         OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user",
1571                    "user to profile"),
1572         OPT_CALLBACK(0, "duration", &trace, "float",
1573                      "show only events with duration > N.M ms",
1574                      trace__set_duration),
1575         OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"),
1576         OPT_INCR('v', "verbose", &verbose, "be more verbose"),
1577         OPT_BOOLEAN('T', "time", &trace.full_time,
1578                     "Show full timestamp, not time relative to first start"),
1579         OPT_END()
1580         };
1581         int err;
1582         char bf[BUFSIZ];
1583
1584         argc = parse_options(argc, argv, trace_options, trace_usage, 0);
1585
1586         if (output_name != NULL) {
1587                 err = trace__open_output(&trace, output_name);
1588                 if (err < 0) {
1589                         perror("failed to create output file");
1590                         goto out;
1591                 }
1592         }
1593
1594         if (ev_qualifier_str != NULL) {
1595                 const char *s = ev_qualifier_str;
1596
1597                 trace.not_ev_qualifier = *s == '!';
1598                 if (trace.not_ev_qualifier)
1599                         ++s;
1600                 trace.ev_qualifier = strlist__new(true, s);
1601                 if (trace.ev_qualifier == NULL) {
1602                         fputs("Not enough memory to parse event qualifier",
1603                               trace.output);
1604                         err = -ENOMEM;
1605                         goto out_close;
1606                 }
1607         }
1608
1609         err = perf_target__validate(&trace.opts.target);
1610         if (err) {
1611                 perf_target__strerror(&trace.opts.target, err, bf, sizeof(bf));
1612                 fprintf(trace.output, "%s", bf);
1613                 goto out_close;
1614         }
1615
1616         err = perf_target__parse_uid(&trace.opts.target);
1617         if (err) {
1618                 perf_target__strerror(&trace.opts.target, err, bf, sizeof(bf));
1619                 fprintf(trace.output, "%s", bf);
1620                 goto out_close;
1621         }
1622
1623         if (!argc && perf_target__none(&trace.opts.target))
1624                 trace.opts.target.system_wide = true;
1625
1626         if (input_name)
1627                 err = trace__replay(&trace);
1628         else
1629                 err = trace__run(&trace, argc, argv);
1630
1631         if (trace.sched && !err)
1632                 trace__fprintf_thread_summary(&trace, trace.output);
1633
1634 out_close:
1635         if (output_name != NULL)
1636                 fclose(trace.output);
1637 out:
1638         return err;
1639 }