Merge tag 'perf-core-for-mingo' of git://git.kernel.org/pub/scm/linux/kernel/git...
[firefly-linux-kernel-4.4.55.git] / tools / perf / builtin-trace.c
1 #include <traceevent/event-parse.h>
2 #include "builtin.h"
3 #include "util/color.h"
4 #include "util/debug.h"
5 #include "util/evlist.h"
6 #include "util/machine.h"
7 #include "util/session.h"
8 #include "util/thread.h"
9 #include "util/parse-options.h"
10 #include "util/strlist.h"
11 #include "util/intlist.h"
12 #include "util/thread_map.h"
13 #include "util/stat.h"
14
15 #include <libaudit.h>
16 #include <stdlib.h>
17 #include <sys/eventfd.h>
18 #include <sys/mman.h>
19 #include <linux/futex.h>
20
/*
 * Fallback values for constants that the headers of older distros
 * may not provide.
 */
#if !defined(MAP_STACK)
# define MAP_STACK		0x20000
#endif

#if !defined(MADV_HWPOISON)
# define MADV_HWPOISON		100
#endif

#if !defined(MADV_MERGEABLE)
# define MADV_MERGEABLE		12
#endif

#if !defined(MADV_UNMERGEABLE)
# define MADV_UNMERGEABLE	13
#endif
37
38 struct syscall_arg {
39         unsigned long val;
40         struct thread *thread;
41         struct trace  *trace;
42         void          *parm;
43         u8            idx;
44         u8            mask;
45 };
46
/*
 * A table of names indexed by (value - offset).
 */
struct strarray {
	int	    offset;	/* value that maps to entries[0] */
	int	    nr_entries;	/* number of slots in entries[] */
	const char **entries;	/* the names */
};
52
/*
 * Define 'strarray__<array>' describing the string table 'array', with
 * 'off' being the value that maps to the first entry of the table.
 */
#define DEFINE_STRARRAY_OFFSET(array, off) struct strarray strarray__##array = { \
	.offset	    = off, \
	.nr_entries = ARRAY_SIZE(array), \
	.entries    = array, \
}

/* Same as above for tables whose first entry maps to the value 0. */
#define DEFINE_STRARRAY(array) DEFINE_STRARRAY_OFFSET(array, 0)
63
64 static size_t __syscall_arg__scnprintf_strarray(char *bf, size_t size,
65                                                 const char *intfmt,
66                                                 struct syscall_arg *arg)
67 {
68         struct strarray *sa = arg->parm;
69         int idx = arg->val - sa->offset;
70
71         if (idx < 0 || idx >= sa->nr_entries)
72                 return scnprintf(bf, size, intfmt, arg->val);
73
74         return scnprintf(bf, size, "%s", sa->entries[idx]);
75 }
76
/*
 * Strarray formatter that prints values without a name in decimal.
 */
static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size,
					      struct syscall_arg *arg)
{
	static const char fallback_fmt[] = "%d";

	return __syscall_arg__scnprintf_strarray(bf, size, fallback_fmt, arg);
}

#define SCA_STRARRAY syscall_arg__scnprintf_strarray
84
/*
 * Strarray formatter that prints values without a name in hexadecimal.
 */
static size_t syscall_arg__scnprintf_strhexarray(char *bf, size_t size,
						 struct syscall_arg *arg)
{
	static const char fallback_fmt[] = "%#x";

	return __syscall_arg__scnprintf_strarray(bf, size, fallback_fmt, arg);
}

#define SCA_STRHEXARRAY syscall_arg__scnprintf_strhexarray
92
/*
 * File descriptor formatter.  Only declared here, its definition is not in
 * this part of the file; the *at() descriptor formatter below calls it.
 */
static size_t
syscall_arg__scnprintf_fd(char *bf, size_t size, struct syscall_arg *arg);

#define SCA_FD syscall_arg__scnprintf_fd
97
98 static size_t syscall_arg__scnprintf_fd_at(char *bf, size_t size,
99                                            struct syscall_arg *arg)
100 {
101         int fd = arg->val;
102
103         if (fd == AT_FDCWD)
104                 return scnprintf(bf, size, "CWD");
105
106         return syscall_arg__scnprintf_fd(bf, size, arg);
107 }
108
109 #define SCA_FDAT syscall_arg__scnprintf_fd_at
110
/*
 * Formatter for the fd argument of close().  Only declared here, its
 * definition is not in this part of the file.
 */
static size_t
syscall_arg__scnprintf_close_fd(char *bf, size_t size, struct syscall_arg *arg);

#define SCA_CLOSE_FD syscall_arg__scnprintf_close_fd
115
116 static size_t syscall_arg__scnprintf_hex(char *bf, size_t size,
117                                          struct syscall_arg *arg)
118 {
119         return scnprintf(bf, size, "%#lx", arg->val);
120 }
121
122 #define SCA_HEX syscall_arg__scnprintf_hex
123
124 static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size,
125                                                struct syscall_arg *arg)
126 {
127         int printed = 0, prot = arg->val;
128
129         if (prot == PROT_NONE)
130                 return scnprintf(bf, size, "NONE");
131 #define P_MMAP_PROT(n) \
132         if (prot & PROT_##n) { \
133                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
134                 prot &= ~PROT_##n; \
135         }
136
137         P_MMAP_PROT(EXEC);
138         P_MMAP_PROT(READ);
139         P_MMAP_PROT(WRITE);
140 #ifdef PROT_SEM
141         P_MMAP_PROT(SEM);
142 #endif
143         P_MMAP_PROT(GROWSDOWN);
144         P_MMAP_PROT(GROWSUP);
145 #undef P_MMAP_PROT
146
147         if (prot)
148                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", prot);
149
150         return printed;
151 }
152
153 #define SCA_MMAP_PROT syscall_arg__scnprintf_mmap_prot
154
155 static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size,
156                                                 struct syscall_arg *arg)
157 {
158         int printed = 0, flags = arg->val;
159
160 #define P_MMAP_FLAG(n) \
161         if (flags & MAP_##n) { \
162                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
163                 flags &= ~MAP_##n; \
164         }
165
166         P_MMAP_FLAG(SHARED);
167         P_MMAP_FLAG(PRIVATE);
168 #ifdef MAP_32BIT
169         P_MMAP_FLAG(32BIT);
170 #endif
171         P_MMAP_FLAG(ANONYMOUS);
172         P_MMAP_FLAG(DENYWRITE);
173         P_MMAP_FLAG(EXECUTABLE);
174         P_MMAP_FLAG(FILE);
175         P_MMAP_FLAG(FIXED);
176         P_MMAP_FLAG(GROWSDOWN);
177 #ifdef MAP_HUGETLB
178         P_MMAP_FLAG(HUGETLB);
179 #endif
180         P_MMAP_FLAG(LOCKED);
181         P_MMAP_FLAG(NONBLOCK);
182         P_MMAP_FLAG(NORESERVE);
183         P_MMAP_FLAG(POPULATE);
184         P_MMAP_FLAG(STACK);
185 #ifdef MAP_UNINITIALIZED
186         P_MMAP_FLAG(UNINITIALIZED);
187 #endif
188 #undef P_MMAP_FLAG
189
190         if (flags)
191                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
192
193         return printed;
194 }
195
196 #define SCA_MMAP_FLAGS syscall_arg__scnprintf_mmap_flags
197
198 static size_t syscall_arg__scnprintf_madvise_behavior(char *bf, size_t size,
199                                                       struct syscall_arg *arg)
200 {
201         int behavior = arg->val;
202
203         switch (behavior) {
204 #define P_MADV_BHV(n) case MADV_##n: return scnprintf(bf, size, #n)
205         P_MADV_BHV(NORMAL);
206         P_MADV_BHV(RANDOM);
207         P_MADV_BHV(SEQUENTIAL);
208         P_MADV_BHV(WILLNEED);
209         P_MADV_BHV(DONTNEED);
210         P_MADV_BHV(REMOVE);
211         P_MADV_BHV(DONTFORK);
212         P_MADV_BHV(DOFORK);
213         P_MADV_BHV(HWPOISON);
214 #ifdef MADV_SOFT_OFFLINE
215         P_MADV_BHV(SOFT_OFFLINE);
216 #endif
217         P_MADV_BHV(MERGEABLE);
218         P_MADV_BHV(UNMERGEABLE);
219 #ifdef MADV_HUGEPAGE
220         P_MADV_BHV(HUGEPAGE);
221 #endif
222 #ifdef MADV_NOHUGEPAGE
223         P_MADV_BHV(NOHUGEPAGE);
224 #endif
225 #ifdef MADV_DONTDUMP
226         P_MADV_BHV(DONTDUMP);
227 #endif
228 #ifdef MADV_DODUMP
229         P_MADV_BHV(DODUMP);
230 #endif
231 #undef P_MADV_PHV
232         default: break;
233         }
234
235         return scnprintf(bf, size, "%#x", behavior);
236 }
237
238 #define SCA_MADV_BHV syscall_arg__scnprintf_madvise_behavior
239
240 static size_t syscall_arg__scnprintf_flock(char *bf, size_t size,
241                                            struct syscall_arg *arg)
242 {
243         int printed = 0, op = arg->val;
244
245         if (op == 0)
246                 return scnprintf(bf, size, "NONE");
247 #define P_CMD(cmd) \
248         if ((op & LOCK_##cmd) == LOCK_##cmd) { \
249                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #cmd); \
250                 op &= ~LOCK_##cmd; \
251         }
252
253         P_CMD(SH);
254         P_CMD(EX);
255         P_CMD(NB);
256         P_CMD(UN);
257         P_CMD(MAND);
258         P_CMD(RW);
259         P_CMD(READ);
260         P_CMD(WRITE);
261 #undef P_OP
262
263         if (op)
264                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", op);
265
266         return printed;
267 }
268
269 #define SCA_FLOCK syscall_arg__scnprintf_flock
270
271 static size_t syscall_arg__scnprintf_futex_op(char *bf, size_t size, struct syscall_arg *arg)
272 {
273         enum syscall_futex_args {
274                 SCF_UADDR   = (1 << 0),
275                 SCF_OP      = (1 << 1),
276                 SCF_VAL     = (1 << 2),
277                 SCF_TIMEOUT = (1 << 3),
278                 SCF_UADDR2  = (1 << 4),
279                 SCF_VAL3    = (1 << 5),
280         };
281         int op = arg->val;
282         int cmd = op & FUTEX_CMD_MASK;
283         size_t printed = 0;
284
285         switch (cmd) {
286 #define P_FUTEX_OP(n) case FUTEX_##n: printed = scnprintf(bf, size, #n);
287         P_FUTEX_OP(WAIT);           arg->mask |= SCF_VAL3|SCF_UADDR2;             break;
288         P_FUTEX_OP(WAKE);           arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
289         P_FUTEX_OP(FD);             arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
290         P_FUTEX_OP(REQUEUE);        arg->mask |= SCF_VAL3|SCF_TIMEOUT;            break;
291         P_FUTEX_OP(CMP_REQUEUE);    arg->mask |= SCF_TIMEOUT;                     break;
292         P_FUTEX_OP(CMP_REQUEUE_PI); arg->mask |= SCF_TIMEOUT;                     break;
293         P_FUTEX_OP(WAKE_OP);                                                      break;
294         P_FUTEX_OP(LOCK_PI);        arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
295         P_FUTEX_OP(UNLOCK_PI);      arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
296         P_FUTEX_OP(TRYLOCK_PI);     arg->mask |= SCF_VAL3|SCF_UADDR2;             break;
297         P_FUTEX_OP(WAIT_BITSET);    arg->mask |= SCF_UADDR2;                      break;
298         P_FUTEX_OP(WAKE_BITSET);    arg->mask |= SCF_UADDR2;                      break;
299         P_FUTEX_OP(WAIT_REQUEUE_PI);                                              break;
300         default: printed = scnprintf(bf, size, "%#x", cmd);                       break;
301         }
302
303         if (op & FUTEX_PRIVATE_FLAG)
304                 printed += scnprintf(bf + printed, size - printed, "|PRIV");
305
306         if (op & FUTEX_CLOCK_REALTIME)
307                 printed += scnprintf(bf + printed, size - printed, "|CLKRT");
308
309         return printed;
310 }
311
312 #define SCA_FUTEX_OP  syscall_arg__scnprintf_futex_op
313
314 static const char *epoll_ctl_ops[] = { "ADD", "DEL", "MOD", };
315 static DEFINE_STRARRAY_OFFSET(epoll_ctl_ops, 1);
316
317 static const char *itimers[] = { "REAL", "VIRTUAL", "PROF", };
318 static DEFINE_STRARRAY(itimers);
319
320 static const char *whences[] = { "SET", "CUR", "END",
321 #ifdef SEEK_DATA
322 "DATA",
323 #endif
324 #ifdef SEEK_HOLE
325 "HOLE",
326 #endif
327 };
328 static DEFINE_STRARRAY(whences);
329
/*
 * fcntl() command names, indexed by the command value.
 *
 * All names are listed without the F_ prefix: part of the table used to
 * carry it ("F_GETLK64", ...), so the output style depended on the command.
 */
static const char *fcntl_cmds[] = {
	"DUPFD", "GETFD", "SETFD", "GETFL", "SETFL", "GETLK", "SETLK",
	"SETLKW", "SETOWN", "GETOWN", "SETSIG", "GETSIG", "GETLK64",
	"SETLK64", "SETLKW64", "SETOWN_EX", "GETOWN_EX",
	"GETOWNER_UIDS",
};
static DEFINE_STRARRAY(fcntl_cmds);

/* Resource names for the 'resource' argument of {get,set}rlimit(), prlimit64() */
static const char *rlimit_resources[] = {
	"CPU",
	"FSIZE",
	"DATA",
	"STACK",
	"CORE",
	"RSS",
	"NPROC",
	"NOFILE",
	"MEMLOCK",
	"AS",
	"LOCKS",
	"SIGPENDING",
	"MSGQUEUE",
	"NICE",
	"RTPRIO",
	"RTTIME",
};
static DEFINE_STRARRAY(rlimit_resources);

/* Names for the 'how' argument of rt_sigprocmask() */
static const char *sighow[] = {
	"BLOCK",
	"UNBLOCK",
	"SETMASK",
};
static DEFINE_STRARRAY(sighow);

/* Clock names for the 'clk_id' argument of clock_gettime() */
static const char *clockid[] = {
	"REALTIME",
	"MONOTONIC",
	"PROCESS_CPUTIME_ID",
	"THREAD_CPUTIME_ID",
	"MONOTONIC_RAW",
	"REALTIME_COARSE",
	"MONOTONIC_COARSE",
};
static DEFINE_STRARRAY(clockid);
353
/*
 * Address family names, indexed by the AF_* value.
 *
 * AF_IB is 27 and AF_CAN is 29, so slot 28 (AF_MPLS) has to be present:
 * without it "CAN" and every family after it were shown one value early.
 */
static const char *socket_families[] = {
	"UNSPEC", "LOCAL", "INET", "AX25", "IPX", "APPLETALK", "NETROM",
	"BRIDGE", "ATMPVC", "X25", "INET6", "ROSE", "DECnet", "NETBEUI",
	"SECURITY", "KEY", "NETLINK", "PACKET", "ASH", "ECONET", "ATMSVC",
	"RDS", "SNA", "IRDA", "PPPOX", "WANPIPE", "LLC", "IB", "MPLS", "CAN",
	"TIPC", "BLUETOOTH", "IUCV", "RXRPC", "ISDN", "PHONET", "IEEE802154",
	"CAIF", "ALG", "NFC", "VSOCK",
};
362 static DEFINE_STRARRAY(socket_families);
363
364 #ifndef SOCK_TYPE_MASK
365 #define SOCK_TYPE_MASK 0xf
366 #endif
367
368 static size_t syscall_arg__scnprintf_socket_type(char *bf, size_t size,
369                                                       struct syscall_arg *arg)
370 {
371         size_t printed;
372         int type = arg->val,
373             flags = type & ~SOCK_TYPE_MASK;
374
375         type &= SOCK_TYPE_MASK;
376         /*
377          * Can't use a strarray, MIPS may override for ABI reasons.
378          */
379         switch (type) {
380 #define P_SK_TYPE(n) case SOCK_##n: printed = scnprintf(bf, size, #n); break;
381         P_SK_TYPE(STREAM);
382         P_SK_TYPE(DGRAM);
383         P_SK_TYPE(RAW);
384         P_SK_TYPE(RDM);
385         P_SK_TYPE(SEQPACKET);
386         P_SK_TYPE(DCCP);
387         P_SK_TYPE(PACKET);
388 #undef P_SK_TYPE
389         default:
390                 printed = scnprintf(bf, size, "%#x", type);
391         }
392
393 #define P_SK_FLAG(n) \
394         if (flags & SOCK_##n) { \
395                 printed += scnprintf(bf + printed, size - printed, "|%s", #n); \
396                 flags &= ~SOCK_##n; \
397         }
398
399         P_SK_FLAG(CLOEXEC);
400         P_SK_FLAG(NONBLOCK);
401 #undef P_SK_FLAG
402
403         if (flags)
404                 printed += scnprintf(bf + printed, size - printed, "|%#x", flags);
405
406         return printed;
407 }
408
409 #define SCA_SK_TYPE syscall_arg__scnprintf_socket_type
410
/* Fallbacks for MSG_* flags that the system headers may not define */
#if !defined(MSG_PROBE)
#define MSG_PROBE		0x10
#endif

#if !defined(MSG_WAITFORONE)
#define MSG_WAITFORONE		0x10000
#endif

#if !defined(MSG_SENDPAGE_NOTLAST)
#define MSG_SENDPAGE_NOTLAST	0x20000
#endif

#if !defined(MSG_FASTOPEN)
#define MSG_FASTOPEN		0x20000000
#endif
423
424 static size_t syscall_arg__scnprintf_msg_flags(char *bf, size_t size,
425                                                struct syscall_arg *arg)
426 {
427         int printed = 0, flags = arg->val;
428
429         if (flags == 0)
430                 return scnprintf(bf, size, "NONE");
431 #define P_MSG_FLAG(n) \
432         if (flags & MSG_##n) { \
433                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
434                 flags &= ~MSG_##n; \
435         }
436
437         P_MSG_FLAG(OOB);
438         P_MSG_FLAG(PEEK);
439         P_MSG_FLAG(DONTROUTE);
440         P_MSG_FLAG(TRYHARD);
441         P_MSG_FLAG(CTRUNC);
442         P_MSG_FLAG(PROBE);
443         P_MSG_FLAG(TRUNC);
444         P_MSG_FLAG(DONTWAIT);
445         P_MSG_FLAG(EOR);
446         P_MSG_FLAG(WAITALL);
447         P_MSG_FLAG(FIN);
448         P_MSG_FLAG(SYN);
449         P_MSG_FLAG(CONFIRM);
450         P_MSG_FLAG(RST);
451         P_MSG_FLAG(ERRQUEUE);
452         P_MSG_FLAG(NOSIGNAL);
453         P_MSG_FLAG(MORE);
454         P_MSG_FLAG(WAITFORONE);
455         P_MSG_FLAG(SENDPAGE_NOTLAST);
456         P_MSG_FLAG(FASTOPEN);
457         P_MSG_FLAG(CMSG_CLOEXEC);
458 #undef P_MSG_FLAG
459
460         if (flags)
461                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
462
463         return printed;
464 }
465
466 #define SCA_MSG_FLAGS syscall_arg__scnprintf_msg_flags
467
468 static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size,
469                                                  struct syscall_arg *arg)
470 {
471         size_t printed = 0;
472         int mode = arg->val;
473
474         if (mode == F_OK) /* 0 */
475                 return scnprintf(bf, size, "F");
476 #define P_MODE(n) \
477         if (mode & n##_OK) { \
478                 printed += scnprintf(bf + printed, size - printed, "%s", #n); \
479                 mode &= ~n##_OK; \
480         }
481
482         P_MODE(R);
483         P_MODE(W);
484         P_MODE(X);
485 #undef P_MODE
486
487         if (mode)
488                 printed += scnprintf(bf + printed, size - printed, "|%#x", mode);
489
490         return printed;
491 }
492
493 #define SCA_ACCMODE syscall_arg__scnprintf_access_mode
494
495 static size_t syscall_arg__scnprintf_open_flags(char *bf, size_t size,
496                                                struct syscall_arg *arg)
497 {
498         int printed = 0, flags = arg->val;
499
500         if (!(flags & O_CREAT))
501                 arg->mask |= 1 << (arg->idx + 1); /* Mask the mode parm */
502
503         if (flags == 0)
504                 return scnprintf(bf, size, "RDONLY");
505 #define P_FLAG(n) \
506         if (flags & O_##n) { \
507                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
508                 flags &= ~O_##n; \
509         }
510
511         P_FLAG(APPEND);
512         P_FLAG(ASYNC);
513         P_FLAG(CLOEXEC);
514         P_FLAG(CREAT);
515         P_FLAG(DIRECT);
516         P_FLAG(DIRECTORY);
517         P_FLAG(EXCL);
518         P_FLAG(LARGEFILE);
519         P_FLAG(NOATIME);
520         P_FLAG(NOCTTY);
521 #ifdef O_NONBLOCK
522         P_FLAG(NONBLOCK);
523 #elif O_NDELAY
524         P_FLAG(NDELAY);
525 #endif
526 #ifdef O_PATH
527         P_FLAG(PATH);
528 #endif
529         P_FLAG(RDWR);
530 #ifdef O_DSYNC
531         if ((flags & O_SYNC) == O_SYNC)
532                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", "SYNC");
533         else {
534                 P_FLAG(DSYNC);
535         }
536 #else
537         P_FLAG(SYNC);
538 #endif
539         P_FLAG(TRUNC);
540         P_FLAG(WRONLY);
541 #undef P_FLAG
542
543         if (flags)
544                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
545
546         return printed;
547 }
548
549 #define SCA_OPEN_FLAGS syscall_arg__scnprintf_open_flags
550
551 static size_t syscall_arg__scnprintf_eventfd_flags(char *bf, size_t size,
552                                                    struct syscall_arg *arg)
553 {
554         int printed = 0, flags = arg->val;
555
556         if (flags == 0)
557                 return scnprintf(bf, size, "NONE");
558 #define P_FLAG(n) \
559         if (flags & EFD_##n) { \
560                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
561                 flags &= ~EFD_##n; \
562         }
563
564         P_FLAG(SEMAPHORE);
565         P_FLAG(CLOEXEC);
566         P_FLAG(NONBLOCK);
567 #undef P_FLAG
568
569         if (flags)
570                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
571
572         return printed;
573 }
574
575 #define SCA_EFD_FLAGS syscall_arg__scnprintf_eventfd_flags
576
577 static size_t syscall_arg__scnprintf_pipe_flags(char *bf, size_t size,
578                                                 struct syscall_arg *arg)
579 {
580         int printed = 0, flags = arg->val;
581
582 #define P_FLAG(n) \
583         if (flags & O_##n) { \
584                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
585                 flags &= ~O_##n; \
586         }
587
588         P_FLAG(CLOEXEC);
589         P_FLAG(NONBLOCK);
590 #undef P_FLAG
591
592         if (flags)
593                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
594
595         return printed;
596 }
597
598 #define SCA_PIPE_FLAGS syscall_arg__scnprintf_pipe_flags
599
600 static size_t syscall_arg__scnprintf_signum(char *bf, size_t size, struct syscall_arg *arg)
601 {
602         int sig = arg->val;
603
604         switch (sig) {
605 #define P_SIGNUM(n) case SIG##n: return scnprintf(bf, size, #n)
606         P_SIGNUM(HUP);
607         P_SIGNUM(INT);
608         P_SIGNUM(QUIT);
609         P_SIGNUM(ILL);
610         P_SIGNUM(TRAP);
611         P_SIGNUM(ABRT);
612         P_SIGNUM(BUS);
613         P_SIGNUM(FPE);
614         P_SIGNUM(KILL);
615         P_SIGNUM(USR1);
616         P_SIGNUM(SEGV);
617         P_SIGNUM(USR2);
618         P_SIGNUM(PIPE);
619         P_SIGNUM(ALRM);
620         P_SIGNUM(TERM);
621         P_SIGNUM(STKFLT);
622         P_SIGNUM(CHLD);
623         P_SIGNUM(CONT);
624         P_SIGNUM(STOP);
625         P_SIGNUM(TSTP);
626         P_SIGNUM(TTIN);
627         P_SIGNUM(TTOU);
628         P_SIGNUM(URG);
629         P_SIGNUM(XCPU);
630         P_SIGNUM(XFSZ);
631         P_SIGNUM(VTALRM);
632         P_SIGNUM(PROF);
633         P_SIGNUM(WINCH);
634         P_SIGNUM(IO);
635         P_SIGNUM(PWR);
636         P_SIGNUM(SYS);
637         default: break;
638         }
639
640         return scnprintf(bf, size, "%#x", sig);
641 }
642
643 #define SCA_SIGNUM syscall_arg__scnprintf_signum
644
#define TCGETS		0x5401

/*
 * Names of the tty ioctl commands, indexed by (cmd - TCGETS).
 *
 * The entries that follow a gap in the numbering are anchored with their
 * full command value minus TCGETS.  Using the low byte of the command as
 * the designator (0x27, 0x50, 0x60) put them one slot too far, since the
 * table starts at 0x5401 and not at 0x5400.
 */
static const char *tioctls[] = {
	"TCGETS", "TCSETS", "TCSETSW", "TCSETSF", "TCGETA", "TCSETA", "TCSETAW",
	"TCSETAF", "TCSBRK", "TCXONC", "TCFLSH", "TIOCEXCL", "TIOCNXCL",
	"TIOCSCTTY", "TIOCGPGRP", "TIOCSPGRP", "TIOCOUTQ", "TIOCSTI",
	"TIOCGWINSZ", "TIOCSWINSZ", "TIOCMGET", "TIOCMBIS", "TIOCMBIC",
	"TIOCMSET", "TIOCGSOFTCAR", "TIOCSSOFTCAR", "FIONREAD", "TIOCLINUX",
	"TIOCCONS", "TIOCGSERIAL", "TIOCSSERIAL", "TIOCPKT", "FIONBIO",
	"TIOCNOTTY", "TIOCSETD", "TIOCGETD", "TCSBRKP",
	[0x5427 - TCGETS] = "TIOCSBRK",
	"TIOCCBRK", "TIOCGSID", "TCGETS2", "TCSETS2", "TCSETSW2", "TCSETSF2",
	"TIOCGRS485", "TIOCSRS485", "TIOCGPTN", "TIOCSPTLCK",
	"TIOCGDEV||TCGETX", "TCSETX", "TCSETXF", "TCSETXW", "TIOCSIG",
	"TIOCVHANGUP", "TIOCGPKT", "TIOCGPTLCK",
	/* TIOCGEXCL is number 0x40, it doesn't directly follow TIOCGPTLCK (0x39) */
	[0x5440 - TCGETS] = "TIOCGEXCL",
	[0x5450 - TCGETS] = "FIONCLEX", "FIOCLEX", "FIOASYNC", "TIOCSERCONFIG",
	"TIOCSERGWILD", "TIOCSERSWILD", "TIOCGLCKTRMIOS", "TIOCSLCKTRMIOS",
	"TIOCSERGSTRUCT", "TIOCSERGETLSR", "TIOCSERGETMULTI", "TIOCSERSETMULTI",
	"TIOCMIWAIT", "TIOCGICOUNT",
	[0x5460 - TCGETS] = "FIOQSIZE",
};
664
/* The first entry of tioctls[] is TCGETS */
static DEFINE_STRARRAY_OFFSET(tioctls, TCGETS);

/*
 * Initializer fragment for a struct syscall_fmt entry: format argument
 * number 'arg' using the strarray__<array> table.  'name' is not used by
 * the expansion, it only documents the argument at the point of use.
 */
#define STRARRAY(arg, name, array) \
	.arg_scnprintf	= { [arg] = SCA_STRARRAY, }, \
	.arg_parm	= { [arg] = &strarray__##array, }
670
671 static struct syscall_fmt {
672         const char *name;
673         const char *alias;
674         size_t     (*arg_scnprintf[6])(char *bf, size_t size, struct syscall_arg *arg);
675         void       *arg_parm[6];
676         bool       errmsg;
677         bool       timeout;
678         bool       hexret;
679 } syscall_fmts[] = {
680         { .name     = "access",     .errmsg = true,
681           .arg_scnprintf = { [1] = SCA_ACCMODE, /* mode */ }, },
682         { .name     = "arch_prctl", .errmsg = true, .alias = "prctl", },
683         { .name     = "brk",        .hexret = true,
684           .arg_scnprintf = { [0] = SCA_HEX, /* brk */ }, },
685         { .name     = "clock_gettime",  .errmsg = true, STRARRAY(0, clk_id, clockid), },
686         { .name     = "close",      .errmsg = true,
687           .arg_scnprintf = { [0] = SCA_CLOSE_FD, /* fd */ }, }, 
688         { .name     = "connect",    .errmsg = true, },
689         { .name     = "dup",        .errmsg = true,
690           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
691         { .name     = "dup2",       .errmsg = true,
692           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
693         { .name     = "dup3",       .errmsg = true,
694           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
695         { .name     = "epoll_ctl",  .errmsg = true, STRARRAY(1, op, epoll_ctl_ops), },
696         { .name     = "eventfd2",   .errmsg = true,
697           .arg_scnprintf = { [1] = SCA_EFD_FLAGS, /* flags */ }, },
698         { .name     = "faccessat",  .errmsg = true,
699           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
700         { .name     = "fadvise64",  .errmsg = true,
701           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
702         { .name     = "fallocate",  .errmsg = true,
703           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
704         { .name     = "fchdir",     .errmsg = true,
705           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
706         { .name     = "fchmod",     .errmsg = true,
707           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
708         { .name     = "fchmodat",   .errmsg = true,
709           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, 
710         { .name     = "fchown",     .errmsg = true,
711           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
712         { .name     = "fchownat",   .errmsg = true,
713           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, 
714         { .name     = "fcntl",      .errmsg = true,
715           .arg_scnprintf = { [0] = SCA_FD, /* fd */
716                              [1] = SCA_STRARRAY, /* cmd */ },
717           .arg_parm      = { [1] = &strarray__fcntl_cmds, /* cmd */ }, },
718         { .name     = "fdatasync",  .errmsg = true,
719           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
720         { .name     = "flock",      .errmsg = true,
721           .arg_scnprintf = { [0] = SCA_FD, /* fd */
722                              [1] = SCA_FLOCK, /* cmd */ }, },
723         { .name     = "fsetxattr",  .errmsg = true,
724           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
725         { .name     = "fstat",      .errmsg = true, .alias = "newfstat",
726           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
727         { .name     = "fstatat",    .errmsg = true, .alias = "newfstatat",
728           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, 
729         { .name     = "fstatfs",    .errmsg = true,
730           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
731         { .name     = "fsync",    .errmsg = true,
732           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
733         { .name     = "ftruncate", .errmsg = true,
734           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
735         { .name     = "futex",      .errmsg = true,
736           .arg_scnprintf = { [1] = SCA_FUTEX_OP, /* op */ }, },
737         { .name     = "futimesat", .errmsg = true,
738           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, 
739         { .name     = "getdents",   .errmsg = true,
740           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
741         { .name     = "getdents64", .errmsg = true,
742           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
743         { .name     = "getitimer",  .errmsg = true, STRARRAY(0, which, itimers), },
744         { .name     = "getrlimit",  .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
745         { .name     = "ioctl",      .errmsg = true,
746           .arg_scnprintf = { [0] = SCA_FD, /* fd */ 
747                              [1] = SCA_STRHEXARRAY, /* cmd */
748                              [2] = SCA_HEX, /* arg */ },
749           .arg_parm      = { [1] = &strarray__tioctls, /* cmd */ }, },
750         { .name     = "kill",       .errmsg = true,
751           .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
752         { .name     = "linkat",     .errmsg = true,
753           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, 
754         { .name     = "lseek",      .errmsg = true,
755           .arg_scnprintf = { [0] = SCA_FD, /* fd */
756                              [2] = SCA_STRARRAY, /* whence */ },
757           .arg_parm      = { [2] = &strarray__whences, /* whence */ }, },
758         { .name     = "lstat",      .errmsg = true, .alias = "newlstat", },
759         { .name     = "madvise",    .errmsg = true,
760           .arg_scnprintf = { [0] = SCA_HEX,      /* start */
761                              [2] = SCA_MADV_BHV, /* behavior */ }, },
762         { .name     = "mkdirat",    .errmsg = true,
763           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, 
764         { .name     = "mknodat",    .errmsg = true,
765           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, 
766         { .name     = "mlock",      .errmsg = true,
767           .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
768         { .name     = "mlockall",   .errmsg = true,
769           .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
770         { .name     = "mmap",       .hexret = true,
771           .arg_scnprintf = { [0] = SCA_HEX,       /* addr */
772                              [2] = SCA_MMAP_PROT, /* prot */
773                              [3] = SCA_MMAP_FLAGS, /* flags */ }, },
774         { .name     = "mprotect",   .errmsg = true,
775           .arg_scnprintf = { [0] = SCA_HEX, /* start */
776                              [2] = SCA_MMAP_PROT, /* prot */ }, },
777         { .name     = "mremap",     .hexret = true,
778           .arg_scnprintf = { [0] = SCA_HEX, /* addr */
779                              [4] = SCA_HEX, /* new_addr */ }, },
780         { .name     = "munlock",    .errmsg = true,
781           .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
782         { .name     = "munmap",     .errmsg = true,
783           .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
784         { .name     = "name_to_handle_at", .errmsg = true,
785           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, 
786         { .name     = "newfstatat", .errmsg = true,
787           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, 
788         { .name     = "open",       .errmsg = true,
789           .arg_scnprintf = { [1] = SCA_OPEN_FLAGS, /* flags */ }, },
790         { .name     = "open_by_handle_at", .errmsg = true,
791           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
792                              [2] = SCA_OPEN_FLAGS, /* flags */ }, },
793         { .name     = "openat",     .errmsg = true,
794           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
795                              [2] = SCA_OPEN_FLAGS, /* flags */ }, },
796         { .name     = "pipe2",      .errmsg = true,
797           .arg_scnprintf = { [1] = SCA_PIPE_FLAGS, /* flags */ }, },
798         { .name     = "poll",       .errmsg = true, .timeout = true, },
799         { .name     = "ppoll",      .errmsg = true, .timeout = true, },
800         { .name     = "pread",      .errmsg = true, .alias = "pread64",
801           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
802         { .name     = "preadv",     .errmsg = true, .alias = "pread",
803           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
804         { .name     = "prlimit64",  .errmsg = true, STRARRAY(1, resource, rlimit_resources), },
805         { .name     = "pwrite",     .errmsg = true, .alias = "pwrite64",
806           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
807         { .name     = "pwritev",    .errmsg = true,
808           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
809         { .name     = "read",       .errmsg = true,
810           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
811         { .name     = "readlinkat", .errmsg = true,
812           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, 
813         { .name     = "readv",      .errmsg = true,
814           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
815         { .name     = "recvfrom",   .errmsg = true,
816           .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
817         { .name     = "recvmmsg",   .errmsg = true,
818           .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
819         { .name     = "recvmsg",    .errmsg = true,
820           .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
821         { .name     = "renameat",   .errmsg = true,
822           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, 
823         { .name     = "rt_sigaction", .errmsg = true,
824           .arg_scnprintf = { [0] = SCA_SIGNUM, /* sig */ }, },
825         { .name     = "rt_sigprocmask",  .errmsg = true, STRARRAY(0, how, sighow), },
826         { .name     = "rt_sigqueueinfo", .errmsg = true,
827           .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
828         { .name     = "rt_tgsigqueueinfo", .errmsg = true,
829           .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
830         { .name     = "select",     .errmsg = true, .timeout = true, },
831         { .name     = "sendmmsg",    .errmsg = true,
832           .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
833         { .name     = "sendmsg",    .errmsg = true,
834           .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
835         { .name     = "sendto",     .errmsg = true,
836           .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
837         { .name     = "setitimer",  .errmsg = true, STRARRAY(0, which, itimers), },
838         { .name     = "setrlimit",  .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
839         { .name     = "shutdown",   .errmsg = true,
840           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
841         { .name     = "socket",     .errmsg = true,
842           .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
843                              [1] = SCA_SK_TYPE, /* type */ },
844           .arg_parm      = { [0] = &strarray__socket_families, /* family */ }, },
845         { .name     = "socketpair", .errmsg = true,
846           .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
847                              [1] = SCA_SK_TYPE, /* type */ },
848           .arg_parm      = { [0] = &strarray__socket_families, /* family */ }, },
849         { .name     = "stat",       .errmsg = true, .alias = "newstat", },
850         { .name     = "symlinkat",  .errmsg = true,
851           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, 
852         { .name     = "tgkill",     .errmsg = true,
853           .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
854         { .name     = "tkill",      .errmsg = true,
855           .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
856         { .name     = "uname",      .errmsg = true, .alias = "newuname", },
857         { .name     = "unlinkat",   .errmsg = true,
858           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
859         { .name     = "utimensat",  .errmsg = true,
860           .arg_scnprintf = { [0] = SCA_FDAT, /* dirfd */ }, },
861         { .name     = "write",      .errmsg = true,
862           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
863         { .name     = "writev",     .errmsg = true,
864           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
865 };
866
867 static int syscall_fmt__cmp(const void *name, const void *fmtp)
868 {
869         const struct syscall_fmt *fmt = fmtp;
870         return strcmp(name, fmt->name);
871 }
872
873 static struct syscall_fmt *syscall_fmt__find(const char *name)
874 {
875         const int nmemb = ARRAY_SIZE(syscall_fmts);
876         return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp);
877 }
878
/* Per-syscall state, one slot per syscall id in trace->syscalls.table. */
struct syscall {
        /* Format of the "syscalls:sys_enter_<name>" tracepoint, NULL if unavailable. */
        struct event_format *tp_format;
        /* Syscall name as returned by audit_syscall_to_name(). */
        const char *name;
        /* True when the event qualifier excludes this syscall from the output. */
        bool filtered;
        /* Matching syscall_fmts entry, NULL when there is none. */
        struct syscall_fmt *fmt;
        /* Per-argument formatters; a NULL slot means "print as a plain number". */
        size_t (**arg_scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
        /* Per-argument formatter parameters, taken from fmt->arg_parm. */
        void **arg_parm;
};
887
888 static size_t fprintf_duration(unsigned long t, FILE *fp)
889 {
890         double duration = (double)t / NSEC_PER_MSEC;
891         size_t printed = fprintf(fp, "(");
892
893         if (duration >= 1.0)
894                 printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration);
895         else if (duration >= 0.01)
896                 printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration);
897         else
898                 printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration);
899         return printed + fprintf(fp, "): ");
900 }
901
902 struct thread_trace {
903         u64               entry_time;
904         u64               exit_time;
905         bool              entry_pending;
906         unsigned long     nr_events;
907         char              *entry_str;
908         double            runtime_ms;
909         struct {
910                 int       max;
911                 char      **table;
912         } paths;
913
914         struct intlist *syscall_stats;
915 };
916
917 static struct thread_trace *thread_trace__new(void)
918 {
919         struct thread_trace *ttrace =  zalloc(sizeof(struct thread_trace));
920
921         if (ttrace)
922                 ttrace->paths.max = -1;
923
924         ttrace->syscall_stats = intlist__new(NULL);
925
926         return ttrace;
927 }
928
929 static struct thread_trace *thread__trace(struct thread *thread, FILE *fp)
930 {
931         struct thread_trace *ttrace;
932
933         if (thread == NULL)
934                 goto fail;
935
936         if (thread->priv == NULL)
937                 thread->priv = thread_trace__new();
938                 
939         if (thread->priv == NULL)
940                 goto fail;
941
942         ttrace = thread->priv;
943         ++ttrace->nr_events;
944
945         return ttrace;
946 fail:
947         color_fprintf(fp, PERF_COLOR_RED,
948                       "WARNING: not enough memory, dropping samples!\n");
949         return NULL;
950 }
951
952 struct trace {
953         struct perf_tool        tool;
954         int                     audit_machine;
955         struct {
956                 int             max;
957                 struct syscall  *table;
958         } syscalls;
959         struct perf_record_opts opts;
960         struct machine          *host;
961         u64                     base_time;
962         bool                    full_time;
963         FILE                    *output;
964         unsigned long           nr_events;
965         struct strlist          *ev_qualifier;
966         bool                    not_ev_qualifier;
967         bool                    live;
968         struct intlist          *tid_list;
969         struct intlist          *pid_list;
970         bool                    sched;
971         bool                    multiple_threads;
972         bool                    summary;
973         bool                    show_comm;
974         double                  duration_filter;
975         double                  runtime_ms;
976 };
977
978 static int thread__read_fd_path(struct thread *thread, int fd)
979 {
980         struct thread_trace *ttrace = thread->priv;
981         char linkname[PATH_MAX], pathname[PATH_MAX];
982         struct stat st;
983         int ret;
984
985         if (thread->pid_ == thread->tid) {
986                 scnprintf(linkname, sizeof(linkname),
987                           "/proc/%d/fd/%d", thread->pid_, fd);
988         } else {
989                 scnprintf(linkname, sizeof(linkname),
990                           "/proc/%d/task/%d/fd/%d", thread->pid_, thread->tid, fd);
991         }
992
993         if (lstat(linkname, &st) < 0 || st.st_size + 1 > (off_t)sizeof(pathname))
994                 return -1;
995
996         ret = readlink(linkname, pathname, sizeof(pathname));
997
998         if (ret < 0 || ret > st.st_size)
999                 return -1;
1000
1001         pathname[ret] = '\0';
1002
1003         if (fd > ttrace->paths.max) {
1004                 char **npath = realloc(ttrace->paths.table, (fd + 1) * sizeof(char *));
1005
1006                 if (npath == NULL)
1007                         return -1;
1008
1009                 if (ttrace->paths.max != -1) {
1010                         memset(npath + ttrace->paths.max + 1, 0,
1011                                (fd - ttrace->paths.max) * sizeof(char *));
1012                 } else {
1013                         memset(npath, 0, (fd + 1) * sizeof(char *));
1014                 }
1015
1016                 ttrace->paths.table = npath;
1017                 ttrace->paths.max   = fd;
1018         }
1019
1020         ttrace->paths.table[fd] = strdup(pathname);
1021
1022         return ttrace->paths.table[fd] != NULL ? 0 : -1;
1023 }
1024
1025 static const char *thread__fd_path(struct thread *thread, int fd, bool live)
1026 {
1027         struct thread_trace *ttrace = thread->priv;
1028
1029         if (ttrace == NULL)
1030                 return NULL;
1031
1032         if (fd < 0)
1033                 return NULL;
1034
1035         if ((fd > ttrace->paths.max || ttrace->paths.table[fd] == NULL) &&
1036             (!live || thread__read_fd_path(thread, fd)))
1037                 return NULL;
1038
1039         return ttrace->paths.table[fd];
1040 }
1041
1042 static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
1043                                         struct syscall_arg *arg)
1044 {
1045         int fd = arg->val;
1046         size_t printed = scnprintf(bf, size, "%d", fd);
1047         const char *path = thread__fd_path(arg->thread, fd, arg->trace->live);
1048
1049         if (path)
1050                 printed += scnprintf(bf + printed, size - printed, "<%s>", path);
1051
1052         return printed;
1053 }
1054
1055 static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
1056                                               struct syscall_arg *arg)
1057 {
1058         int fd = arg->val;
1059         size_t printed = syscall_arg__scnprintf_fd(bf, size, arg);
1060         struct thread_trace *ttrace = arg->thread->priv;
1061
1062         if (ttrace && fd >= 0 && fd <= ttrace->paths.max) {
1063                 free(ttrace->paths.table[fd]);
1064                 ttrace->paths.table[fd] = NULL;
1065         }
1066
1067         return printed;
1068 }
1069
1070 static bool trace__filter_duration(struct trace *trace, double t)
1071 {
1072         return t < (trace->duration_filter * NSEC_PER_MSEC);
1073 }
1074
1075 static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
1076 {
1077         double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC;
1078
1079         return fprintf(fp, "%10.3f ", ts);
1080 }
1081
1082 static bool done = false;
1083
1084 static void sig_handler(int sig __maybe_unused)
1085 {
1086         done = true;
1087 }
1088
1089 static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread,
1090                                         u64 duration, u64 tstamp, FILE *fp)
1091 {
1092         size_t printed = trace__fprintf_tstamp(trace, tstamp, fp);
1093         printed += fprintf_duration(duration, fp);
1094
1095         if (trace->multiple_threads) {
1096                 if (trace->show_comm)
1097                         printed += fprintf(fp, "%.14s/", thread->comm);
1098                 printed += fprintf(fp, "%d ", thread->tid);
1099         }
1100
1101         return printed;
1102 }
1103
1104 static int trace__process_event(struct trace *trace, struct machine *machine,
1105                                 union perf_event *event)
1106 {
1107         int ret = 0;
1108
1109         switch (event->header.type) {
1110         case PERF_RECORD_LOST:
1111                 color_fprintf(trace->output, PERF_COLOR_RED,
1112                               "LOST %" PRIu64 " events!\n", event->lost.lost);
1113                 ret = machine__process_lost_event(machine, event);
1114         default:
1115                 ret = machine__process_event(machine, event);
1116                 break;
1117         }
1118
1119         return ret;
1120 }
1121
1122 static int trace__tool_process(struct perf_tool *tool,
1123                                union perf_event *event,
1124                                struct perf_sample *sample __maybe_unused,
1125                                struct machine *machine)
1126 {
1127         struct trace *trace = container_of(tool, struct trace, tool);
1128         return trace__process_event(trace, machine, event);
1129 }
1130
1131 static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
1132 {
1133         int err = symbol__init();
1134
1135         if (err)
1136                 return err;
1137
1138         trace->host = machine__new_host();
1139         if (trace->host == NULL)
1140                 return -ENOMEM;
1141
1142         if (perf_target__has_task(&trace->opts.target)) {
1143                 err = perf_event__synthesize_thread_map(&trace->tool, evlist->threads,
1144                                                         trace__tool_process,
1145                                                         trace->host);
1146         } else {
1147                 err = perf_event__synthesize_threads(&trace->tool, trace__tool_process,
1148                                                      trace->host);
1149         }
1150
1151         if (err)
1152                 symbol__exit();
1153
1154         return err;
1155 }
1156
1157 static int syscall__set_arg_fmts(struct syscall *sc)
1158 {
1159         struct format_field *field;
1160         int idx = 0;
1161
1162         sc->arg_scnprintf = calloc(sc->tp_format->format.nr_fields - 1, sizeof(void *));
1163         if (sc->arg_scnprintf == NULL)
1164                 return -1;
1165
1166         if (sc->fmt)
1167                 sc->arg_parm = sc->fmt->arg_parm;
1168
1169         for (field = sc->tp_format->format.fields->next; field; field = field->next) {
1170                 if (sc->fmt && sc->fmt->arg_scnprintf[idx])
1171                         sc->arg_scnprintf[idx] = sc->fmt->arg_scnprintf[idx];
1172                 else if (field->flags & FIELD_IS_POINTER)
1173                         sc->arg_scnprintf[idx] = syscall_arg__scnprintf_hex;
1174                 ++idx;
1175         }
1176
1177         return 0;
1178 }
1179
1180 static int trace__read_syscall_info(struct trace *trace, int id)
1181 {
1182         char tp_name[128];
1183         struct syscall *sc;
1184         const char *name = audit_syscall_to_name(id, trace->audit_machine);
1185
1186         if (name == NULL)
1187                 return -1;
1188
1189         if (id > trace->syscalls.max) {
1190                 struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc));
1191
1192                 if (nsyscalls == NULL)
1193                         return -1;
1194
1195                 if (trace->syscalls.max != -1) {
1196                         memset(nsyscalls + trace->syscalls.max + 1, 0,
1197                                (id - trace->syscalls.max) * sizeof(*sc));
1198                 } else {
1199                         memset(nsyscalls, 0, (id + 1) * sizeof(*sc));
1200                 }
1201
1202                 trace->syscalls.table = nsyscalls;
1203                 trace->syscalls.max   = id;
1204         }
1205
1206         sc = trace->syscalls.table + id;
1207         sc->name = name;
1208
1209         if (trace->ev_qualifier) {
1210                 bool in = strlist__find(trace->ev_qualifier, name) != NULL;
1211
1212                 if (!(in ^ trace->not_ev_qualifier)) {
1213                         sc->filtered = true;
1214                         /*
1215                          * No need to do read tracepoint information since this will be
1216                          * filtered out.
1217                          */
1218                         return 0;
1219                 }
1220         }
1221
1222         sc->fmt  = syscall_fmt__find(sc->name);
1223
1224         snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name);
1225         sc->tp_format = event_format__new("syscalls", tp_name);
1226
1227         if (sc->tp_format == NULL && sc->fmt && sc->fmt->alias) {
1228                 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias);
1229                 sc->tp_format = event_format__new("syscalls", tp_name);
1230         }
1231
1232         if (sc->tp_format == NULL)
1233                 return -1;
1234
1235         return syscall__set_arg_fmts(sc);
1236 }
1237
1238 static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
1239                                       unsigned long *args, struct trace *trace,
1240                                       struct thread *thread)
1241 {
1242         size_t printed = 0;
1243
1244         if (sc->tp_format != NULL) {
1245                 struct format_field *field;
1246                 u8 bit = 1;
1247                 struct syscall_arg arg = {
1248                         .idx    = 0,
1249                         .mask   = 0,
1250                         .trace  = trace,
1251                         .thread = thread,
1252                 };
1253
1254                 for (field = sc->tp_format->format.fields->next; field;
1255                      field = field->next, ++arg.idx, bit <<= 1) {
1256                         if (arg.mask & bit)
1257                                 continue;
1258                         /*
1259                          * Suppress this argument if its value is zero and
1260                          * and we don't have a string associated in an
1261                          * strarray for it.
1262                          */
1263                         if (args[arg.idx] == 0 &&
1264                             !(sc->arg_scnprintf &&
1265                               sc->arg_scnprintf[arg.idx] == SCA_STRARRAY &&
1266                               sc->arg_parm[arg.idx]))
1267                                 continue;
1268
1269                         printed += scnprintf(bf + printed, size - printed,
1270                                              "%s%s: ", printed ? ", " : "", field->name);
1271                         if (sc->arg_scnprintf && sc->arg_scnprintf[arg.idx]) {
1272                                 arg.val = args[arg.idx];
1273                                 if (sc->arg_parm)
1274                                         arg.parm = sc->arg_parm[arg.idx];
1275                                 printed += sc->arg_scnprintf[arg.idx](bf + printed,
1276                                                                       size - printed, &arg);
1277                         } else {
1278                                 printed += scnprintf(bf + printed, size - printed,
1279                                                      "%ld", args[arg.idx]);
1280                         }
1281                 }
1282         } else {
1283                 int i = 0;
1284
1285                 while (i < 6) {
1286                         printed += scnprintf(bf + printed, size - printed,
1287                                              "%sarg%d: %ld",
1288                                              printed ? ", " : "", i, args[i]);
1289                         ++i;
1290                 }
1291         }
1292
1293         return printed;
1294 }
1295
/* Signature of the per-tracepoint sample handlers stored in evsel->handler.func. */
typedef int (*tracepoint_handler)(struct trace *trace,
                                  struct perf_evsel *evsel,
                                  struct perf_sample *sample);
1298
1299 static struct syscall *trace__syscall_info(struct trace *trace,
1300                                            struct perf_evsel *evsel, int id)
1301 {
1302
1303         if (id < 0) {
1304
1305                 /*
1306                  * XXX: Noticed on x86_64, reproduced as far back as 3.0.36, haven't tried
1307                  * before that, leaving at a higher verbosity level till that is
1308                  * explained. Reproduced with plain ftrace with:
1309                  *
1310                  * echo 1 > /t/events/raw_syscalls/sys_exit/enable
1311                  * grep "NR -1 " /t/trace_pipe
1312                  *
1313                  * After generating some load on the machine.
1314                  */
1315                 if (verbose > 1) {
1316                         static u64 n;
1317                         fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n",
1318                                 id, perf_evsel__name(evsel), ++n);
1319                 }
1320                 return NULL;
1321         }
1322
1323         if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) &&
1324             trace__read_syscall_info(trace, id))
1325                 goto out_cant_read;
1326
1327         if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL))
1328                 goto out_cant_read;
1329
1330         return &trace->syscalls.table[id];
1331
1332 out_cant_read:
1333         if (verbose) {
1334                 fprintf(trace->output, "Problems reading syscall %d", id);
1335                 if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL)
1336                         fprintf(trace->output, "(%s)", trace->syscalls.table[id].name);
1337                 fputs(" information\n", trace->output);
1338         }
1339         return NULL;
1340 }
1341
1342 static void thread__update_stats(struct thread_trace *ttrace,
1343                                  int id, struct perf_sample *sample)
1344 {
1345         struct int_node *inode;
1346         struct stats *stats;
1347         u64 duration = 0;
1348
1349         inode = intlist__findnew(ttrace->syscall_stats, id);
1350         if (inode == NULL)
1351                 return;
1352
1353         stats = inode->priv;
1354         if (stats == NULL) {
1355                 stats = malloc(sizeof(struct stats));
1356                 if (stats == NULL)
1357                         return;
1358                 init_stats(stats);
1359                 inode->priv = stats;
1360         }
1361
1362         if (ttrace->entry_time && sample->time > ttrace->entry_time)
1363                 duration = sample->time - ttrace->entry_time;
1364
1365         update_stats(stats, duration);
1366 }
1367
1368 static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
1369                             struct perf_sample *sample)
1370 {
1371         char *msg;
1372         void *args;
1373         size_t printed = 0;
1374         struct thread *thread;
1375         int id = perf_evsel__intval(evsel, sample, "id");
1376         struct syscall *sc = trace__syscall_info(trace, evsel, id);
1377         struct thread_trace *ttrace;
1378
1379         if (sc == NULL)
1380                 return -1;
1381
1382         if (sc->filtered)
1383                 return 0;
1384
1385         thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1386         ttrace = thread__trace(thread, trace->output);
1387         if (ttrace == NULL)
1388                 return -1;
1389
1390         args = perf_evsel__rawptr(evsel, sample, "args");
1391         if (args == NULL) {
1392                 fprintf(trace->output, "Problems reading syscall arguments\n");
1393                 return -1;
1394         }
1395
1396         ttrace = thread->priv;
1397
1398         if (ttrace->entry_str == NULL) {
1399                 ttrace->entry_str = malloc(1024);
1400                 if (!ttrace->entry_str)
1401                         return -1;
1402         }
1403
1404         ttrace->entry_time = sample->time;
1405         msg = ttrace->entry_str;
1406         printed += scnprintf(msg + printed, 1024 - printed, "%s(", sc->name);
1407
1408         printed += syscall__scnprintf_args(sc, msg + printed, 1024 - printed,
1409                                            args, trace, thread);
1410
1411         if (!strcmp(sc->name, "exit_group") || !strcmp(sc->name, "exit")) {
1412                 if (!trace->duration_filter) {
1413                         trace__fprintf_entry_head(trace, thread, 1, sample->time, trace->output);
1414                         fprintf(trace->output, "%-70s\n", ttrace->entry_str);
1415                 }
1416         } else
1417                 ttrace->entry_pending = true;
1418
1419         return 0;
1420 }
1421
1422 static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
1423                            struct perf_sample *sample)
1424 {
1425         int ret;
1426         u64 duration = 0;
1427         struct thread *thread;
1428         int id = perf_evsel__intval(evsel, sample, "id");
1429         struct syscall *sc = trace__syscall_info(trace, evsel, id);
1430         struct thread_trace *ttrace;
1431
1432         if (sc == NULL)
1433                 return -1;
1434
1435         if (sc->filtered)
1436                 return 0;
1437
1438         thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1439         ttrace = thread__trace(thread, trace->output);
1440         if (ttrace == NULL)
1441                 return -1;
1442
1443         if (trace->summary)
1444                 thread__update_stats(ttrace, id, sample);
1445
1446         ret = perf_evsel__intval(evsel, sample, "ret");
1447
1448         ttrace = thread->priv;
1449
1450         ttrace->exit_time = sample->time;
1451
1452         if (ttrace->entry_time) {
1453                 duration = sample->time - ttrace->entry_time;
1454                 if (trace__filter_duration(trace, duration))
1455                         goto out;
1456         } else if (trace->duration_filter)
1457                 goto out;
1458
1459         trace__fprintf_entry_head(trace, thread, duration, sample->time, trace->output);
1460
1461         if (ttrace->entry_pending) {
1462                 fprintf(trace->output, "%-70s", ttrace->entry_str);
1463         } else {
1464                 fprintf(trace->output, " ... [");
1465                 color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued");
1466                 fprintf(trace->output, "]: %s()", sc->name);
1467         }
1468
1469         if (sc->fmt == NULL) {
1470 signed_print:
1471                 fprintf(trace->output, ") = %d", ret);
1472         } else if (ret < 0 && sc->fmt->errmsg) {
1473                 char bf[256];
1474                 const char *emsg = strerror_r(-ret, bf, sizeof(bf)),
1475                            *e = audit_errno_to_name(-ret);
1476
1477                 fprintf(trace->output, ") = -1 %s %s", e, emsg);
1478         } else if (ret == 0 && sc->fmt->timeout)
1479                 fprintf(trace->output, ") = 0 Timeout");
1480         else if (sc->fmt->hexret)
1481                 fprintf(trace->output, ") = %#x", ret);
1482         else
1483                 goto signed_print;
1484
1485         fputc('\n', trace->output);
1486 out:
1487         ttrace->entry_pending = false;
1488
1489         return 0;
1490 }
1491
1492 static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel,
1493                                      struct perf_sample *sample)
1494 {
1495         u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
1496         double runtime_ms = (double)runtime / NSEC_PER_MSEC;
1497         struct thread *thread = machine__findnew_thread(trace->host,
1498                                                         sample->pid,
1499                                                         sample->tid);
1500         struct thread_trace *ttrace = thread__trace(thread, trace->output);
1501
1502         if (ttrace == NULL)
1503                 goto out_dump;
1504
1505         ttrace->runtime_ms += runtime_ms;
1506         trace->runtime_ms += runtime_ms;
1507         return 0;
1508
1509 out_dump:
1510         fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n",
1511                evsel->name,
1512                perf_evsel__strval(evsel, sample, "comm"),
1513                (pid_t)perf_evsel__intval(evsel, sample, "pid"),
1514                runtime,
1515                perf_evsel__intval(evsel, sample, "vruntime"));
1516         return 0;
1517 }
1518
1519 static bool skip_sample(struct trace *trace, struct perf_sample *sample)
1520 {
1521         if ((trace->pid_list && intlist__find(trace->pid_list, sample->pid)) ||
1522             (trace->tid_list && intlist__find(trace->tid_list, sample->tid)))
1523                 return false;
1524
1525         if (trace->pid_list || trace->tid_list)
1526                 return true;
1527
1528         return false;
1529 }
1530
1531 static int trace__process_sample(struct perf_tool *tool,
1532                                  union perf_event *event __maybe_unused,
1533                                  struct perf_sample *sample,
1534                                  struct perf_evsel *evsel,
1535                                  struct machine *machine __maybe_unused)
1536 {
1537         struct trace *trace = container_of(tool, struct trace, tool);
1538         int err = 0;
1539
1540         tracepoint_handler handler = evsel->handler.func;
1541
1542         if (skip_sample(trace, sample))
1543                 return 0;
1544
1545         if (!trace->full_time && trace->base_time == 0)
1546                 trace->base_time = sample->time;
1547
1548         if (handler)
1549                 handler(trace, evsel, sample);
1550
1551         return err;
1552 }
1553
1554 static bool
1555 perf_session__has_tp(struct perf_session *session, const char *name)
1556 {
1557         struct perf_evsel *evsel;
1558
1559         evsel = perf_evlist__find_tracepoint_by_name(session->evlist, name);
1560
1561         return evsel != NULL;
1562 }
1563
1564 static int parse_target_str(struct trace *trace)
1565 {
1566         if (trace->opts.target.pid) {
1567                 trace->pid_list = intlist__new(trace->opts.target.pid);
1568                 if (trace->pid_list == NULL) {
1569                         pr_err("Error parsing process id string\n");
1570                         return -EINVAL;
1571                 }
1572         }
1573
1574         if (trace->opts.target.tid) {
1575                 trace->tid_list = intlist__new(trace->opts.target.tid);
1576                 if (trace->tid_list == NULL) {
1577                         pr_err("Error parsing thread id string\n");
1578                         return -EINVAL;
1579                 }
1580         }
1581
1582         return 0;
1583 }
1584
/*
 * Run 'perf record' on the raw syscall enter/exit tracepoints: the fixed
 * record options below come first, followed by the caller's @argv.
 *
 * Returns -ENOMEM when the argument vector cannot be allocated, otherwise
 * the result of cmd_record().
 */
static int trace__record(int argc, const char **argv)
{
        const char * const record_args[] = {
                "record",
                "-R",
                "-m", "1024",
                "-c", "1",
                "-e", "raw_syscalls:sys_enter,raw_syscalls:sys_exit",
        };
        const unsigned int nr_fixed = ARRAY_SIZE(record_args);
        unsigned int rec_argc = nr_fixed + argc;
        unsigned int k, n = 0;
        const char **rec_argv;

        /* One extra, zeroed slot keeps the vector NULL terminated. */
        rec_argv = calloc(rec_argc + 1, sizeof(char *));
        if (rec_argv == NULL)
                return -ENOMEM;

        for (k = 0; k < nr_fixed; k++)
                rec_argv[n++] = record_args[k];

        for (k = 0; k < (unsigned int)argc; k++)
                rec_argv[n++] = argv[k];

        return cmd_record(n, rec_argv, NULL);
}
1611
1612 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp);
1613
1614 static int trace__run(struct trace *trace, int argc, const char **argv)
1615 {
1616         struct perf_evlist *evlist = perf_evlist__new();
1617         struct perf_evsel *evsel;
1618         int err = -1, i;
1619         unsigned long before;
1620         const bool forks = argc > 0;
1621
1622         trace->live = true;
1623
1624         if (evlist == NULL) {
1625                 fprintf(trace->output, "Not enough memory to run!\n");
1626                 goto out;
1627         }
1628
1629         if (perf_evlist__add_newtp(evlist, "raw_syscalls", "sys_enter", trace__sys_enter) ||
1630                 perf_evlist__add_newtp(evlist, "raw_syscalls", "sys_exit", trace__sys_exit))
1631                 goto out_error_tp;
1632
1633         if (trace->sched &&
1634                 perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
1635                                 trace__sched_stat_runtime))
1636                 goto out_error_tp;
1637
1638         err = perf_evlist__create_maps(evlist, &trace->opts.target);
1639         if (err < 0) {
1640                 fprintf(trace->output, "Problems parsing the target to trace, check your options!\n");
1641                 goto out_delete_evlist;
1642         }
1643
1644         err = trace__symbols_init(trace, evlist);
1645         if (err < 0) {
1646                 fprintf(trace->output, "Problems initializing symbol libraries!\n");
1647                 goto out_delete_maps;
1648         }
1649
1650         perf_evlist__config(evlist, &trace->opts);
1651
1652         signal(SIGCHLD, sig_handler);
1653         signal(SIGINT, sig_handler);
1654
1655         if (forks) {
1656                 err = perf_evlist__prepare_workload(evlist, &trace->opts.target,
1657                                                     argv, false, false);
1658                 if (err < 0) {
1659                         fprintf(trace->output, "Couldn't run the workload!\n");
1660                         goto out_delete_maps;
1661                 }
1662         }
1663
1664         err = perf_evlist__open(evlist);
1665         if (err < 0) {
1666                 fprintf(trace->output, "Couldn't create the events: %s\n", strerror(errno));
1667                 goto out_delete_maps;
1668         }
1669
1670         err = perf_evlist__mmap(evlist, UINT_MAX, false);
1671         if (err < 0) {
1672                 fprintf(trace->output, "Couldn't mmap the events: %s\n", strerror(errno));
1673                 goto out_close_evlist;
1674         }
1675
1676         perf_evlist__enable(evlist);
1677
1678         if (forks)
1679                 perf_evlist__start_workload(evlist);
1680
1681         trace->multiple_threads = evlist->threads->map[0] == -1 || evlist->threads->nr > 1;
1682 again:
1683         before = trace->nr_events;
1684
1685         for (i = 0; i < evlist->nr_mmaps; i++) {
1686                 union perf_event *event;
1687
1688                 while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) {
1689                         const u32 type = event->header.type;
1690                         tracepoint_handler handler;
1691                         struct perf_sample sample;
1692
1693                         ++trace->nr_events;
1694
1695                         err = perf_evlist__parse_sample(evlist, event, &sample);
1696                         if (err) {
1697                                 fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err);
1698                                 continue;
1699                         }
1700
1701                         if (!trace->full_time && trace->base_time == 0)
1702                                 trace->base_time = sample.time;
1703
1704                         if (type != PERF_RECORD_SAMPLE) {
1705                                 trace__process_event(trace, trace->host, event);
1706                                 continue;
1707                         }
1708
1709                         evsel = perf_evlist__id2evsel(evlist, sample.id);
1710                         if (evsel == NULL) {
1711                                 fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample.id);
1712                                 continue;
1713                         }
1714
1715                         if (sample.raw_data == NULL) {
1716                                 fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
1717                                        perf_evsel__name(evsel), sample.tid,
1718                                        sample.cpu, sample.raw_size);
1719                                 continue;
1720                         }
1721
1722                         handler = evsel->handler.func;
1723                         handler(trace, evsel, &sample);
1724
1725                         if (done)
1726                                 goto out_unmap_evlist;
1727                 }
1728         }
1729
1730         if (trace->nr_events == before) {
1731                 if (done)
1732                         goto out_unmap_evlist;
1733
1734                 poll(evlist->pollfd, evlist->nr_fds, -1);
1735         }
1736
1737         if (done)
1738                 perf_evlist__disable(evlist);
1739
1740         goto again;
1741
1742 out_unmap_evlist:
1743         if (!err && trace->summary)
1744                 trace__fprintf_thread_summary(trace, trace->output);
1745
1746         perf_evlist__munmap(evlist);
1747 out_close_evlist:
1748         perf_evlist__close(evlist);
1749 out_delete_maps:
1750         perf_evlist__delete_maps(evlist);
1751 out_delete_evlist:
1752         perf_evlist__delete(evlist);
1753 out:
1754         trace->live = false;
1755         return err;
1756 out_error_tp:
1757         switch(errno) {
1758         case ENOENT:
1759                 fputs("Error:\tUnable to find debugfs\n"
1760                       "Hint:\tWas your kernel was compiled with debugfs support?\n"
1761                       "Hint:\tIs the debugfs filesystem mounted?\n"
1762                       "Hint:\tTry 'sudo mount -t debugfs nodev /sys/kernel/debug'\n",
1763                       trace->output);
1764                 break;
1765         case EACCES:
1766                 fprintf(trace->output,
1767                         "Error:\tNo permissions to read %s/tracing/events/raw_syscalls\n"
1768                         "Hint:\tTry 'sudo mount -o remount,mode=755 %s'\n",
1769                         debugfs_mountpoint, debugfs_mountpoint);
1770                 break;
1771         default: {
1772                 char bf[256];
1773                 fprintf(trace->output, "Can't trace: %s\n",
1774                         strerror_r(errno, bf, sizeof(bf)));
1775         }
1776                 break;
1777         }
1778         goto out_delete_evlist;
1779 }
1780
1781 static int trace__replay(struct trace *trace)
1782 {
1783         const struct perf_evsel_str_handler handlers[] = {
1784                 { "raw_syscalls:sys_enter",  trace__sys_enter, },
1785                 { "raw_syscalls:sys_exit",   trace__sys_exit, },
1786         };
1787
1788         struct perf_session *session;
1789         int err = -1;
1790
1791         trace->tool.sample        = trace__process_sample;
1792         trace->tool.mmap          = perf_event__process_mmap;
1793         trace->tool.mmap2         = perf_event__process_mmap2;
1794         trace->tool.comm          = perf_event__process_comm;
1795         trace->tool.exit          = perf_event__process_exit;
1796         trace->tool.fork          = perf_event__process_fork;
1797         trace->tool.attr          = perf_event__process_attr;
1798         trace->tool.tracing_data = perf_event__process_tracing_data;
1799         trace->tool.build_id      = perf_event__process_build_id;
1800
1801         trace->tool.ordered_samples = true;
1802         trace->tool.ordering_requires_timestamps = true;
1803
1804         /* add tid to output */
1805         trace->multiple_threads = true;
1806
1807         if (symbol__init() < 0)
1808                 return -1;
1809
1810         session = perf_session__new(input_name, O_RDONLY, 0, false,
1811                                     &trace->tool);
1812         if (session == NULL)
1813                 return -ENOMEM;
1814
1815         trace->host = &session->machines.host;
1816
1817         err = perf_session__set_tracepoints_handlers(session, handlers);
1818         if (err)
1819                 goto out;
1820
1821         if (!perf_session__has_tp(session, "raw_syscalls:sys_enter")) {
1822                 pr_err("Data file does not have raw_syscalls:sys_enter events\n");
1823                 goto out;
1824         }
1825
1826         if (!perf_session__has_tp(session, "raw_syscalls:sys_exit")) {
1827                 pr_err("Data file does not have raw_syscalls:sys_exit events\n");
1828                 goto out;
1829         }
1830
1831         err = parse_target_str(trace);
1832         if (err != 0)
1833                 goto out;
1834
1835         setup_pager();
1836
1837         err = perf_session__process_events(session, &trace->tool);
1838         if (err)
1839                 pr_err("Failed to process events, error %d", err);
1840
1841         else if (trace->summary)
1842                 trace__fprintf_thread_summary(trace, trace->output);
1843
1844 out:
1845         perf_session__delete(session);
1846
1847         return err;
1848 }
1849
/*
 * Write the banner and column headings that precede the per-thread summary.
 *
 * @fp: stream to write to
 *
 * Returns the sum of the values returned by the individual fprintf() calls.
 */
static size_t trace__fprintf_threads_header(FILE *fp)
{
        /* One entry per fprintf() call, already in final (unescaped) form. */
        static const char * const header_lines[] = {
                "\n _____________________________________________________________________________\n",
                " __)    Summary of events    (__\n\n",
                "              [ task - pid ]     [ events ] [ ratio ]  [ runtime ]\n",
                "                                  syscall  count    min     max    avg  stddev\n",
                "                                                   msec    msec   msec     %\n",
                " _____________________________________________________________________________\n\n",
        };
        size_t printed = 0;
        size_t i;

        for (i = 0; i < sizeof(header_lines) / sizeof(header_lines[0]); i++)
                printed += fprintf(fp, "%s", header_lines[i]);

        return printed;
}
1863
1864 static size_t thread__dump_stats(struct thread_trace *ttrace,
1865                                  struct trace *trace, FILE *fp)
1866 {
1867         struct stats *stats;
1868         size_t printed = 0;
1869         struct syscall *sc;
1870         struct int_node *inode = intlist__first(ttrace->syscall_stats);
1871
1872         if (inode == NULL)
1873                 return 0;
1874
1875         printed += fprintf(fp, "\n");
1876
1877         /* each int_node is a syscall */
1878         while (inode) {
1879                 stats = inode->priv;
1880                 if (stats) {
1881                         double min = (double)(stats->min) / NSEC_PER_MSEC;
1882                         double max = (double)(stats->max) / NSEC_PER_MSEC;
1883                         double avg = avg_stats(stats);
1884                         double pct;
1885                         u64 n = (u64) stats->n;
1886
1887                         pct = avg ? 100.0 * stddev_stats(stats)/avg : 0.0;
1888                         avg /= NSEC_PER_MSEC;
1889
1890                         sc = &trace->syscalls.table[inode->i];
1891                         printed += fprintf(fp, "%24s  %14s : ", "", sc->name);
1892                         printed += fprintf(fp, "%5" PRIu64 "  %8.3f  %8.3f",
1893                                            n, min, max);
1894                         printed += fprintf(fp, "  %8.3f  %6.2f\n", avg, pct);
1895                 }
1896
1897                 inode = intlist__next(inode);
1898         }
1899
1900         printed += fprintf(fp, "\n\n");
1901
1902         return printed;
1903 }
1904
/*
 * Context handed to the per-thread summary callback through its opaque
 * 'priv' argument.
 */
struct summary_data {
        FILE *fp;               /* stream the summary is written to */
        struct trace *trace;    /* tracing state the summary is built from */
        size_t printed;         /* running total of fprintf() return values */
};
1911
1912 static int trace__fprintf_one_thread(struct thread *thread, void *priv)
1913 {
1914         struct summary_data *data = priv;
1915         FILE *fp = data->fp;
1916         size_t printed = data->printed;
1917         struct trace *trace = data->trace;
1918         struct thread_trace *ttrace = thread->priv;
1919         const char *color;
1920         double ratio;
1921
1922         if (ttrace == NULL)
1923                 return 0;
1924
1925         ratio = (double)ttrace->nr_events / trace->nr_events * 100.0;
1926
1927         color = PERF_COLOR_NORMAL;
1928         if (ratio > 50.0)
1929                 color = PERF_COLOR_RED;
1930         else if (ratio > 25.0)
1931                 color = PERF_COLOR_GREEN;
1932         else if (ratio > 5.0)
1933                 color = PERF_COLOR_YELLOW;
1934
1935         printed += color_fprintf(fp, color, "%20s", thread->comm);
1936         printed += fprintf(fp, " - %-5d :%11lu   [", thread->tid, ttrace->nr_events);
1937         printed += color_fprintf(fp, color, "%5.1f%%", ratio);
1938         printed += fprintf(fp, " ] %10.3f ms\n", ttrace->runtime_ms);
1939         printed += thread__dump_stats(ttrace, trace, fp);
1940
1941         data->printed += printed;
1942
1943         return 0;
1944 }
1945
1946 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
1947 {
1948         struct summary_data data = {
1949                 .fp = fp,
1950                 .trace = trace
1951         };
1952         data.printed = trace__fprintf_threads_header(fp);
1953
1954         machine__for_each_thread(trace->host, trace__fprintf_one_thread, &data);
1955
1956         return data.printed;
1957 }
1958
1959 static int trace__set_duration(const struct option *opt, const char *str,
1960                                int unset __maybe_unused)
1961 {
1962         struct trace *trace = opt->value;
1963
1964         trace->duration_filter = atof(str);
1965         return 0;
1966 }
1967
1968 static int trace__open_output(struct trace *trace, const char *filename)
1969 {
1970         struct stat st;
1971
1972         if (!stat(filename, &st) && st.st_size) {
1973                 char oldname[PATH_MAX];
1974
1975                 scnprintf(oldname, sizeof(oldname), "%s.old", filename);
1976                 unlink(oldname);
1977                 rename(filename, oldname);
1978         }
1979
1980         trace->output = fopen(filename, "w");
1981
1982         return trace->output == NULL ? -errno : 0;
1983 }
1984
1985 int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
1986 {
1987         const char * const trace_usage[] = {
1988                 "perf trace [<options>] [<command>]",
1989                 "perf trace [<options>] -- <command> [<options>]",
1990                 "perf trace record [<options>] [<command>]",
1991                 "perf trace record [<options>] -- <command> [<options>]",
1992                 NULL
1993         };
1994         struct trace trace = {
1995                 .audit_machine = audit_detect_machine(),
1996                 .syscalls = {
1997                         . max = -1,
1998                 },
1999                 .opts = {
2000                         .target = {
2001                                 .uid       = UINT_MAX,
2002                                 .uses_mmap = true,
2003                         },
2004                         .user_freq     = UINT_MAX,
2005                         .user_interval = ULLONG_MAX,
2006                         .no_delay      = true,
2007                         .mmap_pages    = 1024,
2008                 },
2009                 .output = stdout,
2010                 .show_comm = true,
2011         };
2012         const char *output_name = NULL;
2013         const char *ev_qualifier_str = NULL;
2014         const struct option trace_options[] = {
2015         OPT_BOOLEAN(0, "comm", &trace.show_comm,
2016                     "show the thread COMM next to its id"),
2017         OPT_STRING('e', "expr", &ev_qualifier_str, "expr",
2018                     "list of events to trace"),
2019         OPT_STRING('o', "output", &output_name, "file", "output file name"),
2020         OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"),
2021         OPT_STRING('p', "pid", &trace.opts.target.pid, "pid",
2022                     "trace events on existing process id"),
2023         OPT_STRING('t', "tid", &trace.opts.target.tid, "tid",
2024                     "trace events on existing thread id"),
2025         OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide,
2026                     "system-wide collection from all CPUs"),
2027         OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu",
2028                     "list of cpus to monitor"),
2029         OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit,
2030                     "child tasks do not inherit counters"),
2031         OPT_CALLBACK('m', "mmap-pages", &trace.opts.mmap_pages, "pages",
2032                      "number of mmap data pages",
2033                      perf_evlist__parse_mmap_pages),
2034         OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user",
2035                    "user to profile"),
2036         OPT_CALLBACK(0, "duration", &trace, "float",
2037                      "show only events with duration > N.M ms",
2038                      trace__set_duration),
2039         OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"),
2040         OPT_INCR('v', "verbose", &verbose, "be more verbose"),
2041         OPT_BOOLEAN('T', "time", &trace.full_time,
2042                     "Show full timestamp, not time relative to first start"),
2043         OPT_BOOLEAN(0, "summary", &trace.summary,
2044                     "Show syscall summary with statistics"),
2045         OPT_END()
2046         };
2047         int err;
2048         char bf[BUFSIZ];
2049
2050         if ((argc > 1) && (strcmp(argv[1], "record") == 0))
2051                 return trace__record(argc-2, &argv[2]);
2052
2053         argc = parse_options(argc, argv, trace_options, trace_usage, 0);
2054
2055         if (output_name != NULL) {
2056                 err = trace__open_output(&trace, output_name);
2057                 if (err < 0) {
2058                         perror("failed to create output file");
2059                         goto out;
2060                 }
2061         }
2062
2063         if (ev_qualifier_str != NULL) {
2064                 const char *s = ev_qualifier_str;
2065
2066                 trace.not_ev_qualifier = *s == '!';
2067                 if (trace.not_ev_qualifier)
2068                         ++s;
2069                 trace.ev_qualifier = strlist__new(true, s);
2070                 if (trace.ev_qualifier == NULL) {
2071                         fputs("Not enough memory to parse event qualifier",
2072                               trace.output);
2073                         err = -ENOMEM;
2074                         goto out_close;
2075                 }
2076         }
2077
2078         err = perf_target__validate(&trace.opts.target);
2079         if (err) {
2080                 perf_target__strerror(&trace.opts.target, err, bf, sizeof(bf));
2081                 fprintf(trace.output, "%s", bf);
2082                 goto out_close;
2083         }
2084
2085         err = perf_target__parse_uid(&trace.opts.target);
2086         if (err) {
2087                 perf_target__strerror(&trace.opts.target, err, bf, sizeof(bf));
2088                 fprintf(trace.output, "%s", bf);
2089                 goto out_close;
2090         }
2091
2092         if (!argc && perf_target__none(&trace.opts.target))
2093                 trace.opts.target.system_wide = true;
2094
2095         if (input_name)
2096                 err = trace__replay(&trace);
2097         else
2098                 err = trace__run(&trace, argc, argv);
2099
2100 out_close:
2101         if (output_name != NULL)
2102                 fclose(trace.output);
2103 out:
2104         return err;
2105 }