perf trace: Use vfs_getname hook if available
[firefly-linux-kernel-4.4.55.git] / tools / perf / builtin-trace.c
1 #include <traceevent/event-parse.h>
2 #include "builtin.h"
3 #include "util/color.h"
4 #include "util/debug.h"
5 #include "util/evlist.h"
6 #include "util/machine.h"
7 #include "util/session.h"
8 #include "util/thread.h"
9 #include "util/parse-options.h"
10 #include "util/strlist.h"
11 #include "util/intlist.h"
12 #include "util/thread_map.h"
13 #include "util/stat.h"
14
15 #include <libaudit.h>
16 #include <stdlib.h>
17 #include <sys/eventfd.h>
18 #include <sys/mman.h>
19 #include <linux/futex.h>
20
/*
 * Fallback definitions: only take effect when the headers included above
 * do not already provide the constant (older distros).
 */
#if !defined(MAP_STACK)
# define MAP_STACK		0x20000
#endif

#if !defined(MADV_HWPOISON)
# define MADV_HWPOISON		100
#endif

#if !defined(MADV_MERGEABLE)
# define MADV_MERGEABLE		12
#endif

#if !defined(MADV_UNMERGEABLE)
# define MADV_UNMERGEABLE	13
#endif
37
38 struct syscall_arg {
39         unsigned long val;
40         struct thread *thread;
41         struct trace  *trace;
42         void          *parm;
43         u8            idx;
44         u8            mask;
45 };
46
/*
 * A table of value names.  entries[i] names the value (offset + i);
 * nr_entries is the number of slots in entries[].
 */
struct strarray {
	int		offset;
	int		nr_entries;
	const char	**entries;
};
52
/*
 * Define 'strarray__<array>' describing the string table 'array'.
 * The offset is zero, i.e. array[0] names the value 0.
 */
#define DEFINE_STRARRAY(array)				\
	struct strarray strarray__##array = {		\
		.offset	    = 0,			\
		.entries    = array,			\
		.nr_entries = ARRAY_SIZE(array),	\
	}
57
/*
 * Define 'strarray__<array>' describing the string table 'array',
 * where array[0] names the value 'off'.
 */
#define DEFINE_STRARRAY_OFFSET(array, off)		\
	struct strarray strarray__##array = {		\
		.entries    = array,			\
		.nr_entries = ARRAY_SIZE(array),	\
		.offset	    = off,			\
	}
63
64 static size_t __syscall_arg__scnprintf_strarray(char *bf, size_t size,
65                                                 const char *intfmt,
66                                                 struct syscall_arg *arg)
67 {
68         struct strarray *sa = arg->parm;
69         int idx = arg->val - sa->offset;
70
71         if (idx < 0 || idx >= sa->nr_entries)
72                 return scnprintf(bf, size, intfmt, arg->val);
73
74         return scnprintf(bf, size, "%s", sa->entries[idx]);
75 }
76
/*
 * strarray formatter whose numeric fallback is decimal.
 */
static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size,
					      struct syscall_arg *arg)
{
	static const char decimal_fmt[] = "%d";

	return __syscall_arg__scnprintf_strarray(bf, size, decimal_fmt, arg);
}

#define SCA_STRARRAY syscall_arg__scnprintf_strarray
84
/*
 * strarray formatter whose numeric fallback is hexadecimal.
 */
static size_t syscall_arg__scnprintf_strhexarray(char *bf, size_t size,
						 struct syscall_arg *arg)
{
	static const char hex_fmt[] = "%#x";

	return __syscall_arg__scnprintf_strarray(bf, size, hex_fmt, arg);
}

#define SCA_STRHEXARRAY syscall_arg__scnprintf_strhexarray
92
93 static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
94                                         struct syscall_arg *arg);
95
96 #define SCA_FD syscall_arg__scnprintf_fd
97
98 static size_t syscall_arg__scnprintf_fd_at(char *bf, size_t size,
99                                            struct syscall_arg *arg)
100 {
101         int fd = arg->val;
102
103         if (fd == AT_FDCWD)
104                 return scnprintf(bf, size, "CWD");
105
106         return syscall_arg__scnprintf_fd(bf, size, arg);
107 }
108
109 #define SCA_FDAT syscall_arg__scnprintf_fd_at
110
111 static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
112                                               struct syscall_arg *arg);
113
114 #define SCA_CLOSE_FD syscall_arg__scnprintf_close_fd
115
116 static size_t syscall_arg__scnprintf_hex(char *bf, size_t size,
117                                          struct syscall_arg *arg)
118 {
119         return scnprintf(bf, size, "%#lx", arg->val);
120 }
121
122 #define SCA_HEX syscall_arg__scnprintf_hex
123
124 static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size,
125                                                struct syscall_arg *arg)
126 {
127         int printed = 0, prot = arg->val;
128
129         if (prot == PROT_NONE)
130                 return scnprintf(bf, size, "NONE");
131 #define P_MMAP_PROT(n) \
132         if (prot & PROT_##n) { \
133                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
134                 prot &= ~PROT_##n; \
135         }
136
137         P_MMAP_PROT(EXEC);
138         P_MMAP_PROT(READ);
139         P_MMAP_PROT(WRITE);
140 #ifdef PROT_SEM
141         P_MMAP_PROT(SEM);
142 #endif
143         P_MMAP_PROT(GROWSDOWN);
144         P_MMAP_PROT(GROWSUP);
145 #undef P_MMAP_PROT
146
147         if (prot)
148                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", prot);
149
150         return printed;
151 }
152
153 #define SCA_MMAP_PROT syscall_arg__scnprintf_mmap_prot
154
155 static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size,
156                                                 struct syscall_arg *arg)
157 {
158         int printed = 0, flags = arg->val;
159
160 #define P_MMAP_FLAG(n) \
161         if (flags & MAP_##n) { \
162                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
163                 flags &= ~MAP_##n; \
164         }
165
166         P_MMAP_FLAG(SHARED);
167         P_MMAP_FLAG(PRIVATE);
168 #ifdef MAP_32BIT
169         P_MMAP_FLAG(32BIT);
170 #endif
171         P_MMAP_FLAG(ANONYMOUS);
172         P_MMAP_FLAG(DENYWRITE);
173         P_MMAP_FLAG(EXECUTABLE);
174         P_MMAP_FLAG(FILE);
175         P_MMAP_FLAG(FIXED);
176         P_MMAP_FLAG(GROWSDOWN);
177 #ifdef MAP_HUGETLB
178         P_MMAP_FLAG(HUGETLB);
179 #endif
180         P_MMAP_FLAG(LOCKED);
181         P_MMAP_FLAG(NONBLOCK);
182         P_MMAP_FLAG(NORESERVE);
183         P_MMAP_FLAG(POPULATE);
184         P_MMAP_FLAG(STACK);
185 #ifdef MAP_UNINITIALIZED
186         P_MMAP_FLAG(UNINITIALIZED);
187 #endif
188 #undef P_MMAP_FLAG
189
190         if (flags)
191                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
192
193         return printed;
194 }
195
196 #define SCA_MMAP_FLAGS syscall_arg__scnprintf_mmap_flags
197
198 static size_t syscall_arg__scnprintf_madvise_behavior(char *bf, size_t size,
199                                                       struct syscall_arg *arg)
200 {
201         int behavior = arg->val;
202
203         switch (behavior) {
204 #define P_MADV_BHV(n) case MADV_##n: return scnprintf(bf, size, #n)
205         P_MADV_BHV(NORMAL);
206         P_MADV_BHV(RANDOM);
207         P_MADV_BHV(SEQUENTIAL);
208         P_MADV_BHV(WILLNEED);
209         P_MADV_BHV(DONTNEED);
210         P_MADV_BHV(REMOVE);
211         P_MADV_BHV(DONTFORK);
212         P_MADV_BHV(DOFORK);
213         P_MADV_BHV(HWPOISON);
214 #ifdef MADV_SOFT_OFFLINE
215         P_MADV_BHV(SOFT_OFFLINE);
216 #endif
217         P_MADV_BHV(MERGEABLE);
218         P_MADV_BHV(UNMERGEABLE);
219 #ifdef MADV_HUGEPAGE
220         P_MADV_BHV(HUGEPAGE);
221 #endif
222 #ifdef MADV_NOHUGEPAGE
223         P_MADV_BHV(NOHUGEPAGE);
224 #endif
225 #ifdef MADV_DONTDUMP
226         P_MADV_BHV(DONTDUMP);
227 #endif
228 #ifdef MADV_DODUMP
229         P_MADV_BHV(DODUMP);
230 #endif
231 #undef P_MADV_PHV
232         default: break;
233         }
234
235         return scnprintf(bf, size, "%#x", behavior);
236 }
237
238 #define SCA_MADV_BHV syscall_arg__scnprintf_madvise_behavior
239
240 static size_t syscall_arg__scnprintf_flock(char *bf, size_t size,
241                                            struct syscall_arg *arg)
242 {
243         int printed = 0, op = arg->val;
244
245         if (op == 0)
246                 return scnprintf(bf, size, "NONE");
247 #define P_CMD(cmd) \
248         if ((op & LOCK_##cmd) == LOCK_##cmd) { \
249                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #cmd); \
250                 op &= ~LOCK_##cmd; \
251         }
252
253         P_CMD(SH);
254         P_CMD(EX);
255         P_CMD(NB);
256         P_CMD(UN);
257         P_CMD(MAND);
258         P_CMD(RW);
259         P_CMD(READ);
260         P_CMD(WRITE);
261 #undef P_OP
262
263         if (op)
264                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", op);
265
266         return printed;
267 }
268
269 #define SCA_FLOCK syscall_arg__scnprintf_flock
270
271 static size_t syscall_arg__scnprintf_futex_op(char *bf, size_t size, struct syscall_arg *arg)
272 {
273         enum syscall_futex_args {
274                 SCF_UADDR   = (1 << 0),
275                 SCF_OP      = (1 << 1),
276                 SCF_VAL     = (1 << 2),
277                 SCF_TIMEOUT = (1 << 3),
278                 SCF_UADDR2  = (1 << 4),
279                 SCF_VAL3    = (1 << 5),
280         };
281         int op = arg->val;
282         int cmd = op & FUTEX_CMD_MASK;
283         size_t printed = 0;
284
285         switch (cmd) {
286 #define P_FUTEX_OP(n) case FUTEX_##n: printed = scnprintf(bf, size, #n);
287         P_FUTEX_OP(WAIT);           arg->mask |= SCF_VAL3|SCF_UADDR2;             break;
288         P_FUTEX_OP(WAKE);           arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
289         P_FUTEX_OP(FD);             arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
290         P_FUTEX_OP(REQUEUE);        arg->mask |= SCF_VAL3|SCF_TIMEOUT;            break;
291         P_FUTEX_OP(CMP_REQUEUE);    arg->mask |= SCF_TIMEOUT;                     break;
292         P_FUTEX_OP(CMP_REQUEUE_PI); arg->mask |= SCF_TIMEOUT;                     break;
293         P_FUTEX_OP(WAKE_OP);                                                      break;
294         P_FUTEX_OP(LOCK_PI);        arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
295         P_FUTEX_OP(UNLOCK_PI);      arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
296         P_FUTEX_OP(TRYLOCK_PI);     arg->mask |= SCF_VAL3|SCF_UADDR2;             break;
297         P_FUTEX_OP(WAIT_BITSET);    arg->mask |= SCF_UADDR2;                      break;
298         P_FUTEX_OP(WAKE_BITSET);    arg->mask |= SCF_UADDR2;                      break;
299         P_FUTEX_OP(WAIT_REQUEUE_PI);                                              break;
300         default: printed = scnprintf(bf, size, "%#x", cmd);                       break;
301         }
302
303         if (op & FUTEX_PRIVATE_FLAG)
304                 printed += scnprintf(bf + printed, size - printed, "|PRIV");
305
306         if (op & FUTEX_CLOCK_REALTIME)
307                 printed += scnprintf(bf + printed, size - printed, "|CLKRT");
308
309         return printed;
310 }
311
312 #define SCA_FUTEX_OP  syscall_arg__scnprintf_futex_op
313
314 static const char *epoll_ctl_ops[] = { "ADD", "DEL", "MOD", };
315 static DEFINE_STRARRAY_OFFSET(epoll_ctl_ops, 1);
316
317 static const char *itimers[] = { "REAL", "VIRTUAL", "PROF", };
318 static DEFINE_STRARRAY(itimers);
319
320 static const char *whences[] = { "SET", "CUR", "END",
321 #ifdef SEEK_DATA
322 "DATA",
323 #endif
324 #ifdef SEEK_HOLE
325 "HOLE",
326 #endif
327 };
328 static DEFINE_STRARRAY(whences);
329
330 static const char *fcntl_cmds[] = {
331         "DUPFD", "GETFD", "SETFD", "GETFL", "SETFL", "GETLK", "SETLK",
332         "SETLKW", "SETOWN", "GETOWN", "SETSIG", "GETSIG", "F_GETLK64",
333         "F_SETLK64", "F_SETLKW64", "F_SETOWN_EX", "F_GETOWN_EX",
334         "F_GETOWNER_UIDS",
335 };
336 static DEFINE_STRARRAY(fcntl_cmds);
337
338 static const char *rlimit_resources[] = {
339         "CPU", "FSIZE", "DATA", "STACK", "CORE", "RSS", "NPROC", "NOFILE",
340         "MEMLOCK", "AS", "LOCKS", "SIGPENDING", "MSGQUEUE", "NICE", "RTPRIO",
341         "RTTIME",
342 };
343 static DEFINE_STRARRAY(rlimit_resources);
344
345 static const char *sighow[] = { "BLOCK", "UNBLOCK", "SETMASK", };
346 static DEFINE_STRARRAY(sighow);
347
348 static const char *clockid[] = {
349         "REALTIME", "MONOTONIC", "PROCESS_CPUTIME_ID", "THREAD_CPUTIME_ID",
350         "MONOTONIC_RAW", "REALTIME_COARSE", "MONOTONIC_COARSE",
351 };
352 static DEFINE_STRARRAY(clockid);
353
/*
 * Address family names, indexed by the AF_* value (AF_UNSPEC == 0).
 *
 * "MPLS" (AF_MPLS == 28) sits between "IB" (27) and "CAN" (29); without it
 * every family from CAN onwards would be reported under its neighbour's
 * name and AF_VSOCK (40) would fall off the end of the table.
 */
static const char *socket_families[] = {
	"UNSPEC", "LOCAL", "INET", "AX25", "IPX", "APPLETALK", "NETROM",
	"BRIDGE", "ATMPVC", "X25", "INET6", "ROSE", "DECnet", "NETBEUI",
	"SECURITY", "KEY", "NETLINK", "PACKET", "ASH", "ECONET", "ATMSVC",
	"RDS", "SNA", "IRDA", "PPPOX", "WANPIPE", "LLC", "IB", "MPLS", "CAN",
	"TIPC", "BLUETOOTH", "IUCV", "RXRPC", "ISDN", "PHONET", "IEEE802154",
	"CAIF", "ALG", "NFC", "VSOCK",
};
362 static DEFINE_STRARRAY(socket_families);
363
364 #ifndef SOCK_TYPE_MASK
365 #define SOCK_TYPE_MASK 0xf
366 #endif
367
368 static size_t syscall_arg__scnprintf_socket_type(char *bf, size_t size,
369                                                       struct syscall_arg *arg)
370 {
371         size_t printed;
372         int type = arg->val,
373             flags = type & ~SOCK_TYPE_MASK;
374
375         type &= SOCK_TYPE_MASK;
376         /*
377          * Can't use a strarray, MIPS may override for ABI reasons.
378          */
379         switch (type) {
380 #define P_SK_TYPE(n) case SOCK_##n: printed = scnprintf(bf, size, #n); break;
381         P_SK_TYPE(STREAM);
382         P_SK_TYPE(DGRAM);
383         P_SK_TYPE(RAW);
384         P_SK_TYPE(RDM);
385         P_SK_TYPE(SEQPACKET);
386         P_SK_TYPE(DCCP);
387         P_SK_TYPE(PACKET);
388 #undef P_SK_TYPE
389         default:
390                 printed = scnprintf(bf, size, "%#x", type);
391         }
392
393 #define P_SK_FLAG(n) \
394         if (flags & SOCK_##n) { \
395                 printed += scnprintf(bf + printed, size - printed, "|%s", #n); \
396                 flags &= ~SOCK_##n; \
397         }
398
399         P_SK_FLAG(CLOEXEC);
400         P_SK_FLAG(NONBLOCK);
401 #undef P_SK_FLAG
402
403         if (flags)
404                 printed += scnprintf(bf + printed, size - printed, "|%#x", flags);
405
406         return printed;
407 }
408
409 #define SCA_SK_TYPE syscall_arg__scnprintf_socket_type
410
411 #ifndef MSG_PROBE
412 #define MSG_PROBE            0x10
413 #endif
414 #ifndef MSG_WAITFORONE
415 #define MSG_WAITFORONE  0x10000
416 #endif
417 #ifndef MSG_SENDPAGE_NOTLAST
418 #define MSG_SENDPAGE_NOTLAST 0x20000
419 #endif
420 #ifndef MSG_FASTOPEN
421 #define MSG_FASTOPEN         0x20000000
422 #endif
423
424 static size_t syscall_arg__scnprintf_msg_flags(char *bf, size_t size,
425                                                struct syscall_arg *arg)
426 {
427         int printed = 0, flags = arg->val;
428
429         if (flags == 0)
430                 return scnprintf(bf, size, "NONE");
431 #define P_MSG_FLAG(n) \
432         if (flags & MSG_##n) { \
433                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
434                 flags &= ~MSG_##n; \
435         }
436
437         P_MSG_FLAG(OOB);
438         P_MSG_FLAG(PEEK);
439         P_MSG_FLAG(DONTROUTE);
440         P_MSG_FLAG(TRYHARD);
441         P_MSG_FLAG(CTRUNC);
442         P_MSG_FLAG(PROBE);
443         P_MSG_FLAG(TRUNC);
444         P_MSG_FLAG(DONTWAIT);
445         P_MSG_FLAG(EOR);
446         P_MSG_FLAG(WAITALL);
447         P_MSG_FLAG(FIN);
448         P_MSG_FLAG(SYN);
449         P_MSG_FLAG(CONFIRM);
450         P_MSG_FLAG(RST);
451         P_MSG_FLAG(ERRQUEUE);
452         P_MSG_FLAG(NOSIGNAL);
453         P_MSG_FLAG(MORE);
454         P_MSG_FLAG(WAITFORONE);
455         P_MSG_FLAG(SENDPAGE_NOTLAST);
456         P_MSG_FLAG(FASTOPEN);
457         P_MSG_FLAG(CMSG_CLOEXEC);
458 #undef P_MSG_FLAG
459
460         if (flags)
461                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
462
463         return printed;
464 }
465
466 #define SCA_MSG_FLAGS syscall_arg__scnprintf_msg_flags
467
468 static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size,
469                                                  struct syscall_arg *arg)
470 {
471         size_t printed = 0;
472         int mode = arg->val;
473
474         if (mode == F_OK) /* 0 */
475                 return scnprintf(bf, size, "F");
476 #define P_MODE(n) \
477         if (mode & n##_OK) { \
478                 printed += scnprintf(bf + printed, size - printed, "%s", #n); \
479                 mode &= ~n##_OK; \
480         }
481
482         P_MODE(R);
483         P_MODE(W);
484         P_MODE(X);
485 #undef P_MODE
486
487         if (mode)
488                 printed += scnprintf(bf + printed, size - printed, "|%#x", mode);
489
490         return printed;
491 }
492
493 #define SCA_ACCMODE syscall_arg__scnprintf_access_mode
494
495 static size_t syscall_arg__scnprintf_open_flags(char *bf, size_t size,
496                                                struct syscall_arg *arg)
497 {
498         int printed = 0, flags = arg->val;
499
500         if (!(flags & O_CREAT))
501                 arg->mask |= 1 << (arg->idx + 1); /* Mask the mode parm */
502
503         if (flags == 0)
504                 return scnprintf(bf, size, "RDONLY");
505 #define P_FLAG(n) \
506         if (flags & O_##n) { \
507                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
508                 flags &= ~O_##n; \
509         }
510
511         P_FLAG(APPEND);
512         P_FLAG(ASYNC);
513         P_FLAG(CLOEXEC);
514         P_FLAG(CREAT);
515         P_FLAG(DIRECT);
516         P_FLAG(DIRECTORY);
517         P_FLAG(EXCL);
518         P_FLAG(LARGEFILE);
519         P_FLAG(NOATIME);
520         P_FLAG(NOCTTY);
521 #ifdef O_NONBLOCK
522         P_FLAG(NONBLOCK);
523 #elif O_NDELAY
524         P_FLAG(NDELAY);
525 #endif
526 #ifdef O_PATH
527         P_FLAG(PATH);
528 #endif
529         P_FLAG(RDWR);
530 #ifdef O_DSYNC
531         if ((flags & O_SYNC) == O_SYNC)
532                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", "SYNC");
533         else {
534                 P_FLAG(DSYNC);
535         }
536 #else
537         P_FLAG(SYNC);
538 #endif
539         P_FLAG(TRUNC);
540         P_FLAG(WRONLY);
541 #undef P_FLAG
542
543         if (flags)
544                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
545
546         return printed;
547 }
548
549 #define SCA_OPEN_FLAGS syscall_arg__scnprintf_open_flags
550
551 static size_t syscall_arg__scnprintf_eventfd_flags(char *bf, size_t size,
552                                                    struct syscall_arg *arg)
553 {
554         int printed = 0, flags = arg->val;
555
556         if (flags == 0)
557                 return scnprintf(bf, size, "NONE");
558 #define P_FLAG(n) \
559         if (flags & EFD_##n) { \
560                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
561                 flags &= ~EFD_##n; \
562         }
563
564         P_FLAG(SEMAPHORE);
565         P_FLAG(CLOEXEC);
566         P_FLAG(NONBLOCK);
567 #undef P_FLAG
568
569         if (flags)
570                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
571
572         return printed;
573 }
574
575 #define SCA_EFD_FLAGS syscall_arg__scnprintf_eventfd_flags
576
577 static size_t syscall_arg__scnprintf_pipe_flags(char *bf, size_t size,
578                                                 struct syscall_arg *arg)
579 {
580         int printed = 0, flags = arg->val;
581
582 #define P_FLAG(n) \
583         if (flags & O_##n) { \
584                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
585                 flags &= ~O_##n; \
586         }
587
588         P_FLAG(CLOEXEC);
589         P_FLAG(NONBLOCK);
590 #undef P_FLAG
591
592         if (flags)
593                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
594
595         return printed;
596 }
597
598 #define SCA_PIPE_FLAGS syscall_arg__scnprintf_pipe_flags
599
600 static size_t syscall_arg__scnprintf_signum(char *bf, size_t size, struct syscall_arg *arg)
601 {
602         int sig = arg->val;
603
604         switch (sig) {
605 #define P_SIGNUM(n) case SIG##n: return scnprintf(bf, size, #n)
606         P_SIGNUM(HUP);
607         P_SIGNUM(INT);
608         P_SIGNUM(QUIT);
609         P_SIGNUM(ILL);
610         P_SIGNUM(TRAP);
611         P_SIGNUM(ABRT);
612         P_SIGNUM(BUS);
613         P_SIGNUM(FPE);
614         P_SIGNUM(KILL);
615         P_SIGNUM(USR1);
616         P_SIGNUM(SEGV);
617         P_SIGNUM(USR2);
618         P_SIGNUM(PIPE);
619         P_SIGNUM(ALRM);
620         P_SIGNUM(TERM);
621         P_SIGNUM(STKFLT);
622         P_SIGNUM(CHLD);
623         P_SIGNUM(CONT);
624         P_SIGNUM(STOP);
625         P_SIGNUM(TSTP);
626         P_SIGNUM(TTIN);
627         P_SIGNUM(TTOU);
628         P_SIGNUM(URG);
629         P_SIGNUM(XCPU);
630         P_SIGNUM(XFSZ);
631         P_SIGNUM(VTALRM);
632         P_SIGNUM(PROF);
633         P_SIGNUM(WINCH);
634         P_SIGNUM(IO);
635         P_SIGNUM(PWR);
636         P_SIGNUM(SYS);
637         default: break;
638         }
639
640         return scnprintf(bf, size, "%#x", sig);
641 }
642
643 #define SCA_SIGNUM syscall_arg__scnprintf_signum
644
#define TCGETS		0x5401

/*
 * Names of the tty ioctl commands.  The table is looked up with
 * (cmd - TCGETS), so slot i names the command whose number is TCGETS + i.
 *
 * Designators are therefore written as "<command number> - TCGETS".  Using
 * the bare low byte of the command (0x27, 0x50, 0x60) would shift every
 * entry after a gap by one, because the table starts at 0x5401 and not at
 * 0x5400.
 */
static const char *tioctls[] = {
	"TCGETS", "TCSETS", "TCSETSW", "TCSETSF", "TCGETA", "TCSETA", "TCSETAW",
	"TCSETAF", "TCSBRK", "TCXONC", "TCFLSH", "TIOCEXCL", "TIOCNXCL",
	"TIOCSCTTY", "TIOCGPGRP", "TIOCSPGRP", "TIOCOUTQ", "TIOCSTI",
	"TIOCGWINSZ", "TIOCSWINSZ", "TIOCMGET", "TIOCMBIS", "TIOCMBIC",
	"TIOCMSET", "TIOCGSOFTCAR", "TIOCSSOFTCAR", "FIONREAD", "TIOCLINUX",
	"TIOCCONS", "TIOCGSERIAL", "TIOCSSERIAL", "TIOCPKT", "FIONBIO",
	"TIOCNOTTY", "TIOCSETD", "TIOCGETD", "TCSBRKP",
	[0x5427 - TCGETS] = "TIOCSBRK",
	"TIOCCBRK", "TIOCGSID", "TCGETS2", "TCSETS2", "TCSETSW2", "TCSETSF2",
	"TIOCGRS485", "TIOCSRS485", "TIOCGPTN", "TIOCSPTLCK",
	"TIOCGDEV||TCGETX", "TCSETX", "TCSETXF", "TCSETXW", "TIOCSIG",
	"TIOCVHANGUP", "TIOCGPKT", "TIOCGPTLCK",
	[0x5440 - TCGETS] = "TIOCGEXCL",
	[0x5450 - TCGETS] = "FIONCLEX", "FIOCLEX", "FIOASYNC", "TIOCSERCONFIG",
	"TIOCSERGWILD", "TIOCSERSWILD", "TIOCGLCKTRMIOS", "TIOCSLCKTRMIOS",
	"TIOCSERGSTRUCT", "TIOCSERGETLSR", "TIOCSERGETMULTI", "TIOCSERSETMULTI",
	"TIOCMIWAIT", "TIOCGICOUNT",
	[0x5460 - TCGETS] = "FIOQSIZE",
};
664
/* tioctls[0] names TCGETS (0x5401), the first command in the table. */
static DEFINE_STRARRAY_OFFSET(tioctls, TCGETS);
666
/*
 * Initializer fragment for a syscall_fmts[] entry: argument number 'arg' is
 * formatted by SCA_STRARRAY using the table strarray__<array>.  'name' is
 * not expanded; it only labels the argument at the use site.
 */
#define STRARRAY(arg, name, array)				\
	.arg_scnprintf = { [arg] = SCA_STRARRAY, },		\
	.arg_parm      = { [arg] = &strarray__##array, }
670
671 static struct syscall_fmt {
672         const char *name;
673         const char *alias;
674         size_t     (*arg_scnprintf[6])(char *bf, size_t size, struct syscall_arg *arg);
675         void       *arg_parm[6];
676         bool       errmsg;
677         bool       timeout;
678         bool       hexret;
679 } syscall_fmts[] = {
680         { .name     = "access",     .errmsg = true,
681           .arg_scnprintf = { [1] = SCA_ACCMODE, /* mode */ }, },
682         { .name     = "arch_prctl", .errmsg = true, .alias = "prctl", },
683         { .name     = "brk",        .hexret = true,
684           .arg_scnprintf = { [0] = SCA_HEX, /* brk */ }, },
685         { .name     = "clock_gettime",  .errmsg = true, STRARRAY(0, clk_id, clockid), },
686         { .name     = "close",      .errmsg = true,
687           .arg_scnprintf = { [0] = SCA_CLOSE_FD, /* fd */ }, }, 
688         { .name     = "connect",    .errmsg = true, },
689         { .name     = "dup",        .errmsg = true,
690           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
691         { .name     = "dup2",       .errmsg = true,
692           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
693         { .name     = "dup3",       .errmsg = true,
694           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
695         { .name     = "epoll_ctl",  .errmsg = true, STRARRAY(1, op, epoll_ctl_ops), },
696         { .name     = "eventfd2",   .errmsg = true,
697           .arg_scnprintf = { [1] = SCA_EFD_FLAGS, /* flags */ }, },
698         { .name     = "faccessat",  .errmsg = true,
699           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
700         { .name     = "fadvise64",  .errmsg = true,
701           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
702         { .name     = "fallocate",  .errmsg = true,
703           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
704         { .name     = "fchdir",     .errmsg = true,
705           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
706         { .name     = "fchmod",     .errmsg = true,
707           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
708         { .name     = "fchmodat",   .errmsg = true,
709           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, 
710         { .name     = "fchown",     .errmsg = true,
711           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
712         { .name     = "fchownat",   .errmsg = true,
713           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, 
714         { .name     = "fcntl",      .errmsg = true,
715           .arg_scnprintf = { [0] = SCA_FD, /* fd */
716                              [1] = SCA_STRARRAY, /* cmd */ },
717           .arg_parm      = { [1] = &strarray__fcntl_cmds, /* cmd */ }, },
718         { .name     = "fdatasync",  .errmsg = true,
719           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
720         { .name     = "flock",      .errmsg = true,
721           .arg_scnprintf = { [0] = SCA_FD, /* fd */
722                              [1] = SCA_FLOCK, /* cmd */ }, },
723         { .name     = "fsetxattr",  .errmsg = true,
724           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
725         { .name     = "fstat",      .errmsg = true, .alias = "newfstat",
726           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
727         { .name     = "fstatat",    .errmsg = true, .alias = "newfstatat",
728           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, 
729         { .name     = "fstatfs",    .errmsg = true,
730           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
731         { .name     = "fsync",    .errmsg = true,
732           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
733         { .name     = "ftruncate", .errmsg = true,
734           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
735         { .name     = "futex",      .errmsg = true,
736           .arg_scnprintf = { [1] = SCA_FUTEX_OP, /* op */ }, },
737         { .name     = "futimesat", .errmsg = true,
738           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, 
739         { .name     = "getdents",   .errmsg = true,
740           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
741         { .name     = "getdents64", .errmsg = true,
742           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
743         { .name     = "getitimer",  .errmsg = true, STRARRAY(0, which, itimers), },
744         { .name     = "getrlimit",  .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
745         { .name     = "ioctl",      .errmsg = true,
746           .arg_scnprintf = { [0] = SCA_FD, /* fd */ 
747                              [1] = SCA_STRHEXARRAY, /* cmd */
748                              [2] = SCA_HEX, /* arg */ },
749           .arg_parm      = { [1] = &strarray__tioctls, /* cmd */ }, },
750         { .name     = "kill",       .errmsg = true,
751           .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
752         { .name     = "linkat",     .errmsg = true,
753           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, 
754         { .name     = "lseek",      .errmsg = true,
755           .arg_scnprintf = { [0] = SCA_FD, /* fd */
756                              [2] = SCA_STRARRAY, /* whence */ },
757           .arg_parm      = { [2] = &strarray__whences, /* whence */ }, },
758         { .name     = "lstat",      .errmsg = true, .alias = "newlstat", },
759         { .name     = "madvise",    .errmsg = true,
760           .arg_scnprintf = { [0] = SCA_HEX,      /* start */
761                              [2] = SCA_MADV_BHV, /* behavior */ }, },
762         { .name     = "mkdirat",    .errmsg = true,
763           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, 
764         { .name     = "mknodat",    .errmsg = true,
765           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, 
766         { .name     = "mlock",      .errmsg = true,
767           .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
768         { .name     = "mlockall",   .errmsg = true,
769           .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
770         { .name     = "mmap",       .hexret = true,
771           .arg_scnprintf = { [0] = SCA_HEX,       /* addr */
772                              [2] = SCA_MMAP_PROT, /* prot */
773                              [3] = SCA_MMAP_FLAGS, /* flags */ }, },
774         { .name     = "mprotect",   .errmsg = true,
775           .arg_scnprintf = { [0] = SCA_HEX, /* start */
776                              [2] = SCA_MMAP_PROT, /* prot */ }, },
777         { .name     = "mremap",     .hexret = true,
778           .arg_scnprintf = { [0] = SCA_HEX, /* addr */
779                              [4] = SCA_HEX, /* new_addr */ }, },
780         { .name     = "munlock",    .errmsg = true,
781           .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
782         { .name     = "munmap",     .errmsg = true,
783           .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
784         { .name     = "name_to_handle_at", .errmsg = true,
785           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, 
786         { .name     = "newfstatat", .errmsg = true,
787           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, 
788         { .name     = "open",       .errmsg = true,
789           .arg_scnprintf = { [1] = SCA_OPEN_FLAGS, /* flags */ }, },
790         { .name     = "open_by_handle_at", .errmsg = true,
791           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
792                              [2] = SCA_OPEN_FLAGS, /* flags */ }, },
793         { .name     = "openat",     .errmsg = true,
794           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
795                              [2] = SCA_OPEN_FLAGS, /* flags */ }, },
796         { .name     = "pipe2",      .errmsg = true,
797           .arg_scnprintf = { [1] = SCA_PIPE_FLAGS, /* flags */ }, },
798         { .name     = "poll",       .errmsg = true, .timeout = true, },
799         { .name     = "ppoll",      .errmsg = true, .timeout = true, },
800         { .name     = "pread",      .errmsg = true, .alias = "pread64",
801           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
802         { .name     = "preadv",     .errmsg = true, .alias = "pread",
803           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
804         { .name     = "prlimit64",  .errmsg = true, STRARRAY(1, resource, rlimit_resources), },
805         { .name     = "pwrite",     .errmsg = true, .alias = "pwrite64",
806           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
807         { .name     = "pwritev",    .errmsg = true,
808           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
809         { .name     = "read",       .errmsg = true,
810           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
811         { .name     = "readlinkat", .errmsg = true,
812           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, 
813         { .name     = "readv",      .errmsg = true,
814           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
815         { .name     = "recvfrom",   .errmsg = true,
816           .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
817         { .name     = "recvmmsg",   .errmsg = true,
818           .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
819         { .name     = "recvmsg",    .errmsg = true,
820           .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
821         { .name     = "renameat",   .errmsg = true,
822           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, 
823         { .name     = "rt_sigaction", .errmsg = true,
824           .arg_scnprintf = { [0] = SCA_SIGNUM, /* sig */ }, },
825         { .name     = "rt_sigprocmask",  .errmsg = true, STRARRAY(0, how, sighow), },
826         { .name     = "rt_sigqueueinfo", .errmsg = true,
827           .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
828         { .name     = "rt_tgsigqueueinfo", .errmsg = true,
829           .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
830         { .name     = "select",     .errmsg = true, .timeout = true, },
831         { .name     = "sendmmsg",    .errmsg = true,
832           .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
833         { .name     = "sendmsg",    .errmsg = true,
834           .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
835         { .name     = "sendto",     .errmsg = true,
836           .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
837         { .name     = "setitimer",  .errmsg = true, STRARRAY(0, which, itimers), },
838         { .name     = "setrlimit",  .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
839         { .name     = "shutdown",   .errmsg = true,
840           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
841         { .name     = "socket",     .errmsg = true,
842           .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
843                              [1] = SCA_SK_TYPE, /* type */ },
844           .arg_parm      = { [0] = &strarray__socket_families, /* family */ }, },
845         { .name     = "socketpair", .errmsg = true,
846           .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
847                              [1] = SCA_SK_TYPE, /* type */ },
848           .arg_parm      = { [0] = &strarray__socket_families, /* family */ }, },
849         { .name     = "stat",       .errmsg = true, .alias = "newstat", },
850         { .name     = "symlinkat",  .errmsg = true,
851           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, 
852         { .name     = "tgkill",     .errmsg = true,
853           .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
854         { .name     = "tkill",      .errmsg = true,
855           .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
856         { .name     = "uname",      .errmsg = true, .alias = "newuname", },
857         { .name     = "unlinkat",   .errmsg = true,
858           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
859         { .name     = "utimensat",  .errmsg = true,
860           .arg_scnprintf = { [0] = SCA_FDAT, /* dirfd */ }, },
861         { .name     = "write",      .errmsg = true,
862           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
863         { .name     = "writev",     .errmsg = true,
864           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
865 };
866
867 static int syscall_fmt__cmp(const void *name, const void *fmtp)
868 {
869         const struct syscall_fmt *fmt = fmtp;
870         return strcmp(name, fmt->name);
871 }
872
873 static struct syscall_fmt *syscall_fmt__find(const char *name)
874 {
875         const int nmemb = ARRAY_SIZE(syscall_fmts);
876         return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp);
877 }
878
/* Per syscall id state, stored in trace->syscalls.table[id]. */
struct syscall {
        /* Format of the syscalls:sys_enter_<name> tracepoint, source of the argument names */
        struct event_format *tp_format;
        /* Syscall name as returned by audit_syscall_to_name() */
        const char *name;
        /* Set when the event qualifier excludes this syscall */
        bool filtered;
        /* Matching syscall_fmts entry, NULL when there is none */
        struct syscall_fmt *fmt;
        /* One optional pretty printer per argument, NULL means print as a plain number */
        size_t (**arg_scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
        /* Per argument parameter handed to the pretty printer via syscall_arg->parm */
        void **arg_parm;
};
887
888 static size_t fprintf_duration(unsigned long t, FILE *fp)
889 {
890         double duration = (double)t / NSEC_PER_MSEC;
891         size_t printed = fprintf(fp, "(");
892
893         if (duration >= 1.0)
894                 printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration);
895         else if (duration >= 0.01)
896                 printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration);
897         else
898                 printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration);
899         return printed + fprintf(fp, "): ");
900 }
901
902 struct thread_trace {
903         u64               entry_time;
904         u64               exit_time;
905         bool              entry_pending;
906         unsigned long     nr_events;
907         char              *entry_str;
908         double            runtime_ms;
909         struct {
910                 int       max;
911                 char      **table;
912         } paths;
913
914         struct intlist *syscall_stats;
915 };
916
917 static struct thread_trace *thread_trace__new(void)
918 {
919         struct thread_trace *ttrace =  zalloc(sizeof(struct thread_trace));
920
921         if (ttrace)
922                 ttrace->paths.max = -1;
923
924         ttrace->syscall_stats = intlist__new(NULL);
925
926         return ttrace;
927 }
928
929 static struct thread_trace *thread__trace(struct thread *thread, FILE *fp)
930 {
931         struct thread_trace *ttrace;
932
933         if (thread == NULL)
934                 goto fail;
935
936         if (thread->priv == NULL)
937                 thread->priv = thread_trace__new();
938                 
939         if (thread->priv == NULL)
940                 goto fail;
941
942         ttrace = thread->priv;
943         ++ttrace->nr_events;
944
945         return ttrace;
946 fail:
947         color_fprintf(fp, PERF_COLOR_RED,
948                       "WARNING: not enough memory, dropping samples!\n");
949         return NULL;
950 }
951
952 struct trace {
953         struct perf_tool        tool;
954         struct {
955                 int             machine;
956                 int             open_id;
957         }                       audit;
958         struct {
959                 int             max;
960                 struct syscall  *table;
961         } syscalls;
962         struct perf_record_opts opts;
963         struct machine          *host;
964         u64                     base_time;
965         bool                    full_time;
966         FILE                    *output;
967         unsigned long           nr_events;
968         struct strlist          *ev_qualifier;
969         bool                    not_ev_qualifier;
970         bool                    live;
971         const char              *last_vfs_getname;
972         struct intlist          *tid_list;
973         struct intlist          *pid_list;
974         bool                    sched;
975         bool                    multiple_threads;
976         bool                    summary;
977         bool                    show_comm;
978         bool                    show_tool_stats;
979         double                  duration_filter;
980         double                  runtime_ms;
981         struct {
982                 u64             vfs_getname, proc_getname;
983         } stats;
984 };
985
986 static int trace__set_fd_pathname(struct thread *thread, int fd, const char *pathname)
987 {
988         struct thread_trace *ttrace = thread->priv;
989
990         if (fd > ttrace->paths.max) {
991                 char **npath = realloc(ttrace->paths.table, (fd + 1) * sizeof(char *));
992
993                 if (npath == NULL)
994                         return -1;
995
996                 if (ttrace->paths.max != -1) {
997                         memset(npath + ttrace->paths.max + 1, 0,
998                                (fd - ttrace->paths.max) * sizeof(char *));
999                 } else {
1000                         memset(npath, 0, (fd + 1) * sizeof(char *));
1001                 }
1002
1003                 ttrace->paths.table = npath;
1004                 ttrace->paths.max   = fd;
1005         }
1006
1007         ttrace->paths.table[fd] = strdup(pathname);
1008
1009         return ttrace->paths.table[fd] != NULL ? 0 : -1;
1010 }
1011
1012 static int thread__read_fd_path(struct thread *thread, int fd)
1013 {
1014         char linkname[PATH_MAX], pathname[PATH_MAX];
1015         struct stat st;
1016         int ret;
1017
1018         if (thread->pid_ == thread->tid) {
1019                 scnprintf(linkname, sizeof(linkname),
1020                           "/proc/%d/fd/%d", thread->pid_, fd);
1021         } else {
1022                 scnprintf(linkname, sizeof(linkname),
1023                           "/proc/%d/task/%d/fd/%d", thread->pid_, thread->tid, fd);
1024         }
1025
1026         if (lstat(linkname, &st) < 0 || st.st_size + 1 > (off_t)sizeof(pathname))
1027                 return -1;
1028
1029         ret = readlink(linkname, pathname, sizeof(pathname));
1030
1031         if (ret < 0 || ret > st.st_size)
1032                 return -1;
1033
1034         pathname[ret] = '\0';
1035         return trace__set_fd_pathname(thread, fd, pathname);
1036 }
1037
1038 static const char *thread__fd_path(struct thread *thread, int fd,
1039                                    struct trace *trace)
1040 {
1041         struct thread_trace *ttrace = thread->priv;
1042
1043         if (ttrace == NULL)
1044                 return NULL;
1045
1046         if (fd < 0)
1047                 return NULL;
1048
1049         if ((fd > ttrace->paths.max || ttrace->paths.table[fd] == NULL))
1050                 if (!trace->live)
1051                         return NULL;
1052                 ++trace->stats.proc_getname;
1053                 if (thread__read_fd_path(thread, fd)) {
1054                         return NULL;
1055         }
1056
1057         return ttrace->paths.table[fd];
1058 }
1059
1060 static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
1061                                         struct syscall_arg *arg)
1062 {
1063         int fd = arg->val;
1064         size_t printed = scnprintf(bf, size, "%d", fd);
1065         const char *path = thread__fd_path(arg->thread, fd, arg->trace);
1066
1067         if (path)
1068                 printed += scnprintf(bf + printed, size - printed, "<%s>", path);
1069
1070         return printed;
1071 }
1072
1073 static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
1074                                               struct syscall_arg *arg)
1075 {
1076         int fd = arg->val;
1077         size_t printed = syscall_arg__scnprintf_fd(bf, size, arg);
1078         struct thread_trace *ttrace = arg->thread->priv;
1079
1080         if (ttrace && fd >= 0 && fd <= ttrace->paths.max) {
1081                 free(ttrace->paths.table[fd]);
1082                 ttrace->paths.table[fd] = NULL;
1083         }
1084
1085         return printed;
1086 }
1087
1088 static bool trace__filter_duration(struct trace *trace, double t)
1089 {
1090         return t < (trace->duration_filter * NSEC_PER_MSEC);
1091 }
1092
1093 static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
1094 {
1095         double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC;
1096
1097         return fprintf(fp, "%10.3f ", ts);
1098 }
1099
1100 static bool done = false;
1101
1102 static void sig_handler(int sig __maybe_unused)
1103 {
1104         done = true;
1105 }
1106
1107 static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread,
1108                                         u64 duration, u64 tstamp, FILE *fp)
1109 {
1110         size_t printed = trace__fprintf_tstamp(trace, tstamp, fp);
1111         printed += fprintf_duration(duration, fp);
1112
1113         if (trace->multiple_threads) {
1114                 if (trace->show_comm)
1115                         printed += fprintf(fp, "%.14s/", thread->comm);
1116                 printed += fprintf(fp, "%d ", thread->tid);
1117         }
1118
1119         return printed;
1120 }
1121
1122 static int trace__process_event(struct trace *trace, struct machine *machine,
1123                                 union perf_event *event)
1124 {
1125         int ret = 0;
1126
1127         switch (event->header.type) {
1128         case PERF_RECORD_LOST:
1129                 color_fprintf(trace->output, PERF_COLOR_RED,
1130                               "LOST %" PRIu64 " events!\n", event->lost.lost);
1131                 ret = machine__process_lost_event(machine, event);
1132         default:
1133                 ret = machine__process_event(machine, event);
1134                 break;
1135         }
1136
1137         return ret;
1138 }
1139
1140 static int trace__tool_process(struct perf_tool *tool,
1141                                union perf_event *event,
1142                                struct perf_sample *sample __maybe_unused,
1143                                struct machine *machine)
1144 {
1145         struct trace *trace = container_of(tool, struct trace, tool);
1146         return trace__process_event(trace, machine, event);
1147 }
1148
1149 static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
1150 {
1151         int err = symbol__init();
1152
1153         if (err)
1154                 return err;
1155
1156         trace->host = machine__new_host();
1157         if (trace->host == NULL)
1158                 return -ENOMEM;
1159
1160         if (perf_target__has_task(&trace->opts.target)) {
1161                 err = perf_event__synthesize_thread_map(&trace->tool, evlist->threads,
1162                                                         trace__tool_process,
1163                                                         trace->host);
1164         } else {
1165                 err = perf_event__synthesize_threads(&trace->tool, trace__tool_process,
1166                                                      trace->host);
1167         }
1168
1169         if (err)
1170                 symbol__exit();
1171
1172         return err;
1173 }
1174
1175 static int syscall__set_arg_fmts(struct syscall *sc)
1176 {
1177         struct format_field *field;
1178         int idx = 0;
1179
1180         sc->arg_scnprintf = calloc(sc->tp_format->format.nr_fields - 1, sizeof(void *));
1181         if (sc->arg_scnprintf == NULL)
1182                 return -1;
1183
1184         if (sc->fmt)
1185                 sc->arg_parm = sc->fmt->arg_parm;
1186
1187         for (field = sc->tp_format->format.fields->next; field; field = field->next) {
1188                 if (sc->fmt && sc->fmt->arg_scnprintf[idx])
1189                         sc->arg_scnprintf[idx] = sc->fmt->arg_scnprintf[idx];
1190                 else if (field->flags & FIELD_IS_POINTER)
1191                         sc->arg_scnprintf[idx] = syscall_arg__scnprintf_hex;
1192                 ++idx;
1193         }
1194
1195         return 0;
1196 }
1197
1198 static int trace__read_syscall_info(struct trace *trace, int id)
1199 {
1200         char tp_name[128];
1201         struct syscall *sc;
1202         const char *name = audit_syscall_to_name(id, trace->audit.machine);
1203
1204         if (name == NULL)
1205                 return -1;
1206
1207         if (id > trace->syscalls.max) {
1208                 struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc));
1209
1210                 if (nsyscalls == NULL)
1211                         return -1;
1212
1213                 if (trace->syscalls.max != -1) {
1214                         memset(nsyscalls + trace->syscalls.max + 1, 0,
1215                                (id - trace->syscalls.max) * sizeof(*sc));
1216                 } else {
1217                         memset(nsyscalls, 0, (id + 1) * sizeof(*sc));
1218                 }
1219
1220                 trace->syscalls.table = nsyscalls;
1221                 trace->syscalls.max   = id;
1222         }
1223
1224         sc = trace->syscalls.table + id;
1225         sc->name = name;
1226
1227         if (trace->ev_qualifier) {
1228                 bool in = strlist__find(trace->ev_qualifier, name) != NULL;
1229
1230                 if (!(in ^ trace->not_ev_qualifier)) {
1231                         sc->filtered = true;
1232                         /*
1233                          * No need to do read tracepoint information since this will be
1234                          * filtered out.
1235                          */
1236                         return 0;
1237                 }
1238         }
1239
1240         sc->fmt  = syscall_fmt__find(sc->name);
1241
1242         snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name);
1243         sc->tp_format = event_format__new("syscalls", tp_name);
1244
1245         if (sc->tp_format == NULL && sc->fmt && sc->fmt->alias) {
1246                 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias);
1247                 sc->tp_format = event_format__new("syscalls", tp_name);
1248         }
1249
1250         if (sc->tp_format == NULL)
1251                 return -1;
1252
1253         return syscall__set_arg_fmts(sc);
1254 }
1255
1256 static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
1257                                       unsigned long *args, struct trace *trace,
1258                                       struct thread *thread)
1259 {
1260         size_t printed = 0;
1261
1262         if (sc->tp_format != NULL) {
1263                 struct format_field *field;
1264                 u8 bit = 1;
1265                 struct syscall_arg arg = {
1266                         .idx    = 0,
1267                         .mask   = 0,
1268                         .trace  = trace,
1269                         .thread = thread,
1270                 };
1271
1272                 for (field = sc->tp_format->format.fields->next; field;
1273                      field = field->next, ++arg.idx, bit <<= 1) {
1274                         if (arg.mask & bit)
1275                                 continue;
1276                         /*
1277                          * Suppress this argument if its value is zero and
1278                          * and we don't have a string associated in an
1279                          * strarray for it.
1280                          */
1281                         if (args[arg.idx] == 0 &&
1282                             !(sc->arg_scnprintf &&
1283                               sc->arg_scnprintf[arg.idx] == SCA_STRARRAY &&
1284                               sc->arg_parm[arg.idx]))
1285                                 continue;
1286
1287                         printed += scnprintf(bf + printed, size - printed,
1288                                              "%s%s: ", printed ? ", " : "", field->name);
1289                         if (sc->arg_scnprintf && sc->arg_scnprintf[arg.idx]) {
1290                                 arg.val = args[arg.idx];
1291                                 if (sc->arg_parm)
1292                                         arg.parm = sc->arg_parm[arg.idx];
1293                                 printed += sc->arg_scnprintf[arg.idx](bf + printed,
1294                                                                       size - printed, &arg);
1295                         } else {
1296                                 printed += scnprintf(bf + printed, size - printed,
1297                                                      "%ld", args[arg.idx]);
1298                         }
1299                 }
1300         } else {
1301                 int i = 0;
1302
1303                 while (i < 6) {
1304                         printed += scnprintf(bf + printed, size - printed,
1305                                              "%sarg%d: %ld",
1306                                              printed ? ", " : "", i, args[i]);
1307                         ++i;
1308                 }
1309         }
1310
1311         return printed;
1312 }
1313
/* Signature of the per tracepoint sample handlers stored in evsel->handler.func. */
typedef int (*tracepoint_handler)(struct trace *trace,
                                  struct perf_evsel *evsel,
                                  struct perf_sample *sample);
1316
1317 static struct syscall *trace__syscall_info(struct trace *trace,
1318                                            struct perf_evsel *evsel, int id)
1319 {
1320
1321         if (id < 0) {
1322
1323                 /*
1324                  * XXX: Noticed on x86_64, reproduced as far back as 3.0.36, haven't tried
1325                  * before that, leaving at a higher verbosity level till that is
1326                  * explained. Reproduced with plain ftrace with:
1327                  *
1328                  * echo 1 > /t/events/raw_syscalls/sys_exit/enable
1329                  * grep "NR -1 " /t/trace_pipe
1330                  *
1331                  * After generating some load on the machine.
1332                  */
1333                 if (verbose > 1) {
1334                         static u64 n;
1335                         fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n",
1336                                 id, perf_evsel__name(evsel), ++n);
1337                 }
1338                 return NULL;
1339         }
1340
1341         if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) &&
1342             trace__read_syscall_info(trace, id))
1343                 goto out_cant_read;
1344
1345         if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL))
1346                 goto out_cant_read;
1347
1348         return &trace->syscalls.table[id];
1349
1350 out_cant_read:
1351         if (verbose) {
1352                 fprintf(trace->output, "Problems reading syscall %d", id);
1353                 if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL)
1354                         fprintf(trace->output, "(%s)", trace->syscalls.table[id].name);
1355                 fputs(" information\n", trace->output);
1356         }
1357         return NULL;
1358 }
1359
1360 static void thread__update_stats(struct thread_trace *ttrace,
1361                                  int id, struct perf_sample *sample)
1362 {
1363         struct int_node *inode;
1364         struct stats *stats;
1365         u64 duration = 0;
1366
1367         inode = intlist__findnew(ttrace->syscall_stats, id);
1368         if (inode == NULL)
1369                 return;
1370
1371         stats = inode->priv;
1372         if (stats == NULL) {
1373                 stats = malloc(sizeof(struct stats));
1374                 if (stats == NULL)
1375                         return;
1376                 init_stats(stats);
1377                 inode->priv = stats;
1378         }
1379
1380         if (ttrace->entry_time && sample->time > ttrace->entry_time)
1381                 duration = sample->time - ttrace->entry_time;
1382
1383         update_stats(stats, duration);
1384 }
1385
1386 static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
1387                             struct perf_sample *sample)
1388 {
1389         char *msg;
1390         void *args;
1391         size_t printed = 0;
1392         struct thread *thread;
1393         int id = perf_evsel__intval(evsel, sample, "id");
1394         struct syscall *sc = trace__syscall_info(trace, evsel, id);
1395         struct thread_trace *ttrace;
1396
1397         if (sc == NULL)
1398                 return -1;
1399
1400         if (sc->filtered)
1401                 return 0;
1402
1403         thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1404         ttrace = thread__trace(thread, trace->output);
1405         if (ttrace == NULL)
1406                 return -1;
1407
1408         args = perf_evsel__rawptr(evsel, sample, "args");
1409         if (args == NULL) {
1410                 fprintf(trace->output, "Problems reading syscall arguments\n");
1411                 return -1;
1412         }
1413
1414         ttrace = thread->priv;
1415
1416         if (ttrace->entry_str == NULL) {
1417                 ttrace->entry_str = malloc(1024);
1418                 if (!ttrace->entry_str)
1419                         return -1;
1420         }
1421
1422         ttrace->entry_time = sample->time;
1423         msg = ttrace->entry_str;
1424         printed += scnprintf(msg + printed, 1024 - printed, "%s(", sc->name);
1425
1426         printed += syscall__scnprintf_args(sc, msg + printed, 1024 - printed,
1427                                            args, trace, thread);
1428
1429         if (!strcmp(sc->name, "exit_group") || !strcmp(sc->name, "exit")) {
1430                 if (!trace->duration_filter) {
1431                         trace__fprintf_entry_head(trace, thread, 1, sample->time, trace->output);
1432                         fprintf(trace->output, "%-70s\n", ttrace->entry_str);
1433                 }
1434         } else
1435                 ttrace->entry_pending = true;
1436
1437         return 0;
1438 }
1439
1440 static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
1441                            struct perf_sample *sample)
1442 {
1443         int ret;
1444         u64 duration = 0;
1445         struct thread *thread;
1446         int id = perf_evsel__intval(evsel, sample, "id");
1447         struct syscall *sc = trace__syscall_info(trace, evsel, id);
1448         struct thread_trace *ttrace;
1449
1450         if (sc == NULL)
1451                 return -1;
1452
1453         if (sc->filtered)
1454                 return 0;
1455
1456         thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1457         ttrace = thread__trace(thread, trace->output);
1458         if (ttrace == NULL)
1459                 return -1;
1460
1461         if (trace->summary)
1462                 thread__update_stats(ttrace, id, sample);
1463
1464         ret = perf_evsel__intval(evsel, sample, "ret");
1465
1466         if (id == trace->audit.open_id && ret >= 0 && trace->last_vfs_getname) {
1467                 trace__set_fd_pathname(thread, ret, trace->last_vfs_getname);
1468                 trace->last_vfs_getname = NULL;
1469                 ++trace->stats.vfs_getname;
1470         }
1471
1472         ttrace = thread->priv;
1473
1474         ttrace->exit_time = sample->time;
1475
1476         if (ttrace->entry_time) {
1477                 duration = sample->time - ttrace->entry_time;
1478                 if (trace__filter_duration(trace, duration))
1479                         goto out;
1480         } else if (trace->duration_filter)
1481                 goto out;
1482
1483         trace__fprintf_entry_head(trace, thread, duration, sample->time, trace->output);
1484
1485         if (ttrace->entry_pending) {
1486                 fprintf(trace->output, "%-70s", ttrace->entry_str);
1487         } else {
1488                 fprintf(trace->output, " ... [");
1489                 color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued");
1490                 fprintf(trace->output, "]: %s()", sc->name);
1491         }
1492
1493         if (sc->fmt == NULL) {
1494 signed_print:
1495                 fprintf(trace->output, ") = %d", ret);
1496         } else if (ret < 0 && sc->fmt->errmsg) {
1497                 char bf[256];
1498                 const char *emsg = strerror_r(-ret, bf, sizeof(bf)),
1499                            *e = audit_errno_to_name(-ret);
1500
1501                 fprintf(trace->output, ") = -1 %s %s", e, emsg);
1502         } else if (ret == 0 && sc->fmt->timeout)
1503                 fprintf(trace->output, ") = 0 Timeout");
1504         else if (sc->fmt->hexret)
1505                 fprintf(trace->output, ") = %#x", ret);
1506         else
1507                 goto signed_print;
1508
1509         fputc('\n', trace->output);
1510 out:
1511         ttrace->entry_pending = false;
1512
1513         return 0;
1514 }
1515
1516 static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel,
1517                               struct perf_sample *sample)
1518 {
1519         trace->last_vfs_getname = perf_evsel__rawptr(evsel, sample, "pathname");
1520         return 0;
1521 }
1522
1523 static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel,
1524                                      struct perf_sample *sample)
1525 {
1526         u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
1527         double runtime_ms = (double)runtime / NSEC_PER_MSEC;
1528         struct thread *thread = machine__findnew_thread(trace->host,
1529                                                         sample->pid,
1530                                                         sample->tid);
1531         struct thread_trace *ttrace = thread__trace(thread, trace->output);
1532
1533         if (ttrace == NULL)
1534                 goto out_dump;
1535
1536         ttrace->runtime_ms += runtime_ms;
1537         trace->runtime_ms += runtime_ms;
1538         return 0;
1539
1540 out_dump:
1541         fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n",
1542                evsel->name,
1543                perf_evsel__strval(evsel, sample, "comm"),
1544                (pid_t)perf_evsel__intval(evsel, sample, "pid"),
1545                runtime,
1546                perf_evsel__intval(evsel, sample, "vruntime"));
1547         return 0;
1548 }
1549
1550 static bool skip_sample(struct trace *trace, struct perf_sample *sample)
1551 {
1552         if ((trace->pid_list && intlist__find(trace->pid_list, sample->pid)) ||
1553             (trace->tid_list && intlist__find(trace->tid_list, sample->tid)))
1554                 return false;
1555
1556         if (trace->pid_list || trace->tid_list)
1557                 return true;
1558
1559         return false;
1560 }
1561
1562 static int trace__process_sample(struct perf_tool *tool,
1563                                  union perf_event *event __maybe_unused,
1564                                  struct perf_sample *sample,
1565                                  struct perf_evsel *evsel,
1566                                  struct machine *machine __maybe_unused)
1567 {
1568         struct trace *trace = container_of(tool, struct trace, tool);
1569         int err = 0;
1570
1571         tracepoint_handler handler = evsel->handler.func;
1572
1573         if (skip_sample(trace, sample))
1574                 return 0;
1575
1576         if (!trace->full_time && trace->base_time == 0)
1577                 trace->base_time = sample->time;
1578
1579         if (handler)
1580                 handler(trace, evsel, sample);
1581
1582         return err;
1583 }
1584
1585 static bool
1586 perf_session__has_tp(struct perf_session *session, const char *name)
1587 {
1588         struct perf_evsel *evsel;
1589
1590         evsel = perf_evlist__find_tracepoint_by_name(session->evlist, name);
1591
1592         return evsel != NULL;
1593 }
1594
1595 static int parse_target_str(struct trace *trace)
1596 {
1597         if (trace->opts.target.pid) {
1598                 trace->pid_list = intlist__new(trace->opts.target.pid);
1599                 if (trace->pid_list == NULL) {
1600                         pr_err("Error parsing process id string\n");
1601                         return -EINVAL;
1602                 }
1603         }
1604
1605         if (trace->opts.target.tid) {
1606                 trace->tid_list = intlist__new(trace->opts.target.tid);
1607                 if (trace->tid_list == NULL) {
1608                         pr_err("Error parsing thread id string\n");
1609                         return -EINVAL;
1610                 }
1611         }
1612
1613         return 0;
1614 }
1615
/*
 * Implement 'perf trace record': run cmd_record() with a fixed set of
 * options selecting the raw_syscalls enter/exit tracepoints, followed by
 * the caller's own arguments.
 *
 * Returns -ENOMEM when the argument vector cannot be allocated, otherwise
 * whatever cmd_record() returns.
 */
static int trace__record(int argc, const char **argv)
{
        const char * const record_args[] = {
                "record",
                "-R",
                "-m", "1024",
                "-c", "1",
                "-e", "raw_syscalls:sys_enter,raw_syscalls:sys_exit",
        };
        const unsigned int nr_fixed = ARRAY_SIZE(record_args);
        const unsigned int rec_argc = nr_fixed + argc;
        const char **rec_argv;
        unsigned int i;

        /* One spare slot, left zeroed, terminates the vector */
        rec_argv = calloc(rec_argc + 1, sizeof(char *));
        if (rec_argv == NULL)
                return -ENOMEM;

        for (i = 0; i < rec_argc; i++)
                rec_argv[i] = i < nr_fixed ? record_args[i] : argv[i - nr_fixed];

        return cmd_record(rec_argc, rec_argv, NULL);
}
1642
1643 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp);
1644
1645 static void perf_evlist__add_vfs_getname(struct perf_evlist *evlist)
1646 {
1647         struct perf_evsel *evsel = perf_evsel__newtp("probe", "vfs_getname",
1648                                                      evlist->nr_entries);
1649         if (evsel == NULL)
1650                 return;
1651
1652         if (perf_evsel__field(evsel, "pathname") == NULL) {
1653                 perf_evsel__delete(evsel);
1654                 return;
1655         }
1656
1657         evsel->handler.func = trace__vfs_getname;
1658         perf_evlist__add(evlist, evsel);
1659 }
1660
1661 static int trace__run(struct trace *trace, int argc, const char **argv)
1662 {
1663         struct perf_evlist *evlist = perf_evlist__new();
1664         struct perf_evsel *evsel;
1665         int err = -1, i;
1666         unsigned long before;
1667         const bool forks = argc > 0;
1668
1669         trace->live = true;
1670
1671         if (evlist == NULL) {
1672                 fprintf(trace->output, "Not enough memory to run!\n");
1673                 goto out;
1674         }
1675
1676         if (perf_evlist__add_newtp(evlist, "raw_syscalls", "sys_enter", trace__sys_enter) ||
1677                 perf_evlist__add_newtp(evlist, "raw_syscalls", "sys_exit", trace__sys_exit))
1678                 goto out_error_tp;
1679
1680         perf_evlist__add_vfs_getname(evlist);
1681
1682         if (trace->sched &&
1683                 perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
1684                                 trace__sched_stat_runtime))
1685                 goto out_error_tp;
1686
1687         err = perf_evlist__create_maps(evlist, &trace->opts.target);
1688         if (err < 0) {
1689                 fprintf(trace->output, "Problems parsing the target to trace, check your options!\n");
1690                 goto out_delete_evlist;
1691         }
1692
1693         err = trace__symbols_init(trace, evlist);
1694         if (err < 0) {
1695                 fprintf(trace->output, "Problems initializing symbol libraries!\n");
1696                 goto out_delete_maps;
1697         }
1698
1699         perf_evlist__config(evlist, &trace->opts);
1700
1701         signal(SIGCHLD, sig_handler);
1702         signal(SIGINT, sig_handler);
1703
1704         if (forks) {
1705                 err = perf_evlist__prepare_workload(evlist, &trace->opts.target,
1706                                                     argv, false, false);
1707                 if (err < 0) {
1708                         fprintf(trace->output, "Couldn't run the workload!\n");
1709                         goto out_delete_maps;
1710                 }
1711         }
1712
1713         err = perf_evlist__open(evlist);
1714         if (err < 0) {
1715                 fprintf(trace->output, "Couldn't create the events: %s\n", strerror(errno));
1716                 goto out_delete_maps;
1717         }
1718
1719         err = perf_evlist__mmap(evlist, UINT_MAX, false);
1720         if (err < 0) {
1721                 fprintf(trace->output, "Couldn't mmap the events: %s\n", strerror(errno));
1722                 goto out_close_evlist;
1723         }
1724
1725         perf_evlist__enable(evlist);
1726
1727         if (forks)
1728                 perf_evlist__start_workload(evlist);
1729
1730         trace->multiple_threads = evlist->threads->map[0] == -1 || evlist->threads->nr > 1;
1731 again:
1732         before = trace->nr_events;
1733
1734         for (i = 0; i < evlist->nr_mmaps; i++) {
1735                 union perf_event *event;
1736
1737                 while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) {
1738                         const u32 type = event->header.type;
1739                         tracepoint_handler handler;
1740                         struct perf_sample sample;
1741
1742                         ++trace->nr_events;
1743
1744                         err = perf_evlist__parse_sample(evlist, event, &sample);
1745                         if (err) {
1746                                 fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err);
1747                                 continue;
1748                         }
1749
1750                         if (!trace->full_time && trace->base_time == 0)
1751                                 trace->base_time = sample.time;
1752
1753                         if (type != PERF_RECORD_SAMPLE) {
1754                                 trace__process_event(trace, trace->host, event);
1755                                 continue;
1756                         }
1757
1758                         evsel = perf_evlist__id2evsel(evlist, sample.id);
1759                         if (evsel == NULL) {
1760                                 fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample.id);
1761                                 continue;
1762                         }
1763
1764                         if (sample.raw_data == NULL) {
1765                                 fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
1766                                        perf_evsel__name(evsel), sample.tid,
1767                                        sample.cpu, sample.raw_size);
1768                                 continue;
1769                         }
1770
1771                         handler = evsel->handler.func;
1772                         handler(trace, evsel, &sample);
1773
1774                         if (done)
1775                                 goto out_unmap_evlist;
1776                 }
1777         }
1778
1779         if (trace->nr_events == before) {
1780                 if (done)
1781                         goto out_unmap_evlist;
1782
1783                 poll(evlist->pollfd, evlist->nr_fds, -1);
1784         }
1785
1786         if (done)
1787                 perf_evlist__disable(evlist);
1788         else
1789                 goto again;
1790
1791 out_unmap_evlist:
1792         if (!err) {
1793                 if (trace->summary)
1794                         trace__fprintf_thread_summary(trace, trace->output);
1795
1796                 if (trace->show_tool_stats) {
1797                         fprintf(trace->output, "Stats:\n "
1798                                                " vfs_getname : %" PRIu64 "\n"
1799                                                " proc_getname: %" PRIu64 "\n",
1800                                 trace->stats.vfs_getname,
1801                                 trace->stats.proc_getname);
1802                 }
1803         }
1804
1805         perf_evlist__munmap(evlist);
1806 out_close_evlist:
1807         perf_evlist__close(evlist);
1808 out_delete_maps:
1809         perf_evlist__delete_maps(evlist);
1810 out_delete_evlist:
1811         perf_evlist__delete(evlist);
1812 out:
1813         trace->live = false;
1814         return err;
1815 out_error_tp:
1816         switch(errno) {
1817         case ENOENT:
1818                 fputs("Error:\tUnable to find debugfs\n"
1819                       "Hint:\tWas your kernel was compiled with debugfs support?\n"
1820                       "Hint:\tIs the debugfs filesystem mounted?\n"
1821                       "Hint:\tTry 'sudo mount -t debugfs nodev /sys/kernel/debug'\n",
1822                       trace->output);
1823                 break;
1824         case EACCES:
1825                 fprintf(trace->output,
1826                         "Error:\tNo permissions to read %s/tracing/events/raw_syscalls\n"
1827                         "Hint:\tTry 'sudo mount -o remount,mode=755 %s'\n",
1828                         debugfs_mountpoint, debugfs_mountpoint);
1829                 break;
1830         default: {
1831                 char bf[256];
1832                 fprintf(trace->output, "Can't trace: %s\n",
1833                         strerror_r(errno, bf, sizeof(bf)));
1834         }
1835                 break;
1836         }
1837         goto out_delete_evlist;
1838 }
1839
1840 static int trace__replay(struct trace *trace)
1841 {
1842         const struct perf_evsel_str_handler handlers[] = {
1843                 { "raw_syscalls:sys_enter",  trace__sys_enter, },
1844                 { "raw_syscalls:sys_exit",   trace__sys_exit, },
1845                 { "probe:vfs_getname",       trace__vfs_getname, },
1846         };
1847
1848         struct perf_session *session;
1849         int err = -1;
1850
1851         trace->tool.sample        = trace__process_sample;
1852         trace->tool.mmap          = perf_event__process_mmap;
1853         trace->tool.mmap2         = perf_event__process_mmap2;
1854         trace->tool.comm          = perf_event__process_comm;
1855         trace->tool.exit          = perf_event__process_exit;
1856         trace->tool.fork          = perf_event__process_fork;
1857         trace->tool.attr          = perf_event__process_attr;
1858         trace->tool.tracing_data = perf_event__process_tracing_data;
1859         trace->tool.build_id      = perf_event__process_build_id;
1860
1861         trace->tool.ordered_samples = true;
1862         trace->tool.ordering_requires_timestamps = true;
1863
1864         /* add tid to output */
1865         trace->multiple_threads = true;
1866
1867         if (symbol__init() < 0)
1868                 return -1;
1869
1870         session = perf_session__new(input_name, O_RDONLY, 0, false,
1871                                     &trace->tool);
1872         if (session == NULL)
1873                 return -ENOMEM;
1874
1875         trace->host = &session->machines.host;
1876
1877         err = perf_session__set_tracepoints_handlers(session, handlers);
1878         if (err)
1879                 goto out;
1880
1881         if (!perf_session__has_tp(session, "raw_syscalls:sys_enter")) {
1882                 pr_err("Data file does not have raw_syscalls:sys_enter events\n");
1883                 goto out;
1884         }
1885
1886         if (!perf_session__has_tp(session, "raw_syscalls:sys_exit")) {
1887                 pr_err("Data file does not have raw_syscalls:sys_exit events\n");
1888                 goto out;
1889         }
1890
1891         err = parse_target_str(trace);
1892         if (err != 0)
1893                 goto out;
1894
1895         setup_pager();
1896
1897         err = perf_session__process_events(session, &trace->tool);
1898         if (err)
1899                 pr_err("Failed to process events, error %d", err);
1900
1901         else if (trace->summary)
1902                 trace__fprintf_thread_summary(trace, trace->output);
1903
1904 out:
1905         perf_session__delete(session);
1906
1907         return err;
1908 }
1909
/*
 * Write the banner and column headings of the per-thread summary to @fp.
 *
 * Returns the sum of the fprintf() return values for the six lines written.
 */
static size_t trace__fprintf_threads_header(FILE *fp)
{
	size_t nbytes = 0;

	/* Banner. */
	nbytes += fprintf(fp, "\n _____________________________________________________________________________\n");
	nbytes += fprintf(fp, " __)    Summary of events    (__\n\n");

	/* Column headings: per-thread line, then per-syscall line and units. */
	nbytes += fprintf(fp, "              [ task - pid ]     [ events ] [ ratio ]  [ runtime ]\n");
	nbytes += fprintf(fp, "                                  syscall  count    min     max    avg  stddev\n");
	nbytes += fprintf(fp, "                                                   msec    msec   msec     %%\n");

	/* Closing rule. */
	nbytes += fprintf(fp, " _____________________________________________________________________________\n\n");

	return nbytes;
}
1923
1924 static size_t thread__dump_stats(struct thread_trace *ttrace,
1925                                  struct trace *trace, FILE *fp)
1926 {
1927         struct stats *stats;
1928         size_t printed = 0;
1929         struct syscall *sc;
1930         struct int_node *inode = intlist__first(ttrace->syscall_stats);
1931
1932         if (inode == NULL)
1933                 return 0;
1934
1935         printed += fprintf(fp, "\n");
1936
1937         /* each int_node is a syscall */
1938         while (inode) {
1939                 stats = inode->priv;
1940                 if (stats) {
1941                         double min = (double)(stats->min) / NSEC_PER_MSEC;
1942                         double max = (double)(stats->max) / NSEC_PER_MSEC;
1943                         double avg = avg_stats(stats);
1944                         double pct;
1945                         u64 n = (u64) stats->n;
1946
1947                         pct = avg ? 100.0 * stddev_stats(stats)/avg : 0.0;
1948                         avg /= NSEC_PER_MSEC;
1949
1950                         sc = &trace->syscalls.table[inode->i];
1951                         printed += fprintf(fp, "%24s  %14s : ", "", sc->name);
1952                         printed += fprintf(fp, "%5" PRIu64 "  %8.3f  %8.3f",
1953                                            n, min, max);
1954                         printed += fprintf(fp, "  %8.3f  %6.2f\n", avg, pct);
1955                 }
1956
1957                 inode = intlist__next(inode);
1958         }
1959
1960         printed += fprintf(fp, "\n\n");
1961
1962         return printed;
1963 }
1964
/*
 * Context handed, as the opaque 'priv' pointer, to the per-thread summary
 * callback.
 */
struct summary_data {
	FILE *fp;		/* stream the summary is written to */
	struct trace *trace;	/* tool state (event totals, syscall table) */
	size_t printed;		/* running total of fprintf() return values */
};
1971
1972 static int trace__fprintf_one_thread(struct thread *thread, void *priv)
1973 {
1974         struct summary_data *data = priv;
1975         FILE *fp = data->fp;
1976         size_t printed = data->printed;
1977         struct trace *trace = data->trace;
1978         struct thread_trace *ttrace = thread->priv;
1979         const char *color;
1980         double ratio;
1981
1982         if (ttrace == NULL)
1983                 return 0;
1984
1985         ratio = (double)ttrace->nr_events / trace->nr_events * 100.0;
1986
1987         color = PERF_COLOR_NORMAL;
1988         if (ratio > 50.0)
1989                 color = PERF_COLOR_RED;
1990         else if (ratio > 25.0)
1991                 color = PERF_COLOR_GREEN;
1992         else if (ratio > 5.0)
1993                 color = PERF_COLOR_YELLOW;
1994
1995         printed += color_fprintf(fp, color, "%20s", thread->comm);
1996         printed += fprintf(fp, " - %-5d :%11lu   [", thread->tid, ttrace->nr_events);
1997         printed += color_fprintf(fp, color, "%5.1f%%", ratio);
1998         printed += fprintf(fp, " ] %10.3f ms\n", ttrace->runtime_ms);
1999         printed += thread__dump_stats(ttrace, trace, fp);
2000
2001         data->printed += printed;
2002
2003         return 0;
2004 }
2005
2006 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
2007 {
2008         struct summary_data data = {
2009                 .fp = fp,
2010                 .trace = trace
2011         };
2012         data.printed = trace__fprintf_threads_header(fp);
2013
2014         machine__for_each_thread(trace->host, trace__fprintf_one_thread, &data);
2015
2016         return data.printed;
2017 }
2018
2019 static int trace__set_duration(const struct option *opt, const char *str,
2020                                int unset __maybe_unused)
2021 {
2022         struct trace *trace = opt->value;
2023
2024         trace->duration_filter = atof(str);
2025         return 0;
2026 }
2027
2028 static int trace__open_output(struct trace *trace, const char *filename)
2029 {
2030         struct stat st;
2031
2032         if (!stat(filename, &st) && st.st_size) {
2033                 char oldname[PATH_MAX];
2034
2035                 scnprintf(oldname, sizeof(oldname), "%s.old", filename);
2036                 unlink(oldname);
2037                 rename(filename, oldname);
2038         }
2039
2040         trace->output = fopen(filename, "w");
2041
2042         return trace->output == NULL ? -errno : 0;
2043 }
2044
2045 int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
2046 {
2047         const char * const trace_usage[] = {
2048                 "perf trace [<options>] [<command>]",
2049                 "perf trace [<options>] -- <command> [<options>]",
2050                 "perf trace record [<options>] [<command>]",
2051                 "perf trace record [<options>] -- <command> [<options>]",
2052                 NULL
2053         };
2054         struct trace trace = {
2055                 .audit = {
2056                         .machine = audit_detect_machine(),
2057                         .open_id = audit_name_to_syscall("open", trace.audit.machine),
2058                 },
2059                 .syscalls = {
2060                         . max = -1,
2061                 },
2062                 .opts = {
2063                         .target = {
2064                                 .uid       = UINT_MAX,
2065                                 .uses_mmap = true,
2066                         },
2067                         .user_freq     = UINT_MAX,
2068                         .user_interval = ULLONG_MAX,
2069                         .no_delay      = true,
2070                         .mmap_pages    = 1024,
2071                 },
2072                 .output = stdout,
2073                 .show_comm = true,
2074         };
2075         const char *output_name = NULL;
2076         const char *ev_qualifier_str = NULL;
2077         const struct option trace_options[] = {
2078         OPT_BOOLEAN(0, "comm", &trace.show_comm,
2079                     "show the thread COMM next to its id"),
2080         OPT_BOOLEAN(0, "tool_stats", &trace.show_tool_stats, "show tool stats"),
2081         OPT_STRING('e', "expr", &ev_qualifier_str, "expr",
2082                     "list of events to trace"),
2083         OPT_STRING('o', "output", &output_name, "file", "output file name"),
2084         OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"),
2085         OPT_STRING('p', "pid", &trace.opts.target.pid, "pid",
2086                     "trace events on existing process id"),
2087         OPT_STRING('t', "tid", &trace.opts.target.tid, "tid",
2088                     "trace events on existing thread id"),
2089         OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide,
2090                     "system-wide collection from all CPUs"),
2091         OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu",
2092                     "list of cpus to monitor"),
2093         OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit,
2094                     "child tasks do not inherit counters"),
2095         OPT_CALLBACK('m', "mmap-pages", &trace.opts.mmap_pages, "pages",
2096                      "number of mmap data pages",
2097                      perf_evlist__parse_mmap_pages),
2098         OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user",
2099                    "user to profile"),
2100         OPT_CALLBACK(0, "duration", &trace, "float",
2101                      "show only events with duration > N.M ms",
2102                      trace__set_duration),
2103         OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"),
2104         OPT_INCR('v', "verbose", &verbose, "be more verbose"),
2105         OPT_BOOLEAN('T', "time", &trace.full_time,
2106                     "Show full timestamp, not time relative to first start"),
2107         OPT_BOOLEAN(0, "summary", &trace.summary,
2108                     "Show syscall summary with statistics"),
2109         OPT_END()
2110         };
2111         int err;
2112         char bf[BUFSIZ];
2113
2114         if ((argc > 1) && (strcmp(argv[1], "record") == 0))
2115                 return trace__record(argc-2, &argv[2]);
2116
2117         argc = parse_options(argc, argv, trace_options, trace_usage, 0);
2118
2119         if (output_name != NULL) {
2120                 err = trace__open_output(&trace, output_name);
2121                 if (err < 0) {
2122                         perror("failed to create output file");
2123                         goto out;
2124                 }
2125         }
2126
2127         if (ev_qualifier_str != NULL) {
2128                 const char *s = ev_qualifier_str;
2129
2130                 trace.not_ev_qualifier = *s == '!';
2131                 if (trace.not_ev_qualifier)
2132                         ++s;
2133                 trace.ev_qualifier = strlist__new(true, s);
2134                 if (trace.ev_qualifier == NULL) {
2135                         fputs("Not enough memory to parse event qualifier",
2136                               trace.output);
2137                         err = -ENOMEM;
2138                         goto out_close;
2139                 }
2140         }
2141
2142         err = perf_target__validate(&trace.opts.target);
2143         if (err) {
2144                 perf_target__strerror(&trace.opts.target, err, bf, sizeof(bf));
2145                 fprintf(trace.output, "%s", bf);
2146                 goto out_close;
2147         }
2148
2149         err = perf_target__parse_uid(&trace.opts.target);
2150         if (err) {
2151                 perf_target__strerror(&trace.opts.target, err, bf, sizeof(bf));
2152                 fprintf(trace.output, "%s", bf);
2153                 goto out_close;
2154         }
2155
2156         if (!argc && perf_target__none(&trace.opts.target))
2157                 trace.opts.target.system_wide = true;
2158
2159         if (input_name)
2160                 err = trace__replay(&trace);
2161         else
2162                 err = trace__run(&trace, argc, argv);
2163
2164 out_close:
2165         if (output_name != NULL)
2166                 fclose(trace.output);
2167 out:
2168         return err;
2169 }