3fa1dce6d43e1250f3fab3f4413ae460bf7e7cc5
[firefly-linux-kernel-4.4.55.git] / tools / perf / builtin-trace.c
#include <traceevent/event-parse.h>
#include "builtin.h"
#include "util/color.h"
#include "util/debug.h"
#include "util/evlist.h"
#include "util/machine.h"
#include "util/session.h"
#include "util/thread.h"
#include "util/parse-options.h"
#include "util/strlist.h"
#include "util/intlist.h"
#include "util/thread_map.h"
#include "util/stat.h"

#include <libaudit.h>
#include <stdlib.h>
#include <string.h>
#include <sys/eventfd.h>
#include <sys/mman.h>
#include <linux/futex.h>
20
/*
 * Fallback values for mmap()/madvise() constants that the C library
 * headers on older distros may not provide.
 */
#if !defined(MAP_STACK)
# define MAP_STACK		0x20000
#endif

#if !defined(MADV_HWPOISON)
# define MADV_HWPOISON		100
#endif

#if !defined(MADV_MERGEABLE)
# define MADV_MERGEABLE		12
#endif

#if !defined(MADV_UNMERGEABLE)
# define MADV_UNMERGEABLE	13
#endif
37
38 struct tp_field {
39         int offset;
40         union {
41                 u64 (*integer)(struct tp_field *field, struct perf_sample *sample);
42                 void *(*pointer)(struct tp_field *field, struct perf_sample *sample);
43         };
44 };
45
46 #define TP_UINT_FIELD(bits) \
47 static u64 tp_field__u##bits(struct tp_field *field, struct perf_sample *sample) \
48 { \
49         return *(u##bits *)(sample->raw_data + field->offset); \
50 }
51
52 TP_UINT_FIELD(8);
53 TP_UINT_FIELD(16);
54 TP_UINT_FIELD(32);
55 TP_UINT_FIELD(64);
56
57 #define TP_UINT_FIELD__SWAPPED(bits) \
58 static u64 tp_field__swapped_u##bits(struct tp_field *field, struct perf_sample *sample) \
59 { \
60         u##bits value = *(u##bits *)(sample->raw_data + field->offset); \
61         return bswap_##bits(value);\
62 }
63
64 TP_UINT_FIELD__SWAPPED(16);
65 TP_UINT_FIELD__SWAPPED(32);
66 TP_UINT_FIELD__SWAPPED(64);
67
68 static int tp_field__init_uint(struct tp_field *field,
69                                struct format_field *format_field,
70                                bool needs_swap)
71 {
72         field->offset = format_field->offset;
73
74         switch (format_field->size) {
75         case 1:
76                 field->integer = tp_field__u8;
77                 break;
78         case 2:
79                 field->integer = needs_swap ? tp_field__swapped_u16 : tp_field__u16;
80                 break;
81         case 4:
82                 field->integer = needs_swap ? tp_field__swapped_u32 : tp_field__u32;
83                 break;
84         case 8:
85                 field->integer = needs_swap ? tp_field__swapped_u64 : tp_field__u64;
86                 break;
87         default:
88                 return -1;
89         }
90
91         return 0;
92 }
93
94 static void *tp_field__ptr(struct tp_field *field, struct perf_sample *sample)
95 {
96         return sample->raw_data + field->offset;
97 }
98
99 static int tp_field__init_ptr(struct tp_field *field, struct format_field *format_field)
100 {
101         field->offset = format_field->offset;
102         field->pointer = tp_field__ptr;
103         return 0;
104 }
105
106 struct syscall_tp {
107         struct tp_field id;
108         union {
109                 struct tp_field args, ret;
110         };
111 };
112
113 static int perf_evsel__init_tp_uint_field(struct perf_evsel *evsel,
114                                           struct tp_field *field,
115                                           const char *name)
116 {
117         struct format_field *format_field = perf_evsel__field(evsel, name);
118
119         if (format_field == NULL)
120                 return -1;
121
122         return tp_field__init_uint(field, format_field, evsel->needs_swap);
123 }
124
/*
 * Initialise the integer accessor stored in member @name of the
 * struct syscall_tp hanging off evsel->priv, using the tracepoint field
 * that carries the same name.  Evaluates to the result of
 * perf_evsel__init_tp_uint_field().
 *
 * @evsel is parenthesized so that any pointer-valued expression can be
 * passed; note that it is evaluated twice.
 */
#define perf_evsel__init_sc_tp_uint_field(evsel, name) \
	({ struct syscall_tp *sc = (evsel)->priv;\
	   perf_evsel__init_tp_uint_field((evsel), &sc->name, #name); })
128
/*
 * Look up the tracepoint field called @name in @evsel and set @field up
 * to return a pointer to it inside the raw payload.
 *
 * Returns -1 if the field does not exist, otherwise whatever
 * tp_field__init_ptr() returns.
 */
static int perf_evsel__init_tp_ptr_field(struct perf_evsel *evsel,
					 struct tp_field *field,
					 const char *name)
{
	struct format_field *fmt = perf_evsel__field(evsel, name);

	if (fmt != NULL)
		return tp_field__init_ptr(field, fmt);

	return -1;
}
140
/*
 * Initialise the pointer accessor stored in member @name of the
 * struct syscall_tp hanging off evsel->priv, using the tracepoint field
 * that carries the same name.  Evaluates to the result of
 * perf_evsel__init_tp_ptr_field().
 *
 * @evsel is parenthesized so that any pointer-valued expression can be
 * passed; note that it is evaluated twice.
 */
#define perf_evsel__init_sc_tp_ptr_field(evsel, name) \
	({ struct syscall_tp *sc = (evsel)->priv;\
	   perf_evsel__init_tp_ptr_field((evsel), &sc->name, #name); })
144
145 static void perf_evsel__delete_priv(struct perf_evsel *evsel)
146 {
147         free(evsel->priv);
148         evsel->priv = NULL;
149         perf_evsel__delete(evsel);
150 }
151
152 static int perf_evsel__init_syscall_tp(struct perf_evsel *evsel, void *handler)
153 {
154         evsel->priv = malloc(sizeof(struct syscall_tp));
155         if (evsel->priv != NULL) {
156                 if (perf_evsel__init_sc_tp_uint_field(evsel, id))
157                         goto out_delete;
158
159                 evsel->handler = handler;
160                 return 0;
161         }
162
163         return -ENOMEM;
164
165 out_delete:
166         free(evsel->priv);
167         evsel->priv = NULL;
168         return -ENOENT;
169 }
170
/*
 * Create an evsel for the "raw_syscalls:<direction>" tracepoint and
 * prepare it with perf_evsel__init_syscall_tp().
 *
 * Returns the new evsel, or NULL if it could not be created or
 * initialised (a partially set up evsel is destroyed before returning).
 */
static struct perf_evsel *perf_evsel__syscall_newtp(const char *direction, void *handler)
{
	struct perf_evsel *evsel = perf_evsel__newtp("raw_syscalls", direction);

	if (evsel == NULL)
		return NULL;

	if (perf_evsel__init_syscall_tp(evsel, handler) != 0) {
		perf_evsel__delete_priv(evsel);
		return NULL;
	}

	return evsel;
}
186
/*
 * Read member @name of the evsel's struct syscall_tp from @sample through
 * its ->integer accessor; evaluates to the u64 that accessor returns.
 *
 * @evsel is parenthesized so that any pointer-valued expression can be
 * passed.
 */
#define perf_evsel__sc_tp_uint(evsel, name, sample) \
	({ struct syscall_tp *fields = (evsel)->priv; \
	   fields->name.integer(&fields->name, sample); })
190
/*
 * Read member @name of the evsel's struct syscall_tp from @sample through
 * its ->pointer accessor; evaluates to the address that accessor returns.
 *
 * @evsel is parenthesized so that any pointer-valued expression can be
 * passed.
 */
#define perf_evsel__sc_tp_ptr(evsel, name, sample) \
	({ struct syscall_tp *fields = (evsel)->priv; \
	   fields->name.pointer(&fields->name, sample); })
194
195 static int perf_evlist__add_syscall_newtp(struct perf_evlist *evlist,
196                                           void *sys_enter_handler,
197                                           void *sys_exit_handler)
198 {
199         int ret = -1;
200         struct perf_evsel *sys_enter, *sys_exit;
201
202         sys_enter = perf_evsel__syscall_newtp("sys_enter", sys_enter_handler);
203         if (sys_enter == NULL)
204                 goto out;
205
206         if (perf_evsel__init_sc_tp_ptr_field(sys_enter, args))
207                 goto out_delete_sys_enter;
208
209         sys_exit = perf_evsel__syscall_newtp("sys_exit", sys_exit_handler);
210         if (sys_exit == NULL)
211                 goto out_delete_sys_enter;
212
213         if (perf_evsel__init_sc_tp_uint_field(sys_exit, ret))
214                 goto out_delete_sys_exit;
215
216         perf_evlist__add(evlist, sys_enter);
217         perf_evlist__add(evlist, sys_exit);
218
219         ret = 0;
220 out:
221         return ret;
222
223 out_delete_sys_exit:
224         perf_evsel__delete_priv(sys_exit);
225 out_delete_sys_enter:
226         perf_evsel__delete_priv(sys_enter);
227         goto out;
228 }
229
230
231 struct syscall_arg {
232         unsigned long val;
233         struct thread *thread;
234         struct trace  *trace;
235         void          *parm;
236         u8            idx;
237         u8            mask;
238 };
239
/*
 * Table mapping a range of integer values to names.
 *
 * @offset:     value that corresponds to entries[0].
 * @nr_entries: number of slots in @entries.
 * @entries:    the names, indexed by (value - offset).
 */
struct strarray {
	int	     offset;
	int	     nr_entries;
	const char **entries;
};
245
/*
 * Define strarray__<array>, a struct strarray describing the string table
 * @array, whose first entry stands for the value 0.
 */
#define DEFINE_STRARRAY(array) struct strarray strarray__##array = { \
	.offset	    = 0, \
	.nr_entries = ARRAY_SIZE(array), \
	.entries    = array, \
}
250
/*
 * Define strarray__<array>, a struct strarray describing the string table
 * @array, whose first entry stands for the value @off.
 */
#define DEFINE_STRARRAY_OFFSET(array, off) struct strarray strarray__##array = { \
	.offset	    = off, \
	.nr_entries = ARRAY_SIZE(array), \
	.entries    = array, \
}
256
257 static size_t __syscall_arg__scnprintf_strarray(char *bf, size_t size,
258                                                 const char *intfmt,
259                                                 struct syscall_arg *arg)
260 {
261         struct strarray *sa = arg->parm;
262         int idx = arg->val - sa->offset;
263
264         if (idx < 0 || idx >= sa->nr_entries)
265                 return scnprintf(bf, size, intfmt, arg->val);
266
267         return scnprintf(bf, size, "%s", sa->entries[idx]);
268 }
269
/*
 * String table formatter that prints values without a name in decimal.
 */
static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size,
					      struct syscall_arg *arg)
{
	static const char unnamed_fmt[] = "%d";

	return __syscall_arg__scnprintf_strarray(bf, size, unnamed_fmt, arg);
}

#define SCA_STRARRAY syscall_arg__scnprintf_strarray
277
/*
 * String table formatter that prints values without a name in
 * hexadecimal.
 */
static size_t syscall_arg__scnprintf_strhexarray(char *bf, size_t size,
						 struct syscall_arg *arg)
{
	static const char unnamed_fmt[] = "%#x";

	return __syscall_arg__scnprintf_strarray(bf, size, unnamed_fmt, arg);
}

#define SCA_STRHEXARRAY syscall_arg__scnprintf_strhexarray
285
/*
 * Formatter for file descriptor arguments.  Only the prototype is given
 * here so that the formatters that follow can already refer to it.
 */
static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
					struct syscall_arg *arg);

#define SCA_FD syscall_arg__scnprintf_fd
290
291 static size_t syscall_arg__scnprintf_fd_at(char *bf, size_t size,
292                                            struct syscall_arg *arg)
293 {
294         int fd = arg->val;
295
296         if (fd == AT_FDCWD)
297                 return scnprintf(bf, size, "CWD");
298
299         return syscall_arg__scnprintf_fd(bf, size, arg);
300 }
301
302 #define SCA_FDAT syscall_arg__scnprintf_fd_at
303
/*
 * Formatter for the fd argument of close().  Only the prototype is given
 * here so that the syscall format table can already refer to it.
 */
static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
					      struct syscall_arg *arg);

#define SCA_CLOSE_FD syscall_arg__scnprintf_close_fd
308
309 static size_t syscall_arg__scnprintf_hex(char *bf, size_t size,
310                                          struct syscall_arg *arg)
311 {
312         return scnprintf(bf, size, "%#lx", arg->val);
313 }
314
315 #define SCA_HEX syscall_arg__scnprintf_hex
316
317 static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size,
318                                                struct syscall_arg *arg)
319 {
320         int printed = 0, prot = arg->val;
321
322         if (prot == PROT_NONE)
323                 return scnprintf(bf, size, "NONE");
324 #define P_MMAP_PROT(n) \
325         if (prot & PROT_##n) { \
326                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
327                 prot &= ~PROT_##n; \
328         }
329
330         P_MMAP_PROT(EXEC);
331         P_MMAP_PROT(READ);
332         P_MMAP_PROT(WRITE);
333 #ifdef PROT_SEM
334         P_MMAP_PROT(SEM);
335 #endif
336         P_MMAP_PROT(GROWSDOWN);
337         P_MMAP_PROT(GROWSUP);
338 #undef P_MMAP_PROT
339
340         if (prot)
341                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", prot);
342
343         return printed;
344 }
345
346 #define SCA_MMAP_PROT syscall_arg__scnprintf_mmap_prot
347
348 static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size,
349                                                 struct syscall_arg *arg)
350 {
351         int printed = 0, flags = arg->val;
352
353 #define P_MMAP_FLAG(n) \
354         if (flags & MAP_##n) { \
355                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
356                 flags &= ~MAP_##n; \
357         }
358
359         P_MMAP_FLAG(SHARED);
360         P_MMAP_FLAG(PRIVATE);
361 #ifdef MAP_32BIT
362         P_MMAP_FLAG(32BIT);
363 #endif
364         P_MMAP_FLAG(ANONYMOUS);
365         P_MMAP_FLAG(DENYWRITE);
366         P_MMAP_FLAG(EXECUTABLE);
367         P_MMAP_FLAG(FILE);
368         P_MMAP_FLAG(FIXED);
369         P_MMAP_FLAG(GROWSDOWN);
370 #ifdef MAP_HUGETLB
371         P_MMAP_FLAG(HUGETLB);
372 #endif
373         P_MMAP_FLAG(LOCKED);
374         P_MMAP_FLAG(NONBLOCK);
375         P_MMAP_FLAG(NORESERVE);
376         P_MMAP_FLAG(POPULATE);
377         P_MMAP_FLAG(STACK);
378 #ifdef MAP_UNINITIALIZED
379         P_MMAP_FLAG(UNINITIALIZED);
380 #endif
381 #undef P_MMAP_FLAG
382
383         if (flags)
384                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
385
386         return printed;
387 }
388
389 #define SCA_MMAP_FLAGS syscall_arg__scnprintf_mmap_flags
390
391 static size_t syscall_arg__scnprintf_madvise_behavior(char *bf, size_t size,
392                                                       struct syscall_arg *arg)
393 {
394         int behavior = arg->val;
395
396         switch (behavior) {
397 #define P_MADV_BHV(n) case MADV_##n: return scnprintf(bf, size, #n)
398         P_MADV_BHV(NORMAL);
399         P_MADV_BHV(RANDOM);
400         P_MADV_BHV(SEQUENTIAL);
401         P_MADV_BHV(WILLNEED);
402         P_MADV_BHV(DONTNEED);
403         P_MADV_BHV(REMOVE);
404         P_MADV_BHV(DONTFORK);
405         P_MADV_BHV(DOFORK);
406         P_MADV_BHV(HWPOISON);
407 #ifdef MADV_SOFT_OFFLINE
408         P_MADV_BHV(SOFT_OFFLINE);
409 #endif
410         P_MADV_BHV(MERGEABLE);
411         P_MADV_BHV(UNMERGEABLE);
412 #ifdef MADV_HUGEPAGE
413         P_MADV_BHV(HUGEPAGE);
414 #endif
415 #ifdef MADV_NOHUGEPAGE
416         P_MADV_BHV(NOHUGEPAGE);
417 #endif
418 #ifdef MADV_DONTDUMP
419         P_MADV_BHV(DONTDUMP);
420 #endif
421 #ifdef MADV_DODUMP
422         P_MADV_BHV(DODUMP);
423 #endif
424 #undef P_MADV_PHV
425         default: break;
426         }
427
428         return scnprintf(bf, size, "%#x", behavior);
429 }
430
431 #define SCA_MADV_BHV syscall_arg__scnprintf_madvise_behavior
432
433 static size_t syscall_arg__scnprintf_flock(char *bf, size_t size,
434                                            struct syscall_arg *arg)
435 {
436         int printed = 0, op = arg->val;
437
438         if (op == 0)
439                 return scnprintf(bf, size, "NONE");
440 #define P_CMD(cmd) \
441         if ((op & LOCK_##cmd) == LOCK_##cmd) { \
442                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #cmd); \
443                 op &= ~LOCK_##cmd; \
444         }
445
446         P_CMD(SH);
447         P_CMD(EX);
448         P_CMD(NB);
449         P_CMD(UN);
450         P_CMD(MAND);
451         P_CMD(RW);
452         P_CMD(READ);
453         P_CMD(WRITE);
454 #undef P_OP
455
456         if (op)
457                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", op);
458
459         return printed;
460 }
461
462 #define SCA_FLOCK syscall_arg__scnprintf_flock
463
464 static size_t syscall_arg__scnprintf_futex_op(char *bf, size_t size, struct syscall_arg *arg)
465 {
466         enum syscall_futex_args {
467                 SCF_UADDR   = (1 << 0),
468                 SCF_OP      = (1 << 1),
469                 SCF_VAL     = (1 << 2),
470                 SCF_TIMEOUT = (1 << 3),
471                 SCF_UADDR2  = (1 << 4),
472                 SCF_VAL3    = (1 << 5),
473         };
474         int op = arg->val;
475         int cmd = op & FUTEX_CMD_MASK;
476         size_t printed = 0;
477
478         switch (cmd) {
479 #define P_FUTEX_OP(n) case FUTEX_##n: printed = scnprintf(bf, size, #n);
480         P_FUTEX_OP(WAIT);           arg->mask |= SCF_VAL3|SCF_UADDR2;             break;
481         P_FUTEX_OP(WAKE);           arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
482         P_FUTEX_OP(FD);             arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
483         P_FUTEX_OP(REQUEUE);        arg->mask |= SCF_VAL3|SCF_TIMEOUT;            break;
484         P_FUTEX_OP(CMP_REQUEUE);    arg->mask |= SCF_TIMEOUT;                     break;
485         P_FUTEX_OP(CMP_REQUEUE_PI); arg->mask |= SCF_TIMEOUT;                     break;
486         P_FUTEX_OP(WAKE_OP);                                                      break;
487         P_FUTEX_OP(LOCK_PI);        arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
488         P_FUTEX_OP(UNLOCK_PI);      arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
489         P_FUTEX_OP(TRYLOCK_PI);     arg->mask |= SCF_VAL3|SCF_UADDR2;             break;
490         P_FUTEX_OP(WAIT_BITSET);    arg->mask |= SCF_UADDR2;                      break;
491         P_FUTEX_OP(WAKE_BITSET);    arg->mask |= SCF_UADDR2;                      break;
492         P_FUTEX_OP(WAIT_REQUEUE_PI);                                              break;
493         default: printed = scnprintf(bf, size, "%#x", cmd);                       break;
494         }
495
496         if (op & FUTEX_PRIVATE_FLAG)
497                 printed += scnprintf(bf + printed, size - printed, "|PRIV");
498
499         if (op & FUTEX_CLOCK_REALTIME)
500                 printed += scnprintf(bf + printed, size - printed, "|CLKRT");
501
502         return printed;
503 }
504
505 #define SCA_FUTEX_OP  syscall_arg__scnprintf_futex_op
506
507 static const char *epoll_ctl_ops[] = { "ADD", "DEL", "MOD", };
508 static DEFINE_STRARRAY_OFFSET(epoll_ctl_ops, 1);
509
/* Names for the 'which' argument of getitimer(), indexed by value. */
static const char *itimers[] = {
	[0] = "REAL",
	[1] = "VIRTUAL",
	[2] = "PROF",
};
static DEFINE_STRARRAY(itimers);
512
/*
 * Names for seek 'whence' values, indexed by value.  "DATA" and "HOLE"
 * are only present when the headers define SEEK_DATA / SEEK_HOLE.
 */
static const char *whences[] = {
	"SET",
	"CUR",
	"END",
#ifdef SEEK_DATA
	"DATA",
#endif
#ifdef SEEK_HOLE
	"HOLE",
#endif
};
static DEFINE_STRARRAY(whences);
522
/* Names for the fcntl() cmd argument, indexed by value. */
static const char *fcntl_cmds[] = {
	[0]  = "DUPFD",
	[1]  = "GETFD",
	[2]  = "SETFD",
	[3]  = "GETFL",
	[4]  = "SETFL",
	[5]  = "GETLK",
	[6]  = "SETLK",
	[7]  = "SETLKW",
	[8]  = "SETOWN",
	[9]  = "GETOWN",
	[10] = "SETSIG",
	[11] = "GETSIG",
	[12] = "F_GETLK64",
	[13] = "F_SETLK64",
	[14] = "F_SETLKW64",
	[15] = "F_SETOWN_EX",
	[16] = "F_GETOWN_EX",
	[17] = "F_GETOWNER_UIDS",
};
static DEFINE_STRARRAY(fcntl_cmds);
530
/* Names for the 'resource' argument of getrlimit(), indexed by value. */
static const char *rlimit_resources[] = {
	[0]  = "CPU",
	[1]  = "FSIZE",
	[2]  = "DATA",
	[3]  = "STACK",
	[4]  = "CORE",
	[5]  = "RSS",
	[6]  = "NPROC",
	[7]  = "NOFILE",
	[8]  = "MEMLOCK",
	[9]  = "AS",
	[10] = "LOCKS",
	[11] = "SIGPENDING",
	[12] = "MSGQUEUE",
	[13] = "NICE",
	[14] = "RTPRIO",
	[15] = "RTTIME",
};
static DEFINE_STRARRAY(rlimit_resources);
537
/* Names for a signal mask 'how' argument, indexed by value. */
static const char *sighow[] = {
	[0] = "BLOCK",
	[1] = "UNBLOCK",
	[2] = "SETMASK",
};
static DEFINE_STRARRAY(sighow);
540
/* Names for clock ids (used for clock_gettime()), indexed by value. */
static const char *clockid[] = {
	[0] = "REALTIME",
	[1] = "MONOTONIC",
	[2] = "PROCESS_CPUTIME_ID",
	[3] = "THREAD_CPUTIME_ID",
	[4] = "MONOTONIC_RAW",
	[5] = "REALTIME_COARSE",
	[6] = "MONOTONIC_COARSE",
};
static DEFINE_STRARRAY(clockid);
546
/*
 * Names for socket address families, indexed by the AF_* value.
 *
 * Every slot has to line up with the kernel's numbering: slot 28 is
 * AF_MPLS, and leaving it out would shift "CAN" (29) and everything after
 * it down by one.
 */
static const char *socket_families[] = {
	"UNSPEC", "LOCAL", "INET", "AX25", "IPX", "APPLETALK", "NETROM",
	"BRIDGE", "ATMPVC", "X25", "INET6", "ROSE", "DECnet", "NETBEUI",
	"SECURITY", "KEY", "NETLINK", "PACKET", "ASH", "ECONET", "ATMSVC",
	"RDS", "SNA", "IRDA", "PPPOX", "WANPIPE", "LLC", "IB", "MPLS", "CAN",
	"TIPC", "BLUETOOTH", "IUCV", "RXRPC", "ISDN", "PHONET", "IEEE802154",
	"CAIF", "ALG", "NFC", "VSOCK",
};
static DEFINE_STRARRAY(socket_families);
556
557 #ifndef SOCK_TYPE_MASK
558 #define SOCK_TYPE_MASK 0xf
559 #endif
560
561 static size_t syscall_arg__scnprintf_socket_type(char *bf, size_t size,
562                                                       struct syscall_arg *arg)
563 {
564         size_t printed;
565         int type = arg->val,
566             flags = type & ~SOCK_TYPE_MASK;
567
568         type &= SOCK_TYPE_MASK;
569         /*
570          * Can't use a strarray, MIPS may override for ABI reasons.
571          */
572         switch (type) {
573 #define P_SK_TYPE(n) case SOCK_##n: printed = scnprintf(bf, size, #n); break;
574         P_SK_TYPE(STREAM);
575         P_SK_TYPE(DGRAM);
576         P_SK_TYPE(RAW);
577         P_SK_TYPE(RDM);
578         P_SK_TYPE(SEQPACKET);
579         P_SK_TYPE(DCCP);
580         P_SK_TYPE(PACKET);
581 #undef P_SK_TYPE
582         default:
583                 printed = scnprintf(bf, size, "%#x", type);
584         }
585
586 #define P_SK_FLAG(n) \
587         if (flags & SOCK_##n) { \
588                 printed += scnprintf(bf + printed, size - printed, "|%s", #n); \
589                 flags &= ~SOCK_##n; \
590         }
591
592         P_SK_FLAG(CLOEXEC);
593         P_SK_FLAG(NONBLOCK);
594 #undef P_SK_FLAG
595
596         if (flags)
597                 printed += scnprintf(bf + printed, size - printed, "|%#x", flags);
598
599         return printed;
600 }
601
602 #define SCA_SK_TYPE syscall_arg__scnprintf_socket_type
603
/* Fallbacks for MSG_* flags that the C library headers may not provide. */
#if !defined(MSG_PROBE)
#define MSG_PROBE	     0x10
#endif
#if !defined(MSG_WAITFORONE)
#define MSG_WAITFORONE	0x10000
#endif
#if !defined(MSG_SENDPAGE_NOTLAST)
#define MSG_SENDPAGE_NOTLAST 0x20000
#endif
#if !defined(MSG_FASTOPEN)
#define MSG_FASTOPEN	     0x20000000
#endif
616
617 static size_t syscall_arg__scnprintf_msg_flags(char *bf, size_t size,
618                                                struct syscall_arg *arg)
619 {
620         int printed = 0, flags = arg->val;
621
622         if (flags == 0)
623                 return scnprintf(bf, size, "NONE");
624 #define P_MSG_FLAG(n) \
625         if (flags & MSG_##n) { \
626                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
627                 flags &= ~MSG_##n; \
628         }
629
630         P_MSG_FLAG(OOB);
631         P_MSG_FLAG(PEEK);
632         P_MSG_FLAG(DONTROUTE);
633         P_MSG_FLAG(TRYHARD);
634         P_MSG_FLAG(CTRUNC);
635         P_MSG_FLAG(PROBE);
636         P_MSG_FLAG(TRUNC);
637         P_MSG_FLAG(DONTWAIT);
638         P_MSG_FLAG(EOR);
639         P_MSG_FLAG(WAITALL);
640         P_MSG_FLAG(FIN);
641         P_MSG_FLAG(SYN);
642         P_MSG_FLAG(CONFIRM);
643         P_MSG_FLAG(RST);
644         P_MSG_FLAG(ERRQUEUE);
645         P_MSG_FLAG(NOSIGNAL);
646         P_MSG_FLAG(MORE);
647         P_MSG_FLAG(WAITFORONE);
648         P_MSG_FLAG(SENDPAGE_NOTLAST);
649         P_MSG_FLAG(FASTOPEN);
650         P_MSG_FLAG(CMSG_CLOEXEC);
651 #undef P_MSG_FLAG
652
653         if (flags)
654                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
655
656         return printed;
657 }
658
659 #define SCA_MSG_FLAGS syscall_arg__scnprintf_msg_flags
660
661 static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size,
662                                                  struct syscall_arg *arg)
663 {
664         size_t printed = 0;
665         int mode = arg->val;
666
667         if (mode == F_OK) /* 0 */
668                 return scnprintf(bf, size, "F");
669 #define P_MODE(n) \
670         if (mode & n##_OK) { \
671                 printed += scnprintf(bf + printed, size - printed, "%s", #n); \
672                 mode &= ~n##_OK; \
673         }
674
675         P_MODE(R);
676         P_MODE(W);
677         P_MODE(X);
678 #undef P_MODE
679
680         if (mode)
681                 printed += scnprintf(bf + printed, size - printed, "|%#x", mode);
682
683         return printed;
684 }
685
686 #define SCA_ACCMODE syscall_arg__scnprintf_access_mode
687
688 static size_t syscall_arg__scnprintf_open_flags(char *bf, size_t size,
689                                                struct syscall_arg *arg)
690 {
691         int printed = 0, flags = arg->val;
692
693         if (!(flags & O_CREAT))
694                 arg->mask |= 1 << (arg->idx + 1); /* Mask the mode parm */
695
696         if (flags == 0)
697                 return scnprintf(bf, size, "RDONLY");
698 #define P_FLAG(n) \
699         if (flags & O_##n) { \
700                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
701                 flags &= ~O_##n; \
702         }
703
704         P_FLAG(APPEND);
705         P_FLAG(ASYNC);
706         P_FLAG(CLOEXEC);
707         P_FLAG(CREAT);
708         P_FLAG(DIRECT);
709         P_FLAG(DIRECTORY);
710         P_FLAG(EXCL);
711         P_FLAG(LARGEFILE);
712         P_FLAG(NOATIME);
713         P_FLAG(NOCTTY);
714 #ifdef O_NONBLOCK
715         P_FLAG(NONBLOCK);
716 #elif O_NDELAY
717         P_FLAG(NDELAY);
718 #endif
719 #ifdef O_PATH
720         P_FLAG(PATH);
721 #endif
722         P_FLAG(RDWR);
723 #ifdef O_DSYNC
724         if ((flags & O_SYNC) == O_SYNC)
725                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", "SYNC");
726         else {
727                 P_FLAG(DSYNC);
728         }
729 #else
730         P_FLAG(SYNC);
731 #endif
732         P_FLAG(TRUNC);
733         P_FLAG(WRONLY);
734 #undef P_FLAG
735
736         if (flags)
737                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
738
739         return printed;
740 }
741
742 #define SCA_OPEN_FLAGS syscall_arg__scnprintf_open_flags
743
744 static size_t syscall_arg__scnprintf_eventfd_flags(char *bf, size_t size,
745                                                    struct syscall_arg *arg)
746 {
747         int printed = 0, flags = arg->val;
748
749         if (flags == 0)
750                 return scnprintf(bf, size, "NONE");
751 #define P_FLAG(n) \
752         if (flags & EFD_##n) { \
753                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
754                 flags &= ~EFD_##n; \
755         }
756
757         P_FLAG(SEMAPHORE);
758         P_FLAG(CLOEXEC);
759         P_FLAG(NONBLOCK);
760 #undef P_FLAG
761
762         if (flags)
763                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
764
765         return printed;
766 }
767
768 #define SCA_EFD_FLAGS syscall_arg__scnprintf_eventfd_flags
769
770 static size_t syscall_arg__scnprintf_pipe_flags(char *bf, size_t size,
771                                                 struct syscall_arg *arg)
772 {
773         int printed = 0, flags = arg->val;
774
775 #define P_FLAG(n) \
776         if (flags & O_##n) { \
777                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
778                 flags &= ~O_##n; \
779         }
780
781         P_FLAG(CLOEXEC);
782         P_FLAG(NONBLOCK);
783 #undef P_FLAG
784
785         if (flags)
786                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
787
788         return printed;
789 }
790
791 #define SCA_PIPE_FLAGS syscall_arg__scnprintf_pipe_flags
792
793 static size_t syscall_arg__scnprintf_signum(char *bf, size_t size, struct syscall_arg *arg)
794 {
795         int sig = arg->val;
796
797         switch (sig) {
798 #define P_SIGNUM(n) case SIG##n: return scnprintf(bf, size, #n)
799         P_SIGNUM(HUP);
800         P_SIGNUM(INT);
801         P_SIGNUM(QUIT);
802         P_SIGNUM(ILL);
803         P_SIGNUM(TRAP);
804         P_SIGNUM(ABRT);
805         P_SIGNUM(BUS);
806         P_SIGNUM(FPE);
807         P_SIGNUM(KILL);
808         P_SIGNUM(USR1);
809         P_SIGNUM(SEGV);
810         P_SIGNUM(USR2);
811         P_SIGNUM(PIPE);
812         P_SIGNUM(ALRM);
813         P_SIGNUM(TERM);
814         P_SIGNUM(STKFLT);
815         P_SIGNUM(CHLD);
816         P_SIGNUM(CONT);
817         P_SIGNUM(STOP);
818         P_SIGNUM(TSTP);
819         P_SIGNUM(TTIN);
820         P_SIGNUM(TTOU);
821         P_SIGNUM(URG);
822         P_SIGNUM(XCPU);
823         P_SIGNUM(XFSZ);
824         P_SIGNUM(VTALRM);
825         P_SIGNUM(PROF);
826         P_SIGNUM(WINCH);
827         P_SIGNUM(IO);
828         P_SIGNUM(PWR);
829         P_SIGNUM(SYS);
830         default: break;
831         }
832
833         return scnprintf(bf, size, "%#x", sig);
834 }
835
836 #define SCA_SIGNUM syscall_arg__scnprintf_signum
837
838 #define TCGETS          0x5401
839
840 static const char *tioctls[] = {
841         "TCGETS", "TCSETS", "TCSETSW", "TCSETSF", "TCGETA", "TCSETA", "TCSETAW",
842         "TCSETAF", "TCSBRK", "TCXONC", "TCFLSH", "TIOCEXCL", "TIOCNXCL",
843         "TIOCSCTTY", "TIOCGPGRP", "TIOCSPGRP", "TIOCOUTQ", "TIOCSTI",
844         "TIOCGWINSZ", "TIOCSWINSZ", "TIOCMGET", "TIOCMBIS", "TIOCMBIC",
845         "TIOCMSET", "TIOCGSOFTCAR", "TIOCSSOFTCAR", "FIONREAD", "TIOCLINUX",
846         "TIOCCONS", "TIOCGSERIAL", "TIOCSSERIAL", "TIOCPKT", "FIONBIO",
847         "TIOCNOTTY", "TIOCSETD", "TIOCGETD", "TCSBRKP", [0x27] = "TIOCSBRK",
848         "TIOCCBRK", "TIOCGSID", "TCGETS2", "TCSETS2", "TCSETSW2", "TCSETSF2",
849         "TIOCGRS485", "TIOCSRS485", "TIOCGPTN", "TIOCSPTLCK",
850         "TIOCGDEV||TCGETX", "TCSETX", "TCSETXF", "TCSETXW", "TIOCSIG",
851         "TIOCVHANGUP", "TIOCGPKT", "TIOCGPTLCK", "TIOCGEXCL",
852         [0x50] = "FIONCLEX", "FIOCLEX", "FIOASYNC", "TIOCSERCONFIG",
853         "TIOCSERGWILD", "TIOCSERSWILD", "TIOCGLCKTRMIOS", "TIOCSLCKTRMIOS",
854         "TIOCSERGSTRUCT", "TIOCSERGETLSR", "TIOCSERGETMULTI", "TIOCSERSETMULTI",
855         "TIOCMIWAIT", "TIOCGICOUNT", [0x60] = "FIOQSIZE",
856 };
857
858 static DEFINE_STRARRAY_OFFSET(tioctls, 0x5401);
859
/*
 * Initializer fragment for a struct syscall_fmt entry: format argument
 * number @arg with the string table strarray__<array>.  @name is not used
 * by the expansion; it only documents the argument's name at the point of
 * use.
 */
#define STRARRAY(arg, name, array) \
	  .arg_scnprintf = { [arg] = SCA_STRARRAY, }, \
	  .arg_parm	 = { [arg] = &strarray__##array, }
863
864 static struct syscall_fmt {
865         const char *name;
866         const char *alias;
867         size_t     (*arg_scnprintf[6])(char *bf, size_t size, struct syscall_arg *arg);
868         void       *arg_parm[6];
869         bool       errmsg;
870         bool       timeout;
871         bool       hexret;
872 } syscall_fmts[] = {
873         { .name     = "access",     .errmsg = true,
874           .arg_scnprintf = { [1] = SCA_ACCMODE, /* mode */ }, },
875         { .name     = "arch_prctl", .errmsg = true, .alias = "prctl", },
876         { .name     = "brk",        .hexret = true,
877           .arg_scnprintf = { [0] = SCA_HEX, /* brk */ }, },
878         { .name     = "clock_gettime",  .errmsg = true, STRARRAY(0, clk_id, clockid), },
879         { .name     = "close",      .errmsg = true,
880           .arg_scnprintf = { [0] = SCA_CLOSE_FD, /* fd */ }, }, 
881         { .name     = "connect",    .errmsg = true, },
882         { .name     = "dup",        .errmsg = true,
883           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
884         { .name     = "dup2",       .errmsg = true,
885           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
886         { .name     = "dup3",       .errmsg = true,
887           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
888         { .name     = "epoll_ctl",  .errmsg = true, STRARRAY(1, op, epoll_ctl_ops), },
889         { .name     = "eventfd2",   .errmsg = true,
890           .arg_scnprintf = { [1] = SCA_EFD_FLAGS, /* flags */ }, },
891         { .name     = "faccessat",  .errmsg = true,
892           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
893         { .name     = "fadvise64",  .errmsg = true,
894           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
895         { .name     = "fallocate",  .errmsg = true,
896           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
897         { .name     = "fchdir",     .errmsg = true,
898           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
899         { .name     = "fchmod",     .errmsg = true,
900           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
901         { .name     = "fchmodat",   .errmsg = true,
902           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, 
903         { .name     = "fchown",     .errmsg = true,
904           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
905         { .name     = "fchownat",   .errmsg = true,
906           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, 
907         { .name     = "fcntl",      .errmsg = true,
908           .arg_scnprintf = { [0] = SCA_FD, /* fd */
909                              [1] = SCA_STRARRAY, /* cmd */ },
910           .arg_parm      = { [1] = &strarray__fcntl_cmds, /* cmd */ }, },
911         { .name     = "fdatasync",  .errmsg = true,
912           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
913         { .name     = "flock",      .errmsg = true,
914           .arg_scnprintf = { [0] = SCA_FD, /* fd */
915                              [1] = SCA_FLOCK, /* cmd */ }, },
916         { .name     = "fsetxattr",  .errmsg = true,
917           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
918         { .name     = "fstat",      .errmsg = true, .alias = "newfstat",
919           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
920         { .name     = "fstatat",    .errmsg = true, .alias = "newfstatat",
921           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, 
922         { .name     = "fstatfs",    .errmsg = true,
923           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
924         { .name     = "fsync",    .errmsg = true,
925           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
926         { .name     = "ftruncate", .errmsg = true,
927           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
928         { .name     = "futex",      .errmsg = true,
929           .arg_scnprintf = { [1] = SCA_FUTEX_OP, /* op */ }, },
930         { .name     = "futimesat", .errmsg = true,
931           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, 
932         { .name     = "getdents",   .errmsg = true,
933           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
934         { .name     = "getdents64", .errmsg = true,
935           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
936         { .name     = "getitimer",  .errmsg = true, STRARRAY(0, which, itimers), },
937         { .name     = "getrlimit",  .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
938         { .name     = "ioctl",      .errmsg = true,
939           .arg_scnprintf = { [0] = SCA_FD, /* fd */ 
940                              [1] = SCA_STRHEXARRAY, /* cmd */
941                              [2] = SCA_HEX, /* arg */ },
942           .arg_parm      = { [1] = &strarray__tioctls, /* cmd */ }, },
943         { .name     = "kill",       .errmsg = true,
944           .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
945         { .name     = "linkat",     .errmsg = true,
946           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, 
947         { .name     = "lseek",      .errmsg = true,
948           .arg_scnprintf = { [0] = SCA_FD, /* fd */
949                              [2] = SCA_STRARRAY, /* whence */ },
950           .arg_parm      = { [2] = &strarray__whences, /* whence */ }, },
951         { .name     = "lstat",      .errmsg = true, .alias = "newlstat", },
952         { .name     = "madvise",    .errmsg = true,
953           .arg_scnprintf = { [0] = SCA_HEX,      /* start */
954                              [2] = SCA_MADV_BHV, /* behavior */ }, },
955         { .name     = "mkdirat",    .errmsg = true,
956           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, 
957         { .name     = "mknodat",    .errmsg = true,
958           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, 
959         { .name     = "mlock",      .errmsg = true,
960           .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
961         { .name     = "mlockall",   .errmsg = true,
962           .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
963         { .name     = "mmap",       .hexret = true,
964           .arg_scnprintf = { [0] = SCA_HEX,       /* addr */
965                              [2] = SCA_MMAP_PROT, /* prot */
966                              [3] = SCA_MMAP_FLAGS, /* flags */
967                              [4] = SCA_FD,        /* fd */ }, },
968         { .name     = "mprotect",   .errmsg = true,
969           .arg_scnprintf = { [0] = SCA_HEX, /* start */
970                              [2] = SCA_MMAP_PROT, /* prot */ }, },
971         { .name     = "mremap",     .hexret = true,
972           .arg_scnprintf = { [0] = SCA_HEX, /* addr */
973                              [4] = SCA_HEX, /* new_addr */ }, },
974         { .name     = "munlock",    .errmsg = true,
975           .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
976         { .name     = "munmap",     .errmsg = true,
977           .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
978         { .name     = "name_to_handle_at", .errmsg = true,
979           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, 
980         { .name     = "newfstatat", .errmsg = true,
981           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, 
982         { .name     = "open",       .errmsg = true,
983           .arg_scnprintf = { [1] = SCA_OPEN_FLAGS, /* flags */ }, },
984         { .name     = "open_by_handle_at", .errmsg = true,
985           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
986                              [2] = SCA_OPEN_FLAGS, /* flags */ }, },
987         { .name     = "openat",     .errmsg = true,
988           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
989                              [2] = SCA_OPEN_FLAGS, /* flags */ }, },
990         { .name     = "pipe2",      .errmsg = true,
991           .arg_scnprintf = { [1] = SCA_PIPE_FLAGS, /* flags */ }, },
992         { .name     = "poll",       .errmsg = true, .timeout = true, },
993         { .name     = "ppoll",      .errmsg = true, .timeout = true, },
994         { .name     = "pread",      .errmsg = true, .alias = "pread64",
995           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
996         { .name     = "preadv",     .errmsg = true, .alias = "pread",
997           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
998         { .name     = "prlimit64",  .errmsg = true, STRARRAY(1, resource, rlimit_resources), },
999         { .name     = "pwrite",     .errmsg = true, .alias = "pwrite64",
1000           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
1001         { .name     = "pwritev",    .errmsg = true,
1002           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
1003         { .name     = "read",       .errmsg = true,
1004           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
1005         { .name     = "readlinkat", .errmsg = true,
1006           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, 
1007         { .name     = "readv",      .errmsg = true,
1008           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
1009         { .name     = "recvfrom",   .errmsg = true,
1010           .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1011         { .name     = "recvmmsg",   .errmsg = true,
1012           .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1013         { .name     = "recvmsg",    .errmsg = true,
1014           .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
1015         { .name     = "renameat",   .errmsg = true,
1016           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, 
1017         { .name     = "rt_sigaction", .errmsg = true,
1018           .arg_scnprintf = { [0] = SCA_SIGNUM, /* sig */ }, },
1019         { .name     = "rt_sigprocmask",  .errmsg = true, STRARRAY(0, how, sighow), },
1020         { .name     = "rt_sigqueueinfo", .errmsg = true,
1021           .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1022         { .name     = "rt_tgsigqueueinfo", .errmsg = true,
1023           .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
1024         { .name     = "select",     .errmsg = true, .timeout = true, },
1025         { .name     = "sendmmsg",    .errmsg = true,
1026           .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1027         { .name     = "sendmsg",    .errmsg = true,
1028           .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
1029         { .name     = "sendto",     .errmsg = true,
1030           .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1031         { .name     = "setitimer",  .errmsg = true, STRARRAY(0, which, itimers), },
1032         { .name     = "setrlimit",  .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
1033         { .name     = "shutdown",   .errmsg = true,
1034           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
1035         { .name     = "socket",     .errmsg = true,
1036           .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
1037                              [1] = SCA_SK_TYPE, /* type */ },
1038           .arg_parm      = { [0] = &strarray__socket_families, /* family */ }, },
1039         { .name     = "socketpair", .errmsg = true,
1040           .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
1041                              [1] = SCA_SK_TYPE, /* type */ },
1042           .arg_parm      = { [0] = &strarray__socket_families, /* family */ }, },
1043         { .name     = "stat",       .errmsg = true, .alias = "newstat", },
1044         { .name     = "symlinkat",  .errmsg = true,
1045           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, 
1046         { .name     = "tgkill",     .errmsg = true,
1047           .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
1048         { .name     = "tkill",      .errmsg = true,
1049           .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1050         { .name     = "uname",      .errmsg = true, .alias = "newuname", },
1051         { .name     = "unlinkat",   .errmsg = true,
1052           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1053         { .name     = "utimensat",  .errmsg = true,
1054           .arg_scnprintf = { [0] = SCA_FDAT, /* dirfd */ }, },
1055         { .name     = "write",      .errmsg = true,
1056           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
1057         { .name     = "writev",     .errmsg = true,
1058           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
1059 };
1060
1061 static int syscall_fmt__cmp(const void *name, const void *fmtp)
1062 {
1063         const struct syscall_fmt *fmt = fmtp;
1064         return strcmp(name, fmt->name);
1065 }
1066
1067 static struct syscall_fmt *syscall_fmt__find(const char *name)
1068 {
1069         const int nmemb = ARRAY_SIZE(syscall_fmts);
1070         return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp);
1071 }
1072
/*
 * Per-syscall state, one slot per syscall id in trace->syscalls.table.
 * Slots are zero-filled when the table grows and populated lazily by
 * trace__read_syscall_info().
 */
struct syscall {
        /* "syscalls:sys_enter_<name>" tracepoint format (or its alias), NULL if not read */
        struct event_format *tp_format;
        /* name returned by audit_syscall_to_name(); NULL means "slot not read yet" */
        const char          *name;
        /* true when the event qualifier list excludes this syscall */
        bool                filtered;
        /* matching syscall_fmts entry, NULL if the syscall has none */
        struct syscall_fmt  *fmt;
        /* per-argument formatters, allocated in syscall__set_arg_fmts() */
        size_t              (**arg_scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
        /* per-argument formatter parameters, borrowed from fmt->arg_parm */
        void                **arg_parm;
};
1081
1082 static size_t fprintf_duration(unsigned long t, FILE *fp)
1083 {
1084         double duration = (double)t / NSEC_PER_MSEC;
1085         size_t printed = fprintf(fp, "(");
1086
1087         if (duration >= 1.0)
1088                 printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration);
1089         else if (duration >= 0.01)
1090                 printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration);
1091         else
1092                 printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration);
1093         return printed + fprintf(fp, "): ");
1094 }
1095
/*
 * Per-thread tracing state, stored in thread->priv and created on demand by
 * thread__trace().
 */
struct thread_trace {
        /* sample time of the last sys_enter seen for this thread */
        u64               entry_time;
        /* sample time of the last sys_exit seen for this thread */
        u64               exit_time;
        /* true while entry_str holds an entry line that has not been printed yet */
        bool              entry_pending;
        /* bumped on every thread__trace() call for this thread */
        unsigned long     nr_events;
        /* 1024-byte buffer holding the formatted "name(args" text, allocated lazily */
        char              *entry_str;
        double            runtime_ms;
        /* fd -> pathname cache; max is the highest valid index, -1 when empty */
        struct {
                int       max;
                char      **table;
        } paths;

        /* keyed by syscall id; each node's priv is a struct stats of durations */
        struct intlist *syscall_stats;
};
1110
1111 static struct thread_trace *thread_trace__new(void)
1112 {
1113         struct thread_trace *ttrace =  zalloc(sizeof(struct thread_trace));
1114
1115         if (ttrace)
1116                 ttrace->paths.max = -1;
1117
1118         ttrace->syscall_stats = intlist__new(NULL);
1119
1120         return ttrace;
1121 }
1122
1123 static struct thread_trace *thread__trace(struct thread *thread, FILE *fp)
1124 {
1125         struct thread_trace *ttrace;
1126
1127         if (thread == NULL)
1128                 goto fail;
1129
1130         if (thread->priv == NULL)
1131                 thread->priv = thread_trace__new();
1132                 
1133         if (thread->priv == NULL)
1134                 goto fail;
1135
1136         ttrace = thread->priv;
1137         ++ttrace->nr_events;
1138
1139         return ttrace;
1140 fail:
1141         color_fprintf(fp, PERF_COLOR_RED,
1142                       "WARNING: not enough memory, dropping samples!\n");
1143         return NULL;
1144 }
1145
/*
 * Global state of one 'perf trace' session.
 */
struct trace {
        /* embedded so callbacks can recover the trace via container_of() */
        struct perf_tool        tool;
        struct {
                /* machine id handed to audit_syscall_to_name() */
                int             machine;
                /* syscall id compared against on exit to record fd pathnames */
                int             open_id;
        }                       audit;
        /* syscall id -> struct syscall table; max is the highest valid id, -1 when empty */
        struct {
                int             max;
                struct syscall  *table;
        } syscalls;
        struct perf_record_opts opts;
        /* host machine created by trace__symbols_init() */
        struct machine          *host;
        /* subtracted from sample timestamps before printing */
        u64                     base_time;
        bool                    full_time;
        /* stream all trace output is written to */
        FILE                    *output;
        unsigned long           nr_events;
        /* optional list of syscall names used to filter events */
        struct strlist          *ev_qualifier;
        /* when true, ev_qualifier lists the syscalls to exclude instead of include */
        bool                    not_ev_qualifier;
        /* when true, missing fd pathnames may be read from /proc */
        bool                    live;
        /* pathname consumed on the next successful open to fill the fd cache */
        const char              *last_vfs_getname;
        struct intlist          *tid_list;
        struct intlist          *pid_list;
        bool                    sched;
        /* prefix each line with the thread id (and comm, see show_comm) */
        bool                    multiple_threads;
        /* collect per-thread, per-syscall duration statistics */
        bool                    summary;
        bool                    show_comm;
        bool                    show_tool_stats;
        /* in milliseconds; syscalls shorter than this are not printed */
        double                  duration_filter;
        double                  runtime_ms;
        /* counters of how fd pathnames were obtained */
        struct {
                u64             vfs_getname, proc_getname;
        } stats;
};
1179
1180 static int trace__set_fd_pathname(struct thread *thread, int fd, const char *pathname)
1181 {
1182         struct thread_trace *ttrace = thread->priv;
1183
1184         if (fd > ttrace->paths.max) {
1185                 char **npath = realloc(ttrace->paths.table, (fd + 1) * sizeof(char *));
1186
1187                 if (npath == NULL)
1188                         return -1;
1189
1190                 if (ttrace->paths.max != -1) {
1191                         memset(npath + ttrace->paths.max + 1, 0,
1192                                (fd - ttrace->paths.max) * sizeof(char *));
1193                 } else {
1194                         memset(npath, 0, (fd + 1) * sizeof(char *));
1195                 }
1196
1197                 ttrace->paths.table = npath;
1198                 ttrace->paths.max   = fd;
1199         }
1200
1201         ttrace->paths.table[fd] = strdup(pathname);
1202
1203         return ttrace->paths.table[fd] != NULL ? 0 : -1;
1204 }
1205
1206 static int thread__read_fd_path(struct thread *thread, int fd)
1207 {
1208         char linkname[PATH_MAX], pathname[PATH_MAX];
1209         struct stat st;
1210         int ret;
1211
1212         if (thread->pid_ == thread->tid) {
1213                 scnprintf(linkname, sizeof(linkname),
1214                           "/proc/%d/fd/%d", thread->pid_, fd);
1215         } else {
1216                 scnprintf(linkname, sizeof(linkname),
1217                           "/proc/%d/task/%d/fd/%d", thread->pid_, thread->tid, fd);
1218         }
1219
1220         if (lstat(linkname, &st) < 0 || st.st_size + 1 > (off_t)sizeof(pathname))
1221                 return -1;
1222
1223         ret = readlink(linkname, pathname, sizeof(pathname));
1224
1225         if (ret < 0 || ret > st.st_size)
1226                 return -1;
1227
1228         pathname[ret] = '\0';
1229         return trace__set_fd_pathname(thread, fd, pathname);
1230 }
1231
1232 static const char *thread__fd_path(struct thread *thread, int fd,
1233                                    struct trace *trace)
1234 {
1235         struct thread_trace *ttrace = thread->priv;
1236
1237         if (ttrace == NULL)
1238                 return NULL;
1239
1240         if (fd < 0)
1241                 return NULL;
1242
1243         if ((fd > ttrace->paths.max || ttrace->paths.table[fd] == NULL))
1244                 if (!trace->live)
1245                         return NULL;
1246                 ++trace->stats.proc_getname;
1247                 if (thread__read_fd_path(thread, fd)) {
1248                         return NULL;
1249         }
1250
1251         return ttrace->paths.table[fd];
1252 }
1253
1254 static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
1255                                         struct syscall_arg *arg)
1256 {
1257         int fd = arg->val;
1258         size_t printed = scnprintf(bf, size, "%d", fd);
1259         const char *path = thread__fd_path(arg->thread, fd, arg->trace);
1260
1261         if (path)
1262                 printed += scnprintf(bf + printed, size - printed, "<%s>", path);
1263
1264         return printed;
1265 }
1266
1267 static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
1268                                               struct syscall_arg *arg)
1269 {
1270         int fd = arg->val;
1271         size_t printed = syscall_arg__scnprintf_fd(bf, size, arg);
1272         struct thread_trace *ttrace = arg->thread->priv;
1273
1274         if (ttrace && fd >= 0 && fd <= ttrace->paths.max) {
1275                 free(ttrace->paths.table[fd]);
1276                 ttrace->paths.table[fd] = NULL;
1277         }
1278
1279         return printed;
1280 }
1281
1282 static bool trace__filter_duration(struct trace *trace, double t)
1283 {
1284         return t < (trace->duration_filter * NSEC_PER_MSEC);
1285 }
1286
1287 static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
1288 {
1289         double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC;
1290
1291         return fprintf(fp, "%10.3f ", ts);
1292 }
1293
/* Set by sig_handler() when any handled signal arrives. */
static bool done = false;
/* Set by sig_handler(): true only when the last handled signal was SIGINT. */
static bool interrupted = false;

/*
 * Signal handler: flags that the tool should stop and records whether the
 * request came from SIGINT.
 * NOTE(review): presumably both flags are polled by the main event loop
 * elsewhere in this file - confirm.
 */
static void sig_handler(int sig)
{
        done = true;
        interrupted = sig == SIGINT;
}
1302
1303 static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread,
1304                                         u64 duration, u64 tstamp, FILE *fp)
1305 {
1306         size_t printed = trace__fprintf_tstamp(trace, tstamp, fp);
1307         printed += fprintf_duration(duration, fp);
1308
1309         if (trace->multiple_threads) {
1310                 if (trace->show_comm)
1311                         printed += fprintf(fp, "%.14s/", thread__comm_str(thread));
1312                 printed += fprintf(fp, "%d ", thread->tid);
1313         }
1314
1315         return printed;
1316 }
1317
1318 static int trace__process_event(struct trace *trace, struct machine *machine,
1319                                 union perf_event *event, struct perf_sample *sample)
1320 {
1321         int ret = 0;
1322
1323         switch (event->header.type) {
1324         case PERF_RECORD_LOST:
1325                 color_fprintf(trace->output, PERF_COLOR_RED,
1326                               "LOST %" PRIu64 " events!\n", event->lost.lost);
1327                 ret = machine__process_lost_event(machine, event, sample);
1328         default:
1329                 ret = machine__process_event(machine, event, sample);
1330                 break;
1331         }
1332
1333         return ret;
1334 }
1335
1336 static int trace__tool_process(struct perf_tool *tool,
1337                                union perf_event *event,
1338                                struct perf_sample *sample,
1339                                struct machine *machine)
1340 {
1341         struct trace *trace = container_of(tool, struct trace, tool);
1342         return trace__process_event(trace, machine, event, sample);
1343 }
1344
1345 static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
1346 {
1347         int err = symbol__init();
1348
1349         if (err)
1350                 return err;
1351
1352         trace->host = machine__new_host();
1353         if (trace->host == NULL)
1354                 return -ENOMEM;
1355
1356         err = __machine__synthesize_threads(trace->host, &trace->tool, &trace->opts.target,
1357                                             evlist->threads, trace__tool_process, false);
1358         if (err)
1359                 symbol__exit();
1360
1361         return err;
1362 }
1363
1364 static int syscall__set_arg_fmts(struct syscall *sc)
1365 {
1366         struct format_field *field;
1367         int idx = 0;
1368
1369         sc->arg_scnprintf = calloc(sc->tp_format->format.nr_fields - 1, sizeof(void *));
1370         if (sc->arg_scnprintf == NULL)
1371                 return -1;
1372
1373         if (sc->fmt)
1374                 sc->arg_parm = sc->fmt->arg_parm;
1375
1376         for (field = sc->tp_format->format.fields->next; field; field = field->next) {
1377                 if (sc->fmt && sc->fmt->arg_scnprintf[idx])
1378                         sc->arg_scnprintf[idx] = sc->fmt->arg_scnprintf[idx];
1379                 else if (field->flags & FIELD_IS_POINTER)
1380                         sc->arg_scnprintf[idx] = syscall_arg__scnprintf_hex;
1381                 ++idx;
1382         }
1383
1384         return 0;
1385 }
1386
1387 static int trace__read_syscall_info(struct trace *trace, int id)
1388 {
1389         char tp_name[128];
1390         struct syscall *sc;
1391         const char *name = audit_syscall_to_name(id, trace->audit.machine);
1392
1393         if (name == NULL)
1394                 return -1;
1395
1396         if (id > trace->syscalls.max) {
1397                 struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc));
1398
1399                 if (nsyscalls == NULL)
1400                         return -1;
1401
1402                 if (trace->syscalls.max != -1) {
1403                         memset(nsyscalls + trace->syscalls.max + 1, 0,
1404                                (id - trace->syscalls.max) * sizeof(*sc));
1405                 } else {
1406                         memset(nsyscalls, 0, (id + 1) * sizeof(*sc));
1407                 }
1408
1409                 trace->syscalls.table = nsyscalls;
1410                 trace->syscalls.max   = id;
1411         }
1412
1413         sc = trace->syscalls.table + id;
1414         sc->name = name;
1415
1416         if (trace->ev_qualifier) {
1417                 bool in = strlist__find(trace->ev_qualifier, name) != NULL;
1418
1419                 if (!(in ^ trace->not_ev_qualifier)) {
1420                         sc->filtered = true;
1421                         /*
1422                          * No need to do read tracepoint information since this will be
1423                          * filtered out.
1424                          */
1425                         return 0;
1426                 }
1427         }
1428
1429         sc->fmt  = syscall_fmt__find(sc->name);
1430
1431         snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name);
1432         sc->tp_format = event_format__new("syscalls", tp_name);
1433
1434         if (sc->tp_format == NULL && sc->fmt && sc->fmt->alias) {
1435                 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias);
1436                 sc->tp_format = event_format__new("syscalls", tp_name);
1437         }
1438
1439         if (sc->tp_format == NULL)
1440                 return -1;
1441
1442         return syscall__set_arg_fmts(sc);
1443 }
1444
1445 static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
1446                                       unsigned long *args, struct trace *trace,
1447                                       struct thread *thread)
1448 {
1449         size_t printed = 0;
1450
1451         if (sc->tp_format != NULL) {
1452                 struct format_field *field;
1453                 u8 bit = 1;
1454                 struct syscall_arg arg = {
1455                         .idx    = 0,
1456                         .mask   = 0,
1457                         .trace  = trace,
1458                         .thread = thread,
1459                 };
1460
1461                 for (field = sc->tp_format->format.fields->next; field;
1462                      field = field->next, ++arg.idx, bit <<= 1) {
1463                         if (arg.mask & bit)
1464                                 continue;
1465                         /*
1466                          * Suppress this argument if its value is zero and
1467                          * and we don't have a string associated in an
1468                          * strarray for it.
1469                          */
1470                         if (args[arg.idx] == 0 &&
1471                             !(sc->arg_scnprintf &&
1472                               sc->arg_scnprintf[arg.idx] == SCA_STRARRAY &&
1473                               sc->arg_parm[arg.idx]))
1474                                 continue;
1475
1476                         printed += scnprintf(bf + printed, size - printed,
1477                                              "%s%s: ", printed ? ", " : "", field->name);
1478                         if (sc->arg_scnprintf && sc->arg_scnprintf[arg.idx]) {
1479                                 arg.val = args[arg.idx];
1480                                 if (sc->arg_parm)
1481                                         arg.parm = sc->arg_parm[arg.idx];
1482                                 printed += sc->arg_scnprintf[arg.idx](bf + printed,
1483                                                                       size - printed, &arg);
1484                         } else {
1485                                 printed += scnprintf(bf + printed, size - printed,
1486                                                      "%ld", args[arg.idx]);
1487                         }
1488                 }
1489         } else {
1490                 int i = 0;
1491
1492                 while (i < 6) {
1493                         printed += scnprintf(bf + printed, size - printed,
1494                                              "%sarg%d: %ld",
1495                                              printed ? ", " : "", i, args[i]);
1496                         ++i;
1497                 }
1498         }
1499
1500         return printed;
1501 }
1502
/*
 * Signature shared by the per-tracepoint sample handlers in this file, such
 * as trace__sys_enter() and trace__sys_exit().
 */
typedef int (*tracepoint_handler)(struct trace *trace, struct perf_evsel *evsel,
                                  struct perf_sample *sample);
1505
1506 static struct syscall *trace__syscall_info(struct trace *trace,
1507                                            struct perf_evsel *evsel, int id)
1508 {
1509
1510         if (id < 0) {
1511
1512                 /*
1513                  * XXX: Noticed on x86_64, reproduced as far back as 3.0.36, haven't tried
1514                  * before that, leaving at a higher verbosity level till that is
1515                  * explained. Reproduced with plain ftrace with:
1516                  *
1517                  * echo 1 > /t/events/raw_syscalls/sys_exit/enable
1518                  * grep "NR -1 " /t/trace_pipe
1519                  *
1520                  * After generating some load on the machine.
1521                  */
1522                 if (verbose > 1) {
1523                         static u64 n;
1524                         fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n",
1525                                 id, perf_evsel__name(evsel), ++n);
1526                 }
1527                 return NULL;
1528         }
1529
1530         if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) &&
1531             trace__read_syscall_info(trace, id))
1532                 goto out_cant_read;
1533
1534         if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL))
1535                 goto out_cant_read;
1536
1537         return &trace->syscalls.table[id];
1538
1539 out_cant_read:
1540         if (verbose) {
1541                 fprintf(trace->output, "Problems reading syscall %d", id);
1542                 if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL)
1543                         fprintf(trace->output, "(%s)", trace->syscalls.table[id].name);
1544                 fputs(" information\n", trace->output);
1545         }
1546         return NULL;
1547 }
1548
1549 static void thread__update_stats(struct thread_trace *ttrace,
1550                                  int id, struct perf_sample *sample)
1551 {
1552         struct int_node *inode;
1553         struct stats *stats;
1554         u64 duration = 0;
1555
1556         inode = intlist__findnew(ttrace->syscall_stats, id);
1557         if (inode == NULL)
1558                 return;
1559
1560         stats = inode->priv;
1561         if (stats == NULL) {
1562                 stats = malloc(sizeof(struct stats));
1563                 if (stats == NULL)
1564                         return;
1565                 init_stats(stats);
1566                 inode->priv = stats;
1567         }
1568
1569         if (ttrace->entry_time && sample->time > ttrace->entry_time)
1570                 duration = sample->time - ttrace->entry_time;
1571
1572         update_stats(stats, duration);
1573 }
1574
1575 static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
1576                             struct perf_sample *sample)
1577 {
1578         char *msg;
1579         void *args;
1580         size_t printed = 0;
1581         struct thread *thread;
1582         int id = perf_evsel__sc_tp_uint(evsel, id, sample);
1583         struct syscall *sc = trace__syscall_info(trace, evsel, id);
1584         struct thread_trace *ttrace;
1585
1586         if (sc == NULL)
1587                 return -1;
1588
1589         if (sc->filtered)
1590                 return 0;
1591
1592         thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1593         ttrace = thread__trace(thread, trace->output);
1594         if (ttrace == NULL)
1595                 return -1;
1596
1597         args = perf_evsel__sc_tp_ptr(evsel, args, sample);
1598         ttrace = thread->priv;
1599
1600         if (ttrace->entry_str == NULL) {
1601                 ttrace->entry_str = malloc(1024);
1602                 if (!ttrace->entry_str)
1603                         return -1;
1604         }
1605
1606         ttrace->entry_time = sample->time;
1607         msg = ttrace->entry_str;
1608         printed += scnprintf(msg + printed, 1024 - printed, "%s(", sc->name);
1609
1610         printed += syscall__scnprintf_args(sc, msg + printed, 1024 - printed,
1611                                            args, trace, thread);
1612
1613         if (!strcmp(sc->name, "exit_group") || !strcmp(sc->name, "exit")) {
1614                 if (!trace->duration_filter) {
1615                         trace__fprintf_entry_head(trace, thread, 1, sample->time, trace->output);
1616                         fprintf(trace->output, "%-70s\n", ttrace->entry_str);
1617                 }
1618         } else
1619                 ttrace->entry_pending = true;
1620
1621         return 0;
1622 }
1623
1624 static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
1625                            struct perf_sample *sample)
1626 {
1627         int ret;
1628         u64 duration = 0;
1629         struct thread *thread;
1630         int id = perf_evsel__sc_tp_uint(evsel, id, sample);
1631         struct syscall *sc = trace__syscall_info(trace, evsel, id);
1632         struct thread_trace *ttrace;
1633
1634         if (sc == NULL)
1635                 return -1;
1636
1637         if (sc->filtered)
1638                 return 0;
1639
1640         thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1641         ttrace = thread__trace(thread, trace->output);
1642         if (ttrace == NULL)
1643                 return -1;
1644
1645         if (trace->summary)
1646                 thread__update_stats(ttrace, id, sample);
1647
1648         ret = perf_evsel__sc_tp_uint(evsel, ret, sample);
1649
1650         if (id == trace->audit.open_id && ret >= 0 && trace->last_vfs_getname) {
1651                 trace__set_fd_pathname(thread, ret, trace->last_vfs_getname);
1652                 trace->last_vfs_getname = NULL;
1653                 ++trace->stats.vfs_getname;
1654         }
1655
1656         ttrace = thread->priv;
1657
1658         ttrace->exit_time = sample->time;
1659
1660         if (ttrace->entry_time) {
1661                 duration = sample->time - ttrace->entry_time;
1662                 if (trace__filter_duration(trace, duration))
1663                         goto out;
1664         } else if (trace->duration_filter)
1665                 goto out;
1666
1667         trace__fprintf_entry_head(trace, thread, duration, sample->time, trace->output);
1668
1669         if (ttrace->entry_pending) {
1670                 fprintf(trace->output, "%-70s", ttrace->entry_str);
1671         } else {
1672                 fprintf(trace->output, " ... [");
1673                 color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued");
1674                 fprintf(trace->output, "]: %s()", sc->name);
1675         }
1676
1677         if (sc->fmt == NULL) {
1678 signed_print:
1679                 fprintf(trace->output, ") = %d", ret);
1680         } else if (ret < 0 && sc->fmt->errmsg) {
1681                 char bf[256];
1682                 const char *emsg = strerror_r(-ret, bf, sizeof(bf)),
1683                            *e = audit_errno_to_name(-ret);
1684
1685                 fprintf(trace->output, ") = -1 %s %s", e, emsg);
1686         } else if (ret == 0 && sc->fmt->timeout)
1687                 fprintf(trace->output, ") = 0 Timeout");
1688         else if (sc->fmt->hexret)
1689                 fprintf(trace->output, ") = %#x", ret);
1690         else
1691                 goto signed_print;
1692
1693         fputc('\n', trace->output);
1694 out:
1695         ttrace->entry_pending = false;
1696
1697         return 0;
1698 }
1699
1700 static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel,
1701                               struct perf_sample *sample)
1702 {
1703         trace->last_vfs_getname = perf_evsel__rawptr(evsel, sample, "pathname");
1704         return 0;
1705 }
1706
1707 static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel,
1708                                      struct perf_sample *sample)
1709 {
1710         u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
1711         double runtime_ms = (double)runtime / NSEC_PER_MSEC;
1712         struct thread *thread = machine__findnew_thread(trace->host,
1713                                                         sample->pid,
1714                                                         sample->tid);
1715         struct thread_trace *ttrace = thread__trace(thread, trace->output);
1716
1717         if (ttrace == NULL)
1718                 goto out_dump;
1719
1720         ttrace->runtime_ms += runtime_ms;
1721         trace->runtime_ms += runtime_ms;
1722         return 0;
1723
1724 out_dump:
1725         fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n",
1726                evsel->name,
1727                perf_evsel__strval(evsel, sample, "comm"),
1728                (pid_t)perf_evsel__intval(evsel, sample, "pid"),
1729                runtime,
1730                perf_evsel__intval(evsel, sample, "vruntime"));
1731         return 0;
1732 }
1733
1734 static bool skip_sample(struct trace *trace, struct perf_sample *sample)
1735 {
1736         if ((trace->pid_list && intlist__find(trace->pid_list, sample->pid)) ||
1737             (trace->tid_list && intlist__find(trace->tid_list, sample->tid)))
1738                 return false;
1739
1740         if (trace->pid_list || trace->tid_list)
1741                 return true;
1742
1743         return false;
1744 }
1745
1746 static int trace__process_sample(struct perf_tool *tool,
1747                                  union perf_event *event __maybe_unused,
1748                                  struct perf_sample *sample,
1749                                  struct perf_evsel *evsel,
1750                                  struct machine *machine __maybe_unused)
1751 {
1752         struct trace *trace = container_of(tool, struct trace, tool);
1753         int err = 0;
1754
1755         tracepoint_handler handler = evsel->handler;
1756
1757         if (skip_sample(trace, sample))
1758                 return 0;
1759
1760         if (!trace->full_time && trace->base_time == 0)
1761                 trace->base_time = sample->time;
1762
1763         if (handler)
1764                 handler(trace, evsel, sample);
1765
1766         return err;
1767 }
1768
1769 static bool
1770 perf_session__has_tp(struct perf_session *session, const char *name)
1771 {
1772         struct perf_evsel *evsel;
1773
1774         evsel = perf_evlist__find_tracepoint_by_name(session->evlist, name);
1775
1776         return evsel != NULL;
1777 }
1778
1779 static int parse_target_str(struct trace *trace)
1780 {
1781         if (trace->opts.target.pid) {
1782                 trace->pid_list = intlist__new(trace->opts.target.pid);
1783                 if (trace->pid_list == NULL) {
1784                         pr_err("Error parsing process id string\n");
1785                         return -EINVAL;
1786                 }
1787         }
1788
1789         if (trace->opts.target.tid) {
1790                 trace->tid_list = intlist__new(trace->opts.target.tid);
1791                 if (trace->tid_list == NULL) {
1792                         pr_err("Error parsing thread id string\n");
1793                         return -EINVAL;
1794                 }
1795         }
1796
1797         return 0;
1798 }
1799
/*
 * Implement "perf trace record": build an argument vector made of a fixed
 * set of "record" options followed by the caller's arguments and hand it
 * to cmd_record().
 *
 * Returns -ENOMEM when the vector cannot be allocated, otherwise whatever
 * cmd_record() returns.
 */
static int trace__record(int argc, const char **argv)
{
	const char * const record_args[] = {
		"record",
		"-R",
		"-m", "1024",
		"-c", "1",
		"-e", "raw_syscalls:sys_enter,raw_syscalls:sys_exit",
	};
	const unsigned int nr_fixed = ARRAY_SIZE(record_args);
	const unsigned int nr_total = nr_fixed + argc;
	unsigned int nr = 0, k;
	const char **rec_argv;

	/* One extra zeroed slot terminates the vector. */
	rec_argv = calloc(nr_total + 1, sizeof(char *));
	if (!rec_argv)
		return -ENOMEM;

	for (k = 0; k < nr_fixed; k++)
		rec_argv[nr++] = record_args[k];

	for (k = 0; k < (unsigned int)argc; k++)
		rec_argv[nr++] = argv[k];

	return cmd_record(nr, rec_argv, NULL);
}
1826
1827 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp);
1828
1829 static void perf_evlist__add_vfs_getname(struct perf_evlist *evlist)
1830 {
1831         struct perf_evsel *evsel = perf_evsel__newtp("probe", "vfs_getname");
1832         if (evsel == NULL)
1833                 return;
1834
1835         if (perf_evsel__field(evsel, "pathname") == NULL) {
1836                 perf_evsel__delete(evsel);
1837                 return;
1838         }
1839
1840         evsel->handler = trace__vfs_getname;
1841         perf_evlist__add(evlist, evsel);
1842 }
1843
1844 static int trace__run(struct trace *trace, int argc, const char **argv)
1845 {
1846         struct perf_evlist *evlist = perf_evlist__new();
1847         struct perf_evsel *evsel;
1848         int err = -1, i;
1849         unsigned long before;
1850         const bool forks = argc > 0;
1851
1852         trace->live = true;
1853
1854         if (evlist == NULL) {
1855                 fprintf(trace->output, "Not enough memory to run!\n");
1856                 goto out;
1857         }
1858
1859         if (perf_evlist__add_syscall_newtp(evlist, trace__sys_enter, trace__sys_exit))
1860                 goto out_error_tp;
1861
1862         perf_evlist__add_vfs_getname(evlist);
1863
1864         if (trace->sched &&
1865                 perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
1866                                 trace__sched_stat_runtime))
1867                 goto out_error_tp;
1868
1869         err = perf_evlist__create_maps(evlist, &trace->opts.target);
1870         if (err < 0) {
1871                 fprintf(trace->output, "Problems parsing the target to trace, check your options!\n");
1872                 goto out_delete_evlist;
1873         }
1874
1875         err = trace__symbols_init(trace, evlist);
1876         if (err < 0) {
1877                 fprintf(trace->output, "Problems initializing symbol libraries!\n");
1878                 goto out_delete_maps;
1879         }
1880
1881         perf_evlist__config(evlist, &trace->opts);
1882
1883         signal(SIGCHLD, sig_handler);
1884         signal(SIGINT, sig_handler);
1885
1886         if (forks) {
1887                 err = perf_evlist__prepare_workload(evlist, &trace->opts.target,
1888                                                     argv, false, false);
1889                 if (err < 0) {
1890                         fprintf(trace->output, "Couldn't run the workload!\n");
1891                         goto out_delete_maps;
1892                 }
1893         }
1894
1895         err = perf_evlist__open(evlist);
1896         if (err < 0)
1897                 goto out_error_open;
1898
1899         err = perf_evlist__mmap(evlist, UINT_MAX, false);
1900         if (err < 0) {
1901                 fprintf(trace->output, "Couldn't mmap the events: %s\n", strerror(errno));
1902                 goto out_close_evlist;
1903         }
1904
1905         perf_evlist__enable(evlist);
1906
1907         if (forks)
1908                 perf_evlist__start_workload(evlist);
1909
1910         trace->multiple_threads = evlist->threads->map[0] == -1 || evlist->threads->nr > 1;
1911 again:
1912         before = trace->nr_events;
1913
1914         for (i = 0; i < evlist->nr_mmaps; i++) {
1915                 union perf_event *event;
1916
1917                 while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) {
1918                         const u32 type = event->header.type;
1919                         tracepoint_handler handler;
1920                         struct perf_sample sample;
1921
1922                         ++trace->nr_events;
1923
1924                         err = perf_evlist__parse_sample(evlist, event, &sample);
1925                         if (err) {
1926                                 fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err);
1927                                 goto next_event;
1928                         }
1929
1930                         if (!trace->full_time && trace->base_time == 0)
1931                                 trace->base_time = sample.time;
1932
1933                         if (type != PERF_RECORD_SAMPLE) {
1934                                 trace__process_event(trace, trace->host, event, &sample);
1935                                 continue;
1936                         }
1937
1938                         evsel = perf_evlist__id2evsel(evlist, sample.id);
1939                         if (evsel == NULL) {
1940                                 fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample.id);
1941                                 goto next_event;
1942                         }
1943
1944                         if (sample.raw_data == NULL) {
1945                                 fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
1946                                        perf_evsel__name(evsel), sample.tid,
1947                                        sample.cpu, sample.raw_size);
1948                                 goto next_event;
1949                         }
1950
1951                         handler = evsel->handler;
1952                         handler(trace, evsel, &sample);
1953 next_event:
1954                         perf_evlist__mmap_consume(evlist, i);
1955
1956                         if (interrupted)
1957                                 goto out_disable;
1958                 }
1959         }
1960
1961         if (trace->nr_events == before) {
1962                 int timeout = done ? 100 : -1;
1963
1964                 if (poll(evlist->pollfd, evlist->nr_fds, timeout) > 0)
1965                         goto again;
1966         } else {
1967                 goto again;
1968         }
1969
1970 out_disable:
1971         perf_evlist__disable(evlist);
1972
1973         if (!err) {
1974                 if (trace->summary)
1975                         trace__fprintf_thread_summary(trace, trace->output);
1976
1977                 if (trace->show_tool_stats) {
1978                         fprintf(trace->output, "Stats:\n "
1979                                                " vfs_getname : %" PRIu64 "\n"
1980                                                " proc_getname: %" PRIu64 "\n",
1981                                 trace->stats.vfs_getname,
1982                                 trace->stats.proc_getname);
1983                 }
1984         }
1985
1986         perf_evlist__munmap(evlist);
1987 out_close_evlist:
1988         perf_evlist__close(evlist);
1989 out_delete_maps:
1990         perf_evlist__delete_maps(evlist);
1991 out_delete_evlist:
1992         perf_evlist__delete(evlist);
1993 out:
1994         trace->live = false;
1995         return err;
1996 {
1997         char errbuf[BUFSIZ];
1998
1999 out_error_tp:
2000         perf_evlist__strerror_tp(evlist, errno, errbuf, sizeof(errbuf));
2001         goto out_error;
2002
2003 out_error_open:
2004         perf_evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf));
2005
2006 out_error:
2007         fprintf(trace->output, "%s\n", errbuf);
2008         goto out_delete_evlist;
2009 }
2010 }
2011
2012 static int trace__replay(struct trace *trace)
2013 {
2014         const struct perf_evsel_str_handler handlers[] = {
2015                 { "raw_syscalls:sys_enter",  trace__sys_enter, },
2016                 { "raw_syscalls:sys_exit",   trace__sys_exit, },
2017                 { "probe:vfs_getname",       trace__vfs_getname, },
2018         };
2019         struct perf_data_file file = {
2020                 .path  = input_name,
2021                 .mode  = PERF_DATA_MODE_READ,
2022         };
2023         struct perf_session *session;
2024         int err = -1;
2025
2026         trace->tool.sample        = trace__process_sample;
2027         trace->tool.mmap          = perf_event__process_mmap;
2028         trace->tool.mmap2         = perf_event__process_mmap2;
2029         trace->tool.comm          = perf_event__process_comm;
2030         trace->tool.exit          = perf_event__process_exit;
2031         trace->tool.fork          = perf_event__process_fork;
2032         trace->tool.attr          = perf_event__process_attr;
2033         trace->tool.tracing_data = perf_event__process_tracing_data;
2034         trace->tool.build_id      = perf_event__process_build_id;
2035
2036         trace->tool.ordered_samples = true;
2037         trace->tool.ordering_requires_timestamps = true;
2038
2039         /* add tid to output */
2040         trace->multiple_threads = true;
2041
2042         if (symbol__init() < 0)
2043                 return -1;
2044
2045         session = perf_session__new(&file, false, &trace->tool);
2046         if (session == NULL)
2047                 return -ENOMEM;
2048
2049         trace->host = &session->machines.host;
2050
2051         err = perf_session__set_tracepoints_handlers(session, handlers);
2052         if (err)
2053                 goto out;
2054
2055         if (!perf_session__has_tp(session, "raw_syscalls:sys_enter")) {
2056                 pr_err("Data file does not have raw_syscalls:sys_enter events\n");
2057                 goto out;
2058         }
2059
2060         if (!perf_session__has_tp(session, "raw_syscalls:sys_exit")) {
2061                 pr_err("Data file does not have raw_syscalls:sys_exit events\n");
2062                 goto out;
2063         }
2064
2065         err = parse_target_str(trace);
2066         if (err != 0)
2067                 goto out;
2068
2069         setup_pager();
2070
2071         err = perf_session__process_events(session, &trace->tool);
2072         if (err)
2073                 pr_err("Failed to process events, error %d", err);
2074
2075         else if (trace->summary)
2076                 trace__fprintf_thread_summary(trace, trace->output);
2077
2078 out:
2079         perf_session__delete(session);
2080
2081         return err;
2082 }
2083
/*
 * Print the banner and column titles that precede the per-thread summary.
 * Returns the sum of the fprintf() results for the header lines.
 */
static size_t trace__fprintf_threads_header(FILE *fp)
{
	static const char * const header[] = {
		"\n _____________________________________________________________________________\n",
		" __)    Summary of events    (__\n\n",
		"              [ task - pid ]     [ events ] [ ratio ]  [ runtime ]\n",
		"                                  syscall  count    min     max    avg  stddev\n",
		"                                                   msec    msec   msec     %\n",
		" _____________________________________________________________________________\n\n",
	};
	size_t printed = 0;
	size_t line;

	for (line = 0; line < sizeof(header) / sizeof(header[0]); line++)
		printed += fprintf(fp, "%s", header[line]);

	return printed;
}
2097
2098 static size_t thread__dump_stats(struct thread_trace *ttrace,
2099                                  struct trace *trace, FILE *fp)
2100 {
2101         struct stats *stats;
2102         size_t printed = 0;
2103         struct syscall *sc;
2104         struct int_node *inode = intlist__first(ttrace->syscall_stats);
2105
2106         if (inode == NULL)
2107                 return 0;
2108
2109         printed += fprintf(fp, "\n");
2110
2111         /* each int_node is a syscall */
2112         while (inode) {
2113                 stats = inode->priv;
2114                 if (stats) {
2115                         double min = (double)(stats->min) / NSEC_PER_MSEC;
2116                         double max = (double)(stats->max) / NSEC_PER_MSEC;
2117                         double avg = avg_stats(stats);
2118                         double pct;
2119                         u64 n = (u64) stats->n;
2120
2121                         pct = avg ? 100.0 * stddev_stats(stats)/avg : 0.0;
2122                         avg /= NSEC_PER_MSEC;
2123
2124                         sc = &trace->syscalls.table[inode->i];
2125                         printed += fprintf(fp, "%24s  %14s : ", "", sc->name);
2126                         printed += fprintf(fp, "%5" PRIu64 "  %8.3f  %8.3f",
2127                                            n, min, max);
2128                         printed += fprintf(fp, "  %8.3f  %6.2f\n", avg, pct);
2129                 }
2130
2131                 inode = intlist__next(inode);
2132         }
2133
2134         printed += fprintf(fp, "\n\n");
2135
2136         return printed;
2137 }
2138
2139 /* struct used to pass data to per-thread function */
2140 struct summary_data {
2141         FILE *fp;
2142         struct trace *trace;
2143         size_t printed;
2144 };
2145
2146 static int trace__fprintf_one_thread(struct thread *thread, void *priv)
2147 {
2148         struct summary_data *data = priv;
2149         FILE *fp = data->fp;
2150         size_t printed = data->printed;
2151         struct trace *trace = data->trace;
2152         struct thread_trace *ttrace = thread->priv;
2153         const char *color;
2154         double ratio;
2155
2156         if (ttrace == NULL)
2157                 return 0;
2158
2159         ratio = (double)ttrace->nr_events / trace->nr_events * 100.0;
2160
2161         color = PERF_COLOR_NORMAL;
2162         if (ratio > 50.0)
2163                 color = PERF_COLOR_RED;
2164         else if (ratio > 25.0)
2165                 color = PERF_COLOR_GREEN;
2166         else if (ratio > 5.0)
2167                 color = PERF_COLOR_YELLOW;
2168
2169         printed += color_fprintf(fp, color, "%20s", thread__comm_str(thread));
2170         printed += fprintf(fp, " - %-5d :%11lu   [", thread->tid, ttrace->nr_events);
2171         printed += color_fprintf(fp, color, "%5.1f%%", ratio);
2172         printed += fprintf(fp, " ] %10.3f ms\n", ttrace->runtime_ms);
2173         printed += thread__dump_stats(ttrace, trace, fp);
2174
2175         data->printed += printed;
2176
2177         return 0;
2178 }
2179
2180 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
2181 {
2182         struct summary_data data = {
2183                 .fp = fp,
2184                 .trace = trace
2185         };
2186         data.printed = trace__fprintf_threads_header(fp);
2187
2188         machine__for_each_thread(trace->host, trace__fprintf_one_thread, &data);
2189
2190         return data.printed;
2191 }
2192
2193 static int trace__set_duration(const struct option *opt, const char *str,
2194                                int unset __maybe_unused)
2195 {
2196         struct trace *trace = opt->value;
2197
2198         trace->duration_filter = atof(str);
2199         return 0;
2200 }
2201
2202 static int trace__open_output(struct trace *trace, const char *filename)
2203 {
2204         struct stat st;
2205
2206         if (!stat(filename, &st) && st.st_size) {
2207                 char oldname[PATH_MAX];
2208
2209                 scnprintf(oldname, sizeof(oldname), "%s.old", filename);
2210                 unlink(oldname);
2211                 rename(filename, oldname);
2212         }
2213
2214         trace->output = fopen(filename, "w");
2215
2216         return trace->output == NULL ? -errno : 0;
2217 }
2218
2219 int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
2220 {
2221         const char * const trace_usage[] = {
2222                 "perf trace [<options>] [<command>]",
2223                 "perf trace [<options>] -- <command> [<options>]",
2224                 "perf trace record [<options>] [<command>]",
2225                 "perf trace record [<options>] -- <command> [<options>]",
2226                 NULL
2227         };
2228         struct trace trace = {
2229                 .audit = {
2230                         .machine = audit_detect_machine(),
2231                         .open_id = audit_name_to_syscall("open", trace.audit.machine),
2232                 },
2233                 .syscalls = {
2234                         . max = -1,
2235                 },
2236                 .opts = {
2237                         .target = {
2238                                 .uid       = UINT_MAX,
2239                                 .uses_mmap = true,
2240                         },
2241                         .user_freq     = UINT_MAX,
2242                         .user_interval = ULLONG_MAX,
2243                         .no_delay      = true,
2244                         .mmap_pages    = 1024,
2245                 },
2246                 .output = stdout,
2247                 .show_comm = true,
2248         };
2249         const char *output_name = NULL;
2250         const char *ev_qualifier_str = NULL;
2251         const struct option trace_options[] = {
2252         OPT_BOOLEAN(0, "comm", &trace.show_comm,
2253                     "show the thread COMM next to its id"),
2254         OPT_BOOLEAN(0, "tool_stats", &trace.show_tool_stats, "show tool stats"),
2255         OPT_STRING('e', "expr", &ev_qualifier_str, "expr",
2256                     "list of events to trace"),
2257         OPT_STRING('o', "output", &output_name, "file", "output file name"),
2258         OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"),
2259         OPT_STRING('p', "pid", &trace.opts.target.pid, "pid",
2260                     "trace events on existing process id"),
2261         OPT_STRING('t', "tid", &trace.opts.target.tid, "tid",
2262                     "trace events on existing thread id"),
2263         OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide,
2264                     "system-wide collection from all CPUs"),
2265         OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu",
2266                     "list of cpus to monitor"),
2267         OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit,
2268                     "child tasks do not inherit counters"),
2269         OPT_CALLBACK('m', "mmap-pages", &trace.opts.mmap_pages, "pages",
2270                      "number of mmap data pages",
2271                      perf_evlist__parse_mmap_pages),
2272         OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user",
2273                    "user to profile"),
2274         OPT_CALLBACK(0, "duration", &trace, "float",
2275                      "show only events with duration > N.M ms",
2276                      trace__set_duration),
2277         OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"),
2278         OPT_INCR('v', "verbose", &verbose, "be more verbose"),
2279         OPT_BOOLEAN('T', "time", &trace.full_time,
2280                     "Show full timestamp, not time relative to first start"),
2281         OPT_BOOLEAN(0, "summary", &trace.summary,
2282                     "Show syscall summary with statistics"),
2283         OPT_END()
2284         };
2285         int err;
2286         char bf[BUFSIZ];
2287
2288         if ((argc > 1) && (strcmp(argv[1], "record") == 0))
2289                 return trace__record(argc-2, &argv[2]);
2290
2291         argc = parse_options(argc, argv, trace_options, trace_usage, 0);
2292
2293         if (output_name != NULL) {
2294                 err = trace__open_output(&trace, output_name);
2295                 if (err < 0) {
2296                         perror("failed to create output file");
2297                         goto out;
2298                 }
2299         }
2300
2301         if (ev_qualifier_str != NULL) {
2302                 const char *s = ev_qualifier_str;
2303
2304                 trace.not_ev_qualifier = *s == '!';
2305                 if (trace.not_ev_qualifier)
2306                         ++s;
2307                 trace.ev_qualifier = strlist__new(true, s);
2308                 if (trace.ev_qualifier == NULL) {
2309                         fputs("Not enough memory to parse event qualifier",
2310                               trace.output);
2311                         err = -ENOMEM;
2312                         goto out_close;
2313                 }
2314         }
2315
2316         err = perf_target__validate(&trace.opts.target);
2317         if (err) {
2318                 perf_target__strerror(&trace.opts.target, err, bf, sizeof(bf));
2319                 fprintf(trace.output, "%s", bf);
2320                 goto out_close;
2321         }
2322
2323         err = perf_target__parse_uid(&trace.opts.target);
2324         if (err) {
2325                 perf_target__strerror(&trace.opts.target, err, bf, sizeof(bf));
2326                 fprintf(trace.output, "%s", bf);
2327                 goto out_close;
2328         }
2329
2330         if (!argc && perf_target__none(&trace.opts.target))
2331                 trace.opts.target.system_wide = true;
2332
2333         if (input_name)
2334                 err = trace__replay(&trace);
2335         else
2336                 err = trace__run(&trace, argc, argv);
2337
2338 out_close:
2339         if (output_name != NULL)
2340                 fclose(trace.output);
2341 out:
2342         return err;
2343 }