Merge tag 'perf-core-for-mingo' of git://git.kernel.org/pub/scm/linux/kernel/git...
[firefly-linux-kernel-4.4.55.git] / tools / perf / builtin-trace.c
1 #include <traceevent/event-parse.h>
2 #include "builtin.h"
3 #include "util/color.h"
4 #include "util/debug.h"
5 #include "util/evlist.h"
6 #include "util/machine.h"
7 #include "util/session.h"
8 #include "util/thread.h"
9 #include "util/parse-options.h"
10 #include "util/strlist.h"
11 #include "util/intlist.h"
12 #include "util/thread_map.h"
13 #include "util/stat.h"
14 #include "trace-event.h"
15
#include <libaudit.h>
#include <stdlib.h>
#include <string.h>
#include <sys/eventfd.h>
#include <sys/mman.h>
#include <linux/futex.h>
21
/*
 * Fallback values for older distros: each constant below is defined
 * here only when the system headers did not already provide it.
 */
#if !defined(MAP_STACK)
# define MAP_STACK		0x20000
#endif

#if !defined(MADV_HWPOISON)
# define MADV_HWPOISON		100
#endif

#if !defined(MADV_MERGEABLE)
# define MADV_MERGEABLE		12
#endif

#if !defined(MADV_UNMERGEABLE)
# define MADV_UNMERGEABLE	13
#endif
38
39 struct tp_field {
40         int offset;
41         union {
42                 u64 (*integer)(struct tp_field *field, struct perf_sample *sample);
43                 void *(*pointer)(struct tp_field *field, struct perf_sample *sample);
44         };
45 };
46
47 #define TP_UINT_FIELD(bits) \
48 static u64 tp_field__u##bits(struct tp_field *field, struct perf_sample *sample) \
49 { \
50         return *(u##bits *)(sample->raw_data + field->offset); \
51 }
52
53 TP_UINT_FIELD(8);
54 TP_UINT_FIELD(16);
55 TP_UINT_FIELD(32);
56 TP_UINT_FIELD(64);
57
58 #define TP_UINT_FIELD__SWAPPED(bits) \
59 static u64 tp_field__swapped_u##bits(struct tp_field *field, struct perf_sample *sample) \
60 { \
61         u##bits value = *(u##bits *)(sample->raw_data + field->offset); \
62         return bswap_##bits(value);\
63 }
64
65 TP_UINT_FIELD__SWAPPED(16);
66 TP_UINT_FIELD__SWAPPED(32);
67 TP_UINT_FIELD__SWAPPED(64);
68
69 static int tp_field__init_uint(struct tp_field *field,
70                                struct format_field *format_field,
71                                bool needs_swap)
72 {
73         field->offset = format_field->offset;
74
75         switch (format_field->size) {
76         case 1:
77                 field->integer = tp_field__u8;
78                 break;
79         case 2:
80                 field->integer = needs_swap ? tp_field__swapped_u16 : tp_field__u16;
81                 break;
82         case 4:
83                 field->integer = needs_swap ? tp_field__swapped_u32 : tp_field__u32;
84                 break;
85         case 8:
86                 field->integer = needs_swap ? tp_field__swapped_u64 : tp_field__u64;
87                 break;
88         default:
89                 return -1;
90         }
91
92         return 0;
93 }
94
95 static void *tp_field__ptr(struct tp_field *field, struct perf_sample *sample)
96 {
97         return sample->raw_data + field->offset;
98 }
99
100 static int tp_field__init_ptr(struct tp_field *field, struct format_field *format_field)
101 {
102         field->offset = format_field->offset;
103         field->pointer = tp_field__ptr;
104         return 0;
105 }
106
107 struct syscall_tp {
108         struct tp_field id;
109         union {
110                 struct tp_field args, ret;
111         };
112 };
113
114 static int perf_evsel__init_tp_uint_field(struct perf_evsel *evsel,
115                                           struct tp_field *field,
116                                           const char *name)
117 {
118         struct format_field *format_field = perf_evsel__field(evsel, name);
119
120         if (format_field == NULL)
121                 return -1;
122
123         return tp_field__init_uint(field, format_field, evsel->needs_swap);
124 }
125
/*
 * Initialize the integer accessor for member 'name' of the
 * struct syscall_tp hanging off evsel->priv, using the tracepoint
 * field of the same name.  Evaluates to the result of
 * perf_evsel__init_tp_uint_field().
 *
 * 'evsel' is parenthesized so that any pointer expression can be
 * passed; note that it is still evaluated twice.
 */
#define perf_evsel__init_sc_tp_uint_field(evsel, name) \
	({ struct syscall_tp *sc = (evsel)->priv;\
	   perf_evsel__init_tp_uint_field((evsel), &sc->name, #name); })
129
/*
 * Look up the tracepoint field @name in @evsel and set up @field as a
 * pointer accessor for it.
 *
 * Returns -1 when the field does not exist, otherwise whatever
 * tp_field__init_ptr() returns.
 */
static int perf_evsel__init_tp_ptr_field(struct perf_evsel *evsel,
					 struct tp_field *field,
					 const char *name)
{
	struct format_field *ff = perf_evsel__field(evsel, name);

	if (!ff)
		return -1;

	return tp_field__init_ptr(field, ff);
}
141
/*
 * Initialize the pointer accessor for member 'name' of the
 * struct syscall_tp hanging off evsel->priv, using the tracepoint
 * field of the same name.  Evaluates to the result of
 * perf_evsel__init_tp_ptr_field().
 *
 * 'evsel' is parenthesized so that any pointer expression can be
 * passed; note that it is still evaluated twice.
 */
#define perf_evsel__init_sc_tp_ptr_field(evsel, name) \
	({ struct syscall_tp *sc = (evsel)->priv;\
	   perf_evsel__init_tp_ptr_field((evsel), &sc->name, #name); })
145
146 static void perf_evsel__delete_priv(struct perf_evsel *evsel)
147 {
148         free(evsel->priv);
149         evsel->priv = NULL;
150         perf_evsel__delete(evsel);
151 }
152
153 static int perf_evsel__init_syscall_tp(struct perf_evsel *evsel, void *handler)
154 {
155         evsel->priv = malloc(sizeof(struct syscall_tp));
156         if (evsel->priv != NULL) {
157                 if (perf_evsel__init_sc_tp_uint_field(evsel, id))
158                         goto out_delete;
159
160                 evsel->handler = handler;
161                 return 0;
162         }
163
164         return -ENOMEM;
165
166 out_delete:
167         free(evsel->priv);
168         evsel->priv = NULL;
169         return -ENOENT;
170 }
171
/*
 * Create an evsel for the raw_syscalls:@direction tracepoint and
 * prepare it with perf_evsel__init_syscall_tp().
 *
 * Returns the new evsel, or NULL when it could not be created or
 * initialized (a partially set up evsel is deleted).
 */
static struct perf_evsel *perf_evsel__syscall_newtp(const char *direction, void *handler)
{
	struct perf_evsel *evsel = perf_evsel__newtp("raw_syscalls", direction);

	if (evsel == NULL)
		return NULL;

	if (perf_evsel__init_syscall_tp(evsel, handler) != 0) {
		perf_evsel__delete_priv(evsel);
		return NULL;
	}

	return evsel;
}
187
/*
 * Read member 'name' of the struct syscall_tp attached to evsel->priv
 * from 'sample', through that member's integer reader.
 *
 * Macro arguments are parenthesized so that arbitrary expressions can
 * be passed for 'evsel' and 'sample'.
 */
#define perf_evsel__sc_tp_uint(evsel, name, sample) \
	({ struct syscall_tp *fields = (evsel)->priv; \
	   fields->name.integer(&fields->name, (sample)); })

/*
 * Same as perf_evsel__sc_tp_uint(), but goes through the member's
 * pointer reader and so evaluates to a pointer into the sample.
 */
#define perf_evsel__sc_tp_ptr(evsel, name, sample) \
	({ struct syscall_tp *fields = (evsel)->priv; \
	   fields->name.pointer(&fields->name, (sample)); })
195
196 static int perf_evlist__add_syscall_newtp(struct perf_evlist *evlist,
197                                           void *sys_enter_handler,
198                                           void *sys_exit_handler)
199 {
200         int ret = -1;
201         struct perf_evsel *sys_enter, *sys_exit;
202
203         sys_enter = perf_evsel__syscall_newtp("sys_enter", sys_enter_handler);
204         if (sys_enter == NULL)
205                 goto out;
206
207         if (perf_evsel__init_sc_tp_ptr_field(sys_enter, args))
208                 goto out_delete_sys_enter;
209
210         sys_exit = perf_evsel__syscall_newtp("sys_exit", sys_exit_handler);
211         if (sys_exit == NULL)
212                 goto out_delete_sys_enter;
213
214         if (perf_evsel__init_sc_tp_uint_field(sys_exit, ret))
215                 goto out_delete_sys_exit;
216
217         perf_evlist__add(evlist, sys_enter);
218         perf_evlist__add(evlist, sys_exit);
219
220         ret = 0;
221 out:
222         return ret;
223
224 out_delete_sys_exit:
225         perf_evsel__delete_priv(sys_exit);
226 out_delete_sys_enter:
227         perf_evsel__delete_priv(sys_enter);
228         goto out;
229 }
230
231
232 struct syscall_arg {
233         unsigned long val;
234         struct thread *thread;
235         struct trace  *trace;
236         void          *parm;
237         u8            idx;
238         u8            mask;
239 };
240
/*
 * Table of names for a contiguous range of integer values:
 * entries[i] names the value (offset + i).
 */
struct strarray {
	int         offset;      /* value that maps to entries[0] */
	int         nr_entries;  /* number of slots in entries[] */
	const char **entries;    /* the names */
};
246
/*
 * Define 'strarray__<array>', a struct strarray describing the string
 * table 'array'.  'array' must be a real array (not a pointer) since
 * its size is taken with ARRAY_SIZE().  The offset is left at zero.
 */
#define DEFINE_STRARRAY(array) struct strarray strarray__##array = { \
	.entries    = array, \
	.nr_entries = ARRAY_SIZE(array), \
}

/*
 * Same as DEFINE_STRARRAY(), for tables whose first entry names the
 * value 'off' instead of zero.
 */
#define DEFINE_STRARRAY_OFFSET(array, off) struct strarray strarray__##array = { \
	.entries    = array, \
	.nr_entries = ARRAY_SIZE(array), \
	.offset     = off, \
}
257
258 static size_t __syscall_arg__scnprintf_strarray(char *bf, size_t size,
259                                                 const char *intfmt,
260                                                 struct syscall_arg *arg)
261 {
262         struct strarray *sa = arg->parm;
263         int idx = arg->val - sa->offset;
264
265         if (idx < 0 || idx >= sa->nr_entries)
266                 return scnprintf(bf, size, intfmt, arg->val);
267
268         return scnprintf(bf, size, "%s", sa->entries[idx]);
269 }
270
/*
 * strarray formatter that prints values without a name in decimal.
 */
static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size,
					      struct syscall_arg *arg)
{
	const char *fallback_fmt = "%d";

	return __syscall_arg__scnprintf_strarray(bf, size, fallback_fmt, arg);
}

#define SCA_STRARRAY syscall_arg__scnprintf_strarray
278
/*
 * strarray formatter that prints values without a name in hexadecimal.
 */
static size_t syscall_arg__scnprintf_strhexarray(char *bf, size_t size,
						 struct syscall_arg *arg)
{
	const char *fallback_fmt = "%#x";

	return __syscall_arg__scnprintf_strarray(bf, size, fallback_fmt, arg);
}

#define SCA_STRHEXARRAY syscall_arg__scnprintf_strhexarray
286
/*
 * Forward declaration: the file descriptor formatter is defined
 * outside this part of the file but is needed by the formatters below.
 */
static size_t syscall_arg__scnprintf_fd(char *bf, size_t size, struct syscall_arg *arg);

#define SCA_FD syscall_arg__scnprintf_fd
291
292 static size_t syscall_arg__scnprintf_fd_at(char *bf, size_t size,
293                                            struct syscall_arg *arg)
294 {
295         int fd = arg->val;
296
297         if (fd == AT_FDCWD)
298                 return scnprintf(bf, size, "CWD");
299
300         return syscall_arg__scnprintf_fd(bf, size, arg);
301 }
302
303 #define SCA_FDAT syscall_arg__scnprintf_fd_at
304
/*
 * Forward declaration: the formatter for the fd argument of close()
 * is defined outside this part of the file.
 */
static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size, struct syscall_arg *arg);

#define SCA_CLOSE_FD syscall_arg__scnprintf_close_fd
309
310 static size_t syscall_arg__scnprintf_hex(char *bf, size_t size,
311                                          struct syscall_arg *arg)
312 {
313         return scnprintf(bf, size, "%#lx", arg->val);
314 }
315
316 #define SCA_HEX syscall_arg__scnprintf_hex
317
318 static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size,
319                                                struct syscall_arg *arg)
320 {
321         int printed = 0, prot = arg->val;
322
323         if (prot == PROT_NONE)
324                 return scnprintf(bf, size, "NONE");
325 #define P_MMAP_PROT(n) \
326         if (prot & PROT_##n) { \
327                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
328                 prot &= ~PROT_##n; \
329         }
330
331         P_MMAP_PROT(EXEC);
332         P_MMAP_PROT(READ);
333         P_MMAP_PROT(WRITE);
334 #ifdef PROT_SEM
335         P_MMAP_PROT(SEM);
336 #endif
337         P_MMAP_PROT(GROWSDOWN);
338         P_MMAP_PROT(GROWSUP);
339 #undef P_MMAP_PROT
340
341         if (prot)
342                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", prot);
343
344         return printed;
345 }
346
347 #define SCA_MMAP_PROT syscall_arg__scnprintf_mmap_prot
348
349 static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size,
350                                                 struct syscall_arg *arg)
351 {
352         int printed = 0, flags = arg->val;
353
354 #define P_MMAP_FLAG(n) \
355         if (flags & MAP_##n) { \
356                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
357                 flags &= ~MAP_##n; \
358         }
359
360         P_MMAP_FLAG(SHARED);
361         P_MMAP_FLAG(PRIVATE);
362 #ifdef MAP_32BIT
363         P_MMAP_FLAG(32BIT);
364 #endif
365         P_MMAP_FLAG(ANONYMOUS);
366         P_MMAP_FLAG(DENYWRITE);
367         P_MMAP_FLAG(EXECUTABLE);
368         P_MMAP_FLAG(FILE);
369         P_MMAP_FLAG(FIXED);
370         P_MMAP_FLAG(GROWSDOWN);
371 #ifdef MAP_HUGETLB
372         P_MMAP_FLAG(HUGETLB);
373 #endif
374         P_MMAP_FLAG(LOCKED);
375         P_MMAP_FLAG(NONBLOCK);
376         P_MMAP_FLAG(NORESERVE);
377         P_MMAP_FLAG(POPULATE);
378         P_MMAP_FLAG(STACK);
379 #ifdef MAP_UNINITIALIZED
380         P_MMAP_FLAG(UNINITIALIZED);
381 #endif
382 #undef P_MMAP_FLAG
383
384         if (flags)
385                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
386
387         return printed;
388 }
389
390 #define SCA_MMAP_FLAGS syscall_arg__scnprintf_mmap_flags
391
392 static size_t syscall_arg__scnprintf_madvise_behavior(char *bf, size_t size,
393                                                       struct syscall_arg *arg)
394 {
395         int behavior = arg->val;
396
397         switch (behavior) {
398 #define P_MADV_BHV(n) case MADV_##n: return scnprintf(bf, size, #n)
399         P_MADV_BHV(NORMAL);
400         P_MADV_BHV(RANDOM);
401         P_MADV_BHV(SEQUENTIAL);
402         P_MADV_BHV(WILLNEED);
403         P_MADV_BHV(DONTNEED);
404         P_MADV_BHV(REMOVE);
405         P_MADV_BHV(DONTFORK);
406         P_MADV_BHV(DOFORK);
407         P_MADV_BHV(HWPOISON);
408 #ifdef MADV_SOFT_OFFLINE
409         P_MADV_BHV(SOFT_OFFLINE);
410 #endif
411         P_MADV_BHV(MERGEABLE);
412         P_MADV_BHV(UNMERGEABLE);
413 #ifdef MADV_HUGEPAGE
414         P_MADV_BHV(HUGEPAGE);
415 #endif
416 #ifdef MADV_NOHUGEPAGE
417         P_MADV_BHV(NOHUGEPAGE);
418 #endif
419 #ifdef MADV_DONTDUMP
420         P_MADV_BHV(DONTDUMP);
421 #endif
422 #ifdef MADV_DODUMP
423         P_MADV_BHV(DODUMP);
424 #endif
425 #undef P_MADV_PHV
426         default: break;
427         }
428
429         return scnprintf(bf, size, "%#x", behavior);
430 }
431
432 #define SCA_MADV_BHV syscall_arg__scnprintf_madvise_behavior
433
434 static size_t syscall_arg__scnprintf_flock(char *bf, size_t size,
435                                            struct syscall_arg *arg)
436 {
437         int printed = 0, op = arg->val;
438
439         if (op == 0)
440                 return scnprintf(bf, size, "NONE");
441 #define P_CMD(cmd) \
442         if ((op & LOCK_##cmd) == LOCK_##cmd) { \
443                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #cmd); \
444                 op &= ~LOCK_##cmd; \
445         }
446
447         P_CMD(SH);
448         P_CMD(EX);
449         P_CMD(NB);
450         P_CMD(UN);
451         P_CMD(MAND);
452         P_CMD(RW);
453         P_CMD(READ);
454         P_CMD(WRITE);
455 #undef P_OP
456
457         if (op)
458                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", op);
459
460         return printed;
461 }
462
463 #define SCA_FLOCK syscall_arg__scnprintf_flock
464
465 static size_t syscall_arg__scnprintf_futex_op(char *bf, size_t size, struct syscall_arg *arg)
466 {
467         enum syscall_futex_args {
468                 SCF_UADDR   = (1 << 0),
469                 SCF_OP      = (1 << 1),
470                 SCF_VAL     = (1 << 2),
471                 SCF_TIMEOUT = (1 << 3),
472                 SCF_UADDR2  = (1 << 4),
473                 SCF_VAL3    = (1 << 5),
474         };
475         int op = arg->val;
476         int cmd = op & FUTEX_CMD_MASK;
477         size_t printed = 0;
478
479         switch (cmd) {
480 #define P_FUTEX_OP(n) case FUTEX_##n: printed = scnprintf(bf, size, #n);
481         P_FUTEX_OP(WAIT);           arg->mask |= SCF_VAL3|SCF_UADDR2;             break;
482         P_FUTEX_OP(WAKE);           arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
483         P_FUTEX_OP(FD);             arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
484         P_FUTEX_OP(REQUEUE);        arg->mask |= SCF_VAL3|SCF_TIMEOUT;            break;
485         P_FUTEX_OP(CMP_REQUEUE);    arg->mask |= SCF_TIMEOUT;                     break;
486         P_FUTEX_OP(CMP_REQUEUE_PI); arg->mask |= SCF_TIMEOUT;                     break;
487         P_FUTEX_OP(WAKE_OP);                                                      break;
488         P_FUTEX_OP(LOCK_PI);        arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
489         P_FUTEX_OP(UNLOCK_PI);      arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
490         P_FUTEX_OP(TRYLOCK_PI);     arg->mask |= SCF_VAL3|SCF_UADDR2;             break;
491         P_FUTEX_OP(WAIT_BITSET);    arg->mask |= SCF_UADDR2;                      break;
492         P_FUTEX_OP(WAKE_BITSET);    arg->mask |= SCF_UADDR2;                      break;
493         P_FUTEX_OP(WAIT_REQUEUE_PI);                                              break;
494         default: printed = scnprintf(bf, size, "%#x", cmd);                       break;
495         }
496
497         if (op & FUTEX_PRIVATE_FLAG)
498                 printed += scnprintf(bf + printed, size - printed, "|PRIV");
499
500         if (op & FUTEX_CLOCK_REALTIME)
501                 printed += scnprintf(bf + printed, size - printed, "|CLKRT");
502
503         return printed;
504 }
505
506 #define SCA_FUTEX_OP  syscall_arg__scnprintf_futex_op
507
508 static const char *epoll_ctl_ops[] = { "ADD", "DEL", "MOD", };
509 static DEFINE_STRARRAY_OFFSET(epoll_ctl_ops, 1);
510
511 static const char *itimers[] = { "REAL", "VIRTUAL", "PROF", };
512 static DEFINE_STRARRAY(itimers);
513
514 static const char *whences[] = { "SET", "CUR", "END",
515 #ifdef SEEK_DATA
516 "DATA",
517 #endif
518 #ifdef SEEK_HOLE
519 "HOLE",
520 #endif
521 };
522 static DEFINE_STRARRAY(whences);
523
524 static const char *fcntl_cmds[] = {
525         "DUPFD", "GETFD", "SETFD", "GETFL", "SETFL", "GETLK", "SETLK",
526         "SETLKW", "SETOWN", "GETOWN", "SETSIG", "GETSIG", "F_GETLK64",
527         "F_SETLK64", "F_SETLKW64", "F_SETOWN_EX", "F_GETOWN_EX",
528         "F_GETOWNER_UIDS",
529 };
530 static DEFINE_STRARRAY(fcntl_cmds);
531
532 static const char *rlimit_resources[] = {
533         "CPU", "FSIZE", "DATA", "STACK", "CORE", "RSS", "NPROC", "NOFILE",
534         "MEMLOCK", "AS", "LOCKS", "SIGPENDING", "MSGQUEUE", "NICE", "RTPRIO",
535         "RTTIME",
536 };
537 static DEFINE_STRARRAY(rlimit_resources);
538
539 static const char *sighow[] = { "BLOCK", "UNBLOCK", "SETMASK", };
540 static DEFINE_STRARRAY(sighow);
541
542 static const char *clockid[] = {
543         "REALTIME", "MONOTONIC", "PROCESS_CPUTIME_ID", "THREAD_CPUTIME_ID",
544         "MONOTONIC_RAW", "REALTIME_COARSE", "MONOTONIC_COARSE",
545 };
546 static DEFINE_STRARRAY(clockid);
547
/*
 * Socket address family names, indexed by the numeric family value.
 *
 * Slot 28 (MPLS) must be present: without it "CAN" and every name
 * after it sit one slot too low, since AF_CAN is 29, and the last
 * family (VSOCK, 40) falls outside the table.
 */
static const char *socket_families[] = {
	"UNSPEC", "LOCAL", "INET", "AX25", "IPX", "APPLETALK", "NETROM",
	"BRIDGE", "ATMPVC", "X25", "INET6", "ROSE", "DECnet", "NETBEUI",
	"SECURITY", "KEY", "NETLINK", "PACKET", "ASH", "ECONET", "ATMSVC",
	"RDS", "SNA", "IRDA", "PPPOX", "WANPIPE", "LLC", "IB", "MPLS", "CAN",
	"TIPC", "BLUETOOTH", "IUCV", "RXRPC", "ISDN", "PHONET", "IEEE802154",
	"CAIF", "ALG", "NFC", "VSOCK",
};
556 static DEFINE_STRARRAY(socket_families);
557
558 #ifndef SOCK_TYPE_MASK
559 #define SOCK_TYPE_MASK 0xf
560 #endif
561
562 static size_t syscall_arg__scnprintf_socket_type(char *bf, size_t size,
563                                                       struct syscall_arg *arg)
564 {
565         size_t printed;
566         int type = arg->val,
567             flags = type & ~SOCK_TYPE_MASK;
568
569         type &= SOCK_TYPE_MASK;
570         /*
571          * Can't use a strarray, MIPS may override for ABI reasons.
572          */
573         switch (type) {
574 #define P_SK_TYPE(n) case SOCK_##n: printed = scnprintf(bf, size, #n); break;
575         P_SK_TYPE(STREAM);
576         P_SK_TYPE(DGRAM);
577         P_SK_TYPE(RAW);
578         P_SK_TYPE(RDM);
579         P_SK_TYPE(SEQPACKET);
580         P_SK_TYPE(DCCP);
581         P_SK_TYPE(PACKET);
582 #undef P_SK_TYPE
583         default:
584                 printed = scnprintf(bf, size, "%#x", type);
585         }
586
587 #define P_SK_FLAG(n) \
588         if (flags & SOCK_##n) { \
589                 printed += scnprintf(bf + printed, size - printed, "|%s", #n); \
590                 flags &= ~SOCK_##n; \
591         }
592
593         P_SK_FLAG(CLOEXEC);
594         P_SK_FLAG(NONBLOCK);
595 #undef P_SK_FLAG
596
597         if (flags)
598                 printed += scnprintf(bf + printed, size - printed, "|%#x", flags);
599
600         return printed;
601 }
602
603 #define SCA_SK_TYPE syscall_arg__scnprintf_socket_type
604
605 #ifndef MSG_PROBE
606 #define MSG_PROBE            0x10
607 #endif
608 #ifndef MSG_WAITFORONE
609 #define MSG_WAITFORONE  0x10000
610 #endif
611 #ifndef MSG_SENDPAGE_NOTLAST
612 #define MSG_SENDPAGE_NOTLAST 0x20000
613 #endif
614 #ifndef MSG_FASTOPEN
615 #define MSG_FASTOPEN         0x20000000
616 #endif
617
618 static size_t syscall_arg__scnprintf_msg_flags(char *bf, size_t size,
619                                                struct syscall_arg *arg)
620 {
621         int printed = 0, flags = arg->val;
622
623         if (flags == 0)
624                 return scnprintf(bf, size, "NONE");
625 #define P_MSG_FLAG(n) \
626         if (flags & MSG_##n) { \
627                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
628                 flags &= ~MSG_##n; \
629         }
630
631         P_MSG_FLAG(OOB);
632         P_MSG_FLAG(PEEK);
633         P_MSG_FLAG(DONTROUTE);
634         P_MSG_FLAG(TRYHARD);
635         P_MSG_FLAG(CTRUNC);
636         P_MSG_FLAG(PROBE);
637         P_MSG_FLAG(TRUNC);
638         P_MSG_FLAG(DONTWAIT);
639         P_MSG_FLAG(EOR);
640         P_MSG_FLAG(WAITALL);
641         P_MSG_FLAG(FIN);
642         P_MSG_FLAG(SYN);
643         P_MSG_FLAG(CONFIRM);
644         P_MSG_FLAG(RST);
645         P_MSG_FLAG(ERRQUEUE);
646         P_MSG_FLAG(NOSIGNAL);
647         P_MSG_FLAG(MORE);
648         P_MSG_FLAG(WAITFORONE);
649         P_MSG_FLAG(SENDPAGE_NOTLAST);
650         P_MSG_FLAG(FASTOPEN);
651         P_MSG_FLAG(CMSG_CLOEXEC);
652 #undef P_MSG_FLAG
653
654         if (flags)
655                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
656
657         return printed;
658 }
659
660 #define SCA_MSG_FLAGS syscall_arg__scnprintf_msg_flags
661
662 static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size,
663                                                  struct syscall_arg *arg)
664 {
665         size_t printed = 0;
666         int mode = arg->val;
667
668         if (mode == F_OK) /* 0 */
669                 return scnprintf(bf, size, "F");
670 #define P_MODE(n) \
671         if (mode & n##_OK) { \
672                 printed += scnprintf(bf + printed, size - printed, "%s", #n); \
673                 mode &= ~n##_OK; \
674         }
675
676         P_MODE(R);
677         P_MODE(W);
678         P_MODE(X);
679 #undef P_MODE
680
681         if (mode)
682                 printed += scnprintf(bf + printed, size - printed, "|%#x", mode);
683
684         return printed;
685 }
686
687 #define SCA_ACCMODE syscall_arg__scnprintf_access_mode
688
689 static size_t syscall_arg__scnprintf_open_flags(char *bf, size_t size,
690                                                struct syscall_arg *arg)
691 {
692         int printed = 0, flags = arg->val;
693
694         if (!(flags & O_CREAT))
695                 arg->mask |= 1 << (arg->idx + 1); /* Mask the mode parm */
696
697         if (flags == 0)
698                 return scnprintf(bf, size, "RDONLY");
699 #define P_FLAG(n) \
700         if (flags & O_##n) { \
701                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
702                 flags &= ~O_##n; \
703         }
704
705         P_FLAG(APPEND);
706         P_FLAG(ASYNC);
707         P_FLAG(CLOEXEC);
708         P_FLAG(CREAT);
709         P_FLAG(DIRECT);
710         P_FLAG(DIRECTORY);
711         P_FLAG(EXCL);
712         P_FLAG(LARGEFILE);
713         P_FLAG(NOATIME);
714         P_FLAG(NOCTTY);
715 #ifdef O_NONBLOCK
716         P_FLAG(NONBLOCK);
717 #elif O_NDELAY
718         P_FLAG(NDELAY);
719 #endif
720 #ifdef O_PATH
721         P_FLAG(PATH);
722 #endif
723         P_FLAG(RDWR);
724 #ifdef O_DSYNC
725         if ((flags & O_SYNC) == O_SYNC)
726                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", "SYNC");
727         else {
728                 P_FLAG(DSYNC);
729         }
730 #else
731         P_FLAG(SYNC);
732 #endif
733         P_FLAG(TRUNC);
734         P_FLAG(WRONLY);
735 #undef P_FLAG
736
737         if (flags)
738                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
739
740         return printed;
741 }
742
743 #define SCA_OPEN_FLAGS syscall_arg__scnprintf_open_flags
744
745 static size_t syscall_arg__scnprintf_eventfd_flags(char *bf, size_t size,
746                                                    struct syscall_arg *arg)
747 {
748         int printed = 0, flags = arg->val;
749
750         if (flags == 0)
751                 return scnprintf(bf, size, "NONE");
752 #define P_FLAG(n) \
753         if (flags & EFD_##n) { \
754                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
755                 flags &= ~EFD_##n; \
756         }
757
758         P_FLAG(SEMAPHORE);
759         P_FLAG(CLOEXEC);
760         P_FLAG(NONBLOCK);
761 #undef P_FLAG
762
763         if (flags)
764                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
765
766         return printed;
767 }
768
769 #define SCA_EFD_FLAGS syscall_arg__scnprintf_eventfd_flags
770
771 static size_t syscall_arg__scnprintf_pipe_flags(char *bf, size_t size,
772                                                 struct syscall_arg *arg)
773 {
774         int printed = 0, flags = arg->val;
775
776 #define P_FLAG(n) \
777         if (flags & O_##n) { \
778                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
779                 flags &= ~O_##n; \
780         }
781
782         P_FLAG(CLOEXEC);
783         P_FLAG(NONBLOCK);
784 #undef P_FLAG
785
786         if (flags)
787                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
788
789         return printed;
790 }
791
792 #define SCA_PIPE_FLAGS syscall_arg__scnprintf_pipe_flags
793
794 static size_t syscall_arg__scnprintf_signum(char *bf, size_t size, struct syscall_arg *arg)
795 {
796         int sig = arg->val;
797
798         switch (sig) {
799 #define P_SIGNUM(n) case SIG##n: return scnprintf(bf, size, #n)
800         P_SIGNUM(HUP);
801         P_SIGNUM(INT);
802         P_SIGNUM(QUIT);
803         P_SIGNUM(ILL);
804         P_SIGNUM(TRAP);
805         P_SIGNUM(ABRT);
806         P_SIGNUM(BUS);
807         P_SIGNUM(FPE);
808         P_SIGNUM(KILL);
809         P_SIGNUM(USR1);
810         P_SIGNUM(SEGV);
811         P_SIGNUM(USR2);
812         P_SIGNUM(PIPE);
813         P_SIGNUM(ALRM);
814         P_SIGNUM(TERM);
815         P_SIGNUM(STKFLT);
816         P_SIGNUM(CHLD);
817         P_SIGNUM(CONT);
818         P_SIGNUM(STOP);
819         P_SIGNUM(TSTP);
820         P_SIGNUM(TTIN);
821         P_SIGNUM(TTOU);
822         P_SIGNUM(URG);
823         P_SIGNUM(XCPU);
824         P_SIGNUM(XFSZ);
825         P_SIGNUM(VTALRM);
826         P_SIGNUM(PROF);
827         P_SIGNUM(WINCH);
828         P_SIGNUM(IO);
829         P_SIGNUM(PWR);
830         P_SIGNUM(SYS);
831         default: break;
832         }
833
834         return scnprintf(bf, size, "%#x", sig);
835 }
836
837 #define SCA_SIGNUM syscall_arg__scnprintf_signum
838
#define TCGETS		0x5401

/*
 * Names of the tty ioctl commands.  The table is used with an offset
 * of 0x5401 (TCGETS), i.e. the slot of a command is (cmd - TCGETS),
 * which is one less than the low byte of the command number.
 *
 * Designators are therefore written as (command number - TCGETS) so
 * that each name lands on its own command.  Slots that are skipped
 * stay NULL.
 */
static const char *tioctls[] = {
	"TCGETS", "TCSETS", "TCSETSW", "TCSETSF", "TCGETA", "TCSETA", "TCSETAW",
	"TCSETAF", "TCSBRK", "TCXONC", "TCFLSH", "TIOCEXCL", "TIOCNXCL",
	"TIOCSCTTY", "TIOCGPGRP", "TIOCSPGRP", "TIOCOUTQ", "TIOCSTI",
	"TIOCGWINSZ", "TIOCSWINSZ", "TIOCMGET", "TIOCMBIS", "TIOCMBIC",
	"TIOCMSET", "TIOCGSOFTCAR", "TIOCSSOFTCAR", "FIONREAD", "TIOCLINUX",
	"TIOCCONS", "TIOCGSERIAL", "TIOCSSERIAL", "TIOCPKT", "FIONBIO",
	"TIOCNOTTY", "TIOCSETD", "TIOCGETD", "TCSBRKP",
	[0x5427 - TCGETS] = "TIOCSBRK",
	"TIOCCBRK", "TIOCGSID", "TCGETS2", "TCSETS2", "TCSETSW2", "TCSETSF2",
	"TIOCGRS485", "TIOCSRS485", "TIOCGPTN", "TIOCSPTLCK",
	"TIOCGDEV||TCGETX", "TCSETX", "TCSETXF", "TCSETXW", "TIOCSIG",
	"TIOCVHANGUP", "TIOCGPKT", "TIOCGPTLCK",
	[0x5440 - TCGETS] = "TIOCGEXCL",
	[0x5450 - TCGETS] = "FIONCLEX", "FIOCLEX", "FIOASYNC", "TIOCSERCONFIG",
	"TIOCSERGWILD", "TIOCSERSWILD", "TIOCGLCKTRMIOS", "TIOCSLCKTRMIOS",
	"TIOCSERGSTRUCT", "TIOCSERGETLSR", "TIOCSERGETMULTI", "TIOCSERSETMULTI",
	"TIOCMIWAIT", "TIOCGICOUNT",
	[0x5460 - TCGETS] = "FIOQSIZE",
};
858
859 static DEFINE_STRARRAY_OFFSET(tioctls, 0x5401);
860
861 #define STRARRAY(arg, name, array) \
862           .arg_scnprintf = { [arg] = SCA_STRARRAY, }, \
863           .arg_parm      = { [arg] = &strarray__##array, }
864
865 static struct syscall_fmt {
866         const char *name;
867         const char *alias;
868         size_t     (*arg_scnprintf[6])(char *bf, size_t size, struct syscall_arg *arg);
869         void       *arg_parm[6];
870         bool       errmsg;
871         bool       timeout;
872         bool       hexret;
873 } syscall_fmts[] = {
874         { .name     = "access",     .errmsg = true,
875           .arg_scnprintf = { [1] = SCA_ACCMODE, /* mode */ }, },
876         { .name     = "arch_prctl", .errmsg = true, .alias = "prctl", },
877         { .name     = "brk",        .hexret = true,
878           .arg_scnprintf = { [0] = SCA_HEX, /* brk */ }, },
879         { .name     = "clock_gettime",  .errmsg = true, STRARRAY(0, clk_id, clockid), },
880         { .name     = "close",      .errmsg = true,
881           .arg_scnprintf = { [0] = SCA_CLOSE_FD, /* fd */ }, }, 
882         { .name     = "connect",    .errmsg = true, },
883         { .name     = "dup",        .errmsg = true,
884           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
885         { .name     = "dup2",       .errmsg = true,
886           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
887         { .name     = "dup3",       .errmsg = true,
888           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
889         { .name     = "epoll_ctl",  .errmsg = true, STRARRAY(1, op, epoll_ctl_ops), },
890         { .name     = "eventfd2",   .errmsg = true,
891           .arg_scnprintf = { [1] = SCA_EFD_FLAGS, /* flags */ }, },
892         { .name     = "faccessat",  .errmsg = true,
893           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
894         { .name     = "fadvise64",  .errmsg = true,
895           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
896         { .name     = "fallocate",  .errmsg = true,
897           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
898         { .name     = "fchdir",     .errmsg = true,
899           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
900         { .name     = "fchmod",     .errmsg = true,
901           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
902         { .name     = "fchmodat",   .errmsg = true,
903           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, 
904         { .name     = "fchown",     .errmsg = true,
905           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
906         { .name     = "fchownat",   .errmsg = true,
907           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, 
908         { .name     = "fcntl",      .errmsg = true,
909           .arg_scnprintf = { [0] = SCA_FD, /* fd */
910                              [1] = SCA_STRARRAY, /* cmd */ },
911           .arg_parm      = { [1] = &strarray__fcntl_cmds, /* cmd */ }, },
912         { .name     = "fdatasync",  .errmsg = true,
913           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
914         { .name     = "flock",      .errmsg = true,
915           .arg_scnprintf = { [0] = SCA_FD, /* fd */
916                              [1] = SCA_FLOCK, /* cmd */ }, },
917         { .name     = "fsetxattr",  .errmsg = true,
918           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
919         { .name     = "fstat",      .errmsg = true, .alias = "newfstat",
920           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
921         { .name     = "fstatat",    .errmsg = true, .alias = "newfstatat",
922           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, 
923         { .name     = "fstatfs",    .errmsg = true,
924           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
925         { .name     = "fsync",    .errmsg = true,
926           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
927         { .name     = "ftruncate", .errmsg = true,
928           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
929         { .name     = "futex",      .errmsg = true,
930           .arg_scnprintf = { [1] = SCA_FUTEX_OP, /* op */ }, },
931         { .name     = "futimesat", .errmsg = true,
932           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, 
933         { .name     = "getdents",   .errmsg = true,
934           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
935         { .name     = "getdents64", .errmsg = true,
936           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
937         { .name     = "getitimer",  .errmsg = true, STRARRAY(0, which, itimers), },
938         { .name     = "getrlimit",  .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
939         { .name     = "ioctl",      .errmsg = true,
940           .arg_scnprintf = { [0] = SCA_FD, /* fd */ 
941                              [1] = SCA_STRHEXARRAY, /* cmd */
942                              [2] = SCA_HEX, /* arg */ },
943           .arg_parm      = { [1] = &strarray__tioctls, /* cmd */ }, },
944         { .name     = "kill",       .errmsg = true,
945           .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
946         { .name     = "linkat",     .errmsg = true,
947           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, 
948         { .name     = "lseek",      .errmsg = true,
949           .arg_scnprintf = { [0] = SCA_FD, /* fd */
950                              [2] = SCA_STRARRAY, /* whence */ },
951           .arg_parm      = { [2] = &strarray__whences, /* whence */ }, },
952         { .name     = "lstat",      .errmsg = true, .alias = "newlstat", },
953         { .name     = "madvise",    .errmsg = true,
954           .arg_scnprintf = { [0] = SCA_HEX,      /* start */
955                              [2] = SCA_MADV_BHV, /* behavior */ }, },
956         { .name     = "mkdirat",    .errmsg = true,
957           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, 
958         { .name     = "mknodat",    .errmsg = true,
959           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, 
960         { .name     = "mlock",      .errmsg = true,
961           .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
962         { .name     = "mlockall",   .errmsg = true,
963           .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
964         { .name     = "mmap",       .hexret = true,
965           .arg_scnprintf = { [0] = SCA_HEX,       /* addr */
966                              [2] = SCA_MMAP_PROT, /* prot */
967                              [3] = SCA_MMAP_FLAGS, /* flags */
968                              [4] = SCA_FD,        /* fd */ }, },
969         { .name     = "mprotect",   .errmsg = true,
970           .arg_scnprintf = { [0] = SCA_HEX, /* start */
971                              [2] = SCA_MMAP_PROT, /* prot */ }, },
972         { .name     = "mremap",     .hexret = true,
973           .arg_scnprintf = { [0] = SCA_HEX, /* addr */
974                              [4] = SCA_HEX, /* new_addr */ }, },
975         { .name     = "munlock",    .errmsg = true,
976           .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
977         { .name     = "munmap",     .errmsg = true,
978           .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
979         { .name     = "name_to_handle_at", .errmsg = true,
980           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, 
981         { .name     = "newfstatat", .errmsg = true,
982           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, 
983         { .name     = "open",       .errmsg = true,
984           .arg_scnprintf = { [1] = SCA_OPEN_FLAGS, /* flags */ }, },
985         { .name     = "open_by_handle_at", .errmsg = true,
986           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
987                              [2] = SCA_OPEN_FLAGS, /* flags */ }, },
988         { .name     = "openat",     .errmsg = true,
989           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
990                              [2] = SCA_OPEN_FLAGS, /* flags */ }, },
991         { .name     = "pipe2",      .errmsg = true,
992           .arg_scnprintf = { [1] = SCA_PIPE_FLAGS, /* flags */ }, },
993         { .name     = "poll",       .errmsg = true, .timeout = true, },
994         { .name     = "ppoll",      .errmsg = true, .timeout = true, },
995         { .name     = "pread",      .errmsg = true, .alias = "pread64",
996           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
997         { .name     = "preadv",     .errmsg = true, .alias = "pread",
998           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
999         { .name     = "prlimit64",  .errmsg = true, STRARRAY(1, resource, rlimit_resources), },
1000         { .name     = "pwrite",     .errmsg = true, .alias = "pwrite64",
1001           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
1002         { .name     = "pwritev",    .errmsg = true,
1003           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
1004         { .name     = "read",       .errmsg = true,
1005           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
1006         { .name     = "readlinkat", .errmsg = true,
1007           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, 
1008         { .name     = "readv",      .errmsg = true,
1009           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
1010         { .name     = "recvfrom",   .errmsg = true,
1011           .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1012         { .name     = "recvmmsg",   .errmsg = true,
1013           .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1014         { .name     = "recvmsg",    .errmsg = true,
1015           .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
1016         { .name     = "renameat",   .errmsg = true,
1017           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, 
1018         { .name     = "rt_sigaction", .errmsg = true,
1019           .arg_scnprintf = { [0] = SCA_SIGNUM, /* sig */ }, },
1020         { .name     = "rt_sigprocmask",  .errmsg = true, STRARRAY(0, how, sighow), },
1021         { .name     = "rt_sigqueueinfo", .errmsg = true,
1022           .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1023         { .name     = "rt_tgsigqueueinfo", .errmsg = true,
1024           .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
1025         { .name     = "select",     .errmsg = true, .timeout = true, },
1026         { .name     = "sendmmsg",    .errmsg = true,
1027           .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1028         { .name     = "sendmsg",    .errmsg = true,
1029           .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
1030         { .name     = "sendto",     .errmsg = true,
1031           .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1032         { .name     = "setitimer",  .errmsg = true, STRARRAY(0, which, itimers), },
1033         { .name     = "setrlimit",  .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
1034         { .name     = "shutdown",   .errmsg = true,
1035           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
1036         { .name     = "socket",     .errmsg = true,
1037           .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
1038                              [1] = SCA_SK_TYPE, /* type */ },
1039           .arg_parm      = { [0] = &strarray__socket_families, /* family */ }, },
1040         { .name     = "socketpair", .errmsg = true,
1041           .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
1042                              [1] = SCA_SK_TYPE, /* type */ },
1043           .arg_parm      = { [0] = &strarray__socket_families, /* family */ }, },
1044         { .name     = "stat",       .errmsg = true, .alias = "newstat", },
1045         { .name     = "symlinkat",  .errmsg = true,
1046           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, 
1047         { .name     = "tgkill",     .errmsg = true,
1048           .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
1049         { .name     = "tkill",      .errmsg = true,
1050           .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1051         { .name     = "uname",      .errmsg = true, .alias = "newuname", },
1052         { .name     = "unlinkat",   .errmsg = true,
1053           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1054         { .name     = "utimensat",  .errmsg = true,
1055           .arg_scnprintf = { [0] = SCA_FDAT, /* dirfd */ }, },
1056         { .name     = "write",      .errmsg = true,
1057           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
1058         { .name     = "writev",     .errmsg = true,
1059           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
1060 };
1061
1062 static int syscall_fmt__cmp(const void *name, const void *fmtp)
1063 {
1064         const struct syscall_fmt *fmt = fmtp;
1065         return strcmp(name, fmt->name);
1066 }
1067
1068 static struct syscall_fmt *syscall_fmt__find(const char *name)
1069 {
1070         const int nmemb = ARRAY_SIZE(syscall_fmts);
1071         return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp);
1072 }
1073
/*
 * What is known about one syscall. Instances live in trace->syscalls.table,
 * indexed by syscall id, and are filled in by trace__read_syscall_info().
 */
struct syscall {
	/* format of the "syscalls:sys_enter_<name>" tracepoint, NULL if not found */
	struct event_format *tp_format;
	/* name as returned by audit_syscall_to_name() */
	const char *name;
	/* set when the event qualifier list excludes this syscall */
	bool filtered;
	/* entry found in syscall_fmts for this name, NULL when there is none */
	struct syscall_fmt *fmt;
	/* one formatter per argument position; a NULL slot means plain "%ld" */
	size_t (**arg_scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
	/* per-argument formatter parameter, points into fmt->arg_parm */
	void **arg_parm;
};
1082
1083 static size_t fprintf_duration(unsigned long t, FILE *fp)
1084 {
1085         double duration = (double)t / NSEC_PER_MSEC;
1086         size_t printed = fprintf(fp, "(");
1087
1088         if (duration >= 1.0)
1089                 printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration);
1090         else if (duration >= 0.01)
1091                 printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration);
1092         else
1093                 printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration);
1094         return printed + fprintf(fp, "): ");
1095 }
1096
1097 struct thread_trace {
1098         u64               entry_time;
1099         u64               exit_time;
1100         bool              entry_pending;
1101         unsigned long     nr_events;
1102         char              *entry_str;
1103         double            runtime_ms;
1104         struct {
1105                 int       max;
1106                 char      **table;
1107         } paths;
1108
1109         struct intlist *syscall_stats;
1110 };
1111
1112 static struct thread_trace *thread_trace__new(void)
1113 {
1114         struct thread_trace *ttrace =  zalloc(sizeof(struct thread_trace));
1115
1116         if (ttrace)
1117                 ttrace->paths.max = -1;
1118
1119         ttrace->syscall_stats = intlist__new(NULL);
1120
1121         return ttrace;
1122 }
1123
1124 static struct thread_trace *thread__trace(struct thread *thread, FILE *fp)
1125 {
1126         struct thread_trace *ttrace;
1127
1128         if (thread == NULL)
1129                 goto fail;
1130
1131         if (thread->priv == NULL)
1132                 thread->priv = thread_trace__new();
1133                 
1134         if (thread->priv == NULL)
1135                 goto fail;
1136
1137         ttrace = thread->priv;
1138         ++ttrace->nr_events;
1139
1140         return ttrace;
1141 fail:
1142         color_fprintf(fp, PERF_COLOR_RED,
1143                       "WARNING: not enough memory, dropping samples!\n");
1144         return NULL;
1145 }
1146
1147 struct trace {
1148         struct perf_tool        tool;
1149         struct {
1150                 int             machine;
1151                 int             open_id;
1152         }                       audit;
1153         struct {
1154                 int             max;
1155                 struct syscall  *table;
1156         } syscalls;
1157         struct perf_record_opts opts;
1158         struct machine          *host;
1159         u64                     base_time;
1160         bool                    full_time;
1161         FILE                    *output;
1162         unsigned long           nr_events;
1163         struct strlist          *ev_qualifier;
1164         bool                    not_ev_qualifier;
1165         bool                    live;
1166         const char              *last_vfs_getname;
1167         struct intlist          *tid_list;
1168         struct intlist          *pid_list;
1169         bool                    sched;
1170         bool                    multiple_threads;
1171         bool                    summary;
1172         bool                    summary_only;
1173         bool                    show_comm;
1174         bool                    show_tool_stats;
1175         double                  duration_filter;
1176         double                  runtime_ms;
1177         struct {
1178                 u64             vfs_getname, proc_getname;
1179         } stats;
1180 };
1181
1182 static int trace__set_fd_pathname(struct thread *thread, int fd, const char *pathname)
1183 {
1184         struct thread_trace *ttrace = thread->priv;
1185
1186         if (fd > ttrace->paths.max) {
1187                 char **npath = realloc(ttrace->paths.table, (fd + 1) * sizeof(char *));
1188
1189                 if (npath == NULL)
1190                         return -1;
1191
1192                 if (ttrace->paths.max != -1) {
1193                         memset(npath + ttrace->paths.max + 1, 0,
1194                                (fd - ttrace->paths.max) * sizeof(char *));
1195                 } else {
1196                         memset(npath, 0, (fd + 1) * sizeof(char *));
1197                 }
1198
1199                 ttrace->paths.table = npath;
1200                 ttrace->paths.max   = fd;
1201         }
1202
1203         ttrace->paths.table[fd] = strdup(pathname);
1204
1205         return ttrace->paths.table[fd] != NULL ? 0 : -1;
1206 }
1207
1208 static int thread__read_fd_path(struct thread *thread, int fd)
1209 {
1210         char linkname[PATH_MAX], pathname[PATH_MAX];
1211         struct stat st;
1212         int ret;
1213
1214         if (thread->pid_ == thread->tid) {
1215                 scnprintf(linkname, sizeof(linkname),
1216                           "/proc/%d/fd/%d", thread->pid_, fd);
1217         } else {
1218                 scnprintf(linkname, sizeof(linkname),
1219                           "/proc/%d/task/%d/fd/%d", thread->pid_, thread->tid, fd);
1220         }
1221
1222         if (lstat(linkname, &st) < 0 || st.st_size + 1 > (off_t)sizeof(pathname))
1223                 return -1;
1224
1225         ret = readlink(linkname, pathname, sizeof(pathname));
1226
1227         if (ret < 0 || ret > st.st_size)
1228                 return -1;
1229
1230         pathname[ret] = '\0';
1231         return trace__set_fd_pathname(thread, fd, pathname);
1232 }
1233
1234 static const char *thread__fd_path(struct thread *thread, int fd,
1235                                    struct trace *trace)
1236 {
1237         struct thread_trace *ttrace = thread->priv;
1238
1239         if (ttrace == NULL)
1240                 return NULL;
1241
1242         if (fd < 0)
1243                 return NULL;
1244
1245         if ((fd > ttrace->paths.max || ttrace->paths.table[fd] == NULL))
1246                 if (!trace->live)
1247                         return NULL;
1248                 ++trace->stats.proc_getname;
1249                 if (thread__read_fd_path(thread, fd)) {
1250                         return NULL;
1251         }
1252
1253         return ttrace->paths.table[fd];
1254 }
1255
1256 static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
1257                                         struct syscall_arg *arg)
1258 {
1259         int fd = arg->val;
1260         size_t printed = scnprintf(bf, size, "%d", fd);
1261         const char *path = thread__fd_path(arg->thread, fd, arg->trace);
1262
1263         if (path)
1264                 printed += scnprintf(bf + printed, size - printed, "<%s>", path);
1265
1266         return printed;
1267 }
1268
1269 static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
1270                                               struct syscall_arg *arg)
1271 {
1272         int fd = arg->val;
1273         size_t printed = syscall_arg__scnprintf_fd(bf, size, arg);
1274         struct thread_trace *ttrace = arg->thread->priv;
1275
1276         if (ttrace && fd >= 0 && fd <= ttrace->paths.max) {
1277                 free(ttrace->paths.table[fd]);
1278                 ttrace->paths.table[fd] = NULL;
1279         }
1280
1281         return printed;
1282 }
1283
1284 static bool trace__filter_duration(struct trace *trace, double t)
1285 {
1286         return t < (trace->duration_filter * NSEC_PER_MSEC);
1287 }
1288
1289 static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
1290 {
1291         double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC;
1292
1293         return fprintf(fp, "%10.3f ", ts);
1294 }
1295
/*
 * Flags raised by sig_handler().
 *
 * They are stored to from signal context, so they are volatile sig_atomic_t:
 * that is the object type ISO C guarantees can be written from an
 * asynchronous signal handler, and volatile keeps the compiler from caching
 * their value across reads.
 */
static volatile sig_atomic_t done = false;
static volatile sig_atomic_t interrupted = false;

/*
 * Signal handler: flag that we are done and remember whether the signal was
 * SIGINT.
 */
static void sig_handler(int sig)
{
	done = true;
	interrupted = sig == SIGINT;
}
1304
1305 static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread,
1306                                         u64 duration, u64 tstamp, FILE *fp)
1307 {
1308         size_t printed = trace__fprintf_tstamp(trace, tstamp, fp);
1309         printed += fprintf_duration(duration, fp);
1310
1311         if (trace->multiple_threads) {
1312                 if (trace->show_comm)
1313                         printed += fprintf(fp, "%.14s/", thread__comm_str(thread));
1314                 printed += fprintf(fp, "%d ", thread->tid);
1315         }
1316
1317         return printed;
1318 }
1319
1320 static int trace__process_event(struct trace *trace, struct machine *machine,
1321                                 union perf_event *event, struct perf_sample *sample)
1322 {
1323         int ret = 0;
1324
1325         switch (event->header.type) {
1326         case PERF_RECORD_LOST:
1327                 color_fprintf(trace->output, PERF_COLOR_RED,
1328                               "LOST %" PRIu64 " events!\n", event->lost.lost);
1329                 ret = machine__process_lost_event(machine, event, sample);
1330         default:
1331                 ret = machine__process_event(machine, event, sample);
1332                 break;
1333         }
1334
1335         return ret;
1336 }
1337
1338 static int trace__tool_process(struct perf_tool *tool,
1339                                union perf_event *event,
1340                                struct perf_sample *sample,
1341                                struct machine *machine)
1342 {
1343         struct trace *trace = container_of(tool, struct trace, tool);
1344         return trace__process_event(trace, machine, event, sample);
1345 }
1346
1347 static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
1348 {
1349         int err = symbol__init();
1350
1351         if (err)
1352                 return err;
1353
1354         trace->host = machine__new_host();
1355         if (trace->host == NULL)
1356                 return -ENOMEM;
1357
1358         err = __machine__synthesize_threads(trace->host, &trace->tool, &trace->opts.target,
1359                                             evlist->threads, trace__tool_process, false);
1360         if (err)
1361                 symbol__exit();
1362
1363         return err;
1364 }
1365
1366 static int syscall__set_arg_fmts(struct syscall *sc)
1367 {
1368         struct format_field *field;
1369         int idx = 0;
1370
1371         sc->arg_scnprintf = calloc(sc->tp_format->format.nr_fields - 1, sizeof(void *));
1372         if (sc->arg_scnprintf == NULL)
1373                 return -1;
1374
1375         if (sc->fmt)
1376                 sc->arg_parm = sc->fmt->arg_parm;
1377
1378         for (field = sc->tp_format->format.fields->next; field; field = field->next) {
1379                 if (sc->fmt && sc->fmt->arg_scnprintf[idx])
1380                         sc->arg_scnprintf[idx] = sc->fmt->arg_scnprintf[idx];
1381                 else if (field->flags & FIELD_IS_POINTER)
1382                         sc->arg_scnprintf[idx] = syscall_arg__scnprintf_hex;
1383                 ++idx;
1384         }
1385
1386         return 0;
1387 }
1388
1389 static int trace__read_syscall_info(struct trace *trace, int id)
1390 {
1391         char tp_name[128];
1392         struct syscall *sc;
1393         const char *name = audit_syscall_to_name(id, trace->audit.machine);
1394
1395         if (name == NULL)
1396                 return -1;
1397
1398         if (id > trace->syscalls.max) {
1399                 struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc));
1400
1401                 if (nsyscalls == NULL)
1402                         return -1;
1403
1404                 if (trace->syscalls.max != -1) {
1405                         memset(nsyscalls + trace->syscalls.max + 1, 0,
1406                                (id - trace->syscalls.max) * sizeof(*sc));
1407                 } else {
1408                         memset(nsyscalls, 0, (id + 1) * sizeof(*sc));
1409                 }
1410
1411                 trace->syscalls.table = nsyscalls;
1412                 trace->syscalls.max   = id;
1413         }
1414
1415         sc = trace->syscalls.table + id;
1416         sc->name = name;
1417
1418         if (trace->ev_qualifier) {
1419                 bool in = strlist__find(trace->ev_qualifier, name) != NULL;
1420
1421                 if (!(in ^ trace->not_ev_qualifier)) {
1422                         sc->filtered = true;
1423                         /*
1424                          * No need to do read tracepoint information since this will be
1425                          * filtered out.
1426                          */
1427                         return 0;
1428                 }
1429         }
1430
1431         sc->fmt  = syscall_fmt__find(sc->name);
1432
1433         snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name);
1434         sc->tp_format = trace_event__tp_format("syscalls", tp_name);
1435
1436         if (sc->tp_format == NULL && sc->fmt && sc->fmt->alias) {
1437                 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias);
1438                 sc->tp_format = trace_event__tp_format("syscalls", tp_name);
1439         }
1440
1441         if (sc->tp_format == NULL)
1442                 return -1;
1443
1444         return syscall__set_arg_fmts(sc);
1445 }
1446
1447 static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
1448                                       unsigned long *args, struct trace *trace,
1449                                       struct thread *thread)
1450 {
1451         size_t printed = 0;
1452
1453         if (sc->tp_format != NULL) {
1454                 struct format_field *field;
1455                 u8 bit = 1;
1456                 struct syscall_arg arg = {
1457                         .idx    = 0,
1458                         .mask   = 0,
1459                         .trace  = trace,
1460                         .thread = thread,
1461                 };
1462
1463                 for (field = sc->tp_format->format.fields->next; field;
1464                      field = field->next, ++arg.idx, bit <<= 1) {
1465                         if (arg.mask & bit)
1466                                 continue;
1467                         /*
1468                          * Suppress this argument if its value is zero and
1469                          * and we don't have a string associated in an
1470                          * strarray for it.
1471                          */
1472                         if (args[arg.idx] == 0 &&
1473                             !(sc->arg_scnprintf &&
1474                               sc->arg_scnprintf[arg.idx] == SCA_STRARRAY &&
1475                               sc->arg_parm[arg.idx]))
1476                                 continue;
1477
1478                         printed += scnprintf(bf + printed, size - printed,
1479                                              "%s%s: ", printed ? ", " : "", field->name);
1480                         if (sc->arg_scnprintf && sc->arg_scnprintf[arg.idx]) {
1481                                 arg.val = args[arg.idx];
1482                                 if (sc->arg_parm)
1483                                         arg.parm = sc->arg_parm[arg.idx];
1484                                 printed += sc->arg_scnprintf[arg.idx](bf + printed,
1485                                                                       size - printed, &arg);
1486                         } else {
1487                                 printed += scnprintf(bf + printed, size - printed,
1488                                                      "%ld", args[arg.idx]);
1489                         }
1490                 }
1491         } else {
1492                 int i = 0;
1493
1494                 while (i < 6) {
1495                         printed += scnprintf(bf + printed, size - printed,
1496                                              "%sarg%d: %ld",
1497                                              printed ? ", " : "", i, args[i]);
1498                         ++i;
1499                 }
1500         }
1501
1502         return printed;
1503 }
1504
/*
 * Signature of a tracepoint sample handler, e.g. the one trace__sys_enter()
 * and trace__sys_exit() have.
 */
typedef int (*tracepoint_handler)(struct trace *trace,
				  struct perf_evsel *evsel,
				  struct perf_sample *sample);
1507
1508 static struct syscall *trace__syscall_info(struct trace *trace,
1509                                            struct perf_evsel *evsel, int id)
1510 {
1511
1512         if (id < 0) {
1513
1514                 /*
1515                  * XXX: Noticed on x86_64, reproduced as far back as 3.0.36, haven't tried
1516                  * before that, leaving at a higher verbosity level till that is
1517                  * explained. Reproduced with plain ftrace with:
1518                  *
1519                  * echo 1 > /t/events/raw_syscalls/sys_exit/enable
1520                  * grep "NR -1 " /t/trace_pipe
1521                  *
1522                  * After generating some load on the machine.
1523                  */
1524                 if (verbose > 1) {
1525                         static u64 n;
1526                         fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n",
1527                                 id, perf_evsel__name(evsel), ++n);
1528                 }
1529                 return NULL;
1530         }
1531
1532         if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) &&
1533             trace__read_syscall_info(trace, id))
1534                 goto out_cant_read;
1535
1536         if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL))
1537                 goto out_cant_read;
1538
1539         return &trace->syscalls.table[id];
1540
1541 out_cant_read:
1542         if (verbose) {
1543                 fprintf(trace->output, "Problems reading syscall %d", id);
1544                 if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL)
1545                         fprintf(trace->output, "(%s)", trace->syscalls.table[id].name);
1546                 fputs(" information\n", trace->output);
1547         }
1548         return NULL;
1549 }
1550
1551 static void thread__update_stats(struct thread_trace *ttrace,
1552                                  int id, struct perf_sample *sample)
1553 {
1554         struct int_node *inode;
1555         struct stats *stats;
1556         u64 duration = 0;
1557
1558         inode = intlist__findnew(ttrace->syscall_stats, id);
1559         if (inode == NULL)
1560                 return;
1561
1562         stats = inode->priv;
1563         if (stats == NULL) {
1564                 stats = malloc(sizeof(struct stats));
1565                 if (stats == NULL)
1566                         return;
1567                 init_stats(stats);
1568                 inode->priv = stats;
1569         }
1570
1571         if (ttrace->entry_time && sample->time > ttrace->entry_time)
1572                 duration = sample->time - ttrace->entry_time;
1573
1574         update_stats(stats, duration);
1575 }
1576
1577 static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
1578                             struct perf_sample *sample)
1579 {
1580         char *msg;
1581         void *args;
1582         size_t printed = 0;
1583         struct thread *thread;
1584         int id = perf_evsel__sc_tp_uint(evsel, id, sample);
1585         struct syscall *sc = trace__syscall_info(trace, evsel, id);
1586         struct thread_trace *ttrace;
1587
1588         if (sc == NULL)
1589                 return -1;
1590
1591         if (sc->filtered)
1592                 return 0;
1593
1594         thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1595         ttrace = thread__trace(thread, trace->output);
1596         if (ttrace == NULL)
1597                 return -1;
1598
1599         args = perf_evsel__sc_tp_ptr(evsel, args, sample);
1600         ttrace = thread->priv;
1601
1602         if (ttrace->entry_str == NULL) {
1603                 ttrace->entry_str = malloc(1024);
1604                 if (!ttrace->entry_str)
1605                         return -1;
1606         }
1607
1608         ttrace->entry_time = sample->time;
1609         msg = ttrace->entry_str;
1610         printed += scnprintf(msg + printed, 1024 - printed, "%s(", sc->name);
1611
1612         printed += syscall__scnprintf_args(sc, msg + printed, 1024 - printed,
1613                                            args, trace, thread);
1614
1615         if (!strcmp(sc->name, "exit_group") || !strcmp(sc->name, "exit")) {
1616                 if (!trace->duration_filter && !trace->summary_only) {
1617                         trace__fprintf_entry_head(trace, thread, 1, sample->time, trace->output);
1618                         fprintf(trace->output, "%-70s\n", ttrace->entry_str);
1619                 }
1620         } else
1621                 ttrace->entry_pending = true;
1622
1623         return 0;
1624 }
1625
1626 static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
1627                            struct perf_sample *sample)
1628 {
1629         int ret;
1630         u64 duration = 0;
1631         struct thread *thread;
1632         int id = perf_evsel__sc_tp_uint(evsel, id, sample);
1633         struct syscall *sc = trace__syscall_info(trace, evsel, id);
1634         struct thread_trace *ttrace;
1635
1636         if (sc == NULL)
1637                 return -1;
1638
1639         if (sc->filtered)
1640                 return 0;
1641
1642         thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1643         ttrace = thread__trace(thread, trace->output);
1644         if (ttrace == NULL)
1645                 return -1;
1646
1647         if (trace->summary)
1648                 thread__update_stats(ttrace, id, sample);
1649
1650         ret = perf_evsel__sc_tp_uint(evsel, ret, sample);
1651
1652         if (id == trace->audit.open_id && ret >= 0 && trace->last_vfs_getname) {
1653                 trace__set_fd_pathname(thread, ret, trace->last_vfs_getname);
1654                 trace->last_vfs_getname = NULL;
1655                 ++trace->stats.vfs_getname;
1656         }
1657
1658         ttrace = thread->priv;
1659
1660         ttrace->exit_time = sample->time;
1661
1662         if (ttrace->entry_time) {
1663                 duration = sample->time - ttrace->entry_time;
1664                 if (trace__filter_duration(trace, duration))
1665                         goto out;
1666         } else if (trace->duration_filter)
1667                 goto out;
1668
1669         if (trace->summary_only)
1670                 goto out;
1671
1672         trace__fprintf_entry_head(trace, thread, duration, sample->time, trace->output);
1673
1674         if (ttrace->entry_pending) {
1675                 fprintf(trace->output, "%-70s", ttrace->entry_str);
1676         } else {
1677                 fprintf(trace->output, " ... [");
1678                 color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued");
1679                 fprintf(trace->output, "]: %s()", sc->name);
1680         }
1681
1682         if (sc->fmt == NULL) {
1683 signed_print:
1684                 fprintf(trace->output, ") = %d", ret);
1685         } else if (ret < 0 && sc->fmt->errmsg) {
1686                 char bf[256];
1687                 const char *emsg = strerror_r(-ret, bf, sizeof(bf)),
1688                            *e = audit_errno_to_name(-ret);
1689
1690                 fprintf(trace->output, ") = -1 %s %s", e, emsg);
1691         } else if (ret == 0 && sc->fmt->timeout)
1692                 fprintf(trace->output, ") = 0 Timeout");
1693         else if (sc->fmt->hexret)
1694                 fprintf(trace->output, ") = %#x", ret);
1695         else
1696                 goto signed_print;
1697
1698         fputc('\n', trace->output);
1699 out:
1700         ttrace->entry_pending = false;
1701
1702         return 0;
1703 }
1704
1705 static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel,
1706                               struct perf_sample *sample)
1707 {
1708         trace->last_vfs_getname = perf_evsel__rawptr(evsel, sample, "pathname");
1709         return 0;
1710 }
1711
1712 static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel,
1713                                      struct perf_sample *sample)
1714 {
1715         u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
1716         double runtime_ms = (double)runtime / NSEC_PER_MSEC;
1717         struct thread *thread = machine__findnew_thread(trace->host,
1718                                                         sample->pid,
1719                                                         sample->tid);
1720         struct thread_trace *ttrace = thread__trace(thread, trace->output);
1721
1722         if (ttrace == NULL)
1723                 goto out_dump;
1724
1725         ttrace->runtime_ms += runtime_ms;
1726         trace->runtime_ms += runtime_ms;
1727         return 0;
1728
1729 out_dump:
1730         fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n",
1731                evsel->name,
1732                perf_evsel__strval(evsel, sample, "comm"),
1733                (pid_t)perf_evsel__intval(evsel, sample, "pid"),
1734                runtime,
1735                perf_evsel__intval(evsel, sample, "vruntime"));
1736         return 0;
1737 }
1738
1739 static bool skip_sample(struct trace *trace, struct perf_sample *sample)
1740 {
1741         if ((trace->pid_list && intlist__find(trace->pid_list, sample->pid)) ||
1742             (trace->tid_list && intlist__find(trace->tid_list, sample->tid)))
1743                 return false;
1744
1745         if (trace->pid_list || trace->tid_list)
1746                 return true;
1747
1748         return false;
1749 }
1750
1751 static int trace__process_sample(struct perf_tool *tool,
1752                                  union perf_event *event __maybe_unused,
1753                                  struct perf_sample *sample,
1754                                  struct perf_evsel *evsel,
1755                                  struct machine *machine __maybe_unused)
1756 {
1757         struct trace *trace = container_of(tool, struct trace, tool);
1758         int err = 0;
1759
1760         tracepoint_handler handler = evsel->handler;
1761
1762         if (skip_sample(trace, sample))
1763                 return 0;
1764
1765         if (!trace->full_time && trace->base_time == 0)
1766                 trace->base_time = sample->time;
1767
1768         if (handler)
1769                 handler(trace, evsel, sample);
1770
1771         return err;
1772 }
1773
1774 static int parse_target_str(struct trace *trace)
1775 {
1776         if (trace->opts.target.pid) {
1777                 trace->pid_list = intlist__new(trace->opts.target.pid);
1778                 if (trace->pid_list == NULL) {
1779                         pr_err("Error parsing process id string\n");
1780                         return -EINVAL;
1781                 }
1782         }
1783
1784         if (trace->opts.target.tid) {
1785                 trace->tid_list = intlist__new(trace->opts.target.tid);
1786                 if (trace->tid_list == NULL) {
1787                         pr_err("Error parsing thread id string\n");
1788                         return -EINVAL;
1789                 }
1790         }
1791
1792         return 0;
1793 }
1794
/*
 * Implement "perf trace record": run cmd_record() with a fixed set of
 * arguments selecting the raw_syscalls enter/exit tracepoints, followed by
 * the caller's own arguments.
 *
 * Returns -ENOMEM when the argument vector cannot be allocated, otherwise
 * whatever cmd_record() returns.
 */
static int trace__record(int argc, const char **argv)
{
	const char * const record_args[] = {
		"record",
		"-R",
		"-m", "1024",
		"-c", "1",
		"-e", "raw_syscalls:sys_enter,raw_syscalls:sys_exit",
	};
	const unsigned int nr_fixed = ARRAY_SIZE(record_args);
	const unsigned int nr_user = (unsigned int)argc;
	unsigned int rec_argc = nr_fixed + nr_user;
	unsigned int filled = 0, k;
	const char **rec_argv;

	/* One extra, zeroed slot terminates the vector. */
	rec_argv = calloc(rec_argc + 1, sizeof(char *));
	if (!rec_argv)
		return -ENOMEM;

	for (k = 0; k < nr_fixed; k++)
		rec_argv[filled++] = record_args[k];

	for (k = 0; k < nr_user; k++)
		rec_argv[filled++] = argv[k];

	return cmd_record(filled, rec_argv, NULL);
}
1821
1822 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp);
1823
1824 static void perf_evlist__add_vfs_getname(struct perf_evlist *evlist)
1825 {
1826         struct perf_evsel *evsel = perf_evsel__newtp("probe", "vfs_getname");
1827         if (evsel == NULL)
1828                 return;
1829
1830         if (perf_evsel__field(evsel, "pathname") == NULL) {
1831                 perf_evsel__delete(evsel);
1832                 return;
1833         }
1834
1835         evsel->handler = trace__vfs_getname;
1836         perf_evlist__add(evlist, evsel);
1837 }
1838
1839 static int trace__run(struct trace *trace, int argc, const char **argv)
1840 {
1841         struct perf_evlist *evlist = perf_evlist__new();
1842         struct perf_evsel *evsel;
1843         int err = -1, i;
1844         unsigned long before;
1845         const bool forks = argc > 0;
1846
1847         trace->live = true;
1848
1849         if (evlist == NULL) {
1850                 fprintf(trace->output, "Not enough memory to run!\n");
1851                 goto out;
1852         }
1853
1854         if (perf_evlist__add_syscall_newtp(evlist, trace__sys_enter, trace__sys_exit))
1855                 goto out_error_tp;
1856
1857         perf_evlist__add_vfs_getname(evlist);
1858
1859         if (trace->sched &&
1860                 perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
1861                                 trace__sched_stat_runtime))
1862                 goto out_error_tp;
1863
1864         err = perf_evlist__create_maps(evlist, &trace->opts.target);
1865         if (err < 0) {
1866                 fprintf(trace->output, "Problems parsing the target to trace, check your options!\n");
1867                 goto out_delete_evlist;
1868         }
1869
1870         err = trace__symbols_init(trace, evlist);
1871         if (err < 0) {
1872                 fprintf(trace->output, "Problems initializing symbol libraries!\n");
1873                 goto out_delete_maps;
1874         }
1875
1876         perf_evlist__config(evlist, &trace->opts);
1877
1878         signal(SIGCHLD, sig_handler);
1879         signal(SIGINT, sig_handler);
1880
1881         if (forks) {
1882                 err = perf_evlist__prepare_workload(evlist, &trace->opts.target,
1883                                                     argv, false, false);
1884                 if (err < 0) {
1885                         fprintf(trace->output, "Couldn't run the workload!\n");
1886                         goto out_delete_maps;
1887                 }
1888         }
1889
1890         err = perf_evlist__open(evlist);
1891         if (err < 0)
1892                 goto out_error_open;
1893
1894         err = perf_evlist__mmap(evlist, trace->opts.mmap_pages, false);
1895         if (err < 0) {
1896                 fprintf(trace->output, "Couldn't mmap the events: %s\n", strerror(errno));
1897                 goto out_close_evlist;
1898         }
1899
1900         perf_evlist__enable(evlist);
1901
1902         if (forks)
1903                 perf_evlist__start_workload(evlist);
1904
1905         trace->multiple_threads = evlist->threads->map[0] == -1 || evlist->threads->nr > 1;
1906 again:
1907         before = trace->nr_events;
1908
1909         for (i = 0; i < evlist->nr_mmaps; i++) {
1910                 union perf_event *event;
1911
1912                 while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) {
1913                         const u32 type = event->header.type;
1914                         tracepoint_handler handler;
1915                         struct perf_sample sample;
1916
1917                         ++trace->nr_events;
1918
1919                         err = perf_evlist__parse_sample(evlist, event, &sample);
1920                         if (err) {
1921                                 fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err);
1922                                 goto next_event;
1923                         }
1924
1925                         if (!trace->full_time && trace->base_time == 0)
1926                                 trace->base_time = sample.time;
1927
1928                         if (type != PERF_RECORD_SAMPLE) {
1929                                 trace__process_event(trace, trace->host, event, &sample);
1930                                 continue;
1931                         }
1932
1933                         evsel = perf_evlist__id2evsel(evlist, sample.id);
1934                         if (evsel == NULL) {
1935                                 fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample.id);
1936                                 goto next_event;
1937                         }
1938
1939                         if (sample.raw_data == NULL) {
1940                                 fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
1941                                        perf_evsel__name(evsel), sample.tid,
1942                                        sample.cpu, sample.raw_size);
1943                                 goto next_event;
1944                         }
1945
1946                         handler = evsel->handler;
1947                         handler(trace, evsel, &sample);
1948 next_event:
1949                         perf_evlist__mmap_consume(evlist, i);
1950
1951                         if (interrupted)
1952                                 goto out_disable;
1953                 }
1954         }
1955
1956         if (trace->nr_events == before) {
1957                 int timeout = done ? 100 : -1;
1958
1959                 if (poll(evlist->pollfd, evlist->nr_fds, timeout) > 0)
1960                         goto again;
1961         } else {
1962                 goto again;
1963         }
1964
1965 out_disable:
1966         perf_evlist__disable(evlist);
1967
1968         if (!err) {
1969                 if (trace->summary)
1970                         trace__fprintf_thread_summary(trace, trace->output);
1971
1972                 if (trace->show_tool_stats) {
1973                         fprintf(trace->output, "Stats:\n "
1974                                                " vfs_getname : %" PRIu64 "\n"
1975                                                " proc_getname: %" PRIu64 "\n",
1976                                 trace->stats.vfs_getname,
1977                                 trace->stats.proc_getname);
1978                 }
1979         }
1980
1981         perf_evlist__munmap(evlist);
1982 out_close_evlist:
1983         perf_evlist__close(evlist);
1984 out_delete_maps:
1985         perf_evlist__delete_maps(evlist);
1986 out_delete_evlist:
1987         perf_evlist__delete(evlist);
1988 out:
1989         trace->live = false;
1990         return err;
1991 {
1992         char errbuf[BUFSIZ];
1993
1994 out_error_tp:
1995         perf_evlist__strerror_tp(evlist, errno, errbuf, sizeof(errbuf));
1996         goto out_error;
1997
1998 out_error_open:
1999         perf_evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf));
2000
2001 out_error:
2002         fprintf(trace->output, "%s\n", errbuf);
2003         goto out_delete_evlist;
2004 }
2005 }
2006
2007 static int trace__replay(struct trace *trace)
2008 {
2009         const struct perf_evsel_str_handler handlers[] = {
2010                 { "probe:vfs_getname",       trace__vfs_getname, },
2011         };
2012         struct perf_data_file file = {
2013                 .path  = input_name,
2014                 .mode  = PERF_DATA_MODE_READ,
2015         };
2016         struct perf_session *session;
2017         struct perf_evsel *evsel;
2018         int err = -1;
2019
2020         trace->tool.sample        = trace__process_sample;
2021         trace->tool.mmap          = perf_event__process_mmap;
2022         trace->tool.mmap2         = perf_event__process_mmap2;
2023         trace->tool.comm          = perf_event__process_comm;
2024         trace->tool.exit          = perf_event__process_exit;
2025         trace->tool.fork          = perf_event__process_fork;
2026         trace->tool.attr          = perf_event__process_attr;
2027         trace->tool.tracing_data = perf_event__process_tracing_data;
2028         trace->tool.build_id      = perf_event__process_build_id;
2029
2030         trace->tool.ordered_samples = true;
2031         trace->tool.ordering_requires_timestamps = true;
2032
2033         /* add tid to output */
2034         trace->multiple_threads = true;
2035
2036         if (symbol__init() < 0)
2037                 return -1;
2038
2039         session = perf_session__new(&file, false, &trace->tool);
2040         if (session == NULL)
2041                 return -ENOMEM;
2042
2043         trace->host = &session->machines.host;
2044
2045         err = perf_session__set_tracepoints_handlers(session, handlers);
2046         if (err)
2047                 goto out;
2048
2049         evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2050                                                      "raw_syscalls:sys_enter");
2051         if (evsel == NULL) {
2052                 pr_err("Data file does not have raw_syscalls:sys_enter event\n");
2053                 goto out;
2054         }
2055
2056         if (perf_evsel__init_syscall_tp(evsel, trace__sys_enter) < 0 ||
2057             perf_evsel__init_sc_tp_ptr_field(evsel, args)) {
2058                 pr_err("Error during initialize raw_syscalls:sys_enter event\n");
2059                 goto out;
2060         }
2061
2062         evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2063                                                      "raw_syscalls:sys_exit");
2064         if (evsel == NULL) {
2065                 pr_err("Data file does not have raw_syscalls:sys_exit event\n");
2066                 goto out;
2067         }
2068
2069         if (perf_evsel__init_syscall_tp(evsel, trace__sys_exit) < 0 ||
2070             perf_evsel__init_sc_tp_uint_field(evsel, ret)) {
2071                 pr_err("Error during initialize raw_syscalls:sys_exit event\n");
2072                 goto out;
2073         }
2074
2075         err = parse_target_str(trace);
2076         if (err != 0)
2077                 goto out;
2078
2079         setup_pager();
2080
2081         err = perf_session__process_events(session, &trace->tool);
2082         if (err)
2083                 pr_err("Failed to process events, error %d", err);
2084
2085         else if (trace->summary)
2086                 trace__fprintf_thread_summary(trace, trace->output);
2087
2088 out:
2089         perf_session__delete(session);
2090
2091         return err;
2092 }
2093
/*
 * Print the heading of the per-thread summary to fp.
 *
 * Returns the value reported by fprintf(), converted to size_t.
 */
static size_t trace__fprintf_threads_header(FILE *fp)
{
	return fprintf(fp, "\n Summary of events:\n\n");
}
2102
2103 static size_t thread__dump_stats(struct thread_trace *ttrace,
2104                                  struct trace *trace, FILE *fp)
2105 {
2106         struct stats *stats;
2107         size_t printed = 0;
2108         struct syscall *sc;
2109         struct int_node *inode = intlist__first(ttrace->syscall_stats);
2110
2111         if (inode == NULL)
2112                 return 0;
2113
2114         printed += fprintf(fp, "\n");
2115
2116         printed += fprintf(fp, "   syscall            calls      min       avg       max      stddev\n");
2117         printed += fprintf(fp, "                               (msec)    (msec)    (msec)        (%%)\n");
2118         printed += fprintf(fp, "   --------------- -------- --------- --------- ---------     ------\n");
2119
2120         /* each int_node is a syscall */
2121         while (inode) {
2122                 stats = inode->priv;
2123                 if (stats) {
2124                         double min = (double)(stats->min) / NSEC_PER_MSEC;
2125                         double max = (double)(stats->max) / NSEC_PER_MSEC;
2126                         double avg = avg_stats(stats);
2127                         double pct;
2128                         u64 n = (u64) stats->n;
2129
2130                         pct = avg ? 100.0 * stddev_stats(stats)/avg : 0.0;
2131                         avg /= NSEC_PER_MSEC;
2132
2133                         sc = &trace->syscalls.table[inode->i];
2134                         printed += fprintf(fp, "   %-15s", sc->name);
2135                         printed += fprintf(fp, " %8" PRIu64 " %9.3f %9.3f",
2136                                            n, min, avg);
2137                         printed += fprintf(fp, " %9.3f %9.2f%%\n", max, pct);
2138                 }
2139
2140                 inode = intlist__next(inode);
2141         }
2142
2143         printed += fprintf(fp, "\n\n");
2144
2145         return printed;
2146 }
2147
/*
 * Context handed to the per-thread summary callback through its opaque
 * 'priv' argument.
 */
struct summary_data {
	FILE *fp;		/* stream the summary is written to */
	struct trace *trace;	/* tracer whose threads are summarised */
	size_t printed;		/* running total of fprintf() results */
};
2154
2155 static int trace__fprintf_one_thread(struct thread *thread, void *priv)
2156 {
2157         struct summary_data *data = priv;
2158         FILE *fp = data->fp;
2159         size_t printed = data->printed;
2160         struct trace *trace = data->trace;
2161         struct thread_trace *ttrace = thread->priv;
2162         double ratio;
2163
2164         if (ttrace == NULL)
2165                 return 0;
2166
2167         ratio = (double)ttrace->nr_events / trace->nr_events * 100.0;
2168
2169         printed += fprintf(fp, " %s (%d), ", thread__comm_str(thread), thread->tid);
2170         printed += fprintf(fp, "%lu events, ", ttrace->nr_events);
2171         printed += fprintf(fp, "%.1f%%", ratio);
2172         printed += fprintf(fp, ", %.3f msec\n", ttrace->runtime_ms);
2173         printed += thread__dump_stats(ttrace, trace, fp);
2174
2175         data->printed += printed;
2176
2177         return 0;
2178 }
2179
2180 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
2181 {
2182         struct summary_data data = {
2183                 .fp = fp,
2184                 .trace = trace
2185         };
2186         data.printed = trace__fprintf_threads_header(fp);
2187
2188         machine__for_each_thread(trace->host, trace__fprintf_one_thread, &data);
2189
2190         return data.printed;
2191 }
2192
2193 static int trace__set_duration(const struct option *opt, const char *str,
2194                                int unset __maybe_unused)
2195 {
2196         struct trace *trace = opt->value;
2197
2198         trace->duration_filter = atof(str);
2199         return 0;
2200 }
2201
2202 static int trace__open_output(struct trace *trace, const char *filename)
2203 {
2204         struct stat st;
2205
2206         if (!stat(filename, &st) && st.st_size) {
2207                 char oldname[PATH_MAX];
2208
2209                 scnprintf(oldname, sizeof(oldname), "%s.old", filename);
2210                 unlink(oldname);
2211                 rename(filename, oldname);
2212         }
2213
2214         trace->output = fopen(filename, "w");
2215
2216         return trace->output == NULL ? -errno : 0;
2217 }
2218
2219 int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
2220 {
2221         const char * const trace_usage[] = {
2222                 "perf trace [<options>] [<command>]",
2223                 "perf trace [<options>] -- <command> [<options>]",
2224                 "perf trace record [<options>] [<command>]",
2225                 "perf trace record [<options>] -- <command> [<options>]",
2226                 NULL
2227         };
2228         struct trace trace = {
2229                 .audit = {
2230                         .machine = audit_detect_machine(),
2231                         .open_id = audit_name_to_syscall("open", trace.audit.machine),
2232                 },
2233                 .syscalls = {
2234                         . max = -1,
2235                 },
2236                 .opts = {
2237                         .target = {
2238                                 .uid       = UINT_MAX,
2239                                 .uses_mmap = true,
2240                         },
2241                         .user_freq     = UINT_MAX,
2242                         .user_interval = ULLONG_MAX,
2243                         .no_delay      = true,
2244                         .mmap_pages    = 1024,
2245                 },
2246                 .output = stdout,
2247                 .show_comm = true,
2248         };
2249         const char *output_name = NULL;
2250         const char *ev_qualifier_str = NULL;
2251         const struct option trace_options[] = {
2252         OPT_BOOLEAN(0, "comm", &trace.show_comm,
2253                     "show the thread COMM next to its id"),
2254         OPT_BOOLEAN(0, "tool_stats", &trace.show_tool_stats, "show tool stats"),
2255         OPT_STRING('e', "expr", &ev_qualifier_str, "expr",
2256                     "list of events to trace"),
2257         OPT_STRING('o', "output", &output_name, "file", "output file name"),
2258         OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"),
2259         OPT_STRING('p', "pid", &trace.opts.target.pid, "pid",
2260                     "trace events on existing process id"),
2261         OPT_STRING('t', "tid", &trace.opts.target.tid, "tid",
2262                     "trace events on existing thread id"),
2263         OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide,
2264                     "system-wide collection from all CPUs"),
2265         OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu",
2266                     "list of cpus to monitor"),
2267         OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit,
2268                     "child tasks do not inherit counters"),
2269         OPT_CALLBACK('m', "mmap-pages", &trace.opts.mmap_pages, "pages",
2270                      "number of mmap data pages",
2271                      perf_evlist__parse_mmap_pages),
2272         OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user",
2273                    "user to profile"),
2274         OPT_CALLBACK(0, "duration", &trace, "float",
2275                      "show only events with duration > N.M ms",
2276                      trace__set_duration),
2277         OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"),
2278         OPT_INCR('v', "verbose", &verbose, "be more verbose"),
2279         OPT_BOOLEAN('T', "time", &trace.full_time,
2280                     "Show full timestamp, not time relative to first start"),
2281         OPT_BOOLEAN('s', "summary", &trace.summary_only,
2282                     "Show only syscall summary with statistics"),
2283         OPT_BOOLEAN('S', "with-summary", &trace.summary,
2284                     "Show all syscalls and summary with statistics"),
2285         OPT_END()
2286         };
2287         int err;
2288         char bf[BUFSIZ];
2289
2290         if ((argc > 1) && (strcmp(argv[1], "record") == 0))
2291                 return trace__record(argc-2, &argv[2]);
2292
2293         argc = parse_options(argc, argv, trace_options, trace_usage, 0);
2294
2295         /* summary_only implies summary option, but don't overwrite summary if set */
2296         if (trace.summary_only)
2297                 trace.summary = trace.summary_only;
2298
2299         if (output_name != NULL) {
2300                 err = trace__open_output(&trace, output_name);
2301                 if (err < 0) {
2302                         perror("failed to create output file");
2303                         goto out;
2304                 }
2305         }
2306
2307         if (ev_qualifier_str != NULL) {
2308                 const char *s = ev_qualifier_str;
2309
2310                 trace.not_ev_qualifier = *s == '!';
2311                 if (trace.not_ev_qualifier)
2312                         ++s;
2313                 trace.ev_qualifier = strlist__new(true, s);
2314                 if (trace.ev_qualifier == NULL) {
2315                         fputs("Not enough memory to parse event qualifier",
2316                               trace.output);
2317                         err = -ENOMEM;
2318                         goto out_close;
2319                 }
2320         }
2321
2322         err = target__validate(&trace.opts.target);
2323         if (err) {
2324                 target__strerror(&trace.opts.target, err, bf, sizeof(bf));
2325                 fprintf(trace.output, "%s", bf);
2326                 goto out_close;
2327         }
2328
2329         err = target__parse_uid(&trace.opts.target);
2330         if (err) {
2331                 target__strerror(&trace.opts.target, err, bf, sizeof(bf));
2332                 fprintf(trace.output, "%s", bf);
2333                 goto out_close;
2334         }
2335
2336         if (!argc && target__none(&trace.opts.target))
2337                 trace.opts.target.system_wide = true;
2338
2339         if (input_name)
2340                 err = trace__replay(&trace);
2341         else
2342                 err = trace__run(&trace, argc, argv);
2343
2344 out_close:
2345         if (output_name != NULL)
2346                 fclose(trace.output);
2347 out:
2348         return err;
2349 }