Merge branch 'perf/urgent' into perf/core, to pick up fixes and to refresh the tree
[firefly-linux-kernel-4.4.55.git] / tools / perf / builtin-trace.c
1 #include <traceevent/event-parse.h>
2 #include "builtin.h"
3 #include "util/color.h"
4 #include "util/debug.h"
5 #include "util/evlist.h"
6 #include "util/machine.h"
7 #include "util/session.h"
8 #include "util/thread.h"
9 #include "util/parse-options.h"
10 #include "util/strlist.h"
11 #include "util/intlist.h"
12 #include "util/thread_map.h"
13 #include "util/stat.h"
14 #include "trace-event.h"
15 #include "util/parse-events.h"
16
17 #include <libaudit.h>
18 #include <stdlib.h>
19 #include <sys/eventfd.h>
20 #include <sys/mman.h>
21 #include <linux/futex.h>
22
/* Fallback values for constants that older distro headers do not provide. */
#if !defined(MAP_STACK)
#define MAP_STACK 0x20000
#endif

#if !defined(MADV_HWPOISON)
#define MADV_HWPOISON 100
#endif

#if !defined(MADV_MERGEABLE)
#define MADV_MERGEABLE 12
#endif

#if !defined(MADV_UNMERGEABLE)
#define MADV_UNMERGEABLE 13
#endif

#if !defined(EFD_SEMAPHORE)
#define EFD_SEMAPHORE 1
#endif
43
/*
 * Accessor for one tracepoint field stored in a sample's raw data: the byte
 * offset of the field plus the function used to read it, either as an
 * integer value or as a pointer into the raw data.
 */
struct tp_field {
        int offset; /* added to sample->raw_data by the reader functions */
        union {
                /* installed by tp_field__init_uint() */
                u64 (*integer)(struct tp_field *field, struct perf_sample *sample);
                /* installed by tp_field__init_ptr() */
                void *(*pointer)(struct tp_field *field, struct perf_sample *sample);
        };
};
51
52 #define TP_UINT_FIELD(bits) \
53 static u64 tp_field__u##bits(struct tp_field *field, struct perf_sample *sample) \
54 { \
55         u##bits value; \
56         memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
57         return value;  \
58 }
59
60 TP_UINT_FIELD(8);
61 TP_UINT_FIELD(16);
62 TP_UINT_FIELD(32);
63 TP_UINT_FIELD(64);
64
65 #define TP_UINT_FIELD__SWAPPED(bits) \
66 static u64 tp_field__swapped_u##bits(struct tp_field *field, struct perf_sample *sample) \
67 { \
68         u##bits value; \
69         memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
70         return bswap_##bits(value);\
71 }
72
73 TP_UINT_FIELD__SWAPPED(16);
74 TP_UINT_FIELD__SWAPPED(32);
75 TP_UINT_FIELD__SWAPPED(64);
76
77 static int tp_field__init_uint(struct tp_field *field,
78                                struct format_field *format_field,
79                                bool needs_swap)
80 {
81         field->offset = format_field->offset;
82
83         switch (format_field->size) {
84         case 1:
85                 field->integer = tp_field__u8;
86                 break;
87         case 2:
88                 field->integer = needs_swap ? tp_field__swapped_u16 : tp_field__u16;
89                 break;
90         case 4:
91                 field->integer = needs_swap ? tp_field__swapped_u32 : tp_field__u32;
92                 break;
93         case 8:
94                 field->integer = needs_swap ? tp_field__swapped_u64 : tp_field__u64;
95                 break;
96         default:
97                 return -1;
98         }
99
100         return 0;
101 }
102
/* Return the address of the field inside the sample's raw data (no copy). */
static void *tp_field__ptr(struct tp_field *field, struct perf_sample *sample)
{
        return sample->raw_data + field->offset;
}
107
/*
 * Set up @field as a pointer accessor: record the offset from @format_field
 * and install tp_field__ptr() as the reader.  Always returns 0.
 */
static int tp_field__init_ptr(struct tp_field *field, struct format_field *format_field)
{
        field->offset = format_field->offset;
        field->pointer = tp_field__ptr;
        return 0;
}
114
/*
 * Field accessors kept in evsel->priv for the syscall tracepoints: the "id"
 * field plus either "args" or "ret", which share the same storage.
 */
struct syscall_tp {
        struct tp_field id;
        union {
                struct tp_field args, ret;
        };
};
121
122 static int perf_evsel__init_tp_uint_field(struct perf_evsel *evsel,
123                                           struct tp_field *field,
124                                           const char *name)
125 {
126         struct format_field *format_field = perf_evsel__field(evsel, name);
127
128         if (format_field == NULL)
129                 return -1;
130
131         return tp_field__init_uint(field, format_field, evsel->needs_swap);
132 }
133
/*
 * Initialize the struct syscall_tp member <name> held in evsel->priv as an
 * integer accessor for the tracepoint field with the same name.
 */
#define perf_evsel__init_sc_tp_uint_field(evsel, name) \
        ({ struct syscall_tp *sc = evsel->priv;\
           perf_evsel__init_tp_uint_field(evsel, &sc->name, #name); })
137
/*
 * Look up the tracepoint field called @name in @evsel and initialize @field
 * as a pointer accessor for it.
 *
 * Returns -1 when the field does not exist, otherwise the result of
 * tp_field__init_ptr().
 */
static int perf_evsel__init_tp_ptr_field(struct perf_evsel *evsel,
                                         struct tp_field *field,
                                         const char *name)
{
        struct format_field *fmt = perf_evsel__field(evsel, name);

        return fmt ? tp_field__init_ptr(field, fmt) : -1;
}
149
/*
 * Initialize the struct syscall_tp member <name> held in evsel->priv as a
 * pointer accessor for the tracepoint field with the same name.
 */
#define perf_evsel__init_sc_tp_ptr_field(evsel, name) \
        ({ struct syscall_tp *sc = evsel->priv;\
           perf_evsel__init_tp_ptr_field(evsel, &sc->name, #name); })
153
/* Release evsel->priv (via zfree) and then delete the evsel itself. */
static void perf_evsel__delete_priv(struct perf_evsel *evsel)
{
        zfree(&evsel->priv);
        perf_evsel__delete(evsel);
}
159
160 static int perf_evsel__init_syscall_tp(struct perf_evsel *evsel, void *handler)
161 {
162         evsel->priv = malloc(sizeof(struct syscall_tp));
163         if (evsel->priv != NULL) {
164                 if (perf_evsel__init_sc_tp_uint_field(evsel, id))
165                         goto out_delete;
166
167                 evsel->handler = handler;
168                 return 0;
169         }
170
171         return -ENOMEM;
172
173 out_delete:
174         zfree(&evsel->priv);
175         return -ENOENT;
176 }
177
/*
 * Create an evsel for the "raw_syscalls:<direction>" tracepoint, falling back
 * to "syscalls:<direction>", and initialize it with @handler.
 *
 * Returns the new evsel, or NULL when neither tracepoint could be created or
 * the initialization failed (the evsel is deleted in the latter case).
 */
static struct perf_evsel *perf_evsel__syscall_newtp(const char *direction, void *handler)
{
        struct perf_evsel *evsel = perf_evsel__newtp("raw_syscalls", direction);

        /* older kernel (e.g., RHEL6) use syscalls:{enter,exit} */
        if (evsel == NULL)
                evsel = perf_evsel__newtp("syscalls", direction);

        if (evsel == NULL)
                return NULL;

        if (perf_evsel__init_syscall_tp(evsel, handler)) {
                perf_evsel__delete_priv(evsel);
                return NULL;
        }

        return evsel;
}
197
/*
 * Read the integer field <name> (a struct syscall_tp member) from <sample>
 * through the accessor stored in evsel->priv.
 */
#define perf_evsel__sc_tp_uint(evsel, name, sample) \
        ({ struct syscall_tp *fields = evsel->priv; \
           fields->name.integer(&fields->name, sample); })

/* Pointer counterpart: calls the .pointer accessor of member <name>. */
#define perf_evsel__sc_tp_ptr(evsel, name, sample) \
        ({ struct syscall_tp *fields = evsel->priv; \
           fields->name.pointer(&fields->name, sample); })
205
206 static int perf_evlist__add_syscall_newtp(struct perf_evlist *evlist,
207                                           void *sys_enter_handler,
208                                           void *sys_exit_handler)
209 {
210         int ret = -1;
211         struct perf_evsel *sys_enter, *sys_exit;
212
213         sys_enter = perf_evsel__syscall_newtp("sys_enter", sys_enter_handler);
214         if (sys_enter == NULL)
215                 goto out;
216
217         if (perf_evsel__init_sc_tp_ptr_field(sys_enter, args))
218                 goto out_delete_sys_enter;
219
220         sys_exit = perf_evsel__syscall_newtp("sys_exit", sys_exit_handler);
221         if (sys_exit == NULL)
222                 goto out_delete_sys_enter;
223
224         if (perf_evsel__init_sc_tp_uint_field(sys_exit, ret))
225                 goto out_delete_sys_exit;
226
227         perf_evlist__add(evlist, sys_enter);
228         perf_evlist__add(evlist, sys_exit);
229
230         ret = 0;
231 out:
232         return ret;
233
234 out_delete_sys_exit:
235         perf_evsel__delete_priv(sys_exit);
236 out_delete_sys_enter:
237         perf_evsel__delete_priv(sys_enter);
238         goto out;
239 }
240
241
/* What a syscall argument formatter (the SCA_* functions) receives. */
struct syscall_arg {
        unsigned long val;      /* raw value of the argument being formatted */
        struct thread *thread;  /* not read by the formatters defined above syscall_fmts */
        struct trace  *trace;   /* likewise */
        void          *parm;    /* formatter-specific data, e.g. a struct strarray */
        u8            idx;      /* index of this argument; open flags uses idx + 1 for the next one */
        /*
         * Bit N refers to argument N.  Formatters OR bits in here (see the
         * futex op and open flags formatters); presumably the caller then
         * skips printing those arguments - confirm against the code that
         * consumes the mask.
         */
        u8            mask;
};
250
/*
 * Table mapping integer values to names: a value v is looked up as
 * entries[v - offset] when that index falls within [0, nr_entries).
 */
struct strarray {
        int         offset;     /* value that corresponds to entries[0] */
        int         nr_entries; /* number of slots in entries */
        const char **entries;
};

/* Define strarray__<array> wrapping the string array <array>; offset stays 0. */
#define DEFINE_STRARRAY(array) struct strarray strarray__##array = { \
        .nr_entries = ARRAY_SIZE(array), \
        .entries = array, \
}

/* Same as DEFINE_STRARRAY(), with entries[0] standing for the value <off>. */
#define DEFINE_STRARRAY_OFFSET(array, off) struct strarray strarray__##array = { \
        .offset     = off, \
        .nr_entries = ARRAY_SIZE(array), \
        .entries = array, \
}
267
268 static size_t __syscall_arg__scnprintf_strarray(char *bf, size_t size,
269                                                 const char *intfmt,
270                                                 struct syscall_arg *arg)
271 {
272         struct strarray *sa = arg->parm;
273         int idx = arg->val - sa->offset;
274
275         if (idx < 0 || idx >= sa->nr_entries)
276                 return scnprintf(bf, size, intfmt, arg->val);
277
278         return scnprintf(bf, size, "%s", sa->entries[idx]);
279 }
280
/*
 * strarray formatter that prints values without a table entry using "%d".
 * Expects arg->parm to point to a struct strarray.
 */
static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size,
                                              struct syscall_arg *arg)
{
        return __syscall_arg__scnprintf_strarray(bf, size, "%d", arg);
}

#define SCA_STRARRAY syscall_arg__scnprintf_strarray
288
#if defined(__i386__) || defined(__x86_64__)
/*
 * FIXME: Make this available to all arches as soon as the ioctl beautifier
 *        gets rewritten to support all arches.
 */
/*
 * strarray formatter that prints values without a table entry using "%#x".
 * Expects arg->parm to point to a struct strarray.
 */
static size_t syscall_arg__scnprintf_strhexarray(char *bf, size_t size,
                                                 struct syscall_arg *arg)
{
        return __syscall_arg__scnprintf_strarray(bf, size, "%#x", arg);
}

#define SCA_STRHEXARRAY syscall_arg__scnprintf_strhexarray
#endif /* defined(__i386__) || defined(__x86_64__) */
302
303 static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
304                                         struct syscall_arg *arg);
305
306 #define SCA_FD syscall_arg__scnprintf_fd
307
308 static size_t syscall_arg__scnprintf_fd_at(char *bf, size_t size,
309                                            struct syscall_arg *arg)
310 {
311         int fd = arg->val;
312
313         if (fd == AT_FDCWD)
314                 return scnprintf(bf, size, "CWD");
315
316         return syscall_arg__scnprintf_fd(bf, size, arg);
317 }
318
319 #define SCA_FDAT syscall_arg__scnprintf_fd_at
320
321 static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
322                                               struct syscall_arg *arg);
323
324 #define SCA_CLOSE_FD syscall_arg__scnprintf_close_fd
325
/* Format the argument as hexadecimal with a 0x prefix ("%#lx"). */
static size_t syscall_arg__scnprintf_hex(char *bf, size_t size,
                                         struct syscall_arg *arg)
{
        return scnprintf(bf, size, "%#lx", arg->val);
}

#define SCA_HEX syscall_arg__scnprintf_hex
333
334 static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size,
335                                                struct syscall_arg *arg)
336 {
337         int printed = 0, prot = arg->val;
338
339         if (prot == PROT_NONE)
340                 return scnprintf(bf, size, "NONE");
341 #define P_MMAP_PROT(n) \
342         if (prot & PROT_##n) { \
343                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
344                 prot &= ~PROT_##n; \
345         }
346
347         P_MMAP_PROT(EXEC);
348         P_MMAP_PROT(READ);
349         P_MMAP_PROT(WRITE);
350 #ifdef PROT_SEM
351         P_MMAP_PROT(SEM);
352 #endif
353         P_MMAP_PROT(GROWSDOWN);
354         P_MMAP_PROT(GROWSUP);
355 #undef P_MMAP_PROT
356
357         if (prot)
358                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", prot);
359
360         return printed;
361 }
362
363 #define SCA_MMAP_PROT syscall_arg__scnprintf_mmap_prot
364
365 static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size,
366                                                 struct syscall_arg *arg)
367 {
368         int printed = 0, flags = arg->val;
369
370 #define P_MMAP_FLAG(n) \
371         if (flags & MAP_##n) { \
372                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
373                 flags &= ~MAP_##n; \
374         }
375
376         P_MMAP_FLAG(SHARED);
377         P_MMAP_FLAG(PRIVATE);
378 #ifdef MAP_32BIT
379         P_MMAP_FLAG(32BIT);
380 #endif
381         P_MMAP_FLAG(ANONYMOUS);
382         P_MMAP_FLAG(DENYWRITE);
383         P_MMAP_FLAG(EXECUTABLE);
384         P_MMAP_FLAG(FILE);
385         P_MMAP_FLAG(FIXED);
386         P_MMAP_FLAG(GROWSDOWN);
387 #ifdef MAP_HUGETLB
388         P_MMAP_FLAG(HUGETLB);
389 #endif
390         P_MMAP_FLAG(LOCKED);
391         P_MMAP_FLAG(NONBLOCK);
392         P_MMAP_FLAG(NORESERVE);
393         P_MMAP_FLAG(POPULATE);
394         P_MMAP_FLAG(STACK);
395 #ifdef MAP_UNINITIALIZED
396         P_MMAP_FLAG(UNINITIALIZED);
397 #endif
398 #undef P_MMAP_FLAG
399
400         if (flags)
401                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
402
403         return printed;
404 }
405
406 #define SCA_MMAP_FLAGS syscall_arg__scnprintf_mmap_flags
407
408 static size_t syscall_arg__scnprintf_mremap_flags(char *bf, size_t size,
409                                                   struct syscall_arg *arg)
410 {
411         int printed = 0, flags = arg->val;
412
413 #define P_MREMAP_FLAG(n) \
414         if (flags & MREMAP_##n) { \
415                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
416                 flags &= ~MREMAP_##n; \
417         }
418
419         P_MREMAP_FLAG(MAYMOVE);
420 #ifdef MREMAP_FIXED
421         P_MREMAP_FLAG(FIXED);
422 #endif
423 #undef P_MREMAP_FLAG
424
425         if (flags)
426                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
427
428         return printed;
429 }
430
431 #define SCA_MREMAP_FLAGS syscall_arg__scnprintf_mremap_flags
432
433 static size_t syscall_arg__scnprintf_madvise_behavior(char *bf, size_t size,
434                                                       struct syscall_arg *arg)
435 {
436         int behavior = arg->val;
437
438         switch (behavior) {
439 #define P_MADV_BHV(n) case MADV_##n: return scnprintf(bf, size, #n)
440         P_MADV_BHV(NORMAL);
441         P_MADV_BHV(RANDOM);
442         P_MADV_BHV(SEQUENTIAL);
443         P_MADV_BHV(WILLNEED);
444         P_MADV_BHV(DONTNEED);
445         P_MADV_BHV(REMOVE);
446         P_MADV_BHV(DONTFORK);
447         P_MADV_BHV(DOFORK);
448         P_MADV_BHV(HWPOISON);
449 #ifdef MADV_SOFT_OFFLINE
450         P_MADV_BHV(SOFT_OFFLINE);
451 #endif
452         P_MADV_BHV(MERGEABLE);
453         P_MADV_BHV(UNMERGEABLE);
454 #ifdef MADV_HUGEPAGE
455         P_MADV_BHV(HUGEPAGE);
456 #endif
457 #ifdef MADV_NOHUGEPAGE
458         P_MADV_BHV(NOHUGEPAGE);
459 #endif
460 #ifdef MADV_DONTDUMP
461         P_MADV_BHV(DONTDUMP);
462 #endif
463 #ifdef MADV_DODUMP
464         P_MADV_BHV(DODUMP);
465 #endif
466 #undef P_MADV_PHV
467         default: break;
468         }
469
470         return scnprintf(bf, size, "%#x", behavior);
471 }
472
473 #define SCA_MADV_BHV syscall_arg__scnprintf_madvise_behavior
474
475 static size_t syscall_arg__scnprintf_flock(char *bf, size_t size,
476                                            struct syscall_arg *arg)
477 {
478         int printed = 0, op = arg->val;
479
480         if (op == 0)
481                 return scnprintf(bf, size, "NONE");
482 #define P_CMD(cmd) \
483         if ((op & LOCK_##cmd) == LOCK_##cmd) { \
484                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #cmd); \
485                 op &= ~LOCK_##cmd; \
486         }
487
488         P_CMD(SH);
489         P_CMD(EX);
490         P_CMD(NB);
491         P_CMD(UN);
492         P_CMD(MAND);
493         P_CMD(RW);
494         P_CMD(READ);
495         P_CMD(WRITE);
496 #undef P_OP
497
498         if (op)
499                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", op);
500
501         return printed;
502 }
503
504 #define SCA_FLOCK syscall_arg__scnprintf_flock
505
506 static size_t syscall_arg__scnprintf_futex_op(char *bf, size_t size, struct syscall_arg *arg)
507 {
508         enum syscall_futex_args {
509                 SCF_UADDR   = (1 << 0),
510                 SCF_OP      = (1 << 1),
511                 SCF_VAL     = (1 << 2),
512                 SCF_TIMEOUT = (1 << 3),
513                 SCF_UADDR2  = (1 << 4),
514                 SCF_VAL3    = (1 << 5),
515         };
516         int op = arg->val;
517         int cmd = op & FUTEX_CMD_MASK;
518         size_t printed = 0;
519
520         switch (cmd) {
521 #define P_FUTEX_OP(n) case FUTEX_##n: printed = scnprintf(bf, size, #n);
522         P_FUTEX_OP(WAIT);           arg->mask |= SCF_VAL3|SCF_UADDR2;             break;
523         P_FUTEX_OP(WAKE);           arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
524         P_FUTEX_OP(FD);             arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
525         P_FUTEX_OP(REQUEUE);        arg->mask |= SCF_VAL3|SCF_TIMEOUT;            break;
526         P_FUTEX_OP(CMP_REQUEUE);    arg->mask |= SCF_TIMEOUT;                     break;
527         P_FUTEX_OP(CMP_REQUEUE_PI); arg->mask |= SCF_TIMEOUT;                     break;
528         P_FUTEX_OP(WAKE_OP);                                                      break;
529         P_FUTEX_OP(LOCK_PI);        arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
530         P_FUTEX_OP(UNLOCK_PI);      arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
531         P_FUTEX_OP(TRYLOCK_PI);     arg->mask |= SCF_VAL3|SCF_UADDR2;             break;
532         P_FUTEX_OP(WAIT_BITSET);    arg->mask |= SCF_UADDR2;                      break;
533         P_FUTEX_OP(WAKE_BITSET);    arg->mask |= SCF_UADDR2;                      break;
534         P_FUTEX_OP(WAIT_REQUEUE_PI);                                              break;
535         default: printed = scnprintf(bf, size, "%#x", cmd);                       break;
536         }
537
538         if (op & FUTEX_PRIVATE_FLAG)
539                 printed += scnprintf(bf + printed, size - printed, "|PRIV");
540
541         if (op & FUTEX_CLOCK_REALTIME)
542                 printed += scnprintf(bf + printed, size - printed, "|CLKRT");
543
544         return printed;
545 }
546
547 #define SCA_FUTEX_OP  syscall_arg__scnprintf_futex_op
548
/* Names for the epoll_ctl op argument; the first entry stands for value 1. */
static const char *epoll_ctl_ops[] = { "ADD", "DEL", "MOD", };
static DEFINE_STRARRAY_OFFSET(epoll_ctl_ops, 1);

/* Presumably the interval timer kinds (ITIMER_*), indexed from 0. */
static const char *itimers[] = { "REAL", "VIRTUAL", "PROF", };
static DEFINE_STRARRAY(itimers);

/*
 * Presumably the seek "whence" names (SEEK_*), indexed from 0; DATA and HOLE
 * are only present when the headers define SEEK_DATA / SEEK_HOLE.
 */
static const char *whences[] = { "SET", "CUR", "END",
#ifdef SEEK_DATA
"DATA",
#endif
#ifdef SEEK_HOLE
"HOLE",
#endif
};
static DEFINE_STRARRAY(whences);
564
/*
 * Names for the fcntl cmd argument, indexed from 0.  All names are spelled
 * without the F_ prefix so that the output is uniform across commands.
 */
static const char *fcntl_cmds[] = {
        "DUPFD", "GETFD", "SETFD", "GETFL", "SETFL", "GETLK", "SETLK",
        "SETLKW", "SETOWN", "GETOWN", "SETSIG", "GETSIG", "GETLK64",
        "SETLK64", "SETLKW64", "SETOWN_EX", "GETOWN_EX",
        "GETOWNER_UIDS",
};
static DEFINE_STRARRAY(fcntl_cmds);
572
/* Presumably the resource limit names (RLIMIT_*), indexed from 0. */
static const char *rlimit_resources[] = {
        "CPU", "FSIZE", "DATA", "STACK", "CORE", "RSS", "NPROC", "NOFILE",
        "MEMLOCK", "AS", "LOCKS", "SIGPENDING", "MSGQUEUE", "NICE", "RTPRIO",
        "RTTIME",
};
static DEFINE_STRARRAY(rlimit_resources);

/* Presumably the sigprocmask "how" names (SIG_*), indexed from 0. */
static const char *sighow[] = { "BLOCK", "UNBLOCK", "SETMASK", };
static DEFINE_STRARRAY(sighow);

/* Clock id names, indexed from 0; used for the clock_gettime clk_id argument. */
static const char *clockid[] = {
        "REALTIME", "MONOTONIC", "PROCESS_CPUTIME_ID", "THREAD_CPUTIME_ID",
        "MONOTONIC_RAW", "REALTIME_COARSE", "MONOTONIC_COARSE",
};
static DEFINE_STRARRAY(clockid);

/* Presumably the socket address family names (AF_*), indexed from 0. */
static const char *socket_families[] = {
        "UNSPEC", "LOCAL", "INET", "AX25", "IPX", "APPLETALK", "NETROM",
        "BRIDGE", "ATMPVC", "X25", "INET6", "ROSE", "DECnet", "NETBEUI",
        "SECURITY", "KEY", "NETLINK", "PACKET", "ASH", "ECONET", "ATMSVC",
        "RDS", "SNA", "IRDA", "PPPOX", "WANPIPE", "LLC", "IB", "CAN", "TIPC",
        "BLUETOOTH", "IUCV", "RXRPC", "ISDN", "PHONET", "IEEE802154", "CAIF",
        "ALG", "NFC", "VSOCK",
};
static DEFINE_STRARRAY(socket_families);
598
599 #ifndef SOCK_TYPE_MASK
600 #define SOCK_TYPE_MASK 0xf
601 #endif
602
603 static size_t syscall_arg__scnprintf_socket_type(char *bf, size_t size,
604                                                       struct syscall_arg *arg)
605 {
606         size_t printed;
607         int type = arg->val,
608             flags = type & ~SOCK_TYPE_MASK;
609
610         type &= SOCK_TYPE_MASK;
611         /*
612          * Can't use a strarray, MIPS may override for ABI reasons.
613          */
614         switch (type) {
615 #define P_SK_TYPE(n) case SOCK_##n: printed = scnprintf(bf, size, #n); break;
616         P_SK_TYPE(STREAM);
617         P_SK_TYPE(DGRAM);
618         P_SK_TYPE(RAW);
619         P_SK_TYPE(RDM);
620         P_SK_TYPE(SEQPACKET);
621         P_SK_TYPE(DCCP);
622         P_SK_TYPE(PACKET);
623 #undef P_SK_TYPE
624         default:
625                 printed = scnprintf(bf, size, "%#x", type);
626         }
627
628 #define P_SK_FLAG(n) \
629         if (flags & SOCK_##n) { \
630                 printed += scnprintf(bf + printed, size - printed, "|%s", #n); \
631                 flags &= ~SOCK_##n; \
632         }
633
634         P_SK_FLAG(CLOEXEC);
635         P_SK_FLAG(NONBLOCK);
636 #undef P_SK_FLAG
637
638         if (flags)
639                 printed += scnprintf(bf + printed, size - printed, "|%#x", flags);
640
641         return printed;
642 }
643
644 #define SCA_SK_TYPE syscall_arg__scnprintf_socket_type
645
646 #ifndef MSG_PROBE
647 #define MSG_PROBE            0x10
648 #endif
649 #ifndef MSG_WAITFORONE
650 #define MSG_WAITFORONE  0x10000
651 #endif
652 #ifndef MSG_SENDPAGE_NOTLAST
653 #define MSG_SENDPAGE_NOTLAST 0x20000
654 #endif
655 #ifndef MSG_FASTOPEN
656 #define MSG_FASTOPEN         0x20000000
657 #endif
658
659 static size_t syscall_arg__scnprintf_msg_flags(char *bf, size_t size,
660                                                struct syscall_arg *arg)
661 {
662         int printed = 0, flags = arg->val;
663
664         if (flags == 0)
665                 return scnprintf(bf, size, "NONE");
666 #define P_MSG_FLAG(n) \
667         if (flags & MSG_##n) { \
668                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
669                 flags &= ~MSG_##n; \
670         }
671
672         P_MSG_FLAG(OOB);
673         P_MSG_FLAG(PEEK);
674         P_MSG_FLAG(DONTROUTE);
675         P_MSG_FLAG(TRYHARD);
676         P_MSG_FLAG(CTRUNC);
677         P_MSG_FLAG(PROBE);
678         P_MSG_FLAG(TRUNC);
679         P_MSG_FLAG(DONTWAIT);
680         P_MSG_FLAG(EOR);
681         P_MSG_FLAG(WAITALL);
682         P_MSG_FLAG(FIN);
683         P_MSG_FLAG(SYN);
684         P_MSG_FLAG(CONFIRM);
685         P_MSG_FLAG(RST);
686         P_MSG_FLAG(ERRQUEUE);
687         P_MSG_FLAG(NOSIGNAL);
688         P_MSG_FLAG(MORE);
689         P_MSG_FLAG(WAITFORONE);
690         P_MSG_FLAG(SENDPAGE_NOTLAST);
691         P_MSG_FLAG(FASTOPEN);
692         P_MSG_FLAG(CMSG_CLOEXEC);
693 #undef P_MSG_FLAG
694
695         if (flags)
696                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
697
698         return printed;
699 }
700
701 #define SCA_MSG_FLAGS syscall_arg__scnprintf_msg_flags
702
703 static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size,
704                                                  struct syscall_arg *arg)
705 {
706         size_t printed = 0;
707         int mode = arg->val;
708
709         if (mode == F_OK) /* 0 */
710                 return scnprintf(bf, size, "F");
711 #define P_MODE(n) \
712         if (mode & n##_OK) { \
713                 printed += scnprintf(bf + printed, size - printed, "%s", #n); \
714                 mode &= ~n##_OK; \
715         }
716
717         P_MODE(R);
718         P_MODE(W);
719         P_MODE(X);
720 #undef P_MODE
721
722         if (mode)
723                 printed += scnprintf(bf + printed, size - printed, "|%#x", mode);
724
725         return printed;
726 }
727
728 #define SCA_ACCMODE syscall_arg__scnprintf_access_mode
729
730 static size_t syscall_arg__scnprintf_open_flags(char *bf, size_t size,
731                                                struct syscall_arg *arg)
732 {
733         int printed = 0, flags = arg->val;
734
735         if (!(flags & O_CREAT))
736                 arg->mask |= 1 << (arg->idx + 1); /* Mask the mode parm */
737
738         if (flags == 0)
739                 return scnprintf(bf, size, "RDONLY");
740 #define P_FLAG(n) \
741         if (flags & O_##n) { \
742                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
743                 flags &= ~O_##n; \
744         }
745
746         P_FLAG(APPEND);
747         P_FLAG(ASYNC);
748         P_FLAG(CLOEXEC);
749         P_FLAG(CREAT);
750         P_FLAG(DIRECT);
751         P_FLAG(DIRECTORY);
752         P_FLAG(EXCL);
753         P_FLAG(LARGEFILE);
754         P_FLAG(NOATIME);
755         P_FLAG(NOCTTY);
756 #ifdef O_NONBLOCK
757         P_FLAG(NONBLOCK);
758 #elif O_NDELAY
759         P_FLAG(NDELAY);
760 #endif
761 #ifdef O_PATH
762         P_FLAG(PATH);
763 #endif
764         P_FLAG(RDWR);
765 #ifdef O_DSYNC
766         if ((flags & O_SYNC) == O_SYNC)
767                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", "SYNC");
768         else {
769                 P_FLAG(DSYNC);
770         }
771 #else
772         P_FLAG(SYNC);
773 #endif
774         P_FLAG(TRUNC);
775         P_FLAG(WRONLY);
776 #undef P_FLAG
777
778         if (flags)
779                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
780
781         return printed;
782 }
783
784 #define SCA_OPEN_FLAGS syscall_arg__scnprintf_open_flags
785
786 static size_t syscall_arg__scnprintf_eventfd_flags(char *bf, size_t size,
787                                                    struct syscall_arg *arg)
788 {
789         int printed = 0, flags = arg->val;
790
791         if (flags == 0)
792                 return scnprintf(bf, size, "NONE");
793 #define P_FLAG(n) \
794         if (flags & EFD_##n) { \
795                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
796                 flags &= ~EFD_##n; \
797         }
798
799         P_FLAG(SEMAPHORE);
800         P_FLAG(CLOEXEC);
801         P_FLAG(NONBLOCK);
802 #undef P_FLAG
803
804         if (flags)
805                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
806
807         return printed;
808 }
809
810 #define SCA_EFD_FLAGS syscall_arg__scnprintf_eventfd_flags
811
812 static size_t syscall_arg__scnprintf_pipe_flags(char *bf, size_t size,
813                                                 struct syscall_arg *arg)
814 {
815         int printed = 0, flags = arg->val;
816
817 #define P_FLAG(n) \
818         if (flags & O_##n) { \
819                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
820                 flags &= ~O_##n; \
821         }
822
823         P_FLAG(CLOEXEC);
824         P_FLAG(NONBLOCK);
825 #undef P_FLAG
826
827         if (flags)
828                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
829
830         return printed;
831 }
832
833 #define SCA_PIPE_FLAGS syscall_arg__scnprintf_pipe_flags
834
835 static size_t syscall_arg__scnprintf_signum(char *bf, size_t size, struct syscall_arg *arg)
836 {
837         int sig = arg->val;
838
839         switch (sig) {
840 #define P_SIGNUM(n) case SIG##n: return scnprintf(bf, size, #n)
841         P_SIGNUM(HUP);
842         P_SIGNUM(INT);
843         P_SIGNUM(QUIT);
844         P_SIGNUM(ILL);
845         P_SIGNUM(TRAP);
846         P_SIGNUM(ABRT);
847         P_SIGNUM(BUS);
848         P_SIGNUM(FPE);
849         P_SIGNUM(KILL);
850         P_SIGNUM(USR1);
851         P_SIGNUM(SEGV);
852         P_SIGNUM(USR2);
853         P_SIGNUM(PIPE);
854         P_SIGNUM(ALRM);
855         P_SIGNUM(TERM);
856         P_SIGNUM(CHLD);
857         P_SIGNUM(CONT);
858         P_SIGNUM(STOP);
859         P_SIGNUM(TSTP);
860         P_SIGNUM(TTIN);
861         P_SIGNUM(TTOU);
862         P_SIGNUM(URG);
863         P_SIGNUM(XCPU);
864         P_SIGNUM(XFSZ);
865         P_SIGNUM(VTALRM);
866         P_SIGNUM(PROF);
867         P_SIGNUM(WINCH);
868         P_SIGNUM(IO);
869         P_SIGNUM(PWR);
870         P_SIGNUM(SYS);
871 #ifdef SIGEMT
872         P_SIGNUM(EMT);
873 #endif
874 #ifdef SIGSTKFLT
875         P_SIGNUM(STKFLT);
876 #endif
877 #ifdef SIGSWI
878         P_SIGNUM(SWI);
879 #endif
880         default: break;
881         }
882
883         return scnprintf(bf, size, "%#x", sig);
884 }
885
886 #define SCA_SIGNUM syscall_arg__scnprintf_signum
887
888 #if defined(__i386__) || defined(__x86_64__)
889 /*
890  * FIXME: Make this available to all arches.
891  */
892 #define TCGETS          0x5401
893
894 static const char *tioctls[] = {
895         "TCGETS", "TCSETS", "TCSETSW", "TCSETSF", "TCGETA", "TCSETA", "TCSETAW",
896         "TCSETAF", "TCSBRK", "TCXONC", "TCFLSH", "TIOCEXCL", "TIOCNXCL",
897         "TIOCSCTTY", "TIOCGPGRP", "TIOCSPGRP", "TIOCOUTQ", "TIOCSTI",
898         "TIOCGWINSZ", "TIOCSWINSZ", "TIOCMGET", "TIOCMBIS", "TIOCMBIC",
899         "TIOCMSET", "TIOCGSOFTCAR", "TIOCSSOFTCAR", "FIONREAD", "TIOCLINUX",
900         "TIOCCONS", "TIOCGSERIAL", "TIOCSSERIAL", "TIOCPKT", "FIONBIO",
901         "TIOCNOTTY", "TIOCSETD", "TIOCGETD", "TCSBRKP", [0x27] = "TIOCSBRK",
902         "TIOCCBRK", "TIOCGSID", "TCGETS2", "TCSETS2", "TCSETSW2", "TCSETSF2",
903         "TIOCGRS485", "TIOCSRS485", "TIOCGPTN", "TIOCSPTLCK",
904         "TIOCGDEV||TCGETX", "TCSETX", "TCSETXF", "TCSETXW", "TIOCSIG",
905         "TIOCVHANGUP", "TIOCGPKT", "TIOCGPTLCK", "TIOCGEXCL",
906         [0x50] = "FIONCLEX", "FIOCLEX", "FIOASYNC", "TIOCSERCONFIG",
907         "TIOCSERGWILD", "TIOCSERSWILD", "TIOCGLCKTRMIOS", "TIOCSLCKTRMIOS",
908         "TIOCSERGSTRUCT", "TIOCSERGETLSR", "TIOCSERGETMULTI", "TIOCSERSETMULTI",
909         "TIOCMIWAIT", "TIOCGICOUNT", [0x60] = "FIOQSIZE",
910 };
911
912 static DEFINE_STRARRAY_OFFSET(tioctls, 0x5401);
913 #endif /* defined(__i386__) || defined(__x86_64__) */
914
/*
 * Initializer fragment for a syscall_fmt entry: format argument number <arg>
 * with SCA_STRARRAY using the table strarray__<array>.  <name> is not used
 * by the expansion; it only labels the argument at the point of use.
 */
#define STRARRAY(arg, name, array) \
          .arg_scnprintf = { [arg] = SCA_STRARRAY, }, \
          .arg_parm      = { [arg] = &strarray__##array, }
918
/*
 * Per-syscall pretty-printing overrides, looked up by name through
 * syscall_fmt__find().
 *
 * Fields:
 *   name          - syscall name, used as the lookup key.
 *   alias         - alternate name tried for the "sys_enter_<alias>"
 *                   tracepoint when "sys_enter_<name>" cannot be found.
 *   arg_scnprintf - optional formatter for each of up to six arguments; a
 *                   NULL slot means there is no override for that argument.
 *   arg_parm      - per-argument data handed to the formatter through
 *                   syscall_arg->parm (e.g. the strarray for SCA_STRARRAY).
 *   errmsg, timeout, hexret - return value formatting hints.
 *                   NOTE(review): these are consumed outside this section;
 *                   presumably errno decoding, timeout reporting and hex
 *                   output respectively - confirm against the exit handler.
 *
 * The entries MUST stay sorted by .name in strcmp() order: the lookup is a
 * bsearch(), which silently misses entries that are out of place.
 */
919 static struct syscall_fmt {
920         const char *name;
921         const char *alias;
922         size_t     (*arg_scnprintf[6])(char *bf, size_t size, struct syscall_arg *arg);
923         void       *arg_parm[6];
924         bool       errmsg;
925         bool       timeout;
926         bool       hexret;
927 } syscall_fmts[] = {
928         { .name     = "access",     .errmsg = true,
929           .arg_scnprintf = { [1] = SCA_ACCMODE, /* mode */ }, },
930         { .name     = "arch_prctl", .errmsg = true, .alias = "prctl", },
931         { .name     = "brk",        .hexret = true,
932           .arg_scnprintf = { [0] = SCA_HEX, /* brk */ }, },
933         { .name     = "clock_gettime",  .errmsg = true, STRARRAY(0, clk_id, clockid), },
934         { .name     = "close",      .errmsg = true,
935           .arg_scnprintf = { [0] = SCA_CLOSE_FD, /* fd */ }, },
936         { .name     = "connect",    .errmsg = true, },
937         { .name     = "dup",        .errmsg = true,
938           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
939         { .name     = "dup2",       .errmsg = true,
940           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
941         { .name     = "dup3",       .errmsg = true,
942           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
943         { .name     = "epoll_ctl",  .errmsg = true, STRARRAY(1, op, epoll_ctl_ops), },
944         { .name     = "eventfd2",   .errmsg = true,
945           .arg_scnprintf = { [1] = SCA_EFD_FLAGS, /* flags */ }, },
946         { .name     = "faccessat",  .errmsg = true,
947           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
948         { .name     = "fadvise64",  .errmsg = true,
949           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
950         { .name     = "fallocate",  .errmsg = true,
951           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
952         { .name     = "fchdir",     .errmsg = true,
953           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
954         { .name     = "fchmod",     .errmsg = true,
955           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
956         { .name     = "fchmodat",   .errmsg = true,
957           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
958         { .name     = "fchown",     .errmsg = true,
959           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
960         { .name     = "fchownat",   .errmsg = true,
961           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
962         { .name     = "fcntl",      .errmsg = true,
963           .arg_scnprintf = { [0] = SCA_FD, /* fd */
964                              [1] = SCA_STRARRAY, /* cmd */ },
965           .arg_parm      = { [1] = &strarray__fcntl_cmds, /* cmd */ }, },
966         { .name     = "fdatasync",  .errmsg = true,
967           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
968         { .name     = "flock",      .errmsg = true,
969           .arg_scnprintf = { [0] = SCA_FD, /* fd */
970                              [1] = SCA_FLOCK, /* cmd */ }, },
971         { .name     = "fsetxattr",  .errmsg = true,
972           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
973         { .name     = "fstat",      .errmsg = true, .alias = "newfstat",
974           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
975         { .name     = "fstatat",    .errmsg = true, .alias = "newfstatat",
976           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
977         { .name     = "fstatfs",    .errmsg = true,
978           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
979         { .name     = "fsync",    .errmsg = true,
980           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
981         { .name     = "ftruncate", .errmsg = true,
982           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
983         { .name     = "futex",      .errmsg = true,
984           .arg_scnprintf = { [1] = SCA_FUTEX_OP, /* op */ }, },
985         { .name     = "futimesat", .errmsg = true,
986           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
987         { .name     = "getdents",   .errmsg = true,
988           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
989         { .name     = "getdents64", .errmsg = true,
990           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
991         { .name     = "getitimer",  .errmsg = true, STRARRAY(0, which, itimers), },
992         { .name     = "getrlimit",  .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
993         { .name     = "ioctl",      .errmsg = true,
994           .arg_scnprintf = { [0] = SCA_FD, /* fd */
995 #if defined(__i386__) || defined(__x86_64__)
996 /*
997  * FIXME: Make this available to all arches.
998  */
999                              [1] = SCA_STRHEXARRAY, /* cmd */
1000                              [2] = SCA_HEX, /* arg */ },
1001           .arg_parm      = { [1] = &strarray__tioctls, /* cmd */ }, },
1002 #else
1003                              [2] = SCA_HEX, /* arg */ }, },
1004 #endif
1005         { .name     = "kill",       .errmsg = true,
1006           .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1007         { .name     = "linkat",     .errmsg = true,
1008           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
1009         { .name     = "lseek",      .errmsg = true,
1010           .arg_scnprintf = { [0] = SCA_FD, /* fd */
1011                              [2] = SCA_STRARRAY, /* whence */ },
1012           .arg_parm      = { [2] = &strarray__whences, /* whence */ }, },
1013         { .name     = "lstat",      .errmsg = true, .alias = "newlstat", },
1014         { .name     = "madvise",    .errmsg = true,
1015           .arg_scnprintf = { [0] = SCA_HEX,      /* start */
1016                              [2] = SCA_MADV_BHV, /* behavior */ }, },
1017         { .name     = "mkdirat",    .errmsg = true,
1018           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
1019         { .name     = "mknodat",    .errmsg = true,
1020           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
1021         { .name     = "mlock",      .errmsg = true,
1022           .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1023         { .name     = "mlockall",   .errmsg = true,
1024           .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1025         { .name     = "mmap",       .hexret = true,
1026           .arg_scnprintf = { [0] = SCA_HEX,       /* addr */
1027                              [2] = SCA_MMAP_PROT, /* prot */
1028                              [3] = SCA_MMAP_FLAGS, /* flags */
1029                              [4] = SCA_FD,        /* fd */ }, },
1030         { .name     = "mprotect",   .errmsg = true,
1031           .arg_scnprintf = { [0] = SCA_HEX, /* start */
1032                              [2] = SCA_MMAP_PROT, /* prot */ }, },
1033         { .name     = "mremap",     .hexret = true,
1034           .arg_scnprintf = { [0] = SCA_HEX, /* addr */
1035                              [3] = SCA_MREMAP_FLAGS, /* flags */
1036                              [4] = SCA_HEX, /* new_addr */ }, },
1037         { .name     = "munlock",    .errmsg = true,
1038           .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1039         { .name     = "munmap",     .errmsg = true,
1040           .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1041         { .name     = "name_to_handle_at", .errmsg = true,
1042           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1043         { .name     = "newfstatat", .errmsg = true,
1044           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1045         { .name     = "open",       .errmsg = true,
1046           .arg_scnprintf = { [1] = SCA_OPEN_FLAGS, /* flags */ }, },
1047         { .name     = "open_by_handle_at", .errmsg = true,
1048           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1049                              [2] = SCA_OPEN_FLAGS, /* flags */ }, },
1050         { .name     = "openat",     .errmsg = true,
1051           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1052                              [2] = SCA_OPEN_FLAGS, /* flags */ }, },
1053         { .name     = "pipe2",      .errmsg = true,
1054           .arg_scnprintf = { [1] = SCA_PIPE_FLAGS, /* flags */ }, },
1055         { .name     = "poll",       .errmsg = true, .timeout = true, },
1056         { .name     = "ppoll",      .errmsg = true, .timeout = true, },
1057         { .name     = "pread",      .errmsg = true, .alias = "pread64",
1058           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1059         { .name     = "preadv",     .errmsg = true, .alias = "pread",
1060           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1061         { .name     = "prlimit64",  .errmsg = true, STRARRAY(1, resource, rlimit_resources), },
1062         { .name     = "pwrite",     .errmsg = true, .alias = "pwrite64",
1063           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1064         { .name     = "pwritev",    .errmsg = true,
1065           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1066         { .name     = "read",       .errmsg = true,
1067           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1068         { .name     = "readlinkat", .errmsg = true,
1069           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1070         { .name     = "readv",      .errmsg = true,
1071           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1072         { .name     = "recvfrom",   .errmsg = true,
1073           .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1074         { .name     = "recvmmsg",   .errmsg = true,
1075           .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1076         { .name     = "recvmsg",    .errmsg = true,
1077           .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
1078         { .name     = "renameat",   .errmsg = true,
1079           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1080         { .name     = "rt_sigaction", .errmsg = true,
1081           .arg_scnprintf = { [0] = SCA_SIGNUM, /* sig */ }, },
1082         { .name     = "rt_sigprocmask",  .errmsg = true, STRARRAY(0, how, sighow), },
1083         { .name     = "rt_sigqueueinfo", .errmsg = true,
1084           .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1085         { .name     = "rt_tgsigqueueinfo", .errmsg = true,
1086           .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
1087         { .name     = "select",     .errmsg = true, .timeout = true, },
1088         { .name     = "sendmmsg",    .errmsg = true,
1089           .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1090         { .name     = "sendmsg",    .errmsg = true,
1091           .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
1092         { .name     = "sendto",     .errmsg = true,
1093           .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1094         { .name     = "setitimer",  .errmsg = true, STRARRAY(0, which, itimers), },
1095         { .name     = "setrlimit",  .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
1096         { .name     = "shutdown",   .errmsg = true,
1097           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1098         { .name     = "socket",     .errmsg = true,
1099           .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
1100                              [1] = SCA_SK_TYPE, /* type */ },
1101           .arg_parm      = { [0] = &strarray__socket_families, /* family */ }, },
1102         { .name     = "socketpair", .errmsg = true,
1103           .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
1104                              [1] = SCA_SK_TYPE, /* type */ },
1105           .arg_parm      = { [0] = &strarray__socket_families, /* family */ }, },
1106         { .name     = "stat",       .errmsg = true, .alias = "newstat", },
1107         { .name     = "symlinkat",  .errmsg = true,
1108           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1109         { .name     = "tgkill",     .errmsg = true,
1110           .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
1111         { .name     = "tkill",      .errmsg = true,
1112           .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1113         { .name     = "uname",      .errmsg = true, .alias = "newuname", },
1114         { .name     = "unlinkat",   .errmsg = true,
1115           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1116         { .name     = "utimensat",  .errmsg = true,
1117           .arg_scnprintf = { [0] = SCA_FDAT, /* dirfd */ }, },
1118         { .name     = "write",      .errmsg = true,
1119           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1120         { .name     = "writev",     .errmsg = true,
1121           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1122 };
1123
1124 static int syscall_fmt__cmp(const void *name, const void *fmtp)
1125 {
1126         const struct syscall_fmt *fmt = fmtp;
1127         return strcmp(name, fmt->name);
1128 }
1129
1130 static struct syscall_fmt *syscall_fmt__find(const char *name)
1131 {
1132         const int nmemb = ARRAY_SIZE(syscall_fmts);
1133         return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp);
1134 }
1135
/*
 * Cached description of one syscall, stored in trace->syscalls.table[] at
 * the index of its syscall id and filled in by trace__read_syscall_info().
 */
1136 struct syscall {
1137         struct event_format *tp_format;  /* "syscalls:sys_enter_<name>" (or alias) tracepoint format */
1138         int                 nr_args;     /* number of fields in args, leading "nr" field excluded */
1139         struct format_field *args;       /* first argument field of tp_format, NULL if unknown */
1140         const char          *name;       /* as returned by audit_syscall_to_name() */
1141         bool                filtered;    /* excluded by the trace's ev_qualifier list */
1142         bool                is_exit;     /* name is "exit" or "exit_group" */
1143         struct syscall_fmt  *fmt;        /* syscall_fmts[] entry, NULL when there is none */
1144         size_t              (**arg_scnprintf)(char *bf, size_t size, struct syscall_arg *arg); /* per-argument formatters, nr_args slots */
1145         void                **arg_parm;  /* fmt->arg_parm when fmt is set */
1146 };
1147
1148 static size_t fprintf_duration(unsigned long t, FILE *fp)
1149 {
1150         double duration = (double)t / NSEC_PER_MSEC;
1151         size_t printed = fprintf(fp, "(");
1152
1153         if (duration >= 1.0)
1154                 printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration);
1155         else if (duration >= 0.01)
1156                 printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration);
1157         else
1158                 printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration);
1159         return printed + fprintf(fp, "): ");
1160 }
1161
/*
 * Per-thread tracing state, attached to a struct thread through
 * thread__set_priv() and created lazily by thread__trace().
 */
1162 struct thread_trace {
1163         u64               entry_time;      /* compared against sample->time to derive a syscall duration */
1164         u64               exit_time;
1165         bool              entry_pending;   /* an entry line is buffered in entry_str and not yet printed */
1166         unsigned long     nr_events;       /* bumped on every thread__trace() call */
1167         unsigned long     pfmaj, pfmin;    /* NOTE(review): presumably major/minor page fault counts - confirm */
1168         char              *entry_str;      /* text of the pending syscall entry */
1169         double            runtime_ms;
1170         struct {
1171                 int       max;             /* highest valid index in table, -1 while table is empty */
1172                 char      **table;         /* fd -> strdup()ed pathname, NULL when unknown */
1173         } paths;
1174
1175         struct intlist *syscall_stats;     /* keyed by syscall id; node->priv is a struct stats */
1176 };
1177
1178 static struct thread_trace *thread_trace__new(void)
1179 {
1180         struct thread_trace *ttrace =  zalloc(sizeof(struct thread_trace));
1181
1182         if (ttrace)
1183                 ttrace->paths.max = -1;
1184
1185         ttrace->syscall_stats = intlist__new(NULL);
1186
1187         return ttrace;
1188 }
1189
1190 static struct thread_trace *thread__trace(struct thread *thread, FILE *fp)
1191 {
1192         struct thread_trace *ttrace;
1193
1194         if (thread == NULL)
1195                 goto fail;
1196
1197         if (thread__priv(thread) == NULL)
1198                 thread__set_priv(thread, thread_trace__new());
1199
1200         if (thread__priv(thread) == NULL)
1201                 goto fail;
1202
1203         ttrace = thread__priv(thread);
1204         ++ttrace->nr_events;
1205
1206         return ttrace;
1207 fail:
1208         color_fprintf(fp, PERF_COLOR_RED,
1209                       "WARNING: not enough memory, dropping samples!\n");
1210         return NULL;
1211 }
1212
/*
 * Single-bit flags.
 * NOTE(review): presumably OR'ed into trace->trace_pgfaults to select major
 * and/or minor page fault tracing - the users are outside this section.
 */
1213 #define TRACE_PFMAJ             (1 << 0)
1214 #define TRACE_PFMIN             (1 << 1)
1215
/*
 * State of one tracing session. Only the members whose use is visible in
 * the surrounding code are annotated; the rest are set up and consumed
 * elsewhere in the file.
 */
1216 struct trace {
1217         struct perf_tool        tool;            /* embedded; handlers recover the trace via container_of() */
1218         struct {
1219                 int             machine;         /* passed to audit_syscall_to_name() */
1220                 int             open_id;
1221         }                       audit;
1222         struct {
1223                 int             max;             /* highest valid index in table */
1224                 struct syscall  *table;          /* indexed by syscall id, grown on demand */
1225         } syscalls;
1226         struct record_opts      opts;
1227         struct perf_evlist      *evlist;
1228         struct machine          *host;           /* created by trace__symbols_init() */
1229         struct thread           *current;        /* thread whose pending entry may need flushing */
1230         u64                     base_time;       /* subtracted from timestamps before printing */
1231         FILE                    *output;         /* destination of all trace output */
1232         unsigned long           nr_events;
1233         struct strlist          *ev_qualifier;   /* syscall names to match, see not_ev_qualifier */
1234         const char              *last_vfs_getname;
1235         struct intlist          *tid_list;
1236         struct intlist          *pid_list;
1237         struct {
1238                 size_t          nr;
1239                 pid_t           *entries;
1240         }                       filter_pids;
1241         double                  duration_filter; /* scaled by NSEC_PER_MSEC in trace__filter_duration() */
1242         double                  runtime_ms;
1243         struct {
1244                 u64             vfs_getname,
1245                                 proc_getname;    /* fd path lookups that had to go to /proc */
1246         } stats;
1247         bool                    not_ev_qualifier; /* ev_qualifier lists syscalls to exclude, not include */
1248         bool                    live;            /* /proc may be consulted for fd paths only when set */
1249         bool                    full_time;
1250         bool                    sched;
1251         bool                    multiple_threads; /* prefix each line with the thread id */
1252         bool                    summary;
1253         bool                    summary_only;
1254         bool                    show_comm;       /* also print the comm in front of the thread id */
1255         bool                    show_tool_stats;
1256         bool                    trace_syscalls;
1257         int                     trace_pgfaults;
1258 };
1259
1260 static int trace__set_fd_pathname(struct thread *thread, int fd, const char *pathname)
1261 {
1262         struct thread_trace *ttrace = thread__priv(thread);
1263
1264         if (fd > ttrace->paths.max) {
1265                 char **npath = realloc(ttrace->paths.table, (fd + 1) * sizeof(char *));
1266
1267                 if (npath == NULL)
1268                         return -1;
1269
1270                 if (ttrace->paths.max != -1) {
1271                         memset(npath + ttrace->paths.max + 1, 0,
1272                                (fd - ttrace->paths.max) * sizeof(char *));
1273                 } else {
1274                         memset(npath, 0, (fd + 1) * sizeof(char *));
1275                 }
1276
1277                 ttrace->paths.table = npath;
1278                 ttrace->paths.max   = fd;
1279         }
1280
1281         ttrace->paths.table[fd] = strdup(pathname);
1282
1283         return ttrace->paths.table[fd] != NULL ? 0 : -1;
1284 }
1285
1286 static int thread__read_fd_path(struct thread *thread, int fd)
1287 {
1288         char linkname[PATH_MAX], pathname[PATH_MAX];
1289         struct stat st;
1290         int ret;
1291
1292         if (thread->pid_ == thread->tid) {
1293                 scnprintf(linkname, sizeof(linkname),
1294                           "/proc/%d/fd/%d", thread->pid_, fd);
1295         } else {
1296                 scnprintf(linkname, sizeof(linkname),
1297                           "/proc/%d/task/%d/fd/%d", thread->pid_, thread->tid, fd);
1298         }
1299
1300         if (lstat(linkname, &st) < 0 || st.st_size + 1 > (off_t)sizeof(pathname))
1301                 return -1;
1302
1303         ret = readlink(linkname, pathname, sizeof(pathname));
1304
1305         if (ret < 0 || ret > st.st_size)
1306                 return -1;
1307
1308         pathname[ret] = '\0';
1309         return trace__set_fd_pathname(thread, fd, pathname);
1310 }
1311
1312 static const char *thread__fd_path(struct thread *thread, int fd,
1313                                    struct trace *trace)
1314 {
1315         struct thread_trace *ttrace = thread__priv(thread);
1316
1317         if (ttrace == NULL)
1318                 return NULL;
1319
1320         if (fd < 0)
1321                 return NULL;
1322
1323         if ((fd > ttrace->paths.max || ttrace->paths.table[fd] == NULL)) {
1324                 if (!trace->live)
1325                         return NULL;
1326                 ++trace->stats.proc_getname;
1327                 if (thread__read_fd_path(thread, fd))
1328                         return NULL;
1329         }
1330
1331         return ttrace->paths.table[fd];
1332 }
1333
1334 static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
1335                                         struct syscall_arg *arg)
1336 {
1337         int fd = arg->val;
1338         size_t printed = scnprintf(bf, size, "%d", fd);
1339         const char *path = thread__fd_path(arg->thread, fd, arg->trace);
1340
1341         if (path)
1342                 printed += scnprintf(bf + printed, size - printed, "<%s>", path);
1343
1344         return printed;
1345 }
1346
1347 static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
1348                                               struct syscall_arg *arg)
1349 {
1350         int fd = arg->val;
1351         size_t printed = syscall_arg__scnprintf_fd(bf, size, arg);
1352         struct thread_trace *ttrace = thread__priv(arg->thread);
1353
1354         if (ttrace && fd >= 0 && fd <= ttrace->paths.max)
1355                 zfree(&ttrace->paths.table[fd]);
1356
1357         return printed;
1358 }
1359
1360 static bool trace__filter_duration(struct trace *trace, double t)
1361 {
1362         return t < (trace->duration_filter * NSEC_PER_MSEC);
1363 }
1364
1365 static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
1366 {
1367         double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC;
1368
1369         return fprintf(fp, "%10.3f ", ts);
1370 }
1371
/*
 * Flags shared between the signal handler and the main loop. They are
 * written from signal context, so they must be volatile sig_atomic_t:
 * that is the only object type the C standard lets a handler store to.
 */
static volatile sig_atomic_t done = false;
static volatile sig_atomic_t interrupted = false;

/*
 * Signal handler: request termination and note whether the signal that
 * arrived was SIGINT.
 */
static void sig_handler(int sig)
{
	done = true;
	interrupted = sig == SIGINT;
}
1380
1381 static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread,
1382                                         u64 duration, u64 tstamp, FILE *fp)
1383 {
1384         size_t printed = trace__fprintf_tstamp(trace, tstamp, fp);
1385         printed += fprintf_duration(duration, fp);
1386
1387         if (trace->multiple_threads) {
1388                 if (trace->show_comm)
1389                         printed += fprintf(fp, "%.14s/", thread__comm_str(thread));
1390                 printed += fprintf(fp, "%d ", thread->tid);
1391         }
1392
1393         return printed;
1394 }
1395
1396 static int trace__process_event(struct trace *trace, struct machine *machine,
1397                                 union perf_event *event, struct perf_sample *sample)
1398 {
1399         int ret = 0;
1400
1401         switch (event->header.type) {
1402         case PERF_RECORD_LOST:
1403                 color_fprintf(trace->output, PERF_COLOR_RED,
1404                               "LOST %" PRIu64 " events!\n", event->lost.lost);
1405                 ret = machine__process_lost_event(machine, event, sample);
1406         default:
1407                 ret = machine__process_event(machine, event, sample);
1408                 break;
1409         }
1410
1411         return ret;
1412 }
1413
1414 static int trace__tool_process(struct perf_tool *tool,
1415                                union perf_event *event,
1416                                struct perf_sample *sample,
1417                                struct machine *machine)
1418 {
1419         struct trace *trace = container_of(tool, struct trace, tool);
1420         return trace__process_event(trace, machine, event, sample);
1421 }
1422
1423 static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
1424 {
1425         int err = symbol__init(NULL);
1426
1427         if (err)
1428                 return err;
1429
1430         trace->host = machine__new_host();
1431         if (trace->host == NULL)
1432                 return -ENOMEM;
1433
1434         err = __machine__synthesize_threads(trace->host, &trace->tool, &trace->opts.target,
1435                                             evlist->threads, trace__tool_process, false);
1436         if (err)
1437                 symbol__exit();
1438
1439         return err;
1440 }
1441
1442 static int syscall__set_arg_fmts(struct syscall *sc)
1443 {
1444         struct format_field *field;
1445         int idx = 0;
1446
1447         sc->arg_scnprintf = calloc(sc->nr_args, sizeof(void *));
1448         if (sc->arg_scnprintf == NULL)
1449                 return -1;
1450
1451         if (sc->fmt)
1452                 sc->arg_parm = sc->fmt->arg_parm;
1453
1454         for (field = sc->args; field; field = field->next) {
1455                 if (sc->fmt && sc->fmt->arg_scnprintf[idx])
1456                         sc->arg_scnprintf[idx] = sc->fmt->arg_scnprintf[idx];
1457                 else if (field->flags & FIELD_IS_POINTER)
1458                         sc->arg_scnprintf[idx] = syscall_arg__scnprintf_hex;
1459                 ++idx;
1460         }
1461
1462         return 0;
1463 }
1464
1465 static int trace__read_syscall_info(struct trace *trace, int id)
1466 {
1467         char tp_name[128];
1468         struct syscall *sc;
1469         const char *name = audit_syscall_to_name(id, trace->audit.machine);
1470
1471         if (name == NULL)
1472                 return -1;
1473
1474         if (id > trace->syscalls.max) {
1475                 struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc));
1476
1477                 if (nsyscalls == NULL)
1478                         return -1;
1479
1480                 if (trace->syscalls.max != -1) {
1481                         memset(nsyscalls + trace->syscalls.max + 1, 0,
1482                                (id - trace->syscalls.max) * sizeof(*sc));
1483                 } else {
1484                         memset(nsyscalls, 0, (id + 1) * sizeof(*sc));
1485                 }
1486
1487                 trace->syscalls.table = nsyscalls;
1488                 trace->syscalls.max   = id;
1489         }
1490
1491         sc = trace->syscalls.table + id;
1492         sc->name = name;
1493
1494         if (trace->ev_qualifier) {
1495                 bool in = strlist__find(trace->ev_qualifier, name) != NULL;
1496
1497                 if (!(in ^ trace->not_ev_qualifier)) {
1498                         sc->filtered = true;
1499                         /*
1500                          * No need to do read tracepoint information since this will be
1501                          * filtered out.
1502                          */
1503                         return 0;
1504                 }
1505         }
1506
1507         sc->fmt  = syscall_fmt__find(sc->name);
1508
1509         snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name);
1510         sc->tp_format = trace_event__tp_format("syscalls", tp_name);
1511
1512         if (sc->tp_format == NULL && sc->fmt && sc->fmt->alias) {
1513                 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias);
1514                 sc->tp_format = trace_event__tp_format("syscalls", tp_name);
1515         }
1516
1517         if (sc->tp_format == NULL)
1518                 return -1;
1519
1520         sc->args = sc->tp_format->format.fields;
1521         sc->nr_args = sc->tp_format->format.nr_fields;
1522         /* drop nr field - not relevant here; does not exist on older kernels */
1523         if (sc->args && strcmp(sc->args->name, "nr") == 0) {
1524                 sc->args = sc->args->next;
1525                 --sc->nr_args;
1526         }
1527
1528         sc->is_exit = !strcmp(name, "exit_group") || !strcmp(name, "exit");
1529
1530         return syscall__set_arg_fmts(sc);
1531 }
1532
1533 /*
1534  * args is to be interpreted as a series of longs but we need to handle
1535  * 8-byte unaligned accesses. args points to raw_data within the event
1536  * and raw_data is guaranteed to be 8-byte unaligned because it is
1537  * preceded by raw_size which is a u32. So we need to copy args to a temp
1538  * variable to read it. Most notably this avoids extended load instructions
1539  * on unaligned addresses
1540  */
1541
1542 static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
1543                                       unsigned char *args, struct trace *trace,
1544                                       struct thread *thread)
1545 {
1546         size_t printed = 0;
1547         unsigned char *p;
1548         unsigned long val;
1549
1550         if (sc->args != NULL) {
1551                 struct format_field *field;
1552                 u8 bit = 1;
1553                 struct syscall_arg arg = {
1554                         .idx    = 0,
1555                         .mask   = 0,
1556                         .trace  = trace,
1557                         .thread = thread,
1558                 };
1559
1560                 for (field = sc->args; field;
1561                      field = field->next, ++arg.idx, bit <<= 1) {
1562                         if (arg.mask & bit)
1563                                 continue;
1564
1565                         /* special care for unaligned accesses */
1566                         p = args + sizeof(unsigned long) * arg.idx;
1567                         memcpy(&val, p, sizeof(val));
1568
1569                         /*
1570                          * Suppress this argument if its value is zero and
1571                          * and we don't have a string associated in an
1572                          * strarray for it.
1573                          */
1574                         if (val == 0 &&
1575                             !(sc->arg_scnprintf &&
1576                               sc->arg_scnprintf[arg.idx] == SCA_STRARRAY &&
1577                               sc->arg_parm[arg.idx]))
1578                                 continue;
1579
1580                         printed += scnprintf(bf + printed, size - printed,
1581                                              "%s%s: ", printed ? ", " : "", field->name);
1582                         if (sc->arg_scnprintf && sc->arg_scnprintf[arg.idx]) {
1583                                 arg.val = val;
1584                                 if (sc->arg_parm)
1585                                         arg.parm = sc->arg_parm[arg.idx];
1586                                 printed += sc->arg_scnprintf[arg.idx](bf + printed,
1587                                                                       size - printed, &arg);
1588                         } else {
1589                                 printed += scnprintf(bf + printed, size - printed,
1590                                                      "%ld", val);
1591                         }
1592                 }
1593         } else {
1594                 int i = 0;
1595
1596                 while (i < 6) {
1597                         /* special care for unaligned accesses */
1598                         p = args + sizeof(unsigned long) * i;
1599                         memcpy(&val, p, sizeof(val));
1600                         printed += scnprintf(bf + printed, size - printed,
1601                                              "%sarg%d: %ld",
1602                                              printed ? ", " : "", i, val);
1603                         ++i;
1604                 }
1605         }
1606
1607         return printed;
1608 }
1609
/*
 * Signature of a handler invoked for one sample of a tracepoint event:
 * it receives the session, the evsel the sample belongs to, the raw event
 * and the parsed sample, and returns an int status.
 * NOTE(review): the handlers and their return convention are defined
 * outside this section.
 */
1610 typedef int (*tracepoint_handler)(struct trace *trace, struct perf_evsel *evsel,
1611                                   union perf_event *event,
1612                                   struct perf_sample *sample);
1613
1614 static struct syscall *trace__syscall_info(struct trace *trace,
1615                                            struct perf_evsel *evsel, int id)
1616 {
1617
1618         if (id < 0) {
1619
1620                 /*
1621                  * XXX: Noticed on x86_64, reproduced as far back as 3.0.36, haven't tried
1622                  * before that, leaving at a higher verbosity level till that is
1623                  * explained. Reproduced with plain ftrace with:
1624                  *
1625                  * echo 1 > /t/events/raw_syscalls/sys_exit/enable
1626                  * grep "NR -1 " /t/trace_pipe
1627                  *
1628                  * After generating some load on the machine.
1629                  */
1630                 if (verbose > 1) {
1631                         static u64 n;
1632                         fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n",
1633                                 id, perf_evsel__name(evsel), ++n);
1634                 }
1635                 return NULL;
1636         }
1637
1638         if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) &&
1639             trace__read_syscall_info(trace, id))
1640                 goto out_cant_read;
1641
1642         if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL))
1643                 goto out_cant_read;
1644
1645         return &trace->syscalls.table[id];
1646
1647 out_cant_read:
1648         if (verbose) {
1649                 fprintf(trace->output, "Problems reading syscall %d", id);
1650                 if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL)
1651                         fprintf(trace->output, "(%s)", trace->syscalls.table[id].name);
1652                 fputs(" information\n", trace->output);
1653         }
1654         return NULL;
1655 }
1656
1657 static void thread__update_stats(struct thread_trace *ttrace,
1658                                  int id, struct perf_sample *sample)
1659 {
1660         struct int_node *inode;
1661         struct stats *stats;
1662         u64 duration = 0;
1663
1664         inode = intlist__findnew(ttrace->syscall_stats, id);
1665         if (inode == NULL)
1666                 return;
1667
1668         stats = inode->priv;
1669         if (stats == NULL) {
1670                 stats = malloc(sizeof(struct stats));
1671                 if (stats == NULL)
1672                         return;
1673                 init_stats(stats);
1674                 inode->priv = stats;
1675         }
1676
1677         if (ttrace->entry_time && sample->time > ttrace->entry_time)
1678                 duration = sample->time - ttrace->entry_time;
1679
1680         update_stats(stats, duration);
1681 }
1682
1683 static int trace__printf_interrupted_entry(struct trace *trace, struct perf_sample *sample)
1684 {
1685         struct thread_trace *ttrace;
1686         u64 duration;
1687         size_t printed;
1688
1689         if (trace->current == NULL)
1690                 return 0;
1691
1692         ttrace = thread__priv(trace->current);
1693
1694         if (!ttrace->entry_pending)
1695                 return 0;
1696
1697         duration = sample->time - ttrace->entry_time;
1698
1699         printed  = trace__fprintf_entry_head(trace, trace->current, duration, sample->time, trace->output);
1700         printed += fprintf(trace->output, "%-70s) ...\n", ttrace->entry_str);
1701         ttrace->entry_pending = false;
1702
1703         return printed;
1704 }
1705
1706 static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
1707                             union perf_event *event __maybe_unused,
1708                             struct perf_sample *sample)
1709 {
1710         char *msg;
1711         void *args;
1712         size_t printed = 0;
1713         struct thread *thread;
1714         int id = perf_evsel__sc_tp_uint(evsel, id, sample);
1715         struct syscall *sc = trace__syscall_info(trace, evsel, id);
1716         struct thread_trace *ttrace;
1717
1718         if (sc == NULL)
1719                 return -1;
1720
1721         if (sc->filtered)
1722                 return 0;
1723
1724         thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1725         ttrace = thread__trace(thread, trace->output);
1726         if (ttrace == NULL)
1727                 return -1;
1728
1729         args = perf_evsel__sc_tp_ptr(evsel, args, sample);
1730
1731         if (ttrace->entry_str == NULL) {
1732                 ttrace->entry_str = malloc(1024);
1733                 if (!ttrace->entry_str)
1734                         return -1;
1735         }
1736
1737         if (!trace->summary_only)
1738                 trace__printf_interrupted_entry(trace, sample);
1739
1740         ttrace->entry_time = sample->time;
1741         msg = ttrace->entry_str;
1742         printed += scnprintf(msg + printed, 1024 - printed, "%s(", sc->name);
1743
1744         printed += syscall__scnprintf_args(sc, msg + printed, 1024 - printed,
1745                                            args, trace, thread);
1746
1747         if (sc->is_exit) {
1748                 if (!trace->duration_filter && !trace->summary_only) {
1749                         trace__fprintf_entry_head(trace, thread, 1, sample->time, trace->output);
1750                         fprintf(trace->output, "%-70s\n", ttrace->entry_str);
1751                 }
1752         } else
1753                 ttrace->entry_pending = true;
1754
1755         if (trace->current != thread) {
1756                 thread__put(trace->current);
1757                 trace->current = thread__get(thread);
1758         }
1759
1760         return 0;
1761 }
1762
1763 static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
1764                            union perf_event *event __maybe_unused,
1765                            struct perf_sample *sample)
1766 {
1767         long ret;
1768         u64 duration = 0;
1769         struct thread *thread;
1770         int id = perf_evsel__sc_tp_uint(evsel, id, sample);
1771         struct syscall *sc = trace__syscall_info(trace, evsel, id);
1772         struct thread_trace *ttrace;
1773
1774         if (sc == NULL)
1775                 return -1;
1776
1777         if (sc->filtered)
1778                 return 0;
1779
1780         thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1781         ttrace = thread__trace(thread, trace->output);
1782         if (ttrace == NULL)
1783                 return -1;
1784
1785         if (trace->summary)
1786                 thread__update_stats(ttrace, id, sample);
1787
1788         ret = perf_evsel__sc_tp_uint(evsel, ret, sample);
1789
1790         if (id == trace->audit.open_id && ret >= 0 && trace->last_vfs_getname) {
1791                 trace__set_fd_pathname(thread, ret, trace->last_vfs_getname);
1792                 trace->last_vfs_getname = NULL;
1793                 ++trace->stats.vfs_getname;
1794         }
1795
1796         ttrace->exit_time = sample->time;
1797
1798         if (ttrace->entry_time) {
1799                 duration = sample->time - ttrace->entry_time;
1800                 if (trace__filter_duration(trace, duration))
1801                         goto out;
1802         } else if (trace->duration_filter)
1803                 goto out;
1804
1805         if (trace->summary_only)
1806                 goto out;
1807
1808         trace__fprintf_entry_head(trace, thread, duration, sample->time, trace->output);
1809
1810         if (ttrace->entry_pending) {
1811                 fprintf(trace->output, "%-70s", ttrace->entry_str);
1812         } else {
1813                 fprintf(trace->output, " ... [");
1814                 color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued");
1815                 fprintf(trace->output, "]: %s()", sc->name);
1816         }
1817
1818         if (sc->fmt == NULL) {
1819 signed_print:
1820                 fprintf(trace->output, ") = %ld", ret);
1821         } else if (ret < 0 && sc->fmt->errmsg) {
1822                 char bf[STRERR_BUFSIZE];
1823                 const char *emsg = strerror_r(-ret, bf, sizeof(bf)),
1824                            *e = audit_errno_to_name(-ret);
1825
1826                 fprintf(trace->output, ") = -1 %s %s", e, emsg);
1827         } else if (ret == 0 && sc->fmt->timeout)
1828                 fprintf(trace->output, ") = 0 Timeout");
1829         else if (sc->fmt->hexret)
1830                 fprintf(trace->output, ") = %#lx", ret);
1831         else
1832                 goto signed_print;
1833
1834         fputc('\n', trace->output);
1835 out:
1836         ttrace->entry_pending = false;
1837
1838         return 0;
1839 }
1840
1841 static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel,
1842                               union perf_event *event __maybe_unused,
1843                               struct perf_sample *sample)
1844 {
1845         trace->last_vfs_getname = perf_evsel__rawptr(evsel, sample, "pathname");
1846         return 0;
1847 }
1848
1849 static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel,
1850                                      union perf_event *event __maybe_unused,
1851                                      struct perf_sample *sample)
1852 {
1853         u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
1854         double runtime_ms = (double)runtime / NSEC_PER_MSEC;
1855         struct thread *thread = machine__findnew_thread(trace->host,
1856                                                         sample->pid,
1857                                                         sample->tid);
1858         struct thread_trace *ttrace = thread__trace(thread, trace->output);
1859
1860         if (ttrace == NULL)
1861                 goto out_dump;
1862
1863         ttrace->runtime_ms += runtime_ms;
1864         trace->runtime_ms += runtime_ms;
1865         return 0;
1866
1867 out_dump:
1868         fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n",
1869                evsel->name,
1870                perf_evsel__strval(evsel, sample, "comm"),
1871                (pid_t)perf_evsel__intval(evsel, sample, "pid"),
1872                runtime,
1873                perf_evsel__intval(evsel, sample, "vruntime"));
1874         return 0;
1875 }
1876
1877 static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel,
1878                                 union perf_event *event __maybe_unused,
1879                                 struct perf_sample *sample)
1880 {
1881         trace__printf_interrupted_entry(trace, sample);
1882         trace__fprintf_tstamp(trace, sample->time, trace->output);
1883
1884         if (trace->trace_syscalls)
1885                 fprintf(trace->output, "(         ): ");
1886
1887         fprintf(trace->output, "%s:", evsel->name);
1888
1889         if (evsel->tp_format) {
1890                 event_format__fprintf(evsel->tp_format, sample->cpu,
1891                                       sample->raw_data, sample->raw_size,
1892                                       trace->output);
1893         }
1894
1895         fprintf(trace->output, ")\n");
1896         return 0;
1897 }
1898
1899 static void print_location(FILE *f, struct perf_sample *sample,
1900                            struct addr_location *al,
1901                            bool print_dso, bool print_sym)
1902 {
1903
1904         if ((verbose || print_dso) && al->map)
1905                 fprintf(f, "%s@", al->map->dso->long_name);
1906
1907         if ((verbose || print_sym) && al->sym)
1908                 fprintf(f, "%s+0x%" PRIx64, al->sym->name,
1909                         al->addr - al->sym->start);
1910         else if (al->map)
1911                 fprintf(f, "0x%" PRIx64, al->addr);
1912         else
1913                 fprintf(f, "0x%" PRIx64, sample->addr);
1914 }
1915
1916 static int trace__pgfault(struct trace *trace,
1917                           struct perf_evsel *evsel,
1918                           union perf_event *event,
1919                           struct perf_sample *sample)
1920 {
1921         struct thread *thread;
1922         u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
1923         struct addr_location al;
1924         char map_type = 'd';
1925         struct thread_trace *ttrace;
1926
1927         thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1928         ttrace = thread__trace(thread, trace->output);
1929         if (ttrace == NULL)
1930                 return -1;
1931
1932         if (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ)
1933                 ttrace->pfmaj++;
1934         else
1935                 ttrace->pfmin++;
1936
1937         if (trace->summary_only)
1938                 return 0;
1939
1940         thread__find_addr_location(thread, cpumode, MAP__FUNCTION,
1941                               sample->ip, &al);
1942
1943         trace__fprintf_entry_head(trace, thread, 0, sample->time, trace->output);
1944
1945         fprintf(trace->output, "%sfault [",
1946                 evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ?
1947                 "maj" : "min");
1948
1949         print_location(trace->output, sample, &al, false, true);
1950
1951         fprintf(trace->output, "] => ");
1952
1953         thread__find_addr_location(thread, cpumode, MAP__VARIABLE,
1954                                    sample->addr, &al);
1955
1956         if (!al.map) {
1957                 thread__find_addr_location(thread, cpumode,
1958                                            MAP__FUNCTION, sample->addr, &al);
1959
1960                 if (al.map)
1961                         map_type = 'x';
1962                 else
1963                         map_type = '?';
1964         }
1965
1966         print_location(trace->output, sample, &al, true, false);
1967
1968         fprintf(trace->output, " (%c%c)\n", map_type, al.level);
1969
1970         return 0;
1971 }
1972
1973 static bool skip_sample(struct trace *trace, struct perf_sample *sample)
1974 {
1975         if ((trace->pid_list && intlist__find(trace->pid_list, sample->pid)) ||
1976             (trace->tid_list && intlist__find(trace->tid_list, sample->tid)))
1977                 return false;
1978
1979         if (trace->pid_list || trace->tid_list)
1980                 return true;
1981
1982         return false;
1983 }
1984
1985 static int trace__process_sample(struct perf_tool *tool,
1986                                  union perf_event *event,
1987                                  struct perf_sample *sample,
1988                                  struct perf_evsel *evsel,
1989                                  struct machine *machine __maybe_unused)
1990 {
1991         struct trace *trace = container_of(tool, struct trace, tool);
1992         int err = 0;
1993
1994         tracepoint_handler handler = evsel->handler;
1995
1996         if (skip_sample(trace, sample))
1997                 return 0;
1998
1999         if (!trace->full_time && trace->base_time == 0)
2000                 trace->base_time = sample->time;
2001
2002         if (handler) {
2003                 ++trace->nr_events;
2004                 handler(trace, evsel, event, sample);
2005         }
2006
2007         return err;
2008 }
2009
2010 static int parse_target_str(struct trace *trace)
2011 {
2012         if (trace->opts.target.pid) {
2013                 trace->pid_list = intlist__new(trace->opts.target.pid);
2014                 if (trace->pid_list == NULL) {
2015                         pr_err("Error parsing process id string\n");
2016                         return -EINVAL;
2017                 }
2018         }
2019
2020         if (trace->opts.target.tid) {
2021                 trace->tid_list = intlist__new(trace->opts.target.tid);
2022                 if (trace->tid_list == NULL) {
2023                         pr_err("Error parsing thread id string\n");
2024                         return -EINVAL;
2025                 }
2026         }
2027
2028         return 0;
2029 }
2030
2031 static int trace__record(struct trace *trace, int argc, const char **argv)
2032 {
2033         unsigned int rec_argc, i, j;
2034         const char **rec_argv;
2035         const char * const record_args[] = {
2036                 "record",
2037                 "-R",
2038                 "-m", "1024",
2039                 "-c", "1",
2040         };
2041
2042         const char * const sc_args[] = { "-e", };
2043         unsigned int sc_args_nr = ARRAY_SIZE(sc_args);
2044         const char * const majpf_args[] = { "-e", "major-faults" };
2045         unsigned int majpf_args_nr = ARRAY_SIZE(majpf_args);
2046         const char * const minpf_args[] = { "-e", "minor-faults" };
2047         unsigned int minpf_args_nr = ARRAY_SIZE(minpf_args);
2048
2049         /* +1 is for the event string below */
2050         rec_argc = ARRAY_SIZE(record_args) + sc_args_nr + 1 +
2051                 majpf_args_nr + minpf_args_nr + argc;
2052         rec_argv = calloc(rec_argc + 1, sizeof(char *));
2053
2054         if (rec_argv == NULL)
2055                 return -ENOMEM;
2056
2057         j = 0;
2058         for (i = 0; i < ARRAY_SIZE(record_args); i++)
2059                 rec_argv[j++] = record_args[i];
2060
2061         if (trace->trace_syscalls) {
2062                 for (i = 0; i < sc_args_nr; i++)
2063                         rec_argv[j++] = sc_args[i];
2064
2065                 /* event string may be different for older kernels - e.g., RHEL6 */
2066                 if (is_valid_tracepoint("raw_syscalls:sys_enter"))
2067                         rec_argv[j++] = "raw_syscalls:sys_enter,raw_syscalls:sys_exit";
2068                 else if (is_valid_tracepoint("syscalls:sys_enter"))
2069                         rec_argv[j++] = "syscalls:sys_enter,syscalls:sys_exit";
2070                 else {
2071                         pr_err("Neither raw_syscalls nor syscalls events exist.\n");
2072                         return -1;
2073                 }
2074         }
2075
2076         if (trace->trace_pgfaults & TRACE_PFMAJ)
2077                 for (i = 0; i < majpf_args_nr; i++)
2078                         rec_argv[j++] = majpf_args[i];
2079
2080         if (trace->trace_pgfaults & TRACE_PFMIN)
2081                 for (i = 0; i < minpf_args_nr; i++)
2082                         rec_argv[j++] = minpf_args[i];
2083
2084         for (i = 0; i < (unsigned int)argc; i++)
2085                 rec_argv[j++] = argv[i];
2086
2087         return cmd_record(j, rec_argv, NULL);
2088 }
2089
/* Declared ahead of its callers, trace__run() and trace__replay(). */
static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp);
2091
2092 static void perf_evlist__add_vfs_getname(struct perf_evlist *evlist)
2093 {
2094         struct perf_evsel *evsel = perf_evsel__newtp("probe", "vfs_getname");
2095         if (evsel == NULL)
2096                 return;
2097
2098         if (perf_evsel__field(evsel, "pathname") == NULL) {
2099                 perf_evsel__delete(evsel);
2100                 return;
2101         }
2102
2103         evsel->handler = trace__vfs_getname;
2104         perf_evlist__add(evlist, evsel);
2105 }
2106
2107 static int perf_evlist__add_pgfault(struct perf_evlist *evlist,
2108                                     u64 config)
2109 {
2110         struct perf_evsel *evsel;
2111         struct perf_event_attr attr = {
2112                 .type = PERF_TYPE_SOFTWARE,
2113                 .mmap_data = 1,
2114         };
2115
2116         attr.config = config;
2117         attr.sample_period = 1;
2118
2119         event_attr_init(&attr);
2120
2121         evsel = perf_evsel__new(&attr);
2122         if (!evsel)
2123                 return -ENOMEM;
2124
2125         evsel->handler = trace__pgfault;
2126         perf_evlist__add(evlist, evsel);
2127
2128         return 0;
2129 }
2130
2131 static void trace__handle_event(struct trace *trace, union perf_event *event, struct perf_sample *sample)
2132 {
2133         const u32 type = event->header.type;
2134         struct perf_evsel *evsel;
2135
2136         if (!trace->full_time && trace->base_time == 0)
2137                 trace->base_time = sample->time;
2138
2139         if (type != PERF_RECORD_SAMPLE) {
2140                 trace__process_event(trace, trace->host, event, sample);
2141                 return;
2142         }
2143
2144         evsel = perf_evlist__id2evsel(trace->evlist, sample->id);
2145         if (evsel == NULL) {
2146                 fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample->id);
2147                 return;
2148         }
2149
2150         if (evsel->attr.type == PERF_TYPE_TRACEPOINT &&
2151             sample->raw_data == NULL) {
2152                 fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
2153                        perf_evsel__name(evsel), sample->tid,
2154                        sample->cpu, sample->raw_size);
2155         } else {
2156                 tracepoint_handler handler = evsel->handler;
2157                 handler(trace, evsel, event, sample);
2158         }
2159 }
2160
2161 static int trace__run(struct trace *trace, int argc, const char **argv)
2162 {
2163         struct perf_evlist *evlist = trace->evlist;
2164         int err = -1, i;
2165         unsigned long before;
2166         const bool forks = argc > 0;
2167         bool draining = false;
2168
2169         trace->live = true;
2170
2171         if (trace->trace_syscalls &&
2172             perf_evlist__add_syscall_newtp(evlist, trace__sys_enter,
2173                                            trace__sys_exit))
2174                 goto out_error_raw_syscalls;
2175
2176         if (trace->trace_syscalls)
2177                 perf_evlist__add_vfs_getname(evlist);
2178
2179         if ((trace->trace_pgfaults & TRACE_PFMAJ) &&
2180             perf_evlist__add_pgfault(evlist, PERF_COUNT_SW_PAGE_FAULTS_MAJ)) {
2181                 goto out_error_mem;
2182         }
2183
2184         if ((trace->trace_pgfaults & TRACE_PFMIN) &&
2185             perf_evlist__add_pgfault(evlist, PERF_COUNT_SW_PAGE_FAULTS_MIN))
2186                 goto out_error_mem;
2187
2188         if (trace->sched &&
2189             perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
2190                                    trace__sched_stat_runtime))
2191                 goto out_error_sched_stat_runtime;
2192
2193         err = perf_evlist__create_maps(evlist, &trace->opts.target);
2194         if (err < 0) {
2195                 fprintf(trace->output, "Problems parsing the target to trace, check your options!\n");
2196                 goto out_delete_evlist;
2197         }
2198
2199         err = trace__symbols_init(trace, evlist);
2200         if (err < 0) {
2201                 fprintf(trace->output, "Problems initializing symbol libraries!\n");
2202                 goto out_delete_evlist;
2203         }
2204
2205         perf_evlist__config(evlist, &trace->opts);
2206
2207         signal(SIGCHLD, sig_handler);
2208         signal(SIGINT, sig_handler);
2209
2210         if (forks) {
2211                 err = perf_evlist__prepare_workload(evlist, &trace->opts.target,
2212                                                     argv, false, NULL);
2213                 if (err < 0) {
2214                         fprintf(trace->output, "Couldn't run the workload!\n");
2215                         goto out_delete_evlist;
2216                 }
2217         }
2218
2219         err = perf_evlist__open(evlist);
2220         if (err < 0)
2221                 goto out_error_open;
2222
2223         /*
2224          * Better not use !target__has_task() here because we need to cover the
2225          * case where no threads were specified in the command line, but a
2226          * workload was, and in that case we will fill in the thread_map when
2227          * we fork the workload in perf_evlist__prepare_workload.
2228          */
2229         if (trace->filter_pids.nr > 0)
2230                 err = perf_evlist__set_filter_pids(evlist, trace->filter_pids.nr, trace->filter_pids.entries);
2231         else if (evlist->threads->map[0] == -1)
2232                 err = perf_evlist__set_filter_pid(evlist, getpid());
2233
2234         if (err < 0) {
2235                 printf("err=%d,%s\n", -err, strerror(-err));
2236                 exit(1);
2237         }
2238
2239         err = perf_evlist__mmap(evlist, trace->opts.mmap_pages, false);
2240         if (err < 0)
2241                 goto out_error_mmap;
2242
2243         if (forks)
2244                 perf_evlist__start_workload(evlist);
2245         else
2246                 perf_evlist__enable(evlist);
2247
2248         trace->multiple_threads = evlist->threads->map[0] == -1 ||
2249                                   evlist->threads->nr > 1 ||
2250                                   perf_evlist__first(evlist)->attr.inherit;
2251 again:
2252         before = trace->nr_events;
2253
2254         for (i = 0; i < evlist->nr_mmaps; i++) {
2255                 union perf_event *event;
2256
2257                 while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) {
2258                         struct perf_sample sample;
2259
2260                         ++trace->nr_events;
2261
2262                         err = perf_evlist__parse_sample(evlist, event, &sample);
2263                         if (err) {
2264                                 fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err);
2265                                 goto next_event;
2266                         }
2267
2268                         trace__handle_event(trace, event, &sample);
2269 next_event:
2270                         perf_evlist__mmap_consume(evlist, i);
2271
2272                         if (interrupted)
2273                                 goto out_disable;
2274                 }
2275         }
2276
2277         if (trace->nr_events == before) {
2278                 int timeout = done ? 100 : -1;
2279
2280                 if (!draining && perf_evlist__poll(evlist, timeout) > 0) {
2281                         if (perf_evlist__filter_pollfd(evlist, POLLERR | POLLHUP) == 0)
2282                                 draining = true;
2283
2284                         goto again;
2285                 }
2286         } else {
2287                 goto again;
2288         }
2289
2290 out_disable:
2291         thread__zput(trace->current);
2292
2293         perf_evlist__disable(evlist);
2294
2295         if (!err) {
2296                 if (trace->summary)
2297                         trace__fprintf_thread_summary(trace, trace->output);
2298
2299                 if (trace->show_tool_stats) {
2300                         fprintf(trace->output, "Stats:\n "
2301                                                " vfs_getname : %" PRIu64 "\n"
2302                                                " proc_getname: %" PRIu64 "\n",
2303                                 trace->stats.vfs_getname,
2304                                 trace->stats.proc_getname);
2305                 }
2306         }
2307
2308 out_delete_evlist:
2309         perf_evlist__delete(evlist);
2310         trace->evlist = NULL;
2311         trace->live = false;
2312         return err;
2313 {
2314         char errbuf[BUFSIZ];
2315
2316 out_error_sched_stat_runtime:
2317         debugfs__strerror_open_tp(errno, errbuf, sizeof(errbuf), "sched", "sched_stat_runtime");
2318         goto out_error;
2319
2320 out_error_raw_syscalls:
2321         debugfs__strerror_open_tp(errno, errbuf, sizeof(errbuf), "raw_syscalls", "sys_(enter|exit)");
2322         goto out_error;
2323
2324 out_error_mmap:
2325         perf_evlist__strerror_mmap(evlist, errno, errbuf, sizeof(errbuf));
2326         goto out_error;
2327
2328 out_error_open:
2329         perf_evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf));
2330
2331 out_error:
2332         fprintf(trace->output, "%s\n", errbuf);
2333         goto out_delete_evlist;
2334 }
2335 out_error_mem:
2336         fprintf(trace->output, "Not enough memory to run!\n");
2337         goto out_delete_evlist;
2338 }
2339
2340 static int trace__replay(struct trace *trace)
2341 {
2342         const struct perf_evsel_str_handler handlers[] = {
2343                 { "probe:vfs_getname",       trace__vfs_getname, },
2344         };
2345         struct perf_data_file file = {
2346                 .path  = input_name,
2347                 .mode  = PERF_DATA_MODE_READ,
2348         };
2349         struct perf_session *session;
2350         struct perf_evsel *evsel;
2351         int err = -1;
2352
2353         trace->tool.sample        = trace__process_sample;
2354         trace->tool.mmap          = perf_event__process_mmap;
2355         trace->tool.mmap2         = perf_event__process_mmap2;
2356         trace->tool.comm          = perf_event__process_comm;
2357         trace->tool.exit          = perf_event__process_exit;
2358         trace->tool.fork          = perf_event__process_fork;
2359         trace->tool.attr          = perf_event__process_attr;
2360         trace->tool.tracing_data = perf_event__process_tracing_data;
2361         trace->tool.build_id      = perf_event__process_build_id;
2362
2363         trace->tool.ordered_events = true;
2364         trace->tool.ordering_requires_timestamps = true;
2365
2366         /* add tid to output */
2367         trace->multiple_threads = true;
2368
2369         session = perf_session__new(&file, false, &trace->tool);
2370         if (session == NULL)
2371                 return -1;
2372
2373         if (symbol__init(&session->header.env) < 0)
2374                 goto out;
2375
2376         trace->host = &session->machines.host;
2377
2378         err = perf_session__set_tracepoints_handlers(session, handlers);
2379         if (err)
2380                 goto out;
2381
2382         evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2383                                                      "raw_syscalls:sys_enter");
2384         /* older kernels have syscalls tp versus raw_syscalls */
2385         if (evsel == NULL)
2386                 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2387                                                              "syscalls:sys_enter");
2388
2389         if (evsel &&
2390             (perf_evsel__init_syscall_tp(evsel, trace__sys_enter) < 0 ||
2391             perf_evsel__init_sc_tp_ptr_field(evsel, args))) {
2392                 pr_err("Error during initialize raw_syscalls:sys_enter event\n");
2393                 goto out;
2394         }
2395
2396         evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2397                                                      "raw_syscalls:sys_exit");
2398         if (evsel == NULL)
2399                 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2400                                                              "syscalls:sys_exit");
2401         if (evsel &&
2402             (perf_evsel__init_syscall_tp(evsel, trace__sys_exit) < 0 ||
2403             perf_evsel__init_sc_tp_uint_field(evsel, ret))) {
2404                 pr_err("Error during initialize raw_syscalls:sys_exit event\n");
2405                 goto out;
2406         }
2407
2408         evlist__for_each(session->evlist, evsel) {
2409                 if (evsel->attr.type == PERF_TYPE_SOFTWARE &&
2410                     (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ||
2411                      evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MIN ||
2412                      evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS))
2413                         evsel->handler = trace__pgfault;
2414         }
2415
2416         err = parse_target_str(trace);
2417         if (err != 0)
2418                 goto out;
2419
2420         setup_pager();
2421
2422         err = perf_session__process_events(session);
2423         if (err)
2424                 pr_err("Failed to process events, error %d", err);
2425
2426         else if (trace->summary)
2427                 trace__fprintf_thread_summary(trace, trace->output);
2428
2429 out:
2430         perf_session__delete(session);
2431
2432         return err;
2433 }
2434
/* Print the heading of the per-thread summary; returns what fprintf() returned. */
static size_t trace__fprintf_threads_header(FILE *fp)
{
        return fprintf(fp, "\n Summary of events:\n\n");
}
2443
2444 static size_t thread__dump_stats(struct thread_trace *ttrace,
2445                                  struct trace *trace, FILE *fp)
2446 {
2447         struct stats *stats;
2448         size_t printed = 0;
2449         struct syscall *sc;
2450         struct int_node *inode = intlist__first(ttrace->syscall_stats);
2451
2452         if (inode == NULL)
2453                 return 0;
2454
2455         printed += fprintf(fp, "\n");
2456
2457         printed += fprintf(fp, "   syscall            calls      min       avg       max      stddev\n");
2458         printed += fprintf(fp, "                               (msec)    (msec)    (msec)        (%%)\n");
2459         printed += fprintf(fp, "   --------------- -------- --------- --------- ---------     ------\n");
2460
2461         /* each int_node is a syscall */
2462         while (inode) {
2463                 stats = inode->priv;
2464                 if (stats) {
2465                         double min = (double)(stats->min) / NSEC_PER_MSEC;
2466                         double max = (double)(stats->max) / NSEC_PER_MSEC;
2467                         double avg = avg_stats(stats);
2468                         double pct;
2469                         u64 n = (u64) stats->n;
2470
2471                         pct = avg ? 100.0 * stddev_stats(stats)/avg : 0.0;
2472                         avg /= NSEC_PER_MSEC;
2473
2474                         sc = &trace->syscalls.table[inode->i];
2475                         printed += fprintf(fp, "   %-15s", sc->name);
2476                         printed += fprintf(fp, " %8" PRIu64 " %9.3f %9.3f",
2477                                            n, min, avg);
2478                         printed += fprintf(fp, " %9.3f %9.2f%%\n", max, pct);
2479                 }
2480
2481                 inode = intlist__next(inode);
2482         }
2483
2484         printed += fprintf(fp, "\n\n");
2485
2486         return printed;
2487 }
2488
/*
 * Context handed to the per-thread summary callback
 * (trace__fprintf_one_thread) through its opaque priv pointer.
 */
struct summary_data {
        FILE *fp;               /* stream the summary is written to */
        struct trace *trace;    /* trace state the callback reads (nr_events, syscall table) */
        size_t printed;         /* accumulated fprintf() results across the walk */
};
2495
2496 static int trace__fprintf_one_thread(struct thread *thread, void *priv)
2497 {
2498         struct summary_data *data = priv;
2499         FILE *fp = data->fp;
2500         size_t printed = data->printed;
2501         struct trace *trace = data->trace;
2502         struct thread_trace *ttrace = thread__priv(thread);
2503         double ratio;
2504
2505         if (ttrace == NULL)
2506                 return 0;
2507
2508         ratio = (double)ttrace->nr_events / trace->nr_events * 100.0;
2509
2510         printed += fprintf(fp, " %s (%d), ", thread__comm_str(thread), thread->tid);
2511         printed += fprintf(fp, "%lu events, ", ttrace->nr_events);
2512         printed += fprintf(fp, "%.1f%%", ratio);
2513         if (ttrace->pfmaj)
2514                 printed += fprintf(fp, ", %lu majfaults", ttrace->pfmaj);
2515         if (ttrace->pfmin)
2516                 printed += fprintf(fp, ", %lu minfaults", ttrace->pfmin);
2517         printed += fprintf(fp, ", %.3f msec\n", ttrace->runtime_ms);
2518         printed += thread__dump_stats(ttrace, trace, fp);
2519
2520         data->printed += printed;
2521
2522         return 0;
2523 }
2524
2525 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
2526 {
2527         struct summary_data data = {
2528                 .fp = fp,
2529                 .trace = trace
2530         };
2531         data.printed = trace__fprintf_threads_header(fp);
2532
2533         machine__for_each_thread(trace->host, trace__fprintf_one_thread, &data);
2534
2535         return data.printed;
2536 }
2537
2538 static int trace__set_duration(const struct option *opt, const char *str,
2539                                int unset __maybe_unused)
2540 {
2541         struct trace *trace = opt->value;
2542
2543         trace->duration_filter = atof(str);
2544         return 0;
2545 }
2546
2547 static int trace__set_filter_pids(const struct option *opt, const char *str,
2548                                   int unset __maybe_unused)
2549 {
2550         int ret = -1;
2551         size_t i;
2552         struct trace *trace = opt->value;
2553         /*
2554          * FIXME: introduce a intarray class, plain parse csv and create a
2555          * { int nr, int entries[] } struct...
2556          */
2557         struct intlist *list = intlist__new(str);
2558
2559         if (list == NULL)
2560                 return -1;
2561
2562         i = trace->filter_pids.nr = intlist__nr_entries(list) + 1;
2563         trace->filter_pids.entries = calloc(i, sizeof(pid_t));
2564
2565         if (trace->filter_pids.entries == NULL)
2566                 goto out;
2567
2568         trace->filter_pids.entries[0] = getpid();
2569
2570         for (i = 1; i < trace->filter_pids.nr; ++i)
2571                 trace->filter_pids.entries[i] = intlist__entry(list, i - 1)->i;
2572
2573         intlist__delete(list);
2574         ret = 0;
2575 out:
2576         return ret;
2577 }
2578
2579 static int trace__open_output(struct trace *trace, const char *filename)
2580 {
2581         struct stat st;
2582
2583         if (!stat(filename, &st) && st.st_size) {
2584                 char oldname[PATH_MAX];
2585
2586                 scnprintf(oldname, sizeof(oldname), "%s.old", filename);
2587                 unlink(oldname);
2588                 rename(filename, oldname);
2589         }
2590
2591         trace->output = fopen(filename, "w");
2592
2593         return trace->output == NULL ? -errno : 0;
2594 }
2595
2596 static int parse_pagefaults(const struct option *opt, const char *str,
2597                             int unset __maybe_unused)
2598 {
2599         int *trace_pgfaults = opt->value;
2600
2601         if (strcmp(str, "all") == 0)
2602                 *trace_pgfaults |= TRACE_PFMAJ | TRACE_PFMIN;
2603         else if (strcmp(str, "maj") == 0)
2604                 *trace_pgfaults |= TRACE_PFMAJ;
2605         else if (strcmp(str, "min") == 0)
2606                 *trace_pgfaults |= TRACE_PFMIN;
2607         else
2608                 return -1;
2609
2610         return 0;
2611 }
2612
2613 static void evlist__set_evsel_handler(struct perf_evlist *evlist, void *handler)
2614 {
2615         struct perf_evsel *evsel;
2616
2617         evlist__for_each(evlist, evsel)
2618                 evsel->handler = handler;
2619 }
2620
2621 int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
2622 {
2623         const char *trace_usage[] = {
2624                 "perf trace [<options>] [<command>]",
2625                 "perf trace [<options>] -- <command> [<options>]",
2626                 "perf trace record [<options>] [<command>]",
2627                 "perf trace record [<options>] -- <command> [<options>]",
2628                 NULL
2629         };
2630         struct trace trace = {
2631                 .audit = {
2632                         .machine = audit_detect_machine(),
2633                         .open_id = audit_name_to_syscall("open", trace.audit.machine),
2634                 },
2635                 .syscalls = {
2636                         . max = -1,
2637                 },
2638                 .opts = {
2639                         .target = {
2640                                 .uid       = UINT_MAX,
2641                                 .uses_mmap = true,
2642                         },
2643                         .user_freq     = UINT_MAX,
2644                         .user_interval = ULLONG_MAX,
2645                         .no_buffering  = true,
2646                         .mmap_pages    = UINT_MAX,
2647                 },
2648                 .output = stdout,
2649                 .show_comm = true,
2650                 .trace_syscalls = true,
2651         };
2652         const char *output_name = NULL;
2653         const char *ev_qualifier_str = NULL;
2654         const struct option trace_options[] = {
2655         OPT_CALLBACK(0, "event", &trace.evlist, "event",
2656                      "event selector. use 'perf list' to list available events",
2657                      parse_events_option),
2658         OPT_BOOLEAN(0, "comm", &trace.show_comm,
2659                     "show the thread COMM next to its id"),
2660         OPT_BOOLEAN(0, "tool_stats", &trace.show_tool_stats, "show tool stats"),
2661         OPT_STRING('e', "expr", &ev_qualifier_str, "expr",
2662                     "list of events to trace"),
2663         OPT_STRING('o', "output", &output_name, "file", "output file name"),
2664         OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"),
2665         OPT_STRING('p', "pid", &trace.opts.target.pid, "pid",
2666                     "trace events on existing process id"),
2667         OPT_STRING('t', "tid", &trace.opts.target.tid, "tid",
2668                     "trace events on existing thread id"),
2669         OPT_CALLBACK(0, "filter-pids", &trace, "float",
2670                      "show only events with duration > N.M ms", trace__set_filter_pids),
2671         OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide,
2672                     "system-wide collection from all CPUs"),
2673         OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu",
2674                     "list of cpus to monitor"),
2675         OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit,
2676                     "child tasks do not inherit counters"),
2677         OPT_CALLBACK('m', "mmap-pages", &trace.opts.mmap_pages, "pages",
2678                      "number of mmap data pages",
2679                      perf_evlist__parse_mmap_pages),
2680         OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user",
2681                    "user to profile"),
2682         OPT_CALLBACK(0, "duration", &trace, "float",
2683                      "show only events with duration > N.M ms",
2684                      trace__set_duration),
2685         OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"),
2686         OPT_INCR('v', "verbose", &verbose, "be more verbose"),
2687         OPT_BOOLEAN('T', "time", &trace.full_time,
2688                     "Show full timestamp, not time relative to first start"),
2689         OPT_BOOLEAN('s', "summary", &trace.summary_only,
2690                     "Show only syscall summary with statistics"),
2691         OPT_BOOLEAN('S', "with-summary", &trace.summary,
2692                     "Show all syscalls and summary with statistics"),
2693         OPT_CALLBACK_DEFAULT('F', "pf", &trace.trace_pgfaults, "all|maj|min",
2694                      "Trace pagefaults", parse_pagefaults, "maj"),
2695         OPT_BOOLEAN(0, "syscalls", &trace.trace_syscalls, "Trace syscalls"),
2696         OPT_END()
2697         };
2698         const char * const trace_subcommands[] = { "record", NULL };
2699         int err;
2700         char bf[BUFSIZ];
2701
2702         signal(SIGSEGV, sighandler_dump_stack);
2703         signal(SIGFPE, sighandler_dump_stack);
2704
2705         trace.evlist = perf_evlist__new();
2706         if (trace.evlist == NULL)
2707                 return -ENOMEM;
2708
2709         if (trace.evlist == NULL) {
2710                 pr_err("Not enough memory to run!\n");
2711                 goto out;
2712         }
2713
2714         argc = parse_options_subcommand(argc, argv, trace_options, trace_subcommands,
2715                                  trace_usage, PARSE_OPT_STOP_AT_NON_OPTION);
2716
2717         if (trace.trace_pgfaults) {
2718                 trace.opts.sample_address = true;
2719                 trace.opts.sample_time = true;
2720         }
2721
2722         if (trace.evlist->nr_entries > 0)
2723                 evlist__set_evsel_handler(trace.evlist, trace__event_handler);
2724
2725         if ((argc >= 1) && (strcmp(argv[0], "record") == 0))
2726                 return trace__record(&trace, argc-1, &argv[1]);
2727
2728         /* summary_only implies summary option, but don't overwrite summary if set */
2729         if (trace.summary_only)
2730                 trace.summary = trace.summary_only;
2731
2732         if (!trace.trace_syscalls && !trace.trace_pgfaults &&
2733             trace.evlist->nr_entries == 0 /* Was --events used? */) {
2734                 pr_err("Please specify something to trace.\n");
2735                 return -1;
2736         }
2737
2738         if (output_name != NULL) {
2739                 err = trace__open_output(&trace, output_name);
2740                 if (err < 0) {
2741                         perror("failed to create output file");
2742                         goto out;
2743                 }
2744         }
2745
2746         if (ev_qualifier_str != NULL) {
2747                 const char *s = ev_qualifier_str;
2748
2749                 trace.not_ev_qualifier = *s == '!';
2750                 if (trace.not_ev_qualifier)
2751                         ++s;
2752                 trace.ev_qualifier = strlist__new(true, s);
2753                 if (trace.ev_qualifier == NULL) {
2754                         fputs("Not enough memory to parse event qualifier",
2755                               trace.output);
2756                         err = -ENOMEM;
2757                         goto out_close;
2758                 }
2759         }
2760
2761         err = target__validate(&trace.opts.target);
2762         if (err) {
2763                 target__strerror(&trace.opts.target, err, bf, sizeof(bf));
2764                 fprintf(trace.output, "%s", bf);
2765                 goto out_close;
2766         }
2767
2768         err = target__parse_uid(&trace.opts.target);
2769         if (err) {
2770                 target__strerror(&trace.opts.target, err, bf, sizeof(bf));
2771                 fprintf(trace.output, "%s", bf);
2772                 goto out_close;
2773         }
2774
2775         if (!argc && target__none(&trace.opts.target))
2776                 trace.opts.target.system_wide = true;
2777
2778         if (input_name)
2779                 err = trace__replay(&trace);
2780         else
2781                 err = trace__run(&trace, argc, argv);
2782
2783 out_close:
2784         if (output_name != NULL)
2785                 fclose(trace.output);
2786 out:
2787         return err;
2788 }