Merge tag 'perf-core-for-mingo' of git://git.kernel.org/pub/scm/linux/kernel/git...
[firefly-linux-kernel-4.4.55.git] / tools / perf / builtin-trace.c
1 #include <traceevent/event-parse.h>
2 #include "builtin.h"
3 #include "util/color.h"
4 #include "util/debug.h"
5 #include "util/evlist.h"
6 #include "util/machine.h"
7 #include "util/session.h"
8 #include "util/thread.h"
9 #include "util/parse-options.h"
10 #include "util/strlist.h"
11 #include "util/intlist.h"
12 #include "util/thread_map.h"
13 #include "util/stat.h"
14 #include "trace-event.h"
15 #include "util/parse-events.h"
16
17 #include <libaudit.h>
18 #include <stdlib.h>
19 #include <sys/eventfd.h>
20 #include <sys/mman.h>
21 #include <linux/futex.h>
22
/*
 * Fallback values for constants that the system headers of older distros
 * may not provide. Each one is defined only when it is missing.
 */
#if !defined(MAP_STACK)
# define MAP_STACK              0x20000
#endif

#if !defined(MADV_HWPOISON)
# define MADV_HWPOISON          100
#endif

#if !defined(MADV_MERGEABLE)
# define MADV_MERGEABLE         12
#endif

#if !defined(MADV_UNMERGEABLE)
# define MADV_UNMERGEABLE       13
#endif

#if !defined(EFD_SEMAPHORE)
# define EFD_SEMAPHORE          1
#endif
43
44 struct tp_field {
45         int offset;
46         union {
47                 u64 (*integer)(struct tp_field *field, struct perf_sample *sample);
48                 void *(*pointer)(struct tp_field *field, struct perf_sample *sample);
49         };
50 };
51
52 #define TP_UINT_FIELD(bits) \
53 static u64 tp_field__u##bits(struct tp_field *field, struct perf_sample *sample) \
54 { \
55         u##bits value; \
56         memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
57         return value;  \
58 }
59
60 TP_UINT_FIELD(8);
61 TP_UINT_FIELD(16);
62 TP_UINT_FIELD(32);
63 TP_UINT_FIELD(64);
64
65 #define TP_UINT_FIELD__SWAPPED(bits) \
66 static u64 tp_field__swapped_u##bits(struct tp_field *field, struct perf_sample *sample) \
67 { \
68         u##bits value; \
69         memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
70         return bswap_##bits(value);\
71 }
72
73 TP_UINT_FIELD__SWAPPED(16);
74 TP_UINT_FIELD__SWAPPED(32);
75 TP_UINT_FIELD__SWAPPED(64);
76
77 static int tp_field__init_uint(struct tp_field *field,
78                                struct format_field *format_field,
79                                bool needs_swap)
80 {
81         field->offset = format_field->offset;
82
83         switch (format_field->size) {
84         case 1:
85                 field->integer = tp_field__u8;
86                 break;
87         case 2:
88                 field->integer = needs_swap ? tp_field__swapped_u16 : tp_field__u16;
89                 break;
90         case 4:
91                 field->integer = needs_swap ? tp_field__swapped_u32 : tp_field__u32;
92                 break;
93         case 8:
94                 field->integer = needs_swap ? tp_field__swapped_u64 : tp_field__u64;
95                 break;
96         default:
97                 return -1;
98         }
99
100         return 0;
101 }
102
103 static void *tp_field__ptr(struct tp_field *field, struct perf_sample *sample)
104 {
105         return sample->raw_data + field->offset;
106 }
107
108 static int tp_field__init_ptr(struct tp_field *field, struct format_field *format_field)
109 {
110         field->offset = format_field->offset;
111         field->pointer = tp_field__ptr;
112         return 0;
113 }
114
115 struct syscall_tp {
116         struct tp_field id;
117         union {
118                 struct tp_field args, ret;
119         };
120 };
121
122 static int perf_evsel__init_tp_uint_field(struct perf_evsel *evsel,
123                                           struct tp_field *field,
124                                           const char *name)
125 {
126         struct format_field *format_field = perf_evsel__field(evsel, name);
127
128         if (format_field == NULL)
129                 return -1;
130
131         return tp_field__init_uint(field, format_field, evsel->needs_swap);
132 }
133
134 #define perf_evsel__init_sc_tp_uint_field(evsel, name) \
135         ({ struct syscall_tp *sc = evsel->priv;\
136            perf_evsel__init_tp_uint_field(evsel, &sc->name, #name); })
137
/*
 * Look up the tracepoint field called @name in @evsel and set @field up
 * as a pointer into the sample's raw data.
 *
 * Returns -1 when perf_evsel__field() finds no such field, otherwise the
 * result of tp_field__init_ptr().
 */
static int perf_evsel__init_tp_ptr_field(struct perf_evsel *evsel,
					 struct tp_field *field,
					 const char *name)
{
	struct format_field *ff = perf_evsel__field(evsel, name);

	if (ff != NULL)
		return tp_field__init_ptr(field, ff);

	return -1;
}

/*
 * Same, for the struct syscall_tp member @name stored in evsel->priv; the
 * member name doubles as the tracepoint field name.
 */
#define perf_evsel__init_sc_tp_ptr_field(evsel, name) \
	({ struct syscall_tp *sctp = evsel->priv;\
	   perf_evsel__init_tp_ptr_field(evsel, &sctp->name, #name); })
153
154 static void perf_evsel__delete_priv(struct perf_evsel *evsel)
155 {
156         zfree(&evsel->priv);
157         perf_evsel__delete(evsel);
158 }
159
160 static int perf_evsel__init_syscall_tp(struct perf_evsel *evsel, void *handler)
161 {
162         evsel->priv = malloc(sizeof(struct syscall_tp));
163         if (evsel->priv != NULL) {
164                 if (perf_evsel__init_sc_tp_uint_field(evsel, id))
165                         goto out_delete;
166
167                 evsel->handler = handler;
168                 return 0;
169         }
170
171         return -ENOMEM;
172
173 out_delete:
174         zfree(&evsel->priv);
175         return -ENOENT;
176 }
177
/*
 * Create an evsel for the raw_syscalls:<direction> tracepoint, falling
 * back to syscalls:<direction>, and initialise its syscall_tp state with
 * @handler.
 *
 * Returns the new evsel, or NULL if neither tracepoint could be created
 * or the initialisation failed (the evsel is deleted in the latter case).
 */
static struct perf_evsel *perf_evsel__syscall_newtp(const char *direction, void *handler)
{
	struct perf_evsel *evsel = perf_evsel__newtp("raw_syscalls", direction);

	/* older kernel (e.g., RHEL6) use syscalls:{enter,exit} */
	if (evsel == NULL)
		evsel = perf_evsel__newtp("syscalls", direction);

	if (evsel == NULL)
		return NULL;

	if (perf_evsel__init_syscall_tp(evsel, handler)) {
		perf_evsel__delete_priv(evsel);
		return NULL;
	}

	return evsel;
}
197
/*
 * Read the struct syscall_tp member @name of @evsel from @sample through
 * the member's integer callback.
 */
#define perf_evsel__sc_tp_uint(evsel, name, sample) \
	({ struct syscall_tp *sctp = evsel->priv; \
	   sctp->name.integer(&sctp->name, sample); })

/* Same, through the member's pointer callback. */
#define perf_evsel__sc_tp_ptr(evsel, name, sample) \
	({ struct syscall_tp *sctp = evsel->priv; \
	   sctp->name.pointer(&sctp->name, sample); })
205
206 static int perf_evlist__add_syscall_newtp(struct perf_evlist *evlist,
207                                           void *sys_enter_handler,
208                                           void *sys_exit_handler)
209 {
210         int ret = -1;
211         struct perf_evsel *sys_enter, *sys_exit;
212
213         sys_enter = perf_evsel__syscall_newtp("sys_enter", sys_enter_handler);
214         if (sys_enter == NULL)
215                 goto out;
216
217         if (perf_evsel__init_sc_tp_ptr_field(sys_enter, args))
218                 goto out_delete_sys_enter;
219
220         sys_exit = perf_evsel__syscall_newtp("sys_exit", sys_exit_handler);
221         if (sys_exit == NULL)
222                 goto out_delete_sys_enter;
223
224         if (perf_evsel__init_sc_tp_uint_field(sys_exit, ret))
225                 goto out_delete_sys_exit;
226
227         perf_evlist__add(evlist, sys_enter);
228         perf_evlist__add(evlist, sys_exit);
229
230         ret = 0;
231 out:
232         return ret;
233
234 out_delete_sys_exit:
235         perf_evsel__delete_priv(sys_exit);
236 out_delete_sys_enter:
237         perf_evsel__delete_priv(sys_enter);
238         goto out;
239 }
240
241
242 struct syscall_arg {
243         unsigned long val;
244         struct thread *thread;
245         struct trace  *trace;
246         void          *parm;
247         u8            idx;
248         u8            mask;
249 };
250
/*
 * A table of names for consecutive integer values: entries[i] names the
 * value (offset + i), for i in [0, nr_entries).
 */
struct strarray {
	int         offset;
	int         nr_entries;
	const char **entries;
};

/* Define strarray__<array> describing @array, whose first entry names @off. */
#define DEFINE_STRARRAY_OFFSET(array, off) struct strarray strarray__##array = { \
	.offset     = off, \
	.nr_entries = ARRAY_SIZE(array), \
	.entries    = array, \
}

/* Define strarray__<array> describing @array, whose first entry names 0. */
#define DEFINE_STRARRAY(array) DEFINE_STRARRAY_OFFSET(array, 0)
267
268 static size_t __syscall_arg__scnprintf_strarray(char *bf, size_t size,
269                                                 const char *intfmt,
270                                                 struct syscall_arg *arg)
271 {
272         struct strarray *sa = arg->parm;
273         int idx = arg->val - sa->offset;
274
275         if (idx < 0 || idx >= sa->nr_entries)
276                 return scnprintf(bf, size, intfmt, arg->val);
277
278         return scnprintf(bf, size, "%s", sa->entries[idx]);
279 }
280
/*
 * strarray formatter that prints values without a name using "%d".
 */
static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size,
					      struct syscall_arg *arg)
{
	const char *fallback_fmt = "%d";

	return __syscall_arg__scnprintf_strarray(bf, size, fallback_fmt, arg);
}

#define SCA_STRARRAY syscall_arg__scnprintf_strarray
288
#if defined(__i386__) || defined(__x86_64__)
/*
 * strarray formatter that prints values without a name using "%#x".
 *
 * FIXME: Make this available to all arches as soon as the ioctl beautifier
 *        gets rewritten to support all arches.
 */
static size_t syscall_arg__scnprintf_strhexarray(char *bf, size_t size,
						 struct syscall_arg *arg)
{
	const char *fallback_fmt = "%#x";

	return __syscall_arg__scnprintf_strarray(bf, size, fallback_fmt, arg);
}

#define SCA_STRHEXARRAY syscall_arg__scnprintf_strhexarray
#endif /* defined(__i386__) || defined(__x86_64__) */
302
303 static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
304                                         struct syscall_arg *arg);
305
306 #define SCA_FD syscall_arg__scnprintf_fd
307
308 static size_t syscall_arg__scnprintf_fd_at(char *bf, size_t size,
309                                            struct syscall_arg *arg)
310 {
311         int fd = arg->val;
312
313         if (fd == AT_FDCWD)
314                 return scnprintf(bf, size, "CWD");
315
316         return syscall_arg__scnprintf_fd(bf, size, arg);
317 }
318
319 #define SCA_FDAT syscall_arg__scnprintf_fd_at
320
321 static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
322                                               struct syscall_arg *arg);
323
324 #define SCA_CLOSE_FD syscall_arg__scnprintf_close_fd
325
326 static size_t syscall_arg__scnprintf_hex(char *bf, size_t size,
327                                          struct syscall_arg *arg)
328 {
329         return scnprintf(bf, size, "%#lx", arg->val);
330 }
331
332 #define SCA_HEX syscall_arg__scnprintf_hex
333
334 static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size,
335                                                struct syscall_arg *arg)
336 {
337         int printed = 0, prot = arg->val;
338
339         if (prot == PROT_NONE)
340                 return scnprintf(bf, size, "NONE");
341 #define P_MMAP_PROT(n) \
342         if (prot & PROT_##n) { \
343                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
344                 prot &= ~PROT_##n; \
345         }
346
347         P_MMAP_PROT(EXEC);
348         P_MMAP_PROT(READ);
349         P_MMAP_PROT(WRITE);
350 #ifdef PROT_SEM
351         P_MMAP_PROT(SEM);
352 #endif
353         P_MMAP_PROT(GROWSDOWN);
354         P_MMAP_PROT(GROWSUP);
355 #undef P_MMAP_PROT
356
357         if (prot)
358                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", prot);
359
360         return printed;
361 }
362
363 #define SCA_MMAP_PROT syscall_arg__scnprintf_mmap_prot
364
365 static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size,
366                                                 struct syscall_arg *arg)
367 {
368         int printed = 0, flags = arg->val;
369
370 #define P_MMAP_FLAG(n) \
371         if (flags & MAP_##n) { \
372                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
373                 flags &= ~MAP_##n; \
374         }
375
376         P_MMAP_FLAG(SHARED);
377         P_MMAP_FLAG(PRIVATE);
378 #ifdef MAP_32BIT
379         P_MMAP_FLAG(32BIT);
380 #endif
381         P_MMAP_FLAG(ANONYMOUS);
382         P_MMAP_FLAG(DENYWRITE);
383         P_MMAP_FLAG(EXECUTABLE);
384         P_MMAP_FLAG(FILE);
385         P_MMAP_FLAG(FIXED);
386         P_MMAP_FLAG(GROWSDOWN);
387 #ifdef MAP_HUGETLB
388         P_MMAP_FLAG(HUGETLB);
389 #endif
390         P_MMAP_FLAG(LOCKED);
391         P_MMAP_FLAG(NONBLOCK);
392         P_MMAP_FLAG(NORESERVE);
393         P_MMAP_FLAG(POPULATE);
394         P_MMAP_FLAG(STACK);
395 #ifdef MAP_UNINITIALIZED
396         P_MMAP_FLAG(UNINITIALIZED);
397 #endif
398 #undef P_MMAP_FLAG
399
400         if (flags)
401                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
402
403         return printed;
404 }
405
406 #define SCA_MMAP_FLAGS syscall_arg__scnprintf_mmap_flags
407
408 static size_t syscall_arg__scnprintf_mremap_flags(char *bf, size_t size,
409                                                   struct syscall_arg *arg)
410 {
411         int printed = 0, flags = arg->val;
412
413 #define P_MREMAP_FLAG(n) \
414         if (flags & MREMAP_##n) { \
415                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
416                 flags &= ~MREMAP_##n; \
417         }
418
419         P_MREMAP_FLAG(MAYMOVE);
420 #ifdef MREMAP_FIXED
421         P_MREMAP_FLAG(FIXED);
422 #endif
423 #undef P_MREMAP_FLAG
424
425         if (flags)
426                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
427
428         return printed;
429 }
430
431 #define SCA_MREMAP_FLAGS syscall_arg__scnprintf_mremap_flags
432
433 static size_t syscall_arg__scnprintf_madvise_behavior(char *bf, size_t size,
434                                                       struct syscall_arg *arg)
435 {
436         int behavior = arg->val;
437
438         switch (behavior) {
439 #define P_MADV_BHV(n) case MADV_##n: return scnprintf(bf, size, #n)
440         P_MADV_BHV(NORMAL);
441         P_MADV_BHV(RANDOM);
442         P_MADV_BHV(SEQUENTIAL);
443         P_MADV_BHV(WILLNEED);
444         P_MADV_BHV(DONTNEED);
445         P_MADV_BHV(REMOVE);
446         P_MADV_BHV(DONTFORK);
447         P_MADV_BHV(DOFORK);
448         P_MADV_BHV(HWPOISON);
449 #ifdef MADV_SOFT_OFFLINE
450         P_MADV_BHV(SOFT_OFFLINE);
451 #endif
452         P_MADV_BHV(MERGEABLE);
453         P_MADV_BHV(UNMERGEABLE);
454 #ifdef MADV_HUGEPAGE
455         P_MADV_BHV(HUGEPAGE);
456 #endif
457 #ifdef MADV_NOHUGEPAGE
458         P_MADV_BHV(NOHUGEPAGE);
459 #endif
460 #ifdef MADV_DONTDUMP
461         P_MADV_BHV(DONTDUMP);
462 #endif
463 #ifdef MADV_DODUMP
464         P_MADV_BHV(DODUMP);
465 #endif
466 #undef P_MADV_PHV
467         default: break;
468         }
469
470         return scnprintf(bf, size, "%#x", behavior);
471 }
472
473 #define SCA_MADV_BHV syscall_arg__scnprintf_madvise_behavior
474
475 static size_t syscall_arg__scnprintf_flock(char *bf, size_t size,
476                                            struct syscall_arg *arg)
477 {
478         int printed = 0, op = arg->val;
479
480         if (op == 0)
481                 return scnprintf(bf, size, "NONE");
482 #define P_CMD(cmd) \
483         if ((op & LOCK_##cmd) == LOCK_##cmd) { \
484                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #cmd); \
485                 op &= ~LOCK_##cmd; \
486         }
487
488         P_CMD(SH);
489         P_CMD(EX);
490         P_CMD(NB);
491         P_CMD(UN);
492         P_CMD(MAND);
493         P_CMD(RW);
494         P_CMD(READ);
495         P_CMD(WRITE);
496 #undef P_OP
497
498         if (op)
499                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", op);
500
501         return printed;
502 }
503
504 #define SCA_FLOCK syscall_arg__scnprintf_flock
505
506 static size_t syscall_arg__scnprintf_futex_op(char *bf, size_t size, struct syscall_arg *arg)
507 {
508         enum syscall_futex_args {
509                 SCF_UADDR   = (1 << 0),
510                 SCF_OP      = (1 << 1),
511                 SCF_VAL     = (1 << 2),
512                 SCF_TIMEOUT = (1 << 3),
513                 SCF_UADDR2  = (1 << 4),
514                 SCF_VAL3    = (1 << 5),
515         };
516         int op = arg->val;
517         int cmd = op & FUTEX_CMD_MASK;
518         size_t printed = 0;
519
520         switch (cmd) {
521 #define P_FUTEX_OP(n) case FUTEX_##n: printed = scnprintf(bf, size, #n);
522         P_FUTEX_OP(WAIT);           arg->mask |= SCF_VAL3|SCF_UADDR2;             break;
523         P_FUTEX_OP(WAKE);           arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
524         P_FUTEX_OP(FD);             arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
525         P_FUTEX_OP(REQUEUE);        arg->mask |= SCF_VAL3|SCF_TIMEOUT;            break;
526         P_FUTEX_OP(CMP_REQUEUE);    arg->mask |= SCF_TIMEOUT;                     break;
527         P_FUTEX_OP(CMP_REQUEUE_PI); arg->mask |= SCF_TIMEOUT;                     break;
528         P_FUTEX_OP(WAKE_OP);                                                      break;
529         P_FUTEX_OP(LOCK_PI);        arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
530         P_FUTEX_OP(UNLOCK_PI);      arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
531         P_FUTEX_OP(TRYLOCK_PI);     arg->mask |= SCF_VAL3|SCF_UADDR2;             break;
532         P_FUTEX_OP(WAIT_BITSET);    arg->mask |= SCF_UADDR2;                      break;
533         P_FUTEX_OP(WAKE_BITSET);    arg->mask |= SCF_UADDR2;                      break;
534         P_FUTEX_OP(WAIT_REQUEUE_PI);                                              break;
535         default: printed = scnprintf(bf, size, "%#x", cmd);                       break;
536         }
537
538         if (op & FUTEX_PRIVATE_FLAG)
539                 printed += scnprintf(bf + printed, size - printed, "|PRIV");
540
541         if (op & FUTEX_CLOCK_REALTIME)
542                 printed += scnprintf(bf + printed, size - printed, "|CLKRT");
543
544         return printed;
545 }
546
547 #define SCA_FUTEX_OP  syscall_arg__scnprintf_futex_op
548
549 static const char *epoll_ctl_ops[] = { "ADD", "DEL", "MOD", };
550 static DEFINE_STRARRAY_OFFSET(epoll_ctl_ops, 1);
551
552 static const char *itimers[] = { "REAL", "VIRTUAL", "PROF", };
553 static DEFINE_STRARRAY(itimers);
554
555 static const char *whences[] = { "SET", "CUR", "END",
556 #ifdef SEEK_DATA
557 "DATA",
558 #endif
559 #ifdef SEEK_HOLE
560 "HOLE",
561 #endif
562 };
563 static DEFINE_STRARRAY(whences);
564
565 static const char *fcntl_cmds[] = {
566         "DUPFD", "GETFD", "SETFD", "GETFL", "SETFL", "GETLK", "SETLK",
567         "SETLKW", "SETOWN", "GETOWN", "SETSIG", "GETSIG", "F_GETLK64",
568         "F_SETLK64", "F_SETLKW64", "F_SETOWN_EX", "F_GETOWN_EX",
569         "F_GETOWNER_UIDS",
570 };
571 static DEFINE_STRARRAY(fcntl_cmds);
572
573 static const char *rlimit_resources[] = {
574         "CPU", "FSIZE", "DATA", "STACK", "CORE", "RSS", "NPROC", "NOFILE",
575         "MEMLOCK", "AS", "LOCKS", "SIGPENDING", "MSGQUEUE", "NICE", "RTPRIO",
576         "RTTIME",
577 };
578 static DEFINE_STRARRAY(rlimit_resources);
579
580 static const char *sighow[] = { "BLOCK", "UNBLOCK", "SETMASK", };
581 static DEFINE_STRARRAY(sighow);
582
583 static const char *clockid[] = {
584         "REALTIME", "MONOTONIC", "PROCESS_CPUTIME_ID", "THREAD_CPUTIME_ID",
585         "MONOTONIC_RAW", "REALTIME_COARSE", "MONOTONIC_COARSE",
586 };
587 static DEFINE_STRARRAY(clockid);
588
589 static const char *socket_families[] = {
590         "UNSPEC", "LOCAL", "INET", "AX25", "IPX", "APPLETALK", "NETROM",
591         "BRIDGE", "ATMPVC", "X25", "INET6", "ROSE", "DECnet", "NETBEUI",
592         "SECURITY", "KEY", "NETLINK", "PACKET", "ASH", "ECONET", "ATMSVC",
593         "RDS", "SNA", "IRDA", "PPPOX", "WANPIPE", "LLC", "IB", "CAN", "TIPC",
594         "BLUETOOTH", "IUCV", "RXRPC", "ISDN", "PHONET", "IEEE802154", "CAIF",
595         "ALG", "NFC", "VSOCK",
596 };
597 static DEFINE_STRARRAY(socket_families);
598
599 #ifndef SOCK_TYPE_MASK
600 #define SOCK_TYPE_MASK 0xf
601 #endif
602
603 static size_t syscall_arg__scnprintf_socket_type(char *bf, size_t size,
604                                                       struct syscall_arg *arg)
605 {
606         size_t printed;
607         int type = arg->val,
608             flags = type & ~SOCK_TYPE_MASK;
609
610         type &= SOCK_TYPE_MASK;
611         /*
612          * Can't use a strarray, MIPS may override for ABI reasons.
613          */
614         switch (type) {
615 #define P_SK_TYPE(n) case SOCK_##n: printed = scnprintf(bf, size, #n); break;
616         P_SK_TYPE(STREAM);
617         P_SK_TYPE(DGRAM);
618         P_SK_TYPE(RAW);
619         P_SK_TYPE(RDM);
620         P_SK_TYPE(SEQPACKET);
621         P_SK_TYPE(DCCP);
622         P_SK_TYPE(PACKET);
623 #undef P_SK_TYPE
624         default:
625                 printed = scnprintf(bf, size, "%#x", type);
626         }
627
628 #define P_SK_FLAG(n) \
629         if (flags & SOCK_##n) { \
630                 printed += scnprintf(bf + printed, size - printed, "|%s", #n); \
631                 flags &= ~SOCK_##n; \
632         }
633
634         P_SK_FLAG(CLOEXEC);
635         P_SK_FLAG(NONBLOCK);
636 #undef P_SK_FLAG
637
638         if (flags)
639                 printed += scnprintf(bf + printed, size - printed, "|%#x", flags);
640
641         return printed;
642 }
643
644 #define SCA_SK_TYPE syscall_arg__scnprintf_socket_type
645
646 #ifndef MSG_PROBE
647 #define MSG_PROBE            0x10
648 #endif
649 #ifndef MSG_WAITFORONE
650 #define MSG_WAITFORONE  0x10000
651 #endif
652 #ifndef MSG_SENDPAGE_NOTLAST
653 #define MSG_SENDPAGE_NOTLAST 0x20000
654 #endif
655 #ifndef MSG_FASTOPEN
656 #define MSG_FASTOPEN         0x20000000
657 #endif
658
659 static size_t syscall_arg__scnprintf_msg_flags(char *bf, size_t size,
660                                                struct syscall_arg *arg)
661 {
662         int printed = 0, flags = arg->val;
663
664         if (flags == 0)
665                 return scnprintf(bf, size, "NONE");
666 #define P_MSG_FLAG(n) \
667         if (flags & MSG_##n) { \
668                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
669                 flags &= ~MSG_##n; \
670         }
671
672         P_MSG_FLAG(OOB);
673         P_MSG_FLAG(PEEK);
674         P_MSG_FLAG(DONTROUTE);
675         P_MSG_FLAG(TRYHARD);
676         P_MSG_FLAG(CTRUNC);
677         P_MSG_FLAG(PROBE);
678         P_MSG_FLAG(TRUNC);
679         P_MSG_FLAG(DONTWAIT);
680         P_MSG_FLAG(EOR);
681         P_MSG_FLAG(WAITALL);
682         P_MSG_FLAG(FIN);
683         P_MSG_FLAG(SYN);
684         P_MSG_FLAG(CONFIRM);
685         P_MSG_FLAG(RST);
686         P_MSG_FLAG(ERRQUEUE);
687         P_MSG_FLAG(NOSIGNAL);
688         P_MSG_FLAG(MORE);
689         P_MSG_FLAG(WAITFORONE);
690         P_MSG_FLAG(SENDPAGE_NOTLAST);
691         P_MSG_FLAG(FASTOPEN);
692         P_MSG_FLAG(CMSG_CLOEXEC);
693 #undef P_MSG_FLAG
694
695         if (flags)
696                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
697
698         return printed;
699 }
700
701 #define SCA_MSG_FLAGS syscall_arg__scnprintf_msg_flags
702
703 static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size,
704                                                  struct syscall_arg *arg)
705 {
706         size_t printed = 0;
707         int mode = arg->val;
708
709         if (mode == F_OK) /* 0 */
710                 return scnprintf(bf, size, "F");
711 #define P_MODE(n) \
712         if (mode & n##_OK) { \
713                 printed += scnprintf(bf + printed, size - printed, "%s", #n); \
714                 mode &= ~n##_OK; \
715         }
716
717         P_MODE(R);
718         P_MODE(W);
719         P_MODE(X);
720 #undef P_MODE
721
722         if (mode)
723                 printed += scnprintf(bf + printed, size - printed, "|%#x", mode);
724
725         return printed;
726 }
727
728 #define SCA_ACCMODE syscall_arg__scnprintf_access_mode
729
730 static size_t syscall_arg__scnprintf_open_flags(char *bf, size_t size,
731                                                struct syscall_arg *arg)
732 {
733         int printed = 0, flags = arg->val;
734
735         if (!(flags & O_CREAT))
736                 arg->mask |= 1 << (arg->idx + 1); /* Mask the mode parm */
737
738         if (flags == 0)
739                 return scnprintf(bf, size, "RDONLY");
740 #define P_FLAG(n) \
741         if (flags & O_##n) { \
742                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
743                 flags &= ~O_##n; \
744         }
745
746         P_FLAG(APPEND);
747         P_FLAG(ASYNC);
748         P_FLAG(CLOEXEC);
749         P_FLAG(CREAT);
750         P_FLAG(DIRECT);
751         P_FLAG(DIRECTORY);
752         P_FLAG(EXCL);
753         P_FLAG(LARGEFILE);
754         P_FLAG(NOATIME);
755         P_FLAG(NOCTTY);
756 #ifdef O_NONBLOCK
757         P_FLAG(NONBLOCK);
758 #elif O_NDELAY
759         P_FLAG(NDELAY);
760 #endif
761 #ifdef O_PATH
762         P_FLAG(PATH);
763 #endif
764         P_FLAG(RDWR);
765 #ifdef O_DSYNC
766         if ((flags & O_SYNC) == O_SYNC)
767                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", "SYNC");
768         else {
769                 P_FLAG(DSYNC);
770         }
771 #else
772         P_FLAG(SYNC);
773 #endif
774         P_FLAG(TRUNC);
775         P_FLAG(WRONLY);
776 #undef P_FLAG
777
778         if (flags)
779                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
780
781         return printed;
782 }
783
784 #define SCA_OPEN_FLAGS syscall_arg__scnprintf_open_flags
785
786 static size_t syscall_arg__scnprintf_eventfd_flags(char *bf, size_t size,
787                                                    struct syscall_arg *arg)
788 {
789         int printed = 0, flags = arg->val;
790
791         if (flags == 0)
792                 return scnprintf(bf, size, "NONE");
793 #define P_FLAG(n) \
794         if (flags & EFD_##n) { \
795                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
796                 flags &= ~EFD_##n; \
797         }
798
799         P_FLAG(SEMAPHORE);
800         P_FLAG(CLOEXEC);
801         P_FLAG(NONBLOCK);
802 #undef P_FLAG
803
804         if (flags)
805                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
806
807         return printed;
808 }
809
810 #define SCA_EFD_FLAGS syscall_arg__scnprintf_eventfd_flags
811
812 static size_t syscall_arg__scnprintf_pipe_flags(char *bf, size_t size,
813                                                 struct syscall_arg *arg)
814 {
815         int printed = 0, flags = arg->val;
816
817 #define P_FLAG(n) \
818         if (flags & O_##n) { \
819                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
820                 flags &= ~O_##n; \
821         }
822
823         P_FLAG(CLOEXEC);
824         P_FLAG(NONBLOCK);
825 #undef P_FLAG
826
827         if (flags)
828                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
829
830         return printed;
831 }
832
833 #define SCA_PIPE_FLAGS syscall_arg__scnprintf_pipe_flags
834
835 static size_t syscall_arg__scnprintf_signum(char *bf, size_t size, struct syscall_arg *arg)
836 {
837         int sig = arg->val;
838
839         switch (sig) {
840 #define P_SIGNUM(n) case SIG##n: return scnprintf(bf, size, #n)
841         P_SIGNUM(HUP);
842         P_SIGNUM(INT);
843         P_SIGNUM(QUIT);
844         P_SIGNUM(ILL);
845         P_SIGNUM(TRAP);
846         P_SIGNUM(ABRT);
847         P_SIGNUM(BUS);
848         P_SIGNUM(FPE);
849         P_SIGNUM(KILL);
850         P_SIGNUM(USR1);
851         P_SIGNUM(SEGV);
852         P_SIGNUM(USR2);
853         P_SIGNUM(PIPE);
854         P_SIGNUM(ALRM);
855         P_SIGNUM(TERM);
856         P_SIGNUM(CHLD);
857         P_SIGNUM(CONT);
858         P_SIGNUM(STOP);
859         P_SIGNUM(TSTP);
860         P_SIGNUM(TTIN);
861         P_SIGNUM(TTOU);
862         P_SIGNUM(URG);
863         P_SIGNUM(XCPU);
864         P_SIGNUM(XFSZ);
865         P_SIGNUM(VTALRM);
866         P_SIGNUM(PROF);
867         P_SIGNUM(WINCH);
868         P_SIGNUM(IO);
869         P_SIGNUM(PWR);
870         P_SIGNUM(SYS);
871 #ifdef SIGEMT
872         P_SIGNUM(EMT);
873 #endif
874 #ifdef SIGSTKFLT
875         P_SIGNUM(STKFLT);
876 #endif
877 #ifdef SIGSWI
878         P_SIGNUM(SWI);
879 #endif
880         default: break;
881         }
882
883         return scnprintf(bf, size, "%#x", sig);
884 }
885
886 #define SCA_SIGNUM syscall_arg__scnprintf_signum
887
888 #if defined(__i386__) || defined(__x86_64__)
889 /*
890  * FIXME: Make this available to all arches.
891  */
892 #define TCGETS          0x5401
893
894 static const char *tioctls[] = {
895         "TCGETS", "TCSETS", "TCSETSW", "TCSETSF", "TCGETA", "TCSETA", "TCSETAW",
896         "TCSETAF", "TCSBRK", "TCXONC", "TCFLSH", "TIOCEXCL", "TIOCNXCL",
897         "TIOCSCTTY", "TIOCGPGRP", "TIOCSPGRP", "TIOCOUTQ", "TIOCSTI",
898         "TIOCGWINSZ", "TIOCSWINSZ", "TIOCMGET", "TIOCMBIS", "TIOCMBIC",
899         "TIOCMSET", "TIOCGSOFTCAR", "TIOCSSOFTCAR", "FIONREAD", "TIOCLINUX",
900         "TIOCCONS", "TIOCGSERIAL", "TIOCSSERIAL", "TIOCPKT", "FIONBIO",
901         "TIOCNOTTY", "TIOCSETD", "TIOCGETD", "TCSBRKP", [0x27] = "TIOCSBRK",
902         "TIOCCBRK", "TIOCGSID", "TCGETS2", "TCSETS2", "TCSETSW2", "TCSETSF2",
903         "TIOCGRS485", "TIOCSRS485", "TIOCGPTN", "TIOCSPTLCK",
904         "TIOCGDEV||TCGETX", "TCSETX", "TCSETXF", "TCSETXW", "TIOCSIG",
905         "TIOCVHANGUP", "TIOCGPKT", "TIOCGPTLCK", "TIOCGEXCL",
906         [0x50] = "FIONCLEX", "FIOCLEX", "FIOASYNC", "TIOCSERCONFIG",
907         "TIOCSERGWILD", "TIOCSERSWILD", "TIOCGLCKTRMIOS", "TIOCSLCKTRMIOS",
908         "TIOCSERGSTRUCT", "TIOCSERGETLSR", "TIOCSERGETMULTI", "TIOCSERSETMULTI",
909         "TIOCMIWAIT", "TIOCGICOUNT", [0x60] = "FIOQSIZE",
910 };
911
912 static DEFINE_STRARRAY_OFFSET(tioctls, 0x5401);
913 #endif /* defined(__i386__) || defined(__x86_64__) */
914
/*
 * Initializer fragment for a struct syscall_fmt entry: format argument
 * number @arg with the strarray formatter, using strarray__<array> as its
 * table. @name is not used by the expansion; in the entries visible here
 * it carries the argument's name.
 */
#define STRARRAY(arg, name, array) \
	.arg_scnprintf = { [arg] = SCA_STRARRAY, }, \
	.arg_parm	 = { [arg] = &strarray__##array, }
918
/*
 * Per-syscall pretty-printing overrides, looked up by name through
 * syscall_fmt__find().  That lookup is a bsearch(), so syscall_fmts[] has to
 * stay sorted by .name in strcmp() order whenever an entry is added.
 */
static struct syscall_fmt {
        /* Syscall name; this is the bsearch() key. */
        const char *name;
        /* Alternate name tried for the "sys_enter_<alias>" tracepoint when "sys_enter_<name>" has no format. */
        const char *alias;
        /* Per-argument formatters, indexed by argument position; a NULL slot means "no override". */
        size_t     (*arg_scnprintf[6])(char *bf, size_t size, struct syscall_arg *arg);
        /* Per-argument data handed to the formatter through arg->parm. */
        void       *arg_parm[6];
        /*
         * NOTE(review): the three flags below are consumed outside this
         * excerpt (presumably when the return value is printed) -- confirm
         * their exact meaning against the sys_exit handling.
         */
        bool       errmsg;
        bool       timeout;
        bool       hexret;
} syscall_fmts[] = {
        { .name     = "access",     .errmsg = true,
          .arg_scnprintf = { [1] = SCA_ACCMODE, /* mode */ }, },
        { .name     = "arch_prctl", .errmsg = true, .alias = "prctl", },
        { .name     = "brk",        .hexret = true,
          .arg_scnprintf = { [0] = SCA_HEX, /* brk */ }, },
        { .name     = "clock_gettime",  .errmsg = true, STRARRAY(0, clk_id, clockid), },
        { .name     = "close",      .errmsg = true,
          .arg_scnprintf = { [0] = SCA_CLOSE_FD, /* fd */ }, },
        { .name     = "connect",    .errmsg = true, },
        { .name     = "dup",        .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        { .name     = "dup2",       .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        { .name     = "dup3",       .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        { .name     = "epoll_ctl",  .errmsg = true, STRARRAY(1, op, epoll_ctl_ops), },
        { .name     = "eventfd2",   .errmsg = true,
          .arg_scnprintf = { [1] = SCA_EFD_FLAGS, /* flags */ }, },
        { .name     = "faccessat",  .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
        { .name     = "fadvise64",  .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        { .name     = "fallocate",  .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        { .name     = "fchdir",     .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        { .name     = "fchmod",     .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        { .name     = "fchmodat",   .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
        { .name     = "fchown",     .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        { .name     = "fchownat",   .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
        { .name     = "fcntl",      .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */
                             [1] = SCA_STRARRAY, /* cmd */ },
          .arg_parm      = { [1] = &strarray__fcntl_cmds, /* cmd */ }, },
        { .name     = "fdatasync",  .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        { .name     = "flock",      .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */
                             [1] = SCA_FLOCK, /* cmd */ }, },
        { .name     = "fsetxattr",  .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        { .name     = "fstat",      .errmsg = true, .alias = "newfstat",
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        { .name     = "fstatat",    .errmsg = true, .alias = "newfstatat",
          .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
        { .name     = "fstatfs",    .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        { .name     = "fsync",    .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        { .name     = "ftruncate", .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        { .name     = "futex",      .errmsg = true,
          .arg_scnprintf = { [1] = SCA_FUTEX_OP, /* op */ }, },
        { .name     = "futimesat", .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
        { .name     = "getdents",   .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        { .name     = "getdents64", .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        { .name     = "getitimer",  .errmsg = true, STRARRAY(0, which, itimers), },
        { .name     = "getrlimit",  .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
        /*
         * The "ioctl" initializer is split by the preprocessor: both branches
         * close the arg_scnprintf initializer and the entry themselves.
         */
        { .name     = "ioctl",      .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */
#if defined(__i386__) || defined(__x86_64__)
/*
 * FIXME: Make this available to all arches.
 */
                             [1] = SCA_STRHEXARRAY, /* cmd */
                             [2] = SCA_HEX, /* arg */ },
          .arg_parm      = { [1] = &strarray__tioctls, /* cmd */ }, },
#else
                             [2] = SCA_HEX, /* arg */ }, },
#endif
        { .name     = "kill",       .errmsg = true,
          .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
        { .name     = "linkat",     .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FDAT, /* olddfd */ }, },
        { .name     = "lseek",      .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */
                             [2] = SCA_STRARRAY, /* whence */ },
          .arg_parm      = { [2] = &strarray__whences, /* whence */ }, },
        { .name     = "lstat",      .errmsg = true, .alias = "newlstat", },
        { .name     = "madvise",    .errmsg = true,
          .arg_scnprintf = { [0] = SCA_HEX,      /* start */
                             [2] = SCA_MADV_BHV, /* behavior */ }, },
        { .name     = "mkdirat",    .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
        { .name     = "mknodat",    .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
        { .name     = "mlock",      .errmsg = true,
          .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
        { .name     = "mlockall",   .errmsg = true,
          .arg_scnprintf = { [0] = SCA_HEX, /* flags */ }, },
        { .name     = "mmap",       .hexret = true,
          .arg_scnprintf = { [0] = SCA_HEX,       /* addr */
                             [2] = SCA_MMAP_PROT, /* prot */
                             [3] = SCA_MMAP_FLAGS, /* flags */
                             [4] = SCA_FD,        /* fd */ }, },
        { .name     = "mprotect",   .errmsg = true,
          .arg_scnprintf = { [0] = SCA_HEX, /* start */
                             [2] = SCA_MMAP_PROT, /* prot */ }, },
        { .name     = "mremap",     .hexret = true,
          .arg_scnprintf = { [0] = SCA_HEX, /* addr */
                             [3] = SCA_MREMAP_FLAGS, /* flags */
                             [4] = SCA_HEX, /* new_addr */ }, },
        { .name     = "munlock",    .errmsg = true,
          .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
        { .name     = "munmap",     .errmsg = true,
          .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
        { .name     = "name_to_handle_at", .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
        { .name     = "newfstatat", .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
        { .name     = "open",       .errmsg = true,
          .arg_scnprintf = { [1] = SCA_OPEN_FLAGS, /* flags */ }, },
        { .name     = "open_by_handle_at", .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
                             [2] = SCA_OPEN_FLAGS, /* flags */ }, },
        { .name     = "openat",     .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
                             [2] = SCA_OPEN_FLAGS, /* flags */ }, },
        { .name     = "pipe2",      .errmsg = true,
          .arg_scnprintf = { [1] = SCA_PIPE_FLAGS, /* flags */ }, },
        { .name     = "poll",       .errmsg = true, .timeout = true, },
        { .name     = "ppoll",      .errmsg = true, .timeout = true, },
        { .name     = "pread",      .errmsg = true, .alias = "pread64",
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        /* NOTE(review): the "pread" alias on preadv looks suspicious -- confirm it is intended. */
        { .name     = "preadv",     .errmsg = true, .alias = "pread",
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        { .name     = "prlimit64",  .errmsg = true, STRARRAY(1, resource, rlimit_resources), },
        { .name     = "pwrite",     .errmsg = true, .alias = "pwrite64",
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        { .name     = "pwritev",    .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        { .name     = "read",       .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        { .name     = "readlinkat", .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
        { .name     = "readv",      .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        { .name     = "recvfrom",   .errmsg = true,
          .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
        { .name     = "recvmmsg",   .errmsg = true,
          .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
        { .name     = "recvmsg",    .errmsg = true,
          .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
        { .name     = "renameat",   .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
        { .name     = "rt_sigaction", .errmsg = true,
          .arg_scnprintf = { [0] = SCA_SIGNUM, /* sig */ }, },
        { .name     = "rt_sigprocmask",  .errmsg = true, STRARRAY(0, how, sighow), },
        { .name     = "rt_sigqueueinfo", .errmsg = true,
          .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
        { .name     = "rt_tgsigqueueinfo", .errmsg = true,
          .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
        { .name     = "select",     .errmsg = true, .timeout = true, },
        { .name     = "sendmmsg",    .errmsg = true,
          .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
        { .name     = "sendmsg",    .errmsg = true,
          .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
        { .name     = "sendto",     .errmsg = true,
          .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
        { .name     = "setitimer",  .errmsg = true, STRARRAY(0, which, itimers), },
        { .name     = "setrlimit",  .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
        { .name     = "shutdown",   .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        { .name     = "socket",     .errmsg = true,
          .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
                             [1] = SCA_SK_TYPE, /* type */ },
          .arg_parm      = { [0] = &strarray__socket_families, /* family */ }, },
        { .name     = "socketpair", .errmsg = true,
          .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
                             [1] = SCA_SK_TYPE, /* type */ },
          .arg_parm      = { [0] = &strarray__socket_families, /* family */ }, },
        { .name     = "stat",       .errmsg = true, .alias = "newstat", },
        { .name     = "symlinkat",  .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
        { .name     = "tgkill",     .errmsg = true,
          .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
        { .name     = "tkill",      .errmsg = true,
          .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
        { .name     = "uname",      .errmsg = true, .alias = "newuname", },
        { .name     = "unlinkat",   .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
        { .name     = "utimensat",  .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FDAT, /* dirfd */ }, },
        { .name     = "write",      .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        { .name     = "writev",     .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
};
1123
1124 static int syscall_fmt__cmp(const void *name, const void *fmtp)
1125 {
1126         const struct syscall_fmt *fmt = fmtp;
1127         return strcmp(name, fmt->name);
1128 }
1129
1130 static struct syscall_fmt *syscall_fmt__find(const char *name)
1131 {
1132         const int nmemb = ARRAY_SIZE(syscall_fmts);
1133         return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp);
1134 }
1135
/*
 * One slot of trace->syscalls.table, indexed by syscall id and filled in
 * lazily by trace__read_syscall_info().
 */
struct syscall {
        /* Format of the "sys_enter_<name>" (or "sys_enter_<alias>") tracepoint. */
        struct event_format *tp_format;
        /* Name returned by audit_syscall_to_name(); NULL while the slot is unread. */
        const char          *name;
        /* Set when the event qualifier excludes this syscall. */
        bool                filtered;
        /* True for "exit" and "exit_group". */
        bool                is_exit;
        /* Matching syscall_fmts[] entry, or NULL when there is none. */
        struct syscall_fmt  *fmt;
        /* calloc()ed array of per-argument formatters; a NULL slot means default printing. */
        size_t              (**arg_scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
        /* Points at fmt->arg_parm when fmt is set; otherwise left untouched. */
        void                **arg_parm;
};
1145
1146 static size_t fprintf_duration(unsigned long t, FILE *fp)
1147 {
1148         double duration = (double)t / NSEC_PER_MSEC;
1149         size_t printed = fprintf(fp, "(");
1150
1151         if (duration >= 1.0)
1152                 printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration);
1153         else if (duration >= 0.01)
1154                 printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration);
1155         else
1156                 printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration);
1157         return printed + fprintf(fp, "): ");
1158 }
1159
1160 struct thread_trace {
1161         u64               entry_time;
1162         u64               exit_time;
1163         bool              entry_pending;
1164         unsigned long     nr_events;
1165         unsigned long     pfmaj, pfmin;
1166         char              *entry_str;
1167         double            runtime_ms;
1168         struct {
1169                 int       max;
1170                 char      **table;
1171         } paths;
1172
1173         struct intlist *syscall_stats;
1174 };
1175
1176 static struct thread_trace *thread_trace__new(void)
1177 {
1178         struct thread_trace *ttrace =  zalloc(sizeof(struct thread_trace));
1179
1180         if (ttrace)
1181                 ttrace->paths.max = -1;
1182
1183         ttrace->syscall_stats = intlist__new(NULL);
1184
1185         return ttrace;
1186 }
1187
1188 static struct thread_trace *thread__trace(struct thread *thread, FILE *fp)
1189 {
1190         struct thread_trace *ttrace;
1191
1192         if (thread == NULL)
1193                 goto fail;
1194
1195         if (thread__priv(thread) == NULL)
1196                 thread__set_priv(thread, thread_trace__new());
1197
1198         if (thread__priv(thread) == NULL)
1199                 goto fail;
1200
1201         ttrace = thread__priv(thread);
1202         ++ttrace->nr_events;
1203
1204         return ttrace;
1205 fail:
1206         color_fprintf(fp, PERF_COLOR_RED,
1207                       "WARNING: not enough memory, dropping samples!\n");
1208         return NULL;
1209 }
1210
/*
 * Distinct single-bit flags.
 * NOTE(review): presumably OR-ed together into struct trace's trace_pgfaults
 * to select major and/or minor page fault tracing -- confirm at the users.
 */
#define TRACE_PFMAJ             0x1
#define TRACE_PFMIN             0x2
1213
1214 struct trace {
1215         struct perf_tool        tool;
1216         struct {
1217                 int             machine;
1218                 int             open_id;
1219         }                       audit;
1220         struct {
1221                 int             max;
1222                 struct syscall  *table;
1223         } syscalls;
1224         struct record_opts      opts;
1225         struct perf_evlist      *evlist;
1226         struct machine          *host;
1227         struct thread           *current;
1228         u64                     base_time;
1229         FILE                    *output;
1230         unsigned long           nr_events;
1231         struct strlist          *ev_qualifier;
1232         const char              *last_vfs_getname;
1233         struct intlist          *tid_list;
1234         struct intlist          *pid_list;
1235         struct {
1236                 size_t          nr;
1237                 pid_t           *entries;
1238         }                       filter_pids;
1239         double                  duration_filter;
1240         double                  runtime_ms;
1241         struct {
1242                 u64             vfs_getname,
1243                                 proc_getname;
1244         } stats;
1245         bool                    not_ev_qualifier;
1246         bool                    live;
1247         bool                    full_time;
1248         bool                    sched;
1249         bool                    multiple_threads;
1250         bool                    summary;
1251         bool                    summary_only;
1252         bool                    show_comm;
1253         bool                    show_tool_stats;
1254         bool                    trace_syscalls;
1255         int                     trace_pgfaults;
1256 };
1257
1258 static int trace__set_fd_pathname(struct thread *thread, int fd, const char *pathname)
1259 {
1260         struct thread_trace *ttrace = thread__priv(thread);
1261
1262         if (fd > ttrace->paths.max) {
1263                 char **npath = realloc(ttrace->paths.table, (fd + 1) * sizeof(char *));
1264
1265                 if (npath == NULL)
1266                         return -1;
1267
1268                 if (ttrace->paths.max != -1) {
1269                         memset(npath + ttrace->paths.max + 1, 0,
1270                                (fd - ttrace->paths.max) * sizeof(char *));
1271                 } else {
1272                         memset(npath, 0, (fd + 1) * sizeof(char *));
1273                 }
1274
1275                 ttrace->paths.table = npath;
1276                 ttrace->paths.max   = fd;
1277         }
1278
1279         ttrace->paths.table[fd] = strdup(pathname);
1280
1281         return ttrace->paths.table[fd] != NULL ? 0 : -1;
1282 }
1283
1284 static int thread__read_fd_path(struct thread *thread, int fd)
1285 {
1286         char linkname[PATH_MAX], pathname[PATH_MAX];
1287         struct stat st;
1288         int ret;
1289
1290         if (thread->pid_ == thread->tid) {
1291                 scnprintf(linkname, sizeof(linkname),
1292                           "/proc/%d/fd/%d", thread->pid_, fd);
1293         } else {
1294                 scnprintf(linkname, sizeof(linkname),
1295                           "/proc/%d/task/%d/fd/%d", thread->pid_, thread->tid, fd);
1296         }
1297
1298         if (lstat(linkname, &st) < 0 || st.st_size + 1 > (off_t)sizeof(pathname))
1299                 return -1;
1300
1301         ret = readlink(linkname, pathname, sizeof(pathname));
1302
1303         if (ret < 0 || ret > st.st_size)
1304                 return -1;
1305
1306         pathname[ret] = '\0';
1307         return trace__set_fd_pathname(thread, fd, pathname);
1308 }
1309
1310 static const char *thread__fd_path(struct thread *thread, int fd,
1311                                    struct trace *trace)
1312 {
1313         struct thread_trace *ttrace = thread__priv(thread);
1314
1315         if (ttrace == NULL)
1316                 return NULL;
1317
1318         if (fd < 0)
1319                 return NULL;
1320
1321         if ((fd > ttrace->paths.max || ttrace->paths.table[fd] == NULL)) {
1322                 if (!trace->live)
1323                         return NULL;
1324                 ++trace->stats.proc_getname;
1325                 if (thread__read_fd_path(thread, fd))
1326                         return NULL;
1327         }
1328
1329         return ttrace->paths.table[fd];
1330 }
1331
1332 static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
1333                                         struct syscall_arg *arg)
1334 {
1335         int fd = arg->val;
1336         size_t printed = scnprintf(bf, size, "%d", fd);
1337         const char *path = thread__fd_path(arg->thread, fd, arg->trace);
1338
1339         if (path)
1340                 printed += scnprintf(bf + printed, size - printed, "<%s>", path);
1341
1342         return printed;
1343 }
1344
1345 static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
1346                                               struct syscall_arg *arg)
1347 {
1348         int fd = arg->val;
1349         size_t printed = syscall_arg__scnprintf_fd(bf, size, arg);
1350         struct thread_trace *ttrace = thread__priv(arg->thread);
1351
1352         if (ttrace && fd >= 0 && fd <= ttrace->paths.max)
1353                 zfree(&ttrace->paths.table[fd]);
1354
1355         return printed;
1356 }
1357
1358 static bool trace__filter_duration(struct trace *trace, double t)
1359 {
1360         return t < (trace->duration_filter * NSEC_PER_MSEC);
1361 }
1362
1363 static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
1364 {
1365         double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC;
1366
1367         return fprintf(fp, "%10.3f ", ts);
1368 }
1369
/*
 * Flags set from sig_handler().  They are written asynchronously from a
 * signal handler, so they are volatile sig_atomic_t rather than plain bool:
 * that is the only object type the C standard lets a handler store to
 * portably.  Readers can keep testing them as booleans.
 */
static volatile sig_atomic_t done = false;
static volatile sig_atomic_t interrupted = false;

/*
 * Signal handler: request termination, and remember whether the request came
 * from SIGINT.
 */
static void sig_handler(int sig)
{
        done = true;
        interrupted = sig == SIGINT;
}
1378
1379 static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread,
1380                                         u64 duration, u64 tstamp, FILE *fp)
1381 {
1382         size_t printed = trace__fprintf_tstamp(trace, tstamp, fp);
1383         printed += fprintf_duration(duration, fp);
1384
1385         if (trace->multiple_threads) {
1386                 if (trace->show_comm)
1387                         printed += fprintf(fp, "%.14s/", thread__comm_str(thread));
1388                 printed += fprintf(fp, "%d ", thread->tid);
1389         }
1390
1391         return printed;
1392 }
1393
1394 static int trace__process_event(struct trace *trace, struct machine *machine,
1395                                 union perf_event *event, struct perf_sample *sample)
1396 {
1397         int ret = 0;
1398
1399         switch (event->header.type) {
1400         case PERF_RECORD_LOST:
1401                 color_fprintf(trace->output, PERF_COLOR_RED,
1402                               "LOST %" PRIu64 " events!\n", event->lost.lost);
1403                 ret = machine__process_lost_event(machine, event, sample);
1404         default:
1405                 ret = machine__process_event(machine, event, sample);
1406                 break;
1407         }
1408
1409         return ret;
1410 }
1411
1412 static int trace__tool_process(struct perf_tool *tool,
1413                                union perf_event *event,
1414                                struct perf_sample *sample,
1415                                struct machine *machine)
1416 {
1417         struct trace *trace = container_of(tool, struct trace, tool);
1418         return trace__process_event(trace, machine, event, sample);
1419 }
1420
1421 static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
1422 {
1423         int err = symbol__init(NULL);
1424
1425         if (err)
1426                 return err;
1427
1428         trace->host = machine__new_host();
1429         if (trace->host == NULL)
1430                 return -ENOMEM;
1431
1432         err = __machine__synthesize_threads(trace->host, &trace->tool, &trace->opts.target,
1433                                             evlist->threads, trace__tool_process, false);
1434         if (err)
1435                 symbol__exit();
1436
1437         return err;
1438 }
1439
1440 static int syscall__set_arg_fmts(struct syscall *sc)
1441 {
1442         struct format_field *field;
1443         int idx = 0;
1444
1445         sc->arg_scnprintf = calloc(sc->tp_format->format.nr_fields - 1, sizeof(void *));
1446         if (sc->arg_scnprintf == NULL)
1447                 return -1;
1448
1449         if (sc->fmt)
1450                 sc->arg_parm = sc->fmt->arg_parm;
1451
1452         for (field = sc->tp_format->format.fields->next; field; field = field->next) {
1453                 if (sc->fmt && sc->fmt->arg_scnprintf[idx])
1454                         sc->arg_scnprintf[idx] = sc->fmt->arg_scnprintf[idx];
1455                 else if (field->flags & FIELD_IS_POINTER)
1456                         sc->arg_scnprintf[idx] = syscall_arg__scnprintf_hex;
1457                 ++idx;
1458         }
1459
1460         return 0;
1461 }
1462
1463 static int trace__read_syscall_info(struct trace *trace, int id)
1464 {
1465         char tp_name[128];
1466         struct syscall *sc;
1467         const char *name = audit_syscall_to_name(id, trace->audit.machine);
1468
1469         if (name == NULL)
1470                 return -1;
1471
1472         if (id > trace->syscalls.max) {
1473                 struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc));
1474
1475                 if (nsyscalls == NULL)
1476                         return -1;
1477
1478                 if (trace->syscalls.max != -1) {
1479                         memset(nsyscalls + trace->syscalls.max + 1, 0,
1480                                (id - trace->syscalls.max) * sizeof(*sc));
1481                 } else {
1482                         memset(nsyscalls, 0, (id + 1) * sizeof(*sc));
1483                 }
1484
1485                 trace->syscalls.table = nsyscalls;
1486                 trace->syscalls.max   = id;
1487         }
1488
1489         sc = trace->syscalls.table + id;
1490         sc->name = name;
1491
1492         if (trace->ev_qualifier) {
1493                 bool in = strlist__find(trace->ev_qualifier, name) != NULL;
1494
1495                 if (!(in ^ trace->not_ev_qualifier)) {
1496                         sc->filtered = true;
1497                         /*
1498                          * No need to do read tracepoint information since this will be
1499                          * filtered out.
1500                          */
1501                         return 0;
1502                 }
1503         }
1504
1505         sc->fmt  = syscall_fmt__find(sc->name);
1506
1507         snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name);
1508         sc->tp_format = trace_event__tp_format("syscalls", tp_name);
1509
1510         if (sc->tp_format == NULL && sc->fmt && sc->fmt->alias) {
1511                 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias);
1512                 sc->tp_format = trace_event__tp_format("syscalls", tp_name);
1513         }
1514
1515         if (sc->tp_format == NULL)
1516                 return -1;
1517
1518         sc->is_exit = !strcmp(name, "exit_group") || !strcmp(name, "exit");
1519
1520         return syscall__set_arg_fmts(sc);
1521 }
1522
1523 /*
1524  * args is to be interpreted as a series of longs but we need to handle
1525  * 8-byte unaligned accesses. args points to raw_data within the event
1526  * and raw_data is guaranteed to be 8-byte unaligned because it is
1527  * preceded by raw_size which is a u32. So we need to copy args to a temp
1528  * variable to read it. Most notably this avoids extended load instructions
1529  * on unaligned addresses
1530  */
1531
1532 static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
1533                                       unsigned char *args, struct trace *trace,
1534                                       struct thread *thread)
1535 {
1536         size_t printed = 0;
1537         unsigned char *p;
1538         unsigned long val;
1539
1540         if (sc->tp_format != NULL) {
1541                 struct format_field *field;
1542                 u8 bit = 1;
1543                 struct syscall_arg arg = {
1544                         .idx    = 0,
1545                         .mask   = 0,
1546                         .trace  = trace,
1547                         .thread = thread,
1548                 };
1549
1550                 for (field = sc->tp_format->format.fields->next; field;
1551                      field = field->next, ++arg.idx, bit <<= 1) {
1552                         if (arg.mask & bit)
1553                                 continue;
1554
1555                         /* special care for unaligned accesses */
1556                         p = args + sizeof(unsigned long) * arg.idx;
1557                         memcpy(&val, p, sizeof(val));
1558
1559                         /*
1560                          * Suppress this argument if its value is zero and
1561                          * and we don't have a string associated in an
1562                          * strarray for it.
1563                          */
1564                         if (val == 0 &&
1565                             !(sc->arg_scnprintf &&
1566                               sc->arg_scnprintf[arg.idx] == SCA_STRARRAY &&
1567                               sc->arg_parm[arg.idx]))
1568                                 continue;
1569
1570                         printed += scnprintf(bf + printed, size - printed,
1571                                              "%s%s: ", printed ? ", " : "", field->name);
1572                         if (sc->arg_scnprintf && sc->arg_scnprintf[arg.idx]) {
1573                                 arg.val = val;
1574                                 if (sc->arg_parm)
1575                                         arg.parm = sc->arg_parm[arg.idx];
1576                                 printed += sc->arg_scnprintf[arg.idx](bf + printed,
1577                                                                       size - printed, &arg);
1578                         } else {
1579                                 printed += scnprintf(bf + printed, size - printed,
1580                                                      "%ld", val);
1581                         }
1582                 }
1583         } else {
1584                 int i = 0;
1585
1586                 while (i < 6) {
1587                         /* special care for unaligned accesses */
1588                         p = args + sizeof(unsigned long) * i;
1589                         memcpy(&val, p, sizeof(val));
1590                         printed += scnprintf(bf + printed, size - printed,
1591                                              "%sarg%d: %ld",
1592                                              printed ? ", " : "", i, val);
1593                         ++i;
1594                 }
1595         }
1596
1597         return printed;
1598 }
1599
/*
 * Signature of a per-tracepoint sample handler; trace__sys_enter() is one
 * function with this shape.
 */
typedef int (*tracepoint_handler)(struct trace *trace, struct perf_evsel *evsel,
                                  union perf_event *event,
                                  struct perf_sample *sample);
1603
1604 static struct syscall *trace__syscall_info(struct trace *trace,
1605                                            struct perf_evsel *evsel, int id)
1606 {
1607
1608         if (id < 0) {
1609
1610                 /*
1611                  * XXX: Noticed on x86_64, reproduced as far back as 3.0.36, haven't tried
1612                  * before that, leaving at a higher verbosity level till that is
1613                  * explained. Reproduced with plain ftrace with:
1614                  *
1615                  * echo 1 > /t/events/raw_syscalls/sys_exit/enable
1616                  * grep "NR -1 " /t/trace_pipe
1617                  *
1618                  * After generating some load on the machine.
1619                  */
1620                 if (verbose > 1) {
1621                         static u64 n;
1622                         fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n",
1623                                 id, perf_evsel__name(evsel), ++n);
1624                 }
1625                 return NULL;
1626         }
1627
1628         if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) &&
1629             trace__read_syscall_info(trace, id))
1630                 goto out_cant_read;
1631
1632         if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL))
1633                 goto out_cant_read;
1634
1635         return &trace->syscalls.table[id];
1636
1637 out_cant_read:
1638         if (verbose) {
1639                 fprintf(trace->output, "Problems reading syscall %d", id);
1640                 if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL)
1641                         fprintf(trace->output, "(%s)", trace->syscalls.table[id].name);
1642                 fputs(" information\n", trace->output);
1643         }
1644         return NULL;
1645 }
1646
1647 static void thread__update_stats(struct thread_trace *ttrace,
1648                                  int id, struct perf_sample *sample)
1649 {
1650         struct int_node *inode;
1651         struct stats *stats;
1652         u64 duration = 0;
1653
1654         inode = intlist__findnew(ttrace->syscall_stats, id);
1655         if (inode == NULL)
1656                 return;
1657
1658         stats = inode->priv;
1659         if (stats == NULL) {
1660                 stats = malloc(sizeof(struct stats));
1661                 if (stats == NULL)
1662                         return;
1663                 init_stats(stats);
1664                 inode->priv = stats;
1665         }
1666
1667         if (ttrace->entry_time && sample->time > ttrace->entry_time)
1668                 duration = sample->time - ttrace->entry_time;
1669
1670         update_stats(stats, duration);
1671 }
1672
1673 static int trace__printf_interrupted_entry(struct trace *trace, struct perf_sample *sample)
1674 {
1675         struct thread_trace *ttrace;
1676         u64 duration;
1677         size_t printed;
1678
1679         if (trace->current == NULL)
1680                 return 0;
1681
1682         ttrace = thread__priv(trace->current);
1683
1684         if (!ttrace->entry_pending)
1685                 return 0;
1686
1687         duration = sample->time - ttrace->entry_time;
1688
1689         printed  = trace__fprintf_entry_head(trace, trace->current, duration, sample->time, trace->output);
1690         printed += fprintf(trace->output, "%-70s) ...\n", ttrace->entry_str);
1691         ttrace->entry_pending = false;
1692
1693         return printed;
1694 }
1695
1696 static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
1697                             union perf_event *event __maybe_unused,
1698                             struct perf_sample *sample)
1699 {
1700         char *msg;
1701         void *args;
1702         size_t printed = 0;
1703         struct thread *thread;
1704         int id = perf_evsel__sc_tp_uint(evsel, id, sample);
1705         struct syscall *sc = trace__syscall_info(trace, evsel, id);
1706         struct thread_trace *ttrace;
1707
1708         if (sc == NULL)
1709                 return -1;
1710
1711         if (sc->filtered)
1712                 return 0;
1713
1714         thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1715         ttrace = thread__trace(thread, trace->output);
1716         if (ttrace == NULL)
1717                 return -1;
1718
1719         args = perf_evsel__sc_tp_ptr(evsel, args, sample);
1720
1721         if (ttrace->entry_str == NULL) {
1722                 ttrace->entry_str = malloc(1024);
1723                 if (!ttrace->entry_str)
1724                         return -1;
1725         }
1726
1727         printed += trace__printf_interrupted_entry(trace, sample);
1728
1729         ttrace->entry_time = sample->time;
1730         msg = ttrace->entry_str;
1731         printed += scnprintf(msg + printed, 1024 - printed, "%s(", sc->name);
1732
1733         printed += syscall__scnprintf_args(sc, msg + printed, 1024 - printed,
1734                                            args, trace, thread);
1735
1736         if (sc->is_exit) {
1737                 if (!trace->duration_filter && !trace->summary_only) {
1738                         trace__fprintf_entry_head(trace, thread, 1, sample->time, trace->output);
1739                         fprintf(trace->output, "%-70s\n", ttrace->entry_str);
1740                 }
1741         } else
1742                 ttrace->entry_pending = true;
1743
1744         if (trace->current != thread) {
1745                 thread__put(trace->current);
1746                 trace->current = thread__get(thread);
1747         }
1748
1749         return 0;
1750 }
1751
1752 static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
1753                            union perf_event *event __maybe_unused,
1754                            struct perf_sample *sample)
1755 {
1756         long ret;
1757         u64 duration = 0;
1758         struct thread *thread;
1759         int id = perf_evsel__sc_tp_uint(evsel, id, sample);
1760         struct syscall *sc = trace__syscall_info(trace, evsel, id);
1761         struct thread_trace *ttrace;
1762
1763         if (sc == NULL)
1764                 return -1;
1765
1766         if (sc->filtered)
1767                 return 0;
1768
1769         thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1770         ttrace = thread__trace(thread, trace->output);
1771         if (ttrace == NULL)
1772                 return -1;
1773
1774         if (trace->summary)
1775                 thread__update_stats(ttrace, id, sample);
1776
1777         ret = perf_evsel__sc_tp_uint(evsel, ret, sample);
1778
1779         if (id == trace->audit.open_id && ret >= 0 && trace->last_vfs_getname) {
1780                 trace__set_fd_pathname(thread, ret, trace->last_vfs_getname);
1781                 trace->last_vfs_getname = NULL;
1782                 ++trace->stats.vfs_getname;
1783         }
1784
1785         ttrace->exit_time = sample->time;
1786
1787         if (ttrace->entry_time) {
1788                 duration = sample->time - ttrace->entry_time;
1789                 if (trace__filter_duration(trace, duration))
1790                         goto out;
1791         } else if (trace->duration_filter)
1792                 goto out;
1793
1794         if (trace->summary_only)
1795                 goto out;
1796
1797         trace__fprintf_entry_head(trace, thread, duration, sample->time, trace->output);
1798
1799         if (ttrace->entry_pending) {
1800                 fprintf(trace->output, "%-70s", ttrace->entry_str);
1801         } else {
1802                 fprintf(trace->output, " ... [");
1803                 color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued");
1804                 fprintf(trace->output, "]: %s()", sc->name);
1805         }
1806
1807         if (sc->fmt == NULL) {
1808 signed_print:
1809                 fprintf(trace->output, ") = %ld", ret);
1810         } else if (ret < 0 && sc->fmt->errmsg) {
1811                 char bf[STRERR_BUFSIZE];
1812                 const char *emsg = strerror_r(-ret, bf, sizeof(bf)),
1813                            *e = audit_errno_to_name(-ret);
1814
1815                 fprintf(trace->output, ") = -1 %s %s", e, emsg);
1816         } else if (ret == 0 && sc->fmt->timeout)
1817                 fprintf(trace->output, ") = 0 Timeout");
1818         else if (sc->fmt->hexret)
1819                 fprintf(trace->output, ") = %#lx", ret);
1820         else
1821                 goto signed_print;
1822
1823         fputc('\n', trace->output);
1824 out:
1825         ttrace->entry_pending = false;
1826
1827         return 0;
1828 }
1829
1830 static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel,
1831                               union perf_event *event __maybe_unused,
1832                               struct perf_sample *sample)
1833 {
1834         trace->last_vfs_getname = perf_evsel__rawptr(evsel, sample, "pathname");
1835         return 0;
1836 }
1837
1838 static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel,
1839                                      union perf_event *event __maybe_unused,
1840                                      struct perf_sample *sample)
1841 {
1842         u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
1843         double runtime_ms = (double)runtime / NSEC_PER_MSEC;
1844         struct thread *thread = machine__findnew_thread(trace->host,
1845                                                         sample->pid,
1846                                                         sample->tid);
1847         struct thread_trace *ttrace = thread__trace(thread, trace->output);
1848
1849         if (ttrace == NULL)
1850                 goto out_dump;
1851
1852         ttrace->runtime_ms += runtime_ms;
1853         trace->runtime_ms += runtime_ms;
1854         return 0;
1855
1856 out_dump:
1857         fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n",
1858                evsel->name,
1859                perf_evsel__strval(evsel, sample, "comm"),
1860                (pid_t)perf_evsel__intval(evsel, sample, "pid"),
1861                runtime,
1862                perf_evsel__intval(evsel, sample, "vruntime"));
1863         return 0;
1864 }
1865
1866 static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel,
1867                                 union perf_event *event __maybe_unused,
1868                                 struct perf_sample *sample)
1869 {
1870         trace__printf_interrupted_entry(trace, sample);
1871         trace__fprintf_tstamp(trace, sample->time, trace->output);
1872
1873         if (trace->trace_syscalls)
1874                 fprintf(trace->output, "(         ): ");
1875
1876         fprintf(trace->output, "%s:", evsel->name);
1877
1878         if (evsel->tp_format) {
1879                 event_format__fprintf(evsel->tp_format, sample->cpu,
1880                                       sample->raw_data, sample->raw_size,
1881                                       trace->output);
1882         }
1883
1884         fprintf(trace->output, ")\n");
1885         return 0;
1886 }
1887
1888 static void print_location(FILE *f, struct perf_sample *sample,
1889                            struct addr_location *al,
1890                            bool print_dso, bool print_sym)
1891 {
1892
1893         if ((verbose || print_dso) && al->map)
1894                 fprintf(f, "%s@", al->map->dso->long_name);
1895
1896         if ((verbose || print_sym) && al->sym)
1897                 fprintf(f, "%s+0x%" PRIx64, al->sym->name,
1898                         al->addr - al->sym->start);
1899         else if (al->map)
1900                 fprintf(f, "0x%" PRIx64, al->addr);
1901         else
1902                 fprintf(f, "0x%" PRIx64, sample->addr);
1903 }
1904
1905 static int trace__pgfault(struct trace *trace,
1906                           struct perf_evsel *evsel,
1907                           union perf_event *event,
1908                           struct perf_sample *sample)
1909 {
1910         struct thread *thread;
1911         u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
1912         struct addr_location al;
1913         char map_type = 'd';
1914         struct thread_trace *ttrace;
1915
1916         thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1917         ttrace = thread__trace(thread, trace->output);
1918         if (ttrace == NULL)
1919                 return -1;
1920
1921         if (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ)
1922                 ttrace->pfmaj++;
1923         else
1924                 ttrace->pfmin++;
1925
1926         if (trace->summary_only)
1927                 return 0;
1928
1929         thread__find_addr_location(thread, cpumode, MAP__FUNCTION,
1930                               sample->ip, &al);
1931
1932         trace__fprintf_entry_head(trace, thread, 0, sample->time, trace->output);
1933
1934         fprintf(trace->output, "%sfault [",
1935                 evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ?
1936                 "maj" : "min");
1937
1938         print_location(trace->output, sample, &al, false, true);
1939
1940         fprintf(trace->output, "] => ");
1941
1942         thread__find_addr_location(thread, cpumode, MAP__VARIABLE,
1943                                    sample->addr, &al);
1944
1945         if (!al.map) {
1946                 thread__find_addr_location(thread, cpumode,
1947                                            MAP__FUNCTION, sample->addr, &al);
1948
1949                 if (al.map)
1950                         map_type = 'x';
1951                 else
1952                         map_type = '?';
1953         }
1954
1955         print_location(trace->output, sample, &al, true, false);
1956
1957         fprintf(trace->output, " (%c%c)\n", map_type, al.level);
1958
1959         return 0;
1960 }
1961
1962 static bool skip_sample(struct trace *trace, struct perf_sample *sample)
1963 {
1964         if ((trace->pid_list && intlist__find(trace->pid_list, sample->pid)) ||
1965             (trace->tid_list && intlist__find(trace->tid_list, sample->tid)))
1966                 return false;
1967
1968         if (trace->pid_list || trace->tid_list)
1969                 return true;
1970
1971         return false;
1972 }
1973
1974 static int trace__process_sample(struct perf_tool *tool,
1975                                  union perf_event *event,
1976                                  struct perf_sample *sample,
1977                                  struct perf_evsel *evsel,
1978                                  struct machine *machine __maybe_unused)
1979 {
1980         struct trace *trace = container_of(tool, struct trace, tool);
1981         int err = 0;
1982
1983         tracepoint_handler handler = evsel->handler;
1984
1985         if (skip_sample(trace, sample))
1986                 return 0;
1987
1988         if (!trace->full_time && trace->base_time == 0)
1989                 trace->base_time = sample->time;
1990
1991         if (handler) {
1992                 ++trace->nr_events;
1993                 handler(trace, evsel, event, sample);
1994         }
1995
1996         return err;
1997 }
1998
1999 static int parse_target_str(struct trace *trace)
2000 {
2001         if (trace->opts.target.pid) {
2002                 trace->pid_list = intlist__new(trace->opts.target.pid);
2003                 if (trace->pid_list == NULL) {
2004                         pr_err("Error parsing process id string\n");
2005                         return -EINVAL;
2006                 }
2007         }
2008
2009         if (trace->opts.target.tid) {
2010                 trace->tid_list = intlist__new(trace->opts.target.tid);
2011                 if (trace->tid_list == NULL) {
2012                         pr_err("Error parsing thread id string\n");
2013                         return -EINVAL;
2014                 }
2015         }
2016
2017         return 0;
2018 }
2019
2020 static int trace__record(struct trace *trace, int argc, const char **argv)
2021 {
2022         unsigned int rec_argc, i, j;
2023         const char **rec_argv;
2024         const char * const record_args[] = {
2025                 "record",
2026                 "-R",
2027                 "-m", "1024",
2028                 "-c", "1",
2029         };
2030
2031         const char * const sc_args[] = { "-e", };
2032         unsigned int sc_args_nr = ARRAY_SIZE(sc_args);
2033         const char * const majpf_args[] = { "-e", "major-faults" };
2034         unsigned int majpf_args_nr = ARRAY_SIZE(majpf_args);
2035         const char * const minpf_args[] = { "-e", "minor-faults" };
2036         unsigned int minpf_args_nr = ARRAY_SIZE(minpf_args);
2037
2038         /* +1 is for the event string below */
2039         rec_argc = ARRAY_SIZE(record_args) + sc_args_nr + 1 +
2040                 majpf_args_nr + minpf_args_nr + argc;
2041         rec_argv = calloc(rec_argc + 1, sizeof(char *));
2042
2043         if (rec_argv == NULL)
2044                 return -ENOMEM;
2045
2046         j = 0;
2047         for (i = 0; i < ARRAY_SIZE(record_args); i++)
2048                 rec_argv[j++] = record_args[i];
2049
2050         if (trace->trace_syscalls) {
2051                 for (i = 0; i < sc_args_nr; i++)
2052                         rec_argv[j++] = sc_args[i];
2053
2054                 /* event string may be different for older kernels - e.g., RHEL6 */
2055                 if (is_valid_tracepoint("raw_syscalls:sys_enter"))
2056                         rec_argv[j++] = "raw_syscalls:sys_enter,raw_syscalls:sys_exit";
2057                 else if (is_valid_tracepoint("syscalls:sys_enter"))
2058                         rec_argv[j++] = "syscalls:sys_enter,syscalls:sys_exit";
2059                 else {
2060                         pr_err("Neither raw_syscalls nor syscalls events exist.\n");
2061                         return -1;
2062                 }
2063         }
2064
2065         if (trace->trace_pgfaults & TRACE_PFMAJ)
2066                 for (i = 0; i < majpf_args_nr; i++)
2067                         rec_argv[j++] = majpf_args[i];
2068
2069         if (trace->trace_pgfaults & TRACE_PFMIN)
2070                 for (i = 0; i < minpf_args_nr; i++)
2071                         rec_argv[j++] = minpf_args[i];
2072
2073         for (i = 0; i < (unsigned int)argc; i++)
2074                 rec_argv[j++] = argv[i];
2075
2076         return cmd_record(j, rec_argv, NULL);
2077 }
2078
2079 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp);
2080
2081 static void perf_evlist__add_vfs_getname(struct perf_evlist *evlist)
2082 {
2083         struct perf_evsel *evsel = perf_evsel__newtp("probe", "vfs_getname");
2084         if (evsel == NULL)
2085                 return;
2086
2087         if (perf_evsel__field(evsel, "pathname") == NULL) {
2088                 perf_evsel__delete(evsel);
2089                 return;
2090         }
2091
2092         evsel->handler = trace__vfs_getname;
2093         perf_evlist__add(evlist, evsel);
2094 }
2095
2096 static int perf_evlist__add_pgfault(struct perf_evlist *evlist,
2097                                     u64 config)
2098 {
2099         struct perf_evsel *evsel;
2100         struct perf_event_attr attr = {
2101                 .type = PERF_TYPE_SOFTWARE,
2102                 .mmap_data = 1,
2103         };
2104
2105         attr.config = config;
2106         attr.sample_period = 1;
2107
2108         event_attr_init(&attr);
2109
2110         evsel = perf_evsel__new(&attr);
2111         if (!evsel)
2112                 return -ENOMEM;
2113
2114         evsel->handler = trace__pgfault;
2115         perf_evlist__add(evlist, evsel);
2116
2117         return 0;
2118 }
2119
2120 static void trace__handle_event(struct trace *trace, union perf_event *event, struct perf_sample *sample)
2121 {
2122         const u32 type = event->header.type;
2123         struct perf_evsel *evsel;
2124
2125         if (!trace->full_time && trace->base_time == 0)
2126                 trace->base_time = sample->time;
2127
2128         if (type != PERF_RECORD_SAMPLE) {
2129                 trace__process_event(trace, trace->host, event, sample);
2130                 return;
2131         }
2132
2133         evsel = perf_evlist__id2evsel(trace->evlist, sample->id);
2134         if (evsel == NULL) {
2135                 fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample->id);
2136                 return;
2137         }
2138
2139         if (evsel->attr.type == PERF_TYPE_TRACEPOINT &&
2140             sample->raw_data == NULL) {
2141                 fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
2142                        perf_evsel__name(evsel), sample->tid,
2143                        sample->cpu, sample->raw_size);
2144         } else {
2145                 tracepoint_handler handler = evsel->handler;
2146                 handler(trace, evsel, event, sample);
2147         }
2148 }
2149
     /*
      * Live tracing entry point.
      *
      * Adds the events selected in @trace (syscall tracepoints plus the
      * vfs_getname probe, major/minor page faults, sched_stat_runtime) to
      * trace->evlist, creates the maps for the target, optionally prepares the
      * workload described by @argv (when @argc > 0), opens and mmaps the events
      * and then loops reading events from every mmap until it is interrupted or
      * there is nothing more to read.
      *
      * Every return goes through out_delete_evlist, so the evlist is deleted and
      * trace->evlist reset to NULL before returning.  Returns the value left in
      * 'err': -1 for the failures detected before perf_evlist__create_maps(), the
      * failing helper's result afterwards, or whatever the last assignment in the
      * read loop left there.  A failure to install the pid filter terminates the
      * process with exit(1) instead.
      */
2150 static int trace__run(struct trace *trace, int argc, const char **argv)
2151 {
2152         struct perf_evlist *evlist = trace->evlist;
2153         int err = -1, i;
2154         unsigned long before;
2155         const bool forks = argc > 0;
2156         bool draining = false;
2157
2158         trace->live = true;
2159
2160         if (trace->trace_syscalls &&
2161             perf_evlist__add_syscall_newtp(evlist, trace__sys_enter,
2162                                            trace__sys_exit))
2163                 goto out_error_raw_syscalls;
2164
             /* Optional probe; perf_evlist__add_vfs_getname() adds nothing if it is unusable. */
2165         if (trace->trace_syscalls)
2166                 perf_evlist__add_vfs_getname(evlist);
2167
2168         if ((trace->trace_pgfaults & TRACE_PFMAJ) &&
2169             perf_evlist__add_pgfault(evlist, PERF_COUNT_SW_PAGE_FAULTS_MAJ)) {
2170                 goto out_error_mem;
2171         }
2172
2173         if ((trace->trace_pgfaults & TRACE_PFMIN) &&
2174             perf_evlist__add_pgfault(evlist, PERF_COUNT_SW_PAGE_FAULTS_MIN))
2175                 goto out_error_mem;
2176
2177         if (trace->sched &&
2178             perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
2179                                    trace__sched_stat_runtime))
2180                 goto out_error_sched_stat_runtime;
2181
2182         err = perf_evlist__create_maps(evlist, &trace->opts.target);
2183         if (err < 0) {
2184                 fprintf(trace->output, "Problems parsing the target to trace, check your options!\n");
2185                 goto out_delete_evlist;
2186         }
2187
2188         err = trace__symbols_init(trace, evlist);
2189         if (err < 0) {
2190                 fprintf(trace->output, "Problems initializing symbol libraries!\n");
2191                 goto out_delete_evlist;
2192         }
2193
2194         perf_evlist__config(evlist, &trace->opts);
2195
             /* NOTE(review): sig_handler is defined elsewhere; presumably it sets 'done'/'interrupted' used below. */
2196         signal(SIGCHLD, sig_handler);
2197         signal(SIGINT, sig_handler);
2198
2199         if (forks) {
2200                 err = perf_evlist__prepare_workload(evlist, &trace->opts.target,
2201                                                     argv, false, NULL);
2202                 if (err < 0) {
2203                         fprintf(trace->output, "Couldn't run the workload!\n");
2204                         goto out_delete_evlist;
2205                 }
2206         }
2207
2208         err = perf_evlist__open(evlist);
2209         if (err < 0)
2210                 goto out_error_open;
2211
2212         /*
2213          * Better not use !target__has_task() here because we need to cover the
2214          * case where no threads were specified in the command line, but a
2215          * workload was, and in that case we will fill in the thread_map when
2216          * we fork the workload in perf_evlist__prepare_workload.
2217          */
2218         if (trace->filter_pids.nr > 0)
2219                 err = perf_evlist__set_filter_pids(evlist, trace->filter_pids.nr, trace->filter_pids.entries);
2220         else if (evlist->threads->map[0] == -1)
2221                 err = perf_evlist__set_filter_pid(evlist, getpid());
2222
             /* Unlike the other failures, this one prints to stdout and exits without cleanup. */
2223         if (err < 0) {
2224                 printf("err=%d,%s\n", -err, strerror(-err));
2225                 exit(1);
2226         }
2227
2228         err = perf_evlist__mmap(evlist, trace->opts.mmap_pages, false);
2229         if (err < 0)
2230                 goto out_error_mmap;
2231
2232         if (forks)
2233                 perf_evlist__start_workload(evlist);
2234         else
2235                 perf_evlist__enable(evlist);
2236
2237         trace->multiple_threads = evlist->threads->map[0] == -1 ||
2238                                   evlist->threads->nr > 1 ||
2239                                   perf_evlist__first(evlist)->attr.inherit;
             /* Main loop: one pass over every mmap, then decide whether to go around again. */
2240 again:
2241         before = trace->nr_events;
2242
2243         for (i = 0; i < evlist->nr_mmaps; i++) {
2244                 union perf_event *event;
2245
2246                 while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) {
2247                         struct perf_sample sample;
2248
2249                         ++trace->nr_events;
2250
2251                         err = perf_evlist__parse_sample(evlist, event, &sample);
2252                         if (err) {
2253                                 fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err);
2254                                 goto next_event;
2255                         }
2256
2257                         trace__handle_event(trace, event, &sample);
2258 next_event:
                             /* The event is consumed whether or not it could be parsed. */
2259                         perf_evlist__mmap_consume(evlist, i);
2260
2261                         if (interrupted)
2262                                 goto out_disable;
2263                 }
2264         }
2265
             /*
              * Nothing new was read in this pass: poll for more, waiting without a
              * timeout (-1) unless 'done' is set, in which case only 100 is passed.
              * Once perf_evlist__filter_pollfd() returns 0 the 'draining' flag is set,
              * so the next empty pass ends the loop instead of polling again.
              * NOTE(review): presumably that 0 means no pollable fds remain - confirm.
              */
2266         if (trace->nr_events == before) {
2267                 int timeout = done ? 100 : -1;
2268
2269                 if (!draining && perf_evlist__poll(evlist, timeout) > 0) {
2270                         if (perf_evlist__filter_pollfd(evlist, POLLERR | POLLHUP) == 0)
2271                                 draining = true;
2272
2273                         goto again;
2274                 }
2275         } else {
2276                 goto again;
2277         }
2278
2279 out_disable:
2280         thread__zput(trace->current);
2281
2282         perf_evlist__disable(evlist);
2283
             /* Summary and tool statistics are only printed when 'err' ended up as 0. */
2284         if (!err) {
2285                 if (trace->summary)
2286                         trace__fprintf_thread_summary(trace, trace->output);
2287
2288                 if (trace->show_tool_stats) {
2289                         fprintf(trace->output, "Stats:\n "
2290                                                " vfs_getname : %" PRIu64 "\n"
2291                                                " proc_getname: %" PRIu64 "\n",
2292                                 trace->stats.vfs_getname,
2293                                 trace->stats.proc_getname);
2294                 }
2295         }
2296
2297 out_delete_evlist:
2298         perf_evlist__delete(evlist);
2299         trace->evlist = NULL;
2300         trace->live = false;
2301         return err;
             /*
              * Error labels that need a message buffer live in their own block so that
              * errbuf is scoped to them; they are only ever reached through goto.
              */
2302 {
2303         char errbuf[BUFSIZ];
2304
2305 out_error_sched_stat_runtime:
2306         debugfs__strerror_open_tp(errno, errbuf, sizeof(errbuf), "sched", "sched_stat_runtime");
2307         goto out_error;
2308
2309 out_error_raw_syscalls:
2310         debugfs__strerror_open_tp(errno, errbuf, sizeof(errbuf), "raw_syscalls", "sys_(enter|exit)");
2311         goto out_error;
2312
2313 out_error_mmap:
2314         perf_evlist__strerror_mmap(evlist, errno, errbuf, sizeof(errbuf));
2315         goto out_error;
2316
2317 out_error_open:
2318         perf_evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf));
2319
2320 out_error:
2321         fprintf(trace->output, "%s\n", errbuf);
2322         goto out_delete_evlist;
2323 }
2324 out_error_mem:
2325         fprintf(trace->output, "Not enough memory to run!\n");
2326         goto out_delete_evlist;
2327 }
2328
     /*
      * Replay mode: process the events stored in the perf data file named by
      * input_name instead of tracing live.
      *
      * Installs the perf_tool callbacks, opens the session, wires the syscall
      * enter/exit tracepoints (raw_syscalls, falling back to syscalls) and the
      * page-fault software events to the handlers in this file, builds the
      * pid/tid filter lists and finally processes all events, printing the
      * per-thread summary afterwards when requested.
      *
      * Returns 0 on success; -1 when the session cannot be created, symbol
      * initialization fails or a syscall tracepoint cannot be initialized; or the
      * error returned by the helper that failed.
      */
2329 static int trace__replay(struct trace *trace)
2330 {
2331         const struct perf_evsel_str_handler handlers[] = {
2332                 { "probe:vfs_getname",       trace__vfs_getname, },
2333         };
2334         struct perf_data_file file = {
2335                 .path  = input_name,
2336                 .mode  = PERF_DATA_MODE_READ,
2337         };
2338         struct perf_session *session;
2339         struct perf_evsel *evsel;
2340         int err = -1;
2341
2342         trace->tool.sample        = trace__process_sample;
2343         trace->tool.mmap          = perf_event__process_mmap;
2344         trace->tool.mmap2         = perf_event__process_mmap2;
2345         trace->tool.comm          = perf_event__process_comm;
2346         trace->tool.exit          = perf_event__process_exit;
2347         trace->tool.fork          = perf_event__process_fork;
2348         trace->tool.attr          = perf_event__process_attr;
2349         trace->tool.tracing_data = perf_event__process_tracing_data;
2350         trace->tool.build_id      = perf_event__process_build_id;
2351
2352         trace->tool.ordered_events = true;
2353         trace->tool.ordering_requires_timestamps = true;
2354
2355         /* add tid to output */
2356         trace->multiple_threads = true;
2357
2358         session = perf_session__new(&file, false, &trace->tool);
2359         if (session == NULL)
2360                 return -1;
2361
2362         if (symbol__init(&session->header.env) < 0)
2363                 goto out;
2364
2365         trace->host = &session->machines.host;
2366
2367         err = perf_session__set_tracepoints_handlers(session, handlers);
2368         if (err)
2369                 goto out;
2370
2371         evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2372                                                      "raw_syscalls:sys_enter");
2373         /* older kernels have syscalls tp versus raw_syscalls */
2374         if (evsel == NULL)
2375                 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2376                                                              "syscalls:sys_enter");
2377
             /* A file without the sys_enter tracepoint is accepted; only a failed init is an error. */
2378         if (evsel &&
2379             (perf_evsel__init_syscall_tp(evsel, trace__sys_enter) < 0 ||
2380             perf_evsel__init_sc_tp_ptr_field(evsel, args))) {
2381                 pr_err("Error during initialize raw_syscalls:sys_enter event\n");
2382                 goto out;
2383         }
2384
2385         evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2386                                                      "raw_syscalls:sys_exit");
2387         if (evsel == NULL)
2388                 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2389                                                              "syscalls:sys_exit");
2390         if (evsel &&
2391             (perf_evsel__init_syscall_tp(evsel, trace__sys_exit) < 0 ||
2392             perf_evsel__init_sc_tp_uint_field(evsel, ret))) {
2393                 pr_err("Error during initialize raw_syscalls:sys_exit event\n");
2394                 goto out;
2395         }
2396
             /* Route every recorded page-fault software event to trace__pgfault(). */
2397         evlist__for_each(session->evlist, evsel) {
2398                 if (evsel->attr.type == PERF_TYPE_SOFTWARE &&
2399                     (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ||
2400                      evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MIN ||
2401                      evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS))
2402                         evsel->handler = trace__pgfault;
2403         }
2404
2405         err = parse_target_str(trace);
2406         if (err != 0)
2407                 goto out;
2408
2409         setup_pager();
2410
2411         err = perf_session__process_events(session, &trace->tool);
2412         if (err)
2413                 pr_err("Failed to process events, error %d", err);
2414
2415         else if (trace->summary)
2416                 trace__fprintf_thread_summary(trace, trace->output);
2417
2418 out:
2419         perf_session__delete(session);
2420
2421         return err;
2422 }
2423
/*
 * Write the heading that precedes the per-thread summary to @fp.
 * Returns the value reported by fprintf() converted to size_t.
 */
static size_t trace__fprintf_threads_header(FILE *fp)
{
        return fprintf(fp, "\n Summary of events:\n\n");
}
2432
2433 static size_t thread__dump_stats(struct thread_trace *ttrace,
2434                                  struct trace *trace, FILE *fp)
2435 {
2436         struct stats *stats;
2437         size_t printed = 0;
2438         struct syscall *sc;
2439         struct int_node *inode = intlist__first(ttrace->syscall_stats);
2440
2441         if (inode == NULL)
2442                 return 0;
2443
2444         printed += fprintf(fp, "\n");
2445
2446         printed += fprintf(fp, "   syscall            calls      min       avg       max      stddev\n");
2447         printed += fprintf(fp, "                               (msec)    (msec)    (msec)        (%%)\n");
2448         printed += fprintf(fp, "   --------------- -------- --------- --------- ---------     ------\n");
2449
2450         /* each int_node is a syscall */
2451         while (inode) {
2452                 stats = inode->priv;
2453                 if (stats) {
2454                         double min = (double)(stats->min) / NSEC_PER_MSEC;
2455                         double max = (double)(stats->max) / NSEC_PER_MSEC;
2456                         double avg = avg_stats(stats);
2457                         double pct;
2458                         u64 n = (u64) stats->n;
2459
2460                         pct = avg ? 100.0 * stddev_stats(stats)/avg : 0.0;
2461                         avg /= NSEC_PER_MSEC;
2462
2463                         sc = &trace->syscalls.table[inode->i];
2464                         printed += fprintf(fp, "   %-15s", sc->name);
2465                         printed += fprintf(fp, " %8" PRIu64 " %9.3f %9.3f",
2466                                            n, min, avg);
2467                         printed += fprintf(fp, " %9.3f %9.2f%%\n", max, pct);
2468                 }
2469
2470                 inode = intlist__next(inode);
2471         }
2472
2473         printed += fprintf(fp, "\n\n");
2474
2475         return printed;
2476 }
2477
/*
 * Context handed to the per-thread summary callback through its opaque
 * priv pointer.
 */
struct summary_data {
	FILE		*fp;		/* stream the summary is written to */
	struct trace	*trace;		/* tracer state the summary is computed from */
	size_t		printed;	/* running total of the fprintf() results */
};
2484
2485 static int trace__fprintf_one_thread(struct thread *thread, void *priv)
2486 {
2487         struct summary_data *data = priv;
2488         FILE *fp = data->fp;
2489         size_t printed = data->printed;
2490         struct trace *trace = data->trace;
2491         struct thread_trace *ttrace = thread__priv(thread);
2492         double ratio;
2493
2494         if (ttrace == NULL)
2495                 return 0;
2496
2497         ratio = (double)ttrace->nr_events / trace->nr_events * 100.0;
2498
2499         printed += fprintf(fp, " %s (%d), ", thread__comm_str(thread), thread->tid);
2500         printed += fprintf(fp, "%lu events, ", ttrace->nr_events);
2501         printed += fprintf(fp, "%.1f%%", ratio);
2502         if (ttrace->pfmaj)
2503                 printed += fprintf(fp, ", %lu majfaults", ttrace->pfmaj);
2504         if (ttrace->pfmin)
2505                 printed += fprintf(fp, ", %lu minfaults", ttrace->pfmin);
2506         printed += fprintf(fp, ", %.3f msec\n", ttrace->runtime_ms);
2507         printed += thread__dump_stats(ttrace, trace, fp);
2508
2509         data->printed += printed;
2510
2511         return 0;
2512 }
2513
2514 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
2515 {
2516         struct summary_data data = {
2517                 .fp = fp,
2518                 .trace = trace
2519         };
2520         data.printed = trace__fprintf_threads_header(fp);
2521
2522         machine__for_each_thread(trace->host, trace__fprintf_one_thread, &data);
2523
2524         return data.printed;
2525 }
2526
2527 static int trace__set_duration(const struct option *opt, const char *str,
2528                                int unset __maybe_unused)
2529 {
2530         struct trace *trace = opt->value;
2531
2532         trace->duration_filter = atof(str);
2533         return 0;
2534 }
2535
2536 static int trace__set_filter_pids(const struct option *opt, const char *str,
2537                                   int unset __maybe_unused)
2538 {
2539         int ret = -1;
2540         size_t i;
2541         struct trace *trace = opt->value;
2542         /*
2543          * FIXME: introduce a intarray class, plain parse csv and create a
2544          * { int nr, int entries[] } struct...
2545          */
2546         struct intlist *list = intlist__new(str);
2547
2548         if (list == NULL)
2549                 return -1;
2550
2551         i = trace->filter_pids.nr = intlist__nr_entries(list) + 1;
2552         trace->filter_pids.entries = calloc(i, sizeof(pid_t));
2553
2554         if (trace->filter_pids.entries == NULL)
2555                 goto out;
2556
2557         trace->filter_pids.entries[0] = getpid();
2558
2559         for (i = 1; i < trace->filter_pids.nr; ++i)
2560                 trace->filter_pids.entries[i] = intlist__entry(list, i - 1)->i;
2561
2562         intlist__delete(list);
2563         ret = 0;
2564 out:
2565         return ret;
2566 }
2567
2568 static int trace__open_output(struct trace *trace, const char *filename)
2569 {
2570         struct stat st;
2571
2572         if (!stat(filename, &st) && st.st_size) {
2573                 char oldname[PATH_MAX];
2574
2575                 scnprintf(oldname, sizeof(oldname), "%s.old", filename);
2576                 unlink(oldname);
2577                 rename(filename, oldname);
2578         }
2579
2580         trace->output = fopen(filename, "w");
2581
2582         return trace->output == NULL ? -errno : 0;
2583 }
2584
2585 static int parse_pagefaults(const struct option *opt, const char *str,
2586                             int unset __maybe_unused)
2587 {
2588         int *trace_pgfaults = opt->value;
2589
2590         if (strcmp(str, "all") == 0)
2591                 *trace_pgfaults |= TRACE_PFMAJ | TRACE_PFMIN;
2592         else if (strcmp(str, "maj") == 0)
2593                 *trace_pgfaults |= TRACE_PFMAJ;
2594         else if (strcmp(str, "min") == 0)
2595                 *trace_pgfaults |= TRACE_PFMIN;
2596         else
2597                 return -1;
2598
2599         return 0;
2600 }
2601
2602 static void evlist__set_evsel_handler(struct perf_evlist *evlist, void *handler)
2603 {
2604         struct perf_evsel *evsel;
2605
2606         evlist__for_each(evlist, evsel)
2607                 evsel->handler = handler;
2608 }
2609
2610 int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
2611 {
2612         const char * const trace_usage[] = {
2613                 "perf trace [<options>] [<command>]",
2614                 "perf trace [<options>] -- <command> [<options>]",
2615                 "perf trace record [<options>] [<command>]",
2616                 "perf trace record [<options>] -- <command> [<options>]",
2617                 NULL
2618         };
2619         struct trace trace = {
2620                 .audit = {
2621                         .machine = audit_detect_machine(),
2622                         .open_id = audit_name_to_syscall("open", trace.audit.machine),
2623                 },
2624                 .syscalls = {
2625                         . max = -1,
2626                 },
2627                 .opts = {
2628                         .target = {
2629                                 .uid       = UINT_MAX,
2630                                 .uses_mmap = true,
2631                         },
2632                         .user_freq     = UINT_MAX,
2633                         .user_interval = ULLONG_MAX,
2634                         .no_buffering  = true,
2635                         .mmap_pages    = UINT_MAX,
2636                 },
2637                 .output = stdout,
2638                 .show_comm = true,
2639                 .trace_syscalls = true,
2640         };
2641         const char *output_name = NULL;
2642         const char *ev_qualifier_str = NULL;
2643         const struct option trace_options[] = {
2644         OPT_CALLBACK(0, "event", &trace.evlist, "event",
2645                      "event selector. use 'perf list' to list available events",
2646                      parse_events_option),
2647         OPT_BOOLEAN(0, "comm", &trace.show_comm,
2648                     "show the thread COMM next to its id"),
2649         OPT_BOOLEAN(0, "tool_stats", &trace.show_tool_stats, "show tool stats"),
2650         OPT_STRING('e', "expr", &ev_qualifier_str, "expr",
2651                     "list of events to trace"),
2652         OPT_STRING('o', "output", &output_name, "file", "output file name"),
2653         OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"),
2654         OPT_STRING('p', "pid", &trace.opts.target.pid, "pid",
2655                     "trace events on existing process id"),
2656         OPT_STRING('t', "tid", &trace.opts.target.tid, "tid",
2657                     "trace events on existing thread id"),
2658         OPT_CALLBACK(0, "filter-pids", &trace, "float",
2659                      "show only events with duration > N.M ms", trace__set_filter_pids),
2660         OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide,
2661                     "system-wide collection from all CPUs"),
2662         OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu",
2663                     "list of cpus to monitor"),
2664         OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit,
2665                     "child tasks do not inherit counters"),
2666         OPT_CALLBACK('m', "mmap-pages", &trace.opts.mmap_pages, "pages",
2667                      "number of mmap data pages",
2668                      perf_evlist__parse_mmap_pages),
2669         OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user",
2670                    "user to profile"),
2671         OPT_CALLBACK(0, "duration", &trace, "float",
2672                      "show only events with duration > N.M ms",
2673                      trace__set_duration),
2674         OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"),
2675         OPT_INCR('v', "verbose", &verbose, "be more verbose"),
2676         OPT_BOOLEAN('T', "time", &trace.full_time,
2677                     "Show full timestamp, not time relative to first start"),
2678         OPT_BOOLEAN('s', "summary", &trace.summary_only,
2679                     "Show only syscall summary with statistics"),
2680         OPT_BOOLEAN('S', "with-summary", &trace.summary,
2681                     "Show all syscalls and summary with statistics"),
2682         OPT_CALLBACK_DEFAULT('F', "pf", &trace.trace_pgfaults, "all|maj|min",
2683                      "Trace pagefaults", parse_pagefaults, "maj"),
2684         OPT_BOOLEAN(0, "syscalls", &trace.trace_syscalls, "Trace syscalls"),
2685         OPT_END()
2686         };
2687         int err;
2688         char bf[BUFSIZ];
2689
2690         signal(SIGSEGV, sighandler_dump_stack);
2691         signal(SIGFPE, sighandler_dump_stack);
2692
2693         trace.evlist = perf_evlist__new();
2694         if (trace.evlist == NULL)
2695                 return -ENOMEM;
2696
2697         if (trace.evlist == NULL) {
2698                 pr_err("Not enough memory to run!\n");
2699                 goto out;
2700         }
2701
2702         argc = parse_options(argc, argv, trace_options, trace_usage,
2703                              PARSE_OPT_STOP_AT_NON_OPTION);
2704
2705         if (trace.trace_pgfaults) {
2706                 trace.opts.sample_address = true;
2707                 trace.opts.sample_time = true;
2708         }
2709
2710         if (trace.evlist->nr_entries > 0)
2711                 evlist__set_evsel_handler(trace.evlist, trace__event_handler);
2712
2713         if ((argc >= 1) && (strcmp(argv[0], "record") == 0))
2714                 return trace__record(&trace, argc-1, &argv[1]);
2715
2716         /* summary_only implies summary option, but don't overwrite summary if set */
2717         if (trace.summary_only)
2718                 trace.summary = trace.summary_only;
2719
2720         if (!trace.trace_syscalls && !trace.trace_pgfaults &&
2721             trace.evlist->nr_entries == 0 /* Was --events used? */) {
2722                 pr_err("Please specify something to trace.\n");
2723                 return -1;
2724         }
2725
2726         if (output_name != NULL) {
2727                 err = trace__open_output(&trace, output_name);
2728                 if (err < 0) {
2729                         perror("failed to create output file");
2730                         goto out;
2731                 }
2732         }
2733
2734         if (ev_qualifier_str != NULL) {
2735                 const char *s = ev_qualifier_str;
2736
2737                 trace.not_ev_qualifier = *s == '!';
2738                 if (trace.not_ev_qualifier)
2739                         ++s;
2740                 trace.ev_qualifier = strlist__new(true, s);
2741                 if (trace.ev_qualifier == NULL) {
2742                         fputs("Not enough memory to parse event qualifier",
2743                               trace.output);
2744                         err = -ENOMEM;
2745                         goto out_close;
2746                 }
2747         }
2748
2749         err = target__validate(&trace.opts.target);
2750         if (err) {
2751                 target__strerror(&trace.opts.target, err, bf, sizeof(bf));
2752                 fprintf(trace.output, "%s", bf);
2753                 goto out_close;
2754         }
2755
2756         err = target__parse_uid(&trace.opts.target);
2757         if (err) {
2758                 target__strerror(&trace.opts.target, err, bf, sizeof(bf));
2759                 fprintf(trace.output, "%s", bf);
2760                 goto out_close;
2761         }
2762
2763         if (!argc && target__none(&trace.opts.target))
2764                 trace.opts.target.system_wide = true;
2765
2766         if (input_name)
2767                 err = trace__replay(&trace);
2768         else
2769                 err = trace__run(&trace, argc, argv);
2770
2771 out_close:
2772         if (output_name != NULL)
2773                 fclose(trace.output);
2774 out:
2775         return err;
2776 }