Merge tag 'perf-core-for-mingo' of git://git.kernel.org/pub/scm/linux/kernel/git...
[firefly-linux-kernel-4.4.55.git] / tools / perf / builtin-trace.c
1 #include <traceevent/event-parse.h>
2 #include "builtin.h"
3 #include "util/color.h"
4 #include "util/debug.h"
5 #include "util/evlist.h"
6 #include "util/machine.h"
7 #include "util/session.h"
8 #include "util/thread.h"
9 #include "util/parse-options.h"
10 #include "util/strlist.h"
11 #include "util/intlist.h"
12 #include "util/thread_map.h"
13 #include "util/stat.h"
14 #include "trace-event.h"
15 #include "util/parse-events.h"
16
17 #include <libaudit.h>
18 #include <stdlib.h>
19 #include <sys/eventfd.h>
20 #include <sys/mman.h>
21 #include <linux/futex.h>
22
/*
 * For older distros: fallback values for constants that the system headers
 * may not provide.  Each one is defined here only when the included headers
 * did not already define it.
 */
#ifndef MAP_STACK
# define MAP_STACK              0x20000
#endif

#ifndef MADV_HWPOISON
# define MADV_HWPOISON          100
#endif

#ifndef MADV_MERGEABLE
# define MADV_MERGEABLE         12
#endif

#ifndef MADV_UNMERGEABLE
# define MADV_UNMERGEABLE       13
#endif

#ifndef EFD_SEMAPHORE
# define EFD_SEMAPHORE          1
#endif
43
/*
 * Accessor for one tracepoint field inside a sample's raw payload.
 *
 * @offset:  byte offset of the field; the readers in this file add it to
 *           sample->raw_data.
 * @integer: reader returning the field value as a u64.
 * @pointer: reader returning the address of the field.
 *
 * The two readers share storage (anonymous union), so a given tp_field is
 * used either as an integer field or as a pointer field.
 */
struct tp_field {
        int offset;
        union {
                u64 (*integer)(struct tp_field *field, struct perf_sample *sample);
                void *(*pointer)(struct tp_field *field, struct perf_sample *sample);
        };
};
51
52 #define TP_UINT_FIELD(bits) \
53 static u64 tp_field__u##bits(struct tp_field *field, struct perf_sample *sample) \
54 { \
55         u##bits value; \
56         memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
57         return value;  \
58 }
59
60 TP_UINT_FIELD(8);
61 TP_UINT_FIELD(16);
62 TP_UINT_FIELD(32);
63 TP_UINT_FIELD(64);
64
65 #define TP_UINT_FIELD__SWAPPED(bits) \
66 static u64 tp_field__swapped_u##bits(struct tp_field *field, struct perf_sample *sample) \
67 { \
68         u##bits value; \
69         memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
70         return bswap_##bits(value);\
71 }
72
73 TP_UINT_FIELD__SWAPPED(16);
74 TP_UINT_FIELD__SWAPPED(32);
75 TP_UINT_FIELD__SWAPPED(64);
76
77 static int tp_field__init_uint(struct tp_field *field,
78                                struct format_field *format_field,
79                                bool needs_swap)
80 {
81         field->offset = format_field->offset;
82
83         switch (format_field->size) {
84         case 1:
85                 field->integer = tp_field__u8;
86                 break;
87         case 2:
88                 field->integer = needs_swap ? tp_field__swapped_u16 : tp_field__u16;
89                 break;
90         case 4:
91                 field->integer = needs_swap ? tp_field__swapped_u32 : tp_field__u32;
92                 break;
93         case 8:
94                 field->integer = needs_swap ? tp_field__swapped_u64 : tp_field__u64;
95                 break;
96         default:
97                 return -1;
98         }
99
100         return 0;
101 }
102
103 static void *tp_field__ptr(struct tp_field *field, struct perf_sample *sample)
104 {
105         return sample->raw_data + field->offset;
106 }
107
108 static int tp_field__init_ptr(struct tp_field *field, struct format_field *format_field)
109 {
110         field->offset = format_field->offset;
111         field->pointer = tp_field__ptr;
112         return 0;
113 }
114
/*
 * Per-evsel field accessors for the syscall tracepoints, stored in
 * evsel->priv.
 *
 * @id:   integer accessor for the "id" field.
 * @args: pointer accessor for the "args" field (set up for sys_enter).
 * @ret:  integer accessor for the "ret" field (set up for sys_exit).
 *
 * @args and @ret share storage: an evsel uses one or the other.
 */
struct syscall_tp {
        struct tp_field id;
        union {
                struct tp_field args, ret;
        };
};
121
122 static int perf_evsel__init_tp_uint_field(struct perf_evsel *evsel,
123                                           struct tp_field *field,
124                                           const char *name)
125 {
126         struct format_field *format_field = perf_evsel__field(evsel, name);
127
128         if (format_field == NULL)
129                 return -1;
130
131         return tp_field__init_uint(field, format_field, evsel->needs_swap);
132 }
133
134 #define perf_evsel__init_sc_tp_uint_field(evsel, name) \
135         ({ struct syscall_tp *sc = evsel->priv;\
136            perf_evsel__init_tp_uint_field(evsel, &sc->name, #name); })
137
/*
 * Look up the tracepoint field called @name in @evsel and initialize @field
 * as a pointer accessor for it.
 *
 * Returns -1 when the field does not exist, otherwise whatever
 * tp_field__init_ptr() returns.
 */
static int perf_evsel__init_tp_ptr_field(struct perf_evsel *evsel,
                                         struct tp_field *field,
                                         const char *name)
{
        struct format_field *fmt = perf_evsel__field(evsel, name);

        if (!fmt)
                return -1;

        return tp_field__init_ptr(field, fmt);
}

/*
 * Initialize the pointer accessor 'name' of the struct syscall_tp stored in
 * evsel->priv, using the tracepoint field of the same name.
 */
#define perf_evsel__init_sc_tp_ptr_field(evsel, name) \
        ({ struct syscall_tp *sc = evsel->priv;\
           perf_evsel__init_tp_ptr_field(evsel, &sc->name, #name); })
153
/*
 * Tear down an evsel whose ->priv was allocated by this file: release
 * evsel->priv via zfree() first, then delete the evsel itself.
 */
static void perf_evsel__delete_priv(struct perf_evsel *evsel)
{
        zfree(&evsel->priv);
        perf_evsel__delete(evsel);
}
159
160 static int perf_evsel__init_syscall_tp(struct perf_evsel *evsel, void *handler)
161 {
162         evsel->priv = malloc(sizeof(struct syscall_tp));
163         if (evsel->priv != NULL) {
164                 if (perf_evsel__init_sc_tp_uint_field(evsel, id))
165                         goto out_delete;
166
167                 evsel->handler = handler;
168                 return 0;
169         }
170
171         return -ENOMEM;
172
173 out_delete:
174         zfree(&evsel->priv);
175         return -ENOENT;
176 }
177
/*
 * Create the tracepoint evsel for "raw_syscalls:<direction>", falling back to
 * "syscalls:<direction>", and prepare it with perf_evsel__init_syscall_tp().
 *
 * Returns the new evsel, or NULL when neither tracepoint could be created or
 * the initialization failed (the evsel is deleted in the latter case).
 */
static struct perf_evsel *perf_evsel__syscall_newtp(const char *direction, void *handler)
{
        struct perf_evsel *tp = perf_evsel__newtp("raw_syscalls", direction);

        /* older kernel (e.g., RHEL6) use syscalls:{enter,exit} */
        if (!tp)
                tp = perf_evsel__newtp("syscalls", direction);

        if (!tp)
                return NULL;

        if (perf_evsel__init_syscall_tp(tp, handler)) {
                perf_evsel__delete_priv(tp);
                return NULL;
        }

        return tp;
}
197
/*
 * Read the syscall_tp field 'name' of 'evsel' out of 'sample' by calling the
 * ->integer reader stored for it in evsel->priv.
 */
#define perf_evsel__sc_tp_uint(evsel, name, sample) \
        ({ struct syscall_tp *fields = evsel->priv; \
           fields->name.integer(&fields->name, sample); })

/*
 * Same as perf_evsel__sc_tp_uint() but calls the ->pointer reader, yielding
 * the address of the field inside the sample.
 */
#define perf_evsel__sc_tp_ptr(evsel, name, sample) \
        ({ struct syscall_tp *fields = evsel->priv; \
           fields->name.pointer(&fields->name, sample); })
205
206 static int perf_evlist__add_syscall_newtp(struct perf_evlist *evlist,
207                                           void *sys_enter_handler,
208                                           void *sys_exit_handler)
209 {
210         int ret = -1;
211         struct perf_evsel *sys_enter, *sys_exit;
212
213         sys_enter = perf_evsel__syscall_newtp("sys_enter", sys_enter_handler);
214         if (sys_enter == NULL)
215                 goto out;
216
217         if (perf_evsel__init_sc_tp_ptr_field(sys_enter, args))
218                 goto out_delete_sys_enter;
219
220         sys_exit = perf_evsel__syscall_newtp("sys_exit", sys_exit_handler);
221         if (sys_exit == NULL)
222                 goto out_delete_sys_enter;
223
224         if (perf_evsel__init_sc_tp_uint_field(sys_exit, ret))
225                 goto out_delete_sys_exit;
226
227         perf_evlist__add(evlist, sys_enter);
228         perf_evlist__add(evlist, sys_exit);
229
230         ret = 0;
231 out:
232         return ret;
233
234 out_delete_sys_exit:
235         perf_evsel__delete_priv(sys_exit);
236 out_delete_sys_enter:
237         perf_evsel__delete_priv(sys_enter);
238         goto out;
239 }
240
241
/*
 * Context handed to the per-argument formatters (the SCA_* functions).
 *
 * @val:    raw value of the argument being formatted.
 * @thread: NOTE(review): presumably the thread that issued the syscall; not
 *          used by the formatters visible in this part of the file.
 * @trace:  NOTE(review): presumably the owning trace session; confirm.
 * @parm:   formatter-specific parameter; the strarray formatters read it as a
 *          struct strarray *.
 * @idx:    index of this argument within the syscall's argument list.
 * @mask:   bitmask with one bit per argument index; formatters set bits for
 *          arguments that should be masked (e.g. open's mode when O_CREAT is
 *          not given).
 */
struct syscall_arg {
        unsigned long val;
        struct thread *thread;
        struct trace  *trace;
        void          *parm;
        u8            idx;
        u8            mask;
};
250
/*
 * A table of names for a range of integer values.
 *
 * @offset:     value that maps to entries[0]; it is subtracted from the
 *              argument value before indexing.
 * @nr_entries: number of slots in @entries.
 * @entries:    the names.
 */
struct strarray {
        int         offset;
        int         nr_entries;
        const char **entries;
};

/* Define strarray__<array> wrapping the string table 'array', offset 0. */
#define DEFINE_STRARRAY(array) struct strarray strarray__##array = { \
        .nr_entries = ARRAY_SIZE(array), \
        .entries = array, \
}

/* As DEFINE_STRARRAY(), for tables whose first entry names the value 'off'. */
#define DEFINE_STRARRAY_OFFSET(array, off) struct strarray strarray__##array = { \
        .offset     = off, \
        .nr_entries = ARRAY_SIZE(array), \
        .entries = array, \
}
267
268 static size_t __syscall_arg__scnprintf_strarray(char *bf, size_t size,
269                                                 const char *intfmt,
270                                                 struct syscall_arg *arg)
271 {
272         struct strarray *sa = arg->parm;
273         int idx = arg->val - sa->offset;
274
275         if (idx < 0 || idx >= sa->nr_entries)
276                 return scnprintf(bf, size, intfmt, arg->val);
277
278         return scnprintf(bf, size, "%s", sa->entries[idx]);
279 }
280
/*
 * strarray formatter that prints values without a name in the table as a
 * decimal number.
 */
static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size,
                                              struct syscall_arg *arg)
{
        return __syscall_arg__scnprintf_strarray(bf, size, "%d", arg);
}

#define SCA_STRARRAY syscall_arg__scnprintf_strarray
288
#if defined(__i386__) || defined(__x86_64__)
/*
 * strarray formatter that prints values without a name in the table as a
 * hexadecimal number.
 *
 * FIXME: Make this available to all arches as soon as the ioctl beautifier
 *        gets rewritten to support all arches.
 */
static size_t syscall_arg__scnprintf_strhexarray(char *bf, size_t size,
                                                 struct syscall_arg *arg)
{
        return __syscall_arg__scnprintf_strarray(bf, size, "%#x", arg);
}

#define SCA_STRHEXARRAY syscall_arg__scnprintf_strhexarray
#endif /* defined(__i386__) || defined(__x86_64__) */
302
303 static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
304                                         struct syscall_arg *arg);
305
306 #define SCA_FD syscall_arg__scnprintf_fd
307
308 static size_t syscall_arg__scnprintf_fd_at(char *bf, size_t size,
309                                            struct syscall_arg *arg)
310 {
311         int fd = arg->val;
312
313         if (fd == AT_FDCWD)
314                 return scnprintf(bf, size, "CWD");
315
316         return syscall_arg__scnprintf_fd(bf, size, arg);
317 }
318
319 #define SCA_FDAT syscall_arg__scnprintf_fd_at
320
/*
 * Forward declaration: formatter for the fd argument of close(), so that the
 * SCA_CLOSE_FD alias can be used before the definition.
 */
static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
                                              struct syscall_arg *arg);

#define SCA_CLOSE_FD syscall_arg__scnprintf_close_fd
325
/* Format the argument as a hexadecimal number with a "0x" prefix. */
static size_t syscall_arg__scnprintf_hex(char *bf, size_t size,
                                         struct syscall_arg *arg)
{
        return scnprintf(bf, size, "%#lx", arg->val);
}

#define SCA_HEX syscall_arg__scnprintf_hex
333
334 static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size,
335                                                struct syscall_arg *arg)
336 {
337         int printed = 0, prot = arg->val;
338
339         if (prot == PROT_NONE)
340                 return scnprintf(bf, size, "NONE");
341 #define P_MMAP_PROT(n) \
342         if (prot & PROT_##n) { \
343                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
344                 prot &= ~PROT_##n; \
345         }
346
347         P_MMAP_PROT(EXEC);
348         P_MMAP_PROT(READ);
349         P_MMAP_PROT(WRITE);
350 #ifdef PROT_SEM
351         P_MMAP_PROT(SEM);
352 #endif
353         P_MMAP_PROT(GROWSDOWN);
354         P_MMAP_PROT(GROWSUP);
355 #undef P_MMAP_PROT
356
357         if (prot)
358                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", prot);
359
360         return printed;
361 }
362
363 #define SCA_MMAP_PROT syscall_arg__scnprintf_mmap_prot
364
365 static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size,
366                                                 struct syscall_arg *arg)
367 {
368         int printed = 0, flags = arg->val;
369
370 #define P_MMAP_FLAG(n) \
371         if (flags & MAP_##n) { \
372                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
373                 flags &= ~MAP_##n; \
374         }
375
376         P_MMAP_FLAG(SHARED);
377         P_MMAP_FLAG(PRIVATE);
378 #ifdef MAP_32BIT
379         P_MMAP_FLAG(32BIT);
380 #endif
381         P_MMAP_FLAG(ANONYMOUS);
382         P_MMAP_FLAG(DENYWRITE);
383         P_MMAP_FLAG(EXECUTABLE);
384         P_MMAP_FLAG(FILE);
385         P_MMAP_FLAG(FIXED);
386         P_MMAP_FLAG(GROWSDOWN);
387 #ifdef MAP_HUGETLB
388         P_MMAP_FLAG(HUGETLB);
389 #endif
390         P_MMAP_FLAG(LOCKED);
391         P_MMAP_FLAG(NONBLOCK);
392         P_MMAP_FLAG(NORESERVE);
393         P_MMAP_FLAG(POPULATE);
394         P_MMAP_FLAG(STACK);
395 #ifdef MAP_UNINITIALIZED
396         P_MMAP_FLAG(UNINITIALIZED);
397 #endif
398 #undef P_MMAP_FLAG
399
400         if (flags)
401                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
402
403         return printed;
404 }
405
406 #define SCA_MMAP_FLAGS syscall_arg__scnprintf_mmap_flags
407
408 static size_t syscall_arg__scnprintf_mremap_flags(char *bf, size_t size,
409                                                   struct syscall_arg *arg)
410 {
411         int printed = 0, flags = arg->val;
412
413 #define P_MREMAP_FLAG(n) \
414         if (flags & MREMAP_##n) { \
415                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
416                 flags &= ~MREMAP_##n; \
417         }
418
419         P_MREMAP_FLAG(MAYMOVE);
420 #ifdef MREMAP_FIXED
421         P_MREMAP_FLAG(FIXED);
422 #endif
423 #undef P_MREMAP_FLAG
424
425         if (flags)
426                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
427
428         return printed;
429 }
430
431 #define SCA_MREMAP_FLAGS syscall_arg__scnprintf_mremap_flags
432
433 static size_t syscall_arg__scnprintf_madvise_behavior(char *bf, size_t size,
434                                                       struct syscall_arg *arg)
435 {
436         int behavior = arg->val;
437
438         switch (behavior) {
439 #define P_MADV_BHV(n) case MADV_##n: return scnprintf(bf, size, #n)
440         P_MADV_BHV(NORMAL);
441         P_MADV_BHV(RANDOM);
442         P_MADV_BHV(SEQUENTIAL);
443         P_MADV_BHV(WILLNEED);
444         P_MADV_BHV(DONTNEED);
445         P_MADV_BHV(REMOVE);
446         P_MADV_BHV(DONTFORK);
447         P_MADV_BHV(DOFORK);
448         P_MADV_BHV(HWPOISON);
449 #ifdef MADV_SOFT_OFFLINE
450         P_MADV_BHV(SOFT_OFFLINE);
451 #endif
452         P_MADV_BHV(MERGEABLE);
453         P_MADV_BHV(UNMERGEABLE);
454 #ifdef MADV_HUGEPAGE
455         P_MADV_BHV(HUGEPAGE);
456 #endif
457 #ifdef MADV_NOHUGEPAGE
458         P_MADV_BHV(NOHUGEPAGE);
459 #endif
460 #ifdef MADV_DONTDUMP
461         P_MADV_BHV(DONTDUMP);
462 #endif
463 #ifdef MADV_DODUMP
464         P_MADV_BHV(DODUMP);
465 #endif
466 #undef P_MADV_PHV
467         default: break;
468         }
469
470         return scnprintf(bf, size, "%#x", behavior);
471 }
472
473 #define SCA_MADV_BHV syscall_arg__scnprintf_madvise_behavior
474
475 static size_t syscall_arg__scnprintf_flock(char *bf, size_t size,
476                                            struct syscall_arg *arg)
477 {
478         int printed = 0, op = arg->val;
479
480         if (op == 0)
481                 return scnprintf(bf, size, "NONE");
482 #define P_CMD(cmd) \
483         if ((op & LOCK_##cmd) == LOCK_##cmd) { \
484                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #cmd); \
485                 op &= ~LOCK_##cmd; \
486         }
487
488         P_CMD(SH);
489         P_CMD(EX);
490         P_CMD(NB);
491         P_CMD(UN);
492         P_CMD(MAND);
493         P_CMD(RW);
494         P_CMD(READ);
495         P_CMD(WRITE);
496 #undef P_OP
497
498         if (op)
499                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", op);
500
501         return printed;
502 }
503
504 #define SCA_FLOCK syscall_arg__scnprintf_flock
505
506 static size_t syscall_arg__scnprintf_futex_op(char *bf, size_t size, struct syscall_arg *arg)
507 {
508         enum syscall_futex_args {
509                 SCF_UADDR   = (1 << 0),
510                 SCF_OP      = (1 << 1),
511                 SCF_VAL     = (1 << 2),
512                 SCF_TIMEOUT = (1 << 3),
513                 SCF_UADDR2  = (1 << 4),
514                 SCF_VAL3    = (1 << 5),
515         };
516         int op = arg->val;
517         int cmd = op & FUTEX_CMD_MASK;
518         size_t printed = 0;
519
520         switch (cmd) {
521 #define P_FUTEX_OP(n) case FUTEX_##n: printed = scnprintf(bf, size, #n);
522         P_FUTEX_OP(WAIT);           arg->mask |= SCF_VAL3|SCF_UADDR2;             break;
523         P_FUTEX_OP(WAKE);           arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
524         P_FUTEX_OP(FD);             arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
525         P_FUTEX_OP(REQUEUE);        arg->mask |= SCF_VAL3|SCF_TIMEOUT;            break;
526         P_FUTEX_OP(CMP_REQUEUE);    arg->mask |= SCF_TIMEOUT;                     break;
527         P_FUTEX_OP(CMP_REQUEUE_PI); arg->mask |= SCF_TIMEOUT;                     break;
528         P_FUTEX_OP(WAKE_OP);                                                      break;
529         P_FUTEX_OP(LOCK_PI);        arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
530         P_FUTEX_OP(UNLOCK_PI);      arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
531         P_FUTEX_OP(TRYLOCK_PI);     arg->mask |= SCF_VAL3|SCF_UADDR2;             break;
532         P_FUTEX_OP(WAIT_BITSET);    arg->mask |= SCF_UADDR2;                      break;
533         P_FUTEX_OP(WAKE_BITSET);    arg->mask |= SCF_UADDR2;                      break;
534         P_FUTEX_OP(WAIT_REQUEUE_PI);                                              break;
535         default: printed = scnprintf(bf, size, "%#x", cmd);                       break;
536         }
537
538         if (op & FUTEX_PRIVATE_FLAG)
539                 printed += scnprintf(bf + printed, size - printed, "|PRIV");
540
541         if (op & FUTEX_CLOCK_REALTIME)
542                 printed += scnprintf(bf + printed, size - printed, "|CLKRT");
543
544         return printed;
545 }
546
547 #define SCA_FUTEX_OP  syscall_arg__scnprintf_futex_op
548
/*
 * Name tables for the strarray formatters.  Each table is indexed by the
 * argument value minus the offset given in its DEFINE_STRARRAY*() line, so
 * the order of the strings must follow the numeric values of the constants
 * they name.
 */

/* epoll_ctl() 'op'; the first entry names the value 1. */
static const char *epoll_ctl_ops[] = { "ADD", "DEL", "MOD", };
static DEFINE_STRARRAY_OFFSET(epoll_ctl_ops, 1);

/* Interval timer identifiers. */
static const char *itimers[] = { "REAL", "VIRTUAL", "PROF", };
static DEFINE_STRARRAY(itimers);

/* lseek() 'whence'; DATA/HOLE are only listed when the headers define them. */
static const char *whences[] = { "SET", "CUR", "END",
#ifdef SEEK_DATA
"DATA",
#endif
#ifdef SEEK_HOLE
"HOLE",
#endif
};
static DEFINE_STRARRAY(whences);

/* fcntl() 'cmd'. */
static const char *fcntl_cmds[] = {
        "DUPFD", "GETFD", "SETFD", "GETFL", "SETFL", "GETLK", "SETLK",
        "SETLKW", "SETOWN", "GETOWN", "SETSIG", "GETSIG", "F_GETLK64",
        "F_SETLK64", "F_SETLKW64", "F_SETOWN_EX", "F_GETOWN_EX",
        "F_GETOWNER_UIDS",
};
static DEFINE_STRARRAY(fcntl_cmds);

/* Resource limit identifiers. */
static const char *rlimit_resources[] = {
        "CPU", "FSIZE", "DATA", "STACK", "CORE", "RSS", "NPROC", "NOFILE",
        "MEMLOCK", "AS", "LOCKS", "SIGPENDING", "MSGQUEUE", "NICE", "RTPRIO",
        "RTTIME",
};
static DEFINE_STRARRAY(rlimit_resources);

/* Signal mask 'how' values. */
static const char *sighow[] = { "BLOCK", "UNBLOCK", "SETMASK", };
static DEFINE_STRARRAY(sighow);

/* Clock identifiers (used for clock_gettime's clk_id). */
static const char *clockid[] = {
        "REALTIME", "MONOTONIC", "PROCESS_CPUTIME_ID", "THREAD_CPUTIME_ID",
        "MONOTONIC_RAW", "REALTIME_COARSE", "MONOTONIC_COARSE",
};
static DEFINE_STRARRAY(clockid);
588
/*
 * Socket address family names, indexed by the AF_* value.
 *
 * The position of every string must equal the numeric value of its AF_*
 * constant.  Value 28 (AF_MPLS) has to be present so that "CAN" (29) and all
 * the families after it line up with their numbers.
 */
static const char *socket_families[] = {
        "UNSPEC", "LOCAL", "INET", "AX25", "IPX", "APPLETALK", "NETROM",
        "BRIDGE", "ATMPVC", "X25", "INET6", "ROSE", "DECnet", "NETBEUI",
        "SECURITY", "KEY", "NETLINK", "PACKET", "ASH", "ECONET", "ATMSVC",
        "RDS", "SNA", "IRDA", "PPPOX", "WANPIPE", "LLC", "IB", "MPLS", "CAN",
        "TIPC", "BLUETOOTH", "IUCV", "RXRPC", "ISDN", "PHONET", "IEEE802154",
        "CAIF", "ALG", "NFC", "VSOCK",
};
597 static DEFINE_STRARRAY(socket_families);
598
599 #ifndef SOCK_TYPE_MASK
600 #define SOCK_TYPE_MASK 0xf
601 #endif
602
603 static size_t syscall_arg__scnprintf_socket_type(char *bf, size_t size,
604                                                       struct syscall_arg *arg)
605 {
606         size_t printed;
607         int type = arg->val,
608             flags = type & ~SOCK_TYPE_MASK;
609
610         type &= SOCK_TYPE_MASK;
611         /*
612          * Can't use a strarray, MIPS may override for ABI reasons.
613          */
614         switch (type) {
615 #define P_SK_TYPE(n) case SOCK_##n: printed = scnprintf(bf, size, #n); break;
616         P_SK_TYPE(STREAM);
617         P_SK_TYPE(DGRAM);
618         P_SK_TYPE(RAW);
619         P_SK_TYPE(RDM);
620         P_SK_TYPE(SEQPACKET);
621         P_SK_TYPE(DCCP);
622         P_SK_TYPE(PACKET);
623 #undef P_SK_TYPE
624         default:
625                 printed = scnprintf(bf, size, "%#x", type);
626         }
627
628 #define P_SK_FLAG(n) \
629         if (flags & SOCK_##n) { \
630                 printed += scnprintf(bf + printed, size - printed, "|%s", #n); \
631                 flags &= ~SOCK_##n; \
632         }
633
634         P_SK_FLAG(CLOEXEC);
635         P_SK_FLAG(NONBLOCK);
636 #undef P_SK_FLAG
637
638         if (flags)
639                 printed += scnprintf(bf + printed, size - printed, "|%#x", flags);
640
641         return printed;
642 }
643
644 #define SCA_SK_TYPE syscall_arg__scnprintf_socket_type
645
646 #ifndef MSG_PROBE
647 #define MSG_PROBE            0x10
648 #endif
649 #ifndef MSG_WAITFORONE
650 #define MSG_WAITFORONE  0x10000
651 #endif
652 #ifndef MSG_SENDPAGE_NOTLAST
653 #define MSG_SENDPAGE_NOTLAST 0x20000
654 #endif
655 #ifndef MSG_FASTOPEN
656 #define MSG_FASTOPEN         0x20000000
657 #endif
658
659 static size_t syscall_arg__scnprintf_msg_flags(char *bf, size_t size,
660                                                struct syscall_arg *arg)
661 {
662         int printed = 0, flags = arg->val;
663
664         if (flags == 0)
665                 return scnprintf(bf, size, "NONE");
666 #define P_MSG_FLAG(n) \
667         if (flags & MSG_##n) { \
668                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
669                 flags &= ~MSG_##n; \
670         }
671
672         P_MSG_FLAG(OOB);
673         P_MSG_FLAG(PEEK);
674         P_MSG_FLAG(DONTROUTE);
675         P_MSG_FLAG(TRYHARD);
676         P_MSG_FLAG(CTRUNC);
677         P_MSG_FLAG(PROBE);
678         P_MSG_FLAG(TRUNC);
679         P_MSG_FLAG(DONTWAIT);
680         P_MSG_FLAG(EOR);
681         P_MSG_FLAG(WAITALL);
682         P_MSG_FLAG(FIN);
683         P_MSG_FLAG(SYN);
684         P_MSG_FLAG(CONFIRM);
685         P_MSG_FLAG(RST);
686         P_MSG_FLAG(ERRQUEUE);
687         P_MSG_FLAG(NOSIGNAL);
688         P_MSG_FLAG(MORE);
689         P_MSG_FLAG(WAITFORONE);
690         P_MSG_FLAG(SENDPAGE_NOTLAST);
691         P_MSG_FLAG(FASTOPEN);
692         P_MSG_FLAG(CMSG_CLOEXEC);
693 #undef P_MSG_FLAG
694
695         if (flags)
696                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
697
698         return printed;
699 }
700
701 #define SCA_MSG_FLAGS syscall_arg__scnprintf_msg_flags
702
703 static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size,
704                                                  struct syscall_arg *arg)
705 {
706         size_t printed = 0;
707         int mode = arg->val;
708
709         if (mode == F_OK) /* 0 */
710                 return scnprintf(bf, size, "F");
711 #define P_MODE(n) \
712         if (mode & n##_OK) { \
713                 printed += scnprintf(bf + printed, size - printed, "%s", #n); \
714                 mode &= ~n##_OK; \
715         }
716
717         P_MODE(R);
718         P_MODE(W);
719         P_MODE(X);
720 #undef P_MODE
721
722         if (mode)
723                 printed += scnprintf(bf + printed, size - printed, "|%#x", mode);
724
725         return printed;
726 }
727
728 #define SCA_ACCMODE syscall_arg__scnprintf_access_mode
729
730 static size_t syscall_arg__scnprintf_open_flags(char *bf, size_t size,
731                                                struct syscall_arg *arg)
732 {
733         int printed = 0, flags = arg->val;
734
735         if (!(flags & O_CREAT))
736                 arg->mask |= 1 << (arg->idx + 1); /* Mask the mode parm */
737
738         if (flags == 0)
739                 return scnprintf(bf, size, "RDONLY");
740 #define P_FLAG(n) \
741         if (flags & O_##n) { \
742                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
743                 flags &= ~O_##n; \
744         }
745
746         P_FLAG(APPEND);
747         P_FLAG(ASYNC);
748         P_FLAG(CLOEXEC);
749         P_FLAG(CREAT);
750         P_FLAG(DIRECT);
751         P_FLAG(DIRECTORY);
752         P_FLAG(EXCL);
753         P_FLAG(LARGEFILE);
754         P_FLAG(NOATIME);
755         P_FLAG(NOCTTY);
756 #ifdef O_NONBLOCK
757         P_FLAG(NONBLOCK);
758 #elif O_NDELAY
759         P_FLAG(NDELAY);
760 #endif
761 #ifdef O_PATH
762         P_FLAG(PATH);
763 #endif
764         P_FLAG(RDWR);
765 #ifdef O_DSYNC
766         if ((flags & O_SYNC) == O_SYNC)
767                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", "SYNC");
768         else {
769                 P_FLAG(DSYNC);
770         }
771 #else
772         P_FLAG(SYNC);
773 #endif
774         P_FLAG(TRUNC);
775         P_FLAG(WRONLY);
776 #undef P_FLAG
777
778         if (flags)
779                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
780
781         return printed;
782 }
783
784 #define SCA_OPEN_FLAGS syscall_arg__scnprintf_open_flags
785
786 static size_t syscall_arg__scnprintf_eventfd_flags(char *bf, size_t size,
787                                                    struct syscall_arg *arg)
788 {
789         int printed = 0, flags = arg->val;
790
791         if (flags == 0)
792                 return scnprintf(bf, size, "NONE");
793 #define P_FLAG(n) \
794         if (flags & EFD_##n) { \
795                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
796                 flags &= ~EFD_##n; \
797         }
798
799         P_FLAG(SEMAPHORE);
800         P_FLAG(CLOEXEC);
801         P_FLAG(NONBLOCK);
802 #undef P_FLAG
803
804         if (flags)
805                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
806
807         return printed;
808 }
809
810 #define SCA_EFD_FLAGS syscall_arg__scnprintf_eventfd_flags
811
812 static size_t syscall_arg__scnprintf_pipe_flags(char *bf, size_t size,
813                                                 struct syscall_arg *arg)
814 {
815         int printed = 0, flags = arg->val;
816
817 #define P_FLAG(n) \
818         if (flags & O_##n) { \
819                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
820                 flags &= ~O_##n; \
821         }
822
823         P_FLAG(CLOEXEC);
824         P_FLAG(NONBLOCK);
825 #undef P_FLAG
826
827         if (flags)
828                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
829
830         return printed;
831 }
832
833 #define SCA_PIPE_FLAGS syscall_arg__scnprintf_pipe_flags
834
835 static size_t syscall_arg__scnprintf_signum(char *bf, size_t size, struct syscall_arg *arg)
836 {
837         int sig = arg->val;
838
839         switch (sig) {
840 #define P_SIGNUM(n) case SIG##n: return scnprintf(bf, size, #n)
841         P_SIGNUM(HUP);
842         P_SIGNUM(INT);
843         P_SIGNUM(QUIT);
844         P_SIGNUM(ILL);
845         P_SIGNUM(TRAP);
846         P_SIGNUM(ABRT);
847         P_SIGNUM(BUS);
848         P_SIGNUM(FPE);
849         P_SIGNUM(KILL);
850         P_SIGNUM(USR1);
851         P_SIGNUM(SEGV);
852         P_SIGNUM(USR2);
853         P_SIGNUM(PIPE);
854         P_SIGNUM(ALRM);
855         P_SIGNUM(TERM);
856         P_SIGNUM(CHLD);
857         P_SIGNUM(CONT);
858         P_SIGNUM(STOP);
859         P_SIGNUM(TSTP);
860         P_SIGNUM(TTIN);
861         P_SIGNUM(TTOU);
862         P_SIGNUM(URG);
863         P_SIGNUM(XCPU);
864         P_SIGNUM(XFSZ);
865         P_SIGNUM(VTALRM);
866         P_SIGNUM(PROF);
867         P_SIGNUM(WINCH);
868         P_SIGNUM(IO);
869         P_SIGNUM(PWR);
870         P_SIGNUM(SYS);
871 #ifdef SIGEMT
872         P_SIGNUM(EMT);
873 #endif
874 #ifdef SIGSTKFLT
875         P_SIGNUM(STKFLT);
876 #endif
877 #ifdef SIGSWI
878         P_SIGNUM(SWI);
879 #endif
880         default: break;
881         }
882
883         return scnprintf(bf, size, "%#x", sig);
884 }
885
886 #define SCA_SIGNUM syscall_arg__scnprintf_signum
887
888 #if defined(__i386__) || defined(__x86_64__)
889 /*
890  * FIXME: Make this available to all arches.
891  */
#define TCGETS          0x5401

/*
 * Names of the tty ioctl requests in the 0x54xx range.
 *
 * The table is looked up with (request - TCGETS), i.e. entry 0 is TCGETS
 * itself, so every designated index below is written relative to TCGETS.
 * Request numbers that are skipped leave NULL slots in the table.
 */
static const char *tioctls[] = {
        "TCGETS", "TCSETS", "TCSETSW", "TCSETSF", "TCGETA", "TCSETA", "TCSETAW",
        "TCSETAF", "TCSBRK", "TCXONC", "TCFLSH", "TIOCEXCL", "TIOCNXCL",
        "TIOCSCTTY", "TIOCGPGRP", "TIOCSPGRP", "TIOCOUTQ", "TIOCSTI",
        "TIOCGWINSZ", "TIOCSWINSZ", "TIOCMGET", "TIOCMBIS", "TIOCMBIC",
        "TIOCMSET", "TIOCGSOFTCAR", "TIOCSSOFTCAR", "FIONREAD", "TIOCLINUX",
        "TIOCCONS", "TIOCGSERIAL", "TIOCSSERIAL", "TIOCPKT", "FIONBIO",
        "TIOCNOTTY", "TIOCSETD", "TIOCGETD", "TCSBRKP",
        [0x5427 - TCGETS] = "TIOCSBRK",
        "TIOCCBRK", "TIOCGSID", "TCGETS2", "TCSETS2", "TCSETSW2", "TCSETSF2",
        "TIOCGRS485", "TIOCSRS485", "TIOCGPTN", "TIOCSPTLCK",
        "TIOCGDEV||TCGETX", "TCSETX", "TCSETXF", "TCSETXW", "TIOCSIG",
        "TIOCVHANGUP", "TIOCGPKT", "TIOCGPTLCK",
        [0x5440 - TCGETS] = "TIOCGEXCL",
        [0x5450 - TCGETS] = "FIONCLEX", "FIOCLEX", "FIOASYNC", "TIOCSERCONFIG",
        "TIOCSERGWILD", "TIOCSERSWILD", "TIOCGLCKTRMIOS", "TIOCSLCKTRMIOS",
        "TIOCSERGSTRUCT", "TIOCSERGETLSR", "TIOCSERGETMULTI", "TIOCSERSETMULTI",
        "TIOCMIWAIT", "TIOCGICOUNT",
        [0x5460 - TCGETS] = "FIOQSIZE",
};
911
912 static DEFINE_STRARRAY_OFFSET(tioctls, 0x5401);
913 #endif /* defined(__i386__) || defined(__x86_64__) */
914
/*
 * Initializer fragment for a struct syscall_fmt entry: format argument number
 * 'arg' with SCA_STRARRAY using the table strarray__<array>.  'name' is not
 * used by the expansion; it only labels the argument at the point of use.
 */
#define STRARRAY(arg, name, array) \
          .arg_scnprintf = { [arg] = SCA_STRARRAY, }, \
          .arg_parm      = { [arg] = &strarray__##array, }
918
919 static struct syscall_fmt {
920         const char *name;
921         const char *alias;
922         size_t     (*arg_scnprintf[6])(char *bf, size_t size, struct syscall_arg *arg);
923         void       *arg_parm[6];
924         bool       errmsg;
925         bool       timeout;
926         bool       hexret;
927 } syscall_fmts[] = {
928         { .name     = "access",     .errmsg = true,
929           .arg_scnprintf = { [1] = SCA_ACCMODE, /* mode */ }, },
930         { .name     = "arch_prctl", .errmsg = true, .alias = "prctl", },
931         { .name     = "brk",        .hexret = true,
932           .arg_scnprintf = { [0] = SCA_HEX, /* brk */ }, },
933         { .name     = "clock_gettime",  .errmsg = true, STRARRAY(0, clk_id, clockid), },
934         { .name     = "close",      .errmsg = true,
935           .arg_scnprintf = { [0] = SCA_CLOSE_FD, /* fd */ }, },
936         { .name     = "connect",    .errmsg = true, },
937         { .name     = "dup",        .errmsg = true,
938           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
939         { .name     = "dup2",       .errmsg = true,
940           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
941         { .name     = "dup3",       .errmsg = true,
942           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
943         { .name     = "epoll_ctl",  .errmsg = true, STRARRAY(1, op, epoll_ctl_ops), },
944         { .name     = "eventfd2",   .errmsg = true,
945           .arg_scnprintf = { [1] = SCA_EFD_FLAGS, /* flags */ }, },
946         { .name     = "faccessat",  .errmsg = true,
947           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
948         { .name     = "fadvise64",  .errmsg = true,
949           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
950         { .name     = "fallocate",  .errmsg = true,
951           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
952         { .name     = "fchdir",     .errmsg = true,
953           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
954         { .name     = "fchmod",     .errmsg = true,
955           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
956         { .name     = "fchmodat",   .errmsg = true,
957           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
958         { .name     = "fchown",     .errmsg = true,
959           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
960         { .name     = "fchownat",   .errmsg = true,
961           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
962         { .name     = "fcntl",      .errmsg = true,
963           .arg_scnprintf = { [0] = SCA_FD, /* fd */
964                              [1] = SCA_STRARRAY, /* cmd */ },
965           .arg_parm      = { [1] = &strarray__fcntl_cmds, /* cmd */ }, },
966         { .name     = "fdatasync",  .errmsg = true,
967           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
968         { .name     = "flock",      .errmsg = true,
969           .arg_scnprintf = { [0] = SCA_FD, /* fd */
970                              [1] = SCA_FLOCK, /* cmd */ }, },
971         { .name     = "fsetxattr",  .errmsg = true,
972           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
973         { .name     = "fstat",      .errmsg = true, .alias = "newfstat",
974           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
975         { .name     = "fstatat",    .errmsg = true, .alias = "newfstatat",
976           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
977         { .name     = "fstatfs",    .errmsg = true,
978           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
979         { .name     = "fsync",    .errmsg = true,
980           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
981         { .name     = "ftruncate", .errmsg = true,
982           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
983         { .name     = "futex",      .errmsg = true,
984           .arg_scnprintf = { [1] = SCA_FUTEX_OP, /* op */ }, },
985         { .name     = "futimesat", .errmsg = true,
986           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
987         { .name     = "getdents",   .errmsg = true,
988           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
989         { .name     = "getdents64", .errmsg = true,
990           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
991         { .name     = "getitimer",  .errmsg = true, STRARRAY(0, which, itimers), },
992         { .name     = "getrlimit",  .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
993         { .name     = "ioctl",      .errmsg = true,
994           .arg_scnprintf = { [0] = SCA_FD, /* fd */
995 #if defined(__i386__) || defined(__x86_64__)
996 /*
997  * FIXME: Make this available to all arches.
998  */
999                              [1] = SCA_STRHEXARRAY, /* cmd */
1000                              [2] = SCA_HEX, /* arg */ },
1001           .arg_parm      = { [1] = &strarray__tioctls, /* cmd */ }, },
1002 #else
1003                              [2] = SCA_HEX, /* arg */ }, },
1004 #endif
1005         { .name     = "kill",       .errmsg = true,
1006           .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1007         { .name     = "linkat",     .errmsg = true,
1008           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
1009         { .name     = "lseek",      .errmsg = true,
1010           .arg_scnprintf = { [0] = SCA_FD, /* fd */
1011                              [2] = SCA_STRARRAY, /* whence */ },
1012           .arg_parm      = { [2] = &strarray__whences, /* whence */ }, },
1013         { .name     = "lstat",      .errmsg = true, .alias = "newlstat", },
1014         { .name     = "madvise",    .errmsg = true,
1015           .arg_scnprintf = { [0] = SCA_HEX,      /* start */
1016                              [2] = SCA_MADV_BHV, /* behavior */ }, },
1017         { .name     = "mkdirat",    .errmsg = true,
1018           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
1019         { .name     = "mknodat",    .errmsg = true,
1020           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
1021         { .name     = "mlock",      .errmsg = true,
1022           .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1023         { .name     = "mlockall",   .errmsg = true,
1024           .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1025         { .name     = "mmap",       .hexret = true,
1026           .arg_scnprintf = { [0] = SCA_HEX,       /* addr */
1027                              [2] = SCA_MMAP_PROT, /* prot */
1028                              [3] = SCA_MMAP_FLAGS, /* flags */
1029                              [4] = SCA_FD,        /* fd */ }, },
1030         { .name     = "mprotect",   .errmsg = true,
1031           .arg_scnprintf = { [0] = SCA_HEX, /* start */
1032                              [2] = SCA_MMAP_PROT, /* prot */ }, },
1033         { .name     = "mremap",     .hexret = true,
1034           .arg_scnprintf = { [0] = SCA_HEX, /* addr */
1035                              [3] = SCA_MREMAP_FLAGS, /* flags */
1036                              [4] = SCA_HEX, /* new_addr */ }, },
1037         { .name     = "munlock",    .errmsg = true,
1038           .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1039         { .name     = "munmap",     .errmsg = true,
1040           .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1041         { .name     = "name_to_handle_at", .errmsg = true,
1042           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1043         { .name     = "newfstatat", .errmsg = true,
1044           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1045         { .name     = "open",       .errmsg = true,
1046           .arg_scnprintf = { [1] = SCA_OPEN_FLAGS, /* flags */ }, },
1047         { .name     = "open_by_handle_at", .errmsg = true,
1048           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1049                              [2] = SCA_OPEN_FLAGS, /* flags */ }, },
1050         { .name     = "openat",     .errmsg = true,
1051           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1052                              [2] = SCA_OPEN_FLAGS, /* flags */ }, },
1053         { .name     = "pipe2",      .errmsg = true,
1054           .arg_scnprintf = { [1] = SCA_PIPE_FLAGS, /* flags */ }, },
1055         { .name     = "poll",       .errmsg = true, .timeout = true, },
1056         { .name     = "ppoll",      .errmsg = true, .timeout = true, },
1057         { .name     = "pread",      .errmsg = true, .alias = "pread64",
1058           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1059         { .name     = "preadv",     .errmsg = true, .alias = "pread",
1060           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1061         { .name     = "prlimit64",  .errmsg = true, STRARRAY(1, resource, rlimit_resources), },
1062         { .name     = "pwrite",     .errmsg = true, .alias = "pwrite64",
1063           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1064         { .name     = "pwritev",    .errmsg = true,
1065           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1066         { .name     = "read",       .errmsg = true,
1067           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1068         { .name     = "readlinkat", .errmsg = true,
1069           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1070         { .name     = "readv",      .errmsg = true,
1071           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1072         { .name     = "recvfrom",   .errmsg = true,
1073           .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1074         { .name     = "recvmmsg",   .errmsg = true,
1075           .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1076         { .name     = "recvmsg",    .errmsg = true,
1077           .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
1078         { .name     = "renameat",   .errmsg = true,
1079           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1080         { .name     = "rt_sigaction", .errmsg = true,
1081           .arg_scnprintf = { [0] = SCA_SIGNUM, /* sig */ }, },
1082         { .name     = "rt_sigprocmask",  .errmsg = true, STRARRAY(0, how, sighow), },
1083         { .name     = "rt_sigqueueinfo", .errmsg = true,
1084           .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1085         { .name     = "rt_tgsigqueueinfo", .errmsg = true,
1086           .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
1087         { .name     = "select",     .errmsg = true, .timeout = true, },
1088         { .name     = "sendmmsg",    .errmsg = true,
1089           .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1090         { .name     = "sendmsg",    .errmsg = true,
1091           .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
1092         { .name     = "sendto",     .errmsg = true,
1093           .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1094         { .name     = "setitimer",  .errmsg = true, STRARRAY(0, which, itimers), },
1095         { .name     = "setrlimit",  .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
1096         { .name     = "shutdown",   .errmsg = true,
1097           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1098         { .name     = "socket",     .errmsg = true,
1099           .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
1100                              [1] = SCA_SK_TYPE, /* type */ },
1101           .arg_parm      = { [0] = &strarray__socket_families, /* family */ }, },
1102         { .name     = "socketpair", .errmsg = true,
1103           .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
1104                              [1] = SCA_SK_TYPE, /* type */ },
1105           .arg_parm      = { [0] = &strarray__socket_families, /* family */ }, },
1106         { .name     = "stat",       .errmsg = true, .alias = "newstat", },
1107         { .name     = "symlinkat",  .errmsg = true,
1108           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1109         { .name     = "tgkill",     .errmsg = true,
1110           .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
1111         { .name     = "tkill",      .errmsg = true,
1112           .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1113         { .name     = "uname",      .errmsg = true, .alias = "newuname", },
1114         { .name     = "unlinkat",   .errmsg = true,
1115           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1116         { .name     = "utimensat",  .errmsg = true,
1117           .arg_scnprintf = { [0] = SCA_FDAT, /* dirfd */ }, },
1118         { .name     = "write",      .errmsg = true,
1119           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1120         { .name     = "writev",     .errmsg = true,
1121           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1122 };
1123
1124 static int syscall_fmt__cmp(const void *name, const void *fmtp)
1125 {
1126         const struct syscall_fmt *fmt = fmtp;
1127         return strcmp(name, fmt->name);
1128 }
1129
1130 static struct syscall_fmt *syscall_fmt__find(const char *name)
1131 {
1132         const int nmemb = ARRAY_SIZE(syscall_fmts);
1133         return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp);
1134 }
1135
/* What is known about one syscall id; one slot of trace->syscalls.table. */
struct syscall {
        /* format of the "sys_enter_<name>" (or alias) tracepoint */
        struct event_format *tp_format;
        /* syscall name; NULL while the slot has not been read yet */
        const char          *name;
        /* set when the event qualifier excludes this syscall */
        bool                filtered;
        /* true for "exit" and "exit_group" */
        bool                is_exit;
        /* matching syscall_fmts[] entry, NULL if there is none */
        struct syscall_fmt  *fmt;
        /* per-argument formatter array, allocated by syscall__set_arg_fmts() */
        size_t              (**arg_scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
        /* per-argument formatter parameters, borrowed from ->fmt */
        void                **arg_parm;
};
1145
1146 static size_t fprintf_duration(unsigned long t, FILE *fp)
1147 {
1148         double duration = (double)t / NSEC_PER_MSEC;
1149         size_t printed = fprintf(fp, "(");
1150
1151         if (duration >= 1.0)
1152                 printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration);
1153         else if (duration >= 0.01)
1154                 printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration);
1155         else
1156                 printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration);
1157         return printed + fprintf(fp, "): ");
1158 }
1159
1160 struct thread_trace {
1161         u64               entry_time;
1162         u64               exit_time;
1163         bool              entry_pending;
1164         unsigned long     nr_events;
1165         unsigned long     pfmaj, pfmin;
1166         char              *entry_str;
1167         double            runtime_ms;
1168         struct {
1169                 int       max;
1170                 char      **table;
1171         } paths;
1172
1173         struct intlist *syscall_stats;
1174 };
1175
1176 static struct thread_trace *thread_trace__new(void)
1177 {
1178         struct thread_trace *ttrace =  zalloc(sizeof(struct thread_trace));
1179
1180         if (ttrace)
1181                 ttrace->paths.max = -1;
1182
1183         ttrace->syscall_stats = intlist__new(NULL);
1184
1185         return ttrace;
1186 }
1187
1188 static struct thread_trace *thread__trace(struct thread *thread, FILE *fp)
1189 {
1190         struct thread_trace *ttrace;
1191
1192         if (thread == NULL)
1193                 goto fail;
1194
1195         if (thread__priv(thread) == NULL)
1196                 thread__set_priv(thread, thread_trace__new());
1197
1198         if (thread__priv(thread) == NULL)
1199                 goto fail;
1200
1201         ttrace = thread__priv(thread);
1202         ++ttrace->nr_events;
1203
1204         return ttrace;
1205 fail:
1206         color_fprintf(fp, PERF_COLOR_RED,
1207                       "WARNING: not enough memory, dropping samples!\n");
1208         return NULL;
1209 }
1210
1211 #define TRACE_PFMAJ             (1 << 0)
1212 #define TRACE_PFMIN             (1 << 1)
1213
1214 struct trace {
1215         struct perf_tool        tool;
1216         struct {
1217                 int             machine;
1218                 int             open_id;
1219         }                       audit;
1220         struct {
1221                 int             max;
1222                 struct syscall  *table;
1223         } syscalls;
1224         struct record_opts      opts;
1225         struct perf_evlist      *evlist;
1226         struct machine          *host;
1227         struct thread           *current;
1228         u64                     base_time;
1229         FILE                    *output;
1230         unsigned long           nr_events;
1231         struct strlist          *ev_qualifier;
1232         const char              *last_vfs_getname;
1233         struct intlist          *tid_list;
1234         struct intlist          *pid_list;
1235         struct {
1236                 size_t          nr;
1237                 pid_t           *entries;
1238         }                       filter_pids;
1239         double                  duration_filter;
1240         double                  runtime_ms;
1241         struct {
1242                 u64             vfs_getname,
1243                                 proc_getname;
1244         } stats;
1245         bool                    not_ev_qualifier;
1246         bool                    live;
1247         bool                    full_time;
1248         bool                    sched;
1249         bool                    multiple_threads;
1250         bool                    summary;
1251         bool                    summary_only;
1252         bool                    show_comm;
1253         bool                    show_tool_stats;
1254         bool                    trace_syscalls;
1255         int                     trace_pgfaults;
1256 };
1257
1258 static int trace__set_fd_pathname(struct thread *thread, int fd, const char *pathname)
1259 {
1260         struct thread_trace *ttrace = thread__priv(thread);
1261
1262         if (fd > ttrace->paths.max) {
1263                 char **npath = realloc(ttrace->paths.table, (fd + 1) * sizeof(char *));
1264
1265                 if (npath == NULL)
1266                         return -1;
1267
1268                 if (ttrace->paths.max != -1) {
1269                         memset(npath + ttrace->paths.max + 1, 0,
1270                                (fd - ttrace->paths.max) * sizeof(char *));
1271                 } else {
1272                         memset(npath, 0, (fd + 1) * sizeof(char *));
1273                 }
1274
1275                 ttrace->paths.table = npath;
1276                 ttrace->paths.max   = fd;
1277         }
1278
1279         ttrace->paths.table[fd] = strdup(pathname);
1280
1281         return ttrace->paths.table[fd] != NULL ? 0 : -1;
1282 }
1283
1284 static int thread__read_fd_path(struct thread *thread, int fd)
1285 {
1286         char linkname[PATH_MAX], pathname[PATH_MAX];
1287         struct stat st;
1288         int ret;
1289
1290         if (thread->pid_ == thread->tid) {
1291                 scnprintf(linkname, sizeof(linkname),
1292                           "/proc/%d/fd/%d", thread->pid_, fd);
1293         } else {
1294                 scnprintf(linkname, sizeof(linkname),
1295                           "/proc/%d/task/%d/fd/%d", thread->pid_, thread->tid, fd);
1296         }
1297
1298         if (lstat(linkname, &st) < 0 || st.st_size + 1 > (off_t)sizeof(pathname))
1299                 return -1;
1300
1301         ret = readlink(linkname, pathname, sizeof(pathname));
1302
1303         if (ret < 0 || ret > st.st_size)
1304                 return -1;
1305
1306         pathname[ret] = '\0';
1307         return trace__set_fd_pathname(thread, fd, pathname);
1308 }
1309
1310 static const char *thread__fd_path(struct thread *thread, int fd,
1311                                    struct trace *trace)
1312 {
1313         struct thread_trace *ttrace = thread__priv(thread);
1314
1315         if (ttrace == NULL)
1316                 return NULL;
1317
1318         if (fd < 0)
1319                 return NULL;
1320
1321         if ((fd > ttrace->paths.max || ttrace->paths.table[fd] == NULL)) {
1322                 if (!trace->live)
1323                         return NULL;
1324                 ++trace->stats.proc_getname;
1325                 if (thread__read_fd_path(thread, fd))
1326                         return NULL;
1327         }
1328
1329         return ttrace->paths.table[fd];
1330 }
1331
1332 static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
1333                                         struct syscall_arg *arg)
1334 {
1335         int fd = arg->val;
1336         size_t printed = scnprintf(bf, size, "%d", fd);
1337         const char *path = thread__fd_path(arg->thread, fd, arg->trace);
1338
1339         if (path)
1340                 printed += scnprintf(bf + printed, size - printed, "<%s>", path);
1341
1342         return printed;
1343 }
1344
1345 static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
1346                                               struct syscall_arg *arg)
1347 {
1348         int fd = arg->val;
1349         size_t printed = syscall_arg__scnprintf_fd(bf, size, arg);
1350         struct thread_trace *ttrace = thread__priv(arg->thread);
1351
1352         if (ttrace && fd >= 0 && fd <= ttrace->paths.max)
1353                 zfree(&ttrace->paths.table[fd]);
1354
1355         return printed;
1356 }
1357
1358 static bool trace__filter_duration(struct trace *trace, double t)
1359 {
1360         return t < (trace->duration_filter * NSEC_PER_MSEC);
1361 }
1362
1363 static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
1364 {
1365         double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC;
1366
1367         return fprintf(fp, "%10.3f ", ts);
1368 }
1369
/*
 * Flags set by sig_handler().
 *
 * They are written asynchronously from a signal handler, so they are
 * volatile sig_atomic_t rather than plain bool: that is the object type C
 * guarantees may be stored to from a handler, and it keeps the compiler
 * from caching the value across a polling loop.
 */
static volatile sig_atomic_t done = false;
static volatile sig_atomic_t interrupted = false;

/*
 * Signal handler: ask for termination and record whether the request came
 * from SIGINT (as opposed to any other signal this handler is installed
 * for).
 */
static void sig_handler(int sig)
{
        done = true;
        interrupted = sig == SIGINT;
}
1378
1379 static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread,
1380                                         u64 duration, u64 tstamp, FILE *fp)
1381 {
1382         size_t printed = trace__fprintf_tstamp(trace, tstamp, fp);
1383         printed += fprintf_duration(duration, fp);
1384
1385         if (trace->multiple_threads) {
1386                 if (trace->show_comm)
1387                         printed += fprintf(fp, "%.14s/", thread__comm_str(thread));
1388                 printed += fprintf(fp, "%d ", thread->tid);
1389         }
1390
1391         return printed;
1392 }
1393
1394 static int trace__process_event(struct trace *trace, struct machine *machine,
1395                                 union perf_event *event, struct perf_sample *sample)
1396 {
1397         int ret = 0;
1398
1399         switch (event->header.type) {
1400         case PERF_RECORD_LOST:
1401                 color_fprintf(trace->output, PERF_COLOR_RED,
1402                               "LOST %" PRIu64 " events!\n", event->lost.lost);
1403                 ret = machine__process_lost_event(machine, event, sample);
1404         default:
1405                 ret = machine__process_event(machine, event, sample);
1406                 break;
1407         }
1408
1409         return ret;
1410 }
1411
1412 static int trace__tool_process(struct perf_tool *tool,
1413                                union perf_event *event,
1414                                struct perf_sample *sample,
1415                                struct machine *machine)
1416 {
1417         struct trace *trace = container_of(tool, struct trace, tool);
1418         return trace__process_event(trace, machine, event, sample);
1419 }
1420
1421 static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
1422 {
1423         int err = symbol__init(NULL);
1424
1425         if (err)
1426                 return err;
1427
1428         trace->host = machine__new_host();
1429         if (trace->host == NULL)
1430                 return -ENOMEM;
1431
1432         err = __machine__synthesize_threads(trace->host, &trace->tool, &trace->opts.target,
1433                                             evlist->threads, trace__tool_process, false);
1434         if (err)
1435                 symbol__exit();
1436
1437         return err;
1438 }
1439
1440 static int syscall__set_arg_fmts(struct syscall *sc)
1441 {
1442         struct format_field *field;
1443         int idx = 0;
1444
1445         sc->arg_scnprintf = calloc(sc->tp_format->format.nr_fields - 1, sizeof(void *));
1446         if (sc->arg_scnprintf == NULL)
1447                 return -1;
1448
1449         if (sc->fmt)
1450                 sc->arg_parm = sc->fmt->arg_parm;
1451
1452         for (field = sc->tp_format->format.fields->next; field; field = field->next) {
1453                 if (sc->fmt && sc->fmt->arg_scnprintf[idx])
1454                         sc->arg_scnprintf[idx] = sc->fmt->arg_scnprintf[idx];
1455                 else if (field->flags & FIELD_IS_POINTER)
1456                         sc->arg_scnprintf[idx] = syscall_arg__scnprintf_hex;
1457                 ++idx;
1458         }
1459
1460         return 0;
1461 }
1462
1463 static int trace__read_syscall_info(struct trace *trace, int id)
1464 {
1465         char tp_name[128];
1466         struct syscall *sc;
1467         const char *name = audit_syscall_to_name(id, trace->audit.machine);
1468
1469         if (name == NULL)
1470                 return -1;
1471
1472         if (id > trace->syscalls.max) {
1473                 struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc));
1474
1475                 if (nsyscalls == NULL)
1476                         return -1;
1477
1478                 if (trace->syscalls.max != -1) {
1479                         memset(nsyscalls + trace->syscalls.max + 1, 0,
1480                                (id - trace->syscalls.max) * sizeof(*sc));
1481                 } else {
1482                         memset(nsyscalls, 0, (id + 1) * sizeof(*sc));
1483                 }
1484
1485                 trace->syscalls.table = nsyscalls;
1486                 trace->syscalls.max   = id;
1487         }
1488
1489         sc = trace->syscalls.table + id;
1490         sc->name = name;
1491
1492         if (trace->ev_qualifier) {
1493                 bool in = strlist__find(trace->ev_qualifier, name) != NULL;
1494
1495                 if (!(in ^ trace->not_ev_qualifier)) {
1496                         sc->filtered = true;
1497                         /*
1498                          * No need to do read tracepoint information since this will be
1499                          * filtered out.
1500                          */
1501                         return 0;
1502                 }
1503         }
1504
1505         sc->fmt  = syscall_fmt__find(sc->name);
1506
1507         snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name);
1508         sc->tp_format = trace_event__tp_format("syscalls", tp_name);
1509
1510         if (sc->tp_format == NULL && sc->fmt && sc->fmt->alias) {
1511                 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias);
1512                 sc->tp_format = trace_event__tp_format("syscalls", tp_name);
1513         }
1514
1515         if (sc->tp_format == NULL)
1516                 return -1;
1517
1518         sc->is_exit = !strcmp(name, "exit_group") || !strcmp(name, "exit");
1519
1520         return syscall__set_arg_fmts(sc);
1521 }
1522
1523 /*
1524  * args is to be interpreted as a series of longs but we need to handle
1525  * 8-byte unaligned accesses. args points to raw_data within the event
1526  * and raw_data is guaranteed to be 8-byte unaligned because it is
1527  * preceded by raw_size which is a u32. So we need to copy args to a temp
1528  * variable to read it. Most notably this avoids extended load instructions
1529  * on unaligned addresses
1530  */
1531
1532 static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
1533                                       unsigned char *args, struct trace *trace,
1534                                       struct thread *thread)
1535 {
1536         size_t printed = 0;
1537         unsigned char *p;
1538         unsigned long val;
1539
1540         if (sc->tp_format != NULL) {
1541                 struct format_field *field;
1542                 u8 bit = 1;
1543                 struct syscall_arg arg = {
1544                         .idx    = 0,
1545                         .mask   = 0,
1546                         .trace  = trace,
1547                         .thread = thread,
1548                 };
1549
1550                 for (field = sc->tp_format->format.fields->next; field;
1551                      field = field->next, ++arg.idx, bit <<= 1) {
1552                         if (arg.mask & bit)
1553                                 continue;
1554
1555                         /* special care for unaligned accesses */
1556                         p = args + sizeof(unsigned long) * arg.idx;
1557                         memcpy(&val, p, sizeof(val));
1558
1559                         /*
1560                          * Suppress this argument if its value is zero and
1561                          * and we don't have a string associated in an
1562                          * strarray for it.
1563                          */
1564                         if (val == 0 &&
1565                             !(sc->arg_scnprintf &&
1566                               sc->arg_scnprintf[arg.idx] == SCA_STRARRAY &&
1567                               sc->arg_parm[arg.idx]))
1568                                 continue;
1569
1570                         printed += scnprintf(bf + printed, size - printed,
1571                                              "%s%s: ", printed ? ", " : "", field->name);
1572                         if (sc->arg_scnprintf && sc->arg_scnprintf[arg.idx]) {
1573                                 arg.val = val;
1574                                 if (sc->arg_parm)
1575                                         arg.parm = sc->arg_parm[arg.idx];
1576                                 printed += sc->arg_scnprintf[arg.idx](bf + printed,
1577                                                                       size - printed, &arg);
1578                         } else {
1579                                 printed += scnprintf(bf + printed, size - printed,
1580                                                      "%ld", val);
1581                         }
1582                 }
1583         } else {
1584                 int i = 0;
1585
1586                 while (i < 6) {
1587                         /* special care for unaligned accesses */
1588                         p = args + sizeof(unsigned long) * i;
1589                         memcpy(&val, p, sizeof(val));
1590                         printed += scnprintf(bf + printed, size - printed,
1591                                              "%sarg%d: %ld",
1592                                              printed ? ", " : "", i, val);
1593                         ++i;
1594                 }
1595         }
1596
1597         return printed;
1598 }
1599
/*
 * Signature of the per-tracepoint sample handlers (trace__sys_enter() has
 * this shape): they receive the session, the evsel the sample belongs to,
 * the raw event and the parsed sample.
 */
typedef int (*tracepoint_handler)(struct trace *trace,
                                  struct perf_evsel *evsel,
                                  union perf_event *event,
                                  struct perf_sample *sample);
1603
1604 static struct syscall *trace__syscall_info(struct trace *trace,
1605                                            struct perf_evsel *evsel, int id)
1606 {
1607
1608         if (id < 0) {
1609
1610                 /*
1611                  * XXX: Noticed on x86_64, reproduced as far back as 3.0.36, haven't tried
1612                  * before that, leaving at a higher verbosity level till that is
1613                  * explained. Reproduced with plain ftrace with:
1614                  *
1615                  * echo 1 > /t/events/raw_syscalls/sys_exit/enable
1616                  * grep "NR -1 " /t/trace_pipe
1617                  *
1618                  * After generating some load on the machine.
1619                  */
1620                 if (verbose > 1) {
1621                         static u64 n;
1622                         fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n",
1623                                 id, perf_evsel__name(evsel), ++n);
1624                 }
1625                 return NULL;
1626         }
1627
1628         if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) &&
1629             trace__read_syscall_info(trace, id))
1630                 goto out_cant_read;
1631
1632         if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL))
1633                 goto out_cant_read;
1634
1635         return &trace->syscalls.table[id];
1636
1637 out_cant_read:
1638         if (verbose) {
1639                 fprintf(trace->output, "Problems reading syscall %d", id);
1640                 if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL)
1641                         fprintf(trace->output, "(%s)", trace->syscalls.table[id].name);
1642                 fputs(" information\n", trace->output);
1643         }
1644         return NULL;
1645 }
1646
1647 static void thread__update_stats(struct thread_trace *ttrace,
1648                                  int id, struct perf_sample *sample)
1649 {
1650         struct int_node *inode;
1651         struct stats *stats;
1652         u64 duration = 0;
1653
1654         inode = intlist__findnew(ttrace->syscall_stats, id);
1655         if (inode == NULL)
1656                 return;
1657
1658         stats = inode->priv;
1659         if (stats == NULL) {
1660                 stats = malloc(sizeof(struct stats));
1661                 if (stats == NULL)
1662                         return;
1663                 init_stats(stats);
1664                 inode->priv = stats;
1665         }
1666
1667         if (ttrace->entry_time && sample->time > ttrace->entry_time)
1668                 duration = sample->time - ttrace->entry_time;
1669
1670         update_stats(stats, duration);
1671 }
1672
1673 static int trace__printf_interrupted_entry(struct trace *trace, struct perf_sample *sample)
1674 {
1675         struct thread_trace *ttrace;
1676         u64 duration;
1677         size_t printed;
1678
1679         if (trace->current == NULL)
1680                 return 0;
1681
1682         ttrace = thread__priv(trace->current);
1683
1684         if (!ttrace->entry_pending)
1685                 return 0;
1686
1687         duration = sample->time - ttrace->entry_time;
1688
1689         printed  = trace__fprintf_entry_head(trace, trace->current, duration, sample->time, trace->output);
1690         printed += fprintf(trace->output, "%-70s) ...\n", ttrace->entry_str);
1691         ttrace->entry_pending = false;
1692
1693         return printed;
1694 }
1695
1696 static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
1697                             union perf_event *event __maybe_unused,
1698                             struct perf_sample *sample)
1699 {
1700         char *msg;
1701         void *args;
1702         size_t printed = 0;
1703         struct thread *thread;
1704         int id = perf_evsel__sc_tp_uint(evsel, id, sample);
1705         struct syscall *sc = trace__syscall_info(trace, evsel, id);
1706         struct thread_trace *ttrace;
1707
1708         if (sc == NULL)
1709                 return -1;
1710
1711         if (sc->filtered)
1712                 return 0;
1713
1714         thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1715         ttrace = thread__trace(thread, trace->output);
1716         if (ttrace == NULL)
1717                 return -1;
1718
1719         args = perf_evsel__sc_tp_ptr(evsel, args, sample);
1720
1721         if (ttrace->entry_str == NULL) {
1722                 ttrace->entry_str = malloc(1024);
1723                 if (!ttrace->entry_str)
1724                         return -1;
1725         }
1726
1727         printed += trace__printf_interrupted_entry(trace, sample);
1728
1729         ttrace->entry_time = sample->time;
1730         msg = ttrace->entry_str;
1731         printed += scnprintf(msg + printed, 1024 - printed, "%s(", sc->name);
1732
1733         printed += syscall__scnprintf_args(sc, msg + printed, 1024 - printed,
1734                                            args, trace, thread);
1735
1736         if (sc->is_exit) {
1737                 if (!trace->duration_filter && !trace->summary_only) {
1738                         trace__fprintf_entry_head(trace, thread, 1, sample->time, trace->output);
1739                         fprintf(trace->output, "%-70s\n", ttrace->entry_str);
1740                 }
1741         } else
1742                 ttrace->entry_pending = true;
1743
1744         trace->current = thread;
1745
1746         return 0;
1747 }
1748
1749 static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
1750                            union perf_event *event __maybe_unused,
1751                            struct perf_sample *sample)
1752 {
1753         long ret;
1754         u64 duration = 0;
1755         struct thread *thread;
1756         int id = perf_evsel__sc_tp_uint(evsel, id, sample);
1757         struct syscall *sc = trace__syscall_info(trace, evsel, id);
1758         struct thread_trace *ttrace;
1759
1760         if (sc == NULL)
1761                 return -1;
1762
1763         if (sc->filtered)
1764                 return 0;
1765
1766         thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1767         ttrace = thread__trace(thread, trace->output);
1768         if (ttrace == NULL)
1769                 return -1;
1770
1771         if (trace->summary)
1772                 thread__update_stats(ttrace, id, sample);
1773
1774         ret = perf_evsel__sc_tp_uint(evsel, ret, sample);
1775
1776         if (id == trace->audit.open_id && ret >= 0 && trace->last_vfs_getname) {
1777                 trace__set_fd_pathname(thread, ret, trace->last_vfs_getname);
1778                 trace->last_vfs_getname = NULL;
1779                 ++trace->stats.vfs_getname;
1780         }
1781
1782         ttrace->exit_time = sample->time;
1783
1784         if (ttrace->entry_time) {
1785                 duration = sample->time - ttrace->entry_time;
1786                 if (trace__filter_duration(trace, duration))
1787                         goto out;
1788         } else if (trace->duration_filter)
1789                 goto out;
1790
1791         if (trace->summary_only)
1792                 goto out;
1793
1794         trace__fprintf_entry_head(trace, thread, duration, sample->time, trace->output);
1795
1796         if (ttrace->entry_pending) {
1797                 fprintf(trace->output, "%-70s", ttrace->entry_str);
1798         } else {
1799                 fprintf(trace->output, " ... [");
1800                 color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued");
1801                 fprintf(trace->output, "]: %s()", sc->name);
1802         }
1803
1804         if (sc->fmt == NULL) {
1805 signed_print:
1806                 fprintf(trace->output, ") = %ld", ret);
1807         } else if (ret < 0 && sc->fmt->errmsg) {
1808                 char bf[STRERR_BUFSIZE];
1809                 const char *emsg = strerror_r(-ret, bf, sizeof(bf)),
1810                            *e = audit_errno_to_name(-ret);
1811
1812                 fprintf(trace->output, ") = -1 %s %s", e, emsg);
1813         } else if (ret == 0 && sc->fmt->timeout)
1814                 fprintf(trace->output, ") = 0 Timeout");
1815         else if (sc->fmt->hexret)
1816                 fprintf(trace->output, ") = %#lx", ret);
1817         else
1818                 goto signed_print;
1819
1820         fputc('\n', trace->output);
1821 out:
1822         ttrace->entry_pending = false;
1823
1824         return 0;
1825 }
1826
1827 static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel,
1828                               union perf_event *event __maybe_unused,
1829                               struct perf_sample *sample)
1830 {
1831         trace->last_vfs_getname = perf_evsel__rawptr(evsel, sample, "pathname");
1832         return 0;
1833 }
1834
1835 static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel,
1836                                      union perf_event *event __maybe_unused,
1837                                      struct perf_sample *sample)
1838 {
1839         u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
1840         double runtime_ms = (double)runtime / NSEC_PER_MSEC;
1841         struct thread *thread = machine__findnew_thread(trace->host,
1842                                                         sample->pid,
1843                                                         sample->tid);
1844         struct thread_trace *ttrace = thread__trace(thread, trace->output);
1845
1846         if (ttrace == NULL)
1847                 goto out_dump;
1848
1849         ttrace->runtime_ms += runtime_ms;
1850         trace->runtime_ms += runtime_ms;
1851         return 0;
1852
1853 out_dump:
1854         fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n",
1855                evsel->name,
1856                perf_evsel__strval(evsel, sample, "comm"),
1857                (pid_t)perf_evsel__intval(evsel, sample, "pid"),
1858                runtime,
1859                perf_evsel__intval(evsel, sample, "vruntime"));
1860         return 0;
1861 }
1862
1863 static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel,
1864                                 union perf_event *event __maybe_unused,
1865                                 struct perf_sample *sample)
1866 {
1867         trace__printf_interrupted_entry(trace, sample);
1868         trace__fprintf_tstamp(trace, sample->time, trace->output);
1869
1870         if (trace->trace_syscalls)
1871                 fprintf(trace->output, "(         ): ");
1872
1873         fprintf(trace->output, "%s:", evsel->name);
1874
1875         if (evsel->tp_format) {
1876                 event_format__fprintf(evsel->tp_format, sample->cpu,
1877                                       sample->raw_data, sample->raw_size,
1878                                       trace->output);
1879         }
1880
1881         fprintf(trace->output, ")\n");
1882         return 0;
1883 }
1884
1885 static void print_location(FILE *f, struct perf_sample *sample,
1886                            struct addr_location *al,
1887                            bool print_dso, bool print_sym)
1888 {
1889
1890         if ((verbose || print_dso) && al->map)
1891                 fprintf(f, "%s@", al->map->dso->long_name);
1892
1893         if ((verbose || print_sym) && al->sym)
1894                 fprintf(f, "%s+0x%" PRIx64, al->sym->name,
1895                         al->addr - al->sym->start);
1896         else if (al->map)
1897                 fprintf(f, "0x%" PRIx64, al->addr);
1898         else
1899                 fprintf(f, "0x%" PRIx64, sample->addr);
1900 }
1901
1902 static int trace__pgfault(struct trace *trace,
1903                           struct perf_evsel *evsel,
1904                           union perf_event *event,
1905                           struct perf_sample *sample)
1906 {
1907         struct thread *thread;
1908         u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
1909         struct addr_location al;
1910         char map_type = 'd';
1911         struct thread_trace *ttrace;
1912
1913         thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1914         ttrace = thread__trace(thread, trace->output);
1915         if (ttrace == NULL)
1916                 return -1;
1917
1918         if (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ)
1919                 ttrace->pfmaj++;
1920         else
1921                 ttrace->pfmin++;
1922
1923         if (trace->summary_only)
1924                 return 0;
1925
1926         thread__find_addr_location(thread, cpumode, MAP__FUNCTION,
1927                               sample->ip, &al);
1928
1929         trace__fprintf_entry_head(trace, thread, 0, sample->time, trace->output);
1930
1931         fprintf(trace->output, "%sfault [",
1932                 evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ?
1933                 "maj" : "min");
1934
1935         print_location(trace->output, sample, &al, false, true);
1936
1937         fprintf(trace->output, "] => ");
1938
1939         thread__find_addr_location(thread, cpumode, MAP__VARIABLE,
1940                                    sample->addr, &al);
1941
1942         if (!al.map) {
1943                 thread__find_addr_location(thread, cpumode,
1944                                            MAP__FUNCTION, sample->addr, &al);
1945
1946                 if (al.map)
1947                         map_type = 'x';
1948                 else
1949                         map_type = '?';
1950         }
1951
1952         print_location(trace->output, sample, &al, true, false);
1953
1954         fprintf(trace->output, " (%c%c)\n", map_type, al.level);
1955
1956         return 0;
1957 }
1958
1959 static bool skip_sample(struct trace *trace, struct perf_sample *sample)
1960 {
1961         if ((trace->pid_list && intlist__find(trace->pid_list, sample->pid)) ||
1962             (trace->tid_list && intlist__find(trace->tid_list, sample->tid)))
1963                 return false;
1964
1965         if (trace->pid_list || trace->tid_list)
1966                 return true;
1967
1968         return false;
1969 }
1970
1971 static int trace__process_sample(struct perf_tool *tool,
1972                                  union perf_event *event,
1973                                  struct perf_sample *sample,
1974                                  struct perf_evsel *evsel,
1975                                  struct machine *machine __maybe_unused)
1976 {
1977         struct trace *trace = container_of(tool, struct trace, tool);
1978         int err = 0;
1979
1980         tracepoint_handler handler = evsel->handler;
1981
1982         if (skip_sample(trace, sample))
1983                 return 0;
1984
1985         if (!trace->full_time && trace->base_time == 0)
1986                 trace->base_time = sample->time;
1987
1988         if (handler) {
1989                 ++trace->nr_events;
1990                 handler(trace, evsel, event, sample);
1991         }
1992
1993         return err;
1994 }
1995
1996 static int parse_target_str(struct trace *trace)
1997 {
1998         if (trace->opts.target.pid) {
1999                 trace->pid_list = intlist__new(trace->opts.target.pid);
2000                 if (trace->pid_list == NULL) {
2001                         pr_err("Error parsing process id string\n");
2002                         return -EINVAL;
2003                 }
2004         }
2005
2006         if (trace->opts.target.tid) {
2007                 trace->tid_list = intlist__new(trace->opts.target.tid);
2008                 if (trace->tid_list == NULL) {
2009                         pr_err("Error parsing thread id string\n");
2010                         return -EINVAL;
2011                 }
2012         }
2013
2014         return 0;
2015 }
2016
2017 static int trace__record(struct trace *trace, int argc, const char **argv)
2018 {
2019         unsigned int rec_argc, i, j;
2020         const char **rec_argv;
2021         const char * const record_args[] = {
2022                 "record",
2023                 "-R",
2024                 "-m", "1024",
2025                 "-c", "1",
2026         };
2027
2028         const char * const sc_args[] = { "-e", };
2029         unsigned int sc_args_nr = ARRAY_SIZE(sc_args);
2030         const char * const majpf_args[] = { "-e", "major-faults" };
2031         unsigned int majpf_args_nr = ARRAY_SIZE(majpf_args);
2032         const char * const minpf_args[] = { "-e", "minor-faults" };
2033         unsigned int minpf_args_nr = ARRAY_SIZE(minpf_args);
2034
2035         /* +1 is for the event string below */
2036         rec_argc = ARRAY_SIZE(record_args) + sc_args_nr + 1 +
2037                 majpf_args_nr + minpf_args_nr + argc;
2038         rec_argv = calloc(rec_argc + 1, sizeof(char *));
2039
2040         if (rec_argv == NULL)
2041                 return -ENOMEM;
2042
2043         j = 0;
2044         for (i = 0; i < ARRAY_SIZE(record_args); i++)
2045                 rec_argv[j++] = record_args[i];
2046
2047         if (trace->trace_syscalls) {
2048                 for (i = 0; i < sc_args_nr; i++)
2049                         rec_argv[j++] = sc_args[i];
2050
2051                 /* event string may be different for older kernels - e.g., RHEL6 */
2052                 if (is_valid_tracepoint("raw_syscalls:sys_enter"))
2053                         rec_argv[j++] = "raw_syscalls:sys_enter,raw_syscalls:sys_exit";
2054                 else if (is_valid_tracepoint("syscalls:sys_enter"))
2055                         rec_argv[j++] = "syscalls:sys_enter,syscalls:sys_exit";
2056                 else {
2057                         pr_err("Neither raw_syscalls nor syscalls events exist.\n");
2058                         return -1;
2059                 }
2060         }
2061
2062         if (trace->trace_pgfaults & TRACE_PFMAJ)
2063                 for (i = 0; i < majpf_args_nr; i++)
2064                         rec_argv[j++] = majpf_args[i];
2065
2066         if (trace->trace_pgfaults & TRACE_PFMIN)
2067                 for (i = 0; i < minpf_args_nr; i++)
2068                         rec_argv[j++] = minpf_args[i];
2069
2070         for (i = 0; i < (unsigned int)argc; i++)
2071                 rec_argv[j++] = argv[i];
2072
2073         return cmd_record(j, rec_argv, NULL);
2074 }
2075
/* Forward declaration: used by trace__run() before its definition. */
static size_t trace__fprintf_thread_summary(struct trace *trace,
					    FILE *fp);
2077
2078 static void perf_evlist__add_vfs_getname(struct perf_evlist *evlist)
2079 {
2080         struct perf_evsel *evsel = perf_evsel__newtp("probe", "vfs_getname");
2081         if (evsel == NULL)
2082                 return;
2083
2084         if (perf_evsel__field(evsel, "pathname") == NULL) {
2085                 perf_evsel__delete(evsel);
2086                 return;
2087         }
2088
2089         evsel->handler = trace__vfs_getname;
2090         perf_evlist__add(evlist, evsel);
2091 }
2092
2093 static int perf_evlist__add_pgfault(struct perf_evlist *evlist,
2094                                     u64 config)
2095 {
2096         struct perf_evsel *evsel;
2097         struct perf_event_attr attr = {
2098                 .type = PERF_TYPE_SOFTWARE,
2099                 .mmap_data = 1,
2100         };
2101
2102         attr.config = config;
2103         attr.sample_period = 1;
2104
2105         event_attr_init(&attr);
2106
2107         evsel = perf_evsel__new(&attr);
2108         if (!evsel)
2109                 return -ENOMEM;
2110
2111         evsel->handler = trace__pgfault;
2112         perf_evlist__add(evlist, evsel);
2113
2114         return 0;
2115 }
2116
2117 static void trace__handle_event(struct trace *trace, union perf_event *event, struct perf_sample *sample)
2118 {
2119         const u32 type = event->header.type;
2120         struct perf_evsel *evsel;
2121
2122         if (!trace->full_time && trace->base_time == 0)
2123                 trace->base_time = sample->time;
2124
2125         if (type != PERF_RECORD_SAMPLE) {
2126                 trace__process_event(trace, trace->host, event, sample);
2127                 return;
2128         }
2129
2130         evsel = perf_evlist__id2evsel(trace->evlist, sample->id);
2131         if (evsel == NULL) {
2132                 fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample->id);
2133                 return;
2134         }
2135
2136         if (evsel->attr.type == PERF_TYPE_TRACEPOINT &&
2137             sample->raw_data == NULL) {
2138                 fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
2139                        perf_evsel__name(evsel), sample->tid,
2140                        sample->cpu, sample->raw_size);
2141         } else {
2142                 tracepoint_handler handler = evsel->handler;
2143                 handler(trace, evsel, event, sample);
2144         }
2145 }
2146
2147 static int trace__run(struct trace *trace, int argc, const char **argv)
2148 {
2149         struct perf_evlist *evlist = trace->evlist;
2150         int err = -1, i;
2151         unsigned long before;
2152         const bool forks = argc > 0;
2153         bool draining = false;
2154
2155         trace->live = true;
2156
2157         if (trace->trace_syscalls &&
2158             perf_evlist__add_syscall_newtp(evlist, trace__sys_enter,
2159                                            trace__sys_exit))
2160                 goto out_error_raw_syscalls;
2161
2162         if (trace->trace_syscalls)
2163                 perf_evlist__add_vfs_getname(evlist);
2164
2165         if ((trace->trace_pgfaults & TRACE_PFMAJ) &&
2166             perf_evlist__add_pgfault(evlist, PERF_COUNT_SW_PAGE_FAULTS_MAJ)) {
2167                 goto out_error_mem;
2168         }
2169
2170         if ((trace->trace_pgfaults & TRACE_PFMIN) &&
2171             perf_evlist__add_pgfault(evlist, PERF_COUNT_SW_PAGE_FAULTS_MIN))
2172                 goto out_error_mem;
2173
2174         if (trace->sched &&
2175             perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
2176                                    trace__sched_stat_runtime))
2177                 goto out_error_sched_stat_runtime;
2178
2179         err = perf_evlist__create_maps(evlist, &trace->opts.target);
2180         if (err < 0) {
2181                 fprintf(trace->output, "Problems parsing the target to trace, check your options!\n");
2182                 goto out_delete_evlist;
2183         }
2184
2185         err = trace__symbols_init(trace, evlist);
2186         if (err < 0) {
2187                 fprintf(trace->output, "Problems initializing symbol libraries!\n");
2188                 goto out_delete_evlist;
2189         }
2190
2191         perf_evlist__config(evlist, &trace->opts);
2192
2193         signal(SIGCHLD, sig_handler);
2194         signal(SIGINT, sig_handler);
2195
2196         if (forks) {
2197                 err = perf_evlist__prepare_workload(evlist, &trace->opts.target,
2198                                                     argv, false, NULL);
2199                 if (err < 0) {
2200                         fprintf(trace->output, "Couldn't run the workload!\n");
2201                         goto out_delete_evlist;
2202                 }
2203         }
2204
2205         err = perf_evlist__open(evlist);
2206         if (err < 0)
2207                 goto out_error_open;
2208
2209         /*
2210          * Better not use !target__has_task() here because we need to cover the
2211          * case where no threads were specified in the command line, but a
2212          * workload was, and in that case we will fill in the thread_map when
2213          * we fork the workload in perf_evlist__prepare_workload.
2214          */
2215         if (trace->filter_pids.nr > 0)
2216                 err = perf_evlist__set_filter_pids(evlist, trace->filter_pids.nr, trace->filter_pids.entries);
2217         else if (evlist->threads->map[0] == -1)
2218                 err = perf_evlist__set_filter_pid(evlist, getpid());
2219
2220         if (err < 0) {
2221                 printf("err=%d,%s\n", -err, strerror(-err));
2222                 exit(1);
2223         }
2224
2225         err = perf_evlist__mmap(evlist, trace->opts.mmap_pages, false);
2226         if (err < 0)
2227                 goto out_error_mmap;
2228
2229         if (forks)
2230                 perf_evlist__start_workload(evlist);
2231         else
2232                 perf_evlist__enable(evlist);
2233
2234         trace->multiple_threads = evlist->threads->map[0] == -1 ||
2235                                   evlist->threads->nr > 1 ||
2236                                   perf_evlist__first(evlist)->attr.inherit;
2237 again:
2238         before = trace->nr_events;
2239
2240         for (i = 0; i < evlist->nr_mmaps; i++) {
2241                 union perf_event *event;
2242
2243                 while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) {
2244                         struct perf_sample sample;
2245
2246                         ++trace->nr_events;
2247
2248                         err = perf_evlist__parse_sample(evlist, event, &sample);
2249                         if (err) {
2250                                 fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err);
2251                                 goto next_event;
2252                         }
2253
2254                         trace__handle_event(trace, event, &sample);
2255 next_event:
2256                         perf_evlist__mmap_consume(evlist, i);
2257
2258                         if (interrupted)
2259                                 goto out_disable;
2260                 }
2261         }
2262
2263         if (trace->nr_events == before) {
2264                 int timeout = done ? 100 : -1;
2265
2266                 if (!draining && perf_evlist__poll(evlist, timeout) > 0) {
2267                         if (perf_evlist__filter_pollfd(evlist, POLLERR | POLLHUP) == 0)
2268                                 draining = true;
2269
2270                         goto again;
2271                 }
2272         } else {
2273                 goto again;
2274         }
2275
2276 out_disable:
2277         perf_evlist__disable(evlist);
2278
2279         if (!err) {
2280                 if (trace->summary)
2281                         trace__fprintf_thread_summary(trace, trace->output);
2282
2283                 if (trace->show_tool_stats) {
2284                         fprintf(trace->output, "Stats:\n "
2285                                                " vfs_getname : %" PRIu64 "\n"
2286                                                " proc_getname: %" PRIu64 "\n",
2287                                 trace->stats.vfs_getname,
2288                                 trace->stats.proc_getname);
2289                 }
2290         }
2291
2292 out_delete_evlist:
2293         perf_evlist__delete(evlist);
2294         trace->evlist = NULL;
2295         trace->live = false;
2296         return err;
2297 {
2298         char errbuf[BUFSIZ];
2299
2300 out_error_sched_stat_runtime:
2301         debugfs__strerror_open_tp(errno, errbuf, sizeof(errbuf), "sched", "sched_stat_runtime");
2302         goto out_error;
2303
2304 out_error_raw_syscalls:
2305         debugfs__strerror_open_tp(errno, errbuf, sizeof(errbuf), "raw_syscalls", "sys_(enter|exit)");
2306         goto out_error;
2307
2308 out_error_mmap:
2309         perf_evlist__strerror_mmap(evlist, errno, errbuf, sizeof(errbuf));
2310         goto out_error;
2311
2312 out_error_open:
2313         perf_evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf));
2314
2315 out_error:
2316         fprintf(trace->output, "%s\n", errbuf);
2317         goto out_delete_evlist;
2318 }
2319 out_error_mem:
2320         fprintf(trace->output, "Not enough memory to run!\n");
2321         goto out_delete_evlist;
2322 }
2323
2324 static int trace__replay(struct trace *trace)
2325 {
2326         const struct perf_evsel_str_handler handlers[] = {
2327                 { "probe:vfs_getname",       trace__vfs_getname, },
2328         };
2329         struct perf_data_file file = {
2330                 .path  = input_name,
2331                 .mode  = PERF_DATA_MODE_READ,
2332         };
2333         struct perf_session *session;
2334         struct perf_evsel *evsel;
2335         int err = -1;
2336
2337         trace->tool.sample        = trace__process_sample;
2338         trace->tool.mmap          = perf_event__process_mmap;
2339         trace->tool.mmap2         = perf_event__process_mmap2;
2340         trace->tool.comm          = perf_event__process_comm;
2341         trace->tool.exit          = perf_event__process_exit;
2342         trace->tool.fork          = perf_event__process_fork;
2343         trace->tool.attr          = perf_event__process_attr;
2344         trace->tool.tracing_data = perf_event__process_tracing_data;
2345         trace->tool.build_id      = perf_event__process_build_id;
2346
2347         trace->tool.ordered_events = true;
2348         trace->tool.ordering_requires_timestamps = true;
2349
2350         /* add tid to output */
2351         trace->multiple_threads = true;
2352
2353         session = perf_session__new(&file, false, &trace->tool);
2354         if (session == NULL)
2355                 return -1;
2356
2357         if (symbol__init(&session->header.env) < 0)
2358                 goto out;
2359
2360         trace->host = &session->machines.host;
2361
2362         err = perf_session__set_tracepoints_handlers(session, handlers);
2363         if (err)
2364                 goto out;
2365
2366         evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2367                                                      "raw_syscalls:sys_enter");
2368         /* older kernels have syscalls tp versus raw_syscalls */
2369         if (evsel == NULL)
2370                 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2371                                                              "syscalls:sys_enter");
2372
2373         if (evsel &&
2374             (perf_evsel__init_syscall_tp(evsel, trace__sys_enter) < 0 ||
2375             perf_evsel__init_sc_tp_ptr_field(evsel, args))) {
2376                 pr_err("Error during initialize raw_syscalls:sys_enter event\n");
2377                 goto out;
2378         }
2379
2380         evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2381                                                      "raw_syscalls:sys_exit");
2382         if (evsel == NULL)
2383                 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2384                                                              "syscalls:sys_exit");
2385         if (evsel &&
2386             (perf_evsel__init_syscall_tp(evsel, trace__sys_exit) < 0 ||
2387             perf_evsel__init_sc_tp_uint_field(evsel, ret))) {
2388                 pr_err("Error during initialize raw_syscalls:sys_exit event\n");
2389                 goto out;
2390         }
2391
2392         evlist__for_each(session->evlist, evsel) {
2393                 if (evsel->attr.type == PERF_TYPE_SOFTWARE &&
2394                     (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ||
2395                      evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MIN ||
2396                      evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS))
2397                         evsel->handler = trace__pgfault;
2398         }
2399
2400         err = parse_target_str(trace);
2401         if (err != 0)
2402                 goto out;
2403
2404         setup_pager();
2405
2406         err = perf_session__process_events(session, &trace->tool);
2407         if (err)
2408                 pr_err("Failed to process events, error %d", err);
2409
2410         else if (trace->summary)
2411                 trace__fprintf_thread_summary(trace, trace->output);
2412
2413 out:
2414         perf_session__delete(session);
2415
2416         return err;
2417 }
2418
/*
 * Write the heading of the per-thread summary to @fp.
 *
 * Returns fprintf()'s result converted to size_t.
 */
static size_t trace__fprintf_threads_header(FILE *fp)
{
	return fprintf(fp, "\n Summary of events:\n\n");
}
2427
2428 static size_t thread__dump_stats(struct thread_trace *ttrace,
2429                                  struct trace *trace, FILE *fp)
2430 {
2431         struct stats *stats;
2432         size_t printed = 0;
2433         struct syscall *sc;
2434         struct int_node *inode = intlist__first(ttrace->syscall_stats);
2435
2436         if (inode == NULL)
2437                 return 0;
2438
2439         printed += fprintf(fp, "\n");
2440
2441         printed += fprintf(fp, "   syscall            calls      min       avg       max      stddev\n");
2442         printed += fprintf(fp, "                               (msec)    (msec)    (msec)        (%%)\n");
2443         printed += fprintf(fp, "   --------------- -------- --------- --------- ---------     ------\n");
2444
2445         /* each int_node is a syscall */
2446         while (inode) {
2447                 stats = inode->priv;
2448                 if (stats) {
2449                         double min = (double)(stats->min) / NSEC_PER_MSEC;
2450                         double max = (double)(stats->max) / NSEC_PER_MSEC;
2451                         double avg = avg_stats(stats);
2452                         double pct;
2453                         u64 n = (u64) stats->n;
2454
2455                         pct = avg ? 100.0 * stddev_stats(stats)/avg : 0.0;
2456                         avg /= NSEC_PER_MSEC;
2457
2458                         sc = &trace->syscalls.table[inode->i];
2459                         printed += fprintf(fp, "   %-15s", sc->name);
2460                         printed += fprintf(fp, " %8" PRIu64 " %9.3f %9.3f",
2461                                            n, min, avg);
2462                         printed += fprintf(fp, " %9.3f %9.2f%%\n", max, pct);
2463                 }
2464
2465                 inode = intlist__next(inode);
2466         }
2467
2468         printed += fprintf(fp, "\n\n");
2469
2470         return printed;
2471 }
2472
/*
 * Context handed to the per-thread summary callback through its
 * void *priv argument.
 */
struct summary_data {
        FILE *fp;               /* stream the summary is written to */
        struct trace *trace;    /* trace session being summarised */
        size_t printed;         /* running total of fprintf() return values */
};
2479
2480 static int trace__fprintf_one_thread(struct thread *thread, void *priv)
2481 {
2482         struct summary_data *data = priv;
2483         FILE *fp = data->fp;
2484         size_t printed = data->printed;
2485         struct trace *trace = data->trace;
2486         struct thread_trace *ttrace = thread__priv(thread);
2487         double ratio;
2488
2489         if (ttrace == NULL)
2490                 return 0;
2491
2492         ratio = (double)ttrace->nr_events / trace->nr_events * 100.0;
2493
2494         printed += fprintf(fp, " %s (%d), ", thread__comm_str(thread), thread->tid);
2495         printed += fprintf(fp, "%lu events, ", ttrace->nr_events);
2496         printed += fprintf(fp, "%.1f%%", ratio);
2497         if (ttrace->pfmaj)
2498                 printed += fprintf(fp, ", %lu majfaults", ttrace->pfmaj);
2499         if (ttrace->pfmin)
2500                 printed += fprintf(fp, ", %lu minfaults", ttrace->pfmin);
2501         printed += fprintf(fp, ", %.3f msec\n", ttrace->runtime_ms);
2502         printed += thread__dump_stats(ttrace, trace, fp);
2503
2504         data->printed += printed;
2505
2506         return 0;
2507 }
2508
2509 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
2510 {
2511         struct summary_data data = {
2512                 .fp = fp,
2513                 .trace = trace
2514         };
2515         data.printed = trace__fprintf_threads_header(fp);
2516
2517         machine__for_each_thread(trace->host, trace__fprintf_one_thread, &data);
2518
2519         return data.printed;
2520 }
2521
2522 static int trace__set_duration(const struct option *opt, const char *str,
2523                                int unset __maybe_unused)
2524 {
2525         struct trace *trace = opt->value;
2526
2527         trace->duration_filter = atof(str);
2528         return 0;
2529 }
2530
2531 static int trace__set_filter_pids(const struct option *opt, const char *str,
2532                                   int unset __maybe_unused)
2533 {
2534         int ret = -1;
2535         size_t i;
2536         struct trace *trace = opt->value;
2537         /*
2538          * FIXME: introduce a intarray class, plain parse csv and create a
2539          * { int nr, int entries[] } struct...
2540          */
2541         struct intlist *list = intlist__new(str);
2542
2543         if (list == NULL)
2544                 return -1;
2545
2546         i = trace->filter_pids.nr = intlist__nr_entries(list) + 1;
2547         trace->filter_pids.entries = calloc(i, sizeof(pid_t));
2548
2549         if (trace->filter_pids.entries == NULL)
2550                 goto out;
2551
2552         trace->filter_pids.entries[0] = getpid();
2553
2554         for (i = 1; i < trace->filter_pids.nr; ++i)
2555                 trace->filter_pids.entries[i] = intlist__entry(list, i - 1)->i;
2556
2557         intlist__delete(list);
2558         ret = 0;
2559 out:
2560         return ret;
2561 }
2562
2563 static int trace__open_output(struct trace *trace, const char *filename)
2564 {
2565         struct stat st;
2566
2567         if (!stat(filename, &st) && st.st_size) {
2568                 char oldname[PATH_MAX];
2569
2570                 scnprintf(oldname, sizeof(oldname), "%s.old", filename);
2571                 unlink(oldname);
2572                 rename(filename, oldname);
2573         }
2574
2575         trace->output = fopen(filename, "w");
2576
2577         return trace->output == NULL ? -errno : 0;
2578 }
2579
2580 static int parse_pagefaults(const struct option *opt, const char *str,
2581                             int unset __maybe_unused)
2582 {
2583         int *trace_pgfaults = opt->value;
2584
2585         if (strcmp(str, "all") == 0)
2586                 *trace_pgfaults |= TRACE_PFMAJ | TRACE_PFMIN;
2587         else if (strcmp(str, "maj") == 0)
2588                 *trace_pgfaults |= TRACE_PFMAJ;
2589         else if (strcmp(str, "min") == 0)
2590                 *trace_pgfaults |= TRACE_PFMIN;
2591         else
2592                 return -1;
2593
2594         return 0;
2595 }
2596
2597 static void evlist__set_evsel_handler(struct perf_evlist *evlist, void *handler)
2598 {
2599         struct perf_evsel *evsel;
2600
2601         evlist__for_each(evlist, evsel)
2602                 evsel->handler = handler;
2603 }
2604
2605 int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
2606 {
2607         const char * const trace_usage[] = {
2608                 "perf trace [<options>] [<command>]",
2609                 "perf trace [<options>] -- <command> [<options>]",
2610                 "perf trace record [<options>] [<command>]",
2611                 "perf trace record [<options>] -- <command> [<options>]",
2612                 NULL
2613         };
2614         struct trace trace = {
2615                 .audit = {
2616                         .machine = audit_detect_machine(),
2617                         .open_id = audit_name_to_syscall("open", trace.audit.machine),
2618                 },
2619                 .syscalls = {
2620                         . max = -1,
2621                 },
2622                 .opts = {
2623                         .target = {
2624                                 .uid       = UINT_MAX,
2625                                 .uses_mmap = true,
2626                         },
2627                         .user_freq     = UINT_MAX,
2628                         .user_interval = ULLONG_MAX,
2629                         .no_buffering  = true,
2630                         .mmap_pages    = UINT_MAX,
2631                 },
2632                 .output = stdout,
2633                 .show_comm = true,
2634                 .trace_syscalls = true,
2635         };
2636         const char *output_name = NULL;
2637         const char *ev_qualifier_str = NULL;
2638         const struct option trace_options[] = {
2639         OPT_CALLBACK(0, "event", &trace.evlist, "event",
2640                      "event selector. use 'perf list' to list available events",
2641                      parse_events_option),
2642         OPT_BOOLEAN(0, "comm", &trace.show_comm,
2643                     "show the thread COMM next to its id"),
2644         OPT_BOOLEAN(0, "tool_stats", &trace.show_tool_stats, "show tool stats"),
2645         OPT_STRING('e', "expr", &ev_qualifier_str, "expr",
2646                     "list of events to trace"),
2647         OPT_STRING('o', "output", &output_name, "file", "output file name"),
2648         OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"),
2649         OPT_STRING('p', "pid", &trace.opts.target.pid, "pid",
2650                     "trace events on existing process id"),
2651         OPT_STRING('t', "tid", &trace.opts.target.tid, "tid",
2652                     "trace events on existing thread id"),
2653         OPT_CALLBACK(0, "filter-pids", &trace, "float",
2654                      "show only events with duration > N.M ms", trace__set_filter_pids),
2655         OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide,
2656                     "system-wide collection from all CPUs"),
2657         OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu",
2658                     "list of cpus to monitor"),
2659         OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit,
2660                     "child tasks do not inherit counters"),
2661         OPT_CALLBACK('m', "mmap-pages", &trace.opts.mmap_pages, "pages",
2662                      "number of mmap data pages",
2663                      perf_evlist__parse_mmap_pages),
2664         OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user",
2665                    "user to profile"),
2666         OPT_CALLBACK(0, "duration", &trace, "float",
2667                      "show only events with duration > N.M ms",
2668                      trace__set_duration),
2669         OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"),
2670         OPT_INCR('v', "verbose", &verbose, "be more verbose"),
2671         OPT_BOOLEAN('T', "time", &trace.full_time,
2672                     "Show full timestamp, not time relative to first start"),
2673         OPT_BOOLEAN('s', "summary", &trace.summary_only,
2674                     "Show only syscall summary with statistics"),
2675         OPT_BOOLEAN('S', "with-summary", &trace.summary,
2676                     "Show all syscalls and summary with statistics"),
2677         OPT_CALLBACK_DEFAULT('F', "pf", &trace.trace_pgfaults, "all|maj|min",
2678                      "Trace pagefaults", parse_pagefaults, "maj"),
2679         OPT_BOOLEAN(0, "syscalls", &trace.trace_syscalls, "Trace syscalls"),
2680         OPT_END()
2681         };
2682         int err;
2683         char bf[BUFSIZ];
2684
2685         signal(SIGSEGV, sighandler_dump_stack);
2686         signal(SIGFPE, sighandler_dump_stack);
2687
2688         trace.evlist = perf_evlist__new();
2689         if (trace.evlist == NULL)
2690                 return -ENOMEM;
2691
2692         if (trace.evlist == NULL) {
2693                 pr_err("Not enough memory to run!\n");
2694                 goto out;
2695         }
2696
2697         argc = parse_options(argc, argv, trace_options, trace_usage,
2698                              PARSE_OPT_STOP_AT_NON_OPTION);
2699
2700         if (trace.trace_pgfaults) {
2701                 trace.opts.sample_address = true;
2702                 trace.opts.sample_time = true;
2703         }
2704
2705         if (trace.evlist->nr_entries > 0)
2706                 evlist__set_evsel_handler(trace.evlist, trace__event_handler);
2707
2708         if ((argc >= 1) && (strcmp(argv[0], "record") == 0))
2709                 return trace__record(&trace, argc-1, &argv[1]);
2710
2711         /* summary_only implies summary option, but don't overwrite summary if set */
2712         if (trace.summary_only)
2713                 trace.summary = trace.summary_only;
2714
2715         if (!trace.trace_syscalls && !trace.trace_pgfaults &&
2716             trace.evlist->nr_entries == 0 /* Was --events used? */) {
2717                 pr_err("Please specify something to trace.\n");
2718                 return -1;
2719         }
2720
2721         if (output_name != NULL) {
2722                 err = trace__open_output(&trace, output_name);
2723                 if (err < 0) {
2724                         perror("failed to create output file");
2725                         goto out;
2726                 }
2727         }
2728
2729         if (ev_qualifier_str != NULL) {
2730                 const char *s = ev_qualifier_str;
2731
2732                 trace.not_ev_qualifier = *s == '!';
2733                 if (trace.not_ev_qualifier)
2734                         ++s;
2735                 trace.ev_qualifier = strlist__new(true, s);
2736                 if (trace.ev_qualifier == NULL) {
2737                         fputs("Not enough memory to parse event qualifier",
2738                               trace.output);
2739                         err = -ENOMEM;
2740                         goto out_close;
2741                 }
2742         }
2743
2744         err = target__validate(&trace.opts.target);
2745         if (err) {
2746                 target__strerror(&trace.opts.target, err, bf, sizeof(bf));
2747                 fprintf(trace.output, "%s", bf);
2748                 goto out_close;
2749         }
2750
2751         err = target__parse_uid(&trace.opts.target);
2752         if (err) {
2753                 target__strerror(&trace.opts.target, err, bf, sizeof(bf));
2754                 fprintf(trace.output, "%s", bf);
2755                 goto out_close;
2756         }
2757
2758         if (!argc && target__none(&trace.opts.target))
2759                 trace.opts.target.system_wide = true;
2760
2761         if (input_name)
2762                 err = trace__replay(&trace);
2763         else
2764                 err = trace__run(&trace, argc, argv);
2765
2766 out_close:
2767         if (output_name != NULL)
2768                 fclose(trace.output);
2769 out:
2770         return err;
2771 }