perf trace: Associate some more syscall args with the getname beautifier
[firefly-linux-kernel-4.4.55.git] / tools / perf / builtin-trace.c
1 #include <traceevent/event-parse.h>
2 #include "builtin.h"
3 #include "util/color.h"
4 #include "util/debug.h"
5 #include "util/evlist.h"
6 #include "util/exec_cmd.h"
7 #include "util/machine.h"
8 #include "util/session.h"
9 #include "util/thread.h"
10 #include "util/parse-options.h"
11 #include "util/strlist.h"
12 #include "util/intlist.h"
13 #include "util/thread_map.h"
14 #include "util/stat.h"
15 #include "trace-event.h"
16 #include "util/parse-events.h"
17
18 #include <libaudit.h>
19 #include <stdlib.h>
20 #include <sys/mman.h>
21 #include <linux/futex.h>
22
/*
 * Fallbacks for older distros: every constant below is defined only when
 * the system headers included above did not already provide it.
 */
#ifndef MAP_STACK
#define MAP_STACK		0x20000
#endif

#ifndef MADV_HWPOISON
#define MADV_HWPOISON		100
#endif

#ifndef MADV_MERGEABLE
#define MADV_MERGEABLE		12
#endif

#ifndef MADV_UNMERGEABLE
#define MADV_UNMERGEABLE	13
#endif

#ifndef EFD_SEMAPHORE
#define EFD_SEMAPHORE		1
#endif

#ifndef EFD_NONBLOCK
#define EFD_NONBLOCK		00004000
#endif

#ifndef EFD_CLOEXEC
#define EFD_CLOEXEC		02000000
#endif

#ifndef O_CLOEXEC
#define O_CLOEXEC		02000000
#endif

#ifndef SOCK_DCCP
#define SOCK_DCCP		6
#endif

#ifndef SOCK_CLOEXEC
#define SOCK_CLOEXEC		02000000
#endif

#ifndef SOCK_NONBLOCK
#define SOCK_NONBLOCK		00004000
#endif

#ifndef MSG_CMSG_CLOEXEC
#define MSG_CMSG_CLOEXEC	0x40000000
#endif

#ifndef PERF_FLAG_FD_NO_GROUP
#define PERF_FLAG_FD_NO_GROUP	(1UL << 0)
#endif

#ifndef PERF_FLAG_FD_OUTPUT
#define PERF_FLAG_FD_OUTPUT	(1UL << 1)
#endif

#ifndef PERF_FLAG_PID_CGROUP
#define PERF_FLAG_PID_CGROUP	(1UL << 2) /* pid=cgroup id, per-cpu mode only */
#endif

#ifndef PERF_FLAG_FD_CLOEXEC
#define PERF_FLAG_FD_CLOEXEC	(1UL << 3) /* O_CLOEXEC */
#endif
87
88
89 struct tp_field {
90         int offset;
91         union {
92                 u64 (*integer)(struct tp_field *field, struct perf_sample *sample);
93                 void *(*pointer)(struct tp_field *field, struct perf_sample *sample);
94         };
95 };
96
97 #define TP_UINT_FIELD(bits) \
98 static u64 tp_field__u##bits(struct tp_field *field, struct perf_sample *sample) \
99 { \
100         u##bits value; \
101         memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
102         return value;  \
103 }
104
105 TP_UINT_FIELD(8);
106 TP_UINT_FIELD(16);
107 TP_UINT_FIELD(32);
108 TP_UINT_FIELD(64);
109
110 #define TP_UINT_FIELD__SWAPPED(bits) \
111 static u64 tp_field__swapped_u##bits(struct tp_field *field, struct perf_sample *sample) \
112 { \
113         u##bits value; \
114         memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
115         return bswap_##bits(value);\
116 }
117
118 TP_UINT_FIELD__SWAPPED(16);
119 TP_UINT_FIELD__SWAPPED(32);
120 TP_UINT_FIELD__SWAPPED(64);
121
122 static int tp_field__init_uint(struct tp_field *field,
123                                struct format_field *format_field,
124                                bool needs_swap)
125 {
126         field->offset = format_field->offset;
127
128         switch (format_field->size) {
129         case 1:
130                 field->integer = tp_field__u8;
131                 break;
132         case 2:
133                 field->integer = needs_swap ? tp_field__swapped_u16 : tp_field__u16;
134                 break;
135         case 4:
136                 field->integer = needs_swap ? tp_field__swapped_u32 : tp_field__u32;
137                 break;
138         case 8:
139                 field->integer = needs_swap ? tp_field__swapped_u64 : tp_field__u64;
140                 break;
141         default:
142                 return -1;
143         }
144
145         return 0;
146 }
147
148 static void *tp_field__ptr(struct tp_field *field, struct perf_sample *sample)
149 {
150         return sample->raw_data + field->offset;
151 }
152
153 static int tp_field__init_ptr(struct tp_field *field, struct format_field *format_field)
154 {
155         field->offset = format_field->offset;
156         field->pointer = tp_field__ptr;
157         return 0;
158 }
159
160 struct syscall_tp {
161         struct tp_field id;
162         union {
163                 struct tp_field args, ret;
164         };
165 };
166
167 static int perf_evsel__init_tp_uint_field(struct perf_evsel *evsel,
168                                           struct tp_field *field,
169                                           const char *name)
170 {
171         struct format_field *format_field = perf_evsel__field(evsel, name);
172
173         if (format_field == NULL)
174                 return -1;
175
176         return tp_field__init_uint(field, format_field, evsel->needs_swap);
177 }
178
/*
 * Initialize the integer accessor for member 'name' of the struct syscall_tp
 * hanging off evsel->priv, looking the tracepoint field up by that same name.
 *
 * The evsel argument is captured once in a local so that an arbitrary
 * expression can be passed without precedence surprises or double evaluation.
 */
#define perf_evsel__init_sc_tp_uint_field(evsel, name) \
	({ struct perf_evsel *__evsel = (evsel); \
	   struct syscall_tp *sc = __evsel->priv;\
	   perf_evsel__init_tp_uint_field(__evsel, &sc->name, #name); })
182
/*
 * Look up the tracepoint field called @name in @evsel and initialize @field
 * as a pointer accessor for it.
 *
 * Returns -1 when the field does not exist, otherwise whatever
 * tp_field__init_ptr() returns.
 */
static int perf_evsel__init_tp_ptr_field(struct perf_evsel *evsel,
					 struct tp_field *field,
					 const char *name)
{
	struct format_field *fmt = perf_evsel__field(evsel, name);

	if (!fmt)
		return -1;

	return tp_field__init_ptr(field, fmt);
}
194
/*
 * Initialize the pointer accessor for member 'name' of the struct syscall_tp
 * hanging off evsel->priv, looking the tracepoint field up by that same name.
 *
 * The evsel argument is captured once in a local so that an arbitrary
 * expression can be passed without precedence surprises or double evaluation.
 */
#define perf_evsel__init_sc_tp_ptr_field(evsel, name) \
	({ struct perf_evsel *__evsel = (evsel); \
	   struct syscall_tp *sc = __evsel->priv;\
	   perf_evsel__init_tp_ptr_field(__evsel, &sc->name, #name); })
198
199 static void perf_evsel__delete_priv(struct perf_evsel *evsel)
200 {
201         zfree(&evsel->priv);
202         perf_evsel__delete(evsel);
203 }
204
205 static int perf_evsel__init_syscall_tp(struct perf_evsel *evsel, void *handler)
206 {
207         evsel->priv = malloc(sizeof(struct syscall_tp));
208         if (evsel->priv != NULL) {
209                 if (perf_evsel__init_sc_tp_uint_field(evsel, id))
210                         goto out_delete;
211
212                 evsel->handler = handler;
213                 return 0;
214         }
215
216         return -ENOMEM;
217
218 out_delete:
219         zfree(&evsel->priv);
220         return -ENOENT;
221 }
222
223 static struct perf_evsel *perf_evsel__syscall_newtp(const char *direction, void *handler)
224 {
225         struct perf_evsel *evsel = perf_evsel__newtp("raw_syscalls", direction);
226
227         /* older kernel (e.g., RHEL6) use syscalls:{enter,exit} */
228         if (evsel == NULL)
229                 evsel = perf_evsel__newtp("syscalls", direction);
230
231         if (evsel) {
232                 if (perf_evsel__init_syscall_tp(evsel, handler))
233                         goto out_delete;
234         }
235
236         return evsel;
237
238 out_delete:
239         perf_evsel__delete_priv(evsel);
240         return NULL;
241 }
242
/*
 * Read member 'name' of the struct syscall_tp stored in evsel->priv out of
 * 'sample', through the integer reader installed for that member.
 * The evsel argument is parenthesized so that any expression can be passed.
 */
#define perf_evsel__sc_tp_uint(evsel, name, sample) \
	({ struct syscall_tp *fields = (evsel)->priv; \
	   fields->name.integer(&fields->name, sample); })

/*
 * Same as perf_evsel__sc_tp_uint(), but through the pointer reader: yields
 * the address of the member's data inside the sample.
 */
#define perf_evsel__sc_tp_ptr(evsel, name, sample) \
	({ struct syscall_tp *fields = (evsel)->priv; \
	   fields->name.pointer(&fields->name, sample); })
250
251 struct syscall_arg {
252         unsigned long val;
253         struct thread *thread;
254         struct trace  *trace;
255         void          *parm;
256         u8            idx;
257         u8            mask;
258 };
259
/*
 * Table of names indexed by (value - offset); nr_entries bounds the valid
 * indexes of entries[].
 */
struct strarray {
	int         offset;
	int         nr_entries;
	const char **entries;
};

/* Define strarray__<array> over <array>; the offset is left at zero. */
#define DEFINE_STRARRAY(array) struct strarray strarray__##array = { \
	.entries    = array, \
	.nr_entries = ARRAY_SIZE(array), \
}

/* Define strarray__<array> over <array>, whose first entry names value <off>. */
#define DEFINE_STRARRAY_OFFSET(array, off) struct strarray strarray__##array = { \
	.entries    = array, \
	.nr_entries = ARRAY_SIZE(array), \
	.offset     = off, \
}
276
277 static size_t __syscall_arg__scnprintf_strarray(char *bf, size_t size,
278                                                 const char *intfmt,
279                                                 struct syscall_arg *arg)
280 {
281         struct strarray *sa = arg->parm;
282         int idx = arg->val - sa->offset;
283
284         if (idx < 0 || idx >= sa->nr_entries)
285                 return scnprintf(bf, size, intfmt, arg->val);
286
287         return scnprintf(bf, size, "%s", sa->entries[idx]);
288 }
289
/* strarray beautifier whose fallback for unknown values is decimal. */
static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size,
					      struct syscall_arg *arg)
{
	static const char fallback_fmt[] = "%d";

	return __syscall_arg__scnprintf_strarray(bf, size, fallback_fmt, arg);
}

#define SCA_STRARRAY syscall_arg__scnprintf_strarray
297
#if defined(__i386__) || defined(__x86_64__)
/*
 * FIXME: Make this available to all arches as soon as the ioctl beautifier
 *        gets rewritten to support all arches.
 *
 * strarray beautifier whose fallback for unknown values is hexadecimal.
 */
static size_t syscall_arg__scnprintf_strhexarray(char *bf, size_t size,
						 struct syscall_arg *arg)
{
	static const char fallback_fmt[] = "%#x";

	return __syscall_arg__scnprintf_strarray(bf, size, fallback_fmt, arg);
}

#define SCA_STRHEXARRAY syscall_arg__scnprintf_strhexarray
#endif /* defined(__i386__) || defined(__x86_64__) */
311
312 static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
313                                         struct syscall_arg *arg);
314
315 #define SCA_FD syscall_arg__scnprintf_fd
316
317 static size_t syscall_arg__scnprintf_fd_at(char *bf, size_t size,
318                                            struct syscall_arg *arg)
319 {
320         int fd = arg->val;
321
322         if (fd == AT_FDCWD)
323                 return scnprintf(bf, size, "CWD");
324
325         return syscall_arg__scnprintf_fd(bf, size, arg);
326 }
327
328 #define SCA_FDAT syscall_arg__scnprintf_fd_at
329
/* Forward declaration only; the definition is not in this part of the file. */
static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size, struct syscall_arg *arg);

#define SCA_CLOSE_FD syscall_arg__scnprintf_close_fd
334
335 static size_t syscall_arg__scnprintf_hex(char *bf, size_t size,
336                                          struct syscall_arg *arg)
337 {
338         return scnprintf(bf, size, "%#lx", arg->val);
339 }
340
341 #define SCA_HEX syscall_arg__scnprintf_hex
342
343 static size_t syscall_arg__scnprintf_int(char *bf, size_t size,
344                                          struct syscall_arg *arg)
345 {
346         return scnprintf(bf, size, "%d", arg->val);
347 }
348
349 #define SCA_INT syscall_arg__scnprintf_int
350
351 static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size,
352                                                struct syscall_arg *arg)
353 {
354         int printed = 0, prot = arg->val;
355
356         if (prot == PROT_NONE)
357                 return scnprintf(bf, size, "NONE");
358 #define P_MMAP_PROT(n) \
359         if (prot & PROT_##n) { \
360                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
361                 prot &= ~PROT_##n; \
362         }
363
364         P_MMAP_PROT(EXEC);
365         P_MMAP_PROT(READ);
366         P_MMAP_PROT(WRITE);
367 #ifdef PROT_SEM
368         P_MMAP_PROT(SEM);
369 #endif
370         P_MMAP_PROT(GROWSDOWN);
371         P_MMAP_PROT(GROWSUP);
372 #undef P_MMAP_PROT
373
374         if (prot)
375                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", prot);
376
377         return printed;
378 }
379
380 #define SCA_MMAP_PROT syscall_arg__scnprintf_mmap_prot
381
382 static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size,
383                                                 struct syscall_arg *arg)
384 {
385         int printed = 0, flags = arg->val;
386
387 #define P_MMAP_FLAG(n) \
388         if (flags & MAP_##n) { \
389                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
390                 flags &= ~MAP_##n; \
391         }
392
393         P_MMAP_FLAG(SHARED);
394         P_MMAP_FLAG(PRIVATE);
395 #ifdef MAP_32BIT
396         P_MMAP_FLAG(32BIT);
397 #endif
398         P_MMAP_FLAG(ANONYMOUS);
399         P_MMAP_FLAG(DENYWRITE);
400         P_MMAP_FLAG(EXECUTABLE);
401         P_MMAP_FLAG(FILE);
402         P_MMAP_FLAG(FIXED);
403         P_MMAP_FLAG(GROWSDOWN);
404 #ifdef MAP_HUGETLB
405         P_MMAP_FLAG(HUGETLB);
406 #endif
407         P_MMAP_FLAG(LOCKED);
408         P_MMAP_FLAG(NONBLOCK);
409         P_MMAP_FLAG(NORESERVE);
410         P_MMAP_FLAG(POPULATE);
411         P_MMAP_FLAG(STACK);
412 #ifdef MAP_UNINITIALIZED
413         P_MMAP_FLAG(UNINITIALIZED);
414 #endif
415 #undef P_MMAP_FLAG
416
417         if (flags)
418                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
419
420         return printed;
421 }
422
423 #define SCA_MMAP_FLAGS syscall_arg__scnprintf_mmap_flags
424
425 static size_t syscall_arg__scnprintf_mremap_flags(char *bf, size_t size,
426                                                   struct syscall_arg *arg)
427 {
428         int printed = 0, flags = arg->val;
429
430 #define P_MREMAP_FLAG(n) \
431         if (flags & MREMAP_##n) { \
432                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
433                 flags &= ~MREMAP_##n; \
434         }
435
436         P_MREMAP_FLAG(MAYMOVE);
437 #ifdef MREMAP_FIXED
438         P_MREMAP_FLAG(FIXED);
439 #endif
440 #undef P_MREMAP_FLAG
441
442         if (flags)
443                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
444
445         return printed;
446 }
447
448 #define SCA_MREMAP_FLAGS syscall_arg__scnprintf_mremap_flags
449
450 static size_t syscall_arg__scnprintf_madvise_behavior(char *bf, size_t size,
451                                                       struct syscall_arg *arg)
452 {
453         int behavior = arg->val;
454
455         switch (behavior) {
456 #define P_MADV_BHV(n) case MADV_##n: return scnprintf(bf, size, #n)
457         P_MADV_BHV(NORMAL);
458         P_MADV_BHV(RANDOM);
459         P_MADV_BHV(SEQUENTIAL);
460         P_MADV_BHV(WILLNEED);
461         P_MADV_BHV(DONTNEED);
462         P_MADV_BHV(REMOVE);
463         P_MADV_BHV(DONTFORK);
464         P_MADV_BHV(DOFORK);
465         P_MADV_BHV(HWPOISON);
466 #ifdef MADV_SOFT_OFFLINE
467         P_MADV_BHV(SOFT_OFFLINE);
468 #endif
469         P_MADV_BHV(MERGEABLE);
470         P_MADV_BHV(UNMERGEABLE);
471 #ifdef MADV_HUGEPAGE
472         P_MADV_BHV(HUGEPAGE);
473 #endif
474 #ifdef MADV_NOHUGEPAGE
475         P_MADV_BHV(NOHUGEPAGE);
476 #endif
477 #ifdef MADV_DONTDUMP
478         P_MADV_BHV(DONTDUMP);
479 #endif
480 #ifdef MADV_DODUMP
481         P_MADV_BHV(DODUMP);
482 #endif
483 #undef P_MADV_PHV
484         default: break;
485         }
486
487         return scnprintf(bf, size, "%#x", behavior);
488 }
489
490 #define SCA_MADV_BHV syscall_arg__scnprintf_madvise_behavior
491
492 static size_t syscall_arg__scnprintf_flock(char *bf, size_t size,
493                                            struct syscall_arg *arg)
494 {
495         int printed = 0, op = arg->val;
496
497         if (op == 0)
498                 return scnprintf(bf, size, "NONE");
499 #define P_CMD(cmd) \
500         if ((op & LOCK_##cmd) == LOCK_##cmd) { \
501                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #cmd); \
502                 op &= ~LOCK_##cmd; \
503         }
504
505         P_CMD(SH);
506         P_CMD(EX);
507         P_CMD(NB);
508         P_CMD(UN);
509         P_CMD(MAND);
510         P_CMD(RW);
511         P_CMD(READ);
512         P_CMD(WRITE);
513 #undef P_OP
514
515         if (op)
516                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", op);
517
518         return printed;
519 }
520
521 #define SCA_FLOCK syscall_arg__scnprintf_flock
522
523 static size_t syscall_arg__scnprintf_futex_op(char *bf, size_t size, struct syscall_arg *arg)
524 {
525         enum syscall_futex_args {
526                 SCF_UADDR   = (1 << 0),
527                 SCF_OP      = (1 << 1),
528                 SCF_VAL     = (1 << 2),
529                 SCF_TIMEOUT = (1 << 3),
530                 SCF_UADDR2  = (1 << 4),
531                 SCF_VAL3    = (1 << 5),
532         };
533         int op = arg->val;
534         int cmd = op & FUTEX_CMD_MASK;
535         size_t printed = 0;
536
537         switch (cmd) {
538 #define P_FUTEX_OP(n) case FUTEX_##n: printed = scnprintf(bf, size, #n);
539         P_FUTEX_OP(WAIT);           arg->mask |= SCF_VAL3|SCF_UADDR2;             break;
540         P_FUTEX_OP(WAKE);           arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
541         P_FUTEX_OP(FD);             arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
542         P_FUTEX_OP(REQUEUE);        arg->mask |= SCF_VAL3|SCF_TIMEOUT;            break;
543         P_FUTEX_OP(CMP_REQUEUE);    arg->mask |= SCF_TIMEOUT;                     break;
544         P_FUTEX_OP(CMP_REQUEUE_PI); arg->mask |= SCF_TIMEOUT;                     break;
545         P_FUTEX_OP(WAKE_OP);                                                      break;
546         P_FUTEX_OP(LOCK_PI);        arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
547         P_FUTEX_OP(UNLOCK_PI);      arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
548         P_FUTEX_OP(TRYLOCK_PI);     arg->mask |= SCF_VAL3|SCF_UADDR2;             break;
549         P_FUTEX_OP(WAIT_BITSET);    arg->mask |= SCF_UADDR2;                      break;
550         P_FUTEX_OP(WAKE_BITSET);    arg->mask |= SCF_UADDR2;                      break;
551         P_FUTEX_OP(WAIT_REQUEUE_PI);                                              break;
552         default: printed = scnprintf(bf, size, "%#x", cmd);                       break;
553         }
554
555         if (op & FUTEX_PRIVATE_FLAG)
556                 printed += scnprintf(bf + printed, size - printed, "|PRIV");
557
558         if (op & FUTEX_CLOCK_REALTIME)
559                 printed += scnprintf(bf + printed, size - printed, "|CLKRT");
560
561         return printed;
562 }
563
564 #define SCA_FUTEX_OP  syscall_arg__scnprintf_futex_op
565
566 static const char *epoll_ctl_ops[] = { "ADD", "DEL", "MOD", };
567 static DEFINE_STRARRAY_OFFSET(epoll_ctl_ops, 1);
568
569 static const char *itimers[] = { "REAL", "VIRTUAL", "PROF", };
570 static DEFINE_STRARRAY(itimers);
571
572 static const char *whences[] = { "SET", "CUR", "END",
573 #ifdef SEEK_DATA
574 "DATA",
575 #endif
576 #ifdef SEEK_HOLE
577 "HOLE",
578 #endif
579 };
580 static DEFINE_STRARRAY(whences);
581
582 static const char *fcntl_cmds[] = {
583         "DUPFD", "GETFD", "SETFD", "GETFL", "SETFL", "GETLK", "SETLK",
584         "SETLKW", "SETOWN", "GETOWN", "SETSIG", "GETSIG", "F_GETLK64",
585         "F_SETLK64", "F_SETLKW64", "F_SETOWN_EX", "F_GETOWN_EX",
586         "F_GETOWNER_UIDS",
587 };
588 static DEFINE_STRARRAY(fcntl_cmds);
589
590 static const char *rlimit_resources[] = {
591         "CPU", "FSIZE", "DATA", "STACK", "CORE", "RSS", "NPROC", "NOFILE",
592         "MEMLOCK", "AS", "LOCKS", "SIGPENDING", "MSGQUEUE", "NICE", "RTPRIO",
593         "RTTIME",
594 };
595 static DEFINE_STRARRAY(rlimit_resources);
596
597 static const char *sighow[] = { "BLOCK", "UNBLOCK", "SETMASK", };
598 static DEFINE_STRARRAY(sighow);
599
600 static const char *clockid[] = {
601         "REALTIME", "MONOTONIC", "PROCESS_CPUTIME_ID", "THREAD_CPUTIME_ID",
602         "MONOTONIC_RAW", "REALTIME_COARSE", "MONOTONIC_COARSE",
603 };
604 static DEFINE_STRARRAY(clockid);
605
/*
 * Socket address family names, indexed by the AF_* value.
 *
 * "MPLS" (28) was missing between "IB" (27) and "CAN" (29), which shifted
 * every later name down by one slot.
 */
static const char *socket_families[] = {
	"UNSPEC", "LOCAL", "INET", "AX25", "IPX", "APPLETALK", "NETROM",
	"BRIDGE", "ATMPVC", "X25", "INET6", "ROSE", "DECnet", "NETBEUI",
	"SECURITY", "KEY", "NETLINK", "PACKET", "ASH", "ECONET", "ATMSVC",
	"RDS", "SNA", "IRDA", "PPPOX", "WANPIPE", "LLC", "IB", "MPLS", "CAN",
	"TIPC", "BLUETOOTH", "IUCV", "RXRPC", "ISDN", "PHONET", "IEEE802154",
	"CAIF", "ALG", "NFC", "VSOCK",
};
614 static DEFINE_STRARRAY(socket_families);
615
616 #ifndef SOCK_TYPE_MASK
617 #define SOCK_TYPE_MASK 0xf
618 #endif
619
620 static size_t syscall_arg__scnprintf_socket_type(char *bf, size_t size,
621                                                       struct syscall_arg *arg)
622 {
623         size_t printed;
624         int type = arg->val,
625             flags = type & ~SOCK_TYPE_MASK;
626
627         type &= SOCK_TYPE_MASK;
628         /*
629          * Can't use a strarray, MIPS may override for ABI reasons.
630          */
631         switch (type) {
632 #define P_SK_TYPE(n) case SOCK_##n: printed = scnprintf(bf, size, #n); break;
633         P_SK_TYPE(STREAM);
634         P_SK_TYPE(DGRAM);
635         P_SK_TYPE(RAW);
636         P_SK_TYPE(RDM);
637         P_SK_TYPE(SEQPACKET);
638         P_SK_TYPE(DCCP);
639         P_SK_TYPE(PACKET);
640 #undef P_SK_TYPE
641         default:
642                 printed = scnprintf(bf, size, "%#x", type);
643         }
644
645 #define P_SK_FLAG(n) \
646         if (flags & SOCK_##n) { \
647                 printed += scnprintf(bf + printed, size - printed, "|%s", #n); \
648                 flags &= ~SOCK_##n; \
649         }
650
651         P_SK_FLAG(CLOEXEC);
652         P_SK_FLAG(NONBLOCK);
653 #undef P_SK_FLAG
654
655         if (flags)
656                 printed += scnprintf(bf + printed, size - printed, "|%#x", flags);
657
658         return printed;
659 }
660
661 #define SCA_SK_TYPE syscall_arg__scnprintf_socket_type
662
663 #ifndef MSG_PROBE
664 #define MSG_PROBE            0x10
665 #endif
666 #ifndef MSG_WAITFORONE
667 #define MSG_WAITFORONE  0x10000
668 #endif
669 #ifndef MSG_SENDPAGE_NOTLAST
670 #define MSG_SENDPAGE_NOTLAST 0x20000
671 #endif
672 #ifndef MSG_FASTOPEN
673 #define MSG_FASTOPEN         0x20000000
674 #endif
675
676 static size_t syscall_arg__scnprintf_msg_flags(char *bf, size_t size,
677                                                struct syscall_arg *arg)
678 {
679         int printed = 0, flags = arg->val;
680
681         if (flags == 0)
682                 return scnprintf(bf, size, "NONE");
683 #define P_MSG_FLAG(n) \
684         if (flags & MSG_##n) { \
685                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
686                 flags &= ~MSG_##n; \
687         }
688
689         P_MSG_FLAG(OOB);
690         P_MSG_FLAG(PEEK);
691         P_MSG_FLAG(DONTROUTE);
692         P_MSG_FLAG(TRYHARD);
693         P_MSG_FLAG(CTRUNC);
694         P_MSG_FLAG(PROBE);
695         P_MSG_FLAG(TRUNC);
696         P_MSG_FLAG(DONTWAIT);
697         P_MSG_FLAG(EOR);
698         P_MSG_FLAG(WAITALL);
699         P_MSG_FLAG(FIN);
700         P_MSG_FLAG(SYN);
701         P_MSG_FLAG(CONFIRM);
702         P_MSG_FLAG(RST);
703         P_MSG_FLAG(ERRQUEUE);
704         P_MSG_FLAG(NOSIGNAL);
705         P_MSG_FLAG(MORE);
706         P_MSG_FLAG(WAITFORONE);
707         P_MSG_FLAG(SENDPAGE_NOTLAST);
708         P_MSG_FLAG(FASTOPEN);
709         P_MSG_FLAG(CMSG_CLOEXEC);
710 #undef P_MSG_FLAG
711
712         if (flags)
713                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
714
715         return printed;
716 }
717
718 #define SCA_MSG_FLAGS syscall_arg__scnprintf_msg_flags
719
720 static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size,
721                                                  struct syscall_arg *arg)
722 {
723         size_t printed = 0;
724         int mode = arg->val;
725
726         if (mode == F_OK) /* 0 */
727                 return scnprintf(bf, size, "F");
728 #define P_MODE(n) \
729         if (mode & n##_OK) { \
730                 printed += scnprintf(bf + printed, size - printed, "%s", #n); \
731                 mode &= ~n##_OK; \
732         }
733
734         P_MODE(R);
735         P_MODE(W);
736         P_MODE(X);
737 #undef P_MODE
738
739         if (mode)
740                 printed += scnprintf(bf + printed, size - printed, "|%#x", mode);
741
742         return printed;
743 }
744
745 #define SCA_ACCMODE syscall_arg__scnprintf_access_mode
746
/* Forward declaration only; the definition is not in this part of the file. */
static size_t syscall_arg__scnprintf_filename(char *bf, size_t size, struct syscall_arg *arg);

#define SCA_FILENAME syscall_arg__scnprintf_filename
751
752 static size_t syscall_arg__scnprintf_open_flags(char *bf, size_t size,
753                                                struct syscall_arg *arg)
754 {
755         int printed = 0, flags = arg->val;
756
757         if (!(flags & O_CREAT))
758                 arg->mask |= 1 << (arg->idx + 1); /* Mask the mode parm */
759
760         if (flags == 0)
761                 return scnprintf(bf, size, "RDONLY");
762 #define P_FLAG(n) \
763         if (flags & O_##n) { \
764                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
765                 flags &= ~O_##n; \
766         }
767
768         P_FLAG(APPEND);
769         P_FLAG(ASYNC);
770         P_FLAG(CLOEXEC);
771         P_FLAG(CREAT);
772         P_FLAG(DIRECT);
773         P_FLAG(DIRECTORY);
774         P_FLAG(EXCL);
775         P_FLAG(LARGEFILE);
776         P_FLAG(NOATIME);
777         P_FLAG(NOCTTY);
778 #ifdef O_NONBLOCK
779         P_FLAG(NONBLOCK);
780 #elif O_NDELAY
781         P_FLAG(NDELAY);
782 #endif
783 #ifdef O_PATH
784         P_FLAG(PATH);
785 #endif
786         P_FLAG(RDWR);
787 #ifdef O_DSYNC
788         if ((flags & O_SYNC) == O_SYNC)
789                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", "SYNC");
790         else {
791                 P_FLAG(DSYNC);
792         }
793 #else
794         P_FLAG(SYNC);
795 #endif
796         P_FLAG(TRUNC);
797         P_FLAG(WRONLY);
798 #undef P_FLAG
799
800         if (flags)
801                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
802
803         return printed;
804 }
805
806 #define SCA_OPEN_FLAGS syscall_arg__scnprintf_open_flags
807
808 static size_t syscall_arg__scnprintf_perf_flags(char *bf, size_t size,
809                                                 struct syscall_arg *arg)
810 {
811         int printed = 0, flags = arg->val;
812
813         if (flags == 0)
814                 return 0;
815
816 #define P_FLAG(n) \
817         if (flags & PERF_FLAG_##n) { \
818                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
819                 flags &= ~PERF_FLAG_##n; \
820         }
821
822         P_FLAG(FD_NO_GROUP);
823         P_FLAG(FD_OUTPUT);
824         P_FLAG(PID_CGROUP);
825         P_FLAG(FD_CLOEXEC);
826 #undef P_FLAG
827
828         if (flags)
829                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
830
831         return printed;
832 }
833
834 #define SCA_PERF_FLAGS syscall_arg__scnprintf_perf_flags
835
836 static size_t syscall_arg__scnprintf_eventfd_flags(char *bf, size_t size,
837                                                    struct syscall_arg *arg)
838 {
839         int printed = 0, flags = arg->val;
840
841         if (flags == 0)
842                 return scnprintf(bf, size, "NONE");
843 #define P_FLAG(n) \
844         if (flags & EFD_##n) { \
845                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
846                 flags &= ~EFD_##n; \
847         }
848
849         P_FLAG(SEMAPHORE);
850         P_FLAG(CLOEXEC);
851         P_FLAG(NONBLOCK);
852 #undef P_FLAG
853
854         if (flags)
855                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
856
857         return printed;
858 }
859
860 #define SCA_EFD_FLAGS syscall_arg__scnprintf_eventfd_flags
861
862 static size_t syscall_arg__scnprintf_pipe_flags(char *bf, size_t size,
863                                                 struct syscall_arg *arg)
864 {
865         int printed = 0, flags = arg->val;
866
867 #define P_FLAG(n) \
868         if (flags & O_##n) { \
869                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
870                 flags &= ~O_##n; \
871         }
872
873         P_FLAG(CLOEXEC);
874         P_FLAG(NONBLOCK);
875 #undef P_FLAG
876
877         if (flags)
878                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
879
880         return printed;
881 }
882
883 #define SCA_PIPE_FLAGS syscall_arg__scnprintf_pipe_flags
884
885 static size_t syscall_arg__scnprintf_signum(char *bf, size_t size, struct syscall_arg *arg)
886 {
887         int sig = arg->val;
888
889         switch (sig) {
890 #define P_SIGNUM(n) case SIG##n: return scnprintf(bf, size, #n)
891         P_SIGNUM(HUP);
892         P_SIGNUM(INT);
893         P_SIGNUM(QUIT);
894         P_SIGNUM(ILL);
895         P_SIGNUM(TRAP);
896         P_SIGNUM(ABRT);
897         P_SIGNUM(BUS);
898         P_SIGNUM(FPE);
899         P_SIGNUM(KILL);
900         P_SIGNUM(USR1);
901         P_SIGNUM(SEGV);
902         P_SIGNUM(USR2);
903         P_SIGNUM(PIPE);
904         P_SIGNUM(ALRM);
905         P_SIGNUM(TERM);
906         P_SIGNUM(CHLD);
907         P_SIGNUM(CONT);
908         P_SIGNUM(STOP);
909         P_SIGNUM(TSTP);
910         P_SIGNUM(TTIN);
911         P_SIGNUM(TTOU);
912         P_SIGNUM(URG);
913         P_SIGNUM(XCPU);
914         P_SIGNUM(XFSZ);
915         P_SIGNUM(VTALRM);
916         P_SIGNUM(PROF);
917         P_SIGNUM(WINCH);
918         P_SIGNUM(IO);
919         P_SIGNUM(PWR);
920         P_SIGNUM(SYS);
921 #ifdef SIGEMT
922         P_SIGNUM(EMT);
923 #endif
924 #ifdef SIGSTKFLT
925         P_SIGNUM(STKFLT);
926 #endif
927 #ifdef SIGSWI
928         P_SIGNUM(SWI);
929 #endif
930         default: break;
931         }
932
933         return scnprintf(bf, size, "%#x", sig);
934 }
935
936 #define SCA_SIGNUM syscall_arg__scnprintf_signum
937
938 #if defined(__i386__) || defined(__x86_64__)
939 /*
940  * FIXME: Make this available to all arches.
941  */
942 #define TCGETS          0x5401
943
944 static const char *tioctls[] = {
945         "TCGETS", "TCSETS", "TCSETSW", "TCSETSF", "TCGETA", "TCSETA", "TCSETAW",
946         "TCSETAF", "TCSBRK", "TCXONC", "TCFLSH", "TIOCEXCL", "TIOCNXCL",
947         "TIOCSCTTY", "TIOCGPGRP", "TIOCSPGRP", "TIOCOUTQ", "TIOCSTI",
948         "TIOCGWINSZ", "TIOCSWINSZ", "TIOCMGET", "TIOCMBIS", "TIOCMBIC",
949         "TIOCMSET", "TIOCGSOFTCAR", "TIOCSSOFTCAR", "FIONREAD", "TIOCLINUX",
950         "TIOCCONS", "TIOCGSERIAL", "TIOCSSERIAL", "TIOCPKT", "FIONBIO",
951         "TIOCNOTTY", "TIOCSETD", "TIOCGETD", "TCSBRKP", [0x27] = "TIOCSBRK",
952         "TIOCCBRK", "TIOCGSID", "TCGETS2", "TCSETS2", "TCSETSW2", "TCSETSF2",
953         "TIOCGRS485", "TIOCSRS485", "TIOCGPTN", "TIOCSPTLCK",
954         "TIOCGDEV||TCGETX", "TCSETX", "TCSETXF", "TCSETXW", "TIOCSIG",
955         "TIOCVHANGUP", "TIOCGPKT", "TIOCGPTLCK", "TIOCGEXCL",
956         [0x50] = "FIONCLEX", "FIOCLEX", "FIOASYNC", "TIOCSERCONFIG",
957         "TIOCSERGWILD", "TIOCSERSWILD", "TIOCGLCKTRMIOS", "TIOCSLCKTRMIOS",
958         "TIOCSERGSTRUCT", "TIOCSERGETLSR", "TIOCSERGETMULTI", "TIOCSERSETMULTI",
959         "TIOCMIWAIT", "TIOCGICOUNT", [0x60] = "FIOQSIZE",
960 };
961
962 static DEFINE_STRARRAY_OFFSET(tioctls, 0x5401);
963 #endif /* defined(__i386__) || defined(__x86_64__) */
964
/*
 * Expands to the two designated initializers a syscall_fmts[] entry needs to
 * print argument number 'arg' through SCA_STRARRAY with strarray__<array> as
 * its parameter. 'name' does not appear in the expansion; call sites pass the
 * argument's name there, so it only documents the entry.
 */
965 #define STRARRAY(arg, name, array) \
966           .arg_scnprintf = { [arg] = SCA_STRARRAY, }, \
967           .arg_parm      = { [arg] = &strarray__##array, }
968
/*
 * Per-syscall formatting hints.
 *
 * The table is searched by name with bsearch() in syscall_fmt__find(), so the
 * entries MUST stay sorted by .name in strcmp() order.
 *
 * name:          syscall name, the search key
 * alias:         alternative name tried for the "sys_enter_<name>" tracepoint
 *                when the one built from .name is not found
 * arg_scnprintf: optional per-argument beautifier, indexed by argument position
 * arg_parm:      optional per-argument parameter handed to the beautifier
 *                (e.g. the strarray used by SCA_STRARRAY)
 * errmsg, timeout, hexret: return value formatting flags; the code consuming
 *                them is outside this section
 */
969 static struct syscall_fmt {
970         const char *name;
971         const char *alias;
972         size_t     (*arg_scnprintf[6])(char *bf, size_t size, struct syscall_arg *arg);
973         void       *arg_parm[6];
974         bool       errmsg;
975         bool       timeout;
976         bool       hexret;
977 } syscall_fmts[] = {
978         { .name     = "access",     .errmsg = true,
979           .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */
980                              [1] = SCA_ACCMODE,  /* mode */ }, },
981         { .name     = "arch_prctl", .errmsg = true, .alias = "prctl", },
982         { .name     = "brk",        .hexret = true,
983           .arg_scnprintf = { [0] = SCA_HEX, /* brk */ }, },
984         { .name     = "chdir",      .errmsg = true,
985           .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
986         { .name     = "chmod",      .errmsg = true,
987           .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
988         { .name     = "chroot",     .errmsg = true,
989           .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
990         { .name     = "clock_gettime",  .errmsg = true, STRARRAY(0, clk_id, clockid), },
991         { .name     = "close",      .errmsg = true,
992           .arg_scnprintf = { [0] = SCA_CLOSE_FD, /* fd */ }, },
993         { .name     = "connect",    .errmsg = true, },
994         { .name     = "creat",      .errmsg = true,
995           .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
996         { .name     = "dup",        .errmsg = true,
997           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
998         { .name     = "dup2",       .errmsg = true,
999           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1000         { .name     = "dup3",       .errmsg = true,
1001           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1002         { .name     = "epoll_ctl",  .errmsg = true, STRARRAY(1, op, epoll_ctl_ops), },
1003         { .name     = "eventfd2",   .errmsg = true,
1004           .arg_scnprintf = { [1] = SCA_EFD_FLAGS, /* flags */ }, },
1005         { .name     = "faccessat",  .errmsg = true,
1006           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1007                              [1] = SCA_FILENAME, /* filename */ }, },
1008         { .name     = "fadvise64",  .errmsg = true,
1009           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1010         { .name     = "fallocate",  .errmsg = true,
1011           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1012         { .name     = "fchdir",     .errmsg = true,
1013           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1014         { .name     = "fchmod",     .errmsg = true,
1015           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1016         { .name     = "fchmodat",   .errmsg = true,
1017           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */
1018                              [1] = SCA_FILENAME, /* filename */ }, },
1019         { .name     = "fchown",     .errmsg = true,
1020           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1021         { .name     = "fchownat",   .errmsg = true,
1022           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */
1023                              [1] = SCA_FILENAME, /* filename */ }, },
1024         { .name     = "fcntl",      .errmsg = true,
1025           .arg_scnprintf = { [0] = SCA_FD, /* fd */
1026                              [1] = SCA_STRARRAY, /* cmd */ },
1027           .arg_parm      = { [1] = &strarray__fcntl_cmds, /* cmd */ }, },
1028         { .name     = "fdatasync",  .errmsg = true,
1029           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1030         { .name     = "flock",      .errmsg = true,
1031           .arg_scnprintf = { [0] = SCA_FD, /* fd */
1032                              [1] = SCA_FLOCK, /* cmd */ }, },
1033         { .name     = "fsetxattr",  .errmsg = true,
1034           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1035         { .name     = "fstat",      .errmsg = true, .alias = "newfstat",
1036           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1037         { .name     = "fstatat",    .errmsg = true, .alias = "newfstatat",
1038           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1039                              [1] = SCA_FILENAME, /* filename */ }, },
1040         { .name     = "fstatfs",    .errmsg = true,
1041           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1042         { .name     = "fsync",    .errmsg = true,
1043           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1044         { .name     = "ftruncate", .errmsg = true,
1045           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1046         { .name     = "futex",      .errmsg = true,
1047           .arg_scnprintf = { [1] = SCA_FUTEX_OP, /* op */ }, },
1048         { .name     = "futimesat", .errmsg = true,
1049           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */
1050                              [1] = SCA_FILENAME, /* filename */ }, },
1051         { .name     = "getdents",   .errmsg = true,
1052           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1053         { .name     = "getdents64", .errmsg = true,
1054           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1055         { .name     = "getitimer",  .errmsg = true, STRARRAY(0, which, itimers), },
1056         { .name     = "getrlimit",  .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
1057         { .name     = "getxattr",    .errmsg = true,
1058           .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1059         { .name     = "inotify_add_watch",          .errmsg = true,
1060           .arg_scnprintf = { [1] = SCA_FILENAME, /* pathname */ }, },
1061         { .name     = "ioctl",      .errmsg = true,
1062           .arg_scnprintf = { [0] = SCA_FD, /* fd */
1063 #if defined(__i386__) || defined(__x86_64__)
1064 /*
1065  * FIXME: Make this available to all arches.
1066  */
1067                              [1] = SCA_STRHEXARRAY, /* cmd */
1068                              [2] = SCA_HEX, /* arg */ },
1069           .arg_parm      = { [1] = &strarray__tioctls, /* cmd */ }, },
1070 #else
1071                              [2] = SCA_HEX, /* arg */ }, },
1072 #endif
1073         { .name     = "kill",       .errmsg = true,
1074           .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1075         { .name     = "lchown",    .errmsg = true,
1076           .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1077         { .name     = "lgetxattr",  .errmsg = true,
1078           .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1079         { .name     = "linkat",     .errmsg = true,
1080           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
1081         { .name     = "listxattr",  .errmsg = true,
1082           .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1083         { .name     = "llistxattr", .errmsg = true,
1084           .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1085         { .name     = "lremovexattr",  .errmsg = true,
1086           .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1087         { .name     = "lseek",      .errmsg = true,
1088           .arg_scnprintf = { [0] = SCA_FD, /* fd */
1089                              [2] = SCA_STRARRAY, /* whence */ },
1090           .arg_parm      = { [2] = &strarray__whences, /* whence */ }, },
1091         { .name     = "lsetxattr",  .errmsg = true,
1092           .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1093         { .name     = "lstat",      .errmsg = true, .alias = "newlstat",
1094           .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1095         { .name     = "lsxattr",    .errmsg = true,
1096           .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1097         { .name     = "madvise",    .errmsg = true,
1098           .arg_scnprintf = { [0] = SCA_HEX,      /* start */
1099                              [2] = SCA_MADV_BHV, /* behavior */ }, },
1100         { .name     = "mkdir",    .errmsg = true,
1101           .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1102         { .name     = "mkdirat",    .errmsg = true,
1103           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */
1104                              [1] = SCA_FILENAME, /* pathname */ }, },
1105         { .name     = "mknod",      .errmsg = true,
1106           .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1107         { .name     = "mknodat",    .errmsg = true,
1108           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */
1109                              [1] = SCA_FILENAME, /* filename */ }, },
1110         { .name     = "mlock",      .errmsg = true,
1111           .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1112         { .name     = "mlockall",   .errmsg = true,
1113           .arg_scnprintf = { [0] = SCA_HEX, /* flags */ }, },
1114         { .name     = "mmap",       .hexret = true,
1115           .arg_scnprintf = { [0] = SCA_HEX,       /* addr */
1116                              [2] = SCA_MMAP_PROT, /* prot */
1117                              [3] = SCA_MMAP_FLAGS, /* flags */
1118                              [4] = SCA_FD,        /* fd */ }, },
1119         { .name     = "mprotect",   .errmsg = true,
1120           .arg_scnprintf = { [0] = SCA_HEX, /* start */
1121                              [2] = SCA_MMAP_PROT, /* prot */ }, },
1122         { .name     = "mq_unlink", .errmsg = true,
1123           .arg_scnprintf = { [0] = SCA_FILENAME, /* u_name */ }, },
1124         { .name     = "mremap",     .hexret = true,
1125           .arg_scnprintf = { [0] = SCA_HEX, /* addr */
1126                              [3] = SCA_MREMAP_FLAGS, /* flags */
1127                              [4] = SCA_HEX, /* new_addr */ }, },
1128         { .name     = "munlock",    .errmsg = true,
1129           .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1130         { .name     = "munmap",     .errmsg = true,
1131           .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1132         { .name     = "name_to_handle_at", .errmsg = true,
1133           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1134         { .name     = "newfstatat", .errmsg = true,
1135           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1136                              [1] = SCA_FILENAME, /* filename */ }, },
1137         { .name     = "open",       .errmsg = true,
1138           .arg_scnprintf = { [0] = SCA_FILENAME,   /* filename */
1139                              [1] = SCA_OPEN_FLAGS, /* flags */ }, },
1140         { .name     = "open_by_handle_at", .errmsg = true,
1141           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1142                              [2] = SCA_OPEN_FLAGS, /* flags */ }, },
1143         { .name     = "openat",     .errmsg = true,
1144           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1145                              [1] = SCA_FILENAME, /* filename */
1146                              [2] = SCA_OPEN_FLAGS, /* flags */ }, },
1147         { .name     = "perf_event_open", .errmsg = true,
1148           .arg_scnprintf = { [1] = SCA_INT, /* pid */
1149                              [2] = SCA_INT, /* cpu */
1150                              [3] = SCA_FD,  /* group_fd */
1151                              [4] = SCA_PERF_FLAGS,  /* flags */ }, },
1152         { .name     = "pipe2",      .errmsg = true,
1153           .arg_scnprintf = { [1] = SCA_PIPE_FLAGS, /* flags */ }, },
1154         { .name     = "poll",       .errmsg = true, .timeout = true, },
1155         { .name     = "ppoll",      .errmsg = true, .timeout = true, },
1156         { .name     = "pread",      .errmsg = true, .alias = "pread64",
1157           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1158         { .name     = "preadv",     .errmsg = true, .alias = "pread",
1159           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1160         { .name     = "prlimit64",  .errmsg = true, STRARRAY(1, resource, rlimit_resources), },
1161         { .name     = "pwrite",     .errmsg = true, .alias = "pwrite64",
1162           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1163         { .name     = "pwritev",    .errmsg = true,
1164           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1165         { .name     = "read",       .errmsg = true,
1166           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1167         { .name     = "readlink",   .errmsg = true,
1168           .arg_scnprintf = { [0] = SCA_FILENAME, /* path */ }, },
1169         { .name     = "readlinkat", .errmsg = true,
1170           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1171                              [1] = SCA_FILENAME, /* pathname */ }, },
1172         { .name     = "readv",      .errmsg = true,
1173           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1174         { .name     = "recvfrom",   .errmsg = true,
1175           .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1176         { .name     = "recvmmsg",   .errmsg = true,
1177           .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1178         { .name     = "recvmsg",    .errmsg = true,
1179           .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
1180         { .name     = "removexattr", .errmsg = true,
1181           .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1182         { .name     = "renameat",   .errmsg = true,
1183           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1184         { .name     = "rmdir",    .errmsg = true,
1185           .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1186         { .name     = "rt_sigaction", .errmsg = true,
1187           .arg_scnprintf = { [0] = SCA_SIGNUM, /* sig */ }, },
1188         { .name     = "rt_sigprocmask",  .errmsg = true, STRARRAY(0, how, sighow), },
1189         { .name     = "rt_sigqueueinfo", .errmsg = true,
1190           .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1191         { .name     = "rt_tgsigqueueinfo", .errmsg = true,
1192           .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
1193         { .name     = "select",     .errmsg = true, .timeout = true, },
1194         { .name     = "sendmmsg",    .errmsg = true,
1195           .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1196         { .name     = "sendmsg",    .errmsg = true,
1197           .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
1198         { .name     = "sendto",     .errmsg = true,
1199           .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1200         { .name     = "setitimer",  .errmsg = true, STRARRAY(0, which, itimers), },
1201         { .name     = "setrlimit",  .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
1202         { .name     = "setxattr",   .errmsg = true,
1203           .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1204         { .name     = "shutdown",   .errmsg = true,
1205           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1206         { .name     = "socket",     .errmsg = true,
1207           .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
1208                              [1] = SCA_SK_TYPE, /* type */ },
1209           .arg_parm      = { [0] = &strarray__socket_families, /* family */ }, },
1210         { .name     = "socketpair", .errmsg = true,
1211           .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
1212                              [1] = SCA_SK_TYPE, /* type */ },
1213           .arg_parm      = { [0] = &strarray__socket_families, /* family */ }, },
1214         { .name     = "stat",       .errmsg = true, .alias = "newstat",
1215           .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1216         { .name     = "statfs",     .errmsg = true,
1217           .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1218         { .name     = "swapoff",    .errmsg = true,
1219           .arg_scnprintf = { [0] = SCA_FILENAME, /* specialfile */ }, },
1220         { .name     = "swapon",     .errmsg = true,
1221           .arg_scnprintf = { [0] = SCA_FILENAME, /* specialfile */ }, },
1222         { .name     = "symlinkat",  .errmsg = true,
1223           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1224         { .name     = "tgkill",     .errmsg = true,
1225           .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
1226         { .name     = "tkill",      .errmsg = true,
1227           .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1228         { .name     = "truncate",   .errmsg = true,
1229           .arg_scnprintf = { [0] = SCA_FILENAME, /* path */ }, },
1230         { .name     = "uname",      .errmsg = true, .alias = "newuname", },
1231         { .name     = "unlinkat",   .errmsg = true,
1232           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1233                              [1] = SCA_FILENAME, /* pathname */ }, },
1234         { .name     = "utime",  .errmsg = true,
1235           .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1236         { .name     = "utimensat",  .errmsg = true,
1237           .arg_scnprintf = { [0] = SCA_FDAT, /* dirfd */
1238                              [1] = SCA_FILENAME, /* filename */ }, },
1239         { .name     = "utimes",  .errmsg = true,
1240           .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1241         { .name     = "vmsplice",  .errmsg = true,
1242           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1243         { .name     = "write",      .errmsg = true,
1244           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1245         { .name     = "writev",     .errmsg = true,
1246           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1247 };
1248
1249 static int syscall_fmt__cmp(const void *name, const void *fmtp)
1250 {
1251         const struct syscall_fmt *fmt = fmtp;
1252         return strcmp(name, fmt->name);
1253 }
1254
1255 static struct syscall_fmt *syscall_fmt__find(const char *name)
1256 {
1257         const int nmemb = ARRAY_SIZE(syscall_fmts);
1258         return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp);
1259 }
1260
/*
 * Per-syscall-id state, one slot of trace->syscalls.table.
 *
 * tp_format:     format of the "sys_enter_<name>" tracepoint (or of its alias)
 * nr_args, args: argument fields taken from tp_format
 * name:          syscall name as returned by audit_syscall_to_name()
 * fmt:           matching syscall_fmts[] entry, NULL when there is none
 * arg_scnprintf: per-argument beautifiers, allocated by syscall__set_arg_fmts()
 * arg_parm:      per-argument beautifier parameters, borrowed from fmt
 */
struct syscall {
        struct event_format *tp_format;

        int nr_args;
        struct format_field *args;

        const char *name;
        bool is_exit;
        struct syscall_fmt *fmt;

        size_t (**arg_scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
        void **arg_parm;
};
1271
1272 static size_t fprintf_duration(unsigned long t, FILE *fp)
1273 {
1274         double duration = (double)t / NSEC_PER_MSEC;
1275         size_t printed = fprintf(fp, "(");
1276
1277         if (duration >= 1.0)
1278                 printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration);
1279         else if (duration >= 0.01)
1280                 printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration);
1281         else
1282                 printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration);
1283         return printed + fprintf(fp, "): ");
1284 }
1285
1286 /**
1287  * filename.ptr: The filename char pointer that will be vfs_getname'd
1288  * filename.entry_str_pos: Where to insert the string translated from
1289  *                         filename.ptr by the vfs_getname tracepoint/kprobe.
1290  */
1291 struct thread_trace {
1292         u64               entry_time;
1293         u64               exit_time;
1294         bool              entry_pending;
1295         unsigned long     nr_events;
1296         unsigned long     pfmaj, pfmin;
1297         char              *entry_str;
1298         double            runtime_ms;
1299         struct {
1300                 unsigned long ptr;
1301                 int           entry_str_pos;
1302         } filename;
1303         struct {
1304                 int       max;
1305                 char      **table;
1306         } paths;
1307
1308         struct intlist *syscall_stats;
1309 };
1310
1311 static struct thread_trace *thread_trace__new(void)
1312 {
1313         struct thread_trace *ttrace =  zalloc(sizeof(struct thread_trace));
1314
1315         if (ttrace)
1316                 ttrace->paths.max = -1;
1317
1318         ttrace->syscall_stats = intlist__new(NULL);
1319
1320         return ttrace;
1321 }
1322
1323 static struct thread_trace *thread__trace(struct thread *thread, FILE *fp)
1324 {
1325         struct thread_trace *ttrace;
1326
1327         if (thread == NULL)
1328                 goto fail;
1329
1330         if (thread__priv(thread) == NULL)
1331                 thread__set_priv(thread, thread_trace__new());
1332
1333         if (thread__priv(thread) == NULL)
1334                 goto fail;
1335
1336         ttrace = thread__priv(thread);
1337         ++ttrace->nr_events;
1338
1339         return ttrace;
1340 fail:
1341         color_fprintf(fp, PERF_COLOR_RED,
1342                       "WARNING: not enough memory, dropping samples!\n");
1343         return NULL;
1344 }
1345
/*
 * Single-bit flags. NOTE(review): going by the names these select major and
 * minor page faults and are presumably combined in trace->trace_pgfaults -
 * confirm against the option parsing code, which is outside this section.
 */
1346 #define TRACE_PFMAJ             (1 << 0)
1347 #define TRACE_PFMIN             (1 << 1)
1348
/*
 * NOTE(review): presumably the capacity in bytes of thread_trace->entry_str -
 * confirm at the place where that buffer is allocated.
 */
1349 static const size_t trace__entry_str_size = 2048;
1350
1351 struct trace {
1352         struct perf_tool        tool;
1353         struct {
1354                 int             machine;
1355                 int             open_id;
1356         }                       audit;
1357         struct {
1358                 int             max;
1359                 struct syscall  *table;
1360                 struct {
1361                         struct perf_evsel *sys_enter,
1362                                           *sys_exit;
1363                 }               events;
1364         } syscalls;
1365         struct record_opts      opts;
1366         struct perf_evlist      *evlist;
1367         struct machine          *host;
1368         struct thread           *current;
1369         u64                     base_time;
1370         FILE                    *output;
1371         unsigned long           nr_events;
1372         struct strlist          *ev_qualifier;
1373         struct {
1374                 size_t          nr;
1375                 int             *entries;
1376         }                       ev_qualifier_ids;
1377         const char              *last_vfs_getname;
1378         struct intlist          *tid_list;
1379         struct intlist          *pid_list;
1380         struct {
1381                 size_t          nr;
1382                 pid_t           *entries;
1383         }                       filter_pids;
1384         double                  duration_filter;
1385         double                  runtime_ms;
1386         struct {
1387                 u64             vfs_getname,
1388                                 proc_getname;
1389         } stats;
1390         bool                    not_ev_qualifier;
1391         bool                    live;
1392         bool                    full_time;
1393         bool                    sched;
1394         bool                    multiple_threads;
1395         bool                    summary;
1396         bool                    summary_only;
1397         bool                    show_comm;
1398         bool                    show_tool_stats;
1399         bool                    trace_syscalls;
1400         bool                    force;
1401         bool                    vfs_getname;
1402         int                     trace_pgfaults;
1403 };
1404
1405 static int trace__set_fd_pathname(struct thread *thread, int fd, const char *pathname)
1406 {
1407         struct thread_trace *ttrace = thread__priv(thread);
1408
1409         if (fd > ttrace->paths.max) {
1410                 char **npath = realloc(ttrace->paths.table, (fd + 1) * sizeof(char *));
1411
1412                 if (npath == NULL)
1413                         return -1;
1414
1415                 if (ttrace->paths.max != -1) {
1416                         memset(npath + ttrace->paths.max + 1, 0,
1417                                (fd - ttrace->paths.max) * sizeof(char *));
1418                 } else {
1419                         memset(npath, 0, (fd + 1) * sizeof(char *));
1420                 }
1421
1422                 ttrace->paths.table = npath;
1423                 ttrace->paths.max   = fd;
1424         }
1425
1426         ttrace->paths.table[fd] = strdup(pathname);
1427
1428         return ttrace->paths.table[fd] != NULL ? 0 : -1;
1429 }
1430
1431 static int thread__read_fd_path(struct thread *thread, int fd)
1432 {
1433         char linkname[PATH_MAX], pathname[PATH_MAX];
1434         struct stat st;
1435         int ret;
1436
1437         if (thread->pid_ == thread->tid) {
1438                 scnprintf(linkname, sizeof(linkname),
1439                           "/proc/%d/fd/%d", thread->pid_, fd);
1440         } else {
1441                 scnprintf(linkname, sizeof(linkname),
1442                           "/proc/%d/task/%d/fd/%d", thread->pid_, thread->tid, fd);
1443         }
1444
1445         if (lstat(linkname, &st) < 0 || st.st_size + 1 > (off_t)sizeof(pathname))
1446                 return -1;
1447
1448         ret = readlink(linkname, pathname, sizeof(pathname));
1449
1450         if (ret < 0 || ret > st.st_size)
1451                 return -1;
1452
1453         pathname[ret] = '\0';
1454         return trace__set_fd_pathname(thread, fd, pathname);
1455 }
1456
1457 static const char *thread__fd_path(struct thread *thread, int fd,
1458                                    struct trace *trace)
1459 {
1460         struct thread_trace *ttrace = thread__priv(thread);
1461
1462         if (ttrace == NULL)
1463                 return NULL;
1464
1465         if (fd < 0)
1466                 return NULL;
1467
1468         if ((fd > ttrace->paths.max || ttrace->paths.table[fd] == NULL)) {
1469                 if (!trace->live)
1470                         return NULL;
1471                 ++trace->stats.proc_getname;
1472                 if (thread__read_fd_path(thread, fd))
1473                         return NULL;
1474         }
1475
1476         return ttrace->paths.table[fd];
1477 }
1478
1479 static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
1480                                         struct syscall_arg *arg)
1481 {
1482         int fd = arg->val;
1483         size_t printed = scnprintf(bf, size, "%d", fd);
1484         const char *path = thread__fd_path(arg->thread, fd, arg->trace);
1485
1486         if (path)
1487                 printed += scnprintf(bf + printed, size - printed, "<%s>", path);
1488
1489         return printed;
1490 }
1491
1492 static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
1493                                               struct syscall_arg *arg)
1494 {
1495         int fd = arg->val;
1496         size_t printed = syscall_arg__scnprintf_fd(bf, size, arg);
1497         struct thread_trace *ttrace = thread__priv(arg->thread);
1498
1499         if (ttrace && fd >= 0 && fd <= ttrace->paths.max)
1500                 zfree(&ttrace->paths.table[fd]);
1501
1502         return printed;
1503 }
1504
1505 static void thread__set_filename_pos(struct thread *thread, const char *bf,
1506                                      unsigned long ptr)
1507 {
1508         struct thread_trace *ttrace = thread__priv(thread);
1509
1510         ttrace->filename.ptr = ptr;
1511         ttrace->filename.entry_str_pos = bf - ttrace->entry_str;
1512 }
1513
1514 static size_t syscall_arg__scnprintf_filename(char *bf, size_t size,
1515                                               struct syscall_arg *arg)
1516 {
1517         unsigned long ptr = arg->val;
1518
1519         if (!arg->trace->vfs_getname)
1520                 return scnprintf(bf, size, "%#x", ptr);
1521
1522         thread__set_filename_pos(arg->thread, bf, ptr);
1523         return 0;
1524 }
1525
1526 static bool trace__filter_duration(struct trace *trace, double t)
1527 {
1528         return t < (trace->duration_filter * NSEC_PER_MSEC);
1529 }
1530
1531 static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
1532 {
1533         double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC;
1534
1535         return fprintf(fp, "%10.3f ", ts);
1536 }
1537
/*
 * Flags written by sig_handler() and read by the rest of the tool.
 *
 * They are volatile sig_atomic_t because that is the only object type the C
 * standard lets a signal handler store to with defined behavior; they are
 * still used as booleans (0 / 1).
 */
static volatile sig_atomic_t done = false;
static volatile sig_atomic_t interrupted = false;

/*
 * Signal handler: asks the tool to stop and records whether the request came
 * from SIGINT.
 */
static void sig_handler(int sig)
{
        done = true;
        interrupted = sig == SIGINT;
}
1546
1547 static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread,
1548                                         u64 duration, u64 tstamp, FILE *fp)
1549 {
1550         size_t printed = trace__fprintf_tstamp(trace, tstamp, fp);
1551         printed += fprintf_duration(duration, fp);
1552
1553         if (trace->multiple_threads) {
1554                 if (trace->show_comm)
1555                         printed += fprintf(fp, "%.14s/", thread__comm_str(thread));
1556                 printed += fprintf(fp, "%d ", thread->tid);
1557         }
1558
1559         return printed;
1560 }
1561
1562 static int trace__process_event(struct trace *trace, struct machine *machine,
1563                                 union perf_event *event, struct perf_sample *sample)
1564 {
1565         int ret = 0;
1566
1567         switch (event->header.type) {
1568         case PERF_RECORD_LOST:
1569                 color_fprintf(trace->output, PERF_COLOR_RED,
1570                               "LOST %" PRIu64 " events!\n", event->lost.lost);
1571                 ret = machine__process_lost_event(machine, event, sample);
1572         default:
1573                 ret = machine__process_event(machine, event, sample);
1574                 break;
1575         }
1576
1577         return ret;
1578 }
1579
1580 static int trace__tool_process(struct perf_tool *tool,
1581                                union perf_event *event,
1582                                struct perf_sample *sample,
1583                                struct machine *machine)
1584 {
1585         struct trace *trace = container_of(tool, struct trace, tool);
1586         return trace__process_event(trace, machine, event, sample);
1587 }
1588
1589 static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
1590 {
1591         int err = symbol__init(NULL);
1592
1593         if (err)
1594                 return err;
1595
1596         trace->host = machine__new_host();
1597         if (trace->host == NULL)
1598                 return -ENOMEM;
1599
1600         if (trace_event__register_resolver(trace->host, machine__resolve_kernel_addr) < 0)
1601                 return -errno;
1602
1603         err = __machine__synthesize_threads(trace->host, &trace->tool, &trace->opts.target,
1604                                             evlist->threads, trace__tool_process, false,
1605                                             trace->opts.proc_map_timeout);
1606         if (err)
1607                 symbol__exit();
1608
1609         return err;
1610 }
1611
1612 static int syscall__set_arg_fmts(struct syscall *sc)
1613 {
1614         struct format_field *field;
1615         int idx = 0;
1616
1617         sc->arg_scnprintf = calloc(sc->nr_args, sizeof(void *));
1618         if (sc->arg_scnprintf == NULL)
1619                 return -1;
1620
1621         if (sc->fmt)
1622                 sc->arg_parm = sc->fmt->arg_parm;
1623
1624         for (field = sc->args; field; field = field->next) {
1625                 if (sc->fmt && sc->fmt->arg_scnprintf[idx])
1626                         sc->arg_scnprintf[idx] = sc->fmt->arg_scnprintf[idx];
1627                 else if (field->flags & FIELD_IS_POINTER)
1628                         sc->arg_scnprintf[idx] = syscall_arg__scnprintf_hex;
1629                 ++idx;
1630         }
1631
1632         return 0;
1633 }
1634
1635 static int trace__read_syscall_info(struct trace *trace, int id)
1636 {
1637         char tp_name[128];
1638         struct syscall *sc;
1639         const char *name = audit_syscall_to_name(id, trace->audit.machine);
1640
1641         if (name == NULL)
1642                 return -1;
1643
1644         if (id > trace->syscalls.max) {
1645                 struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc));
1646
1647                 if (nsyscalls == NULL)
1648                         return -1;
1649
1650                 if (trace->syscalls.max != -1) {
1651                         memset(nsyscalls + trace->syscalls.max + 1, 0,
1652                                (id - trace->syscalls.max) * sizeof(*sc));
1653                 } else {
1654                         memset(nsyscalls, 0, (id + 1) * sizeof(*sc));
1655                 }
1656
1657                 trace->syscalls.table = nsyscalls;
1658                 trace->syscalls.max   = id;
1659         }
1660
1661         sc = trace->syscalls.table + id;
1662         sc->name = name;
1663
1664         sc->fmt  = syscall_fmt__find(sc->name);
1665
1666         snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name);
1667         sc->tp_format = trace_event__tp_format("syscalls", tp_name);
1668
1669         if (sc->tp_format == NULL && sc->fmt && sc->fmt->alias) {
1670                 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias);
1671                 sc->tp_format = trace_event__tp_format("syscalls", tp_name);
1672         }
1673
1674         if (sc->tp_format == NULL)
1675                 return -1;
1676
1677         sc->args = sc->tp_format->format.fields;
1678         sc->nr_args = sc->tp_format->format.nr_fields;
1679         /* drop nr field - not relevant here; does not exist on older kernels */
1680         if (sc->args && strcmp(sc->args->name, "nr") == 0) {
1681                 sc->args = sc->args->next;
1682                 --sc->nr_args;
1683         }
1684
1685         sc->is_exit = !strcmp(name, "exit_group") || !strcmp(name, "exit");
1686
1687         return syscall__set_arg_fmts(sc);
1688 }
1689
1690 static int trace__validate_ev_qualifier(struct trace *trace)
1691 {
1692         int err = 0, i;
1693         struct str_node *pos;
1694
1695         trace->ev_qualifier_ids.nr = strlist__nr_entries(trace->ev_qualifier);
1696         trace->ev_qualifier_ids.entries = malloc(trace->ev_qualifier_ids.nr *
1697                                                  sizeof(trace->ev_qualifier_ids.entries[0]));
1698
1699         if (trace->ev_qualifier_ids.entries == NULL) {
1700                 fputs("Error:\tNot enough memory for allocating events qualifier ids\n",
1701                        trace->output);
1702                 err = -EINVAL;
1703                 goto out;
1704         }
1705
1706         i = 0;
1707
1708         strlist__for_each(pos, trace->ev_qualifier) {
1709                 const char *sc = pos->s;
1710                 int id = audit_name_to_syscall(sc, trace->audit.machine);
1711
1712                 if (id < 0) {
1713                         if (err == 0) {
1714                                 fputs("Error:\tInvalid syscall ", trace->output);
1715                                 err = -EINVAL;
1716                         } else {
1717                                 fputs(", ", trace->output);
1718                         }
1719
1720                         fputs(sc, trace->output);
1721                 }
1722
1723                 trace->ev_qualifier_ids.entries[i++] = id;
1724         }
1725
1726         if (err < 0) {
1727                 fputs("\nHint:\ttry 'perf list syscalls:sys_enter_*'"
1728                       "\nHint:\tand: 'man syscalls'\n", trace->output);
1729                 zfree(&trace->ev_qualifier_ids.entries);
1730                 trace->ev_qualifier_ids.nr = 0;
1731         }
1732 out:
1733         return err;
1734 }
1735
1736 /*
1737  * args is to be interpreted as a series of longs but we need to handle
1738  * 8-byte unaligned accesses. args points to raw_data within the event
1739  * and raw_data is guaranteed to be 8-byte unaligned because it is
1740  * preceded by raw_size which is a u32. So we need to copy args to a temp
1741  * variable to read it. Most notably this avoids extended load instructions
1742  * on unaligned addresses
1743  */
1744
1745 static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
1746                                       unsigned char *args, struct trace *trace,
1747                                       struct thread *thread)
1748 {
1749         size_t printed = 0;
1750         unsigned char *p;
1751         unsigned long val;
1752
1753         if (sc->args != NULL) {
1754                 struct format_field *field;
1755                 u8 bit = 1;
1756                 struct syscall_arg arg = {
1757                         .idx    = 0,
1758                         .mask   = 0,
1759                         .trace  = trace,
1760                         .thread = thread,
1761                 };
1762
1763                 for (field = sc->args; field;
1764                      field = field->next, ++arg.idx, bit <<= 1) {
1765                         if (arg.mask & bit)
1766                                 continue;
1767
1768                         /* special care for unaligned accesses */
1769                         p = args + sizeof(unsigned long) * arg.idx;
1770                         memcpy(&val, p, sizeof(val));
1771
1772                         /*
1773                          * Suppress this argument if its value is zero and
1774                          * and we don't have a string associated in an
1775                          * strarray for it.
1776                          */
1777                         if (val == 0 &&
1778                             !(sc->arg_scnprintf &&
1779                               sc->arg_scnprintf[arg.idx] == SCA_STRARRAY &&
1780                               sc->arg_parm[arg.idx]))
1781                                 continue;
1782
1783                         printed += scnprintf(bf + printed, size - printed,
1784                                              "%s%s: ", printed ? ", " : "", field->name);
1785                         if (sc->arg_scnprintf && sc->arg_scnprintf[arg.idx]) {
1786                                 arg.val = val;
1787                                 if (sc->arg_parm)
1788                                         arg.parm = sc->arg_parm[arg.idx];
1789                                 printed += sc->arg_scnprintf[arg.idx](bf + printed,
1790                                                                       size - printed, &arg);
1791                         } else {
1792                                 printed += scnprintf(bf + printed, size - printed,
1793                                                      "%ld", val);
1794                         }
1795                 }
1796         } else {
1797                 int i = 0;
1798
1799                 while (i < 6) {
1800                         /* special care for unaligned accesses */
1801                         p = args + sizeof(unsigned long) * i;
1802                         memcpy(&val, p, sizeof(val));
1803                         printed += scnprintf(bf + printed, size - printed,
1804                                              "%sarg%d: %ld",
1805                                              printed ? ", " : "", i, val);
1806                         ++i;
1807                 }
1808         }
1809
1810         return printed;
1811 }
1812
/* Signature of the per-event sample handlers kept in evsel->handler. */
typedef int (*tracepoint_handler)(struct trace *trace,
				  struct perf_evsel *evsel,
				  union perf_event *event,
				  struct perf_sample *sample);
1816
1817 static struct syscall *trace__syscall_info(struct trace *trace,
1818                                            struct perf_evsel *evsel, int id)
1819 {
1820
1821         if (id < 0) {
1822
1823                 /*
1824                  * XXX: Noticed on x86_64, reproduced as far back as 3.0.36, haven't tried
1825                  * before that, leaving at a higher verbosity level till that is
1826                  * explained. Reproduced with plain ftrace with:
1827                  *
1828                  * echo 1 > /t/events/raw_syscalls/sys_exit/enable
1829                  * grep "NR -1 " /t/trace_pipe
1830                  *
1831                  * After generating some load on the machine.
1832                  */
1833                 if (verbose > 1) {
1834                         static u64 n;
1835                         fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n",
1836                                 id, perf_evsel__name(evsel), ++n);
1837                 }
1838                 return NULL;
1839         }
1840
1841         if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) &&
1842             trace__read_syscall_info(trace, id))
1843                 goto out_cant_read;
1844
1845         if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL))
1846                 goto out_cant_read;
1847
1848         return &trace->syscalls.table[id];
1849
1850 out_cant_read:
1851         if (verbose) {
1852                 fprintf(trace->output, "Problems reading syscall %d", id);
1853                 if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL)
1854                         fprintf(trace->output, "(%s)", trace->syscalls.table[id].name);
1855                 fputs(" information\n", trace->output);
1856         }
1857         return NULL;
1858 }
1859
1860 static void thread__update_stats(struct thread_trace *ttrace,
1861                                  int id, struct perf_sample *sample)
1862 {
1863         struct int_node *inode;
1864         struct stats *stats;
1865         u64 duration = 0;
1866
1867         inode = intlist__findnew(ttrace->syscall_stats, id);
1868         if (inode == NULL)
1869                 return;
1870
1871         stats = inode->priv;
1872         if (stats == NULL) {
1873                 stats = malloc(sizeof(struct stats));
1874                 if (stats == NULL)
1875                         return;
1876                 init_stats(stats);
1877                 inode->priv = stats;
1878         }
1879
1880         if (ttrace->entry_time && sample->time > ttrace->entry_time)
1881                 duration = sample->time - ttrace->entry_time;
1882
1883         update_stats(stats, duration);
1884 }
1885
1886 static int trace__printf_interrupted_entry(struct trace *trace, struct perf_sample *sample)
1887 {
1888         struct thread_trace *ttrace;
1889         u64 duration;
1890         size_t printed;
1891
1892         if (trace->current == NULL)
1893                 return 0;
1894
1895         ttrace = thread__priv(trace->current);
1896
1897         if (!ttrace->entry_pending)
1898                 return 0;
1899
1900         duration = sample->time - ttrace->entry_time;
1901
1902         printed  = trace__fprintf_entry_head(trace, trace->current, duration, sample->time, trace->output);
1903         printed += fprintf(trace->output, "%-70s) ...\n", ttrace->entry_str);
1904         ttrace->entry_pending = false;
1905
1906         return printed;
1907 }
1908
1909 static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
1910                             union perf_event *event __maybe_unused,
1911                             struct perf_sample *sample)
1912 {
1913         char *msg;
1914         void *args;
1915         size_t printed = 0;
1916         struct thread *thread;
1917         int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1;
1918         struct syscall *sc = trace__syscall_info(trace, evsel, id);
1919         struct thread_trace *ttrace;
1920
1921         if (sc == NULL)
1922                 return -1;
1923
1924         thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1925         ttrace = thread__trace(thread, trace->output);
1926         if (ttrace == NULL)
1927                 goto out_put;
1928
1929         args = perf_evsel__sc_tp_ptr(evsel, args, sample);
1930
1931         if (ttrace->entry_str == NULL) {
1932                 ttrace->entry_str = malloc(trace__entry_str_size);
1933                 if (!ttrace->entry_str)
1934                         goto out_put;
1935         }
1936
1937         if (!trace->summary_only)
1938                 trace__printf_interrupted_entry(trace, sample);
1939
1940         ttrace->entry_time = sample->time;
1941         msg = ttrace->entry_str;
1942         printed += scnprintf(msg + printed, trace__entry_str_size - printed, "%s(", sc->name);
1943
1944         printed += syscall__scnprintf_args(sc, msg + printed, trace__entry_str_size - printed,
1945                                            args, trace, thread);
1946
1947         if (sc->is_exit) {
1948                 if (!trace->duration_filter && !trace->summary_only) {
1949                         trace__fprintf_entry_head(trace, thread, 1, sample->time, trace->output);
1950                         fprintf(trace->output, "%-70s\n", ttrace->entry_str);
1951                 }
1952         } else
1953                 ttrace->entry_pending = true;
1954
1955         if (trace->current != thread) {
1956                 thread__put(trace->current);
1957                 trace->current = thread__get(thread);
1958         }
1959         err = 0;
1960 out_put:
1961         thread__put(thread);
1962         return err;
1963 }
1964
1965 static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
1966                            union perf_event *event __maybe_unused,
1967                            struct perf_sample *sample)
1968 {
1969         long ret;
1970         u64 duration = 0;
1971         struct thread *thread;
1972         int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1;
1973         struct syscall *sc = trace__syscall_info(trace, evsel, id);
1974         struct thread_trace *ttrace;
1975
1976         if (sc == NULL)
1977                 return -1;
1978
1979         thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1980         ttrace = thread__trace(thread, trace->output);
1981         if (ttrace == NULL)
1982                 goto out_put;
1983
1984         if (trace->summary)
1985                 thread__update_stats(ttrace, id, sample);
1986
1987         ret = perf_evsel__sc_tp_uint(evsel, ret, sample);
1988
1989         if (id == trace->audit.open_id && ret >= 0 && trace->last_vfs_getname) {
1990                 trace__set_fd_pathname(thread, ret, trace->last_vfs_getname);
1991                 trace->last_vfs_getname = NULL;
1992                 ++trace->stats.vfs_getname;
1993         }
1994
1995         ttrace->exit_time = sample->time;
1996
1997         if (ttrace->entry_time) {
1998                 duration = sample->time - ttrace->entry_time;
1999                 if (trace__filter_duration(trace, duration))
2000                         goto out;
2001         } else if (trace->duration_filter)
2002                 goto out;
2003
2004         if (trace->summary_only)
2005                 goto out;
2006
2007         trace__fprintf_entry_head(trace, thread, duration, sample->time, trace->output);
2008
2009         if (ttrace->entry_pending) {
2010                 fprintf(trace->output, "%-70s", ttrace->entry_str);
2011         } else {
2012                 fprintf(trace->output, " ... [");
2013                 color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued");
2014                 fprintf(trace->output, "]: %s()", sc->name);
2015         }
2016
2017         if (sc->fmt == NULL) {
2018 signed_print:
2019                 fprintf(trace->output, ") = %ld", ret);
2020         } else if (ret < 0 && sc->fmt->errmsg) {
2021                 char bf[STRERR_BUFSIZE];
2022                 const char *emsg = strerror_r(-ret, bf, sizeof(bf)),
2023                            *e = audit_errno_to_name(-ret);
2024
2025                 fprintf(trace->output, ") = -1 %s %s", e, emsg);
2026         } else if (ret == 0 && sc->fmt->timeout)
2027                 fprintf(trace->output, ") = 0 Timeout");
2028         else if (sc->fmt->hexret)
2029                 fprintf(trace->output, ") = %#lx", ret);
2030         else
2031                 goto signed_print;
2032
2033         fputc('\n', trace->output);
2034 out:
2035         ttrace->entry_pending = false;
2036         err = 0;
2037 out_put:
2038         thread__put(thread);
2039         return err;
2040 }
2041
2042 static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel,
2043                               union perf_event *event __maybe_unused,
2044                               struct perf_sample *sample)
2045 {
2046         struct thread *thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
2047         struct thread_trace *ttrace;
2048         size_t filename_len, entry_str_len, to_move;
2049         ssize_t remaining_space;
2050         char *pos;
2051         const char *filename;
2052
2053         trace->last_vfs_getname = perf_evsel__rawptr(evsel, sample, "pathname");
2054
2055         if (!thread)
2056                 goto out;
2057
2058         ttrace = thread__priv(thread);
2059         if (!ttrace)
2060                 goto out;
2061
2062         if (!ttrace->filename.ptr)
2063                 goto out;
2064
2065         entry_str_len = strlen(ttrace->entry_str);
2066         remaining_space = trace__entry_str_size - entry_str_len - 1; /* \0 */
2067         if (remaining_space <= 0)
2068                 goto out;
2069
2070         filename = trace->last_vfs_getname;
2071         filename_len = strlen(filename);
2072         if (filename_len > (size_t)remaining_space) {
2073                 filename += filename_len - remaining_space;
2074                 filename_len = remaining_space;
2075         }
2076
2077         to_move = entry_str_len - ttrace->filename.entry_str_pos + 1; /* \0 */
2078         pos = ttrace->entry_str + ttrace->filename.entry_str_pos;
2079         memmove(pos + filename_len, pos, to_move);
2080         memcpy(pos, filename, filename_len);
2081
2082         ttrace->filename.ptr = 0;
2083         ttrace->filename.entry_str_pos = 0;
2084 out:
2085         return 0;
2086 }
2087
2088 static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel,
2089                                      union perf_event *event __maybe_unused,
2090                                      struct perf_sample *sample)
2091 {
2092         u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
2093         double runtime_ms = (double)runtime / NSEC_PER_MSEC;
2094         struct thread *thread = machine__findnew_thread(trace->host,
2095                                                         sample->pid,
2096                                                         sample->tid);
2097         struct thread_trace *ttrace = thread__trace(thread, trace->output);
2098
2099         if (ttrace == NULL)
2100                 goto out_dump;
2101
2102         ttrace->runtime_ms += runtime_ms;
2103         trace->runtime_ms += runtime_ms;
2104         thread__put(thread);
2105         return 0;
2106
2107 out_dump:
2108         fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n",
2109                evsel->name,
2110                perf_evsel__strval(evsel, sample, "comm"),
2111                (pid_t)perf_evsel__intval(evsel, sample, "pid"),
2112                runtime,
2113                perf_evsel__intval(evsel, sample, "vruntime"));
2114         thread__put(thread);
2115         return 0;
2116 }
2117
2118 static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel,
2119                                 union perf_event *event __maybe_unused,
2120                                 struct perf_sample *sample)
2121 {
2122         trace__printf_interrupted_entry(trace, sample);
2123         trace__fprintf_tstamp(trace, sample->time, trace->output);
2124
2125         if (trace->trace_syscalls)
2126                 fprintf(trace->output, "(         ): ");
2127
2128         fprintf(trace->output, "%s:", evsel->name);
2129
2130         if (evsel->tp_format) {
2131                 event_format__fprintf(evsel->tp_format, sample->cpu,
2132                                       sample->raw_data, sample->raw_size,
2133                                       trace->output);
2134         }
2135
2136         fprintf(trace->output, ")\n");
2137         return 0;
2138 }
2139
2140 static void print_location(FILE *f, struct perf_sample *sample,
2141                            struct addr_location *al,
2142                            bool print_dso, bool print_sym)
2143 {
2144
2145         if ((verbose || print_dso) && al->map)
2146                 fprintf(f, "%s@", al->map->dso->long_name);
2147
2148         if ((verbose || print_sym) && al->sym)
2149                 fprintf(f, "%s+0x%" PRIx64, al->sym->name,
2150                         al->addr - al->sym->start);
2151         else if (al->map)
2152                 fprintf(f, "0x%" PRIx64, al->addr);
2153         else
2154                 fprintf(f, "0x%" PRIx64, sample->addr);
2155 }
2156
2157 static int trace__pgfault(struct trace *trace,
2158                           struct perf_evsel *evsel,
2159                           union perf_event *event,
2160                           struct perf_sample *sample)
2161 {
2162         struct thread *thread;
2163         u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
2164         struct addr_location al;
2165         char map_type = 'd';
2166         struct thread_trace *ttrace;
2167         int err = -1;
2168
2169         thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
2170         ttrace = thread__trace(thread, trace->output);
2171         if (ttrace == NULL)
2172                 goto out_put;
2173
2174         if (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ)
2175                 ttrace->pfmaj++;
2176         else
2177                 ttrace->pfmin++;
2178
2179         if (trace->summary_only)
2180                 goto out;
2181
2182         thread__find_addr_location(thread, cpumode, MAP__FUNCTION,
2183                               sample->ip, &al);
2184
2185         trace__fprintf_entry_head(trace, thread, 0, sample->time, trace->output);
2186
2187         fprintf(trace->output, "%sfault [",
2188                 evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ?
2189                 "maj" : "min");
2190
2191         print_location(trace->output, sample, &al, false, true);
2192
2193         fprintf(trace->output, "] => ");
2194
2195         thread__find_addr_location(thread, cpumode, MAP__VARIABLE,
2196                                    sample->addr, &al);
2197
2198         if (!al.map) {
2199                 thread__find_addr_location(thread, cpumode,
2200                                            MAP__FUNCTION, sample->addr, &al);
2201
2202                 if (al.map)
2203                         map_type = 'x';
2204                 else
2205                         map_type = '?';
2206         }
2207
2208         print_location(trace->output, sample, &al, true, false);
2209
2210         fprintf(trace->output, " (%c%c)\n", map_type, al.level);
2211 out:
2212         err = 0;
2213 out_put:
2214         thread__put(thread);
2215         return err;
2216 }
2217
2218 static bool skip_sample(struct trace *trace, struct perf_sample *sample)
2219 {
2220         if ((trace->pid_list && intlist__find(trace->pid_list, sample->pid)) ||
2221             (trace->tid_list && intlist__find(trace->tid_list, sample->tid)))
2222                 return false;
2223
2224         if (trace->pid_list || trace->tid_list)
2225                 return true;
2226
2227         return false;
2228 }
2229
2230 static int trace__process_sample(struct perf_tool *tool,
2231                                  union perf_event *event,
2232                                  struct perf_sample *sample,
2233                                  struct perf_evsel *evsel,
2234                                  struct machine *machine __maybe_unused)
2235 {
2236         struct trace *trace = container_of(tool, struct trace, tool);
2237         int err = 0;
2238
2239         tracepoint_handler handler = evsel->handler;
2240
2241         if (skip_sample(trace, sample))
2242                 return 0;
2243
2244         if (!trace->full_time && trace->base_time == 0)
2245                 trace->base_time = sample->time;
2246
2247         if (handler) {
2248                 ++trace->nr_events;
2249                 handler(trace, evsel, event, sample);
2250         }
2251
2252         return err;
2253 }
2254
2255 static int parse_target_str(struct trace *trace)
2256 {
2257         if (trace->opts.target.pid) {
2258                 trace->pid_list = intlist__new(trace->opts.target.pid);
2259                 if (trace->pid_list == NULL) {
2260                         pr_err("Error parsing process id string\n");
2261                         return -EINVAL;
2262                 }
2263         }
2264
2265         if (trace->opts.target.tid) {
2266                 trace->tid_list = intlist__new(trace->opts.target.tid);
2267                 if (trace->tid_list == NULL) {
2268                         pr_err("Error parsing thread id string\n");
2269                         return -EINVAL;
2270                 }
2271         }
2272
2273         return 0;
2274 }
2275
2276 static int trace__record(struct trace *trace, int argc, const char **argv)
2277 {
2278         unsigned int rec_argc, i, j;
2279         const char **rec_argv;
2280         const char * const record_args[] = {
2281                 "record",
2282                 "-R",
2283                 "-m", "1024",
2284                 "-c", "1",
2285         };
2286
2287         const char * const sc_args[] = { "-e", };
2288         unsigned int sc_args_nr = ARRAY_SIZE(sc_args);
2289         const char * const majpf_args[] = { "-e", "major-faults" };
2290         unsigned int majpf_args_nr = ARRAY_SIZE(majpf_args);
2291         const char * const minpf_args[] = { "-e", "minor-faults" };
2292         unsigned int minpf_args_nr = ARRAY_SIZE(minpf_args);
2293
2294         /* +1 is for the event string below */
2295         rec_argc = ARRAY_SIZE(record_args) + sc_args_nr + 1 +
2296                 majpf_args_nr + minpf_args_nr + argc;
2297         rec_argv = calloc(rec_argc + 1, sizeof(char *));
2298
2299         if (rec_argv == NULL)
2300                 return -ENOMEM;
2301
2302         j = 0;
2303         for (i = 0; i < ARRAY_SIZE(record_args); i++)
2304                 rec_argv[j++] = record_args[i];
2305
2306         if (trace->trace_syscalls) {
2307                 for (i = 0; i < sc_args_nr; i++)
2308                         rec_argv[j++] = sc_args[i];
2309
2310                 /* event string may be different for older kernels - e.g., RHEL6 */
2311                 if (is_valid_tracepoint("raw_syscalls:sys_enter"))
2312                         rec_argv[j++] = "raw_syscalls:sys_enter,raw_syscalls:sys_exit";
2313                 else if (is_valid_tracepoint("syscalls:sys_enter"))
2314                         rec_argv[j++] = "syscalls:sys_enter,syscalls:sys_exit";
2315                 else {
2316                         pr_err("Neither raw_syscalls nor syscalls events exist.\n");
2317                         return -1;
2318                 }
2319         }
2320
2321         if (trace->trace_pgfaults & TRACE_PFMAJ)
2322                 for (i = 0; i < majpf_args_nr; i++)
2323                         rec_argv[j++] = majpf_args[i];
2324
2325         if (trace->trace_pgfaults & TRACE_PFMIN)
2326                 for (i = 0; i < minpf_args_nr; i++)
2327                         rec_argv[j++] = minpf_args[i];
2328
2329         for (i = 0; i < (unsigned int)argc; i++)
2330                 rec_argv[j++] = argv[i];
2331
2332         return cmd_record(j, rec_argv, NULL);
2333 }
2334
2335 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp);
2336
2337 static bool perf_evlist__add_vfs_getname(struct perf_evlist *evlist)
2338 {
2339         struct perf_evsel *evsel = perf_evsel__newtp("probe", "vfs_getname");
2340         if (evsel == NULL)
2341                 return false;
2342
2343         if (perf_evsel__field(evsel, "pathname") == NULL) {
2344                 perf_evsel__delete(evsel);
2345                 return false;
2346         }
2347
2348         evsel->handler = trace__vfs_getname;
2349         perf_evlist__add(evlist, evsel);
2350         return true;
2351 }
2352
2353 static int perf_evlist__add_pgfault(struct perf_evlist *evlist,
2354                                     u64 config)
2355 {
2356         struct perf_evsel *evsel;
2357         struct perf_event_attr attr = {
2358                 .type = PERF_TYPE_SOFTWARE,
2359                 .mmap_data = 1,
2360         };
2361
2362         attr.config = config;
2363         attr.sample_period = 1;
2364
2365         event_attr_init(&attr);
2366
2367         evsel = perf_evsel__new(&attr);
2368         if (!evsel)
2369                 return -ENOMEM;
2370
2371         evsel->handler = trace__pgfault;
2372         perf_evlist__add(evlist, evsel);
2373
2374         return 0;
2375 }
2376
2377 static void trace__handle_event(struct trace *trace, union perf_event *event, struct perf_sample *sample)
2378 {
2379         const u32 type = event->header.type;
2380         struct perf_evsel *evsel;
2381
2382         if (!trace->full_time && trace->base_time == 0)
2383                 trace->base_time = sample->time;
2384
2385         if (type != PERF_RECORD_SAMPLE) {
2386                 trace__process_event(trace, trace->host, event, sample);
2387                 return;
2388         }
2389
2390         evsel = perf_evlist__id2evsel(trace->evlist, sample->id);
2391         if (evsel == NULL) {
2392                 fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample->id);
2393                 return;
2394         }
2395
2396         if (evsel->attr.type == PERF_TYPE_TRACEPOINT &&
2397             sample->raw_data == NULL) {
2398                 fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
2399                        perf_evsel__name(evsel), sample->tid,
2400                        sample->cpu, sample->raw_size);
2401         } else {
2402                 tracepoint_handler handler = evsel->handler;
2403                 handler(trace, evsel, event, sample);
2404         }
2405 }
2406
2407 static int trace__add_syscall_newtp(struct trace *trace)
2408 {
2409         int ret = -1;
2410         struct perf_evlist *evlist = trace->evlist;
2411         struct perf_evsel *sys_enter, *sys_exit;
2412
2413         sys_enter = perf_evsel__syscall_newtp("sys_enter", trace__sys_enter);
2414         if (sys_enter == NULL)
2415                 goto out;
2416
2417         if (perf_evsel__init_sc_tp_ptr_field(sys_enter, args))
2418                 goto out_delete_sys_enter;
2419
2420         sys_exit = perf_evsel__syscall_newtp("sys_exit", trace__sys_exit);
2421         if (sys_exit == NULL)
2422                 goto out_delete_sys_enter;
2423
2424         if (perf_evsel__init_sc_tp_uint_field(sys_exit, ret))
2425                 goto out_delete_sys_exit;
2426
2427         perf_evlist__add(evlist, sys_enter);
2428         perf_evlist__add(evlist, sys_exit);
2429
2430         trace->syscalls.events.sys_enter = sys_enter;
2431         trace->syscalls.events.sys_exit  = sys_exit;
2432
2433         ret = 0;
2434 out:
2435         return ret;
2436
2437 out_delete_sys_exit:
2438         perf_evsel__delete_priv(sys_exit);
2439 out_delete_sys_enter:
2440         perf_evsel__delete_priv(sys_enter);
2441         goto out;
2442 }
2443
2444 static int trace__set_ev_qualifier_filter(struct trace *trace)
2445 {
2446         int err = -1;
2447         char *filter = asprintf_expr_inout_ints("id", !trace->not_ev_qualifier,
2448                                                 trace->ev_qualifier_ids.nr,
2449                                                 trace->ev_qualifier_ids.entries);
2450
2451         if (filter == NULL)
2452                 goto out_enomem;
2453
2454         if (!perf_evsel__append_filter(trace->syscalls.events.sys_enter, "&&", filter))
2455                 err = perf_evsel__append_filter(trace->syscalls.events.sys_exit, "&&", filter);
2456
2457         free(filter);
2458 out:
2459         return err;
2460 out_enomem:
2461         errno = ENOMEM;
2462         goto out;
2463 }
2464
/*
 * Live tracing: add the requested events to trace->evlist, optionally prepare
 * and start the workload described by argv (when argc > 0), then read samples
 * from the evlist mmaps and hand each one to trace__handle_event().
 *
 * The read loop ends when 'interrupted' is seen, or when a pass over all mmaps
 * consumed nothing and polling is not (or no longer) wanted.  'done' and
 * 'interrupted' are defined outside this block -- presumably set by
 * sig_handler; confirm.
 *
 * Every exit path goes through out_delete_evlist, which deletes the evlist,
 * clears trace->evlist and trace->live, and returns err.
 */
2465 static int trace__run(struct trace *trace, int argc, const char **argv)
2466 {
2467         struct perf_evlist *evlist = trace->evlist;
2468         struct perf_evsel *evsel;
2469         int err = -1, i;
2470         unsigned long before;
             /* a command line after the options means we fork a workload */
2471         const bool forks = argc > 0;
             /* set once we stop waiting for new data and only empty the buffers */
2472         bool draining = false;
2473
2474         trace->live = true;
2475
             /* syscall tracing needs both the sys_enter and the sys_exit events */
2476         if (trace->trace_syscalls && trace__add_syscall_newtp(trace))
2477                 goto out_error_raw_syscalls;
2478
             /* the result is only stored here; it is not treated as an error in this function */
2479         if (trace->trace_syscalls)
2480                 trace->vfs_getname = perf_evlist__add_vfs_getname(evlist);
2481
2482         if ((trace->trace_pgfaults & TRACE_PFMAJ) &&
2483             perf_evlist__add_pgfault(evlist, PERF_COUNT_SW_PAGE_FAULTS_MAJ)) {
2484                 goto out_error_mem;
2485         }
2486
2487         if ((trace->trace_pgfaults & TRACE_PFMIN) &&
2488             perf_evlist__add_pgfault(evlist, PERF_COUNT_SW_PAGE_FAULTS_MIN))
2489                 goto out_error_mem;
2490
2491         if (trace->sched &&
2492             perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
2493                                    trace__sched_stat_runtime))
2494                 goto out_error_sched_stat_runtime;
2495
2496         err = perf_evlist__create_maps(evlist, &trace->opts.target);
2497         if (err < 0) {
2498                 fprintf(trace->output, "Problems parsing the target to trace, check your options!\n");
2499                 goto out_delete_evlist;
2500         }
2501
2502         err = trace__symbols_init(trace, evlist);
2503         if (err < 0) {
2504                 fprintf(trace->output, "Problems initializing symbol libraries!\n");
2505                 goto out_delete_evlist;
2506         }
2507
2508         perf_evlist__config(evlist, &trace->opts);
2509
             /* SIGCHLD and SIGINT share one handler, defined outside this block */
2510         signal(SIGCHLD, sig_handler);
2511         signal(SIGINT, sig_handler);
2512
2513         if (forks) {
2514                 err = perf_evlist__prepare_workload(evlist, &trace->opts.target,
2515                                                     argv, false, NULL);
2516                 if (err < 0) {
2517                         fprintf(trace->output, "Couldn't run the workload!\n");
2518                         goto out_delete_evlist;
2519                 }
2520         }
2521
2522         err = perf_evlist__open(evlist);
2523         if (err < 0)
2524                 goto out_error_open;
2525
2526         /*
2527          * Better not use !target__has_task() here because we need to cover the
2528          * case where no threads were specified in the command line, but a
2529          * workload was, and in that case we will fill in the thread_map when
2530          * we fork the workload in perf_evlist__prepare_workload.
2531          */
2532         if (trace->filter_pids.nr > 0)
2533                 err = perf_evlist__set_filter_pids(evlist, trace->filter_pids.nr, trace->filter_pids.entries);
2534         else if (thread_map__pid(evlist->threads, 0) == -1)
2535                 err = perf_evlist__set_filter_pid(evlist, getpid());
2536
             /* if neither branch ran, err still holds the non-negative perf_evlist__open() result */
2537         if (err < 0)
2538                 goto out_error_mem;
2539
2540         if (trace->ev_qualifier_ids.nr > 0) {
2541                 err = trace__set_ev_qualifier_filter(trace);
2542                 if (err < 0)
2543                         goto out_errno;
2544
2545                 pr_debug("event qualifier tracepoint filter: %s\n",
2546                          trace->syscalls.events.sys_exit->filter);
2547         }
2548
             /* evsel is passed by address and is used by out_error_apply_filters to name the failing event */
2549         err = perf_evlist__apply_filters(evlist, &evsel);
2550         if (err < 0)
2551                 goto out_error_apply_filters;
2552
2553         err = perf_evlist__mmap(evlist, trace->opts.mmap_pages, false);
2554         if (err < 0)
2555                 goto out_error_mmap;
2556
             /* NOTE(review): with no target the events are not enabled here -- presumably that happens when the workload starts; confirm */
2557         if (!target__none(&trace->opts.target))
2558                 perf_evlist__enable(evlist);
2559
2560         if (forks)
2561                 perf_evlist__start_workload(evlist);
2562
2563         trace->multiple_threads = thread_map__pid(evlist->threads, 0) == -1 ||
2564                                   evlist->threads->nr > 1 ||
2565                                   perf_evlist__first(evlist)->attr.inherit;
2566 again:
             /* remember the event count so we can tell whether this pass consumed anything */
2567         before = trace->nr_events;
2568
2569         for (i = 0; i < evlist->nr_mmaps; i++) {
2570                 union perf_event *event;
2571
2572                 while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) {
2573                         struct perf_sample sample;
2574
2575                         ++trace->nr_events;
2576
2577                         err = perf_evlist__parse_sample(evlist, event, &sample);
2578                         if (err) {
2579                                 fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err);
2580                                 goto next_event;
2581                         }
2582
2583                         trace__handle_event(trace, event, &sample);
2584 next_event:
                             /* the event is consumed whether or not it could be parsed */
2585                         perf_evlist__mmap_consume(evlist, i);
2586
2587                         if (interrupted)
2588                                 goto out_disable;
2589
                             /* on 'done', disable the events once and keep emptying what is already buffered */
2590                         if (done && !draining) {
2591                                 perf_evlist__disable(evlist);
2592                                 draining = true;
2593                         }
2594                 }
2595         }
2596
             /* nothing was read in this pass: wait for more data unless we are draining */
2597         if (trace->nr_events == before) {
                     /* bounded wait once 'done' is set, otherwise -1 (presumably "block", as in poll(2) -- confirm) */
2598                 int timeout = done ? 100 : -1;
2599
2600                 if (!draining && perf_evlist__poll(evlist, timeout) > 0) {
                             /* a zero result here switches to draining (presumably no pollable fds remain -- confirm) */
2601                         if (perf_evlist__filter_pollfd(evlist, POLLERR | POLLHUP) == 0)
2602                                 draining = true;
2603
2604                         goto again;
2605                 }
2606         } else {
2607                 goto again;
2608         }
2609
2610 out_disable:
2611         thread__zput(trace->current);
2612
2613         perf_evlist__disable(evlist);
2614
             /* err holds the result of the last call that assigned it, so a failed parse of the final sample also skips this */
2615         if (!err) {
2616                 if (trace->summary)
2617                         trace__fprintf_thread_summary(trace, trace->output);
2618
2619                 if (trace->show_tool_stats) {
2620                         fprintf(trace->output, "Stats:\n "
2621                                                " vfs_getname : %" PRIu64 "\n"
2622                                                " proc_getname: %" PRIu64 "\n",
2623                                 trace->stats.vfs_getname,
2624                                 trace->stats.proc_getname);
2625                 }
2626         }
2627
     /* common exit: every path, success or failure, ends here */
2628 out_delete_evlist:
2629         perf_evlist__delete(evlist);
2630         trace->evlist = NULL;
2631         trace->live = false;
2632         return err;
     /*
      * Error-reporting labels, reached only via goto.  The braces limit the
      * scope of errbuf to the handlers that format a message into it.
      */
2633 {
2634         char errbuf[BUFSIZ];
2635
2636 out_error_sched_stat_runtime:
2637         debugfs__strerror_open_tp(errno, errbuf, sizeof(errbuf), "sched", "sched_stat_runtime");
2638         goto out_error;
2639
2640 out_error_raw_syscalls:
2641         debugfs__strerror_open_tp(errno, errbuf, sizeof(errbuf), "raw_syscalls", "sys_(enter|exit)");
2642         goto out_error;
2643
2644 out_error_mmap:
2645         perf_evlist__strerror_mmap(evlist, errno, errbuf, sizeof(errbuf));
2646         goto out_error;
2647
2648 out_error_open:
2649         perf_evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf));
             /* falls through to out_error */
2650
2651 out_error:
2652         fprintf(trace->output, "%s\n", errbuf);
2653         goto out_delete_evlist;
2654
2655 out_error_apply_filters:
             /* NOTE(review): printing strerror_r()'s return with %s assumes the char *-returning (GNU) variant -- confirm */
2656         fprintf(trace->output,
2657                 "Failed to set filter \"%s\" on event %s with %d (%s)\n",
2658                 evsel->filter, perf_evsel__name(evsel), errno,
2659                 strerror_r(errno, errbuf, sizeof(errbuf)));
2660         goto out_delete_evlist;
2661 }
2662 out_error_mem:
2663         fprintf(trace->output, "Not enough memory to run!\n");
2664         goto out_delete_evlist;
2665
2666 out_errno:
2667         fprintf(trace->output, "errno=%d,%s\n", errno, strerror(errno));
2668         goto out_delete_evlist;
2669 }
2670
2671 static int trace__replay(struct trace *trace)
2672 {
2673         const struct perf_evsel_str_handler handlers[] = {
2674                 { "probe:vfs_getname",       trace__vfs_getname, },
2675         };
2676         struct perf_data_file file = {
2677                 .path  = input_name,
2678                 .mode  = PERF_DATA_MODE_READ,
2679                 .force = trace->force,
2680         };
2681         struct perf_session *session;
2682         struct perf_evsel *evsel;
2683         int err = -1;
2684
2685         trace->tool.sample        = trace__process_sample;
2686         trace->tool.mmap          = perf_event__process_mmap;
2687         trace->tool.mmap2         = perf_event__process_mmap2;
2688         trace->tool.comm          = perf_event__process_comm;
2689         trace->tool.exit          = perf_event__process_exit;
2690         trace->tool.fork          = perf_event__process_fork;
2691         trace->tool.attr          = perf_event__process_attr;
2692         trace->tool.tracing_data = perf_event__process_tracing_data;
2693         trace->tool.build_id      = perf_event__process_build_id;
2694
2695         trace->tool.ordered_events = true;
2696         trace->tool.ordering_requires_timestamps = true;
2697
2698         /* add tid to output */
2699         trace->multiple_threads = true;
2700
2701         session = perf_session__new(&file, false, &trace->tool);
2702         if (session == NULL)
2703                 return -1;
2704
2705         if (symbol__init(&session->header.env) < 0)
2706                 goto out;
2707
2708         trace->host = &session->machines.host;
2709
2710         err = perf_session__set_tracepoints_handlers(session, handlers);
2711         if (err)
2712                 goto out;
2713
2714         evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2715                                                      "raw_syscalls:sys_enter");
2716         /* older kernels have syscalls tp versus raw_syscalls */
2717         if (evsel == NULL)
2718                 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2719                                                              "syscalls:sys_enter");
2720
2721         if (evsel &&
2722             (perf_evsel__init_syscall_tp(evsel, trace__sys_enter) < 0 ||
2723             perf_evsel__init_sc_tp_ptr_field(evsel, args))) {
2724                 pr_err("Error during initialize raw_syscalls:sys_enter event\n");
2725                 goto out;
2726         }
2727
2728         evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2729                                                      "raw_syscalls:sys_exit");
2730         if (evsel == NULL)
2731                 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2732                                                              "syscalls:sys_exit");
2733         if (evsel &&
2734             (perf_evsel__init_syscall_tp(evsel, trace__sys_exit) < 0 ||
2735             perf_evsel__init_sc_tp_uint_field(evsel, ret))) {
2736                 pr_err("Error during initialize raw_syscalls:sys_exit event\n");
2737                 goto out;
2738         }
2739
2740         evlist__for_each(session->evlist, evsel) {
2741                 if (evsel->attr.type == PERF_TYPE_SOFTWARE &&
2742                     (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ||
2743                      evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MIN ||
2744                      evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS))
2745                         evsel->handler = trace__pgfault;
2746         }
2747
2748         err = parse_target_str(trace);
2749         if (err != 0)
2750                 goto out;
2751
2752         setup_pager();
2753
2754         err = perf_session__process_events(session);
2755         if (err)
2756                 pr_err("Failed to process events, error %d", err);
2757
2758         else if (trace->summary)
2759                 trace__fprintf_thread_summary(trace, trace->output);
2760
2761 out:
2762         perf_session__delete(session);
2763
2764         return err;
2765 }
2766
/*
 * Write the heading that precedes the per-thread summary to @fp.
 *
 * Returns the value reported by fprintf(), converted to size_t.
 */
static size_t trace__fprintf_threads_header(FILE *fp)
{
	return fprintf(fp, "\n Summary of events:\n\n");
}
2775
2776 static size_t thread__dump_stats(struct thread_trace *ttrace,
2777                                  struct trace *trace, FILE *fp)
2778 {
2779         struct stats *stats;
2780         size_t printed = 0;
2781         struct syscall *sc;
2782         struct int_node *inode = intlist__first(ttrace->syscall_stats);
2783
2784         if (inode == NULL)
2785                 return 0;
2786
2787         printed += fprintf(fp, "\n");
2788
2789         printed += fprintf(fp, "   syscall            calls    total       min       avg       max      stddev\n");
2790         printed += fprintf(fp, "                               (msec)    (msec)    (msec)    (msec)        (%%)\n");
2791         printed += fprintf(fp, "   --------------- -------- --------- --------- --------- ---------     ------\n");
2792
2793         /* each int_node is a syscall */
2794         while (inode) {
2795                 stats = inode->priv;
2796                 if (stats) {
2797                         double min = (double)(stats->min) / NSEC_PER_MSEC;
2798                         double max = (double)(stats->max) / NSEC_PER_MSEC;
2799                         double avg = avg_stats(stats);
2800                         double pct;
2801                         u64 n = (u64) stats->n;
2802
2803                         pct = avg ? 100.0 * stddev_stats(stats)/avg : 0.0;
2804                         avg /= NSEC_PER_MSEC;
2805
2806                         sc = &trace->syscalls.table[inode->i];
2807                         printed += fprintf(fp, "   %-15s", sc->name);
2808                         printed += fprintf(fp, " %8" PRIu64 " %9.3f %9.3f %9.3f",
2809                                            n, avg * n, min, avg);
2810                         printed += fprintf(fp, " %9.3f %9.2f%%\n", max, pct);
2811                 }
2812
2813                 inode = intlist__next(inode);
2814         }
2815
2816         printed += fprintf(fp, "\n\n");
2817
2818         return printed;
2819 }
2820
2821 /* Context handed to trace__fprintf_one_thread() through its void *priv argument */
2822 struct summary_data {
2823         FILE *fp;               /* stream the summary is written to */
2824         struct trace *trace;    /* supplies nr_events and is passed on to thread__dump_stats() */
2825         size_t printed;         /* accumulated fprintf() return values */
2826 };
2827
2828 static int trace__fprintf_one_thread(struct thread *thread, void *priv)
2829 {
2830         struct summary_data *data = priv;
2831         FILE *fp = data->fp;
2832         size_t printed = data->printed;
2833         struct trace *trace = data->trace;
2834         struct thread_trace *ttrace = thread__priv(thread);
2835         double ratio;
2836
2837         if (ttrace == NULL)
2838                 return 0;
2839
2840         ratio = (double)ttrace->nr_events / trace->nr_events * 100.0;
2841
2842         printed += fprintf(fp, " %s (%d), ", thread__comm_str(thread), thread->tid);
2843         printed += fprintf(fp, "%lu events, ", ttrace->nr_events);
2844         printed += fprintf(fp, "%.1f%%", ratio);
2845         if (ttrace->pfmaj)
2846                 printed += fprintf(fp, ", %lu majfaults", ttrace->pfmaj);
2847         if (ttrace->pfmin)
2848                 printed += fprintf(fp, ", %lu minfaults", ttrace->pfmin);
2849         printed += fprintf(fp, ", %.3f msec\n", ttrace->runtime_ms);
2850         printed += thread__dump_stats(ttrace, trace, fp);
2851
2852         data->printed += printed;
2853
2854         return 0;
2855 }
2856
2857 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
2858 {
2859         struct summary_data data = {
2860                 .fp = fp,
2861                 .trace = trace
2862         };
2863         data.printed = trace__fprintf_threads_header(fp);
2864
2865         machine__for_each_thread(trace->host, trace__fprintf_one_thread, &data);
2866
2867         return data.printed;
2868 }
2869
2870 static int trace__set_duration(const struct option *opt, const char *str,
2871                                int unset __maybe_unused)
2872 {
2873         struct trace *trace = opt->value;
2874
2875         trace->duration_filter = atof(str);
2876         return 0;
2877 }
2878
2879 static int trace__set_filter_pids(const struct option *opt, const char *str,
2880                                   int unset __maybe_unused)
2881 {
2882         int ret = -1;
2883         size_t i;
2884         struct trace *trace = opt->value;
2885         /*
2886          * FIXME: introduce a intarray class, plain parse csv and create a
2887          * { int nr, int entries[] } struct...
2888          */
2889         struct intlist *list = intlist__new(str);
2890
2891         if (list == NULL)
2892                 return -1;
2893
2894         i = trace->filter_pids.nr = intlist__nr_entries(list) + 1;
2895         trace->filter_pids.entries = calloc(i, sizeof(pid_t));
2896
2897         if (trace->filter_pids.entries == NULL)
2898                 goto out;
2899
2900         trace->filter_pids.entries[0] = getpid();
2901
2902         for (i = 1; i < trace->filter_pids.nr; ++i)
2903                 trace->filter_pids.entries[i] = intlist__entry(list, i - 1)->i;
2904
2905         intlist__delete(list);
2906         ret = 0;
2907 out:
2908         return ret;
2909 }
2910
2911 static int trace__open_output(struct trace *trace, const char *filename)
2912 {
2913         struct stat st;
2914
2915         if (!stat(filename, &st) && st.st_size) {
2916                 char oldname[PATH_MAX];
2917
2918                 scnprintf(oldname, sizeof(oldname), "%s.old", filename);
2919                 unlink(oldname);
2920                 rename(filename, oldname);
2921         }
2922
2923         trace->output = fopen(filename, "w");
2924
2925         return trace->output == NULL ? -errno : 0;
2926 }
2927
2928 static int parse_pagefaults(const struct option *opt, const char *str,
2929                             int unset __maybe_unused)
2930 {
2931         int *trace_pgfaults = opt->value;
2932
2933         if (strcmp(str, "all") == 0)
2934                 *trace_pgfaults |= TRACE_PFMAJ | TRACE_PFMIN;
2935         else if (strcmp(str, "maj") == 0)
2936                 *trace_pgfaults |= TRACE_PFMAJ;
2937         else if (strcmp(str, "min") == 0)
2938                 *trace_pgfaults |= TRACE_PFMIN;
2939         else
2940                 return -1;
2941
2942         return 0;
2943 }
2944
2945 static void evlist__set_evsel_handler(struct perf_evlist *evlist, void *handler)
2946 {
2947         struct perf_evsel *evsel;
2948
2949         evlist__for_each(evlist, evsel)
2950                 evsel->handler = handler;
2951 }
2952
/*
 * Entry point of the 'perf trace' builtin.
 *
 * Parses the command line, then dispatches to one of three modes:
 *   - "record" as first non-option argument: trace__record()
 *   - an input file was given (input_name set):  trace__replay()
 *   - otherwise:                                 trace__run() on argc/argv
 *
 * Returns the result of the selected mode, a negative error from the setup
 * steps, or -1 when nothing was selected to be traced.
 */
2953 int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
2954 {
2955         const char *trace_usage[] = {
2956                 "perf trace [<options>] [<command>]",
2957                 "perf trace [<options>] -- <command> [<options>]",
2958                 "perf trace record [<options>] [<command>]",
2959                 "perf trace record [<options>] -- <command> [<options>]",
2960                 NULL
2961         };
             /* defaults: syscalls traced, comm shown, output on stderr until -o replaces it */
2962         struct trace trace = {
2963                 .audit = {
2964                         .machine = audit_detect_machine(),
2965                         .open_id = audit_name_to_syscall("open", trace.audit.machine),
2966                 },
2967                 .syscalls = {
2968                         . max = -1,
2969                 },
2970                 .opts = {
2971                         .target = {
2972                                 .uid       = UINT_MAX,
2973                                 .uses_mmap = true,
2974                         },
2975                         .user_freq     = UINT_MAX,
2976                         .user_interval = ULLONG_MAX,
2977                         .no_buffering  = true,
2978                         .mmap_pages    = UINT_MAX,
2979                         .proc_map_timeout  = 500,
2980                 },
2981                 .output = stderr,
2982                 .show_comm = true,
2983                 .trace_syscalls = true,
2984         };
2985         const char *output_name = NULL;
2986         const char *ev_qualifier_str = NULL;
2987         const struct option trace_options[] = {
2988         OPT_CALLBACK(0, "event", &trace.evlist, "event",
2989                      "event selector. use 'perf list' to list available events",
2990                      parse_events_option),
2991         OPT_BOOLEAN(0, "comm", &trace.show_comm,
2992                     "show the thread COMM next to its id"),
2993         OPT_BOOLEAN(0, "tool_stats", &trace.show_tool_stats, "show tool stats"),
2994         OPT_STRING('e', "expr", &ev_qualifier_str, "expr", "list of syscalls to trace"),
2995         OPT_STRING('o', "output", &output_name, "file", "output file name"),
2996         OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"),
2997         OPT_STRING('p', "pid", &trace.opts.target.pid, "pid",
2998                     "trace events on existing process id"),
2999         OPT_STRING('t', "tid", &trace.opts.target.tid, "tid",
3000                     "trace events on existing thread id"),
3001         OPT_CALLBACK(0, "filter-pids", &trace, "CSV list of pids",
3002                      "pids to filter (by the kernel)", trace__set_filter_pids),
3003         OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide,
3004                     "system-wide collection from all CPUs"),
3005         OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu",
3006                     "list of cpus to monitor"),
3007         OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit,
3008                     "child tasks do not inherit counters"),
3009         OPT_CALLBACK('m', "mmap-pages", &trace.opts.mmap_pages, "pages",
3010                      "number of mmap data pages",
3011                      perf_evlist__parse_mmap_pages),
3012         OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user",
3013                    "user to profile"),
3014         OPT_CALLBACK(0, "duration", &trace, "float",
3015                      "show only events with duration > N.M ms",
3016                      trace__set_duration),
3017         OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"),
3018         OPT_INCR('v', "verbose", &verbose, "be more verbose"),
3019         OPT_BOOLEAN('T', "time", &trace.full_time,
3020                     "Show full timestamp, not time relative to first start"),
3021         OPT_BOOLEAN('s', "summary", &trace.summary_only,
3022                     "Show only syscall summary with statistics"),
3023         OPT_BOOLEAN('S', "with-summary", &trace.summary,
3024                     "Show all syscalls and summary with statistics"),
3025         OPT_CALLBACK_DEFAULT('F', "pf", &trace.trace_pgfaults, "all|maj|min",
3026                      "Trace pagefaults", parse_pagefaults, "maj"),
3027         OPT_BOOLEAN(0, "syscalls", &trace.trace_syscalls, "Trace syscalls"),
3028         OPT_BOOLEAN('f', "force", &trace.force, "don't complain, do it"),
3029         OPT_UINTEGER(0, "proc-map-timeout", &trace.opts.proc_map_timeout,
3030                         "per thread proc mmap processing timeout in ms"),
3031         OPT_END()
3032         };
3033         const char * const trace_subcommands[] = { "record", NULL };
3034         int err;
3035         char bf[BUFSIZ];
3036
3037         signal(SIGSEGV, sighandler_dump_stack);
3038         signal(SIGFPE, sighandler_dump_stack);
3039
             /* the evlist must exist before option parsing: --event stores into it */
3040         trace.evlist = perf_evlist__new();
3041
3042         if (trace.evlist == NULL) {
3043                 pr_err("Not enough memory to run!\n");
3044                 err = -ENOMEM;
3045                 goto out;
3046         }
3047
3048         argc = parse_options_subcommand(argc, argv, trace_options, trace_subcommands,
3049                                  trace_usage, PARSE_OPT_STOP_AT_NON_OPTION);
3050
             /* page-fault tracing turns on address and time sampling */
3051         if (trace.trace_pgfaults) {
3052                 trace.opts.sample_address = true;
3053                 trace.opts.sample_time = true;
3054         }
3055
             /* events already in the evlist at this point all get trace__event_handler */
3056         if (trace.evlist->nr_entries > 0)
3057                 evlist__set_evsel_handler(trace.evlist, trace__event_handler);
3058
             /* "perf trace record ...": hand the remaining arguments to trace__record() */
3059         if ((argc >= 1) && (strcmp(argv[0], "record") == 0))
3060                 return trace__record(&trace, argc-1, &argv[1]);
3061
3062         /* summary_only implies summary option, but don't overwrite summary if set */
3063         if (trace.summary_only)
3064                 trace.summary = trace.summary_only;
3065
3066         if (!trace.trace_syscalls && !trace.trace_pgfaults &&
3067             trace.evlist->nr_entries == 0 /* Was --events used? */) {
3068                 pr_err("Please specify something to trace.\n");
3069                 return -1;
3070         }
3071
3072         if (output_name != NULL) {
3073                 err = trace__open_output(&trace, output_name);
3074                 if (err < 0) {
3075                         perror("failed to create output file");
3076                         goto out;
3077                 }
3078         }
3079
3080         if (ev_qualifier_str != NULL) {
3081                 const char *s = ev_qualifier_str;
3082                 struct strlist_config slist_config = {
3083                         .dirname = system_path(STRACE_GROUPS_DIR),
3084                 };
3085
                     /* a leading '!' negates the qualifier list; skip it before parsing */
3086                 trace.not_ev_qualifier = *s == '!';
3087                 if (trace.not_ev_qualifier)
3088                         ++s;
3089                 trace.ev_qualifier = strlist__new(s, &slist_config);
3090                 if (trace.ev_qualifier == NULL) {
3091                         fputs("Not enough memory to parse event qualifier",
3092                               trace.output);
3093                         err = -ENOMEM;
3094                         goto out_close;
3095                 }
3096
3097                 err = trace__validate_ev_qualifier(&trace);
3098                 if (err)
3099                         goto out_close;
3100         }
3101
3102         err = target__validate(&trace.opts.target);
3103         if (err) {
3104                 target__strerror(&trace.opts.target, err, bf, sizeof(bf));
3105                 fprintf(trace.output, "%s", bf);
3106                 goto out_close;
3107         }
3108
3109         err = target__parse_uid(&trace.opts.target);
3110         if (err) {
3111                 target__strerror(&trace.opts.target, err, bf, sizeof(bf));
3112                 fprintf(trace.output, "%s", bf);
3113                 goto out_close;
3114         }
3115
             /* no workload and no explicit target: trace system wide */
3116         if (!argc && target__none(&trace.opts.target))
3117                 trace.opts.target.system_wide = true;
3118
             /* an input file selects replay mode, otherwise trace live */
3119         if (input_name)
3120                 err = trace__replay(&trace);
3121         else
3122                 err = trace__run(&trace, argc, argv);
3123
3124 out_close:
             /* only close the stream we opened ourselves; the default is stderr */
3125         if (output_name != NULL)
3126                 fclose(trace.output);
3127 out:
3128         return err;
3129 }