perf trace: Use the FD beautifier for socket syscall fds
[firefly-linux-kernel-4.4.55.git] / tools / perf / builtin-trace.c
1 #include <traceevent/event-parse.h>
2 #include "builtin.h"
3 #include "util/color.h"
4 #include "util/debug.h"
5 #include "util/evlist.h"
6 #include "util/exec_cmd.h"
7 #include "util/machine.h"
8 #include "util/session.h"
9 #include "util/thread.h"
10 #include "util/parse-options.h"
11 #include "util/strlist.h"
12 #include "util/intlist.h"
13 #include "util/thread_map.h"
14 #include "util/stat.h"
15 #include "trace-event.h"
16 #include "util/parse-events.h"
17
18 #include <libaudit.h>
19 #include <stdlib.h>
20 #include <sys/mman.h>
21 #include <linux/futex.h>
22
/*
 * Fallback values for constants that the system headers on older distros
 * may not provide. Each one is defined only when it is not already visible.
 */

/* mmap()/madvise() */
#if !defined(MAP_STACK)
# define MAP_STACK		0x20000
#endif

#if !defined(MADV_HWPOISON)
# define MADV_HWPOISON		100
#endif

#if !defined(MADV_MERGEABLE)
# define MADV_MERGEABLE		12
#endif

#if !defined(MADV_UNMERGEABLE)
# define MADV_UNMERGEABLE	13
#endif

/* eventfd2() */
#if !defined(EFD_SEMAPHORE)
# define EFD_SEMAPHORE		1
#endif

#if !defined(EFD_NONBLOCK)
# define EFD_NONBLOCK		00004000
#endif

#if !defined(EFD_CLOEXEC)
# define EFD_CLOEXEC		02000000
#endif

/* open() */
#if !defined(O_CLOEXEC)
# define O_CLOEXEC		02000000
#endif

/* socket()/recvmsg() */
#if !defined(SOCK_DCCP)
# define SOCK_DCCP		6
#endif

#if !defined(SOCK_CLOEXEC)
# define SOCK_CLOEXEC		02000000
#endif

#if !defined(SOCK_NONBLOCK)
# define SOCK_NONBLOCK		00004000
#endif

#if !defined(MSG_CMSG_CLOEXEC)
# define MSG_CMSG_CLOEXEC	0x40000000
#endif

/* perf_event_open() */
#if !defined(PERF_FLAG_FD_NO_GROUP)
# define PERF_FLAG_FD_NO_GROUP		(1UL << 0)
#endif

#if !defined(PERF_FLAG_FD_OUTPUT)
# define PERF_FLAG_FD_OUTPUT		(1UL << 1)
#endif

#if !defined(PERF_FLAG_PID_CGROUP)
# define PERF_FLAG_PID_CGROUP		(1UL << 2) /* pid=cgroup id, per-cpu mode only */
#endif

#if !defined(PERF_FLAG_FD_CLOEXEC)
# define PERF_FLAG_FD_CLOEXEC		(1UL << 3) /* O_CLOEXEC */
#endif
87
88
89 struct tp_field {
90         int offset;
91         union {
92                 u64 (*integer)(struct tp_field *field, struct perf_sample *sample);
93                 void *(*pointer)(struct tp_field *field, struct perf_sample *sample);
94         };
95 };
96
97 #define TP_UINT_FIELD(bits) \
98 static u64 tp_field__u##bits(struct tp_field *field, struct perf_sample *sample) \
99 { \
100         u##bits value; \
101         memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
102         return value;  \
103 }
104
105 TP_UINT_FIELD(8);
106 TP_UINT_FIELD(16);
107 TP_UINT_FIELD(32);
108 TP_UINT_FIELD(64);
109
110 #define TP_UINT_FIELD__SWAPPED(bits) \
111 static u64 tp_field__swapped_u##bits(struct tp_field *field, struct perf_sample *sample) \
112 { \
113         u##bits value; \
114         memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
115         return bswap_##bits(value);\
116 }
117
118 TP_UINT_FIELD__SWAPPED(16);
119 TP_UINT_FIELD__SWAPPED(32);
120 TP_UINT_FIELD__SWAPPED(64);
121
122 static int tp_field__init_uint(struct tp_field *field,
123                                struct format_field *format_field,
124                                bool needs_swap)
125 {
126         field->offset = format_field->offset;
127
128         switch (format_field->size) {
129         case 1:
130                 field->integer = tp_field__u8;
131                 break;
132         case 2:
133                 field->integer = needs_swap ? tp_field__swapped_u16 : tp_field__u16;
134                 break;
135         case 4:
136                 field->integer = needs_swap ? tp_field__swapped_u32 : tp_field__u32;
137                 break;
138         case 8:
139                 field->integer = needs_swap ? tp_field__swapped_u64 : tp_field__u64;
140                 break;
141         default:
142                 return -1;
143         }
144
145         return 0;
146 }
147
148 static void *tp_field__ptr(struct tp_field *field, struct perf_sample *sample)
149 {
150         return sample->raw_data + field->offset;
151 }
152
153 static int tp_field__init_ptr(struct tp_field *field, struct format_field *format_field)
154 {
155         field->offset = format_field->offset;
156         field->pointer = tp_field__ptr;
157         return 0;
158 }
159
160 struct syscall_tp {
161         struct tp_field id;
162         union {
163                 struct tp_field args, ret;
164         };
165 };
166
167 static int perf_evsel__init_tp_uint_field(struct perf_evsel *evsel,
168                                           struct tp_field *field,
169                                           const char *name)
170 {
171         struct format_field *format_field = perf_evsel__field(evsel, name);
172
173         if (format_field == NULL)
174                 return -1;
175
176         return tp_field__init_uint(field, format_field, evsel->needs_swap);
177 }
178
/*
 * Initialize the integer accessor called @name in the struct syscall_tp that
 * hangs off evsel->priv, using the tracepoint field of the same name.
 * @name is used both as the member name and, stringified, as the field name.
 * Evaluates to the return value of perf_evsel__init_tp_uint_field().
 */
#define perf_evsel__init_sc_tp_uint_field(evsel, name) \
	({ struct syscall_tp *sc = (evsel)->priv;\
	   perf_evsel__init_tp_uint_field((evsel), &sc->name, #name); })
182
/*
 * Look up the tracepoint field called @name in @evsel and initialize @field
 * as a pointer accessor for it.
 *
 * Returns -1 when the field does not exist, otherwise whatever
 * tp_field__init_ptr() returns.
 */
static int perf_evsel__init_tp_ptr_field(struct perf_evsel *evsel,
					 struct tp_field *field,
					 const char *name)
{
	struct format_field *fmt = perf_evsel__field(evsel, name);

	return fmt != NULL ? tp_field__init_ptr(field, fmt) : -1;
}
194
/*
 * Initialize the pointer accessor called @name in the struct syscall_tp that
 * hangs off evsel->priv, using the tracepoint field of the same name.
 * @name is used both as the member name and, stringified, as the field name.
 * Evaluates to the return value of perf_evsel__init_tp_ptr_field().
 */
#define perf_evsel__init_sc_tp_ptr_field(evsel, name) \
	({ struct syscall_tp *sc = (evsel)->priv;\
	   perf_evsel__init_tp_ptr_field((evsel), &sc->name, #name); })
198
199 static void perf_evsel__delete_priv(struct perf_evsel *evsel)
200 {
201         zfree(&evsel->priv);
202         perf_evsel__delete(evsel);
203 }
204
205 static int perf_evsel__init_syscall_tp(struct perf_evsel *evsel, void *handler)
206 {
207         evsel->priv = malloc(sizeof(struct syscall_tp));
208         if (evsel->priv != NULL) {
209                 if (perf_evsel__init_sc_tp_uint_field(evsel, id))
210                         goto out_delete;
211
212                 evsel->handler = handler;
213                 return 0;
214         }
215
216         return -ENOMEM;
217
218 out_delete:
219         zfree(&evsel->priv);
220         return -ENOENT;
221 }
222
/*
 * Create the evsel for the syscall tracepoint named by @direction and
 * initialize it with perf_evsel__init_syscall_tp().
 *
 * "raw_syscalls" is tried first, then "syscalls", which is what older
 * kernels (e.g. RHEL6) use.
 *
 * Returns the new evsel, or NULL when neither tracepoint could be created
 * or the initialization failed (the evsel is deleted in the latter case).
 */
static struct perf_evsel *perf_evsel__syscall_newtp(const char *direction, void *handler)
{
	struct perf_evsel *evsel;

	evsel = perf_evsel__newtp("raw_syscalls", direction);
	if (evsel == NULL)
		evsel = perf_evsel__newtp("syscalls", direction);

	if (evsel == NULL)
		return NULL;

	if (perf_evsel__init_syscall_tp(evsel, handler)) {
		perf_evsel__delete_priv(evsel);
		return NULL;
	}

	return evsel;
}
242
/*
 * Read the integer field @name of @sample through the accessor stored in the
 * struct syscall_tp at evsel->priv. Evaluates to the u64 the reader returns.
 */
#define perf_evsel__sc_tp_uint(evsel, name, sample) \
	({ struct syscall_tp *fields = (evsel)->priv; \
	   fields->name.integer(&fields->name, sample); })

/*
 * Same as perf_evsel__sc_tp_uint(), but goes through the pointer reader and
 * evaluates to the address it returns.
 */
#define perf_evsel__sc_tp_ptr(evsel, name, sample) \
	({ struct syscall_tp *fields = (evsel)->priv; \
	   fields->name.pointer(&fields->name, sample); })
250
251 struct syscall_arg {
252         unsigned long val;
253         struct thread *thread;
254         struct trace  *trace;
255         void          *parm;
256         u8            idx;
257         u8            mask;
258 };
259
/*
 * A table of names for the values of an integer argument.
 *
 * @offset:     value named by entries[0]
 * @nr_entries: number of slots in @entries
 * @entries:    the names, indexed by (value - offset)
 */
struct strarray {
	int	     offset;
	int	     nr_entries;
	const char **entries;
};

/* Define strarray__<array> wrapping @array, with entries[0] naming value 0. */
#define DEFINE_STRARRAY(array) struct strarray strarray__##array = { \
	.nr_entries = ARRAY_SIZE(array), \
	.entries    = array, \
}

/* Define strarray__<array> wrapping @array, with entries[0] naming value @off. */
#define DEFINE_STRARRAY_OFFSET(array, off) struct strarray strarray__##array = { \
	.offset	    = off, \
	.nr_entries = ARRAY_SIZE(array), \
	.entries    = array, \
}
276
277 static size_t __syscall_arg__scnprintf_strarray(char *bf, size_t size,
278                                                 const char *intfmt,
279                                                 struct syscall_arg *arg)
280 {
281         struct strarray *sa = arg->parm;
282         int idx = arg->val - sa->offset;
283
284         if (idx < 0 || idx >= sa->nr_entries)
285                 return scnprintf(bf, size, intfmt, arg->val);
286
287         return scnprintf(bf, size, "%s", sa->entries[idx]);
288 }
289
/*
 * String table beautifier: values without a name are printed in decimal.
 * arg->parm must point to the struct strarray to use.
 */
static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size,
					      struct syscall_arg *arg)
{
	static const char fallback_fmt[] = "%d";

	return __syscall_arg__scnprintf_strarray(bf, size, fallback_fmt, arg);
}

#define SCA_STRARRAY syscall_arg__scnprintf_strarray
297
#if defined(__i386__) || defined(__x86_64__)
/*
 * FIXME: Make this available to all arches as soon as the ioctl beautifier
 *        gets rewritten to support all arches.
 *
 * String table beautifier: values without a name are printed in hex.
 * arg->parm must point to the struct strarray to use.
 */
static size_t syscall_arg__scnprintf_strhexarray(char *bf, size_t size,
						 struct syscall_arg *arg)
{
	static const char fallback_fmt[] = "%#x";

	return __syscall_arg__scnprintf_strarray(bf, size, fallback_fmt, arg);
}

#define SCA_STRHEXARRAY syscall_arg__scnprintf_strhexarray
#endif /* defined(__i386__) || defined(__x86_64__) */
311
312 static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
313                                         struct syscall_arg *arg);
314
315 #define SCA_FD syscall_arg__scnprintf_fd
316
317 static size_t syscall_arg__scnprintf_fd_at(char *bf, size_t size,
318                                            struct syscall_arg *arg)
319 {
320         int fd = arg->val;
321
322         if (fd == AT_FDCWD)
323                 return scnprintf(bf, size, "CWD");
324
325         return syscall_arg__scnprintf_fd(bf, size, arg);
326 }
327
328 #define SCA_FDAT syscall_arg__scnprintf_fd_at
329
330 static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
331                                               struct syscall_arg *arg);
332
333 #define SCA_CLOSE_FD syscall_arg__scnprintf_close_fd
334
335 static size_t syscall_arg__scnprintf_hex(char *bf, size_t size,
336                                          struct syscall_arg *arg)
337 {
338         return scnprintf(bf, size, "%#lx", arg->val);
339 }
340
341 #define SCA_HEX syscall_arg__scnprintf_hex
342
343 static size_t syscall_arg__scnprintf_int(char *bf, size_t size,
344                                          struct syscall_arg *arg)
345 {
346         return scnprintf(bf, size, "%d", arg->val);
347 }
348
349 #define SCA_INT syscall_arg__scnprintf_int
350
351 static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size,
352                                                struct syscall_arg *arg)
353 {
354         int printed = 0, prot = arg->val;
355
356         if (prot == PROT_NONE)
357                 return scnprintf(bf, size, "NONE");
358 #define P_MMAP_PROT(n) \
359         if (prot & PROT_##n) { \
360                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
361                 prot &= ~PROT_##n; \
362         }
363
364         P_MMAP_PROT(EXEC);
365         P_MMAP_PROT(READ);
366         P_MMAP_PROT(WRITE);
367 #ifdef PROT_SEM
368         P_MMAP_PROT(SEM);
369 #endif
370         P_MMAP_PROT(GROWSDOWN);
371         P_MMAP_PROT(GROWSUP);
372 #undef P_MMAP_PROT
373
374         if (prot)
375                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", prot);
376
377         return printed;
378 }
379
380 #define SCA_MMAP_PROT syscall_arg__scnprintf_mmap_prot
381
382 static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size,
383                                                 struct syscall_arg *arg)
384 {
385         int printed = 0, flags = arg->val;
386
387 #define P_MMAP_FLAG(n) \
388         if (flags & MAP_##n) { \
389                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
390                 flags &= ~MAP_##n; \
391         }
392
393         P_MMAP_FLAG(SHARED);
394         P_MMAP_FLAG(PRIVATE);
395 #ifdef MAP_32BIT
396         P_MMAP_FLAG(32BIT);
397 #endif
398         P_MMAP_FLAG(ANONYMOUS);
399         P_MMAP_FLAG(DENYWRITE);
400         P_MMAP_FLAG(EXECUTABLE);
401         P_MMAP_FLAG(FILE);
402         P_MMAP_FLAG(FIXED);
403         P_MMAP_FLAG(GROWSDOWN);
404 #ifdef MAP_HUGETLB
405         P_MMAP_FLAG(HUGETLB);
406 #endif
407         P_MMAP_FLAG(LOCKED);
408         P_MMAP_FLAG(NONBLOCK);
409         P_MMAP_FLAG(NORESERVE);
410         P_MMAP_FLAG(POPULATE);
411         P_MMAP_FLAG(STACK);
412 #ifdef MAP_UNINITIALIZED
413         P_MMAP_FLAG(UNINITIALIZED);
414 #endif
415 #undef P_MMAP_FLAG
416
417         if (flags)
418                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
419
420         return printed;
421 }
422
423 #define SCA_MMAP_FLAGS syscall_arg__scnprintf_mmap_flags
424
425 static size_t syscall_arg__scnprintf_mremap_flags(char *bf, size_t size,
426                                                   struct syscall_arg *arg)
427 {
428         int printed = 0, flags = arg->val;
429
430 #define P_MREMAP_FLAG(n) \
431         if (flags & MREMAP_##n) { \
432                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
433                 flags &= ~MREMAP_##n; \
434         }
435
436         P_MREMAP_FLAG(MAYMOVE);
437 #ifdef MREMAP_FIXED
438         P_MREMAP_FLAG(FIXED);
439 #endif
440 #undef P_MREMAP_FLAG
441
442         if (flags)
443                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
444
445         return printed;
446 }
447
448 #define SCA_MREMAP_FLAGS syscall_arg__scnprintf_mremap_flags
449
450 static size_t syscall_arg__scnprintf_madvise_behavior(char *bf, size_t size,
451                                                       struct syscall_arg *arg)
452 {
453         int behavior = arg->val;
454
455         switch (behavior) {
456 #define P_MADV_BHV(n) case MADV_##n: return scnprintf(bf, size, #n)
457         P_MADV_BHV(NORMAL);
458         P_MADV_BHV(RANDOM);
459         P_MADV_BHV(SEQUENTIAL);
460         P_MADV_BHV(WILLNEED);
461         P_MADV_BHV(DONTNEED);
462         P_MADV_BHV(REMOVE);
463         P_MADV_BHV(DONTFORK);
464         P_MADV_BHV(DOFORK);
465         P_MADV_BHV(HWPOISON);
466 #ifdef MADV_SOFT_OFFLINE
467         P_MADV_BHV(SOFT_OFFLINE);
468 #endif
469         P_MADV_BHV(MERGEABLE);
470         P_MADV_BHV(UNMERGEABLE);
471 #ifdef MADV_HUGEPAGE
472         P_MADV_BHV(HUGEPAGE);
473 #endif
474 #ifdef MADV_NOHUGEPAGE
475         P_MADV_BHV(NOHUGEPAGE);
476 #endif
477 #ifdef MADV_DONTDUMP
478         P_MADV_BHV(DONTDUMP);
479 #endif
480 #ifdef MADV_DODUMP
481         P_MADV_BHV(DODUMP);
482 #endif
483 #undef P_MADV_PHV
484         default: break;
485         }
486
487         return scnprintf(bf, size, "%#x", behavior);
488 }
489
490 #define SCA_MADV_BHV syscall_arg__scnprintf_madvise_behavior
491
492 static size_t syscall_arg__scnprintf_flock(char *bf, size_t size,
493                                            struct syscall_arg *arg)
494 {
495         int printed = 0, op = arg->val;
496
497         if (op == 0)
498                 return scnprintf(bf, size, "NONE");
499 #define P_CMD(cmd) \
500         if ((op & LOCK_##cmd) == LOCK_##cmd) { \
501                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #cmd); \
502                 op &= ~LOCK_##cmd; \
503         }
504
505         P_CMD(SH);
506         P_CMD(EX);
507         P_CMD(NB);
508         P_CMD(UN);
509         P_CMD(MAND);
510         P_CMD(RW);
511         P_CMD(READ);
512         P_CMD(WRITE);
513 #undef P_OP
514
515         if (op)
516                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", op);
517
518         return printed;
519 }
520
521 #define SCA_FLOCK syscall_arg__scnprintf_flock
522
523 static size_t syscall_arg__scnprintf_futex_op(char *bf, size_t size, struct syscall_arg *arg)
524 {
525         enum syscall_futex_args {
526                 SCF_UADDR   = (1 << 0),
527                 SCF_OP      = (1 << 1),
528                 SCF_VAL     = (1 << 2),
529                 SCF_TIMEOUT = (1 << 3),
530                 SCF_UADDR2  = (1 << 4),
531                 SCF_VAL3    = (1 << 5),
532         };
533         int op = arg->val;
534         int cmd = op & FUTEX_CMD_MASK;
535         size_t printed = 0;
536
537         switch (cmd) {
538 #define P_FUTEX_OP(n) case FUTEX_##n: printed = scnprintf(bf, size, #n);
539         P_FUTEX_OP(WAIT);           arg->mask |= SCF_VAL3|SCF_UADDR2;             break;
540         P_FUTEX_OP(WAKE);           arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
541         P_FUTEX_OP(FD);             arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
542         P_FUTEX_OP(REQUEUE);        arg->mask |= SCF_VAL3|SCF_TIMEOUT;            break;
543         P_FUTEX_OP(CMP_REQUEUE);    arg->mask |= SCF_TIMEOUT;                     break;
544         P_FUTEX_OP(CMP_REQUEUE_PI); arg->mask |= SCF_TIMEOUT;                     break;
545         P_FUTEX_OP(WAKE_OP);                                                      break;
546         P_FUTEX_OP(LOCK_PI);        arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
547         P_FUTEX_OP(UNLOCK_PI);      arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
548         P_FUTEX_OP(TRYLOCK_PI);     arg->mask |= SCF_VAL3|SCF_UADDR2;             break;
549         P_FUTEX_OP(WAIT_BITSET);    arg->mask |= SCF_UADDR2;                      break;
550         P_FUTEX_OP(WAKE_BITSET);    arg->mask |= SCF_UADDR2;                      break;
551         P_FUTEX_OP(WAIT_REQUEUE_PI);                                              break;
552         default: printed = scnprintf(bf, size, "%#x", cmd);                       break;
553         }
554
555         if (op & FUTEX_PRIVATE_FLAG)
556                 printed += scnprintf(bf + printed, size - printed, "|PRIV");
557
558         if (op & FUTEX_CLOCK_REALTIME)
559                 printed += scnprintf(bf + printed, size - printed, "|CLKRT");
560
561         return printed;
562 }
563
564 #define SCA_FUTEX_OP  syscall_arg__scnprintf_futex_op
565
566 static const char *epoll_ctl_ops[] = { "ADD", "DEL", "MOD", };
567 static DEFINE_STRARRAY_OFFSET(epoll_ctl_ops, 1);
568
569 static const char *itimers[] = { "REAL", "VIRTUAL", "PROF", };
570 static DEFINE_STRARRAY(itimers);
571
572 static const char *whences[] = { "SET", "CUR", "END",
573 #ifdef SEEK_DATA
574 "DATA",
575 #endif
576 #ifdef SEEK_HOLE
577 "HOLE",
578 #endif
579 };
580 static DEFINE_STRARRAY(whences);
581
582 static const char *fcntl_cmds[] = {
583         "DUPFD", "GETFD", "SETFD", "GETFL", "SETFL", "GETLK", "SETLK",
584         "SETLKW", "SETOWN", "GETOWN", "SETSIG", "GETSIG", "F_GETLK64",
585         "F_SETLK64", "F_SETLKW64", "F_SETOWN_EX", "F_GETOWN_EX",
586         "F_GETOWNER_UIDS",
587 };
588 static DEFINE_STRARRAY(fcntl_cmds);
589
590 static const char *rlimit_resources[] = {
591         "CPU", "FSIZE", "DATA", "STACK", "CORE", "RSS", "NPROC", "NOFILE",
592         "MEMLOCK", "AS", "LOCKS", "SIGPENDING", "MSGQUEUE", "NICE", "RTPRIO",
593         "RTTIME",
594 };
595 static DEFINE_STRARRAY(rlimit_resources);
596
597 static const char *sighow[] = { "BLOCK", "UNBLOCK", "SETMASK", };
598 static DEFINE_STRARRAY(sighow);
599
600 static const char *clockid[] = {
601         "REALTIME", "MONOTONIC", "PROCESS_CPUTIME_ID", "THREAD_CPUTIME_ID",
602         "MONOTONIC_RAW", "REALTIME_COARSE", "MONOTONIC_COARSE", "BOOTTIME",
603         "REALTIME_ALARM", "BOOTTIME_ALARM", "SGI_CYCLE", "TAI"
604 };
605 static DEFINE_STRARRAY(clockid);
606
/*
 * Socket address family names, indexed by the AF_* value and without the
 * AF_ prefix.
 *
 * The positions have to match the kernel's numbering: "MPLS" (28) sits
 * between "IB" (27) and "CAN" (29). Leaving it out names every family from
 * CAN onwards one slot too early.
 */
static const char *socket_families[] = {
	"UNSPEC", "LOCAL", "INET", "AX25", "IPX", "APPLETALK", "NETROM",
	"BRIDGE", "ATMPVC", "X25", "INET6", "ROSE", "DECnet", "NETBEUI",
	"SECURITY", "KEY", "NETLINK", "PACKET", "ASH", "ECONET", "ATMSVC",
	"RDS", "SNA", "IRDA", "PPPOX", "WANPIPE", "LLC", "IB", "MPLS", "CAN",
	"TIPC", "BLUETOOTH", "IUCV", "RXRPC", "ISDN", "PHONET", "IEEE802154",
	"CAIF", "ALG", "NFC", "VSOCK",
};
615 static DEFINE_STRARRAY(socket_families);
616
617 #ifndef SOCK_TYPE_MASK
618 #define SOCK_TYPE_MASK 0xf
619 #endif
620
621 static size_t syscall_arg__scnprintf_socket_type(char *bf, size_t size,
622                                                       struct syscall_arg *arg)
623 {
624         size_t printed;
625         int type = arg->val,
626             flags = type & ~SOCK_TYPE_MASK;
627
628         type &= SOCK_TYPE_MASK;
629         /*
630          * Can't use a strarray, MIPS may override for ABI reasons.
631          */
632         switch (type) {
633 #define P_SK_TYPE(n) case SOCK_##n: printed = scnprintf(bf, size, #n); break;
634         P_SK_TYPE(STREAM);
635         P_SK_TYPE(DGRAM);
636         P_SK_TYPE(RAW);
637         P_SK_TYPE(RDM);
638         P_SK_TYPE(SEQPACKET);
639         P_SK_TYPE(DCCP);
640         P_SK_TYPE(PACKET);
641 #undef P_SK_TYPE
642         default:
643                 printed = scnprintf(bf, size, "%#x", type);
644         }
645
646 #define P_SK_FLAG(n) \
647         if (flags & SOCK_##n) { \
648                 printed += scnprintf(bf + printed, size - printed, "|%s", #n); \
649                 flags &= ~SOCK_##n; \
650         }
651
652         P_SK_FLAG(CLOEXEC);
653         P_SK_FLAG(NONBLOCK);
654 #undef P_SK_FLAG
655
656         if (flags)
657                 printed += scnprintf(bf + printed, size - printed, "|%#x", flags);
658
659         return printed;
660 }
661
662 #define SCA_SK_TYPE syscall_arg__scnprintf_socket_type
663
664 #ifndef MSG_PROBE
665 #define MSG_PROBE            0x10
666 #endif
667 #ifndef MSG_WAITFORONE
668 #define MSG_WAITFORONE  0x10000
669 #endif
670 #ifndef MSG_SENDPAGE_NOTLAST
671 #define MSG_SENDPAGE_NOTLAST 0x20000
672 #endif
673 #ifndef MSG_FASTOPEN
674 #define MSG_FASTOPEN         0x20000000
675 #endif
676
677 static size_t syscall_arg__scnprintf_msg_flags(char *bf, size_t size,
678                                                struct syscall_arg *arg)
679 {
680         int printed = 0, flags = arg->val;
681
682         if (flags == 0)
683                 return scnprintf(bf, size, "NONE");
684 #define P_MSG_FLAG(n) \
685         if (flags & MSG_##n) { \
686                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
687                 flags &= ~MSG_##n; \
688         }
689
690         P_MSG_FLAG(OOB);
691         P_MSG_FLAG(PEEK);
692         P_MSG_FLAG(DONTROUTE);
693         P_MSG_FLAG(TRYHARD);
694         P_MSG_FLAG(CTRUNC);
695         P_MSG_FLAG(PROBE);
696         P_MSG_FLAG(TRUNC);
697         P_MSG_FLAG(DONTWAIT);
698         P_MSG_FLAG(EOR);
699         P_MSG_FLAG(WAITALL);
700         P_MSG_FLAG(FIN);
701         P_MSG_FLAG(SYN);
702         P_MSG_FLAG(CONFIRM);
703         P_MSG_FLAG(RST);
704         P_MSG_FLAG(ERRQUEUE);
705         P_MSG_FLAG(NOSIGNAL);
706         P_MSG_FLAG(MORE);
707         P_MSG_FLAG(WAITFORONE);
708         P_MSG_FLAG(SENDPAGE_NOTLAST);
709         P_MSG_FLAG(FASTOPEN);
710         P_MSG_FLAG(CMSG_CLOEXEC);
711 #undef P_MSG_FLAG
712
713         if (flags)
714                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
715
716         return printed;
717 }
718
719 #define SCA_MSG_FLAGS syscall_arg__scnprintf_msg_flags
720
721 static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size,
722                                                  struct syscall_arg *arg)
723 {
724         size_t printed = 0;
725         int mode = arg->val;
726
727         if (mode == F_OK) /* 0 */
728                 return scnprintf(bf, size, "F");
729 #define P_MODE(n) \
730         if (mode & n##_OK) { \
731                 printed += scnprintf(bf + printed, size - printed, "%s", #n); \
732                 mode &= ~n##_OK; \
733         }
734
735         P_MODE(R);
736         P_MODE(W);
737         P_MODE(X);
738 #undef P_MODE
739
740         if (mode)
741                 printed += scnprintf(bf + printed, size - printed, "|%#x", mode);
742
743         return printed;
744 }
745
746 #define SCA_ACCMODE syscall_arg__scnprintf_access_mode
747
748 static size_t syscall_arg__scnprintf_filename(char *bf, size_t size,
749                                               struct syscall_arg *arg);
750
751 #define SCA_FILENAME syscall_arg__scnprintf_filename
752
753 static size_t syscall_arg__scnprintf_open_flags(char *bf, size_t size,
754                                                struct syscall_arg *arg)
755 {
756         int printed = 0, flags = arg->val;
757
758         if (!(flags & O_CREAT))
759                 arg->mask |= 1 << (arg->idx + 1); /* Mask the mode parm */
760
761         if (flags == 0)
762                 return scnprintf(bf, size, "RDONLY");
763 #define P_FLAG(n) \
764         if (flags & O_##n) { \
765                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
766                 flags &= ~O_##n; \
767         }
768
769         P_FLAG(APPEND);
770         P_FLAG(ASYNC);
771         P_FLAG(CLOEXEC);
772         P_FLAG(CREAT);
773         P_FLAG(DIRECT);
774         P_FLAG(DIRECTORY);
775         P_FLAG(EXCL);
776         P_FLAG(LARGEFILE);
777         P_FLAG(NOATIME);
778         P_FLAG(NOCTTY);
779 #ifdef O_NONBLOCK
780         P_FLAG(NONBLOCK);
781 #elif O_NDELAY
782         P_FLAG(NDELAY);
783 #endif
784 #ifdef O_PATH
785         P_FLAG(PATH);
786 #endif
787         P_FLAG(RDWR);
788 #ifdef O_DSYNC
789         if ((flags & O_SYNC) == O_SYNC)
790                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", "SYNC");
791         else {
792                 P_FLAG(DSYNC);
793         }
794 #else
795         P_FLAG(SYNC);
796 #endif
797         P_FLAG(TRUNC);
798         P_FLAG(WRONLY);
799 #undef P_FLAG
800
801         if (flags)
802                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
803
804         return printed;
805 }
806
807 #define SCA_OPEN_FLAGS syscall_arg__scnprintf_open_flags
808
809 static size_t syscall_arg__scnprintf_perf_flags(char *bf, size_t size,
810                                                 struct syscall_arg *arg)
811 {
812         int printed = 0, flags = arg->val;
813
814         if (flags == 0)
815                 return 0;
816
817 #define P_FLAG(n) \
818         if (flags & PERF_FLAG_##n) { \
819                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
820                 flags &= ~PERF_FLAG_##n; \
821         }
822
823         P_FLAG(FD_NO_GROUP);
824         P_FLAG(FD_OUTPUT);
825         P_FLAG(PID_CGROUP);
826         P_FLAG(FD_CLOEXEC);
827 #undef P_FLAG
828
829         if (flags)
830                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
831
832         return printed;
833 }
834
835 #define SCA_PERF_FLAGS syscall_arg__scnprintf_perf_flags
836
837 static size_t syscall_arg__scnprintf_eventfd_flags(char *bf, size_t size,
838                                                    struct syscall_arg *arg)
839 {
840         int printed = 0, flags = arg->val;
841
842         if (flags == 0)
843                 return scnprintf(bf, size, "NONE");
844 #define P_FLAG(n) \
845         if (flags & EFD_##n) { \
846                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
847                 flags &= ~EFD_##n; \
848         }
849
850         P_FLAG(SEMAPHORE);
851         P_FLAG(CLOEXEC);
852         P_FLAG(NONBLOCK);
853 #undef P_FLAG
854
855         if (flags)
856                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
857
858         return printed;
859 }
860
861 #define SCA_EFD_FLAGS syscall_arg__scnprintf_eventfd_flags
862
863 static size_t syscall_arg__scnprintf_pipe_flags(char *bf, size_t size,
864                                                 struct syscall_arg *arg)
865 {
866         int printed = 0, flags = arg->val;
867
868 #define P_FLAG(n) \
869         if (flags & O_##n) { \
870                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
871                 flags &= ~O_##n; \
872         }
873
874         P_FLAG(CLOEXEC);
875         P_FLAG(NONBLOCK);
876 #undef P_FLAG
877
878         if (flags)
879                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
880
881         return printed;
882 }
883
884 #define SCA_PIPE_FLAGS syscall_arg__scnprintf_pipe_flags
885
886 static size_t syscall_arg__scnprintf_signum(char *bf, size_t size, struct syscall_arg *arg)
887 {
888         int sig = arg->val;
889
890         switch (sig) {
891 #define P_SIGNUM(n) case SIG##n: return scnprintf(bf, size, #n)
892         P_SIGNUM(HUP);
893         P_SIGNUM(INT);
894         P_SIGNUM(QUIT);
895         P_SIGNUM(ILL);
896         P_SIGNUM(TRAP);
897         P_SIGNUM(ABRT);
898         P_SIGNUM(BUS);
899         P_SIGNUM(FPE);
900         P_SIGNUM(KILL);
901         P_SIGNUM(USR1);
902         P_SIGNUM(SEGV);
903         P_SIGNUM(USR2);
904         P_SIGNUM(PIPE);
905         P_SIGNUM(ALRM);
906         P_SIGNUM(TERM);
907         P_SIGNUM(CHLD);
908         P_SIGNUM(CONT);
909         P_SIGNUM(STOP);
910         P_SIGNUM(TSTP);
911         P_SIGNUM(TTIN);
912         P_SIGNUM(TTOU);
913         P_SIGNUM(URG);
914         P_SIGNUM(XCPU);
915         P_SIGNUM(XFSZ);
916         P_SIGNUM(VTALRM);
917         P_SIGNUM(PROF);
918         P_SIGNUM(WINCH);
919         P_SIGNUM(IO);
920         P_SIGNUM(PWR);
921         P_SIGNUM(SYS);
922 #ifdef SIGEMT
923         P_SIGNUM(EMT);
924 #endif
925 #ifdef SIGSTKFLT
926         P_SIGNUM(STKFLT);
927 #endif
928 #ifdef SIGSWI
929         P_SIGNUM(SWI);
930 #endif
931         default: break;
932         }
933
934         return scnprintf(bf, size, "%#x", sig);
935 }
936
937 #define SCA_SIGNUM syscall_arg__scnprintf_signum
938
939 #if defined(__i386__) || defined(__x86_64__)
940 /*
941  * FIXME: Make this available to all arches.
942  */
#define TCGETS		0x5401

/*
 * Names of the tty ioctl requests, indexed by (request - TCGETS): the table
 * is looked up through a string array whose offset is 0x5401.
 *
 * Where the numbering has holes the next run is positioned explicitly, and
 * those positions are relative to TCGETS as well, e.g. TIOCSBRK (0x5427)
 * lives at 0x5427 - TCGETS and not at 0x27.
 */
static const char *tioctls[] = {
	"TCGETS", "TCSETS", "TCSETSW", "TCSETSF", "TCGETA", "TCSETA", "TCSETAW",
	"TCSETAF", "TCSBRK", "TCXONC", "TCFLSH", "TIOCEXCL", "TIOCNXCL",
	"TIOCSCTTY", "TIOCGPGRP", "TIOCSPGRP", "TIOCOUTQ", "TIOCSTI",
	"TIOCGWINSZ", "TIOCSWINSZ", "TIOCMGET", "TIOCMBIS", "TIOCMBIC",
	"TIOCMSET", "TIOCGSOFTCAR", "TIOCSSOFTCAR", "FIONREAD", "TIOCLINUX",
	"TIOCCONS", "TIOCGSERIAL", "TIOCSSERIAL", "TIOCPKT", "FIONBIO",
	"TIOCNOTTY", "TIOCSETD", "TIOCGETD", "TCSBRKP",
	[0x5427 - TCGETS] = "TIOCSBRK",
	"TIOCCBRK", "TIOCGSID", "TCGETS2", "TCSETS2", "TCSETSW2", "TCSETSF2",
	"TIOCGRS485", "TIOCSRS485", "TIOCGPTN", "TIOCSPTLCK",
	"TIOCGDEV||TCGETX", "TCSETX", "TCSETXF", "TCSETXW", "TIOCSIG",
	"TIOCVHANGUP", "TIOCGPKT", "TIOCGPTLCK",
	[0x5440 - TCGETS] = "TIOCGEXCL",
	[0x5450 - TCGETS] = "FIONCLEX", "FIOCLEX", "FIOASYNC", "TIOCSERCONFIG",
	"TIOCSERGWILD", "TIOCSERSWILD", "TIOCGLCKTRMIOS", "TIOCSLCKTRMIOS",
	"TIOCSERGSTRUCT", "TIOCSERGETLSR", "TIOCSERGETMULTI", "TIOCSERSETMULTI",
	"TIOCMIWAIT", "TIOCGICOUNT",
	[0x5460 - TCGETS] = "FIOQSIZE",
};
962
963 static DEFINE_STRARRAY_OFFSET(tioctls, 0x5401);
964 #endif /* defined(__i386__) || defined(__x86_64__) */
965
/*
 * Initializer fragment for a syscall_fmts[] entry whose argument number @arg
 * is printed through SCA_STRARRAY using the table strarray__<array>.
 * @name is not expanded; it only documents the argument's name at the use site.
 */
#define STRARRAY(arg, name, array) \
	  .arg_scnprintf = { [arg] = SCA_STRARRAY, }, \
	  .arg_parm	 = { [arg] = &strarray__##array, }
969
970 static struct syscall_fmt {
971         const char *name;
972         const char *alias;
973         size_t     (*arg_scnprintf[6])(char *bf, size_t size, struct syscall_arg *arg);
974         void       *arg_parm[6];
975         bool       errmsg;
976         bool       timeout;
977         bool       hexret;
978 } syscall_fmts[] = {
979         { .name     = "access",     .errmsg = true,
980           .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */
981                              [1] = SCA_ACCMODE,  /* mode */ }, },
982         { .name     = "arch_prctl", .errmsg = true, .alias = "prctl", },
983         { .name     = "brk",        .hexret = true,
984           .arg_scnprintf = { [0] = SCA_HEX, /* brk */ }, },
985         { .name     = "chdir",      .errmsg = true,
986           .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
987         { .name     = "chmod",      .errmsg = true,
988           .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
989         { .name     = "chroot",     .errmsg = true,
990           .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
991         { .name     = "clock_gettime",  .errmsg = true, STRARRAY(0, clk_id, clockid), },
992         { .name     = "close",      .errmsg = true,
993           .arg_scnprintf = { [0] = SCA_CLOSE_FD, /* fd */ }, },
994         { .name     = "connect",    .errmsg = true, },
995         { .name     = "creat",      .errmsg = true,
996           .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
997         { .name     = "dup",        .errmsg = true,
998           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
999         { .name     = "dup2",       .errmsg = true,
1000           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1001         { .name     = "dup3",       .errmsg = true,
1002           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1003         { .name     = "epoll_ctl",  .errmsg = true, STRARRAY(1, op, epoll_ctl_ops), },
1004         { .name     = "eventfd2",   .errmsg = true,
1005           .arg_scnprintf = { [1] = SCA_EFD_FLAGS, /* flags */ }, },
1006         { .name     = "faccessat",  .errmsg = true,
1007           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1008                              [1] = SCA_FILENAME, /* filename */ }, },
1009         { .name     = "fadvise64",  .errmsg = true,
1010           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1011         { .name     = "fallocate",  .errmsg = true,
1012           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1013         { .name     = "fchdir",     .errmsg = true,
1014           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1015         { .name     = "fchmod",     .errmsg = true,
1016           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1017         { .name     = "fchmodat",   .errmsg = true,
1018           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */
1019                              [1] = SCA_FILENAME, /* filename */ }, },
1020         { .name     = "fchown",     .errmsg = true,
1021           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1022         { .name     = "fchownat",   .errmsg = true,
1023           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */
1024                              [1] = SCA_FILENAME, /* filename */ }, },
1025         { .name     = "fcntl",      .errmsg = true,
1026           .arg_scnprintf = { [0] = SCA_FD, /* fd */
1027                              [1] = SCA_STRARRAY, /* cmd */ },
1028           .arg_parm      = { [1] = &strarray__fcntl_cmds, /* cmd */ }, },
1029         { .name     = "fdatasync",  .errmsg = true,
1030           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1031         { .name     = "flock",      .errmsg = true,
1032           .arg_scnprintf = { [0] = SCA_FD, /* fd */
1033                              [1] = SCA_FLOCK, /* cmd */ }, },
1034         { .name     = "fsetxattr",  .errmsg = true,
1035           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1036         { .name     = "fstat",      .errmsg = true, .alias = "newfstat",
1037           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1038         { .name     = "fstatat",    .errmsg = true, .alias = "newfstatat",
1039           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1040                              [1] = SCA_FILENAME, /* filename */ }, },
1041         { .name     = "fstatfs",    .errmsg = true,
1042           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1043         { .name     = "fsync",    .errmsg = true,
1044           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1045         { .name     = "ftruncate", .errmsg = true,
1046           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1047         { .name     = "futex",      .errmsg = true,
1048           .arg_scnprintf = { [1] = SCA_FUTEX_OP, /* op */ }, },
1049         { .name     = "futimesat", .errmsg = true,
1050           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */
1051                              [1] = SCA_FILENAME, /* filename */ }, },
1052         { .name     = "getdents",   .errmsg = true,
1053           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1054         { .name     = "getdents64", .errmsg = true,
1055           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1056         { .name     = "getitimer",  .errmsg = true, STRARRAY(0, which, itimers), },
1057         { .name     = "getrlimit",  .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
1058         { .name     = "getxattr",    .errmsg = true,
1059           .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1060         { .name     = "inotify_add_watch",          .errmsg = true,
1061           .arg_scnprintf = { [1] = SCA_FILENAME, /* pathname */ }, },
1062         { .name     = "ioctl",      .errmsg = true,
1063           .arg_scnprintf = { [0] = SCA_FD, /* fd */
1064 #if defined(__i386__) || defined(__x86_64__)
1065 /*
1066  * FIXME: Make this available to all arches.
1067  */
1068                              [1] = SCA_STRHEXARRAY, /* cmd */
1069                              [2] = SCA_HEX, /* arg */ },
1070           .arg_parm      = { [1] = &strarray__tioctls, /* cmd */ }, },
1071 #else
1072                              [2] = SCA_HEX, /* arg */ }, },
1073 #endif
1074         { .name     = "kill",       .errmsg = true,
1075           .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1076         { .name     = "lchown",    .errmsg = true,
1077           .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1078         { .name     = "lgetxattr",  .errmsg = true,
1079           .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1080         { .name     = "linkat",     .errmsg = true,
1081           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
1082         { .name     = "listxattr",  .errmsg = true,
1083           .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1084         { .name     = "llistxattr", .errmsg = true,
1085           .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1086         { .name     = "lremovexattr",  .errmsg = true,
1087           .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1088         { .name     = "lseek",      .errmsg = true,
1089           .arg_scnprintf = { [0] = SCA_FD, /* fd */
1090                              [2] = SCA_STRARRAY, /* whence */ },
1091           .arg_parm      = { [2] = &strarray__whences, /* whence */ }, },
1092         { .name     = "lsetxattr",  .errmsg = true,
1093           .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1094         { .name     = "lstat",      .errmsg = true, .alias = "newlstat",
1095           .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1096         { .name     = "lsxattr",    .errmsg = true,
1097           .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1098         { .name     = "madvise",    .errmsg = true,
1099           .arg_scnprintf = { [0] = SCA_HEX,      /* start */
1100                              [2] = SCA_MADV_BHV, /* behavior */ }, },
1101         { .name     = "mkdir",    .errmsg = true,
1102           .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1103         { .name     = "mkdirat",    .errmsg = true,
1104           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */
1105                              [1] = SCA_FILENAME, /* pathname */ }, },
1106         { .name     = "mknod",      .errmsg = true,
1107           .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1108         { .name     = "mknodat",    .errmsg = true,
1109           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */
1110                              [1] = SCA_FILENAME, /* filename */ }, },
1111         { .name     = "mlock",      .errmsg = true,
1112           .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1113         { .name     = "mlockall",   .errmsg = true,
1114           .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1115         { .name     = "mmap",       .hexret = true,
1116           .arg_scnprintf = { [0] = SCA_HEX,       /* addr */
1117                              [2] = SCA_MMAP_PROT, /* prot */
1118                              [3] = SCA_MMAP_FLAGS, /* flags */
1119                              [4] = SCA_FD,        /* fd */ }, },
1120         { .name     = "mprotect",   .errmsg = true,
1121           .arg_scnprintf = { [0] = SCA_HEX, /* start */
1122                              [2] = SCA_MMAP_PROT, /* prot */ }, },
1123         { .name     = "mq_unlink", .errmsg = true,
1124           .arg_scnprintf = { [0] = SCA_FILENAME, /* u_name */ }, },
1125         { .name     = "mremap",     .hexret = true,
1126           .arg_scnprintf = { [0] = SCA_HEX, /* addr */
1127                              [3] = SCA_MREMAP_FLAGS, /* flags */
1128                              [4] = SCA_HEX, /* new_addr */ }, },
1129         { .name     = "munlock",    .errmsg = true,
1130           .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1131         { .name     = "munmap",     .errmsg = true,
1132           .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1133         { .name     = "name_to_handle_at", .errmsg = true,
1134           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1135         { .name     = "newfstatat", .errmsg = true,
1136           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1137                              [1] = SCA_FILENAME, /* filename */ }, },
1138         { .name     = "open",       .errmsg = true,
1139           .arg_scnprintf = { [0] = SCA_FILENAME,   /* filename */
1140                              [1] = SCA_OPEN_FLAGS, /* flags */ }, },
1141         { .name     = "open_by_handle_at", .errmsg = true,
1142           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1143                              [2] = SCA_OPEN_FLAGS, /* flags */ }, },
1144         { .name     = "openat",     .errmsg = true,
1145           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1146                              [1] = SCA_FILENAME, /* filename */
1147                              [2] = SCA_OPEN_FLAGS, /* flags */ }, },
1148         { .name     = "perf_event_open", .errmsg = true,
1149           .arg_scnprintf = { [1] = SCA_INT, /* pid */
1150                              [2] = SCA_INT, /* cpu */
1151                              [3] = SCA_FD,  /* group_fd */
1152                              [4] = SCA_PERF_FLAGS,  /* flags */ }, },
1153         { .name     = "pipe2",      .errmsg = true,
1154           .arg_scnprintf = { [1] = SCA_PIPE_FLAGS, /* flags */ }, },
1155         { .name     = "poll",       .errmsg = true, .timeout = true, },
1156         { .name     = "ppoll",      .errmsg = true, .timeout = true, },
1157         { .name     = "pread",      .errmsg = true, .alias = "pread64",
1158           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1159         { .name     = "preadv",     .errmsg = true, .alias = "pread",
1160           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1161         { .name     = "prlimit64",  .errmsg = true, STRARRAY(1, resource, rlimit_resources), },
1162         { .name     = "pwrite",     .errmsg = true, .alias = "pwrite64",
1163           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1164         { .name     = "pwritev",    .errmsg = true,
1165           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1166         { .name     = "read",       .errmsg = true,
1167           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1168         { .name     = "readlink",   .errmsg = true,
1169           .arg_scnprintf = { [0] = SCA_FILENAME, /* path */ }, },
1170         { .name     = "readlinkat", .errmsg = true,
1171           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1172                              [1] = SCA_FILENAME, /* pathname */ }, },
1173         { .name     = "readv",      .errmsg = true,
1174           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1175         { .name     = "recvfrom",   .errmsg = true,
1176           .arg_scnprintf = { [0] = SCA_FD, /* fd */
1177                              [3] = SCA_MSG_FLAGS, /* flags */ }, },
1178         { .name     = "recvmmsg",   .errmsg = true,
1179           .arg_scnprintf = { [0] = SCA_FD, /* fd */
1180                              [3] = SCA_MSG_FLAGS, /* flags */ }, },
1181         { .name     = "recvmsg",    .errmsg = true,
1182           .arg_scnprintf = { [0] = SCA_FD, /* fd */
1183                              [2] = SCA_MSG_FLAGS, /* flags */ }, },
1184         { .name     = "removexattr", .errmsg = true,
1185           .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1186         { .name     = "renameat",   .errmsg = true,
1187           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1188         { .name     = "rmdir",    .errmsg = true,
1189           .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1190         { .name     = "rt_sigaction", .errmsg = true,
1191           .arg_scnprintf = { [0] = SCA_SIGNUM, /* sig */ }, },
1192         { .name     = "rt_sigprocmask",  .errmsg = true, STRARRAY(0, how, sighow), },
1193         { .name     = "rt_sigqueueinfo", .errmsg = true,
1194           .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1195         { .name     = "rt_tgsigqueueinfo", .errmsg = true,
1196           .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
1197         { .name     = "select",     .errmsg = true, .timeout = true, },
1198         { .name     = "sendmmsg",    .errmsg = true,
1199           .arg_scnprintf = { [0] = SCA_FD, /* fd */
1200                              [3] = SCA_MSG_FLAGS, /* flags */ }, },
1201         { .name     = "sendmsg",    .errmsg = true,
1202           .arg_scnprintf = { [0] = SCA_FD, /* fd */
1203                              [2] = SCA_MSG_FLAGS, /* flags */ }, },
1204         { .name     = "sendto",     .errmsg = true,
1205           .arg_scnprintf = { [0] = SCA_FD, /* fd */
1206                              [3] = SCA_MSG_FLAGS, /* flags */ }, },
1207         { .name     = "setitimer",  .errmsg = true, STRARRAY(0, which, itimers), },
1208         { .name     = "setrlimit",  .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
1209         { .name     = "setxattr",   .errmsg = true,
1210           .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1211         { .name     = "shutdown",   .errmsg = true,
1212           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1213         { .name     = "socket",     .errmsg = true,
1214           .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
1215                              [1] = SCA_SK_TYPE, /* type */ },
1216           .arg_parm      = { [0] = &strarray__socket_families, /* family */ }, },
1217         { .name     = "socketpair", .errmsg = true,
1218           .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
1219                              [1] = SCA_SK_TYPE, /* type */ },
1220           .arg_parm      = { [0] = &strarray__socket_families, /* family */ }, },
1221         { .name     = "stat",       .errmsg = true, .alias = "newstat",
1222           .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1223         { .name     = "statfs",     .errmsg = true,
1224           .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1225         { .name     = "swapoff",    .errmsg = true,
1226           .arg_scnprintf = { [0] = SCA_FILENAME, /* specialfile */ }, },
1227         { .name     = "swapon",     .errmsg = true,
1228           .arg_scnprintf = { [0] = SCA_FILENAME, /* specialfile */ }, },
1229         { .name     = "symlinkat",  .errmsg = true,
1230           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1231         { .name     = "tgkill",     .errmsg = true,
1232           .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
1233         { .name     = "tkill",      .errmsg = true,
1234           .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1235         { .name     = "truncate",   .errmsg = true,
1236           .arg_scnprintf = { [0] = SCA_FILENAME, /* path */ }, },
1237         { .name     = "uname",      .errmsg = true, .alias = "newuname", },
1238         { .name     = "unlinkat",   .errmsg = true,
1239           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1240                              [1] = SCA_FILENAME, /* pathname */ }, },
1241         { .name     = "utime",  .errmsg = true,
1242           .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1243         { .name     = "utimensat",  .errmsg = true,
1244           .arg_scnprintf = { [0] = SCA_FDAT, /* dirfd */
1245                              [1] = SCA_FILENAME, /* filename */ }, },
1246         { .name     = "utimes",  .errmsg = true,
1247           .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1248         { .name     = "vmsplice",  .errmsg = true,
1249           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1250         { .name     = "write",      .errmsg = true,
1251           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1252         { .name     = "writev",     .errmsg = true,
1253           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1254 };
1255
1256 static int syscall_fmt__cmp(const void *name, const void *fmtp)
1257 {
1258         const struct syscall_fmt *fmt = fmtp;
1259         return strcmp(name, fmt->name);
1260 }
1261
1262 static struct syscall_fmt *syscall_fmt__find(const char *name)
1263 {
1264         const int nmemb = ARRAY_SIZE(syscall_fmts);
1265         return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp);
1266 }
1267
/*
 * Everything known about one syscall id; instances live in
 * trace->syscalls.table, indexed by id.
 */
struct syscall {
	/* format of the "syscalls:sys_enter_<name>" (or alias) tracepoint */
	struct event_format *tp_format;
	/* number of entries allocated in arg_scnprintf */
	int		    nr_args;
	/* argument fields, walked as a list through ->next */
	struct format_field *args;
	/* as returned by audit_syscall_to_name() */
	const char	    *name;
	bool		    is_exit;
	/* matching syscall_fmts[] entry, NULL when there is none */
	struct syscall_fmt  *fmt;
	/* per-argument beautifier; a NULL slot means no special formatting */
	size_t		    (**arg_scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
	/* borrowed from fmt->arg_parm when fmt is set */
	void		    **arg_parm;
};
1278
1279 static size_t fprintf_duration(unsigned long t, FILE *fp)
1280 {
1281         double duration = (double)t / NSEC_PER_MSEC;
1282         size_t printed = fprintf(fp, "(");
1283
1284         if (duration >= 1.0)
1285                 printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration);
1286         else if (duration >= 0.01)
1287                 printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration);
1288         else
1289                 printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration);
1290         return printed + fprintf(fp, "): ");
1291 }
1292
1293 /**
1294  * filename.ptr: The filename char pointer that will be vfs_getname'd
1295  * filename.entry_str_pos: Where to insert the string translated from
1296  *                         filename.ptr by the vfs_getname tracepoint/kprobe.
1297  */
1298 struct thread_trace {
1299         u64               entry_time;
1300         u64               exit_time;
1301         bool              entry_pending;
1302         unsigned long     nr_events;
1303         unsigned long     pfmaj, pfmin;
1304         char              *entry_str;
1305         double            runtime_ms;
1306         struct {
1307                 unsigned long ptr;
1308                 int           entry_str_pos;
1309         } filename;
1310         struct {
1311                 int       max;
1312                 char      **table;
1313         } paths;
1314
1315         struct intlist *syscall_stats;
1316 };
1317
1318 static struct thread_trace *thread_trace__new(void)
1319 {
1320         struct thread_trace *ttrace =  zalloc(sizeof(struct thread_trace));
1321
1322         if (ttrace)
1323                 ttrace->paths.max = -1;
1324
1325         ttrace->syscall_stats = intlist__new(NULL);
1326
1327         return ttrace;
1328 }
1329
1330 static struct thread_trace *thread__trace(struct thread *thread, FILE *fp)
1331 {
1332         struct thread_trace *ttrace;
1333
1334         if (thread == NULL)
1335                 goto fail;
1336
1337         if (thread__priv(thread) == NULL)
1338                 thread__set_priv(thread, thread_trace__new());
1339
1340         if (thread__priv(thread) == NULL)
1341                 goto fail;
1342
1343         ttrace = thread__priv(thread);
1344         ++ttrace->nr_events;
1345
1346         return ttrace;
1347 fail:
1348         color_fprintf(fp, PERF_COLOR_RED,
1349                       "WARNING: not enough memory, dropping samples!\n");
1350         return NULL;
1351 }
1352
/* Bit flags for trace->trace_pgfaults: which page fault kinds to trace. */
#define TRACE_PFMAJ		(1 << 0)
#define TRACE_PFMIN		(1 << 1)

/* Size, in bytes, used for the per-thread entry_str buffer. */
static const size_t trace__entry_str_size = 2048;
1357
1358 struct trace {
1359         struct perf_tool        tool;
1360         struct {
1361                 int             machine;
1362                 int             open_id;
1363         }                       audit;
1364         struct {
1365                 int             max;
1366                 struct syscall  *table;
1367                 struct {
1368                         struct perf_evsel *sys_enter,
1369                                           *sys_exit;
1370                 }               events;
1371         } syscalls;
1372         struct record_opts      opts;
1373         struct perf_evlist      *evlist;
1374         struct machine          *host;
1375         struct thread           *current;
1376         u64                     base_time;
1377         FILE                    *output;
1378         unsigned long           nr_events;
1379         struct strlist          *ev_qualifier;
1380         struct {
1381                 size_t          nr;
1382                 int             *entries;
1383         }                       ev_qualifier_ids;
1384         const char              *last_vfs_getname;
1385         struct intlist          *tid_list;
1386         struct intlist          *pid_list;
1387         struct {
1388                 size_t          nr;
1389                 pid_t           *entries;
1390         }                       filter_pids;
1391         double                  duration_filter;
1392         double                  runtime_ms;
1393         struct {
1394                 u64             vfs_getname,
1395                                 proc_getname;
1396         } stats;
1397         bool                    not_ev_qualifier;
1398         bool                    live;
1399         bool                    full_time;
1400         bool                    sched;
1401         bool                    multiple_threads;
1402         bool                    summary;
1403         bool                    summary_only;
1404         bool                    show_comm;
1405         bool                    show_tool_stats;
1406         bool                    trace_syscalls;
1407         bool                    force;
1408         bool                    vfs_getname;
1409         int                     trace_pgfaults;
1410 };
1411
1412 static int trace__set_fd_pathname(struct thread *thread, int fd, const char *pathname)
1413 {
1414         struct thread_trace *ttrace = thread__priv(thread);
1415
1416         if (fd > ttrace->paths.max) {
1417                 char **npath = realloc(ttrace->paths.table, (fd + 1) * sizeof(char *));
1418
1419                 if (npath == NULL)
1420                         return -1;
1421
1422                 if (ttrace->paths.max != -1) {
1423                         memset(npath + ttrace->paths.max + 1, 0,
1424                                (fd - ttrace->paths.max) * sizeof(char *));
1425                 } else {
1426                         memset(npath, 0, (fd + 1) * sizeof(char *));
1427                 }
1428
1429                 ttrace->paths.table = npath;
1430                 ttrace->paths.max   = fd;
1431         }
1432
1433         ttrace->paths.table[fd] = strdup(pathname);
1434
1435         return ttrace->paths.table[fd] != NULL ? 0 : -1;
1436 }
1437
1438 static int thread__read_fd_path(struct thread *thread, int fd)
1439 {
1440         char linkname[PATH_MAX], pathname[PATH_MAX];
1441         struct stat st;
1442         int ret;
1443
1444         if (thread->pid_ == thread->tid) {
1445                 scnprintf(linkname, sizeof(linkname),
1446                           "/proc/%d/fd/%d", thread->pid_, fd);
1447         } else {
1448                 scnprintf(linkname, sizeof(linkname),
1449                           "/proc/%d/task/%d/fd/%d", thread->pid_, thread->tid, fd);
1450         }
1451
1452         if (lstat(linkname, &st) < 0 || st.st_size + 1 > (off_t)sizeof(pathname))
1453                 return -1;
1454
1455         ret = readlink(linkname, pathname, sizeof(pathname));
1456
1457         if (ret < 0 || ret > st.st_size)
1458                 return -1;
1459
1460         pathname[ret] = '\0';
1461         return trace__set_fd_pathname(thread, fd, pathname);
1462 }
1463
1464 static const char *thread__fd_path(struct thread *thread, int fd,
1465                                    struct trace *trace)
1466 {
1467         struct thread_trace *ttrace = thread__priv(thread);
1468
1469         if (ttrace == NULL)
1470                 return NULL;
1471
1472         if (fd < 0)
1473                 return NULL;
1474
1475         if ((fd > ttrace->paths.max || ttrace->paths.table[fd] == NULL)) {
1476                 if (!trace->live)
1477                         return NULL;
1478                 ++trace->stats.proc_getname;
1479                 if (thread__read_fd_path(thread, fd))
1480                         return NULL;
1481         }
1482
1483         return ttrace->paths.table[fd];
1484 }
1485
1486 static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
1487                                         struct syscall_arg *arg)
1488 {
1489         int fd = arg->val;
1490         size_t printed = scnprintf(bf, size, "%d", fd);
1491         const char *path = thread__fd_path(arg->thread, fd, arg->trace);
1492
1493         if (path)
1494                 printed += scnprintf(bf + printed, size - printed, "<%s>", path);
1495
1496         return printed;
1497 }
1498
1499 static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
1500                                               struct syscall_arg *arg)
1501 {
1502         int fd = arg->val;
1503         size_t printed = syscall_arg__scnprintf_fd(bf, size, arg);
1504         struct thread_trace *ttrace = thread__priv(arg->thread);
1505
1506         if (ttrace && fd >= 0 && fd <= ttrace->paths.max)
1507                 zfree(&ttrace->paths.table[fd]);
1508
1509         return printed;
1510 }
1511
1512 static void thread__set_filename_pos(struct thread *thread, const char *bf,
1513                                      unsigned long ptr)
1514 {
1515         struct thread_trace *ttrace = thread__priv(thread);
1516
1517         ttrace->filename.ptr = ptr;
1518         ttrace->filename.entry_str_pos = bf - ttrace->entry_str;
1519 }
1520
1521 static size_t syscall_arg__scnprintf_filename(char *bf, size_t size,
1522                                               struct syscall_arg *arg)
1523 {
1524         unsigned long ptr = arg->val;
1525
1526         if (!arg->trace->vfs_getname)
1527                 return scnprintf(bf, size, "%#x", ptr);
1528
1529         thread__set_filename_pos(arg->thread, bf, ptr);
1530         return 0;
1531 }
1532
1533 static bool trace__filter_duration(struct trace *trace, double t)
1534 {
1535         return t < (trace->duration_filter * NSEC_PER_MSEC);
1536 }
1537
1538 static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
1539 {
1540         double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC;
1541
1542         return fprintf(fp, "%10.3f ", ts);
1543 }
1544
/*
 * Flags written by sig_handler().  They are volatile sig_atomic_t because
 * that is the only object type that may portably be written from an
 * asynchronous signal handler and read from the interrupted code.
 */
static volatile sig_atomic_t done = false;
static volatile sig_atomic_t interrupted = false;

/*
 * Signal handler: ask the main loop to stop, and remember whether the
 * request came from SIGINT.
 */
static void sig_handler(int sig)
{
	done = true;
	interrupted = sig == SIGINT;
}
1553
1554 static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread,
1555                                         u64 duration, u64 tstamp, FILE *fp)
1556 {
1557         size_t printed = trace__fprintf_tstamp(trace, tstamp, fp);
1558         printed += fprintf_duration(duration, fp);
1559
1560         if (trace->multiple_threads) {
1561                 if (trace->show_comm)
1562                         printed += fprintf(fp, "%.14s/", thread__comm_str(thread));
1563                 printed += fprintf(fp, "%d ", thread->tid);
1564         }
1565
1566         return printed;
1567 }
1568
1569 static int trace__process_event(struct trace *trace, struct machine *machine,
1570                                 union perf_event *event, struct perf_sample *sample)
1571 {
1572         int ret = 0;
1573
1574         switch (event->header.type) {
1575         case PERF_RECORD_LOST:
1576                 color_fprintf(trace->output, PERF_COLOR_RED,
1577                               "LOST %" PRIu64 " events!\n", event->lost.lost);
1578                 ret = machine__process_lost_event(machine, event, sample);
1579         default:
1580                 ret = machine__process_event(machine, event, sample);
1581                 break;
1582         }
1583
1584         return ret;
1585 }
1586
1587 static int trace__tool_process(struct perf_tool *tool,
1588                                union perf_event *event,
1589                                struct perf_sample *sample,
1590                                struct machine *machine)
1591 {
1592         struct trace *trace = container_of(tool, struct trace, tool);
1593         return trace__process_event(trace, machine, event, sample);
1594 }
1595
1596 static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
1597 {
1598         int err = symbol__init(NULL);
1599
1600         if (err)
1601                 return err;
1602
1603         trace->host = machine__new_host();
1604         if (trace->host == NULL)
1605                 return -ENOMEM;
1606
1607         if (trace_event__register_resolver(trace->host, machine__resolve_kernel_addr) < 0)
1608                 return -errno;
1609
1610         err = __machine__synthesize_threads(trace->host, &trace->tool, &trace->opts.target,
1611                                             evlist->threads, trace__tool_process, false,
1612                                             trace->opts.proc_map_timeout);
1613         if (err)
1614                 symbol__exit();
1615
1616         return err;
1617 }
1618
1619 static int syscall__set_arg_fmts(struct syscall *sc)
1620 {
1621         struct format_field *field;
1622         int idx = 0;
1623
1624         sc->arg_scnprintf = calloc(sc->nr_args, sizeof(void *));
1625         if (sc->arg_scnprintf == NULL)
1626                 return -1;
1627
1628         if (sc->fmt)
1629                 sc->arg_parm = sc->fmt->arg_parm;
1630
1631         for (field = sc->args; field; field = field->next) {
1632                 if (sc->fmt && sc->fmt->arg_scnprintf[idx])
1633                         sc->arg_scnprintf[idx] = sc->fmt->arg_scnprintf[idx];
1634                 else if (field->flags & FIELD_IS_POINTER)
1635                         sc->arg_scnprintf[idx] = syscall_arg__scnprintf_hex;
1636                 ++idx;
1637         }
1638
1639         return 0;
1640 }
1641
1642 static int trace__read_syscall_info(struct trace *trace, int id)
1643 {
1644         char tp_name[128];
1645         struct syscall *sc;
1646         const char *name = audit_syscall_to_name(id, trace->audit.machine);
1647
1648         if (name == NULL)
1649                 return -1;
1650
1651         if (id > trace->syscalls.max) {
1652                 struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc));
1653
1654                 if (nsyscalls == NULL)
1655                         return -1;
1656
1657                 if (trace->syscalls.max != -1) {
1658                         memset(nsyscalls + trace->syscalls.max + 1, 0,
1659                                (id - trace->syscalls.max) * sizeof(*sc));
1660                 } else {
1661                         memset(nsyscalls, 0, (id + 1) * sizeof(*sc));
1662                 }
1663
1664                 trace->syscalls.table = nsyscalls;
1665                 trace->syscalls.max   = id;
1666         }
1667
1668         sc = trace->syscalls.table + id;
1669         sc->name = name;
1670
1671         sc->fmt  = syscall_fmt__find(sc->name);
1672
1673         snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name);
1674         sc->tp_format = trace_event__tp_format("syscalls", tp_name);
1675
1676         if (sc->tp_format == NULL && sc->fmt && sc->fmt->alias) {
1677                 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias);
1678                 sc->tp_format = trace_event__tp_format("syscalls", tp_name);
1679         }
1680
1681         if (sc->tp_format == NULL)
1682                 return -1;
1683
1684         sc->args = sc->tp_format->format.fields;
1685         sc->nr_args = sc->tp_format->format.nr_fields;
1686         /* drop nr field - not relevant here; does not exist on older kernels */
1687         if (sc->args && strcmp(sc->args->name, "nr") == 0) {
1688                 sc->args = sc->args->next;
1689                 --sc->nr_args;
1690         }
1691
1692         sc->is_exit = !strcmp(name, "exit_group") || !strcmp(name, "exit");
1693
1694         return syscall__set_arg_fmts(sc);
1695 }
1696
1697 static int trace__validate_ev_qualifier(struct trace *trace)
1698 {
1699         int err = 0, i;
1700         struct str_node *pos;
1701
1702         trace->ev_qualifier_ids.nr = strlist__nr_entries(trace->ev_qualifier);
1703         trace->ev_qualifier_ids.entries = malloc(trace->ev_qualifier_ids.nr *
1704                                                  sizeof(trace->ev_qualifier_ids.entries[0]));
1705
1706         if (trace->ev_qualifier_ids.entries == NULL) {
1707                 fputs("Error:\tNot enough memory for allocating events qualifier ids\n",
1708                        trace->output);
1709                 err = -EINVAL;
1710                 goto out;
1711         }
1712
1713         i = 0;
1714
1715         strlist__for_each(pos, trace->ev_qualifier) {
1716                 const char *sc = pos->s;
1717                 int id = audit_name_to_syscall(sc, trace->audit.machine);
1718
1719                 if (id < 0) {
1720                         if (err == 0) {
1721                                 fputs("Error:\tInvalid syscall ", trace->output);
1722                                 err = -EINVAL;
1723                         } else {
1724                                 fputs(", ", trace->output);
1725                         }
1726
1727                         fputs(sc, trace->output);
1728                 }
1729
1730                 trace->ev_qualifier_ids.entries[i++] = id;
1731         }
1732
1733         if (err < 0) {
1734                 fputs("\nHint:\ttry 'perf list syscalls:sys_enter_*'"
1735                       "\nHint:\tand: 'man syscalls'\n", trace->output);
1736                 zfree(&trace->ev_qualifier_ids.entries);
1737                 trace->ev_qualifier_ids.nr = 0;
1738         }
1739 out:
1740         return err;
1741 }
1742
1743 /*
1744  * args is to be interpreted as a series of longs but we need to handle
1745  * 8-byte unaligned accesses. args points to raw_data within the event
1746  * and raw_data is guaranteed to be 8-byte unaligned because it is
1747  * preceded by raw_size which is a u32. So we need to copy args to a temp
1748  * variable to read it. Most notably this avoids extended load instructions
1749  * on unaligned addresses
1750  */
1751
1752 static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
1753                                       unsigned char *args, struct trace *trace,
1754                                       struct thread *thread)
1755 {
1756         size_t printed = 0;
1757         unsigned char *p;
1758         unsigned long val;
1759
1760         if (sc->args != NULL) {
1761                 struct format_field *field;
1762                 u8 bit = 1;
1763                 struct syscall_arg arg = {
1764                         .idx    = 0,
1765                         .mask   = 0,
1766                         .trace  = trace,
1767                         .thread = thread,
1768                 };
1769
1770                 for (field = sc->args; field;
1771                      field = field->next, ++arg.idx, bit <<= 1) {
1772                         if (arg.mask & bit)
1773                                 continue;
1774
1775                         /* special care for unaligned accesses */
1776                         p = args + sizeof(unsigned long) * arg.idx;
1777                         memcpy(&val, p, sizeof(val));
1778
1779                         /*
1780                          * Suppress this argument if its value is zero and
1781                          * and we don't have a string associated in an
1782                          * strarray for it.
1783                          */
1784                         if (val == 0 &&
1785                             !(sc->arg_scnprintf &&
1786                               sc->arg_scnprintf[arg.idx] == SCA_STRARRAY &&
1787                               sc->arg_parm[arg.idx]))
1788                                 continue;
1789
1790                         printed += scnprintf(bf + printed, size - printed,
1791                                              "%s%s: ", printed ? ", " : "", field->name);
1792                         if (sc->arg_scnprintf && sc->arg_scnprintf[arg.idx]) {
1793                                 arg.val = val;
1794                                 if (sc->arg_parm)
1795                                         arg.parm = sc->arg_parm[arg.idx];
1796                                 printed += sc->arg_scnprintf[arg.idx](bf + printed,
1797                                                                       size - printed, &arg);
1798                         } else {
1799                                 printed += scnprintf(bf + printed, size - printed,
1800                                                      "%ld", val);
1801                         }
1802                 }
1803         } else {
1804                 int i = 0;
1805
1806                 while (i < 6) {
1807                         /* special care for unaligned accesses */
1808                         p = args + sizeof(unsigned long) * i;
1809                         memcpy(&val, p, sizeof(val));
1810                         printed += scnprintf(bf + printed, size - printed,
1811                                              "%sarg%d: %ld",
1812                                              printed ? ", " : "", i, val);
1813                         ++i;
1814                 }
1815         }
1816
1817         return printed;
1818 }
1819
/* Signature of the per-evsel sample handlers stored in evsel->handler. */
typedef int (*tracepoint_handler)(struct trace *trace,
                                  struct perf_evsel *evsel,
                                  union perf_event *event,
                                  struct perf_sample *sample);
1823
1824 static struct syscall *trace__syscall_info(struct trace *trace,
1825                                            struct perf_evsel *evsel, int id)
1826 {
1827
1828         if (id < 0) {
1829
1830                 /*
1831                  * XXX: Noticed on x86_64, reproduced as far back as 3.0.36, haven't tried
1832                  * before that, leaving at a higher verbosity level till that is
1833                  * explained. Reproduced with plain ftrace with:
1834                  *
1835                  * echo 1 > /t/events/raw_syscalls/sys_exit/enable
1836                  * grep "NR -1 " /t/trace_pipe
1837                  *
1838                  * After generating some load on the machine.
1839                  */
1840                 if (verbose > 1) {
1841                         static u64 n;
1842                         fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n",
1843                                 id, perf_evsel__name(evsel), ++n);
1844                 }
1845                 return NULL;
1846         }
1847
1848         if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) &&
1849             trace__read_syscall_info(trace, id))
1850                 goto out_cant_read;
1851
1852         if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL))
1853                 goto out_cant_read;
1854
1855         return &trace->syscalls.table[id];
1856
1857 out_cant_read:
1858         if (verbose) {
1859                 fprintf(trace->output, "Problems reading syscall %d", id);
1860                 if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL)
1861                         fprintf(trace->output, "(%s)", trace->syscalls.table[id].name);
1862                 fputs(" information\n", trace->output);
1863         }
1864         return NULL;
1865 }
1866
1867 static void thread__update_stats(struct thread_trace *ttrace,
1868                                  int id, struct perf_sample *sample)
1869 {
1870         struct int_node *inode;
1871         struct stats *stats;
1872         u64 duration = 0;
1873
1874         inode = intlist__findnew(ttrace->syscall_stats, id);
1875         if (inode == NULL)
1876                 return;
1877
1878         stats = inode->priv;
1879         if (stats == NULL) {
1880                 stats = malloc(sizeof(struct stats));
1881                 if (stats == NULL)
1882                         return;
1883                 init_stats(stats);
1884                 inode->priv = stats;
1885         }
1886
1887         if (ttrace->entry_time && sample->time > ttrace->entry_time)
1888                 duration = sample->time - ttrace->entry_time;
1889
1890         update_stats(stats, duration);
1891 }
1892
1893 static int trace__printf_interrupted_entry(struct trace *trace, struct perf_sample *sample)
1894 {
1895         struct thread_trace *ttrace;
1896         u64 duration;
1897         size_t printed;
1898
1899         if (trace->current == NULL)
1900                 return 0;
1901
1902         ttrace = thread__priv(trace->current);
1903
1904         if (!ttrace->entry_pending)
1905                 return 0;
1906
1907         duration = sample->time - ttrace->entry_time;
1908
1909         printed  = trace__fprintf_entry_head(trace, trace->current, duration, sample->time, trace->output);
1910         printed += fprintf(trace->output, "%-70s) ...\n", ttrace->entry_str);
1911         ttrace->entry_pending = false;
1912
1913         return printed;
1914 }
1915
1916 static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
1917                             union perf_event *event __maybe_unused,
1918                             struct perf_sample *sample)
1919 {
1920         char *msg;
1921         void *args;
1922         size_t printed = 0;
1923         struct thread *thread;
1924         int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1;
1925         struct syscall *sc = trace__syscall_info(trace, evsel, id);
1926         struct thread_trace *ttrace;
1927
1928         if (sc == NULL)
1929                 return -1;
1930
1931         thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1932         ttrace = thread__trace(thread, trace->output);
1933         if (ttrace == NULL)
1934                 goto out_put;
1935
1936         args = perf_evsel__sc_tp_ptr(evsel, args, sample);
1937
1938         if (ttrace->entry_str == NULL) {
1939                 ttrace->entry_str = malloc(trace__entry_str_size);
1940                 if (!ttrace->entry_str)
1941                         goto out_put;
1942         }
1943
1944         if (!trace->summary_only)
1945                 trace__printf_interrupted_entry(trace, sample);
1946
1947         ttrace->entry_time = sample->time;
1948         msg = ttrace->entry_str;
1949         printed += scnprintf(msg + printed, trace__entry_str_size - printed, "%s(", sc->name);
1950
1951         printed += syscall__scnprintf_args(sc, msg + printed, trace__entry_str_size - printed,
1952                                            args, trace, thread);
1953
1954         if (sc->is_exit) {
1955                 if (!trace->duration_filter && !trace->summary_only) {
1956                         trace__fprintf_entry_head(trace, thread, 1, sample->time, trace->output);
1957                         fprintf(trace->output, "%-70s\n", ttrace->entry_str);
1958                 }
1959         } else
1960                 ttrace->entry_pending = true;
1961
1962         if (trace->current != thread) {
1963                 thread__put(trace->current);
1964                 trace->current = thread__get(thread);
1965         }
1966         err = 0;
1967 out_put:
1968         thread__put(thread);
1969         return err;
1970 }
1971
1972 static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
1973                            union perf_event *event __maybe_unused,
1974                            struct perf_sample *sample)
1975 {
1976         long ret;
1977         u64 duration = 0;
1978         struct thread *thread;
1979         int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1;
1980         struct syscall *sc = trace__syscall_info(trace, evsel, id);
1981         struct thread_trace *ttrace;
1982
1983         if (sc == NULL)
1984                 return -1;
1985
1986         thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1987         ttrace = thread__trace(thread, trace->output);
1988         if (ttrace == NULL)
1989                 goto out_put;
1990
1991         if (trace->summary)
1992                 thread__update_stats(ttrace, id, sample);
1993
1994         ret = perf_evsel__sc_tp_uint(evsel, ret, sample);
1995
1996         if (id == trace->audit.open_id && ret >= 0 && trace->last_vfs_getname) {
1997                 trace__set_fd_pathname(thread, ret, trace->last_vfs_getname);
1998                 trace->last_vfs_getname = NULL;
1999                 ++trace->stats.vfs_getname;
2000         }
2001
2002         ttrace->exit_time = sample->time;
2003
2004         if (ttrace->entry_time) {
2005                 duration = sample->time - ttrace->entry_time;
2006                 if (trace__filter_duration(trace, duration))
2007                         goto out;
2008         } else if (trace->duration_filter)
2009                 goto out;
2010
2011         if (trace->summary_only)
2012                 goto out;
2013
2014         trace__fprintf_entry_head(trace, thread, duration, sample->time, trace->output);
2015
2016         if (ttrace->entry_pending) {
2017                 fprintf(trace->output, "%-70s", ttrace->entry_str);
2018         } else {
2019                 fprintf(trace->output, " ... [");
2020                 color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued");
2021                 fprintf(trace->output, "]: %s()", sc->name);
2022         }
2023
2024         if (sc->fmt == NULL) {
2025 signed_print:
2026                 fprintf(trace->output, ") = %ld", ret);
2027         } else if (ret < 0 && sc->fmt->errmsg) {
2028                 char bf[STRERR_BUFSIZE];
2029                 const char *emsg = strerror_r(-ret, bf, sizeof(bf)),
2030                            *e = audit_errno_to_name(-ret);
2031
2032                 fprintf(trace->output, ") = -1 %s %s", e, emsg);
2033         } else if (ret == 0 && sc->fmt->timeout)
2034                 fprintf(trace->output, ") = 0 Timeout");
2035         else if (sc->fmt->hexret)
2036                 fprintf(trace->output, ") = %#lx", ret);
2037         else
2038                 goto signed_print;
2039
2040         fputc('\n', trace->output);
2041 out:
2042         ttrace->entry_pending = false;
2043         err = 0;
2044 out_put:
2045         thread__put(thread);
2046         return err;
2047 }
2048
2049 static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel,
2050                               union perf_event *event __maybe_unused,
2051                               struct perf_sample *sample)
2052 {
2053         struct thread *thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
2054         struct thread_trace *ttrace;
2055         size_t filename_len, entry_str_len, to_move;
2056         ssize_t remaining_space;
2057         char *pos;
2058         const char *filename;
2059
2060         trace->last_vfs_getname = perf_evsel__rawptr(evsel, sample, "pathname");
2061
2062         if (!thread)
2063                 goto out;
2064
2065         ttrace = thread__priv(thread);
2066         if (!ttrace)
2067                 goto out;
2068
2069         if (!ttrace->filename.ptr)
2070                 goto out;
2071
2072         entry_str_len = strlen(ttrace->entry_str);
2073         remaining_space = trace__entry_str_size - entry_str_len - 1; /* \0 */
2074         if (remaining_space <= 0)
2075                 goto out;
2076
2077         filename = trace->last_vfs_getname;
2078         filename_len = strlen(filename);
2079         if (filename_len > (size_t)remaining_space) {
2080                 filename += filename_len - remaining_space;
2081                 filename_len = remaining_space;
2082         }
2083
2084         to_move = entry_str_len - ttrace->filename.entry_str_pos + 1; /* \0 */
2085         pos = ttrace->entry_str + ttrace->filename.entry_str_pos;
2086         memmove(pos + filename_len, pos, to_move);
2087         memcpy(pos, filename, filename_len);
2088
2089         ttrace->filename.ptr = 0;
2090         ttrace->filename.entry_str_pos = 0;
2091 out:
2092         return 0;
2093 }
2094
2095 static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel,
2096                                      union perf_event *event __maybe_unused,
2097                                      struct perf_sample *sample)
2098 {
2099         u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
2100         double runtime_ms = (double)runtime / NSEC_PER_MSEC;
2101         struct thread *thread = machine__findnew_thread(trace->host,
2102                                                         sample->pid,
2103                                                         sample->tid);
2104         struct thread_trace *ttrace = thread__trace(thread, trace->output);
2105
2106         if (ttrace == NULL)
2107                 goto out_dump;
2108
2109         ttrace->runtime_ms += runtime_ms;
2110         trace->runtime_ms += runtime_ms;
2111         thread__put(thread);
2112         return 0;
2113
2114 out_dump:
2115         fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n",
2116                evsel->name,
2117                perf_evsel__strval(evsel, sample, "comm"),
2118                (pid_t)perf_evsel__intval(evsel, sample, "pid"),
2119                runtime,
2120                perf_evsel__intval(evsel, sample, "vruntime"));
2121         thread__put(thread);
2122         return 0;
2123 }
2124
2125 static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel,
2126                                 union perf_event *event __maybe_unused,
2127                                 struct perf_sample *sample)
2128 {
2129         trace__printf_interrupted_entry(trace, sample);
2130         trace__fprintf_tstamp(trace, sample->time, trace->output);
2131
2132         if (trace->trace_syscalls)
2133                 fprintf(trace->output, "(         ): ");
2134
2135         fprintf(trace->output, "%s:", evsel->name);
2136
2137         if (evsel->tp_format) {
2138                 event_format__fprintf(evsel->tp_format, sample->cpu,
2139                                       sample->raw_data, sample->raw_size,
2140                                       trace->output);
2141         }
2142
2143         fprintf(trace->output, ")\n");
2144         return 0;
2145 }
2146
2147 static void print_location(FILE *f, struct perf_sample *sample,
2148                            struct addr_location *al,
2149                            bool print_dso, bool print_sym)
2150 {
2151
2152         if ((verbose || print_dso) && al->map)
2153                 fprintf(f, "%s@", al->map->dso->long_name);
2154
2155         if ((verbose || print_sym) && al->sym)
2156                 fprintf(f, "%s+0x%" PRIx64, al->sym->name,
2157                         al->addr - al->sym->start);
2158         else if (al->map)
2159                 fprintf(f, "0x%" PRIx64, al->addr);
2160         else
2161                 fprintf(f, "0x%" PRIx64, sample->addr);
2162 }
2163
2164 static int trace__pgfault(struct trace *trace,
2165                           struct perf_evsel *evsel,
2166                           union perf_event *event,
2167                           struct perf_sample *sample)
2168 {
2169         struct thread *thread;
2170         u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
2171         struct addr_location al;
2172         char map_type = 'd';
2173         struct thread_trace *ttrace;
2174         int err = -1;
2175
2176         thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
2177         ttrace = thread__trace(thread, trace->output);
2178         if (ttrace == NULL)
2179                 goto out_put;
2180
2181         if (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ)
2182                 ttrace->pfmaj++;
2183         else
2184                 ttrace->pfmin++;
2185
2186         if (trace->summary_only)
2187                 goto out;
2188
2189         thread__find_addr_location(thread, cpumode, MAP__FUNCTION,
2190                               sample->ip, &al);
2191
2192         trace__fprintf_entry_head(trace, thread, 0, sample->time, trace->output);
2193
2194         fprintf(trace->output, "%sfault [",
2195                 evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ?
2196                 "maj" : "min");
2197
2198         print_location(trace->output, sample, &al, false, true);
2199
2200         fprintf(trace->output, "] => ");
2201
2202         thread__find_addr_location(thread, cpumode, MAP__VARIABLE,
2203                                    sample->addr, &al);
2204
2205         if (!al.map) {
2206                 thread__find_addr_location(thread, cpumode,
2207                                            MAP__FUNCTION, sample->addr, &al);
2208
2209                 if (al.map)
2210                         map_type = 'x';
2211                 else
2212                         map_type = '?';
2213         }
2214
2215         print_location(trace->output, sample, &al, true, false);
2216
2217         fprintf(trace->output, " (%c%c)\n", map_type, al.level);
2218 out:
2219         err = 0;
2220 out_put:
2221         thread__put(thread);
2222         return err;
2223 }
2224
2225 static bool skip_sample(struct trace *trace, struct perf_sample *sample)
2226 {
2227         if ((trace->pid_list && intlist__find(trace->pid_list, sample->pid)) ||
2228             (trace->tid_list && intlist__find(trace->tid_list, sample->tid)))
2229                 return false;
2230
2231         if (trace->pid_list || trace->tid_list)
2232                 return true;
2233
2234         return false;
2235 }
2236
2237 static int trace__process_sample(struct perf_tool *tool,
2238                                  union perf_event *event,
2239                                  struct perf_sample *sample,
2240                                  struct perf_evsel *evsel,
2241                                  struct machine *machine __maybe_unused)
2242 {
2243         struct trace *trace = container_of(tool, struct trace, tool);
2244         int err = 0;
2245
2246         tracepoint_handler handler = evsel->handler;
2247
2248         if (skip_sample(trace, sample))
2249                 return 0;
2250
2251         if (!trace->full_time && trace->base_time == 0)
2252                 trace->base_time = sample->time;
2253
2254         if (handler) {
2255                 ++trace->nr_events;
2256                 handler(trace, evsel, event, sample);
2257         }
2258
2259         return err;
2260 }
2261
2262 static int parse_target_str(struct trace *trace)
2263 {
2264         if (trace->opts.target.pid) {
2265                 trace->pid_list = intlist__new(trace->opts.target.pid);
2266                 if (trace->pid_list == NULL) {
2267                         pr_err("Error parsing process id string\n");
2268                         return -EINVAL;
2269                 }
2270         }
2271
2272         if (trace->opts.target.tid) {
2273                 trace->tid_list = intlist__new(trace->opts.target.tid);
2274                 if (trace->tid_list == NULL) {
2275                         pr_err("Error parsing thread id string\n");
2276                         return -EINVAL;
2277                 }
2278         }
2279
2280         return 0;
2281 }
2282
2283 static int trace__record(struct trace *trace, int argc, const char **argv)
2284 {
2285         unsigned int rec_argc, i, j;
2286         const char **rec_argv;
2287         const char * const record_args[] = {
2288                 "record",
2289                 "-R",
2290                 "-m", "1024",
2291                 "-c", "1",
2292         };
2293
2294         const char * const sc_args[] = { "-e", };
2295         unsigned int sc_args_nr = ARRAY_SIZE(sc_args);
2296         const char * const majpf_args[] = { "-e", "major-faults" };
2297         unsigned int majpf_args_nr = ARRAY_SIZE(majpf_args);
2298         const char * const minpf_args[] = { "-e", "minor-faults" };
2299         unsigned int minpf_args_nr = ARRAY_SIZE(minpf_args);
2300
2301         /* +1 is for the event string below */
2302         rec_argc = ARRAY_SIZE(record_args) + sc_args_nr + 1 +
2303                 majpf_args_nr + minpf_args_nr + argc;
2304         rec_argv = calloc(rec_argc + 1, sizeof(char *));
2305
2306         if (rec_argv == NULL)
2307                 return -ENOMEM;
2308
2309         j = 0;
2310         for (i = 0; i < ARRAY_SIZE(record_args); i++)
2311                 rec_argv[j++] = record_args[i];
2312
2313         if (trace->trace_syscalls) {
2314                 for (i = 0; i < sc_args_nr; i++)
2315                         rec_argv[j++] = sc_args[i];
2316
2317                 /* event string may be different for older kernels - e.g., RHEL6 */
2318                 if (is_valid_tracepoint("raw_syscalls:sys_enter"))
2319                         rec_argv[j++] = "raw_syscalls:sys_enter,raw_syscalls:sys_exit";
2320                 else if (is_valid_tracepoint("syscalls:sys_enter"))
2321                         rec_argv[j++] = "syscalls:sys_enter,syscalls:sys_exit";
2322                 else {
2323                         pr_err("Neither raw_syscalls nor syscalls events exist.\n");
2324                         return -1;
2325                 }
2326         }
2327
2328         if (trace->trace_pgfaults & TRACE_PFMAJ)
2329                 for (i = 0; i < majpf_args_nr; i++)
2330                         rec_argv[j++] = majpf_args[i];
2331
2332         if (trace->trace_pgfaults & TRACE_PFMIN)
2333                 for (i = 0; i < minpf_args_nr; i++)
2334                         rec_argv[j++] = minpf_args[i];
2335
2336         for (i = 0; i < (unsigned int)argc; i++)
2337                 rec_argv[j++] = argv[i];
2338
2339         return cmd_record(j, rec_argv, NULL);
2340 }
2341
2342 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp);
2343
2344 static bool perf_evlist__add_vfs_getname(struct perf_evlist *evlist)
2345 {
2346         struct perf_evsel *evsel = perf_evsel__newtp("probe", "vfs_getname");
2347         if (evsel == NULL)
2348                 return false;
2349
2350         if (perf_evsel__field(evsel, "pathname") == NULL) {
2351                 perf_evsel__delete(evsel);
2352                 return false;
2353         }
2354
2355         evsel->handler = trace__vfs_getname;
2356         perf_evlist__add(evlist, evsel);
2357         return true;
2358 }
2359
2360 static int perf_evlist__add_pgfault(struct perf_evlist *evlist,
2361                                     u64 config)
2362 {
2363         struct perf_evsel *evsel;
2364         struct perf_event_attr attr = {
2365                 .type = PERF_TYPE_SOFTWARE,
2366                 .mmap_data = 1,
2367         };
2368
2369         attr.config = config;
2370         attr.sample_period = 1;
2371
2372         event_attr_init(&attr);
2373
2374         evsel = perf_evsel__new(&attr);
2375         if (!evsel)
2376                 return -ENOMEM;
2377
2378         evsel->handler = trace__pgfault;
2379         perf_evlist__add(evlist, evsel);
2380
2381         return 0;
2382 }
2383
2384 static void trace__handle_event(struct trace *trace, union perf_event *event, struct perf_sample *sample)
2385 {
2386         const u32 type = event->header.type;
2387         struct perf_evsel *evsel;
2388
2389         if (!trace->full_time && trace->base_time == 0)
2390                 trace->base_time = sample->time;
2391
2392         if (type != PERF_RECORD_SAMPLE) {
2393                 trace__process_event(trace, trace->host, event, sample);
2394                 return;
2395         }
2396
2397         evsel = perf_evlist__id2evsel(trace->evlist, sample->id);
2398         if (evsel == NULL) {
2399                 fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample->id);
2400                 return;
2401         }
2402
2403         if (evsel->attr.type == PERF_TYPE_TRACEPOINT &&
2404             sample->raw_data == NULL) {
2405                 fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
2406                        perf_evsel__name(evsel), sample->tid,
2407                        sample->cpu, sample->raw_size);
2408         } else {
2409                 tracepoint_handler handler = evsel->handler;
2410                 handler(trace, evsel, event, sample);
2411         }
2412 }
2413
2414 static int trace__add_syscall_newtp(struct trace *trace)
2415 {
2416         int ret = -1;
2417         struct perf_evlist *evlist = trace->evlist;
2418         struct perf_evsel *sys_enter, *sys_exit;
2419
2420         sys_enter = perf_evsel__syscall_newtp("sys_enter", trace__sys_enter);
2421         if (sys_enter == NULL)
2422                 goto out;
2423
2424         if (perf_evsel__init_sc_tp_ptr_field(sys_enter, args))
2425                 goto out_delete_sys_enter;
2426
2427         sys_exit = perf_evsel__syscall_newtp("sys_exit", trace__sys_exit);
2428         if (sys_exit == NULL)
2429                 goto out_delete_sys_enter;
2430
2431         if (perf_evsel__init_sc_tp_uint_field(sys_exit, ret))
2432                 goto out_delete_sys_exit;
2433
2434         perf_evlist__add(evlist, sys_enter);
2435         perf_evlist__add(evlist, sys_exit);
2436
2437         trace->syscalls.events.sys_enter = sys_enter;
2438         trace->syscalls.events.sys_exit  = sys_exit;
2439
2440         ret = 0;
2441 out:
2442         return ret;
2443
2444 out_delete_sys_exit:
2445         perf_evsel__delete_priv(sys_exit);
2446 out_delete_sys_enter:
2447         perf_evsel__delete_priv(sys_enter);
2448         goto out;
2449 }
2450
2451 static int trace__set_ev_qualifier_filter(struct trace *trace)
2452 {
2453         int err = -1;
2454         char *filter = asprintf_expr_inout_ints("id", !trace->not_ev_qualifier,
2455                                                 trace->ev_qualifier_ids.nr,
2456                                                 trace->ev_qualifier_ids.entries);
2457
2458         if (filter == NULL)
2459                 goto out_enomem;
2460
2461         if (!perf_evsel__append_filter(trace->syscalls.events.sys_enter, "&&", filter))
2462                 err = perf_evsel__append_filter(trace->syscalls.events.sys_exit, "&&", filter);
2463
2464         free(filter);
2465 out:
2466         return err;
2467 out_enomem:
2468         errno = ENOMEM;
2469         goto out;
2470 }
2471
2472 static int trace__run(struct trace *trace, int argc, const char **argv)
2473 {
2474         struct perf_evlist *evlist = trace->evlist;
2475         struct perf_evsel *evsel;
2476         int err = -1, i;
2477         unsigned long before;
2478         const bool forks = argc > 0;
2479         bool draining = false;
2480
2481         trace->live = true;
2482
2483         if (trace->trace_syscalls && trace__add_syscall_newtp(trace))
2484                 goto out_error_raw_syscalls;
2485
2486         if (trace->trace_syscalls)
2487                 trace->vfs_getname = perf_evlist__add_vfs_getname(evlist);
2488
2489         if ((trace->trace_pgfaults & TRACE_PFMAJ) &&
2490             perf_evlist__add_pgfault(evlist, PERF_COUNT_SW_PAGE_FAULTS_MAJ)) {
2491                 goto out_error_mem;
2492         }
2493
2494         if ((trace->trace_pgfaults & TRACE_PFMIN) &&
2495             perf_evlist__add_pgfault(evlist, PERF_COUNT_SW_PAGE_FAULTS_MIN))
2496                 goto out_error_mem;
2497
2498         if (trace->sched &&
2499             perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
2500                                    trace__sched_stat_runtime))
2501                 goto out_error_sched_stat_runtime;
2502
2503         err = perf_evlist__create_maps(evlist, &trace->opts.target);
2504         if (err < 0) {
2505                 fprintf(trace->output, "Problems parsing the target to trace, check your options!\n");
2506                 goto out_delete_evlist;
2507         }
2508
2509         err = trace__symbols_init(trace, evlist);
2510         if (err < 0) {
2511                 fprintf(trace->output, "Problems initializing symbol libraries!\n");
2512                 goto out_delete_evlist;
2513         }
2514
2515         perf_evlist__config(evlist, &trace->opts);
2516
2517         signal(SIGCHLD, sig_handler);
2518         signal(SIGINT, sig_handler);
2519
2520         if (forks) {
2521                 err = perf_evlist__prepare_workload(evlist, &trace->opts.target,
2522                                                     argv, false, NULL);
2523                 if (err < 0) {
2524                         fprintf(trace->output, "Couldn't run the workload!\n");
2525                         goto out_delete_evlist;
2526                 }
2527         }
2528
2529         err = perf_evlist__open(evlist);
2530         if (err < 0)
2531                 goto out_error_open;
2532
2533         /*
2534          * Better not use !target__has_task() here because we need to cover the
2535          * case where no threads were specified in the command line, but a
2536          * workload was, and in that case we will fill in the thread_map when
2537          * we fork the workload in perf_evlist__prepare_workload.
2538          */
2539         if (trace->filter_pids.nr > 0)
2540                 err = perf_evlist__set_filter_pids(evlist, trace->filter_pids.nr, trace->filter_pids.entries);
2541         else if (thread_map__pid(evlist->threads, 0) == -1)
2542                 err = perf_evlist__set_filter_pid(evlist, getpid());
2543
2544         if (err < 0)
2545                 goto out_error_mem;
2546
2547         if (trace->ev_qualifier_ids.nr > 0) {
2548                 err = trace__set_ev_qualifier_filter(trace);
2549                 if (err < 0)
2550                         goto out_errno;
2551
2552                 pr_debug("event qualifier tracepoint filter: %s\n",
2553                          trace->syscalls.events.sys_exit->filter);
2554         }
2555
2556         err = perf_evlist__apply_filters(evlist, &evsel);
2557         if (err < 0)
2558                 goto out_error_apply_filters;
2559
2560         err = perf_evlist__mmap(evlist, trace->opts.mmap_pages, false);
2561         if (err < 0)
2562                 goto out_error_mmap;
2563
2564         if (!target__none(&trace->opts.target))
2565                 perf_evlist__enable(evlist);
2566
2567         if (forks)
2568                 perf_evlist__start_workload(evlist);
2569
2570         trace->multiple_threads = thread_map__pid(evlist->threads, 0) == -1 ||
2571                                   evlist->threads->nr > 1 ||
2572                                   perf_evlist__first(evlist)->attr.inherit;
2573 again:
2574         before = trace->nr_events;
2575
2576         for (i = 0; i < evlist->nr_mmaps; i++) {
2577                 union perf_event *event;
2578
2579                 while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) {
2580                         struct perf_sample sample;
2581
2582                         ++trace->nr_events;
2583
2584                         err = perf_evlist__parse_sample(evlist, event, &sample);
2585                         if (err) {
2586                                 fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err);
2587                                 goto next_event;
2588                         }
2589
2590                         trace__handle_event(trace, event, &sample);
2591 next_event:
2592                         perf_evlist__mmap_consume(evlist, i);
2593
2594                         if (interrupted)
2595                                 goto out_disable;
2596
2597                         if (done && !draining) {
2598                                 perf_evlist__disable(evlist);
2599                                 draining = true;
2600                         }
2601                 }
2602         }
2603
2604         if (trace->nr_events == before) {
2605                 int timeout = done ? 100 : -1;
2606
2607                 if (!draining && perf_evlist__poll(evlist, timeout) > 0) {
2608                         if (perf_evlist__filter_pollfd(evlist, POLLERR | POLLHUP) == 0)
2609                                 draining = true;
2610
2611                         goto again;
2612                 }
2613         } else {
2614                 goto again;
2615         }
2616
2617 out_disable:
2618         thread__zput(trace->current);
2619
2620         perf_evlist__disable(evlist);
2621
2622         if (!err) {
2623                 if (trace->summary)
2624                         trace__fprintf_thread_summary(trace, trace->output);
2625
2626                 if (trace->show_tool_stats) {
2627                         fprintf(trace->output, "Stats:\n "
2628                                                " vfs_getname : %" PRIu64 "\n"
2629                                                " proc_getname: %" PRIu64 "\n",
2630                                 trace->stats.vfs_getname,
2631                                 trace->stats.proc_getname);
2632                 }
2633         }
2634
2635 out_delete_evlist:
2636         perf_evlist__delete(evlist);
2637         trace->evlist = NULL;
2638         trace->live = false;
2639         return err;
2640 {
2641         char errbuf[BUFSIZ];
2642
2643 out_error_sched_stat_runtime:
2644         debugfs__strerror_open_tp(errno, errbuf, sizeof(errbuf), "sched", "sched_stat_runtime");
2645         goto out_error;
2646
2647 out_error_raw_syscalls:
2648         debugfs__strerror_open_tp(errno, errbuf, sizeof(errbuf), "raw_syscalls", "sys_(enter|exit)");
2649         goto out_error;
2650
2651 out_error_mmap:
2652         perf_evlist__strerror_mmap(evlist, errno, errbuf, sizeof(errbuf));
2653         goto out_error;
2654
2655 out_error_open:
2656         perf_evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf));
2657
2658 out_error:
2659         fprintf(trace->output, "%s\n", errbuf);
2660         goto out_delete_evlist;
2661
2662 out_error_apply_filters:
2663         fprintf(trace->output,
2664                 "Failed to set filter \"%s\" on event %s with %d (%s)\n",
2665                 evsel->filter, perf_evsel__name(evsel), errno,
2666                 strerror_r(errno, errbuf, sizeof(errbuf)));
2667         goto out_delete_evlist;
2668 }
2669 out_error_mem:
2670         fprintf(trace->output, "Not enough memory to run!\n");
2671         goto out_delete_evlist;
2672
2673 out_errno:
2674         fprintf(trace->output, "errno=%d,%s\n", errno, strerror(errno));
2675         goto out_delete_evlist;
2676 }
2677
2678 static int trace__replay(struct trace *trace)
2679 {
2680         const struct perf_evsel_str_handler handlers[] = {
2681                 { "probe:vfs_getname",       trace__vfs_getname, },
2682         };
2683         struct perf_data_file file = {
2684                 .path  = input_name,
2685                 .mode  = PERF_DATA_MODE_READ,
2686                 .force = trace->force,
2687         };
2688         struct perf_session *session;
2689         struct perf_evsel *evsel;
2690         int err = -1;
2691
2692         trace->tool.sample        = trace__process_sample;
2693         trace->tool.mmap          = perf_event__process_mmap;
2694         trace->tool.mmap2         = perf_event__process_mmap2;
2695         trace->tool.comm          = perf_event__process_comm;
2696         trace->tool.exit          = perf_event__process_exit;
2697         trace->tool.fork          = perf_event__process_fork;
2698         trace->tool.attr          = perf_event__process_attr;
2699         trace->tool.tracing_data = perf_event__process_tracing_data;
2700         trace->tool.build_id      = perf_event__process_build_id;
2701
2702         trace->tool.ordered_events = true;
2703         trace->tool.ordering_requires_timestamps = true;
2704
2705         /* add tid to output */
2706         trace->multiple_threads = true;
2707
2708         session = perf_session__new(&file, false, &trace->tool);
2709         if (session == NULL)
2710                 return -1;
2711
2712         if (symbol__init(&session->header.env) < 0)
2713                 goto out;
2714
2715         trace->host = &session->machines.host;
2716
2717         err = perf_session__set_tracepoints_handlers(session, handlers);
2718         if (err)
2719                 goto out;
2720
2721         evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2722                                                      "raw_syscalls:sys_enter");
2723         /* older kernels have syscalls tp versus raw_syscalls */
2724         if (evsel == NULL)
2725                 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2726                                                              "syscalls:sys_enter");
2727
2728         if (evsel &&
2729             (perf_evsel__init_syscall_tp(evsel, trace__sys_enter) < 0 ||
2730             perf_evsel__init_sc_tp_ptr_field(evsel, args))) {
2731                 pr_err("Error during initialize raw_syscalls:sys_enter event\n");
2732                 goto out;
2733         }
2734
2735         evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2736                                                      "raw_syscalls:sys_exit");
2737         if (evsel == NULL)
2738                 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2739                                                              "syscalls:sys_exit");
2740         if (evsel &&
2741             (perf_evsel__init_syscall_tp(evsel, trace__sys_exit) < 0 ||
2742             perf_evsel__init_sc_tp_uint_field(evsel, ret))) {
2743                 pr_err("Error during initialize raw_syscalls:sys_exit event\n");
2744                 goto out;
2745         }
2746
2747         evlist__for_each(session->evlist, evsel) {
2748                 if (evsel->attr.type == PERF_TYPE_SOFTWARE &&
2749                     (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ||
2750                      evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MIN ||
2751                      evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS))
2752                         evsel->handler = trace__pgfault;
2753         }
2754
2755         err = parse_target_str(trace);
2756         if (err != 0)
2757                 goto out;
2758
2759         setup_pager();
2760
2761         err = perf_session__process_events(session);
2762         if (err)
2763                 pr_err("Failed to process events, error %d", err);
2764
2765         else if (trace->summary)
2766                 trace__fprintf_thread_summary(trace, trace->output);
2767
2768 out:
2769         perf_session__delete(session);
2770
2771         return err;
2772 }
2773
/*
 * Print the heading of the per-thread summary to fp.
 *
 * Returns the value reported by fprintf(), converted to size_t.
 */
static size_t trace__fprintf_threads_header(FILE *fp)
{
	return fprintf(fp, "\n Summary of events:\n\n");
}
2782
2783 static size_t thread__dump_stats(struct thread_trace *ttrace,
2784                                  struct trace *trace, FILE *fp)
2785 {
2786         struct stats *stats;
2787         size_t printed = 0;
2788         struct syscall *sc;
2789         struct int_node *inode = intlist__first(ttrace->syscall_stats);
2790
2791         if (inode == NULL)
2792                 return 0;
2793
2794         printed += fprintf(fp, "\n");
2795
2796         printed += fprintf(fp, "   syscall            calls    total       min       avg       max      stddev\n");
2797         printed += fprintf(fp, "                               (msec)    (msec)    (msec)    (msec)        (%%)\n");
2798         printed += fprintf(fp, "   --------------- -------- --------- --------- --------- ---------     ------\n");
2799
2800         /* each int_node is a syscall */
2801         while (inode) {
2802                 stats = inode->priv;
2803                 if (stats) {
2804                         double min = (double)(stats->min) / NSEC_PER_MSEC;
2805                         double max = (double)(stats->max) / NSEC_PER_MSEC;
2806                         double avg = avg_stats(stats);
2807                         double pct;
2808                         u64 n = (u64) stats->n;
2809
2810                         pct = avg ? 100.0 * stddev_stats(stats)/avg : 0.0;
2811                         avg /= NSEC_PER_MSEC;
2812
2813                         sc = &trace->syscalls.table[inode->i];
2814                         printed += fprintf(fp, "   %-15s", sc->name);
2815                         printed += fprintf(fp, " %8" PRIu64 " %9.3f %9.3f %9.3f",
2816                                            n, avg * n, min, avg);
2817                         printed += fprintf(fp, " %9.3f %9.2f%%\n", max, pct);
2818                 }
2819
2820                 inode = intlist__next(inode);
2821         }
2822
2823         printed += fprintf(fp, "\n\n");
2824
2825         return printed;
2826 }
2827
/*
 * Context handed to the per-thread summary callback through its opaque
 * pointer argument.
 */
struct summary_data {
	FILE *fp;		/* stream the summary is written to */
	struct trace *trace;	/* trace session being summarized */
	size_t printed;		/* running total of the fprintf() results */
};
2834
2835 static int trace__fprintf_one_thread(struct thread *thread, void *priv)
2836 {
2837         struct summary_data *data = priv;
2838         FILE *fp = data->fp;
2839         size_t printed = data->printed;
2840         struct trace *trace = data->trace;
2841         struct thread_trace *ttrace = thread__priv(thread);
2842         double ratio;
2843
2844         if (ttrace == NULL)
2845                 return 0;
2846
2847         ratio = (double)ttrace->nr_events / trace->nr_events * 100.0;
2848
2849         printed += fprintf(fp, " %s (%d), ", thread__comm_str(thread), thread->tid);
2850         printed += fprintf(fp, "%lu events, ", ttrace->nr_events);
2851         printed += fprintf(fp, "%.1f%%", ratio);
2852         if (ttrace->pfmaj)
2853                 printed += fprintf(fp, ", %lu majfaults", ttrace->pfmaj);
2854         if (ttrace->pfmin)
2855                 printed += fprintf(fp, ", %lu minfaults", ttrace->pfmin);
2856         printed += fprintf(fp, ", %.3f msec\n", ttrace->runtime_ms);
2857         printed += thread__dump_stats(ttrace, trace, fp);
2858
2859         data->printed += printed;
2860
2861         return 0;
2862 }
2863
2864 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
2865 {
2866         struct summary_data data = {
2867                 .fp = fp,
2868                 .trace = trace
2869         };
2870         data.printed = trace__fprintf_threads_header(fp);
2871
2872         machine__for_each_thread(trace->host, trace__fprintf_one_thread, &data);
2873
2874         return data.printed;
2875 }
2876
2877 static int trace__set_duration(const struct option *opt, const char *str,
2878                                int unset __maybe_unused)
2879 {
2880         struct trace *trace = opt->value;
2881
2882         trace->duration_filter = atof(str);
2883         return 0;
2884 }
2885
2886 static int trace__set_filter_pids(const struct option *opt, const char *str,
2887                                   int unset __maybe_unused)
2888 {
2889         int ret = -1;
2890         size_t i;
2891         struct trace *trace = opt->value;
2892         /*
2893          * FIXME: introduce a intarray class, plain parse csv and create a
2894          * { int nr, int entries[] } struct...
2895          */
2896         struct intlist *list = intlist__new(str);
2897
2898         if (list == NULL)
2899                 return -1;
2900
2901         i = trace->filter_pids.nr = intlist__nr_entries(list) + 1;
2902         trace->filter_pids.entries = calloc(i, sizeof(pid_t));
2903
2904         if (trace->filter_pids.entries == NULL)
2905                 goto out;
2906
2907         trace->filter_pids.entries[0] = getpid();
2908
2909         for (i = 1; i < trace->filter_pids.nr; ++i)
2910                 trace->filter_pids.entries[i] = intlist__entry(list, i - 1)->i;
2911
2912         intlist__delete(list);
2913         ret = 0;
2914 out:
2915         return ret;
2916 }
2917
2918 static int trace__open_output(struct trace *trace, const char *filename)
2919 {
2920         struct stat st;
2921
2922         if (!stat(filename, &st) && st.st_size) {
2923                 char oldname[PATH_MAX];
2924
2925                 scnprintf(oldname, sizeof(oldname), "%s.old", filename);
2926                 unlink(oldname);
2927                 rename(filename, oldname);
2928         }
2929
2930         trace->output = fopen(filename, "w");
2931
2932         return trace->output == NULL ? -errno : 0;
2933 }
2934
2935 static int parse_pagefaults(const struct option *opt, const char *str,
2936                             int unset __maybe_unused)
2937 {
2938         int *trace_pgfaults = opt->value;
2939
2940         if (strcmp(str, "all") == 0)
2941                 *trace_pgfaults |= TRACE_PFMAJ | TRACE_PFMIN;
2942         else if (strcmp(str, "maj") == 0)
2943                 *trace_pgfaults |= TRACE_PFMAJ;
2944         else if (strcmp(str, "min") == 0)
2945                 *trace_pgfaults |= TRACE_PFMIN;
2946         else
2947                 return -1;
2948
2949         return 0;
2950 }
2951
2952 static void evlist__set_evsel_handler(struct perf_evlist *evlist, void *handler)
2953 {
2954         struct perf_evsel *evsel;
2955
2956         evlist__for_each(evlist, evsel)
2957                 evsel->handler = handler;
2958 }
2959
2960 int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
2961 {
2962         const char *trace_usage[] = {
2963                 "perf trace [<options>] [<command>]",
2964                 "perf trace [<options>] -- <command> [<options>]",
2965                 "perf trace record [<options>] [<command>]",
2966                 "perf trace record [<options>] -- <command> [<options>]",
2967                 NULL
2968         };
2969         struct trace trace = {
2970                 .audit = {
2971                         .machine = audit_detect_machine(),
2972                         .open_id = audit_name_to_syscall("open", trace.audit.machine),
2973                 },
2974                 .syscalls = {
2975                         . max = -1,
2976                 },
2977                 .opts = {
2978                         .target = {
2979                                 .uid       = UINT_MAX,
2980                                 .uses_mmap = true,
2981                         },
2982                         .user_freq     = UINT_MAX,
2983                         .user_interval = ULLONG_MAX,
2984                         .no_buffering  = true,
2985                         .mmap_pages    = UINT_MAX,
2986                         .proc_map_timeout  = 500,
2987                 },
2988                 .output = stderr,
2989                 .show_comm = true,
2990                 .trace_syscalls = true,
2991         };
2992         const char *output_name = NULL;
2993         const char *ev_qualifier_str = NULL;
2994         const struct option trace_options[] = {
2995         OPT_CALLBACK(0, "event", &trace.evlist, "event",
2996                      "event selector. use 'perf list' to list available events",
2997                      parse_events_option),
2998         OPT_BOOLEAN(0, "comm", &trace.show_comm,
2999                     "show the thread COMM next to its id"),
3000         OPT_BOOLEAN(0, "tool_stats", &trace.show_tool_stats, "show tool stats"),
3001         OPT_STRING('e', "expr", &ev_qualifier_str, "expr", "list of syscalls to trace"),
3002         OPT_STRING('o', "output", &output_name, "file", "output file name"),
3003         OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"),
3004         OPT_STRING('p', "pid", &trace.opts.target.pid, "pid",
3005                     "trace events on existing process id"),
3006         OPT_STRING('t', "tid", &trace.opts.target.tid, "tid",
3007                     "trace events on existing thread id"),
3008         OPT_CALLBACK(0, "filter-pids", &trace, "CSV list of pids",
3009                      "pids to filter (by the kernel)", trace__set_filter_pids),
3010         OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide,
3011                     "system-wide collection from all CPUs"),
3012         OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu",
3013                     "list of cpus to monitor"),
3014         OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit,
3015                     "child tasks do not inherit counters"),
3016         OPT_CALLBACK('m', "mmap-pages", &trace.opts.mmap_pages, "pages",
3017                      "number of mmap data pages",
3018                      perf_evlist__parse_mmap_pages),
3019         OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user",
3020                    "user to profile"),
3021         OPT_CALLBACK(0, "duration", &trace, "float",
3022                      "show only events with duration > N.M ms",
3023                      trace__set_duration),
3024         OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"),
3025         OPT_INCR('v', "verbose", &verbose, "be more verbose"),
3026         OPT_BOOLEAN('T', "time", &trace.full_time,
3027                     "Show full timestamp, not time relative to first start"),
3028         OPT_BOOLEAN('s', "summary", &trace.summary_only,
3029                     "Show only syscall summary with statistics"),
3030         OPT_BOOLEAN('S', "with-summary", &trace.summary,
3031                     "Show all syscalls and summary with statistics"),
3032         OPT_CALLBACK_DEFAULT('F', "pf", &trace.trace_pgfaults, "all|maj|min",
3033                      "Trace pagefaults", parse_pagefaults, "maj"),
3034         OPT_BOOLEAN(0, "syscalls", &trace.trace_syscalls, "Trace syscalls"),
3035         OPT_BOOLEAN('f', "force", &trace.force, "don't complain, do it"),
3036         OPT_UINTEGER(0, "proc-map-timeout", &trace.opts.proc_map_timeout,
3037                         "per thread proc mmap processing timeout in ms"),
3038         OPT_END()
3039         };
3040         const char * const trace_subcommands[] = { "record", NULL };
3041         int err;
3042         char bf[BUFSIZ];
3043
3044         signal(SIGSEGV, sighandler_dump_stack);
3045         signal(SIGFPE, sighandler_dump_stack);
3046
3047         trace.evlist = perf_evlist__new();
3048
3049         if (trace.evlist == NULL) {
3050                 pr_err("Not enough memory to run!\n");
3051                 err = -ENOMEM;
3052                 goto out;
3053         }
3054
3055         argc = parse_options_subcommand(argc, argv, trace_options, trace_subcommands,
3056                                  trace_usage, PARSE_OPT_STOP_AT_NON_OPTION);
3057
3058         if (trace.trace_pgfaults) {
3059                 trace.opts.sample_address = true;
3060                 trace.opts.sample_time = true;
3061         }
3062
3063         if (trace.evlist->nr_entries > 0)
3064                 evlist__set_evsel_handler(trace.evlist, trace__event_handler);
3065
3066         if ((argc >= 1) && (strcmp(argv[0], "record") == 0))
3067                 return trace__record(&trace, argc-1, &argv[1]);
3068
3069         /* summary_only implies summary option, but don't overwrite summary if set */
3070         if (trace.summary_only)
3071                 trace.summary = trace.summary_only;
3072
3073         if (!trace.trace_syscalls && !trace.trace_pgfaults &&
3074             trace.evlist->nr_entries == 0 /* Was --events used? */) {
3075                 pr_err("Please specify something to trace.\n");
3076                 return -1;
3077         }
3078
3079         if (output_name != NULL) {
3080                 err = trace__open_output(&trace, output_name);
3081                 if (err < 0) {
3082                         perror("failed to create output file");
3083                         goto out;
3084                 }
3085         }
3086
3087         if (ev_qualifier_str != NULL) {
3088                 const char *s = ev_qualifier_str;
3089                 struct strlist_config slist_config = {
3090                         .dirname = system_path(STRACE_GROUPS_DIR),
3091                 };
3092
3093                 trace.not_ev_qualifier = *s == '!';
3094                 if (trace.not_ev_qualifier)
3095                         ++s;
3096                 trace.ev_qualifier = strlist__new(s, &slist_config);
3097                 if (trace.ev_qualifier == NULL) {
3098                         fputs("Not enough memory to parse event qualifier",
3099                               trace.output);
3100                         err = -ENOMEM;
3101                         goto out_close;
3102                 }
3103
3104                 err = trace__validate_ev_qualifier(&trace);
3105                 if (err)
3106                         goto out_close;
3107         }
3108
3109         err = target__validate(&trace.opts.target);
3110         if (err) {
3111                 target__strerror(&trace.opts.target, err, bf, sizeof(bf));
3112                 fprintf(trace.output, "%s", bf);
3113                 goto out_close;
3114         }
3115
3116         err = target__parse_uid(&trace.opts.target);
3117         if (err) {
3118                 target__strerror(&trace.opts.target, err, bf, sizeof(bf));
3119                 fprintf(trace.output, "%s", bf);
3120                 goto out_close;
3121         }
3122
3123         if (!argc && target__none(&trace.opts.target))
3124                 trace.opts.target.system_wide = true;
3125
3126         if (input_name)
3127                 err = trace__replay(&trace);
3128         else
3129                 err = trace__run(&trace, argc, argv);
3130
3131 out_close:
3132         if (output_name != NULL)
3133                 fclose(trace.output);
3134 out:
3135         return err;
3136 }