perf trace: Move vfs_getname storage to per thread area
[firefly-linux-kernel-4.4.55.git] / tools / perf / builtin-trace.c
1 #include <traceevent/event-parse.h>
2 #include "builtin.h"
3 #include "util/color.h"
4 #include "util/debug.h"
5 #include "util/evlist.h"
6 #include "util/exec_cmd.h"
7 #include "util/machine.h"
8 #include "util/session.h"
9 #include "util/thread.h"
10 #include "util/parse-options.h"
11 #include "util/strlist.h"
12 #include "util/intlist.h"
13 #include "util/thread_map.h"
14 #include "util/stat.h"
15 #include "trace-event.h"
16 #include "util/parse-events.h"
17
18 #include <libaudit.h>
19 #include <stdlib.h>
20 #include <sys/mman.h>
21 #include <linux/futex.h>
22
/*
 * Fallback definitions for older distros whose system headers do not provide
 * these constants. Each value is only defined when the headers left it out.
 */

/* mmap(2) flags */
#if !defined(MAP_STACK)
# define MAP_STACK		0x20000
#endif

/* madvise(2) behaviors */
#if !defined(MADV_HWPOISON)
# define MADV_HWPOISON		100
#endif

#if !defined(MADV_MERGEABLE)
# define MADV_MERGEABLE		12
#endif

#if !defined(MADV_UNMERGEABLE)
# define MADV_UNMERGEABLE	13
#endif

/* eventfd(2) flags */
#if !defined(EFD_SEMAPHORE)
# define EFD_SEMAPHORE		1
#endif

#if !defined(EFD_NONBLOCK)
# define EFD_NONBLOCK		00004000
#endif

#if !defined(EFD_CLOEXEC)
# define EFD_CLOEXEC		02000000
#endif

/* open(2) flags */
#if !defined(O_CLOEXEC)
# define O_CLOEXEC		02000000
#endif

/* socket(2) types and type flags */
#if !defined(SOCK_DCCP)
# define SOCK_DCCP		6
#endif

#if !defined(SOCK_CLOEXEC)
# define SOCK_CLOEXEC		02000000
#endif

#if !defined(SOCK_NONBLOCK)
# define SOCK_NONBLOCK		00004000
#endif

/* recvmsg(2) flags */
#if !defined(MSG_CMSG_CLOEXEC)
# define MSG_CMSG_CLOEXEC	0x40000000
#endif

/* perf_event_open(2) flags */
#if !defined(PERF_FLAG_FD_NO_GROUP)
# define PERF_FLAG_FD_NO_GROUP		(1UL << 0)
#endif

#if !defined(PERF_FLAG_FD_OUTPUT)
# define PERF_FLAG_FD_OUTPUT		(1UL << 1)
#endif

#if !defined(PERF_FLAG_PID_CGROUP)
# define PERF_FLAG_PID_CGROUP		(1UL << 2) /* pid=cgroup id, per-cpu mode only */
#endif

#if !defined(PERF_FLAG_FD_CLOEXEC)
# define PERF_FLAG_FD_CLOEXEC		(1UL << 3) /* O_CLOEXEC */
#endif
87
88
89 struct tp_field {
90         int offset;
91         union {
92                 u64 (*integer)(struct tp_field *field, struct perf_sample *sample);
93                 void *(*pointer)(struct tp_field *field, struct perf_sample *sample);
94         };
95 };
96
97 #define TP_UINT_FIELD(bits) \
98 static u64 tp_field__u##bits(struct tp_field *field, struct perf_sample *sample) \
99 { \
100         u##bits value; \
101         memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
102         return value;  \
103 }
104
105 TP_UINT_FIELD(8);
106 TP_UINT_FIELD(16);
107 TP_UINT_FIELD(32);
108 TP_UINT_FIELD(64);
109
110 #define TP_UINT_FIELD__SWAPPED(bits) \
111 static u64 tp_field__swapped_u##bits(struct tp_field *field, struct perf_sample *sample) \
112 { \
113         u##bits value; \
114         memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
115         return bswap_##bits(value);\
116 }
117
118 TP_UINT_FIELD__SWAPPED(16);
119 TP_UINT_FIELD__SWAPPED(32);
120 TP_UINT_FIELD__SWAPPED(64);
121
122 static int tp_field__init_uint(struct tp_field *field,
123                                struct format_field *format_field,
124                                bool needs_swap)
125 {
126         field->offset = format_field->offset;
127
128         switch (format_field->size) {
129         case 1:
130                 field->integer = tp_field__u8;
131                 break;
132         case 2:
133                 field->integer = needs_swap ? tp_field__swapped_u16 : tp_field__u16;
134                 break;
135         case 4:
136                 field->integer = needs_swap ? tp_field__swapped_u32 : tp_field__u32;
137                 break;
138         case 8:
139                 field->integer = needs_swap ? tp_field__swapped_u64 : tp_field__u64;
140                 break;
141         default:
142                 return -1;
143         }
144
145         return 0;
146 }
147
148 static void *tp_field__ptr(struct tp_field *field, struct perf_sample *sample)
149 {
150         return sample->raw_data + field->offset;
151 }
152
153 static int tp_field__init_ptr(struct tp_field *field, struct format_field *format_field)
154 {
155         field->offset = format_field->offset;
156         field->pointer = tp_field__ptr;
157         return 0;
158 }
159
160 struct syscall_tp {
161         struct tp_field id;
162         union {
163                 struct tp_field args, ret;
164         };
165 };
166
167 static int perf_evsel__init_tp_uint_field(struct perf_evsel *evsel,
168                                           struct tp_field *field,
169                                           const char *name)
170 {
171         struct format_field *format_field = perf_evsel__field(evsel, name);
172
173         if (format_field == NULL)
174                 return -1;
175
176         return tp_field__init_uint(field, format_field, evsel->needs_swap);
177 }
178
/*
 * Initialize the integer accessor stored in member @name of the
 * struct syscall_tp hanging off @evsel->priv, using the tracepoint field of
 * the same name. Evaluates to the result of perf_evsel__init_tp_uint_field().
 *
 * @evsel is parenthesized so that any pointer expression can be passed; note
 * that it is evaluated twice.
 */
#define perf_evsel__init_sc_tp_uint_field(evsel, name) \
	({ struct syscall_tp *sc = (evsel)->priv;\
	   perf_evsel__init_tp_uint_field((evsel), &sc->name, #name); })
182
183 static int perf_evsel__init_tp_ptr_field(struct perf_evsel *evsel,
184                                          struct tp_field *field,
185                                          const char *name)
186 {
187         struct format_field *format_field = perf_evsel__field(evsel, name);
188
189         if (format_field == NULL)
190                 return -1;
191
192         return tp_field__init_ptr(field, format_field);
193 }
194
/*
 * Initialize the pointer accessor stored in member @name of the
 * struct syscall_tp hanging off @evsel->priv, using the tracepoint field of
 * the same name. Evaluates to the result of perf_evsel__init_tp_ptr_field().
 *
 * @evsel is parenthesized so that any pointer expression can be passed; note
 * that it is evaluated twice.
 */
#define perf_evsel__init_sc_tp_ptr_field(evsel, name) \
	({ struct syscall_tp *sc = (evsel)->priv;\
	   perf_evsel__init_tp_ptr_field((evsel), &sc->name, #name); })
198
199 static void perf_evsel__delete_priv(struct perf_evsel *evsel)
200 {
201         zfree(&evsel->priv);
202         perf_evsel__delete(evsel);
203 }
204
205 static int perf_evsel__init_syscall_tp(struct perf_evsel *evsel, void *handler)
206 {
207         evsel->priv = malloc(sizeof(struct syscall_tp));
208         if (evsel->priv != NULL) {
209                 if (perf_evsel__init_sc_tp_uint_field(evsel, id))
210                         goto out_delete;
211
212                 evsel->handler = handler;
213                 return 0;
214         }
215
216         return -ENOMEM;
217
218 out_delete:
219         zfree(&evsel->priv);
220         return -ENOENT;
221 }
222
/*
 * Create the evsel for the syscall tracepoint named by @direction and wire
 * @handler to it.
 *
 * The "raw_syscalls" subsystem is tried first, then "syscalls".
 * Returns the new evsel, or NULL if neither tracepoint could be created or
 * the syscall_tp setup failed (the evsel is deleted in the latter case).
 */
static struct perf_evsel *perf_evsel__syscall_newtp(const char *direction, void *handler)
{
	struct perf_evsel *evsel = perf_evsel__newtp("raw_syscalls", direction);

	/* older kernel (e.g., RHEL6) use syscalls:{enter,exit} */
	if (evsel == NULL)
		evsel = perf_evsel__newtp("syscalls", direction);

	if (evsel == NULL)
		return NULL;

	if (perf_evsel__init_syscall_tp(evsel, handler)) {
		perf_evsel__delete_priv(evsel);
		return NULL;
	}

	return evsel;
}
242
/*
 * Read member @name of @evsel's struct syscall_tp as an integer from
 * @sample, through the accessor's integer reader.
 *
 * @evsel is parenthesized so that any pointer expression can be passed.
 */
#define perf_evsel__sc_tp_uint(evsel, name, sample) \
	({ struct syscall_tp *fields = (evsel)->priv; \
	   fields->name.integer(&fields->name, sample); })
246
/*
 * Obtain a pointer to member @name of @evsel's struct syscall_tp inside
 * @sample, through the accessor's pointer reader.
 *
 * @evsel is parenthesized so that any pointer expression can be passed.
 */
#define perf_evsel__sc_tp_ptr(evsel, name, sample) \
	({ struct syscall_tp *fields = (evsel)->priv; \
	   fields->name.pointer(&fields->name, sample); })
250
251 struct syscall_arg {
252         unsigned long val;
253         struct thread *thread;
254         struct trace  *trace;
255         void          *parm;
256         u8            idx;
257         u8            mask;
258 };
259
/*
 * Table mapping a contiguous range of integer values to names.
 *
 * @offset:     value that corresponds to entries[0]
 * @nr_entries: number of slots in @entries
 * @entries:    the names, indexed by (value - offset)
 */
struct strarray {
	int	    offset;
	int	    nr_entries;
	const char  **entries;
};
265
/*
 * Define strarray__<array>, a struct strarray wrapping the string table
 * @array; the offset is left at its zero default.
 */
#define DEFINE_STRARRAY(array) struct strarray strarray__##array = { \
	.entries    = array, \
	.nr_entries = ARRAY_SIZE(array), \
}
270
/*
 * Define strarray__<array>, a struct strarray wrapping the string table
 * @array whose first entry corresponds to the value @off.
 */
#define DEFINE_STRARRAY_OFFSET(array, off) struct strarray strarray__##array = { \
	.entries    = array, \
	.nr_entries = ARRAY_SIZE(array), \
	.offset     = off, \
}
276
277 static size_t __syscall_arg__scnprintf_strarray(char *bf, size_t size,
278                                                 const char *intfmt,
279                                                 struct syscall_arg *arg)
280 {
281         struct strarray *sa = arg->parm;
282         int idx = arg->val - sa->offset;
283
284         if (idx < 0 || idx >= sa->nr_entries)
285                 return scnprintf(bf, size, intfmt, arg->val);
286
287         return scnprintf(bf, size, "%s", sa->entries[idx]);
288 }
289
/*
 * String-table formatter: prints the table entry for the argument, or the
 * value in decimal when it falls outside the table.
 */
static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size,
					      struct syscall_arg *arg)
{
	static const char fallback_fmt[] = "%d";

	return __syscall_arg__scnprintf_strarray(bf, size, fallback_fmt, arg);
}

#define SCA_STRARRAY syscall_arg__scnprintf_strarray
297
#if defined(__i386__) || defined(__x86_64__)
/*
 * FIXME: Make this available to all arches as soon as the ioctl beautifier
 *	  gets rewritten to support all arches.
 *
 * String-table formatter: prints the table entry for the argument, or the
 * value in hexadecimal when it falls outside the table.
 */
static size_t syscall_arg__scnprintf_strhexarray(char *bf, size_t size,
						 struct syscall_arg *arg)
{
	static const char fallback_fmt[] = "%#x";

	return __syscall_arg__scnprintf_strarray(bf, size, fallback_fmt, arg);
}

#define SCA_STRHEXARRAY syscall_arg__scnprintf_strhexarray
#endif /* defined(__i386__) || defined(__x86_64__) */
311
/* File descriptor formatter; declared here, defined further down the file. */
static size_t syscall_arg__scnprintf_fd(char *bf, size_t size, struct syscall_arg *arg);

#define SCA_FD syscall_arg__scnprintf_fd
316
317 static size_t syscall_arg__scnprintf_fd_at(char *bf, size_t size,
318                                            struct syscall_arg *arg)
319 {
320         int fd = arg->val;
321
322         if (fd == AT_FDCWD)
323                 return scnprintf(bf, size, "CWD");
324
325         return syscall_arg__scnprintf_fd(bf, size, arg);
326 }
327
328 #define SCA_FDAT syscall_arg__scnprintf_fd_at
329
/* Formatter for the fd argument of close(); defined further down the file. */
static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size, struct syscall_arg *arg);

#define SCA_CLOSE_FD syscall_arg__scnprintf_close_fd
334
335 static size_t syscall_arg__scnprintf_hex(char *bf, size_t size,
336                                          struct syscall_arg *arg)
337 {
338         return scnprintf(bf, size, "%#lx", arg->val);
339 }
340
341 #define SCA_HEX syscall_arg__scnprintf_hex
342
343 static size_t syscall_arg__scnprintf_int(char *bf, size_t size,
344                                          struct syscall_arg *arg)
345 {
346         return scnprintf(bf, size, "%d", arg->val);
347 }
348
349 #define SCA_INT syscall_arg__scnprintf_int
350
351 static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size,
352                                                struct syscall_arg *arg)
353 {
354         int printed = 0, prot = arg->val;
355
356         if (prot == PROT_NONE)
357                 return scnprintf(bf, size, "NONE");
358 #define P_MMAP_PROT(n) \
359         if (prot & PROT_##n) { \
360                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
361                 prot &= ~PROT_##n; \
362         }
363
364         P_MMAP_PROT(EXEC);
365         P_MMAP_PROT(READ);
366         P_MMAP_PROT(WRITE);
367 #ifdef PROT_SEM
368         P_MMAP_PROT(SEM);
369 #endif
370         P_MMAP_PROT(GROWSDOWN);
371         P_MMAP_PROT(GROWSUP);
372 #undef P_MMAP_PROT
373
374         if (prot)
375                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", prot);
376
377         return printed;
378 }
379
380 #define SCA_MMAP_PROT syscall_arg__scnprintf_mmap_prot
381
382 static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size,
383                                                 struct syscall_arg *arg)
384 {
385         int printed = 0, flags = arg->val;
386
387 #define P_MMAP_FLAG(n) \
388         if (flags & MAP_##n) { \
389                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
390                 flags &= ~MAP_##n; \
391         }
392
393         P_MMAP_FLAG(SHARED);
394         P_MMAP_FLAG(PRIVATE);
395 #ifdef MAP_32BIT
396         P_MMAP_FLAG(32BIT);
397 #endif
398         P_MMAP_FLAG(ANONYMOUS);
399         P_MMAP_FLAG(DENYWRITE);
400         P_MMAP_FLAG(EXECUTABLE);
401         P_MMAP_FLAG(FILE);
402         P_MMAP_FLAG(FIXED);
403         P_MMAP_FLAG(GROWSDOWN);
404 #ifdef MAP_HUGETLB
405         P_MMAP_FLAG(HUGETLB);
406 #endif
407         P_MMAP_FLAG(LOCKED);
408         P_MMAP_FLAG(NONBLOCK);
409         P_MMAP_FLAG(NORESERVE);
410         P_MMAP_FLAG(POPULATE);
411         P_MMAP_FLAG(STACK);
412 #ifdef MAP_UNINITIALIZED
413         P_MMAP_FLAG(UNINITIALIZED);
414 #endif
415 #undef P_MMAP_FLAG
416
417         if (flags)
418                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
419
420         return printed;
421 }
422
423 #define SCA_MMAP_FLAGS syscall_arg__scnprintf_mmap_flags
424
425 static size_t syscall_arg__scnprintf_mremap_flags(char *bf, size_t size,
426                                                   struct syscall_arg *arg)
427 {
428         int printed = 0, flags = arg->val;
429
430 #define P_MREMAP_FLAG(n) \
431         if (flags & MREMAP_##n) { \
432                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
433                 flags &= ~MREMAP_##n; \
434         }
435
436         P_MREMAP_FLAG(MAYMOVE);
437 #ifdef MREMAP_FIXED
438         P_MREMAP_FLAG(FIXED);
439 #endif
440 #undef P_MREMAP_FLAG
441
442         if (flags)
443                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
444
445         return printed;
446 }
447
448 #define SCA_MREMAP_FLAGS syscall_arg__scnprintf_mremap_flags
449
450 static size_t syscall_arg__scnprintf_madvise_behavior(char *bf, size_t size,
451                                                       struct syscall_arg *arg)
452 {
453         int behavior = arg->val;
454
455         switch (behavior) {
456 #define P_MADV_BHV(n) case MADV_##n: return scnprintf(bf, size, #n)
457         P_MADV_BHV(NORMAL);
458         P_MADV_BHV(RANDOM);
459         P_MADV_BHV(SEQUENTIAL);
460         P_MADV_BHV(WILLNEED);
461         P_MADV_BHV(DONTNEED);
462         P_MADV_BHV(REMOVE);
463         P_MADV_BHV(DONTFORK);
464         P_MADV_BHV(DOFORK);
465         P_MADV_BHV(HWPOISON);
466 #ifdef MADV_SOFT_OFFLINE
467         P_MADV_BHV(SOFT_OFFLINE);
468 #endif
469         P_MADV_BHV(MERGEABLE);
470         P_MADV_BHV(UNMERGEABLE);
471 #ifdef MADV_HUGEPAGE
472         P_MADV_BHV(HUGEPAGE);
473 #endif
474 #ifdef MADV_NOHUGEPAGE
475         P_MADV_BHV(NOHUGEPAGE);
476 #endif
477 #ifdef MADV_DONTDUMP
478         P_MADV_BHV(DONTDUMP);
479 #endif
480 #ifdef MADV_DODUMP
481         P_MADV_BHV(DODUMP);
482 #endif
483 #undef P_MADV_PHV
484         default: break;
485         }
486
487         return scnprintf(bf, size, "%#x", behavior);
488 }
489
490 #define SCA_MADV_BHV syscall_arg__scnprintf_madvise_behavior
491
492 static size_t syscall_arg__scnprintf_flock(char *bf, size_t size,
493                                            struct syscall_arg *arg)
494 {
495         int printed = 0, op = arg->val;
496
497         if (op == 0)
498                 return scnprintf(bf, size, "NONE");
499 #define P_CMD(cmd) \
500         if ((op & LOCK_##cmd) == LOCK_##cmd) { \
501                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #cmd); \
502                 op &= ~LOCK_##cmd; \
503         }
504
505         P_CMD(SH);
506         P_CMD(EX);
507         P_CMD(NB);
508         P_CMD(UN);
509         P_CMD(MAND);
510         P_CMD(RW);
511         P_CMD(READ);
512         P_CMD(WRITE);
513 #undef P_OP
514
515         if (op)
516                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", op);
517
518         return printed;
519 }
520
521 #define SCA_FLOCK syscall_arg__scnprintf_flock
522
523 static size_t syscall_arg__scnprintf_futex_op(char *bf, size_t size, struct syscall_arg *arg)
524 {
525         enum syscall_futex_args {
526                 SCF_UADDR   = (1 << 0),
527                 SCF_OP      = (1 << 1),
528                 SCF_VAL     = (1 << 2),
529                 SCF_TIMEOUT = (1 << 3),
530                 SCF_UADDR2  = (1 << 4),
531                 SCF_VAL3    = (1 << 5),
532         };
533         int op = arg->val;
534         int cmd = op & FUTEX_CMD_MASK;
535         size_t printed = 0;
536
537         switch (cmd) {
538 #define P_FUTEX_OP(n) case FUTEX_##n: printed = scnprintf(bf, size, #n);
539         P_FUTEX_OP(WAIT);           arg->mask |= SCF_VAL3|SCF_UADDR2;             break;
540         P_FUTEX_OP(WAKE);           arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
541         P_FUTEX_OP(FD);             arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
542         P_FUTEX_OP(REQUEUE);        arg->mask |= SCF_VAL3|SCF_TIMEOUT;            break;
543         P_FUTEX_OP(CMP_REQUEUE);    arg->mask |= SCF_TIMEOUT;                     break;
544         P_FUTEX_OP(CMP_REQUEUE_PI); arg->mask |= SCF_TIMEOUT;                     break;
545         P_FUTEX_OP(WAKE_OP);                                                      break;
546         P_FUTEX_OP(LOCK_PI);        arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
547         P_FUTEX_OP(UNLOCK_PI);      arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
548         P_FUTEX_OP(TRYLOCK_PI);     arg->mask |= SCF_VAL3|SCF_UADDR2;             break;
549         P_FUTEX_OP(WAIT_BITSET);    arg->mask |= SCF_UADDR2;                      break;
550         P_FUTEX_OP(WAKE_BITSET);    arg->mask |= SCF_UADDR2;                      break;
551         P_FUTEX_OP(WAIT_REQUEUE_PI);                                              break;
552         default: printed = scnprintf(bf, size, "%#x", cmd);                       break;
553         }
554
555         if (op & FUTEX_PRIVATE_FLAG)
556                 printed += scnprintf(bf + printed, size - printed, "|PRIV");
557
558         if (op & FUTEX_CLOCK_REALTIME)
559                 printed += scnprintf(bf + printed, size - printed, "|CLKRT");
560
561         return printed;
562 }
563
564 #define SCA_FUTEX_OP  syscall_arg__scnprintf_futex_op
565
566 static const char *epoll_ctl_ops[] = { "ADD", "DEL", "MOD", };
567 static DEFINE_STRARRAY_OFFSET(epoll_ctl_ops, 1);
568
569 static const char *itimers[] = { "REAL", "VIRTUAL", "PROF", };
570 static DEFINE_STRARRAY(itimers);
571
572 static const char *keyctl_options[] = {
573         "GET_KEYRING_ID", "JOIN_SESSION_KEYRING", "UPDATE", "REVOKE", "CHOWN",
574         "SETPERM", "DESCRIBE", "CLEAR", "LINK", "UNLINK", "SEARCH", "READ",
575         "INSTANTIATE", "NEGATE", "SET_REQKEY_KEYRING", "SET_TIMEOUT",
576         "ASSUME_AUTHORITY", "GET_SECURITY", "SESSION_TO_PARENT", "REJECT",
577         "INSTANTIATE_IOV", "INVALIDATE", "GET_PERSISTENT",
578 };
579 static DEFINE_STRARRAY(keyctl_options);
580
581 static const char *whences[] = { "SET", "CUR", "END",
582 #ifdef SEEK_DATA
583 "DATA",
584 #endif
585 #ifdef SEEK_HOLE
586 "HOLE",
587 #endif
588 };
589 static DEFINE_STRARRAY(whences);
590
591 static const char *fcntl_cmds[] = {
592         "DUPFD", "GETFD", "SETFD", "GETFL", "SETFL", "GETLK", "SETLK",
593         "SETLKW", "SETOWN", "GETOWN", "SETSIG", "GETSIG", "F_GETLK64",
594         "F_SETLK64", "F_SETLKW64", "F_SETOWN_EX", "F_GETOWN_EX",
595         "F_GETOWNER_UIDS",
596 };
597 static DEFINE_STRARRAY(fcntl_cmds);
598
599 static const char *rlimit_resources[] = {
600         "CPU", "FSIZE", "DATA", "STACK", "CORE", "RSS", "NPROC", "NOFILE",
601         "MEMLOCK", "AS", "LOCKS", "SIGPENDING", "MSGQUEUE", "NICE", "RTPRIO",
602         "RTTIME",
603 };
604 static DEFINE_STRARRAY(rlimit_resources);
605
606 static const char *sighow[] = { "BLOCK", "UNBLOCK", "SETMASK", };
607 static DEFINE_STRARRAY(sighow);
608
609 static const char *clockid[] = {
610         "REALTIME", "MONOTONIC", "PROCESS_CPUTIME_ID", "THREAD_CPUTIME_ID",
611         "MONOTONIC_RAW", "REALTIME_COARSE", "MONOTONIC_COARSE", "BOOTTIME",
612         "REALTIME_ALARM", "BOOTTIME_ALARM", "SGI_CYCLE", "TAI"
613 };
614 static DEFINE_STRARRAY(clockid);
615
616 static const char *socket_families[] = {
617         "UNSPEC", "LOCAL", "INET", "AX25", "IPX", "APPLETALK", "NETROM",
618         "BRIDGE", "ATMPVC", "X25", "INET6", "ROSE", "DECnet", "NETBEUI",
619         "SECURITY", "KEY", "NETLINK", "PACKET", "ASH", "ECONET", "ATMSVC",
620         "RDS", "SNA", "IRDA", "PPPOX", "WANPIPE", "LLC", "IB", "CAN", "TIPC",
621         "BLUETOOTH", "IUCV", "RXRPC", "ISDN", "PHONET", "IEEE802154", "CAIF",
622         "ALG", "NFC", "VSOCK",
623 };
624 static DEFINE_STRARRAY(socket_families);
625
626 #ifndef SOCK_TYPE_MASK
627 #define SOCK_TYPE_MASK 0xf
628 #endif
629
630 static size_t syscall_arg__scnprintf_socket_type(char *bf, size_t size,
631                                                       struct syscall_arg *arg)
632 {
633         size_t printed;
634         int type = arg->val,
635             flags = type & ~SOCK_TYPE_MASK;
636
637         type &= SOCK_TYPE_MASK;
638         /*
639          * Can't use a strarray, MIPS may override for ABI reasons.
640          */
641         switch (type) {
642 #define P_SK_TYPE(n) case SOCK_##n: printed = scnprintf(bf, size, #n); break;
643         P_SK_TYPE(STREAM);
644         P_SK_TYPE(DGRAM);
645         P_SK_TYPE(RAW);
646         P_SK_TYPE(RDM);
647         P_SK_TYPE(SEQPACKET);
648         P_SK_TYPE(DCCP);
649         P_SK_TYPE(PACKET);
650 #undef P_SK_TYPE
651         default:
652                 printed = scnprintf(bf, size, "%#x", type);
653         }
654
655 #define P_SK_FLAG(n) \
656         if (flags & SOCK_##n) { \
657                 printed += scnprintf(bf + printed, size - printed, "|%s", #n); \
658                 flags &= ~SOCK_##n; \
659         }
660
661         P_SK_FLAG(CLOEXEC);
662         P_SK_FLAG(NONBLOCK);
663 #undef P_SK_FLAG
664
665         if (flags)
666                 printed += scnprintf(bf + printed, size - printed, "|%#x", flags);
667
668         return printed;
669 }
670
671 #define SCA_SK_TYPE syscall_arg__scnprintf_socket_type
672
673 #ifndef MSG_PROBE
674 #define MSG_PROBE            0x10
675 #endif
676 #ifndef MSG_WAITFORONE
677 #define MSG_WAITFORONE  0x10000
678 #endif
679 #ifndef MSG_SENDPAGE_NOTLAST
680 #define MSG_SENDPAGE_NOTLAST 0x20000
681 #endif
682 #ifndef MSG_FASTOPEN
683 #define MSG_FASTOPEN         0x20000000
684 #endif
685
686 static size_t syscall_arg__scnprintf_msg_flags(char *bf, size_t size,
687                                                struct syscall_arg *arg)
688 {
689         int printed = 0, flags = arg->val;
690
691         if (flags == 0)
692                 return scnprintf(bf, size, "NONE");
693 #define P_MSG_FLAG(n) \
694         if (flags & MSG_##n) { \
695                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
696                 flags &= ~MSG_##n; \
697         }
698
699         P_MSG_FLAG(OOB);
700         P_MSG_FLAG(PEEK);
701         P_MSG_FLAG(DONTROUTE);
702         P_MSG_FLAG(TRYHARD);
703         P_MSG_FLAG(CTRUNC);
704         P_MSG_FLAG(PROBE);
705         P_MSG_FLAG(TRUNC);
706         P_MSG_FLAG(DONTWAIT);
707         P_MSG_FLAG(EOR);
708         P_MSG_FLAG(WAITALL);
709         P_MSG_FLAG(FIN);
710         P_MSG_FLAG(SYN);
711         P_MSG_FLAG(CONFIRM);
712         P_MSG_FLAG(RST);
713         P_MSG_FLAG(ERRQUEUE);
714         P_MSG_FLAG(NOSIGNAL);
715         P_MSG_FLAG(MORE);
716         P_MSG_FLAG(WAITFORONE);
717         P_MSG_FLAG(SENDPAGE_NOTLAST);
718         P_MSG_FLAG(FASTOPEN);
719         P_MSG_FLAG(CMSG_CLOEXEC);
720 #undef P_MSG_FLAG
721
722         if (flags)
723                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
724
725         return printed;
726 }
727
728 #define SCA_MSG_FLAGS syscall_arg__scnprintf_msg_flags
729
730 static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size,
731                                                  struct syscall_arg *arg)
732 {
733         size_t printed = 0;
734         int mode = arg->val;
735
736         if (mode == F_OK) /* 0 */
737                 return scnprintf(bf, size, "F");
738 #define P_MODE(n) \
739         if (mode & n##_OK) { \
740                 printed += scnprintf(bf + printed, size - printed, "%s", #n); \
741                 mode &= ~n##_OK; \
742         }
743
744         P_MODE(R);
745         P_MODE(W);
746         P_MODE(X);
747 #undef P_MODE
748
749         if (mode)
750                 printed += scnprintf(bf + printed, size - printed, "|%#x", mode);
751
752         return printed;
753 }
754
755 #define SCA_ACCMODE syscall_arg__scnprintf_access_mode
756
/* Filename formatter; declared here, defined further down the file. */
static size_t syscall_arg__scnprintf_filename(char *bf, size_t size, struct syscall_arg *arg);

#define SCA_FILENAME syscall_arg__scnprintf_filename
761
762 static size_t syscall_arg__scnprintf_open_flags(char *bf, size_t size,
763                                                struct syscall_arg *arg)
764 {
765         int printed = 0, flags = arg->val;
766
767         if (!(flags & O_CREAT))
768                 arg->mask |= 1 << (arg->idx + 1); /* Mask the mode parm */
769
770         if (flags == 0)
771                 return scnprintf(bf, size, "RDONLY");
772 #define P_FLAG(n) \
773         if (flags & O_##n) { \
774                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
775                 flags &= ~O_##n; \
776         }
777
778         P_FLAG(APPEND);
779         P_FLAG(ASYNC);
780         P_FLAG(CLOEXEC);
781         P_FLAG(CREAT);
782         P_FLAG(DIRECT);
783         P_FLAG(DIRECTORY);
784         P_FLAG(EXCL);
785         P_FLAG(LARGEFILE);
786         P_FLAG(NOATIME);
787         P_FLAG(NOCTTY);
788 #ifdef O_NONBLOCK
789         P_FLAG(NONBLOCK);
790 #elif O_NDELAY
791         P_FLAG(NDELAY);
792 #endif
793 #ifdef O_PATH
794         P_FLAG(PATH);
795 #endif
796         P_FLAG(RDWR);
797 #ifdef O_DSYNC
798         if ((flags & O_SYNC) == O_SYNC)
799                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", "SYNC");
800         else {
801                 P_FLAG(DSYNC);
802         }
803 #else
804         P_FLAG(SYNC);
805 #endif
806         P_FLAG(TRUNC);
807         P_FLAG(WRONLY);
808 #undef P_FLAG
809
810         if (flags)
811                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
812
813         return printed;
814 }
815
816 #define SCA_OPEN_FLAGS syscall_arg__scnprintf_open_flags
817
818 static size_t syscall_arg__scnprintf_perf_flags(char *bf, size_t size,
819                                                 struct syscall_arg *arg)
820 {
821         int printed = 0, flags = arg->val;
822
823         if (flags == 0)
824                 return 0;
825
826 #define P_FLAG(n) \
827         if (flags & PERF_FLAG_##n) { \
828                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
829                 flags &= ~PERF_FLAG_##n; \
830         }
831
832         P_FLAG(FD_NO_GROUP);
833         P_FLAG(FD_OUTPUT);
834         P_FLAG(PID_CGROUP);
835         P_FLAG(FD_CLOEXEC);
836 #undef P_FLAG
837
838         if (flags)
839                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
840
841         return printed;
842 }
843
844 #define SCA_PERF_FLAGS syscall_arg__scnprintf_perf_flags
845
846 static size_t syscall_arg__scnprintf_eventfd_flags(char *bf, size_t size,
847                                                    struct syscall_arg *arg)
848 {
849         int printed = 0, flags = arg->val;
850
851         if (flags == 0)
852                 return scnprintf(bf, size, "NONE");
853 #define P_FLAG(n) \
854         if (flags & EFD_##n) { \
855                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
856                 flags &= ~EFD_##n; \
857         }
858
859         P_FLAG(SEMAPHORE);
860         P_FLAG(CLOEXEC);
861         P_FLAG(NONBLOCK);
862 #undef P_FLAG
863
864         if (flags)
865                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
866
867         return printed;
868 }
869
870 #define SCA_EFD_FLAGS syscall_arg__scnprintf_eventfd_flags
871
872 static size_t syscall_arg__scnprintf_pipe_flags(char *bf, size_t size,
873                                                 struct syscall_arg *arg)
874 {
875         int printed = 0, flags = arg->val;
876
877 #define P_FLAG(n) \
878         if (flags & O_##n) { \
879                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
880                 flags &= ~O_##n; \
881         }
882
883         P_FLAG(CLOEXEC);
884         P_FLAG(NONBLOCK);
885 #undef P_FLAG
886
887         if (flags)
888                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
889
890         return printed;
891 }
892
893 #define SCA_PIPE_FLAGS syscall_arg__scnprintf_pipe_flags
894
895 static size_t syscall_arg__scnprintf_signum(char *bf, size_t size, struct syscall_arg *arg)
896 {
897         int sig = arg->val;
898
899         switch (sig) {
900 #define P_SIGNUM(n) case SIG##n: return scnprintf(bf, size, #n)
901         P_SIGNUM(HUP);
902         P_SIGNUM(INT);
903         P_SIGNUM(QUIT);
904         P_SIGNUM(ILL);
905         P_SIGNUM(TRAP);
906         P_SIGNUM(ABRT);
907         P_SIGNUM(BUS);
908         P_SIGNUM(FPE);
909         P_SIGNUM(KILL);
910         P_SIGNUM(USR1);
911         P_SIGNUM(SEGV);
912         P_SIGNUM(USR2);
913         P_SIGNUM(PIPE);
914         P_SIGNUM(ALRM);
915         P_SIGNUM(TERM);
916         P_SIGNUM(CHLD);
917         P_SIGNUM(CONT);
918         P_SIGNUM(STOP);
919         P_SIGNUM(TSTP);
920         P_SIGNUM(TTIN);
921         P_SIGNUM(TTOU);
922         P_SIGNUM(URG);
923         P_SIGNUM(XCPU);
924         P_SIGNUM(XFSZ);
925         P_SIGNUM(VTALRM);
926         P_SIGNUM(PROF);
927         P_SIGNUM(WINCH);
928         P_SIGNUM(IO);
929         P_SIGNUM(PWR);
930         P_SIGNUM(SYS);
931 #ifdef SIGEMT
932         P_SIGNUM(EMT);
933 #endif
934 #ifdef SIGSTKFLT
935         P_SIGNUM(STKFLT);
936 #endif
937 #ifdef SIGSWI
938         P_SIGNUM(SWI);
939 #endif
940         default: break;
941         }
942
943         return scnprintf(bf, size, "%#x", sig);
944 }
945
946 #define SCA_SIGNUM syscall_arg__scnprintf_signum
947
948 #if defined(__i386__) || defined(__x86_64__)
949 /*
950  * FIXME: Make this available to all arches.
951  */
952 #define TCGETS          0x5401
953
954 static const char *tioctls[] = {
955         "TCGETS", "TCSETS", "TCSETSW", "TCSETSF", "TCGETA", "TCSETA", "TCSETAW",
956         "TCSETAF", "TCSBRK", "TCXONC", "TCFLSH", "TIOCEXCL", "TIOCNXCL",
957         "TIOCSCTTY", "TIOCGPGRP", "TIOCSPGRP", "TIOCOUTQ", "TIOCSTI",
958         "TIOCGWINSZ", "TIOCSWINSZ", "TIOCMGET", "TIOCMBIS", "TIOCMBIC",
959         "TIOCMSET", "TIOCGSOFTCAR", "TIOCSSOFTCAR", "FIONREAD", "TIOCLINUX",
960         "TIOCCONS", "TIOCGSERIAL", "TIOCSSERIAL", "TIOCPKT", "FIONBIO",
961         "TIOCNOTTY", "TIOCSETD", "TIOCGETD", "TCSBRKP", [0x27] = "TIOCSBRK",
962         "TIOCCBRK", "TIOCGSID", "TCGETS2", "TCSETS2", "TCSETSW2", "TCSETSF2",
963         "TIOCGRS485", "TIOCSRS485", "TIOCGPTN", "TIOCSPTLCK",
964         "TIOCGDEV||TCGETX", "TCSETX", "TCSETXF", "TCSETXW", "TIOCSIG",
965         "TIOCVHANGUP", "TIOCGPKT", "TIOCGPTLCK", "TIOCGEXCL",
966         [0x50] = "FIONCLEX", "FIOCLEX", "FIOASYNC", "TIOCSERCONFIG",
967         "TIOCSERGWILD", "TIOCSERSWILD", "TIOCGLCKTRMIOS", "TIOCSLCKTRMIOS",
968         "TIOCSERGSTRUCT", "TIOCSERGETLSR", "TIOCSERGETMULTI", "TIOCSERSETMULTI",
969         "TIOCMIWAIT", "TIOCGICOUNT", [0x60] = "FIOQSIZE",
970 };
971
972 static DEFINE_STRARRAY_OFFSET(tioctls, 0x5401);
973 #endif /* defined(__i386__) || defined(__x86_64__) */
974
975 #define STRARRAY(arg, name, array) \
976           .arg_scnprintf = { [arg] = SCA_STRARRAY, }, \
977           .arg_parm      = { [arg] = &strarray__##array, }
978
979 static struct syscall_fmt {
980         const char *name;
981         const char *alias;
982         size_t     (*arg_scnprintf[6])(char *bf, size_t size, struct syscall_arg *arg);
983         void       *arg_parm[6];
984         bool       errmsg;
985         bool       timeout;
986         bool       hexret;
987 } syscall_fmts[] = {
988         { .name     = "access",     .errmsg = true,
989           .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */
990                              [1] = SCA_ACCMODE,  /* mode */ }, },
991         { .name     = "arch_prctl", .errmsg = true, .alias = "prctl", },
992         { .name     = "brk",        .hexret = true,
993           .arg_scnprintf = { [0] = SCA_HEX, /* brk */ }, },
994         { .name     = "chdir",      .errmsg = true,
995           .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
996         { .name     = "chmod",      .errmsg = true,
997           .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
998         { .name     = "chroot",     .errmsg = true,
999           .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1000         { .name     = "clock_gettime",  .errmsg = true, STRARRAY(0, clk_id, clockid), },
1001         { .name     = "close",      .errmsg = true,
1002           .arg_scnprintf = { [0] = SCA_CLOSE_FD, /* fd */ }, },
1003         { .name     = "connect",    .errmsg = true, },
1004         { .name     = "creat",      .errmsg = true,
1005           .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1006         { .name     = "dup",        .errmsg = true,
1007           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1008         { .name     = "dup2",       .errmsg = true,
1009           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1010         { .name     = "dup3",       .errmsg = true,
1011           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1012         { .name     = "epoll_ctl",  .errmsg = true, STRARRAY(1, op, epoll_ctl_ops), },
1013         { .name     = "eventfd2",   .errmsg = true,
1014           .arg_scnprintf = { [1] = SCA_EFD_FLAGS, /* flags */ }, },
1015         { .name     = "faccessat",  .errmsg = true,
1016           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1017                              [1] = SCA_FILENAME, /* filename */ }, },
1018         { .name     = "fadvise64",  .errmsg = true,
1019           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1020         { .name     = "fallocate",  .errmsg = true,
1021           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1022         { .name     = "fchdir",     .errmsg = true,
1023           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1024         { .name     = "fchmod",     .errmsg = true,
1025           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1026         { .name     = "fchmodat",   .errmsg = true,
1027           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */
1028                              [1] = SCA_FILENAME, /* filename */ }, },
1029         { .name     = "fchown",     .errmsg = true,
1030           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1031         { .name     = "fchownat",   .errmsg = true,
1032           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */
1033                              [1] = SCA_FILENAME, /* filename */ }, },
1034         { .name     = "fcntl",      .errmsg = true,
1035           .arg_scnprintf = { [0] = SCA_FD, /* fd */
1036                              [1] = SCA_STRARRAY, /* cmd */ },
1037           .arg_parm      = { [1] = &strarray__fcntl_cmds, /* cmd */ }, },
1038         { .name     = "fdatasync",  .errmsg = true,
1039           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1040         { .name     = "flock",      .errmsg = true,
1041           .arg_scnprintf = { [0] = SCA_FD, /* fd */
1042                              [1] = SCA_FLOCK, /* cmd */ }, },
1043         { .name     = "fsetxattr",  .errmsg = true,
1044           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1045         { .name     = "fstat",      .errmsg = true, .alias = "newfstat",
1046           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1047         { .name     = "fstatat",    .errmsg = true, .alias = "newfstatat",
1048           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1049                              [1] = SCA_FILENAME, /* filename */ }, },
1050         { .name     = "fstatfs",    .errmsg = true,
1051           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1052         { .name     = "fsync",    .errmsg = true,
1053           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1054         { .name     = "ftruncate", .errmsg = true,
1055           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1056         { .name     = "futex",      .errmsg = true,
1057           .arg_scnprintf = { [1] = SCA_FUTEX_OP, /* op */ }, },
1058         { .name     = "futimesat", .errmsg = true,
1059           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */
1060                              [1] = SCA_FILENAME, /* filename */ }, },
1061         { .name     = "getdents",   .errmsg = true,
1062           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1063         { .name     = "getdents64", .errmsg = true,
1064           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1065         { .name     = "getitimer",  .errmsg = true, STRARRAY(0, which, itimers), },
1066         { .name     = "getrlimit",  .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
1067         { .name     = "getxattr",    .errmsg = true,
1068           .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1069         { .name     = "inotify_add_watch",          .errmsg = true,
1070           .arg_scnprintf = { [1] = SCA_FILENAME, /* pathname */ }, },
1071         { .name     = "ioctl",      .errmsg = true,
1072           .arg_scnprintf = { [0] = SCA_FD, /* fd */
1073 #if defined(__i386__) || defined(__x86_64__)
1074 /*
1075  * FIXME: Make this available to all arches.
1076  */
1077                              [1] = SCA_STRHEXARRAY, /* cmd */
1078                              [2] = SCA_HEX, /* arg */ },
1079           .arg_parm      = { [1] = &strarray__tioctls, /* cmd */ }, },
1080 #else
1081                              [2] = SCA_HEX, /* arg */ }, },
1082 #endif
1083         { .name     = "keyctl",     .errmsg = true, STRARRAY(0, option, keyctl_options), },
1084         { .name     = "kill",       .errmsg = true,
1085           .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1086         { .name     = "lchown",    .errmsg = true,
1087           .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1088         { .name     = "lgetxattr",  .errmsg = true,
1089           .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1090         { .name     = "linkat",     .errmsg = true,
1091           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
1092         { .name     = "listxattr",  .errmsg = true,
1093           .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1094         { .name     = "llistxattr", .errmsg = true,
1095           .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1096         { .name     = "lremovexattr",  .errmsg = true,
1097           .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1098         { .name     = "lseek",      .errmsg = true,
1099           .arg_scnprintf = { [0] = SCA_FD, /* fd */
1100                              [2] = SCA_STRARRAY, /* whence */ },
1101           .arg_parm      = { [2] = &strarray__whences, /* whence */ }, },
1102         { .name     = "lsetxattr",  .errmsg = true,
1103           .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1104         { .name     = "lstat",      .errmsg = true, .alias = "newlstat",
1105           .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1106         { .name     = "lsxattr",    .errmsg = true,
1107           .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1108         { .name     = "madvise",    .errmsg = true,
1109           .arg_scnprintf = { [0] = SCA_HEX,      /* start */
1110                              [2] = SCA_MADV_BHV, /* behavior */ }, },
1111         { .name     = "mkdir",    .errmsg = true,
1112           .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1113         { .name     = "mkdirat",    .errmsg = true,
1114           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */
1115                              [1] = SCA_FILENAME, /* pathname */ }, },
1116         { .name     = "mknod",      .errmsg = true,
1117           .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1118         { .name     = "mknodat",    .errmsg = true,
1119           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */
1120                              [1] = SCA_FILENAME, /* filename */ }, },
1121         { .name     = "mlock",      .errmsg = true,
1122           .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1123         { .name     = "mlockall",   .errmsg = true,
1124           .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1125         { .name     = "mmap",       .hexret = true,
1126           .arg_scnprintf = { [0] = SCA_HEX,       /* addr */
1127                              [2] = SCA_MMAP_PROT, /* prot */
1128                              [3] = SCA_MMAP_FLAGS, /* flags */
1129                              [4] = SCA_FD,        /* fd */ }, },
1130         { .name     = "mprotect",   .errmsg = true,
1131           .arg_scnprintf = { [0] = SCA_HEX, /* start */
1132                              [2] = SCA_MMAP_PROT, /* prot */ }, },
1133         { .name     = "mq_unlink", .errmsg = true,
1134           .arg_scnprintf = { [0] = SCA_FILENAME, /* u_name */ }, },
1135         { .name     = "mremap",     .hexret = true,
1136           .arg_scnprintf = { [0] = SCA_HEX, /* addr */
1137                              [3] = SCA_MREMAP_FLAGS, /* flags */
1138                              [4] = SCA_HEX, /* new_addr */ }, },
1139         { .name     = "munlock",    .errmsg = true,
1140           .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1141         { .name     = "munmap",     .errmsg = true,
1142           .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1143         { .name     = "name_to_handle_at", .errmsg = true,
1144           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1145         { .name     = "newfstatat", .errmsg = true,
1146           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1147                              [1] = SCA_FILENAME, /* filename */ }, },
1148         { .name     = "open",       .errmsg = true,
1149           .arg_scnprintf = { [0] = SCA_FILENAME,   /* filename */
1150                              [1] = SCA_OPEN_FLAGS, /* flags */ }, },
1151         { .name     = "open_by_handle_at", .errmsg = true,
1152           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1153                              [2] = SCA_OPEN_FLAGS, /* flags */ }, },
1154         { .name     = "openat",     .errmsg = true,
1155           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1156                              [1] = SCA_FILENAME, /* filename */
1157                              [2] = SCA_OPEN_FLAGS, /* flags */ }, },
1158         { .name     = "perf_event_open", .errmsg = true,
1159           .arg_scnprintf = { [1] = SCA_INT, /* pid */
1160                              [2] = SCA_INT, /* cpu */
1161                              [3] = SCA_FD,  /* group_fd */
1162                              [4] = SCA_PERF_FLAGS,  /* flags */ }, },
1163         { .name     = "pipe2",      .errmsg = true,
1164           .arg_scnprintf = { [1] = SCA_PIPE_FLAGS, /* flags */ }, },
1165         { .name     = "poll",       .errmsg = true, .timeout = true, },
1166         { .name     = "ppoll",      .errmsg = true, .timeout = true, },
1167         { .name     = "pread",      .errmsg = true, .alias = "pread64",
1168           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1169         { .name     = "preadv",     .errmsg = true, .alias = "pread",
1170           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1171         { .name     = "prlimit64",  .errmsg = true, STRARRAY(1, resource, rlimit_resources), },
1172         { .name     = "pwrite",     .errmsg = true, .alias = "pwrite64",
1173           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1174         { .name     = "pwritev",    .errmsg = true,
1175           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1176         { .name     = "read",       .errmsg = true,
1177           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1178         { .name     = "readlink",   .errmsg = true,
1179           .arg_scnprintf = { [0] = SCA_FILENAME, /* path */ }, },
1180         { .name     = "readlinkat", .errmsg = true,
1181           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1182                              [1] = SCA_FILENAME, /* pathname */ }, },
1183         { .name     = "readv",      .errmsg = true,
1184           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1185         { .name     = "recvfrom",   .errmsg = true,
1186           .arg_scnprintf = { [0] = SCA_FD, /* fd */
1187                              [3] = SCA_MSG_FLAGS, /* flags */ }, },
1188         { .name     = "recvmmsg",   .errmsg = true,
1189           .arg_scnprintf = { [0] = SCA_FD, /* fd */
1190                              [3] = SCA_MSG_FLAGS, /* flags */ }, },
1191         { .name     = "recvmsg",    .errmsg = true,
1192           .arg_scnprintf = { [0] = SCA_FD, /* fd */
1193                              [2] = SCA_MSG_FLAGS, /* flags */ }, },
1194         { .name     = "removexattr", .errmsg = true,
1195           .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1196         { .name     = "renameat",   .errmsg = true,
1197           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1198         { .name     = "rmdir",    .errmsg = true,
1199           .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1200         { .name     = "rt_sigaction", .errmsg = true,
1201           .arg_scnprintf = { [0] = SCA_SIGNUM, /* sig */ }, },
1202         { .name     = "rt_sigprocmask",  .errmsg = true, STRARRAY(0, how, sighow), },
1203         { .name     = "rt_sigqueueinfo", .errmsg = true,
1204           .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1205         { .name     = "rt_tgsigqueueinfo", .errmsg = true,
1206           .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
1207         { .name     = "select",     .errmsg = true, .timeout = true, },
1208         { .name     = "sendmmsg",    .errmsg = true,
1209           .arg_scnprintf = { [0] = SCA_FD, /* fd */
1210                              [3] = SCA_MSG_FLAGS, /* flags */ }, },
1211         { .name     = "sendmsg",    .errmsg = true,
1212           .arg_scnprintf = { [0] = SCA_FD, /* fd */
1213                              [2] = SCA_MSG_FLAGS, /* flags */ }, },
1214         { .name     = "sendto",     .errmsg = true,
1215           .arg_scnprintf = { [0] = SCA_FD, /* fd */
1216                              [3] = SCA_MSG_FLAGS, /* flags */ }, },
1217         { .name     = "setitimer",  .errmsg = true, STRARRAY(0, which, itimers), },
1218         { .name     = "setrlimit",  .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
1219         { .name     = "setxattr",   .errmsg = true,
1220           .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1221         { .name     = "shutdown",   .errmsg = true,
1222           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1223         { .name     = "socket",     .errmsg = true,
1224           .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
1225                              [1] = SCA_SK_TYPE, /* type */ },
1226           .arg_parm      = { [0] = &strarray__socket_families, /* family */ }, },
1227         { .name     = "socketpair", .errmsg = true,
1228           .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
1229                              [1] = SCA_SK_TYPE, /* type */ },
1230           .arg_parm      = { [0] = &strarray__socket_families, /* family */ }, },
1231         { .name     = "stat",       .errmsg = true, .alias = "newstat",
1232           .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1233         { .name     = "statfs",     .errmsg = true,
1234           .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1235         { .name     = "swapoff",    .errmsg = true,
1236           .arg_scnprintf = { [0] = SCA_FILENAME, /* specialfile */ }, },
1237         { .name     = "swapon",     .errmsg = true,
1238           .arg_scnprintf = { [0] = SCA_FILENAME, /* specialfile */ }, },
1239         { .name     = "symlinkat",  .errmsg = true,
1240           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1241         { .name     = "tgkill",     .errmsg = true,
1242           .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
1243         { .name     = "tkill",      .errmsg = true,
1244           .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1245         { .name     = "truncate",   .errmsg = true,
1246           .arg_scnprintf = { [0] = SCA_FILENAME, /* path */ }, },
1247         { .name     = "uname",      .errmsg = true, .alias = "newuname", },
1248         { .name     = "unlinkat",   .errmsg = true,
1249           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1250                              [1] = SCA_FILENAME, /* pathname */ }, },
1251         { .name     = "utime",  .errmsg = true,
1252           .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1253         { .name     = "utimensat",  .errmsg = true,
1254           .arg_scnprintf = { [0] = SCA_FDAT, /* dirfd */
1255                              [1] = SCA_FILENAME, /* filename */ }, },
1256         { .name     = "utimes",  .errmsg = true,
1257           .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1258         { .name     = "vmsplice",  .errmsg = true,
1259           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1260         { .name     = "write",      .errmsg = true,
1261           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1262         { .name     = "writev",     .errmsg = true,
1263           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1264 };
1265
1266 static int syscall_fmt__cmp(const void *name, const void *fmtp)
1267 {
1268         const struct syscall_fmt *fmt = fmtp;
1269         return strcmp(name, fmt->name);
1270 }
1271
1272 static struct syscall_fmt *syscall_fmt__find(const char *name)
1273 {
1274         const int nmemb = ARRAY_SIZE(syscall_fmts);
1275         return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp);
1276 }
1277
/*
 * Per-syscall information, one element of trace->syscalls.table.
 *
 * @tp_format:     event format associated with this syscall
 * @nr_args:       number of slots allocated for @arg_scnprintf
 * @args:          head of the ->next linked list of argument fields
 * @name:          syscall name
 * @is_exit:       NOTE(review): presumably marks exit-like syscalls that
 *                 never return - confirm against the code that sets it
 * @fmt:           matching syscall_fmts[] entry, may be NULL
 * @arg_scnprintf: per-argument formatter array, filled in by
 *                 syscall__set_arg_fmts()
 * @arg_parm:      per-argument formatter data, taken from @fmt->arg_parm
 *                 when @fmt is set
 */
struct syscall {
        struct event_format *tp_format;
        int                 nr_args;
        struct format_field *args;
        const char          *name;
        bool                is_exit;
        struct syscall_fmt  *fmt;
        size_t              (**arg_scnprintf)(char *bf, size_t size,
                                              struct syscall_arg *arg);
        void                **arg_parm;
};
1288
1289 static size_t fprintf_duration(unsigned long t, FILE *fp)
1290 {
1291         double duration = (double)t / NSEC_PER_MSEC;
1292         size_t printed = fprintf(fp, "(");
1293
1294         if (duration >= 1.0)
1295                 printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration);
1296         else if (duration >= 0.01)
1297                 printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration);
1298         else
1299                 printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration);
1300         return printed + fprintf(fp, "): ");
1301 }
1302
1303 /**
1304  * filename.ptr: The filename char pointer that will be vfs_getname'd
1305  * filename.entry_str_pos: Where to insert the string translated from
1306  *                         filename.ptr by the vfs_getname tracepoint/kprobe.
1307  */
1308 struct thread_trace {
1309         u64               entry_time;
1310         u64               exit_time;
1311         bool              entry_pending;
1312         unsigned long     nr_events;
1313         unsigned long     pfmaj, pfmin;
1314         char              *entry_str;
1315         double            runtime_ms;
1316         struct {
1317                 unsigned long ptr;
1318                 short int     entry_str_pos;
1319                 bool          pending_open;
1320                 unsigned int  namelen;
1321                 char          *name;
1322         } filename;
1323         struct {
1324                 int       max;
1325                 char      **table;
1326         } paths;
1327
1328         struct intlist *syscall_stats;
1329 };
1330
1331 static struct thread_trace *thread_trace__new(void)
1332 {
1333         struct thread_trace *ttrace =  zalloc(sizeof(struct thread_trace));
1334
1335         if (ttrace)
1336                 ttrace->paths.max = -1;
1337
1338         ttrace->syscall_stats = intlist__new(NULL);
1339
1340         return ttrace;
1341 }
1342
1343 static struct thread_trace *thread__trace(struct thread *thread, FILE *fp)
1344 {
1345         struct thread_trace *ttrace;
1346
1347         if (thread == NULL)
1348                 goto fail;
1349
1350         if (thread__priv(thread) == NULL)
1351                 thread__set_priv(thread, thread_trace__new());
1352
1353         if (thread__priv(thread) == NULL)
1354                 goto fail;
1355
1356         ttrace = thread__priv(thread);
1357         ++ttrace->nr_events;
1358
1359         return ttrace;
1360 fail:
1361         color_fprintf(fp, PERF_COLOR_RED,
1362                       "WARNING: not enough memory, dropping samples!\n");
1363         return NULL;
1364 }
1365
1366 #define TRACE_PFMAJ             (1 << 0)
1367 #define TRACE_PFMIN             (1 << 1)
1368
1369 static const size_t trace__entry_str_size = 2048;
1370
1371 struct trace {
1372         struct perf_tool        tool;
1373         struct {
1374                 int             machine;
1375                 int             open_id;
1376         }                       audit;
1377         struct {
1378                 int             max;
1379                 struct syscall  *table;
1380                 struct {
1381                         struct perf_evsel *sys_enter,
1382                                           *sys_exit;
1383                 }               events;
1384         } syscalls;
1385         struct record_opts      opts;
1386         struct perf_evlist      *evlist;
1387         struct machine          *host;
1388         struct thread           *current;
1389         u64                     base_time;
1390         FILE                    *output;
1391         unsigned long           nr_events;
1392         struct strlist          *ev_qualifier;
1393         struct {
1394                 size_t          nr;
1395                 int             *entries;
1396         }                       ev_qualifier_ids;
1397         struct intlist          *tid_list;
1398         struct intlist          *pid_list;
1399         struct {
1400                 size_t          nr;
1401                 pid_t           *entries;
1402         }                       filter_pids;
1403         double                  duration_filter;
1404         double                  runtime_ms;
1405         struct {
1406                 u64             vfs_getname,
1407                                 proc_getname;
1408         } stats;
1409         bool                    not_ev_qualifier;
1410         bool                    live;
1411         bool                    full_time;
1412         bool                    sched;
1413         bool                    multiple_threads;
1414         bool                    summary;
1415         bool                    summary_only;
1416         bool                    show_comm;
1417         bool                    show_tool_stats;
1418         bool                    trace_syscalls;
1419         bool                    force;
1420         bool                    vfs_getname;
1421         int                     trace_pgfaults;
1422 };
1423
1424 static int trace__set_fd_pathname(struct thread *thread, int fd, const char *pathname)
1425 {
1426         struct thread_trace *ttrace = thread__priv(thread);
1427
1428         if (fd > ttrace->paths.max) {
1429                 char **npath = realloc(ttrace->paths.table, (fd + 1) * sizeof(char *));
1430
1431                 if (npath == NULL)
1432                         return -1;
1433
1434                 if (ttrace->paths.max != -1) {
1435                         memset(npath + ttrace->paths.max + 1, 0,
1436                                (fd - ttrace->paths.max) * sizeof(char *));
1437                 } else {
1438                         memset(npath, 0, (fd + 1) * sizeof(char *));
1439                 }
1440
1441                 ttrace->paths.table = npath;
1442                 ttrace->paths.max   = fd;
1443         }
1444
1445         ttrace->paths.table[fd] = strdup(pathname);
1446
1447         return ttrace->paths.table[fd] != NULL ? 0 : -1;
1448 }
1449
1450 static int thread__read_fd_path(struct thread *thread, int fd)
1451 {
1452         char linkname[PATH_MAX], pathname[PATH_MAX];
1453         struct stat st;
1454         int ret;
1455
1456         if (thread->pid_ == thread->tid) {
1457                 scnprintf(linkname, sizeof(linkname),
1458                           "/proc/%d/fd/%d", thread->pid_, fd);
1459         } else {
1460                 scnprintf(linkname, sizeof(linkname),
1461                           "/proc/%d/task/%d/fd/%d", thread->pid_, thread->tid, fd);
1462         }
1463
1464         if (lstat(linkname, &st) < 0 || st.st_size + 1 > (off_t)sizeof(pathname))
1465                 return -1;
1466
1467         ret = readlink(linkname, pathname, sizeof(pathname));
1468
1469         if (ret < 0 || ret > st.st_size)
1470                 return -1;
1471
1472         pathname[ret] = '\0';
1473         return trace__set_fd_pathname(thread, fd, pathname);
1474 }
1475
1476 static const char *thread__fd_path(struct thread *thread, int fd,
1477                                    struct trace *trace)
1478 {
1479         struct thread_trace *ttrace = thread__priv(thread);
1480
1481         if (ttrace == NULL)
1482                 return NULL;
1483
1484         if (fd < 0)
1485                 return NULL;
1486
1487         if ((fd > ttrace->paths.max || ttrace->paths.table[fd] == NULL)) {
1488                 if (!trace->live)
1489                         return NULL;
1490                 ++trace->stats.proc_getname;
1491                 if (thread__read_fd_path(thread, fd))
1492                         return NULL;
1493         }
1494
1495         return ttrace->paths.table[fd];
1496 }
1497
1498 static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
1499                                         struct syscall_arg *arg)
1500 {
1501         int fd = arg->val;
1502         size_t printed = scnprintf(bf, size, "%d", fd);
1503         const char *path = thread__fd_path(arg->thread, fd, arg->trace);
1504
1505         if (path)
1506                 printed += scnprintf(bf + printed, size - printed, "<%s>", path);
1507
1508         return printed;
1509 }
1510
1511 static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
1512                                               struct syscall_arg *arg)
1513 {
1514         int fd = arg->val;
1515         size_t printed = syscall_arg__scnprintf_fd(bf, size, arg);
1516         struct thread_trace *ttrace = thread__priv(arg->thread);
1517
1518         if (ttrace && fd >= 0 && fd <= ttrace->paths.max)
1519                 zfree(&ttrace->paths.table[fd]);
1520
1521         return printed;
1522 }
1523
1524 static void thread__set_filename_pos(struct thread *thread, const char *bf,
1525                                      unsigned long ptr)
1526 {
1527         struct thread_trace *ttrace = thread__priv(thread);
1528
1529         ttrace->filename.ptr = ptr;
1530         ttrace->filename.entry_str_pos = bf - ttrace->entry_str;
1531 }
1532
1533 static size_t syscall_arg__scnprintf_filename(char *bf, size_t size,
1534                                               struct syscall_arg *arg)
1535 {
1536         unsigned long ptr = arg->val;
1537
1538         if (!arg->trace->vfs_getname)
1539                 return scnprintf(bf, size, "%#x", ptr);
1540
1541         thread__set_filename_pos(arg->thread, bf, ptr);
1542         return 0;
1543 }
1544
1545 static bool trace__filter_duration(struct trace *trace, double t)
1546 {
1547         return t < (trace->duration_filter * NSEC_PER_MSEC);
1548 }
1549
1550 static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
1551 {
1552         double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC;
1553
1554         return fprintf(fp, "%10.3f ", ts);
1555 }
1556
/*
 * Flags written by sig_handler().
 *
 * They are modified from a signal handler, so they are volatile
 * sig_atomic_t: that is the only object type the C standard lets a handler
 * store to, and volatile keeps the compiler from caching them in code
 * that polls them.
 */
static volatile sig_atomic_t done = false;
static volatile sig_atomic_t interrupted = false;

/*
 * Signal handler: flag that a signal arrived and record whether it was
 * SIGINT.
 */
static void sig_handler(int sig)
{
        done = true;
        interrupted = sig == SIGINT;
}
1565
1566 static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread,
1567                                         u64 duration, u64 tstamp, FILE *fp)
1568 {
1569         size_t printed = trace__fprintf_tstamp(trace, tstamp, fp);
1570         printed += fprintf_duration(duration, fp);
1571
1572         if (trace->multiple_threads) {
1573                 if (trace->show_comm)
1574                         printed += fprintf(fp, "%.14s/", thread__comm_str(thread));
1575                 printed += fprintf(fp, "%d ", thread->tid);
1576         }
1577
1578         return printed;
1579 }
1580
1581 static int trace__process_event(struct trace *trace, struct machine *machine,
1582                                 union perf_event *event, struct perf_sample *sample)
1583 {
1584         int ret = 0;
1585
1586         switch (event->header.type) {
1587         case PERF_RECORD_LOST:
1588                 color_fprintf(trace->output, PERF_COLOR_RED,
1589                               "LOST %" PRIu64 " events!\n", event->lost.lost);
1590                 ret = machine__process_lost_event(machine, event, sample);
1591         default:
1592                 ret = machine__process_event(machine, event, sample);
1593                 break;
1594         }
1595
1596         return ret;
1597 }
1598
1599 static int trace__tool_process(struct perf_tool *tool,
1600                                union perf_event *event,
1601                                struct perf_sample *sample,
1602                                struct machine *machine)
1603 {
1604         struct trace *trace = container_of(tool, struct trace, tool);
1605         return trace__process_event(trace, machine, event, sample);
1606 }
1607
1608 static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
1609 {
1610         int err = symbol__init(NULL);
1611
1612         if (err)
1613                 return err;
1614
1615         trace->host = machine__new_host();
1616         if (trace->host == NULL)
1617                 return -ENOMEM;
1618
1619         if (trace_event__register_resolver(trace->host, machine__resolve_kernel_addr) < 0)
1620                 return -errno;
1621
1622         err = __machine__synthesize_threads(trace->host, &trace->tool, &trace->opts.target,
1623                                             evlist->threads, trace__tool_process, false,
1624                                             trace->opts.proc_map_timeout);
1625         if (err)
1626                 symbol__exit();
1627
1628         return err;
1629 }
1630
1631 static int syscall__set_arg_fmts(struct syscall *sc)
1632 {
1633         struct format_field *field;
1634         int idx = 0;
1635
1636         sc->arg_scnprintf = calloc(sc->nr_args, sizeof(void *));
1637         if (sc->arg_scnprintf == NULL)
1638                 return -1;
1639
1640         if (sc->fmt)
1641                 sc->arg_parm = sc->fmt->arg_parm;
1642
1643         for (field = sc->args; field; field = field->next) {
1644                 if (sc->fmt && sc->fmt->arg_scnprintf[idx])
1645                         sc->arg_scnprintf[idx] = sc->fmt->arg_scnprintf[idx];
1646                 else if (field->flags & FIELD_IS_POINTER)
1647                         sc->arg_scnprintf[idx] = syscall_arg__scnprintf_hex;
1648                 ++idx;
1649         }
1650
1651         return 0;
1652 }
1653
1654 static int trace__read_syscall_info(struct trace *trace, int id)
1655 {
1656         char tp_name[128];
1657         struct syscall *sc;
1658         const char *name = audit_syscall_to_name(id, trace->audit.machine);
1659
1660         if (name == NULL)
1661                 return -1;
1662
1663         if (id > trace->syscalls.max) {
1664                 struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc));
1665
1666                 if (nsyscalls == NULL)
1667                         return -1;
1668
1669                 if (trace->syscalls.max != -1) {
1670                         memset(nsyscalls + trace->syscalls.max + 1, 0,
1671                                (id - trace->syscalls.max) * sizeof(*sc));
1672                 } else {
1673                         memset(nsyscalls, 0, (id + 1) * sizeof(*sc));
1674                 }
1675
1676                 trace->syscalls.table = nsyscalls;
1677                 trace->syscalls.max   = id;
1678         }
1679
1680         sc = trace->syscalls.table + id;
1681         sc->name = name;
1682
1683         sc->fmt  = syscall_fmt__find(sc->name);
1684
1685         snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name);
1686         sc->tp_format = trace_event__tp_format("syscalls", tp_name);
1687
1688         if (sc->tp_format == NULL && sc->fmt && sc->fmt->alias) {
1689                 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias);
1690                 sc->tp_format = trace_event__tp_format("syscalls", tp_name);
1691         }
1692
1693         if (sc->tp_format == NULL)
1694                 return -1;
1695
1696         sc->args = sc->tp_format->format.fields;
1697         sc->nr_args = sc->tp_format->format.nr_fields;
1698         /* drop nr field - not relevant here; does not exist on older kernels */
1699         if (sc->args && strcmp(sc->args->name, "nr") == 0) {
1700                 sc->args = sc->args->next;
1701                 --sc->nr_args;
1702         }
1703
1704         sc->is_exit = !strcmp(name, "exit_group") || !strcmp(name, "exit");
1705
1706         return syscall__set_arg_fmts(sc);
1707 }
1708
1709 static int trace__validate_ev_qualifier(struct trace *trace)
1710 {
1711         int err = 0, i;
1712         struct str_node *pos;
1713
1714         trace->ev_qualifier_ids.nr = strlist__nr_entries(trace->ev_qualifier);
1715         trace->ev_qualifier_ids.entries = malloc(trace->ev_qualifier_ids.nr *
1716                                                  sizeof(trace->ev_qualifier_ids.entries[0]));
1717
1718         if (trace->ev_qualifier_ids.entries == NULL) {
1719                 fputs("Error:\tNot enough memory for allocating events qualifier ids\n",
1720                        trace->output);
1721                 err = -EINVAL;
1722                 goto out;
1723         }
1724
1725         i = 0;
1726
1727         strlist__for_each(pos, trace->ev_qualifier) {
1728                 const char *sc = pos->s;
1729                 int id = audit_name_to_syscall(sc, trace->audit.machine);
1730
1731                 if (id < 0) {
1732                         if (err == 0) {
1733                                 fputs("Error:\tInvalid syscall ", trace->output);
1734                                 err = -EINVAL;
1735                         } else {
1736                                 fputs(", ", trace->output);
1737                         }
1738
1739                         fputs(sc, trace->output);
1740                 }
1741
1742                 trace->ev_qualifier_ids.entries[i++] = id;
1743         }
1744
1745         if (err < 0) {
1746                 fputs("\nHint:\ttry 'perf list syscalls:sys_enter_*'"
1747                       "\nHint:\tand: 'man syscalls'\n", trace->output);
1748                 zfree(&trace->ev_qualifier_ids.entries);
1749                 trace->ev_qualifier_ids.nr = 0;
1750         }
1751 out:
1752         return err;
1753 }
1754
1755 /*
1756  * args is to be interpreted as a series of longs but we need to handle
1757  * 8-byte unaligned accesses. args points to raw_data within the event
1758  * and raw_data is guaranteed to be 8-byte unaligned because it is
1759  * preceded by raw_size which is a u32. So we need to copy args to a temp
1760  * variable to read it. Most notably this avoids extended load instructions
1761  * on unaligned addresses
1762  */
1763
1764 static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
1765                                       unsigned char *args, struct trace *trace,
1766                                       struct thread *thread)
1767 {
1768         size_t printed = 0;
1769         unsigned char *p;
1770         unsigned long val;
1771
1772         if (sc->args != NULL) {
1773                 struct format_field *field;
1774                 u8 bit = 1;
1775                 struct syscall_arg arg = {
1776                         .idx    = 0,
1777                         .mask   = 0,
1778                         .trace  = trace,
1779                         .thread = thread,
1780                 };
1781
1782                 for (field = sc->args; field;
1783                      field = field->next, ++arg.idx, bit <<= 1) {
1784                         if (arg.mask & bit)
1785                                 continue;
1786
1787                         /* special care for unaligned accesses */
1788                         p = args + sizeof(unsigned long) * arg.idx;
1789                         memcpy(&val, p, sizeof(val));
1790
1791                         /*
1792                          * Suppress this argument if its value is zero and
1793                          * and we don't have a string associated in an
1794                          * strarray for it.
1795                          */
1796                         if (val == 0 &&
1797                             !(sc->arg_scnprintf &&
1798                               sc->arg_scnprintf[arg.idx] == SCA_STRARRAY &&
1799                               sc->arg_parm[arg.idx]))
1800                                 continue;
1801
1802                         printed += scnprintf(bf + printed, size - printed,
1803                                              "%s%s: ", printed ? ", " : "", field->name);
1804                         if (sc->arg_scnprintf && sc->arg_scnprintf[arg.idx]) {
1805                                 arg.val = val;
1806                                 if (sc->arg_parm)
1807                                         arg.parm = sc->arg_parm[arg.idx];
1808                                 printed += sc->arg_scnprintf[arg.idx](bf + printed,
1809                                                                       size - printed, &arg);
1810                         } else {
1811                                 printed += scnprintf(bf + printed, size - printed,
1812                                                      "%ld", val);
1813                         }
1814                 }
1815         } else {
1816                 int i = 0;
1817
1818                 while (i < 6) {
1819                         /* special care for unaligned accesses */
1820                         p = args + sizeof(unsigned long) * i;
1821                         memcpy(&val, p, sizeof(val));
1822                         printed += scnprintf(bf + printed, size - printed,
1823                                              "%sarg%d: %ld",
1824                                              printed ? ", " : "", i, val);
1825                         ++i;
1826                 }
1827         }
1828
1829         return printed;
1830 }
1831
/*
 * Signature of the per-event callbacks stored in evsel->handler and invoked
 * for each sample belonging to that evsel.
 */
typedef int (*tracepoint_handler)(struct trace *trace, struct perf_evsel *evsel,
                                  union perf_event *event,
                                  struct perf_sample *sample);
1835
1836 static struct syscall *trace__syscall_info(struct trace *trace,
1837                                            struct perf_evsel *evsel, int id)
1838 {
1839
1840         if (id < 0) {
1841
1842                 /*
1843                  * XXX: Noticed on x86_64, reproduced as far back as 3.0.36, haven't tried
1844                  * before that, leaving at a higher verbosity level till that is
1845                  * explained. Reproduced with plain ftrace with:
1846                  *
1847                  * echo 1 > /t/events/raw_syscalls/sys_exit/enable
1848                  * grep "NR -1 " /t/trace_pipe
1849                  *
1850                  * After generating some load on the machine.
1851                  */
1852                 if (verbose > 1) {
1853                         static u64 n;
1854                         fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n",
1855                                 id, perf_evsel__name(evsel), ++n);
1856                 }
1857                 return NULL;
1858         }
1859
1860         if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) &&
1861             trace__read_syscall_info(trace, id))
1862                 goto out_cant_read;
1863
1864         if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL))
1865                 goto out_cant_read;
1866
1867         return &trace->syscalls.table[id];
1868
1869 out_cant_read:
1870         if (verbose) {
1871                 fprintf(trace->output, "Problems reading syscall %d", id);
1872                 if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL)
1873                         fprintf(trace->output, "(%s)", trace->syscalls.table[id].name);
1874                 fputs(" information\n", trace->output);
1875         }
1876         return NULL;
1877 }
1878
1879 static void thread__update_stats(struct thread_trace *ttrace,
1880                                  int id, struct perf_sample *sample)
1881 {
1882         struct int_node *inode;
1883         struct stats *stats;
1884         u64 duration = 0;
1885
1886         inode = intlist__findnew(ttrace->syscall_stats, id);
1887         if (inode == NULL)
1888                 return;
1889
1890         stats = inode->priv;
1891         if (stats == NULL) {
1892                 stats = malloc(sizeof(struct stats));
1893                 if (stats == NULL)
1894                         return;
1895                 init_stats(stats);
1896                 inode->priv = stats;
1897         }
1898
1899         if (ttrace->entry_time && sample->time > ttrace->entry_time)
1900                 duration = sample->time - ttrace->entry_time;
1901
1902         update_stats(stats, duration);
1903 }
1904
1905 static int trace__printf_interrupted_entry(struct trace *trace, struct perf_sample *sample)
1906 {
1907         struct thread_trace *ttrace;
1908         u64 duration;
1909         size_t printed;
1910
1911         if (trace->current == NULL)
1912                 return 0;
1913
1914         ttrace = thread__priv(trace->current);
1915
1916         if (!ttrace->entry_pending)
1917                 return 0;
1918
1919         duration = sample->time - ttrace->entry_time;
1920
1921         printed  = trace__fprintf_entry_head(trace, trace->current, duration, sample->time, trace->output);
1922         printed += fprintf(trace->output, "%-70s) ...\n", ttrace->entry_str);
1923         ttrace->entry_pending = false;
1924
1925         return printed;
1926 }
1927
1928 static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
1929                             union perf_event *event __maybe_unused,
1930                             struct perf_sample *sample)
1931 {
1932         char *msg;
1933         void *args;
1934         size_t printed = 0;
1935         struct thread *thread;
1936         int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1;
1937         struct syscall *sc = trace__syscall_info(trace, evsel, id);
1938         struct thread_trace *ttrace;
1939
1940         if (sc == NULL)
1941                 return -1;
1942
1943         thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1944         ttrace = thread__trace(thread, trace->output);
1945         if (ttrace == NULL)
1946                 goto out_put;
1947
1948         args = perf_evsel__sc_tp_ptr(evsel, args, sample);
1949
1950         if (ttrace->entry_str == NULL) {
1951                 ttrace->entry_str = malloc(trace__entry_str_size);
1952                 if (!ttrace->entry_str)
1953                         goto out_put;
1954         }
1955
1956         if (!trace->summary_only)
1957                 trace__printf_interrupted_entry(trace, sample);
1958
1959         ttrace->entry_time = sample->time;
1960         msg = ttrace->entry_str;
1961         printed += scnprintf(msg + printed, trace__entry_str_size - printed, "%s(", sc->name);
1962
1963         printed += syscall__scnprintf_args(sc, msg + printed, trace__entry_str_size - printed,
1964                                            args, trace, thread);
1965
1966         if (sc->is_exit) {
1967                 if (!trace->duration_filter && !trace->summary_only) {
1968                         trace__fprintf_entry_head(trace, thread, 1, sample->time, trace->output);
1969                         fprintf(trace->output, "%-70s\n", ttrace->entry_str);
1970                 }
1971         } else {
1972                 ttrace->entry_pending = true;
1973                 /* See trace__vfs_getname & trace__sys_exit */
1974                 ttrace->filename.pending_open = false;
1975         }
1976
1977         if (trace->current != thread) {
1978                 thread__put(trace->current);
1979                 trace->current = thread__get(thread);
1980         }
1981         err = 0;
1982 out_put:
1983         thread__put(thread);
1984         return err;
1985 }
1986
1987 static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
1988                            union perf_event *event __maybe_unused,
1989                            struct perf_sample *sample)
1990 {
1991         long ret;
1992         u64 duration = 0;
1993         struct thread *thread;
1994         int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1;
1995         struct syscall *sc = trace__syscall_info(trace, evsel, id);
1996         struct thread_trace *ttrace;
1997
1998         if (sc == NULL)
1999                 return -1;
2000
2001         thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
2002         ttrace = thread__trace(thread, trace->output);
2003         if (ttrace == NULL)
2004                 goto out_put;
2005
2006         if (trace->summary)
2007                 thread__update_stats(ttrace, id, sample);
2008
2009         ret = perf_evsel__sc_tp_uint(evsel, ret, sample);
2010
2011         if (id == trace->audit.open_id && ret >= 0 && ttrace->filename.pending_open) {
2012                 trace__set_fd_pathname(thread, ret, ttrace->filename.name);
2013                 ttrace->filename.pending_open = false;
2014                 ++trace->stats.vfs_getname;
2015         }
2016
2017         ttrace->exit_time = sample->time;
2018
2019         if (ttrace->entry_time) {
2020                 duration = sample->time - ttrace->entry_time;
2021                 if (trace__filter_duration(trace, duration))
2022                         goto out;
2023         } else if (trace->duration_filter)
2024                 goto out;
2025
2026         if (trace->summary_only)
2027                 goto out;
2028
2029         trace__fprintf_entry_head(trace, thread, duration, sample->time, trace->output);
2030
2031         if (ttrace->entry_pending) {
2032                 fprintf(trace->output, "%-70s", ttrace->entry_str);
2033         } else {
2034                 fprintf(trace->output, " ... [");
2035                 color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued");
2036                 fprintf(trace->output, "]: %s()", sc->name);
2037         }
2038
2039         if (sc->fmt == NULL) {
2040 signed_print:
2041                 fprintf(trace->output, ") = %ld", ret);
2042         } else if (ret < 0 && sc->fmt->errmsg) {
2043                 char bf[STRERR_BUFSIZE];
2044                 const char *emsg = strerror_r(-ret, bf, sizeof(bf)),
2045                            *e = audit_errno_to_name(-ret);
2046
2047                 fprintf(trace->output, ") = -1 %s %s", e, emsg);
2048         } else if (ret == 0 && sc->fmt->timeout)
2049                 fprintf(trace->output, ") = 0 Timeout");
2050         else if (sc->fmt->hexret)
2051                 fprintf(trace->output, ") = %#lx", ret);
2052         else
2053                 goto signed_print;
2054
2055         fputc('\n', trace->output);
2056 out:
2057         ttrace->entry_pending = false;
2058         err = 0;
2059 out_put:
2060         thread__put(thread);
2061         return err;
2062 }
2063
2064 static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel,
2065                               union perf_event *event __maybe_unused,
2066                               struct perf_sample *sample)
2067 {
2068         struct thread *thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
2069         struct thread_trace *ttrace;
2070         size_t filename_len, entry_str_len, to_move;
2071         ssize_t remaining_space;
2072         char *pos;
2073         const char *filename = perf_evsel__rawptr(evsel, sample, "pathname");
2074
2075         if (!thread)
2076                 goto out;
2077
2078         ttrace = thread__priv(thread);
2079         if (!ttrace)
2080                 goto out;
2081
2082         filename_len = strlen(filename);
2083
2084         if (ttrace->filename.namelen < filename_len) {
2085                 char *f = realloc(ttrace->filename.name, filename_len + 1);
2086
2087                 if (f == NULL)
2088                                 goto out;
2089
2090                 ttrace->filename.namelen = filename_len;
2091                 ttrace->filename.name = f;
2092         }
2093
2094         strcpy(ttrace->filename.name, filename);
2095         ttrace->filename.pending_open = true;
2096
2097         if (!ttrace->filename.ptr)
2098                 goto out;
2099
2100         entry_str_len = strlen(ttrace->entry_str);
2101         remaining_space = trace__entry_str_size - entry_str_len - 1; /* \0 */
2102         if (remaining_space <= 0)
2103                 goto out;
2104
2105         if (filename_len > (size_t)remaining_space) {
2106                 filename += filename_len - remaining_space;
2107                 filename_len = remaining_space;
2108         }
2109
2110         to_move = entry_str_len - ttrace->filename.entry_str_pos + 1; /* \0 */
2111         pos = ttrace->entry_str + ttrace->filename.entry_str_pos;
2112         memmove(pos + filename_len, pos, to_move);
2113         memcpy(pos, filename, filename_len);
2114
2115         ttrace->filename.ptr = 0;
2116         ttrace->filename.entry_str_pos = 0;
2117 out:
2118         return 0;
2119 }
2120
2121 static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel,
2122                                      union perf_event *event __maybe_unused,
2123                                      struct perf_sample *sample)
2124 {
2125         u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
2126         double runtime_ms = (double)runtime / NSEC_PER_MSEC;
2127         struct thread *thread = machine__findnew_thread(trace->host,
2128                                                         sample->pid,
2129                                                         sample->tid);
2130         struct thread_trace *ttrace = thread__trace(thread, trace->output);
2131
2132         if (ttrace == NULL)
2133                 goto out_dump;
2134
2135         ttrace->runtime_ms += runtime_ms;
2136         trace->runtime_ms += runtime_ms;
2137         thread__put(thread);
2138         return 0;
2139
2140 out_dump:
2141         fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n",
2142                evsel->name,
2143                perf_evsel__strval(evsel, sample, "comm"),
2144                (pid_t)perf_evsel__intval(evsel, sample, "pid"),
2145                runtime,
2146                perf_evsel__intval(evsel, sample, "vruntime"));
2147         thread__put(thread);
2148         return 0;
2149 }
2150
2151 static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel,
2152                                 union perf_event *event __maybe_unused,
2153                                 struct perf_sample *sample)
2154 {
2155         trace__printf_interrupted_entry(trace, sample);
2156         trace__fprintf_tstamp(trace, sample->time, trace->output);
2157
2158         if (trace->trace_syscalls)
2159                 fprintf(trace->output, "(         ): ");
2160
2161         fprintf(trace->output, "%s:", evsel->name);
2162
2163         if (evsel->tp_format) {
2164                 event_format__fprintf(evsel->tp_format, sample->cpu,
2165                                       sample->raw_data, sample->raw_size,
2166                                       trace->output);
2167         }
2168
2169         fprintf(trace->output, ")\n");
2170         return 0;
2171 }
2172
2173 static void print_location(FILE *f, struct perf_sample *sample,
2174                            struct addr_location *al,
2175                            bool print_dso, bool print_sym)
2176 {
2177
2178         if ((verbose || print_dso) && al->map)
2179                 fprintf(f, "%s@", al->map->dso->long_name);
2180
2181         if ((verbose || print_sym) && al->sym)
2182                 fprintf(f, "%s+0x%" PRIx64, al->sym->name,
2183                         al->addr - al->sym->start);
2184         else if (al->map)
2185                 fprintf(f, "0x%" PRIx64, al->addr);
2186         else
2187                 fprintf(f, "0x%" PRIx64, sample->addr);
2188 }
2189
2190 static int trace__pgfault(struct trace *trace,
2191                           struct perf_evsel *evsel,
2192                           union perf_event *event,
2193                           struct perf_sample *sample)
2194 {
2195         struct thread *thread;
2196         u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
2197         struct addr_location al;
2198         char map_type = 'd';
2199         struct thread_trace *ttrace;
2200         int err = -1;
2201
2202         thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
2203         ttrace = thread__trace(thread, trace->output);
2204         if (ttrace == NULL)
2205                 goto out_put;
2206
2207         if (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ)
2208                 ttrace->pfmaj++;
2209         else
2210                 ttrace->pfmin++;
2211
2212         if (trace->summary_only)
2213                 goto out;
2214
2215         thread__find_addr_location(thread, cpumode, MAP__FUNCTION,
2216                               sample->ip, &al);
2217
2218         trace__fprintf_entry_head(trace, thread, 0, sample->time, trace->output);
2219
2220         fprintf(trace->output, "%sfault [",
2221                 evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ?
2222                 "maj" : "min");
2223
2224         print_location(trace->output, sample, &al, false, true);
2225
2226         fprintf(trace->output, "] => ");
2227
2228         thread__find_addr_location(thread, cpumode, MAP__VARIABLE,
2229                                    sample->addr, &al);
2230
2231         if (!al.map) {
2232                 thread__find_addr_location(thread, cpumode,
2233                                            MAP__FUNCTION, sample->addr, &al);
2234
2235                 if (al.map)
2236                         map_type = 'x';
2237                 else
2238                         map_type = '?';
2239         }
2240
2241         print_location(trace->output, sample, &al, true, false);
2242
2243         fprintf(trace->output, " (%c%c)\n", map_type, al.level);
2244 out:
2245         err = 0;
2246 out_put:
2247         thread__put(thread);
2248         return err;
2249 }
2250
2251 static bool skip_sample(struct trace *trace, struct perf_sample *sample)
2252 {
2253         if ((trace->pid_list && intlist__find(trace->pid_list, sample->pid)) ||
2254             (trace->tid_list && intlist__find(trace->tid_list, sample->tid)))
2255                 return false;
2256
2257         if (trace->pid_list || trace->tid_list)
2258                 return true;
2259
2260         return false;
2261 }
2262
2263 static int trace__process_sample(struct perf_tool *tool,
2264                                  union perf_event *event,
2265                                  struct perf_sample *sample,
2266                                  struct perf_evsel *evsel,
2267                                  struct machine *machine __maybe_unused)
2268 {
2269         struct trace *trace = container_of(tool, struct trace, tool);
2270         int err = 0;
2271
2272         tracepoint_handler handler = evsel->handler;
2273
2274         if (skip_sample(trace, sample))
2275                 return 0;
2276
2277         if (!trace->full_time && trace->base_time == 0)
2278                 trace->base_time = sample->time;
2279
2280         if (handler) {
2281                 ++trace->nr_events;
2282                 handler(trace, evsel, event, sample);
2283         }
2284
2285         return err;
2286 }
2287
2288 static int parse_target_str(struct trace *trace)
2289 {
2290         if (trace->opts.target.pid) {
2291                 trace->pid_list = intlist__new(trace->opts.target.pid);
2292                 if (trace->pid_list == NULL) {
2293                         pr_err("Error parsing process id string\n");
2294                         return -EINVAL;
2295                 }
2296         }
2297
2298         if (trace->opts.target.tid) {
2299                 trace->tid_list = intlist__new(trace->opts.target.tid);
2300                 if (trace->tid_list == NULL) {
2301                         pr_err("Error parsing thread id string\n");
2302                         return -EINVAL;
2303                 }
2304         }
2305
2306         return 0;
2307 }
2308
2309 static int trace__record(struct trace *trace, int argc, const char **argv)
2310 {
2311         unsigned int rec_argc, i, j;
2312         const char **rec_argv;
2313         const char * const record_args[] = {
2314                 "record",
2315                 "-R",
2316                 "-m", "1024",
2317                 "-c", "1",
2318         };
2319
2320         const char * const sc_args[] = { "-e", };
2321         unsigned int sc_args_nr = ARRAY_SIZE(sc_args);
2322         const char * const majpf_args[] = { "-e", "major-faults" };
2323         unsigned int majpf_args_nr = ARRAY_SIZE(majpf_args);
2324         const char * const minpf_args[] = { "-e", "minor-faults" };
2325         unsigned int minpf_args_nr = ARRAY_SIZE(minpf_args);
2326
2327         /* +1 is for the event string below */
2328         rec_argc = ARRAY_SIZE(record_args) + sc_args_nr + 1 +
2329                 majpf_args_nr + minpf_args_nr + argc;
2330         rec_argv = calloc(rec_argc + 1, sizeof(char *));
2331
2332         if (rec_argv == NULL)
2333                 return -ENOMEM;
2334
2335         j = 0;
2336         for (i = 0; i < ARRAY_SIZE(record_args); i++)
2337                 rec_argv[j++] = record_args[i];
2338
2339         if (trace->trace_syscalls) {
2340                 for (i = 0; i < sc_args_nr; i++)
2341                         rec_argv[j++] = sc_args[i];
2342
2343                 /* event string may be different for older kernels - e.g., RHEL6 */
2344                 if (is_valid_tracepoint("raw_syscalls:sys_enter"))
2345                         rec_argv[j++] = "raw_syscalls:sys_enter,raw_syscalls:sys_exit";
2346                 else if (is_valid_tracepoint("syscalls:sys_enter"))
2347                         rec_argv[j++] = "syscalls:sys_enter,syscalls:sys_exit";
2348                 else {
2349                         pr_err("Neither raw_syscalls nor syscalls events exist.\n");
2350                         return -1;
2351                 }
2352         }
2353
2354         if (trace->trace_pgfaults & TRACE_PFMAJ)
2355                 for (i = 0; i < majpf_args_nr; i++)
2356                         rec_argv[j++] = majpf_args[i];
2357
2358         if (trace->trace_pgfaults & TRACE_PFMIN)
2359                 for (i = 0; i < minpf_args_nr; i++)
2360                         rec_argv[j++] = minpf_args[i];
2361
2362         for (i = 0; i < (unsigned int)argc; i++)
2363                 rec_argv[j++] = argv[i];
2364
2365         return cmd_record(j, rec_argv, NULL);
2366 }
2367
2368 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp);
2369
2370 static bool perf_evlist__add_vfs_getname(struct perf_evlist *evlist)
2371 {
2372         struct perf_evsel *evsel = perf_evsel__newtp("probe", "vfs_getname");
2373         if (evsel == NULL)
2374                 return false;
2375
2376         if (perf_evsel__field(evsel, "pathname") == NULL) {
2377                 perf_evsel__delete(evsel);
2378                 return false;
2379         }
2380
2381         evsel->handler = trace__vfs_getname;
2382         perf_evlist__add(evlist, evsel);
2383         return true;
2384 }
2385
2386 static int perf_evlist__add_pgfault(struct perf_evlist *evlist,
2387                                     u64 config)
2388 {
2389         struct perf_evsel *evsel;
2390         struct perf_event_attr attr = {
2391                 .type = PERF_TYPE_SOFTWARE,
2392                 .mmap_data = 1,
2393         };
2394
2395         attr.config = config;
2396         attr.sample_period = 1;
2397
2398         event_attr_init(&attr);
2399
2400         evsel = perf_evsel__new(&attr);
2401         if (!evsel)
2402                 return -ENOMEM;
2403
2404         evsel->handler = trace__pgfault;
2405         perf_evlist__add(evlist, evsel);
2406
2407         return 0;
2408 }
2409
2410 static void trace__handle_event(struct trace *trace, union perf_event *event, struct perf_sample *sample)
2411 {
2412         const u32 type = event->header.type;
2413         struct perf_evsel *evsel;
2414
2415         if (!trace->full_time && trace->base_time == 0)
2416                 trace->base_time = sample->time;
2417
2418         if (type != PERF_RECORD_SAMPLE) {
2419                 trace__process_event(trace, trace->host, event, sample);
2420                 return;
2421         }
2422
2423         evsel = perf_evlist__id2evsel(trace->evlist, sample->id);
2424         if (evsel == NULL) {
2425                 fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample->id);
2426                 return;
2427         }
2428
2429         if (evsel->attr.type == PERF_TYPE_TRACEPOINT &&
2430             sample->raw_data == NULL) {
2431                 fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
2432                        perf_evsel__name(evsel), sample->tid,
2433                        sample->cpu, sample->raw_size);
2434         } else {
2435                 tracepoint_handler handler = evsel->handler;
2436                 handler(trace, evsel, event, sample);
2437         }
2438 }
2439
2440 static int trace__add_syscall_newtp(struct trace *trace)
2441 {
2442         int ret = -1;
2443         struct perf_evlist *evlist = trace->evlist;
2444         struct perf_evsel *sys_enter, *sys_exit;
2445
2446         sys_enter = perf_evsel__syscall_newtp("sys_enter", trace__sys_enter);
2447         if (sys_enter == NULL)
2448                 goto out;
2449
2450         if (perf_evsel__init_sc_tp_ptr_field(sys_enter, args))
2451                 goto out_delete_sys_enter;
2452
2453         sys_exit = perf_evsel__syscall_newtp("sys_exit", trace__sys_exit);
2454         if (sys_exit == NULL)
2455                 goto out_delete_sys_enter;
2456
2457         if (perf_evsel__init_sc_tp_uint_field(sys_exit, ret))
2458                 goto out_delete_sys_exit;
2459
2460         perf_evlist__add(evlist, sys_enter);
2461         perf_evlist__add(evlist, sys_exit);
2462
2463         trace->syscalls.events.sys_enter = sys_enter;
2464         trace->syscalls.events.sys_exit  = sys_exit;
2465
2466         ret = 0;
2467 out:
2468         return ret;
2469
2470 out_delete_sys_exit:
2471         perf_evsel__delete_priv(sys_exit);
2472 out_delete_sys_enter:
2473         perf_evsel__delete_priv(sys_enter);
2474         goto out;
2475 }
2476
2477 static int trace__set_ev_qualifier_filter(struct trace *trace)
2478 {
2479         int err = -1;
2480         char *filter = asprintf_expr_inout_ints("id", !trace->not_ev_qualifier,
2481                                                 trace->ev_qualifier_ids.nr,
2482                                                 trace->ev_qualifier_ids.entries);
2483
2484         if (filter == NULL)
2485                 goto out_enomem;
2486
2487         if (!perf_evsel__append_filter(trace->syscalls.events.sys_enter, "&&", filter))
2488                 err = perf_evsel__append_filter(trace->syscalls.events.sys_exit, "&&", filter);
2489
2490         free(filter);
2491 out:
2492         return err;
2493 out_enomem:
2494         errno = ENOMEM;
2495         goto out;
2496 }
2497
2498 static int trace__run(struct trace *trace, int argc, const char **argv)
2499 {
2500         struct perf_evlist *evlist = trace->evlist;
2501         struct perf_evsel *evsel;
2502         int err = -1, i;
2503         unsigned long before;
2504         const bool forks = argc > 0;
2505         bool draining = false;
2506
2507         trace->live = true;
2508
2509         if (trace->trace_syscalls && trace__add_syscall_newtp(trace))
2510                 goto out_error_raw_syscalls;
2511
2512         if (trace->trace_syscalls)
2513                 trace->vfs_getname = perf_evlist__add_vfs_getname(evlist);
2514
2515         if ((trace->trace_pgfaults & TRACE_PFMAJ) &&
2516             perf_evlist__add_pgfault(evlist, PERF_COUNT_SW_PAGE_FAULTS_MAJ)) {
2517                 goto out_error_mem;
2518         }
2519
2520         if ((trace->trace_pgfaults & TRACE_PFMIN) &&
2521             perf_evlist__add_pgfault(evlist, PERF_COUNT_SW_PAGE_FAULTS_MIN))
2522                 goto out_error_mem;
2523
2524         if (trace->sched &&
2525             perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
2526                                    trace__sched_stat_runtime))
2527                 goto out_error_sched_stat_runtime;
2528
2529         err = perf_evlist__create_maps(evlist, &trace->opts.target);
2530         if (err < 0) {
2531                 fprintf(trace->output, "Problems parsing the target to trace, check your options!\n");
2532                 goto out_delete_evlist;
2533         }
2534
2535         err = trace__symbols_init(trace, evlist);
2536         if (err < 0) {
2537                 fprintf(trace->output, "Problems initializing symbol libraries!\n");
2538                 goto out_delete_evlist;
2539         }
2540
2541         perf_evlist__config(evlist, &trace->opts);
2542
2543         signal(SIGCHLD, sig_handler);
2544         signal(SIGINT, sig_handler);
2545
2546         if (forks) {
2547                 err = perf_evlist__prepare_workload(evlist, &trace->opts.target,
2548                                                     argv, false, NULL);
2549                 if (err < 0) {
2550                         fprintf(trace->output, "Couldn't run the workload!\n");
2551                         goto out_delete_evlist;
2552                 }
2553         }
2554
2555         err = perf_evlist__open(evlist);
2556         if (err < 0)
2557                 goto out_error_open;
2558
2559         /*
2560          * Better not use !target__has_task() here because we need to cover the
2561          * case where no threads were specified in the command line, but a
2562          * workload was, and in that case we will fill in the thread_map when
2563          * we fork the workload in perf_evlist__prepare_workload.
2564          */
2565         if (trace->filter_pids.nr > 0)
2566                 err = perf_evlist__set_filter_pids(evlist, trace->filter_pids.nr, trace->filter_pids.entries);
2567         else if (thread_map__pid(evlist->threads, 0) == -1)
2568                 err = perf_evlist__set_filter_pid(evlist, getpid());
2569
2570         if (err < 0)
2571                 goto out_error_mem;
2572
2573         if (trace->ev_qualifier_ids.nr > 0) {
2574                 err = trace__set_ev_qualifier_filter(trace);
2575                 if (err < 0)
2576                         goto out_errno;
2577
2578                 pr_debug("event qualifier tracepoint filter: %s\n",
2579                          trace->syscalls.events.sys_exit->filter);
2580         }
2581
2582         err = perf_evlist__apply_filters(evlist, &evsel);
2583         if (err < 0)
2584                 goto out_error_apply_filters;
2585
2586         err = perf_evlist__mmap(evlist, trace->opts.mmap_pages, false);
2587         if (err < 0)
2588                 goto out_error_mmap;
2589
2590         if (!target__none(&trace->opts.target))
2591                 perf_evlist__enable(evlist);
2592
2593         if (forks)
2594                 perf_evlist__start_workload(evlist);
2595
2596         trace->multiple_threads = thread_map__pid(evlist->threads, 0) == -1 ||
2597                                   evlist->threads->nr > 1 ||
2598                                   perf_evlist__first(evlist)->attr.inherit;
2599 again:
2600         before = trace->nr_events;
2601
2602         for (i = 0; i < evlist->nr_mmaps; i++) {
2603                 union perf_event *event;
2604
2605                 while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) {
2606                         struct perf_sample sample;
2607
2608                         ++trace->nr_events;
2609
2610                         err = perf_evlist__parse_sample(evlist, event, &sample);
2611                         if (err) {
2612                                 fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err);
2613                                 goto next_event;
2614                         }
2615
2616                         trace__handle_event(trace, event, &sample);
2617 next_event:
2618                         perf_evlist__mmap_consume(evlist, i);
2619
2620                         if (interrupted)
2621                                 goto out_disable;
2622
2623                         if (done && !draining) {
2624                                 perf_evlist__disable(evlist);
2625                                 draining = true;
2626                         }
2627                 }
2628         }
2629
2630         if (trace->nr_events == before) {
2631                 int timeout = done ? 100 : -1;
2632
2633                 if (!draining && perf_evlist__poll(evlist, timeout) > 0) {
2634                         if (perf_evlist__filter_pollfd(evlist, POLLERR | POLLHUP) == 0)
2635                                 draining = true;
2636
2637                         goto again;
2638                 }
2639         } else {
2640                 goto again;
2641         }
2642
2643 out_disable:
2644         thread__zput(trace->current);
2645
2646         perf_evlist__disable(evlist);
2647
2648         if (!err) {
2649                 if (trace->summary)
2650                         trace__fprintf_thread_summary(trace, trace->output);
2651
2652                 if (trace->show_tool_stats) {
2653                         fprintf(trace->output, "Stats:\n "
2654                                                " vfs_getname : %" PRIu64 "\n"
2655                                                " proc_getname: %" PRIu64 "\n",
2656                                 trace->stats.vfs_getname,
2657                                 trace->stats.proc_getname);
2658                 }
2659         }
2660
2661 out_delete_evlist:
2662         perf_evlist__delete(evlist);
2663         trace->evlist = NULL;
2664         trace->live = false;
2665         return err;
2666 {
2667         char errbuf[BUFSIZ];
2668
2669 out_error_sched_stat_runtime:
2670         debugfs__strerror_open_tp(errno, errbuf, sizeof(errbuf), "sched", "sched_stat_runtime");
2671         goto out_error;
2672
2673 out_error_raw_syscalls:
2674         debugfs__strerror_open_tp(errno, errbuf, sizeof(errbuf), "raw_syscalls", "sys_(enter|exit)");
2675         goto out_error;
2676
2677 out_error_mmap:
2678         perf_evlist__strerror_mmap(evlist, errno, errbuf, sizeof(errbuf));
2679         goto out_error;
2680
2681 out_error_open:
2682         perf_evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf));
2683
2684 out_error:
2685         fprintf(trace->output, "%s\n", errbuf);
2686         goto out_delete_evlist;
2687
2688 out_error_apply_filters:
2689         fprintf(trace->output,
2690                 "Failed to set filter \"%s\" on event %s with %d (%s)\n",
2691                 evsel->filter, perf_evsel__name(evsel), errno,
2692                 strerror_r(errno, errbuf, sizeof(errbuf)));
2693         goto out_delete_evlist;
2694 }
2695 out_error_mem:
2696         fprintf(trace->output, "Not enough memory to run!\n");
2697         goto out_delete_evlist;
2698
2699 out_errno:
2700         fprintf(trace->output, "errno=%d,%s\n", errno, strerror(errno));
2701         goto out_delete_evlist;
2702 }
2703
2704 static int trace__replay(struct trace *trace)
2705 {
2706         const struct perf_evsel_str_handler handlers[] = {
2707                 { "probe:vfs_getname",       trace__vfs_getname, },
2708         };
2709         struct perf_data_file file = {
2710                 .path  = input_name,
2711                 .mode  = PERF_DATA_MODE_READ,
2712                 .force = trace->force,
2713         };
2714         struct perf_session *session;
2715         struct perf_evsel *evsel;
2716         int err = -1;
2717
2718         trace->tool.sample        = trace__process_sample;
2719         trace->tool.mmap          = perf_event__process_mmap;
2720         trace->tool.mmap2         = perf_event__process_mmap2;
2721         trace->tool.comm          = perf_event__process_comm;
2722         trace->tool.exit          = perf_event__process_exit;
2723         trace->tool.fork          = perf_event__process_fork;
2724         trace->tool.attr          = perf_event__process_attr;
2725         trace->tool.tracing_data = perf_event__process_tracing_data;
2726         trace->tool.build_id      = perf_event__process_build_id;
2727
2728         trace->tool.ordered_events = true;
2729         trace->tool.ordering_requires_timestamps = true;
2730
2731         /* add tid to output */
2732         trace->multiple_threads = true;
2733
2734         session = perf_session__new(&file, false, &trace->tool);
2735         if (session == NULL)
2736                 return -1;
2737
2738         if (symbol__init(&session->header.env) < 0)
2739                 goto out;
2740
2741         trace->host = &session->machines.host;
2742
2743         err = perf_session__set_tracepoints_handlers(session, handlers);
2744         if (err)
2745                 goto out;
2746
2747         evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2748                                                      "raw_syscalls:sys_enter");
2749         /* older kernels have syscalls tp versus raw_syscalls */
2750         if (evsel == NULL)
2751                 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2752                                                              "syscalls:sys_enter");
2753
2754         if (evsel &&
2755             (perf_evsel__init_syscall_tp(evsel, trace__sys_enter) < 0 ||
2756             perf_evsel__init_sc_tp_ptr_field(evsel, args))) {
2757                 pr_err("Error during initialize raw_syscalls:sys_enter event\n");
2758                 goto out;
2759         }
2760
2761         evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2762                                                      "raw_syscalls:sys_exit");
2763         if (evsel == NULL)
2764                 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2765                                                              "syscalls:sys_exit");
2766         if (evsel &&
2767             (perf_evsel__init_syscall_tp(evsel, trace__sys_exit) < 0 ||
2768             perf_evsel__init_sc_tp_uint_field(evsel, ret))) {
2769                 pr_err("Error during initialize raw_syscalls:sys_exit event\n");
2770                 goto out;
2771         }
2772
2773         evlist__for_each(session->evlist, evsel) {
2774                 if (evsel->attr.type == PERF_TYPE_SOFTWARE &&
2775                     (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ||
2776                      evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MIN ||
2777                      evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS))
2778                         evsel->handler = trace__pgfault;
2779         }
2780
2781         err = parse_target_str(trace);
2782         if (err != 0)
2783                 goto out;
2784
2785         setup_pager();
2786
2787         err = perf_session__process_events(session);
2788         if (err)
2789                 pr_err("Failed to process events, error %d", err);
2790
2791         else if (trace->summary)
2792                 trace__fprintf_thread_summary(trace, trace->output);
2793
2794 out:
2795         perf_session__delete(session);
2796
2797         return err;
2798 }
2799
/*
 * Write the heading that introduces the per-thread summary to @fp.
 *
 * Returns fprintf()'s return value converted to size_t.
 */
static size_t trace__fprintf_threads_header(FILE *fp)
{
	return fprintf(fp, "\n Summary of events:\n\n");
}
2808
2809 static size_t thread__dump_stats(struct thread_trace *ttrace,
2810                                  struct trace *trace, FILE *fp)
2811 {
2812         struct stats *stats;
2813         size_t printed = 0;
2814         struct syscall *sc;
2815         struct int_node *inode = intlist__first(ttrace->syscall_stats);
2816
2817         if (inode == NULL)
2818                 return 0;
2819
2820         printed += fprintf(fp, "\n");
2821
2822         printed += fprintf(fp, "   syscall            calls    total       min       avg       max      stddev\n");
2823         printed += fprintf(fp, "                               (msec)    (msec)    (msec)    (msec)        (%%)\n");
2824         printed += fprintf(fp, "   --------------- -------- --------- --------- --------- ---------     ------\n");
2825
2826         /* each int_node is a syscall */
2827         while (inode) {
2828                 stats = inode->priv;
2829                 if (stats) {
2830                         double min = (double)(stats->min) / NSEC_PER_MSEC;
2831                         double max = (double)(stats->max) / NSEC_PER_MSEC;
2832                         double avg = avg_stats(stats);
2833                         double pct;
2834                         u64 n = (u64) stats->n;
2835
2836                         pct = avg ? 100.0 * stddev_stats(stats)/avg : 0.0;
2837                         avg /= NSEC_PER_MSEC;
2838
2839                         sc = &trace->syscalls.table[inode->i];
2840                         printed += fprintf(fp, "   %-15s", sc->name);
2841                         printed += fprintf(fp, " %8" PRIu64 " %9.3f %9.3f %9.3f",
2842                                            n, avg * n, min, avg);
2843                         printed += fprintf(fp, " %9.3f %9.2f%%\n", max, pct);
2844                 }
2845
2846                 inode = intlist__next(inode);
2847         }
2848
2849         printed += fprintf(fp, "\n\n");
2850
2851         return printed;
2852 }
2853
/*
 * Context passed through machine__for_each_thread() to the per-thread
 * summary callback.
 */
struct summary_data {
	FILE		*fp;		/* stream the summary is written to */
	struct trace	*trace;		/* owning trace session */
	size_t		printed;	/* running total of fprintf() results */
};
2860
2861 static int trace__fprintf_one_thread(struct thread *thread, void *priv)
2862 {
2863         struct summary_data *data = priv;
2864         FILE *fp = data->fp;
2865         size_t printed = data->printed;
2866         struct trace *trace = data->trace;
2867         struct thread_trace *ttrace = thread__priv(thread);
2868         double ratio;
2869
2870         if (ttrace == NULL)
2871                 return 0;
2872
2873         ratio = (double)ttrace->nr_events / trace->nr_events * 100.0;
2874
2875         printed += fprintf(fp, " %s (%d), ", thread__comm_str(thread), thread->tid);
2876         printed += fprintf(fp, "%lu events, ", ttrace->nr_events);
2877         printed += fprintf(fp, "%.1f%%", ratio);
2878         if (ttrace->pfmaj)
2879                 printed += fprintf(fp, ", %lu majfaults", ttrace->pfmaj);
2880         if (ttrace->pfmin)
2881                 printed += fprintf(fp, ", %lu minfaults", ttrace->pfmin);
2882         printed += fprintf(fp, ", %.3f msec\n", ttrace->runtime_ms);
2883         printed += thread__dump_stats(ttrace, trace, fp);
2884
2885         data->printed += printed;
2886
2887         return 0;
2888 }
2889
2890 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
2891 {
2892         struct summary_data data = {
2893                 .fp = fp,
2894                 .trace = trace
2895         };
2896         data.printed = trace__fprintf_threads_header(fp);
2897
2898         machine__for_each_thread(trace->host, trace__fprintf_one_thread, &data);
2899
2900         return data.printed;
2901 }
2902
2903 static int trace__set_duration(const struct option *opt, const char *str,
2904                                int unset __maybe_unused)
2905 {
2906         struct trace *trace = opt->value;
2907
2908         trace->duration_filter = atof(str);
2909         return 0;
2910 }
2911
2912 static int trace__set_filter_pids(const struct option *opt, const char *str,
2913                                   int unset __maybe_unused)
2914 {
2915         int ret = -1;
2916         size_t i;
2917         struct trace *trace = opt->value;
2918         /*
2919          * FIXME: introduce a intarray class, plain parse csv and create a
2920          * { int nr, int entries[] } struct...
2921          */
2922         struct intlist *list = intlist__new(str);
2923
2924         if (list == NULL)
2925                 return -1;
2926
2927         i = trace->filter_pids.nr = intlist__nr_entries(list) + 1;
2928         trace->filter_pids.entries = calloc(i, sizeof(pid_t));
2929
2930         if (trace->filter_pids.entries == NULL)
2931                 goto out;
2932
2933         trace->filter_pids.entries[0] = getpid();
2934
2935         for (i = 1; i < trace->filter_pids.nr; ++i)
2936                 trace->filter_pids.entries[i] = intlist__entry(list, i - 1)->i;
2937
2938         intlist__delete(list);
2939         ret = 0;
2940 out:
2941         return ret;
2942 }
2943
2944 static int trace__open_output(struct trace *trace, const char *filename)
2945 {
2946         struct stat st;
2947
2948         if (!stat(filename, &st) && st.st_size) {
2949                 char oldname[PATH_MAX];
2950
2951                 scnprintf(oldname, sizeof(oldname), "%s.old", filename);
2952                 unlink(oldname);
2953                 rename(filename, oldname);
2954         }
2955
2956         trace->output = fopen(filename, "w");
2957
2958         return trace->output == NULL ? -errno : 0;
2959 }
2960
2961 static int parse_pagefaults(const struct option *opt, const char *str,
2962                             int unset __maybe_unused)
2963 {
2964         int *trace_pgfaults = opt->value;
2965
2966         if (strcmp(str, "all") == 0)
2967                 *trace_pgfaults |= TRACE_PFMAJ | TRACE_PFMIN;
2968         else if (strcmp(str, "maj") == 0)
2969                 *trace_pgfaults |= TRACE_PFMAJ;
2970         else if (strcmp(str, "min") == 0)
2971                 *trace_pgfaults |= TRACE_PFMIN;
2972         else
2973                 return -1;
2974
2975         return 0;
2976 }
2977
2978 static void evlist__set_evsel_handler(struct perf_evlist *evlist, void *handler)
2979 {
2980         struct perf_evsel *evsel;
2981
2982         evlist__for_each(evlist, evsel)
2983                 evsel->handler = handler;
2984 }
2985
2986 int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
2987 {
2988         const char *trace_usage[] = {
2989                 "perf trace [<options>] [<command>]",
2990                 "perf trace [<options>] -- <command> [<options>]",
2991                 "perf trace record [<options>] [<command>]",
2992                 "perf trace record [<options>] -- <command> [<options>]",
2993                 NULL
2994         };
2995         struct trace trace = {
2996                 .audit = {
2997                         .machine = audit_detect_machine(),
2998                         .open_id = audit_name_to_syscall("open", trace.audit.machine),
2999                 },
3000                 .syscalls = {
3001                         . max = -1,
3002                 },
3003                 .opts = {
3004                         .target = {
3005                                 .uid       = UINT_MAX,
3006                                 .uses_mmap = true,
3007                         },
3008                         .user_freq     = UINT_MAX,
3009                         .user_interval = ULLONG_MAX,
3010                         .no_buffering  = true,
3011                         .mmap_pages    = UINT_MAX,
3012                         .proc_map_timeout  = 500,
3013                 },
3014                 .output = stderr,
3015                 .show_comm = true,
3016                 .trace_syscalls = true,
3017         };
3018         const char *output_name = NULL;
3019         const char *ev_qualifier_str = NULL;
3020         const struct option trace_options[] = {
3021         OPT_CALLBACK(0, "event", &trace.evlist, "event",
3022                      "event selector. use 'perf list' to list available events",
3023                      parse_events_option),
3024         OPT_BOOLEAN(0, "comm", &trace.show_comm,
3025                     "show the thread COMM next to its id"),
3026         OPT_BOOLEAN(0, "tool_stats", &trace.show_tool_stats, "show tool stats"),
3027         OPT_STRING('e', "expr", &ev_qualifier_str, "expr", "list of syscalls to trace"),
3028         OPT_STRING('o', "output", &output_name, "file", "output file name"),
3029         OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"),
3030         OPT_STRING('p', "pid", &trace.opts.target.pid, "pid",
3031                     "trace events on existing process id"),
3032         OPT_STRING('t', "tid", &trace.opts.target.tid, "tid",
3033                     "trace events on existing thread id"),
3034         OPT_CALLBACK(0, "filter-pids", &trace, "CSV list of pids",
3035                      "pids to filter (by the kernel)", trace__set_filter_pids),
3036         OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide,
3037                     "system-wide collection from all CPUs"),
3038         OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu",
3039                     "list of cpus to monitor"),
3040         OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit,
3041                     "child tasks do not inherit counters"),
3042         OPT_CALLBACK('m', "mmap-pages", &trace.opts.mmap_pages, "pages",
3043                      "number of mmap data pages",
3044                      perf_evlist__parse_mmap_pages),
3045         OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user",
3046                    "user to profile"),
3047         OPT_CALLBACK(0, "duration", &trace, "float",
3048                      "show only events with duration > N.M ms",
3049                      trace__set_duration),
3050         OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"),
3051         OPT_INCR('v', "verbose", &verbose, "be more verbose"),
3052         OPT_BOOLEAN('T', "time", &trace.full_time,
3053                     "Show full timestamp, not time relative to first start"),
3054         OPT_BOOLEAN('s', "summary", &trace.summary_only,
3055                     "Show only syscall summary with statistics"),
3056         OPT_BOOLEAN('S', "with-summary", &trace.summary,
3057                     "Show all syscalls and summary with statistics"),
3058         OPT_CALLBACK_DEFAULT('F', "pf", &trace.trace_pgfaults, "all|maj|min",
3059                      "Trace pagefaults", parse_pagefaults, "maj"),
3060         OPT_BOOLEAN(0, "syscalls", &trace.trace_syscalls, "Trace syscalls"),
3061         OPT_BOOLEAN('f', "force", &trace.force, "don't complain, do it"),
3062         OPT_UINTEGER(0, "proc-map-timeout", &trace.opts.proc_map_timeout,
3063                         "per thread proc mmap processing timeout in ms"),
3064         OPT_END()
3065         };
3066         const char * const trace_subcommands[] = { "record", NULL };
3067         int err;
3068         char bf[BUFSIZ];
3069
3070         signal(SIGSEGV, sighandler_dump_stack);
3071         signal(SIGFPE, sighandler_dump_stack);
3072
3073         trace.evlist = perf_evlist__new();
3074
3075         if (trace.evlist == NULL) {
3076                 pr_err("Not enough memory to run!\n");
3077                 err = -ENOMEM;
3078                 goto out;
3079         }
3080
3081         argc = parse_options_subcommand(argc, argv, trace_options, trace_subcommands,
3082                                  trace_usage, PARSE_OPT_STOP_AT_NON_OPTION);
3083
3084         if (trace.trace_pgfaults) {
3085                 trace.opts.sample_address = true;
3086                 trace.opts.sample_time = true;
3087         }
3088
3089         if (trace.evlist->nr_entries > 0)
3090                 evlist__set_evsel_handler(trace.evlist, trace__event_handler);
3091
3092         if ((argc >= 1) && (strcmp(argv[0], "record") == 0))
3093                 return trace__record(&trace, argc-1, &argv[1]);
3094
3095         /* summary_only implies summary option, but don't overwrite summary if set */
3096         if (trace.summary_only)
3097                 trace.summary = trace.summary_only;
3098
3099         if (!trace.trace_syscalls && !trace.trace_pgfaults &&
3100             trace.evlist->nr_entries == 0 /* Was --events used? */) {
3101                 pr_err("Please specify something to trace.\n");
3102                 return -1;
3103         }
3104
3105         if (output_name != NULL) {
3106                 err = trace__open_output(&trace, output_name);
3107                 if (err < 0) {
3108                         perror("failed to create output file");
3109                         goto out;
3110                 }
3111         }
3112
3113         if (ev_qualifier_str != NULL) {
3114                 const char *s = ev_qualifier_str;
3115                 struct strlist_config slist_config = {
3116                         .dirname = system_path(STRACE_GROUPS_DIR),
3117                 };
3118
3119                 trace.not_ev_qualifier = *s == '!';
3120                 if (trace.not_ev_qualifier)
3121                         ++s;
3122                 trace.ev_qualifier = strlist__new(s, &slist_config);
3123                 if (trace.ev_qualifier == NULL) {
3124                         fputs("Not enough memory to parse event qualifier",
3125                               trace.output);
3126                         err = -ENOMEM;
3127                         goto out_close;
3128                 }
3129
3130                 err = trace__validate_ev_qualifier(&trace);
3131                 if (err)
3132                         goto out_close;
3133         }
3134
3135         err = target__validate(&trace.opts.target);
3136         if (err) {
3137                 target__strerror(&trace.opts.target, err, bf, sizeof(bf));
3138                 fprintf(trace.output, "%s", bf);
3139                 goto out_close;
3140         }
3141
3142         err = target__parse_uid(&trace.opts.target);
3143         if (err) {
3144                 target__strerror(&trace.opts.target, err, bf, sizeof(bf));
3145                 fprintf(trace.output, "%s", bf);
3146                 goto out_close;
3147         }
3148
3149         if (!argc && target__none(&trace.opts.target))
3150                 trace.opts.target.system_wide = true;
3151
3152         if (input_name)
3153                 err = trace__replay(&trace);
3154         else
3155                 err = trace__run(&trace, argc, argv);
3156
3157 out_close:
3158         if (output_name != NULL)
3159                 fclose(trace.output);
3160 out:
3161         return err;
3162 }