perf trace: Use vfs_getname syscall arg beautifier in more syscalls
[firefly-linux-kernel-4.4.55.git] / tools / perf / builtin-trace.c
1 #include <traceevent/event-parse.h>
2 #include "builtin.h"
3 #include "util/color.h"
4 #include "util/debug.h"
5 #include "util/evlist.h"
6 #include "util/exec_cmd.h"
7 #include "util/machine.h"
8 #include "util/session.h"
9 #include "util/thread.h"
10 #include "util/parse-options.h"
11 #include "util/strlist.h"
12 #include "util/intlist.h"
13 #include "util/thread_map.h"
14 #include "util/stat.h"
15 #include "trace-event.h"
16 #include "util/parse-events.h"
17
18 #include <libaudit.h>
19 #include <stdlib.h>
20 #include <sys/mman.h>
21 #include <linux/futex.h>
22
/*
 * Fallbacks for constants that older distro headers may not provide.
 * Every definition below only takes effect when the system headers did
 * not already define the name.
 */

/* mmap(2) and madvise(2) */
#if !defined(MAP_STACK)
# define MAP_STACK		0x20000
#endif

#if !defined(MADV_HWPOISON)
# define MADV_HWPOISON		100
#endif

#if !defined(MADV_MERGEABLE)
# define MADV_MERGEABLE		12
#endif

#if !defined(MADV_UNMERGEABLE)
# define MADV_UNMERGEABLE	13
#endif

/* eventfd(2) */
#if !defined(EFD_SEMAPHORE)
# define EFD_SEMAPHORE		1
#endif

#if !defined(EFD_NONBLOCK)
# define EFD_NONBLOCK		00004000
#endif

#if !defined(EFD_CLOEXEC)
# define EFD_CLOEXEC		02000000
#endif

/* open(2) */
#if !defined(O_CLOEXEC)
# define O_CLOEXEC		02000000
#endif

/* socket(2) and recvmsg(2) */
#if !defined(SOCK_DCCP)
# define SOCK_DCCP		6
#endif

#if !defined(SOCK_CLOEXEC)
# define SOCK_CLOEXEC		02000000
#endif

#if !defined(SOCK_NONBLOCK)
# define SOCK_NONBLOCK		00004000
#endif

#if !defined(MSG_CMSG_CLOEXEC)
# define MSG_CMSG_CLOEXEC	0x40000000
#endif

/* perf_event_open(2) flags */
#if !defined(PERF_FLAG_FD_NO_GROUP)
# define PERF_FLAG_FD_NO_GROUP		(1UL << 0)
#endif

#if !defined(PERF_FLAG_FD_OUTPUT)
# define PERF_FLAG_FD_OUTPUT		(1UL << 1)
#endif

#if !defined(PERF_FLAG_PID_CGROUP)
# define PERF_FLAG_PID_CGROUP		(1UL << 2) /* pid=cgroup id, per-cpu mode only */
#endif

#if !defined(PERF_FLAG_FD_CLOEXEC)
# define PERF_FLAG_FD_CLOEXEC		(1UL << 3) /* O_CLOEXEC */
#endif
87
88
89 struct tp_field {
90         int offset;
91         union {
92                 u64 (*integer)(struct tp_field *field, struct perf_sample *sample);
93                 void *(*pointer)(struct tp_field *field, struct perf_sample *sample);
94         };
95 };
96
97 #define TP_UINT_FIELD(bits) \
98 static u64 tp_field__u##bits(struct tp_field *field, struct perf_sample *sample) \
99 { \
100         u##bits value; \
101         memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
102         return value;  \
103 }
104
105 TP_UINT_FIELD(8);
106 TP_UINT_FIELD(16);
107 TP_UINT_FIELD(32);
108 TP_UINT_FIELD(64);
109
110 #define TP_UINT_FIELD__SWAPPED(bits) \
111 static u64 tp_field__swapped_u##bits(struct tp_field *field, struct perf_sample *sample) \
112 { \
113         u##bits value; \
114         memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
115         return bswap_##bits(value);\
116 }
117
118 TP_UINT_FIELD__SWAPPED(16);
119 TP_UINT_FIELD__SWAPPED(32);
120 TP_UINT_FIELD__SWAPPED(64);
121
122 static int tp_field__init_uint(struct tp_field *field,
123                                struct format_field *format_field,
124                                bool needs_swap)
125 {
126         field->offset = format_field->offset;
127
128         switch (format_field->size) {
129         case 1:
130                 field->integer = tp_field__u8;
131                 break;
132         case 2:
133                 field->integer = needs_swap ? tp_field__swapped_u16 : tp_field__u16;
134                 break;
135         case 4:
136                 field->integer = needs_swap ? tp_field__swapped_u32 : tp_field__u32;
137                 break;
138         case 8:
139                 field->integer = needs_swap ? tp_field__swapped_u64 : tp_field__u64;
140                 break;
141         default:
142                 return -1;
143         }
144
145         return 0;
146 }
147
148 static void *tp_field__ptr(struct tp_field *field, struct perf_sample *sample)
149 {
150         return sample->raw_data + field->offset;
151 }
152
153 static int tp_field__init_ptr(struct tp_field *field, struct format_field *format_field)
154 {
155         field->offset = format_field->offset;
156         field->pointer = tp_field__ptr;
157         return 0;
158 }
159
160 struct syscall_tp {
161         struct tp_field id;
162         union {
163                 struct tp_field args, ret;
164         };
165 };
166
167 static int perf_evsel__init_tp_uint_field(struct perf_evsel *evsel,
168                                           struct tp_field *field,
169                                           const char *name)
170 {
171         struct format_field *format_field = perf_evsel__field(evsel, name);
172
173         if (format_field == NULL)
174                 return -1;
175
176         return tp_field__init_uint(field, format_field, evsel->needs_swap);
177 }
178
179 #define perf_evsel__init_sc_tp_uint_field(evsel, name) \
180         ({ struct syscall_tp *sc = evsel->priv;\
181            perf_evsel__init_tp_uint_field(evsel, &sc->name, #name); })
182
183 static int perf_evsel__init_tp_ptr_field(struct perf_evsel *evsel,
184                                          struct tp_field *field,
185                                          const char *name)
186 {
187         struct format_field *format_field = perf_evsel__field(evsel, name);
188
189         if (format_field == NULL)
190                 return -1;
191
192         return tp_field__init_ptr(field, format_field);
193 }
194
195 #define perf_evsel__init_sc_tp_ptr_field(evsel, name) \
196         ({ struct syscall_tp *sc = evsel->priv;\
197            perf_evsel__init_tp_ptr_field(evsel, &sc->name, #name); })
198
199 static void perf_evsel__delete_priv(struct perf_evsel *evsel)
200 {
201         zfree(&evsel->priv);
202         perf_evsel__delete(evsel);
203 }
204
205 static int perf_evsel__init_syscall_tp(struct perf_evsel *evsel, void *handler)
206 {
207         evsel->priv = malloc(sizeof(struct syscall_tp));
208         if (evsel->priv != NULL) {
209                 if (perf_evsel__init_sc_tp_uint_field(evsel, id))
210                         goto out_delete;
211
212                 evsel->handler = handler;
213                 return 0;
214         }
215
216         return -ENOMEM;
217
218 out_delete:
219         zfree(&evsel->priv);
220         return -ENOENT;
221 }
222
/*
 * Create the evsel for the syscall tracepoint named @direction and set it
 * up with perf_evsel__init_syscall_tp().
 *
 * Returns the new evsel, or NULL if neither tracepoint system provides the
 * event or if the initialization fails.
 */
static struct perf_evsel *perf_evsel__syscall_newtp(const char *direction, void *handler)
{
	struct perf_evsel *evsel = perf_evsel__newtp("raw_syscalls", direction);

	if (evsel == NULL) {
		/* older kernel (e.g., RHEL6) use syscalls:{enter,exit} */
		evsel = perf_evsel__newtp("syscalls", direction);
		if (evsel == NULL)
			return NULL;
	}

	if (perf_evsel__init_syscall_tp(evsel, handler)) {
		perf_evsel__delete_priv(evsel);
		return NULL;
	}

	return evsel;
}
242
/*
 * Read member @name of the struct syscall_tp kept in evsel->priv from
 * @sample, as an integer.  Arguments are parenthesized so that arbitrary
 * pointer expressions can be passed.
 */
#define perf_evsel__sc_tp_uint(evsel, name, sample) \
	({ struct syscall_tp *fields = (evsel)->priv; \
	   fields->name.integer(&fields->name, (sample)); })

/* Same as perf_evsel__sc_tp_uint(), but through the pointer reader. */
#define perf_evsel__sc_tp_ptr(evsel, name, sample) \
	({ struct syscall_tp *fields = (evsel)->priv; \
	   fields->name.pointer(&fields->name, (sample)); })
250
251 struct syscall_arg {
252         unsigned long val;
253         struct thread *thread;
254         struct trace  *trace;
255         void          *parm;
256         u8            idx;
257         u8            mask;
258 };
259
/*
 * Table of names for an integer argument: value 'offset' maps to
 * entries[0], 'offset + 1' to entries[1], and so on.
 */
struct strarray {
	int	    offset;
	int	    nr_entries;
	const char **entries;
};

/* Define strarray__<array> wrapping @array, whose first entry names value @off. */
#define DEFINE_STRARRAY_OFFSET(array, off) struct strarray strarray__##array = { \
	.offset	    = off, \
	.nr_entries = ARRAY_SIZE(array), \
	.entries    = array, \
}

/* Same, for tables whose first entry names value 0. */
#define DEFINE_STRARRAY(array) DEFINE_STRARRAY_OFFSET(array, 0)
276
277 static size_t __syscall_arg__scnprintf_strarray(char *bf, size_t size,
278                                                 const char *intfmt,
279                                                 struct syscall_arg *arg)
280 {
281         struct strarray *sa = arg->parm;
282         int idx = arg->val - sa->offset;
283
284         if (idx < 0 || idx >= sa->nr_entries)
285                 return scnprintf(bf, size, intfmt, arg->val);
286
287         return scnprintf(bf, size, "%s", sa->entries[idx]);
288 }
289
/* strarray beautifier that prints values without a name in decimal. */
static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size,
					      struct syscall_arg *arg)
{
	const char *fallback_fmt = "%d";

	return __syscall_arg__scnprintf_strarray(bf, size, fallback_fmt, arg);
}

#define SCA_STRARRAY syscall_arg__scnprintf_strarray
297
#if defined(__i386__) || defined(__x86_64__)
/*
 * strarray beautifier that prints values without a name in hexadecimal.
 *
 * FIXME: Make this available to all arches as soon as the ioctl beautifier
 *        gets rewritten to support all arches.
 */
static size_t syscall_arg__scnprintf_strhexarray(char *bf, size_t size,
						 struct syscall_arg *arg)
{
	const char *fallback_fmt = "%#x";

	return __syscall_arg__scnprintf_strarray(bf, size, fallback_fmt, arg);
}

#define SCA_STRHEXARRAY syscall_arg__scnprintf_strhexarray
#endif /* defined(__i386__) || defined(__x86_64__) */
311
312 static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
313                                         struct syscall_arg *arg);
314
315 #define SCA_FD syscall_arg__scnprintf_fd
316
317 static size_t syscall_arg__scnprintf_fd_at(char *bf, size_t size,
318                                            struct syscall_arg *arg)
319 {
320         int fd = arg->val;
321
322         if (fd == AT_FDCWD)
323                 return scnprintf(bf, size, "CWD");
324
325         return syscall_arg__scnprintf_fd(bf, size, arg);
326 }
327
328 #define SCA_FDAT syscall_arg__scnprintf_fd_at
329
330 static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
331                                               struct syscall_arg *arg);
332
333 #define SCA_CLOSE_FD syscall_arg__scnprintf_close_fd
334
335 static size_t syscall_arg__scnprintf_hex(char *bf, size_t size,
336                                          struct syscall_arg *arg)
337 {
338         return scnprintf(bf, size, "%#lx", arg->val);
339 }
340
341 #define SCA_HEX syscall_arg__scnprintf_hex
342
343 static size_t syscall_arg__scnprintf_int(char *bf, size_t size,
344                                          struct syscall_arg *arg)
345 {
346         return scnprintf(bf, size, "%d", arg->val);
347 }
348
349 #define SCA_INT syscall_arg__scnprintf_int
350
351 static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size,
352                                                struct syscall_arg *arg)
353 {
354         int printed = 0, prot = arg->val;
355
356         if (prot == PROT_NONE)
357                 return scnprintf(bf, size, "NONE");
358 #define P_MMAP_PROT(n) \
359         if (prot & PROT_##n) { \
360                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
361                 prot &= ~PROT_##n; \
362         }
363
364         P_MMAP_PROT(EXEC);
365         P_MMAP_PROT(READ);
366         P_MMAP_PROT(WRITE);
367 #ifdef PROT_SEM
368         P_MMAP_PROT(SEM);
369 #endif
370         P_MMAP_PROT(GROWSDOWN);
371         P_MMAP_PROT(GROWSUP);
372 #undef P_MMAP_PROT
373
374         if (prot)
375                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", prot);
376
377         return printed;
378 }
379
380 #define SCA_MMAP_PROT syscall_arg__scnprintf_mmap_prot
381
382 static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size,
383                                                 struct syscall_arg *arg)
384 {
385         int printed = 0, flags = arg->val;
386
387 #define P_MMAP_FLAG(n) \
388         if (flags & MAP_##n) { \
389                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
390                 flags &= ~MAP_##n; \
391         }
392
393         P_MMAP_FLAG(SHARED);
394         P_MMAP_FLAG(PRIVATE);
395 #ifdef MAP_32BIT
396         P_MMAP_FLAG(32BIT);
397 #endif
398         P_MMAP_FLAG(ANONYMOUS);
399         P_MMAP_FLAG(DENYWRITE);
400         P_MMAP_FLAG(EXECUTABLE);
401         P_MMAP_FLAG(FILE);
402         P_MMAP_FLAG(FIXED);
403         P_MMAP_FLAG(GROWSDOWN);
404 #ifdef MAP_HUGETLB
405         P_MMAP_FLAG(HUGETLB);
406 #endif
407         P_MMAP_FLAG(LOCKED);
408         P_MMAP_FLAG(NONBLOCK);
409         P_MMAP_FLAG(NORESERVE);
410         P_MMAP_FLAG(POPULATE);
411         P_MMAP_FLAG(STACK);
412 #ifdef MAP_UNINITIALIZED
413         P_MMAP_FLAG(UNINITIALIZED);
414 #endif
415 #undef P_MMAP_FLAG
416
417         if (flags)
418                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
419
420         return printed;
421 }
422
423 #define SCA_MMAP_FLAGS syscall_arg__scnprintf_mmap_flags
424
425 static size_t syscall_arg__scnprintf_mremap_flags(char *bf, size_t size,
426                                                   struct syscall_arg *arg)
427 {
428         int printed = 0, flags = arg->val;
429
430 #define P_MREMAP_FLAG(n) \
431         if (flags & MREMAP_##n) { \
432                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
433                 flags &= ~MREMAP_##n; \
434         }
435
436         P_MREMAP_FLAG(MAYMOVE);
437 #ifdef MREMAP_FIXED
438         P_MREMAP_FLAG(FIXED);
439 #endif
440 #undef P_MREMAP_FLAG
441
442         if (flags)
443                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
444
445         return printed;
446 }
447
448 #define SCA_MREMAP_FLAGS syscall_arg__scnprintf_mremap_flags
449
450 static size_t syscall_arg__scnprintf_madvise_behavior(char *bf, size_t size,
451                                                       struct syscall_arg *arg)
452 {
453         int behavior = arg->val;
454
455         switch (behavior) {
456 #define P_MADV_BHV(n) case MADV_##n: return scnprintf(bf, size, #n)
457         P_MADV_BHV(NORMAL);
458         P_MADV_BHV(RANDOM);
459         P_MADV_BHV(SEQUENTIAL);
460         P_MADV_BHV(WILLNEED);
461         P_MADV_BHV(DONTNEED);
462         P_MADV_BHV(REMOVE);
463         P_MADV_BHV(DONTFORK);
464         P_MADV_BHV(DOFORK);
465         P_MADV_BHV(HWPOISON);
466 #ifdef MADV_SOFT_OFFLINE
467         P_MADV_BHV(SOFT_OFFLINE);
468 #endif
469         P_MADV_BHV(MERGEABLE);
470         P_MADV_BHV(UNMERGEABLE);
471 #ifdef MADV_HUGEPAGE
472         P_MADV_BHV(HUGEPAGE);
473 #endif
474 #ifdef MADV_NOHUGEPAGE
475         P_MADV_BHV(NOHUGEPAGE);
476 #endif
477 #ifdef MADV_DONTDUMP
478         P_MADV_BHV(DONTDUMP);
479 #endif
480 #ifdef MADV_DODUMP
481         P_MADV_BHV(DODUMP);
482 #endif
483 #undef P_MADV_PHV
484         default: break;
485         }
486
487         return scnprintf(bf, size, "%#x", behavior);
488 }
489
490 #define SCA_MADV_BHV syscall_arg__scnprintf_madvise_behavior
491
492 static size_t syscall_arg__scnprintf_flock(char *bf, size_t size,
493                                            struct syscall_arg *arg)
494 {
495         int printed = 0, op = arg->val;
496
497         if (op == 0)
498                 return scnprintf(bf, size, "NONE");
499 #define P_CMD(cmd) \
500         if ((op & LOCK_##cmd) == LOCK_##cmd) { \
501                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #cmd); \
502                 op &= ~LOCK_##cmd; \
503         }
504
505         P_CMD(SH);
506         P_CMD(EX);
507         P_CMD(NB);
508         P_CMD(UN);
509         P_CMD(MAND);
510         P_CMD(RW);
511         P_CMD(READ);
512         P_CMD(WRITE);
513 #undef P_OP
514
515         if (op)
516                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", op);
517
518         return printed;
519 }
520
521 #define SCA_FLOCK syscall_arg__scnprintf_flock
522
523 static size_t syscall_arg__scnprintf_futex_op(char *bf, size_t size, struct syscall_arg *arg)
524 {
525         enum syscall_futex_args {
526                 SCF_UADDR   = (1 << 0),
527                 SCF_OP      = (1 << 1),
528                 SCF_VAL     = (1 << 2),
529                 SCF_TIMEOUT = (1 << 3),
530                 SCF_UADDR2  = (1 << 4),
531                 SCF_VAL3    = (1 << 5),
532         };
533         int op = arg->val;
534         int cmd = op & FUTEX_CMD_MASK;
535         size_t printed = 0;
536
537         switch (cmd) {
538 #define P_FUTEX_OP(n) case FUTEX_##n: printed = scnprintf(bf, size, #n);
539         P_FUTEX_OP(WAIT);           arg->mask |= SCF_VAL3|SCF_UADDR2;             break;
540         P_FUTEX_OP(WAKE);           arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
541         P_FUTEX_OP(FD);             arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
542         P_FUTEX_OP(REQUEUE);        arg->mask |= SCF_VAL3|SCF_TIMEOUT;            break;
543         P_FUTEX_OP(CMP_REQUEUE);    arg->mask |= SCF_TIMEOUT;                     break;
544         P_FUTEX_OP(CMP_REQUEUE_PI); arg->mask |= SCF_TIMEOUT;                     break;
545         P_FUTEX_OP(WAKE_OP);                                                      break;
546         P_FUTEX_OP(LOCK_PI);        arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
547         P_FUTEX_OP(UNLOCK_PI);      arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
548         P_FUTEX_OP(TRYLOCK_PI);     arg->mask |= SCF_VAL3|SCF_UADDR2;             break;
549         P_FUTEX_OP(WAIT_BITSET);    arg->mask |= SCF_UADDR2;                      break;
550         P_FUTEX_OP(WAKE_BITSET);    arg->mask |= SCF_UADDR2;                      break;
551         P_FUTEX_OP(WAIT_REQUEUE_PI);                                              break;
552         default: printed = scnprintf(bf, size, "%#x", cmd);                       break;
553         }
554
555         if (op & FUTEX_PRIVATE_FLAG)
556                 printed += scnprintf(bf + printed, size - printed, "|PRIV");
557
558         if (op & FUTEX_CLOCK_REALTIME)
559                 printed += scnprintf(bf + printed, size - printed, "|CLKRT");
560
561         return printed;
562 }
563
564 #define SCA_FUTEX_OP  syscall_arg__scnprintf_futex_op
565
566 static const char *epoll_ctl_ops[] = { "ADD", "DEL", "MOD", };
567 static DEFINE_STRARRAY_OFFSET(epoll_ctl_ops, 1);
568
569 static const char *itimers[] = { "REAL", "VIRTUAL", "PROF", };
570 static DEFINE_STRARRAY(itimers);
571
/*
 * lseek() 'whence' names, starting at value 0.  The last two are only
 * present when the system headers know about them.
 */
static const char *whences[] = {
	"SET",
	"CUR",
	"END",
#ifdef SEEK_DATA
	"DATA",
#endif
#ifdef SEEK_HOLE
	"HOLE",
#endif
};
static DEFINE_STRARRAY(whences);
581
/* fcntl() 'cmd' names, starting at value 0. */
static const char *fcntl_cmds[] = {
	/*  0 */ "DUPFD", "GETFD", "SETFD", "GETFL",
	/*  4 */ "SETFL", "GETLK", "SETLK", "SETLKW",
	/*  8 */ "SETOWN", "GETOWN", "SETSIG", "GETSIG",
	/* 12 */ "F_GETLK64", "F_SETLK64", "F_SETLKW64", "F_SETOWN_EX",
	/* 16 */ "F_GETOWN_EX", "F_GETOWNER_UIDS",
};
static DEFINE_STRARRAY(fcntl_cmds);
589
/* Resource limit names, starting at value 0. */
static const char *rlimit_resources[] = {
	/*  0 */ "CPU", "FSIZE", "DATA", "STACK",
	/*  4 */ "CORE", "RSS", "NPROC", "NOFILE",
	/*  8 */ "MEMLOCK", "AS", "LOCKS", "SIGPENDING",
	/* 12 */ "MSGQUEUE", "NICE", "RTPRIO", "RTTIME",
};
static DEFINE_STRARRAY(rlimit_resources);
596
/* Signal mask 'how' names, starting at value 0. */
static const char *sighow[] = {
	"BLOCK",
	"UNBLOCK",
	"SETMASK",
};
static DEFINE_STRARRAY(sighow);

/* Clock id names, starting at value 0. */
static const char *clockid[] = {
	/* 0 */ "REALTIME",
	/* 1 */ "MONOTONIC",
	/* 2 */ "PROCESS_CPUTIME_ID",
	/* 3 */ "THREAD_CPUTIME_ID",
	/* 4 */ "MONOTONIC_RAW",
	/* 5 */ "REALTIME_COARSE",
	/* 6 */ "MONOTONIC_COARSE",
};
static DEFINE_STRARRAY(clockid);
605
/*
 * Socket address family names, indexed by AF_* value.
 *
 * Every AF_* number from 0 to 40 must have a slot, otherwise all following
 * names are shifted: "MPLS" (AF_MPLS, 28) sits between "IB" (27) and
 * "CAN" (29).
 */
static const char *socket_families[] = {
	/*  0 */ "UNSPEC", "LOCAL", "INET", "AX25", "IPX",
	/*  5 */ "APPLETALK", "NETROM", "BRIDGE", "ATMPVC", "X25",
	/* 10 */ "INET6", "ROSE", "DECnet", "NETBEUI", "SECURITY",
	/* 15 */ "KEY", "NETLINK", "PACKET", "ASH", "ECONET",
	/* 20 */ "ATMSVC", "RDS", "SNA", "IRDA", "PPPOX",
	/* 25 */ "WANPIPE", "LLC", "IB", "MPLS", "CAN",
	/* 30 */ "TIPC", "BLUETOOTH", "IUCV", "RXRPC", "ISDN",
	/* 35 */ "PHONET", "IEEE802154", "CAIF", "ALG", "NFC",
	/* 40 */ "VSOCK",
};
614 static DEFINE_STRARRAY(socket_families);
615
616 #ifndef SOCK_TYPE_MASK
617 #define SOCK_TYPE_MASK 0xf
618 #endif
619
620 static size_t syscall_arg__scnprintf_socket_type(char *bf, size_t size,
621                                                       struct syscall_arg *arg)
622 {
623         size_t printed;
624         int type = arg->val,
625             flags = type & ~SOCK_TYPE_MASK;
626
627         type &= SOCK_TYPE_MASK;
628         /*
629          * Can't use a strarray, MIPS may override for ABI reasons.
630          */
631         switch (type) {
632 #define P_SK_TYPE(n) case SOCK_##n: printed = scnprintf(bf, size, #n); break;
633         P_SK_TYPE(STREAM);
634         P_SK_TYPE(DGRAM);
635         P_SK_TYPE(RAW);
636         P_SK_TYPE(RDM);
637         P_SK_TYPE(SEQPACKET);
638         P_SK_TYPE(DCCP);
639         P_SK_TYPE(PACKET);
640 #undef P_SK_TYPE
641         default:
642                 printed = scnprintf(bf, size, "%#x", type);
643         }
644
645 #define P_SK_FLAG(n) \
646         if (flags & SOCK_##n) { \
647                 printed += scnprintf(bf + printed, size - printed, "|%s", #n); \
648                 flags &= ~SOCK_##n; \
649         }
650
651         P_SK_FLAG(CLOEXEC);
652         P_SK_FLAG(NONBLOCK);
653 #undef P_SK_FLAG
654
655         if (flags)
656                 printed += scnprintf(bf + printed, size - printed, "|%#x", flags);
657
658         return printed;
659 }
660
661 #define SCA_SK_TYPE syscall_arg__scnprintf_socket_type
662
663 #ifndef MSG_PROBE
664 #define MSG_PROBE            0x10
665 #endif
666 #ifndef MSG_WAITFORONE
667 #define MSG_WAITFORONE  0x10000
668 #endif
669 #ifndef MSG_SENDPAGE_NOTLAST
670 #define MSG_SENDPAGE_NOTLAST 0x20000
671 #endif
672 #ifndef MSG_FASTOPEN
673 #define MSG_FASTOPEN         0x20000000
674 #endif
675
676 static size_t syscall_arg__scnprintf_msg_flags(char *bf, size_t size,
677                                                struct syscall_arg *arg)
678 {
679         int printed = 0, flags = arg->val;
680
681         if (flags == 0)
682                 return scnprintf(bf, size, "NONE");
683 #define P_MSG_FLAG(n) \
684         if (flags & MSG_##n) { \
685                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
686                 flags &= ~MSG_##n; \
687         }
688
689         P_MSG_FLAG(OOB);
690         P_MSG_FLAG(PEEK);
691         P_MSG_FLAG(DONTROUTE);
692         P_MSG_FLAG(TRYHARD);
693         P_MSG_FLAG(CTRUNC);
694         P_MSG_FLAG(PROBE);
695         P_MSG_FLAG(TRUNC);
696         P_MSG_FLAG(DONTWAIT);
697         P_MSG_FLAG(EOR);
698         P_MSG_FLAG(WAITALL);
699         P_MSG_FLAG(FIN);
700         P_MSG_FLAG(SYN);
701         P_MSG_FLAG(CONFIRM);
702         P_MSG_FLAG(RST);
703         P_MSG_FLAG(ERRQUEUE);
704         P_MSG_FLAG(NOSIGNAL);
705         P_MSG_FLAG(MORE);
706         P_MSG_FLAG(WAITFORONE);
707         P_MSG_FLAG(SENDPAGE_NOTLAST);
708         P_MSG_FLAG(FASTOPEN);
709         P_MSG_FLAG(CMSG_CLOEXEC);
710 #undef P_MSG_FLAG
711
712         if (flags)
713                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
714
715         return printed;
716 }
717
718 #define SCA_MSG_FLAGS syscall_arg__scnprintf_msg_flags
719
720 static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size,
721                                                  struct syscall_arg *arg)
722 {
723         size_t printed = 0;
724         int mode = arg->val;
725
726         if (mode == F_OK) /* 0 */
727                 return scnprintf(bf, size, "F");
728 #define P_MODE(n) \
729         if (mode & n##_OK) { \
730                 printed += scnprintf(bf + printed, size - printed, "%s", #n); \
731                 mode &= ~n##_OK; \
732         }
733
734         P_MODE(R);
735         P_MODE(W);
736         P_MODE(X);
737 #undef P_MODE
738
739         if (mode)
740                 printed += scnprintf(bf + printed, size - printed, "|%#x", mode);
741
742         return printed;
743 }
744
745 #define SCA_ACCMODE syscall_arg__scnprintf_access_mode
746
747 static size_t syscall_arg__scnprintf_filename(char *bf, size_t size,
748                                               struct syscall_arg *arg);
749
750 #define SCA_FILENAME syscall_arg__scnprintf_filename
751
752 static size_t syscall_arg__scnprintf_open_flags(char *bf, size_t size,
753                                                struct syscall_arg *arg)
754 {
755         int printed = 0, flags = arg->val;
756
757         if (!(flags & O_CREAT))
758                 arg->mask |= 1 << (arg->idx + 1); /* Mask the mode parm */
759
760         if (flags == 0)
761                 return scnprintf(bf, size, "RDONLY");
762 #define P_FLAG(n) \
763         if (flags & O_##n) { \
764                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
765                 flags &= ~O_##n; \
766         }
767
768         P_FLAG(APPEND);
769         P_FLAG(ASYNC);
770         P_FLAG(CLOEXEC);
771         P_FLAG(CREAT);
772         P_FLAG(DIRECT);
773         P_FLAG(DIRECTORY);
774         P_FLAG(EXCL);
775         P_FLAG(LARGEFILE);
776         P_FLAG(NOATIME);
777         P_FLAG(NOCTTY);
778 #ifdef O_NONBLOCK
779         P_FLAG(NONBLOCK);
780 #elif O_NDELAY
781         P_FLAG(NDELAY);
782 #endif
783 #ifdef O_PATH
784         P_FLAG(PATH);
785 #endif
786         P_FLAG(RDWR);
787 #ifdef O_DSYNC
788         if ((flags & O_SYNC) == O_SYNC)
789                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", "SYNC");
790         else {
791                 P_FLAG(DSYNC);
792         }
793 #else
794         P_FLAG(SYNC);
795 #endif
796         P_FLAG(TRUNC);
797         P_FLAG(WRONLY);
798 #undef P_FLAG
799
800         if (flags)
801                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
802
803         return printed;
804 }
805
806 #define SCA_OPEN_FLAGS syscall_arg__scnprintf_open_flags
807
808 static size_t syscall_arg__scnprintf_perf_flags(char *bf, size_t size,
809                                                 struct syscall_arg *arg)
810 {
811         int printed = 0, flags = arg->val;
812
813         if (flags == 0)
814                 return 0;
815
816 #define P_FLAG(n) \
817         if (flags & PERF_FLAG_##n) { \
818                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
819                 flags &= ~PERF_FLAG_##n; \
820         }
821
822         P_FLAG(FD_NO_GROUP);
823         P_FLAG(FD_OUTPUT);
824         P_FLAG(PID_CGROUP);
825         P_FLAG(FD_CLOEXEC);
826 #undef P_FLAG
827
828         if (flags)
829                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
830
831         return printed;
832 }
833
834 #define SCA_PERF_FLAGS syscall_arg__scnprintf_perf_flags
835
836 static size_t syscall_arg__scnprintf_eventfd_flags(char *bf, size_t size,
837                                                    struct syscall_arg *arg)
838 {
839         int printed = 0, flags = arg->val;
840
841         if (flags == 0)
842                 return scnprintf(bf, size, "NONE");
843 #define P_FLAG(n) \
844         if (flags & EFD_##n) { \
845                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
846                 flags &= ~EFD_##n; \
847         }
848
849         P_FLAG(SEMAPHORE);
850         P_FLAG(CLOEXEC);
851         P_FLAG(NONBLOCK);
852 #undef P_FLAG
853
854         if (flags)
855                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
856
857         return printed;
858 }
859
860 #define SCA_EFD_FLAGS syscall_arg__scnprintf_eventfd_flags
861
862 static size_t syscall_arg__scnprintf_pipe_flags(char *bf, size_t size,
863                                                 struct syscall_arg *arg)
864 {
865         int printed = 0, flags = arg->val;
866
867 #define P_FLAG(n) \
868         if (flags & O_##n) { \
869                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
870                 flags &= ~O_##n; \
871         }
872
873         P_FLAG(CLOEXEC);
874         P_FLAG(NONBLOCK);
875 #undef P_FLAG
876
877         if (flags)
878                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
879
880         return printed;
881 }
882
883 #define SCA_PIPE_FLAGS syscall_arg__scnprintf_pipe_flags
884
885 static size_t syscall_arg__scnprintf_signum(char *bf, size_t size, struct syscall_arg *arg)
886 {
887         int sig = arg->val;
888
889         switch (sig) {
890 #define P_SIGNUM(n) case SIG##n: return scnprintf(bf, size, #n)
891         P_SIGNUM(HUP);
892         P_SIGNUM(INT);
893         P_SIGNUM(QUIT);
894         P_SIGNUM(ILL);
895         P_SIGNUM(TRAP);
896         P_SIGNUM(ABRT);
897         P_SIGNUM(BUS);
898         P_SIGNUM(FPE);
899         P_SIGNUM(KILL);
900         P_SIGNUM(USR1);
901         P_SIGNUM(SEGV);
902         P_SIGNUM(USR2);
903         P_SIGNUM(PIPE);
904         P_SIGNUM(ALRM);
905         P_SIGNUM(TERM);
906         P_SIGNUM(CHLD);
907         P_SIGNUM(CONT);
908         P_SIGNUM(STOP);
909         P_SIGNUM(TSTP);
910         P_SIGNUM(TTIN);
911         P_SIGNUM(TTOU);
912         P_SIGNUM(URG);
913         P_SIGNUM(XCPU);
914         P_SIGNUM(XFSZ);
915         P_SIGNUM(VTALRM);
916         P_SIGNUM(PROF);
917         P_SIGNUM(WINCH);
918         P_SIGNUM(IO);
919         P_SIGNUM(PWR);
920         P_SIGNUM(SYS);
921 #ifdef SIGEMT
922         P_SIGNUM(EMT);
923 #endif
924 #ifdef SIGSTKFLT
925         P_SIGNUM(STKFLT);
926 #endif
927 #ifdef SIGSWI
928         P_SIGNUM(SWI);
929 #endif
930         default: break;
931         }
932
933         return scnprintf(bf, size, "%#x", sig);
934 }
935
936 #define SCA_SIGNUM syscall_arg__scnprintf_signum
937
938 #if defined(__i386__) || defined(__x86_64__)
939 /*
940  * FIXME: Make this available to all arches.
941  */
#define TCGETS		0x5401

/*
 * Names of the terminal ioctl requests, for use with a strarray whose
 * offset is TCGETS: slot N names request TCGETS + N.
 *
 * Wherever the request numbers have a gap, the next name is placed with a
 * designated initializer computed from its request number, so that the
 * table offset is always accounted for.  Unnamed slots are left NULL.
 */
#define TIOCTL_IDX(cmd)	((cmd) - TCGETS)

static const char *tioctls[] = {
	"TCGETS", "TCSETS", "TCSETSW", "TCSETSF", "TCGETA", "TCSETA", "TCSETAW",
	"TCSETAF", "TCSBRK", "TCXONC", "TCFLSH", "TIOCEXCL", "TIOCNXCL",
	"TIOCSCTTY", "TIOCGPGRP", "TIOCSPGRP", "TIOCOUTQ", "TIOCSTI",
	"TIOCGWINSZ", "TIOCSWINSZ", "TIOCMGET", "TIOCMBIS", "TIOCMBIC",
	"TIOCMSET", "TIOCGSOFTCAR", "TIOCSSOFTCAR", "FIONREAD", "TIOCLINUX",
	"TIOCCONS", "TIOCGSERIAL", "TIOCSSERIAL", "TIOCPKT", "FIONBIO",
	"TIOCNOTTY", "TIOCSETD", "TIOCGETD", "TCSBRKP",
	[TIOCTL_IDX(0x5427)] = "TIOCSBRK",
	"TIOCCBRK", "TIOCGSID", "TCGETS2", "TCSETS2", "TCSETSW2", "TCSETSF2",
	"TIOCGRS485", "TIOCSRS485", "TIOCGPTN", "TIOCSPTLCK",
	"TIOCGDEV||TCGETX", "TCSETX", "TCSETXF", "TCSETXW", "TIOCSIG",
	"TIOCVHANGUP", "TIOCGPKT", "TIOCGPTLCK",
	[TIOCTL_IDX(0x5440)] = "TIOCGEXCL",
	[TIOCTL_IDX(0x5450)] = "FIONCLEX", "FIOCLEX", "FIOASYNC", "TIOCSERCONFIG",
	"TIOCSERGWILD", "TIOCSERSWILD", "TIOCGLCKTRMIOS", "TIOCSLCKTRMIOS",
	"TIOCSERGSTRUCT", "TIOCSERGETLSR", "TIOCSERGETMULTI", "TIOCSERSETMULTI",
	"TIOCMIWAIT", "TIOCGICOUNT",
	[TIOCTL_IDX(0x5460)] = "FIOQSIZE",
};

#undef TIOCTL_IDX
961
/* tioctls[0] names TCGETS, so that is the table offset. */
static DEFINE_STRARRAY_OFFSET(tioctls, TCGETS);
963 #endif /* defined(__i386__) || defined(__x86_64__) */
964
/*
 * Initializer fragment for a syscall_fmts[] entry: formats argument index
 * 'arg' with SCA_STRARRAY, using strarray__<array> as its lookup table.
 * 'name' is not used by the expansion; it only labels the argument for the
 * reader at the use site.
 */
#define STRARRAY(arg, name, array) \
          .arg_scnprintf = { [arg] = SCA_STRARRAY, }, \
          .arg_parm      = { [arg] = &strarray__##array, }
968
/*
 * Per-syscall formatting hints.
 *
 * name:          syscall name, the lookup key used by syscall_fmt__find().
 * alias:         alternative name; trace__read_syscall_info() retries the
 *                "sys_enter_<alias>" tracepoint when "sys_enter_<name>" is
 *                not found.
 * arg_scnprintf: per-argument formatter, indexed by argument position; a
 *                NULL slot means no special formatter for that argument.
 * arg_parm:      per-argument opaque parameter, indexed like arg_scnprintf
 *                (e.g. the strarray for SCA_STRARRAY).
 * errmsg, timeout, hexret: flags consumed outside this chunk.
 *                NOTE(review): presumably they control how the return value
 *                is printed - confirm at the use site.
 *
 * The table MUST be kept sorted by .name in strcmp() order: it is searched
 * with bsearch() in syscall_fmt__find().
 */
static struct syscall_fmt {
        const char *name;
        const char *alias;
        size_t     (*arg_scnprintf[6])(char *bf, size_t size, struct syscall_arg *arg);
        void       *arg_parm[6];
        bool       errmsg;
        bool       timeout;
        bool       hexret;
} syscall_fmts[] = {
        { .name     = "access",     .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */
                             [1] = SCA_ACCMODE,  /* mode */ }, },
        { .name     = "arch_prctl", .errmsg = true, .alias = "prctl", },
        { .name     = "brk",        .hexret = true,
          .arg_scnprintf = { [0] = SCA_HEX, /* brk */ }, },
        { .name     = "chdir",      .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
        { .name     = "chmod",      .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
        { .name     = "chroot",     .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
        { .name     = "clock_gettime",  .errmsg = true, STRARRAY(0, clk_id, clockid), },
        { .name     = "close",      .errmsg = true,
          .arg_scnprintf = { [0] = SCA_CLOSE_FD, /* fd */ }, },
        { .name     = "connect",    .errmsg = true, },
        { .name     = "creat",      .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
        { .name     = "dup",        .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        { .name     = "dup2",       .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        { .name     = "dup3",       .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        { .name     = "epoll_ctl",  .errmsg = true, STRARRAY(1, op, epoll_ctl_ops), },
        { .name     = "eventfd2",   .errmsg = true,
          .arg_scnprintf = { [1] = SCA_EFD_FLAGS, /* flags */ }, },
        { .name     = "faccessat",  .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
                             [1] = SCA_FILENAME, /* filename */ }, },
        { .name     = "fadvise64",  .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        { .name     = "fallocate",  .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        { .name     = "fchdir",     .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        { .name     = "fchmod",     .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        { .name     = "fchmodat",   .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
        { .name     = "fchown",     .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        { .name     = "fchownat",   .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FDAT, /* fd */
                             [1] = SCA_FILENAME, /* filename */ }, },
        { .name     = "fcntl",      .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */
                             [1] = SCA_STRARRAY, /* cmd */ },
          .arg_parm      = { [1] = &strarray__fcntl_cmds, /* cmd */ }, },
        { .name     = "fdatasync",  .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        { .name     = "flock",      .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */
                             [1] = SCA_FLOCK, /* cmd */ }, },
        { .name     = "fsetxattr",  .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        { .name     = "fstat",      .errmsg = true, .alias = "newfstat",
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        { .name     = "fstatat",    .errmsg = true, .alias = "newfstatat",
          .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
                             [1] = SCA_FILENAME, /* filename */ }, },
        { .name     = "fstatfs",    .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        { .name     = "fsync",    .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        { .name     = "ftruncate", .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        { .name     = "futex",      .errmsg = true,
          .arg_scnprintf = { [1] = SCA_FUTEX_OP, /* op */ }, },
        { .name     = "futimesat", .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
        { .name     = "getdents",   .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        { .name     = "getdents64", .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        { .name     = "getitimer",  .errmsg = true, STRARRAY(0, which, itimers), },
        { .name     = "getrlimit",  .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
        { .name     = "getxattr",    .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
        { .name     = "inotify_add_watch",          .errmsg = true,
          .arg_scnprintf = { [1] = SCA_FILENAME, /* pathname */ }, },
        { .name     = "ioctl",      .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */
#if defined(__i386__) || defined(__x86_64__)
/*
 * FIXME: Make this available to all arches.
 */
                             [1] = SCA_STRHEXARRAY, /* cmd */
                             [2] = SCA_HEX, /* arg */ },
          .arg_parm      = { [1] = &strarray__tioctls, /* cmd */ }, },
#else
                             [2] = SCA_HEX, /* arg */ }, },
#endif
        { .name     = "kill",       .errmsg = true,
          .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
        { .name     = "lchown",    .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
        { .name     = "lgetxattr",  .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
        { .name     = "linkat",     .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
        { .name     = "listxattr",  .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
        { .name     = "lseek",      .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */
                             [2] = SCA_STRARRAY, /* whence */ },
          .arg_parm      = { [2] = &strarray__whences, /* whence */ }, },
        { .name     = "lsetxattr",  .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
        { .name     = "lstat",      .errmsg = true, .alias = "newlstat", },
        { .name     = "lsxattr",    .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
        { .name     = "madvise",    .errmsg = true,
          .arg_scnprintf = { [0] = SCA_HEX,      /* start */
                             [2] = SCA_MADV_BHV, /* behavior */ }, },
        { .name     = "mkdir",    .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
        { .name     = "mkdirat",    .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FDAT, /* fd */
                             [1] = SCA_FILENAME, /* pathname */ }, },
        { .name     = "mknod",      .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
        { .name     = "mknodat",    .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
        { .name     = "mlock",      .errmsg = true,
          .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
        /* NOTE(review): mlockall(2) takes a flags argument, not an address - confirm the label below. */
        { .name     = "mlockall",   .errmsg = true,
          .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
        { .name     = "mmap",       .hexret = true,
          .arg_scnprintf = { [0] = SCA_HEX,       /* addr */
                             [2] = SCA_MMAP_PROT, /* prot */
                             [3] = SCA_MMAP_FLAGS, /* flags */
                             [4] = SCA_FD,        /* fd */ }, },
        { .name     = "mprotect",   .errmsg = true,
          .arg_scnprintf = { [0] = SCA_HEX, /* start */
                             [2] = SCA_MMAP_PROT, /* prot */ }, },
        { .name     = "mremap",     .hexret = true,
          .arg_scnprintf = { [0] = SCA_HEX, /* addr */
                             [3] = SCA_MREMAP_FLAGS, /* flags */
                             [4] = SCA_HEX, /* new_addr */ }, },
        { .name     = "munlock",    .errmsg = true,
          .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
        { .name     = "munmap",     .errmsg = true,
          .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
        { .name     = "name_to_handle_at", .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
        { .name     = "newfstatat", .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
                             [1] = SCA_FILENAME, /* filename */ }, },
        { .name     = "open",       .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FILENAME,   /* filename */
                             [1] = SCA_OPEN_FLAGS, /* flags */ }, },
        { .name     = "open_by_handle_at", .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
                             [2] = SCA_OPEN_FLAGS, /* flags */ }, },
        { .name     = "openat",     .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
                             [1] = SCA_FILENAME, /* filename */
                             [2] = SCA_OPEN_FLAGS, /* flags */ }, },
        { .name     = "perf_event_open", .errmsg = true,
          .arg_scnprintf = { [1] = SCA_INT, /* pid */
                             [2] = SCA_INT, /* cpu */
                             [3] = SCA_FD,  /* group_fd */
                             [4] = SCA_PERF_FLAGS,  /* flags */ }, },
        { .name     = "pipe2",      .errmsg = true,
          .arg_scnprintf = { [1] = SCA_PIPE_FLAGS, /* flags */ }, },
        { .name     = "poll",       .errmsg = true, .timeout = true, },
        { .name     = "ppoll",      .errmsg = true, .timeout = true, },
        { .name     = "pread",      .errmsg = true, .alias = "pread64",
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        /* NOTE(review): alias "pread" looks suspicious next to pread's own alias "pread64" - confirm. */
        { .name     = "preadv",     .errmsg = true, .alias = "pread",
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        { .name     = "prlimit64",  .errmsg = true, STRARRAY(1, resource, rlimit_resources), },
        { .name     = "pwrite",     .errmsg = true, .alias = "pwrite64",
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        { .name     = "pwritev",    .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        { .name     = "read",       .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        { .name     = "readlink",   .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FILENAME, /* path */ }, },
        { .name     = "readlinkat", .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
                             [1] = SCA_FILENAME, /* pathname */ }, },
        { .name     = "readv",      .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        { .name     = "recvfrom",   .errmsg = true,
          .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
        { .name     = "recvmmsg",   .errmsg = true,
          .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
        { .name     = "recvmsg",    .errmsg = true,
          .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
        { .name     = "removexattr", .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
        { .name     = "renameat",   .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
        { .name     = "rmdir",    .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
        { .name     = "rt_sigaction", .errmsg = true,
          .arg_scnprintf = { [0] = SCA_SIGNUM, /* sig */ }, },
        { .name     = "rt_sigprocmask",  .errmsg = true, STRARRAY(0, how, sighow), },
        { .name     = "rt_sigqueueinfo", .errmsg = true,
          .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
        { .name     = "rt_tgsigqueueinfo", .errmsg = true,
          .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
        { .name     = "select",     .errmsg = true, .timeout = true, },
        { .name     = "sendmmsg",    .errmsg = true,
          .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
        { .name     = "sendmsg",    .errmsg = true,
          .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
        { .name     = "sendto",     .errmsg = true,
          .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
        { .name     = "setitimer",  .errmsg = true, STRARRAY(0, which, itimers), },
        { .name     = "setrlimit",  .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
        { .name     = "setxattr",   .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
        { .name     = "shutdown",   .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        { .name     = "socket",     .errmsg = true,
          .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
                             [1] = SCA_SK_TYPE, /* type */ },
          .arg_parm      = { [0] = &strarray__socket_families, /* family */ }, },
        { .name     = "socketpair", .errmsg = true,
          .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
                             [1] = SCA_SK_TYPE, /* type */ },
          .arg_parm      = { [0] = &strarray__socket_families, /* family */ }, },
        { .name     = "stat",       .errmsg = true, .alias = "newstat", },
        { .name     = "statfs",     .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
        { .name     = "swapoff",    .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FILENAME, /* specialfile */ }, },
        { .name     = "swapon",     .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FILENAME, /* specialfile */ }, },
        { .name     = "symlinkat",  .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
        { .name     = "tgkill",     .errmsg = true,
          .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
        { .name     = "tkill",      .errmsg = true,
          .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
        { .name     = "truncate",   .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FILENAME, /* path */ }, },
        { .name     = "uname",      .errmsg = true, .alias = "newuname", },
        { .name     = "unlinkat",   .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
                             [1] = SCA_FILENAME, /* pathname */ }, },
        { .name     = "utime",  .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
        { .name     = "utimensat",  .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FDAT, /* dirfd */
                             [1] = SCA_FILENAME, /* filename */ }, },
        { .name     = "utimes",  .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
        { .name     = "write",      .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        { .name     = "writev",     .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
};
1235
1236 static int syscall_fmt__cmp(const void *name, const void *fmtp)
1237 {
1238         const struct syscall_fmt *fmt = fmtp;
1239         return strcmp(name, fmt->name);
1240 }
1241
1242 static struct syscall_fmt *syscall_fmt__find(const char *name)
1243 {
1244         const int nmemb = ARRAY_SIZE(syscall_fmts);
1245         return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp);
1246 }
1247
/*
 * Per-syscall state, one slot per syscall id in trace->syscalls.table;
 * filled in by trace__read_syscall_info().
 */
struct syscall {
        struct event_format *tp_format;   /* "syscalls:sys_enter_<name or alias>" tracepoint format */
        int                 nr_args;      /* number of fields in 'args' (the leading "nr" field excluded) */
        struct format_field *args;        /* first argument field of tp_format */
        const char          *name;        /* as returned by audit_syscall_to_name() */
        bool                is_exit;      /* true for "exit" and "exit_group" */
        struct syscall_fmt  *fmt;         /* syscall_fmts[] entry, NULL if none */
        /* per-argument formatters, nr_args entries, allocated by syscall__set_arg_fmts() */
        size_t              (**arg_scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
        void                **arg_parm;   /* fmt->arg_parm when fmt is set */
};
1258
1259 static size_t fprintf_duration(unsigned long t, FILE *fp)
1260 {
1261         double duration = (double)t / NSEC_PER_MSEC;
1262         size_t printed = fprintf(fp, "(");
1263
1264         if (duration >= 1.0)
1265                 printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration);
1266         else if (duration >= 0.01)
1267                 printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration);
1268         else
1269                 printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration);
1270         return printed + fprintf(fp, "): ");
1271 }
1272
/**
 * Per-thread tracing state, stored as the thread's private data
 * (see thread__priv()/thread__set_priv() in thread__trace()).
 *
 * filename.ptr: The filename char pointer that will be vfs_getname'd
 * filename.entry_str_pos: Where to insert the string translated from
 *                         filename.ptr by the vfs_getname tracepoint/kprobe.
 */
struct thread_trace {
        u64               entry_time;
        u64               exit_time;
        bool              entry_pending;
        unsigned long     nr_events;      /* bumped on every thread__trace() call */
        unsigned long     pfmaj, pfmin;
        char              *entry_str;     /* buffer that filename.entry_str_pos is an offset into */
        double            runtime_ms;
        struct {
                unsigned long ptr;
                int           entry_str_pos;
        } filename;
        /* fd -> pathname cache: table has max + 1 slots, max is -1 while empty */
        struct {
                int       max;
                char      **table;
        } paths;

        struct intlist *syscall_stats;
};
1297
1298 static struct thread_trace *thread_trace__new(void)
1299 {
1300         struct thread_trace *ttrace =  zalloc(sizeof(struct thread_trace));
1301
1302         if (ttrace)
1303                 ttrace->paths.max = -1;
1304
1305         ttrace->syscall_stats = intlist__new(NULL);
1306
1307         return ttrace;
1308 }
1309
1310 static struct thread_trace *thread__trace(struct thread *thread, FILE *fp)
1311 {
1312         struct thread_trace *ttrace;
1313
1314         if (thread == NULL)
1315                 goto fail;
1316
1317         if (thread__priv(thread) == NULL)
1318                 thread__set_priv(thread, thread_trace__new());
1319
1320         if (thread__priv(thread) == NULL)
1321                 goto fail;
1322
1323         ttrace = thread__priv(thread);
1324         ++ttrace->nr_events;
1325
1326         return ttrace;
1327 fail:
1328         color_fprintf(fp, PERF_COLOR_RED,
1329                       "WARNING: not enough memory, dropping samples!\n");
1330         return NULL;
1331 }
1332
/*
 * Page fault selection bits.
 * NOTE(review): presumably OR-ed into struct trace::trace_pgfaults - confirm
 * at the use sites, which are outside this chunk.
 */
#define TRACE_PFMAJ             (1 << 0)
#define TRACE_PFMIN             (1 << 1)

/* NOTE(review): presumably the size in bytes of thread_trace::entry_str - confirm. */
static const size_t trace__entry_str_size = 2048;
1337
/*
 * Global state of one 'perf trace' session. Embeds the perf_tool, so
 * callbacks recover it with container_of() (see trace__tool_process()).
 */
struct trace {
        struct perf_tool        tool;
        struct {
                int             machine;  /* passed to audit_syscall_to_name() */
                int             open_id;
        }                       audit;
        /* syscall id -> struct syscall table: max + 1 slots, max is -1 while empty */
        struct {
                int             max;
                struct syscall  *table;
                struct {
                        struct perf_evsel *sys_enter,
                                          *sys_exit;
                }               events;
        } syscalls;
        struct record_opts      opts;
        struct perf_evlist      *evlist;
        struct machine          *host;    /* created in trace__symbols_init() */
        struct thread           *current;
        u64                     base_time;  /* subtracted from timestamps before printing */
        FILE                    *output;    /* where messages and warnings are written */
        unsigned long           nr_events;
        struct strlist          *ev_qualifier;
        struct {
                size_t          nr;
                int             *entries;
        }                       ev_qualifier_ids;
        const char              *last_vfs_getname;
        struct intlist          *tid_list;
        struct intlist          *pid_list;
        struct {
                size_t          nr;
                pid_t           *entries;
        }                       filter_pids;
        double                  duration_filter;  /* scaled by NSEC_PER_MSEC in trace__filter_duration() */
        double                  runtime_ms;
        struct {
                u64             vfs_getname,
                                proc_getname;     /* bumped on each /proc fd lookup attempt */
        } stats;
        bool                    not_ev_qualifier;
        bool                    live;             /* when false, fd paths are never resolved through /proc */
        bool                    full_time;
        bool                    sched;
        bool                    multiple_threads; /* print the tid (and comm) in each entry head */
        bool                    summary;
        bool                    summary_only;
        bool                    show_comm;        /* prefix the tid with the thread's comm */
        bool                    show_tool_stats;
        bool                    trace_syscalls;
        bool                    force;
        bool                    vfs_getname;      /* when false, filename args are printed as raw hex */
        int                     trace_pgfaults;
};
1391
1392 static int trace__set_fd_pathname(struct thread *thread, int fd, const char *pathname)
1393 {
1394         struct thread_trace *ttrace = thread__priv(thread);
1395
1396         if (fd > ttrace->paths.max) {
1397                 char **npath = realloc(ttrace->paths.table, (fd + 1) * sizeof(char *));
1398
1399                 if (npath == NULL)
1400                         return -1;
1401
1402                 if (ttrace->paths.max != -1) {
1403                         memset(npath + ttrace->paths.max + 1, 0,
1404                                (fd - ttrace->paths.max) * sizeof(char *));
1405                 } else {
1406                         memset(npath, 0, (fd + 1) * sizeof(char *));
1407                 }
1408
1409                 ttrace->paths.table = npath;
1410                 ttrace->paths.max   = fd;
1411         }
1412
1413         ttrace->paths.table[fd] = strdup(pathname);
1414
1415         return ttrace->paths.table[fd] != NULL ? 0 : -1;
1416 }
1417
1418 static int thread__read_fd_path(struct thread *thread, int fd)
1419 {
1420         char linkname[PATH_MAX], pathname[PATH_MAX];
1421         struct stat st;
1422         int ret;
1423
1424         if (thread->pid_ == thread->tid) {
1425                 scnprintf(linkname, sizeof(linkname),
1426                           "/proc/%d/fd/%d", thread->pid_, fd);
1427         } else {
1428                 scnprintf(linkname, sizeof(linkname),
1429                           "/proc/%d/task/%d/fd/%d", thread->pid_, thread->tid, fd);
1430         }
1431
1432         if (lstat(linkname, &st) < 0 || st.st_size + 1 > (off_t)sizeof(pathname))
1433                 return -1;
1434
1435         ret = readlink(linkname, pathname, sizeof(pathname));
1436
1437         if (ret < 0 || ret > st.st_size)
1438                 return -1;
1439
1440         pathname[ret] = '\0';
1441         return trace__set_fd_pathname(thread, fd, pathname);
1442 }
1443
1444 static const char *thread__fd_path(struct thread *thread, int fd,
1445                                    struct trace *trace)
1446 {
1447         struct thread_trace *ttrace = thread__priv(thread);
1448
1449         if (ttrace == NULL)
1450                 return NULL;
1451
1452         if (fd < 0)
1453                 return NULL;
1454
1455         if ((fd > ttrace->paths.max || ttrace->paths.table[fd] == NULL)) {
1456                 if (!trace->live)
1457                         return NULL;
1458                 ++trace->stats.proc_getname;
1459                 if (thread__read_fd_path(thread, fd))
1460                         return NULL;
1461         }
1462
1463         return ttrace->paths.table[fd];
1464 }
1465
1466 static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
1467                                         struct syscall_arg *arg)
1468 {
1469         int fd = arg->val;
1470         size_t printed = scnprintf(bf, size, "%d", fd);
1471         const char *path = thread__fd_path(arg->thread, fd, arg->trace);
1472
1473         if (path)
1474                 printed += scnprintf(bf + printed, size - printed, "<%s>", path);
1475
1476         return printed;
1477 }
1478
1479 static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
1480                                               struct syscall_arg *arg)
1481 {
1482         int fd = arg->val;
1483         size_t printed = syscall_arg__scnprintf_fd(bf, size, arg);
1484         struct thread_trace *ttrace = thread__priv(arg->thread);
1485
1486         if (ttrace && fd >= 0 && fd <= ttrace->paths.max)
1487                 zfree(&ttrace->paths.table[fd]);
1488
1489         return printed;
1490 }
1491
1492 static void thread__set_filename_pos(struct thread *thread, const char *bf,
1493                                      unsigned long ptr)
1494 {
1495         struct thread_trace *ttrace = thread__priv(thread);
1496
1497         ttrace->filename.ptr = ptr;
1498         ttrace->filename.entry_str_pos = bf - ttrace->entry_str;
1499 }
1500
1501 static size_t syscall_arg__scnprintf_filename(char *bf, size_t size,
1502                                               struct syscall_arg *arg)
1503 {
1504         unsigned long ptr = arg->val;
1505
1506         if (!arg->trace->vfs_getname)
1507                 return scnprintf(bf, size, "%#x", ptr);
1508
1509         thread__set_filename_pos(arg->thread, bf, ptr);
1510         return 0;
1511 }
1512
1513 static bool trace__filter_duration(struct trace *trace, double t)
1514 {
1515         return t < (trace->duration_filter * NSEC_PER_MSEC);
1516 }
1517
1518 static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
1519 {
1520         double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC;
1521
1522         return fprintf(fp, "%10.3f ", ts);
1523 }
1524
/*
 * Flags written by sig_handler().
 * NOTE(review): these are plain bools written from a signal handler;
 * 'volatile sig_atomic_t' is the portable type for that - confirm how the
 * readers outside this chunk use them before changing.
 */
static bool done = false;
static bool interrupted = false;

/*
 * Signal handler: request termination, and record whether the signal was
 * SIGINT.
 */
static void sig_handler(int sig)
{
        done = true;
        interrupted = sig == SIGINT;
}
1533
1534 static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread,
1535                                         u64 duration, u64 tstamp, FILE *fp)
1536 {
1537         size_t printed = trace__fprintf_tstamp(trace, tstamp, fp);
1538         printed += fprintf_duration(duration, fp);
1539
1540         if (trace->multiple_threads) {
1541                 if (trace->show_comm)
1542                         printed += fprintf(fp, "%.14s/", thread__comm_str(thread));
1543                 printed += fprintf(fp, "%d ", thread->tid);
1544         }
1545
1546         return printed;
1547 }
1548
1549 static int trace__process_event(struct trace *trace, struct machine *machine,
1550                                 union perf_event *event, struct perf_sample *sample)
1551 {
1552         int ret = 0;
1553
1554         switch (event->header.type) {
1555         case PERF_RECORD_LOST:
1556                 color_fprintf(trace->output, PERF_COLOR_RED,
1557                               "LOST %" PRIu64 " events!\n", event->lost.lost);
1558                 ret = machine__process_lost_event(machine, event, sample);
1559         default:
1560                 ret = machine__process_event(machine, event, sample);
1561                 break;
1562         }
1563
1564         return ret;
1565 }
1566
1567 static int trace__tool_process(struct perf_tool *tool,
1568                                union perf_event *event,
1569                                struct perf_sample *sample,
1570                                struct machine *machine)
1571 {
1572         struct trace *trace = container_of(tool, struct trace, tool);
1573         return trace__process_event(trace, machine, event, sample);
1574 }
1575
1576 static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
1577 {
1578         int err = symbol__init(NULL);
1579
1580         if (err)
1581                 return err;
1582
1583         trace->host = machine__new_host();
1584         if (trace->host == NULL)
1585                 return -ENOMEM;
1586
1587         if (trace_event__register_resolver(trace->host, machine__resolve_kernel_addr) < 0)
1588                 return -errno;
1589
1590         err = __machine__synthesize_threads(trace->host, &trace->tool, &trace->opts.target,
1591                                             evlist->threads, trace__tool_process, false,
1592                                             trace->opts.proc_map_timeout);
1593         if (err)
1594                 symbol__exit();
1595
1596         return err;
1597 }
1598
1599 static int syscall__set_arg_fmts(struct syscall *sc)
1600 {
1601         struct format_field *field;
1602         int idx = 0;
1603
1604         sc->arg_scnprintf = calloc(sc->nr_args, sizeof(void *));
1605         if (sc->arg_scnprintf == NULL)
1606                 return -1;
1607
1608         if (sc->fmt)
1609                 sc->arg_parm = sc->fmt->arg_parm;
1610
1611         for (field = sc->args; field; field = field->next) {
1612                 if (sc->fmt && sc->fmt->arg_scnprintf[idx])
1613                         sc->arg_scnprintf[idx] = sc->fmt->arg_scnprintf[idx];
1614                 else if (field->flags & FIELD_IS_POINTER)
1615                         sc->arg_scnprintf[idx] = syscall_arg__scnprintf_hex;
1616                 ++idx;
1617         }
1618
1619         return 0;
1620 }
1621
1622 static int trace__read_syscall_info(struct trace *trace, int id)
1623 {
1624         char tp_name[128];
1625         struct syscall *sc;
1626         const char *name = audit_syscall_to_name(id, trace->audit.machine);
1627
1628         if (name == NULL)
1629                 return -1;
1630
1631         if (id > trace->syscalls.max) {
1632                 struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc));
1633
1634                 if (nsyscalls == NULL)
1635                         return -1;
1636
1637                 if (trace->syscalls.max != -1) {
1638                         memset(nsyscalls + trace->syscalls.max + 1, 0,
1639                                (id - trace->syscalls.max) * sizeof(*sc));
1640                 } else {
1641                         memset(nsyscalls, 0, (id + 1) * sizeof(*sc));
1642                 }
1643
1644                 trace->syscalls.table = nsyscalls;
1645                 trace->syscalls.max   = id;
1646         }
1647
1648         sc = trace->syscalls.table + id;
1649         sc->name = name;
1650
1651         sc->fmt  = syscall_fmt__find(sc->name);
1652
1653         snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name);
1654         sc->tp_format = trace_event__tp_format("syscalls", tp_name);
1655
1656         if (sc->tp_format == NULL && sc->fmt && sc->fmt->alias) {
1657                 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias);
1658                 sc->tp_format = trace_event__tp_format("syscalls", tp_name);
1659         }
1660
1661         if (sc->tp_format == NULL)
1662                 return -1;
1663
1664         sc->args = sc->tp_format->format.fields;
1665         sc->nr_args = sc->tp_format->format.nr_fields;
1666         /* drop nr field - not relevant here; does not exist on older kernels */
1667         if (sc->args && strcmp(sc->args->name, "nr") == 0) {
1668                 sc->args = sc->args->next;
1669                 --sc->nr_args;
1670         }
1671
1672         sc->is_exit = !strcmp(name, "exit_group") || !strcmp(name, "exit");
1673
1674         return syscall__set_arg_fmts(sc);
1675 }
1676
1677 static int trace__validate_ev_qualifier(struct trace *trace)
1678 {
1679         int err = 0, i;
1680         struct str_node *pos;
1681
1682         trace->ev_qualifier_ids.nr = strlist__nr_entries(trace->ev_qualifier);
1683         trace->ev_qualifier_ids.entries = malloc(trace->ev_qualifier_ids.nr *
1684                                                  sizeof(trace->ev_qualifier_ids.entries[0]));
1685
1686         if (trace->ev_qualifier_ids.entries == NULL) {
1687                 fputs("Error:\tNot enough memory for allocating events qualifier ids\n",
1688                        trace->output);
1689                 err = -EINVAL;
1690                 goto out;
1691         }
1692
1693         i = 0;
1694
1695         strlist__for_each(pos, trace->ev_qualifier) {
1696                 const char *sc = pos->s;
1697                 int id = audit_name_to_syscall(sc, trace->audit.machine);
1698
1699                 if (id < 0) {
1700                         if (err == 0) {
1701                                 fputs("Error:\tInvalid syscall ", trace->output);
1702                                 err = -EINVAL;
1703                         } else {
1704                                 fputs(", ", trace->output);
1705                         }
1706
1707                         fputs(sc, trace->output);
1708                 }
1709
1710                 trace->ev_qualifier_ids.entries[i++] = id;
1711         }
1712
1713         if (err < 0) {
1714                 fputs("\nHint:\ttry 'perf list syscalls:sys_enter_*'"
1715                       "\nHint:\tand: 'man syscalls'\n", trace->output);
1716                 zfree(&trace->ev_qualifier_ids.entries);
1717                 trace->ev_qualifier_ids.nr = 0;
1718         }
1719 out:
1720         return err;
1721 }
1722
1723 /*
1724  * args is to be interpreted as a series of longs but we need to handle
1725  * 8-byte unaligned accesses. args points to raw_data within the event
1726  * and raw_data is guaranteed to be 8-byte unaligned because it is
1727  * preceded by raw_size which is a u32. So we need to copy args to a temp
1728  * variable to read it. Most notably this avoids extended load instructions
1729  * on unaligned addresses
1730  */
1731
1732 static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
1733                                       unsigned char *args, struct trace *trace,
1734                                       struct thread *thread)
1735 {
1736         size_t printed = 0;
1737         unsigned char *p;
1738         unsigned long val;
1739
1740         if (sc->args != NULL) {
1741                 struct format_field *field;
1742                 u8 bit = 1;
1743                 struct syscall_arg arg = {
1744                         .idx    = 0,
1745                         .mask   = 0,
1746                         .trace  = trace,
1747                         .thread = thread,
1748                 };
1749
1750                 for (field = sc->args; field;
1751                      field = field->next, ++arg.idx, bit <<= 1) {
1752                         if (arg.mask & bit)
1753                                 continue;
1754
1755                         /* special care for unaligned accesses */
1756                         p = args + sizeof(unsigned long) * arg.idx;
1757                         memcpy(&val, p, sizeof(val));
1758
1759                         /*
1760                          * Suppress this argument if its value is zero and
1761                          * and we don't have a string associated in an
1762                          * strarray for it.
1763                          */
1764                         if (val == 0 &&
1765                             !(sc->arg_scnprintf &&
1766                               sc->arg_scnprintf[arg.idx] == SCA_STRARRAY &&
1767                               sc->arg_parm[arg.idx]))
1768                                 continue;
1769
1770                         printed += scnprintf(bf + printed, size - printed,
1771                                              "%s%s: ", printed ? ", " : "", field->name);
1772                         if (sc->arg_scnprintf && sc->arg_scnprintf[arg.idx]) {
1773                                 arg.val = val;
1774                                 if (sc->arg_parm)
1775                                         arg.parm = sc->arg_parm[arg.idx];
1776                                 printed += sc->arg_scnprintf[arg.idx](bf + printed,
1777                                                                       size - printed, &arg);
1778                         } else {
1779                                 printed += scnprintf(bf + printed, size - printed,
1780                                                      "%ld", val);
1781                         }
1782                 }
1783         } else {
1784                 int i = 0;
1785
1786                 while (i < 6) {
1787                         /* special care for unaligned accesses */
1788                         p = args + sizeof(unsigned long) * i;
1789                         memcpy(&val, p, sizeof(val));
1790                         printed += scnprintf(bf + printed, size - printed,
1791                                              "%sarg%d: %ld",
1792                                              printed ? ", " : "", i, val);
1793                         ++i;
1794                 }
1795         }
1796
1797         return printed;
1798 }
1799
/* Type of the per-evsel sample callbacks stored in evsel->handler. */
typedef int (*tracepoint_handler)(struct trace *trace,
				  struct perf_evsel *evsel,
				  union perf_event *event,
				  struct perf_sample *sample);
1803
1804 static struct syscall *trace__syscall_info(struct trace *trace,
1805                                            struct perf_evsel *evsel, int id)
1806 {
1807
1808         if (id < 0) {
1809
1810                 /*
1811                  * XXX: Noticed on x86_64, reproduced as far back as 3.0.36, haven't tried
1812                  * before that, leaving at a higher verbosity level till that is
1813                  * explained. Reproduced with plain ftrace with:
1814                  *
1815                  * echo 1 > /t/events/raw_syscalls/sys_exit/enable
1816                  * grep "NR -1 " /t/trace_pipe
1817                  *
1818                  * After generating some load on the machine.
1819                  */
1820                 if (verbose > 1) {
1821                         static u64 n;
1822                         fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n",
1823                                 id, perf_evsel__name(evsel), ++n);
1824                 }
1825                 return NULL;
1826         }
1827
1828         if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) &&
1829             trace__read_syscall_info(trace, id))
1830                 goto out_cant_read;
1831
1832         if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL))
1833                 goto out_cant_read;
1834
1835         return &trace->syscalls.table[id];
1836
1837 out_cant_read:
1838         if (verbose) {
1839                 fprintf(trace->output, "Problems reading syscall %d", id);
1840                 if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL)
1841                         fprintf(trace->output, "(%s)", trace->syscalls.table[id].name);
1842                 fputs(" information\n", trace->output);
1843         }
1844         return NULL;
1845 }
1846
1847 static void thread__update_stats(struct thread_trace *ttrace,
1848                                  int id, struct perf_sample *sample)
1849 {
1850         struct int_node *inode;
1851         struct stats *stats;
1852         u64 duration = 0;
1853
1854         inode = intlist__findnew(ttrace->syscall_stats, id);
1855         if (inode == NULL)
1856                 return;
1857
1858         stats = inode->priv;
1859         if (stats == NULL) {
1860                 stats = malloc(sizeof(struct stats));
1861                 if (stats == NULL)
1862                         return;
1863                 init_stats(stats);
1864                 inode->priv = stats;
1865         }
1866
1867         if (ttrace->entry_time && sample->time > ttrace->entry_time)
1868                 duration = sample->time - ttrace->entry_time;
1869
1870         update_stats(stats, duration);
1871 }
1872
1873 static int trace__printf_interrupted_entry(struct trace *trace, struct perf_sample *sample)
1874 {
1875         struct thread_trace *ttrace;
1876         u64 duration;
1877         size_t printed;
1878
1879         if (trace->current == NULL)
1880                 return 0;
1881
1882         ttrace = thread__priv(trace->current);
1883
1884         if (!ttrace->entry_pending)
1885                 return 0;
1886
1887         duration = sample->time - ttrace->entry_time;
1888
1889         printed  = trace__fprintf_entry_head(trace, trace->current, duration, sample->time, trace->output);
1890         printed += fprintf(trace->output, "%-70s) ...\n", ttrace->entry_str);
1891         ttrace->entry_pending = false;
1892
1893         return printed;
1894 }
1895
1896 static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
1897                             union perf_event *event __maybe_unused,
1898                             struct perf_sample *sample)
1899 {
1900         char *msg;
1901         void *args;
1902         size_t printed = 0;
1903         struct thread *thread;
1904         int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1;
1905         struct syscall *sc = trace__syscall_info(trace, evsel, id);
1906         struct thread_trace *ttrace;
1907
1908         if (sc == NULL)
1909                 return -1;
1910
1911         thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1912         ttrace = thread__trace(thread, trace->output);
1913         if (ttrace == NULL)
1914                 goto out_put;
1915
1916         args = perf_evsel__sc_tp_ptr(evsel, args, sample);
1917
1918         if (ttrace->entry_str == NULL) {
1919                 ttrace->entry_str = malloc(trace__entry_str_size);
1920                 if (!ttrace->entry_str)
1921                         goto out_put;
1922         }
1923
1924         if (!trace->summary_only)
1925                 trace__printf_interrupted_entry(trace, sample);
1926
1927         ttrace->entry_time = sample->time;
1928         msg = ttrace->entry_str;
1929         printed += scnprintf(msg + printed, trace__entry_str_size - printed, "%s(", sc->name);
1930
1931         printed += syscall__scnprintf_args(sc, msg + printed, trace__entry_str_size - printed,
1932                                            args, trace, thread);
1933
1934         if (sc->is_exit) {
1935                 if (!trace->duration_filter && !trace->summary_only) {
1936                         trace__fprintf_entry_head(trace, thread, 1, sample->time, trace->output);
1937                         fprintf(trace->output, "%-70s\n", ttrace->entry_str);
1938                 }
1939         } else
1940                 ttrace->entry_pending = true;
1941
1942         if (trace->current != thread) {
1943                 thread__put(trace->current);
1944                 trace->current = thread__get(thread);
1945         }
1946         err = 0;
1947 out_put:
1948         thread__put(thread);
1949         return err;
1950 }
1951
1952 static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
1953                            union perf_event *event __maybe_unused,
1954                            struct perf_sample *sample)
1955 {
1956         long ret;
1957         u64 duration = 0;
1958         struct thread *thread;
1959         int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1;
1960         struct syscall *sc = trace__syscall_info(trace, evsel, id);
1961         struct thread_trace *ttrace;
1962
1963         if (sc == NULL)
1964                 return -1;
1965
1966         thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1967         ttrace = thread__trace(thread, trace->output);
1968         if (ttrace == NULL)
1969                 goto out_put;
1970
1971         if (trace->summary)
1972                 thread__update_stats(ttrace, id, sample);
1973
1974         ret = perf_evsel__sc_tp_uint(evsel, ret, sample);
1975
1976         if (id == trace->audit.open_id && ret >= 0 && trace->last_vfs_getname) {
1977                 trace__set_fd_pathname(thread, ret, trace->last_vfs_getname);
1978                 trace->last_vfs_getname = NULL;
1979                 ++trace->stats.vfs_getname;
1980         }
1981
1982         ttrace->exit_time = sample->time;
1983
1984         if (ttrace->entry_time) {
1985                 duration = sample->time - ttrace->entry_time;
1986                 if (trace__filter_duration(trace, duration))
1987                         goto out;
1988         } else if (trace->duration_filter)
1989                 goto out;
1990
1991         if (trace->summary_only)
1992                 goto out;
1993
1994         trace__fprintf_entry_head(trace, thread, duration, sample->time, trace->output);
1995
1996         if (ttrace->entry_pending) {
1997                 fprintf(trace->output, "%-70s", ttrace->entry_str);
1998         } else {
1999                 fprintf(trace->output, " ... [");
2000                 color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued");
2001                 fprintf(trace->output, "]: %s()", sc->name);
2002         }
2003
2004         if (sc->fmt == NULL) {
2005 signed_print:
2006                 fprintf(trace->output, ") = %ld", ret);
2007         } else if (ret < 0 && sc->fmt->errmsg) {
2008                 char bf[STRERR_BUFSIZE];
2009                 const char *emsg = strerror_r(-ret, bf, sizeof(bf)),
2010                            *e = audit_errno_to_name(-ret);
2011
2012                 fprintf(trace->output, ") = -1 %s %s", e, emsg);
2013         } else if (ret == 0 && sc->fmt->timeout)
2014                 fprintf(trace->output, ") = 0 Timeout");
2015         else if (sc->fmt->hexret)
2016                 fprintf(trace->output, ") = %#lx", ret);
2017         else
2018                 goto signed_print;
2019
2020         fputc('\n', trace->output);
2021 out:
2022         ttrace->entry_pending = false;
2023         err = 0;
2024 out_put:
2025         thread__put(thread);
2026         return err;
2027 }
2028
2029 static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel,
2030                               union perf_event *event __maybe_unused,
2031                               struct perf_sample *sample)
2032 {
2033         struct thread *thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
2034         struct thread_trace *ttrace;
2035         size_t filename_len, entry_str_len, to_move;
2036         ssize_t remaining_space;
2037         char *pos;
2038         const char *filename;
2039
2040         trace->last_vfs_getname = perf_evsel__rawptr(evsel, sample, "pathname");
2041
2042         if (!thread)
2043                 goto out;
2044
2045         ttrace = thread__priv(thread);
2046         if (!ttrace)
2047                 goto out;
2048
2049         if (!ttrace->filename.ptr)
2050                 goto out;
2051
2052         entry_str_len = strlen(ttrace->entry_str);
2053         remaining_space = trace__entry_str_size - entry_str_len - 1; /* \0 */
2054         if (remaining_space <= 0)
2055                 goto out;
2056
2057         filename = trace->last_vfs_getname;
2058         filename_len = strlen(filename);
2059         if (filename_len > (size_t)remaining_space) {
2060                 filename += filename_len - remaining_space;
2061                 filename_len = remaining_space;
2062         }
2063
2064         to_move = entry_str_len - ttrace->filename.entry_str_pos + 1; /* \0 */
2065         pos = ttrace->entry_str + ttrace->filename.entry_str_pos;
2066         memmove(pos + filename_len, pos, to_move);
2067         memcpy(pos, filename, filename_len);
2068
2069         ttrace->filename.ptr = 0;
2070         ttrace->filename.entry_str_pos = 0;
2071 out:
2072         return 0;
2073 }
2074
2075 static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel,
2076                                      union perf_event *event __maybe_unused,
2077                                      struct perf_sample *sample)
2078 {
2079         u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
2080         double runtime_ms = (double)runtime / NSEC_PER_MSEC;
2081         struct thread *thread = machine__findnew_thread(trace->host,
2082                                                         sample->pid,
2083                                                         sample->tid);
2084         struct thread_trace *ttrace = thread__trace(thread, trace->output);
2085
2086         if (ttrace == NULL)
2087                 goto out_dump;
2088
2089         ttrace->runtime_ms += runtime_ms;
2090         trace->runtime_ms += runtime_ms;
2091         thread__put(thread);
2092         return 0;
2093
2094 out_dump:
2095         fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n",
2096                evsel->name,
2097                perf_evsel__strval(evsel, sample, "comm"),
2098                (pid_t)perf_evsel__intval(evsel, sample, "pid"),
2099                runtime,
2100                perf_evsel__intval(evsel, sample, "vruntime"));
2101         thread__put(thread);
2102         return 0;
2103 }
2104
2105 static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel,
2106                                 union perf_event *event __maybe_unused,
2107                                 struct perf_sample *sample)
2108 {
2109         trace__printf_interrupted_entry(trace, sample);
2110         trace__fprintf_tstamp(trace, sample->time, trace->output);
2111
2112         if (trace->trace_syscalls)
2113                 fprintf(trace->output, "(         ): ");
2114
2115         fprintf(trace->output, "%s:", evsel->name);
2116
2117         if (evsel->tp_format) {
2118                 event_format__fprintf(evsel->tp_format, sample->cpu,
2119                                       sample->raw_data, sample->raw_size,
2120                                       trace->output);
2121         }
2122
2123         fprintf(trace->output, ")\n");
2124         return 0;
2125 }
2126
2127 static void print_location(FILE *f, struct perf_sample *sample,
2128                            struct addr_location *al,
2129                            bool print_dso, bool print_sym)
2130 {
2131
2132         if ((verbose || print_dso) && al->map)
2133                 fprintf(f, "%s@", al->map->dso->long_name);
2134
2135         if ((verbose || print_sym) && al->sym)
2136                 fprintf(f, "%s+0x%" PRIx64, al->sym->name,
2137                         al->addr - al->sym->start);
2138         else if (al->map)
2139                 fprintf(f, "0x%" PRIx64, al->addr);
2140         else
2141                 fprintf(f, "0x%" PRIx64, sample->addr);
2142 }
2143
2144 static int trace__pgfault(struct trace *trace,
2145                           struct perf_evsel *evsel,
2146                           union perf_event *event,
2147                           struct perf_sample *sample)
2148 {
2149         struct thread *thread;
2150         u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
2151         struct addr_location al;
2152         char map_type = 'd';
2153         struct thread_trace *ttrace;
2154         int err = -1;
2155
2156         thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
2157         ttrace = thread__trace(thread, trace->output);
2158         if (ttrace == NULL)
2159                 goto out_put;
2160
2161         if (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ)
2162                 ttrace->pfmaj++;
2163         else
2164                 ttrace->pfmin++;
2165
2166         if (trace->summary_only)
2167                 goto out;
2168
2169         thread__find_addr_location(thread, cpumode, MAP__FUNCTION,
2170                               sample->ip, &al);
2171
2172         trace__fprintf_entry_head(trace, thread, 0, sample->time, trace->output);
2173
2174         fprintf(trace->output, "%sfault [",
2175                 evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ?
2176                 "maj" : "min");
2177
2178         print_location(trace->output, sample, &al, false, true);
2179
2180         fprintf(trace->output, "] => ");
2181
2182         thread__find_addr_location(thread, cpumode, MAP__VARIABLE,
2183                                    sample->addr, &al);
2184
2185         if (!al.map) {
2186                 thread__find_addr_location(thread, cpumode,
2187                                            MAP__FUNCTION, sample->addr, &al);
2188
2189                 if (al.map)
2190                         map_type = 'x';
2191                 else
2192                         map_type = '?';
2193         }
2194
2195         print_location(trace->output, sample, &al, true, false);
2196
2197         fprintf(trace->output, " (%c%c)\n", map_type, al.level);
2198 out:
2199         err = 0;
2200 out_put:
2201         thread__put(thread);
2202         return err;
2203 }
2204
2205 static bool skip_sample(struct trace *trace, struct perf_sample *sample)
2206 {
2207         if ((trace->pid_list && intlist__find(trace->pid_list, sample->pid)) ||
2208             (trace->tid_list && intlist__find(trace->tid_list, sample->tid)))
2209                 return false;
2210
2211         if (trace->pid_list || trace->tid_list)
2212                 return true;
2213
2214         return false;
2215 }
2216
2217 static int trace__process_sample(struct perf_tool *tool,
2218                                  union perf_event *event,
2219                                  struct perf_sample *sample,
2220                                  struct perf_evsel *evsel,
2221                                  struct machine *machine __maybe_unused)
2222 {
2223         struct trace *trace = container_of(tool, struct trace, tool);
2224         int err = 0;
2225
2226         tracepoint_handler handler = evsel->handler;
2227
2228         if (skip_sample(trace, sample))
2229                 return 0;
2230
2231         if (!trace->full_time && trace->base_time == 0)
2232                 trace->base_time = sample->time;
2233
2234         if (handler) {
2235                 ++trace->nr_events;
2236                 handler(trace, evsel, event, sample);
2237         }
2238
2239         return err;
2240 }
2241
2242 static int parse_target_str(struct trace *trace)
2243 {
2244         if (trace->opts.target.pid) {
2245                 trace->pid_list = intlist__new(trace->opts.target.pid);
2246                 if (trace->pid_list == NULL) {
2247                         pr_err("Error parsing process id string\n");
2248                         return -EINVAL;
2249                 }
2250         }
2251
2252         if (trace->opts.target.tid) {
2253                 trace->tid_list = intlist__new(trace->opts.target.tid);
2254                 if (trace->tid_list == NULL) {
2255                         pr_err("Error parsing thread id string\n");
2256                         return -EINVAL;
2257                 }
2258         }
2259
2260         return 0;
2261 }
2262
2263 static int trace__record(struct trace *trace, int argc, const char **argv)
2264 {
2265         unsigned int rec_argc, i, j;
2266         const char **rec_argv;
2267         const char * const record_args[] = {
2268                 "record",
2269                 "-R",
2270                 "-m", "1024",
2271                 "-c", "1",
2272         };
2273
2274         const char * const sc_args[] = { "-e", };
2275         unsigned int sc_args_nr = ARRAY_SIZE(sc_args);
2276         const char * const majpf_args[] = { "-e", "major-faults" };
2277         unsigned int majpf_args_nr = ARRAY_SIZE(majpf_args);
2278         const char * const minpf_args[] = { "-e", "minor-faults" };
2279         unsigned int minpf_args_nr = ARRAY_SIZE(minpf_args);
2280
2281         /* +1 is for the event string below */
2282         rec_argc = ARRAY_SIZE(record_args) + sc_args_nr + 1 +
2283                 majpf_args_nr + minpf_args_nr + argc;
2284         rec_argv = calloc(rec_argc + 1, sizeof(char *));
2285
2286         if (rec_argv == NULL)
2287                 return -ENOMEM;
2288
2289         j = 0;
2290         for (i = 0; i < ARRAY_SIZE(record_args); i++)
2291                 rec_argv[j++] = record_args[i];
2292
2293         if (trace->trace_syscalls) {
2294                 for (i = 0; i < sc_args_nr; i++)
2295                         rec_argv[j++] = sc_args[i];
2296
2297                 /* event string may be different for older kernels - e.g., RHEL6 */
2298                 if (is_valid_tracepoint("raw_syscalls:sys_enter"))
2299                         rec_argv[j++] = "raw_syscalls:sys_enter,raw_syscalls:sys_exit";
2300                 else if (is_valid_tracepoint("syscalls:sys_enter"))
2301                         rec_argv[j++] = "syscalls:sys_enter,syscalls:sys_exit";
2302                 else {
2303                         pr_err("Neither raw_syscalls nor syscalls events exist.\n");
2304                         return -1;
2305                 }
2306         }
2307
2308         if (trace->trace_pgfaults & TRACE_PFMAJ)
2309                 for (i = 0; i < majpf_args_nr; i++)
2310                         rec_argv[j++] = majpf_args[i];
2311
2312         if (trace->trace_pgfaults & TRACE_PFMIN)
2313                 for (i = 0; i < minpf_args_nr; i++)
2314                         rec_argv[j++] = minpf_args[i];
2315
2316         for (i = 0; i < (unsigned int)argc; i++)
2317                 rec_argv[j++] = argv[i];
2318
2319         return cmd_record(j, rec_argv, NULL);
2320 }
2321
/* Forward declaration; the definition lives elsewhere in this file. */
static size_t trace__fprintf_thread_summary(struct trace *trace,
					    FILE *fp);
2323
2324 static bool perf_evlist__add_vfs_getname(struct perf_evlist *evlist)
2325 {
2326         struct perf_evsel *evsel = perf_evsel__newtp("probe", "vfs_getname");
2327         if (evsel == NULL)
2328                 return false;
2329
2330         if (perf_evsel__field(evsel, "pathname") == NULL) {
2331                 perf_evsel__delete(evsel);
2332                 return false;
2333         }
2334
2335         evsel->handler = trace__vfs_getname;
2336         perf_evlist__add(evlist, evsel);
2337         return true;
2338 }
2339
2340 static int perf_evlist__add_pgfault(struct perf_evlist *evlist,
2341                                     u64 config)
2342 {
2343         struct perf_evsel *evsel;
2344         struct perf_event_attr attr = {
2345                 .type = PERF_TYPE_SOFTWARE,
2346                 .mmap_data = 1,
2347         };
2348
2349         attr.config = config;
2350         attr.sample_period = 1;
2351
2352         event_attr_init(&attr);
2353
2354         evsel = perf_evsel__new(&attr);
2355         if (!evsel)
2356                 return -ENOMEM;
2357
2358         evsel->handler = trace__pgfault;
2359         perf_evlist__add(evlist, evsel);
2360
2361         return 0;
2362 }
2363
2364 static void trace__handle_event(struct trace *trace, union perf_event *event, struct perf_sample *sample)
2365 {
2366         const u32 type = event->header.type;
2367         struct perf_evsel *evsel;
2368
2369         if (!trace->full_time && trace->base_time == 0)
2370                 trace->base_time = sample->time;
2371
2372         if (type != PERF_RECORD_SAMPLE) {
2373                 trace__process_event(trace, trace->host, event, sample);
2374                 return;
2375         }
2376
2377         evsel = perf_evlist__id2evsel(trace->evlist, sample->id);
2378         if (evsel == NULL) {
2379                 fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample->id);
2380                 return;
2381         }
2382
2383         if (evsel->attr.type == PERF_TYPE_TRACEPOINT &&
2384             sample->raw_data == NULL) {
2385                 fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
2386                        perf_evsel__name(evsel), sample->tid,
2387                        sample->cpu, sample->raw_size);
2388         } else {
2389                 tracepoint_handler handler = evsel->handler;
2390                 handler(trace, evsel, event, sample);
2391         }
2392 }
2393
2394 static int trace__add_syscall_newtp(struct trace *trace)
2395 {
2396         int ret = -1;
2397         struct perf_evlist *evlist = trace->evlist;
2398         struct perf_evsel *sys_enter, *sys_exit;
2399
2400         sys_enter = perf_evsel__syscall_newtp("sys_enter", trace__sys_enter);
2401         if (sys_enter == NULL)
2402                 goto out;
2403
2404         if (perf_evsel__init_sc_tp_ptr_field(sys_enter, args))
2405                 goto out_delete_sys_enter;
2406
2407         sys_exit = perf_evsel__syscall_newtp("sys_exit", trace__sys_exit);
2408         if (sys_exit == NULL)
2409                 goto out_delete_sys_enter;
2410
2411         if (perf_evsel__init_sc_tp_uint_field(sys_exit, ret))
2412                 goto out_delete_sys_exit;
2413
2414         perf_evlist__add(evlist, sys_enter);
2415         perf_evlist__add(evlist, sys_exit);
2416
2417         trace->syscalls.events.sys_enter = sys_enter;
2418         trace->syscalls.events.sys_exit  = sys_exit;
2419
2420         ret = 0;
2421 out:
2422         return ret;
2423
2424 out_delete_sys_exit:
2425         perf_evsel__delete_priv(sys_exit);
2426 out_delete_sys_enter:
2427         perf_evsel__delete_priv(sys_enter);
2428         goto out;
2429 }
2430
2431 static int trace__set_ev_qualifier_filter(struct trace *trace)
2432 {
2433         int err = -1;
2434         char *filter = asprintf_expr_inout_ints("id", !trace->not_ev_qualifier,
2435                                                 trace->ev_qualifier_ids.nr,
2436                                                 trace->ev_qualifier_ids.entries);
2437
2438         if (filter == NULL)
2439                 goto out_enomem;
2440
2441         if (!perf_evsel__append_filter(trace->syscalls.events.sys_enter, "&&", filter))
2442                 err = perf_evsel__append_filter(trace->syscalls.events.sys_exit, "&&", filter);
2443
2444         free(filter);
2445 out:
2446         return err;
2447 out_enomem:
2448         errno = ENOMEM;
2449         goto out;
2450 }
2451
2452 static int trace__run(struct trace *trace, int argc, const char **argv)
2453 {
2454         struct perf_evlist *evlist = trace->evlist;
2455         struct perf_evsel *evsel;
2456         int err = -1, i;
2457         unsigned long before;
2458         const bool forks = argc > 0;
2459         bool draining = false;
2460
2461         trace->live = true;
2462
2463         if (trace->trace_syscalls && trace__add_syscall_newtp(trace))
2464                 goto out_error_raw_syscalls;
2465
2466         if (trace->trace_syscalls)
2467                 trace->vfs_getname = perf_evlist__add_vfs_getname(evlist);
2468
2469         if ((trace->trace_pgfaults & TRACE_PFMAJ) &&
2470             perf_evlist__add_pgfault(evlist, PERF_COUNT_SW_PAGE_FAULTS_MAJ)) {
2471                 goto out_error_mem;
2472         }
2473
2474         if ((trace->trace_pgfaults & TRACE_PFMIN) &&
2475             perf_evlist__add_pgfault(evlist, PERF_COUNT_SW_PAGE_FAULTS_MIN))
2476                 goto out_error_mem;
2477
2478         if (trace->sched &&
2479             perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
2480                                    trace__sched_stat_runtime))
2481                 goto out_error_sched_stat_runtime;
2482
2483         err = perf_evlist__create_maps(evlist, &trace->opts.target);
2484         if (err < 0) {
2485                 fprintf(trace->output, "Problems parsing the target to trace, check your options!\n");
2486                 goto out_delete_evlist;
2487         }
2488
2489         err = trace__symbols_init(trace, evlist);
2490         if (err < 0) {
2491                 fprintf(trace->output, "Problems initializing symbol libraries!\n");
2492                 goto out_delete_evlist;
2493         }
2494
2495         perf_evlist__config(evlist, &trace->opts);
2496
2497         signal(SIGCHLD, sig_handler);
2498         signal(SIGINT, sig_handler);
2499
2500         if (forks) {
2501                 err = perf_evlist__prepare_workload(evlist, &trace->opts.target,
2502                                                     argv, false, NULL);
2503                 if (err < 0) {
2504                         fprintf(trace->output, "Couldn't run the workload!\n");
2505                         goto out_delete_evlist;
2506                 }
2507         }
2508
2509         err = perf_evlist__open(evlist);
2510         if (err < 0)
2511                 goto out_error_open;
2512
2513         /*
2514          * Better not use !target__has_task() here because we need to cover the
2515          * case where no threads were specified in the command line, but a
2516          * workload was, and in that case we will fill in the thread_map when
2517          * we fork the workload in perf_evlist__prepare_workload.
2518          */
2519         if (trace->filter_pids.nr > 0)
2520                 err = perf_evlist__set_filter_pids(evlist, trace->filter_pids.nr, trace->filter_pids.entries);
2521         else if (thread_map__pid(evlist->threads, 0) == -1)
2522                 err = perf_evlist__set_filter_pid(evlist, getpid());
2523
2524         if (err < 0)
2525                 goto out_error_mem;
2526
2527         if (trace->ev_qualifier_ids.nr > 0) {
2528                 err = trace__set_ev_qualifier_filter(trace);
2529                 if (err < 0)
2530                         goto out_errno;
2531
2532                 pr_debug("event qualifier tracepoint filter: %s\n",
2533                          trace->syscalls.events.sys_exit->filter);
2534         }
2535
2536         err = perf_evlist__apply_filters(evlist, &evsel);
2537         if (err < 0)
2538                 goto out_error_apply_filters;
2539
2540         err = perf_evlist__mmap(evlist, trace->opts.mmap_pages, false);
2541         if (err < 0)
2542                 goto out_error_mmap;
2543
2544         if (!target__none(&trace->opts.target))
2545                 perf_evlist__enable(evlist);
2546
2547         if (forks)
2548                 perf_evlist__start_workload(evlist);
2549
2550         trace->multiple_threads = thread_map__pid(evlist->threads, 0) == -1 ||
2551                                   evlist->threads->nr > 1 ||
2552                                   perf_evlist__first(evlist)->attr.inherit;
2553 again:
2554         before = trace->nr_events;
2555
2556         for (i = 0; i < evlist->nr_mmaps; i++) {
2557                 union perf_event *event;
2558
2559                 while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) {
2560                         struct perf_sample sample;
2561
2562                         ++trace->nr_events;
2563
2564                         err = perf_evlist__parse_sample(evlist, event, &sample);
2565                         if (err) {
2566                                 fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err);
2567                                 goto next_event;
2568                         }
2569
2570                         trace__handle_event(trace, event, &sample);
2571 next_event:
2572                         perf_evlist__mmap_consume(evlist, i);
2573
2574                         if (interrupted)
2575                                 goto out_disable;
2576
2577                         if (done && !draining) {
2578                                 perf_evlist__disable(evlist);
2579                                 draining = true;
2580                         }
2581                 }
2582         }
2583
2584         if (trace->nr_events == before) {
2585                 int timeout = done ? 100 : -1;
2586
2587                 if (!draining && perf_evlist__poll(evlist, timeout) > 0) {
2588                         if (perf_evlist__filter_pollfd(evlist, POLLERR | POLLHUP) == 0)
2589                                 draining = true;
2590
2591                         goto again;
2592                 }
2593         } else {
2594                 goto again;
2595         }
2596
2597 out_disable:
2598         thread__zput(trace->current);
2599
2600         perf_evlist__disable(evlist);
2601
2602         if (!err) {
2603                 if (trace->summary)
2604                         trace__fprintf_thread_summary(trace, trace->output);
2605
2606                 if (trace->show_tool_stats) {
2607                         fprintf(trace->output, "Stats:\n "
2608                                                " vfs_getname : %" PRIu64 "\n"
2609                                                " proc_getname: %" PRIu64 "\n",
2610                                 trace->stats.vfs_getname,
2611                                 trace->stats.proc_getname);
2612                 }
2613         }
2614
2615 out_delete_evlist:
2616         perf_evlist__delete(evlist);
2617         trace->evlist = NULL;
2618         trace->live = false;
2619         return err;
2620 {
2621         char errbuf[BUFSIZ];
2622
2623 out_error_sched_stat_runtime:
2624         debugfs__strerror_open_tp(errno, errbuf, sizeof(errbuf), "sched", "sched_stat_runtime");
2625         goto out_error;
2626
2627 out_error_raw_syscalls:
2628         debugfs__strerror_open_tp(errno, errbuf, sizeof(errbuf), "raw_syscalls", "sys_(enter|exit)");
2629         goto out_error;
2630
2631 out_error_mmap:
2632         perf_evlist__strerror_mmap(evlist, errno, errbuf, sizeof(errbuf));
2633         goto out_error;
2634
2635 out_error_open:
2636         perf_evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf));
2637
2638 out_error:
2639         fprintf(trace->output, "%s\n", errbuf);
2640         goto out_delete_evlist;
2641
2642 out_error_apply_filters:
2643         fprintf(trace->output,
2644                 "Failed to set filter \"%s\" on event %s with %d (%s)\n",
2645                 evsel->filter, perf_evsel__name(evsel), errno,
2646                 strerror_r(errno, errbuf, sizeof(errbuf)));
2647         goto out_delete_evlist;
2648 }
2649 out_error_mem:
2650         fprintf(trace->output, "Not enough memory to run!\n");
2651         goto out_delete_evlist;
2652
2653 out_errno:
2654         fprintf(trace->output, "errno=%d,%s\n", errno, strerror(errno));
2655         goto out_delete_evlist;
2656 }
2657
2658 static int trace__replay(struct trace *trace)
2659 {
2660         const struct perf_evsel_str_handler handlers[] = {
2661                 { "probe:vfs_getname",       trace__vfs_getname, },
2662         };
2663         struct perf_data_file file = {
2664                 .path  = input_name,
2665                 .mode  = PERF_DATA_MODE_READ,
2666                 .force = trace->force,
2667         };
2668         struct perf_session *session;
2669         struct perf_evsel *evsel;
2670         int err = -1;
2671
2672         trace->tool.sample        = trace__process_sample;
2673         trace->tool.mmap          = perf_event__process_mmap;
2674         trace->tool.mmap2         = perf_event__process_mmap2;
2675         trace->tool.comm          = perf_event__process_comm;
2676         trace->tool.exit          = perf_event__process_exit;
2677         trace->tool.fork          = perf_event__process_fork;
2678         trace->tool.attr          = perf_event__process_attr;
2679         trace->tool.tracing_data = perf_event__process_tracing_data;
2680         trace->tool.build_id      = perf_event__process_build_id;
2681
2682         trace->tool.ordered_events = true;
2683         trace->tool.ordering_requires_timestamps = true;
2684
2685         /* add tid to output */
2686         trace->multiple_threads = true;
2687
2688         session = perf_session__new(&file, false, &trace->tool);
2689         if (session == NULL)
2690                 return -1;
2691
2692         if (symbol__init(&session->header.env) < 0)
2693                 goto out;
2694
2695         trace->host = &session->machines.host;
2696
2697         err = perf_session__set_tracepoints_handlers(session, handlers);
2698         if (err)
2699                 goto out;
2700
2701         evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2702                                                      "raw_syscalls:sys_enter");
2703         /* older kernels have syscalls tp versus raw_syscalls */
2704         if (evsel == NULL)
2705                 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2706                                                              "syscalls:sys_enter");
2707
2708         if (evsel &&
2709             (perf_evsel__init_syscall_tp(evsel, trace__sys_enter) < 0 ||
2710             perf_evsel__init_sc_tp_ptr_field(evsel, args))) {
2711                 pr_err("Error during initialize raw_syscalls:sys_enter event\n");
2712                 goto out;
2713         }
2714
2715         evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2716                                                      "raw_syscalls:sys_exit");
2717         if (evsel == NULL)
2718                 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2719                                                              "syscalls:sys_exit");
2720         if (evsel &&
2721             (perf_evsel__init_syscall_tp(evsel, trace__sys_exit) < 0 ||
2722             perf_evsel__init_sc_tp_uint_field(evsel, ret))) {
2723                 pr_err("Error during initialize raw_syscalls:sys_exit event\n");
2724                 goto out;
2725         }
2726
2727         evlist__for_each(session->evlist, evsel) {
2728                 if (evsel->attr.type == PERF_TYPE_SOFTWARE &&
2729                     (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ||
2730                      evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MIN ||
2731                      evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS))
2732                         evsel->handler = trace__pgfault;
2733         }
2734
2735         err = parse_target_str(trace);
2736         if (err != 0)
2737                 goto out;
2738
2739         setup_pager();
2740
2741         err = perf_session__process_events(session);
2742         if (err)
2743                 pr_err("Failed to process events, error %d", err);
2744
2745         else if (trace->summary)
2746                 trace__fprintf_thread_summary(trace, trace->output);
2747
2748 out:
2749         perf_session__delete(session);
2750
2751         return err;
2752 }
2753
/*
 * Write the heading of the per-thread summary to fp.
 *
 * Returns the fprintf() result converted to size_t.
 */
static size_t trace__fprintf_threads_header(FILE *fp)
{
	return fprintf(fp, "\n Summary of events:\n\n");
}
2762
2763 static size_t thread__dump_stats(struct thread_trace *ttrace,
2764                                  struct trace *trace, FILE *fp)
2765 {
2766         struct stats *stats;
2767         size_t printed = 0;
2768         struct syscall *sc;
2769         struct int_node *inode = intlist__first(ttrace->syscall_stats);
2770
2771         if (inode == NULL)
2772                 return 0;
2773
2774         printed += fprintf(fp, "\n");
2775
2776         printed += fprintf(fp, "   syscall            calls      min       avg       max      stddev\n");
2777         printed += fprintf(fp, "                               (msec)    (msec)    (msec)        (%%)\n");
2778         printed += fprintf(fp, "   --------------- -------- --------- --------- ---------     ------\n");
2779
2780         /* each int_node is a syscall */
2781         while (inode) {
2782                 stats = inode->priv;
2783                 if (stats) {
2784                         double min = (double)(stats->min) / NSEC_PER_MSEC;
2785                         double max = (double)(stats->max) / NSEC_PER_MSEC;
2786                         double avg = avg_stats(stats);
2787                         double pct;
2788                         u64 n = (u64) stats->n;
2789
2790                         pct = avg ? 100.0 * stddev_stats(stats)/avg : 0.0;
2791                         avg /= NSEC_PER_MSEC;
2792
2793                         sc = &trace->syscalls.table[inode->i];
2794                         printed += fprintf(fp, "   %-15s", sc->name);
2795                         printed += fprintf(fp, " %8" PRIu64 " %9.3f %9.3f",
2796                                            n, min, avg);
2797                         printed += fprintf(fp, " %9.3f %9.2f%%\n", max, pct);
2798                 }
2799
2800                 inode = intlist__next(inode);
2801         }
2802
2803         printed += fprintf(fp, "\n\n");
2804
2805         return printed;
2806 }
2807
/*
 * Context passed through the void *priv argument of the per-thread summary
 * callback.
 */
struct summary_data {
	FILE *fp;		/* stream the summary is written to */
	struct trace *trace;	/* tracer whose threads are summarized */
	size_t printed;		/* running sum of the fprintf() results */
};
2814
2815 static int trace__fprintf_one_thread(struct thread *thread, void *priv)
2816 {
2817         struct summary_data *data = priv;
2818         FILE *fp = data->fp;
2819         size_t printed = data->printed;
2820         struct trace *trace = data->trace;
2821         struct thread_trace *ttrace = thread__priv(thread);
2822         double ratio;
2823
2824         if (ttrace == NULL)
2825                 return 0;
2826
2827         ratio = (double)ttrace->nr_events / trace->nr_events * 100.0;
2828
2829         printed += fprintf(fp, " %s (%d), ", thread__comm_str(thread), thread->tid);
2830         printed += fprintf(fp, "%lu events, ", ttrace->nr_events);
2831         printed += fprintf(fp, "%.1f%%", ratio);
2832         if (ttrace->pfmaj)
2833                 printed += fprintf(fp, ", %lu majfaults", ttrace->pfmaj);
2834         if (ttrace->pfmin)
2835                 printed += fprintf(fp, ", %lu minfaults", ttrace->pfmin);
2836         printed += fprintf(fp, ", %.3f msec\n", ttrace->runtime_ms);
2837         printed += thread__dump_stats(ttrace, trace, fp);
2838
2839         data->printed += printed;
2840
2841         return 0;
2842 }
2843
2844 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
2845 {
2846         struct summary_data data = {
2847                 .fp = fp,
2848                 .trace = trace
2849         };
2850         data.printed = trace__fprintf_threads_header(fp);
2851
2852         machine__for_each_thread(trace->host, trace__fprintf_one_thread, &data);
2853
2854         return data.printed;
2855 }
2856
2857 static int trace__set_duration(const struct option *opt, const char *str,
2858                                int unset __maybe_unused)
2859 {
2860         struct trace *trace = opt->value;
2861
2862         trace->duration_filter = atof(str);
2863         return 0;
2864 }
2865
2866 static int trace__set_filter_pids(const struct option *opt, const char *str,
2867                                   int unset __maybe_unused)
2868 {
2869         int ret = -1;
2870         size_t i;
2871         struct trace *trace = opt->value;
2872         /*
2873          * FIXME: introduce a intarray class, plain parse csv and create a
2874          * { int nr, int entries[] } struct...
2875          */
2876         struct intlist *list = intlist__new(str);
2877
2878         if (list == NULL)
2879                 return -1;
2880
2881         i = trace->filter_pids.nr = intlist__nr_entries(list) + 1;
2882         trace->filter_pids.entries = calloc(i, sizeof(pid_t));
2883
2884         if (trace->filter_pids.entries == NULL)
2885                 goto out;
2886
2887         trace->filter_pids.entries[0] = getpid();
2888
2889         for (i = 1; i < trace->filter_pids.nr; ++i)
2890                 trace->filter_pids.entries[i] = intlist__entry(list, i - 1)->i;
2891
2892         intlist__delete(list);
2893         ret = 0;
2894 out:
2895         return ret;
2896 }
2897
2898 static int trace__open_output(struct trace *trace, const char *filename)
2899 {
2900         struct stat st;
2901
2902         if (!stat(filename, &st) && st.st_size) {
2903                 char oldname[PATH_MAX];
2904
2905                 scnprintf(oldname, sizeof(oldname), "%s.old", filename);
2906                 unlink(oldname);
2907                 rename(filename, oldname);
2908         }
2909
2910         trace->output = fopen(filename, "w");
2911
2912         return trace->output == NULL ? -errno : 0;
2913 }
2914
2915 static int parse_pagefaults(const struct option *opt, const char *str,
2916                             int unset __maybe_unused)
2917 {
2918         int *trace_pgfaults = opt->value;
2919
2920         if (strcmp(str, "all") == 0)
2921                 *trace_pgfaults |= TRACE_PFMAJ | TRACE_PFMIN;
2922         else if (strcmp(str, "maj") == 0)
2923                 *trace_pgfaults |= TRACE_PFMAJ;
2924         else if (strcmp(str, "min") == 0)
2925                 *trace_pgfaults |= TRACE_PFMIN;
2926         else
2927                 return -1;
2928
2929         return 0;
2930 }
2931
2932 static void evlist__set_evsel_handler(struct perf_evlist *evlist, void *handler)
2933 {
2934         struct perf_evsel *evsel;
2935
2936         evlist__for_each(evlist, evsel)
2937                 evsel->handler = handler;
2938 }
2939
2940 int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
2941 {
2942         const char *trace_usage[] = {
2943                 "perf trace [<options>] [<command>]",
2944                 "perf trace [<options>] -- <command> [<options>]",
2945                 "perf trace record [<options>] [<command>]",
2946                 "perf trace record [<options>] -- <command> [<options>]",
2947                 NULL
2948         };
2949         struct trace trace = {
2950                 .audit = {
2951                         .machine = audit_detect_machine(),
2952                         .open_id = audit_name_to_syscall("open", trace.audit.machine),
2953                 },
2954                 .syscalls = {
2955                         . max = -1,
2956                 },
2957                 .opts = {
2958                         .target = {
2959                                 .uid       = UINT_MAX,
2960                                 .uses_mmap = true,
2961                         },
2962                         .user_freq     = UINT_MAX,
2963                         .user_interval = ULLONG_MAX,
2964                         .no_buffering  = true,
2965                         .mmap_pages    = UINT_MAX,
2966                         .proc_map_timeout  = 500,
2967                 },
2968                 .output = stdout,
2969                 .show_comm = true,
2970                 .trace_syscalls = true,
2971         };
2972         const char *output_name = NULL;
2973         const char *ev_qualifier_str = NULL;
2974         const struct option trace_options[] = {
2975         OPT_CALLBACK(0, "event", &trace.evlist, "event",
2976                      "event selector. use 'perf list' to list available events",
2977                      parse_events_option),
2978         OPT_BOOLEAN(0, "comm", &trace.show_comm,
2979                     "show the thread COMM next to its id"),
2980         OPT_BOOLEAN(0, "tool_stats", &trace.show_tool_stats, "show tool stats"),
2981         OPT_STRING('e', "expr", &ev_qualifier_str, "expr", "list of syscalls to trace"),
2982         OPT_STRING('o', "output", &output_name, "file", "output file name"),
2983         OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"),
2984         OPT_STRING('p', "pid", &trace.opts.target.pid, "pid",
2985                     "trace events on existing process id"),
2986         OPT_STRING('t', "tid", &trace.opts.target.tid, "tid",
2987                     "trace events on existing thread id"),
2988         OPT_CALLBACK(0, "filter-pids", &trace, "CSV list of pids",
2989                      "pids to filter (by the kernel)", trace__set_filter_pids),
2990         OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide,
2991                     "system-wide collection from all CPUs"),
2992         OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu",
2993                     "list of cpus to monitor"),
2994         OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit,
2995                     "child tasks do not inherit counters"),
2996         OPT_CALLBACK('m', "mmap-pages", &trace.opts.mmap_pages, "pages",
2997                      "number of mmap data pages",
2998                      perf_evlist__parse_mmap_pages),
2999         OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user",
3000                    "user to profile"),
3001         OPT_CALLBACK(0, "duration", &trace, "float",
3002                      "show only events with duration > N.M ms",
3003                      trace__set_duration),
3004         OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"),
3005         OPT_INCR('v', "verbose", &verbose, "be more verbose"),
3006         OPT_BOOLEAN('T', "time", &trace.full_time,
3007                     "Show full timestamp, not time relative to first start"),
3008         OPT_BOOLEAN('s', "summary", &trace.summary_only,
3009                     "Show only syscall summary with statistics"),
3010         OPT_BOOLEAN('S', "with-summary", &trace.summary,
3011                     "Show all syscalls and summary with statistics"),
3012         OPT_CALLBACK_DEFAULT('F', "pf", &trace.trace_pgfaults, "all|maj|min",
3013                      "Trace pagefaults", parse_pagefaults, "maj"),
3014         OPT_BOOLEAN(0, "syscalls", &trace.trace_syscalls, "Trace syscalls"),
3015         OPT_BOOLEAN('f', "force", &trace.force, "don't complain, do it"),
3016         OPT_UINTEGER(0, "proc-map-timeout", &trace.opts.proc_map_timeout,
3017                         "per thread proc mmap processing timeout in ms"),
3018         OPT_END()
3019         };
3020         const char * const trace_subcommands[] = { "record", NULL };
3021         int err;
3022         char bf[BUFSIZ];
3023
3024         signal(SIGSEGV, sighandler_dump_stack);
3025         signal(SIGFPE, sighandler_dump_stack);
3026
3027         trace.evlist = perf_evlist__new();
3028
3029         if (trace.evlist == NULL) {
3030                 pr_err("Not enough memory to run!\n");
3031                 err = -ENOMEM;
3032                 goto out;
3033         }
3034
3035         argc = parse_options_subcommand(argc, argv, trace_options, trace_subcommands,
3036                                  trace_usage, PARSE_OPT_STOP_AT_NON_OPTION);
3037
3038         if (trace.trace_pgfaults) {
3039                 trace.opts.sample_address = true;
3040                 trace.opts.sample_time = true;
3041         }
3042
3043         if (trace.evlist->nr_entries > 0)
3044                 evlist__set_evsel_handler(trace.evlist, trace__event_handler);
3045
3046         if ((argc >= 1) && (strcmp(argv[0], "record") == 0))
3047                 return trace__record(&trace, argc-1, &argv[1]);
3048
3049         /* summary_only implies summary option, but don't overwrite summary if set */
3050         if (trace.summary_only)
3051                 trace.summary = trace.summary_only;
3052
3053         if (!trace.trace_syscalls && !trace.trace_pgfaults &&
3054             trace.evlist->nr_entries == 0 /* Was --events used? */) {
3055                 pr_err("Please specify something to trace.\n");
3056                 return -1;
3057         }
3058
3059         if (output_name != NULL) {
3060                 err = trace__open_output(&trace, output_name);
3061                 if (err < 0) {
3062                         perror("failed to create output file");
3063                         goto out;
3064                 }
3065         }
3066
3067         if (ev_qualifier_str != NULL) {
3068                 const char *s = ev_qualifier_str;
3069                 struct strlist_config slist_config = {
3070                         .dirname = system_path(STRACE_GROUPS_DIR),
3071                 };
3072
3073                 trace.not_ev_qualifier = *s == '!';
3074                 if (trace.not_ev_qualifier)
3075                         ++s;
3076                 trace.ev_qualifier = strlist__new(s, &slist_config);
3077                 if (trace.ev_qualifier == NULL) {
3078                         fputs("Not enough memory to parse event qualifier",
3079                               trace.output);
3080                         err = -ENOMEM;
3081                         goto out_close;
3082                 }
3083
3084                 err = trace__validate_ev_qualifier(&trace);
3085                 if (err)
3086                         goto out_close;
3087         }
3088
3089         err = target__validate(&trace.opts.target);
3090         if (err) {
3091                 target__strerror(&trace.opts.target, err, bf, sizeof(bf));
3092                 fprintf(trace.output, "%s", bf);
3093                 goto out_close;
3094         }
3095
3096         err = target__parse_uid(&trace.opts.target);
3097         if (err) {
3098                 target__strerror(&trace.opts.target, err, bf, sizeof(bf));
3099                 fprintf(trace.output, "%s", bf);
3100                 goto out_close;
3101         }
3102
3103         if (!argc && target__none(&trace.opts.target))
3104                 trace.opts.target.system_wide = true;
3105
3106         if (input_name)
3107                 err = trace__replay(&trace);
3108         else
3109                 err = trace__run(&trace, argc, argv);
3110
3111 out_close:
3112         if (output_name != NULL)
3113                 fclose(trace.output);
3114 out:
3115         return err;
3116 }