Driver: spi: fix compiling err for rk3288
[firefly-linux-kernel-4.4.55.git] / tools / perf / builtin-trace.c
1 /*
2  * builtin-trace.c
3  *
4  * Builtin 'trace' command:
5  *
6  * Display a continuously updated trace of any workload, CPU, specific PID,
7  * system wide, etc.  Default format is loosely strace like, but any other
8  * event may be specified using --event.
9  *
10  * Copyright (C) 2012, 2013, 2014, 2015 Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
11  *
12  * Initially based on the 'trace' prototype by Thomas Gleixner:
13  *
14  * http://lwn.net/Articles/415728/ ("Announcing a new utility: 'trace'")
15  *
16  * Released under the GPL v2. (and only v2, not any later version)
17  */
18
19 #include <traceevent/event-parse.h>
20 #include <api/fs/tracing_path.h>
21 #include "builtin.h"
22 #include "util/color.h"
23 #include "util/debug.h"
24 #include "util/evlist.h"
25 #include "util/exec_cmd.h"
26 #include "util/machine.h"
27 #include "util/session.h"
28 #include "util/thread.h"
29 #include "util/parse-options.h"
30 #include "util/strlist.h"
31 #include "util/intlist.h"
32 #include "util/thread_map.h"
33 #include "util/stat.h"
34 #include "trace-event.h"
35 #include "util/parse-events.h"
36
37 #include <libaudit.h>
38 #include <stdlib.h>
39 #include <sys/mman.h>
40 #include <linux/futex.h>
41 #include <linux/err.h>
42
/*
 * Compatibility fallbacks for older distros: every constant below is defined
 * only when the headers included above did not already provide it.
 */
#if !defined(MAP_STACK)
#define MAP_STACK		0x20000
#endif

#if !defined(MADV_HWPOISON)
#define MADV_HWPOISON		100
#endif

#if !defined(MADV_MERGEABLE)
#define MADV_MERGEABLE		12
#endif

#if !defined(MADV_UNMERGEABLE)
#define MADV_UNMERGEABLE	13
#endif

#if !defined(EFD_SEMAPHORE)
#define EFD_SEMAPHORE		1
#endif

#if !defined(EFD_NONBLOCK)
#define EFD_NONBLOCK		00004000
#endif

#if !defined(EFD_CLOEXEC)
#define EFD_CLOEXEC		02000000
#endif

#if !defined(O_CLOEXEC)
#define O_CLOEXEC		02000000
#endif

#if !defined(SOCK_DCCP)
#define SOCK_DCCP		6
#endif

#if !defined(SOCK_CLOEXEC)
#define SOCK_CLOEXEC		02000000
#endif

#if !defined(SOCK_NONBLOCK)
#define SOCK_NONBLOCK		00004000
#endif

#if !defined(MSG_CMSG_CLOEXEC)
#define MSG_CMSG_CLOEXEC	0x40000000
#endif

#if !defined(PERF_FLAG_FD_NO_GROUP)
#define PERF_FLAG_FD_NO_GROUP	(1UL << 0)
#endif

#if !defined(PERF_FLAG_FD_OUTPUT)
#define PERF_FLAG_FD_OUTPUT	(1UL << 1)
#endif

#if !defined(PERF_FLAG_PID_CGROUP)
#define PERF_FLAG_PID_CGROUP	(1UL << 2) /* pid=cgroup id, per-cpu mode only */
#endif

#if !defined(PERF_FLAG_FD_CLOEXEC)
#define PERF_FLAG_FD_CLOEXEC	(1UL << 3) /* O_CLOEXEC */
#endif
108
109
110 struct tp_field {
111         int offset;
112         union {
113                 u64 (*integer)(struct tp_field *field, struct perf_sample *sample);
114                 void *(*pointer)(struct tp_field *field, struct perf_sample *sample);
115         };
116 };
117
118 #define TP_UINT_FIELD(bits) \
119 static u64 tp_field__u##bits(struct tp_field *field, struct perf_sample *sample) \
120 { \
121         u##bits value; \
122         memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
123         return value;  \
124 }
125
126 TP_UINT_FIELD(8);
127 TP_UINT_FIELD(16);
128 TP_UINT_FIELD(32);
129 TP_UINT_FIELD(64);
130
131 #define TP_UINT_FIELD__SWAPPED(bits) \
132 static u64 tp_field__swapped_u##bits(struct tp_field *field, struct perf_sample *sample) \
133 { \
134         u##bits value; \
135         memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
136         return bswap_##bits(value);\
137 }
138
139 TP_UINT_FIELD__SWAPPED(16);
140 TP_UINT_FIELD__SWAPPED(32);
141 TP_UINT_FIELD__SWAPPED(64);
142
143 static int tp_field__init_uint(struct tp_field *field,
144                                struct format_field *format_field,
145                                bool needs_swap)
146 {
147         field->offset = format_field->offset;
148
149         switch (format_field->size) {
150         case 1:
151                 field->integer = tp_field__u8;
152                 break;
153         case 2:
154                 field->integer = needs_swap ? tp_field__swapped_u16 : tp_field__u16;
155                 break;
156         case 4:
157                 field->integer = needs_swap ? tp_field__swapped_u32 : tp_field__u32;
158                 break;
159         case 8:
160                 field->integer = needs_swap ? tp_field__swapped_u64 : tp_field__u64;
161                 break;
162         default:
163                 return -1;
164         }
165
166         return 0;
167 }
168
169 static void *tp_field__ptr(struct tp_field *field, struct perf_sample *sample)
170 {
171         return sample->raw_data + field->offset;
172 }
173
174 static int tp_field__init_ptr(struct tp_field *field, struct format_field *format_field)
175 {
176         field->offset = format_field->offset;
177         field->pointer = tp_field__ptr;
178         return 0;
179 }
180
181 struct syscall_tp {
182         struct tp_field id;
183         union {
184                 struct tp_field args, ret;
185         };
186 };
187
188 static int perf_evsel__init_tp_uint_field(struct perf_evsel *evsel,
189                                           struct tp_field *field,
190                                           const char *name)
191 {
192         struct format_field *format_field = perf_evsel__field(evsel, name);
193
194         if (format_field == NULL)
195                 return -1;
196
197         return tp_field__init_uint(field, format_field, evsel->needs_swap);
198 }
199
/*
 * Initialize the integer accessor for member @name of the struct syscall_tp
 * kept in evsel->priv; the tracepoint field looked up has the same name.
 */
#define perf_evsel__init_sc_tp_uint_field(evsel, name) \
	({ struct syscall_tp *sc_tp = evsel->priv; \
	   perf_evsel__init_tp_uint_field(evsel, &sc_tp->name, #name); })
203
/*
 * Look up the tracepoint field called @name in @evsel and initialize @field
 * as a pointer accessor for it.
 *
 * Returns -1 when the field does not exist, otherwise whatever
 * tp_field__init_ptr() returns.
 */
static int perf_evsel__init_tp_ptr_field(struct perf_evsel *evsel,
					 struct tp_field *field,
					 const char *name)
{
	struct format_field *ff = perf_evsel__field(evsel, name);

	return ff != NULL ? tp_field__init_ptr(field, ff) : -1;
}
215
/*
 * Initialize the pointer accessor for member @name of the struct syscall_tp
 * kept in evsel->priv; the tracepoint field looked up has the same name.
 */
#define perf_evsel__init_sc_tp_ptr_field(evsel, name) \
	({ struct syscall_tp *sc_tp = evsel->priv; \
	   perf_evsel__init_tp_ptr_field(evsel, &sc_tp->name, #name); })
219
220 static void perf_evsel__delete_priv(struct perf_evsel *evsel)
221 {
222         zfree(&evsel->priv);
223         perf_evsel__delete(evsel);
224 }
225
226 static int perf_evsel__init_syscall_tp(struct perf_evsel *evsel, void *handler)
227 {
228         evsel->priv = malloc(sizeof(struct syscall_tp));
229         if (evsel->priv != NULL) {
230                 if (perf_evsel__init_sc_tp_uint_field(evsel, id))
231                         goto out_delete;
232
233                 evsel->handler = handler;
234                 return 0;
235         }
236
237         return -ENOMEM;
238
239 out_delete:
240         zfree(&evsel->priv);
241         return -ENOENT;
242 }
243
/*
 * Create the evsel for the raw syscall tracepoint named @direction and
 * prepare it with perf_evsel__init_syscall_tp().
 *
 * Returns the new evsel, or NULL if the tracepoint could not be created or
 * initialized (a partially set up evsel is deleted before returning).
 */
static struct perf_evsel *perf_evsel__syscall_newtp(const char *direction, void *handler)
{
	struct perf_evsel *evsel = perf_evsel__newtp("raw_syscalls", direction);

	if (IS_ERR(evsel)) {
		/* older kernel (e.g., RHEL6) use syscalls:{enter,exit} */
		evsel = perf_evsel__newtp("syscalls", direction);
		if (IS_ERR(evsel))
			return NULL;
	}

	if (perf_evsel__init_syscall_tp(evsel, handler) != 0) {
		perf_evsel__delete_priv(evsel);
		return NULL;
	}

	return evsel;
}
264
/*
 * Read member @name of the evsel's struct syscall_tp from @sample through
 * its integer accessor.
 */
#define perf_evsel__sc_tp_uint(evsel, name, sample) \
	({ struct syscall_tp *sc_fields = evsel->priv; \
	   sc_fields->name.integer(&sc_fields->name, sample); })

/* Same as above, but through the pointer accessor. */
#define perf_evsel__sc_tp_ptr(evsel, name, sample) \
	({ struct syscall_tp *sc_fields = evsel->priv; \
	   sc_fields->name.pointer(&sc_fields->name, sample); })
272
273 struct syscall_arg {
274         unsigned long val;
275         struct thread *thread;
276         struct trace  *trace;
277         void          *parm;
278         u8            idx;
279         u8            mask;
280 };
281
/*
 * Table of names for an integer argument: value (offset + i) maps to
 * entries[i], for 0 <= i < nr_entries.
 */
struct strarray {
	int         offset;
	int         nr_entries;
	const char **entries;
};

/* Define strarray__<array> for @array, with the first entry naming value 0. */
#define DEFINE_STRARRAY(array) struct strarray strarray__##array = { \
	.nr_entries = ARRAY_SIZE(array), \
	.entries    = array, \
}

/* Define strarray__<array> for @array, with the first entry naming value @off. */
#define DEFINE_STRARRAY_OFFSET(array, off) struct strarray strarray__##array = { \
	.offset     = off, \
	.nr_entries = ARRAY_SIZE(array), \
	.entries    = array, \
}
298
299 static size_t __syscall_arg__scnprintf_strarray(char *bf, size_t size,
300                                                 const char *intfmt,
301                                                 struct syscall_arg *arg)
302 {
303         struct strarray *sa = arg->parm;
304         int idx = arg->val - sa->offset;
305
306         if (idx < 0 || idx >= sa->nr_entries)
307                 return scnprintf(bf, size, intfmt, arg->val);
308
309         return scnprintf(bf, size, "%s", sa->entries[idx]);
310 }
311
/* strarray formatter that prints values without a name in decimal. */
static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size, struct syscall_arg *arg)
{
	const char *fallback_fmt = "%d";

	return __syscall_arg__scnprintf_strarray(bf, size, fallback_fmt, arg);
}

#define SCA_STRARRAY syscall_arg__scnprintf_strarray
319
#if defined(__i386__) || defined(__x86_64__)
/*
 * FIXME: Make this available to all arches as soon as the ioctl beautifier
 *	  gets rewritten to support all arches.
 *
 * strarray formatter that prints values without a name in hexadecimal.
 */
static size_t syscall_arg__scnprintf_strhexarray(char *bf, size_t size, struct syscall_arg *arg)
{
	const char *fallback_fmt = "%#x";

	return __syscall_arg__scnprintf_strarray(bf, size, fallback_fmt, arg);
}

#define SCA_STRHEXARRAY syscall_arg__scnprintf_strhexarray
#endif /* defined(__i386__) || defined(__x86_64__) */
333
334 static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
335                                         struct syscall_arg *arg);
336
337 #define SCA_FD syscall_arg__scnprintf_fd
338
339 static size_t syscall_arg__scnprintf_fd_at(char *bf, size_t size,
340                                            struct syscall_arg *arg)
341 {
342         int fd = arg->val;
343
344         if (fd == AT_FDCWD)
345                 return scnprintf(bf, size, "CWD");
346
347         return syscall_arg__scnprintf_fd(bf, size, arg);
348 }
349
350 #define SCA_FDAT syscall_arg__scnprintf_fd_at
351
/* Formatter for the fd passed to close(); only declared here. */
static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size, struct syscall_arg *arg);

#define SCA_CLOSE_FD syscall_arg__scnprintf_close_fd
356
357 static size_t syscall_arg__scnprintf_hex(char *bf, size_t size,
358                                          struct syscall_arg *arg)
359 {
360         return scnprintf(bf, size, "%#lx", arg->val);
361 }
362
363 #define SCA_HEX syscall_arg__scnprintf_hex
364
365 static size_t syscall_arg__scnprintf_int(char *bf, size_t size,
366                                          struct syscall_arg *arg)
367 {
368         return scnprintf(bf, size, "%d", arg->val);
369 }
370
371 #define SCA_INT syscall_arg__scnprintf_int
372
373 static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size,
374                                                struct syscall_arg *arg)
375 {
376         int printed = 0, prot = arg->val;
377
378         if (prot == PROT_NONE)
379                 return scnprintf(bf, size, "NONE");
380 #define P_MMAP_PROT(n) \
381         if (prot & PROT_##n) { \
382                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
383                 prot &= ~PROT_##n; \
384         }
385
386         P_MMAP_PROT(EXEC);
387         P_MMAP_PROT(READ);
388         P_MMAP_PROT(WRITE);
389 #ifdef PROT_SEM
390         P_MMAP_PROT(SEM);
391 #endif
392         P_MMAP_PROT(GROWSDOWN);
393         P_MMAP_PROT(GROWSUP);
394 #undef P_MMAP_PROT
395
396         if (prot)
397                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", prot);
398
399         return printed;
400 }
401
402 #define SCA_MMAP_PROT syscall_arg__scnprintf_mmap_prot
403
404 static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size,
405                                                 struct syscall_arg *arg)
406 {
407         int printed = 0, flags = arg->val;
408
409 #define P_MMAP_FLAG(n) \
410         if (flags & MAP_##n) { \
411                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
412                 flags &= ~MAP_##n; \
413         }
414
415         P_MMAP_FLAG(SHARED);
416         P_MMAP_FLAG(PRIVATE);
417 #ifdef MAP_32BIT
418         P_MMAP_FLAG(32BIT);
419 #endif
420         P_MMAP_FLAG(ANONYMOUS);
421         P_MMAP_FLAG(DENYWRITE);
422         P_MMAP_FLAG(EXECUTABLE);
423         P_MMAP_FLAG(FILE);
424         P_MMAP_FLAG(FIXED);
425         P_MMAP_FLAG(GROWSDOWN);
426 #ifdef MAP_HUGETLB
427         P_MMAP_FLAG(HUGETLB);
428 #endif
429         P_MMAP_FLAG(LOCKED);
430         P_MMAP_FLAG(NONBLOCK);
431         P_MMAP_FLAG(NORESERVE);
432         P_MMAP_FLAG(POPULATE);
433         P_MMAP_FLAG(STACK);
434 #ifdef MAP_UNINITIALIZED
435         P_MMAP_FLAG(UNINITIALIZED);
436 #endif
437 #undef P_MMAP_FLAG
438
439         if (flags)
440                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
441
442         return printed;
443 }
444
445 #define SCA_MMAP_FLAGS syscall_arg__scnprintf_mmap_flags
446
447 static size_t syscall_arg__scnprintf_mremap_flags(char *bf, size_t size,
448                                                   struct syscall_arg *arg)
449 {
450         int printed = 0, flags = arg->val;
451
452 #define P_MREMAP_FLAG(n) \
453         if (flags & MREMAP_##n) { \
454                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
455                 flags &= ~MREMAP_##n; \
456         }
457
458         P_MREMAP_FLAG(MAYMOVE);
459 #ifdef MREMAP_FIXED
460         P_MREMAP_FLAG(FIXED);
461 #endif
462 #undef P_MREMAP_FLAG
463
464         if (flags)
465                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
466
467         return printed;
468 }
469
470 #define SCA_MREMAP_FLAGS syscall_arg__scnprintf_mremap_flags
471
472 static size_t syscall_arg__scnprintf_madvise_behavior(char *bf, size_t size,
473                                                       struct syscall_arg *arg)
474 {
475         int behavior = arg->val;
476
477         switch (behavior) {
478 #define P_MADV_BHV(n) case MADV_##n: return scnprintf(bf, size, #n)
479         P_MADV_BHV(NORMAL);
480         P_MADV_BHV(RANDOM);
481         P_MADV_BHV(SEQUENTIAL);
482         P_MADV_BHV(WILLNEED);
483         P_MADV_BHV(DONTNEED);
484         P_MADV_BHV(REMOVE);
485         P_MADV_BHV(DONTFORK);
486         P_MADV_BHV(DOFORK);
487         P_MADV_BHV(HWPOISON);
488 #ifdef MADV_SOFT_OFFLINE
489         P_MADV_BHV(SOFT_OFFLINE);
490 #endif
491         P_MADV_BHV(MERGEABLE);
492         P_MADV_BHV(UNMERGEABLE);
493 #ifdef MADV_HUGEPAGE
494         P_MADV_BHV(HUGEPAGE);
495 #endif
496 #ifdef MADV_NOHUGEPAGE
497         P_MADV_BHV(NOHUGEPAGE);
498 #endif
499 #ifdef MADV_DONTDUMP
500         P_MADV_BHV(DONTDUMP);
501 #endif
502 #ifdef MADV_DODUMP
503         P_MADV_BHV(DODUMP);
504 #endif
505 #undef P_MADV_PHV
506         default: break;
507         }
508
509         return scnprintf(bf, size, "%#x", behavior);
510 }
511
512 #define SCA_MADV_BHV syscall_arg__scnprintf_madvise_behavior
513
514 static size_t syscall_arg__scnprintf_flock(char *bf, size_t size,
515                                            struct syscall_arg *arg)
516 {
517         int printed = 0, op = arg->val;
518
519         if (op == 0)
520                 return scnprintf(bf, size, "NONE");
521 #define P_CMD(cmd) \
522         if ((op & LOCK_##cmd) == LOCK_##cmd) { \
523                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #cmd); \
524                 op &= ~LOCK_##cmd; \
525         }
526
527         P_CMD(SH);
528         P_CMD(EX);
529         P_CMD(NB);
530         P_CMD(UN);
531         P_CMD(MAND);
532         P_CMD(RW);
533         P_CMD(READ);
534         P_CMD(WRITE);
535 #undef P_OP
536
537         if (op)
538                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", op);
539
540         return printed;
541 }
542
543 #define SCA_FLOCK syscall_arg__scnprintf_flock
544
545 static size_t syscall_arg__scnprintf_futex_op(char *bf, size_t size, struct syscall_arg *arg)
546 {
547         enum syscall_futex_args {
548                 SCF_UADDR   = (1 << 0),
549                 SCF_OP      = (1 << 1),
550                 SCF_VAL     = (1 << 2),
551                 SCF_TIMEOUT = (1 << 3),
552                 SCF_UADDR2  = (1 << 4),
553                 SCF_VAL3    = (1 << 5),
554         };
555         int op = arg->val;
556         int cmd = op & FUTEX_CMD_MASK;
557         size_t printed = 0;
558
559         switch (cmd) {
560 #define P_FUTEX_OP(n) case FUTEX_##n: printed = scnprintf(bf, size, #n);
561         P_FUTEX_OP(WAIT);           arg->mask |= SCF_VAL3|SCF_UADDR2;             break;
562         P_FUTEX_OP(WAKE);           arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
563         P_FUTEX_OP(FD);             arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
564         P_FUTEX_OP(REQUEUE);        arg->mask |= SCF_VAL3|SCF_TIMEOUT;            break;
565         P_FUTEX_OP(CMP_REQUEUE);    arg->mask |= SCF_TIMEOUT;                     break;
566         P_FUTEX_OP(CMP_REQUEUE_PI); arg->mask |= SCF_TIMEOUT;                     break;
567         P_FUTEX_OP(WAKE_OP);                                                      break;
568         P_FUTEX_OP(LOCK_PI);        arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
569         P_FUTEX_OP(UNLOCK_PI);      arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
570         P_FUTEX_OP(TRYLOCK_PI);     arg->mask |= SCF_VAL3|SCF_UADDR2;             break;
571         P_FUTEX_OP(WAIT_BITSET);    arg->mask |= SCF_UADDR2;                      break;
572         P_FUTEX_OP(WAKE_BITSET);    arg->mask |= SCF_UADDR2;                      break;
573         P_FUTEX_OP(WAIT_REQUEUE_PI);                                              break;
574         default: printed = scnprintf(bf, size, "%#x", cmd);                       break;
575         }
576
577         if (op & FUTEX_PRIVATE_FLAG)
578                 printed += scnprintf(bf + printed, size - printed, "|PRIV");
579
580         if (op & FUTEX_CLOCK_REALTIME)
581                 printed += scnprintf(bf + printed, size - printed, "|CLKRT");
582
583         return printed;
584 }
585
586 #define SCA_FUTEX_OP  syscall_arg__scnprintf_futex_op
587
588 static const char *bpf_cmd[] = {
589         "MAP_CREATE", "MAP_LOOKUP_ELEM", "MAP_UPDATE_ELEM", "MAP_DELETE_ELEM",
590         "MAP_GET_NEXT_KEY", "PROG_LOAD",
591 };
592 static DEFINE_STRARRAY(bpf_cmd);
593
594 static const char *epoll_ctl_ops[] = { "ADD", "DEL", "MOD", };
595 static DEFINE_STRARRAY_OFFSET(epoll_ctl_ops, 1);
596
597 static const char *itimers[] = { "REAL", "VIRTUAL", "PROF", };
598 static DEFINE_STRARRAY(itimers);
599
600 static const char *keyctl_options[] = {
601         "GET_KEYRING_ID", "JOIN_SESSION_KEYRING", "UPDATE", "REVOKE", "CHOWN",
602         "SETPERM", "DESCRIBE", "CLEAR", "LINK", "UNLINK", "SEARCH", "READ",
603         "INSTANTIATE", "NEGATE", "SET_REQKEY_KEYRING", "SET_TIMEOUT",
604         "ASSUME_AUTHORITY", "GET_SECURITY", "SESSION_TO_PARENT", "REJECT",
605         "INSTANTIATE_IOV", "INVALIDATE", "GET_PERSISTENT",
606 };
607 static DEFINE_STRARRAY(keyctl_options);
608
609 static const char *whences[] = { "SET", "CUR", "END",
610 #ifdef SEEK_DATA
611 "DATA",
612 #endif
613 #ifdef SEEK_HOLE
614 "HOLE",
615 #endif
616 };
617 static DEFINE_STRARRAY(whences);
618
619 static const char *fcntl_cmds[] = {
620         "DUPFD", "GETFD", "SETFD", "GETFL", "SETFL", "GETLK", "SETLK",
621         "SETLKW", "SETOWN", "GETOWN", "SETSIG", "GETSIG", "F_GETLK64",
622         "F_SETLK64", "F_SETLKW64", "F_SETOWN_EX", "F_GETOWN_EX",
623         "F_GETOWNER_UIDS",
624 };
625 static DEFINE_STRARRAY(fcntl_cmds);
626
627 static const char *rlimit_resources[] = {
628         "CPU", "FSIZE", "DATA", "STACK", "CORE", "RSS", "NPROC", "NOFILE",
629         "MEMLOCK", "AS", "LOCKS", "SIGPENDING", "MSGQUEUE", "NICE", "RTPRIO",
630         "RTTIME",
631 };
632 static DEFINE_STRARRAY(rlimit_resources);
633
634 static const char *sighow[] = { "BLOCK", "UNBLOCK", "SETMASK", };
635 static DEFINE_STRARRAY(sighow);
636
637 static const char *clockid[] = {
638         "REALTIME", "MONOTONIC", "PROCESS_CPUTIME_ID", "THREAD_CPUTIME_ID",
639         "MONOTONIC_RAW", "REALTIME_COARSE", "MONOTONIC_COARSE", "BOOTTIME",
640         "REALTIME_ALARM", "BOOTTIME_ALARM", "SGI_CYCLE", "TAI"
641 };
642 static DEFINE_STRARRAY(clockid);
643
644 static const char *socket_families[] = {
645         "UNSPEC", "LOCAL", "INET", "AX25", "IPX", "APPLETALK", "NETROM",
646         "BRIDGE", "ATMPVC", "X25", "INET6", "ROSE", "DECnet", "NETBEUI",
647         "SECURITY", "KEY", "NETLINK", "PACKET", "ASH", "ECONET", "ATMSVC",
648         "RDS", "SNA", "IRDA", "PPPOX", "WANPIPE", "LLC", "IB", "CAN", "TIPC",
649         "BLUETOOTH", "IUCV", "RXRPC", "ISDN", "PHONET", "IEEE802154", "CAIF",
650         "ALG", "NFC", "VSOCK",
651 };
652 static DEFINE_STRARRAY(socket_families);
653
654 #ifndef SOCK_TYPE_MASK
655 #define SOCK_TYPE_MASK 0xf
656 #endif
657
658 static size_t syscall_arg__scnprintf_socket_type(char *bf, size_t size,
659                                                       struct syscall_arg *arg)
660 {
661         size_t printed;
662         int type = arg->val,
663             flags = type & ~SOCK_TYPE_MASK;
664
665         type &= SOCK_TYPE_MASK;
666         /*
667          * Can't use a strarray, MIPS may override for ABI reasons.
668          */
669         switch (type) {
670 #define P_SK_TYPE(n) case SOCK_##n: printed = scnprintf(bf, size, #n); break;
671         P_SK_TYPE(STREAM);
672         P_SK_TYPE(DGRAM);
673         P_SK_TYPE(RAW);
674         P_SK_TYPE(RDM);
675         P_SK_TYPE(SEQPACKET);
676         P_SK_TYPE(DCCP);
677         P_SK_TYPE(PACKET);
678 #undef P_SK_TYPE
679         default:
680                 printed = scnprintf(bf, size, "%#x", type);
681         }
682
683 #define P_SK_FLAG(n) \
684         if (flags & SOCK_##n) { \
685                 printed += scnprintf(bf + printed, size - printed, "|%s", #n); \
686                 flags &= ~SOCK_##n; \
687         }
688
689         P_SK_FLAG(CLOEXEC);
690         P_SK_FLAG(NONBLOCK);
691 #undef P_SK_FLAG
692
693         if (flags)
694                 printed += scnprintf(bf + printed, size - printed, "|%#x", flags);
695
696         return printed;
697 }
698
699 #define SCA_SK_TYPE syscall_arg__scnprintf_socket_type
700
701 #ifndef MSG_PROBE
702 #define MSG_PROBE            0x10
703 #endif
704 #ifndef MSG_WAITFORONE
705 #define MSG_WAITFORONE  0x10000
706 #endif
707 #ifndef MSG_SENDPAGE_NOTLAST
708 #define MSG_SENDPAGE_NOTLAST 0x20000
709 #endif
710 #ifndef MSG_FASTOPEN
711 #define MSG_FASTOPEN         0x20000000
712 #endif
713
714 static size_t syscall_arg__scnprintf_msg_flags(char *bf, size_t size,
715                                                struct syscall_arg *arg)
716 {
717         int printed = 0, flags = arg->val;
718
719         if (flags == 0)
720                 return scnprintf(bf, size, "NONE");
721 #define P_MSG_FLAG(n) \
722         if (flags & MSG_##n) { \
723                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
724                 flags &= ~MSG_##n; \
725         }
726
727         P_MSG_FLAG(OOB);
728         P_MSG_FLAG(PEEK);
729         P_MSG_FLAG(DONTROUTE);
730         P_MSG_FLAG(TRYHARD);
731         P_MSG_FLAG(CTRUNC);
732         P_MSG_FLAG(PROBE);
733         P_MSG_FLAG(TRUNC);
734         P_MSG_FLAG(DONTWAIT);
735         P_MSG_FLAG(EOR);
736         P_MSG_FLAG(WAITALL);
737         P_MSG_FLAG(FIN);
738         P_MSG_FLAG(SYN);
739         P_MSG_FLAG(CONFIRM);
740         P_MSG_FLAG(RST);
741         P_MSG_FLAG(ERRQUEUE);
742         P_MSG_FLAG(NOSIGNAL);
743         P_MSG_FLAG(MORE);
744         P_MSG_FLAG(WAITFORONE);
745         P_MSG_FLAG(SENDPAGE_NOTLAST);
746         P_MSG_FLAG(FASTOPEN);
747         P_MSG_FLAG(CMSG_CLOEXEC);
748 #undef P_MSG_FLAG
749
750         if (flags)
751                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
752
753         return printed;
754 }
755
756 #define SCA_MSG_FLAGS syscall_arg__scnprintf_msg_flags
757
758 static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size,
759                                                  struct syscall_arg *arg)
760 {
761         size_t printed = 0;
762         int mode = arg->val;
763
764         if (mode == F_OK) /* 0 */
765                 return scnprintf(bf, size, "F");
766 #define P_MODE(n) \
767         if (mode & n##_OK) { \
768                 printed += scnprintf(bf + printed, size - printed, "%s", #n); \
769                 mode &= ~n##_OK; \
770         }
771
772         P_MODE(R);
773         P_MODE(W);
774         P_MODE(X);
775 #undef P_MODE
776
777         if (mode)
778                 printed += scnprintf(bf + printed, size - printed, "|%#x", mode);
779
780         return printed;
781 }
782
783 #define SCA_ACCMODE syscall_arg__scnprintf_access_mode
784
/* Formatter for filename arguments; only declared here. */
static size_t syscall_arg__scnprintf_filename(char *bf, size_t size, struct syscall_arg *arg);

#define SCA_FILENAME syscall_arg__scnprintf_filename
789
790 static size_t syscall_arg__scnprintf_open_flags(char *bf, size_t size,
791                                                struct syscall_arg *arg)
792 {
793         int printed = 0, flags = arg->val;
794
795         if (!(flags & O_CREAT))
796                 arg->mask |= 1 << (arg->idx + 1); /* Mask the mode parm */
797
798         if (flags == 0)
799                 return scnprintf(bf, size, "RDONLY");
800 #define P_FLAG(n) \
801         if (flags & O_##n) { \
802                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
803                 flags &= ~O_##n; \
804         }
805
806         P_FLAG(APPEND);
807         P_FLAG(ASYNC);
808         P_FLAG(CLOEXEC);
809         P_FLAG(CREAT);
810         P_FLAG(DIRECT);
811         P_FLAG(DIRECTORY);
812         P_FLAG(EXCL);
813         P_FLAG(LARGEFILE);
814         P_FLAG(NOATIME);
815         P_FLAG(NOCTTY);
816 #ifdef O_NONBLOCK
817         P_FLAG(NONBLOCK);
818 #elif O_NDELAY
819         P_FLAG(NDELAY);
820 #endif
821 #ifdef O_PATH
822         P_FLAG(PATH);
823 #endif
824         P_FLAG(RDWR);
825 #ifdef O_DSYNC
826         if ((flags & O_SYNC) == O_SYNC)
827                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", "SYNC");
828         else {
829                 P_FLAG(DSYNC);
830         }
831 #else
832         P_FLAG(SYNC);
833 #endif
834         P_FLAG(TRUNC);
835         P_FLAG(WRONLY);
836 #undef P_FLAG
837
838         if (flags)
839                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
840
841         return printed;
842 }
843
844 #define SCA_OPEN_FLAGS syscall_arg__scnprintf_open_flags
845
846 static size_t syscall_arg__scnprintf_perf_flags(char *bf, size_t size,
847                                                 struct syscall_arg *arg)
848 {
849         int printed = 0, flags = arg->val;
850
851         if (flags == 0)
852                 return 0;
853
854 #define P_FLAG(n) \
855         if (flags & PERF_FLAG_##n) { \
856                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
857                 flags &= ~PERF_FLAG_##n; \
858         }
859
860         P_FLAG(FD_NO_GROUP);
861         P_FLAG(FD_OUTPUT);
862         P_FLAG(PID_CGROUP);
863         P_FLAG(FD_CLOEXEC);
864 #undef P_FLAG
865
866         if (flags)
867                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
868
869         return printed;
870 }
871
872 #define SCA_PERF_FLAGS syscall_arg__scnprintf_perf_flags
873
874 static size_t syscall_arg__scnprintf_eventfd_flags(char *bf, size_t size,
875                                                    struct syscall_arg *arg)
876 {
877         int printed = 0, flags = arg->val;
878
879         if (flags == 0)
880                 return scnprintf(bf, size, "NONE");
881 #define P_FLAG(n) \
882         if (flags & EFD_##n) { \
883                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
884                 flags &= ~EFD_##n; \
885         }
886
887         P_FLAG(SEMAPHORE);
888         P_FLAG(CLOEXEC);
889         P_FLAG(NONBLOCK);
890 #undef P_FLAG
891
892         if (flags)
893                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
894
895         return printed;
896 }
897
898 #define SCA_EFD_FLAGS syscall_arg__scnprintf_eventfd_flags
899
900 static size_t syscall_arg__scnprintf_pipe_flags(char *bf, size_t size,
901                                                 struct syscall_arg *arg)
902 {
903         int printed = 0, flags = arg->val;
904
905 #define P_FLAG(n) \
906         if (flags & O_##n) { \
907                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
908                 flags &= ~O_##n; \
909         }
910
911         P_FLAG(CLOEXEC);
912         P_FLAG(NONBLOCK);
913 #undef P_FLAG
914
915         if (flags)
916                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
917
918         return printed;
919 }
920
921 #define SCA_PIPE_FLAGS syscall_arg__scnprintf_pipe_flags
922
923 static size_t syscall_arg__scnprintf_signum(char *bf, size_t size, struct syscall_arg *arg)
924 {
925         int sig = arg->val;
926
927         switch (sig) {
928 #define P_SIGNUM(n) case SIG##n: return scnprintf(bf, size, #n)
929         P_SIGNUM(HUP);
930         P_SIGNUM(INT);
931         P_SIGNUM(QUIT);
932         P_SIGNUM(ILL);
933         P_SIGNUM(TRAP);
934         P_SIGNUM(ABRT);
935         P_SIGNUM(BUS);
936         P_SIGNUM(FPE);
937         P_SIGNUM(KILL);
938         P_SIGNUM(USR1);
939         P_SIGNUM(SEGV);
940         P_SIGNUM(USR2);
941         P_SIGNUM(PIPE);
942         P_SIGNUM(ALRM);
943         P_SIGNUM(TERM);
944         P_SIGNUM(CHLD);
945         P_SIGNUM(CONT);
946         P_SIGNUM(STOP);
947         P_SIGNUM(TSTP);
948         P_SIGNUM(TTIN);
949         P_SIGNUM(TTOU);
950         P_SIGNUM(URG);
951         P_SIGNUM(XCPU);
952         P_SIGNUM(XFSZ);
953         P_SIGNUM(VTALRM);
954         P_SIGNUM(PROF);
955         P_SIGNUM(WINCH);
956         P_SIGNUM(IO);
957         P_SIGNUM(PWR);
958         P_SIGNUM(SYS);
959 #ifdef SIGEMT
960         P_SIGNUM(EMT);
961 #endif
962 #ifdef SIGSTKFLT
963         P_SIGNUM(STKFLT);
964 #endif
965 #ifdef SIGSWI
966         P_SIGNUM(SWI);
967 #endif
968         default: break;
969         }
970
971         return scnprintf(bf, size, "%#x", sig);
972 }
973
974 #define SCA_SIGNUM syscall_arg__scnprintf_signum
975
976 #if defined(__i386__) || defined(__x86_64__)
977 /*
978  * FIXME: Make this available to all arches.
979  */
980 #define TCGETS          0x5401
981
/*
 * Names of the tty ioctl commands.  Entry 0 is "TCGETS", whose command
 * value is 0x5401 (see above); the strarray below is declared with that
 * same 0x5401 offset.  The [0x27], [0x50] and [0x60] designators skip over
 * ranges that have no name here, leaving NULL entries in between.
 * NOTE(review): presumably the offset is subtracted from the ioctl cmd
 * before indexing -- confirm against DEFINE_STRARRAY_OFFSET.
 */
982 static const char *tioctls[] = {
983         "TCGETS", "TCSETS", "TCSETSW", "TCSETSF", "TCGETA", "TCSETA", "TCSETAW",
984         "TCSETAF", "TCSBRK", "TCXONC", "TCFLSH", "TIOCEXCL", "TIOCNXCL",
985         "TIOCSCTTY", "TIOCGPGRP", "TIOCSPGRP", "TIOCOUTQ", "TIOCSTI",
986         "TIOCGWINSZ", "TIOCSWINSZ", "TIOCMGET", "TIOCMBIS", "TIOCMBIC",
987         "TIOCMSET", "TIOCGSOFTCAR", "TIOCSSOFTCAR", "FIONREAD", "TIOCLINUX",
988         "TIOCCONS", "TIOCGSERIAL", "TIOCSSERIAL", "TIOCPKT", "FIONBIO",
989         "TIOCNOTTY", "TIOCSETD", "TIOCGETD", "TCSBRKP", [0x27] = "TIOCSBRK",
990         "TIOCCBRK", "TIOCGSID", "TCGETS2", "TCSETS2", "TCSETSW2", "TCSETSF2",
991         "TIOCGRS485", "TIOCSRS485", "TIOCGPTN", "TIOCSPTLCK",
992         "TIOCGDEV||TCGETX", "TCSETX", "TCSETXF", "TCSETXW", "TIOCSIG",
993         "TIOCVHANGUP", "TIOCGPKT", "TIOCGPTLCK", "TIOCGEXCL",
994         [0x50] = "FIONCLEX", "FIOCLEX", "FIOASYNC", "TIOCSERCONFIG",
995         "TIOCSERGWILD", "TIOCSERSWILD", "TIOCGLCKTRMIOS", "TIOCSLCKTRMIOS",
996         "TIOCSERGSTRUCT", "TIOCSERGETLSR", "TIOCSERGETMULTI", "TIOCSERSETMULTI",
997         "TIOCMIWAIT", "TIOCGICOUNT", [0x60] = "FIOQSIZE",
998 };
999
1000 static DEFINE_STRARRAY_OFFSET(tioctls, 0x5401);
1001 #endif /* defined(__i386__) || defined(__x86_64__) */
1002
/*
 * Initializer fragment for a syscall_fmts entry: formats argument number
 * 'arg' with SCA_STRARRAY and hands it &strarray__<array> as its parameter.
 * The 'name' parameter is not used by the expansion; call sites pass the
 * argument's name there, which only serves as documentation.
 */
1003 #define STRARRAY(arg, name, array) \
1004           .arg_scnprintf = { [arg] = SCA_STRARRAY, }, \
1005           .arg_parm      = { [arg] = &strarray__##array, }
1006
/*
 * Per-syscall formatting overrides.
 *
 *   name           syscall name; this is the lookup key
 *   alias          alternative name for the syscall
 *                  NOTE(review): presumably the tracepoint name to fall back
 *                  to -- confirm against the users of ->alias
 *   arg_scnprintf  formatter for each argument, indexed by argument position;
 *                  unset slots are NULL
 *   arg_parm       extra parameter for the formatter in the same slot
 *                  (e.g. a strarray for SCA_STRARRAY)
 *   errmsg, timeout, hexret
 *                  flags describing how the return value should be shown
 *                  NOTE(review): exact meaning is defined by code outside
 *                  this table -- confirm there
 *
 * syscall_fmt__find() looks entries up with bsearch(), comparing ->name via
 * strcmp(), so the entries MUST stay sorted by name in strcmp() order.
 */
1007 static struct syscall_fmt {
1008         const char *name;
1009         const char *alias;
1010         size_t     (*arg_scnprintf[6])(char *bf, size_t size, struct syscall_arg *arg);
1011         void       *arg_parm[6];
1012         bool       errmsg;
1013         bool       timeout;
1014         bool       hexret;
1015 } syscall_fmts[] = {
1016         { .name     = "access",     .errmsg = true,
1017           .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */
1018                              [1] = SCA_ACCMODE,  /* mode */ }, },
1019         { .name     = "arch_prctl", .errmsg = true, .alias = "prctl", },
1020         { .name     = "bpf",        .errmsg = true, STRARRAY(0, cmd, bpf_cmd), },
1021         { .name     = "brk",        .hexret = true,
1022           .arg_scnprintf = { [0] = SCA_HEX, /* brk */ }, },
1023         { .name     = "chdir",      .errmsg = true,
1024           .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1025         { .name     = "chmod",      .errmsg = true,
1026           .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1027         { .name     = "chroot",     .errmsg = true,
1028           .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1029         { .name     = "clock_gettime",  .errmsg = true, STRARRAY(0, clk_id, clockid), },
1030         { .name     = "close",      .errmsg = true,
1031           .arg_scnprintf = { [0] = SCA_CLOSE_FD, /* fd */ }, },
1032         { .name     = "connect",    .errmsg = true, },
1033         { .name     = "creat",      .errmsg = true,
1034           .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1035         { .name     = "dup",        .errmsg = true,
1036           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1037         { .name     = "dup2",       .errmsg = true,
1038           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1039         { .name     = "dup3",       .errmsg = true,
1040           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1041         { .name     = "epoll_ctl",  .errmsg = true, STRARRAY(1, op, epoll_ctl_ops), },
1042         { .name     = "eventfd2",   .errmsg = true,
1043           .arg_scnprintf = { [1] = SCA_EFD_FLAGS, /* flags */ }, },
1044         { .name     = "faccessat",  .errmsg = true,
1045           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1046                              [1] = SCA_FILENAME, /* filename */ }, },
1047         { .name     = "fadvise64",  .errmsg = true,
1048           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1049         { .name     = "fallocate",  .errmsg = true,
1050           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1051         { .name     = "fchdir",     .errmsg = true,
1052           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1053         { .name     = "fchmod",     .errmsg = true,
1054           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1055         { .name     = "fchmodat",   .errmsg = true,
1056           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */
1057                              [1] = SCA_FILENAME, /* filename */ }, },
1058         { .name     = "fchown",     .errmsg = true,
1059           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1060         { .name     = "fchownat",   .errmsg = true,
1061           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */
1062                              [1] = SCA_FILENAME, /* filename */ }, },
1063         { .name     = "fcntl",      .errmsg = true,
1064           .arg_scnprintf = { [0] = SCA_FD, /* fd */
1065                              [1] = SCA_STRARRAY, /* cmd */ },
1066           .arg_parm      = { [1] = &strarray__fcntl_cmds, /* cmd */ }, },
1067         { .name     = "fdatasync",  .errmsg = true,
1068           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1069         { .name     = "flock",      .errmsg = true,
1070           .arg_scnprintf = { [0] = SCA_FD, /* fd */
1071                              [1] = SCA_FLOCK, /* cmd */ }, },
1072         { .name     = "fsetxattr",  .errmsg = true,
1073           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1074         { .name     = "fstat",      .errmsg = true, .alias = "newfstat",
1075           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1076         { .name     = "fstatat",    .errmsg = true, .alias = "newfstatat",
1077           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1078                              [1] = SCA_FILENAME, /* filename */ }, },
1079         { .name     = "fstatfs",    .errmsg = true,
1080           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1081         { .name     = "fsync",    .errmsg = true,
1082           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1083         { .name     = "ftruncate", .errmsg = true,
1084           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1085         { .name     = "futex",      .errmsg = true,
1086           .arg_scnprintf = { [1] = SCA_FUTEX_OP, /* op */ }, },
1087         { .name     = "futimesat", .errmsg = true,
1088           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */
1089                              [1] = SCA_FILENAME, /* filename */ }, },
1090         { .name     = "getdents",   .errmsg = true,
1091           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1092         { .name     = "getdents64", .errmsg = true,
1093           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1094         { .name     = "getitimer",  .errmsg = true, STRARRAY(0, which, itimers), },
1095         { .name     = "getrlimit",  .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
1096         { .name     = "getxattr",    .errmsg = true,
1097           .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1098         { .name     = "inotify_add_watch",          .errmsg = true,
1099           .arg_scnprintf = { [1] = SCA_FILENAME, /* pathname */ }, },
1100         { .name     = "ioctl",      .errmsg = true,
1101           .arg_scnprintf = { [0] = SCA_FD, /* fd */
1102 #if defined(__i386__) || defined(__x86_64__)
1103 /*
1104  * FIXME: Make this available to all arches.
1105  */
1106                              [1] = SCA_STRHEXARRAY, /* cmd */
1107                              [2] = SCA_HEX, /* arg */ },
1108           .arg_parm      = { [1] = &strarray__tioctls, /* cmd */ }, },
1109 #else
1110                              [2] = SCA_HEX, /* arg */ }, },
1111 #endif
1112         { .name     = "keyctl",     .errmsg = true, STRARRAY(0, option, keyctl_options), },
1113         { .name     = "kill",       .errmsg = true,
1114           .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1115         { .name     = "lchown",    .errmsg = true,
1116           .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1117         { .name     = "lgetxattr",  .errmsg = true,
1118           .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1119         { .name     = "linkat",     .errmsg = true,
1120           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
1121         { .name     = "listxattr",  .errmsg = true,
1122           .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1123         { .name     = "llistxattr", .errmsg = true,
1124           .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1125         { .name     = "lremovexattr",  .errmsg = true,
1126           .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1127         { .name     = "lseek",      .errmsg = true,
1128           .arg_scnprintf = { [0] = SCA_FD, /* fd */
1129                              [2] = SCA_STRARRAY, /* whence */ },
1130           .arg_parm      = { [2] = &strarray__whences, /* whence */ }, },
1131         { .name     = "lsetxattr",  .errmsg = true,
1132           .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1133         { .name     = "lstat",      .errmsg = true, .alias = "newlstat",
1134           .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1135         { .name     = "lsxattr",    .errmsg = true,
1136           .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1137         { .name     = "madvise",    .errmsg = true,
1138           .arg_scnprintf = { [0] = SCA_HEX,      /* start */
1139                              [2] = SCA_MADV_BHV, /* behavior */ }, },
1140         { .name     = "mkdir",    .errmsg = true,
1141           .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1142         { .name     = "mkdirat",    .errmsg = true,
1143           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */
1144                              [1] = SCA_FILENAME, /* pathname */ }, },
1145         { .name     = "mknod",      .errmsg = true,
1146           .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1147         { .name     = "mknodat",    .errmsg = true,
1148           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */
1149                              [1] = SCA_FILENAME, /* filename */ }, },
1150         { .name     = "mlock",      .errmsg = true,
1151           .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1152         { .name     = "mlockall",   .errmsg = true,
1153           .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1154         { .name     = "mmap",       .hexret = true,
1155           .arg_scnprintf = { [0] = SCA_HEX,       /* addr */
1156                              [2] = SCA_MMAP_PROT, /* prot */
1157                              [3] = SCA_MMAP_FLAGS, /* flags */
1158                              [4] = SCA_FD,        /* fd */ }, },
1159         { .name     = "mprotect",   .errmsg = true,
1160           .arg_scnprintf = { [0] = SCA_HEX, /* start */
1161                              [2] = SCA_MMAP_PROT, /* prot */ }, },
1162         { .name     = "mq_unlink", .errmsg = true,
1163           .arg_scnprintf = { [0] = SCA_FILENAME, /* u_name */ }, },
1164         { .name     = "mremap",     .hexret = true,
1165           .arg_scnprintf = { [0] = SCA_HEX, /* addr */
1166                              [3] = SCA_MREMAP_FLAGS, /* flags */
1167                              [4] = SCA_HEX, /* new_addr */ }, },
1168         { .name     = "munlock",    .errmsg = true,
1169           .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1170         { .name     = "munmap",     .errmsg = true,
1171           .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1172         { .name     = "name_to_handle_at", .errmsg = true,
1173           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1174         { .name     = "newfstatat", .errmsg = true,
1175           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1176                              [1] = SCA_FILENAME, /* filename */ }, },
1177         { .name     = "open",       .errmsg = true,
1178           .arg_scnprintf = { [0] = SCA_FILENAME,   /* filename */
1179                              [1] = SCA_OPEN_FLAGS, /* flags */ }, },
1180         { .name     = "open_by_handle_at", .errmsg = true,
1181           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1182                              [2] = SCA_OPEN_FLAGS, /* flags */ }, },
1183         { .name     = "openat",     .errmsg = true,
1184           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1185                              [1] = SCA_FILENAME, /* filename */
1186                              [2] = SCA_OPEN_FLAGS, /* flags */ }, },
1187         { .name     = "perf_event_open", .errmsg = true,
1188           .arg_scnprintf = { [1] = SCA_INT, /* pid */
1189                              [2] = SCA_INT, /* cpu */
1190                              [3] = SCA_FD,  /* group_fd */
1191                              [4] = SCA_PERF_FLAGS,  /* flags */ }, },
1192         { .name     = "pipe2",      .errmsg = true,
1193           .arg_scnprintf = { [1] = SCA_PIPE_FLAGS, /* flags */ }, },
1194         { .name     = "poll",       .errmsg = true, .timeout = true, },
1195         { .name     = "ppoll",      .errmsg = true, .timeout = true, },
1196         { .name     = "pread",      .errmsg = true, .alias = "pread64",
1197           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1198         { .name     = "preadv",     .errmsg = true, .alias = "pread",
1199           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1200         { .name     = "prlimit64",  .errmsg = true, STRARRAY(1, resource, rlimit_resources), },
1201         { .name     = "pwrite",     .errmsg = true, .alias = "pwrite64",
1202           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1203         { .name     = "pwritev",    .errmsg = true,
1204           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1205         { .name     = "read",       .errmsg = true,
1206           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1207         { .name     = "readlink",   .errmsg = true,
1208           .arg_scnprintf = { [0] = SCA_FILENAME, /* path */ }, },
1209         { .name     = "readlinkat", .errmsg = true,
1210           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1211                              [1] = SCA_FILENAME, /* pathname */ }, },
1212         { .name     = "readv",      .errmsg = true,
1213           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1214         { .name     = "recvfrom",   .errmsg = true,
1215           .arg_scnprintf = { [0] = SCA_FD, /* fd */
1216                              [3] = SCA_MSG_FLAGS, /* flags */ }, },
1217         { .name     = "recvmmsg",   .errmsg = true,
1218           .arg_scnprintf = { [0] = SCA_FD, /* fd */
1219                              [3] = SCA_MSG_FLAGS, /* flags */ }, },
1220         { .name     = "recvmsg",    .errmsg = true,
1221           .arg_scnprintf = { [0] = SCA_FD, /* fd */
1222                              [2] = SCA_MSG_FLAGS, /* flags */ }, },
1223         { .name     = "removexattr", .errmsg = true,
1224           .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1225         { .name     = "renameat",   .errmsg = true,
1226           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1227         { .name     = "rmdir",    .errmsg = true,
1228           .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1229         { .name     = "rt_sigaction", .errmsg = true,
1230           .arg_scnprintf = { [0] = SCA_SIGNUM, /* sig */ }, },
1231         { .name     = "rt_sigprocmask",  .errmsg = true, STRARRAY(0, how, sighow), },
1232         { .name     = "rt_sigqueueinfo", .errmsg = true,
1233           .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1234         { .name     = "rt_tgsigqueueinfo", .errmsg = true,
1235           .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
1236         { .name     = "select",     .errmsg = true, .timeout = true, },
1237         { .name     = "sendmmsg",    .errmsg = true,
1238           .arg_scnprintf = { [0] = SCA_FD, /* fd */
1239                              [3] = SCA_MSG_FLAGS, /* flags */ }, },
1240         { .name     = "sendmsg",    .errmsg = true,
1241           .arg_scnprintf = { [0] = SCA_FD, /* fd */
1242                              [2] = SCA_MSG_FLAGS, /* flags */ }, },
1243         { .name     = "sendto",     .errmsg = true,
1244           .arg_scnprintf = { [0] = SCA_FD, /* fd */
1245                              [3] = SCA_MSG_FLAGS, /* flags */ }, },
1246         { .name     = "setitimer",  .errmsg = true, STRARRAY(0, which, itimers), },
1247         { .name     = "setrlimit",  .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
1248         { .name     = "setxattr",   .errmsg = true,
1249           .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1250         { .name     = "shutdown",   .errmsg = true,
1251           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1252         { .name     = "socket",     .errmsg = true,
1253           .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
1254                              [1] = SCA_SK_TYPE, /* type */ },
1255           .arg_parm      = { [0] = &strarray__socket_families, /* family */ }, },
1256         { .name     = "socketpair", .errmsg = true,
1257           .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
1258                              [1] = SCA_SK_TYPE, /* type */ },
1259           .arg_parm      = { [0] = &strarray__socket_families, /* family */ }, },
1260         { .name     = "stat",       .errmsg = true, .alias = "newstat",
1261           .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1262         { .name     = "statfs",     .errmsg = true,
1263           .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1264         { .name     = "swapoff",    .errmsg = true,
1265           .arg_scnprintf = { [0] = SCA_FILENAME, /* specialfile */ }, },
1266         { .name     = "swapon",     .errmsg = true,
1267           .arg_scnprintf = { [0] = SCA_FILENAME, /* specialfile */ }, },
1268         { .name     = "symlinkat",  .errmsg = true,
1269           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1270         { .name     = "tgkill",     .errmsg = true,
1271           .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
1272         { .name     = "tkill",      .errmsg = true,
1273           .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1274         { .name     = "truncate",   .errmsg = true,
1275           .arg_scnprintf = { [0] = SCA_FILENAME, /* path */ }, },
1276         { .name     = "uname",      .errmsg = true, .alias = "newuname", },
1277         { .name     = "unlinkat",   .errmsg = true,
1278           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1279                              [1] = SCA_FILENAME, /* pathname */ }, },
1280         { .name     = "utime",  .errmsg = true,
1281           .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1282         { .name     = "utimensat",  .errmsg = true,
1283           .arg_scnprintf = { [0] = SCA_FDAT, /* dirfd */
1284                              [1] = SCA_FILENAME, /* filename */ }, },
1285         { .name     = "utimes",  .errmsg = true,
1286           .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1287         { .name     = "vmsplice",  .errmsg = true,
1288           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1289         { .name     = "write",      .errmsg = true,
1290           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1291         { .name     = "writev",     .errmsg = true,
1292           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1293 };
1294
1295 static int syscall_fmt__cmp(const void *name, const void *fmtp)
1296 {
1297         const struct syscall_fmt *fmt = fmtp;
1298         return strcmp(name, fmt->name);
1299 }
1300
1301 static struct syscall_fmt *syscall_fmt__find(const char *name)
1302 {
1303         const int nmemb = ARRAY_SIZE(syscall_fmts);
1304         return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp);
1305 }
1306
/*
 * Per-syscall state used when formatting a traced syscall.
 *
 *   nr_args / args   argument count and the list of argument fields; the
 *                    list is walked through field->next
 *   fmt              entry from syscall_fmts for this syscall, or NULL
 *   arg_scnprintf    array of nr_args formatter pointers, allocated by
 *                    syscall__set_arg_fmts()
 *   arg_parm         per-argument formatter parameters; points at
 *                    fmt->arg_parm when fmt is set
 *
 * NOTE(review): tp_format, name and is_exit are filled in by code outside
 * this part of the file -- confirm their exact meaning there.
 */
1307 struct syscall {
1308         struct event_format *tp_format;
1309         int                 nr_args;
1310         struct format_field *args;
1311         const char          *name;
1312         bool                is_exit;
1313         struct syscall_fmt  *fmt;
1314         size_t              (**arg_scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
1315         void                **arg_parm;
1316 };
1317
1318 static size_t fprintf_duration(unsigned long t, FILE *fp)
1319 {
1320         double duration = (double)t / NSEC_PER_MSEC;
1321         size_t printed = fprintf(fp, "(");
1322
1323         if (duration >= 1.0)
1324                 printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration);
1325         else if (duration >= 0.01)
1326                 printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration);
1327         else
1328                 printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration);
1329         return printed + fprintf(fp, "): ");
1330 }
1331
/*
 * Per-thread tracing state, stored as the thread's private data
 * (see thread__trace()).
 *
 *   nr_events   bumped by thread__trace() on every call for this thread
 *   entry_str   buffer that filename.entry_str_pos is an offset into
 *   paths       fd -> pathname cache: table[] is indexed by fd and holds
 *               strdup()ed strings (NULL when unknown); max is the highest
 *               valid index, -1 while the table is empty
 */
1332 /**
1333  * filename.ptr: The filename char pointer that will be vfs_getname'd
1334  * filename.entry_str_pos: Where to insert the string translated from
1335  *                         filename.ptr by the vfs_getname tracepoint/kprobe.
1336  */
1337 struct thread_trace {
1338         u64               entry_time;
1339         u64               exit_time;
1340         bool              entry_pending;
1341         unsigned long     nr_events;
1342         unsigned long     pfmaj, pfmin;
1343         char              *entry_str;
1344         double            runtime_ms;
1345         struct {
1346                 unsigned long ptr;
1347                 short int     entry_str_pos;
1348                 bool          pending_open;
1349                 unsigned int  namelen;
1350                 char          *name;
1351         } filename;
1352         struct {
1353                 int       max;
1354                 char      **table;
1355         } paths;
1356
1357         struct intlist *syscall_stats;
1358 };
1359
1360 static struct thread_trace *thread_trace__new(void)
1361 {
1362         struct thread_trace *ttrace =  zalloc(sizeof(struct thread_trace));
1363
1364         if (ttrace)
1365                 ttrace->paths.max = -1;
1366
1367         ttrace->syscall_stats = intlist__new(NULL);
1368
1369         return ttrace;
1370 }
1371
1372 static struct thread_trace *thread__trace(struct thread *thread, FILE *fp)
1373 {
1374         struct thread_trace *ttrace;
1375
1376         if (thread == NULL)
1377                 goto fail;
1378
1379         if (thread__priv(thread) == NULL)
1380                 thread__set_priv(thread, thread_trace__new());
1381
1382         if (thread__priv(thread) == NULL)
1383                 goto fail;
1384
1385         ttrace = thread__priv(thread);
1386         ++ttrace->nr_events;
1387
1388         return ttrace;
1389 fail:
1390         color_fprintf(fp, PERF_COLOR_RED,
1391                       "WARNING: not enough memory, dropping samples!\n");
1392         return NULL;
1393 }
1394
/*
 * Bit flags selecting major/minor page faults.
 * NOTE(review): presumably combined into trace->trace_pgfaults -- confirm
 * against the option parsing code.
 */
1395 #define TRACE_PFMAJ             (1 << 0)
1396 #define TRACE_PFMIN             (1 << 1)
1397
/* NOTE(review): presumably the size of thread_trace->entry_str -- confirm at the allocation site. */
1398 static const size_t trace__entry_str_size = 2048;
1399
/*
 * Global state of one 'perf trace' run.
 *
 * Members whose use is visible in the code following this definition:
 *   tool             embedded perf_tool; trace__tool_process() gets back
 *                    to the enclosing struct trace via container_of()
 *   host             machine created by trace__symbols_init()
 *   base_time        subtracted from timestamps before they are printed
 *   output           stream the "LOST events" message is written to
 *   duration_filter  threshold used by trace__filter_duration(); it is
 *                    multiplied by NSEC_PER_MSEC before the comparison
 *   stats.proc_getname  counts fd path lookups that went to /proc
 *   live             when false, unknown fd paths are not looked up in /proc
 *   multiple_threads, show_comm
 *                    control the comm/tid prefix of each output line
 *   vfs_getname      when false, filename arguments are printed as pointers
 *
 * NOTE(review): the remaining members are used by code outside this part
 * of the file -- confirm their meaning there.
 */
1400 struct trace {
1401         struct perf_tool        tool;
1402         struct {
1403                 int             machine;
1404                 int             open_id;
1405         }                       audit;
1406         struct {
1407                 int             max;
1408                 struct syscall  *table;
1409                 struct {
1410                         struct perf_evsel *sys_enter,
1411                                           *sys_exit;
1412                 }               events;
1413         } syscalls;
1414         struct record_opts      opts;
1415         struct perf_evlist      *evlist;
1416         struct machine          *host;
1417         struct thread           *current;
1418         u64                     base_time;
1419         FILE                    *output;
1420         unsigned long           nr_events;
1421         struct strlist          *ev_qualifier;
1422         struct {
1423                 size_t          nr;
1424                 int             *entries;
1425         }                       ev_qualifier_ids;
1426         struct intlist          *tid_list;
1427         struct intlist          *pid_list;
1428         struct {
1429                 size_t          nr;
1430                 pid_t           *entries;
1431         }                       filter_pids;
1432         double                  duration_filter;
1433         double                  runtime_ms;
1434         struct {
1435                 u64             vfs_getname,
1436                                 proc_getname;
1437         } stats;
1438         bool                    not_ev_qualifier;
1439         bool                    live;
1440         bool                    full_time;
1441         bool                    sched;
1442         bool                    multiple_threads;
1443         bool                    summary;
1444         bool                    summary_only;
1445         bool                    show_comm;
1446         bool                    show_tool_stats;
1447         bool                    trace_syscalls;
1448         bool                    force;
1449         bool                    vfs_getname;
1450         int                     trace_pgfaults;
1451 };
1452
1453 static int trace__set_fd_pathname(struct thread *thread, int fd, const char *pathname)
1454 {
1455         struct thread_trace *ttrace = thread__priv(thread);
1456
1457         if (fd > ttrace->paths.max) {
1458                 char **npath = realloc(ttrace->paths.table, (fd + 1) * sizeof(char *));
1459
1460                 if (npath == NULL)
1461                         return -1;
1462
1463                 if (ttrace->paths.max != -1) {
1464                         memset(npath + ttrace->paths.max + 1, 0,
1465                                (fd - ttrace->paths.max) * sizeof(char *));
1466                 } else {
1467                         memset(npath, 0, (fd + 1) * sizeof(char *));
1468                 }
1469
1470                 ttrace->paths.table = npath;
1471                 ttrace->paths.max   = fd;
1472         }
1473
1474         ttrace->paths.table[fd] = strdup(pathname);
1475
1476         return ttrace->paths.table[fd] != NULL ? 0 : -1;
1477 }
1478
1479 static int thread__read_fd_path(struct thread *thread, int fd)
1480 {
1481         char linkname[PATH_MAX], pathname[PATH_MAX];
1482         struct stat st;
1483         int ret;
1484
1485         if (thread->pid_ == thread->tid) {
1486                 scnprintf(linkname, sizeof(linkname),
1487                           "/proc/%d/fd/%d", thread->pid_, fd);
1488         } else {
1489                 scnprintf(linkname, sizeof(linkname),
1490                           "/proc/%d/task/%d/fd/%d", thread->pid_, thread->tid, fd);
1491         }
1492
1493         if (lstat(linkname, &st) < 0 || st.st_size + 1 > (off_t)sizeof(pathname))
1494                 return -1;
1495
1496         ret = readlink(linkname, pathname, sizeof(pathname));
1497
1498         if (ret < 0 || ret > st.st_size)
1499                 return -1;
1500
1501         pathname[ret] = '\0';
1502         return trace__set_fd_pathname(thread, fd, pathname);
1503 }
1504
1505 static const char *thread__fd_path(struct thread *thread, int fd,
1506                                    struct trace *trace)
1507 {
1508         struct thread_trace *ttrace = thread__priv(thread);
1509
1510         if (ttrace == NULL)
1511                 return NULL;
1512
1513         if (fd < 0)
1514                 return NULL;
1515
1516         if ((fd > ttrace->paths.max || ttrace->paths.table[fd] == NULL)) {
1517                 if (!trace->live)
1518                         return NULL;
1519                 ++trace->stats.proc_getname;
1520                 if (thread__read_fd_path(thread, fd))
1521                         return NULL;
1522         }
1523
1524         return ttrace->paths.table[fd];
1525 }
1526
1527 static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
1528                                         struct syscall_arg *arg)
1529 {
1530         int fd = arg->val;
1531         size_t printed = scnprintf(bf, size, "%d", fd);
1532         const char *path = thread__fd_path(arg->thread, fd, arg->trace);
1533
1534         if (path)
1535                 printed += scnprintf(bf + printed, size - printed, "<%s>", path);
1536
1537         return printed;
1538 }
1539
1540 static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
1541                                               struct syscall_arg *arg)
1542 {
1543         int fd = arg->val;
1544         size_t printed = syscall_arg__scnprintf_fd(bf, size, arg);
1545         struct thread_trace *ttrace = thread__priv(arg->thread);
1546
1547         if (ttrace && fd >= 0 && fd <= ttrace->paths.max)
1548                 zfree(&ttrace->paths.table[fd]);
1549
1550         return printed;
1551 }
1552
1553 static void thread__set_filename_pos(struct thread *thread, const char *bf,
1554                                      unsigned long ptr)
1555 {
1556         struct thread_trace *ttrace = thread__priv(thread);
1557
1558         ttrace->filename.ptr = ptr;
1559         ttrace->filename.entry_str_pos = bf - ttrace->entry_str;
1560 }
1561
1562 static size_t syscall_arg__scnprintf_filename(char *bf, size_t size,
1563                                               struct syscall_arg *arg)
1564 {
1565         unsigned long ptr = arg->val;
1566
1567         if (!arg->trace->vfs_getname)
1568                 return scnprintf(bf, size, "%#x", ptr);
1569
1570         thread__set_filename_pos(arg->thread, bf, ptr);
1571         return 0;
1572 }
1573
1574 static bool trace__filter_duration(struct trace *trace, double t)
1575 {
1576         return t < (trace->duration_filter * NSEC_PER_MSEC);
1577 }
1578
1579 static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
1580 {
1581         double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC;
1582
1583         return fprintf(fp, "%10.3f ", ts);
1584 }
1585
/*
 * Flags set from the signal handler and polled by the main code.
 *
 * They are written asynchronously from sig_handler(), so they are declared
 * volatile sig_atomic_t: that is the only object type the C standard
 * guarantees can be safely written from a signal handler, and volatile
 * keeps the compiler from caching the value across the polling loops.
 */
static volatile sig_atomic_t done = false;
static volatile sig_atomic_t interrupted = false;

/*
 * Signal handler: request termination and remember whether the request
 * came from SIGINT.
 */
static void sig_handler(int sig)
{
	done = true;
	interrupted = sig == SIGINT;
}
1594
1595 static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread,
1596                                         u64 duration, u64 tstamp, FILE *fp)
1597 {
1598         size_t printed = trace__fprintf_tstamp(trace, tstamp, fp);
1599         printed += fprintf_duration(duration, fp);
1600
1601         if (trace->multiple_threads) {
1602                 if (trace->show_comm)
1603                         printed += fprintf(fp, "%.14s/", thread__comm_str(thread));
1604                 printed += fprintf(fp, "%d ", thread->tid);
1605         }
1606
1607         return printed;
1608 }
1609
1610 static int trace__process_event(struct trace *trace, struct machine *machine,
1611                                 union perf_event *event, struct perf_sample *sample)
1612 {
1613         int ret = 0;
1614
1615         switch (event->header.type) {
1616         case PERF_RECORD_LOST:
1617                 color_fprintf(trace->output, PERF_COLOR_RED,
1618                               "LOST %" PRIu64 " events!\n", event->lost.lost);
1619                 ret = machine__process_lost_event(machine, event, sample);
1620                 break;
1621         default:
1622                 ret = machine__process_event(machine, event, sample);
1623                 break;
1624         }
1625
1626         return ret;
1627 }
1628
1629 static int trace__tool_process(struct perf_tool *tool,
1630                                union perf_event *event,
1631                                struct perf_sample *sample,
1632                                struct machine *machine)
1633 {
1634         struct trace *trace = container_of(tool, struct trace, tool);
1635         return trace__process_event(trace, machine, event, sample);
1636 }
1637
1638 static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
1639 {
1640         int err = symbol__init(NULL);
1641
1642         if (err)
1643                 return err;
1644
1645         trace->host = machine__new_host();
1646         if (trace->host == NULL)
1647                 return -ENOMEM;
1648
1649         if (trace_event__register_resolver(trace->host, machine__resolve_kernel_addr) < 0)
1650                 return -errno;
1651
1652         err = __machine__synthesize_threads(trace->host, &trace->tool, &trace->opts.target,
1653                                             evlist->threads, trace__tool_process, false,
1654                                             trace->opts.proc_map_timeout);
1655         if (err)
1656                 symbol__exit();
1657
1658         return err;
1659 }
1660
1661 static int syscall__set_arg_fmts(struct syscall *sc)
1662 {
1663         struct format_field *field;
1664         int idx = 0;
1665
1666         sc->arg_scnprintf = calloc(sc->nr_args, sizeof(void *));
1667         if (sc->arg_scnprintf == NULL)
1668                 return -1;
1669
1670         if (sc->fmt)
1671                 sc->arg_parm = sc->fmt->arg_parm;
1672
1673         for (field = sc->args; field; field = field->next) {
1674                 if (sc->fmt && sc->fmt->arg_scnprintf[idx])
1675                         sc->arg_scnprintf[idx] = sc->fmt->arg_scnprintf[idx];
1676                 else if (field->flags & FIELD_IS_POINTER)
1677                         sc->arg_scnprintf[idx] = syscall_arg__scnprintf_hex;
1678                 ++idx;
1679         }
1680
1681         return 0;
1682 }
1683
1684 static int trace__read_syscall_info(struct trace *trace, int id)
1685 {
1686         char tp_name[128];
1687         struct syscall *sc;
1688         const char *name = audit_syscall_to_name(id, trace->audit.machine);
1689
1690         if (name == NULL)
1691                 return -1;
1692
1693         if (id > trace->syscalls.max) {
1694                 struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc));
1695
1696                 if (nsyscalls == NULL)
1697                         return -1;
1698
1699                 if (trace->syscalls.max != -1) {
1700                         memset(nsyscalls + trace->syscalls.max + 1, 0,
1701                                (id - trace->syscalls.max) * sizeof(*sc));
1702                 } else {
1703                         memset(nsyscalls, 0, (id + 1) * sizeof(*sc));
1704                 }
1705
1706                 trace->syscalls.table = nsyscalls;
1707                 trace->syscalls.max   = id;
1708         }
1709
1710         sc = trace->syscalls.table + id;
1711         sc->name = name;
1712
1713         sc->fmt  = syscall_fmt__find(sc->name);
1714
1715         snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name);
1716         sc->tp_format = trace_event__tp_format("syscalls", tp_name);
1717
1718         if (IS_ERR(sc->tp_format) && sc->fmt && sc->fmt->alias) {
1719                 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias);
1720                 sc->tp_format = trace_event__tp_format("syscalls", tp_name);
1721         }
1722
1723         if (IS_ERR(sc->tp_format))
1724                 return -1;
1725
1726         sc->args = sc->tp_format->format.fields;
1727         sc->nr_args = sc->tp_format->format.nr_fields;
1728         /* drop nr field - not relevant here; does not exist on older kernels */
1729         if (sc->args && strcmp(sc->args->name, "nr") == 0) {
1730                 sc->args = sc->args->next;
1731                 --sc->nr_args;
1732         }
1733
1734         sc->is_exit = !strcmp(name, "exit_group") || !strcmp(name, "exit");
1735
1736         return syscall__set_arg_fmts(sc);
1737 }
1738
1739 static int trace__validate_ev_qualifier(struct trace *trace)
1740 {
1741         int err = 0, i;
1742         struct str_node *pos;
1743
1744         trace->ev_qualifier_ids.nr = strlist__nr_entries(trace->ev_qualifier);
1745         trace->ev_qualifier_ids.entries = malloc(trace->ev_qualifier_ids.nr *
1746                                                  sizeof(trace->ev_qualifier_ids.entries[0]));
1747
1748         if (trace->ev_qualifier_ids.entries == NULL) {
1749                 fputs("Error:\tNot enough memory for allocating events qualifier ids\n",
1750                        trace->output);
1751                 err = -EINVAL;
1752                 goto out;
1753         }
1754
1755         i = 0;
1756
1757         strlist__for_each(pos, trace->ev_qualifier) {
1758                 const char *sc = pos->s;
1759                 int id = audit_name_to_syscall(sc, trace->audit.machine);
1760
1761                 if (id < 0) {
1762                         if (err == 0) {
1763                                 fputs("Error:\tInvalid syscall ", trace->output);
1764                                 err = -EINVAL;
1765                         } else {
1766                                 fputs(", ", trace->output);
1767                         }
1768
1769                         fputs(sc, trace->output);
1770                 }
1771
1772                 trace->ev_qualifier_ids.entries[i++] = id;
1773         }
1774
1775         if (err < 0) {
1776                 fputs("\nHint:\ttry 'perf list syscalls:sys_enter_*'"
1777                       "\nHint:\tand: 'man syscalls'\n", trace->output);
1778                 zfree(&trace->ev_qualifier_ids.entries);
1779                 trace->ev_qualifier_ids.nr = 0;
1780         }
1781 out:
1782         return err;
1783 }
1784
1785 /*
1786  * args is to be interpreted as a series of longs but we need to handle
1787  * 8-byte unaligned accesses. args points to raw_data within the event
1788  * and raw_data is guaranteed to be 8-byte unaligned because it is
1789  * preceded by raw_size which is a u32. So we need to copy args to a temp
1790  * variable to read it. Most notably this avoids extended load instructions
1791  * on unaligned addresses
1792  */
1793
1794 static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
1795                                       unsigned char *args, struct trace *trace,
1796                                       struct thread *thread)
1797 {
1798         size_t printed = 0;
1799         unsigned char *p;
1800         unsigned long val;
1801
1802         if (sc->args != NULL) {
1803                 struct format_field *field;
1804                 u8 bit = 1;
1805                 struct syscall_arg arg = {
1806                         .idx    = 0,
1807                         .mask   = 0,
1808                         .trace  = trace,
1809                         .thread = thread,
1810                 };
1811
1812                 for (field = sc->args; field;
1813                      field = field->next, ++arg.idx, bit <<= 1) {
1814                         if (arg.mask & bit)
1815                                 continue;
1816
1817                         /* special care for unaligned accesses */
1818                         p = args + sizeof(unsigned long) * arg.idx;
1819                         memcpy(&val, p, sizeof(val));
1820
1821                         /*
1822                          * Suppress this argument if its value is zero and
1823                          * and we don't have a string associated in an
1824                          * strarray for it.
1825                          */
1826                         if (val == 0 &&
1827                             !(sc->arg_scnprintf &&
1828                               sc->arg_scnprintf[arg.idx] == SCA_STRARRAY &&
1829                               sc->arg_parm[arg.idx]))
1830                                 continue;
1831
1832                         printed += scnprintf(bf + printed, size - printed,
1833                                              "%s%s: ", printed ? ", " : "", field->name);
1834                         if (sc->arg_scnprintf && sc->arg_scnprintf[arg.idx]) {
1835                                 arg.val = val;
1836                                 if (sc->arg_parm)
1837                                         arg.parm = sc->arg_parm[arg.idx];
1838                                 printed += sc->arg_scnprintf[arg.idx](bf + printed,
1839                                                                       size - printed, &arg);
1840                         } else {
1841                                 printed += scnprintf(bf + printed, size - printed,
1842                                                      "%ld", val);
1843                         }
1844                 }
1845         } else {
1846                 int i = 0;
1847
1848                 while (i < 6) {
1849                         /* special care for unaligned accesses */
1850                         p = args + sizeof(unsigned long) * i;
1851                         memcpy(&val, p, sizeof(val));
1852                         printed += scnprintf(bf + printed, size - printed,
1853                                              "%sarg%d: %ld",
1854                                              printed ? ", " : "", i, val);
1855                         ++i;
1856                 }
1857         }
1858
1859         return printed;
1860 }
1861
/* Signature of the per-evsel sample handlers stored in evsel->handler. */
typedef int (*tracepoint_handler)(struct trace *trace,
                                  struct perf_evsel *evsel,
                                  union perf_event *event,
                                  struct perf_sample *sample);
1865
1866 static struct syscall *trace__syscall_info(struct trace *trace,
1867                                            struct perf_evsel *evsel, int id)
1868 {
1869
1870         if (id < 0) {
1871
1872                 /*
1873                  * XXX: Noticed on x86_64, reproduced as far back as 3.0.36, haven't tried
1874                  * before that, leaving at a higher verbosity level till that is
1875                  * explained. Reproduced with plain ftrace with:
1876                  *
1877                  * echo 1 > /t/events/raw_syscalls/sys_exit/enable
1878                  * grep "NR -1 " /t/trace_pipe
1879                  *
1880                  * After generating some load on the machine.
1881                  */
1882                 if (verbose > 1) {
1883                         static u64 n;
1884                         fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n",
1885                                 id, perf_evsel__name(evsel), ++n);
1886                 }
1887                 return NULL;
1888         }
1889
1890         if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) &&
1891             trace__read_syscall_info(trace, id))
1892                 goto out_cant_read;
1893
1894         if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL))
1895                 goto out_cant_read;
1896
1897         return &trace->syscalls.table[id];
1898
1899 out_cant_read:
1900         if (verbose) {
1901                 fprintf(trace->output, "Problems reading syscall %d", id);
1902                 if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL)
1903                         fprintf(trace->output, "(%s)", trace->syscalls.table[id].name);
1904                 fputs(" information\n", trace->output);
1905         }
1906         return NULL;
1907 }
1908
1909 static void thread__update_stats(struct thread_trace *ttrace,
1910                                  int id, struct perf_sample *sample)
1911 {
1912         struct int_node *inode;
1913         struct stats *stats;
1914         u64 duration = 0;
1915
1916         inode = intlist__findnew(ttrace->syscall_stats, id);
1917         if (inode == NULL)
1918                 return;
1919
1920         stats = inode->priv;
1921         if (stats == NULL) {
1922                 stats = malloc(sizeof(struct stats));
1923                 if (stats == NULL)
1924                         return;
1925                 init_stats(stats);
1926                 inode->priv = stats;
1927         }
1928
1929         if (ttrace->entry_time && sample->time > ttrace->entry_time)
1930                 duration = sample->time - ttrace->entry_time;
1931
1932         update_stats(stats, duration);
1933 }
1934
1935 static int trace__printf_interrupted_entry(struct trace *trace, struct perf_sample *sample)
1936 {
1937         struct thread_trace *ttrace;
1938         u64 duration;
1939         size_t printed;
1940
1941         if (trace->current == NULL)
1942                 return 0;
1943
1944         ttrace = thread__priv(trace->current);
1945
1946         if (!ttrace->entry_pending)
1947                 return 0;
1948
1949         duration = sample->time - ttrace->entry_time;
1950
1951         printed  = trace__fprintf_entry_head(trace, trace->current, duration, sample->time, trace->output);
1952         printed += fprintf(trace->output, "%-70s) ...\n", ttrace->entry_str);
1953         ttrace->entry_pending = false;
1954
1955         return printed;
1956 }
1957
1958 static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
1959                             union perf_event *event __maybe_unused,
1960                             struct perf_sample *sample)
1961 {
1962         char *msg;
1963         void *args;
1964         size_t printed = 0;
1965         struct thread *thread;
1966         int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1;
1967         struct syscall *sc = trace__syscall_info(trace, evsel, id);
1968         struct thread_trace *ttrace;
1969
1970         if (sc == NULL)
1971                 return -1;
1972
1973         thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1974         ttrace = thread__trace(thread, trace->output);
1975         if (ttrace == NULL)
1976                 goto out_put;
1977
1978         args = perf_evsel__sc_tp_ptr(evsel, args, sample);
1979
1980         if (ttrace->entry_str == NULL) {
1981                 ttrace->entry_str = malloc(trace__entry_str_size);
1982                 if (!ttrace->entry_str)
1983                         goto out_put;
1984         }
1985
1986         if (!trace->summary_only)
1987                 trace__printf_interrupted_entry(trace, sample);
1988
1989         ttrace->entry_time = sample->time;
1990         msg = ttrace->entry_str;
1991         printed += scnprintf(msg + printed, trace__entry_str_size - printed, "%s(", sc->name);
1992
1993         printed += syscall__scnprintf_args(sc, msg + printed, trace__entry_str_size - printed,
1994                                            args, trace, thread);
1995
1996         if (sc->is_exit) {
1997                 if (!trace->duration_filter && !trace->summary_only) {
1998                         trace__fprintf_entry_head(trace, thread, 1, sample->time, trace->output);
1999                         fprintf(trace->output, "%-70s\n", ttrace->entry_str);
2000                 }
2001         } else {
2002                 ttrace->entry_pending = true;
2003                 /* See trace__vfs_getname & trace__sys_exit */
2004                 ttrace->filename.pending_open = false;
2005         }
2006
2007         if (trace->current != thread) {
2008                 thread__put(trace->current);
2009                 trace->current = thread__get(thread);
2010         }
2011         err = 0;
2012 out_put:
2013         thread__put(thread);
2014         return err;
2015 }
2016
2017 static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
2018                            union perf_event *event __maybe_unused,
2019                            struct perf_sample *sample)
2020 {
2021         long ret;
2022         u64 duration = 0;
2023         struct thread *thread;
2024         int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1;
2025         struct syscall *sc = trace__syscall_info(trace, evsel, id);
2026         struct thread_trace *ttrace;
2027
2028         if (sc == NULL)
2029                 return -1;
2030
2031         thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
2032         ttrace = thread__trace(thread, trace->output);
2033         if (ttrace == NULL)
2034                 goto out_put;
2035
2036         if (trace->summary)
2037                 thread__update_stats(ttrace, id, sample);
2038
2039         ret = perf_evsel__sc_tp_uint(evsel, ret, sample);
2040
2041         if (id == trace->audit.open_id && ret >= 0 && ttrace->filename.pending_open) {
2042                 trace__set_fd_pathname(thread, ret, ttrace->filename.name);
2043                 ttrace->filename.pending_open = false;
2044                 ++trace->stats.vfs_getname;
2045         }
2046
2047         ttrace->exit_time = sample->time;
2048
2049         if (ttrace->entry_time) {
2050                 duration = sample->time - ttrace->entry_time;
2051                 if (trace__filter_duration(trace, duration))
2052                         goto out;
2053         } else if (trace->duration_filter)
2054                 goto out;
2055
2056         if (trace->summary_only)
2057                 goto out;
2058
2059         trace__fprintf_entry_head(trace, thread, duration, sample->time, trace->output);
2060
2061         if (ttrace->entry_pending) {
2062                 fprintf(trace->output, "%-70s", ttrace->entry_str);
2063         } else {
2064                 fprintf(trace->output, " ... [");
2065                 color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued");
2066                 fprintf(trace->output, "]: %s()", sc->name);
2067         }
2068
2069         if (sc->fmt == NULL) {
2070 signed_print:
2071                 fprintf(trace->output, ") = %ld", ret);
2072         } else if (ret < 0 && sc->fmt->errmsg) {
2073                 char bf[STRERR_BUFSIZE];
2074                 const char *emsg = strerror_r(-ret, bf, sizeof(bf)),
2075                            *e = audit_errno_to_name(-ret);
2076
2077                 fprintf(trace->output, ") = -1 %s %s", e, emsg);
2078         } else if (ret == 0 && sc->fmt->timeout)
2079                 fprintf(trace->output, ") = 0 Timeout");
2080         else if (sc->fmt->hexret)
2081                 fprintf(trace->output, ") = %#lx", ret);
2082         else
2083                 goto signed_print;
2084
2085         fputc('\n', trace->output);
2086 out:
2087         ttrace->entry_pending = false;
2088         err = 0;
2089 out_put:
2090         thread__put(thread);
2091         return err;
2092 }
2093
2094 static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel,
2095                               union perf_event *event __maybe_unused,
2096                               struct perf_sample *sample)
2097 {
2098         struct thread *thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
2099         struct thread_trace *ttrace;
2100         size_t filename_len, entry_str_len, to_move;
2101         ssize_t remaining_space;
2102         char *pos;
2103         const char *filename = perf_evsel__rawptr(evsel, sample, "pathname");
2104
2105         if (!thread)
2106                 goto out;
2107
2108         ttrace = thread__priv(thread);
2109         if (!ttrace)
2110                 goto out;
2111
2112         filename_len = strlen(filename);
2113
2114         if (ttrace->filename.namelen < filename_len) {
2115                 char *f = realloc(ttrace->filename.name, filename_len + 1);
2116
2117                 if (f == NULL)
2118                                 goto out;
2119
2120                 ttrace->filename.namelen = filename_len;
2121                 ttrace->filename.name = f;
2122         }
2123
2124         strcpy(ttrace->filename.name, filename);
2125         ttrace->filename.pending_open = true;
2126
2127         if (!ttrace->filename.ptr)
2128                 goto out;
2129
2130         entry_str_len = strlen(ttrace->entry_str);
2131         remaining_space = trace__entry_str_size - entry_str_len - 1; /* \0 */
2132         if (remaining_space <= 0)
2133                 goto out;
2134
2135         if (filename_len > (size_t)remaining_space) {
2136                 filename += filename_len - remaining_space;
2137                 filename_len = remaining_space;
2138         }
2139
2140         to_move = entry_str_len - ttrace->filename.entry_str_pos + 1; /* \0 */
2141         pos = ttrace->entry_str + ttrace->filename.entry_str_pos;
2142         memmove(pos + filename_len, pos, to_move);
2143         memcpy(pos, filename, filename_len);
2144
2145         ttrace->filename.ptr = 0;
2146         ttrace->filename.entry_str_pos = 0;
2147 out:
2148         return 0;
2149 }
2150
2151 static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel,
2152                                      union perf_event *event __maybe_unused,
2153                                      struct perf_sample *sample)
2154 {
2155         u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
2156         double runtime_ms = (double)runtime / NSEC_PER_MSEC;
2157         struct thread *thread = machine__findnew_thread(trace->host,
2158                                                         sample->pid,
2159                                                         sample->tid);
2160         struct thread_trace *ttrace = thread__trace(thread, trace->output);
2161
2162         if (ttrace == NULL)
2163                 goto out_dump;
2164
2165         ttrace->runtime_ms += runtime_ms;
2166         trace->runtime_ms += runtime_ms;
2167         thread__put(thread);
2168         return 0;
2169
2170 out_dump:
2171         fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n",
2172                evsel->name,
2173                perf_evsel__strval(evsel, sample, "comm"),
2174                (pid_t)perf_evsel__intval(evsel, sample, "pid"),
2175                runtime,
2176                perf_evsel__intval(evsel, sample, "vruntime"));
2177         thread__put(thread);
2178         return 0;
2179 }
2180
2181 static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel,
2182                                 union perf_event *event __maybe_unused,
2183                                 struct perf_sample *sample)
2184 {
2185         trace__printf_interrupted_entry(trace, sample);
2186         trace__fprintf_tstamp(trace, sample->time, trace->output);
2187
2188         if (trace->trace_syscalls)
2189                 fprintf(trace->output, "(         ): ");
2190
2191         fprintf(trace->output, "%s:", evsel->name);
2192
2193         if (evsel->tp_format) {
2194                 event_format__fprintf(evsel->tp_format, sample->cpu,
2195                                       sample->raw_data, sample->raw_size,
2196                                       trace->output);
2197         }
2198
2199         fprintf(trace->output, ")\n");
2200         return 0;
2201 }
2202
2203 static void print_location(FILE *f, struct perf_sample *sample,
2204                            struct addr_location *al,
2205                            bool print_dso, bool print_sym)
2206 {
2207
2208         if ((verbose || print_dso) && al->map)
2209                 fprintf(f, "%s@", al->map->dso->long_name);
2210
2211         if ((verbose || print_sym) && al->sym)
2212                 fprintf(f, "%s+0x%" PRIx64, al->sym->name,
2213                         al->addr - al->sym->start);
2214         else if (al->map)
2215                 fprintf(f, "0x%" PRIx64, al->addr);
2216         else
2217                 fprintf(f, "0x%" PRIx64, sample->addr);
2218 }
2219
2220 static int trace__pgfault(struct trace *trace,
2221                           struct perf_evsel *evsel,
2222                           union perf_event *event,
2223                           struct perf_sample *sample)
2224 {
2225         struct thread *thread;
2226         u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
2227         struct addr_location al;
2228         char map_type = 'd';
2229         struct thread_trace *ttrace;
2230         int err = -1;
2231
2232         thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
2233         ttrace = thread__trace(thread, trace->output);
2234         if (ttrace == NULL)
2235                 goto out_put;
2236
2237         if (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ)
2238                 ttrace->pfmaj++;
2239         else
2240                 ttrace->pfmin++;
2241
2242         if (trace->summary_only)
2243                 goto out;
2244
2245         thread__find_addr_location(thread, cpumode, MAP__FUNCTION,
2246                               sample->ip, &al);
2247
2248         trace__fprintf_entry_head(trace, thread, 0, sample->time, trace->output);
2249
2250         fprintf(trace->output, "%sfault [",
2251                 evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ?
2252                 "maj" : "min");
2253
2254         print_location(trace->output, sample, &al, false, true);
2255
2256         fprintf(trace->output, "] => ");
2257
2258         thread__find_addr_location(thread, cpumode, MAP__VARIABLE,
2259                                    sample->addr, &al);
2260
2261         if (!al.map) {
2262                 thread__find_addr_location(thread, cpumode,
2263                                            MAP__FUNCTION, sample->addr, &al);
2264
2265                 if (al.map)
2266                         map_type = 'x';
2267                 else
2268                         map_type = '?';
2269         }
2270
2271         print_location(trace->output, sample, &al, true, false);
2272
2273         fprintf(trace->output, " (%c%c)\n", map_type, al.level);
2274 out:
2275         err = 0;
2276 out_put:
2277         thread__put(thread);
2278         return err;
2279 }
2280
2281 static bool skip_sample(struct trace *trace, struct perf_sample *sample)
2282 {
2283         if ((trace->pid_list && intlist__find(trace->pid_list, sample->pid)) ||
2284             (trace->tid_list && intlist__find(trace->tid_list, sample->tid)))
2285                 return false;
2286
2287         if (trace->pid_list || trace->tid_list)
2288                 return true;
2289
2290         return false;
2291 }
2292
2293 static int trace__process_sample(struct perf_tool *tool,
2294                                  union perf_event *event,
2295                                  struct perf_sample *sample,
2296                                  struct perf_evsel *evsel,
2297                                  struct machine *machine __maybe_unused)
2298 {
2299         struct trace *trace = container_of(tool, struct trace, tool);
2300         int err = 0;
2301
2302         tracepoint_handler handler = evsel->handler;
2303
2304         if (skip_sample(trace, sample))
2305                 return 0;
2306
2307         if (!trace->full_time && trace->base_time == 0)
2308                 trace->base_time = sample->time;
2309
2310         if (handler) {
2311                 ++trace->nr_events;
2312                 handler(trace, evsel, event, sample);
2313         }
2314
2315         return err;
2316 }
2317
2318 static int parse_target_str(struct trace *trace)
2319 {
2320         if (trace->opts.target.pid) {
2321                 trace->pid_list = intlist__new(trace->opts.target.pid);
2322                 if (trace->pid_list == NULL) {
2323                         pr_err("Error parsing process id string\n");
2324                         return -EINVAL;
2325                 }
2326         }
2327
2328         if (trace->opts.target.tid) {
2329                 trace->tid_list = intlist__new(trace->opts.target.tid);
2330                 if (trace->tid_list == NULL) {
2331                         pr_err("Error parsing thread id string\n");
2332                         return -EINVAL;
2333                 }
2334         }
2335
2336         return 0;
2337 }
2338
2339 static int trace__record(struct trace *trace, int argc, const char **argv)
2340 {
2341         unsigned int rec_argc, i, j;
2342         const char **rec_argv;
2343         const char * const record_args[] = {
2344                 "record",
2345                 "-R",
2346                 "-m", "1024",
2347                 "-c", "1",
2348         };
2349
2350         const char * const sc_args[] = { "-e", };
2351         unsigned int sc_args_nr = ARRAY_SIZE(sc_args);
2352         const char * const majpf_args[] = { "-e", "major-faults" };
2353         unsigned int majpf_args_nr = ARRAY_SIZE(majpf_args);
2354         const char * const minpf_args[] = { "-e", "minor-faults" };
2355         unsigned int minpf_args_nr = ARRAY_SIZE(minpf_args);
2356
2357         /* +1 is for the event string below */
2358         rec_argc = ARRAY_SIZE(record_args) + sc_args_nr + 1 +
2359                 majpf_args_nr + minpf_args_nr + argc;
2360         rec_argv = calloc(rec_argc + 1, sizeof(char *));
2361
2362         if (rec_argv == NULL)
2363                 return -ENOMEM;
2364
2365         j = 0;
2366         for (i = 0; i < ARRAY_SIZE(record_args); i++)
2367                 rec_argv[j++] = record_args[i];
2368
2369         if (trace->trace_syscalls) {
2370                 for (i = 0; i < sc_args_nr; i++)
2371                         rec_argv[j++] = sc_args[i];
2372
2373                 /* event string may be different for older kernels - e.g., RHEL6 */
2374                 if (is_valid_tracepoint("raw_syscalls:sys_enter"))
2375                         rec_argv[j++] = "raw_syscalls:sys_enter,raw_syscalls:sys_exit";
2376                 else if (is_valid_tracepoint("syscalls:sys_enter"))
2377                         rec_argv[j++] = "syscalls:sys_enter,syscalls:sys_exit";
2378                 else {
2379                         pr_err("Neither raw_syscalls nor syscalls events exist.\n");
2380                         return -1;
2381                 }
2382         }
2383
2384         if (trace->trace_pgfaults & TRACE_PFMAJ)
2385                 for (i = 0; i < majpf_args_nr; i++)
2386                         rec_argv[j++] = majpf_args[i];
2387
2388         if (trace->trace_pgfaults & TRACE_PFMIN)
2389                 for (i = 0; i < minpf_args_nr; i++)
2390                         rec_argv[j++] = minpf_args[i];
2391
2392         for (i = 0; i < (unsigned int)argc; i++)
2393                 rec_argv[j++] = argv[i];
2394
2395         return cmd_record(j, rec_argv, NULL);
2396 }
2397
2398 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp);
2399
2400 static bool perf_evlist__add_vfs_getname(struct perf_evlist *evlist)
2401 {
2402         struct perf_evsel *evsel = perf_evsel__newtp("probe", "vfs_getname");
2403
2404         if (IS_ERR(evsel))
2405                 return false;
2406
2407         if (perf_evsel__field(evsel, "pathname") == NULL) {
2408                 perf_evsel__delete(evsel);
2409                 return false;
2410         }
2411
2412         evsel->handler = trace__vfs_getname;
2413         perf_evlist__add(evlist, evsel);
2414         return true;
2415 }
2416
2417 static int perf_evlist__add_pgfault(struct perf_evlist *evlist,
2418                                     u64 config)
2419 {
2420         struct perf_evsel *evsel;
2421         struct perf_event_attr attr = {
2422                 .type = PERF_TYPE_SOFTWARE,
2423                 .mmap_data = 1,
2424         };
2425
2426         attr.config = config;
2427         attr.sample_period = 1;
2428
2429         event_attr_init(&attr);
2430
2431         evsel = perf_evsel__new(&attr);
2432         if (!evsel)
2433                 return -ENOMEM;
2434
2435         evsel->handler = trace__pgfault;
2436         perf_evlist__add(evlist, evsel);
2437
2438         return 0;
2439 }
2440
2441 static void trace__handle_event(struct trace *trace, union perf_event *event, struct perf_sample *sample)
2442 {
2443         const u32 type = event->header.type;
2444         struct perf_evsel *evsel;
2445
2446         if (!trace->full_time && trace->base_time == 0)
2447                 trace->base_time = sample->time;
2448
2449         if (type != PERF_RECORD_SAMPLE) {
2450                 trace__process_event(trace, trace->host, event, sample);
2451                 return;
2452         }
2453
2454         evsel = perf_evlist__id2evsel(trace->evlist, sample->id);
2455         if (evsel == NULL) {
2456                 fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample->id);
2457                 return;
2458         }
2459
2460         if (evsel->attr.type == PERF_TYPE_TRACEPOINT &&
2461             sample->raw_data == NULL) {
2462                 fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
2463                        perf_evsel__name(evsel), sample->tid,
2464                        sample->cpu, sample->raw_size);
2465         } else {
2466                 tracepoint_handler handler = evsel->handler;
2467                 handler(trace, evsel, event, sample);
2468         }
2469 }
2470
2471 static int trace__add_syscall_newtp(struct trace *trace)
2472 {
2473         int ret = -1;
2474         struct perf_evlist *evlist = trace->evlist;
2475         struct perf_evsel *sys_enter, *sys_exit;
2476
2477         sys_enter = perf_evsel__syscall_newtp("sys_enter", trace__sys_enter);
2478         if (sys_enter == NULL)
2479                 goto out;
2480
2481         if (perf_evsel__init_sc_tp_ptr_field(sys_enter, args))
2482                 goto out_delete_sys_enter;
2483
2484         sys_exit = perf_evsel__syscall_newtp("sys_exit", trace__sys_exit);
2485         if (sys_exit == NULL)
2486                 goto out_delete_sys_enter;
2487
2488         if (perf_evsel__init_sc_tp_uint_field(sys_exit, ret))
2489                 goto out_delete_sys_exit;
2490
2491         perf_evlist__add(evlist, sys_enter);
2492         perf_evlist__add(evlist, sys_exit);
2493
2494         trace->syscalls.events.sys_enter = sys_enter;
2495         trace->syscalls.events.sys_exit  = sys_exit;
2496
2497         ret = 0;
2498 out:
2499         return ret;
2500
2501 out_delete_sys_exit:
2502         perf_evsel__delete_priv(sys_exit);
2503 out_delete_sys_enter:
2504         perf_evsel__delete_priv(sys_enter);
2505         goto out;
2506 }
2507
2508 static int trace__set_ev_qualifier_filter(struct trace *trace)
2509 {
2510         int err = -1;
2511         char *filter = asprintf_expr_inout_ints("id", !trace->not_ev_qualifier,
2512                                                 trace->ev_qualifier_ids.nr,
2513                                                 trace->ev_qualifier_ids.entries);
2514
2515         if (filter == NULL)
2516                 goto out_enomem;
2517
2518         if (!perf_evsel__append_filter(trace->syscalls.events.sys_enter, "&&", filter))
2519                 err = perf_evsel__append_filter(trace->syscalls.events.sys_exit, "&&", filter);
2520
2521         free(filter);
2522 out:
2523         return err;
2524 out_enomem:
2525         errno = ENOMEM;
2526         goto out;
2527 }
2528
2529 static int trace__run(struct trace *trace, int argc, const char **argv)
2530 {
2531         struct perf_evlist *evlist = trace->evlist;
2532         struct perf_evsel *evsel;
2533         int err = -1, i;
2534         unsigned long before;
2535         const bool forks = argc > 0;
2536         bool draining = false;
2537
2538         trace->live = true;
2539
2540         if (trace->trace_syscalls && trace__add_syscall_newtp(trace))
2541                 goto out_error_raw_syscalls;
2542
2543         if (trace->trace_syscalls)
2544                 trace->vfs_getname = perf_evlist__add_vfs_getname(evlist);
2545
2546         if ((trace->trace_pgfaults & TRACE_PFMAJ) &&
2547             perf_evlist__add_pgfault(evlist, PERF_COUNT_SW_PAGE_FAULTS_MAJ)) {
2548                 goto out_error_mem;
2549         }
2550
2551         if ((trace->trace_pgfaults & TRACE_PFMIN) &&
2552             perf_evlist__add_pgfault(evlist, PERF_COUNT_SW_PAGE_FAULTS_MIN))
2553                 goto out_error_mem;
2554
2555         if (trace->sched &&
2556             perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
2557                                    trace__sched_stat_runtime))
2558                 goto out_error_sched_stat_runtime;
2559
2560         err = perf_evlist__create_maps(evlist, &trace->opts.target);
2561         if (err < 0) {
2562                 fprintf(trace->output, "Problems parsing the target to trace, check your options!\n");
2563                 goto out_delete_evlist;
2564         }
2565
2566         err = trace__symbols_init(trace, evlist);
2567         if (err < 0) {
2568                 fprintf(trace->output, "Problems initializing symbol libraries!\n");
2569                 goto out_delete_evlist;
2570         }
2571
2572         perf_evlist__config(evlist, &trace->opts);
2573
2574         signal(SIGCHLD, sig_handler);
2575         signal(SIGINT, sig_handler);
2576
2577         if (forks) {
2578                 err = perf_evlist__prepare_workload(evlist, &trace->opts.target,
2579                                                     argv, false, NULL);
2580                 if (err < 0) {
2581                         fprintf(trace->output, "Couldn't run the workload!\n");
2582                         goto out_delete_evlist;
2583                 }
2584         }
2585
2586         err = perf_evlist__open(evlist);
2587         if (err < 0)
2588                 goto out_error_open;
2589
2590         /*
2591          * Better not use !target__has_task() here because we need to cover the
2592          * case where no threads were specified in the command line, but a
2593          * workload was, and in that case we will fill in the thread_map when
2594          * we fork the workload in perf_evlist__prepare_workload.
2595          */
2596         if (trace->filter_pids.nr > 0)
2597                 err = perf_evlist__set_filter_pids(evlist, trace->filter_pids.nr, trace->filter_pids.entries);
2598         else if (thread_map__pid(evlist->threads, 0) == -1)
2599                 err = perf_evlist__set_filter_pid(evlist, getpid());
2600
2601         if (err < 0)
2602                 goto out_error_mem;
2603
2604         if (trace->ev_qualifier_ids.nr > 0) {
2605                 err = trace__set_ev_qualifier_filter(trace);
2606                 if (err < 0)
2607                         goto out_errno;
2608
2609                 pr_debug("event qualifier tracepoint filter: %s\n",
2610                          trace->syscalls.events.sys_exit->filter);
2611         }
2612
2613         err = perf_evlist__apply_filters(evlist, &evsel);
2614         if (err < 0)
2615                 goto out_error_apply_filters;
2616
2617         err = perf_evlist__mmap(evlist, trace->opts.mmap_pages, false);
2618         if (err < 0)
2619                 goto out_error_mmap;
2620
2621         if (!target__none(&trace->opts.target))
2622                 perf_evlist__enable(evlist);
2623
2624         if (forks)
2625                 perf_evlist__start_workload(evlist);
2626
2627         trace->multiple_threads = thread_map__pid(evlist->threads, 0) == -1 ||
2628                                   evlist->threads->nr > 1 ||
2629                                   perf_evlist__first(evlist)->attr.inherit;
2630 again:
2631         before = trace->nr_events;
2632
2633         for (i = 0; i < evlist->nr_mmaps; i++) {
2634                 union perf_event *event;
2635
2636                 while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) {
2637                         struct perf_sample sample;
2638
2639                         ++trace->nr_events;
2640
2641                         err = perf_evlist__parse_sample(evlist, event, &sample);
2642                         if (err) {
2643                                 fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err);
2644                                 goto next_event;
2645                         }
2646
2647                         trace__handle_event(trace, event, &sample);
2648 next_event:
2649                         perf_evlist__mmap_consume(evlist, i);
2650
2651                         if (interrupted)
2652                                 goto out_disable;
2653
2654                         if (done && !draining) {
2655                                 perf_evlist__disable(evlist);
2656                                 draining = true;
2657                         }
2658                 }
2659         }
2660
2661         if (trace->nr_events == before) {
2662                 int timeout = done ? 100 : -1;
2663
2664                 if (!draining && perf_evlist__poll(evlist, timeout) > 0) {
2665                         if (perf_evlist__filter_pollfd(evlist, POLLERR | POLLHUP) == 0)
2666                                 draining = true;
2667
2668                         goto again;
2669                 }
2670         } else {
2671                 goto again;
2672         }
2673
2674 out_disable:
2675         thread__zput(trace->current);
2676
2677         perf_evlist__disable(evlist);
2678
2679         if (!err) {
2680                 if (trace->summary)
2681                         trace__fprintf_thread_summary(trace, trace->output);
2682
2683                 if (trace->show_tool_stats) {
2684                         fprintf(trace->output, "Stats:\n "
2685                                                " vfs_getname : %" PRIu64 "\n"
2686                                                " proc_getname: %" PRIu64 "\n",
2687                                 trace->stats.vfs_getname,
2688                                 trace->stats.proc_getname);
2689                 }
2690         }
2691
2692 out_delete_evlist:
2693         perf_evlist__delete(evlist);
2694         trace->evlist = NULL;
2695         trace->live = false;
2696         return err;
2697 {
2698         char errbuf[BUFSIZ];
2699
2700 out_error_sched_stat_runtime:
2701         tracing_path__strerror_open_tp(errno, errbuf, sizeof(errbuf), "sched", "sched_stat_runtime");
2702         goto out_error;
2703
2704 out_error_raw_syscalls:
2705         tracing_path__strerror_open_tp(errno, errbuf, sizeof(errbuf), "raw_syscalls", "sys_(enter|exit)");
2706         goto out_error;
2707
2708 out_error_mmap:
2709         perf_evlist__strerror_mmap(evlist, errno, errbuf, sizeof(errbuf));
2710         goto out_error;
2711
2712 out_error_open:
2713         perf_evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf));
2714
2715 out_error:
2716         fprintf(trace->output, "%s\n", errbuf);
2717         goto out_delete_evlist;
2718
2719 out_error_apply_filters:
2720         fprintf(trace->output,
2721                 "Failed to set filter \"%s\" on event %s with %d (%s)\n",
2722                 evsel->filter, perf_evsel__name(evsel), errno,
2723                 strerror_r(errno, errbuf, sizeof(errbuf)));
2724         goto out_delete_evlist;
2725 }
2726 out_error_mem:
2727         fprintf(trace->output, "Not enough memory to run!\n");
2728         goto out_delete_evlist;
2729
2730 out_errno:
2731         fprintf(trace->output, "errno=%d,%s\n", errno, strerror(errno));
2732         goto out_delete_evlist;
2733 }
2734
2735 static int trace__replay(struct trace *trace)
2736 {
2737         const struct perf_evsel_str_handler handlers[] = {
2738                 { "probe:vfs_getname",       trace__vfs_getname, },
2739         };
2740         struct perf_data_file file = {
2741                 .path  = input_name,
2742                 .mode  = PERF_DATA_MODE_READ,
2743                 .force = trace->force,
2744         };
2745         struct perf_session *session;
2746         struct perf_evsel *evsel;
2747         int err = -1;
2748
2749         trace->tool.sample        = trace__process_sample;
2750         trace->tool.mmap          = perf_event__process_mmap;
2751         trace->tool.mmap2         = perf_event__process_mmap2;
2752         trace->tool.comm          = perf_event__process_comm;
2753         trace->tool.exit          = perf_event__process_exit;
2754         trace->tool.fork          = perf_event__process_fork;
2755         trace->tool.attr          = perf_event__process_attr;
2756         trace->tool.tracing_data = perf_event__process_tracing_data;
2757         trace->tool.build_id      = perf_event__process_build_id;
2758
2759         trace->tool.ordered_events = true;
2760         trace->tool.ordering_requires_timestamps = true;
2761
2762         /* add tid to output */
2763         trace->multiple_threads = true;
2764
2765         session = perf_session__new(&file, false, &trace->tool);
2766         if (session == NULL)
2767                 return -1;
2768
2769         if (symbol__init(&session->header.env) < 0)
2770                 goto out;
2771
2772         trace->host = &session->machines.host;
2773
2774         err = perf_session__set_tracepoints_handlers(session, handlers);
2775         if (err)
2776                 goto out;
2777
2778         evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2779                                                      "raw_syscalls:sys_enter");
2780         /* older kernels have syscalls tp versus raw_syscalls */
2781         if (evsel == NULL)
2782                 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2783                                                              "syscalls:sys_enter");
2784
2785         if (evsel &&
2786             (perf_evsel__init_syscall_tp(evsel, trace__sys_enter) < 0 ||
2787             perf_evsel__init_sc_tp_ptr_field(evsel, args))) {
2788                 pr_err("Error during initialize raw_syscalls:sys_enter event\n");
2789                 goto out;
2790         }
2791
2792         evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2793                                                      "raw_syscalls:sys_exit");
2794         if (evsel == NULL)
2795                 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2796                                                              "syscalls:sys_exit");
2797         if (evsel &&
2798             (perf_evsel__init_syscall_tp(evsel, trace__sys_exit) < 0 ||
2799             perf_evsel__init_sc_tp_uint_field(evsel, ret))) {
2800                 pr_err("Error during initialize raw_syscalls:sys_exit event\n");
2801                 goto out;
2802         }
2803
2804         evlist__for_each(session->evlist, evsel) {
2805                 if (evsel->attr.type == PERF_TYPE_SOFTWARE &&
2806                     (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ||
2807                      evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MIN ||
2808                      evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS))
2809                         evsel->handler = trace__pgfault;
2810         }
2811
2812         err = parse_target_str(trace);
2813         if (err != 0)
2814                 goto out;
2815
2816         setup_pager();
2817
2818         err = perf_session__process_events(session);
2819         if (err)
2820                 pr_err("Failed to process events, error %d", err);
2821
2822         else if (trace->summary)
2823                 trace__fprintf_thread_summary(trace, trace->output);
2824
2825 out:
2826         perf_session__delete(session);
2827
2828         return err;
2829 }
2830
2831 static size_t trace__fprintf_threads_header(FILE *fp)
2832 {
2833         size_t printed;
2834
2835         printed  = fprintf(fp, "\n Summary of events:\n\n");
2836
2837         return printed;
2838 }
2839
2840 static size_t thread__dump_stats(struct thread_trace *ttrace,
2841                                  struct trace *trace, FILE *fp)
2842 {
2843         struct stats *stats;
2844         size_t printed = 0;
2845         struct syscall *sc;
2846         struct int_node *inode = intlist__first(ttrace->syscall_stats);
2847
2848         if (inode == NULL)
2849                 return 0;
2850
2851         printed += fprintf(fp, "\n");
2852
2853         printed += fprintf(fp, "   syscall            calls    total       min       avg       max      stddev\n");
2854         printed += fprintf(fp, "                               (msec)    (msec)    (msec)    (msec)        (%%)\n");
2855         printed += fprintf(fp, "   --------------- -------- --------- --------- --------- ---------     ------\n");
2856
2857         /* each int_node is a syscall */
2858         while (inode) {
2859                 stats = inode->priv;
2860                 if (stats) {
2861                         double min = (double)(stats->min) / NSEC_PER_MSEC;
2862                         double max = (double)(stats->max) / NSEC_PER_MSEC;
2863                         double avg = avg_stats(stats);
2864                         double pct;
2865                         u64 n = (u64) stats->n;
2866
2867                         pct = avg ? 100.0 * stddev_stats(stats)/avg : 0.0;
2868                         avg /= NSEC_PER_MSEC;
2869
2870                         sc = &trace->syscalls.table[inode->i];
2871                         printed += fprintf(fp, "   %-15s", sc->name);
2872                         printed += fprintf(fp, " %8" PRIu64 " %9.3f %9.3f %9.3f",
2873                                            n, avg * n, min, avg);
2874                         printed += fprintf(fp, " %9.3f %9.2f%%\n", max, pct);
2875                 }
2876
2877                 inode = intlist__next(inode);
2878         }
2879
2880         printed += fprintf(fp, "\n\n");
2881
2882         return printed;
2883 }
2884
2885 /* struct used to pass data to per-thread function */
2886 struct summary_data {
2887         FILE *fp;
2888         struct trace *trace;
2889         size_t printed;
2890 };
2891
2892 static int trace__fprintf_one_thread(struct thread *thread, void *priv)
2893 {
2894         struct summary_data *data = priv;
2895         FILE *fp = data->fp;
2896         size_t printed = data->printed;
2897         struct trace *trace = data->trace;
2898         struct thread_trace *ttrace = thread__priv(thread);
2899         double ratio;
2900
2901         if (ttrace == NULL)
2902                 return 0;
2903
2904         ratio = (double)ttrace->nr_events / trace->nr_events * 100.0;
2905
2906         printed += fprintf(fp, " %s (%d), ", thread__comm_str(thread), thread->tid);
2907         printed += fprintf(fp, "%lu events, ", ttrace->nr_events);
2908         printed += fprintf(fp, "%.1f%%", ratio);
2909         if (ttrace->pfmaj)
2910                 printed += fprintf(fp, ", %lu majfaults", ttrace->pfmaj);
2911         if (ttrace->pfmin)
2912                 printed += fprintf(fp, ", %lu minfaults", ttrace->pfmin);
2913         printed += fprintf(fp, ", %.3f msec\n", ttrace->runtime_ms);
2914         printed += thread__dump_stats(ttrace, trace, fp);
2915
2916         data->printed += printed;
2917
2918         return 0;
2919 }
2920
2921 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
2922 {
2923         struct summary_data data = {
2924                 .fp = fp,
2925                 .trace = trace
2926         };
2927         data.printed = trace__fprintf_threads_header(fp);
2928
2929         machine__for_each_thread(trace->host, trace__fprintf_one_thread, &data);
2930
2931         return data.printed;
2932 }
2933
2934 static int trace__set_duration(const struct option *opt, const char *str,
2935                                int unset __maybe_unused)
2936 {
2937         struct trace *trace = opt->value;
2938
2939         trace->duration_filter = atof(str);
2940         return 0;
2941 }
2942
2943 static int trace__set_filter_pids(const struct option *opt, const char *str,
2944                                   int unset __maybe_unused)
2945 {
2946         int ret = -1;
2947         size_t i;
2948         struct trace *trace = opt->value;
2949         /*
2950          * FIXME: introduce a intarray class, plain parse csv and create a
2951          * { int nr, int entries[] } struct...
2952          */
2953         struct intlist *list = intlist__new(str);
2954
2955         if (list == NULL)
2956                 return -1;
2957
2958         i = trace->filter_pids.nr = intlist__nr_entries(list) + 1;
2959         trace->filter_pids.entries = calloc(i, sizeof(pid_t));
2960
2961         if (trace->filter_pids.entries == NULL)
2962                 goto out;
2963
2964         trace->filter_pids.entries[0] = getpid();
2965
2966         for (i = 1; i < trace->filter_pids.nr; ++i)
2967                 trace->filter_pids.entries[i] = intlist__entry(list, i - 1)->i;
2968
2969         intlist__delete(list);
2970         ret = 0;
2971 out:
2972         return ret;
2973 }
2974
2975 static int trace__open_output(struct trace *trace, const char *filename)
2976 {
2977         struct stat st;
2978
2979         if (!stat(filename, &st) && st.st_size) {
2980                 char oldname[PATH_MAX];
2981
2982                 scnprintf(oldname, sizeof(oldname), "%s.old", filename);
2983                 unlink(oldname);
2984                 rename(filename, oldname);
2985         }
2986
2987         trace->output = fopen(filename, "w");
2988
2989         return trace->output == NULL ? -errno : 0;
2990 }
2991
2992 static int parse_pagefaults(const struct option *opt, const char *str,
2993                             int unset __maybe_unused)
2994 {
2995         int *trace_pgfaults = opt->value;
2996
2997         if (strcmp(str, "all") == 0)
2998                 *trace_pgfaults |= TRACE_PFMAJ | TRACE_PFMIN;
2999         else if (strcmp(str, "maj") == 0)
3000                 *trace_pgfaults |= TRACE_PFMAJ;
3001         else if (strcmp(str, "min") == 0)
3002                 *trace_pgfaults |= TRACE_PFMIN;
3003         else
3004                 return -1;
3005
3006         return 0;
3007 }
3008
3009 static void evlist__set_evsel_handler(struct perf_evlist *evlist, void *handler)
3010 {
3011         struct perf_evsel *evsel;
3012
3013         evlist__for_each(evlist, evsel)
3014                 evsel->handler = handler;
3015 }
3016
3017 int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
3018 {
3019         const char *trace_usage[] = {
3020                 "perf trace [<options>] [<command>]",
3021                 "perf trace [<options>] -- <command> [<options>]",
3022                 "perf trace record [<options>] [<command>]",
3023                 "perf trace record [<options>] -- <command> [<options>]",
3024                 NULL
3025         };
3026         struct trace trace = {
3027                 .audit = {
3028                         .machine = audit_detect_machine(),
3029                         .open_id = audit_name_to_syscall("open", trace.audit.machine),
3030                 },
3031                 .syscalls = {
3032                         . max = -1,
3033                 },
3034                 .opts = {
3035                         .target = {
3036                                 .uid       = UINT_MAX,
3037                                 .uses_mmap = true,
3038                         },
3039                         .user_freq     = UINT_MAX,
3040                         .user_interval = ULLONG_MAX,
3041                         .no_buffering  = true,
3042                         .mmap_pages    = UINT_MAX,
3043                         .proc_map_timeout  = 500,
3044                 },
3045                 .output = stderr,
3046                 .show_comm = true,
3047                 .trace_syscalls = true,
3048         };
3049         const char *output_name = NULL;
3050         const char *ev_qualifier_str = NULL;
3051         const struct option trace_options[] = {
3052         OPT_CALLBACK(0, "event", &trace.evlist, "event",
3053                      "event selector. use 'perf list' to list available events",
3054                      parse_events_option),
3055         OPT_BOOLEAN(0, "comm", &trace.show_comm,
3056                     "show the thread COMM next to its id"),
3057         OPT_BOOLEAN(0, "tool_stats", &trace.show_tool_stats, "show tool stats"),
3058         OPT_STRING('e', "expr", &ev_qualifier_str, "expr", "list of syscalls to trace"),
3059         OPT_STRING('o', "output", &output_name, "file", "output file name"),
3060         OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"),
3061         OPT_STRING('p', "pid", &trace.opts.target.pid, "pid",
3062                     "trace events on existing process id"),
3063         OPT_STRING('t', "tid", &trace.opts.target.tid, "tid",
3064                     "trace events on existing thread id"),
3065         OPT_CALLBACK(0, "filter-pids", &trace, "CSV list of pids",
3066                      "pids to filter (by the kernel)", trace__set_filter_pids),
3067         OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide,
3068                     "system-wide collection from all CPUs"),
3069         OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu",
3070                     "list of cpus to monitor"),
3071         OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit,
3072                     "child tasks do not inherit counters"),
3073         OPT_CALLBACK('m', "mmap-pages", &trace.opts.mmap_pages, "pages",
3074                      "number of mmap data pages",
3075                      perf_evlist__parse_mmap_pages),
3076         OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user",
3077                    "user to profile"),
3078         OPT_CALLBACK(0, "duration", &trace, "float",
3079                      "show only events with duration > N.M ms",
3080                      trace__set_duration),
3081         OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"),
3082         OPT_INCR('v', "verbose", &verbose, "be more verbose"),
3083         OPT_BOOLEAN('T', "time", &trace.full_time,
3084                     "Show full timestamp, not time relative to first start"),
3085         OPT_BOOLEAN('s', "summary", &trace.summary_only,
3086                     "Show only syscall summary with statistics"),
3087         OPT_BOOLEAN('S', "with-summary", &trace.summary,
3088                     "Show all syscalls and summary with statistics"),
3089         OPT_CALLBACK_DEFAULT('F', "pf", &trace.trace_pgfaults, "all|maj|min",
3090                      "Trace pagefaults", parse_pagefaults, "maj"),
3091         OPT_BOOLEAN(0, "syscalls", &trace.trace_syscalls, "Trace syscalls"),
3092         OPT_BOOLEAN('f', "force", &trace.force, "don't complain, do it"),
3093         OPT_UINTEGER(0, "proc-map-timeout", &trace.opts.proc_map_timeout,
3094                         "per thread proc mmap processing timeout in ms"),
3095         OPT_END()
3096         };
3097         const char * const trace_subcommands[] = { "record", NULL };
3098         int err;
3099         char bf[BUFSIZ];
3100
3101         signal(SIGSEGV, sighandler_dump_stack);
3102         signal(SIGFPE, sighandler_dump_stack);
3103
3104         trace.evlist = perf_evlist__new();
3105
3106         if (trace.evlist == NULL) {
3107                 pr_err("Not enough memory to run!\n");
3108                 err = -ENOMEM;
3109                 goto out;
3110         }
3111
3112         argc = parse_options_subcommand(argc, argv, trace_options, trace_subcommands,
3113                                  trace_usage, PARSE_OPT_STOP_AT_NON_OPTION);
3114
3115         if (trace.trace_pgfaults) {
3116                 trace.opts.sample_address = true;
3117                 trace.opts.sample_time = true;
3118         }
3119
3120         if (trace.evlist->nr_entries > 0)
3121                 evlist__set_evsel_handler(trace.evlist, trace__event_handler);
3122
3123         if ((argc >= 1) && (strcmp(argv[0], "record") == 0))
3124                 return trace__record(&trace, argc-1, &argv[1]);
3125
3126         /* summary_only implies summary option, but don't overwrite summary if set */
3127         if (trace.summary_only)
3128                 trace.summary = trace.summary_only;
3129
3130         if (!trace.trace_syscalls && !trace.trace_pgfaults &&
3131             trace.evlist->nr_entries == 0 /* Was --events used? */) {
3132                 pr_err("Please specify something to trace.\n");
3133                 return -1;
3134         }
3135
3136         if (output_name != NULL) {
3137                 err = trace__open_output(&trace, output_name);
3138                 if (err < 0) {
3139                         perror("failed to create output file");
3140                         goto out;
3141                 }
3142         }
3143
3144         if (ev_qualifier_str != NULL) {
3145                 const char *s = ev_qualifier_str;
3146                 struct strlist_config slist_config = {
3147                         .dirname = system_path(STRACE_GROUPS_DIR),
3148                 };
3149
3150                 trace.not_ev_qualifier = *s == '!';
3151                 if (trace.not_ev_qualifier)
3152                         ++s;
3153                 trace.ev_qualifier = strlist__new(s, &slist_config);
3154                 if (trace.ev_qualifier == NULL) {
3155                         fputs("Not enough memory to parse event qualifier",
3156                               trace.output);
3157                         err = -ENOMEM;
3158                         goto out_close;
3159                 }
3160
3161                 err = trace__validate_ev_qualifier(&trace);
3162                 if (err)
3163                         goto out_close;
3164         }
3165
3166         err = target__validate(&trace.opts.target);
3167         if (err) {
3168                 target__strerror(&trace.opts.target, err, bf, sizeof(bf));
3169                 fprintf(trace.output, "%s", bf);
3170                 goto out_close;
3171         }
3172
3173         err = target__parse_uid(&trace.opts.target);
3174         if (err) {
3175                 target__strerror(&trace.opts.target, err, bf, sizeof(bf));
3176                 fprintf(trace.output, "%s", bf);
3177                 goto out_close;
3178         }
3179
3180         if (!argc && target__none(&trace.opts.target))
3181                 trace.opts.target.system_wide = true;
3182
3183         if (input_name)
3184                 err = trace__replay(&trace);
3185         else
3186                 err = trace__run(&trace, argc, argv);
3187
3188 out_close:
3189         if (output_name != NULL)
3190                 fclose(trace.output);
3191 out:
3192         return err;
3193 }