perf trace: Add missing clockid entries
[firefly-linux-kernel-4.4.55.git] / tools / perf / builtin-trace.c
1 #include <traceevent/event-parse.h>
2 #include "builtin.h"
3 #include "util/color.h"
4 #include "util/debug.h"
5 #include "util/evlist.h"
6 #include "util/exec_cmd.h"
7 #include "util/machine.h"
8 #include "util/session.h"
9 #include "util/thread.h"
10 #include "util/parse-options.h"
11 #include "util/strlist.h"
12 #include "util/intlist.h"
13 #include "util/thread_map.h"
14 #include "util/stat.h"
15 #include "trace-event.h"
16 #include "util/parse-events.h"
17
18 #include <libaudit.h>
19 #include <stdlib.h>
20 #include <sys/mman.h>
21 #include <linux/futex.h>
22
/*
 * Fallbacks for older distros: every constant below is defined here only
 * when the headers included above did not already provide it.
 */

/* mmap() flag */
#if !defined(MAP_STACK)
#define MAP_STACK               0x20000
#endif

/* madvise() behaviors */
#if !defined(MADV_HWPOISON)
#define MADV_HWPOISON           100
#endif

#if !defined(MADV_MERGEABLE)
#define MADV_MERGEABLE          12
#endif

#if !defined(MADV_UNMERGEABLE)
#define MADV_UNMERGEABLE        13
#endif

/* eventfd() flags */
#if !defined(EFD_SEMAPHORE)
#define EFD_SEMAPHORE           1
#endif

#if !defined(EFD_NONBLOCK)
#define EFD_NONBLOCK            00004000
#endif

#if !defined(EFD_CLOEXEC)
#define EFD_CLOEXEC             02000000
#endif

/* open() flag */
#if !defined(O_CLOEXEC)
#define O_CLOEXEC               02000000
#endif

/* socket() type and flags */
#if !defined(SOCK_DCCP)
#define SOCK_DCCP               6
#endif

#if !defined(SOCK_CLOEXEC)
#define SOCK_CLOEXEC            02000000
#endif

#if !defined(SOCK_NONBLOCK)
#define SOCK_NONBLOCK           00004000
#endif

/* recvmsg() flag */
#if !defined(MSG_CMSG_CLOEXEC)
#define MSG_CMSG_CLOEXEC        0x40000000
#endif

/* perf_event_open() flags */
#if !defined(PERF_FLAG_FD_NO_GROUP)
#define PERF_FLAG_FD_NO_GROUP           (1UL << 0)
#endif

#if !defined(PERF_FLAG_FD_OUTPUT)
#define PERF_FLAG_FD_OUTPUT             (1UL << 1)
#endif

#if !defined(PERF_FLAG_PID_CGROUP)
#define PERF_FLAG_PID_CGROUP            (1UL << 2) /* pid=cgroup id, per-cpu mode only */
#endif

#if !defined(PERF_FLAG_FD_CLOEXEC)
#define PERF_FLAG_FD_CLOEXEC            (1UL << 3) /* O_CLOEXEC */
#endif
87
88
89 struct tp_field {
90         int offset;
91         union {
92                 u64 (*integer)(struct tp_field *field, struct perf_sample *sample);
93                 void *(*pointer)(struct tp_field *field, struct perf_sample *sample);
94         };
95 };
96
97 #define TP_UINT_FIELD(bits) \
98 static u64 tp_field__u##bits(struct tp_field *field, struct perf_sample *sample) \
99 { \
100         u##bits value; \
101         memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
102         return value;  \
103 }
104
105 TP_UINT_FIELD(8);
106 TP_UINT_FIELD(16);
107 TP_UINT_FIELD(32);
108 TP_UINT_FIELD(64);
109
110 #define TP_UINT_FIELD__SWAPPED(bits) \
111 static u64 tp_field__swapped_u##bits(struct tp_field *field, struct perf_sample *sample) \
112 { \
113         u##bits value; \
114         memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
115         return bswap_##bits(value);\
116 }
117
118 TP_UINT_FIELD__SWAPPED(16);
119 TP_UINT_FIELD__SWAPPED(32);
120 TP_UINT_FIELD__SWAPPED(64);
121
122 static int tp_field__init_uint(struct tp_field *field,
123                                struct format_field *format_field,
124                                bool needs_swap)
125 {
126         field->offset = format_field->offset;
127
128         switch (format_field->size) {
129         case 1:
130                 field->integer = tp_field__u8;
131                 break;
132         case 2:
133                 field->integer = needs_swap ? tp_field__swapped_u16 : tp_field__u16;
134                 break;
135         case 4:
136                 field->integer = needs_swap ? tp_field__swapped_u32 : tp_field__u32;
137                 break;
138         case 8:
139                 field->integer = needs_swap ? tp_field__swapped_u64 : tp_field__u64;
140                 break;
141         default:
142                 return -1;
143         }
144
145         return 0;
146 }
147
148 static void *tp_field__ptr(struct tp_field *field, struct perf_sample *sample)
149 {
150         return sample->raw_data + field->offset;
151 }
152
153 static int tp_field__init_ptr(struct tp_field *field, struct format_field *format_field)
154 {
155         field->offset = format_field->offset;
156         field->pointer = tp_field__ptr;
157         return 0;
158 }
159
160 struct syscall_tp {
161         struct tp_field id;
162         union {
163                 struct tp_field args, ret;
164         };
165 };
166
167 static int perf_evsel__init_tp_uint_field(struct perf_evsel *evsel,
168                                           struct tp_field *field,
169                                           const char *name)
170 {
171         struct format_field *format_field = perf_evsel__field(evsel, name);
172
173         if (format_field == NULL)
174                 return -1;
175
176         return tp_field__init_uint(field, format_field, evsel->needs_swap);
177 }
178
/*
 * Initialise the integer accessor for member @name of the struct syscall_tp
 * hanging off @evsel->priv, looking the tracepoint field up by the same
 * name.  Evaluates to the result of perf_evsel__init_tp_uint_field().
 *
 * @evsel is parenthesized so that any pointer expression can be passed.
 */
#define perf_evsel__init_sc_tp_uint_field(evsel, name) \
        ({ struct syscall_tp *sc = (evsel)->priv;\
           perf_evsel__init_tp_uint_field((evsel), &sc->name, #name); })
182
/*
 * Look up the tracepoint field called @name in @evsel and initialise
 * @field as a pointer accessor for it.
 *
 * Returns -1 when the field is not found, otherwise whatever
 * tp_field__init_ptr() returns.
 */
static int perf_evsel__init_tp_ptr_field(struct perf_evsel *evsel,
                                         struct tp_field *field,
                                         const char *name)
{
        struct format_field *fmt = perf_evsel__field(evsel, name);

        if (!fmt)
                return -1;

        return tp_field__init_ptr(field, fmt);
}
194
/*
 * Initialise the pointer accessor for member @name of the struct syscall_tp
 * hanging off @evsel->priv, looking the tracepoint field up by the same
 * name.  Evaluates to the result of perf_evsel__init_tp_ptr_field().
 *
 * @evsel is parenthesized so that any pointer expression can be passed.
 */
#define perf_evsel__init_sc_tp_ptr_field(evsel, name) \
        ({ struct syscall_tp *sc = (evsel)->priv;\
           perf_evsel__init_tp_ptr_field((evsel), &sc->name, #name); })
198
199 static void perf_evsel__delete_priv(struct perf_evsel *evsel)
200 {
201         zfree(&evsel->priv);
202         perf_evsel__delete(evsel);
203 }
204
205 static int perf_evsel__init_syscall_tp(struct perf_evsel *evsel, void *handler)
206 {
207         evsel->priv = malloc(sizeof(struct syscall_tp));
208         if (evsel->priv != NULL) {
209                 if (perf_evsel__init_sc_tp_uint_field(evsel, id))
210                         goto out_delete;
211
212                 evsel->handler = handler;
213                 return 0;
214         }
215
216         return -ENOMEM;
217
218 out_delete:
219         zfree(&evsel->priv);
220         return -ENOENT;
221 }
222
/*
 * Create the evsel for the syscall tracepoint named @direction and prepare
 * it with perf_evsel__init_syscall_tp() and @handler.
 *
 * Returns the new evsel, or NULL when the tracepoint could not be created
 * or initialised.
 */
static struct perf_evsel *perf_evsel__syscall_newtp(const char *direction, void *handler)
{
        struct perf_evsel *evsel = perf_evsel__newtp("raw_syscalls", direction);

        /* older kernel (e.g., RHEL6) use syscalls:{enter,exit} */
        if (!evsel)
                evsel = perf_evsel__newtp("syscalls", direction);

        if (!evsel)
                return NULL;

        if (perf_evsel__init_syscall_tp(evsel, handler)) {
                perf_evsel__delete_priv(evsel);
                return NULL;
        }

        return evsel;
}
242
/*
 * Read member @name of the struct syscall_tp attached to @evsel->priv from
 * @sample, through the integer reader installed for that member.
 *
 * @evsel is parenthesized so that any pointer expression can be passed.
 */
#define perf_evsel__sc_tp_uint(evsel, name, sample) \
        ({ struct syscall_tp *fields = (evsel)->priv; \
           fields->name.integer(&fields->name, sample); })

/*
 * Same as above, but goes through the pointer reader and so evaluates to
 * the address of the field inside the sample.
 */
#define perf_evsel__sc_tp_ptr(evsel, name, sample) \
        ({ struct syscall_tp *fields = (evsel)->priv; \
           fields->name.pointer(&fields->name, sample); })
250
251 struct syscall_arg {
252         unsigned long val;
253         struct thread *thread;
254         struct trace  *trace;
255         void          *parm;
256         u8            idx;
257         u8            mask;
258 };
259
/*
 * Table mapping small integer values to names.  A value is looked up as
 * entries[value - offset] when that index is within nr_entries.
 */
struct strarray {
        /* value that corresponds to entries[0] */
        int offset;
        /* number of slots in entries[] */
        int nr_entries;
        /* the names themselves */
        const char **entries;
};
265
/*
 * Define 'strarray__<array>' describing the string table @array, with the
 * first entry standing for value 0 (the offset is left zero-initialised).
 */
#define DEFINE_STRARRAY(array) struct strarray strarray__##array = { \
        .entries    = array, \
        .nr_entries = ARRAY_SIZE(array), \
}

/*
 * Same as DEFINE_STRARRAY(), but the first entry stands for value @off.
 */
#define DEFINE_STRARRAY_OFFSET(array, off) struct strarray strarray__##array = { \
        .entries    = array, \
        .nr_entries = ARRAY_SIZE(array), \
        .offset     = off, \
}
276
277 static size_t __syscall_arg__scnprintf_strarray(char *bf, size_t size,
278                                                 const char *intfmt,
279                                                 struct syscall_arg *arg)
280 {
281         struct strarray *sa = arg->parm;
282         int idx = arg->val - sa->offset;
283
284         if (idx < 0 || idx >= sa->nr_entries)
285                 return scnprintf(bf, size, intfmt, arg->val);
286
287         return scnprintf(bf, size, "%s", sa->entries[idx]);
288 }
289
/*
 * strarray printer that falls back to a decimal number ("%d") when the
 * value has no name in the table.
 */
static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size,
                                              struct syscall_arg *arg)
{
        const char *fallback_fmt = "%d";

        return __syscall_arg__scnprintf_strarray(bf, size, fallback_fmt, arg);
}

#define SCA_STRARRAY syscall_arg__scnprintf_strarray
297
#if defined(__i386__) || defined(__x86_64__)
/*
 * FIXME: Make this available to all arches as soon as the ioctl beautifier
 *        gets rewritten to support all arches.
 *
 * strarray printer that falls back to a hexadecimal number ("%#x") when
 * the value has no name in the table.
 */
static size_t syscall_arg__scnprintf_strhexarray(char *bf, size_t size,
                                                 struct syscall_arg *arg)
{
        const char *fallback_fmt = "%#x";

        return __syscall_arg__scnprintf_strarray(bf, size, fallback_fmt, arg);
}

#define SCA_STRHEXARRAY syscall_arg__scnprintf_strhexarray
#endif /* defined(__i386__) || defined(__x86_64__) */
311
/*
 * Forward declaration: the file descriptor printer is referenced below
 * (e.g. by the "fd at" printer) before its definition.
 */
static size_t syscall_arg__scnprintf_fd(char *bf, size_t size, struct syscall_arg *arg);

#define SCA_FD syscall_arg__scnprintf_fd
316
317 static size_t syscall_arg__scnprintf_fd_at(char *bf, size_t size,
318                                            struct syscall_arg *arg)
319 {
320         int fd = arg->val;
321
322         if (fd == AT_FDCWD)
323                 return scnprintf(bf, size, "CWD");
324
325         return syscall_arg__scnprintf_fd(bf, size, arg);
326 }
327
328 #define SCA_FDAT syscall_arg__scnprintf_fd_at
329
/*
 * Forward declaration: the printer for the fd argument of close() is
 * referenced by the syscall format table before its definition.
 */
static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size, struct syscall_arg *arg);

#define SCA_CLOSE_FD syscall_arg__scnprintf_close_fd
334
335 static size_t syscall_arg__scnprintf_hex(char *bf, size_t size,
336                                          struct syscall_arg *arg)
337 {
338         return scnprintf(bf, size, "%#lx", arg->val);
339 }
340
341 #define SCA_HEX syscall_arg__scnprintf_hex
342
343 static size_t syscall_arg__scnprintf_int(char *bf, size_t size,
344                                          struct syscall_arg *arg)
345 {
346         return scnprintf(bf, size, "%d", arg->val);
347 }
348
349 #define SCA_INT syscall_arg__scnprintf_int
350
351 static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size,
352                                                struct syscall_arg *arg)
353 {
354         int printed = 0, prot = arg->val;
355
356         if (prot == PROT_NONE)
357                 return scnprintf(bf, size, "NONE");
358 #define P_MMAP_PROT(n) \
359         if (prot & PROT_##n) { \
360                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
361                 prot &= ~PROT_##n; \
362         }
363
364         P_MMAP_PROT(EXEC);
365         P_MMAP_PROT(READ);
366         P_MMAP_PROT(WRITE);
367 #ifdef PROT_SEM
368         P_MMAP_PROT(SEM);
369 #endif
370         P_MMAP_PROT(GROWSDOWN);
371         P_MMAP_PROT(GROWSUP);
372 #undef P_MMAP_PROT
373
374         if (prot)
375                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", prot);
376
377         return printed;
378 }
379
380 #define SCA_MMAP_PROT syscall_arg__scnprintf_mmap_prot
381
382 static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size,
383                                                 struct syscall_arg *arg)
384 {
385         int printed = 0, flags = arg->val;
386
387 #define P_MMAP_FLAG(n) \
388         if (flags & MAP_##n) { \
389                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
390                 flags &= ~MAP_##n; \
391         }
392
393         P_MMAP_FLAG(SHARED);
394         P_MMAP_FLAG(PRIVATE);
395 #ifdef MAP_32BIT
396         P_MMAP_FLAG(32BIT);
397 #endif
398         P_MMAP_FLAG(ANONYMOUS);
399         P_MMAP_FLAG(DENYWRITE);
400         P_MMAP_FLAG(EXECUTABLE);
401         P_MMAP_FLAG(FILE);
402         P_MMAP_FLAG(FIXED);
403         P_MMAP_FLAG(GROWSDOWN);
404 #ifdef MAP_HUGETLB
405         P_MMAP_FLAG(HUGETLB);
406 #endif
407         P_MMAP_FLAG(LOCKED);
408         P_MMAP_FLAG(NONBLOCK);
409         P_MMAP_FLAG(NORESERVE);
410         P_MMAP_FLAG(POPULATE);
411         P_MMAP_FLAG(STACK);
412 #ifdef MAP_UNINITIALIZED
413         P_MMAP_FLAG(UNINITIALIZED);
414 #endif
415 #undef P_MMAP_FLAG
416
417         if (flags)
418                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
419
420         return printed;
421 }
422
423 #define SCA_MMAP_FLAGS syscall_arg__scnprintf_mmap_flags
424
425 static size_t syscall_arg__scnprintf_mremap_flags(char *bf, size_t size,
426                                                   struct syscall_arg *arg)
427 {
428         int printed = 0, flags = arg->val;
429
430 #define P_MREMAP_FLAG(n) \
431         if (flags & MREMAP_##n) { \
432                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
433                 flags &= ~MREMAP_##n; \
434         }
435
436         P_MREMAP_FLAG(MAYMOVE);
437 #ifdef MREMAP_FIXED
438         P_MREMAP_FLAG(FIXED);
439 #endif
440 #undef P_MREMAP_FLAG
441
442         if (flags)
443                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
444
445         return printed;
446 }
447
448 #define SCA_MREMAP_FLAGS syscall_arg__scnprintf_mremap_flags
449
450 static size_t syscall_arg__scnprintf_madvise_behavior(char *bf, size_t size,
451                                                       struct syscall_arg *arg)
452 {
453         int behavior = arg->val;
454
455         switch (behavior) {
456 #define P_MADV_BHV(n) case MADV_##n: return scnprintf(bf, size, #n)
457         P_MADV_BHV(NORMAL);
458         P_MADV_BHV(RANDOM);
459         P_MADV_BHV(SEQUENTIAL);
460         P_MADV_BHV(WILLNEED);
461         P_MADV_BHV(DONTNEED);
462         P_MADV_BHV(REMOVE);
463         P_MADV_BHV(DONTFORK);
464         P_MADV_BHV(DOFORK);
465         P_MADV_BHV(HWPOISON);
466 #ifdef MADV_SOFT_OFFLINE
467         P_MADV_BHV(SOFT_OFFLINE);
468 #endif
469         P_MADV_BHV(MERGEABLE);
470         P_MADV_BHV(UNMERGEABLE);
471 #ifdef MADV_HUGEPAGE
472         P_MADV_BHV(HUGEPAGE);
473 #endif
474 #ifdef MADV_NOHUGEPAGE
475         P_MADV_BHV(NOHUGEPAGE);
476 #endif
477 #ifdef MADV_DONTDUMP
478         P_MADV_BHV(DONTDUMP);
479 #endif
480 #ifdef MADV_DODUMP
481         P_MADV_BHV(DODUMP);
482 #endif
483 #undef P_MADV_PHV
484         default: break;
485         }
486
487         return scnprintf(bf, size, "%#x", behavior);
488 }
489
490 #define SCA_MADV_BHV syscall_arg__scnprintf_madvise_behavior
491
492 static size_t syscall_arg__scnprintf_flock(char *bf, size_t size,
493                                            struct syscall_arg *arg)
494 {
495         int printed = 0, op = arg->val;
496
497         if (op == 0)
498                 return scnprintf(bf, size, "NONE");
499 #define P_CMD(cmd) \
500         if ((op & LOCK_##cmd) == LOCK_##cmd) { \
501                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #cmd); \
502                 op &= ~LOCK_##cmd; \
503         }
504
505         P_CMD(SH);
506         P_CMD(EX);
507         P_CMD(NB);
508         P_CMD(UN);
509         P_CMD(MAND);
510         P_CMD(RW);
511         P_CMD(READ);
512         P_CMD(WRITE);
513 #undef P_OP
514
515         if (op)
516                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", op);
517
518         return printed;
519 }
520
521 #define SCA_FLOCK syscall_arg__scnprintf_flock
522
523 static size_t syscall_arg__scnprintf_futex_op(char *bf, size_t size, struct syscall_arg *arg)
524 {
525         enum syscall_futex_args {
526                 SCF_UADDR   = (1 << 0),
527                 SCF_OP      = (1 << 1),
528                 SCF_VAL     = (1 << 2),
529                 SCF_TIMEOUT = (1 << 3),
530                 SCF_UADDR2  = (1 << 4),
531                 SCF_VAL3    = (1 << 5),
532         };
533         int op = arg->val;
534         int cmd = op & FUTEX_CMD_MASK;
535         size_t printed = 0;
536
537         switch (cmd) {
538 #define P_FUTEX_OP(n) case FUTEX_##n: printed = scnprintf(bf, size, #n);
539         P_FUTEX_OP(WAIT);           arg->mask |= SCF_VAL3|SCF_UADDR2;             break;
540         P_FUTEX_OP(WAKE);           arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
541         P_FUTEX_OP(FD);             arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
542         P_FUTEX_OP(REQUEUE);        arg->mask |= SCF_VAL3|SCF_TIMEOUT;            break;
543         P_FUTEX_OP(CMP_REQUEUE);    arg->mask |= SCF_TIMEOUT;                     break;
544         P_FUTEX_OP(CMP_REQUEUE_PI); arg->mask |= SCF_TIMEOUT;                     break;
545         P_FUTEX_OP(WAKE_OP);                                                      break;
546         P_FUTEX_OP(LOCK_PI);        arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
547         P_FUTEX_OP(UNLOCK_PI);      arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
548         P_FUTEX_OP(TRYLOCK_PI);     arg->mask |= SCF_VAL3|SCF_UADDR2;             break;
549         P_FUTEX_OP(WAIT_BITSET);    arg->mask |= SCF_UADDR2;                      break;
550         P_FUTEX_OP(WAKE_BITSET);    arg->mask |= SCF_UADDR2;                      break;
551         P_FUTEX_OP(WAIT_REQUEUE_PI);                                              break;
552         default: printed = scnprintf(bf, size, "%#x", cmd);                       break;
553         }
554
555         if (op & FUTEX_PRIVATE_FLAG)
556                 printed += scnprintf(bf + printed, size - printed, "|PRIV");
557
558         if (op & FUTEX_CLOCK_REALTIME)
559                 printed += scnprintf(bf + printed, size - printed, "|CLKRT");
560
561         return printed;
562 }
563
564 #define SCA_FUTEX_OP  syscall_arg__scnprintf_futex_op
565
566 static const char *epoll_ctl_ops[] = { "ADD", "DEL", "MOD", };
567 static DEFINE_STRARRAY_OFFSET(epoll_ctl_ops, 1);
568
569 static const char *itimers[] = { "REAL", "VIRTUAL", "PROF", };
570 static DEFINE_STRARRAY(itimers);
571
572 static const char *whences[] = { "SET", "CUR", "END",
573 #ifdef SEEK_DATA
574 "DATA",
575 #endif
576 #ifdef SEEK_HOLE
577 "HOLE",
578 #endif
579 };
580 static DEFINE_STRARRAY(whences);
581
/*
 * fcntl() command names, indexed from 0.  All names are given without the
 * F_ prefix so the whole table is printed in one consistent style.
 */
static const char *fcntl_cmds[] = {
        "DUPFD", "GETFD", "SETFD", "GETFL", "SETFL", "GETLK", "SETLK",
        "SETLKW", "SETOWN", "GETOWN", "SETSIG", "GETSIG", "GETLK64",
        "SETLK64", "SETLKW64", "SETOWN_EX", "GETOWN_EX",
        "GETOWNER_UIDS",
};
static DEFINE_STRARRAY(fcntl_cmds);
589
/* Resource limit names (without the RLIMIT_ prefix), indexed from 0. */
static const char *rlimit_resources[] = {
        "CPU",
        "FSIZE",
        "DATA",
        "STACK",
        "CORE",
        "RSS",
        "NPROC",
        "NOFILE",
        "MEMLOCK",
        "AS",
        "LOCKS",
        "SIGPENDING",
        "MSGQUEUE",
        "NICE",
        "RTPRIO",
        "RTTIME",
};
static DEFINE_STRARRAY(rlimit_resources);

/* sigprocmask() 'how' names, indexed from 0. */
static const char *sighow[] = {
        "BLOCK",
        "UNBLOCK",
        "SETMASK",
};
static DEFINE_STRARRAY(sighow);

/* Clock id names (without the CLOCK_ prefix), indexed from 0. */
static const char *clockid[] = {
        "REALTIME",
        "MONOTONIC",
        "PROCESS_CPUTIME_ID",
        "THREAD_CPUTIME_ID",
        "MONOTONIC_RAW",
        "REALTIME_COARSE",
        "MONOTONIC_COARSE",
        "BOOTTIME",
        "REALTIME_ALARM",
        "BOOTTIME_ALARM",
        "SGI_CYCLE",
        "TAI"
};
static DEFINE_STRARRAY(clockid);
606
/*
 * Socket address family names (without the AF_ prefix), indexed by the
 * family number starting at 0.
 *
 * The table is positional, so every number needs its slot: "MPLS" (28) sits
 * between "IB" (27) and "CAN" (29).  Leaving it out shifts every later name
 * down by one.
 */
static const char *socket_families[] = {
        "UNSPEC", "LOCAL", "INET", "AX25", "IPX", "APPLETALK", "NETROM",
        "BRIDGE", "ATMPVC", "X25", "INET6", "ROSE", "DECnet", "NETBEUI",
        "SECURITY", "KEY", "NETLINK", "PACKET", "ASH", "ECONET", "ATMSVC",
        "RDS", "SNA", "IRDA", "PPPOX", "WANPIPE", "LLC", "IB", "MPLS", "CAN",
        "TIPC", "BLUETOOTH", "IUCV", "RXRPC", "ISDN", "PHONET", "IEEE802154",
        "CAIF", "ALG", "NFC", "VSOCK",
};
/* strarray descriptor for the socket family names, first entry is value 0 */
static DEFINE_STRARRAY(socket_families);
616
617 #ifndef SOCK_TYPE_MASK
618 #define SOCK_TYPE_MASK 0xf
619 #endif
620
621 static size_t syscall_arg__scnprintf_socket_type(char *bf, size_t size,
622                                                       struct syscall_arg *arg)
623 {
624         size_t printed;
625         int type = arg->val,
626             flags = type & ~SOCK_TYPE_MASK;
627
628         type &= SOCK_TYPE_MASK;
629         /*
630          * Can't use a strarray, MIPS may override for ABI reasons.
631          */
632         switch (type) {
633 #define P_SK_TYPE(n) case SOCK_##n: printed = scnprintf(bf, size, #n); break;
634         P_SK_TYPE(STREAM);
635         P_SK_TYPE(DGRAM);
636         P_SK_TYPE(RAW);
637         P_SK_TYPE(RDM);
638         P_SK_TYPE(SEQPACKET);
639         P_SK_TYPE(DCCP);
640         P_SK_TYPE(PACKET);
641 #undef P_SK_TYPE
642         default:
643                 printed = scnprintf(bf, size, "%#x", type);
644         }
645
646 #define P_SK_FLAG(n) \
647         if (flags & SOCK_##n) { \
648                 printed += scnprintf(bf + printed, size - printed, "|%s", #n); \
649                 flags &= ~SOCK_##n; \
650         }
651
652         P_SK_FLAG(CLOEXEC);
653         P_SK_FLAG(NONBLOCK);
654 #undef P_SK_FLAG
655
656         if (flags)
657                 printed += scnprintf(bf + printed, size - printed, "|%#x", flags);
658
659         return printed;
660 }
661
662 #define SCA_SK_TYPE syscall_arg__scnprintf_socket_type
663
664 #ifndef MSG_PROBE
665 #define MSG_PROBE            0x10
666 #endif
667 #ifndef MSG_WAITFORONE
668 #define MSG_WAITFORONE  0x10000
669 #endif
670 #ifndef MSG_SENDPAGE_NOTLAST
671 #define MSG_SENDPAGE_NOTLAST 0x20000
672 #endif
673 #ifndef MSG_FASTOPEN
674 #define MSG_FASTOPEN         0x20000000
675 #endif
676
677 static size_t syscall_arg__scnprintf_msg_flags(char *bf, size_t size,
678                                                struct syscall_arg *arg)
679 {
680         int printed = 0, flags = arg->val;
681
682         if (flags == 0)
683                 return scnprintf(bf, size, "NONE");
684 #define P_MSG_FLAG(n) \
685         if (flags & MSG_##n) { \
686                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
687                 flags &= ~MSG_##n; \
688         }
689
690         P_MSG_FLAG(OOB);
691         P_MSG_FLAG(PEEK);
692         P_MSG_FLAG(DONTROUTE);
693         P_MSG_FLAG(TRYHARD);
694         P_MSG_FLAG(CTRUNC);
695         P_MSG_FLAG(PROBE);
696         P_MSG_FLAG(TRUNC);
697         P_MSG_FLAG(DONTWAIT);
698         P_MSG_FLAG(EOR);
699         P_MSG_FLAG(WAITALL);
700         P_MSG_FLAG(FIN);
701         P_MSG_FLAG(SYN);
702         P_MSG_FLAG(CONFIRM);
703         P_MSG_FLAG(RST);
704         P_MSG_FLAG(ERRQUEUE);
705         P_MSG_FLAG(NOSIGNAL);
706         P_MSG_FLAG(MORE);
707         P_MSG_FLAG(WAITFORONE);
708         P_MSG_FLAG(SENDPAGE_NOTLAST);
709         P_MSG_FLAG(FASTOPEN);
710         P_MSG_FLAG(CMSG_CLOEXEC);
711 #undef P_MSG_FLAG
712
713         if (flags)
714                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
715
716         return printed;
717 }
718
719 #define SCA_MSG_FLAGS syscall_arg__scnprintf_msg_flags
720
721 static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size,
722                                                  struct syscall_arg *arg)
723 {
724         size_t printed = 0;
725         int mode = arg->val;
726
727         if (mode == F_OK) /* 0 */
728                 return scnprintf(bf, size, "F");
729 #define P_MODE(n) \
730         if (mode & n##_OK) { \
731                 printed += scnprintf(bf + printed, size - printed, "%s", #n); \
732                 mode &= ~n##_OK; \
733         }
734
735         P_MODE(R);
736         P_MODE(W);
737         P_MODE(X);
738 #undef P_MODE
739
740         if (mode)
741                 printed += scnprintf(bf + printed, size - printed, "|%#x", mode);
742
743         return printed;
744 }
745
746 #define SCA_ACCMODE syscall_arg__scnprintf_access_mode
747
/*
 * Forward declaration: the filename printer is referenced by the syscall
 * format table before its definition.
 */
static size_t syscall_arg__scnprintf_filename(char *bf, size_t size, struct syscall_arg *arg);

#define SCA_FILENAME syscall_arg__scnprintf_filename
752
753 static size_t syscall_arg__scnprintf_open_flags(char *bf, size_t size,
754                                                struct syscall_arg *arg)
755 {
756         int printed = 0, flags = arg->val;
757
758         if (!(flags & O_CREAT))
759                 arg->mask |= 1 << (arg->idx + 1); /* Mask the mode parm */
760
761         if (flags == 0)
762                 return scnprintf(bf, size, "RDONLY");
763 #define P_FLAG(n) \
764         if (flags & O_##n) { \
765                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
766                 flags &= ~O_##n; \
767         }
768
769         P_FLAG(APPEND);
770         P_FLAG(ASYNC);
771         P_FLAG(CLOEXEC);
772         P_FLAG(CREAT);
773         P_FLAG(DIRECT);
774         P_FLAG(DIRECTORY);
775         P_FLAG(EXCL);
776         P_FLAG(LARGEFILE);
777         P_FLAG(NOATIME);
778         P_FLAG(NOCTTY);
779 #ifdef O_NONBLOCK
780         P_FLAG(NONBLOCK);
781 #elif O_NDELAY
782         P_FLAG(NDELAY);
783 #endif
784 #ifdef O_PATH
785         P_FLAG(PATH);
786 #endif
787         P_FLAG(RDWR);
788 #ifdef O_DSYNC
789         if ((flags & O_SYNC) == O_SYNC)
790                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", "SYNC");
791         else {
792                 P_FLAG(DSYNC);
793         }
794 #else
795         P_FLAG(SYNC);
796 #endif
797         P_FLAG(TRUNC);
798         P_FLAG(WRONLY);
799 #undef P_FLAG
800
801         if (flags)
802                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
803
804         return printed;
805 }
806
807 #define SCA_OPEN_FLAGS syscall_arg__scnprintf_open_flags
808
809 static size_t syscall_arg__scnprintf_perf_flags(char *bf, size_t size,
810                                                 struct syscall_arg *arg)
811 {
812         int printed = 0, flags = arg->val;
813
814         if (flags == 0)
815                 return 0;
816
817 #define P_FLAG(n) \
818         if (flags & PERF_FLAG_##n) { \
819                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
820                 flags &= ~PERF_FLAG_##n; \
821         }
822
823         P_FLAG(FD_NO_GROUP);
824         P_FLAG(FD_OUTPUT);
825         P_FLAG(PID_CGROUP);
826         P_FLAG(FD_CLOEXEC);
827 #undef P_FLAG
828
829         if (flags)
830                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
831
832         return printed;
833 }
834
835 #define SCA_PERF_FLAGS syscall_arg__scnprintf_perf_flags
836
837 static size_t syscall_arg__scnprintf_eventfd_flags(char *bf, size_t size,
838                                                    struct syscall_arg *arg)
839 {
840         int printed = 0, flags = arg->val;
841
842         if (flags == 0)
843                 return scnprintf(bf, size, "NONE");
844 #define P_FLAG(n) \
845         if (flags & EFD_##n) { \
846                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
847                 flags &= ~EFD_##n; \
848         }
849
850         P_FLAG(SEMAPHORE);
851         P_FLAG(CLOEXEC);
852         P_FLAG(NONBLOCK);
853 #undef P_FLAG
854
855         if (flags)
856                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
857
858         return printed;
859 }
860
861 #define SCA_EFD_FLAGS syscall_arg__scnprintf_eventfd_flags
862
863 static size_t syscall_arg__scnprintf_pipe_flags(char *bf, size_t size,
864                                                 struct syscall_arg *arg)
865 {
866         int printed = 0, flags = arg->val;
867
868 #define P_FLAG(n) \
869         if (flags & O_##n) { \
870                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
871                 flags &= ~O_##n; \
872         }
873
874         P_FLAG(CLOEXEC);
875         P_FLAG(NONBLOCK);
876 #undef P_FLAG
877
878         if (flags)
879                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
880
881         return printed;
882 }
883
884 #define SCA_PIPE_FLAGS syscall_arg__scnprintf_pipe_flags
885
886 static size_t syscall_arg__scnprintf_signum(char *bf, size_t size, struct syscall_arg *arg)
887 {
888         int sig = arg->val;
889
890         switch (sig) {
891 #define P_SIGNUM(n) case SIG##n: return scnprintf(bf, size, #n)
892         P_SIGNUM(HUP);
893         P_SIGNUM(INT);
894         P_SIGNUM(QUIT);
895         P_SIGNUM(ILL);
896         P_SIGNUM(TRAP);
897         P_SIGNUM(ABRT);
898         P_SIGNUM(BUS);
899         P_SIGNUM(FPE);
900         P_SIGNUM(KILL);
901         P_SIGNUM(USR1);
902         P_SIGNUM(SEGV);
903         P_SIGNUM(USR2);
904         P_SIGNUM(PIPE);
905         P_SIGNUM(ALRM);
906         P_SIGNUM(TERM);
907         P_SIGNUM(CHLD);
908         P_SIGNUM(CONT);
909         P_SIGNUM(STOP);
910         P_SIGNUM(TSTP);
911         P_SIGNUM(TTIN);
912         P_SIGNUM(TTOU);
913         P_SIGNUM(URG);
914         P_SIGNUM(XCPU);
915         P_SIGNUM(XFSZ);
916         P_SIGNUM(VTALRM);
917         P_SIGNUM(PROF);
918         P_SIGNUM(WINCH);
919         P_SIGNUM(IO);
920         P_SIGNUM(PWR);
921         P_SIGNUM(SYS);
922 #ifdef SIGEMT
923         P_SIGNUM(EMT);
924 #endif
925 #ifdef SIGSTKFLT
926         P_SIGNUM(STKFLT);
927 #endif
928 #ifdef SIGSWI
929         P_SIGNUM(SWI);
930 #endif
931         default: break;
932         }
933
934         return scnprintf(bf, size, "%#x", sig);
935 }
936
937 #define SCA_SIGNUM syscall_arg__scnprintf_signum
938
939 #if defined(__i386__) || defined(__x86_64__)
940 /*
941  * FIXME: Make this available to all arches.
942  */
943 #define TCGETS          0x5401
944
945 static const char *tioctls[] = {
946         "TCGETS", "TCSETS", "TCSETSW", "TCSETSF", "TCGETA", "TCSETA", "TCSETAW",
947         "TCSETAF", "TCSBRK", "TCXONC", "TCFLSH", "TIOCEXCL", "TIOCNXCL",
948         "TIOCSCTTY", "TIOCGPGRP", "TIOCSPGRP", "TIOCOUTQ", "TIOCSTI",
949         "TIOCGWINSZ", "TIOCSWINSZ", "TIOCMGET", "TIOCMBIS", "TIOCMBIC",
950         "TIOCMSET", "TIOCGSOFTCAR", "TIOCSSOFTCAR", "FIONREAD", "TIOCLINUX",
951         "TIOCCONS", "TIOCGSERIAL", "TIOCSSERIAL", "TIOCPKT", "FIONBIO",
952         "TIOCNOTTY", "TIOCSETD", "TIOCGETD", "TCSBRKP", [0x27] = "TIOCSBRK",
953         "TIOCCBRK", "TIOCGSID", "TCGETS2", "TCSETS2", "TCSETSW2", "TCSETSF2",
954         "TIOCGRS485", "TIOCSRS485", "TIOCGPTN", "TIOCSPTLCK",
955         "TIOCGDEV||TCGETX", "TCSETX", "TCSETXF", "TCSETXW", "TIOCSIG",
956         "TIOCVHANGUP", "TIOCGPKT", "TIOCGPTLCK", "TIOCGEXCL",
957         [0x50] = "FIONCLEX", "FIOCLEX", "FIOASYNC", "TIOCSERCONFIG",
958         "TIOCSERGWILD", "TIOCSERSWILD", "TIOCGLCKTRMIOS", "TIOCSLCKTRMIOS",
959         "TIOCSERGSTRUCT", "TIOCSERGETLSR", "TIOCSERGETMULTI", "TIOCSERSETMULTI",
960         "TIOCMIWAIT", "TIOCGICOUNT", [0x60] = "FIOQSIZE",
961 };
962
963 static DEFINE_STRARRAY_OFFSET(tioctls, 0x5401);
964 #endif /* defined(__i386__) || defined(__x86_64__) */
965
/*
 * Initializer fragment for a syscall_fmts entry: argument number 'arg' is
 * formatted with SCA_STRARRAY, using strarray__<array> as its lookup table.
 * 'name' does not appear in the expansion; it only labels the argument at
 * the use site.
 */
#define STRARRAY(arg, name, array) \
          .arg_scnprintf = { [arg] = SCA_STRARRAY, }, \
          .arg_parm      = { [arg] = &strarray__##array, }
969
970 static struct syscall_fmt {
971         const char *name;
972         const char *alias;
973         size_t     (*arg_scnprintf[6])(char *bf, size_t size, struct syscall_arg *arg);
974         void       *arg_parm[6];
975         bool       errmsg;
976         bool       timeout;
977         bool       hexret;
978 } syscall_fmts[] = {
979         { .name     = "access",     .errmsg = true,
980           .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */
981                              [1] = SCA_ACCMODE,  /* mode */ }, },
982         { .name     = "arch_prctl", .errmsg = true, .alias = "prctl", },
983         { .name     = "brk",        .hexret = true,
984           .arg_scnprintf = { [0] = SCA_HEX, /* brk */ }, },
985         { .name     = "chdir",      .errmsg = true,
986           .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
987         { .name     = "chmod",      .errmsg = true,
988           .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
989         { .name     = "chroot",     .errmsg = true,
990           .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
991         { .name     = "clock_gettime",  .errmsg = true, STRARRAY(0, clk_id, clockid), },
992         { .name     = "close",      .errmsg = true,
993           .arg_scnprintf = { [0] = SCA_CLOSE_FD, /* fd */ }, },
994         { .name     = "connect",    .errmsg = true, },
995         { .name     = "creat",      .errmsg = true,
996           .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
997         { .name     = "dup",        .errmsg = true,
998           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
999         { .name     = "dup2",       .errmsg = true,
1000           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1001         { .name     = "dup3",       .errmsg = true,
1002           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1003         { .name     = "epoll_ctl",  .errmsg = true, STRARRAY(1, op, epoll_ctl_ops), },
1004         { .name     = "eventfd2",   .errmsg = true,
1005           .arg_scnprintf = { [1] = SCA_EFD_FLAGS, /* flags */ }, },
1006         { .name     = "faccessat",  .errmsg = true,
1007           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1008                              [1] = SCA_FILENAME, /* filename */ }, },
1009         { .name     = "fadvise64",  .errmsg = true,
1010           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1011         { .name     = "fallocate",  .errmsg = true,
1012           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1013         { .name     = "fchdir",     .errmsg = true,
1014           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1015         { .name     = "fchmod",     .errmsg = true,
1016           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1017         { .name     = "fchmodat",   .errmsg = true,
1018           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */
1019                              [1] = SCA_FILENAME, /* filename */ }, },
1020         { .name     = "fchown",     .errmsg = true,
1021           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1022         { .name     = "fchownat",   .errmsg = true,
1023           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */
1024                              [1] = SCA_FILENAME, /* filename */ }, },
1025         { .name     = "fcntl",      .errmsg = true,
1026           .arg_scnprintf = { [0] = SCA_FD, /* fd */
1027                              [1] = SCA_STRARRAY, /* cmd */ },
1028           .arg_parm      = { [1] = &strarray__fcntl_cmds, /* cmd */ }, },
1029         { .name     = "fdatasync",  .errmsg = true,
1030           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1031         { .name     = "flock",      .errmsg = true,
1032           .arg_scnprintf = { [0] = SCA_FD, /* fd */
1033                              [1] = SCA_FLOCK, /* cmd */ }, },
1034         { .name     = "fsetxattr",  .errmsg = true,
1035           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1036         { .name     = "fstat",      .errmsg = true, .alias = "newfstat",
1037           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1038         { .name     = "fstatat",    .errmsg = true, .alias = "newfstatat",
1039           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1040                              [1] = SCA_FILENAME, /* filename */ }, },
1041         { .name     = "fstatfs",    .errmsg = true,
1042           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1043         { .name     = "fsync",    .errmsg = true,
1044           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1045         { .name     = "ftruncate", .errmsg = true,
1046           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1047         { .name     = "futex",      .errmsg = true,
1048           .arg_scnprintf = { [1] = SCA_FUTEX_OP, /* op */ }, },
1049         { .name     = "futimesat", .errmsg = true,
1050           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */
1051                              [1] = SCA_FILENAME, /* filename */ }, },
1052         { .name     = "getdents",   .errmsg = true,
1053           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1054         { .name     = "getdents64", .errmsg = true,
1055           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1056         { .name     = "getitimer",  .errmsg = true, STRARRAY(0, which, itimers), },
1057         { .name     = "getrlimit",  .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
1058         { .name     = "getxattr",    .errmsg = true,
1059           .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1060         { .name     = "inotify_add_watch",          .errmsg = true,
1061           .arg_scnprintf = { [1] = SCA_FILENAME, /* pathname */ }, },
1062         { .name     = "ioctl",      .errmsg = true,
1063           .arg_scnprintf = { [0] = SCA_FD, /* fd */
1064 #if defined(__i386__) || defined(__x86_64__)
1065 /*
1066  * FIXME: Make this available to all arches.
1067  */
1068                              [1] = SCA_STRHEXARRAY, /* cmd */
1069                              [2] = SCA_HEX, /* arg */ },
1070           .arg_parm      = { [1] = &strarray__tioctls, /* cmd */ }, },
1071 #else
1072                              [2] = SCA_HEX, /* arg */ }, },
1073 #endif
1074         { .name     = "kill",       .errmsg = true,
1075           .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1076         { .name     = "lchown",    .errmsg = true,
1077           .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1078         { .name     = "lgetxattr",  .errmsg = true,
1079           .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1080         { .name     = "linkat",     .errmsg = true,
1081           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
1082         { .name     = "listxattr",  .errmsg = true,
1083           .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1084         { .name     = "llistxattr", .errmsg = true,
1085           .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1086         { .name     = "lremovexattr",  .errmsg = true,
1087           .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1088         { .name     = "lseek",      .errmsg = true,
1089           .arg_scnprintf = { [0] = SCA_FD, /* fd */
1090                              [2] = SCA_STRARRAY, /* whence */ },
1091           .arg_parm      = { [2] = &strarray__whences, /* whence */ }, },
1092         { .name     = "lsetxattr",  .errmsg = true,
1093           .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1094         { .name     = "lstat",      .errmsg = true, .alias = "newlstat",
1095           .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1096         { .name     = "lsxattr",    .errmsg = true,
1097           .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1098         { .name     = "madvise",    .errmsg = true,
1099           .arg_scnprintf = { [0] = SCA_HEX,      /* start */
1100                              [2] = SCA_MADV_BHV, /* behavior */ }, },
1101         { .name     = "mkdir",    .errmsg = true,
1102           .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1103         { .name     = "mkdirat",    .errmsg = true,
1104           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */
1105                              [1] = SCA_FILENAME, /* pathname */ }, },
1106         { .name     = "mknod",      .errmsg = true,
1107           .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1108         { .name     = "mknodat",    .errmsg = true,
1109           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */
1110                              [1] = SCA_FILENAME, /* filename */ }, },
1111         { .name     = "mlock",      .errmsg = true,
1112           .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1113         { .name     = "mlockall",   .errmsg = true,
1114           .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1115         { .name     = "mmap",       .hexret = true,
1116           .arg_scnprintf = { [0] = SCA_HEX,       /* addr */
1117                              [2] = SCA_MMAP_PROT, /* prot */
1118                              [3] = SCA_MMAP_FLAGS, /* flags */
1119                              [4] = SCA_FD,        /* fd */ }, },
1120         { .name     = "mprotect",   .errmsg = true,
1121           .arg_scnprintf = { [0] = SCA_HEX, /* start */
1122                              [2] = SCA_MMAP_PROT, /* prot */ }, },
1123         { .name     = "mq_unlink", .errmsg = true,
1124           .arg_scnprintf = { [0] = SCA_FILENAME, /* u_name */ }, },
1125         { .name     = "mremap",     .hexret = true,
1126           .arg_scnprintf = { [0] = SCA_HEX, /* addr */
1127                              [3] = SCA_MREMAP_FLAGS, /* flags */
1128                              [4] = SCA_HEX, /* new_addr */ }, },
1129         { .name     = "munlock",    .errmsg = true,
1130           .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1131         { .name     = "munmap",     .errmsg = true,
1132           .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1133         { .name     = "name_to_handle_at", .errmsg = true,
1134           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1135         { .name     = "newfstatat", .errmsg = true,
1136           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1137                              [1] = SCA_FILENAME, /* filename */ }, },
1138         { .name     = "open",       .errmsg = true,
1139           .arg_scnprintf = { [0] = SCA_FILENAME,   /* filename */
1140                              [1] = SCA_OPEN_FLAGS, /* flags */ }, },
1141         { .name     = "open_by_handle_at", .errmsg = true,
1142           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1143                              [2] = SCA_OPEN_FLAGS, /* flags */ }, },
1144         { .name     = "openat",     .errmsg = true,
1145           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1146                              [1] = SCA_FILENAME, /* filename */
1147                              [2] = SCA_OPEN_FLAGS, /* flags */ }, },
1148         { .name     = "perf_event_open", .errmsg = true,
1149           .arg_scnprintf = { [1] = SCA_INT, /* pid */
1150                              [2] = SCA_INT, /* cpu */
1151                              [3] = SCA_FD,  /* group_fd */
1152                              [4] = SCA_PERF_FLAGS,  /* flags */ }, },
1153         { .name     = "pipe2",      .errmsg = true,
1154           .arg_scnprintf = { [1] = SCA_PIPE_FLAGS, /* flags */ }, },
1155         { .name     = "poll",       .errmsg = true, .timeout = true, },
1156         { .name     = "ppoll",      .errmsg = true, .timeout = true, },
1157         { .name     = "pread",      .errmsg = true, .alias = "pread64",
1158           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1159         { .name     = "preadv",     .errmsg = true, .alias = "pread",
1160           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1161         { .name     = "prlimit64",  .errmsg = true, STRARRAY(1, resource, rlimit_resources), },
1162         { .name     = "pwrite",     .errmsg = true, .alias = "pwrite64",
1163           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1164         { .name     = "pwritev",    .errmsg = true,
1165           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1166         { .name     = "read",       .errmsg = true,
1167           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1168         { .name     = "readlink",   .errmsg = true,
1169           .arg_scnprintf = { [0] = SCA_FILENAME, /* path */ }, },
1170         { .name     = "readlinkat", .errmsg = true,
1171           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1172                              [1] = SCA_FILENAME, /* pathname */ }, },
1173         { .name     = "readv",      .errmsg = true,
1174           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1175         { .name     = "recvfrom",   .errmsg = true,
1176           .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1177         { .name     = "recvmmsg",   .errmsg = true,
1178           .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1179         { .name     = "recvmsg",    .errmsg = true,
1180           .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
1181         { .name     = "removexattr", .errmsg = true,
1182           .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1183         { .name     = "renameat",   .errmsg = true,
1184           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1185         { .name     = "rmdir",    .errmsg = true,
1186           .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1187         { .name     = "rt_sigaction", .errmsg = true,
1188           .arg_scnprintf = { [0] = SCA_SIGNUM, /* sig */ }, },
1189         { .name     = "rt_sigprocmask",  .errmsg = true, STRARRAY(0, how, sighow), },
1190         { .name     = "rt_sigqueueinfo", .errmsg = true,
1191           .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1192         { .name     = "rt_tgsigqueueinfo", .errmsg = true,
1193           .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
1194         { .name     = "select",     .errmsg = true, .timeout = true, },
1195         { .name     = "sendmmsg",    .errmsg = true,
1196           .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1197         { .name     = "sendmsg",    .errmsg = true,
1198           .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
1199         { .name     = "sendto",     .errmsg = true,
1200           .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1201         { .name     = "setitimer",  .errmsg = true, STRARRAY(0, which, itimers), },
1202         { .name     = "setrlimit",  .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
1203         { .name     = "setxattr",   .errmsg = true,
1204           .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1205         { .name     = "shutdown",   .errmsg = true,
1206           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1207         { .name     = "socket",     .errmsg = true,
1208           .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
1209                              [1] = SCA_SK_TYPE, /* type */ },
1210           .arg_parm      = { [0] = &strarray__socket_families, /* family */ }, },
1211         { .name     = "socketpair", .errmsg = true,
1212           .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
1213                              [1] = SCA_SK_TYPE, /* type */ },
1214           .arg_parm      = { [0] = &strarray__socket_families, /* family */ }, },
1215         { .name     = "stat",       .errmsg = true, .alias = "newstat",
1216           .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1217         { .name     = "statfs",     .errmsg = true,
1218           .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1219         { .name     = "swapoff",    .errmsg = true,
1220           .arg_scnprintf = { [0] = SCA_FILENAME, /* specialfile */ }, },
1221         { .name     = "swapon",     .errmsg = true,
1222           .arg_scnprintf = { [0] = SCA_FILENAME, /* specialfile */ }, },
1223         { .name     = "symlinkat",  .errmsg = true,
1224           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1225         { .name     = "tgkill",     .errmsg = true,
1226           .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
1227         { .name     = "tkill",      .errmsg = true,
1228           .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1229         { .name     = "truncate",   .errmsg = true,
1230           .arg_scnprintf = { [0] = SCA_FILENAME, /* path */ }, },
1231         { .name     = "uname",      .errmsg = true, .alias = "newuname", },
1232         { .name     = "unlinkat",   .errmsg = true,
1233           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1234                              [1] = SCA_FILENAME, /* pathname */ }, },
1235         { .name     = "utime",  .errmsg = true,
1236           .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1237         { .name     = "utimensat",  .errmsg = true,
1238           .arg_scnprintf = { [0] = SCA_FDAT, /* dirfd */
1239                              [1] = SCA_FILENAME, /* filename */ }, },
1240         { .name     = "utimes",  .errmsg = true,
1241           .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1242         { .name     = "vmsplice",  .errmsg = true,
1243           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1244         { .name     = "write",      .errmsg = true,
1245           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1246         { .name     = "writev",     .errmsg = true,
1247           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1248 };
1249
1250 static int syscall_fmt__cmp(const void *name, const void *fmtp)
1251 {
1252         const struct syscall_fmt *fmt = fmtp;
1253         return strcmp(name, fmt->name);
1254 }
1255
1256 static struct syscall_fmt *syscall_fmt__find(const char *name)
1257 {
1258         const int nmemb = ARRAY_SIZE(syscall_fmts);
1259         return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp);
1260 }
1261
/*
 * Per-syscall state, one slot per syscall id in trace->syscalls.table,
 * filled in by trace__read_syscall_info().
 */
struct syscall {
        struct event_format *tp_format;  /* "syscalls:sys_enter_<name>" tracepoint format */
        int                 nr_args;     /* taken from tp_format->format.nr_fields */
        struct format_field *args;       /* argument list, taken from tp_format->format.fields */
        const char          *name;       /* as returned by audit_syscall_to_name() */
        bool                is_exit;     /* NOTE(review): set outside this chunk -- meaning not shown here */
        struct syscall_fmt  *fmt;        /* matching syscall_fmts entry, NULL if there is none */
        /* per-argument formatters, array allocated by syscall__set_arg_fmts() */
        size_t              (**arg_scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
        void                **arg_parm;  /* borrowed from fmt->arg_parm when fmt is set */
};
1272
1273 static size_t fprintf_duration(unsigned long t, FILE *fp)
1274 {
1275         double duration = (double)t / NSEC_PER_MSEC;
1276         size_t printed = fprintf(fp, "(");
1277
1278         if (duration >= 1.0)
1279                 printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration);
1280         else if (duration >= 0.01)
1281                 printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration);
1282         else
1283                 printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration);
1284         return printed + fprintf(fp, "): ");
1285 }
1286
/**
 * Per-thread tracing state, hung off the thread's priv pointer
 * (see thread__trace()).
 *
 * filename.ptr: The filename char pointer that will be vfs_getname'd
 * filename.entry_str_pos: Where to insert the string translated from
 *                         filename.ptr by the vfs_getname tracepoint/kprobe.
 * paths.table: fd -> pathname cache, indexed by fd; each entry is a
 *              strdup()'ed string or NULL.
 * paths.max: highest valid index of paths.table, -1 while the table is empty.
 */
struct thread_trace {
        u64               entry_time;
        u64               exit_time;
        bool              entry_pending;
        unsigned long     nr_events;    /* bumped on every thread__trace() call */
        unsigned long     pfmaj, pfmin;
        char              *entry_str;   /* buffer the syscall entry line is formatted into */
        double            runtime_ms;
        struct {
                unsigned long ptr;
                int           entry_str_pos;
        } filename;
        struct {
                int       max;
                char      **table;
        } paths;

        struct intlist *syscall_stats;  /* created by thread_trace__new() */
};
1311
1312 static struct thread_trace *thread_trace__new(void)
1313 {
1314         struct thread_trace *ttrace =  zalloc(sizeof(struct thread_trace));
1315
1316         if (ttrace)
1317                 ttrace->paths.max = -1;
1318
1319         ttrace->syscall_stats = intlist__new(NULL);
1320
1321         return ttrace;
1322 }
1323
1324 static struct thread_trace *thread__trace(struct thread *thread, FILE *fp)
1325 {
1326         struct thread_trace *ttrace;
1327
1328         if (thread == NULL)
1329                 goto fail;
1330
1331         if (thread__priv(thread) == NULL)
1332                 thread__set_priv(thread, thread_trace__new());
1333
1334         if (thread__priv(thread) == NULL)
1335                 goto fail;
1336
1337         ttrace = thread__priv(thread);
1338         ++ttrace->nr_events;
1339
1340         return ttrace;
1341 fail:
1342         color_fprintf(fp, PERF_COLOR_RED,
1343                       "WARNING: not enough memory, dropping samples!\n");
1344         return NULL;
1345 }
1346
/* Page fault kinds; presumably OR-ed into trace->trace_pgfaults -- TODO confirm against the users. */
#define TRACE_PFMAJ             (1 << 0)
#define TRACE_PFMIN             (1 << 1)

/* NOTE(review): looks like the size in bytes of each thread_trace->entry_str buffer -- confirm at the allocation site. */
static const size_t trace__entry_str_size = 2048;
1351
/*
 * Global state of one 'perf trace' session.
 */
struct trace {
        /* embedded tool; trace__tool_process() gets back here via container_of() */
        struct perf_tool        tool;
        struct {
                int             machine;        /* passed to audit_syscall_to_name() */
                int             open_id;
        }                       audit;
        struct {
                int             max;            /* highest id with a slot in table, -1 when empty */
                struct syscall  *table;         /* indexed by syscall id, grown on demand */
                struct {
                        struct perf_evsel *sys_enter,
                                          *sys_exit;
                }               events;
        } syscalls;
        struct record_opts      opts;
        struct perf_evlist      *evlist;
        struct machine          *host;          /* created in trace__symbols_init() */
        struct thread           *current;
        u64                     base_time;      /* subtracted from timestamps before printing */
        FILE                    *output;        /* where trace output and warnings are written */
        unsigned long           nr_events;
        struct strlist          *ev_qualifier;
        struct {
                size_t          nr;
                int             *entries;
        }                       ev_qualifier_ids;
        const char              *last_vfs_getname;
        struct intlist          *tid_list;
        struct intlist          *pid_list;
        struct {
                size_t          nr;
                pid_t           *entries;
        }                       filter_pids;
        double                  duration_filter; /* in milliseconds, see trace__filter_duration() */
        double                  runtime_ms;
        struct {
                u64             vfs_getname,
                                proc_getname;   /* fd lookups that had to go through /proc */
        } stats;
        bool                    not_ev_qualifier;
        bool                    live;           /* when false, fd paths are never read from /proc */
        bool                    full_time;
        bool                    sched;
        bool                    multiple_threads; /* prefix each line with [comm/]tid */
        bool                    summary;
        bool                    summary_only;
        bool                    show_comm;      /* include the comm in that prefix */
        bool                    show_tool_stats;
        bool                    trace_syscalls;
        bool                    force;
        bool                    vfs_getname;    /* when false, filename args are printed as hex pointers */
        int                     trace_pgfaults; /* presumably a TRACE_PFMAJ/TRACE_PFMIN mask -- TODO confirm */
};
1405
1406 static int trace__set_fd_pathname(struct thread *thread, int fd, const char *pathname)
1407 {
1408         struct thread_trace *ttrace = thread__priv(thread);
1409
1410         if (fd > ttrace->paths.max) {
1411                 char **npath = realloc(ttrace->paths.table, (fd + 1) * sizeof(char *));
1412
1413                 if (npath == NULL)
1414                         return -1;
1415
1416                 if (ttrace->paths.max != -1) {
1417                         memset(npath + ttrace->paths.max + 1, 0,
1418                                (fd - ttrace->paths.max) * sizeof(char *));
1419                 } else {
1420                         memset(npath, 0, (fd + 1) * sizeof(char *));
1421                 }
1422
1423                 ttrace->paths.table = npath;
1424                 ttrace->paths.max   = fd;
1425         }
1426
1427         ttrace->paths.table[fd] = strdup(pathname);
1428
1429         return ttrace->paths.table[fd] != NULL ? 0 : -1;
1430 }
1431
1432 static int thread__read_fd_path(struct thread *thread, int fd)
1433 {
1434         char linkname[PATH_MAX], pathname[PATH_MAX];
1435         struct stat st;
1436         int ret;
1437
1438         if (thread->pid_ == thread->tid) {
1439                 scnprintf(linkname, sizeof(linkname),
1440                           "/proc/%d/fd/%d", thread->pid_, fd);
1441         } else {
1442                 scnprintf(linkname, sizeof(linkname),
1443                           "/proc/%d/task/%d/fd/%d", thread->pid_, thread->tid, fd);
1444         }
1445
1446         if (lstat(linkname, &st) < 0 || st.st_size + 1 > (off_t)sizeof(pathname))
1447                 return -1;
1448
1449         ret = readlink(linkname, pathname, sizeof(pathname));
1450
1451         if (ret < 0 || ret > st.st_size)
1452                 return -1;
1453
1454         pathname[ret] = '\0';
1455         return trace__set_fd_pathname(thread, fd, pathname);
1456 }
1457
1458 static const char *thread__fd_path(struct thread *thread, int fd,
1459                                    struct trace *trace)
1460 {
1461         struct thread_trace *ttrace = thread__priv(thread);
1462
1463         if (ttrace == NULL)
1464                 return NULL;
1465
1466         if (fd < 0)
1467                 return NULL;
1468
1469         if ((fd > ttrace->paths.max || ttrace->paths.table[fd] == NULL)) {
1470                 if (!trace->live)
1471                         return NULL;
1472                 ++trace->stats.proc_getname;
1473                 if (thread__read_fd_path(thread, fd))
1474                         return NULL;
1475         }
1476
1477         return ttrace->paths.table[fd];
1478 }
1479
1480 static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
1481                                         struct syscall_arg *arg)
1482 {
1483         int fd = arg->val;
1484         size_t printed = scnprintf(bf, size, "%d", fd);
1485         const char *path = thread__fd_path(arg->thread, fd, arg->trace);
1486
1487         if (path)
1488                 printed += scnprintf(bf + printed, size - printed, "<%s>", path);
1489
1490         return printed;
1491 }
1492
1493 static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
1494                                               struct syscall_arg *arg)
1495 {
1496         int fd = arg->val;
1497         size_t printed = syscall_arg__scnprintf_fd(bf, size, arg);
1498         struct thread_trace *ttrace = thread__priv(arg->thread);
1499
1500         if (ttrace && fd >= 0 && fd <= ttrace->paths.max)
1501                 zfree(&ttrace->paths.table[fd]);
1502
1503         return printed;
1504 }
1505
1506 static void thread__set_filename_pos(struct thread *thread, const char *bf,
1507                                      unsigned long ptr)
1508 {
1509         struct thread_trace *ttrace = thread__priv(thread);
1510
1511         ttrace->filename.ptr = ptr;
1512         ttrace->filename.entry_str_pos = bf - ttrace->entry_str;
1513 }
1514
1515 static size_t syscall_arg__scnprintf_filename(char *bf, size_t size,
1516                                               struct syscall_arg *arg)
1517 {
1518         unsigned long ptr = arg->val;
1519
1520         if (!arg->trace->vfs_getname)
1521                 return scnprintf(bf, size, "%#x", ptr);
1522
1523         thread__set_filename_pos(arg->thread, bf, ptr);
1524         return 0;
1525 }
1526
1527 static bool trace__filter_duration(struct trace *trace, double t)
1528 {
1529         return t < (trace->duration_filter * NSEC_PER_MSEC);
1530 }
1531
1532 static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
1533 {
1534         double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC;
1535
1536         return fprintf(fp, "%10.3f ", ts);
1537 }
1538
/* Set by sig_handler(); both start out false (static storage). */
static bool done;
static bool interrupted;

/*
 * Signal handler: flag that a signal arrived and remember whether that
 * signal was SIGINT.
 */
static void sig_handler(int sig)
{
        done = true;
        interrupted = (sig == SIGINT);
}
1547
1548 static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread,
1549                                         u64 duration, u64 tstamp, FILE *fp)
1550 {
1551         size_t printed = trace__fprintf_tstamp(trace, tstamp, fp);
1552         printed += fprintf_duration(duration, fp);
1553
1554         if (trace->multiple_threads) {
1555                 if (trace->show_comm)
1556                         printed += fprintf(fp, "%.14s/", thread__comm_str(thread));
1557                 printed += fprintf(fp, "%d ", thread->tid);
1558         }
1559
1560         return printed;
1561 }
1562
1563 static int trace__process_event(struct trace *trace, struct machine *machine,
1564                                 union perf_event *event, struct perf_sample *sample)
1565 {
1566         int ret = 0;
1567
1568         switch (event->header.type) {
1569         case PERF_RECORD_LOST:
1570                 color_fprintf(trace->output, PERF_COLOR_RED,
1571                               "LOST %" PRIu64 " events!\n", event->lost.lost);
1572                 ret = machine__process_lost_event(machine, event, sample);
1573         default:
1574                 ret = machine__process_event(machine, event, sample);
1575                 break;
1576         }
1577
1578         return ret;
1579 }
1580
1581 static int trace__tool_process(struct perf_tool *tool,
1582                                union perf_event *event,
1583                                struct perf_sample *sample,
1584                                struct machine *machine)
1585 {
1586         struct trace *trace = container_of(tool, struct trace, tool);
1587         return trace__process_event(trace, machine, event, sample);
1588 }
1589
1590 static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
1591 {
1592         int err = symbol__init(NULL);
1593
1594         if (err)
1595                 return err;
1596
1597         trace->host = machine__new_host();
1598         if (trace->host == NULL)
1599                 return -ENOMEM;
1600
1601         if (trace_event__register_resolver(trace->host, machine__resolve_kernel_addr) < 0)
1602                 return -errno;
1603
1604         err = __machine__synthesize_threads(trace->host, &trace->tool, &trace->opts.target,
1605                                             evlist->threads, trace__tool_process, false,
1606                                             trace->opts.proc_map_timeout);
1607         if (err)
1608                 symbol__exit();
1609
1610         return err;
1611 }
1612
1613 static int syscall__set_arg_fmts(struct syscall *sc)
1614 {
1615         struct format_field *field;
1616         int idx = 0;
1617
1618         sc->arg_scnprintf = calloc(sc->nr_args, sizeof(void *));
1619         if (sc->arg_scnprintf == NULL)
1620                 return -1;
1621
1622         if (sc->fmt)
1623                 sc->arg_parm = sc->fmt->arg_parm;
1624
1625         for (field = sc->args; field; field = field->next) {
1626                 if (sc->fmt && sc->fmt->arg_scnprintf[idx])
1627                         sc->arg_scnprintf[idx] = sc->fmt->arg_scnprintf[idx];
1628                 else if (field->flags & FIELD_IS_POINTER)
1629                         sc->arg_scnprintf[idx] = syscall_arg__scnprintf_hex;
1630                 ++idx;
1631         }
1632
1633         return 0;
1634 }
1635
1636 static int trace__read_syscall_info(struct trace *trace, int id)
1637 {
1638         char tp_name[128];
1639         struct syscall *sc;
1640         const char *name = audit_syscall_to_name(id, trace->audit.machine);
1641
1642         if (name == NULL)
1643                 return -1;
1644
1645         if (id > trace->syscalls.max) {
1646                 struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc));
1647
1648                 if (nsyscalls == NULL)
1649                         return -1;
1650
1651                 if (trace->syscalls.max != -1) {
1652                         memset(nsyscalls + trace->syscalls.max + 1, 0,
1653                                (id - trace->syscalls.max) * sizeof(*sc));
1654                 } else {
1655                         memset(nsyscalls, 0, (id + 1) * sizeof(*sc));
1656                 }
1657
1658                 trace->syscalls.table = nsyscalls;
1659                 trace->syscalls.max   = id;
1660         }
1661
1662         sc = trace->syscalls.table + id;
1663         sc->name = name;
1664
1665         sc->fmt  = syscall_fmt__find(sc->name);
1666
1667         snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name);
1668         sc->tp_format = trace_event__tp_format("syscalls", tp_name);
1669
1670         if (sc->tp_format == NULL && sc->fmt && sc->fmt->alias) {
1671                 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias);
1672                 sc->tp_format = trace_event__tp_format("syscalls", tp_name);
1673         }
1674
1675         if (sc->tp_format == NULL)
1676                 return -1;
1677
1678         sc->args = sc->tp_format->format.fields;
1679         sc->nr_args = sc->tp_format->format.nr_fields;
1680         /* drop nr field - not relevant here; does not exist on older kernels */
1681         if (sc->args && strcmp(sc->args->name, "nr") == 0) {
1682                 sc->args = sc->args->next;
1683                 --sc->nr_args;
1684         }
1685
1686         sc->is_exit = !strcmp(name, "exit_group") || !strcmp(name, "exit");
1687
1688         return syscall__set_arg_fmts(sc);
1689 }
1690
1691 static int trace__validate_ev_qualifier(struct trace *trace)
1692 {
1693         int err = 0, i;
1694         struct str_node *pos;
1695
1696         trace->ev_qualifier_ids.nr = strlist__nr_entries(trace->ev_qualifier);
1697         trace->ev_qualifier_ids.entries = malloc(trace->ev_qualifier_ids.nr *
1698                                                  sizeof(trace->ev_qualifier_ids.entries[0]));
1699
1700         if (trace->ev_qualifier_ids.entries == NULL) {
1701                 fputs("Error:\tNot enough memory for allocating events qualifier ids\n",
1702                        trace->output);
1703                 err = -EINVAL;
1704                 goto out;
1705         }
1706
1707         i = 0;
1708
1709         strlist__for_each(pos, trace->ev_qualifier) {
1710                 const char *sc = pos->s;
1711                 int id = audit_name_to_syscall(sc, trace->audit.machine);
1712
1713                 if (id < 0) {
1714                         if (err == 0) {
1715                                 fputs("Error:\tInvalid syscall ", trace->output);
1716                                 err = -EINVAL;
1717                         } else {
1718                                 fputs(", ", trace->output);
1719                         }
1720
1721                         fputs(sc, trace->output);
1722                 }
1723
1724                 trace->ev_qualifier_ids.entries[i++] = id;
1725         }
1726
1727         if (err < 0) {
1728                 fputs("\nHint:\ttry 'perf list syscalls:sys_enter_*'"
1729                       "\nHint:\tand: 'man syscalls'\n", trace->output);
1730                 zfree(&trace->ev_qualifier_ids.entries);
1731                 trace->ev_qualifier_ids.nr = 0;
1732         }
1733 out:
1734         return err;
1735 }
1736
1737 /*
1738  * args is to be interpreted as a series of longs but we need to handle
1739  * 8-byte unaligned accesses. args points to raw_data within the event
1740  * and raw_data is guaranteed to be 8-byte unaligned because it is
1741  * preceded by raw_size which is a u32. So we need to copy args to a temp
1742  * variable to read it. Most notably this avoids extended load instructions
1743  * on unaligned addresses
1744  */
1745
1746 static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
1747                                       unsigned char *args, struct trace *trace,
1748                                       struct thread *thread)
1749 {
1750         size_t printed = 0;
1751         unsigned char *p;
1752         unsigned long val;
1753
1754         if (sc->args != NULL) {
1755                 struct format_field *field;
1756                 u8 bit = 1;
1757                 struct syscall_arg arg = {
1758                         .idx    = 0,
1759                         .mask   = 0,
1760                         .trace  = trace,
1761                         .thread = thread,
1762                 };
1763
1764                 for (field = sc->args; field;
1765                      field = field->next, ++arg.idx, bit <<= 1) {
1766                         if (arg.mask & bit)
1767                                 continue;
1768
1769                         /* special care for unaligned accesses */
1770                         p = args + sizeof(unsigned long) * arg.idx;
1771                         memcpy(&val, p, sizeof(val));
1772
1773                         /*
1774                          * Suppress this argument if its value is zero and
1775                          * and we don't have a string associated in an
1776                          * strarray for it.
1777                          */
1778                         if (val == 0 &&
1779                             !(sc->arg_scnprintf &&
1780                               sc->arg_scnprintf[arg.idx] == SCA_STRARRAY &&
1781                               sc->arg_parm[arg.idx]))
1782                                 continue;
1783
1784                         printed += scnprintf(bf + printed, size - printed,
1785                                              "%s%s: ", printed ? ", " : "", field->name);
1786                         if (sc->arg_scnprintf && sc->arg_scnprintf[arg.idx]) {
1787                                 arg.val = val;
1788                                 if (sc->arg_parm)
1789                                         arg.parm = sc->arg_parm[arg.idx];
1790                                 printed += sc->arg_scnprintf[arg.idx](bf + printed,
1791                                                                       size - printed, &arg);
1792                         } else {
1793                                 printed += scnprintf(bf + printed, size - printed,
1794                                                      "%ld", val);
1795                         }
1796                 }
1797         } else {
1798                 int i = 0;
1799
1800                 while (i < 6) {
1801                         /* special care for unaligned accesses */
1802                         p = args + sizeof(unsigned long) * i;
1803                         memcpy(&val, p, sizeof(val));
1804                         printed += scnprintf(bf + printed, size - printed,
1805                                              "%sarg%d: %ld",
1806                                              printed ? ", " : "", i, val);
1807                         ++i;
1808                 }
1809         }
1810
1811         return printed;
1812 }
1813
/* Signature of the per-evsel sample handlers stored in evsel->handler. */
typedef int (*tracepoint_handler)(struct trace *trace,
                                  struct perf_evsel *evsel,
                                  union perf_event *event,
                                  struct perf_sample *sample);
1817
1818 static struct syscall *trace__syscall_info(struct trace *trace,
1819                                            struct perf_evsel *evsel, int id)
1820 {
1821
1822         if (id < 0) {
1823
1824                 /*
1825                  * XXX: Noticed on x86_64, reproduced as far back as 3.0.36, haven't tried
1826                  * before that, leaving at a higher verbosity level till that is
1827                  * explained. Reproduced with plain ftrace with:
1828                  *
1829                  * echo 1 > /t/events/raw_syscalls/sys_exit/enable
1830                  * grep "NR -1 " /t/trace_pipe
1831                  *
1832                  * After generating some load on the machine.
1833                  */
1834                 if (verbose > 1) {
1835                         static u64 n;
1836                         fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n",
1837                                 id, perf_evsel__name(evsel), ++n);
1838                 }
1839                 return NULL;
1840         }
1841
1842         if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) &&
1843             trace__read_syscall_info(trace, id))
1844                 goto out_cant_read;
1845
1846         if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL))
1847                 goto out_cant_read;
1848
1849         return &trace->syscalls.table[id];
1850
1851 out_cant_read:
1852         if (verbose) {
1853                 fprintf(trace->output, "Problems reading syscall %d", id);
1854                 if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL)
1855                         fprintf(trace->output, "(%s)", trace->syscalls.table[id].name);
1856                 fputs(" information\n", trace->output);
1857         }
1858         return NULL;
1859 }
1860
1861 static void thread__update_stats(struct thread_trace *ttrace,
1862                                  int id, struct perf_sample *sample)
1863 {
1864         struct int_node *inode;
1865         struct stats *stats;
1866         u64 duration = 0;
1867
1868         inode = intlist__findnew(ttrace->syscall_stats, id);
1869         if (inode == NULL)
1870                 return;
1871
1872         stats = inode->priv;
1873         if (stats == NULL) {
1874                 stats = malloc(sizeof(struct stats));
1875                 if (stats == NULL)
1876                         return;
1877                 init_stats(stats);
1878                 inode->priv = stats;
1879         }
1880
1881         if (ttrace->entry_time && sample->time > ttrace->entry_time)
1882                 duration = sample->time - ttrace->entry_time;
1883
1884         update_stats(stats, duration);
1885 }
1886
1887 static int trace__printf_interrupted_entry(struct trace *trace, struct perf_sample *sample)
1888 {
1889         struct thread_trace *ttrace;
1890         u64 duration;
1891         size_t printed;
1892
1893         if (trace->current == NULL)
1894                 return 0;
1895
1896         ttrace = thread__priv(trace->current);
1897
1898         if (!ttrace->entry_pending)
1899                 return 0;
1900
1901         duration = sample->time - ttrace->entry_time;
1902
1903         printed  = trace__fprintf_entry_head(trace, trace->current, duration, sample->time, trace->output);
1904         printed += fprintf(trace->output, "%-70s) ...\n", ttrace->entry_str);
1905         ttrace->entry_pending = false;
1906
1907         return printed;
1908 }
1909
1910 static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
1911                             union perf_event *event __maybe_unused,
1912                             struct perf_sample *sample)
1913 {
1914         char *msg;
1915         void *args;
1916         size_t printed = 0;
1917         struct thread *thread;
1918         int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1;
1919         struct syscall *sc = trace__syscall_info(trace, evsel, id);
1920         struct thread_trace *ttrace;
1921
1922         if (sc == NULL)
1923                 return -1;
1924
1925         thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1926         ttrace = thread__trace(thread, trace->output);
1927         if (ttrace == NULL)
1928                 goto out_put;
1929
1930         args = perf_evsel__sc_tp_ptr(evsel, args, sample);
1931
1932         if (ttrace->entry_str == NULL) {
1933                 ttrace->entry_str = malloc(trace__entry_str_size);
1934                 if (!ttrace->entry_str)
1935                         goto out_put;
1936         }
1937
1938         if (!trace->summary_only)
1939                 trace__printf_interrupted_entry(trace, sample);
1940
1941         ttrace->entry_time = sample->time;
1942         msg = ttrace->entry_str;
1943         printed += scnprintf(msg + printed, trace__entry_str_size - printed, "%s(", sc->name);
1944
1945         printed += syscall__scnprintf_args(sc, msg + printed, trace__entry_str_size - printed,
1946                                            args, trace, thread);
1947
1948         if (sc->is_exit) {
1949                 if (!trace->duration_filter && !trace->summary_only) {
1950                         trace__fprintf_entry_head(trace, thread, 1, sample->time, trace->output);
1951                         fprintf(trace->output, "%-70s\n", ttrace->entry_str);
1952                 }
1953         } else
1954                 ttrace->entry_pending = true;
1955
1956         if (trace->current != thread) {
1957                 thread__put(trace->current);
1958                 trace->current = thread__get(thread);
1959         }
1960         err = 0;
1961 out_put:
1962         thread__put(thread);
1963         return err;
1964 }
1965
1966 static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
1967                            union perf_event *event __maybe_unused,
1968                            struct perf_sample *sample)
1969 {
1970         long ret;
1971         u64 duration = 0;
1972         struct thread *thread;
1973         int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1;
1974         struct syscall *sc = trace__syscall_info(trace, evsel, id);
1975         struct thread_trace *ttrace;
1976
1977         if (sc == NULL)
1978                 return -1;
1979
1980         thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1981         ttrace = thread__trace(thread, trace->output);
1982         if (ttrace == NULL)
1983                 goto out_put;
1984
1985         if (trace->summary)
1986                 thread__update_stats(ttrace, id, sample);
1987
1988         ret = perf_evsel__sc_tp_uint(evsel, ret, sample);
1989
1990         if (id == trace->audit.open_id && ret >= 0 && trace->last_vfs_getname) {
1991                 trace__set_fd_pathname(thread, ret, trace->last_vfs_getname);
1992                 trace->last_vfs_getname = NULL;
1993                 ++trace->stats.vfs_getname;
1994         }
1995
1996         ttrace->exit_time = sample->time;
1997
1998         if (ttrace->entry_time) {
1999                 duration = sample->time - ttrace->entry_time;
2000                 if (trace__filter_duration(trace, duration))
2001                         goto out;
2002         } else if (trace->duration_filter)
2003                 goto out;
2004
2005         if (trace->summary_only)
2006                 goto out;
2007
2008         trace__fprintf_entry_head(trace, thread, duration, sample->time, trace->output);
2009
2010         if (ttrace->entry_pending) {
2011                 fprintf(trace->output, "%-70s", ttrace->entry_str);
2012         } else {
2013                 fprintf(trace->output, " ... [");
2014                 color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued");
2015                 fprintf(trace->output, "]: %s()", sc->name);
2016         }
2017
2018         if (sc->fmt == NULL) {
2019 signed_print:
2020                 fprintf(trace->output, ") = %ld", ret);
2021         } else if (ret < 0 && sc->fmt->errmsg) {
2022                 char bf[STRERR_BUFSIZE];
2023                 const char *emsg = strerror_r(-ret, bf, sizeof(bf)),
2024                            *e = audit_errno_to_name(-ret);
2025
2026                 fprintf(trace->output, ") = -1 %s %s", e, emsg);
2027         } else if (ret == 0 && sc->fmt->timeout)
2028                 fprintf(trace->output, ") = 0 Timeout");
2029         else if (sc->fmt->hexret)
2030                 fprintf(trace->output, ") = %#lx", ret);
2031         else
2032                 goto signed_print;
2033
2034         fputc('\n', trace->output);
2035 out:
2036         ttrace->entry_pending = false;
2037         err = 0;
2038 out_put:
2039         thread__put(thread);
2040         return err;
2041 }
2042
2043 static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel,
2044                               union perf_event *event __maybe_unused,
2045                               struct perf_sample *sample)
2046 {
2047         struct thread *thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
2048         struct thread_trace *ttrace;
2049         size_t filename_len, entry_str_len, to_move;
2050         ssize_t remaining_space;
2051         char *pos;
2052         const char *filename;
2053
2054         trace->last_vfs_getname = perf_evsel__rawptr(evsel, sample, "pathname");
2055
2056         if (!thread)
2057                 goto out;
2058
2059         ttrace = thread__priv(thread);
2060         if (!ttrace)
2061                 goto out;
2062
2063         if (!ttrace->filename.ptr)
2064                 goto out;
2065
2066         entry_str_len = strlen(ttrace->entry_str);
2067         remaining_space = trace__entry_str_size - entry_str_len - 1; /* \0 */
2068         if (remaining_space <= 0)
2069                 goto out;
2070
2071         filename = trace->last_vfs_getname;
2072         filename_len = strlen(filename);
2073         if (filename_len > (size_t)remaining_space) {
2074                 filename += filename_len - remaining_space;
2075                 filename_len = remaining_space;
2076         }
2077
2078         to_move = entry_str_len - ttrace->filename.entry_str_pos + 1; /* \0 */
2079         pos = ttrace->entry_str + ttrace->filename.entry_str_pos;
2080         memmove(pos + filename_len, pos, to_move);
2081         memcpy(pos, filename, filename_len);
2082
2083         ttrace->filename.ptr = 0;
2084         ttrace->filename.entry_str_pos = 0;
2085 out:
2086         return 0;
2087 }
2088
2089 static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel,
2090                                      union perf_event *event __maybe_unused,
2091                                      struct perf_sample *sample)
2092 {
2093         u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
2094         double runtime_ms = (double)runtime / NSEC_PER_MSEC;
2095         struct thread *thread = machine__findnew_thread(trace->host,
2096                                                         sample->pid,
2097                                                         sample->tid);
2098         struct thread_trace *ttrace = thread__trace(thread, trace->output);
2099
2100         if (ttrace == NULL)
2101                 goto out_dump;
2102
2103         ttrace->runtime_ms += runtime_ms;
2104         trace->runtime_ms += runtime_ms;
2105         thread__put(thread);
2106         return 0;
2107
2108 out_dump:
2109         fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n",
2110                evsel->name,
2111                perf_evsel__strval(evsel, sample, "comm"),
2112                (pid_t)perf_evsel__intval(evsel, sample, "pid"),
2113                runtime,
2114                perf_evsel__intval(evsel, sample, "vruntime"));
2115         thread__put(thread);
2116         return 0;
2117 }
2118
2119 static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel,
2120                                 union perf_event *event __maybe_unused,
2121                                 struct perf_sample *sample)
2122 {
2123         trace__printf_interrupted_entry(trace, sample);
2124         trace__fprintf_tstamp(trace, sample->time, trace->output);
2125
2126         if (trace->trace_syscalls)
2127                 fprintf(trace->output, "(         ): ");
2128
2129         fprintf(trace->output, "%s:", evsel->name);
2130
2131         if (evsel->tp_format) {
2132                 event_format__fprintf(evsel->tp_format, sample->cpu,
2133                                       sample->raw_data, sample->raw_size,
2134                                       trace->output);
2135         }
2136
2137         fprintf(trace->output, ")\n");
2138         return 0;
2139 }
2140
2141 static void print_location(FILE *f, struct perf_sample *sample,
2142                            struct addr_location *al,
2143                            bool print_dso, bool print_sym)
2144 {
2145
2146         if ((verbose || print_dso) && al->map)
2147                 fprintf(f, "%s@", al->map->dso->long_name);
2148
2149         if ((verbose || print_sym) && al->sym)
2150                 fprintf(f, "%s+0x%" PRIx64, al->sym->name,
2151                         al->addr - al->sym->start);
2152         else if (al->map)
2153                 fprintf(f, "0x%" PRIx64, al->addr);
2154         else
2155                 fprintf(f, "0x%" PRIx64, sample->addr);
2156 }
2157
2158 static int trace__pgfault(struct trace *trace,
2159                           struct perf_evsel *evsel,
2160                           union perf_event *event,
2161                           struct perf_sample *sample)
2162 {
2163         struct thread *thread;
2164         u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
2165         struct addr_location al;
2166         char map_type = 'd';
2167         struct thread_trace *ttrace;
2168         int err = -1;
2169
2170         thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
2171         ttrace = thread__trace(thread, trace->output);
2172         if (ttrace == NULL)
2173                 goto out_put;
2174
2175         if (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ)
2176                 ttrace->pfmaj++;
2177         else
2178                 ttrace->pfmin++;
2179
2180         if (trace->summary_only)
2181                 goto out;
2182
2183         thread__find_addr_location(thread, cpumode, MAP__FUNCTION,
2184                               sample->ip, &al);
2185
2186         trace__fprintf_entry_head(trace, thread, 0, sample->time, trace->output);
2187
2188         fprintf(trace->output, "%sfault [",
2189                 evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ?
2190                 "maj" : "min");
2191
2192         print_location(trace->output, sample, &al, false, true);
2193
2194         fprintf(trace->output, "] => ");
2195
2196         thread__find_addr_location(thread, cpumode, MAP__VARIABLE,
2197                                    sample->addr, &al);
2198
2199         if (!al.map) {
2200                 thread__find_addr_location(thread, cpumode,
2201                                            MAP__FUNCTION, sample->addr, &al);
2202
2203                 if (al.map)
2204                         map_type = 'x';
2205                 else
2206                         map_type = '?';
2207         }
2208
2209         print_location(trace->output, sample, &al, true, false);
2210
2211         fprintf(trace->output, " (%c%c)\n", map_type, al.level);
2212 out:
2213         err = 0;
2214 out_put:
2215         thread__put(thread);
2216         return err;
2217 }
2218
2219 static bool skip_sample(struct trace *trace, struct perf_sample *sample)
2220 {
2221         if ((trace->pid_list && intlist__find(trace->pid_list, sample->pid)) ||
2222             (trace->tid_list && intlist__find(trace->tid_list, sample->tid)))
2223                 return false;
2224
2225         if (trace->pid_list || trace->tid_list)
2226                 return true;
2227
2228         return false;
2229 }
2230
2231 static int trace__process_sample(struct perf_tool *tool,
2232                                  union perf_event *event,
2233                                  struct perf_sample *sample,
2234                                  struct perf_evsel *evsel,
2235                                  struct machine *machine __maybe_unused)
2236 {
2237         struct trace *trace = container_of(tool, struct trace, tool);
2238         int err = 0;
2239
2240         tracepoint_handler handler = evsel->handler;
2241
2242         if (skip_sample(trace, sample))
2243                 return 0;
2244
2245         if (!trace->full_time && trace->base_time == 0)
2246                 trace->base_time = sample->time;
2247
2248         if (handler) {
2249                 ++trace->nr_events;
2250                 handler(trace, evsel, event, sample);
2251         }
2252
2253         return err;
2254 }
2255
2256 static int parse_target_str(struct trace *trace)
2257 {
2258         if (trace->opts.target.pid) {
2259                 trace->pid_list = intlist__new(trace->opts.target.pid);
2260                 if (trace->pid_list == NULL) {
2261                         pr_err("Error parsing process id string\n");
2262                         return -EINVAL;
2263                 }
2264         }
2265
2266         if (trace->opts.target.tid) {
2267                 trace->tid_list = intlist__new(trace->opts.target.tid);
2268                 if (trace->tid_list == NULL) {
2269                         pr_err("Error parsing thread id string\n");
2270                         return -EINVAL;
2271                 }
2272         }
2273
2274         return 0;
2275 }
2276
2277 static int trace__record(struct trace *trace, int argc, const char **argv)
2278 {
2279         unsigned int rec_argc, i, j;
2280         const char **rec_argv;
2281         const char * const record_args[] = {
2282                 "record",
2283                 "-R",
2284                 "-m", "1024",
2285                 "-c", "1",
2286         };
2287
2288         const char * const sc_args[] = { "-e", };
2289         unsigned int sc_args_nr = ARRAY_SIZE(sc_args);
2290         const char * const majpf_args[] = { "-e", "major-faults" };
2291         unsigned int majpf_args_nr = ARRAY_SIZE(majpf_args);
2292         const char * const minpf_args[] = { "-e", "minor-faults" };
2293         unsigned int minpf_args_nr = ARRAY_SIZE(minpf_args);
2294
2295         /* +1 is for the event string below */
2296         rec_argc = ARRAY_SIZE(record_args) + sc_args_nr + 1 +
2297                 majpf_args_nr + minpf_args_nr + argc;
2298         rec_argv = calloc(rec_argc + 1, sizeof(char *));
2299
2300         if (rec_argv == NULL)
2301                 return -ENOMEM;
2302
2303         j = 0;
2304         for (i = 0; i < ARRAY_SIZE(record_args); i++)
2305                 rec_argv[j++] = record_args[i];
2306
2307         if (trace->trace_syscalls) {
2308                 for (i = 0; i < sc_args_nr; i++)
2309                         rec_argv[j++] = sc_args[i];
2310
2311                 /* event string may be different for older kernels - e.g., RHEL6 */
2312                 if (is_valid_tracepoint("raw_syscalls:sys_enter"))
2313                         rec_argv[j++] = "raw_syscalls:sys_enter,raw_syscalls:sys_exit";
2314                 else if (is_valid_tracepoint("syscalls:sys_enter"))
2315                         rec_argv[j++] = "syscalls:sys_enter,syscalls:sys_exit";
2316                 else {
2317                         pr_err("Neither raw_syscalls nor syscalls events exist.\n");
2318                         return -1;
2319                 }
2320         }
2321
2322         if (trace->trace_pgfaults & TRACE_PFMAJ)
2323                 for (i = 0; i < majpf_args_nr; i++)
2324                         rec_argv[j++] = majpf_args[i];
2325
2326         if (trace->trace_pgfaults & TRACE_PFMIN)
2327                 for (i = 0; i < minpf_args_nr; i++)
2328                         rec_argv[j++] = minpf_args[i];
2329
2330         for (i = 0; i < (unsigned int)argc; i++)
2331                 rec_argv[j++] = argv[i];
2332
2333         return cmd_record(j, rec_argv, NULL);
2334 }
2335
2336 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp);
2337
2338 static bool perf_evlist__add_vfs_getname(struct perf_evlist *evlist)
2339 {
2340         struct perf_evsel *evsel = perf_evsel__newtp("probe", "vfs_getname");
2341         if (evsel == NULL)
2342                 return false;
2343
2344         if (perf_evsel__field(evsel, "pathname") == NULL) {
2345                 perf_evsel__delete(evsel);
2346                 return false;
2347         }
2348
2349         evsel->handler = trace__vfs_getname;
2350         perf_evlist__add(evlist, evsel);
2351         return true;
2352 }
2353
2354 static int perf_evlist__add_pgfault(struct perf_evlist *evlist,
2355                                     u64 config)
2356 {
2357         struct perf_evsel *evsel;
2358         struct perf_event_attr attr = {
2359                 .type = PERF_TYPE_SOFTWARE,
2360                 .mmap_data = 1,
2361         };
2362
2363         attr.config = config;
2364         attr.sample_period = 1;
2365
2366         event_attr_init(&attr);
2367
2368         evsel = perf_evsel__new(&attr);
2369         if (!evsel)
2370                 return -ENOMEM;
2371
2372         evsel->handler = trace__pgfault;
2373         perf_evlist__add(evlist, evsel);
2374
2375         return 0;
2376 }
2377
2378 static void trace__handle_event(struct trace *trace, union perf_event *event, struct perf_sample *sample)
2379 {
2380         const u32 type = event->header.type;
2381         struct perf_evsel *evsel;
2382
2383         if (!trace->full_time && trace->base_time == 0)
2384                 trace->base_time = sample->time;
2385
2386         if (type != PERF_RECORD_SAMPLE) {
2387                 trace__process_event(trace, trace->host, event, sample);
2388                 return;
2389         }
2390
2391         evsel = perf_evlist__id2evsel(trace->evlist, sample->id);
2392         if (evsel == NULL) {
2393                 fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample->id);
2394                 return;
2395         }
2396
2397         if (evsel->attr.type == PERF_TYPE_TRACEPOINT &&
2398             sample->raw_data == NULL) {
2399                 fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
2400                        perf_evsel__name(evsel), sample->tid,
2401                        sample->cpu, sample->raw_size);
2402         } else {
2403                 tracepoint_handler handler = evsel->handler;
2404                 handler(trace, evsel, event, sample);
2405         }
2406 }
2407
2408 static int trace__add_syscall_newtp(struct trace *trace)
2409 {
2410         int ret = -1;
2411         struct perf_evlist *evlist = trace->evlist;
2412         struct perf_evsel *sys_enter, *sys_exit;
2413
2414         sys_enter = perf_evsel__syscall_newtp("sys_enter", trace__sys_enter);
2415         if (sys_enter == NULL)
2416                 goto out;
2417
2418         if (perf_evsel__init_sc_tp_ptr_field(sys_enter, args))
2419                 goto out_delete_sys_enter;
2420
2421         sys_exit = perf_evsel__syscall_newtp("sys_exit", trace__sys_exit);
2422         if (sys_exit == NULL)
2423                 goto out_delete_sys_enter;
2424
2425         if (perf_evsel__init_sc_tp_uint_field(sys_exit, ret))
2426                 goto out_delete_sys_exit;
2427
2428         perf_evlist__add(evlist, sys_enter);
2429         perf_evlist__add(evlist, sys_exit);
2430
2431         trace->syscalls.events.sys_enter = sys_enter;
2432         trace->syscalls.events.sys_exit  = sys_exit;
2433
2434         ret = 0;
2435 out:
2436         return ret;
2437
2438 out_delete_sys_exit:
2439         perf_evsel__delete_priv(sys_exit);
2440 out_delete_sys_enter:
2441         perf_evsel__delete_priv(sys_enter);
2442         goto out;
2443 }
2444
2445 static int trace__set_ev_qualifier_filter(struct trace *trace)
2446 {
2447         int err = -1;
2448         char *filter = asprintf_expr_inout_ints("id", !trace->not_ev_qualifier,
2449                                                 trace->ev_qualifier_ids.nr,
2450                                                 trace->ev_qualifier_ids.entries);
2451
2452         if (filter == NULL)
2453                 goto out_enomem;
2454
2455         if (!perf_evsel__append_filter(trace->syscalls.events.sys_enter, "&&", filter))
2456                 err = perf_evsel__append_filter(trace->syscalls.events.sys_exit, "&&", filter);
2457
2458         free(filter);
2459 out:
2460         return err;
2461 out_enomem:
2462         errno = ENOMEM;
2463         goto out;
2464 }
2465
/*
 * Run a live trace session on trace->evlist.
 *
 * Adds the requested events (syscall tracepoints, vfs_getname, page faults,
 * sched_stat_runtime), creates the target maps, prepares a workload when a
 * command was passed (argc > 0), opens, filters and mmaps the events, and
 * then reads samples from every mmap, handing each to trace__handle_event(),
 * until the 'done' / 'interrupted' flags end the loop (those flags are
 * defined outside this function; presumably set by sig_handler - confirm).
 *
 * Every exit path goes through out_delete_evlist, which deletes the evlist
 * and sets trace->evlist to NULL. Returns 'err' as left by the last
 * operation that set it (it starts out as -1).
 */
static int trace__run(struct trace *trace, int argc, const char **argv)
{
        struct perf_evlist *evlist = trace->evlist;
        struct perf_evsel *evsel;
        int err = -1, i;
        unsigned long before;
        /* A workload command line was given and has to be started by us. */
        const bool forks = argc > 0;
        bool draining = false;

        trace->live = true;

        if (trace->trace_syscalls && trace__add_syscall_newtp(trace))
                goto out_error_raw_syscalls;

        if (trace->trace_syscalls)
                trace->vfs_getname = perf_evlist__add_vfs_getname(evlist);

        if ((trace->trace_pgfaults & TRACE_PFMAJ) &&
            perf_evlist__add_pgfault(evlist, PERF_COUNT_SW_PAGE_FAULTS_MAJ)) {
                goto out_error_mem;
        }

        if ((trace->trace_pgfaults & TRACE_PFMIN) &&
            perf_evlist__add_pgfault(evlist, PERF_COUNT_SW_PAGE_FAULTS_MIN))
                goto out_error_mem;

        if (trace->sched &&
            perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
                                   trace__sched_stat_runtime))
                goto out_error_sched_stat_runtime;

        err = perf_evlist__create_maps(evlist, &trace->opts.target);
        if (err < 0) {
                fprintf(trace->output, "Problems parsing the target to trace, check your options!\n");
                goto out_delete_evlist;
        }

        err = trace__symbols_init(trace, evlist);
        if (err < 0) {
                fprintf(trace->output, "Problems initializing symbol libraries!\n");
                goto out_delete_evlist;
        }

        perf_evlist__config(evlist, &trace->opts);

        signal(SIGCHLD, sig_handler);
        signal(SIGINT, sig_handler);

        if (forks) {
                err = perf_evlist__prepare_workload(evlist, &trace->opts.target,
                                                    argv, false, NULL);
                if (err < 0) {
                        fprintf(trace->output, "Couldn't run the workload!\n");
                        goto out_delete_evlist;
                }
        }

        err = perf_evlist__open(evlist);
        if (err < 0)
                goto out_error_open;

        /*
         * Better not use !target__has_task() here because we need to cover the
         * case where no threads were specified in the command line, but a
         * workload was, and in that case we will fill in the thread_map when
         * we fork the workload in perf_evlist__prepare_workload.
         */
        if (trace->filter_pids.nr > 0)
                err = perf_evlist__set_filter_pids(evlist, trace->filter_pids.nr, trace->filter_pids.entries);
        else if (thread_map__pid(evlist->threads, 0) == -1)
                err = perf_evlist__set_filter_pid(evlist, getpid());

        /* Any pid-filter failure is reported as an out-of-memory condition. */
        if (err < 0)
                goto out_error_mem;

        if (trace->ev_qualifier_ids.nr > 0) {
                err = trace__set_ev_qualifier_filter(trace);
                if (err < 0)
                        goto out_errno;

                pr_debug("event qualifier tracepoint filter: %s\n",
                         trace->syscalls.events.sys_exit->filter);
        }

        /* On failure 'evsel' is used by out_error_apply_filters to name the event. */
        err = perf_evlist__apply_filters(evlist, &evsel);
        if (err < 0)
                goto out_error_apply_filters;

        err = perf_evlist__mmap(evlist, trace->opts.mmap_pages, false);
        if (err < 0)
                goto out_error_mmap;

        if (!target__none(&trace->opts.target))
                perf_evlist__enable(evlist);

        if (forks)
                perf_evlist__start_workload(evlist);

        trace->multiple_threads = thread_map__pid(evlist->threads, 0) == -1 ||
                                  evlist->threads->nr > 1 ||
                                  perf_evlist__first(evlist)->attr.inherit;
again:
        /* Snapshot the counter so we can tell whether this pass saw any event. */
        before = trace->nr_events;

        for (i = 0; i < evlist->nr_mmaps; i++) {
                union perf_event *event;

                while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) {
                        struct perf_sample sample;

                        ++trace->nr_events;

                        err = perf_evlist__parse_sample(evlist, event, &sample);
                        if (err) {
                                fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err);
                                goto next_event;
                        }

                        trace__handle_event(trace, event, &sample);
next_event:
                        /* The event is consumed whether or not it could be parsed. */
                        perf_evlist__mmap_consume(evlist, i);

                        if (interrupted)
                                goto out_disable;

                        /* Once done, stop the events but keep reading what is already queued. */
                        if (done && !draining) {
                                perf_evlist__disable(evlist);
                                draining = true;
                        }
                }
        }

        if (trace->nr_events == before) {
                /* Nothing was read in this pass: wait for more, unless we are draining. */
                int timeout = done ? 100 : -1;

                if (!draining && perf_evlist__poll(evlist, timeout) > 0) {
                        if (perf_evlist__filter_pollfd(evlist, POLLERR | POLLHUP) == 0)
                                draining = true;

                        goto again;
                }
        } else {
                goto again;
        }

out_disable:
        thread__zput(trace->current);

        perf_evlist__disable(evlist);

        /* Summary and tool stats are only printed when 'err' ended up as 0. */
        if (!err) {
                if (trace->summary)
                        trace__fprintf_thread_summary(trace, trace->output);

                if (trace->show_tool_stats) {
                        fprintf(trace->output, "Stats:\n "
                                               " vfs_getname : %" PRIu64 "\n"
                                               " proc_getname: %" PRIu64 "\n",
                                trace->stats.vfs_getname,
                                trace->stats.proc_getname);
                }
        }

out_delete_evlist:
        perf_evlist__delete(evlist);
        trace->evlist = NULL;
        trace->live = false;
        return err;
/*
 * The block below is only ever entered through its goto labels (the return
 * above ends the fall-through path); it exists to scope errbuf to the error
 * reporting code.
 */
{
        char errbuf[BUFSIZ];

out_error_sched_stat_runtime:
        debugfs__strerror_open_tp(errno, errbuf, sizeof(errbuf), "sched", "sched_stat_runtime");
        goto out_error;

out_error_raw_syscalls:
        debugfs__strerror_open_tp(errno, errbuf, sizeof(errbuf), "raw_syscalls", "sys_(enter|exit)");
        goto out_error;

out_error_mmap:
        perf_evlist__strerror_mmap(evlist, errno, errbuf, sizeof(errbuf));
        goto out_error;

out_error_open:
        perf_evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf));

        /* out_error_open falls through into the common printer. */
out_error:
        fprintf(trace->output, "%s\n", errbuf);
        goto out_delete_evlist;

out_error_apply_filters:
        fprintf(trace->output,
                "Failed to set filter \"%s\" on event %s with %d (%s)\n",
                evsel->filter, perf_evsel__name(evsel), errno,
                strerror_r(errno, errbuf, sizeof(errbuf)));
        goto out_delete_evlist;
}
out_error_mem:
        fprintf(trace->output, "Not enough memory to run!\n");
        goto out_delete_evlist;

out_errno:
        fprintf(trace->output, "errno=%d,%s\n", errno, strerror(errno));
        goto out_delete_evlist;
}
2671
2672 static int trace__replay(struct trace *trace)
2673 {
2674         const struct perf_evsel_str_handler handlers[] = {
2675                 { "probe:vfs_getname",       trace__vfs_getname, },
2676         };
2677         struct perf_data_file file = {
2678                 .path  = input_name,
2679                 .mode  = PERF_DATA_MODE_READ,
2680                 .force = trace->force,
2681         };
2682         struct perf_session *session;
2683         struct perf_evsel *evsel;
2684         int err = -1;
2685
2686         trace->tool.sample        = trace__process_sample;
2687         trace->tool.mmap          = perf_event__process_mmap;
2688         trace->tool.mmap2         = perf_event__process_mmap2;
2689         trace->tool.comm          = perf_event__process_comm;
2690         trace->tool.exit          = perf_event__process_exit;
2691         trace->tool.fork          = perf_event__process_fork;
2692         trace->tool.attr          = perf_event__process_attr;
2693         trace->tool.tracing_data = perf_event__process_tracing_data;
2694         trace->tool.build_id      = perf_event__process_build_id;
2695
2696         trace->tool.ordered_events = true;
2697         trace->tool.ordering_requires_timestamps = true;
2698
2699         /* add tid to output */
2700         trace->multiple_threads = true;
2701
2702         session = perf_session__new(&file, false, &trace->tool);
2703         if (session == NULL)
2704                 return -1;
2705
2706         if (symbol__init(&session->header.env) < 0)
2707                 goto out;
2708
2709         trace->host = &session->machines.host;
2710
2711         err = perf_session__set_tracepoints_handlers(session, handlers);
2712         if (err)
2713                 goto out;
2714
2715         evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2716                                                      "raw_syscalls:sys_enter");
2717         /* older kernels have syscalls tp versus raw_syscalls */
2718         if (evsel == NULL)
2719                 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2720                                                              "syscalls:sys_enter");
2721
2722         if (evsel &&
2723             (perf_evsel__init_syscall_tp(evsel, trace__sys_enter) < 0 ||
2724             perf_evsel__init_sc_tp_ptr_field(evsel, args))) {
2725                 pr_err("Error during initialize raw_syscalls:sys_enter event\n");
2726                 goto out;
2727         }
2728
2729         evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2730                                                      "raw_syscalls:sys_exit");
2731         if (evsel == NULL)
2732                 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2733                                                              "syscalls:sys_exit");
2734         if (evsel &&
2735             (perf_evsel__init_syscall_tp(evsel, trace__sys_exit) < 0 ||
2736             perf_evsel__init_sc_tp_uint_field(evsel, ret))) {
2737                 pr_err("Error during initialize raw_syscalls:sys_exit event\n");
2738                 goto out;
2739         }
2740
2741         evlist__for_each(session->evlist, evsel) {
2742                 if (evsel->attr.type == PERF_TYPE_SOFTWARE &&
2743                     (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ||
2744                      evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MIN ||
2745                      evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS))
2746                         evsel->handler = trace__pgfault;
2747         }
2748
2749         err = parse_target_str(trace);
2750         if (err != 0)
2751                 goto out;
2752
2753         setup_pager();
2754
2755         err = perf_session__process_events(session);
2756         if (err)
2757                 pr_err("Failed to process events, error %d", err);
2758
2759         else if (trace->summary)
2760                 trace__fprintf_thread_summary(trace, trace->output);
2761
2762 out:
2763         perf_session__delete(session);
2764
2765         return err;
2766 }
2767
/*
 * Print the heading of the per-thread summary to 'fp'.
 *
 * Returns the fprintf() result converted to size_t.
 */
static size_t trace__fprintf_threads_header(FILE *fp)
{
        return fprintf(fp, "\n Summary of events:\n\n");
}
2776
2777 static size_t thread__dump_stats(struct thread_trace *ttrace,
2778                                  struct trace *trace, FILE *fp)
2779 {
2780         struct stats *stats;
2781         size_t printed = 0;
2782         struct syscall *sc;
2783         struct int_node *inode = intlist__first(ttrace->syscall_stats);
2784
2785         if (inode == NULL)
2786                 return 0;
2787
2788         printed += fprintf(fp, "\n");
2789
2790         printed += fprintf(fp, "   syscall            calls    total       min       avg       max      stddev\n");
2791         printed += fprintf(fp, "                               (msec)    (msec)    (msec)    (msec)        (%%)\n");
2792         printed += fprintf(fp, "   --------------- -------- --------- --------- --------- ---------     ------\n");
2793
2794         /* each int_node is a syscall */
2795         while (inode) {
2796                 stats = inode->priv;
2797                 if (stats) {
2798                         double min = (double)(stats->min) / NSEC_PER_MSEC;
2799                         double max = (double)(stats->max) / NSEC_PER_MSEC;
2800                         double avg = avg_stats(stats);
2801                         double pct;
2802                         u64 n = (u64) stats->n;
2803
2804                         pct = avg ? 100.0 * stddev_stats(stats)/avg : 0.0;
2805                         avg /= NSEC_PER_MSEC;
2806
2807                         sc = &trace->syscalls.table[inode->i];
2808                         printed += fprintf(fp, "   %-15s", sc->name);
2809                         printed += fprintf(fp, " %8" PRIu64 " %9.3f %9.3f %9.3f",
2810                                            n, avg * n, min, avg);
2811                         printed += fprintf(fp, " %9.3f %9.2f%%\n", max, pct);
2812                 }
2813
2814                 inode = intlist__next(inode);
2815         }
2816
2817         printed += fprintf(fp, "\n\n");
2818
2819         return printed;
2820 }
2821
/*
 * Context handed, through the 'priv' pointer, to the callback that prints
 * the summary of one thread.
 */
struct summary_data {
        FILE *fp;               /* stream the summary is written to */
        struct trace *trace;    /* trace session being summarized */
        size_t printed;         /* running total of the fprintf() results */
};
2828
2829 static int trace__fprintf_one_thread(struct thread *thread, void *priv)
2830 {
2831         struct summary_data *data = priv;
2832         FILE *fp = data->fp;
2833         size_t printed = data->printed;
2834         struct trace *trace = data->trace;
2835         struct thread_trace *ttrace = thread__priv(thread);
2836         double ratio;
2837
2838         if (ttrace == NULL)
2839                 return 0;
2840
2841         ratio = (double)ttrace->nr_events / trace->nr_events * 100.0;
2842
2843         printed += fprintf(fp, " %s (%d), ", thread__comm_str(thread), thread->tid);
2844         printed += fprintf(fp, "%lu events, ", ttrace->nr_events);
2845         printed += fprintf(fp, "%.1f%%", ratio);
2846         if (ttrace->pfmaj)
2847                 printed += fprintf(fp, ", %lu majfaults", ttrace->pfmaj);
2848         if (ttrace->pfmin)
2849                 printed += fprintf(fp, ", %lu minfaults", ttrace->pfmin);
2850         printed += fprintf(fp, ", %.3f msec\n", ttrace->runtime_ms);
2851         printed += thread__dump_stats(ttrace, trace, fp);
2852
2853         data->printed += printed;
2854
2855         return 0;
2856 }
2857
2858 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
2859 {
2860         struct summary_data data = {
2861                 .fp = fp,
2862                 .trace = trace
2863         };
2864         data.printed = trace__fprintf_threads_header(fp);
2865
2866         machine__for_each_thread(trace->host, trace__fprintf_one_thread, &data);
2867
2868         return data.printed;
2869 }
2870
2871 static int trace__set_duration(const struct option *opt, const char *str,
2872                                int unset __maybe_unused)
2873 {
2874         struct trace *trace = opt->value;
2875
2876         trace->duration_filter = atof(str);
2877         return 0;
2878 }
2879
2880 static int trace__set_filter_pids(const struct option *opt, const char *str,
2881                                   int unset __maybe_unused)
2882 {
2883         int ret = -1;
2884         size_t i;
2885         struct trace *trace = opt->value;
2886         /*
2887          * FIXME: introduce a intarray class, plain parse csv and create a
2888          * { int nr, int entries[] } struct...
2889          */
2890         struct intlist *list = intlist__new(str);
2891
2892         if (list == NULL)
2893                 return -1;
2894
2895         i = trace->filter_pids.nr = intlist__nr_entries(list) + 1;
2896         trace->filter_pids.entries = calloc(i, sizeof(pid_t));
2897
2898         if (trace->filter_pids.entries == NULL)
2899                 goto out;
2900
2901         trace->filter_pids.entries[0] = getpid();
2902
2903         for (i = 1; i < trace->filter_pids.nr; ++i)
2904                 trace->filter_pids.entries[i] = intlist__entry(list, i - 1)->i;
2905
2906         intlist__delete(list);
2907         ret = 0;
2908 out:
2909         return ret;
2910 }
2911
2912 static int trace__open_output(struct trace *trace, const char *filename)
2913 {
2914         struct stat st;
2915
2916         if (!stat(filename, &st) && st.st_size) {
2917                 char oldname[PATH_MAX];
2918
2919                 scnprintf(oldname, sizeof(oldname), "%s.old", filename);
2920                 unlink(oldname);
2921                 rename(filename, oldname);
2922         }
2923
2924         trace->output = fopen(filename, "w");
2925
2926         return trace->output == NULL ? -errno : 0;
2927 }
2928
2929 static int parse_pagefaults(const struct option *opt, const char *str,
2930                             int unset __maybe_unused)
2931 {
2932         int *trace_pgfaults = opt->value;
2933
2934         if (strcmp(str, "all") == 0)
2935                 *trace_pgfaults |= TRACE_PFMAJ | TRACE_PFMIN;
2936         else if (strcmp(str, "maj") == 0)
2937                 *trace_pgfaults |= TRACE_PFMAJ;
2938         else if (strcmp(str, "min") == 0)
2939                 *trace_pgfaults |= TRACE_PFMIN;
2940         else
2941                 return -1;
2942
2943         return 0;
2944 }
2945
2946 static void evlist__set_evsel_handler(struct perf_evlist *evlist, void *handler)
2947 {
2948         struct perf_evsel *evsel;
2949
2950         evlist__for_each(evlist, evsel)
2951                 evsel->handler = handler;
2952 }
2953
2954 int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
2955 {
2956         const char *trace_usage[] = {
2957                 "perf trace [<options>] [<command>]",
2958                 "perf trace [<options>] -- <command> [<options>]",
2959                 "perf trace record [<options>] [<command>]",
2960                 "perf trace record [<options>] -- <command> [<options>]",
2961                 NULL
2962         };
2963         struct trace trace = {
2964                 .audit = {
2965                         .machine = audit_detect_machine(),
2966                         .open_id = audit_name_to_syscall("open", trace.audit.machine),
2967                 },
2968                 .syscalls = {
2969                         . max = -1,
2970                 },
2971                 .opts = {
2972                         .target = {
2973                                 .uid       = UINT_MAX,
2974                                 .uses_mmap = true,
2975                         },
2976                         .user_freq     = UINT_MAX,
2977                         .user_interval = ULLONG_MAX,
2978                         .no_buffering  = true,
2979                         .mmap_pages    = UINT_MAX,
2980                         .proc_map_timeout  = 500,
2981                 },
2982                 .output = stderr,
2983                 .show_comm = true,
2984                 .trace_syscalls = true,
2985         };
2986         const char *output_name = NULL;
2987         const char *ev_qualifier_str = NULL;
2988         const struct option trace_options[] = {
2989         OPT_CALLBACK(0, "event", &trace.evlist, "event",
2990                      "event selector. use 'perf list' to list available events",
2991                      parse_events_option),
2992         OPT_BOOLEAN(0, "comm", &trace.show_comm,
2993                     "show the thread COMM next to its id"),
2994         OPT_BOOLEAN(0, "tool_stats", &trace.show_tool_stats, "show tool stats"),
2995         OPT_STRING('e', "expr", &ev_qualifier_str, "expr", "list of syscalls to trace"),
2996         OPT_STRING('o', "output", &output_name, "file", "output file name"),
2997         OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"),
2998         OPT_STRING('p', "pid", &trace.opts.target.pid, "pid",
2999                     "trace events on existing process id"),
3000         OPT_STRING('t', "tid", &trace.opts.target.tid, "tid",
3001                     "trace events on existing thread id"),
3002         OPT_CALLBACK(0, "filter-pids", &trace, "CSV list of pids",
3003                      "pids to filter (by the kernel)", trace__set_filter_pids),
3004         OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide,
3005                     "system-wide collection from all CPUs"),
3006         OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu",
3007                     "list of cpus to monitor"),
3008         OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit,
3009                     "child tasks do not inherit counters"),
3010         OPT_CALLBACK('m', "mmap-pages", &trace.opts.mmap_pages, "pages",
3011                      "number of mmap data pages",
3012                      perf_evlist__parse_mmap_pages),
3013         OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user",
3014                    "user to profile"),
3015         OPT_CALLBACK(0, "duration", &trace, "float",
3016                      "show only events with duration > N.M ms",
3017                      trace__set_duration),
3018         OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"),
3019         OPT_INCR('v', "verbose", &verbose, "be more verbose"),
3020         OPT_BOOLEAN('T', "time", &trace.full_time,
3021                     "Show full timestamp, not time relative to first start"),
3022         OPT_BOOLEAN('s', "summary", &trace.summary_only,
3023                     "Show only syscall summary with statistics"),
3024         OPT_BOOLEAN('S', "with-summary", &trace.summary,
3025                     "Show all syscalls and summary with statistics"),
3026         OPT_CALLBACK_DEFAULT('F', "pf", &trace.trace_pgfaults, "all|maj|min",
3027                      "Trace pagefaults", parse_pagefaults, "maj"),
3028         OPT_BOOLEAN(0, "syscalls", &trace.trace_syscalls, "Trace syscalls"),
3029         OPT_BOOLEAN('f', "force", &trace.force, "don't complain, do it"),
3030         OPT_UINTEGER(0, "proc-map-timeout", &trace.opts.proc_map_timeout,
3031                         "per thread proc mmap processing timeout in ms"),
3032         OPT_END()
3033         };
3034         const char * const trace_subcommands[] = { "record", NULL };
3035         int err;
3036         char bf[BUFSIZ];
3037
3038         signal(SIGSEGV, sighandler_dump_stack);
3039         signal(SIGFPE, sighandler_dump_stack);
3040
3041         trace.evlist = perf_evlist__new();
3042
3043         if (trace.evlist == NULL) {
3044                 pr_err("Not enough memory to run!\n");
3045                 err = -ENOMEM;
3046                 goto out;
3047         }
3048
3049         argc = parse_options_subcommand(argc, argv, trace_options, trace_subcommands,
3050                                  trace_usage, PARSE_OPT_STOP_AT_NON_OPTION);
3051
3052         if (trace.trace_pgfaults) {
3053                 trace.opts.sample_address = true;
3054                 trace.opts.sample_time = true;
3055         }
3056
3057         if (trace.evlist->nr_entries > 0)
3058                 evlist__set_evsel_handler(trace.evlist, trace__event_handler);
3059
3060         if ((argc >= 1) && (strcmp(argv[0], "record") == 0))
3061                 return trace__record(&trace, argc-1, &argv[1]);
3062
3063         /* summary_only implies summary option, but don't overwrite summary if set */
3064         if (trace.summary_only)
3065                 trace.summary = trace.summary_only;
3066
3067         if (!trace.trace_syscalls && !trace.trace_pgfaults &&
3068             trace.evlist->nr_entries == 0 /* Was --events used? */) {
3069                 pr_err("Please specify something to trace.\n");
3070                 return -1;
3071         }
3072
3073         if (output_name != NULL) {
3074                 err = trace__open_output(&trace, output_name);
3075                 if (err < 0) {
3076                         perror("failed to create output file");
3077                         goto out;
3078                 }
3079         }
3080
3081         if (ev_qualifier_str != NULL) {
3082                 const char *s = ev_qualifier_str;
3083                 struct strlist_config slist_config = {
3084                         .dirname = system_path(STRACE_GROUPS_DIR),
3085                 };
3086
3087                 trace.not_ev_qualifier = *s == '!';
3088                 if (trace.not_ev_qualifier)
3089                         ++s;
3090                 trace.ev_qualifier = strlist__new(s, &slist_config);
3091                 if (trace.ev_qualifier == NULL) {
3092                         fputs("Not enough memory to parse event qualifier",
3093                               trace.output);
3094                         err = -ENOMEM;
3095                         goto out_close;
3096                 }
3097
3098                 err = trace__validate_ev_qualifier(&trace);
3099                 if (err)
3100                         goto out_close;
3101         }
3102
3103         err = target__validate(&trace.opts.target);
3104         if (err) {
3105                 target__strerror(&trace.opts.target, err, bf, sizeof(bf));
3106                 fprintf(trace.output, "%s", bf);
3107                 goto out_close;
3108         }
3109
3110         err = target__parse_uid(&trace.opts.target);
3111         if (err) {
3112                 target__strerror(&trace.opts.target, err, bf, sizeof(bf));
3113                 fprintf(trace.output, "%s", bf);
3114                 goto out_close;
3115         }
3116
3117         if (!argc && target__none(&trace.opts.target))
3118                 trace.opts.target.system_wide = true;
3119
3120         if (input_name)
3121                 err = trace__replay(&trace);
3122         else
3123                 err = trace__run(&trace, argc, argv);
3124
3125 out_close:
3126         if (output_name != NULL)
3127                 fclose(trace.output);
3128 out:
3129         return err;
3130 }