perf trace: Do not show syscall tracepoint filter in the --no-syscalls case
[firefly-linux-kernel-4.4.55.git] / tools / perf / builtin-trace.c
1 #include <traceevent/event-parse.h>
2 #include "builtin.h"
3 #include "util/color.h"
4 #include "util/debug.h"
5 #include "util/evlist.h"
6 #include "util/exec_cmd.h"
7 #include "util/machine.h"
8 #include "util/session.h"
9 #include "util/thread.h"
10 #include "util/parse-options.h"
11 #include "util/strlist.h"
12 #include "util/intlist.h"
13 #include "util/thread_map.h"
14 #include "util/stat.h"
15 #include "trace-event.h"
16 #include "util/parse-events.h"
17
18 #include <libaudit.h>
19 #include <stdlib.h>
20 #include <sys/mman.h>
21 #include <linux/futex.h>
22
/*
 * Fallbacks for older distros: each constant below is only defined here
 * when the system headers included above did not already provide it.
 */

/* mmap()/madvise() */
#if !defined(MAP_STACK)
# define MAP_STACK		0x20000
#endif

#if !defined(MADV_HWPOISON)
# define MADV_HWPOISON		100
#endif

#if !defined(MADV_MERGEABLE)
# define MADV_MERGEABLE		12
#endif

#if !defined(MADV_UNMERGEABLE)
# define MADV_UNMERGEABLE	13
#endif

/* eventfd() */
#if !defined(EFD_SEMAPHORE)
# define EFD_SEMAPHORE		1
#endif

#if !defined(EFD_NONBLOCK)
# define EFD_NONBLOCK		00004000
#endif

#if !defined(EFD_CLOEXEC)
# define EFD_CLOEXEC		02000000
#endif

/* open() */
#if !defined(O_CLOEXEC)
# define O_CLOEXEC		02000000
#endif

/* socket() and recvmsg() */
#if !defined(SOCK_DCCP)
# define SOCK_DCCP		6
#endif

#if !defined(SOCK_CLOEXEC)
# define SOCK_CLOEXEC		02000000
#endif

#if !defined(SOCK_NONBLOCK)
# define SOCK_NONBLOCK		00004000
#endif

#if !defined(MSG_CMSG_CLOEXEC)
# define MSG_CMSG_CLOEXEC	0x40000000
#endif

/* perf_event_open() */
#if !defined(PERF_FLAG_FD_NO_GROUP)
# define PERF_FLAG_FD_NO_GROUP		(1UL << 0)
#endif

#if !defined(PERF_FLAG_FD_OUTPUT)
# define PERF_FLAG_FD_OUTPUT		(1UL << 1)
#endif

#if !defined(PERF_FLAG_PID_CGROUP)
# define PERF_FLAG_PID_CGROUP		(1UL << 2) /* pid=cgroup id, per-cpu mode only */
#endif

#if !defined(PERF_FLAG_FD_CLOEXEC)
# define PERF_FLAG_FD_CLOEXEC		(1UL << 3) /* O_CLOEXEC */
#endif
87
88
89 struct tp_field {
90         int offset;
91         union {
92                 u64 (*integer)(struct tp_field *field, struct perf_sample *sample);
93                 void *(*pointer)(struct tp_field *field, struct perf_sample *sample);
94         };
95 };
96
97 #define TP_UINT_FIELD(bits) \
98 static u64 tp_field__u##bits(struct tp_field *field, struct perf_sample *sample) \
99 { \
100         u##bits value; \
101         memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
102         return value;  \
103 }
104
105 TP_UINT_FIELD(8);
106 TP_UINT_FIELD(16);
107 TP_UINT_FIELD(32);
108 TP_UINT_FIELD(64);
109
110 #define TP_UINT_FIELD__SWAPPED(bits) \
111 static u64 tp_field__swapped_u##bits(struct tp_field *field, struct perf_sample *sample) \
112 { \
113         u##bits value; \
114         memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
115         return bswap_##bits(value);\
116 }
117
118 TP_UINT_FIELD__SWAPPED(16);
119 TP_UINT_FIELD__SWAPPED(32);
120 TP_UINT_FIELD__SWAPPED(64);
121
122 static int tp_field__init_uint(struct tp_field *field,
123                                struct format_field *format_field,
124                                bool needs_swap)
125 {
126         field->offset = format_field->offset;
127
128         switch (format_field->size) {
129         case 1:
130                 field->integer = tp_field__u8;
131                 break;
132         case 2:
133                 field->integer = needs_swap ? tp_field__swapped_u16 : tp_field__u16;
134                 break;
135         case 4:
136                 field->integer = needs_swap ? tp_field__swapped_u32 : tp_field__u32;
137                 break;
138         case 8:
139                 field->integer = needs_swap ? tp_field__swapped_u64 : tp_field__u64;
140                 break;
141         default:
142                 return -1;
143         }
144
145         return 0;
146 }
147
148 static void *tp_field__ptr(struct tp_field *field, struct perf_sample *sample)
149 {
150         return sample->raw_data + field->offset;
151 }
152
153 static int tp_field__init_ptr(struct tp_field *field, struct format_field *format_field)
154 {
155         field->offset = format_field->offset;
156         field->pointer = tp_field__ptr;
157         return 0;
158 }
159
160 struct syscall_tp {
161         struct tp_field id;
162         union {
163                 struct tp_field args, ret;
164         };
165 };
166
167 static int perf_evsel__init_tp_uint_field(struct perf_evsel *evsel,
168                                           struct tp_field *field,
169                                           const char *name)
170 {
171         struct format_field *format_field = perf_evsel__field(evsel, name);
172
173         if (format_field == NULL)
174                 return -1;
175
176         return tp_field__init_uint(field, format_field, evsel->needs_swap);
177 }
178
/*
 * Initialize the integer accessor member @name of the struct syscall_tp
 * hanging off evsel->priv, looking the tracepoint field up by the same name.
 */
#define perf_evsel__init_sc_tp_uint_field(evsel, name) \
	({ struct syscall_tp *sc_tp = evsel->priv;\
	   perf_evsel__init_tp_uint_field(evsel, &sc_tp->name, #name); })
182
/*
 * Look up the tracepoint field called @name in @evsel and set @field up as
 * a by-address accessor for it.
 *
 * Returns -1 when the field does not exist, otherwise the result of
 * tp_field__init_ptr().
 */
static int perf_evsel__init_tp_ptr_field(struct perf_evsel *evsel,
					 struct tp_field *field,
					 const char *name)
{
	struct format_field *fmt = perf_evsel__field(evsel, name);

	if (!fmt)
		return -1;

	return tp_field__init_ptr(field, fmt);
}
194
/*
 * Initialize the by-address accessor member @name of the struct syscall_tp
 * hanging off evsel->priv, looking the tracepoint field up by the same name.
 */
#define perf_evsel__init_sc_tp_ptr_field(evsel, name) \
	({ struct syscall_tp *sc_tp = evsel->priv;\
	   perf_evsel__init_tp_ptr_field(evsel, &sc_tp->name, #name); })
198
199 static void perf_evsel__delete_priv(struct perf_evsel *evsel)
200 {
201         zfree(&evsel->priv);
202         perf_evsel__delete(evsel);
203 }
204
205 static int perf_evsel__init_syscall_tp(struct perf_evsel *evsel, void *handler)
206 {
207         evsel->priv = malloc(sizeof(struct syscall_tp));
208         if (evsel->priv != NULL) {
209                 if (perf_evsel__init_sc_tp_uint_field(evsel, id))
210                         goto out_delete;
211
212                 evsel->handler = handler;
213                 return 0;
214         }
215
216         return -ENOMEM;
217
218 out_delete:
219         zfree(&evsel->priv);
220         return -ENOENT;
221 }
222
/*
 * Create the evsel for the syscall tracepoint named @direction and prepare
 * it with perf_evsel__init_syscall_tp().
 *
 * Returns the new evsel, or NULL when the tracepoint is unavailable or its
 * initialization fails (in which case the evsel is destroyed).
 */
static struct perf_evsel *perf_evsel__syscall_newtp(const char *direction, void *handler)
{
	struct perf_evsel *evsel = perf_evsel__newtp("raw_syscalls", direction);

	if (!evsel) {
		/* older kernel (e.g., RHEL6) use syscalls:{enter,exit} */
		evsel = perf_evsel__newtp("syscalls", direction);
		if (!evsel)
			return NULL;
	}

	if (perf_evsel__init_syscall_tp(evsel, handler) != 0) {
		perf_evsel__delete_priv(evsel);
		return NULL;
	}

	return evsel;
}
242
/*
 * Read member @name of evsel's struct syscall_tp from @sample through the
 * accessor's integer reader.
 */
#define perf_evsel__sc_tp_uint(evsel, name, sample) \
	({ struct syscall_tp *sc_tp = evsel->priv; \
	   sc_tp->name.integer(&sc_tp->name, sample); })

/* Same, but through the accessor's pointer reader. */
#define perf_evsel__sc_tp_ptr(evsel, name, sample) \
	({ struct syscall_tp *sc_tp = evsel->priv; \
	   sc_tp->name.pointer(&sc_tp->name, sample); })
250
251 struct syscall_arg {
252         unsigned long val;
253         struct thread *thread;
254         struct trace  *trace;
255         void          *parm;
256         u8            idx;
257         u8            mask;
258 };
259
/*
 * Table mapping consecutive integer values to names: entries[i] names the
 * value (offset + i).
 */
struct strarray {
	int offset;		/* value that corresponds to entries[0] */
	int nr_entries;		/* number of slots in entries[] */
	const char **entries;
};
265
/* Define strarray__<array> describing @array, whose first entry names value 0. */
#define DEFINE_STRARRAY(array) struct strarray strarray__##array = { \
	.offset	    = 0, \
	.nr_entries = ARRAY_SIZE(array), \
	.entries    = array, \
}

/* Same, but the first entry of @array names the value @off. */
#define DEFINE_STRARRAY_OFFSET(array, off) struct strarray strarray__##array = { \
	.offset	    = off, \
	.nr_entries = ARRAY_SIZE(array), \
	.entries    = array, \
}
276
277 static size_t __syscall_arg__scnprintf_strarray(char *bf, size_t size,
278                                                 const char *intfmt,
279                                                 struct syscall_arg *arg)
280 {
281         struct strarray *sa = arg->parm;
282         int idx = arg->val - sa->offset;
283
284         if (idx < 0 || idx >= sa->nr_entries)
285                 return scnprintf(bf, size, intfmt, arg->val);
286
287         return scnprintf(bf, size, "%s", sa->entries[idx]);
288 }
289
/* strarray formatter whose numeric fallback is printed in decimal. */
static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size,
					      struct syscall_arg *arg)
{
	static const char fallback_fmt[] = "%d";

	return __syscall_arg__scnprintf_strarray(bf, size, fallback_fmt, arg);
}

#define SCA_STRARRAY syscall_arg__scnprintf_strarray
297
#if defined(__i386__) || defined(__x86_64__)
/*
 * FIXME: Make this available to all arches as soon as the ioctl beautifier
 *        gets rewritten to support all arches.
 *
 * strarray formatter whose numeric fallback is printed in hexadecimal.
 */
static size_t syscall_arg__scnprintf_strhexarray(char *bf, size_t size,
						 struct syscall_arg *arg)
{
	static const char fallback_fmt[] = "%#x";

	return __syscall_arg__scnprintf_strarray(bf, size, fallback_fmt, arg);
}

#define SCA_STRHEXARRAY syscall_arg__scnprintf_strhexarray
#endif /* defined(__i386__) || defined(__x86_64__) */
311
312 static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
313                                         struct syscall_arg *arg);
314
315 #define SCA_FD syscall_arg__scnprintf_fd
316
317 static size_t syscall_arg__scnprintf_fd_at(char *bf, size_t size,
318                                            struct syscall_arg *arg)
319 {
320         int fd = arg->val;
321
322         if (fd == AT_FDCWD)
323                 return scnprintf(bf, size, "CWD");
324
325         return syscall_arg__scnprintf_fd(bf, size, arg);
326 }
327
328 #define SCA_FDAT syscall_arg__scnprintf_fd_at
329
330 static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
331                                               struct syscall_arg *arg);
332
333 #define SCA_CLOSE_FD syscall_arg__scnprintf_close_fd
334
335 static size_t syscall_arg__scnprintf_hex(char *bf, size_t size,
336                                          struct syscall_arg *arg)
337 {
338         return scnprintf(bf, size, "%#lx", arg->val);
339 }
340
341 #define SCA_HEX syscall_arg__scnprintf_hex
342
343 static size_t syscall_arg__scnprintf_int(char *bf, size_t size,
344                                          struct syscall_arg *arg)
345 {
346         return scnprintf(bf, size, "%d", arg->val);
347 }
348
349 #define SCA_INT syscall_arg__scnprintf_int
350
351 static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size,
352                                                struct syscall_arg *arg)
353 {
354         int printed = 0, prot = arg->val;
355
356         if (prot == PROT_NONE)
357                 return scnprintf(bf, size, "NONE");
358 #define P_MMAP_PROT(n) \
359         if (prot & PROT_##n) { \
360                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
361                 prot &= ~PROT_##n; \
362         }
363
364         P_MMAP_PROT(EXEC);
365         P_MMAP_PROT(READ);
366         P_MMAP_PROT(WRITE);
367 #ifdef PROT_SEM
368         P_MMAP_PROT(SEM);
369 #endif
370         P_MMAP_PROT(GROWSDOWN);
371         P_MMAP_PROT(GROWSUP);
372 #undef P_MMAP_PROT
373
374         if (prot)
375                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", prot);
376
377         return printed;
378 }
379
380 #define SCA_MMAP_PROT syscall_arg__scnprintf_mmap_prot
381
382 static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size,
383                                                 struct syscall_arg *arg)
384 {
385         int printed = 0, flags = arg->val;
386
387 #define P_MMAP_FLAG(n) \
388         if (flags & MAP_##n) { \
389                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
390                 flags &= ~MAP_##n; \
391         }
392
393         P_MMAP_FLAG(SHARED);
394         P_MMAP_FLAG(PRIVATE);
395 #ifdef MAP_32BIT
396         P_MMAP_FLAG(32BIT);
397 #endif
398         P_MMAP_FLAG(ANONYMOUS);
399         P_MMAP_FLAG(DENYWRITE);
400         P_MMAP_FLAG(EXECUTABLE);
401         P_MMAP_FLAG(FILE);
402         P_MMAP_FLAG(FIXED);
403         P_MMAP_FLAG(GROWSDOWN);
404 #ifdef MAP_HUGETLB
405         P_MMAP_FLAG(HUGETLB);
406 #endif
407         P_MMAP_FLAG(LOCKED);
408         P_MMAP_FLAG(NONBLOCK);
409         P_MMAP_FLAG(NORESERVE);
410         P_MMAP_FLAG(POPULATE);
411         P_MMAP_FLAG(STACK);
412 #ifdef MAP_UNINITIALIZED
413         P_MMAP_FLAG(UNINITIALIZED);
414 #endif
415 #undef P_MMAP_FLAG
416
417         if (flags)
418                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
419
420         return printed;
421 }
422
423 #define SCA_MMAP_FLAGS syscall_arg__scnprintf_mmap_flags
424
425 static size_t syscall_arg__scnprintf_mremap_flags(char *bf, size_t size,
426                                                   struct syscall_arg *arg)
427 {
428         int printed = 0, flags = arg->val;
429
430 #define P_MREMAP_FLAG(n) \
431         if (flags & MREMAP_##n) { \
432                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
433                 flags &= ~MREMAP_##n; \
434         }
435
436         P_MREMAP_FLAG(MAYMOVE);
437 #ifdef MREMAP_FIXED
438         P_MREMAP_FLAG(FIXED);
439 #endif
440 #undef P_MREMAP_FLAG
441
442         if (flags)
443                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
444
445         return printed;
446 }
447
448 #define SCA_MREMAP_FLAGS syscall_arg__scnprintf_mremap_flags
449
450 static size_t syscall_arg__scnprintf_madvise_behavior(char *bf, size_t size,
451                                                       struct syscall_arg *arg)
452 {
453         int behavior = arg->val;
454
455         switch (behavior) {
456 #define P_MADV_BHV(n) case MADV_##n: return scnprintf(bf, size, #n)
457         P_MADV_BHV(NORMAL);
458         P_MADV_BHV(RANDOM);
459         P_MADV_BHV(SEQUENTIAL);
460         P_MADV_BHV(WILLNEED);
461         P_MADV_BHV(DONTNEED);
462         P_MADV_BHV(REMOVE);
463         P_MADV_BHV(DONTFORK);
464         P_MADV_BHV(DOFORK);
465         P_MADV_BHV(HWPOISON);
466 #ifdef MADV_SOFT_OFFLINE
467         P_MADV_BHV(SOFT_OFFLINE);
468 #endif
469         P_MADV_BHV(MERGEABLE);
470         P_MADV_BHV(UNMERGEABLE);
471 #ifdef MADV_HUGEPAGE
472         P_MADV_BHV(HUGEPAGE);
473 #endif
474 #ifdef MADV_NOHUGEPAGE
475         P_MADV_BHV(NOHUGEPAGE);
476 #endif
477 #ifdef MADV_DONTDUMP
478         P_MADV_BHV(DONTDUMP);
479 #endif
480 #ifdef MADV_DODUMP
481         P_MADV_BHV(DODUMP);
482 #endif
483 #undef P_MADV_PHV
484         default: break;
485         }
486
487         return scnprintf(bf, size, "%#x", behavior);
488 }
489
490 #define SCA_MADV_BHV syscall_arg__scnprintf_madvise_behavior
491
492 static size_t syscall_arg__scnprintf_flock(char *bf, size_t size,
493                                            struct syscall_arg *arg)
494 {
495         int printed = 0, op = arg->val;
496
497         if (op == 0)
498                 return scnprintf(bf, size, "NONE");
499 #define P_CMD(cmd) \
500         if ((op & LOCK_##cmd) == LOCK_##cmd) { \
501                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #cmd); \
502                 op &= ~LOCK_##cmd; \
503         }
504
505         P_CMD(SH);
506         P_CMD(EX);
507         P_CMD(NB);
508         P_CMD(UN);
509         P_CMD(MAND);
510         P_CMD(RW);
511         P_CMD(READ);
512         P_CMD(WRITE);
513 #undef P_OP
514
515         if (op)
516                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", op);
517
518         return printed;
519 }
520
521 #define SCA_FLOCK syscall_arg__scnprintf_flock
522
523 static size_t syscall_arg__scnprintf_futex_op(char *bf, size_t size, struct syscall_arg *arg)
524 {
525         enum syscall_futex_args {
526                 SCF_UADDR   = (1 << 0),
527                 SCF_OP      = (1 << 1),
528                 SCF_VAL     = (1 << 2),
529                 SCF_TIMEOUT = (1 << 3),
530                 SCF_UADDR2  = (1 << 4),
531                 SCF_VAL3    = (1 << 5),
532         };
533         int op = arg->val;
534         int cmd = op & FUTEX_CMD_MASK;
535         size_t printed = 0;
536
537         switch (cmd) {
538 #define P_FUTEX_OP(n) case FUTEX_##n: printed = scnprintf(bf, size, #n);
539         P_FUTEX_OP(WAIT);           arg->mask |= SCF_VAL3|SCF_UADDR2;             break;
540         P_FUTEX_OP(WAKE);           arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
541         P_FUTEX_OP(FD);             arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
542         P_FUTEX_OP(REQUEUE);        arg->mask |= SCF_VAL3|SCF_TIMEOUT;            break;
543         P_FUTEX_OP(CMP_REQUEUE);    arg->mask |= SCF_TIMEOUT;                     break;
544         P_FUTEX_OP(CMP_REQUEUE_PI); arg->mask |= SCF_TIMEOUT;                     break;
545         P_FUTEX_OP(WAKE_OP);                                                      break;
546         P_FUTEX_OP(LOCK_PI);        arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
547         P_FUTEX_OP(UNLOCK_PI);      arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
548         P_FUTEX_OP(TRYLOCK_PI);     arg->mask |= SCF_VAL3|SCF_UADDR2;             break;
549         P_FUTEX_OP(WAIT_BITSET);    arg->mask |= SCF_UADDR2;                      break;
550         P_FUTEX_OP(WAKE_BITSET);    arg->mask |= SCF_UADDR2;                      break;
551         P_FUTEX_OP(WAIT_REQUEUE_PI);                                              break;
552         default: printed = scnprintf(bf, size, "%#x", cmd);                       break;
553         }
554
555         if (op & FUTEX_PRIVATE_FLAG)
556                 printed += scnprintf(bf + printed, size - printed, "|PRIV");
557
558         if (op & FUTEX_CLOCK_REALTIME)
559                 printed += scnprintf(bf + printed, size - printed, "|CLKRT");
560
561         return printed;
562 }
563
564 #define SCA_FUTEX_OP  syscall_arg__scnprintf_futex_op
565
566 static const char *epoll_ctl_ops[] = { "ADD", "DEL", "MOD", };
567 static DEFINE_STRARRAY_OFFSET(epoll_ctl_ops, 1);
568
/* Interval timer names, indexed by value starting at 0. */
static const char *itimers[] = {
	"REAL",
	"VIRTUAL",
	"PROF",
};
static DEFINE_STRARRAY(itimers);
571
/*
 * Seek "whence" names, indexed by value starting at 0.  DATA and HOLE are
 * only present when the build headers define SEEK_DATA / SEEK_HOLE.
 */
static const char *whences[] = {
	"SET",
	"CUR",
	"END",
#ifdef SEEK_DATA
	"DATA",
#endif
#ifdef SEEK_HOLE
	"HOLE",
#endif
};
static DEFINE_STRARRAY(whences);
581
/*
 * fcntl() command names, indexed by command value starting at 0 (DUPFD).
 * All names are spelled without the "F_" prefix so that every command is
 * printed in the same style.
 */
static const char *fcntl_cmds[] = {
	"DUPFD", "GETFD", "SETFD", "GETFL", "SETFL", "GETLK", "SETLK",
	"SETLKW", "SETOWN", "GETOWN", "SETSIG", "GETSIG", "GETLK64",
	"SETLK64", "SETLKW64", "SETOWN_EX", "GETOWN_EX",
	"GETOWNER_UIDS",
};
588 static DEFINE_STRARRAY(fcntl_cmds);
589
/* Resource limit names, indexed by resource number starting at 0. */
static const char *rlimit_resources[] = {
	"CPU",
	"FSIZE",
	"DATA",
	"STACK",
	"CORE",
	"RSS",
	"NPROC",
	"NOFILE",
	"MEMLOCK",
	"AS",
	"LOCKS",
	"SIGPENDING",
	"MSGQUEUE",
	"NICE",
	"RTPRIO",
	"RTTIME",
};
static DEFINE_STRARRAY(rlimit_resources);
596
/* Names for the "how" argument of signal-mask changes, indexed from 0. */
static const char *sighow[] = {
	"BLOCK",
	"UNBLOCK",
	"SETMASK",
};
static DEFINE_STRARRAY(sighow);
599
/*
 * Clock id names, indexed by clock id starting at 0 (CLOCK_REALTIME).
 * Covers ids 0..11 so that BOOTTIME, the *_ALARM clocks and TAI are
 * printed by name as well.
 */
static const char *clockid[] = {
	"REALTIME", "MONOTONIC", "PROCESS_CPUTIME_ID", "THREAD_CPUTIME_ID",
	"MONOTONIC_RAW", "REALTIME_COARSE", "MONOTONIC_COARSE",
	"BOOTTIME", "REALTIME_ALARM", "BOOTTIME_ALARM", "SGI_CYCLE", "TAI",
};
604 static DEFINE_STRARRAY(clockid);
605
/*
 * Socket address family names, indexed by AF_* value starting at 0
 * (AF_UNSPEC).  The table is positional, so no family number may be
 * skipped: "MPLS" (28) sits between "IB" (27) and "CAN" (29), which keeps
 * every later name lined up with its number.
 */
static const char *socket_families[] = {
	"UNSPEC", "LOCAL", "INET", "AX25", "IPX", "APPLETALK", "NETROM",
	"BRIDGE", "ATMPVC", "X25", "INET6", "ROSE", "DECnet", "NETBEUI",
	"SECURITY", "KEY", "NETLINK", "PACKET", "ASH", "ECONET", "ATMSVC",
	"RDS", "SNA", "IRDA", "PPPOX", "WANPIPE", "LLC", "IB", "MPLS", "CAN",
	"TIPC", "BLUETOOTH", "IUCV", "RXRPC", "ISDN", "PHONET", "IEEE802154",
	"CAIF", "ALG", "NFC", "VSOCK",
};
614 static DEFINE_STRARRAY(socket_families);
615
616 #ifndef SOCK_TYPE_MASK
617 #define SOCK_TYPE_MASK 0xf
618 #endif
619
620 static size_t syscall_arg__scnprintf_socket_type(char *bf, size_t size,
621                                                       struct syscall_arg *arg)
622 {
623         size_t printed;
624         int type = arg->val,
625             flags = type & ~SOCK_TYPE_MASK;
626
627         type &= SOCK_TYPE_MASK;
628         /*
629          * Can't use a strarray, MIPS may override for ABI reasons.
630          */
631         switch (type) {
632 #define P_SK_TYPE(n) case SOCK_##n: printed = scnprintf(bf, size, #n); break;
633         P_SK_TYPE(STREAM);
634         P_SK_TYPE(DGRAM);
635         P_SK_TYPE(RAW);
636         P_SK_TYPE(RDM);
637         P_SK_TYPE(SEQPACKET);
638         P_SK_TYPE(DCCP);
639         P_SK_TYPE(PACKET);
640 #undef P_SK_TYPE
641         default:
642                 printed = scnprintf(bf, size, "%#x", type);
643         }
644
645 #define P_SK_FLAG(n) \
646         if (flags & SOCK_##n) { \
647                 printed += scnprintf(bf + printed, size - printed, "|%s", #n); \
648                 flags &= ~SOCK_##n; \
649         }
650
651         P_SK_FLAG(CLOEXEC);
652         P_SK_FLAG(NONBLOCK);
653 #undef P_SK_FLAG
654
655         if (flags)
656                 printed += scnprintf(bf + printed, size - printed, "|%#x", flags);
657
658         return printed;
659 }
660
661 #define SCA_SK_TYPE syscall_arg__scnprintf_socket_type
662
663 #ifndef MSG_PROBE
664 #define MSG_PROBE            0x10
665 #endif
666 #ifndef MSG_WAITFORONE
667 #define MSG_WAITFORONE  0x10000
668 #endif
669 #ifndef MSG_SENDPAGE_NOTLAST
670 #define MSG_SENDPAGE_NOTLAST 0x20000
671 #endif
672 #ifndef MSG_FASTOPEN
673 #define MSG_FASTOPEN         0x20000000
674 #endif
675
676 static size_t syscall_arg__scnprintf_msg_flags(char *bf, size_t size,
677                                                struct syscall_arg *arg)
678 {
679         int printed = 0, flags = arg->val;
680
681         if (flags == 0)
682                 return scnprintf(bf, size, "NONE");
683 #define P_MSG_FLAG(n) \
684         if (flags & MSG_##n) { \
685                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
686                 flags &= ~MSG_##n; \
687         }
688
689         P_MSG_FLAG(OOB);
690         P_MSG_FLAG(PEEK);
691         P_MSG_FLAG(DONTROUTE);
692         P_MSG_FLAG(TRYHARD);
693         P_MSG_FLAG(CTRUNC);
694         P_MSG_FLAG(PROBE);
695         P_MSG_FLAG(TRUNC);
696         P_MSG_FLAG(DONTWAIT);
697         P_MSG_FLAG(EOR);
698         P_MSG_FLAG(WAITALL);
699         P_MSG_FLAG(FIN);
700         P_MSG_FLAG(SYN);
701         P_MSG_FLAG(CONFIRM);
702         P_MSG_FLAG(RST);
703         P_MSG_FLAG(ERRQUEUE);
704         P_MSG_FLAG(NOSIGNAL);
705         P_MSG_FLAG(MORE);
706         P_MSG_FLAG(WAITFORONE);
707         P_MSG_FLAG(SENDPAGE_NOTLAST);
708         P_MSG_FLAG(FASTOPEN);
709         P_MSG_FLAG(CMSG_CLOEXEC);
710 #undef P_MSG_FLAG
711
712         if (flags)
713                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
714
715         return printed;
716 }
717
718 #define SCA_MSG_FLAGS syscall_arg__scnprintf_msg_flags
719
720 static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size,
721                                                  struct syscall_arg *arg)
722 {
723         size_t printed = 0;
724         int mode = arg->val;
725
726         if (mode == F_OK) /* 0 */
727                 return scnprintf(bf, size, "F");
728 #define P_MODE(n) \
729         if (mode & n##_OK) { \
730                 printed += scnprintf(bf + printed, size - printed, "%s", #n); \
731                 mode &= ~n##_OK; \
732         }
733
734         P_MODE(R);
735         P_MODE(W);
736         P_MODE(X);
737 #undef P_MODE
738
739         if (mode)
740                 printed += scnprintf(bf + printed, size - printed, "|%#x", mode);
741
742         return printed;
743 }
744
745 #define SCA_ACCMODE syscall_arg__scnprintf_access_mode
746
747 static size_t syscall_arg__scnprintf_open_flags(char *bf, size_t size,
748                                                struct syscall_arg *arg)
749 {
750         int printed = 0, flags = arg->val;
751
752         if (!(flags & O_CREAT))
753                 arg->mask |= 1 << (arg->idx + 1); /* Mask the mode parm */
754
755         if (flags == 0)
756                 return scnprintf(bf, size, "RDONLY");
757 #define P_FLAG(n) \
758         if (flags & O_##n) { \
759                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
760                 flags &= ~O_##n; \
761         }
762
763         P_FLAG(APPEND);
764         P_FLAG(ASYNC);
765         P_FLAG(CLOEXEC);
766         P_FLAG(CREAT);
767         P_FLAG(DIRECT);
768         P_FLAG(DIRECTORY);
769         P_FLAG(EXCL);
770         P_FLAG(LARGEFILE);
771         P_FLAG(NOATIME);
772         P_FLAG(NOCTTY);
773 #ifdef O_NONBLOCK
774         P_FLAG(NONBLOCK);
775 #elif O_NDELAY
776         P_FLAG(NDELAY);
777 #endif
778 #ifdef O_PATH
779         P_FLAG(PATH);
780 #endif
781         P_FLAG(RDWR);
782 #ifdef O_DSYNC
783         if ((flags & O_SYNC) == O_SYNC)
784                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", "SYNC");
785         else {
786                 P_FLAG(DSYNC);
787         }
788 #else
789         P_FLAG(SYNC);
790 #endif
791         P_FLAG(TRUNC);
792         P_FLAG(WRONLY);
793 #undef P_FLAG
794
795         if (flags)
796                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
797
798         return printed;
799 }
800
801 #define SCA_OPEN_FLAGS syscall_arg__scnprintf_open_flags
802
803 static size_t syscall_arg__scnprintf_perf_flags(char *bf, size_t size,
804                                                 struct syscall_arg *arg)
805 {
806         int printed = 0, flags = arg->val;
807
808         if (flags == 0)
809                 return 0;
810
811 #define P_FLAG(n) \
812         if (flags & PERF_FLAG_##n) { \
813                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
814                 flags &= ~PERF_FLAG_##n; \
815         }
816
817         P_FLAG(FD_NO_GROUP);
818         P_FLAG(FD_OUTPUT);
819         P_FLAG(PID_CGROUP);
820         P_FLAG(FD_CLOEXEC);
821 #undef P_FLAG
822
823         if (flags)
824                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
825
826         return printed;
827 }
828
829 #define SCA_PERF_FLAGS syscall_arg__scnprintf_perf_flags
830
831 static size_t syscall_arg__scnprintf_eventfd_flags(char *bf, size_t size,
832                                                    struct syscall_arg *arg)
833 {
834         int printed = 0, flags = arg->val;
835
836         if (flags == 0)
837                 return scnprintf(bf, size, "NONE");
838 #define P_FLAG(n) \
839         if (flags & EFD_##n) { \
840                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
841                 flags &= ~EFD_##n; \
842         }
843
844         P_FLAG(SEMAPHORE);
845         P_FLAG(CLOEXEC);
846         P_FLAG(NONBLOCK);
847 #undef P_FLAG
848
849         if (flags)
850                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
851
852         return printed;
853 }
854
855 #define SCA_EFD_FLAGS syscall_arg__scnprintf_eventfd_flags
856
857 static size_t syscall_arg__scnprintf_pipe_flags(char *bf, size_t size,
858                                                 struct syscall_arg *arg)
859 {
860         int printed = 0, flags = arg->val;
861
862 #define P_FLAG(n) \
863         if (flags & O_##n) { \
864                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
865                 flags &= ~O_##n; \
866         }
867
868         P_FLAG(CLOEXEC);
869         P_FLAG(NONBLOCK);
870 #undef P_FLAG
871
872         if (flags)
873                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
874
875         return printed;
876 }
877
878 #define SCA_PIPE_FLAGS syscall_arg__scnprintf_pipe_flags
879
880 static size_t syscall_arg__scnprintf_signum(char *bf, size_t size, struct syscall_arg *arg)
881 {
882         int sig = arg->val;
883
884         switch (sig) {
885 #define P_SIGNUM(n) case SIG##n: return scnprintf(bf, size, #n)
886         P_SIGNUM(HUP);
887         P_SIGNUM(INT);
888         P_SIGNUM(QUIT);
889         P_SIGNUM(ILL);
890         P_SIGNUM(TRAP);
891         P_SIGNUM(ABRT);
892         P_SIGNUM(BUS);
893         P_SIGNUM(FPE);
894         P_SIGNUM(KILL);
895         P_SIGNUM(USR1);
896         P_SIGNUM(SEGV);
897         P_SIGNUM(USR2);
898         P_SIGNUM(PIPE);
899         P_SIGNUM(ALRM);
900         P_SIGNUM(TERM);
901         P_SIGNUM(CHLD);
902         P_SIGNUM(CONT);
903         P_SIGNUM(STOP);
904         P_SIGNUM(TSTP);
905         P_SIGNUM(TTIN);
906         P_SIGNUM(TTOU);
907         P_SIGNUM(URG);
908         P_SIGNUM(XCPU);
909         P_SIGNUM(XFSZ);
910         P_SIGNUM(VTALRM);
911         P_SIGNUM(PROF);
912         P_SIGNUM(WINCH);
913         P_SIGNUM(IO);
914         P_SIGNUM(PWR);
915         P_SIGNUM(SYS);
916 #ifdef SIGEMT
917         P_SIGNUM(EMT);
918 #endif
919 #ifdef SIGSTKFLT
920         P_SIGNUM(STKFLT);
921 #endif
922 #ifdef SIGSWI
923         P_SIGNUM(SWI);
924 #endif
925         default: break;
926         }
927
928         return scnprintf(bf, size, "%#x", sig);
929 }
930
931 #define SCA_SIGNUM syscall_arg__scnprintf_signum
932
933 #if defined(__i386__) || defined(__x86_64__)
934 /*
935  * FIXME: Make this available to all arches.
936  */
#define TCGETS		0x5401

/*
 * Names of the tty ioctl commands, for use with a strarray whose offset is
 * TCGETS: slot i names the command value (TCGETS + i).
 *
 * Wherever the numbering has a gap the next name is anchored with a
 * designator written as "<command value> - TCGETS", so that it cannot drift
 * away from the table's offset.  Slots inside a gap stay NULL.
 */
static const char *tioctls[] = {
	"TCGETS", "TCSETS", "TCSETSW", "TCSETSF", "TCGETA", "TCSETA", "TCSETAW",
	"TCSETAF", "TCSBRK", "TCXONC", "TCFLSH", "TIOCEXCL", "TIOCNXCL",
	"TIOCSCTTY", "TIOCGPGRP", "TIOCSPGRP", "TIOCOUTQ", "TIOCSTI",
	"TIOCGWINSZ", "TIOCSWINSZ", "TIOCMGET", "TIOCMBIS", "TIOCMBIC",
	"TIOCMSET", "TIOCGSOFTCAR", "TIOCSSOFTCAR", "FIONREAD", "TIOCLINUX",
	"TIOCCONS", "TIOCGSERIAL", "TIOCSSERIAL", "TIOCPKT", "FIONBIO",
	"TIOCNOTTY", "TIOCSETD", "TIOCGETD", "TCSBRKP",
	[0x5427 - TCGETS] = "TIOCSBRK",
	"TIOCCBRK", "TIOCGSID", "TCGETS2", "TCSETS2", "TCSETSW2", "TCSETSF2",
	"TIOCGRS485", "TIOCSRS485", "TIOCGPTN", "TIOCSPTLCK",
	"TIOCGDEV||TCGETX", "TCSETX", "TCSETXF", "TCSETXW", "TIOCSIG",
	"TIOCVHANGUP", "TIOCGPKT", "TIOCGPTLCK",
	[0x5440 - TCGETS] = "TIOCGEXCL",
	[0x5450 - TCGETS] = "FIONCLEX", "FIOCLEX", "FIOASYNC", "TIOCSERCONFIG",
	"TIOCSERGWILD", "TIOCSERSWILD", "TIOCGLCKTRMIOS", "TIOCSLCKTRMIOS",
	"TIOCSERGSTRUCT", "TIOCSERGETLSR", "TIOCSERGETMULTI", "TIOCSERSETMULTI",
	"TIOCMIWAIT", "TIOCGICOUNT",
	[0x5460 - TCGETS] = "FIOQSIZE",
};
956
957 static DEFINE_STRARRAY_OFFSET(tioctls, 0x5401);
958 #endif /* defined(__i386__) || defined(__x86_64__) */
959
/*
 * Initializer fragment for a struct syscall_fmt entry: argument number
 * @idx is printed through SCA_STRARRAY using strarray__<array>.  @name is
 * not expanded anywhere; it only labels the argument at the point of use.
 */
#define STRARRAY(idx, name, array) \
	  .arg_scnprintf = { [idx] = SCA_STRARRAY, }, \
	  .arg_parm	 = { [idx] = &strarray__##array, }
963
/*
 * Per-syscall formatting hints.
 *
 *   name          - syscall name; the key syscall_fmt__find() searches on
 *   alias         - alternate name tried for the "sys_enter_<alias>"
 *                   tracepoint when "sys_enter_<name>" is not found
 *   arg_scnprintf - optional per-argument formatter, indexed by argument
 *                   position
 *   arg_parm      - optional per-argument parameter, handed to the formatter
 *                   as arg->parm (e.g. a strarray for SCA_STRARRAY)
 *   errmsg, timeout, hexret - flags consumed outside this chunk; presumably
 *                   they select how the return value is printed (TODO confirm)
 *
 * The table is searched with bsearch() using strcmp() on .name, so entries
 * MUST stay sorted by .name in strcmp() order.
 */
964 static struct syscall_fmt {
965         const char *name;
966         const char *alias;
967         size_t     (*arg_scnprintf[6])(char *bf, size_t size, struct syscall_arg *arg);
968         void       *arg_parm[6];
969         bool       errmsg;
970         bool       timeout;
971         bool       hexret;
972 } syscall_fmts[] = {
973         { .name     = "access",     .errmsg = true,
974           .arg_scnprintf = { [1] = SCA_ACCMODE, /* mode */ }, },
975         { .name     = "arch_prctl", .errmsg = true, .alias = "prctl", },
976         { .name     = "brk",        .hexret = true,
977           .arg_scnprintf = { [0] = SCA_HEX, /* brk */ }, },
978         { .name     = "clock_gettime",  .errmsg = true, STRARRAY(0, clk_id, clockid), },
979         { .name     = "close",      .errmsg = true,
980           .arg_scnprintf = { [0] = SCA_CLOSE_FD, /* fd */ }, },
981         { .name     = "connect",    .errmsg = true, },
982         { .name     = "dup",        .errmsg = true,
983           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
984         { .name     = "dup2",       .errmsg = true,
985           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
986         { .name     = "dup3",       .errmsg = true,
987           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
988         { .name     = "epoll_ctl",  .errmsg = true, STRARRAY(1, op, epoll_ctl_ops), },
989         { .name     = "eventfd2",   .errmsg = true,
990           .arg_scnprintf = { [1] = SCA_EFD_FLAGS, /* flags */ }, },
991         { .name     = "faccessat",  .errmsg = true,
992           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
993         { .name     = "fadvise64",  .errmsg = true,
994           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
995         { .name     = "fallocate",  .errmsg = true,
996           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
997         { .name     = "fchdir",     .errmsg = true,
998           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
999         { .name     = "fchmod",     .errmsg = true,
1000           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1001         { .name     = "fchmodat",   .errmsg = true,
1002           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
1003         { .name     = "fchown",     .errmsg = true,
1004           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1005         { .name     = "fchownat",   .errmsg = true,
1006           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
1007         { .name     = "fcntl",      .errmsg = true,
1008           .arg_scnprintf = { [0] = SCA_FD, /* fd */
1009                              [1] = SCA_STRARRAY, /* cmd */ },
1010           .arg_parm      = { [1] = &strarray__fcntl_cmds, /* cmd */ }, },
1011         { .name     = "fdatasync",  .errmsg = true,
1012           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1013         { .name     = "flock",      .errmsg = true,
1014           .arg_scnprintf = { [0] = SCA_FD, /* fd */
1015                              [1] = SCA_FLOCK, /* cmd */ }, },
1016         { .name     = "fsetxattr",  .errmsg = true,
1017           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1018         { .name     = "fstat",      .errmsg = true, .alias = "newfstat",
1019           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1020         { .name     = "fstatat",    .errmsg = true, .alias = "newfstatat",
1021           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1022         { .name     = "fstatfs",    .errmsg = true,
1023           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1024         { .name     = "fsync",    .errmsg = true,
1025           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1026         { .name     = "ftruncate", .errmsg = true,
1027           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1028         { .name     = "futex",      .errmsg = true,
1029           .arg_scnprintf = { [1] = SCA_FUTEX_OP, /* op */ }, },
1030         { .name     = "futimesat", .errmsg = true,
1031           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
1032         { .name     = "getdents",   .errmsg = true,
1033           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1034         { .name     = "getdents64", .errmsg = true,
1035           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1036         { .name     = "getitimer",  .errmsg = true, STRARRAY(0, which, itimers), },
1037         { .name     = "getrlimit",  .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
1038         { .name     = "ioctl",      .errmsg = true,
1039           .arg_scnprintf = { [0] = SCA_FD, /* fd */
1040 #if defined(__i386__) || defined(__x86_64__)
1041 /*
1042  * FIXME: Make this available to all arches.
1043  */
1044                              [1] = SCA_STRHEXARRAY, /* cmd */
1045                              [2] = SCA_HEX, /* arg */ },
1046           .arg_parm      = { [1] = &strarray__tioctls, /* cmd */ }, },
1047 #else
1048                              [2] = SCA_HEX, /* arg */ }, },
1049 #endif
1050         { .name     = "kill",       .errmsg = true,
1051           .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1052         { .name     = "linkat",     .errmsg = true,
1053           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
1054         { .name     = "lseek",      .errmsg = true,
1055           .arg_scnprintf = { [0] = SCA_FD, /* fd */
1056                              [2] = SCA_STRARRAY, /* whence */ },
1057           .arg_parm      = { [2] = &strarray__whences, /* whence */ }, },
1058         { .name     = "lstat",      .errmsg = true, .alias = "newlstat", },
1059         { .name     = "madvise",    .errmsg = true,
1060           .arg_scnprintf = { [0] = SCA_HEX,      /* start */
1061                              [2] = SCA_MADV_BHV, /* behavior */ }, },
1062         { .name     = "mkdirat",    .errmsg = true,
1063           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
1064         { .name     = "mknodat",    .errmsg = true,
1065           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
1066         { .name     = "mlock",      .errmsg = true,
1067           .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1068         { .name     = "mlockall",   .errmsg = true,
1069           .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1070         { .name     = "mmap",       .hexret = true,
1071           .arg_scnprintf = { [0] = SCA_HEX,       /* addr */
1072                              [2] = SCA_MMAP_PROT, /* prot */
1073                              [3] = SCA_MMAP_FLAGS, /* flags */
1074                              [4] = SCA_FD,        /* fd */ }, },
1075         { .name     = "mprotect",   .errmsg = true,
1076           .arg_scnprintf = { [0] = SCA_HEX, /* start */
1077                              [2] = SCA_MMAP_PROT, /* prot */ }, },
1078         { .name     = "mremap",     .hexret = true,
1079           .arg_scnprintf = { [0] = SCA_HEX, /* addr */
1080                              [3] = SCA_MREMAP_FLAGS, /* flags */
1081                              [4] = SCA_HEX, /* new_addr */ }, },
1082         { .name     = "munlock",    .errmsg = true,
1083           .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1084         { .name     = "munmap",     .errmsg = true,
1085           .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1086         { .name     = "name_to_handle_at", .errmsg = true,
1087           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1088         { .name     = "newfstatat", .errmsg = true,
1089           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1090         { .name     = "open",       .errmsg = true,
1091           .arg_scnprintf = { [1] = SCA_OPEN_FLAGS, /* flags */ }, },
1092         { .name     = "open_by_handle_at", .errmsg = true,
1093           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1094                              [2] = SCA_OPEN_FLAGS, /* flags */ }, },
1095         { .name     = "openat",     .errmsg = true,
1096           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1097                              [2] = SCA_OPEN_FLAGS, /* flags */ }, },
1098         { .name     = "perf_event_open", .errmsg = true,
1099           .arg_scnprintf = { [1] = SCA_INT, /* pid */
1100                              [2] = SCA_INT, /* cpu */
1101                              [3] = SCA_FD,  /* group_fd */
1102                              [4] = SCA_PERF_FLAGS,  /* flags */ }, },
1103         { .name     = "pipe2",      .errmsg = true,
1104           .arg_scnprintf = { [1] = SCA_PIPE_FLAGS, /* flags */ }, },
1105         { .name     = "poll",       .errmsg = true, .timeout = true, },
1106         { .name     = "ppoll",      .errmsg = true, .timeout = true, },
1107         { .name     = "pread",      .errmsg = true, .alias = "pread64",
1108           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1109         { .name     = "preadv",     .errmsg = true, .alias = "pread",
1110           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1111         { .name     = "prlimit64",  .errmsg = true, STRARRAY(1, resource, rlimit_resources), },
1112         { .name     = "pwrite",     .errmsg = true, .alias = "pwrite64",
1113           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1114         { .name     = "pwritev",    .errmsg = true,
1115           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1116         { .name     = "read",       .errmsg = true,
1117           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1118         { .name     = "readlinkat", .errmsg = true,
1119           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1120         { .name     = "readv",      .errmsg = true,
1121           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1122         { .name     = "recvfrom",   .errmsg = true,
1123           .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1124         { .name     = "recvmmsg",   .errmsg = true,
1125           .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1126         { .name     = "recvmsg",    .errmsg = true,
1127           .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
1128         { .name     = "renameat",   .errmsg = true,
1129           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1130         { .name     = "rt_sigaction", .errmsg = true,
1131           .arg_scnprintf = { [0] = SCA_SIGNUM, /* sig */ }, },
1132         { .name     = "rt_sigprocmask",  .errmsg = true, STRARRAY(0, how, sighow), },
1133         { .name     = "rt_sigqueueinfo", .errmsg = true,
1134           .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1135         { .name     = "rt_tgsigqueueinfo", .errmsg = true,
1136           .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
1137         { .name     = "select",     .errmsg = true, .timeout = true, },
1138         { .name     = "sendmmsg",    .errmsg = true,
1139           .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1140         { .name     = "sendmsg",    .errmsg = true,
1141           .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
1142         { .name     = "sendto",     .errmsg = true,
1143           .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1144         { .name     = "setitimer",  .errmsg = true, STRARRAY(0, which, itimers), },
1145         { .name     = "setrlimit",  .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
1146         { .name     = "shutdown",   .errmsg = true,
1147           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1148         { .name     = "socket",     .errmsg = true,
1149           .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
1150                              [1] = SCA_SK_TYPE, /* type */ },
1151           .arg_parm      = { [0] = &strarray__socket_families, /* family */ }, },
1152         { .name     = "socketpair", .errmsg = true,
1153           .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
1154                              [1] = SCA_SK_TYPE, /* type */ },
1155           .arg_parm      = { [0] = &strarray__socket_families, /* family */ }, },
1156         { .name     = "stat",       .errmsg = true, .alias = "newstat", },
1157         { .name     = "symlinkat",  .errmsg = true,
1158           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1159         { .name     = "tgkill",     .errmsg = true,
1160           .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
1161         { .name     = "tkill",      .errmsg = true,
1162           .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1163         { .name     = "uname",      .errmsg = true, .alias = "newuname", },
1164         { .name     = "unlinkat",   .errmsg = true,
1165           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1166         { .name     = "utimensat",  .errmsg = true,
1167           .arg_scnprintf = { [0] = SCA_FDAT, /* dirfd */ }, },
1168         { .name     = "write",      .errmsg = true,
1169           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1170         { .name     = "writev",     .errmsg = true,
1171           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1172 };
1173
1174 static int syscall_fmt__cmp(const void *name, const void *fmtp)
1175 {
1176         const struct syscall_fmt *fmt = fmtp;
1177         return strcmp(name, fmt->name);
1178 }
1179
1180 static struct syscall_fmt *syscall_fmt__find(const char *name)
1181 {
1182         const int nmemb = ARRAY_SIZE(syscall_fmts);
1183         return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp);
1184 }
1185
/*
 * Cached description of one syscall, filled in by trace__read_syscall_info().
 *
 *   tp_format     - format of the "syscalls:sys_enter_<name>" tracepoint (or
 *                   of its alias)
 *   nr_args, args - number of argument fields and the list of them, with a
 *                   leading "nr" field dropped when present
 *   name          - name returned by audit_syscall_to_name()
 *   is_exit       - true for "exit" and "exit_group"
 *   fmt           - matching syscall_fmts[] entry, or NULL when there is none
 *   arg_scnprintf - array of nr_args formatters; a NULL slot means "no
 *                   special formatter"
 *   arg_parm      - fmt->arg_parm when fmt is set, otherwise left untouched
 */
1186 struct syscall {
1187         struct event_format *tp_format;
1188         int                 nr_args;
1189         struct format_field *args;
1190         const char          *name;
1191         bool                is_exit;
1192         struct syscall_fmt  *fmt;
1193         size_t              (**arg_scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
1194         void                **arg_parm;
1195 };
1196
1197 static size_t fprintf_duration(unsigned long t, FILE *fp)
1198 {
1199         double duration = (double)t / NSEC_PER_MSEC;
1200         size_t printed = fprintf(fp, "(");
1201
1202         if (duration >= 1.0)
1203                 printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration);
1204         else if (duration >= 0.01)
1205                 printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration);
1206         else
1207                 printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration);
1208         return printed + fprintf(fp, "): ");
1209 }
1210
/*
 * Per-thread tracing state, stored as the thread's private data (see
 * thread__trace()).
 *
 *   nr_events     - bumped every time thread__trace() hands this state out
 *   paths.table   - fd -> strdup()'ed pathname, NULL for unknown fds
 *   paths.max     - highest valid index of paths.table, -1 while it is empty
 *   syscall_stats - intlist created in thread_trace__new()
 *
 * The remaining fields are used outside this chunk.
 */
1211 struct thread_trace {
1212         u64               entry_time;
1213         u64               exit_time;
1214         bool              entry_pending;
1215         unsigned long     nr_events;
1216         unsigned long     pfmaj, pfmin;
1217         char              *entry_str;
1218         double            runtime_ms;
1219         struct {
1220                 int       max;
1221                 char      **table;
1222         } paths;
1223
1224         struct intlist *syscall_stats;
1225 };
1226
1227 static struct thread_trace *thread_trace__new(void)
1228 {
1229         struct thread_trace *ttrace =  zalloc(sizeof(struct thread_trace));
1230
1231         if (ttrace)
1232                 ttrace->paths.max = -1;
1233
1234         ttrace->syscall_stats = intlist__new(NULL);
1235
1236         return ttrace;
1237 }
1238
1239 static struct thread_trace *thread__trace(struct thread *thread, FILE *fp)
1240 {
1241         struct thread_trace *ttrace;
1242
1243         if (thread == NULL)
1244                 goto fail;
1245
1246         if (thread__priv(thread) == NULL)
1247                 thread__set_priv(thread, thread_trace__new());
1248
1249         if (thread__priv(thread) == NULL)
1250                 goto fail;
1251
1252         ttrace = thread__priv(thread);
1253         ++ttrace->nr_events;
1254
1255         return ttrace;
1256 fail:
1257         color_fprintf(fp, PERF_COLOR_RED,
1258                       "WARNING: not enough memory, dropping samples!\n");
1259         return NULL;
1260 }
1261
/*
 * Page-fault tracing bit flags. Presumably or'ed together in
 * trace->trace_pgfaults -- TODO confirm against the option parsing code.
 */
1262 #define TRACE_PFMAJ             (1 << 0)
1263 #define TRACE_PFMIN             (1 << 1)
1264
/*
 * State of one 'perf trace' session. Notes on the fields this chunk uses:
 *
 *   tool              - embedded perf_tool; trace__tool_process() recovers
 *                       the enclosing struct trace from it via container_of()
 *   audit.machine     - machine id passed to the libaudit name/number lookups
 *   syscalls.table    - per-syscall info indexed by syscall id
 *   syscalls.max      - highest valid index of syscalls.table; the growth
 *                       code treats -1 as "table is empty"
 *   host              - machine created by trace__symbols_init()
 *   base_time         - subtracted from timestamps before they are printed
 *   output            - stream that receives messages and trace output
 *   ev_qualifier      - list of syscall names to validate
 *   ev_qualifier_ids  - the ids those names resolve to
 *   duration_filter   - multiplied by NSEC_PER_MSEC before it is compared
 *                       with a duration
 *   stats.proc_getname - number of fd path lookups that went to /proc
 *   live              - when false, unknown fd paths are not read from /proc
 *   multiple_threads, show_comm - select whether the tid, and the comm in
 *                       front of it, are printed at the head of each entry
 *
 * The other fields are used outside this chunk.
 */
1265 struct trace {
1266         struct perf_tool        tool;
1267         struct {
1268                 int             machine;
1269                 int             open_id;
1270         }                       audit;
1271         struct {
1272                 int             max;
1273                 struct syscall  *table;
1274                 struct {
1275                         struct perf_evsel *sys_enter,
1276                                           *sys_exit;
1277                 }               events;
1278         } syscalls;
1279         struct record_opts      opts;
1280         struct perf_evlist      *evlist;
1281         struct machine          *host;
1282         struct thread           *current;
1283         u64                     base_time;
1284         FILE                    *output;
1285         unsigned long           nr_events;
1286         struct strlist          *ev_qualifier;
1287         struct {
1288                 size_t          nr;
1289                 int             *entries;
1290         }                       ev_qualifier_ids;
1291         const char              *last_vfs_getname;
1292         struct intlist          *tid_list;
1293         struct intlist          *pid_list;
1294         struct {
1295                 size_t          nr;
1296                 pid_t           *entries;
1297         }                       filter_pids;
1298         double                  duration_filter;
1299         double                  runtime_ms;
1300         struct {
1301                 u64             vfs_getname,
1302                                 proc_getname;
1303         } stats;
1304         bool                    not_ev_qualifier;
1305         bool                    live;
1306         bool                    full_time;
1307         bool                    sched;
1308         bool                    multiple_threads;
1309         bool                    summary;
1310         bool                    summary_only;
1311         bool                    show_comm;
1312         bool                    show_tool_stats;
1313         bool                    trace_syscalls;
1314         bool                    force;
1315         int                     trace_pgfaults;
1316 };
1317
1318 static int trace__set_fd_pathname(struct thread *thread, int fd, const char *pathname)
1319 {
1320         struct thread_trace *ttrace = thread__priv(thread);
1321
1322         if (fd > ttrace->paths.max) {
1323                 char **npath = realloc(ttrace->paths.table, (fd + 1) * sizeof(char *));
1324
1325                 if (npath == NULL)
1326                         return -1;
1327
1328                 if (ttrace->paths.max != -1) {
1329                         memset(npath + ttrace->paths.max + 1, 0,
1330                                (fd - ttrace->paths.max) * sizeof(char *));
1331                 } else {
1332                         memset(npath, 0, (fd + 1) * sizeof(char *));
1333                 }
1334
1335                 ttrace->paths.table = npath;
1336                 ttrace->paths.max   = fd;
1337         }
1338
1339         ttrace->paths.table[fd] = strdup(pathname);
1340
1341         return ttrace->paths.table[fd] != NULL ? 0 : -1;
1342 }
1343
1344 static int thread__read_fd_path(struct thread *thread, int fd)
1345 {
1346         char linkname[PATH_MAX], pathname[PATH_MAX];
1347         struct stat st;
1348         int ret;
1349
1350         if (thread->pid_ == thread->tid) {
1351                 scnprintf(linkname, sizeof(linkname),
1352                           "/proc/%d/fd/%d", thread->pid_, fd);
1353         } else {
1354                 scnprintf(linkname, sizeof(linkname),
1355                           "/proc/%d/task/%d/fd/%d", thread->pid_, thread->tid, fd);
1356         }
1357
1358         if (lstat(linkname, &st) < 0 || st.st_size + 1 > (off_t)sizeof(pathname))
1359                 return -1;
1360
1361         ret = readlink(linkname, pathname, sizeof(pathname));
1362
1363         if (ret < 0 || ret > st.st_size)
1364                 return -1;
1365
1366         pathname[ret] = '\0';
1367         return trace__set_fd_pathname(thread, fd, pathname);
1368 }
1369
1370 static const char *thread__fd_path(struct thread *thread, int fd,
1371                                    struct trace *trace)
1372 {
1373         struct thread_trace *ttrace = thread__priv(thread);
1374
1375         if (ttrace == NULL)
1376                 return NULL;
1377
1378         if (fd < 0)
1379                 return NULL;
1380
1381         if ((fd > ttrace->paths.max || ttrace->paths.table[fd] == NULL)) {
1382                 if (!trace->live)
1383                         return NULL;
1384                 ++trace->stats.proc_getname;
1385                 if (thread__read_fd_path(thread, fd))
1386                         return NULL;
1387         }
1388
1389         return ttrace->paths.table[fd];
1390 }
1391
1392 static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
1393                                         struct syscall_arg *arg)
1394 {
1395         int fd = arg->val;
1396         size_t printed = scnprintf(bf, size, "%d", fd);
1397         const char *path = thread__fd_path(arg->thread, fd, arg->trace);
1398
1399         if (path)
1400                 printed += scnprintf(bf + printed, size - printed, "<%s>", path);
1401
1402         return printed;
1403 }
1404
1405 static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
1406                                               struct syscall_arg *arg)
1407 {
1408         int fd = arg->val;
1409         size_t printed = syscall_arg__scnprintf_fd(bf, size, arg);
1410         struct thread_trace *ttrace = thread__priv(arg->thread);
1411
1412         if (ttrace && fd >= 0 && fd <= ttrace->paths.max)
1413                 zfree(&ttrace->paths.table[fd]);
1414
1415         return printed;
1416 }
1417
1418 static bool trace__filter_duration(struct trace *trace, double t)
1419 {
1420         return t < (trace->duration_filter * NSEC_PER_MSEC);
1421 }
1422
1423 static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
1424 {
1425         double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC;
1426
1427         return fprintf(fp, "%10.3f ", ts);
1428 }
1429
/*
 * Flags written by sig_handler(). They are 'volatile sig_atomic_t' because
 * that is the object type the C standard allows an asynchronous signal
 * handler to write, and 'volatile' keeps code that reads them from caching
 * a stale value.
 */
static volatile sig_atomic_t done = false;
static volatile sig_atomic_t interrupted = false;

/*
 * Signal handler: mark that a signal was received ('done') and record
 * whether it was SIGINT ('interrupted').
 */
static void sig_handler(int sig)
{
	done = true;
	interrupted = sig == SIGINT;
}
1438
1439 static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread,
1440                                         u64 duration, u64 tstamp, FILE *fp)
1441 {
1442         size_t printed = trace__fprintf_tstamp(trace, tstamp, fp);
1443         printed += fprintf_duration(duration, fp);
1444
1445         if (trace->multiple_threads) {
1446                 if (trace->show_comm)
1447                         printed += fprintf(fp, "%.14s/", thread__comm_str(thread));
1448                 printed += fprintf(fp, "%d ", thread->tid);
1449         }
1450
1451         return printed;
1452 }
1453
1454 static int trace__process_event(struct trace *trace, struct machine *machine,
1455                                 union perf_event *event, struct perf_sample *sample)
1456 {
1457         int ret = 0;
1458
1459         switch (event->header.type) {
1460         case PERF_RECORD_LOST:
1461                 color_fprintf(trace->output, PERF_COLOR_RED,
1462                               "LOST %" PRIu64 " events!\n", event->lost.lost);
1463                 ret = machine__process_lost_event(machine, event, sample);
1464         default:
1465                 ret = machine__process_event(machine, event, sample);
1466                 break;
1467         }
1468
1469         return ret;
1470 }
1471
1472 static int trace__tool_process(struct perf_tool *tool,
1473                                union perf_event *event,
1474                                struct perf_sample *sample,
1475                                struct machine *machine)
1476 {
1477         struct trace *trace = container_of(tool, struct trace, tool);
1478         return trace__process_event(trace, machine, event, sample);
1479 }
1480
1481 static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
1482 {
1483         int err = symbol__init(NULL);
1484
1485         if (err)
1486                 return err;
1487
1488         trace->host = machine__new_host();
1489         if (trace->host == NULL)
1490                 return -ENOMEM;
1491
1492         if (trace_event__register_resolver(trace->host, machine__resolve_kernel_addr) < 0)
1493                 return -errno;
1494
1495         err = __machine__synthesize_threads(trace->host, &trace->tool, &trace->opts.target,
1496                                             evlist->threads, trace__tool_process, false,
1497                                             trace->opts.proc_map_timeout);
1498         if (err)
1499                 symbol__exit();
1500
1501         return err;
1502 }
1503
1504 static int syscall__set_arg_fmts(struct syscall *sc)
1505 {
1506         struct format_field *field;
1507         int idx = 0;
1508
1509         sc->arg_scnprintf = calloc(sc->nr_args, sizeof(void *));
1510         if (sc->arg_scnprintf == NULL)
1511                 return -1;
1512
1513         if (sc->fmt)
1514                 sc->arg_parm = sc->fmt->arg_parm;
1515
1516         for (field = sc->args; field; field = field->next) {
1517                 if (sc->fmt && sc->fmt->arg_scnprintf[idx])
1518                         sc->arg_scnprintf[idx] = sc->fmt->arg_scnprintf[idx];
1519                 else if (field->flags & FIELD_IS_POINTER)
1520                         sc->arg_scnprintf[idx] = syscall_arg__scnprintf_hex;
1521                 ++idx;
1522         }
1523
1524         return 0;
1525 }
1526
1527 static int trace__read_syscall_info(struct trace *trace, int id)
1528 {
1529         char tp_name[128];
1530         struct syscall *sc;
1531         const char *name = audit_syscall_to_name(id, trace->audit.machine);
1532
1533         if (name == NULL)
1534                 return -1;
1535
1536         if (id > trace->syscalls.max) {
1537                 struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc));
1538
1539                 if (nsyscalls == NULL)
1540                         return -1;
1541
1542                 if (trace->syscalls.max != -1) {
1543                         memset(nsyscalls + trace->syscalls.max + 1, 0,
1544                                (id - trace->syscalls.max) * sizeof(*sc));
1545                 } else {
1546                         memset(nsyscalls, 0, (id + 1) * sizeof(*sc));
1547                 }
1548
1549                 trace->syscalls.table = nsyscalls;
1550                 trace->syscalls.max   = id;
1551         }
1552
1553         sc = trace->syscalls.table + id;
1554         sc->name = name;
1555
1556         sc->fmt  = syscall_fmt__find(sc->name);
1557
1558         snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name);
1559         sc->tp_format = trace_event__tp_format("syscalls", tp_name);
1560
1561         if (sc->tp_format == NULL && sc->fmt && sc->fmt->alias) {
1562                 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias);
1563                 sc->tp_format = trace_event__tp_format("syscalls", tp_name);
1564         }
1565
1566         if (sc->tp_format == NULL)
1567                 return -1;
1568
1569         sc->args = sc->tp_format->format.fields;
1570         sc->nr_args = sc->tp_format->format.nr_fields;
1571         /* drop nr field - not relevant here; does not exist on older kernels */
1572         if (sc->args && strcmp(sc->args->name, "nr") == 0) {
1573                 sc->args = sc->args->next;
1574                 --sc->nr_args;
1575         }
1576
1577         sc->is_exit = !strcmp(name, "exit_group") || !strcmp(name, "exit");
1578
1579         return syscall__set_arg_fmts(sc);
1580 }
1581
1582 static int trace__validate_ev_qualifier(struct trace *trace)
1583 {
1584         int err = 0, i;
1585         struct str_node *pos;
1586
1587         trace->ev_qualifier_ids.nr = strlist__nr_entries(trace->ev_qualifier);
1588         trace->ev_qualifier_ids.entries = malloc(trace->ev_qualifier_ids.nr *
1589                                                  sizeof(trace->ev_qualifier_ids.entries[0]));
1590
1591         if (trace->ev_qualifier_ids.entries == NULL) {
1592                 fputs("Error:\tNot enough memory for allocating events qualifier ids\n",
1593                        trace->output);
1594                 err = -EINVAL;
1595                 goto out;
1596         }
1597
1598         i = 0;
1599
1600         strlist__for_each(pos, trace->ev_qualifier) {
1601                 const char *sc = pos->s;
1602                 int id = audit_name_to_syscall(sc, trace->audit.machine);
1603
1604                 if (id < 0) {
1605                         if (err == 0) {
1606                                 fputs("Error:\tInvalid syscall ", trace->output);
1607                                 err = -EINVAL;
1608                         } else {
1609                                 fputs(", ", trace->output);
1610                         }
1611
1612                         fputs(sc, trace->output);
1613                 }
1614
1615                 trace->ev_qualifier_ids.entries[i++] = id;
1616         }
1617
1618         if (err < 0) {
1619                 fputs("\nHint:\ttry 'perf list syscalls:sys_enter_*'"
1620                       "\nHint:\tand: 'man syscalls'\n", trace->output);
1621                 zfree(&trace->ev_qualifier_ids.entries);
1622                 trace->ev_qualifier_ids.nr = 0;
1623         }
1624 out:
1625         return err;
1626 }
1627
1628 /*
1629  * args is to be interpreted as a series of longs but we need to handle
1630  * 8-byte unaligned accesses. args points to raw_data within the event
1631  * and raw_data is guaranteed to be 8-byte unaligned because it is
1632  * preceded by raw_size which is a u32. So we need to copy args to a temp
1633  * variable to read it. Most notably this avoids extended load instructions
1634  * on unaligned addresses
1635  */
1636
1637 static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
1638                                       unsigned char *args, struct trace *trace,
1639                                       struct thread *thread)
1640 {
1641         size_t printed = 0;
1642         unsigned char *p;
1643         unsigned long val;
1644
1645         if (sc->args != NULL) {
1646                 struct format_field *field;
1647                 u8 bit = 1;
1648                 struct syscall_arg arg = {
1649                         .idx    = 0,
1650                         .mask   = 0,
1651                         .trace  = trace,
1652                         .thread = thread,
1653                 };
1654
1655                 for (field = sc->args; field;
1656                      field = field->next, ++arg.idx, bit <<= 1) {
1657                         if (arg.mask & bit)
1658                                 continue;
1659
1660                         /* special care for unaligned accesses */
1661                         p = args + sizeof(unsigned long) * arg.idx;
1662                         memcpy(&val, p, sizeof(val));
1663
1664                         /*
1665                          * Suppress this argument if its value is zero and
1666                          * and we don't have a string associated in an
1667                          * strarray for it.
1668                          */
1669                         if (val == 0 &&
1670                             !(sc->arg_scnprintf &&
1671                               sc->arg_scnprintf[arg.idx] == SCA_STRARRAY &&
1672                               sc->arg_parm[arg.idx]))
1673                                 continue;
1674
1675                         printed += scnprintf(bf + printed, size - printed,
1676                                              "%s%s: ", printed ? ", " : "", field->name);
1677                         if (sc->arg_scnprintf && sc->arg_scnprintf[arg.idx]) {
1678                                 arg.val = val;
1679                                 if (sc->arg_parm)
1680                                         arg.parm = sc->arg_parm[arg.idx];
1681                                 printed += sc->arg_scnprintf[arg.idx](bf + printed,
1682                                                                       size - printed, &arg);
1683                         } else {
1684                                 printed += scnprintf(bf + printed, size - printed,
1685                                                      "%ld", val);
1686                         }
1687                 }
1688         } else {
1689                 int i = 0;
1690
1691                 while (i < 6) {
1692                         /* special care for unaligned accesses */
1693                         p = args + sizeof(unsigned long) * i;
1694                         memcpy(&val, p, sizeof(val));
1695                         printed += scnprintf(bf + printed, size - printed,
1696                                              "%sarg%d: %ld",
1697                                              printed ? ", " : "", i, val);
1698                         ++i;
1699                 }
1700         }
1701
1702         return printed;
1703 }
1704
/*
 * Signature of the per-event callbacks kept in evsel->handler and invoked
 * for each sample of that event.
 */
typedef int (*tracepoint_handler)(struct trace *trace,
                                  struct perf_evsel *evsel,
                                  union perf_event *event,
                                  struct perf_sample *sample);
1708
1709 static struct syscall *trace__syscall_info(struct trace *trace,
1710                                            struct perf_evsel *evsel, int id)
1711 {
1712
1713         if (id < 0) {
1714
1715                 /*
1716                  * XXX: Noticed on x86_64, reproduced as far back as 3.0.36, haven't tried
1717                  * before that, leaving at a higher verbosity level till that is
1718                  * explained. Reproduced with plain ftrace with:
1719                  *
1720                  * echo 1 > /t/events/raw_syscalls/sys_exit/enable
1721                  * grep "NR -1 " /t/trace_pipe
1722                  *
1723                  * After generating some load on the machine.
1724                  */
1725                 if (verbose > 1) {
1726                         static u64 n;
1727                         fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n",
1728                                 id, perf_evsel__name(evsel), ++n);
1729                 }
1730                 return NULL;
1731         }
1732
1733         if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) &&
1734             trace__read_syscall_info(trace, id))
1735                 goto out_cant_read;
1736
1737         if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL))
1738                 goto out_cant_read;
1739
1740         return &trace->syscalls.table[id];
1741
1742 out_cant_read:
1743         if (verbose) {
1744                 fprintf(trace->output, "Problems reading syscall %d", id);
1745                 if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL)
1746                         fprintf(trace->output, "(%s)", trace->syscalls.table[id].name);
1747                 fputs(" information\n", trace->output);
1748         }
1749         return NULL;
1750 }
1751
1752 static void thread__update_stats(struct thread_trace *ttrace,
1753                                  int id, struct perf_sample *sample)
1754 {
1755         struct int_node *inode;
1756         struct stats *stats;
1757         u64 duration = 0;
1758
1759         inode = intlist__findnew(ttrace->syscall_stats, id);
1760         if (inode == NULL)
1761                 return;
1762
1763         stats = inode->priv;
1764         if (stats == NULL) {
1765                 stats = malloc(sizeof(struct stats));
1766                 if (stats == NULL)
1767                         return;
1768                 init_stats(stats);
1769                 inode->priv = stats;
1770         }
1771
1772         if (ttrace->entry_time && sample->time > ttrace->entry_time)
1773                 duration = sample->time - ttrace->entry_time;
1774
1775         update_stats(stats, duration);
1776 }
1777
1778 static int trace__printf_interrupted_entry(struct trace *trace, struct perf_sample *sample)
1779 {
1780         struct thread_trace *ttrace;
1781         u64 duration;
1782         size_t printed;
1783
1784         if (trace->current == NULL)
1785                 return 0;
1786
1787         ttrace = thread__priv(trace->current);
1788
1789         if (!ttrace->entry_pending)
1790                 return 0;
1791
1792         duration = sample->time - ttrace->entry_time;
1793
1794         printed  = trace__fprintf_entry_head(trace, trace->current, duration, sample->time, trace->output);
1795         printed += fprintf(trace->output, "%-70s) ...\n", ttrace->entry_str);
1796         ttrace->entry_pending = false;
1797
1798         return printed;
1799 }
1800
1801 static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
1802                             union perf_event *event __maybe_unused,
1803                             struct perf_sample *sample)
1804 {
1805         char *msg;
1806         void *args;
1807         size_t printed = 0;
1808         struct thread *thread;
1809         int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1;
1810         struct syscall *sc = trace__syscall_info(trace, evsel, id);
1811         struct thread_trace *ttrace;
1812
1813         if (sc == NULL)
1814                 return -1;
1815
1816         thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1817         ttrace = thread__trace(thread, trace->output);
1818         if (ttrace == NULL)
1819                 goto out_put;
1820
1821         args = perf_evsel__sc_tp_ptr(evsel, args, sample);
1822
1823         if (ttrace->entry_str == NULL) {
1824                 ttrace->entry_str = malloc(1024);
1825                 if (!ttrace->entry_str)
1826                         goto out_put;
1827         }
1828
1829         if (!trace->summary_only)
1830                 trace__printf_interrupted_entry(trace, sample);
1831
1832         ttrace->entry_time = sample->time;
1833         msg = ttrace->entry_str;
1834         printed += scnprintf(msg + printed, 1024 - printed, "%s(", sc->name);
1835
1836         printed += syscall__scnprintf_args(sc, msg + printed, 1024 - printed,
1837                                            args, trace, thread);
1838
1839         if (sc->is_exit) {
1840                 if (!trace->duration_filter && !trace->summary_only) {
1841                         trace__fprintf_entry_head(trace, thread, 1, sample->time, trace->output);
1842                         fprintf(trace->output, "%-70s\n", ttrace->entry_str);
1843                 }
1844         } else
1845                 ttrace->entry_pending = true;
1846
1847         if (trace->current != thread) {
1848                 thread__put(trace->current);
1849                 trace->current = thread__get(thread);
1850         }
1851         err = 0;
1852 out_put:
1853         thread__put(thread);
1854         return err;
1855 }
1856
1857 static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
1858                            union perf_event *event __maybe_unused,
1859                            struct perf_sample *sample)
1860 {
1861         long ret;
1862         u64 duration = 0;
1863         struct thread *thread;
1864         int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1;
1865         struct syscall *sc = trace__syscall_info(trace, evsel, id);
1866         struct thread_trace *ttrace;
1867
1868         if (sc == NULL)
1869                 return -1;
1870
1871         thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1872         ttrace = thread__trace(thread, trace->output);
1873         if (ttrace == NULL)
1874                 goto out_put;
1875
1876         if (trace->summary)
1877                 thread__update_stats(ttrace, id, sample);
1878
1879         ret = perf_evsel__sc_tp_uint(evsel, ret, sample);
1880
1881         if (id == trace->audit.open_id && ret >= 0 && trace->last_vfs_getname) {
1882                 trace__set_fd_pathname(thread, ret, trace->last_vfs_getname);
1883                 trace->last_vfs_getname = NULL;
1884                 ++trace->stats.vfs_getname;
1885         }
1886
1887         ttrace->exit_time = sample->time;
1888
1889         if (ttrace->entry_time) {
1890                 duration = sample->time - ttrace->entry_time;
1891                 if (trace__filter_duration(trace, duration))
1892                         goto out;
1893         } else if (trace->duration_filter)
1894                 goto out;
1895
1896         if (trace->summary_only)
1897                 goto out;
1898
1899         trace__fprintf_entry_head(trace, thread, duration, sample->time, trace->output);
1900
1901         if (ttrace->entry_pending) {
1902                 fprintf(trace->output, "%-70s", ttrace->entry_str);
1903         } else {
1904                 fprintf(trace->output, " ... [");
1905                 color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued");
1906                 fprintf(trace->output, "]: %s()", sc->name);
1907         }
1908
1909         if (sc->fmt == NULL) {
1910 signed_print:
1911                 fprintf(trace->output, ") = %ld", ret);
1912         } else if (ret < 0 && sc->fmt->errmsg) {
1913                 char bf[STRERR_BUFSIZE];
1914                 const char *emsg = strerror_r(-ret, bf, sizeof(bf)),
1915                            *e = audit_errno_to_name(-ret);
1916
1917                 fprintf(trace->output, ") = -1 %s %s", e, emsg);
1918         } else if (ret == 0 && sc->fmt->timeout)
1919                 fprintf(trace->output, ") = 0 Timeout");
1920         else if (sc->fmt->hexret)
1921                 fprintf(trace->output, ") = %#lx", ret);
1922         else
1923                 goto signed_print;
1924
1925         fputc('\n', trace->output);
1926 out:
1927         ttrace->entry_pending = false;
1928         err = 0;
1929 out_put:
1930         thread__put(thread);
1931         return err;
1932 }
1933
1934 static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel,
1935                               union perf_event *event __maybe_unused,
1936                               struct perf_sample *sample)
1937 {
1938         trace->last_vfs_getname = perf_evsel__rawptr(evsel, sample, "pathname");
1939         return 0;
1940 }
1941
1942 static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel,
1943                                      union perf_event *event __maybe_unused,
1944                                      struct perf_sample *sample)
1945 {
1946         u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
1947         double runtime_ms = (double)runtime / NSEC_PER_MSEC;
1948         struct thread *thread = machine__findnew_thread(trace->host,
1949                                                         sample->pid,
1950                                                         sample->tid);
1951         struct thread_trace *ttrace = thread__trace(thread, trace->output);
1952
1953         if (ttrace == NULL)
1954                 goto out_dump;
1955
1956         ttrace->runtime_ms += runtime_ms;
1957         trace->runtime_ms += runtime_ms;
1958         thread__put(thread);
1959         return 0;
1960
1961 out_dump:
1962         fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n",
1963                evsel->name,
1964                perf_evsel__strval(evsel, sample, "comm"),
1965                (pid_t)perf_evsel__intval(evsel, sample, "pid"),
1966                runtime,
1967                perf_evsel__intval(evsel, sample, "vruntime"));
1968         thread__put(thread);
1969         return 0;
1970 }
1971
1972 static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel,
1973                                 union perf_event *event __maybe_unused,
1974                                 struct perf_sample *sample)
1975 {
1976         trace__printf_interrupted_entry(trace, sample);
1977         trace__fprintf_tstamp(trace, sample->time, trace->output);
1978
1979         if (trace->trace_syscalls)
1980                 fprintf(trace->output, "(         ): ");
1981
1982         fprintf(trace->output, "%s:", evsel->name);
1983
1984         if (evsel->tp_format) {
1985                 event_format__fprintf(evsel->tp_format, sample->cpu,
1986                                       sample->raw_data, sample->raw_size,
1987                                       trace->output);
1988         }
1989
1990         fprintf(trace->output, ")\n");
1991         return 0;
1992 }
1993
1994 static void print_location(FILE *f, struct perf_sample *sample,
1995                            struct addr_location *al,
1996                            bool print_dso, bool print_sym)
1997 {
1998
1999         if ((verbose || print_dso) && al->map)
2000                 fprintf(f, "%s@", al->map->dso->long_name);
2001
2002         if ((verbose || print_sym) && al->sym)
2003                 fprintf(f, "%s+0x%" PRIx64, al->sym->name,
2004                         al->addr - al->sym->start);
2005         else if (al->map)
2006                 fprintf(f, "0x%" PRIx64, al->addr);
2007         else
2008                 fprintf(f, "0x%" PRIx64, sample->addr);
2009 }
2010
2011 static int trace__pgfault(struct trace *trace,
2012                           struct perf_evsel *evsel,
2013                           union perf_event *event,
2014                           struct perf_sample *sample)
2015 {
2016         struct thread *thread;
2017         u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
2018         struct addr_location al;
2019         char map_type = 'd';
2020         struct thread_trace *ttrace;
2021         int err = -1;
2022
2023         thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
2024         ttrace = thread__trace(thread, trace->output);
2025         if (ttrace == NULL)
2026                 goto out_put;
2027
2028         if (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ)
2029                 ttrace->pfmaj++;
2030         else
2031                 ttrace->pfmin++;
2032
2033         if (trace->summary_only)
2034                 goto out;
2035
2036         thread__find_addr_location(thread, cpumode, MAP__FUNCTION,
2037                               sample->ip, &al);
2038
2039         trace__fprintf_entry_head(trace, thread, 0, sample->time, trace->output);
2040
2041         fprintf(trace->output, "%sfault [",
2042                 evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ?
2043                 "maj" : "min");
2044
2045         print_location(trace->output, sample, &al, false, true);
2046
2047         fprintf(trace->output, "] => ");
2048
2049         thread__find_addr_location(thread, cpumode, MAP__VARIABLE,
2050                                    sample->addr, &al);
2051
2052         if (!al.map) {
2053                 thread__find_addr_location(thread, cpumode,
2054                                            MAP__FUNCTION, sample->addr, &al);
2055
2056                 if (al.map)
2057                         map_type = 'x';
2058                 else
2059                         map_type = '?';
2060         }
2061
2062         print_location(trace->output, sample, &al, true, false);
2063
2064         fprintf(trace->output, " (%c%c)\n", map_type, al.level);
2065 out:
2066         err = 0;
2067 out_put:
2068         thread__put(thread);
2069         return err;
2070 }
2071
2072 static bool skip_sample(struct trace *trace, struct perf_sample *sample)
2073 {
2074         if ((trace->pid_list && intlist__find(trace->pid_list, sample->pid)) ||
2075             (trace->tid_list && intlist__find(trace->tid_list, sample->tid)))
2076                 return false;
2077
2078         if (trace->pid_list || trace->tid_list)
2079                 return true;
2080
2081         return false;
2082 }
2083
2084 static int trace__process_sample(struct perf_tool *tool,
2085                                  union perf_event *event,
2086                                  struct perf_sample *sample,
2087                                  struct perf_evsel *evsel,
2088                                  struct machine *machine __maybe_unused)
2089 {
2090         struct trace *trace = container_of(tool, struct trace, tool);
2091         int err = 0;
2092
2093         tracepoint_handler handler = evsel->handler;
2094
2095         if (skip_sample(trace, sample))
2096                 return 0;
2097
2098         if (!trace->full_time && trace->base_time == 0)
2099                 trace->base_time = sample->time;
2100
2101         if (handler) {
2102                 ++trace->nr_events;
2103                 handler(trace, evsel, event, sample);
2104         }
2105
2106         return err;
2107 }
2108
2109 static int parse_target_str(struct trace *trace)
2110 {
2111         if (trace->opts.target.pid) {
2112                 trace->pid_list = intlist__new(trace->opts.target.pid);
2113                 if (trace->pid_list == NULL) {
2114                         pr_err("Error parsing process id string\n");
2115                         return -EINVAL;
2116                 }
2117         }
2118
2119         if (trace->opts.target.tid) {
2120                 trace->tid_list = intlist__new(trace->opts.target.tid);
2121                 if (trace->tid_list == NULL) {
2122                         pr_err("Error parsing thread id string\n");
2123                         return -EINVAL;
2124                 }
2125         }
2126
2127         return 0;
2128 }
2129
2130 static int trace__record(struct trace *trace, int argc, const char **argv)
2131 {
2132         unsigned int rec_argc, i, j;
2133         const char **rec_argv;
2134         const char * const record_args[] = {
2135                 "record",
2136                 "-R",
2137                 "-m", "1024",
2138                 "-c", "1",
2139         };
2140
2141         const char * const sc_args[] = { "-e", };
2142         unsigned int sc_args_nr = ARRAY_SIZE(sc_args);
2143         const char * const majpf_args[] = { "-e", "major-faults" };
2144         unsigned int majpf_args_nr = ARRAY_SIZE(majpf_args);
2145         const char * const minpf_args[] = { "-e", "minor-faults" };
2146         unsigned int minpf_args_nr = ARRAY_SIZE(minpf_args);
2147
2148         /* +1 is for the event string below */
2149         rec_argc = ARRAY_SIZE(record_args) + sc_args_nr + 1 +
2150                 majpf_args_nr + minpf_args_nr + argc;
2151         rec_argv = calloc(rec_argc + 1, sizeof(char *));
2152
2153         if (rec_argv == NULL)
2154                 return -ENOMEM;
2155
2156         j = 0;
2157         for (i = 0; i < ARRAY_SIZE(record_args); i++)
2158                 rec_argv[j++] = record_args[i];
2159
2160         if (trace->trace_syscalls) {
2161                 for (i = 0; i < sc_args_nr; i++)
2162                         rec_argv[j++] = sc_args[i];
2163
2164                 /* event string may be different for older kernels - e.g., RHEL6 */
2165                 if (is_valid_tracepoint("raw_syscalls:sys_enter"))
2166                         rec_argv[j++] = "raw_syscalls:sys_enter,raw_syscalls:sys_exit";
2167                 else if (is_valid_tracepoint("syscalls:sys_enter"))
2168                         rec_argv[j++] = "syscalls:sys_enter,syscalls:sys_exit";
2169                 else {
2170                         pr_err("Neither raw_syscalls nor syscalls events exist.\n");
2171                         return -1;
2172                 }
2173         }
2174
2175         if (trace->trace_pgfaults & TRACE_PFMAJ)
2176                 for (i = 0; i < majpf_args_nr; i++)
2177                         rec_argv[j++] = majpf_args[i];
2178
2179         if (trace->trace_pgfaults & TRACE_PFMIN)
2180                 for (i = 0; i < minpf_args_nr; i++)
2181                         rec_argv[j++] = minpf_args[i];
2182
2183         for (i = 0; i < (unsigned int)argc; i++)
2184                 rec_argv[j++] = argv[i];
2185
2186         return cmd_record(j, rec_argv, NULL);
2187 }
2188
/* Forward declaration: called by trace__run() when a summary is requested. */
static size_t trace__fprintf_thread_summary(struct trace *trace,
                                            FILE *fp);
2190
2191 static void perf_evlist__add_vfs_getname(struct perf_evlist *evlist)
2192 {
2193         struct perf_evsel *evsel = perf_evsel__newtp("probe", "vfs_getname");
2194         if (evsel == NULL)
2195                 return;
2196
2197         if (perf_evsel__field(evsel, "pathname") == NULL) {
2198                 perf_evsel__delete(evsel);
2199                 return;
2200         }
2201
2202         evsel->handler = trace__vfs_getname;
2203         perf_evlist__add(evlist, evsel);
2204 }
2205
2206 static int perf_evlist__add_pgfault(struct perf_evlist *evlist,
2207                                     u64 config)
2208 {
2209         struct perf_evsel *evsel;
2210         struct perf_event_attr attr = {
2211                 .type = PERF_TYPE_SOFTWARE,
2212                 .mmap_data = 1,
2213         };
2214
2215         attr.config = config;
2216         attr.sample_period = 1;
2217
2218         event_attr_init(&attr);
2219
2220         evsel = perf_evsel__new(&attr);
2221         if (!evsel)
2222                 return -ENOMEM;
2223
2224         evsel->handler = trace__pgfault;
2225         perf_evlist__add(evlist, evsel);
2226
2227         return 0;
2228 }
2229
2230 static void trace__handle_event(struct trace *trace, union perf_event *event, struct perf_sample *sample)
2231 {
2232         const u32 type = event->header.type;
2233         struct perf_evsel *evsel;
2234
2235         if (!trace->full_time && trace->base_time == 0)
2236                 trace->base_time = sample->time;
2237
2238         if (type != PERF_RECORD_SAMPLE) {
2239                 trace__process_event(trace, trace->host, event, sample);
2240                 return;
2241         }
2242
2243         evsel = perf_evlist__id2evsel(trace->evlist, sample->id);
2244         if (evsel == NULL) {
2245                 fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample->id);
2246                 return;
2247         }
2248
2249         if (evsel->attr.type == PERF_TYPE_TRACEPOINT &&
2250             sample->raw_data == NULL) {
2251                 fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
2252                        perf_evsel__name(evsel), sample->tid,
2253                        sample->cpu, sample->raw_size);
2254         } else {
2255                 tracepoint_handler handler = evsel->handler;
2256                 handler(trace, evsel, event, sample);
2257         }
2258 }
2259
2260 static int trace__add_syscall_newtp(struct trace *trace)
2261 {
2262         int ret = -1;
2263         struct perf_evlist *evlist = trace->evlist;
2264         struct perf_evsel *sys_enter, *sys_exit;
2265
2266         sys_enter = perf_evsel__syscall_newtp("sys_enter", trace__sys_enter);
2267         if (sys_enter == NULL)
2268                 goto out;
2269
2270         if (perf_evsel__init_sc_tp_ptr_field(sys_enter, args))
2271                 goto out_delete_sys_enter;
2272
2273         sys_exit = perf_evsel__syscall_newtp("sys_exit", trace__sys_exit);
2274         if (sys_exit == NULL)
2275                 goto out_delete_sys_enter;
2276
2277         if (perf_evsel__init_sc_tp_uint_field(sys_exit, ret))
2278                 goto out_delete_sys_exit;
2279
2280         perf_evlist__add(evlist, sys_enter);
2281         perf_evlist__add(evlist, sys_exit);
2282
2283         trace->syscalls.events.sys_enter = sys_enter;
2284         trace->syscalls.events.sys_exit  = sys_exit;
2285
2286         ret = 0;
2287 out:
2288         return ret;
2289
2290 out_delete_sys_exit:
2291         perf_evsel__delete_priv(sys_exit);
2292 out_delete_sys_enter:
2293         perf_evsel__delete_priv(sys_enter);
2294         goto out;
2295 }
2296
2297 static int trace__set_ev_qualifier_filter(struct trace *trace)
2298 {
2299         int err = -1;
2300         char *filter = asprintf_expr_inout_ints("id", !trace->not_ev_qualifier,
2301                                                 trace->ev_qualifier_ids.nr,
2302                                                 trace->ev_qualifier_ids.entries);
2303
2304         if (filter == NULL)
2305                 goto out_enomem;
2306
2307         if (!perf_evsel__append_filter(trace->syscalls.events.sys_enter, "&&", filter))
2308                 err = perf_evsel__append_filter(trace->syscalls.events.sys_exit, "&&", filter);
2309
2310         free(filter);
2311 out:
2312         return err;
2313 out_enomem:
2314         errno = ENOMEM;
2315         goto out;
2316 }
2317
2318 static int trace__run(struct trace *trace, int argc, const char **argv)
2319 {
2320         struct perf_evlist *evlist = trace->evlist;
2321         struct perf_evsel *evsel;
2322         int err = -1, i;
2323         unsigned long before;
2324         const bool forks = argc > 0;
2325         bool draining = false;
2326
2327         trace->live = true;
2328
2329         if (trace->trace_syscalls && trace__add_syscall_newtp(trace))
2330                 goto out_error_raw_syscalls;
2331
2332         if (trace->trace_syscalls)
2333                 perf_evlist__add_vfs_getname(evlist);
2334
2335         if ((trace->trace_pgfaults & TRACE_PFMAJ) &&
2336             perf_evlist__add_pgfault(evlist, PERF_COUNT_SW_PAGE_FAULTS_MAJ)) {
2337                 goto out_error_mem;
2338         }
2339
2340         if ((trace->trace_pgfaults & TRACE_PFMIN) &&
2341             perf_evlist__add_pgfault(evlist, PERF_COUNT_SW_PAGE_FAULTS_MIN))
2342                 goto out_error_mem;
2343
2344         if (trace->sched &&
2345             perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
2346                                    trace__sched_stat_runtime))
2347                 goto out_error_sched_stat_runtime;
2348
2349         err = perf_evlist__create_maps(evlist, &trace->opts.target);
2350         if (err < 0) {
2351                 fprintf(trace->output, "Problems parsing the target to trace, check your options!\n");
2352                 goto out_delete_evlist;
2353         }
2354
2355         err = trace__symbols_init(trace, evlist);
2356         if (err < 0) {
2357                 fprintf(trace->output, "Problems initializing symbol libraries!\n");
2358                 goto out_delete_evlist;
2359         }
2360
2361         perf_evlist__config(evlist, &trace->opts);
2362
2363         signal(SIGCHLD, sig_handler);
2364         signal(SIGINT, sig_handler);
2365
2366         if (forks) {
2367                 err = perf_evlist__prepare_workload(evlist, &trace->opts.target,
2368                                                     argv, false, NULL);
2369                 if (err < 0) {
2370                         fprintf(trace->output, "Couldn't run the workload!\n");
2371                         goto out_delete_evlist;
2372                 }
2373         }
2374
2375         err = perf_evlist__open(evlist);
2376         if (err < 0)
2377                 goto out_error_open;
2378
2379         /*
2380          * Better not use !target__has_task() here because we need to cover the
2381          * case where no threads were specified in the command line, but a
2382          * workload was, and in that case we will fill in the thread_map when
2383          * we fork the workload in perf_evlist__prepare_workload.
2384          */
2385         if (trace->filter_pids.nr > 0)
2386                 err = perf_evlist__set_filter_pids(evlist, trace->filter_pids.nr, trace->filter_pids.entries);
2387         else if (thread_map__pid(evlist->threads, 0) == -1)
2388                 err = perf_evlist__set_filter_pid(evlist, getpid());
2389
2390         if (err < 0)
2391                 goto out_error_mem;
2392
2393         if (trace->ev_qualifier_ids.nr > 0) {
2394                 err = trace__set_ev_qualifier_filter(trace);
2395                 if (err < 0)
2396                         goto out_errno;
2397
2398                 pr_debug("event qualifier tracepoint filter: %s\n",
2399                          trace->syscalls.events.sys_exit->filter);
2400         }
2401
2402         err = perf_evlist__apply_filters(evlist, &evsel);
2403         if (err < 0)
2404                 goto out_error_apply_filters;
2405
2406         err = perf_evlist__mmap(evlist, trace->opts.mmap_pages, false);
2407         if (err < 0)
2408                 goto out_error_mmap;
2409
2410         if (!target__none(&trace->opts.target))
2411                 perf_evlist__enable(evlist);
2412
2413         if (forks)
2414                 perf_evlist__start_workload(evlist);
2415
2416         trace->multiple_threads = thread_map__pid(evlist->threads, 0) == -1 ||
2417                                   evlist->threads->nr > 1 ||
2418                                   perf_evlist__first(evlist)->attr.inherit;
2419 again:
2420         before = trace->nr_events;
2421
2422         for (i = 0; i < evlist->nr_mmaps; i++) {
2423                 union perf_event *event;
2424
2425                 while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) {
2426                         struct perf_sample sample;
2427
2428                         ++trace->nr_events;
2429
2430                         err = perf_evlist__parse_sample(evlist, event, &sample);
2431                         if (err) {
2432                                 fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err);
2433                                 goto next_event;
2434                         }
2435
2436                         trace__handle_event(trace, event, &sample);
2437 next_event:
2438                         perf_evlist__mmap_consume(evlist, i);
2439
2440                         if (interrupted)
2441                                 goto out_disable;
2442
2443                         if (done && !draining) {
2444                                 perf_evlist__disable(evlist);
2445                                 draining = true;
2446                         }
2447                 }
2448         }
2449
2450         if (trace->nr_events == before) {
2451                 int timeout = done ? 100 : -1;
2452
2453                 if (!draining && perf_evlist__poll(evlist, timeout) > 0) {
2454                         if (perf_evlist__filter_pollfd(evlist, POLLERR | POLLHUP) == 0)
2455                                 draining = true;
2456
2457                         goto again;
2458                 }
2459         } else {
2460                 goto again;
2461         }
2462
2463 out_disable:
2464         thread__zput(trace->current);
2465
2466         perf_evlist__disable(evlist);
2467
2468         if (!err) {
2469                 if (trace->summary)
2470                         trace__fprintf_thread_summary(trace, trace->output);
2471
2472                 if (trace->show_tool_stats) {
2473                         fprintf(trace->output, "Stats:\n "
2474                                                " vfs_getname : %" PRIu64 "\n"
2475                                                " proc_getname: %" PRIu64 "\n",
2476                                 trace->stats.vfs_getname,
2477                                 trace->stats.proc_getname);
2478                 }
2479         }
2480
2481 out_delete_evlist:
2482         perf_evlist__delete(evlist);
2483         trace->evlist = NULL;
2484         trace->live = false;
2485         return err;
2486 {
2487         char errbuf[BUFSIZ];
2488
2489 out_error_sched_stat_runtime:
2490         debugfs__strerror_open_tp(errno, errbuf, sizeof(errbuf), "sched", "sched_stat_runtime");
2491         goto out_error;
2492
2493 out_error_raw_syscalls:
2494         debugfs__strerror_open_tp(errno, errbuf, sizeof(errbuf), "raw_syscalls", "sys_(enter|exit)");
2495         goto out_error;
2496
2497 out_error_mmap:
2498         perf_evlist__strerror_mmap(evlist, errno, errbuf, sizeof(errbuf));
2499         goto out_error;
2500
2501 out_error_open:
2502         perf_evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf));
2503
2504 out_error:
2505         fprintf(trace->output, "%s\n", errbuf);
2506         goto out_delete_evlist;
2507
2508 out_error_apply_filters:
2509         fprintf(trace->output,
2510                 "Failed to set filter \"%s\" on event %s with %d (%s)\n",
2511                 evsel->filter, perf_evsel__name(evsel), errno,
2512                 strerror_r(errno, errbuf, sizeof(errbuf)));
2513         goto out_delete_evlist;
2514 }
2515 out_error_mem:
2516         fprintf(trace->output, "Not enough memory to run!\n");
2517         goto out_delete_evlist;
2518
2519 out_errno:
2520         fprintf(trace->output, "errno=%d,%s\n", errno, strerror(errno));
2521         goto out_delete_evlist;
2522 }
2523
2524 static int trace__replay(struct trace *trace)
2525 {
2526         const struct perf_evsel_str_handler handlers[] = {
2527                 { "probe:vfs_getname",       trace__vfs_getname, },
2528         };
2529         struct perf_data_file file = {
2530                 .path  = input_name,
2531                 .mode  = PERF_DATA_MODE_READ,
2532                 .force = trace->force,
2533         };
2534         struct perf_session *session;
2535         struct perf_evsel *evsel;
2536         int err = -1;
2537
2538         trace->tool.sample        = trace__process_sample;
2539         trace->tool.mmap          = perf_event__process_mmap;
2540         trace->tool.mmap2         = perf_event__process_mmap2;
2541         trace->tool.comm          = perf_event__process_comm;
2542         trace->tool.exit          = perf_event__process_exit;
2543         trace->tool.fork          = perf_event__process_fork;
2544         trace->tool.attr          = perf_event__process_attr;
2545         trace->tool.tracing_data = perf_event__process_tracing_data;
2546         trace->tool.build_id      = perf_event__process_build_id;
2547
2548         trace->tool.ordered_events = true;
2549         trace->tool.ordering_requires_timestamps = true;
2550
2551         /* add tid to output */
2552         trace->multiple_threads = true;
2553
2554         session = perf_session__new(&file, false, &trace->tool);
2555         if (session == NULL)
2556                 return -1;
2557
2558         if (symbol__init(&session->header.env) < 0)
2559                 goto out;
2560
2561         trace->host = &session->machines.host;
2562
2563         err = perf_session__set_tracepoints_handlers(session, handlers);
2564         if (err)
2565                 goto out;
2566
2567         evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2568                                                      "raw_syscalls:sys_enter");
2569         /* older kernels have syscalls tp versus raw_syscalls */
2570         if (evsel == NULL)
2571                 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2572                                                              "syscalls:sys_enter");
2573
2574         if (evsel &&
2575             (perf_evsel__init_syscall_tp(evsel, trace__sys_enter) < 0 ||
2576             perf_evsel__init_sc_tp_ptr_field(evsel, args))) {
2577                 pr_err("Error during initialize raw_syscalls:sys_enter event\n");
2578                 goto out;
2579         }
2580
2581         evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2582                                                      "raw_syscalls:sys_exit");
2583         if (evsel == NULL)
2584                 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2585                                                              "syscalls:sys_exit");
2586         if (evsel &&
2587             (perf_evsel__init_syscall_tp(evsel, trace__sys_exit) < 0 ||
2588             perf_evsel__init_sc_tp_uint_field(evsel, ret))) {
2589                 pr_err("Error during initialize raw_syscalls:sys_exit event\n");
2590                 goto out;
2591         }
2592
2593         evlist__for_each(session->evlist, evsel) {
2594                 if (evsel->attr.type == PERF_TYPE_SOFTWARE &&
2595                     (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ||
2596                      evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MIN ||
2597                      evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS))
2598                         evsel->handler = trace__pgfault;
2599         }
2600
2601         err = parse_target_str(trace);
2602         if (err != 0)
2603                 goto out;
2604
2605         setup_pager();
2606
2607         err = perf_session__process_events(session);
2608         if (err)
2609                 pr_err("Failed to process events, error %d", err);
2610
2611         else if (trace->summary)
2612                 trace__fprintf_thread_summary(trace, trace->output);
2613
2614 out:
2615         perf_session__delete(session);
2616
2617         return err;
2618 }
2619
/*
 * Write the banner that precedes the per-thread summary to @fp.
 *
 * Returns the value reported by fprintf(), converted to size_t.
 */
static size_t trace__fprintf_threads_header(FILE *fp)
{
	return fprintf(fp, "\n Summary of events:\n\n");
}
2628
2629 static size_t thread__dump_stats(struct thread_trace *ttrace,
2630                                  struct trace *trace, FILE *fp)
2631 {
2632         struct stats *stats;
2633         size_t printed = 0;
2634         struct syscall *sc;
2635         struct int_node *inode = intlist__first(ttrace->syscall_stats);
2636
2637         if (inode == NULL)
2638                 return 0;
2639
2640         printed += fprintf(fp, "\n");
2641
2642         printed += fprintf(fp, "   syscall            calls      min       avg       max      stddev\n");
2643         printed += fprintf(fp, "                               (msec)    (msec)    (msec)        (%%)\n");
2644         printed += fprintf(fp, "   --------------- -------- --------- --------- ---------     ------\n");
2645
2646         /* each int_node is a syscall */
2647         while (inode) {
2648                 stats = inode->priv;
2649                 if (stats) {
2650                         double min = (double)(stats->min) / NSEC_PER_MSEC;
2651                         double max = (double)(stats->max) / NSEC_PER_MSEC;
2652                         double avg = avg_stats(stats);
2653                         double pct;
2654                         u64 n = (u64) stats->n;
2655
2656                         pct = avg ? 100.0 * stddev_stats(stats)/avg : 0.0;
2657                         avg /= NSEC_PER_MSEC;
2658
2659                         sc = &trace->syscalls.table[inode->i];
2660                         printed += fprintf(fp, "   %-15s", sc->name);
2661                         printed += fprintf(fp, " %8" PRIu64 " %9.3f %9.3f",
2662                                            n, min, avg);
2663                         printed += fprintf(fp, " %9.3f %9.2f%%\n", max, pct);
2664                 }
2665
2666                 inode = intlist__next(inode);
2667         }
2668
2669         printed += fprintf(fp, "\n\n");
2670
2671         return printed;
2672 }
2673
/*
 * Context handed to the per-thread summary callback through its opaque
 * 'priv' pointer.
 */
struct summary_data {
	FILE *fp;		/* stream the summary is written to */
	struct trace *trace;	/* trace session being summarised */
	size_t printed;		/* running total of the fprintf() results */
};
2680
2681 static int trace__fprintf_one_thread(struct thread *thread, void *priv)
2682 {
2683         struct summary_data *data = priv;
2684         FILE *fp = data->fp;
2685         size_t printed = data->printed;
2686         struct trace *trace = data->trace;
2687         struct thread_trace *ttrace = thread__priv(thread);
2688         double ratio;
2689
2690         if (ttrace == NULL)
2691                 return 0;
2692
2693         ratio = (double)ttrace->nr_events / trace->nr_events * 100.0;
2694
2695         printed += fprintf(fp, " %s (%d), ", thread__comm_str(thread), thread->tid);
2696         printed += fprintf(fp, "%lu events, ", ttrace->nr_events);
2697         printed += fprintf(fp, "%.1f%%", ratio);
2698         if (ttrace->pfmaj)
2699                 printed += fprintf(fp, ", %lu majfaults", ttrace->pfmaj);
2700         if (ttrace->pfmin)
2701                 printed += fprintf(fp, ", %lu minfaults", ttrace->pfmin);
2702         printed += fprintf(fp, ", %.3f msec\n", ttrace->runtime_ms);
2703         printed += thread__dump_stats(ttrace, trace, fp);
2704
2705         data->printed += printed;
2706
2707         return 0;
2708 }
2709
2710 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
2711 {
2712         struct summary_data data = {
2713                 .fp = fp,
2714                 .trace = trace
2715         };
2716         data.printed = trace__fprintf_threads_header(fp);
2717
2718         machine__for_each_thread(trace->host, trace__fprintf_one_thread, &data);
2719
2720         return data.printed;
2721 }
2722
2723 static int trace__set_duration(const struct option *opt, const char *str,
2724                                int unset __maybe_unused)
2725 {
2726         struct trace *trace = opt->value;
2727
2728         trace->duration_filter = atof(str);
2729         return 0;
2730 }
2731
2732 static int trace__set_filter_pids(const struct option *opt, const char *str,
2733                                   int unset __maybe_unused)
2734 {
2735         int ret = -1;
2736         size_t i;
2737         struct trace *trace = opt->value;
2738         /*
2739          * FIXME: introduce a intarray class, plain parse csv and create a
2740          * { int nr, int entries[] } struct...
2741          */
2742         struct intlist *list = intlist__new(str);
2743
2744         if (list == NULL)
2745                 return -1;
2746
2747         i = trace->filter_pids.nr = intlist__nr_entries(list) + 1;
2748         trace->filter_pids.entries = calloc(i, sizeof(pid_t));
2749
2750         if (trace->filter_pids.entries == NULL)
2751                 goto out;
2752
2753         trace->filter_pids.entries[0] = getpid();
2754
2755         for (i = 1; i < trace->filter_pids.nr; ++i)
2756                 trace->filter_pids.entries[i] = intlist__entry(list, i - 1)->i;
2757
2758         intlist__delete(list);
2759         ret = 0;
2760 out:
2761         return ret;
2762 }
2763
2764 static int trace__open_output(struct trace *trace, const char *filename)
2765 {
2766         struct stat st;
2767
2768         if (!stat(filename, &st) && st.st_size) {
2769                 char oldname[PATH_MAX];
2770
2771                 scnprintf(oldname, sizeof(oldname), "%s.old", filename);
2772                 unlink(oldname);
2773                 rename(filename, oldname);
2774         }
2775
2776         trace->output = fopen(filename, "w");
2777
2778         return trace->output == NULL ? -errno : 0;
2779 }
2780
2781 static int parse_pagefaults(const struct option *opt, const char *str,
2782                             int unset __maybe_unused)
2783 {
2784         int *trace_pgfaults = opt->value;
2785
2786         if (strcmp(str, "all") == 0)
2787                 *trace_pgfaults |= TRACE_PFMAJ | TRACE_PFMIN;
2788         else if (strcmp(str, "maj") == 0)
2789                 *trace_pgfaults |= TRACE_PFMAJ;
2790         else if (strcmp(str, "min") == 0)
2791                 *trace_pgfaults |= TRACE_PFMIN;
2792         else
2793                 return -1;
2794
2795         return 0;
2796 }
2797
2798 static void evlist__set_evsel_handler(struct perf_evlist *evlist, void *handler)
2799 {
2800         struct perf_evsel *evsel;
2801
2802         evlist__for_each(evlist, evsel)
2803                 evsel->handler = handler;
2804 }
2805
2806 int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
2807 {
2808         const char *trace_usage[] = {
2809                 "perf trace [<options>] [<command>]",
2810                 "perf trace [<options>] -- <command> [<options>]",
2811                 "perf trace record [<options>] [<command>]",
2812                 "perf trace record [<options>] -- <command> [<options>]",
2813                 NULL
2814         };
2815         struct trace trace = {
2816                 .audit = {
2817                         .machine = audit_detect_machine(),
2818                         .open_id = audit_name_to_syscall("open", trace.audit.machine),
2819                 },
2820                 .syscalls = {
2821                         . max = -1,
2822                 },
2823                 .opts = {
2824                         .target = {
2825                                 .uid       = UINT_MAX,
2826                                 .uses_mmap = true,
2827                         },
2828                         .user_freq     = UINT_MAX,
2829                         .user_interval = ULLONG_MAX,
2830                         .no_buffering  = true,
2831                         .mmap_pages    = UINT_MAX,
2832                         .proc_map_timeout  = 500,
2833                 },
2834                 .output = stdout,
2835                 .show_comm = true,
2836                 .trace_syscalls = true,
2837         };
2838         const char *output_name = NULL;
2839         const char *ev_qualifier_str = NULL;
2840         const struct option trace_options[] = {
2841         OPT_CALLBACK(0, "event", &trace.evlist, "event",
2842                      "event selector. use 'perf list' to list available events",
2843                      parse_events_option),
2844         OPT_BOOLEAN(0, "comm", &trace.show_comm,
2845                     "show the thread COMM next to its id"),
2846         OPT_BOOLEAN(0, "tool_stats", &trace.show_tool_stats, "show tool stats"),
2847         OPT_STRING('e', "expr", &ev_qualifier_str, "expr", "list of syscalls to trace"),
2848         OPT_STRING('o', "output", &output_name, "file", "output file name"),
2849         OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"),
2850         OPT_STRING('p', "pid", &trace.opts.target.pid, "pid",
2851                     "trace events on existing process id"),
2852         OPT_STRING('t', "tid", &trace.opts.target.tid, "tid",
2853                     "trace events on existing thread id"),
2854         OPT_CALLBACK(0, "filter-pids", &trace, "CSV list of pids",
2855                      "pids to filter (by the kernel)", trace__set_filter_pids),
2856         OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide,
2857                     "system-wide collection from all CPUs"),
2858         OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu",
2859                     "list of cpus to monitor"),
2860         OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit,
2861                     "child tasks do not inherit counters"),
2862         OPT_CALLBACK('m', "mmap-pages", &trace.opts.mmap_pages, "pages",
2863                      "number of mmap data pages",
2864                      perf_evlist__parse_mmap_pages),
2865         OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user",
2866                    "user to profile"),
2867         OPT_CALLBACK(0, "duration", &trace, "float",
2868                      "show only events with duration > N.M ms",
2869                      trace__set_duration),
2870         OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"),
2871         OPT_INCR('v', "verbose", &verbose, "be more verbose"),
2872         OPT_BOOLEAN('T', "time", &trace.full_time,
2873                     "Show full timestamp, not time relative to first start"),
2874         OPT_BOOLEAN('s', "summary", &trace.summary_only,
2875                     "Show only syscall summary with statistics"),
2876         OPT_BOOLEAN('S', "with-summary", &trace.summary,
2877                     "Show all syscalls and summary with statistics"),
2878         OPT_CALLBACK_DEFAULT('F', "pf", &trace.trace_pgfaults, "all|maj|min",
2879                      "Trace pagefaults", parse_pagefaults, "maj"),
2880         OPT_BOOLEAN(0, "syscalls", &trace.trace_syscalls, "Trace syscalls"),
2881         OPT_BOOLEAN('f', "force", &trace.force, "don't complain, do it"),
2882         OPT_UINTEGER(0, "proc-map-timeout", &trace.opts.proc_map_timeout,
2883                         "per thread proc mmap processing timeout in ms"),
2884         OPT_END()
2885         };
2886         const char * const trace_subcommands[] = { "record", NULL };
2887         int err;
2888         char bf[BUFSIZ];
2889
2890         signal(SIGSEGV, sighandler_dump_stack);
2891         signal(SIGFPE, sighandler_dump_stack);
2892
2893         trace.evlist = perf_evlist__new();
2894
2895         if (trace.evlist == NULL) {
2896                 pr_err("Not enough memory to run!\n");
2897                 err = -ENOMEM;
2898                 goto out;
2899         }
2900
2901         argc = parse_options_subcommand(argc, argv, trace_options, trace_subcommands,
2902                                  trace_usage, PARSE_OPT_STOP_AT_NON_OPTION);
2903
2904         if (trace.trace_pgfaults) {
2905                 trace.opts.sample_address = true;
2906                 trace.opts.sample_time = true;
2907         }
2908
2909         if (trace.evlist->nr_entries > 0)
2910                 evlist__set_evsel_handler(trace.evlist, trace__event_handler);
2911
2912         if ((argc >= 1) && (strcmp(argv[0], "record") == 0))
2913                 return trace__record(&trace, argc-1, &argv[1]);
2914
2915         /* summary_only implies summary option, but don't overwrite summary if set */
2916         if (trace.summary_only)
2917                 trace.summary = trace.summary_only;
2918
2919         if (!trace.trace_syscalls && !trace.trace_pgfaults &&
2920             trace.evlist->nr_entries == 0 /* Was --events used? */) {
2921                 pr_err("Please specify something to trace.\n");
2922                 return -1;
2923         }
2924
2925         if (output_name != NULL) {
2926                 err = trace__open_output(&trace, output_name);
2927                 if (err < 0) {
2928                         perror("failed to create output file");
2929                         goto out;
2930                 }
2931         }
2932
2933         if (ev_qualifier_str != NULL) {
2934                 const char *s = ev_qualifier_str;
2935                 struct strlist_config slist_config = {
2936                         .dirname = system_path(STRACE_GROUPS_DIR),
2937                 };
2938
2939                 trace.not_ev_qualifier = *s == '!';
2940                 if (trace.not_ev_qualifier)
2941                         ++s;
2942                 trace.ev_qualifier = strlist__new(s, &slist_config);
2943                 if (trace.ev_qualifier == NULL) {
2944                         fputs("Not enough memory to parse event qualifier",
2945                               trace.output);
2946                         err = -ENOMEM;
2947                         goto out_close;
2948                 }
2949
2950                 err = trace__validate_ev_qualifier(&trace);
2951                 if (err)
2952                         goto out_close;
2953         }
2954
2955         err = target__validate(&trace.opts.target);
2956         if (err) {
2957                 target__strerror(&trace.opts.target, err, bf, sizeof(bf));
2958                 fprintf(trace.output, "%s", bf);
2959                 goto out_close;
2960         }
2961
2962         err = target__parse_uid(&trace.opts.target);
2963         if (err) {
2964                 target__strerror(&trace.opts.target, err, bf, sizeof(bf));
2965                 fprintf(trace.output, "%s", bf);
2966                 goto out_close;
2967         }
2968
2969         if (!argc && target__none(&trace.opts.target))
2970                 trace.opts.target.system_wide = true;
2971
2972         if (input_name)
2973                 err = trace__replay(&trace);
2974         else
2975                 err = trace__run(&trace, argc, argv);
2976
2977 out_close:
2978         if (output_name != NULL)
2979                 fclose(trace.output);
2980 out:
2981         return err;
2982 }