Merge tag 'perf-core-for-mingo' of git://git.kernel.org/pub/scm/linux/kernel/git...
[firefly-linux-kernel-4.4.55.git] / tools / perf / builtin-trace.c
1 #include <traceevent/event-parse.h>
2 #include "builtin.h"
3 #include "util/color.h"
4 #include "util/debug.h"
5 #include "util/evlist.h"
6 #include "util/exec_cmd.h"
7 #include "util/machine.h"
8 #include "util/session.h"
9 #include "util/thread.h"
10 #include "util/parse-options.h"
11 #include "util/strlist.h"
12 #include "util/intlist.h"
13 #include "util/thread_map.h"
14 #include "util/stat.h"
15 #include "trace-event.h"
16 #include "util/parse-events.h"
17
18 #include <libaudit.h>
19 #include <stdlib.h>
20 #include <sys/mman.h>
21 #include <linux/futex.h>
22
/*
 * Compatibility fallbacks for older distros: each constant below is only
 * defined here when the system headers did not already provide it.
 */
#if !defined(MAP_STACK)
# define MAP_STACK		0x20000
#endif

#if !defined(MADV_HWPOISON)
# define MADV_HWPOISON		100
#endif

#if !defined(MADV_MERGEABLE)
# define MADV_MERGEABLE		12
#endif

#if !defined(MADV_UNMERGEABLE)
# define MADV_UNMERGEABLE	13
#endif

#if !defined(EFD_SEMAPHORE)
# define EFD_SEMAPHORE		1
#endif

#if !defined(EFD_NONBLOCK)
# define EFD_NONBLOCK		00004000
#endif

#if !defined(EFD_CLOEXEC)
# define EFD_CLOEXEC		02000000
#endif

#if !defined(O_CLOEXEC)
# define O_CLOEXEC		02000000
#endif

#if !defined(SOCK_DCCP)
# define SOCK_DCCP		6
#endif

#if !defined(SOCK_CLOEXEC)
# define SOCK_CLOEXEC		02000000
#endif

#if !defined(SOCK_NONBLOCK)
# define SOCK_NONBLOCK		00004000
#endif

#if !defined(MSG_CMSG_CLOEXEC)
# define MSG_CMSG_CLOEXEC	0x40000000
#endif

#if !defined(PERF_FLAG_FD_NO_GROUP)
# define PERF_FLAG_FD_NO_GROUP		(1UL << 0)
#endif

#if !defined(PERF_FLAG_FD_OUTPUT)
# define PERF_FLAG_FD_OUTPUT		(1UL << 1)
#endif

#if !defined(PERF_FLAG_PID_CGROUP)
# define PERF_FLAG_PID_CGROUP		(1UL << 2) /* pid=cgroup id, per-cpu mode only */
#endif

#if !defined(PERF_FLAG_FD_CLOEXEC)
# define PERF_FLAG_FD_CLOEXEC		(1UL << 3) /* O_CLOEXEC */
#endif
87
88
89 struct tp_field {
90         int offset;
91         union {
92                 u64 (*integer)(struct tp_field *field, struct perf_sample *sample);
93                 void *(*pointer)(struct tp_field *field, struct perf_sample *sample);
94         };
95 };
96
97 #define TP_UINT_FIELD(bits) \
98 static u64 tp_field__u##bits(struct tp_field *field, struct perf_sample *sample) \
99 { \
100         u##bits value; \
101         memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
102         return value;  \
103 }
104
105 TP_UINT_FIELD(8);
106 TP_UINT_FIELD(16);
107 TP_UINT_FIELD(32);
108 TP_UINT_FIELD(64);
109
110 #define TP_UINT_FIELD__SWAPPED(bits) \
111 static u64 tp_field__swapped_u##bits(struct tp_field *field, struct perf_sample *sample) \
112 { \
113         u##bits value; \
114         memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
115         return bswap_##bits(value);\
116 }
117
118 TP_UINT_FIELD__SWAPPED(16);
119 TP_UINT_FIELD__SWAPPED(32);
120 TP_UINT_FIELD__SWAPPED(64);
121
122 static int tp_field__init_uint(struct tp_field *field,
123                                struct format_field *format_field,
124                                bool needs_swap)
125 {
126         field->offset = format_field->offset;
127
128         switch (format_field->size) {
129         case 1:
130                 field->integer = tp_field__u8;
131                 break;
132         case 2:
133                 field->integer = needs_swap ? tp_field__swapped_u16 : tp_field__u16;
134                 break;
135         case 4:
136                 field->integer = needs_swap ? tp_field__swapped_u32 : tp_field__u32;
137                 break;
138         case 8:
139                 field->integer = needs_swap ? tp_field__swapped_u64 : tp_field__u64;
140                 break;
141         default:
142                 return -1;
143         }
144
145         return 0;
146 }
147
148 static void *tp_field__ptr(struct tp_field *field, struct perf_sample *sample)
149 {
150         return sample->raw_data + field->offset;
151 }
152
153 static int tp_field__init_ptr(struct tp_field *field, struct format_field *format_field)
154 {
155         field->offset = format_field->offset;
156         field->pointer = tp_field__ptr;
157         return 0;
158 }
159
160 struct syscall_tp {
161         struct tp_field id;
162         union {
163                 struct tp_field args, ret;
164         };
165 };
166
167 static int perf_evsel__init_tp_uint_field(struct perf_evsel *evsel,
168                                           struct tp_field *field,
169                                           const char *name)
170 {
171         struct format_field *format_field = perf_evsel__field(evsel, name);
172
173         if (format_field == NULL)
174                 return -1;
175
176         return tp_field__init_uint(field, format_field, evsel->needs_swap);
177 }
178
179 #define perf_evsel__init_sc_tp_uint_field(evsel, name) \
180         ({ struct syscall_tp *sc = evsel->priv;\
181            perf_evsel__init_tp_uint_field(evsel, &sc->name, #name); })
182
/*
 * Look up the tracepoint field called @name in @evsel and initialize
 * @field as a pointer reader for it.
 *
 * Returns -1 if the field does not exist, otherwise whatever
 * tp_field__init_ptr() returns.
 */
static int perf_evsel__init_tp_ptr_field(struct perf_evsel *evsel,
					 struct tp_field *field,
					 const char *name)
{
	struct format_field *fmt = perf_evsel__field(evsel, name);

	if (!fmt)
		return -1;

	return tp_field__init_ptr(field, fmt);
}

/*
 * Convenience wrapper: @name is both the member of the struct syscall_tp
 * hanging off evsel->priv and, stringified, the tracepoint field name.
 */
#define perf_evsel__init_sc_tp_ptr_field(evsel, name) \
	({ struct syscall_tp *sc = evsel->priv; \
	   perf_evsel__init_tp_ptr_field(evsel, &sc->name, #name); })
198
199 static void perf_evsel__delete_priv(struct perf_evsel *evsel)
200 {
201         zfree(&evsel->priv);
202         perf_evsel__delete(evsel);
203 }
204
205 static int perf_evsel__init_syscall_tp(struct perf_evsel *evsel, void *handler)
206 {
207         evsel->priv = malloc(sizeof(struct syscall_tp));
208         if (evsel->priv != NULL) {
209                 if (perf_evsel__init_sc_tp_uint_field(evsel, id))
210                         goto out_delete;
211
212                 evsel->handler = handler;
213                 return 0;
214         }
215
216         return -ENOMEM;
217
218 out_delete:
219         zfree(&evsel->priv);
220         return -ENOENT;
221 }
222
223 static struct perf_evsel *perf_evsel__syscall_newtp(const char *direction, void *handler)
224 {
225         struct perf_evsel *evsel = perf_evsel__newtp("raw_syscalls", direction);
226
227         /* older kernel (e.g., RHEL6) use syscalls:{enter,exit} */
228         if (evsel == NULL)
229                 evsel = perf_evsel__newtp("syscalls", direction);
230
231         if (evsel) {
232                 if (perf_evsel__init_syscall_tp(evsel, handler))
233                         goto out_delete;
234         }
235
236         return evsel;
237
238 out_delete:
239         perf_evsel__delete_priv(evsel);
240         return NULL;
241 }
242
/*
 * Read field @name of the struct syscall_tp stored in evsel->priv from
 * @sample, through the integer reader installed for that field.
 */
#define perf_evsel__sc_tp_uint(evsel, name, sample) \
	({ struct syscall_tp *sc_tp = evsel->priv; \
	   sc_tp->name.integer(&sc_tp->name, sample); })

/* Same as above, but through the pointer reader installed for the field. */
#define perf_evsel__sc_tp_ptr(evsel, name, sample) \
	({ struct syscall_tp *sc_tp = evsel->priv; \
	   sc_tp->name.pointer(&sc_tp->name, sample); })
250
251 struct syscall_arg {
252         unsigned long val;
253         struct thread *thread;
254         struct trace  *trace;
255         void          *parm;
256         u8            idx;
257         u8            mask;
258 };
259
/*
 * Table of names for a small integer range.
 *
 * @offset:     value that maps to entries[0]
 * @nr_entries: number of slots in @entries
 * @entries:    the names, indexed by (value - offset)
 */
struct strarray {
	int	    offset;
	int	    nr_entries;
	const char **entries;
};
265
/*
 * Define 'strarray__<array>' describing the string table @array, whose
 * first entry names the value @off.
 */
#define DEFINE_STRARRAY_OFFSET(array, off) struct strarray strarray__##array = { \
	.offset	    = off, \
	.nr_entries = ARRAY_SIZE(array), \
	.entries    = array, \
}

/* Same, for tables whose first entry names the value 0. */
#define DEFINE_STRARRAY(array) DEFINE_STRARRAY_OFFSET(array, 0)
276
277 static size_t __syscall_arg__scnprintf_strarray(char *bf, size_t size,
278                                                 const char *intfmt,
279                                                 struct syscall_arg *arg)
280 {
281         struct strarray *sa = arg->parm;
282         int idx = arg->val - sa->offset;
283
284         if (idx < 0 || idx >= sa->nr_entries)
285                 return scnprintf(bf, size, intfmt, arg->val);
286
287         return scnprintf(bf, size, "%s", sa->entries[idx]);
288 }
289
/* strarray printer whose numeric fallback is decimal ("%d"). */
static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size,
					      struct syscall_arg *arg)
{
	const char *fallback_fmt = "%d";

	return __syscall_arg__scnprintf_strarray(bf, size, fallback_fmt, arg);
}

#define SCA_STRARRAY syscall_arg__scnprintf_strarray
297
#if defined(__i386__) || defined(__x86_64__)
/*
 * FIXME: Make this available to all arches as soon as the ioctl beautifier
 *	  gets rewritten to support all arches.
 */

/* strarray printer whose numeric fallback is hexadecimal ("%#x"). */
static size_t syscall_arg__scnprintf_strhexarray(char *bf, size_t size,
						 struct syscall_arg *arg)
{
	const char *fallback_fmt = "%#x";

	return __syscall_arg__scnprintf_strarray(bf, size, fallback_fmt, arg);
}

#define SCA_STRHEXARRAY syscall_arg__scnprintf_strhexarray
#endif /* defined(__i386__) || defined(__x86_64__) */
311
312 static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
313                                         struct syscall_arg *arg);
314
315 #define SCA_FD syscall_arg__scnprintf_fd
316
317 static size_t syscall_arg__scnprintf_fd_at(char *bf, size_t size,
318                                            struct syscall_arg *arg)
319 {
320         int fd = arg->val;
321
322         if (fd == AT_FDCWD)
323                 return scnprintf(bf, size, "CWD");
324
325         return syscall_arg__scnprintf_fd(bf, size, arg);
326 }
327
328 #define SCA_FDAT syscall_arg__scnprintf_fd_at
329
/* Forward declaration so SCA_CLOSE_FD can be used before the definition. */
static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
					      struct syscall_arg *arg);

#define SCA_CLOSE_FD syscall_arg__scnprintf_close_fd
334
335 static size_t syscall_arg__scnprintf_hex(char *bf, size_t size,
336                                          struct syscall_arg *arg)
337 {
338         return scnprintf(bf, size, "%#lx", arg->val);
339 }
340
341 #define SCA_HEX syscall_arg__scnprintf_hex
342
343 static size_t syscall_arg__scnprintf_int(char *bf, size_t size,
344                                          struct syscall_arg *arg)
345 {
346         return scnprintf(bf, size, "%d", arg->val);
347 }
348
349 #define SCA_INT syscall_arg__scnprintf_int
350
351 static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size,
352                                                struct syscall_arg *arg)
353 {
354         int printed = 0, prot = arg->val;
355
356         if (prot == PROT_NONE)
357                 return scnprintf(bf, size, "NONE");
358 #define P_MMAP_PROT(n) \
359         if (prot & PROT_##n) { \
360                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
361                 prot &= ~PROT_##n; \
362         }
363
364         P_MMAP_PROT(EXEC);
365         P_MMAP_PROT(READ);
366         P_MMAP_PROT(WRITE);
367 #ifdef PROT_SEM
368         P_MMAP_PROT(SEM);
369 #endif
370         P_MMAP_PROT(GROWSDOWN);
371         P_MMAP_PROT(GROWSUP);
372 #undef P_MMAP_PROT
373
374         if (prot)
375                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", prot);
376
377         return printed;
378 }
379
380 #define SCA_MMAP_PROT syscall_arg__scnprintf_mmap_prot
381
382 static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size,
383                                                 struct syscall_arg *arg)
384 {
385         int printed = 0, flags = arg->val;
386
387 #define P_MMAP_FLAG(n) \
388         if (flags & MAP_##n) { \
389                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
390                 flags &= ~MAP_##n; \
391         }
392
393         P_MMAP_FLAG(SHARED);
394         P_MMAP_FLAG(PRIVATE);
395 #ifdef MAP_32BIT
396         P_MMAP_FLAG(32BIT);
397 #endif
398         P_MMAP_FLAG(ANONYMOUS);
399         P_MMAP_FLAG(DENYWRITE);
400         P_MMAP_FLAG(EXECUTABLE);
401         P_MMAP_FLAG(FILE);
402         P_MMAP_FLAG(FIXED);
403         P_MMAP_FLAG(GROWSDOWN);
404 #ifdef MAP_HUGETLB
405         P_MMAP_FLAG(HUGETLB);
406 #endif
407         P_MMAP_FLAG(LOCKED);
408         P_MMAP_FLAG(NONBLOCK);
409         P_MMAP_FLAG(NORESERVE);
410         P_MMAP_FLAG(POPULATE);
411         P_MMAP_FLAG(STACK);
412 #ifdef MAP_UNINITIALIZED
413         P_MMAP_FLAG(UNINITIALIZED);
414 #endif
415 #undef P_MMAP_FLAG
416
417         if (flags)
418                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
419
420         return printed;
421 }
422
423 #define SCA_MMAP_FLAGS syscall_arg__scnprintf_mmap_flags
424
425 static size_t syscall_arg__scnprintf_mremap_flags(char *bf, size_t size,
426                                                   struct syscall_arg *arg)
427 {
428         int printed = 0, flags = arg->val;
429
430 #define P_MREMAP_FLAG(n) \
431         if (flags & MREMAP_##n) { \
432                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
433                 flags &= ~MREMAP_##n; \
434         }
435
436         P_MREMAP_FLAG(MAYMOVE);
437 #ifdef MREMAP_FIXED
438         P_MREMAP_FLAG(FIXED);
439 #endif
440 #undef P_MREMAP_FLAG
441
442         if (flags)
443                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
444
445         return printed;
446 }
447
448 #define SCA_MREMAP_FLAGS syscall_arg__scnprintf_mremap_flags
449
450 static size_t syscall_arg__scnprintf_madvise_behavior(char *bf, size_t size,
451                                                       struct syscall_arg *arg)
452 {
453         int behavior = arg->val;
454
455         switch (behavior) {
456 #define P_MADV_BHV(n) case MADV_##n: return scnprintf(bf, size, #n)
457         P_MADV_BHV(NORMAL);
458         P_MADV_BHV(RANDOM);
459         P_MADV_BHV(SEQUENTIAL);
460         P_MADV_BHV(WILLNEED);
461         P_MADV_BHV(DONTNEED);
462         P_MADV_BHV(REMOVE);
463         P_MADV_BHV(DONTFORK);
464         P_MADV_BHV(DOFORK);
465         P_MADV_BHV(HWPOISON);
466 #ifdef MADV_SOFT_OFFLINE
467         P_MADV_BHV(SOFT_OFFLINE);
468 #endif
469         P_MADV_BHV(MERGEABLE);
470         P_MADV_BHV(UNMERGEABLE);
471 #ifdef MADV_HUGEPAGE
472         P_MADV_BHV(HUGEPAGE);
473 #endif
474 #ifdef MADV_NOHUGEPAGE
475         P_MADV_BHV(NOHUGEPAGE);
476 #endif
477 #ifdef MADV_DONTDUMP
478         P_MADV_BHV(DONTDUMP);
479 #endif
480 #ifdef MADV_DODUMP
481         P_MADV_BHV(DODUMP);
482 #endif
483 #undef P_MADV_PHV
484         default: break;
485         }
486
487         return scnprintf(bf, size, "%#x", behavior);
488 }
489
490 #define SCA_MADV_BHV syscall_arg__scnprintf_madvise_behavior
491
492 static size_t syscall_arg__scnprintf_flock(char *bf, size_t size,
493                                            struct syscall_arg *arg)
494 {
495         int printed = 0, op = arg->val;
496
497         if (op == 0)
498                 return scnprintf(bf, size, "NONE");
499 #define P_CMD(cmd) \
500         if ((op & LOCK_##cmd) == LOCK_##cmd) { \
501                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #cmd); \
502                 op &= ~LOCK_##cmd; \
503         }
504
505         P_CMD(SH);
506         P_CMD(EX);
507         P_CMD(NB);
508         P_CMD(UN);
509         P_CMD(MAND);
510         P_CMD(RW);
511         P_CMD(READ);
512         P_CMD(WRITE);
513 #undef P_OP
514
515         if (op)
516                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", op);
517
518         return printed;
519 }
520
521 #define SCA_FLOCK syscall_arg__scnprintf_flock
522
523 static size_t syscall_arg__scnprintf_futex_op(char *bf, size_t size, struct syscall_arg *arg)
524 {
525         enum syscall_futex_args {
526                 SCF_UADDR   = (1 << 0),
527                 SCF_OP      = (1 << 1),
528                 SCF_VAL     = (1 << 2),
529                 SCF_TIMEOUT = (1 << 3),
530                 SCF_UADDR2  = (1 << 4),
531                 SCF_VAL3    = (1 << 5),
532         };
533         int op = arg->val;
534         int cmd = op & FUTEX_CMD_MASK;
535         size_t printed = 0;
536
537         switch (cmd) {
538 #define P_FUTEX_OP(n) case FUTEX_##n: printed = scnprintf(bf, size, #n);
539         P_FUTEX_OP(WAIT);           arg->mask |= SCF_VAL3|SCF_UADDR2;             break;
540         P_FUTEX_OP(WAKE);           arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
541         P_FUTEX_OP(FD);             arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
542         P_FUTEX_OP(REQUEUE);        arg->mask |= SCF_VAL3|SCF_TIMEOUT;            break;
543         P_FUTEX_OP(CMP_REQUEUE);    arg->mask |= SCF_TIMEOUT;                     break;
544         P_FUTEX_OP(CMP_REQUEUE_PI); arg->mask |= SCF_TIMEOUT;                     break;
545         P_FUTEX_OP(WAKE_OP);                                                      break;
546         P_FUTEX_OP(LOCK_PI);        arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
547         P_FUTEX_OP(UNLOCK_PI);      arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
548         P_FUTEX_OP(TRYLOCK_PI);     arg->mask |= SCF_VAL3|SCF_UADDR2;             break;
549         P_FUTEX_OP(WAIT_BITSET);    arg->mask |= SCF_UADDR2;                      break;
550         P_FUTEX_OP(WAKE_BITSET);    arg->mask |= SCF_UADDR2;                      break;
551         P_FUTEX_OP(WAIT_REQUEUE_PI);                                              break;
552         default: printed = scnprintf(bf, size, "%#x", cmd);                       break;
553         }
554
555         if (op & FUTEX_PRIVATE_FLAG)
556                 printed += scnprintf(bf + printed, size - printed, "|PRIV");
557
558         if (op & FUTEX_CLOCK_REALTIME)
559                 printed += scnprintf(bf + printed, size - printed, "|CLKRT");
560
561         return printed;
562 }
563
564 #define SCA_FUTEX_OP  syscall_arg__scnprintf_futex_op
565
566 static const char *epoll_ctl_ops[] = { "ADD", "DEL", "MOD", };
567 static DEFINE_STRARRAY_OFFSET(epoll_ctl_ops, 1);
568
569 static const char *itimers[] = { "REAL", "VIRTUAL", "PROF", };
570 static DEFINE_STRARRAY(itimers);
571
572 static const char *whences[] = { "SET", "CUR", "END",
573 #ifdef SEEK_DATA
574 "DATA",
575 #endif
576 #ifdef SEEK_HOLE
577 "HOLE",
578 #endif
579 };
580 static DEFINE_STRARRAY(whences);
581
582 static const char *fcntl_cmds[] = {
583         "DUPFD", "GETFD", "SETFD", "GETFL", "SETFL", "GETLK", "SETLK",
584         "SETLKW", "SETOWN", "GETOWN", "SETSIG", "GETSIG", "F_GETLK64",
585         "F_SETLK64", "F_SETLKW64", "F_SETOWN_EX", "F_GETOWN_EX",
586         "F_GETOWNER_UIDS",
587 };
588 static DEFINE_STRARRAY(fcntl_cmds);
589
590 static const char *rlimit_resources[] = {
591         "CPU", "FSIZE", "DATA", "STACK", "CORE", "RSS", "NPROC", "NOFILE",
592         "MEMLOCK", "AS", "LOCKS", "SIGPENDING", "MSGQUEUE", "NICE", "RTPRIO",
593         "RTTIME",
594 };
595 static DEFINE_STRARRAY(rlimit_resources);
596
597 static const char *sighow[] = { "BLOCK", "UNBLOCK", "SETMASK", };
598 static DEFINE_STRARRAY(sighow);
599
600 static const char *clockid[] = {
601         "REALTIME", "MONOTONIC", "PROCESS_CPUTIME_ID", "THREAD_CPUTIME_ID",
602         "MONOTONIC_RAW", "REALTIME_COARSE", "MONOTONIC_COARSE",
603 };
604 static DEFINE_STRARRAY(clockid);
605
/*
 * Socket address family names, indexed by the AF_* value (without the AF_
 * prefix). The table has to stay dense and in numeric order: "MPLS"
 * (AF_MPLS, 28) sits between "IB" (27) and "CAN" (29); leaving it out
 * shifts every family from CAN onwards down by one.
 */
static const char *socket_families[] = {
	"UNSPEC", "LOCAL", "INET", "AX25", "IPX", "APPLETALK", "NETROM",
	"BRIDGE", "ATMPVC", "X25", "INET6", "ROSE", "DECnet", "NETBEUI",
	"SECURITY", "KEY", "NETLINK", "PACKET", "ASH", "ECONET", "ATMSVC",
	"RDS", "SNA", "IRDA", "PPPOX", "WANPIPE", "LLC", "IB", "MPLS", "CAN",
	"TIPC", "BLUETOOTH", "IUCV", "RXRPC", "ISDN", "PHONET", "IEEE802154",
	"CAIF", "ALG", "NFC", "VSOCK",
};
static DEFINE_STRARRAY(socket_families);
615
616 #ifndef SOCK_TYPE_MASK
617 #define SOCK_TYPE_MASK 0xf
618 #endif
619
620 static size_t syscall_arg__scnprintf_socket_type(char *bf, size_t size,
621                                                       struct syscall_arg *arg)
622 {
623         size_t printed;
624         int type = arg->val,
625             flags = type & ~SOCK_TYPE_MASK;
626
627         type &= SOCK_TYPE_MASK;
628         /*
629          * Can't use a strarray, MIPS may override for ABI reasons.
630          */
631         switch (type) {
632 #define P_SK_TYPE(n) case SOCK_##n: printed = scnprintf(bf, size, #n); break;
633         P_SK_TYPE(STREAM);
634         P_SK_TYPE(DGRAM);
635         P_SK_TYPE(RAW);
636         P_SK_TYPE(RDM);
637         P_SK_TYPE(SEQPACKET);
638         P_SK_TYPE(DCCP);
639         P_SK_TYPE(PACKET);
640 #undef P_SK_TYPE
641         default:
642                 printed = scnprintf(bf, size, "%#x", type);
643         }
644
645 #define P_SK_FLAG(n) \
646         if (flags & SOCK_##n) { \
647                 printed += scnprintf(bf + printed, size - printed, "|%s", #n); \
648                 flags &= ~SOCK_##n; \
649         }
650
651         P_SK_FLAG(CLOEXEC);
652         P_SK_FLAG(NONBLOCK);
653 #undef P_SK_FLAG
654
655         if (flags)
656                 printed += scnprintf(bf + printed, size - printed, "|%#x", flags);
657
658         return printed;
659 }
660
661 #define SCA_SK_TYPE syscall_arg__scnprintf_socket_type
662
663 #ifndef MSG_PROBE
664 #define MSG_PROBE            0x10
665 #endif
666 #ifndef MSG_WAITFORONE
667 #define MSG_WAITFORONE  0x10000
668 #endif
669 #ifndef MSG_SENDPAGE_NOTLAST
670 #define MSG_SENDPAGE_NOTLAST 0x20000
671 #endif
672 #ifndef MSG_FASTOPEN
673 #define MSG_FASTOPEN         0x20000000
674 #endif
675
676 static size_t syscall_arg__scnprintf_msg_flags(char *bf, size_t size,
677                                                struct syscall_arg *arg)
678 {
679         int printed = 0, flags = arg->val;
680
681         if (flags == 0)
682                 return scnprintf(bf, size, "NONE");
683 #define P_MSG_FLAG(n) \
684         if (flags & MSG_##n) { \
685                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
686                 flags &= ~MSG_##n; \
687         }
688
689         P_MSG_FLAG(OOB);
690         P_MSG_FLAG(PEEK);
691         P_MSG_FLAG(DONTROUTE);
692         P_MSG_FLAG(TRYHARD);
693         P_MSG_FLAG(CTRUNC);
694         P_MSG_FLAG(PROBE);
695         P_MSG_FLAG(TRUNC);
696         P_MSG_FLAG(DONTWAIT);
697         P_MSG_FLAG(EOR);
698         P_MSG_FLAG(WAITALL);
699         P_MSG_FLAG(FIN);
700         P_MSG_FLAG(SYN);
701         P_MSG_FLAG(CONFIRM);
702         P_MSG_FLAG(RST);
703         P_MSG_FLAG(ERRQUEUE);
704         P_MSG_FLAG(NOSIGNAL);
705         P_MSG_FLAG(MORE);
706         P_MSG_FLAG(WAITFORONE);
707         P_MSG_FLAG(SENDPAGE_NOTLAST);
708         P_MSG_FLAG(FASTOPEN);
709         P_MSG_FLAG(CMSG_CLOEXEC);
710 #undef P_MSG_FLAG
711
712         if (flags)
713                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
714
715         return printed;
716 }
717
718 #define SCA_MSG_FLAGS syscall_arg__scnprintf_msg_flags
719
720 static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size,
721                                                  struct syscall_arg *arg)
722 {
723         size_t printed = 0;
724         int mode = arg->val;
725
726         if (mode == F_OK) /* 0 */
727                 return scnprintf(bf, size, "F");
728 #define P_MODE(n) \
729         if (mode & n##_OK) { \
730                 printed += scnprintf(bf + printed, size - printed, "%s", #n); \
731                 mode &= ~n##_OK; \
732         }
733
734         P_MODE(R);
735         P_MODE(W);
736         P_MODE(X);
737 #undef P_MODE
738
739         if (mode)
740                 printed += scnprintf(bf + printed, size - printed, "|%#x", mode);
741
742         return printed;
743 }
744
745 #define SCA_ACCMODE syscall_arg__scnprintf_access_mode
746
747 static size_t syscall_arg__scnprintf_open_flags(char *bf, size_t size,
748                                                struct syscall_arg *arg)
749 {
750         int printed = 0, flags = arg->val;
751
752         if (!(flags & O_CREAT))
753                 arg->mask |= 1 << (arg->idx + 1); /* Mask the mode parm */
754
755         if (flags == 0)
756                 return scnprintf(bf, size, "RDONLY");
757 #define P_FLAG(n) \
758         if (flags & O_##n) { \
759                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
760                 flags &= ~O_##n; \
761         }
762
763         P_FLAG(APPEND);
764         P_FLAG(ASYNC);
765         P_FLAG(CLOEXEC);
766         P_FLAG(CREAT);
767         P_FLAG(DIRECT);
768         P_FLAG(DIRECTORY);
769         P_FLAG(EXCL);
770         P_FLAG(LARGEFILE);
771         P_FLAG(NOATIME);
772         P_FLAG(NOCTTY);
773 #ifdef O_NONBLOCK
774         P_FLAG(NONBLOCK);
775 #elif O_NDELAY
776         P_FLAG(NDELAY);
777 #endif
778 #ifdef O_PATH
779         P_FLAG(PATH);
780 #endif
781         P_FLAG(RDWR);
782 #ifdef O_DSYNC
783         if ((flags & O_SYNC) == O_SYNC)
784                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", "SYNC");
785         else {
786                 P_FLAG(DSYNC);
787         }
788 #else
789         P_FLAG(SYNC);
790 #endif
791         P_FLAG(TRUNC);
792         P_FLAG(WRONLY);
793 #undef P_FLAG
794
795         if (flags)
796                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
797
798         return printed;
799 }
800
801 #define SCA_OPEN_FLAGS syscall_arg__scnprintf_open_flags
802
803 static size_t syscall_arg__scnprintf_perf_flags(char *bf, size_t size,
804                                                 struct syscall_arg *arg)
805 {
806         int printed = 0, flags = arg->val;
807
808         if (flags == 0)
809                 return 0;
810
811 #define P_FLAG(n) \
812         if (flags & PERF_FLAG_##n) { \
813                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
814                 flags &= ~PERF_FLAG_##n; \
815         }
816
817         P_FLAG(FD_NO_GROUP);
818         P_FLAG(FD_OUTPUT);
819         P_FLAG(PID_CGROUP);
820         P_FLAG(FD_CLOEXEC);
821 #undef P_FLAG
822
823         if (flags)
824                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
825
826         return printed;
827 }
828
829 #define SCA_PERF_FLAGS syscall_arg__scnprintf_perf_flags
830
831 static size_t syscall_arg__scnprintf_eventfd_flags(char *bf, size_t size,
832                                                    struct syscall_arg *arg)
833 {
834         int printed = 0, flags = arg->val;
835
836         if (flags == 0)
837                 return scnprintf(bf, size, "NONE");
838 #define P_FLAG(n) \
839         if (flags & EFD_##n) { \
840                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
841                 flags &= ~EFD_##n; \
842         }
843
844         P_FLAG(SEMAPHORE);
845         P_FLAG(CLOEXEC);
846         P_FLAG(NONBLOCK);
847 #undef P_FLAG
848
849         if (flags)
850                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
851
852         return printed;
853 }
854
855 #define SCA_EFD_FLAGS syscall_arg__scnprintf_eventfd_flags
856
857 static size_t syscall_arg__scnprintf_pipe_flags(char *bf, size_t size,
858                                                 struct syscall_arg *arg)
859 {
860         int printed = 0, flags = arg->val;
861
862 #define P_FLAG(n) \
863         if (flags & O_##n) { \
864                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
865                 flags &= ~O_##n; \
866         }
867
868         P_FLAG(CLOEXEC);
869         P_FLAG(NONBLOCK);
870 #undef P_FLAG
871
872         if (flags)
873                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
874
875         return printed;
876 }
877
878 #define SCA_PIPE_FLAGS syscall_arg__scnprintf_pipe_flags
879
880 static size_t syscall_arg__scnprintf_signum(char *bf, size_t size, struct syscall_arg *arg)
881 {
882         int sig = arg->val;
883
884         switch (sig) {
885 #define P_SIGNUM(n) case SIG##n: return scnprintf(bf, size, #n)
886         P_SIGNUM(HUP);
887         P_SIGNUM(INT);
888         P_SIGNUM(QUIT);
889         P_SIGNUM(ILL);
890         P_SIGNUM(TRAP);
891         P_SIGNUM(ABRT);
892         P_SIGNUM(BUS);
893         P_SIGNUM(FPE);
894         P_SIGNUM(KILL);
895         P_SIGNUM(USR1);
896         P_SIGNUM(SEGV);
897         P_SIGNUM(USR2);
898         P_SIGNUM(PIPE);
899         P_SIGNUM(ALRM);
900         P_SIGNUM(TERM);
901         P_SIGNUM(CHLD);
902         P_SIGNUM(CONT);
903         P_SIGNUM(STOP);
904         P_SIGNUM(TSTP);
905         P_SIGNUM(TTIN);
906         P_SIGNUM(TTOU);
907         P_SIGNUM(URG);
908         P_SIGNUM(XCPU);
909         P_SIGNUM(XFSZ);
910         P_SIGNUM(VTALRM);
911         P_SIGNUM(PROF);
912         P_SIGNUM(WINCH);
913         P_SIGNUM(IO);
914         P_SIGNUM(PWR);
915         P_SIGNUM(SYS);
916 #ifdef SIGEMT
917         P_SIGNUM(EMT);
918 #endif
919 #ifdef SIGSTKFLT
920         P_SIGNUM(STKFLT);
921 #endif
922 #ifdef SIGSWI
923         P_SIGNUM(SWI);
924 #endif
925         default: break;
926         }
927
928         return scnprintf(bf, size, "%#x", sig);
929 }
930
931 #define SCA_SIGNUM syscall_arg__scnprintf_signum
932
933 #if defined(__i386__) || defined(__x86_64__)
934 /*
935  * FIXME: Make this available to all arches.
936  */
937 #define TCGETS          0x5401
938
939 static const char *tioctls[] = {
940         "TCGETS", "TCSETS", "TCSETSW", "TCSETSF", "TCGETA", "TCSETA", "TCSETAW",
941         "TCSETAF", "TCSBRK", "TCXONC", "TCFLSH", "TIOCEXCL", "TIOCNXCL",
942         "TIOCSCTTY", "TIOCGPGRP", "TIOCSPGRP", "TIOCOUTQ", "TIOCSTI",
943         "TIOCGWINSZ", "TIOCSWINSZ", "TIOCMGET", "TIOCMBIS", "TIOCMBIC",
944         "TIOCMSET", "TIOCGSOFTCAR", "TIOCSSOFTCAR", "FIONREAD", "TIOCLINUX",
945         "TIOCCONS", "TIOCGSERIAL", "TIOCSSERIAL", "TIOCPKT", "FIONBIO",
946         "TIOCNOTTY", "TIOCSETD", "TIOCGETD", "TCSBRKP", [0x27] = "TIOCSBRK",
947         "TIOCCBRK", "TIOCGSID", "TCGETS2", "TCSETS2", "TCSETSW2", "TCSETSF2",
948         "TIOCGRS485", "TIOCSRS485", "TIOCGPTN", "TIOCSPTLCK",
949         "TIOCGDEV||TCGETX", "TCSETX", "TCSETXF", "TCSETXW", "TIOCSIG",
950         "TIOCVHANGUP", "TIOCGPKT", "TIOCGPTLCK", "TIOCGEXCL",
951         [0x50] = "FIONCLEX", "FIOCLEX", "FIOASYNC", "TIOCSERCONFIG",
952         "TIOCSERGWILD", "TIOCSERSWILD", "TIOCGLCKTRMIOS", "TIOCSLCKTRMIOS",
953         "TIOCSERGSTRUCT", "TIOCSERGETLSR", "TIOCSERGETMULTI", "TIOCSERSETMULTI",
954         "TIOCMIWAIT", "TIOCGICOUNT", [0x60] = "FIOQSIZE",
955 };
956
957 static DEFINE_STRARRAY_OFFSET(tioctls, 0x5401);
958 #endif /* defined(__i386__) || defined(__x86_64__) */
959
/*
 * Initializer fragment for a struct syscall_fmt entry: argument number @arg
 * is printed by the strarray printer using the table strarray__<array>.
 * @name is not expanded; it only labels the argument at the use site.
 */
#define STRARRAY(arg, name, array) \
	.arg_scnprintf = { [arg] = SCA_STRARRAY, }, \
	.arg_parm      = { [arg] = &strarray__##array, }
963
964 static struct syscall_fmt {
965         const char *name;
966         const char *alias;
967         size_t     (*arg_scnprintf[6])(char *bf, size_t size, struct syscall_arg *arg);
968         void       *arg_parm[6];
969         bool       errmsg;
970         bool       timeout;
971         bool       hexret;
972 } syscall_fmts[] = {
973         { .name     = "access",     .errmsg = true,
974           .arg_scnprintf = { [1] = SCA_ACCMODE, /* mode */ }, },
975         { .name     = "arch_prctl", .errmsg = true, .alias = "prctl", },
976         { .name     = "brk",        .hexret = true,
977           .arg_scnprintf = { [0] = SCA_HEX, /* brk */ }, },
978         { .name     = "clock_gettime",  .errmsg = true, STRARRAY(0, clk_id, clockid), },
979         { .name     = "close",      .errmsg = true,
980           .arg_scnprintf = { [0] = SCA_CLOSE_FD, /* fd */ }, },
981         { .name     = "connect",    .errmsg = true, },
982         { .name     = "dup",        .errmsg = true,
983           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
984         { .name     = "dup2",       .errmsg = true,
985           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
986         { .name     = "dup3",       .errmsg = true,
987           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
988         { .name     = "epoll_ctl",  .errmsg = true, STRARRAY(1, op, epoll_ctl_ops), },
989         { .name     = "eventfd2",   .errmsg = true,
990           .arg_scnprintf = { [1] = SCA_EFD_FLAGS, /* flags */ }, },
991         { .name     = "faccessat",  .errmsg = true,
992           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
993         { .name     = "fadvise64",  .errmsg = true,
994           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
995         { .name     = "fallocate",  .errmsg = true,
996           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
997         { .name     = "fchdir",     .errmsg = true,
998           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
999         { .name     = "fchmod",     .errmsg = true,
1000           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1001         { .name     = "fchmodat",   .errmsg = true,
1002           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
1003         { .name     = "fchown",     .errmsg = true,
1004           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1005         { .name     = "fchownat",   .errmsg = true,
1006           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
1007         { .name     = "fcntl",      .errmsg = true,
1008           .arg_scnprintf = { [0] = SCA_FD, /* fd */
1009                              [1] = SCA_STRARRAY, /* cmd */ },
1010           .arg_parm      = { [1] = &strarray__fcntl_cmds, /* cmd */ }, },
1011         { .name     = "fdatasync",  .errmsg = true,
1012           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1013         { .name     = "flock",      .errmsg = true,
1014           .arg_scnprintf = { [0] = SCA_FD, /* fd */
1015                              [1] = SCA_FLOCK, /* cmd */ }, },
1016         { .name     = "fsetxattr",  .errmsg = true,
1017           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1018         { .name     = "fstat",      .errmsg = true, .alias = "newfstat",
1019           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1020         { .name     = "fstatat",    .errmsg = true, .alias = "newfstatat",
1021           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1022         { .name     = "fstatfs",    .errmsg = true,
1023           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1024         { .name     = "fsync",    .errmsg = true,
1025           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1026         { .name     = "ftruncate", .errmsg = true,
1027           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1028         { .name     = "futex",      .errmsg = true,
1029           .arg_scnprintf = { [1] = SCA_FUTEX_OP, /* op */ }, },
1030         { .name     = "futimesat", .errmsg = true,
1031           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
1032         { .name     = "getdents",   .errmsg = true,
1033           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1034         { .name     = "getdents64", .errmsg = true,
1035           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1036         { .name     = "getitimer",  .errmsg = true, STRARRAY(0, which, itimers), },
1037         { .name     = "getrlimit",  .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
1038         { .name     = "ioctl",      .errmsg = true,
1039           .arg_scnprintf = { [0] = SCA_FD, /* fd */
1040 #if defined(__i386__) || defined(__x86_64__)
1041 /*
1042  * FIXME: Make this available to all arches.
1043  */
1044                              [1] = SCA_STRHEXARRAY, /* cmd */
1045                              [2] = SCA_HEX, /* arg */ },
1046           .arg_parm      = { [1] = &strarray__tioctls, /* cmd */ }, },
1047 #else
1048                              [2] = SCA_HEX, /* arg */ }, },
1049 #endif
1050         { .name     = "kill",       .errmsg = true,
1051           .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1052         { .name     = "linkat",     .errmsg = true,
1053           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
1054         { .name     = "lseek",      .errmsg = true,
1055           .arg_scnprintf = { [0] = SCA_FD, /* fd */
1056                              [2] = SCA_STRARRAY, /* whence */ },
1057           .arg_parm      = { [2] = &strarray__whences, /* whence */ }, },
1058         { .name     = "lstat",      .errmsg = true, .alias = "newlstat", },
1059         { .name     = "madvise",    .errmsg = true,
1060           .arg_scnprintf = { [0] = SCA_HEX,      /* start */
1061                              [2] = SCA_MADV_BHV, /* behavior */ }, },
1062         { .name     = "mkdirat",    .errmsg = true,
1063           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
1064         { .name     = "mknodat",    .errmsg = true,
1065           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
1066         { .name     = "mlock",      .errmsg = true,
1067           .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1068         { .name     = "mlockall",   .errmsg = true,
1069           .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1070         { .name     = "mmap",       .hexret = true,
1071           .arg_scnprintf = { [0] = SCA_HEX,       /* addr */
1072                              [2] = SCA_MMAP_PROT, /* prot */
1073                              [3] = SCA_MMAP_FLAGS, /* flags */
1074                              [4] = SCA_FD,        /* fd */ }, },
1075         { .name     = "mprotect",   .errmsg = true,
1076           .arg_scnprintf = { [0] = SCA_HEX, /* start */
1077                              [2] = SCA_MMAP_PROT, /* prot */ }, },
1078         { .name     = "mremap",     .hexret = true,
1079           .arg_scnprintf = { [0] = SCA_HEX, /* addr */
1080                              [3] = SCA_MREMAP_FLAGS, /* flags */
1081                              [4] = SCA_HEX, /* new_addr */ }, },
1082         { .name     = "munlock",    .errmsg = true,
1083           .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1084         { .name     = "munmap",     .errmsg = true,
1085           .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1086         { .name     = "name_to_handle_at", .errmsg = true,
1087           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1088         { .name     = "newfstatat", .errmsg = true,
1089           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1090         { .name     = "open",       .errmsg = true,
1091           .arg_scnprintf = { [1] = SCA_OPEN_FLAGS, /* flags */ }, },
1092         { .name     = "open_by_handle_at", .errmsg = true,
1093           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1094                              [2] = SCA_OPEN_FLAGS, /* flags */ }, },
1095         { .name     = "openat",     .errmsg = true,
1096           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1097                              [2] = SCA_OPEN_FLAGS, /* flags */ }, },
1098         { .name     = "perf_event_open", .errmsg = true,
1099           .arg_scnprintf = { [1] = SCA_INT, /* pid */
1100                              [2] = SCA_INT, /* cpu */
1101                              [3] = SCA_FD,  /* group_fd */
1102                              [4] = SCA_PERF_FLAGS,  /* flags */ }, },
1103         { .name     = "pipe2",      .errmsg = true,
1104           .arg_scnprintf = { [1] = SCA_PIPE_FLAGS, /* flags */ }, },
1105         { .name     = "poll",       .errmsg = true, .timeout = true, },
1106         { .name     = "ppoll",      .errmsg = true, .timeout = true, },
1107         { .name     = "pread",      .errmsg = true, .alias = "pread64",
1108           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1109         { .name     = "preadv",     .errmsg = true, .alias = "pread",
1110           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1111         { .name     = "prlimit64",  .errmsg = true, STRARRAY(1, resource, rlimit_resources), },
1112         { .name     = "pwrite",     .errmsg = true, .alias = "pwrite64",
1113           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1114         { .name     = "pwritev",    .errmsg = true,
1115           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1116         { .name     = "read",       .errmsg = true,
1117           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1118         { .name     = "readlinkat", .errmsg = true,
1119           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1120         { .name     = "readv",      .errmsg = true,
1121           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1122         { .name     = "recvfrom",   .errmsg = true,
1123           .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1124         { .name     = "recvmmsg",   .errmsg = true,
1125           .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1126         { .name     = "recvmsg",    .errmsg = true,
1127           .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
1128         { .name     = "renameat",   .errmsg = true,
1129           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1130         { .name     = "rt_sigaction", .errmsg = true,
1131           .arg_scnprintf = { [0] = SCA_SIGNUM, /* sig */ }, },
1132         { .name     = "rt_sigprocmask",  .errmsg = true, STRARRAY(0, how, sighow), },
1133         { .name     = "rt_sigqueueinfo", .errmsg = true,
1134           .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1135         { .name     = "rt_tgsigqueueinfo", .errmsg = true,
1136           .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
1137         { .name     = "select",     .errmsg = true, .timeout = true, },
1138         { .name     = "sendmmsg",    .errmsg = true,
1139           .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1140         { .name     = "sendmsg",    .errmsg = true,
1141           .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
1142         { .name     = "sendto",     .errmsg = true,
1143           .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1144         { .name     = "setitimer",  .errmsg = true, STRARRAY(0, which, itimers), },
1145         { .name     = "setrlimit",  .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
1146         { .name     = "shutdown",   .errmsg = true,
1147           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1148         { .name     = "socket",     .errmsg = true,
1149           .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
1150                              [1] = SCA_SK_TYPE, /* type */ },
1151           .arg_parm      = { [0] = &strarray__socket_families, /* family */ }, },
1152         { .name     = "socketpair", .errmsg = true,
1153           .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
1154                              [1] = SCA_SK_TYPE, /* type */ },
1155           .arg_parm      = { [0] = &strarray__socket_families, /* family */ }, },
1156         { .name     = "stat",       .errmsg = true, .alias = "newstat", },
1157         { .name     = "symlinkat",  .errmsg = true,
1158           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1159         { .name     = "tgkill",     .errmsg = true,
1160           .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
1161         { .name     = "tkill",      .errmsg = true,
1162           .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1163         { .name     = "uname",      .errmsg = true, .alias = "newuname", },
1164         { .name     = "unlinkat",   .errmsg = true,
1165           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1166         { .name     = "utimensat",  .errmsg = true,
1167           .arg_scnprintf = { [0] = SCA_FDAT, /* dirfd */ }, },
1168         { .name     = "write",      .errmsg = true,
1169           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1170         { .name     = "writev",     .errmsg = true,
1171           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1172 };
1173
1174 static int syscall_fmt__cmp(const void *name, const void *fmtp)
1175 {
1176         const struct syscall_fmt *fmt = fmtp;
1177         return strcmp(name, fmt->name);
1178 }
1179
1180 static struct syscall_fmt *syscall_fmt__find(const char *name)
1181 {
1182         const int nmemb = ARRAY_SIZE(syscall_fmts);
1183         return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp);
1184 }
1185
/*
 * Cached description of one syscall, stored in trace->syscalls.table at the
 * index given by the syscall id and filled in by trace__read_syscall_info().
 */
struct syscall {
        /* format of the "syscalls:sys_enter_<name>" (or <alias>) tracepoint */
        struct event_format *tp_format;
        /* number of entries in 'args' (the leading "nr" field is not counted) */
        int                 nr_args;
        /* tracepoint fields describing the arguments, "nr" field skipped */
        struct format_field *args;
        /* syscall name as returned by audit_syscall_to_name() */
        const char          *name;
        /* true for "exit" and "exit_group" */
        bool                is_exit;
        /* matching syscall_fmts[] entry, NULL if there is none */
        struct syscall_fmt  *fmt;
        /* per-argument printers, nr_args entries; a NULL entry means "no special printer" */
        size_t              (**arg_scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
        /* per-argument printer parameters; points at fmt->arg_parm when fmt is set */
        void                **arg_parm;
};
1196
1197 static size_t fprintf_duration(unsigned long t, FILE *fp)
1198 {
1199         double duration = (double)t / NSEC_PER_MSEC;
1200         size_t printed = fprintf(fp, "(");
1201
1202         if (duration >= 1.0)
1203                 printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration);
1204         else if (duration >= 0.01)
1205                 printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration);
1206         else
1207                 printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration);
1208         return printed + fprintf(fp, "): ");
1209 }
1210
/*
 * Per-thread tracing state, attached to a struct thread through
 * thread__set_priv() and created on demand by thread__trace().
 */
struct thread_trace {
        u64               entry_time;
        u64               exit_time;
        bool              entry_pending;
        /* incremented on every thread__trace() call for this thread */
        unsigned long     nr_events;
        unsigned long     pfmaj, pfmin;
        char              *entry_str;
        double            runtime_ms;
        /* fd -> pathname cache: table[fd] is a strdup()ed path or NULL */
        struct {
                /* highest valid index of 'table', -1 while the table is empty */
                int       max;
                char      **table;
        } paths;

        struct intlist *syscall_stats;
};
1226
1227 static struct thread_trace *thread_trace__new(void)
1228 {
1229         struct thread_trace *ttrace =  zalloc(sizeof(struct thread_trace));
1230
1231         if (ttrace)
1232                 ttrace->paths.max = -1;
1233
1234         ttrace->syscall_stats = intlist__new(NULL);
1235
1236         return ttrace;
1237 }
1238
1239 static struct thread_trace *thread__trace(struct thread *thread, FILE *fp)
1240 {
1241         struct thread_trace *ttrace;
1242
1243         if (thread == NULL)
1244                 goto fail;
1245
1246         if (thread__priv(thread) == NULL)
1247                 thread__set_priv(thread, thread_trace__new());
1248
1249         if (thread__priv(thread) == NULL)
1250                 goto fail;
1251
1252         ttrace = thread__priv(thread);
1253         ++ttrace->nr_events;
1254
1255         return ttrace;
1256 fail:
1257         color_fprintf(fp, PERF_COLOR_RED,
1258                       "WARNING: not enough memory, dropping samples!\n");
1259         return NULL;
1260 }
1261
/* Bit flags; presumably combined in trace->trace_pgfaults - TODO confirm */
#define TRACE_PFMAJ             (1 << 0)
#define TRACE_PFMIN             (1 << 1)

/*
 * Global state of one 'perf trace' session.  'tool' is embedded so that
 * perf_tool callbacks can get back to this structure with container_of().
 */
struct trace {
        struct perf_tool        tool;
        struct {
                /* machine type handed to the audit_*() name <-> id helpers */
                int             machine;
                int             open_id;
        }                       audit;
        struct {
                /* highest valid index of 'table' */
                int             max;
                /* per-syscall descriptions, indexed by syscall id */
                struct syscall  *table;
                struct {
                        struct perf_evsel *sys_enter,
                                          *sys_exit;
                }               events;
        } syscalls;
        struct record_opts      opts;
        struct perf_evlist      *evlist;
        /* host machine, created in trace__symbols_init() */
        struct machine          *host;
        struct thread           *current;
        /* subtracted from every timestamp before it is printed */
        u64                     base_time;
        /* stream that receives the trace output and error messages */
        FILE                    *output;
        unsigned long           nr_events;
        /* syscall names given by the user ... */
        struct strlist          *ev_qualifier;
        /* ... and their ids, see trace__validate_ev_qualifier() */
        struct {
                size_t          nr;
                int             *entries;
        }                       ev_qualifier_ids;
        const char              *last_vfs_getname;
        struct intlist          *tid_list;
        struct intlist          *pid_list;
        struct {
                size_t          nr;
                pid_t           *entries;
        }                       filter_pids;
        /* in milliseconds, see trace__filter_duration() */
        double                  duration_filter;
        double                  runtime_ms;
        struct {
                u64             vfs_getname,
                                /* number of fd paths looked up via /proc */
                                proc_getname;
        } stats;
        bool                    not_ev_qualifier;
        /* when set, unknown fd paths may be resolved by reading /proc */
        bool                    live;
        bool                    full_time;
        bool                    sched;
        /* when set, each line is prefixed with the tid (and comm if show_comm) */
        bool                    multiple_threads;
        bool                    summary;
        bool                    summary_only;
        bool                    show_comm;
        bool                    show_tool_stats;
        bool                    trace_syscalls;
        bool                    force;
        int                     trace_pgfaults;
};
1317
1318 static int trace__set_fd_pathname(struct thread *thread, int fd, const char *pathname)
1319 {
1320         struct thread_trace *ttrace = thread__priv(thread);
1321
1322         if (fd > ttrace->paths.max) {
1323                 char **npath = realloc(ttrace->paths.table, (fd + 1) * sizeof(char *));
1324
1325                 if (npath == NULL)
1326                         return -1;
1327
1328                 if (ttrace->paths.max != -1) {
1329                         memset(npath + ttrace->paths.max + 1, 0,
1330                                (fd - ttrace->paths.max) * sizeof(char *));
1331                 } else {
1332                         memset(npath, 0, (fd + 1) * sizeof(char *));
1333                 }
1334
1335                 ttrace->paths.table = npath;
1336                 ttrace->paths.max   = fd;
1337         }
1338
1339         ttrace->paths.table[fd] = strdup(pathname);
1340
1341         return ttrace->paths.table[fd] != NULL ? 0 : -1;
1342 }
1343
1344 static int thread__read_fd_path(struct thread *thread, int fd)
1345 {
1346         char linkname[PATH_MAX], pathname[PATH_MAX];
1347         struct stat st;
1348         int ret;
1349
1350         if (thread->pid_ == thread->tid) {
1351                 scnprintf(linkname, sizeof(linkname),
1352                           "/proc/%d/fd/%d", thread->pid_, fd);
1353         } else {
1354                 scnprintf(linkname, sizeof(linkname),
1355                           "/proc/%d/task/%d/fd/%d", thread->pid_, thread->tid, fd);
1356         }
1357
1358         if (lstat(linkname, &st) < 0 || st.st_size + 1 > (off_t)sizeof(pathname))
1359                 return -1;
1360
1361         ret = readlink(linkname, pathname, sizeof(pathname));
1362
1363         if (ret < 0 || ret > st.st_size)
1364                 return -1;
1365
1366         pathname[ret] = '\0';
1367         return trace__set_fd_pathname(thread, fd, pathname);
1368 }
1369
1370 static const char *thread__fd_path(struct thread *thread, int fd,
1371                                    struct trace *trace)
1372 {
1373         struct thread_trace *ttrace = thread__priv(thread);
1374
1375         if (ttrace == NULL)
1376                 return NULL;
1377
1378         if (fd < 0)
1379                 return NULL;
1380
1381         if ((fd > ttrace->paths.max || ttrace->paths.table[fd] == NULL)) {
1382                 if (!trace->live)
1383                         return NULL;
1384                 ++trace->stats.proc_getname;
1385                 if (thread__read_fd_path(thread, fd))
1386                         return NULL;
1387         }
1388
1389         return ttrace->paths.table[fd];
1390 }
1391
1392 static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
1393                                         struct syscall_arg *arg)
1394 {
1395         int fd = arg->val;
1396         size_t printed = scnprintf(bf, size, "%d", fd);
1397         const char *path = thread__fd_path(arg->thread, fd, arg->trace);
1398
1399         if (path)
1400                 printed += scnprintf(bf + printed, size - printed, "<%s>", path);
1401
1402         return printed;
1403 }
1404
1405 static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
1406                                               struct syscall_arg *arg)
1407 {
1408         int fd = arg->val;
1409         size_t printed = syscall_arg__scnprintf_fd(bf, size, arg);
1410         struct thread_trace *ttrace = thread__priv(arg->thread);
1411
1412         if (ttrace && fd >= 0 && fd <= ttrace->paths.max)
1413                 zfree(&ttrace->paths.table[fd]);
1414
1415         return printed;
1416 }
1417
1418 static bool trace__filter_duration(struct trace *trace, double t)
1419 {
1420         return t < (trace->duration_filter * NSEC_PER_MSEC);
1421 }
1422
1423 static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
1424 {
1425         double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC;
1426
1427         return fprintf(fp, "%10.3f ", ts);
1428 }
1429
/* Set once a signal asked the tool to stop. */
static bool done = false;
/* Tells whether the most recent stop request came from SIGINT. */
static bool interrupted = false;

/* Signal handler: request termination and record whether it was SIGINT. */
static void sig_handler(int sig)
{
        interrupted = (sig == SIGINT);
        done = true;
}
1438
1439 static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread,
1440                                         u64 duration, u64 tstamp, FILE *fp)
1441 {
1442         size_t printed = trace__fprintf_tstamp(trace, tstamp, fp);
1443         printed += fprintf_duration(duration, fp);
1444
1445         if (trace->multiple_threads) {
1446                 if (trace->show_comm)
1447                         printed += fprintf(fp, "%.14s/", thread__comm_str(thread));
1448                 printed += fprintf(fp, "%d ", thread->tid);
1449         }
1450
1451         return printed;
1452 }
1453
1454 static int trace__process_event(struct trace *trace, struct machine *machine,
1455                                 union perf_event *event, struct perf_sample *sample)
1456 {
1457         int ret = 0;
1458
1459         switch (event->header.type) {
1460         case PERF_RECORD_LOST:
1461                 color_fprintf(trace->output, PERF_COLOR_RED,
1462                               "LOST %" PRIu64 " events!\n", event->lost.lost);
1463                 ret = machine__process_lost_event(machine, event, sample);
1464         default:
1465                 ret = machine__process_event(machine, event, sample);
1466                 break;
1467         }
1468
1469         return ret;
1470 }
1471
1472 static int trace__tool_process(struct perf_tool *tool,
1473                                union perf_event *event,
1474                                struct perf_sample *sample,
1475                                struct machine *machine)
1476 {
1477         struct trace *trace = container_of(tool, struct trace, tool);
1478         return trace__process_event(trace, machine, event, sample);
1479 }
1480
1481 static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
1482 {
1483         int err = symbol__init(NULL);
1484
1485         if (err)
1486                 return err;
1487
1488         trace->host = machine__new_host();
1489         if (trace->host == NULL)
1490                 return -ENOMEM;
1491
1492         err = __machine__synthesize_threads(trace->host, &trace->tool, &trace->opts.target,
1493                                             evlist->threads, trace__tool_process, false,
1494                                             trace->opts.proc_map_timeout);
1495         if (err)
1496                 symbol__exit();
1497
1498         return err;
1499 }
1500
1501 static int syscall__set_arg_fmts(struct syscall *sc)
1502 {
1503         struct format_field *field;
1504         int idx = 0;
1505
1506         sc->arg_scnprintf = calloc(sc->nr_args, sizeof(void *));
1507         if (sc->arg_scnprintf == NULL)
1508                 return -1;
1509
1510         if (sc->fmt)
1511                 sc->arg_parm = sc->fmt->arg_parm;
1512
1513         for (field = sc->args; field; field = field->next) {
1514                 if (sc->fmt && sc->fmt->arg_scnprintf[idx])
1515                         sc->arg_scnprintf[idx] = sc->fmt->arg_scnprintf[idx];
1516                 else if (field->flags & FIELD_IS_POINTER)
1517                         sc->arg_scnprintf[idx] = syscall_arg__scnprintf_hex;
1518                 ++idx;
1519         }
1520
1521         return 0;
1522 }
1523
1524 static int trace__read_syscall_info(struct trace *trace, int id)
1525 {
1526         char tp_name[128];
1527         struct syscall *sc;
1528         const char *name = audit_syscall_to_name(id, trace->audit.machine);
1529
1530         if (name == NULL)
1531                 return -1;
1532
1533         if (id > trace->syscalls.max) {
1534                 struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc));
1535
1536                 if (nsyscalls == NULL)
1537                         return -1;
1538
1539                 if (trace->syscalls.max != -1) {
1540                         memset(nsyscalls + trace->syscalls.max + 1, 0,
1541                                (id - trace->syscalls.max) * sizeof(*sc));
1542                 } else {
1543                         memset(nsyscalls, 0, (id + 1) * sizeof(*sc));
1544                 }
1545
1546                 trace->syscalls.table = nsyscalls;
1547                 trace->syscalls.max   = id;
1548         }
1549
1550         sc = trace->syscalls.table + id;
1551         sc->name = name;
1552
1553         sc->fmt  = syscall_fmt__find(sc->name);
1554
1555         snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name);
1556         sc->tp_format = trace_event__tp_format("syscalls", tp_name);
1557
1558         if (sc->tp_format == NULL && sc->fmt && sc->fmt->alias) {
1559                 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias);
1560                 sc->tp_format = trace_event__tp_format("syscalls", tp_name);
1561         }
1562
1563         if (sc->tp_format == NULL)
1564                 return -1;
1565
1566         sc->args = sc->tp_format->format.fields;
1567         sc->nr_args = sc->tp_format->format.nr_fields;
1568         /* drop nr field - not relevant here; does not exist on older kernels */
1569         if (sc->args && strcmp(sc->args->name, "nr") == 0) {
1570                 sc->args = sc->args->next;
1571                 --sc->nr_args;
1572         }
1573
1574         sc->is_exit = !strcmp(name, "exit_group") || !strcmp(name, "exit");
1575
1576         return syscall__set_arg_fmts(sc);
1577 }
1578
1579 static int trace__validate_ev_qualifier(struct trace *trace)
1580 {
1581         int err = 0, i;
1582         struct str_node *pos;
1583
1584         trace->ev_qualifier_ids.nr = strlist__nr_entries(trace->ev_qualifier);
1585         trace->ev_qualifier_ids.entries = malloc(trace->ev_qualifier_ids.nr *
1586                                                  sizeof(trace->ev_qualifier_ids.entries[0]));
1587
1588         if (trace->ev_qualifier_ids.entries == NULL) {
1589                 fputs("Error:\tNot enough memory for allocating events qualifier ids\n",
1590                        trace->output);
1591                 err = -EINVAL;
1592                 goto out;
1593         }
1594
1595         i = 0;
1596
1597         strlist__for_each(pos, trace->ev_qualifier) {
1598                 const char *sc = pos->s;
1599                 int id = audit_name_to_syscall(sc, trace->audit.machine);
1600
1601                 if (id < 0) {
1602                         if (err == 0) {
1603                                 fputs("Error:\tInvalid syscall ", trace->output);
1604                                 err = -EINVAL;
1605                         } else {
1606                                 fputs(", ", trace->output);
1607                         }
1608
1609                         fputs(sc, trace->output);
1610                 }
1611
1612                 trace->ev_qualifier_ids.entries[i++] = id;
1613         }
1614
1615         if (err < 0) {
1616                 fputs("\nHint:\ttry 'perf list syscalls:sys_enter_*'"
1617                       "\nHint:\tand: 'man syscalls'\n", trace->output);
1618                 zfree(&trace->ev_qualifier_ids.entries);
1619                 trace->ev_qualifier_ids.nr = 0;
1620         }
1621 out:
1622         return err;
1623 }
1624
1625 /*
1626  * args is to be interpreted as a series of longs but we need to handle
1627  * 8-byte unaligned accesses. args points to raw_data within the event
1628  * and raw_data is guaranteed to be 8-byte unaligned because it is
1629  * preceded by raw_size which is a u32. So we need to copy args to a temp
1630  * variable to read it. Most notably this avoids extended load instructions
1631  * on unaligned addresses
1632  */
1633
1634 static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
1635                                       unsigned char *args, struct trace *trace,
1636                                       struct thread *thread)
1637 {
1638         size_t printed = 0;
1639         unsigned char *p;
1640         unsigned long val;
1641
1642         if (sc->args != NULL) {
1643                 struct format_field *field;
1644                 u8 bit = 1;
1645                 struct syscall_arg arg = {
1646                         .idx    = 0,
1647                         .mask   = 0,
1648                         .trace  = trace,
1649                         .thread = thread,
1650                 };
1651
1652                 for (field = sc->args; field;
1653                      field = field->next, ++arg.idx, bit <<= 1) {
1654                         if (arg.mask & bit)
1655                                 continue;
1656
1657                         /* special care for unaligned accesses */
1658                         p = args + sizeof(unsigned long) * arg.idx;
1659                         memcpy(&val, p, sizeof(val));
1660
1661                         /*
1662                          * Suppress this argument if its value is zero and
1663                          * and we don't have a string associated in an
1664                          * strarray for it.
1665                          */
1666                         if (val == 0 &&
1667                             !(sc->arg_scnprintf &&
1668                               sc->arg_scnprintf[arg.idx] == SCA_STRARRAY &&
1669                               sc->arg_parm[arg.idx]))
1670                                 continue;
1671
1672                         printed += scnprintf(bf + printed, size - printed,
1673                                              "%s%s: ", printed ? ", " : "", field->name);
1674                         if (sc->arg_scnprintf && sc->arg_scnprintf[arg.idx]) {
1675                                 arg.val = val;
1676                                 if (sc->arg_parm)
1677                                         arg.parm = sc->arg_parm[arg.idx];
1678                                 printed += sc->arg_scnprintf[arg.idx](bf + printed,
1679                                                                       size - printed, &arg);
1680                         } else {
1681                                 printed += scnprintf(bf + printed, size - printed,
1682                                                      "%ld", val);
1683                         }
1684                 }
1685         } else {
1686                 int i = 0;
1687
1688                 while (i < 6) {
1689                         /* special care for unaligned accesses */
1690                         p = args + sizeof(unsigned long) * i;
1691                         memcpy(&val, p, sizeof(val));
1692                         printed += scnprintf(bf + printed, size - printed,
1693                                              "%sarg%d: %ld",
1694                                              printed ? ", " : "", i, val);
1695                         ++i;
1696                 }
1697         }
1698
1699         return printed;
1700 }
1701
/*
 * Signature shared by the per-event callbacks in this file (they are stored
 * in evsel->handler and invoked with the trace state, the originating evsel,
 * the raw event and its parsed sample).
 */
typedef int (*tracepoint_handler)(struct trace *trace, struct perf_evsel *evsel,
				  union perf_event *event,
				  struct perf_sample *sample);
1705
1706 static struct syscall *trace__syscall_info(struct trace *trace,
1707                                            struct perf_evsel *evsel, int id)
1708 {
1709
1710         if (id < 0) {
1711
1712                 /*
1713                  * XXX: Noticed on x86_64, reproduced as far back as 3.0.36, haven't tried
1714                  * before that, leaving at a higher verbosity level till that is
1715                  * explained. Reproduced with plain ftrace with:
1716                  *
1717                  * echo 1 > /t/events/raw_syscalls/sys_exit/enable
1718                  * grep "NR -1 " /t/trace_pipe
1719                  *
1720                  * After generating some load on the machine.
1721                  */
1722                 if (verbose > 1) {
1723                         static u64 n;
1724                         fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n",
1725                                 id, perf_evsel__name(evsel), ++n);
1726                 }
1727                 return NULL;
1728         }
1729
1730         if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) &&
1731             trace__read_syscall_info(trace, id))
1732                 goto out_cant_read;
1733
1734         if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL))
1735                 goto out_cant_read;
1736
1737         return &trace->syscalls.table[id];
1738
1739 out_cant_read:
1740         if (verbose) {
1741                 fprintf(trace->output, "Problems reading syscall %d", id);
1742                 if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL)
1743                         fprintf(trace->output, "(%s)", trace->syscalls.table[id].name);
1744                 fputs(" information\n", trace->output);
1745         }
1746         return NULL;
1747 }
1748
1749 static void thread__update_stats(struct thread_trace *ttrace,
1750                                  int id, struct perf_sample *sample)
1751 {
1752         struct int_node *inode;
1753         struct stats *stats;
1754         u64 duration = 0;
1755
1756         inode = intlist__findnew(ttrace->syscall_stats, id);
1757         if (inode == NULL)
1758                 return;
1759
1760         stats = inode->priv;
1761         if (stats == NULL) {
1762                 stats = malloc(sizeof(struct stats));
1763                 if (stats == NULL)
1764                         return;
1765                 init_stats(stats);
1766                 inode->priv = stats;
1767         }
1768
1769         if (ttrace->entry_time && sample->time > ttrace->entry_time)
1770                 duration = sample->time - ttrace->entry_time;
1771
1772         update_stats(stats, duration);
1773 }
1774
1775 static int trace__printf_interrupted_entry(struct trace *trace, struct perf_sample *sample)
1776 {
1777         struct thread_trace *ttrace;
1778         u64 duration;
1779         size_t printed;
1780
1781         if (trace->current == NULL)
1782                 return 0;
1783
1784         ttrace = thread__priv(trace->current);
1785
1786         if (!ttrace->entry_pending)
1787                 return 0;
1788
1789         duration = sample->time - ttrace->entry_time;
1790
1791         printed  = trace__fprintf_entry_head(trace, trace->current, duration, sample->time, trace->output);
1792         printed += fprintf(trace->output, "%-70s) ...\n", ttrace->entry_str);
1793         ttrace->entry_pending = false;
1794
1795         return printed;
1796 }
1797
1798 static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
1799                             union perf_event *event __maybe_unused,
1800                             struct perf_sample *sample)
1801 {
1802         char *msg;
1803         void *args;
1804         size_t printed = 0;
1805         struct thread *thread;
1806         int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1;
1807         struct syscall *sc = trace__syscall_info(trace, evsel, id);
1808         struct thread_trace *ttrace;
1809
1810         if (sc == NULL)
1811                 return -1;
1812
1813         thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1814         ttrace = thread__trace(thread, trace->output);
1815         if (ttrace == NULL)
1816                 goto out_put;
1817
1818         args = perf_evsel__sc_tp_ptr(evsel, args, sample);
1819
1820         if (ttrace->entry_str == NULL) {
1821                 ttrace->entry_str = malloc(1024);
1822                 if (!ttrace->entry_str)
1823                         goto out_put;
1824         }
1825
1826         if (!trace->summary_only)
1827                 trace__printf_interrupted_entry(trace, sample);
1828
1829         ttrace->entry_time = sample->time;
1830         msg = ttrace->entry_str;
1831         printed += scnprintf(msg + printed, 1024 - printed, "%s(", sc->name);
1832
1833         printed += syscall__scnprintf_args(sc, msg + printed, 1024 - printed,
1834                                            args, trace, thread);
1835
1836         if (sc->is_exit) {
1837                 if (!trace->duration_filter && !trace->summary_only) {
1838                         trace__fprintf_entry_head(trace, thread, 1, sample->time, trace->output);
1839                         fprintf(trace->output, "%-70s\n", ttrace->entry_str);
1840                 }
1841         } else
1842                 ttrace->entry_pending = true;
1843
1844         if (trace->current != thread) {
1845                 thread__put(trace->current);
1846                 trace->current = thread__get(thread);
1847         }
1848         err = 0;
1849 out_put:
1850         thread__put(thread);
1851         return err;
1852 }
1853
1854 static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
1855                            union perf_event *event __maybe_unused,
1856                            struct perf_sample *sample)
1857 {
1858         long ret;
1859         u64 duration = 0;
1860         struct thread *thread;
1861         int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1;
1862         struct syscall *sc = trace__syscall_info(trace, evsel, id);
1863         struct thread_trace *ttrace;
1864
1865         if (sc == NULL)
1866                 return -1;
1867
1868         thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1869         ttrace = thread__trace(thread, trace->output);
1870         if (ttrace == NULL)
1871                 goto out_put;
1872
1873         if (trace->summary)
1874                 thread__update_stats(ttrace, id, sample);
1875
1876         ret = perf_evsel__sc_tp_uint(evsel, ret, sample);
1877
1878         if (id == trace->audit.open_id && ret >= 0 && trace->last_vfs_getname) {
1879                 trace__set_fd_pathname(thread, ret, trace->last_vfs_getname);
1880                 trace->last_vfs_getname = NULL;
1881                 ++trace->stats.vfs_getname;
1882         }
1883
1884         ttrace->exit_time = sample->time;
1885
1886         if (ttrace->entry_time) {
1887                 duration = sample->time - ttrace->entry_time;
1888                 if (trace__filter_duration(trace, duration))
1889                         goto out;
1890         } else if (trace->duration_filter)
1891                 goto out;
1892
1893         if (trace->summary_only)
1894                 goto out;
1895
1896         trace__fprintf_entry_head(trace, thread, duration, sample->time, trace->output);
1897
1898         if (ttrace->entry_pending) {
1899                 fprintf(trace->output, "%-70s", ttrace->entry_str);
1900         } else {
1901                 fprintf(trace->output, " ... [");
1902                 color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued");
1903                 fprintf(trace->output, "]: %s()", sc->name);
1904         }
1905
1906         if (sc->fmt == NULL) {
1907 signed_print:
1908                 fprintf(trace->output, ") = %ld", ret);
1909         } else if (ret < 0 && sc->fmt->errmsg) {
1910                 char bf[STRERR_BUFSIZE];
1911                 const char *emsg = strerror_r(-ret, bf, sizeof(bf)),
1912                            *e = audit_errno_to_name(-ret);
1913
1914                 fprintf(trace->output, ") = -1 %s %s", e, emsg);
1915         } else if (ret == 0 && sc->fmt->timeout)
1916                 fprintf(trace->output, ") = 0 Timeout");
1917         else if (sc->fmt->hexret)
1918                 fprintf(trace->output, ") = %#lx", ret);
1919         else
1920                 goto signed_print;
1921
1922         fputc('\n', trace->output);
1923 out:
1924         ttrace->entry_pending = false;
1925         err = 0;
1926 out_put:
1927         thread__put(thread);
1928         return err;
1929 }
1930
1931 static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel,
1932                               union perf_event *event __maybe_unused,
1933                               struct perf_sample *sample)
1934 {
1935         trace->last_vfs_getname = perf_evsel__rawptr(evsel, sample, "pathname");
1936         return 0;
1937 }
1938
1939 static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel,
1940                                      union perf_event *event __maybe_unused,
1941                                      struct perf_sample *sample)
1942 {
1943         u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
1944         double runtime_ms = (double)runtime / NSEC_PER_MSEC;
1945         struct thread *thread = machine__findnew_thread(trace->host,
1946                                                         sample->pid,
1947                                                         sample->tid);
1948         struct thread_trace *ttrace = thread__trace(thread, trace->output);
1949
1950         if (ttrace == NULL)
1951                 goto out_dump;
1952
1953         ttrace->runtime_ms += runtime_ms;
1954         trace->runtime_ms += runtime_ms;
1955         thread__put(thread);
1956         return 0;
1957
1958 out_dump:
1959         fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n",
1960                evsel->name,
1961                perf_evsel__strval(evsel, sample, "comm"),
1962                (pid_t)perf_evsel__intval(evsel, sample, "pid"),
1963                runtime,
1964                perf_evsel__intval(evsel, sample, "vruntime"));
1965         thread__put(thread);
1966         return 0;
1967 }
1968
1969 static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel,
1970                                 union perf_event *event __maybe_unused,
1971                                 struct perf_sample *sample)
1972 {
1973         trace__printf_interrupted_entry(trace, sample);
1974         trace__fprintf_tstamp(trace, sample->time, trace->output);
1975
1976         if (trace->trace_syscalls)
1977                 fprintf(trace->output, "(         ): ");
1978
1979         fprintf(trace->output, "%s:", evsel->name);
1980
1981         if (evsel->tp_format) {
1982                 event_format__fprintf(evsel->tp_format, sample->cpu,
1983                                       sample->raw_data, sample->raw_size,
1984                                       trace->output);
1985         }
1986
1987         fprintf(trace->output, ")\n");
1988         return 0;
1989 }
1990
1991 static void print_location(FILE *f, struct perf_sample *sample,
1992                            struct addr_location *al,
1993                            bool print_dso, bool print_sym)
1994 {
1995
1996         if ((verbose || print_dso) && al->map)
1997                 fprintf(f, "%s@", al->map->dso->long_name);
1998
1999         if ((verbose || print_sym) && al->sym)
2000                 fprintf(f, "%s+0x%" PRIx64, al->sym->name,
2001                         al->addr - al->sym->start);
2002         else if (al->map)
2003                 fprintf(f, "0x%" PRIx64, al->addr);
2004         else
2005                 fprintf(f, "0x%" PRIx64, sample->addr);
2006 }
2007
2008 static int trace__pgfault(struct trace *trace,
2009                           struct perf_evsel *evsel,
2010                           union perf_event *event,
2011                           struct perf_sample *sample)
2012 {
2013         struct thread *thread;
2014         u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
2015         struct addr_location al;
2016         char map_type = 'd';
2017         struct thread_trace *ttrace;
2018         int err = -1;
2019
2020         thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
2021         ttrace = thread__trace(thread, trace->output);
2022         if (ttrace == NULL)
2023                 goto out_put;
2024
2025         if (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ)
2026                 ttrace->pfmaj++;
2027         else
2028                 ttrace->pfmin++;
2029
2030         if (trace->summary_only)
2031                 goto out;
2032
2033         thread__find_addr_location(thread, cpumode, MAP__FUNCTION,
2034                               sample->ip, &al);
2035
2036         trace__fprintf_entry_head(trace, thread, 0, sample->time, trace->output);
2037
2038         fprintf(trace->output, "%sfault [",
2039                 evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ?
2040                 "maj" : "min");
2041
2042         print_location(trace->output, sample, &al, false, true);
2043
2044         fprintf(trace->output, "] => ");
2045
2046         thread__find_addr_location(thread, cpumode, MAP__VARIABLE,
2047                                    sample->addr, &al);
2048
2049         if (!al.map) {
2050                 thread__find_addr_location(thread, cpumode,
2051                                            MAP__FUNCTION, sample->addr, &al);
2052
2053                 if (al.map)
2054                         map_type = 'x';
2055                 else
2056                         map_type = '?';
2057         }
2058
2059         print_location(trace->output, sample, &al, true, false);
2060
2061         fprintf(trace->output, " (%c%c)\n", map_type, al.level);
2062 out:
2063         err = 0;
2064 out_put:
2065         thread__put(thread);
2066         return err;
2067 }
2068
2069 static bool skip_sample(struct trace *trace, struct perf_sample *sample)
2070 {
2071         if ((trace->pid_list && intlist__find(trace->pid_list, sample->pid)) ||
2072             (trace->tid_list && intlist__find(trace->tid_list, sample->tid)))
2073                 return false;
2074
2075         if (trace->pid_list || trace->tid_list)
2076                 return true;
2077
2078         return false;
2079 }
2080
2081 static int trace__process_sample(struct perf_tool *tool,
2082                                  union perf_event *event,
2083                                  struct perf_sample *sample,
2084                                  struct perf_evsel *evsel,
2085                                  struct machine *machine __maybe_unused)
2086 {
2087         struct trace *trace = container_of(tool, struct trace, tool);
2088         int err = 0;
2089
2090         tracepoint_handler handler = evsel->handler;
2091
2092         if (skip_sample(trace, sample))
2093                 return 0;
2094
2095         if (!trace->full_time && trace->base_time == 0)
2096                 trace->base_time = sample->time;
2097
2098         if (handler) {
2099                 ++trace->nr_events;
2100                 handler(trace, evsel, event, sample);
2101         }
2102
2103         return err;
2104 }
2105
2106 static int parse_target_str(struct trace *trace)
2107 {
2108         if (trace->opts.target.pid) {
2109                 trace->pid_list = intlist__new(trace->opts.target.pid);
2110                 if (trace->pid_list == NULL) {
2111                         pr_err("Error parsing process id string\n");
2112                         return -EINVAL;
2113                 }
2114         }
2115
2116         if (trace->opts.target.tid) {
2117                 trace->tid_list = intlist__new(trace->opts.target.tid);
2118                 if (trace->tid_list == NULL) {
2119                         pr_err("Error parsing thread id string\n");
2120                         return -EINVAL;
2121                 }
2122         }
2123
2124         return 0;
2125 }
2126
2127 static int trace__record(struct trace *trace, int argc, const char **argv)
2128 {
2129         unsigned int rec_argc, i, j;
2130         const char **rec_argv;
2131         const char * const record_args[] = {
2132                 "record",
2133                 "-R",
2134                 "-m", "1024",
2135                 "-c", "1",
2136         };
2137
2138         const char * const sc_args[] = { "-e", };
2139         unsigned int sc_args_nr = ARRAY_SIZE(sc_args);
2140         const char * const majpf_args[] = { "-e", "major-faults" };
2141         unsigned int majpf_args_nr = ARRAY_SIZE(majpf_args);
2142         const char * const minpf_args[] = { "-e", "minor-faults" };
2143         unsigned int minpf_args_nr = ARRAY_SIZE(minpf_args);
2144
2145         /* +1 is for the event string below */
2146         rec_argc = ARRAY_SIZE(record_args) + sc_args_nr + 1 +
2147                 majpf_args_nr + minpf_args_nr + argc;
2148         rec_argv = calloc(rec_argc + 1, sizeof(char *));
2149
2150         if (rec_argv == NULL)
2151                 return -ENOMEM;
2152
2153         j = 0;
2154         for (i = 0; i < ARRAY_SIZE(record_args); i++)
2155                 rec_argv[j++] = record_args[i];
2156
2157         if (trace->trace_syscalls) {
2158                 for (i = 0; i < sc_args_nr; i++)
2159                         rec_argv[j++] = sc_args[i];
2160
2161                 /* event string may be different for older kernels - e.g., RHEL6 */
2162                 if (is_valid_tracepoint("raw_syscalls:sys_enter"))
2163                         rec_argv[j++] = "raw_syscalls:sys_enter,raw_syscalls:sys_exit";
2164                 else if (is_valid_tracepoint("syscalls:sys_enter"))
2165                         rec_argv[j++] = "syscalls:sys_enter,syscalls:sys_exit";
2166                 else {
2167                         pr_err("Neither raw_syscalls nor syscalls events exist.\n");
2168                         return -1;
2169                 }
2170         }
2171
2172         if (trace->trace_pgfaults & TRACE_PFMAJ)
2173                 for (i = 0; i < majpf_args_nr; i++)
2174                         rec_argv[j++] = majpf_args[i];
2175
2176         if (trace->trace_pgfaults & TRACE_PFMIN)
2177                 for (i = 0; i < minpf_args_nr; i++)
2178                         rec_argv[j++] = minpf_args[i];
2179
2180         for (i = 0; i < (unsigned int)argc; i++)
2181                 rec_argv[j++] = argv[i];
2182
2183         return cmd_record(j, rec_argv, NULL);
2184 }
2185
/* Forward declaration: trace__run() below calls this when trace->summary is set. */
static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp);
2187
2188 static void perf_evlist__add_vfs_getname(struct perf_evlist *evlist)
2189 {
2190         struct perf_evsel *evsel = perf_evsel__newtp("probe", "vfs_getname");
2191         if (evsel == NULL)
2192                 return;
2193
2194         if (perf_evsel__field(evsel, "pathname") == NULL) {
2195                 perf_evsel__delete(evsel);
2196                 return;
2197         }
2198
2199         evsel->handler = trace__vfs_getname;
2200         perf_evlist__add(evlist, evsel);
2201 }
2202
2203 static int perf_evlist__add_pgfault(struct perf_evlist *evlist,
2204                                     u64 config)
2205 {
2206         struct perf_evsel *evsel;
2207         struct perf_event_attr attr = {
2208                 .type = PERF_TYPE_SOFTWARE,
2209                 .mmap_data = 1,
2210         };
2211
2212         attr.config = config;
2213         attr.sample_period = 1;
2214
2215         event_attr_init(&attr);
2216
2217         evsel = perf_evsel__new(&attr);
2218         if (!evsel)
2219                 return -ENOMEM;
2220
2221         evsel->handler = trace__pgfault;
2222         perf_evlist__add(evlist, evsel);
2223
2224         return 0;
2225 }
2226
2227 static void trace__handle_event(struct trace *trace, union perf_event *event, struct perf_sample *sample)
2228 {
2229         const u32 type = event->header.type;
2230         struct perf_evsel *evsel;
2231
2232         if (!trace->full_time && trace->base_time == 0)
2233                 trace->base_time = sample->time;
2234
2235         if (type != PERF_RECORD_SAMPLE) {
2236                 trace__process_event(trace, trace->host, event, sample);
2237                 return;
2238         }
2239
2240         evsel = perf_evlist__id2evsel(trace->evlist, sample->id);
2241         if (evsel == NULL) {
2242                 fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample->id);
2243                 return;
2244         }
2245
2246         if (evsel->attr.type == PERF_TYPE_TRACEPOINT &&
2247             sample->raw_data == NULL) {
2248                 fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
2249                        perf_evsel__name(evsel), sample->tid,
2250                        sample->cpu, sample->raw_size);
2251         } else {
2252                 tracepoint_handler handler = evsel->handler;
2253                 handler(trace, evsel, event, sample);
2254         }
2255 }
2256
2257 static int trace__add_syscall_newtp(struct trace *trace)
2258 {
2259         int ret = -1;
2260         struct perf_evlist *evlist = trace->evlist;
2261         struct perf_evsel *sys_enter, *sys_exit;
2262
2263         sys_enter = perf_evsel__syscall_newtp("sys_enter", trace__sys_enter);
2264         if (sys_enter == NULL)
2265                 goto out;
2266
2267         if (perf_evsel__init_sc_tp_ptr_field(sys_enter, args))
2268                 goto out_delete_sys_enter;
2269
2270         sys_exit = perf_evsel__syscall_newtp("sys_exit", trace__sys_exit);
2271         if (sys_exit == NULL)
2272                 goto out_delete_sys_enter;
2273
2274         if (perf_evsel__init_sc_tp_uint_field(sys_exit, ret))
2275                 goto out_delete_sys_exit;
2276
2277         perf_evlist__add(evlist, sys_enter);
2278         perf_evlist__add(evlist, sys_exit);
2279
2280         trace->syscalls.events.sys_enter = sys_enter;
2281         trace->syscalls.events.sys_exit  = sys_exit;
2282
2283         ret = 0;
2284 out:
2285         return ret;
2286
2287 out_delete_sys_exit:
2288         perf_evsel__delete_priv(sys_exit);
2289 out_delete_sys_enter:
2290         perf_evsel__delete_priv(sys_enter);
2291         goto out;
2292 }
2293
2294 static int trace__set_ev_qualifier_filter(struct trace *trace)
2295 {
2296         int err = -1;
2297         char *filter = asprintf_expr_inout_ints("id", !trace->not_ev_qualifier,
2298                                                 trace->ev_qualifier_ids.nr,
2299                                                 trace->ev_qualifier_ids.entries);
2300
2301         if (filter == NULL)
2302                 goto out_enomem;
2303
2304         if (!perf_evsel__append_filter(trace->syscalls.events.sys_enter, "&&", filter))
2305                 err = perf_evsel__append_filter(trace->syscalls.events.sys_exit, "&&", filter);
2306
2307         free(filter);
2308 out:
2309         return err;
2310 out_enomem:
2311         errno = ENOMEM;
2312         goto out;
2313 }
2314
2315 static int trace__run(struct trace *trace, int argc, const char **argv)
2316 {
2317         struct perf_evlist *evlist = trace->evlist;
2318         struct perf_evsel *evsel;
2319         int err = -1, i;
2320         unsigned long before;
2321         const bool forks = argc > 0;
2322         bool draining = false;
2323
2324         trace->live = true;
2325
2326         if (trace->trace_syscalls && trace__add_syscall_newtp(trace))
2327                 goto out_error_raw_syscalls;
2328
2329         if (trace->trace_syscalls)
2330                 perf_evlist__add_vfs_getname(evlist);
2331
2332         if ((trace->trace_pgfaults & TRACE_PFMAJ) &&
2333             perf_evlist__add_pgfault(evlist, PERF_COUNT_SW_PAGE_FAULTS_MAJ)) {
2334                 goto out_error_mem;
2335         }
2336
2337         if ((trace->trace_pgfaults & TRACE_PFMIN) &&
2338             perf_evlist__add_pgfault(evlist, PERF_COUNT_SW_PAGE_FAULTS_MIN))
2339                 goto out_error_mem;
2340
2341         if (trace->sched &&
2342             perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
2343                                    trace__sched_stat_runtime))
2344                 goto out_error_sched_stat_runtime;
2345
2346         err = perf_evlist__create_maps(evlist, &trace->opts.target);
2347         if (err < 0) {
2348                 fprintf(trace->output, "Problems parsing the target to trace, check your options!\n");
2349                 goto out_delete_evlist;
2350         }
2351
2352         err = trace__symbols_init(trace, evlist);
2353         if (err < 0) {
2354                 fprintf(trace->output, "Problems initializing symbol libraries!\n");
2355                 goto out_delete_evlist;
2356         }
2357
2358         perf_evlist__config(evlist, &trace->opts);
2359
2360         signal(SIGCHLD, sig_handler);
2361         signal(SIGINT, sig_handler);
2362
2363         if (forks) {
2364                 err = perf_evlist__prepare_workload(evlist, &trace->opts.target,
2365                                                     argv, false, NULL);
2366                 if (err < 0) {
2367                         fprintf(trace->output, "Couldn't run the workload!\n");
2368                         goto out_delete_evlist;
2369                 }
2370         }
2371
2372         err = perf_evlist__open(evlist);
2373         if (err < 0)
2374                 goto out_error_open;
2375
2376         /*
2377          * Better not use !target__has_task() here because we need to cover the
2378          * case where no threads were specified in the command line, but a
2379          * workload was, and in that case we will fill in the thread_map when
2380          * we fork the workload in perf_evlist__prepare_workload.
2381          */
2382         if (trace->filter_pids.nr > 0)
2383                 err = perf_evlist__set_filter_pids(evlist, trace->filter_pids.nr, trace->filter_pids.entries);
2384         else if (thread_map__pid(evlist->threads, 0) == -1)
2385                 err = perf_evlist__set_filter_pid(evlist, getpid());
2386
2387         if (err < 0)
2388                 goto out_error_mem;
2389
2390         if (trace->ev_qualifier_ids.nr > 0) {
2391                 err = trace__set_ev_qualifier_filter(trace);
2392                 if (err < 0)
2393                         goto out_errno;
2394         }
2395
2396         pr_debug("%s\n", trace->syscalls.events.sys_exit->filter);
2397
2398         err = perf_evlist__apply_filters(evlist, &evsel);
2399         if (err < 0)
2400                 goto out_error_apply_filters;
2401
2402         err = perf_evlist__mmap(evlist, trace->opts.mmap_pages, false);
2403         if (err < 0)
2404                 goto out_error_mmap;
2405
2406         if (!target__none(&trace->opts.target))
2407                 perf_evlist__enable(evlist);
2408
2409         if (forks)
2410                 perf_evlist__start_workload(evlist);
2411
2412         trace->multiple_threads = thread_map__pid(evlist->threads, 0) == -1 ||
2413                                   evlist->threads->nr > 1 ||
2414                                   perf_evlist__first(evlist)->attr.inherit;
2415 again:
2416         before = trace->nr_events;
2417
2418         for (i = 0; i < evlist->nr_mmaps; i++) {
2419                 union perf_event *event;
2420
2421                 while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) {
2422                         struct perf_sample sample;
2423
2424                         ++trace->nr_events;
2425
2426                         err = perf_evlist__parse_sample(evlist, event, &sample);
2427                         if (err) {
2428                                 fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err);
2429                                 goto next_event;
2430                         }
2431
2432                         trace__handle_event(trace, event, &sample);
2433 next_event:
2434                         perf_evlist__mmap_consume(evlist, i);
2435
2436                         if (interrupted)
2437                                 goto out_disable;
2438
2439                         if (done && !draining) {
2440                                 perf_evlist__disable(evlist);
2441                                 draining = true;
2442                         }
2443                 }
2444         }
2445
2446         if (trace->nr_events == before) {
2447                 int timeout = done ? 100 : -1;
2448
2449                 if (!draining && perf_evlist__poll(evlist, timeout) > 0) {
2450                         if (perf_evlist__filter_pollfd(evlist, POLLERR | POLLHUP) == 0)
2451                                 draining = true;
2452
2453                         goto again;
2454                 }
2455         } else {
2456                 goto again;
2457         }
2458
2459 out_disable:
2460         thread__zput(trace->current);
2461
2462         perf_evlist__disable(evlist);
2463
2464         if (!err) {
2465                 if (trace->summary)
2466                         trace__fprintf_thread_summary(trace, trace->output);
2467
2468                 if (trace->show_tool_stats) {
2469                         fprintf(trace->output, "Stats:\n "
2470                                                " vfs_getname : %" PRIu64 "\n"
2471                                                " proc_getname: %" PRIu64 "\n",
2472                                 trace->stats.vfs_getname,
2473                                 trace->stats.proc_getname);
2474                 }
2475         }
2476
2477 out_delete_evlist:
2478         perf_evlist__delete(evlist);
2479         trace->evlist = NULL;
2480         trace->live = false;
2481         return err;
2482 {
2483         char errbuf[BUFSIZ];
2484
2485 out_error_sched_stat_runtime:
2486         debugfs__strerror_open_tp(errno, errbuf, sizeof(errbuf), "sched", "sched_stat_runtime");
2487         goto out_error;
2488
2489 out_error_raw_syscalls:
2490         debugfs__strerror_open_tp(errno, errbuf, sizeof(errbuf), "raw_syscalls", "sys_(enter|exit)");
2491         goto out_error;
2492
2493 out_error_mmap:
2494         perf_evlist__strerror_mmap(evlist, errno, errbuf, sizeof(errbuf));
2495         goto out_error;
2496
2497 out_error_open:
2498         perf_evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf));
2499
2500 out_error:
2501         fprintf(trace->output, "%s\n", errbuf);
2502         goto out_delete_evlist;
2503
2504 out_error_apply_filters:
2505         fprintf(trace->output,
2506                 "Failed to set filter \"%s\" on event %s with %d (%s)\n",
2507                 evsel->filter, perf_evsel__name(evsel), errno,
2508                 strerror_r(errno, errbuf, sizeof(errbuf)));
2509         goto out_delete_evlist;
2510 }
2511 out_error_mem:
2512         fprintf(trace->output, "Not enough memory to run!\n");
2513         goto out_delete_evlist;
2514
2515 out_errno:
2516         fprintf(trace->output, "errno=%d,%s\n", errno, strerror(errno));
2517         goto out_delete_evlist;
2518 }
2519
2520 static int trace__replay(struct trace *trace)
2521 {
2522         const struct perf_evsel_str_handler handlers[] = {
2523                 { "probe:vfs_getname",       trace__vfs_getname, },
2524         };
2525         struct perf_data_file file = {
2526                 .path  = input_name,
2527                 .mode  = PERF_DATA_MODE_READ,
2528                 .force = trace->force,
2529         };
2530         struct perf_session *session;
2531         struct perf_evsel *evsel;
2532         int err = -1;
2533
2534         trace->tool.sample        = trace__process_sample;
2535         trace->tool.mmap          = perf_event__process_mmap;
2536         trace->tool.mmap2         = perf_event__process_mmap2;
2537         trace->tool.comm          = perf_event__process_comm;
2538         trace->tool.exit          = perf_event__process_exit;
2539         trace->tool.fork          = perf_event__process_fork;
2540         trace->tool.attr          = perf_event__process_attr;
2541         trace->tool.tracing_data = perf_event__process_tracing_data;
2542         trace->tool.build_id      = perf_event__process_build_id;
2543
2544         trace->tool.ordered_events = true;
2545         trace->tool.ordering_requires_timestamps = true;
2546
2547         /* add tid to output */
2548         trace->multiple_threads = true;
2549
2550         session = perf_session__new(&file, false, &trace->tool);
2551         if (session == NULL)
2552                 return -1;
2553
2554         if (symbol__init(&session->header.env) < 0)
2555                 goto out;
2556
2557         trace->host = &session->machines.host;
2558
2559         err = perf_session__set_tracepoints_handlers(session, handlers);
2560         if (err)
2561                 goto out;
2562
2563         evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2564                                                      "raw_syscalls:sys_enter");
2565         /* older kernels have syscalls tp versus raw_syscalls */
2566         if (evsel == NULL)
2567                 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2568                                                              "syscalls:sys_enter");
2569
2570         if (evsel &&
2571             (perf_evsel__init_syscall_tp(evsel, trace__sys_enter) < 0 ||
2572             perf_evsel__init_sc_tp_ptr_field(evsel, args))) {
2573                 pr_err("Error during initialize raw_syscalls:sys_enter event\n");
2574                 goto out;
2575         }
2576
2577         evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2578                                                      "raw_syscalls:sys_exit");
2579         if (evsel == NULL)
2580                 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2581                                                              "syscalls:sys_exit");
2582         if (evsel &&
2583             (perf_evsel__init_syscall_tp(evsel, trace__sys_exit) < 0 ||
2584             perf_evsel__init_sc_tp_uint_field(evsel, ret))) {
2585                 pr_err("Error during initialize raw_syscalls:sys_exit event\n");
2586                 goto out;
2587         }
2588
2589         evlist__for_each(session->evlist, evsel) {
2590                 if (evsel->attr.type == PERF_TYPE_SOFTWARE &&
2591                     (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ||
2592                      evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MIN ||
2593                      evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS))
2594                         evsel->handler = trace__pgfault;
2595         }
2596
2597         err = parse_target_str(trace);
2598         if (err != 0)
2599                 goto out;
2600
2601         setup_pager();
2602
2603         err = perf_session__process_events(session);
2604         if (err)
2605                 pr_err("Failed to process events, error %d", err);
2606
2607         else if (trace->summary)
2608                 trace__fprintf_thread_summary(trace, trace->output);
2609
2610 out:
2611         perf_session__delete(session);
2612
2613         return err;
2614 }
2615
/*
 * Print the heading that introduces the per-thread summary.
 *
 * Returns the value reported by fprintf() converted to size_t.
 */
static size_t trace__fprintf_threads_header(FILE *fp)
{
	return fprintf(fp, "\n Summary of events:\n\n");
}
2624
2625 static size_t thread__dump_stats(struct thread_trace *ttrace,
2626                                  struct trace *trace, FILE *fp)
2627 {
2628         struct stats *stats;
2629         size_t printed = 0;
2630         struct syscall *sc;
2631         struct int_node *inode = intlist__first(ttrace->syscall_stats);
2632
2633         if (inode == NULL)
2634                 return 0;
2635
2636         printed += fprintf(fp, "\n");
2637
2638         printed += fprintf(fp, "   syscall            calls      min       avg       max      stddev\n");
2639         printed += fprintf(fp, "                               (msec)    (msec)    (msec)        (%%)\n");
2640         printed += fprintf(fp, "   --------------- -------- --------- --------- ---------     ------\n");
2641
2642         /* each int_node is a syscall */
2643         while (inode) {
2644                 stats = inode->priv;
2645                 if (stats) {
2646                         double min = (double)(stats->min) / NSEC_PER_MSEC;
2647                         double max = (double)(stats->max) / NSEC_PER_MSEC;
2648                         double avg = avg_stats(stats);
2649                         double pct;
2650                         u64 n = (u64) stats->n;
2651
2652                         pct = avg ? 100.0 * stddev_stats(stats)/avg : 0.0;
2653                         avg /= NSEC_PER_MSEC;
2654
2655                         sc = &trace->syscalls.table[inode->i];
2656                         printed += fprintf(fp, "   %-15s", sc->name);
2657                         printed += fprintf(fp, " %8" PRIu64 " %9.3f %9.3f",
2658                                            n, min, avg);
2659                         printed += fprintf(fp, " %9.3f %9.2f%%\n", max, pct);
2660                 }
2661
2662                 inode = intlist__next(inode);
2663         }
2664
2665         printed += fprintf(fp, "\n\n");
2666
2667         return printed;
2668 }
2669
/*
 * Context handed to the per-thread summary callback through its opaque
 * private pointer.
 */
struct summary_data {
	FILE *fp;		/* stream the summary is written to */
	struct trace *trace;	/* trace session being summarized */
	size_t printed;		/* running total of fprintf() results */
};
2676
2677 static int trace__fprintf_one_thread(struct thread *thread, void *priv)
2678 {
2679         struct summary_data *data = priv;
2680         FILE *fp = data->fp;
2681         size_t printed = data->printed;
2682         struct trace *trace = data->trace;
2683         struct thread_trace *ttrace = thread__priv(thread);
2684         double ratio;
2685
2686         if (ttrace == NULL)
2687                 return 0;
2688
2689         ratio = (double)ttrace->nr_events / trace->nr_events * 100.0;
2690
2691         printed += fprintf(fp, " %s (%d), ", thread__comm_str(thread), thread->tid);
2692         printed += fprintf(fp, "%lu events, ", ttrace->nr_events);
2693         printed += fprintf(fp, "%.1f%%", ratio);
2694         if (ttrace->pfmaj)
2695                 printed += fprintf(fp, ", %lu majfaults", ttrace->pfmaj);
2696         if (ttrace->pfmin)
2697                 printed += fprintf(fp, ", %lu minfaults", ttrace->pfmin);
2698         printed += fprintf(fp, ", %.3f msec\n", ttrace->runtime_ms);
2699         printed += thread__dump_stats(ttrace, trace, fp);
2700
2701         data->printed += printed;
2702
2703         return 0;
2704 }
2705
2706 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
2707 {
2708         struct summary_data data = {
2709                 .fp = fp,
2710                 .trace = trace
2711         };
2712         data.printed = trace__fprintf_threads_header(fp);
2713
2714         machine__for_each_thread(trace->host, trace__fprintf_one_thread, &data);
2715
2716         return data.printed;
2717 }
2718
2719 static int trace__set_duration(const struct option *opt, const char *str,
2720                                int unset __maybe_unused)
2721 {
2722         struct trace *trace = opt->value;
2723
2724         trace->duration_filter = atof(str);
2725         return 0;
2726 }
2727
2728 static int trace__set_filter_pids(const struct option *opt, const char *str,
2729                                   int unset __maybe_unused)
2730 {
2731         int ret = -1;
2732         size_t i;
2733         struct trace *trace = opt->value;
2734         /*
2735          * FIXME: introduce a intarray class, plain parse csv and create a
2736          * { int nr, int entries[] } struct...
2737          */
2738         struct intlist *list = intlist__new(str);
2739
2740         if (list == NULL)
2741                 return -1;
2742
2743         i = trace->filter_pids.nr = intlist__nr_entries(list) + 1;
2744         trace->filter_pids.entries = calloc(i, sizeof(pid_t));
2745
2746         if (trace->filter_pids.entries == NULL)
2747                 goto out;
2748
2749         trace->filter_pids.entries[0] = getpid();
2750
2751         for (i = 1; i < trace->filter_pids.nr; ++i)
2752                 trace->filter_pids.entries[i] = intlist__entry(list, i - 1)->i;
2753
2754         intlist__delete(list);
2755         ret = 0;
2756 out:
2757         return ret;
2758 }
2759
2760 static int trace__open_output(struct trace *trace, const char *filename)
2761 {
2762         struct stat st;
2763
2764         if (!stat(filename, &st) && st.st_size) {
2765                 char oldname[PATH_MAX];
2766
2767                 scnprintf(oldname, sizeof(oldname), "%s.old", filename);
2768                 unlink(oldname);
2769                 rename(filename, oldname);
2770         }
2771
2772         trace->output = fopen(filename, "w");
2773
2774         return trace->output == NULL ? -errno : 0;
2775 }
2776
2777 static int parse_pagefaults(const struct option *opt, const char *str,
2778                             int unset __maybe_unused)
2779 {
2780         int *trace_pgfaults = opt->value;
2781
2782         if (strcmp(str, "all") == 0)
2783                 *trace_pgfaults |= TRACE_PFMAJ | TRACE_PFMIN;
2784         else if (strcmp(str, "maj") == 0)
2785                 *trace_pgfaults |= TRACE_PFMAJ;
2786         else if (strcmp(str, "min") == 0)
2787                 *trace_pgfaults |= TRACE_PFMIN;
2788         else
2789                 return -1;
2790
2791         return 0;
2792 }
2793
2794 static void evlist__set_evsel_handler(struct perf_evlist *evlist, void *handler)
2795 {
2796         struct perf_evsel *evsel;
2797
2798         evlist__for_each(evlist, evsel)
2799                 evsel->handler = handler;
2800 }
2801
2802 int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
2803 {
2804         const char *trace_usage[] = {
2805                 "perf trace [<options>] [<command>]",
2806                 "perf trace [<options>] -- <command> [<options>]",
2807                 "perf trace record [<options>] [<command>]",
2808                 "perf trace record [<options>] -- <command> [<options>]",
2809                 NULL
2810         };
2811         struct trace trace = {
2812                 .audit = {
2813                         .machine = audit_detect_machine(),
2814                         .open_id = audit_name_to_syscall("open", trace.audit.machine),
2815                 },
2816                 .syscalls = {
2817                         . max = -1,
2818                 },
2819                 .opts = {
2820                         .target = {
2821                                 .uid       = UINT_MAX,
2822                                 .uses_mmap = true,
2823                         },
2824                         .user_freq     = UINT_MAX,
2825                         .user_interval = ULLONG_MAX,
2826                         .no_buffering  = true,
2827                         .mmap_pages    = UINT_MAX,
2828                         .proc_map_timeout  = 500,
2829                 },
2830                 .output = stdout,
2831                 .show_comm = true,
2832                 .trace_syscalls = true,
2833         };
2834         const char *output_name = NULL;
2835         const char *ev_qualifier_str = NULL;
2836         const struct option trace_options[] = {
2837         OPT_CALLBACK(0, "event", &trace.evlist, "event",
2838                      "event selector. use 'perf list' to list available events",
2839                      parse_events_option),
2840         OPT_BOOLEAN(0, "comm", &trace.show_comm,
2841                     "show the thread COMM next to its id"),
2842         OPT_BOOLEAN(0, "tool_stats", &trace.show_tool_stats, "show tool stats"),
2843         OPT_STRING('e', "expr", &ev_qualifier_str, "expr", "list of syscalls to trace"),
2844         OPT_STRING('o', "output", &output_name, "file", "output file name"),
2845         OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"),
2846         OPT_STRING('p', "pid", &trace.opts.target.pid, "pid",
2847                     "trace events on existing process id"),
2848         OPT_STRING('t', "tid", &trace.opts.target.tid, "tid",
2849                     "trace events on existing thread id"),
2850         OPT_CALLBACK(0, "filter-pids", &trace, "CSV list of pids",
2851                      "pids to filter (by the kernel)", trace__set_filter_pids),
2852         OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide,
2853                     "system-wide collection from all CPUs"),
2854         OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu",
2855                     "list of cpus to monitor"),
2856         OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit,
2857                     "child tasks do not inherit counters"),
2858         OPT_CALLBACK('m', "mmap-pages", &trace.opts.mmap_pages, "pages",
2859                      "number of mmap data pages",
2860                      perf_evlist__parse_mmap_pages),
2861         OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user",
2862                    "user to profile"),
2863         OPT_CALLBACK(0, "duration", &trace, "float",
2864                      "show only events with duration > N.M ms",
2865                      trace__set_duration),
2866         OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"),
2867         OPT_INCR('v', "verbose", &verbose, "be more verbose"),
2868         OPT_BOOLEAN('T', "time", &trace.full_time,
2869                     "Show full timestamp, not time relative to first start"),
2870         OPT_BOOLEAN('s', "summary", &trace.summary_only,
2871                     "Show only syscall summary with statistics"),
2872         OPT_BOOLEAN('S', "with-summary", &trace.summary,
2873                     "Show all syscalls and summary with statistics"),
2874         OPT_CALLBACK_DEFAULT('F', "pf", &trace.trace_pgfaults, "all|maj|min",
2875                      "Trace pagefaults", parse_pagefaults, "maj"),
2876         OPT_BOOLEAN(0, "syscalls", &trace.trace_syscalls, "Trace syscalls"),
2877         OPT_BOOLEAN('f', "force", &trace.force, "don't complain, do it"),
2878         OPT_UINTEGER(0, "proc-map-timeout", &trace.opts.proc_map_timeout,
2879                         "per thread proc mmap processing timeout in ms"),
2880         OPT_END()
2881         };
2882         const char * const trace_subcommands[] = { "record", NULL };
2883         int err;
2884         char bf[BUFSIZ];
2885
2886         signal(SIGSEGV, sighandler_dump_stack);
2887         signal(SIGFPE, sighandler_dump_stack);
2888
2889         trace.evlist = perf_evlist__new();
2890
2891         if (trace.evlist == NULL) {
2892                 pr_err("Not enough memory to run!\n");
2893                 err = -ENOMEM;
2894                 goto out;
2895         }
2896
2897         argc = parse_options_subcommand(argc, argv, trace_options, trace_subcommands,
2898                                  trace_usage, PARSE_OPT_STOP_AT_NON_OPTION);
2899
2900         if (trace.trace_pgfaults) {
2901                 trace.opts.sample_address = true;
2902                 trace.opts.sample_time = true;
2903         }
2904
2905         if (trace.evlist->nr_entries > 0)
2906                 evlist__set_evsel_handler(trace.evlist, trace__event_handler);
2907
2908         if ((argc >= 1) && (strcmp(argv[0], "record") == 0))
2909                 return trace__record(&trace, argc-1, &argv[1]);
2910
2911         /* summary_only implies summary option, but don't overwrite summary if set */
2912         if (trace.summary_only)
2913                 trace.summary = trace.summary_only;
2914
2915         if (!trace.trace_syscalls && !trace.trace_pgfaults &&
2916             trace.evlist->nr_entries == 0 /* Was --events used? */) {
2917                 pr_err("Please specify something to trace.\n");
2918                 return -1;
2919         }
2920
2921         if (output_name != NULL) {
2922                 err = trace__open_output(&trace, output_name);
2923                 if (err < 0) {
2924                         perror("failed to create output file");
2925                         goto out;
2926                 }
2927         }
2928
2929         if (ev_qualifier_str != NULL) {
2930                 const char *s = ev_qualifier_str;
2931                 struct strlist_config slist_config = {
2932                         .dirname = system_path(STRACE_GROUPS_DIR),
2933                 };
2934
2935                 trace.not_ev_qualifier = *s == '!';
2936                 if (trace.not_ev_qualifier)
2937                         ++s;
2938                 trace.ev_qualifier = strlist__new(s, &slist_config);
2939                 if (trace.ev_qualifier == NULL) {
2940                         fputs("Not enough memory to parse event qualifier",
2941                               trace.output);
2942                         err = -ENOMEM;
2943                         goto out_close;
2944                 }
2945
2946                 err = trace__validate_ev_qualifier(&trace);
2947                 if (err)
2948                         goto out_close;
2949         }
2950
2951         err = target__validate(&trace.opts.target);
2952         if (err) {
2953                 target__strerror(&trace.opts.target, err, bf, sizeof(bf));
2954                 fprintf(trace.output, "%s", bf);
2955                 goto out_close;
2956         }
2957
2958         err = target__parse_uid(&trace.opts.target);
2959         if (err) {
2960                 target__strerror(&trace.opts.target, err, bf, sizeof(bf));
2961                 fprintf(trace.output, "%s", bf);
2962                 goto out_close;
2963         }
2964
2965         if (!argc && target__none(&trace.opts.target))
2966                 trace.opts.target.system_wide = true;
2967
2968         if (input_name)
2969                 err = trace__replay(&trace);
2970         else
2971                 err = trace__run(&trace, argc, argv);
2972
2973 out_close:
2974         if (output_name != NULL)
2975                 fclose(trace.output);
2976 out:
2977         return err;
2978 }