perf trace: Dump stack on segfaults
[firefly-linux-kernel-4.4.55.git] / tools / perf / builtin-trace.c
1 #include <traceevent/event-parse.h>
2 #include "builtin.h"
3 #include "util/color.h"
4 #include "util/debug.h"
5 #include "util/evlist.h"
6 #include "util/machine.h"
7 #include "util/session.h"
8 #include "util/thread.h"
9 #include "util/parse-options.h"
10 #include "util/strlist.h"
11 #include "util/intlist.h"
12 #include "util/thread_map.h"
13 #include "util/stat.h"
14 #include "trace-event.h"
15 #include "util/parse-events.h"
16
#include <libaudit.h>
#include <stdlib.h>
#include <string.h>
#include <sys/eventfd.h>
#include <sys/mman.h>
#include <linux/futex.h>
22
/*
 * Fallbacks for older distros: each constant below is only defined here
 * when the system headers did not already provide it.
 */
#if !defined(MAP_STACK)
# define MAP_STACK		0x20000
#endif

#if !defined(MADV_HWPOISON)
# define MADV_HWPOISON		100
#endif

#if !defined(MADV_MERGEABLE)
# define MADV_MERGEABLE		12
#endif

#if !defined(MADV_UNMERGEABLE)
# define MADV_UNMERGEABLE	13
#endif

#if !defined(EFD_SEMAPHORE)
# define EFD_SEMAPHORE		1
#endif
43
44 struct tp_field {
45         int offset;
46         union {
47                 u64 (*integer)(struct tp_field *field, struct perf_sample *sample);
48                 void *(*pointer)(struct tp_field *field, struct perf_sample *sample);
49         };
50 };
51
52 #define TP_UINT_FIELD(bits) \
53 static u64 tp_field__u##bits(struct tp_field *field, struct perf_sample *sample) \
54 { \
55         return *(u##bits *)(sample->raw_data + field->offset); \
56 }
57
58 TP_UINT_FIELD(8);
59 TP_UINT_FIELD(16);
60 TP_UINT_FIELD(32);
61 TP_UINT_FIELD(64);
62
63 #define TP_UINT_FIELD__SWAPPED(bits) \
64 static u64 tp_field__swapped_u##bits(struct tp_field *field, struct perf_sample *sample) \
65 { \
66         u##bits value = *(u##bits *)(sample->raw_data + field->offset); \
67         return bswap_##bits(value);\
68 }
69
70 TP_UINT_FIELD__SWAPPED(16);
71 TP_UINT_FIELD__SWAPPED(32);
72 TP_UINT_FIELD__SWAPPED(64);
73
74 static int tp_field__init_uint(struct tp_field *field,
75                                struct format_field *format_field,
76                                bool needs_swap)
77 {
78         field->offset = format_field->offset;
79
80         switch (format_field->size) {
81         case 1:
82                 field->integer = tp_field__u8;
83                 break;
84         case 2:
85                 field->integer = needs_swap ? tp_field__swapped_u16 : tp_field__u16;
86                 break;
87         case 4:
88                 field->integer = needs_swap ? tp_field__swapped_u32 : tp_field__u32;
89                 break;
90         case 8:
91                 field->integer = needs_swap ? tp_field__swapped_u64 : tp_field__u64;
92                 break;
93         default:
94                 return -1;
95         }
96
97         return 0;
98 }
99
100 static void *tp_field__ptr(struct tp_field *field, struct perf_sample *sample)
101 {
102         return sample->raw_data + field->offset;
103 }
104
105 static int tp_field__init_ptr(struct tp_field *field, struct format_field *format_field)
106 {
107         field->offset = format_field->offset;
108         field->pointer = tp_field__ptr;
109         return 0;
110 }
111
112 struct syscall_tp {
113         struct tp_field id;
114         union {
115                 struct tp_field args, ret;
116         };
117 };
118
119 static int perf_evsel__init_tp_uint_field(struct perf_evsel *evsel,
120                                           struct tp_field *field,
121                                           const char *name)
122 {
123         struct format_field *format_field = perf_evsel__field(evsel, name);
124
125         if (format_field == NULL)
126                 return -1;
127
128         return tp_field__init_uint(field, format_field, evsel->needs_swap);
129 }
130
131 #define perf_evsel__init_sc_tp_uint_field(evsel, name) \
132         ({ struct syscall_tp *sc = evsel->priv;\
133            perf_evsel__init_tp_uint_field(evsel, &sc->name, #name); })
134
/*
 * Look up the tracepoint field called @name on @evsel and set @field up as
 * a pointer accessor for it.
 *
 * Returns -1 if the field does not exist, otherwise whatever
 * tp_field__init_ptr() returns.
 */
static int perf_evsel__init_tp_ptr_field(struct perf_evsel *evsel,
					 struct tp_field *field,
					 const char *name)
{
	struct format_field *fmt = perf_evsel__field(evsel, name);

	if (!fmt)
		return -1;

	return tp_field__init_ptr(field, fmt);
}

/*
 * Convenience wrapper: initialise the struct syscall_tp member called
 * @name (stored in evsel->priv) from the tracepoint field of the same name.
 */
#define perf_evsel__init_sc_tp_ptr_field(evsel, name) \
	({ struct syscall_tp *sc = evsel->priv;\
	   perf_evsel__init_tp_ptr_field(evsel, &sc->name, #name); })
150
151 static void perf_evsel__delete_priv(struct perf_evsel *evsel)
152 {
153         zfree(&evsel->priv);
154         perf_evsel__delete(evsel);
155 }
156
157 static int perf_evsel__init_syscall_tp(struct perf_evsel *evsel, void *handler)
158 {
159         evsel->priv = malloc(sizeof(struct syscall_tp));
160         if (evsel->priv != NULL) {
161                 if (perf_evsel__init_sc_tp_uint_field(evsel, id))
162                         goto out_delete;
163
164                 evsel->handler = handler;
165                 return 0;
166         }
167
168         return -ENOMEM;
169
170 out_delete:
171         zfree(&evsel->priv);
172         return -ENOENT;
173 }
174
/*
 * Create the evsel for the syscall tracepoint named by @direction
 * ("sys_enter"/"sys_exit" at the call sites in this file) and prepare it
 * with perf_evsel__init_syscall_tp().
 *
 * "raw_syscalls" is tried first; older kernels (e.g., RHEL6) expose the
 * tracepoints under "syscalls" instead, so that is the fallback.
 *
 * Returns the new evsel, or NULL if it could not be created or initialised.
 */
static struct perf_evsel *perf_evsel__syscall_newtp(const char *direction, void *handler)
{
	struct perf_evsel *evsel = perf_evsel__newtp("raw_syscalls", direction);

	if (evsel == NULL)
		evsel = perf_evsel__newtp("syscalls", direction);

	if (evsel == NULL)
		return NULL;

	if (perf_evsel__init_syscall_tp(evsel, handler)) {
		perf_evsel__delete_priv(evsel);
		return NULL;
	}

	return evsel;
}
194
/*
 * Read the integer value of the struct syscall_tp member @name (kept in
 * evsel->priv) out of @sample, through that member's ->integer callback.
 */
#define perf_evsel__sc_tp_uint(evsel, name, sample) \
	({ \
		struct syscall_tp *fields = evsel->priv; \
		fields->name.integer(&fields->name, sample); \
	})

/*
 * Same as above, but goes through the member's ->pointer callback and so
 * yields a pointer into the sample's raw data.
 */
#define perf_evsel__sc_tp_ptr(evsel, name, sample) \
	({ \
		struct syscall_tp *fields = evsel->priv; \
		fields->name.pointer(&fields->name, sample); \
	})
202
203 static int perf_evlist__add_syscall_newtp(struct perf_evlist *evlist,
204                                           void *sys_enter_handler,
205                                           void *sys_exit_handler)
206 {
207         int ret = -1;
208         struct perf_evsel *sys_enter, *sys_exit;
209
210         sys_enter = perf_evsel__syscall_newtp("sys_enter", sys_enter_handler);
211         if (sys_enter == NULL)
212                 goto out;
213
214         if (perf_evsel__init_sc_tp_ptr_field(sys_enter, args))
215                 goto out_delete_sys_enter;
216
217         sys_exit = perf_evsel__syscall_newtp("sys_exit", sys_exit_handler);
218         if (sys_exit == NULL)
219                 goto out_delete_sys_enter;
220
221         if (perf_evsel__init_sc_tp_uint_field(sys_exit, ret))
222                 goto out_delete_sys_exit;
223
224         perf_evlist__add(evlist, sys_enter);
225         perf_evlist__add(evlist, sys_exit);
226
227         ret = 0;
228 out:
229         return ret;
230
231 out_delete_sys_exit:
232         perf_evsel__delete_priv(sys_exit);
233 out_delete_sys_enter:
234         perf_evsel__delete_priv(sys_enter);
235         goto out;
236 }
237
238
239 struct syscall_arg {
240         unsigned long val;
241         struct thread *thread;
242         struct trace  *trace;
243         void          *parm;
244         u8            idx;
245         u8            mask;
246 };
247
/*
 * A table of names for a contiguous range of integer values.
 * The name for value v is entries[v - offset]; nr_entries bounds the table.
 */
struct strarray {
	int	    offset;
	int	    nr_entries;
	const char  **entries;
};
253
/*
 * Define "strarray__<array>", a struct strarray describing the string table
 * @array. The offset is left at zero, so value N maps to array[N].
 */
#define DEFINE_STRARRAY(array) struct strarray strarray__##array = { \
	.nr_entries = ARRAY_SIZE(array), \
	.entries    = array, \
}

/*
 * Same as DEFINE_STRARRAY(), for tables whose first entry corresponds to
 * the value @off rather than to zero.
 */
#define DEFINE_STRARRAY_OFFSET(array, off) struct strarray strarray__##array = { \
	.offset     = off, \
	.nr_entries = ARRAY_SIZE(array), \
	.entries    = array, \
}
264
265 static size_t __syscall_arg__scnprintf_strarray(char *bf, size_t size,
266                                                 const char *intfmt,
267                                                 struct syscall_arg *arg)
268 {
269         struct strarray *sa = arg->parm;
270         int idx = arg->val - sa->offset;
271
272         if (idx < 0 || idx >= sa->nr_entries)
273                 return scnprintf(bf, size, intfmt, arg->val);
274
275         return scnprintf(bf, size, "%s", sa->entries[idx]);
276 }
277
/*
 * strarray formatter whose numeric fallback is printed as a decimal
 * ("%d") integer.
 */
static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size,
					      struct syscall_arg *arg)
{
	static const char decimal_fmt[] = "%d";

	return __syscall_arg__scnprintf_strarray(bf, size, decimal_fmt, arg);
}

#define SCA_STRARRAY syscall_arg__scnprintf_strarray
285
#if defined(__i386__) || defined(__x86_64__)
/*
 * FIXME: Make this available to all arches as soon as the ioctl beautifier
 *        gets rewritten to support all arches.
 *
 * strarray formatter whose numeric fallback is printed in hexadecimal
 * ("%#x").
 */
static size_t syscall_arg__scnprintf_strhexarray(char *bf, size_t size,
						 struct syscall_arg *arg)
{
	static const char hex_fmt[] = "%#x";

	return __syscall_arg__scnprintf_strarray(bf, size, hex_fmt, arg);
}

#define SCA_STRHEXARRAY syscall_arg__scnprintf_strhexarray
#endif /* defined(__i386__) || defined(__x86_64__) */
299
300 static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
301                                         struct syscall_arg *arg);
302
303 #define SCA_FD syscall_arg__scnprintf_fd
304
305 static size_t syscall_arg__scnprintf_fd_at(char *bf, size_t size,
306                                            struct syscall_arg *arg)
307 {
308         int fd = arg->val;
309
310         if (fd == AT_FDCWD)
311                 return scnprintf(bf, size, "CWD");
312
313         return syscall_arg__scnprintf_fd(bf, size, arg);
314 }
315
316 #define SCA_FDAT syscall_arg__scnprintf_fd_at
317
/* Formatter for the fd argument of close(); defined further down in this file. */
static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size, struct syscall_arg *arg);

#define SCA_CLOSE_FD syscall_arg__scnprintf_close_fd
322
323 static size_t syscall_arg__scnprintf_hex(char *bf, size_t size,
324                                          struct syscall_arg *arg)
325 {
326         return scnprintf(bf, size, "%#lx", arg->val);
327 }
328
329 #define SCA_HEX syscall_arg__scnprintf_hex
330
331 static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size,
332                                                struct syscall_arg *arg)
333 {
334         int printed = 0, prot = arg->val;
335
336         if (prot == PROT_NONE)
337                 return scnprintf(bf, size, "NONE");
338 #define P_MMAP_PROT(n) \
339         if (prot & PROT_##n) { \
340                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
341                 prot &= ~PROT_##n; \
342         }
343
344         P_MMAP_PROT(EXEC);
345         P_MMAP_PROT(READ);
346         P_MMAP_PROT(WRITE);
347 #ifdef PROT_SEM
348         P_MMAP_PROT(SEM);
349 #endif
350         P_MMAP_PROT(GROWSDOWN);
351         P_MMAP_PROT(GROWSUP);
352 #undef P_MMAP_PROT
353
354         if (prot)
355                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", prot);
356
357         return printed;
358 }
359
360 #define SCA_MMAP_PROT syscall_arg__scnprintf_mmap_prot
361
362 static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size,
363                                                 struct syscall_arg *arg)
364 {
365         int printed = 0, flags = arg->val;
366
367 #define P_MMAP_FLAG(n) \
368         if (flags & MAP_##n) { \
369                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
370                 flags &= ~MAP_##n; \
371         }
372
373         P_MMAP_FLAG(SHARED);
374         P_MMAP_FLAG(PRIVATE);
375 #ifdef MAP_32BIT
376         P_MMAP_FLAG(32BIT);
377 #endif
378         P_MMAP_FLAG(ANONYMOUS);
379         P_MMAP_FLAG(DENYWRITE);
380         P_MMAP_FLAG(EXECUTABLE);
381         P_MMAP_FLAG(FILE);
382         P_MMAP_FLAG(FIXED);
383         P_MMAP_FLAG(GROWSDOWN);
384 #ifdef MAP_HUGETLB
385         P_MMAP_FLAG(HUGETLB);
386 #endif
387         P_MMAP_FLAG(LOCKED);
388         P_MMAP_FLAG(NONBLOCK);
389         P_MMAP_FLAG(NORESERVE);
390         P_MMAP_FLAG(POPULATE);
391         P_MMAP_FLAG(STACK);
392 #ifdef MAP_UNINITIALIZED
393         P_MMAP_FLAG(UNINITIALIZED);
394 #endif
395 #undef P_MMAP_FLAG
396
397         if (flags)
398                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
399
400         return printed;
401 }
402
403 #define SCA_MMAP_FLAGS syscall_arg__scnprintf_mmap_flags
404
405 static size_t syscall_arg__scnprintf_mremap_flags(char *bf, size_t size,
406                                                   struct syscall_arg *arg)
407 {
408         int printed = 0, flags = arg->val;
409
410 #define P_MREMAP_FLAG(n) \
411         if (flags & MREMAP_##n) { \
412                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
413                 flags &= ~MREMAP_##n; \
414         }
415
416         P_MREMAP_FLAG(MAYMOVE);
417 #ifdef MREMAP_FIXED
418         P_MREMAP_FLAG(FIXED);
419 #endif
420 #undef P_MREMAP_FLAG
421
422         if (flags)
423                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
424
425         return printed;
426 }
427
428 #define SCA_MREMAP_FLAGS syscall_arg__scnprintf_mremap_flags
429
430 static size_t syscall_arg__scnprintf_madvise_behavior(char *bf, size_t size,
431                                                       struct syscall_arg *arg)
432 {
433         int behavior = arg->val;
434
435         switch (behavior) {
436 #define P_MADV_BHV(n) case MADV_##n: return scnprintf(bf, size, #n)
437         P_MADV_BHV(NORMAL);
438         P_MADV_BHV(RANDOM);
439         P_MADV_BHV(SEQUENTIAL);
440         P_MADV_BHV(WILLNEED);
441         P_MADV_BHV(DONTNEED);
442         P_MADV_BHV(REMOVE);
443         P_MADV_BHV(DONTFORK);
444         P_MADV_BHV(DOFORK);
445         P_MADV_BHV(HWPOISON);
446 #ifdef MADV_SOFT_OFFLINE
447         P_MADV_BHV(SOFT_OFFLINE);
448 #endif
449         P_MADV_BHV(MERGEABLE);
450         P_MADV_BHV(UNMERGEABLE);
451 #ifdef MADV_HUGEPAGE
452         P_MADV_BHV(HUGEPAGE);
453 #endif
454 #ifdef MADV_NOHUGEPAGE
455         P_MADV_BHV(NOHUGEPAGE);
456 #endif
457 #ifdef MADV_DONTDUMP
458         P_MADV_BHV(DONTDUMP);
459 #endif
460 #ifdef MADV_DODUMP
461         P_MADV_BHV(DODUMP);
462 #endif
463 #undef P_MADV_PHV
464         default: break;
465         }
466
467         return scnprintf(bf, size, "%#x", behavior);
468 }
469
470 #define SCA_MADV_BHV syscall_arg__scnprintf_madvise_behavior
471
472 static size_t syscall_arg__scnprintf_flock(char *bf, size_t size,
473                                            struct syscall_arg *arg)
474 {
475         int printed = 0, op = arg->val;
476
477         if (op == 0)
478                 return scnprintf(bf, size, "NONE");
479 #define P_CMD(cmd) \
480         if ((op & LOCK_##cmd) == LOCK_##cmd) { \
481                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #cmd); \
482                 op &= ~LOCK_##cmd; \
483         }
484
485         P_CMD(SH);
486         P_CMD(EX);
487         P_CMD(NB);
488         P_CMD(UN);
489         P_CMD(MAND);
490         P_CMD(RW);
491         P_CMD(READ);
492         P_CMD(WRITE);
493 #undef P_OP
494
495         if (op)
496                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", op);
497
498         return printed;
499 }
500
501 #define SCA_FLOCK syscall_arg__scnprintf_flock
502
503 static size_t syscall_arg__scnprintf_futex_op(char *bf, size_t size, struct syscall_arg *arg)
504 {
505         enum syscall_futex_args {
506                 SCF_UADDR   = (1 << 0),
507                 SCF_OP      = (1 << 1),
508                 SCF_VAL     = (1 << 2),
509                 SCF_TIMEOUT = (1 << 3),
510                 SCF_UADDR2  = (1 << 4),
511                 SCF_VAL3    = (1 << 5),
512         };
513         int op = arg->val;
514         int cmd = op & FUTEX_CMD_MASK;
515         size_t printed = 0;
516
517         switch (cmd) {
518 #define P_FUTEX_OP(n) case FUTEX_##n: printed = scnprintf(bf, size, #n);
519         P_FUTEX_OP(WAIT);           arg->mask |= SCF_VAL3|SCF_UADDR2;             break;
520         P_FUTEX_OP(WAKE);           arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
521         P_FUTEX_OP(FD);             arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
522         P_FUTEX_OP(REQUEUE);        arg->mask |= SCF_VAL3|SCF_TIMEOUT;            break;
523         P_FUTEX_OP(CMP_REQUEUE);    arg->mask |= SCF_TIMEOUT;                     break;
524         P_FUTEX_OP(CMP_REQUEUE_PI); arg->mask |= SCF_TIMEOUT;                     break;
525         P_FUTEX_OP(WAKE_OP);                                                      break;
526         P_FUTEX_OP(LOCK_PI);        arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
527         P_FUTEX_OP(UNLOCK_PI);      arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
528         P_FUTEX_OP(TRYLOCK_PI);     arg->mask |= SCF_VAL3|SCF_UADDR2;             break;
529         P_FUTEX_OP(WAIT_BITSET);    arg->mask |= SCF_UADDR2;                      break;
530         P_FUTEX_OP(WAKE_BITSET);    arg->mask |= SCF_UADDR2;                      break;
531         P_FUTEX_OP(WAIT_REQUEUE_PI);                                              break;
532         default: printed = scnprintf(bf, size, "%#x", cmd);                       break;
533         }
534
535         if (op & FUTEX_PRIVATE_FLAG)
536                 printed += scnprintf(bf + printed, size - printed, "|PRIV");
537
538         if (op & FUTEX_CLOCK_REALTIME)
539                 printed += scnprintf(bf + printed, size - printed, "|CLKRT");
540
541         return printed;
542 }
543
544 #define SCA_FUTEX_OP  syscall_arg__scnprintf_futex_op
545
546 static const char *epoll_ctl_ops[] = { "ADD", "DEL", "MOD", };
547 static DEFINE_STRARRAY_OFFSET(epoll_ctl_ops, 1);
548
/* Interval timer names, indexed by the timer number starting at 0. */
static const char *itimers[] = {
	"REAL",
	"VIRTUAL",
	"PROF",
};
static DEFINE_STRARRAY(itimers);
551
/*
 * Seek "whence" names, indexed from 0. "DATA" and "HOLE" are only present
 * when the headers define SEEK_DATA / SEEK_HOLE.
 */
static const char *whences[] = {
	"SET",
	"CUR",
	"END",
#ifdef SEEK_DATA
	"DATA",
#endif
#ifdef SEEK_HOLE
	"HOLE",
#endif
};
static DEFINE_STRARRAY(whences);
561
/* fcntl() command names, indexed by the command number starting at 0. */
static const char *fcntl_cmds[] = {
	"DUPFD",
	"GETFD",
	"SETFD",
	"GETFL",
	"SETFL",
	"GETLK",
	"SETLK",
	"SETLKW",
	"SETOWN",
	"GETOWN",
	"SETSIG",
	"GETSIG",
	"F_GETLK64",
	"F_SETLK64",
	"F_SETLKW64",
	"F_SETOWN_EX",
	"F_GETOWN_EX",
	"F_GETOWNER_UIDS",
};
static DEFINE_STRARRAY(fcntl_cmds);
569
/* Resource limit names, indexed by the resource number starting at 0. */
static const char *rlimit_resources[] = {
	"CPU",
	"FSIZE",
	"DATA",
	"STACK",
	"CORE",
	"RSS",
	"NPROC",
	"NOFILE",
	"MEMLOCK",
	"AS",
	"LOCKS",
	"SIGPENDING",
	"MSGQUEUE",
	"NICE",
	"RTPRIO",
	"RTTIME",
};
static DEFINE_STRARRAY(rlimit_resources);
576
/* Signal mask "how" names, indexed from 0. */
static const char *sighow[] = {
	"BLOCK",
	"UNBLOCK",
	"SETMASK",
};
static DEFINE_STRARRAY(sighow);
579
/* Clock id names, indexed by the clock id starting at 0. */
static const char *clockid[] = {
	"REALTIME",
	"MONOTONIC",
	"PROCESS_CPUTIME_ID",
	"THREAD_CPUTIME_ID",
	"MONOTONIC_RAW",
	"REALTIME_COARSE",
	"MONOTONIC_COARSE",
};
static DEFINE_STRARRAY(clockid);
585
/*
 * Socket address family names, indexed by the AF_* number.
 *
 * Every slot must line up with the kernel's numbering: "MPLS" (AF_MPLS,
 * 28) sits between "IB" (27) and "CAN" (29). Leaving it out shifts "CAN"
 * and every later family down by one, so they get printed under the wrong
 * name.
 */
static const char *socket_families[] = {
	"UNSPEC", "LOCAL", "INET", "AX25", "IPX", "APPLETALK", "NETROM",
	"BRIDGE", "ATMPVC", "X25", "INET6", "ROSE", "DECnet", "NETBEUI",
	"SECURITY", "KEY", "NETLINK", "PACKET", "ASH", "ECONET", "ATMSVC",
	"RDS", "SNA", "IRDA", "PPPOX", "WANPIPE", "LLC", "IB", "MPLS", "CAN",
	"TIPC", "BLUETOOTH", "IUCV", "RXRPC", "ISDN", "PHONET", "IEEE802154",
	"CAIF", "ALG", "NFC", "VSOCK",
};
594 static DEFINE_STRARRAY(socket_families);
595
596 #ifndef SOCK_TYPE_MASK
597 #define SOCK_TYPE_MASK 0xf
598 #endif
599
600 static size_t syscall_arg__scnprintf_socket_type(char *bf, size_t size,
601                                                       struct syscall_arg *arg)
602 {
603         size_t printed;
604         int type = arg->val,
605             flags = type & ~SOCK_TYPE_MASK;
606
607         type &= SOCK_TYPE_MASK;
608         /*
609          * Can't use a strarray, MIPS may override for ABI reasons.
610          */
611         switch (type) {
612 #define P_SK_TYPE(n) case SOCK_##n: printed = scnprintf(bf, size, #n); break;
613         P_SK_TYPE(STREAM);
614         P_SK_TYPE(DGRAM);
615         P_SK_TYPE(RAW);
616         P_SK_TYPE(RDM);
617         P_SK_TYPE(SEQPACKET);
618         P_SK_TYPE(DCCP);
619         P_SK_TYPE(PACKET);
620 #undef P_SK_TYPE
621         default:
622                 printed = scnprintf(bf, size, "%#x", type);
623         }
624
625 #define P_SK_FLAG(n) \
626         if (flags & SOCK_##n) { \
627                 printed += scnprintf(bf + printed, size - printed, "|%s", #n); \
628                 flags &= ~SOCK_##n; \
629         }
630
631         P_SK_FLAG(CLOEXEC);
632         P_SK_FLAG(NONBLOCK);
633 #undef P_SK_FLAG
634
635         if (flags)
636                 printed += scnprintf(bf + printed, size - printed, "|%#x", flags);
637
638         return printed;
639 }
640
641 #define SCA_SK_TYPE syscall_arg__scnprintf_socket_type
642
643 #ifndef MSG_PROBE
644 #define MSG_PROBE            0x10
645 #endif
646 #ifndef MSG_WAITFORONE
647 #define MSG_WAITFORONE  0x10000
648 #endif
649 #ifndef MSG_SENDPAGE_NOTLAST
650 #define MSG_SENDPAGE_NOTLAST 0x20000
651 #endif
652 #ifndef MSG_FASTOPEN
653 #define MSG_FASTOPEN         0x20000000
654 #endif
655
656 static size_t syscall_arg__scnprintf_msg_flags(char *bf, size_t size,
657                                                struct syscall_arg *arg)
658 {
659         int printed = 0, flags = arg->val;
660
661         if (flags == 0)
662                 return scnprintf(bf, size, "NONE");
663 #define P_MSG_FLAG(n) \
664         if (flags & MSG_##n) { \
665                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
666                 flags &= ~MSG_##n; \
667         }
668
669         P_MSG_FLAG(OOB);
670         P_MSG_FLAG(PEEK);
671         P_MSG_FLAG(DONTROUTE);
672         P_MSG_FLAG(TRYHARD);
673         P_MSG_FLAG(CTRUNC);
674         P_MSG_FLAG(PROBE);
675         P_MSG_FLAG(TRUNC);
676         P_MSG_FLAG(DONTWAIT);
677         P_MSG_FLAG(EOR);
678         P_MSG_FLAG(WAITALL);
679         P_MSG_FLAG(FIN);
680         P_MSG_FLAG(SYN);
681         P_MSG_FLAG(CONFIRM);
682         P_MSG_FLAG(RST);
683         P_MSG_FLAG(ERRQUEUE);
684         P_MSG_FLAG(NOSIGNAL);
685         P_MSG_FLAG(MORE);
686         P_MSG_FLAG(WAITFORONE);
687         P_MSG_FLAG(SENDPAGE_NOTLAST);
688         P_MSG_FLAG(FASTOPEN);
689         P_MSG_FLAG(CMSG_CLOEXEC);
690 #undef P_MSG_FLAG
691
692         if (flags)
693                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
694
695         return printed;
696 }
697
698 #define SCA_MSG_FLAGS syscall_arg__scnprintf_msg_flags
699
700 static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size,
701                                                  struct syscall_arg *arg)
702 {
703         size_t printed = 0;
704         int mode = arg->val;
705
706         if (mode == F_OK) /* 0 */
707                 return scnprintf(bf, size, "F");
708 #define P_MODE(n) \
709         if (mode & n##_OK) { \
710                 printed += scnprintf(bf + printed, size - printed, "%s", #n); \
711                 mode &= ~n##_OK; \
712         }
713
714         P_MODE(R);
715         P_MODE(W);
716         P_MODE(X);
717 #undef P_MODE
718
719         if (mode)
720                 printed += scnprintf(bf + printed, size - printed, "|%#x", mode);
721
722         return printed;
723 }
724
725 #define SCA_ACCMODE syscall_arg__scnprintf_access_mode
726
727 static size_t syscall_arg__scnprintf_open_flags(char *bf, size_t size,
728                                                struct syscall_arg *arg)
729 {
730         int printed = 0, flags = arg->val;
731
732         if (!(flags & O_CREAT))
733                 arg->mask |= 1 << (arg->idx + 1); /* Mask the mode parm */
734
735         if (flags == 0)
736                 return scnprintf(bf, size, "RDONLY");
737 #define P_FLAG(n) \
738         if (flags & O_##n) { \
739                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
740                 flags &= ~O_##n; \
741         }
742
743         P_FLAG(APPEND);
744         P_FLAG(ASYNC);
745         P_FLAG(CLOEXEC);
746         P_FLAG(CREAT);
747         P_FLAG(DIRECT);
748         P_FLAG(DIRECTORY);
749         P_FLAG(EXCL);
750         P_FLAG(LARGEFILE);
751         P_FLAG(NOATIME);
752         P_FLAG(NOCTTY);
753 #ifdef O_NONBLOCK
754         P_FLAG(NONBLOCK);
755 #elif O_NDELAY
756         P_FLAG(NDELAY);
757 #endif
758 #ifdef O_PATH
759         P_FLAG(PATH);
760 #endif
761         P_FLAG(RDWR);
762 #ifdef O_DSYNC
763         if ((flags & O_SYNC) == O_SYNC)
764                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", "SYNC");
765         else {
766                 P_FLAG(DSYNC);
767         }
768 #else
769         P_FLAG(SYNC);
770 #endif
771         P_FLAG(TRUNC);
772         P_FLAG(WRONLY);
773 #undef P_FLAG
774
775         if (flags)
776                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
777
778         return printed;
779 }
780
781 #define SCA_OPEN_FLAGS syscall_arg__scnprintf_open_flags
782
783 static size_t syscall_arg__scnprintf_eventfd_flags(char *bf, size_t size,
784                                                    struct syscall_arg *arg)
785 {
786         int printed = 0, flags = arg->val;
787
788         if (flags == 0)
789                 return scnprintf(bf, size, "NONE");
790 #define P_FLAG(n) \
791         if (flags & EFD_##n) { \
792                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
793                 flags &= ~EFD_##n; \
794         }
795
796         P_FLAG(SEMAPHORE);
797         P_FLAG(CLOEXEC);
798         P_FLAG(NONBLOCK);
799 #undef P_FLAG
800
801         if (flags)
802                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
803
804         return printed;
805 }
806
807 #define SCA_EFD_FLAGS syscall_arg__scnprintf_eventfd_flags
808
809 static size_t syscall_arg__scnprintf_pipe_flags(char *bf, size_t size,
810                                                 struct syscall_arg *arg)
811 {
812         int printed = 0, flags = arg->val;
813
814 #define P_FLAG(n) \
815         if (flags & O_##n) { \
816                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
817                 flags &= ~O_##n; \
818         }
819
820         P_FLAG(CLOEXEC);
821         P_FLAG(NONBLOCK);
822 #undef P_FLAG
823
824         if (flags)
825                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
826
827         return printed;
828 }
829
830 #define SCA_PIPE_FLAGS syscall_arg__scnprintf_pipe_flags
831
832 static size_t syscall_arg__scnprintf_signum(char *bf, size_t size, struct syscall_arg *arg)
833 {
834         int sig = arg->val;
835
836         switch (sig) {
837 #define P_SIGNUM(n) case SIG##n: return scnprintf(bf, size, #n)
838         P_SIGNUM(HUP);
839         P_SIGNUM(INT);
840         P_SIGNUM(QUIT);
841         P_SIGNUM(ILL);
842         P_SIGNUM(TRAP);
843         P_SIGNUM(ABRT);
844         P_SIGNUM(BUS);
845         P_SIGNUM(FPE);
846         P_SIGNUM(KILL);
847         P_SIGNUM(USR1);
848         P_SIGNUM(SEGV);
849         P_SIGNUM(USR2);
850         P_SIGNUM(PIPE);
851         P_SIGNUM(ALRM);
852         P_SIGNUM(TERM);
853         P_SIGNUM(CHLD);
854         P_SIGNUM(CONT);
855         P_SIGNUM(STOP);
856         P_SIGNUM(TSTP);
857         P_SIGNUM(TTIN);
858         P_SIGNUM(TTOU);
859         P_SIGNUM(URG);
860         P_SIGNUM(XCPU);
861         P_SIGNUM(XFSZ);
862         P_SIGNUM(VTALRM);
863         P_SIGNUM(PROF);
864         P_SIGNUM(WINCH);
865         P_SIGNUM(IO);
866         P_SIGNUM(PWR);
867         P_SIGNUM(SYS);
868 #ifdef SIGEMT
869         P_SIGNUM(EMT);
870 #endif
871 #ifdef SIGSTKFLT
872         P_SIGNUM(STKFLT);
873 #endif
874 #ifdef SIGSWI
875         P_SIGNUM(SWI);
876 #endif
877         default: break;
878         }
879
880         return scnprintf(bf, size, "%#x", sig);
881 }
882
883 #define SCA_SIGNUM syscall_arg__scnprintf_signum
884
885 #if defined(__i386__) || defined(__x86_64__)
886 /*
887  * FIXME: Make this available to all arches.
888  */
889 #define TCGETS          0x5401
890
891 static const char *tioctls[] = {
892         "TCGETS", "TCSETS", "TCSETSW", "TCSETSF", "TCGETA", "TCSETA", "TCSETAW",
893         "TCSETAF", "TCSBRK", "TCXONC", "TCFLSH", "TIOCEXCL", "TIOCNXCL",
894         "TIOCSCTTY", "TIOCGPGRP", "TIOCSPGRP", "TIOCOUTQ", "TIOCSTI",
895         "TIOCGWINSZ", "TIOCSWINSZ", "TIOCMGET", "TIOCMBIS", "TIOCMBIC",
896         "TIOCMSET", "TIOCGSOFTCAR", "TIOCSSOFTCAR", "FIONREAD", "TIOCLINUX",
897         "TIOCCONS", "TIOCGSERIAL", "TIOCSSERIAL", "TIOCPKT", "FIONBIO",
898         "TIOCNOTTY", "TIOCSETD", "TIOCGETD", "TCSBRKP", [0x27] = "TIOCSBRK",
899         "TIOCCBRK", "TIOCGSID", "TCGETS2", "TCSETS2", "TCSETSW2", "TCSETSF2",
900         "TIOCGRS485", "TIOCSRS485", "TIOCGPTN", "TIOCSPTLCK",
901         "TIOCGDEV||TCGETX", "TCSETX", "TCSETXF", "TCSETXW", "TIOCSIG",
902         "TIOCVHANGUP", "TIOCGPKT", "TIOCGPTLCK", "TIOCGEXCL",
903         [0x50] = "FIONCLEX", "FIOCLEX", "FIOASYNC", "TIOCSERCONFIG",
904         "TIOCSERGWILD", "TIOCSERSWILD", "TIOCGLCKTRMIOS", "TIOCSLCKTRMIOS",
905         "TIOCSERGSTRUCT", "TIOCSERGETLSR", "TIOCSERGETMULTI", "TIOCSERSETMULTI",
906         "TIOCMIWAIT", "TIOCGICOUNT", [0x60] = "FIOQSIZE",
907 };
908
909 static DEFINE_STRARRAY_OFFSET(tioctls, 0x5401);
910 #endif /* defined(__i386__) || defined(__x86_64__) */
911
/*
 * Initializer fragment for a struct syscall_fmt entry: argument number
 * @arg is printed with SCA_STRARRAY using the table strarray__<array>.
 * @name is not used by the expansion; it only labels the argument at the
 * place where the macro is used.
 */
#define STRARRAY(arg, name, array) \
	  .arg_scnprintf = { [arg] = SCA_STRARRAY, }, \
	  .arg_parm	 = { [arg] = &strarray__##array, }
915
/*
 * Per-syscall pretty-printing hints.
 *
 * syscall_fmts[] is looked up with bsearch() using strcmp() on ->name (see
 * syscall_fmt__find()), so entries MUST be kept sorted by name in strcmp()
 * order; a misplaced entry would silently never be found.
 */
static struct syscall_fmt {
        /* syscall name, the bsearch() key */
        const char *name;
        /* alternative tracepoint name tried when sys_enter_<name> is missing */
        const char *alias;
        /* per-argument formatter, indexed by argument position (up to 6) */
        size_t     (*arg_scnprintf[6])(char *bf, size_t size, struct syscall_arg *arg);
        /* per-argument opaque parameter handed to the formatter via arg->parm */
        void       *arg_parm[6];
        /*
         * Return value presentation hints; their consumer is not visible in
         * this part of the file.
         */
        bool       errmsg;
        bool       timeout;
        bool       hexret;
} syscall_fmts[] = {
        { .name     = "access",     .errmsg = true,
          .arg_scnprintf = { [1] = SCA_ACCMODE, /* mode */ }, },
        { .name     = "arch_prctl", .errmsg = true, .alias = "prctl", },
        { .name     = "brk",        .hexret = true,
          .arg_scnprintf = { [0] = SCA_HEX, /* brk */ }, },
        { .name     = "clock_gettime",  .errmsg = true, STRARRAY(0, clk_id, clockid), },
        { .name     = "close",      .errmsg = true,
          .arg_scnprintf = { [0] = SCA_CLOSE_FD, /* fd */ }, },
        { .name     = "connect",    .errmsg = true, },
        { .name     = "dup",        .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        { .name     = "dup2",       .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        { .name     = "dup3",       .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        { .name     = "epoll_ctl",  .errmsg = true, STRARRAY(1, op, epoll_ctl_ops), },
        { .name     = "eventfd2",   .errmsg = true,
          .arg_scnprintf = { [1] = SCA_EFD_FLAGS, /* flags */ }, },
        { .name     = "faccessat",  .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
        { .name     = "fadvise64",  .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        { .name     = "fallocate",  .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        { .name     = "fchdir",     .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        { .name     = "fchmod",     .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        { .name     = "fchmodat",   .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
        { .name     = "fchown",     .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        { .name     = "fchownat",   .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
        { .name     = "fcntl",      .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */
                             [1] = SCA_STRARRAY, /* cmd */ },
          .arg_parm      = { [1] = &strarray__fcntl_cmds, /* cmd */ }, },
        { .name     = "fdatasync",  .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        { .name     = "flock",      .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */
                             [1] = SCA_FLOCK, /* cmd */ }, },
        { .name     = "fsetxattr",  .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        { .name     = "fstat",      .errmsg = true, .alias = "newfstat",
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        { .name     = "fstatat",    .errmsg = true, .alias = "newfstatat",
          .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
        { .name     = "fstatfs",    .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        { .name     = "fsync",    .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        { .name     = "ftruncate", .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        { .name     = "futex",      .errmsg = true,
          .arg_scnprintf = { [1] = SCA_FUTEX_OP, /* op */ }, },
        { .name     = "futimesat", .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
        { .name     = "getdents",   .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        { .name     = "getdents64", .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        { .name     = "getitimer",  .errmsg = true, STRARRAY(0, which, itimers), },
        { .name     = "getrlimit",  .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
        { .name     = "ioctl",      .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */
#if defined(__i386__) || defined(__x86_64__)
/*
 * FIXME: Make this available to all arches.
 */
                             [1] = SCA_STRHEXARRAY, /* cmd */
                             [2] = SCA_HEX, /* arg */ },
          .arg_parm      = { [1] = &strarray__tioctls, /* cmd */ }, },
#else
                             [2] = SCA_HEX, /* arg */ }, },
#endif
        { .name     = "kill",       .errmsg = true,
          .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
        { .name     = "linkat",     .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
        { .name     = "lseek",      .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */
                             [2] = SCA_STRARRAY, /* whence */ },
          .arg_parm      = { [2] = &strarray__whences, /* whence */ }, },
        { .name     = "lstat",      .errmsg = true, .alias = "newlstat", },
        { .name     = "madvise",    .errmsg = true,
          .arg_scnprintf = { [0] = SCA_HEX,      /* start */
                             [2] = SCA_MADV_BHV, /* behavior */ }, },
        { .name     = "mkdirat",    .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
        { .name     = "mknodat",    .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
        { .name     = "mlock",      .errmsg = true,
          .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
        { .name     = "mlockall",   .errmsg = true,
          .arg_scnprintf = { [0] = SCA_HEX, /* flags (mlockall(2) takes no address) */ }, },
        { .name     = "mmap",       .hexret = true,
          .arg_scnprintf = { [0] = SCA_HEX,       /* addr */
                             [2] = SCA_MMAP_PROT, /* prot */
                             [3] = SCA_MMAP_FLAGS, /* flags */
                             [4] = SCA_FD,        /* fd */ }, },
        { .name     = "mprotect",   .errmsg = true,
          .arg_scnprintf = { [0] = SCA_HEX, /* start */
                             [2] = SCA_MMAP_PROT, /* prot */ }, },
        { .name     = "mremap",     .hexret = true,
          .arg_scnprintf = { [0] = SCA_HEX, /* addr */
                             [3] = SCA_MREMAP_FLAGS, /* flags */
                             [4] = SCA_HEX, /* new_addr */ }, },
        { .name     = "munlock",    .errmsg = true,
          .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
        { .name     = "munmap",     .errmsg = true,
          .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
        { .name     = "name_to_handle_at", .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
        { .name     = "newfstatat", .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
        { .name     = "open",       .errmsg = true,
          .arg_scnprintf = { [1] = SCA_OPEN_FLAGS, /* flags */ }, },
        { .name     = "open_by_handle_at", .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
                             [2] = SCA_OPEN_FLAGS, /* flags */ }, },
        { .name     = "openat",     .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
                             [2] = SCA_OPEN_FLAGS, /* flags */ }, },
        { .name     = "pipe2",      .errmsg = true,
          .arg_scnprintf = { [1] = SCA_PIPE_FLAGS, /* flags */ }, },
        { .name     = "poll",       .errmsg = true, .timeout = true, },
        { .name     = "ppoll",      .errmsg = true, .timeout = true, },
        { .name     = "pread",      .errmsg = true, .alias = "pread64",
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        /*
         * NOTE(review): the "pread" alias below looks suspicious, since
         * "pread" itself needs the "pread64" alias above -- confirm.
         */
        { .name     = "preadv",     .errmsg = true, .alias = "pread",
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        { .name     = "prlimit64",  .errmsg = true, STRARRAY(1, resource, rlimit_resources), },
        { .name     = "pwrite",     .errmsg = true, .alias = "pwrite64",
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        { .name     = "pwritev",    .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        { .name     = "read",       .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        { .name     = "readlinkat", .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
        { .name     = "readv",      .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        { .name     = "recvfrom",   .errmsg = true,
          .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
        { .name     = "recvmmsg",   .errmsg = true,
          .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
        { .name     = "recvmsg",    .errmsg = true,
          .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
        { .name     = "renameat",   .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
        { .name     = "rt_sigaction", .errmsg = true,
          .arg_scnprintf = { [0] = SCA_SIGNUM, /* sig */ }, },
        { .name     = "rt_sigprocmask",  .errmsg = true, STRARRAY(0, how, sighow), },
        { .name     = "rt_sigqueueinfo", .errmsg = true,
          .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
        { .name     = "rt_tgsigqueueinfo", .errmsg = true,
          .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
        { .name     = "select",     .errmsg = true, .timeout = true, },
        { .name     = "sendmmsg",    .errmsg = true,
          .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
        { .name     = "sendmsg",    .errmsg = true,
          .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
        { .name     = "sendto",     .errmsg = true,
          .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
        { .name     = "setitimer",  .errmsg = true, STRARRAY(0, which, itimers), },
        { .name     = "setrlimit",  .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
        { .name     = "shutdown",   .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        { .name     = "socket",     .errmsg = true,
          .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
                             [1] = SCA_SK_TYPE, /* type */ },
          .arg_parm      = { [0] = &strarray__socket_families, /* family */ }, },
        { .name     = "socketpair", .errmsg = true,
          .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
                             [1] = SCA_SK_TYPE, /* type */ },
          .arg_parm      = { [0] = &strarray__socket_families, /* family */ }, },
        { .name     = "stat",       .errmsg = true, .alias = "newstat", },
        { .name     = "symlinkat",  .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
        { .name     = "tgkill",     .errmsg = true,
          .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
        { .name     = "tkill",      .errmsg = true,
          .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
        { .name     = "uname",      .errmsg = true, .alias = "newuname", },
        { .name     = "unlinkat",   .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
        { .name     = "utimensat",  .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FDAT, /* dirfd */ }, },
        { .name     = "write",      .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        { .name     = "writev",     .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
};
1120
1121 static int syscall_fmt__cmp(const void *name, const void *fmtp)
1122 {
1123         const struct syscall_fmt *fmt = fmtp;
1124         return strcmp(name, fmt->name);
1125 }
1126
1127 static struct syscall_fmt *syscall_fmt__find(const char *name)
1128 {
1129         const int nmemb = ARRAY_SIZE(syscall_fmts);
1130         return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp);
1131 }
1132
/*
 * Cached description of one syscall, stored in trace->syscalls.table and
 * indexed by syscall id.  An entry whose ->name is NULL has not been read yet.
 */
struct syscall {
        /* format of the sys_enter_<name> (or alias) tracepoint */
        struct event_format *tp_format;
        /* name as returned by audit_syscall_to_name() */
        const char          *name;
        /* true when the event qualifier excludes this syscall */
        bool                filtered;
        /* true for "exit" and "exit_group" */
        bool                is_exit;
        /* matching syscall_fmts[] entry, NULL when there is none */
        struct syscall_fmt  *fmt;
        /* per-argument formatters, one slot per tracepoint argument field */
        size_t              (**arg_scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
        /* per-argument formatter parameters; points into ->fmt when set */
        void                **arg_parm;
};
1142
1143 static size_t fprintf_duration(unsigned long t, FILE *fp)
1144 {
1145         double duration = (double)t / NSEC_PER_MSEC;
1146         size_t printed = fprintf(fp, "(");
1147
1148         if (duration >= 1.0)
1149                 printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration);
1150         else if (duration >= 0.01)
1151                 printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration);
1152         else
1153                 printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration);
1154         return printed + fprintf(fp, "): ");
1155 }
1156
/*
 * Per-thread tracing state, attached to a struct thread through
 * thread__set_priv() and retrieved with thread__priv().
 */
struct thread_trace {
        /* sample time of the last syscall entry seen for this thread */
        u64               entry_time;
        u64               exit_time;
        /* an entry line has been formatted but not yet printed */
        bool              entry_pending;
        /* bumped on every thread__trace() call for this thread */
        unsigned long     nr_events;
        unsigned long     pfmaj, pfmin;
        /* buffer holding the formatted syscall entry text */
        char              *entry_str;
        double            runtime_ms;
        /* fd -> pathname cache; 'max' is the highest valid index, -1 if empty */
        struct {
                int       max;
                char      **table;
        } paths;

        /* syscall id -> struct stats (kept in int_node->priv) */
        struct intlist *syscall_stats;
};
1172
1173 static struct thread_trace *thread_trace__new(void)
1174 {
1175         struct thread_trace *ttrace =  zalloc(sizeof(struct thread_trace));
1176
1177         if (ttrace)
1178                 ttrace->paths.max = -1;
1179
1180         ttrace->syscall_stats = intlist__new(NULL);
1181
1182         return ttrace;
1183 }
1184
1185 static struct thread_trace *thread__trace(struct thread *thread, FILE *fp)
1186 {
1187         struct thread_trace *ttrace;
1188
1189         if (thread == NULL)
1190                 goto fail;
1191
1192         if (thread__priv(thread) == NULL)
1193                 thread__set_priv(thread, thread_trace__new());
1194
1195         if (thread__priv(thread) == NULL)
1196                 goto fail;
1197
1198         ttrace = thread__priv(thread);
1199         ++ttrace->nr_events;
1200
1201         return ttrace;
1202 fail:
1203         color_fprintf(fp, PERF_COLOR_RED,
1204                       "WARNING: not enough memory, dropping samples!\n");
1205         return NULL;
1206 }
1207
/*
 * Bit flags selecting major/minor page faults; presumably combined in
 * trace->trace_pgfaults -- confirm against the option parsing code.
 */
#define TRACE_PFMAJ             (1 << 0)
#define TRACE_PFMIN             (1 << 1)
1210
/* Global state of one 'perf trace' session. */
struct trace {
        struct perf_tool        tool;
        struct {
                /* machine type passed to audit_syscall_to_name() */
                int             machine;
                int             open_id;
        }                       audit;
        /* syscall id -> struct syscall cache; 'max' is the highest valid id */
        struct {
                int             max;
                struct syscall  *table;
        } syscalls;
        struct record_opts      opts;
        struct perf_evlist      *evlist;
        /* host machine, created by trace__symbols_init() */
        struct machine          *host;
        /* thread whose pending syscall entry may need flushing */
        struct thread           *current;
        /* subtracted from sample timestamps before printing */
        u64                     base_time;
        /* where all trace output goes */
        FILE                    *output;
        unsigned long           nr_events;
        /* list of syscall names used to filter events, may be NULL */
        struct strlist          *ev_qualifier;
        const char              *last_vfs_getname;
        struct intlist          *tid_list;
        struct intlist          *pid_list;
        struct {
                size_t          nr;
                pid_t           *entries;
        }                       filter_pids;
        /* threshold, in milliseconds, used by trace__filter_duration() */
        double                  duration_filter;
        double                  runtime_ms;
        struct {
                u64             vfs_getname,
                                /* number of fd path lookups attempted via /proc */
                                proc_getname;
        } stats;
        /* when set, ev_qualifier lists the syscalls to exclude */
        bool                    not_ev_qualifier;
        /* fd paths may only be resolved through /proc when this is set */
        bool                    live;
        bool                    full_time;
        bool                    sched;
        /* prefix each line with the tid (and comm, if show_comm) */
        bool                    multiple_threads;
        bool                    summary;
        bool                    summary_only;
        bool                    show_comm;
        bool                    show_tool_stats;
        bool                    trace_syscalls;
        int                     trace_pgfaults;
};
1254
1255 static int trace__set_fd_pathname(struct thread *thread, int fd, const char *pathname)
1256 {
1257         struct thread_trace *ttrace = thread__priv(thread);
1258
1259         if (fd > ttrace->paths.max) {
1260                 char **npath = realloc(ttrace->paths.table, (fd + 1) * sizeof(char *));
1261
1262                 if (npath == NULL)
1263                         return -1;
1264
1265                 if (ttrace->paths.max != -1) {
1266                         memset(npath + ttrace->paths.max + 1, 0,
1267                                (fd - ttrace->paths.max) * sizeof(char *));
1268                 } else {
1269                         memset(npath, 0, (fd + 1) * sizeof(char *));
1270                 }
1271
1272                 ttrace->paths.table = npath;
1273                 ttrace->paths.max   = fd;
1274         }
1275
1276         ttrace->paths.table[fd] = strdup(pathname);
1277
1278         return ttrace->paths.table[fd] != NULL ? 0 : -1;
1279 }
1280
1281 static int thread__read_fd_path(struct thread *thread, int fd)
1282 {
1283         char linkname[PATH_MAX], pathname[PATH_MAX];
1284         struct stat st;
1285         int ret;
1286
1287         if (thread->pid_ == thread->tid) {
1288                 scnprintf(linkname, sizeof(linkname),
1289                           "/proc/%d/fd/%d", thread->pid_, fd);
1290         } else {
1291                 scnprintf(linkname, sizeof(linkname),
1292                           "/proc/%d/task/%d/fd/%d", thread->pid_, thread->tid, fd);
1293         }
1294
1295         if (lstat(linkname, &st) < 0 || st.st_size + 1 > (off_t)sizeof(pathname))
1296                 return -1;
1297
1298         ret = readlink(linkname, pathname, sizeof(pathname));
1299
1300         if (ret < 0 || ret > st.st_size)
1301                 return -1;
1302
1303         pathname[ret] = '\0';
1304         return trace__set_fd_pathname(thread, fd, pathname);
1305 }
1306
1307 static const char *thread__fd_path(struct thread *thread, int fd,
1308                                    struct trace *trace)
1309 {
1310         struct thread_trace *ttrace = thread__priv(thread);
1311
1312         if (ttrace == NULL)
1313                 return NULL;
1314
1315         if (fd < 0)
1316                 return NULL;
1317
1318         if ((fd > ttrace->paths.max || ttrace->paths.table[fd] == NULL)) {
1319                 if (!trace->live)
1320                         return NULL;
1321                 ++trace->stats.proc_getname;
1322                 if (thread__read_fd_path(thread, fd))
1323                         return NULL;
1324         }
1325
1326         return ttrace->paths.table[fd];
1327 }
1328
1329 static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
1330                                         struct syscall_arg *arg)
1331 {
1332         int fd = arg->val;
1333         size_t printed = scnprintf(bf, size, "%d", fd);
1334         const char *path = thread__fd_path(arg->thread, fd, arg->trace);
1335
1336         if (path)
1337                 printed += scnprintf(bf + printed, size - printed, "<%s>", path);
1338
1339         return printed;
1340 }
1341
1342 static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
1343                                               struct syscall_arg *arg)
1344 {
1345         int fd = arg->val;
1346         size_t printed = syscall_arg__scnprintf_fd(bf, size, arg);
1347         struct thread_trace *ttrace = thread__priv(arg->thread);
1348
1349         if (ttrace && fd >= 0 && fd <= ttrace->paths.max)
1350                 zfree(&ttrace->paths.table[fd]);
1351
1352         return printed;
1353 }
1354
1355 static bool trace__filter_duration(struct trace *trace, double t)
1356 {
1357         return t < (trace->duration_filter * NSEC_PER_MSEC);
1358 }
1359
1360 static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
1361 {
1362         double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC;
1363
1364         return fprintf(fp, "%10.3f ", ts);
1365 }
1366
/*
 * Flags shared between sig_handler() and the rest of the file.  They are
 * written asynchronously from the signal handler, so they are volatile to
 * keep the compiler from caching them across the code that polls them.
 */
static volatile bool done = false;
static volatile bool interrupted = false;

/*
 * Signal handler: request termination, and record whether the request came
 * from SIGINT.
 */
static void sig_handler(int sig)
{
        done = true;
        interrupted = sig == SIGINT;
}
1375
1376 static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread,
1377                                         u64 duration, u64 tstamp, FILE *fp)
1378 {
1379         size_t printed = trace__fprintf_tstamp(trace, tstamp, fp);
1380         printed += fprintf_duration(duration, fp);
1381
1382         if (trace->multiple_threads) {
1383                 if (trace->show_comm)
1384                         printed += fprintf(fp, "%.14s/", thread__comm_str(thread));
1385                 printed += fprintf(fp, "%d ", thread->tid);
1386         }
1387
1388         return printed;
1389 }
1390
1391 static int trace__process_event(struct trace *trace, struct machine *machine,
1392                                 union perf_event *event, struct perf_sample *sample)
1393 {
1394         int ret = 0;
1395
1396         switch (event->header.type) {
1397         case PERF_RECORD_LOST:
1398                 color_fprintf(trace->output, PERF_COLOR_RED,
1399                               "LOST %" PRIu64 " events!\n", event->lost.lost);
1400                 ret = machine__process_lost_event(machine, event, sample);
1401         default:
1402                 ret = machine__process_event(machine, event, sample);
1403                 break;
1404         }
1405
1406         return ret;
1407 }
1408
1409 static int trace__tool_process(struct perf_tool *tool,
1410                                union perf_event *event,
1411                                struct perf_sample *sample,
1412                                struct machine *machine)
1413 {
1414         struct trace *trace = container_of(tool, struct trace, tool);
1415         return trace__process_event(trace, machine, event, sample);
1416 }
1417
1418 static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
1419 {
1420         int err = symbol__init(NULL);
1421
1422         if (err)
1423                 return err;
1424
1425         trace->host = machine__new_host();
1426         if (trace->host == NULL)
1427                 return -ENOMEM;
1428
1429         err = __machine__synthesize_threads(trace->host, &trace->tool, &trace->opts.target,
1430                                             evlist->threads, trace__tool_process, false);
1431         if (err)
1432                 symbol__exit();
1433
1434         return err;
1435 }
1436
1437 static int syscall__set_arg_fmts(struct syscall *sc)
1438 {
1439         struct format_field *field;
1440         int idx = 0;
1441
1442         sc->arg_scnprintf = calloc(sc->tp_format->format.nr_fields - 1, sizeof(void *));
1443         if (sc->arg_scnprintf == NULL)
1444                 return -1;
1445
1446         if (sc->fmt)
1447                 sc->arg_parm = sc->fmt->arg_parm;
1448
1449         for (field = sc->tp_format->format.fields->next; field; field = field->next) {
1450                 if (sc->fmt && sc->fmt->arg_scnprintf[idx])
1451                         sc->arg_scnprintf[idx] = sc->fmt->arg_scnprintf[idx];
1452                 else if (field->flags & FIELD_IS_POINTER)
1453                         sc->arg_scnprintf[idx] = syscall_arg__scnprintf_hex;
1454                 ++idx;
1455         }
1456
1457         return 0;
1458 }
1459
1460 static int trace__read_syscall_info(struct trace *trace, int id)
1461 {
1462         char tp_name[128];
1463         struct syscall *sc;
1464         const char *name = audit_syscall_to_name(id, trace->audit.machine);
1465
1466         if (name == NULL)
1467                 return -1;
1468
1469         if (id > trace->syscalls.max) {
1470                 struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc));
1471
1472                 if (nsyscalls == NULL)
1473                         return -1;
1474
1475                 if (trace->syscalls.max != -1) {
1476                         memset(nsyscalls + trace->syscalls.max + 1, 0,
1477                                (id - trace->syscalls.max) * sizeof(*sc));
1478                 } else {
1479                         memset(nsyscalls, 0, (id + 1) * sizeof(*sc));
1480                 }
1481
1482                 trace->syscalls.table = nsyscalls;
1483                 trace->syscalls.max   = id;
1484         }
1485
1486         sc = trace->syscalls.table + id;
1487         sc->name = name;
1488
1489         if (trace->ev_qualifier) {
1490                 bool in = strlist__find(trace->ev_qualifier, name) != NULL;
1491
1492                 if (!(in ^ trace->not_ev_qualifier)) {
1493                         sc->filtered = true;
1494                         /*
1495                          * No need to do read tracepoint information since this will be
1496                          * filtered out.
1497                          */
1498                         return 0;
1499                 }
1500         }
1501
1502         sc->fmt  = syscall_fmt__find(sc->name);
1503
1504         snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name);
1505         sc->tp_format = trace_event__tp_format("syscalls", tp_name);
1506
1507         if (sc->tp_format == NULL && sc->fmt && sc->fmt->alias) {
1508                 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias);
1509                 sc->tp_format = trace_event__tp_format("syscalls", tp_name);
1510         }
1511
1512         if (sc->tp_format == NULL)
1513                 return -1;
1514
1515         sc->is_exit = !strcmp(name, "exit_group") || !strcmp(name, "exit");
1516
1517         return syscall__set_arg_fmts(sc);
1518 }
1519
1520 static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
1521                                       unsigned long *args, struct trace *trace,
1522                                       struct thread *thread)
1523 {
1524         size_t printed = 0;
1525
1526         if (sc->tp_format != NULL) {
1527                 struct format_field *field;
1528                 u8 bit = 1;
1529                 struct syscall_arg arg = {
1530                         .idx    = 0,
1531                         .mask   = 0,
1532                         .trace  = trace,
1533                         .thread = thread,
1534                 };
1535
1536                 for (field = sc->tp_format->format.fields->next; field;
1537                      field = field->next, ++arg.idx, bit <<= 1) {
1538                         if (arg.mask & bit)
1539                                 continue;
1540                         /*
1541                          * Suppress this argument if its value is zero and
1542                          * and we don't have a string associated in an
1543                          * strarray for it.
1544                          */
1545                         if (args[arg.idx] == 0 &&
1546                             !(sc->arg_scnprintf &&
1547                               sc->arg_scnprintf[arg.idx] == SCA_STRARRAY &&
1548                               sc->arg_parm[arg.idx]))
1549                                 continue;
1550
1551                         printed += scnprintf(bf + printed, size - printed,
1552                                              "%s%s: ", printed ? ", " : "", field->name);
1553                         if (sc->arg_scnprintf && sc->arg_scnprintf[arg.idx]) {
1554                                 arg.val = args[arg.idx];
1555                                 if (sc->arg_parm)
1556                                         arg.parm = sc->arg_parm[arg.idx];
1557                                 printed += sc->arg_scnprintf[arg.idx](bf + printed,
1558                                                                       size - printed, &arg);
1559                         } else {
1560                                 printed += scnprintf(bf + printed, size - printed,
1561                                                      "%ld", args[arg.idx]);
1562                         }
1563                 }
1564         } else {
1565                 int i = 0;
1566
1567                 while (i < 6) {
1568                         printed += scnprintf(bf + printed, size - printed,
1569                                              "%sarg%d: %ld",
1570                                              printed ? ", " : "", i, args[i]);
1571                         ++i;
1572                 }
1573         }
1574
1575         return printed;
1576 }
1577
/*
 * Signature shared by the per-tracepoint sample handlers, e.g.
 * trace__sys_enter().
 */
typedef int (*tracepoint_handler)(struct trace *trace, struct perf_evsel *evsel,
                                  union perf_event *event,
                                  struct perf_sample *sample);
1581
1582 static struct syscall *trace__syscall_info(struct trace *trace,
1583                                            struct perf_evsel *evsel, int id)
1584 {
1585
1586         if (id < 0) {
1587
1588                 /*
1589                  * XXX: Noticed on x86_64, reproduced as far back as 3.0.36, haven't tried
1590                  * before that, leaving at a higher verbosity level till that is
1591                  * explained. Reproduced with plain ftrace with:
1592                  *
1593                  * echo 1 > /t/events/raw_syscalls/sys_exit/enable
1594                  * grep "NR -1 " /t/trace_pipe
1595                  *
1596                  * After generating some load on the machine.
1597                  */
1598                 if (verbose > 1) {
1599                         static u64 n;
1600                         fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n",
1601                                 id, perf_evsel__name(evsel), ++n);
1602                 }
1603                 return NULL;
1604         }
1605
1606         if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) &&
1607             trace__read_syscall_info(trace, id))
1608                 goto out_cant_read;
1609
1610         if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL))
1611                 goto out_cant_read;
1612
1613         return &trace->syscalls.table[id];
1614
1615 out_cant_read:
1616         if (verbose) {
1617                 fprintf(trace->output, "Problems reading syscall %d", id);
1618                 if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL)
1619                         fprintf(trace->output, "(%s)", trace->syscalls.table[id].name);
1620                 fputs(" information\n", trace->output);
1621         }
1622         return NULL;
1623 }
1624
1625 static void thread__update_stats(struct thread_trace *ttrace,
1626                                  int id, struct perf_sample *sample)
1627 {
1628         struct int_node *inode;
1629         struct stats *stats;
1630         u64 duration = 0;
1631
1632         inode = intlist__findnew(ttrace->syscall_stats, id);
1633         if (inode == NULL)
1634                 return;
1635
1636         stats = inode->priv;
1637         if (stats == NULL) {
1638                 stats = malloc(sizeof(struct stats));
1639                 if (stats == NULL)
1640                         return;
1641                 init_stats(stats);
1642                 inode->priv = stats;
1643         }
1644
1645         if (ttrace->entry_time && sample->time > ttrace->entry_time)
1646                 duration = sample->time - ttrace->entry_time;
1647
1648         update_stats(stats, duration);
1649 }
1650
1651 static int trace__printf_interrupted_entry(struct trace *trace, struct perf_sample *sample)
1652 {
1653         struct thread_trace *ttrace;
1654         u64 duration;
1655         size_t printed;
1656
1657         if (trace->current == NULL)
1658                 return 0;
1659
1660         ttrace = thread__priv(trace->current);
1661
1662         if (!ttrace->entry_pending)
1663                 return 0;
1664
1665         duration = sample->time - ttrace->entry_time;
1666
1667         printed  = trace__fprintf_entry_head(trace, trace->current, duration, sample->time, trace->output);
1668         printed += fprintf(trace->output, "%-70s) ...\n", ttrace->entry_str);
1669         ttrace->entry_pending = false;
1670
1671         return printed;
1672 }
1673
1674 static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
1675                             union perf_event *event __maybe_unused,
1676                             struct perf_sample *sample)
1677 {
1678         char *msg;
1679         void *args;
1680         size_t printed = 0;
1681         struct thread *thread;
1682         int id = perf_evsel__sc_tp_uint(evsel, id, sample);
1683         struct syscall *sc = trace__syscall_info(trace, evsel, id);
1684         struct thread_trace *ttrace;
1685
1686         if (sc == NULL)
1687                 return -1;
1688
1689         if (sc->filtered)
1690                 return 0;
1691
1692         thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1693         ttrace = thread__trace(thread, trace->output);
1694         if (ttrace == NULL)
1695                 return -1;
1696
1697         args = perf_evsel__sc_tp_ptr(evsel, args, sample);
1698
1699         if (ttrace->entry_str == NULL) {
1700                 ttrace->entry_str = malloc(1024);
1701                 if (!ttrace->entry_str)
1702                         return -1;
1703         }
1704
1705         printed += trace__printf_interrupted_entry(trace, sample);
1706
1707         ttrace->entry_time = sample->time;
1708         msg = ttrace->entry_str;
1709         printed += scnprintf(msg + printed, 1024 - printed, "%s(", sc->name);
1710
1711         printed += syscall__scnprintf_args(sc, msg + printed, 1024 - printed,
1712                                            args, trace, thread);
1713
1714         if (sc->is_exit) {
1715                 if (!trace->duration_filter && !trace->summary_only) {
1716                         trace__fprintf_entry_head(trace, thread, 1, sample->time, trace->output);
1717                         fprintf(trace->output, "%-70s\n", ttrace->entry_str);
1718                 }
1719         } else
1720                 ttrace->entry_pending = true;
1721
1722         trace->current = thread;
1723
1724         return 0;
1725 }
1726
1727 static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
1728                            union perf_event *event __maybe_unused,
1729                            struct perf_sample *sample)
1730 {
1731         long ret;
1732         u64 duration = 0;
1733         struct thread *thread;
1734         int id = perf_evsel__sc_tp_uint(evsel, id, sample);
1735         struct syscall *sc = trace__syscall_info(trace, evsel, id);
1736         struct thread_trace *ttrace;
1737
1738         if (sc == NULL)
1739                 return -1;
1740
1741         if (sc->filtered)
1742                 return 0;
1743
1744         thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1745         ttrace = thread__trace(thread, trace->output);
1746         if (ttrace == NULL)
1747                 return -1;
1748
1749         if (trace->summary)
1750                 thread__update_stats(ttrace, id, sample);
1751
1752         ret = perf_evsel__sc_tp_uint(evsel, ret, sample);
1753
1754         if (id == trace->audit.open_id && ret >= 0 && trace->last_vfs_getname) {
1755                 trace__set_fd_pathname(thread, ret, trace->last_vfs_getname);
1756                 trace->last_vfs_getname = NULL;
1757                 ++trace->stats.vfs_getname;
1758         }
1759
1760         ttrace->exit_time = sample->time;
1761
1762         if (ttrace->entry_time) {
1763                 duration = sample->time - ttrace->entry_time;
1764                 if (trace__filter_duration(trace, duration))
1765                         goto out;
1766         } else if (trace->duration_filter)
1767                 goto out;
1768
1769         if (trace->summary_only)
1770                 goto out;
1771
1772         trace__fprintf_entry_head(trace, thread, duration, sample->time, trace->output);
1773
1774         if (ttrace->entry_pending) {
1775                 fprintf(trace->output, "%-70s", ttrace->entry_str);
1776         } else {
1777                 fprintf(trace->output, " ... [");
1778                 color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued");
1779                 fprintf(trace->output, "]: %s()", sc->name);
1780         }
1781
1782         if (sc->fmt == NULL) {
1783 signed_print:
1784                 fprintf(trace->output, ") = %ld", ret);
1785         } else if (ret < 0 && sc->fmt->errmsg) {
1786                 char bf[STRERR_BUFSIZE];
1787                 const char *emsg = strerror_r(-ret, bf, sizeof(bf)),
1788                            *e = audit_errno_to_name(-ret);
1789
1790                 fprintf(trace->output, ") = -1 %s %s", e, emsg);
1791         } else if (ret == 0 && sc->fmt->timeout)
1792                 fprintf(trace->output, ") = 0 Timeout");
1793         else if (sc->fmt->hexret)
1794                 fprintf(trace->output, ") = %#lx", ret);
1795         else
1796                 goto signed_print;
1797
1798         fputc('\n', trace->output);
1799 out:
1800         ttrace->entry_pending = false;
1801
1802         return 0;
1803 }
1804
1805 static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel,
1806                               union perf_event *event __maybe_unused,
1807                               struct perf_sample *sample)
1808 {
1809         trace->last_vfs_getname = perf_evsel__rawptr(evsel, sample, "pathname");
1810         return 0;
1811 }
1812
1813 static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel,
1814                                      union perf_event *event __maybe_unused,
1815                                      struct perf_sample *sample)
1816 {
1817         u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
1818         double runtime_ms = (double)runtime / NSEC_PER_MSEC;
1819         struct thread *thread = machine__findnew_thread(trace->host,
1820                                                         sample->pid,
1821                                                         sample->tid);
1822         struct thread_trace *ttrace = thread__trace(thread, trace->output);
1823
1824         if (ttrace == NULL)
1825                 goto out_dump;
1826
1827         ttrace->runtime_ms += runtime_ms;
1828         trace->runtime_ms += runtime_ms;
1829         return 0;
1830
1831 out_dump:
1832         fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n",
1833                evsel->name,
1834                perf_evsel__strval(evsel, sample, "comm"),
1835                (pid_t)perf_evsel__intval(evsel, sample, "pid"),
1836                runtime,
1837                perf_evsel__intval(evsel, sample, "vruntime"));
1838         return 0;
1839 }
1840
1841 static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel,
1842                                 union perf_event *event __maybe_unused,
1843                                 struct perf_sample *sample)
1844 {
1845         trace__printf_interrupted_entry(trace, sample);
1846         trace__fprintf_tstamp(trace, sample->time, trace->output);
1847
1848         if (trace->trace_syscalls)
1849                 fprintf(trace->output, "(         ): ");
1850
1851         fprintf(trace->output, "%s:", evsel->name);
1852
1853         if (evsel->tp_format) {
1854                 event_format__fprintf(evsel->tp_format, sample->cpu,
1855                                       sample->raw_data, sample->raw_size,
1856                                       trace->output);
1857         }
1858
1859         fprintf(trace->output, ")\n");
1860         return 0;
1861 }
1862
1863 static void print_location(FILE *f, struct perf_sample *sample,
1864                            struct addr_location *al,
1865                            bool print_dso, bool print_sym)
1866 {
1867
1868         if ((verbose || print_dso) && al->map)
1869                 fprintf(f, "%s@", al->map->dso->long_name);
1870
1871         if ((verbose || print_sym) && al->sym)
1872                 fprintf(f, "%s+0x%" PRIx64, al->sym->name,
1873                         al->addr - al->sym->start);
1874         else if (al->map)
1875                 fprintf(f, "0x%" PRIx64, al->addr);
1876         else
1877                 fprintf(f, "0x%" PRIx64, sample->addr);
1878 }
1879
1880 static int trace__pgfault(struct trace *trace,
1881                           struct perf_evsel *evsel,
1882                           union perf_event *event,
1883                           struct perf_sample *sample)
1884 {
1885         struct thread *thread;
1886         u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
1887         struct addr_location al;
1888         char map_type = 'd';
1889         struct thread_trace *ttrace;
1890
1891         thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1892         ttrace = thread__trace(thread, trace->output);
1893         if (ttrace == NULL)
1894                 return -1;
1895
1896         if (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ)
1897                 ttrace->pfmaj++;
1898         else
1899                 ttrace->pfmin++;
1900
1901         if (trace->summary_only)
1902                 return 0;
1903
1904         thread__find_addr_location(thread, cpumode, MAP__FUNCTION,
1905                               sample->ip, &al);
1906
1907         trace__fprintf_entry_head(trace, thread, 0, sample->time, trace->output);
1908
1909         fprintf(trace->output, "%sfault [",
1910                 evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ?
1911                 "maj" : "min");
1912
1913         print_location(trace->output, sample, &al, false, true);
1914
1915         fprintf(trace->output, "] => ");
1916
1917         thread__find_addr_location(thread, cpumode, MAP__VARIABLE,
1918                                    sample->addr, &al);
1919
1920         if (!al.map) {
1921                 thread__find_addr_location(thread, cpumode,
1922                                            MAP__FUNCTION, sample->addr, &al);
1923
1924                 if (al.map)
1925                         map_type = 'x';
1926                 else
1927                         map_type = '?';
1928         }
1929
1930         print_location(trace->output, sample, &al, true, false);
1931
1932         fprintf(trace->output, " (%c%c)\n", map_type, al.level);
1933
1934         return 0;
1935 }
1936
1937 static bool skip_sample(struct trace *trace, struct perf_sample *sample)
1938 {
1939         if ((trace->pid_list && intlist__find(trace->pid_list, sample->pid)) ||
1940             (trace->tid_list && intlist__find(trace->tid_list, sample->tid)))
1941                 return false;
1942
1943         if (trace->pid_list || trace->tid_list)
1944                 return true;
1945
1946         return false;
1947 }
1948
1949 static int trace__process_sample(struct perf_tool *tool,
1950                                  union perf_event *event,
1951                                  struct perf_sample *sample,
1952                                  struct perf_evsel *evsel,
1953                                  struct machine *machine __maybe_unused)
1954 {
1955         struct trace *trace = container_of(tool, struct trace, tool);
1956         int err = 0;
1957
1958         tracepoint_handler handler = evsel->handler;
1959
1960         if (skip_sample(trace, sample))
1961                 return 0;
1962
1963         if (!trace->full_time && trace->base_time == 0)
1964                 trace->base_time = sample->time;
1965
1966         if (handler) {
1967                 ++trace->nr_events;
1968                 handler(trace, evsel, event, sample);
1969         }
1970
1971         return err;
1972 }
1973
1974 static int parse_target_str(struct trace *trace)
1975 {
1976         if (trace->opts.target.pid) {
1977                 trace->pid_list = intlist__new(trace->opts.target.pid);
1978                 if (trace->pid_list == NULL) {
1979                         pr_err("Error parsing process id string\n");
1980                         return -EINVAL;
1981                 }
1982         }
1983
1984         if (trace->opts.target.tid) {
1985                 trace->tid_list = intlist__new(trace->opts.target.tid);
1986                 if (trace->tid_list == NULL) {
1987                         pr_err("Error parsing thread id string\n");
1988                         return -EINVAL;
1989                 }
1990         }
1991
1992         return 0;
1993 }
1994
1995 static int trace__record(struct trace *trace, int argc, const char **argv)
1996 {
1997         unsigned int rec_argc, i, j;
1998         const char **rec_argv;
1999         const char * const record_args[] = {
2000                 "record",
2001                 "-R",
2002                 "-m", "1024",
2003                 "-c", "1",
2004         };
2005
2006         const char * const sc_args[] = { "-e", };
2007         unsigned int sc_args_nr = ARRAY_SIZE(sc_args);
2008         const char * const majpf_args[] = { "-e", "major-faults" };
2009         unsigned int majpf_args_nr = ARRAY_SIZE(majpf_args);
2010         const char * const minpf_args[] = { "-e", "minor-faults" };
2011         unsigned int minpf_args_nr = ARRAY_SIZE(minpf_args);
2012
2013         /* +1 is for the event string below */
2014         rec_argc = ARRAY_SIZE(record_args) + sc_args_nr + 1 +
2015                 majpf_args_nr + minpf_args_nr + argc;
2016         rec_argv = calloc(rec_argc + 1, sizeof(char *));
2017
2018         if (rec_argv == NULL)
2019                 return -ENOMEM;
2020
2021         j = 0;
2022         for (i = 0; i < ARRAY_SIZE(record_args); i++)
2023                 rec_argv[j++] = record_args[i];
2024
2025         if (trace->trace_syscalls) {
2026                 for (i = 0; i < sc_args_nr; i++)
2027                         rec_argv[j++] = sc_args[i];
2028
2029                 /* event string may be different for older kernels - e.g., RHEL6 */
2030                 if (is_valid_tracepoint("raw_syscalls:sys_enter"))
2031                         rec_argv[j++] = "raw_syscalls:sys_enter,raw_syscalls:sys_exit";
2032                 else if (is_valid_tracepoint("syscalls:sys_enter"))
2033                         rec_argv[j++] = "syscalls:sys_enter,syscalls:sys_exit";
2034                 else {
2035                         pr_err("Neither raw_syscalls nor syscalls events exist.\n");
2036                         return -1;
2037                 }
2038         }
2039
2040         if (trace->trace_pgfaults & TRACE_PFMAJ)
2041                 for (i = 0; i < majpf_args_nr; i++)
2042                         rec_argv[j++] = majpf_args[i];
2043
2044         if (trace->trace_pgfaults & TRACE_PFMIN)
2045                 for (i = 0; i < minpf_args_nr; i++)
2046                         rec_argv[j++] = minpf_args[i];
2047
2048         for (i = 0; i < (unsigned int)argc; i++)
2049                 rec_argv[j++] = argv[i];
2050
2051         return cmd_record(j, rec_argv, NULL);
2052 }
2053
2054 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp);
2055
2056 static void perf_evlist__add_vfs_getname(struct perf_evlist *evlist)
2057 {
2058         struct perf_evsel *evsel = perf_evsel__newtp("probe", "vfs_getname");
2059         if (evsel == NULL)
2060                 return;
2061
2062         if (perf_evsel__field(evsel, "pathname") == NULL) {
2063                 perf_evsel__delete(evsel);
2064                 return;
2065         }
2066
2067         evsel->handler = trace__vfs_getname;
2068         perf_evlist__add(evlist, evsel);
2069 }
2070
2071 static int perf_evlist__add_pgfault(struct perf_evlist *evlist,
2072                                     u64 config)
2073 {
2074         struct perf_evsel *evsel;
2075         struct perf_event_attr attr = {
2076                 .type = PERF_TYPE_SOFTWARE,
2077                 .mmap_data = 1,
2078         };
2079
2080         attr.config = config;
2081         attr.sample_period = 1;
2082
2083         event_attr_init(&attr);
2084
2085         evsel = perf_evsel__new(&attr);
2086         if (!evsel)
2087                 return -ENOMEM;
2088
2089         evsel->handler = trace__pgfault;
2090         perf_evlist__add(evlist, evsel);
2091
2092         return 0;
2093 }
2094
2095 static void trace__handle_event(struct trace *trace, union perf_event *event, struct perf_sample *sample)
2096 {
2097         const u32 type = event->header.type;
2098         struct perf_evsel *evsel;
2099
2100         if (!trace->full_time && trace->base_time == 0)
2101                 trace->base_time = sample->time;
2102
2103         if (type != PERF_RECORD_SAMPLE) {
2104                 trace__process_event(trace, trace->host, event, sample);
2105                 return;
2106         }
2107
2108         evsel = perf_evlist__id2evsel(trace->evlist, sample->id);
2109         if (evsel == NULL) {
2110                 fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample->id);
2111                 return;
2112         }
2113
2114         if (evsel->attr.type == PERF_TYPE_TRACEPOINT &&
2115             sample->raw_data == NULL) {
2116                 fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
2117                        perf_evsel__name(evsel), sample->tid,
2118                        sample->cpu, sample->raw_size);
2119         } else {
2120                 tracepoint_handler handler = evsel->handler;
2121                 handler(trace, evsel, event, sample);
2122         }
2123 }
2124
2125 static int trace__run(struct trace *trace, int argc, const char **argv)
2126 {
2127         struct perf_evlist *evlist = trace->evlist;
2128         int err = -1, i;
2129         unsigned long before;
2130         const bool forks = argc > 0;
2131         bool draining = false;
2132
2133         trace->live = true;
2134
2135         if (trace->trace_syscalls &&
2136             perf_evlist__add_syscall_newtp(evlist, trace__sys_enter,
2137                                            trace__sys_exit))
2138                 goto out_error_raw_syscalls;
2139
2140         if (trace->trace_syscalls)
2141                 perf_evlist__add_vfs_getname(evlist);
2142
2143         if ((trace->trace_pgfaults & TRACE_PFMAJ) &&
2144             perf_evlist__add_pgfault(evlist, PERF_COUNT_SW_PAGE_FAULTS_MAJ)) {
2145                 goto out_error_mem;
2146         }
2147
2148         if ((trace->trace_pgfaults & TRACE_PFMIN) &&
2149             perf_evlist__add_pgfault(evlist, PERF_COUNT_SW_PAGE_FAULTS_MIN))
2150                 goto out_error_mem;
2151
2152         if (trace->sched &&
2153             perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
2154                                    trace__sched_stat_runtime))
2155                 goto out_error_sched_stat_runtime;
2156
2157         err = perf_evlist__create_maps(evlist, &trace->opts.target);
2158         if (err < 0) {
2159                 fprintf(trace->output, "Problems parsing the target to trace, check your options!\n");
2160                 goto out_delete_evlist;
2161         }
2162
2163         err = trace__symbols_init(trace, evlist);
2164         if (err < 0) {
2165                 fprintf(trace->output, "Problems initializing symbol libraries!\n");
2166                 goto out_delete_evlist;
2167         }
2168
2169         perf_evlist__config(evlist, &trace->opts);
2170
2171         signal(SIGCHLD, sig_handler);
2172         signal(SIGINT, sig_handler);
2173
2174         if (forks) {
2175                 err = perf_evlist__prepare_workload(evlist, &trace->opts.target,
2176                                                     argv, false, NULL);
2177                 if (err < 0) {
2178                         fprintf(trace->output, "Couldn't run the workload!\n");
2179                         goto out_delete_evlist;
2180                 }
2181         }
2182
2183         err = perf_evlist__open(evlist);
2184         if (err < 0)
2185                 goto out_error_open;
2186
2187         /*
2188          * Better not use !target__has_task() here because we need to cover the
2189          * case where no threads were specified in the command line, but a
2190          * workload was, and in that case we will fill in the thread_map when
2191          * we fork the workload in perf_evlist__prepare_workload.
2192          */
2193         if (trace->filter_pids.nr > 0)
2194                 err = perf_evlist__set_filter_pids(evlist, trace->filter_pids.nr, trace->filter_pids.entries);
2195         else if (evlist->threads->map[0] == -1)
2196                 err = perf_evlist__set_filter_pid(evlist, getpid());
2197
2198         if (err < 0) {
2199                 printf("err=%d,%s\n", -err, strerror(-err));
2200                 exit(1);
2201         }
2202
2203         err = perf_evlist__mmap(evlist, trace->opts.mmap_pages, false);
2204         if (err < 0)
2205                 goto out_error_mmap;
2206
2207         if (forks)
2208                 perf_evlist__start_workload(evlist);
2209         else
2210                 perf_evlist__enable(evlist);
2211
2212         trace->multiple_threads = evlist->threads->map[0] == -1 ||
2213                                   evlist->threads->nr > 1 ||
2214                                   perf_evlist__first(evlist)->attr.inherit;
2215 again:
2216         before = trace->nr_events;
2217
2218         for (i = 0; i < evlist->nr_mmaps; i++) {
2219                 union perf_event *event;
2220
2221                 while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) {
2222                         struct perf_sample sample;
2223
2224                         ++trace->nr_events;
2225
2226                         err = perf_evlist__parse_sample(evlist, event, &sample);
2227                         if (err) {
2228                                 fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err);
2229                                 goto next_event;
2230                         }
2231
2232                         trace__handle_event(trace, event, &sample);
2233 next_event:
2234                         perf_evlist__mmap_consume(evlist, i);
2235
2236                         if (interrupted)
2237                                 goto out_disable;
2238                 }
2239         }
2240
2241         if (trace->nr_events == before) {
2242                 int timeout = done ? 100 : -1;
2243
2244                 if (!draining && perf_evlist__poll(evlist, timeout) > 0) {
2245                         if (perf_evlist__filter_pollfd(evlist, POLLERR | POLLHUP) == 0)
2246                                 draining = true;
2247
2248                         goto again;
2249                 }
2250         } else {
2251                 goto again;
2252         }
2253
2254 out_disable:
2255         perf_evlist__disable(evlist);
2256
2257         if (!err) {
2258                 if (trace->summary)
2259                         trace__fprintf_thread_summary(trace, trace->output);
2260
2261                 if (trace->show_tool_stats) {
2262                         fprintf(trace->output, "Stats:\n "
2263                                                " vfs_getname : %" PRIu64 "\n"
2264                                                " proc_getname: %" PRIu64 "\n",
2265                                 trace->stats.vfs_getname,
2266                                 trace->stats.proc_getname);
2267                 }
2268         }
2269
2270 out_delete_evlist:
2271         perf_evlist__delete(evlist);
2272         trace->evlist = NULL;
2273         trace->live = false;
2274         return err;
2275 {
2276         char errbuf[BUFSIZ];
2277
2278 out_error_sched_stat_runtime:
2279         debugfs__strerror_open_tp(errno, errbuf, sizeof(errbuf), "sched", "sched_stat_runtime");
2280         goto out_error;
2281
2282 out_error_raw_syscalls:
2283         debugfs__strerror_open_tp(errno, errbuf, sizeof(errbuf), "raw_syscalls", "sys_(enter|exit)");
2284         goto out_error;
2285
2286 out_error_mmap:
2287         perf_evlist__strerror_mmap(evlist, errno, errbuf, sizeof(errbuf));
2288         goto out_error;
2289
2290 out_error_open:
2291         perf_evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf));
2292
2293 out_error:
2294         fprintf(trace->output, "%s\n", errbuf);
2295         goto out_delete_evlist;
2296 }
2297 out_error_mem:
2298         fprintf(trace->output, "Not enough memory to run!\n");
2299         goto out_delete_evlist;
2300 }
2301
2302 static int trace__replay(struct trace *trace)
2303 {
2304         const struct perf_evsel_str_handler handlers[] = {
2305                 { "probe:vfs_getname",       trace__vfs_getname, },
2306         };
2307         struct perf_data_file file = {
2308                 .path  = input_name,
2309                 .mode  = PERF_DATA_MODE_READ,
2310         };
2311         struct perf_session *session;
2312         struct perf_evsel *evsel;
2313         int err = -1;
2314
2315         trace->tool.sample        = trace__process_sample;
2316         trace->tool.mmap          = perf_event__process_mmap;
2317         trace->tool.mmap2         = perf_event__process_mmap2;
2318         trace->tool.comm          = perf_event__process_comm;
2319         trace->tool.exit          = perf_event__process_exit;
2320         trace->tool.fork          = perf_event__process_fork;
2321         trace->tool.attr          = perf_event__process_attr;
2322         trace->tool.tracing_data = perf_event__process_tracing_data;
2323         trace->tool.build_id      = perf_event__process_build_id;
2324
2325         trace->tool.ordered_events = true;
2326         trace->tool.ordering_requires_timestamps = true;
2327
2328         /* add tid to output */
2329         trace->multiple_threads = true;
2330
2331         session = perf_session__new(&file, false, &trace->tool);
2332         if (session == NULL)
2333                 return -1;
2334
2335         if (symbol__init(&session->header.env) < 0)
2336                 goto out;
2337
2338         trace->host = &session->machines.host;
2339
2340         err = perf_session__set_tracepoints_handlers(session, handlers);
2341         if (err)
2342                 goto out;
2343
2344         evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2345                                                      "raw_syscalls:sys_enter");
2346         /* older kernels have syscalls tp versus raw_syscalls */
2347         if (evsel == NULL)
2348                 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2349                                                              "syscalls:sys_enter");
2350
2351         if (evsel &&
2352             (perf_evsel__init_syscall_tp(evsel, trace__sys_enter) < 0 ||
2353             perf_evsel__init_sc_tp_ptr_field(evsel, args))) {
2354                 pr_err("Error during initialize raw_syscalls:sys_enter event\n");
2355                 goto out;
2356         }
2357
2358         evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2359                                                      "raw_syscalls:sys_exit");
2360         if (evsel == NULL)
2361                 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2362                                                              "syscalls:sys_exit");
2363         if (evsel &&
2364             (perf_evsel__init_syscall_tp(evsel, trace__sys_exit) < 0 ||
2365             perf_evsel__init_sc_tp_uint_field(evsel, ret))) {
2366                 pr_err("Error during initialize raw_syscalls:sys_exit event\n");
2367                 goto out;
2368         }
2369
2370         evlist__for_each(session->evlist, evsel) {
2371                 if (evsel->attr.type == PERF_TYPE_SOFTWARE &&
2372                     (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ||
2373                      evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MIN ||
2374                      evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS))
2375                         evsel->handler = trace__pgfault;
2376         }
2377
2378         err = parse_target_str(trace);
2379         if (err != 0)
2380                 goto out;
2381
2382         setup_pager();
2383
2384         err = perf_session__process_events(session, &trace->tool);
2385         if (err)
2386                 pr_err("Failed to process events, error %d", err);
2387
2388         else if (trace->summary)
2389                 trace__fprintf_thread_summary(trace, trace->output);
2390
2391 out:
2392         perf_session__delete(session);
2393
2394         return err;
2395 }
2396
/*
 * Print the heading of the per-thread summary to @fp and return what
 * fprintf() returned for it.
 */
static size_t trace__fprintf_threads_header(FILE *fp)
{
	return fprintf(fp, "\n Summary of events:\n\n");
}
2405
2406 static size_t thread__dump_stats(struct thread_trace *ttrace,
2407                                  struct trace *trace, FILE *fp)
2408 {
2409         struct stats *stats;
2410         size_t printed = 0;
2411         struct syscall *sc;
2412         struct int_node *inode = intlist__first(ttrace->syscall_stats);
2413
2414         if (inode == NULL)
2415                 return 0;
2416
2417         printed += fprintf(fp, "\n");
2418
2419         printed += fprintf(fp, "   syscall            calls      min       avg       max      stddev\n");
2420         printed += fprintf(fp, "                               (msec)    (msec)    (msec)        (%%)\n");
2421         printed += fprintf(fp, "   --------------- -------- --------- --------- ---------     ------\n");
2422
2423         /* each int_node is a syscall */
2424         while (inode) {
2425                 stats = inode->priv;
2426                 if (stats) {
2427                         double min = (double)(stats->min) / NSEC_PER_MSEC;
2428                         double max = (double)(stats->max) / NSEC_PER_MSEC;
2429                         double avg = avg_stats(stats);
2430                         double pct;
2431                         u64 n = (u64) stats->n;
2432
2433                         pct = avg ? 100.0 * stddev_stats(stats)/avg : 0.0;
2434                         avg /= NSEC_PER_MSEC;
2435
2436                         sc = &trace->syscalls.table[inode->i];
2437                         printed += fprintf(fp, "   %-15s", sc->name);
2438                         printed += fprintf(fp, " %8" PRIu64 " %9.3f %9.3f",
2439                                            n, min, avg);
2440                         printed += fprintf(fp, " %9.3f %9.2f%%\n", max, pct);
2441                 }
2442
2443                 inode = intlist__next(inode);
2444         }
2445
2446         printed += fprintf(fp, "\n\n");
2447
2448         return printed;
2449 }
2450
/*
 * Context handed to the per-thread summary callback through its opaque
 * 'priv' argument.
 */
struct summary_data {
	FILE *fp;		/* stream the summary is written to */
	struct trace *trace;	/* trace session being summarized */
	size_t printed;		/* running count of characters written */
};
2457
2458 static int trace__fprintf_one_thread(struct thread *thread, void *priv)
2459 {
2460         struct summary_data *data = priv;
2461         FILE *fp = data->fp;
2462         size_t printed = data->printed;
2463         struct trace *trace = data->trace;
2464         struct thread_trace *ttrace = thread__priv(thread);
2465         double ratio;
2466
2467         if (ttrace == NULL)
2468                 return 0;
2469
2470         ratio = (double)ttrace->nr_events / trace->nr_events * 100.0;
2471
2472         printed += fprintf(fp, " %s (%d), ", thread__comm_str(thread), thread->tid);
2473         printed += fprintf(fp, "%lu events, ", ttrace->nr_events);
2474         printed += fprintf(fp, "%.1f%%", ratio);
2475         if (ttrace->pfmaj)
2476                 printed += fprintf(fp, ", %lu majfaults", ttrace->pfmaj);
2477         if (ttrace->pfmin)
2478                 printed += fprintf(fp, ", %lu minfaults", ttrace->pfmin);
2479         printed += fprintf(fp, ", %.3f msec\n", ttrace->runtime_ms);
2480         printed += thread__dump_stats(ttrace, trace, fp);
2481
2482         data->printed += printed;
2483
2484         return 0;
2485 }
2486
2487 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
2488 {
2489         struct summary_data data = {
2490                 .fp = fp,
2491                 .trace = trace
2492         };
2493         data.printed = trace__fprintf_threads_header(fp);
2494
2495         machine__for_each_thread(trace->host, trace__fprintf_one_thread, &data);
2496
2497         return data.printed;
2498 }
2499
2500 static int trace__set_duration(const struct option *opt, const char *str,
2501                                int unset __maybe_unused)
2502 {
2503         struct trace *trace = opt->value;
2504
2505         trace->duration_filter = atof(str);
2506         return 0;
2507 }
2508
2509 static int trace__set_filter_pids(const struct option *opt, const char *str,
2510                                   int unset __maybe_unused)
2511 {
2512         int ret = -1;
2513         size_t i;
2514         struct trace *trace = opt->value;
2515         /*
2516          * FIXME: introduce a intarray class, plain parse csv and create a
2517          * { int nr, int entries[] } struct...
2518          */
2519         struct intlist *list = intlist__new(str);
2520
2521         if (list == NULL)
2522                 return -1;
2523
2524         i = trace->filter_pids.nr = intlist__nr_entries(list) + 1;
2525         trace->filter_pids.entries = calloc(i, sizeof(pid_t));
2526
2527         if (trace->filter_pids.entries == NULL)
2528                 goto out;
2529
2530         trace->filter_pids.entries[0] = getpid();
2531
2532         for (i = 1; i < trace->filter_pids.nr; ++i)
2533                 trace->filter_pids.entries[i] = intlist__entry(list, i - 1)->i;
2534
2535         intlist__delete(list);
2536         ret = 0;
2537 out:
2538         return ret;
2539 }
2540
2541 static int trace__open_output(struct trace *trace, const char *filename)
2542 {
2543         struct stat st;
2544
2545         if (!stat(filename, &st) && st.st_size) {
2546                 char oldname[PATH_MAX];
2547
2548                 scnprintf(oldname, sizeof(oldname), "%s.old", filename);
2549                 unlink(oldname);
2550                 rename(filename, oldname);
2551         }
2552
2553         trace->output = fopen(filename, "w");
2554
2555         return trace->output == NULL ? -errno : 0;
2556 }
2557
2558 static int parse_pagefaults(const struct option *opt, const char *str,
2559                             int unset __maybe_unused)
2560 {
2561         int *trace_pgfaults = opt->value;
2562
2563         if (strcmp(str, "all") == 0)
2564                 *trace_pgfaults |= TRACE_PFMAJ | TRACE_PFMIN;
2565         else if (strcmp(str, "maj") == 0)
2566                 *trace_pgfaults |= TRACE_PFMAJ;
2567         else if (strcmp(str, "min") == 0)
2568                 *trace_pgfaults |= TRACE_PFMIN;
2569         else
2570                 return -1;
2571
2572         return 0;
2573 }
2574
2575 static void evlist__set_evsel_handler(struct perf_evlist *evlist, void *handler)
2576 {
2577         struct perf_evsel *evsel;
2578
2579         evlist__for_each(evlist, evsel)
2580                 evsel->handler = handler;
2581 }
2582
2583 int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
2584 {
2585         const char * const trace_usage[] = {
2586                 "perf trace [<options>] [<command>]",
2587                 "perf trace [<options>] -- <command> [<options>]",
2588                 "perf trace record [<options>] [<command>]",
2589                 "perf trace record [<options>] -- <command> [<options>]",
2590                 NULL
2591         };
2592         struct trace trace = {
2593                 .audit = {
2594                         .machine = audit_detect_machine(),
2595                         .open_id = audit_name_to_syscall("open", trace.audit.machine),
2596                 },
2597                 .syscalls = {
2598                         . max = -1,
2599                 },
2600                 .opts = {
2601                         .target = {
2602                                 .uid       = UINT_MAX,
2603                                 .uses_mmap = true,
2604                         },
2605                         .user_freq     = UINT_MAX,
2606                         .user_interval = ULLONG_MAX,
2607                         .no_buffering  = true,
2608                         .mmap_pages    = UINT_MAX,
2609                 },
2610                 .output = stdout,
2611                 .show_comm = true,
2612                 .trace_syscalls = true,
2613         };
2614         const char *output_name = NULL;
2615         const char *ev_qualifier_str = NULL;
2616         const struct option trace_options[] = {
2617         OPT_CALLBACK(0, "event", &trace.evlist, "event",
2618                      "event selector. use 'perf list' to list available events",
2619                      parse_events_option),
2620         OPT_BOOLEAN(0, "comm", &trace.show_comm,
2621                     "show the thread COMM next to its id"),
2622         OPT_BOOLEAN(0, "tool_stats", &trace.show_tool_stats, "show tool stats"),
2623         OPT_STRING('e', "expr", &ev_qualifier_str, "expr",
2624                     "list of events to trace"),
2625         OPT_STRING('o', "output", &output_name, "file", "output file name"),
2626         OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"),
2627         OPT_STRING('p', "pid", &trace.opts.target.pid, "pid",
2628                     "trace events on existing process id"),
2629         OPT_STRING('t', "tid", &trace.opts.target.tid, "tid",
2630                     "trace events on existing thread id"),
2631         OPT_CALLBACK(0, "filter-pids", &trace, "float",
2632                      "show only events with duration > N.M ms", trace__set_filter_pids),
2633         OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide,
2634                     "system-wide collection from all CPUs"),
2635         OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu",
2636                     "list of cpus to monitor"),
2637         OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit,
2638                     "child tasks do not inherit counters"),
2639         OPT_CALLBACK('m', "mmap-pages", &trace.opts.mmap_pages, "pages",
2640                      "number of mmap data pages",
2641                      perf_evlist__parse_mmap_pages),
2642         OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user",
2643                    "user to profile"),
2644         OPT_CALLBACK(0, "duration", &trace, "float",
2645                      "show only events with duration > N.M ms",
2646                      trace__set_duration),
2647         OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"),
2648         OPT_INCR('v', "verbose", &verbose, "be more verbose"),
2649         OPT_BOOLEAN('T', "time", &trace.full_time,
2650                     "Show full timestamp, not time relative to first start"),
2651         OPT_BOOLEAN('s', "summary", &trace.summary_only,
2652                     "Show only syscall summary with statistics"),
2653         OPT_BOOLEAN('S', "with-summary", &trace.summary,
2654                     "Show all syscalls and summary with statistics"),
2655         OPT_CALLBACK_DEFAULT('F', "pf", &trace.trace_pgfaults, "all|maj|min",
2656                      "Trace pagefaults", parse_pagefaults, "maj"),
2657         OPT_BOOLEAN(0, "syscalls", &trace.trace_syscalls, "Trace syscalls"),
2658         OPT_END()
2659         };
2660         int err;
2661         char bf[BUFSIZ];
2662
2663         signal(SIGSEGV, sighandler_dump_stack);
2664         signal(SIGFPE, sighandler_dump_stack);
2665
2666         trace.evlist = perf_evlist__new();
2667         if (trace.evlist == NULL)
2668                 return -ENOMEM;
2669
2670         if (trace.evlist == NULL) {
2671                 pr_err("Not enough memory to run!\n");
2672                 goto out;
2673         }
2674
2675         argc = parse_options(argc, argv, trace_options, trace_usage,
2676                              PARSE_OPT_STOP_AT_NON_OPTION);
2677
2678         if (trace.trace_pgfaults) {
2679                 trace.opts.sample_address = true;
2680                 trace.opts.sample_time = true;
2681         }
2682
2683         if (trace.evlist->nr_entries > 0)
2684                 evlist__set_evsel_handler(trace.evlist, trace__event_handler);
2685
2686         if ((argc >= 1) && (strcmp(argv[0], "record") == 0))
2687                 return trace__record(&trace, argc-1, &argv[1]);
2688
2689         /* summary_only implies summary option, but don't overwrite summary if set */
2690         if (trace.summary_only)
2691                 trace.summary = trace.summary_only;
2692
2693         if (!trace.trace_syscalls && !trace.trace_pgfaults &&
2694             trace.evlist->nr_entries == 0 /* Was --events used? */) {
2695                 pr_err("Please specify something to trace.\n");
2696                 return -1;
2697         }
2698
2699         if (output_name != NULL) {
2700                 err = trace__open_output(&trace, output_name);
2701                 if (err < 0) {
2702                         perror("failed to create output file");
2703                         goto out;
2704                 }
2705         }
2706
2707         if (ev_qualifier_str != NULL) {
2708                 const char *s = ev_qualifier_str;
2709
2710                 trace.not_ev_qualifier = *s == '!';
2711                 if (trace.not_ev_qualifier)
2712                         ++s;
2713                 trace.ev_qualifier = strlist__new(true, s);
2714                 if (trace.ev_qualifier == NULL) {
2715                         fputs("Not enough memory to parse event qualifier",
2716                               trace.output);
2717                         err = -ENOMEM;
2718                         goto out_close;
2719                 }
2720         }
2721
2722         err = target__validate(&trace.opts.target);
2723         if (err) {
2724                 target__strerror(&trace.opts.target, err, bf, sizeof(bf));
2725                 fprintf(trace.output, "%s", bf);
2726                 goto out_close;
2727         }
2728
2729         err = target__parse_uid(&trace.opts.target);
2730         if (err) {
2731                 target__strerror(&trace.opts.target, err, bf, sizeof(bf));
2732                 fprintf(trace.output, "%s", bf);
2733                 goto out_close;
2734         }
2735
2736         if (!argc && target__none(&trace.opts.target))
2737                 trace.opts.target.system_wide = true;
2738
2739         if (input_name)
2740                 err = trace__replay(&trace);
2741         else
2742                 err = trace__run(&trace, argc, argv);
2743
2744 out_close:
2745         if (output_name != NULL)
2746                 fclose(trace.output);
2747 out:
2748         return err;
2749 }