perf trace: Filter out the trace pid when no threads are specified
[firefly-linux-kernel-4.4.55.git] / tools / perf / builtin-trace.c
#include <traceevent/event-parse.h>
#include "builtin.h"
#include "util/color.h"
#include "util/debug.h"
#include "util/evlist.h"
#include "util/machine.h"
#include "util/session.h"
#include "util/thread.h"
#include "util/parse-options.h"
#include "util/strlist.h"
#include "util/intlist.h"
#include "util/thread_map.h"
#include "util/stat.h"
#include "trace-event.h"
#include "util/parse-events.h"

#include <libaudit.h>
#include <stdlib.h>
#include <string.h>
#include <sys/eventfd.h>
#include <sys/mman.h>
#include <linux/futex.h>
22
/*
 * Fallbacks for older distros: each constant below is only defined here when
 * the system headers did not already provide it.
 */
#ifndef MAP_STACK
#define MAP_STACK		0x20000
#endif

#ifndef MADV_HWPOISON
#define MADV_HWPOISON		100
#endif

#ifndef MADV_MERGEABLE
#define MADV_MERGEABLE		12
#endif

#ifndef MADV_UNMERGEABLE
#define MADV_UNMERGEABLE	13
#endif

#ifndef EFD_SEMAPHORE
#define EFD_SEMAPHORE		1
#endif
43
44 struct tp_field {
45         int offset;
46         union {
47                 u64 (*integer)(struct tp_field *field, struct perf_sample *sample);
48                 void *(*pointer)(struct tp_field *field, struct perf_sample *sample);
49         };
50 };
51
52 #define TP_UINT_FIELD(bits) \
53 static u64 tp_field__u##bits(struct tp_field *field, struct perf_sample *sample) \
54 { \
55         return *(u##bits *)(sample->raw_data + field->offset); \
56 }
57
58 TP_UINT_FIELD(8);
59 TP_UINT_FIELD(16);
60 TP_UINT_FIELD(32);
61 TP_UINT_FIELD(64);
62
63 #define TP_UINT_FIELD__SWAPPED(bits) \
64 static u64 tp_field__swapped_u##bits(struct tp_field *field, struct perf_sample *sample) \
65 { \
66         u##bits value = *(u##bits *)(sample->raw_data + field->offset); \
67         return bswap_##bits(value);\
68 }
69
70 TP_UINT_FIELD__SWAPPED(16);
71 TP_UINT_FIELD__SWAPPED(32);
72 TP_UINT_FIELD__SWAPPED(64);
73
74 static int tp_field__init_uint(struct tp_field *field,
75                                struct format_field *format_field,
76                                bool needs_swap)
77 {
78         field->offset = format_field->offset;
79
80         switch (format_field->size) {
81         case 1:
82                 field->integer = tp_field__u8;
83                 break;
84         case 2:
85                 field->integer = needs_swap ? tp_field__swapped_u16 : tp_field__u16;
86                 break;
87         case 4:
88                 field->integer = needs_swap ? tp_field__swapped_u32 : tp_field__u32;
89                 break;
90         case 8:
91                 field->integer = needs_swap ? tp_field__swapped_u64 : tp_field__u64;
92                 break;
93         default:
94                 return -1;
95         }
96
97         return 0;
98 }
99
100 static void *tp_field__ptr(struct tp_field *field, struct perf_sample *sample)
101 {
102         return sample->raw_data + field->offset;
103 }
104
105 static int tp_field__init_ptr(struct tp_field *field, struct format_field *format_field)
106 {
107         field->offset = format_field->offset;
108         field->pointer = tp_field__ptr;
109         return 0;
110 }
111
112 struct syscall_tp {
113         struct tp_field id;
114         union {
115                 struct tp_field args, ret;
116         };
117 };
118
119 static int perf_evsel__init_tp_uint_field(struct perf_evsel *evsel,
120                                           struct tp_field *field,
121                                           const char *name)
122 {
123         struct format_field *format_field = perf_evsel__field(evsel, name);
124
125         if (format_field == NULL)
126                 return -1;
127
128         return tp_field__init_uint(field, format_field, evsel->needs_swap);
129 }
130
131 #define perf_evsel__init_sc_tp_uint_field(evsel, name) \
132         ({ struct syscall_tp *sc = evsel->priv;\
133            perf_evsel__init_tp_uint_field(evsel, &sc->name, #name); })
134
/*
 * Look up the tracepoint field called @name in @evsel and set up @field to
 * return a pointer to it inside the sample's raw data.
 *
 * Returns -1 when the tracepoint has no such field, otherwise whatever
 * tp_field__init_ptr() returns.
 */
static int perf_evsel__init_tp_ptr_field(struct perf_evsel *evsel,
					 struct tp_field *field,
					 const char *name)
{
	struct format_field *format_field = perf_evsel__field(evsel, name);

	if (format_field == NULL)
		return -1;

	return tp_field__init_ptr(field, format_field);
}

/*
 * Initialize the 'name' member of the struct syscall_tp stored in
 * evsel->priv as a pointer field, from the tracepoint field of the same name.
 *
 * The evsel argument is parenthesized so that any pointer expression can be
 * passed; note that it is evaluated twice.
 */
#define perf_evsel__init_sc_tp_ptr_field(evsel, name) \
	({ struct syscall_tp *sc = (evsel)->priv;\
	   perf_evsel__init_tp_ptr_field((evsel), &sc->name, #name); })
150
151 static void perf_evsel__delete_priv(struct perf_evsel *evsel)
152 {
153         zfree(&evsel->priv);
154         perf_evsel__delete(evsel);
155 }
156
157 static int perf_evsel__init_syscall_tp(struct perf_evsel *evsel, void *handler)
158 {
159         evsel->priv = malloc(sizeof(struct syscall_tp));
160         if (evsel->priv != NULL) {
161                 if (perf_evsel__init_sc_tp_uint_field(evsel, id))
162                         goto out_delete;
163
164                 evsel->handler = handler;
165                 return 0;
166         }
167
168         return -ENOMEM;
169
170 out_delete:
171         zfree(&evsel->priv);
172         return -ENOENT;
173 }
174
/*
 * Create the evsel for the syscall tracepoint named by @direction and
 * initialize it with @handler.
 *
 * "raw_syscalls" is tried first; older kernels (e.g., RHEL6) use
 * syscalls:{enter,exit}, so "syscalls" is the fallback.
 *
 * Returns the evsel, or NULL when neither tracepoint could be created or the
 * initialization failed (the evsel is deleted in the latter case).
 */
static struct perf_evsel *perf_evsel__syscall_newtp(const char *direction, void *handler)
{
	struct perf_evsel *tp = perf_evsel__newtp("raw_syscalls", direction);

	if (tp == NULL)
		tp = perf_evsel__newtp("syscalls", direction);

	if (tp == NULL)
		return NULL;

	if (perf_evsel__init_syscall_tp(tp, handler)) {
		perf_evsel__delete_priv(tp);
		return NULL;
	}

	return tp;
}
194
/*
 * Read the integer member 'name' of the struct syscall_tp stored in
 * evsel->priv out of 'sample'. Macro arguments are parenthesized so that
 * arbitrary expressions can be passed.
 */
#define perf_evsel__sc_tp_uint(evsel, name, sample) \
	({ struct syscall_tp *fields = (evsel)->priv; \
	   fields->name.integer(&fields->name, (sample)); })

/*
 * Same as perf_evsel__sc_tp_uint() but for a pointer member: yields the
 * address of the field inside the sample's raw data.
 */
#define perf_evsel__sc_tp_ptr(evsel, name, sample) \
	({ struct syscall_tp *fields = (evsel)->priv; \
	   fields->name.pointer(&fields->name, (sample)); })
202
203 static int perf_evlist__add_syscall_newtp(struct perf_evlist *evlist,
204                                           void *sys_enter_handler,
205                                           void *sys_exit_handler)
206 {
207         int ret = -1;
208         struct perf_evsel *sys_enter, *sys_exit;
209
210         sys_enter = perf_evsel__syscall_newtp("sys_enter", sys_enter_handler);
211         if (sys_enter == NULL)
212                 goto out;
213
214         if (perf_evsel__init_sc_tp_ptr_field(sys_enter, args))
215                 goto out_delete_sys_enter;
216
217         sys_exit = perf_evsel__syscall_newtp("sys_exit", sys_exit_handler);
218         if (sys_exit == NULL)
219                 goto out_delete_sys_enter;
220
221         if (perf_evsel__init_sc_tp_uint_field(sys_exit, ret))
222                 goto out_delete_sys_exit;
223
224         perf_evlist__add(evlist, sys_enter);
225         perf_evlist__add(evlist, sys_exit);
226
227         ret = 0;
228 out:
229         return ret;
230
231 out_delete_sys_exit:
232         perf_evsel__delete_priv(sys_exit);
233 out_delete_sys_enter:
234         perf_evsel__delete_priv(sys_enter);
235         goto out;
236 }
237
238
239 struct syscall_arg {
240         unsigned long val;
241         struct thread *thread;
242         struct trace  *trace;
243         void          *parm;
244         u8            idx;
245         u8            mask;
246 };
247
/*
 * Table translating integer values to names: entries[i] is the name for the
 * value (offset + i), for 0 <= i < nr_entries.
 */
struct strarray {
	int	    offset;
	int	    nr_entries;
	const char  **entries;
};

/* Define strarray__<array> covering 'array', first entry naming value 0. */
#define DEFINE_STRARRAY(array) struct strarray strarray__##array = { \
	.nr_entries = ARRAY_SIZE(array), \
	.entries    = array, \
}

/* Define strarray__<array> covering 'array', first entry naming value 'off'. */
#define DEFINE_STRARRAY_OFFSET(array, off) struct strarray strarray__##array = { \
	.offset	    = off, \
	.nr_entries = ARRAY_SIZE(array), \
	.entries    = array, \
}
264
265 static size_t __syscall_arg__scnprintf_strarray(char *bf, size_t size,
266                                                 const char *intfmt,
267                                                 struct syscall_arg *arg)
268 {
269         struct strarray *sa = arg->parm;
270         int idx = arg->val - sa->offset;
271
272         if (idx < 0 || idx >= sa->nr_entries)
273                 return scnprintf(bf, size, intfmt, arg->val);
274
275         return scnprintf(bf, size, "%s", sa->entries[idx]);
276 }
277
/*
 * strarray formatter whose numeric fallback, for values without a name in
 * the table, is printed as a decimal number.
 */
static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size,
					      struct syscall_arg *arg)
{
	const char *fallback_fmt = "%d";

	return __syscall_arg__scnprintf_strarray(bf, size, fallback_fmt, arg);
}

#define SCA_STRARRAY syscall_arg__scnprintf_strarray
285
#if defined(__i386__) || defined(__x86_64__)
/*
 * FIXME: Make this available to all arches as soon as the ioctl beautifier
 *        gets rewritten to support all arches.
 *
 * strarray formatter whose numeric fallback, for values without a name in
 * the table, is printed in hexadecimal.
 */
static size_t syscall_arg__scnprintf_strhexarray(char *bf, size_t size,
						 struct syscall_arg *arg)
{
	const char *fallback_fmt = "%#x";

	return __syscall_arg__scnprintf_strarray(bf, size, fallback_fmt, arg);
}

#define SCA_STRHEXARRAY syscall_arg__scnprintf_strhexarray
#endif /* defined(__i386__) || defined(__x86_64__) */
299
300 static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
301                                         struct syscall_arg *arg);
302
303 #define SCA_FD syscall_arg__scnprintf_fd
304
305 static size_t syscall_arg__scnprintf_fd_at(char *bf, size_t size,
306                                            struct syscall_arg *arg)
307 {
308         int fd = arg->val;
309
310         if (fd == AT_FDCWD)
311                 return scnprintf(bf, size, "CWD");
312
313         return syscall_arg__scnprintf_fd(bf, size, arg);
314 }
315
316 #define SCA_FDAT syscall_arg__scnprintf_fd_at
317
318 static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
319                                               struct syscall_arg *arg);
320
321 #define SCA_CLOSE_FD syscall_arg__scnprintf_close_fd
322
323 static size_t syscall_arg__scnprintf_hex(char *bf, size_t size,
324                                          struct syscall_arg *arg)
325 {
326         return scnprintf(bf, size, "%#lx", arg->val);
327 }
328
329 #define SCA_HEX syscall_arg__scnprintf_hex
330
331 static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size,
332                                                struct syscall_arg *arg)
333 {
334         int printed = 0, prot = arg->val;
335
336         if (prot == PROT_NONE)
337                 return scnprintf(bf, size, "NONE");
338 #define P_MMAP_PROT(n) \
339         if (prot & PROT_##n) { \
340                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
341                 prot &= ~PROT_##n; \
342         }
343
344         P_MMAP_PROT(EXEC);
345         P_MMAP_PROT(READ);
346         P_MMAP_PROT(WRITE);
347 #ifdef PROT_SEM
348         P_MMAP_PROT(SEM);
349 #endif
350         P_MMAP_PROT(GROWSDOWN);
351         P_MMAP_PROT(GROWSUP);
352 #undef P_MMAP_PROT
353
354         if (prot)
355                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", prot);
356
357         return printed;
358 }
359
360 #define SCA_MMAP_PROT syscall_arg__scnprintf_mmap_prot
361
362 static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size,
363                                                 struct syscall_arg *arg)
364 {
365         int printed = 0, flags = arg->val;
366
367 #define P_MMAP_FLAG(n) \
368         if (flags & MAP_##n) { \
369                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
370                 flags &= ~MAP_##n; \
371         }
372
373         P_MMAP_FLAG(SHARED);
374         P_MMAP_FLAG(PRIVATE);
375 #ifdef MAP_32BIT
376         P_MMAP_FLAG(32BIT);
377 #endif
378         P_MMAP_FLAG(ANONYMOUS);
379         P_MMAP_FLAG(DENYWRITE);
380         P_MMAP_FLAG(EXECUTABLE);
381         P_MMAP_FLAG(FILE);
382         P_MMAP_FLAG(FIXED);
383         P_MMAP_FLAG(GROWSDOWN);
384 #ifdef MAP_HUGETLB
385         P_MMAP_FLAG(HUGETLB);
386 #endif
387         P_MMAP_FLAG(LOCKED);
388         P_MMAP_FLAG(NONBLOCK);
389         P_MMAP_FLAG(NORESERVE);
390         P_MMAP_FLAG(POPULATE);
391         P_MMAP_FLAG(STACK);
392 #ifdef MAP_UNINITIALIZED
393         P_MMAP_FLAG(UNINITIALIZED);
394 #endif
395 #undef P_MMAP_FLAG
396
397         if (flags)
398                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
399
400         return printed;
401 }
402
403 #define SCA_MMAP_FLAGS syscall_arg__scnprintf_mmap_flags
404
405 static size_t syscall_arg__scnprintf_mremap_flags(char *bf, size_t size,
406                                                   struct syscall_arg *arg)
407 {
408         int printed = 0, flags = arg->val;
409
410 #define P_MREMAP_FLAG(n) \
411         if (flags & MREMAP_##n) { \
412                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
413                 flags &= ~MREMAP_##n; \
414         }
415
416         P_MREMAP_FLAG(MAYMOVE);
417 #ifdef MREMAP_FIXED
418         P_MREMAP_FLAG(FIXED);
419 #endif
420 #undef P_MREMAP_FLAG
421
422         if (flags)
423                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
424
425         return printed;
426 }
427
428 #define SCA_MREMAP_FLAGS syscall_arg__scnprintf_mremap_flags
429
430 static size_t syscall_arg__scnprintf_madvise_behavior(char *bf, size_t size,
431                                                       struct syscall_arg *arg)
432 {
433         int behavior = arg->val;
434
435         switch (behavior) {
436 #define P_MADV_BHV(n) case MADV_##n: return scnprintf(bf, size, #n)
437         P_MADV_BHV(NORMAL);
438         P_MADV_BHV(RANDOM);
439         P_MADV_BHV(SEQUENTIAL);
440         P_MADV_BHV(WILLNEED);
441         P_MADV_BHV(DONTNEED);
442         P_MADV_BHV(REMOVE);
443         P_MADV_BHV(DONTFORK);
444         P_MADV_BHV(DOFORK);
445         P_MADV_BHV(HWPOISON);
446 #ifdef MADV_SOFT_OFFLINE
447         P_MADV_BHV(SOFT_OFFLINE);
448 #endif
449         P_MADV_BHV(MERGEABLE);
450         P_MADV_BHV(UNMERGEABLE);
451 #ifdef MADV_HUGEPAGE
452         P_MADV_BHV(HUGEPAGE);
453 #endif
454 #ifdef MADV_NOHUGEPAGE
455         P_MADV_BHV(NOHUGEPAGE);
456 #endif
457 #ifdef MADV_DONTDUMP
458         P_MADV_BHV(DONTDUMP);
459 #endif
460 #ifdef MADV_DODUMP
461         P_MADV_BHV(DODUMP);
462 #endif
463 #undef P_MADV_PHV
464         default: break;
465         }
466
467         return scnprintf(bf, size, "%#x", behavior);
468 }
469
470 #define SCA_MADV_BHV syscall_arg__scnprintf_madvise_behavior
471
472 static size_t syscall_arg__scnprintf_flock(char *bf, size_t size,
473                                            struct syscall_arg *arg)
474 {
475         int printed = 0, op = arg->val;
476
477         if (op == 0)
478                 return scnprintf(bf, size, "NONE");
479 #define P_CMD(cmd) \
480         if ((op & LOCK_##cmd) == LOCK_##cmd) { \
481                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #cmd); \
482                 op &= ~LOCK_##cmd; \
483         }
484
485         P_CMD(SH);
486         P_CMD(EX);
487         P_CMD(NB);
488         P_CMD(UN);
489         P_CMD(MAND);
490         P_CMD(RW);
491         P_CMD(READ);
492         P_CMD(WRITE);
493 #undef P_OP
494
495         if (op)
496                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", op);
497
498         return printed;
499 }
500
501 #define SCA_FLOCK syscall_arg__scnprintf_flock
502
503 static size_t syscall_arg__scnprintf_futex_op(char *bf, size_t size, struct syscall_arg *arg)
504 {
505         enum syscall_futex_args {
506                 SCF_UADDR   = (1 << 0),
507                 SCF_OP      = (1 << 1),
508                 SCF_VAL     = (1 << 2),
509                 SCF_TIMEOUT = (1 << 3),
510                 SCF_UADDR2  = (1 << 4),
511                 SCF_VAL3    = (1 << 5),
512         };
513         int op = arg->val;
514         int cmd = op & FUTEX_CMD_MASK;
515         size_t printed = 0;
516
517         switch (cmd) {
518 #define P_FUTEX_OP(n) case FUTEX_##n: printed = scnprintf(bf, size, #n);
519         P_FUTEX_OP(WAIT);           arg->mask |= SCF_VAL3|SCF_UADDR2;             break;
520         P_FUTEX_OP(WAKE);           arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
521         P_FUTEX_OP(FD);             arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
522         P_FUTEX_OP(REQUEUE);        arg->mask |= SCF_VAL3|SCF_TIMEOUT;            break;
523         P_FUTEX_OP(CMP_REQUEUE);    arg->mask |= SCF_TIMEOUT;                     break;
524         P_FUTEX_OP(CMP_REQUEUE_PI); arg->mask |= SCF_TIMEOUT;                     break;
525         P_FUTEX_OP(WAKE_OP);                                                      break;
526         P_FUTEX_OP(LOCK_PI);        arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
527         P_FUTEX_OP(UNLOCK_PI);      arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
528         P_FUTEX_OP(TRYLOCK_PI);     arg->mask |= SCF_VAL3|SCF_UADDR2;             break;
529         P_FUTEX_OP(WAIT_BITSET);    arg->mask |= SCF_UADDR2;                      break;
530         P_FUTEX_OP(WAKE_BITSET);    arg->mask |= SCF_UADDR2;                      break;
531         P_FUTEX_OP(WAIT_REQUEUE_PI);                                              break;
532         default: printed = scnprintf(bf, size, "%#x", cmd);                       break;
533         }
534
535         if (op & FUTEX_PRIVATE_FLAG)
536                 printed += scnprintf(bf + printed, size - printed, "|PRIV");
537
538         if (op & FUTEX_CLOCK_REALTIME)
539                 printed += scnprintf(bf + printed, size - printed, "|CLKRT");
540
541         return printed;
542 }
543
544 #define SCA_FUTEX_OP  syscall_arg__scnprintf_futex_op
545
546 static const char *epoll_ctl_ops[] = { "ADD", "DEL", "MOD", };
547 static DEFINE_STRARRAY_OFFSET(epoll_ctl_ops, 1);
548
549 static const char *itimers[] = { "REAL", "VIRTUAL", "PROF", };
550 static DEFINE_STRARRAY(itimers);
551
552 static const char *whences[] = { "SET", "CUR", "END",
553 #ifdef SEEK_DATA
554 "DATA",
555 #endif
556 #ifdef SEEK_HOLE
557 "HOLE",
558 #endif
559 };
560 static DEFINE_STRARRAY(whences);
561
/* Names for the fcntl 'cmd' argument, indexed by value. */
static const char *fcntl_cmds[] = {
	"DUPFD", "GETFD", "SETFD", "GETFL", "SETFL",
	"GETLK", "SETLK", "SETLKW",
	"SETOWN", "GETOWN", "SETSIG", "GETSIG",
	"F_GETLK64", "F_SETLK64", "F_SETLKW64",
	"F_SETOWN_EX", "F_GETOWN_EX",
	"F_GETOWNER_UIDS",
};
static DEFINE_STRARRAY(fcntl_cmds);

/* Resource limit names, indexed by value. */
static const char *rlimit_resources[] = {
	"CPU", "FSIZE", "DATA", "STACK",
	"CORE", "RSS", "NPROC", "NOFILE",
	"MEMLOCK", "AS", "LOCKS", "SIGPENDING",
	"MSGQUEUE", "NICE", "RTPRIO", "RTTIME",
};
static DEFINE_STRARRAY(rlimit_resources);

/* Signal mask 'how' names, indexed by value. */
static const char *sighow[] = {
	"BLOCK",
	"UNBLOCK",
	"SETMASK",
};
static DEFINE_STRARRAY(sighow);

/* Clock id names, indexed by value. */
static const char *clockid[] = {
	"REALTIME",
	"MONOTONIC",
	"PROCESS_CPUTIME_ID",
	"THREAD_CPUTIME_ID",
	"MONOTONIC_RAW",
	"REALTIME_COARSE",
	"MONOTONIC_COARSE",
};
static DEFINE_STRARRAY(clockid);
585
/*
 * Socket address family names, indexed by the AF_* value (0 = "UNSPEC").
 *
 * The list must have no gaps: "MPLS" (AF_MPLS, 28) sits between "IB" (27)
 * and "CAN" (29); leaving it out shifts every later family down by one and
 * makes them print under their neighbour's name.
 */
static const char *socket_families[] = {
	"UNSPEC", "LOCAL", "INET", "AX25", "IPX", "APPLETALK", "NETROM",
	"BRIDGE", "ATMPVC", "X25", "INET6", "ROSE", "DECnet", "NETBEUI",
	"SECURITY", "KEY", "NETLINK", "PACKET", "ASH", "ECONET", "ATMSVC",
	"RDS", "SNA", "IRDA", "PPPOX", "WANPIPE", "LLC", "IB", "MPLS", "CAN",
	"TIPC", "BLUETOOTH", "IUCV", "RXRPC", "ISDN", "PHONET", "IEEE802154",
	"CAIF", "ALG", "NFC", "VSOCK",
};
static DEFINE_STRARRAY(socket_families);
595
596 #ifndef SOCK_TYPE_MASK
597 #define SOCK_TYPE_MASK 0xf
598 #endif
599
600 static size_t syscall_arg__scnprintf_socket_type(char *bf, size_t size,
601                                                       struct syscall_arg *arg)
602 {
603         size_t printed;
604         int type = arg->val,
605             flags = type & ~SOCK_TYPE_MASK;
606
607         type &= SOCK_TYPE_MASK;
608         /*
609          * Can't use a strarray, MIPS may override for ABI reasons.
610          */
611         switch (type) {
612 #define P_SK_TYPE(n) case SOCK_##n: printed = scnprintf(bf, size, #n); break;
613         P_SK_TYPE(STREAM);
614         P_SK_TYPE(DGRAM);
615         P_SK_TYPE(RAW);
616         P_SK_TYPE(RDM);
617         P_SK_TYPE(SEQPACKET);
618         P_SK_TYPE(DCCP);
619         P_SK_TYPE(PACKET);
620 #undef P_SK_TYPE
621         default:
622                 printed = scnprintf(bf, size, "%#x", type);
623         }
624
625 #define P_SK_FLAG(n) \
626         if (flags & SOCK_##n) { \
627                 printed += scnprintf(bf + printed, size - printed, "|%s", #n); \
628                 flags &= ~SOCK_##n; \
629         }
630
631         P_SK_FLAG(CLOEXEC);
632         P_SK_FLAG(NONBLOCK);
633 #undef P_SK_FLAG
634
635         if (flags)
636                 printed += scnprintf(bf + printed, size - printed, "|%#x", flags);
637
638         return printed;
639 }
640
641 #define SCA_SK_TYPE syscall_arg__scnprintf_socket_type
642
643 #ifndef MSG_PROBE
644 #define MSG_PROBE            0x10
645 #endif
646 #ifndef MSG_WAITFORONE
647 #define MSG_WAITFORONE  0x10000
648 #endif
649 #ifndef MSG_SENDPAGE_NOTLAST
650 #define MSG_SENDPAGE_NOTLAST 0x20000
651 #endif
652 #ifndef MSG_FASTOPEN
653 #define MSG_FASTOPEN         0x20000000
654 #endif
655
656 static size_t syscall_arg__scnprintf_msg_flags(char *bf, size_t size,
657                                                struct syscall_arg *arg)
658 {
659         int printed = 0, flags = arg->val;
660
661         if (flags == 0)
662                 return scnprintf(bf, size, "NONE");
663 #define P_MSG_FLAG(n) \
664         if (flags & MSG_##n) { \
665                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
666                 flags &= ~MSG_##n; \
667         }
668
669         P_MSG_FLAG(OOB);
670         P_MSG_FLAG(PEEK);
671         P_MSG_FLAG(DONTROUTE);
672         P_MSG_FLAG(TRYHARD);
673         P_MSG_FLAG(CTRUNC);
674         P_MSG_FLAG(PROBE);
675         P_MSG_FLAG(TRUNC);
676         P_MSG_FLAG(DONTWAIT);
677         P_MSG_FLAG(EOR);
678         P_MSG_FLAG(WAITALL);
679         P_MSG_FLAG(FIN);
680         P_MSG_FLAG(SYN);
681         P_MSG_FLAG(CONFIRM);
682         P_MSG_FLAG(RST);
683         P_MSG_FLAG(ERRQUEUE);
684         P_MSG_FLAG(NOSIGNAL);
685         P_MSG_FLAG(MORE);
686         P_MSG_FLAG(WAITFORONE);
687         P_MSG_FLAG(SENDPAGE_NOTLAST);
688         P_MSG_FLAG(FASTOPEN);
689         P_MSG_FLAG(CMSG_CLOEXEC);
690 #undef P_MSG_FLAG
691
692         if (flags)
693                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
694
695         return printed;
696 }
697
698 #define SCA_MSG_FLAGS syscall_arg__scnprintf_msg_flags
699
700 static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size,
701                                                  struct syscall_arg *arg)
702 {
703         size_t printed = 0;
704         int mode = arg->val;
705
706         if (mode == F_OK) /* 0 */
707                 return scnprintf(bf, size, "F");
708 #define P_MODE(n) \
709         if (mode & n##_OK) { \
710                 printed += scnprintf(bf + printed, size - printed, "%s", #n); \
711                 mode &= ~n##_OK; \
712         }
713
714         P_MODE(R);
715         P_MODE(W);
716         P_MODE(X);
717 #undef P_MODE
718
719         if (mode)
720                 printed += scnprintf(bf + printed, size - printed, "|%#x", mode);
721
722         return printed;
723 }
724
725 #define SCA_ACCMODE syscall_arg__scnprintf_access_mode
726
727 static size_t syscall_arg__scnprintf_open_flags(char *bf, size_t size,
728                                                struct syscall_arg *arg)
729 {
730         int printed = 0, flags = arg->val;
731
732         if (!(flags & O_CREAT))
733                 arg->mask |= 1 << (arg->idx + 1); /* Mask the mode parm */
734
735         if (flags == 0)
736                 return scnprintf(bf, size, "RDONLY");
737 #define P_FLAG(n) \
738         if (flags & O_##n) { \
739                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
740                 flags &= ~O_##n; \
741         }
742
743         P_FLAG(APPEND);
744         P_FLAG(ASYNC);
745         P_FLAG(CLOEXEC);
746         P_FLAG(CREAT);
747         P_FLAG(DIRECT);
748         P_FLAG(DIRECTORY);
749         P_FLAG(EXCL);
750         P_FLAG(LARGEFILE);
751         P_FLAG(NOATIME);
752         P_FLAG(NOCTTY);
753 #ifdef O_NONBLOCK
754         P_FLAG(NONBLOCK);
755 #elif O_NDELAY
756         P_FLAG(NDELAY);
757 #endif
758 #ifdef O_PATH
759         P_FLAG(PATH);
760 #endif
761         P_FLAG(RDWR);
762 #ifdef O_DSYNC
763         if ((flags & O_SYNC) == O_SYNC)
764                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", "SYNC");
765         else {
766                 P_FLAG(DSYNC);
767         }
768 #else
769         P_FLAG(SYNC);
770 #endif
771         P_FLAG(TRUNC);
772         P_FLAG(WRONLY);
773 #undef P_FLAG
774
775         if (flags)
776                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
777
778         return printed;
779 }
780
781 #define SCA_OPEN_FLAGS syscall_arg__scnprintf_open_flags
782
783 static size_t syscall_arg__scnprintf_eventfd_flags(char *bf, size_t size,
784                                                    struct syscall_arg *arg)
785 {
786         int printed = 0, flags = arg->val;
787
788         if (flags == 0)
789                 return scnprintf(bf, size, "NONE");
790 #define P_FLAG(n) \
791         if (flags & EFD_##n) { \
792                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
793                 flags &= ~EFD_##n; \
794         }
795
796         P_FLAG(SEMAPHORE);
797         P_FLAG(CLOEXEC);
798         P_FLAG(NONBLOCK);
799 #undef P_FLAG
800
801         if (flags)
802                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
803
804         return printed;
805 }
806
807 #define SCA_EFD_FLAGS syscall_arg__scnprintf_eventfd_flags
808
809 static size_t syscall_arg__scnprintf_pipe_flags(char *bf, size_t size,
810                                                 struct syscall_arg *arg)
811 {
812         int printed = 0, flags = arg->val;
813
814 #define P_FLAG(n) \
815         if (flags & O_##n) { \
816                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
817                 flags &= ~O_##n; \
818         }
819
820         P_FLAG(CLOEXEC);
821         P_FLAG(NONBLOCK);
822 #undef P_FLAG
823
824         if (flags)
825                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
826
827         return printed;
828 }
829
830 #define SCA_PIPE_FLAGS syscall_arg__scnprintf_pipe_flags
831
832 static size_t syscall_arg__scnprintf_signum(char *bf, size_t size, struct syscall_arg *arg)
833 {
834         int sig = arg->val;
835
836         switch (sig) {
837 #define P_SIGNUM(n) case SIG##n: return scnprintf(bf, size, #n)
838         P_SIGNUM(HUP);
839         P_SIGNUM(INT);
840         P_SIGNUM(QUIT);
841         P_SIGNUM(ILL);
842         P_SIGNUM(TRAP);
843         P_SIGNUM(ABRT);
844         P_SIGNUM(BUS);
845         P_SIGNUM(FPE);
846         P_SIGNUM(KILL);
847         P_SIGNUM(USR1);
848         P_SIGNUM(SEGV);
849         P_SIGNUM(USR2);
850         P_SIGNUM(PIPE);
851         P_SIGNUM(ALRM);
852         P_SIGNUM(TERM);
853         P_SIGNUM(CHLD);
854         P_SIGNUM(CONT);
855         P_SIGNUM(STOP);
856         P_SIGNUM(TSTP);
857         P_SIGNUM(TTIN);
858         P_SIGNUM(TTOU);
859         P_SIGNUM(URG);
860         P_SIGNUM(XCPU);
861         P_SIGNUM(XFSZ);
862         P_SIGNUM(VTALRM);
863         P_SIGNUM(PROF);
864         P_SIGNUM(WINCH);
865         P_SIGNUM(IO);
866         P_SIGNUM(PWR);
867         P_SIGNUM(SYS);
868 #ifdef SIGEMT
869         P_SIGNUM(EMT);
870 #endif
871 #ifdef SIGSTKFLT
872         P_SIGNUM(STKFLT);
873 #endif
874 #ifdef SIGSWI
875         P_SIGNUM(SWI);
876 #endif
877         default: break;
878         }
879
880         return scnprintf(bf, size, "%#x", sig);
881 }
882
883 #define SCA_SIGNUM syscall_arg__scnprintf_signum
884
885 #if defined(__i386__) || defined(__x86_64__)
886 /*
887  * FIXME: Make this available to all arches.
888  */
889 #define TCGETS          0x5401
890
891 static const char *tioctls[] = {
892         "TCGETS", "TCSETS", "TCSETSW", "TCSETSF", "TCGETA", "TCSETA", "TCSETAW",
893         "TCSETAF", "TCSBRK", "TCXONC", "TCFLSH", "TIOCEXCL", "TIOCNXCL",
894         "TIOCSCTTY", "TIOCGPGRP", "TIOCSPGRP", "TIOCOUTQ", "TIOCSTI",
895         "TIOCGWINSZ", "TIOCSWINSZ", "TIOCMGET", "TIOCMBIS", "TIOCMBIC",
896         "TIOCMSET", "TIOCGSOFTCAR", "TIOCSSOFTCAR", "FIONREAD", "TIOCLINUX",
897         "TIOCCONS", "TIOCGSERIAL", "TIOCSSERIAL", "TIOCPKT", "FIONBIO",
898         "TIOCNOTTY", "TIOCSETD", "TIOCGETD", "TCSBRKP", [0x27] = "TIOCSBRK",
899         "TIOCCBRK", "TIOCGSID", "TCGETS2", "TCSETS2", "TCSETSW2", "TCSETSF2",
900         "TIOCGRS485", "TIOCSRS485", "TIOCGPTN", "TIOCSPTLCK",
901         "TIOCGDEV||TCGETX", "TCSETX", "TCSETXF", "TCSETXW", "TIOCSIG",
902         "TIOCVHANGUP", "TIOCGPKT", "TIOCGPTLCK", "TIOCGEXCL",
903         [0x50] = "FIONCLEX", "FIOCLEX", "FIOASYNC", "TIOCSERCONFIG",
904         "TIOCSERGWILD", "TIOCSERSWILD", "TIOCGLCKTRMIOS", "TIOCSLCKTRMIOS",
905         "TIOCSERGSTRUCT", "TIOCSERGETLSR", "TIOCSERGETMULTI", "TIOCSERSETMULTI",
906         "TIOCMIWAIT", "TIOCGICOUNT", [0x60] = "FIOQSIZE",
907 };
908
909 static DEFINE_STRARRAY_OFFSET(tioctls, 0x5401);
910 #endif /* defined(__i386__) || defined(__x86_64__) */
911
/*
 * Initializer fragment for a struct syscall_fmt entry: format argument
 * number 'arg' with SCA_STRARRAY using the table strarray__<array>.
 * 'name' is not used by the expansion, it only labels the argument at the
 * place of use.
 */
#define STRARRAY(arg, name, array) \
	.arg_scnprintf = { [arg] = SCA_STRARRAY, }, \
	.arg_parm      = { [arg] = &strarray__##array, }
915
/*
 * Per-syscall pretty-printing overrides.
 *
 * syscall_fmt__find() looks entries up with bsearch() and strcmp() on .name,
 * so this table MUST stay sorted by .name in strcmp() order (note that '_'
 * sorts before lowercase letters, e.g. "open_by_handle_at" < "openat").
 *
 * errmsg, timeout and hexret are not consumed in this part of the file;
 * NOTE(review): presumably they select how the syscall return value is
 * printed -- confirm against the sys_exit handling.
 */
916 static struct syscall_fmt {
917         const char *name;  /* syscall name; lookup key for syscall_fmt__find() */
918         const char *alias; /* alternate "sys_enter_<alias>" tracepoint tried when "sys_enter_<name>" is not found */
919         size_t     (*arg_scnprintf[6])(char *bf, size_t size, struct syscall_arg *arg); /* per-argument formatter, indexed by argument position; NULL means no override */
920         void       *arg_parm[6]; /* per-argument data handed to the formatter through syscall_arg.parm */
921         bool       errmsg;
922         bool       timeout;
923         bool       hexret;
924 } syscall_fmts[] = {
925         { .name     = "access",     .errmsg = true,
926           .arg_scnprintf = { [1] = SCA_ACCMODE, /* mode */ }, },
927         { .name     = "arch_prctl", .errmsg = true, .alias = "prctl", },
928         { .name     = "brk",        .hexret = true,
929           .arg_scnprintf = { [0] = SCA_HEX, /* brk */ }, },
930         { .name     = "clock_gettime",  .errmsg = true, STRARRAY(0, clk_id, clockid), },
931         { .name     = "close",      .errmsg = true,
932           .arg_scnprintf = { [0] = SCA_CLOSE_FD, /* fd */ }, },
933         { .name     = "connect",    .errmsg = true, },
934         { .name     = "dup",        .errmsg = true,
935           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
936         { .name     = "dup2",       .errmsg = true,
937           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
938         { .name     = "dup3",       .errmsg = true,
939           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
940         { .name     = "epoll_ctl",  .errmsg = true, STRARRAY(1, op, epoll_ctl_ops), },
941         { .name     = "eventfd2",   .errmsg = true,
942           .arg_scnprintf = { [1] = SCA_EFD_FLAGS, /* flags */ }, },
943         { .name     = "faccessat",  .errmsg = true,
944           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
945         { .name     = "fadvise64",  .errmsg = true,
946           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
947         { .name     = "fallocate",  .errmsg = true,
948           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
949         { .name     = "fchdir",     .errmsg = true,
950           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
951         { .name     = "fchmod",     .errmsg = true,
952           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
953         { .name     = "fchmodat",   .errmsg = true,
954           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
955         { .name     = "fchown",     .errmsg = true,
956           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
957         { .name     = "fchownat",   .errmsg = true,
958           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
959         { .name     = "fcntl",      .errmsg = true,
960           .arg_scnprintf = { [0] = SCA_FD, /* fd */
961                              [1] = SCA_STRARRAY, /* cmd */ },
962           .arg_parm      = { [1] = &strarray__fcntl_cmds, /* cmd */ }, },
963         { .name     = "fdatasync",  .errmsg = true,
964           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
965         { .name     = "flock",      .errmsg = true,
966           .arg_scnprintf = { [0] = SCA_FD, /* fd */
967                              [1] = SCA_FLOCK, /* cmd */ }, },
968         { .name     = "fsetxattr",  .errmsg = true,
969           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
970         { .name     = "fstat",      .errmsg = true, .alias = "newfstat",
971           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
972         { .name     = "fstatat",    .errmsg = true, .alias = "newfstatat",
973           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
974         { .name     = "fstatfs",    .errmsg = true,
975           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
976         { .name     = "fsync",    .errmsg = true,
977           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
978         { .name     = "ftruncate", .errmsg = true,
979           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
980         { .name     = "futex",      .errmsg = true,
981           .arg_scnprintf = { [1] = SCA_FUTEX_OP, /* op */ }, },
982         { .name     = "futimesat", .errmsg = true,
983           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
984         { .name     = "getdents",   .errmsg = true,
985           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
986         { .name     = "getdents64", .errmsg = true,
987           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
988         { .name     = "getitimer",  .errmsg = true, STRARRAY(0, which, itimers), },
989         { .name     = "getrlimit",  .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
990         { .name     = "ioctl",      .errmsg = true,
991           .arg_scnprintf = { [0] = SCA_FD, /* fd */
992 #if defined(__i386__) || defined(__x86_64__)
993 /*
994  * FIXME: Make this available to all arches.
995  */
996                              [1] = SCA_STRHEXARRAY, /* cmd */
997                              [2] = SCA_HEX, /* arg */ },
998           .arg_parm      = { [1] = &strarray__tioctls, /* cmd */ }, },
999 #else
1000                              [2] = SCA_HEX, /* arg */ }, },
1001 #endif
1002         { .name     = "kill",       .errmsg = true,
1003           .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1004         { .name     = "linkat",     .errmsg = true,
1005           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
1006         { .name     = "lseek",      .errmsg = true,
1007           .arg_scnprintf = { [0] = SCA_FD, /* fd */
1008                              [2] = SCA_STRARRAY, /* whence */ },
1009           .arg_parm      = { [2] = &strarray__whences, /* whence */ }, },
1010         { .name     = "lstat",      .errmsg = true, .alias = "newlstat", },
1011         { .name     = "madvise",    .errmsg = true,
1012           .arg_scnprintf = { [0] = SCA_HEX,      /* start */
1013                              [2] = SCA_MADV_BHV, /* behavior */ }, },
1014         { .name     = "mkdirat",    .errmsg = true,
1015           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
1016         { .name     = "mknodat",    .errmsg = true,
1017           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
1018         { .name     = "mlock",      .errmsg = true,
1019           .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1020         { .name     = "mlockall",   .errmsg = true,
1021           .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1022         { .name     = "mmap",       .hexret = true,
1023           .arg_scnprintf = { [0] = SCA_HEX,       /* addr */
1024                              [2] = SCA_MMAP_PROT, /* prot */
1025                              [3] = SCA_MMAP_FLAGS, /* flags */
1026                              [4] = SCA_FD,        /* fd */ }, },
1027         { .name     = "mprotect",   .errmsg = true,
1028           .arg_scnprintf = { [0] = SCA_HEX, /* start */
1029                              [2] = SCA_MMAP_PROT, /* prot */ }, },
1030         { .name     = "mremap",     .hexret = true,
1031           .arg_scnprintf = { [0] = SCA_HEX, /* addr */
1032                              [3] = SCA_MREMAP_FLAGS, /* flags */
1033                              [4] = SCA_HEX, /* new_addr */ }, },
1034         { .name     = "munlock",    .errmsg = true,
1035           .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1036         { .name     = "munmap",     .errmsg = true,
1037           .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1038         { .name     = "name_to_handle_at", .errmsg = true,
1039           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1040         { .name     = "newfstatat", .errmsg = true,
1041           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1042         { .name     = "open",       .errmsg = true,
1043           .arg_scnprintf = { [1] = SCA_OPEN_FLAGS, /* flags */ }, },
1044         { .name     = "open_by_handle_at", .errmsg = true,
1045           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1046                              [2] = SCA_OPEN_FLAGS, /* flags */ }, },
1047         { .name     = "openat",     .errmsg = true,
1048           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1049                              [2] = SCA_OPEN_FLAGS, /* flags */ }, },
1050         { .name     = "pipe2",      .errmsg = true,
1051           .arg_scnprintf = { [1] = SCA_PIPE_FLAGS, /* flags */ }, },
1052         { .name     = "poll",       .errmsg = true, .timeout = true, },
1053         { .name     = "ppoll",      .errmsg = true, .timeout = true, },
1054         { .name     = "pread",      .errmsg = true, .alias = "pread64",
1055           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1056         { .name     = "preadv",     .errmsg = true, .alias = "pread",
1057           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1058         { .name     = "prlimit64",  .errmsg = true, STRARRAY(1, resource, rlimit_resources), },
1059         { .name     = "pwrite",     .errmsg = true, .alias = "pwrite64",
1060           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1061         { .name     = "pwritev",    .errmsg = true,
1062           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1063         { .name     = "read",       .errmsg = true,
1064           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1065         { .name     = "readlinkat", .errmsg = true,
1066           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1067         { .name     = "readv",      .errmsg = true,
1068           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1069         { .name     = "recvfrom",   .errmsg = true,
1070           .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1071         { .name     = "recvmmsg",   .errmsg = true,
1072           .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1073         { .name     = "recvmsg",    .errmsg = true,
1074           .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
1075         { .name     = "renameat",   .errmsg = true,
1076           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1077         { .name     = "rt_sigaction", .errmsg = true,
1078           .arg_scnprintf = { [0] = SCA_SIGNUM, /* sig */ }, },
1079         { .name     = "rt_sigprocmask",  .errmsg = true, STRARRAY(0, how, sighow), },
1080         { .name     = "rt_sigqueueinfo", .errmsg = true,
1081           .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1082         { .name     = "rt_tgsigqueueinfo", .errmsg = true,
1083           .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
1084         { .name     = "select",     .errmsg = true, .timeout = true, },
1085         { .name     = "sendmmsg",    .errmsg = true,
1086           .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1087         { .name     = "sendmsg",    .errmsg = true,
1088           .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
1089         { .name     = "sendto",     .errmsg = true,
1090           .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1091         { .name     = "setitimer",  .errmsg = true, STRARRAY(0, which, itimers), },
1092         { .name     = "setrlimit",  .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
1093         { .name     = "shutdown",   .errmsg = true,
1094           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1095         { .name     = "socket",     .errmsg = true,
1096           .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
1097                              [1] = SCA_SK_TYPE, /* type */ },
1098           .arg_parm      = { [0] = &strarray__socket_families, /* family */ }, },
1099         { .name     = "socketpair", .errmsg = true,
1100           .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
1101                              [1] = SCA_SK_TYPE, /* type */ },
1102           .arg_parm      = { [0] = &strarray__socket_families, /* family */ }, },
1103         { .name     = "stat",       .errmsg = true, .alias = "newstat", },
1104         { .name     = "symlinkat",  .errmsg = true,
1105           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1106         { .name     = "tgkill",     .errmsg = true,
1107           .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
1108         { .name     = "tkill",      .errmsg = true,
1109           .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1110         { .name     = "uname",      .errmsg = true, .alias = "newuname", },
1111         { .name     = "unlinkat",   .errmsg = true,
1112           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1113         { .name     = "utimensat",  .errmsg = true,
1114           .arg_scnprintf = { [0] = SCA_FDAT, /* dirfd */ }, },
1115         { .name     = "write",      .errmsg = true,
1116           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1117         { .name     = "writev",     .errmsg = true,
1118           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1119 };
1120
1121 static int syscall_fmt__cmp(const void *name, const void *fmtp)
1122 {
1123         const struct syscall_fmt *fmt = fmtp;
1124         return strcmp(name, fmt->name);
1125 }
1126
1127 static struct syscall_fmt *syscall_fmt__find(const char *name)
1128 {
1129         const int nmemb = ARRAY_SIZE(syscall_fmts);
1130         return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp);
1131 }
1132
/* One slot of trace->syscalls.table, indexed by syscall id. */
struct syscall {
        /* "syscalls:sys_enter_<name>" tracepoint format, NULL if not resolved */
        struct event_format *tp_format;
        /* syscall name; NULL marks a slot that has not been read yet */
        const char *name;
        /* excluded by the event qualifier list */
        bool filtered;
        /* set for "exit" and "exit_group" */
        bool is_exit;
        /* optional overrides from syscall_fmts[] */
        struct syscall_fmt *fmt;
        /* per-argument formatters, allocated by syscall__set_arg_fmts() */
        size_t (**arg_scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
        /* per-argument formatter data, borrowed from fmt->arg_parm */
        void **arg_parm;
};
1142
1143 static size_t fprintf_duration(unsigned long t, FILE *fp)
1144 {
1145         double duration = (double)t / NSEC_PER_MSEC;
1146         size_t printed = fprintf(fp, "(");
1147
1148         if (duration >= 1.0)
1149                 printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration);
1150         else if (duration >= 0.01)
1151                 printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration);
1152         else
1153                 printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration);
1154         return printed + fprintf(fp, "): ");
1155 }
1156
1157 struct thread_trace {
1158         u64               entry_time;
1159         u64               exit_time;
1160         bool              entry_pending;
1161         unsigned long     nr_events;
1162         unsigned long     pfmaj, pfmin;
1163         char              *entry_str;
1164         double            runtime_ms;
1165         struct {
1166                 int       max;
1167                 char      **table;
1168         } paths;
1169
1170         struct intlist *syscall_stats;
1171 };
1172
1173 static struct thread_trace *thread_trace__new(void)
1174 {
1175         struct thread_trace *ttrace =  zalloc(sizeof(struct thread_trace));
1176
1177         if (ttrace)
1178                 ttrace->paths.max = -1;
1179
1180         ttrace->syscall_stats = intlist__new(NULL);
1181
1182         return ttrace;
1183 }
1184
1185 static struct thread_trace *thread__trace(struct thread *thread, FILE *fp)
1186 {
1187         struct thread_trace *ttrace;
1188
1189         if (thread == NULL)
1190                 goto fail;
1191
1192         if (thread__priv(thread) == NULL)
1193                 thread__set_priv(thread, thread_trace__new());
1194
1195         if (thread__priv(thread) == NULL)
1196                 goto fail;
1197
1198         ttrace = thread__priv(thread);
1199         ++ttrace->nr_events;
1200
1201         return ttrace;
1202 fail:
1203         color_fprintf(fp, PERF_COLOR_RED,
1204                       "WARNING: not enough memory, dropping samples!\n");
1205         return NULL;
1206 }
1207
/*
 * Single-bit flags that can be OR-ed together.
 * NOTE(review): presumably stored in trace->trace_pgfaults to select major
 * and/or minor page fault tracing -- confirm against the option parsing.
 */
1208 #define TRACE_PFMAJ             (1 << 0)
1209 #define TRACE_PFMIN             (1 << 1)
1210
1211 struct trace {
1212         struct perf_tool        tool;
1213         struct {
1214                 int             machine;
1215                 int             open_id;
1216         }                       audit;
1217         struct {
1218                 int             max;
1219                 struct syscall  *table;
1220         } syscalls;
1221         struct record_opts      opts;
1222         struct perf_evlist      *evlist;
1223         struct machine          *host;
1224         struct thread           *current;
1225         u64                     base_time;
1226         FILE                    *output;
1227         unsigned long           nr_events;
1228         struct strlist          *ev_qualifier;
1229         const char              *last_vfs_getname;
1230         struct intlist          *tid_list;
1231         struct intlist          *pid_list;
1232         double                  duration_filter;
1233         double                  runtime_ms;
1234         struct {
1235                 u64             vfs_getname,
1236                                 proc_getname;
1237         } stats;
1238         bool                    not_ev_qualifier;
1239         bool                    live;
1240         bool                    full_time;
1241         bool                    sched;
1242         bool                    multiple_threads;
1243         bool                    summary;
1244         bool                    summary_only;
1245         bool                    show_comm;
1246         bool                    show_tool_stats;
1247         bool                    trace_syscalls;
1248         int                     trace_pgfaults;
1249 };
1250
1251 static int trace__set_fd_pathname(struct thread *thread, int fd, const char *pathname)
1252 {
1253         struct thread_trace *ttrace = thread__priv(thread);
1254
1255         if (fd > ttrace->paths.max) {
1256                 char **npath = realloc(ttrace->paths.table, (fd + 1) * sizeof(char *));
1257
1258                 if (npath == NULL)
1259                         return -1;
1260
1261                 if (ttrace->paths.max != -1) {
1262                         memset(npath + ttrace->paths.max + 1, 0,
1263                                (fd - ttrace->paths.max) * sizeof(char *));
1264                 } else {
1265                         memset(npath, 0, (fd + 1) * sizeof(char *));
1266                 }
1267
1268                 ttrace->paths.table = npath;
1269                 ttrace->paths.max   = fd;
1270         }
1271
1272         ttrace->paths.table[fd] = strdup(pathname);
1273
1274         return ttrace->paths.table[fd] != NULL ? 0 : -1;
1275 }
1276
1277 static int thread__read_fd_path(struct thread *thread, int fd)
1278 {
1279         char linkname[PATH_MAX], pathname[PATH_MAX];
1280         struct stat st;
1281         int ret;
1282
1283         if (thread->pid_ == thread->tid) {
1284                 scnprintf(linkname, sizeof(linkname),
1285                           "/proc/%d/fd/%d", thread->pid_, fd);
1286         } else {
1287                 scnprintf(linkname, sizeof(linkname),
1288                           "/proc/%d/task/%d/fd/%d", thread->pid_, thread->tid, fd);
1289         }
1290
1291         if (lstat(linkname, &st) < 0 || st.st_size + 1 > (off_t)sizeof(pathname))
1292                 return -1;
1293
1294         ret = readlink(linkname, pathname, sizeof(pathname));
1295
1296         if (ret < 0 || ret > st.st_size)
1297                 return -1;
1298
1299         pathname[ret] = '\0';
1300         return trace__set_fd_pathname(thread, fd, pathname);
1301 }
1302
1303 static const char *thread__fd_path(struct thread *thread, int fd,
1304                                    struct trace *trace)
1305 {
1306         struct thread_trace *ttrace = thread__priv(thread);
1307
1308         if (ttrace == NULL)
1309                 return NULL;
1310
1311         if (fd < 0)
1312                 return NULL;
1313
1314         if ((fd > ttrace->paths.max || ttrace->paths.table[fd] == NULL)) {
1315                 if (!trace->live)
1316                         return NULL;
1317                 ++trace->stats.proc_getname;
1318                 if (thread__read_fd_path(thread, fd))
1319                         return NULL;
1320         }
1321
1322         return ttrace->paths.table[fd];
1323 }
1324
1325 static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
1326                                         struct syscall_arg *arg)
1327 {
1328         int fd = arg->val;
1329         size_t printed = scnprintf(bf, size, "%d", fd);
1330         const char *path = thread__fd_path(arg->thread, fd, arg->trace);
1331
1332         if (path)
1333                 printed += scnprintf(bf + printed, size - printed, "<%s>", path);
1334
1335         return printed;
1336 }
1337
1338 static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
1339                                               struct syscall_arg *arg)
1340 {
1341         int fd = arg->val;
1342         size_t printed = syscall_arg__scnprintf_fd(bf, size, arg);
1343         struct thread_trace *ttrace = thread__priv(arg->thread);
1344
1345         if (ttrace && fd >= 0 && fd <= ttrace->paths.max)
1346                 zfree(&ttrace->paths.table[fd]);
1347
1348         return printed;
1349 }
1350
1351 static bool trace__filter_duration(struct trace *trace, double t)
1352 {
1353         return t < (trace->duration_filter * NSEC_PER_MSEC);
1354 }
1355
1356 static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
1357 {
1358         double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC;
1359
1360         return fprintf(fp, "%10.3f ", ts);
1361 }
1362
/*
 * Flags shared between the signal handler and the main loop.
 *
 * They are written asynchronously from sig_handler(), so they are declared
 * volatile sig_atomic_t: a plain bool gives the compiler licence to cache
 * the value in a register and never notice the update.
 */
static volatile sig_atomic_t done = false;
static volatile sig_atomic_t interrupted = false;

/*
 * Signal handler: request termination of the trace loop and record whether
 * the request came from SIGINT.
 */
static void sig_handler(int sig)
{
        done = true;
        interrupted = (sig == SIGINT);
}
1371
1372 static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread,
1373                                         u64 duration, u64 tstamp, FILE *fp)
1374 {
1375         size_t printed = trace__fprintf_tstamp(trace, tstamp, fp);
1376         printed += fprintf_duration(duration, fp);
1377
1378         if (trace->multiple_threads) {
1379                 if (trace->show_comm)
1380                         printed += fprintf(fp, "%.14s/", thread__comm_str(thread));
1381                 printed += fprintf(fp, "%d ", thread->tid);
1382         }
1383
1384         return printed;
1385 }
1386
1387 static int trace__process_event(struct trace *trace, struct machine *machine,
1388                                 union perf_event *event, struct perf_sample *sample)
1389 {
1390         int ret = 0;
1391
1392         switch (event->header.type) {
1393         case PERF_RECORD_LOST:
1394                 color_fprintf(trace->output, PERF_COLOR_RED,
1395                               "LOST %" PRIu64 " events!\n", event->lost.lost);
1396                 ret = machine__process_lost_event(machine, event, sample);
1397         default:
1398                 ret = machine__process_event(machine, event, sample);
1399                 break;
1400         }
1401
1402         return ret;
1403 }
1404
1405 static int trace__tool_process(struct perf_tool *tool,
1406                                union perf_event *event,
1407                                struct perf_sample *sample,
1408                                struct machine *machine)
1409 {
1410         struct trace *trace = container_of(tool, struct trace, tool);
1411         return trace__process_event(trace, machine, event, sample);
1412 }
1413
1414 static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
1415 {
1416         int err = symbol__init(NULL);
1417
1418         if (err)
1419                 return err;
1420
1421         trace->host = machine__new_host();
1422         if (trace->host == NULL)
1423                 return -ENOMEM;
1424
1425         err = __machine__synthesize_threads(trace->host, &trace->tool, &trace->opts.target,
1426                                             evlist->threads, trace__tool_process, false);
1427         if (err)
1428                 symbol__exit();
1429
1430         return err;
1431 }
1432
1433 static int syscall__set_arg_fmts(struct syscall *sc)
1434 {
1435         struct format_field *field;
1436         int idx = 0;
1437
1438         sc->arg_scnprintf = calloc(sc->tp_format->format.nr_fields - 1, sizeof(void *));
1439         if (sc->arg_scnprintf == NULL)
1440                 return -1;
1441
1442         if (sc->fmt)
1443                 sc->arg_parm = sc->fmt->arg_parm;
1444
1445         for (field = sc->tp_format->format.fields->next; field; field = field->next) {
1446                 if (sc->fmt && sc->fmt->arg_scnprintf[idx])
1447                         sc->arg_scnprintf[idx] = sc->fmt->arg_scnprintf[idx];
1448                 else if (field->flags & FIELD_IS_POINTER)
1449                         sc->arg_scnprintf[idx] = syscall_arg__scnprintf_hex;
1450                 ++idx;
1451         }
1452
1453         return 0;
1454 }
1455
1456 static int trace__read_syscall_info(struct trace *trace, int id)
1457 {
1458         char tp_name[128];
1459         struct syscall *sc;
1460         const char *name = audit_syscall_to_name(id, trace->audit.machine);
1461
1462         if (name == NULL)
1463                 return -1;
1464
1465         if (id > trace->syscalls.max) {
1466                 struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc));
1467
1468                 if (nsyscalls == NULL)
1469                         return -1;
1470
1471                 if (trace->syscalls.max != -1) {
1472                         memset(nsyscalls + trace->syscalls.max + 1, 0,
1473                                (id - trace->syscalls.max) * sizeof(*sc));
1474                 } else {
1475                         memset(nsyscalls, 0, (id + 1) * sizeof(*sc));
1476                 }
1477
1478                 trace->syscalls.table = nsyscalls;
1479                 trace->syscalls.max   = id;
1480         }
1481
1482         sc = trace->syscalls.table + id;
1483         sc->name = name;
1484
1485         if (trace->ev_qualifier) {
1486                 bool in = strlist__find(trace->ev_qualifier, name) != NULL;
1487
1488                 if (!(in ^ trace->not_ev_qualifier)) {
1489                         sc->filtered = true;
1490                         /*
1491                          * No need to do read tracepoint information since this will be
1492                          * filtered out.
1493                          */
1494                         return 0;
1495                 }
1496         }
1497
1498         sc->fmt  = syscall_fmt__find(sc->name);
1499
1500         snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name);
1501         sc->tp_format = trace_event__tp_format("syscalls", tp_name);
1502
1503         if (sc->tp_format == NULL && sc->fmt && sc->fmt->alias) {
1504                 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias);
1505                 sc->tp_format = trace_event__tp_format("syscalls", tp_name);
1506         }
1507
1508         if (sc->tp_format == NULL)
1509                 return -1;
1510
1511         sc->is_exit = !strcmp(name, "exit_group") || !strcmp(name, "exit");
1512
1513         return syscall__set_arg_fmts(sc);
1514 }
1515
1516 static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
1517                                       unsigned long *args, struct trace *trace,
1518                                       struct thread *thread)
1519 {
1520         size_t printed = 0;
1521
1522         if (sc->tp_format != NULL) {
1523                 struct format_field *field;
1524                 u8 bit = 1;
1525                 struct syscall_arg arg = {
1526                         .idx    = 0,
1527                         .mask   = 0,
1528                         .trace  = trace,
1529                         .thread = thread,
1530                 };
1531
1532                 for (field = sc->tp_format->format.fields->next; field;
1533                      field = field->next, ++arg.idx, bit <<= 1) {
1534                         if (arg.mask & bit)
1535                                 continue;
1536                         /*
1537                          * Suppress this argument if its value is zero and
1538                          * and we don't have a string associated in an
1539                          * strarray for it.
1540                          */
1541                         if (args[arg.idx] == 0 &&
1542                             !(sc->arg_scnprintf &&
1543                               sc->arg_scnprintf[arg.idx] == SCA_STRARRAY &&
1544                               sc->arg_parm[arg.idx]))
1545                                 continue;
1546
1547                         printed += scnprintf(bf + printed, size - printed,
1548                                              "%s%s: ", printed ? ", " : "", field->name);
1549                         if (sc->arg_scnprintf && sc->arg_scnprintf[arg.idx]) {
1550                                 arg.val = args[arg.idx];
1551                                 if (sc->arg_parm)
1552                                         arg.parm = sc->arg_parm[arg.idx];
1553                                 printed += sc->arg_scnprintf[arg.idx](bf + printed,
1554                                                                       size - printed, &arg);
1555                         } else {
1556                                 printed += scnprintf(bf + printed, size - printed,
1557                                                      "%ld", args[arg.idx]);
1558                         }
1559                 }
1560         } else {
1561                 int i = 0;
1562
1563                 while (i < 6) {
1564                         printed += scnprintf(bf + printed, size - printed,
1565                                              "%sarg%d: %ld",
1566                                              printed ? ", " : "", i, args[i]);
1567                         ++i;
1568                 }
1569         }
1570
1571         return printed;
1572 }
1573
/*
 * Signature shared by the per-tracepoint sample handlers such as
 * trace__sys_enter(): they receive the trace session, the evsel the sample
 * belongs to, the raw event and the parsed sample, and return an int status.
 */
1574 typedef int (*tracepoint_handler)(struct trace *trace, struct perf_evsel *evsel,
1575                                   union perf_event *event,
1576                                   struct perf_sample *sample);
1577
1578 static struct syscall *trace__syscall_info(struct trace *trace,
1579                                            struct perf_evsel *evsel, int id)
1580 {
1581
1582         if (id < 0) {
1583
1584                 /*
1585                  * XXX: Noticed on x86_64, reproduced as far back as 3.0.36, haven't tried
1586                  * before that, leaving at a higher verbosity level till that is
1587                  * explained. Reproduced with plain ftrace with:
1588                  *
1589                  * echo 1 > /t/events/raw_syscalls/sys_exit/enable
1590                  * grep "NR -1 " /t/trace_pipe
1591                  *
1592                  * After generating some load on the machine.
1593                  */
1594                 if (verbose > 1) {
1595                         static u64 n;
1596                         fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n",
1597                                 id, perf_evsel__name(evsel), ++n);
1598                 }
1599                 return NULL;
1600         }
1601
1602         if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) &&
1603             trace__read_syscall_info(trace, id))
1604                 goto out_cant_read;
1605
1606         if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL))
1607                 goto out_cant_read;
1608
1609         return &trace->syscalls.table[id];
1610
1611 out_cant_read:
1612         if (verbose) {
1613                 fprintf(trace->output, "Problems reading syscall %d", id);
1614                 if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL)
1615                         fprintf(trace->output, "(%s)", trace->syscalls.table[id].name);
1616                 fputs(" information\n", trace->output);
1617         }
1618         return NULL;
1619 }
1620
1621 static void thread__update_stats(struct thread_trace *ttrace,
1622                                  int id, struct perf_sample *sample)
1623 {
1624         struct int_node *inode;
1625         struct stats *stats;
1626         u64 duration = 0;
1627
1628         inode = intlist__findnew(ttrace->syscall_stats, id);
1629         if (inode == NULL)
1630                 return;
1631
1632         stats = inode->priv;
1633         if (stats == NULL) {
1634                 stats = malloc(sizeof(struct stats));
1635                 if (stats == NULL)
1636                         return;
1637                 init_stats(stats);
1638                 inode->priv = stats;
1639         }
1640
1641         if (ttrace->entry_time && sample->time > ttrace->entry_time)
1642                 duration = sample->time - ttrace->entry_time;
1643
1644         update_stats(stats, duration);
1645 }
1646
1647 static int trace__printf_interrupted_entry(struct trace *trace, struct perf_sample *sample)
1648 {
1649         struct thread_trace *ttrace;
1650         u64 duration;
1651         size_t printed;
1652
1653         if (trace->current == NULL)
1654                 return 0;
1655
1656         ttrace = thread__priv(trace->current);
1657
1658         if (!ttrace->entry_pending)
1659                 return 0;
1660
1661         duration = sample->time - ttrace->entry_time;
1662
1663         printed  = trace__fprintf_entry_head(trace, trace->current, duration, sample->time, trace->output);
1664         printed += fprintf(trace->output, "%-70s) ...\n", ttrace->entry_str);
1665         ttrace->entry_pending = false;
1666
1667         return printed;
1668 }
1669
1670 static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
1671                             union perf_event *event __maybe_unused,
1672                             struct perf_sample *sample)
1673 {
1674         char *msg;
1675         void *args;
1676         size_t printed = 0;
1677         struct thread *thread;
1678         int id = perf_evsel__sc_tp_uint(evsel, id, sample);
1679         struct syscall *sc = trace__syscall_info(trace, evsel, id);
1680         struct thread_trace *ttrace;
1681
1682         if (sc == NULL)
1683                 return -1;
1684
1685         if (sc->filtered)
1686                 return 0;
1687
1688         thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1689         ttrace = thread__trace(thread, trace->output);
1690         if (ttrace == NULL)
1691                 return -1;
1692
1693         args = perf_evsel__sc_tp_ptr(evsel, args, sample);
1694
1695         if (ttrace->entry_str == NULL) {
1696                 ttrace->entry_str = malloc(1024);
1697                 if (!ttrace->entry_str)
1698                         return -1;
1699         }
1700
1701         printed += trace__printf_interrupted_entry(trace, sample);
1702
1703         ttrace->entry_time = sample->time;
1704         msg = ttrace->entry_str;
1705         printed += scnprintf(msg + printed, 1024 - printed, "%s(", sc->name);
1706
1707         printed += syscall__scnprintf_args(sc, msg + printed, 1024 - printed,
1708                                            args, trace, thread);
1709
1710         if (sc->is_exit) {
1711                 if (!trace->duration_filter && !trace->summary_only) {
1712                         trace__fprintf_entry_head(trace, thread, 1, sample->time, trace->output);
1713                         fprintf(trace->output, "%-70s\n", ttrace->entry_str);
1714                 }
1715         } else
1716                 ttrace->entry_pending = true;
1717
1718         trace->current = thread;
1719
1720         return 0;
1721 }
1722
1723 static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
1724                            union perf_event *event __maybe_unused,
1725                            struct perf_sample *sample)
1726 {
1727         long ret;
1728         u64 duration = 0;
1729         struct thread *thread;
1730         int id = perf_evsel__sc_tp_uint(evsel, id, sample);
1731         struct syscall *sc = trace__syscall_info(trace, evsel, id);
1732         struct thread_trace *ttrace;
1733
1734         if (sc == NULL)
1735                 return -1;
1736
1737         if (sc->filtered)
1738                 return 0;
1739
1740         thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1741         ttrace = thread__trace(thread, trace->output);
1742         if (ttrace == NULL)
1743                 return -1;
1744
1745         if (trace->summary)
1746                 thread__update_stats(ttrace, id, sample);
1747
1748         ret = perf_evsel__sc_tp_uint(evsel, ret, sample);
1749
1750         if (id == trace->audit.open_id && ret >= 0 && trace->last_vfs_getname) {
1751                 trace__set_fd_pathname(thread, ret, trace->last_vfs_getname);
1752                 trace->last_vfs_getname = NULL;
1753                 ++trace->stats.vfs_getname;
1754         }
1755
1756         ttrace->exit_time = sample->time;
1757
1758         if (ttrace->entry_time) {
1759                 duration = sample->time - ttrace->entry_time;
1760                 if (trace__filter_duration(trace, duration))
1761                         goto out;
1762         } else if (trace->duration_filter)
1763                 goto out;
1764
1765         if (trace->summary_only)
1766                 goto out;
1767
1768         trace__fprintf_entry_head(trace, thread, duration, sample->time, trace->output);
1769
1770         if (ttrace->entry_pending) {
1771                 fprintf(trace->output, "%-70s", ttrace->entry_str);
1772         } else {
1773                 fprintf(trace->output, " ... [");
1774                 color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued");
1775                 fprintf(trace->output, "]: %s()", sc->name);
1776         }
1777
1778         if (sc->fmt == NULL) {
1779 signed_print:
1780                 fprintf(trace->output, ") = %ld", ret);
1781         } else if (ret < 0 && sc->fmt->errmsg) {
1782                 char bf[STRERR_BUFSIZE];
1783                 const char *emsg = strerror_r(-ret, bf, sizeof(bf)),
1784                            *e = audit_errno_to_name(-ret);
1785
1786                 fprintf(trace->output, ") = -1 %s %s", e, emsg);
1787         } else if (ret == 0 && sc->fmt->timeout)
1788                 fprintf(trace->output, ") = 0 Timeout");
1789         else if (sc->fmt->hexret)
1790                 fprintf(trace->output, ") = %#lx", ret);
1791         else
1792                 goto signed_print;
1793
1794         fputc('\n', trace->output);
1795 out:
1796         ttrace->entry_pending = false;
1797
1798         return 0;
1799 }
1800
1801 static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel,
1802                               union perf_event *event __maybe_unused,
1803                               struct perf_sample *sample)
1804 {
1805         trace->last_vfs_getname = perf_evsel__rawptr(evsel, sample, "pathname");
1806         return 0;
1807 }
1808
1809 static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel,
1810                                      union perf_event *event __maybe_unused,
1811                                      struct perf_sample *sample)
1812 {
1813         u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
1814         double runtime_ms = (double)runtime / NSEC_PER_MSEC;
1815         struct thread *thread = machine__findnew_thread(trace->host,
1816                                                         sample->pid,
1817                                                         sample->tid);
1818         struct thread_trace *ttrace = thread__trace(thread, trace->output);
1819
1820         if (ttrace == NULL)
1821                 goto out_dump;
1822
1823         ttrace->runtime_ms += runtime_ms;
1824         trace->runtime_ms += runtime_ms;
1825         return 0;
1826
1827 out_dump:
1828         fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n",
1829                evsel->name,
1830                perf_evsel__strval(evsel, sample, "comm"),
1831                (pid_t)perf_evsel__intval(evsel, sample, "pid"),
1832                runtime,
1833                perf_evsel__intval(evsel, sample, "vruntime"));
1834         return 0;
1835 }
1836
1837 static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel,
1838                                 union perf_event *event __maybe_unused,
1839                                 struct perf_sample *sample)
1840 {
1841         trace__printf_interrupted_entry(trace, sample);
1842         trace__fprintf_tstamp(trace, sample->time, trace->output);
1843
1844         if (trace->trace_syscalls)
1845                 fprintf(trace->output, "(         ): ");
1846
1847         fprintf(trace->output, "%s:", evsel->name);
1848
1849         if (evsel->tp_format) {
1850                 event_format__fprintf(evsel->tp_format, sample->cpu,
1851                                       sample->raw_data, sample->raw_size,
1852                                       trace->output);
1853         }
1854
1855         fprintf(trace->output, ")\n");
1856         return 0;
1857 }
1858
1859 static void print_location(FILE *f, struct perf_sample *sample,
1860                            struct addr_location *al,
1861                            bool print_dso, bool print_sym)
1862 {
1863
1864         if ((verbose || print_dso) && al->map)
1865                 fprintf(f, "%s@", al->map->dso->long_name);
1866
1867         if ((verbose || print_sym) && al->sym)
1868                 fprintf(f, "%s+0x%" PRIx64, al->sym->name,
1869                         al->addr - al->sym->start);
1870         else if (al->map)
1871                 fprintf(f, "0x%" PRIx64, al->addr);
1872         else
1873                 fprintf(f, "0x%" PRIx64, sample->addr);
1874 }
1875
1876 static int trace__pgfault(struct trace *trace,
1877                           struct perf_evsel *evsel,
1878                           union perf_event *event,
1879                           struct perf_sample *sample)
1880 {
1881         struct thread *thread;
1882         u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
1883         struct addr_location al;
1884         char map_type = 'd';
1885         struct thread_trace *ttrace;
1886
1887         thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1888         ttrace = thread__trace(thread, trace->output);
1889         if (ttrace == NULL)
1890                 return -1;
1891
1892         if (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ)
1893                 ttrace->pfmaj++;
1894         else
1895                 ttrace->pfmin++;
1896
1897         if (trace->summary_only)
1898                 return 0;
1899
1900         thread__find_addr_location(thread, cpumode, MAP__FUNCTION,
1901                               sample->ip, &al);
1902
1903         trace__fprintf_entry_head(trace, thread, 0, sample->time, trace->output);
1904
1905         fprintf(trace->output, "%sfault [",
1906                 evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ?
1907                 "maj" : "min");
1908
1909         print_location(trace->output, sample, &al, false, true);
1910
1911         fprintf(trace->output, "] => ");
1912
1913         thread__find_addr_location(thread, cpumode, MAP__VARIABLE,
1914                                    sample->addr, &al);
1915
1916         if (!al.map) {
1917                 thread__find_addr_location(thread, cpumode,
1918                                            MAP__FUNCTION, sample->addr, &al);
1919
1920                 if (al.map)
1921                         map_type = 'x';
1922                 else
1923                         map_type = '?';
1924         }
1925
1926         print_location(trace->output, sample, &al, true, false);
1927
1928         fprintf(trace->output, " (%c%c)\n", map_type, al.level);
1929
1930         return 0;
1931 }
1932
1933 static bool skip_sample(struct trace *trace, struct perf_sample *sample)
1934 {
1935         if ((trace->pid_list && intlist__find(trace->pid_list, sample->pid)) ||
1936             (trace->tid_list && intlist__find(trace->tid_list, sample->tid)))
1937                 return false;
1938
1939         if (trace->pid_list || trace->tid_list)
1940                 return true;
1941
1942         return false;
1943 }
1944
1945 static int trace__process_sample(struct perf_tool *tool,
1946                                  union perf_event *event,
1947                                  struct perf_sample *sample,
1948                                  struct perf_evsel *evsel,
1949                                  struct machine *machine __maybe_unused)
1950 {
1951         struct trace *trace = container_of(tool, struct trace, tool);
1952         int err = 0;
1953
1954         tracepoint_handler handler = evsel->handler;
1955
1956         if (skip_sample(trace, sample))
1957                 return 0;
1958
1959         if (!trace->full_time && trace->base_time == 0)
1960                 trace->base_time = sample->time;
1961
1962         if (handler) {
1963                 ++trace->nr_events;
1964                 handler(trace, evsel, event, sample);
1965         }
1966
1967         return err;
1968 }
1969
1970 static int parse_target_str(struct trace *trace)
1971 {
1972         if (trace->opts.target.pid) {
1973                 trace->pid_list = intlist__new(trace->opts.target.pid);
1974                 if (trace->pid_list == NULL) {
1975                         pr_err("Error parsing process id string\n");
1976                         return -EINVAL;
1977                 }
1978         }
1979
1980         if (trace->opts.target.tid) {
1981                 trace->tid_list = intlist__new(trace->opts.target.tid);
1982                 if (trace->tid_list == NULL) {
1983                         pr_err("Error parsing thread id string\n");
1984                         return -EINVAL;
1985                 }
1986         }
1987
1988         return 0;
1989 }
1990
1991 static int trace__record(struct trace *trace, int argc, const char **argv)
1992 {
1993         unsigned int rec_argc, i, j;
1994         const char **rec_argv;
1995         const char * const record_args[] = {
1996                 "record",
1997                 "-R",
1998                 "-m", "1024",
1999                 "-c", "1",
2000         };
2001
2002         const char * const sc_args[] = { "-e", };
2003         unsigned int sc_args_nr = ARRAY_SIZE(sc_args);
2004         const char * const majpf_args[] = { "-e", "major-faults" };
2005         unsigned int majpf_args_nr = ARRAY_SIZE(majpf_args);
2006         const char * const minpf_args[] = { "-e", "minor-faults" };
2007         unsigned int minpf_args_nr = ARRAY_SIZE(minpf_args);
2008
2009         /* +1 is for the event string below */
2010         rec_argc = ARRAY_SIZE(record_args) + sc_args_nr + 1 +
2011                 majpf_args_nr + minpf_args_nr + argc;
2012         rec_argv = calloc(rec_argc + 1, sizeof(char *));
2013
2014         if (rec_argv == NULL)
2015                 return -ENOMEM;
2016
2017         j = 0;
2018         for (i = 0; i < ARRAY_SIZE(record_args); i++)
2019                 rec_argv[j++] = record_args[i];
2020
2021         if (trace->trace_syscalls) {
2022                 for (i = 0; i < sc_args_nr; i++)
2023                         rec_argv[j++] = sc_args[i];
2024
2025                 /* event string may be different for older kernels - e.g., RHEL6 */
2026                 if (is_valid_tracepoint("raw_syscalls:sys_enter"))
2027                         rec_argv[j++] = "raw_syscalls:sys_enter,raw_syscalls:sys_exit";
2028                 else if (is_valid_tracepoint("syscalls:sys_enter"))
2029                         rec_argv[j++] = "syscalls:sys_enter,syscalls:sys_exit";
2030                 else {
2031                         pr_err("Neither raw_syscalls nor syscalls events exist.\n");
2032                         return -1;
2033                 }
2034         }
2035
2036         if (trace->trace_pgfaults & TRACE_PFMAJ)
2037                 for (i = 0; i < majpf_args_nr; i++)
2038                         rec_argv[j++] = majpf_args[i];
2039
2040         if (trace->trace_pgfaults & TRACE_PFMIN)
2041                 for (i = 0; i < minpf_args_nr; i++)
2042                         rec_argv[j++] = minpf_args[i];
2043
2044         for (i = 0; i < (unsigned int)argc; i++)
2045                 rec_argv[j++] = argv[i];
2046
2047         return cmd_record(j, rec_argv, NULL);
2048 }
2049
2050 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp); /* defined further down; declared here because trace__run() calls it */
2051
2052 static void perf_evlist__add_vfs_getname(struct perf_evlist *evlist)
2053 {
2054         struct perf_evsel *evsel = perf_evsel__newtp("probe", "vfs_getname");
2055         if (evsel == NULL)
2056                 return;
2057
2058         if (perf_evsel__field(evsel, "pathname") == NULL) {
2059                 perf_evsel__delete(evsel);
2060                 return;
2061         }
2062
2063         evsel->handler = trace__vfs_getname;
2064         perf_evlist__add(evlist, evsel);
2065 }
2066
2067 static int perf_evlist__add_pgfault(struct perf_evlist *evlist,
2068                                     u64 config)
2069 {
2070         struct perf_evsel *evsel;
2071         struct perf_event_attr attr = {
2072                 .type = PERF_TYPE_SOFTWARE,
2073                 .mmap_data = 1,
2074         };
2075
2076         attr.config = config;
2077         attr.sample_period = 1;
2078
2079         event_attr_init(&attr);
2080
2081         evsel = perf_evsel__new(&attr);
2082         if (!evsel)
2083                 return -ENOMEM;
2084
2085         evsel->handler = trace__pgfault;
2086         perf_evlist__add(evlist, evsel);
2087
2088         return 0;
2089 }
2090
2091 static int trace__run(struct trace *trace, int argc, const char **argv)
2092 {
2093         struct perf_evlist *evlist = trace->evlist;
2094         struct perf_evsel *evsel;
2095         int err = -1, i;
2096         unsigned long before;
2097         const bool forks = argc > 0;
2098         bool draining = false;
2099
2100         trace->live = true;
2101
2102         if (trace->trace_syscalls &&
2103             perf_evlist__add_syscall_newtp(evlist, trace__sys_enter,
2104                                            trace__sys_exit))
2105                 goto out_error_raw_syscalls;
2106
2107         if (trace->trace_syscalls)
2108                 perf_evlist__add_vfs_getname(evlist);
2109
2110         if ((trace->trace_pgfaults & TRACE_PFMAJ) &&
2111             perf_evlist__add_pgfault(evlist, PERF_COUNT_SW_PAGE_FAULTS_MAJ)) {
2112                 goto out_error_mem;
2113         }
2114
2115         if ((trace->trace_pgfaults & TRACE_PFMIN) &&
2116             perf_evlist__add_pgfault(evlist, PERF_COUNT_SW_PAGE_FAULTS_MIN))
2117                 goto out_error_mem;
2118
2119         if (trace->sched &&
2120             perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
2121                                    trace__sched_stat_runtime))
2122                 goto out_error_sched_stat_runtime;
2123
2124         err = perf_evlist__create_maps(evlist, &trace->opts.target);
2125         if (err < 0) {
2126                 fprintf(trace->output, "Problems parsing the target to trace, check your options!\n");
2127                 goto out_delete_evlist;
2128         }
2129
2130         err = trace__symbols_init(trace, evlist);
2131         if (err < 0) {
2132                 fprintf(trace->output, "Problems initializing symbol libraries!\n");
2133                 goto out_delete_evlist;
2134         }
2135
2136         perf_evlist__config(evlist, &trace->opts);
2137
2138         signal(SIGCHLD, sig_handler);
2139         signal(SIGINT, sig_handler);
2140
2141         if (forks) {
2142                 err = perf_evlist__prepare_workload(evlist, &trace->opts.target,
2143                                                     argv, false, NULL);
2144                 if (err < 0) {
2145                         fprintf(trace->output, "Couldn't run the workload!\n");
2146                         goto out_delete_evlist;
2147                 }
2148         }
2149
2150         err = perf_evlist__open(evlist);
2151         if (err < 0)
2152                 goto out_error_open;
2153
2154         /*
2155          * Better not use !target__has_task() here because we need to cover the
2156          * case where no threads were specified in the command line, but a
2157          * workload was, and in that case we will fill in the thread_map when
2158          * we fork the workload in perf_evlist__prepare_workload.
2159          */
2160         if (evlist->threads->map[0] == -1)
2161                 perf_evlist__set_filter_pid(evlist, getpid());
2162
2163         err = perf_evlist__mmap(evlist, trace->opts.mmap_pages, false);
2164         if (err < 0)
2165                 goto out_error_mmap;
2166
2167         if (forks)
2168                 perf_evlist__start_workload(evlist);
2169         else
2170                 perf_evlist__enable(evlist);
2171
2172         trace->multiple_threads = evlist->threads->map[0] == -1 ||
2173                                   evlist->threads->nr > 1 ||
2174                                   perf_evlist__first(evlist)->attr.inherit;
2175 again:
2176         before = trace->nr_events;
2177
2178         for (i = 0; i < evlist->nr_mmaps; i++) {
2179                 union perf_event *event;
2180
2181                 while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) {
2182                         const u32 type = event->header.type;
2183                         tracepoint_handler handler;
2184                         struct perf_sample sample;
2185
2186                         ++trace->nr_events;
2187
2188                         err = perf_evlist__parse_sample(evlist, event, &sample);
2189                         if (err) {
2190                                 fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err);
2191                                 goto next_event;
2192                         }
2193
2194                         if (!trace->full_time && trace->base_time == 0)
2195                                 trace->base_time = sample.time;
2196
2197                         if (type != PERF_RECORD_SAMPLE) {
2198                                 trace__process_event(trace, trace->host, event, &sample);
2199                                 continue;
2200                         }
2201
2202                         evsel = perf_evlist__id2evsel(evlist, sample.id);
2203                         if (evsel == NULL) {
2204                                 fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample.id);
2205                                 goto next_event;
2206                         }
2207
2208                         if (evsel->attr.type == PERF_TYPE_TRACEPOINT &&
2209                             sample.raw_data == NULL) {
2210                                 fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
2211                                        perf_evsel__name(evsel), sample.tid,
2212                                        sample.cpu, sample.raw_size);
2213                                 goto next_event;
2214                         }
2215
2216                         handler = evsel->handler;
2217                         handler(trace, evsel, event, &sample);
2218 next_event:
2219                         perf_evlist__mmap_consume(evlist, i);
2220
2221                         if (interrupted)
2222                                 goto out_disable;
2223                 }
2224         }
2225
2226         if (trace->nr_events == before) {
2227                 int timeout = done ? 100 : -1;
2228
2229                 if (!draining && perf_evlist__poll(evlist, timeout) > 0) {
2230                         if (perf_evlist__filter_pollfd(evlist, POLLERR | POLLHUP) == 0)
2231                                 draining = true;
2232
2233                         goto again;
2234                 }
2235         } else {
2236                 goto again;
2237         }
2238
2239 out_disable:
2240         perf_evlist__disable(evlist);
2241
2242         if (!err) {
2243                 if (trace->summary)
2244                         trace__fprintf_thread_summary(trace, trace->output);
2245
2246                 if (trace->show_tool_stats) {
2247                         fprintf(trace->output, "Stats:\n "
2248                                                " vfs_getname : %" PRIu64 "\n"
2249                                                " proc_getname: %" PRIu64 "\n",
2250                                 trace->stats.vfs_getname,
2251                                 trace->stats.proc_getname);
2252                 }
2253         }
2254
2255 out_delete_evlist:
2256         perf_evlist__delete(evlist);
2257         trace->evlist = NULL;
2258         trace->live = false;
2259         return err;
2260 {
2261         char errbuf[BUFSIZ];
2262
2263 out_error_sched_stat_runtime:
2264         debugfs__strerror_open_tp(errno, errbuf, sizeof(errbuf), "sched", "sched_stat_runtime");
2265         goto out_error;
2266
2267 out_error_raw_syscalls:
2268         debugfs__strerror_open_tp(errno, errbuf, sizeof(errbuf), "raw_syscalls", "sys_(enter|exit)");
2269         goto out_error;
2270
2271 out_error_mmap:
2272         perf_evlist__strerror_mmap(evlist, errno, errbuf, sizeof(errbuf));
2273         goto out_error;
2274
2275 out_error_open:
2276         perf_evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf));
2277
2278 out_error:
2279         fprintf(trace->output, "%s\n", errbuf);
2280         goto out_delete_evlist;
2281 }
2282 out_error_mem:
2283         fprintf(trace->output, "Not enough memory to run!\n");
2284         goto out_delete_evlist;
2285 }
2286
2287 static int trace__replay(struct trace *trace)
2288 {
2289         const struct perf_evsel_str_handler handlers[] = {
2290                 { "probe:vfs_getname",       trace__vfs_getname, },
2291         };
2292         struct perf_data_file file = {
2293                 .path  = input_name,
2294                 .mode  = PERF_DATA_MODE_READ,
2295         };
2296         struct perf_session *session;
2297         struct perf_evsel *evsel;
2298         int err = -1;
2299
2300         trace->tool.sample        = trace__process_sample;
2301         trace->tool.mmap          = perf_event__process_mmap;
2302         trace->tool.mmap2         = perf_event__process_mmap2;
2303         trace->tool.comm          = perf_event__process_comm;
2304         trace->tool.exit          = perf_event__process_exit;
2305         trace->tool.fork          = perf_event__process_fork;
2306         trace->tool.attr          = perf_event__process_attr;
2307         trace->tool.tracing_data = perf_event__process_tracing_data;
2308         trace->tool.build_id      = perf_event__process_build_id;
2309
2310         trace->tool.ordered_events = true;
2311         trace->tool.ordering_requires_timestamps = true;
2312
2313         /* add tid to output */
2314         trace->multiple_threads = true;
2315
2316         session = perf_session__new(&file, false, &trace->tool);
2317         if (session == NULL)
2318                 return -1;
2319
2320         if (symbol__init(&session->header.env) < 0)
2321                 goto out;
2322
2323         trace->host = &session->machines.host;
2324
2325         err = perf_session__set_tracepoints_handlers(session, handlers);
2326         if (err)
2327                 goto out;
2328
2329         evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2330                                                      "raw_syscalls:sys_enter");
2331         /* older kernels have syscalls tp versus raw_syscalls */
2332         if (evsel == NULL)
2333                 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2334                                                              "syscalls:sys_enter");
2335
2336         if (evsel &&
2337             (perf_evsel__init_syscall_tp(evsel, trace__sys_enter) < 0 ||
2338             perf_evsel__init_sc_tp_ptr_field(evsel, args))) {
2339                 pr_err("Error during initialize raw_syscalls:sys_enter event\n");
2340                 goto out;
2341         }
2342
2343         evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2344                                                      "raw_syscalls:sys_exit");
2345         if (evsel == NULL)
2346                 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2347                                                              "syscalls:sys_exit");
2348         if (evsel &&
2349             (perf_evsel__init_syscall_tp(evsel, trace__sys_exit) < 0 ||
2350             perf_evsel__init_sc_tp_uint_field(evsel, ret))) {
2351                 pr_err("Error during initialize raw_syscalls:sys_exit event\n");
2352                 goto out;
2353         }
2354
2355         evlist__for_each(session->evlist, evsel) {
2356                 if (evsel->attr.type == PERF_TYPE_SOFTWARE &&
2357                     (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ||
2358                      evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MIN ||
2359                      evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS))
2360                         evsel->handler = trace__pgfault;
2361         }
2362
2363         err = parse_target_str(trace);
2364         if (err != 0)
2365                 goto out;
2366
2367         setup_pager();
2368
2369         err = perf_session__process_events(session, &trace->tool);
2370         if (err)
2371                 pr_err("Failed to process events, error %d", err);
2372
2373         else if (trace->summary)
2374                 trace__fprintf_thread_summary(trace, trace->output);
2375
2376 out:
2377         perf_session__delete(session);
2378
2379         return err;
2380 }
2381
/*
 * Print the heading of the per-thread summary to @fp and return what
 * fprintf() reported for it.
 */
static size_t trace__fprintf_threads_header(FILE *fp)
{
	return fprintf(fp, "\n Summary of events:\n\n");
}
2390
2391 static size_t thread__dump_stats(struct thread_trace *ttrace,
2392                                  struct trace *trace, FILE *fp)
2393 {
2394         struct stats *stats;
2395         size_t printed = 0;
2396         struct syscall *sc;
2397         struct int_node *inode = intlist__first(ttrace->syscall_stats);
2398
2399         if (inode == NULL)
2400                 return 0;
2401
2402         printed += fprintf(fp, "\n");
2403
2404         printed += fprintf(fp, "   syscall            calls      min       avg       max      stddev\n");
2405         printed += fprintf(fp, "                               (msec)    (msec)    (msec)        (%%)\n");
2406         printed += fprintf(fp, "   --------------- -------- --------- --------- ---------     ------\n");
2407
2408         /* each int_node is a syscall */
2409         while (inode) {
2410                 stats = inode->priv;
2411                 if (stats) {
2412                         double min = (double)(stats->min) / NSEC_PER_MSEC;
2413                         double max = (double)(stats->max) / NSEC_PER_MSEC;
2414                         double avg = avg_stats(stats);
2415                         double pct;
2416                         u64 n = (u64) stats->n;
2417
2418                         pct = avg ? 100.0 * stddev_stats(stats)/avg : 0.0;
2419                         avg /= NSEC_PER_MSEC;
2420
2421                         sc = &trace->syscalls.table[inode->i];
2422                         printed += fprintf(fp, "   %-15s", sc->name);
2423                         printed += fprintf(fp, " %8" PRIu64 " %9.3f %9.3f",
2424                                            n, min, avg);
2425                         printed += fprintf(fp, " %9.3f %9.2f%%\n", max, pct);
2426                 }
2427
2428                 inode = intlist__next(inode);
2429         }
2430
2431         printed += fprintf(fp, "\n\n");
2432
2433         return printed;
2434 }
2435
2436 /* Context handed through machine__for_each_thread() to trace__fprintf_one_thread() */
2437 struct summary_data {
2438         FILE *fp; /* stream the summary is written to */
2439         struct trace *trace; /* tracer whose event count and syscall table are consulted */
2440         size_t printed; /* running count of characters printed */
2441 };
2442
2443 static int trace__fprintf_one_thread(struct thread *thread, void *priv)
2444 {
2445         struct summary_data *data = priv;
2446         FILE *fp = data->fp;
2447         size_t printed = data->printed;
2448         struct trace *trace = data->trace;
2449         struct thread_trace *ttrace = thread__priv(thread);
2450         double ratio;
2451
2452         if (ttrace == NULL)
2453                 return 0;
2454
2455         ratio = (double)ttrace->nr_events / trace->nr_events * 100.0;
2456
2457         printed += fprintf(fp, " %s (%d), ", thread__comm_str(thread), thread->tid);
2458         printed += fprintf(fp, "%lu events, ", ttrace->nr_events);
2459         printed += fprintf(fp, "%.1f%%", ratio);
2460         if (ttrace->pfmaj)
2461                 printed += fprintf(fp, ", %lu majfaults", ttrace->pfmaj);
2462         if (ttrace->pfmin)
2463                 printed += fprintf(fp, ", %lu minfaults", ttrace->pfmin);
2464         printed += fprintf(fp, ", %.3f msec\n", ttrace->runtime_ms);
2465         printed += thread__dump_stats(ttrace, trace, fp);
2466
2467         data->printed += printed;
2468
2469         return 0;
2470 }
2471
2472 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
2473 {
2474         struct summary_data data = {
2475                 .fp = fp,
2476                 .trace = trace
2477         };
2478         data.printed = trace__fprintf_threads_header(fp);
2479
2480         machine__for_each_thread(trace->host, trace__fprintf_one_thread, &data);
2481
2482         return data.printed;
2483 }
2484
2485 static int trace__set_duration(const struct option *opt, const char *str,
2486                                int unset __maybe_unused)
2487 {
2488         struct trace *trace = opt->value;
2489
2490         trace->duration_filter = atof(str);
2491         return 0;
2492 }
2493
2494 static int trace__open_output(struct trace *trace, const char *filename)
2495 {
2496         struct stat st;
2497
2498         if (!stat(filename, &st) && st.st_size) {
2499                 char oldname[PATH_MAX];
2500
2501                 scnprintf(oldname, sizeof(oldname), "%s.old", filename);
2502                 unlink(oldname);
2503                 rename(filename, oldname);
2504         }
2505
2506         trace->output = fopen(filename, "w");
2507
2508         return trace->output == NULL ? -errno : 0;
2509 }
2510
2511 static int parse_pagefaults(const struct option *opt, const char *str,
2512                             int unset __maybe_unused)
2513 {
2514         int *trace_pgfaults = opt->value;
2515
2516         if (strcmp(str, "all") == 0)
2517                 *trace_pgfaults |= TRACE_PFMAJ | TRACE_PFMIN;
2518         else if (strcmp(str, "maj") == 0)
2519                 *trace_pgfaults |= TRACE_PFMAJ;
2520         else if (strcmp(str, "min") == 0)
2521                 *trace_pgfaults |= TRACE_PFMIN;
2522         else
2523                 return -1;
2524
2525         return 0;
2526 }
2527
2528 static void evlist__set_evsel_handler(struct perf_evlist *evlist, void *handler)
2529 {
2530         struct perf_evsel *evsel;
2531
2532         evlist__for_each(evlist, evsel)
2533                 evsel->handler = handler;
2534 }
2535
2536 int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
2537 {
2538         const char * const trace_usage[] = {
2539                 "perf trace [<options>] [<command>]",
2540                 "perf trace [<options>] -- <command> [<options>]",
2541                 "perf trace record [<options>] [<command>]",
2542                 "perf trace record [<options>] -- <command> [<options>]",
2543                 NULL
2544         };
2545         struct trace trace = {
2546                 .audit = {
2547                         .machine = audit_detect_machine(),
2548                         .open_id = audit_name_to_syscall("open", trace.audit.machine),
2549                 },
2550                 .syscalls = {
2551                         . max = -1,
2552                 },
2553                 .opts = {
2554                         .target = {
2555                                 .uid       = UINT_MAX,
2556                                 .uses_mmap = true,
2557                         },
2558                         .user_freq     = UINT_MAX,
2559                         .user_interval = ULLONG_MAX,
2560                         .no_buffering  = true,
2561                         .mmap_pages    = UINT_MAX,
2562                 },
2563                 .output = stdout,
2564                 .show_comm = true,
2565                 .trace_syscalls = true,
2566         };
2567         const char *output_name = NULL;
2568         const char *ev_qualifier_str = NULL;
2569         const struct option trace_options[] = {
2570         OPT_CALLBACK(0, "event", &trace.evlist, "event",
2571                      "event selector. use 'perf list' to list available events",
2572                      parse_events_option),
2573         OPT_BOOLEAN(0, "comm", &trace.show_comm,
2574                     "show the thread COMM next to its id"),
2575         OPT_BOOLEAN(0, "tool_stats", &trace.show_tool_stats, "show tool stats"),
2576         OPT_STRING('e', "expr", &ev_qualifier_str, "expr",
2577                     "list of events to trace"),
2578         OPT_STRING('o', "output", &output_name, "file", "output file name"),
2579         OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"),
2580         OPT_STRING('p', "pid", &trace.opts.target.pid, "pid",
2581                     "trace events on existing process id"),
2582         OPT_STRING('t', "tid", &trace.opts.target.tid, "tid",
2583                     "trace events on existing thread id"),
2584         OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide,
2585                     "system-wide collection from all CPUs"),
2586         OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu",
2587                     "list of cpus to monitor"),
2588         OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit,
2589                     "child tasks do not inherit counters"),
2590         OPT_CALLBACK('m', "mmap-pages", &trace.opts.mmap_pages, "pages",
2591                      "number of mmap data pages",
2592                      perf_evlist__parse_mmap_pages),
2593         OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user",
2594                    "user to profile"),
2595         OPT_CALLBACK(0, "duration", &trace, "float",
2596                      "show only events with duration > N.M ms",
2597                      trace__set_duration),
2598         OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"),
2599         OPT_INCR('v', "verbose", &verbose, "be more verbose"),
2600         OPT_BOOLEAN('T', "time", &trace.full_time,
2601                     "Show full timestamp, not time relative to first start"),
2602         OPT_BOOLEAN('s', "summary", &trace.summary_only,
2603                     "Show only syscall summary with statistics"),
2604         OPT_BOOLEAN('S', "with-summary", &trace.summary,
2605                     "Show all syscalls and summary with statistics"),
2606         OPT_CALLBACK_DEFAULT('F', "pf", &trace.trace_pgfaults, "all|maj|min",
2607                      "Trace pagefaults", parse_pagefaults, "maj"),
2608         OPT_BOOLEAN(0, "syscalls", &trace.trace_syscalls, "Trace syscalls"),
2609         OPT_END()
2610         };
2611         int err;
2612         char bf[BUFSIZ];
2613
2614         trace.evlist = perf_evlist__new();
2615         if (trace.evlist == NULL)
2616                 return -ENOMEM;
2617
2618         if (trace.evlist == NULL) {
2619                 pr_err("Not enough memory to run!\n");
2620                 goto out;
2621         }
2622
2623         argc = parse_options(argc, argv, trace_options, trace_usage,
2624                              PARSE_OPT_STOP_AT_NON_OPTION);
2625
2626         if (trace.trace_pgfaults) {
2627                 trace.opts.sample_address = true;
2628                 trace.opts.sample_time = true;
2629         }
2630
2631         if (trace.evlist->nr_entries > 0)
2632                 evlist__set_evsel_handler(trace.evlist, trace__event_handler);
2633
2634         if ((argc >= 1) && (strcmp(argv[0], "record") == 0))
2635                 return trace__record(&trace, argc-1, &argv[1]);
2636
2637         /* summary_only implies summary option, but don't overwrite summary if set */
2638         if (trace.summary_only)
2639                 trace.summary = trace.summary_only;
2640
2641         if (!trace.trace_syscalls && !trace.trace_pgfaults &&
2642             trace.evlist->nr_entries == 0 /* Was --events used? */) {
2643                 pr_err("Please specify something to trace.\n");
2644                 return -1;
2645         }
2646
2647         if (output_name != NULL) {
2648                 err = trace__open_output(&trace, output_name);
2649                 if (err < 0) {
2650                         perror("failed to create output file");
2651                         goto out;
2652                 }
2653         }
2654
2655         if (ev_qualifier_str != NULL) {
2656                 const char *s = ev_qualifier_str;
2657
2658                 trace.not_ev_qualifier = *s == '!';
2659                 if (trace.not_ev_qualifier)
2660                         ++s;
2661                 trace.ev_qualifier = strlist__new(true, s);
2662                 if (trace.ev_qualifier == NULL) {
2663                         fputs("Not enough memory to parse event qualifier",
2664                               trace.output);
2665                         err = -ENOMEM;
2666                         goto out_close;
2667                 }
2668         }
2669
2670         err = target__validate(&trace.opts.target);
2671         if (err) {
2672                 target__strerror(&trace.opts.target, err, bf, sizeof(bf));
2673                 fprintf(trace.output, "%s", bf);
2674                 goto out_close;
2675         }
2676
2677         err = target__parse_uid(&trace.opts.target);
2678         if (err) {
2679                 target__strerror(&trace.opts.target, err, bf, sizeof(bf));
2680                 fprintf(trace.output, "%s", bf);
2681                 goto out_close;
2682         }
2683
2684         if (!argc && target__none(&trace.opts.target))
2685                 trace.opts.target.system_wide = true;
2686
2687         if (input_name)
2688                 err = trace__replay(&trace);
2689         else
2690                 err = trace__run(&trace, argc, argv);
2691
2692 out_close:
2693         if (output_name != NULL)
2694                 fclose(trace.output);
2695 out:
2696         return err;
2697 }