perf tools: Rename 'perf_record_opts' to 'record_opts'
[firefly-linux-kernel-4.4.55.git] / tools / perf / builtin-trace.c
1 #include <traceevent/event-parse.h>
2 #include "builtin.h"
3 #include "util/color.h"
4 #include "util/debug.h"
5 #include "util/evlist.h"
6 #include "util/machine.h"
7 #include "util/session.h"
8 #include "util/thread.h"
9 #include "util/parse-options.h"
10 #include "util/strlist.h"
11 #include "util/intlist.h"
12 #include "util/thread_map.h"
13 #include "util/stat.h"
14 #include "trace-event.h"
15 #include "util/parse-events.h"
16
#include <libaudit.h>
#include <stdlib.h>
#include <string.h>
#include <sys/eventfd.h>
#include <sys/mman.h>
#include <linux/futex.h>
22
/*
 * Fallback values for constants that the headers of older distros may not
 * provide. Each one is only defined when the system headers did not.
 */
#if !defined(MAP_STACK)
# define MAP_STACK              0x20000
#endif

#if !defined(MADV_HWPOISON)
# define MADV_HWPOISON          100
#endif

#if !defined(MADV_MERGEABLE)
# define MADV_MERGEABLE         12
#endif

#if !defined(MADV_UNMERGEABLE)
# define MADV_UNMERGEABLE       13
#endif
39
40 struct tp_field {
41         int offset;
42         union {
43                 u64 (*integer)(struct tp_field *field, struct perf_sample *sample);
44                 void *(*pointer)(struct tp_field *field, struct perf_sample *sample);
45         };
46 };
47
48 #define TP_UINT_FIELD(bits) \
49 static u64 tp_field__u##bits(struct tp_field *field, struct perf_sample *sample) \
50 { \
51         return *(u##bits *)(sample->raw_data + field->offset); \
52 }
53
54 TP_UINT_FIELD(8);
55 TP_UINT_FIELD(16);
56 TP_UINT_FIELD(32);
57 TP_UINT_FIELD(64);
58
59 #define TP_UINT_FIELD__SWAPPED(bits) \
60 static u64 tp_field__swapped_u##bits(struct tp_field *field, struct perf_sample *sample) \
61 { \
62         u##bits value = *(u##bits *)(sample->raw_data + field->offset); \
63         return bswap_##bits(value);\
64 }
65
66 TP_UINT_FIELD__SWAPPED(16);
67 TP_UINT_FIELD__SWAPPED(32);
68 TP_UINT_FIELD__SWAPPED(64);
69
70 static int tp_field__init_uint(struct tp_field *field,
71                                struct format_field *format_field,
72                                bool needs_swap)
73 {
74         field->offset = format_field->offset;
75
76         switch (format_field->size) {
77         case 1:
78                 field->integer = tp_field__u8;
79                 break;
80         case 2:
81                 field->integer = needs_swap ? tp_field__swapped_u16 : tp_field__u16;
82                 break;
83         case 4:
84                 field->integer = needs_swap ? tp_field__swapped_u32 : tp_field__u32;
85                 break;
86         case 8:
87                 field->integer = needs_swap ? tp_field__swapped_u64 : tp_field__u64;
88                 break;
89         default:
90                 return -1;
91         }
92
93         return 0;
94 }
95
96 static void *tp_field__ptr(struct tp_field *field, struct perf_sample *sample)
97 {
98         return sample->raw_data + field->offset;
99 }
100
101 static int tp_field__init_ptr(struct tp_field *field, struct format_field *format_field)
102 {
103         field->offset = format_field->offset;
104         field->pointer = tp_field__ptr;
105         return 0;
106 }
107
108 struct syscall_tp {
109         struct tp_field id;
110         union {
111                 struct tp_field args, ret;
112         };
113 };
114
115 static int perf_evsel__init_tp_uint_field(struct perf_evsel *evsel,
116                                           struct tp_field *field,
117                                           const char *name)
118 {
119         struct format_field *format_field = perf_evsel__field(evsel, name);
120
121         if (format_field == NULL)
122                 return -1;
123
124         return tp_field__init_uint(field, format_field, evsel->needs_swap);
125 }
126
/*
 * Initialize the unsigned integer accessor for member 'name' of the
 * struct syscall_tp hanging off evsel->priv, looking the tracepoint field
 * up under that same name. Evaluates to the result of
 * perf_evsel__init_tp_uint_field().
 *
 * 'evsel' is parenthesized so that any pointer expression can be passed;
 * note that it is still evaluated twice.
 */
#define perf_evsel__init_sc_tp_uint_field(evsel, name) \
        ({ struct syscall_tp *sc = (evsel)->priv;\
           perf_evsel__init_tp_uint_field((evsel), &sc->name, #name); })
130
131 static int perf_evsel__init_tp_ptr_field(struct perf_evsel *evsel,
132                                          struct tp_field *field,
133                                          const char *name)
134 {
135         struct format_field *format_field = perf_evsel__field(evsel, name);
136
137         if (format_field == NULL)
138                 return -1;
139
140         return tp_field__init_ptr(field, format_field);
141 }
142
/*
 * Initialize the pointer accessor for member 'name' of the
 * struct syscall_tp hanging off evsel->priv, looking the tracepoint field
 * up under that same name. Evaluates to the result of
 * perf_evsel__init_tp_ptr_field().
 *
 * 'evsel' is parenthesized so that any pointer expression can be passed;
 * note that it is still evaluated twice.
 */
#define perf_evsel__init_sc_tp_ptr_field(evsel, name) \
        ({ struct syscall_tp *sc = (evsel)->priv;\
           perf_evsel__init_tp_ptr_field((evsel), &sc->name, #name); })
146
147 static void perf_evsel__delete_priv(struct perf_evsel *evsel)
148 {
149         free(evsel->priv);
150         evsel->priv = NULL;
151         perf_evsel__delete(evsel);
152 }
153
154 static int perf_evsel__init_syscall_tp(struct perf_evsel *evsel, void *handler)
155 {
156         evsel->priv = malloc(sizeof(struct syscall_tp));
157         if (evsel->priv != NULL) {
158                 if (perf_evsel__init_sc_tp_uint_field(evsel, id))
159                         goto out_delete;
160
161                 evsel->handler = handler;
162                 return 0;
163         }
164
165         return -ENOMEM;
166
167 out_delete:
168         free(evsel->priv);
169         evsel->priv = NULL;
170         return -ENOENT;
171 }
172
/*
 * Create the evsel for the syscall tracepoint named by 'direction' and
 * initialize its syscall_tp state with 'handler'.
 *
 * Returns the new evsel, or NULL when the tracepoint cannot be created under
 * either subsystem name or when its initialization fails.
 */
static struct perf_evsel *perf_evsel__syscall_newtp(const char *direction, void *handler)
{
        struct perf_evsel *evsel = perf_evsel__newtp("raw_syscalls", direction);

        /* older kernel (e.g., RHEL6) use syscalls:{enter,exit} */
        if (evsel == NULL)
                evsel = perf_evsel__newtp("syscalls", direction);

        if (evsel == NULL)
                return NULL;

        if (perf_evsel__init_syscall_tp(evsel, handler)) {
                perf_evsel__delete_priv(evsel);
                return NULL;
        }

        return evsel;
}
192
/*
 * Read member 'name' of the struct syscall_tp stored in evsel->priv from
 * 'sample', as an unsigned integer (u64).
 *
 * The 'evsel' and 'sample' arguments are parenthesized so that arbitrary
 * expressions can be passed.
 */
#define perf_evsel__sc_tp_uint(evsel, name, sample) \
        ({ struct syscall_tp *fields = (evsel)->priv; \
           fields->name.integer(&fields->name, (sample)); })

/*
 * Same as perf_evsel__sc_tp_uint() but yields a pointer to the field inside
 * the sample's raw data.
 */
#define perf_evsel__sc_tp_ptr(evsel, name, sample) \
        ({ struct syscall_tp *fields = (evsel)->priv; \
           fields->name.pointer(&fields->name, (sample)); })
200
201 static int perf_evlist__add_syscall_newtp(struct perf_evlist *evlist,
202                                           void *sys_enter_handler,
203                                           void *sys_exit_handler)
204 {
205         int ret = -1;
206         struct perf_evsel *sys_enter, *sys_exit;
207
208         sys_enter = perf_evsel__syscall_newtp("sys_enter", sys_enter_handler);
209         if (sys_enter == NULL)
210                 goto out;
211
212         if (perf_evsel__init_sc_tp_ptr_field(sys_enter, args))
213                 goto out_delete_sys_enter;
214
215         sys_exit = perf_evsel__syscall_newtp("sys_exit", sys_exit_handler);
216         if (sys_exit == NULL)
217                 goto out_delete_sys_enter;
218
219         if (perf_evsel__init_sc_tp_uint_field(sys_exit, ret))
220                 goto out_delete_sys_exit;
221
222         perf_evlist__add(evlist, sys_enter);
223         perf_evlist__add(evlist, sys_exit);
224
225         ret = 0;
226 out:
227         return ret;
228
229 out_delete_sys_exit:
230         perf_evsel__delete_priv(sys_exit);
231 out_delete_sys_enter:
232         perf_evsel__delete_priv(sys_enter);
233         goto out;
234 }
235
236
237 struct syscall_arg {
238         unsigned long val;
239         struct thread *thread;
240         struct trace  *trace;
241         void          *parm;
242         u8            idx;
243         u8            mask;
244 };
245
/*
 * Table mapping a contiguous range of integer values to names: value 'v' is
 * described by entries[v - offset] as long as that index is within
 * [0, nr_entries).
 */
struct strarray {
        int         offset;      /* value that corresponds to entries[0] */
        int         nr_entries;  /* number of slots in entries[] */
        const char **entries;
};
251
/*
 * Define 'strarray__<array>' describing the string table 'array', whose
 * first entry corresponds to value 0.
 */
#define DEFINE_STRARRAY(array) \
struct strarray strarray__##array = { \
        .nr_entries = ARRAY_SIZE(array), \
        .entries = array, \
}

/*
 * Same as DEFINE_STRARRAY() but the first entry of 'array' corresponds to
 * value 'off' instead of 0.
 */
#define DEFINE_STRARRAY_OFFSET(array, off) \
struct strarray strarray__##array = { \
        .offset     = off, \
        .nr_entries = ARRAY_SIZE(array), \
        .entries = array, \
}
262
263 static size_t __syscall_arg__scnprintf_strarray(char *bf, size_t size,
264                                                 const char *intfmt,
265                                                 struct syscall_arg *arg)
266 {
267         struct strarray *sa = arg->parm;
268         int idx = arg->val - sa->offset;
269
270         if (idx < 0 || idx >= sa->nr_entries)
271                 return scnprintf(bf, size, intfmt, arg->val);
272
273         return scnprintf(bf, size, "%s", sa->entries[idx]);
274 }
275
/*
 * strarray formatter that falls back to a decimal number for values that
 * are not covered by the table.
 */
static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size,
                                              struct syscall_arg *arg)
{
        const char *fallback_fmt = "%d";

        return __syscall_arg__scnprintf_strarray(bf, size, fallback_fmt, arg);
}

#define SCA_STRARRAY syscall_arg__scnprintf_strarray
283
/*
 * strarray formatter that falls back to a hexadecimal number for values
 * that are not covered by the table.
 */
static size_t syscall_arg__scnprintf_strhexarray(char *bf, size_t size,
                                                 struct syscall_arg *arg)
{
        const char *fallback_fmt = "%#x";

        return __syscall_arg__scnprintf_strarray(bf, size, fallback_fmt, arg);
}

#define SCA_STRHEXARRAY syscall_arg__scnprintf_strhexarray
291
/*
 * File descriptor formatter. Only declared here so that the code below can
 * refer to it; the definition is not in this part of the file.
 */
static size_t syscall_arg__scnprintf_fd(char *bf, size_t size, struct syscall_arg *arg);

#define SCA_FD syscall_arg__scnprintf_fd
296
297 static size_t syscall_arg__scnprintf_fd_at(char *bf, size_t size,
298                                            struct syscall_arg *arg)
299 {
300         int fd = arg->val;
301
302         if (fd == AT_FDCWD)
303                 return scnprintf(bf, size, "CWD");
304
305         return syscall_arg__scnprintf_fd(bf, size, arg);
306 }
307
308 #define SCA_FDAT syscall_arg__scnprintf_fd_at
309
/*
 * Formatter for the fd argument of close(). Only declared here so that the
 * code below can refer to it; the definition is not in this part of the file.
 */
static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size, struct syscall_arg *arg);

#define SCA_CLOSE_FD syscall_arg__scnprintf_close_fd
314
315 static size_t syscall_arg__scnprintf_hex(char *bf, size_t size,
316                                          struct syscall_arg *arg)
317 {
318         return scnprintf(bf, size, "%#lx", arg->val);
319 }
320
321 #define SCA_HEX syscall_arg__scnprintf_hex
322
323 static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size,
324                                                struct syscall_arg *arg)
325 {
326         int printed = 0, prot = arg->val;
327
328         if (prot == PROT_NONE)
329                 return scnprintf(bf, size, "NONE");
330 #define P_MMAP_PROT(n) \
331         if (prot & PROT_##n) { \
332                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
333                 prot &= ~PROT_##n; \
334         }
335
336         P_MMAP_PROT(EXEC);
337         P_MMAP_PROT(READ);
338         P_MMAP_PROT(WRITE);
339 #ifdef PROT_SEM
340         P_MMAP_PROT(SEM);
341 #endif
342         P_MMAP_PROT(GROWSDOWN);
343         P_MMAP_PROT(GROWSUP);
344 #undef P_MMAP_PROT
345
346         if (prot)
347                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", prot);
348
349         return printed;
350 }
351
352 #define SCA_MMAP_PROT syscall_arg__scnprintf_mmap_prot
353
354 static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size,
355                                                 struct syscall_arg *arg)
356 {
357         int printed = 0, flags = arg->val;
358
359 #define P_MMAP_FLAG(n) \
360         if (flags & MAP_##n) { \
361                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
362                 flags &= ~MAP_##n; \
363         }
364
365         P_MMAP_FLAG(SHARED);
366         P_MMAP_FLAG(PRIVATE);
367 #ifdef MAP_32BIT
368         P_MMAP_FLAG(32BIT);
369 #endif
370         P_MMAP_FLAG(ANONYMOUS);
371         P_MMAP_FLAG(DENYWRITE);
372         P_MMAP_FLAG(EXECUTABLE);
373         P_MMAP_FLAG(FILE);
374         P_MMAP_FLAG(FIXED);
375         P_MMAP_FLAG(GROWSDOWN);
376 #ifdef MAP_HUGETLB
377         P_MMAP_FLAG(HUGETLB);
378 #endif
379         P_MMAP_FLAG(LOCKED);
380         P_MMAP_FLAG(NONBLOCK);
381         P_MMAP_FLAG(NORESERVE);
382         P_MMAP_FLAG(POPULATE);
383         P_MMAP_FLAG(STACK);
384 #ifdef MAP_UNINITIALIZED
385         P_MMAP_FLAG(UNINITIALIZED);
386 #endif
387 #undef P_MMAP_FLAG
388
389         if (flags)
390                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
391
392         return printed;
393 }
394
395 #define SCA_MMAP_FLAGS syscall_arg__scnprintf_mmap_flags
396
397 static size_t syscall_arg__scnprintf_madvise_behavior(char *bf, size_t size,
398                                                       struct syscall_arg *arg)
399 {
400         int behavior = arg->val;
401
402         switch (behavior) {
403 #define P_MADV_BHV(n) case MADV_##n: return scnprintf(bf, size, #n)
404         P_MADV_BHV(NORMAL);
405         P_MADV_BHV(RANDOM);
406         P_MADV_BHV(SEQUENTIAL);
407         P_MADV_BHV(WILLNEED);
408         P_MADV_BHV(DONTNEED);
409         P_MADV_BHV(REMOVE);
410         P_MADV_BHV(DONTFORK);
411         P_MADV_BHV(DOFORK);
412         P_MADV_BHV(HWPOISON);
413 #ifdef MADV_SOFT_OFFLINE
414         P_MADV_BHV(SOFT_OFFLINE);
415 #endif
416         P_MADV_BHV(MERGEABLE);
417         P_MADV_BHV(UNMERGEABLE);
418 #ifdef MADV_HUGEPAGE
419         P_MADV_BHV(HUGEPAGE);
420 #endif
421 #ifdef MADV_NOHUGEPAGE
422         P_MADV_BHV(NOHUGEPAGE);
423 #endif
424 #ifdef MADV_DONTDUMP
425         P_MADV_BHV(DONTDUMP);
426 #endif
427 #ifdef MADV_DODUMP
428         P_MADV_BHV(DODUMP);
429 #endif
430 #undef P_MADV_PHV
431         default: break;
432         }
433
434         return scnprintf(bf, size, "%#x", behavior);
435 }
436
437 #define SCA_MADV_BHV syscall_arg__scnprintf_madvise_behavior
438
439 static size_t syscall_arg__scnprintf_flock(char *bf, size_t size,
440                                            struct syscall_arg *arg)
441 {
442         int printed = 0, op = arg->val;
443
444         if (op == 0)
445                 return scnprintf(bf, size, "NONE");
446 #define P_CMD(cmd) \
447         if ((op & LOCK_##cmd) == LOCK_##cmd) { \
448                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #cmd); \
449                 op &= ~LOCK_##cmd; \
450         }
451
452         P_CMD(SH);
453         P_CMD(EX);
454         P_CMD(NB);
455         P_CMD(UN);
456         P_CMD(MAND);
457         P_CMD(RW);
458         P_CMD(READ);
459         P_CMD(WRITE);
460 #undef P_OP
461
462         if (op)
463                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", op);
464
465         return printed;
466 }
467
468 #define SCA_FLOCK syscall_arg__scnprintf_flock
469
470 static size_t syscall_arg__scnprintf_futex_op(char *bf, size_t size, struct syscall_arg *arg)
471 {
472         enum syscall_futex_args {
473                 SCF_UADDR   = (1 << 0),
474                 SCF_OP      = (1 << 1),
475                 SCF_VAL     = (1 << 2),
476                 SCF_TIMEOUT = (1 << 3),
477                 SCF_UADDR2  = (1 << 4),
478                 SCF_VAL3    = (1 << 5),
479         };
480         int op = arg->val;
481         int cmd = op & FUTEX_CMD_MASK;
482         size_t printed = 0;
483
484         switch (cmd) {
485 #define P_FUTEX_OP(n) case FUTEX_##n: printed = scnprintf(bf, size, #n);
486         P_FUTEX_OP(WAIT);           arg->mask |= SCF_VAL3|SCF_UADDR2;             break;
487         P_FUTEX_OP(WAKE);           arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
488         P_FUTEX_OP(FD);             arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
489         P_FUTEX_OP(REQUEUE);        arg->mask |= SCF_VAL3|SCF_TIMEOUT;            break;
490         P_FUTEX_OP(CMP_REQUEUE);    arg->mask |= SCF_TIMEOUT;                     break;
491         P_FUTEX_OP(CMP_REQUEUE_PI); arg->mask |= SCF_TIMEOUT;                     break;
492         P_FUTEX_OP(WAKE_OP);                                                      break;
493         P_FUTEX_OP(LOCK_PI);        arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
494         P_FUTEX_OP(UNLOCK_PI);      arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
495         P_FUTEX_OP(TRYLOCK_PI);     arg->mask |= SCF_VAL3|SCF_UADDR2;             break;
496         P_FUTEX_OP(WAIT_BITSET);    arg->mask |= SCF_UADDR2;                      break;
497         P_FUTEX_OP(WAKE_BITSET);    arg->mask |= SCF_UADDR2;                      break;
498         P_FUTEX_OP(WAIT_REQUEUE_PI);                                              break;
499         default: printed = scnprintf(bf, size, "%#x", cmd);                       break;
500         }
501
502         if (op & FUTEX_PRIVATE_FLAG)
503                 printed += scnprintf(bf + printed, size - printed, "|PRIV");
504
505         if (op & FUTEX_CLOCK_REALTIME)
506                 printed += scnprintf(bf + printed, size - printed, "|CLKRT");
507
508         return printed;
509 }
510
511 #define SCA_FUTEX_OP  syscall_arg__scnprintf_futex_op
512
513 static const char *epoll_ctl_ops[] = { "ADD", "DEL", "MOD", };
514 static DEFINE_STRARRAY_OFFSET(epoll_ctl_ops, 1);
515
516 static const char *itimers[] = { "REAL", "VIRTUAL", "PROF", };
517 static DEFINE_STRARRAY(itimers);
518
519 static const char *whences[] = { "SET", "CUR", "END",
520 #ifdef SEEK_DATA
521 "DATA",
522 #endif
523 #ifdef SEEK_HOLE
524 "HOLE",
525 #endif
526 };
527 static DEFINE_STRARRAY(whences);
528
529 static const char *fcntl_cmds[] = {
530         "DUPFD", "GETFD", "SETFD", "GETFL", "SETFL", "GETLK", "SETLK",
531         "SETLKW", "SETOWN", "GETOWN", "SETSIG", "GETSIG", "F_GETLK64",
532         "F_SETLK64", "F_SETLKW64", "F_SETOWN_EX", "F_GETOWN_EX",
533         "F_GETOWNER_UIDS",
534 };
535 static DEFINE_STRARRAY(fcntl_cmds);
536
537 static const char *rlimit_resources[] = {
538         "CPU", "FSIZE", "DATA", "STACK", "CORE", "RSS", "NPROC", "NOFILE",
539         "MEMLOCK", "AS", "LOCKS", "SIGPENDING", "MSGQUEUE", "NICE", "RTPRIO",
540         "RTTIME",
541 };
542 static DEFINE_STRARRAY(rlimit_resources);
543
544 static const char *sighow[] = { "BLOCK", "UNBLOCK", "SETMASK", };
545 static DEFINE_STRARRAY(sighow);
546
547 static const char *clockid[] = {
548         "REALTIME", "MONOTONIC", "PROCESS_CPUTIME_ID", "THREAD_CPUTIME_ID",
549         "MONOTONIC_RAW", "REALTIME_COARSE", "MONOTONIC_COARSE",
550 };
551 static DEFINE_STRARRAY(clockid);
552
/*
 * Socket address/protocol family names, indexed by the AF_ / PF_ value.
 *
 * Every value up to the last entry needs a slot: family 28 (MPLS) sits
 * between IB (27) and CAN (29), without it CAN and everything after it
 * would be reported one family too low.
 */
static const char *socket_families[] = {
        "UNSPEC", "LOCAL", "INET", "AX25", "IPX", "APPLETALK", "NETROM",
        "BRIDGE", "ATMPVC", "X25", "INET6", "ROSE", "DECnet", "NETBEUI",
        "SECURITY", "KEY", "NETLINK", "PACKET", "ASH", "ECONET", "ATMSVC",
        "RDS", "SNA", "IRDA", "PPPOX", "WANPIPE", "LLC", "IB", "MPLS", "CAN",
        "TIPC", "BLUETOOTH", "IUCV", "RXRPC", "ISDN", "PHONET", "IEEE802154",
        "CAIF", "ALG", "NFC", "VSOCK",
};
/* strarray descriptor for the socket family names, first entry is value 0. */
static DEFINE_STRARRAY(socket_families);

/* Low bits of a socket 'type' argument that hold the type itself. */
#if !defined(SOCK_TYPE_MASK)
#define SOCK_TYPE_MASK 0xf
#endif
566
567 static size_t syscall_arg__scnprintf_socket_type(char *bf, size_t size,
568                                                       struct syscall_arg *arg)
569 {
570         size_t printed;
571         int type = arg->val,
572             flags = type & ~SOCK_TYPE_MASK;
573
574         type &= SOCK_TYPE_MASK;
575         /*
576          * Can't use a strarray, MIPS may override for ABI reasons.
577          */
578         switch (type) {
579 #define P_SK_TYPE(n) case SOCK_##n: printed = scnprintf(bf, size, #n); break;
580         P_SK_TYPE(STREAM);
581         P_SK_TYPE(DGRAM);
582         P_SK_TYPE(RAW);
583         P_SK_TYPE(RDM);
584         P_SK_TYPE(SEQPACKET);
585         P_SK_TYPE(DCCP);
586         P_SK_TYPE(PACKET);
587 #undef P_SK_TYPE
588         default:
589                 printed = scnprintf(bf, size, "%#x", type);
590         }
591
592 #define P_SK_FLAG(n) \
593         if (flags & SOCK_##n) { \
594                 printed += scnprintf(bf + printed, size - printed, "|%s", #n); \
595                 flags &= ~SOCK_##n; \
596         }
597
598         P_SK_FLAG(CLOEXEC);
599         P_SK_FLAG(NONBLOCK);
600 #undef P_SK_FLAG
601
602         if (flags)
603                 printed += scnprintf(bf + printed, size - printed, "|%#x", flags);
604
605         return printed;
606 }
607
608 #define SCA_SK_TYPE syscall_arg__scnprintf_socket_type
609
610 #ifndef MSG_PROBE
611 #define MSG_PROBE            0x10
612 #endif
613 #ifndef MSG_WAITFORONE
614 #define MSG_WAITFORONE  0x10000
615 #endif
616 #ifndef MSG_SENDPAGE_NOTLAST
617 #define MSG_SENDPAGE_NOTLAST 0x20000
618 #endif
619 #ifndef MSG_FASTOPEN
620 #define MSG_FASTOPEN         0x20000000
621 #endif
622
623 static size_t syscall_arg__scnprintf_msg_flags(char *bf, size_t size,
624                                                struct syscall_arg *arg)
625 {
626         int printed = 0, flags = arg->val;
627
628         if (flags == 0)
629                 return scnprintf(bf, size, "NONE");
630 #define P_MSG_FLAG(n) \
631         if (flags & MSG_##n) { \
632                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
633                 flags &= ~MSG_##n; \
634         }
635
636         P_MSG_FLAG(OOB);
637         P_MSG_FLAG(PEEK);
638         P_MSG_FLAG(DONTROUTE);
639         P_MSG_FLAG(TRYHARD);
640         P_MSG_FLAG(CTRUNC);
641         P_MSG_FLAG(PROBE);
642         P_MSG_FLAG(TRUNC);
643         P_MSG_FLAG(DONTWAIT);
644         P_MSG_FLAG(EOR);
645         P_MSG_FLAG(WAITALL);
646         P_MSG_FLAG(FIN);
647         P_MSG_FLAG(SYN);
648         P_MSG_FLAG(CONFIRM);
649         P_MSG_FLAG(RST);
650         P_MSG_FLAG(ERRQUEUE);
651         P_MSG_FLAG(NOSIGNAL);
652         P_MSG_FLAG(MORE);
653         P_MSG_FLAG(WAITFORONE);
654         P_MSG_FLAG(SENDPAGE_NOTLAST);
655         P_MSG_FLAG(FASTOPEN);
656         P_MSG_FLAG(CMSG_CLOEXEC);
657 #undef P_MSG_FLAG
658
659         if (flags)
660                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
661
662         return printed;
663 }
664
665 #define SCA_MSG_FLAGS syscall_arg__scnprintf_msg_flags
666
667 static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size,
668                                                  struct syscall_arg *arg)
669 {
670         size_t printed = 0;
671         int mode = arg->val;
672
673         if (mode == F_OK) /* 0 */
674                 return scnprintf(bf, size, "F");
675 #define P_MODE(n) \
676         if (mode & n##_OK) { \
677                 printed += scnprintf(bf + printed, size - printed, "%s", #n); \
678                 mode &= ~n##_OK; \
679         }
680
681         P_MODE(R);
682         P_MODE(W);
683         P_MODE(X);
684 #undef P_MODE
685
686         if (mode)
687                 printed += scnprintf(bf + printed, size - printed, "|%#x", mode);
688
689         return printed;
690 }
691
692 #define SCA_ACCMODE syscall_arg__scnprintf_access_mode
693
694 static size_t syscall_arg__scnprintf_open_flags(char *bf, size_t size,
695                                                struct syscall_arg *arg)
696 {
697         int printed = 0, flags = arg->val;
698
699         if (!(flags & O_CREAT))
700                 arg->mask |= 1 << (arg->idx + 1); /* Mask the mode parm */
701
702         if (flags == 0)
703                 return scnprintf(bf, size, "RDONLY");
704 #define P_FLAG(n) \
705         if (flags & O_##n) { \
706                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
707                 flags &= ~O_##n; \
708         }
709
710         P_FLAG(APPEND);
711         P_FLAG(ASYNC);
712         P_FLAG(CLOEXEC);
713         P_FLAG(CREAT);
714         P_FLAG(DIRECT);
715         P_FLAG(DIRECTORY);
716         P_FLAG(EXCL);
717         P_FLAG(LARGEFILE);
718         P_FLAG(NOATIME);
719         P_FLAG(NOCTTY);
720 #ifdef O_NONBLOCK
721         P_FLAG(NONBLOCK);
722 #elif O_NDELAY
723         P_FLAG(NDELAY);
724 #endif
725 #ifdef O_PATH
726         P_FLAG(PATH);
727 #endif
728         P_FLAG(RDWR);
729 #ifdef O_DSYNC
730         if ((flags & O_SYNC) == O_SYNC)
731                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", "SYNC");
732         else {
733                 P_FLAG(DSYNC);
734         }
735 #else
736         P_FLAG(SYNC);
737 #endif
738         P_FLAG(TRUNC);
739         P_FLAG(WRONLY);
740 #undef P_FLAG
741
742         if (flags)
743                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
744
745         return printed;
746 }
747
748 #define SCA_OPEN_FLAGS syscall_arg__scnprintf_open_flags
749
750 static size_t syscall_arg__scnprintf_eventfd_flags(char *bf, size_t size,
751                                                    struct syscall_arg *arg)
752 {
753         int printed = 0, flags = arg->val;
754
755         if (flags == 0)
756                 return scnprintf(bf, size, "NONE");
757 #define P_FLAG(n) \
758         if (flags & EFD_##n) { \
759                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
760                 flags &= ~EFD_##n; \
761         }
762
763         P_FLAG(SEMAPHORE);
764         P_FLAG(CLOEXEC);
765         P_FLAG(NONBLOCK);
766 #undef P_FLAG
767
768         if (flags)
769                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
770
771         return printed;
772 }
773
774 #define SCA_EFD_FLAGS syscall_arg__scnprintf_eventfd_flags
775
776 static size_t syscall_arg__scnprintf_pipe_flags(char *bf, size_t size,
777                                                 struct syscall_arg *arg)
778 {
779         int printed = 0, flags = arg->val;
780
781 #define P_FLAG(n) \
782         if (flags & O_##n) { \
783                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
784                 flags &= ~O_##n; \
785         }
786
787         P_FLAG(CLOEXEC);
788         P_FLAG(NONBLOCK);
789 #undef P_FLAG
790
791         if (flags)
792                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
793
794         return printed;
795 }
796
797 #define SCA_PIPE_FLAGS syscall_arg__scnprintf_pipe_flags
798
799 static size_t syscall_arg__scnprintf_signum(char *bf, size_t size, struct syscall_arg *arg)
800 {
801         int sig = arg->val;
802
803         switch (sig) {
804 #define P_SIGNUM(n) case SIG##n: return scnprintf(bf, size, #n)
805         P_SIGNUM(HUP);
806         P_SIGNUM(INT);
807         P_SIGNUM(QUIT);
808         P_SIGNUM(ILL);
809         P_SIGNUM(TRAP);
810         P_SIGNUM(ABRT);
811         P_SIGNUM(BUS);
812         P_SIGNUM(FPE);
813         P_SIGNUM(KILL);
814         P_SIGNUM(USR1);
815         P_SIGNUM(SEGV);
816         P_SIGNUM(USR2);
817         P_SIGNUM(PIPE);
818         P_SIGNUM(ALRM);
819         P_SIGNUM(TERM);
820         P_SIGNUM(STKFLT);
821         P_SIGNUM(CHLD);
822         P_SIGNUM(CONT);
823         P_SIGNUM(STOP);
824         P_SIGNUM(TSTP);
825         P_SIGNUM(TTIN);
826         P_SIGNUM(TTOU);
827         P_SIGNUM(URG);
828         P_SIGNUM(XCPU);
829         P_SIGNUM(XFSZ);
830         P_SIGNUM(VTALRM);
831         P_SIGNUM(PROF);
832         P_SIGNUM(WINCH);
833         P_SIGNUM(IO);
834         P_SIGNUM(PWR);
835         P_SIGNUM(SYS);
836         default: break;
837         }
838
839         return scnprintf(bf, size, "%#x", sig);
840 }
841
842 #define SCA_SIGNUM syscall_arg__scnprintf_signum
843
#define TCGETS          0x5401

/*
 * Names of the terminal ioctl requests, indexed by (request - TCGETS), which
 * is how a strarray with offset 0x5401 looks values up.
 *
 * The table has gaps, so some entries are placed with designated
 * initializers. Their index must be relative to TCGETS like every other
 * slot: TIOCSBRK is 0x5427, FIONCLEX is 0x5450 and FIOQSIZE is 0x5460. Using
 * the bare low byte of the request as index would shift those names, and all
 * the names that follow them, to the next request number.
 */
static const char *tioctls[] = {
        "TCGETS", "TCSETS", "TCSETSW", "TCSETSF", "TCGETA", "TCSETA", "TCSETAW",
        "TCSETAF", "TCSBRK", "TCXONC", "TCFLSH", "TIOCEXCL", "TIOCNXCL",
        "TIOCSCTTY", "TIOCGPGRP", "TIOCSPGRP", "TIOCOUTQ", "TIOCSTI",
        "TIOCGWINSZ", "TIOCSWINSZ", "TIOCMGET", "TIOCMBIS", "TIOCMBIC",
        "TIOCMSET", "TIOCGSOFTCAR", "TIOCSSOFTCAR", "FIONREAD", "TIOCLINUX",
        "TIOCCONS", "TIOCGSERIAL", "TIOCSSERIAL", "TIOCPKT", "FIONBIO",
        "TIOCNOTTY", "TIOCSETD", "TIOCGETD", "TCSBRKP",
        [0x5427 - TCGETS] = "TIOCSBRK",
        "TIOCCBRK", "TIOCGSID", "TCGETS2", "TCSETS2", "TCSETSW2", "TCSETSF2",
        "TIOCGRS485", "TIOCSRS485", "TIOCGPTN", "TIOCSPTLCK",
        "TIOCGDEV||TCGETX", "TCSETX", "TCSETXF", "TCSETXW", "TIOCSIG",
        "TIOCVHANGUP", "TIOCGPKT", "TIOCGPTLCK", "TIOCGEXCL",
        [0x5450 - TCGETS] = "FIONCLEX", "FIOCLEX", "FIOASYNC", "TIOCSERCONFIG",
        "TIOCSERGWILD", "TIOCSERSWILD", "TIOCGLCKTRMIOS", "TIOCSLCKTRMIOS",
        "TIOCSERGSTRUCT", "TIOCSERGETLSR", "TIOCSERGETMULTI", "TIOCSERSETMULTI",
        "TIOCMIWAIT", "TIOCGICOUNT",
        [0x5460 - TCGETS] = "FIOQSIZE",
};
863
/* strarray descriptor for the terminal ioctl names; entry 0 is TCGETS. */
static DEFINE_STRARRAY_OFFSET(tioctls, TCGETS);

/*
 * Initializer fragment for a struct syscall_fmt entry: format argument
 * number 'arg' with the strarray formatter, using table strarray__<array>.
 * 'name' is not used by the expansion; it only labels the argument at the
 * point of use.
 */
#define STRARRAY(arg, name, array) \
        .arg_scnprintf = { [arg] = SCA_STRARRAY, }, \
        .arg_parm      = { [arg] = &strarray__##array, }
869
870 static struct syscall_fmt {
871         const char *name;
872         const char *alias;
873         size_t     (*arg_scnprintf[6])(char *bf, size_t size, struct syscall_arg *arg);
874         void       *arg_parm[6];
875         bool       errmsg;
876         bool       timeout;
877         bool       hexret;
878 } syscall_fmts[] = {
879         { .name     = "access",     .errmsg = true,
880           .arg_scnprintf = { [1] = SCA_ACCMODE, /* mode */ }, },
881         { .name     = "arch_prctl", .errmsg = true, .alias = "prctl", },
882         { .name     = "brk",        .hexret = true,
883           .arg_scnprintf = { [0] = SCA_HEX, /* brk */ }, },
884         { .name     = "clock_gettime",  .errmsg = true, STRARRAY(0, clk_id, clockid), },
885         { .name     = "close",      .errmsg = true,
886           .arg_scnprintf = { [0] = SCA_CLOSE_FD, /* fd */ }, }, 
887         { .name     = "connect",    .errmsg = true, },
888         { .name     = "dup",        .errmsg = true,
889           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
890         { .name     = "dup2",       .errmsg = true,
891           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
892         { .name     = "dup3",       .errmsg = true,
893           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
894         { .name     = "epoll_ctl",  .errmsg = true, STRARRAY(1, op, epoll_ctl_ops), },
895         { .name     = "eventfd2",   .errmsg = true,
896           .arg_scnprintf = { [1] = SCA_EFD_FLAGS, /* flags */ }, },
897         { .name     = "faccessat",  .errmsg = true,
898           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
899         { .name     = "fadvise64",  .errmsg = true,
900           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
901         { .name     = "fallocate",  .errmsg = true,
902           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
903         { .name     = "fchdir",     .errmsg = true,
904           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
905         { .name     = "fchmod",     .errmsg = true,
906           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
907         { .name     = "fchmodat",   .errmsg = true,
908           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, 
909         { .name     = "fchown",     .errmsg = true,
910           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
911         { .name     = "fchownat",   .errmsg = true,
912           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, 
913         { .name     = "fcntl",      .errmsg = true,
914           .arg_scnprintf = { [0] = SCA_FD, /* fd */
915                              [1] = SCA_STRARRAY, /* cmd */ },
916           .arg_parm      = { [1] = &strarray__fcntl_cmds, /* cmd */ }, },
917         { .name     = "fdatasync",  .errmsg = true,
918           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
919         { .name     = "flock",      .errmsg = true,
920           .arg_scnprintf = { [0] = SCA_FD, /* fd */
921                              [1] = SCA_FLOCK, /* cmd */ }, },
922         { .name     = "fsetxattr",  .errmsg = true,
923           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
924         { .name     = "fstat",      .errmsg = true, .alias = "newfstat",
925           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
926         { .name     = "fstatat",    .errmsg = true, .alias = "newfstatat",
927           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, 
928         { .name     = "fstatfs",    .errmsg = true,
929           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
930         { .name     = "fsync",    .errmsg = true,
931           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
932         { .name     = "ftruncate", .errmsg = true,
933           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
934         { .name     = "futex",      .errmsg = true,
935           .arg_scnprintf = { [1] = SCA_FUTEX_OP, /* op */ }, },
936         { .name     = "futimesat", .errmsg = true,
937           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, 
938         { .name     = "getdents",   .errmsg = true,
939           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
940         { .name     = "getdents64", .errmsg = true,
941           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
942         { .name     = "getitimer",  .errmsg = true, STRARRAY(0, which, itimers), },
943         { .name     = "getrlimit",  .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
944         { .name     = "ioctl",      .errmsg = true,
945           .arg_scnprintf = { [0] = SCA_FD, /* fd */ 
946                              [1] = SCA_STRHEXARRAY, /* cmd */
947                              [2] = SCA_HEX, /* arg */ },
948           .arg_parm      = { [1] = &strarray__tioctls, /* cmd */ }, },
949         { .name     = "kill",       .errmsg = true,
950           .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
951         { .name     = "linkat",     .errmsg = true,
952           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, 
953         { .name     = "lseek",      .errmsg = true,
954           .arg_scnprintf = { [0] = SCA_FD, /* fd */
955                              [2] = SCA_STRARRAY, /* whence */ },
956           .arg_parm      = { [2] = &strarray__whences, /* whence */ }, },
957         { .name     = "lstat",      .errmsg = true, .alias = "newlstat", },
958         { .name     = "madvise",    .errmsg = true,
959           .arg_scnprintf = { [0] = SCA_HEX,      /* start */
960                              [2] = SCA_MADV_BHV, /* behavior */ }, },
961         { .name     = "mkdirat",    .errmsg = true,
962           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, 
963         { .name     = "mknodat",    .errmsg = true,
964           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, 
965         { .name     = "mlock",      .errmsg = true,
966           .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
967         { .name     = "mlockall",   .errmsg = true,
968           .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
969         { .name     = "mmap",       .hexret = true,
970           .arg_scnprintf = { [0] = SCA_HEX,       /* addr */
971                              [2] = SCA_MMAP_PROT, /* prot */
972                              [3] = SCA_MMAP_FLAGS, /* flags */
973                              [4] = SCA_FD,        /* fd */ }, },
974         { .name     = "mprotect",   .errmsg = true,
975           .arg_scnprintf = { [0] = SCA_HEX, /* start */
976                              [2] = SCA_MMAP_PROT, /* prot */ }, },
977         { .name     = "mremap",     .hexret = true,
978           .arg_scnprintf = { [0] = SCA_HEX, /* addr */
979                              [4] = SCA_HEX, /* new_addr */ }, },
980         { .name     = "munlock",    .errmsg = true,
981           .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
982         { .name     = "munmap",     .errmsg = true,
983           .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
984         { .name     = "name_to_handle_at", .errmsg = true,
985           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, 
986         { .name     = "newfstatat", .errmsg = true,
987           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, 
988         { .name     = "open",       .errmsg = true,
989           .arg_scnprintf = { [1] = SCA_OPEN_FLAGS, /* flags */ }, },
990         { .name     = "open_by_handle_at", .errmsg = true,
991           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
992                              [2] = SCA_OPEN_FLAGS, /* flags */ }, },
993         { .name     = "openat",     .errmsg = true,
994           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
995                              [2] = SCA_OPEN_FLAGS, /* flags */ }, },
996         { .name     = "pipe2",      .errmsg = true,
997           .arg_scnprintf = { [1] = SCA_PIPE_FLAGS, /* flags */ }, },
998         { .name     = "poll",       .errmsg = true, .timeout = true, },
999         { .name     = "ppoll",      .errmsg = true, .timeout = true, },
1000         { .name     = "pread",      .errmsg = true, .alias = "pread64",
1001           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
1002         { .name     = "preadv",     .errmsg = true, .alias = "pread",
1003           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
1004         { .name     = "prlimit64",  .errmsg = true, STRARRAY(1, resource, rlimit_resources), },
1005         { .name     = "pwrite",     .errmsg = true, .alias = "pwrite64",
1006           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
1007         { .name     = "pwritev",    .errmsg = true,
1008           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
1009         { .name     = "read",       .errmsg = true,
1010           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
1011         { .name     = "readlinkat", .errmsg = true,
1012           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, 
1013         { .name     = "readv",      .errmsg = true,
1014           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
1015         { .name     = "recvfrom",   .errmsg = true,
1016           .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1017         { .name     = "recvmmsg",   .errmsg = true,
1018           .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1019         { .name     = "recvmsg",    .errmsg = true,
1020           .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
1021         { .name     = "renameat",   .errmsg = true,
1022           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, 
1023         { .name     = "rt_sigaction", .errmsg = true,
1024           .arg_scnprintf = { [0] = SCA_SIGNUM, /* sig */ }, },
1025         { .name     = "rt_sigprocmask",  .errmsg = true, STRARRAY(0, how, sighow), },
1026         { .name     = "rt_sigqueueinfo", .errmsg = true,
1027           .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1028         { .name     = "rt_tgsigqueueinfo", .errmsg = true,
1029           .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
1030         { .name     = "select",     .errmsg = true, .timeout = true, },
1031         { .name     = "sendmmsg",    .errmsg = true,
1032           .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1033         { .name     = "sendmsg",    .errmsg = true,
1034           .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
1035         { .name     = "sendto",     .errmsg = true,
1036           .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1037         { .name     = "setitimer",  .errmsg = true, STRARRAY(0, which, itimers), },
1038         { .name     = "setrlimit",  .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
1039         { .name     = "shutdown",   .errmsg = true,
1040           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
1041         { .name     = "socket",     .errmsg = true,
1042           .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
1043                              [1] = SCA_SK_TYPE, /* type */ },
1044           .arg_parm      = { [0] = &strarray__socket_families, /* family */ }, },
1045         { .name     = "socketpair", .errmsg = true,
1046           .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
1047                              [1] = SCA_SK_TYPE, /* type */ },
1048           .arg_parm      = { [0] = &strarray__socket_families, /* family */ }, },
1049         { .name     = "stat",       .errmsg = true, .alias = "newstat", },
1050         { .name     = "symlinkat",  .errmsg = true,
1051           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, 
1052         { .name     = "tgkill",     .errmsg = true,
1053           .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
1054         { .name     = "tkill",      .errmsg = true,
1055           .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1056         { .name     = "uname",      .errmsg = true, .alias = "newuname", },
1057         { .name     = "unlinkat",   .errmsg = true,
1058           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1059         { .name     = "utimensat",  .errmsg = true,
1060           .arg_scnprintf = { [0] = SCA_FDAT, /* dirfd */ }, },
1061         { .name     = "write",      .errmsg = true,
1062           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
1063         { .name     = "writev",     .errmsg = true,
1064           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
1065 };
1066
1067 static int syscall_fmt__cmp(const void *name, const void *fmtp)
1068 {
1069         const struct syscall_fmt *fmt = fmtp;
1070         return strcmp(name, fmt->name);
1071 }
1072
1073 static struct syscall_fmt *syscall_fmt__find(const char *name)
1074 {
1075         const int nmemb = ARRAY_SIZE(syscall_fmts);
1076         return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp);
1077 }
1078
/*
 * Per syscall state, one slot per syscall id in trace->syscalls.table,
 * filled in lazily by trace__read_syscall_info().
 */
struct syscall {
        /* Format of the syscalls:sys_enter_<name> tracepoint, source of the argument names */
        struct event_format *tp_format;
        /* Name as returned by audit_syscall_to_name(), NULL while the slot is unread */
        const char *name;
        /* Set when the event qualifier excludes this syscall */
        bool filtered;
        /* Entry in syscall_fmts[], NULL when the syscall has none */
        struct syscall_fmt *fmt;
        /* Per argument formatter, a NULL slot means "print as a plain integer" */
        size_t (**arg_scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
        /* Per argument formatter parameter, taken from fmt->arg_parm when fmt is set */
        void **arg_parm;
};
1087
1088 static size_t fprintf_duration(unsigned long t, FILE *fp)
1089 {
1090         double duration = (double)t / NSEC_PER_MSEC;
1091         size_t printed = fprintf(fp, "(");
1092
1093         if (duration >= 1.0)
1094                 printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration);
1095         else if (duration >= 0.01)
1096                 printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration);
1097         else
1098                 printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration);
1099         return printed + fprintf(fp, "): ");
1100 }
1101
1102 struct thread_trace {
1103         u64               entry_time;
1104         u64               exit_time;
1105         bool              entry_pending;
1106         unsigned long     nr_events;
1107         char              *entry_str;
1108         double            runtime_ms;
1109         struct {
1110                 int       max;
1111                 char      **table;
1112         } paths;
1113
1114         struct intlist *syscall_stats;
1115 };
1116
1117 static struct thread_trace *thread_trace__new(void)
1118 {
1119         struct thread_trace *ttrace =  zalloc(sizeof(struct thread_trace));
1120
1121         if (ttrace)
1122                 ttrace->paths.max = -1;
1123
1124         ttrace->syscall_stats = intlist__new(NULL);
1125
1126         return ttrace;
1127 }
1128
1129 static struct thread_trace *thread__trace(struct thread *thread, FILE *fp)
1130 {
1131         struct thread_trace *ttrace;
1132
1133         if (thread == NULL)
1134                 goto fail;
1135
1136         if (thread->priv == NULL)
1137                 thread->priv = thread_trace__new();
1138                 
1139         if (thread->priv == NULL)
1140                 goto fail;
1141
1142         ttrace = thread->priv;
1143         ++ttrace->nr_events;
1144
1145         return ttrace;
1146 fail:
1147         color_fprintf(fp, PERF_COLOR_RED,
1148                       "WARNING: not enough memory, dropping samples!\n");
1149         return NULL;
1150 }
1151
1152 struct trace {
1153         struct perf_tool        tool;
1154         struct {
1155                 int             machine;
1156                 int             open_id;
1157         }                       audit;
1158         struct {
1159                 int             max;
1160                 struct syscall  *table;
1161         } syscalls;
1162         struct record_opts      opts;
1163         struct machine          *host;
1164         u64                     base_time;
1165         bool                    full_time;
1166         FILE                    *output;
1167         unsigned long           nr_events;
1168         struct strlist          *ev_qualifier;
1169         bool                    not_ev_qualifier;
1170         bool                    live;
1171         const char              *last_vfs_getname;
1172         struct intlist          *tid_list;
1173         struct intlist          *pid_list;
1174         bool                    sched;
1175         bool                    multiple_threads;
1176         bool                    summary;
1177         bool                    summary_only;
1178         bool                    show_comm;
1179         bool                    show_tool_stats;
1180         double                  duration_filter;
1181         double                  runtime_ms;
1182         struct {
1183                 u64             vfs_getname, proc_getname;
1184         } stats;
1185 };
1186
1187 static int trace__set_fd_pathname(struct thread *thread, int fd, const char *pathname)
1188 {
1189         struct thread_trace *ttrace = thread->priv;
1190
1191         if (fd > ttrace->paths.max) {
1192                 char **npath = realloc(ttrace->paths.table, (fd + 1) * sizeof(char *));
1193
1194                 if (npath == NULL)
1195                         return -1;
1196
1197                 if (ttrace->paths.max != -1) {
1198                         memset(npath + ttrace->paths.max + 1, 0,
1199                                (fd - ttrace->paths.max) * sizeof(char *));
1200                 } else {
1201                         memset(npath, 0, (fd + 1) * sizeof(char *));
1202                 }
1203
1204                 ttrace->paths.table = npath;
1205                 ttrace->paths.max   = fd;
1206         }
1207
1208         ttrace->paths.table[fd] = strdup(pathname);
1209
1210         return ttrace->paths.table[fd] != NULL ? 0 : -1;
1211 }
1212
1213 static int thread__read_fd_path(struct thread *thread, int fd)
1214 {
1215         char linkname[PATH_MAX], pathname[PATH_MAX];
1216         struct stat st;
1217         int ret;
1218
1219         if (thread->pid_ == thread->tid) {
1220                 scnprintf(linkname, sizeof(linkname),
1221                           "/proc/%d/fd/%d", thread->pid_, fd);
1222         } else {
1223                 scnprintf(linkname, sizeof(linkname),
1224                           "/proc/%d/task/%d/fd/%d", thread->pid_, thread->tid, fd);
1225         }
1226
1227         if (lstat(linkname, &st) < 0 || st.st_size + 1 > (off_t)sizeof(pathname))
1228                 return -1;
1229
1230         ret = readlink(linkname, pathname, sizeof(pathname));
1231
1232         if (ret < 0 || ret > st.st_size)
1233                 return -1;
1234
1235         pathname[ret] = '\0';
1236         return trace__set_fd_pathname(thread, fd, pathname);
1237 }
1238
1239 static const char *thread__fd_path(struct thread *thread, int fd,
1240                                    struct trace *trace)
1241 {
1242         struct thread_trace *ttrace = thread->priv;
1243
1244         if (ttrace == NULL)
1245                 return NULL;
1246
1247         if (fd < 0)
1248                 return NULL;
1249
1250         if ((fd > ttrace->paths.max || ttrace->paths.table[fd] == NULL))
1251                 if (!trace->live)
1252                         return NULL;
1253                 ++trace->stats.proc_getname;
1254                 if (thread__read_fd_path(thread, fd)) {
1255                         return NULL;
1256         }
1257
1258         return ttrace->paths.table[fd];
1259 }
1260
1261 static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
1262                                         struct syscall_arg *arg)
1263 {
1264         int fd = arg->val;
1265         size_t printed = scnprintf(bf, size, "%d", fd);
1266         const char *path = thread__fd_path(arg->thread, fd, arg->trace);
1267
1268         if (path)
1269                 printed += scnprintf(bf + printed, size - printed, "<%s>", path);
1270
1271         return printed;
1272 }
1273
1274 static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
1275                                               struct syscall_arg *arg)
1276 {
1277         int fd = arg->val;
1278         size_t printed = syscall_arg__scnprintf_fd(bf, size, arg);
1279         struct thread_trace *ttrace = arg->thread->priv;
1280
1281         if (ttrace && fd >= 0 && fd <= ttrace->paths.max) {
1282                 free(ttrace->paths.table[fd]);
1283                 ttrace->paths.table[fd] = NULL;
1284         }
1285
1286         return printed;
1287 }
1288
1289 static bool trace__filter_duration(struct trace *trace, double t)
1290 {
1291         return t < (trace->duration_filter * NSEC_PER_MSEC);
1292 }
1293
1294 static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
1295 {
1296         double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC;
1297
1298         return fprintf(fp, "%10.3f ", ts);
1299 }
1300
/* Set by sig_handler(): a signal asked the tool to stop */
static bool done;
/* Set by sig_handler(): the last signal handled was SIGINT */
static bool interrupted;

/* Signal handler: request a stop and record whether it was a SIGINT. */
static void sig_handler(int sig)
{
        interrupted = (sig == SIGINT);
        done = true;
}
1309
1310 static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread,
1311                                         u64 duration, u64 tstamp, FILE *fp)
1312 {
1313         size_t printed = trace__fprintf_tstamp(trace, tstamp, fp);
1314         printed += fprintf_duration(duration, fp);
1315
1316         if (trace->multiple_threads) {
1317                 if (trace->show_comm)
1318                         printed += fprintf(fp, "%.14s/", thread__comm_str(thread));
1319                 printed += fprintf(fp, "%d ", thread->tid);
1320         }
1321
1322         return printed;
1323 }
1324
1325 static int trace__process_event(struct trace *trace, struct machine *machine,
1326                                 union perf_event *event, struct perf_sample *sample)
1327 {
1328         int ret = 0;
1329
1330         switch (event->header.type) {
1331         case PERF_RECORD_LOST:
1332                 color_fprintf(trace->output, PERF_COLOR_RED,
1333                               "LOST %" PRIu64 " events!\n", event->lost.lost);
1334                 ret = machine__process_lost_event(machine, event, sample);
1335         default:
1336                 ret = machine__process_event(machine, event, sample);
1337                 break;
1338         }
1339
1340         return ret;
1341 }
1342
1343 static int trace__tool_process(struct perf_tool *tool,
1344                                union perf_event *event,
1345                                struct perf_sample *sample,
1346                                struct machine *machine)
1347 {
1348         struct trace *trace = container_of(tool, struct trace, tool);
1349         return trace__process_event(trace, machine, event, sample);
1350 }
1351
1352 static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
1353 {
1354         int err = symbol__init();
1355
1356         if (err)
1357                 return err;
1358
1359         trace->host = machine__new_host();
1360         if (trace->host == NULL)
1361                 return -ENOMEM;
1362
1363         err = __machine__synthesize_threads(trace->host, &trace->tool, &trace->opts.target,
1364                                             evlist->threads, trace__tool_process, false);
1365         if (err)
1366                 symbol__exit();
1367
1368         return err;
1369 }
1370
1371 static int syscall__set_arg_fmts(struct syscall *sc)
1372 {
1373         struct format_field *field;
1374         int idx = 0;
1375
1376         sc->arg_scnprintf = calloc(sc->tp_format->format.nr_fields - 1, sizeof(void *));
1377         if (sc->arg_scnprintf == NULL)
1378                 return -1;
1379
1380         if (sc->fmt)
1381                 sc->arg_parm = sc->fmt->arg_parm;
1382
1383         for (field = sc->tp_format->format.fields->next; field; field = field->next) {
1384                 if (sc->fmt && sc->fmt->arg_scnprintf[idx])
1385                         sc->arg_scnprintf[idx] = sc->fmt->arg_scnprintf[idx];
1386                 else if (field->flags & FIELD_IS_POINTER)
1387                         sc->arg_scnprintf[idx] = syscall_arg__scnprintf_hex;
1388                 ++idx;
1389         }
1390
1391         return 0;
1392 }
1393
1394 static int trace__read_syscall_info(struct trace *trace, int id)
1395 {
1396         char tp_name[128];
1397         struct syscall *sc;
1398         const char *name = audit_syscall_to_name(id, trace->audit.machine);
1399
1400         if (name == NULL)
1401                 return -1;
1402
1403         if (id > trace->syscalls.max) {
1404                 struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc));
1405
1406                 if (nsyscalls == NULL)
1407                         return -1;
1408
1409                 if (trace->syscalls.max != -1) {
1410                         memset(nsyscalls + trace->syscalls.max + 1, 0,
1411                                (id - trace->syscalls.max) * sizeof(*sc));
1412                 } else {
1413                         memset(nsyscalls, 0, (id + 1) * sizeof(*sc));
1414                 }
1415
1416                 trace->syscalls.table = nsyscalls;
1417                 trace->syscalls.max   = id;
1418         }
1419
1420         sc = trace->syscalls.table + id;
1421         sc->name = name;
1422
1423         if (trace->ev_qualifier) {
1424                 bool in = strlist__find(trace->ev_qualifier, name) != NULL;
1425
1426                 if (!(in ^ trace->not_ev_qualifier)) {
1427                         sc->filtered = true;
1428                         /*
1429                          * No need to do read tracepoint information since this will be
1430                          * filtered out.
1431                          */
1432                         return 0;
1433                 }
1434         }
1435
1436         sc->fmt  = syscall_fmt__find(sc->name);
1437
1438         snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name);
1439         sc->tp_format = trace_event__tp_format("syscalls", tp_name);
1440
1441         if (sc->tp_format == NULL && sc->fmt && sc->fmt->alias) {
1442                 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias);
1443                 sc->tp_format = trace_event__tp_format("syscalls", tp_name);
1444         }
1445
1446         if (sc->tp_format == NULL)
1447                 return -1;
1448
1449         return syscall__set_arg_fmts(sc);
1450 }
1451
1452 static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
1453                                       unsigned long *args, struct trace *trace,
1454                                       struct thread *thread)
1455 {
1456         size_t printed = 0;
1457
1458         if (sc->tp_format != NULL) {
1459                 struct format_field *field;
1460                 u8 bit = 1;
1461                 struct syscall_arg arg = {
1462                         .idx    = 0,
1463                         .mask   = 0,
1464                         .trace  = trace,
1465                         .thread = thread,
1466                 };
1467
1468                 for (field = sc->tp_format->format.fields->next; field;
1469                      field = field->next, ++arg.idx, bit <<= 1) {
1470                         if (arg.mask & bit)
1471                                 continue;
1472                         /*
1473                          * Suppress this argument if its value is zero and
1474                          * and we don't have a string associated in an
1475                          * strarray for it.
1476                          */
1477                         if (args[arg.idx] == 0 &&
1478                             !(sc->arg_scnprintf &&
1479                               sc->arg_scnprintf[arg.idx] == SCA_STRARRAY &&
1480                               sc->arg_parm[arg.idx]))
1481                                 continue;
1482
1483                         printed += scnprintf(bf + printed, size - printed,
1484                                              "%s%s: ", printed ? ", " : "", field->name);
1485                         if (sc->arg_scnprintf && sc->arg_scnprintf[arg.idx]) {
1486                                 arg.val = args[arg.idx];
1487                                 if (sc->arg_parm)
1488                                         arg.parm = sc->arg_parm[arg.idx];
1489                                 printed += sc->arg_scnprintf[arg.idx](bf + printed,
1490                                                                       size - printed, &arg);
1491                         } else {
1492                                 printed += scnprintf(bf + printed, size - printed,
1493                                                      "%ld", args[arg.idx]);
1494                         }
1495                 }
1496         } else {
1497                 int i = 0;
1498
1499                 while (i < 6) {
1500                         printed += scnprintf(bf + printed, size - printed,
1501                                              "%sarg%d: %ld",
1502                                              printed ? ", " : "", i, args[i]);
1503                         ++i;
1504                 }
1505         }
1506
1507         return printed;
1508 }
1509
/* Signature shared by the per tracepoint sample handlers, e.g. trace__sys_enter(). */
typedef int (*tracepoint_handler)(struct trace *trace,
                                  struct perf_evsel *evsel,
                                  struct perf_sample *sample);
1512
1513 static struct syscall *trace__syscall_info(struct trace *trace,
1514                                            struct perf_evsel *evsel, int id)
1515 {
1516
1517         if (id < 0) {
1518
1519                 /*
1520                  * XXX: Noticed on x86_64, reproduced as far back as 3.0.36, haven't tried
1521                  * before that, leaving at a higher verbosity level till that is
1522                  * explained. Reproduced with plain ftrace with:
1523                  *
1524                  * echo 1 > /t/events/raw_syscalls/sys_exit/enable
1525                  * grep "NR -1 " /t/trace_pipe
1526                  *
1527                  * After generating some load on the machine.
1528                  */
1529                 if (verbose > 1) {
1530                         static u64 n;
1531                         fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n",
1532                                 id, perf_evsel__name(evsel), ++n);
1533                 }
1534                 return NULL;
1535         }
1536
1537         if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) &&
1538             trace__read_syscall_info(trace, id))
1539                 goto out_cant_read;
1540
1541         if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL))
1542                 goto out_cant_read;
1543
1544         return &trace->syscalls.table[id];
1545
1546 out_cant_read:
1547         if (verbose) {
1548                 fprintf(trace->output, "Problems reading syscall %d", id);
1549                 if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL)
1550                         fprintf(trace->output, "(%s)", trace->syscalls.table[id].name);
1551                 fputs(" information\n", trace->output);
1552         }
1553         return NULL;
1554 }
1555
1556 static void thread__update_stats(struct thread_trace *ttrace,
1557                                  int id, struct perf_sample *sample)
1558 {
1559         struct int_node *inode;
1560         struct stats *stats;
1561         u64 duration = 0;
1562
1563         inode = intlist__findnew(ttrace->syscall_stats, id);
1564         if (inode == NULL)
1565                 return;
1566
1567         stats = inode->priv;
1568         if (stats == NULL) {
1569                 stats = malloc(sizeof(struct stats));
1570                 if (stats == NULL)
1571                         return;
1572                 init_stats(stats);
1573                 inode->priv = stats;
1574         }
1575
1576         if (ttrace->entry_time && sample->time > ttrace->entry_time)
1577                 duration = sample->time - ttrace->entry_time;
1578
1579         update_stats(stats, duration);
1580 }
1581
1582 static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
1583                             struct perf_sample *sample)
1584 {
1585         char *msg;
1586         void *args;
1587         size_t printed = 0;
1588         struct thread *thread;
1589         int id = perf_evsel__sc_tp_uint(evsel, id, sample);
1590         struct syscall *sc = trace__syscall_info(trace, evsel, id);
1591         struct thread_trace *ttrace;
1592
1593         if (sc == NULL)
1594                 return -1;
1595
1596         if (sc->filtered)
1597                 return 0;
1598
1599         thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1600         ttrace = thread__trace(thread, trace->output);
1601         if (ttrace == NULL)
1602                 return -1;
1603
1604         args = perf_evsel__sc_tp_ptr(evsel, args, sample);
1605         ttrace = thread->priv;
1606
1607         if (ttrace->entry_str == NULL) {
1608                 ttrace->entry_str = malloc(1024);
1609                 if (!ttrace->entry_str)
1610                         return -1;
1611         }
1612
1613         ttrace->entry_time = sample->time;
1614         msg = ttrace->entry_str;
1615         printed += scnprintf(msg + printed, 1024 - printed, "%s(", sc->name);
1616
1617         printed += syscall__scnprintf_args(sc, msg + printed, 1024 - printed,
1618                                            args, trace, thread);
1619
1620         if (!strcmp(sc->name, "exit_group") || !strcmp(sc->name, "exit")) {
1621                 if (!trace->duration_filter && !trace->summary_only) {
1622                         trace__fprintf_entry_head(trace, thread, 1, sample->time, trace->output);
1623                         fprintf(trace->output, "%-70s\n", ttrace->entry_str);
1624                 }
1625         } else
1626                 ttrace->entry_pending = true;
1627
1628         return 0;
1629 }
1630
1631 static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
1632                            struct perf_sample *sample)
1633 {
1634         int ret;
1635         u64 duration = 0;
1636         struct thread *thread;
1637         int id = perf_evsel__sc_tp_uint(evsel, id, sample);
1638         struct syscall *sc = trace__syscall_info(trace, evsel, id);
1639         struct thread_trace *ttrace;
1640
1641         if (sc == NULL)
1642                 return -1;
1643
1644         if (sc->filtered)
1645                 return 0;
1646
1647         thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1648         ttrace = thread__trace(thread, trace->output);
1649         if (ttrace == NULL)
1650                 return -1;
1651
1652         if (trace->summary)
1653                 thread__update_stats(ttrace, id, sample);
1654
1655         ret = perf_evsel__sc_tp_uint(evsel, ret, sample);
1656
1657         if (id == trace->audit.open_id && ret >= 0 && trace->last_vfs_getname) {
1658                 trace__set_fd_pathname(thread, ret, trace->last_vfs_getname);
1659                 trace->last_vfs_getname = NULL;
1660                 ++trace->stats.vfs_getname;
1661         }
1662
1663         ttrace = thread->priv;
1664
1665         ttrace->exit_time = sample->time;
1666
1667         if (ttrace->entry_time) {
1668                 duration = sample->time - ttrace->entry_time;
1669                 if (trace__filter_duration(trace, duration))
1670                         goto out;
1671         } else if (trace->duration_filter)
1672                 goto out;
1673
1674         if (trace->summary_only)
1675                 goto out;
1676
1677         trace__fprintf_entry_head(trace, thread, duration, sample->time, trace->output);
1678
1679         if (ttrace->entry_pending) {
1680                 fprintf(trace->output, "%-70s", ttrace->entry_str);
1681         } else {
1682                 fprintf(trace->output, " ... [");
1683                 color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued");
1684                 fprintf(trace->output, "]: %s()", sc->name);
1685         }
1686
1687         if (sc->fmt == NULL) {
1688 signed_print:
1689                 fprintf(trace->output, ") = %d", ret);
1690         } else if (ret < 0 && sc->fmt->errmsg) {
1691                 char bf[256];
1692                 const char *emsg = strerror_r(-ret, bf, sizeof(bf)),
1693                            *e = audit_errno_to_name(-ret);
1694
1695                 fprintf(trace->output, ") = -1 %s %s", e, emsg);
1696         } else if (ret == 0 && sc->fmt->timeout)
1697                 fprintf(trace->output, ") = 0 Timeout");
1698         else if (sc->fmt->hexret)
1699                 fprintf(trace->output, ") = %#x", ret);
1700         else
1701                 goto signed_print;
1702
1703         fputc('\n', trace->output);
1704 out:
1705         ttrace->entry_pending = false;
1706
1707         return 0;
1708 }
1709
1710 static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel,
1711                               struct perf_sample *sample)
1712 {
1713         trace->last_vfs_getname = perf_evsel__rawptr(evsel, sample, "pathname");
1714         return 0;
1715 }
1716
1717 static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel,
1718                                      struct perf_sample *sample)
1719 {
1720         u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
1721         double runtime_ms = (double)runtime / NSEC_PER_MSEC;
1722         struct thread *thread = machine__findnew_thread(trace->host,
1723                                                         sample->pid,
1724                                                         sample->tid);
1725         struct thread_trace *ttrace = thread__trace(thread, trace->output);
1726
1727         if (ttrace == NULL)
1728                 goto out_dump;
1729
1730         ttrace->runtime_ms += runtime_ms;
1731         trace->runtime_ms += runtime_ms;
1732         return 0;
1733
1734 out_dump:
1735         fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n",
1736                evsel->name,
1737                perf_evsel__strval(evsel, sample, "comm"),
1738                (pid_t)perf_evsel__intval(evsel, sample, "pid"),
1739                runtime,
1740                perf_evsel__intval(evsel, sample, "vruntime"));
1741         return 0;
1742 }
1743
1744 static bool skip_sample(struct trace *trace, struct perf_sample *sample)
1745 {
1746         if ((trace->pid_list && intlist__find(trace->pid_list, sample->pid)) ||
1747             (trace->tid_list && intlist__find(trace->tid_list, sample->tid)))
1748                 return false;
1749
1750         if (trace->pid_list || trace->tid_list)
1751                 return true;
1752
1753         return false;
1754 }
1755
1756 static int trace__process_sample(struct perf_tool *tool,
1757                                  union perf_event *event __maybe_unused,
1758                                  struct perf_sample *sample,
1759                                  struct perf_evsel *evsel,
1760                                  struct machine *machine __maybe_unused)
1761 {
1762         struct trace *trace = container_of(tool, struct trace, tool);
1763         int err = 0;
1764
1765         tracepoint_handler handler = evsel->handler;
1766
1767         if (skip_sample(trace, sample))
1768                 return 0;
1769
1770         if (!trace->full_time && trace->base_time == 0)
1771                 trace->base_time = sample->time;
1772
1773         if (handler) {
1774                 ++trace->nr_events;
1775                 handler(trace, evsel, sample);
1776         }
1777
1778         return err;
1779 }
1780
1781 static int parse_target_str(struct trace *trace)
1782 {
1783         if (trace->opts.target.pid) {
1784                 trace->pid_list = intlist__new(trace->opts.target.pid);
1785                 if (trace->pid_list == NULL) {
1786                         pr_err("Error parsing process id string\n");
1787                         return -EINVAL;
1788                 }
1789         }
1790
1791         if (trace->opts.target.tid) {
1792                 trace->tid_list = intlist__new(trace->opts.target.tid);
1793                 if (trace->tid_list == NULL) {
1794                         pr_err("Error parsing thread id string\n");
1795                         return -EINVAL;
1796                 }
1797         }
1798
1799         return 0;
1800 }
1801
/*
 * Implement "perf trace record": build an argument vector for "perf record"
 * that selects the syscall enter/exit tracepoints and append the caller's
 * remaining arguments to it.
 *
 * @argc: number of entries in @argv
 * @argv: arguments that followed "perf trace record"
 *
 * Returns -ENOMEM if the argument vector cannot be allocated, -1 if neither
 * syscall tracepoint flavour exists, otherwise whatever cmd_record() returns.
 */
static int trace__record(int argc, const char **argv)
{
	unsigned int rec_argc, i, j;
	const char **rec_argv;
	const char * const record_args[] = {
		"record",
		"-R",
		"-m", "1024",
		"-c", "1",
		"-e",
	};

	/* +1 is for the event string below */
	rec_argc = ARRAY_SIZE(record_args) + 1 + argc;
	rec_argv = calloc(rec_argc + 1, sizeof(char *));

	if (rec_argv == NULL)
		return -ENOMEM;

	for (i = 0; i < ARRAY_SIZE(record_args); i++)
		rec_argv[i] = record_args[i];

	/* event string may be different for older kernels - e.g., RHEL6 */
	if (is_valid_tracepoint("raw_syscalls:sys_enter"))
		rec_argv[i] = "raw_syscalls:sys_enter,raw_syscalls:sys_exit";
	else if (is_valid_tracepoint("syscalls:sys_enter"))
		rec_argv[i] = "syscalls:sys_enter,syscalls:sys_exit";
	else {
		pr_err("Neither raw_syscalls nor syscalls events exist.\n");
		/* Do not leak the argument vector on this early exit. */
		free(rec_argv);
		return -1;
	}
	i++;

	for (j = 0; j < (unsigned int)argc; j++, i++)
		rec_argv[i] = argv[j];

	return cmd_record(i, rec_argv, NULL);
}
1840
1841 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp);
1842
1843 static void perf_evlist__add_vfs_getname(struct perf_evlist *evlist)
1844 {
1845         struct perf_evsel *evsel = perf_evsel__newtp("probe", "vfs_getname");
1846         if (evsel == NULL)
1847                 return;
1848
1849         if (perf_evsel__field(evsel, "pathname") == NULL) {
1850                 perf_evsel__delete(evsel);
1851                 return;
1852         }
1853
1854         evsel->handler = trace__vfs_getname;
1855         perf_evlist__add(evlist, evsel);
1856 }
1857
1858 static int trace__run(struct trace *trace, int argc, const char **argv)
1859 {
1860         struct perf_evlist *evlist = perf_evlist__new();
1861         struct perf_evsel *evsel;
1862         int err = -1, i;
1863         unsigned long before;
1864         const bool forks = argc > 0;
1865
1866         trace->live = true;
1867
1868         if (evlist == NULL) {
1869                 fprintf(trace->output, "Not enough memory to run!\n");
1870                 goto out;
1871         }
1872
1873         if (perf_evlist__add_syscall_newtp(evlist, trace__sys_enter, trace__sys_exit))
1874                 goto out_error_tp;
1875
1876         perf_evlist__add_vfs_getname(evlist);
1877
1878         if (trace->sched &&
1879                 perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
1880                                 trace__sched_stat_runtime))
1881                 goto out_error_tp;
1882
1883         err = perf_evlist__create_maps(evlist, &trace->opts.target);
1884         if (err < 0) {
1885                 fprintf(trace->output, "Problems parsing the target to trace, check your options!\n");
1886                 goto out_delete_evlist;
1887         }
1888
1889         err = trace__symbols_init(trace, evlist);
1890         if (err < 0) {
1891                 fprintf(trace->output, "Problems initializing symbol libraries!\n");
1892                 goto out_delete_maps;
1893         }
1894
1895         perf_evlist__config(evlist, &trace->opts);
1896
1897         signal(SIGCHLD, sig_handler);
1898         signal(SIGINT, sig_handler);
1899
1900         if (forks) {
1901                 err = perf_evlist__prepare_workload(evlist, &trace->opts.target,
1902                                                     argv, false, false);
1903                 if (err < 0) {
1904                         fprintf(trace->output, "Couldn't run the workload!\n");
1905                         goto out_delete_maps;
1906                 }
1907         }
1908
1909         err = perf_evlist__open(evlist);
1910         if (err < 0)
1911                 goto out_error_open;
1912
1913         err = perf_evlist__mmap(evlist, trace->opts.mmap_pages, false);
1914         if (err < 0) {
1915                 fprintf(trace->output, "Couldn't mmap the events: %s\n", strerror(errno));
1916                 goto out_close_evlist;
1917         }
1918
1919         perf_evlist__enable(evlist);
1920
1921         if (forks)
1922                 perf_evlist__start_workload(evlist);
1923
1924         trace->multiple_threads = evlist->threads->map[0] == -1 || evlist->threads->nr > 1;
1925 again:
1926         before = trace->nr_events;
1927
1928         for (i = 0; i < evlist->nr_mmaps; i++) {
1929                 union perf_event *event;
1930
1931                 while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) {
1932                         const u32 type = event->header.type;
1933                         tracepoint_handler handler;
1934                         struct perf_sample sample;
1935
1936                         ++trace->nr_events;
1937
1938                         err = perf_evlist__parse_sample(evlist, event, &sample);
1939                         if (err) {
1940                                 fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err);
1941                                 goto next_event;
1942                         }
1943
1944                         if (!trace->full_time && trace->base_time == 0)
1945                                 trace->base_time = sample.time;
1946
1947                         if (type != PERF_RECORD_SAMPLE) {
1948                                 trace__process_event(trace, trace->host, event, &sample);
1949                                 continue;
1950                         }
1951
1952                         evsel = perf_evlist__id2evsel(evlist, sample.id);
1953                         if (evsel == NULL) {
1954                                 fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample.id);
1955                                 goto next_event;
1956                         }
1957
1958                         if (sample.raw_data == NULL) {
1959                                 fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
1960                                        perf_evsel__name(evsel), sample.tid,
1961                                        sample.cpu, sample.raw_size);
1962                                 goto next_event;
1963                         }
1964
1965                         handler = evsel->handler;
1966                         handler(trace, evsel, &sample);
1967 next_event:
1968                         perf_evlist__mmap_consume(evlist, i);
1969
1970                         if (interrupted)
1971                                 goto out_disable;
1972                 }
1973         }
1974
1975         if (trace->nr_events == before) {
1976                 int timeout = done ? 100 : -1;
1977
1978                 if (poll(evlist->pollfd, evlist->nr_fds, timeout) > 0)
1979                         goto again;
1980         } else {
1981                 goto again;
1982         }
1983
1984 out_disable:
1985         perf_evlist__disable(evlist);
1986
1987         if (!err) {
1988                 if (trace->summary)
1989                         trace__fprintf_thread_summary(trace, trace->output);
1990
1991                 if (trace->show_tool_stats) {
1992                         fprintf(trace->output, "Stats:\n "
1993                                                " vfs_getname : %" PRIu64 "\n"
1994                                                " proc_getname: %" PRIu64 "\n",
1995                                 trace->stats.vfs_getname,
1996                                 trace->stats.proc_getname);
1997                 }
1998         }
1999
2000         perf_evlist__munmap(evlist);
2001 out_close_evlist:
2002         perf_evlist__close(evlist);
2003 out_delete_maps:
2004         perf_evlist__delete_maps(evlist);
2005 out_delete_evlist:
2006         perf_evlist__delete(evlist);
2007 out:
2008         trace->live = false;
2009         return err;
2010 {
2011         char errbuf[BUFSIZ];
2012
2013 out_error_tp:
2014         perf_evlist__strerror_tp(evlist, errno, errbuf, sizeof(errbuf));
2015         goto out_error;
2016
2017 out_error_open:
2018         perf_evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf));
2019
2020 out_error:
2021         fprintf(trace->output, "%s\n", errbuf);
2022         goto out_delete_evlist;
2023 }
2024 }
2025
2026 static int trace__replay(struct trace *trace)
2027 {
2028         const struct perf_evsel_str_handler handlers[] = {
2029                 { "probe:vfs_getname",       trace__vfs_getname, },
2030         };
2031         struct perf_data_file file = {
2032                 .path  = input_name,
2033                 .mode  = PERF_DATA_MODE_READ,
2034         };
2035         struct perf_session *session;
2036         struct perf_evsel *evsel;
2037         int err = -1;
2038
2039         trace->tool.sample        = trace__process_sample;
2040         trace->tool.mmap          = perf_event__process_mmap;
2041         trace->tool.mmap2         = perf_event__process_mmap2;
2042         trace->tool.comm          = perf_event__process_comm;
2043         trace->tool.exit          = perf_event__process_exit;
2044         trace->tool.fork          = perf_event__process_fork;
2045         trace->tool.attr          = perf_event__process_attr;
2046         trace->tool.tracing_data = perf_event__process_tracing_data;
2047         trace->tool.build_id      = perf_event__process_build_id;
2048
2049         trace->tool.ordered_samples = true;
2050         trace->tool.ordering_requires_timestamps = true;
2051
2052         /* add tid to output */
2053         trace->multiple_threads = true;
2054
2055         if (symbol__init() < 0)
2056                 return -1;
2057
2058         session = perf_session__new(&file, false, &trace->tool);
2059         if (session == NULL)
2060                 return -ENOMEM;
2061
2062         trace->host = &session->machines.host;
2063
2064         err = perf_session__set_tracepoints_handlers(session, handlers);
2065         if (err)
2066                 goto out;
2067
2068         evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2069                                                      "raw_syscalls:sys_enter");
2070         /* older kernels have syscalls tp versus raw_syscalls */
2071         if (evsel == NULL)
2072                 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2073                                                              "syscalls:sys_enter");
2074         if (evsel == NULL) {
2075                 pr_err("Data file does not have raw_syscalls:sys_enter event\n");
2076                 goto out;
2077         }
2078
2079         if (perf_evsel__init_syscall_tp(evsel, trace__sys_enter) < 0 ||
2080             perf_evsel__init_sc_tp_ptr_field(evsel, args)) {
2081                 pr_err("Error during initialize raw_syscalls:sys_enter event\n");
2082                 goto out;
2083         }
2084
2085         evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2086                                                      "raw_syscalls:sys_exit");
2087         if (evsel == NULL)
2088                 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2089                                                              "syscalls:sys_exit");
2090         if (evsel == NULL) {
2091                 pr_err("Data file does not have raw_syscalls:sys_exit event\n");
2092                 goto out;
2093         }
2094
2095         if (perf_evsel__init_syscall_tp(evsel, trace__sys_exit) < 0 ||
2096             perf_evsel__init_sc_tp_uint_field(evsel, ret)) {
2097                 pr_err("Error during initialize raw_syscalls:sys_exit event\n");
2098                 goto out;
2099         }
2100
2101         err = parse_target_str(trace);
2102         if (err != 0)
2103                 goto out;
2104
2105         setup_pager();
2106
2107         err = perf_session__process_events(session, &trace->tool);
2108         if (err)
2109                 pr_err("Failed to process events, error %d", err);
2110
2111         else if (trace->summary)
2112                 trace__fprintf_thread_summary(trace, trace->output);
2113
2114 out:
2115         perf_session__delete(session);
2116
2117         return err;
2118 }
2119
/*
 * Print the heading of the per-thread summary to @fp and return the value
 * reported by fprintf(), converted to size_t.
 */
static size_t trace__fprintf_threads_header(FILE *fp)
{
	return fprintf(fp, "\n Summary of events:\n\n");
}
2128
2129 static size_t thread__dump_stats(struct thread_trace *ttrace,
2130                                  struct trace *trace, FILE *fp)
2131 {
2132         struct stats *stats;
2133         size_t printed = 0;
2134         struct syscall *sc;
2135         struct int_node *inode = intlist__first(ttrace->syscall_stats);
2136
2137         if (inode == NULL)
2138                 return 0;
2139
2140         printed += fprintf(fp, "\n");
2141
2142         printed += fprintf(fp, "   syscall            calls      min       avg       max      stddev\n");
2143         printed += fprintf(fp, "                               (msec)    (msec)    (msec)        (%%)\n");
2144         printed += fprintf(fp, "   --------------- -------- --------- --------- ---------     ------\n");
2145
2146         /* each int_node is a syscall */
2147         while (inode) {
2148                 stats = inode->priv;
2149                 if (stats) {
2150                         double min = (double)(stats->min) / NSEC_PER_MSEC;
2151                         double max = (double)(stats->max) / NSEC_PER_MSEC;
2152                         double avg = avg_stats(stats);
2153                         double pct;
2154                         u64 n = (u64) stats->n;
2155
2156                         pct = avg ? 100.0 * stddev_stats(stats)/avg : 0.0;
2157                         avg /= NSEC_PER_MSEC;
2158
2159                         sc = &trace->syscalls.table[inode->i];
2160                         printed += fprintf(fp, "   %-15s", sc->name);
2161                         printed += fprintf(fp, " %8" PRIu64 " %9.3f %9.3f",
2162                                            n, min, avg);
2163                         printed += fprintf(fp, " %9.3f %9.2f%%\n", max, pct);
2164                 }
2165
2166                 inode = intlist__next(inode);
2167         }
2168
2169         printed += fprintf(fp, "\n\n");
2170
2171         return printed;
2172 }
2173
/*
 * Context handed to the per-thread summary callback:
 * where to print, which trace is being summarized, and the running count of
 * characters printed so far.
 */
struct summary_data {
	FILE *fp;		/* output stream */
	struct trace *trace;	/* trace being summarized */
	size_t printed;		/* characters printed so far */
};
2180
2181 static int trace__fprintf_one_thread(struct thread *thread, void *priv)
2182 {
2183         struct summary_data *data = priv;
2184         FILE *fp = data->fp;
2185         size_t printed = data->printed;
2186         struct trace *trace = data->trace;
2187         struct thread_trace *ttrace = thread->priv;
2188         double ratio;
2189
2190         if (ttrace == NULL)
2191                 return 0;
2192
2193         ratio = (double)ttrace->nr_events / trace->nr_events * 100.0;
2194
2195         printed += fprintf(fp, " %s (%d), ", thread__comm_str(thread), thread->tid);
2196         printed += fprintf(fp, "%lu events, ", ttrace->nr_events);
2197         printed += fprintf(fp, "%.1f%%", ratio);
2198         printed += fprintf(fp, ", %.3f msec\n", ttrace->runtime_ms);
2199         printed += thread__dump_stats(ttrace, trace, fp);
2200
2201         data->printed += printed;
2202
2203         return 0;
2204 }
2205
2206 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
2207 {
2208         struct summary_data data = {
2209                 .fp = fp,
2210                 .trace = trace
2211         };
2212         data.printed = trace__fprintf_threads_header(fp);
2213
2214         machine__for_each_thread(trace->host, trace__fprintf_one_thread, &data);
2215
2216         return data.printed;
2217 }
2218
2219 static int trace__set_duration(const struct option *opt, const char *str,
2220                                int unset __maybe_unused)
2221 {
2222         struct trace *trace = opt->value;
2223
2224         trace->duration_filter = atof(str);
2225         return 0;
2226 }
2227
2228 static int trace__open_output(struct trace *trace, const char *filename)
2229 {
2230         struct stat st;
2231
2232         if (!stat(filename, &st) && st.st_size) {
2233                 char oldname[PATH_MAX];
2234
2235                 scnprintf(oldname, sizeof(oldname), "%s.old", filename);
2236                 unlink(oldname);
2237                 rename(filename, oldname);
2238         }
2239
2240         trace->output = fopen(filename, "w");
2241
2242         return trace->output == NULL ? -errno : 0;
2243 }
2244
2245 int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
2246 {
2247         const char * const trace_usage[] = {
2248                 "perf trace [<options>] [<command>]",
2249                 "perf trace [<options>] -- <command> [<options>]",
2250                 "perf trace record [<options>] [<command>]",
2251                 "perf trace record [<options>] -- <command> [<options>]",
2252                 NULL
2253         };
2254         struct trace trace = {
2255                 .audit = {
2256                         .machine = audit_detect_machine(),
2257                         .open_id = audit_name_to_syscall("open", trace.audit.machine),
2258                 },
2259                 .syscalls = {
2260                         . max = -1,
2261                 },
2262                 .opts = {
2263                         .target = {
2264                                 .uid       = UINT_MAX,
2265                                 .uses_mmap = true,
2266                         },
2267                         .user_freq     = UINT_MAX,
2268                         .user_interval = ULLONG_MAX,
2269                         .no_delay      = true,
2270                         .mmap_pages    = 1024,
2271                 },
2272                 .output = stdout,
2273                 .show_comm = true,
2274         };
2275         const char *output_name = NULL;
2276         const char *ev_qualifier_str = NULL;
2277         const struct option trace_options[] = {
2278         OPT_BOOLEAN(0, "comm", &trace.show_comm,
2279                     "show the thread COMM next to its id"),
2280         OPT_BOOLEAN(0, "tool_stats", &trace.show_tool_stats, "show tool stats"),
2281         OPT_STRING('e', "expr", &ev_qualifier_str, "expr",
2282                     "list of events to trace"),
2283         OPT_STRING('o', "output", &output_name, "file", "output file name"),
2284         OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"),
2285         OPT_STRING('p', "pid", &trace.opts.target.pid, "pid",
2286                     "trace events on existing process id"),
2287         OPT_STRING('t', "tid", &trace.opts.target.tid, "tid",
2288                     "trace events on existing thread id"),
2289         OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide,
2290                     "system-wide collection from all CPUs"),
2291         OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu",
2292                     "list of cpus to monitor"),
2293         OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit,
2294                     "child tasks do not inherit counters"),
2295         OPT_CALLBACK('m', "mmap-pages", &trace.opts.mmap_pages, "pages",
2296                      "number of mmap data pages",
2297                      perf_evlist__parse_mmap_pages),
2298         OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user",
2299                    "user to profile"),
2300         OPT_CALLBACK(0, "duration", &trace, "float",
2301                      "show only events with duration > N.M ms",
2302                      trace__set_duration),
2303         OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"),
2304         OPT_INCR('v', "verbose", &verbose, "be more verbose"),
2305         OPT_BOOLEAN('T', "time", &trace.full_time,
2306                     "Show full timestamp, not time relative to first start"),
2307         OPT_BOOLEAN('s', "summary", &trace.summary_only,
2308                     "Show only syscall summary with statistics"),
2309         OPT_BOOLEAN('S', "with-summary", &trace.summary,
2310                     "Show all syscalls and summary with statistics"),
2311         OPT_END()
2312         };
2313         int err;
2314         char bf[BUFSIZ];
2315
2316         if ((argc > 1) && (strcmp(argv[1], "record") == 0))
2317                 return trace__record(argc-2, &argv[2]);
2318
2319         argc = parse_options(argc, argv, trace_options, trace_usage, 0);
2320
2321         /* summary_only implies summary option, but don't overwrite summary if set */
2322         if (trace.summary_only)
2323                 trace.summary = trace.summary_only;
2324
2325         if (output_name != NULL) {
2326                 err = trace__open_output(&trace, output_name);
2327                 if (err < 0) {
2328                         perror("failed to create output file");
2329                         goto out;
2330                 }
2331         }
2332
2333         if (ev_qualifier_str != NULL) {
2334                 const char *s = ev_qualifier_str;
2335
2336                 trace.not_ev_qualifier = *s == '!';
2337                 if (trace.not_ev_qualifier)
2338                         ++s;
2339                 trace.ev_qualifier = strlist__new(true, s);
2340                 if (trace.ev_qualifier == NULL) {
2341                         fputs("Not enough memory to parse event qualifier",
2342                               trace.output);
2343                         err = -ENOMEM;
2344                         goto out_close;
2345                 }
2346         }
2347
2348         err = target__validate(&trace.opts.target);
2349         if (err) {
2350                 target__strerror(&trace.opts.target, err, bf, sizeof(bf));
2351                 fprintf(trace.output, "%s", bf);
2352                 goto out_close;
2353         }
2354
2355         err = target__parse_uid(&trace.opts.target);
2356         if (err) {
2357                 target__strerror(&trace.opts.target, err, bf, sizeof(bf));
2358                 fprintf(trace.output, "%s", bf);
2359                 goto out_close;
2360         }
2361
2362         if (!argc && target__none(&trace.opts.target))
2363                 trace.opts.target.system_wide = true;
2364
2365         if (input_name)
2366                 err = trace__replay(&trace);
2367         else
2368                 err = trace__run(&trace, argc, argv);
2369
2370 out_close:
2371         if (output_name != NULL)
2372                 fclose(trace.output);
2373 out:
2374         return err;
2375 }