perf trace: Add support for syscalls vs raw_syscalls
[firefly-linux-kernel-4.4.55.git] / tools / perf / builtin-trace.c
1 #include <traceevent/event-parse.h>
2 #include "builtin.h"
3 #include "util/color.h"
4 #include "util/debug.h"
5 #include "util/evlist.h"
6 #include "util/machine.h"
7 #include "util/session.h"
8 #include "util/thread.h"
9 #include "util/parse-options.h"
10 #include "util/strlist.h"
11 #include "util/intlist.h"
12 #include "util/thread_map.h"
13 #include "util/stat.h"
14 #include "trace-event.h"
15 #include "util/parse-events.h"
16
#include <libaudit.h>
#include <stdlib.h>
#include <string.h>
#include <sys/eventfd.h>
#include <sys/mman.h>
#include <linux/futex.h>
22
23 /* For older distros: */
24 #ifndef MAP_STACK
25 # define MAP_STACK              0x20000
26 #endif
27
28 #ifndef MADV_HWPOISON
29 # define MADV_HWPOISON          100
30 #endif
31
32 #ifndef MADV_MERGEABLE
33 # define MADV_MERGEABLE         12
34 #endif
35
36 #ifndef MADV_UNMERGEABLE
37 # define MADV_UNMERGEABLE       13
38 #endif
39
40 struct tp_field {
41         int offset;
42         union {
43                 u64 (*integer)(struct tp_field *field, struct perf_sample *sample);
44                 void *(*pointer)(struct tp_field *field, struct perf_sample *sample);
45         };
46 };
47
48 #define TP_UINT_FIELD(bits) \
49 static u64 tp_field__u##bits(struct tp_field *field, struct perf_sample *sample) \
50 { \
51         return *(u##bits *)(sample->raw_data + field->offset); \
52 }
53
54 TP_UINT_FIELD(8);
55 TP_UINT_FIELD(16);
56 TP_UINT_FIELD(32);
57 TP_UINT_FIELD(64);
58
59 #define TP_UINT_FIELD__SWAPPED(bits) \
60 static u64 tp_field__swapped_u##bits(struct tp_field *field, struct perf_sample *sample) \
61 { \
62         u##bits value = *(u##bits *)(sample->raw_data + field->offset); \
63         return bswap_##bits(value);\
64 }
65
66 TP_UINT_FIELD__SWAPPED(16);
67 TP_UINT_FIELD__SWAPPED(32);
68 TP_UINT_FIELD__SWAPPED(64);
69
70 static int tp_field__init_uint(struct tp_field *field,
71                                struct format_field *format_field,
72                                bool needs_swap)
73 {
74         field->offset = format_field->offset;
75
76         switch (format_field->size) {
77         case 1:
78                 field->integer = tp_field__u8;
79                 break;
80         case 2:
81                 field->integer = needs_swap ? tp_field__swapped_u16 : tp_field__u16;
82                 break;
83         case 4:
84                 field->integer = needs_swap ? tp_field__swapped_u32 : tp_field__u32;
85                 break;
86         case 8:
87                 field->integer = needs_swap ? tp_field__swapped_u64 : tp_field__u64;
88                 break;
89         default:
90                 return -1;
91         }
92
93         return 0;
94 }
95
96 static void *tp_field__ptr(struct tp_field *field, struct perf_sample *sample)
97 {
98         return sample->raw_data + field->offset;
99 }
100
101 static int tp_field__init_ptr(struct tp_field *field, struct format_field *format_field)
102 {
103         field->offset = format_field->offset;
104         field->pointer = tp_field__ptr;
105         return 0;
106 }
107
108 struct syscall_tp {
109         struct tp_field id;
110         union {
111                 struct tp_field args, ret;
112         };
113 };
114
115 static int perf_evsel__init_tp_uint_field(struct perf_evsel *evsel,
116                                           struct tp_field *field,
117                                           const char *name)
118 {
119         struct format_field *format_field = perf_evsel__field(evsel, name);
120
121         if (format_field == NULL)
122                 return -1;
123
124         return tp_field__init_uint(field, format_field, evsel->needs_swap);
125 }
126
/*
 * Initialise member 'name' of the struct syscall_tp hanging off evsel->priv
 * as an unsigned integer reader for the tracepoint field of the same name.
 */
#define perf_evsel__init_sc_tp_uint_field(evsel, name) \
	({ struct syscall_tp *__sc_tp = (evsel)->priv;\
	   perf_evsel__init_tp_uint_field((evsel), &__sc_tp->name, #name); })
130
131 static int perf_evsel__init_tp_ptr_field(struct perf_evsel *evsel,
132                                          struct tp_field *field,
133                                          const char *name)
134 {
135         struct format_field *format_field = perf_evsel__field(evsel, name);
136
137         if (format_field == NULL)
138                 return -1;
139
140         return tp_field__init_ptr(field, format_field);
141 }
142
/*
 * Initialise member 'name' of the struct syscall_tp hanging off evsel->priv
 * as a pointer accessor for the tracepoint field of the same name.
 */
#define perf_evsel__init_sc_tp_ptr_field(evsel, name) \
	({ struct syscall_tp *__sc_tp = (evsel)->priv;\
	   perf_evsel__init_tp_ptr_field((evsel), &__sc_tp->name, #name); })
146
147 static void perf_evsel__delete_priv(struct perf_evsel *evsel)
148 {
149         free(evsel->priv);
150         evsel->priv = NULL;
151         perf_evsel__delete(evsel);
152 }
153
154 static int perf_evsel__init_syscall_tp(struct perf_evsel *evsel, void *handler)
155 {
156         evsel->priv = malloc(sizeof(struct syscall_tp));
157         if (evsel->priv != NULL) {
158                 if (perf_evsel__init_sc_tp_uint_field(evsel, id))
159                         goto out_delete;
160
161                 evsel->handler = handler;
162                 return 0;
163         }
164
165         return -ENOMEM;
166
167 out_delete:
168         free(evsel->priv);
169         evsel->priv = NULL;
170         return -ENOENT;
171 }
172
/*
 * Create the evsel for the sys_enter/sys_exit tracepoint named by @direction
 * and prepare it with perf_evsel__init_syscall_tp().
 *
 * Returns the new evsel, or NULL when the tracepoint cannot be created or
 * its initialisation fails.
 */
static struct perf_evsel *perf_evsel__syscall_newtp(const char *direction, void *handler)
{
	struct perf_evsel *evsel = perf_evsel__newtp("raw_syscalls", direction);

	/* older kernel (e.g., RHEL6) use syscalls:{enter,exit} */
	if (evsel == NULL)
		evsel = perf_evsel__newtp("syscalls", direction);

	if (evsel == NULL)
		return NULL;

	if (perf_evsel__init_syscall_tp(evsel, handler)) {
		perf_evsel__delete_priv(evsel);
		return NULL;
	}

	return evsel;
}
192
/* Read member 'name' of evsel's struct syscall_tp from 'sample' as an integer. */
#define perf_evsel__sc_tp_uint(evsel, name, sample) \
	({ struct syscall_tp *__sc_tp = (evsel)->priv; \
	   __sc_tp->name.integer(&__sc_tp->name, (sample)); })
196
/* Get the address of member 'name' of evsel's struct syscall_tp within 'sample'. */
#define perf_evsel__sc_tp_ptr(evsel, name, sample) \
	({ struct syscall_tp *__sc_tp = (evsel)->priv; \
	   __sc_tp->name.pointer(&__sc_tp->name, (sample)); })
200
201 static int perf_evlist__add_syscall_newtp(struct perf_evlist *evlist,
202                                           void *sys_enter_handler,
203                                           void *sys_exit_handler)
204 {
205         int ret = -1;
206         struct perf_evsel *sys_enter, *sys_exit;
207
208         sys_enter = perf_evsel__syscall_newtp("sys_enter", sys_enter_handler);
209         if (sys_enter == NULL)
210                 goto out;
211
212         if (perf_evsel__init_sc_tp_ptr_field(sys_enter, args))
213                 goto out_delete_sys_enter;
214
215         sys_exit = perf_evsel__syscall_newtp("sys_exit", sys_exit_handler);
216         if (sys_exit == NULL)
217                 goto out_delete_sys_enter;
218
219         if (perf_evsel__init_sc_tp_uint_field(sys_exit, ret))
220                 goto out_delete_sys_exit;
221
222         perf_evlist__add(evlist, sys_enter);
223         perf_evlist__add(evlist, sys_exit);
224
225         ret = 0;
226 out:
227         return ret;
228
229 out_delete_sys_exit:
230         perf_evsel__delete_priv(sys_exit);
231 out_delete_sys_enter:
232         perf_evsel__delete_priv(sys_enter);
233         goto out;
234 }
235
236
237 struct syscall_arg {
238         unsigned long val;
239         struct thread *thread;
240         struct trace  *trace;
241         void          *parm;
242         u8            idx;
243         u8            mask;
244 };
245
/* Table mapping consecutive integer values to names. */
struct strarray {
	/* Value that corresponds to entries[0]. */
	int         offset;
	/* Number of slots in entries[]. */
	int         nr_entries;
	/* Names, indexed by (value - offset). */
	const char **entries;
};
251
/* Define strarray__<array> wrapping 'array', with entries[0] standing for value 0. */
#define DEFINE_STRARRAY(array) struct strarray strarray__##array = { \
	.nr_entries = ARRAY_SIZE(array),                              \
	.entries    = array,                                          \
}
256
/* Define strarray__<array> wrapping 'array', with entries[0] standing for value 'off'. */
#define DEFINE_STRARRAY_OFFSET(array, off) struct strarray strarray__##array = { \
	.offset     = off,                                                        \
	.nr_entries = ARRAY_SIZE(array),                                          \
	.entries    = array,                                                      \
}
262
263 static size_t __syscall_arg__scnprintf_strarray(char *bf, size_t size,
264                                                 const char *intfmt,
265                                                 struct syscall_arg *arg)
266 {
267         struct strarray *sa = arg->parm;
268         int idx = arg->val - sa->offset;
269
270         if (idx < 0 || idx >= sa->nr_entries)
271                 return scnprintf(bf, size, intfmt, arg->val);
272
273         return scnprintf(bf, size, "%s", sa->entries[idx]);
274 }
275
/* strarray formatter that prints values without a name as decimal. */
static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size,
					      struct syscall_arg *arg)
{
	const char *fallback_fmt = "%d";

	return __syscall_arg__scnprintf_strarray(bf, size, fallback_fmt, arg);
}

#define SCA_STRARRAY syscall_arg__scnprintf_strarray
283
/* strarray formatter that prints values without a name as hexadecimal. */
static size_t syscall_arg__scnprintf_strhexarray(char *bf, size_t size,
						 struct syscall_arg *arg)
{
	const char *fallback_fmt = "%#x";

	return __syscall_arg__scnprintf_strarray(bf, size, fallback_fmt, arg);
}

#define SCA_STRHEXARRAY syscall_arg__scnprintf_strhexarray
291
292 static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
293                                         struct syscall_arg *arg);
294
295 #define SCA_FD syscall_arg__scnprintf_fd
296
297 static size_t syscall_arg__scnprintf_fd_at(char *bf, size_t size,
298                                            struct syscall_arg *arg)
299 {
300         int fd = arg->val;
301
302         if (fd == AT_FDCWD)
303                 return scnprintf(bf, size, "CWD");
304
305         return syscall_arg__scnprintf_fd(bf, size, arg);
306 }
307
308 #define SCA_FDAT syscall_arg__scnprintf_fd_at
309
310 static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
311                                               struct syscall_arg *arg);
312
313 #define SCA_CLOSE_FD syscall_arg__scnprintf_close_fd
314
315 static size_t syscall_arg__scnprintf_hex(char *bf, size_t size,
316                                          struct syscall_arg *arg)
317 {
318         return scnprintf(bf, size, "%#lx", arg->val);
319 }
320
321 #define SCA_HEX syscall_arg__scnprintf_hex
322
323 static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size,
324                                                struct syscall_arg *arg)
325 {
326         int printed = 0, prot = arg->val;
327
328         if (prot == PROT_NONE)
329                 return scnprintf(bf, size, "NONE");
330 #define P_MMAP_PROT(n) \
331         if (prot & PROT_##n) { \
332                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
333                 prot &= ~PROT_##n; \
334         }
335
336         P_MMAP_PROT(EXEC);
337         P_MMAP_PROT(READ);
338         P_MMAP_PROT(WRITE);
339 #ifdef PROT_SEM
340         P_MMAP_PROT(SEM);
341 #endif
342         P_MMAP_PROT(GROWSDOWN);
343         P_MMAP_PROT(GROWSUP);
344 #undef P_MMAP_PROT
345
346         if (prot)
347                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", prot);
348
349         return printed;
350 }
351
352 #define SCA_MMAP_PROT syscall_arg__scnprintf_mmap_prot
353
354 static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size,
355                                                 struct syscall_arg *arg)
356 {
357         int printed = 0, flags = arg->val;
358
359 #define P_MMAP_FLAG(n) \
360         if (flags & MAP_##n) { \
361                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
362                 flags &= ~MAP_##n; \
363         }
364
365         P_MMAP_FLAG(SHARED);
366         P_MMAP_FLAG(PRIVATE);
367 #ifdef MAP_32BIT
368         P_MMAP_FLAG(32BIT);
369 #endif
370         P_MMAP_FLAG(ANONYMOUS);
371         P_MMAP_FLAG(DENYWRITE);
372         P_MMAP_FLAG(EXECUTABLE);
373         P_MMAP_FLAG(FILE);
374         P_MMAP_FLAG(FIXED);
375         P_MMAP_FLAG(GROWSDOWN);
376 #ifdef MAP_HUGETLB
377         P_MMAP_FLAG(HUGETLB);
378 #endif
379         P_MMAP_FLAG(LOCKED);
380         P_MMAP_FLAG(NONBLOCK);
381         P_MMAP_FLAG(NORESERVE);
382         P_MMAP_FLAG(POPULATE);
383         P_MMAP_FLAG(STACK);
384 #ifdef MAP_UNINITIALIZED
385         P_MMAP_FLAG(UNINITIALIZED);
386 #endif
387 #undef P_MMAP_FLAG
388
389         if (flags)
390                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
391
392         return printed;
393 }
394
395 #define SCA_MMAP_FLAGS syscall_arg__scnprintf_mmap_flags
396
397 static size_t syscall_arg__scnprintf_madvise_behavior(char *bf, size_t size,
398                                                       struct syscall_arg *arg)
399 {
400         int behavior = arg->val;
401
402         switch (behavior) {
403 #define P_MADV_BHV(n) case MADV_##n: return scnprintf(bf, size, #n)
404         P_MADV_BHV(NORMAL);
405         P_MADV_BHV(RANDOM);
406         P_MADV_BHV(SEQUENTIAL);
407         P_MADV_BHV(WILLNEED);
408         P_MADV_BHV(DONTNEED);
409         P_MADV_BHV(REMOVE);
410         P_MADV_BHV(DONTFORK);
411         P_MADV_BHV(DOFORK);
412         P_MADV_BHV(HWPOISON);
413 #ifdef MADV_SOFT_OFFLINE
414         P_MADV_BHV(SOFT_OFFLINE);
415 #endif
416         P_MADV_BHV(MERGEABLE);
417         P_MADV_BHV(UNMERGEABLE);
418 #ifdef MADV_HUGEPAGE
419         P_MADV_BHV(HUGEPAGE);
420 #endif
421 #ifdef MADV_NOHUGEPAGE
422         P_MADV_BHV(NOHUGEPAGE);
423 #endif
424 #ifdef MADV_DONTDUMP
425         P_MADV_BHV(DONTDUMP);
426 #endif
427 #ifdef MADV_DODUMP
428         P_MADV_BHV(DODUMP);
429 #endif
430 #undef P_MADV_PHV
431         default: break;
432         }
433
434         return scnprintf(bf, size, "%#x", behavior);
435 }
436
437 #define SCA_MADV_BHV syscall_arg__scnprintf_madvise_behavior
438
439 static size_t syscall_arg__scnprintf_flock(char *bf, size_t size,
440                                            struct syscall_arg *arg)
441 {
442         int printed = 0, op = arg->val;
443
444         if (op == 0)
445                 return scnprintf(bf, size, "NONE");
446 #define P_CMD(cmd) \
447         if ((op & LOCK_##cmd) == LOCK_##cmd) { \
448                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #cmd); \
449                 op &= ~LOCK_##cmd; \
450         }
451
452         P_CMD(SH);
453         P_CMD(EX);
454         P_CMD(NB);
455         P_CMD(UN);
456         P_CMD(MAND);
457         P_CMD(RW);
458         P_CMD(READ);
459         P_CMD(WRITE);
460 #undef P_OP
461
462         if (op)
463                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", op);
464
465         return printed;
466 }
467
468 #define SCA_FLOCK syscall_arg__scnprintf_flock
469
470 static size_t syscall_arg__scnprintf_futex_op(char *bf, size_t size, struct syscall_arg *arg)
471 {
472         enum syscall_futex_args {
473                 SCF_UADDR   = (1 << 0),
474                 SCF_OP      = (1 << 1),
475                 SCF_VAL     = (1 << 2),
476                 SCF_TIMEOUT = (1 << 3),
477                 SCF_UADDR2  = (1 << 4),
478                 SCF_VAL3    = (1 << 5),
479         };
480         int op = arg->val;
481         int cmd = op & FUTEX_CMD_MASK;
482         size_t printed = 0;
483
484         switch (cmd) {
485 #define P_FUTEX_OP(n) case FUTEX_##n: printed = scnprintf(bf, size, #n);
486         P_FUTEX_OP(WAIT);           arg->mask |= SCF_VAL3|SCF_UADDR2;             break;
487         P_FUTEX_OP(WAKE);           arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
488         P_FUTEX_OP(FD);             arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
489         P_FUTEX_OP(REQUEUE);        arg->mask |= SCF_VAL3|SCF_TIMEOUT;            break;
490         P_FUTEX_OP(CMP_REQUEUE);    arg->mask |= SCF_TIMEOUT;                     break;
491         P_FUTEX_OP(CMP_REQUEUE_PI); arg->mask |= SCF_TIMEOUT;                     break;
492         P_FUTEX_OP(WAKE_OP);                                                      break;
493         P_FUTEX_OP(LOCK_PI);        arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
494         P_FUTEX_OP(UNLOCK_PI);      arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
495         P_FUTEX_OP(TRYLOCK_PI);     arg->mask |= SCF_VAL3|SCF_UADDR2;             break;
496         P_FUTEX_OP(WAIT_BITSET);    arg->mask |= SCF_UADDR2;                      break;
497         P_FUTEX_OP(WAKE_BITSET);    arg->mask |= SCF_UADDR2;                      break;
498         P_FUTEX_OP(WAIT_REQUEUE_PI);                                              break;
499         default: printed = scnprintf(bf, size, "%#x", cmd);                       break;
500         }
501
502         if (op & FUTEX_PRIVATE_FLAG)
503                 printed += scnprintf(bf + printed, size - printed, "|PRIV");
504
505         if (op & FUTEX_CLOCK_REALTIME)
506                 printed += scnprintf(bf + printed, size - printed, "|CLKRT");
507
508         return printed;
509 }
510
511 #define SCA_FUTEX_OP  syscall_arg__scnprintf_futex_op
512
/* epoll_ctl() 'op' names; the first entry corresponds to op value 1. */
static const char *epoll_ctl_ops[] = {
	"ADD",
	"DEL",
	"MOD",
};
514 static DEFINE_STRARRAY_OFFSET(epoll_ctl_ops, 1);
515
/* Interval timer names, indexed by the 'which' value (starting at 0). */
static const char *itimers[] = {
	"REAL",
	"VIRTUAL",
	"PROF",
};
517 static DEFINE_STRARRAY(itimers);
518
/*
 * Seek origin names, indexed by the 'whence' value.  The DATA and HOLE
 * entries are only present when the build environment defines them.
 */
static const char *whences[] = {
	"SET",
	"CUR",
	"END",
#ifdef SEEK_DATA
	"DATA",
#endif
#ifdef SEEK_HOLE
	"HOLE",
#endif
};
527 static DEFINE_STRARRAY(whences);
528
/*
 * fcntl() command names, indexed by command value (starting at 0).
 * Every name is spelled without the F_ prefix, so the output is uniform
 * across the whole table.
 */
static const char *fcntl_cmds[] = {
	"DUPFD", "GETFD", "SETFD", "GETFL", "SETFL", "GETLK", "SETLK",
	"SETLKW", "SETOWN", "GETOWN", "SETSIG", "GETSIG", "GETLK64",
	"SETLK64", "SETLKW64", "SETOWN_EX", "GETOWN_EX",
	"GETOWNER_UIDS",
};
535 static DEFINE_STRARRAY(fcntl_cmds);
536
/* Resource limit names, indexed by the 'resource' value (starting at 0). */
static const char *rlimit_resources[] = {
	"CPU",	      "FSIZE",	    "DATA",	"STACK",
	"CORE",	      "RSS",	    "NPROC",	"NOFILE",
	"MEMLOCK",    "AS",	    "LOCKS",	"SIGPENDING",
	"MSGQUEUE",   "NICE",	    "RTPRIO",	"RTTIME",
};
542 static DEFINE_STRARRAY(rlimit_resources);
543
/* Signal mask 'how' names, indexed by value (starting at 0). */
static const char *sighow[] = {
	"BLOCK",
	"UNBLOCK",
	"SETMASK",
};
545 static DEFINE_STRARRAY(sighow);
546
/* Clock names, indexed by clock id (starting at 0). */
static const char *clockid[] = {
	"REALTIME",
	"MONOTONIC",
	"PROCESS_CPUTIME_ID",
	"THREAD_CPUTIME_ID",
	"MONOTONIC_RAW",
	"REALTIME_COARSE",
	"MONOTONIC_COARSE",
};
551 static DEFINE_STRARRAY(clockid);
552
/*
 * Socket address family names, indexed by the AF_* value (starting at
 * AF_UNSPEC == 0).  The table has to stay dense: a missing entry shifts
 * every later family by one.  "MPLS" fills slot 28 so that CAN (29)
 * through VSOCK (40) sit at their real values.
 */
static const char *socket_families[] = {
	"UNSPEC", "LOCAL", "INET", "AX25", "IPX", "APPLETALK", "NETROM",
	"BRIDGE", "ATMPVC", "X25", "INET6", "ROSE", "DECnet", "NETBEUI",
	"SECURITY", "KEY", "NETLINK", "PACKET", "ASH", "ECONET", "ATMSVC",
	"RDS", "SNA", "IRDA", "PPPOX", "WANPIPE", "LLC", "IB", "MPLS", "CAN",
	"TIPC", "BLUETOOTH", "IUCV", "RXRPC", "ISDN", "PHONET", "IEEE802154",
	"CAIF", "ALG", "NFC", "VSOCK",
};
561 static DEFINE_STRARRAY(socket_families);
562
563 #ifndef SOCK_TYPE_MASK
564 #define SOCK_TYPE_MASK 0xf
565 #endif
566
567 static size_t syscall_arg__scnprintf_socket_type(char *bf, size_t size,
568                                                       struct syscall_arg *arg)
569 {
570         size_t printed;
571         int type = arg->val,
572             flags = type & ~SOCK_TYPE_MASK;
573
574         type &= SOCK_TYPE_MASK;
575         /*
576          * Can't use a strarray, MIPS may override for ABI reasons.
577          */
578         switch (type) {
579 #define P_SK_TYPE(n) case SOCK_##n: printed = scnprintf(bf, size, #n); break;
580         P_SK_TYPE(STREAM);
581         P_SK_TYPE(DGRAM);
582         P_SK_TYPE(RAW);
583         P_SK_TYPE(RDM);
584         P_SK_TYPE(SEQPACKET);
585         P_SK_TYPE(DCCP);
586         P_SK_TYPE(PACKET);
587 #undef P_SK_TYPE
588         default:
589                 printed = scnprintf(bf, size, "%#x", type);
590         }
591
592 #define P_SK_FLAG(n) \
593         if (flags & SOCK_##n) { \
594                 printed += scnprintf(bf + printed, size - printed, "|%s", #n); \
595                 flags &= ~SOCK_##n; \
596         }
597
598         P_SK_FLAG(CLOEXEC);
599         P_SK_FLAG(NONBLOCK);
600 #undef P_SK_FLAG
601
602         if (flags)
603                 printed += scnprintf(bf + printed, size - printed, "|%#x", flags);
604
605         return printed;
606 }
607
608 #define SCA_SK_TYPE syscall_arg__scnprintf_socket_type
609
610 #ifndef MSG_PROBE
611 #define MSG_PROBE            0x10
612 #endif
613 #ifndef MSG_WAITFORONE
614 #define MSG_WAITFORONE  0x10000
615 #endif
616 #ifndef MSG_SENDPAGE_NOTLAST
617 #define MSG_SENDPAGE_NOTLAST 0x20000
618 #endif
619 #ifndef MSG_FASTOPEN
620 #define MSG_FASTOPEN         0x20000000
621 #endif
622
623 static size_t syscall_arg__scnprintf_msg_flags(char *bf, size_t size,
624                                                struct syscall_arg *arg)
625 {
626         int printed = 0, flags = arg->val;
627
628         if (flags == 0)
629                 return scnprintf(bf, size, "NONE");
630 #define P_MSG_FLAG(n) \
631         if (flags & MSG_##n) { \
632                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
633                 flags &= ~MSG_##n; \
634         }
635
636         P_MSG_FLAG(OOB);
637         P_MSG_FLAG(PEEK);
638         P_MSG_FLAG(DONTROUTE);
639         P_MSG_FLAG(TRYHARD);
640         P_MSG_FLAG(CTRUNC);
641         P_MSG_FLAG(PROBE);
642         P_MSG_FLAG(TRUNC);
643         P_MSG_FLAG(DONTWAIT);
644         P_MSG_FLAG(EOR);
645         P_MSG_FLAG(WAITALL);
646         P_MSG_FLAG(FIN);
647         P_MSG_FLAG(SYN);
648         P_MSG_FLAG(CONFIRM);
649         P_MSG_FLAG(RST);
650         P_MSG_FLAG(ERRQUEUE);
651         P_MSG_FLAG(NOSIGNAL);
652         P_MSG_FLAG(MORE);
653         P_MSG_FLAG(WAITFORONE);
654         P_MSG_FLAG(SENDPAGE_NOTLAST);
655         P_MSG_FLAG(FASTOPEN);
656         P_MSG_FLAG(CMSG_CLOEXEC);
657 #undef P_MSG_FLAG
658
659         if (flags)
660                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
661
662         return printed;
663 }
664
665 #define SCA_MSG_FLAGS syscall_arg__scnprintf_msg_flags
666
667 static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size,
668                                                  struct syscall_arg *arg)
669 {
670         size_t printed = 0;
671         int mode = arg->val;
672
673         if (mode == F_OK) /* 0 */
674                 return scnprintf(bf, size, "F");
675 #define P_MODE(n) \
676         if (mode & n##_OK) { \
677                 printed += scnprintf(bf + printed, size - printed, "%s", #n); \
678                 mode &= ~n##_OK; \
679         }
680
681         P_MODE(R);
682         P_MODE(W);
683         P_MODE(X);
684 #undef P_MODE
685
686         if (mode)
687                 printed += scnprintf(bf + printed, size - printed, "|%#x", mode);
688
689         return printed;
690 }
691
692 #define SCA_ACCMODE syscall_arg__scnprintf_access_mode
693
694 static size_t syscall_arg__scnprintf_open_flags(char *bf, size_t size,
695                                                struct syscall_arg *arg)
696 {
697         int printed = 0, flags = arg->val;
698
699         if (!(flags & O_CREAT))
700                 arg->mask |= 1 << (arg->idx + 1); /* Mask the mode parm */
701
702         if (flags == 0)
703                 return scnprintf(bf, size, "RDONLY");
704 #define P_FLAG(n) \
705         if (flags & O_##n) { \
706                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
707                 flags &= ~O_##n; \
708         }
709
710         P_FLAG(APPEND);
711         P_FLAG(ASYNC);
712         P_FLAG(CLOEXEC);
713         P_FLAG(CREAT);
714         P_FLAG(DIRECT);
715         P_FLAG(DIRECTORY);
716         P_FLAG(EXCL);
717         P_FLAG(LARGEFILE);
718         P_FLAG(NOATIME);
719         P_FLAG(NOCTTY);
720 #ifdef O_NONBLOCK
721         P_FLAG(NONBLOCK);
722 #elif O_NDELAY
723         P_FLAG(NDELAY);
724 #endif
725 #ifdef O_PATH
726         P_FLAG(PATH);
727 #endif
728         P_FLAG(RDWR);
729 #ifdef O_DSYNC
730         if ((flags & O_SYNC) == O_SYNC)
731                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", "SYNC");
732         else {
733                 P_FLAG(DSYNC);
734         }
735 #else
736         P_FLAG(SYNC);
737 #endif
738         P_FLAG(TRUNC);
739         P_FLAG(WRONLY);
740 #undef P_FLAG
741
742         if (flags)
743                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
744
745         return printed;
746 }
747
748 #define SCA_OPEN_FLAGS syscall_arg__scnprintf_open_flags
749
750 static size_t syscall_arg__scnprintf_eventfd_flags(char *bf, size_t size,
751                                                    struct syscall_arg *arg)
752 {
753         int printed = 0, flags = arg->val;
754
755         if (flags == 0)
756                 return scnprintf(bf, size, "NONE");
757 #define P_FLAG(n) \
758         if (flags & EFD_##n) { \
759                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
760                 flags &= ~EFD_##n; \
761         }
762
763         P_FLAG(SEMAPHORE);
764         P_FLAG(CLOEXEC);
765         P_FLAG(NONBLOCK);
766 #undef P_FLAG
767
768         if (flags)
769                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
770
771         return printed;
772 }
773
774 #define SCA_EFD_FLAGS syscall_arg__scnprintf_eventfd_flags
775
776 static size_t syscall_arg__scnprintf_pipe_flags(char *bf, size_t size,
777                                                 struct syscall_arg *arg)
778 {
779         int printed = 0, flags = arg->val;
780
781 #define P_FLAG(n) \
782         if (flags & O_##n) { \
783                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
784                 flags &= ~O_##n; \
785         }
786
787         P_FLAG(CLOEXEC);
788         P_FLAG(NONBLOCK);
789 #undef P_FLAG
790
791         if (flags)
792                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
793
794         return printed;
795 }
796
797 #define SCA_PIPE_FLAGS syscall_arg__scnprintf_pipe_flags
798
799 static size_t syscall_arg__scnprintf_signum(char *bf, size_t size, struct syscall_arg *arg)
800 {
801         int sig = arg->val;
802
803         switch (sig) {
804 #define P_SIGNUM(n) case SIG##n: return scnprintf(bf, size, #n)
805         P_SIGNUM(HUP);
806         P_SIGNUM(INT);
807         P_SIGNUM(QUIT);
808         P_SIGNUM(ILL);
809         P_SIGNUM(TRAP);
810         P_SIGNUM(ABRT);
811         P_SIGNUM(BUS);
812         P_SIGNUM(FPE);
813         P_SIGNUM(KILL);
814         P_SIGNUM(USR1);
815         P_SIGNUM(SEGV);
816         P_SIGNUM(USR2);
817         P_SIGNUM(PIPE);
818         P_SIGNUM(ALRM);
819         P_SIGNUM(TERM);
820         P_SIGNUM(STKFLT);
821         P_SIGNUM(CHLD);
822         P_SIGNUM(CONT);
823         P_SIGNUM(STOP);
824         P_SIGNUM(TSTP);
825         P_SIGNUM(TTIN);
826         P_SIGNUM(TTOU);
827         P_SIGNUM(URG);
828         P_SIGNUM(XCPU);
829         P_SIGNUM(XFSZ);
830         P_SIGNUM(VTALRM);
831         P_SIGNUM(PROF);
832         P_SIGNUM(WINCH);
833         P_SIGNUM(IO);
834         P_SIGNUM(PWR);
835         P_SIGNUM(SYS);
836         default: break;
837         }
838
839         return scnprintf(bf, size, "%#x", sig);
840 }
841
842 #define SCA_SIGNUM syscall_arg__scnprintf_signum
843
#define TCGETS		0x5401

/*
 * Names of the tty ioctl requests, indexed by (request - TCGETS): slot 0 is
 * TCGETS itself.  The designated initializers that skip over unassigned
 * request numbers are written as (request value - TCGETS) so that each name
 * lands on its own request; plain indices like [0x27] ignore the base and
 * would place TIOCSBRK (0x5427) and everything after it one slot too far.
 * Slots that are skipped stay NULL.
 */
static const char *tioctls[] = {
	"TCGETS", "TCSETS", "TCSETSW", "TCSETSF", "TCGETA", "TCSETA", "TCSETAW",
	"TCSETAF", "TCSBRK", "TCXONC", "TCFLSH", "TIOCEXCL", "TIOCNXCL",
	"TIOCSCTTY", "TIOCGPGRP", "TIOCSPGRP", "TIOCOUTQ", "TIOCSTI",
	"TIOCGWINSZ", "TIOCSWINSZ", "TIOCMGET", "TIOCMBIS", "TIOCMBIC",
	"TIOCMSET", "TIOCGSOFTCAR", "TIOCSSOFTCAR", "FIONREAD", "TIOCLINUX",
	"TIOCCONS", "TIOCGSERIAL", "TIOCSSERIAL", "TIOCPKT", "FIONBIO",
	"TIOCNOTTY", "TIOCSETD", "TIOCGETD", "TCSBRKP",
	[0x5427 - TCGETS] = "TIOCSBRK",
	"TIOCCBRK", "TIOCGSID", "TCGETS2", "TCSETS2", "TCSETSW2", "TCSETSF2",
	"TIOCGRS485", "TIOCSRS485", "TIOCGPTN", "TIOCSPTLCK",
	"TIOCGDEV||TCGETX", "TCSETX", "TCSETXF", "TCSETXW", "TIOCSIG",
	"TIOCVHANGUP", "TIOCGPKT", "TIOCGPTLCK", "TIOCGEXCL",
	[0x5450 - TCGETS] = "FIONCLEX",
	"FIOCLEX", "FIOASYNC", "TIOCSERCONFIG",
	"TIOCSERGWILD", "TIOCSERSWILD", "TIOCGLCKTRMIOS", "TIOCSLCKTRMIOS",
	"TIOCSERGSTRUCT", "TIOCSERGETLSR", "TIOCSERGETMULTI", "TIOCSERSETMULTI",
	"TIOCMIWAIT", "TIOCGICOUNT",
	[0x5460 - TCGETS] = "FIOQSIZE",
};
863
864 static DEFINE_STRARRAY_OFFSET(tioctls, 0x5401);
865
/*
 * Initializer fragment for a struct syscall_fmt entry: format argument
 * number 'arg' through strarray__<array>.  'name' is not used by the
 * expansion; it only labels the argument at the point of use.
 */
#define STRARRAY(arg, name, array) \
	  .arg_scnprintf = { [arg] = SCA_STRARRAY, }, \
	  .arg_parm	 = { [arg] = &strarray__##array, }
869
870 static struct syscall_fmt {
871         const char *name;
872         const char *alias;
873         size_t     (*arg_scnprintf[6])(char *bf, size_t size, struct syscall_arg *arg);
874         void       *arg_parm[6];
875         bool       errmsg;
876         bool       timeout;
877         bool       hexret;
878 } syscall_fmts[] = {
879         { .name     = "access",     .errmsg = true,
880           .arg_scnprintf = { [1] = SCA_ACCMODE, /* mode */ }, },
881         { .name     = "arch_prctl", .errmsg = true, .alias = "prctl", },
882         { .name     = "brk",        .hexret = true,
883           .arg_scnprintf = { [0] = SCA_HEX, /* brk */ }, },
884         { .name     = "clock_gettime",  .errmsg = true, STRARRAY(0, clk_id, clockid), },
885         { .name     = "close",      .errmsg = true,
886           .arg_scnprintf = { [0] = SCA_CLOSE_FD, /* fd */ }, }, 
887         { .name     = "connect",    .errmsg = true, },
888         { .name     = "dup",        .errmsg = true,
889           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
890         { .name     = "dup2",       .errmsg = true,
891           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
892         { .name     = "dup3",       .errmsg = true,
893           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
894         { .name     = "epoll_ctl",  .errmsg = true, STRARRAY(1, op, epoll_ctl_ops), },
895         { .name     = "eventfd2",   .errmsg = true,
896           .arg_scnprintf = { [1] = SCA_EFD_FLAGS, /* flags */ }, },
897         { .name     = "faccessat",  .errmsg = true,
898           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
899         { .name     = "fadvise64",  .errmsg = true,
900           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
901         { .name     = "fallocate",  .errmsg = true,
902           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
903         { .name     = "fchdir",     .errmsg = true,
904           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
905         { .name     = "fchmod",     .errmsg = true,
906           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
907         { .name     = "fchmodat",   .errmsg = true,
908           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, 
909         { .name     = "fchown",     .errmsg = true,
910           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
911         { .name     = "fchownat",   .errmsg = true,
912           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, 
913         { .name     = "fcntl",      .errmsg = true,
914           .arg_scnprintf = { [0] = SCA_FD, /* fd */
915                              [1] = SCA_STRARRAY, /* cmd */ },
916           .arg_parm      = { [1] = &strarray__fcntl_cmds, /* cmd */ }, },
917         { .name     = "fdatasync",  .errmsg = true,
918           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
919         { .name     = "flock",      .errmsg = true,
920           .arg_scnprintf = { [0] = SCA_FD, /* fd */
921                              [1] = SCA_FLOCK, /* cmd */ }, },
922         { .name     = "fsetxattr",  .errmsg = true,
923           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
924         { .name     = "fstat",      .errmsg = true, .alias = "newfstat",
925           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
926         { .name     = "fstatat",    .errmsg = true, .alias = "newfstatat",
927           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, 
928         { .name     = "fstatfs",    .errmsg = true,
929           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
930         { .name     = "fsync",    .errmsg = true,
931           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
932         { .name     = "ftruncate", .errmsg = true,
933           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
934         { .name     = "futex",      .errmsg = true,
935           .arg_scnprintf = { [1] = SCA_FUTEX_OP, /* op */ }, },
936         { .name     = "futimesat", .errmsg = true,
937           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, 
938         { .name     = "getdents",   .errmsg = true,
939           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
940         { .name     = "getdents64", .errmsg = true,
941           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
942         { .name     = "getitimer",  .errmsg = true, STRARRAY(0, which, itimers), },
943         { .name     = "getrlimit",  .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
944         { .name     = "ioctl",      .errmsg = true,
945           .arg_scnprintf = { [0] = SCA_FD, /* fd */ 
946                              [1] = SCA_STRHEXARRAY, /* cmd */
947                              [2] = SCA_HEX, /* arg */ },
948           .arg_parm      = { [1] = &strarray__tioctls, /* cmd */ }, },
949         { .name     = "kill",       .errmsg = true,
950           .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
951         { .name     = "linkat",     .errmsg = true,
952           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, 
953         { .name     = "lseek",      .errmsg = true,
954           .arg_scnprintf = { [0] = SCA_FD, /* fd */
955                              [2] = SCA_STRARRAY, /* whence */ },
956           .arg_parm      = { [2] = &strarray__whences, /* whence */ }, },
957         { .name     = "lstat",      .errmsg = true, .alias = "newlstat", },
958         { .name     = "madvise",    .errmsg = true,
959           .arg_scnprintf = { [0] = SCA_HEX,      /* start */
960                              [2] = SCA_MADV_BHV, /* behavior */ }, },
961         { .name     = "mkdirat",    .errmsg = true,
962           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, 
963         { .name     = "mknodat",    .errmsg = true,
964           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, 
965         { .name     = "mlock",      .errmsg = true,
966           .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
967         { .name     = "mlockall",   .errmsg = true,
968           .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
969         { .name     = "mmap",       .hexret = true,
970           .arg_scnprintf = { [0] = SCA_HEX,       /* addr */
971                              [2] = SCA_MMAP_PROT, /* prot */
972                              [3] = SCA_MMAP_FLAGS, /* flags */
973                              [4] = SCA_FD,        /* fd */ }, },
974         { .name     = "mprotect",   .errmsg = true,
975           .arg_scnprintf = { [0] = SCA_HEX, /* start */
976                              [2] = SCA_MMAP_PROT, /* prot */ }, },
977         { .name     = "mremap",     .hexret = true,
978           .arg_scnprintf = { [0] = SCA_HEX, /* addr */
979                              [4] = SCA_HEX, /* new_addr */ }, },
980         { .name     = "munlock",    .errmsg = true,
981           .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
982         { .name     = "munmap",     .errmsg = true,
983           .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
984         { .name     = "name_to_handle_at", .errmsg = true,
985           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, 
986         { .name     = "newfstatat", .errmsg = true,
987           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, 
988         { .name     = "open",       .errmsg = true,
989           .arg_scnprintf = { [1] = SCA_OPEN_FLAGS, /* flags */ }, },
990         { .name     = "open_by_handle_at", .errmsg = true,
991           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
992                              [2] = SCA_OPEN_FLAGS, /* flags */ }, },
993         { .name     = "openat",     .errmsg = true,
994           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
995                              [2] = SCA_OPEN_FLAGS, /* flags */ }, },
996         { .name     = "pipe2",      .errmsg = true,
997           .arg_scnprintf = { [1] = SCA_PIPE_FLAGS, /* flags */ }, },
998         { .name     = "poll",       .errmsg = true, .timeout = true, },
999         { .name     = "ppoll",      .errmsg = true, .timeout = true, },
1000         { .name     = "pread",      .errmsg = true, .alias = "pread64",
1001           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
1002         { .name     = "preadv",     .errmsg = true, .alias = "pread",
1003           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
1004         { .name     = "prlimit64",  .errmsg = true, STRARRAY(1, resource, rlimit_resources), },
1005         { .name     = "pwrite",     .errmsg = true, .alias = "pwrite64",
1006           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
1007         { .name     = "pwritev",    .errmsg = true,
1008           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
1009         { .name     = "read",       .errmsg = true,
1010           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
1011         { .name     = "readlinkat", .errmsg = true,
1012           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, 
1013         { .name     = "readv",      .errmsg = true,
1014           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
1015         { .name     = "recvfrom",   .errmsg = true,
1016           .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1017         { .name     = "recvmmsg",   .errmsg = true,
1018           .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1019         { .name     = "recvmsg",    .errmsg = true,
1020           .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
1021         { .name     = "renameat",   .errmsg = true,
1022           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, 
1023         { .name     = "rt_sigaction", .errmsg = true,
1024           .arg_scnprintf = { [0] = SCA_SIGNUM, /* sig */ }, },
1025         { .name     = "rt_sigprocmask",  .errmsg = true, STRARRAY(0, how, sighow), },
1026         { .name     = "rt_sigqueueinfo", .errmsg = true,
1027           .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1028         { .name     = "rt_tgsigqueueinfo", .errmsg = true,
1029           .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
1030         { .name     = "select",     .errmsg = true, .timeout = true, },
1031         { .name     = "sendmmsg",    .errmsg = true,
1032           .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1033         { .name     = "sendmsg",    .errmsg = true,
1034           .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
1035         { .name     = "sendto",     .errmsg = true,
1036           .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1037         { .name     = "setitimer",  .errmsg = true, STRARRAY(0, which, itimers), },
1038         { .name     = "setrlimit",  .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
1039         { .name     = "shutdown",   .errmsg = true,
1040           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
1041         { .name     = "socket",     .errmsg = true,
1042           .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
1043                              [1] = SCA_SK_TYPE, /* type */ },
1044           .arg_parm      = { [0] = &strarray__socket_families, /* family */ }, },
1045         { .name     = "socketpair", .errmsg = true,
1046           .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
1047                              [1] = SCA_SK_TYPE, /* type */ },
1048           .arg_parm      = { [0] = &strarray__socket_families, /* family */ }, },
1049         { .name     = "stat",       .errmsg = true, .alias = "newstat", },
1050         { .name     = "symlinkat",  .errmsg = true,
1051           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, 
1052         { .name     = "tgkill",     .errmsg = true,
1053           .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
1054         { .name     = "tkill",      .errmsg = true,
1055           .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1056         { .name     = "uname",      .errmsg = true, .alias = "newuname", },
1057         { .name     = "unlinkat",   .errmsg = true,
1058           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1059         { .name     = "utimensat",  .errmsg = true,
1060           .arg_scnprintf = { [0] = SCA_FDAT, /* dirfd */ }, },
1061         { .name     = "write",      .errmsg = true,
1062           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
1063         { .name     = "writev",     .errmsg = true,
1064           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
1065 };
1066
1067 static int syscall_fmt__cmp(const void *name, const void *fmtp)
1068 {
1069         const struct syscall_fmt *fmt = fmtp;
1070         return strcmp(name, fmt->name);
1071 }
1072
1073 static struct syscall_fmt *syscall_fmt__find(const char *name)
1074 {
1075         const int nmemb = ARRAY_SIZE(syscall_fmts);
1076         return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp);
1077 }
1078
/*
 * Per syscall id state, stored in trace->syscalls.table[] and filled in
 * lazily by trace__read_syscall_info().
 */
struct syscall {
        /* "syscalls:sys_enter_<name>" tracepoint format, NULL if not found */
        struct event_format *tp_format;
        /* syscall name as returned by audit_syscall_to_name() */
        const char          *name;
        /* true when the event qualifier excludes this syscall */
        bool                filtered;
        /* matching syscall_fmts entry, NULL when there is none */
        struct syscall_fmt  *fmt;
        /* per argument pretty printers, a NULL slot means "print as %ld" */
        size_t              (**arg_scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
        /* per argument extra parameter handed to the pretty printer */
        void                **arg_parm;
};
1087
1088 static size_t fprintf_duration(unsigned long t, FILE *fp)
1089 {
1090         double duration = (double)t / NSEC_PER_MSEC;
1091         size_t printed = fprintf(fp, "(");
1092
1093         if (duration >= 1.0)
1094                 printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration);
1095         else if (duration >= 0.01)
1096                 printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration);
1097         else
1098                 printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration);
1099         return printed + fprintf(fp, "): ");
1100 }
1101
1102 struct thread_trace {
1103         u64               entry_time;
1104         u64               exit_time;
1105         bool              entry_pending;
1106         unsigned long     nr_events;
1107         char              *entry_str;
1108         double            runtime_ms;
1109         struct {
1110                 int       max;
1111                 char      **table;
1112         } paths;
1113
1114         struct intlist *syscall_stats;
1115 };
1116
1117 static struct thread_trace *thread_trace__new(void)
1118 {
1119         struct thread_trace *ttrace =  zalloc(sizeof(struct thread_trace));
1120
1121         if (ttrace)
1122                 ttrace->paths.max = -1;
1123
1124         ttrace->syscall_stats = intlist__new(NULL);
1125
1126         return ttrace;
1127 }
1128
1129 static struct thread_trace *thread__trace(struct thread *thread, FILE *fp)
1130 {
1131         struct thread_trace *ttrace;
1132
1133         if (thread == NULL)
1134                 goto fail;
1135
1136         if (thread->priv == NULL)
1137                 thread->priv = thread_trace__new();
1138                 
1139         if (thread->priv == NULL)
1140                 goto fail;
1141
1142         ttrace = thread->priv;
1143         ++ttrace->nr_events;
1144
1145         return ttrace;
1146 fail:
1147         color_fprintf(fp, PERF_COLOR_RED,
1148                       "WARNING: not enough memory, dropping samples!\n");
1149         return NULL;
1150 }
1151
1152 struct trace {
1153         struct perf_tool        tool;
1154         struct {
1155                 int             machine;
1156                 int             open_id;
1157         }                       audit;
1158         struct {
1159                 int             max;
1160                 struct syscall  *table;
1161         } syscalls;
1162         struct perf_record_opts opts;
1163         struct machine          *host;
1164         u64                     base_time;
1165         bool                    full_time;
1166         FILE                    *output;
1167         unsigned long           nr_events;
1168         struct strlist          *ev_qualifier;
1169         bool                    not_ev_qualifier;
1170         bool                    live;
1171         const char              *last_vfs_getname;
1172         struct intlist          *tid_list;
1173         struct intlist          *pid_list;
1174         bool                    sched;
1175         bool                    multiple_threads;
1176         bool                    summary;
1177         bool                    summary_only;
1178         bool                    show_comm;
1179         bool                    show_tool_stats;
1180         double                  duration_filter;
1181         double                  runtime_ms;
1182         struct {
1183                 u64             vfs_getname, proc_getname;
1184         } stats;
1185 };
1186
1187 static int trace__set_fd_pathname(struct thread *thread, int fd, const char *pathname)
1188 {
1189         struct thread_trace *ttrace = thread->priv;
1190
1191         if (fd > ttrace->paths.max) {
1192                 char **npath = realloc(ttrace->paths.table, (fd + 1) * sizeof(char *));
1193
1194                 if (npath == NULL)
1195                         return -1;
1196
1197                 if (ttrace->paths.max != -1) {
1198                         memset(npath + ttrace->paths.max + 1, 0,
1199                                (fd - ttrace->paths.max) * sizeof(char *));
1200                 } else {
1201                         memset(npath, 0, (fd + 1) * sizeof(char *));
1202                 }
1203
1204                 ttrace->paths.table = npath;
1205                 ttrace->paths.max   = fd;
1206         }
1207
1208         ttrace->paths.table[fd] = strdup(pathname);
1209
1210         return ttrace->paths.table[fd] != NULL ? 0 : -1;
1211 }
1212
1213 static int thread__read_fd_path(struct thread *thread, int fd)
1214 {
1215         char linkname[PATH_MAX], pathname[PATH_MAX];
1216         struct stat st;
1217         int ret;
1218
1219         if (thread->pid_ == thread->tid) {
1220                 scnprintf(linkname, sizeof(linkname),
1221                           "/proc/%d/fd/%d", thread->pid_, fd);
1222         } else {
1223                 scnprintf(linkname, sizeof(linkname),
1224                           "/proc/%d/task/%d/fd/%d", thread->pid_, thread->tid, fd);
1225         }
1226
1227         if (lstat(linkname, &st) < 0 || st.st_size + 1 > (off_t)sizeof(pathname))
1228                 return -1;
1229
1230         ret = readlink(linkname, pathname, sizeof(pathname));
1231
1232         if (ret < 0 || ret > st.st_size)
1233                 return -1;
1234
1235         pathname[ret] = '\0';
1236         return trace__set_fd_pathname(thread, fd, pathname);
1237 }
1238
1239 static const char *thread__fd_path(struct thread *thread, int fd,
1240                                    struct trace *trace)
1241 {
1242         struct thread_trace *ttrace = thread->priv;
1243
1244         if (ttrace == NULL)
1245                 return NULL;
1246
1247         if (fd < 0)
1248                 return NULL;
1249
1250         if ((fd > ttrace->paths.max || ttrace->paths.table[fd] == NULL))
1251                 if (!trace->live)
1252                         return NULL;
1253                 ++trace->stats.proc_getname;
1254                 if (thread__read_fd_path(thread, fd)) {
1255                         return NULL;
1256         }
1257
1258         return ttrace->paths.table[fd];
1259 }
1260
1261 static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
1262                                         struct syscall_arg *arg)
1263 {
1264         int fd = arg->val;
1265         size_t printed = scnprintf(bf, size, "%d", fd);
1266         const char *path = thread__fd_path(arg->thread, fd, arg->trace);
1267
1268         if (path)
1269                 printed += scnprintf(bf + printed, size - printed, "<%s>", path);
1270
1271         return printed;
1272 }
1273
1274 static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
1275                                               struct syscall_arg *arg)
1276 {
1277         int fd = arg->val;
1278         size_t printed = syscall_arg__scnprintf_fd(bf, size, arg);
1279         struct thread_trace *ttrace = arg->thread->priv;
1280
1281         if (ttrace && fd >= 0 && fd <= ttrace->paths.max) {
1282                 free(ttrace->paths.table[fd]);
1283                 ttrace->paths.table[fd] = NULL;
1284         }
1285
1286         return printed;
1287 }
1288
1289 static bool trace__filter_duration(struct trace *trace, double t)
1290 {
1291         return t < (trace->duration_filter * NSEC_PER_MSEC);
1292 }
1293
1294 static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
1295 {
1296         double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC;
1297
1298         return fprintf(fp, "%10.3f ", ts);
1299 }
1300
/* Set by sig_handler(): a handled signal arrived */
static bool done = false;
/* Set by sig_handler(): the last handled signal was SIGINT */
static bool interrupted = false;

/*
 * Signal handler: flag that a signal arrived and record whether it was
 * SIGINT.
 */
static void sig_handler(int sig)
{
        done = true;
        interrupted = (sig == SIGINT);
}
1309
1310 static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread,
1311                                         u64 duration, u64 tstamp, FILE *fp)
1312 {
1313         size_t printed = trace__fprintf_tstamp(trace, tstamp, fp);
1314         printed += fprintf_duration(duration, fp);
1315
1316         if (trace->multiple_threads) {
1317                 if (trace->show_comm)
1318                         printed += fprintf(fp, "%.14s/", thread__comm_str(thread));
1319                 printed += fprintf(fp, "%d ", thread->tid);
1320         }
1321
1322         return printed;
1323 }
1324
1325 static int trace__process_event(struct trace *trace, struct machine *machine,
1326                                 union perf_event *event, struct perf_sample *sample)
1327 {
1328         int ret = 0;
1329
1330         switch (event->header.type) {
1331         case PERF_RECORD_LOST:
1332                 color_fprintf(trace->output, PERF_COLOR_RED,
1333                               "LOST %" PRIu64 " events!\n", event->lost.lost);
1334                 ret = machine__process_lost_event(machine, event, sample);
1335         default:
1336                 ret = machine__process_event(machine, event, sample);
1337                 break;
1338         }
1339
1340         return ret;
1341 }
1342
1343 static int trace__tool_process(struct perf_tool *tool,
1344                                union perf_event *event,
1345                                struct perf_sample *sample,
1346                                struct machine *machine)
1347 {
1348         struct trace *trace = container_of(tool, struct trace, tool);
1349         return trace__process_event(trace, machine, event, sample);
1350 }
1351
1352 static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
1353 {
1354         int err = symbol__init();
1355
1356         if (err)
1357                 return err;
1358
1359         trace->host = machine__new_host();
1360         if (trace->host == NULL)
1361                 return -ENOMEM;
1362
1363         err = __machine__synthesize_threads(trace->host, &trace->tool, &trace->opts.target,
1364                                             evlist->threads, trace__tool_process, false);
1365         if (err)
1366                 symbol__exit();
1367
1368         return err;
1369 }
1370
1371 static int syscall__set_arg_fmts(struct syscall *sc)
1372 {
1373         struct format_field *field;
1374         int idx = 0;
1375
1376         sc->arg_scnprintf = calloc(sc->tp_format->format.nr_fields - 1, sizeof(void *));
1377         if (sc->arg_scnprintf == NULL)
1378                 return -1;
1379
1380         if (sc->fmt)
1381                 sc->arg_parm = sc->fmt->arg_parm;
1382
1383         for (field = sc->tp_format->format.fields->next; field; field = field->next) {
1384                 if (sc->fmt && sc->fmt->arg_scnprintf[idx])
1385                         sc->arg_scnprintf[idx] = sc->fmt->arg_scnprintf[idx];
1386                 else if (field->flags & FIELD_IS_POINTER)
1387                         sc->arg_scnprintf[idx] = syscall_arg__scnprintf_hex;
1388                 ++idx;
1389         }
1390
1391         return 0;
1392 }
1393
1394 static int trace__read_syscall_info(struct trace *trace, int id)
1395 {
1396         char tp_name[128];
1397         struct syscall *sc;
1398         const char *name = audit_syscall_to_name(id, trace->audit.machine);
1399
1400         if (name == NULL)
1401                 return -1;
1402
1403         if (id > trace->syscalls.max) {
1404                 struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc));
1405
1406                 if (nsyscalls == NULL)
1407                         return -1;
1408
1409                 if (trace->syscalls.max != -1) {
1410                         memset(nsyscalls + trace->syscalls.max + 1, 0,
1411                                (id - trace->syscalls.max) * sizeof(*sc));
1412                 } else {
1413                         memset(nsyscalls, 0, (id + 1) * sizeof(*sc));
1414                 }
1415
1416                 trace->syscalls.table = nsyscalls;
1417                 trace->syscalls.max   = id;
1418         }
1419
1420         sc = trace->syscalls.table + id;
1421         sc->name = name;
1422
1423         if (trace->ev_qualifier) {
1424                 bool in = strlist__find(trace->ev_qualifier, name) != NULL;
1425
1426                 if (!(in ^ trace->not_ev_qualifier)) {
1427                         sc->filtered = true;
1428                         /*
1429                          * No need to do read tracepoint information since this will be
1430                          * filtered out.
1431                          */
1432                         return 0;
1433                 }
1434         }
1435
1436         sc->fmt  = syscall_fmt__find(sc->name);
1437
1438         snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name);
1439         sc->tp_format = trace_event__tp_format("syscalls", tp_name);
1440
1441         if (sc->tp_format == NULL && sc->fmt && sc->fmt->alias) {
1442                 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias);
1443                 sc->tp_format = trace_event__tp_format("syscalls", tp_name);
1444         }
1445
1446         if (sc->tp_format == NULL)
1447                 return -1;
1448
1449         return syscall__set_arg_fmts(sc);
1450 }
1451
1452 static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
1453                                       unsigned long *args, struct trace *trace,
1454                                       struct thread *thread)
1455 {
1456         size_t printed = 0;
1457
1458         if (sc->tp_format != NULL) {
1459                 struct format_field *field;
1460                 u8 bit = 1;
1461                 struct syscall_arg arg = {
1462                         .idx    = 0,
1463                         .mask   = 0,
1464                         .trace  = trace,
1465                         .thread = thread,
1466                 };
1467
1468                 for (field = sc->tp_format->format.fields->next; field;
1469                      field = field->next, ++arg.idx, bit <<= 1) {
1470                         if (arg.mask & bit)
1471                                 continue;
1472                         /*
1473                          * Suppress this argument if its value is zero and
1474                          * and we don't have a string associated in an
1475                          * strarray for it.
1476                          */
1477                         if (args[arg.idx] == 0 &&
1478                             !(sc->arg_scnprintf &&
1479                               sc->arg_scnprintf[arg.idx] == SCA_STRARRAY &&
1480                               sc->arg_parm[arg.idx]))
1481                                 continue;
1482
1483                         printed += scnprintf(bf + printed, size - printed,
1484                                              "%s%s: ", printed ? ", " : "", field->name);
1485                         if (sc->arg_scnprintf && sc->arg_scnprintf[arg.idx]) {
1486                                 arg.val = args[arg.idx];
1487                                 if (sc->arg_parm)
1488                                         arg.parm = sc->arg_parm[arg.idx];
1489                                 printed += sc->arg_scnprintf[arg.idx](bf + printed,
1490                                                                       size - printed, &arg);
1491                         } else {
1492                                 printed += scnprintf(bf + printed, size - printed,
1493                                                      "%ld", args[arg.idx]);
1494                         }
1495                 }
1496         } else {
1497                 int i = 0;
1498
1499                 while (i < 6) {
1500                         printed += scnprintf(bf + printed, size - printed,
1501                                              "%sarg%d: %ld",
1502                                              printed ? ", " : "", i, args[i]);
1503                         ++i;
1504                 }
1505         }
1506
1507         return printed;
1508 }
1509
1510 typedef int (*tracepoint_handler)(struct trace *trace, struct perf_evsel *evsel,
1511                                   struct perf_sample *sample);
1512
1513 static struct syscall *trace__syscall_info(struct trace *trace,
1514                                            struct perf_evsel *evsel, int id)
1515 {
1516
1517         if (id < 0) {
1518
1519                 /*
1520                  * XXX: Noticed on x86_64, reproduced as far back as 3.0.36, haven't tried
1521                  * before that, leaving at a higher verbosity level till that is
1522                  * explained. Reproduced with plain ftrace with:
1523                  *
1524                  * echo 1 > /t/events/raw_syscalls/sys_exit/enable
1525                  * grep "NR -1 " /t/trace_pipe
1526                  *
1527                  * After generating some load on the machine.
1528                  */
1529                 if (verbose > 1) {
1530                         static u64 n;
1531                         fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n",
1532                                 id, perf_evsel__name(evsel), ++n);
1533                 }
1534                 return NULL;
1535         }
1536
1537         if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) &&
1538             trace__read_syscall_info(trace, id))
1539                 goto out_cant_read;
1540
1541         if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL))
1542                 goto out_cant_read;
1543
1544         return &trace->syscalls.table[id];
1545
1546 out_cant_read:
1547         if (verbose) {
1548                 fprintf(trace->output, "Problems reading syscall %d", id);
1549                 if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL)
1550                         fprintf(trace->output, "(%s)", trace->syscalls.table[id].name);
1551                 fputs(" information\n", trace->output);
1552         }
1553         return NULL;
1554 }
1555
1556 static void thread__update_stats(struct thread_trace *ttrace,
1557                                  int id, struct perf_sample *sample)
1558 {
1559         struct int_node *inode;
1560         struct stats *stats;
1561         u64 duration = 0;
1562
1563         inode = intlist__findnew(ttrace->syscall_stats, id);
1564         if (inode == NULL)
1565                 return;
1566
1567         stats = inode->priv;
1568         if (stats == NULL) {
1569                 stats = malloc(sizeof(struct stats));
1570                 if (stats == NULL)
1571                         return;
1572                 init_stats(stats);
1573                 inode->priv = stats;
1574         }
1575
1576         if (ttrace->entry_time && sample->time > ttrace->entry_time)
1577                 duration = sample->time - ttrace->entry_time;
1578
1579         update_stats(stats, duration);
1580 }
1581
1582 static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
1583                             struct perf_sample *sample)
1584 {
1585         char *msg;
1586         void *args;
1587         size_t printed = 0;
1588         struct thread *thread;
1589         int id = perf_evsel__sc_tp_uint(evsel, id, sample);
1590         struct syscall *sc = trace__syscall_info(trace, evsel, id);
1591         struct thread_trace *ttrace;
1592
1593         if (sc == NULL)
1594                 return -1;
1595
1596         if (sc->filtered)
1597                 return 0;
1598
1599         thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1600         ttrace = thread__trace(thread, trace->output);
1601         if (ttrace == NULL)
1602                 return -1;
1603
1604         args = perf_evsel__sc_tp_ptr(evsel, args, sample);
1605         ttrace = thread->priv;
1606
1607         if (ttrace->entry_str == NULL) {
1608                 ttrace->entry_str = malloc(1024);
1609                 if (!ttrace->entry_str)
1610                         return -1;
1611         }
1612
1613         ttrace->entry_time = sample->time;
1614         msg = ttrace->entry_str;
1615         printed += scnprintf(msg + printed, 1024 - printed, "%s(", sc->name);
1616
1617         printed += syscall__scnprintf_args(sc, msg + printed, 1024 - printed,
1618                                            args, trace, thread);
1619
1620         if (!strcmp(sc->name, "exit_group") || !strcmp(sc->name, "exit")) {
1621                 if (!trace->duration_filter && !trace->summary_only) {
1622                         trace__fprintf_entry_head(trace, thread, 1, sample->time, trace->output);
1623                         fprintf(trace->output, "%-70s\n", ttrace->entry_str);
1624                 }
1625         } else
1626                 ttrace->entry_pending = true;
1627
1628         return 0;
1629 }
1630
1631 static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
1632                            struct perf_sample *sample)
1633 {
1634         int ret;
1635         u64 duration = 0;
1636         struct thread *thread;
1637         int id = perf_evsel__sc_tp_uint(evsel, id, sample);
1638         struct syscall *sc = trace__syscall_info(trace, evsel, id);
1639         struct thread_trace *ttrace;
1640
1641         if (sc == NULL)
1642                 return -1;
1643
1644         if (sc->filtered)
1645                 return 0;
1646
1647         thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1648         ttrace = thread__trace(thread, trace->output);
1649         if (ttrace == NULL)
1650                 return -1;
1651
1652         if (trace->summary)
1653                 thread__update_stats(ttrace, id, sample);
1654
1655         ret = perf_evsel__sc_tp_uint(evsel, ret, sample);
1656
1657         if (id == trace->audit.open_id && ret >= 0 && trace->last_vfs_getname) {
1658                 trace__set_fd_pathname(thread, ret, trace->last_vfs_getname);
1659                 trace->last_vfs_getname = NULL;
1660                 ++trace->stats.vfs_getname;
1661         }
1662
1663         ttrace = thread->priv;
1664
1665         ttrace->exit_time = sample->time;
1666
1667         if (ttrace->entry_time) {
1668                 duration = sample->time - ttrace->entry_time;
1669                 if (trace__filter_duration(trace, duration))
1670                         goto out;
1671         } else if (trace->duration_filter)
1672                 goto out;
1673
1674         if (trace->summary_only)
1675                 goto out;
1676
1677         trace__fprintf_entry_head(trace, thread, duration, sample->time, trace->output);
1678
1679         if (ttrace->entry_pending) {
1680                 fprintf(trace->output, "%-70s", ttrace->entry_str);
1681         } else {
1682                 fprintf(trace->output, " ... [");
1683                 color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued");
1684                 fprintf(trace->output, "]: %s()", sc->name);
1685         }
1686
1687         if (sc->fmt == NULL) {
1688 signed_print:
1689                 fprintf(trace->output, ") = %d", ret);
1690         } else if (ret < 0 && sc->fmt->errmsg) {
1691                 char bf[256];
1692                 const char *emsg = strerror_r(-ret, bf, sizeof(bf)),
1693                            *e = audit_errno_to_name(-ret);
1694
1695                 fprintf(trace->output, ") = -1 %s %s", e, emsg);
1696         } else if (ret == 0 && sc->fmt->timeout)
1697                 fprintf(trace->output, ") = 0 Timeout");
1698         else if (sc->fmt->hexret)
1699                 fprintf(trace->output, ") = %#x", ret);
1700         else
1701                 goto signed_print;
1702
1703         fputc('\n', trace->output);
1704 out:
1705         ttrace->entry_pending = false;
1706
1707         return 0;
1708 }
1709
1710 static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel,
1711                               struct perf_sample *sample)
1712 {
1713         trace->last_vfs_getname = perf_evsel__rawptr(evsel, sample, "pathname");
1714         return 0;
1715 }
1716
1717 static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel,
1718                                      struct perf_sample *sample)
1719 {
1720         u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
1721         double runtime_ms = (double)runtime / NSEC_PER_MSEC;
1722         struct thread *thread = machine__findnew_thread(trace->host,
1723                                                         sample->pid,
1724                                                         sample->tid);
1725         struct thread_trace *ttrace = thread__trace(thread, trace->output);
1726
1727         if (ttrace == NULL)
1728                 goto out_dump;
1729
1730         ttrace->runtime_ms += runtime_ms;
1731         trace->runtime_ms += runtime_ms;
1732         return 0;
1733
1734 out_dump:
1735         fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n",
1736                evsel->name,
1737                perf_evsel__strval(evsel, sample, "comm"),
1738                (pid_t)perf_evsel__intval(evsel, sample, "pid"),
1739                runtime,
1740                perf_evsel__intval(evsel, sample, "vruntime"));
1741         return 0;
1742 }
1743
1744 static bool skip_sample(struct trace *trace, struct perf_sample *sample)
1745 {
1746         if ((trace->pid_list && intlist__find(trace->pid_list, sample->pid)) ||
1747             (trace->tid_list && intlist__find(trace->tid_list, sample->tid)))
1748                 return false;
1749
1750         if (trace->pid_list || trace->tid_list)
1751                 return true;
1752
1753         return false;
1754 }
1755
1756 static int trace__process_sample(struct perf_tool *tool,
1757                                  union perf_event *event __maybe_unused,
1758                                  struct perf_sample *sample,
1759                                  struct perf_evsel *evsel,
1760                                  struct machine *machine __maybe_unused)
1761 {
1762         struct trace *trace = container_of(tool, struct trace, tool);
1763         int err = 0;
1764
1765         tracepoint_handler handler = evsel->handler;
1766
1767         if (skip_sample(trace, sample))
1768                 return 0;
1769
1770         if (!trace->full_time && trace->base_time == 0)
1771                 trace->base_time = sample->time;
1772
1773         if (handler)
1774                 handler(trace, evsel, sample);
1775
1776         return err;
1777 }
1778
1779 static int parse_target_str(struct trace *trace)
1780 {
1781         if (trace->opts.target.pid) {
1782                 trace->pid_list = intlist__new(trace->opts.target.pid);
1783                 if (trace->pid_list == NULL) {
1784                         pr_err("Error parsing process id string\n");
1785                         return -EINVAL;
1786                 }
1787         }
1788
1789         if (trace->opts.target.tid) {
1790                 trace->tid_list = intlist__new(trace->opts.target.tid);
1791                 if (trace->tid_list == NULL) {
1792                         pr_err("Error parsing thread id string\n");
1793                         return -EINVAL;
1794                 }
1795         }
1796
1797         return 0;
1798 }
1799
/*
 * Implement "perf trace record": build an argv for cmd_record() made of a
 * fixed set of record options, the syscall enter/exit tracepoint pair that
 * exists on this system, and the caller's remaining arguments.
 *
 * Returns -ENOMEM if the argument vector cannot be allocated, -1 if neither
 * the raw_syscalls nor the syscalls tracepoints are available, otherwise
 * whatever cmd_record() returns.
 */
static int trace__record(int argc, const char **argv)
{
	unsigned int rec_argc, i, j;
	const char **rec_argv;
	const char * const record_args[] = {
		"record",
		"-R",
		"-m", "1024",
		"-c", "1",
		"-e",
	};

	/* +1 is for the event string below */
	rec_argc = ARRAY_SIZE(record_args) + 1 + argc;
	rec_argv = calloc(rec_argc + 1, sizeof(char *));

	if (rec_argv == NULL)
		return -ENOMEM;

	for (i = 0; i < ARRAY_SIZE(record_args); i++)
		rec_argv[i] = record_args[i];

	/* event string may be different for older kernels - e.g., RHEL6 */
	if (is_valid_tracepoint("raw_syscalls:sys_enter"))
		rec_argv[i] = "raw_syscalls:sys_enter,raw_syscalls:sys_exit";
	else if (is_valid_tracepoint("syscalls:sys_enter"))
		rec_argv[i] = "syscalls:sys_enter,syscalls:sys_exit";
	else {
		pr_err("Neither raw_syscalls nor syscalls events exist.\n");
		/* Nothing was handed to cmd_record(), so the vector is still ours to release. */
		free(rec_argv);
		return -1;
	}
	i++;

	for (j = 0; j < (unsigned int)argc; j++, i++)
		rec_argv[i] = argv[j];

	return cmd_record(i, rec_argv, NULL);
}
1838
1839 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp);
1840
1841 static void perf_evlist__add_vfs_getname(struct perf_evlist *evlist)
1842 {
1843         struct perf_evsel *evsel = perf_evsel__newtp("probe", "vfs_getname");
1844         if (evsel == NULL)
1845                 return;
1846
1847         if (perf_evsel__field(evsel, "pathname") == NULL) {
1848                 perf_evsel__delete(evsel);
1849                 return;
1850         }
1851
1852         evsel->handler = trace__vfs_getname;
1853         perf_evlist__add(evlist, evsel);
1854 }
1855
1856 static int trace__run(struct trace *trace, int argc, const char **argv)
1857 {
1858         struct perf_evlist *evlist = perf_evlist__new();
1859         struct perf_evsel *evsel;
1860         int err = -1, i;
1861         unsigned long before;
1862         const bool forks = argc > 0;
1863
1864         trace->live = true;
1865
1866         if (evlist == NULL) {
1867                 fprintf(trace->output, "Not enough memory to run!\n");
1868                 goto out;
1869         }
1870
1871         if (perf_evlist__add_syscall_newtp(evlist, trace__sys_enter, trace__sys_exit))
1872                 goto out_error_tp;
1873
1874         perf_evlist__add_vfs_getname(evlist);
1875
1876         if (trace->sched &&
1877                 perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
1878                                 trace__sched_stat_runtime))
1879                 goto out_error_tp;
1880
1881         err = perf_evlist__create_maps(evlist, &trace->opts.target);
1882         if (err < 0) {
1883                 fprintf(trace->output, "Problems parsing the target to trace, check your options!\n");
1884                 goto out_delete_evlist;
1885         }
1886
1887         err = trace__symbols_init(trace, evlist);
1888         if (err < 0) {
1889                 fprintf(trace->output, "Problems initializing symbol libraries!\n");
1890                 goto out_delete_maps;
1891         }
1892
1893         perf_evlist__config(evlist, &trace->opts);
1894
1895         signal(SIGCHLD, sig_handler);
1896         signal(SIGINT, sig_handler);
1897
1898         if (forks) {
1899                 err = perf_evlist__prepare_workload(evlist, &trace->opts.target,
1900                                                     argv, false, false);
1901                 if (err < 0) {
1902                         fprintf(trace->output, "Couldn't run the workload!\n");
1903                         goto out_delete_maps;
1904                 }
1905         }
1906
1907         err = perf_evlist__open(evlist);
1908         if (err < 0)
1909                 goto out_error_open;
1910
1911         err = perf_evlist__mmap(evlist, trace->opts.mmap_pages, false);
1912         if (err < 0) {
1913                 fprintf(trace->output, "Couldn't mmap the events: %s\n", strerror(errno));
1914                 goto out_close_evlist;
1915         }
1916
1917         perf_evlist__enable(evlist);
1918
1919         if (forks)
1920                 perf_evlist__start_workload(evlist);
1921
1922         trace->multiple_threads = evlist->threads->map[0] == -1 || evlist->threads->nr > 1;
1923 again:
1924         before = trace->nr_events;
1925
1926         for (i = 0; i < evlist->nr_mmaps; i++) {
1927                 union perf_event *event;
1928
1929                 while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) {
1930                         const u32 type = event->header.type;
1931                         tracepoint_handler handler;
1932                         struct perf_sample sample;
1933
1934                         ++trace->nr_events;
1935
1936                         err = perf_evlist__parse_sample(evlist, event, &sample);
1937                         if (err) {
1938                                 fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err);
1939                                 goto next_event;
1940                         }
1941
1942                         if (!trace->full_time && trace->base_time == 0)
1943                                 trace->base_time = sample.time;
1944
1945                         if (type != PERF_RECORD_SAMPLE) {
1946                                 trace__process_event(trace, trace->host, event, &sample);
1947                                 continue;
1948                         }
1949
1950                         evsel = perf_evlist__id2evsel(evlist, sample.id);
1951                         if (evsel == NULL) {
1952                                 fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample.id);
1953                                 goto next_event;
1954                         }
1955
1956                         if (sample.raw_data == NULL) {
1957                                 fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
1958                                        perf_evsel__name(evsel), sample.tid,
1959                                        sample.cpu, sample.raw_size);
1960                                 goto next_event;
1961                         }
1962
1963                         handler = evsel->handler;
1964                         handler(trace, evsel, &sample);
1965 next_event:
1966                         perf_evlist__mmap_consume(evlist, i);
1967
1968                         if (interrupted)
1969                                 goto out_disable;
1970                 }
1971         }
1972
1973         if (trace->nr_events == before) {
1974                 int timeout = done ? 100 : -1;
1975
1976                 if (poll(evlist->pollfd, evlist->nr_fds, timeout) > 0)
1977                         goto again;
1978         } else {
1979                 goto again;
1980         }
1981
1982 out_disable:
1983         perf_evlist__disable(evlist);
1984
1985         if (!err) {
1986                 if (trace->summary)
1987                         trace__fprintf_thread_summary(trace, trace->output);
1988
1989                 if (trace->show_tool_stats) {
1990                         fprintf(trace->output, "Stats:\n "
1991                                                " vfs_getname : %" PRIu64 "\n"
1992                                                " proc_getname: %" PRIu64 "\n",
1993                                 trace->stats.vfs_getname,
1994                                 trace->stats.proc_getname);
1995                 }
1996         }
1997
1998         perf_evlist__munmap(evlist);
1999 out_close_evlist:
2000         perf_evlist__close(evlist);
2001 out_delete_maps:
2002         perf_evlist__delete_maps(evlist);
2003 out_delete_evlist:
2004         perf_evlist__delete(evlist);
2005 out:
2006         trace->live = false;
2007         return err;
2008 {
2009         char errbuf[BUFSIZ];
2010
2011 out_error_tp:
2012         perf_evlist__strerror_tp(evlist, errno, errbuf, sizeof(errbuf));
2013         goto out_error;
2014
2015 out_error_open:
2016         perf_evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf));
2017
2018 out_error:
2019         fprintf(trace->output, "%s\n", errbuf);
2020         goto out_delete_evlist;
2021 }
2022 }
2023
2024 static int trace__replay(struct trace *trace)
2025 {
2026         const struct perf_evsel_str_handler handlers[] = {
2027                 { "probe:vfs_getname",       trace__vfs_getname, },
2028         };
2029         struct perf_data_file file = {
2030                 .path  = input_name,
2031                 .mode  = PERF_DATA_MODE_READ,
2032         };
2033         struct perf_session *session;
2034         struct perf_evsel *evsel;
2035         int err = -1;
2036
2037         trace->tool.sample        = trace__process_sample;
2038         trace->tool.mmap          = perf_event__process_mmap;
2039         trace->tool.mmap2         = perf_event__process_mmap2;
2040         trace->tool.comm          = perf_event__process_comm;
2041         trace->tool.exit          = perf_event__process_exit;
2042         trace->tool.fork          = perf_event__process_fork;
2043         trace->tool.attr          = perf_event__process_attr;
2044         trace->tool.tracing_data = perf_event__process_tracing_data;
2045         trace->tool.build_id      = perf_event__process_build_id;
2046
2047         trace->tool.ordered_samples = true;
2048         trace->tool.ordering_requires_timestamps = true;
2049
2050         /* add tid to output */
2051         trace->multiple_threads = true;
2052
2053         if (symbol__init() < 0)
2054                 return -1;
2055
2056         session = perf_session__new(&file, false, &trace->tool);
2057         if (session == NULL)
2058                 return -ENOMEM;
2059
2060         trace->host = &session->machines.host;
2061
2062         err = perf_session__set_tracepoints_handlers(session, handlers);
2063         if (err)
2064                 goto out;
2065
2066         evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2067                                                      "raw_syscalls:sys_enter");
2068         /* older kernels have syscalls tp versus raw_syscalls */
2069         if (evsel == NULL)
2070                 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2071                                                              "syscalls:sys_enter");
2072         if (evsel == NULL) {
2073                 pr_err("Data file does not have raw_syscalls:sys_enter event\n");
2074                 goto out;
2075         }
2076
2077         if (perf_evsel__init_syscall_tp(evsel, trace__sys_enter) < 0 ||
2078             perf_evsel__init_sc_tp_ptr_field(evsel, args)) {
2079                 pr_err("Error during initialize raw_syscalls:sys_enter event\n");
2080                 goto out;
2081         }
2082
2083         evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2084                                                      "raw_syscalls:sys_exit");
2085         if (evsel == NULL)
2086                 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2087                                                              "syscalls:sys_exit");
2088         if (evsel == NULL) {
2089                 pr_err("Data file does not have raw_syscalls:sys_exit event\n");
2090                 goto out;
2091         }
2092
2093         if (perf_evsel__init_syscall_tp(evsel, trace__sys_exit) < 0 ||
2094             perf_evsel__init_sc_tp_uint_field(evsel, ret)) {
2095                 pr_err("Error during initialize raw_syscalls:sys_exit event\n");
2096                 goto out;
2097         }
2098
2099         err = parse_target_str(trace);
2100         if (err != 0)
2101                 goto out;
2102
2103         setup_pager();
2104
2105         err = perf_session__process_events(session, &trace->tool);
2106         if (err)
2107                 pr_err("Failed to process events, error %d", err);
2108
2109         else if (trace->summary)
2110                 trace__fprintf_thread_summary(trace, trace->output);
2111
2112 out:
2113         perf_session__delete(session);
2114
2115         return err;
2116 }
2117
/*
 * Print the heading of the per-thread summary to fp.
 * Returns the value reported by fprintf() for that heading.
 */
static size_t trace__fprintf_threads_header(FILE *fp)
{
	return fprintf(fp, "\n Summary of events:\n\n");
}
2126
2127 static size_t thread__dump_stats(struct thread_trace *ttrace,
2128                                  struct trace *trace, FILE *fp)
2129 {
2130         struct stats *stats;
2131         size_t printed = 0;
2132         struct syscall *sc;
2133         struct int_node *inode = intlist__first(ttrace->syscall_stats);
2134
2135         if (inode == NULL)
2136                 return 0;
2137
2138         printed += fprintf(fp, "\n");
2139
2140         printed += fprintf(fp, "   syscall            calls      min       avg       max      stddev\n");
2141         printed += fprintf(fp, "                               (msec)    (msec)    (msec)        (%%)\n");
2142         printed += fprintf(fp, "   --------------- -------- --------- --------- ---------     ------\n");
2143
2144         /* each int_node is a syscall */
2145         while (inode) {
2146                 stats = inode->priv;
2147                 if (stats) {
2148                         double min = (double)(stats->min) / NSEC_PER_MSEC;
2149                         double max = (double)(stats->max) / NSEC_PER_MSEC;
2150                         double avg = avg_stats(stats);
2151                         double pct;
2152                         u64 n = (u64) stats->n;
2153
2154                         pct = avg ? 100.0 * stddev_stats(stats)/avg : 0.0;
2155                         avg /= NSEC_PER_MSEC;
2156
2157                         sc = &trace->syscalls.table[inode->i];
2158                         printed += fprintf(fp, "   %-15s", sc->name);
2159                         printed += fprintf(fp, " %8" PRIu64 " %9.3f %9.3f",
2160                                            n, min, avg);
2161                         printed += fprintf(fp, " %9.3f %9.2f%%\n", max, pct);
2162                 }
2163
2164                 inode = intlist__next(inode);
2165         }
2166
2167         printed += fprintf(fp, "\n\n");
2168
2169         return printed;
2170 }
2171
/*
 * Context handed to the per-thread summary callback through its opaque
 * 'priv' argument.
 */
struct summary_data {
	FILE		*fp;		/* stream the summary is written to */
	struct trace	*trace;		/* tracer state the summary reads from */
	size_t		printed;	/* running count of characters printed */
};
2178
2179 static int trace__fprintf_one_thread(struct thread *thread, void *priv)
2180 {
2181         struct summary_data *data = priv;
2182         FILE *fp = data->fp;
2183         size_t printed = data->printed;
2184         struct trace *trace = data->trace;
2185         struct thread_trace *ttrace = thread->priv;
2186         double ratio;
2187
2188         if (ttrace == NULL)
2189                 return 0;
2190
2191         ratio = (double)ttrace->nr_events / trace->nr_events * 100.0;
2192
2193         printed += fprintf(fp, " %s (%d), ", thread__comm_str(thread), thread->tid);
2194         printed += fprintf(fp, "%lu events, ", ttrace->nr_events);
2195         printed += fprintf(fp, "%.1f%%", ratio);
2196         printed += fprintf(fp, ", %.3f msec\n", ttrace->runtime_ms);
2197         printed += thread__dump_stats(ttrace, trace, fp);
2198
2199         data->printed += printed;
2200
2201         return 0;
2202 }
2203
2204 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
2205 {
2206         struct summary_data data = {
2207                 .fp = fp,
2208                 .trace = trace
2209         };
2210         data.printed = trace__fprintf_threads_header(fp);
2211
2212         machine__for_each_thread(trace->host, trace__fprintf_one_thread, &data);
2213
2214         return data.printed;
2215 }
2216
2217 static int trace__set_duration(const struct option *opt, const char *str,
2218                                int unset __maybe_unused)
2219 {
2220         struct trace *trace = opt->value;
2221
2222         trace->duration_filter = atof(str);
2223         return 0;
2224 }
2225
2226 static int trace__open_output(struct trace *trace, const char *filename)
2227 {
2228         struct stat st;
2229
2230         if (!stat(filename, &st) && st.st_size) {
2231                 char oldname[PATH_MAX];
2232
2233                 scnprintf(oldname, sizeof(oldname), "%s.old", filename);
2234                 unlink(oldname);
2235                 rename(filename, oldname);
2236         }
2237
2238         trace->output = fopen(filename, "w");
2239
2240         return trace->output == NULL ? -errno : 0;
2241 }
2242
2243 int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
2244 {
2245         const char * const trace_usage[] = {
2246                 "perf trace [<options>] [<command>]",
2247                 "perf trace [<options>] -- <command> [<options>]",
2248                 "perf trace record [<options>] [<command>]",
2249                 "perf trace record [<options>] -- <command> [<options>]",
2250                 NULL
2251         };
2252         struct trace trace = {
2253                 .audit = {
2254                         .machine = audit_detect_machine(),
2255                         .open_id = audit_name_to_syscall("open", trace.audit.machine),
2256                 },
2257                 .syscalls = {
2258                         . max = -1,
2259                 },
2260                 .opts = {
2261                         .target = {
2262                                 .uid       = UINT_MAX,
2263                                 .uses_mmap = true,
2264                         },
2265                         .user_freq     = UINT_MAX,
2266                         .user_interval = ULLONG_MAX,
2267                         .no_delay      = true,
2268                         .mmap_pages    = 1024,
2269                 },
2270                 .output = stdout,
2271                 .show_comm = true,
2272         };
2273         const char *output_name = NULL;
2274         const char *ev_qualifier_str = NULL;
2275         const struct option trace_options[] = {
2276         OPT_BOOLEAN(0, "comm", &trace.show_comm,
2277                     "show the thread COMM next to its id"),
2278         OPT_BOOLEAN(0, "tool_stats", &trace.show_tool_stats, "show tool stats"),
2279         OPT_STRING('e', "expr", &ev_qualifier_str, "expr",
2280                     "list of events to trace"),
2281         OPT_STRING('o', "output", &output_name, "file", "output file name"),
2282         OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"),
2283         OPT_STRING('p', "pid", &trace.opts.target.pid, "pid",
2284                     "trace events on existing process id"),
2285         OPT_STRING('t', "tid", &trace.opts.target.tid, "tid",
2286                     "trace events on existing thread id"),
2287         OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide,
2288                     "system-wide collection from all CPUs"),
2289         OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu",
2290                     "list of cpus to monitor"),
2291         OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit,
2292                     "child tasks do not inherit counters"),
2293         OPT_CALLBACK('m', "mmap-pages", &trace.opts.mmap_pages, "pages",
2294                      "number of mmap data pages",
2295                      perf_evlist__parse_mmap_pages),
2296         OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user",
2297                    "user to profile"),
2298         OPT_CALLBACK(0, "duration", &trace, "float",
2299                      "show only events with duration > N.M ms",
2300                      trace__set_duration),
2301         OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"),
2302         OPT_INCR('v', "verbose", &verbose, "be more verbose"),
2303         OPT_BOOLEAN('T', "time", &trace.full_time,
2304                     "Show full timestamp, not time relative to first start"),
2305         OPT_BOOLEAN('s', "summary", &trace.summary_only,
2306                     "Show only syscall summary with statistics"),
2307         OPT_BOOLEAN('S', "with-summary", &trace.summary,
2308                     "Show all syscalls and summary with statistics"),
2309         OPT_END()
2310         };
2311         int err;
2312         char bf[BUFSIZ];
2313
2314         if ((argc > 1) && (strcmp(argv[1], "record") == 0))
2315                 return trace__record(argc-2, &argv[2]);
2316
2317         argc = parse_options(argc, argv, trace_options, trace_usage, 0);
2318
2319         /* summary_only implies summary option, but don't overwrite summary if set */
2320         if (trace.summary_only)
2321                 trace.summary = trace.summary_only;
2322
2323         if (output_name != NULL) {
2324                 err = trace__open_output(&trace, output_name);
2325                 if (err < 0) {
2326                         perror("failed to create output file");
2327                         goto out;
2328                 }
2329         }
2330
2331         if (ev_qualifier_str != NULL) {
2332                 const char *s = ev_qualifier_str;
2333
2334                 trace.not_ev_qualifier = *s == '!';
2335                 if (trace.not_ev_qualifier)
2336                         ++s;
2337                 trace.ev_qualifier = strlist__new(true, s);
2338                 if (trace.ev_qualifier == NULL) {
2339                         fputs("Not enough memory to parse event qualifier",
2340                               trace.output);
2341                         err = -ENOMEM;
2342                         goto out_close;
2343                 }
2344         }
2345
2346         err = target__validate(&trace.opts.target);
2347         if (err) {
2348                 target__strerror(&trace.opts.target, err, bf, sizeof(bf));
2349                 fprintf(trace.output, "%s", bf);
2350                 goto out_close;
2351         }
2352
2353         err = target__parse_uid(&trace.opts.target);
2354         if (err) {
2355                 target__strerror(&trace.opts.target, err, bf, sizeof(bf));
2356                 fprintf(trace.output, "%s", bf);
2357                 goto out_close;
2358         }
2359
2360         if (!argc && target__none(&trace.opts.target))
2361                 trace.opts.target.system_wide = true;
2362
2363         if (input_name)
2364                 err = trace__replay(&trace);
2365         else
2366                 err = trace__run(&trace, argc, argv);
2367
2368 out_close:
2369         if (output_name != NULL)
2370                 fclose(trace.output);
2371 out:
2372         return err;
2373 }