perf record: Fix display of incorrect mmap pages
[firefly-linux-kernel-4.4.55.git] / tools / perf / builtin-record.c
1 /*
2  * builtin-record.c
3  *
4  * Builtin record command: Record the profile of a workload
5  * (or a CPU, or a PID) into the perf.data output file - for
6  * later analysis via perf report.
7  */
8 #include "builtin.h"
9
10 #include "perf.h"
11
12 #include "util/build-id.h"
13 #include "util/util.h"
14 #include "util/parse-options.h"
15 #include "util/parse-events.h"
16
17 #include "util/header.h"
18 #include "util/event.h"
19 #include "util/evlist.h"
20 #include "util/evsel.h"
21 #include "util/debug.h"
22 #include "util/session.h"
23 #include "util/tool.h"
24 #include "util/symbol.h"
25 #include "util/cpumap.h"
26 #include "util/thread_map.h"
27 #include "util/data.h"
28
29 #include <unistd.h>
30 #include <sched.h>
31 #include <sys/mman.h>
32
#ifndef HAVE_ON_EXIT_SUPPORT
#ifndef ATEXIT_MAX
#define ATEXIT_MAX 32
#endif
/*
 * Minimal on_exit(3) replacement for C libraries that lack it: registered
 * callbacks (and their user arguments) are stored here and replayed by a
 * single atexit() hook.
 */
static int __on_exit_count = 0;
typedef void (*on_exit_func_t) (int, void *);
static on_exit_func_t __on_exit_funcs[ATEXIT_MAX];
static void *__on_exit_args[ATEXIT_MAX];
static int __exitcode = 0;
static void __handle_on_exit_funcs(void);
static int on_exit(on_exit_func_t function, void *arg);
/*
 * atexit() handlers do not receive the exit status, so capture it at every
 * exit() call site; __handle_on_exit_funcs() forwards it to the callbacks.
 * The parenthesized (exit) call bypasses this macro and reaches the libc
 * function.
 */
#define exit(x) (exit)(__exitcode = (x))

/*
 * Register a callback to run at exit(). Returns 0 on success, -ENOMEM when
 * the fixed-size table is full.
 */
static int on_exit(on_exit_func_t function, void *arg)
{
	if (__on_exit_count == ATEXIT_MAX)
		return -ENOMEM;
	else if (__on_exit_count == 0)
		atexit(__handle_on_exit_funcs);	/* hook in lazily, once */
	__on_exit_funcs[__on_exit_count] = function;
	__on_exit_args[__on_exit_count++] = arg;
	return 0;
}

/*
 * atexit() trampoline: invoke the registered callbacks in registration
 * order. NOTE(review): glibc's on_exit(3) runs handlers in *reverse*
 * registration order; this shim runs them forward — confirm callers here
 * do not depend on LIFO ordering.
 */
static void __handle_on_exit_funcs(void)
{
	int i;
	for (i = 0; i < __on_exit_count; i++)
		__on_exit_funcs[i] (__exitcode, __on_exit_args[i]);
}
#endif
64
/*
 * State for one 'perf record' session. Recovered from the embedded
 * perf_tool via container_of() in the tool callbacks.
 */
struct perf_record {
	struct perf_tool	tool;
	struct perf_record_opts	opts;
	u64			bytes_written;	/* event payload bytes written so far */
	struct perf_data_file	file;		/* output perf.data (or pipe) */
	struct perf_evlist	*evlist;
	struct perf_session	*session;
	const char		*progname;	/* argv[0], used by psignal() reporting */
	int			realtime_prio;	/* nonzero: SCHED_FIFO priority to request */
	bool			no_buildid;
	bool			no_buildid_cache;
	long			samples;	/* count of non-empty mmap flushes, not samples */
};
78
79 static int perf_record__write(struct perf_record *rec, void *buf, size_t size)
80 {
81         struct perf_data_file *file = &rec->file;
82
83         while (size) {
84                 ssize_t ret = write(file->fd, buf, size);
85
86                 if (ret < 0) {
87                         pr_err("failed to write perf data, error: %m\n");
88                         return -1;
89                 }
90
91                 size -= ret;
92                 buf += ret;
93
94                 rec->bytes_written += ret;
95         }
96
97         return 0;
98 }
99
100 static int process_synthesized_event(struct perf_tool *tool,
101                                      union perf_event *event,
102                                      struct perf_sample *sample __maybe_unused,
103                                      struct machine *machine __maybe_unused)
104 {
105         struct perf_record *rec = container_of(tool, struct perf_record, tool);
106         return perf_record__write(rec, event, event->header.size);
107 }
108
/*
 * Drain one mmap'ed perf ring buffer into the output file, handling the
 * case where the unread region wraps around the end of the buffer.
 * Returns 0 on success, -1 on write failure.
 */
static int perf_record__mmap_read(struct perf_record *rec,
				   struct perf_mmap *md)
{
	unsigned int head = perf_mmap__read_head(md);
	unsigned int old = md->prev;	/* where the previous flush stopped */
	/* data area starts one page past the control page */
	unsigned char *data = md->base + page_size;
	unsigned long size;
	void *buf;
	int rc = 0;

	/* nothing new since last time */
	if (old == head)
		return 0;

	rec->samples++;

	size = head - old;

	/*
	 * If the [old, head) region wraps past the end of the ring, write
	 * the tail piece up to the buffer end first, then fall through to
	 * write the remainder from the start.
	 */
	if ((old & md->mask) + size != (head & md->mask)) {
		buf = &data[old & md->mask];
		size = md->mask + 1 - (old & md->mask);
		old += size;

		if (perf_record__write(rec, buf, size) < 0) {
			rc = -1;
			goto out;
		}
	}

	buf = &data[old & md->mask];
	size = head - old;
	old += size;

	if (perf_record__write(rec, buf, size) < 0) {
		rc = -1;
		goto out;
	}

	md->prev = old;
	/* tell the kernel everything up to 'old' has been consumed */
	perf_mmap__write_tail(md, old);

out:
	return rc;
}
152
/* Set asynchronously by sig_handler(); polled by the main record loop. */
static volatile int done = 0;
static volatile int signr = -1;	/* which signal stopped us, -1 = none */
static volatile int child_finished = 0;	/* SIGCHLD seen: workload exited */
156
157 static void sig_handler(int sig)
158 {
159         if (sig == SIGCHLD)
160                 child_finished = 1;
161
162         done = 1;
163         signr = sig;
164 }
165
/*
 * on_exit() handler: reap (killing first, if still running) the forked
 * workload, then restore the default disposition of the signal that
 * stopped us.
 */
static void perf_record__sig_exit(int exit_status __maybe_unused, void *arg)
{
	struct perf_record *rec = arg;
	int status;

	if (rec->evlist->workload.pid > 0) {
		/* still running: ask it to terminate before reaping */
		if (!child_finished)
			kill(rec->evlist->workload.pid, SIGTERM);

		wait(&status);
		if (WIFSIGNALED(status))
			psignal(WTERMSIG(status), rec->progname);
	}

	/* normal exit, or SIGUSR1 (treated as a benign stop): nothing to do */
	if (signr == -1 || signr == SIGUSR1)
		return;

	signal(signr, SIG_DFL);
}
185
186 static int perf_record__open(struct perf_record *rec)
187 {
188         char msg[512];
189         struct perf_evsel *pos;
190         struct perf_evlist *evlist = rec->evlist;
191         struct perf_session *session = rec->session;
192         struct perf_record_opts *opts = &rec->opts;
193         int rc = 0;
194
195         perf_evlist__config(evlist, opts);
196
197         list_for_each_entry(pos, &evlist->entries, node) {
198 try_again:
199                 if (perf_evsel__open(pos, evlist->cpus, evlist->threads) < 0) {
200                         if (perf_evsel__fallback(pos, errno, msg, sizeof(msg))) {
201                                 if (verbose)
202                                         ui__warning("%s\n", msg);
203                                 goto try_again;
204                         }
205
206                         rc = -errno;
207                         perf_evsel__open_strerror(pos, &opts->target,
208                                                   errno, msg, sizeof(msg));
209                         ui__error("%s\n", msg);
210                         goto out;
211                 }
212         }
213
214         if (perf_evlist__apply_filters(evlist)) {
215                 error("failed to set filter with %d (%s)\n", errno,
216                         strerror(errno));
217                 rc = -1;
218                 goto out;
219         }
220
221         if (perf_evlist__mmap(evlist, opts->mmap_pages, false) < 0) {
222                 if (errno == EPERM) {
223                         pr_err("Permission error mapping pages.\n"
224                                "Consider increasing "
225                                "/proc/sys/kernel/perf_event_mlock_kb,\n"
226                                "or try again with a smaller value of -m/--mmap_pages.\n"
227                                "(current value: %u)\n", opts->mmap_pages);
228                         rc = -errno;
229                 } else {
230                         pr_err("failed to mmap with %d (%s)\n", errno, strerror(errno));
231                         rc = -errno;
232                 }
233                 goto out;
234         }
235
236         session->evlist = evlist;
237         perf_session__set_id_hdr_size(session);
238 out:
239         return rc;
240 }
241
242 static int process_buildids(struct perf_record *rec)
243 {
244         struct perf_data_file *file  = &rec->file;
245         struct perf_session *session = rec->session;
246         u64 start = session->header.data_offset;
247
248         u64 size = lseek(file->fd, 0, SEEK_CUR);
249         if (size == 0)
250                 return 0;
251
252         return __perf_session__process_events(session, start,
253                                               size - start,
254                                               size, &build_id__mark_dso_hit_ops);
255 }
256
/*
 * on_exit() handler: on successful exit, finalize the perf.data file
 * (build-ids + rewritten header) and release session resources.
 */
static void perf_record__exit(int status, void *arg)
{
	struct perf_record *rec = arg;
	struct perf_data_file *file = &rec->file;

	/* exiting on error: leave the partial file as-is */
	if (status != 0)
		return;

	if (!file->is_pipe) {
		rec->session->header.data_size += rec->bytes_written;

		if (!rec->no_buildid)
			process_buildids(rec);
		/* rewrite the header now that data_size is final */
		perf_session__write_header(rec->session, rec->evlist,
					   file->fd, true);
		perf_session__delete(rec->session);
		perf_evlist__delete(rec->evlist);
		symbol__exit();
	}
	/*
	 * NOTE(review): in pipe mode the session/evlist are not freed here —
	 * presumably acceptable since the process is exiting; confirm.
	 */
}
277
278 static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
279 {
280         int err;
281         struct perf_tool *tool = data;
282         /*
283          *As for guest kernel when processing subcommand record&report,
284          *we arrange module mmap prior to guest kernel mmap and trigger
285          *a preload dso because default guest module symbols are loaded
286          *from guest kallsyms instead of /lib/modules/XXX/XXX. This
287          *method is used to avoid symbol missing when the first addr is
288          *in module instead of in guest kernel.
289          */
290         err = perf_event__synthesize_modules(tool, process_synthesized_event,
291                                              machine);
292         if (err < 0)
293                 pr_err("Couldn't record guest kernel [%d]'s reference"
294                        " relocation symbol.\n", machine->pid);
295
296         /*
297          * We use _stext for guest kernel because guest kernel's /proc/kallsyms
298          * have no _text sometimes.
299          */
300         err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
301                                                  machine, "_text");
302         if (err < 0)
303                 err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
304                                                          machine, "_stext");
305         if (err < 0)
306                 pr_err("Couldn't record guest kernel [%d]'s reference"
307                        " relocation symbol.\n", machine->pid);
308 }
309
/*
 * Header-only PERF_RECORD_FINISHED_ROUND marker, written after each full
 * pass over the mmap buffers (see perf_record__mmap_read_all()).
 */
static struct perf_event_header finished_round_event = {
	.size = sizeof(struct perf_event_header),
	.type = PERF_RECORD_FINISHED_ROUND,
};
314
315 static int perf_record__mmap_read_all(struct perf_record *rec)
316 {
317         int i;
318         int rc = 0;
319
320         for (i = 0; i < rec->evlist->nr_mmaps; i++) {
321                 if (rec->evlist->mmap[i].base) {
322                         if (perf_record__mmap_read(rec, &rec->evlist->mmap[i]) != 0) {
323                                 rc = -1;
324                                 goto out;
325                         }
326                 }
327         }
328
329         if (perf_header__has_feat(&rec->session->header, HEADER_TRACING_DATA))
330                 rc = perf_record__write(rec, &finished_round_event,
331                                         sizeof(finished_round_event));
332
333 out:
334         return rc;
335 }
336
337 static void perf_record__init_features(struct perf_record *rec)
338 {
339         struct perf_evlist *evsel_list = rec->evlist;
340         struct perf_session *session = rec->session;
341         int feat;
342
343         for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
344                 perf_header__set_feat(&session->header, feat);
345
346         if (rec->no_buildid)
347                 perf_header__clear_feat(&session->header, HEADER_BUILD_ID);
348
349         if (!have_tracepoints(&evsel_list->entries))
350                 perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);
351
352         if (!rec->opts.branch_stack)
353                 perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);
354 }
355
/*
 * Core of 'perf record': set up the session, synthesize the initial
 * metadata events, then loop draining the mmap buffers until told to stop.
 * If argc > 0 a workload is forked and traced. Returns 0 or negative error.
 */
static int __cmd_record(struct perf_record *rec, int argc, const char **argv)
{
	int err;
	unsigned long waking = 0;	/* poll() wakeups, reported at the end */
	const bool forks = argc > 0;
	struct machine *machine;
	struct perf_tool *tool = &rec->tool;
	struct perf_record_opts *opts = &rec->opts;
	struct perf_evlist *evsel_list = rec->evlist;
	struct perf_data_file *file = &rec->file;
	struct perf_session *session;
	bool disabled = false;

	rec->progname = argv[0];

	/* reap/kill the workload and reset signal disposition on exit() */
	on_exit(perf_record__sig_exit, rec);
	signal(SIGCHLD, sig_handler);
	signal(SIGINT, sig_handler);
	signal(SIGUSR1, sig_handler);
	signal(SIGTERM, sig_handler);

	session = perf_session__new(file, false, NULL);
	if (session == NULL) {
		pr_err("Not enough memory for reading perf file header\n");
		return -1;
	}

	rec->session = session;

	perf_record__init_features(rec);

	if (forks) {
		/* fork the workload now; it is started later, after setup */
		err = perf_evlist__prepare_workload(evsel_list, &opts->target,
						    argv, file->is_pipe,
						    true);
		if (err < 0) {
			pr_err("Couldn't run the workload!\n");
			goto out_delete_session;
		}
	}

	if (perf_record__open(rec) != 0) {
		err = -1;
		goto out_delete_session;
	}

	if (!evsel_list->nr_groups)
		perf_header__clear_feat(&session->header, HEADER_GROUP_DESC);

	/*
	 * perf_session__delete(session) will be called at perf_record__exit()
	 */
	on_exit(perf_record__exit, rec);

	/* write a provisional header; finalized in perf_record__exit() */
	if (file->is_pipe) {
		err = perf_header__write_pipe(file->fd);
		if (err < 0)
			goto out_delete_session;
	} else {
		err = perf_session__write_header(session, evsel_list,
						 file->fd, false);
		if (err < 0)
			goto out_delete_session;
	}

	if (!rec->no_buildid
	    && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) {
		pr_err("Couldn't generate buildids. "
		       "Use --no-buildid to profile anyway.\n");
		err = -1;
		goto out_delete_session;
	}

	machine = &session->machines.host;

	/* pipe consumers get attrs/tracing data as synthesized events */
	if (file->is_pipe) {
		err = perf_event__synthesize_attrs(tool, session,
						   process_synthesized_event);
		if (err < 0) {
			pr_err("Couldn't synthesize attrs.\n");
			goto out_delete_session;
		}

		if (have_tracepoints(&evsel_list->entries)) {
			/*
			 * FIXME err <= 0 here actually means that
			 * there were no tracepoints so its not really
			 * an error, just that we don't need to
			 * synthesize anything.  We really have to
			 * return this more properly and also
			 * propagate errors that now are calling die()
			 */
			err = perf_event__synthesize_tracing_data(tool, file->fd, evsel_list,
								  process_synthesized_event);
			if (err <= 0) {
				pr_err("Couldn't record tracing data.\n");
				goto out_delete_session;
			}
			rec->bytes_written += err;
		}
	}

	/* kernel mmap: try _text, fall back to _stext (see guest_os note) */
	err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
						 machine, "_text");
	if (err < 0)
		err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
							 machine, "_stext");
	if (err < 0)
		pr_err("Couldn't record kernel reference relocation symbol\n"
		       "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
		       "Check /proc/kallsyms permission or run as root.\n");

	err = perf_event__synthesize_modules(tool, process_synthesized_event,
					     machine);
	if (err < 0)
		pr_err("Couldn't record kernel module information.\n"
		       "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
		       "Check /proc/modules permission or run as root.\n");

	if (perf_guest) {
		machines__process_guests(&session->machines,
					 perf_event__synthesize_guest_os, tool);
	}

	err = __machine__synthesize_threads(machine, tool, &opts->target, evsel_list->threads,
					    process_synthesized_event, opts->sample_address);
	if (err != 0)
		goto out_delete_session;

	if (rec->realtime_prio) {
		struct sched_param param;

		param.sched_priority = rec->realtime_prio;
		if (sched_setscheduler(0, SCHED_FIFO, &param)) {
			pr_err("Could not set realtime priority.\n");
			err = -1;
			goto out_delete_session;
		}
	}

	/*
	 * When perf is starting the traced process, all the events
	 * (apart from group members) have enable_on_exec=1 set,
	 * so don't spoil it by prematurely enabling them.
	 */
	if (!target__none(&opts->target))
		perf_evlist__enable(evsel_list);

	/*
	 * Let the child rip
	 */
	if (forks)
		perf_evlist__start_workload(evsel_list);

	for (;;) {
		int hits = rec->samples;

		if (perf_record__mmap_read_all(rec) < 0) {
			err = -1;
			goto out_delete_session;
		}

		/* no buffer had data: sleep until the kernel wakes us */
		if (hits == rec->samples) {
			if (done)
				break;
			/*
			 * NOTE(review): poll()'s return value is stored in
			 * 'err' but never checked and is overwritten below —
			 * looks intentional (interruption just re-loops).
			 */
			err = poll(evsel_list->pollfd, evsel_list->nr_fds, -1);
			waking++;
		}

		/*
		 * When perf is starting the traced process, at the end events
		 * die with the process and we wait for that. Thus no need to
		 * disable events in this case.
		 */
		if (done && !disabled && !target__none(&opts->target)) {
			perf_evlist__disable(evsel_list);
			disabled = true;
		}
	}

	if (quiet || signr == SIGUSR1)
		return 0;

	fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking);

	/*
	 * Approximate RIP event size: 24 bytes.
	 */
	fprintf(stderr,
		"[ perf record: Captured and wrote %.3f MB %s (~%" PRIu64 " samples) ]\n",
		(double)rec->bytes_written / 1024.0 / 1024.0,
		file->path,
		rec->bytes_written / 24);

	return 0;

out_delete_session:
	perf_session__delete(session);
	return err;
}
556
/* Helpers for building the branch_modes[] table below. */
#define BRANCH_OPT(n, m) \
	{ .name = n, .mode = (m) }

#define BRANCH_END { .name = NULL }

/* Maps one -j/--branch-filter token to its PERF_SAMPLE_BRANCH_* bit. */
struct branch_mode {
	const char *name;
	int mode;
};
566
/* Recognized branch filter tokens; terminated by BRANCH_END. */
static const struct branch_mode branch_modes[] = {
	BRANCH_OPT("u", PERF_SAMPLE_BRANCH_USER),
	BRANCH_OPT("k", PERF_SAMPLE_BRANCH_KERNEL),
	BRANCH_OPT("hv", PERF_SAMPLE_BRANCH_HV),
	BRANCH_OPT("any", PERF_SAMPLE_BRANCH_ANY),
	BRANCH_OPT("any_call", PERF_SAMPLE_BRANCH_ANY_CALL),
	BRANCH_OPT("any_ret", PERF_SAMPLE_BRANCH_ANY_RETURN),
	BRANCH_OPT("ind_call", PERF_SAMPLE_BRANCH_IND_CALL),
	BRANCH_OPT("abort_tx", PERF_SAMPLE_BRANCH_ABORT_TX),
	BRANCH_OPT("in_tx", PERF_SAMPLE_BRANCH_IN_TX),
	BRANCH_OPT("no_tx", PERF_SAMPLE_BRANCH_NO_TX),
	BRANCH_END
};
580
581 static int
582 parse_branch_stack(const struct option *opt, const char *str, int unset)
583 {
584 #define ONLY_PLM \
585         (PERF_SAMPLE_BRANCH_USER        |\
586          PERF_SAMPLE_BRANCH_KERNEL      |\
587          PERF_SAMPLE_BRANCH_HV)
588
589         uint64_t *mode = (uint64_t *)opt->value;
590         const struct branch_mode *br;
591         char *s, *os = NULL, *p;
592         int ret = -1;
593
594         if (unset)
595                 return 0;
596
597         /*
598          * cannot set it twice, -b + --branch-filter for instance
599          */
600         if (*mode)
601                 return -1;
602
603         /* str may be NULL in case no arg is passed to -b */
604         if (str) {
605                 /* because str is read-only */
606                 s = os = strdup(str);
607                 if (!s)
608                         return -1;
609
610                 for (;;) {
611                         p = strchr(s, ',');
612                         if (p)
613                                 *p = '\0';
614
615                         for (br = branch_modes; br->name; br++) {
616                                 if (!strcasecmp(s, br->name))
617                                         break;
618                         }
619                         if (!br->name) {
620                                 ui__warning("unknown branch filter %s,"
621                                             " check man page\n", s);
622                                 goto error;
623                         }
624
625                         *mode |= br->mode;
626
627                         if (!p)
628                                 break;
629
630                         s = p + 1;
631                 }
632         }
633         ret = 0;
634
635         /* default to any branch */
636         if ((*mode & ~ONLY_PLM) == 0) {
637                 *mode = PERF_SAMPLE_BRANCH_ANY;
638         }
639 error:
640         free(os);
641         return ret;
642 }
643
644 #ifdef HAVE_LIBUNWIND_SUPPORT
645 static int get_stack_size(char *str, unsigned long *_size)
646 {
647         char *endptr;
648         unsigned long size;
649         unsigned long max_size = round_down(USHRT_MAX, sizeof(u64));
650
651         size = strtoul(str, &endptr, 0);
652
653         do {
654                 if (*endptr)
655                         break;
656
657                 size = round_up(size, sizeof(u64));
658                 if (!size || size > max_size)
659                         break;
660
661                 *_size = size;
662                 return 0;
663
664         } while (0);
665
666         pr_err("callchain: Incorrect stack dump size (max %ld): %s\n",
667                max_size, str);
668         return -1;
669 }
670 #endif /* HAVE_LIBUNWIND_SUPPORT */
671
672 int record_parse_callchain(const char *arg, struct perf_record_opts *opts)
673 {
674         char *tok, *name, *saveptr = NULL;
675         char *buf;
676         int ret = -1;
677
678         /* We need buffer that we know we can write to. */
679         buf = malloc(strlen(arg) + 1);
680         if (!buf)
681                 return -ENOMEM;
682
683         strcpy(buf, arg);
684
685         tok = strtok_r((char *)buf, ",", &saveptr);
686         name = tok ? : (char *)buf;
687
688         do {
689                 /* Framepointer style */
690                 if (!strncmp(name, "fp", sizeof("fp"))) {
691                         if (!strtok_r(NULL, ",", &saveptr)) {
692                                 opts->call_graph = CALLCHAIN_FP;
693                                 ret = 0;
694                         } else
695                                 pr_err("callchain: No more arguments "
696                                        "needed for -g fp\n");
697                         break;
698
699 #ifdef HAVE_LIBUNWIND_SUPPORT
700                 /* Dwarf style */
701                 } else if (!strncmp(name, "dwarf", sizeof("dwarf"))) {
702                         const unsigned long default_stack_dump_size = 8192;
703
704                         ret = 0;
705                         opts->call_graph = CALLCHAIN_DWARF;
706                         opts->stack_dump_size = default_stack_dump_size;
707
708                         tok = strtok_r(NULL, ",", &saveptr);
709                         if (tok) {
710                                 unsigned long size = 0;
711
712                                 ret = get_stack_size(tok, &size);
713                                 opts->stack_dump_size = size;
714                         }
715 #endif /* HAVE_LIBUNWIND_SUPPORT */
716                 } else {
717                         pr_err("callchain: Unknown --call-graph option "
718                                "value: %s\n", arg);
719                         break;
720                 }
721
722         } while (0);
723
724         free(buf);
725         return ret;
726 }
727
728 static void callchain_debug(struct perf_record_opts *opts)
729 {
730         pr_debug("callchain: type %d\n", opts->call_graph);
731
732         if (opts->call_graph == CALLCHAIN_DWARF)
733                 pr_debug("callchain: stack dump size %d\n",
734                          opts->stack_dump_size);
735 }
736
737 int record_parse_callchain_opt(const struct option *opt,
738                                const char *arg,
739                                int unset)
740 {
741         struct perf_record_opts *opts = opt->value;
742         int ret;
743
744         /* --no-call-graph */
745         if (unset) {
746                 opts->call_graph = CALLCHAIN_NONE;
747                 pr_debug("callchain: disabled\n");
748                 return 0;
749         }
750
751         ret = record_parse_callchain(arg, opts);
752         if (!ret)
753                 callchain_debug(opts);
754
755         return ret;
756 }
757
758 int record_callchain_opt(const struct option *opt,
759                          const char *arg __maybe_unused,
760                          int unset __maybe_unused)
761 {
762         struct perf_record_opts *opts = opt->value;
763
764         if (opts->call_graph == CALLCHAIN_NONE)
765                 opts->call_graph = CALLCHAIN_FP;
766
767         callchain_debug(opts);
768         return 0;
769 }
770
/* Usage strings shown by parse_options() for 'perf record'. */
static const char * const record_usage[] = {
	"perf record [<options>] [<command>]",
	"perf record [<options>] -- <command> [<options>]",
	NULL
};
776
777 /*
778  * XXX Ideally would be local to cmd_record() and passed to a perf_record__new
779  * because we need to have access to it in perf_record__exit, that is called
780  * after cmd_record() exits, but since record_options need to be accessible to
781  * builtin-script, leave it here.
782  *
783  * At least we don't ouch it in all the other functions here directly.
784  *
785  * Just say no to tons of global variables, sigh.
786  */
/*
 * Default record session state; command-line options override these.
 * The UINT_MAX/ULLONG_MAX values appear to be "not specified" sentinels
 * resolved to real defaults later — confirm against perf_evlist__config().
 */
static struct perf_record record = {
	.opts = {
		.mmap_pages	     = UINT_MAX,
		.user_freq	     = UINT_MAX,
		.user_interval	     = ULLONG_MAX,
		.freq		     = 4000,
		.target		     = {
			.uses_mmap   = true,
			.default_per_cpu = true,
		},
	},
};
799
#define CALLCHAIN_HELP "setup and enables call-graph (stack chain/backtrace) recording: "

/* Advertise "dwarf" only when libunwind support was compiled in. */
#ifdef HAVE_LIBUNWIND_SUPPORT
const char record_callchain_help[] = CALLCHAIN_HELP "fp dwarf";
#else
const char record_callchain_help[] = CALLCHAIN_HELP "fp";
#endif
807
808 /*
809  * XXX Will stay a global variable till we fix builtin-script.c to stop messing
810  * with it and switch to use the library functions in perf_evlist that came
811  * from builtin-record.c, i.e. use perf_record_opts,
812  * perf_evlist__prepare_workload, etc instead of fork+exec'in 'perf record',
813  * using pipes, etc.
814  */
815 const struct option record_options[] = {
816         OPT_CALLBACK('e', "event", &record.evlist, "event",
817                      "event selector. use 'perf list' to list available events",
818                      parse_events_option),
819         OPT_CALLBACK(0, "filter", &record.evlist, "filter",
820                      "event filter", parse_filter),
821         OPT_STRING('p', "pid", &record.opts.target.pid, "pid",
822                     "record events on existing process id"),
823         OPT_STRING('t', "tid", &record.opts.target.tid, "tid",
824                     "record events on existing thread id"),
825         OPT_INTEGER('r', "realtime", &record.realtime_prio,
826                     "collect data with this RT SCHED_FIFO priority"),
827         OPT_BOOLEAN('D', "no-delay", &record.opts.no_delay,
828                     "collect data without buffering"),
829         OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples,
830                     "collect raw sample records from all opened counters"),
831         OPT_BOOLEAN('a', "all-cpus", &record.opts.target.system_wide,
832                             "system-wide collection from all CPUs"),
833         OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu",
834                     "list of cpus to monitor"),
835         OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"),
836         OPT_STRING('o', "output", &record.file.path, "file",
837                     "output file name"),
838         OPT_BOOLEAN_SET('i', "no-inherit", &record.opts.no_inherit,
839                         &record.opts.no_inherit_set,
840                         "child tasks do not inherit counters"),
841         OPT_UINTEGER('F', "freq", &record.opts.user_freq, "profile at this frequency"),
842         OPT_CALLBACK('m', "mmap-pages", &record.opts.mmap_pages, "pages",
843                      "number of mmap data pages",
844                      perf_evlist__parse_mmap_pages),
845         OPT_BOOLEAN(0, "group", &record.opts.group,
846                     "put the counters into a counter group"),
847         OPT_CALLBACK_NOOPT('g', NULL, &record.opts,
848                            NULL, "enables call-graph recording" ,
849                            &record_callchain_opt),
850         OPT_CALLBACK(0, "call-graph", &record.opts,
851                      "mode[,dump_size]", record_callchain_help,
852                      &record_parse_callchain_opt),
853         OPT_INCR('v', "verbose", &verbose,
854                     "be more verbose (show counter open errors, etc)"),
855         OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),
856         OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat,
857                     "per thread counts"),
858         OPT_BOOLEAN('d', "data", &record.opts.sample_address,
859                     "Sample addresses"),
860         OPT_BOOLEAN('T', "timestamp", &record.opts.sample_time, "Sample timestamps"),
861         OPT_BOOLEAN('P', "period", &record.opts.period, "Sample period"),
862         OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples,
863                     "don't sample"),
864         OPT_BOOLEAN('N', "no-buildid-cache", &record.no_buildid_cache,
865                     "do not update the buildid cache"),
866         OPT_BOOLEAN('B', "no-buildid", &record.no_buildid,
867                     "do not collect buildids in perf.data"),
868         OPT_CALLBACK('G', "cgroup", &record.evlist, "name",
869                      "monitor event in cgroup name only",
870                      parse_cgroups),
871         OPT_STRING('u', "uid", &record.opts.target.uid_str, "user",
872                    "user to profile"),
873
874         OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack,
875                      "branch any", "sample any taken branches",
876                      parse_branch_stack),
877
878         OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack,
879                      "branch filter mask", "branch stack filter modes",
880                      parse_branch_stack),
881         OPT_BOOLEAN('W', "weight", &record.opts.sample_weight,
882                     "sample by weight (on special events only)"),
883         OPT_BOOLEAN(0, "transaction", &record.opts.sample_transaction,
884                     "sample transaction flags (special events only)"),
885         OPT_BOOLEAN(0, "per-thread", &record.opts.target.per_thread,
886                     "use per-thread mmaps"),
887         OPT_END()
888 };
889
/*
 * cmd_record - entry point for the 'perf record' subcommand.
 *
 * Parses command-line options into the file-scope 'record' state, validates
 * the measurement target (pid/tid/cpu/uid/system-wide), builds the evsel
 * list and its cpu/thread maps, then hands off to __cmd_record() to do the
 * actual recording.  Cleanup on the way out uses the classic goto-label
 * chain so each acquired resource is released exactly once.
 *
 * Returns 0 on success or a negative error code.  Several early failure
 * paths call usage_with_options(), which presumably prints usage and exits
 * the process rather than returning -- TODO confirm against parse-options.c.
 */
890 int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused)
891 {
        /* Default to -ENOMEM: the first failable steps are allocations. */
892         int err = -ENOMEM;
893         struct perf_evlist *evsel_list;
894         struct perf_record *rec = &record;
        /* Scratch buffer for target__strerror() messages below. */
895         char errbuf[BUFSIZ];
896
897         evsel_list = perf_evlist__new();
898         if (evsel_list == NULL)
899                 return -ENOMEM;
900
901         rec->evlist = evsel_list;
902
        /*
         * Stop at the first non-option argument: anything after the options
         * is the workload command line to be forked and profiled.
         */
903         argc = parse_options(argc, argv, record_options, record_usage,
904                             PARSE_OPT_STOP_AT_NON_OPTION);
        /* No workload and no explicit target (pid/tid/cpu/...) -> nothing to record. */
905         if (!argc && target__none(&rec->opts.target))
906                 usage_with_options(record_usage, record_options);
907
        /* Cgroup monitoring (-G) only makes sense together with -a. */
908         if (nr_cgroups && !rec->opts.target.system_wide) {
909                 ui__error("cgroup monitoring only available in"
910                           " system-wide mode\n");
911                 usage_with_options(record_usage, record_options);
912         }
913
914         symbol__init();
915
        /*
         * Warn (but continue) if kernel pointers are hidden: samples can
         * still be recorded, they just may not resolve to symbols later.
         */
916         if (symbol_conf.kptr_restrict)
917                 pr_warning(
918 "WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
919 "check /proc/sys/kernel/kptr_restrict.\n\n"
920 "Samples in kernel functions may not be resolved if a suitable vmlinux\n"
921 "file is not found in the buildid cache or in the vmlinux path.\n\n"
922 "Samples in kernel modules won't be resolved at all.\n\n"
923 "If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
924 "even with a suitable vmlinux or kallsyms file.\n\n");
925
926         if (rec->no_buildid_cache || rec->no_buildid)
927                 disable_buildid_cache();
928
        /* No events given on the command line: fall back to the default event. */
929         if (evsel_list->nr_entries == 0 &&
930             perf_evlist__add_default(evsel_list) < 0) {
931                 pr_err("Not enough memory for event selector list\n");
932                 goto out_symbol_exit;
933         }
934
        /*
         * When profiling an existing thread (-t) and the user did not set
         * -i/--no-inherit explicitly, default to not inheriting counters.
         */
935         if (rec->opts.target.tid && !rec->opts.no_inherit_set)
936                 rec->opts.no_inherit = true;
937
        /* Target inconsistencies (e.g. conflicting options) are non-fatal: warn and go on. */
938         err = target__validate(&rec->opts.target);
939         if (err) {
940                 target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
941                 ui__warning("%s", errbuf);
942         }
943
        /* A bad -u/--uid, however, is fatal. */
944         err = target__parse_uid(&rec->opts.target);
945         if (err) {
                /*
                 * Save errno before target__strerror(): it may call libc
                 * routines that clobber it -- presumably why the original
                 * author stashed it here.
                 */
946                 int saved_errno = errno;
947
948                 target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
949                 ui__error("%s", errbuf);
950
951                 err = -saved_errno;
952                 goto out_symbol_exit;
953         }
954
955         err = -ENOMEM;
956         if (perf_evlist__create_maps(evsel_list, &rec->opts.target) < 0)
957                 usage_with_options(record_usage, record_options);
958
959         if (perf_record_opts__config(&rec->opts)) {
960                 err = -EINVAL;
961                 goto out_free_fd;
962         }
963
        /* All set up: run the actual record session; err carries its result out. */
964         err = __cmd_record(&record, argc, argv);
965
        /* Unconditional teardown of the mmap'ed rings and event FDs. */
966         perf_evlist__munmap(evsel_list);
967         perf_evlist__close(evsel_list);
968 out_free_fd:
969         perf_evlist__delete_maps(evsel_list);
970 out_symbol_exit:
971         symbol__exit();
972         return err;
973 }