perf record: Use perf_data_file__write for output file
[firefly-linux-kernel-4.4.55.git] / tools / perf / builtin-record.c
1 /*
2  * builtin-record.c
3  *
4  * Builtin record command: Record the profile of a workload
5  * (or a CPU, or a PID) into the perf.data output file - for
6  * later analysis via perf report.
7  */
8 #include "builtin.h"
9
10 #include "perf.h"
11
12 #include "util/build-id.h"
13 #include "util/util.h"
14 #include "util/parse-options.h"
15 #include "util/parse-events.h"
16
17 #include "util/header.h"
18 #include "util/event.h"
19 #include "util/evlist.h"
20 #include "util/evsel.h"
21 #include "util/debug.h"
22 #include "util/session.h"
23 #include "util/tool.h"
24 #include "util/symbol.h"
25 #include "util/cpumap.h"
26 #include "util/thread_map.h"
27 #include "util/data.h"
28
29 #include <unistd.h>
30 #include <sched.h>
31 #include <sys/mman.h>
32
#ifndef HAVE_ON_EXIT_SUPPORT
#ifndef ATEXIT_MAX
#define ATEXIT_MAX 32
#endif
/*
 * Fallback on_exit() for libcs that lack it: handlers are kept in
 * fixed-size tables and dispatched from a single atexit() hook.
 * Since atexit() handlers receive no status argument, exit() is
 * wrapped so the status code is captured in __exitcode first.
 */
static int __on_exit_count = 0;
typedef void (*on_exit_func_t) (int, void *);
static on_exit_func_t __on_exit_funcs[ATEXIT_MAX];
static void *__on_exit_args[ATEXIT_MAX];
static int __exitcode = 0;
static void __handle_on_exit_funcs(void);
static int on_exit(on_exit_func_t function, void *arg);
/* Capture the exit status before the real exit() runs our atexit hook. */
#define exit(x) (exit)(__exitcode = (x))

static int on_exit(on_exit_func_t function, void *arg)
{
	if (__on_exit_count == ATEXIT_MAX)
		return -ENOMEM;	/* handler table full */
	else if (__on_exit_count == 0)
		/* first registration: install the dispatcher */
		atexit(__handle_on_exit_funcs);
	__on_exit_funcs[__on_exit_count] = function;
	__on_exit_args[__on_exit_count++] = arg;
	return 0;
}

/*
 * Dispatch registered handlers with the captured exit status.
 * NOTE(review): glibc's on_exit() runs handlers in reverse order of
 * registration; this runs them in registration order — confirm callers
 * don't depend on LIFO ordering.
 */
static void __handle_on_exit_funcs(void)
{
	int i;
	for (i = 0; i < __on_exit_count; i++)
		__on_exit_funcs[i] (__exitcode, __on_exit_args[i]);
}
#endif
64
/* Per-invocation state of 'perf record'. */
struct perf_record {
	struct perf_tool	tool;		/* callbacks; embedded so container_of() recovers rec */
	struct perf_record_opts opts;		/* parsed command-line options */
	u64			bytes_written;	/* event payload bytes written so far */
	struct perf_data_file	file;		/* output file (perf.data or pipe) */
	struct perf_evlist	*evlist;	/* events being recorded */
	struct perf_session	*session;
	const char		*progname;	/* argv[0], used for psignal() messages */
	int			realtime_prio;	/* nonzero: SCHED_FIFO priority to request */
	bool			no_buildid;	/* skip build-id collection at exit */
	bool			no_buildid_cache;
	long			samples;	/* number of mmap reads that found new data */
};
78
79 static ssize_t perf_record__write(struct perf_record *rec,
80                                   void *buf, size_t size)
81 {
82         struct perf_session *session = rec->session;
83         ssize_t ret;
84
85         ret = perf_data_file__write(session->file, buf, size);
86         if (ret < 0) {
87                 pr_err("failed to write perf data, error: %m\n");
88                 return -1;
89         }
90
91         rec->bytes_written += ret;
92         return 0;
93 }
94
/*
 * perf_tool callback used while synthesizing events: forward the event
 * straight to the output file.  @sample and @machine are unused here.
 */
static int process_synthesized_event(struct perf_tool *tool,
                                     union perf_event *event,
                                     struct perf_sample *sample __maybe_unused,
                                     struct machine *machine __maybe_unused)
{
	/* tool is embedded in struct perf_record; recover the container */
	struct perf_record *rec = container_of(tool, struct perf_record, tool);
	return perf_record__write(rec, event, event->header.size);
}
103
/*
 * Copy everything between the consumer position (md->prev) and the
 * kernel's producer head out of one mmap ring buffer into the output.
 *
 * The buffer is a power-of-two ring (md->mask == size-1), so the
 * pending region may wrap: in that case the tail end of the buffer is
 * written first, then the remainder from the start.
 *
 * Return: 0 on success (or nothing pending), -1 on write failure.
 */
static int perf_record__mmap_read(struct perf_record *rec,
                                   struct perf_mmap *md)
{
	unsigned int head = perf_mmap__read_head(md);
	unsigned int old = md->prev;
	unsigned char *data = md->base + page_size;	/* data follows the control page */
	unsigned long size;
	void *buf;
	int rc = 0;

	if (old == head)
		return 0;	/* ring is empty */

	rec->samples++;

	size = head - old;

	if ((old & md->mask) + size != (head & md->mask)) {
		/* Region wraps: flush from 'old' to the end of the ring. */
		buf = &data[old & md->mask];
		size = md->mask + 1 - (old & md->mask);
		old += size;

		if (perf_record__write(rec, buf, size) < 0) {
			rc = -1;
			goto out;
		}
	}

	/* Flush the (remaining) contiguous region. */
	buf = &data[old & md->mask];
	size = head - old;
	old += size;

	if (perf_record__write(rec, buf, size) < 0) {
		rc = -1;
		goto out;
	}

	md->prev = old;
	/* Publish the new consumer position so the kernel can reuse the space. */
	perf_mmap__write_tail(md, old);

out:
	return rc;
}
147
/* Flags shared between the signal handler and the record main loop. */
static volatile int done;			/* ask the main loop to stop */
static volatile int signr = -1;			/* which signal asked us to stop */
static volatile int child_finished;		/* forked workload has exited */

/*
 * Handler for SIGCHLD/SIGINT/SIGUSR1/SIGTERM: remember which signal
 * fired and request shutdown.  SIGCHLD additionally notes that the
 * workload child is gone.
 */
static void sig_handler(int sig)
{
	if (sig == SIGCHLD)
		child_finished = 1;

	done = 1;
	signr = sig;
}
160
/*
 * on_exit() handler: reap the forked workload (sending SIGTERM first
 * if it has not already exited), then restore the default disposition
 * of the signal that terminated us so nothing lingers.
 */
static void perf_record__sig_exit(int exit_status __maybe_unused, void *arg)
{
	struct perf_record *rec = arg;
	int status;

	if (rec->evlist->workload.pid > 0) {
		if (!child_finished)
			kill(rec->evlist->workload.pid, SIGTERM);

		wait(&status);
		if (WIFSIGNALED(status))
			/* report "terminated by signal X" on stderr */
			psignal(WTERMSIG(status), rec->progname);
	}

	/* normal exit, or SIGUSR1 (treated as a benign stop request) */
	if (signr == -1 || signr == SIGUSR1)
		return;

	signal(signr, SIG_DFL);
}
180
/*
 * Configure and open every counter in the evlist, apply event filters
 * and mmap the ring buffers.
 *
 * Return: 0 on success, negative (mostly -errno) on failure.
 */
static int perf_record__open(struct perf_record *rec)
{
	char msg[512];
	struct perf_evsel *pos;
	struct perf_evlist *evlist = rec->evlist;
	struct perf_session *session = rec->session;
	struct perf_record_opts *opts = &rec->opts;
	int rc = 0;

	perf_evlist__config(evlist, opts);

	list_for_each_entry(pos, &evlist->entries, node) {
try_again:
		if (perf_evsel__open(pos, evlist->cpus, evlist->threads) < 0) {
			/* perf_evsel__fallback() may adjust the event config — retry */
			if (perf_evsel__fallback(pos, errno, msg, sizeof(msg))) {
				if (verbose)
					ui__warning("%s\n", msg);
				goto try_again;
			}

			rc = -errno;
			perf_evsel__open_strerror(pos, &opts->target,
						  errno, msg, sizeof(msg));
			ui__error("%s\n", msg);
			goto out;
		}
	}

	if (perf_evlist__apply_filters(evlist)) {
		error("failed to set filter with %d (%s)\n", errno,
			strerror(errno));
		rc = -1;
		goto out;
	}

	if (perf_evlist__mmap(evlist, opts->mmap_pages, false) < 0) {
		if (errno == EPERM) {
			/* mlock budget exceeded: tell the user how to fix it */
			pr_err("Permission error mapping pages.\n"
			       "Consider increasing "
			       "/proc/sys/kernel/perf_event_mlock_kb,\n"
			       "or try again with a smaller value of -m/--mmap_pages.\n"
			       "(current value: %u)\n", opts->mmap_pages);
			rc = -errno;
		} else {
			pr_err("failed to mmap with %d (%s)\n", errno, strerror(errno));
			rc = -errno;
		}
		goto out;
	}

	session->evlist = evlist;
	perf_session__set_id_hdr_size(session);
out:
	return rc;
}
236
237 static int process_buildids(struct perf_record *rec)
238 {
239         struct perf_data_file *file  = &rec->file;
240         struct perf_session *session = rec->session;
241         u64 start = session->header.data_offset;
242
243         u64 size = lseek(file->fd, 0, SEEK_CUR);
244         if (size == 0)
245                 return 0;
246
247         return __perf_session__process_events(session, start,
248                                               size - start,
249                                               size, &build_id__mark_dso_hit_ops);
250 }
251
/*
 * on_exit() handler registered from __cmd_record(): on successful
 * termination of a file-backed (non-pipe) session, account the final
 * data size, collect build-ids, rewrite the header with the final
 * values and tear down the session.
 */
static void perf_record__exit(int status, void *arg)
{
	struct perf_record *rec = arg;
	struct perf_data_file *file = &rec->file;

	if (status != 0)
		return;	/* don't finalize the output after a failed run */

	if (!file->is_pipe) {
		rec->session->header.data_size += rec->bytes_written;

		if (!rec->no_buildid)
			process_buildids(rec);
		/* rewrite the on-disk header now that data_size is final */
		perf_session__write_header(rec->session, rec->evlist,
					   file->fd, true);
		perf_session__delete(rec->session);
		perf_evlist__delete(rec->evlist);
		symbol__exit();
	}
}
272
/*
 * machines__process_guests() callback: synthesize module and kernel
 * mmap events for one guest machine so its samples can be resolved.
 */
static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
{
	int err;
	struct perf_tool *tool = data;
	/*
	 * As for guest kernel when processing subcommand record&report,
	 * we arrange module mmap prior to guest kernel mmap and trigger
	 * a preload dso because default guest module symbols are loaded
	 * from guest kallsyms instead of /lib/modules/XXX/XXX. This
	 * method is used to avoid symbol missing when the first addr is
	 * in module instead of in guest kernel.
	 */
	err = perf_event__synthesize_modules(tool, process_synthesized_event,
					     machine);
	if (err < 0)
		pr_err("Couldn't record guest kernel [%d]'s reference"
		       " relocation symbol.\n", machine->pid);

	/*
	 * We use _stext for guest kernel because guest kernel's /proc/kallsyms
	 * have no _text sometimes.
	 */
	err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
						 machine, "_text");
	if (err < 0)
		err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
							 machine, "_stext");
	if (err < 0)
		pr_err("Couldn't record guest kernel [%d]'s reference"
		       " relocation symbol.\n", machine->pid);
}
304
/*
 * Header-only PERF_RECORD_FINISHED_ROUND marker, emitted after each
 * full pass over all mmap buffers (see perf_record__mmap_read_all()).
 */
static struct perf_event_header finished_round_event = {
	.size = sizeof(struct perf_event_header),
	.type = PERF_RECORD_FINISHED_ROUND,
};
309
310 static int perf_record__mmap_read_all(struct perf_record *rec)
311 {
312         int i;
313         int rc = 0;
314
315         for (i = 0; i < rec->evlist->nr_mmaps; i++) {
316                 if (rec->evlist->mmap[i].base) {
317                         if (perf_record__mmap_read(rec, &rec->evlist->mmap[i]) != 0) {
318                                 rc = -1;
319                                 goto out;
320                         }
321                 }
322         }
323
324         if (perf_header__has_feat(&rec->session->header, HEADER_TRACING_DATA))
325                 rc = perf_record__write(rec, &finished_round_event,
326                                         sizeof(finished_round_event));
327
328 out:
329         return rc;
330 }
331
332 static void perf_record__init_features(struct perf_record *rec)
333 {
334         struct perf_evlist *evsel_list = rec->evlist;
335         struct perf_session *session = rec->session;
336         int feat;
337
338         for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
339                 perf_header__set_feat(&session->header, feat);
340
341         if (rec->no_buildid)
342                 perf_header__clear_feat(&session->header, HEADER_BUILD_ID);
343
344         if (!have_tracepoints(&evsel_list->entries))
345                 perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);
346
347         if (!rec->opts.branch_stack)
348                 perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);
349 }
350
/*
 * The guts of 'perf record': create the session, write the initial
 * header, synthesize the metadata events (attrs, kernel/module mmaps,
 * existing threads), then loop draining the mmap buffers until the
 * workload exits or the user interrupts us.
 *
 * Return: 0 on success, negative on error.  Final header fixup and
 * session teardown happen in perf_record__exit(), registered here via
 * on_exit().
 */
static int __cmd_record(struct perf_record *rec, int argc, const char **argv)
{
	int err;
	unsigned long waking = 0;
	const bool forks = argc > 0;	/* a command to run was given */
	struct machine *machine;
	struct perf_tool *tool = &rec->tool;
	struct perf_record_opts *opts = &rec->opts;
	struct perf_evlist *evsel_list = rec->evlist;
	struct perf_data_file *file = &rec->file;
	struct perf_session *session;
	bool disabled = false;

	rec->progname = argv[0];

	on_exit(perf_record__sig_exit, rec);
	signal(SIGCHLD, sig_handler);
	signal(SIGINT, sig_handler);
	signal(SIGUSR1, sig_handler);
	signal(SIGTERM, sig_handler);

	session = perf_session__new(file, false, NULL);
	if (session == NULL) {
		pr_err("Not enough memory for reading perf file header\n");
		return -1;
	}

	rec->session = session;

	perf_record__init_features(rec);

	if (forks) {
		/* fork the workload now; it is started later via
		 * perf_evlist__start_workload() */
		err = perf_evlist__prepare_workload(evsel_list, &opts->target,
						    argv, file->is_pipe,
						    true);
		if (err < 0) {
			pr_err("Couldn't run the workload!\n");
			goto out_delete_session;
		}
	}

	if (perf_record__open(rec) != 0) {
		err = -1;
		goto out_delete_session;
	}

	if (!evsel_list->nr_groups)
		perf_header__clear_feat(&session->header, HEADER_GROUP_DESC);

	/*
	 * perf_session__delete(session) will be called at perf_record__exit()
	 */
	on_exit(perf_record__exit, rec);

	if (file->is_pipe) {
		err = perf_header__write_pipe(file->fd);
		if (err < 0)
			goto out_delete_session;
	} else {
		/* provisional header; rewritten with final sizes at exit */
		err = perf_session__write_header(session, evsel_list,
						 file->fd, false);
		if (err < 0)
			goto out_delete_session;
	}

	if (!rec->no_buildid
	    && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) {
		pr_err("Couldn't generate buildids. "
		       "Use --no-buildid to profile anyway.\n");
		err = -1;
		goto out_delete_session;
	}

	machine = &session->machines.host;

	if (file->is_pipe) {
		/* pipe mode: attrs and tracing data must travel in-band */
		err = perf_event__synthesize_attrs(tool, session,
						   process_synthesized_event);
		if (err < 0) {
			pr_err("Couldn't synthesize attrs.\n");
			goto out_delete_session;
		}

		if (have_tracepoints(&evsel_list->entries)) {
			/*
			 * FIXME err <= 0 here actually means that
			 * there were no tracepoints so its not really
			 * an error, just that we don't need to
			 * synthesize anything.  We really have to
			 * return this more properly and also
			 * propagate errors that now are calling die()
			 */
			err = perf_event__synthesize_tracing_data(tool, file->fd, evsel_list,
								  process_synthesized_event);
			if (err <= 0) {
				pr_err("Couldn't record tracing data.\n");
				goto out_delete_session;
			}
			rec->bytes_written += err;
		}
	}

	err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
						 machine, "_text");
	if (err < 0)
		/* some kernels expose _stext but not _text in kallsyms */
		err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
							 machine, "_stext");
	if (err < 0)
		pr_err("Couldn't record kernel reference relocation symbol\n"
		       "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
		       "Check /proc/kallsyms permission or run as root.\n");

	err = perf_event__synthesize_modules(tool, process_synthesized_event,
					     machine);
	if (err < 0)
		pr_err("Couldn't record kernel module information.\n"
		       "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
		       "Check /proc/modules permission or run as root.\n");

	if (perf_guest) {
		machines__process_guests(&session->machines,
					 perf_event__synthesize_guest_os, tool);
	}

	err = __machine__synthesize_threads(machine, tool, &opts->target, evsel_list->threads,
					    process_synthesized_event, opts->sample_address);
	if (err != 0)
		goto out_delete_session;

	if (rec->realtime_prio) {
		struct sched_param param;

		param.sched_priority = rec->realtime_prio;
		if (sched_setscheduler(0, SCHED_FIFO, &param)) {
			pr_err("Could not set realtime priority.\n");
			err = -1;
			goto out_delete_session;
		}
	}

	/*
	 * When perf is starting the traced process, all the events
	 * (apart from group members) have enable_on_exec=1 set,
	 * so don't spoil it by prematurely enabling them.
	 */
	if (!target__none(&opts->target))
		perf_evlist__enable(evsel_list);

	/*
	 * Let the child rip
	 */
	if (forks)
		perf_evlist__start_workload(evsel_list);

	for (;;) {
		int hits = rec->samples;

		if (perf_record__mmap_read_all(rec) < 0) {
			err = -1;
			goto out_delete_session;
		}

		if (hits == rec->samples) {
			/* nothing new arrived; stop or block until woken */
			if (done)
				break;
			/* NOTE(review): poll()'s return value is assigned but ignored */
			err = poll(evsel_list->pollfd, evsel_list->nr_fds, -1);
			waking++;
		}

		/*
		 * When perf is starting the traced process, at the end events
		 * die with the process and we wait for that. Thus no need to
		 * disable events in this case.
		 */
		if (done && !disabled && !target__none(&opts->target)) {
			perf_evlist__disable(evsel_list);
			disabled = true;
		}
	}

	if (quiet || signr == SIGUSR1)
		return 0;

	fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking);

	/*
	 * Approximate RIP event size: 24 bytes.
	 */
	fprintf(stderr,
		"[ perf record: Captured and wrote %.3f MB %s (~%" PRIu64 " samples) ]\n",
		(double)rec->bytes_written / 1024.0 / 1024.0,
		file->path,
		rec->bytes_written / 24);

	return 0;

out_delete_session:
	perf_session__delete(session);
	return err;
}
551
/* Helpers for building the branch_modes[] table below. */
#define BRANCH_OPT(n, m) \
	{ .name = n, .mode = (m) }

#define BRANCH_END { .name = NULL }

/* Maps a -b/-j filter token to its PERF_SAMPLE_BRANCH_* bit. */
struct branch_mode {
	const char *name;
	int mode;
};

/* NULL-name terminated; scanned linearly by parse_branch_stack(). */
static const struct branch_mode branch_modes[] = {
	BRANCH_OPT("u", PERF_SAMPLE_BRANCH_USER),
	BRANCH_OPT("k", PERF_SAMPLE_BRANCH_KERNEL),
	BRANCH_OPT("hv", PERF_SAMPLE_BRANCH_HV),
	BRANCH_OPT("any", PERF_SAMPLE_BRANCH_ANY),
	BRANCH_OPT("any_call", PERF_SAMPLE_BRANCH_ANY_CALL),
	BRANCH_OPT("any_ret", PERF_SAMPLE_BRANCH_ANY_RETURN),
	BRANCH_OPT("ind_call", PERF_SAMPLE_BRANCH_IND_CALL),
	BRANCH_OPT("abort_tx", PERF_SAMPLE_BRANCH_ABORT_TX),
	BRANCH_OPT("in_tx", PERF_SAMPLE_BRANCH_IN_TX),
	BRANCH_OPT("no_tx", PERF_SAMPLE_BRANCH_NO_TX),
	BRANCH_END
};
575
/*
 * Option callback for -b/-j: parse a comma-separated list of branch
 * filter names into a PERF_SAMPLE_BRANCH_* bitmask at opt->value.
 *
 * If only privilege-level bits (u/k/hv) end up set, "any" branch type
 * is implied.  Setting the mask twice (-b plus -j) is rejected.
 *
 * Return: 0 on success, -1 on error.
 */
static int
parse_branch_stack(const struct option *opt, const char *str, int unset)
{
#define ONLY_PLM \
	(PERF_SAMPLE_BRANCH_USER	|\
	 PERF_SAMPLE_BRANCH_KERNEL	|\
	 PERF_SAMPLE_BRANCH_HV)

	uint64_t *mode = (uint64_t *)opt->value;
	const struct branch_mode *br;
	char *s, *os = NULL, *p;
	int ret = -1;

	if (unset)
		return 0;

	/*
	 * cannot set it twice, -b + --branch-filter for instance
	 */
	if (*mode)
		return -1;

	/* str may be NULL in case no arg is passed to -b */
	if (str) {
		/* because str is read-only */
		s = os = strdup(str);
		if (!s)
			return -1;

		for (;;) {
			/* split the copy in place on ',' */
			p = strchr(s, ',');
			if (p)
				*p = '\0';

			/* look the token up in the branch_modes[] table */
			for (br = branch_modes; br->name; br++) {
				if (!strcasecmp(s, br->name))
					break;
			}
			if (!br->name) {
				ui__warning("unknown branch filter %s,"
					    " check man page\n", s);
				goto error;
			}

			*mode |= br->mode;

			if (!p)
				break;

			s = p + 1;
		}
	}
	ret = 0;

	/* default to any branch */
	if ((*mode & ~ONLY_PLM) == 0) {
		*mode = PERF_SAMPLE_BRANCH_ANY;
	}
error:
	free(os);
	return ret;
}
638
#ifdef HAVE_LIBUNWIND_SUPPORT
/*
 * Parse the dump_size argument of --call-graph dwarf,<size>.
 *
 * The value is rounded up to a multiple of u64 and must not exceed
 * USHRT_MAX rounded down to a u64 multiple (the kernel's
 * sample_stack_user field is 16 bits wide).
 *
 * Return: 0 with *_size set on success, -1 on malformed or
 * out-of-range input.
 */
static int get_stack_size(char *str, unsigned long *_size)
{
	char *endptr;
	unsigned long size;
	unsigned long max_size = round_down(USHRT_MAX, sizeof(u64));

	size = strtoul(str, &endptr, 0);

	do {
		if (*endptr)
			break;	/* trailing garbage after the number */

		size = round_up(size, sizeof(u64));
		if (!size || size > max_size)
			break;

		*_size = size;
		return 0;

	} while (0);

	/* %lu: max_size is unsigned long; the old %ld mismatched the type */
	pr_err("callchain: Incorrect stack dump size (max %lu): %s\n",
	       max_size, str);
	return -1;
}
#endif /* HAVE_LIBUNWIND_SUPPORT */
666
667 int record_parse_callchain(const char *arg, struct perf_record_opts *opts)
668 {
669         char *tok, *name, *saveptr = NULL;
670         char *buf;
671         int ret = -1;
672
673         /* We need buffer that we know we can write to. */
674         buf = malloc(strlen(arg) + 1);
675         if (!buf)
676                 return -ENOMEM;
677
678         strcpy(buf, arg);
679
680         tok = strtok_r((char *)buf, ",", &saveptr);
681         name = tok ? : (char *)buf;
682
683         do {
684                 /* Framepointer style */
685                 if (!strncmp(name, "fp", sizeof("fp"))) {
686                         if (!strtok_r(NULL, ",", &saveptr)) {
687                                 opts->call_graph = CALLCHAIN_FP;
688                                 ret = 0;
689                         } else
690                                 pr_err("callchain: No more arguments "
691                                        "needed for -g fp\n");
692                         break;
693
694 #ifdef HAVE_LIBUNWIND_SUPPORT
695                 /* Dwarf style */
696                 } else if (!strncmp(name, "dwarf", sizeof("dwarf"))) {
697                         const unsigned long default_stack_dump_size = 8192;
698
699                         ret = 0;
700                         opts->call_graph = CALLCHAIN_DWARF;
701                         opts->stack_dump_size = default_stack_dump_size;
702
703                         tok = strtok_r(NULL, ",", &saveptr);
704                         if (tok) {
705                                 unsigned long size = 0;
706
707                                 ret = get_stack_size(tok, &size);
708                                 opts->stack_dump_size = size;
709                         }
710 #endif /* HAVE_LIBUNWIND_SUPPORT */
711                 } else {
712                         pr_err("callchain: Unknown --call-graph option "
713                                "value: %s\n", arg);
714                         break;
715                 }
716
717         } while (0);
718
719         free(buf);
720         return ret;
721 }
722
723 static void callchain_debug(struct perf_record_opts *opts)
724 {
725         pr_debug("callchain: type %d\n", opts->call_graph);
726
727         if (opts->call_graph == CALLCHAIN_DWARF)
728                 pr_debug("callchain: stack dump size %d\n",
729                          opts->stack_dump_size);
730 }
731
/*
 * Option callback for --call-graph / --no-call-graph: parse the mode
 * string, or disable callchain recording entirely when negated.
 *
 * Return: 0 on success, negative on parse failure.
 */
int record_parse_callchain_opt(const struct option *opt,
			       const char *arg,
			       int unset)
{
	struct perf_record_opts *opts = opt->value;
	int ret;

	/* --no-call-graph */
	if (unset) {
		opts->call_graph = CALLCHAIN_NONE;
		pr_debug("callchain: disabled\n");
		return 0;
	}

	ret = record_parse_callchain(arg, opts);
	if (!ret)
		callchain_debug(opts);

	return ret;
}
752
/*
 * Option callback for bare -g (no argument): enable frame-pointer
 * callchains unless a mode was already chosen via --call-graph.
 */
int record_callchain_opt(const struct option *opt,
			 const char *arg __maybe_unused,
			 int unset __maybe_unused)
{
	struct perf_record_opts *opts = opt->value;

	if (opts->call_graph == CALLCHAIN_NONE)
		opts->call_graph = CALLCHAIN_FP;

	callchain_debug(opts);
	return 0;
}
765
/* Usage strings for 'perf record' (NULL terminated). */
static const char * const record_usage[] = {
	"perf record [<options>] [<command>]",
	"perf record [<options>] -- <command> [<options>]",
	NULL
};
771
772 /*
773  * XXX Ideally would be local to cmd_record() and passed to a perf_record__new
774  * because we need to have access to it in perf_record__exit, that is called
775  * after cmd_record() exits, but since record_options need to be accessible to
776  * builtin-script, leave it here.
777  *
778  * At least we don't touch it in all the other functions here directly.
779  *
780  * Just say no to tons of global variables, sigh.
781  */
static struct perf_record record = {
	.opts = {
		/* UINT_MAX/ULLONG_MAX presumably mean "not specified by the
		 * user" — TODO confirm against the option handling code */
		.mmap_pages	     = UINT_MAX,
		.user_freq	     = UINT_MAX,
		.user_interval	     = ULLONG_MAX,
		.freq		     = 4000,
		.target		     = {
			.uses_mmap   = true,
			.default_per_cpu = true,
		},
	},
};
794
/* Common prefix of the --call-graph help text; supported modes appended. */
#define CALLCHAIN_HELP "setup and enables call-graph (stack chain/backtrace) recording: "

#ifdef HAVE_LIBUNWIND_SUPPORT
/* dwarf unwinding is only offered when built with libunwind */
const char record_callchain_help[] = CALLCHAIN_HELP "fp dwarf";
#else
const char record_callchain_help[] = CALLCHAIN_HELP "fp";
#endif
802
803 /*
804  * XXX Will stay a global variable till we fix builtin-script.c to stop messing
805  * with it and switch to use the library functions in perf_evlist that came
806  * from builtin-record.c, i.e. use perf_record_opts,
807  * perf_evlist__prepare_workload, etc instead of fork+exec'in 'perf record',
808  * using pipes, etc.
809  */
810 const struct option record_options[] = {
811         OPT_CALLBACK('e', "event", &record.evlist, "event",
812                      "event selector. use 'perf list' to list available events",
813                      parse_events_option),
814         OPT_CALLBACK(0, "filter", &record.evlist, "filter",
815                      "event filter", parse_filter),
816         OPT_STRING('p', "pid", &record.opts.target.pid, "pid",
817                     "record events on existing process id"),
818         OPT_STRING('t', "tid", &record.opts.target.tid, "tid",
819                     "record events on existing thread id"),
820         OPT_INTEGER('r', "realtime", &record.realtime_prio,
821                     "collect data with this RT SCHED_FIFO priority"),
822         OPT_BOOLEAN('D', "no-delay", &record.opts.no_delay,
823                     "collect data without buffering"),
824         OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples,
825                     "collect raw sample records from all opened counters"),
826         OPT_BOOLEAN('a', "all-cpus", &record.opts.target.system_wide,
827                             "system-wide collection from all CPUs"),
828         OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu",
829                     "list of cpus to monitor"),
830         OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"),
831         OPT_STRING('o', "output", &record.file.path, "file",
832                     "output file name"),
833         OPT_BOOLEAN_SET('i', "no-inherit", &record.opts.no_inherit,
834                         &record.opts.no_inherit_set,
835                         "child tasks do not inherit counters"),
836         OPT_UINTEGER('F', "freq", &record.opts.user_freq, "profile at this frequency"),
837         OPT_CALLBACK('m', "mmap-pages", &record.opts.mmap_pages, "pages",
838                      "number of mmap data pages",
839                      perf_evlist__parse_mmap_pages),
840         OPT_BOOLEAN(0, "group", &record.opts.group,
841                     "put the counters into a counter group"),
842         OPT_CALLBACK_NOOPT('g', NULL, &record.opts,
843                            NULL, "enables call-graph recording" ,
844                            &record_callchain_opt),
845         OPT_CALLBACK(0, "call-graph", &record.opts,
846                      "mode[,dump_size]", record_callchain_help,
847                      &record_parse_callchain_opt),
848         OPT_INCR('v', "verbose", &verbose,
849                     "be more verbose (show counter open errors, etc)"),
850         OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),
851         OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat,
852                     "per thread counts"),
853         OPT_BOOLEAN('d', "data", &record.opts.sample_address,
854                     "Sample addresses"),
855         OPT_BOOLEAN('T', "timestamp", &record.opts.sample_time, "Sample timestamps"),
856         OPT_BOOLEAN('P', "period", &record.opts.period, "Sample period"),
857         OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples,
858                     "don't sample"),
859         OPT_BOOLEAN('N', "no-buildid-cache", &record.no_buildid_cache,
860                     "do not update the buildid cache"),
861         OPT_BOOLEAN('B', "no-buildid", &record.no_buildid,
862                     "do not collect buildids in perf.data"),
863         OPT_CALLBACK('G', "cgroup", &record.evlist, "name",
864                      "monitor event in cgroup name only",
865                      parse_cgroups),
866         OPT_STRING('u', "uid", &record.opts.target.uid_str, "user",
867                    "user to profile"),
868
869         OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack,
870                      "branch any", "sample any taken branches",
871                      parse_branch_stack),
872
873         OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack,
874                      "branch filter mask", "branch stack filter modes",
875                      parse_branch_stack),
876         OPT_BOOLEAN('W', "weight", &record.opts.sample_weight,
877                     "sample by weight (on special events only)"),
878         OPT_BOOLEAN(0, "transaction", &record.opts.sample_transaction,
879                     "sample transaction flags (special events only)"),
880         OPT_BOOLEAN(0, "per-thread", &record.opts.target.per_thread,
881                     "use per-thread mmaps"),
882         OPT_END()
883 };
884
/*
 * cmd_record - entry point for the 'perf record' subcommand.
 *
 * Parses the command line into the file-scope 'record' state, sets up
 * symbol handling and the event selector list, validates the recording
 * target (pid/tid/cpu/uid), creates the cpu/thread maps and then hands
 * off to __cmd_record() for the actual session.
 *
 * Returns 0 on success or a negative error code.  Note that
 * usage_with_options() prints usage and exits the process, so the paths
 * that call it never return here.
 */
int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused)
{
        int err = -ENOMEM;
        struct perf_evlist *evsel_list;
        struct perf_record *rec = &record;      /* file-scope state filled in by option parsing */
        char errbuf[BUFSIZ];                    /* scratch buffer for target__strerror() messages */

        evsel_list = perf_evlist__new();
        if (evsel_list == NULL)
                return -ENOMEM;

        rec->evlist = evsel_list;

        argc = parse_options(argc, argv, record_options, record_usage,
                            PARSE_OPT_STOP_AT_NON_OPTION);
        /* No workload left on the command line and no explicit target: nothing to record. */
        if (!argc && target__none(&rec->opts.target))
                usage_with_options(record_usage, record_options);

        /* -G/--cgroup only makes sense together with -a/--all-cpus. */
        if (nr_cgroups && !rec->opts.target.system_wide) {
                ui__error("cgroup monitoring only available in"
                          " system-wide mode\n");
                usage_with_options(record_usage, record_options);
        }

        symbol__init();

        /* Restricted kernel pointers are only worth a warning, not an error. */
        if (symbol_conf.kptr_restrict)
                pr_warning(
"WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
"check /proc/sys/kernel/kptr_restrict.\n\n"
"Samples in kernel functions may not be resolved if a suitable vmlinux\n"
"file is not found in the buildid cache or in the vmlinux path.\n\n"
"Samples in kernel modules won't be resolved at all.\n\n"
"If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
"even with a suitable vmlinux or kallsyms file.\n\n");

        /* Honor -N/--no-buildid-cache and -B/--no-buildid. */
        if (rec->no_buildid_cache || rec->no_buildid)
                disable_buildid_cache();

        /* No -e given: fall back to the default event selection. */
        if (evsel_list->nr_entries == 0 &&
            perf_evlist__add_default(evsel_list) < 0) {
                pr_err("Not enough memory for event selector list\n");
                goto out_symbol_exit;
        }

        /*
         * Tracing a single existing thread implies no-inherit, unless the
         * user set -i/--no-inherit explicitly (tracked via no_inherit_set).
         */
        if (rec->opts.target.tid && !rec->opts.no_inherit_set)
                rec->opts.no_inherit = true;

        /* Inconsistent target options are only warned about, not fatal. */
        err = target__validate(&rec->opts.target);
        if (err) {
                target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
                ui__warning("%s", errbuf);
        }

        err = target__parse_uid(&rec->opts.target);
        if (err) {
                /* Save errno before target__strerror(), which may clobber it. */
                int saved_errno = errno;

                target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
                ui__error("%s", errbuf);

                err = -saved_errno;
                goto out_symbol_exit;
        }

        err = -ENOMEM;
        /* Map creation failure is treated as bad usage: this exits the process. */
        if (perf_evlist__create_maps(evsel_list, &rec->opts.target) < 0)
                usage_with_options(record_usage, record_options);

        if (perf_record_opts__config(&rec->opts)) {
                err = -EINVAL;
                goto out_free_fd;
        }

        err = __cmd_record(&record, argc, argv);

        /*
         * NOTE(review): evsel_list itself is never perf_evlist__delete()d on
         * these paths; process exit reclaims it — confirm this is intentional.
         */
        perf_evlist__munmap(evsel_list);
        perf_evlist__close(evsel_list);
out_free_fd:
        perf_evlist__delete_maps(evsel_list);
out_symbol_exit:
        symbol__exit();
        return err;
}