perf record: Remove use of die/exit
[firefly-linux-kernel-4.4.55.git] / tools / perf / builtin-record.c
1 /*
2  * builtin-record.c
3  *
4  * Builtin record command: Record the profile of a workload
5  * (or a CPU, or a PID) into the perf.data output file - for
6  * later analysis via perf report.
7  */
8 #define _FILE_OFFSET_BITS 64
9
10 #include "builtin.h"
11
12 #include "perf.h"
13
14 #include "util/build-id.h"
15 #include "util/util.h"
16 #include "util/parse-options.h"
17 #include "util/parse-events.h"
18
19 #include "util/header.h"
20 #include "util/event.h"
21 #include "util/evlist.h"
22 #include "util/evsel.h"
23 #include "util/debug.h"
24 #include "util/session.h"
25 #include "util/tool.h"
26 #include "util/symbol.h"
27 #include "util/cpumap.h"
28 #include "util/thread_map.h"
29
30 #include <unistd.h>
31 #include <sched.h>
32 #include <sys/mman.h>
33
34 #define CALLCHAIN_HELP "do call-graph (stack chain/backtrace) recording: "
35
/* -g/--call-graph help text: "dwarf" mode is only offered when built with libunwind. */
#ifdef NO_LIBUNWIND_SUPPORT
static char callchain_help[] = CALLCHAIN_HELP "[fp]";
#else
/* default number of user-stack bytes to dump per sample for DWARF unwinding */
static unsigned long default_stack_dump_size = 8192;
static char callchain_help[] = CALLCHAIN_HELP "[fp] dwarf";
#endif
42
/* How to treat an existing output file: overwrite it, or append to it. */
enum write_mode_t {
        WRITE_FORCE,
        WRITE_APPEND
};
47
/*
 * All state for one 'perf record' run: tool callbacks, user options,
 * the output file, and the session/evlist being recorded.
 */
struct perf_record {
        struct perf_tool        tool;            /* event-processing callbacks */
        struct perf_record_opts opts;            /* parsed command-line options */
        u64                     bytes_written;   /* event payload bytes written so far */
        const char              *output_name;    /* output file name (NULL = default) */
        struct perf_evlist      *evlist;         /* events being recorded */
        struct perf_session     *session;        /* session owning the output header */
        const char              *progname;       /* argv[0], used by psignal() on exit */
        int                     output;          /* output file descriptor */
        unsigned int            page_size;       /* system page size (ring-buffer header skip) */
        int                     realtime_prio;   /* SCHED_FIFO priority, 0 = disabled */
        enum write_mode_t       write_mode;      /* overwrite vs append */
        bool                    no_buildid;      /* skip build-id postprocessing */
        bool                    no_buildid_cache;
        bool                    force;
        bool                    file_new;        /* false when appending to existing data */
        bool                    append_file;
        long                    samples;         /* mmap-read passes that found data */
        off_t                   post_processing_offset; /* file offset where event data begins */
};
68
/*
 * Account for @size bytes that were written to the output fd by code
 * that bypasses write_output() (e.g. synthesized tracing data), so the
 * header's data_size bookkeeping stays correct.
 */
static void advance_output(struct perf_record *rec, size_t size)
{
        rec->bytes_written += size;
}
73
74 static int write_output(struct perf_record *rec, void *buf, size_t size)
75 {
76         while (size) {
77                 int ret = write(rec->output, buf, size);
78
79                 if (ret < 0) {
80                         pr_err("failed to write\n");
81                         return -1;
82                 }
83
84                 size -= ret;
85                 buf += ret;
86
87                 rec->bytes_written += ret;
88         }
89
90         return 0;
91 }
92
93 static int process_synthesized_event(struct perf_tool *tool,
94                                      union perf_event *event,
95                                      struct perf_sample *sample __used,
96                                      struct machine *machine __used)
97 {
98         struct perf_record *rec = container_of(tool, struct perf_record, tool);
99         if (write_output(rec, event, event->header.size) < 0)
100                 return -1;
101
102         return 0;
103 }
104
/*
 * Drain one mmap'ed ring buffer into the output file.
 *
 * Copies everything between the last-consumed position (md->prev) and
 * the current kernel head.  A region that wraps around the end of the
 * buffer is written in two pieces.  Finally the tail is advanced so
 * the kernel may reuse the space.
 *
 * Returns 0 on success, -1 if write_output() failed.
 */
static int perf_record__mmap_read(struct perf_record *rec,
                                   struct perf_mmap *md)
{
        unsigned int head = perf_mmap__read_head(md);
        unsigned int old = md->prev;
        /* event data starts one page past base (first page holds control data) */
        unsigned char *data = md->base + rec->page_size;
        unsigned long size;
        void *buf;
        int rc = 0;

        if (old == head)
                return 0;       /* nothing new since the last drain */

        rec->samples++;

        size = head - old;

        if ((old & md->mask) + size != (head & md->mask)) {
                /* region wraps: first write from 'old' to the end of the buffer */
                buf = &data[old & md->mask];
                size = md->mask + 1 - (old & md->mask);
                old += size;

                if (write_output(rec, buf, size) < 0) {
                        rc = -1;
                        goto out;
                }
        }

        /* write the (remaining) contiguous chunk up to 'head' */
        buf = &data[old & md->mask];
        size = head - old;
        old += size;

        if (write_output(rec, buf, size) < 0) {
                rc = -1;
                goto out;
        }

        md->prev = old;
        perf_mmap__write_tail(md, old); /* publish consumption to the kernel */

out:
        return rc;
}
148
/* Flags set from sig_handler() and polled by the main record loop. */
static volatile int done = 0;           /* request the record loop to stop */
static volatile int signr = -1;         /* last signal received, -1 = none */
static volatile int child_finished = 0; /* SIGCHLD seen for the forked workload */
152
/*
 * Async-signal handler for SIGCHLD/SIGINT/SIGUSR1: remember which
 * signal fired and ask the main record loop to stop.  child_finished
 * lets perf_record__sig_exit() avoid SIGTERMing an already-dead child.
 */
static void sig_handler(int sig)
{
        if (sig == SIGCHLD)
                child_finished = 1;

        done = 1;
        signr = sig;
}
161
/*
 * on_exit() handler: reap the forked workload (SIGTERMing it if it is
 * still running), then re-raise the fatal signal with default
 * disposition so the parent shell sees the true termination cause.
 */
static void perf_record__sig_exit(int exit_status __used, void *arg)
{
        struct perf_record *rec = arg;
        int status;

        if (rec->evlist->workload.pid > 0) {
                if (!child_finished)
                        kill(rec->evlist->workload.pid, SIGTERM);

                wait(&status);
                if (WIFSIGNALED(status))
                        psignal(WTERMSIG(status), rec->progname);
        }

        /* normal exit, or SIGUSR1 (flush-and-keep-going): nothing to re-raise */
        if (signr == -1 || signr == SIGUSR1)
                return;

        signal(signr, SIG_DFL);
        kill(getpid(), signr);
}
182
183 static bool perf_evlist__equal(struct perf_evlist *evlist,
184                                struct perf_evlist *other)
185 {
186         struct perf_evsel *pos, *pair;
187
188         if (evlist->nr_entries != other->nr_entries)
189                 return false;
190
191         pair = perf_evlist__first(other);
192
193         list_for_each_entry(pos, &evlist->entries, node) {
194                 if (memcmp(&pos->attr, &pair->attr, sizeof(pos->attr) != 0))
195                         return false;
196                 pair = perf_evsel__next(pair);
197         }
198
199         return true;
200 }
201
/*
 * Open a counter for every event in the evlist, with graceful
 * fallbacks for older kernels (no exclude_guest/host support, no
 * sample_id_all, no hardware cycles event), then apply event filters
 * and mmap the per-cpu ring buffers.
 *
 * Returns 0 on success, a negative error code otherwise.
 */
static int perf_record__open(struct perf_record *rec)
{
        struct perf_evsel *pos;
        struct perf_evlist *evlist = rec->evlist;
        struct perf_session *session = rec->session;
        struct perf_record_opts *opts = &rec->opts;
        int rc = 0;

        perf_evlist__config_attrs(evlist, opts);

        if (opts->group)
                perf_evlist__set_leader(evlist);

        list_for_each_entry(pos, &evlist->entries, node) {
                struct perf_event_attr *attr = &pos->attr;
                /*
                 * Check if parse_single_tracepoint_event has already asked for
                 * PERF_SAMPLE_TIME.
                 *
                 * XXX this is kludgy but short term fix for problems introduced by
                 * eac23d1c that broke 'perf script' by having different sample_types
                 * when using multiple tracepoint events when we use a perf binary
                 * that tries to use sample_id_all on an older kernel.
                 *
                 * We need to move counter creation to perf_session, support
                 * different sample_types, etc.
                 */
                bool time_needed = attr->sample_type & PERF_SAMPLE_TIME;

fallback_missing_features:
                if (opts->exclude_guest_missing)
                        attr->exclude_guest = attr->exclude_host = 0;
retry_sample_id:
                attr->sample_id_all = opts->sample_id_all_missing ? 0 : 1;
try_again:
                if (perf_evsel__open(pos, evlist->cpus, evlist->threads) < 0) {
                        int err = errno;

                        if (err == EPERM || err == EACCES) {
                                /* insufficient privilege: point at perf_event_paranoid */
                                ui__error_paranoid();
                                rc = -err;
                                goto out;
                        } else if (err ==  ENODEV && opts->target.cpu_list) {
                                pr_err("No such device - did you specify"
                                       " an out-of-range profile CPU?\n");
                                rc = -err;
                                goto out;
                        } else if (err == EINVAL) {
                                /* probe which attr fields this kernel rejects, once each */
                                if (!opts->exclude_guest_missing &&
                                    (attr->exclude_guest || attr->exclude_host)) {
                                        pr_debug("Old kernel, cannot exclude "
                                                 "guest or host samples.\n");
                                        opts->exclude_guest_missing = true;
                                        goto fallback_missing_features;
                                } else if (!opts->sample_id_all_missing) {
                                        /*
                                         * Old kernel, no attr->sample_id_type_all field
                                         */
                                        opts->sample_id_all_missing = true;
                                        if (!opts->sample_time && !opts->raw_samples && !time_needed)
                                                attr->sample_type &= ~PERF_SAMPLE_TIME;

                                        goto retry_sample_id;
                                }
                        }

                        /*
                         * If it's cycles then fall back to hrtimer
                         * based cpu-clock-tick sw counter, which
                         * is always available even if no PMU support.
                         *
                         * PPC returns ENXIO until 2.6.37 (behavior changed
                         * with commit b0a873e).
                         */
                        if ((err == ENOENT || err == ENXIO)
                                        && attr->type == PERF_TYPE_HARDWARE
                                        && attr->config == PERF_COUNT_HW_CPU_CYCLES) {

                                if (verbose)
                                        ui__warning("The cycles event is not supported, "
                                                    "trying to fall back to cpu-clock-ticks\n");
                                attr->type = PERF_TYPE_SOFTWARE;
                                attr->config = PERF_COUNT_SW_CPU_CLOCK;
                                /* drop the cached name so it gets regenerated */
                                if (pos->name) {
                                        free(pos->name);
                                        pos->name = NULL;
                                }
                                goto try_again;
                        }

                        if (err == ENOENT) {
                                ui__error("The %s event is not supported.\n",
                                          perf_evsel__name(pos));
                                rc = -err;
                                goto out;
                        }

                        /* no fallback applies: report the raw syscall failure */
                        printf("\n");
                        error("sys_perf_event_open() syscall returned with %d (%s).  /bin/dmesg may provide additional information.\n",
                              err, strerror(err));

#if defined(__i386__) || defined(__x86_64__)
                        if (attr->type == PERF_TYPE_HARDWARE &&
                            err == EOPNOTSUPP) {
                                pr_err("No hardware sampling interrupt available."
                                       " No APIC? If so then you can boot the kernel"
                                       " with the \"lapic\" boot parameter to"
                                       " force-enable it.\n");
                                rc = -err;
                                goto out;
                        }
#endif

                        pr_err("No CONFIG_PERF_EVENTS=y kernel support configured?\n");
                        rc = -err;
                        goto out;
                }
        }

        if (perf_evlist__set_filters(evlist)) {
                error("failed to set filter with %d (%s)\n", errno,
                        strerror(errno));
                rc = -1;
                goto out;
        }

        if (perf_evlist__mmap(evlist, opts->mmap_pages, false) < 0) {
                if (errno == EPERM) {
                        pr_err("Permission error mapping pages.\n"
                               "Consider increasing "
                               "/proc/sys/kernel/perf_event_mlock_kb,\n"
                               "or try again with a smaller value of -m/--mmap_pages.\n"
                               "(current value: %d)\n", opts->mmap_pages);
                        rc = -errno;
                } else if (!is_power_of_2(opts->mmap_pages)) {
                        pr_err("--mmap_pages/-m value must be a power of two.");
                        rc = -EINVAL;
                } else {
                        pr_err("failed to mmap with %d (%s)\n", errno, strerror(errno));
                        rc = -errno;
                }
                goto out;
        }

        /* when appending, the on-disk evlist must match what we are recording */
        if (rec->file_new)
                session->evlist = evlist;
        else {
                if (!perf_evlist__equal(session->evlist, evlist)) {
                        fprintf(stderr, "incompatible append\n");
                        rc = -1;
                        goto out;
                }
        }

        perf_session__set_id_hdr_size(session);
out:
        return rc;
}
360
361 static int process_buildids(struct perf_record *rec)
362 {
363         u64 size = lseek(rec->output, 0, SEEK_CUR);
364
365         if (size == 0)
366                 return 0;
367
368         rec->session->fd = rec->output;
369         return __perf_session__process_events(rec->session, rec->post_processing_offset,
370                                               size - rec->post_processing_offset,
371                                               size, &build_id__mark_dso_hit_ops);
372 }
373
374 static void perf_record__exit(int status, void *arg)
375 {
376         struct perf_record *rec = arg;
377
378         if (status != 0)
379                 return;
380
381         if (!rec->opts.pipe_output) {
382                 rec->session->header.data_size += rec->bytes_written;
383
384                 if (!rec->no_buildid)
385                         process_buildids(rec);
386                 perf_session__write_header(rec->session, rec->evlist,
387                                            rec->output, true);
388                 perf_session__delete(rec->session);
389                 perf_evlist__delete(rec->evlist);
390                 symbol__exit();
391         }
392 }
393
394 static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
395 {
396         int err;
397         struct perf_tool *tool = data;
398
399         if (machine__is_host(machine))
400                 return;
401
402         /*
403          *As for guest kernel when processing subcommand record&report,
404          *we arrange module mmap prior to guest kernel mmap and trigger
405          *a preload dso because default guest module symbols are loaded
406          *from guest kallsyms instead of /lib/modules/XXX/XXX. This
407          *method is used to avoid symbol missing when the first addr is
408          *in module instead of in guest kernel.
409          */
410         err = perf_event__synthesize_modules(tool, process_synthesized_event,
411                                              machine);
412         if (err < 0)
413                 pr_err("Couldn't record guest kernel [%d]'s reference"
414                        " relocation symbol.\n", machine->pid);
415
416         /*
417          * We use _stext for guest kernel because guest kernel's /proc/kallsyms
418          * have no _text sometimes.
419          */
420         err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
421                                                  machine, "_text");
422         if (err < 0)
423                 err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
424                                                          machine, "_stext");
425         if (err < 0)
426                 pr_err("Couldn't record guest kernel [%d]'s reference"
427                        " relocation symbol.\n", machine->pid);
428 }
429
/*
 * Header-only marker event (PERF_RECORD_FINISHED_ROUND) written after
 * each full pass over the mmap buffers; it delimits a "round" for the
 * reader side's event ordering.
 */
static struct perf_event_header finished_round_event = {
        .size = sizeof(struct perf_event_header),
        .type = PERF_RECORD_FINISHED_ROUND,
};
434
435 static int perf_record__mmap_read_all(struct perf_record *rec)
436 {
437         int i;
438         int rc = 0;
439
440         for (i = 0; i < rec->evlist->nr_mmaps; i++) {
441                 if (rec->evlist->mmap[i].base) {
442                         if (perf_record__mmap_read(rec, &rec->evlist->mmap[i]) != 0) {
443                                 rc = -1;
444                                 goto out;
445                         }
446                 }
447         }
448
449         if (perf_header__has_feat(&rec->session->header, HEADER_TRACING_DATA))
450                 rc = write_output(rec, &finished_round_event,
451                                   sizeof(finished_round_event));
452
453 out:
454         return rc;
455 }
456
/*
 * The guts of 'perf record': install signal/exit handlers, choose and
 * open the output (file or pipe), create the session, optionally fork
 * the workload, synthesize the initial metadata events, then loop
 * draining the mmap buffers until the workload ends or the user
 * interrupts.  Returns 0 on success, negative on error.
 */
static int __cmd_record(struct perf_record *rec, int argc, const char **argv)
{
        struct stat st;
        int flags;
        int err, output, feat;
        unsigned long waking = 0;
        const bool forks = argc > 0;    /* a workload command was given */
        struct machine *machine;
        struct perf_tool *tool = &rec->tool;
        struct perf_record_opts *opts = &rec->opts;
        struct perf_evlist *evsel_list = rec->evlist;
        const char *output_name = rec->output_name;
        struct perf_session *session;

        rec->progname = argv[0];

        rec->page_size = sysconf(_SC_PAGE_SIZE);

        on_exit(perf_record__sig_exit, rec);
        signal(SIGCHLD, sig_handler);
        signal(SIGINT, sig_handler);
        signal(SIGUSR1, sig_handler);

        /*
         * Resolve the output: explicit name, "-" or a stdout FIFO for
         * pipe mode, otherwise the default "perf.data".
         */
        if (!output_name) {
                if (!fstat(STDOUT_FILENO, &st) && S_ISFIFO(st.st_mode))
                        opts->pipe_output = true;
                else
                        rec->output_name = output_name = "perf.data";
        }
        if (output_name) {
                if (!strcmp(output_name, "-"))
                        opts->pipe_output = true;
                else if (!stat(output_name, &st) && st.st_size) {
                        if (rec->write_mode == WRITE_FORCE) {
                                /* keep one backup of the previous data file */
                                char oldname[PATH_MAX];
                                snprintf(oldname, sizeof(oldname), "%s.old",
                                         output_name);
                                unlink(oldname);
                                rename(output_name, oldname);
                        }
                } else if (rec->write_mode == WRITE_APPEND) {
                        /* nothing to append to: fall back to a fresh file */
                        rec->write_mode = WRITE_FORCE;
                }
        }

        flags = O_CREAT|O_RDWR;
        if (rec->write_mode == WRITE_APPEND)
                rec->file_new = 0;
        else
                flags |= O_TRUNC;

        if (opts->pipe_output)
                output = STDOUT_FILENO;
        else
                output = open(output_name, flags, S_IRUSR | S_IWUSR);
        if (output < 0) {
                perror("failed to create output file");
                return -1;
        }

        rec->output = output;

        session = perf_session__new(output_name, O_WRONLY,
                                    rec->write_mode == WRITE_FORCE, false, NULL);
        if (session == NULL) {
                pr_err("Not enough memory for reading perf file header\n");
                return -1;
        }

        rec->session = session;

        /* start with every header feature set, then clear the inapplicable ones */
        for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
                perf_header__set_feat(&session->header, feat);

        if (rec->no_buildid)
                perf_header__clear_feat(&session->header, HEADER_BUILD_ID);

        if (!have_tracepoints(&evsel_list->entries))
                perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);

        if (!rec->opts.branch_stack)
                perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);

        if (!rec->file_new) {
                /* appending: load the existing header/evlist for validation */
                err = perf_session__read_header(session, output);
                if (err < 0)
                        goto out_delete_session;
        }

        if (forks) {
                /* fork the workload now; it is started later, after enable */
                err = perf_evlist__prepare_workload(evsel_list, opts, argv);
                if (err < 0) {
                        pr_err("Couldn't run the workload!\n");
                        goto out_delete_session;
                }
        }

        if (perf_record__open(rec) != 0) {
                err = -1;
                goto out_delete_session;
        }

        /*
         * perf_session__delete(session) will be called at perf_record__exit()
         */
        on_exit(perf_record__exit, rec);

        if (opts->pipe_output) {
                err = perf_header__write_pipe(output);
                if (err < 0)
                        goto out_delete_session;
        } else if (rec->file_new) {
                err = perf_session__write_header(session, evsel_list,
                                                 output, false);
                if (err < 0)
                        goto out_delete_session;
        }

        if (!rec->no_buildid
            && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) {
                pr_err("Couldn't generate buildids. "
                       "Use --no-buildid to profile anyway.\n");
                err = -1;
                goto out_delete_session;
        }

        /* remember where event data begins so process_buildids() can rescan it */
        rec->post_processing_offset = lseek(output, 0, SEEK_CUR);

        machine = perf_session__find_host_machine(session);
        if (!machine) {
                pr_err("Couldn't find native kernel information.\n");
                err = -1;
                goto out_delete_session;
        }

        /* pipe readers see no file header, so synthesize the metadata inline */
        if (opts->pipe_output) {
                err = perf_event__synthesize_attrs(tool, session,
                                                   process_synthesized_event);
                if (err < 0) {
                        pr_err("Couldn't synthesize attrs.\n");
                        goto out_delete_session;
                }

                err = perf_event__synthesize_event_types(tool, process_synthesized_event,
                                                         machine);
                if (err < 0) {
                        pr_err("Couldn't synthesize event_types.\n");
                        goto out_delete_session;
                }

                if (have_tracepoints(&evsel_list->entries)) {
                        /*
                         * FIXME err <= 0 here actually means that
                         * there were no tracepoints so its not really
                         * an error, just that we don't need to
                         * synthesize anything.  We really have to
                         * return this more properly and also
                         * propagate errors that now are calling die()
                         */
                        err = perf_event__synthesize_tracing_data(tool, output, evsel_list,
                                                                  process_synthesized_event);
                        if (err <= 0) {
                                pr_err("Couldn't record tracing data.\n");
                                goto out_delete_session;
                        }
                        /* tracing data was written directly; account for it */
                        advance_output(rec, err);
                }
        }

        /* kernel mmap: try _text, fall back to _stext for sparse kallsyms */
        err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
                                                 machine, "_text");
        if (err < 0)
                err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
                                                         machine, "_stext");
        if (err < 0)
                pr_err("Couldn't record kernel reference relocation symbol\n"
                       "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
                       "Check /proc/kallsyms permission or run as root.\n");

        err = perf_event__synthesize_modules(tool, process_synthesized_event,
                                             machine);
        if (err < 0)
                pr_err("Couldn't record kernel module information.\n"
                       "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
                       "Check /proc/modules permission or run as root.\n");

        if (perf_guest)
                perf_session__process_machines(session, tool,
                                               perf_event__synthesize_guest_os);

        /* synthesize existing threads (all of them in system-wide mode) */
        if (!opts->target.system_wide)
                err = perf_event__synthesize_thread_map(tool, evsel_list->threads,
                                                  process_synthesized_event,
                                                  machine);
        else
                err = perf_event__synthesize_threads(tool, process_synthesized_event,
                                               machine);

        if (err != 0)
                goto out_delete_session;

        if (rec->realtime_prio) {
                struct sched_param param;

                param.sched_priority = rec->realtime_prio;
                if (sched_setscheduler(0, SCHED_FIFO, &param)) {
                        pr_err("Could not set realtime priority.\n");
                        err = -1;
                        goto out_delete_session;
                }
        }

        perf_evlist__enable(evsel_list);

        /*
         * Let the child rip
         */
        if (forks)
                perf_evlist__start_workload(evsel_list);

        /* main capture loop: drain buffers, sleep in poll() when idle */
        for (;;) {
                int hits = rec->samples;

                if (perf_record__mmap_read_all(rec) < 0) {
                        err = -1;
                        goto out_delete_session;
                }

                if (hits == rec->samples) {
                        /* no new data this pass: exit if asked, else block */
                        if (done)
                                break;
                        /* NOTE(review): poll() return/errors are ignored here */
                        err = poll(evsel_list->pollfd, evsel_list->nr_fds, -1);
                        waking++;
                }

                /*
                 * Once told to stop, disable the events but keep
                 * looping until the buffers are fully drained.
                 */
                if (done)
                        perf_evlist__disable(evsel_list);
        }

        if (quiet || signr == SIGUSR1)
                return 0;

        fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking);

        /*
         * Approximate RIP event size: 24 bytes.
         */
        fprintf(stderr,
                "[ perf record: Captured and wrote %.3f MB %s (~%" PRIu64 " samples) ]\n",
                (double)rec->bytes_written / 1024.0 / 1024.0,
                output_name,
                rec->bytes_written / 24);

        return 0;

out_delete_session:
        perf_session__delete(session);
        return err;
}
716
/* Table mapping -b/--branch-filter option strings to PERF_SAMPLE_BRANCH_* bits. */
#define BRANCH_OPT(n, m) \
        { .name = n, .mode = (m) }

#define BRANCH_END { .name = NULL }     /* sentinel terminating branch_modes[] */

struct branch_mode {
        const char *name;       /* filter token as typed by the user */
        int mode;               /* corresponding PERF_SAMPLE_BRANCH_* flag */
};

static const struct branch_mode branch_modes[] = {
        BRANCH_OPT("u", PERF_SAMPLE_BRANCH_USER),
        BRANCH_OPT("k", PERF_SAMPLE_BRANCH_KERNEL),
        BRANCH_OPT("hv", PERF_SAMPLE_BRANCH_HV),
        BRANCH_OPT("any", PERF_SAMPLE_BRANCH_ANY),
        BRANCH_OPT("any_call", PERF_SAMPLE_BRANCH_ANY_CALL),
        BRANCH_OPT("any_ret", PERF_SAMPLE_BRANCH_ANY_RETURN),
        BRANCH_OPT("ind_call", PERF_SAMPLE_BRANCH_IND_CALL),
        BRANCH_END
};
737
738 static int
739 parse_branch_stack(const struct option *opt, const char *str, int unset)
740 {
741 #define ONLY_PLM \
742         (PERF_SAMPLE_BRANCH_USER        |\
743          PERF_SAMPLE_BRANCH_KERNEL      |\
744          PERF_SAMPLE_BRANCH_HV)
745
746         uint64_t *mode = (uint64_t *)opt->value;
747         const struct branch_mode *br;
748         char *s, *os = NULL, *p;
749         int ret = -1;
750
751         if (unset)
752                 return 0;
753
754         /*
755          * cannot set it twice, -b + --branch-filter for instance
756          */
757         if (*mode)
758                 return -1;
759
760         /* str may be NULL in case no arg is passed to -b */
761         if (str) {
762                 /* because str is read-only */
763                 s = os = strdup(str);
764                 if (!s)
765                         return -1;
766
767                 for (;;) {
768                         p = strchr(s, ',');
769                         if (p)
770                                 *p = '\0';
771
772                         for (br = branch_modes; br->name; br++) {
773                                 if (!strcasecmp(s, br->name))
774                                         break;
775                         }
776                         if (!br->name) {
777                                 ui__warning("unknown branch filter %s,"
778                                             " check man page\n", s);
779                                 goto error;
780                         }
781
782                         *mode |= br->mode;
783
784                         if (!p)
785                                 break;
786
787                         s = p + 1;
788                 }
789         }
790         ret = 0;
791
792         /* default to any branch */
793         if ((*mode & ~ONLY_PLM) == 0) {
794                 *mode = PERF_SAMPLE_BRANCH_ANY;
795         }
796 error:
797         free(os);
798         return ret;
799 }
800
801 #ifndef NO_LIBUNWIND_SUPPORT
802 static int get_stack_size(char *str, unsigned long *_size)
803 {
804         char *endptr;
805         unsigned long size;
806         unsigned long max_size = round_down(USHRT_MAX, sizeof(u64));
807
808         size = strtoul(str, &endptr, 0);
809
810         do {
811                 if (*endptr)
812                         break;
813
814                 size = round_up(size, sizeof(u64));
815                 if (!size || size > max_size)
816                         break;
817
818                 *_size = size;
819                 return 0;
820
821         } while (0);
822
823         pr_err("callchain: Incorrect stack dump size (max %ld): %s\n",
824                max_size, str);
825         return -1;
826 }
827 #endif /* !NO_LIBUNWIND_SUPPORT */
828
829 static int
830 parse_callchain_opt(const struct option *opt __used, const char *arg,
831                     int unset)
832 {
833         struct perf_record *rec = (struct perf_record *)opt->value;
834         char *tok, *name, *saveptr = NULL;
835         char *buf;
836         int ret = -1;
837
838         /* --no-call-graph */
839         if (unset)
840                 return 0;
841
842         /* We specified default option if none is provided. */
843         BUG_ON(!arg);
844
845         /* We need buffer that we know we can write to. */
846         buf = malloc(strlen(arg) + 1);
847         if (!buf)
848                 return -ENOMEM;
849
850         strcpy(buf, arg);
851
852         tok = strtok_r((char *)buf, ",", &saveptr);
853         name = tok ? : (char *)buf;
854
855         do {
856                 /* Framepointer style */
857                 if (!strncmp(name, "fp", sizeof("fp"))) {
858                         if (!strtok_r(NULL, ",", &saveptr)) {
859                                 rec->opts.call_graph = CALLCHAIN_FP;
860                                 ret = 0;
861                         } else
862                                 pr_err("callchain: No more arguments "
863                                        "needed for -g fp\n");
864                         break;
865
866 #ifndef NO_LIBUNWIND_SUPPORT
867                 /* Dwarf style */
868                 } else if (!strncmp(name, "dwarf", sizeof("dwarf"))) {
869                         ret = 0;
870                         rec->opts.call_graph = CALLCHAIN_DWARF;
871                         rec->opts.stack_dump_size = default_stack_dump_size;
872
873                         tok = strtok_r(NULL, ",", &saveptr);
874                         if (tok) {
875                                 unsigned long size = 0;
876
877                                 ret = get_stack_size(tok, &size);
878                                 rec->opts.stack_dump_size = size;
879                         }
880
881                         if (!ret)
882                                 pr_debug("callchain: stack dump size %d\n",
883                                          rec->opts.stack_dump_size);
884 #endif /* !NO_LIBUNWIND_SUPPORT */
885                 } else {
886                         pr_err("callchain: Unknown -g option "
887                                "value: %s\n", arg);
888                         break;
889                 }
890
891         } while (0);
892
893         free(buf);
894
895         if (!ret)
896                 pr_debug("callchain: type %d\n", rec->opts.call_graph);
897
898         return ret;
899 }
900
/* Synopsis lines printed by usage_with_options() / --help. */
static const char * const record_usage[] = {
	"perf record [<options>] [<command>]",
	"perf record [<options>] -- <command> [<options>]",
	NULL
};
906
907 /*
908  * XXX Ideally would be local to cmd_record() and passed to a perf_record__new
909  * because we need to have access to it in perf_record__exit, that is called
910  * after cmd_record() exits, but since record_options need to be accessible to
911  * builtin-script, leave it here.
912  *
913  * At least we don't touch it in all the other functions here directly.
914  *
915  * Just say no to tons of global variables, sigh.
916  */
static struct perf_record record = {
	.opts = {
		/*
		 * UINT_MAX/ULLONG_MAX act as "not specified by the user"
		 * sentinels: cmd_record() only copies user_freq and
		 * user_interval into freq/default_interval when they differ
		 * from these values. mmap_pages presumably works the same
		 * way — TODO confirm against __cmd_record().
		 */
		.mmap_pages          = UINT_MAX,
		.user_freq           = UINT_MAX,
		.user_interval       = ULLONG_MAX,
		.freq                = 4000,	/* default sampling frequency */
		.target              = {
			.uses_mmap   = true,
		},
	},
	.write_mode = WRITE_FORCE,	/* may become WRITE_APPEND via -A */
	.file_new   = true,
};
930
931 /*
932  * XXX Will stay a global variable till we fix builtin-script.c to stop messing
933  * with it and switch to use the library functions in perf_evlist that came
934  * from builtin-record.c, i.e. use perf_record_opts,
935  * perf_evlist__prepare_workload, etc instead of fork+exec'in 'perf record',
936  * using pipes, etc.
937  */
/*
 * Option table for 'perf record'. Non-static on purpose: builtin-script
 * reuses it (see the comment above).
 */
const struct option record_options[] = {
	OPT_CALLBACK('e', "event", &record.evlist, "event",
		     "event selector. use 'perf list' to list available events",
		     parse_events_option),
	OPT_CALLBACK(0, "filter", &record.evlist, "filter",
		     "event filter", parse_filter),
	OPT_STRING('p', "pid", &record.opts.target.pid, "pid",
		    "record events on existing process id"),
	OPT_STRING('t', "tid", &record.opts.target.tid, "tid",
		    "record events on existing thread id"),
	OPT_INTEGER('r', "realtime", &record.realtime_prio,
		    "collect data with this RT SCHED_FIFO priority"),
	OPT_BOOLEAN('D', "no-delay", &record.opts.no_delay,
		    "collect data without buffering"),
	OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples,
		    "collect raw sample records from all opened counters"),
	OPT_BOOLEAN('a', "all-cpus", &record.opts.target.system_wide,
			    "system-wide collection from all CPUs"),
	OPT_BOOLEAN('A', "append", &record.append_file,
			    "append to the output file to do incremental profiling"),
	OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu",
		    "list of cpus to monitor"),
	OPT_BOOLEAN('f', "force", &record.force,
			"overwrite existing data file (deprecated)"),
	OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"),
	OPT_STRING('o', "output", &record.output_name, "file",
		    "output file name"),
	OPT_BOOLEAN('i', "no-inherit", &record.opts.no_inherit,
		    "child tasks do not inherit counters"),
	OPT_UINTEGER('F', "freq", &record.opts.user_freq, "profile at this frequency"),
	OPT_UINTEGER('m', "mmap-pages", &record.opts.mmap_pages,
		     "number of mmap data pages"),
	OPT_BOOLEAN(0, "group", &record.opts.group,
		    "put the counters into a counter group"),
	/* callchain_help/default "fp" are set up at the top of this file */
	OPT_CALLBACK_DEFAULT('g', "call-graph", &record, "mode[,dump_size]",
			     callchain_help, &parse_callchain_opt,
			     "fp"),
	OPT_INCR('v', "verbose", &verbose,
		    "be more verbose (show counter open errors, etc)"),
	OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),
	OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat,
		    "per thread counts"),
	OPT_BOOLEAN('d', "data", &record.opts.sample_address,
		    "Sample addresses"),
	OPT_BOOLEAN('T', "timestamp", &record.opts.sample_time, "Sample timestamps"),
	OPT_BOOLEAN('P', "period", &record.opts.period, "Sample period"),
	OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples,
		    "don't sample"),
	OPT_BOOLEAN('N', "no-buildid-cache", &record.no_buildid_cache,
		    "do not update the buildid cache"),
	OPT_BOOLEAN('B', "no-buildid", &record.no_buildid,
		    "do not collect buildids in perf.data"),
	OPT_CALLBACK('G', "cgroup", &record.evlist, "name",
		     "monitor event in cgroup name only",
		     parse_cgroups),
	OPT_STRING('u', "uid", &record.opts.target.uid_str, "user",
		   "user to profile"),

	/* -b takes no argument; -j takes a filter-mode list */
	OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack,
		     "branch any", "sample any taken branches",
		     parse_branch_stack),

	OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack,
		     "branch filter mask", "branch stack filter modes",
		     parse_branch_stack),
	OPT_END()
};
1005
1006 int cmd_record(int argc, const char **argv, const char *prefix __used)
1007 {
1008         int err = -ENOMEM;
1009         struct perf_evsel *pos;
1010         struct perf_evlist *evsel_list;
1011         struct perf_record *rec = &record;
1012         char errbuf[BUFSIZ];
1013
1014         evsel_list = perf_evlist__new(NULL, NULL);
1015         if (evsel_list == NULL)
1016                 return -ENOMEM;
1017
1018         rec->evlist = evsel_list;
1019
1020         argc = parse_options(argc, argv, record_options, record_usage,
1021                             PARSE_OPT_STOP_AT_NON_OPTION);
1022         if (!argc && perf_target__none(&rec->opts.target))
1023                 usage_with_options(record_usage, record_options);
1024
1025         if (rec->force && rec->append_file) {
1026                 ui__error("Can't overwrite and append at the same time."
1027                           " You need to choose between -f and -A");
1028                 usage_with_options(record_usage, record_options);
1029         } else if (rec->append_file) {
1030                 rec->write_mode = WRITE_APPEND;
1031         } else {
1032                 rec->write_mode = WRITE_FORCE;
1033         }
1034
1035         if (nr_cgroups && !rec->opts.target.system_wide) {
1036                 ui__error("cgroup monitoring only available in"
1037                           " system-wide mode\n");
1038                 usage_with_options(record_usage, record_options);
1039         }
1040
1041         symbol__init();
1042
1043         if (symbol_conf.kptr_restrict)
1044                 pr_warning(
1045 "WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
1046 "check /proc/sys/kernel/kptr_restrict.\n\n"
1047 "Samples in kernel functions may not be resolved if a suitable vmlinux\n"
1048 "file is not found in the buildid cache or in the vmlinux path.\n\n"
1049 "Samples in kernel modules won't be resolved at all.\n\n"
1050 "If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
1051 "even with a suitable vmlinux or kallsyms file.\n\n");
1052
1053         if (rec->no_buildid_cache || rec->no_buildid)
1054                 disable_buildid_cache();
1055
1056         if (evsel_list->nr_entries == 0 &&
1057             perf_evlist__add_default(evsel_list) < 0) {
1058                 pr_err("Not enough memory for event selector list\n");
1059                 goto out_symbol_exit;
1060         }
1061
1062         err = perf_target__validate(&rec->opts.target);
1063         if (err) {
1064                 perf_target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
1065                 ui__warning("%s", errbuf);
1066         }
1067
1068         err = perf_target__parse_uid(&rec->opts.target);
1069         if (err) {
1070                 int saved_errno = errno;
1071
1072                 perf_target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
1073                 ui__error("%s", errbuf);
1074
1075                 err = -saved_errno;
1076                 goto out_free_fd;
1077         }
1078
1079         err = -ENOMEM;
1080         if (perf_evlist__create_maps(evsel_list, &rec->opts.target) < 0)
1081                 usage_with_options(record_usage, record_options);
1082
1083         list_for_each_entry(pos, &evsel_list->entries, node) {
1084                 if (perf_header__push_event(pos->attr.config, perf_evsel__name(pos)))
1085                         goto out_free_fd;
1086         }
1087
1088         if (rec->opts.user_interval != ULLONG_MAX)
1089                 rec->opts.default_interval = rec->opts.user_interval;
1090         if (rec->opts.user_freq != UINT_MAX)
1091                 rec->opts.freq = rec->opts.user_freq;
1092
1093         /*
1094          * User specified count overrides default frequency.
1095          */
1096         if (rec->opts.default_interval)
1097                 rec->opts.freq = 0;
1098         else if (rec->opts.freq) {
1099                 rec->opts.default_interval = rec->opts.freq;
1100         } else {
1101                 ui__error("frequency and count are zero, aborting\n");
1102                 err = -EINVAL;
1103                 goto out_free_fd;
1104         }
1105
1106         err = __cmd_record(&record, argc, argv);
1107 out_free_fd:
1108         perf_evlist__delete_maps(evsel_list);
1109 out_symbol_exit:
1110         symbol__exit();
1111         return err;
1112 }