/*
 * builtin-record.c
 *
 * Builtin record command: Record the profile of a workload
 * (or a CPU, or a PID) into the perf.data output file - for
 * later analysis via perf report.
 */
#define _FILE_OFFSET_BITS 64

#include "builtin.h"

#include "perf.h"

#include "util/build-id.h"
#include "util/util.h"
#include "util/parse-options.h"
#include "util/parse-events.h"

#include "util/header.h"
#include "util/event.h"
#include "util/evlist.h"
#include "util/evsel.h"
#include "util/debug.h"
#include "util/session.h"
#include "util/tool.h"
#include "util/symbol.h"
#include "util/cpumap.h"
#include "util/thread_map.h"

#include <unistd.h>
#include <sched.h>
#include <sys/mman.h>

#define CALLCHAIN_HELP "do call-graph (stack chain/backtrace) recording: "

#ifdef NO_LIBUNWIND_SUPPORT
static char callchain_help[] = CALLCHAIN_HELP "[fp]";
#else
static unsigned long default_stack_dump_size = 8192;
static char callchain_help[] = CALLCHAIN_HELP "[fp] dwarf";
#endif

enum write_mode_t {
        WRITE_FORCE,
        WRITE_APPEND
};

struct perf_record {
        struct perf_tool        tool;
        struct perf_record_opts opts;
        u64                     bytes_written;
        const char              *output_name;
        struct perf_evlist      *evlist;
        struct perf_session     *session;
        const char              *progname;
        int                     output;
        unsigned int            page_size;
        int                     realtime_prio;
        enum write_mode_t       write_mode;
        bool                    no_buildid;
        bool                    no_buildid_cache;
        bool                    force;
        bool                    file_new;
        bool                    append_file;
        long                    samples;
        off_t                   post_processing_offset;
};

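/*
 * Account for bytes a helper wrote to the output fd on our behalf
 * (perf_event__synthesize_tracing_data() writes directly to the fd),
 * so the header's data_size still comes out right.
 */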
static void advance_output(struct perf_record *rec, size_t size)
{
        rec->bytes_written += size;
}

static void write_output(struct perf_record *rec, void *buf, size_t size)
{
        while (size) {
                int ret = write(rec->output, buf, size);

                if (ret < 0)
                        die("failed to write");

                size -= ret;
                buf += ret;

                rec->bytes_written += ret;
        }
}

static int process_synthesized_event(struct perf_tool *tool,
                                     union perf_event *event,
                                     struct perf_sample *sample __used,
                                     struct machine *machine __used)
{
        struct perf_record *rec = container_of(tool, struct perf_record, tool);
        write_output(rec, event, event->header.size);
        return 0;
}

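/*
 * Drain one mmap'ed ring buffer into the output file. The kernel
 * advances 'head' as it produces events; md->prev remembers how far we
 * got on the previous pass. When the new data wraps past the end of
 * the buffer it is written out in two chunks: old position to end of
 * buffer, then start of buffer up to 'head'. Writing the tail back
 * tells the kernel that space can be reused.
 */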
static void perf_record__mmap_read(struct perf_record *rec,
                                   struct perf_mmap *md)
{
        unsigned int head = perf_mmap__read_head(md);
        unsigned int old = md->prev;
        unsigned char *data = md->base + rec->page_size;
        unsigned long size;
        void *buf;

        if (old == head)
                return;

        rec->samples++;

        size = head - old;

        if ((old & md->mask) + size != (head & md->mask)) {
                buf = &data[old & md->mask];
                size = md->mask + 1 - (old & md->mask);
                old += size;

                write_output(rec, buf, size);
        }

        buf = &data[old & md->mask];
        size = head - old;
        old += size;

        write_output(rec, buf, size);

        md->prev = old;
        perf_mmap__write_tail(md, old);
}

static volatile int done = 0;
static volatile int signr = -1;
static volatile int child_finished = 0;

static void sig_handler(int sig)
{
        if (sig == SIGCHLD)
                child_finished = 1;

        done = 1;
        signr = sig;
}

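/*
 * on_exit() handler: reap the forked workload, killing it first if it
 * has not already exited, then re-raise any fatal signal with the
 * default disposition so our own exit status reflects how we died.
 */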
static void perf_record__sig_exit(int exit_status __used, void *arg)
{
        struct perf_record *rec = arg;
        int status;

        if (rec->evlist->workload.pid > 0) {
                if (!child_finished)
                        kill(rec->evlist->workload.pid, SIGTERM);

                wait(&status);
                if (WIFSIGNALED(status))
                        psignal(WTERMSIG(status), rec->progname);
        }

        if (signr == -1 || signr == SIGUSR1)
                return;

        signal(signr, SIG_DFL);
        kill(getpid(), signr);
}

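/*
 * Appending to an existing perf.data is only safe if the event list on
 * the command line matches the attrs already recorded in the file
 * header, attribute by attribute; otherwise the sample layout of the
 * appended data would not match what the header describes.
 */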
static bool perf_evlist__equal(struct perf_evlist *evlist,
                               struct perf_evlist *other)
{
        struct perf_evsel *pos, *pair;

        if (evlist->nr_entries != other->nr_entries)
                return false;

        pair = perf_evlist__first(other);

        list_for_each_entry(pos, &evlist->entries, node) {
                if (memcmp(&pos->attr, &pair->attr, sizeof(pos->attr)) != 0)
                        return false;
                pair = perf_evsel__next(pair);
        }

        return true;
}

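/*
 * Open the counters, progressively dropping attr features that older
 * kernels reject with EINVAL: first exclude_guest/exclude_host, then
 * sample_id_all. A failing hardware cycles event additionally falls
 * back to the software cpu-clock event. The labels below mark the
 * re-entry points for each retry.
 */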
static void perf_record__open(struct perf_record *rec)
{
        struct perf_evsel *pos;
        struct perf_evlist *evlist = rec->evlist;
        struct perf_session *session = rec->session;
        struct perf_record_opts *opts = &rec->opts;

        perf_evlist__config_attrs(evlist, opts);

        if (opts->group)
                perf_evlist__set_leader(evlist);

        list_for_each_entry(pos, &evlist->entries, node) {
                struct perf_event_attr *attr = &pos->attr;
                /*
                 * Check if parse_single_tracepoint_event has already asked for
                 * PERF_SAMPLE_TIME.
                 *
                 * XXX this is kludgy, but it's a short-term fix for problems
                 * introduced by eac23d1c that broke 'perf script' by having
                 * different sample_types when using multiple tracepoint events
                 * when we use a perf binary that tries to use sample_id_all on
                 * an older kernel.
                 *
                 * We need to move counter creation to perf_session, support
                 * different sample_types, etc.
                 */
                bool time_needed = attr->sample_type & PERF_SAMPLE_TIME;

fallback_missing_features:
                if (opts->exclude_guest_missing)
                        attr->exclude_guest = attr->exclude_host = 0;
retry_sample_id:
                attr->sample_id_all = opts->sample_id_all_missing ? 0 : 1;
try_again:
                if (perf_evsel__open(pos, evlist->cpus, evlist->threads) < 0) {
                        int err = errno;

                        if (err == EPERM || err == EACCES) {
                                ui__error_paranoid();
                                exit(EXIT_FAILURE);
                        } else if (err == ENODEV && opts->target.cpu_list) {
                                die("No such device - did you specify"
                                        " an out-of-range profile CPU?\n");
                        } else if (err == EINVAL) {
                                if (!opts->exclude_guest_missing &&
                                    (attr->exclude_guest || attr->exclude_host)) {
                                        pr_debug("Old kernel, cannot exclude "
                                                 "guest or host samples.\n");
                                        opts->exclude_guest_missing = true;
                                        goto fallback_missing_features;
                                } else if (!opts->sample_id_all_missing) {
                                        /*
                                         * Old kernel, no attr->sample_id_all field
                                         */
                                        opts->sample_id_all_missing = true;
                                        if (!opts->sample_time && !opts->raw_samples && !time_needed)
                                                attr->sample_type &= ~PERF_SAMPLE_TIME;

                                        goto retry_sample_id;
                                }
                        }

                        /*
                         * If it's cycles then fall back to the hrtimer
                         * based cpu-clock-tick sw counter, which
                         * is always available even without PMU support.
                         *
                         * PPC returns ENXIO until 2.6.37 (behavior changed
                         * with commit b0a873e).
                         */
                        if ((err == ENOENT || err == ENXIO)
                                        && attr->type == PERF_TYPE_HARDWARE
                                        && attr->config == PERF_COUNT_HW_CPU_CYCLES) {

                                if (verbose)
                                        ui__warning("The cycles event is not supported, "
                                                    "trying to fall back to cpu-clock-ticks\n");
                                attr->type = PERF_TYPE_SOFTWARE;
                                attr->config = PERF_COUNT_SW_CPU_CLOCK;
                                if (pos->name) {
                                        free(pos->name);
                                        pos->name = NULL;
                                }
                                goto try_again;
                        }

                        if (err == ENOENT) {
                                ui__error("The %s event is not supported.\n",
                                          perf_evsel__name(pos));
                                exit(EXIT_FAILURE);
                        }

                        printf("\n");
                        error("sys_perf_event_open() syscall returned with %d (%s).  /bin/dmesg may provide additional information.\n",
                              err, strerror(err));

#if defined(__i386__) || defined(__x86_64__)
                        if (attr->type == PERF_TYPE_HARDWARE && err == EOPNOTSUPP)
                                die("No hardware sampling interrupt available."
                                    " No APIC? If so then you can boot the kernel"
                                    " with the \"lapic\" boot parameter to"
                                    " force-enable it.\n");
#endif

                        die("No CONFIG_PERF_EVENTS=y kernel support configured?\n");
                }
        }

        if (perf_evlist__set_filters(evlist)) {
                error("failed to set filter with %d (%s)\n", errno,
                        strerror(errno));
                exit(-1);
        }

        if (perf_evlist__mmap(evlist, opts->mmap_pages, false) < 0) {
                if (errno == EPERM)
                        die("Permission error mapping pages.\n"
                            "Consider increasing "
                            "/proc/sys/kernel/perf_event_mlock_kb,\n"
                            "or try again with a smaller value of -m/--mmap_pages.\n"
                            "(current value: %d)\n", opts->mmap_pages);
                else if (!is_power_of_2(opts->mmap_pages))
                        die("--mmap_pages/-m value must be a power of two.");

                die("failed to mmap with %d (%s)\n", errno, strerror(errno));
        }

        if (rec->file_new)
                session->evlist = evlist;
        else {
                if (!perf_evlist__equal(session->evlist, evlist)) {
                        fprintf(stderr, "incompatible append\n");
                        exit(-1);
                }
        }

        perf_session__set_id_hdr_size(session);
}

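/*
 * Re-read everything we wrote after post_processing_offset with the
 * build_id__mark_dso_hit_ops tool, so that only DSOs that actually got
 * samples have their build ids emitted into the header.
 */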
static int process_buildids(struct perf_record *rec)
{
        u64 size = lseek(rec->output, 0, SEEK_CUR);

        if (size == 0)
                return 0;

        rec->session->fd = rec->output;
        return __perf_session__process_events(rec->session, rec->post_processing_offset,
                                              size - rec->post_processing_offset,
                                              size, &build_id__mark_dso_hit_ops);
}

static void perf_record__exit(int status __used, void *arg)
{
        struct perf_record *rec = arg;

        if (!rec->opts.pipe_output) {
                rec->session->header.data_size += rec->bytes_written;

                if (!rec->no_buildid)
                        process_buildids(rec);
                perf_session__write_header(rec->session, rec->evlist,
                                           rec->output, true);
                perf_session__delete(rec->session);
                perf_evlist__delete(rec->evlist);
                symbol__exit();
        }
}

static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
{
        int err;
        struct perf_tool *tool = data;

        if (machine__is_host(machine))
                return;

        /*
         * When processing the record and report subcommands for a guest
         * kernel, we synthesize the module mmaps before the guest kernel
         * mmap and trigger a DSO preload, because guest module symbols
         * are loaded from the guest's kallsyms rather than from
         * /lib/modules/XXX/XXX. This avoids missing symbols when the
         * first sampled address falls in a module rather than in the
         * guest kernel itself.
         */
        err = perf_event__synthesize_modules(tool, process_synthesized_event,
                                             machine);
        if (err < 0)
                pr_err("Couldn't record guest kernel [%d]'s reference"
                       " relocation symbol.\n", machine->pid);

        /*
         * Fall back to _stext for the guest kernel: a guest kernel's
         * /proc/kallsyms sometimes has no _text symbol.
         */
        err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
                                                 machine, "_text");
        if (err < 0)
                err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
                                                         machine, "_stext");
        if (err < 0)
                pr_err("Couldn't record guest kernel [%d]'s reference"
                       " relocation symbol.\n", machine->pid);
}

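/*
 * A FINISHED_ROUND event marks the point where all ring buffers have
 * been drained once; downstream tools can then safely flush and sort
 * the events queued so far by timestamp.
 */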
static struct perf_event_header finished_round_event = {
        .size = sizeof(struct perf_event_header),
        .type = PERF_RECORD_FINISHED_ROUND,
};

static void perf_record__mmap_read_all(struct perf_record *rec)
{
        int i;

        for (i = 0; i < rec->evlist->nr_mmaps; i++) {
                if (rec->evlist->mmap[i].base)
                        perf_record__mmap_read(rec, &rec->evlist->mmap[i]);
        }

        if (perf_header__has_feat(&rec->session->header, HEADER_TRACING_DATA))
                write_output(rec, &finished_round_event, sizeof(finished_round_event));
}

static int __cmd_record(struct perf_record *rec, int argc, const char **argv)
{
        struct stat st;
        int flags;
        int err, output, feat;
        unsigned long waking = 0;
        const bool forks = argc > 0;
        struct machine *machine;
        struct perf_tool *tool = &rec->tool;
        struct perf_record_opts *opts = &rec->opts;
        struct perf_evlist *evsel_list = rec->evlist;
        const char *output_name = rec->output_name;
        struct perf_session *session;

        rec->progname = argv[0];

        rec->page_size = sysconf(_SC_PAGE_SIZE);

        on_exit(perf_record__sig_exit, rec);
        signal(SIGCHLD, sig_handler);
        signal(SIGINT, sig_handler);
        signal(SIGUSR1, sig_handler);

        if (!output_name) {
                if (!fstat(STDOUT_FILENO, &st) && S_ISFIFO(st.st_mode))
                        opts->pipe_output = true;
                else
                        rec->output_name = output_name = "perf.data";
        }
        if (output_name) {
                if (!strcmp(output_name, "-"))
                        opts->pipe_output = true;
                else if (!stat(output_name, &st) && st.st_size) {
                        if (rec->write_mode == WRITE_FORCE) {
                                char oldname[PATH_MAX];
                                snprintf(oldname, sizeof(oldname), "%s.old",
                                         output_name);
                                unlink(oldname);
                                rename(output_name, oldname);
                        }
                } else if (rec->write_mode == WRITE_APPEND) {
                        rec->write_mode = WRITE_FORCE;
                }
        }

        flags = O_CREAT|O_RDWR;
        if (rec->write_mode == WRITE_APPEND)
                rec->file_new = 0;
        else
                flags |= O_TRUNC;

        if (opts->pipe_output)
                output = STDOUT_FILENO;
        else
                output = open(output_name, flags, S_IRUSR | S_IWUSR);
        if (output < 0) {
                perror("failed to create output file");
                exit(-1);
        }

        rec->output = output;

        session = perf_session__new(output_name, O_WRONLY,
                                    rec->write_mode == WRITE_FORCE, false, NULL);
        if (session == NULL) {
                pr_err("Not enough memory for reading perf file header\n");
                return -1;
        }

        rec->session = session;

        for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
                perf_header__set_feat(&session->header, feat);

        if (rec->no_buildid)
                perf_header__clear_feat(&session->header, HEADER_BUILD_ID);

        if (!have_tracepoints(&evsel_list->entries))
                perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);

        if (!rec->opts.branch_stack)
                perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);

        if (!rec->file_new) {
                err = perf_session__read_header(session, output);
                if (err < 0)
                        goto out_delete_session;
        }

        if (forks) {
                err = perf_evlist__prepare_workload(evsel_list, opts, argv);
                if (err < 0) {
                        pr_err("Couldn't run the workload!\n");
                        goto out_delete_session;
                }
        }

        perf_record__open(rec);

        /*
         * perf_session__delete(session) will be called at perf_record__exit()
         */
        on_exit(perf_record__exit, rec);

        if (opts->pipe_output) {
                err = perf_header__write_pipe(output);
                if (err < 0)
                        return err;
        } else if (rec->file_new) {
                err = perf_session__write_header(session, evsel_list,
                                                 output, false);
                if (err < 0)
                        return err;
        }

        if (!rec->no_buildid
            && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) {
                pr_err("Couldn't generate buildids. "
                       "Use --no-buildid to profile anyway.\n");
                return -1;
        }

        rec->post_processing_offset = lseek(output, 0, SEEK_CUR);

        machine = perf_session__find_host_machine(session);
        if (!machine) {
                pr_err("Couldn't find native kernel information.\n");
                return -1;
        }

        if (opts->pipe_output) {
                err = perf_event__synthesize_attrs(tool, session,
                                                   process_synthesized_event);
                if (err < 0) {
                        pr_err("Couldn't synthesize attrs.\n");
                        return err;
                }

                err = perf_event__synthesize_event_types(tool, process_synthesized_event,
                                                         machine);
                if (err < 0) {
                        pr_err("Couldn't synthesize event_types.\n");
                        return err;
                }

                if (have_tracepoints(&evsel_list->entries)) {
                        /*
                         * FIXME err <= 0 here actually means that
                         * there were no tracepoints, so it's not really
                         * an error, just that we don't need to
                         * synthesize anything. We really have to
                         * return this more properly and also
                         * propagate errors that now are calling die()
                         */
                        err = perf_event__synthesize_tracing_data(tool, output, evsel_list,
                                                                  process_synthesized_event);
                        if (err <= 0) {
                                pr_err("Couldn't record tracing data.\n");
                                return err;
                        }
                        advance_output(rec, err);
                }
        }

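        /*
         * Mappings that existed before the counters were opened never
         * generate MMAP events, so synthesize them now: the kernel map
         * (resolved via _text, or _stext on kernels that do not export
         * _text), module maps, and the maps of already running threads.
         */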
        err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
                                                 machine, "_text");
        if (err < 0)
                err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
                                                         machine, "_stext");
        if (err < 0)
                pr_err("Couldn't record kernel reference relocation symbol\n"
                       "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
                       "Check /proc/kallsyms permission or run as root.\n");

        err = perf_event__synthesize_modules(tool, process_synthesized_event,
                                             machine);
        if (err < 0)
                pr_err("Couldn't record kernel module information.\n"
                       "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
                       "Check /proc/modules permission or run as root.\n");

        if (perf_guest)
                perf_session__process_machines(session, tool,
                                               perf_event__synthesize_guest_os);

        if (!opts->target.system_wide)
                perf_event__synthesize_thread_map(tool, evsel_list->threads,
                                                  process_synthesized_event,
                                                  machine);
        else
                perf_event__synthesize_threads(tool, process_synthesized_event,
                                               machine);

        if (rec->realtime_prio) {
                struct sched_param param;

                param.sched_priority = rec->realtime_prio;
                if (sched_setscheduler(0, SCHED_FIFO, &param)) {
                        pr_err("Could not set realtime priority.\n");
                        exit(-1);
                }
        }

        perf_evlist__enable(evsel_list);

        /*
         * Let the child rip
         */
        if (forks)
                perf_evlist__start_workload(evsel_list);

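        /*
         * Main capture loop: drain all buffers, then sleep in poll()
         * until the kernel reports more data. Once a signal sets 'done'
         * the counters are disabled, and the loop exits as soon as a
         * full pass over the buffers produces no new samples.
         */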
        for (;;) {
                int hits = rec->samples;

                perf_record__mmap_read_all(rec);

                if (hits == rec->samples) {
                        if (done)
                                break;
                        err = poll(evsel_list->pollfd, evsel_list->nr_fds, -1);
                        waking++;
                }

                if (done)
                        perf_evlist__disable(evsel_list);
        }

        if (quiet || signr == SIGUSR1)
                return 0;

        fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking);

        /*
         * Approximate RIP event size: 24 bytes.
         */
        fprintf(stderr,
                "[ perf record: Captured and wrote %.3f MB %s (~%" PRIu64 " samples) ]\n",
                (double)rec->bytes_written / 1024.0 / 1024.0,
                output_name,
                rec->bytes_written / 24);

        return 0;

out_delete_session:
        perf_session__delete(session);
        return err;
}

#define BRANCH_OPT(n, m) \
        { .name = n, .mode = (m) }

#define BRANCH_END { .name = NULL }

struct branch_mode {
        const char *name;
        int mode;
};

static const struct branch_mode branch_modes[] = {
        BRANCH_OPT("u", PERF_SAMPLE_BRANCH_USER),
        BRANCH_OPT("k", PERF_SAMPLE_BRANCH_KERNEL),
        BRANCH_OPT("hv", PERF_SAMPLE_BRANCH_HV),
        BRANCH_OPT("any", PERF_SAMPLE_BRANCH_ANY),
        BRANCH_OPT("any_call", PERF_SAMPLE_BRANCH_ANY_CALL),
        BRANCH_OPT("any_ret", PERF_SAMPLE_BRANCH_ANY_RETURN),
        BRANCH_OPT("ind_call", PERF_SAMPLE_BRANCH_IND_CALL),
        BRANCH_END
};

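/*
 * Parse the -b/-j argument: a comma separated list of the mode names
 * from branch_modes[] above, OR'ed into a single branch sample mask.
 * If only privilege levels (u/k/hv) were requested, default the branch
 * type to "any" so that something is actually sampled.
 */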
static int
parse_branch_stack(const struct option *opt, const char *str, int unset)
{
#define ONLY_PLM \
        (PERF_SAMPLE_BRANCH_USER        |\
         PERF_SAMPLE_BRANCH_KERNEL      |\
         PERF_SAMPLE_BRANCH_HV)

        uint64_t *mode = (uint64_t *)opt->value;
        const struct branch_mode *br;
        char *s, *os = NULL, *p;
        int ret = -1;

        if (unset)
                return 0;

        /*
         * cannot set it twice, -b + --branch-filter for instance
         */
        if (*mode)
                return -1;

        /* str may be NULL in case no arg is passed to -b */
        if (str) {
                /* because str is read-only */
                s = os = strdup(str);
                if (!s)
                        return -1;

                for (;;) {
                        p = strchr(s, ',');
                        if (p)
                                *p = '\0';

                        for (br = branch_modes; br->name; br++) {
                                if (!strcasecmp(s, br->name))
                                        break;
                        }
                        if (!br->name) {
                                ui__warning("unknown branch filter %s,"
                                            " check man page\n", s);
                                goto error;
                        }

                        *mode |= br->mode;

                        if (!p)
                                break;

                        s = p + 1;
                }
        }
        ret = 0;

        /* default to any branch */
        if ((*mode & ~ONLY_PLM) == 0) {
                *mode = PERF_SAMPLE_BRANCH_ANY;
        }
error:
        free(os);
        return ret;
}

#ifndef NO_LIBUNWIND_SUPPORT
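/*
 * Validate a user supplied dwarf stack dump size: round it up to a
 * multiple of u64 and reject zero or anything above USHRT_MAX rounded
 * down to a u64 boundary.
 */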
static int get_stack_size(char *str, unsigned long *_size)
{
        char *endptr;
        unsigned long size;
        unsigned long max_size = round_down(USHRT_MAX, sizeof(u64));

        size = strtoul(str, &endptr, 0);

        do {
                if (*endptr)
                        break;

                size = round_up(size, sizeof(u64));
                if (!size || size > max_size)
                        break;

                *_size = size;
                return 0;

        } while (0);

        pr_err("callchain: Incorrect stack dump size (max %lu): %s\n",
               max_size, str);
        return -1;
}
#endif /* !NO_LIBUNWIND_SUPPORT */

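/*
 * Parse the -g/--call-graph argument. Accepted forms are "fp" for
 * frame pointer based unwinding and, when libunwind support is built
 * in, "dwarf[,<dump size>]", which records a snapshot of the user
 * stack of the given size (default_stack_dump_size if omitted) for
 * post-unwinding.
 */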
static int
parse_callchain_opt(const struct option *opt __used, const char *arg,
                    int unset)
{
        struct perf_record *rec = (struct perf_record *)opt->value;
        char *tok, *name, *saveptr = NULL;
        char *buf;
        int ret = -1;

        /* --no-call-graph */
        if (unset)
                return 0;

        /* We specified a default option if none is provided. */
        BUG_ON(!arg);

        /* We need a buffer that we know we can write to. */
        buf = malloc(strlen(arg) + 1);
        if (!buf)
                return -ENOMEM;

        strcpy(buf, arg);

        tok = strtok_r((char *)buf, ",", &saveptr);
        name = tok ? : (char *)buf;

        do {
                /* Framepointer style */
                if (!strncmp(name, "fp", sizeof("fp"))) {
                        if (!strtok_r(NULL, ",", &saveptr)) {
                                rec->opts.call_graph = CALLCHAIN_FP;
                                ret = 0;
                        } else
                                pr_err("callchain: No more arguments "
                                       "needed for -g fp\n");
                        break;

#ifndef NO_LIBUNWIND_SUPPORT
                /* Dwarf style */
                } else if (!strncmp(name, "dwarf", sizeof("dwarf"))) {
                        ret = 0;
                        rec->opts.call_graph = CALLCHAIN_DWARF;
                        rec->opts.stack_dump_size = default_stack_dump_size;

                        tok = strtok_r(NULL, ",", &saveptr);
                        if (tok) {
                                unsigned long size = 0;

                                ret = get_stack_size(tok, &size);
                                rec->opts.stack_dump_size = size;
                        }

                        if (!ret)
                                pr_debug("callchain: stack dump size %d\n",
                                         rec->opts.stack_dump_size);
#endif /* !NO_LIBUNWIND_SUPPORT */
                } else {
                        pr_err("callchain: Unknown -g option "
                               "value: %s\n", arg);
                        break;
                }

        } while (0);

        free(buf);

        if (!ret)
                pr_debug("callchain: type %d\n", rec->opts.call_graph);

        return ret;
}

static const char * const record_usage[] = {
        "perf record [<options>] [<command>]",
        "perf record [<options>] -- <command> [<options>]",
        NULL
};

/*
 * XXX Ideally this would be local to cmd_record() and passed to a
 * perf_record__new, because we need access to it in perf_record__exit,
 * which is called after cmd_record() exits. But since record_options
 * needs to be accessible to builtin-script, leave it here.
 *
 * At least we don't touch it in all the other functions here directly.
 *
 * Just say no to tons of global variables, sigh.
 */
static struct perf_record record = {
        .opts = {
                .mmap_pages          = UINT_MAX,
                .user_freq           = UINT_MAX,
                .user_interval       = ULLONG_MAX,
                .freq                = 4000,
                .target              = {
                        .uses_mmap   = true,
                },
        },
        .write_mode = WRITE_FORCE,
        .file_new   = true,
};

/*
 * XXX Will stay a global variable till we fix builtin-script.c to stop messing
 * with it and switch to use the library functions in perf_evlist that came
 * from builtin-record.c, i.e. use perf_record_opts,
 * perf_evlist__prepare_workload, etc instead of fork+exec'ing 'perf record',
 * using pipes, etc.
 */
const struct option record_options[] = {
        OPT_CALLBACK('e', "event", &record.evlist, "event",
                     "event selector. use 'perf list' to list available events",
                     parse_events_option),
        OPT_CALLBACK(0, "filter", &record.evlist, "filter",
                     "event filter", parse_filter),
        OPT_STRING('p', "pid", &record.opts.target.pid, "pid",
                    "record events on existing process id"),
        OPT_STRING('t', "tid", &record.opts.target.tid, "tid",
                    "record events on existing thread id"),
        OPT_INTEGER('r', "realtime", &record.realtime_prio,
                    "collect data with this RT SCHED_FIFO priority"),
        OPT_BOOLEAN('D', "no-delay", &record.opts.no_delay,
                    "collect data without buffering"),
        OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples,
                    "collect raw sample records from all opened counters"),
        OPT_BOOLEAN('a', "all-cpus", &record.opts.target.system_wide,
                            "system-wide collection from all CPUs"),
        OPT_BOOLEAN('A', "append", &record.append_file,
                            "append to the output file to do incremental profiling"),
        OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu",
                    "list of cpus to monitor"),
        OPT_BOOLEAN('f', "force", &record.force,
                        "overwrite existing data file (deprecated)"),
        OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"),
        OPT_STRING('o', "output", &record.output_name, "file",
                    "output file name"),
        OPT_BOOLEAN('i', "no-inherit", &record.opts.no_inherit,
                    "child tasks do not inherit counters"),
        OPT_UINTEGER('F', "freq", &record.opts.user_freq, "profile at this frequency"),
        OPT_UINTEGER('m', "mmap-pages", &record.opts.mmap_pages,
                     "number of mmap data pages"),
        OPT_BOOLEAN(0, "group", &record.opts.group,
                    "put the counters into a counter group"),
        OPT_CALLBACK_DEFAULT('g', "call-graph", &record, "mode[,dump_size]",
                             callchain_help, &parse_callchain_opt,
                             "fp"),
        OPT_INCR('v', "verbose", &verbose,
                    "be more verbose (show counter open errors, etc)"),
        OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),
        OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat,
                    "per thread counts"),
        OPT_BOOLEAN('d', "data", &record.opts.sample_address,
                    "Sample addresses"),
        OPT_BOOLEAN('T', "timestamp", &record.opts.sample_time, "Sample timestamps"),
        OPT_BOOLEAN('P', "period", &record.opts.period, "Sample period"),
        OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples,
                    "don't sample"),
        OPT_BOOLEAN('N', "no-buildid-cache", &record.no_buildid_cache,
                    "do not update the buildid cache"),
        OPT_BOOLEAN('B', "no-buildid", &record.no_buildid,
                    "do not collect buildids in perf.data"),
        OPT_CALLBACK('G', "cgroup", &record.evlist, "name",
                     "monitor event in cgroup name only",
                     parse_cgroups),
        OPT_STRING('u', "uid", &record.opts.target.uid_str, "user",
                   "user to profile"),

        OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack,
                     "branch any", "sample any taken branches",
                     parse_branch_stack),

        OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack,
                     "branch filter mask", "branch stack filter modes",
                     parse_branch_stack),
        OPT_END()
};

int cmd_record(int argc, const char **argv, const char *prefix __used)
{
        int err = -ENOMEM;
        struct perf_evsel *pos;
        struct perf_evlist *evsel_list;
        struct perf_record *rec = &record;
        char errbuf[BUFSIZ];

        evsel_list = perf_evlist__new(NULL, NULL);
        if (evsel_list == NULL)
                return -ENOMEM;

        rec->evlist = evsel_list;

        argc = parse_options(argc, argv, record_options, record_usage,
                            PARSE_OPT_STOP_AT_NON_OPTION);
        if (!argc && perf_target__none(&rec->opts.target))
                usage_with_options(record_usage, record_options);

        if (rec->force && rec->append_file) {
                ui__error("Can't overwrite and append at the same time."
                          " You need to choose between -f and -A");
                usage_with_options(record_usage, record_options);
        } else if (rec->append_file) {
                rec->write_mode = WRITE_APPEND;
        } else {
                rec->write_mode = WRITE_FORCE;
        }

        if (nr_cgroups && !rec->opts.target.system_wide) {
                ui__error("cgroup monitoring only available in"
                          " system-wide mode\n");
                usage_with_options(record_usage, record_options);
        }

        symbol__init();

        if (symbol_conf.kptr_restrict)
                pr_warning(
"WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
"check /proc/sys/kernel/kptr_restrict.\n\n"
"Samples in kernel functions may not be resolved if a suitable vmlinux\n"
"file is not found in the buildid cache or in the vmlinux path.\n\n"
"Samples in kernel modules won't be resolved at all.\n\n"
"If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
"even with a suitable vmlinux or kallsyms file.\n\n");

        if (rec->no_buildid_cache || rec->no_buildid)
                disable_buildid_cache();

        if (evsel_list->nr_entries == 0 &&
            perf_evlist__add_default(evsel_list) < 0) {
                pr_err("Not enough memory for event selector list\n");
                goto out_symbol_exit;
        }

        err = perf_target__validate(&rec->opts.target);
        if (err) {
                perf_target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
                ui__warning("%s", errbuf);
        }

        err = perf_target__parse_uid(&rec->opts.target);
        if (err) {
                int saved_errno = errno;

                perf_target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
                ui__error("%s", errbuf);

                err = -saved_errno;
                goto out_free_fd;
        }

        err = -ENOMEM;
        if (perf_evlist__create_maps(evsel_list, &rec->opts.target) < 0)
                usage_with_options(record_usage, record_options);

        list_for_each_entry(pos, &evsel_list->entries, node) {
                if (perf_header__push_event(pos->attr.config, perf_evsel__name(pos)))
                        goto out_free_fd;
        }

        if (rec->opts.user_interval != ULLONG_MAX)
                rec->opts.default_interval = rec->opts.user_interval;
        if (rec->opts.user_freq != UINT_MAX)
                rec->opts.freq = rec->opts.user_freq;

        /*
         * A user specified count overrides the default frequency.
         */
        if (rec->opts.default_interval)
                rec->opts.freq = 0;
        else if (rec->opts.freq) {
                rec->opts.default_interval = rec->opts.freq;
        } else {
                ui__error("frequency and count are zero, aborting\n");
                err = -EINVAL;
                goto out_free_fd;
        }

        err = __cmd_record(&record, argc, argv);
out_free_fd:
        perf_evlist__delete_maps(evsel_list);
out_symbol_exit:
        symbol__exit();
        return err;
}