perf tools: Handle kernels that don't support attr.exclude_{guest,host}
[firefly-linux-kernel-4.4.55.git] / tools/perf/builtin-record.c
1 /*
2  * builtin-record.c
3  *
4  * Builtin record command: Record the profile of a workload
5  * (or a CPU, or a PID) into the perf.data output file - for
6  * later analysis via perf report.
7  */
8 #define _FILE_OFFSET_BITS 64
9
10 #include "builtin.h"
11
12 #include "perf.h"
13
14 #include "util/build-id.h"
15 #include "util/util.h"
16 #include "util/parse-options.h"
17 #include "util/parse-events.h"
18
19 #include "util/header.h"
20 #include "util/event.h"
21 #include "util/evlist.h"
22 #include "util/evsel.h"
23 #include "util/debug.h"
24 #include "util/session.h"
25 #include "util/tool.h"
26 #include "util/symbol.h"
27 #include "util/cpumap.h"
28 #include "util/thread_map.h"
29
30 #include <unistd.h>
31 #include <sched.h>
32 #include <sys/mman.h>
33
34 enum write_mode_t {
35         WRITE_FORCE,
36         WRITE_APPEND
37 };
38
39 struct perf_record {
40         struct perf_tool        tool;
41         struct perf_record_opts opts;
42         u64                     bytes_written;
43         const char              *output_name;
44         struct perf_evlist      *evlist;
45         struct perf_session     *session;
46         const char              *progname;
47         const char              *uid_str;
48         int                     output;
49         unsigned int            page_size;
50         int                     realtime_prio;
51         enum write_mode_t       write_mode;
52         bool                    no_buildid;
53         bool                    no_buildid_cache;
54         bool                    force;
55         bool                    file_new;
56         bool                    append_file;
57         long                    samples;
58         off_t                   post_processing_offset;
59 };
60
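/*
 * Account for data that some other code path already wrote straight to the
 * output fd (e.g. synthesized tracing data), so that the data size recorded
 * in the header stays correct.
 */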
61 static void advance_output(struct perf_record *rec, size_t size)
62 {
63         rec->bytes_written += size;
64 }
65
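/*
 * Write a buffer to the output file, looping to cope with short writes;
 * any write error is fatal.
 */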
66 static void write_output(struct perf_record *rec, void *buf, size_t size)
67 {
68         while (size) {
69                 int ret = write(rec->output, buf, size);
70
71                 if (ret < 0)
72                         die("failed to write");
73
74                 size -= ret;
75                 buf += ret;
76
77                 rec->bytes_written += ret;
78         }
79 }
80
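/*
 * Callback handed to the perf_event__synthesize_*() helpers: append each
 * synthesized event straight to the output file.
 */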
81 static int process_synthesized_event(struct perf_tool *tool,
82                                      union perf_event *event,
83                                      struct perf_sample *sample __used,
84                                      struct machine *machine __used)
85 {
86         struct perf_record *rec = container_of(tool, struct perf_record, tool);
87         write_output(rec, event, event->header.size);
88         return 0;
89 }
90
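/*
 * Drain one event ring buffer: copy everything between the previously
 * consumed tail and the current head to the output file, splitting the
 * copy in two when the data wraps around the end of the mmap'ed area,
 * then advance the tail so the kernel can reuse the space.
 */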
91 static void perf_record__mmap_read(struct perf_record *rec,
92                                    struct perf_mmap *md)
93 {
94         unsigned int head = perf_mmap__read_head(md);
95         unsigned int old = md->prev;
96         unsigned char *data = md->base + rec->page_size;
97         unsigned long size;
98         void *buf;
99
100         if (old == head)
101                 return;
102
103         rec->samples++;
104
105         size = head - old;
106
107         if ((old & md->mask) + size != (head & md->mask)) {
108                 buf = &data[old & md->mask];
109                 size = md->mask + 1 - (old & md->mask);
110                 old += size;
111
112                 write_output(rec, buf, size);
113         }
114
115         buf = &data[old & md->mask];
116         size = head - old;
117         old += size;
118
119         write_output(rec, buf, size);
120
121         md->prev = old;
122         perf_mmap__write_tail(md, old);
123 }
124
125 static volatile int done = 0;
126 static volatile int signr = -1;
127 static volatile int child_finished = 0;
128
129 static void sig_handler(int sig)
130 {
131         if (sig == SIGCHLD)
132                 child_finished = 1;
133
134         done = 1;
135         signr = sig;
136 }
137
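/*
 * on_exit() handler: terminate and reap a still-running forked workload,
 * then re-raise the fatal signal with its default disposition so the
 * process exits with the expected status (SIGUSR1 is ignored here).
 */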
138 static void perf_record__sig_exit(int exit_status __used, void *arg)
139 {
140         struct perf_record *rec = arg;
141         int status;
142
143         if (rec->evlist->workload.pid > 0) {
144                 if (!child_finished)
145                         kill(rec->evlist->workload.pid, SIGTERM);
146
147                 wait(&status);
148                 if (WIFSIGNALED(status))
149                         psignal(WTERMSIG(status), rec->progname);
150         }
151
152         if (signr == -1 || signr == SIGUSR1)
153                 return;
154
155         signal(signr, SIG_DFL);
156         kill(getpid(), signr);
157 }
158
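/*
 * Used when appending to an existing perf.data file: the event attributes
 * already recorded in its header must match the ones requested now.
 */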
159 static bool perf_evlist__equal(struct perf_evlist *evlist,
160                                struct perf_evlist *other)
161 {
162         struct perf_evsel *pos, *pair;
163
164         if (evlist->nr_entries != other->nr_entries)
165                 return false;
166
167         pair = list_entry(other->entries.next, struct perf_evsel, node);
168
169         list_for_each_entry(pos, &evlist->entries, node) {
170                 if (memcmp(&pos->attr, &pair->attr, sizeof(pos->attr)) != 0)
171                         return false;
172                 pair = list_entry(pair->node.next, struct perf_evsel, node);
173         }
174
175         return true;
176 }
177
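/*
 * Configure and open all requested events, degrading gracefully when the
 * running kernel lacks newer perf_event_attr features, then set any event
 * filters and mmap the ring buffers.
 */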
178 static void perf_record__open(struct perf_record *rec)
179 {
180         struct perf_evsel *pos, *first;
181         struct perf_evlist *evlist = rec->evlist;
182         struct perf_session *session = rec->session;
183         struct perf_record_opts *opts = &rec->opts;
184
185         first = list_entry(evlist->entries.next, struct perf_evsel, node);
186
187         perf_evlist__config_attrs(evlist, opts);
188
189         list_for_each_entry(pos, &evlist->entries, node) {
190                 struct perf_event_attr *attr = &pos->attr;
191                 struct xyarray *group_fd = NULL;
192                 /*
193                  * Check if parse_single_tracepoint_event has already asked for
194                  * PERF_SAMPLE_TIME.
195                  *
196                  * XXX this is kludgy but short term fix for problems introduced by
197                  * eac23d1c that broke 'perf script' by having different sample_types
198                  * when using multiple tracepoint events when we use a perf binary
199                  * that tries to use sample_id_all on an older kernel.
200                  *
201                  * We need to move counter creation to perf_session, support
202                  * different sample_types, etc.
203                  */
204                 bool time_needed = attr->sample_type & PERF_SAMPLE_TIME;
205
206                 if (opts->group && pos != first)
207                         group_fd = first->fd;
208 fallback_missing_features:
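                /*
                 * Older kernels reject attr.exclude_guest/exclude_host with
                 * EINVAL; once that is detected below, clear both bits and
                 * retry the open from this point.
                 */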
209                 if (opts->exclude_guest_missing)
210                         attr->exclude_guest = attr->exclude_host = 0;
211 retry_sample_id:
212                 attr->sample_id_all = opts->sample_id_all_avail ? 1 : 0;
213 try_again:
214                 if (perf_evsel__open(pos, evlist->cpus, evlist->threads,
215                                      opts->group, group_fd) < 0) {
216                         int err = errno;
217
218                         if (err == EPERM || err == EACCES) {
219                                 ui__error_paranoid();
220                                 exit(EXIT_FAILURE);
221                         } else if (err == ENODEV && opts->cpu_list) {
222                                 die("No such device - did you specify"
223                                         " an out-of-range profile CPU?\n");
224                         } else if (err == EINVAL) {
225                                 if (!opts->exclude_guest_missing &&
226                                     (attr->exclude_guest || attr->exclude_host)) {
227                                         pr_debug("Old kernel, cannot exclude "
228                                                  "guest or host samples.\n");
229                                         opts->exclude_guest_missing = true;
230                                         goto fallback_missing_features;
231                                 } else if (opts->sample_id_all_avail) {
232                                         /*
233                                          * Old kernel, no attr->sample_id_all field
234                                          */
235                                         opts->sample_id_all_avail = false;
236                                         if (!opts->sample_time && !opts->raw_samples && !time_needed)
237                                                 attr->sample_type &= ~PERF_SAMPLE_TIME;
238
239                                         goto retry_sample_id;
240                                 }
241                         }
242
243                         /*
244                          * If it's cycles then fall back to hrtimer
245                          * based cpu-clock-tick sw counter, which
246                          * is always available even if no PMU support:
247                          */
248                         if (attr->type == PERF_TYPE_HARDWARE
249                                         && attr->config == PERF_COUNT_HW_CPU_CYCLES) {
250
251                                 if (verbose)
252                                         ui__warning("The cycles event is not supported, "
253                                                     "trying to fall back to cpu-clock-ticks\n");
254                                 attr->type = PERF_TYPE_SOFTWARE;
255                                 attr->config = PERF_COUNT_SW_CPU_CLOCK;
256                                 goto try_again;
257                         }
258
259                         if (err == ENOENT) {
260                                 ui__warning("The %s event is not supported.\n",
261                                             event_name(pos));
262                                 exit(EXIT_FAILURE);
263                         }
264
265                         printf("\n");
266                         error("sys_perf_event_open() syscall returned with %d (%s).  /bin/dmesg may provide additional information.\n",
267                               err, strerror(err));
268
269 #if defined(__i386__) || defined(__x86_64__)
270                         if (attr->type == PERF_TYPE_HARDWARE && err == EOPNOTSUPP)
271                                 die("No hardware sampling interrupt available."
272                                     " No APIC? If so then you can boot the kernel"
273                                     " with the \"lapic\" boot parameter to"
274                                     " force-enable it.\n");
275 #endif
276
277                         die("No CONFIG_PERF_EVENTS=y kernel support configured?\n");
278                 }
279         }
280
281         if (perf_evlist__set_filters(evlist)) {
282                 error("failed to set filter with %d (%s)\n", errno,
283                         strerror(errno));
284                 exit(-1);
285         }
286
287         if (perf_evlist__mmap(evlist, opts->mmap_pages, false) < 0) {
288                 if (errno == EPERM)
289                         die("Permission error mapping pages.\n"
290                             "Consider increasing "
291                             "/proc/sys/kernel/perf_event_mlock_kb,\n"
292                             "or try again with a smaller value of -m/--mmap_pages.\n"
293                             "(current value: %d)\n", opts->mmap_pages);
294                 else if (!is_power_of_2(opts->mmap_pages))
295                         die("--mmap_pages/-m value must be a power of two.");
296
297                 die("failed to mmap with %d (%s)\n", errno, strerror(errno));
298         }
299
300         if (rec->file_new)
301                 session->evlist = evlist;
302         else {
303                 if (!perf_evlist__equal(session->evlist, evlist)) {
304                         fprintf(stderr, "incompatible append\n");
305                         exit(-1);
306                 }
307         }
308
309         perf_session__update_sample_type(session);
310 }
311
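/*
 * Re-read the events written to the output file so far so that DSOs with
 * samples get marked as hit, allowing their build-ids to be emitted in the
 * header when the file is finalized.
 */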
312 static int process_buildids(struct perf_record *rec)
313 {
314         u64 size = lseek(rec->output, 0, SEEK_CUR);
315
316         if (size == 0)
317                 return 0;
318
319         rec->session->fd = rec->output;
320         return __perf_session__process_events(rec->session, rec->post_processing_offset,
321                                               size - rec->post_processing_offset,
322                                               size, &build_id__mark_dso_hit_ops);
323 }
324
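/*
 * on_exit() handler that finalizes a non-pipe output file: account the
 * payload size in the header, run the build-id pass unless disabled, and
 * rewrite the header in place.
 */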
325 static void perf_record__exit(int status __used, void *arg)
326 {
327         struct perf_record *rec = arg;
328
329         if (!rec->opts.pipe_output) {
330                 rec->session->header.data_size += rec->bytes_written;
331
332                 if (!rec->no_buildid)
333                         process_buildids(rec);
334                 perf_session__write_header(rec->session, rec->evlist,
335                                            rec->output, true);
336                 perf_session__delete(rec->session);
337                 perf_evlist__delete(rec->evlist);
338                 symbol__exit();
339         }
340 }
341
342 static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
343 {
344         int err;
345         struct perf_tool *tool = data;
346
347         if (machine__is_host(machine))
348                 return;
349
350         /*
351          * As for the guest kernel, when processing the record & report
352          * subcommands we arrange the module mmap prior to the guest kernel
353          * mmap and trigger a preload dso, because by default guest module
354          * symbols are loaded from guest kallsyms instead of
355          * /lib/modules/XXX/XXX. This avoids missing symbols when the first
356          * address is in a module instead of in the guest kernel.
357          */
358         err = perf_event__synthesize_modules(tool, process_synthesized_event,
359                                              machine);
360         if (err < 0)
361                 pr_err("Couldn't record guest kernel [%d]'s reference"
362                        " relocation symbol.\n", machine->pid);
363
364         /*
365          * We use _stext for guest kernel because guest kernel's /proc/kallsyms
366          * have no _text sometimes.
367          */
368         err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
369                                                  machine, "_text");
370         if (err < 0)
371                 err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
372                                                          machine, "_stext");
373         if (err < 0)
374                 pr_err("Couldn't record guest kernel [%d]'s reference"
375                        " relocation symbol.\n", machine->pid);
376 }
377
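/*
 * Header-only PERF_RECORD_FINISHED_ROUND marker, appended after each full
 * pass over the mmaps (when tracing data is being recorded) so that
 * report-time event reordering can flush everything seen up to this point.
 */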
378 static struct perf_event_header finished_round_event = {
379         .size = sizeof(struct perf_event_header),
380         .type = PERF_RECORD_FINISHED_ROUND,
381 };
382
383 static void perf_record__mmap_read_all(struct perf_record *rec)
384 {
385         int i;
386
387         for (i = 0; i < rec->evlist->nr_mmaps; i++) {
388                 if (rec->evlist->mmap[i].base)
389                         perf_record__mmap_read(rec, &rec->evlist->mmap[i]);
390         }
391
392         if (perf_header__has_feat(&rec->session->header, HEADER_TRACE_INFO))
393                 write_output(rec, &finished_round_event, sizeof(finished_round_event));
394 }
395
396 static int __cmd_record(struct perf_record *rec, int argc, const char **argv)
397 {
398         struct stat st;
399         int flags;
400         int err, output, feat;
401         unsigned long waking = 0;
402         const bool forks = argc > 0;
403         struct machine *machine;
404         struct perf_tool *tool = &rec->tool;
405         struct perf_record_opts *opts = &rec->opts;
406         struct perf_evlist *evsel_list = rec->evlist;
407         const char *output_name = rec->output_name;
408         struct perf_session *session;
409
410         rec->progname = argv[0];
411
412         rec->page_size = sysconf(_SC_PAGE_SIZE);
413
414         on_exit(perf_record__sig_exit, rec);
415         signal(SIGCHLD, sig_handler);
416         signal(SIGINT, sig_handler);
417         signal(SIGUSR1, sig_handler);
418
419         if (!output_name) {
420                 if (!fstat(STDOUT_FILENO, &st) && S_ISFIFO(st.st_mode))
421                         opts->pipe_output = true;
422                 else
423                         rec->output_name = output_name = "perf.data";
424         }
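        /*
         * "-" means write to stdout; a non-empty existing file is rotated to
         * <name>.old when overwriting, and appending to a missing or empty
         * file silently falls back to overwrite mode.
         */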
425         if (output_name) {
426                 if (!strcmp(output_name, "-"))
427                         opts->pipe_output = true;
428                 else if (!stat(output_name, &st) && st.st_size) {
429                         if (rec->write_mode == WRITE_FORCE) {
430                                 char oldname[PATH_MAX];
431                                 snprintf(oldname, sizeof(oldname), "%s.old",
432                                          output_name);
433                                 unlink(oldname);
434                                 rename(output_name, oldname);
435                         }
436                 } else if (rec->write_mode == WRITE_APPEND) {
437                         rec->write_mode = WRITE_FORCE;
438                 }
439         }
440
441         flags = O_CREAT|O_RDWR;
442         if (rec->write_mode == WRITE_APPEND)
443                 rec->file_new = 0;
444         else
445                 flags |= O_TRUNC;
446
447         if (opts->pipe_output)
448                 output = STDOUT_FILENO;
449         else
450                 output = open(output_name, flags, S_IRUSR | S_IWUSR);
451         if (output < 0) {
452                 perror("failed to create output file");
453                 exit(-1);
454         }
455
456         rec->output = output;
457
458         session = perf_session__new(output_name, O_WRONLY,
459                                     rec->write_mode == WRITE_FORCE, false, NULL);
460         if (session == NULL) {
461                 pr_err("Not enough memory for reading perf file header\n");
462                 return -1;
463         }
464
465         rec->session = session;
466
467         for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
468                 perf_header__set_feat(&session->header, feat);
469
470         if (rec->no_buildid)
471                 perf_header__clear_feat(&session->header, HEADER_BUILD_ID);
472
473         if (!have_tracepoints(&evsel_list->entries))
474                 perf_header__clear_feat(&session->header, HEADER_TRACE_INFO);
475
476         if (!rec->file_new) {
477                 err = perf_session__read_header(session, output);
478                 if (err < 0)
479                         goto out_delete_session;
480         }
481
482         if (forks) {
483                 err = perf_evlist__prepare_workload(evsel_list, opts, argv);
484                 if (err < 0) {
485                         pr_err("Couldn't run the workload!\n");
486                         goto out_delete_session;
487                 }
488         }
489
490         perf_record__open(rec);
491
492         /*
493          * perf_session__delete(session) will be called at perf_record__exit()
494          */
495         on_exit(perf_record__exit, rec);
496
497         if (opts->pipe_output) {
498                 err = perf_header__write_pipe(output);
499                 if (err < 0)
500                         return err;
501         } else if (rec->file_new) {
502                 err = perf_session__write_header(session, evsel_list,
503                                                  output, false);
504                 if (err < 0)
505                         return err;
506         }
507
508         if (!rec->no_buildid
509             && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) {
510                 pr_err("Couldn't generate buildids. "
511                        "Use --no-buildid to profile anyway.\n");
512                 return -1;
513         }
514
515         rec->post_processing_offset = lseek(output, 0, SEEK_CUR);
516
517         machine = perf_session__find_host_machine(session);
518         if (!machine) {
519                 pr_err("Couldn't find native kernel information.\n");
520                 return -1;
521         }
522
523         if (opts->pipe_output) {
524                 err = perf_event__synthesize_attrs(tool, session,
525                                                    process_synthesized_event);
526                 if (err < 0) {
527                         pr_err("Couldn't synthesize attrs.\n");
528                         return err;
529                 }
530
531                 err = perf_event__synthesize_event_types(tool, process_synthesized_event,
532                                                          machine);
533                 if (err < 0) {
534                         pr_err("Couldn't synthesize event_types.\n");
535                         return err;
536                 }
537
538                 if (have_tracepoints(&evsel_list->entries)) {
539                         /*
540                          * FIXME err <= 0 here actually means that
541                          * there were no tracepoints so it's not really
542                          * an error, just that we don't need to
543                          * synthesize anything.  We really have to
544                          * return this more properly and also
545                          * propagate errors that now are calling die()
546                          */
547                         err = perf_event__synthesize_tracing_data(tool, output, evsel_list,
548                                                                   process_synthesized_event);
549                         if (err <= 0) {
550                                 pr_err("Couldn't record tracing data.\n");
551                                 return err;
552                         }
553                         advance_output(rec, err);
554                 }
555         }
556
557         err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
558                                                  machine, "_text");
559         if (err < 0)
560                 err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
561                                                          machine, "_stext");
562         if (err < 0)
563                 pr_err("Couldn't record kernel reference relocation symbol\n"
564                        "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
565                        "Check /proc/kallsyms permission or run as root.\n");
566
567         err = perf_event__synthesize_modules(tool, process_synthesized_event,
568                                              machine);
569         if (err < 0)
570                 pr_err("Couldn't record kernel module information.\n"
571                        "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
572                        "Check /proc/modules permission or run as root.\n");
573
574         if (perf_guest)
575                 perf_session__process_machines(session, tool,
576                                                perf_event__synthesize_guest_os);
577
578         if (!opts->system_wide)
579                 perf_event__synthesize_thread_map(tool, evsel_list->threads,
580                                                   process_synthesized_event,
581                                                   machine);
582         else
583                 perf_event__synthesize_threads(tool, process_synthesized_event,
584                                                machine);
585
586         if (rec->realtime_prio) {
587                 struct sched_param param;
588
589                 param.sched_priority = rec->realtime_prio;
590                 if (sched_setscheduler(0, SCHED_FIFO, &param)) {
591                         pr_err("Could not set realtime priority.\n");
592                         exit(-1);
593                 }
594         }
595
596         perf_evlist__enable(evsel_list);
597
598         /*
599          * Let the child rip
600          */
601         if (forks)
602                 perf_evlist__start_workload(evsel_list);
603
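        /*
         * Main capture loop: drain all mmaps each iteration; when a pass
         * finds no new data, either stop (if a signal asked us to) or block
         * in poll() until the kernel signals more. Once stopping, disable
         * the counters so the remaining passes only flush buffered data.
         */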
604         for (;;) {
605                 int hits = rec->samples;
606
607                 perf_record__mmap_read_all(rec);
608
609                 if (hits == rec->samples) {
610                         if (done)
611                                 break;
612                         err = poll(evsel_list->pollfd, evsel_list->nr_fds, -1);
613                         waking++;
614                 }
615
616                 if (done)
617                         perf_evlist__disable(evsel_list);
618         }
619
620         if (quiet || signr == SIGUSR1)
621                 return 0;
622
623         fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking);
624
625         /*
626          * Approximate RIP event size: 24 bytes.
627          */
628         fprintf(stderr,
629                 "[ perf record: Captured and wrote %.3f MB %s (~%" PRIu64 " samples) ]\n",
630                 (double)rec->bytes_written / 1024.0 / 1024.0,
631                 output_name,
632                 rec->bytes_written / 24);
633
634         return 0;
635
636 out_delete_session:
637         perf_session__delete(session);
638         return err;
639 }
640
641 static const char * const record_usage[] = {
642         "perf record [<options>] [<command>]",
643         "perf record [<options>] -- <command> [<options>]",
644         NULL
645 };
646
647 /*
648  * XXX Ideally would be local to cmd_record() and passed to a perf_record__new
649  * because we need to have access to it in perf_record__exit, that is called
650  * after cmd_record() exits, but since record_options need to be accessible to
651  * builtin-script, leave it here.
652  *
653  * At least we don't touch it in all the other functions here directly.
654  *
655  * Just say no to tons of global variables, sigh.
656  */
657 static struct perf_record record = {
658         .opts = {
659                 .mmap_pages          = UINT_MAX,
660                 .user_freq           = UINT_MAX,
661                 .user_interval       = ULLONG_MAX,
662                 .freq                = 1000,
663                 .sample_id_all_avail = true,
664         },
665         .write_mode = WRITE_FORCE,
666         .file_new   = true,
667 };
668
669 /*
670  * XXX Will stay a global variable till we fix builtin-script.c to stop messing
671  * with it and switch to use the library functions in perf_evlist that came
672  * from builtin-record.c, i.e. use perf_record_opts,
673  * perf_evlist__prepare_workload, etc instead of fork+exec'ing 'perf record',
674  * using pipes, etc.
675  */
676 const struct option record_options[] = {
677         OPT_CALLBACK('e', "event", &record.evlist, "event",
678                      "event selector. use 'perf list' to list available events",
679                      parse_events_option),
680         OPT_CALLBACK(0, "filter", &record.evlist, "filter",
681                      "event filter", parse_filter),
682         OPT_STRING('p', "pid", &record.opts.target_pid, "pid",
683                     "record events on existing process id"),
684         OPT_STRING('t', "tid", &record.opts.target_tid, "tid",
685                     "record events on existing thread id"),
686         OPT_INTEGER('r', "realtime", &record.realtime_prio,
687                     "collect data with this RT SCHED_FIFO priority"),
688         OPT_BOOLEAN('D', "no-delay", &record.opts.no_delay,
689                     "collect data without buffering"),
690         OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples,
691                     "collect raw sample records from all opened counters"),
692         OPT_BOOLEAN('a', "all-cpus", &record.opts.system_wide,
693                             "system-wide collection from all CPUs"),
694         OPT_BOOLEAN('A', "append", &record.append_file,
695                             "append to the output file to do incremental profiling"),
696         OPT_STRING('C', "cpu", &record.opts.cpu_list, "cpu",
697                     "list of cpus to monitor"),
698         OPT_BOOLEAN('f', "force", &record.force,
699                         "overwrite existing data file (deprecated)"),
700         OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"),
701         OPT_STRING('o', "output", &record.output_name, "file",
702                     "output file name"),
703         OPT_BOOLEAN('i', "no-inherit", &record.opts.no_inherit,
704                     "child tasks do not inherit counters"),
705         OPT_UINTEGER('F', "freq", &record.opts.user_freq, "profile at this frequency"),
706         OPT_UINTEGER('m', "mmap-pages", &record.opts.mmap_pages,
707                      "number of mmap data pages"),
708         OPT_BOOLEAN(0, "group", &record.opts.group,
709                     "put the counters into a counter group"),
710         OPT_BOOLEAN('g', "call-graph", &record.opts.call_graph,
711                     "do call-graph (stack chain/backtrace) recording"),
712         OPT_INCR('v', "verbose", &verbose,
713                     "be more verbose (show counter open errors, etc)"),
714         OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),
715         OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat,
716                     "per thread counts"),
717         OPT_BOOLEAN('d', "data", &record.opts.sample_address,
718                     "Sample addresses"),
719         OPT_BOOLEAN('T', "timestamp", &record.opts.sample_time, "Sample timestamps"),
720         OPT_BOOLEAN('P', "period", &record.opts.period, "Sample period"),
721         OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples,
722                     "don't sample"),
723         OPT_BOOLEAN('N', "no-buildid-cache", &record.no_buildid_cache,
724                     "do not update the buildid cache"),
725         OPT_BOOLEAN('B', "no-buildid", &record.no_buildid,
726                     "do not collect buildids in perf.data"),
727         OPT_CALLBACK('G', "cgroup", &record.evlist, "name",
728                      "monitor event in cgroup name only",
729                      parse_cgroups),
730         OPT_STRING('u', "uid", &record.uid_str, "user", "user to profile"),
731         OPT_END()
732 };
733
734 int cmd_record(int argc, const char **argv, const char *prefix __used)
735 {
736         int err = -ENOMEM;
737         struct perf_evsel *pos;
738         struct perf_evlist *evsel_list;
739         struct perf_record *rec = &record;
740
741         perf_header__set_cmdline(argc, argv);
742
743         evsel_list = perf_evlist__new(NULL, NULL);
744         if (evsel_list == NULL)
745                 return -ENOMEM;
746
747         rec->evlist = evsel_list;
748
749         argc = parse_options(argc, argv, record_options, record_usage,
750                             PARSE_OPT_STOP_AT_NON_OPTION);
751         if (!argc && !rec->opts.target_pid && !rec->opts.target_tid &&
752                 !rec->opts.system_wide && !rec->opts.cpu_list && !rec->uid_str)
753                 usage_with_options(record_usage, record_options);
754
755         if (rec->force && rec->append_file) {
756                 fprintf(stderr, "Can't overwrite and append at the same time."
757                                 " You need to choose between -f and -A.\n");
758                 usage_with_options(record_usage, record_options);
759         } else if (rec->append_file) {
760                 rec->write_mode = WRITE_APPEND;
761         } else {
762                 rec->write_mode = WRITE_FORCE;
763         }
764
765         if (nr_cgroups && !rec->opts.system_wide) {
766                 fprintf(stderr, "cgroup monitoring only available in"
767                         " system-wide mode\n");
768                 usage_with_options(record_usage, record_options);
769         }
770
771         symbol__init();
772
773         if (symbol_conf.kptr_restrict)
774                 pr_warning(
775 "WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
776 "check /proc/sys/kernel/kptr_restrict.\n\n"
777 "Samples in kernel functions may not be resolved if a suitable vmlinux\n"
778 "file is not found in the buildid cache or in the vmlinux path.\n\n"
779 "Samples in kernel modules won't be resolved at all.\n\n"
780 "If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
781 "even with a suitable vmlinux or kallsyms file.\n\n");
782
783         if (rec->no_buildid_cache || rec->no_buildid)
784                 disable_buildid_cache();
785
786         if (evsel_list->nr_entries == 0 &&
787             perf_evlist__add_default(evsel_list) < 0) {
788                 pr_err("Not enough memory for event selector list\n");
789                 goto out_symbol_exit;
790         }
791
792         rec->opts.uid = parse_target_uid(rec->uid_str, rec->opts.target_tid,
793                                          rec->opts.target_pid);
794         if (rec->uid_str != NULL && rec->opts.uid == UINT_MAX - 1)
795                 goto out_free_fd;
796
797         if (rec->opts.target_pid)
798                 rec->opts.target_tid = rec->opts.target_pid;
799
800         if (perf_evlist__create_maps(evsel_list, rec->opts.target_pid,
801                                      rec->opts.target_tid, rec->opts.uid,
802                                      rec->opts.cpu_list) < 0)
803                 usage_with_options(record_usage, record_options);
804
805         list_for_each_entry(pos, &evsel_list->entries, node) {
806                 if (perf_header__push_event(pos->attr.config, event_name(pos)))
807                         goto out_free_fd;
808         }
809
810         if (rec->opts.user_interval != ULLONG_MAX)
811                 rec->opts.default_interval = rec->opts.user_interval;
812         if (rec->opts.user_freq != UINT_MAX)
813                 rec->opts.freq = rec->opts.user_freq;
814
815         /*
816          * User specified count overrides default frequency.
817          */
818         if (rec->opts.default_interval)
819                 rec->opts.freq = 0;
820         else if (rec->opts.freq) {
821                 rec->opts.default_interval = rec->opts.freq;
822         } else {
823                 fprintf(stderr, "frequency and count are zero, aborting\n");
824                 err = -EINVAL;
825                 goto out_free_fd;
826         }
827
828         err = __cmd_record(&record, argc, argv);
829 out_free_fd:
830         perf_evlist__delete_maps(evsel_list);
831 out_symbol_exit:
832         symbol__exit();
833         return err;
834 }