perf tools: Allow multiple threads or processes in record, stat, top
[firefly-linux-kernel-4.4.55.git] / tools / perf / builtin-record.c
1 /*
2  * builtin-record.c
3  *
4  * Builtin record command: Record the profile of a workload
5  * (or a CPU, or a PID) into the perf.data output file - for
6  * later analysis via perf report.
7  */
8 #define _FILE_OFFSET_BITS 64
9
10 #include "builtin.h"
11
12 #include "perf.h"
13
14 #include "util/build-id.h"
15 #include "util/util.h"
16 #include "util/parse-options.h"
17 #include "util/parse-events.h"
18
19 #include "util/header.h"
20 #include "util/event.h"
21 #include "util/evlist.h"
22 #include "util/evsel.h"
23 #include "util/debug.h"
24 #include "util/session.h"
25 #include "util/tool.h"
26 #include "util/symbol.h"
27 #include "util/cpumap.h"
28 #include "util/thread_map.h"
29
30 #include <unistd.h>
31 #include <sched.h>
32 #include <sys/mman.h>
33
/*
 * How an existing perf.data output file is treated: WRITE_FORCE renames
 * it to <name>.old and starts fresh, WRITE_APPEND (-A) adds to it.
 */
enum write_mode_t {
	WRITE_FORCE,
	WRITE_APPEND
};
38
/*
 * All state for one 'perf record' run.  A pointer to this is recovered
 * via container_of() from the embedded perf_tool in event callbacks and
 * passed as the opaque argument to the on_exit()/signal hooks.
 */
struct perf_record {
	struct perf_tool	tool;		/* callbacks for synthesized events */
	struct perf_record_opts opts;		/* parsed command-line recording options */
	u64			bytes_written;	/* payload bytes written, for header data_size */
	const char		*output_name;	/* output path; NULL -> "perf.data", "-" -> stdout */
	struct perf_evlist	*evlist;	/* events being recorded */
	struct perf_session	*session;	/* session owning the output file header */
	const char		*progname;	/* argv[0], used by psignal() reporting */
	const char		*uid_str;	/* raw --uid argument, NULL if not given */
	int			output;		/* output file descriptor */
	unsigned int		page_size;	/* runtime page size, for mmap data offset */
	int			realtime_prio;	/* SCHED_FIFO priority, 0 = off */
	enum write_mode_t	write_mode;	/* overwrite vs. append */
	bool			no_buildid;	/* -B: skip build-id collection */
	bool			no_buildid_cache; /* -N: don't update ~/.debug cache */
	bool			force;		/* -f: overwrite existing file */
	bool			file_new;	/* false when appending to existing file */
	bool			append_file;	/* -A was given */
	long			samples;	/* mmap reads done, for wakeup accounting */
	off_t			post_processing_offset; /* where event data starts in the file */
};
60
/*
 * Account for 'size' bytes that some other code already wrote straight to
 * the output fd (e.g. perf_event__synthesize_tracing_data()), so that the
 * header's data_size stays accurate without writing anything here.
 */
static void advance_output(struct perf_record *rec, size_t size)
{
	rec->bytes_written += size;
}
65
/*
 * Write the whole buffer to the output fd, looping over short writes, and
 * account the bytes toward the header's data_size.  Any write error is
 * fatal (die()).
 *
 * NOTE(review): a write() returning 0 would make this loop spin forever;
 * in practice regular files and pipes return progress or -1, but worth
 * confirming if the output can ever be a non-blocking fd.
 */
static void write_output(struct perf_record *rec, void *buf, size_t size)
{
	while (size) {
		int ret = write(rec->output, buf, size);

		if (ret < 0)
			die("failed to write");

		size -= ret;
		buf += ret;

		rec->bytes_written += ret;
	}
}
80
/*
 * perf_tool callback: forward a synthesized event (mmap, comm, attrs, ...)
 * to the output file verbatim.  Always returns 0.
 */
static int process_synthesized_event(struct perf_tool *tool,
				     union perf_event *event,
				     struct perf_sample *sample __used,
				     struct machine *machine __used)
{
	struct perf_record *rec = container_of(tool, struct perf_record, tool);
	write_output(rec, event, event->header.size);
	return 0;
}
90
/*
 * Drain one mmap'ed ring buffer into the output file.
 *
 * The kernel advances the head as it produces events; md->prev remembers
 * how far we consumed last time.  If the fresh data wraps past the end of
 * the power-of-two buffer it is written out in two chunks.  Finally the
 * tail pointer is published back so the kernel may reuse the space.
 */
static void perf_record__mmap_read(struct perf_record *rec,
				   struct perf_mmap *md)
{
	unsigned int head = perf_mmap__read_head(md);
	unsigned int old = md->prev;
	unsigned char *data = md->base + rec->page_size; /* skip control page */
	unsigned long size;
	void *buf;

	if (old == head)
		return;	/* nothing new since last drain */

	rec->samples++;

	size = head - old;

	/* Wrapped: first write the chunk up to the end of the buffer. */
	if ((old & md->mask) + size != (head & md->mask)) {
		buf = &data[old & md->mask];
		size = md->mask + 1 - (old & md->mask);
		old += size;

		write_output(rec, buf, size);
	}

	/* Remaining (or only) chunk from the start of the buffer. */
	buf = &data[old & md->mask];
	size = head - old;
	old += size;

	write_output(rec, buf, size);

	md->prev = old;
	perf_mmap__write_tail(md, old);
}
124
/* Flags shared between the signal handler and the main record loop. */
static volatile int done = 0;		/* stop recording */
static volatile int signr = -1;		/* which signal stopped us */
static volatile int child_finished = 0;	/* workload already exited */

/*
 * SIGCHLD/SIGINT/SIGUSR1 handler: remember which signal fired and ask the
 * main loop to wind down.  child_finished is set before done so that the
 * exit path never observes done without knowing the workload is gone.
 */
static void sig_handler(int sig)
{
	if (sig == SIGCHLD)
		child_finished = 1;

	signr = sig;
	done = 1;
}
137
/*
 * on_exit() hook: reap (and if needed terminate) the forked workload,
 * then re-raise the fatal signal with default disposition so the shell
 * sees the true exit cause.  SIGUSR1 and normal exits are not re-raised.
 */
static void perf_record__sig_exit(int exit_status __used, void *arg)
{
	struct perf_record *rec = arg;
	int status;

	if (rec->evlist->workload.pid > 0) {
		/* Only kill the workload if it hasn't already exited. */
		if (!child_finished)
			kill(rec->evlist->workload.pid, SIGTERM);

		wait(&status);
		if (WIFSIGNALED(status))
			psignal(WTERMSIG(status), rec->progname);
	}

	if (signr == -1 || signr == SIGUSR1)
		return;

	signal(signr, SIG_DFL);
	kill(getpid(), signr);
}
158
159 static bool perf_evlist__equal(struct perf_evlist *evlist,
160                                struct perf_evlist *other)
161 {
162         struct perf_evsel *pos, *pair;
163
164         if (evlist->nr_entries != other->nr_entries)
165                 return false;
166
167         pair = list_entry(other->entries.next, struct perf_evsel, node);
168
169         list_for_each_entry(pos, &evlist->entries, node) {
170                 if (memcmp(&pos->attr, &pair->attr, sizeof(pos->attr) != 0))
171                         return false;
172                 pair = list_entry(pair->node.next, struct perf_evsel, node);
173         }
174
175         return true;
176 }
177
/*
 * Configure and open every counter in rec->evlist, retrying with
 * progressively weaker attr settings for older kernels, then mmap the
 * ring buffers and, in append mode, verify the events match the file.
 * Any unrecoverable problem terminates the process via die()/exit().
 */
static void perf_record__open(struct perf_record *rec)
{
	struct perf_evsel *pos, *first;
	struct perf_evlist *evlist = rec->evlist;
	struct perf_session *session = rec->session;
	struct perf_record_opts *opts = &rec->opts;

	first = list_entry(evlist->entries.next, struct perf_evsel, node);

	perf_evlist__config_attrs(evlist, opts);

	list_for_each_entry(pos, &evlist->entries, node) {
		struct perf_event_attr *attr = &pos->attr;
		struct xyarray *group_fd = NULL;
		/*
		 * Check if parse_single_tracepoint_event has already asked for
		 * PERF_SAMPLE_TIME.
		 *
		 * XXX this is kludgy but short term fix for problems introduced by
		 * eac23d1c that broke 'perf script' by having different sample_types
		 * when using multiple tracepoint events when we use a perf binary
		 * that tries to use sample_id_all on an older kernel.
		 *
		 * We need to move counter creation to perf_session, support
		 * different sample_types, etc.
		 */
		bool time_needed = attr->sample_type & PERF_SAMPLE_TIME;

		/* Group members open against the leader's fd array. */
		if (opts->group && pos != first)
			group_fd = first->fd;
retry_sample_id:
		attr->sample_id_all = opts->sample_id_all_avail ? 1 : 0;
try_again:
		if (perf_evsel__open(pos, evlist->cpus, evlist->threads,
				     opts->group, group_fd) < 0) {
			int err = errno;

			if (err == EPERM || err == EACCES) {
				ui__error_paranoid();
				exit(EXIT_FAILURE);
			} else if (err ==  ENODEV && opts->cpu_list) {
				die("No such device - did you specify"
					" an out-of-range profile CPU?\n");
			} else if (err == EINVAL && opts->sample_id_all_avail) {
				/*
				 * Old kernel, no attr->sample_id_type_all field
				 */
				opts->sample_id_all_avail = false;
				if (!opts->sample_time && !opts->raw_samples && !time_needed)
					attr->sample_type &= ~PERF_SAMPLE_TIME;

				goto retry_sample_id;
			}

			/*
			 * If it's cycles then fall back to hrtimer
			 * based cpu-clock-tick sw counter, which
			 * is always available even if no PMU support:
			 */
			if (attr->type == PERF_TYPE_HARDWARE
					&& attr->config == PERF_COUNT_HW_CPU_CYCLES) {

				if (verbose)
					ui__warning("The cycles event is not supported, "
						    "trying to fall back to cpu-clock-ticks\n");
				attr->type = PERF_TYPE_SOFTWARE;
				attr->config = PERF_COUNT_SW_CPU_CLOCK;
				goto try_again;
			}

			if (err == ENOENT) {
				ui__warning("The %s event is not supported.\n",
					    event_name(pos));
				exit(EXIT_FAILURE);
			}

			printf("\n");
			error("sys_perf_event_open() syscall returned with %d (%s).  /bin/dmesg may provide additional information.\n",
			      err, strerror(err));

#if defined(__i386__) || defined(__x86_64__)
			if (attr->type == PERF_TYPE_HARDWARE && err == EOPNOTSUPP)
				die("No hardware sampling interrupt available."
				    " No APIC? If so then you can boot the kernel"
				    " with the \"lapic\" boot parameter to"
				    " force-enable it.\n");
#endif

			die("No CONFIG_PERF_EVENTS=y kernel support configured?\n");
		}
	}

	/* Apply any --filter expressions to the opened tracepoints. */
	if (perf_evlist__set_filters(evlist)) {
		error("failed to set filter with %d (%s)\n", errno,
			strerror(errno));
		exit(-1);
	}

	if (perf_evlist__mmap(evlist, opts->mmap_pages, false) < 0) {
		if (errno == EPERM)
			die("Permission error mapping pages.\n"
			    "Consider increasing "
			    "/proc/sys/kernel/perf_event_mlock_kb,\n"
			    "or try again with a smaller value of -m/--mmap_pages.\n"
			    "(current value: %d)\n", opts->mmap_pages);
		else if (!is_power_of_2(opts->mmap_pages))
			die("--mmap_pages/-m value must be a power of two.");

		die("failed to mmap with %d (%s)\n", errno, strerror(errno));
	}

	/*
	 * Appending (-A) requires the events being recorded to match the
	 * ones already described in the file's header.
	 */
	if (rec->file_new)
		session->evlist = evlist;
	else {
		if (!perf_evlist__equal(session->evlist, evlist)) {
			fprintf(stderr, "incompatible append\n");
			exit(-1);
		}
	}

	perf_session__update_sample_type(session);
}
300
/*
 * Re-process the event data written so far (everything after
 * rec->post_processing_offset) with build_id__mark_dso_hit_ops, marking
 * the DSOs that actually got hits so their build-ids land in the header.
 * Returns 0 immediately when the file is empty.
 */
static int process_buildids(struct perf_record *rec)
{
	u64 size = lseek(rec->output, 0, SEEK_CUR);

	if (size == 0)
		return 0;

	rec->session->fd = rec->output;
	return __perf_session__process_events(rec->session, rec->post_processing_offset,
					      size - rec->post_processing_offset,
					      size, &build_id__mark_dso_hit_ops);
}
313
/*
 * on_exit() hook: finalize the perf.data file - fold bytes_written into
 * the header's data_size, collect build-ids and rewrite the header, then
 * release the session/evlist.  Nothing to do when piping, since a pipe
 * has no seekable header to patch.
 */
static void perf_record__exit(int status __used, void *arg)
{
	struct perf_record *rec = arg;

	if (!rec->opts.pipe_output) {
		rec->session->header.data_size += rec->bytes_written;

		if (!rec->no_buildid)
			process_buildids(rec);
		perf_session__write_header(rec->session, rec->evlist,
					   rec->output, true);
		perf_session__delete(rec->session);
		perf_evlist__delete(rec->evlist);
		symbol__exit();
	}
}
330
/*
 * Per-machine callback: synthesize module and kernel mmap events for each
 * guest machine so guest-side samples can be resolved at report time.
 * The host machine is skipped here - it is synthesized separately in
 * __cmd_record().  'data' is the perf_tool to emit events through.
 */
static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
{
	int err;
	struct perf_tool *tool = data;

	if (machine__is_host(machine))
		return;

	/*
	 *As for guest kernel when processing subcommand record&report,
	 *we arrange module mmap prior to guest kernel mmap and trigger
	 *a preload dso because default guest module symbols are loaded
	 *from guest kallsyms instead of /lib/modules/XXX/XXX. This
	 *method is used to avoid symbol missing when the first addr is
	 *in module instead of in guest kernel.
	 */
	err = perf_event__synthesize_modules(tool, process_synthesized_event,
					     machine);
	if (err < 0)
		pr_err("Couldn't record guest kernel [%d]'s reference"
		       " relocation symbol.\n", machine->pid);

	/*
	 * We use _stext for guest kernel because guest kernel's /proc/kallsyms
	 * have no _text sometimes.
	 */
	err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
						 machine, "_text");
	if (err < 0)
		err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
							 machine, "_stext");
	if (err < 0)
		pr_err("Couldn't record guest kernel [%d]'s reference"
		       " relocation symbol.\n", machine->pid);
}
366
/*
 * Header-only synthetic event flushed after each full pass over the mmap
 * buffers; tells the report side that all events seen so far may be
 * safely (re)ordered relative to what follows.
 */
static struct perf_event_header finished_round_event = {
	.size = sizeof(struct perf_event_header),
	.type = PERF_RECORD_FINISHED_ROUND,
};
371
372 static void perf_record__mmap_read_all(struct perf_record *rec)
373 {
374         int i;
375
376         for (i = 0; i < rec->evlist->nr_mmaps; i++) {
377                 if (rec->evlist->mmap[i].base)
378                         perf_record__mmap_read(rec, &rec->evlist->mmap[i]);
379         }
380
381         if (perf_header__has_feat(&rec->session->header, HEADER_TRACE_INFO))
382                 write_output(rec, &finished_round_event, sizeof(finished_round_event));
383 }
384
/*
 * The recording main loop.  Sets up signal/exit hooks, resolves the
 * output file (handling -, append and force modes), creates the session,
 * forks the workload if a command was given, opens counters, synthesizes
 * the initial kernel/threads events, then drains the mmap buffers until
 * done.  Returns 0 on success or a negative error.
 */
static int __cmd_record(struct perf_record *rec, int argc, const char **argv)
{
	struct stat st;
	int flags;
	int err, output, feat;
	unsigned long waking = 0;
	const bool forks = argc > 0;	/* a workload command was given */
	struct machine *machine;
	struct perf_tool *tool = &rec->tool;
	struct perf_record_opts *opts = &rec->opts;
	struct perf_evlist *evsel_list = rec->evlist;
	const char *output_name = rec->output_name;
	struct perf_session *session;

	rec->progname = argv[0];

	rec->page_size = sysconf(_SC_PAGE_SIZE);

	on_exit(perf_record__sig_exit, rec);
	signal(SIGCHLD, sig_handler);
	signal(SIGINT, sig_handler);
	signal(SIGUSR1, sig_handler);

	/*
	 * Resolve the output target: default to perf.data, pipe output when
	 * stdout is a fifo or "-" was given, rotate <name> to <name>.old
	 * when forcing over a non-empty file, and degrade append to force
	 * when there is nothing to append to.
	 */
	if (!output_name) {
		if (!fstat(STDOUT_FILENO, &st) && S_ISFIFO(st.st_mode))
			opts->pipe_output = true;
		else
			rec->output_name = output_name = "perf.data";
	}
	if (output_name) {
		if (!strcmp(output_name, "-"))
			opts->pipe_output = true;
		else if (!stat(output_name, &st) && st.st_size) {
			if (rec->write_mode == WRITE_FORCE) {
				char oldname[PATH_MAX];
				snprintf(oldname, sizeof(oldname), "%s.old",
					 output_name);
				unlink(oldname);
				rename(output_name, oldname);
			}
		} else if (rec->write_mode == WRITE_APPEND) {
			rec->write_mode = WRITE_FORCE;
		}
	}

	flags = O_CREAT|O_RDWR;
	if (rec->write_mode == WRITE_APPEND)
		rec->file_new = 0;
	else
		flags |= O_TRUNC;

	if (opts->pipe_output)
		output = STDOUT_FILENO;
	else
		output = open(output_name, flags, S_IRUSR | S_IWUSR);
	if (output < 0) {
		perror("failed to create output file");
		exit(-1);
	}

	rec->output = output;

	session = perf_session__new(output_name, O_WRONLY,
				    rec->write_mode == WRITE_FORCE, false, NULL);
	if (session == NULL) {
		pr_err("Not enough memory for reading perf file header\n");
		return -1;
	}

	rec->session = session;

	/* Start with all header features on, then clear the inapplicable. */
	for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
		perf_header__set_feat(&session->header, feat);

	if (rec->no_buildid)
		perf_header__clear_feat(&session->header, HEADER_BUILD_ID);

	if (!have_tracepoints(&evsel_list->entries))
		perf_header__clear_feat(&session->header, HEADER_TRACE_INFO);

	/* Appending: pick up the existing header before writing anything. */
	if (!rec->file_new) {
		err = perf_session__read_header(session, output);
		if (err < 0)
			goto out_delete_session;
	}

	/* Fork the workload now, but keep it stopped until counters are on. */
	if (forks) {
		err = perf_evlist__prepare_workload(evsel_list, opts, argv);
		if (err < 0) {
			pr_err("Couldn't run the workload!\n");
			goto out_delete_session;
		}
	}

	perf_record__open(rec);

	/*
	 * perf_session__delete(session) will be called at perf_record__exit()
	 */
	on_exit(perf_record__exit, rec);

	if (opts->pipe_output) {
		err = perf_header__write_pipe(output);
		if (err < 0)
			return err;
	} else if (rec->file_new) {
		err = perf_session__write_header(session, evsel_list,
						 output, false);
		if (err < 0)
			return err;
	}

	if (!rec->no_buildid
	    && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) {
		pr_err("Couldn't generate buildids. "
		       "Use --no-buildid to profile anyway.\n");
		return -1;
	}

	/* Everything from here on is event data, revisited by process_buildids(). */
	rec->post_processing_offset = lseek(output, 0, SEEK_CUR);

	machine = perf_session__find_host_machine(session);
	if (!machine) {
		pr_err("Couldn't find native kernel information.\n");
		return -1;
	}

	/*
	 * Pipe mode has no header to read back, so attrs, event types and
	 * tracing data must be synthesized inline into the stream.
	 */
	if (opts->pipe_output) {
		err = perf_event__synthesize_attrs(tool, session,
						   process_synthesized_event);
		if (err < 0) {
			pr_err("Couldn't synthesize attrs.\n");
			return err;
		}

		err = perf_event__synthesize_event_types(tool, process_synthesized_event,
							 machine);
		if (err < 0) {
			pr_err("Couldn't synthesize event_types.\n");
			return err;
		}

		if (have_tracepoints(&evsel_list->entries)) {
			/*
			 * FIXME err <= 0 here actually means that
			 * there were no tracepoints so its not really
			 * an error, just that we don't need to
			 * synthesize anything.  We really have to
			 * return this more properly and also
			 * propagate errors that now are calling die()
			 */
			err = perf_event__synthesize_tracing_data(tool, output, evsel_list,
								  process_synthesized_event);
			if (err <= 0) {
				pr_err("Couldn't record tracing data.\n");
				return err;
			}
			advance_output(rec, err);
		}
	}

	/* Kernel mmap: try _text, fall back to _stext (see guest variant). */
	err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
						 machine, "_text");
	if (err < 0)
		err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
							 machine, "_stext");
	if (err < 0)
		pr_err("Couldn't record kernel reference relocation symbol\n"
		       "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
		       "Check /proc/kallsyms permission or run as root.\n");

	err = perf_event__synthesize_modules(tool, process_synthesized_event,
					     machine);
	if (err < 0)
		pr_err("Couldn't record kernel module information.\n"
		       "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
		       "Check /proc/modules permission or run as root.\n");

	if (perf_guest)
		perf_session__process_machines(session, tool,
					       perf_event__synthesize_guest_os);

	/* Targeted mode: only the monitored threads; else all of /proc. */
	if (!opts->system_wide)
		perf_event__synthesize_thread_map(tool, evsel_list->threads,
						  process_synthesized_event,
						  machine);
	else
		perf_event__synthesize_threads(tool, process_synthesized_event,
					       machine);

	if (rec->realtime_prio) {
		struct sched_param param;

		param.sched_priority = rec->realtime_prio;
		if (sched_setscheduler(0, SCHED_FIFO, &param)) {
			pr_err("Could not set realtime priority.\n");
			exit(-1);
		}
	}

	perf_evlist__enable(evsel_list);

	/*
	 * Let the child rip
	 */
	if (forks)
		perf_evlist__start_workload(evsel_list);

	/*
	 * Main loop: drain buffers; when a pass produced nothing, either
	 * stop (done) or sleep in poll() until a buffer has data.
	 * NOTE(review): poll()'s return value is assigned to err but never
	 * checked - EINTR here just causes another loop iteration.
	 */
	for (;;) {
		int hits = rec->samples;

		perf_record__mmap_read_all(rec);

		if (hits == rec->samples) {
			if (done)
				break;
			err = poll(evsel_list->pollfd, evsel_list->nr_fds, -1);
			waking++;
		}

		/* Stop counters but loop once more to drain the last events. */
		if (done)
			perf_evlist__disable(evsel_list);
	}

	if (quiet || signr == SIGUSR1)
		return 0;

	fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking);

	/*
	 * Approximate RIP event size: 24 bytes.
	 */
	fprintf(stderr,
		"[ perf record: Captured and wrote %.3f MB %s (~%" PRIu64 " samples) ]\n",
		(double)rec->bytes_written / 1024.0 / 1024.0,
		output_name,
		rec->bytes_written / 24);

	return 0;

out_delete_session:
	perf_session__delete(session);
	return err;
}
629
/* Usage lines shown by usage_with_options() on bad invocation. */
static const char * const record_usage[] = {
	"perf record [<options>] [<command>]",
	"perf record [<options>] -- <command> [<options>]",
	NULL
};
635
/*
 * XXX Ideally would be local to cmd_record() and passed to a perf_record__new
 * because we need to have access to it in perf_record__exit, that is called
 * after cmd_record() exits, but since record_options need to be accessible to
 * builtin-script, leave it here.
 *
 * At least we don't ouch it in all the other functions here directly.
 *
 * Just say no to tons of global variables, sigh.
 */
static struct perf_record record = {
	.opts = {
		.mmap_pages	     = UINT_MAX,	/* "unset" sentinel, see -m */
		.user_freq	     = UINT_MAX,	/* "unset" sentinel, see -F */
		.user_interval	     = ULLONG_MAX,	/* "unset" sentinel, see -c */
		.freq		     = 1000,		/* default sampling frequency (Hz) */
		.sample_id_all_avail = true,		/* cleared on old-kernel EINVAL fallback */
	},
	.write_mode = WRITE_FORCE,
	.file_new   = true,
};
657
658 /*
659  * XXX Will stay a global variable till we fix builtin-script.c to stop messing
660  * with it and switch to use the library functions in perf_evlist that came
661  * from builtin-record.c, i.e. use perf_record_opts,
662  * perf_evlist__prepare_workload, etc instead of fork+exec'in 'perf record',
663  * using pipes, etc.
664  */
/* Option table; non-static because builtin-script.c reuses it directly. */
const struct option record_options[] = {
	OPT_CALLBACK('e', "event", &record.evlist, "event",
		     "event selector. use 'perf list' to list available events",
		     parse_events_option),
	OPT_CALLBACK(0, "filter", &record.evlist, "filter",
		     "event filter", parse_filter),
	OPT_STRING('p', "pid", &record.opts.target_pid, "pid",
		    "record events on existing process id"),
	OPT_STRING('t', "tid", &record.opts.target_tid, "tid",
		    "record events on existing thread id"),
	OPT_INTEGER('r', "realtime", &record.realtime_prio,
		    "collect data with this RT SCHED_FIFO priority"),
	OPT_BOOLEAN('D', "no-delay", &record.opts.no_delay,
		    "collect data without buffering"),
	OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples,
		    "collect raw sample records from all opened counters"),
	OPT_BOOLEAN('a', "all-cpus", &record.opts.system_wide,
			    "system-wide collection from all CPUs"),
	OPT_BOOLEAN('A', "append", &record.append_file,
			    "append to the output file to do incremental profiling"),
	OPT_STRING('C', "cpu", &record.opts.cpu_list, "cpu",
		    "list of cpus to monitor"),
	OPT_BOOLEAN('f', "force", &record.force,
			"overwrite existing data file (deprecated)"),
	OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"),
	OPT_STRING('o', "output", &record.output_name, "file",
		    "output file name"),
	OPT_BOOLEAN('i', "no-inherit", &record.opts.no_inherit,
		    "child tasks do not inherit counters"),
	OPT_UINTEGER('F', "freq", &record.opts.user_freq, "profile at this frequency"),
	OPT_UINTEGER('m', "mmap-pages", &record.opts.mmap_pages,
		     "number of mmap data pages"),
	OPT_BOOLEAN(0, "group", &record.opts.group,
		    "put the counters into a counter group"),
	OPT_BOOLEAN('g', "call-graph", &record.opts.call_graph,
		    "do call-graph (stack chain/backtrace) recording"),
	OPT_INCR('v', "verbose", &verbose,
		    "be more verbose (show counter open errors, etc)"),
	OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),
	OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat,
		    "per thread counts"),
	OPT_BOOLEAN('d', "data", &record.opts.sample_address,
		    "Sample addresses"),
	OPT_BOOLEAN('T', "timestamp", &record.opts.sample_time, "Sample timestamps"),
	OPT_BOOLEAN('P', "period", &record.opts.period, "Sample period"),
	OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples,
		    "don't sample"),
	OPT_BOOLEAN('N', "no-buildid-cache", &record.no_buildid_cache,
		    "do not update the buildid cache"),
	OPT_BOOLEAN('B', "no-buildid", &record.no_buildid,
		    "do not collect buildids in perf.data"),
	OPT_CALLBACK('G', "cgroup", &record.evlist, "name",
		     "monitor event in cgroup name only",
		     parse_cgroups),
	OPT_STRING('u', "uid", &record.uid_str, "user", "user to profile"),
	OPT_END()
};
722
/*
 * Entry point for 'perf record': parse options, validate the target
 * combination, build the event list and thread/cpu maps, resolve the
 * sampling period/frequency, then hand off to __cmd_record().
 */
int cmd_record(int argc, const char **argv, const char *prefix __used)
{
	int err = -ENOMEM;
	struct perf_evsel *pos;
	struct perf_evlist *evsel_list;
	struct perf_record *rec = &record;

	perf_header__set_cmdline(argc, argv);

	evsel_list = perf_evlist__new(NULL, NULL);
	if (evsel_list == NULL)
		return -ENOMEM;

	rec->evlist = evsel_list;

	argc = parse_options(argc, argv, record_options, record_usage,
			    PARSE_OPT_STOP_AT_NON_OPTION);
	/* Need a workload, a target (pid/tid/cpu/uid) or system-wide mode. */
	if (!argc && !rec->opts.target_pid && !rec->opts.target_tid &&
		!rec->opts.system_wide && !rec->opts.cpu_list && !rec->uid_str)
		usage_with_options(record_usage, record_options);

	if (rec->force && rec->append_file) {
		fprintf(stderr, "Can't overwrite and append at the same time."
				" You need to choose between -f and -A");
		usage_with_options(record_usage, record_options);
	} else if (rec->append_file) {
		rec->write_mode = WRITE_APPEND;
	} else {
		rec->write_mode = WRITE_FORCE;
	}

	if (nr_cgroups && !rec->opts.system_wide) {
		fprintf(stderr, "cgroup monitoring only available in"
			" system-wide mode\n");
		usage_with_options(record_usage, record_options);
	}

	symbol__init();

	if (symbol_conf.kptr_restrict)
		pr_warning(
"WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
"check /proc/sys/kernel/kptr_restrict.\n\n"
"Samples in kernel functions may not be resolved if a suitable vmlinux\n"
"file is not found in the buildid cache or in the vmlinux path.\n\n"
"Samples in kernel modules won't be resolved at all.\n\n"
"If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
"even with a suitable vmlinux or kallsyms file.\n\n");

	if (rec->no_buildid_cache || rec->no_buildid)
		disable_buildid_cache();

	/* No -e given: fall back to the default event (cycles). */
	if (evsel_list->nr_entries == 0 &&
	    perf_evlist__add_default(evsel_list) < 0) {
		pr_err("Not enough memory for event selector list\n");
		goto out_symbol_exit;
	}

	/*
	 * NOTE(review): UINT_MAX - 1 appears to be parse_target_uid()'s
	 * error sentinel - confirm against its definition in util/.
	 */
	rec->opts.uid = parse_target_uid(rec->uid_str, rec->opts.target_tid,
					 rec->opts.target_pid);
	if (rec->uid_str != NULL && rec->opts.uid == UINT_MAX - 1)
		goto out_free_fd;

	/* -p without -t: monitor all threads of the given process. */
	if (rec->opts.target_pid)
		rec->opts.target_tid = rec->opts.target_pid;

	if (perf_evlist__create_maps(evsel_list, rec->opts.target_pid,
				     rec->opts.target_tid, rec->opts.uid,
				     rec->opts.cpu_list) < 0)
		usage_with_options(record_usage, record_options);

	list_for_each_entry(pos, &evsel_list->entries, node) {
		if (perf_header__push_event(pos->attr.config, event_name(pos)))
			goto out_free_fd;
	}

	/* Translate the "unset" sentinels into effective period/frequency. */
	if (rec->opts.user_interval != ULLONG_MAX)
		rec->opts.default_interval = rec->opts.user_interval;
	if (rec->opts.user_freq != UINT_MAX)
		rec->opts.freq = rec->opts.user_freq;

	/*
	 * User specified count overrides default frequency.
	 */
	if (rec->opts.default_interval)
		rec->opts.freq = 0;
	else if (rec->opts.freq) {
		rec->opts.default_interval = rec->opts.freq;
	} else {
		fprintf(stderr, "frequency and count are zero, aborting\n");
		err = -EINVAL;
		goto out_free_fd;
	}

	err = __cmd_record(&record, argc, argv);
out_free_fd:
	perf_evlist__delete_maps(evsel_list);
out_symbol_exit:
	symbol__exit();
	return err;
}