f664a22b5fea8af7dc1a5a7f00a5ec4d3f3b3431
[firefly-linux-kernel-4.4.55.git] / tools / perf / util / evsel.c
1 /*
2  * Copyright (C) 2011, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
3  *
4  * Parts came from builtin-{top,stat,record}.c, see those files for further
5  * copyright notes.
6  *
7  * Released under the GPL v2. (and only v2, not any later version)
8  */
9
10 #include <byteswap.h>
11 #include <linux/bitops.h>
12 #include <api/fs/debugfs.h>
13 #include <traceevent/event-parse.h>
14 #include <linux/hw_breakpoint.h>
15 #include <linux/perf_event.h>
16 #include <sys/resource.h>
17 #include "asm/bug.h"
18 #include "callchain.h"
19 #include "cgroup.h"
20 #include "evsel.h"
21 #include "evlist.h"
22 #include "util.h"
23 #include "cpumap.h"
24 #include "thread_map.h"
25 #include "target.h"
26 #include "perf_regs.h"
27 #include "debug.h"
28 #include "trace-event.h"
29 #include "stat.h"
30
/*
 * Feature flags consulted while configuring events in this file, e.g. to
 * leave attr.sample_id_all or attr.mmap2 unset.
 * NOTE(review): presumably set elsewhere once the running kernel is found to
 * lack the corresponding perf_event_attr feature -- confirm against the
 * open/fallback code.
 */
static struct {
        bool sample_id_all, exclude_guest;
        bool mmap2, cloexec;
        bool clockid, clockid_wrong;
} perf_missing_features;

/* Clock id most recently taken from record_opts by perf_evsel__config(). */
static clockid_t clockid;
41
42 static int perf_evsel__no_extra_init(struct perf_evsel *evsel __maybe_unused)
43 {
44         return 0;
45 }
46
47 static void perf_evsel__no_extra_fini(struct perf_evsel *evsel __maybe_unused)
48 {
49 }
50
51 static struct {
52         size_t  size;
53         int     (*init)(struct perf_evsel *evsel);
54         void    (*fini)(struct perf_evsel *evsel);
55 } perf_evsel__object = {
56         .size = sizeof(struct perf_evsel),
57         .init = perf_evsel__no_extra_init,
58         .fini = perf_evsel__no_extra_fini,
59 };
60
61 int perf_evsel__object_config(size_t object_size,
62                               int (*init)(struct perf_evsel *evsel),
63                               void (*fini)(struct perf_evsel *evsel))
64 {
65
66         if (object_size == 0)
67                 goto set_methods;
68
69         if (perf_evsel__object.size > object_size)
70                 return -EINVAL;
71
72         perf_evsel__object.size = object_size;
73
74 set_methods:
75         if (init != NULL)
76                 perf_evsel__object.init = init;
77
78         if (fini != NULL)
79                 perf_evsel__object.fini = fini;
80
81         return 0;
82 }
83
/*
 * Lvalue for the int file descriptor stored at (x, y) in the fd xyarray of
 * evsel e.  Arguments are parenthesized so that arbitrary expressions can be
 * passed safely.
 */
#define FD(e, x, y) (*(int *)xyarray__entry((e)->fd, (x), (y)))
85
86 int __perf_evsel__sample_size(u64 sample_type)
87 {
88         u64 mask = sample_type & PERF_SAMPLE_MASK;
89         int size = 0;
90         int i;
91
92         for (i = 0; i < 64; i++) {
93                 if (mask & (1ULL << i))
94                         size++;
95         }
96
97         size *= sizeof(u64);
98
99         return size;
100 }
101
102 /**
103  * __perf_evsel__calc_id_pos - calculate id_pos.
104  * @sample_type: sample type
105  *
106  * This function returns the position of the event id (PERF_SAMPLE_ID or
107  * PERF_SAMPLE_IDENTIFIER) in a sample event i.e. in the array of struct
108  * sample_event.
109  */
110 static int __perf_evsel__calc_id_pos(u64 sample_type)
111 {
112         int idx = 0;
113
114         if (sample_type & PERF_SAMPLE_IDENTIFIER)
115                 return 0;
116
117         if (!(sample_type & PERF_SAMPLE_ID))
118                 return -1;
119
120         if (sample_type & PERF_SAMPLE_IP)
121                 idx += 1;
122
123         if (sample_type & PERF_SAMPLE_TID)
124                 idx += 1;
125
126         if (sample_type & PERF_SAMPLE_TIME)
127                 idx += 1;
128
129         if (sample_type & PERF_SAMPLE_ADDR)
130                 idx += 1;
131
132         return idx;
133 }
134
135 /**
136  * __perf_evsel__calc_is_pos - calculate is_pos.
137  * @sample_type: sample type
138  *
139  * This function returns the position (counting backwards) of the event id
140  * (PERF_SAMPLE_ID or PERF_SAMPLE_IDENTIFIER) in a non-sample event i.e. if
141  * sample_id_all is used there is an id sample appended to non-sample events.
142  */
143 static int __perf_evsel__calc_is_pos(u64 sample_type)
144 {
145         int idx = 1;
146
147         if (sample_type & PERF_SAMPLE_IDENTIFIER)
148                 return 1;
149
150         if (!(sample_type & PERF_SAMPLE_ID))
151                 return -1;
152
153         if (sample_type & PERF_SAMPLE_CPU)
154                 idx += 1;
155
156         if (sample_type & PERF_SAMPLE_STREAM_ID)
157                 idx += 1;
158
159         return idx;
160 }
161
162 void perf_evsel__calc_id_pos(struct perf_evsel *evsel)
163 {
164         evsel->id_pos = __perf_evsel__calc_id_pos(evsel->attr.sample_type);
165         evsel->is_pos = __perf_evsel__calc_is_pos(evsel->attr.sample_type);
166 }
167
168 void __perf_evsel__set_sample_bit(struct perf_evsel *evsel,
169                                   enum perf_event_sample_format bit)
170 {
171         if (!(evsel->attr.sample_type & bit)) {
172                 evsel->attr.sample_type |= bit;
173                 evsel->sample_size += sizeof(u64);
174                 perf_evsel__calc_id_pos(evsel);
175         }
176 }
177
178 void __perf_evsel__reset_sample_bit(struct perf_evsel *evsel,
179                                     enum perf_event_sample_format bit)
180 {
181         if (evsel->attr.sample_type & bit) {
182                 evsel->attr.sample_type &= ~bit;
183                 evsel->sample_size -= sizeof(u64);
184                 perf_evsel__calc_id_pos(evsel);
185         }
186 }
187
188 void perf_evsel__set_sample_id(struct perf_evsel *evsel,
189                                bool can_sample_identifier)
190 {
191         if (can_sample_identifier) {
192                 perf_evsel__reset_sample_bit(evsel, ID);
193                 perf_evsel__set_sample_bit(evsel, IDENTIFIER);
194         } else {
195                 perf_evsel__set_sample_bit(evsel, ID);
196         }
197         evsel->attr.read_format |= PERF_FORMAT_ID;
198 }
199
200 void perf_evsel__init(struct perf_evsel *evsel,
201                       struct perf_event_attr *attr, int idx)
202 {
203         evsel->idx         = idx;
204         evsel->tracking    = !idx;
205         evsel->attr        = *attr;
206         evsel->leader      = evsel;
207         evsel->unit        = "";
208         evsel->scale       = 1.0;
209         INIT_LIST_HEAD(&evsel->node);
210         INIT_LIST_HEAD(&evsel->config_terms);
211         perf_evsel__object.init(evsel);
212         evsel->sample_size = __perf_evsel__sample_size(attr->sample_type);
213         perf_evsel__calc_id_pos(evsel);
214         evsel->cmdline_group_boundary = false;
215 }
216
217 struct perf_evsel *perf_evsel__new_idx(struct perf_event_attr *attr, int idx)
218 {
219         struct perf_evsel *evsel = zalloc(perf_evsel__object.size);
220
221         if (evsel != NULL)
222                 perf_evsel__init(evsel, attr, idx);
223
224         return evsel;
225 }
226
227 struct perf_evsel *perf_evsel__newtp_idx(const char *sys, const char *name, int idx)
228 {
229         struct perf_evsel *evsel = zalloc(perf_evsel__object.size);
230
231         if (evsel != NULL) {
232                 struct perf_event_attr attr = {
233                         .type          = PERF_TYPE_TRACEPOINT,
234                         .sample_type   = (PERF_SAMPLE_RAW | PERF_SAMPLE_TIME |
235                                           PERF_SAMPLE_CPU | PERF_SAMPLE_PERIOD),
236                 };
237
238                 if (asprintf(&evsel->name, "%s:%s", sys, name) < 0)
239                         goto out_free;
240
241                 evsel->tp_format = trace_event__tp_format(sys, name);
242                 if (evsel->tp_format == NULL)
243                         goto out_free;
244
245                 event_attr_init(&attr);
246                 attr.config = evsel->tp_format->id;
247                 attr.sample_period = 1;
248                 perf_evsel__init(evsel, &attr, idx);
249         }
250
251         return evsel;
252
253 out_free:
254         zfree(&evsel->name);
255         free(evsel);
256         return NULL;
257 }
258
259 const char *perf_evsel__hw_names[PERF_COUNT_HW_MAX] = {
260         "cycles",
261         "instructions",
262         "cache-references",
263         "cache-misses",
264         "branches",
265         "branch-misses",
266         "bus-cycles",
267         "stalled-cycles-frontend",
268         "stalled-cycles-backend",
269         "ref-cycles",
270 };
271
272 static const char *__perf_evsel__hw_name(u64 config)
273 {
274         if (config < PERF_COUNT_HW_MAX && perf_evsel__hw_names[config])
275                 return perf_evsel__hw_names[config];
276
277         return "unknown-hardware";
278 }
279
280 static int perf_evsel__add_modifiers(struct perf_evsel *evsel, char *bf, size_t size)
281 {
282         int colon = 0, r = 0;
283         struct perf_event_attr *attr = &evsel->attr;
284         bool exclude_guest_default = false;
285
286 #define MOD_PRINT(context, mod) do {                                    \
287                 if (!attr->exclude_##context) {                         \
288                         if (!colon) colon = ++r;                        \
289                         r += scnprintf(bf + r, size - r, "%c", mod);    \
290                 } } while(0)
291
292         if (attr->exclude_kernel || attr->exclude_user || attr->exclude_hv) {
293                 MOD_PRINT(kernel, 'k');
294                 MOD_PRINT(user, 'u');
295                 MOD_PRINT(hv, 'h');
296                 exclude_guest_default = true;
297         }
298
299         if (attr->precise_ip) {
300                 if (!colon)
301                         colon = ++r;
302                 r += scnprintf(bf + r, size - r, "%.*s", attr->precise_ip, "ppp");
303                 exclude_guest_default = true;
304         }
305
306         if (attr->exclude_host || attr->exclude_guest == exclude_guest_default) {
307                 MOD_PRINT(host, 'H');
308                 MOD_PRINT(guest, 'G');
309         }
310 #undef MOD_PRINT
311         if (colon)
312                 bf[colon - 1] = ':';
313         return r;
314 }
315
316 static int perf_evsel__hw_name(struct perf_evsel *evsel, char *bf, size_t size)
317 {
318         int r = scnprintf(bf, size, "%s", __perf_evsel__hw_name(evsel->attr.config));
319         return r + perf_evsel__add_modifiers(evsel, bf + r, size - r);
320 }
321
322 const char *perf_evsel__sw_names[PERF_COUNT_SW_MAX] = {
323         "cpu-clock",
324         "task-clock",
325         "page-faults",
326         "context-switches",
327         "cpu-migrations",
328         "minor-faults",
329         "major-faults",
330         "alignment-faults",
331         "emulation-faults",
332         "dummy",
333 };
334
335 static const char *__perf_evsel__sw_name(u64 config)
336 {
337         if (config < PERF_COUNT_SW_MAX && perf_evsel__sw_names[config])
338                 return perf_evsel__sw_names[config];
339         return "unknown-software";
340 }
341
342 static int perf_evsel__sw_name(struct perf_evsel *evsel, char *bf, size_t size)
343 {
344         int r = scnprintf(bf, size, "%s", __perf_evsel__sw_name(evsel->attr.config));
345         return r + perf_evsel__add_modifiers(evsel, bf + r, size - r);
346 }
347
348 static int __perf_evsel__bp_name(char *bf, size_t size, u64 addr, u64 type)
349 {
350         int r;
351
352         r = scnprintf(bf, size, "mem:0x%" PRIx64 ":", addr);
353
354         if (type & HW_BREAKPOINT_R)
355                 r += scnprintf(bf + r, size - r, "r");
356
357         if (type & HW_BREAKPOINT_W)
358                 r += scnprintf(bf + r, size - r, "w");
359
360         if (type & HW_BREAKPOINT_X)
361                 r += scnprintf(bf + r, size - r, "x");
362
363         return r;
364 }
365
366 static int perf_evsel__bp_name(struct perf_evsel *evsel, char *bf, size_t size)
367 {
368         struct perf_event_attr *attr = &evsel->attr;
369         int r = __perf_evsel__bp_name(bf, size, attr->bp_addr, attr->bp_type);
370         return r + perf_evsel__add_modifiers(evsel, bf + r, size - r);
371 }
372
373 const char *perf_evsel__hw_cache[PERF_COUNT_HW_CACHE_MAX]
374                                 [PERF_EVSEL__MAX_ALIASES] = {
375  { "L1-dcache", "l1-d",         "l1d",          "L1-data",              },
376  { "L1-icache", "l1-i",         "l1i",          "L1-instruction",       },
377  { "LLC",       "L2",                                                   },
378  { "dTLB",      "d-tlb",        "Data-TLB",                             },
379  { "iTLB",      "i-tlb",        "Instruction-TLB",                      },
380  { "branch",    "branches",     "bpu",          "btb",          "bpc",  },
381  { "node",                                                              },
382 };
383
384 const char *perf_evsel__hw_cache_op[PERF_COUNT_HW_CACHE_OP_MAX]
385                                    [PERF_EVSEL__MAX_ALIASES] = {
386  { "load",      "loads",        "read",                                 },
387  { "store",     "stores",       "write",                                },
388  { "prefetch",  "prefetches",   "speculative-read", "speculative-load", },
389 };
390
391 const char *perf_evsel__hw_cache_result[PERF_COUNT_HW_CACHE_RESULT_MAX]
392                                        [PERF_EVSEL__MAX_ALIASES] = {
393  { "refs",      "Reference",    "ops",          "access",               },
394  { "misses",    "miss",                                                 },
395 };
396
/*
 * C(x) abbreviates PERF_COUNT_HW_CACHE_<x>.  CACHE_* are the bit masks of the
 * individual cache operations and COP(x) is the mask of operation number x;
 * its argument is parenthesized so that expressions can be passed safely.
 */
#define C(x)            PERF_COUNT_HW_CACHE_##x
#define CACHE_READ      (1 << C(OP_READ))
#define CACHE_WRITE     (1 << C(OP_WRITE))
#define CACHE_PREFETCH  (1 << C(OP_PREFETCH))
#define COP(x)          (1 << (x))
402
403 /*
404  * cache operartion stat
405  * L1I : Read and prefetch only
406  * ITLB and BPU : Read-only
407  */
408 static unsigned long perf_evsel__hw_cache_stat[C(MAX)] = {
409  [C(L1D)]       = (CACHE_READ | CACHE_WRITE | CACHE_PREFETCH),
410  [C(L1I)]       = (CACHE_READ | CACHE_PREFETCH),
411  [C(LL)]        = (CACHE_READ | CACHE_WRITE | CACHE_PREFETCH),
412  [C(DTLB)]      = (CACHE_READ | CACHE_WRITE | CACHE_PREFETCH),
413  [C(ITLB)]      = (CACHE_READ),
414  [C(BPU)]       = (CACHE_READ),
415  [C(NODE)]      = (CACHE_READ | CACHE_WRITE | CACHE_PREFETCH),
416 };
417
418 bool perf_evsel__is_cache_op_valid(u8 type, u8 op)
419 {
420         if (perf_evsel__hw_cache_stat[type] & COP(op))
421                 return true;    /* valid */
422         else
423                 return false;   /* invalid */
424 }
425
426 int __perf_evsel__hw_cache_type_op_res_name(u8 type, u8 op, u8 result,
427                                             char *bf, size_t size)
428 {
429         if (result) {
430                 return scnprintf(bf, size, "%s-%s-%s", perf_evsel__hw_cache[type][0],
431                                  perf_evsel__hw_cache_op[op][0],
432                                  perf_evsel__hw_cache_result[result][0]);
433         }
434
435         return scnprintf(bf, size, "%s-%s", perf_evsel__hw_cache[type][0],
436                          perf_evsel__hw_cache_op[op][1]);
437 }
438
439 static int __perf_evsel__hw_cache_name(u64 config, char *bf, size_t size)
440 {
441         u8 op, result, type = (config >>  0) & 0xff;
442         const char *err = "unknown-ext-hardware-cache-type";
443
444         if (type > PERF_COUNT_HW_CACHE_MAX)
445                 goto out_err;
446
447         op = (config >>  8) & 0xff;
448         err = "unknown-ext-hardware-cache-op";
449         if (op > PERF_COUNT_HW_CACHE_OP_MAX)
450                 goto out_err;
451
452         result = (config >> 16) & 0xff;
453         err = "unknown-ext-hardware-cache-result";
454         if (result > PERF_COUNT_HW_CACHE_RESULT_MAX)
455                 goto out_err;
456
457         err = "invalid-cache";
458         if (!perf_evsel__is_cache_op_valid(type, op))
459                 goto out_err;
460
461         return __perf_evsel__hw_cache_type_op_res_name(type, op, result, bf, size);
462 out_err:
463         return scnprintf(bf, size, "%s", err);
464 }
465
466 static int perf_evsel__hw_cache_name(struct perf_evsel *evsel, char *bf, size_t size)
467 {
468         int ret = __perf_evsel__hw_cache_name(evsel->attr.config, bf, size);
469         return ret + perf_evsel__add_modifiers(evsel, bf + ret, size - ret);
470 }
471
472 static int perf_evsel__raw_name(struct perf_evsel *evsel, char *bf, size_t size)
473 {
474         int ret = scnprintf(bf, size, "raw 0x%" PRIx64, evsel->attr.config);
475         return ret + perf_evsel__add_modifiers(evsel, bf + ret, size - ret);
476 }
477
478 const char *perf_evsel__name(struct perf_evsel *evsel)
479 {
480         char bf[128];
481
482         if (evsel->name)
483                 return evsel->name;
484
485         switch (evsel->attr.type) {
486         case PERF_TYPE_RAW:
487                 perf_evsel__raw_name(evsel, bf, sizeof(bf));
488                 break;
489
490         case PERF_TYPE_HARDWARE:
491                 perf_evsel__hw_name(evsel, bf, sizeof(bf));
492                 break;
493
494         case PERF_TYPE_HW_CACHE:
495                 perf_evsel__hw_cache_name(evsel, bf, sizeof(bf));
496                 break;
497
498         case PERF_TYPE_SOFTWARE:
499                 perf_evsel__sw_name(evsel, bf, sizeof(bf));
500                 break;
501
502         case PERF_TYPE_TRACEPOINT:
503                 scnprintf(bf, sizeof(bf), "%s", "unknown tracepoint");
504                 break;
505
506         case PERF_TYPE_BREAKPOINT:
507                 perf_evsel__bp_name(evsel, bf, sizeof(bf));
508                 break;
509
510         default:
511                 scnprintf(bf, sizeof(bf), "unknown attr type: %d",
512                           evsel->attr.type);
513                 break;
514         }
515
516         evsel->name = strdup(bf);
517
518         return evsel->name ?: "unknown";
519 }
520
521 const char *perf_evsel__group_name(struct perf_evsel *evsel)
522 {
523         return evsel->group_name ?: "anon group";
524 }
525
526 int perf_evsel__group_desc(struct perf_evsel *evsel, char *buf, size_t size)
527 {
528         int ret;
529         struct perf_evsel *pos;
530         const char *group_name = perf_evsel__group_name(evsel);
531
532         ret = scnprintf(buf, size, "%s", group_name);
533
534         ret += scnprintf(buf + ret, size - ret, " { %s",
535                          perf_evsel__name(evsel));
536
537         for_each_group_member(pos, evsel)
538                 ret += scnprintf(buf + ret, size - ret, ", %s",
539                                  perf_evsel__name(pos));
540
541         ret += scnprintf(buf + ret, size - ret, " }");
542
543         return ret;
544 }
545
546 static void
547 perf_evsel__config_callgraph(struct perf_evsel *evsel,
548                              struct record_opts *opts,
549                              struct callchain_param *param)
550 {
551         bool function = perf_evsel__is_function_event(evsel);
552         struct perf_event_attr *attr = &evsel->attr;
553
554         perf_evsel__set_sample_bit(evsel, CALLCHAIN);
555
556         if (param->record_mode == CALLCHAIN_LBR) {
557                 if (!opts->branch_stack) {
558                         if (attr->exclude_user) {
559                                 pr_warning("LBR callstack option is only available "
560                                            "to get user callchain information. "
561                                            "Falling back to framepointers.\n");
562                         } else {
563                                 perf_evsel__set_sample_bit(evsel, BRANCH_STACK);
564                                 attr->branch_sample_type = PERF_SAMPLE_BRANCH_USER |
565                                                         PERF_SAMPLE_BRANCH_CALL_STACK;
566                         }
567                 } else
568                          pr_warning("Cannot use LBR callstack with branch stack. "
569                                     "Falling back to framepointers.\n");
570         }
571
572         if (param->record_mode == CALLCHAIN_DWARF) {
573                 if (!function) {
574                         perf_evsel__set_sample_bit(evsel, REGS_USER);
575                         perf_evsel__set_sample_bit(evsel, STACK_USER);
576                         attr->sample_regs_user = PERF_REGS_MASK;
577                         attr->sample_stack_user = param->dump_size;
578                         attr->exclude_callchain_user = 1;
579                 } else {
580                         pr_info("Cannot use DWARF unwind for function trace event,"
581                                 " falling back to framepointers.\n");
582                 }
583         }
584
585         if (function) {
586                 pr_info("Disabling user space callchains for function trace event.\n");
587                 attr->exclude_callchain_user = 1;
588         }
589 }
590
591 static void apply_config_terms(struct perf_evsel *evsel)
592 {
593         struct perf_evsel_config_term *term;
594         struct list_head *config_terms = &evsel->config_terms;
595         struct perf_event_attr *attr = &evsel->attr;
596
597         list_for_each_entry(term, config_terms, list) {
598                 switch (term->type) {
599                 case PERF_EVSEL__CONFIG_TERM_PERIOD:
600                         attr->sample_period = term->val.period;
601                         attr->freq = 0;
602                         break;
603                 case PERF_EVSEL__CONFIG_TERM_FREQ:
604                         attr->sample_freq = term->val.freq;
605                         attr->freq = 1;
606                         break;
607                 case PERF_EVSEL__CONFIG_TERM_TIME:
608                         if (term->val.time)
609                                 perf_evsel__set_sample_bit(evsel, TIME);
610                         else
611                                 perf_evsel__reset_sample_bit(evsel, TIME);
612                         break;
613                 default:
614                         break;
615                 }
616         }
617 }
618
619 /*
620  * The enable_on_exec/disabled value strategy:
621  *
622  *  1) For any type of traced program:
623  *    - all independent events and group leaders are disabled
624  *    - all group members are enabled
625  *
626  *     Group members are ruled by group leaders. They need to
627  *     be enabled, because the group scheduling relies on that.
628  *
629  *  2) For traced programs executed by perf:
630  *     - all independent events and group leaders have
631  *       enable_on_exec set
632  *     - we don't specifically enable or disable any event during
633  *       the record command
634  *
635  *     Independent events and group leaders are initially disabled
636  *     and get enabled by exec. Group members are ruled by group
637  *     leaders as stated in 1).
638  *
639  *  3) For traced programs attached by perf (pid/tid):
640  *     - we specifically enable or disable all events during
641  *       the record command
642  *
643  *     When attaching events to already running traced we
644  *     enable/disable events specifically, as there's no
645  *     initial traced exec call.
646  */
647 void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts)
648 {
649         struct perf_evsel *leader = evsel->leader;
650         struct perf_event_attr *attr = &evsel->attr;
651         int track = evsel->tracking;
652         bool per_cpu = opts->target.default_per_cpu && !opts->target.per_thread;
653
654         attr->sample_id_all = perf_missing_features.sample_id_all ? 0 : 1;
655         attr->inherit       = !opts->no_inherit;
656
657         perf_evsel__set_sample_bit(evsel, IP);
658         perf_evsel__set_sample_bit(evsel, TID);
659
660         if (evsel->sample_read) {
661                 perf_evsel__set_sample_bit(evsel, READ);
662
663                 /*
664                  * We need ID even in case of single event, because
665                  * PERF_SAMPLE_READ process ID specific data.
666                  */
667                 perf_evsel__set_sample_id(evsel, false);
668
669                 /*
670                  * Apply group format only if we belong to group
671                  * with more than one members.
672                  */
673                 if (leader->nr_members > 1) {
674                         attr->read_format |= PERF_FORMAT_GROUP;
675                         attr->inherit = 0;
676                 }
677         }
678
679         /*
680          * We default some events to have a default interval. But keep
681          * it a weak assumption overridable by the user.
682          */
683         if (!attr->sample_period || (opts->user_freq != UINT_MAX ||
684                                      opts->user_interval != ULLONG_MAX)) {
685                 if (opts->freq) {
686                         perf_evsel__set_sample_bit(evsel, PERIOD);
687                         attr->freq              = 1;
688                         attr->sample_freq       = opts->freq;
689                 } else {
690                         attr->sample_period = opts->default_interval;
691                 }
692         }
693
694         /*
695          * Disable sampling for all group members other
696          * than leader in case leader 'leads' the sampling.
697          */
698         if ((leader != evsel) && leader->sample_read) {
699                 attr->sample_freq   = 0;
700                 attr->sample_period = 0;
701         }
702
703         if (opts->no_samples)
704                 attr->sample_freq = 0;
705
706         if (opts->inherit_stat)
707                 attr->inherit_stat = 1;
708
709         if (opts->sample_address) {
710                 perf_evsel__set_sample_bit(evsel, ADDR);
711                 attr->mmap_data = track;
712         }
713
714         /*
715          * We don't allow user space callchains for  function trace
716          * event, due to issues with page faults while tracing page
717          * fault handler and its overall trickiness nature.
718          */
719         if (perf_evsel__is_function_event(evsel))
720                 evsel->attr.exclude_callchain_user = 1;
721
722         if (callchain_param.enabled && !evsel->no_aux_samples)
723                 perf_evsel__config_callgraph(evsel, opts, &callchain_param);
724
725         if (opts->sample_intr_regs) {
726                 attr->sample_regs_intr = PERF_REGS_MASK;
727                 perf_evsel__set_sample_bit(evsel, REGS_INTR);
728         }
729
730         if (target__has_cpu(&opts->target))
731                 perf_evsel__set_sample_bit(evsel, CPU);
732
733         if (opts->period)
734                 perf_evsel__set_sample_bit(evsel, PERIOD);
735
736         /*
737          * When the user explicitely disabled time don't force it here.
738          */
739         if (opts->sample_time &&
740             (!perf_missing_features.sample_id_all &&
741             (!opts->no_inherit || target__has_cpu(&opts->target) || per_cpu ||
742              opts->sample_time_set)))
743                 perf_evsel__set_sample_bit(evsel, TIME);
744
745         if (opts->raw_samples && !evsel->no_aux_samples) {
746                 perf_evsel__set_sample_bit(evsel, TIME);
747                 perf_evsel__set_sample_bit(evsel, RAW);
748                 perf_evsel__set_sample_bit(evsel, CPU);
749         }
750
751         if (opts->sample_address)
752                 perf_evsel__set_sample_bit(evsel, DATA_SRC);
753
754         if (opts->no_buffering) {
755                 attr->watermark = 0;
756                 attr->wakeup_events = 1;
757         }
758         if (opts->branch_stack && !evsel->no_aux_samples) {
759                 perf_evsel__set_sample_bit(evsel, BRANCH_STACK);
760                 attr->branch_sample_type = opts->branch_stack;
761         }
762
763         if (opts->sample_weight)
764                 perf_evsel__set_sample_bit(evsel, WEIGHT);
765
766         attr->task  = track;
767         attr->mmap  = track;
768         attr->mmap2 = track && !perf_missing_features.mmap2;
769         attr->comm  = track;
770
771         if (opts->record_switch_events)
772                 attr->context_switch = track;
773
774         if (opts->sample_transaction)
775                 perf_evsel__set_sample_bit(evsel, TRANSACTION);
776
777         if (opts->running_time) {
778                 evsel->attr.read_format |=
779                         PERF_FORMAT_TOTAL_TIME_ENABLED |
780                         PERF_FORMAT_TOTAL_TIME_RUNNING;
781         }
782
783         /*
784          * XXX see the function comment above
785          *
786          * Disabling only independent events or group leaders,
787          * keeping group members enabled.
788          */
789         if (perf_evsel__is_group_leader(evsel))
790                 attr->disabled = 1;
791
792         /*
793          * Setting enable_on_exec for independent events and
794          * group leaders for traced executed by perf.
795          */
796         if (target__none(&opts->target) && perf_evsel__is_group_leader(evsel) &&
797                 !opts->initial_delay)
798                 attr->enable_on_exec = 1;
799
800         if (evsel->immediate) {
801                 attr->disabled = 0;
802                 attr->enable_on_exec = 0;
803         }
804
805         clockid = opts->clockid;
806         if (opts->use_clockid) {
807                 attr->use_clockid = 1;
808                 attr->clockid = opts->clockid;
809         }
810
811         /*
812          * Apply event specific term settings,
813          * it overloads any global configuration.
814          */
815         apply_config_terms(evsel);
816 }
817
818 static int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads)
819 {
820         int cpu, thread;
821
822         if (evsel->system_wide)
823                 nthreads = 1;
824
825         evsel->fd = xyarray__new(ncpus, nthreads, sizeof(int));
826
827         if (evsel->fd) {
828                 for (cpu = 0; cpu < ncpus; cpu++) {
829                         for (thread = 0; thread < nthreads; thread++) {
830                                 FD(evsel, cpu, thread) = -1;
831                         }
832                 }
833         }
834
835         return evsel->fd != NULL ? 0 : -ENOMEM;
836 }
837
838 static int perf_evsel__run_ioctl(struct perf_evsel *evsel, int ncpus, int nthreads,
839                           int ioc,  void *arg)
840 {
841         int cpu, thread;
842
843         if (evsel->system_wide)
844                 nthreads = 1;
845
846         for (cpu = 0; cpu < ncpus; cpu++) {
847                 for (thread = 0; thread < nthreads; thread++) {
848                         int fd = FD(evsel, cpu, thread),
849                             err = ioctl(fd, ioc, arg);
850
851                         if (err)
852                                 return err;
853                 }
854         }
855
856         return 0;
857 }
858
859 int perf_evsel__apply_filter(struct perf_evsel *evsel, int ncpus, int nthreads,
860                              const char *filter)
861 {
862         return perf_evsel__run_ioctl(evsel, ncpus, nthreads,
863                                      PERF_EVENT_IOC_SET_FILTER,
864                                      (void *)filter);
865 }
866
867 int perf_evsel__set_filter(struct perf_evsel *evsel, const char *filter)
868 {
869         char *new_filter = strdup(filter);
870
871         if (new_filter != NULL) {
872                 free(evsel->filter);
873                 evsel->filter = new_filter;
874                 return 0;
875         }
876
877         return -1;
878 }
879
880 int perf_evsel__append_filter(struct perf_evsel *evsel,
881                               const char *op, const char *filter)
882 {
883         char *new_filter;
884
885         if (evsel->filter == NULL)
886                 return perf_evsel__set_filter(evsel, filter);
887
888         if (asprintf(&new_filter,"(%s) %s (%s)", evsel->filter, op, filter) > 0) {
889                 free(evsel->filter);
890                 evsel->filter = new_filter;
891                 return 0;
892         }
893
894         return -1;
895 }
896
897 int perf_evsel__enable(struct perf_evsel *evsel, int ncpus, int nthreads)
898 {
899         return perf_evsel__run_ioctl(evsel, ncpus, nthreads,
900                                      PERF_EVENT_IOC_ENABLE,
901                                      0);
902 }
903
904 int perf_evsel__alloc_id(struct perf_evsel *evsel, int ncpus, int nthreads)
905 {
906         if (ncpus == 0 || nthreads == 0)
907                 return 0;
908
909         if (evsel->system_wide)
910                 nthreads = 1;
911
912         evsel->sample_id = xyarray__new(ncpus, nthreads, sizeof(struct perf_sample_id));
913         if (evsel->sample_id == NULL)
914                 return -ENOMEM;
915
916         evsel->id = zalloc(ncpus * nthreads * sizeof(u64));
917         if (evsel->id == NULL) {
918                 xyarray__delete(evsel->sample_id);
919                 evsel->sample_id = NULL;
920                 return -ENOMEM;
921         }
922
923         return 0;
924 }
925
926 static void perf_evsel__free_fd(struct perf_evsel *evsel)
927 {
928         xyarray__delete(evsel->fd);
929         evsel->fd = NULL;
930 }
931
932 static void perf_evsel__free_id(struct perf_evsel *evsel)
933 {
934         xyarray__delete(evsel->sample_id);
935         evsel->sample_id = NULL;
936         zfree(&evsel->id);
937 }
938
939 static void perf_evsel__free_config_terms(struct perf_evsel *evsel)
940 {
941         struct perf_evsel_config_term *term, *h;
942
943         list_for_each_entry_safe(term, h, &evsel->config_terms, list) {
944                 list_del(&term->list);
945                 free(term);
946         }
947 }
948
949 void perf_evsel__close_fd(struct perf_evsel *evsel, int ncpus, int nthreads)
950 {
951         int cpu, thread;
952
953         if (evsel->system_wide)
954                 nthreads = 1;
955
956         for (cpu = 0; cpu < ncpus; cpu++)
957                 for (thread = 0; thread < nthreads; ++thread) {
958                         close(FD(evsel, cpu, thread));
959                         FD(evsel, cpu, thread) = -1;
960                 }
961 }
962
963 void perf_evsel__exit(struct perf_evsel *evsel)
964 {
965         assert(list_empty(&evsel->node));
966         perf_evsel__free_fd(evsel);
967         perf_evsel__free_id(evsel);
968         perf_evsel__free_config_terms(evsel);
969         close_cgroup(evsel->cgrp);
970         cpu_map__put(evsel->cpus);
971         thread_map__put(evsel->threads);
972         zfree(&evsel->group_name);
973         zfree(&evsel->name);
974         perf_evsel__object.fini(evsel);
975 }
976
/* Release everything @evsel owns via perf_evsel__exit(), then free it. */
void perf_evsel__delete(struct perf_evsel *evsel)
{
	perf_evsel__exit(evsel);
	free(evsel);
}
982
983 void perf_evsel__compute_deltas(struct perf_evsel *evsel, int cpu, int thread,
984                                 struct perf_counts_values *count)
985 {
986         struct perf_counts_values tmp;
987
988         if (!evsel->prev_raw_counts)
989                 return;
990
991         if (cpu == -1) {
992                 tmp = evsel->prev_raw_counts->aggr;
993                 evsel->prev_raw_counts->aggr = *count;
994         } else {
995                 tmp = *perf_counts(evsel->prev_raw_counts, cpu, thread);
996                 *perf_counts(evsel->prev_raw_counts, cpu, thread) = *count;
997         }
998
999         count->val = count->val - tmp.val;
1000         count->ena = count->ena - tmp.ena;
1001         count->run = count->run - tmp.run;
1002 }
1003
1004 void perf_counts_values__scale(struct perf_counts_values *count,
1005                                bool scale, s8 *pscaled)
1006 {
1007         s8 scaled = 0;
1008
1009         if (scale) {
1010                 if (count->run == 0) {
1011                         scaled = -1;
1012                         count->val = 0;
1013                 } else if (count->run < count->ena) {
1014                         scaled = 1;
1015                         count->val = (u64)((double) count->val * count->ena / count->run + 0.5);
1016                 }
1017         } else
1018                 count->ena = count->run = 0;
1019
1020         if (pscaled)
1021                 *pscaled = scaled;
1022 }
1023
1024 int perf_evsel__read(struct perf_evsel *evsel, int cpu, int thread,
1025                      struct perf_counts_values *count)
1026 {
1027         memset(count, 0, sizeof(*count));
1028
1029         if (FD(evsel, cpu, thread) < 0)
1030                 return -EINVAL;
1031
1032         if (readn(FD(evsel, cpu, thread), count, sizeof(*count)) < 0)
1033                 return -errno;
1034
1035         return 0;
1036 }
1037
1038 int __perf_evsel__read_on_cpu(struct perf_evsel *evsel,
1039                               int cpu, int thread, bool scale)
1040 {
1041         struct perf_counts_values count;
1042         size_t nv = scale ? 3 : 1;
1043
1044         if (FD(evsel, cpu, thread) < 0)
1045                 return -EINVAL;
1046
1047         if (evsel->counts == NULL && perf_evsel__alloc_counts(evsel, cpu + 1, thread + 1) < 0)
1048                 return -ENOMEM;
1049
1050         if (readn(FD(evsel, cpu, thread), &count, nv * sizeof(u64)) < 0)
1051                 return -errno;
1052
1053         perf_evsel__compute_deltas(evsel, cpu, thread, &count);
1054         perf_counts_values__scale(&count, scale, NULL);
1055         *perf_counts(evsel->counts, cpu, thread) = count;
1056         return 0;
1057 }
1058
1059 static int get_group_fd(struct perf_evsel *evsel, int cpu, int thread)
1060 {
1061         struct perf_evsel *leader = evsel->leader;
1062         int fd;
1063
1064         if (perf_evsel__is_group_leader(evsel))
1065                 return -1;
1066
1067         /*
1068          * Leader must be already processed/open,
1069          * if not it's a bug.
1070          */
1071         BUG_ON(!leader->fd);
1072
1073         fd = FD(leader, cpu, thread);
1074         BUG_ON(fd == -1);
1075
1076         return fd;
1077 }
1078
/*
 * One entry of a bit -> name table as consumed by __p_bits().  Tables are
 * terminated by an entry whose name is NULL.
 */
struct bit_names {
	int		bit;	/* bit mask tested against the value */
	const char	*name;	/* text printed when the bit is set */
};
1083
1084 static void __p_bits(char *buf, size_t size, u64 value, struct bit_names *bits)
1085 {
1086         bool first_bit = true;
1087         int i = 0;
1088
1089         do {
1090                 if (value & bits[i].bit) {
1091                         buf += scnprintf(buf, size, "%s%s", first_bit ? "" : "|", bits[i].name);
1092                         first_bit = false;
1093                 }
1094         } while (bits[++i].name != NULL);
1095 }
1096
1097 static void __p_sample_type(char *buf, size_t size, u64 value)
1098 {
1099 #define bit_name(n) { PERF_SAMPLE_##n, #n }
1100         struct bit_names bits[] = {
1101                 bit_name(IP), bit_name(TID), bit_name(TIME), bit_name(ADDR),
1102                 bit_name(READ), bit_name(CALLCHAIN), bit_name(ID), bit_name(CPU),
1103                 bit_name(PERIOD), bit_name(STREAM_ID), bit_name(RAW),
1104                 bit_name(BRANCH_STACK), bit_name(REGS_USER), bit_name(STACK_USER),
1105                 bit_name(IDENTIFIER), bit_name(REGS_INTR),
1106                 { .name = NULL, }
1107         };
1108 #undef bit_name
1109         __p_bits(buf, size, value, bits);
1110 }
1111
1112 static void __p_read_format(char *buf, size_t size, u64 value)
1113 {
1114 #define bit_name(n) { PERF_FORMAT_##n, #n }
1115         struct bit_names bits[] = {
1116                 bit_name(TOTAL_TIME_ENABLED), bit_name(TOTAL_TIME_RUNNING),
1117                 bit_name(ID), bit_name(GROUP),
1118                 { .name = NULL, }
1119         };
1120 #undef bit_name
1121         __p_bits(buf, size, value, bits);
1122 }
1123
1124 #define BUF_SIZE                1024
1125
1126 #define p_hex(val)              snprintf(buf, BUF_SIZE, "%#"PRIx64, (uint64_t)(val))
1127 #define p_unsigned(val)         snprintf(buf, BUF_SIZE, "%"PRIu64, (uint64_t)(val))
1128 #define p_signed(val)           snprintf(buf, BUF_SIZE, "%"PRId64, (int64_t)(val))
1129 #define p_sample_type(val)      __p_sample_type(buf, BUF_SIZE, val)
1130 #define p_read_format(val)      __p_read_format(buf, BUF_SIZE, val)
1131
1132 #define PRINT_ATTRn(_n, _f, _p)                         \
1133 do {                                                    \
1134         if (attr->_f) {                                 \
1135                 _p(attr->_f);                           \
1136                 ret += attr__fprintf(fp, _n, buf, priv);\
1137         }                                               \
1138 } while (0)
1139
1140 #define PRINT_ATTRf(_f, _p)     PRINT_ATTRn(#_f, _f, _p)
1141
1142 int perf_event_attr__fprintf(FILE *fp, struct perf_event_attr *attr,
1143                              attr__fprintf_f attr__fprintf, void *priv)
1144 {
1145         char buf[BUF_SIZE];
1146         int ret = 0;
1147
1148         PRINT_ATTRf(type, p_unsigned);
1149         PRINT_ATTRf(size, p_unsigned);
1150         PRINT_ATTRf(config, p_hex);
1151         PRINT_ATTRn("{ sample_period, sample_freq }", sample_period, p_unsigned);
1152         PRINT_ATTRf(sample_type, p_sample_type);
1153         PRINT_ATTRf(read_format, p_read_format);
1154
1155         PRINT_ATTRf(disabled, p_unsigned);
1156         PRINT_ATTRf(inherit, p_unsigned);
1157         PRINT_ATTRf(pinned, p_unsigned);
1158         PRINT_ATTRf(exclusive, p_unsigned);
1159         PRINT_ATTRf(exclude_user, p_unsigned);
1160         PRINT_ATTRf(exclude_kernel, p_unsigned);
1161         PRINT_ATTRf(exclude_hv, p_unsigned);
1162         PRINT_ATTRf(exclude_idle, p_unsigned);
1163         PRINT_ATTRf(mmap, p_unsigned);
1164         PRINT_ATTRf(comm, p_unsigned);
1165         PRINT_ATTRf(freq, p_unsigned);
1166         PRINT_ATTRf(inherit_stat, p_unsigned);
1167         PRINT_ATTRf(enable_on_exec, p_unsigned);
1168         PRINT_ATTRf(task, p_unsigned);
1169         PRINT_ATTRf(watermark, p_unsigned);
1170         PRINT_ATTRf(precise_ip, p_unsigned);
1171         PRINT_ATTRf(mmap_data, p_unsigned);
1172         PRINT_ATTRf(sample_id_all, p_unsigned);
1173         PRINT_ATTRf(exclude_host, p_unsigned);
1174         PRINT_ATTRf(exclude_guest, p_unsigned);
1175         PRINT_ATTRf(exclude_callchain_kernel, p_unsigned);
1176         PRINT_ATTRf(exclude_callchain_user, p_unsigned);
1177         PRINT_ATTRf(mmap2, p_unsigned);
1178         PRINT_ATTRf(comm_exec, p_unsigned);
1179         PRINT_ATTRf(use_clockid, p_unsigned);
1180         PRINT_ATTRf(context_switch, p_unsigned);
1181
1182         PRINT_ATTRn("{ wakeup_events, wakeup_watermark }", wakeup_events, p_unsigned);
1183         PRINT_ATTRf(bp_type, p_unsigned);
1184         PRINT_ATTRn("{ bp_addr, config1 }", bp_addr, p_hex);
1185         PRINT_ATTRn("{ bp_len, config2 }", bp_len, p_hex);
1186         PRINT_ATTRf(sample_regs_user, p_hex);
1187         PRINT_ATTRf(sample_stack_user, p_unsigned);
1188         PRINT_ATTRf(clockid, p_signed);
1189         PRINT_ATTRf(sample_regs_intr, p_hex);
1190         PRINT_ATTRf(aux_watermark, p_unsigned);
1191
1192         return ret;
1193 }
1194
/*
 * attr__fprintf_f callback used when dumping the attr at open time: prints
 * "  <name padded to 32 columns> <val>" followed by a newline to @fp and
 * returns fprintf()'s result.  @priv is ignored.
 */
static int __open_attr__fprintf(FILE *fp, const char *name, const char *val,
				void *priv __attribute__((unused)))
{
	int printed = fprintf(fp, "  %-32s %s\n", name, val);

	return printed;
}
1200
1201 static int __perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus,
1202                               struct thread_map *threads)
1203 {
1204         int cpu, thread, nthreads;
1205         unsigned long flags = PERF_FLAG_FD_CLOEXEC;
1206         int pid = -1, err;
1207         enum { NO_CHANGE, SET_TO_MAX, INCREASED_MAX } set_rlimit = NO_CHANGE;
1208
1209         if (evsel->system_wide)
1210                 nthreads = 1;
1211         else
1212                 nthreads = threads->nr;
1213
1214         if (evsel->fd == NULL &&
1215             perf_evsel__alloc_fd(evsel, cpus->nr, nthreads) < 0)
1216                 return -ENOMEM;
1217
1218         if (evsel->cgrp) {
1219                 flags |= PERF_FLAG_PID_CGROUP;
1220                 pid = evsel->cgrp->fd;
1221         }
1222
1223 fallback_missing_features:
1224         if (perf_missing_features.clockid_wrong)
1225                 evsel->attr.clockid = CLOCK_MONOTONIC; /* should always work */
1226         if (perf_missing_features.clockid) {
1227                 evsel->attr.use_clockid = 0;
1228                 evsel->attr.clockid = 0;
1229         }
1230         if (perf_missing_features.cloexec)
1231                 flags &= ~(unsigned long)PERF_FLAG_FD_CLOEXEC;
1232         if (perf_missing_features.mmap2)
1233                 evsel->attr.mmap2 = 0;
1234         if (perf_missing_features.exclude_guest)
1235                 evsel->attr.exclude_guest = evsel->attr.exclude_host = 0;
1236 retry_sample_id:
1237         if (perf_missing_features.sample_id_all)
1238                 evsel->attr.sample_id_all = 0;
1239
1240         if (verbose >= 2) {
1241                 fprintf(stderr, "%.60s\n", graph_dotted_line);
1242                 fprintf(stderr, "perf_event_attr:\n");
1243                 perf_event_attr__fprintf(stderr, &evsel->attr, __open_attr__fprintf, NULL);
1244                 fprintf(stderr, "%.60s\n", graph_dotted_line);
1245         }
1246
1247         for (cpu = 0; cpu < cpus->nr; cpu++) {
1248
1249                 for (thread = 0; thread < nthreads; thread++) {
1250                         int group_fd;
1251
1252                         if (!evsel->cgrp && !evsel->system_wide)
1253                                 pid = thread_map__pid(threads, thread);
1254
1255                         group_fd = get_group_fd(evsel, cpu, thread);
1256 retry_open:
1257                         pr_debug2("sys_perf_event_open: pid %d  cpu %d  group_fd %d  flags %#lx\n",
1258                                   pid, cpus->map[cpu], group_fd, flags);
1259
1260                         FD(evsel, cpu, thread) = sys_perf_event_open(&evsel->attr,
1261                                                                      pid,
1262                                                                      cpus->map[cpu],
1263                                                                      group_fd, flags);
1264                         if (FD(evsel, cpu, thread) < 0) {
1265                                 err = -errno;
1266                                 pr_debug2("sys_perf_event_open failed, error %d\n",
1267                                           err);
1268                                 goto try_fallback;
1269                         }
1270                         set_rlimit = NO_CHANGE;
1271
1272                         /*
1273                          * If we succeeded but had to kill clockid, fail and
1274                          * have perf_evsel__open_strerror() print us a nice
1275                          * error.
1276                          */
1277                         if (perf_missing_features.clockid ||
1278                             perf_missing_features.clockid_wrong) {
1279                                 err = -EINVAL;
1280                                 goto out_close;
1281                         }
1282                 }
1283         }
1284
1285         return 0;
1286
1287 try_fallback:
1288         /*
1289          * perf stat needs between 5 and 22 fds per CPU. When we run out
1290          * of them try to increase the limits.
1291          */
1292         if (err == -EMFILE && set_rlimit < INCREASED_MAX) {
1293                 struct rlimit l;
1294                 int old_errno = errno;
1295
1296                 if (getrlimit(RLIMIT_NOFILE, &l) == 0) {
1297                         if (set_rlimit == NO_CHANGE)
1298                                 l.rlim_cur = l.rlim_max;
1299                         else {
1300                                 l.rlim_cur = l.rlim_max + 1000;
1301                                 l.rlim_max = l.rlim_cur;
1302                         }
1303                         if (setrlimit(RLIMIT_NOFILE, &l) == 0) {
1304                                 set_rlimit++;
1305                                 errno = old_errno;
1306                                 goto retry_open;
1307                         }
1308                 }
1309                 errno = old_errno;
1310         }
1311
1312         if (err != -EINVAL || cpu > 0 || thread > 0)
1313                 goto out_close;
1314
1315         /*
1316          * Must probe features in the order they were added to the
1317          * perf_event_attr interface.
1318          */
1319         if (!perf_missing_features.clockid_wrong && evsel->attr.use_clockid) {
1320                 perf_missing_features.clockid_wrong = true;
1321                 goto fallback_missing_features;
1322         } else if (!perf_missing_features.clockid && evsel->attr.use_clockid) {
1323                 perf_missing_features.clockid = true;
1324                 goto fallback_missing_features;
1325         } else if (!perf_missing_features.cloexec && (flags & PERF_FLAG_FD_CLOEXEC)) {
1326                 perf_missing_features.cloexec = true;
1327                 goto fallback_missing_features;
1328         } else if (!perf_missing_features.mmap2 && evsel->attr.mmap2) {
1329                 perf_missing_features.mmap2 = true;
1330                 goto fallback_missing_features;
1331         } else if (!perf_missing_features.exclude_guest &&
1332                    (evsel->attr.exclude_guest || evsel->attr.exclude_host)) {
1333                 perf_missing_features.exclude_guest = true;
1334                 goto fallback_missing_features;
1335         } else if (!perf_missing_features.sample_id_all) {
1336                 perf_missing_features.sample_id_all = true;
1337                 goto retry_sample_id;
1338         }
1339
1340 out_close:
1341         do {
1342                 while (--thread >= 0) {
1343                         close(FD(evsel, cpu, thread));
1344                         FD(evsel, cpu, thread) = -1;
1345                 }
1346                 thread = nthreads;
1347         } while (--cpu >= 0);
1348         return err;
1349 }
1350
1351 void perf_evsel__close(struct perf_evsel *evsel, int ncpus, int nthreads)
1352 {
1353         if (evsel->fd == NULL)
1354                 return;
1355
1356         perf_evsel__close_fd(evsel, ncpus, nthreads);
1357         perf_evsel__free_fd(evsel);
1358 }
1359
1360 static struct {
1361         struct cpu_map map;
1362         int cpus[1];
1363 } empty_cpu_map = {
1364         .map.nr = 1,
1365         .cpus   = { -1, },
1366 };
1367
1368 static struct {
1369         struct thread_map map;
1370         int threads[1];
1371 } empty_thread_map = {
1372         .map.nr  = 1,
1373         .threads = { -1, },
1374 };
1375
1376 int perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus,
1377                      struct thread_map *threads)
1378 {
1379         if (cpus == NULL) {
1380                 /* Work around old compiler warnings about strict aliasing */
1381                 cpus = &empty_cpu_map.map;
1382         }
1383
1384         if (threads == NULL)
1385                 threads = &empty_thread_map.map;
1386
1387         return __perf_evsel__open(evsel, cpus, threads);
1388 }
1389
1390 int perf_evsel__open_per_cpu(struct perf_evsel *evsel,
1391                              struct cpu_map *cpus)
1392 {
1393         return __perf_evsel__open(evsel, cpus, &empty_thread_map.map);
1394 }
1395
1396 int perf_evsel__open_per_thread(struct perf_evsel *evsel,
1397                                 struct thread_map *threads)
1398 {
1399         return __perf_evsel__open(evsel, &empty_cpu_map.map, threads);
1400 }
1401
1402 static int perf_evsel__parse_id_sample(const struct perf_evsel *evsel,
1403                                        const union perf_event *event,
1404                                        struct perf_sample *sample)
1405 {
1406         u64 type = evsel->attr.sample_type;
1407         const u64 *array = event->sample.array;
1408         bool swapped = evsel->needs_swap;
1409         union u64_swap u;
1410
1411         array += ((event->header.size -
1412                    sizeof(event->header)) / sizeof(u64)) - 1;
1413
1414         if (type & PERF_SAMPLE_IDENTIFIER) {
1415                 sample->id = *array;
1416                 array--;
1417         }
1418
1419         if (type & PERF_SAMPLE_CPU) {
1420                 u.val64 = *array;
1421                 if (swapped) {
1422                         /* undo swap of u64, then swap on individual u32s */
1423                         u.val64 = bswap_64(u.val64);
1424                         u.val32[0] = bswap_32(u.val32[0]);
1425                 }
1426
1427                 sample->cpu = u.val32[0];
1428                 array--;
1429         }
1430
1431         if (type & PERF_SAMPLE_STREAM_ID) {
1432                 sample->stream_id = *array;
1433                 array--;
1434         }
1435
1436         if (type & PERF_SAMPLE_ID) {
1437                 sample->id = *array;
1438                 array--;
1439         }
1440
1441         if (type & PERF_SAMPLE_TIME) {
1442                 sample->time = *array;
1443                 array--;
1444         }
1445
1446         if (type & PERF_SAMPLE_TID) {
1447                 u.val64 = *array;
1448                 if (swapped) {
1449                         /* undo swap of u64, then swap on individual u32s */
1450                         u.val64 = bswap_64(u.val64);
1451                         u.val32[0] = bswap_32(u.val32[0]);
1452                         u.val32[1] = bswap_32(u.val32[1]);
1453                 }
1454
1455                 sample->pid = u.val32[0];
1456                 sample->tid = u.val32[1];
1457                 array--;
1458         }
1459
1460         return 0;
1461 }
1462
1463 static inline bool overflow(const void *endp, u16 max_size, const void *offset,
1464                             u64 size)
1465 {
1466         return size > max_size || offset + size > endp;
1467 }
1468
1469 #define OVERFLOW_CHECK(offset, size, max_size)                          \
1470         do {                                                            \
1471                 if (overflow(endp, (max_size), (offset), (size)))       \
1472                         return -EFAULT;                                 \
1473         } while (0)
1474
1475 #define OVERFLOW_CHECK_u64(offset) \
1476         OVERFLOW_CHECK(offset, sizeof(u64), sizeof(u64))
1477
1478 int perf_evsel__parse_sample(struct perf_evsel *evsel, union perf_event *event,
1479                              struct perf_sample *data)
1480 {
1481         u64 type = evsel->attr.sample_type;
1482         bool swapped = evsel->needs_swap;
1483         const u64 *array;
1484         u16 max_size = event->header.size;
1485         const void *endp = (void *)event + max_size;
1486         u64 sz;
1487
1488         /*
1489          * used for cross-endian analysis. See git commit 65014ab3
1490          * for why this goofiness is needed.
1491          */
1492         union u64_swap u;
1493
1494         memset(data, 0, sizeof(*data));
1495         data->cpu = data->pid = data->tid = -1;
1496         data->stream_id = data->id = data->time = -1ULL;
1497         data->period = evsel->attr.sample_period;
1498         data->weight = 0;
1499
1500         if (event->header.type != PERF_RECORD_SAMPLE) {
1501                 if (!evsel->attr.sample_id_all)
1502                         return 0;
1503                 return perf_evsel__parse_id_sample(evsel, event, data);
1504         }
1505
1506         array = event->sample.array;
1507
1508         /*
1509          * The evsel's sample_size is based on PERF_SAMPLE_MASK which includes
1510          * up to PERF_SAMPLE_PERIOD.  After that overflow() must be used to
1511          * check the format does not go past the end of the event.
1512          */
1513         if (evsel->sample_size + sizeof(event->header) > event->header.size)
1514                 return -EFAULT;
1515
1516         data->id = -1ULL;
1517         if (type & PERF_SAMPLE_IDENTIFIER) {
1518                 data->id = *array;
1519                 array++;
1520         }
1521
1522         if (type & PERF_SAMPLE_IP) {
1523                 data->ip = *array;
1524                 array++;
1525         }
1526
1527         if (type & PERF_SAMPLE_TID) {
1528                 u.val64 = *array;
1529                 if (swapped) {
1530                         /* undo swap of u64, then swap on individual u32s */
1531                         u.val64 = bswap_64(u.val64);
1532                         u.val32[0] = bswap_32(u.val32[0]);
1533                         u.val32[1] = bswap_32(u.val32[1]);
1534                 }
1535
1536                 data->pid = u.val32[0];
1537                 data->tid = u.val32[1];
1538                 array++;
1539         }
1540
1541         if (type & PERF_SAMPLE_TIME) {
1542                 data->time = *array;
1543                 array++;
1544         }
1545
1546         data->addr = 0;
1547         if (type & PERF_SAMPLE_ADDR) {
1548                 data->addr = *array;
1549                 array++;
1550         }
1551
1552         if (type & PERF_SAMPLE_ID) {
1553                 data->id = *array;
1554                 array++;
1555         }
1556
1557         if (type & PERF_SAMPLE_STREAM_ID) {
1558                 data->stream_id = *array;
1559                 array++;
1560         }
1561
1562         if (type & PERF_SAMPLE_CPU) {
1563
1564                 u.val64 = *array;
1565                 if (swapped) {
1566                         /* undo swap of u64, then swap on individual u32s */
1567                         u.val64 = bswap_64(u.val64);
1568                         u.val32[0] = bswap_32(u.val32[0]);
1569                 }
1570
1571                 data->cpu = u.val32[0];
1572                 array++;
1573         }
1574
1575         if (type & PERF_SAMPLE_PERIOD) {
1576                 data->period = *array;
1577                 array++;
1578         }
1579
1580         if (type & PERF_SAMPLE_READ) {
1581                 u64 read_format = evsel->attr.read_format;
1582
1583                 OVERFLOW_CHECK_u64(array);
1584                 if (read_format & PERF_FORMAT_GROUP)
1585                         data->read.group.nr = *array;
1586                 else
1587                         data->read.one.value = *array;
1588
1589                 array++;
1590
1591                 if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) {
1592                         OVERFLOW_CHECK_u64(array);
1593                         data->read.time_enabled = *array;
1594                         array++;
1595                 }
1596
1597                 if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) {
1598                         OVERFLOW_CHECK_u64(array);
1599                         data->read.time_running = *array;
1600                         array++;
1601                 }
1602
1603                 /* PERF_FORMAT_ID is forced for PERF_SAMPLE_READ */
1604                 if (read_format & PERF_FORMAT_GROUP) {
1605                         const u64 max_group_nr = UINT64_MAX /
1606                                         sizeof(struct sample_read_value);
1607
1608                         if (data->read.group.nr > max_group_nr)
1609                                 return -EFAULT;
1610                         sz = data->read.group.nr *
1611                              sizeof(struct sample_read_value);
1612                         OVERFLOW_CHECK(array, sz, max_size);
1613                         data->read.group.values =
1614                                         (struct sample_read_value *)array;
1615                         array = (void *)array + sz;
1616                 } else {
1617                         OVERFLOW_CHECK_u64(array);
1618                         data->read.one.id = *array;
1619                         array++;
1620                 }
1621         }
1622
1623         if (type & PERF_SAMPLE_CALLCHAIN) {
1624                 const u64 max_callchain_nr = UINT64_MAX / sizeof(u64);
1625
1626                 OVERFLOW_CHECK_u64(array);
1627                 data->callchain = (struct ip_callchain *)array++;
1628                 if (data->callchain->nr > max_callchain_nr)
1629                         return -EFAULT;
1630                 sz = data->callchain->nr * sizeof(u64);
1631                 OVERFLOW_CHECK(array, sz, max_size);
1632                 array = (void *)array + sz;
1633         }
1634
1635         if (type & PERF_SAMPLE_RAW) {
1636                 OVERFLOW_CHECK_u64(array);
1637                 u.val64 = *array;
1638                 if (WARN_ONCE(swapped,
1639                               "Endianness of raw data not corrected!\n")) {
1640                         /* undo swap of u64, then swap on individual u32s */
1641                         u.val64 = bswap_64(u.val64);
1642                         u.val32[0] = bswap_32(u.val32[0]);
1643                         u.val32[1] = bswap_32(u.val32[1]);
1644                 }
1645                 data->raw_size = u.val32[0];
1646                 array = (void *)array + sizeof(u32);
1647
1648                 OVERFLOW_CHECK(array, data->raw_size, max_size);
1649                 data->raw_data = (void *)array;
1650                 array = (void *)array + data->raw_size;
1651         }
1652
1653         if (type & PERF_SAMPLE_BRANCH_STACK) {
1654                 const u64 max_branch_nr = UINT64_MAX /
1655                                           sizeof(struct branch_entry);
1656
1657                 OVERFLOW_CHECK_u64(array);
1658                 data->branch_stack = (struct branch_stack *)array++;
1659
1660                 if (data->branch_stack->nr > max_branch_nr)
1661                         return -EFAULT;
1662                 sz = data->branch_stack->nr * sizeof(struct branch_entry);
1663                 OVERFLOW_CHECK(array, sz, max_size);
1664                 array = (void *)array + sz;
1665         }
1666
1667         if (type & PERF_SAMPLE_REGS_USER) {
1668                 OVERFLOW_CHECK_u64(array);
1669                 data->user_regs.abi = *array;
1670                 array++;
1671
1672                 if (data->user_regs.abi) {
1673                         u64 mask = evsel->attr.sample_regs_user;
1674
1675                         sz = hweight_long(mask) * sizeof(u64);
1676                         OVERFLOW_CHECK(array, sz, max_size);
1677                         data->user_regs.mask = mask;
1678                         data->user_regs.regs = (u64 *)array;
1679                         array = (void *)array + sz;
1680                 }
1681         }
1682
1683         if (type & PERF_SAMPLE_STACK_USER) {
1684                 OVERFLOW_CHECK_u64(array);
1685                 sz = *array++;
1686
1687                 data->user_stack.offset = ((char *)(array - 1)
1688                                           - (char *) event);
1689
1690                 if (!sz) {
1691                         data->user_stack.size = 0;
1692                 } else {
1693                         OVERFLOW_CHECK(array, sz, max_size);
1694                         data->user_stack.data = (char *)array;
1695                         array = (void *)array + sz;
1696                         OVERFLOW_CHECK_u64(array);
1697                         data->user_stack.size = *array++;
1698                         if (WARN_ONCE(data->user_stack.size > sz,
1699                                       "user stack dump failure\n"))
1700                                 return -EFAULT;
1701                 }
1702         }
1703
1704         data->weight = 0;
1705         if (type & PERF_SAMPLE_WEIGHT) {
1706                 OVERFLOW_CHECK_u64(array);
1707                 data->weight = *array;
1708                 array++;
1709         }
1710
1711         data->data_src = PERF_MEM_DATA_SRC_NONE;
1712         if (type & PERF_SAMPLE_DATA_SRC) {
1713                 OVERFLOW_CHECK_u64(array);
1714                 data->data_src = *array;
1715                 array++;
1716         }
1717
1718         data->transaction = 0;
1719         if (type & PERF_SAMPLE_TRANSACTION) {
1720                 OVERFLOW_CHECK_u64(array);
1721                 data->transaction = *array;
1722                 array++;
1723         }
1724
1725         data->intr_regs.abi = PERF_SAMPLE_REGS_ABI_NONE;
1726         if (type & PERF_SAMPLE_REGS_INTR) {
1727                 OVERFLOW_CHECK_u64(array);
1728                 data->intr_regs.abi = *array;
1729                 array++;
1730
1731                 if (data->intr_regs.abi != PERF_SAMPLE_REGS_ABI_NONE) {
1732                         u64 mask = evsel->attr.sample_regs_intr;
1733
1734                         sz = hweight_long(mask) * sizeof(u64);
1735                         OVERFLOW_CHECK(array, sz, max_size);
1736                         data->intr_regs.mask = mask;
1737                         data->intr_regs.regs = (u64 *)array;
1738                         array = (void *)array + sz;
1739                 }
1740         }
1741
1742         return 0;
1743 }
1744
1745 size_t perf_event__sample_event_size(const struct perf_sample *sample, u64 type,
1746                                      u64 read_format)
1747 {
1748         size_t sz, result = sizeof(struct sample_event);
1749
1750         if (type & PERF_SAMPLE_IDENTIFIER)
1751                 result += sizeof(u64);
1752
1753         if (type & PERF_SAMPLE_IP)
1754                 result += sizeof(u64);
1755
1756         if (type & PERF_SAMPLE_TID)
1757                 result += sizeof(u64);
1758
1759         if (type & PERF_SAMPLE_TIME)
1760                 result += sizeof(u64);
1761
1762         if (type & PERF_SAMPLE_ADDR)
1763                 result += sizeof(u64);
1764
1765         if (type & PERF_SAMPLE_ID)
1766                 result += sizeof(u64);
1767
1768         if (type & PERF_SAMPLE_STREAM_ID)
1769                 result += sizeof(u64);
1770
1771         if (type & PERF_SAMPLE_CPU)
1772                 result += sizeof(u64);
1773
1774         if (type & PERF_SAMPLE_PERIOD)
1775                 result += sizeof(u64);
1776
1777         if (type & PERF_SAMPLE_READ) {
1778                 result += sizeof(u64);
1779                 if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
1780                         result += sizeof(u64);
1781                 if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
1782                         result += sizeof(u64);
1783                 /* PERF_FORMAT_ID is forced for PERF_SAMPLE_READ */
1784                 if (read_format & PERF_FORMAT_GROUP) {
1785                         sz = sample->read.group.nr *
1786                              sizeof(struct sample_read_value);
1787                         result += sz;
1788                 } else {
1789                         result += sizeof(u64);
1790                 }
1791         }
1792
1793         if (type & PERF_SAMPLE_CALLCHAIN) {
1794                 sz = (sample->callchain->nr + 1) * sizeof(u64);
1795                 result += sz;
1796         }
1797
1798         if (type & PERF_SAMPLE_RAW) {
1799                 result += sizeof(u32);
1800                 result += sample->raw_size;
1801         }
1802
1803         if (type & PERF_SAMPLE_BRANCH_STACK) {
1804                 sz = sample->branch_stack->nr * sizeof(struct branch_entry);
1805                 sz += sizeof(u64);
1806                 result += sz;
1807         }
1808
1809         if (type & PERF_SAMPLE_REGS_USER) {
1810                 if (sample->user_regs.abi) {
1811                         result += sizeof(u64);
1812                         sz = hweight_long(sample->user_regs.mask) * sizeof(u64);
1813                         result += sz;
1814                 } else {
1815                         result += sizeof(u64);
1816                 }
1817         }
1818
1819         if (type & PERF_SAMPLE_STACK_USER) {
1820                 sz = sample->user_stack.size;
1821                 result += sizeof(u64);
1822                 if (sz) {
1823                         result += sz;
1824                         result += sizeof(u64);
1825                 }
1826         }
1827
1828         if (type & PERF_SAMPLE_WEIGHT)
1829                 result += sizeof(u64);
1830
1831         if (type & PERF_SAMPLE_DATA_SRC)
1832                 result += sizeof(u64);
1833
1834         if (type & PERF_SAMPLE_TRANSACTION)
1835                 result += sizeof(u64);
1836
1837         if (type & PERF_SAMPLE_REGS_INTR) {
1838                 if (sample->intr_regs.abi) {
1839                         result += sizeof(u64);
1840                         sz = hweight_long(sample->intr_regs.mask) * sizeof(u64);
1841                         result += sz;
1842                 } else {
1843                         result += sizeof(u64);
1844                 }
1845         }
1846
1847         return result;
1848 }
1849
1850 int perf_event__synthesize_sample(union perf_event *event, u64 type,
1851                                   u64 read_format,
1852                                   const struct perf_sample *sample,
1853                                   bool swapped)
1854 {
1855         u64 *array;
1856         size_t sz;
1857         /*
1858          * used for cross-endian analysis. See git commit 65014ab3
1859          * for why this goofiness is needed.
1860          */
1861         union u64_swap u;
1862
1863         array = event->sample.array;
1864
1865         if (type & PERF_SAMPLE_IDENTIFIER) {
1866                 *array = sample->id;
1867                 array++;
1868         }
1869
1870         if (type & PERF_SAMPLE_IP) {
1871                 *array = sample->ip;
1872                 array++;
1873         }
1874
1875         if (type & PERF_SAMPLE_TID) {
1876                 u.val32[0] = sample->pid;
1877                 u.val32[1] = sample->tid;
1878                 if (swapped) {
1879                         /*
1880                          * Inverse of what is done in perf_evsel__parse_sample
1881                          */
1882                         u.val32[0] = bswap_32(u.val32[0]);
1883                         u.val32[1] = bswap_32(u.val32[1]);
1884                         u.val64 = bswap_64(u.val64);
1885                 }
1886
1887                 *array = u.val64;
1888                 array++;
1889         }
1890
1891         if (type & PERF_SAMPLE_TIME) {
1892                 *array = sample->time;
1893                 array++;
1894         }
1895
1896         if (type & PERF_SAMPLE_ADDR) {
1897                 *array = sample->addr;
1898                 array++;
1899         }
1900
1901         if (type & PERF_SAMPLE_ID) {
1902                 *array = sample->id;
1903                 array++;
1904         }
1905
1906         if (type & PERF_SAMPLE_STREAM_ID) {
1907                 *array = sample->stream_id;
1908                 array++;
1909         }
1910
1911         if (type & PERF_SAMPLE_CPU) {
1912                 u.val32[0] = sample->cpu;
1913                 if (swapped) {
1914                         /*
1915                          * Inverse of what is done in perf_evsel__parse_sample
1916                          */
1917                         u.val32[0] = bswap_32(u.val32[0]);
1918                         u.val64 = bswap_64(u.val64);
1919                 }
1920                 *array = u.val64;
1921                 array++;
1922         }
1923
1924         if (type & PERF_SAMPLE_PERIOD) {
1925                 *array = sample->period;
1926                 array++;
1927         }
1928
1929         if (type & PERF_SAMPLE_READ) {
1930                 if (read_format & PERF_FORMAT_GROUP)
1931                         *array = sample->read.group.nr;
1932                 else
1933                         *array = sample->read.one.value;
1934                 array++;
1935
1936                 if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) {
1937                         *array = sample->read.time_enabled;
1938                         array++;
1939                 }
1940
1941                 if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) {
1942                         *array = sample->read.time_running;
1943                         array++;
1944                 }
1945
1946                 /* PERF_FORMAT_ID is forced for PERF_SAMPLE_READ */
1947                 if (read_format & PERF_FORMAT_GROUP) {
1948                         sz = sample->read.group.nr *
1949                              sizeof(struct sample_read_value);
1950                         memcpy(array, sample->read.group.values, sz);
1951                         array = (void *)array + sz;
1952                 } else {
1953                         *array = sample->read.one.id;
1954                         array++;
1955                 }
1956         }
1957
1958         if (type & PERF_SAMPLE_CALLCHAIN) {
1959                 sz = (sample->callchain->nr + 1) * sizeof(u64);
1960                 memcpy(array, sample->callchain, sz);
1961                 array = (void *)array + sz;
1962         }
1963
1964         if (type & PERF_SAMPLE_RAW) {
1965                 u.val32[0] = sample->raw_size;
1966                 if (WARN_ONCE(swapped,
1967                               "Endianness of raw data not corrected!\n")) {
1968                         /*
1969                          * Inverse of what is done in perf_evsel__parse_sample
1970                          */
1971                         u.val32[0] = bswap_32(u.val32[0]);
1972                         u.val32[1] = bswap_32(u.val32[1]);
1973                         u.val64 = bswap_64(u.val64);
1974                 }
1975                 *array = u.val64;
1976                 array = (void *)array + sizeof(u32);
1977
1978                 memcpy(array, sample->raw_data, sample->raw_size);
1979                 array = (void *)array + sample->raw_size;
1980         }
1981
1982         if (type & PERF_SAMPLE_BRANCH_STACK) {
1983                 sz = sample->branch_stack->nr * sizeof(struct branch_entry);
1984                 sz += sizeof(u64);
1985                 memcpy(array, sample->branch_stack, sz);
1986                 array = (void *)array + sz;
1987         }
1988
1989         if (type & PERF_SAMPLE_REGS_USER) {
1990                 if (sample->user_regs.abi) {
1991                         *array++ = sample->user_regs.abi;
1992                         sz = hweight_long(sample->user_regs.mask) * sizeof(u64);
1993                         memcpy(array, sample->user_regs.regs, sz);
1994                         array = (void *)array + sz;
1995                 } else {
1996                         *array++ = 0;
1997                 }
1998         }
1999
2000         if (type & PERF_SAMPLE_STACK_USER) {
2001                 sz = sample->user_stack.size;
2002                 *array++ = sz;
2003                 if (sz) {
2004                         memcpy(array, sample->user_stack.data, sz);
2005                         array = (void *)array + sz;
2006                         *array++ = sz;
2007                 }
2008         }
2009
2010         if (type & PERF_SAMPLE_WEIGHT) {
2011                 *array = sample->weight;
2012                 array++;
2013         }
2014
2015         if (type & PERF_SAMPLE_DATA_SRC) {
2016                 *array = sample->data_src;
2017                 array++;
2018         }
2019
2020         if (type & PERF_SAMPLE_TRANSACTION) {
2021                 *array = sample->transaction;
2022                 array++;
2023         }
2024
2025         if (type & PERF_SAMPLE_REGS_INTR) {
2026                 if (sample->intr_regs.abi) {
2027                         *array++ = sample->intr_regs.abi;
2028                         sz = hweight_long(sample->intr_regs.mask) * sizeof(u64);
2029                         memcpy(array, sample->intr_regs.regs, sz);
2030                         array = (void *)array + sz;
2031                 } else {
2032                         *array++ = 0;
2033                 }
2034         }
2035
2036         return 0;
2037 }
2038
2039 struct format_field *perf_evsel__field(struct perf_evsel *evsel, const char *name)
2040 {
2041         return pevent_find_field(evsel->tp_format, name);
2042 }
2043
2044 void *perf_evsel__rawptr(struct perf_evsel *evsel, struct perf_sample *sample,
2045                          const char *name)
2046 {
2047         struct format_field *field = perf_evsel__field(evsel, name);
2048         int offset;
2049
2050         if (!field)
2051                 return NULL;
2052
2053         offset = field->offset;
2054
2055         if (field->flags & FIELD_IS_DYNAMIC) {
2056                 offset = *(int *)(sample->raw_data + field->offset);
2057                 offset &= 0xffff;
2058         }
2059
2060         return sample->raw_data + offset;
2061 }
2062
2063 u64 perf_evsel__intval(struct perf_evsel *evsel, struct perf_sample *sample,
2064                        const char *name)
2065 {
2066         struct format_field *field = perf_evsel__field(evsel, name);
2067         void *ptr;
2068         u64 value;
2069
2070         if (!field)
2071                 return 0;
2072
2073         ptr = sample->raw_data + field->offset;
2074
2075         switch (field->size) {
2076         case 1:
2077                 return *(u8 *)ptr;
2078         case 2:
2079                 value = *(u16 *)ptr;
2080                 break;
2081         case 4:
2082                 value = *(u32 *)ptr;
2083                 break;
2084         case 8:
2085                 memcpy(&value, ptr, sizeof(u64));
2086                 break;
2087         default:
2088                 return 0;
2089         }
2090
2091         if (!evsel->needs_swap)
2092                 return value;
2093
2094         switch (field->size) {
2095         case 2:
2096                 return bswap_16(value);
2097         case 4:
2098                 return bswap_32(value);
2099         case 8:
2100                 return bswap_64(value);
2101         default:
2102                 return 0;
2103         }
2104
2105         return 0;
2106 }
2107
/*
 * Print one formatted item of a separated list to @fp.
 *
 * The item is preceded by ":" when *@first is true (and *@first is then
 * cleared), or by "," on every later call.
 *
 * Returns the sum of the fprintf() and vfprintf() return values.
 */
static int comma_fprintf(FILE *fp, bool *first, const char *fmt, ...)
{
	va_list ap;
	int nr;

	if (*first) {
		nr = fprintf(fp, ":");
		*first = false;
	} else {
		nr = fprintf(fp, ",");
	}

	va_start(ap, fmt);
	nr += vfprintf(fp, fmt, ap);
	va_end(ap);

	return nr;
}
2125
/*
 * Callback handed to perf_event_attr__fprintf(): prints one " name: val"
 * pair through comma_fprintf().  @priv points to the bool that tracks
 * whether this is the first item printed.
 */
static int __print_attr__fprintf(FILE *fp, const char *name, const char *val, void *priv)
{
	bool *first = priv;

	return comma_fprintf(fp, first, " %s: %s", name, val);
}
2130
2131 int perf_evsel__fprintf(struct perf_evsel *evsel,
2132                         struct perf_attr_details *details, FILE *fp)
2133 {
2134         bool first = true;
2135         int printed = 0;
2136
2137         if (details->event_group) {
2138                 struct perf_evsel *pos;
2139
2140                 if (!perf_evsel__is_group_leader(evsel))
2141                         return 0;
2142
2143                 if (evsel->nr_members > 1)
2144                         printed += fprintf(fp, "%s{", evsel->group_name ?: "");
2145
2146                 printed += fprintf(fp, "%s", perf_evsel__name(evsel));
2147                 for_each_group_member(pos, evsel)
2148                         printed += fprintf(fp, ",%s", perf_evsel__name(pos));
2149
2150                 if (evsel->nr_members > 1)
2151                         printed += fprintf(fp, "}");
2152                 goto out;
2153         }
2154
2155         printed += fprintf(fp, "%s", perf_evsel__name(evsel));
2156
2157         if (details->verbose) {
2158                 printed += perf_event_attr__fprintf(fp, &evsel->attr,
2159                                                     __print_attr__fprintf, &first);
2160         } else if (details->freq) {
2161                 printed += comma_fprintf(fp, &first, " sample_freq=%" PRIu64,
2162                                          (u64)evsel->attr.sample_freq);
2163         }
2164 out:
2165         fputc('\n', fp);
2166         return ++printed;
2167 }
2168
2169 bool perf_evsel__fallback(struct perf_evsel *evsel, int err,
2170                           char *msg, size_t msgsize)
2171 {
2172         if ((err == ENOENT || err == ENXIO || err == ENODEV) &&
2173             evsel->attr.type   == PERF_TYPE_HARDWARE &&
2174             evsel->attr.config == PERF_COUNT_HW_CPU_CYCLES) {
2175                 /*
2176                  * If it's cycles then fall back to hrtimer based
2177                  * cpu-clock-tick sw counter, which is always available even if
2178                  * no PMU support.
2179                  *
2180                  * PPC returns ENXIO until 2.6.37 (behavior changed with commit
2181                  * b0a873e).
2182                  */
2183                 scnprintf(msg, msgsize, "%s",
2184 "The cycles event is not supported, trying to fall back to cpu-clock-ticks");
2185
2186                 evsel->attr.type   = PERF_TYPE_SOFTWARE;
2187                 evsel->attr.config = PERF_COUNT_SW_CPU_CLOCK;
2188
2189                 zfree(&evsel->name);
2190                 return true;
2191         }
2192
2193         return false;
2194 }
2195
2196 int perf_evsel__open_strerror(struct perf_evsel *evsel, struct target *target,
2197                               int err, char *msg, size_t size)
2198 {
2199         char sbuf[STRERR_BUFSIZE];
2200
2201         switch (err) {
2202         case EPERM:
2203         case EACCES:
2204                 return scnprintf(msg, size,
2205                  "You may not have permission to collect %sstats.\n"
2206                  "Consider tweaking /proc/sys/kernel/perf_event_paranoid:\n"
2207                  " -1 - Not paranoid at all\n"
2208                  "  0 - Disallow raw tracepoint access for unpriv\n"
2209                  "  1 - Disallow cpu events for unpriv\n"
2210                  "  2 - Disallow kernel profiling for unpriv",
2211                                  target->system_wide ? "system-wide " : "");
2212         case ENOENT:
2213                 return scnprintf(msg, size, "The %s event is not supported.",
2214                                  perf_evsel__name(evsel));
2215         case EMFILE:
2216                 return scnprintf(msg, size, "%s",
2217                          "Too many events are opened.\n"
2218                          "Probably the maximum number of open file descriptors has been reached.\n"
2219                          "Hint: Try again after reducing the number of events.\n"
2220                          "Hint: Try increasing the limit with 'ulimit -n <limit>'");
2221         case ENODEV:
2222                 if (target->cpu_list)
2223                         return scnprintf(msg, size, "%s",
2224          "No such device - did you specify an out-of-range profile CPU?\n");
2225                 break;
2226         case EOPNOTSUPP:
2227                 if (evsel->attr.precise_ip)
2228                         return scnprintf(msg, size, "%s",
2229         "\'precise\' request may not be supported. Try removing 'p' modifier.");
2230 #if defined(__i386__) || defined(__x86_64__)
2231                 if (evsel->attr.type == PERF_TYPE_HARDWARE)
2232                         return scnprintf(msg, size, "%s",
2233         "No hardware sampling interrupt available.\n"
2234         "No APIC? If so then you can boot the kernel with the \"lapic\" boot parameter to force-enable it.");
2235 #endif
2236                 break;
2237         case EBUSY:
2238                 if (find_process("oprofiled"))
2239                         return scnprintf(msg, size,
2240         "The PMU counters are busy/taken by another profiler.\n"
2241         "We found oprofile daemon running, please stop it and try again.");
2242                 break;
2243         case EINVAL:
2244                 if (perf_missing_features.clockid)
2245                         return scnprintf(msg, size, "clockid feature not supported.");
2246                 if (perf_missing_features.clockid_wrong)
2247                         return scnprintf(msg, size, "wrong clockid (%d).", clockid);
2248                 break;
2249         default:
2250                 break;
2251         }
2252
2253         return scnprintf(msg, size,
2254         "The sys_perf_event_open() syscall returned with %d (%s) for event (%s).\n"
2255         "/bin/dmesg may provide additional information.\n"
2256         "No CONFIG_PERF_EVENTS=y kernel support configured?\n",
2257                          err, strerror_r(err, sbuf, sizeof(sbuf)),
2258                          perf_evsel__name(evsel));
2259 }