/* arch/x86/kernel/cpu/perf_event_intel_uncore.c */
#include "perf_event_intel_uncore.h"

static struct intel_uncore_type *empty_uncore[] = { NULL, };
static struct intel_uncore_type **msr_uncores = empty_uncore;
static struct intel_uncore_type **pci_uncores = empty_uncore;
/* pci bus to socket mapping */
static int pcibus_to_physid[256] = { [0 ... 255] = -1, };

static DEFINE_RAW_SPINLOCK(uncore_box_lock);

/* mask of cpus that collect uncore events */
static cpumask_t uncore_cpu_mask;

/* constraint for the fixed counter */
static struct event_constraint constraint_fixed =
        EVENT_CONSTRAINT(~0ULL, 1 << UNCORE_PMC_IDX_FIXED, ~0ULL);

DEFINE_UNCORE_FORMAT_ATTR(event, event, "config:0-7");
DEFINE_UNCORE_FORMAT_ATTR(umask, umask, "config:8-15");
DEFINE_UNCORE_FORMAT_ATTR(edge, edge, "config:18");
DEFINE_UNCORE_FORMAT_ATTR(inv, inv, "config:23");
DEFINE_UNCORE_FORMAT_ATTR(cmask5, cmask, "config:24-28");
DEFINE_UNCORE_FORMAT_ATTR(cmask8, cmask, "config:24-31");

/* Sandy Bridge uncore support */
static void snb_uncore_msr_enable_event(struct intel_uncore_box *box,
                                        struct perf_event *event)
{
        struct hw_perf_event *hwc = &event->hw;

        if (hwc->idx < UNCORE_PMC_IDX_FIXED)
                wrmsrl(hwc->config_base, hwc->config | SNB_UNC_CTL_EN);
        else
                wrmsrl(hwc->config_base, SNB_UNC_CTL_EN);
}

static void snb_uncore_msr_disable_event(struct intel_uncore_box *box,
                                        struct perf_event *event)
{
        wrmsrl(event->hw.config_base, 0);
}

static u64 snb_uncore_msr_read_counter(struct intel_uncore_box *box,
                                        struct perf_event *event)
{
        u64 count;
        rdmsrl(event->hw.event_base, count);
        return count;
}

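/*
 * The global control MSR is shared by all C-Boxes, so only the first
 * C-Box pmu instance (pmu_idx 0) programs it.
 */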
static void snb_uncore_msr_init_box(struct intel_uncore_box *box)
{
        if (box->pmu->pmu_idx == 0) {
                wrmsrl(SNB_UNC_PERF_GLOBAL_CTL,
                        SNB_UNC_GLOBAL_CTL_EN | SNB_UNC_GLOBAL_CTL_CORE_ALL);
        }
}

static struct attribute *snb_uncore_formats_attr[] = {
        &format_attr_event.attr,
        &format_attr_umask.attr,
        &format_attr_edge.attr,
        &format_attr_inv.attr,
        &format_attr_cmask5.attr,
        NULL,
};

static struct attribute_group snb_uncore_format_group = {
        .name = "format",
        .attrs = snb_uncore_formats_attr,
};

static struct intel_uncore_ops snb_uncore_msr_ops = {
        .init_box       = snb_uncore_msr_init_box,
        .disable_event  = snb_uncore_msr_disable_event,
        .enable_event   = snb_uncore_msr_enable_event,
        .read_counter   = snb_uncore_msr_read_counter,
};

static struct event_constraint snb_uncore_cbox_constraints[] = {
        UNCORE_EVENT_CONSTRAINT(0x80, 0x1),
        UNCORE_EVENT_CONSTRAINT(0x83, 0x1),
        EVENT_CONSTRAINT_END
};

static struct intel_uncore_type snb_uncore_cbox = {
        .name           = "cbox",
        .num_counters   = 2,
        .num_boxes      = 4,
        .perf_ctr_bits  = 44,
        .fixed_ctr_bits = 48,
        .perf_ctr       = SNB_UNC_CBO_0_PER_CTR0,
        .event_ctl      = SNB_UNC_CBO_0_PERFEVTSEL0,
        .fixed_ctr      = SNB_UNC_FIXED_CTR,
        .fixed_ctl      = SNB_UNC_FIXED_CTR_CTRL,
        .single_fixed   = 1,
        .event_mask     = SNB_UNC_RAW_EVENT_MASK,
        .msr_offset     = SNB_UNC_CBO_MSR_OFFSET,
        .constraints    = snb_uncore_cbox_constraints,
        .ops            = &snb_uncore_msr_ops,
        .format_group   = &snb_uncore_format_group,
};

static struct intel_uncore_type *snb_msr_uncores[] = {
        &snb_uncore_cbox,
        NULL,
};
/* end of Sandy Bridge uncore support */

/* Nehalem uncore support */
static void nhm_uncore_msr_disable_box(struct intel_uncore_box *box)
{
        wrmsrl(NHM_UNC_PERF_GLOBAL_CTL, 0);
}

static void nhm_uncore_msr_enable_box(struct intel_uncore_box *box)
{
        wrmsrl(NHM_UNC_PERF_GLOBAL_CTL,
                NHM_UNC_GLOBAL_CTL_EN_PC_ALL | NHM_UNC_GLOBAL_CTL_EN_FC);
}

static void nhm_uncore_msr_enable_event(struct intel_uncore_box *box,
                                        struct perf_event *event)
{
        struct hw_perf_event *hwc = &event->hw;

        if (hwc->idx < UNCORE_PMC_IDX_FIXED)
                wrmsrl(hwc->config_base, hwc->config | SNB_UNC_CTL_EN);
        else
                wrmsrl(hwc->config_base, NHM_UNC_FIXED_CTR_CTL_EN);
}

static struct attribute *nhm_uncore_formats_attr[] = {
        &format_attr_event.attr,
        &format_attr_umask.attr,
        &format_attr_edge.attr,
        &format_attr_inv.attr,
        &format_attr_cmask8.attr,
        NULL,
};

static struct attribute_group nhm_uncore_format_group = {
        .name = "format",
        .attrs = nhm_uncore_formats_attr,
};

static struct uncore_event_desc nhm_uncore_events[] = {
        INTEL_UNCORE_EVENT_DESC(CLOCKTICKS, "config=0xffff"),
        /* full cache line writes to DRAM */
        INTEL_UNCORE_EVENT_DESC(QMC_WRITES_FULL_ANY, "event=0x2f,umask=0xf"),
        /* Quickpath Memory Controller normal priority read requests */
        INTEL_UNCORE_EVENT_DESC(QMC_NORMAL_READS_ANY, "event=0x2c,umask=0xf"),
        /* Quickpath Home Logic read requests from the IOH */
        INTEL_UNCORE_EVENT_DESC(QHL_REQUEST_IOH_READS,
                                "event=0x20,umask=0x1"),
        /* Quickpath Home Logic write requests from the IOH */
        INTEL_UNCORE_EVENT_DESC(QHL_REQUEST_IOH_WRITES,
                                "event=0x20,umask=0x2"),
        /* Quickpath Home Logic read requests from a remote socket */
        INTEL_UNCORE_EVENT_DESC(QHL_REQUEST_REMOTE_READS,
                                "event=0x20,umask=0x4"),
        /* Quickpath Home Logic write requests from a remote socket */
        INTEL_UNCORE_EVENT_DESC(QHL_REQUEST_REMOTE_WRITES,
                                "event=0x20,umask=0x8"),
        /* Quickpath Home Logic read requests from the local socket */
        INTEL_UNCORE_EVENT_DESC(QHL_REQUEST_LOCAL_READS,
                                "event=0x20,umask=0x10"),
        /* Quickpath Home Logic write requests from the local socket */
        INTEL_UNCORE_EVENT_DESC(QHL_REQUEST_LOCAL_WRITES,
                                "event=0x20,umask=0x20"),
        { /* end: all zeroes */ },
};

static struct intel_uncore_ops nhm_uncore_msr_ops = {
        .disable_box    = nhm_uncore_msr_disable_box,
        .enable_box     = nhm_uncore_msr_enable_box,
        .disable_event  = snb_uncore_msr_disable_event,
        .enable_event   = nhm_uncore_msr_enable_event,
        .read_counter   = snb_uncore_msr_read_counter,
};

static struct intel_uncore_type nhm_uncore = {
        .name           = "",
        .num_counters   = 8,
        .num_boxes      = 1,
        .perf_ctr_bits  = 48,
        .fixed_ctr_bits = 48,
        .event_ctl      = NHM_UNC_PERFEVTSEL0,
        .perf_ctr       = NHM_UNC_UNCORE_PMC0,
        .fixed_ctr      = NHM_UNC_FIXED_CTR,
        .fixed_ctl      = NHM_UNC_FIXED_CTR_CTRL,
        .event_mask     = NHM_UNC_RAW_EVENT_MASK,
        .event_descs    = nhm_uncore_events,
        .ops            = &nhm_uncore_msr_ops,
        .format_group   = &nhm_uncore_format_group,
};

static struct intel_uncore_type *nhm_msr_uncores[] = {
        &nhm_uncore,
        NULL,
};
/* end of Nehalem uncore support */

static void uncore_assign_hw_event(struct intel_uncore_box *box,
                                struct perf_event *event, int idx)
{
        struct hw_perf_event *hwc = &event->hw;

        hwc->idx = idx;
        hwc->last_tag = ++box->tags[idx];

        if (hwc->idx == UNCORE_PMC_IDX_FIXED) {
                hwc->event_base = uncore_fixed_ctr(box);
                hwc->config_base = uncore_fixed_ctl(box);
                return;
        }

        hwc->config_base = uncore_event_ctl(box, hwc->idx);
        hwc->event_base  = uncore_perf_ctr(box, hwc->idx);
}

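/*
 * Uncore counters are narrower than 64 bits.  Shift both the old and the
 * new raw value up to the top of a 64-bit word before subtracting, so the
 * difference wraps at the counter width, then shift the delta back down.
 */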
static void uncore_perf_event_update(struct intel_uncore_box *box,
                                        struct perf_event *event)
{
        u64 prev_count, new_count, delta;
        int shift;

        if (event->hw.idx >= UNCORE_PMC_IDX_FIXED)
                shift = 64 - uncore_fixed_ctr_bits(box);
        else
                shift = 64 - uncore_perf_ctr_bits(box);

        /* the hrtimer might modify the previous event value */
again:
        prev_count = local64_read(&event->hw.prev_count);
        new_count = uncore_read_counter(box, event);
        if (local64_xchg(&event->hw.prev_count, new_count) != prev_count)
                goto again;

        delta = (new_count << shift) - (prev_count << shift);
        delta >>= shift;

        local64_add(delta, &event->count);
}

/*
 * The overflow interrupt is unavailable for SandyBridge-EP and is broken
 * for SandyBridge, so we use an hrtimer to periodically poll the counter
 * and avoid overflow.
 */
static enum hrtimer_restart uncore_pmu_hrtimer(struct hrtimer *hrtimer)
{
        struct intel_uncore_box *box;
        unsigned long flags;
        int bit;

        box = container_of(hrtimer, struct intel_uncore_box, hrtimer);
        if (!box->n_active || box->cpu != smp_processor_id())
                return HRTIMER_NORESTART;
        /*
         * disable local interrupts to prevent uncore_pmu_event_start/stop
         * from interrupting the update process
         */
        local_irq_save(flags);

        for_each_set_bit(bit, box->active_mask, UNCORE_PMC_IDX_MAX)
                uncore_perf_event_update(box, box->events[bit]);

        local_irq_restore(flags);

        hrtimer_forward_now(hrtimer, ns_to_ktime(UNCORE_PMU_HRTIMER_INTERVAL));
        return HRTIMER_RESTART;
}

static void uncore_pmu_start_hrtimer(struct intel_uncore_box *box)
{
        __hrtimer_start_range_ns(&box->hrtimer,
                        ns_to_ktime(UNCORE_PMU_HRTIMER_INTERVAL), 0,
                        HRTIMER_MODE_REL_PINNED, 0);
}

static void uncore_pmu_cancel_hrtimer(struct intel_uncore_box *box)
{
        hrtimer_cancel(&box->hrtimer);
}

static void uncore_pmu_init_hrtimer(struct intel_uncore_box *box)
{
        hrtimer_init(&box->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
        box->hrtimer.function = uncore_pmu_hrtimer;
}

struct intel_uncore_box *uncore_alloc_box(int cpu)
{
        struct intel_uncore_box *box;

        box = kmalloc_node(sizeof(*box), GFP_KERNEL | __GFP_ZERO,
                           cpu_to_node(cpu));
        if (!box)
                return NULL;

        uncore_pmu_init_hrtimer(box);
        atomic_set(&box->refcnt, 1);
        box->cpu = -1;
        box->phys_id = -1;

        return box;
}

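/*
 * Return the box this cpu should use for the given pmu.  The per-cpu
 * pointer acts as a cache; on a miss, look up the box that matches the
 * cpu's physical package id and cache it for later calls.
 */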
static struct intel_uncore_box *
uncore_pmu_to_box(struct intel_uncore_pmu *pmu, int cpu)
{
        struct intel_uncore_box *box;

        box = *per_cpu_ptr(pmu->box, cpu);
        if (box)
                return box;

        raw_spin_lock(&uncore_box_lock);
        list_for_each_entry(box, &pmu->box_list, list) {
                if (box->phys_id == topology_physical_package_id(cpu)) {
                        atomic_inc(&box->refcnt);
                        *per_cpu_ptr(pmu->box, cpu) = box;
                        break;
                }
        }
        raw_spin_unlock(&uncore_box_lock);

        return *per_cpu_ptr(pmu->box, cpu);
}

static struct intel_uncore_pmu *uncore_event_to_pmu(struct perf_event *event)
{
        return container_of(event->pmu, struct intel_uncore_pmu, pmu);
}

static struct intel_uncore_box *uncore_event_to_box(struct perf_event *event)
{
        /*
         * The perf core schedules events on the basis of cpu, but uncore
         * events are collected by one of the cpus inside a physical package.
         */
        return uncore_pmu_to_box(uncore_event_to_pmu(event),
                                 smp_processor_id());
}

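/*
 * Collect the leader (and, if dogrp is set, its active siblings) into the
 * box's event list.  Fails when the box would need more counters than it has.
 */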
static int uncore_collect_events(struct intel_uncore_box *box,
                                struct perf_event *leader, bool dogrp)
{
        struct perf_event *event;
        int n, max_count;

        max_count = box->pmu->type->num_counters;
        if (box->pmu->type->fixed_ctl)
                max_count++;

        if (box->n_events >= max_count)
                return -EINVAL;

        n = box->n_events;
        box->event_list[n] = leader;
        n++;
        if (!dogrp)
                return n;

        list_for_each_entry(event, &leader->sibling_list, group_entry) {
                if (event->state <= PERF_EVENT_STATE_OFF)
                        continue;

                if (n >= max_count)
                        return -EINVAL;

                box->event_list[n] = event;
                n++;
        }
        return n;
}

static struct event_constraint *
uncore_event_constraint(struct intel_uncore_type *type,
                        struct perf_event *event)
{
        struct event_constraint *c;

        if (event->hw.config == ~0ULL)
                return &constraint_fixed;

        if (type->constraints) {
                for_each_event_constraint(c, type->constraints) {
                        if ((event->hw.config & c->cmask) == c->code)
                                return c;
                }
        }

        return &type->unconstrainted;
}

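/*
 * Assign a counter index to each collected event.  The fast path tries to
 * keep every event on the counter it used before; if that fails, fall back
 * to the generic weight-ordered assignment in perf_assign_events().
 */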
static int uncore_assign_events(struct intel_uncore_box *box,
                                int assign[], int n)
{
        unsigned long used_mask[BITS_TO_LONGS(UNCORE_PMC_IDX_MAX)];
        struct event_constraint *c, *constraints[UNCORE_PMC_IDX_MAX];
        int i, ret, wmin, wmax;
        struct hw_perf_event *hwc;

        bitmap_zero(used_mask, UNCORE_PMC_IDX_MAX);

        for (i = 0, wmin = UNCORE_PMC_IDX_MAX, wmax = 0; i < n; i++) {
                c = uncore_event_constraint(box->pmu->type,
                                box->event_list[i]);
                constraints[i] = c;
                wmin = min(wmin, c->weight);
                wmax = max(wmax, c->weight);
        }

        /* fastpath, try to reuse previous register */
        for (i = 0; i < n; i++) {
                hwc = &box->event_list[i]->hw;
                c = constraints[i];

                /* never assigned */
                if (hwc->idx == -1)
                        break;

                /* constraint still honored */
                if (!test_bit(hwc->idx, c->idxmsk))
                        break;

                /* not already used */
                if (test_bit(hwc->idx, used_mask))
                        break;

                __set_bit(hwc->idx, used_mask);
                assign[i] = hwc->idx;
        }
        if (i == n)
                return 0;

        /* slow path */
        ret = perf_assign_events(constraints, n, wmin, wmax, assign);
        return ret ? -EINVAL : 0;
}

static void uncore_pmu_event_start(struct perf_event *event, int flags)
{
        struct intel_uncore_box *box = uncore_event_to_box(event);
        int idx = event->hw.idx;

        if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED)))
                return;

        if (WARN_ON_ONCE(idx == -1 || idx >= UNCORE_PMC_IDX_MAX))
                return;

        event->hw.state = 0;
        box->events[idx] = event;
        box->n_active++;
        __set_bit(idx, box->active_mask);

        local64_set(&event->hw.prev_count, uncore_read_counter(box, event));
        uncore_enable_event(box, event);

        if (box->n_active == 1) {
                uncore_enable_box(box);
                uncore_pmu_start_hrtimer(box);
        }
}

static void uncore_pmu_event_stop(struct perf_event *event, int flags)
{
        struct intel_uncore_box *box = uncore_event_to_box(event);
        struct hw_perf_event *hwc = &event->hw;

        if (__test_and_clear_bit(hwc->idx, box->active_mask)) {
                uncore_disable_event(box, event);
                box->n_active--;
                box->events[hwc->idx] = NULL;
                WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
                hwc->state |= PERF_HES_STOPPED;

                if (box->n_active == 0) {
                        uncore_disable_box(box);
                        uncore_pmu_cancel_hrtimer(box);
                }
        }

        if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) {
                /*
                 * Drain the remaining delta count out of an event
                 * that we are disabling:
                 */
                uncore_perf_event_update(box, event);
                hwc->state |= PERF_HES_UPTODATE;
        }
}

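/*
 * Add an event to the box: collect it into the event list, compute a new
 * counter assignment, stop the events that have to move, then (re)program
 * and restart everything that is not explicitly kept stopped.
 */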
static int uncore_pmu_event_add(struct perf_event *event, int flags)
{
        struct intel_uncore_box *box = uncore_event_to_box(event);
        struct hw_perf_event *hwc = &event->hw;
        int assign[UNCORE_PMC_IDX_MAX];
        int i, n, ret;

        if (!box)
                return -ENODEV;

        ret = n = uncore_collect_events(box, event, false);
        if (ret < 0)
                return ret;

        hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
        if (!(flags & PERF_EF_START))
                hwc->state |= PERF_HES_ARCH;

        ret = uncore_assign_events(box, assign, n);
        if (ret)
                return ret;

        /* save events moving to new counters */
        for (i = 0; i < box->n_events; i++) {
                event = box->event_list[i];
                hwc = &event->hw;

                if (hwc->idx == assign[i] &&
                        hwc->last_tag == box->tags[assign[i]])
                        continue;
                /*
                 * Ensure we don't accidentally enable a stopped
                 * counter simply because we rescheduled.
                 */
                if (hwc->state & PERF_HES_STOPPED)
                        hwc->state |= PERF_HES_ARCH;

                uncore_pmu_event_stop(event, PERF_EF_UPDATE);
        }

        /* reprogram moved events into new counters */
        for (i = 0; i < n; i++) {
                event = box->event_list[i];
                hwc = &event->hw;

                if (hwc->idx != assign[i] ||
                        hwc->last_tag != box->tags[assign[i]])
                        uncore_assign_hw_event(box, event, assign[i]);
                else if (i < box->n_events)
                        continue;

                if (hwc->state & PERF_HES_ARCH)
                        continue;

                uncore_pmu_event_start(event, 0);
        }
        box->n_events = n;

        return 0;
}

static void uncore_pmu_event_del(struct perf_event *event, int flags)
{
        struct intel_uncore_box *box = uncore_event_to_box(event);
        int i;

        uncore_pmu_event_stop(event, PERF_EF_UPDATE);

        for (i = 0; i < box->n_events; i++) {
                if (event == box->event_list[i]) {
                        while (++i < box->n_events)
                                box->event_list[i - 1] = box->event_list[i];

                        --box->n_events;
                        break;
                }
        }

        event->hw.idx = -1;
        event->hw.last_tag = ~0ULL;
}

static void uncore_pmu_event_read(struct perf_event *event)
{
        struct intel_uncore_box *box = uncore_event_to_box(event);
        uncore_perf_event_update(box, event);
}

/*
 * validation ensures the group can be loaded onto the
 * PMU if it was the only group available.
 */
static int uncore_validate_group(struct intel_uncore_pmu *pmu,
                                struct perf_event *event)
{
        struct perf_event *leader = event->group_leader;
        struct intel_uncore_box *fake_box;
        int assign[UNCORE_PMC_IDX_MAX];
        int ret = -EINVAL, n;

        fake_box = uncore_alloc_box(smp_processor_id());
        if (!fake_box)
                return -ENOMEM;

        fake_box->pmu = pmu;
        /*
         * the event is not yet connected with its
         * siblings therefore we must first collect
         * existing siblings, then add the new event
         * before we can simulate the scheduling
         */
        n = uncore_collect_events(fake_box, leader, true);
        if (n < 0)
                goto out;

        fake_box->n_events = n;
        n = uncore_collect_events(fake_box, event, false);
        if (n < 0)
                goto out;

        fake_box->n_events = n;

        ret = uncore_assign_events(fake_box, assign, n);
out:
        kfree(fake_box);
        return ret;
}

int uncore_pmu_event_init(struct perf_event *event)
{
        struct intel_uncore_pmu *pmu;
        struct intel_uncore_box *box;
        struct hw_perf_event *hwc = &event->hw;
        int ret;

        if (event->attr.type != event->pmu->type)
                return -ENOENT;

        pmu = uncore_event_to_pmu(event);
        /* no device found for this pmu */
        if (pmu->func_id < 0)
                return -ENOENT;

        /*
         * The uncore PMU always measures at all privilege levels, so it
         * doesn't make sense to specify any exclude bits.
         */
        if (event->attr.exclude_user || event->attr.exclude_kernel ||
                        event->attr.exclude_hv || event->attr.exclude_idle)
                return -EINVAL;

        /* Sampling not supported yet */
        if (hwc->sample_period)
                return -EINVAL;

        /*
         * Place all uncore events for a particular physical package
         * onto a single cpu
         */
        if (event->cpu < 0)
                return -EINVAL;
        box = uncore_pmu_to_box(pmu, event->cpu);
        if (!box || box->cpu < 0)
                return -EINVAL;
        event->cpu = box->cpu;

        if (event->attr.config == UNCORE_FIXED_EVENT) {
                /* no fixed counter */
                if (!pmu->type->fixed_ctl)
                        return -EINVAL;
                /*
                 * if there is only one fixed counter, only the first pmu
                 * can access the fixed counter
                 */
                if (pmu->type->single_fixed && pmu->pmu_idx > 0)
                        return -EINVAL;
                hwc->config = ~0ULL;
        } else {
                hwc->config = event->attr.config & pmu->type->event_mask;
        }

        event->hw.idx = -1;
        event->hw.last_tag = ~0ULL;

        if (event->group_leader != event)
                ret = uncore_validate_group(pmu, event);
        else
                ret = 0;

        return ret;
}

static int __init uncore_pmu_register(struct intel_uncore_pmu *pmu)
{
        int ret;

        pmu->pmu = (struct pmu) {
                .attr_groups    = pmu->type->attr_groups,
                .task_ctx_nr    = perf_invalid_context,
                .event_init     = uncore_pmu_event_init,
                .add            = uncore_pmu_event_add,
                .del            = uncore_pmu_event_del,
                .start          = uncore_pmu_event_start,
                .stop           = uncore_pmu_event_stop,
                .read           = uncore_pmu_event_read,
        };

        if (pmu->type->num_boxes == 1) {
                if (strlen(pmu->type->name) > 0)
                        sprintf(pmu->name, "uncore_%s", pmu->type->name);
                else
                        sprintf(pmu->name, "uncore");
        } else {
                sprintf(pmu->name, "uncore_%s_%d", pmu->type->name,
                        pmu->pmu_idx);
        }

        ret = perf_pmu_register(&pmu->pmu, pmu->name, -1);
        return ret;
}

static void __init uncore_type_exit(struct intel_uncore_type *type)
{
        int i;

        for (i = 0; i < type->num_boxes; i++)
                free_percpu(type->pmus[i].box);
        kfree(type->pmus);
        type->pmus = NULL;
        kfree(type->attr_groups[1]);
        type->attr_groups[1] = NULL;
}

static void uncore_types_exit(struct intel_uncore_type **types)
{
        int i;
        for (i = 0; types[i]; i++)
                uncore_type_exit(types[i]);
}

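/*
 * Allocate one pmu per box for this uncore type and, if the type provides
 * event descriptions, build the "events" sysfs attribute group from them.
 */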
static int __init uncore_type_init(struct intel_uncore_type *type)
{
        struct intel_uncore_pmu *pmus;
        struct attribute_group *events_group;
        struct attribute **attrs;
        int i, j;

        pmus = kzalloc(sizeof(*pmus) * type->num_boxes, GFP_KERNEL);
        if (!pmus)
                return -ENOMEM;

        type->unconstrainted = (struct event_constraint)
                __EVENT_CONSTRAINT(0, (1ULL << type->num_counters) - 1,
                                0, type->num_counters, 0);

        for (i = 0; i < type->num_boxes; i++) {
                pmus[i].func_id = -1;
                pmus[i].pmu_idx = i;
                pmus[i].type = type;
                INIT_LIST_HEAD(&pmus[i].box_list);
                pmus[i].box = alloc_percpu(struct intel_uncore_box *);
                if (!pmus[i].box)
                        goto fail;
        }

        if (type->event_descs) {
                i = 0;
                while (type->event_descs[i].attr.attr.name)
                        i++;

                events_group = kzalloc(sizeof(struct attribute *) * (i + 1) +
                                        sizeof(*events_group), GFP_KERNEL);
                if (!events_group)
                        goto fail;

                attrs = (struct attribute **)(events_group + 1);
                events_group->name = "events";
                events_group->attrs = attrs;

                for (j = 0; j < i; j++)
                        attrs[j] = &type->event_descs[j].attr.attr;

                type->attr_groups[1] = events_group;
        }

        type->pmus = pmus;
        return 0;
fail:
        uncore_type_exit(type);
        return -ENOMEM;
}

static int __init uncore_types_init(struct intel_uncore_type **types)
{
        int i, ret;

        for (i = 0; types[i]; i++) {
                ret = uncore_type_init(types[i]);
                if (ret)
                        goto fail;
        }
        return 0;
fail:
        while (--i >= 0)
                uncore_type_exit(types[i]);
        return ret;
}

static struct pci_driver *uncore_pci_driver;
static bool pcidrv_registered;

/*
 * add a pci uncore device
 */
static int __devinit uncore_pci_add(struct intel_uncore_type *type,
                                    struct pci_dev *pdev)
{
        struct intel_uncore_pmu *pmu;
        struct intel_uncore_box *box;
        int i, phys_id;

        phys_id = pcibus_to_physid[pdev->bus->number];
        if (phys_id < 0)
                return -ENODEV;

        box = uncore_alloc_box(0);
        if (!box)
                return -ENOMEM;

        /*
         * For a performance monitoring unit with multiple boxes,
         * each box has a different function id.
         */
        for (i = 0; i < type->num_boxes; i++) {
                pmu = &type->pmus[i];
                if (pmu->func_id == pdev->devfn)
                        break;
                if (pmu->func_id < 0) {
                        pmu->func_id = pdev->devfn;
                        break;
                }
                pmu = NULL;
        }

        if (!pmu) {
                kfree(box);
                return -EINVAL;
        }

        box->phys_id = phys_id;
        box->pci_dev = pdev;
        box->pmu = pmu;
        uncore_box_init(box);
        pci_set_drvdata(pdev, box);

        raw_spin_lock(&uncore_box_lock);
        list_add_tail(&box->list, &pmu->box_list);
        raw_spin_unlock(&uncore_box_lock);

        return 0;
}

static void __devexit uncore_pci_remove(struct pci_dev *pdev)
{
        struct intel_uncore_box *box = pci_get_drvdata(pdev);
        struct intel_uncore_pmu *pmu = box->pmu;
        int cpu, phys_id = pcibus_to_physid[pdev->bus->number];

        if (WARN_ON_ONCE(phys_id != box->phys_id))
                return;

        raw_spin_lock(&uncore_box_lock);
        list_del(&box->list);
        raw_spin_unlock(&uncore_box_lock);

        for_each_possible_cpu(cpu) {
                if (*per_cpu_ptr(pmu->box, cpu) == box) {
                        *per_cpu_ptr(pmu->box, cpu) = NULL;
                        atomic_dec(&box->refcnt);
                }
        }

        WARN_ON_ONCE(atomic_read(&box->refcnt) != 1);
        kfree(box);
}

static int __devinit uncore_pci_probe(struct pci_dev *pdev,
                                const struct pci_device_id *id)
{
        struct intel_uncore_type *type;

        type = (struct intel_uncore_type *)id->driver_data;
        return uncore_pci_add(type, pdev);
}

static int __init uncore_pci_init(void)
{
        int ret;

        switch (boot_cpu_data.x86_model) {
        default:
                return 0;
        }

        ret = uncore_types_init(pci_uncores);
        if (ret)
                return ret;

        uncore_pci_driver->probe = uncore_pci_probe;
        uncore_pci_driver->remove = uncore_pci_remove;

        ret = pci_register_driver(uncore_pci_driver);
        if (ret == 0)
                pcidrv_registered = true;
        else
                uncore_types_exit(pci_uncores);

        return ret;
}

static void __init uncore_pci_exit(void)
{
        if (pcidrv_registered) {
                pcidrv_registered = false;
                pci_unregister_driver(uncore_pci_driver);
                uncore_types_exit(pci_uncores);
        }
}

static void __cpuinit uncore_cpu_dying(int cpu)
{
        struct intel_uncore_type *type;
        struct intel_uncore_pmu *pmu;
        struct intel_uncore_box *box;
        int i, j;

        for (i = 0; msr_uncores[i]; i++) {
                type = msr_uncores[i];
                for (j = 0; j < type->num_boxes; j++) {
                        pmu = &type->pmus[j];
                        box = *per_cpu_ptr(pmu->box, cpu);
                        *per_cpu_ptr(pmu->box, cpu) = NULL;
                        if (box && atomic_dec_and_test(&box->refcnt))
                                kfree(box);
                }
        }
}

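/*
 * A box is shared by all cpus in a physical package.  If another online
 * cpu in the same package already owns a box, reuse it and free the one
 * allocated for this cpu; otherwise adopt the fresh box and initialize it.
 */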
static int __cpuinit uncore_cpu_starting(int cpu)
{
        struct intel_uncore_type *type;
        struct intel_uncore_pmu *pmu;
        struct intel_uncore_box *box, *exist;
        int i, j, k, phys_id;

        phys_id = topology_physical_package_id(cpu);

        for (i = 0; msr_uncores[i]; i++) {
                type = msr_uncores[i];
                for (j = 0; j < type->num_boxes; j++) {
                        pmu = &type->pmus[j];
                        box = *per_cpu_ptr(pmu->box, cpu);
                        /* called by uncore_cpu_init? */
                        if (box && box->phys_id >= 0) {
                                uncore_box_init(box);
                                continue;
                        }

                        for_each_online_cpu(k) {
                                exist = *per_cpu_ptr(pmu->box, k);
                                if (exist && exist->phys_id == phys_id) {
                                        atomic_inc(&exist->refcnt);
                                        *per_cpu_ptr(pmu->box, cpu) = exist;
                                        kfree(box);
                                        box = NULL;
                                        break;
                                }
                        }

                        if (box) {
                                box->phys_id = phys_id;
                                uncore_box_init(box);
                        }
                }
        }
        return 0;
}

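/*
 * Pre-allocate a box for each msr uncore pmu on behalf of the incoming
 * cpu; uncore_cpu_starting() decides whether that box is kept or replaced
 * by an existing one from the same package.
 */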
static int __cpuinit uncore_cpu_prepare(int cpu, int phys_id)
{
        struct intel_uncore_type *type;
        struct intel_uncore_pmu *pmu;
        struct intel_uncore_box *box;
        int i, j;

        for (i = 0; msr_uncores[i]; i++) {
                type = msr_uncores[i];
                for (j = 0; j < type->num_boxes; j++) {
                        pmu = &type->pmus[j];
                        if (pmu->func_id < 0)
                                pmu->func_id = j;

                        box = uncore_alloc_box(cpu);
                        if (!box)
                                return -ENOMEM;

                        box->pmu = pmu;
                        box->phys_id = phys_id;
                        *per_cpu_ptr(pmu->box, cpu) = box;
                }
        }
        return 0;
}

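/*
 * Move the boxes of the given uncore types from old_cpu to new_cpu.  With
 * old_cpu < 0 the boxes are simply claimed by new_cpu; otherwise the perf
 * context is migrated as well, so active events keep counting on new_cpu.
 */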
static void __cpuinit uncore_change_context(struct intel_uncore_type **uncores,
                                            int old_cpu, int new_cpu)
{
        struct intel_uncore_type *type;
        struct intel_uncore_pmu *pmu;
        struct intel_uncore_box *box;
        int i, j;

        for (i = 0; uncores[i]; i++) {
                type = uncores[i];
                for (j = 0; j < type->num_boxes; j++) {
                        pmu = &type->pmus[j];
                        if (old_cpu < 0)
                                box = uncore_pmu_to_box(pmu, new_cpu);
                        else
                                box = uncore_pmu_to_box(pmu, old_cpu);
                        if (!box)
                                continue;

                        if (old_cpu < 0) {
                                WARN_ON_ONCE(box->cpu != -1);
                                box->cpu = new_cpu;
                                continue;
                        }

                        WARN_ON_ONCE(box->cpu != old_cpu);
                        if (new_cpu >= 0) {
                                uncore_pmu_cancel_hrtimer(box);
                                perf_pmu_migrate_context(&pmu->pmu,
                                                old_cpu, new_cpu);
                                box->cpu = new_cpu;
                        } else {
                                box->cpu = -1;
                        }
                }
        }
}

static void __cpuinit uncore_event_exit_cpu(int cpu)
{
        int i, phys_id, target;

        /* if exiting cpu is used for collecting uncore events */
        if (!cpumask_test_and_clear_cpu(cpu, &uncore_cpu_mask))
                return;

        /* find a new cpu to collect uncore events */
        phys_id = topology_physical_package_id(cpu);
        target = -1;
        for_each_online_cpu(i) {
                if (i == cpu)
                        continue;
                if (phys_id == topology_physical_package_id(i)) {
                        target = i;
                        break;
                }
        }

        /* migrate uncore events to the new cpu */
        if (target >= 0)
                cpumask_set_cpu(target, &uncore_cpu_mask);

        uncore_change_context(msr_uncores, cpu, target);
        uncore_change_context(pci_uncores, cpu, target);
}

static void __cpuinit uncore_event_init_cpu(int cpu)
{
        int i, phys_id;

        phys_id = topology_physical_package_id(cpu);
        for_each_cpu(i, &uncore_cpu_mask) {
                if (phys_id == topology_physical_package_id(i))
                        return;
        }

        cpumask_set_cpu(cpu, &uncore_cpu_mask);

        uncore_change_context(msr_uncores, -1, cpu);
        uncore_change_context(pci_uncores, -1, cpu);
}

static int __cpuinit uncore_cpu_notifier(struct notifier_block *self,
                                         unsigned long action, void *hcpu)
{
        unsigned int cpu = (long)hcpu;

        /* allocate/free data structure for uncore box */
        switch (action & ~CPU_TASKS_FROZEN) {
        case CPU_UP_PREPARE:
                uncore_cpu_prepare(cpu, -1);
                break;
        case CPU_STARTING:
                uncore_cpu_starting(cpu);
                break;
        case CPU_UP_CANCELED:
        case CPU_DYING:
                uncore_cpu_dying(cpu);
                break;
        default:
                break;
        }

        /* select the cpu that collects uncore events */
        switch (action & ~CPU_TASKS_FROZEN) {
        case CPU_DOWN_FAILED:
        case CPU_STARTING:
                uncore_event_init_cpu(cpu);
                break;
        case CPU_DOWN_PREPARE:
                uncore_event_exit_cpu(cpu);
                break;
        default:
                break;
        }

        return NOTIFY_OK;
}

static struct notifier_block uncore_cpu_nb __cpuinitdata = {
        .notifier_call = uncore_cpu_notifier,
        /*
         * to migrate uncore events, our notifier should be executed
         * before perf core's notifier.
         */
        .priority = CPU_PRI_PERF + 1,
};

static void __init uncore_cpu_setup(void *dummy)
{
        uncore_cpu_starting(smp_processor_id());
}

static int __init uncore_cpu_init(void)
{
        int ret, cpu;

        switch (boot_cpu_data.x86_model) {
        case 26: /* Nehalem */
        case 30:
        case 37: /* Westmere */
        case 44:
                msr_uncores = nhm_msr_uncores;
                break;
        case 42: /* Sandy Bridge */
                msr_uncores = snb_msr_uncores;
                break;
        default:
                return 0;
        }

        ret = uncore_types_init(msr_uncores);
        if (ret)
                return ret;

        get_online_cpus();

        for_each_online_cpu(cpu) {
                int i, phys_id = topology_physical_package_id(cpu);

                for_each_cpu(i, &uncore_cpu_mask) {
                        if (phys_id == topology_physical_package_id(i)) {
                                phys_id = -1;
                                break;
                        }
                }
                if (phys_id < 0)
                        continue;

                uncore_cpu_prepare(cpu, phys_id);
                uncore_event_init_cpu(cpu);
        }
        on_each_cpu(uncore_cpu_setup, NULL, 1);

        register_cpu_notifier(&uncore_cpu_nb);

        put_online_cpus();

        return 0;
}

static int __init uncore_pmus_register(void)
{
        struct intel_uncore_pmu *pmu;
        struct intel_uncore_type *type;
        int i, j;

        for (i = 0; msr_uncores[i]; i++) {
                type = msr_uncores[i];
                for (j = 0; j < type->num_boxes; j++) {
                        pmu = &type->pmus[j];
                        uncore_pmu_register(pmu);
                }
        }

        for (i = 0; pci_uncores[i]; i++) {
                type = pci_uncores[i];
                for (j = 0; j < type->num_boxes; j++) {
                        pmu = &type->pmus[j];
                        uncore_pmu_register(pmu);
                }
        }

        return 0;
}

static int __init intel_uncore_init(void)
{
        int ret;

        if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
                return -ENODEV;

        ret = uncore_pci_init();
        if (ret)
                goto fail;
        ret = uncore_cpu_init();
        if (ret) {
                uncore_pci_exit();
                goto fail;
        }

        uncore_pmus_register();
        return 0;
fail:
        return ret;
}
device_initcall(intel_uncore_init);