perf/x86: Add Intel Nehalem and Sandy Bridge uncore PMU support
[firefly-linux-kernel-4.4.55.git] / arch/x86/kernel/cpu/perf_event_intel_uncore.c
#include "perf_event_intel_uncore.h"

static struct intel_uncore_type *empty_uncore[] = { NULL, };
static struct intel_uncore_type **msr_uncores = empty_uncore;

/* mask of cpus that collect uncore events */
static cpumask_t uncore_cpu_mask;

/* constraint for the fixed counter */
static struct event_constraint constraint_fixed =
        EVENT_CONSTRAINT(~0ULL, 1 << UNCORE_PMC_IDX_FIXED, ~0ULL);

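/*
 * The format attributes below describe how a raw event config is laid out
 * for the uncore PMUs; they are exported through each registered PMU's
 * "format" sysfs group (cmask is 5 bits wide on Sandy Bridge and 8 bits
 * wide on Nehalem/Westmere).
 */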
DEFINE_UNCORE_FORMAT_ATTR(event, event, "config:0-7");
DEFINE_UNCORE_FORMAT_ATTR(umask, umask, "config:8-15");
DEFINE_UNCORE_FORMAT_ATTR(edge, edge, "config:18");
DEFINE_UNCORE_FORMAT_ATTR(inv, inv, "config:23");
DEFINE_UNCORE_FORMAT_ATTR(cmask5, cmask, "config:24-28");
DEFINE_UNCORE_FORMAT_ATTR(cmask8, cmask, "config:24-31");

/* Sandy Bridge uncore support */
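/*
 * Each Sandy Bridge C-box exposes a pair of general-purpose counters
 * programmed through per-box PERFEVTSEL/CTR MSR pairs, plus a single
 * fixed counter (shared across the boxes) that counts uncore clock
 * cycles.  Setting SNB_UNC_CTL_EN in the control MSR enables a counter;
 * the fixed counter needs only the enable bit.
 */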
static void snb_uncore_msr_enable_event(struct intel_uncore_box *box,
                                        struct perf_event *event)
{
        struct hw_perf_event *hwc = &event->hw;

        if (hwc->idx < UNCORE_PMC_IDX_FIXED)
                wrmsrl(hwc->config_base, hwc->config | SNB_UNC_CTL_EN);
        else
                wrmsrl(hwc->config_base, SNB_UNC_CTL_EN);
}

static void snb_uncore_msr_disable_event(struct intel_uncore_box *box,
                                        struct perf_event *event)
{
        wrmsrl(event->hw.config_base, 0);
}

static u64 snb_uncore_msr_read_counter(struct intel_uncore_box *box,
                                        struct perf_event *event)
{
        u64 count;
        rdmsrl(event->hw.event_base, count);
        return count;
}

static void snb_uncore_msr_init_box(struct intel_uncore_box *box)
{
        if (box->pmu->pmu_idx == 0) {
                wrmsrl(SNB_UNC_PERF_GLOBAL_CTL,
                        SNB_UNC_GLOBAL_CTL_EN | SNB_UNC_GLOBAL_CTL_CORE_ALL);
        }
}

static struct attribute *snb_uncore_formats_attr[] = {
        &format_attr_event.attr,
        &format_attr_umask.attr,
        &format_attr_edge.attr,
        &format_attr_inv.attr,
        &format_attr_cmask5.attr,
        NULL,
};

static struct attribute_group snb_uncore_format_group = {
        .name = "format",
        .attrs = snb_uncore_formats_attr,
};

static struct intel_uncore_ops snb_uncore_msr_ops = {
        .init_box       = snb_uncore_msr_init_box,
        .disable_event  = snb_uncore_msr_disable_event,
        .enable_event   = snb_uncore_msr_enable_event,
        .read_counter   = snb_uncore_msr_read_counter,
};

static struct event_constraint snb_uncore_cbox_constraints[] = {
        UNCORE_EVENT_CONSTRAINT(0x80, 0x1),
        UNCORE_EVENT_CONSTRAINT(0x83, 0x1),
        EVENT_CONSTRAINT_END
};

static struct intel_uncore_type snb_uncore_cbox = {
        .name           = "cbox",
        .num_counters   = 2,
        .num_boxes      = 4,
        .perf_ctr_bits  = 44,
        .fixed_ctr_bits = 48,
        .perf_ctr       = SNB_UNC_CBO_0_PER_CTR0,
        .event_ctl      = SNB_UNC_CBO_0_PERFEVTSEL0,
        .fixed_ctr      = SNB_UNC_FIXED_CTR,
        .fixed_ctl      = SNB_UNC_FIXED_CTR_CTRL,
        .single_fixed   = 1,
        .event_mask     = SNB_UNC_RAW_EVENT_MASK,
        .msr_offset     = SNB_UNC_CBO_MSR_OFFSET,
        .constraints    = snb_uncore_cbox_constraints,
        .ops            = &snb_uncore_msr_ops,
        .format_group   = &snb_uncore_format_group,
};

static struct intel_uncore_type *snb_msr_uncores[] = {
        &snb_uncore_cbox,
        NULL,
};
/* end of Sandy Bridge uncore support */
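
/*
 * Illustrative usage only: once the cbox PMUs are registered, a raw C-box
 * event can be counted system-wide with something like
 *
 *      perf stat -a -e 'uncore_cbox_0/event=0x80,umask=0x01/' -- sleep 1
 *
 * assuming the perf tool's dynamic-PMU event syntax; the actual event and
 * umask encodings come from the Sandy Bridge uncore documentation.
 */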

/* Nehalem uncore support */
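/*
 * Nehalem/Westmere have a single uncore box per package: eight
 * general-purpose counters selected through the NHM_UNC_PERFEVTSEL MSRs
 * plus a fixed counter for uncore clock ticks, all gated by the
 * NHM_UNC_PERF_GLOBAL_CTL MSR.
 */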
static void nhm_uncore_msr_disable_box(struct intel_uncore_box *box)
{
        wrmsrl(NHM_UNC_PERF_GLOBAL_CTL, 0);
}

static void nhm_uncore_msr_enable_box(struct intel_uncore_box *box)
{
        wrmsrl(NHM_UNC_PERF_GLOBAL_CTL,
                NHM_UNC_GLOBAL_CTL_EN_PC_ALL | NHM_UNC_GLOBAL_CTL_EN_FC);
}

static void nhm_uncore_msr_enable_event(struct intel_uncore_box *box,
                                        struct perf_event *event)
{
        struct hw_perf_event *hwc = &event->hw;

        if (hwc->idx < UNCORE_PMC_IDX_FIXED)
                wrmsrl(hwc->config_base, hwc->config | SNB_UNC_CTL_EN);
        else
                wrmsrl(hwc->config_base, NHM_UNC_FIXED_CTR_CTL_EN);
}

static struct attribute *nhm_uncore_formats_attr[] = {
        &format_attr_event.attr,
        &format_attr_umask.attr,
        &format_attr_edge.attr,
        &format_attr_inv.attr,
        &format_attr_cmask8.attr,
        NULL,
};

static struct attribute_group nhm_uncore_format_group = {
        .name = "format",
        .attrs = nhm_uncore_formats_attr,
};

static struct uncore_event_desc nhm_uncore_events[] = {
        INTEL_UNCORE_EVENT_DESC(CLOCKTICKS, "config=0xffff"),
        /* full cache line writes to DRAM */
        INTEL_UNCORE_EVENT_DESC(QMC_WRITES_FULL_ANY, "event=0x2f,umask=0xf"),
        /* Quickpath Memory Controller normal priority read requests */
        INTEL_UNCORE_EVENT_DESC(QMC_NORMAL_READS_ANY, "event=0x2c,umask=0xf"),
        /* Quickpath Home Logic read requests from the IOH */
        INTEL_UNCORE_EVENT_DESC(QHL_REQUEST_IOH_READS,
                                "event=0x20,umask=0x1"),
        /* Quickpath Home Logic write requests from the IOH */
        INTEL_UNCORE_EVENT_DESC(QHL_REQUEST_IOH_WRITES,
                                "event=0x20,umask=0x2"),
        /* Quickpath Home Logic read requests from a remote socket */
        INTEL_UNCORE_EVENT_DESC(QHL_REQUEST_REMOTE_READS,
                                "event=0x20,umask=0x4"),
        /* Quickpath Home Logic write requests from a remote socket */
        INTEL_UNCORE_EVENT_DESC(QHL_REQUEST_REMOTE_WRITES,
                                "event=0x20,umask=0x8"),
        /* Quickpath Home Logic read requests from the local socket */
        INTEL_UNCORE_EVENT_DESC(QHL_REQUEST_LOCAL_READS,
                                "event=0x20,umask=0x10"),
        /* Quickpath Home Logic write requests from the local socket */
        INTEL_UNCORE_EVENT_DESC(QHL_REQUEST_LOCAL_WRITES,
                                "event=0x20,umask=0x20"),
        { /* end: all zeroes */ },
};

static struct intel_uncore_ops nhm_uncore_msr_ops = {
        .disable_box    = nhm_uncore_msr_disable_box,
        .enable_box     = nhm_uncore_msr_enable_box,
        .disable_event  = snb_uncore_msr_disable_event,
        .enable_event   = nhm_uncore_msr_enable_event,
        .read_counter   = snb_uncore_msr_read_counter,
};

static struct intel_uncore_type nhm_uncore = {
        .name           = "",
        .num_counters   = 8,
        .num_boxes      = 1,
        .perf_ctr_bits  = 48,
        .fixed_ctr_bits = 48,
        .event_ctl      = NHM_UNC_PERFEVTSEL0,
        .perf_ctr       = NHM_UNC_UNCORE_PMC0,
        .fixed_ctr      = NHM_UNC_FIXED_CTR,
        .fixed_ctl      = NHM_UNC_FIXED_CTR_CTRL,
        .event_mask     = NHM_UNC_RAW_EVENT_MASK,
        .event_descs    = nhm_uncore_events,
        .ops            = &nhm_uncore_msr_ops,
        .format_group   = &nhm_uncore_format_group,
};

static struct intel_uncore_type *nhm_msr_uncores[] = {
        &nhm_uncore,
        NULL,
};
/* end of Nehalem uncore support */
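
/*
 * The event descriptors above appear in the PMU's "events" sysfs group, so
 * (illustratively, assuming the perf tool's named-event syntax) the DRAM
 * write event could be counted with something like
 *
 *      perf stat -a -e 'uncore/QMC_WRITES_FULL_ANY/' -- sleep 1
 *
 * since this single-box type registers under the plain "uncore" name.
 */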

static void uncore_assign_hw_event(struct intel_uncore_box *box,
                                struct perf_event *event, int idx)
{
        struct hw_perf_event *hwc = &event->hw;

        hwc->idx = idx;
        hwc->last_tag = ++box->tags[idx];

        if (hwc->idx == UNCORE_PMC_IDX_FIXED) {
                hwc->event_base = uncore_msr_fixed_ctr(box);
                hwc->config_base = uncore_msr_fixed_ctl(box);
                return;
        }

        hwc->config_base = uncore_msr_event_ctl(box, hwc->idx);
        hwc->event_base = uncore_msr_perf_ctr(box, hwc->idx);
}

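/*
 * Update the event count from the hardware counter.  The counters are
 * narrower than 64 bits, so both values are shifted up so that only the
 * implemented counter bits take part in the subtraction; shifting the
 * delta back down yields the elapsed count even across a counter wrap.
 */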
static void uncore_perf_event_update(struct intel_uncore_box *box,
                                        struct perf_event *event)
{
        u64 prev_count, new_count, delta;
        int shift;

        if (event->hw.idx >= UNCORE_PMC_IDX_FIXED)
                shift = 64 - uncore_fixed_ctr_bits(box);
        else
                shift = 64 - uncore_perf_ctr_bits(box);

        /* the hrtimer might modify the previous event value */
again:
        prev_count = local64_read(&event->hw.prev_count);
        new_count = uncore_read_counter(box, event);
        if (local64_xchg(&event->hw.prev_count, new_count) != prev_count)
                goto again;

        delta = (new_count << shift) - (prev_count << shift);
        delta >>= shift;

        local64_add(delta, &event->count);
}

/*
 * The overflow interrupt is unavailable on SandyBridge-EP and is broken
 * on SandyBridge, so we use an hrtimer to periodically poll the counters
 * and avoid overflow.
 */
static enum hrtimer_restart uncore_pmu_hrtimer(struct hrtimer *hrtimer)
{
        struct intel_uncore_box *box;
        unsigned long flags;
        int bit;

        box = container_of(hrtimer, struct intel_uncore_box, hrtimer);
        if (!box->n_active || box->cpu != smp_processor_id())
                return HRTIMER_NORESTART;
        /*
         * disable local interrupts to prevent uncore_pmu_event_start/stop
         * from interrupting the update process
         */
        local_irq_save(flags);

        for_each_set_bit(bit, box->active_mask, UNCORE_PMC_IDX_MAX)
                uncore_perf_event_update(box, box->events[bit]);

        local_irq_restore(flags);

        hrtimer_forward_now(hrtimer, ns_to_ktime(UNCORE_PMU_HRTIMER_INTERVAL));
        return HRTIMER_RESTART;
}

static void uncore_pmu_start_hrtimer(struct intel_uncore_box *box)
{
        __hrtimer_start_range_ns(&box->hrtimer,
                        ns_to_ktime(UNCORE_PMU_HRTIMER_INTERVAL), 0,
                        HRTIMER_MODE_REL_PINNED, 0);
}

static void uncore_pmu_cancel_hrtimer(struct intel_uncore_box *box)
{
        hrtimer_cancel(&box->hrtimer);
}

static void uncore_pmu_init_hrtimer(struct intel_uncore_box *box)
{
        hrtimer_init(&box->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
        box->hrtimer.function = uncore_pmu_hrtimer;
}

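/*
 * A box is allocated per physical package and shared by all cpus in that
 * package; the refcount tracks how many cpus reference the box, and the
 * polling hrtimer is set up at allocation time.
 */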
struct intel_uncore_box *uncore_alloc_box(int cpu)
{
        struct intel_uncore_box *box;

        box = kmalloc_node(sizeof(*box), GFP_KERNEL | __GFP_ZERO,
                           cpu_to_node(cpu));
        if (!box)
                return NULL;

        uncore_pmu_init_hrtimer(box);
        atomic_set(&box->refcnt, 1);
        box->cpu = -1;
        box->phys_id = -1;

        return box;
}

static struct intel_uncore_box *
uncore_pmu_to_box(struct intel_uncore_pmu *pmu, int cpu)
{
        return *per_cpu_ptr(pmu->box, cpu);
}

static struct intel_uncore_pmu *uncore_event_to_pmu(struct perf_event *event)
{
        return container_of(event->pmu, struct intel_uncore_pmu, pmu);
}

static struct intel_uncore_box *uncore_event_to_box(struct perf_event *event)
{
        /*
         * perf core schedules events on the basis of cpu; uncore events
         * are collected by one of the cpus inside a physical package.
         */
        return uncore_pmu_to_box(uncore_event_to_pmu(event),
                                 smp_processor_id());
}

static int uncore_collect_events(struct intel_uncore_box *box,
                                struct perf_event *leader, bool dogrp)
{
        struct perf_event *event;
        int n, max_count;

        max_count = box->pmu->type->num_counters;
        if (box->pmu->type->fixed_ctl)
                max_count++;

        if (box->n_events >= max_count)
                return -EINVAL;

        n = box->n_events;
        box->event_list[n] = leader;
        n++;
        if (!dogrp)
                return n;

        list_for_each_entry(event, &leader->sibling_list, group_entry) {
                if (event->state <= PERF_EVENT_STATE_OFF)
                        continue;

                if (n >= max_count)
                        return -EINVAL;

                box->event_list[n] = event;
                n++;
        }
        return n;
}

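/*
 * Pick a constraint for the event: the fixed event (config == ~0ULL) is
 * pinned to the fixed counter, events listed in the type's constraint
 * table are restricted to the counters named there, and everything else
 * may use any general-purpose counter.
 */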
static struct event_constraint *
uncore_event_constraint(struct intel_uncore_type *type,
                        struct perf_event *event)
{
        struct event_constraint *c;

        if (event->hw.config == ~0ULL)
                return &constraint_fixed;

        if (type->constraints) {
                for_each_event_constraint(c, type->constraints) {
                        if ((event->hw.config & c->cmask) == c->code)
                                return c;
                }
        }

        return &type->unconstrainted;
}

static int uncore_assign_events(struct intel_uncore_box *box,
                                int assign[], int n)
{
        unsigned long used_mask[BITS_TO_LONGS(UNCORE_PMC_IDX_MAX)];
        struct event_constraint *c, *constraints[UNCORE_PMC_IDX_MAX];
        int i, ret, wmin, wmax;
        struct hw_perf_event *hwc;

        bitmap_zero(used_mask, UNCORE_PMC_IDX_MAX);

        for (i = 0, wmin = UNCORE_PMC_IDX_MAX, wmax = 0; i < n; i++) {
                c = uncore_event_constraint(box->pmu->type,
                                box->event_list[i]);
                constraints[i] = c;
                wmin = min(wmin, c->weight);
                wmax = max(wmax, c->weight);
        }

        /* fastpath, try to reuse previous register */
        for (i = 0; i < n; i++) {
                hwc = &box->event_list[i]->hw;
                c = constraints[i];

                /* never assigned */
                if (hwc->idx == -1)
                        break;

                /* constraint still honored */
                if (!test_bit(hwc->idx, c->idxmsk))
                        break;

                /* not already used */
                if (test_bit(hwc->idx, used_mask))
                        break;

                __set_bit(hwc->idx, used_mask);
                assign[i] = hwc->idx;
        }
        if (i == n)
                return 0;

        /* slow path */
        ret = perf_assign_events(constraints, n, wmin, wmax, assign);
        return ret ? -EINVAL : 0;
}

static void uncore_pmu_event_start(struct perf_event *event, int flags)
{
        struct intel_uncore_box *box = uncore_event_to_box(event);
        int idx = event->hw.idx;

        if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED)))
                return;

        if (WARN_ON_ONCE(idx == -1 || idx >= UNCORE_PMC_IDX_MAX))
                return;

        event->hw.state = 0;
        box->events[idx] = event;
        box->n_active++;
        __set_bit(idx, box->active_mask);

        local64_set(&event->hw.prev_count, uncore_read_counter(box, event));
        uncore_enable_event(box, event);

        if (box->n_active == 1) {
                uncore_enable_box(box);
                uncore_pmu_start_hrtimer(box);
        }
}

static void uncore_pmu_event_stop(struct perf_event *event, int flags)
{
        struct intel_uncore_box *box = uncore_event_to_box(event);
        struct hw_perf_event *hwc = &event->hw;

        if (__test_and_clear_bit(hwc->idx, box->active_mask)) {
                uncore_disable_event(box, event);
                box->n_active--;
                box->events[hwc->idx] = NULL;
                WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
                hwc->state |= PERF_HES_STOPPED;

                if (box->n_active == 0) {
                        uncore_disable_box(box);
                        uncore_pmu_cancel_hrtimer(box);
                }
        }

        if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) {
                /*
                 * Drain the remaining delta count out of an event
                 * that we are disabling:
                 */
                uncore_perf_event_update(box, event);
                hwc->state |= PERF_HES_UPTODATE;
        }
}

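/*
 * Add an event to the box: collect the current event list plus the new
 * event, run the constraint-aware assignment, then stop events that have
 * to move and restart them on their new counters.
 */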
static int uncore_pmu_event_add(struct perf_event *event, int flags)
{
        struct intel_uncore_box *box = uncore_event_to_box(event);
        struct hw_perf_event *hwc = &event->hw;
        int assign[UNCORE_PMC_IDX_MAX];
        int i, n, ret;

        if (!box)
                return -ENODEV;

        ret = n = uncore_collect_events(box, event, false);
        if (ret < 0)
                return ret;

        hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
        if (!(flags & PERF_EF_START))
                hwc->state |= PERF_HES_ARCH;

        ret = uncore_assign_events(box, assign, n);
        if (ret)
                return ret;

        /* save events moving to new counters */
        for (i = 0; i < box->n_events; i++) {
                event = box->event_list[i];
                hwc = &event->hw;

                if (hwc->idx == assign[i] &&
                        hwc->last_tag == box->tags[assign[i]])
                        continue;
                /*
                 * Ensure we don't accidentally enable a stopped
                 * counter simply because we rescheduled.
                 */
                if (hwc->state & PERF_HES_STOPPED)
                        hwc->state |= PERF_HES_ARCH;

                uncore_pmu_event_stop(event, PERF_EF_UPDATE);
        }

        /* reprogram moved events into new counters */
        for (i = 0; i < n; i++) {
                event = box->event_list[i];
                hwc = &event->hw;

                if (hwc->idx != assign[i] ||
                        hwc->last_tag != box->tags[assign[i]])
                        uncore_assign_hw_event(box, event, assign[i]);
                else if (i < box->n_events)
                        continue;

                if (hwc->state & PERF_HES_ARCH)
                        continue;

                uncore_pmu_event_start(event, 0);
        }
        box->n_events = n;

        return 0;
}

static void uncore_pmu_event_del(struct perf_event *event, int flags)
{
        struct intel_uncore_box *box = uncore_event_to_box(event);
        int i;

        uncore_pmu_event_stop(event, PERF_EF_UPDATE);

        for (i = 0; i < box->n_events; i++) {
                if (event == box->event_list[i]) {
                        while (++i < box->n_events)
                                box->event_list[i - 1] = box->event_list[i];

                        --box->n_events;
                        break;
                }
        }

        event->hw.idx = -1;
        event->hw.last_tag = ~0ULL;
}

static void uncore_pmu_event_read(struct perf_event *event)
{
        struct intel_uncore_box *box = uncore_event_to_box(event);
        uncore_perf_event_update(box, event);
}

/*
 * validation ensures the group can be loaded onto the
 * PMU if it was the only group available.
 */
static int uncore_validate_group(struct intel_uncore_pmu *pmu,
                                struct perf_event *event)
{
        struct perf_event *leader = event->group_leader;
        struct intel_uncore_box *fake_box;
        int assign[UNCORE_PMC_IDX_MAX];
        int ret = -EINVAL, n;

        fake_box = uncore_alloc_box(smp_processor_id());
        if (!fake_box)
                return -ENOMEM;

        fake_box->pmu = pmu;
        /*
         * the event is not yet connected with its
         * siblings therefore we must first collect
         * existing siblings, then add the new event
         * before we can simulate the scheduling
         */
        n = uncore_collect_events(fake_box, leader, true);
        if (n < 0)
                goto out;

        fake_box->n_events = n;
        n = uncore_collect_events(fake_box, event, false);
        if (n < 0)
                goto out;

        fake_box->n_events = n;

        ret = uncore_assign_events(fake_box, assign, n);
out:
        kfree(fake_box);
        return ret;
}

int uncore_pmu_event_init(struct perf_event *event)
{
        struct intel_uncore_pmu *pmu;
        struct intel_uncore_box *box;
        struct hw_perf_event *hwc = &event->hw;
        int ret;

        if (event->attr.type != event->pmu->type)
                return -ENOENT;

        pmu = uncore_event_to_pmu(event);
        /* no device found for this pmu */
        if (pmu->func_id < 0)
                return -ENOENT;

        /*
         * The uncore PMU measures at all privilege levels all the time,
         * so it doesn't make sense to specify any exclude bits.
         */
        if (event->attr.exclude_user || event->attr.exclude_kernel ||
                        event->attr.exclude_hv || event->attr.exclude_idle)
                return -EINVAL;

        /* Sampling not supported yet */
        if (hwc->sample_period)
                return -EINVAL;

        /*
         * Place all uncore events for a particular physical package
         * onto a single cpu
         */
        if (event->cpu < 0)
                return -EINVAL;
        box = uncore_pmu_to_box(pmu, event->cpu);
        if (!box || box->cpu < 0)
                return -EINVAL;
        event->cpu = box->cpu;

        if (event->attr.config == UNCORE_FIXED_EVENT) {
                /* no fixed counter */
                if (!pmu->type->fixed_ctl)
                        return -EINVAL;
                /*
                 * if there is only one fixed counter, only the first pmu
                 * can access the fixed counter
                 */
                if (pmu->type->single_fixed && pmu->pmu_idx > 0)
                        return -EINVAL;
                hwc->config = ~0ULL;
        } else {
                hwc->config = event->attr.config & pmu->type->event_mask;
        }

        event->hw.idx = -1;
        event->hw.last_tag = ~0ULL;

        if (event->group_leader != event)
                ret = uncore_validate_group(pmu, event);
        else
                ret = 0;

        return ret;
}

static int __init uncore_pmu_register(struct intel_uncore_pmu *pmu)
{
        int ret;

        pmu->pmu = (struct pmu) {
                .attr_groups    = pmu->type->attr_groups,
                .task_ctx_nr    = perf_invalid_context,
                .event_init     = uncore_pmu_event_init,
                .add            = uncore_pmu_event_add,
                .del            = uncore_pmu_event_del,
                .start          = uncore_pmu_event_start,
                .stop           = uncore_pmu_event_stop,
                .read           = uncore_pmu_event_read,
        };

        if (pmu->type->num_boxes == 1) {
                if (strlen(pmu->type->name) > 0)
                        sprintf(pmu->name, "uncore_%s", pmu->type->name);
                else
                        sprintf(pmu->name, "uncore");
        } else {
                sprintf(pmu->name, "uncore_%s_%d", pmu->type->name,
                        pmu->pmu_idx);
        }

        ret = perf_pmu_register(&pmu->pmu, pmu->name, -1);
        return ret;
}

static void __init uncore_type_exit(struct intel_uncore_type *type)
{
        int i;

        for (i = 0; i < type->num_boxes; i++)
                free_percpu(type->pmus[i].box);
        kfree(type->pmus);
        type->pmus = NULL;
        kfree(type->attr_groups[1]);
        type->attr_groups[1] = NULL;
}

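/*
 * Allocate one intel_uncore_pmu per box for this type and, if the type has
 * named event descriptors, build the "events" attribute group.  The group
 * header and its attribute pointer array come from a single allocation,
 * which is why uncore_type_exit() can free attr_groups[1] with one kfree().
 */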
static int __init uncore_type_init(struct intel_uncore_type *type)
{
        struct intel_uncore_pmu *pmus;
        struct attribute_group *events_group;
        struct attribute **attrs;
        int i, j;

        pmus = kzalloc(sizeof(*pmus) * type->num_boxes, GFP_KERNEL);
        if (!pmus)
                return -ENOMEM;

        type->unconstrainted = (struct event_constraint)
                __EVENT_CONSTRAINT(0, (1ULL << type->num_counters) - 1,
                                0, type->num_counters, 0);

        for (i = 0; i < type->num_boxes; i++) {
                pmus[i].func_id = -1;
                pmus[i].pmu_idx = i;
                pmus[i].type = type;
                pmus[i].box = alloc_percpu(struct intel_uncore_box *);
                if (!pmus[i].box)
                        goto fail;
        }

        if (type->event_descs) {
                i = 0;
                while (type->event_descs[i].attr.attr.name)
                        i++;

                events_group = kzalloc(sizeof(struct attribute *) * (i + 1) +
                                        sizeof(*events_group), GFP_KERNEL);
                if (!events_group)
                        goto fail;

                attrs = (struct attribute **)(events_group + 1);
                events_group->name = "events";
                events_group->attrs = attrs;

                for (j = 0; j < i; j++)
                        attrs[j] = &type->event_descs[j].attr.attr;

                type->attr_groups[1] = events_group;
        }

        type->pmus = pmus;
        return 0;
fail:
        uncore_type_exit(type);
        return -ENOMEM;
}

static int __init uncore_types_init(struct intel_uncore_type **types)
{
        int i, ret;

        for (i = 0; types[i]; i++) {
                ret = uncore_type_init(types[i]);
                if (ret)
                        goto fail;
        }
        return 0;
fail:
        while (--i >= 0)
                uncore_type_exit(types[i]);
        return ret;
}

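/*
 * CPU hotplug handling: uncore_cpu_prepare() allocates a box for an
 * incoming cpu, uncore_cpu_starting() either adopts the box already owned
 * by another cpu in the same package (bumping its refcount) or initializes
 * the freshly allocated one, and uncore_cpu_dying() drops the reference.
 */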
static void __cpuinit uncore_cpu_dying(int cpu)
{
        struct intel_uncore_type *type;
        struct intel_uncore_pmu *pmu;
        struct intel_uncore_box *box;
        int i, j;

        for (i = 0; msr_uncores[i]; i++) {
                type = msr_uncores[i];
                for (j = 0; j < type->num_boxes; j++) {
                        pmu = &type->pmus[j];
                        box = *per_cpu_ptr(pmu->box, cpu);
                        *per_cpu_ptr(pmu->box, cpu) = NULL;
                        if (box && atomic_dec_and_test(&box->refcnt))
                                kfree(box);
                }
        }
}

static int __cpuinit uncore_cpu_starting(int cpu)
{
        struct intel_uncore_type *type;
        struct intel_uncore_pmu *pmu;
        struct intel_uncore_box *box, *exist;
        int i, j, k, phys_id;

        phys_id = topology_physical_package_id(cpu);

        for (i = 0; msr_uncores[i]; i++) {
                type = msr_uncores[i];
                for (j = 0; j < type->num_boxes; j++) {
                        pmu = &type->pmus[j];
                        box = *per_cpu_ptr(pmu->box, cpu);
                        /* called by uncore_cpu_init? */
                        if (box && box->phys_id >= 0) {
                                uncore_box_init(box);
                                continue;
                        }

                        for_each_online_cpu(k) {
                                exist = *per_cpu_ptr(pmu->box, k);
                                if (exist && exist->phys_id == phys_id) {
                                        atomic_inc(&exist->refcnt);
                                        *per_cpu_ptr(pmu->box, cpu) = exist;
                                        kfree(box);
                                        box = NULL;
                                        break;
                                }
                        }

                        if (box) {
                                box->phys_id = phys_id;
                                uncore_box_init(box);
                        }
                }
        }
        return 0;
}

static int __cpuinit uncore_cpu_prepare(int cpu, int phys_id)
{
        struct intel_uncore_type *type;
        struct intel_uncore_pmu *pmu;
        struct intel_uncore_box *box;
        int i, j;

        for (i = 0; msr_uncores[i]; i++) {
                type = msr_uncores[i];
                for (j = 0; j < type->num_boxes; j++) {
                        pmu = &type->pmus[j];
                        if (pmu->func_id < 0)
                                pmu->func_id = j;

                        box = uncore_alloc_box(cpu);
                        if (!box)
                                return -ENOMEM;

                        box->pmu = pmu;
                        box->phys_id = phys_id;
                        *per_cpu_ptr(pmu->box, cpu) = box;
                }
        }
        return 0;
}

static void __cpuinit uncore_change_context(struct intel_uncore_type **uncores,
                                            int old_cpu, int new_cpu)
{
        struct intel_uncore_type *type;
        struct intel_uncore_pmu *pmu;
        struct intel_uncore_box *box;
        int i, j;

        for (i = 0; uncores[i]; i++) {
                type = uncores[i];
                for (j = 0; j < type->num_boxes; j++) {
                        pmu = &type->pmus[j];
                        if (old_cpu < 0)
                                box = uncore_pmu_to_box(pmu, new_cpu);
                        else
                                box = uncore_pmu_to_box(pmu, old_cpu);
                        if (!box)
                                continue;

                        if (old_cpu < 0) {
                                WARN_ON_ONCE(box->cpu != -1);
                                box->cpu = new_cpu;
                                continue;
                        }

                        WARN_ON_ONCE(box->cpu != old_cpu);
                        if (new_cpu >= 0) {
                                uncore_pmu_cancel_hrtimer(box);
                                perf_pmu_migrate_context(&pmu->pmu,
                                                old_cpu, new_cpu);
                                box->cpu = new_cpu;
                        } else {
                                box->cpu = -1;
                        }
                }
        }
}

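/*
 * One cpu per physical package is kept in uncore_cpu_mask to collect the
 * package's uncore events.  When that cpu goes offline, the role and any
 * active events are migrated to another online cpu in the same package.
 */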
static void __cpuinit uncore_event_exit_cpu(int cpu)
{
        int i, phys_id, target;

        /* if exiting cpu is used for collecting uncore events */
        if (!cpumask_test_and_clear_cpu(cpu, &uncore_cpu_mask))
                return;

        /* find a new cpu to collect uncore events */
        phys_id = topology_physical_package_id(cpu);
        target = -1;
        for_each_online_cpu(i) {
                if (i == cpu)
                        continue;
                if (phys_id == topology_physical_package_id(i)) {
                        target = i;
                        break;
                }
        }

        /* migrate uncore events to the new cpu */
        if (target >= 0)
                cpumask_set_cpu(target, &uncore_cpu_mask);

        uncore_change_context(msr_uncores, cpu, target);
}

static void __cpuinit uncore_event_init_cpu(int cpu)
{
        int i, phys_id;

        phys_id = topology_physical_package_id(cpu);
        for_each_cpu(i, &uncore_cpu_mask) {
                if (phys_id == topology_physical_package_id(i))
                        return;
        }

        cpumask_set_cpu(cpu, &uncore_cpu_mask);

        uncore_change_context(msr_uncores, -1, cpu);
}

static int __cpuinit uncore_cpu_notifier(struct notifier_block *self,
                                         unsigned long action, void *hcpu)
{
        unsigned int cpu = (long)hcpu;

        /* allocate/free data structure for uncore box */
        switch (action & ~CPU_TASKS_FROZEN) {
        case CPU_UP_PREPARE:
                uncore_cpu_prepare(cpu, -1);
                break;
        case CPU_STARTING:
                uncore_cpu_starting(cpu);
                break;
        case CPU_UP_CANCELED:
        case CPU_DYING:
                uncore_cpu_dying(cpu);
                break;
        default:
                break;
        }

        /* select the cpu that collects uncore events */
        switch (action & ~CPU_TASKS_FROZEN) {
        case CPU_DOWN_FAILED:
        case CPU_STARTING:
                uncore_event_init_cpu(cpu);
                break;
        case CPU_DOWN_PREPARE:
                uncore_event_exit_cpu(cpu);
                break;
        default:
                break;
        }

        return NOTIFY_OK;
}

static struct notifier_block uncore_cpu_nb __cpuinitdata = {
        .notifier_call = uncore_cpu_notifier,
        /*
         * to migrate uncore events, our notifier should be executed
         * before perf core's notifier.
         */
        .priority = CPU_PRI_PERF + 1,
};

static void __init uncore_cpu_setup(void *dummy)
{
        uncore_cpu_starting(smp_processor_id());
}

static int __init uncore_cpu_init(void)
{
        int ret, cpu;

        switch (boot_cpu_data.x86_model) {
        case 26: /* Nehalem */
        case 30:
        case 37: /* Westmere */
        case 44:
                msr_uncores = nhm_msr_uncores;
                break;
        case 42: /* Sandy Bridge */
                msr_uncores = snb_msr_uncores;
                break;
        default:
                return 0;
        }

        ret = uncore_types_init(msr_uncores);
        if (ret)
                return ret;

        get_online_cpus();

        for_each_online_cpu(cpu) {
                int i, phys_id = topology_physical_package_id(cpu);

                for_each_cpu(i, &uncore_cpu_mask) {
                        if (phys_id == topology_physical_package_id(i)) {
                                phys_id = -1;
                                break;
                        }
                }
                if (phys_id < 0)
                        continue;

                uncore_cpu_prepare(cpu, phys_id);
                uncore_event_init_cpu(cpu);
        }
        on_each_cpu(uncore_cpu_setup, NULL, 1);

        register_cpu_notifier(&uncore_cpu_nb);

        put_online_cpus();

        return 0;
}

static int __init uncore_pmus_register(void)
{
        struct intel_uncore_pmu *pmu;
        struct intel_uncore_type *type;
        int i, j;

        for (i = 0; msr_uncores[i]; i++) {
                type = msr_uncores[i];
                for (j = 0; j < type->num_boxes; j++) {
                        pmu = &type->pmus[j];
                        uncore_pmu_register(pmu);
                }
        }

        return 0;
}

static int __init intel_uncore_init(void)
{
        int ret;

        if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
                return -ENODEV;

        ret = uncore_cpu_init();
        if (ret)
                goto fail;

        uncore_pmus_register();
        return 0;
fail:
        return ret;
}
device_initcall(intel_uncore_init);