perf/x86/intel: Add cross-HT counter exclusion infrastructure
author Maria Dimakopoulou <maria.n.dimakopoulou@gmail.com>
Mon, 17 Nov 2014 19:06:57 +0000 (20:06 +0100)
committer Ingo Molnar <mingo@kernel.org>
Thu, 2 Apr 2015 15:33:11 +0000 (17:33 +0200)
This patch adds a new shared_regs style structure to the
per-cpu x86 state (cpuc). It is used to coordinate access
to counters which must be used with cross-HyperThread
exclusion on Intel processors. This new struct is not
needed by every PMU, so it is allocated on demand.
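
For illustration only (not part of the patch): a simplified user-space
sketch of the per-core sharing scheme implemented below in
intel_pmu_cpu_starting()/intel_pmu_cpu_dying(). The names here
(cpu_ctx, find_sibling_excl, the NR_CPUS toy topology) are made up for
the example, the counter-state arrays are omitted, the sibling copy is
freed immediately instead of via kfree_on_online, and error handling
is left out.

#include <stdio.h>
#include <stdlib.h>

#define NR_CPUS 4	/* toy topology: CPUs 0/2 share core 0, 1/3 share core 1 */

struct excl_cntrs {
	int refcnt;	/* per-core: number of HT threads using this struct */
	int core_id;	/* per-core: -1 until a core claims it */
};

struct cpu_ctx {
	struct excl_cntrs *excl;
	int excl_thread_id;	/* 0 or 1 */
};

static struct cpu_ctx cpus[NR_CPUS];

/* Find a sibling CPU whose exclusion struct is already bound to this core. */
static struct excl_cntrs *find_sibling_excl(int core_id)
{
	int i;

	for (i = 0; i < NR_CPUS; i++)
		if (cpus[i].excl && cpus[i].excl->core_id == core_id)
			return cpus[i].excl;
	return NULL;
}

static void cpu_starting(int cpu, int core_id)
{
	struct excl_cntrs *mine = calloc(1, sizeof(*mine));
	struct excl_cntrs *shared;

	mine->core_id = -1;		/* not yet bound to a core */
	cpus[cpu].excl = mine;

	shared = find_sibling_excl(core_id);
	if (shared) {			/* second HT thread adopts the sibling's copy */
		free(mine);
		cpus[cpu].excl = shared;
		cpus[cpu].excl_thread_id = 1;
	}
	cpus[cpu].excl->core_id = core_id;
	cpus[cpu].excl->refcnt++;
}

static void cpu_dying(int cpu)
{
	struct excl_cntrs *c = cpus[cpu].excl;

	if (c && --c->refcnt == 0)	/* last sibling on the core frees it */
		free(c);
	cpus[cpu].excl = NULL;
}

int main(void)
{
	cpu_starting(0, 0);
	cpu_starting(2, 0);
	printf("core 0: refcnt=%d shared=%d\n",
	       cpus[0].excl->refcnt, cpus[0].excl == cpus[2].excl);
	cpu_dying(2);
	cpu_dying(0);
	return 0;
}

Running the sketch prints "core 0: refcnt=2 shared=1": both siblings on
core 0 end up pointing at the same refcounted structure, which is the
property the patch relies on for cross-HT counter exclusion.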

Signed-off-by: Maria Dimakopoulou <maria.n.dimakopoulou@gmail.com>
[peterz: spinlock_t -> raw_spinlock_t]
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Stephane Eranian <eranian@google.com>
Cc: bp@alien8.de
Cc: jolsa@redhat.com
Cc: kan.liang@intel.com
Link: http://lkml.kernel.org/r/1416251225-17721-6-git-send-email-eranian@google.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
arch/x86/kernel/cpu/perf_event.h
arch/x86/kernel/cpu/perf_event_intel.c

index 24a65057c1c030feceb3a4c05fa3c8d75c042e23..f31f90e2d85987e6e6ffafd01591f1cb793aa4ab 100644
@@ -71,6 +71,7 @@ struct event_constraint {
 #define PERF_X86_EVENT_COMMITTED       0x8 /* event passed commit_txn */
 #define PERF_X86_EVENT_PEBS_LD_HSW     0x10 /* haswell style datala, load */
 #define PERF_X86_EVENT_PEBS_NA_HSW     0x20 /* haswell style datala, unknown */
+#define PERF_X86_EVENT_EXCL            0x40 /* HT exclusivity on counter */
 #define PERF_X86_EVENT_RDPMC_ALLOWED   0x40 /* grant rdpmc permission */
 
 
@@ -123,6 +124,26 @@ struct intel_shared_regs {
        unsigned                core_id;        /* per-core: core id */
 };
 
+enum intel_excl_state_type {
+       INTEL_EXCL_UNUSED    = 0, /* counter is unused */
+       INTEL_EXCL_SHARED    = 1, /* counter can be used by both threads */
+       INTEL_EXCL_EXCLUSIVE = 2, /* counter can be used by one thread only */
+};
+
+struct intel_excl_states {
+       enum intel_excl_state_type init_state[X86_PMC_IDX_MAX];
+       enum intel_excl_state_type state[X86_PMC_IDX_MAX];
+};
+
+struct intel_excl_cntrs {
+       raw_spinlock_t  lock;
+
+       struct intel_excl_states states[2];
+
+       int             refcnt;         /* per-core: #HT threads */
+       unsigned        core_id;        /* per-core: core id */
+};
+
 #define MAX_LBR_ENTRIES                16
 
 enum {
@@ -185,6 +206,12 @@ struct cpu_hw_events {
         * used on Intel NHM/WSM/SNB
         */
        struct intel_shared_regs        *shared_regs;
+       /*
+        * manage exclusive counter access between hyperthread
+        */
+       struct event_constraint *constraint_list; /* in enable order */
+       struct intel_excl_cntrs         *excl_cntrs;
+       int excl_thread_id; /* 0 or 1 */
 
        /*
         * AMD specific bits
@@ -208,6 +235,10 @@ struct cpu_hw_events {
 #define EVENT_CONSTRAINT(c, n, m)      \
        __EVENT_CONSTRAINT(c, n, m, HWEIGHT(n), 0, 0)
 
+#define INTEL_EXCLEVT_CONSTRAINT(c, n) \
+       __EVENT_CONSTRAINT(c, n, ARCH_PERFMON_EVENTSEL_EVENT, HWEIGHT(n),\
+                          0, PERF_X86_EVENT_EXCL)
+
 /*
  * The overlap flag marks event constraints with overlapping counter
  * masks. This is the case if the counter mask of such an event is not
@@ -566,6 +597,7 @@ do {                                                                        \
  */
 #define PMU_FL_NO_HT_SHARING   0x1 /* no hyper-threading resource sharing */
 #define PMU_FL_HAS_RSP_1       0x2 /* has 2 equivalent offcore_rsp regs   */
+#define PMU_FL_EXCL_CNTRS      0x4 /* has exclusive counter requirements  */
 
 #define EVENT_VAR(_id)  event_attr_##_id
 #define EVENT_PTR(_id) &event_attr_##_id.attr.attr
index 2dd34b57d3ff610f16d3403c7350fb1eeeb2f010..7f54000fd0f1151c545e7bc54a38a155944823f4 100644
@@ -2224,16 +2224,52 @@ struct intel_shared_regs *allocate_shared_regs(int cpu)
        return regs;
 }
 
+static struct intel_excl_cntrs *allocate_excl_cntrs(int cpu)
+{
+       struct intel_excl_cntrs *c;
+       int i;
+
+       c = kzalloc_node(sizeof(struct intel_excl_cntrs),
+                        GFP_KERNEL, cpu_to_node(cpu));
+       if (c) {
+               raw_spin_lock_init(&c->lock);
+               for (i = 0; i < X86_PMC_IDX_MAX; i++) {
+                       c->states[0].state[i] = INTEL_EXCL_UNUSED;
+                       c->states[0].init_state[i] = INTEL_EXCL_UNUSED;
+
+                       c->states[1].state[i] = INTEL_EXCL_UNUSED;
+                       c->states[1].init_state[i] = INTEL_EXCL_UNUSED;
+               }
+               c->core_id = -1;
+       }
+       return c;
+}
+
 static int intel_pmu_cpu_prepare(int cpu)
 {
        struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
 
-       if (!(x86_pmu.extra_regs || x86_pmu.lbr_sel_map))
-               return NOTIFY_OK;
+       if (x86_pmu.extra_regs || x86_pmu.lbr_sel_map) {
+               cpuc->shared_regs = allocate_shared_regs(cpu);
+               if (!cpuc->shared_regs)
+                       return NOTIFY_BAD;
+       }
 
-       cpuc->shared_regs = allocate_shared_regs(cpu);
-       if (!cpuc->shared_regs)
-               return NOTIFY_BAD;
+       if (x86_pmu.flags & PMU_FL_EXCL_CNTRS) {
+               size_t sz = X86_PMC_IDX_MAX * sizeof(struct event_constraint);
+
+               cpuc->constraint_list = kzalloc(sz, GFP_KERNEL);
+               if (!cpuc->constraint_list)
+                       return NOTIFY_BAD;
+
+               cpuc->excl_cntrs = allocate_excl_cntrs(cpu);
+               if (!cpuc->excl_cntrs) {
+                       kfree(cpuc->constraint_list);
+                       kfree(cpuc->shared_regs);
+                       return NOTIFY_BAD;
+               }
+               cpuc->excl_thread_id = 0;
+       }
 
        return NOTIFY_OK;
 }
@@ -2274,12 +2310,29 @@ static void intel_pmu_cpu_starting(int cpu)
 
        if (x86_pmu.lbr_sel_map)
                cpuc->lbr_sel = &cpuc->shared_regs->regs[EXTRA_REG_LBR];
+
+       if (x86_pmu.flags & PMU_FL_EXCL_CNTRS) {
+               for_each_cpu(i, topology_thread_cpumask(cpu)) {
+                       struct intel_excl_cntrs *c;
+
+                       c = per_cpu(cpu_hw_events, i).excl_cntrs;
+                       if (c && c->core_id == core_id) {
+                               cpuc->kfree_on_online[1] = cpuc->excl_cntrs;
+                               cpuc->excl_cntrs = c;
+                               cpuc->excl_thread_id = 1;
+                               break;
+                       }
+               }
+               cpuc->excl_cntrs->core_id = core_id;
+               cpuc->excl_cntrs->refcnt++;
+       }
 }
 
 static void intel_pmu_cpu_dying(int cpu)
 {
        struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
        struct intel_shared_regs *pc;
+       struct intel_excl_cntrs *c;
 
        pc = cpuc->shared_regs;
        if (pc) {
@@ -2287,6 +2340,14 @@ static void intel_pmu_cpu_dying(int cpu)
                        kfree(pc);
                cpuc->shared_regs = NULL;
        }
+       c = cpuc->excl_cntrs;
+       if (c) {
+               if (c->core_id == -1 || --c->refcnt == 0)
+                       kfree(c);
+               cpuc->excl_cntrs = NULL;
+               kfree(cpuc->constraint_list);
+               cpuc->constraint_list = NULL;
+       }
 
        fini_debug_store_on_cpu(cpu);
 }