ARC: perf: Add some comments/debug stuff
authorVineet Gupta <vgupta@synopsys.com>
Wed, 15 Apr 2015 14:14:07 +0000 (19:44 +0530)
committerVineet Gupta <vgupta@synopsys.com>
Mon, 20 Apr 2015 12:57:30 +0000 (18:27 +0530)
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
arch/arc/include/asm/perf_event.h
arch/arc/kernel/perf_event.c

index cbf755e32a03f3c19296b552e3f767865fbacc86..1c45667c13678a029068fad71ea60dee1b5fdee1 100644 (file)
@@ -57,26 +57,7 @@ struct arc_reg_cc_build {
 #define PERF_COUNT_ARC_HW_MAX  (PERF_COUNT_HW_MAX + 6)
 
 /*
- * The "generalized" performance events seem to really be a copy
- * of the available events on x86 processors; the mapping to ARC
- * events is not always possible 1-to-1. Fortunately, there doesn't
- * seem to be an exact definition for these events, so we can cheat
- * a bit where necessary.
- *
- * In particular, the following PERF events may behave a bit differently
- * compared to other architectures:
- *
- * PERF_COUNT_HW_CPU_CYCLES
- *     Cycles not in halted state
- *
- * PERF_COUNT_HW_REF_CPU_CYCLES
- *     Reference cycles not in halted state, same as PERF_COUNT_HW_CPU_CYCLES
- *     for now as we don't do Dynamic Voltage/Frequency Scaling (yet)
- *
- * PERF_COUNT_HW_BUS_CYCLES
- *     Unclear what this means, Intel uses 0x013c, which according to
- *     their datasheet means "unhalted reference cycles". It sounds similar
- *     to PERF_COUNT_HW_REF_CPU_CYCLES, and we use the same counter for it.
+ * Some ARC pct quirks:
  *
  * PERF_COUNT_HW_STALLED_CYCLES_BACKEND
  * PERF_COUNT_HW_STALLED_CYCLES_FRONTEND
@@ -91,21 +72,35 @@ struct arc_reg_cc_build {
  *     Note that I$ cache misses aren't counted by either of the two!
  */
 
+/*
+ * ARC PCT has hardware conditions with fixed "names" but variable "indexes"
+ * (based on a specific RTL build)
+ * Below is the static map between perf generic/arc specific event_id and
+ * h/w condition names.
+ * At the time of probe, we loop through each index and find its name to
+ * complete the mapping of perf event_id to h/w index, as the latter is
+ * needed to actually program the counter.
+ */
 static const char * const arc_pmu_ev_hw_map[] = {
+       /* count cycles */
        [PERF_COUNT_HW_CPU_CYCLES] = "crun",
        [PERF_COUNT_HW_REF_CPU_CYCLES] = "crun",
        [PERF_COUNT_HW_BUS_CYCLES] = "crun",
-       [PERF_COUNT_HW_INSTRUCTIONS] = "iall",
-       [PERF_COUNT_HW_BRANCH_MISSES] = "bpfail",
-       [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = "ijmp",
+
        [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = "bflush",
        [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = "bstall",
-       [PERF_COUNT_ARC_DCLM] = "dclm",
-       [PERF_COUNT_ARC_DCSM] = "dcsm",
-       [PERF_COUNT_ARC_ICM] = "icm",
-       [PERF_COUNT_ARC_BPOK] = "bpok",
-       [PERF_COUNT_ARC_EDTLB] = "edtlb",
-       [PERF_COUNT_ARC_EITLB] = "eitlb",
+
+       /* counts condition */
+       [PERF_COUNT_HW_INSTRUCTIONS] = "iall",
+       [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = "ijmp",
+       [PERF_COUNT_ARC_BPOK]         = "bpok",   /* NP-NT, PT-T, PNT-NT */
+       [PERF_COUNT_HW_BRANCH_MISSES] = "bpfail", /* NP-T, PT-NT, PNT-T */
+
+       [PERF_COUNT_ARC_DCLM] = "dclm",         /* D-cache Load Miss */
+       [PERF_COUNT_ARC_DCSM] = "dcsm",         /* D-cache Store Miss */
+       [PERF_COUNT_ARC_ICM] = "icm",           /* I-cache Miss */
+       [PERF_COUNT_ARC_EDTLB] = "edtlb",       /* D-TLB Miss */
+       [PERF_COUNT_ARC_EITLB] = "eitlb",       /* I-TLB Miss */
 };
 
 #define C(_x)                  PERF_COUNT_HW_CACHE_##_x
index 64261c2711b11814a7525f0f2dd2555f26870287..181baeed4495c23a704f04cae6e9822b5c673537 100644 (file)
@@ -90,6 +90,10 @@ static int arc_pmu_cache_event(u64 config)
        if (ret == CACHE_OP_UNSUPPORTED)
                return -ENOENT;
 
+       pr_debug("init cache event: type/op/result %d/%d/%d with h/w %d \'%s\'\n",
+                cache_type, cache_op, cache_result, ret,
+                arc_pmu_ev_hw_map[ret]);
+
        return ret;
 }
 
@@ -106,8 +110,9 @@ static int arc_pmu_event_init(struct perf_event *event)
                if (arc_pmu->ev_hw_idx[event->attr.config] < 0)
                        return -ENOENT;
                hwc->config = arc_pmu->ev_hw_idx[event->attr.config];
-               pr_debug("initializing event %d with cfg %d\n",
-                        (int) event->attr.config, (int) hwc->config);
+               pr_debug("init event %d with h/w %d \'%s\'\n",
+                        (int) event->attr.config, (int) hwc->config,
+                        arc_pmu_ev_hw_map[event->attr.config]);
                return 0;
        case PERF_TYPE_HW_CACHE:
                ret = arc_pmu_cache_event(event->attr.config);
@@ -260,19 +265,22 @@ static int arc_pmu_device_probe(struct platform_device *pdev)
                arc_pmu->n_counters, arc_pmu->counter_size, cc_bcr.c);
 
        cc_name.str[8] = 0;
-       for (i = 0; i < PERF_COUNT_HW_MAX; i++)
+       for (i = 0; i < PERF_COUNT_ARC_HW_MAX; i++)
                arc_pmu->ev_hw_idx[i] = -1;
 
+       /* loop through all available h/w condition indexes */
        for (j = 0; j < cc_bcr.c; j++) {
                write_aux_reg(ARC_REG_CC_INDEX, j);
                cc_name.indiv.word0 = read_aux_reg(ARC_REG_CC_NAME0);
                cc_name.indiv.word1 = read_aux_reg(ARC_REG_CC_NAME1);
+
+               /* See if it has been mapped to a perf event_id */
                for (i = 0; i < ARRAY_SIZE(arc_pmu_ev_hw_map); i++) {
                        if (arc_pmu_ev_hw_map[i] &&
                            !strcmp(arc_pmu_ev_hw_map[i], cc_name.str) &&
                            strlen(arc_pmu_ev_hw_map[i])) {
-                               pr_debug("mapping %d to idx %d with name %s\n",
-                                        i, j, cc_name.str);
+                               pr_debug("mapping perf event %2d to h/w event \'%8s\' (idx %d)\n",
+                                        i, cc_name.str, j);
                                arc_pmu->ev_hw_idx[i] = j;
                        }
                }