drm/i915: load a ring frequency scaling table v3
author: Jesse Barnes <jbarnes@virtuousgeek.org>
Tue, 28 Jun 2011 20:04:16 +0000 (13:04 -0700)
committer: Keith Packard <keithp@keithp.com>
Tue, 28 Jun 2011 20:54:27 +0000 (13:54 -0700)
The ring frequency scaling table tells the PCU to treat certain GPU
frequencies as if they were a given CPU frequency for purposes of
scaling the ring frequency.  Normally the PCU will scale the ring
frequency based on the CPU P-state, but with the table present, it will
also take the GPU frequency into account.

The main downside of keeping the ring frequency high while the CPU is
at a low frequency (or asleep altogether) is increased power
consumption.  But then if you're keeping your GPU busy, you probably
want the extra performance.

v2:
  - add units to debug table header (from Eric)
  - use tsc_khz as a fallback if the cpufreq driver doesn't give us a freq
    (from Chris)
v3:
  - fix comments & debug output
  - remove unneeded force wake get/put

Reviewed-by: Ben Widawsky <ben@bwidawsk.net>
Tested-by: Eric Anholt <eric@anholt.net>
Reviewed-by: Eric Anholt <eric@anholt.net>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
Signed-off-by: Keith Packard <keithp@keithp.com>
drivers/gpu/drm/i915/i915_debugfs.c
drivers/gpu/drm/i915/i915_reg.h
drivers/gpu/drm/i915/i915_suspend.c
drivers/gpu/drm/i915/intel_display.c
drivers/gpu/drm/i915/intel_drv.h

index 4d46441cbe2d830de73ffacdb5821dbdd11f6340..8a5a032ec69611ce4f149584b6056b18a8d1cfe7 100644 (file)
@@ -1123,6 +1123,44 @@ static int i915_emon_status(struct seq_file *m, void *unused)
        return 0;
 }
 
+/*
+ * debugfs: dump the ring frequency scaling table held by the PCU.
+ *
+ * Walks every GPU frequency step from dev_priv->min_delay up to
+ * dev_priv->max_delay, asks the pcode mailbox for the matching table entry
+ * and prints one row per step: the step scaled by 50 next to the value read
+ * back scaled by 100 (both labelled as MHz in the header row).
+ *
+ * Rows whose mailbox request times out are logged via DRM_ERROR and left
+ * out of the output.
+ *
+ * Returns 0 (also for non-GEN6 devices, after printing a notice), or the
+ * error from mutex_lock_interruptible() if struct_mutex could not be taken.
+ */
+static int i915_ring_freq_table(struct seq_file *m, void *unused)
+{
+       struct drm_info_node *info = (struct drm_info_node *) m->private;
+       struct drm_device *dev = info->minor->dev;
+       /* Must be called dev_priv: I915_READ/I915_WRITE are used below. */
+       drm_i915_private_t *dev_priv = dev->dev_private;
+       int step;
+       int err;
+
+       if (!IS_GEN6(dev)) {
+               seq_printf(m, "unsupported on this chipset\n");
+               return 0;
+       }
+
+       err = mutex_lock_interruptible(&dev->struct_mutex);
+       if (err)
+               return err;
+
+       seq_printf(m, "GPU freq (MHz)\tEffective CPU freq (MHz)\n");
+
+       step = dev_priv->min_delay;
+       while (step <= dev_priv->max_delay) {
+               int timed_out;
+
+               /* Name the entry we want, then post the read request. */
+               I915_WRITE(GEN6_PCODE_DATA, step);
+               I915_WRITE(GEN6_PCODE_MAILBOX,
+                          GEN6_PCODE_READY | GEN6_PCODE_READ_MIN_FREQ_TABLE);
+
+               /*
+                * The request is treated as complete once the READY bit
+                * reads back as zero (timeout argument 10, presumably ms).
+                */
+               timed_out = wait_for((I915_READ(GEN6_PCODE_MAILBOX) &
+                                     GEN6_PCODE_READY) == 0, 10);
+               if (timed_out) {
+                       DRM_ERROR("pcode read of freq table timed out\n");
+               } else {
+                       int ia_ratio = I915_READ(GEN6_PCODE_DATA);
+
+                       seq_printf(m, "%d\t\t%d\n", step * 50, ia_ratio * 100);
+               }
+
+               step++;
+       }
+
+       mutex_unlock(&dev->struct_mutex);
+
+       return 0;
+}
+
 static int i915_gfxec(struct seq_file *m, void *unused)
 {
        struct drm_info_node *node = (struct drm_info_node *) m->private;
@@ -1426,6 +1464,7 @@ static struct drm_info_list i915_debugfs_list[] = {
        {"i915_inttoext_table", i915_inttoext_table, 0},
        {"i915_drpc_info", i915_drpc_info, 0},
        {"i915_emon_status", i915_emon_status, 0},
+       {"i915_ring_freq_table", i915_ring_freq_table, 0},
        {"i915_gfxec", i915_gfxec, 0},
        {"i915_fbc_status", i915_fbc_status, 0},
        {"i915_sr_status", i915_sr_status, 0},
index 5d5def756c9e5beee9ae10def1cbe39d5f7cf02f..4a446b116e6aa1b5a12bf10cd2002ba42aea9576 100644 (file)
 #define GEN6_PCODE_MAILBOX                     0x138124
 #define   GEN6_PCODE_READY                     (1<<31)
 #define   GEN6_READ_OC_PARAMS                  0xc
-#define   GEN6_PCODE_WRITE_MIN_FREQ_TABLE      0x9
+#define   GEN6_PCODE_WRITE_MIN_FREQ_TABLE      0x8
+#define   GEN6_PCODE_READ_MIN_FREQ_TABLE       0x9
 #define GEN6_PCODE_DATA                                0x138128
+#define   GEN6_PCODE_FREQ_IA_RATIO_SHIFT       8
 
 #endif /* _I915_REG_H_ */
index e8152d23d5b67c31ecf77b39f7250df71cb12bca..6fbd997f5a6c8a56d44324b3c6a1d8d43cf93776 100644 (file)
@@ -875,8 +875,10 @@ int i915_restore_state(struct drm_device *dev)
                intel_init_emon(dev);
        }
 
-       if (IS_GEN6(dev))
+       if (IS_GEN6(dev)) {
                gen6_enable_rps(dev_priv);
+               gen6_update_ring_freq(dev_priv);
+       }
 
        /* Cache mode state */
        I915_WRITE (CACHE_MODE_0, dev_priv->saveCACHE_MODE_0 | 0xffff0000);
index e58627f580c6898dea21af31a321dc6a1c7b7e5c..804ac4d6cb482fd5eb6367147912c72f143637af 100644 (file)
@@ -24,6 +24,7 @@
  *     Eric Anholt <eric@anholt.net>
  */
 
+#include <linux/cpufreq.h>
 #include <linux/module.h>
 #include <linux/input.h>
 #include <linux/i2c.h>
@@ -7273,6 +7274,59 @@ void gen6_enable_rps(struct drm_i915_private *dev_priv)
        mutex_unlock(&dev_priv->dev->struct_mutex);
 }
 
+/*
+ * Load the ring frequency scaling table into the PCU.
+ *
+ * For each GPU frequency step, from dev_priv->max_delay down to
+ * dev_priv->min_delay, one table entry is written through the pcode
+ * mailbox.  The entry names the IA (CPU) frequency the PCU should use as
+ * its reference when picking a ring frequency for that GPU frequency.
+ *
+ * The reference starts at the maximum CPU frequency for the highest GPU
+ * step and drops by scaling_factor / 2 (90) MHz per step below it; GPU
+ * steps under 15 (750MHz at 50MHz per step) always get 800.  The value is
+ * written in units of 100MHz, rounded to nearest.
+ *
+ * Takes dev_priv->dev->struct_mutex around the whole upload.  A mailbox
+ * timeout is logged with DRM_ERROR and the remaining entries are still
+ * attempted.
+ */
+void gen6_update_ring_freq(struct drm_i915_private *dev_priv)
+{
+       int low_gpu_cutoff = 15;
+       int mhz_per_two_steps = 180;
+       int cpu_khz, cpu_mhz;
+       int step;
+
+       /*
+        * Prefer the maximum reported by cpufreq for CPU 0; when it reports
+        * nothing, fall back to the measured TSC rate.  The PCU will ensure
+        * we don't go over.
+        */
+       cpu_khz = cpufreq_quick_get_max(0);
+       if (cpu_khz == 0)
+               cpu_khz = tsc_khz;
+
+       cpu_mhz = cpu_khz / 1000;
+
+       mutex_lock(&dev_priv->dev->struct_mutex);
+
+       step = dev_priv->max_delay;
+       while (step >= dev_priv->min_delay) {
+               int steps_below_max = dev_priv->max_delay - step;
+               int ia_mhz;
+               int ia_ratio;
+
+               /*
+                * NOTE(review): nothing here keeps ia_mhz from going
+                * negative when the CPU maximum is small relative to the
+                * step offset -- confirm this cannot occur on real parts.
+                */
+               ia_mhz = (step < low_gpu_cutoff) ?
+                       800 :
+                       cpu_mhz - ((steps_below_max * mhz_per_two_steps) / 2);
+               ia_ratio = DIV_ROUND_CLOSEST(ia_mhz, 100);
+
+               /* Entry payload: IA ratio in the upper field, GPU step below. */
+               I915_WRITE(GEN6_PCODE_DATA,
+                          (ia_ratio << GEN6_PCODE_FREQ_IA_RATIO_SHIFT) | step);
+               I915_WRITE(GEN6_PCODE_MAILBOX,
+                          GEN6_PCODE_READY | GEN6_PCODE_WRITE_MIN_FREQ_TABLE);
+
+               /* Wait for the READY bit to read back as zero. */
+               if (wait_for((I915_READ(GEN6_PCODE_MAILBOX) &
+                             GEN6_PCODE_READY) == 0, 10))
+                       DRM_ERROR("pcode write of freq table timed out\n");
+
+               step--;
+       }
+
+       mutex_unlock(&dev_priv->dev->struct_mutex);
+}
+
 static void ironlake_init_clock_gating(struct drm_device *dev)
 {
        struct drm_i915_private *dev_priv = dev->dev_private;
@@ -7916,8 +7970,10 @@ void intel_modeset_init(struct drm_device *dev)
                intel_init_emon(dev);
        }
 
-       if (IS_GEN6(dev))
+       if (IS_GEN6(dev)) {
                gen6_enable_rps(dev_priv);
+               gen6_update_ring_freq(dev_priv);
+       }
 
        INIT_WORK(&dev_priv->idle_work, intel_idle_update);
        setup_timer(&dev_priv->idle_timer, intel_gpu_idle_timer,
index 9ffa61eb4d7efab156819980452e22e5507c3c76..8ac3bd8b6faa8a67ac35170d537679903d7ff14e 100644 (file)
@@ -317,6 +317,7 @@ extern void intel_enable_clock_gating(struct drm_device *dev);
 extern void ironlake_enable_drps(struct drm_device *dev);
 extern void ironlake_disable_drps(struct drm_device *dev);
 extern void gen6_enable_rps(struct drm_i915_private *dev_priv);
+extern void gen6_update_ring_freq(struct drm_i915_private *dev_priv);
 extern void gen6_disable_rps(struct drm_device *dev);
 extern void intel_init_emon(struct drm_device *dev);