drm/i915: Double the cursor self-refresh latency on Valleyview
[firefly-linux-kernel-4.4.55.git] / drivers / gpu / drm / i915 / intel_pm.c
index 72f41aaa71ff330637bdc82d4ee8e0c04091bdf2..cdd70e654af5a2812c473027a48f7382640fee62 100644 (file)
@@ -1286,6 +1286,7 @@ static void valleyview_update_wm(struct drm_device *dev)
        struct drm_i915_private *dev_priv = dev->dev_private;
        int planea_wm, planeb_wm, cursora_wm, cursorb_wm;
        int plane_sr, cursor_sr;
+       int ignore_plane_sr, ignore_cursor_sr;
        unsigned int enabled = 0;
 
        vlv_update_drain_latency(dev);
@@ -1308,7 +1309,12 @@ static void valleyview_update_wm(struct drm_device *dev)
                             sr_latency_ns,
                             &valleyview_wm_info,
                             &valleyview_cursor_wm_info,
-                            &plane_sr, &cursor_sr))
+                            &plane_sr, &ignore_cursor_sr) &&
+           g4x_compute_srwm(dev, ffs(enabled) - 1,
+                            2*sr_latency_ns,
+                            &valleyview_wm_info,
+                            &valleyview_cursor_wm_info,
+                            &ignore_plane_sr, &cursor_sr))
                I915_WRITE(FW_BLC_SELF_VLV, FW_CSPWRDWNEN);
        else
                I915_WRITE(FW_BLC_SELF_VLV,
@@ -1325,10 +1331,11 @@ static void valleyview_update_wm(struct drm_device *dev)
                   (planeb_wm << DSPFW_PLANEB_SHIFT) |
                   planea_wm);
        I915_WRITE(DSPFW2,
-                  (I915_READ(DSPFW2) & DSPFW_CURSORA_MASK) |
+                  (I915_READ(DSPFW2) & ~DSPFW_CURSORA_MASK) |
                   (cursora_wm << DSPFW_CURSORA_SHIFT));
        I915_WRITE(DSPFW3,
-                  (I915_READ(DSPFW3) | (cursor_sr << DSPFW_CURSOR_SR_SHIFT)));
+                  (I915_READ(DSPFW3) & ~DSPFW_CURSOR_SR_MASK) |
+                  (cursor_sr << DSPFW_CURSOR_SR_SHIFT));
 }
 
 static void g4x_update_wm(struct drm_device *dev)
@@ -1374,11 +1381,11 @@ static void g4x_update_wm(struct drm_device *dev)
                   (planeb_wm << DSPFW_PLANEB_SHIFT) |
                   planea_wm);
        I915_WRITE(DSPFW2,
-                  (I915_READ(DSPFW2) & DSPFW_CURSORA_MASK) |
+                  (I915_READ(DSPFW2) & ~DSPFW_CURSORA_MASK) |
                   (cursora_wm << DSPFW_CURSORA_SHIFT));
        /* HPLL off in SR has some issues on G4x... disable it */
        I915_WRITE(DSPFW3,
-                  (I915_READ(DSPFW3) & ~DSPFW_HPLL_SR_EN) |
+                  (I915_READ(DSPFW3) & ~(DSPFW_HPLL_SR_EN | DSPFW_CURSOR_SR_MASK)) |
                   (cursor_sr << DSPFW_CURSOR_SR_SHIFT));
 }
 
@@ -1468,9 +1475,12 @@ static void i9xx_update_wm(struct drm_device *dev)
        fifo_size = dev_priv->display.get_fifo_size(dev, 0);
        crtc = intel_get_crtc_for_plane(dev, 0);
        if (crtc->enabled && crtc->fb) {
+               int cpp = crtc->fb->bits_per_pixel / 8;
+               if (IS_GEN2(dev))
+                       cpp = 4;
+
                planea_wm = intel_calculate_wm(crtc->mode.clock,
-                                              wm_info, fifo_size,
-                                              crtc->fb->bits_per_pixel / 8,
+                                              wm_info, fifo_size, cpp,
                                               latency_ns);
                enabled = crtc;
        } else
@@ -1479,9 +1489,12 @@ static void i9xx_update_wm(struct drm_device *dev)
        fifo_size = dev_priv->display.get_fifo_size(dev, 1);
        crtc = intel_get_crtc_for_plane(dev, 1);
        if (crtc->enabled && crtc->fb) {
+               int cpp = crtc->fb->bits_per_pixel / 8;
+               if (IS_GEN2(dev))
+                       cpp = 4;
+
                planeb_wm = intel_calculate_wm(crtc->mode.clock,
-                                              wm_info, fifo_size,
-                                              crtc->fb->bits_per_pixel / 8,
+                                              wm_info, fifo_size, cpp,
                                               latency_ns);
                if (enabled == NULL)
                        enabled = crtc;
@@ -1571,8 +1584,7 @@ static void i830_update_wm(struct drm_device *dev)
 
        planea_wm = intel_calculate_wm(crtc->mode.clock, &i830_wm_info,
                                       dev_priv->display.get_fifo_size(dev, 0),
-                                      crtc->fb->bits_per_pixel / 8,
-                                      latency_ns);
+                                      4, latency_ns);
        fwater_lo = I915_READ(FW_BLC) & ~0xfff;
        fwater_lo |= (3<<8) | planea_wm;
 
@@ -1805,8 +1817,110 @@ static void sandybridge_update_wm(struct drm_device *dev)
                enabled |= 2;
        }
 
-       if ((dev_priv->num_pipe == 3) &&
-           g4x_compute_wm0(dev, 2,
+       /*
+        * Calculate and update the self-refresh watermark only when one
+        * display plane is used.
+        *
+        * SNB support 3 levels of watermark.
+        *
+        * WM1/WM2/WM2 watermarks have to be enabled in the ascending order,
+        * and disabled in the descending order
+        *
+        */
+       I915_WRITE(WM3_LP_ILK, 0);
+       I915_WRITE(WM2_LP_ILK, 0);
+       I915_WRITE(WM1_LP_ILK, 0);
+
+       if (!single_plane_enabled(enabled) ||
+           dev_priv->sprite_scaling_enabled)
+               return;
+       enabled = ffs(enabled) - 1;
+
+       /* WM1 */
+       if (!ironlake_compute_srwm(dev, 1, enabled,
+                                  SNB_READ_WM1_LATENCY() * 500,
+                                  &sandybridge_display_srwm_info,
+                                  &sandybridge_cursor_srwm_info,
+                                  &fbc_wm, &plane_wm, &cursor_wm))
+               return;
+
+       I915_WRITE(WM1_LP_ILK,
+                  WM1_LP_SR_EN |
+                  (SNB_READ_WM1_LATENCY() << WM1_LP_LATENCY_SHIFT) |
+                  (fbc_wm << WM1_LP_FBC_SHIFT) |
+                  (plane_wm << WM1_LP_SR_SHIFT) |
+                  cursor_wm);
+
+       /* WM2 */
+       if (!ironlake_compute_srwm(dev, 2, enabled,
+                                  SNB_READ_WM2_LATENCY() * 500,
+                                  &sandybridge_display_srwm_info,
+                                  &sandybridge_cursor_srwm_info,
+                                  &fbc_wm, &plane_wm, &cursor_wm))
+               return;
+
+       I915_WRITE(WM2_LP_ILK,
+                  WM2_LP_EN |
+                  (SNB_READ_WM2_LATENCY() << WM1_LP_LATENCY_SHIFT) |
+                  (fbc_wm << WM1_LP_FBC_SHIFT) |
+                  (plane_wm << WM1_LP_SR_SHIFT) |
+                  cursor_wm);
+
+       /* WM3 */
+       if (!ironlake_compute_srwm(dev, 3, enabled,
+                                  SNB_READ_WM3_LATENCY() * 500,
+                                  &sandybridge_display_srwm_info,
+                                  &sandybridge_cursor_srwm_info,
+                                  &fbc_wm, &plane_wm, &cursor_wm))
+               return;
+
+       I915_WRITE(WM3_LP_ILK,
+                  WM3_LP_EN |
+                  (SNB_READ_WM3_LATENCY() << WM1_LP_LATENCY_SHIFT) |
+                  (fbc_wm << WM1_LP_FBC_SHIFT) |
+                  (plane_wm << WM1_LP_SR_SHIFT) |
+                  cursor_wm);
+}
+
+static void ivybridge_update_wm(struct drm_device *dev)
+{
+       struct drm_i915_private *dev_priv = dev->dev_private;
+       int latency = SNB_READ_WM0_LATENCY() * 100;     /* In unit 0.1us */
+       u32 val;
+       int fbc_wm, plane_wm, cursor_wm;
+       int ignore_fbc_wm, ignore_plane_wm, ignore_cursor_wm;
+       unsigned int enabled;
+
+       enabled = 0;
+       if (g4x_compute_wm0(dev, 0,
+                           &sandybridge_display_wm_info, latency,
+                           &sandybridge_cursor_wm_info, latency,
+                           &plane_wm, &cursor_wm)) {
+               val = I915_READ(WM0_PIPEA_ILK);
+               val &= ~(WM0_PIPE_PLANE_MASK | WM0_PIPE_CURSOR_MASK);
+               I915_WRITE(WM0_PIPEA_ILK, val |
+                          ((plane_wm << WM0_PIPE_PLANE_SHIFT) | cursor_wm));
+               DRM_DEBUG_KMS("FIFO watermarks For pipe A -"
+                             " plane %d, " "cursor: %d\n",
+                             plane_wm, cursor_wm);
+               enabled |= 1;
+       }
+
+       if (g4x_compute_wm0(dev, 1,
+                           &sandybridge_display_wm_info, latency,
+                           &sandybridge_cursor_wm_info, latency,
+                           &plane_wm, &cursor_wm)) {
+               val = I915_READ(WM0_PIPEB_ILK);
+               val &= ~(WM0_PIPE_PLANE_MASK | WM0_PIPE_CURSOR_MASK);
+               I915_WRITE(WM0_PIPEB_ILK, val |
+                          ((plane_wm << WM0_PIPE_PLANE_SHIFT) | cursor_wm));
+               DRM_DEBUG_KMS("FIFO watermarks For pipe B -"
+                             " plane %d, cursor: %d\n",
+                             plane_wm, cursor_wm);
+               enabled |= 2;
+       }
+
+       if (g4x_compute_wm0(dev, 2,
                            &sandybridge_display_wm_info, latency,
                            &sandybridge_cursor_wm_info, latency,
                            &plane_wm, &cursor_wm)) {
@@ -1869,12 +1983,17 @@ static void sandybridge_update_wm(struct drm_device *dev)
                   (plane_wm << WM1_LP_SR_SHIFT) |
                   cursor_wm);
 
-       /* WM3 */
+       /* WM3, note we have to correct the cursor latency */
        if (!ironlake_compute_srwm(dev, 3, enabled,
                                   SNB_READ_WM3_LATENCY() * 500,
                                   &sandybridge_display_srwm_info,
                                   &sandybridge_cursor_srwm_info,
-                                  &fbc_wm, &plane_wm, &cursor_wm))
+                                  &fbc_wm, &plane_wm, &ignore_cursor_wm) ||
+           !ironlake_compute_srwm(dev, 3, enabled,
+                                  2 * SNB_READ_WM3_LATENCY() * 500,
+                                  &sandybridge_display_srwm_info,
+                                  &sandybridge_cursor_srwm_info,
+                                  &ignore_fbc_wm, &ignore_plane_wm, &cursor_wm))
                return;
 
        I915_WRITE(WM3_LP_ILK,
@@ -2323,7 +2442,7 @@ void gen6_set_rps(struct drm_device *dev, u8 val)
        struct drm_i915_private *dev_priv = dev->dev_private;
        u32 limits = gen6_rps_limits(dev_priv, &val);
 
-       WARN_ON(!mutex_is_locked(&dev->struct_mutex));
+       WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
        WARN_ON(val > dev_priv->rps.max_delay);
        WARN_ON(val < dev_priv->rps.min_delay);
 
@@ -2404,12 +2523,12 @@ static void gen6_enable_rps(struct drm_device *dev)
        struct intel_ring_buffer *ring;
        u32 rp_state_cap;
        u32 gt_perf_status;
-       u32 pcu_mbox, rc6_mask = 0;
+       u32 rc6vids, pcu_mbox, rc6_mask = 0;
        u32 gtfifodbg;
        int rc6_mode;
-       int i;
+       int i, ret;
 
-       WARN_ON(!mutex_is_locked(&dev->struct_mutex));
+       WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
 
        /* Here begins a magic sequence of register writes to enable
         * auto-downclocking.
@@ -2503,30 +2622,16 @@ static void gen6_enable_rps(struct drm_device *dev)
                   GEN6_RP_UP_BUSY_AVG |
                   (IS_HASWELL(dev) ? GEN7_RP_DOWN_IDLE_AVG : GEN6_RP_DOWN_IDLE_CONT));
 
-       if (wait_for((I915_READ(GEN6_PCODE_MAILBOX) & GEN6_PCODE_READY) == 0,
-                    500))
-               DRM_ERROR("timeout waiting for pcode mailbox to become idle\n");
-
-       I915_WRITE(GEN6_PCODE_DATA, 0);
-       I915_WRITE(GEN6_PCODE_MAILBOX,
-                  GEN6_PCODE_READY |
-                  GEN6_PCODE_WRITE_MIN_FREQ_TABLE);
-       if (wait_for((I915_READ(GEN6_PCODE_MAILBOX) & GEN6_PCODE_READY) == 0,
-                    500))
-               DRM_ERROR("timeout waiting for pcode mailbox to finish\n");
-
-       /* Check for overclock support */
-       if (wait_for((I915_READ(GEN6_PCODE_MAILBOX) & GEN6_PCODE_READY) == 0,
-                    500))
-               DRM_ERROR("timeout waiting for pcode mailbox to become idle\n");
-       I915_WRITE(GEN6_PCODE_MAILBOX, GEN6_READ_OC_PARAMS);
-       pcu_mbox = I915_READ(GEN6_PCODE_DATA);
-       if (wait_for((I915_READ(GEN6_PCODE_MAILBOX) & GEN6_PCODE_READY) == 0,
-                    500))
-               DRM_ERROR("timeout waiting for pcode mailbox to finish\n");
-       if (pcu_mbox & (1<<31)) { /* OC supported */
-               dev_priv->rps.max_delay = pcu_mbox & 0xff;
-               DRM_DEBUG_DRIVER("overclocking supported, adjusting frequency max to %dMHz\n", pcu_mbox * 50);
+       ret = sandybridge_pcode_write(dev_priv, GEN6_PCODE_WRITE_MIN_FREQ_TABLE, 0);
+       if (!ret) {
+               pcu_mbox = 0;
+               ret = sandybridge_pcode_read(dev_priv, GEN6_READ_OC_PARAMS, &pcu_mbox);
+               if (ret && pcu_mbox & (1<<31)) { /* OC supported */
+                       dev_priv->rps.max_delay = pcu_mbox & 0xff;
+                       DRM_DEBUG_DRIVER("overclocking supported, adjusting frequency max to %dMHz\n", pcu_mbox * 50);
+               }
+       } else {
+               DRM_DEBUG_DRIVER("Failed to set the min frequency\n");
        }
 
        gen6_set_rps(dev_priv->dev, (gt_perf_status & 0xff00) >> 8);
@@ -2540,6 +2645,20 @@ static void gen6_enable_rps(struct drm_device *dev)
        /* enable all PM interrupts */
        I915_WRITE(GEN6_PMINTRMSK, 0);
 
+       rc6vids = 0;
+       ret = sandybridge_pcode_read(dev_priv, GEN6_PCODE_READ_RC6VIDS, &rc6vids);
+       if (IS_GEN6(dev) && ret) {
+               DRM_DEBUG_DRIVER("Couldn't check for BIOS workaround\n");
+       } else if (IS_GEN6(dev) && (GEN6_DECODE_RC6_VID(rc6vids & 0xff) < 450)) {
+               DRM_DEBUG_DRIVER("You should update your BIOS. Correcting minimum rc6 voltage (%dmV->%dmV)\n",
+                         GEN6_DECODE_RC6_VID(rc6vids & 0xff), 450);
+               rc6vids &= 0xffff00;
+               rc6vids |= GEN6_ENCODE_RC6_VID(450);
+               ret = sandybridge_pcode_write(dev_priv, GEN6_PCODE_WRITE_RC6VIDS, rc6vids);
+               if (ret)
+                       DRM_ERROR("Couldn't fix incorrect rc6 voltage\n");
+       }
+
        gen6_gt_force_wake_put(dev_priv);
 }
 
@@ -2547,10 +2666,11 @@ static void gen6_update_ring_freq(struct drm_device *dev)
 {
        struct drm_i915_private *dev_priv = dev->dev_private;
        int min_freq = 15;
-       int gpu_freq, ia_freq, max_ia_freq;
+       int gpu_freq;
+       unsigned int ia_freq, max_ia_freq;
        int scaling_factor = 180;
 
-       WARN_ON(!mutex_is_locked(&dev->struct_mutex));
+       WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
 
        max_ia_freq = cpufreq_quick_get_max(0);
        /*
@@ -2581,17 +2701,11 @@ static void gen6_update_ring_freq(struct drm_device *dev)
                else
                        ia_freq = max_ia_freq - ((diff * scaling_factor) / 2);
                ia_freq = DIV_ROUND_CLOSEST(ia_freq, 100);
+               ia_freq <<= GEN6_PCODE_FREQ_IA_RATIO_SHIFT;
 
-               I915_WRITE(GEN6_PCODE_DATA,
-                          (ia_freq << GEN6_PCODE_FREQ_IA_RATIO_SHIFT) |
-                          gpu_freq);
-               I915_WRITE(GEN6_PCODE_MAILBOX, GEN6_PCODE_READY |
-                          GEN6_PCODE_WRITE_MIN_FREQ_TABLE);
-               if (wait_for((I915_READ(GEN6_PCODE_MAILBOX) &
-                             GEN6_PCODE_READY) == 0, 10)) {
-                       DRM_ERROR("pcode write of freq table timed out\n");
-                       continue;
-               }
+               sandybridge_pcode_write(dev_priv,
+                                       GEN6_PCODE_WRITE_MIN_FREQ_TABLE,
+                                       ia_freq | gpu_freq);
        }
 }
 
@@ -2599,16 +2713,16 @@ void ironlake_teardown_rc6(struct drm_device *dev)
 {
        struct drm_i915_private *dev_priv = dev->dev_private;
 
-       if (dev_priv->renderctx) {
-               i915_gem_object_unpin(dev_priv->renderctx);
-               drm_gem_object_unreference(&dev_priv->renderctx->base);
-               dev_priv->renderctx = NULL;
+       if (dev_priv->ips.renderctx) {
+               i915_gem_object_unpin(dev_priv->ips.renderctx);
+               drm_gem_object_unreference(&dev_priv->ips.renderctx->base);
+               dev_priv->ips.renderctx = NULL;
        }
 
-       if (dev_priv->pwrctx) {
-               i915_gem_object_unpin(dev_priv->pwrctx);
-               drm_gem_object_unreference(&dev_priv->pwrctx->base);
-               dev_priv->pwrctx = NULL;
+       if (dev_priv->ips.pwrctx) {
+               i915_gem_object_unpin(dev_priv->ips.pwrctx);
+               drm_gem_object_unreference(&dev_priv->ips.pwrctx->base);
+               dev_priv->ips.pwrctx = NULL;
        }
 }
 
@@ -2634,14 +2748,14 @@ static int ironlake_setup_rc6(struct drm_device *dev)
 {
        struct drm_i915_private *dev_priv = dev->dev_private;
 
-       if (dev_priv->renderctx == NULL)
-               dev_priv->renderctx = intel_alloc_context_page(dev);
-       if (!dev_priv->renderctx)
+       if (dev_priv->ips.renderctx == NULL)
+               dev_priv->ips.renderctx = intel_alloc_context_page(dev);
+       if (!dev_priv->ips.renderctx)
                return -ENOMEM;
 
-       if (dev_priv->pwrctx == NULL)
-               dev_priv->pwrctx = intel_alloc_context_page(dev);
-       if (!dev_priv->pwrctx) {
+       if (dev_priv->ips.pwrctx == NULL)
+               dev_priv->ips.pwrctx = intel_alloc_context_page(dev);
+       if (!dev_priv->ips.pwrctx) {
                ironlake_teardown_rc6(dev);
                return -ENOMEM;
        }
@@ -2653,6 +2767,7 @@ static void ironlake_enable_rc6(struct drm_device *dev)
 {
        struct drm_i915_private *dev_priv = dev->dev_private;
        struct intel_ring_buffer *ring = &dev_priv->ring[RCS];
+       bool was_interruptible;
        int ret;
 
        /* rc6 disabled by default due to repeated reports of hanging during
@@ -2667,6 +2782,9 @@ static void ironlake_enable_rc6(struct drm_device *dev)
        if (ret)
                return;
 
+       was_interruptible = dev_priv->mm.interruptible;
+       dev_priv->mm.interruptible = false;
+
        /*
         * GPU can automatically power down the render unit if given a page
         * to save state.
@@ -2674,12 +2792,13 @@ static void ironlake_enable_rc6(struct drm_device *dev)
        ret = intel_ring_begin(ring, 6);
        if (ret) {
                ironlake_teardown_rc6(dev);
+               dev_priv->mm.interruptible = was_interruptible;
                return;
        }
 
        intel_ring_emit(ring, MI_SUSPEND_FLUSH | MI_SUSPEND_FLUSH_EN);
        intel_ring_emit(ring, MI_SET_CONTEXT);
-       intel_ring_emit(ring, dev_priv->renderctx->gtt_offset |
+       intel_ring_emit(ring, dev_priv->ips.renderctx->gtt_offset |
                        MI_MM_SPACE_GTT |
                        MI_SAVE_EXT_STATE_EN |
                        MI_RESTORE_EXT_STATE_EN |
@@ -2694,14 +2813,15 @@ static void ironlake_enable_rc6(struct drm_device *dev)
         * does an implicit flush, combined with MI_FLUSH above, it should be
         * safe to assume that renderctx is valid
         */
-       ret = intel_wait_ring_idle(ring);
+       ret = intel_ring_idle(ring);
+       dev_priv->mm.interruptible = was_interruptible;
        if (ret) {
                DRM_ERROR("failed to enable ironlake power power savings\n");
                ironlake_teardown_rc6(dev);
                return;
        }
 
-       I915_WRITE(PWRCTXA, dev_priv->pwrctx->gtt_offset | PWRCTX_EN);
+       I915_WRITE(PWRCTXA, dev_priv->ips.pwrctx->gtt_offset | PWRCTX_EN);
        I915_WRITE(RSTDBYCTL, I915_READ(RSTDBYCTL) & ~RCX_SW_EXIT);
 }
 
@@ -3310,37 +3430,72 @@ static void intel_init_emon(struct drm_device *dev)
 
 void intel_disable_gt_powersave(struct drm_device *dev)
 {
+       struct drm_i915_private *dev_priv = dev->dev_private;
+
        if (IS_IRONLAKE_M(dev)) {
                ironlake_disable_drps(dev);
                ironlake_disable_rc6(dev);
        } else if (INTEL_INFO(dev)->gen >= 6 && !IS_VALLEYVIEW(dev)) {
+               cancel_delayed_work_sync(&dev_priv->rps.delayed_resume_work);
+               mutex_lock(&dev_priv->rps.hw_lock);
                gen6_disable_rps(dev);
+               mutex_unlock(&dev_priv->rps.hw_lock);
        }
 }
 
+static void intel_gen6_powersave_work(struct work_struct *work)
+{
+       struct drm_i915_private *dev_priv =
+               container_of(work, struct drm_i915_private,
+                            rps.delayed_resume_work.work);
+       struct drm_device *dev = dev_priv->dev;
+
+       mutex_lock(&dev_priv->rps.hw_lock);
+       gen6_enable_rps(dev);
+       gen6_update_ring_freq(dev);
+       mutex_unlock(&dev_priv->rps.hw_lock);
+}
+
 void intel_enable_gt_powersave(struct drm_device *dev)
 {
+       struct drm_i915_private *dev_priv = dev->dev_private;
+
        if (IS_IRONLAKE_M(dev)) {
                ironlake_enable_drps(dev);
                ironlake_enable_rc6(dev);
                intel_init_emon(dev);
        } else if ((IS_GEN6(dev) || IS_GEN7(dev)) && !IS_VALLEYVIEW(dev)) {
-               gen6_enable_rps(dev);
-               gen6_update_ring_freq(dev);
+               /*
+                * PCU communication is slow and this doesn't need to be
+                * done at any specific time, so do this out of our fast path
+                * to make resume and init faster.
+                */
+               schedule_delayed_work(&dev_priv->rps.delayed_resume_work,
+                                     round_jiffies_up_relative(HZ));
        }
 }
 
+static void ibx_init_clock_gating(struct drm_device *dev)
+{
+       struct drm_i915_private *dev_priv = dev->dev_private;
+
+       /*
+        * On Ibex Peak and Cougar Point, we need to disable clock
+        * gating for the panel power sequencer or it will fail to
+        * start up when no ports are active.
+        */
+       I915_WRITE(SOUTH_DSPCLK_GATE_D, PCH_DPLSUNIT_CLOCK_GATE_DISABLE);
+}
+
 static void ironlake_init_clock_gating(struct drm_device *dev)
 {
        struct drm_i915_private *dev_priv = dev->dev_private;
-       uint32_t dspclk_gate = VRHUNIT_CLOCK_GATE_DISABLE;
+       uint32_t dspclk_gate = ILK_VRHUNIT_CLOCK_GATE_DISABLE;
 
        /* Required for FBC */
-       dspclk_gate |= DPFCUNIT_CLOCK_GATE_DISABLE |
-               DPFCRUNIT_CLOCK_GATE_DISABLE |
-               DPFDUNIT_CLOCK_GATE_DISABLE;
-       /* Required for CxSR */
-       dspclk_gate |= DPARBUNIT_CLOCK_GATE_DISABLE;
+       dspclk_gate |= ILK_DPFCRUNIT_CLOCK_GATE_DISABLE |
+                  ILK_DPFCUNIT_CLOCK_GATE_DISABLE |
+                  ILK_DPFDUNIT_CLOCK_GATE_ENABLE;
 
        I915_WRITE(PCH_3DCGDIS0,
                   MARIUNIT_CLOCK_GATE_DISABLE |
@@ -3348,8 +3503,6 @@ static void ironlake_init_clock_gating(struct drm_device *dev)
        I915_WRITE(PCH_3DCGDIS1,
                   VFMUNIT_CLOCK_GATE_DISABLE);
 
-       I915_WRITE(PCH_DSPCLK_GATE_D, dspclk_gate);
-
        /*
         * According to the spec the following bits should be set in
         * order to enable memory self-refresh
@@ -3360,9 +3513,7 @@ static void ironlake_init_clock_gating(struct drm_device *dev)
        I915_WRITE(ILK_DISPLAY_CHICKEN2,
                   (I915_READ(ILK_DISPLAY_CHICKEN2) |
                    ILK_DPARB_GATE | ILK_VSDPFD_FULL));
-       I915_WRITE(ILK_DSPCLK_GATE,
-                  (I915_READ(ILK_DSPCLK_GATE) |
-                   ILK_DPARB_CLK_GATE));
+       dspclk_gate |= ILK_DPARBUNIT_CLOCK_GATE_ENABLE;
        I915_WRITE(DISP_ARB_CTL,
                   (I915_READ(DISP_ARB_CTL) |
                    DISP_FBC_WM_DIS));
@@ -3384,28 +3535,56 @@ static void ironlake_init_clock_gating(struct drm_device *dev)
                I915_WRITE(ILK_DISPLAY_CHICKEN2,
                           I915_READ(ILK_DISPLAY_CHICKEN2) |
                           ILK_DPARB_GATE);
-               I915_WRITE(ILK_DSPCLK_GATE,
-                          I915_READ(ILK_DSPCLK_GATE) |
-                          ILK_DPFC_DIS1 |
-                          ILK_DPFC_DIS2 |
-                          ILK_CLK_FBC);
        }
 
+       I915_WRITE(ILK_DSPCLK_GATE_D, dspclk_gate);
+
        I915_WRITE(ILK_DISPLAY_CHICKEN2,
                   I915_READ(ILK_DISPLAY_CHICKEN2) |
                   ILK_ELPIN_409_SELECT);
        I915_WRITE(_3D_CHICKEN2,
                   _3D_CHICKEN2_WM_READ_PIPELINED << 16 |
                   _3D_CHICKEN2_WM_READ_PIPELINED);
+
+       /* WaDisableRenderCachePipelinedFlush */
+       I915_WRITE(CACHE_MODE_0,
+                  _MASKED_BIT_ENABLE(CM0_PIPELINED_RENDER_FLUSH_DISABLE));
+
+       ibx_init_clock_gating(dev);
+}
+
+static void cpt_init_clock_gating(struct drm_device *dev)
+{
+       struct drm_i915_private *dev_priv = dev->dev_private;
+       int pipe;
+
+       /*
+        * On Ibex Peak and Cougar Point, we need to disable clock
+        * gating for the panel power sequencer or it will fail to
+        * start up when no ports are active.
+        */
+       I915_WRITE(SOUTH_DSPCLK_GATE_D, PCH_DPLSUNIT_CLOCK_GATE_DISABLE);
+       I915_WRITE(SOUTH_CHICKEN2, I915_READ(SOUTH_CHICKEN2) |
+                  DPLS_EDP_PPS_FIX_DIS);
+       /* The below fixes the weird display corruption, a few pixels shifted
+        * downward, on (only) LVDS of some HP laptops with IVY.
+        */
+       for_each_pipe(pipe)
+               I915_WRITE(TRANS_CHICKEN2(pipe), TRANS_CHICKEN2_TIMING_OVERRIDE);
+       /* WADP0ClockGatingDisable */
+       for_each_pipe(pipe) {
+               I915_WRITE(TRANS_CHICKEN1(pipe),
+                          TRANS_CHICKEN1_DP0UNIT_GC_DISABLE);
+       }
 }
 
 static void gen6_init_clock_gating(struct drm_device *dev)
 {
        struct drm_i915_private *dev_priv = dev->dev_private;
        int pipe;
-       uint32_t dspclk_gate = VRHUNIT_CLOCK_GATE_DISABLE;
+       uint32_t dspclk_gate = ILK_VRHUNIT_CLOCK_GATE_DISABLE;
 
-       I915_WRITE(PCH_DSPCLK_GATE_D, dspclk_gate);
+       I915_WRITE(ILK_DSPCLK_GATE_D, dspclk_gate);
 
        I915_WRITE(ILK_DISPLAY_CHICKEN2,
                   I915_READ(ILK_DISPLAY_CHICKEN2) |
@@ -3460,11 +3639,12 @@ static void gen6_init_clock_gating(struct drm_device *dev)
        I915_WRITE(ILK_DISPLAY_CHICKEN2,
                   I915_READ(ILK_DISPLAY_CHICKEN2) |
                   ILK_DPARB_GATE | ILK_VSDPFD_FULL);
-       I915_WRITE(ILK_DSPCLK_GATE,
-                  I915_READ(ILK_DSPCLK_GATE) |
-                  ILK_DPARB_CLK_GATE  |
-                  ILK_DPFD_CLK_GATE);
+       I915_WRITE(ILK_DSPCLK_GATE_D,
+                  I915_READ(ILK_DSPCLK_GATE_D) |
+                  ILK_DPARBUNIT_CLOCK_GATE_ENABLE  |
+                  ILK_DPFDUNIT_CLOCK_GATE_ENABLE);
 
+       /* WaMbcDriverBootEnable */
        I915_WRITE(GEN6_MBCTL, I915_READ(GEN6_MBCTL) |
                   GEN6_MBCTL_ENABLE_BOOT_FETCH);
 
@@ -3479,6 +3659,8 @@ static void gen6_init_clock_gating(struct drm_device *dev)
         * platforms I checked have a 0 for this. (Maybe BIOS overrides?) */
        I915_WRITE(GEN6_GT_MODE, _MASKED_BIT_DISABLE(0xffff));
        I915_WRITE(GEN6_GT_MODE, _MASKED_BIT_ENABLE(GEN6_GT_MODE_HI));
+
+       cpt_init_clock_gating(dev);
 }
 
 static void gen7_setup_fixed_func_scheduler(struct drm_i915_private *dev_priv)
@@ -3493,13 +3675,24 @@ static void gen7_setup_fixed_func_scheduler(struct drm_i915_private *dev_priv)
        I915_WRITE(GEN7_FF_THREAD_MODE, reg);
 }
 
+static void lpt_init_clock_gating(struct drm_device *dev)
+{
+       struct drm_i915_private *dev_priv = dev->dev_private;
+
+       /*
+        * TODO: this bit should only be enabled when really needed, then
+        * disabled when not needed anymore in order to save power.
+        */
+       if (dev_priv->pch_id == INTEL_PCH_LPT_LP_DEVICE_ID_TYPE)
+               I915_WRITE(SOUTH_DSPCLK_GATE_D,
+                          I915_READ(SOUTH_DSPCLK_GATE_D) |
+                          PCH_LP_PARTITION_LEVEL_DISABLE);
+}
+
 static void haswell_init_clock_gating(struct drm_device *dev)
 {
        struct drm_i915_private *dev_priv = dev->dev_private;
        int pipe;
-       uint32_t dspclk_gate = VRHUNIT_CLOCK_GATE_DISABLE;
-
-       I915_WRITE(PCH_DSPCLK_GATE_D, dspclk_gate);
 
        I915_WRITE(WM3_LP_ILK, 0);
        I915_WRITE(WM2_LP_ILK, 0);
@@ -3510,12 +3703,6 @@ static void haswell_init_clock_gating(struct drm_device *dev)
         */
        I915_WRITE(GEN6_UCGCTL2, GEN6_RCZUNIT_CLOCK_GATE_DISABLE);
 
-       I915_WRITE(ILK_DSPCLK_GATE, IVB_VRHUNIT_CLK_GATE);
-
-       I915_WRITE(IVB_CHICKEN3,
-                  CHICKEN3_DGMG_REQ_OUT_FIX_DISABLE |
-                  CHICKEN3_DGMG_DONE_FIX_DISABLE);
-
        /* Apply the WaDisableRHWOOptimizationForRenderHang workaround. */
        I915_WRITE(GEN7_COMMON_SLICE_CHICKEN1,
                   GEN7_CSC1_RHWO_OPT_DISABLE_IN_RCC);
@@ -3544,6 +3731,10 @@ static void haswell_init_clock_gating(struct drm_device *dev)
        I915_WRITE(CACHE_MODE_1,
                   _MASKED_BIT_ENABLE(PIXEL_SUBSPAN_COLLECT_OPT_DISABLE));
 
+       /* WaMbcDriverBootEnable */
+       I915_WRITE(GEN6_MBCTL, I915_READ(GEN6_MBCTL) |
+                  GEN6_MBCTL_ENABLE_BOOT_FETCH);
+
        /* XXX: This is a workaround for early silicon revisions and should be
         * removed later.
         */
@@ -3553,27 +3744,38 @@ static void haswell_init_clock_gating(struct drm_device *dev)
                        WM_DBG_DISALLOW_SPRITE |
                        WM_DBG_DISALLOW_MAXFIFO);
 
+       lpt_init_clock_gating(dev);
 }
 
 static void ivybridge_init_clock_gating(struct drm_device *dev)
 {
        struct drm_i915_private *dev_priv = dev->dev_private;
        int pipe;
-       uint32_t dspclk_gate = VRHUNIT_CLOCK_GATE_DISABLE;
        uint32_t snpcr;
 
-       I915_WRITE(PCH_DSPCLK_GATE_D, dspclk_gate);
-
        I915_WRITE(WM3_LP_ILK, 0);
        I915_WRITE(WM2_LP_ILK, 0);
        I915_WRITE(WM1_LP_ILK, 0);
 
-       I915_WRITE(ILK_DSPCLK_GATE, IVB_VRHUNIT_CLK_GATE);
+       I915_WRITE(ILK_DSPCLK_GATE_D, ILK_VRHUNIT_CLOCK_GATE_DISABLE);
+
+       /* WaDisableEarlyCull */
+       I915_WRITE(_3D_CHICKEN3,
+                  _MASKED_BIT_ENABLE(_3D_CHICKEN_SF_DISABLE_OBJEND_CULL));
 
+       /* WaDisableBackToBackFlipFix */
        I915_WRITE(IVB_CHICKEN3,
                   CHICKEN3_DGMG_REQ_OUT_FIX_DISABLE |
                   CHICKEN3_DGMG_DONE_FIX_DISABLE);
 
+       /* WaDisablePSDDualDispatchEnable */
+       if (IS_IVB_GT1(dev))
+               I915_WRITE(GEN7_HALF_SLICE_CHICKEN1,
+                          _MASKED_BIT_ENABLE(GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE));
+       else
+               I915_WRITE(GEN7_HALF_SLICE_CHICKEN1_GT2,
+                          _MASKED_BIT_ENABLE(GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE));
+
        /* Apply the WaDisableRHWOOptimizationForRenderHang workaround. */
        I915_WRITE(GEN7_COMMON_SLICE_CHICKEN1,
                   GEN7_CSC1_RHWO_OPT_DISABLE_IN_RCC);
@@ -3582,7 +3784,18 @@ static void ivybridge_init_clock_gating(struct drm_device *dev)
        I915_WRITE(GEN7_L3CNTLREG1,
                        GEN7_WA_FOR_GEN7_L3_CONTROL);
        I915_WRITE(GEN7_L3_CHICKEN_MODE_REGISTER,
-                       GEN7_WA_L3_CHICKEN_MODE);
+                  GEN7_WA_L3_CHICKEN_MODE);
+       if (IS_IVB_GT1(dev))
+               I915_WRITE(GEN7_ROW_CHICKEN2,
+                          _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
+       else
+               I915_WRITE(GEN7_ROW_CHICKEN2_GT2,
+                          _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
+
+
+       /* WaForceL3Serialization */
+       I915_WRITE(GEN7_L3SQCREG4, I915_READ(GEN7_L3SQCREG4) &
+                  ~L3SQ_URB_READ_CAM_MATCH_DISABLE);
 
        /* According to the BSpec vol1g, bit 12 (RCPBUNIT) clock
         * gating disable must be set.  Failure to set it results in
@@ -3613,6 +3826,7 @@ static void ivybridge_init_clock_gating(struct drm_device *dev)
                intel_flush_display_plane(dev_priv, pipe);
        }
 
+       /* WaMbcDriverBootEnable */
        I915_WRITE(GEN6_MBCTL, I915_READ(GEN6_MBCTL) |
                   GEN6_MBCTL_ENABLE_BOOT_FETCH);
 
@@ -3626,39 +3840,59 @@ static void ivybridge_init_clock_gating(struct drm_device *dev)
        snpcr &= ~GEN6_MBC_SNPCR_MASK;
        snpcr |= GEN6_MBC_SNPCR_MED;
        I915_WRITE(GEN6_MBCUNIT_SNPCR, snpcr);
+
+       cpt_init_clock_gating(dev);
 }
 
 static void valleyview_init_clock_gating(struct drm_device *dev)
 {
        struct drm_i915_private *dev_priv = dev->dev_private;
        int pipe;
-       uint32_t dspclk_gate = VRHUNIT_CLOCK_GATE_DISABLE;
-
-       I915_WRITE(PCH_DSPCLK_GATE_D, dspclk_gate);
 
        I915_WRITE(WM3_LP_ILK, 0);
        I915_WRITE(WM2_LP_ILK, 0);
        I915_WRITE(WM1_LP_ILK, 0);
 
-       I915_WRITE(ILK_DSPCLK_GATE, IVB_VRHUNIT_CLK_GATE);
+       I915_WRITE(ILK_DSPCLK_GATE_D, ILK_VRHUNIT_CLOCK_GATE_DISABLE);
+
+       /* WaDisableEarlyCull */
+       I915_WRITE(_3D_CHICKEN3,
+                  _MASKED_BIT_ENABLE(_3D_CHICKEN_SF_DISABLE_OBJEND_CULL));
 
+       /* WaDisableBackToBackFlipFix */
        I915_WRITE(IVB_CHICKEN3,
                   CHICKEN3_DGMG_REQ_OUT_FIX_DISABLE |
                   CHICKEN3_DGMG_DONE_FIX_DISABLE);
 
+       I915_WRITE(GEN7_HALF_SLICE_CHICKEN1,
+                  _MASKED_BIT_ENABLE(GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE));
+
        /* Apply the WaDisableRHWOOptimizationForRenderHang workaround. */
        I915_WRITE(GEN7_COMMON_SLICE_CHICKEN1,
                   GEN7_CSC1_RHWO_OPT_DISABLE_IN_RCC);
 
        /* WaApplyL3ControlAndL3ChickenMode requires those two on Ivy Bridge */
-       I915_WRITE(GEN7_L3CNTLREG1, GEN7_WA_FOR_GEN7_L3_CONTROL);
+       I915_WRITE(GEN7_L3CNTLREG1, I915_READ(GEN7_L3CNTLREG1) | GEN7_L3AGDIS);
        I915_WRITE(GEN7_L3_CHICKEN_MODE_REGISTER, GEN7_WA_L3_CHICKEN_MODE);
 
+       /* WaForceL3Serialization */
+       I915_WRITE(GEN7_L3SQCREG4, I915_READ(GEN7_L3SQCREG4) &
+                  ~L3SQ_URB_READ_CAM_MATCH_DISABLE);
+
+       /* WaDisableDopClockGating */
+       I915_WRITE(GEN7_ROW_CHICKEN2,
+                  _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
+
+       /* WaForceL3Serialization */
+       I915_WRITE(GEN7_L3SQCREG4, I915_READ(GEN7_L3SQCREG4) &
+                  ~L3SQ_URB_READ_CAM_MATCH_DISABLE);
+
        /* This is required by WaCatErrorRejectionIssue */
        I915_WRITE(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG,
                   I915_READ(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG) |
                   GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB);
 
+       /* WaMbcDriverBootEnable */
        I915_WRITE(GEN6_MBCTL, I915_READ(GEN6_MBCTL) |
                   GEN6_MBCTL_ENABLE_BOOT_FETCH);
 
@@ -3710,6 +3944,13 @@ static void valleyview_init_clock_gating(struct drm_device *dev)
                   PIPEA_HLINE_INT_EN | PIPEA_VBLANK_INT_EN |
                   SPRITEB_FLIPDONE_INT_EN | SPRITEA_FLIPDONE_INT_EN |
                   PLANEA_FLIPDONE_INT_EN);
+
+       /*
+        * WaDisableVLVClockGating_VBIIssue
+        * Disable clock gating on th GCFG unit to prevent a delay
+        * in the reporting of vblank events.
+        */
+       I915_WRITE(VLV_GUNIT_CLOCK_GATE, GCFG_DIS);
 }
 
 static void g4x_init_clock_gating(struct drm_device *dev)
@@ -3728,6 +3969,10 @@ static void g4x_init_clock_gating(struct drm_device *dev)
        if (IS_GM45(dev))
                dspclk_gate |= DSSUNIT_CLOCK_GATE_DISABLE;
        I915_WRITE(DSPCLK_GATE_D, dspclk_gate);
+
+       /* WaDisableRenderCachePipelinedFlush */
+       I915_WRITE(CACHE_MODE_0,
+                  _MASKED_BIT_ENABLE(CM0_PIPELINED_RENDER_FLUSH_DISABLE));
 }
 
 static void crestline_init_clock_gating(struct drm_device *dev)
@@ -3783,44 +4028,11 @@ static void i830_init_clock_gating(struct drm_device *dev)
        I915_WRITE(DSPCLK_GATE_D, OVRUNIT_CLOCK_GATE_DISABLE);
 }
 
-static void ibx_init_clock_gating(struct drm_device *dev)
-{
-       struct drm_i915_private *dev_priv = dev->dev_private;
-
-       /*
-        * On Ibex Peak and Cougar Point, we need to disable clock
-        * gating for the panel power sequencer or it will fail to
-        * start up when no ports are active.
-        */
-       I915_WRITE(SOUTH_DSPCLK_GATE_D, PCH_DPLSUNIT_CLOCK_GATE_DISABLE);
-}
-
-static void cpt_init_clock_gating(struct drm_device *dev)
-{
-       struct drm_i915_private *dev_priv = dev->dev_private;
-       int pipe;
-
-       /*
-        * On Ibex Peak and Cougar Point, we need to disable clock
-        * gating for the panel power sequencer or it will fail to
-        * start up when no ports are active.
-        */
-       I915_WRITE(SOUTH_DSPCLK_GATE_D, PCH_DPLSUNIT_CLOCK_GATE_DISABLE);
-       I915_WRITE(SOUTH_CHICKEN2, I915_READ(SOUTH_CHICKEN2) |
-                  DPLS_EDP_PPS_FIX_DIS);
-       /* Without this, mode sets may fail silently on FDI */
-       for_each_pipe(pipe)
-               I915_WRITE(TRANS_CHICKEN2(pipe), TRANS_AUTOTRAIN_GEN_STALL_DIS);
-}
-
 void intel_init_clock_gating(struct drm_device *dev)
 {
        struct drm_i915_private *dev_priv = dev->dev_private;
 
        dev_priv->display.init_clock_gating(dev);
-
-       if (dev_priv->display.init_pch_clock_gating)
-               dev_priv->display.init_pch_clock_gating(dev);
 }
 
 /* Starting with Haswell, we have different power wells for
@@ -3846,7 +4058,7 @@ void intel_init_power_wells(struct drm_device *dev)
 
                if ((well & HSW_PWR_WELL_STATE) == 0) {
                        I915_WRITE(power_wells[i], well & HSW_PWR_WELL_ENABLE);
-                       if (wait_for(I915_READ(power_wells[i] & HSW_PWR_WELL_STATE), 20))
+                       if (wait_for((I915_READ(power_wells[i]) & HSW_PWR_WELL_STATE), 20))
                                DRM_ERROR("Error enabling power well %lx\n", power_wells[i]);
                }
        }
@@ -3884,11 +4096,6 @@ void intel_init_pm(struct drm_device *dev)
 
        /* For FIFO watermark updates */
        if (HAS_PCH_SPLIT(dev)) {
-               if (HAS_PCH_IBX(dev))
-                       dev_priv->display.init_pch_clock_gating = ibx_init_clock_gating;
-               else if (HAS_PCH_CPT(dev))
-                       dev_priv->display.init_pch_clock_gating = cpt_init_clock_gating;
-
                if (IS_GEN5(dev)) {
                        if (I915_READ(MLTR_ILK) & ILK_SRLT_MASK)
                                dev_priv->display.update_wm = ironlake_update_wm;
@@ -3911,7 +4118,7 @@ void intel_init_pm(struct drm_device *dev)
                } else if (IS_IVYBRIDGE(dev)) {
                        /* FIXME: detect B0+ stepping and use auto training */
                        if (SNB_READ_WM0_LATENCY()) {
-                               dev_priv->display.update_wm = sandybridge_update_wm;
+                               dev_priv->display.update_wm = ivybridge_update_wm;
                                dev_priv->display.update_sprite_wm = sandybridge_update_sprite_wm;
                        } else {
                                DRM_DEBUG_KMS("Failed to read display plane latency. "
@@ -3999,6 +4206,12 @@ static void __gen6_gt_wait_for_thread_c0(struct drm_i915_private *dev_priv)
                DRM_ERROR("GT thread status wait timed out\n");
 }
 
+static void __gen6_gt_force_wake_reset(struct drm_i915_private *dev_priv)
+{
+       I915_WRITE_NOTRACE(FORCEWAKE, 0);
+       POSTING_READ(ECOBUS); /* something from same cacheline, but !FORCEWAKE */
+}
+
 static void __gen6_gt_force_wake_get(struct drm_i915_private *dev_priv)
 {
        u32 forcewake_ack;
@@ -4012,7 +4225,7 @@ static void __gen6_gt_force_wake_get(struct drm_i915_private *dev_priv)
                            FORCEWAKE_ACK_TIMEOUT_MS))
                DRM_ERROR("Timed out waiting for forcewake old ack to clear.\n");
 
-       I915_WRITE_NOTRACE(FORCEWAKE, 1);
+       I915_WRITE_NOTRACE(FORCEWAKE, FORCEWAKE_KERNEL);
        POSTING_READ(ECOBUS); /* something from same cacheline, but !FORCEWAKE */
 
        if (wait_for_atomic((I915_READ_NOTRACE(forcewake_ack) & 1),
@@ -4022,6 +4235,12 @@ static void __gen6_gt_force_wake_get(struct drm_i915_private *dev_priv)
        __gen6_gt_wait_for_thread_c0(dev_priv);
 }
 
+static void __gen6_gt_force_wake_mt_reset(struct drm_i915_private *dev_priv)
+{
+       I915_WRITE_NOTRACE(FORCEWAKE_MT, _MASKED_BIT_DISABLE(0xffff));
+       POSTING_READ(ECOBUS); /* something from same cacheline, but !FORCEWAKE */
+}
+
 static void __gen6_gt_force_wake_mt_get(struct drm_i915_private *dev_priv)
 {
        u32 forcewake_ack;
@@ -4035,7 +4254,7 @@ static void __gen6_gt_force_wake_mt_get(struct drm_i915_private *dev_priv)
                            FORCEWAKE_ACK_TIMEOUT_MS))
                DRM_ERROR("Timed out waiting for forcewake old ack to clear.\n");
 
-       I915_WRITE_NOTRACE(FORCEWAKE_MT, _MASKED_BIT_ENABLE(1));
+       I915_WRITE_NOTRACE(FORCEWAKE_MT, _MASKED_BIT_ENABLE(FORCEWAKE_KERNEL));
        POSTING_READ(ECOBUS); /* something from same cacheline, but !FORCEWAKE */
 
        if (wait_for_atomic((I915_READ_NOTRACE(forcewake_ack) & 1),
@@ -4079,7 +4298,7 @@ static void __gen6_gt_force_wake_put(struct drm_i915_private *dev_priv)
 
 static void __gen6_gt_force_wake_mt_put(struct drm_i915_private *dev_priv)
 {
-       I915_WRITE_NOTRACE(FORCEWAKE_MT, _MASKED_BIT_DISABLE(1));
+       I915_WRITE_NOTRACE(FORCEWAKE_MT, _MASKED_BIT_DISABLE(FORCEWAKE_KERNEL));
        /* gen6_gt_check_fifodbg doubles as the POSTING_READ */
        gen6_gt_check_fifodbg(dev_priv);
 }
@@ -4117,13 +4336,18 @@ int __gen6_gt_wait_for_fifo(struct drm_i915_private *dev_priv)
        return ret;
 }
 
+static void vlv_force_wake_reset(struct drm_i915_private *dev_priv)
+{
+       I915_WRITE_NOTRACE(FORCEWAKE_VLV, _MASKED_BIT_DISABLE(0xffff));
+}
+
 static void vlv_force_wake_get(struct drm_i915_private *dev_priv)
 {
        if (wait_for_atomic((I915_READ_NOTRACE(FORCEWAKE_ACK_VLV) & 1) == 0,
                            FORCEWAKE_ACK_TIMEOUT_MS))
                DRM_ERROR("Timed out waiting for forcewake old ack to clear.\n");
 
-       I915_WRITE_NOTRACE(FORCEWAKE_VLV, _MASKED_BIT_ENABLE(1));
+       I915_WRITE_NOTRACE(FORCEWAKE_VLV, _MASKED_BIT_ENABLE(FORCEWAKE_KERNEL));
 
        if (wait_for_atomic((I915_READ_NOTRACE(FORCEWAKE_ACK_VLV) & 1),
                            FORCEWAKE_ACK_TIMEOUT_MS))
@@ -4134,49 +4358,89 @@ static void vlv_force_wake_get(struct drm_i915_private *dev_priv)
 
 static void vlv_force_wake_put(struct drm_i915_private *dev_priv)
 {
-       I915_WRITE_NOTRACE(FORCEWAKE_VLV, _MASKED_BIT_DISABLE(1));
+       I915_WRITE_NOTRACE(FORCEWAKE_VLV, _MASKED_BIT_DISABLE(FORCEWAKE_KERNEL));
        /* The below doubles as a POSTING_READ */
        gen6_gt_check_fifodbg(dev_priv);
 }
 
+void intel_gt_reset(struct drm_device *dev)
+{
+       struct drm_i915_private *dev_priv = dev->dev_private;
+
+       if (IS_VALLEYVIEW(dev)) {
+               vlv_force_wake_reset(dev_priv);
+       } else if (INTEL_INFO(dev)->gen >= 6) {
+               __gen6_gt_force_wake_reset(dev_priv);
+               if (IS_IVYBRIDGE(dev) || IS_HASWELL(dev))
+                       __gen6_gt_force_wake_mt_reset(dev_priv);
+       }
+}
+
 void intel_gt_init(struct drm_device *dev)
 {
        struct drm_i915_private *dev_priv = dev->dev_private;
 
        spin_lock_init(&dev_priv->gt_lock);
 
+       intel_gt_reset(dev);
+
        if (IS_VALLEYVIEW(dev)) {
                dev_priv->gt.force_wake_get = vlv_force_wake_get;
                dev_priv->gt.force_wake_put = vlv_force_wake_put;
-       } else if (INTEL_INFO(dev)->gen >= 6) {
+       } else if (IS_IVYBRIDGE(dev) || IS_HASWELL(dev)) {
+               dev_priv->gt.force_wake_get = __gen6_gt_force_wake_mt_get;
+               dev_priv->gt.force_wake_put = __gen6_gt_force_wake_mt_put;
+       } else if (IS_GEN6(dev)) {
                dev_priv->gt.force_wake_get = __gen6_gt_force_wake_get;
                dev_priv->gt.force_wake_put = __gen6_gt_force_wake_put;
+       }
+       INIT_DELAYED_WORK(&dev_priv->rps.delayed_resume_work,
+                         intel_gen6_powersave_work);
+}
 
-               /* IVB configs may use multi-threaded forcewake */
-               if (IS_IVYBRIDGE(dev) || IS_HASWELL(dev)) {
-                       u32 ecobus;
-
-                       /* A small trick here - if the bios hasn't configured
-                        * MT forcewake, and if the device is in RC6, then
-                        * force_wake_mt_get will not wake the device and the
-                        * ECOBUS read will return zero. Which will be
-                        * (correctly) interpreted by the test below as MT
-                        * forcewake being disabled.
-                        */
-                       mutex_lock(&dev->struct_mutex);
-                       __gen6_gt_force_wake_mt_get(dev_priv);
-                       ecobus = I915_READ_NOTRACE(ECOBUS);
-                       __gen6_gt_force_wake_mt_put(dev_priv);
-                       mutex_unlock(&dev->struct_mutex);
-
-                       if (ecobus & FORCEWAKE_MT_ENABLE) {
-                               DRM_DEBUG_KMS("Using MT version of forcewake\n");
-                               dev_priv->gt.force_wake_get =
-                                       __gen6_gt_force_wake_mt_get;
-                               dev_priv->gt.force_wake_put =
-                                       __gen6_gt_force_wake_mt_put;
-                       }
-               }
+int sandybridge_pcode_read(struct drm_i915_private *dev_priv, u8 mbox, u32 *val)
+{
+       WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
+
+       if (I915_READ(GEN6_PCODE_MAILBOX) & GEN6_PCODE_READY) {
+               DRM_DEBUG_DRIVER("warning: pcode (read) mailbox access failed\n");
+               return -EAGAIN;
+       }
+
+       I915_WRITE(GEN6_PCODE_DATA, *val);
+       I915_WRITE(GEN6_PCODE_MAILBOX, GEN6_PCODE_READY | mbox);
+
+       if (wait_for((I915_READ(GEN6_PCODE_MAILBOX) & GEN6_PCODE_READY) == 0,
+                    500)) {
+               DRM_ERROR("timeout waiting for pcode read (%d) to finish\n", mbox);
+               return -ETIMEDOUT;
        }
+
+       *val = I915_READ(GEN6_PCODE_DATA);
+       I915_WRITE(GEN6_PCODE_DATA, 0);
+
+       return 0;
 }
 
+int sandybridge_pcode_write(struct drm_i915_private *dev_priv, u8 mbox, u32 val)
+{
+       WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
+
+       if (I915_READ(GEN6_PCODE_MAILBOX) & GEN6_PCODE_READY) {
+               DRM_DEBUG_DRIVER("warning: pcode (write) mailbox access failed\n");
+               return -EAGAIN;
+       }
+
+       I915_WRITE(GEN6_PCODE_DATA, val);
+       I915_WRITE(GEN6_PCODE_MAILBOX, GEN6_PCODE_READY | mbox);
+
+       if (wait_for((I915_READ(GEN6_PCODE_MAILBOX) & GEN6_PCODE_READY) == 0,
+                    500)) {
+               DRM_ERROR("timeout waiting for pcode write (%d) to finish\n", mbox);
+               return -ETIMEDOUT;
+       }
+
+       I915_WRITE(GEN6_PCODE_DATA, 0);
+
+       return 0;
+}