drm/i915/skl: Implement WaDisableDgMirrorFixInHalfSliceChicken5:skl
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index 40c12295c0bde4648d319f3da48205ab131fc92a..4f5dcf545c8989b0e167f865827e0fbb472eb2dc 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
  * i915.i915_enable_fbc parameter
  */
 
+static void gen9_init_clock_gating(struct drm_device *dev)
+{
+       struct drm_i915_private *dev_priv = dev->dev_private;
+
+       /*
+        * WaDisableSDEUnitClockGating:skl
+        * This seems to be a pre-production w/a.
+        */
+       I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) |
+                  GEN8_SDEUNIT_CLOCK_GATE_DISABLE);
+
+       /*
+        * WaDisableDgMirrorFixInHalfSliceChicken5:skl
+        * This is a pre-production w/a.
+        */
+       I915_WRITE(GEN9_HALF_SLICE_CHICKEN5,
+                  I915_READ(GEN9_HALF_SLICE_CHICKEN5) &
+                  ~GEN9_DG_MIRROR_FIX_ENABLE);
+
+       /* Wa4x4STCOptimizationDisable:skl */
+       I915_WRITE(CACHE_MODE_1,
+                  _MASKED_BIT_ENABLE(GEN8_4x4_STC_OPTIMIZATION_DISABLE));
+}
+
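A note on the two write styles used above: GEN8_UCGCTL6 and GEN9_HALF_SLICE_CHICKEN5 are plain read-modify-write registers, while CACHE_MODE_1 is a masked register where the upper 16 bits of the written value select which of the lower 16 bits take effect. A minimal sketch of that convention, assuming the usual i915 _MASKED_BIT_ENABLE() encoding (illustration only, not part of the patch):

	/* Sketch of the masked-register convention (assumed encoding). */
	#define SKETCH_MASKED_BIT_ENABLE(a)  (((a) << 16) | (a))  /* unmask + set */
	#define SKETCH_MASKED_BIT_DISABLE(a) ((a) << 16)          /* unmask + clear */
	/* e.g. enabling bit (1 << 6) writes 0x00400040; the other bits are
	 * untouched, so no I915_READ() is needed before the write. */
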
 static void i8xx_disable_fbc(struct drm_device *dev)
 {
        struct drm_i915_private *dev_priv = dev->dev_private;
@@ -309,6 +333,9 @@ static void gen7_enable_fbc(struct drm_crtc *crtc)
 
        dpfc_ctl |= IVB_DPFC_CTL_FENCE_EN;
 
+       if (dev_priv->fbc.false_color)
+               dpfc_ctl |= FBC_CTL_FALSE_COLOR;
+
        I915_WRITE(ILK_DPFC_CONTROL, dpfc_ctl | DPFC_CTL_EN);
 
        if (IS_IVYBRIDGE(dev)) {
@@ -342,6 +369,16 @@ bool intel_fbc_enabled(struct drm_device *dev)
        return dev_priv->display.fbc_enabled(dev);
 }
 
+void gen8_fbc_sw_flush(struct drm_device *dev, u32 value)
+{
+       struct drm_i915_private *dev_priv = dev->dev_private;
+
+       if (!IS_GEN8(dev))
+               return;
+
+       I915_WRITE(MSG_FBC_REND_STATE, value);
+}
+
 static void intel_fbc_work_fn(struct work_struct *__work)
 {
        struct intel_fbc_work *work =
@@ -578,6 +615,12 @@ void intel_update_fbc(struct drm_device *dev)
                        DRM_DEBUG_KMS("framebuffer not tiled or fenced, disabling compression\n");
                goto out_disable;
        }
+       if (INTEL_INFO(dev)->gen <= 4 && !IS_G4X(dev) &&
+           to_intel_plane(crtc->primary)->rotation != BIT(DRM_ROTATE_0)) {
+               if (set_no_fbc_reason(dev_priv, FBC_UNSUPPORTED_MODE))
+                       DRM_DEBUG_KMS("Rotation unsupported, disabling\n");
+               goto out_disable;
+       }
 
        /* If the kernel debugger is active, always disable compression */
        if (in_dbg_master())
@@ -853,7 +896,7 @@ void intel_set_memory_cxsr(struct drm_i915_private *dev_priv, bool enable)
  * A value of 5us seems to be a good balance; safe for very low end
  * platforms but not overly aggressive on lower latency configs.
  */
-static const int latency_ns = 5000;
+static const int pessimal_latency_ns = 5000;
 
 static int i9xx_get_fifo_size(struct drm_device *dev, int plane)
 {
@@ -982,13 +1025,20 @@ static const struct intel_watermark_params i915_wm_info = {
        .guard_size = 2,
        .cacheline_size = I915_FIFO_LINE_SIZE,
 };
-static const struct intel_watermark_params i830_wm_info = {
+static const struct intel_watermark_params i830_a_wm_info = {
        .fifo_size = I855GM_FIFO_SIZE,
        .max_wm = I915_MAX_WM,
        .default_wm = 1,
        .guard_size = 2,
        .cacheline_size = I830_FIFO_LINE_SIZE,
 };
+static const struct intel_watermark_params i830_bc_wm_info = {
+       .fifo_size = I855GM_FIFO_SIZE,
+       .max_wm = I915_MAX_WM/2,
+       .default_wm = 1,
+       .guard_size = 2,
+       .cacheline_size = I830_FIFO_LINE_SIZE,
+};
 static const struct intel_watermark_params i845_wm_info = {
        .fifo_size = I830_FIFO_SIZE,
        .max_wm = I915_MAX_WM,
@@ -1268,33 +1318,27 @@ static bool g4x_compute_srwm(struct drm_device *dev,
                              display, cursor);
 }
 
-static bool vlv_compute_drain_latency(struct drm_device *dev,
-                                    int plane,
-                                    int *plane_prec_mult,
-                                    int *plane_dl,
-                                    int *cursor_prec_mult,
-                                    int *cursor_dl)
+static bool vlv_compute_drain_latency(struct drm_crtc *crtc,
+                                     int pixel_size,
+                                     int *prec_mult,
+                                     int *drain_latency)
 {
-       struct drm_crtc *crtc;
-       int clock, pixel_size;
        int entries;
+       int clock = to_intel_crtc(crtc)->config.adjusted_mode.crtc_clock;
 
-       crtc = intel_get_crtc_for_plane(dev, plane);
-       if (!intel_crtc_active(crtc))
+       if (WARN(clock == 0, "Pixel clock is zero!\n"))
                return false;
 
-       clock = to_intel_crtc(crtc)->config.adjusted_mode.crtc_clock;
-       pixel_size = crtc->primary->fb->bits_per_pixel / 8;     /* BPP */
+       if (WARN(pixel_size == 0, "Pixel size is zero!\n"))
+               return false;
 
-       entries = (clock / 1000) * pixel_size;
-       *plane_prec_mult = (entries > 128) ?
-               DRAIN_LATENCY_PRECISION_64 : DRAIN_LATENCY_PRECISION_32;
-       *plane_dl = (64 * (*plane_prec_mult) * 4) / entries;
+       entries = DIV_ROUND_UP(clock, 1000) * pixel_size;
+       *prec_mult = (entries > 128) ? DRAIN_LATENCY_PRECISION_64 :
+                                      DRAIN_LATENCY_PRECISION_32;
+       *drain_latency = (64 * (*prec_mult) * 4) / entries;
 
-       entries = (clock / 1000) * 4;   /* BPP is always 4 for cursor */
-       *cursor_prec_mult = (entries > 128) ?
-               DRAIN_LATENCY_PRECISION_64 : DRAIN_LATENCY_PRECISION_32;
-       *cursor_dl = (64 * (*cursor_prec_mult) * 4) / entries;
+       if (*drain_latency > DRAIN_LATENCY_MASK)
+               *drain_latency = DRAIN_LATENCY_MASK;
 
        return true;
 }
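To make the new per-CRTC math concrete, here is a hedged standalone sketch of the same calculation with a worked example; the 32/64 precision multipliers are assumed to be the literal values their names suggest (illustration only, not part of the patch):

	/* Sketch of vlv_compute_drain_latency()'s arithmetic. */
	static int sketch_drain_latency(int clock_khz, int pixel_size, int *prec_mult)
	{
		/* bytes fetched per microsecond of scanout, rounded up */
		int entries = ((clock_khz + 999) / 1000) * pixel_size;

		*prec_mult = (entries > 128) ? 64 : 32;
		return (64 * (*prec_mult) * 4) / entries;
	}
	/*
	 * Example: a 148500 kHz (1080p@60) mode at 4 bytes per pixel gives
	 * entries = 149 * 4 = 596 > 128, so prec_mult = 64 and the returned
	 * drain latency is (64 * 64 * 4) / 596 = 27, which the caller would
	 * then clamp against DRAIN_LATENCY_MASK.
	 */
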
@@ -1307,39 +1351,48 @@ static bool vlv_compute_drain_latency(struct drm_device *dev,
  * latency value.
  */
 
-static void vlv_update_drain_latency(struct drm_device *dev)
+static void vlv_update_drain_latency(struct drm_crtc *crtc)
 {
-       struct drm_i915_private *dev_priv = dev->dev_private;
-       int planea_prec, planea_dl, planeb_prec, planeb_dl;
-       int cursora_prec, cursora_dl, cursorb_prec, cursorb_dl;
-       int plane_prec_mult, cursor_prec_mult; /* Precision multiplier is
-                                                       either 16 or 32 */
+       struct drm_i915_private *dev_priv = crtc->dev->dev_private;
+       struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
+       int pixel_size;
+       int drain_latency;
+       enum pipe pipe = intel_crtc->pipe;
+       int plane_prec, prec_mult, plane_dl;
 
-       /* For plane A, Cursor A */
-       if (vlv_compute_drain_latency(dev, 0, &plane_prec_mult, &planea_dl,
-                                     &cursor_prec_mult, &cursora_dl)) {
-               cursora_prec = (cursor_prec_mult == DRAIN_LATENCY_PRECISION_32) ?
-                       DDL_CURSORA_PRECISION_32 : DDL_CURSORA_PRECISION_64;
-               planea_prec = (plane_prec_mult == DRAIN_LATENCY_PRECISION_32) ?
-                       DDL_PLANEA_PRECISION_32 : DDL_PLANEA_PRECISION_64;
+       plane_dl = I915_READ(VLV_DDL(pipe)) & ~(DDL_PLANE_PRECISION_64 |
+                  DRAIN_LATENCY_MASK | DDL_CURSOR_PRECISION_64 |
+                  (DRAIN_LATENCY_MASK << DDL_CURSOR_SHIFT));
 
-               I915_WRITE(VLV_DDL1, cursora_prec |
-                               (cursora_dl << DDL_CURSORA_SHIFT) |
-                               planea_prec | planea_dl);
+       if (!intel_crtc_active(crtc)) {
+               I915_WRITE(VLV_DDL(pipe), plane_dl);
+               return;
+       }
+
+       /* Primary plane Drain Latency */
+       pixel_size = crtc->primary->fb->bits_per_pixel / 8;     /* BPP */
+       if (vlv_compute_drain_latency(crtc, pixel_size, &prec_mult, &drain_latency)) {
+               plane_prec = (prec_mult == DRAIN_LATENCY_PRECISION_64) ?
+                                          DDL_PLANE_PRECISION_64 :
+                                          DDL_PLANE_PRECISION_32;
+               plane_dl |= plane_prec | drain_latency;
        }
 
-       /* For plane B, Cursor B */
-       if (vlv_compute_drain_latency(dev, 1, &plane_prec_mult, &planeb_dl,
-                                     &cursor_prec_mult, &cursorb_dl)) {
-               cursorb_prec = (cursor_prec_mult == DRAIN_LATENCY_PRECISION_32) ?
-                       DDL_CURSORB_PRECISION_32 : DDL_CURSORB_PRECISION_64;
-               planeb_prec = (plane_prec_mult == DRAIN_LATENCY_PRECISION_32) ?
-                       DDL_PLANEB_PRECISION_32 : DDL_PLANEB_PRECISION_64;
+       /* Cursor Drain Latency
+        * BPP is always 4 for cursor
+        */
+       pixel_size = 4;
 
-               I915_WRITE(VLV_DDL2, cursorb_prec |
-                               (cursorb_dl << DDL_CURSORB_SHIFT) |
-                               planeb_prec | planeb_dl);
+       /* Program cursor DL only if it is enabled */
+       if (intel_crtc->cursor_base &&
+           vlv_compute_drain_latency(crtc, pixel_size, &prec_mult, &drain_latency)) {
+               plane_prec = (prec_mult == DRAIN_LATENCY_PRECISION_64) ?
+                                          DDL_CURSOR_PRECISION_64 :
+                                          DDL_CURSOR_PRECISION_32;
+               plane_dl |= plane_prec | (drain_latency << DDL_CURSOR_SHIFT);
        }
+
+       I915_WRITE(VLV_DDL(pipe), plane_dl);
 }
 
 #define single_plane_enabled(mask) is_power_of_2(mask)
@@ -1355,17 +1408,17 @@ static void valleyview_update_wm(struct drm_crtc *crtc)
        unsigned int enabled = 0;
        bool cxsr_enabled;
 
-       vlv_update_drain_latency(dev);
+       vlv_update_drain_latency(crtc);
 
        if (g4x_compute_wm0(dev, PIPE_A,
-                           &valleyview_wm_info, latency_ns,
-                           &valleyview_cursor_wm_info, latency_ns,
+                           &valleyview_wm_info, pessimal_latency_ns,
+                           &valleyview_cursor_wm_info, pessimal_latency_ns,
                            &planea_wm, &cursora_wm))
                enabled |= 1 << PIPE_A;
 
        if (g4x_compute_wm0(dev, PIPE_B,
-                           &valleyview_wm_info, latency_ns,
-                           &valleyview_cursor_wm_info, latency_ns,
+                           &valleyview_wm_info, pessimal_latency_ns,
+                           &valleyview_cursor_wm_info, pessimal_latency_ns,
                            &planeb_wm, &cursorb_wm))
                enabled |= 1 << PIPE_B;
 
@@ -1387,7 +1440,8 @@ static void valleyview_update_wm(struct drm_crtc *crtc)
                plane_sr = cursor_sr = 0;
        }
 
-       DRM_DEBUG_KMS("Setting FIFO watermarks - A: plane=%d, cursor=%d, B: plane=%d, cursor=%d, SR: plane=%d, cursor=%d\n",
+       DRM_DEBUG_KMS("Setting FIFO watermarks - A: plane=%d, cursor=%d, "
+                     "B: plane=%d, cursor=%d, SR: plane=%d, cursor=%d\n",
                      planea_wm, cursora_wm,
                      planeb_wm, cursorb_wm,
                      plane_sr, cursor_sr);
@@ -1396,7 +1450,7 @@ static void valleyview_update_wm(struct drm_crtc *crtc)
                   (plane_sr << DSPFW_SR_SHIFT) |
                   (cursorb_wm << DSPFW_CURSORB_SHIFT) |
                   (planeb_wm << DSPFW_PLANEB_SHIFT) |
-                  planea_wm);
+                  (planea_wm << DSPFW_PLANEA_SHIFT));
        I915_WRITE(DSPFW2,
                   (I915_READ(DSPFW2) & ~DSPFW_CURSORA_MASK) |
                   (cursora_wm << DSPFW_CURSORA_SHIFT));
@@ -1408,6 +1462,116 @@ static void valleyview_update_wm(struct drm_crtc *crtc)
                intel_set_memory_cxsr(dev_priv, true);
 }
 
+static void cherryview_update_wm(struct drm_crtc *crtc)
+{
+       struct drm_device *dev = crtc->dev;
+       static const int sr_latency_ns = 12000;
+       struct drm_i915_private *dev_priv = dev->dev_private;
+       int planea_wm, planeb_wm, planec_wm;
+       int cursora_wm, cursorb_wm, cursorc_wm;
+       int plane_sr, cursor_sr;
+       int ignore_plane_sr, ignore_cursor_sr;
+       unsigned int enabled = 0;
+       bool cxsr_enabled;
+
+       vlv_update_drain_latency(crtc);
+
+       if (g4x_compute_wm0(dev, PIPE_A,
+                           &valleyview_wm_info, pessimal_latency_ns,
+                           &valleyview_cursor_wm_info, pessimal_latency_ns,
+                           &planea_wm, &cursora_wm))
+               enabled |= 1 << PIPE_A;
+
+       if (g4x_compute_wm0(dev, PIPE_B,
+                           &valleyview_wm_info, pessimal_latency_ns,
+                           &valleyview_cursor_wm_info, pessimal_latency_ns,
+                           &planeb_wm, &cursorb_wm))
+               enabled |= 1 << PIPE_B;
+
+       if (g4x_compute_wm0(dev, PIPE_C,
+                           &valleyview_wm_info, pessimal_latency_ns,
+                           &valleyview_cursor_wm_info, pessimal_latency_ns,
+                           &planec_wm, &cursorc_wm))
+               enabled |= 1 << PIPE_C;
+
+       if (single_plane_enabled(enabled) &&
+           g4x_compute_srwm(dev, ffs(enabled) - 1,
+                            sr_latency_ns,
+                            &valleyview_wm_info,
+                            &valleyview_cursor_wm_info,
+                            &plane_sr, &ignore_cursor_sr) &&
+           g4x_compute_srwm(dev, ffs(enabled) - 1,
+                            2*sr_latency_ns,
+                            &valleyview_wm_info,
+                            &valleyview_cursor_wm_info,
+                            &ignore_plane_sr, &cursor_sr)) {
+               cxsr_enabled = true;
+       } else {
+               cxsr_enabled = false;
+               intel_set_memory_cxsr(dev_priv, false);
+               plane_sr = cursor_sr = 0;
+       }
+
+       DRM_DEBUG_KMS("Setting FIFO watermarks - A: plane=%d, cursor=%d, "
+                     "B: plane=%d, cursor=%d, C: plane=%d, cursor=%d, "
+                     "SR: plane=%d, cursor=%d\n",
+                     planea_wm, cursora_wm,
+                     planeb_wm, cursorb_wm,
+                     planec_wm, cursorc_wm,
+                     plane_sr, cursor_sr);
+
+       I915_WRITE(DSPFW1,
+                  (plane_sr << DSPFW_SR_SHIFT) |
+                  (cursorb_wm << DSPFW_CURSORB_SHIFT) |
+                  (planeb_wm << DSPFW_PLANEB_SHIFT) |
+                  (planea_wm << DSPFW_PLANEA_SHIFT));
+       I915_WRITE(DSPFW2,
+                  (I915_READ(DSPFW2) & ~DSPFW_CURSORA_MASK) |
+                  (cursora_wm << DSPFW_CURSORA_SHIFT));
+       I915_WRITE(DSPFW3,
+                  (I915_READ(DSPFW3) & ~DSPFW_CURSOR_SR_MASK) |
+                  (cursor_sr << DSPFW_CURSOR_SR_SHIFT));
+       I915_WRITE(DSPFW9_CHV,
+                  (I915_READ(DSPFW9_CHV) & ~(DSPFW_PLANEC_MASK |
+                                             DSPFW_CURSORC_MASK)) |
+                  (planec_wm << DSPFW_PLANEC_SHIFT) |
+                  (cursorc_wm << DSPFW_CURSORC_SHIFT));
+
+       if (cxsr_enabled)
+               intel_set_memory_cxsr(dev_priv, true);
+}
+
+static void valleyview_update_sprite_wm(struct drm_plane *plane,
+                                       struct drm_crtc *crtc,
+                                       uint32_t sprite_width,
+                                       uint32_t sprite_height,
+                                       int pixel_size,
+                                       bool enabled, bool scaled)
+{
+       struct drm_device *dev = crtc->dev;
+       struct drm_i915_private *dev_priv = dev->dev_private;
+       int pipe = to_intel_plane(plane)->pipe;
+       int sprite = to_intel_plane(plane)->plane;
+       int drain_latency;
+       int plane_prec;
+       int sprite_dl;
+       int prec_mult;
+
+       sprite_dl = I915_READ(VLV_DDL(pipe)) & ~(DDL_SPRITE_PRECISION_64(sprite) |
+                   (DRAIN_LATENCY_MASK << DDL_SPRITE_SHIFT(sprite)));
+
+       if (enabled && vlv_compute_drain_latency(crtc, pixel_size, &prec_mult,
+                                                &drain_latency)) {
+               plane_prec = (prec_mult == DRAIN_LATENCY_PRECISION_64) ?
+                                          DDL_SPRITE_PRECISION_64(sprite) :
+                                          DDL_SPRITE_PRECISION_32(sprite);
+               sprite_dl |= plane_prec |
+                            (drain_latency << DDL_SPRITE_SHIFT(sprite));
+       }
+
+       I915_WRITE(VLV_DDL(pipe), sprite_dl);
+}
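The primary, cursor and sprite paths above all read-modify-write a single per-pipe VLV_DDL register, each clearing only its own precision bit and latency field before installing new values. A hedged sketch of that shared pattern, with shifts and masks kept symbolic (illustration only, not part of the patch):

	/* Sketch of the VLV_DDL(pipe) field update shared by plane/cursor/sprite. */
	static u32 sketch_ddl_update(u32 old_ddl, int shift, u32 latency_mask,
				     u32 prec64_bit, bool use_prec64, int drain_latency)
	{
		u32 ddl = old_ddl & ~(prec64_bit | (latency_mask << shift));

		if (use_prec64)
			ddl |= prec64_bit;

		return ddl | ((u32)drain_latency << shift);
	}
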
+
 static void g4x_update_wm(struct drm_crtc *crtc)
 {
        struct drm_device *dev = crtc->dev;
@@ -1419,14 +1583,14 @@ static void g4x_update_wm(struct drm_crtc *crtc)
        bool cxsr_enabled;
 
        if (g4x_compute_wm0(dev, PIPE_A,
-                           &g4x_wm_info, latency_ns,
-                           &g4x_cursor_wm_info, latency_ns,
+                           &g4x_wm_info, pessimal_latency_ns,
+                           &g4x_cursor_wm_info, pessimal_latency_ns,
                            &planea_wm, &cursora_wm))
                enabled |= 1 << PIPE_A;
 
        if (g4x_compute_wm0(dev, PIPE_B,
-                           &g4x_wm_info, latency_ns,
-                           &g4x_cursor_wm_info, latency_ns,
+                           &g4x_wm_info, pessimal_latency_ns,
+                           &g4x_cursor_wm_info, pessimal_latency_ns,
                            &planeb_wm, &cursorb_wm))
                enabled |= 1 << PIPE_B;
 
@@ -1443,7 +1607,8 @@ static void g4x_update_wm(struct drm_crtc *crtc)
                plane_sr = cursor_sr = 0;
        }
 
-       DRM_DEBUG_KMS("Setting FIFO watermarks - A: plane=%d, cursor=%d, B: plane=%d, cursor=%d, SR: plane=%d, cursor=%d\n",
+       DRM_DEBUG_KMS("Setting FIFO watermarks - A: plane=%d, cursor=%d, "
+                     "B: plane=%d, cursor=%d, SR: plane=%d, cursor=%d\n",
                      planea_wm, cursora_wm,
                      planeb_wm, cursorb_wm,
                      plane_sr, cursor_sr);
@@ -1452,7 +1617,7 @@ static void g4x_update_wm(struct drm_crtc *crtc)
                   (plane_sr << DSPFW_SR_SHIFT) |
                   (cursorb_wm << DSPFW_CURSORB_SHIFT) |
                   (planeb_wm << DSPFW_PLANEB_SHIFT) |
-                  planea_wm);
+                  (planea_wm << DSPFW_PLANEA_SHIFT));
        I915_WRITE(DSPFW2,
                   (I915_READ(DSPFW2) & ~DSPFW_CURSORA_MASK) |
                   (cursora_wm << DSPFW_CURSORA_SHIFT));
@@ -1526,8 +1691,11 @@ static void i965_update_wm(struct drm_crtc *unused_crtc)
 
        /* 965 has limitations... */
        I915_WRITE(DSPFW1, (srwm << DSPFW_SR_SHIFT) |
-                  (8 << 16) | (8 << 8) | (8 << 0));
-       I915_WRITE(DSPFW2, (8 << 8) | (8 << 0));
+                  (8 << DSPFW_CURSORB_SHIFT) |
+                  (8 << DSPFW_PLANEB_SHIFT) |
+                  (8 << DSPFW_PLANEA_SHIFT));
+       I915_WRITE(DSPFW2, (8 << DSPFW_CURSORA_SHIFT) |
+                  (8 << DSPFW_PLANEC_SHIFT_OLD));
        /* update cursor SR watermark */
        I915_WRITE(DSPFW3, (cursor_sr << DSPFW_CURSOR_SR_SHIFT));
 
@@ -1552,7 +1720,7 @@ static void i9xx_update_wm(struct drm_crtc *unused_crtc)
        else if (!IS_GEN2(dev))
                wm_info = &i915_wm_info;
        else
-               wm_info = &i830_wm_info;
+               wm_info = &i830_a_wm_info;
 
        fifo_size = dev_priv->display.get_fifo_size(dev, 0);
        crtc = intel_get_crtc_for_plane(dev, 0);
@@ -1565,10 +1733,16 @@ static void i9xx_update_wm(struct drm_crtc *unused_crtc)
                adjusted_mode = &to_intel_crtc(crtc)->config.adjusted_mode;
                planea_wm = intel_calculate_wm(adjusted_mode->crtc_clock,
                                               wm_info, fifo_size, cpp,
-                                              latency_ns);
+                                              pessimal_latency_ns);
                enabled = crtc;
-       } else
+       } else {
                planea_wm = fifo_size - wm_info->guard_size;
+               if (planea_wm > (long)wm_info->max_wm)
+                       planea_wm = wm_info->max_wm;
+       }
+
+       if (IS_GEN2(dev))
+               wm_info = &i830_bc_wm_info;
 
        fifo_size = dev_priv->display.get_fifo_size(dev, 1);
        crtc = intel_get_crtc_for_plane(dev, 1);
@@ -1581,13 +1755,16 @@ static void i9xx_update_wm(struct drm_crtc *unused_crtc)
                adjusted_mode = &to_intel_crtc(crtc)->config.adjusted_mode;
                planeb_wm = intel_calculate_wm(adjusted_mode->crtc_clock,
                                               wm_info, fifo_size, cpp,
-                                              latency_ns);
+                                              pessimal_latency_ns);
                if (enabled == NULL)
                        enabled = crtc;
                else
                        enabled = NULL;
-       } else
+       } else {
                planeb_wm = fifo_size - wm_info->guard_size;
+               if (planeb_wm > (long)wm_info->max_wm)
+                       planeb_wm = wm_info->max_wm;
+       }
 
        DRM_DEBUG_KMS("FIFO watermarks - A: %d, B: %d\n", planea_wm, planeb_wm);
 
@@ -1674,7 +1851,7 @@ static void i845_update_wm(struct drm_crtc *unused_crtc)
        planea_wm = intel_calculate_wm(adjusted_mode->crtc_clock,
                                       &i845_wm_info,
                                       dev_priv->display.get_fifo_size(dev, 0),
-                                      4, latency_ns);
+                                      4, pessimal_latency_ns);
        fwater_lo = I915_READ(FW_BLC) & ~0xfff;
        fwater_lo |= (3<<8) | planea_wm;
 
@@ -2121,7 +2298,6 @@ int ilk_wm_max_level(const struct drm_device *dev)
        else
                return 2;
 }
-
 static void intel_print_wm_latency(struct drm_device *dev,
                                   const char *name,
                                   const uint16_t wm[5])
@@ -2527,7 +2703,7 @@ static struct intel_pipe_wm *ilk_find_best_result(struct drm_device *dev,
 #define WM_DIRTY_FBC (1 << 24)
 #define WM_DIRTY_DDB (1 << 25)
 
-static unsigned int ilk_compute_wm_dirty(struct drm_device *dev,
+static unsigned int ilk_compute_wm_dirty(struct drm_i915_private *dev_priv,
                                         const struct ilk_wm_values *old,
                                         const struct ilk_wm_values *new)
 {
@@ -2535,7 +2711,7 @@ static unsigned int ilk_compute_wm_dirty(struct drm_device *dev,
        enum pipe pipe;
        int wm_lp;
 
-       for_each_pipe(pipe) {
+       for_each_pipe(dev_priv, pipe) {
                if (old->wm_linetime[pipe] != new->wm_linetime[pipe]) {
                        dirty |= WM_DIRTY_LINETIME(pipe);
                        /* Must disable LP1+ watermarks too */
@@ -2621,7 +2797,7 @@ static void ilk_write_wm_values(struct drm_i915_private *dev_priv,
        unsigned int dirty;
        uint32_t val;
 
-       dirty = ilk_compute_wm_dirty(dev, previous, results);
+       dirty = ilk_compute_wm_dirty(dev_priv, previous, results);
        if (!dirty)
                return;
 
@@ -3090,6 +3266,9 @@ static void gen6_set_rps_thresholds(struct drm_i915_private *dev_priv, u8 val)
 {
        int new_power;
 
+       if (dev_priv->rps.is_bdw_sw_turbo)
+               return;
+
        new_power = dev_priv->rps.power;
        switch (dev_priv->rps.power) {
        case LOW_POWER:
@@ -3297,8 +3476,11 @@ void gen6_rps_idle(struct drm_i915_private *dev_priv)
                        valleyview_set_rps(dev_priv->dev, dev_priv->rps.min_freq_softlimit);
                else if (IS_VALLEYVIEW(dev))
                        vlv_set_rps_idle(dev_priv);
-               else
+               else if (!dev_priv->rps.is_bdw_sw_turbo ||
+                        atomic_read(&dev_priv->rps.sw_turbo.flip_received)) {
                        gen6_set_rps(dev_priv->dev, dev_priv->rps.min_freq_softlimit);
+               }
+
                dev_priv->rps.last_adj = 0;
        }
        mutex_unlock(&dev_priv->rps.hw_lock);
@@ -3312,8 +3494,11 @@ void gen6_rps_boost(struct drm_i915_private *dev_priv)
        if (dev_priv->rps.enabled) {
                if (IS_VALLEYVIEW(dev))
                        valleyview_set_rps(dev_priv->dev, dev_priv->rps.max_freq_softlimit);
-               else
+               else if (!dev_priv->rps.is_bdw_sw_turbo ||
+                        atomic_read(&dev_priv->rps.sw_turbo.flip_received)) {
                        gen6_set_rps(dev_priv->dev, dev_priv->rps.max_freq_softlimit);
+               }
+
                dev_priv->rps.last_adj = 0;
        }
        mutex_unlock(&dev_priv->rps.hw_lock);
@@ -3332,6 +3517,10 @@ void valleyview_set_rps(struct drm_device *dev, u8 val)
                         dev_priv->rps.cur_freq,
                         vlv_gpu_freq(dev_priv, val), val);
 
+       if (WARN_ONCE(IS_CHERRYVIEW(dev) && (val & 1),
+                     "Odd GPU freq value\n"))
+               val &= ~1;
+
        if (val != dev_priv->rps.cur_freq)
                vlv_punit_write(dev_priv, PUNIT_REG_GPU_FREQ_REQ, val);
 
@@ -3344,21 +3533,26 @@ void valleyview_set_rps(struct drm_device *dev, u8 val)
 static void gen8_disable_rps_interrupts(struct drm_device *dev)
 {
        struct drm_i915_private *dev_priv = dev->dev_private;
+       if (IS_BROADWELL(dev) && dev_priv->rps.is_bdw_sw_turbo) {
+               if (atomic_read(&dev_priv->rps.sw_turbo.flip_received))
+                       del_timer(&dev_priv->rps.sw_turbo.flip_timer);
+               dev_priv->rps.is_bdw_sw_turbo = false;
+       } else {
+               I915_WRITE(GEN6_PMINTRMSK, ~GEN8_PMINTR_REDIRECT_TO_NON_DISP);
+               I915_WRITE(GEN8_GT_IER(2), I915_READ(GEN8_GT_IER(2)) &
+                                          ~dev_priv->pm_rps_events);
+               /* Complete PM interrupt masking here doesn't race with the rps work
+                * item again unmasking PM interrupts because that is using a different
+                * register (GEN8_GT_IMR(2)) to mask PM interrupts. The only risk is in
+                * leaving stale bits in GEN8_GT_IIR(2) and GEN8_GT_IMR(2) which
+                * gen8_enable_rps will clean up. */
 
-       I915_WRITE(GEN6_PMINTRMSK, ~GEN8_PMINTR_REDIRECT_TO_NON_DISP);
-       I915_WRITE(GEN8_GT_IER(2), I915_READ(GEN8_GT_IER(2)) &
-                                  ~dev_priv->pm_rps_events);
-       /* Complete PM interrupt masking here doesn't race with the rps work
-        * item again unmasking PM interrupts because that is using a different
-        * register (GEN8_GT_IMR(2)) to mask PM interrupts. The only risk is in
-        * leaving stale bits in GEN8_GT_IIR(2) and GEN8_GT_IMR(2) which
-        * gen8_enable_rps will clean up. */
-
-       spin_lock_irq(&dev_priv->irq_lock);
-       dev_priv->rps.pm_iir = 0;
-       spin_unlock_irq(&dev_priv->irq_lock);
+               spin_lock_irq(&dev_priv->irq_lock);
+               dev_priv->rps.pm_iir = 0;
+               spin_unlock_irq(&dev_priv->irq_lock);
 
-       I915_WRITE(GEN8_GT_IIR(2), dev_priv->pm_rps_events);
+               I915_WRITE(GEN8_GT_IIR(2), dev_priv->pm_rps_events);
+       }
 }
 
 static void gen6_disable_rps_interrupts(struct drm_device *dev)
@@ -3406,8 +3600,14 @@ static void valleyview_disable_rps(struct drm_device *dev)
 {
        struct drm_i915_private *dev_priv = dev->dev_private;
 
+       /* We're doing forcewake before disabling RC6;
+        * this is what the BIOS expects when going into suspend. */
+       gen6_gt_force_wake_get(dev_priv, FORCEWAKE_ALL);
+
        I915_WRITE(GEN6_RC_CONTROL, 0);
 
+       gen6_gt_force_wake_put(dev_priv, FORCEWAKE_ALL);
+
        gen6_disable_rps_interrupts(dev);
 }
 
@@ -3510,13 +3710,111 @@ static void parse_rp_state_cap(struct drm_i915_private *dev_priv, u32 rp_state_c
                dev_priv->rps.min_freq_softlimit = dev_priv->rps.min_freq;
 }
 
+static void bdw_sw_calculate_freq(struct drm_device *dev,
+               struct intel_rps_bdw_cal *c, u32 *cur_time, u32 *c0)
+{
+       struct drm_i915_private *dev_priv = dev->dev_private;
+       u64 busy = 0;
+       u32 busyness_pct = 0;
+       u32 elapsed_time = 0;
+       u16 new_freq = 0;
+
+       if (!c || !cur_time || !c0)
+               return;
+
+       if (0 == c->last_c0)
+               goto out;
+
+       /* Check Evaluation interval */
+       elapsed_time = *cur_time - c->last_ts;
+       if (elapsed_time < c->eval_interval)
+               return;
+
+       mutex_lock(&dev_priv->rps.hw_lock);
+
+       /*
+        * c0 unit in 32*1.28 usec, elapsed_time unit in 1 usec.
+        * Whole busyness_pct calculation should be
+        *     busy = ((u64)(*c0 - c->last_c0) << 5 << 7) / 100;
+        *     busyness_pct = (u32)(busy * 100 / elapsed_time);
+        * The final formula is to simplify CPU calculation
+        */
+       busy = (u64)(*c0 - c->last_c0) << 12;
+       do_div(busy, elapsed_time);
+       busyness_pct = (u32)busy;
+
+       if (c->is_up && busyness_pct >= c->it_threshold_pct)
+               new_freq = (u16)dev_priv->rps.cur_freq + 3;
+       if (!c->is_up && busyness_pct <= c->it_threshold_pct)
+               new_freq = (u16)dev_priv->rps.cur_freq - 1;
+
+       /* Adjust to new frequency busyness and compare with threshold */
+       if (0 != new_freq) {
+               if (new_freq > dev_priv->rps.max_freq_softlimit)
+                       new_freq = dev_priv->rps.max_freq_softlimit;
+               else if (new_freq < dev_priv->rps.min_freq_softlimit)
+                       new_freq = dev_priv->rps.min_freq_softlimit;
+
+               gen6_set_rps(dev, new_freq);
+       }
+
+       mutex_unlock(&dev_priv->rps.hw_lock);
+
+out:
+       c->last_c0 = *c0;
+       c->last_ts = *cur_time;
+}
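The comment in bdw_sw_calculate_freq() compresses its busyness formula into a single shift; the reduction, spelled out as a hedged sketch (illustration only, not part of the patch):

	/*
	 * One C0 tick = 32 * 1.28 us = 4096/100 us of busy time, so:
	 *   busy_us      = delta_c0 * 4096 / 100
	 *   busyness_pct = busy_us * 100 / elapsed_us
	 *                = (delta_c0 << 12) / elapsed_us
	 * which is exactly the single do_div() above, and it skips the
	 * intermediate "/ 100" truncation of the two-step form.
	 */
	static u32 sketch_busyness_pct(u64 delta_c0, u32 elapsed_us)
	{
		/* plain division used for clarity; the patch uses do_div().
		 * elapsed_us is nonzero thanks to the eval-interval check. */
		return (u32)((delta_c0 << 12) / elapsed_us);
	}
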
+
+static void gen8_set_frequency_RP0(struct work_struct *work)
+{
+       struct intel_rps_bdw_turbo *p_bdw_turbo =
+                       container_of(work, struct intel_rps_bdw_turbo, work_max_freq);
+       struct intel_gen6_power_mgmt *p_power_mgmt =
+                       container_of(p_bdw_turbo, struct intel_gen6_power_mgmt, sw_turbo);
+       struct drm_i915_private *dev_priv =
+                       container_of(p_power_mgmt, struct drm_i915_private, rps);
+
+       mutex_lock(&dev_priv->rps.hw_lock);
+       gen6_set_rps(dev_priv->dev, dev_priv->rps.rp0_freq);
+       mutex_unlock(&dev_priv->rps.hw_lock);
+}
+
+static void flip_active_timeout_handler(unsigned long var)
+{
+       struct drm_i915_private *dev_priv = (struct drm_i915_private *) var;
+
+       del_timer(&dev_priv->rps.sw_turbo.flip_timer);
+       atomic_set(&dev_priv->rps.sw_turbo.flip_received, false);
+
+       queue_work(dev_priv->wq, &dev_priv->rps.sw_turbo.work_max_freq);
+}
+
+void bdw_software_turbo(struct drm_device *dev)
+{
+       struct drm_i915_private *dev_priv = dev->dev_private;
+
+       u32 current_time = I915_READ(TIMESTAMP_CTR); /* unit in usec */
+       u32 current_c0 = I915_READ(MCHBAR_PCU_C0); /* unit in 32*1.28 usec */
+
+       bdw_sw_calculate_freq(dev, &dev_priv->rps.sw_turbo.up,
+                       &current_time, &current_c0);
+       bdw_sw_calculate_freq(dev, &dev_priv->rps.sw_turbo.down,
+                       &current_time, &current_c0);
+}
+
 static void gen8_enable_rps(struct drm_device *dev)
 {
        struct drm_i915_private *dev_priv = dev->dev_private;
        struct intel_engine_cs *ring;
        uint32_t rc6_mask = 0, rp_state_cap;
+       uint32_t threshold_up_pct, threshold_down_pct;
+       uint32_t ei_up, ei_down; /* up and down evaluation interval */
+       u32 rp_ctl_flag;
        int unused;
 
+       /* Use software Turbo for BDW */
+       dev_priv->rps.is_bdw_sw_turbo = IS_BROADWELL(dev);
+
        /* 1a: Software RC state - RC0 */
        I915_WRITE(GEN6_RC_STATE, 0);
 
@@ -3560,35 +3858,74 @@ static void gen8_enable_rps(struct drm_device *dev)
                   HSW_FREQUENCY(dev_priv->rps.rp1_freq));
        I915_WRITE(GEN6_RC_VIDEO_FREQ,
                   HSW_FREQUENCY(dev_priv->rps.rp1_freq));
-       /* NB: Docs say 1s, and 1000000 - which aren't equivalent */
-       I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 100000000 / 128); /* 1 second timeout */
+       ei_up = 84480; /* 84.48ms */
+       ei_down = 448000;
+       threshold_up_pct = 90; /* x percent busy */
+       threshold_down_pct = 70;
+
+       if (dev_priv->rps.is_bdw_sw_turbo) {
+               dev_priv->rps.sw_turbo.up.it_threshold_pct = threshold_up_pct;
+               dev_priv->rps.sw_turbo.up.eval_interval = ei_up;
+               dev_priv->rps.sw_turbo.up.is_up = true;
+               dev_priv->rps.sw_turbo.up.last_ts = 0;
+               dev_priv->rps.sw_turbo.up.last_c0 = 0;
+
+               dev_priv->rps.sw_turbo.down.it_threshold_pct = threshold_down_pct;
+               dev_priv->rps.sw_turbo.down.eval_interval = ei_down;
+               dev_priv->rps.sw_turbo.down.is_up = false;
+               dev_priv->rps.sw_turbo.down.last_ts = 0;
+               dev_priv->rps.sw_turbo.down.last_c0 = 0;
+
+               /* Start the timer to track if a flip comes */
+               dev_priv->rps.sw_turbo.timeout = 200*1000; /* in us */
+
+               init_timer(&dev_priv->rps.sw_turbo.flip_timer);
+               dev_priv->rps.sw_turbo.flip_timer.function = flip_active_timeout_handler;
+               dev_priv->rps.sw_turbo.flip_timer.data  = (unsigned long) dev_priv;
+               dev_priv->rps.sw_turbo.flip_timer.expires =
+                       usecs_to_jiffies(dev_priv->rps.sw_turbo.timeout) + jiffies;
+               add_timer(&dev_priv->rps.sw_turbo.flip_timer);
+               INIT_WORK(&dev_priv->rps.sw_turbo.work_max_freq, gen8_set_frequency_RP0);
+
+               atomic_set(&dev_priv->rps.sw_turbo.flip_received, true);
+       } else {
+               /* NB: Docs say 1s, and 1000000 - which aren't equivalent.
+                * Use a 1 second timeout. */
+               I915_WRITE(GEN6_RP_DOWN_TIMEOUT, FREQ_1_28_US(1000000));
 
-       /* Docs recommend 900MHz, and 300 MHz respectively */
-       I915_WRITE(GEN6_RP_INTERRUPT_LIMITS,
-                  dev_priv->rps.max_freq_softlimit << 24 |
-                  dev_priv->rps.min_freq_softlimit << 16);
+               /* Docs recommend 900MHz, and 300 MHz respectively */
+               I915_WRITE(GEN6_RP_INTERRUPT_LIMITS,
+                          dev_priv->rps.max_freq_softlimit << 24 |
+                          dev_priv->rps.min_freq_softlimit << 16);
 
-       I915_WRITE(GEN6_RP_UP_THRESHOLD, 7600000 / 128); /* 76ms busyness per EI, 90% */
-       I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 31300000 / 128); /* 313ms busyness per EI, 70%*/
-       I915_WRITE(GEN6_RP_UP_EI, 66000); /* 84.48ms, XXX: random? */
-       I915_WRITE(GEN6_RP_DOWN_EI, 350000); /* 448ms, XXX: random? */
+               I915_WRITE(GEN6_RP_UP_THRESHOLD,
+                       FREQ_1_28_US(ei_up * threshold_up_pct / 100));
+               I915_WRITE(GEN6_RP_DOWN_THRESHOLD,
+                       FREQ_1_28_US(ei_down * threshold_down_pct / 100));
+               I915_WRITE(GEN6_RP_UP_EI,
+                       FREQ_1_28_US(ei_up));
+               I915_WRITE(GEN6_RP_DOWN_EI,
+                       FREQ_1_28_US(ei_down));
 
-       I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10);
+               I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10);
+       }
 
        /* 5: Enable RPS */
-       I915_WRITE(GEN6_RP_CONTROL,
-                  GEN6_RP_MEDIA_TURBO |
-                  GEN6_RP_MEDIA_HW_NORMAL_MODE |
-                  GEN6_RP_MEDIA_IS_GFX |
-                  GEN6_RP_ENABLE |
-                  GEN6_RP_UP_BUSY_AVG |
-                  GEN6_RP_DOWN_IDLE_AVG);
-
-       /* 6: Ring frequency + overclocking (our driver does this later */
-
+       rp_ctl_flag = GEN6_RP_MEDIA_TURBO |
+                     GEN6_RP_MEDIA_HW_NORMAL_MODE |
+                     GEN6_RP_MEDIA_IS_GFX |
+                     GEN6_RP_UP_BUSY_AVG |
+                     GEN6_RP_DOWN_IDLE_AVG;
+       if (!dev_priv->rps.is_bdw_sw_turbo)
+               rp_ctl_flag |= GEN6_RP_ENABLE;
+
+       I915_WRITE(GEN6_RP_CONTROL, rp_ctl_flag);
+
+       /* 6: Ring frequency + overclocking
+        * (our driver does this later) */
        gen6_set_rps(dev, (I915_READ(GEN6_GT_PERF_STATUS) & 0xff00) >> 8);
-
-       gen8_enable_rps_interrupts(dev);
+       if (!dev_priv->rps.is_bdw_sw_turbo)
+               gen8_enable_rps_interrupts(dev);
 
        gen6_gt_force_wake_put(dev_priv, FORCEWAKE_ALL);
 }
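A quick arithmetic check that the new symbolic EI/threshold programming reproduces the removed magic numbers (illustrative note, not part of the patch; the FREQ_1_28_US() scaling is inferred from its name and the old divisors):

	/*
	 *   up:   84480 us * 90 / 100  = 76032 us  ~ old "76ms busyness per EI, 90%"
	 *   down: 448000 us * 70 / 100 = 313600 us ~ old "313ms busyness per EI, 70%"
	 * FREQ_1_28_US() presumably rescales microseconds into 1.28 us GT
	 * timestamp units, matching the old literals: 7600000 / 128 == 76000 / 1.28,
	 * and 100000000 / 128 == 1000000 / 1.28 for the 1 second down timeout.
	 */
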
@@ -3598,7 +3935,6 @@ static void gen6_enable_rps(struct drm_device *dev)
        struct drm_i915_private *dev_priv = dev->dev_private;
        struct intel_engine_cs *ring;
        u32 rp_state_cap;
-       u32 gt_perf_status;
        u32 rc6vids, pcu_mbox = 0, rc6_mask = 0;
        u32 gtfifodbg;
        int rc6_mode;
@@ -3623,7 +3959,6 @@ static void gen6_enable_rps(struct drm_device *dev)
        gen6_gt_force_wake_get(dev_priv, FORCEWAKE_ALL);
 
        rp_state_cap = I915_READ(GEN6_RP_STATE_CAP);
-       gt_perf_status = I915_READ(GEN6_GT_PERF_STATUS);
 
        parse_rp_state_cap(dev_priv, rp_state_cap);
 
@@ -3965,11 +4300,27 @@ static void valleyview_cleanup_pctx(struct drm_device *dev)
 static void valleyview_init_gt_powersave(struct drm_device *dev)
 {
        struct drm_i915_private *dev_priv = dev->dev_private;
+       u32 val;
 
        valleyview_setup_pctx(dev);
 
        mutex_lock(&dev_priv->rps.hw_lock);
 
+       val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS);
+       switch ((val >> 6) & 3) {
+       case 0:
+       case 1:
+               dev_priv->mem_freq = 800;
+               break;
+       case 2:
+               dev_priv->mem_freq = 1066;
+               break;
+       case 3:
+               dev_priv->mem_freq = 1333;
+               break;
+       }
+       DRM_DEBUG_DRIVER("DDR speed: %d MHz\n", dev_priv->mem_freq);
+
        dev_priv->rps.max_freq = valleyview_rps_max_freq(dev_priv);
        dev_priv->rps.rp0_freq = dev_priv->rps.max_freq;
        DRM_DEBUG_DRIVER("max GPU freq: %d MHz (%u)\n",
@@ -4004,11 +4355,38 @@ static void valleyview_init_gt_powersave(struct drm_device *dev)
 static void cherryview_init_gt_powersave(struct drm_device *dev)
 {
        struct drm_i915_private *dev_priv = dev->dev_private;
+       u32 val;
 
        cherryview_setup_pctx(dev);
 
        mutex_lock(&dev_priv->rps.hw_lock);
 
+       val = vlv_punit_read(dev_priv, CCK_FUSE_REG);
+       switch ((val >> 2) & 0x7) {
+       case 0:
+       case 1:
+               dev_priv->rps.cz_freq = 200;
+               dev_priv->mem_freq = 1600;
+               break;
+       case 2:
+               dev_priv->rps.cz_freq = 267;
+               dev_priv->mem_freq = 1600;
+               break;
+       case 3:
+               dev_priv->rps.cz_freq = 333;
+               dev_priv->mem_freq = 2000;
+               break;
+       case 4:
+               dev_priv->rps.cz_freq = 320;
+               dev_priv->mem_freq = 1600;
+               break;
+       case 5:
+               dev_priv->rps.cz_freq = 400;
+               dev_priv->mem_freq = 1600;
+               break;
+       }
+       DRM_DEBUG_DRIVER("DDR speed: %d MHz\n", dev_priv->mem_freq);
+
        dev_priv->rps.max_freq = cherryview_rps_max_freq(dev_priv);
        dev_priv->rps.rp0_freq = dev_priv->rps.max_freq;
        DRM_DEBUG_DRIVER("max GPU freq: %d MHz (%u)\n",
@@ -4030,6 +4408,12 @@ static void cherryview_init_gt_powersave(struct drm_device *dev)
                         vlv_gpu_freq(dev_priv, dev_priv->rps.min_freq),
                         dev_priv->rps.min_freq);
 
+       WARN_ONCE((dev_priv->rps.max_freq |
+                  dev_priv->rps.efficient_freq |
+                  dev_priv->rps.rp1_freq |
+                  dev_priv->rps.min_freq) & 1,
+                 "Odd GPU freq values\n");
+
        /* Preserve min/max settings in case of re-init */
        if (dev_priv->rps.max_freq_softlimit == 0)
                dev_priv->rps.max_freq_softlimit = dev_priv->rps.max_freq;
@@ -5015,6 +5399,8 @@ static void intel_gen6_powersave_work(struct work_struct *work)
                             rps.delayed_resume_work.work);
        struct drm_device *dev = dev_priv->dev;
 
+       dev_priv->rps.is_bdw_sw_turbo = false;
+
        mutex_lock(&dev_priv->rps.hw_lock);
 
        if (IS_CHERRYVIEW(dev)) {
@@ -5088,7 +5474,7 @@ static void g4x_disable_trickle_feed(struct drm_device *dev)
        struct drm_i915_private *dev_priv = dev->dev_private;
        int pipe;
 
-       for_each_pipe(pipe) {
+       for_each_pipe(dev_priv, pipe) {
                I915_WRITE(DSPCNTR(pipe),
                           I915_READ(DSPCNTR(pipe)) |
                           DISPPLANE_TRICKLE_FEED_DISABLE);
@@ -5203,7 +5589,7 @@ static void cpt_init_clock_gating(struct drm_device *dev)
        /* The below fixes the weird display corruption, a few pixels shifted
         * downward, on (only) LVDS of some HP laptops with IVY.
         */
-       for_each_pipe(pipe) {
+       for_each_pipe(dev_priv, pipe) {
                val = I915_READ(TRANS_CHICKEN2(pipe));
                val |= TRANS_CHICKEN2_TIMING_OVERRIDE;
                val &= ~TRANS_CHICKEN2_FDI_POLARITY_REVERSED;
@@ -5215,7 +5601,7 @@ static void cpt_init_clock_gating(struct drm_device *dev)
                I915_WRITE(TRANS_CHICKEN2(pipe), val);
        }
        /* WADP0ClockGatingDisable */
-       for_each_pipe(pipe) {
+       for_each_pipe(dev_priv, pipe) {
                I915_WRITE(TRANS_CHICKEN1(pipe),
                           TRANS_CHICKEN1_DP0UNIT_GC_DISABLE);
        }
@@ -5383,7 +5769,7 @@ static void lpt_suspend_hw(struct drm_device *dev)
        }
 }
 
-static void gen8_init_clock_gating(struct drm_device *dev)
+static void broadwell_init_clock_gating(struct drm_device *dev)
 {
        struct drm_i915_private *dev_priv = dev->dev_private;
        enum pipe pipe;
@@ -5395,37 +5781,12 @@ static void gen8_init_clock_gating(struct drm_device *dev)
        /* FIXME(BDW): Check all the w/a, some might only apply to
         * pre-production hw. */
 
-       /* WaDisablePartialInstShootdown:bdw */
-       I915_WRITE(GEN8_ROW_CHICKEN,
-                  _MASKED_BIT_ENABLE(PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE));
-
-       /* WaDisableThreadStallDopClockGating:bdw */
-       /* FIXME: Unclear whether we really need this on production bdw. */
-       I915_WRITE(GEN8_ROW_CHICKEN,
-                  _MASKED_BIT_ENABLE(STALL_DOP_GATING_DISABLE));
 
-       /*
-        * This GEN8_CENTROID_PIXEL_OPT_DIS W/A is only needed for
-        * pre-production hardware
-        */
-       I915_WRITE(HALF_SLICE_CHICKEN3,
-                  _MASKED_BIT_ENABLE(GEN8_CENTROID_PIXEL_OPT_DIS));
-       I915_WRITE(HALF_SLICE_CHICKEN3,
-                  _MASKED_BIT_ENABLE(GEN8_SAMPLER_POWER_BYPASS_DIS));
        I915_WRITE(GAMTARBMODE, _MASKED_BIT_ENABLE(ARB_MODE_BWGTLB_DISABLE));
 
        I915_WRITE(_3D_CHICKEN3,
                   _MASKED_BIT_ENABLE(_3D_CHICKEN_SDE_LIMIT_FIFO_POLY_DEPTH(2)));
 
-       I915_WRITE(COMMON_SLICE_CHICKEN2,
-                  _MASKED_BIT_ENABLE(GEN8_CSC2_SBE_VUE_CACHE_CONSERVATIVE));
-
-       I915_WRITE(GEN7_HALF_SLICE_CHICKEN1,
-                  _MASKED_BIT_ENABLE(GEN7_SINGLE_SUBSCAN_DISPATCH_ENABLE));
-
-       /* WaDisableDopClockGating:bdw May not be needed for production */
-       I915_WRITE(GEN7_ROW_CHICKEN2,
-                  _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
 
        /* WaSwitchSolVfFArbitrationPriority:bdw */
        I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | HSW_ECOCHK_ARB_PRIO_SOL);
@@ -5435,37 +5796,18 @@ static void gen8_init_clock_gating(struct drm_device *dev)
                   I915_READ(CHICKEN_PAR1_1) | DPA_MASK_VBLANK_SRD);
 
        /* WaPsrDPRSUnmaskVBlankInSRD:bdw */
-       for_each_pipe(pipe) {
+       for_each_pipe(dev_priv, pipe) {
                I915_WRITE(CHICKEN_PIPESL_1(pipe),
                           I915_READ(CHICKEN_PIPESL_1(pipe)) |
                           BDW_DPRS_MASK_VBLANK_SRD);
        }
 
-       /* Use Force Non-Coherent whenever executing a 3D context. This is a
-        * workaround for for a possible hang in the unlikely event a TLB
-        * invalidation occurs during a PSD flush.
-        */
-       I915_WRITE(HDC_CHICKEN0,
-                  I915_READ(HDC_CHICKEN0) |
-                  _MASKED_BIT_ENABLE(HDC_FORCE_NON_COHERENT));
-
        /* WaVSRefCountFullforceMissDisable:bdw */
        /* WaDSRefCountFullforceMissDisable:bdw */
        I915_WRITE(GEN7_FF_THREAD_MODE,
                   I915_READ(GEN7_FF_THREAD_MODE) &
                   ~(GEN8_FF_DS_REF_CNT_FFME | GEN7_FF_VS_REF_CNT_FFME));
 
-       /*
-        * BSpec recommends 8x4 when MSAA is used,
-        * however in practice 16x4 seems fastest.
-        *
-        * Note that PS/WM thread counts depend on the WIZ hashing
-        * disable bit, which we don't touch here, but it's good
-        * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
-        */
-       I915_WRITE(GEN7_GT_MODE,
-                  GEN6_WIZ_HASHING_MASK | GEN6_WIZ_HASHING_16x4);
-
        I915_WRITE(GEN6_RC_SLEEP_PSMI_CONTROL,
                   _MASKED_BIT_ENABLE(GEN8_RC_SEMA_IDLE_MSG_DISABLE));
 
@@ -5473,9 +5815,7 @@ static void gen8_init_clock_gating(struct drm_device *dev)
        I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) |
                   GEN8_SDEUNIT_CLOCK_GATE_DISABLE);
 
-       /* Wa4x4STCOptimizationDisable:bdw */
-       I915_WRITE(CACHE_MODE_1,
-                  _MASKED_BIT_ENABLE(GEN8_4x4_STC_OPTIMIZATION_DISABLE));
+       lpt_init_clock_gating(dev);
 }
 
 static void haswell_init_clock_gating(struct drm_device *dev)
@@ -5631,24 +5971,6 @@ static void ivybridge_init_clock_gating(struct drm_device *dev)
 static void valleyview_init_clock_gating(struct drm_device *dev)
 {
        struct drm_i915_private *dev_priv = dev->dev_private;
-       u32 val;
-
-       mutex_lock(&dev_priv->rps.hw_lock);
-       val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS);
-       mutex_unlock(&dev_priv->rps.hw_lock);
-       switch ((val >> 6) & 3) {
-       case 0:
-       case 1:
-               dev_priv->mem_freq = 800;
-               break;
-       case 2:
-               dev_priv->mem_freq = 1066;
-               break;
-       case 3:
-               dev_priv->mem_freq = 1333;
-               break;
-       }
-       DRM_DEBUG_DRIVER("DDR speed: %d MHz", dev_priv->mem_freq);
 
        I915_WRITE(DSPCLK_GATE_D, VRHUNIT_CLOCK_GATE_DISABLE);
 
@@ -5724,48 +6046,11 @@ static void valleyview_init_clock_gating(struct drm_device *dev)
 static void cherryview_init_clock_gating(struct drm_device *dev)
 {
        struct drm_i915_private *dev_priv = dev->dev_private;
-       u32 val;
-
-       mutex_lock(&dev_priv->rps.hw_lock);
-       val = vlv_punit_read(dev_priv, CCK_FUSE_REG);
-       mutex_unlock(&dev_priv->rps.hw_lock);
-       switch ((val >> 2) & 0x7) {
-       case 0:
-       case 1:
-                       dev_priv->rps.cz_freq = CHV_CZ_CLOCK_FREQ_MODE_200;
-                       dev_priv->mem_freq = 1600;
-                       break;
-       case 2:
-                       dev_priv->rps.cz_freq = CHV_CZ_CLOCK_FREQ_MODE_267;
-                       dev_priv->mem_freq = 1600;
-                       break;
-       case 3:
-                       dev_priv->rps.cz_freq = CHV_CZ_CLOCK_FREQ_MODE_333;
-                       dev_priv->mem_freq = 2000;
-                       break;
-       case 4:
-                       dev_priv->rps.cz_freq = CHV_CZ_CLOCK_FREQ_MODE_320;
-                       dev_priv->mem_freq = 1600;
-                       break;
-       case 5:
-                       dev_priv->rps.cz_freq = CHV_CZ_CLOCK_FREQ_MODE_400;
-                       dev_priv->mem_freq = 1600;
-                       break;
-       }
-       DRM_DEBUG_DRIVER("DDR speed: %d MHz", dev_priv->mem_freq);
 
        I915_WRITE(DSPCLK_GATE_D, VRHUNIT_CLOCK_GATE_DISABLE);
 
        I915_WRITE(MI_ARB_VLV, MI_ARB_DISPLAY_TRICKLE_FEED_DISABLE);
 
-       /* WaDisablePartialInstShootdown:chv */
-       I915_WRITE(GEN8_ROW_CHICKEN,
-                  _MASKED_BIT_ENABLE(PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE));
-
-       /* WaDisableThreadStallDopClockGating:chv */
-       I915_WRITE(GEN8_ROW_CHICKEN,
-                  _MASKED_BIT_ENABLE(STALL_DOP_GATING_DISABLE));
-
        /* WaVSRefCountFullforceMissDisable:chv */
        /* WaDSRefCountFullforceMissDisable:chv */
        I915_WRITE(GEN7_FF_THREAD_MODE,
@@ -5784,10 +6069,6 @@ static void cherryview_init_clock_gating(struct drm_device *dev)
        I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) |
                   GEN8_SDEUNIT_CLOCK_GATE_DISABLE);
 
-       /* WaDisableSamplerPowerBypass:chv (pre-production hw) */
-       I915_WRITE(HALF_SLICE_CHICKEN3,
-                  _MASKED_BIT_ENABLE(GEN8_SAMPLER_POWER_BYPASS_DIS));
-
        /* WaDisableGunitClockGating:chv (pre-production hw) */
        I915_WRITE(VLV_GUNIT_CLOCK_GATE, I915_READ(VLV_GUNIT_CLOCK_GATE) |
                   GINT_DIS);
@@ -5797,8 +6078,6 @@ static void cherryview_init_clock_gating(struct drm_device *dev)
                   _MASKED_BIT_ENABLE(GEN8_FF_DOP_CLOCK_GATE_DISABLE));
 
        /* WaDisableDopClockGating:chv (pre-production hw) */
-       I915_WRITE(GEN7_ROW_CHICKEN2,
-                  _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
        I915_WRITE(GEN6_UCGCTL1, I915_READ(GEN6_UCGCTL1) |
                   GEN6_EU_TCUNIT_CLOCK_GATE_DISABLE);
 }
@@ -5883,6 +6162,9 @@ static void gen3_init_clock_gating(struct drm_device *dev)
 
        /* On GEN3 we really need to make sure the ARB C3 LP bit is set */
        I915_WRITE(MI_ARB_STATE, _MASKED_BIT_ENABLE(MI_ARB_C3_LP_WRITE_ENABLE));
+
+       I915_WRITE(MI_ARB_STATE,
+                  _MASKED_BIT_ENABLE(MI_ARB_DISPLAY_TRICKLE_FEED_DISABLE));
 }
 
 static void i85x_init_clock_gating(struct drm_device *dev)
@@ -5894,6 +6176,9 @@ static void i85x_init_clock_gating(struct drm_device *dev)
        /* interrupts should cause a wake up from C3 */
        I915_WRITE(MI_STATE, _MASKED_BIT_ENABLE(MI_AGPBUSY_INT_EN) |
                   _MASKED_BIT_DISABLE(MI_AGPBUSY_830_MODE));
+
+       I915_WRITE(MEM_MODE,
+                  _MASKED_BIT_ENABLE(MEM_DISPLAY_TRICKLE_FEED_DISABLE));
 }
 
 static void i830_init_clock_gating(struct drm_device *dev)
@@ -5901,6 +6186,10 @@ static void i830_init_clock_gating(struct drm_device *dev)
        struct drm_i915_private *dev_priv = dev->dev_private;
 
        I915_WRITE(DSPCLK_GATE_D, OVRUNIT_CLOCK_GATE_DISABLE);
+
+       I915_WRITE(MEM_MODE,
+                  _MASKED_BIT_ENABLE(MEM_DISPLAY_A_TRICKLE_FEED_DISABLE) |
+                  _MASKED_BIT_ENABLE(MEM_DISPLAY_B_TRICKLE_FEED_DISABLE));
 }
 
 void intel_init_clock_gating(struct drm_device *dev)
@@ -6008,7 +6297,7 @@ static void hsw_power_well_post_enable(struct drm_i915_private *dev_priv)
        outb(inb(VGA_MSR_READ), VGA_MSR_WRITE);
        vga_put(dev->pdev, VGA_RSRC_LEGACY_IO);
 
-       if (IS_BROADWELL(dev))
+       if (IS_BROADWELL(dev) || (INTEL_INFO(dev)->gen >= 9))
                gen8_irq_power_well_post_enable(dev_priv);
 }
 
@@ -6203,6 +6492,8 @@ static void vlv_display_power_well_disable(struct drm_i915_private *dev_priv,
        spin_unlock_irq(&dev_priv->irq_lock);
 
        vlv_set_power_well(dev_priv, power_well, false);
+
+       vlv_power_sequencer_reset(dev_priv);
 }
 
 static void vlv_dpio_cmn_power_well_enable(struct drm_i915_private *dev_priv,
@@ -6238,12 +6529,11 @@ static void vlv_dpio_cmn_power_well_enable(struct drm_i915_private *dev_priv,
 static void vlv_dpio_cmn_power_well_disable(struct drm_i915_private *dev_priv,
                                            struct i915_power_well *power_well)
 {
-       struct drm_device *dev = dev_priv->dev;
        enum pipe pipe;
 
        WARN_ON_ONCE(power_well->data != PUNIT_POWER_WELL_DPIO_CMN_BC);
 
-       for_each_pipe(pipe)
+       for_each_pipe(dev_priv, pipe)
                assert_pll_disabled(dev_priv, pipe);
 
        /* Assert common reset */
@@ -6252,6 +6542,153 @@ static void vlv_dpio_cmn_power_well_disable(struct drm_i915_private *dev_priv,
        vlv_set_power_well(dev_priv, power_well, false);
 }
 
+static void chv_dpio_cmn_power_well_enable(struct drm_i915_private *dev_priv,
+                                          struct i915_power_well *power_well)
+{
+       enum dpio_phy phy;
+
+       WARN_ON_ONCE(power_well->data != PUNIT_POWER_WELL_DPIO_CMN_BC &&
+                    power_well->data != PUNIT_POWER_WELL_DPIO_CMN_D);
+
+       /*
+        * Enable the CRI clock source so we can get at the
+        * display and the reference clock for VGA
+        * hotplug / manual detection.
+        */
+       if (power_well->data == PUNIT_POWER_WELL_DPIO_CMN_BC) {
+               phy = DPIO_PHY0;
+               I915_WRITE(DPLL(PIPE_B), I915_READ(DPLL(PIPE_B)) |
+                          DPLL_REFA_CLK_ENABLE_VLV);
+               I915_WRITE(DPLL(PIPE_B), I915_READ(DPLL(PIPE_B)) |
+                          DPLL_REFA_CLK_ENABLE_VLV | DPLL_INTEGRATED_CRI_CLK_VLV);
+       } else {
+               phy = DPIO_PHY1;
+               I915_WRITE(DPLL(PIPE_C), I915_READ(DPLL(PIPE_C)) |
+                          DPLL_REFA_CLK_ENABLE_VLV | DPLL_INTEGRATED_CRI_CLK_VLV);
+       }
+       udelay(1); /* >10ns for cmnreset, >0ns for sidereset */
+       vlv_set_power_well(dev_priv, power_well, true);
+
+       /* Poll for phypwrgood signal */
+       if (wait_for(I915_READ(DISPLAY_PHY_STATUS) & PHY_POWERGOOD(phy), 1))
+               DRM_ERROR("Display PHY %d is not powered up\n", phy);
+
+       I915_WRITE(DISPLAY_PHY_CONTROL, I915_READ(DISPLAY_PHY_CONTROL) |
+                  PHY_COM_LANE_RESET_DEASSERT(phy));
+}
+
+static void chv_dpio_cmn_power_well_disable(struct drm_i915_private *dev_priv,
+                                           struct i915_power_well *power_well)
+{
+       enum dpio_phy phy;
+
+       WARN_ON_ONCE(power_well->data != PUNIT_POWER_WELL_DPIO_CMN_BC &&
+                    power_well->data != PUNIT_POWER_WELL_DPIO_CMN_D);
+
+       if (power_well->data == PUNIT_POWER_WELL_DPIO_CMN_BC) {
+               phy = DPIO_PHY0;
+               assert_pll_disabled(dev_priv, PIPE_A);
+               assert_pll_disabled(dev_priv, PIPE_B);
+       } else {
+               phy = DPIO_PHY1;
+               assert_pll_disabled(dev_priv, PIPE_C);
+       }
+
+       I915_WRITE(DISPLAY_PHY_CONTROL, I915_READ(DISPLAY_PHY_CONTROL) &
+                  ~PHY_COM_LANE_RESET_DEASSERT(phy));
+
+       vlv_set_power_well(dev_priv, power_well, false);
+}
+
+static bool chv_pipe_power_well_enabled(struct drm_i915_private *dev_priv,
+                                       struct i915_power_well *power_well)
+{
+       enum pipe pipe = power_well->data;
+       bool enabled;
+       u32 state, ctrl;
+
+       mutex_lock(&dev_priv->rps.hw_lock);
+
+       state = vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ) & DP_SSS_MASK(pipe);
+       /*
+        * We only ever set the power-on and power-gate states, anything
+        * else is unexpected.
+        */
+       WARN_ON(state != DP_SSS_PWR_ON(pipe) && state != DP_SSS_PWR_GATE(pipe));
+       enabled = state == DP_SSS_PWR_ON(pipe);
+
+       /*
+        * A transient state at this point would mean some unexpected party
+        * is poking at the power controls too.
+        */
+       ctrl = vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ) & DP_SSC_MASK(pipe);
+       WARN_ON(ctrl << 16 != state);
+
+       mutex_unlock(&dev_priv->rps.hw_lock);
+
+       return enabled;
+}
+
+static void chv_set_pipe_power_well(struct drm_i915_private *dev_priv,
+                                   struct i915_power_well *power_well,
+                                   bool enable)
+{
+       enum pipe pipe = power_well->data;
+       u32 state;
+       u32 ctrl;
+
+       state = enable ? DP_SSS_PWR_ON(pipe) : DP_SSS_PWR_GATE(pipe);
+
+       mutex_lock(&dev_priv->rps.hw_lock);
+
+#define COND \
+       ((vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ) & DP_SSS_MASK(pipe)) == state)
+
+       if (COND)
+               goto out;
+
+       ctrl = vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ);
+       ctrl &= ~DP_SSC_MASK(pipe);
+       ctrl |= enable ? DP_SSC_PWR_ON(pipe) : DP_SSC_PWR_GATE(pipe);
+       vlv_punit_write(dev_priv, PUNIT_REG_DSPFREQ, ctrl);
+
+       if (wait_for(COND, 100))
+               DRM_ERROR("timeout setting power well state %08x (%08x)\n",
+                         state,
+                         vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ));
+
+#undef COND
+
+out:
+       mutex_unlock(&dev_priv->rps.hw_lock);
+}
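The temporary COND macro above exists only so the expression can be handed to wait_for(); a hedged equivalent written as an explicit bounded poll, reusing only names that appear in the hunk (illustration only, not part of the patch):

	/* Sketch: poll the Punit pipe power state without the local macro.
	 * Caller holds dev_priv->rps.hw_lock, as in chv_set_pipe_power_well(). */
	static bool sketch_wait_for_pipe_power_state(struct drm_i915_private *dev_priv,
						     enum pipe pipe, u32 state)
	{
		int tries;

		for (tries = 0; tries < 100; tries++) {
			if ((vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ) &
			     DP_SSS_MASK(pipe)) == state)
				return true;
			usleep_range(1000, 2000); /* ~1 ms per try, ~100 ms total */
		}

		return false;
	}
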
+
+static void chv_pipe_power_well_sync_hw(struct drm_i915_private *dev_priv,
+                                       struct i915_power_well *power_well)
+{
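+       /* Bring the hardware in line with the current software refcount. */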
+       chv_set_pipe_power_well(dev_priv, power_well, power_well->count > 0);
+}
+
+static void chv_pipe_power_well_enable(struct drm_i915_private *dev_priv,
+                                      struct i915_power_well *power_well)
+{
+       WARN_ON_ONCE(power_well->data != PIPE_A &&
+                    power_well->data != PIPE_B &&
+                    power_well->data != PIPE_C);
+
+       chv_set_pipe_power_well(dev_priv, power_well, true);
+}
+
+static void chv_pipe_power_well_disable(struct drm_i915_private *dev_priv,
+                                       struct i915_power_well *power_well)
+{
+       WARN_ON_ONCE(power_well->data != PIPE_A &&
+                    power_well->data != PIPE_B &&
+                    power_well->data != PIPE_C);
+
+       chv_set_pipe_power_well(dev_priv, power_well, false);
+}
+
 static void check_power_well_state(struct drm_i915_private *dev_priv,
                                   struct i915_power_well *power_well)
 {
@@ -6443,6 +6880,39 @@ EXPORT_SYMBOL_GPL(i915_get_cdclk_freq);
        BIT(POWER_DOMAIN_PORT_DDI_C_4_LANES) |  \
        BIT(POWER_DOMAIN_INIT))
 
+#define CHV_PIPE_A_POWER_DOMAINS (     \
+       BIT(POWER_DOMAIN_PIPE_A) |      \
+       BIT(POWER_DOMAIN_INIT))
+
+#define CHV_PIPE_B_POWER_DOMAINS (     \
+       BIT(POWER_DOMAIN_PIPE_B) |      \
+       BIT(POWER_DOMAIN_INIT))
+
+#define CHV_PIPE_C_POWER_DOMAINS (     \
+       BIT(POWER_DOMAIN_PIPE_C) |      \
+       BIT(POWER_DOMAIN_INIT))
+
+#define CHV_DPIO_CMN_BC_POWER_DOMAINS (                \
+       BIT(POWER_DOMAIN_PORT_DDI_B_2_LANES) |  \
+       BIT(POWER_DOMAIN_PORT_DDI_B_4_LANES) |  \
+       BIT(POWER_DOMAIN_PORT_DDI_C_2_LANES) |  \
+       BIT(POWER_DOMAIN_PORT_DDI_C_4_LANES) |  \
+       BIT(POWER_DOMAIN_INIT))
+
+#define CHV_DPIO_CMN_D_POWER_DOMAINS (         \
+       BIT(POWER_DOMAIN_PORT_DDI_D_2_LANES) |  \
+       BIT(POWER_DOMAIN_PORT_DDI_D_4_LANES) |  \
+       BIT(POWER_DOMAIN_INIT))
+
+#define CHV_DPIO_TX_D_LANES_01_POWER_DOMAINS ( \
+       BIT(POWER_DOMAIN_PORT_DDI_D_2_LANES) |  \
+       BIT(POWER_DOMAIN_PORT_DDI_D_4_LANES) |  \
+       BIT(POWER_DOMAIN_INIT))
+
+#define CHV_DPIO_TX_D_LANES_23_POWER_DOMAINS ( \
+       BIT(POWER_DOMAIN_PORT_DDI_D_4_LANES) |  \
+       BIT(POWER_DOMAIN_INIT))
+
 static const struct i915_power_well_ops i9xx_always_on_power_well_ops = {
        .sync_hw = i9xx_always_on_power_well_noop,
        .enable = i9xx_always_on_power_well_noop,
@@ -6450,6 +6920,20 @@ static const struct i915_power_well_ops i9xx_always_on_power_well_ops = {
        .is_enabled = i9xx_always_on_power_well_enabled,
 };
 
+static const struct i915_power_well_ops chv_pipe_power_well_ops = {
+       .sync_hw = chv_pipe_power_well_sync_hw,
+       .enable = chv_pipe_power_well_enable,
+       .disable = chv_pipe_power_well_disable,
+       .is_enabled = chv_pipe_power_well_enabled,
+};
+
+static const struct i915_power_well_ops chv_dpio_cmn_power_well_ops = {
+       .sync_hw = vlv_power_well_sync_hw,
+       .enable = chv_dpio_cmn_power_well_enable,
+       .disable = chv_dpio_cmn_power_well_disable,
+       .is_enabled = vlv_power_well_enabled,
+};
+
 static struct i915_power_well i9xx_always_on_power_well[] = {
        {
                .name = "always-on",
@@ -6572,6 +7056,107 @@ static struct i915_power_well vlv_power_wells[] = {
        },
 };
 
+static struct i915_power_well chv_power_wells[] = {
+       {
+               .name = "always-on",
+               .always_on = 1,
+               .domains = VLV_ALWAYS_ON_POWER_DOMAINS,
+               .ops = &i9xx_always_on_power_well_ops,
+       },
+#if 0
+       {
+               .name = "display",
+               .domains = VLV_DISPLAY_POWER_DOMAINS,
+               .data = PUNIT_POWER_WELL_DISP2D,
+               .ops = &vlv_display_power_well_ops,
+       },
+       {
+               .name = "pipe-a",
+               .domains = CHV_PIPE_A_POWER_DOMAINS,
+               .data = PIPE_A,
+               .ops = &chv_pipe_power_well_ops,
+       },
+       {
+               .name = "pipe-b",
+               .domains = CHV_PIPE_B_POWER_DOMAINS,
+               .data = PIPE_B,
+               .ops = &chv_pipe_power_well_ops,
+       },
+       {
+               .name = "pipe-c",
+               .domains = CHV_PIPE_C_POWER_DOMAINS,
+               .data = PIPE_C,
+               .ops = &chv_pipe_power_well_ops,
+       },
+#endif
+       {
+               .name = "dpio-common-bc",
+               /*
+                * XXX: cmnreset for one PHY seems to disturb the other.
+                * As a workaround, keep both powered on at the same
+                * time for now.
+                */
+               .domains = CHV_DPIO_CMN_BC_POWER_DOMAINS | CHV_DPIO_CMN_D_POWER_DOMAINS,
+               .data = PUNIT_POWER_WELL_DPIO_CMN_BC,
+               .ops = &chv_dpio_cmn_power_well_ops,
+       },
+       {
+               .name = "dpio-common-d",
+               /*
+                * XXX: cmnreset for one PHY seems to disturb the other.
+                * As a workaround, keep both powered on at the same
+                * time for now.
+                */
+               .domains = CHV_DPIO_CMN_BC_POWER_DOMAINS | CHV_DPIO_CMN_D_POWER_DOMAINS,
+               .data = PUNIT_POWER_WELL_DPIO_CMN_D,
+               .ops = &chv_dpio_cmn_power_well_ops,
+       },
+#if 0
+       {
+               .name = "dpio-tx-b-01",
+               .domains = VLV_DPIO_TX_B_LANES_01_POWER_DOMAINS |
+                          VLV_DPIO_TX_B_LANES_23_POWER_DOMAINS,
+               .ops = &vlv_dpio_power_well_ops,
+               .data = PUNIT_POWER_WELL_DPIO_TX_B_LANES_01,
+       },
+       {
+               .name = "dpio-tx-b-23",
+               .domains = VLV_DPIO_TX_B_LANES_01_POWER_DOMAINS |
+                          VLV_DPIO_TX_B_LANES_23_POWER_DOMAINS,
+               .ops = &vlv_dpio_power_well_ops,
+               .data = PUNIT_POWER_WELL_DPIO_TX_B_LANES_23,
+       },
+       {
+               .name = "dpio-tx-c-01",
+               .domains = VLV_DPIO_TX_C_LANES_01_POWER_DOMAINS |
+                          VLV_DPIO_TX_C_LANES_23_POWER_DOMAINS,
+               .ops = &vlv_dpio_power_well_ops,
+               .data = PUNIT_POWER_WELL_DPIO_TX_C_LANES_01,
+       },
+       {
+               .name = "dpio-tx-c-23",
+               .domains = VLV_DPIO_TX_C_LANES_01_POWER_DOMAINS |
+                          VLV_DPIO_TX_C_LANES_23_POWER_DOMAINS,
+               .ops = &vlv_dpio_power_well_ops,
+               .data = PUNIT_POWER_WELL_DPIO_TX_C_LANES_23,
+       },
+       {
+               .name = "dpio-tx-d-01",
+               .domains = CHV_DPIO_TX_D_LANES_01_POWER_DOMAINS |
+                          CHV_DPIO_TX_D_LANES_23_POWER_DOMAINS,
+               .ops = &vlv_dpio_power_well_ops,
+               .data = PUNIT_POWER_WELL_DPIO_TX_D_LANES_01,
+       },
+       {
+               .name = "dpio-tx-d-23",
+               .domains = CHV_DPIO_TX_D_LANES_01_POWER_DOMAINS |
+                          CHV_DPIO_TX_D_LANES_23_POWER_DOMAINS,
+               .ops = &vlv_dpio_power_well_ops,
+               .data = PUNIT_POWER_WELL_DPIO_TX_D_LANES_23,
+       },
+#endif
+};
+
 static struct i915_power_well *lookup_power_well(struct drm_i915_private *dev_priv,
                                                 enum punit_power_well power_well_id)
 {
@@ -6608,6 +7193,8 @@ int intel_power_domains_init(struct drm_i915_private *dev_priv)
        } else if (IS_BROADWELL(dev_priv->dev)) {
                set_power_wells(power_domains, bdw_power_wells);
                hsw_pwr = power_domains;
+       } else if (IS_CHERRYVIEW(dev_priv->dev)) {
+               set_power_wells(power_domains, chv_power_wells);
        } else if (IS_VALLEYVIEW(dev_priv->dev)) {
                set_power_wells(power_domains, vlv_power_wells);
        } else {
@@ -6833,13 +7420,17 @@ void intel_init_pm(struct drm_device *dev)
                else if (IS_HASWELL(dev))
                        dev_priv->display.init_clock_gating = haswell_init_clock_gating;
                else if (INTEL_INFO(dev)->gen == 8)
-                       dev_priv->display.init_clock_gating = gen8_init_clock_gating;
+                       dev_priv->display.init_clock_gating = broadwell_init_clock_gating;
+               else if (INTEL_INFO(dev)->gen == 9)
+                       dev_priv->display.init_clock_gating = gen9_init_clock_gating;
        } else if (IS_CHERRYVIEW(dev)) {
-               dev_priv->display.update_wm = valleyview_update_wm;
+               dev_priv->display.update_wm = cherryview_update_wm;
+               dev_priv->display.update_sprite_wm = valleyview_update_sprite_wm;
                dev_priv->display.init_clock_gating =
                        cherryview_init_clock_gating;
        } else if (IS_VALLEYVIEW(dev)) {
                dev_priv->display.update_wm = valleyview_update_wm;
+               dev_priv->display.update_sprite_wm = valleyview_update_sprite_wm;
                dev_priv->display.init_clock_gating =
                        valleyview_init_clock_gating;
        } else if (IS_PINEVIEW(dev)) {
@@ -7025,6 +7616,7 @@ static int chv_freq_opcode(struct drm_i915_private *dev_priv, int val)
                return -1;
        }
 
+       /* CHV needs even opcode values, hence the trailing multiply by two */
        opcode = (DIV_ROUND_CLOSEST((val * 2 * mul), dev_priv->rps.cz_freq) * 2);
 
        return opcode;