X-Git-Url: http://demsky.eecs.uci.edu/git/?a=blobdiff_plain;f=drivers%2Fgpu%2Fdrm%2Fi915%2Fintel_pm.c;h=417ba880c427f90a3e2061355bbb80ecf6909e0e;hb=54499b2a926964b6b671fd03dcdc83c444b8f467;hp=ad2fd605f76be43c847807996a0db06b8b8dbc05;hpb=0485c9dc24ec0939b42ca5104c0373297506b555;p=firefly-linux-kernel-4.4.55.git diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index ad2fd605f76b..417ba880c427 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -30,9 +30,6 @@ #include "intel_drv.h" #include "../../../platform/x86/intel_ips.h" #include -#include -#include -#include /** * RC6 is a special power stage which allows the GPU to enter an very @@ -66,11 +63,37 @@ * i915.i915_enable_fbc parameter */ +static void gen9_init_clock_gating(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + + /* + * WaDisableSDEUnitClockGating:skl + * This seems to be a pre-production w/a. + */ + I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) | + GEN8_SDEUNIT_CLOCK_GATE_DISABLE); + + /* + * WaDisableDgMirrorFixInHalfSliceChicken5:skl + * This is a pre-production w/a. + */ + I915_WRITE(GEN9_HALF_SLICE_CHICKEN5, + I915_READ(GEN9_HALF_SLICE_CHICKEN5) & + ~GEN9_DG_MIRROR_FIX_ENABLE); + + /* Wa4x4STCOptimizationDisable:skl */ + I915_WRITE(CACHE_MODE_1, + _MASKED_BIT_ENABLE(GEN8_4x4_STC_OPTIMIZATION_DISABLE)); +} + static void i8xx_disable_fbc(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; u32 fbc_ctl; + dev_priv->fbc.enabled = false; + /* Disable compression */ fbc_ctl = I915_READ(FBC_CONTROL); if ((fbc_ctl & FBC_CTL_EN) == 0) @@ -99,6 +122,8 @@ static void i8xx_enable_fbc(struct drm_crtc *crtc) int i; u32 fbc_ctl; + dev_priv->fbc.enabled = true; + cfb_pitch = dev_priv->fbc.size / FBC_LL_SIZE; if (fb->pitches[0] < cfb_pitch) cfb_pitch = fb->pitches[0]; @@ -153,6 +178,8 @@ static void g4x_enable_fbc(struct drm_crtc *crtc) struct intel_crtc *intel_crtc = to_intel_crtc(crtc); u32 dpfc_ctl; + dev_priv->fbc.enabled = true; + dpfc_ctl = DPFC_CTL_PLANE(intel_crtc->plane) | DPFC_SR_EN; if (drm_format_plane_cpp(fb->pixel_format, 0) == 2) dpfc_ctl |= DPFC_CTL_LIMIT_2X; @@ -173,6 +200,8 @@ static void g4x_disable_fbc(struct drm_device *dev) struct drm_i915_private *dev_priv = dev->dev_private; u32 dpfc_ctl; + dev_priv->fbc.enabled = false; + /* Disable compression */ dpfc_ctl = I915_READ(DPFC_CONTROL); if (dpfc_ctl & DPFC_CTL_EN) { @@ -224,6 +253,8 @@ static void ironlake_enable_fbc(struct drm_crtc *crtc) struct intel_crtc *intel_crtc = to_intel_crtc(crtc); u32 dpfc_ctl; + dev_priv->fbc.enabled = true; + dpfc_ctl = DPFC_CTL_PLANE(intel_crtc->plane); if (drm_format_plane_cpp(fb->pixel_format, 0) == 2) dev_priv->fbc.threshold++; @@ -264,6 +295,8 @@ static void ironlake_disable_fbc(struct drm_device *dev) struct drm_i915_private *dev_priv = dev->dev_private; u32 dpfc_ctl; + dev_priv->fbc.enabled = false; + /* Disable compression */ dpfc_ctl = I915_READ(ILK_DPFC_CONTROL); if (dpfc_ctl & DPFC_CTL_EN) { @@ -290,6 +323,8 @@ static void gen7_enable_fbc(struct drm_crtc *crtc) struct intel_crtc *intel_crtc = to_intel_crtc(crtc); u32 dpfc_ctl; + dev_priv->fbc.enabled = true; + dpfc_ctl = IVB_DPFC_CTL_PLANE(intel_crtc->plane); if (drm_format_plane_cpp(fb->pixel_format, 0) == 2) dev_priv->fbc.threshold++; @@ -339,19 +374,19 @@ bool intel_fbc_enabled(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; - if (!dev_priv->display.fbc_enabled) - return false; - - return dev_priv->display.fbc_enabled(dev); + return dev_priv->fbc.enabled; } -void gen8_fbc_sw_flush(struct drm_device *dev, u32 value) +void bdw_fbc_sw_flush(struct drm_device *dev, u32 value) { struct drm_i915_private *dev_priv = dev->dev_private; if (!IS_GEN8(dev)) return; + if (!intel_fbc_enabled(dev)) + return; + I915_WRITE(MSG_FBC_REND_STATE, value); } @@ -1310,6 +1345,7 @@ static bool vlv_compute_drain_latency(struct drm_crtc *crtc, int *prec_mult, int *drain_latency) { + struct drm_device *dev = crtc->dev; int entries; int clock = to_intel_crtc(crtc)->config.adjusted_mode.crtc_clock; @@ -1320,8 +1356,12 @@ static bool vlv_compute_drain_latency(struct drm_crtc *crtc, return false; entries = DIV_ROUND_UP(clock, 1000) * pixel_size; - *prec_mult = (entries > 128) ? DRAIN_LATENCY_PRECISION_64 : - DRAIN_LATENCY_PRECISION_32; + if (IS_CHERRYVIEW(dev)) + *prec_mult = (entries > 128) ? DRAIN_LATENCY_PRECISION_32 : + DRAIN_LATENCY_PRECISION_16; + else + *prec_mult = (entries > 128) ? DRAIN_LATENCY_PRECISION_64 : + DRAIN_LATENCY_PRECISION_32; *drain_latency = (64 * (*prec_mult) * 4) / entries; if (*drain_latency > DRAIN_LATENCY_MASK) @@ -1340,15 +1380,18 @@ static bool vlv_compute_drain_latency(struct drm_crtc *crtc, static void vlv_update_drain_latency(struct drm_crtc *crtc) { - struct drm_i915_private *dev_priv = crtc->dev->dev_private; + struct drm_device *dev = crtc->dev; + struct drm_i915_private *dev_priv = dev->dev_private; struct intel_crtc *intel_crtc = to_intel_crtc(crtc); int pixel_size; int drain_latency; enum pipe pipe = intel_crtc->pipe; int plane_prec, prec_mult, plane_dl; + const int high_precision = IS_CHERRYVIEW(dev) ? + DRAIN_LATENCY_PRECISION_32 : DRAIN_LATENCY_PRECISION_64; - plane_dl = I915_READ(VLV_DDL(pipe)) & ~(DDL_PLANE_PRECISION_64 | - DRAIN_LATENCY_MASK | DDL_CURSOR_PRECISION_64 | + plane_dl = I915_READ(VLV_DDL(pipe)) & ~(DDL_PLANE_PRECISION_HIGH | + DRAIN_LATENCY_MASK | DDL_CURSOR_PRECISION_HIGH | (DRAIN_LATENCY_MASK << DDL_CURSOR_SHIFT)); if (!intel_crtc_active(crtc)) { @@ -1359,9 +1402,9 @@ static void vlv_update_drain_latency(struct drm_crtc *crtc) /* Primary plane Drain Latency */ pixel_size = crtc->primary->fb->bits_per_pixel / 8; /* BPP */ if (vlv_compute_drain_latency(crtc, pixel_size, &prec_mult, &drain_latency)) { - plane_prec = (prec_mult == DRAIN_LATENCY_PRECISION_64) ? - DDL_PLANE_PRECISION_64 : - DDL_PLANE_PRECISION_32; + plane_prec = (prec_mult == high_precision) ? + DDL_PLANE_PRECISION_HIGH : + DDL_PLANE_PRECISION_LOW; plane_dl |= plane_prec | drain_latency; } @@ -1373,9 +1416,9 @@ static void vlv_update_drain_latency(struct drm_crtc *crtc) /* Program cursor DL only if it is enabled */ if (intel_crtc->cursor_base && vlv_compute_drain_latency(crtc, pixel_size, &prec_mult, &drain_latency)) { - plane_prec = (prec_mult == DRAIN_LATENCY_PRECISION_64) ? - DDL_CURSOR_PRECISION_64 : - DDL_CURSOR_PRECISION_32; + plane_prec = (prec_mult == high_precision) ? + DDL_CURSOR_PRECISION_HIGH : + DDL_CURSOR_PRECISION_LOW; plane_dl |= plane_prec | (drain_latency << DDL_CURSOR_SHIFT); } @@ -1543,15 +1586,17 @@ static void valleyview_update_sprite_wm(struct drm_plane *plane, int plane_prec; int sprite_dl; int prec_mult; + const int high_precision = IS_CHERRYVIEW(dev) ? + DRAIN_LATENCY_PRECISION_32 : DRAIN_LATENCY_PRECISION_64; - sprite_dl = I915_READ(VLV_DDL(pipe)) & ~(DDL_SPRITE_PRECISION_64(sprite) | + sprite_dl = I915_READ(VLV_DDL(pipe)) & ~(DDL_SPRITE_PRECISION_HIGH(sprite) | (DRAIN_LATENCY_MASK << DDL_SPRITE_SHIFT(sprite))); if (enabled && vlv_compute_drain_latency(crtc, pixel_size, &prec_mult, &drain_latency)) { - plane_prec = (prec_mult == DRAIN_LATENCY_PRECISION_64) ? - DDL_SPRITE_PRECISION_64(sprite) : - DDL_SPRITE_PRECISION_32(sprite); + plane_prec = (prec_mult == high_precision) ? + DDL_SPRITE_PRECISION_HIGH(sprite) : + DDL_SPRITE_PRECISION_LOW(sprite); sprite_dl |= plane_prec | (drain_latency << DDL_SPRITE_SHIFT(sprite)); } @@ -1915,6 +1960,14 @@ static uint32_t ilk_wm_fbc(uint32_t pri_val, uint32_t horiz_pixels, return DIV_ROUND_UP(pri_val * 64, horiz_pixels * bytes_per_pixel) + 2; } +struct skl_pipe_wm_parameters { + bool active; + uint32_t pipe_htotal; + uint32_t pixel_rate; /* in KHz */ + struct intel_plane_wm_parameters plane[I915_MAX_PLANES]; + struct intel_plane_wm_parameters cursor; +}; + struct ilk_pipe_wm_parameters { bool active; uint32_t pipe_htotal; @@ -2226,11 +2279,82 @@ hsw_compute_linetime_wm(struct drm_device *dev, struct drm_crtc *crtc) PIPE_WM_LINETIME_TIME(linetime); } -static void intel_read_wm_latency(struct drm_device *dev, uint16_t wm[5]) +static void intel_read_wm_latency(struct drm_device *dev, uint16_t wm[8]) { struct drm_i915_private *dev_priv = dev->dev_private; - if (IS_HASWELL(dev) || IS_BROADWELL(dev)) { + if (IS_GEN9(dev)) { + uint32_t val; + int ret, i; + int level, max_level = ilk_wm_max_level(dev); + + /* read the first set of memory latencies[0:3] */ + val = 0; /* data0 to be programmed to 0 for first set */ + mutex_lock(&dev_priv->rps.hw_lock); + ret = sandybridge_pcode_read(dev_priv, + GEN9_PCODE_READ_MEM_LATENCY, + &val); + mutex_unlock(&dev_priv->rps.hw_lock); + + if (ret) { + DRM_ERROR("SKL Mailbox read error = %d\n", ret); + return; + } + + wm[0] = val & GEN9_MEM_LATENCY_LEVEL_MASK; + wm[1] = (val >> GEN9_MEM_LATENCY_LEVEL_1_5_SHIFT) & + GEN9_MEM_LATENCY_LEVEL_MASK; + wm[2] = (val >> GEN9_MEM_LATENCY_LEVEL_2_6_SHIFT) & + GEN9_MEM_LATENCY_LEVEL_MASK; + wm[3] = (val >> GEN9_MEM_LATENCY_LEVEL_3_7_SHIFT) & + GEN9_MEM_LATENCY_LEVEL_MASK; + + /* read the second set of memory latencies[4:7] */ + val = 1; /* data0 to be programmed to 1 for second set */ + mutex_lock(&dev_priv->rps.hw_lock); + ret = sandybridge_pcode_read(dev_priv, + GEN9_PCODE_READ_MEM_LATENCY, + &val); + mutex_unlock(&dev_priv->rps.hw_lock); + if (ret) { + DRM_ERROR("SKL Mailbox read error = %d\n", ret); + return; + } + + wm[4] = val & GEN9_MEM_LATENCY_LEVEL_MASK; + wm[5] = (val >> GEN9_MEM_LATENCY_LEVEL_1_5_SHIFT) & + GEN9_MEM_LATENCY_LEVEL_MASK; + wm[6] = (val >> GEN9_MEM_LATENCY_LEVEL_2_6_SHIFT) & + GEN9_MEM_LATENCY_LEVEL_MASK; + wm[7] = (val >> GEN9_MEM_LATENCY_LEVEL_3_7_SHIFT) & + GEN9_MEM_LATENCY_LEVEL_MASK; + + /* + * punit doesn't take into account the read latency so we need + * to add 2us to the various latency levels we retrieve from + * the punit. + * - W0 is a bit special in that it's the only level that + * can't be disabled if we want to have display working, so + * we always add 2us there. + * - For levels >=1, punit returns 0us latency when they are + * disabled, so we respect that and don't add 2us then + * + * Additionally, if a level n (n > 1) has a 0us latency, all + * levels m (m >= n) need to be disabled. We make sure to + * sanitize the values out of the punit to satisfy this + * requirement. + */ + wm[0] += 2; + for (level = 1; level <= max_level; level++) + if (wm[level] != 0) + wm[level] += 2; + else { + for (i = level + 1; i <= max_level; i++) + wm[i] = 0; + + break; + } + } else if (IS_HASWELL(dev) || IS_BROADWELL(dev)) { uint64_t sskpd = I915_READ64(MCH_SSKPD); wm[0] = (sskpd >> 56) & 0xFF; @@ -2278,7 +2402,9 @@ static void intel_fixup_cur_wm_latency(struct drm_device *dev, uint16_t wm[5]) int ilk_wm_max_level(const struct drm_device *dev) { /* how many WM levels are we expecting */ - if (IS_HASWELL(dev) || IS_BROADWELL(dev)) + if (IS_GEN9(dev)) + return 7; + else if (IS_HASWELL(dev) || IS_BROADWELL(dev)) return 4; else if (INTEL_INFO(dev)->gen >= 6) return 3; @@ -2288,7 +2414,7 @@ int ilk_wm_max_level(const struct drm_device *dev) static void intel_print_wm_latency(struct drm_device *dev, const char *name, - const uint16_t wm[5]) + const uint16_t wm[8]) { int level, max_level = ilk_wm_max_level(dev); @@ -2301,8 +2427,13 @@ static void intel_print_wm_latency(struct drm_device *dev, continue; } - /* WM1+ latency values in 0.5us units */ - if (level > 0) + /* + * - latencies are in us on gen9. + * - before then, WM1+ latency values are in 0.5us units + */ + if (IS_GEN9(dev)) + latency *= 10; + else if (level > 0) latency *= 5; DRM_DEBUG_KMS("%s WM%d latency %u (%u.%u usec)\n", @@ -2370,6 +2501,14 @@ static void ilk_setup_wm_latency(struct drm_device *dev) snb_wm_latency_quirk(dev); } +static void skl_setup_wm_latency(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + + intel_read_wm_latency(dev, dev_priv->wm.skl_latency); + intel_print_wm_latency(dev, "Gen9 Plane", dev_priv->wm.skl_latency); +} + static void ilk_compute_wm_parameters(struct drm_crtc *crtc, struct ilk_pipe_wm_parameters *p) { @@ -2860,4337 +2999,4060 @@ static bool ilk_disable_lp_wm(struct drm_device *dev) return _ilk_disable_lp_wm(dev_priv, WM_DIRTY_LP_ALL); } -static void ilk_update_wm(struct drm_crtc *crtc) -{ - struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - struct drm_device *dev = crtc->dev; - struct drm_i915_private *dev_priv = dev->dev_private; - struct ilk_wm_maximums max; - struct ilk_pipe_wm_parameters params = {}; - struct ilk_wm_values results = {}; - enum intel_ddb_partitioning partitioning; - struct intel_pipe_wm pipe_wm = {}; - struct intel_pipe_wm lp_wm_1_2 = {}, lp_wm_5_6 = {}, *best_lp_wm; - struct intel_wm_config config = {}; +/* + * On gen9, we need to allocate Display Data Buffer (DDB) portions to the + * different active planes. + */ - ilk_compute_wm_parameters(crtc, ¶ms); +#define SKL_DDB_SIZE 896 /* in blocks */ - intel_compute_pipe_wm(crtc, ¶ms, &pipe_wm); +static void +skl_ddb_get_pipe_allocation_limits(struct drm_device *dev, + struct drm_crtc *for_crtc, + const struct intel_wm_config *config, + const struct skl_pipe_wm_parameters *params, + struct skl_ddb_entry *alloc /* out */) +{ + struct drm_crtc *crtc; + unsigned int pipe_size, ddb_size; + int nth_active_pipe; - if (!memcmp(&intel_crtc->wm.active, &pipe_wm, sizeof(pipe_wm))) + if (!params->active) { + alloc->start = 0; + alloc->end = 0; return; + } - intel_crtc->wm.active = pipe_wm; + ddb_size = SKL_DDB_SIZE; - ilk_compute_wm_config(dev, &config); + ddb_size -= 4; /* 4 blocks for bypass path allocation */ - ilk_compute_wm_maximums(dev, 1, &config, INTEL_DDB_PART_1_2, &max); - ilk_wm_merge(dev, &config, &max, &lp_wm_1_2); + nth_active_pipe = 0; + for_each_crtc(dev, crtc) { + if (!intel_crtc_active(crtc)) + continue; - /* 5/6 split only in single pipe config on IVB+ */ - if (INTEL_INFO(dev)->gen >= 7 && - config.num_pipes_active == 1 && config.sprites_enabled) { - ilk_compute_wm_maximums(dev, 1, &config, INTEL_DDB_PART_5_6, &max); - ilk_wm_merge(dev, &config, &max, &lp_wm_5_6); + if (crtc == for_crtc) + break; - best_lp_wm = ilk_find_best_result(dev, &lp_wm_1_2, &lp_wm_5_6); - } else { - best_lp_wm = &lp_wm_1_2; + nth_active_pipe++; } - partitioning = (best_lp_wm == &lp_wm_1_2) ? - INTEL_DDB_PART_1_2 : INTEL_DDB_PART_5_6; + pipe_size = ddb_size / config->num_pipes_active; + alloc->start = nth_active_pipe * ddb_size / config->num_pipes_active; + alloc->end = alloc->start + pipe_size; +} - ilk_compute_wm_results(dev, best_lp_wm, partitioning, &results); +static unsigned int skl_cursor_allocation(const struct intel_wm_config *config) +{ + if (config->num_pipes_active == 1) + return 32; - ilk_write_wm_values(dev_priv, &results); + return 8; } -static void -ilk_update_sprite_wm(struct drm_plane *plane, - struct drm_crtc *crtc, - uint32_t sprite_width, uint32_t sprite_height, - int pixel_size, bool enabled, bool scaled) +static void skl_ddb_entry_init_from_hw(struct skl_ddb_entry *entry, u32 reg) { - struct drm_device *dev = plane->dev; - struct intel_plane *intel_plane = to_intel_plane(plane); + entry->start = reg & 0x3ff; + entry->end = (reg >> 16) & 0x3ff; + if (entry->end) + entry->end += 1; +} - intel_plane->wm.enabled = enabled; - intel_plane->wm.scaled = scaled; - intel_plane->wm.horiz_pixels = sprite_width; - intel_plane->wm.vert_pixels = sprite_width; - intel_plane->wm.bytes_per_pixel = pixel_size; +void skl_ddb_get_hw_state(struct drm_i915_private *dev_priv, + struct skl_ddb_allocation *ddb /* out */) +{ + struct drm_device *dev = dev_priv->dev; + enum pipe pipe; + int plane; + u32 val; - /* - * IVB workaround: must disable low power watermarks for at least - * one frame before enabling scaling. LP watermarks can be re-enabled - * when scaling is disabled. - * - * WaCxSRDisabledForSpriteScaling:ivb - */ - if (IS_IVYBRIDGE(dev) && scaled && ilk_disable_lp_wm(dev)) - intel_wait_for_vblank(dev, intel_plane->pipe); + for_each_pipe(dev_priv, pipe) { + for_each_plane(pipe, plane) { + val = I915_READ(PLANE_BUF_CFG(pipe, plane)); + skl_ddb_entry_init_from_hw(&ddb->plane[pipe][plane], + val); + } - ilk_update_wm(crtc); + val = I915_READ(CUR_BUF_CFG(pipe)); + skl_ddb_entry_init_from_hw(&ddb->cursor[pipe], val); + } } -static void ilk_pipe_wm_get_hw_state(struct drm_crtc *crtc) +static unsigned int +skl_plane_relative_data_rate(const struct intel_plane_wm_parameters *p) +{ + return p->horiz_pixels * p->vert_pixels * p->bytes_per_pixel; +} + +/* + * We don't overflow 32 bits. Worst case is 3 planes enabled, each fetching + * a 8192x4096@32bpp framebuffer: + * 3 * 4096 * 8192 * 4 < 2^32 + */ +static unsigned int +skl_get_total_relative_data_rate(struct intel_crtc *intel_crtc, + const struct skl_pipe_wm_parameters *params) +{ + unsigned int total_data_rate = 0; + int plane; + + for (plane = 0; plane < intel_num_planes(intel_crtc); plane++) { + const struct intel_plane_wm_parameters *p; + + p = ¶ms->plane[plane]; + if (!p->enabled) + continue; + + total_data_rate += skl_plane_relative_data_rate(p); + } + + return total_data_rate; +} + +static void +skl_allocate_pipe_ddb(struct drm_crtc *crtc, + const struct intel_wm_config *config, + const struct skl_pipe_wm_parameters *params, + struct skl_ddb_allocation *ddb /* out */) { struct drm_device *dev = crtc->dev; - struct drm_i915_private *dev_priv = dev->dev_private; - struct ilk_wm_values *hw = &dev_priv->wm.hw; struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - struct intel_pipe_wm *active = &intel_crtc->wm.active; enum pipe pipe = intel_crtc->pipe; - static const unsigned int wm0_pipe_reg[] = { - [PIPE_A] = WM0_PIPEA_ILK, - [PIPE_B] = WM0_PIPEB_ILK, - [PIPE_C] = WM0_PIPEC_IVB, - }; + struct skl_ddb_entry *alloc = &ddb->pipe[pipe]; + uint16_t alloc_size, start, cursor_blocks; + unsigned int total_data_rate; + int plane; + + skl_ddb_get_pipe_allocation_limits(dev, crtc, config, params, alloc); + alloc_size = skl_ddb_entry_size(alloc); + if (alloc_size == 0) { + memset(ddb->plane[pipe], 0, sizeof(ddb->plane[pipe])); + memset(&ddb->cursor[pipe], 0, sizeof(ddb->cursor[pipe])); + return; + } - hw->wm_pipe[pipe] = I915_READ(wm0_pipe_reg[pipe]); - if (IS_HASWELL(dev) || IS_BROADWELL(dev)) - hw->wm_linetime[pipe] = I915_READ(PIPE_WM_LINETIME(pipe)); + cursor_blocks = skl_cursor_allocation(config); + ddb->cursor[pipe].start = alloc->end - cursor_blocks; + ddb->cursor[pipe].end = alloc->end; - active->pipe_enabled = intel_crtc_active(crtc); + alloc_size -= cursor_blocks; + alloc->end -= cursor_blocks; - if (active->pipe_enabled) { - u32 tmp = hw->wm_pipe[pipe]; + /* + * Each active plane get a portion of the remaining space, in + * proportion to the amount of data they need to fetch from memory. + * + * FIXME: we may not allocate every single block here. + */ + total_data_rate = skl_get_total_relative_data_rate(intel_crtc, params); - /* - * For active pipes LP0 watermark is marked as - * enabled, and LP1+ watermaks as disabled since - * we can't really reverse compute them in case - * multiple pipes are active. - */ - active->wm[0].enable = true; - active->wm[0].pri_val = (tmp & WM0_PIPE_PLANE_MASK) >> WM0_PIPE_PLANE_SHIFT; - active->wm[0].spr_val = (tmp & WM0_PIPE_SPRITE_MASK) >> WM0_PIPE_SPRITE_SHIFT; - active->wm[0].cur_val = tmp & WM0_PIPE_CURSOR_MASK; - active->linetime = hw->wm_linetime[pipe]; - } else { - int level, max_level = ilk_wm_max_level(dev); + start = alloc->start; + for (plane = 0; plane < intel_num_planes(intel_crtc); plane++) { + const struct intel_plane_wm_parameters *p; + unsigned int data_rate; + uint16_t plane_blocks; + + p = ¶ms->plane[plane]; + if (!p->enabled) + continue; + + data_rate = skl_plane_relative_data_rate(p); /* - * For inactive pipes, all watermark levels - * should be marked as enabled but zeroed, - * which is what we'd compute them to. + * promote the expression to 64 bits to avoid overflowing, the + * result is < available as data_rate / total_data_rate < 1 */ - for (level = 0; level <= max_level; level++) - active->wm[level].enable = true; + plane_blocks = div_u64((uint64_t)alloc_size * data_rate, + total_data_rate); + + ddb->plane[pipe][plane].start = start; + ddb->plane[pipe][plane].end = start + plane_blocks; + + start += plane_blocks; } + } -void ilk_wm_get_hw_state(struct drm_device *dev) +static uint32_t skl_pipe_pixel_rate(const struct intel_crtc_config *config) { + /* TODO: Take into account the scalers once we support them */ + return config->adjusted_mode.crtc_clock; +} + +/* + * The max latency should be 257 (max the punit can code is 255 and we add 2us + * for the read latency) and bytes_per_pixel should always be <= 8, so that + * should allow pixel_rate up to ~2 GHz which seems sufficient since max + * 2xcdclk is 1350 MHz and the pixel rate should never exceed that. +*/ +static uint32_t skl_wm_method1(uint32_t pixel_rate, uint8_t bytes_per_pixel, + uint32_t latency) +{ + uint32_t wm_intermediate_val, ret; + + if (latency == 0) + return UINT_MAX; + + wm_intermediate_val = latency * pixel_rate * bytes_per_pixel; + ret = DIV_ROUND_UP(wm_intermediate_val, 1000); + + return ret; +} + +static uint32_t skl_wm_method2(uint32_t pixel_rate, uint32_t pipe_htotal, + uint32_t horiz_pixels, uint8_t bytes_per_pixel, + uint32_t latency) +{ + uint32_t ret, plane_bytes_per_line, wm_intermediate_val; + + if (latency == 0) + return UINT_MAX; + + plane_bytes_per_line = horiz_pixels * bytes_per_pixel; + wm_intermediate_val = latency * pixel_rate; + ret = DIV_ROUND_UP(wm_intermediate_val, pipe_htotal * 1000) * + plane_bytes_per_line; + + return ret; +} + +static bool skl_ddb_allocation_changed(const struct skl_ddb_allocation *new_ddb, + const struct intel_crtc *intel_crtc) +{ + struct drm_device *dev = intel_crtc->base.dev; struct drm_i915_private *dev_priv = dev->dev_private; - struct ilk_wm_values *hw = &dev_priv->wm.hw; + const struct skl_ddb_allocation *cur_ddb = &dev_priv->wm.skl_hw.ddb; + enum pipe pipe = intel_crtc->pipe; + + if (memcmp(new_ddb->plane[pipe], cur_ddb->plane[pipe], + sizeof(new_ddb->plane[pipe]))) + return true; + + if (memcmp(&new_ddb->cursor[pipe], &cur_ddb->cursor[pipe], + sizeof(new_ddb->cursor[pipe]))) + return true; + + return false; +} + +static void skl_compute_wm_global_parameters(struct drm_device *dev, + struct intel_wm_config *config) +{ struct drm_crtc *crtc; + struct drm_plane *plane; - for_each_crtc(dev, crtc) - ilk_pipe_wm_get_hw_state(crtc); + list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) + config->num_pipes_active += intel_crtc_active(crtc); - hw->wm_lp[0] = I915_READ(WM1_LP_ILK); - hw->wm_lp[1] = I915_READ(WM2_LP_ILK); - hw->wm_lp[2] = I915_READ(WM3_LP_ILK); + /* FIXME: I don't think we need those two global parameters on SKL */ + list_for_each_entry(plane, &dev->mode_config.plane_list, head) { + struct intel_plane *intel_plane = to_intel_plane(plane); - hw->wm_lp_spr[0] = I915_READ(WM1S_LP_ILK); - if (INTEL_INFO(dev)->gen >= 7) { - hw->wm_lp_spr[1] = I915_READ(WM2S_LP_IVB); - hw->wm_lp_spr[2] = I915_READ(WM3S_LP_IVB); + config->sprites_enabled |= intel_plane->wm.enabled; + config->sprites_scaled |= intel_plane->wm.scaled; } +} - if (IS_HASWELL(dev) || IS_BROADWELL(dev)) - hw->partitioning = (I915_READ(WM_MISC) & WM_MISC_DATA_PARTITION_5_6) ? - INTEL_DDB_PART_5_6 : INTEL_DDB_PART_1_2; - else if (IS_IVYBRIDGE(dev)) - hw->partitioning = (I915_READ(DISP_ARB_CTL2) & DISP_DATA_PARTITION_5_6) ? - INTEL_DDB_PART_5_6 : INTEL_DDB_PART_1_2; +static void skl_compute_wm_pipe_parameters(struct drm_crtc *crtc, + struct skl_pipe_wm_parameters *p) +{ + struct drm_device *dev = crtc->dev; + struct intel_crtc *intel_crtc = to_intel_crtc(crtc); + enum pipe pipe = intel_crtc->pipe; + struct drm_plane *plane; + int i = 1; /* Index for sprite planes start */ - hw->enable_fbc_wm = - !(I915_READ(DISP_ARB_CTL) & DISP_FBC_WM_DIS); -} + p->active = intel_crtc_active(crtc); + if (p->active) { + p->pipe_htotal = intel_crtc->config.adjusted_mode.crtc_htotal; + p->pixel_rate = skl_pipe_pixel_rate(&intel_crtc->config); -/** - * intel_update_watermarks - update FIFO watermark values based on current modes - * - * Calculate watermark values for the various WM regs based on current mode - * and plane configuration. - * - * There are several cases to deal with here: - * - normal (i.e. non-self-refresh) - * - self-refresh (SR) mode - * - lines are large relative to FIFO size (buffer can hold up to 2) - * - lines are small relative to FIFO size (buffer can hold more than 2 - * lines), so need to account for TLB latency - * - * The normal calculation is: - * watermark = dotclock * bytes per pixel * latency - * where latency is platform & configuration dependent (we assume pessimal - * values here). - * - * The SR calculation is: - * watermark = (trunc(latency/line time)+1) * surface width * - * bytes per pixel - * where - * line time = htotal / dotclock - * surface width = hdisplay for normal plane and 64 for cursor - * and latency is assumed to be high, as above. - * - * The final value programmed to the register should always be rounded up, - * and include an extra 2 entries to account for clock crossings. - * - * We don't use the sprite, so we can ignore that. And on Crestline we have - * to set the non-SR watermarks to 8. - */ -void intel_update_watermarks(struct drm_crtc *crtc) -{ - struct drm_i915_private *dev_priv = crtc->dev->dev_private; + /* + * For now, assume primary and cursor planes are always enabled. + */ + p->plane[0].enabled = true; + p->plane[0].bytes_per_pixel = + crtc->primary->fb->bits_per_pixel / 8; + p->plane[0].horiz_pixels = intel_crtc->config.pipe_src_w; + p->plane[0].vert_pixels = intel_crtc->config.pipe_src_h; - if (dev_priv->display.update_wm) - dev_priv->display.update_wm(crtc); -} + p->cursor.enabled = true; + p->cursor.bytes_per_pixel = 4; + p->cursor.horiz_pixels = intel_crtc->cursor_width ? + intel_crtc->cursor_width : 64; + } -void intel_update_sprite_watermarks(struct drm_plane *plane, - struct drm_crtc *crtc, - uint32_t sprite_width, - uint32_t sprite_height, - int pixel_size, - bool enabled, bool scaled) -{ - struct drm_i915_private *dev_priv = plane->dev->dev_private; + list_for_each_entry(plane, &dev->mode_config.plane_list, head) { + struct intel_plane *intel_plane = to_intel_plane(plane); - if (dev_priv->display.update_sprite_wm) - dev_priv->display.update_sprite_wm(plane, crtc, - sprite_width, sprite_height, - pixel_size, enabled, scaled); + if (intel_plane->pipe == pipe) + p->plane[i++] = intel_plane->wm; + } } -static struct drm_i915_gem_object * -intel_alloc_context_page(struct drm_device *dev) +static bool skl_compute_plane_wm(struct skl_pipe_wm_parameters *p, + struct intel_plane_wm_parameters *p_params, + uint16_t ddb_allocation, + uint32_t mem_value, + uint16_t *out_blocks, /* out */ + uint8_t *out_lines /* out */) { - struct drm_i915_gem_object *ctx; - int ret; + uint32_t method1, method2, plane_bytes_per_line, res_blocks, res_lines; + uint32_t result_bytes; - WARN_ON(!mutex_is_locked(&dev->struct_mutex)); + if (mem_value == 0 || !p->active || !p_params->enabled) + return false; - ctx = i915_gem_alloc_object(dev, 4096); - if (!ctx) { - DRM_DEBUG("failed to alloc power context, RC6 disabled\n"); - return NULL; - } + method1 = skl_wm_method1(p->pixel_rate, + p_params->bytes_per_pixel, + mem_value); + method2 = skl_wm_method2(p->pixel_rate, + p->pipe_htotal, + p_params->horiz_pixels, + p_params->bytes_per_pixel, + mem_value); - ret = i915_gem_obj_ggtt_pin(ctx, 4096, 0); - if (ret) { - DRM_ERROR("failed to pin power context: %d\n", ret); - goto err_unref; - } + plane_bytes_per_line = p_params->horiz_pixels * + p_params->bytes_per_pixel; - ret = i915_gem_object_set_to_gtt_domain(ctx, 1); - if (ret) { - DRM_ERROR("failed to set-domain on power context: %d\n", ret); - goto err_unpin; - } + /* For now xtile and linear */ + if (((ddb_allocation * 512) / plane_bytes_per_line) >= 1) + result_bytes = min(method1, method2); + else + result_bytes = method1; - return ctx; + res_blocks = DIV_ROUND_UP(result_bytes, 512) + 1; + res_lines = DIV_ROUND_UP(result_bytes, plane_bytes_per_line); -err_unpin: - i915_gem_object_ggtt_unpin(ctx); -err_unref: - drm_gem_object_unreference(&ctx->base); - return NULL; -} + if (res_blocks > ddb_allocation || res_lines > 31) + return false; -/** - * Lock protecting IPS related data structures - */ -DEFINE_SPINLOCK(mchdev_lock); + *out_blocks = res_blocks; + *out_lines = res_lines; -/* Global for IPS driver to get at the current i915 device. Protected by - * mchdev_lock. */ -static struct drm_i915_private *i915_mch_dev; + return true; +} -bool ironlake_set_drps(struct drm_device *dev, u8 val) +static void skl_compute_wm_level(const struct drm_i915_private *dev_priv, + struct skl_ddb_allocation *ddb, + struct skl_pipe_wm_parameters *p, + enum pipe pipe, + int level, + int num_planes, + struct skl_wm_level *result) { - struct drm_i915_private *dev_priv = dev->dev_private; - u16 rgvswctl; + uint16_t latency = dev_priv->wm.skl_latency[level]; + uint16_t ddb_blocks; + int i; - assert_spin_locked(&mchdev_lock); + for (i = 0; i < num_planes; i++) { + ddb_blocks = skl_ddb_entry_size(&ddb->plane[pipe][i]); - rgvswctl = I915_READ16(MEMSWCTL); - if (rgvswctl & MEMCTL_CMD_STS) { - DRM_DEBUG("gpu busy, RCS change rejected\n"); - return false; /* still busy with another command */ + result->plane_en[i] = skl_compute_plane_wm(p, &p->plane[i], + ddb_blocks, + latency, + &result->plane_res_b[i], + &result->plane_res_l[i]); } - rgvswctl = (MEMCTL_CMD_CHFREQ << MEMCTL_CMD_SHIFT) | - (val << MEMCTL_FREQ_SHIFT) | MEMCTL_SFCAVM; - I915_WRITE16(MEMSWCTL, rgvswctl); - POSTING_READ16(MEMSWCTL); + ddb_blocks = skl_ddb_entry_size(&ddb->cursor[pipe]); + result->cursor_en = skl_compute_plane_wm(p, &p->cursor, ddb_blocks, + latency, &result->cursor_res_b, + &result->cursor_res_l); +} - rgvswctl |= MEMCTL_CMD_STS; - I915_WRITE16(MEMSWCTL, rgvswctl); +static uint32_t +skl_compute_linetime_wm(struct drm_crtc *crtc, struct skl_pipe_wm_parameters *p) +{ + if (!intel_crtc_active(crtc)) + return 0; + + return DIV_ROUND_UP(8 * p->pipe_htotal * 1000, p->pixel_rate); - return true; } -static void ironlake_enable_drps(struct drm_device *dev) +static void skl_compute_transition_wm(struct drm_crtc *crtc, + struct skl_pipe_wm_parameters *params, + struct skl_wm_level *trans_wm /* out */) { - struct drm_i915_private *dev_priv = dev->dev_private; - u32 rgvmodectl = I915_READ(MEMMODECTL); - u8 fmax, fmin, fstart, vstart; + struct intel_crtc *intel_crtc = to_intel_crtc(crtc); + int i; - spin_lock_irq(&mchdev_lock); + if (!params->active) + return; - /* Enable temp reporting */ - I915_WRITE16(PMMISC, I915_READ(PMMISC) | MCPPCE_EN); - I915_WRITE16(TSC1, I915_READ(TSC1) | TSE); + /* Until we know more, just disable transition WMs */ + for (i = 0; i < intel_num_planes(intel_crtc); i++) + trans_wm->plane_en[i] = false; + trans_wm->cursor_en = false; +} - /* 100ms RC evaluation intervals */ - I915_WRITE(RCUPEI, 100000); - I915_WRITE(RCDNEI, 100000); +static void skl_compute_pipe_wm(struct drm_crtc *crtc, + struct skl_ddb_allocation *ddb, + struct skl_pipe_wm_parameters *params, + struct skl_pipe_wm *pipe_wm) +{ + struct drm_device *dev = crtc->dev; + const struct drm_i915_private *dev_priv = dev->dev_private; + struct intel_crtc *intel_crtc = to_intel_crtc(crtc); + int level, max_level = ilk_wm_max_level(dev); - /* Set max/min thresholds to 90ms and 80ms respectively */ - I915_WRITE(RCBMAXAVG, 90000); - I915_WRITE(RCBMINAVG, 80000); + for (level = 0; level <= max_level; level++) { + skl_compute_wm_level(dev_priv, ddb, params, intel_crtc->pipe, + level, intel_num_planes(intel_crtc), + &pipe_wm->wm[level]); + } + pipe_wm->linetime = skl_compute_linetime_wm(crtc, params); - I915_WRITE(MEMIHYST, 1); + skl_compute_transition_wm(crtc, params, &pipe_wm->trans_wm); +} - /* Set up min, max, and cur for interrupt handling */ - fmax = (rgvmodectl & MEMMODE_FMAX_MASK) >> MEMMODE_FMAX_SHIFT; - fmin = (rgvmodectl & MEMMODE_FMIN_MASK); - fstart = (rgvmodectl & MEMMODE_FSTART_MASK) >> - MEMMODE_FSTART_SHIFT; +static void skl_compute_wm_results(struct drm_device *dev, + struct skl_pipe_wm_parameters *p, + struct skl_pipe_wm *p_wm, + struct skl_wm_values *r, + struct intel_crtc *intel_crtc) +{ + int level, max_level = ilk_wm_max_level(dev); + enum pipe pipe = intel_crtc->pipe; + uint32_t temp; + int i; - vstart = (I915_READ(PXVFREQ_BASE + (fstart * 4)) & PXVFREQ_PX_MASK) >> - PXVFREQ_PX_SHIFT; + for (level = 0; level <= max_level; level++) { + for (i = 0; i < intel_num_planes(intel_crtc); i++) { + temp = 0; - dev_priv->ips.fmax = fmax; /* IPS callback will increase this */ - dev_priv->ips.fstart = fstart; + temp |= p_wm->wm[level].plane_res_l[i] << + PLANE_WM_LINES_SHIFT; + temp |= p_wm->wm[level].plane_res_b[i]; + if (p_wm->wm[level].plane_en[i]) + temp |= PLANE_WM_EN; - dev_priv->ips.max_delay = fstart; - dev_priv->ips.min_delay = fmin; - dev_priv->ips.cur_delay = fstart; + r->plane[pipe][i][level] = temp; + } - DRM_DEBUG_DRIVER("fmax: %d, fmin: %d, fstart: %d\n", - fmax, fmin, fstart); + temp = 0; - I915_WRITE(MEMINTREN, MEMINT_CX_SUPR_EN | MEMINT_EVAL_CHG_EN); + temp |= p_wm->wm[level].cursor_res_l << PLANE_WM_LINES_SHIFT; + temp |= p_wm->wm[level].cursor_res_b; - /* - * Interrupts will be enabled in ironlake_irq_postinstall - */ + if (p_wm->wm[level].cursor_en) + temp |= PLANE_WM_EN; - I915_WRITE(VIDSTART, vstart); - POSTING_READ(VIDSTART); + r->cursor[pipe][level] = temp; - rgvmodectl |= MEMMODE_SWMODE_EN; - I915_WRITE(MEMMODECTL, rgvmodectl); + } - if (wait_for_atomic((I915_READ(MEMSWCTL) & MEMCTL_CMD_STS) == 0, 10)) - DRM_ERROR("stuck trying to change perf mode\n"); - mdelay(1); + /* transition WMs */ + for (i = 0; i < intel_num_planes(intel_crtc); i++) { + temp = 0; + temp |= p_wm->trans_wm.plane_res_l[i] << PLANE_WM_LINES_SHIFT; + temp |= p_wm->trans_wm.plane_res_b[i]; + if (p_wm->trans_wm.plane_en[i]) + temp |= PLANE_WM_EN; - ironlake_set_drps(dev, fstart); + r->plane_trans[pipe][i] = temp; + } - dev_priv->ips.last_count1 = I915_READ(0x112e4) + I915_READ(0x112e8) + - I915_READ(0x112e0); - dev_priv->ips.last_time1 = jiffies_to_msecs(jiffies); - dev_priv->ips.last_count2 = I915_READ(0x112f4); - dev_priv->ips.last_time2 = ktime_get_raw_ns(); + temp = 0; + temp |= p_wm->trans_wm.cursor_res_l << PLANE_WM_LINES_SHIFT; + temp |= p_wm->trans_wm.cursor_res_b; + if (p_wm->trans_wm.cursor_en) + temp |= PLANE_WM_EN; - spin_unlock_irq(&mchdev_lock); + r->cursor_trans[pipe] = temp; + + r->wm_linetime[pipe] = p_wm->linetime; } -static void ironlake_disable_drps(struct drm_device *dev) +static void skl_ddb_entry_write(struct drm_i915_private *dev_priv, uint32_t reg, + const struct skl_ddb_entry *entry) { - struct drm_i915_private *dev_priv = dev->dev_private; - u16 rgvswctl; + if (entry->end) + I915_WRITE(reg, (entry->end - 1) << 16 | entry->start); + else + I915_WRITE(reg, 0); +} - spin_lock_irq(&mchdev_lock); +static void skl_write_wm_values(struct drm_i915_private *dev_priv, + const struct skl_wm_values *new) +{ + struct drm_device *dev = dev_priv->dev; + struct intel_crtc *crtc; - rgvswctl = I915_READ16(MEMSWCTL); + list_for_each_entry(crtc, &dev->mode_config.crtc_list, base.head) { + int i, level, max_level = ilk_wm_max_level(dev); + enum pipe pipe = crtc->pipe; - /* Ack interrupts, disable EFC interrupt */ - I915_WRITE(MEMINTREN, I915_READ(MEMINTREN) & ~MEMINT_EVAL_CHG_EN); - I915_WRITE(MEMINTRSTS, MEMINT_EVAL_CHG); - I915_WRITE(DEIER, I915_READ(DEIER) & ~DE_PCU_EVENT); - I915_WRITE(DEIIR, DE_PCU_EVENT); - I915_WRITE(DEIMR, I915_READ(DEIMR) | DE_PCU_EVENT); + if (!new->dirty[pipe]) + continue; - /* Go back to the starting frequency */ - ironlake_set_drps(dev, dev_priv->ips.fstart); - mdelay(1); - rgvswctl |= MEMCTL_CMD_STS; - I915_WRITE(MEMSWCTL, rgvswctl); - mdelay(1); + I915_WRITE(PIPE_WM_LINETIME(pipe), new->wm_linetime[pipe]); - spin_unlock_irq(&mchdev_lock); + for (level = 0; level <= max_level; level++) { + for (i = 0; i < intel_num_planes(crtc); i++) + I915_WRITE(PLANE_WM(pipe, i, level), + new->plane[pipe][i][level]); + I915_WRITE(CUR_WM(pipe, level), + new->cursor[pipe][level]); + } + for (i = 0; i < intel_num_planes(crtc); i++) + I915_WRITE(PLANE_WM_TRANS(pipe, i), + new->plane_trans[pipe][i]); + I915_WRITE(CUR_WM_TRANS(pipe), new->cursor_trans[pipe]); + + for (i = 0; i < intel_num_planes(crtc); i++) + skl_ddb_entry_write(dev_priv, + PLANE_BUF_CFG(pipe, i), + &new->ddb.plane[pipe][i]); + + skl_ddb_entry_write(dev_priv, CUR_BUF_CFG(pipe), + &new->ddb.cursor[pipe]); + } } -/* There's a funny hw issue where the hw returns all 0 when reading from - * GEN6_RP_INTERRUPT_LIMITS. Hence we always need to compute the desired value - * ourselves, instead of doing a rmw cycle (which might result in us clearing - * all limits and the gpu stuck at whatever frequency it is at atm). +/* + * When setting up a new DDB allocation arrangement, we need to correctly + * sequence the times at which the new allocations for the pipes are taken into + * account or we'll have pipes fetching from space previously allocated to + * another pipe. + * + * Roughly the sequence looks like: + * 1. re-allocate the pipe(s) with the allocation being reduced and not + * overlapping with a previous light-up pipe (another way to put it is: + * pipes with their new allocation strickly included into their old ones). + * 2. re-allocate the other pipes that get their allocation reduced + * 3. allocate the pipes having their allocation increased + * + * Steps 1. and 2. are here to take care of the following case: + * - Initially DDB looks like this: + * | B | C | + * - enable pipe A. + * - pipe B has a reduced DDB allocation that overlaps with the old pipe C + * allocation + * | A | B | C | + * + * We need to sequence the re-allocation: C, B, A (and not B, C, A). */ -static u32 gen6_rps_limits(struct drm_i915_private *dev_priv, u8 val) + +static void +skl_wm_flush_pipe(struct drm_i915_private *dev_priv, enum pipe pipe, int pass) { - u32 limits; + struct drm_device *dev = dev_priv->dev; + int plane; - /* Only set the down limit when we've reached the lowest level to avoid - * getting more interrupts, otherwise leave this clear. This prevents a - * race in the hw when coming out of rc6: There's a tiny window where - * the hw runs at the minimal clock before selecting the desired - * frequency, if the down threshold expires in that window we will not - * receive a down interrupt. */ - limits = dev_priv->rps.max_freq_softlimit << 24; - if (val <= dev_priv->rps.min_freq_softlimit) - limits |= dev_priv->rps.min_freq_softlimit << 16; + DRM_DEBUG_KMS("flush pipe %c (pass %d)\n", pipe_name(pipe), pass); - return limits; + for_each_plane(pipe, plane) { + I915_WRITE(PLANE_SURF(pipe, plane), + I915_READ(PLANE_SURF(pipe, plane))); + } + I915_WRITE(CURBASE(pipe), I915_READ(CURBASE(pipe))); } -static void gen6_set_rps_thresholds(struct drm_i915_private *dev_priv, u8 val) +static bool +skl_ddb_allocation_included(const struct skl_ddb_allocation *old, + const struct skl_ddb_allocation *new, + enum pipe pipe) { - int new_power; + uint16_t old_size, new_size; - new_power = dev_priv->rps.power; - switch (dev_priv->rps.power) { - case LOW_POWER: - if (val > dev_priv->rps.efficient_freq + 1 && val > dev_priv->rps.cur_freq) - new_power = BETWEEN; - break; + old_size = skl_ddb_entry_size(&old->pipe[pipe]); + new_size = skl_ddb_entry_size(&new->pipe[pipe]); - case BETWEEN: - if (val <= dev_priv->rps.efficient_freq && val < dev_priv->rps.cur_freq) - new_power = LOW_POWER; - else if (val >= dev_priv->rps.rp0_freq && val > dev_priv->rps.cur_freq) - new_power = HIGH_POWER; - break; + return old_size != new_size && + new->pipe[pipe].start >= old->pipe[pipe].start && + new->pipe[pipe].end <= old->pipe[pipe].end; +} - case HIGH_POWER: - if (val < (dev_priv->rps.rp1_freq + dev_priv->rps.rp0_freq) >> 1 && val < dev_priv->rps.cur_freq) - new_power = BETWEEN; - break; - } - /* Max/min bins are special */ - if (val == dev_priv->rps.min_freq_softlimit) - new_power = LOW_POWER; - if (val == dev_priv->rps.max_freq_softlimit) - new_power = HIGH_POWER; - if (new_power == dev_priv->rps.power) - return; - - /* Note the units here are not exactly 1us, but 1280ns. */ - switch (new_power) { - case LOW_POWER: - /* Upclock if more than 95% busy over 16ms */ - I915_WRITE(GEN6_RP_UP_EI, 12500); - I915_WRITE(GEN6_RP_UP_THRESHOLD, 11800); +static void skl_flush_wm_values(struct drm_i915_private *dev_priv, + struct skl_wm_values *new_values) +{ + struct drm_device *dev = dev_priv->dev; + struct skl_ddb_allocation *cur_ddb, *new_ddb; + bool reallocated[I915_MAX_PIPES] = {false, false, false}; + struct intel_crtc *crtc; + enum pipe pipe; - /* Downclock if less than 85% busy over 32ms */ - I915_WRITE(GEN6_RP_DOWN_EI, 25000); - I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 21250); + new_ddb = &new_values->ddb; + cur_ddb = &dev_priv->wm.skl_hw.ddb; - I915_WRITE(GEN6_RP_CONTROL, - GEN6_RP_MEDIA_TURBO | - GEN6_RP_MEDIA_HW_NORMAL_MODE | - GEN6_RP_MEDIA_IS_GFX | - GEN6_RP_ENABLE | - GEN6_RP_UP_BUSY_AVG | - GEN6_RP_DOWN_IDLE_AVG); - break; + /* + * First pass: flush the pipes with the new allocation contained into + * the old space. + * + * We'll wait for the vblank on those pipes to ensure we can safely + * re-allocate the freed space without this pipe fetching from it. + */ + for_each_intel_crtc(dev, crtc) { + if (!crtc->active) + continue; - case BETWEEN: - /* Upclock if more than 90% busy over 13ms */ - I915_WRITE(GEN6_RP_UP_EI, 10250); - I915_WRITE(GEN6_RP_UP_THRESHOLD, 9225); + pipe = crtc->pipe; - /* Downclock if less than 75% busy over 32ms */ - I915_WRITE(GEN6_RP_DOWN_EI, 25000); - I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 18750); + if (!skl_ddb_allocation_included(cur_ddb, new_ddb, pipe)) + continue; - I915_WRITE(GEN6_RP_CONTROL, - GEN6_RP_MEDIA_TURBO | - GEN6_RP_MEDIA_HW_NORMAL_MODE | - GEN6_RP_MEDIA_IS_GFX | - GEN6_RP_ENABLE | - GEN6_RP_UP_BUSY_AVG | - GEN6_RP_DOWN_IDLE_AVG); - break; + skl_wm_flush_pipe(dev_priv, pipe, 1); + intel_wait_for_vblank(dev, pipe); - case HIGH_POWER: - /* Upclock if more than 85% busy over 10ms */ - I915_WRITE(GEN6_RP_UP_EI, 8000); - I915_WRITE(GEN6_RP_UP_THRESHOLD, 6800); + reallocated[pipe] = true; + } - /* Downclock if less than 60% busy over 32ms */ - I915_WRITE(GEN6_RP_DOWN_EI, 25000); - I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 15000); - I915_WRITE(GEN6_RP_CONTROL, - GEN6_RP_MEDIA_TURBO | - GEN6_RP_MEDIA_HW_NORMAL_MODE | - GEN6_RP_MEDIA_IS_GFX | - GEN6_RP_ENABLE | - GEN6_RP_UP_BUSY_AVG | - GEN6_RP_DOWN_IDLE_AVG); - break; - } + /* + * Second pass: flush the pipes that are having their allocation + * reduced, but overlapping with a previous allocation. + * + * Here as well we need to wait for the vblank to make sure the freed + * space is not used anymore. + */ + for_each_intel_crtc(dev, crtc) { + if (!crtc->active) + continue; - dev_priv->rps.power = new_power; - dev_priv->rps.last_adj = 0; -} + pipe = crtc->pipe; -static u32 gen6_rps_pm_mask(struct drm_i915_private *dev_priv, u8 val) -{ - u32 mask = 0; + if (reallocated[pipe]) + continue; - if (val > dev_priv->rps.min_freq_softlimit) - mask |= GEN6_PM_RP_DOWN_THRESHOLD | GEN6_PM_RP_DOWN_TIMEOUT; - if (val < dev_priv->rps.max_freq_softlimit) - mask |= GEN6_PM_RP_UP_THRESHOLD; + if (skl_ddb_entry_size(&new_ddb->pipe[pipe]) < + skl_ddb_entry_size(&cur_ddb->pipe[pipe])) { + skl_wm_flush_pipe(dev_priv, pipe, 2); + intel_wait_for_vblank(dev, pipe); + } - mask |= dev_priv->pm_rps_events & (GEN6_PM_RP_DOWN_EI_EXPIRED | GEN6_PM_RP_UP_EI_EXPIRED); - mask &= dev_priv->pm_rps_events; + reallocated[pipe] = true; + } - /* IVB and SNB hard hangs on looping batchbuffer - * if GEN6_PM_UP_EI_EXPIRED is masked. + /* + * Third pass: flush the pipes that got more space allocated. + * + * We don't need to actively wait for the update here, next vblank + * will just get more DDB space with the correct WM values. */ - if (INTEL_INFO(dev_priv->dev)->gen <= 7 && !IS_HASWELL(dev_priv->dev)) - mask |= GEN6_PM_RP_UP_EI_EXPIRED; + for_each_intel_crtc(dev, crtc) { + if (!crtc->active) + continue; - if (IS_GEN8(dev_priv->dev)) - mask |= GEN8_PMINTR_REDIRECT_TO_NON_DISP; + pipe = crtc->pipe; - return ~mask; + /* + * At this point, only the pipes more space than before are + * left to re-allocate. + */ + if (reallocated[pipe]) + continue; + + skl_wm_flush_pipe(dev_priv, pipe, 3); + } } -/* gen6_set_rps is called to update the frequency request, but should also be - * called when the range (min_delay and max_delay) is modified so that we can - * update the GEN6_RP_INTERRUPT_LIMITS register accordingly. */ -void gen6_set_rps(struct drm_device *dev, u8 val) +static bool skl_update_pipe_wm(struct drm_crtc *crtc, + struct skl_pipe_wm_parameters *params, + struct intel_wm_config *config, + struct skl_ddb_allocation *ddb, /* out */ + struct skl_pipe_wm *pipe_wm /* out */) { - struct drm_i915_private *dev_priv = dev->dev_private; - - WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock)); - WARN_ON(val > dev_priv->rps.max_freq_softlimit); - WARN_ON(val < dev_priv->rps.min_freq_softlimit); - - /* min/max delay may still have been modified so be sure to - * write the limits value. - */ - if (val != dev_priv->rps.cur_freq) { - gen6_set_rps_thresholds(dev_priv, val); - - if (IS_HASWELL(dev) || IS_BROADWELL(dev)) - I915_WRITE(GEN6_RPNSWREQ, - HSW_FREQUENCY(val)); - else - I915_WRITE(GEN6_RPNSWREQ, - GEN6_FREQUENCY(val) | - GEN6_OFFSET(0) | - GEN6_AGGRESSIVE_TURBO); - } + struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - /* Make sure we continue to get interrupts - * until we hit the minimum or maximum frequencies. - */ - I915_WRITE(GEN6_RP_INTERRUPT_LIMITS, gen6_rps_limits(dev_priv, val)); - I915_WRITE(GEN6_PMINTRMSK, gen6_rps_pm_mask(dev_priv, val)); + skl_compute_wm_pipe_parameters(crtc, params); + skl_allocate_pipe_ddb(crtc, config, params, ddb); + skl_compute_pipe_wm(crtc, ddb, params, pipe_wm); - POSTING_READ(GEN6_RPNSWREQ); + if (!memcmp(&intel_crtc->wm.skl_active, pipe_wm, sizeof(*pipe_wm))) + return false; - dev_priv->rps.cur_freq = val; - trace_intel_gpu_freq_change(val * 50); + intel_crtc->wm.skl_active = *pipe_wm; + return true; } -/* vlv_set_rps_idle: Set the frequency to Rpn if Gfx clocks are down - * - * * If Gfx is Idle, then - * 1. Mask Turbo interrupts - * 2. Bring up Gfx clock - * 3. Change the freq to Rpn and wait till P-Unit updates freq - * 4. Clear the Force GFX CLK ON bit so that Gfx can down - * 5. Unmask Turbo interrupts -*/ -static void vlv_set_rps_idle(struct drm_i915_private *dev_priv) +static void skl_update_other_pipe_wm(struct drm_device *dev, + struct drm_crtc *crtc, + struct intel_wm_config *config, + struct skl_wm_values *r) { - struct drm_device *dev = dev_priv->dev; - - /* Latest VLV doesn't need to force the gfx clock */ - if (dev->pdev->revision >= 0xd) { - valleyview_set_rps(dev_priv->dev, dev_priv->rps.min_freq_softlimit); - return; - } + struct intel_crtc *intel_crtc; + struct intel_crtc *this_crtc = to_intel_crtc(crtc); /* - * When we are idle. Drop to min voltage state. + * If the WM update hasn't changed the allocation for this_crtc (the + * crtc we are currently computing the new WM values for), other + * enabled crtcs will keep the same allocation and we don't need to + * recompute anything for them. */ - - if (dev_priv->rps.cur_freq <= dev_priv->rps.min_freq_softlimit) + if (!skl_ddb_allocation_changed(&r->ddb, this_crtc)) return; - /* Mask turbo interrupt so that they will not come in between */ - I915_WRITE(GEN6_PMINTRMSK, 0xffffffff); - - vlv_force_gfx_clock(dev_priv, true); - - dev_priv->rps.cur_freq = dev_priv->rps.min_freq_softlimit; - - vlv_punit_write(dev_priv, PUNIT_REG_GPU_FREQ_REQ, - dev_priv->rps.min_freq_softlimit); - - if (wait_for(((vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS)) - & GENFREQSTATUS) == 0, 5)) - DRM_ERROR("timed out waiting for Punit\n"); - - vlv_force_gfx_clock(dev_priv, false); + /* + * Otherwise, because of this_crtc being freshly enabled/disabled, the + * other active pipes need new DDB allocation and WM values. + */ + list_for_each_entry(intel_crtc, &dev->mode_config.crtc_list, + base.head) { + struct skl_pipe_wm_parameters params = {}; + struct skl_pipe_wm pipe_wm = {}; + bool wm_changed; - I915_WRITE(GEN6_PMINTRMSK, - gen6_rps_pm_mask(dev_priv, dev_priv->rps.cur_freq)); -} + if (this_crtc->pipe == intel_crtc->pipe) + continue; -void gen6_rps_idle(struct drm_i915_private *dev_priv) -{ - struct drm_device *dev = dev_priv->dev; + if (!intel_crtc->active) + continue; - mutex_lock(&dev_priv->rps.hw_lock); - if (dev_priv->rps.enabled) { - if (IS_CHERRYVIEW(dev)) - valleyview_set_rps(dev_priv->dev, dev_priv->rps.min_freq_softlimit); - else if (IS_VALLEYVIEW(dev)) - vlv_set_rps_idle(dev_priv); - else - gen6_set_rps(dev_priv->dev, dev_priv->rps.min_freq_softlimit); - dev_priv->rps.last_adj = 0; - } - mutex_unlock(&dev_priv->rps.hw_lock); -} + wm_changed = skl_update_pipe_wm(&intel_crtc->base, + ¶ms, config, + &r->ddb, &pipe_wm); -void gen6_rps_boost(struct drm_i915_private *dev_priv) -{ - struct drm_device *dev = dev_priv->dev; + /* + * If we end up re-computing the other pipe WM values, it's + * because it was really needed, so we expect the WM values to + * be different. + */ + WARN_ON(!wm_changed); - mutex_lock(&dev_priv->rps.hw_lock); - if (dev_priv->rps.enabled) { - if (IS_VALLEYVIEW(dev)) - valleyview_set_rps(dev_priv->dev, dev_priv->rps.max_freq_softlimit); - else - gen6_set_rps(dev_priv->dev, dev_priv->rps.max_freq_softlimit); - dev_priv->rps.last_adj = 0; + skl_compute_wm_results(dev, ¶ms, &pipe_wm, r, intel_crtc); + r->dirty[intel_crtc->pipe] = true; } - mutex_unlock(&dev_priv->rps.hw_lock); } -void valleyview_set_rps(struct drm_device *dev, u8 val) +static void skl_update_wm(struct drm_crtc *crtc) { + struct intel_crtc *intel_crtc = to_intel_crtc(crtc); + struct drm_device *dev = crtc->dev; struct drm_i915_private *dev_priv = dev->dev_private; + struct skl_pipe_wm_parameters params = {}; + struct skl_wm_values *results = &dev_priv->wm.skl_results; + struct skl_pipe_wm pipe_wm = {}; + struct intel_wm_config config = {}; - WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock)); - WARN_ON(val > dev_priv->rps.max_freq_softlimit); - WARN_ON(val < dev_priv->rps.min_freq_softlimit); + memset(results, 0, sizeof(*results)); - if (WARN_ONCE(IS_CHERRYVIEW(dev) && (val & 1), - "Odd GPU freq value\n")) - val &= ~1; + skl_compute_wm_global_parameters(dev, &config); - if (val != dev_priv->rps.cur_freq) { - DRM_DEBUG_DRIVER("GPU freq request from %d MHz (%u) to %d MHz (%u)\n", - vlv_gpu_freq(dev_priv, dev_priv->rps.cur_freq), - dev_priv->rps.cur_freq, - vlv_gpu_freq(dev_priv, val), val); + if (!skl_update_pipe_wm(crtc, ¶ms, &config, + &results->ddb, &pipe_wm)) + return; - vlv_punit_write(dev_priv, PUNIT_REG_GPU_FREQ_REQ, val); - } + skl_compute_wm_results(dev, ¶ms, &pipe_wm, results, intel_crtc); + results->dirty[intel_crtc->pipe] = true; - I915_WRITE(GEN6_PMINTRMSK, gen6_rps_pm_mask(dev_priv, val)); + skl_update_other_pipe_wm(dev, crtc, &config, results); + skl_write_wm_values(dev_priv, results); + skl_flush_wm_values(dev_priv, results); - dev_priv->rps.cur_freq = val; - trace_intel_gpu_freq_change(vlv_gpu_freq(dev_priv, val)); + /* store the new configuration */ + dev_priv->wm.skl_hw = *results; } -static void gen8_disable_rps_interrupts(struct drm_device *dev) +static void +skl_update_sprite_wm(struct drm_plane *plane, struct drm_crtc *crtc, + uint32_t sprite_width, uint32_t sprite_height, + int pixel_size, bool enabled, bool scaled) { - struct drm_i915_private *dev_priv = dev->dev_private; - - I915_WRITE(GEN6_PMINTRMSK, ~GEN8_PMINTR_REDIRECT_TO_NON_DISP); - I915_WRITE(GEN8_GT_IER(2), I915_READ(GEN8_GT_IER(2)) & - ~dev_priv->pm_rps_events); - /* Complete PM interrupt masking here doesn't race with the rps work - * item again unmasking PM interrupts because that is using a different - * register (GEN8_GT_IMR(2)) to mask PM interrupts. The only risk is in - * leaving stale bits in GEN8_GT_IIR(2) and GEN8_GT_IMR(2) which - * gen8_enable_rps will clean up. */ + struct intel_plane *intel_plane = to_intel_plane(plane); - spin_lock_irq(&dev_priv->irq_lock); - dev_priv->rps.pm_iir = 0; - spin_unlock_irq(&dev_priv->irq_lock); + intel_plane->wm.enabled = enabled; + intel_plane->wm.scaled = scaled; + intel_plane->wm.horiz_pixels = sprite_width; + intel_plane->wm.vert_pixels = sprite_height; + intel_plane->wm.bytes_per_pixel = pixel_size; - I915_WRITE(GEN8_GT_IIR(2), dev_priv->pm_rps_events); + skl_update_wm(crtc); } -static void gen6_disable_rps_interrupts(struct drm_device *dev) +static void ilk_update_wm(struct drm_crtc *crtc) { + struct intel_crtc *intel_crtc = to_intel_crtc(crtc); + struct drm_device *dev = crtc->dev; struct drm_i915_private *dev_priv = dev->dev_private; + struct ilk_wm_maximums max; + struct ilk_pipe_wm_parameters params = {}; + struct ilk_wm_values results = {}; + enum intel_ddb_partitioning partitioning; + struct intel_pipe_wm pipe_wm = {}; + struct intel_pipe_wm lp_wm_1_2 = {}, lp_wm_5_6 = {}, *best_lp_wm; + struct intel_wm_config config = {}; - I915_WRITE(GEN6_PMINTRMSK, 0xffffffff); - I915_WRITE(GEN6_PMIER, I915_READ(GEN6_PMIER) & - ~dev_priv->pm_rps_events); - /* Complete PM interrupt masking here doesn't race with the rps work - * item again unmasking PM interrupts because that is using a different - * register (PMIMR) to mask PM interrupts. The only risk is in leaving - * stale bits in PMIIR and PMIMR which gen6_enable_rps will clean up. */ + ilk_compute_wm_parameters(crtc, ¶ms); - spin_lock_irq(&dev_priv->irq_lock); - dev_priv->rps.pm_iir = 0; - spin_unlock_irq(&dev_priv->irq_lock); + intel_compute_pipe_wm(crtc, ¶ms, &pipe_wm); - I915_WRITE(GEN6_PMIIR, dev_priv->pm_rps_events); -} + if (!memcmp(&intel_crtc->wm.active, &pipe_wm, sizeof(pipe_wm))) + return; -static void gen6_disable_rps(struct drm_device *dev) -{ - struct drm_i915_private *dev_priv = dev->dev_private; + intel_crtc->wm.active = pipe_wm; - I915_WRITE(GEN6_RC_CONTROL, 0); - I915_WRITE(GEN6_RPNSWREQ, 1 << 31); + ilk_compute_wm_config(dev, &config); - if (IS_BROADWELL(dev)) - gen8_disable_rps_interrupts(dev); - else - gen6_disable_rps_interrupts(dev); -} + ilk_compute_wm_maximums(dev, 1, &config, INTEL_DDB_PART_1_2, &max); + ilk_wm_merge(dev, &config, &max, &lp_wm_1_2); -static void cherryview_disable_rps(struct drm_device *dev) -{ - struct drm_i915_private *dev_priv = dev->dev_private; + /* 5/6 split only in single pipe config on IVB+ */ + if (INTEL_INFO(dev)->gen >= 7 && + config.num_pipes_active == 1 && config.sprites_enabled) { + ilk_compute_wm_maximums(dev, 1, &config, INTEL_DDB_PART_5_6, &max); + ilk_wm_merge(dev, &config, &max, &lp_wm_5_6); - I915_WRITE(GEN6_RC_CONTROL, 0); + best_lp_wm = ilk_find_best_result(dev, &lp_wm_1_2, &lp_wm_5_6); + } else { + best_lp_wm = &lp_wm_1_2; + } + + partitioning = (best_lp_wm == &lp_wm_1_2) ? + INTEL_DDB_PART_1_2 : INTEL_DDB_PART_5_6; + + ilk_compute_wm_results(dev, best_lp_wm, partitioning, &results); - gen8_disable_rps_interrupts(dev); + ilk_write_wm_values(dev_priv, &results); } -static void valleyview_disable_rps(struct drm_device *dev) +static void +ilk_update_sprite_wm(struct drm_plane *plane, + struct drm_crtc *crtc, + uint32_t sprite_width, uint32_t sprite_height, + int pixel_size, bool enabled, bool scaled) { - struct drm_i915_private *dev_priv = dev->dev_private; - - /* we're doing forcewake before Disabling RC6, - * This what the BIOS expects when going into suspend */ - gen6_gt_force_wake_get(dev_priv, FORCEWAKE_ALL); + struct drm_device *dev = plane->dev; + struct intel_plane *intel_plane = to_intel_plane(plane); - I915_WRITE(GEN6_RC_CONTROL, 0); + intel_plane->wm.enabled = enabled; + intel_plane->wm.scaled = scaled; + intel_plane->wm.horiz_pixels = sprite_width; + intel_plane->wm.vert_pixels = sprite_width; + intel_plane->wm.bytes_per_pixel = pixel_size; - gen6_gt_force_wake_put(dev_priv, FORCEWAKE_ALL); + /* + * IVB workaround: must disable low power watermarks for at least + * one frame before enabling scaling. LP watermarks can be re-enabled + * when scaling is disabled. + * + * WaCxSRDisabledForSpriteScaling:ivb + */ + if (IS_IVYBRIDGE(dev) && scaled && ilk_disable_lp_wm(dev)) + intel_wait_for_vblank(dev, intel_plane->pipe); - gen6_disable_rps_interrupts(dev); + ilk_update_wm(crtc); } -static void intel_print_rc6_info(struct drm_device *dev, u32 mode) +static void skl_pipe_wm_active_state(uint32_t val, + struct skl_pipe_wm *active, + bool is_transwm, + bool is_cursor, + int i, + int level) { - if (IS_VALLEYVIEW(dev)) { - if (mode & (GEN7_RC_CTL_TO_MODE | GEN6_RC_CTL_EI_MODE(1))) - mode = GEN6_RC_CTL_RC6_ENABLE; - else - mode = 0; + bool is_enabled = (val & PLANE_WM_EN) != 0; + + if (!is_transwm) { + if (!is_cursor) { + active->wm[level].plane_en[i] = is_enabled; + active->wm[level].plane_res_b[i] = + val & PLANE_WM_BLOCKS_MASK; + active->wm[level].plane_res_l[i] = + (val >> PLANE_WM_LINES_SHIFT) & + PLANE_WM_LINES_MASK; + } else { + active->wm[level].cursor_en = is_enabled; + active->wm[level].cursor_res_b = + val & PLANE_WM_BLOCKS_MASK; + active->wm[level].cursor_res_l = + (val >> PLANE_WM_LINES_SHIFT) & + PLANE_WM_LINES_MASK; + } + } else { + if (!is_cursor) { + active->trans_wm.plane_en[i] = is_enabled; + active->trans_wm.plane_res_b[i] = + val & PLANE_WM_BLOCKS_MASK; + active->trans_wm.plane_res_l[i] = + (val >> PLANE_WM_LINES_SHIFT) & + PLANE_WM_LINES_MASK; + } else { + active->trans_wm.cursor_en = is_enabled; + active->trans_wm.cursor_res_b = + val & PLANE_WM_BLOCKS_MASK; + active->trans_wm.cursor_res_l = + (val >> PLANE_WM_LINES_SHIFT) & + PLANE_WM_LINES_MASK; + } } - DRM_DEBUG_KMS("Enabling RC6 states: RC6 %s, RC6p %s, RC6pp %s\n", - (mode & GEN6_RC_CTL_RC6_ENABLE) ? "on" : "off", - (mode & GEN6_RC_CTL_RC6p_ENABLE) ? "on" : "off", - (mode & GEN6_RC_CTL_RC6pp_ENABLE) ? "on" : "off"); } -static int sanitize_rc6_option(const struct drm_device *dev, int enable_rc6) +static void skl_pipe_wm_get_hw_state(struct drm_crtc *crtc) { - /* No RC6 before Ironlake */ - if (INTEL_INFO(dev)->gen < 5) - return 0; + struct drm_device *dev = crtc->dev; + struct drm_i915_private *dev_priv = dev->dev_private; + struct skl_wm_values *hw = &dev_priv->wm.skl_hw; + struct intel_crtc *intel_crtc = to_intel_crtc(crtc); + struct skl_pipe_wm *active = &intel_crtc->wm.skl_active; + enum pipe pipe = intel_crtc->pipe; + int level, i, max_level; + uint32_t temp; - /* RC6 is only on Ironlake mobile not on desktop */ - if (INTEL_INFO(dev)->gen == 5 && !IS_IRONLAKE_M(dev)) - return 0; + max_level = ilk_wm_max_level(dev); - /* Respect the kernel parameter if it is set */ - if (enable_rc6 >= 0) { - int mask; + hw->wm_linetime[pipe] = I915_READ(PIPE_WM_LINETIME(pipe)); - if (INTEL_INFO(dev)->gen == 6 || IS_IVYBRIDGE(dev)) - mask = INTEL_RC6_ENABLE | INTEL_RC6p_ENABLE | - INTEL_RC6pp_ENABLE; - else - mask = INTEL_RC6_ENABLE; + for (level = 0; level <= max_level; level++) { + for (i = 0; i < intel_num_planes(intel_crtc); i++) + hw->plane[pipe][i][level] = + I915_READ(PLANE_WM(pipe, i, level)); + hw->cursor[pipe][level] = I915_READ(CUR_WM(pipe, level)); + } - if ((enable_rc6 & mask) != enable_rc6) - DRM_DEBUG_KMS("Adjusting RC6 mask to %d (requested %d, valid %d)\n", - enable_rc6 & mask, enable_rc6, mask); + for (i = 0; i < intel_num_planes(intel_crtc); i++) + hw->plane_trans[pipe][i] = I915_READ(PLANE_WM_TRANS(pipe, i)); + hw->cursor_trans[pipe] = I915_READ(CUR_WM_TRANS(pipe)); - return enable_rc6 & mask; - } + if (!intel_crtc_active(crtc)) + return; - /* Disable RC6 on Ironlake */ - if (INTEL_INFO(dev)->gen == 5) - return 0; + hw->dirty[pipe] = true; - if (IS_IVYBRIDGE(dev)) - return (INTEL_RC6_ENABLE | INTEL_RC6p_ENABLE); + active->linetime = hw->wm_linetime[pipe]; - return INTEL_RC6_ENABLE; -} + for (level = 0; level <= max_level; level++) { + for (i = 0; i < intel_num_planes(intel_crtc); i++) { + temp = hw->plane[pipe][i][level]; + skl_pipe_wm_active_state(temp, active, false, + false, i, level); + } + temp = hw->cursor[pipe][level]; + skl_pipe_wm_active_state(temp, active, false, true, i, level); + } -int intel_enable_rc6(const struct drm_device *dev) -{ - return i915.enable_rc6; + for (i = 0; i < intel_num_planes(intel_crtc); i++) { + temp = hw->plane_trans[pipe][i]; + skl_pipe_wm_active_state(temp, active, true, false, i, 0); + } + + temp = hw->cursor_trans[pipe]; + skl_pipe_wm_active_state(temp, active, true, true, i, 0); } -static void gen8_enable_rps_interrupts(struct drm_device *dev) +void skl_wm_get_hw_state(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; + struct skl_ddb_allocation *ddb = &dev_priv->wm.skl_hw.ddb; + struct drm_crtc *crtc; - spin_lock_irq(&dev_priv->irq_lock); - WARN_ON(dev_priv->rps.pm_iir); - gen8_enable_pm_irq(dev_priv, dev_priv->pm_rps_events); - I915_WRITE(GEN8_GT_IIR(2), dev_priv->pm_rps_events); - spin_unlock_irq(&dev_priv->irq_lock); + skl_ddb_get_hw_state(dev_priv, ddb); + list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) + skl_pipe_wm_get_hw_state(crtc); } -static void gen6_enable_rps_interrupts(struct drm_device *dev) +static void ilk_pipe_wm_get_hw_state(struct drm_crtc *crtc) { + struct drm_device *dev = crtc->dev; struct drm_i915_private *dev_priv = dev->dev_private; + struct ilk_wm_values *hw = &dev_priv->wm.hw; + struct intel_crtc *intel_crtc = to_intel_crtc(crtc); + struct intel_pipe_wm *active = &intel_crtc->wm.active; + enum pipe pipe = intel_crtc->pipe; + static const unsigned int wm0_pipe_reg[] = { + [PIPE_A] = WM0_PIPEA_ILK, + [PIPE_B] = WM0_PIPEB_ILK, + [PIPE_C] = WM0_PIPEC_IVB, + }; - spin_lock_irq(&dev_priv->irq_lock); - WARN_ON(dev_priv->rps.pm_iir); - gen6_enable_pm_irq(dev_priv, dev_priv->pm_rps_events); - I915_WRITE(GEN6_PMIIR, dev_priv->pm_rps_events); - spin_unlock_irq(&dev_priv->irq_lock); -} + hw->wm_pipe[pipe] = I915_READ(wm0_pipe_reg[pipe]); + if (IS_HASWELL(dev) || IS_BROADWELL(dev)) + hw->wm_linetime[pipe] = I915_READ(PIPE_WM_LINETIME(pipe)); -static void parse_rp_state_cap(struct drm_i915_private *dev_priv, u32 rp_state_cap) -{ - /* All of these values are in units of 50MHz */ - dev_priv->rps.cur_freq = 0; - /* static values from HW: RP0 < RPe < RP1 < RPn (min_freq) */ - dev_priv->rps.rp1_freq = (rp_state_cap >> 8) & 0xff; - dev_priv->rps.rp0_freq = (rp_state_cap >> 0) & 0xff; - dev_priv->rps.min_freq = (rp_state_cap >> 16) & 0xff; - /* XXX: only BYT has a special efficient freq */ - dev_priv->rps.efficient_freq = dev_priv->rps.rp1_freq; - /* hw_max = RP0 until we check for overclocking */ - dev_priv->rps.max_freq = dev_priv->rps.rp0_freq; + active->pipe_enabled = intel_crtc_active(crtc); - /* Preserve min/max settings in case of re-init */ - if (dev_priv->rps.max_freq_softlimit == 0) - dev_priv->rps.max_freq_softlimit = dev_priv->rps.max_freq; + if (active->pipe_enabled) { + u32 tmp = hw->wm_pipe[pipe]; - if (dev_priv->rps.min_freq_softlimit == 0) - dev_priv->rps.min_freq_softlimit = dev_priv->rps.min_freq; + /* + * For active pipes LP0 watermark is marked as + * enabled, and LP1+ watermaks as disabled since + * we can't really reverse compute them in case + * multiple pipes are active. + */ + active->wm[0].enable = true; + active->wm[0].pri_val = (tmp & WM0_PIPE_PLANE_MASK) >> WM0_PIPE_PLANE_SHIFT; + active->wm[0].spr_val = (tmp & WM0_PIPE_SPRITE_MASK) >> WM0_PIPE_SPRITE_SHIFT; + active->wm[0].cur_val = tmp & WM0_PIPE_CURSOR_MASK; + active->linetime = hw->wm_linetime[pipe]; + } else { + int level, max_level = ilk_wm_max_level(dev); + + /* + * For inactive pipes, all watermark levels + * should be marked as enabled but zeroed, + * which is what we'd compute them to. + */ + for (level = 0; level <= max_level; level++) + active->wm[level].enable = true; + } } -static void gen8_enable_rps(struct drm_device *dev) +void ilk_wm_get_hw_state(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; - struct intel_engine_cs *ring; - uint32_t rc6_mask = 0, rp_state_cap; - int unused; + struct ilk_wm_values *hw = &dev_priv->wm.hw; + struct drm_crtc *crtc; - /* 1a: Software RC state - RC0 */ - I915_WRITE(GEN6_RC_STATE, 0); + for_each_crtc(dev, crtc) + ilk_pipe_wm_get_hw_state(crtc); - /* 1c & 1d: Get forcewake during program sequence. Although the driver - * hasn't enabled a state yet where we need forcewake, BIOS may have.*/ - gen6_gt_force_wake_get(dev_priv, FORCEWAKE_ALL); + hw->wm_lp[0] = I915_READ(WM1_LP_ILK); + hw->wm_lp[1] = I915_READ(WM2_LP_ILK); + hw->wm_lp[2] = I915_READ(WM3_LP_ILK); - /* 2a: Disable RC states. */ - I915_WRITE(GEN6_RC_CONTROL, 0); + hw->wm_lp_spr[0] = I915_READ(WM1S_LP_ILK); + if (INTEL_INFO(dev)->gen >= 7) { + hw->wm_lp_spr[1] = I915_READ(WM2S_LP_IVB); + hw->wm_lp_spr[2] = I915_READ(WM3S_LP_IVB); + } - rp_state_cap = I915_READ(GEN6_RP_STATE_CAP); - parse_rp_state_cap(dev_priv, rp_state_cap); + if (IS_HASWELL(dev) || IS_BROADWELL(dev)) + hw->partitioning = (I915_READ(WM_MISC) & WM_MISC_DATA_PARTITION_5_6) ? + INTEL_DDB_PART_5_6 : INTEL_DDB_PART_1_2; + else if (IS_IVYBRIDGE(dev)) + hw->partitioning = (I915_READ(DISP_ARB_CTL2) & DISP_DATA_PARTITION_5_6) ? + INTEL_DDB_PART_5_6 : INTEL_DDB_PART_1_2; - /* 2b: Program RC6 thresholds.*/ - I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16); - I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */ - I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */ - for_each_ring(ring, dev_priv, unused) - I915_WRITE(RING_MAX_IDLE(ring->mmio_base), 10); - I915_WRITE(GEN6_RC_SLEEP, 0); - if (IS_BROADWELL(dev)) - I915_WRITE(GEN6_RC6_THRESHOLD, 625); /* 800us/1.28 for TO */ - else - I915_WRITE(GEN6_RC6_THRESHOLD, 50000); /* 50/125ms per EI */ + hw->enable_fbc_wm = + !(I915_READ(DISP_ARB_CTL) & DISP_FBC_WM_DIS); +} - /* 3: Enable RC6 */ - if (intel_enable_rc6(dev) & INTEL_RC6_ENABLE) - rc6_mask = GEN6_RC_CTL_RC6_ENABLE; - intel_print_rc6_info(dev, rc6_mask); - if (IS_BROADWELL(dev)) - I915_WRITE(GEN6_RC_CONTROL, GEN6_RC_CTL_HW_ENABLE | - GEN7_RC_CTL_TO_MODE | - rc6_mask); - else - I915_WRITE(GEN6_RC_CONTROL, GEN6_RC_CTL_HW_ENABLE | - GEN6_RC_CTL_EI_MODE(1) | - rc6_mask); +/** + * intel_update_watermarks - update FIFO watermark values based on current modes + * + * Calculate watermark values for the various WM regs based on current mode + * and plane configuration. + * + * There are several cases to deal with here: + * - normal (i.e. non-self-refresh) + * - self-refresh (SR) mode + * - lines are large relative to FIFO size (buffer can hold up to 2) + * - lines are small relative to FIFO size (buffer can hold more than 2 + * lines), so need to account for TLB latency + * + * The normal calculation is: + * watermark = dotclock * bytes per pixel * latency + * where latency is platform & configuration dependent (we assume pessimal + * values here). + * + * The SR calculation is: + * watermark = (trunc(latency/line time)+1) * surface width * + * bytes per pixel + * where + * line time = htotal / dotclock + * surface width = hdisplay for normal plane and 64 for cursor + * and latency is assumed to be high, as above. + * + * The final value programmed to the register should always be rounded up, + * and include an extra 2 entries to account for clock crossings. + * + * We don't use the sprite, so we can ignore that. And on Crestline we have + * to set the non-SR watermarks to 8. + */ +void intel_update_watermarks(struct drm_crtc *crtc) +{ + struct drm_i915_private *dev_priv = crtc->dev->dev_private; - /* 4 Program defaults and thresholds for RPS*/ - I915_WRITE(GEN6_RPNSWREQ, - HSW_FREQUENCY(dev_priv->rps.rp1_freq)); - I915_WRITE(GEN6_RC_VIDEO_FREQ, - HSW_FREQUENCY(dev_priv->rps.rp1_freq)); - /* NB: Docs say 1s, and 1000000 - which aren't equivalent */ - I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 100000000 / 128); /* 1 second timeout */ + if (dev_priv->display.update_wm) + dev_priv->display.update_wm(crtc); +} - /* Docs recommend 900MHz, and 300 MHz respectively */ - I915_WRITE(GEN6_RP_INTERRUPT_LIMITS, - dev_priv->rps.max_freq_softlimit << 24 | - dev_priv->rps.min_freq_softlimit << 16); +void intel_update_sprite_watermarks(struct drm_plane *plane, + struct drm_crtc *crtc, + uint32_t sprite_width, + uint32_t sprite_height, + int pixel_size, + bool enabled, bool scaled) +{ + struct drm_i915_private *dev_priv = plane->dev->dev_private; - I915_WRITE(GEN6_RP_UP_THRESHOLD, 7600000 / 128); /* 76ms busyness per EI, 90% */ - I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 31300000 / 128); /* 313ms busyness per EI, 70%*/ - I915_WRITE(GEN6_RP_UP_EI, 66000); /* 84.48ms, XXX: random? */ - I915_WRITE(GEN6_RP_DOWN_EI, 350000); /* 448ms, XXX: random? */ + if (dev_priv->display.update_sprite_wm) + dev_priv->display.update_sprite_wm(plane, crtc, + sprite_width, sprite_height, + pixel_size, enabled, scaled); +} - I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10); +static struct drm_i915_gem_object * +intel_alloc_context_page(struct drm_device *dev) +{ + struct drm_i915_gem_object *ctx; + int ret; - /* 5: Enable RPS */ - I915_WRITE(GEN6_RP_CONTROL, - GEN6_RP_MEDIA_TURBO | - GEN6_RP_MEDIA_HW_NORMAL_MODE | - GEN6_RP_MEDIA_IS_GFX | - GEN6_RP_ENABLE | - GEN6_RP_UP_BUSY_AVG | - GEN6_RP_DOWN_IDLE_AVG); + WARN_ON(!mutex_is_locked(&dev->struct_mutex)); - /* 6: Ring frequency + overclocking (our driver does this later */ + ctx = i915_gem_alloc_object(dev, 4096); + if (!ctx) { + DRM_DEBUG("failed to alloc power context, RC6 disabled\n"); + return NULL; + } - gen6_set_rps(dev, (I915_READ(GEN6_GT_PERF_STATUS) & 0xff00) >> 8); + ret = i915_gem_obj_ggtt_pin(ctx, 4096, 0); + if (ret) { + DRM_ERROR("failed to pin power context: %d\n", ret); + goto err_unref; + } - gen8_enable_rps_interrupts(dev); + ret = i915_gem_object_set_to_gtt_domain(ctx, 1); + if (ret) { + DRM_ERROR("failed to set-domain on power context: %d\n", ret); + goto err_unpin; + } - gen6_gt_force_wake_put(dev_priv, FORCEWAKE_ALL); + return ctx; + +err_unpin: + i915_gem_object_ggtt_unpin(ctx); +err_unref: + drm_gem_object_unreference(&ctx->base); + return NULL; } -static void gen6_enable_rps(struct drm_device *dev) +/** + * Lock protecting IPS related data structures + */ +DEFINE_SPINLOCK(mchdev_lock); + +/* Global for IPS driver to get at the current i915 device. Protected by + * mchdev_lock. */ +static struct drm_i915_private *i915_mch_dev; + +bool ironlake_set_drps(struct drm_device *dev, u8 val) { struct drm_i915_private *dev_priv = dev->dev_private; - struct intel_engine_cs *ring; - u32 rp_state_cap; - u32 rc6vids, pcu_mbox = 0, rc6_mask = 0; - u32 gtfifodbg; - int rc6_mode; - int i, ret; - - WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock)); + u16 rgvswctl; - /* Here begins a magic sequence of register writes to enable - * auto-downclocking. - * - * Perhaps there might be some value in exposing these to - * userspace... - */ - I915_WRITE(GEN6_RC_STATE, 0); + assert_spin_locked(&mchdev_lock); - /* Clear the DBG now so we don't confuse earlier errors */ - if ((gtfifodbg = I915_READ(GTFIFODBG))) { - DRM_ERROR("GT fifo had a previous error %x\n", gtfifodbg); - I915_WRITE(GTFIFODBG, gtfifodbg); + rgvswctl = I915_READ16(MEMSWCTL); + if (rgvswctl & MEMCTL_CMD_STS) { + DRM_DEBUG("gpu busy, RCS change rejected\n"); + return false; /* still busy with another command */ } - gen6_gt_force_wake_get(dev_priv, FORCEWAKE_ALL); + rgvswctl = (MEMCTL_CMD_CHFREQ << MEMCTL_CMD_SHIFT) | + (val << MEMCTL_FREQ_SHIFT) | MEMCTL_SFCAVM; + I915_WRITE16(MEMSWCTL, rgvswctl); + POSTING_READ16(MEMSWCTL); - rp_state_cap = I915_READ(GEN6_RP_STATE_CAP); + rgvswctl |= MEMCTL_CMD_STS; + I915_WRITE16(MEMSWCTL, rgvswctl); - parse_rp_state_cap(dev_priv, rp_state_cap); + return true; +} - /* disable the counters and set deterministic thresholds */ - I915_WRITE(GEN6_RC_CONTROL, 0); +static void ironlake_enable_drps(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + u32 rgvmodectl = I915_READ(MEMMODECTL); + u8 fmax, fmin, fstart, vstart; - I915_WRITE(GEN6_RC1_WAKE_RATE_LIMIT, 1000 << 16); - I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16 | 30); - I915_WRITE(GEN6_RC6pp_WAKE_RATE_LIMIT, 30); - I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000); - I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25); + spin_lock_irq(&mchdev_lock); - for_each_ring(ring, dev_priv, i) - I915_WRITE(RING_MAX_IDLE(ring->mmio_base), 10); + /* Enable temp reporting */ + I915_WRITE16(PMMISC, I915_READ(PMMISC) | MCPPCE_EN); + I915_WRITE16(TSC1, I915_READ(TSC1) | TSE); - I915_WRITE(GEN6_RC_SLEEP, 0); - I915_WRITE(GEN6_RC1e_THRESHOLD, 1000); - if (IS_IVYBRIDGE(dev)) - I915_WRITE(GEN6_RC6_THRESHOLD, 125000); - else - I915_WRITE(GEN6_RC6_THRESHOLD, 50000); - I915_WRITE(GEN6_RC6p_THRESHOLD, 150000); - I915_WRITE(GEN6_RC6pp_THRESHOLD, 64000); /* unused */ + /* 100ms RC evaluation intervals */ + I915_WRITE(RCUPEI, 100000); + I915_WRITE(RCDNEI, 100000); - /* Check if we are enabling RC6 */ - rc6_mode = intel_enable_rc6(dev_priv->dev); - if (rc6_mode & INTEL_RC6_ENABLE) - rc6_mask |= GEN6_RC_CTL_RC6_ENABLE; + /* Set max/min thresholds to 90ms and 80ms respectively */ + I915_WRITE(RCBMAXAVG, 90000); + I915_WRITE(RCBMINAVG, 80000); - /* We don't use those on Haswell */ - if (!IS_HASWELL(dev)) { - if (rc6_mode & INTEL_RC6p_ENABLE) - rc6_mask |= GEN6_RC_CTL_RC6p_ENABLE; + I915_WRITE(MEMIHYST, 1); - if (rc6_mode & INTEL_RC6pp_ENABLE) - rc6_mask |= GEN6_RC_CTL_RC6pp_ENABLE; - } + /* Set up min, max, and cur for interrupt handling */ + fmax = (rgvmodectl & MEMMODE_FMAX_MASK) >> MEMMODE_FMAX_SHIFT; + fmin = (rgvmodectl & MEMMODE_FMIN_MASK); + fstart = (rgvmodectl & MEMMODE_FSTART_MASK) >> + MEMMODE_FSTART_SHIFT; - intel_print_rc6_info(dev, rc6_mask); + vstart = (I915_READ(PXVFREQ_BASE + (fstart * 4)) & PXVFREQ_PX_MASK) >> + PXVFREQ_PX_SHIFT; - I915_WRITE(GEN6_RC_CONTROL, - rc6_mask | - GEN6_RC_CTL_EI_MODE(1) | - GEN6_RC_CTL_HW_ENABLE); + dev_priv->ips.fmax = fmax; /* IPS callback will increase this */ + dev_priv->ips.fstart = fstart; - /* Power down if completely idle for over 50ms */ - I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 50000); - I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10); + dev_priv->ips.max_delay = fstart; + dev_priv->ips.min_delay = fmin; + dev_priv->ips.cur_delay = fstart; - ret = sandybridge_pcode_write(dev_priv, GEN6_PCODE_WRITE_MIN_FREQ_TABLE, 0); - if (ret) - DRM_DEBUG_DRIVER("Failed to set the min frequency\n"); + DRM_DEBUG_DRIVER("fmax: %d, fmin: %d, fstart: %d\n", + fmax, fmin, fstart); - ret = sandybridge_pcode_read(dev_priv, GEN6_READ_OC_PARAMS, &pcu_mbox); - if (!ret && (pcu_mbox & (1<<31))) { /* OC supported */ - DRM_DEBUG_DRIVER("Overclocking supported. Max: %dMHz, Overclock max: %dMHz\n", - (dev_priv->rps.max_freq_softlimit & 0xff) * 50, - (pcu_mbox & 0xff) * 50); - dev_priv->rps.max_freq = pcu_mbox & 0xff; - } + I915_WRITE(MEMINTREN, MEMINT_CX_SUPR_EN | MEMINT_EVAL_CHG_EN); - dev_priv->rps.power = HIGH_POWER; /* force a reset */ - gen6_set_rps(dev_priv->dev, dev_priv->rps.min_freq_softlimit); + /* + * Interrupts will be enabled in ironlake_irq_postinstall + */ - gen6_enable_rps_interrupts(dev); + I915_WRITE(VIDSTART, vstart); + POSTING_READ(VIDSTART); - rc6vids = 0; - ret = sandybridge_pcode_read(dev_priv, GEN6_PCODE_READ_RC6VIDS, &rc6vids); - if (IS_GEN6(dev) && ret) { - DRM_DEBUG_DRIVER("Couldn't check for BIOS workaround\n"); - } else if (IS_GEN6(dev) && (GEN6_DECODE_RC6_VID(rc6vids & 0xff) < 450)) { - DRM_DEBUG_DRIVER("You should update your BIOS. Correcting minimum rc6 voltage (%dmV->%dmV)\n", - GEN6_DECODE_RC6_VID(rc6vids & 0xff), 450); - rc6vids &= 0xffff00; - rc6vids |= GEN6_ENCODE_RC6_VID(450); - ret = sandybridge_pcode_write(dev_priv, GEN6_PCODE_WRITE_RC6VIDS, rc6vids); - if (ret) - DRM_ERROR("Couldn't fix incorrect rc6 voltage\n"); - } + rgvmodectl |= MEMMODE_SWMODE_EN; + I915_WRITE(MEMMODECTL, rgvmodectl); - gen6_gt_force_wake_put(dev_priv, FORCEWAKE_ALL); + if (wait_for_atomic((I915_READ(MEMSWCTL) & MEMCTL_CMD_STS) == 0, 10)) + DRM_ERROR("stuck trying to change perf mode\n"); + mdelay(1); + + ironlake_set_drps(dev, fstart); + + dev_priv->ips.last_count1 = I915_READ(0x112e4) + I915_READ(0x112e8) + + I915_READ(0x112e0); + dev_priv->ips.last_time1 = jiffies_to_msecs(jiffies); + dev_priv->ips.last_count2 = I915_READ(0x112f4); + dev_priv->ips.last_time2 = ktime_get_raw_ns(); + + spin_unlock_irq(&mchdev_lock); } -static void __gen6_update_ring_freq(struct drm_device *dev) +static void ironlake_disable_drps(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; - int min_freq = 15; - unsigned int gpu_freq; - unsigned int max_ia_freq, min_ring_freq; - int scaling_factor = 180; - struct cpufreq_policy *policy; - - WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock)); + u16 rgvswctl; - policy = cpufreq_cpu_get(0); - if (policy) { - max_ia_freq = policy->cpuinfo.max_freq; - cpufreq_cpu_put(policy); - } else { - /* - * Default to measured freq if none found, PCU will ensure we - * don't go over - */ - max_ia_freq = tsc_khz; - } + spin_lock_irq(&mchdev_lock); - /* Convert from kHz to MHz */ - max_ia_freq /= 1000; + rgvswctl = I915_READ16(MEMSWCTL); - min_ring_freq = I915_READ(DCLK) & 0xf; - /* convert DDR frequency from units of 266.6MHz to bandwidth */ - min_ring_freq = mult_frac(min_ring_freq, 8, 3); - - /* - * For each potential GPU frequency, load a ring frequency we'd like - * to use for memory access. We do this by specifying the IA frequency - * the PCU should use as a reference to determine the ring frequency. - */ - for (gpu_freq = dev_priv->rps.max_freq_softlimit; gpu_freq >= dev_priv->rps.min_freq_softlimit; - gpu_freq--) { - int diff = dev_priv->rps.max_freq_softlimit - gpu_freq; - unsigned int ia_freq = 0, ring_freq = 0; + /* Ack interrupts, disable EFC interrupt */ + I915_WRITE(MEMINTREN, I915_READ(MEMINTREN) & ~MEMINT_EVAL_CHG_EN); + I915_WRITE(MEMINTRSTS, MEMINT_EVAL_CHG); + I915_WRITE(DEIER, I915_READ(DEIER) & ~DE_PCU_EVENT); + I915_WRITE(DEIIR, DE_PCU_EVENT); + I915_WRITE(DEIMR, I915_READ(DEIMR) | DE_PCU_EVENT); - if (INTEL_INFO(dev)->gen >= 8) { - /* max(2 * GT, DDR). NB: GT is 50MHz units */ - ring_freq = max(min_ring_freq, gpu_freq); - } else if (IS_HASWELL(dev)) { - ring_freq = mult_frac(gpu_freq, 5, 4); - ring_freq = max(min_ring_freq, ring_freq); - /* leave ia_freq as the default, chosen by cpufreq */ - } else { - /* On older processors, there is no separate ring - * clock domain, so in order to boost the bandwidth - * of the ring, we need to upclock the CPU (ia_freq). - * - * For GPU frequencies less than 750MHz, - * just use the lowest ring freq. - */ - if (gpu_freq < min_freq) - ia_freq = 800; - else - ia_freq = max_ia_freq - ((diff * scaling_factor) / 2); - ia_freq = DIV_ROUND_CLOSEST(ia_freq, 100); - } + /* Go back to the starting frequency */ + ironlake_set_drps(dev, dev_priv->ips.fstart); + mdelay(1); + rgvswctl |= MEMCTL_CMD_STS; + I915_WRITE(MEMSWCTL, rgvswctl); + mdelay(1); - sandybridge_pcode_write(dev_priv, - GEN6_PCODE_WRITE_MIN_FREQ_TABLE, - ia_freq << GEN6_PCODE_FREQ_IA_RATIO_SHIFT | - ring_freq << GEN6_PCODE_FREQ_RING_RATIO_SHIFT | - gpu_freq); - } + spin_unlock_irq(&mchdev_lock); } -void gen6_update_ring_freq(struct drm_device *dev) +/* There's a funny hw issue where the hw returns all 0 when reading from + * GEN6_RP_INTERRUPT_LIMITS. Hence we always need to compute the desired value + * ourselves, instead of doing a rmw cycle (which might result in us clearing + * all limits and the gpu stuck at whatever frequency it is at atm). + */ +static u32 gen6_rps_limits(struct drm_i915_private *dev_priv, u8 val) { - struct drm_i915_private *dev_priv = dev->dev_private; + u32 limits; - if (INTEL_INFO(dev)->gen < 6 || IS_VALLEYVIEW(dev)) - return; + /* Only set the down limit when we've reached the lowest level to avoid + * getting more interrupts, otherwise leave this clear. This prevents a + * race in the hw when coming out of rc6: There's a tiny window where + * the hw runs at the minimal clock before selecting the desired + * frequency, if the down threshold expires in that window we will not + * receive a down interrupt. */ + limits = dev_priv->rps.max_freq_softlimit << 24; + if (val <= dev_priv->rps.min_freq_softlimit) + limits |= dev_priv->rps.min_freq_softlimit << 16; - mutex_lock(&dev_priv->rps.hw_lock); - __gen6_update_ring_freq(dev); - mutex_unlock(&dev_priv->rps.hw_lock); + return limits; } -static int cherryview_rps_max_freq(struct drm_i915_private *dev_priv) +static void gen6_set_rps_thresholds(struct drm_i915_private *dev_priv, u8 val) { - u32 val, rp0; - - val = vlv_punit_read(dev_priv, PUNIT_GPU_STATUS_REG); - rp0 = (val >> PUNIT_GPU_STATUS_MAX_FREQ_SHIFT) & PUNIT_GPU_STATUS_MAX_FREQ_MASK; + int new_power; - return rp0; -} + new_power = dev_priv->rps.power; + switch (dev_priv->rps.power) { + case LOW_POWER: + if (val > dev_priv->rps.efficient_freq + 1 && val > dev_priv->rps.cur_freq) + new_power = BETWEEN; + break; -static int cherryview_rps_rpe_freq(struct drm_i915_private *dev_priv) -{ - u32 val, rpe; + case BETWEEN: + if (val <= dev_priv->rps.efficient_freq && val < dev_priv->rps.cur_freq) + new_power = LOW_POWER; + else if (val >= dev_priv->rps.rp0_freq && val > dev_priv->rps.cur_freq) + new_power = HIGH_POWER; + break; - val = vlv_punit_read(dev_priv, PUNIT_GPU_DUTYCYCLE_REG); - rpe = (val >> PUNIT_GPU_DUTYCYCLE_RPE_FREQ_SHIFT) & PUNIT_GPU_DUTYCYCLE_RPE_FREQ_MASK; + case HIGH_POWER: + if (val < (dev_priv->rps.rp1_freq + dev_priv->rps.rp0_freq) >> 1 && val < dev_priv->rps.cur_freq) + new_power = BETWEEN; + break; + } + /* Max/min bins are special */ + if (val == dev_priv->rps.min_freq_softlimit) + new_power = LOW_POWER; + if (val == dev_priv->rps.max_freq_softlimit) + new_power = HIGH_POWER; + if (new_power == dev_priv->rps.power) + return; - return rpe; -} + /* Note the units here are not exactly 1us, but 1280ns. */ + switch (new_power) { + case LOW_POWER: + /* Upclock if more than 95% busy over 16ms */ + I915_WRITE(GEN6_RP_UP_EI, 12500); + I915_WRITE(GEN6_RP_UP_THRESHOLD, 11800); -static int cherryview_rps_guar_freq(struct drm_i915_private *dev_priv) -{ - u32 val, rp1; + /* Downclock if less than 85% busy over 32ms */ + I915_WRITE(GEN6_RP_DOWN_EI, 25000); + I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 21250); - val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS); - rp1 = (val >> PUNIT_GPU_STATUS_MAX_FREQ_SHIFT) & PUNIT_GPU_STATUS_MAX_FREQ_MASK; + I915_WRITE(GEN6_RP_CONTROL, + GEN6_RP_MEDIA_TURBO | + GEN6_RP_MEDIA_HW_NORMAL_MODE | + GEN6_RP_MEDIA_IS_GFX | + GEN6_RP_ENABLE | + GEN6_RP_UP_BUSY_AVG | + GEN6_RP_DOWN_IDLE_AVG); + break; - return rp1; -} + case BETWEEN: + /* Upclock if more than 90% busy over 13ms */ + I915_WRITE(GEN6_RP_UP_EI, 10250); + I915_WRITE(GEN6_RP_UP_THRESHOLD, 9225); -static int cherryview_rps_min_freq(struct drm_i915_private *dev_priv) -{ - u32 val, rpn; + /* Downclock if less than 75% busy over 32ms */ + I915_WRITE(GEN6_RP_DOWN_EI, 25000); + I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 18750); - val = vlv_punit_read(dev_priv, PUNIT_GPU_STATUS_REG); - rpn = (val >> PUNIT_GPU_STATIS_GFX_MIN_FREQ_SHIFT) & PUNIT_GPU_STATUS_GFX_MIN_FREQ_MASK; - return rpn; -} + I915_WRITE(GEN6_RP_CONTROL, + GEN6_RP_MEDIA_TURBO | + GEN6_RP_MEDIA_HW_NORMAL_MODE | + GEN6_RP_MEDIA_IS_GFX | + GEN6_RP_ENABLE | + GEN6_RP_UP_BUSY_AVG | + GEN6_RP_DOWN_IDLE_AVG); + break; -static int valleyview_rps_guar_freq(struct drm_i915_private *dev_priv) -{ - u32 val, rp1; + case HIGH_POWER: + /* Upclock if more than 85% busy over 10ms */ + I915_WRITE(GEN6_RP_UP_EI, 8000); + I915_WRITE(GEN6_RP_UP_THRESHOLD, 6800); - val = vlv_nc_read(dev_priv, IOSF_NC_FB_GFX_FREQ_FUSE); + /* Downclock if less than 60% busy over 32ms */ + I915_WRITE(GEN6_RP_DOWN_EI, 25000); + I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 15000); - rp1 = (val & FB_GFX_FGUARANTEED_FREQ_FUSE_MASK) >> FB_GFX_FGUARANTEED_FREQ_FUSE_SHIFT; + I915_WRITE(GEN6_RP_CONTROL, + GEN6_RP_MEDIA_TURBO | + GEN6_RP_MEDIA_HW_NORMAL_MODE | + GEN6_RP_MEDIA_IS_GFX | + GEN6_RP_ENABLE | + GEN6_RP_UP_BUSY_AVG | + GEN6_RP_DOWN_IDLE_AVG); + break; + } - return rp1; + dev_priv->rps.power = new_power; + dev_priv->rps.last_adj = 0; } -static int valleyview_rps_max_freq(struct drm_i915_private *dev_priv) +static u32 gen6_rps_pm_mask(struct drm_i915_private *dev_priv, u8 val) { - u32 val, rp0; - - val = vlv_nc_read(dev_priv, IOSF_NC_FB_GFX_FREQ_FUSE); + u32 mask = 0; - rp0 = (val & FB_GFX_MAX_FREQ_FUSE_MASK) >> FB_GFX_MAX_FREQ_FUSE_SHIFT; - /* Clamp to max */ - rp0 = min_t(u32, rp0, 0xea); + if (val > dev_priv->rps.min_freq_softlimit) + mask |= GEN6_PM_RP_DOWN_THRESHOLD | GEN6_PM_RP_DOWN_TIMEOUT; + if (val < dev_priv->rps.max_freq_softlimit) + mask |= GEN6_PM_RP_UP_THRESHOLD; - return rp0; -} + mask |= dev_priv->pm_rps_events & (GEN6_PM_RP_DOWN_EI_EXPIRED | GEN6_PM_RP_UP_EI_EXPIRED); + mask &= dev_priv->pm_rps_events; -static int valleyview_rps_rpe_freq(struct drm_i915_private *dev_priv) -{ - u32 val, rpe; + /* IVB and SNB hard hangs on looping batchbuffer + * if GEN6_PM_UP_EI_EXPIRED is masked. + */ + if (INTEL_INFO(dev_priv->dev)->gen <= 7 && !IS_HASWELL(dev_priv->dev)) + mask |= GEN6_PM_RP_UP_EI_EXPIRED; - val = vlv_nc_read(dev_priv, IOSF_NC_FB_GFX_FMAX_FUSE_LO); - rpe = (val & FB_FMAX_VMIN_FREQ_LO_MASK) >> FB_FMAX_VMIN_FREQ_LO_SHIFT; - val = vlv_nc_read(dev_priv, IOSF_NC_FB_GFX_FMAX_FUSE_HI); - rpe |= (val & FB_FMAX_VMIN_FREQ_HI_MASK) << 5; + if (IS_GEN8(dev_priv->dev)) + mask |= GEN8_PMINTR_REDIRECT_TO_NON_DISP; - return rpe; + return ~mask; } -static int valleyview_rps_min_freq(struct drm_i915_private *dev_priv) +/* gen6_set_rps is called to update the frequency request, but should also be + * called when the range (min_delay and max_delay) is modified so that we can + * update the GEN6_RP_INTERRUPT_LIMITS register accordingly. */ +void gen6_set_rps(struct drm_device *dev, u8 val) { - return vlv_punit_read(dev_priv, PUNIT_REG_GPU_LFM) & 0xff; -} + struct drm_i915_private *dev_priv = dev->dev_private; -/* Check that the pctx buffer wasn't move under us. */ -static void valleyview_check_pctx(struct drm_i915_private *dev_priv) -{ - unsigned long pctx_addr = I915_READ(VLV_PCBR) & ~4095; + WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock)); + WARN_ON(val > dev_priv->rps.max_freq_softlimit); + WARN_ON(val < dev_priv->rps.min_freq_softlimit); - WARN_ON(pctx_addr != dev_priv->mm.stolen_base + - dev_priv->vlv_pctx->stolen->start); -} + /* min/max delay may still have been modified so be sure to + * write the limits value. + */ + if (val != dev_priv->rps.cur_freq) { + gen6_set_rps_thresholds(dev_priv, val); + if (IS_HASWELL(dev) || IS_BROADWELL(dev)) + I915_WRITE(GEN6_RPNSWREQ, + HSW_FREQUENCY(val)); + else + I915_WRITE(GEN6_RPNSWREQ, + GEN6_FREQUENCY(val) | + GEN6_OFFSET(0) | + GEN6_AGGRESSIVE_TURBO); + } -/* Check that the pcbr address is not empty. */ -static void cherryview_check_pctx(struct drm_i915_private *dev_priv) -{ - unsigned long pctx_addr = I915_READ(VLV_PCBR) & ~4095; - - WARN_ON((pctx_addr >> VLV_PCBR_ADDR_SHIFT) == 0); -} - -static void cherryview_setup_pctx(struct drm_device *dev) -{ - struct drm_i915_private *dev_priv = dev->dev_private; - unsigned long pctx_paddr, paddr; - struct i915_gtt *gtt = &dev_priv->gtt; - u32 pcbr; - int pctx_size = 32*1024; - - WARN_ON(!mutex_is_locked(&dev->struct_mutex)); + /* Make sure we continue to get interrupts + * until we hit the minimum or maximum frequencies. + */ + I915_WRITE(GEN6_RP_INTERRUPT_LIMITS, gen6_rps_limits(dev_priv, val)); + I915_WRITE(GEN6_PMINTRMSK, gen6_rps_pm_mask(dev_priv, val)); - pcbr = I915_READ(VLV_PCBR); - if ((pcbr >> VLV_PCBR_ADDR_SHIFT) == 0) { - paddr = (dev_priv->mm.stolen_base + - (gtt->stolen_size - pctx_size)); + POSTING_READ(GEN6_RPNSWREQ); - pctx_paddr = (paddr & (~4095)); - I915_WRITE(VLV_PCBR, pctx_paddr); - } + dev_priv->rps.cur_freq = val; + trace_intel_gpu_freq_change(val * 50); } -static void valleyview_setup_pctx(struct drm_device *dev) +/* vlv_set_rps_idle: Set the frequency to Rpn if Gfx clocks are down + * + * * If Gfx is Idle, then + * 1. Mask Turbo interrupts + * 2. Bring up Gfx clock + * 3. Change the freq to Rpn and wait till P-Unit updates freq + * 4. Clear the Force GFX CLK ON bit so that Gfx can down + * 5. Unmask Turbo interrupts +*/ +static void vlv_set_rps_idle(struct drm_i915_private *dev_priv) { - struct drm_i915_private *dev_priv = dev->dev_private; - struct drm_i915_gem_object *pctx; - unsigned long pctx_paddr; - u32 pcbr; - int pctx_size = 24*1024; - - WARN_ON(!mutex_is_locked(&dev->struct_mutex)); - - pcbr = I915_READ(VLV_PCBR); - if (pcbr) { - /* BIOS set it up already, grab the pre-alloc'd space */ - int pcbr_offset; + struct drm_device *dev = dev_priv->dev; - pcbr_offset = (pcbr & (~4095)) - dev_priv->mm.stolen_base; - pctx = i915_gem_object_create_stolen_for_preallocated(dev_priv->dev, - pcbr_offset, - I915_GTT_OFFSET_NONE, - pctx_size); - goto out; + /* Latest VLV doesn't need to force the gfx clock */ + if (dev->pdev->revision >= 0xd) { + valleyview_set_rps(dev_priv->dev, dev_priv->rps.min_freq_softlimit); + return; } /* - * From the Gunit register HAS: - * The Gfx driver is expected to program this register and ensure - * proper allocation within Gfx stolen memory. For example, this - * register should be programmed such than the PCBR range does not - * overlap with other ranges, such as the frame buffer, protected - * memory, or any other relevant ranges. + * When we are idle. Drop to min voltage state. */ - pctx = i915_gem_object_create_stolen(dev, pctx_size); - if (!pctx) { - DRM_DEBUG("not enough stolen space for PCTX, disabling\n"); + + if (dev_priv->rps.cur_freq <= dev_priv->rps.min_freq_softlimit) return; - } - pctx_paddr = dev_priv->mm.stolen_base + pctx->stolen->start; - I915_WRITE(VLV_PCBR, pctx_paddr); + /* Mask turbo interrupt so that they will not come in between */ + I915_WRITE(GEN6_PMINTRMSK, 0xffffffff); -out: - dev_priv->vlv_pctx = pctx; -} + vlv_force_gfx_clock(dev_priv, true); -static void valleyview_cleanup_pctx(struct drm_device *dev) -{ - struct drm_i915_private *dev_priv = dev->dev_private; + dev_priv->rps.cur_freq = dev_priv->rps.min_freq_softlimit; - if (WARN_ON(!dev_priv->vlv_pctx)) - return; + vlv_punit_write(dev_priv, PUNIT_REG_GPU_FREQ_REQ, + dev_priv->rps.min_freq_softlimit); - drm_gem_object_unreference(&dev_priv->vlv_pctx->base); - dev_priv->vlv_pctx = NULL; + if (wait_for(((vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS)) + & GENFREQSTATUS) == 0, 5)) + DRM_ERROR("timed out waiting for Punit\n"); + + vlv_force_gfx_clock(dev_priv, false); + + I915_WRITE(GEN6_PMINTRMSK, + gen6_rps_pm_mask(dev_priv, dev_priv->rps.cur_freq)); } -static void valleyview_init_gt_powersave(struct drm_device *dev) +void gen6_rps_idle(struct drm_i915_private *dev_priv) { - struct drm_i915_private *dev_priv = dev->dev_private; - u32 val; - - valleyview_setup_pctx(dev); + struct drm_device *dev = dev_priv->dev; mutex_lock(&dev_priv->rps.hw_lock); - - val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS); - switch ((val >> 6) & 3) { - case 0: - case 1: - dev_priv->mem_freq = 800; - break; - case 2: - dev_priv->mem_freq = 1066; - break; - case 3: - dev_priv->mem_freq = 1333; - break; + if (dev_priv->rps.enabled) { + if (IS_CHERRYVIEW(dev)) + valleyview_set_rps(dev_priv->dev, dev_priv->rps.min_freq_softlimit); + else if (IS_VALLEYVIEW(dev)) + vlv_set_rps_idle(dev_priv); + else + gen6_set_rps(dev_priv->dev, dev_priv->rps.min_freq_softlimit); + dev_priv->rps.last_adj = 0; } - DRM_DEBUG_DRIVER("DDR speed: %d MHz", dev_priv->mem_freq); - - dev_priv->rps.max_freq = valleyview_rps_max_freq(dev_priv); - dev_priv->rps.rp0_freq = dev_priv->rps.max_freq; - DRM_DEBUG_DRIVER("max GPU freq: %d MHz (%u)\n", - vlv_gpu_freq(dev_priv, dev_priv->rps.max_freq), - dev_priv->rps.max_freq); - - dev_priv->rps.efficient_freq = valleyview_rps_rpe_freq(dev_priv); - DRM_DEBUG_DRIVER("RPe GPU freq: %d MHz (%u)\n", - vlv_gpu_freq(dev_priv, dev_priv->rps.efficient_freq), - dev_priv->rps.efficient_freq); - - dev_priv->rps.rp1_freq = valleyview_rps_guar_freq(dev_priv); - DRM_DEBUG_DRIVER("RP1(Guar Freq) GPU freq: %d MHz (%u)\n", - vlv_gpu_freq(dev_priv, dev_priv->rps.rp1_freq), - dev_priv->rps.rp1_freq); - - dev_priv->rps.min_freq = valleyview_rps_min_freq(dev_priv); - DRM_DEBUG_DRIVER("min GPU freq: %d MHz (%u)\n", - vlv_gpu_freq(dev_priv, dev_priv->rps.min_freq), - dev_priv->rps.min_freq); - - /* Preserve min/max settings in case of re-init */ - if (dev_priv->rps.max_freq_softlimit == 0) - dev_priv->rps.max_freq_softlimit = dev_priv->rps.max_freq; - - if (dev_priv->rps.min_freq_softlimit == 0) - dev_priv->rps.min_freq_softlimit = dev_priv->rps.min_freq; - mutex_unlock(&dev_priv->rps.hw_lock); } -static void cherryview_init_gt_powersave(struct drm_device *dev) +void gen6_rps_boost(struct drm_i915_private *dev_priv) { - struct drm_i915_private *dev_priv = dev->dev_private; - u32 val; - - cherryview_setup_pctx(dev); + struct drm_device *dev = dev_priv->dev; mutex_lock(&dev_priv->rps.hw_lock); - - val = vlv_punit_read(dev_priv, CCK_FUSE_REG); - switch ((val >> 2) & 0x7) { - case 0: - case 1: - dev_priv->rps.cz_freq = 200; - dev_priv->mem_freq = 1600; - break; - case 2: - dev_priv->rps.cz_freq = 267; - dev_priv->mem_freq = 1600; - break; - case 3: - dev_priv->rps.cz_freq = 333; - dev_priv->mem_freq = 2000; - break; - case 4: - dev_priv->rps.cz_freq = 320; - dev_priv->mem_freq = 1600; - break; - case 5: - dev_priv->rps.cz_freq = 400; - dev_priv->mem_freq = 1600; - break; + if (dev_priv->rps.enabled) { + if (IS_VALLEYVIEW(dev)) + valleyview_set_rps(dev_priv->dev, dev_priv->rps.max_freq_softlimit); + else + gen6_set_rps(dev_priv->dev, dev_priv->rps.max_freq_softlimit); + dev_priv->rps.last_adj = 0; } - DRM_DEBUG_DRIVER("DDR speed: %d MHz", dev_priv->mem_freq); + mutex_unlock(&dev_priv->rps.hw_lock); +} - dev_priv->rps.max_freq = cherryview_rps_max_freq(dev_priv); - dev_priv->rps.rp0_freq = dev_priv->rps.max_freq; - DRM_DEBUG_DRIVER("max GPU freq: %d MHz (%u)\n", - vlv_gpu_freq(dev_priv, dev_priv->rps.max_freq), - dev_priv->rps.max_freq); +void valleyview_set_rps(struct drm_device *dev, u8 val) +{ + struct drm_i915_private *dev_priv = dev->dev_private; - dev_priv->rps.efficient_freq = cherryview_rps_rpe_freq(dev_priv); - DRM_DEBUG_DRIVER("RPe GPU freq: %d MHz (%u)\n", - vlv_gpu_freq(dev_priv, dev_priv->rps.efficient_freq), - dev_priv->rps.efficient_freq); + WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock)); + WARN_ON(val > dev_priv->rps.max_freq_softlimit); + WARN_ON(val < dev_priv->rps.min_freq_softlimit); - dev_priv->rps.rp1_freq = cherryview_rps_guar_freq(dev_priv); - DRM_DEBUG_DRIVER("RP1(Guar) GPU freq: %d MHz (%u)\n", - vlv_gpu_freq(dev_priv, dev_priv->rps.rp1_freq), - dev_priv->rps.rp1_freq); + if (WARN_ONCE(IS_CHERRYVIEW(dev) && (val & 1), + "Odd GPU freq value\n")) + val &= ~1; - dev_priv->rps.min_freq = cherryview_rps_min_freq(dev_priv); - DRM_DEBUG_DRIVER("min GPU freq: %d MHz (%u)\n", - vlv_gpu_freq(dev_priv, dev_priv->rps.min_freq), - dev_priv->rps.min_freq); + if (val != dev_priv->rps.cur_freq) + vlv_punit_write(dev_priv, PUNIT_REG_GPU_FREQ_REQ, val); - WARN_ONCE((dev_priv->rps.max_freq | - dev_priv->rps.efficient_freq | - dev_priv->rps.rp1_freq | - dev_priv->rps.min_freq) & 1, - "Odd GPU freq values\n"); + I915_WRITE(GEN6_PMINTRMSK, gen6_rps_pm_mask(dev_priv, val)); - /* Preserve min/max settings in case of re-init */ - if (dev_priv->rps.max_freq_softlimit == 0) - dev_priv->rps.max_freq_softlimit = dev_priv->rps.max_freq; + dev_priv->rps.cur_freq = val; + trace_intel_gpu_freq_change(vlv_gpu_freq(dev_priv, val)); +} - if (dev_priv->rps.min_freq_softlimit == 0) - dev_priv->rps.min_freq_softlimit = dev_priv->rps.min_freq; +static void gen9_disable_rps(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev->dev_private; - mutex_unlock(&dev_priv->rps.hw_lock); + I915_WRITE(GEN6_RC_CONTROL, 0); } -static void valleyview_cleanup_gt_powersave(struct drm_device *dev) +static void gen6_disable_rps(struct drm_device *dev) { - valleyview_cleanup_pctx(dev); + struct drm_i915_private *dev_priv = dev->dev_private; + + I915_WRITE(GEN6_RC_CONTROL, 0); + I915_WRITE(GEN6_RPNSWREQ, 1 << 31); } -static void cherryview_enable_rps(struct drm_device *dev) +static void cherryview_disable_rps(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; - struct intel_engine_cs *ring; - u32 gtfifodbg, val, rc6_mode = 0, pcbr; - int i; - WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock)); + I915_WRITE(GEN6_RC_CONTROL, 0); +} - gtfifodbg = I915_READ(GTFIFODBG); - if (gtfifodbg) { - DRM_DEBUG_DRIVER("GT fifo had a previous error %x\n", - gtfifodbg); - I915_WRITE(GTFIFODBG, gtfifodbg); +static void valleyview_disable_rps(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + + /* we're doing forcewake before Disabling RC6, + * This what the BIOS expects when going into suspend */ + gen6_gt_force_wake_get(dev_priv, FORCEWAKE_ALL); + + I915_WRITE(GEN6_RC_CONTROL, 0); + + gen6_gt_force_wake_put(dev_priv, FORCEWAKE_ALL); +} + +static void intel_print_rc6_info(struct drm_device *dev, u32 mode) +{ + if (IS_VALLEYVIEW(dev)) { + if (mode & (GEN7_RC_CTL_TO_MODE | GEN6_RC_CTL_EI_MODE(1))) + mode = GEN6_RC_CTL_RC6_ENABLE; + else + mode = 0; } + if (HAS_RC6p(dev)) + DRM_DEBUG_KMS("Enabling RC6 states: RC6 %s RC6p %s RC6pp %s\n", + (mode & GEN6_RC_CTL_RC6_ENABLE) ? "on" : "off", + (mode & GEN6_RC_CTL_RC6p_ENABLE) ? "on" : "off", + (mode & GEN6_RC_CTL_RC6pp_ENABLE) ? "on" : "off"); - cherryview_check_pctx(dev_priv); + else + DRM_DEBUG_KMS("Enabling RC6 states: RC6 %s\n", + (mode & GEN6_RC_CTL_RC6_ENABLE) ? "on" : "off"); +} - /* 1a & 1b: Get forcewake during program sequence. Although the driver +static int sanitize_rc6_option(const struct drm_device *dev, int enable_rc6) +{ + /* No RC6 before Ironlake */ + if (INTEL_INFO(dev)->gen < 5) + return 0; + + /* RC6 is only on Ironlake mobile not on desktop */ + if (INTEL_INFO(dev)->gen == 5 && !IS_IRONLAKE_M(dev)) + return 0; + + /* Respect the kernel parameter if it is set */ + if (enable_rc6 >= 0) { + int mask; + + if (HAS_RC6p(dev)) + mask = INTEL_RC6_ENABLE | INTEL_RC6p_ENABLE | + INTEL_RC6pp_ENABLE; + else + mask = INTEL_RC6_ENABLE; + + if ((enable_rc6 & mask) != enable_rc6) + DRM_DEBUG_KMS("Adjusting RC6 mask to %d (requested %d, valid %d)\n", + enable_rc6 & mask, enable_rc6, mask); + + return enable_rc6 & mask; + } + + /* Disable RC6 on Ironlake */ + if (INTEL_INFO(dev)->gen == 5) + return 0; + + if (IS_IVYBRIDGE(dev)) + return (INTEL_RC6_ENABLE | INTEL_RC6p_ENABLE); + + return INTEL_RC6_ENABLE; +} + +int intel_enable_rc6(const struct drm_device *dev) +{ + return i915.enable_rc6; +} + +static void parse_rp_state_cap(struct drm_i915_private *dev_priv, u32 rp_state_cap) +{ + /* All of these values are in units of 50MHz */ + dev_priv->rps.cur_freq = 0; + /* static values from HW: RP0 < RPe < RP1 < RPn (min_freq) */ + dev_priv->rps.rp1_freq = (rp_state_cap >> 8) & 0xff; + dev_priv->rps.rp0_freq = (rp_state_cap >> 0) & 0xff; + dev_priv->rps.min_freq = (rp_state_cap >> 16) & 0xff; + /* XXX: only BYT has a special efficient freq */ + dev_priv->rps.efficient_freq = dev_priv->rps.rp1_freq; + /* hw_max = RP0 until we check for overclocking */ + dev_priv->rps.max_freq = dev_priv->rps.rp0_freq; + + /* Preserve min/max settings in case of re-init */ + if (dev_priv->rps.max_freq_softlimit == 0) + dev_priv->rps.max_freq_softlimit = dev_priv->rps.max_freq; + + if (dev_priv->rps.min_freq_softlimit == 0) + dev_priv->rps.min_freq_softlimit = dev_priv->rps.min_freq; +} + +static void gen9_enable_rps(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + struct intel_engine_cs *ring; + uint32_t rc6_mask = 0; + int unused; + + /* 1a: Software RC state - RC0 */ + I915_WRITE(GEN6_RC_STATE, 0); + + /* 1b: Get forcewake during program sequence. Although the driver * hasn't enabled a state yet where we need forcewake, BIOS may have.*/ gen6_gt_force_wake_get(dev_priv, FORCEWAKE_ALL); - /* 2a: Program RC6 thresholds.*/ - I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16); + /* 2a: Disable RC states. */ + I915_WRITE(GEN6_RC_CONTROL, 0); + + /* 2b: Program RC6 thresholds.*/ + I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 54 << 16); I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */ I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */ - - for_each_ring(ring, dev_priv, i) + for_each_ring(ring, dev_priv, unused) I915_WRITE(RING_MAX_IDLE(ring->mmio_base), 10); I915_WRITE(GEN6_RC_SLEEP, 0); + I915_WRITE(GEN6_RC6_THRESHOLD, 37500); /* 37.5/125ms per EI */ - I915_WRITE(GEN6_RC6_THRESHOLD, 50000); /* 50/125ms per EI */ + /* 3a: Enable RC6 */ + if (intel_enable_rc6(dev) & INTEL_RC6_ENABLE) + rc6_mask = GEN6_RC_CTL_RC6_ENABLE; + DRM_INFO("RC6 %s\n", (rc6_mask & GEN6_RC_CTL_RC6_ENABLE) ? + "on" : "off"); + I915_WRITE(GEN6_RC_CONTROL, GEN6_RC_CTL_HW_ENABLE | + GEN6_RC_CTL_EI_MODE(1) | + rc6_mask); - /* allows RC6 residency counter to work */ - I915_WRITE(VLV_COUNTER_CONTROL, - _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH | - VLV_MEDIA_RC6_COUNT_EN | - VLV_RENDER_RC6_COUNT_EN)); + gen6_gt_force_wake_put(dev_priv, FORCEWAKE_ALL); - /* For now we assume BIOS is allocating and populating the PCBR */ - pcbr = I915_READ(VLV_PCBR); +} - DRM_DEBUG_DRIVER("PCBR offset : 0x%x\n", pcbr); +static void gen8_enable_rps(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + struct intel_engine_cs *ring; + uint32_t rc6_mask = 0, rp_state_cap; + int unused; - /* 3: Enable RC6 */ - if ((intel_enable_rc6(dev) & INTEL_RC6_ENABLE) && - (pcbr >> VLV_PCBR_ADDR_SHIFT)) - rc6_mode = GEN6_RC_CTL_EI_MODE(1); + /* 1a: Software RC state - RC0 */ + I915_WRITE(GEN6_RC_STATE, 0); - I915_WRITE(GEN6_RC_CONTROL, rc6_mode); + /* 1c & 1d: Get forcewake during program sequence. Although the driver + * hasn't enabled a state yet where we need forcewake, BIOS may have.*/ + gen6_gt_force_wake_get(dev_priv, FORCEWAKE_ALL); + + /* 2a: Disable RC states. */ + I915_WRITE(GEN6_RC_CONTROL, 0); + + rp_state_cap = I915_READ(GEN6_RP_STATE_CAP); + parse_rp_state_cap(dev_priv, rp_state_cap); + + /* 2b: Program RC6 thresholds.*/ + I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16); + I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */ + I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */ + for_each_ring(ring, dev_priv, unused) + I915_WRITE(RING_MAX_IDLE(ring->mmio_base), 10); + I915_WRITE(GEN6_RC_SLEEP, 0); + if (IS_BROADWELL(dev)) + I915_WRITE(GEN6_RC6_THRESHOLD, 625); /* 800us/1.28 for TO */ + else + I915_WRITE(GEN6_RC6_THRESHOLD, 50000); /* 50/125ms per EI */ + + /* 3: Enable RC6 */ + if (intel_enable_rc6(dev) & INTEL_RC6_ENABLE) + rc6_mask = GEN6_RC_CTL_RC6_ENABLE; + intel_print_rc6_info(dev, rc6_mask); + if (IS_BROADWELL(dev)) + I915_WRITE(GEN6_RC_CONTROL, GEN6_RC_CTL_HW_ENABLE | + GEN7_RC_CTL_TO_MODE | + rc6_mask); + else + I915_WRITE(GEN6_RC_CONTROL, GEN6_RC_CTL_HW_ENABLE | + GEN6_RC_CTL_EI_MODE(1) | + rc6_mask); /* 4 Program defaults and thresholds for RPS*/ - I915_WRITE(GEN6_RP_UP_THRESHOLD, 59400); - I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 245000); - I915_WRITE(GEN6_RP_UP_EI, 66000); - I915_WRITE(GEN6_RP_DOWN_EI, 350000); + I915_WRITE(GEN6_RPNSWREQ, + HSW_FREQUENCY(dev_priv->rps.rp1_freq)); + I915_WRITE(GEN6_RC_VIDEO_FREQ, + HSW_FREQUENCY(dev_priv->rps.rp1_freq)); + /* NB: Docs say 1s, and 1000000 - which aren't equivalent */ + I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 100000000 / 128); /* 1 second timeout */ - I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10); + /* Docs recommend 900MHz, and 300 MHz respectively */ + I915_WRITE(GEN6_RP_INTERRUPT_LIMITS, + dev_priv->rps.max_freq_softlimit << 24 | + dev_priv->rps.min_freq_softlimit << 16); - /* WaDisablePwrmtrEvent:chv (pre-production hw) */ - I915_WRITE(0xA80C, I915_READ(0xA80C) & 0x00ffffff); - I915_WRITE(0xA810, I915_READ(0xA810) & 0xffffff00); + I915_WRITE(GEN6_RP_UP_THRESHOLD, 7600000 / 128); /* 76ms busyness per EI, 90% */ + I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 31300000 / 128); /* 313ms busyness per EI, 70%*/ + I915_WRITE(GEN6_RP_UP_EI, 66000); /* 84.48ms, XXX: random? */ + I915_WRITE(GEN6_RP_DOWN_EI, 350000); /* 448ms, XXX: random? */ + + I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10); /* 5: Enable RPS */ I915_WRITE(GEN6_RP_CONTROL, + GEN6_RP_MEDIA_TURBO | GEN6_RP_MEDIA_HW_NORMAL_MODE | - GEN6_RP_MEDIA_IS_GFX | /* WaSetMaskForGfxBusyness:chv (pre-production hw ?) */ + GEN6_RP_MEDIA_IS_GFX | GEN6_RP_ENABLE | GEN6_RP_UP_BUSY_AVG | GEN6_RP_DOWN_IDLE_AVG); - val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS); - - DRM_DEBUG_DRIVER("GPLL enabled? %s\n", val & 0x10 ? "yes" : "no"); - DRM_DEBUG_DRIVER("GPU status: 0x%08x\n", val); - - dev_priv->rps.cur_freq = (val >> 8) & 0xff; - DRM_DEBUG_DRIVER("current GPU freq: %d MHz (%u)\n", - vlv_gpu_freq(dev_priv, dev_priv->rps.cur_freq), - dev_priv->rps.cur_freq); - - DRM_DEBUG_DRIVER("setting GPU freq to %d MHz (%u)\n", - vlv_gpu_freq(dev_priv, dev_priv->rps.efficient_freq), - dev_priv->rps.efficient_freq); - - valleyview_set_rps(dev_priv->dev, dev_priv->rps.efficient_freq); + /* 6: Ring frequency + overclocking (our driver does this later */ - gen8_enable_rps_interrupts(dev); + gen6_set_rps(dev, (I915_READ(GEN6_GT_PERF_STATUS) & 0xff00) >> 8); gen6_gt_force_wake_put(dev_priv, FORCEWAKE_ALL); } -static void valleyview_enable_rps(struct drm_device *dev) +static void gen6_enable_rps(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; struct intel_engine_cs *ring; - u32 gtfifodbg, val, rc6_mode = 0; - int i; + u32 rp_state_cap; + u32 rc6vids, pcu_mbox = 0, rc6_mask = 0; + u32 gtfifodbg; + int rc6_mode; + int i, ret; WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock)); - valleyview_check_pctx(dev_priv); + /* Here begins a magic sequence of register writes to enable + * auto-downclocking. + * + * Perhaps there might be some value in exposing these to + * userspace... + */ + I915_WRITE(GEN6_RC_STATE, 0); + /* Clear the DBG now so we don't confuse earlier errors */ if ((gtfifodbg = I915_READ(GTFIFODBG))) { - DRM_DEBUG_DRIVER("GT fifo had a previous error %x\n", - gtfifodbg); + DRM_ERROR("GT fifo had a previous error %x\n", gtfifodbg); I915_WRITE(GTFIFODBG, gtfifodbg); } - /* If VLV, Forcewake all wells, else re-direct to regular path */ gen6_gt_force_wake_get(dev_priv, FORCEWAKE_ALL); - I915_WRITE(GEN6_RP_UP_THRESHOLD, 59400); - I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 245000); - I915_WRITE(GEN6_RP_UP_EI, 66000); - I915_WRITE(GEN6_RP_DOWN_EI, 350000); + rp_state_cap = I915_READ(GEN6_RP_STATE_CAP); - I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10); - I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 0xf4240); + parse_rp_state_cap(dev_priv, rp_state_cap); - I915_WRITE(GEN6_RP_CONTROL, - GEN6_RP_MEDIA_TURBO | - GEN6_RP_MEDIA_HW_NORMAL_MODE | - GEN6_RP_MEDIA_IS_GFX | - GEN6_RP_ENABLE | - GEN6_RP_UP_BUSY_AVG | - GEN6_RP_DOWN_IDLE_CONT); + /* disable the counters and set deterministic thresholds */ + I915_WRITE(GEN6_RC_CONTROL, 0); - I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 0x00280000); + I915_WRITE(GEN6_RC1_WAKE_RATE_LIMIT, 1000 << 16); + I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16 | 30); + I915_WRITE(GEN6_RC6pp_WAKE_RATE_LIMIT, 30); I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000); I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25); for_each_ring(ring, dev_priv, i) I915_WRITE(RING_MAX_IDLE(ring->mmio_base), 10); - I915_WRITE(GEN6_RC6_THRESHOLD, 0x557); - - /* allows RC6 residency counter to work */ - I915_WRITE(VLV_COUNTER_CONTROL, - _MASKED_BIT_ENABLE(VLV_MEDIA_RC0_COUNT_EN | - VLV_RENDER_RC0_COUNT_EN | - VLV_MEDIA_RC6_COUNT_EN | - VLV_RENDER_RC6_COUNT_EN)); + I915_WRITE(GEN6_RC_SLEEP, 0); + I915_WRITE(GEN6_RC1e_THRESHOLD, 1000); + if (IS_IVYBRIDGE(dev)) + I915_WRITE(GEN6_RC6_THRESHOLD, 125000); + else + I915_WRITE(GEN6_RC6_THRESHOLD, 50000); + I915_WRITE(GEN6_RC6p_THRESHOLD, 150000); + I915_WRITE(GEN6_RC6pp_THRESHOLD, 64000); /* unused */ - if (intel_enable_rc6(dev) & INTEL_RC6_ENABLE) - rc6_mode = GEN7_RC_CTL_TO_MODE | VLV_RC_CTL_CTX_RST_PARALLEL; + /* Check if we are enabling RC6 */ + rc6_mode = intel_enable_rc6(dev_priv->dev); + if (rc6_mode & INTEL_RC6_ENABLE) + rc6_mask |= GEN6_RC_CTL_RC6_ENABLE; - intel_print_rc6_info(dev, rc6_mode); + /* We don't use those on Haswell */ + if (!IS_HASWELL(dev)) { + if (rc6_mode & INTEL_RC6p_ENABLE) + rc6_mask |= GEN6_RC_CTL_RC6p_ENABLE; - I915_WRITE(GEN6_RC_CONTROL, rc6_mode); + if (rc6_mode & INTEL_RC6pp_ENABLE) + rc6_mask |= GEN6_RC_CTL_RC6pp_ENABLE; + } - val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS); + intel_print_rc6_info(dev, rc6_mask); - DRM_DEBUG_DRIVER("GPLL enabled? %s\n", val & 0x10 ? "yes" : "no"); - DRM_DEBUG_DRIVER("GPU status: 0x%08x\n", val); + I915_WRITE(GEN6_RC_CONTROL, + rc6_mask | + GEN6_RC_CTL_EI_MODE(1) | + GEN6_RC_CTL_HW_ENABLE); - dev_priv->rps.cur_freq = (val >> 8) & 0xff; - DRM_DEBUG_DRIVER("current GPU freq: %d MHz (%u)\n", - vlv_gpu_freq(dev_priv, dev_priv->rps.cur_freq), - dev_priv->rps.cur_freq); + /* Power down if completely idle for over 50ms */ + I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 50000); + I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10); - DRM_DEBUG_DRIVER("setting GPU freq to %d MHz (%u)\n", - vlv_gpu_freq(dev_priv, dev_priv->rps.efficient_freq), - dev_priv->rps.efficient_freq); + ret = sandybridge_pcode_write(dev_priv, GEN6_PCODE_WRITE_MIN_FREQ_TABLE, 0); + if (ret) + DRM_DEBUG_DRIVER("Failed to set the min frequency\n"); - valleyview_set_rps(dev_priv->dev, dev_priv->rps.efficient_freq); + ret = sandybridge_pcode_read(dev_priv, GEN6_READ_OC_PARAMS, &pcu_mbox); + if (!ret && (pcu_mbox & (1<<31))) { /* OC supported */ + DRM_DEBUG_DRIVER("Overclocking supported. Max: %dMHz, Overclock max: %dMHz\n", + (dev_priv->rps.max_freq_softlimit & 0xff) * 50, + (pcu_mbox & 0xff) * 50); + dev_priv->rps.max_freq = pcu_mbox & 0xff; + } - gen6_enable_rps_interrupts(dev); + dev_priv->rps.power = HIGH_POWER; /* force a reset */ + gen6_set_rps(dev_priv->dev, dev_priv->rps.min_freq_softlimit); + + rc6vids = 0; + ret = sandybridge_pcode_read(dev_priv, GEN6_PCODE_READ_RC6VIDS, &rc6vids); + if (IS_GEN6(dev) && ret) { + DRM_DEBUG_DRIVER("Couldn't check for BIOS workaround\n"); + } else if (IS_GEN6(dev) && (GEN6_DECODE_RC6_VID(rc6vids & 0xff) < 450)) { + DRM_DEBUG_DRIVER("You should update your BIOS. Correcting minimum rc6 voltage (%dmV->%dmV)\n", + GEN6_DECODE_RC6_VID(rc6vids & 0xff), 450); + rc6vids &= 0xffff00; + rc6vids |= GEN6_ENCODE_RC6_VID(450); + ret = sandybridge_pcode_write(dev_priv, GEN6_PCODE_WRITE_RC6VIDS, rc6vids); + if (ret) + DRM_ERROR("Couldn't fix incorrect rc6 voltage\n"); + } gen6_gt_force_wake_put(dev_priv, FORCEWAKE_ALL); } -void ironlake_teardown_rc6(struct drm_device *dev) +static void __gen6_update_ring_freq(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; + int min_freq = 15; + unsigned int gpu_freq; + unsigned int max_ia_freq, min_ring_freq; + int scaling_factor = 180; + struct cpufreq_policy *policy; - if (dev_priv->ips.renderctx) { - i915_gem_object_ggtt_unpin(dev_priv->ips.renderctx); - drm_gem_object_unreference(&dev_priv->ips.renderctx->base); - dev_priv->ips.renderctx = NULL; - } + WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock)); - if (dev_priv->ips.pwrctx) { - i915_gem_object_ggtt_unpin(dev_priv->ips.pwrctx); - drm_gem_object_unreference(&dev_priv->ips.pwrctx->base); - dev_priv->ips.pwrctx = NULL; + policy = cpufreq_cpu_get(0); + if (policy) { + max_ia_freq = policy->cpuinfo.max_freq; + cpufreq_cpu_put(policy); + } else { + /* + * Default to measured freq if none found, PCU will ensure we + * don't go over + */ + max_ia_freq = tsc_khz; } -} - -static void ironlake_disable_rc6(struct drm_device *dev) -{ - struct drm_i915_private *dev_priv = dev->dev_private; - if (I915_READ(PWRCTXA)) { - /* Wake the GPU, prevent RC6, then restore RSTDBYCTL */ - I915_WRITE(RSTDBYCTL, I915_READ(RSTDBYCTL) | RCX_SW_EXIT); - wait_for(((I915_READ(RSTDBYCTL) & RSX_STATUS_MASK) == RSX_STATUS_ON), - 50); - - I915_WRITE(PWRCTXA, 0); - POSTING_READ(PWRCTXA); + /* Convert from kHz to MHz */ + max_ia_freq /= 1000; - I915_WRITE(RSTDBYCTL, I915_READ(RSTDBYCTL) & ~RCX_SW_EXIT); - POSTING_READ(RSTDBYCTL); - } -} + min_ring_freq = I915_READ(DCLK) & 0xf; + /* convert DDR frequency from units of 266.6MHz to bandwidth */ + min_ring_freq = mult_frac(min_ring_freq, 8, 3); -static int ironlake_setup_rc6(struct drm_device *dev) -{ - struct drm_i915_private *dev_priv = dev->dev_private; + /* + * For each potential GPU frequency, load a ring frequency we'd like + * to use for memory access. We do this by specifying the IA frequency + * the PCU should use as a reference to determine the ring frequency. + */ + for (gpu_freq = dev_priv->rps.max_freq_softlimit; gpu_freq >= dev_priv->rps.min_freq_softlimit; + gpu_freq--) { + int diff = dev_priv->rps.max_freq_softlimit - gpu_freq; + unsigned int ia_freq = 0, ring_freq = 0; - if (dev_priv->ips.renderctx == NULL) - dev_priv->ips.renderctx = intel_alloc_context_page(dev); - if (!dev_priv->ips.renderctx) - return -ENOMEM; + if (INTEL_INFO(dev)->gen >= 8) { + /* max(2 * GT, DDR). NB: GT is 50MHz units */ + ring_freq = max(min_ring_freq, gpu_freq); + } else if (IS_HASWELL(dev)) { + ring_freq = mult_frac(gpu_freq, 5, 4); + ring_freq = max(min_ring_freq, ring_freq); + /* leave ia_freq as the default, chosen by cpufreq */ + } else { + /* On older processors, there is no separate ring + * clock domain, so in order to boost the bandwidth + * of the ring, we need to upclock the CPU (ia_freq). + * + * For GPU frequencies less than 750MHz, + * just use the lowest ring freq. + */ + if (gpu_freq < min_freq) + ia_freq = 800; + else + ia_freq = max_ia_freq - ((diff * scaling_factor) / 2); + ia_freq = DIV_ROUND_CLOSEST(ia_freq, 100); + } - if (dev_priv->ips.pwrctx == NULL) - dev_priv->ips.pwrctx = intel_alloc_context_page(dev); - if (!dev_priv->ips.pwrctx) { - ironlake_teardown_rc6(dev); - return -ENOMEM; + sandybridge_pcode_write(dev_priv, + GEN6_PCODE_WRITE_MIN_FREQ_TABLE, + ia_freq << GEN6_PCODE_FREQ_IA_RATIO_SHIFT | + ring_freq << GEN6_PCODE_FREQ_RING_RATIO_SHIFT | + gpu_freq); } - - return 0; } -static void ironlake_enable_rc6(struct drm_device *dev) +void gen6_update_ring_freq(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; - struct intel_engine_cs *ring = &dev_priv->ring[RCS]; - bool was_interruptible; - int ret; - /* rc6 disabled by default due to repeated reports of hanging during - * boot and resume. - */ - if (!intel_enable_rc6(dev)) + if (INTEL_INFO(dev)->gen < 6 || IS_VALLEYVIEW(dev)) return; - WARN_ON(!mutex_is_locked(&dev->struct_mutex)); - - ret = ironlake_setup_rc6(dev); - if (ret) - return; + mutex_lock(&dev_priv->rps.hw_lock); + __gen6_update_ring_freq(dev); + mutex_unlock(&dev_priv->rps.hw_lock); +} - was_interruptible = dev_priv->mm.interruptible; - dev_priv->mm.interruptible = false; +static int cherryview_rps_max_freq(struct drm_i915_private *dev_priv) +{ + u32 val, rp0; - /* - * GPU can automatically power down the render unit if given a page - * to save state. - */ - ret = intel_ring_begin(ring, 6); - if (ret) { - ironlake_teardown_rc6(dev); - dev_priv->mm.interruptible = was_interruptible; - return; - } + val = vlv_punit_read(dev_priv, PUNIT_GPU_STATUS_REG); + rp0 = (val >> PUNIT_GPU_STATUS_MAX_FREQ_SHIFT) & PUNIT_GPU_STATUS_MAX_FREQ_MASK; - intel_ring_emit(ring, MI_SUSPEND_FLUSH | MI_SUSPEND_FLUSH_EN); - intel_ring_emit(ring, MI_SET_CONTEXT); - intel_ring_emit(ring, i915_gem_obj_ggtt_offset(dev_priv->ips.renderctx) | - MI_MM_SPACE_GTT | - MI_SAVE_EXT_STATE_EN | - MI_RESTORE_EXT_STATE_EN | - MI_RESTORE_INHIBIT); - intel_ring_emit(ring, MI_SUSPEND_FLUSH); - intel_ring_emit(ring, MI_NOOP); - intel_ring_emit(ring, MI_FLUSH); - intel_ring_advance(ring); + return rp0; +} - /* - * Wait for the command parser to advance past MI_SET_CONTEXT. The HW - * does an implicit flush, combined with MI_FLUSH above, it should be - * safe to assume that renderctx is valid - */ - ret = intel_ring_idle(ring); - dev_priv->mm.interruptible = was_interruptible; - if (ret) { - DRM_ERROR("failed to enable ironlake power savings\n"); - ironlake_teardown_rc6(dev); - return; - } +static int cherryview_rps_rpe_freq(struct drm_i915_private *dev_priv) +{ + u32 val, rpe; - I915_WRITE(PWRCTXA, i915_gem_obj_ggtt_offset(dev_priv->ips.pwrctx) | PWRCTX_EN); - I915_WRITE(RSTDBYCTL, I915_READ(RSTDBYCTL) & ~RCX_SW_EXIT); + val = vlv_punit_read(dev_priv, PUNIT_GPU_DUTYCYCLE_REG); + rpe = (val >> PUNIT_GPU_DUTYCYCLE_RPE_FREQ_SHIFT) & PUNIT_GPU_DUTYCYCLE_RPE_FREQ_MASK; - intel_print_rc6_info(dev, GEN6_RC_CTL_RC6_ENABLE); + return rpe; } -static unsigned long intel_pxfreq(u32 vidfreq) +static int cherryview_rps_guar_freq(struct drm_i915_private *dev_priv) { - unsigned long freq; - int div = (vidfreq & 0x3f0000) >> 16; - int post = (vidfreq & 0x3000) >> 12; - int pre = (vidfreq & 0x7); - - if (!pre) - return 0; + u32 val, rp1; - freq = ((div * 133333) / ((1<> PUNIT_GPU_STATUS_MAX_FREQ_SHIFT) & PUNIT_GPU_STATUS_MAX_FREQ_MASK; - return freq; + return rp1; } -static const struct cparams { - u16 i; - u16 t; - u16 m; - u16 c; -} cparams[] = { - { 1, 1333, 301, 28664 }, - { 1, 1066, 294, 24460 }, - { 1, 800, 294, 25192 }, - { 0, 1333, 276, 27605 }, - { 0, 1066, 276, 27605 }, - { 0, 800, 231, 23784 }, -}; +static int cherryview_rps_min_freq(struct drm_i915_private *dev_priv) +{ + u32 val, rpn; -static unsigned long __i915_chipset_val(struct drm_i915_private *dev_priv) + val = vlv_punit_read(dev_priv, PUNIT_GPU_STATUS_REG); + rpn = (val >> PUNIT_GPU_STATIS_GFX_MIN_FREQ_SHIFT) & PUNIT_GPU_STATUS_GFX_MIN_FREQ_MASK; + return rpn; +} + +static int valleyview_rps_guar_freq(struct drm_i915_private *dev_priv) { - u64 total_count, diff, ret; - u32 count1, count2, count3, m = 0, c = 0; - unsigned long now = jiffies_to_msecs(jiffies), diff1; - int i; + u32 val, rp1; - assert_spin_locked(&mchdev_lock); + val = vlv_nc_read(dev_priv, IOSF_NC_FB_GFX_FREQ_FUSE); - diff1 = now - dev_priv->ips.last_time1; + rp1 = (val & FB_GFX_FGUARANTEED_FREQ_FUSE_MASK) >> FB_GFX_FGUARANTEED_FREQ_FUSE_SHIFT; - /* Prevent division-by-zero if we are asking too fast. - * Also, we don't get interesting results if we are polling - * faster than once in 10ms, so just return the saved value - * in such cases. - */ - if (diff1 <= 10) - return dev_priv->ips.chipset_power; + return rp1; +} - count1 = I915_READ(DMIEC); - count2 = I915_READ(DDREC); - count3 = I915_READ(CSIEC); +static int valleyview_rps_max_freq(struct drm_i915_private *dev_priv) +{ + u32 val, rp0; - total_count = count1 + count2 + count3; + val = vlv_nc_read(dev_priv, IOSF_NC_FB_GFX_FREQ_FUSE); - /* FIXME: handle per-counter overflow */ - if (total_count < dev_priv->ips.last_count1) { - diff = ~0UL - dev_priv->ips.last_count1; - diff += total_count; - } else { - diff = total_count - dev_priv->ips.last_count1; - } + rp0 = (val & FB_GFX_MAX_FREQ_FUSE_MASK) >> FB_GFX_MAX_FREQ_FUSE_SHIFT; + /* Clamp to max */ + rp0 = min_t(u32, rp0, 0xea); - for (i = 0; i < ARRAY_SIZE(cparams); i++) { - if (cparams[i].i == dev_priv->ips.c_m && - cparams[i].t == dev_priv->ips.r_t) { - m = cparams[i].m; - c = cparams[i].c; - break; - } - } + return rp0; +} - diff = div_u64(diff, diff1); - ret = ((m * diff) + c); - ret = div_u64(ret, 10); +static int valleyview_rps_rpe_freq(struct drm_i915_private *dev_priv) +{ + u32 val, rpe; - dev_priv->ips.last_count1 = total_count; - dev_priv->ips.last_time1 = now; + val = vlv_nc_read(dev_priv, IOSF_NC_FB_GFX_FMAX_FUSE_LO); + rpe = (val & FB_FMAX_VMIN_FREQ_LO_MASK) >> FB_FMAX_VMIN_FREQ_LO_SHIFT; + val = vlv_nc_read(dev_priv, IOSF_NC_FB_GFX_FMAX_FUSE_HI); + rpe |= (val & FB_FMAX_VMIN_FREQ_HI_MASK) << 5; - dev_priv->ips.chipset_power = ret; + return rpe; +} - return ret; +static int valleyview_rps_min_freq(struct drm_i915_private *dev_priv) +{ + return vlv_punit_read(dev_priv, PUNIT_REG_GPU_LFM) & 0xff; } -unsigned long i915_chipset_val(struct drm_i915_private *dev_priv) +/* Check that the pctx buffer wasn't move under us. */ +static void valleyview_check_pctx(struct drm_i915_private *dev_priv) { - struct drm_device *dev = dev_priv->dev; - unsigned long val; + unsigned long pctx_addr = I915_READ(VLV_PCBR) & ~4095; - if (INTEL_INFO(dev)->gen != 5) - return 0; + WARN_ON(pctx_addr != dev_priv->mm.stolen_base + + dev_priv->vlv_pctx->stolen->start); +} - spin_lock_irq(&mchdev_lock); - val = __i915_chipset_val(dev_priv); +/* Check that the pcbr address is not empty. */ +static void cherryview_check_pctx(struct drm_i915_private *dev_priv) +{ + unsigned long pctx_addr = I915_READ(VLV_PCBR) & ~4095; - spin_unlock_irq(&mchdev_lock); + WARN_ON((pctx_addr >> VLV_PCBR_ADDR_SHIFT) == 0); +} - return val; +static void cherryview_setup_pctx(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + unsigned long pctx_paddr, paddr; + struct i915_gtt *gtt = &dev_priv->gtt; + u32 pcbr; + int pctx_size = 32*1024; + + WARN_ON(!mutex_is_locked(&dev->struct_mutex)); + + pcbr = I915_READ(VLV_PCBR); + if ((pcbr >> VLV_PCBR_ADDR_SHIFT) == 0) { + DRM_DEBUG_DRIVER("BIOS didn't set up PCBR, fixing up\n"); + paddr = (dev_priv->mm.stolen_base + + (gtt->stolen_size - pctx_size)); + + pctx_paddr = (paddr & (~4095)); + I915_WRITE(VLV_PCBR, pctx_paddr); + } + + DRM_DEBUG_DRIVER("PCBR: 0x%08x\n", I915_READ(VLV_PCBR)); } -unsigned long i915_mch_val(struct drm_i915_private *dev_priv) +static void valleyview_setup_pctx(struct drm_device *dev) { - unsigned long m, x, b; - u32 tsfs; + struct drm_i915_private *dev_priv = dev->dev_private; + struct drm_i915_gem_object *pctx; + unsigned long pctx_paddr; + u32 pcbr; + int pctx_size = 24*1024; - tsfs = I915_READ(TSFS); + WARN_ON(!mutex_is_locked(&dev->struct_mutex)); - m = ((tsfs & TSFS_SLOPE_MASK) >> TSFS_SLOPE_SHIFT); - x = I915_READ8(TR1); + pcbr = I915_READ(VLV_PCBR); + if (pcbr) { + /* BIOS set it up already, grab the pre-alloc'd space */ + int pcbr_offset; - b = tsfs & TSFS_INTR_MASK; + pcbr_offset = (pcbr & (~4095)) - dev_priv->mm.stolen_base; + pctx = i915_gem_object_create_stolen_for_preallocated(dev_priv->dev, + pcbr_offset, + I915_GTT_OFFSET_NONE, + pctx_size); + goto out; + } - return ((m * x) / 127) - b; + DRM_DEBUG_DRIVER("BIOS didn't set up PCBR, fixing up\n"); + + /* + * From the Gunit register HAS: + * The Gfx driver is expected to program this register and ensure + * proper allocation within Gfx stolen memory. For example, this + * register should be programmed such than the PCBR range does not + * overlap with other ranges, such as the frame buffer, protected + * memory, or any other relevant ranges. + */ + pctx = i915_gem_object_create_stolen(dev, pctx_size); + if (!pctx) { + DRM_DEBUG("not enough stolen space for PCTX, disabling\n"); + return; + } + + pctx_paddr = dev_priv->mm.stolen_base + pctx->stolen->start; + I915_WRITE(VLV_PCBR, pctx_paddr); + +out: + DRM_DEBUG_DRIVER("PCBR: 0x%08x\n", I915_READ(VLV_PCBR)); + dev_priv->vlv_pctx = pctx; } -static u16 pvid_to_extvid(struct drm_i915_private *dev_priv, u8 pxvid) +static void valleyview_cleanup_pctx(struct drm_device *dev) { - struct drm_device *dev = dev_priv->dev; - static const struct v_table { - u16 vd; /* in .1 mil */ - u16 vm; /* in .1 mil */ - } v_table[] = { - { 0, 0, }, - { 375, 0, }, - { 500, 0, }, - { 625, 0, }, - { 750, 0, }, - { 875, 0, }, - { 1000, 0, }, - { 1125, 0, }, - { 4125, 3000, }, - { 4125, 3000, }, - { 4125, 3000, }, - { 4125, 3000, }, - { 4125, 3000, }, - { 4125, 3000, }, - { 4125, 3000, }, - { 4125, 3000, }, - { 4125, 3000, }, - { 4125, 3000, }, - { 4125, 3000, }, - { 4125, 3000, }, - { 4125, 3000, }, - { 4125, 3000, }, - { 4125, 3000, }, - { 4125, 3000, }, - { 4125, 3000, }, - { 4125, 3000, }, - { 4125, 3000, }, - { 4125, 3000, }, - { 4125, 3000, }, - { 4125, 3000, }, - { 4125, 3000, }, - { 4125, 3000, }, - { 4250, 3125, }, - { 4375, 3250, }, - { 4500, 3375, }, - { 4625, 3500, }, - { 4750, 3625, }, - { 4875, 3750, }, - { 5000, 3875, }, - { 5125, 4000, }, - { 5250, 4125, }, - { 5375, 4250, }, - { 5500, 4375, }, - { 5625, 4500, }, - { 5750, 4625, }, - { 5875, 4750, }, - { 6000, 4875, }, - { 6125, 5000, }, - { 6250, 5125, }, - { 6375, 5250, }, - { 6500, 5375, }, - { 6625, 5500, }, - { 6750, 5625, }, - { 6875, 5750, }, - { 7000, 5875, }, - { 7125, 6000, }, - { 7250, 6125, }, - { 7375, 6250, }, - { 7500, 6375, }, - { 7625, 6500, }, - { 7750, 6625, }, - { 7875, 6750, }, - { 8000, 6875, }, - { 8125, 7000, }, - { 8250, 7125, }, - { 8375, 7250, }, - { 8500, 7375, }, - { 8625, 7500, }, - { 8750, 7625, }, - { 8875, 7750, }, - { 9000, 7875, }, - { 9125, 8000, }, - { 9250, 8125, }, - { 9375, 8250, }, - { 9500, 8375, }, - { 9625, 8500, }, - { 9750, 8625, }, - { 9875, 8750, }, - { 10000, 8875, }, - { 10125, 9000, }, - { 10250, 9125, }, - { 10375, 9250, }, - { 10500, 9375, }, - { 10625, 9500, }, - { 10750, 9625, }, - { 10875, 9750, }, - { 11000, 9875, }, - { 11125, 10000, }, - { 11250, 10125, }, - { 11375, 10250, }, - { 11500, 10375, }, - { 11625, 10500, }, - { 11750, 10625, }, - { 11875, 10750, }, - { 12000, 10875, }, - { 12125, 11000, }, - { 12250, 11125, }, - { 12375, 11250, }, - { 12500, 11375, }, - { 12625, 11500, }, - { 12750, 11625, }, - { 12875, 11750, }, - { 13000, 11875, }, - { 13125, 12000, }, - { 13250, 12125, }, - { 13375, 12250, }, - { 13500, 12375, }, - { 13625, 12500, }, - { 13750, 12625, }, - { 13875, 12750, }, - { 14000, 12875, }, - { 14125, 13000, }, - { 14250, 13125, }, - { 14375, 13250, }, - { 14500, 13375, }, - { 14625, 13500, }, - { 14750, 13625, }, - { 14875, 13750, }, - { 15000, 13875, }, - { 15125, 14000, }, - { 15250, 14125, }, - { 15375, 14250, }, - { 15500, 14375, }, - { 15625, 14500, }, - { 15750, 14625, }, - { 15875, 14750, }, - { 16000, 14875, }, - { 16125, 15000, }, - }; - if (INTEL_INFO(dev)->is_mobile) - return v_table[pxvid].vm; - else - return v_table[pxvid].vd; -} + struct drm_i915_private *dev_priv = dev->dev_private; -static void __i915_update_gfx_val(struct drm_i915_private *dev_priv) -{ - u64 now, diff, diffms; - u32 count; + if (WARN_ON(!dev_priv->vlv_pctx)) + return; - assert_spin_locked(&mchdev_lock); + drm_gem_object_unreference(&dev_priv->vlv_pctx->base); + dev_priv->vlv_pctx = NULL; +} - now = ktime_get_raw_ns(); - diffms = now - dev_priv->ips.last_time2; - do_div(diffms, NSEC_PER_MSEC); +static void valleyview_init_gt_powersave(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + u32 val; - /* Don't divide by 0 */ - if (!diffms) - return; + valleyview_setup_pctx(dev); - count = I915_READ(GFXEC); + mutex_lock(&dev_priv->rps.hw_lock); - if (count < dev_priv->ips.last_count2) { - diff = ~0UL - dev_priv->ips.last_count2; - diff += count; - } else { - diff = count - dev_priv->ips.last_count2; + val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS); + switch ((val >> 6) & 3) { + case 0: + case 1: + dev_priv->mem_freq = 800; + break; + case 2: + dev_priv->mem_freq = 1066; + break; + case 3: + dev_priv->mem_freq = 1333; + break; } + DRM_DEBUG_DRIVER("DDR speed: %d MHz\n", dev_priv->mem_freq); - dev_priv->ips.last_count2 = count; - dev_priv->ips.last_time2 = now; + dev_priv->rps.max_freq = valleyview_rps_max_freq(dev_priv); + dev_priv->rps.rp0_freq = dev_priv->rps.max_freq; + DRM_DEBUG_DRIVER("max GPU freq: %d MHz (%u)\n", + vlv_gpu_freq(dev_priv, dev_priv->rps.max_freq), + dev_priv->rps.max_freq); - /* More magic constants... */ - diff = diff * 1181; - diff = div_u64(diff, diffms * 10); - dev_priv->ips.gfx_power = diff; -} + dev_priv->rps.efficient_freq = valleyview_rps_rpe_freq(dev_priv); + DRM_DEBUG_DRIVER("RPe GPU freq: %d MHz (%u)\n", + vlv_gpu_freq(dev_priv, dev_priv->rps.efficient_freq), + dev_priv->rps.efficient_freq); -void i915_update_gfx_val(struct drm_i915_private *dev_priv) -{ - struct drm_device *dev = dev_priv->dev; + dev_priv->rps.rp1_freq = valleyview_rps_guar_freq(dev_priv); + DRM_DEBUG_DRIVER("RP1(Guar Freq) GPU freq: %d MHz (%u)\n", + vlv_gpu_freq(dev_priv, dev_priv->rps.rp1_freq), + dev_priv->rps.rp1_freq); - if (INTEL_INFO(dev)->gen != 5) - return; + dev_priv->rps.min_freq = valleyview_rps_min_freq(dev_priv); + DRM_DEBUG_DRIVER("min GPU freq: %d MHz (%u)\n", + vlv_gpu_freq(dev_priv, dev_priv->rps.min_freq), + dev_priv->rps.min_freq); - spin_lock_irq(&mchdev_lock); + /* Preserve min/max settings in case of re-init */ + if (dev_priv->rps.max_freq_softlimit == 0) + dev_priv->rps.max_freq_softlimit = dev_priv->rps.max_freq; - __i915_update_gfx_val(dev_priv); + if (dev_priv->rps.min_freq_softlimit == 0) + dev_priv->rps.min_freq_softlimit = dev_priv->rps.min_freq; - spin_unlock_irq(&mchdev_lock); + mutex_unlock(&dev_priv->rps.hw_lock); } -static unsigned long __i915_gfx_val(struct drm_i915_private *dev_priv) +static void cherryview_init_gt_powersave(struct drm_device *dev) { - unsigned long t, corr, state1, corr2, state2; - u32 pxvid, ext_v; + struct drm_i915_private *dev_priv = dev->dev_private; + u32 val; - assert_spin_locked(&mchdev_lock); + cherryview_setup_pctx(dev); - pxvid = I915_READ(PXVFREQ_BASE + (dev_priv->rps.cur_freq * 4)); - pxvid = (pxvid >> 24) & 0x7f; - ext_v = pvid_to_extvid(dev_priv, pxvid); + mutex_lock(&dev_priv->rps.hw_lock); - state1 = ext_v; + mutex_lock(&dev_priv->dpio_lock); + val = vlv_cck_read(dev_priv, CCK_FUSE_REG); + mutex_unlock(&dev_priv->dpio_lock); - t = i915_mch_val(dev_priv); + switch ((val >> 2) & 0x7) { + case 0: + case 1: + dev_priv->rps.cz_freq = 200; + dev_priv->mem_freq = 1600; + break; + case 2: + dev_priv->rps.cz_freq = 267; + dev_priv->mem_freq = 1600; + break; + case 3: + dev_priv->rps.cz_freq = 333; + dev_priv->mem_freq = 2000; + break; + case 4: + dev_priv->rps.cz_freq = 320; + dev_priv->mem_freq = 1600; + break; + case 5: + dev_priv->rps.cz_freq = 400; + dev_priv->mem_freq = 1600; + break; + } + DRM_DEBUG_DRIVER("DDR speed: %d MHz\n", dev_priv->mem_freq); - /* Revel in the empirically derived constants */ + dev_priv->rps.max_freq = cherryview_rps_max_freq(dev_priv); + dev_priv->rps.rp0_freq = dev_priv->rps.max_freq; + DRM_DEBUG_DRIVER("max GPU freq: %d MHz (%u)\n", + vlv_gpu_freq(dev_priv, dev_priv->rps.max_freq), + dev_priv->rps.max_freq); - /* Correction factor in 1/100000 units */ - if (t > 80) - corr = ((t * 2349) + 135940); - else if (t >= 50) - corr = ((t * 964) + 29317); - else /* < 50 */ - corr = ((t * 301) + 1004); + dev_priv->rps.efficient_freq = cherryview_rps_rpe_freq(dev_priv); + DRM_DEBUG_DRIVER("RPe GPU freq: %d MHz (%u)\n", + vlv_gpu_freq(dev_priv, dev_priv->rps.efficient_freq), + dev_priv->rps.efficient_freq); - corr = corr * ((150142 * state1) / 10000 - 78642); - corr /= 100000; - corr2 = (corr * dev_priv->ips.corr); + dev_priv->rps.rp1_freq = cherryview_rps_guar_freq(dev_priv); + DRM_DEBUG_DRIVER("RP1(Guar) GPU freq: %d MHz (%u)\n", + vlv_gpu_freq(dev_priv, dev_priv->rps.rp1_freq), + dev_priv->rps.rp1_freq); - state2 = (corr2 * state1) / 10000; - state2 /= 100; /* convert to mW */ + dev_priv->rps.min_freq = cherryview_rps_min_freq(dev_priv); + DRM_DEBUG_DRIVER("min GPU freq: %d MHz (%u)\n", + vlv_gpu_freq(dev_priv, dev_priv->rps.min_freq), + dev_priv->rps.min_freq); - __i915_update_gfx_val(dev_priv); + WARN_ONCE((dev_priv->rps.max_freq | + dev_priv->rps.efficient_freq | + dev_priv->rps.rp1_freq | + dev_priv->rps.min_freq) & 1, + "Odd GPU freq values\n"); - return dev_priv->ips.gfx_power + state2; + /* Preserve min/max settings in case of re-init */ + if (dev_priv->rps.max_freq_softlimit == 0) + dev_priv->rps.max_freq_softlimit = dev_priv->rps.max_freq; + + if (dev_priv->rps.min_freq_softlimit == 0) + dev_priv->rps.min_freq_softlimit = dev_priv->rps.min_freq; + + mutex_unlock(&dev_priv->rps.hw_lock); } -unsigned long i915_gfx_val(struct drm_i915_private *dev_priv) +static void valleyview_cleanup_gt_powersave(struct drm_device *dev) { - struct drm_device *dev = dev_priv->dev; - unsigned long val; + valleyview_cleanup_pctx(dev); +} - if (INTEL_INFO(dev)->gen != 5) - return 0; +static void cherryview_enable_rps(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + struct intel_engine_cs *ring; + u32 gtfifodbg, val, rc6_mode = 0, pcbr; + int i; - spin_lock_irq(&mchdev_lock); + WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock)); - val = __i915_gfx_val(dev_priv); + gtfifodbg = I915_READ(GTFIFODBG); + if (gtfifodbg) { + DRM_DEBUG_DRIVER("GT fifo had a previous error %x\n", + gtfifodbg); + I915_WRITE(GTFIFODBG, gtfifodbg); + } - spin_unlock_irq(&mchdev_lock); + cherryview_check_pctx(dev_priv); - return val; -} + /* 1a & 1b: Get forcewake during program sequence. Although the driver + * hasn't enabled a state yet where we need forcewake, BIOS may have.*/ + gen6_gt_force_wake_get(dev_priv, FORCEWAKE_ALL); -/** - * i915_read_mch_val - return value for IPS use - * - * Calculate and return a value for the IPS driver to use when deciding whether - * we have thermal and power headroom to increase CPU or GPU power budget. - */ -unsigned long i915_read_mch_val(void) -{ - struct drm_i915_private *dev_priv; - unsigned long chipset_val, graphics_val, ret = 0; + /* 2a: Program RC6 thresholds.*/ + I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16); + I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */ + I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */ - spin_lock_irq(&mchdev_lock); - if (!i915_mch_dev) - goto out_unlock; - dev_priv = i915_mch_dev; + for_each_ring(ring, dev_priv, i) + I915_WRITE(RING_MAX_IDLE(ring->mmio_base), 10); + I915_WRITE(GEN6_RC_SLEEP, 0); - chipset_val = __i915_chipset_val(dev_priv); - graphics_val = __i915_gfx_val(dev_priv); + I915_WRITE(GEN6_RC6_THRESHOLD, 50000); /* 50/125ms per EI */ - ret = chipset_val + graphics_val; + /* allows RC6 residency counter to work */ + I915_WRITE(VLV_COUNTER_CONTROL, + _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH | + VLV_MEDIA_RC6_COUNT_EN | + VLV_RENDER_RC6_COUNT_EN)); -out_unlock: - spin_unlock_irq(&mchdev_lock); + /* For now we assume BIOS is allocating and populating the PCBR */ + pcbr = I915_READ(VLV_PCBR); - return ret; -} -EXPORT_SYMBOL_GPL(i915_read_mch_val); + /* 3: Enable RC6 */ + if ((intel_enable_rc6(dev) & INTEL_RC6_ENABLE) && + (pcbr >> VLV_PCBR_ADDR_SHIFT)) + rc6_mode = GEN6_RC_CTL_EI_MODE(1); -/** - * i915_gpu_raise - raise GPU frequency limit - * - * Raise the limit; IPS indicates we have thermal headroom. - */ -bool i915_gpu_raise(void) -{ - struct drm_i915_private *dev_priv; - bool ret = true; + I915_WRITE(GEN6_RC_CONTROL, rc6_mode); - spin_lock_irq(&mchdev_lock); - if (!i915_mch_dev) { - ret = false; - goto out_unlock; - } - dev_priv = i915_mch_dev; + /* 4 Program defaults and thresholds for RPS*/ + I915_WRITE(GEN6_RP_UP_THRESHOLD, 59400); + I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 245000); + I915_WRITE(GEN6_RP_UP_EI, 66000); + I915_WRITE(GEN6_RP_DOWN_EI, 350000); - if (dev_priv->ips.max_delay > dev_priv->ips.fmax) - dev_priv->ips.max_delay--; + I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10); -out_unlock: - spin_unlock_irq(&mchdev_lock); + /* WaDisablePwrmtrEvent:chv (pre-production hw) */ + I915_WRITE(0xA80C, I915_READ(0xA80C) & 0x00ffffff); + I915_WRITE(0xA810, I915_READ(0xA810) & 0xffffff00); - return ret; -} -EXPORT_SYMBOL_GPL(i915_gpu_raise); + /* 5: Enable RPS */ + I915_WRITE(GEN6_RP_CONTROL, + GEN6_RP_MEDIA_HW_NORMAL_MODE | + GEN6_RP_MEDIA_IS_GFX | /* WaSetMaskForGfxBusyness:chv (pre-production hw ?) */ + GEN6_RP_ENABLE | + GEN6_RP_UP_BUSY_AVG | + GEN6_RP_DOWN_IDLE_AVG); -/** - * i915_gpu_lower - lower GPU frequency limit - * - * IPS indicates we're close to a thermal limit, so throttle back the GPU - * frequency maximum. - */ -bool i915_gpu_lower(void) -{ - struct drm_i915_private *dev_priv; - bool ret = true; + val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS); - spin_lock_irq(&mchdev_lock); - if (!i915_mch_dev) { - ret = false; - goto out_unlock; - } - dev_priv = i915_mch_dev; + /* RPS code assumes GPLL is used */ + WARN_ONCE((val & GPLLENABLE) == 0, "GPLL not enabled\n"); - if (dev_priv->ips.max_delay < dev_priv->ips.min_delay) - dev_priv->ips.max_delay++; + DRM_DEBUG_DRIVER("GPLL enabled? %s\n", val & GPLLENABLE ? "yes" : "no"); + DRM_DEBUG_DRIVER("GPU status: 0x%08x\n", val); -out_unlock: - spin_unlock_irq(&mchdev_lock); + dev_priv->rps.cur_freq = (val >> 8) & 0xff; + DRM_DEBUG_DRIVER("current GPU freq: %d MHz (%u)\n", + vlv_gpu_freq(dev_priv, dev_priv->rps.cur_freq), + dev_priv->rps.cur_freq); - return ret; + DRM_DEBUG_DRIVER("setting GPU freq to %d MHz (%u)\n", + vlv_gpu_freq(dev_priv, dev_priv->rps.efficient_freq), + dev_priv->rps.efficient_freq); + + valleyview_set_rps(dev_priv->dev, dev_priv->rps.efficient_freq); + + gen6_gt_force_wake_put(dev_priv, FORCEWAKE_ALL); } -EXPORT_SYMBOL_GPL(i915_gpu_lower); -/** - * i915_gpu_busy - indicate GPU business to IPS - * - * Tell the IPS driver whether or not the GPU is busy. - */ -bool i915_gpu_busy(void) +static void valleyview_enable_rps(struct drm_device *dev) { - struct drm_i915_private *dev_priv; + struct drm_i915_private *dev_priv = dev->dev_private; struct intel_engine_cs *ring; - bool ret = false; + u32 gtfifodbg, val, rc6_mode = 0; int i; - spin_lock_irq(&mchdev_lock); - if (!i915_mch_dev) - goto out_unlock; - dev_priv = i915_mch_dev; + WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock)); + + valleyview_check_pctx(dev_priv); + + if ((gtfifodbg = I915_READ(GTFIFODBG))) { + DRM_DEBUG_DRIVER("GT fifo had a previous error %x\n", + gtfifodbg); + I915_WRITE(GTFIFODBG, gtfifodbg); + } + + /* If VLV, Forcewake all wells, else re-direct to regular path */ + gen6_gt_force_wake_get(dev_priv, FORCEWAKE_ALL); + + I915_WRITE(GEN6_RP_UP_THRESHOLD, 59400); + I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 245000); + I915_WRITE(GEN6_RP_UP_EI, 66000); + I915_WRITE(GEN6_RP_DOWN_EI, 350000); + + I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10); + I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 0xf4240); + + I915_WRITE(GEN6_RP_CONTROL, + GEN6_RP_MEDIA_TURBO | + GEN6_RP_MEDIA_HW_NORMAL_MODE | + GEN6_RP_MEDIA_IS_GFX | + GEN6_RP_ENABLE | + GEN6_RP_UP_BUSY_AVG | + GEN6_RP_DOWN_IDLE_CONT); + + I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 0x00280000); + I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000); + I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25); for_each_ring(ring, dev_priv, i) - ret |= !list_empty(&ring->request_list); + I915_WRITE(RING_MAX_IDLE(ring->mmio_base), 10); -out_unlock: - spin_unlock_irq(&mchdev_lock); + I915_WRITE(GEN6_RC6_THRESHOLD, 0x557); - return ret; -} -EXPORT_SYMBOL_GPL(i915_gpu_busy); + /* allows RC6 residency counter to work */ + I915_WRITE(VLV_COUNTER_CONTROL, + _MASKED_BIT_ENABLE(VLV_MEDIA_RC0_COUNT_EN | + VLV_RENDER_RC0_COUNT_EN | + VLV_MEDIA_RC6_COUNT_EN | + VLV_RENDER_RC6_COUNT_EN)); -/** - * i915_gpu_turbo_disable - disable graphics turbo - * - * Disable graphics turbo by resetting the max frequency and setting the - * current frequency to the default. - */ -bool i915_gpu_turbo_disable(void) -{ - struct drm_i915_private *dev_priv; - bool ret = true; + if (intel_enable_rc6(dev) & INTEL_RC6_ENABLE) + rc6_mode = GEN7_RC_CTL_TO_MODE | VLV_RC_CTL_CTX_RST_PARALLEL; - spin_lock_irq(&mchdev_lock); - if (!i915_mch_dev) { - ret = false; - goto out_unlock; - } - dev_priv = i915_mch_dev; + intel_print_rc6_info(dev, rc6_mode); - dev_priv->ips.max_delay = dev_priv->ips.fstart; + I915_WRITE(GEN6_RC_CONTROL, rc6_mode); - if (!ironlake_set_drps(dev_priv->dev, dev_priv->ips.fstart)) - ret = false; + val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS); -out_unlock: - spin_unlock_irq(&mchdev_lock); + /* RPS code assumes GPLL is used */ + WARN_ONCE((val & GPLLENABLE) == 0, "GPLL not enabled\n"); - return ret; -} -EXPORT_SYMBOL_GPL(i915_gpu_turbo_disable); + DRM_DEBUG_DRIVER("GPLL enabled? %s\n", val & GPLLENABLE ? "yes" : "no"); + DRM_DEBUG_DRIVER("GPU status: 0x%08x\n", val); -/** - * Tells the intel_ips driver that the i915 driver is now loaded, if - * IPS got loaded first. - * - * This awkward dance is so that neither module has to depend on the - * other in order for IPS to do the appropriate communication of - * GPU turbo limits to i915. - */ -static void -ips_ping_for_i915_load(void) -{ - void (*link)(void); + dev_priv->rps.cur_freq = (val >> 8) & 0xff; + DRM_DEBUG_DRIVER("current GPU freq: %d MHz (%u)\n", + vlv_gpu_freq(dev_priv, dev_priv->rps.cur_freq), + dev_priv->rps.cur_freq); - link = symbol_get(ips_link_to_i915_driver); - if (link) { - link(); - symbol_put(ips_link_to_i915_driver); - } -} + DRM_DEBUG_DRIVER("setting GPU freq to %d MHz (%u)\n", + vlv_gpu_freq(dev_priv, dev_priv->rps.efficient_freq), + dev_priv->rps.efficient_freq); -void intel_gpu_ips_init(struct drm_i915_private *dev_priv) -{ - /* We only register the i915 ips part with intel-ips once everything is - * set up, to avoid intel-ips sneaking in and reading bogus values. */ - spin_lock_irq(&mchdev_lock); - i915_mch_dev = dev_priv; - spin_unlock_irq(&mchdev_lock); + valleyview_set_rps(dev_priv->dev, dev_priv->rps.efficient_freq); - ips_ping_for_i915_load(); + gen6_gt_force_wake_put(dev_priv, FORCEWAKE_ALL); } -void intel_gpu_ips_teardown(void) +void ironlake_teardown_rc6(struct drm_device *dev) { - spin_lock_irq(&mchdev_lock); - i915_mch_dev = NULL; - spin_unlock_irq(&mchdev_lock); + struct drm_i915_private *dev_priv = dev->dev_private; + + if (dev_priv->ips.renderctx) { + i915_gem_object_ggtt_unpin(dev_priv->ips.renderctx); + drm_gem_object_unreference(&dev_priv->ips.renderctx->base); + dev_priv->ips.renderctx = NULL; + } + + if (dev_priv->ips.pwrctx) { + i915_gem_object_ggtt_unpin(dev_priv->ips.pwrctx); + drm_gem_object_unreference(&dev_priv->ips.pwrctx->base); + dev_priv->ips.pwrctx = NULL; + } } -static void intel_init_emon(struct drm_device *dev) +static void ironlake_disable_rc6(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; - u32 lcfuse; - u8 pxw[16]; - int i; - /* Disable to program */ - I915_WRITE(ECR, 0); - POSTING_READ(ECR); + if (I915_READ(PWRCTXA)) { + /* Wake the GPU, prevent RC6, then restore RSTDBYCTL */ + I915_WRITE(RSTDBYCTL, I915_READ(RSTDBYCTL) | RCX_SW_EXIT); + wait_for(((I915_READ(RSTDBYCTL) & RSX_STATUS_MASK) == RSX_STATUS_ON), + 50); - /* Program energy weights for various events */ - I915_WRITE(SDEW, 0x15040d00); - I915_WRITE(CSIEW0, 0x007f0000); - I915_WRITE(CSIEW1, 0x1e220004); - I915_WRITE(CSIEW2, 0x04000004); + I915_WRITE(PWRCTXA, 0); + POSTING_READ(PWRCTXA); - for (i = 0; i < 5; i++) - I915_WRITE(PEW + (i * 4), 0); - for (i = 0; i < 3; i++) - I915_WRITE(DEW + (i * 4), 0); + I915_WRITE(RSTDBYCTL, I915_READ(RSTDBYCTL) & ~RCX_SW_EXIT); + POSTING_READ(RSTDBYCTL); + } +} - /* Program P-state weights to account for frequency power adjustment */ - for (i = 0; i < 16; i++) { - u32 pxvidfreq = I915_READ(PXVFREQ_BASE + (i * 4)); - unsigned long freq = intel_pxfreq(pxvidfreq); - unsigned long vid = (pxvidfreq & PXVFREQ_PX_MASK) >> - PXVFREQ_PX_SHIFT; - unsigned long val; +static int ironlake_setup_rc6(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev->dev_private; - val = vid * vid; - val *= (freq / 1000); - val *= 255; - val /= (127*127*900); - if (val > 0xff) - DRM_ERROR("bad pxval: %ld\n", val); - pxw[i] = val; - } - /* Render standby states get 0 weight */ - pxw[14] = 0; - pxw[15] = 0; + if (dev_priv->ips.renderctx == NULL) + dev_priv->ips.renderctx = intel_alloc_context_page(dev); + if (!dev_priv->ips.renderctx) + return -ENOMEM; - for (i = 0; i < 4; i++) { - u32 val = (pxw[i*4] << 24) | (pxw[(i*4)+1] << 16) | - (pxw[(i*4)+2] << 8) | (pxw[(i*4)+3]); - I915_WRITE(PXW + (i * 4), val); + if (dev_priv->ips.pwrctx == NULL) + dev_priv->ips.pwrctx = intel_alloc_context_page(dev); + if (!dev_priv->ips.pwrctx) { + ironlake_teardown_rc6(dev); + return -ENOMEM; } - /* Adjust magic regs to magic values (more experimental results) */ - I915_WRITE(OGW0, 0); - I915_WRITE(OGW1, 0); - I915_WRITE(EG0, 0x00007f00); - I915_WRITE(EG1, 0x0000000e); - I915_WRITE(EG2, 0x000e0000); - I915_WRITE(EG3, 0x68000300); - I915_WRITE(EG4, 0x42000000); - I915_WRITE(EG5, 0x00140031); - I915_WRITE(EG6, 0); - I915_WRITE(EG7, 0); + return 0; +} - for (i = 0; i < 8; i++) - I915_WRITE(PXWL + (i * 4), 0); +static void ironlake_enable_rc6(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + struct intel_engine_cs *ring = &dev_priv->ring[RCS]; + bool was_interruptible; + int ret; - /* Enable PMON + select events */ - I915_WRITE(ECR, 0x80000019); + /* rc6 disabled by default due to repeated reports of hanging during + * boot and resume. + */ + if (!intel_enable_rc6(dev)) + return; - lcfuse = I915_READ(LCFUSE02); + WARN_ON(!mutex_is_locked(&dev->struct_mutex)); - dev_priv->ips.corr = (lcfuse & LCFUSE_HIV_MASK); -} + ret = ironlake_setup_rc6(dev); + if (ret) + return; -void intel_init_gt_powersave(struct drm_device *dev) -{ - i915.enable_rc6 = sanitize_rc6_option(dev, i915.enable_rc6); + was_interruptible = dev_priv->mm.interruptible; + dev_priv->mm.interruptible = false; - if (IS_CHERRYVIEW(dev)) - cherryview_init_gt_powersave(dev); - else if (IS_VALLEYVIEW(dev)) - valleyview_init_gt_powersave(dev); -} + /* + * GPU can automatically power down the render unit if given a page + * to save state. + */ + ret = intel_ring_begin(ring, 6); + if (ret) { + ironlake_teardown_rc6(dev); + dev_priv->mm.interruptible = was_interruptible; + return; + } -void intel_cleanup_gt_powersave(struct drm_device *dev) -{ - if (IS_CHERRYVIEW(dev)) + intel_ring_emit(ring, MI_SUSPEND_FLUSH | MI_SUSPEND_FLUSH_EN); + intel_ring_emit(ring, MI_SET_CONTEXT); + intel_ring_emit(ring, i915_gem_obj_ggtt_offset(dev_priv->ips.renderctx) | + MI_MM_SPACE_GTT | + MI_SAVE_EXT_STATE_EN | + MI_RESTORE_EXT_STATE_EN | + MI_RESTORE_INHIBIT); + intel_ring_emit(ring, MI_SUSPEND_FLUSH); + intel_ring_emit(ring, MI_NOOP); + intel_ring_emit(ring, MI_FLUSH); + intel_ring_advance(ring); + + /* + * Wait for the command parser to advance past MI_SET_CONTEXT. The HW + * does an implicit flush, combined with MI_FLUSH above, it should be + * safe to assume that renderctx is valid + */ + ret = intel_ring_idle(ring); + dev_priv->mm.interruptible = was_interruptible; + if (ret) { + DRM_ERROR("failed to enable ironlake power savings\n"); + ironlake_teardown_rc6(dev); return; - else if (IS_VALLEYVIEW(dev)) - valleyview_cleanup_gt_powersave(dev); + } + + I915_WRITE(PWRCTXA, i915_gem_obj_ggtt_offset(dev_priv->ips.pwrctx) | PWRCTX_EN); + I915_WRITE(RSTDBYCTL, I915_READ(RSTDBYCTL) & ~RCX_SW_EXIT); + + intel_print_rc6_info(dev, GEN6_RC_CTL_RC6_ENABLE); } -/** - * intel_suspend_gt_powersave - suspend PM work and helper threads - * @dev: drm device - * - * We don't want to disable RC6 or other features here, we just want - * to make sure any work we've queued has finished and won't bother - * us while we're suspended. - */ -void intel_suspend_gt_powersave(struct drm_device *dev) +static unsigned long intel_pxfreq(u32 vidfreq) { - struct drm_i915_private *dev_priv = dev->dev_private; - - /* Interrupts should be disabled already to avoid re-arming. */ - WARN_ON(intel_irqs_enabled(dev_priv)); + unsigned long freq; + int div = (vidfreq & 0x3f0000) >> 16; + int post = (vidfreq & 0x3000) >> 12; + int pre = (vidfreq & 0x7); - flush_delayed_work(&dev_priv->rps.delayed_resume_work); + if (!pre) + return 0; - cancel_work_sync(&dev_priv->rps.work); + freq = ((div * 133333) / ((1<dev_private; + u64 total_count, diff, ret; + u32 count1, count2, count3, m = 0, c = 0; + unsigned long now = jiffies_to_msecs(jiffies), diff1; + int i; - /* Interrupts should be disabled already to avoid re-arming. */ - WARN_ON(intel_irqs_enabled(dev_priv)); + assert_spin_locked(&mchdev_lock); - if (IS_IRONLAKE_M(dev)) { - ironlake_disable_drps(dev); - ironlake_disable_rc6(dev); - } else if (INTEL_INFO(dev)->gen >= 6) { - intel_suspend_gt_powersave(dev); + diff1 = now - dev_priv->ips.last_time1; - mutex_lock(&dev_priv->rps.hw_lock); - if (IS_CHERRYVIEW(dev)) - cherryview_disable_rps(dev); - else if (IS_VALLEYVIEW(dev)) - valleyview_disable_rps(dev); - else - gen6_disable_rps(dev); - dev_priv->rps.enabled = false; - mutex_unlock(&dev_priv->rps.hw_lock); - } -} + /* Prevent division-by-zero if we are asking too fast. + * Also, we don't get interesting results if we are polling + * faster than once in 10ms, so just return the saved value + * in such cases. + */ + if (diff1 <= 10) + return dev_priv->ips.chipset_power; -static void intel_gen6_powersave_work(struct work_struct *work) -{ - struct drm_i915_private *dev_priv = - container_of(work, struct drm_i915_private, - rps.delayed_resume_work.work); - struct drm_device *dev = dev_priv->dev; + count1 = I915_READ(DMIEC); + count2 = I915_READ(DDREC); + count3 = I915_READ(CSIEC); - mutex_lock(&dev_priv->rps.hw_lock); + total_count = count1 + count2 + count3; - if (IS_CHERRYVIEW(dev)) { - cherryview_enable_rps(dev); - } else if (IS_VALLEYVIEW(dev)) { - valleyview_enable_rps(dev); - } else if (IS_BROADWELL(dev)) { - gen8_enable_rps(dev); - __gen6_update_ring_freq(dev); + /* FIXME: handle per-counter overflow */ + if (total_count < dev_priv->ips.last_count1) { + diff = ~0UL - dev_priv->ips.last_count1; + diff += total_count; } else { - gen6_enable_rps(dev); - __gen6_update_ring_freq(dev); + diff = total_count - dev_priv->ips.last_count1; } - dev_priv->rps.enabled = true; - mutex_unlock(&dev_priv->rps.hw_lock); - intel_runtime_pm_put(dev_priv); -} + for (i = 0; i < ARRAY_SIZE(cparams); i++) { + if (cparams[i].i == dev_priv->ips.c_m && + cparams[i].t == dev_priv->ips.r_t) { + m = cparams[i].m; + c = cparams[i].c; + break; + } + } -void intel_enable_gt_powersave(struct drm_device *dev) -{ - struct drm_i915_private *dev_priv = dev->dev_private; - - if (IS_IRONLAKE_M(dev)) { - mutex_lock(&dev->struct_mutex); - ironlake_enable_drps(dev); - ironlake_enable_rc6(dev); - intel_init_emon(dev); - mutex_unlock(&dev->struct_mutex); - } else if (INTEL_INFO(dev)->gen >= 6) { - /* - * PCU communication is slow and this doesn't need to be - * done at any specific time, so do this out of our fast path - * to make resume and init faster. - * - * We depend on the HW RC6 power context save/restore - * mechanism when entering D3 through runtime PM suspend. So - * disable RPM until RPS/RC6 is properly setup. We can only - * get here via the driver load/system resume/runtime resume - * paths, so the _noresume version is enough (and in case of - * runtime resume it's necessary). - */ - if (schedule_delayed_work(&dev_priv->rps.delayed_resume_work, - round_jiffies_up_relative(HZ))) - intel_runtime_pm_get_noresume(dev_priv); - } -} - -void intel_reset_gt_powersave(struct drm_device *dev) -{ - struct drm_i915_private *dev_priv = dev->dev_private; - - dev_priv->rps.enabled = false; - intel_enable_gt_powersave(dev); -} - -static void ibx_init_clock_gating(struct drm_device *dev) -{ - struct drm_i915_private *dev_priv = dev->dev_private; - - /* - * On Ibex Peak and Cougar Point, we need to disable clock - * gating for the panel power sequencer or it will fail to - * start up when no ports are active. - */ - I915_WRITE(SOUTH_DSPCLK_GATE_D, PCH_DPLSUNIT_CLOCK_GATE_DISABLE); -} - -static void g4x_disable_trickle_feed(struct drm_device *dev) -{ - struct drm_i915_private *dev_priv = dev->dev_private; - int pipe; - - for_each_pipe(dev_priv, pipe) { - I915_WRITE(DSPCNTR(pipe), - I915_READ(DSPCNTR(pipe)) | - DISPPLANE_TRICKLE_FEED_DISABLE); - intel_flush_primary_plane(dev_priv, pipe); - } -} - -static void ilk_init_lp_watermarks(struct drm_device *dev) -{ - struct drm_i915_private *dev_priv = dev->dev_private; - - I915_WRITE(WM3_LP_ILK, I915_READ(WM3_LP_ILK) & ~WM1_LP_SR_EN); - I915_WRITE(WM2_LP_ILK, I915_READ(WM2_LP_ILK) & ~WM1_LP_SR_EN); - I915_WRITE(WM1_LP_ILK, I915_READ(WM1_LP_ILK) & ~WM1_LP_SR_EN); - - /* - * Don't touch WM1S_LP_EN here. - * Doing so could cause underruns. - */ -} - -static void ironlake_init_clock_gating(struct drm_device *dev) -{ - struct drm_i915_private *dev_priv = dev->dev_private; - uint32_t dspclk_gate = ILK_VRHUNIT_CLOCK_GATE_DISABLE; - - /* - * Required for FBC - * WaFbcDisableDpfcClockGating:ilk - */ - dspclk_gate |= ILK_DPFCRUNIT_CLOCK_GATE_DISABLE | - ILK_DPFCUNIT_CLOCK_GATE_DISABLE | - ILK_DPFDUNIT_CLOCK_GATE_ENABLE; - - I915_WRITE(PCH_3DCGDIS0, - MARIUNIT_CLOCK_GATE_DISABLE | - SVSMUNIT_CLOCK_GATE_DISABLE); - I915_WRITE(PCH_3DCGDIS1, - VFMUNIT_CLOCK_GATE_DISABLE); - - /* - * According to the spec the following bits should be set in - * order to enable memory self-refresh - * The bit 22/21 of 0x42004 - * The bit 5 of 0x42020 - * The bit 15 of 0x45000 - */ - I915_WRITE(ILK_DISPLAY_CHICKEN2, - (I915_READ(ILK_DISPLAY_CHICKEN2) | - ILK_DPARB_GATE | ILK_VSDPFD_FULL)); - dspclk_gate |= ILK_DPARBUNIT_CLOCK_GATE_ENABLE; - I915_WRITE(DISP_ARB_CTL, - (I915_READ(DISP_ARB_CTL) | - DISP_FBC_WM_DIS)); - - ilk_init_lp_watermarks(dev); - - /* - * Based on the document from hardware guys the following bits - * should be set unconditionally in order to enable FBC. - * The bit 22 of 0x42000 - * The bit 22 of 0x42004 - * The bit 7,8,9 of 0x42020. - */ - if (IS_IRONLAKE_M(dev)) { - /* WaFbcAsynchFlipDisableFbcQueue:ilk */ - I915_WRITE(ILK_DISPLAY_CHICKEN1, - I915_READ(ILK_DISPLAY_CHICKEN1) | - ILK_FBCQ_DIS); - I915_WRITE(ILK_DISPLAY_CHICKEN2, - I915_READ(ILK_DISPLAY_CHICKEN2) | - ILK_DPARB_GATE); - } - - I915_WRITE(ILK_DSPCLK_GATE_D, dspclk_gate); - - I915_WRITE(ILK_DISPLAY_CHICKEN2, - I915_READ(ILK_DISPLAY_CHICKEN2) | - ILK_ELPIN_409_SELECT); - I915_WRITE(_3D_CHICKEN2, - _3D_CHICKEN2_WM_READ_PIPELINED << 16 | - _3D_CHICKEN2_WM_READ_PIPELINED); - - /* WaDisableRenderCachePipelinedFlush:ilk */ - I915_WRITE(CACHE_MODE_0, - _MASKED_BIT_ENABLE(CM0_PIPELINED_RENDER_FLUSH_DISABLE)); - - /* WaDisable_RenderCache_OperationalFlush:ilk */ - I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE)); - - g4x_disable_trickle_feed(dev); - - ibx_init_clock_gating(dev); -} - -static void cpt_init_clock_gating(struct drm_device *dev) -{ - struct drm_i915_private *dev_priv = dev->dev_private; - int pipe; - uint32_t val; + diff = div_u64(diff, diff1); + ret = ((m * diff) + c); + ret = div_u64(ret, 10); - /* - * On Ibex Peak and Cougar Point, we need to disable clock - * gating for the panel power sequencer or it will fail to - * start up when no ports are active. - */ - I915_WRITE(SOUTH_DSPCLK_GATE_D, PCH_DPLSUNIT_CLOCK_GATE_DISABLE | - PCH_DPLUNIT_CLOCK_GATE_DISABLE | - PCH_CPUNIT_CLOCK_GATE_DISABLE); - I915_WRITE(SOUTH_CHICKEN2, I915_READ(SOUTH_CHICKEN2) | - DPLS_EDP_PPS_FIX_DIS); - /* The below fixes the weird display corruption, a few pixels shifted - * downward, on (only) LVDS of some HP laptops with IVY. - */ - for_each_pipe(dev_priv, pipe) { - val = I915_READ(TRANS_CHICKEN2(pipe)); - val |= TRANS_CHICKEN2_TIMING_OVERRIDE; - val &= ~TRANS_CHICKEN2_FDI_POLARITY_REVERSED; - if (dev_priv->vbt.fdi_rx_polarity_inverted) - val |= TRANS_CHICKEN2_FDI_POLARITY_REVERSED; - val &= ~TRANS_CHICKEN2_FRAME_START_DELAY_MASK; - val &= ~TRANS_CHICKEN2_DISABLE_DEEP_COLOR_COUNTER; - val &= ~TRANS_CHICKEN2_DISABLE_DEEP_COLOR_MODESWITCH; - I915_WRITE(TRANS_CHICKEN2(pipe), val); - } - /* WADP0ClockGatingDisable */ - for_each_pipe(dev_priv, pipe) { - I915_WRITE(TRANS_CHICKEN1(pipe), - TRANS_CHICKEN1_DP0UNIT_GC_DISABLE); - } -} + dev_priv->ips.last_count1 = total_count; + dev_priv->ips.last_time1 = now; -static void gen6_check_mch_setup(struct drm_device *dev) -{ - struct drm_i915_private *dev_priv = dev->dev_private; - uint32_t tmp; + dev_priv->ips.chipset_power = ret; - tmp = I915_READ(MCH_SSKPD); - if ((tmp & MCH_SSKPD_WM0_MASK) != MCH_SSKPD_WM0_VAL) - DRM_DEBUG_KMS("Wrong MCH_SSKPD value: 0x%08x This can cause underruns.\n", - tmp); + return ret; } -static void gen6_init_clock_gating(struct drm_device *dev) +unsigned long i915_chipset_val(struct drm_i915_private *dev_priv) { - struct drm_i915_private *dev_priv = dev->dev_private; - uint32_t dspclk_gate = ILK_VRHUNIT_CLOCK_GATE_DISABLE; - - I915_WRITE(ILK_DSPCLK_GATE_D, dspclk_gate); - - I915_WRITE(ILK_DISPLAY_CHICKEN2, - I915_READ(ILK_DISPLAY_CHICKEN2) | - ILK_ELPIN_409_SELECT); - - /* WaDisableHiZPlanesWhenMSAAEnabled:snb */ - I915_WRITE(_3D_CHICKEN, - _MASKED_BIT_ENABLE(_3D_CHICKEN_HIZ_PLANE_DISABLE_MSAA_4X_SNB)); - - /* WaDisable_RenderCache_OperationalFlush:snb */ - I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE)); - - /* - * BSpec recoomends 8x4 when MSAA is used, - * however in practice 16x4 seems fastest. - * - * Note that PS/WM thread counts depend on the WIZ hashing - * disable bit, which we don't touch here, but it's good - * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM). - */ - I915_WRITE(GEN6_GT_MODE, - GEN6_WIZ_HASHING_MASK | GEN6_WIZ_HASHING_16x4); - - ilk_init_lp_watermarks(dev); - - I915_WRITE(CACHE_MODE_0, - _MASKED_BIT_DISABLE(CM0_STC_EVICT_DISABLE_LRA_SNB)); - - I915_WRITE(GEN6_UCGCTL1, - I915_READ(GEN6_UCGCTL1) | - GEN6_BLBUNIT_CLOCK_GATE_DISABLE | - GEN6_CSUNIT_CLOCK_GATE_DISABLE); - - /* According to the BSpec vol1g, bit 12 (RCPBUNIT) clock - * gating disable must be set. Failure to set it results in - * flickering pixels due to Z write ordering failures after - * some amount of runtime in the Mesa "fire" demo, and Unigine - * Sanctuary and Tropics, and apparently anything else with - * alpha test or pixel discard. - * - * According to the spec, bit 11 (RCCUNIT) must also be set, - * but we didn't debug actual testcases to find it out. - * - * WaDisableRCCUnitClockGating:snb - * WaDisableRCPBUnitClockGating:snb - */ - I915_WRITE(GEN6_UCGCTL2, - GEN6_RCPBUNIT_CLOCK_GATE_DISABLE | - GEN6_RCCUNIT_CLOCK_GATE_DISABLE); - - /* WaStripsFansDisableFastClipPerformanceFix:snb */ - I915_WRITE(_3D_CHICKEN3, - _MASKED_BIT_ENABLE(_3D_CHICKEN3_SF_DISABLE_FASTCLIP_CULL)); + struct drm_device *dev = dev_priv->dev; + unsigned long val; - /* - * Bspec says: - * "This bit must be set if 3DSTATE_CLIP clip mode is set to normal and - * 3DSTATE_SF number of SF output attributes is more than 16." - */ - I915_WRITE(_3D_CHICKEN3, - _MASKED_BIT_ENABLE(_3D_CHICKEN3_SF_DISABLE_PIPELINED_ATTR_FETCH)); + if (INTEL_INFO(dev)->gen != 5) + return 0; - /* - * According to the spec the following bits should be - * set in order to enable memory self-refresh and fbc: - * The bit21 and bit22 of 0x42000 - * The bit21 and bit22 of 0x42004 - * The bit5 and bit7 of 0x42020 - * The bit14 of 0x70180 - * The bit14 of 0x71180 - * - * WaFbcAsynchFlipDisableFbcQueue:snb - */ - I915_WRITE(ILK_DISPLAY_CHICKEN1, - I915_READ(ILK_DISPLAY_CHICKEN1) | - ILK_FBCQ_DIS | ILK_PABSTRETCH_DIS); - I915_WRITE(ILK_DISPLAY_CHICKEN2, - I915_READ(ILK_DISPLAY_CHICKEN2) | - ILK_DPARB_GATE | ILK_VSDPFD_FULL); - I915_WRITE(ILK_DSPCLK_GATE_D, - I915_READ(ILK_DSPCLK_GATE_D) | - ILK_DPARBUNIT_CLOCK_GATE_ENABLE | - ILK_DPFDUNIT_CLOCK_GATE_ENABLE); + spin_lock_irq(&mchdev_lock); - g4x_disable_trickle_feed(dev); + val = __i915_chipset_val(dev_priv); - cpt_init_clock_gating(dev); + spin_unlock_irq(&mchdev_lock); - gen6_check_mch_setup(dev); + return val; } -static void gen7_setup_fixed_func_scheduler(struct drm_i915_private *dev_priv) +unsigned long i915_mch_val(struct drm_i915_private *dev_priv) { - uint32_t reg = I915_READ(GEN7_FF_THREAD_MODE); - - /* - * WaVSThreadDispatchOverride:ivb,vlv - * - * This actually overrides the dispatch - * mode for all thread types. - */ - reg &= ~GEN7_FF_SCHED_MASK; - reg |= GEN7_FF_TS_SCHED_HW; - reg |= GEN7_FF_VS_SCHED_HW; - reg |= GEN7_FF_DS_SCHED_HW; + unsigned long m, x, b; + u32 tsfs; - I915_WRITE(GEN7_FF_THREAD_MODE, reg); -} + tsfs = I915_READ(TSFS); -static void lpt_init_clock_gating(struct drm_device *dev) -{ - struct drm_i915_private *dev_priv = dev->dev_private; + m = ((tsfs & TSFS_SLOPE_MASK) >> TSFS_SLOPE_SHIFT); + x = I915_READ8(TR1); - /* - * TODO: this bit should only be enabled when really needed, then - * disabled when not needed anymore in order to save power. - */ - if (dev_priv->pch_id == INTEL_PCH_LPT_LP_DEVICE_ID_TYPE) - I915_WRITE(SOUTH_DSPCLK_GATE_D, - I915_READ(SOUTH_DSPCLK_GATE_D) | - PCH_LP_PARTITION_LEVEL_DISABLE); + b = tsfs & TSFS_INTR_MASK; - /* WADPOClockGatingDisable:hsw */ - I915_WRITE(_TRANSA_CHICKEN1, - I915_READ(_TRANSA_CHICKEN1) | - TRANS_CHICKEN1_DP0UNIT_GC_DISABLE); + return ((m * x) / 127) - b; } -static void lpt_suspend_hw(struct drm_device *dev) +static u16 pvid_to_extvid(struct drm_i915_private *dev_priv, u8 pxvid) { - struct drm_i915_private *dev_priv = dev->dev_private; - - if (dev_priv->pch_id == INTEL_PCH_LPT_LP_DEVICE_ID_TYPE) { - uint32_t val = I915_READ(SOUTH_DSPCLK_GATE_D); - - val &= ~PCH_LP_PARTITION_LEVEL_DISABLE; - I915_WRITE(SOUTH_DSPCLK_GATE_D, val); - } + struct drm_device *dev = dev_priv->dev; + static const struct v_table { + u16 vd; /* in .1 mil */ + u16 vm; /* in .1 mil */ + } v_table[] = { + { 0, 0, }, + { 375, 0, }, + { 500, 0, }, + { 625, 0, }, + { 750, 0, }, + { 875, 0, }, + { 1000, 0, }, + { 1125, 0, }, + { 4125, 3000, }, + { 4125, 3000, }, + { 4125, 3000, }, + { 4125, 3000, }, + { 4125, 3000, }, + { 4125, 3000, }, + { 4125, 3000, }, + { 4125, 3000, }, + { 4125, 3000, }, + { 4125, 3000, }, + { 4125, 3000, }, + { 4125, 3000, }, + { 4125, 3000, }, + { 4125, 3000, }, + { 4125, 3000, }, + { 4125, 3000, }, + { 4125, 3000, }, + { 4125, 3000, }, + { 4125, 3000, }, + { 4125, 3000, }, + { 4125, 3000, }, + { 4125, 3000, }, + { 4125, 3000, }, + { 4125, 3000, }, + { 4250, 3125, }, + { 4375, 3250, }, + { 4500, 3375, }, + { 4625, 3500, }, + { 4750, 3625, }, + { 4875, 3750, }, + { 5000, 3875, }, + { 5125, 4000, }, + { 5250, 4125, }, + { 5375, 4250, }, + { 5500, 4375, }, + { 5625, 4500, }, + { 5750, 4625, }, + { 5875, 4750, }, + { 6000, 4875, }, + { 6125, 5000, }, + { 6250, 5125, }, + { 6375, 5250, }, + { 6500, 5375, }, + { 6625, 5500, }, + { 6750, 5625, }, + { 6875, 5750, }, + { 7000, 5875, }, + { 7125, 6000, }, + { 7250, 6125, }, + { 7375, 6250, }, + { 7500, 6375, }, + { 7625, 6500, }, + { 7750, 6625, }, + { 7875, 6750, }, + { 8000, 6875, }, + { 8125, 7000, }, + { 8250, 7125, }, + { 8375, 7250, }, + { 8500, 7375, }, + { 8625, 7500, }, + { 8750, 7625, }, + { 8875, 7750, }, + { 9000, 7875, }, + { 9125, 8000, }, + { 9250, 8125, }, + { 9375, 8250, }, + { 9500, 8375, }, + { 9625, 8500, }, + { 9750, 8625, }, + { 9875, 8750, }, + { 10000, 8875, }, + { 10125, 9000, }, + { 10250, 9125, }, + { 10375, 9250, }, + { 10500, 9375, }, + { 10625, 9500, }, + { 10750, 9625, }, + { 10875, 9750, }, + { 11000, 9875, }, + { 11125, 10000, }, + { 11250, 10125, }, + { 11375, 10250, }, + { 11500, 10375, }, + { 11625, 10500, }, + { 11750, 10625, }, + { 11875, 10750, }, + { 12000, 10875, }, + { 12125, 11000, }, + { 12250, 11125, }, + { 12375, 11250, }, + { 12500, 11375, }, + { 12625, 11500, }, + { 12750, 11625, }, + { 12875, 11750, }, + { 13000, 11875, }, + { 13125, 12000, }, + { 13250, 12125, }, + { 13375, 12250, }, + { 13500, 12375, }, + { 13625, 12500, }, + { 13750, 12625, }, + { 13875, 12750, }, + { 14000, 12875, }, + { 14125, 13000, }, + { 14250, 13125, }, + { 14375, 13250, }, + { 14500, 13375, }, + { 14625, 13500, }, + { 14750, 13625, }, + { 14875, 13750, }, + { 15000, 13875, }, + { 15125, 14000, }, + { 15250, 14125, }, + { 15375, 14250, }, + { 15500, 14375, }, + { 15625, 14500, }, + { 15750, 14625, }, + { 15875, 14750, }, + { 16000, 14875, }, + { 16125, 15000, }, + }; + if (INTEL_INFO(dev)->is_mobile) + return v_table[pxvid].vm; + else + return v_table[pxvid].vd; } -static void broadwell_init_clock_gating(struct drm_device *dev) +static void __i915_update_gfx_val(struct drm_i915_private *dev_priv) { - struct drm_i915_private *dev_priv = dev->dev_private; - enum pipe pipe; - - I915_WRITE(WM3_LP_ILK, 0); - I915_WRITE(WM2_LP_ILK, 0); - I915_WRITE(WM1_LP_ILK, 0); - - /* FIXME(BDW): Check all the w/a, some might only apply to - * pre-production hw. */ - - - I915_WRITE(GAMTARBMODE, _MASKED_BIT_ENABLE(ARB_MODE_BWGTLB_DISABLE)); + u64 now, diff, diffms; + u32 count; - I915_WRITE(_3D_CHICKEN3, - _MASKED_BIT_ENABLE(_3D_CHICKEN_SDE_LIMIT_FIFO_POLY_DEPTH(2))); + assert_spin_locked(&mchdev_lock); + now = ktime_get_raw_ns(); + diffms = now - dev_priv->ips.last_time2; + do_div(diffms, NSEC_PER_MSEC); - /* WaSwitchSolVfFArbitrationPriority:bdw */ - I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | HSW_ECOCHK_ARB_PRIO_SOL); + /* Don't divide by 0 */ + if (!diffms) + return; - /* WaPsrDPAMaskVBlankInSRD:bdw */ - I915_WRITE(CHICKEN_PAR1_1, - I915_READ(CHICKEN_PAR1_1) | DPA_MASK_VBLANK_SRD); + count = I915_READ(GFXEC); - /* WaPsrDPRSUnmaskVBlankInSRD:bdw */ - for_each_pipe(dev_priv, pipe) { - I915_WRITE(CHICKEN_PIPESL_1(pipe), - I915_READ(CHICKEN_PIPESL_1(pipe)) | - BDW_DPRS_MASK_VBLANK_SRD); + if (count < dev_priv->ips.last_count2) { + diff = ~0UL - dev_priv->ips.last_count2; + diff += count; + } else { + diff = count - dev_priv->ips.last_count2; } - /* WaVSRefCountFullforceMissDisable:bdw */ - /* WaDSRefCountFullforceMissDisable:bdw */ - I915_WRITE(GEN7_FF_THREAD_MODE, - I915_READ(GEN7_FF_THREAD_MODE) & - ~(GEN8_FF_DS_REF_CNT_FFME | GEN7_FF_VS_REF_CNT_FFME)); - - I915_WRITE(GEN6_RC_SLEEP_PSMI_CONTROL, - _MASKED_BIT_ENABLE(GEN8_RC_SEMA_IDLE_MSG_DISABLE)); - - /* WaDisableSDEUnitClockGating:bdw */ - I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) | - GEN8_SDEUNIT_CLOCK_GATE_DISABLE); + dev_priv->ips.last_count2 = count; + dev_priv->ips.last_time2 = now; - lpt_init_clock_gating(dev); + /* More magic constants... */ + diff = diff * 1181; + diff = div_u64(diff, diffms * 10); + dev_priv->ips.gfx_power = diff; } -static void haswell_init_clock_gating(struct drm_device *dev) +void i915_update_gfx_val(struct drm_i915_private *dev_priv) { - struct drm_i915_private *dev_priv = dev->dev_private; - - ilk_init_lp_watermarks(dev); - - /* L3 caching of data atomics doesn't work -- disable it. */ - I915_WRITE(HSW_SCRATCH1, HSW_SCRATCH1_L3_DATA_ATOMICS_DISABLE); - I915_WRITE(HSW_ROW_CHICKEN3, - _MASKED_BIT_ENABLE(HSW_ROW_CHICKEN3_L3_GLOBAL_ATOMICS_DISABLE)); - - /* This is required by WaCatErrorRejectionIssue:hsw */ - I915_WRITE(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG, - I915_READ(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG) | - GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB); - - /* WaVSRefCountFullforceMissDisable:hsw */ - I915_WRITE(GEN7_FF_THREAD_MODE, - I915_READ(GEN7_FF_THREAD_MODE) & ~GEN7_FF_VS_REF_CNT_FFME); - - /* WaDisable_RenderCache_OperationalFlush:hsw */ - I915_WRITE(CACHE_MODE_0_GEN7, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE)); - - /* enable HiZ Raw Stall Optimization */ - I915_WRITE(CACHE_MODE_0_GEN7, - _MASKED_BIT_DISABLE(HIZ_RAW_STALL_OPT_DISABLE)); - - /* WaDisable4x2SubspanOptimization:hsw */ - I915_WRITE(CACHE_MODE_1, - _MASKED_BIT_ENABLE(PIXEL_SUBSPAN_COLLECT_OPT_DISABLE)); + struct drm_device *dev = dev_priv->dev; - /* - * BSpec recommends 8x4 when MSAA is used, - * however in practice 16x4 seems fastest. - * - * Note that PS/WM thread counts depend on the WIZ hashing - * disable bit, which we don't touch here, but it's good - * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM). - */ - I915_WRITE(GEN7_GT_MODE, - GEN6_WIZ_HASHING_MASK | GEN6_WIZ_HASHING_16x4); + if (INTEL_INFO(dev)->gen != 5) + return; - /* WaSwitchSolVfFArbitrationPriority:hsw */ - I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | HSW_ECOCHK_ARB_PRIO_SOL); + spin_lock_irq(&mchdev_lock); - /* WaRsPkgCStateDisplayPMReq:hsw */ - I915_WRITE(CHICKEN_PAR1_1, - I915_READ(CHICKEN_PAR1_1) | FORCE_ARB_IDLE_PLANES); + __i915_update_gfx_val(dev_priv); - lpt_init_clock_gating(dev); + spin_unlock_irq(&mchdev_lock); } -static void ivybridge_init_clock_gating(struct drm_device *dev) +static unsigned long __i915_gfx_val(struct drm_i915_private *dev_priv) { - struct drm_i915_private *dev_priv = dev->dev_private; - uint32_t snpcr; - - ilk_init_lp_watermarks(dev); - - I915_WRITE(ILK_DSPCLK_GATE_D, ILK_VRHUNIT_CLOCK_GATE_DISABLE); - - /* WaDisableEarlyCull:ivb */ - I915_WRITE(_3D_CHICKEN3, - _MASKED_BIT_ENABLE(_3D_CHICKEN_SF_DISABLE_OBJEND_CULL)); - - /* WaDisableBackToBackFlipFix:ivb */ - I915_WRITE(IVB_CHICKEN3, - CHICKEN3_DGMG_REQ_OUT_FIX_DISABLE | - CHICKEN3_DGMG_DONE_FIX_DISABLE); - - /* WaDisablePSDDualDispatchEnable:ivb */ - if (IS_IVB_GT1(dev)) - I915_WRITE(GEN7_HALF_SLICE_CHICKEN1, - _MASKED_BIT_ENABLE(GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE)); - - /* WaDisable_RenderCache_OperationalFlush:ivb */ - I915_WRITE(CACHE_MODE_0_GEN7, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE)); - - /* Apply the WaDisableRHWOOptimizationForRenderHang:ivb workaround. */ - I915_WRITE(GEN7_COMMON_SLICE_CHICKEN1, - GEN7_CSC1_RHWO_OPT_DISABLE_IN_RCC); - - /* WaApplyL3ControlAndL3ChickenMode:ivb */ - I915_WRITE(GEN7_L3CNTLREG1, - GEN7_WA_FOR_GEN7_L3_CONTROL); - I915_WRITE(GEN7_L3_CHICKEN_MODE_REGISTER, - GEN7_WA_L3_CHICKEN_MODE); - if (IS_IVB_GT1(dev)) - I915_WRITE(GEN7_ROW_CHICKEN2, - _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE)); - else { - /* must write both registers */ - I915_WRITE(GEN7_ROW_CHICKEN2, - _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE)); - I915_WRITE(GEN7_ROW_CHICKEN2_GT2, - _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE)); - } - - /* WaForceL3Serialization:ivb */ - I915_WRITE(GEN7_L3SQCREG4, I915_READ(GEN7_L3SQCREG4) & - ~L3SQ_URB_READ_CAM_MATCH_DISABLE); - - /* - * According to the spec, bit 13 (RCZUNIT) must be set on IVB. - * This implements the WaDisableRCZUnitClockGating:ivb workaround. - */ - I915_WRITE(GEN6_UCGCTL2, - GEN6_RCZUNIT_CLOCK_GATE_DISABLE); + unsigned long t, corr, state1, corr2, state2; + u32 pxvid, ext_v; - /* This is required by WaCatErrorRejectionIssue:ivb */ - I915_WRITE(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG, - I915_READ(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG) | - GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB); + assert_spin_locked(&mchdev_lock); - g4x_disable_trickle_feed(dev); + pxvid = I915_READ(PXVFREQ_BASE + (dev_priv->rps.cur_freq * 4)); + pxvid = (pxvid >> 24) & 0x7f; + ext_v = pvid_to_extvid(dev_priv, pxvid); - gen7_setup_fixed_func_scheduler(dev_priv); + state1 = ext_v; - if (0) { /* causes HiZ corruption on ivb:gt1 */ - /* enable HiZ Raw Stall Optimization */ - I915_WRITE(CACHE_MODE_0_GEN7, - _MASKED_BIT_DISABLE(HIZ_RAW_STALL_OPT_DISABLE)); - } + t = i915_mch_val(dev_priv); - /* WaDisable4x2SubspanOptimization:ivb */ - I915_WRITE(CACHE_MODE_1, - _MASKED_BIT_ENABLE(PIXEL_SUBSPAN_COLLECT_OPT_DISABLE)); + /* Revel in the empirically derived constants */ - /* - * BSpec recommends 8x4 when MSAA is used, - * however in practice 16x4 seems fastest. - * - * Note that PS/WM thread counts depend on the WIZ hashing - * disable bit, which we don't touch here, but it's good - * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM). - */ - I915_WRITE(GEN7_GT_MODE, - GEN6_WIZ_HASHING_MASK | GEN6_WIZ_HASHING_16x4); + /* Correction factor in 1/100000 units */ + if (t > 80) + corr = ((t * 2349) + 135940); + else if (t >= 50) + corr = ((t * 964) + 29317); + else /* < 50 */ + corr = ((t * 301) + 1004); - snpcr = I915_READ(GEN6_MBCUNIT_SNPCR); - snpcr &= ~GEN6_MBC_SNPCR_MASK; - snpcr |= GEN6_MBC_SNPCR_MED; - I915_WRITE(GEN6_MBCUNIT_SNPCR, snpcr); + corr = corr * ((150142 * state1) / 10000 - 78642); + corr /= 100000; + corr2 = (corr * dev_priv->ips.corr); - if (!HAS_PCH_NOP(dev)) - cpt_init_clock_gating(dev); + state2 = (corr2 * state1) / 10000; + state2 /= 100; /* convert to mW */ - gen6_check_mch_setup(dev); + __i915_update_gfx_val(dev_priv); + + return dev_priv->ips.gfx_power + state2; } -static void valleyview_init_clock_gating(struct drm_device *dev) +unsigned long i915_gfx_val(struct drm_i915_private *dev_priv) { - struct drm_i915_private *dev_priv = dev->dev_private; + struct drm_device *dev = dev_priv->dev; + unsigned long val; - I915_WRITE(DSPCLK_GATE_D, VRHUNIT_CLOCK_GATE_DISABLE); + if (INTEL_INFO(dev)->gen != 5) + return 0; - /* WaDisableEarlyCull:vlv */ - I915_WRITE(_3D_CHICKEN3, - _MASKED_BIT_ENABLE(_3D_CHICKEN_SF_DISABLE_OBJEND_CULL)); + spin_lock_irq(&mchdev_lock); - /* WaDisableBackToBackFlipFix:vlv */ - I915_WRITE(IVB_CHICKEN3, - CHICKEN3_DGMG_REQ_OUT_FIX_DISABLE | - CHICKEN3_DGMG_DONE_FIX_DISABLE); + val = __i915_gfx_val(dev_priv); - /* WaPsdDispatchEnable:vlv */ - /* WaDisablePSDDualDispatchEnable:vlv */ - I915_WRITE(GEN7_HALF_SLICE_CHICKEN1, - _MASKED_BIT_ENABLE(GEN7_MAX_PS_THREAD_DEP | - GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE)); + spin_unlock_irq(&mchdev_lock); - /* WaDisable_RenderCache_OperationalFlush:vlv */ - I915_WRITE(CACHE_MODE_0_GEN7, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE)); + return val; +} - /* WaForceL3Serialization:vlv */ - I915_WRITE(GEN7_L3SQCREG4, I915_READ(GEN7_L3SQCREG4) & - ~L3SQ_URB_READ_CAM_MATCH_DISABLE); +/** + * i915_read_mch_val - return value for IPS use + * + * Calculate and return a value for the IPS driver to use when deciding whether + * we have thermal and power headroom to increase CPU or GPU power budget. + */ +unsigned long i915_read_mch_val(void) +{ + struct drm_i915_private *dev_priv; + unsigned long chipset_val, graphics_val, ret = 0; - /* WaDisableDopClockGating:vlv */ - I915_WRITE(GEN7_ROW_CHICKEN2, - _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE)); + spin_lock_irq(&mchdev_lock); + if (!i915_mch_dev) + goto out_unlock; + dev_priv = i915_mch_dev; - /* This is required by WaCatErrorRejectionIssue:vlv */ - I915_WRITE(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG, - I915_READ(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG) | - GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB); + chipset_val = __i915_chipset_val(dev_priv); + graphics_val = __i915_gfx_val(dev_priv); - gen7_setup_fixed_func_scheduler(dev_priv); + ret = chipset_val + graphics_val; - /* - * According to the spec, bit 13 (RCZUNIT) must be set on IVB. - * This implements the WaDisableRCZUnitClockGating:vlv workaround. - */ - I915_WRITE(GEN6_UCGCTL2, - GEN6_RCZUNIT_CLOCK_GATE_DISABLE); +out_unlock: + spin_unlock_irq(&mchdev_lock); - /* WaDisableL3Bank2xClockGate:vlv - * Disabling L3 clock gating- MMIO 940c[25] = 1 - * Set bit 25, to disable L3_BANK_2x_CLK_GATING */ - I915_WRITE(GEN7_UCGCTL4, - I915_READ(GEN7_UCGCTL4) | GEN7_L3BANK2X_CLOCK_GATE_DISABLE); + return ret; +} +EXPORT_SYMBOL_GPL(i915_read_mch_val); - I915_WRITE(MI_ARB_VLV, MI_ARB_DISPLAY_TRICKLE_FEED_DISABLE); +/** + * i915_gpu_raise - raise GPU frequency limit + * + * Raise the limit; IPS indicates we have thermal headroom. + */ +bool i915_gpu_raise(void) +{ + struct drm_i915_private *dev_priv; + bool ret = true; - /* - * BSpec says this must be set, even though - * WaDisable4x2SubspanOptimization isn't listed for VLV. - */ - I915_WRITE(CACHE_MODE_1, - _MASKED_BIT_ENABLE(PIXEL_SUBSPAN_COLLECT_OPT_DISABLE)); + spin_lock_irq(&mchdev_lock); + if (!i915_mch_dev) { + ret = false; + goto out_unlock; + } + dev_priv = i915_mch_dev; - /* - * WaIncreaseL3CreditsForVLVB0:vlv - * This is the hardware default actually. - */ - I915_WRITE(GEN7_L3SQCREG1, VLV_B0_WA_L3SQCREG1_VALUE); + if (dev_priv->ips.max_delay > dev_priv->ips.fmax) + dev_priv->ips.max_delay--; - /* - * WaDisableVLVClockGating_VBIIssue:vlv - * Disable clock gating on th GCFG unit to prevent a delay - * in the reporting of vblank events. - */ - I915_WRITE(VLV_GUNIT_CLOCK_GATE, GCFG_DIS); +out_unlock: + spin_unlock_irq(&mchdev_lock); + + return ret; } +EXPORT_SYMBOL_GPL(i915_gpu_raise); -static void cherryview_init_clock_gating(struct drm_device *dev) +/** + * i915_gpu_lower - lower GPU frequency limit + * + * IPS indicates we're close to a thermal limit, so throttle back the GPU + * frequency maximum. + */ +bool i915_gpu_lower(void) { - struct drm_i915_private *dev_priv = dev->dev_private; + struct drm_i915_private *dev_priv; + bool ret = true; - I915_WRITE(DSPCLK_GATE_D, VRHUNIT_CLOCK_GATE_DISABLE); + spin_lock_irq(&mchdev_lock); + if (!i915_mch_dev) { + ret = false; + goto out_unlock; + } + dev_priv = i915_mch_dev; - I915_WRITE(MI_ARB_VLV, MI_ARB_DISPLAY_TRICKLE_FEED_DISABLE); + if (dev_priv->ips.max_delay < dev_priv->ips.min_delay) + dev_priv->ips.max_delay++; - /* WaVSRefCountFullforceMissDisable:chv */ - /* WaDSRefCountFullforceMissDisable:chv */ - I915_WRITE(GEN7_FF_THREAD_MODE, - I915_READ(GEN7_FF_THREAD_MODE) & - ~(GEN8_FF_DS_REF_CNT_FFME | GEN7_FF_VS_REF_CNT_FFME)); +out_unlock: + spin_unlock_irq(&mchdev_lock); - /* WaDisableSemaphoreAndSyncFlipWait:chv */ - I915_WRITE(GEN6_RC_SLEEP_PSMI_CONTROL, - _MASKED_BIT_ENABLE(GEN8_RC_SEMA_IDLE_MSG_DISABLE)); + return ret; +} +EXPORT_SYMBOL_GPL(i915_gpu_lower); - /* WaDisableCSUnitClockGating:chv */ - I915_WRITE(GEN6_UCGCTL1, I915_READ(GEN6_UCGCTL1) | - GEN6_CSUNIT_CLOCK_GATE_DISABLE); +/** + * i915_gpu_busy - indicate GPU business to IPS + * + * Tell the IPS driver whether or not the GPU is busy. + */ +bool i915_gpu_busy(void) +{ + struct drm_i915_private *dev_priv; + struct intel_engine_cs *ring; + bool ret = false; + int i; - /* WaDisableSDEUnitClockGating:chv */ - I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) | - GEN8_SDEUNIT_CLOCK_GATE_DISABLE); + spin_lock_irq(&mchdev_lock); + if (!i915_mch_dev) + goto out_unlock; + dev_priv = i915_mch_dev; - /* WaDisableGunitClockGating:chv (pre-production hw) */ - I915_WRITE(VLV_GUNIT_CLOCK_GATE, I915_READ(VLV_GUNIT_CLOCK_GATE) | - GINT_DIS); + for_each_ring(ring, dev_priv, i) + ret |= !list_empty(&ring->request_list); - /* WaDisableFfDopClockGating:chv (pre-production hw) */ - I915_WRITE(GEN6_RC_SLEEP_PSMI_CONTROL, - _MASKED_BIT_ENABLE(GEN8_FF_DOP_CLOCK_GATE_DISABLE)); +out_unlock: + spin_unlock_irq(&mchdev_lock); - /* WaDisableDopClockGating:chv (pre-production hw) */ - I915_WRITE(GEN6_UCGCTL1, I915_READ(GEN6_UCGCTL1) | - GEN6_EU_TCUNIT_CLOCK_GATE_DISABLE); + return ret; } +EXPORT_SYMBOL_GPL(i915_gpu_busy); -static void g4x_init_clock_gating(struct drm_device *dev) +/** + * i915_gpu_turbo_disable - disable graphics turbo + * + * Disable graphics turbo by resetting the max frequency and setting the + * current frequency to the default. + */ +bool i915_gpu_turbo_disable(void) { - struct drm_i915_private *dev_priv = dev->dev_private; - uint32_t dspclk_gate; + struct drm_i915_private *dev_priv; + bool ret = true; - I915_WRITE(RENCLK_GATE_D1, 0); - I915_WRITE(RENCLK_GATE_D2, VF_UNIT_CLOCK_GATE_DISABLE | - GS_UNIT_CLOCK_GATE_DISABLE | - CL_UNIT_CLOCK_GATE_DISABLE); - I915_WRITE(RAMCLK_GATE_D, 0); - dspclk_gate = VRHUNIT_CLOCK_GATE_DISABLE | - OVRUNIT_CLOCK_GATE_DISABLE | - OVCUNIT_CLOCK_GATE_DISABLE; - if (IS_GM45(dev)) - dspclk_gate |= DSSUNIT_CLOCK_GATE_DISABLE; - I915_WRITE(DSPCLK_GATE_D, dspclk_gate); + spin_lock_irq(&mchdev_lock); + if (!i915_mch_dev) { + ret = false; + goto out_unlock; + } + dev_priv = i915_mch_dev; - /* WaDisableRenderCachePipelinedFlush */ - I915_WRITE(CACHE_MODE_0, - _MASKED_BIT_ENABLE(CM0_PIPELINED_RENDER_FLUSH_DISABLE)); + dev_priv->ips.max_delay = dev_priv->ips.fstart; - /* WaDisable_RenderCache_OperationalFlush:g4x */ - I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE)); + if (!ironlake_set_drps(dev_priv->dev, dev_priv->ips.fstart)) + ret = false; + +out_unlock: + spin_unlock_irq(&mchdev_lock); + + return ret; +} +EXPORT_SYMBOL_GPL(i915_gpu_turbo_disable); + +/** + * Tells the intel_ips driver that the i915 driver is now loaded, if + * IPS got loaded first. + * + * This awkward dance is so that neither module has to depend on the + * other in order for IPS to do the appropriate communication of + * GPU turbo limits to i915. + */ +static void +ips_ping_for_i915_load(void) +{ + void (*link)(void); - g4x_disable_trickle_feed(dev); + link = symbol_get(ips_link_to_i915_driver); + if (link) { + link(); + symbol_put(ips_link_to_i915_driver); + } } -static void crestline_init_clock_gating(struct drm_device *dev) +void intel_gpu_ips_init(struct drm_i915_private *dev_priv) { - struct drm_i915_private *dev_priv = dev->dev_private; - - I915_WRITE(RENCLK_GATE_D1, I965_RCC_CLOCK_GATE_DISABLE); - I915_WRITE(RENCLK_GATE_D2, 0); - I915_WRITE(DSPCLK_GATE_D, 0); - I915_WRITE(RAMCLK_GATE_D, 0); - I915_WRITE16(DEUC, 0); - I915_WRITE(MI_ARB_STATE, - _MASKED_BIT_ENABLE(MI_ARB_DISPLAY_TRICKLE_FEED_DISABLE)); + /* We only register the i915 ips part with intel-ips once everything is + * set up, to avoid intel-ips sneaking in and reading bogus values. */ + spin_lock_irq(&mchdev_lock); + i915_mch_dev = dev_priv; + spin_unlock_irq(&mchdev_lock); - /* WaDisable_RenderCache_OperationalFlush:gen4 */ - I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE)); + ips_ping_for_i915_load(); } -static void broadwater_init_clock_gating(struct drm_device *dev) +void intel_gpu_ips_teardown(void) { - struct drm_i915_private *dev_priv = dev->dev_private; - - I915_WRITE(RENCLK_GATE_D1, I965_RCZ_CLOCK_GATE_DISABLE | - I965_RCC_CLOCK_GATE_DISABLE | - I965_RCPB_CLOCK_GATE_DISABLE | - I965_ISC_CLOCK_GATE_DISABLE | - I965_FBC_CLOCK_GATE_DISABLE); - I915_WRITE(RENCLK_GATE_D2, 0); - I915_WRITE(MI_ARB_STATE, - _MASKED_BIT_ENABLE(MI_ARB_DISPLAY_TRICKLE_FEED_DISABLE)); - - /* WaDisable_RenderCache_OperationalFlush:gen4 */ - I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE)); + spin_lock_irq(&mchdev_lock); + i915_mch_dev = NULL; + spin_unlock_irq(&mchdev_lock); } -static void gen3_init_clock_gating(struct drm_device *dev) +static void intel_init_emon(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; - u32 dstate = I915_READ(D_STATE); + u32 lcfuse; + u8 pxw[16]; + int i; - dstate |= DSTATE_PLL_D3_OFF | DSTATE_GFX_CLOCK_GATING | - DSTATE_DOT_CLOCK_GATING; - I915_WRITE(D_STATE, dstate); + /* Disable to program */ + I915_WRITE(ECR, 0); + POSTING_READ(ECR); - if (IS_PINEVIEW(dev)) - I915_WRITE(ECOSKPD, _MASKED_BIT_ENABLE(ECO_GATING_CX_ONLY)); + /* Program energy weights for various events */ + I915_WRITE(SDEW, 0x15040d00); + I915_WRITE(CSIEW0, 0x007f0000); + I915_WRITE(CSIEW1, 0x1e220004); + I915_WRITE(CSIEW2, 0x04000004); - /* IIR "flip pending" means done if this bit is set */ - I915_WRITE(ECOSKPD, _MASKED_BIT_DISABLE(ECO_FLIP_DONE)); + for (i = 0; i < 5; i++) + I915_WRITE(PEW + (i * 4), 0); + for (i = 0; i < 3; i++) + I915_WRITE(DEW + (i * 4), 0); - /* interrupts should cause a wake up from C3 */ - I915_WRITE(INSTPM, _MASKED_BIT_ENABLE(INSTPM_AGPBUSY_INT_EN)); + /* Program P-state weights to account for frequency power adjustment */ + for (i = 0; i < 16; i++) { + u32 pxvidfreq = I915_READ(PXVFREQ_BASE + (i * 4)); + unsigned long freq = intel_pxfreq(pxvidfreq); + unsigned long vid = (pxvidfreq & PXVFREQ_PX_MASK) >> + PXVFREQ_PX_SHIFT; + unsigned long val; - /* On GEN3 we really need to make sure the ARB C3 LP bit is set */ - I915_WRITE(MI_ARB_STATE, _MASKED_BIT_ENABLE(MI_ARB_C3_LP_WRITE_ENABLE)); + val = vid * vid; + val *= (freq / 1000); + val *= 255; + val /= (127*127*900); + if (val > 0xff) + DRM_ERROR("bad pxval: %ld\n", val); + pxw[i] = val; + } + /* Render standby states get 0 weight */ + pxw[14] = 0; + pxw[15] = 0; - I915_WRITE(MI_ARB_STATE, - _MASKED_BIT_ENABLE(MI_ARB_DISPLAY_TRICKLE_FEED_DISABLE)); -} + for (i = 0; i < 4; i++) { + u32 val = (pxw[i*4] << 24) | (pxw[(i*4)+1] << 16) | + (pxw[(i*4)+2] << 8) | (pxw[(i*4)+3]); + I915_WRITE(PXW + (i * 4), val); + } -static void i85x_init_clock_gating(struct drm_device *dev) -{ - struct drm_i915_private *dev_priv = dev->dev_private; + /* Adjust magic regs to magic values (more experimental results) */ + I915_WRITE(OGW0, 0); + I915_WRITE(OGW1, 0); + I915_WRITE(EG0, 0x00007f00); + I915_WRITE(EG1, 0x0000000e); + I915_WRITE(EG2, 0x000e0000); + I915_WRITE(EG3, 0x68000300); + I915_WRITE(EG4, 0x42000000); + I915_WRITE(EG5, 0x00140031); + I915_WRITE(EG6, 0); + I915_WRITE(EG7, 0); - I915_WRITE(RENCLK_GATE_D1, SV_CLOCK_GATE_DISABLE); + for (i = 0; i < 8; i++) + I915_WRITE(PXWL + (i * 4), 0); - /* interrupts should cause a wake up from C3 */ - I915_WRITE(MI_STATE, _MASKED_BIT_ENABLE(MI_AGPBUSY_INT_EN) | - _MASKED_BIT_DISABLE(MI_AGPBUSY_830_MODE)); + /* Enable PMON + select events */ + I915_WRITE(ECR, 0x80000019); - I915_WRITE(MEM_MODE, - _MASKED_BIT_ENABLE(MEM_DISPLAY_TRICKLE_FEED_DISABLE)); + lcfuse = I915_READ(LCFUSE02); + + dev_priv->ips.corr = (lcfuse & LCFUSE_HIV_MASK); } -static void i830_init_clock_gating(struct drm_device *dev) +void intel_init_gt_powersave(struct drm_device *dev) { - struct drm_i915_private *dev_priv = dev->dev_private; + i915.enable_rc6 = sanitize_rc6_option(dev, i915.enable_rc6); - I915_WRITE(DSPCLK_GATE_D, OVRUNIT_CLOCK_GATE_DISABLE); + if (IS_CHERRYVIEW(dev)) + cherryview_init_gt_powersave(dev); + else if (IS_VALLEYVIEW(dev)) + valleyview_init_gt_powersave(dev); +} - I915_WRITE(MEM_MODE, - _MASKED_BIT_ENABLE(MEM_DISPLAY_A_TRICKLE_FEED_DISABLE) | - _MASKED_BIT_ENABLE(MEM_DISPLAY_B_TRICKLE_FEED_DISABLE)); +void intel_cleanup_gt_powersave(struct drm_device *dev) +{ + if (IS_CHERRYVIEW(dev)) + return; + else if (IS_VALLEYVIEW(dev)) + valleyview_cleanup_gt_powersave(dev); } -void intel_init_clock_gating(struct drm_device *dev) +/** + * intel_suspend_gt_powersave - suspend PM work and helper threads + * @dev: drm device + * + * We don't want to disable RC6 or other features here, we just want + * to make sure any work we've queued has finished and won't bother + * us while we're suspended. + */ +void intel_suspend_gt_powersave(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; - dev_priv->display.init_clock_gating(dev); + if (INTEL_INFO(dev)->gen < 6) + return; + + flush_delayed_work(&dev_priv->rps.delayed_resume_work); + + /* + * TODO: disable RPS interrupts on GEN9+ too once RPS support + * is added for it. + */ + if (INTEL_INFO(dev)->gen < 9) + gen6_disable_rps_interrupts(dev); + + /* Force GPU to min freq during suspend */ + gen6_rps_idle(dev_priv); } -void intel_suspend_hw(struct drm_device *dev) +void intel_disable_gt_powersave(struct drm_device *dev) { - if (HAS_PCH_LPT(dev)) - lpt_suspend_hw(dev); -} + struct drm_i915_private *dev_priv = dev->dev_private; -#define for_each_power_well(i, power_well, domain_mask, power_domains) \ - for (i = 0; \ - i < (power_domains)->power_well_count && \ - ((power_well) = &(power_domains)->power_wells[i]); \ - i++) \ - if ((power_well)->domains & (domain_mask)) + /* Interrupts should be disabled already to avoid re-arming. */ + WARN_ON(intel_irqs_enabled(dev_priv)); -#define for_each_power_well_rev(i, power_well, domain_mask, power_domains) \ - for (i = (power_domains)->power_well_count - 1; \ - i >= 0 && ((power_well) = &(power_domains)->power_wells[i]);\ - i--) \ - if ((power_well)->domains & (domain_mask)) + if (IS_IRONLAKE_M(dev)) { + ironlake_disable_drps(dev); + ironlake_disable_rc6(dev); + } else if (INTEL_INFO(dev)->gen >= 6) { + intel_suspend_gt_powersave(dev); -/** - * We should only use the power well if we explicitly asked the hardware to - * enable it, so check if it's enabled and also check if we've requested it to - * be enabled. - */ -static bool hsw_power_well_enabled(struct drm_i915_private *dev_priv, - struct i915_power_well *power_well) -{ - return I915_READ(HSW_PWR_WELL_DRIVER) == - (HSW_PWR_WELL_ENABLE_REQUEST | HSW_PWR_WELL_STATE_ENABLED); + mutex_lock(&dev_priv->rps.hw_lock); + if (INTEL_INFO(dev)->gen >= 9) + gen9_disable_rps(dev); + else if (IS_CHERRYVIEW(dev)) + cherryview_disable_rps(dev); + else if (IS_VALLEYVIEW(dev)) + valleyview_disable_rps(dev); + else + gen6_disable_rps(dev); + + dev_priv->rps.enabled = false; + mutex_unlock(&dev_priv->rps.hw_lock); + } } -bool intel_display_power_enabled_unlocked(struct drm_i915_private *dev_priv, - enum intel_display_power_domain domain) +static void intel_gen6_powersave_work(struct work_struct *work) { - struct i915_power_domains *power_domains; - struct i915_power_well *power_well; - bool is_enabled; - int i; + struct drm_i915_private *dev_priv = + container_of(work, struct drm_i915_private, + rps.delayed_resume_work.work); + struct drm_device *dev = dev_priv->dev; - if (dev_priv->pm.suspended) - return false; + mutex_lock(&dev_priv->rps.hw_lock); - power_domains = &dev_priv->power_domains; + /* + * TODO: reset/enable RPS interrupts on GEN9+ too, once RPS support is + * added for it. + */ + if (INTEL_INFO(dev)->gen < 9) + gen6_reset_rps_interrupts(dev); - is_enabled = true; + if (IS_CHERRYVIEW(dev)) { + cherryview_enable_rps(dev); + } else if (IS_VALLEYVIEW(dev)) { + valleyview_enable_rps(dev); + } else if (INTEL_INFO(dev)->gen >= 9) { + gen9_enable_rps(dev); + } else if (IS_BROADWELL(dev)) { + gen8_enable_rps(dev); + __gen6_update_ring_freq(dev); + } else { + gen6_enable_rps(dev); + __gen6_update_ring_freq(dev); + } + dev_priv->rps.enabled = true; - for_each_power_well_rev(i, power_well, BIT(domain), power_domains) { - if (power_well->always_on) - continue; + if (INTEL_INFO(dev)->gen < 9) + gen6_enable_rps_interrupts(dev); - if (!power_well->hw_enabled) { - is_enabled = false; - break; - } - } + mutex_unlock(&dev_priv->rps.hw_lock); + + intel_runtime_pm_put(dev_priv); +} + +void intel_enable_gt_powersave(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev->dev_private; - return is_enabled; + if (IS_IRONLAKE_M(dev)) { + mutex_lock(&dev->struct_mutex); + ironlake_enable_drps(dev); + ironlake_enable_rc6(dev); + intel_init_emon(dev); + mutex_unlock(&dev->struct_mutex); + } else if (INTEL_INFO(dev)->gen >= 6) { + /* + * PCU communication is slow and this doesn't need to be + * done at any specific time, so do this out of our fast path + * to make resume and init faster. + * + * We depend on the HW RC6 power context save/restore + * mechanism when entering D3 through runtime PM suspend. So + * disable RPM until RPS/RC6 is properly setup. We can only + * get here via the driver load/system resume/runtime resume + * paths, so the _noresume version is enough (and in case of + * runtime resume it's necessary). + */ + if (schedule_delayed_work(&dev_priv->rps.delayed_resume_work, + round_jiffies_up_relative(HZ))) + intel_runtime_pm_get_noresume(dev_priv); + } } -bool intel_display_power_enabled(struct drm_i915_private *dev_priv, - enum intel_display_power_domain domain) +void intel_reset_gt_powersave(struct drm_device *dev) { - struct i915_power_domains *power_domains; - bool ret; - - power_domains = &dev_priv->power_domains; - - mutex_lock(&power_domains->lock); - ret = intel_display_power_enabled_unlocked(dev_priv, domain); - mutex_unlock(&power_domains->lock); + struct drm_i915_private *dev_priv = dev->dev_private; - return ret; + dev_priv->rps.enabled = false; + intel_enable_gt_powersave(dev); } -/* - * Starting with Haswell, we have a "Power Down Well" that can be turned off - * when not needed anymore. We have 4 registers that can request the power well - * to be enabled, and it will only be disabled if none of the registers is - * requesting it to be enabled. - */ -static void hsw_power_well_post_enable(struct drm_i915_private *dev_priv) +static void ibx_init_clock_gating(struct drm_device *dev) { - struct drm_device *dev = dev_priv->dev; + struct drm_i915_private *dev_priv = dev->dev_private; /* - * After we re-enable the power well, if we touch VGA register 0x3d5 - * we'll get unclaimed register interrupts. This stops after we write - * anything to the VGA MSR register. The vgacon module uses this - * register all the time, so if we unbind our driver and, as a - * consequence, bind vgacon, we'll get stuck in an infinite loop at - * console_unlock(). So make here we touch the VGA MSR register, making - * sure vgacon can keep working normally without triggering interrupts - * and error messages. + * On Ibex Peak and Cougar Point, we need to disable clock + * gating for the panel power sequencer or it will fail to + * start up when no ports are active. */ - vga_get_uninterruptible(dev->pdev, VGA_RSRC_LEGACY_IO); - outb(inb(VGA_MSR_READ), VGA_MSR_WRITE); - vga_put(dev->pdev, VGA_RSRC_LEGACY_IO); - - if (IS_BROADWELL(dev)) - gen8_irq_power_well_post_enable(dev_priv); + I915_WRITE(SOUTH_DSPCLK_GATE_D, PCH_DPLSUNIT_CLOCK_GATE_DISABLE); } -static void hsw_set_power_well(struct drm_i915_private *dev_priv, - struct i915_power_well *power_well, bool enable) +static void g4x_disable_trickle_feed(struct drm_device *dev) { - bool is_enabled, enable_requested; - uint32_t tmp; - - tmp = I915_READ(HSW_PWR_WELL_DRIVER); - is_enabled = tmp & HSW_PWR_WELL_STATE_ENABLED; - enable_requested = tmp & HSW_PWR_WELL_ENABLE_REQUEST; - - if (enable) { - if (!enable_requested) - I915_WRITE(HSW_PWR_WELL_DRIVER, - HSW_PWR_WELL_ENABLE_REQUEST); - - if (!is_enabled) { - DRM_DEBUG_KMS("Enabling power well\n"); - if (wait_for((I915_READ(HSW_PWR_WELL_DRIVER) & - HSW_PWR_WELL_STATE_ENABLED), 20)) - DRM_ERROR("Timeout enabling power well\n"); - } + struct drm_i915_private *dev_priv = dev->dev_private; + int pipe; - hsw_power_well_post_enable(dev_priv); - } else { - if (enable_requested) { - I915_WRITE(HSW_PWR_WELL_DRIVER, 0); - POSTING_READ(HSW_PWR_WELL_DRIVER); - DRM_DEBUG_KMS("Requesting to disable the power well\n"); - } + for_each_pipe(dev_priv, pipe) { + I915_WRITE(DSPCNTR(pipe), + I915_READ(DSPCNTR(pipe)) | + DISPPLANE_TRICKLE_FEED_DISABLE); + intel_flush_primary_plane(dev_priv, pipe); } } -static void hsw_power_well_sync_hw(struct drm_i915_private *dev_priv, - struct i915_power_well *power_well) +static void ilk_init_lp_watermarks(struct drm_device *dev) { - hsw_set_power_well(dev_priv, power_well, power_well->count > 0); + struct drm_i915_private *dev_priv = dev->dev_private; + + I915_WRITE(WM3_LP_ILK, I915_READ(WM3_LP_ILK) & ~WM1_LP_SR_EN); + I915_WRITE(WM2_LP_ILK, I915_READ(WM2_LP_ILK) & ~WM1_LP_SR_EN); + I915_WRITE(WM1_LP_ILK, I915_READ(WM1_LP_ILK) & ~WM1_LP_SR_EN); /* - * We're taking over the BIOS, so clear any requests made by it since - * the driver is in charge now. + * Don't touch WM1S_LP_EN here. + * Doing so could cause underruns. */ - if (I915_READ(HSW_PWR_WELL_BIOS) & HSW_PWR_WELL_ENABLE_REQUEST) - I915_WRITE(HSW_PWR_WELL_BIOS, 0); -} - -static void hsw_power_well_enable(struct drm_i915_private *dev_priv, - struct i915_power_well *power_well) -{ - hsw_set_power_well(dev_priv, power_well, true); -} - -static void hsw_power_well_disable(struct drm_i915_private *dev_priv, - struct i915_power_well *power_well) -{ - hsw_set_power_well(dev_priv, power_well, false); } -static void i9xx_always_on_power_well_noop(struct drm_i915_private *dev_priv, - struct i915_power_well *power_well) -{ -} - -static bool i9xx_always_on_power_well_enabled(struct drm_i915_private *dev_priv, - struct i915_power_well *power_well) -{ - return true; -} - -static void vlv_set_power_well(struct drm_i915_private *dev_priv, - struct i915_power_well *power_well, bool enable) +static void ironlake_init_clock_gating(struct drm_device *dev) { - enum punit_power_well power_well_id = power_well->data; - u32 mask; - u32 state; - u32 ctrl; + struct drm_i915_private *dev_priv = dev->dev_private; + uint32_t dspclk_gate = ILK_VRHUNIT_CLOCK_GATE_DISABLE; - mask = PUNIT_PWRGT_MASK(power_well_id); - state = enable ? PUNIT_PWRGT_PWR_ON(power_well_id) : - PUNIT_PWRGT_PWR_GATE(power_well_id); + /* + * Required for FBC + * WaFbcDisableDpfcClockGating:ilk + */ + dspclk_gate |= ILK_DPFCRUNIT_CLOCK_GATE_DISABLE | + ILK_DPFCUNIT_CLOCK_GATE_DISABLE | + ILK_DPFDUNIT_CLOCK_GATE_ENABLE; - mutex_lock(&dev_priv->rps.hw_lock); + I915_WRITE(PCH_3DCGDIS0, + MARIUNIT_CLOCK_GATE_DISABLE | + SVSMUNIT_CLOCK_GATE_DISABLE); + I915_WRITE(PCH_3DCGDIS1, + VFMUNIT_CLOCK_GATE_DISABLE); -#define COND \ - ((vlv_punit_read(dev_priv, PUNIT_REG_PWRGT_STATUS) & mask) == state) + /* + * According to the spec the following bits should be set in + * order to enable memory self-refresh + * The bit 22/21 of 0x42004 + * The bit 5 of 0x42020 + * The bit 15 of 0x45000 + */ + I915_WRITE(ILK_DISPLAY_CHICKEN2, + (I915_READ(ILK_DISPLAY_CHICKEN2) | + ILK_DPARB_GATE | ILK_VSDPFD_FULL)); + dspclk_gate |= ILK_DPARBUNIT_CLOCK_GATE_ENABLE; + I915_WRITE(DISP_ARB_CTL, + (I915_READ(DISP_ARB_CTL) | + DISP_FBC_WM_DIS)); - if (COND) - goto out; + ilk_init_lp_watermarks(dev); - ctrl = vlv_punit_read(dev_priv, PUNIT_REG_PWRGT_CTRL); - ctrl &= ~mask; - ctrl |= state; - vlv_punit_write(dev_priv, PUNIT_REG_PWRGT_CTRL, ctrl); + /* + * Based on the document from hardware guys the following bits + * should be set unconditionally in order to enable FBC. + * The bit 22 of 0x42000 + * The bit 22 of 0x42004 + * The bit 7,8,9 of 0x42020. + */ + if (IS_IRONLAKE_M(dev)) { + /* WaFbcAsynchFlipDisableFbcQueue:ilk */ + I915_WRITE(ILK_DISPLAY_CHICKEN1, + I915_READ(ILK_DISPLAY_CHICKEN1) | + ILK_FBCQ_DIS); + I915_WRITE(ILK_DISPLAY_CHICKEN2, + I915_READ(ILK_DISPLAY_CHICKEN2) | + ILK_DPARB_GATE); + } - if (wait_for(COND, 100)) - DRM_ERROR("timout setting power well state %08x (%08x)\n", - state, - vlv_punit_read(dev_priv, PUNIT_REG_PWRGT_CTRL)); + I915_WRITE(ILK_DSPCLK_GATE_D, dspclk_gate); -#undef COND + I915_WRITE(ILK_DISPLAY_CHICKEN2, + I915_READ(ILK_DISPLAY_CHICKEN2) | + ILK_ELPIN_409_SELECT); + I915_WRITE(_3D_CHICKEN2, + _3D_CHICKEN2_WM_READ_PIPELINED << 16 | + _3D_CHICKEN2_WM_READ_PIPELINED); -out: - mutex_unlock(&dev_priv->rps.hw_lock); -} + /* WaDisableRenderCachePipelinedFlush:ilk */ + I915_WRITE(CACHE_MODE_0, + _MASKED_BIT_ENABLE(CM0_PIPELINED_RENDER_FLUSH_DISABLE)); -static void vlv_power_well_sync_hw(struct drm_i915_private *dev_priv, - struct i915_power_well *power_well) -{ - vlv_set_power_well(dev_priv, power_well, power_well->count > 0); -} + /* WaDisable_RenderCache_OperationalFlush:ilk */ + I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE)); -static void vlv_power_well_enable(struct drm_i915_private *dev_priv, - struct i915_power_well *power_well) -{ - vlv_set_power_well(dev_priv, power_well, true); -} + g4x_disable_trickle_feed(dev); -static void vlv_power_well_disable(struct drm_i915_private *dev_priv, - struct i915_power_well *power_well) -{ - vlv_set_power_well(dev_priv, power_well, false); + ibx_init_clock_gating(dev); } -static bool vlv_power_well_enabled(struct drm_i915_private *dev_priv, - struct i915_power_well *power_well) +static void cpt_init_clock_gating(struct drm_device *dev) { - int power_well_id = power_well->data; - bool enabled = false; - u32 mask; - u32 state; - u32 ctrl; - - mask = PUNIT_PWRGT_MASK(power_well_id); - ctrl = PUNIT_PWRGT_PWR_ON(power_well_id); - - mutex_lock(&dev_priv->rps.hw_lock); + struct drm_i915_private *dev_priv = dev->dev_private; + int pipe; + uint32_t val; - state = vlv_punit_read(dev_priv, PUNIT_REG_PWRGT_STATUS) & mask; /* - * We only ever set the power-on and power-gate states, anything - * else is unexpected. + * On Ibex Peak and Cougar Point, we need to disable clock + * gating for the panel power sequencer or it will fail to + * start up when no ports are active. */ - WARN_ON(state != PUNIT_PWRGT_PWR_ON(power_well_id) && - state != PUNIT_PWRGT_PWR_GATE(power_well_id)); - if (state == ctrl) - enabled = true; - - /* - * A transient state at this point would mean some unexpected party - * is poking at the power controls too. + I915_WRITE(SOUTH_DSPCLK_GATE_D, PCH_DPLSUNIT_CLOCK_GATE_DISABLE | + PCH_DPLUNIT_CLOCK_GATE_DISABLE | + PCH_CPUNIT_CLOCK_GATE_DISABLE); + I915_WRITE(SOUTH_CHICKEN2, I915_READ(SOUTH_CHICKEN2) | + DPLS_EDP_PPS_FIX_DIS); + /* The below fixes the weird display corruption, a few pixels shifted + * downward, on (only) LVDS of some HP laptops with IVY. */ - ctrl = vlv_punit_read(dev_priv, PUNIT_REG_PWRGT_CTRL) & mask; - WARN_ON(ctrl != state); + for_each_pipe(dev_priv, pipe) { + val = I915_READ(TRANS_CHICKEN2(pipe)); + val |= TRANS_CHICKEN2_TIMING_OVERRIDE; + val &= ~TRANS_CHICKEN2_FDI_POLARITY_REVERSED; + if (dev_priv->vbt.fdi_rx_polarity_inverted) + val |= TRANS_CHICKEN2_FDI_POLARITY_REVERSED; + val &= ~TRANS_CHICKEN2_FRAME_START_DELAY_MASK; + val &= ~TRANS_CHICKEN2_DISABLE_DEEP_COLOR_COUNTER; + val &= ~TRANS_CHICKEN2_DISABLE_DEEP_COLOR_MODESWITCH; + I915_WRITE(TRANS_CHICKEN2(pipe), val); + } + /* WADP0ClockGatingDisable */ + for_each_pipe(dev_priv, pipe) { + I915_WRITE(TRANS_CHICKEN1(pipe), + TRANS_CHICKEN1_DP0UNIT_GC_DISABLE); + } +} - mutex_unlock(&dev_priv->rps.hw_lock); +static void gen6_check_mch_setup(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + uint32_t tmp; - return enabled; + tmp = I915_READ(MCH_SSKPD); + if ((tmp & MCH_SSKPD_WM0_MASK) != MCH_SSKPD_WM0_VAL) + DRM_DEBUG_KMS("Wrong MCH_SSKPD value: 0x%08x This can cause underruns.\n", + tmp); } -static void vlv_display_power_well_enable(struct drm_i915_private *dev_priv, - struct i915_power_well *power_well) +static void gen6_init_clock_gating(struct drm_device *dev) { - WARN_ON_ONCE(power_well->data != PUNIT_POWER_WELL_DISP2D); + struct drm_i915_private *dev_priv = dev->dev_private; + uint32_t dspclk_gate = ILK_VRHUNIT_CLOCK_GATE_DISABLE; - vlv_set_power_well(dev_priv, power_well, true); + I915_WRITE(ILK_DSPCLK_GATE_D, dspclk_gate); - spin_lock_irq(&dev_priv->irq_lock); - valleyview_enable_display_irqs(dev_priv); - spin_unlock_irq(&dev_priv->irq_lock); + I915_WRITE(ILK_DISPLAY_CHICKEN2, + I915_READ(ILK_DISPLAY_CHICKEN2) | + ILK_ELPIN_409_SELECT); - /* - * During driver initialization/resume we can avoid restoring the - * part of the HW/SW state that will be inited anyway explicitly. - */ - if (dev_priv->power_domains.initializing) - return; + /* WaDisableHiZPlanesWhenMSAAEnabled:snb */ + I915_WRITE(_3D_CHICKEN, + _MASKED_BIT_ENABLE(_3D_CHICKEN_HIZ_PLANE_DISABLE_MSAA_4X_SNB)); - intel_hpd_init(dev_priv->dev); + /* WaDisable_RenderCache_OperationalFlush:snb */ + I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE)); - i915_redisable_vga_power_on(dev_priv->dev); -} + /* + * BSpec recoomends 8x4 when MSAA is used, + * however in practice 16x4 seems fastest. + * + * Note that PS/WM thread counts depend on the WIZ hashing + * disable bit, which we don't touch here, but it's good + * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM). + */ + I915_WRITE(GEN6_GT_MODE, + GEN6_WIZ_HASHING_MASK | GEN6_WIZ_HASHING_16x4); -static void vlv_display_power_well_disable(struct drm_i915_private *dev_priv, - struct i915_power_well *power_well) -{ - WARN_ON_ONCE(power_well->data != PUNIT_POWER_WELL_DISP2D); + ilk_init_lp_watermarks(dev); - spin_lock_irq(&dev_priv->irq_lock); - valleyview_disable_display_irqs(dev_priv); - spin_unlock_irq(&dev_priv->irq_lock); + I915_WRITE(CACHE_MODE_0, + _MASKED_BIT_DISABLE(CM0_STC_EVICT_DISABLE_LRA_SNB)); - vlv_set_power_well(dev_priv, power_well, false); + I915_WRITE(GEN6_UCGCTL1, + I915_READ(GEN6_UCGCTL1) | + GEN6_BLBUNIT_CLOCK_GATE_DISABLE | + GEN6_CSUNIT_CLOCK_GATE_DISABLE); - vlv_power_sequencer_reset(dev_priv); -} + /* According to the BSpec vol1g, bit 12 (RCPBUNIT) clock + * gating disable must be set. Failure to set it results in + * flickering pixels due to Z write ordering failures after + * some amount of runtime in the Mesa "fire" demo, and Unigine + * Sanctuary and Tropics, and apparently anything else with + * alpha test or pixel discard. + * + * According to the spec, bit 11 (RCCUNIT) must also be set, + * but we didn't debug actual testcases to find it out. + * + * WaDisableRCCUnitClockGating:snb + * WaDisableRCPBUnitClockGating:snb + */ + I915_WRITE(GEN6_UCGCTL2, + GEN6_RCPBUNIT_CLOCK_GATE_DISABLE | + GEN6_RCCUNIT_CLOCK_GATE_DISABLE); -static void vlv_dpio_cmn_power_well_enable(struct drm_i915_private *dev_priv, - struct i915_power_well *power_well) -{ - WARN_ON_ONCE(power_well->data != PUNIT_POWER_WELL_DPIO_CMN_BC); + /* WaStripsFansDisableFastClipPerformanceFix:snb */ + I915_WRITE(_3D_CHICKEN3, + _MASKED_BIT_ENABLE(_3D_CHICKEN3_SF_DISABLE_FASTCLIP_CULL)); /* - * Enable the CRI clock source so we can get at the - * display and the reference clock for VGA - * hotplug / manual detection. + * Bspec says: + * "This bit must be set if 3DSTATE_CLIP clip mode is set to normal and + * 3DSTATE_SF number of SF output attributes is more than 16." */ - I915_WRITE(DPLL(PIPE_B), I915_READ(DPLL(PIPE_B)) | - DPLL_REFA_CLK_ENABLE_VLV | DPLL_INTEGRATED_CRI_CLK_VLV); - udelay(1); /* >10ns for cmnreset, >0ns for sidereset */ - - vlv_set_power_well(dev_priv, power_well, true); + I915_WRITE(_3D_CHICKEN3, + _MASKED_BIT_ENABLE(_3D_CHICKEN3_SF_DISABLE_PIPELINED_ATTR_FETCH)); /* - * From VLV2A0_DP_eDP_DPIO_driver_vbios_notes_10.docx - - * 6. De-assert cmn_reset/side_reset. Same as VLV X0. - * a. GUnit 0x2110 bit[0] set to 1 (def 0) - * b. The other bits such as sfr settings / modesel may all - * be set to 0. + * According to the spec the following bits should be + * set in order to enable memory self-refresh and fbc: + * The bit21 and bit22 of 0x42000 + * The bit21 and bit22 of 0x42004 + * The bit5 and bit7 of 0x42020 + * The bit14 of 0x70180 + * The bit14 of 0x71180 * - * This should only be done on init and resume from S3 with - * both PLLs disabled, or we risk losing DPIO and PLL - * synchronization. + * WaFbcAsynchFlipDisableFbcQueue:snb */ - I915_WRITE(DPIO_CTL, I915_READ(DPIO_CTL) | DPIO_CMNRST); -} - -static void vlv_dpio_cmn_power_well_disable(struct drm_i915_private *dev_priv, - struct i915_power_well *power_well) -{ - enum pipe pipe; - - WARN_ON_ONCE(power_well->data != PUNIT_POWER_WELL_DPIO_CMN_BC); + I915_WRITE(ILK_DISPLAY_CHICKEN1, + I915_READ(ILK_DISPLAY_CHICKEN1) | + ILK_FBCQ_DIS | ILK_PABSTRETCH_DIS); + I915_WRITE(ILK_DISPLAY_CHICKEN2, + I915_READ(ILK_DISPLAY_CHICKEN2) | + ILK_DPARB_GATE | ILK_VSDPFD_FULL); + I915_WRITE(ILK_DSPCLK_GATE_D, + I915_READ(ILK_DSPCLK_GATE_D) | + ILK_DPARBUNIT_CLOCK_GATE_ENABLE | + ILK_DPFDUNIT_CLOCK_GATE_ENABLE); - for_each_pipe(dev_priv, pipe) - assert_pll_disabled(dev_priv, pipe); + g4x_disable_trickle_feed(dev); - /* Assert common reset */ - I915_WRITE(DPIO_CTL, I915_READ(DPIO_CTL) & ~DPIO_CMNRST); + cpt_init_clock_gating(dev); - vlv_set_power_well(dev_priv, power_well, false); + gen6_check_mch_setup(dev); } -static void chv_dpio_cmn_power_well_enable(struct drm_i915_private *dev_priv, - struct i915_power_well *power_well) +static void gen7_setup_fixed_func_scheduler(struct drm_i915_private *dev_priv) { - enum dpio_phy phy; - - WARN_ON_ONCE(power_well->data != PUNIT_POWER_WELL_DPIO_CMN_BC && - power_well->data != PUNIT_POWER_WELL_DPIO_CMN_D); + uint32_t reg = I915_READ(GEN7_FF_THREAD_MODE); /* - * Enable the CRI clock source so we can get at the - * display and the reference clock for VGA - * hotplug / manual detection. + * WaVSThreadDispatchOverride:ivb,vlv + * + * This actually overrides the dispatch + * mode for all thread types. */ - if (power_well->data == PUNIT_POWER_WELL_DPIO_CMN_BC) { - phy = DPIO_PHY0; - I915_WRITE(DPLL(PIPE_B), I915_READ(DPLL(PIPE_B)) | - DPLL_REFA_CLK_ENABLE_VLV); - I915_WRITE(DPLL(PIPE_B), I915_READ(DPLL(PIPE_B)) | - DPLL_REFA_CLK_ENABLE_VLV | DPLL_INTEGRATED_CRI_CLK_VLV); - } else { - phy = DPIO_PHY1; - I915_WRITE(DPLL(PIPE_C), I915_READ(DPLL(PIPE_C)) | - DPLL_REFA_CLK_ENABLE_VLV | DPLL_INTEGRATED_CRI_CLK_VLV); - } - udelay(1); /* >10ns for cmnreset, >0ns for sidereset */ - vlv_set_power_well(dev_priv, power_well, true); - - /* Poll for phypwrgood signal */ - if (wait_for(I915_READ(DISPLAY_PHY_STATUS) & PHY_POWERGOOD(phy), 1)) - DRM_ERROR("Display PHY %d is not power up\n", phy); + reg &= ~GEN7_FF_SCHED_MASK; + reg |= GEN7_FF_TS_SCHED_HW; + reg |= GEN7_FF_VS_SCHED_HW; + reg |= GEN7_FF_DS_SCHED_HW; - I915_WRITE(DISPLAY_PHY_CONTROL, I915_READ(DISPLAY_PHY_CONTROL) | - PHY_COM_LANE_RESET_DEASSERT(phy)); + I915_WRITE(GEN7_FF_THREAD_MODE, reg); } -static void chv_dpio_cmn_power_well_disable(struct drm_i915_private *dev_priv, - struct i915_power_well *power_well) +static void lpt_init_clock_gating(struct drm_device *dev) { - enum dpio_phy phy; - - WARN_ON_ONCE(power_well->data != PUNIT_POWER_WELL_DPIO_CMN_BC && - power_well->data != PUNIT_POWER_WELL_DPIO_CMN_D); - - if (power_well->data == PUNIT_POWER_WELL_DPIO_CMN_BC) { - phy = DPIO_PHY0; - assert_pll_disabled(dev_priv, PIPE_A); - assert_pll_disabled(dev_priv, PIPE_B); - } else { - phy = DPIO_PHY1; - assert_pll_disabled(dev_priv, PIPE_C); - } + struct drm_i915_private *dev_priv = dev->dev_private; - I915_WRITE(DISPLAY_PHY_CONTROL, I915_READ(DISPLAY_PHY_CONTROL) & - ~PHY_COM_LANE_RESET_DEASSERT(phy)); + /* + * TODO: this bit should only be enabled when really needed, then + * disabled when not needed anymore in order to save power. + */ + if (dev_priv->pch_id == INTEL_PCH_LPT_LP_DEVICE_ID_TYPE) + I915_WRITE(SOUTH_DSPCLK_GATE_D, + I915_READ(SOUTH_DSPCLK_GATE_D) | + PCH_LP_PARTITION_LEVEL_DISABLE); - vlv_set_power_well(dev_priv, power_well, false); + /* WADPOClockGatingDisable:hsw */ + I915_WRITE(_TRANSA_CHICKEN1, + I915_READ(_TRANSA_CHICKEN1) | + TRANS_CHICKEN1_DP0UNIT_GC_DISABLE); } -static bool chv_pipe_power_well_enabled(struct drm_i915_private *dev_priv, - struct i915_power_well *power_well) +static void lpt_suspend_hw(struct drm_device *dev) { - enum pipe pipe = power_well->data; - bool enabled; - u32 state, ctrl; - - mutex_lock(&dev_priv->rps.hw_lock); - - state = vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ) & DP_SSS_MASK(pipe); - /* - * We only ever set the power-on and power-gate states, anything - * else is unexpected. - */ - WARN_ON(state != DP_SSS_PWR_ON(pipe) && state != DP_SSS_PWR_GATE(pipe)); - enabled = state == DP_SSS_PWR_ON(pipe); - - /* - * A transient state at this point would mean some unexpected party - * is poking at the power controls too. - */ - ctrl = vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ) & DP_SSC_MASK(pipe); - WARN_ON(ctrl << 16 != state); + struct drm_i915_private *dev_priv = dev->dev_private; - mutex_unlock(&dev_priv->rps.hw_lock); + if (dev_priv->pch_id == INTEL_PCH_LPT_LP_DEVICE_ID_TYPE) { + uint32_t val = I915_READ(SOUTH_DSPCLK_GATE_D); - return enabled; + val &= ~PCH_LP_PARTITION_LEVEL_DISABLE; + I915_WRITE(SOUTH_DSPCLK_GATE_D, val); + } } -static void chv_set_pipe_power_well(struct drm_i915_private *dev_priv, - struct i915_power_well *power_well, - bool enable) +static void broadwell_init_clock_gating(struct drm_device *dev) { - enum pipe pipe = power_well->data; - u32 state; - u32 ctrl; + struct drm_i915_private *dev_priv = dev->dev_private; + enum pipe pipe; - state = enable ? DP_SSS_PWR_ON(pipe) : DP_SSS_PWR_GATE(pipe); + I915_WRITE(WM3_LP_ILK, 0); + I915_WRITE(WM2_LP_ILK, 0); + I915_WRITE(WM1_LP_ILK, 0); - mutex_lock(&dev_priv->rps.hw_lock); + /* WaSwitchSolVfFArbitrationPriority:bdw */ + I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | HSW_ECOCHK_ARB_PRIO_SOL); -#define COND \ - ((vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ) & DP_SSS_MASK(pipe)) == state) + /* WaPsrDPAMaskVBlankInSRD:bdw */ + I915_WRITE(CHICKEN_PAR1_1, + I915_READ(CHICKEN_PAR1_1) | DPA_MASK_VBLANK_SRD); - if (COND) - goto out; + /* WaPsrDPRSUnmaskVBlankInSRD:bdw */ + for_each_pipe(dev_priv, pipe) { + I915_WRITE(CHICKEN_PIPESL_1(pipe), + I915_READ(CHICKEN_PIPESL_1(pipe)) | + BDW_DPRS_MASK_VBLANK_SRD); + } - ctrl = vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ); - ctrl &= ~DP_SSC_MASK(pipe); - ctrl |= enable ? DP_SSC_PWR_ON(pipe) : DP_SSC_PWR_GATE(pipe); - vlv_punit_write(dev_priv, PUNIT_REG_DSPFREQ, ctrl); + /* WaVSRefCountFullforceMissDisable:bdw */ + /* WaDSRefCountFullforceMissDisable:bdw */ + I915_WRITE(GEN7_FF_THREAD_MODE, + I915_READ(GEN7_FF_THREAD_MODE) & + ~(GEN8_FF_DS_REF_CNT_FFME | GEN7_FF_VS_REF_CNT_FFME)); - if (wait_for(COND, 100)) - DRM_ERROR("timout setting power well state %08x (%08x)\n", - state, - vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ)); + I915_WRITE(GEN6_RC_SLEEP_PSMI_CONTROL, + _MASKED_BIT_ENABLE(GEN8_RC_SEMA_IDLE_MSG_DISABLE)); -#undef COND + /* WaDisableSDEUnitClockGating:bdw */ + I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) | + GEN8_SDEUNIT_CLOCK_GATE_DISABLE); -out: - mutex_unlock(&dev_priv->rps.hw_lock); + lpt_init_clock_gating(dev); } -static void chv_pipe_power_well_sync_hw(struct drm_i915_private *dev_priv, - struct i915_power_well *power_well) +static void haswell_init_clock_gating(struct drm_device *dev) { - chv_set_pipe_power_well(dev_priv, power_well, power_well->count > 0); -} + struct drm_i915_private *dev_priv = dev->dev_private; -static void chv_pipe_power_well_enable(struct drm_i915_private *dev_priv, - struct i915_power_well *power_well) -{ - WARN_ON_ONCE(power_well->data != PIPE_A && - power_well->data != PIPE_B && - power_well->data != PIPE_C); + ilk_init_lp_watermarks(dev); - chv_set_pipe_power_well(dev_priv, power_well, true); -} + /* L3 caching of data atomics doesn't work -- disable it. */ + I915_WRITE(HSW_SCRATCH1, HSW_SCRATCH1_L3_DATA_ATOMICS_DISABLE); + I915_WRITE(HSW_ROW_CHICKEN3, + _MASKED_BIT_ENABLE(HSW_ROW_CHICKEN3_L3_GLOBAL_ATOMICS_DISABLE)); -static void chv_pipe_power_well_disable(struct drm_i915_private *dev_priv, - struct i915_power_well *power_well) -{ - WARN_ON_ONCE(power_well->data != PIPE_A && - power_well->data != PIPE_B && - power_well->data != PIPE_C); + /* This is required by WaCatErrorRejectionIssue:hsw */ + I915_WRITE(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG, + I915_READ(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG) | + GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB); - chv_set_pipe_power_well(dev_priv, power_well, false); -} + /* WaVSRefCountFullforceMissDisable:hsw */ + I915_WRITE(GEN7_FF_THREAD_MODE, + I915_READ(GEN7_FF_THREAD_MODE) & ~GEN7_FF_VS_REF_CNT_FFME); -static void check_power_well_state(struct drm_i915_private *dev_priv, - struct i915_power_well *power_well) -{ - bool enabled = power_well->ops->is_enabled(dev_priv, power_well); + /* WaDisable_RenderCache_OperationalFlush:hsw */ + I915_WRITE(CACHE_MODE_0_GEN7, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE)); - if (power_well->always_on || !i915.disable_power_well) { - if (!enabled) - goto mismatch; + /* enable HiZ Raw Stall Optimization */ + I915_WRITE(CACHE_MODE_0_GEN7, + _MASKED_BIT_DISABLE(HIZ_RAW_STALL_OPT_DISABLE)); - return; - } + /* WaDisable4x2SubspanOptimization:hsw */ + I915_WRITE(CACHE_MODE_1, + _MASKED_BIT_ENABLE(PIXEL_SUBSPAN_COLLECT_OPT_DISABLE)); + + /* + * BSpec recommends 8x4 when MSAA is used, + * however in practice 16x4 seems fastest. + * + * Note that PS/WM thread counts depend on the WIZ hashing + * disable bit, which we don't touch here, but it's good + * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM). + */ + I915_WRITE(GEN7_GT_MODE, + GEN6_WIZ_HASHING_MASK | GEN6_WIZ_HASHING_16x4); - if (enabled != (power_well->count > 0)) - goto mismatch; + /* WaSwitchSolVfFArbitrationPriority:hsw */ + I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | HSW_ECOCHK_ARB_PRIO_SOL); - return; + /* WaRsPkgCStateDisplayPMReq:hsw */ + I915_WRITE(CHICKEN_PAR1_1, + I915_READ(CHICKEN_PAR1_1) | FORCE_ARB_IDLE_PLANES); -mismatch: - WARN(1, "state mismatch for '%s' (always_on %d hw state %d use-count %d disable_power_well %d\n", - power_well->name, power_well->always_on, enabled, - power_well->count, i915.disable_power_well); + lpt_init_clock_gating(dev); } -void intel_display_power_get(struct drm_i915_private *dev_priv, - enum intel_display_power_domain domain) +static void ivybridge_init_clock_gating(struct drm_device *dev) { - struct i915_power_domains *power_domains; - struct i915_power_well *power_well; - int i; + struct drm_i915_private *dev_priv = dev->dev_private; + uint32_t snpcr; - intel_runtime_pm_get(dev_priv); + ilk_init_lp_watermarks(dev); - power_domains = &dev_priv->power_domains; + I915_WRITE(ILK_DSPCLK_GATE_D, ILK_VRHUNIT_CLOCK_GATE_DISABLE); - mutex_lock(&power_domains->lock); + /* WaDisableEarlyCull:ivb */ + I915_WRITE(_3D_CHICKEN3, + _MASKED_BIT_ENABLE(_3D_CHICKEN_SF_DISABLE_OBJEND_CULL)); - for_each_power_well(i, power_well, BIT(domain), power_domains) { - if (!power_well->count++) { - DRM_DEBUG_KMS("enabling %s\n", power_well->name); - power_well->ops->enable(dev_priv, power_well); - power_well->hw_enabled = true; - } + /* WaDisableBackToBackFlipFix:ivb */ + I915_WRITE(IVB_CHICKEN3, + CHICKEN3_DGMG_REQ_OUT_FIX_DISABLE | + CHICKEN3_DGMG_DONE_FIX_DISABLE); - check_power_well_state(dev_priv, power_well); - } + /* WaDisablePSDDualDispatchEnable:ivb */ + if (IS_IVB_GT1(dev)) + I915_WRITE(GEN7_HALF_SLICE_CHICKEN1, + _MASKED_BIT_ENABLE(GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE)); - power_domains->domain_use_count[domain]++; + /* WaDisable_RenderCache_OperationalFlush:ivb */ + I915_WRITE(CACHE_MODE_0_GEN7, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE)); - mutex_unlock(&power_domains->lock); -} + /* Apply the WaDisableRHWOOptimizationForRenderHang:ivb workaround. */ + I915_WRITE(GEN7_COMMON_SLICE_CHICKEN1, + GEN7_CSC1_RHWO_OPT_DISABLE_IN_RCC); -void intel_display_power_put(struct drm_i915_private *dev_priv, - enum intel_display_power_domain domain) -{ - struct i915_power_domains *power_domains; - struct i915_power_well *power_well; - int i; + /* WaApplyL3ControlAndL3ChickenMode:ivb */ + I915_WRITE(GEN7_L3CNTLREG1, + GEN7_WA_FOR_GEN7_L3_CONTROL); + I915_WRITE(GEN7_L3_CHICKEN_MODE_REGISTER, + GEN7_WA_L3_CHICKEN_MODE); + if (IS_IVB_GT1(dev)) + I915_WRITE(GEN7_ROW_CHICKEN2, + _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE)); + else { + /* must write both registers */ + I915_WRITE(GEN7_ROW_CHICKEN2, + _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE)); + I915_WRITE(GEN7_ROW_CHICKEN2_GT2, + _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE)); + } - power_domains = &dev_priv->power_domains; + /* WaForceL3Serialization:ivb */ + I915_WRITE(GEN7_L3SQCREG4, I915_READ(GEN7_L3SQCREG4) & + ~L3SQ_URB_READ_CAM_MATCH_DISABLE); - mutex_lock(&power_domains->lock); + /* + * According to the spec, bit 13 (RCZUNIT) must be set on IVB. + * This implements the WaDisableRCZUnitClockGating:ivb workaround. + */ + I915_WRITE(GEN6_UCGCTL2, + GEN6_RCZUNIT_CLOCK_GATE_DISABLE); - WARN_ON(!power_domains->domain_use_count[domain]); - power_domains->domain_use_count[domain]--; + /* This is required by WaCatErrorRejectionIssue:ivb */ + I915_WRITE(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG, + I915_READ(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG) | + GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB); - for_each_power_well_rev(i, power_well, BIT(domain), power_domains) { - WARN_ON(!power_well->count); + g4x_disable_trickle_feed(dev); - if (!--power_well->count && i915.disable_power_well) { - DRM_DEBUG_KMS("disabling %s\n", power_well->name); - power_well->hw_enabled = false; - power_well->ops->disable(dev_priv, power_well); - } + gen7_setup_fixed_func_scheduler(dev_priv); - check_power_well_state(dev_priv, power_well); + if (0) { /* causes HiZ corruption on ivb:gt1 */ + /* enable HiZ Raw Stall Optimization */ + I915_WRITE(CACHE_MODE_0_GEN7, + _MASKED_BIT_DISABLE(HIZ_RAW_STALL_OPT_DISABLE)); } - mutex_unlock(&power_domains->lock); - - intel_runtime_pm_put(dev_priv); -} - -static struct i915_power_domains *hsw_pwr; - -/* Display audio driver power well request */ -int i915_request_power_well(void) -{ - struct drm_i915_private *dev_priv; - - if (!hsw_pwr) - return -ENODEV; + /* WaDisable4x2SubspanOptimization:ivb */ + I915_WRITE(CACHE_MODE_1, + _MASKED_BIT_ENABLE(PIXEL_SUBSPAN_COLLECT_OPT_DISABLE)); - dev_priv = container_of(hsw_pwr, struct drm_i915_private, - power_domains); - intel_display_power_get(dev_priv, POWER_DOMAIN_AUDIO); - return 0; -} -EXPORT_SYMBOL_GPL(i915_request_power_well); + /* + * BSpec recommends 8x4 when MSAA is used, + * however in practice 16x4 seems fastest. + * + * Note that PS/WM thread counts depend on the WIZ hashing + * disable bit, which we don't touch here, but it's good + * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM). + */ + I915_WRITE(GEN7_GT_MODE, + GEN6_WIZ_HASHING_MASK | GEN6_WIZ_HASHING_16x4); -/* Display audio driver power well release */ -int i915_release_power_well(void) -{ - struct drm_i915_private *dev_priv; + snpcr = I915_READ(GEN6_MBCUNIT_SNPCR); + snpcr &= ~GEN6_MBC_SNPCR_MASK; + snpcr |= GEN6_MBC_SNPCR_MED; + I915_WRITE(GEN6_MBCUNIT_SNPCR, snpcr); - if (!hsw_pwr) - return -ENODEV; + if (!HAS_PCH_NOP(dev)) + cpt_init_clock_gating(dev); - dev_priv = container_of(hsw_pwr, struct drm_i915_private, - power_domains); - intel_display_power_put(dev_priv, POWER_DOMAIN_AUDIO); - return 0; + gen6_check_mch_setup(dev); } -EXPORT_SYMBOL_GPL(i915_release_power_well); -/* - * Private interface for the audio driver to get CDCLK in kHz. - * - * Caller must request power well using i915_request_power_well() prior to - * making the call. - */ -int i915_get_cdclk_freq(void) +static void valleyview_init_clock_gating(struct drm_device *dev) { - struct drm_i915_private *dev_priv; - - if (!hsw_pwr) - return -ENODEV; - - dev_priv = container_of(hsw_pwr, struct drm_i915_private, - power_domains); - - return intel_ddi_get_cdclk_freq(dev_priv); -} -EXPORT_SYMBOL_GPL(i915_get_cdclk_freq); - - -#define POWER_DOMAIN_MASK (BIT(POWER_DOMAIN_NUM) - 1) - -#define HSW_ALWAYS_ON_POWER_DOMAINS ( \ - BIT(POWER_DOMAIN_PIPE_A) | \ - BIT(POWER_DOMAIN_TRANSCODER_EDP) | \ - BIT(POWER_DOMAIN_PORT_DDI_A_2_LANES) | \ - BIT(POWER_DOMAIN_PORT_DDI_A_4_LANES) | \ - BIT(POWER_DOMAIN_PORT_DDI_B_2_LANES) | \ - BIT(POWER_DOMAIN_PORT_DDI_B_4_LANES) | \ - BIT(POWER_DOMAIN_PORT_DDI_C_2_LANES) | \ - BIT(POWER_DOMAIN_PORT_DDI_C_4_LANES) | \ - BIT(POWER_DOMAIN_PORT_DDI_D_2_LANES) | \ - BIT(POWER_DOMAIN_PORT_DDI_D_4_LANES) | \ - BIT(POWER_DOMAIN_PORT_CRT) | \ - BIT(POWER_DOMAIN_PLLS) | \ - BIT(POWER_DOMAIN_INIT)) -#define HSW_DISPLAY_POWER_DOMAINS ( \ - (POWER_DOMAIN_MASK & ~HSW_ALWAYS_ON_POWER_DOMAINS) | \ - BIT(POWER_DOMAIN_INIT)) - -#define BDW_ALWAYS_ON_POWER_DOMAINS ( \ - HSW_ALWAYS_ON_POWER_DOMAINS | \ - BIT(POWER_DOMAIN_PIPE_A_PANEL_FITTER)) -#define BDW_DISPLAY_POWER_DOMAINS ( \ - (POWER_DOMAIN_MASK & ~BDW_ALWAYS_ON_POWER_DOMAINS) | \ - BIT(POWER_DOMAIN_INIT)) - -#define VLV_ALWAYS_ON_POWER_DOMAINS BIT(POWER_DOMAIN_INIT) -#define VLV_DISPLAY_POWER_DOMAINS POWER_DOMAIN_MASK - -#define VLV_DPIO_CMN_BC_POWER_DOMAINS ( \ - BIT(POWER_DOMAIN_PORT_DDI_B_2_LANES) | \ - BIT(POWER_DOMAIN_PORT_DDI_B_4_LANES) | \ - BIT(POWER_DOMAIN_PORT_DDI_C_2_LANES) | \ - BIT(POWER_DOMAIN_PORT_DDI_C_4_LANES) | \ - BIT(POWER_DOMAIN_PORT_CRT) | \ - BIT(POWER_DOMAIN_INIT)) - -#define VLV_DPIO_TX_B_LANES_01_POWER_DOMAINS ( \ - BIT(POWER_DOMAIN_PORT_DDI_B_2_LANES) | \ - BIT(POWER_DOMAIN_PORT_DDI_B_4_LANES) | \ - BIT(POWER_DOMAIN_INIT)) - -#define VLV_DPIO_TX_B_LANES_23_POWER_DOMAINS ( \ - BIT(POWER_DOMAIN_PORT_DDI_B_4_LANES) | \ - BIT(POWER_DOMAIN_INIT)) - -#define VLV_DPIO_TX_C_LANES_01_POWER_DOMAINS ( \ - BIT(POWER_DOMAIN_PORT_DDI_C_2_LANES) | \ - BIT(POWER_DOMAIN_PORT_DDI_C_4_LANES) | \ - BIT(POWER_DOMAIN_INIT)) - -#define VLV_DPIO_TX_C_LANES_23_POWER_DOMAINS ( \ - BIT(POWER_DOMAIN_PORT_DDI_C_4_LANES) | \ - BIT(POWER_DOMAIN_INIT)) - -#define CHV_PIPE_A_POWER_DOMAINS ( \ - BIT(POWER_DOMAIN_PIPE_A) | \ - BIT(POWER_DOMAIN_INIT)) - -#define CHV_PIPE_B_POWER_DOMAINS ( \ - BIT(POWER_DOMAIN_PIPE_B) | \ - BIT(POWER_DOMAIN_INIT)) - -#define CHV_PIPE_C_POWER_DOMAINS ( \ - BIT(POWER_DOMAIN_PIPE_C) | \ - BIT(POWER_DOMAIN_INIT)) - -#define CHV_DPIO_CMN_BC_POWER_DOMAINS ( \ - BIT(POWER_DOMAIN_PORT_DDI_B_2_LANES) | \ - BIT(POWER_DOMAIN_PORT_DDI_B_4_LANES) | \ - BIT(POWER_DOMAIN_PORT_DDI_C_2_LANES) | \ - BIT(POWER_DOMAIN_PORT_DDI_C_4_LANES) | \ - BIT(POWER_DOMAIN_INIT)) - -#define CHV_DPIO_CMN_D_POWER_DOMAINS ( \ - BIT(POWER_DOMAIN_PORT_DDI_D_2_LANES) | \ - BIT(POWER_DOMAIN_PORT_DDI_D_4_LANES) | \ - BIT(POWER_DOMAIN_INIT)) - -#define CHV_DPIO_TX_D_LANES_01_POWER_DOMAINS ( \ - BIT(POWER_DOMAIN_PORT_DDI_D_2_LANES) | \ - BIT(POWER_DOMAIN_PORT_DDI_D_4_LANES) | \ - BIT(POWER_DOMAIN_INIT)) - -#define CHV_DPIO_TX_D_LANES_23_POWER_DOMAINS ( \ - BIT(POWER_DOMAIN_PORT_DDI_D_4_LANES) | \ - BIT(POWER_DOMAIN_INIT)) - -static const struct i915_power_well_ops i9xx_always_on_power_well_ops = { - .sync_hw = i9xx_always_on_power_well_noop, - .enable = i9xx_always_on_power_well_noop, - .disable = i9xx_always_on_power_well_noop, - .is_enabled = i9xx_always_on_power_well_enabled, -}; - -static const struct i915_power_well_ops chv_pipe_power_well_ops = { - .sync_hw = chv_pipe_power_well_sync_hw, - .enable = chv_pipe_power_well_enable, - .disable = chv_pipe_power_well_disable, - .is_enabled = chv_pipe_power_well_enabled, -}; - -static const struct i915_power_well_ops chv_dpio_cmn_power_well_ops = { - .sync_hw = vlv_power_well_sync_hw, - .enable = chv_dpio_cmn_power_well_enable, - .disable = chv_dpio_cmn_power_well_disable, - .is_enabled = vlv_power_well_enabled, -}; - -static struct i915_power_well i9xx_always_on_power_well[] = { - { - .name = "always-on", - .always_on = 1, - .domains = POWER_DOMAIN_MASK, - .ops = &i9xx_always_on_power_well_ops, - }, -}; - -static const struct i915_power_well_ops hsw_power_well_ops = { - .sync_hw = hsw_power_well_sync_hw, - .enable = hsw_power_well_enable, - .disable = hsw_power_well_disable, - .is_enabled = hsw_power_well_enabled, -}; - -static struct i915_power_well hsw_power_wells[] = { - { - .name = "always-on", - .always_on = 1, - .domains = HSW_ALWAYS_ON_POWER_DOMAINS, - .ops = &i9xx_always_on_power_well_ops, - }, - { - .name = "display", - .domains = HSW_DISPLAY_POWER_DOMAINS, - .ops = &hsw_power_well_ops, - }, -}; + struct drm_i915_private *dev_priv = dev->dev_private; -static struct i915_power_well bdw_power_wells[] = { - { - .name = "always-on", - .always_on = 1, - .domains = BDW_ALWAYS_ON_POWER_DOMAINS, - .ops = &i9xx_always_on_power_well_ops, - }, - { - .name = "display", - .domains = BDW_DISPLAY_POWER_DOMAINS, - .ops = &hsw_power_well_ops, - }, -}; + I915_WRITE(DSPCLK_GATE_D, VRHUNIT_CLOCK_GATE_DISABLE); -static const struct i915_power_well_ops vlv_display_power_well_ops = { - .sync_hw = vlv_power_well_sync_hw, - .enable = vlv_display_power_well_enable, - .disable = vlv_display_power_well_disable, - .is_enabled = vlv_power_well_enabled, -}; + /* WaDisableEarlyCull:vlv */ + I915_WRITE(_3D_CHICKEN3, + _MASKED_BIT_ENABLE(_3D_CHICKEN_SF_DISABLE_OBJEND_CULL)); -static const struct i915_power_well_ops vlv_dpio_cmn_power_well_ops = { - .sync_hw = vlv_power_well_sync_hw, - .enable = vlv_dpio_cmn_power_well_enable, - .disable = vlv_dpio_cmn_power_well_disable, - .is_enabled = vlv_power_well_enabled, -}; + /* WaDisableBackToBackFlipFix:vlv */ + I915_WRITE(IVB_CHICKEN3, + CHICKEN3_DGMG_REQ_OUT_FIX_DISABLE | + CHICKEN3_DGMG_DONE_FIX_DISABLE); -static const struct i915_power_well_ops vlv_dpio_power_well_ops = { - .sync_hw = vlv_power_well_sync_hw, - .enable = vlv_power_well_enable, - .disable = vlv_power_well_disable, - .is_enabled = vlv_power_well_enabled, -}; + /* WaPsdDispatchEnable:vlv */ + /* WaDisablePSDDualDispatchEnable:vlv */ + I915_WRITE(GEN7_HALF_SLICE_CHICKEN1, + _MASKED_BIT_ENABLE(GEN7_MAX_PS_THREAD_DEP | + GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE)); -static struct i915_power_well vlv_power_wells[] = { - { - .name = "always-on", - .always_on = 1, - .domains = VLV_ALWAYS_ON_POWER_DOMAINS, - .ops = &i9xx_always_on_power_well_ops, - }, - { - .name = "display", - .domains = VLV_DISPLAY_POWER_DOMAINS, - .data = PUNIT_POWER_WELL_DISP2D, - .ops = &vlv_display_power_well_ops, - }, - { - .name = "dpio-tx-b-01", - .domains = VLV_DPIO_TX_B_LANES_01_POWER_DOMAINS | - VLV_DPIO_TX_B_LANES_23_POWER_DOMAINS | - VLV_DPIO_TX_C_LANES_01_POWER_DOMAINS | - VLV_DPIO_TX_C_LANES_23_POWER_DOMAINS, - .ops = &vlv_dpio_power_well_ops, - .data = PUNIT_POWER_WELL_DPIO_TX_B_LANES_01, - }, - { - .name = "dpio-tx-b-23", - .domains = VLV_DPIO_TX_B_LANES_01_POWER_DOMAINS | - VLV_DPIO_TX_B_LANES_23_POWER_DOMAINS | - VLV_DPIO_TX_C_LANES_01_POWER_DOMAINS | - VLV_DPIO_TX_C_LANES_23_POWER_DOMAINS, - .ops = &vlv_dpio_power_well_ops, - .data = PUNIT_POWER_WELL_DPIO_TX_B_LANES_23, - }, - { - .name = "dpio-tx-c-01", - .domains = VLV_DPIO_TX_B_LANES_01_POWER_DOMAINS | - VLV_DPIO_TX_B_LANES_23_POWER_DOMAINS | - VLV_DPIO_TX_C_LANES_01_POWER_DOMAINS | - VLV_DPIO_TX_C_LANES_23_POWER_DOMAINS, - .ops = &vlv_dpio_power_well_ops, - .data = PUNIT_POWER_WELL_DPIO_TX_C_LANES_01, - }, - { - .name = "dpio-tx-c-23", - .domains = VLV_DPIO_TX_B_LANES_01_POWER_DOMAINS | - VLV_DPIO_TX_B_LANES_23_POWER_DOMAINS | - VLV_DPIO_TX_C_LANES_01_POWER_DOMAINS | - VLV_DPIO_TX_C_LANES_23_POWER_DOMAINS, - .ops = &vlv_dpio_power_well_ops, - .data = PUNIT_POWER_WELL_DPIO_TX_C_LANES_23, - }, - { - .name = "dpio-common", - .domains = VLV_DPIO_CMN_BC_POWER_DOMAINS, - .data = PUNIT_POWER_WELL_DPIO_CMN_BC, - .ops = &vlv_dpio_cmn_power_well_ops, - }, -}; + /* WaDisable_RenderCache_OperationalFlush:vlv */ + I915_WRITE(CACHE_MODE_0_GEN7, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE)); -static struct i915_power_well chv_power_wells[] = { - { - .name = "always-on", - .always_on = 1, - .domains = VLV_ALWAYS_ON_POWER_DOMAINS, - .ops = &i9xx_always_on_power_well_ops, - }, -#if 0 - { - .name = "display", - .domains = VLV_DISPLAY_POWER_DOMAINS, - .data = PUNIT_POWER_WELL_DISP2D, - .ops = &vlv_display_power_well_ops, - }, - { - .name = "pipe-a", - .domains = CHV_PIPE_A_POWER_DOMAINS, - .data = PIPE_A, - .ops = &chv_pipe_power_well_ops, - }, - { - .name = "pipe-b", - .domains = CHV_PIPE_B_POWER_DOMAINS, - .data = PIPE_B, - .ops = &chv_pipe_power_well_ops, - }, - { - .name = "pipe-c", - .domains = CHV_PIPE_C_POWER_DOMAINS, - .data = PIPE_C, - .ops = &chv_pipe_power_well_ops, - }, -#endif - { - .name = "dpio-common-bc", - /* - * XXX: cmnreset for one PHY seems to disturb the other. - * As a workaround keep both powered on at the same - * time for now. - */ - .domains = CHV_DPIO_CMN_BC_POWER_DOMAINS | CHV_DPIO_CMN_D_POWER_DOMAINS, - .data = PUNIT_POWER_WELL_DPIO_CMN_BC, - .ops = &chv_dpio_cmn_power_well_ops, - }, - { - .name = "dpio-common-d", - /* - * XXX: cmnreset for one PHY seems to disturb the other. - * As a workaround keep both powered on at the same - * time for now. - */ - .domains = CHV_DPIO_CMN_BC_POWER_DOMAINS | CHV_DPIO_CMN_D_POWER_DOMAINS, - .data = PUNIT_POWER_WELL_DPIO_CMN_D, - .ops = &chv_dpio_cmn_power_well_ops, - }, -#if 0 - { - .name = "dpio-tx-b-01", - .domains = VLV_DPIO_TX_B_LANES_01_POWER_DOMAINS | - VLV_DPIO_TX_B_LANES_23_POWER_DOMAINS, - .ops = &vlv_dpio_power_well_ops, - .data = PUNIT_POWER_WELL_DPIO_TX_B_LANES_01, - }, - { - .name = "dpio-tx-b-23", - .domains = VLV_DPIO_TX_B_LANES_01_POWER_DOMAINS | - VLV_DPIO_TX_B_LANES_23_POWER_DOMAINS, - .ops = &vlv_dpio_power_well_ops, - .data = PUNIT_POWER_WELL_DPIO_TX_B_LANES_23, - }, - { - .name = "dpio-tx-c-01", - .domains = VLV_DPIO_TX_C_LANES_01_POWER_DOMAINS | - VLV_DPIO_TX_C_LANES_23_POWER_DOMAINS, - .ops = &vlv_dpio_power_well_ops, - .data = PUNIT_POWER_WELL_DPIO_TX_C_LANES_01, - }, - { - .name = "dpio-tx-c-23", - .domains = VLV_DPIO_TX_C_LANES_01_POWER_DOMAINS | - VLV_DPIO_TX_C_LANES_23_POWER_DOMAINS, - .ops = &vlv_dpio_power_well_ops, - .data = PUNIT_POWER_WELL_DPIO_TX_C_LANES_23, - }, - { - .name = "dpio-tx-d-01", - .domains = CHV_DPIO_TX_D_LANES_01_POWER_DOMAINS | - CHV_DPIO_TX_D_LANES_23_POWER_DOMAINS, - .ops = &vlv_dpio_power_well_ops, - .data = PUNIT_POWER_WELL_DPIO_TX_D_LANES_01, - }, - { - .name = "dpio-tx-d-23", - .domains = CHV_DPIO_TX_D_LANES_01_POWER_DOMAINS | - CHV_DPIO_TX_D_LANES_23_POWER_DOMAINS, - .ops = &vlv_dpio_power_well_ops, - .data = PUNIT_POWER_WELL_DPIO_TX_D_LANES_23, - }, -#endif -}; + /* WaForceL3Serialization:vlv */ + I915_WRITE(GEN7_L3SQCREG4, I915_READ(GEN7_L3SQCREG4) & + ~L3SQ_URB_READ_CAM_MATCH_DISABLE); -static struct i915_power_well *lookup_power_well(struct drm_i915_private *dev_priv, - enum punit_power_well power_well_id) -{ - struct i915_power_domains *power_domains = &dev_priv->power_domains; - struct i915_power_well *power_well; - int i; + /* WaDisableDopClockGating:vlv */ + I915_WRITE(GEN7_ROW_CHICKEN2, + _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE)); - for_each_power_well(i, power_well, POWER_DOMAIN_MASK, power_domains) { - if (power_well->data == power_well_id) - return power_well; - } + /* This is required by WaCatErrorRejectionIssue:vlv */ + I915_WRITE(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG, + I915_READ(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG) | + GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB); - return NULL; -} + gen7_setup_fixed_func_scheduler(dev_priv); -#define set_power_wells(power_domains, __power_wells) ({ \ - (power_domains)->power_wells = (__power_wells); \ - (power_domains)->power_well_count = ARRAY_SIZE(__power_wells); \ -}) + /* + * According to the spec, bit 13 (RCZUNIT) must be set on IVB. + * This implements the WaDisableRCZUnitClockGating:vlv workaround. + */ + I915_WRITE(GEN6_UCGCTL2, + GEN6_RCZUNIT_CLOCK_GATE_DISABLE); -int intel_power_domains_init(struct drm_i915_private *dev_priv) -{ - struct i915_power_domains *power_domains = &dev_priv->power_domains; + /* WaDisableL3Bank2xClockGate:vlv + * Disabling L3 clock gating- MMIO 940c[25] = 1 + * Set bit 25, to disable L3_BANK_2x_CLK_GATING */ + I915_WRITE(GEN7_UCGCTL4, + I915_READ(GEN7_UCGCTL4) | GEN7_L3BANK2X_CLOCK_GATE_DISABLE); - mutex_init(&power_domains->lock); + I915_WRITE(MI_ARB_VLV, MI_ARB_DISPLAY_TRICKLE_FEED_DISABLE); /* - * The enabling order will be from lower to higher indexed wells, - * the disabling order is reversed. + * BSpec says this must be set, even though + * WaDisable4x2SubspanOptimization isn't listed for VLV. */ - if (IS_HASWELL(dev_priv->dev)) { - set_power_wells(power_domains, hsw_power_wells); - hsw_pwr = power_domains; - } else if (IS_BROADWELL(dev_priv->dev)) { - set_power_wells(power_domains, bdw_power_wells); - hsw_pwr = power_domains; - } else if (IS_CHERRYVIEW(dev_priv->dev)) { - set_power_wells(power_domains, chv_power_wells); - } else if (IS_VALLEYVIEW(dev_priv->dev)) { - set_power_wells(power_domains, vlv_power_wells); - } else { - set_power_wells(power_domains, i9xx_always_on_power_well); - } + I915_WRITE(CACHE_MODE_1, + _MASKED_BIT_ENABLE(PIXEL_SUBSPAN_COLLECT_OPT_DISABLE)); - return 0; -} + /* + * WaIncreaseL3CreditsForVLVB0:vlv + * This is the hardware default actually. + */ + I915_WRITE(GEN7_L3SQCREG1, VLV_B0_WA_L3SQCREG1_VALUE); -void intel_power_domains_remove(struct drm_i915_private *dev_priv) -{ - hsw_pwr = NULL; + /* + * WaDisableVLVClockGating_VBIIssue:vlv + * Disable clock gating on th GCFG unit to prevent a delay + * in the reporting of vblank events. + */ + I915_WRITE(VLV_GUNIT_CLOCK_GATE, GCFG_DIS); } -static void intel_power_domains_resume(struct drm_i915_private *dev_priv) +static void cherryview_init_clock_gating(struct drm_device *dev) { - struct i915_power_domains *power_domains = &dev_priv->power_domains; - struct i915_power_well *power_well; - int i; - - mutex_lock(&power_domains->lock); - for_each_power_well(i, power_well, POWER_DOMAIN_MASK, power_domains) { - power_well->ops->sync_hw(dev_priv, power_well); - power_well->hw_enabled = power_well->ops->is_enabled(dev_priv, - power_well); - } - mutex_unlock(&power_domains->lock); -} + struct drm_i915_private *dev_priv = dev->dev_private; -static void vlv_cmnlane_wa(struct drm_i915_private *dev_priv) -{ - struct i915_power_well *cmn = - lookup_power_well(dev_priv, PUNIT_POWER_WELL_DPIO_CMN_BC); - struct i915_power_well *disp2d = - lookup_power_well(dev_priv, PUNIT_POWER_WELL_DISP2D); + I915_WRITE(DSPCLK_GATE_D, VRHUNIT_CLOCK_GATE_DISABLE); - /* nothing to do if common lane is already off */ - if (!cmn->ops->is_enabled(dev_priv, cmn)) - return; + I915_WRITE(MI_ARB_VLV, MI_ARB_DISPLAY_TRICKLE_FEED_DISABLE); - /* If the display might be already active skip this */ - if (disp2d->ops->is_enabled(dev_priv, disp2d) && - I915_READ(DPIO_CTL) & DPIO_CMNRST) - return; + /* WaVSRefCountFullforceMissDisable:chv */ + /* WaDSRefCountFullforceMissDisable:chv */ + I915_WRITE(GEN7_FF_THREAD_MODE, + I915_READ(GEN7_FF_THREAD_MODE) & + ~(GEN8_FF_DS_REF_CNT_FFME | GEN7_FF_VS_REF_CNT_FFME)); - DRM_DEBUG_KMS("toggling display PHY side reset\n"); + /* WaDisableSemaphoreAndSyncFlipWait:chv */ + I915_WRITE(GEN6_RC_SLEEP_PSMI_CONTROL, + _MASKED_BIT_ENABLE(GEN8_RC_SEMA_IDLE_MSG_DISABLE)); - /* cmnlane needs DPLL registers */ - disp2d->ops->enable(dev_priv, disp2d); + /* WaDisableCSUnitClockGating:chv */ + I915_WRITE(GEN6_UCGCTL1, I915_READ(GEN6_UCGCTL1) | + GEN6_CSUNIT_CLOCK_GATE_DISABLE); - /* - * From VLV2A0_DP_eDP_HDMI_DPIO_driver_vbios_notes_11.docx: - * Need to assert and de-assert PHY SB reset by gating the - * common lane power, then un-gating it. - * Simply ungating isn't enough to reset the PHY enough to get - * ports and lanes running. - */ - cmn->ops->disable(dev_priv, cmn); + /* WaDisableSDEUnitClockGating:chv */ + I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) | + GEN8_SDEUNIT_CLOCK_GATE_DISABLE); } -void intel_power_domains_init_hw(struct drm_i915_private *dev_priv) +static void g4x_init_clock_gating(struct drm_device *dev) { - struct drm_device *dev = dev_priv->dev; - struct i915_power_domains *power_domains = &dev_priv->power_domains; + struct drm_i915_private *dev_priv = dev->dev_private; + uint32_t dspclk_gate; + + I915_WRITE(RENCLK_GATE_D1, 0); + I915_WRITE(RENCLK_GATE_D2, VF_UNIT_CLOCK_GATE_DISABLE | + GS_UNIT_CLOCK_GATE_DISABLE | + CL_UNIT_CLOCK_GATE_DISABLE); + I915_WRITE(RAMCLK_GATE_D, 0); + dspclk_gate = VRHUNIT_CLOCK_GATE_DISABLE | + OVRUNIT_CLOCK_GATE_DISABLE | + OVCUNIT_CLOCK_GATE_DISABLE; + if (IS_GM45(dev)) + dspclk_gate |= DSSUNIT_CLOCK_GATE_DISABLE; + I915_WRITE(DSPCLK_GATE_D, dspclk_gate); - power_domains->initializing = true; + /* WaDisableRenderCachePipelinedFlush */ + I915_WRITE(CACHE_MODE_0, + _MASKED_BIT_ENABLE(CM0_PIPELINED_RENDER_FLUSH_DISABLE)); - if (IS_VALLEYVIEW(dev) && !IS_CHERRYVIEW(dev)) { - mutex_lock(&power_domains->lock); - vlv_cmnlane_wa(dev_priv); - mutex_unlock(&power_domains->lock); - } + /* WaDisable_RenderCache_OperationalFlush:g4x */ + I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE)); - /* For now, we need the power well to be always enabled. */ - intel_display_set_init_power(dev_priv, true); - intel_power_domains_resume(dev_priv); - power_domains->initializing = false; + g4x_disable_trickle_feed(dev); } -void intel_aux_display_runtime_get(struct drm_i915_private *dev_priv) +static void crestline_init_clock_gating(struct drm_device *dev) { - intel_runtime_pm_get(dev_priv); -} + struct drm_i915_private *dev_priv = dev->dev_private; -void intel_aux_display_runtime_put(struct drm_i915_private *dev_priv) -{ - intel_runtime_pm_put(dev_priv); + I915_WRITE(RENCLK_GATE_D1, I965_RCC_CLOCK_GATE_DISABLE); + I915_WRITE(RENCLK_GATE_D2, 0); + I915_WRITE(DSPCLK_GATE_D, 0); + I915_WRITE(RAMCLK_GATE_D, 0); + I915_WRITE16(DEUC, 0); + I915_WRITE(MI_ARB_STATE, + _MASKED_BIT_ENABLE(MI_ARB_DISPLAY_TRICKLE_FEED_DISABLE)); + + /* WaDisable_RenderCache_OperationalFlush:gen4 */ + I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE)); } -void intel_runtime_pm_get(struct drm_i915_private *dev_priv) +static void broadwater_init_clock_gating(struct drm_device *dev) { - struct drm_device *dev = dev_priv->dev; - struct device *device = &dev->pdev->dev; + struct drm_i915_private *dev_priv = dev->dev_private; - if (!HAS_RUNTIME_PM(dev)) - return; + I915_WRITE(RENCLK_GATE_D1, I965_RCZ_CLOCK_GATE_DISABLE | + I965_RCC_CLOCK_GATE_DISABLE | + I965_RCPB_CLOCK_GATE_DISABLE | + I965_ISC_CLOCK_GATE_DISABLE | + I965_FBC_CLOCK_GATE_DISABLE); + I915_WRITE(RENCLK_GATE_D2, 0); + I915_WRITE(MI_ARB_STATE, + _MASKED_BIT_ENABLE(MI_ARB_DISPLAY_TRICKLE_FEED_DISABLE)); - pm_runtime_get_sync(device); - WARN(dev_priv->pm.suspended, "Device still suspended.\n"); + /* WaDisable_RenderCache_OperationalFlush:gen4 */ + I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE)); } -void intel_runtime_pm_get_noresume(struct drm_i915_private *dev_priv) +static void gen3_init_clock_gating(struct drm_device *dev) { - struct drm_device *dev = dev_priv->dev; - struct device *device = &dev->pdev->dev; + struct drm_i915_private *dev_priv = dev->dev_private; + u32 dstate = I915_READ(D_STATE); - if (!HAS_RUNTIME_PM(dev)) - return; + dstate |= DSTATE_PLL_D3_OFF | DSTATE_GFX_CLOCK_GATING | + DSTATE_DOT_CLOCK_GATING; + I915_WRITE(D_STATE, dstate); + + if (IS_PINEVIEW(dev)) + I915_WRITE(ECOSKPD, _MASKED_BIT_ENABLE(ECO_GATING_CX_ONLY)); + + /* IIR "flip pending" means done if this bit is set */ + I915_WRITE(ECOSKPD, _MASKED_BIT_DISABLE(ECO_FLIP_DONE)); + + /* interrupts should cause a wake up from C3 */ + I915_WRITE(INSTPM, _MASKED_BIT_ENABLE(INSTPM_AGPBUSY_INT_EN)); + + /* On GEN3 we really need to make sure the ARB C3 LP bit is set */ + I915_WRITE(MI_ARB_STATE, _MASKED_BIT_ENABLE(MI_ARB_C3_LP_WRITE_ENABLE)); - WARN(dev_priv->pm.suspended, "Getting nosync-ref while suspended.\n"); - pm_runtime_get_noresume(device); + I915_WRITE(MI_ARB_STATE, + _MASKED_BIT_ENABLE(MI_ARB_DISPLAY_TRICKLE_FEED_DISABLE)); } -void intel_runtime_pm_put(struct drm_i915_private *dev_priv) +static void i85x_init_clock_gating(struct drm_device *dev) { - struct drm_device *dev = dev_priv->dev; - struct device *device = &dev->pdev->dev; + struct drm_i915_private *dev_priv = dev->dev_private; - if (!HAS_RUNTIME_PM(dev)) - return; + I915_WRITE(RENCLK_GATE_D1, SV_CLOCK_GATE_DISABLE); + + /* interrupts should cause a wake up from C3 */ + I915_WRITE(MI_STATE, _MASKED_BIT_ENABLE(MI_AGPBUSY_INT_EN) | + _MASKED_BIT_DISABLE(MI_AGPBUSY_830_MODE)); - pm_runtime_mark_last_busy(device); - pm_runtime_put_autosuspend(device); + I915_WRITE(MEM_MODE, + _MASKED_BIT_ENABLE(MEM_DISPLAY_TRICKLE_FEED_DISABLE)); } -void intel_init_runtime_pm(struct drm_i915_private *dev_priv) +static void i830_init_clock_gating(struct drm_device *dev) { - struct drm_device *dev = dev_priv->dev; - struct device *device = &dev->pdev->dev; - - if (!HAS_RUNTIME_PM(dev)) - return; + struct drm_i915_private *dev_priv = dev->dev_private; - pm_runtime_set_active(device); + I915_WRITE(DSPCLK_GATE_D, OVRUNIT_CLOCK_GATE_DISABLE); - /* - * RPM depends on RC6 to save restore the GT HW context, so make RC6 a - * requirement. - */ - if (!intel_enable_rc6(dev)) { - DRM_INFO("RC6 disabled, disabling runtime PM support\n"); - return; - } + I915_WRITE(MEM_MODE, + _MASKED_BIT_ENABLE(MEM_DISPLAY_A_TRICKLE_FEED_DISABLE) | + _MASKED_BIT_ENABLE(MEM_DISPLAY_B_TRICKLE_FEED_DISABLE)); +} - pm_runtime_set_autosuspend_delay(device, 10000); /* 10s */ - pm_runtime_mark_last_busy(device); - pm_runtime_use_autosuspend(device); +void intel_init_clock_gating(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev->dev_private; - pm_runtime_put_autosuspend(device); + dev_priv->display.init_clock_gating(dev); } -void intel_fini_runtime_pm(struct drm_i915_private *dev_priv) +void intel_suspend_hw(struct drm_device *dev) { - struct drm_device *dev = dev_priv->dev; - struct device *device = &dev->pdev->dev; + if (HAS_PCH_LPT(dev)) + lpt_suspend_hw(dev); +} - if (!HAS_RUNTIME_PM(dev)) +static void intel_init_fbc(struct drm_i915_private *dev_priv) +{ + if (!HAS_FBC(dev_priv)) { + dev_priv->fbc.enabled = false; return; + } - if (!intel_enable_rc6(dev)) - return; + if (INTEL_INFO(dev_priv)->gen >= 7) { + dev_priv->display.fbc_enabled = ironlake_fbc_enabled; + dev_priv->display.enable_fbc = gen7_enable_fbc; + dev_priv->display.disable_fbc = ironlake_disable_fbc; + } else if (INTEL_INFO(dev_priv)->gen >= 5) { + dev_priv->display.fbc_enabled = ironlake_fbc_enabled; + dev_priv->display.enable_fbc = ironlake_enable_fbc; + dev_priv->display.disable_fbc = ironlake_disable_fbc; + } else if (IS_GM45(dev_priv)) { + dev_priv->display.fbc_enabled = g4x_fbc_enabled; + dev_priv->display.enable_fbc = g4x_enable_fbc; + dev_priv->display.disable_fbc = g4x_disable_fbc; + } else { + dev_priv->display.fbc_enabled = i8xx_fbc_enabled; + dev_priv->display.enable_fbc = i8xx_enable_fbc; + dev_priv->display.disable_fbc = i8xx_disable_fbc; + + /* This value was pulled out of someone's hat */ + I915_WRITE(FBC_CONTROL, 500 << FBC_CTL_INTERVAL_SHIFT); + } - /* Make sure we're not suspended first. */ - pm_runtime_get_sync(device); - pm_runtime_disable(device); + dev_priv->fbc.enabled = dev_priv->display.fbc_enabled(dev_priv->dev); } /* Set up chip specific power management-related functions */ @@ -7198,28 +7060,7 @@ void intel_init_pm(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; - if (HAS_FBC(dev)) { - if (INTEL_INFO(dev)->gen >= 7) { - dev_priv->display.fbc_enabled = ironlake_fbc_enabled; - dev_priv->display.enable_fbc = gen7_enable_fbc; - dev_priv->display.disable_fbc = ironlake_disable_fbc; - } else if (INTEL_INFO(dev)->gen >= 5) { - dev_priv->display.fbc_enabled = ironlake_fbc_enabled; - dev_priv->display.enable_fbc = ironlake_enable_fbc; - dev_priv->display.disable_fbc = ironlake_disable_fbc; - } else if (IS_GM45(dev)) { - dev_priv->display.fbc_enabled = g4x_fbc_enabled; - dev_priv->display.enable_fbc = g4x_enable_fbc; - dev_priv->display.disable_fbc = g4x_disable_fbc; - } else { - dev_priv->display.fbc_enabled = i8xx_fbc_enabled; - dev_priv->display.enable_fbc = i8xx_enable_fbc; - dev_priv->display.disable_fbc = i8xx_disable_fbc; - - /* This value was pulled out of someone's hat */ - I915_WRITE(FBC_CONTROL, 500 << FBC_CTL_INTERVAL_SHIFT); - } - } + intel_init_fbc(dev_priv); /* For cxsr */ if (IS_PINEVIEW(dev)) @@ -7228,7 +7069,13 @@ void intel_init_pm(struct drm_device *dev) i915_ironlake_get_mem_freq(dev); /* For FIFO watermark updates */ - if (HAS_PCH_SPLIT(dev)) { + if (INTEL_INFO(dev)->gen >= 9) { + skl_setup_wm_latency(dev); + + dev_priv->display.init_clock_gating = gen9_init_clock_gating; + dev_priv->display.update_wm = skl_update_wm; + dev_priv->display.update_sprite_wm = skl_update_sprite_wm; + } else if (HAS_PCH_SPLIT(dev)) { ilk_setup_wm_latency(dev); if ((IS_GEN5(dev) && dev_priv->wm.pri_latency[1] && @@ -7309,7 +7156,7 @@ void intel_init_pm(struct drm_device *dev) } } -int sandybridge_pcode_read(struct drm_i915_private *dev_priv, u8 mbox, u32 *val) +int sandybridge_pcode_read(struct drm_i915_private *dev_priv, u32 mbox, u32 *val) { WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock)); @@ -7319,6 +7166,7 @@ int sandybridge_pcode_read(struct drm_i915_private *dev_priv, u8 mbox, u32 *val) } I915_WRITE(GEN6_PCODE_DATA, *val); + I915_WRITE(GEN6_PCODE_DATA1, 0); I915_WRITE(GEN6_PCODE_MAILBOX, GEN6_PCODE_READY | mbox); if (wait_for((I915_READ(GEN6_PCODE_MAILBOX) & GEN6_PCODE_READY) == 0, @@ -7333,7 +7181,7 @@ int sandybridge_pcode_read(struct drm_i915_private *dev_priv, u8 mbox, u32 *val) return 0; } -int sandybridge_pcode_write(struct drm_i915_private *dev_priv, u8 mbox, u32 val) +int sandybridge_pcode_write(struct drm_i915_private *dev_priv, u32 mbox, u32 val) { WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock)); @@ -7356,99 +7204,66 @@ int sandybridge_pcode_write(struct drm_i915_private *dev_priv, u8 mbox, u32 val) return 0; } -static int byt_gpu_freq(struct drm_i915_private *dev_priv, int val) +static int vlv_gpu_freq_div(unsigned int czclk_freq) { - int div; - - /* 4 x czclk */ - switch (dev_priv->mem_freq) { - case 800: - div = 10; - break; - case 1066: - div = 12; - break; - case 1333: - div = 16; - break; + switch (czclk_freq) { + case 200: + return 10; + case 267: + return 12; + case 320: + case 333: + return 16; + case 400: + return 20; default: return -1; } +} + +static int byt_gpu_freq(struct drm_i915_private *dev_priv, int val) +{ + int div, czclk_freq = DIV_ROUND_CLOSEST(dev_priv->mem_freq, 4); - return DIV_ROUND_CLOSEST(dev_priv->mem_freq * (val + 6 - 0xbd), 4 * div); + div = vlv_gpu_freq_div(czclk_freq); + if (div < 0) + return div; + + return DIV_ROUND_CLOSEST(czclk_freq * (val + 6 - 0xbd), div); } static int byt_freq_opcode(struct drm_i915_private *dev_priv, int val) { - int mul; + int mul, czclk_freq = DIV_ROUND_CLOSEST(dev_priv->mem_freq, 4); - /* 4 x czclk */ - switch (dev_priv->mem_freq) { - case 800: - mul = 10; - break; - case 1066: - mul = 12; - break; - case 1333: - mul = 16; - break; - default: - return -1; - } + mul = vlv_gpu_freq_div(czclk_freq); + if (mul < 0) + return mul; - return DIV_ROUND_CLOSEST(4 * mul * val, dev_priv->mem_freq) + 0xbd - 6; + return DIV_ROUND_CLOSEST(mul * val, czclk_freq) + 0xbd - 6; } static int chv_gpu_freq(struct drm_i915_private *dev_priv, int val) { - int div, freq; - - switch (dev_priv->rps.cz_freq) { - case 200: - div = 5; - break; - case 267: - div = 6; - break; - case 320: - case 333: - case 400: - div = 8; - break; - default: - return -1; - } + int div, czclk_freq = dev_priv->rps.cz_freq; - freq = (DIV_ROUND_CLOSEST((dev_priv->rps.cz_freq * val), 2 * div) / 2); + div = vlv_gpu_freq_div(czclk_freq) / 2; + if (div < 0) + return div; - return freq; + return DIV_ROUND_CLOSEST(czclk_freq * val, 2 * div) / 2; } static int chv_freq_opcode(struct drm_i915_private *dev_priv, int val) { - int mul, opcode; + int mul, czclk_freq = dev_priv->rps.cz_freq; - switch (dev_priv->rps.cz_freq) { - case 200: - mul = 5; - break; - case 267: - mul = 6; - break; - case 320: - case 333: - case 400: - mul = 8; - break; - default: - return -1; - } + mul = vlv_gpu_freq_div(czclk_freq) / 2; + if (mul < 0) + return mul; /* CHV needs even values */ - opcode = (DIV_ROUND_CLOSEST((val * 2 * mul), dev_priv->rps.cz_freq) * 2); - - return opcode; + return DIV_ROUND_CLOSEST(val * 2 * mul, czclk_freq) * 2; } int vlv_gpu_freq(struct drm_i915_private *dev_priv, int val) @@ -7485,5 +7300,4 @@ void intel_pm_setup(struct drm_device *dev) intel_gen6_powersave_work); dev_priv->pm.suspended = false; - dev_priv->pm._irqs_disabled = false; }