From: Arun Siluvery Date: Fri, 3 Jul 2015 13:27:31 +0000 (+0100) Subject: drm/i915: Update WaFlushCoherentL3CacheLinesAtContextSwitch X-Git-Tag: firefly_0821_release~176^2~721^2~33^2~295 X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=9e00084750c0f0603ec8a6ff15e0bcf78b8202bd;p=firefly-linux-kernel-4.4.55.git drm/i915: Update WaFlushCoherentL3CacheLinesAtContextSwitch In this WA we need to set GEN8_L3SQCREG4[21:21] and reset it after PIPE_CONTROL instruction but there is a slight complication as this is applied in WA batch where the values are only initialized once. Dave identified an issue with the current implementation where the register value is read once at the beginning and it is reused; this patch corrects this by saving the register value to memory, update register with the bit of our interest and restore it back with original value. This implementation uses MI_LOAD_REGISTER_MEM which is currently only used by command parser and was using a default length of 0. This is now updated with correct length and moved to appropriate place. Cc: Chris Wilson Cc: Dave Gordon Signed-off-by: Arun Siluvery Reviewed-by: Chris Wilson Signed-off-by: Daniel Vetter --- diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c index 306d9e4e5cf3..430571b977db 100644 --- a/drivers/gpu/drm/i915/i915_cmd_parser.c +++ b/drivers/gpu/drm/i915/i915_cmd_parser.c @@ -131,7 +131,7 @@ static const struct drm_i915_cmd_descriptor common_cmds[] = { .mask = MI_GLOBAL_GTT, .expected = 0, }}, ), - CMD( MI_LOAD_REGISTER_MEM, SMI, !F, 0xFF, W | B, + CMD( MI_LOAD_REGISTER_MEM(1), SMI, !F, 0xFF, W | B, .reg = { .offset = 1, .mask = 0x007FFFFC }, .bits = {{ .offset = 0, @@ -1021,7 +1021,7 @@ static bool check_cmd(const struct intel_engine_cs *ring, * only MI_LOAD_REGISTER_IMM commands. */ if (reg_addr == OACONTROL) { - if (desc->cmd.value == MI_LOAD_REGISTER_MEM) { + if (desc->cmd.value == MI_LOAD_REGISTER_MEM(1)) { DRM_DEBUG_DRIVER("CMD: Rejected LRM to OACONTROL\n"); return false; } @@ -1035,7 +1035,7 @@ static bool check_cmd(const struct intel_engine_cs *ring, * allowed mask/value pair given in the whitelist entry. */ if (reg->mask) { - if (desc->cmd.value == MI_LOAD_REGISTER_MEM) { + if (desc->cmd.value == MI_LOAD_REGISTER_MEM(1)) { DRM_DEBUG_DRIVER("CMD: Rejected LRM to masked register 0x%08X\n", reg_addr); return false; diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 45ff3d3e79c8..1c4d7894b429 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -354,6 +354,8 @@ #define MI_INVALIDATE_BSD (1<<7) #define MI_FLUSH_DW_USE_GTT (1<<2) #define MI_FLUSH_DW_USE_PPGTT (0<<2) +#define MI_LOAD_REGISTER_MEM(x) MI_INSTR(0x29, 2*(x)-1) +#define MI_LOAD_REGISTER_MEM_GEN8(x) MI_INSTR(0x29, 3*(x)-1) #define MI_BATCH_BUFFER MI_INSTR(0x30, 1) #define MI_BATCH_NON_SECURE (1) /* for snb/ivb/vlv this also means "batch in ppgtt" when ppgtt is enabled. */ @@ -459,7 +461,6 @@ #define MI_CLFLUSH MI_INSTR(0x27, 0) #define MI_REPORT_PERF_COUNT MI_INSTR(0x28, 0) #define MI_REPORT_PERF_COUNT_GGTT (1<<0) -#define MI_LOAD_REGISTER_MEM MI_INSTR(0x29, 0) #define MI_LOAD_REGISTER_REG MI_INSTR(0x2A, 0) #define MI_RS_STORE_DATA_IMM MI_INSTR(0x2B, 0) #define MI_LOAD_URB_MEM MI_INSTR(0x2C, 0) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 0160bec1e7ba..a499f16db194 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -1092,6 +1092,56 @@ static int intel_logical_ring_workarounds_emit(struct drm_i915_gem_request *req) batch[index++] = (cmd); \ } while (0) + +/* + * In this WA we need to set GEN8_L3SQCREG4[21:21] and reset it after + * PIPE_CONTROL instruction. This is required for the flush to happen correctly + * but there is a slight complication as this is applied in WA batch where the + * values are only initialized once so we cannot take register value at the + * beginning and reuse it further; hence we save its value to memory, upload a + * constant value with bit21 set and then we restore it back with the saved value. + * To simplify the WA, a constant value is formed by using the default value + * of this register. This shouldn't be a problem because we are only modifying + * it for a short period and this batch in non-premptible. We can ofcourse + * use additional instructions that read the actual value of the register + * at that time and set our bit of interest but it makes the WA complicated. + * + * This WA is also required for Gen9 so extracting as a function avoids + * code duplication. + */ +static inline int gen8_emit_flush_coherentl3_wa(struct intel_engine_cs *ring, + uint32_t *const batch, + uint32_t index) +{ + uint32_t l3sqc4_flush = (0x40400000 | GEN8_LQSC_FLUSH_COHERENT_LINES); + + wa_ctx_emit(batch, (MI_STORE_REGISTER_MEM_GEN8(1) | + MI_SRM_LRM_GLOBAL_GTT)); + wa_ctx_emit(batch, GEN8_L3SQCREG4); + wa_ctx_emit(batch, ring->scratch.gtt_offset + 256); + wa_ctx_emit(batch, 0); + + wa_ctx_emit(batch, MI_LOAD_REGISTER_IMM(1)); + wa_ctx_emit(batch, GEN8_L3SQCREG4); + wa_ctx_emit(batch, l3sqc4_flush); + + wa_ctx_emit(batch, GFX_OP_PIPE_CONTROL(6)); + wa_ctx_emit(batch, (PIPE_CONTROL_CS_STALL | + PIPE_CONTROL_DC_FLUSH_ENABLE)); + wa_ctx_emit(batch, 0); + wa_ctx_emit(batch, 0); + wa_ctx_emit(batch, 0); + wa_ctx_emit(batch, 0); + + wa_ctx_emit(batch, (MI_LOAD_REGISTER_MEM_GEN8(1) | + MI_SRM_LRM_GLOBAL_GTT)); + wa_ctx_emit(batch, GEN8_L3SQCREG4); + wa_ctx_emit(batch, ring->scratch.gtt_offset + 256); + wa_ctx_emit(batch, 0); + + return index; +} + static inline uint32_t wa_ctx_start(struct i915_wa_ctx_bb *wa_ctx, uint32_t offset, uint32_t start_alignment) @@ -1152,25 +1202,9 @@ static int gen8_init_indirectctx_bb(struct intel_engine_cs *ring, /* WaFlushCoherentL3CacheLinesAtContextSwitch:bdw */ if (IS_BROADWELL(ring->dev)) { - struct drm_i915_private *dev_priv = to_i915(ring->dev); - uint32_t l3sqc4_flush = (I915_READ(GEN8_L3SQCREG4) | - GEN8_LQSC_FLUSH_COHERENT_LINES); - - wa_ctx_emit(batch, MI_LOAD_REGISTER_IMM(1)); - wa_ctx_emit(batch, GEN8_L3SQCREG4); - wa_ctx_emit(batch, l3sqc4_flush); - - wa_ctx_emit(batch, GFX_OP_PIPE_CONTROL(6)); - wa_ctx_emit(batch, (PIPE_CONTROL_CS_STALL | - PIPE_CONTROL_DC_FLUSH_ENABLE)); - wa_ctx_emit(batch, 0); - wa_ctx_emit(batch, 0); - wa_ctx_emit(batch, 0); - wa_ctx_emit(batch, 0); - - wa_ctx_emit(batch, MI_LOAD_REGISTER_IMM(1)); - wa_ctx_emit(batch, GEN8_L3SQCREG4); - wa_ctx_emit(batch, l3sqc4_flush & ~GEN8_LQSC_FLUSH_COHERENT_LINES); + index = gen8_emit_flush_coherentl3_wa(ring, batch, index); + if (index < 0) + return index; } /* WaClearSlmSpaceAtContextSwitch:bdw,chv */