drm/i915: Update WaFlushCoherentL3CacheLinesAtContextSwitch
author: Arun Siluvery <arun.siluvery@linux.intel.com>
Fri, 3 Jul 2015 13:27:31 +0000 (14:27 +0100)
committer: Daniel Vetter <daniel.vetter@ffwll.ch>
Mon, 6 Jul 2015 12:37:39 +0000 (14:37 +0200)
In this WA we need to set GEN8_L3SQCREG4[21:21] and reset it after PIPE_CONTROL
instruction but there is a slight complication as this is applied in WA batch
where the values are only initialized once.
Dave identified an issue with the current implementation, where the register value
is read once at the beginning and then reused; this patch corrects that by saving
the register value to memory, updating the register with the bit of interest set,
and then restoring the original value from memory.

This implementation uses MI_LOAD_REGISTER_MEM which is currently only used
by command parser and was using a default length of 0. This is now updated
with correct length and moved to appropriate place.

Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Dave Gordon <david.s.gordon@intel.com>
Signed-off-by: Arun Siluvery <arun.siluvery@linux.intel.com>
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
drivers/gpu/drm/i915/i915_cmd_parser.c
drivers/gpu/drm/i915/i915_reg.h
drivers/gpu/drm/i915/intel_lrc.c

index 306d9e4e5cf376bc57901d665622a8b16fdce4d9..430571b977db9d771a2ff15ae543526e3a321418 100644 (file)
@@ -131,7 +131,7 @@ static const struct drm_i915_cmd_descriptor common_cmds[] = {
                        .mask = MI_GLOBAL_GTT,
                        .expected = 0,
              }},                                                      ),
-       CMD(  MI_LOAD_REGISTER_MEM,             SMI,   !F,  0xFF,   W | B,
+       CMD(  MI_LOAD_REGISTER_MEM(1),             SMI,   !F,  0xFF,   W | B,
              .reg = { .offset = 1, .mask = 0x007FFFFC },
              .bits = {{
                        .offset = 0,
@@ -1021,7 +1021,7 @@ static bool check_cmd(const struct intel_engine_cs *ring,
                         * only MI_LOAD_REGISTER_IMM commands.
                         */
                        if (reg_addr == OACONTROL) {
-                               if (desc->cmd.value == MI_LOAD_REGISTER_MEM) {
+                               if (desc->cmd.value == MI_LOAD_REGISTER_MEM(1)) {
                                        DRM_DEBUG_DRIVER("CMD: Rejected LRM to OACONTROL\n");
                                        return false;
                                }
@@ -1035,7 +1035,7 @@ static bool check_cmd(const struct intel_engine_cs *ring,
                         * allowed mask/value pair given in the whitelist entry.
                         */
                        if (reg->mask) {
-                               if (desc->cmd.value == MI_LOAD_REGISTER_MEM) {
+                               if (desc->cmd.value == MI_LOAD_REGISTER_MEM(1)) {
                                        DRM_DEBUG_DRIVER("CMD: Rejected LRM to masked register 0x%08X\n",
                                                         reg_addr);
                                        return false;
index 45ff3d3e79c8111a61629409732c667434b8627e..1c4d7894b429a880bffa5c9cea38c9600730ebbd 100644 (file)
 #define   MI_INVALIDATE_BSD            (1<<7)
 #define   MI_FLUSH_DW_USE_GTT          (1<<2)
 #define   MI_FLUSH_DW_USE_PPGTT                (0<<2)
+#define MI_LOAD_REGISTER_MEM(x) MI_INSTR(0x29, 2*(x)-1)
+#define MI_LOAD_REGISTER_MEM_GEN8(x) MI_INSTR(0x29, 3*(x)-1)
 #define MI_BATCH_BUFFER                MI_INSTR(0x30, 1)
 #define   MI_BATCH_NON_SECURE          (1)
 /* for snb/ivb/vlv this also means "batch in ppgtt" when ppgtt is enabled. */
 #define MI_CLFLUSH              MI_INSTR(0x27, 0)
 #define MI_REPORT_PERF_COUNT    MI_INSTR(0x28, 0)
 #define   MI_REPORT_PERF_COUNT_GGTT (1<<0)
-#define MI_LOAD_REGISTER_MEM    MI_INSTR(0x29, 0)
 #define MI_LOAD_REGISTER_REG    MI_INSTR(0x2A, 0)
 #define MI_RS_STORE_DATA_IMM    MI_INSTR(0x2B, 0)
 #define MI_LOAD_URB_MEM         MI_INSTR(0x2C, 0)
index 0160bec1e7ba798a9104c697db3f2cdabfbbf938..a499f16db194b4471520ff0d9c02978ade1fa110 100644 (file)
@@ -1092,6 +1092,56 @@ static int intel_logical_ring_workarounds_emit(struct drm_i915_gem_request *req)
                batch[index++] = (cmd);                                 \
        } while (0)
 
+
+/*
+ * In this WA we need to set GEN8_L3SQCREG4[21:21] and reset it after
+ * PIPE_CONTROL instruction. This is required for the flush to happen correctly
+ * but there is a slight complication as this is applied in WA batch where the
+ * values are only initialized once so we cannot take register value at the
+ * beginning and reuse it further; hence we save its value to memory, upload a
+ * constant value with bit21 set and then we restore it back with the saved value.
+ * To simplify the WA, a constant value is formed by using the default value
+ * of this register. This shouldn't be a problem because we are only modifying
+ * it for a short period and this batch is non-preemptible. We can of course
+ * use additional instructions that read the actual value of the register
+ * at that time and set our bit of interest but it makes the WA complicated.
+ *
+ * This WA is also required for Gen9 so extracting as a function avoids
+ * code duplication.
+ *
+ * Commands are appended to @batch through wa_ctx_emit(), which stores each
+ * dword at batch[index++]; the index following the last emitted dword is
+ * returned so the caller can continue emitting from there.
+ */
+static inline int gen8_emit_flush_coherentl3_wa(struct intel_engine_cs *ring,
+                                               uint32_t *const batch,
+                                               uint32_t index)
+{
+       uint32_t l3sqc4_flush = (0x40400000 | GEN8_LQSC_FLUSH_COHERENT_LINES);
+       /* Save GEN8_L3SQCREG4 at scratch + 256. NOTE(review): the trailing 0 dword is presumably the upper address bits - confirm. */
+       wa_ctx_emit(batch, (MI_STORE_REGISTER_MEM_GEN8(1) |
+                           MI_SRM_LRM_GLOBAL_GTT));
+       wa_ctx_emit(batch, GEN8_L3SQCREG4);
+       wa_ctx_emit(batch, ring->scratch.gtt_offset + 256);
+       wa_ctx_emit(batch, 0);
+       /* Load the constant (default value with the flush bit ORed in) into the register. */
+       wa_ctx_emit(batch, MI_LOAD_REGISTER_IMM(1));
+       wa_ctx_emit(batch, GEN8_L3SQCREG4);
+       wa_ctx_emit(batch, l3sqc4_flush);
+       /* PIPE_CONTROL with CS stall + DC flush: one header dword followed by five dwords. */
+       wa_ctx_emit(batch, GFX_OP_PIPE_CONTROL(6));
+       wa_ctx_emit(batch, (PIPE_CONTROL_CS_STALL |
+                           PIPE_CONTROL_DC_FLUSH_ENABLE));
+       wa_ctx_emit(batch, 0);
+       wa_ctx_emit(batch, 0);
+       wa_ctx_emit(batch, 0);
+       wa_ctx_emit(batch, 0);
+       /* Reload GEN8_L3SQCREG4 from the same scratch location written by the store above. */
+       wa_ctx_emit(batch, (MI_LOAD_REGISTER_MEM_GEN8(1) |
+                           MI_SRM_LRM_GLOBAL_GTT));
+       wa_ctx_emit(batch, GEN8_L3SQCREG4);
+       wa_ctx_emit(batch, ring->scratch.gtt_offset + 256);
+       wa_ctx_emit(batch, 0);
+       /* index was advanced by every wa_ctx_emit() above; hand it back to the caller. */
+       return index;
+}
+
 static inline uint32_t wa_ctx_start(struct i915_wa_ctx_bb *wa_ctx,
                                    uint32_t offset,
                                    uint32_t start_alignment)
@@ -1152,25 +1202,9 @@ static int gen8_init_indirectctx_bb(struct intel_engine_cs *ring,
 
        /* WaFlushCoherentL3CacheLinesAtContextSwitch:bdw */
        if (IS_BROADWELL(ring->dev)) {
-               struct drm_i915_private *dev_priv = to_i915(ring->dev);
-               uint32_t l3sqc4_flush = (I915_READ(GEN8_L3SQCREG4) |
-                                        GEN8_LQSC_FLUSH_COHERENT_LINES);
-
-               wa_ctx_emit(batch, MI_LOAD_REGISTER_IMM(1));
-               wa_ctx_emit(batch, GEN8_L3SQCREG4);
-               wa_ctx_emit(batch, l3sqc4_flush);
-
-               wa_ctx_emit(batch, GFX_OP_PIPE_CONTROL(6));
-               wa_ctx_emit(batch, (PIPE_CONTROL_CS_STALL |
-                                   PIPE_CONTROL_DC_FLUSH_ENABLE));
-               wa_ctx_emit(batch, 0);
-               wa_ctx_emit(batch, 0);
-               wa_ctx_emit(batch, 0);
-               wa_ctx_emit(batch, 0);
-
-               wa_ctx_emit(batch, MI_LOAD_REGISTER_IMM(1));
-               wa_ctx_emit(batch, GEN8_L3SQCREG4);
-               wa_ctx_emit(batch, l3sqc4_flush & ~GEN8_LQSC_FLUSH_COHERENT_LINES);
+               index = gen8_emit_flush_coherentl3_wa(ring, batch, index);
+               if (index < 0)
+                       return index;
        }
 
        /* WaClearSlmSpaceAtContextSwitch:bdw,chv */