return dev_priv->gpu_error.stop_rings & intel_ring_flag(ring);
}
-void __intel_ring_advance(struct intel_engine_cs *ring)
+static void __intel_ring_advance(struct intel_engine_cs *ring)
{
struct intel_ringbuffer *ringbuf = ring->buffer;
ringbuf->tail &= ringbuf->size - 1;
}
static int
-gen2_render_ring_flush(struct intel_engine_cs *ring,
+gen2_render_ring_flush(struct drm_i915_gem_request *req,
u32 invalidate_domains,
u32 flush_domains)
{
+ struct intel_engine_cs *ring = req->ring;
u32 cmd;
int ret;
if (invalidate_domains & I915_GEM_DOMAIN_SAMPLER)
cmd |= MI_READ_FLUSH;
- ret = intel_ring_begin(ring, 2);
+ ret = intel_ring_begin(req, 2);
if (ret)
return ret;
}
static int
-gen4_render_ring_flush(struct intel_engine_cs *ring,
+gen4_render_ring_flush(struct drm_i915_gem_request *req,
u32 invalidate_domains,
u32 flush_domains)
{
+ struct intel_engine_cs *ring = req->ring;
struct drm_device *dev = ring->dev;
u32 cmd;
int ret;
(IS_G4X(dev) || IS_GEN5(dev)))
cmd |= MI_INVALIDATE_ISP;
- ret = intel_ring_begin(ring, 2);
+ ret = intel_ring_begin(req, 2);
if (ret)
return ret;
* really our business. That leaves only stall at scoreboard.
*/
static int
-intel_emit_post_sync_nonzero_flush(struct intel_engine_cs *ring)
+intel_emit_post_sync_nonzero_flush(struct drm_i915_gem_request *req)
{
+ struct intel_engine_cs *ring = req->ring;
u32 scratch_addr = ring->scratch.gtt_offset + 2 * CACHELINE_BYTES;
int ret;
-
- ret = intel_ring_begin(ring, 6);
+ ret = intel_ring_begin(req, 6);
if (ret)
return ret;
intel_ring_emit(ring, MI_NOOP);
intel_ring_advance(ring);
- ret = intel_ring_begin(ring, 6);
+ ret = intel_ring_begin(req, 6);
if (ret)
return ret;
}
static int
-gen6_render_ring_flush(struct intel_engine_cs *ring,
- u32 invalidate_domains, u32 flush_domains)
+gen6_render_ring_flush(struct drm_i915_gem_request *req,
+ u32 invalidate_domains, u32 flush_domains)
{
+ struct intel_engine_cs *ring = req->ring;
u32 flags = 0;
u32 scratch_addr = ring->scratch.gtt_offset + 2 * CACHELINE_BYTES;
int ret;
/* Force SNB workarounds for PIPE_CONTROL flushes */
- ret = intel_emit_post_sync_nonzero_flush(ring);
+ ret = intel_emit_post_sync_nonzero_flush(req);
if (ret)
return ret;
flags |= PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_CS_STALL;
}
- ret = intel_ring_begin(ring, 4);
+ ret = intel_ring_begin(req, 4);
if (ret)
return ret;
}
static int
-gen7_render_ring_cs_stall_wa(struct intel_engine_cs *ring)
+gen7_render_ring_cs_stall_wa(struct drm_i915_gem_request *req)
{
+ struct intel_engine_cs *ring = req->ring;
int ret;
- ret = intel_ring_begin(ring, 4);
+ ret = intel_ring_begin(req, 4);
if (ret)
return ret;
}
static int
-gen7_render_ring_flush(struct intel_engine_cs *ring,
+gen7_render_ring_flush(struct drm_i915_gem_request *req,
u32 invalidate_domains, u32 flush_domains)
{
+ struct intel_engine_cs *ring = req->ring;
u32 flags = 0;
u32 scratch_addr = ring->scratch.gtt_offset + 2 * CACHELINE_BYTES;
int ret;
/* Workaround: we must issue a pipe_control with CS-stall bit
* set before a pipe_control command that has the state cache
* invalidate bit set. */
- gen7_render_ring_cs_stall_wa(ring);
+ gen7_render_ring_cs_stall_wa(req);
}
- ret = intel_ring_begin(ring, 4);
+ ret = intel_ring_begin(req, 4);
if (ret)
return ret;
}
static int
-gen8_emit_pipe_control(struct intel_engine_cs *ring,
+gen8_emit_pipe_control(struct drm_i915_gem_request *req,
u32 flags, u32 scratch_addr)
{
+ struct intel_engine_cs *ring = req->ring;
int ret;
- ret = intel_ring_begin(ring, 6);
+ ret = intel_ring_begin(req, 6);
if (ret)
return ret;
}
static int
-gen8_render_ring_flush(struct intel_engine_cs *ring,
+gen8_render_ring_flush(struct drm_i915_gem_request *req,
u32 invalidate_domains, u32 flush_domains)
{
u32 flags = 0;
- u32 scratch_addr = ring->scratch.gtt_offset + 2 * CACHELINE_BYTES;
+ u32 scratch_addr = req->ring->scratch.gtt_offset + 2 * CACHELINE_BYTES;
int ret;
flags |= PIPE_CONTROL_CS_STALL;
flags |= PIPE_CONTROL_GLOBAL_GTT_IVB;
/* WaCsStallBeforeStateCacheInvalidate:bdw,chv */
- ret = gen8_emit_pipe_control(ring,
+ ret = gen8_emit_pipe_control(req,
PIPE_CONTROL_CS_STALL |
PIPE_CONTROL_STALL_AT_SCOREBOARD,
0);
return ret;
}
- return gen8_emit_pipe_control(ring, flags, scratch_addr);
+ return gen8_emit_pipe_control(req, flags, scratch_addr);
}
static void ring_write_tail(struct intel_engine_cs *ring,
return ret;
}
-static int intel_ring_workarounds_emit(struct intel_engine_cs *ring,
- struct intel_context *ctx)
+static int intel_ring_workarounds_emit(struct drm_i915_gem_request *req)
{
int ret, i;
+ struct intel_engine_cs *ring = req->ring;
struct drm_device *dev = ring->dev;
struct drm_i915_private *dev_priv = dev->dev_private;
struct i915_workarounds *w = &dev_priv->workarounds;
return 0;
ring->gpu_caches_dirty = true;
- ret = intel_ring_flush_all_caches(ring);
+ ret = intel_ring_flush_all_caches(req);
if (ret)
return ret;
- ret = intel_ring_begin(ring, (w->count * 2 + 2));
+ ret = intel_ring_begin(req, (w->count * 2 + 2));
if (ret)
return ret;
intel_ring_advance(ring);
ring->gpu_caches_dirty = true;
- ret = intel_ring_flush_all_caches(ring);
+ ret = intel_ring_flush_all_caches(req);
if (ret)
return ret;
return 0;
}
-static int intel_rcs_ctx_init(struct intel_engine_cs *ring,
- struct intel_context *ctx)
+static int intel_rcs_ctx_init(struct drm_i915_gem_request *req)
{
int ret;
- ret = intel_ring_workarounds_emit(ring, ctx);
+ ret = intel_ring_workarounds_emit(req);
if (ret != 0)
return ret;
- ret = i915_gem_render_state_init(ring);
+ ret = i915_gem_render_state_init(req);
if (ret)
DRM_ERROR("init render state: %d\n", ret);
struct drm_device *dev = ring->dev;
struct drm_i915_private *dev_priv = dev->dev_private;
+ WA_SET_BIT_MASKED(INSTPM, INSTPM_FORCE_ORDERING);
+
+ /* WaDisableAsyncFlipPerfMode:bdw */
+ WA_SET_BIT_MASKED(MI_MODE, ASYNC_FLIP_PERF_DISABLE);
+
/* WaDisablePartialInstShootdown:bdw */
/* WaDisableThreadStallDopClockGating:bdw (pre-production) */
WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
struct drm_device *dev = ring->dev;
struct drm_i915_private *dev_priv = dev->dev_private;
+ WA_SET_BIT_MASKED(INSTPM, INSTPM_FORCE_ORDERING);
+
+ /* WaDisableAsyncFlipPerfMode:chv */
+ WA_SET_BIT_MASKED(MI_MODE, ASYNC_FLIP_PERF_DISABLE);
+
/* WaDisablePartialInstShootdown:chv */
/* WaDisableThreadStallDopClockGating:chv */
WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
{
struct drm_device *dev = ring->dev;
struct drm_i915_private *dev_priv = dev->dev_private;
+ uint32_t tmp;
- /* WaDisablePartialInstShootdown:skl */
+ /* WaDisablePartialInstShootdown:skl,bxt */
WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE);
- /* Syncing dependencies between camera and graphics */
+ /* Syncing dependencies between camera and graphics:skl,bxt */
WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
GEN9_DISABLE_OCL_OOB_SUPPRESS_LOGIC);
- if (INTEL_REVID(dev) == SKL_REVID_A0 ||
- INTEL_REVID(dev) == SKL_REVID_B0) {
- /* WaDisableDgMirrorFixInHalfSliceChicken5:skl */
+ if ((IS_SKYLAKE(dev) && (INTEL_REVID(dev) == SKL_REVID_A0 ||
+ INTEL_REVID(dev) == SKL_REVID_B0)) ||
+ (IS_BROXTON(dev) && INTEL_REVID(dev) < BXT_REVID_B0)) {
+ /* WaDisableDgMirrorFixInHalfSliceChicken5:skl,bxt */
WA_CLR_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN5,
GEN9_DG_MIRROR_FIX_ENABLE);
}
- if (IS_SKYLAKE(dev) && INTEL_REVID(dev) <= SKL_REVID_B0) {
- /* WaSetDisablePixMaskCammingAndRhwoInCommonSliceChicken:skl */
+ if ((IS_SKYLAKE(dev) && INTEL_REVID(dev) <= SKL_REVID_B0) ||
+ (IS_BROXTON(dev) && INTEL_REVID(dev) < BXT_REVID_B0)) {
+ /* WaSetDisablePixMaskCammingAndRhwoInCommonSliceChicken:skl,bxt */
WA_SET_BIT_MASKED(GEN7_COMMON_SLICE_CHICKEN1,
GEN9_RHWO_OPTIMIZATION_DISABLE);
- WA_SET_BIT_MASKED(GEN9_SLICE_COMMON_ECO_CHICKEN0,
- DISABLE_PIXEL_MASK_CAMMING);
+ /*
+ * The WA also requires GEN9_SLICE_COMMON_ECO_CHICKEN0[14:14] to be
+ * set, but we do that in the per-ctx batchbuffer as there is an
+ * issue with this register not getting restored on ctx restore.
+ */
}
- if (INTEL_REVID(dev) >= SKL_REVID_C0) {
- /* WaEnableYV12BugFixInHalfSliceChicken7:skl */
+ if ((IS_SKYLAKE(dev) && INTEL_REVID(dev) >= SKL_REVID_C0) ||
+ IS_BROXTON(dev)) {
+ /* WaEnableYV12BugFixInHalfSliceChicken7:skl,bxt */
WA_SET_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN7,
GEN9_ENABLE_YV12_BUGFIX);
}
- if (INTEL_REVID(dev) <= SKL_REVID_D0) {
- /*
- *Use Force Non-Coherent whenever executing a 3D context. This
- * is a workaround for a possible hang in the unlikely event
- * a TLB invalidation occurs during a PSD flush.
- */
- /* WaForceEnableNonCoherent:skl */
- WA_SET_BIT_MASKED(HDC_CHICKEN0,
- HDC_FORCE_NON_COHERENT);
- }
-
- /* Wa4x4STCOptimizationDisable:skl */
+ /* Wa4x4STCOptimizationDisable:skl,bxt */
WA_SET_BIT_MASKED(CACHE_MODE_1, GEN8_4x4_STC_OPTIMIZATION_DISABLE);
- /* WaDisablePartialResolveInVc:skl */
+ /* WaDisablePartialResolveInVc:skl,bxt */
WA_SET_BIT_MASKED(CACHE_MODE_1, GEN9_PARTIAL_RESOLVE_IN_VC_DISABLE);
- /* WaCcsTlbPrefetchDisable:skl */
+ /* WaCcsTlbPrefetchDisable:skl,bxt */
WA_CLR_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN5,
GEN9_CCS_TLB_PREFETCH_ENABLE);
+ /* WaDisableMaskBasedCammingInRCC:skl,bxt */
+ if ((IS_SKYLAKE(dev) && INTEL_REVID(dev) == SKL_REVID_C0) ||
+ (IS_BROXTON(dev) && INTEL_REVID(dev) < BXT_REVID_B0))
+ WA_SET_BIT_MASKED(SLICE_ECO_CHICKEN0,
+ PIXEL_MASK_CAMMING_DISABLE);
+
+ /* WaForceContextSaveRestoreNonCoherent:skl,bxt */
+ tmp = HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT;
+ if ((IS_SKYLAKE(dev) && INTEL_REVID(dev) == SKL_REVID_F0) ||
+ (IS_BROXTON(dev) && INTEL_REVID(dev) >= BXT_REVID_B0))
+ tmp |= HDC_FORCE_CSR_NON_COHERENT_OVR_DISABLE;
+ WA_SET_BIT_MASKED(HDC_CHICKEN0, tmp);
+
return 0;
}
WA_SET_BIT_MASKED(HIZ_CHICKEN,
BDW_HIZ_POWER_COMPILER_CLOCK_GATING_DISABLE);
+ if (INTEL_REVID(dev) <= SKL_REVID_D0) {
+ /*
+ * Use Force Non-Coherent whenever executing a 3D context. This
+ * is a workaround for a possible hang in the unlikely event
+ * a TLB invalidation occurs during a PSD flush.
+ */
+ /* WaForceEnableNonCoherent:skl */
+ WA_SET_BIT_MASKED(HDC_CHICKEN0,
+ HDC_FORCE_NON_COHERENT);
+ }
+
if (INTEL_REVID(dev) == SKL_REVID_C0 ||
INTEL_REVID(dev) == SKL_REVID_D0)
/* WaBarrierPerformanceFixDisable:skl */
return skl_tune_iz_hashing(ring);
}
+static int bxt_init_workarounds(struct intel_engine_cs *ring)
+{
+ struct drm_device *dev = ring->dev;
+ struct drm_i915_private *dev_priv = dev->dev_private;
+
+ gen9_init_workarounds(ring);
+
+ /* WaDisableThreadStallDopClockGating:bxt */
+ WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
+ STALL_DOP_GATING_DISABLE);
+
+ /* WaDisableSbeCacheDispatchPortSharing:bxt */
+ if (INTEL_REVID(dev) <= BXT_REVID_B0) {
+ WA_SET_BIT_MASKED(
+ GEN7_HALF_SLICE_CHICKEN1,
+ GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);
+ }
+
+ return 0;
+}
+
int init_workarounds_ring(struct intel_engine_cs *ring)
{
struct drm_device *dev = ring->dev;
if (IS_SKYLAKE(dev))
return skl_init_workarounds(ring);
- else if (IS_GEN9(dev))
- return gen9_init_workarounds(ring);
+
+ if (IS_BROXTON(dev))
+ return bxt_init_workarounds(ring);
return 0;
}
* to use MI_WAIT_FOR_EVENT within the CS. It should already be
* programmed to '1' on all products.
*
- * WaDisableAsyncFlipPerfMode:snb,ivb,hsw,vlv,bdw,chv
+ * WaDisableAsyncFlipPerfMode:snb,ivb,hsw,vlv
*/
- if (INTEL_INFO(dev)->gen >= 6 && INTEL_INFO(dev)->gen < 9)
+ if (INTEL_INFO(dev)->gen >= 6 && INTEL_INFO(dev)->gen < 8)
I915_WRITE(MI_MODE, _MASKED_BIT_ENABLE(ASYNC_FLIP_PERF_DISABLE));
/* Required for the hardware to program scanline values for waiting */
_MASKED_BIT_DISABLE(CM0_STC_EVICT_DISABLE_LRA_SNB));
}
- if (INTEL_INFO(dev)->gen >= 6)
+ if (INTEL_INFO(dev)->gen >= 6 && INTEL_INFO(dev)->gen < 8)
I915_WRITE(INSTPM, _MASKED_BIT_ENABLE(INSTPM_FORCE_ORDERING));
if (HAS_L3_DPF(dev))
intel_fini_pipe_control(ring);
}
-static int gen8_rcs_signal(struct intel_engine_cs *signaller,
+static int gen8_rcs_signal(struct drm_i915_gem_request *signaller_req,
unsigned int num_dwords)
{
#define MBOX_UPDATE_DWORDS 8
+ struct intel_engine_cs *signaller = signaller_req->ring;
struct drm_device *dev = signaller->dev;
struct drm_i915_private *dev_priv = dev->dev_private;
struct intel_engine_cs *waiter;
num_dwords += (num_rings-1) * MBOX_UPDATE_DWORDS;
#undef MBOX_UPDATE_DWORDS
- ret = intel_ring_begin(signaller, num_dwords);
+ ret = intel_ring_begin(signaller_req, num_dwords);
if (ret)
return ret;
if (gtt_offset == MI_SEMAPHORE_SYNC_INVALID)
continue;
- seqno = i915_gem_request_get_seqno(
- signaller->outstanding_lazy_request);
+ seqno = i915_gem_request_get_seqno(signaller_req);
intel_ring_emit(signaller, GFX_OP_PIPE_CONTROL(6));
intel_ring_emit(signaller, PIPE_CONTROL_GLOBAL_GTT_IVB |
PIPE_CONTROL_QW_WRITE |
return 0;
}
-static int gen8_xcs_signal(struct intel_engine_cs *signaller,
+static int gen8_xcs_signal(struct drm_i915_gem_request *signaller_req,
unsigned int num_dwords)
{
#define MBOX_UPDATE_DWORDS 6
+ struct intel_engine_cs *signaller = signaller_req->ring;
struct drm_device *dev = signaller->dev;
struct drm_i915_private *dev_priv = dev->dev_private;
struct intel_engine_cs *waiter;
num_dwords += (num_rings-1) * MBOX_UPDATE_DWORDS;
#undef MBOX_UPDATE_DWORDS
- ret = intel_ring_begin(signaller, num_dwords);
+ ret = intel_ring_begin(signaller_req, num_dwords);
if (ret)
return ret;
if (gtt_offset == MI_SEMAPHORE_SYNC_INVALID)
continue;
- seqno = i915_gem_request_get_seqno(
- signaller->outstanding_lazy_request);
+ seqno = i915_gem_request_get_seqno(signaller_req);
intel_ring_emit(signaller, (MI_FLUSH_DW + 1) |
MI_FLUSH_DW_OP_STOREDW);
intel_ring_emit(signaller, lower_32_bits(gtt_offset) |
return 0;
}
-static int gen6_signal(struct intel_engine_cs *signaller,
+static int gen6_signal(struct drm_i915_gem_request *signaller_req,
unsigned int num_dwords)
{
+ struct intel_engine_cs *signaller = signaller_req->ring;
struct drm_device *dev = signaller->dev;
struct drm_i915_private *dev_priv = dev->dev_private;
struct intel_engine_cs *useless;
num_dwords += round_up((num_rings-1) * MBOX_UPDATE_DWORDS, 2);
#undef MBOX_UPDATE_DWORDS
- ret = intel_ring_begin(signaller, num_dwords);
+ ret = intel_ring_begin(signaller_req, num_dwords);
if (ret)
return ret;
for_each_ring(useless, dev_priv, i) {
u32 mbox_reg = signaller->semaphore.mbox.signal[i];
if (mbox_reg != GEN6_NOSYNC) {
- u32 seqno = i915_gem_request_get_seqno(
- signaller->outstanding_lazy_request);
+ u32 seqno = i915_gem_request_get_seqno(signaller_req);
intel_ring_emit(signaller, MI_LOAD_REGISTER_IMM(1));
intel_ring_emit(signaller, mbox_reg);
intel_ring_emit(signaller, seqno);
/**
* gen6_add_request - Update the semaphore mailbox registers
- *
- * @ring - ring that is adding a request
- * @seqno - return seqno stuck into the ring
+ *
+ * @request - request to write to the ring
*
* Update the mailbox registers in the *other* rings with the current seqno.
* This acts like a signal in the canonical semaphore.
*/
static int
-gen6_add_request(struct intel_engine_cs *ring)
+gen6_add_request(struct drm_i915_gem_request *req)
{
+ struct intel_engine_cs *ring = req->ring;
int ret;
if (ring->semaphore.signal)
- ret = ring->semaphore.signal(ring, 4);
+ ret = ring->semaphore.signal(req, 4);
else
- ret = intel_ring_begin(ring, 4);
+ ret = intel_ring_begin(req, 4);
if (ret)
return ret;
intel_ring_emit(ring, MI_STORE_DWORD_INDEX);
intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
- intel_ring_emit(ring,
- i915_gem_request_get_seqno(ring->outstanding_lazy_request));
+ intel_ring_emit(ring, i915_gem_request_get_seqno(req));
intel_ring_emit(ring, MI_USER_INTERRUPT);
__intel_ring_advance(ring);
*/
static int
-gen8_ring_sync(struct intel_engine_cs *waiter,
+gen8_ring_sync(struct drm_i915_gem_request *waiter_req,
struct intel_engine_cs *signaller,
u32 seqno)
{
+ struct intel_engine_cs *waiter = waiter_req->ring;
struct drm_i915_private *dev_priv = waiter->dev->dev_private;
int ret;
- ret = intel_ring_begin(waiter, 4);
+ ret = intel_ring_begin(waiter_req, 4);
if (ret)
return ret;
}
static int
-gen6_ring_sync(struct intel_engine_cs *waiter,
+gen6_ring_sync(struct drm_i915_gem_request *waiter_req,
struct intel_engine_cs *signaller,
u32 seqno)
{
+ struct intel_engine_cs *waiter = waiter_req->ring;
u32 dw1 = MI_SEMAPHORE_MBOX |
MI_SEMAPHORE_COMPARE |
MI_SEMAPHORE_REGISTER;
WARN_ON(wait_mbox == MI_SEMAPHORE_SYNC_INVALID);
- ret = intel_ring_begin(waiter, 4);
+ ret = intel_ring_begin(waiter_req, 4);
if (ret)
return ret;
} while (0)
static int
-pc_render_add_request(struct intel_engine_cs *ring)
+pc_render_add_request(struct drm_i915_gem_request *req)
{
+ struct intel_engine_cs *ring = req->ring;
u32 scratch_addr = ring->scratch.gtt_offset + 2 * CACHELINE_BYTES;
int ret;
* incoherence by flushing the 6 PIPE_NOTIFY buffers out to
* memory before requesting an interrupt.
*/
- ret = intel_ring_begin(ring, 32);
+ ret = intel_ring_begin(req, 32);
if (ret)
return ret;
PIPE_CONTROL_WRITE_FLUSH |
PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE);
intel_ring_emit(ring, ring->scratch.gtt_offset | PIPE_CONTROL_GLOBAL_GTT);
- intel_ring_emit(ring,
- i915_gem_request_get_seqno(ring->outstanding_lazy_request));
+ intel_ring_emit(ring, i915_gem_request_get_seqno(req));
intel_ring_emit(ring, 0);
PIPE_CONTROL_FLUSH(ring, scratch_addr);
scratch_addr += 2 * CACHELINE_BYTES; /* write to separate cachelines */
PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
PIPE_CONTROL_NOTIFY);
intel_ring_emit(ring, ring->scratch.gtt_offset | PIPE_CONTROL_GLOBAL_GTT);
- intel_ring_emit(ring,
- i915_gem_request_get_seqno(ring->outstanding_lazy_request));
+ intel_ring_emit(ring, i915_gem_request_get_seqno(req));
intel_ring_emit(ring, 0);
__intel_ring_advance(ring);
}
static int
-bsd_ring_flush(struct intel_engine_cs *ring,
+bsd_ring_flush(struct drm_i915_gem_request *req,
u32 invalidate_domains,
u32 flush_domains)
{
+ struct intel_engine_cs *ring = req->ring;
int ret;
- ret = intel_ring_begin(ring, 2);
+ ret = intel_ring_begin(req, 2);
if (ret)
return ret;
}
static int
-i9xx_add_request(struct intel_engine_cs *ring)
+i9xx_add_request(struct drm_i915_gem_request *req)
{
+ struct intel_engine_cs *ring = req->ring;
int ret;
- ret = intel_ring_begin(ring, 4);
+ ret = intel_ring_begin(req, 4);
if (ret)
return ret;
intel_ring_emit(ring, MI_STORE_DWORD_INDEX);
intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
- intel_ring_emit(ring,
- i915_gem_request_get_seqno(ring->outstanding_lazy_request));
+ intel_ring_emit(ring, i915_gem_request_get_seqno(req));
intel_ring_emit(ring, MI_USER_INTERRUPT);
__intel_ring_advance(ring);
}
static int
-i965_dispatch_execbuffer(struct intel_engine_cs *ring,
+i965_dispatch_execbuffer(struct drm_i915_gem_request *req,
u64 offset, u32 length,
unsigned dispatch_flags)
{
+ struct intel_engine_cs *ring = req->ring;
int ret;
- ret = intel_ring_begin(ring, 2);
+ ret = intel_ring_begin(req, 2);
if (ret)
return ret;
#define I830_TLB_ENTRIES (2)
#define I830_WA_SIZE max(I830_TLB_ENTRIES*4096, I830_BATCH_LIMIT)
static int
-i830_dispatch_execbuffer(struct intel_engine_cs *ring,
+i830_dispatch_execbuffer(struct drm_i915_gem_request *req,
u64 offset, u32 len,
unsigned dispatch_flags)
{
+ struct intel_engine_cs *ring = req->ring;
u32 cs_offset = ring->scratch.gtt_offset;
int ret;
- ret = intel_ring_begin(ring, 6);
+ ret = intel_ring_begin(req, 6);
if (ret)
return ret;
if (len > I830_BATCH_LIMIT)
return -ENOSPC;
- ret = intel_ring_begin(ring, 6 + 2);
+ ret = intel_ring_begin(req, 6 + 2);
if (ret)
return ret;
offset = cs_offset;
}
- ret = intel_ring_begin(ring, 4);
+ ret = intel_ring_begin(req, 4);
if (ret)
return ret;
}
static int
-i915_dispatch_execbuffer(struct intel_engine_cs *ring,
+i915_dispatch_execbuffer(struct drm_i915_gem_request *req,
u64 offset, u32 len,
unsigned dispatch_flags)
{
+ struct intel_engine_cs *ring = req->ring;
int ret;
- ret = intel_ring_begin(ring, 2);
+ ret = intel_ring_begin(req, 2);
if (ret)
return ret;
INIT_LIST_HEAD(&ring->active_list);
INIT_LIST_HEAD(&ring->request_list);
INIT_LIST_HEAD(&ring->execlist_queue);
+ i915_gem_batch_pool_init(dev, &ring->batch_pool);
ringbuf->size = 32 * PAGE_SIZE;
ringbuf->ring = ring;
memset(ring->semaphore.sync_seqno, 0, sizeof(ring->semaphore.sync_seqno));
intel_unpin_ringbuffer_obj(ringbuf);
intel_destroy_ringbuffer_obj(ringbuf);
- i915_gem_request_assign(&ring->outstanding_lazy_request, NULL);
if (ring->cleanup)
ring->cleanup(ring);
cleanup_status_page(ring);
i915_cmd_parser_fini_ring(ring);
+ i915_gem_batch_pool_fini(&ring->batch_pool);
kfree(ringbuf);
ring->buffer = NULL;
}
-static int intel_ring_wait_request(struct intel_engine_cs *ring, int n)
+static int ring_wait_for_space(struct intel_engine_cs *ring, int n)
{
struct intel_ringbuffer *ringbuf = ring->buffer;
struct drm_i915_gem_request *request;
+ unsigned space;
int ret;
if (intel_ring_space(ringbuf) >= n)
return 0;
+ /* The whole point of reserving space is to not wait! */
+ WARN_ON(ringbuf->reserved_in_use);
+
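+ /*
+ * Walk the outstanding requests oldest first and find the first one
+ * whose retirement would free enough space for this write.
+ */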
list_for_each_entry(request, &ring->request_list, list) {
- if (__intel_ring_space(request->postfix, ringbuf->tail,
- ringbuf->size) >= n) {
+ space = __intel_ring_space(request->postfix, ringbuf->tail,
+ ringbuf->size);
+ if (space >= n)
break;
- }
}
- if (&request->list == &ring->request_list)
+ if (WARN_ON(&request->list == &ring->request_list))
return -ENOSPC;
ret = i915_wait_request(request);
if (ret)
return ret;
- i915_gem_retire_requests_ring(ring);
-
+ ringbuf->space = space;
return 0;
}
-static int ring_wait_for_space(struct intel_engine_cs *ring, int n)
-{
- struct drm_device *dev = ring->dev;
- struct drm_i915_private *dev_priv = dev->dev_private;
- struct intel_ringbuffer *ringbuf = ring->buffer;
- unsigned long end;
- int ret;
-
- ret = intel_ring_wait_request(ring, n);
- if (ret != -ENOSPC)
- return ret;
-
- /* force the tail write in case we have been skipping them */
- __intel_ring_advance(ring);
-
- /* With GEM the hangcheck timer should kick us out of the loop,
- * leaving it early runs the risk of corrupting GEM state (due
- * to running on almost untested codepaths). But on resume
- * timers don't work yet, so prevent a complete hang in that
- * case by choosing an insanely large timeout. */
- end = jiffies + 60 * HZ;
-
- ret = 0;
- trace_i915_ring_wait_begin(ring);
- do {
- if (intel_ring_space(ringbuf) >= n)
- break;
- ringbuf->head = I915_READ_HEAD(ring);
- if (intel_ring_space(ringbuf) >= n)
- break;
-
- msleep(1);
-
- if (dev_priv->mm.interruptible && signal_pending(current)) {
- ret = -ERESTARTSYS;
- break;
- }
-
- ret = i915_gem_check_wedge(&dev_priv->gpu_error,
- dev_priv->mm.interruptible);
- if (ret)
- break;
-
- if (time_after(jiffies, end)) {
- ret = -EBUSY;
- break;
- }
- } while (1);
- trace_i915_ring_wait_end(ring);
- return ret;
-}
-
-static int intel_wrap_ring_buffer(struct intel_engine_cs *ring)
+static void __wrap_ring_buffer(struct intel_ringbuffer *ringbuf)
{
uint32_t __iomem *virt;
- struct intel_ringbuffer *ringbuf = ring->buffer;
int rem = ringbuf->size - ringbuf->tail;
- if (ringbuf->space < rem) {
- int ret = ring_wait_for_space(ring, rem);
- if (ret)
- return ret;
- }
-
virt = ringbuf->virtual_start + ringbuf->tail;
rem /= 4;
while (rem--)
ringbuf->tail = 0;
intel_ring_update_space(ringbuf);
-
- return 0;
}
int intel_ring_idle(struct intel_engine_cs *ring)
{
struct drm_i915_gem_request *req;
- int ret;
-
- /* We need to add any requests required to flush the objects and ring */
- if (ring->outstanding_lazy_request) {
- ret = i915_add_request(ring);
- if (ret)
- return ret;
- }
/* Wait upon the last request to be completed */
if (list_empty(&ring->request_list))
return 0;
req = list_entry(ring->request_list.prev,
- struct drm_i915_gem_request,
- list);
+ struct drm_i915_gem_request,
+ list);
- return i915_wait_request(req);
+ /* Make sure we do not trigger any retires */
+ return __i915_wait_request(req,
+ atomic_read(&to_i915(ring->dev)->gpu_error.reset_counter),
+ to_i915(ring->dev)->mm.interruptible,
+ NULL, NULL);
}
-static int
-intel_ring_alloc_request(struct intel_engine_cs *ring)
+int intel_ring_alloc_request_extras(struct drm_i915_gem_request *request)
{
- int ret;
- struct drm_i915_gem_request *request;
- struct drm_i915_private *dev_private = ring->dev->dev_private;
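+ /*
+ * Legacy ringbuffer submission: every request on an engine writes
+ * into the engine's single global ring, so just share ring->buffer.
+ */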
+ request->ringbuf = request->ring->buffer;
+ return 0;
+}
- if (ring->outstanding_lazy_request)
- return 0;
+int intel_ring_reserve_space(struct drm_i915_gem_request *request)
+{
+ /*
+ * The first call merely notes the reserve request and is common for
+ * all back ends. The subsequent localised _begin() call actually
+ * ensures that the reservation is available. Without the begin, if
+ * the request creator immediately submitted the request without
+ * adding any commands to it then there might not actually be
+ * sufficient room for the submission commands.
+ */
+ intel_ring_reserved_space_reserve(request->ringbuf, MIN_SPACE_FOR_ADD_REQUEST);
- request = kzalloc(sizeof(*request), GFP_KERNEL);
- if (request == NULL)
- return -ENOMEM;
+ return intel_ring_begin(request, 0);
+}
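+
+/*
+ * Rough calling sequence (a sketch; i915_gem_request_alloc() here stands
+ * for the request-creation helper this series converges on, so the exact
+ * names may differ):
+ *
+ *	req = i915_gem_request_alloc(ring, ctx);   reserve add_request space
+ *	intel_ring_begin(req, n);                  caller's own commands
+ *	intel_ring_emit(req->ring, ...);
+ *	intel_ring_advance(req->ring);
+ *	i915_add_request(req);                     consume the reservation
+ */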
- kref_init(&request->ref);
- request->ring = ring;
- request->ringbuf = ring->buffer;
- request->uniq = dev_private->request_uniq++;
+void intel_ring_reserved_space_reserve(struct intel_ringbuffer *ringbuf, int size)
+{
+ WARN_ON(ringbuf->reserved_size);
+ WARN_ON(ringbuf->reserved_in_use);
- ret = i915_gem_get_seqno(ring->dev, &request->seqno);
- if (ret) {
- kfree(request);
- return ret;
+ ringbuf->reserved_size = size;
+}
+
+void intel_ring_reserved_space_cancel(struct intel_ringbuffer *ringbuf)
+{
+ WARN_ON(ringbuf->reserved_in_use);
+
+ ringbuf->reserved_size = 0;
+ ringbuf->reserved_in_use = false;
+}
+
+void intel_ring_reserved_space_use(struct intel_ringbuffer *ringbuf)
+{
+ WARN_ON(ringbuf->reserved_in_use);
+
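+ /*
+ * Record where consumption of the reservation starts so that
+ * intel_ring_reserved_space_end() can check how much was used.
+ */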
+ ringbuf->reserved_in_use = true;
+ ringbuf->reserved_tail = ringbuf->tail;
+}
+
+void intel_ring_reserved_space_end(struct intel_ringbuffer *ringbuf)
+{
+ WARN_ON(!ringbuf->reserved_in_use);
+ if (ringbuf->tail > ringbuf->reserved_tail) {
+ WARN(ringbuf->tail > ringbuf->reserved_tail + ringbuf->reserved_size,
+ "request reserved size too small: %d vs %d!\n",
+ ringbuf->tail - ringbuf->reserved_tail, ringbuf->reserved_size);
+ } else {
+ /*
+ * The ring was wrapped while the reserved space was in use.
+ * That means that some unknown amount of the ring tail was
+ * no-op filled and skipped. Thus simply adding the ring size
+ * to the tail and doing the above space check will not work.
+ * Rather than attempt to track how much tail was skipped,
+ * it is much simpler to say that also skipping the sanity
+ * check every once in a while is not a big issue.
+ */
}
- ring->outstanding_lazy_request = request;
- return 0;
+ ringbuf->reserved_size = 0;
+ ringbuf->reserved_in_use = false;
}
-static int __intel_ring_prepare(struct intel_engine_cs *ring,
- int bytes)
+static int __intel_ring_prepare(struct intel_engine_cs *ring, int bytes)
{
struct intel_ringbuffer *ringbuf = ring->buffer;
- int ret;
+ int remain_usable = ringbuf->effective_size - ringbuf->tail;
+ int remain_actual = ringbuf->size - ringbuf->tail;
+ int ret, total_bytes, wait_bytes = 0;
+ bool need_wrap = false;
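+
+ /*
+ * remain_usable is what can still be written before hitting the wrap
+ * point; remain_actual also counts the tail padding that a wrap would
+ * fill with MI_NOOPs.
+ */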
- if (unlikely(ringbuf->tail + bytes > ringbuf->effective_size)) {
- ret = intel_wrap_ring_buffer(ring);
- if (unlikely(ret))
- return ret;
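+ /*
+ * If this request is itself consuming the reservation then only its
+ * own bytes matter; otherwise the outstanding reservation must also
+ * fit behind whatever we are about to write.
+ */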
+ if (ringbuf->reserved_in_use)
+ total_bytes = bytes;
+ else
+ total_bytes = bytes + ringbuf->reserved_size;
+
+ if (unlikely(bytes > remain_usable)) {
+ /*
+ * Not enough space for the basic request. So need to flush
+ * out the remainder and then wait for base + reserved.
+ */
+ wait_bytes = remain_actual + total_bytes;
+ need_wrap = true;
+ } else {
+ if (unlikely(total_bytes > remain_usable)) {
+ /*
+ * The base request will fit but the reserved space
+ * falls off the end. So only need to wait for the
+ * reserved size after flushing out the remainder.
+ */
+ wait_bytes = remain_actual + ringbuf->reserved_size;
+ need_wrap = true;
+ } else if (total_bytes > ringbuf->space) {
+ /* No wrapping required, just waiting. */
+ wait_bytes = total_bytes;
+ }
}
- if (unlikely(ringbuf->space < bytes)) {
- ret = ring_wait_for_space(ring, bytes);
+ if (wait_bytes) {
+ ret = ring_wait_for_space(ring, wait_bytes);
if (unlikely(ret))
return ret;
+
+ if (need_wrap)
+ __wrap_ring_buffer(ringbuf);
}
return 0;
}
-int intel_ring_begin(struct intel_engine_cs *ring,
+int intel_ring_begin(struct drm_i915_gem_request *req,
int num_dwords)
{
- struct drm_i915_private *dev_priv = ring->dev->dev_private;
+ struct intel_engine_cs *ring;
+ struct drm_i915_private *dev_priv;
int ret;
+ WARN_ON(req == NULL);
+ ring = req->ring;
+ dev_priv = ring->dev->dev_private;
+
ret = i915_gem_check_wedge(&dev_priv->gpu_error,
dev_priv->mm.interruptible);
if (ret)
if (ret)
return ret;
- /* Preallocate the olr before touching the ring */
- ret = intel_ring_alloc_request(ring);
- if (ret)
- return ret;
-
ring->buffer->space -= num_dwords * sizeof(uint32_t);
return 0;
}
/* Align the ring tail to a cacheline boundary */
-int intel_ring_cacheline_align(struct intel_engine_cs *ring)
+int intel_ring_cacheline_align(struct drm_i915_gem_request *req)
{
+ struct intel_engine_cs *ring = req->ring;
int num_dwords = (ring->buffer->tail & (CACHELINE_BYTES - 1)) / sizeof(uint32_t);
int ret;
return 0;
num_dwords = CACHELINE_BYTES / sizeof(uint32_t) - num_dwords;
- ret = intel_ring_begin(ring, num_dwords);
+ ret = intel_ring_begin(req, num_dwords);
if (ret)
return ret;
struct drm_device *dev = ring->dev;
struct drm_i915_private *dev_priv = dev->dev_private;
- BUG_ON(ring->outstanding_lazy_request);
-
if (INTEL_INFO(dev)->gen == 6 || INTEL_INFO(dev)->gen == 7) {
I915_WRITE(RING_SYNC_0(ring->mmio_base), 0);
I915_WRITE(RING_SYNC_1(ring->mmio_base), 0);
_MASKED_BIT_DISABLE(GEN6_BSD_SLEEP_MSG_DISABLE));
}
-static int gen6_bsd_ring_flush(struct intel_engine_cs *ring,
+static int gen6_bsd_ring_flush(struct drm_i915_gem_request *req,
u32 invalidate, u32 flush)
{
+ struct intel_engine_cs *ring = req->ring;
uint32_t cmd;
int ret;
- ret = intel_ring_begin(ring, 4);
+ ret = intel_ring_begin(req, 4);
if (ret)
return ret;
}
static int
-gen8_ring_dispatch_execbuffer(struct intel_engine_cs *ring,
+gen8_ring_dispatch_execbuffer(struct drm_i915_gem_request *req,
u64 offset, u32 len,
unsigned dispatch_flags)
{
+ struct intel_engine_cs *ring = req->ring;
bool ppgtt = USES_PPGTT(ring->dev) &&
!(dispatch_flags & I915_DISPATCH_SECURE);
int ret;
- ret = intel_ring_begin(ring, 4);
+ ret = intel_ring_begin(req, 4);
if (ret)
return ret;
/* FIXME(BDW): Address space and security selectors. */
- intel_ring_emit(ring, MI_BATCH_BUFFER_START_GEN8 | (ppgtt<<8));
+ intel_ring_emit(ring, MI_BATCH_BUFFER_START_GEN8 | (ppgtt<<8) |
+ (dispatch_flags & I915_DISPATCH_RS ?
+ MI_BATCH_RESOURCE_STREAMER : 0));
intel_ring_emit(ring, lower_32_bits(offset));
intel_ring_emit(ring, upper_32_bits(offset));
intel_ring_emit(ring, MI_NOOP);
}
static int
-hsw_ring_dispatch_execbuffer(struct intel_engine_cs *ring,
+hsw_ring_dispatch_execbuffer(struct drm_i915_gem_request *req,
u64 offset, u32 len,
unsigned dispatch_flags)
{
+ struct intel_engine_cs *ring = req->ring;
int ret;
- ret = intel_ring_begin(ring, 2);
+ ret = intel_ring_begin(req, 2);
if (ret)
return ret;
intel_ring_emit(ring,
MI_BATCH_BUFFER_START |
(dispatch_flags & I915_DISPATCH_SECURE ?
- 0 : MI_BATCH_PPGTT_HSW | MI_BATCH_NON_SECURE_HSW));
+ 0 : MI_BATCH_PPGTT_HSW | MI_BATCH_NON_SECURE_HSW) |
+ (dispatch_flags & I915_DISPATCH_RS ?
+ MI_BATCH_RESOURCE_STREAMER : 0));
/* bit0-7 is the length on GEN6+ */
intel_ring_emit(ring, offset);
intel_ring_advance(ring);
}
static int
-gen6_ring_dispatch_execbuffer(struct intel_engine_cs *ring,
+gen6_ring_dispatch_execbuffer(struct drm_i915_gem_request *req,
u64 offset, u32 len,
unsigned dispatch_flags)
{
+ struct intel_engine_cs *ring = req->ring;
int ret;
- ret = intel_ring_begin(ring, 2);
+ ret = intel_ring_begin(req, 2);
if (ret)
return ret;
/* Blitter support (SandyBridge+) */
-static int gen6_ring_flush(struct intel_engine_cs *ring,
+static int gen6_ring_flush(struct drm_i915_gem_request *req,
u32 invalidate, u32 flush)
{
+ struct intel_engine_cs *ring = req->ring;
struct drm_device *dev = ring->dev;
uint32_t cmd;
int ret;
- ret = intel_ring_begin(ring, 4);
+ ret = intel_ring_begin(req, 4);
if (ret)
return ret;
}
int
-intel_ring_flush_all_caches(struct intel_engine_cs *ring)
+intel_ring_flush_all_caches(struct drm_i915_gem_request *req)
{
+ struct intel_engine_cs *ring = req->ring;
int ret;
if (!ring->gpu_caches_dirty)
return 0;
- ret = ring->flush(ring, 0, I915_GEM_GPU_DOMAINS);
+ ret = ring->flush(req, 0, I915_GEM_GPU_DOMAINS);
if (ret)
return ret;
- trace_i915_gem_ring_flush(ring, 0, I915_GEM_GPU_DOMAINS);
+ trace_i915_gem_ring_flush(req, 0, I915_GEM_GPU_DOMAINS);
ring->gpu_caches_dirty = false;
return 0;
}
int
-intel_ring_invalidate_all_caches(struct intel_engine_cs *ring)
+intel_ring_invalidate_all_caches(struct drm_i915_gem_request *req)
{
+ struct intel_engine_cs *ring = req->ring;
uint32_t flush_domains;
int ret;
if (ring->gpu_caches_dirty)
flush_domains = I915_GEM_GPU_DOMAINS;
- ret = ring->flush(ring, I915_GEM_GPU_DOMAINS, flush_domains);
+ ret = ring->flush(req, I915_GEM_GPU_DOMAINS, flush_domains);
if (ret)
return ret;
- trace_i915_gem_ring_flush(ring, I915_GEM_GPU_DOMAINS, flush_domains);
+ trace_i915_gem_ring_flush(req, I915_GEM_GPU_DOMAINS, flush_domains);
ring->gpu_caches_dirty = false;
return 0;