Merge tag 'drm-intel-fixes-2015-07-15' into drm-intel-next-queued
[firefly-linux-kernel-4.4.55.git] / drivers / gpu / drm / i915 / intel_ringbuffer.c
index eb436a03fae95cb6b1490714e3a87b6d4a91e102..177f7ed16cf0abbe6c2518b0a98ae7bf16a34b44 100644 (file)
@@ -106,7 +106,7 @@ gen2_render_ring_flush(struct drm_i915_gem_request *req,
        if (invalidate_domains & I915_GEM_DOMAIN_SAMPLER)
                cmd |= MI_READ_FLUSH;
 
-       ret = intel_ring_begin(ring, 2);
+       ret = intel_ring_begin(req, 2);
        if (ret)
                return ret;
 
@@ -165,7 +165,7 @@ gen4_render_ring_flush(struct drm_i915_gem_request *req,
            (IS_G4X(dev) || IS_GEN5(dev)))
                cmd |= MI_INVALIDATE_ISP;
 
-       ret = intel_ring_begin(ring, 2);
+       ret = intel_ring_begin(req, 2);
        if (ret)
                return ret;
 
@@ -220,8 +220,7 @@ intel_emit_post_sync_nonzero_flush(struct drm_i915_gem_request *req)
        u32 scratch_addr = ring->scratch.gtt_offset + 2 * CACHELINE_BYTES;
        int ret;
 
-
-       ret = intel_ring_begin(ring, 6);
+       ret = intel_ring_begin(req, 6);
        if (ret)
                return ret;
 
@@ -234,7 +233,7 @@ intel_emit_post_sync_nonzero_flush(struct drm_i915_gem_request *req)
        intel_ring_emit(ring, MI_NOOP);
        intel_ring_advance(ring);
 
-       ret = intel_ring_begin(ring, 6);
+       ret = intel_ring_begin(req, 6);
        if (ret)
                return ret;
 
@@ -289,7 +288,7 @@ gen6_render_ring_flush(struct drm_i915_gem_request *req,
                flags |= PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_CS_STALL;
        }
 
-       ret = intel_ring_begin(ring, 4);
+       ret = intel_ring_begin(req, 4);
        if (ret)
                return ret;
 
@@ -308,7 +307,7 @@ gen7_render_ring_cs_stall_wa(struct drm_i915_gem_request *req)
        struct intel_engine_cs *ring = req->ring;
        int ret;
 
-       ret = intel_ring_begin(ring, 4);
+       ret = intel_ring_begin(req, 4);
        if (ret)
                return ret;
 
@@ -371,7 +370,7 @@ gen7_render_ring_flush(struct drm_i915_gem_request *req,
                gen7_render_ring_cs_stall_wa(req);
        }
 
-       ret = intel_ring_begin(ring, 4);
+       ret = intel_ring_begin(req, 4);
        if (ret)
                return ret;
 
@@ -391,7 +390,7 @@ gen8_emit_pipe_control(struct drm_i915_gem_request *req,
        struct intel_engine_cs *ring = req->ring;
        int ret;
 
-       ret = intel_ring_begin(ring, 6);
+       ret = intel_ring_begin(req, 6);
        if (ret)
                return ret;
 
@@ -726,7 +725,7 @@ static int intel_ring_workarounds_emit(struct drm_i915_gem_request *req)
        if (ret)
                return ret;
 
-       ret = intel_ring_begin(ring, (w->count * 2 + 2));
+       ret = intel_ring_begin(req, (w->count * 2 + 2));
        if (ret)
                return ret;
 
@@ -947,8 +946,11 @@ static int gen9_init_workarounds(struct intel_engine_cs *ring)
                /* WaSetDisablePixMaskCammingAndRhwoInCommonSliceChicken:skl,bxt */
                WA_SET_BIT_MASKED(GEN7_COMMON_SLICE_CHICKEN1,
                                  GEN9_RHWO_OPTIMIZATION_DISABLE);
-               WA_SET_BIT_MASKED(GEN9_SLICE_COMMON_ECO_CHICKEN0,
-                                 DISABLE_PIXEL_MASK_CAMMING);
+               /*
+                * WA also requires GEN9_SLICE_COMMON_ECO_CHICKEN0[14:14] to be set
+                * but we do that in per ctx batchbuffer as there is an issue
+                * with this register not getting restored on ctx restore
+                */
        }
 
        if ((IS_SKYLAKE(dev) && INTEL_REVID(dev) >= SKL_REVID_C0) ||
@@ -1039,6 +1041,13 @@ static int skl_init_workarounds(struct intel_engine_cs *ring)
                WA_SET_BIT_MASKED(HIZ_CHICKEN,
                                  BDW_HIZ_POWER_COMPILER_CLOCK_GATING_DISABLE);
 
+       if (INTEL_REVID(dev) == SKL_REVID_C0 ||
+           INTEL_REVID(dev) == SKL_REVID_D0)
+               /* WaBarrierPerformanceFixDisable:skl */
+               WA_SET_BIT_MASKED(HDC_CHICKEN0,
+                                 HDC_FENCE_DEST_SLM_DISABLE |
+                                 HDC_BARRIER_PERFORMANCE_DISABLE);
+
        if (INTEL_REVID(dev) <= SKL_REVID_D0) {
                /*
                 *Use Force Non-Coherent whenever executing a 3D context. This
@@ -1171,10 +1180,11 @@ static void render_ring_cleanup(struct intel_engine_cs *ring)
        intel_fini_pipe_control(ring);
 }
 
-static int gen8_rcs_signal(struct intel_engine_cs *signaller,
+static int gen8_rcs_signal(struct drm_i915_gem_request *signaller_req,
                           unsigned int num_dwords)
 {
 #define MBOX_UPDATE_DWORDS 8
+       struct intel_engine_cs *signaller = signaller_req->ring;
        struct drm_device *dev = signaller->dev;
        struct drm_i915_private *dev_priv = dev->dev_private;
        struct intel_engine_cs *waiter;
@@ -1184,7 +1194,7 @@ static int gen8_rcs_signal(struct intel_engine_cs *signaller,
        num_dwords += (num_rings-1) * MBOX_UPDATE_DWORDS;
 #undef MBOX_UPDATE_DWORDS
 
-       ret = intel_ring_begin(signaller, num_dwords);
+       ret = intel_ring_begin(signaller_req, num_dwords);
        if (ret)
                return ret;
 
@@ -1194,8 +1204,7 @@ static int gen8_rcs_signal(struct intel_engine_cs *signaller,
                if (gtt_offset == MI_SEMAPHORE_SYNC_INVALID)
                        continue;
 
-               seqno = i915_gem_request_get_seqno(
-                                          signaller->outstanding_lazy_request);
+               seqno = i915_gem_request_get_seqno(signaller_req);
                intel_ring_emit(signaller, GFX_OP_PIPE_CONTROL(6));
                intel_ring_emit(signaller, PIPE_CONTROL_GLOBAL_GTT_IVB |
                                           PIPE_CONTROL_QW_WRITE |
@@ -1212,10 +1221,11 @@ static int gen8_rcs_signal(struct intel_engine_cs *signaller,
        return 0;
 }
 
-static int gen8_xcs_signal(struct intel_engine_cs *signaller,
+static int gen8_xcs_signal(struct drm_i915_gem_request *signaller_req,
                           unsigned int num_dwords)
 {
 #define MBOX_UPDATE_DWORDS 6
+       struct intel_engine_cs *signaller = signaller_req->ring;
        struct drm_device *dev = signaller->dev;
        struct drm_i915_private *dev_priv = dev->dev_private;
        struct intel_engine_cs *waiter;
@@ -1225,7 +1235,7 @@ static int gen8_xcs_signal(struct intel_engine_cs *signaller,
        num_dwords += (num_rings-1) * MBOX_UPDATE_DWORDS;
 #undef MBOX_UPDATE_DWORDS
 
-       ret = intel_ring_begin(signaller, num_dwords);
+       ret = intel_ring_begin(signaller_req, num_dwords);
        if (ret)
                return ret;
 
@@ -1235,8 +1245,7 @@ static int gen8_xcs_signal(struct intel_engine_cs *signaller,
                if (gtt_offset == MI_SEMAPHORE_SYNC_INVALID)
                        continue;
 
-               seqno = i915_gem_request_get_seqno(
-                                          signaller->outstanding_lazy_request);
+               seqno = i915_gem_request_get_seqno(signaller_req);
                intel_ring_emit(signaller, (MI_FLUSH_DW + 1) |
                                           MI_FLUSH_DW_OP_STOREDW);
                intel_ring_emit(signaller, lower_32_bits(gtt_offset) |
@@ -1251,9 +1260,10 @@ static int gen8_xcs_signal(struct intel_engine_cs *signaller,
        return 0;
 }
 
-static int gen6_signal(struct intel_engine_cs *signaller,
+static int gen6_signal(struct drm_i915_gem_request *signaller_req,
                       unsigned int num_dwords)
 {
+       struct intel_engine_cs *signaller = signaller_req->ring;
        struct drm_device *dev = signaller->dev;
        struct drm_i915_private *dev_priv = dev->dev_private;
        struct intel_engine_cs *useless;
@@ -1264,15 +1274,14 @@ static int gen6_signal(struct intel_engine_cs *signaller,
        num_dwords += round_up((num_rings-1) * MBOX_UPDATE_DWORDS, 2);
 #undef MBOX_UPDATE_DWORDS
 
-       ret = intel_ring_begin(signaller, num_dwords);
+       ret = intel_ring_begin(signaller_req, num_dwords);
        if (ret)
                return ret;
 
        for_each_ring(useless, dev_priv, i) {
                u32 mbox_reg = signaller->semaphore.mbox.signal[i];
                if (mbox_reg != GEN6_NOSYNC) {
-                       u32 seqno = i915_gem_request_get_seqno(
-                                          signaller->outstanding_lazy_request);
+                       u32 seqno = i915_gem_request_get_seqno(signaller_req);
                        intel_ring_emit(signaller, MI_LOAD_REGISTER_IMM(1));
                        intel_ring_emit(signaller, mbox_reg);
                        intel_ring_emit(signaller, seqno);
@@ -1301,9 +1310,9 @@ gen6_add_request(struct drm_i915_gem_request *req)
        int ret;
 
        if (ring->semaphore.signal)
-               ret = ring->semaphore.signal(ring, 4);
+               ret = ring->semaphore.signal(req, 4);
        else
-               ret = intel_ring_begin(ring, 4);
+               ret = intel_ring_begin(req, 4);
 
        if (ret)
                return ret;
@@ -1341,7 +1350,7 @@ gen8_ring_sync(struct drm_i915_gem_request *waiter_req,
        struct drm_i915_private *dev_priv = waiter->dev->dev_private;
        int ret;
 
-       ret = intel_ring_begin(waiter, 4);
+       ret = intel_ring_begin(waiter_req, 4);
        if (ret)
                return ret;
 
@@ -1378,7 +1387,7 @@ gen6_ring_sync(struct drm_i915_gem_request *waiter_req,
 
        WARN_ON(wait_mbox == MI_SEMAPHORE_SYNC_INVALID);
 
-       ret = intel_ring_begin(waiter, 4);
+       ret = intel_ring_begin(waiter_req, 4);
        if (ret)
                return ret;
 
@@ -1423,7 +1432,7 @@ pc_render_add_request(struct drm_i915_gem_request *req)
         * incoherence by flushing the 6 PIPE_NOTIFY buffers out to
         * memory before requesting an interrupt.
         */
-       ret = intel_ring_begin(ring, 32);
+       ret = intel_ring_begin(req, 32);
        if (ret)
                return ret;
 
@@ -1608,7 +1617,7 @@ bsd_ring_flush(struct drm_i915_gem_request *req,
        struct intel_engine_cs *ring = req->ring;
        int ret;
 
-       ret = intel_ring_begin(ring, 2);
+       ret = intel_ring_begin(req, 2);
        if (ret)
                return ret;
 
@@ -1624,7 +1633,7 @@ i9xx_add_request(struct drm_i915_gem_request *req)
        struct intel_engine_cs *ring = req->ring;
        int ret;
 
-       ret = intel_ring_begin(ring, 4);
+       ret = intel_ring_begin(req, 4);
        if (ret)
                return ret;
 
@@ -1769,7 +1778,7 @@ i965_dispatch_execbuffer(struct drm_i915_gem_request *req,
        struct intel_engine_cs *ring = req->ring;
        int ret;
 
-       ret = intel_ring_begin(ring, 2);
+       ret = intel_ring_begin(req, 2);
        if (ret)
                return ret;
 
@@ -1797,7 +1806,7 @@ i830_dispatch_execbuffer(struct drm_i915_gem_request *req,
        u32 cs_offset = ring->scratch.gtt_offset;
        int ret;
 
-       ret = intel_ring_begin(ring, 6);
+       ret = intel_ring_begin(req, 6);
        if (ret)
                return ret;
 
@@ -1814,7 +1823,7 @@ i830_dispatch_execbuffer(struct drm_i915_gem_request *req,
                if (len > I830_BATCH_LIMIT)
                        return -ENOSPC;
 
-               ret = intel_ring_begin(ring, 6 + 2);
+               ret = intel_ring_begin(req, 6 + 2);
                if (ret)
                        return ret;
 
@@ -1837,7 +1846,7 @@ i830_dispatch_execbuffer(struct drm_i915_gem_request *req,
                offset = cs_offset;
        }
 
-       ret = intel_ring_begin(ring, 4);
+       ret = intel_ring_begin(req, 4);
        if (ret)
                return ret;
 
@@ -1859,7 +1868,7 @@ i915_dispatch_execbuffer(struct drm_i915_gem_request *req,
        struct intel_engine_cs *ring = req->ring;
        int ret;
 
-       ret = intel_ring_begin(ring, 2);
+       ret = intel_ring_begin(req, 2);
        if (ret)
                return ret;
 
@@ -2102,7 +2111,6 @@ void intel_cleanup_ring_buffer(struct intel_engine_cs *ring)
 
        intel_unpin_ringbuffer_obj(ringbuf);
        intel_destroy_ringbuffer_obj(ringbuf);
-       i915_gem_request_assign(&ring->outstanding_lazy_request, NULL);
 
        if (ring->cleanup)
                ring->cleanup(ring);
@@ -2123,12 +2131,12 @@ static int ring_wait_for_space(struct intel_engine_cs *ring, int n)
        unsigned space;
        int ret;
 
-       /* The whole point of reserving space is to not wait! */
-       WARN_ON(ringbuf->reserved_in_use);
-
        if (intel_ring_space(ringbuf) >= n)
                return 0;
 
+       /* The whole point of reserving space is to not wait! */
+       WARN_ON(ringbuf->reserved_in_use);
+
        list_for_each_entry(request, &ring->request_list, list) {
                space = __intel_ring_space(request->postfix, ringbuf->tail,
                                           ringbuf->size);
@@ -2147,21 +2155,11 @@ static int ring_wait_for_space(struct intel_engine_cs *ring, int n)
        return 0;
 }
 
-static int intel_wrap_ring_buffer(struct intel_engine_cs *ring)
+static void __wrap_ring_buffer(struct intel_ringbuffer *ringbuf)
 {
        uint32_t __iomem *virt;
-       struct intel_ringbuffer *ringbuf = ring->buffer;
        int rem = ringbuf->size - ringbuf->tail;
 
-       /* Can't wrap if space has already been reserved! */
-       WARN_ON(ringbuf->reserved_in_use);
-
-       if (ringbuf->space < rem) {
-               int ret = ring_wait_for_space(ring, rem);
-               if (ret)
-                       return ret;
-       }
-
        virt = ringbuf->virtual_start + ringbuf->tail;
        rem /= 4;
        while (rem--)
@@ -2169,19 +2167,12 @@ static int intel_wrap_ring_buffer(struct intel_engine_cs *ring)
 
        ringbuf->tail = 0;
        intel_ring_update_space(ringbuf);
-
-       return 0;
 }
 
 int intel_ring_idle(struct intel_engine_cs *ring)
 {
        struct drm_i915_gem_request *req;
 
-       /* We need to add any requests required to flush the objects and ring */
-       WARN_ON(ring->outstanding_lazy_request);
-       if (ring->outstanding_lazy_request)
-               i915_add_request(ring->outstanding_lazy_request);
-
        /* Wait upon the last request to be completed */
        if (list_empty(&ring->request_list))
                return 0;
@@ -2203,24 +2194,27 @@ int intel_ring_alloc_request_extras(struct drm_i915_gem_request *request)
        return 0;
 }
 
+int intel_ring_reserve_space(struct drm_i915_gem_request *request)
+{
+       /*
+        * The first call merely notes the reserve request and is common for
+        * all back ends. The subsequent localised _begin() call actually
+        * ensures that the reservation is available. Without the begin, if
+        * the request creator immediately submitted the request without
+        * adding any commands to it then there might not actually be
+        * sufficient room for the submission commands.
+        */
+       intel_ring_reserved_space_reserve(request->ringbuf, MIN_SPACE_FOR_ADD_REQUEST);
+
+       return intel_ring_begin(request, 0);
+}
+
 void intel_ring_reserved_space_reserve(struct intel_ringbuffer *ringbuf, int size)
 {
-       /* NB: Until request management is fully tidied up and the OLR is
-        * removed, there are too many ways for get false hits on this
-        * anti-recursion check! */
-       /*WARN_ON(ringbuf->reserved_size);*/
+       WARN_ON(ringbuf->reserved_size);
        WARN_ON(ringbuf->reserved_in_use);
 
        ringbuf->reserved_size = size;
-
-       /*
-        * Really need to call _begin() here but that currently leads to
-        * recursion problems! This will be fixed later but for now just
-        * return and hope for the best. Note that there is only a real
-        * problem if the create of the request never actually calls _begin()
-        * but if they are not submitting any work then why did they create
-        * the request in the first place?
-        */
 }
 
 void intel_ring_reserved_space_cancel(struct intel_ringbuffer *ringbuf)
@@ -2242,9 +2236,21 @@ void intel_ring_reserved_space_use(struct intel_ringbuffer *ringbuf)
 void intel_ring_reserved_space_end(struct intel_ringbuffer *ringbuf)
 {
        WARN_ON(!ringbuf->reserved_in_use);
-       WARN(ringbuf->tail > ringbuf->reserved_tail + ringbuf->reserved_size,
-            "request reserved size too small: %d vs %d!\n",
-            ringbuf->tail - ringbuf->reserved_tail, ringbuf->reserved_size);
+       if (ringbuf->tail > ringbuf->reserved_tail) {
+               WARN(ringbuf->tail > ringbuf->reserved_tail + ringbuf->reserved_size,
+                    "request reserved size too small: %d vs %d!\n",
+                    ringbuf->tail - ringbuf->reserved_tail, ringbuf->reserved_size);
+       } else {
+               /*
+                * The ring was wrapped while the reserved space was in use.
+                * That means that some unknown amount of the ring tail was
+                * no-op filled and skipped. Thus simply adding the ring size
+                * to the tail and doing the above space check will not work.
+                * Rather than attempt to track how much tail was skipped,
+                * it is much simpler to accept that occasionally skipping
+                * the sanity check is not a big issue.
+                */
+       }
 
        ringbuf->reserved_size   = 0;
        ringbuf->reserved_in_use = false;
@@ -2253,45 +2259,61 @@ void intel_ring_reserved_space_end(struct intel_ringbuffer *ringbuf)
 static int __intel_ring_prepare(struct intel_engine_cs *ring, int bytes)
 {
        struct intel_ringbuffer *ringbuf = ring->buffer;
-       int ret;
-
-       /*
-        * Add on the reserved size to the request to make sure that after
-        * the intended commands have been emitted, there is guaranteed to
-        * still be enough free space to send them to the hardware.
-        */
-       if (!ringbuf->reserved_in_use)
-               bytes += ringbuf->reserved_size;
+       int remain_usable = ringbuf->effective_size - ringbuf->tail;
+       int remain_actual = ringbuf->size - ringbuf->tail;
+       int ret, total_bytes, wait_bytes = 0;
+       bool need_wrap = false;
 
-       if (unlikely(ringbuf->tail + bytes > ringbuf->effective_size)) {
-               ret = intel_wrap_ring_buffer(ring);
-               if (unlikely(ret))
-                       return ret;
-
-               if(ringbuf->reserved_size) {
-                       uint32_t size = ringbuf->reserved_size;
+       if (ringbuf->reserved_in_use)
+               total_bytes = bytes;
+       else
+               total_bytes = bytes + ringbuf->reserved_size;
 
-                       intel_ring_reserved_space_cancel(ringbuf);
-                       intel_ring_reserved_space_reserve(ringbuf, size);
+       if (unlikely(bytes > remain_usable)) {
+               /*
+                * Not enough space for the basic request. So need to flush
+                * out the remainder and then wait for base + reserved.
+                */
+               wait_bytes = remain_actual + total_bytes;
+               need_wrap = true;
+       } else {
+               if (unlikely(total_bytes > remain_usable)) {
+                       /*
+                        * The base request will fit but the reserved space
+                        * falls off the end. So only need to wait for the
+                        * reserved size after flushing out the remainder.
+                        */
+                       wait_bytes = remain_actual + ringbuf->reserved_size;
+                       need_wrap = true;
+               } else if (total_bytes > ringbuf->space) {
+                       /* No wrapping required, just waiting. */
+                       wait_bytes = total_bytes;
                }
        }
 
-       if (unlikely(ringbuf->space < bytes)) {
-               ret = ring_wait_for_space(ring, bytes);
+       if (wait_bytes) {
+               ret = ring_wait_for_space(ring, wait_bytes);
                if (unlikely(ret))
                        return ret;
+
+               if (need_wrap)
+                       __wrap_ring_buffer(ringbuf);
        }
 
        return 0;
 }
 
-int intel_ring_begin(struct intel_engine_cs *ring,
+int intel_ring_begin(struct drm_i915_gem_request *req,
                     int num_dwords)
 {
-       struct drm_i915_gem_request *req;
-       struct drm_i915_private *dev_priv = ring->dev->dev_private;
+       struct intel_engine_cs *ring;
+       struct drm_i915_private *dev_priv;
        int ret;
 
+       WARN_ON(req == NULL);
+       ring = req->ring;
+       dev_priv = ring->dev->dev_private;
+
        ret = i915_gem_check_wedge(&dev_priv->gpu_error,
                                   dev_priv->mm.interruptible);
        if (ret)
@@ -2301,18 +2323,14 @@ int intel_ring_begin(struct intel_engine_cs *ring,
        if (ret)
                return ret;
 
-       /* Preallocate the olr before touching the ring */
-       ret = i915_gem_request_alloc(ring, ring->default_context, &req);
-       if (ret)
-               return ret;
-
        ring->buffer->space -= num_dwords * sizeof(uint32_t);
        return 0;
 }
 
 /* Align the ring tail to a cacheline boundary */
-int intel_ring_cacheline_align(struct intel_engine_cs *ring)
+int intel_ring_cacheline_align(struct drm_i915_gem_request *req)
 {
+       struct intel_engine_cs *ring = req->ring;
        int num_dwords = (ring->buffer->tail & (CACHELINE_BYTES - 1)) / sizeof(uint32_t);
        int ret;
 
@@ -2320,7 +2338,7 @@ int intel_ring_cacheline_align(struct intel_engine_cs *ring)
                return 0;
 
        num_dwords = CACHELINE_BYTES / sizeof(uint32_t) - num_dwords;
-       ret = intel_ring_begin(ring, num_dwords);
+       ret = intel_ring_begin(req, num_dwords);
        if (ret)
                return ret;
 
@@ -2337,8 +2355,6 @@ void intel_ring_init_seqno(struct intel_engine_cs *ring, u32 seqno)
        struct drm_device *dev = ring->dev;
        struct drm_i915_private *dev_priv = dev->dev_private;
 
-       BUG_ON(ring->outstanding_lazy_request);
-
        if (INTEL_INFO(dev)->gen == 6 || INTEL_INFO(dev)->gen == 7) {
                I915_WRITE(RING_SYNC_0(ring->mmio_base), 0);
                I915_WRITE(RING_SYNC_1(ring->mmio_base), 0);
@@ -2390,7 +2406,7 @@ static int gen6_bsd_ring_flush(struct drm_i915_gem_request *req,
        uint32_t cmd;
        int ret;
 
-       ret = intel_ring_begin(ring, 4);
+       ret = intel_ring_begin(req, 4);
        if (ret)
                return ret;
 
@@ -2437,12 +2453,14 @@ gen8_ring_dispatch_execbuffer(struct drm_i915_gem_request *req,
                        !(dispatch_flags & I915_DISPATCH_SECURE);
        int ret;
 
-       ret = intel_ring_begin(ring, 4);
+       ret = intel_ring_begin(req, 4);
        if (ret)
                return ret;
 
        /* FIXME(BDW): Address space and security selectors. */
-       intel_ring_emit(ring, MI_BATCH_BUFFER_START_GEN8 | (ppgtt<<8));
+       intel_ring_emit(ring, MI_BATCH_BUFFER_START_GEN8 | (ppgtt<<8) |
+                       (dispatch_flags & I915_DISPATCH_RS ?
+                        MI_BATCH_RESOURCE_STREAMER : 0));
        intel_ring_emit(ring, lower_32_bits(offset));
        intel_ring_emit(ring, upper_32_bits(offset));
        intel_ring_emit(ring, MI_NOOP);
@@ -2459,14 +2477,16 @@ hsw_ring_dispatch_execbuffer(struct drm_i915_gem_request *req,
        struct intel_engine_cs *ring = req->ring;
        int ret;
 
-       ret = intel_ring_begin(ring, 2);
+       ret = intel_ring_begin(req, 2);
        if (ret)
                return ret;
 
        intel_ring_emit(ring,
                        MI_BATCH_BUFFER_START |
                        (dispatch_flags & I915_DISPATCH_SECURE ?
-                        0 : MI_BATCH_PPGTT_HSW | MI_BATCH_NON_SECURE_HSW));
+                        0 : MI_BATCH_PPGTT_HSW | MI_BATCH_NON_SECURE_HSW) |
+                       (dispatch_flags & I915_DISPATCH_RS ?
+                        MI_BATCH_RESOURCE_STREAMER : 0));
        /* bit0-7 is the length on GEN6+ */
        intel_ring_emit(ring, offset);
        intel_ring_advance(ring);
@@ -2482,7 +2502,7 @@ gen6_ring_dispatch_execbuffer(struct drm_i915_gem_request *req,
        struct intel_engine_cs *ring = req->ring;
        int ret;
 
-       ret = intel_ring_begin(ring, 2);
+       ret = intel_ring_begin(req, 2);
        if (ret)
                return ret;
 
@@ -2507,7 +2527,7 @@ static int gen6_ring_flush(struct drm_i915_gem_request *req,
        uint32_t cmd;
        int ret;
 
-       ret = intel_ring_begin(ring, 4);
+       ret = intel_ring_begin(req, 4);
        if (ret)
                return ret;