drm/i915: Update i915_gem_object_sync() to take a request structure
authorJohn Harrison <John.C.Harrison@Intel.com>
Thu, 18 Jun 2015 12:14:56 +0000 (13:14 +0100)
committerDaniel Vetter <daniel.vetter@ffwll.ch>
Tue, 23 Jun 2015 12:02:13 +0000 (14:02 +0200)
The plan is to pass requests around as the basic submission tracking structure
rather than rings and contexts. This patch updates the i915_gem_object_sync()
code path.

v2: Much more complex patch to share a single request between the sync and the
page flip. The _sync() function now supports lazy allocation of the request
structure. That is, if one is passed in then that will be used. If one is not,
then a request will be allocated and passed back out. Note that the _sync() code
does not necessarily require a request. Thus one will only be created until
certain situations. The reason the lazy allocation must be done within the
_sync() code itself is because the decision to need one or not is not really
something that code above can second guess (except in the case where one is
definitely not required because no ring is passed in).

The call chains above _sync() now support passing a request through which most
callers passing in NULL and assuming that no request will be required (because
they also pass in NULL for the ring and therefore can't be generating any ring
code).

The exeception is intel_crtc_page_flip() which now supports having a request
returned from _sync(). If one is, then that request is shared by the page flip
(if the page flip is of a type to need a request). If _sync() does not generate
a request but the page flip does need one, then the page flip path will create
its own request.

v3: Updated comment description to be clearer about 'to_req' parameter (Tomas
Elf review request). Rebased onto newer tree that significantly changed the
synchronisation code.

v4: Updated comments from review feedback (Tomas Elf)

For: VIZ-5115
Signed-off-by: John Harrison <John.C.Harrison@Intel.com>
Reviewed-by: Tomas Elf <tomas.elf@intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
drivers/gpu/drm/i915/i915_drv.h
drivers/gpu/drm/i915/i915_gem.c
drivers/gpu/drm/i915/i915_gem_execbuffer.c
drivers/gpu/drm/i915/intel_display.c
drivers/gpu/drm/i915/intel_drv.h
drivers/gpu/drm/i915/intel_fbdev.c
drivers/gpu/drm/i915/intel_lrc.c
drivers/gpu/drm/i915/intel_overlay.c

index b96d4b1a09780585782136416aa70c2f41d7cbd7..6f2fd3de88e4d1541e07674fc08bff8470b3cc58 100644 (file)
@@ -2805,7 +2805,8 @@ static inline void i915_gem_object_unpin_pages(struct drm_i915_gem_object *obj)
 
 int __must_check i915_mutex_lock_interruptible(struct drm_device *dev);
 int i915_gem_object_sync(struct drm_i915_gem_object *obj,
-                        struct intel_engine_cs *to);
+                        struct intel_engine_cs *to,
+                        struct drm_i915_gem_request **to_req);
 void i915_vma_move_to_active(struct i915_vma *vma,
                             struct intel_engine_cs *ring);
 int i915_gem_dumb_create(struct drm_file *file_priv,
@@ -2916,6 +2917,7 @@ int __must_check
 i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
                                     u32 alignment,
                                     struct intel_engine_cs *pipelined,
+                                    struct drm_i915_gem_request **pipelined_request,
                                     const struct i915_ggtt_view *view);
 void i915_gem_object_unpin_from_display_plane(struct drm_i915_gem_object *obj,
                                              const struct i915_ggtt_view *view);
index 4625a2fdc180f4504a2575fca4d7b1d65ab5ec76..e80b08b864e77b27a5debd01fa56f6961b21e3d8 100644 (file)
@@ -3094,25 +3094,26 @@ out:
 static int
 __i915_gem_object_sync(struct drm_i915_gem_object *obj,
                       struct intel_engine_cs *to,
-                      struct drm_i915_gem_request *req)
+                      struct drm_i915_gem_request *from_req,
+                      struct drm_i915_gem_request **to_req)
 {
        struct intel_engine_cs *from;
        int ret;
 
-       from = i915_gem_request_get_ring(req);
+       from = i915_gem_request_get_ring(from_req);
        if (to == from)
                return 0;
 
-       if (i915_gem_request_completed(req, true))
+       if (i915_gem_request_completed(from_req, true))
                return 0;
 
-       ret = i915_gem_check_olr(req);
+       ret = i915_gem_check_olr(from_req);
        if (ret)
                return ret;
 
        if (!i915_semaphore_is_enabled(obj->base.dev)) {
                struct drm_i915_private *i915 = to_i915(obj->base.dev);
-               ret = __i915_wait_request(req,
+               ret = __i915_wait_request(from_req,
                                          atomic_read(&i915->gpu_error.reset_counter),
                                          i915->mm.interruptible,
                                          NULL,
@@ -3120,15 +3121,23 @@ __i915_gem_object_sync(struct drm_i915_gem_object *obj,
                if (ret)
                        return ret;
 
-               i915_gem_object_retire_request(obj, req);
+               i915_gem_object_retire_request(obj, from_req);
        } else {
                int idx = intel_ring_sync_index(from, to);
-               u32 seqno = i915_gem_request_get_seqno(req);
+               u32 seqno = i915_gem_request_get_seqno(from_req);
+
+               WARN_ON(!to_req);
 
                if (seqno <= from->semaphore.sync_seqno[idx])
                        return 0;
 
-               trace_i915_gem_ring_sync_to(from, to, req);
+               if (*to_req == NULL) {
+                       ret = i915_gem_request_alloc(to, to->default_context, to_req);
+                       if (ret)
+                               return ret;
+               }
+
+               trace_i915_gem_ring_sync_to(from, to, from_req);
                ret = to->semaphore.sync_to(to, from, seqno);
                if (ret)
                        return ret;
@@ -3149,11 +3158,14 @@ __i915_gem_object_sync(struct drm_i915_gem_object *obj,
  *
  * @obj: object which may be in use on another ring.
  * @to: ring we wish to use the object on. May be NULL.
+ * @to_req: request we wish to use the object for. See below.
+ *          This will be allocated and returned if a request is
+ *          required but not passed in.
  *
  * This code is meant to abstract object synchronization with the GPU.
  * Calling with NULL implies synchronizing the object with the CPU
  * rather than a particular GPU ring. Conceptually we serialise writes
- * between engines inside the GPU. We only allow on engine to write
+ * between engines inside the GPU. We only allow one engine to write
  * into a buffer at any time, but multiple readers. To ensure each has
  * a coherent view of memory, we must:
  *
@@ -3164,11 +3176,22 @@ __i915_gem_object_sync(struct drm_i915_gem_object *obj,
  * - If we are a write request (pending_write_domain is set), the new
  *   request must wait for outstanding read requests to complete.
  *
+ * For CPU synchronisation (NULL to) no request is required. For syncing with
+ * rings to_req must be non-NULL. However, a request does not have to be
+ * pre-allocated. If *to_req is NULL and sync commands will be emitted then a
+ * request will be allocated automatically and returned through *to_req. Note
+ * that it is not guaranteed that commands will be emitted (because the system
+ * might already be idle). Hence there is no need to create a request that
+ * might never have any work submitted. Note further that if a request is
+ * returned in *to_req, it is the responsibility of the caller to submit
+ * that request (after potentially adding more work to it).
+ *
  * Returns 0 if successful, else propagates up the lower layer error.
  */
 int
 i915_gem_object_sync(struct drm_i915_gem_object *obj,
-                    struct intel_engine_cs *to)
+                    struct intel_engine_cs *to,
+                    struct drm_i915_gem_request **to_req)
 {
        const bool readonly = obj->base.pending_write_domain == 0;
        struct drm_i915_gem_request *req[I915_NUM_RINGS];
@@ -3190,7 +3213,7 @@ i915_gem_object_sync(struct drm_i915_gem_object *obj,
                                req[n++] = obj->last_read_req[i];
        }
        for (i = 0; i < n; i++) {
-               ret = __i915_gem_object_sync(obj, to, req[i]);
+               ret = __i915_gem_object_sync(obj, to, req[i], to_req);
                if (ret)
                        return ret;
        }
@@ -4140,12 +4163,13 @@ int
 i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
                                     u32 alignment,
                                     struct intel_engine_cs *pipelined,
+                                    struct drm_i915_gem_request **pipelined_request,
                                     const struct i915_ggtt_view *view)
 {
        u32 old_read_domains, old_write_domain;
        int ret;
 
-       ret = i915_gem_object_sync(obj, pipelined);
+       ret = i915_gem_object_sync(obj, pipelined, pipelined_request);
        if (ret)
                return ret;
 
index d0ced5b04f4d32345e1ea05cbc3f96cf8f5eb708..9968c02f76f33db10cf1d9d2d022f5e6f81a2b89 100644 (file)
@@ -904,7 +904,7 @@ i915_gem_execbuffer_move_to_gpu(struct drm_i915_gem_request *req,
                struct drm_i915_gem_object *obj = vma->obj;
 
                if (obj->active & other_rings) {
-                       ret = i915_gem_object_sync(obj, req->ring);
+                       ret = i915_gem_object_sync(obj, req->ring, &req);
                        if (ret)
                                return ret;
                }
index de6f8cc3c6d00cff67c251cc64e6b11465da9966..733308697094def8d479f6f08f03a44af8a4ea64 100644 (file)
@@ -2304,7 +2304,8 @@ int
 intel_pin_and_fence_fb_obj(struct drm_plane *plane,
                           struct drm_framebuffer *fb,
                           const struct drm_plane_state *plane_state,
-                          struct intel_engine_cs *pipelined)
+                          struct intel_engine_cs *pipelined,
+                          struct drm_i915_gem_request **pipelined_request)
 {
        struct drm_device *dev = fb->dev;
        struct drm_i915_private *dev_priv = dev->dev_private;
@@ -2362,7 +2363,7 @@ intel_pin_and_fence_fb_obj(struct drm_plane *plane,
 
        dev_priv->mm.interruptible = false;
        ret = i915_gem_object_pin_to_display_plane(obj, alignment, pipelined,
-                                                  &view);
+                                                  pipelined_request, &view);
        if (ret)
                goto err_interruptible;
 
@@ -11352,6 +11353,7 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc,
        struct intel_unpin_work *work;
        struct intel_engine_cs *ring;
        bool mmio_flip;
+       struct drm_i915_gem_request *request = NULL;
        int ret;
 
        /*
@@ -11458,7 +11460,7 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc,
         */
        ret = intel_pin_and_fence_fb_obj(crtc->primary, fb,
                                         crtc->primary->state,
-                                        mmio_flip ? i915_gem_request_get_ring(obj->last_write_req) : ring);
+                                        mmio_flip ? i915_gem_request_get_ring(obj->last_write_req) : ring, &request);
        if (ret)
                goto cleanup_pending;
 
@@ -11489,6 +11491,9 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc,
                                        intel_ring_get_request(ring));
        }
 
+       if (request)
+               i915_add_request_no_flush(request->ring);
+
        work->flip_queued_vblank = drm_crtc_vblank_count(crtc);
        work->enable_stall_check = true;
 
@@ -11506,6 +11511,8 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc,
 cleanup_unpin:
        intel_unpin_fb_obj(fb, crtc->primary->state);
 cleanup_pending:
+       if (request)
+               i915_gem_request_cancel(request);
        atomic_dec(&intel_crtc->unpin_work_count);
        mutex_unlock(&dev->struct_mutex);
 cleanup:
@@ -13620,7 +13627,7 @@ intel_prepare_plane_fb(struct drm_plane *plane,
                if (ret)
                        DRM_DEBUG_KMS("failed to attach phys object\n");
        } else {
-               ret = intel_pin_and_fence_fb_obj(plane, fb, new_state, NULL);
+               ret = intel_pin_and_fence_fb_obj(plane, fb, new_state, NULL, NULL);
        }
 
        if (ret == 0)
@@ -15560,7 +15567,7 @@ void intel_modeset_gem_init(struct drm_device *dev)
                ret = intel_pin_and_fence_fb_obj(c->primary,
                                                 c->primary->fb,
                                                 c->primary->state,
-                                                NULL);
+                                                NULL, NULL);
                mutex_unlock(&dev->struct_mutex);
                if (ret) {
                        DRM_ERROR("failed to pin boot fb on pipe %d\n",
index e2174fd3030bcce9758de5fdcb7843b42f470a2f..3529c9c9c42093098f977ad63124df7244093bf5 100644 (file)
@@ -1045,7 +1045,8 @@ void intel_release_load_detect_pipe(struct drm_connector *connector,
 int intel_pin_and_fence_fb_obj(struct drm_plane *plane,
                               struct drm_framebuffer *fb,
                               const struct drm_plane_state *plane_state,
-                              struct intel_engine_cs *pipelined);
+                              struct intel_engine_cs *pipelined,
+                              struct drm_i915_gem_request **pipelined_request);
 struct drm_framebuffer *
 __intel_framebuffer_create(struct drm_device *dev,
                           struct drm_mode_fb_cmd2 *mode_cmd,
index 838214666cc373e396982414ee931b0ebcc1ca11..2a1724e34a36ae23aa9a2d47da57cfe21cfc0ab1 100644 (file)
@@ -177,7 +177,7 @@ static int intelfb_alloc(struct drm_fb_helper *helper,
        }
 
        /* Flush everything out, we'll be doing GTT only from now on */
-       ret = intel_pin_and_fence_fb_obj(NULL, fb, NULL, NULL);
+       ret = intel_pin_and_fence_fb_obj(NULL, fb, NULL, NULL, NULL);
        if (ret) {
                DRM_ERROR("failed to pin obj: %d\n", ret);
                goto out_fb;
index 78f3bac9403bb023b92a511b3e9e8cadc7b232cb..7bcf1ec4d6aa0711ec32d76b6c4dc6e0441d1e30 100644 (file)
@@ -637,7 +637,7 @@ static int execlists_move_to_gpu(struct drm_i915_gem_request *req,
                struct drm_i915_gem_object *obj = vma->obj;
 
                if (obj->active & other_rings) {
-                       ret = i915_gem_object_sync(obj, req->ring);
+                       ret = i915_gem_object_sync(obj, req->ring, &req);
                        if (ret)
                                return ret;
                }
index e7534b9466957832f7b80c23f36cd6c188fcc45d..0f8187a121829cad929aa6db513f607ca2045408 100644 (file)
@@ -724,7 +724,7 @@ static int intel_overlay_do_put_image(struct intel_overlay *overlay,
        if (ret != 0)
                return ret;
 
-       ret = i915_gem_object_pin_to_display_plane(new_bo, 0, NULL,
+       ret = i915_gem_object_pin_to_display_plane(new_bo, 0, NULL, NULL,
                                                   &i915_ggtt_view_normal);
        if (ret != 0)
                return ret;