drm/i915: Don't emit semaphore wait if wrap happened
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index b0016bb656311a2126c3c7f7f0a7e26a231671f0..a81b78a59bd94c94a212e747a8fda50742dd40c7 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -192,6 +192,18 @@ i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
        return 0;
 }
 
+void *i915_gem_object_alloc(struct drm_device *dev)
+{
+       struct drm_i915_private *dev_priv = dev->dev_private;
+       return kmem_cache_alloc(dev_priv->slab, GFP_KERNEL | __GFP_ZERO);
+}
+
+void i915_gem_object_free(struct drm_i915_gem_object *obj)
+{
+       struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
+       kmem_cache_free(dev_priv->slab, obj);
+}
+
 static int
 i915_gem_create(struct drm_file *file,
                struct drm_device *dev,
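
The two helpers above route GEM object allocation through a dedicated slab cache
(created in i915_gem_load() at the end of this patch) instead of generic
kzalloc()/kfree(). A minimal standalone sketch of the same pattern, with
illustrative names not taken from the patch:

        /* one cache for all objects of this type, set up once at load time */
        struct kmem_cache *cache =
                kmem_cache_create("my_objects", sizeof(struct my_object),
                                  0, SLAB_HWCACHE_ALIGN, NULL);

        /* __GFP_ZERO preserves the zeroing the old kzalloc() call provided */
        struct my_object *obj = kmem_cache_alloc(cache, GFP_KERNEL | __GFP_ZERO);
        ...
        kmem_cache_free(cache, obj);
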
@@ -215,7 +227,7 @@ i915_gem_create(struct drm_file *file,
        if (ret) {
                drm_gem_object_release(&obj->base);
                i915_gem_info_remove_obj(dev->dev_private, obj->base.size);
-               kfree(obj);
+               i915_gem_object_free(obj);
                return ret;
        }
 
@@ -259,14 +271,6 @@ i915_gem_create_ioctl(struct drm_device *dev, void *data,
                               args->size, &args->handle);
 }
 
-static int i915_gem_object_needs_bit17_swizzle(struct drm_i915_gem_object *obj)
-{
-       drm_i915_private_t *dev_priv = obj->base.dev->dev_private;
-
-       return dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_9_10_17 &&
-               obj->tiling_mode != I915_TILING_NONE;
-}
-
 static inline int
 __copy_to_user_swizzled(char __user *cpu_vaddr,
                        const char *gpu_vaddr, int gpu_offset,
@@ -407,7 +411,6 @@ i915_gem_shmem_pread(struct drm_device *dev,
        loff_t offset;
        int shmem_page_offset, page_length, ret = 0;
        int obj_do_bit17_swizzling, page_do_bit17_swizzling;
-       int hit_slowpath = 0;
        int prefaulted = 0;
        int needs_clflush = 0;
        struct scatterlist *sg;
@@ -469,7 +472,6 @@ i915_gem_shmem_pread(struct drm_device *dev,
                if (ret == 0)
                        goto next_page;
 
-               hit_slowpath = 1;
                mutex_unlock(&dev->struct_mutex);
 
                if (!prefaulted) {
@@ -502,12 +504,6 @@ next_page:
 out:
        i915_gem_object_unpin_pages(obj);
 
-       if (hit_slowpath) {
-               /* Fixup: Kill any reinstated backing storage pages */
-               if (obj->madv == __I915_MADV_PURGED)
-                       i915_gem_object_truncate(obj);
-       }
-
        return ret;
 }
 
@@ -838,12 +834,13 @@ out:
        i915_gem_object_unpin_pages(obj);
 
        if (hit_slowpath) {
-               /* Fixup: Kill any reinstated backing storage pages */
-               if (obj->madv == __I915_MADV_PURGED)
-                       i915_gem_object_truncate(obj);
-               /* and flush dirty cachelines in case the object isn't in the cpu write
-                * domain anymore. */
-               if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
+               /*
+                * Fixup: Flush cpu caches in case we didn't flush the dirty
+                * cachelines in-line while writing and the object moved
+                * out of the cpu write domain while we've dropped the lock.
+                */
+               if (!needs_clflush_after &&
+                   obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
                        i915_gem_clflush_object(obj);
                        i915_gem_chipset_flush(dev);
                }
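
The rewritten comment is worth unpacking: needs_clflush_after means the write
path already flushed each dirty cacheline inline as it copied, so the only case
left to repair is the object having been moved out of the CPU write domain while
struct_mutex was dropped on the slow path. Condensed, the post-loop fixup is
(both helpers are existing i915 functions):

        if (hit_slowpath && !needs_clflush_after &&
            obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
                i915_gem_clflush_object(obj);   /* flush dirty cachelines */
                i915_gem_chipset_flush(dev);    /* and the chipset write buffers */
        }
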
@@ -1857,11 +1854,11 @@ i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
 
 void
 i915_gem_object_move_to_active(struct drm_i915_gem_object *obj,
-                              struct intel_ring_buffer *ring,
-                              u32 seqno)
+                              struct intel_ring_buffer *ring)
 {
        struct drm_device *dev = obj->base.dev;
        struct drm_i915_private *dev_priv = dev->dev_private;
+       u32 seqno = intel_ring_get_seqno(ring);
 
        BUG_ON(ring == NULL);
        obj->ring = ring;
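
i915_gem_object_move_to_active() now pulls the seqno from the ring rather than
taking it as a parameter. intel_ring_get_seqno() is expected to simply hand back
the preallocated lazy seqno; a sketch consistent with how the rest of the patch
uses it (the real helper lives in intel_ringbuffer.h):

        static inline u32 intel_ring_get_seqno(struct intel_ring_buffer *ring)
        {
                /* a seqno must have been reserved before any ring emission */
                BUG_ON(ring->outstanding_lazy_request == 0);
                return ring->outstanding_lazy_request;
        }
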
@@ -1922,26 +1919,58 @@ i915_gem_object_move_to_inactive(struct drm_i915_gem_object *obj)
        WARN_ON(i915_verify_lists(dev));
 }
 
-static u32
-i915_gem_get_seqno(struct drm_device *dev)
+static int
+i915_gem_handle_seqno_wrap(struct drm_device *dev)
 {
-       drm_i915_private_t *dev_priv = dev->dev_private;
-       u32 seqno = dev_priv->next_seqno;
+       struct drm_i915_private *dev_priv = dev->dev_private;
+       struct intel_ring_buffer *ring;
+       int ret, i, j;
 
-       /* reserve 0 for non-seqno */
-       if (++dev_priv->next_seqno == 0)
-               dev_priv->next_seqno = 1;
+       /* The hardware uses various monotonic 32-bit counters; if we
+        * detect that they will wrap around, we need to idle the GPU
+        * and reset those counters.
+        */
+       ret = 0;
+       for_each_ring(ring, dev_priv, i) {
+               for (j = 0; j < ARRAY_SIZE(ring->sync_seqno); j++)
+                       ret |= ring->sync_seqno[j] != 0;
+       }
+       if (ret == 0)
+               return ret;
 
-       return seqno;
+       ret = i915_gpu_idle(dev);
+       if (ret)
+               return ret;
+
+       i915_gem_retire_requests(dev);
+       for_each_ring(ring, dev_priv, i) {
+               ret = intel_ring_handle_seqno_wrap(ring);
+               if (ret)
+                       return ret;
+
+               for (j = 0; j < ARRAY_SIZE(ring->sync_seqno); j++)
+                       ring->sync_seqno[j] = 0;
+       }
+
+       return 0;
 }
 
-u32
-i915_gem_next_request_seqno(struct intel_ring_buffer *ring)
+int
+i915_gem_get_seqno(struct drm_device *dev, u32 *seqno)
 {
-       if (ring->outstanding_lazy_request == 0)
-               ring->outstanding_lazy_request = i915_gem_get_seqno(ring->dev);
+       struct drm_i915_private *dev_priv = dev->dev_private;
+
+       /* reserve 0 for non-seqno */
+       if (dev_priv->next_seqno == 0) {
+               int ret = i915_gem_handle_seqno_wrap(dev);
+               if (ret)
+                       return ret;
 
-       return ring->outstanding_lazy_request;
+               dev_priv->next_seqno = 1;
+       }
+
+       *seqno = dev_priv->last_seqno = dev_priv->next_seqno++;
+       return 0;
 }
 
 int
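
The sync_seqno[] reset matters because CPU-side seqno comparisons are wrap-safe
while the hardware semaphore wait is not. Driver code compares seqnos with
signed 32-bit arithmetic, as in the existing i915_seqno_passed() helper:

        /* true if seq1 is at or after seq2, even across a u32 wrap */
        static inline bool i915_seqno_passed(u32 seq1, u32 seq2)
        {
                return (s32)(seq1 - seq2) >= 0;
        }

The MI_SEMAPHORE_MBOX wait, by contrast, performs an unsigned greater-or-equal
comparison in hardware, so a post-wrap wait value would stall forever against a
pre-wrap mailbox value. Idling the GPU and zeroing every ring's sync_seqno[]
before reusing seqno 1 is what lets the driver avoid emitting such a wait — the
behaviour the commit title refers to.
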
@@ -1952,7 +1981,6 @@ i915_add_request(struct intel_ring_buffer *ring,
        drm_i915_private_t *dev_priv = ring->dev->dev_private;
        struct drm_i915_gem_request *request;
        u32 request_ring_position;
-       u32 seqno;
        int was_empty;
        int ret;
 
@@ -1971,7 +1999,6 @@ i915_add_request(struct intel_ring_buffer *ring,
        if (request == NULL)
                return -ENOMEM;
 
-       seqno = i915_gem_next_request_seqno(ring);
 
        /* Record the position of the start of the request so that
         * should we detect the updated seqno part-way through the
@@ -1980,15 +2007,13 @@ i915_add_request(struct intel_ring_buffer *ring,
         */
        request_ring_position = intel_ring_get_tail(ring);
 
-       ret = ring->add_request(ring, &seqno);
+       ret = ring->add_request(ring);
        if (ret) {
                kfree(request);
                return ret;
        }
 
-       trace_i915_gem_request_add(ring, seqno);
-
-       request->seqno = seqno;
+       request->seqno = intel_ring_get_seqno(ring);
        request->ring = ring;
        request->tail = request_ring_position;
        request->emitted_jiffies = jiffies;
@@ -2006,6 +2031,7 @@ i915_add_request(struct intel_ring_buffer *ring,
                spin_unlock(&file_priv->mm.lock);
        }
 
+       trace_i915_gem_request_add(ring, request->seqno);
        ring->outstanding_lazy_request = 0;
 
        if (!dev_priv->mm.suspended) {
@@ -2022,7 +2048,7 @@ i915_add_request(struct intel_ring_buffer *ring,
        }
 
        if (out_seqno)
-               *out_seqno = seqno;
+               *out_seqno = request->seqno;
        return 0;
 }
 
@@ -2120,7 +2146,6 @@ void
 i915_gem_retire_requests_ring(struct intel_ring_buffer *ring)
 {
        uint32_t seqno;
-       int i;
 
        if (list_empty(&ring->request_list))
                return;
@@ -2129,10 +2154,6 @@ i915_gem_retire_requests_ring(struct intel_ring_buffer *ring)
 
        seqno = ring->get_seqno(ring, true);
 
-       for (i = 0; i < ARRAY_SIZE(ring->sync_seqno); i++)
-               if (seqno >= ring->sync_seqno[i])
-                       ring->sync_seqno[i] = 0;
-
        while (!list_empty(&ring->request_list)) {
                struct drm_i915_gem_request *request;
 
@@ -2377,7 +2398,11 @@ i915_gem_object_sync(struct drm_i915_gem_object *obj,
 
        ret = to->sync_to(to, from, seqno);
        if (!ret)
-               from->sync_seqno[idx] = seqno;
+               /* We use last_read_seqno because sync_to()
+                * might have just caused seqno wrap under
+                * the radar.
+                */
+               from->sync_seqno[idx] = obj->last_read_seqno;
 
        return ret;
 }
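
The comment is terse, so to spell out the hazard: to->sync_to() may itself
reserve a seqno and thereby trip the wrap handling added above, which idles the
GPU and zeroes every sync_seqno[] table. Writing back a seqno snapshotted before
the call would then reinstate a stale pre-wrap value (values here hypothetical):

        seqno = obj->last_read_seqno;           /* snapshot, e.g. 0xfffffffe */
        ret = to->sync_to(to, from, seqno);     /* may idle + wrap, zeroing sync_seqno[] */
        from->sync_seqno[idx] = seqno;          /* stale: poisons the freshly reset table */

Re-reading obj->last_read_seqno after sync_to() avoids this: if a wrap occurred,
the idle will have retired the object, and the re-read yields a value consistent
with the post-wrap state.
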
@@ -2460,14 +2485,6 @@ i915_gem_object_unbind(struct drm_i915_gem_object *obj)
        return 0;
 }
 
-static int i915_ring_idle(struct intel_ring_buffer *ring)
-{
-       if (list_empty(&ring->active_list))
-               return 0;
-
-       return i915_wait_seqno(ring, i915_gem_next_request_seqno(ring));
-}
-
 int i915_gpu_idle(struct drm_device *dev)
 {
        drm_i915_private_t *dev_priv = dev->dev_private;
@@ -2480,7 +2497,7 @@ int i915_gpu_idle(struct drm_device *dev)
                if (ret)
                        return ret;
 
-               ret = i915_ring_idle(ring);
+               ret = intel_ring_idle(ring);
                if (ret)
                        return ret;
        }
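
i915_ring_idle() had to go because it leaned on i915_gem_next_request_seqno(),
which no longer exists now that seqno allocation can fail and is done up front.
A plausible sketch of its replacement, intel_ring_idle() (the real
implementation lives in intel_ringbuffer.c):

        int intel_ring_idle(struct intel_ring_buffer *ring)
        {
                u32 seqno;
                int ret;

                /* flush any outstanding lazy request onto the request list */
                if (ring->outstanding_lazy_request) {
                        ret = i915_add_request(ring, NULL, NULL);
                        if (ret)
                                return ret;
                }

                if (list_empty(&ring->request_list))
                        return 0;

                /* then wait upon the seqno of the newest request */
                seqno = list_entry(ring->request_list.prev,
                                   struct drm_i915_gem_request, list)->seqno;
                return i915_wait_seqno(ring, seqno);
        }
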
@@ -2797,7 +2814,7 @@ static bool i915_gem_valid_gtt_space(struct drm_device *dev,
 
        /* On non-LLC machines we have to be careful when putting differing
         * types of snoopable memory together to avoid the prefetcher
-        * crossing memory domains and dieing.
+        * crossing memory domains and dying.
         */
        if (HAS_LLC(dev))
                return true;
@@ -3684,14 +3701,14 @@ struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev,
 {
        struct drm_i915_gem_object *obj;
        struct address_space *mapping;
-       u32 mask;
+       gfp_t mask;
 
-       obj = kzalloc(sizeof(*obj), GFP_KERNEL);
+       obj = i915_gem_object_alloc(dev);
        if (obj == NULL)
                return NULL;
 
        if (drm_gem_object_init(dev, &obj->base, size) != 0) {
-               kfree(obj);
+               i915_gem_object_free(obj);
                return NULL;
        }
 
@@ -3763,6 +3780,7 @@ void i915_gem_free_object(struct drm_gem_object *gem_obj)
        obj->pages_pin_count = 0;
        i915_gem_object_put_pages(obj);
        i915_gem_object_free_mmap_offset(obj);
+       i915_gem_object_release_stolen(obj);
 
        BUG_ON(obj->pages);
 
@@ -3773,7 +3791,7 @@ void i915_gem_free_object(struct drm_gem_object *gem_obj)
        i915_gem_info_remove_obj(dev_priv, obj->base.size);
 
        kfree(obj->bit_17);
-       kfree(obj);
+       i915_gem_object_free(obj);
 }
 
 int
@@ -4091,8 +4109,14 @@ init_ring_lists(struct intel_ring_buffer *ring)
 void
 i915_gem_load(struct drm_device *dev)
 {
-       int i;
        drm_i915_private_t *dev_priv = dev->dev_private;
+       int i;
+
+       dev_priv->slab =
+               kmem_cache_create("i915_gem_object",
+                                 sizeof(struct drm_i915_gem_object), 0,
+                                 SLAB_HWCACHE_ALIGN,
+                                 NULL);
 
        INIT_LIST_HEAD(&dev_priv->mm.active_list);
        INIT_LIST_HEAD(&dev_priv->mm.inactive_list);
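
One caveat with the new cache: kmem_cache_create() returns NULL on failure, and
the hunk as shown does not check for it, so a very early allocation failure
would oops in i915_gem_object_alloc(). A defensive variant, plus the matching
teardown the cache eventually needs on the driver-unload path (placement here
is illustrative, not from the patch):

        dev_priv->slab =
                kmem_cache_create("i915_gem_object",
                                  sizeof(struct drm_i915_gem_object), 0,
                                  SLAB_HWCACHE_ALIGN, NULL);
        if (dev_priv->slab == NULL) {
                /* a real fallback would fail the load or revert to kzalloc() */
                DRM_ERROR("i915_gem_object slab creation failed\n");
        }

        /* ...and on driver unload: */
        kmem_cache_destroy(dev_priv->slab);
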