drm/i915: Update rules for writing through the LLC with the cpu
authorChris Wilson <chris@chris-wilson.co.uk>
Fri, 9 Aug 2013 11:26:45 +0000 (12:26 +0100)
committerDaniel Vetter <daniel.vetter@ffwll.ch>
Sat, 10 Aug 2013 09:20:49 +0000 (11:20 +0200)
As mentioned in the previous commit, reads and writes from both the CPU
and GPU go through the LLC. This gives us coherency between the CPU and
GPU irrespective of the attribute settings either device sets. We can
use to avoid having to clflush even uncached memory.

Except for the scanout.

The scanout resides within another functional block that does not use
the LLC but reads directly from main memory. So in order to maintain
coherency with the scanout, writes to uncached memory must be flushed.
In order to optimize writes elsewhere, we start tracking whether an
framebuffer is attached to an object.

v2: Use pin_display tracking rather than fb_count (to ensure we flush
cursors as well etc) and only force the clflush along explicit writes to
the scanout paths (i.e. pin_to_display_plane and pwrite into scanout).

v3: Force the flush after hitting the slowpath in pwrite, as after
dropping the lock the object's cache domain may be invalidated. (Ville)

Based on a patch by Ville Syrjälä.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
Reviewed-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
drivers/gpu/drm/i915/i915_drv.h
drivers/gpu/drm/i915/i915_gem.c
drivers/gpu/drm/i915/i915_gem_execbuffer.c
drivers/gpu/drm/i915/i915_gem_gtt.c

index b5df2308f8e6533d752a58e2aa7b719c1efd935e..34d3f2fae8ac21e7162ea87eeb7d69dc5c9ad5be 100644 (file)
@@ -1841,7 +1841,7 @@ static inline bool i915_terminally_wedged(struct i915_gpu_error *error)
 }
 
 void i915_gem_reset(struct drm_device *dev);
-void i915_gem_clflush_object(struct drm_i915_gem_object *obj);
+void i915_gem_clflush_object(struct drm_i915_gem_object *obj, bool force);
 int __must_check i915_gem_object_finish_gpu(struct drm_i915_gem_object *obj);
 int __must_check i915_gem_init(struct drm_device *dev);
 int __must_check i915_gem_init_hw(struct drm_device *dev);
index b98c3b0e5a02970647d61093412bc069ec040503..54d76e9392d8825672f232c323682a7cd803b74f 100644 (file)
@@ -37,7 +37,8 @@
 #include <linux/dma-buf.h>
 
 static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj);
-static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj);
+static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj,
+                                                  bool force);
 static __must_check int
 i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
                           struct i915_address_space *vm,
@@ -67,6 +68,14 @@ static bool cpu_cache_is_coherent(struct drm_device *dev,
        return HAS_LLC(dev) || level != I915_CACHE_NONE;
 }
 
+static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj)
+{
+       if (!cpu_cache_is_coherent(obj->base.dev, obj->cache_level))
+               return true;
+
+       return obj->pin_display;
+}
+
 static inline void i915_gem_object_fence_lost(struct drm_i915_gem_object *obj)
 {
        if (obj->tiling_mode)
@@ -742,8 +751,7 @@ i915_gem_shmem_pwrite(struct drm_device *dev,
                 * write domain and manually flush cachelines (if required). This
                 * optimizes for the case when the gpu will use the data
                 * right away and we therefore have to clflush anyway. */
-               if (obj->cache_level == I915_CACHE_NONE)
-                       needs_clflush_after = 1;
+               needs_clflush_after = cpu_write_needs_clflush(obj);
                if (i915_gem_obj_bound_any(obj)) {
                        ret = i915_gem_object_set_to_gtt_domain(obj, true);
                        if (ret)
@@ -833,7 +841,7 @@ out:
                 */
                if (!needs_clflush_after &&
                    obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
-                       i915_gem_clflush_object(obj);
+                       i915_gem_clflush_object(obj, obj->pin_display);
                        i915_gem_chipset_flush(dev);
                }
        }
@@ -911,9 +919,9 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
                goto out;
        }
 
-       if (obj->cache_level == I915_CACHE_NONE &&
-           obj->tiling_mode == I915_TILING_NONE &&
-           obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
+       if (obj->tiling_mode == I915_TILING_NONE &&
+           obj->base.write_domain != I915_GEM_DOMAIN_CPU &&
+           cpu_write_needs_clflush(obj)) {
                ret = i915_gem_gtt_pwrite_fast(dev, obj, args, file);
                /* Note that the gtt paths might fail with non-page-backed user
                 * pointers (e.g. gtt mappings when moving data between
@@ -1262,8 +1270,8 @@ i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
        }
 
        /* Pinned buffers may be scanout, so flush the cache */
-       if (obj->pin_count)
-               i915_gem_object_flush_cpu_write_domain(obj);
+       if (obj->pin_display)
+               i915_gem_object_flush_cpu_write_domain(obj, true);
 
        drm_gem_object_unreference(&obj->base);
 unlock:
@@ -1640,7 +1648,7 @@ i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj)
                 * hope for the best.
                 */
                WARN_ON(ret != -EIO);
-               i915_gem_clflush_object(obj);
+               i915_gem_clflush_object(obj, true);
                obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU;
        }
 
@@ -3217,7 +3225,8 @@ err_unpin:
 }
 
 void
-i915_gem_clflush_object(struct drm_i915_gem_object *obj)
+i915_gem_clflush_object(struct drm_i915_gem_object *obj,
+                       bool force)
 {
        /* If we don't have a page list set up, then we're not pinned
         * to GPU, and we can ignore the cache flush because it'll happen
@@ -3241,7 +3250,7 @@ i915_gem_clflush_object(struct drm_i915_gem_object *obj)
         * snooping behaviour occurs naturally as the result of our domain
         * tracking.
         */
-       if (obj->cache_level != I915_CACHE_NONE)
+       if (!force && cpu_cache_is_coherent(obj->base.dev, obj->cache_level))
                return;
 
        trace_i915_gem_object_clflush(obj);
@@ -3278,14 +3287,15 @@ i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj)
 
 /** Flushes the CPU write domain for the object if it's dirty. */
 static void
-i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj)
+i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj,
+                                      bool force)
 {
        uint32_t old_write_domain;
 
        if (obj->base.write_domain != I915_GEM_DOMAIN_CPU)
                return;
 
-       i915_gem_clflush_object(obj);
+       i915_gem_clflush_object(obj, force);
        i915_gem_chipset_flush(obj->base.dev);
        old_write_domain = obj->base.write_domain;
        obj->base.write_domain = 0;
@@ -3319,7 +3329,7 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
        if (ret)
                return ret;
 
-       i915_gem_object_flush_cpu_write_domain(obj);
+       i915_gem_object_flush_cpu_write_domain(obj, false);
 
        /* Serialise direct access to this object with the barriers for
         * coherent writes from the GPU, by effectively invalidating the
@@ -3409,7 +3419,11 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
                                               obj, cache_level);
        }
 
-       if (cache_level == I915_CACHE_NONE) {
+       list_for_each_entry(vma, &obj->vma_list, vma_link)
+               vma->node.color = cache_level;
+       obj->cache_level = cache_level;
+
+       if (cpu_write_needs_clflush(obj)) {
                u32 old_read_domains, old_write_domain;
 
                /* If we're coming from LLC cached, then we haven't
@@ -3432,9 +3446,6 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
                                                    old_write_domain);
        }
 
-       list_for_each_entry(vma, &obj->vma_list, vma_link)
-               vma->node.color = cache_level;
-       obj->cache_level = cache_level;
        i915_gem_verify_gtt(dev);
        return 0;
 }
@@ -3562,7 +3573,7 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
        if (ret)
                goto err_unpin_display;
 
-       i915_gem_object_flush_cpu_write_domain(obj);
+       i915_gem_object_flush_cpu_write_domain(obj, true);
 
        old_write_domain = obj->base.write_domain;
        old_read_domains = obj->base.read_domains;
@@ -3634,8 +3645,7 @@ i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
 
        /* Flush the CPU cache if it's still invalid. */
        if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) {
-               if (!cpu_cache_is_coherent(obj->base.dev, obj->cache_level))
-                       i915_gem_clflush_object(obj);
+               i915_gem_clflush_object(obj, false);
 
                obj->base.read_domains |= I915_GEM_DOMAIN_CPU;
        }
@@ -3817,10 +3827,6 @@ i915_gem_pin_ioctl(struct drm_device *dev, void *data,
        obj->user_pin_count++;
        obj->pin_filp = file;
 
-       /* XXX - flush the CPU caches for pinned objects
-        * as the X server doesn't manage domains yet
-        */
-       i915_gem_object_flush_cpu_write_domain(obj);
        args->offset = i915_gem_obj_ggtt_offset(obj);
 out:
        drm_gem_object_unreference(&obj->base);
index 8ccc29ac962938739da9407dedb07e21aaa2a1f2..e999578a021caab3847e8fd24f9196a24f9f72e1 100644 (file)
@@ -716,7 +716,7 @@ i915_gem_execbuffer_move_to_gpu(struct intel_ring_buffer *ring,
                        return ret;
 
                if (obj->base.write_domain & I915_GEM_DOMAIN_CPU)
-                       i915_gem_clflush_object(obj);
+                       i915_gem_clflush_object(obj, false);
 
                flush_domains |= obj->base.write_domain;
        }
index 24fb989593f030f98c81ed13f11a014b2a467b1a..c9420c280cf00838989d5af48acfb3cedcf3bc21 100644 (file)
@@ -487,7 +487,7 @@ void i915_gem_restore_gtt_mappings(struct drm_device *dev)
                                       dev_priv->gtt.base.total / PAGE_SIZE);
 
        list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
-               i915_gem_clflush_object(obj);
+               i915_gem_clflush_object(obj, obj->pin_display);
                i915_gem_gtt_bind_object(obj, obj->cache_level);
        }