drm/i915: Allow tiling of objects with bit 17 swizzling by the CPU.
authorEric Anholt <eric@anholt.net>
Thu, 12 Mar 2009 23:56:27 +0000 (16:56 -0700)
committerEric Anholt <eric@anholt.net>
Wed, 8 Apr 2009 17:50:57 +0000 (10:50 -0700)
Save the bit 17 state of the pages when freeing the page list, and
reswizzle them if necessary when rebinding the pages (in case they were
swapped out).  Since we have userland with expectations that the swizzle
enums let it pread and pwrite contents accurately, we can't expose a new
swizzle enum for bit 17 (which it would have to GTT map to handle), so we
handle it down in pread and pwrite by swizzling the copy when bit 17 of the
page address is set.

Signed-off-by: Eric Anholt <eric@anholt.net>
drivers/gpu/drm/i915/i915_drv.h
drivers/gpu/drm/i915/i915_gem.c
drivers/gpu/drm/i915/i915_gem_tiling.c
include/drm/i915_drm.h

index efcd610d4fca7c56f748c1083a67807d7f45faa9..bccd4146d55c50c0a1c09ebf80f8fa0e50dd9eb9 100644 (file)
@@ -446,6 +446,9 @@ struct drm_i915_gem_object {
        uint32_t tiling_mode;
        uint32_t stride;
 
+       /** Record of address bit 17 of each page at last unbind. */
+       long *bit_17;
+
        /** AGP mapping type (AGP_USER_MEMORY or AGP_USER_CACHED_MEMORY */
        uint32_t agp_type;
 
@@ -640,6 +643,8 @@ void i915_gem_object_put_pages(struct drm_gem_object *obj);
 
 /* i915_gem_tiling.c */
 void i915_gem_detect_bit_6_swizzle(struct drm_device *dev);
+void i915_gem_object_do_bit_17_swizzle(struct drm_gem_object *obj);
+void i915_gem_object_save_bit_17_swizzle(struct drm_gem_object *obj);
 
 /* i915_gem_debug.c */
 void i915_gem_dump_object(struct drm_gem_object *obj, int len,
index 3a1189d94a9a793852767a23ddbf34d08d510c4e..6dca9fc7c1dbd40bb12bad95986d148501af1d49 100644 (file)
@@ -155,6 +155,15 @@ fast_shmem_read(struct page **pages,
        return 0;
 }
 
+static int i915_gem_object_needs_bit17_swizzle(struct drm_gem_object *obj)
+{
+       drm_i915_private_t *dev_priv = obj->dev->dev_private;
+       struct drm_i915_gem_object *obj_priv = obj->driver_private;
+
+       return dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_9_10_17 &&
+               obj_priv->tiling_mode != I915_TILING_NONE;
+}
+
 static inline int
 slow_shmem_copy(struct page *dst_page,
                int dst_offset,
@@ -182,6 +191,64 @@ slow_shmem_copy(struct page *dst_page,
        return 0;
 }
 
+static inline int
+slow_shmem_bit17_copy(struct page *gpu_page,
+                     int gpu_offset,
+                     struct page *cpu_page,
+                     int cpu_offset,
+                     int length,
+                     int is_read)
+{
+       char *gpu_vaddr, *cpu_vaddr;
+
+       /* Use the unswizzled path if this page isn't affected. */
+       if ((page_to_phys(gpu_page) & (1 << 17)) == 0) {
+               if (is_read)
+                       return slow_shmem_copy(cpu_page, cpu_offset,
+                                              gpu_page, gpu_offset, length);
+               else
+                       return slow_shmem_copy(gpu_page, gpu_offset,
+                                              cpu_page, cpu_offset, length);
+       }
+
+       gpu_vaddr = kmap_atomic(gpu_page, KM_USER0);
+       if (gpu_vaddr == NULL)
+               return -ENOMEM;
+
+       cpu_vaddr = kmap_atomic(cpu_page, KM_USER1);
+       if (cpu_vaddr == NULL) {
+               kunmap_atomic(gpu_vaddr, KM_USER0);
+               return -ENOMEM;
+       }
+
+       /* Copy the data, XORing A6 with A17 (1). The user already knows he's
+        * XORing with the other bits (A9 for Y, A9 and A10 for X)
+        */
+       while (length > 0) {
+               int cacheline_end = ALIGN(gpu_offset + 1, 64);
+               int this_length = min(cacheline_end - gpu_offset, length);
+               int swizzled_gpu_offset = gpu_offset ^ 64;
+
+               if (is_read) {
+                       memcpy(cpu_vaddr + cpu_offset,
+                              gpu_vaddr + swizzled_gpu_offset,
+                              this_length);
+               } else {
+                       memcpy(gpu_vaddr + swizzled_gpu_offset,
+                              cpu_vaddr + cpu_offset,
+                              this_length);
+               }
+               cpu_offset += this_length;
+               gpu_offset += this_length;
+               length -= this_length;
+       }
+
+       kunmap_atomic(cpu_vaddr, KM_USER1);
+       kunmap_atomic(gpu_vaddr, KM_USER0);
+
+       return 0;
+}
+
 /**
  * This is the fast shmem pread path, which attempts to copy_from_user directly
  * from the backing pages of the object to the user's address space.  On a
@@ -270,6 +337,7 @@ i915_gem_shmem_pread_slow(struct drm_device *dev, struct drm_gem_object *obj,
        int page_length;
        int ret;
        uint64_t data_ptr = args->data_ptr;
+       int do_bit17_swizzling;
 
        remain = args->size;
 
@@ -294,6 +362,8 @@ i915_gem_shmem_pread_slow(struct drm_device *dev, struct drm_gem_object *obj,
                goto fail_put_user_pages;
        }
 
+       do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);
+
        mutex_lock(&dev->struct_mutex);
 
        ret = i915_gem_object_get_pages(obj);
@@ -328,11 +398,20 @@ i915_gem_shmem_pread_slow(struct drm_device *dev, struct drm_gem_object *obj,
                if ((data_page_offset + page_length) > PAGE_SIZE)
                        page_length = PAGE_SIZE - data_page_offset;
 
-               ret = slow_shmem_copy(user_pages[data_page_index],
-                                     data_page_offset,
-                                     obj_priv->pages[shmem_page_index],
-                                     shmem_page_offset,
-                                     page_length);
+               if (do_bit17_swizzling) {
+                       ret = slow_shmem_bit17_copy(obj_priv->pages[shmem_page_index],
+                                                   shmem_page_offset,
+                                                   user_pages[data_page_index],
+                                                   data_page_offset,
+                                                   page_length,
+                                                   1);
+               } else {
+                       ret = slow_shmem_copy(user_pages[data_page_index],
+                                             data_page_offset,
+                                             obj_priv->pages[shmem_page_index],
+                                             shmem_page_offset,
+                                             page_length);
+               }
                if (ret)
                        goto fail_put_pages;
 
@@ -384,9 +463,14 @@ i915_gem_pread_ioctl(struct drm_device *dev, void *data,
                return -EINVAL;
        }
 
-       ret = i915_gem_shmem_pread_fast(dev, obj, args, file_priv);
-       if (ret != 0)
+       if (i915_gem_object_needs_bit17_swizzle(obj)) {
                ret = i915_gem_shmem_pread_slow(dev, obj, args, file_priv);
+       } else {
+               ret = i915_gem_shmem_pread_fast(dev, obj, args, file_priv);
+               if (ret != 0)
+                       ret = i915_gem_shmem_pread_slow(dev, obj, args,
+                                                       file_priv);
+       }
 
        drm_gem_object_unreference(obj);
 
@@ -728,6 +812,7 @@ i915_gem_shmem_pwrite_slow(struct drm_device *dev, struct drm_gem_object *obj,
        int page_length;
        int ret;
        uint64_t data_ptr = args->data_ptr;
+       int do_bit17_swizzling;
 
        remain = args->size;
 
@@ -752,6 +837,8 @@ i915_gem_shmem_pwrite_slow(struct drm_device *dev, struct drm_gem_object *obj,
                goto fail_put_user_pages;
        }
 
+       do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);
+
        mutex_lock(&dev->struct_mutex);
 
        ret = i915_gem_object_get_pages(obj);
@@ -786,11 +873,20 @@ i915_gem_shmem_pwrite_slow(struct drm_device *dev, struct drm_gem_object *obj,
                if ((data_page_offset + page_length) > PAGE_SIZE)
                        page_length = PAGE_SIZE - data_page_offset;
 
-               ret = slow_shmem_copy(obj_priv->pages[shmem_page_index],
-                                     shmem_page_offset,
-                                     user_pages[data_page_index],
-                                     data_page_offset,
-                                     page_length);
+               if (do_bit17_swizzling) {
+                       ret = slow_shmem_bit17_copy(obj_priv->pages[shmem_page_index],
+                                                   shmem_page_offset,
+                                                   user_pages[data_page_index],
+                                                   data_page_offset,
+                                                   page_length,
+                                                   0);
+               } else {
+                       ret = slow_shmem_copy(obj_priv->pages[shmem_page_index],
+                                             shmem_page_offset,
+                                             user_pages[data_page_index],
+                                             data_page_offset,
+                                             page_length);
+               }
                if (ret)
                        goto fail_put_pages;
 
@@ -855,6 +951,8 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
                        ret = i915_gem_gtt_pwrite_slow(dev, obj, args,
                                                       file_priv);
                }
+       } else if (i915_gem_object_needs_bit17_swizzle(obj)) {
+               ret = i915_gem_shmem_pwrite_slow(dev, obj, args, file_priv);
        } else {
                ret = i915_gem_shmem_pwrite_fast(dev, obj, args, file_priv);
                if (ret == -EFAULT) {
@@ -1298,6 +1396,9 @@ i915_gem_object_put_pages(struct drm_gem_object *obj)
        if (--obj_priv->pages_refcount != 0)
                return;
 
+       if (obj_priv->tiling_mode != I915_TILING_NONE)
+               i915_gem_object_save_bit_17_swizzle(obj);
+
        for (i = 0; i < page_count; i++)
                if (obj_priv->pages[i] != NULL) {
                        if (obj_priv->dirty)
@@ -1923,6 +2024,10 @@ i915_gem_object_get_pages(struct drm_gem_object *obj)
                }
                obj_priv->pages[i] = page;
        }
+
+       if (obj_priv->tiling_mode != I915_TILING_NONE)
+               i915_gem_object_do_bit_17_swizzle(obj);
+
        return 0;
 }
 
@@ -3601,6 +3706,7 @@ void i915_gem_free_object(struct drm_gem_object *obj)
        i915_gem_free_mmap_offset(obj);
 
        drm_free(obj_priv->page_cpu_valid, 1, DRM_MEM_DRIVER);
+       kfree(obj_priv->bit_17);
        drm_free(obj->driver_private, 1, DRM_MEM_DRIVER);
 }
 
index 6be3f927c86a78b9a4e85afbfc8abc48a2619d1b..f27e523c764fa41f342ccc97574453388f29a476 100644 (file)
@@ -25,6 +25,8 @@
  *
  */
 
+#include "linux/string.h"
+#include "linux/bitops.h"
 #include "drmP.h"
 #include "drm.h"
 #include "i915_drm.h"
@@ -127,8 +129,8 @@ i915_gem_detect_bit_6_swizzle(struct drm_device *dev)
                                swizzle_y = I915_BIT_6_SWIZZLE_9_11;
                        } else {
                                /* Bit 17 swizzling by the CPU in addition. */
-                               swizzle_x = I915_BIT_6_SWIZZLE_UNKNOWN;
-                               swizzle_y = I915_BIT_6_SWIZZLE_UNKNOWN;
+                               swizzle_x = I915_BIT_6_SWIZZLE_9_10_17;
+                               swizzle_y = I915_BIT_6_SWIZZLE_9_17;
                        }
                        break;
                }
@@ -288,6 +290,19 @@ i915_gem_set_tiling(struct drm_device *dev, void *data,
                        args->swizzle_mode = dev_priv->mm.bit_6_swizzle_x;
                else
                        args->swizzle_mode = dev_priv->mm.bit_6_swizzle_y;
+
+               /* Hide bit 17 swizzling from the user.  This prevents old Mesa
+                * from aborting the application on sw fallbacks to bit 17,
+                * and we use the pread/pwrite bit17 paths to swizzle for it.
+                * If there was a user that was relying on the swizzle
+                * information for drm_intel_bo_map()ed reads/writes this would
+                * break it, but we don't have any of those.
+                */
+               if (args->swizzle_mode == I915_BIT_6_SWIZZLE_9_17)
+                       args->swizzle_mode = I915_BIT_6_SWIZZLE_9;
+               if (args->swizzle_mode == I915_BIT_6_SWIZZLE_9_10_17)
+                       args->swizzle_mode = I915_BIT_6_SWIZZLE_9_10;
+
                /* If we can't handle the swizzling, make it untiled. */
                if (args->swizzle_mode == I915_BIT_6_SWIZZLE_UNKNOWN) {
                        args->tiling_mode = I915_TILING_NONE;
@@ -354,8 +369,100 @@ i915_gem_get_tiling(struct drm_device *dev, void *data,
                DRM_ERROR("unknown tiling mode\n");
        }
 
+       /* Hide bit 17 from the user -- see comment in i915_gem_set_tiling */
+       if (args->swizzle_mode == I915_BIT_6_SWIZZLE_9_17)
+               args->swizzle_mode = I915_BIT_6_SWIZZLE_9;
+       if (args->swizzle_mode == I915_BIT_6_SWIZZLE_9_10_17)
+               args->swizzle_mode = I915_BIT_6_SWIZZLE_9_10;
+
        drm_gem_object_unreference(obj);
        mutex_unlock(&dev->struct_mutex);
 
        return 0;
 }
+
+/**
+ * Swap every 64 bytes of this page around, to account for it having a new
+ * bit 17 of its physical address and therefore being interpreted differently
+ * by the GPU.
+ */
+static int
+i915_gem_swizzle_page(struct page *page)
+{
+       char *vaddr;
+       int i;
+       char temp[64];
+
+       vaddr = kmap(page);
+       if (vaddr == NULL)
+               return -ENOMEM;
+
+       for (i = 0; i < PAGE_SIZE; i += 128) {
+               memcpy(temp, &vaddr[i], 64);
+               memcpy(&vaddr[i], &vaddr[i + 64], 64);
+               memcpy(&vaddr[i + 64], temp, 64);
+       }
+
+       kunmap(page);
+
+       return 0;
+}
+
+void
+i915_gem_object_do_bit_17_swizzle(struct drm_gem_object *obj)
+{
+       struct drm_device *dev = obj->dev;
+       drm_i915_private_t *dev_priv = dev->dev_private;
+       struct drm_i915_gem_object *obj_priv = obj->driver_private;
+       int page_count = obj->size >> PAGE_SHIFT;
+       int i;
+
+       if (dev_priv->mm.bit_6_swizzle_x != I915_BIT_6_SWIZZLE_9_10_17)
+               return;
+
+       if (obj_priv->bit_17 == NULL)
+               return;
+
+       for (i = 0; i < page_count; i++) {
+               char new_bit_17 = page_to_phys(obj_priv->pages[i]) >> 17;
+               if ((new_bit_17 & 0x1) !=
+                   (test_bit(i, obj_priv->bit_17) != 0)) {
+                       int ret = i915_gem_swizzle_page(obj_priv->pages[i]);
+                       if (ret != 0) {
+                               DRM_ERROR("Failed to swizzle page\n");
+                               return;
+                       }
+                       set_page_dirty(obj_priv->pages[i]);
+               }
+       }
+}
+
+void
+i915_gem_object_save_bit_17_swizzle(struct drm_gem_object *obj)
+{
+       struct drm_device *dev = obj->dev;
+       drm_i915_private_t *dev_priv = dev->dev_private;
+       struct drm_i915_gem_object *obj_priv = obj->driver_private;
+       int page_count = obj->size >> PAGE_SHIFT;
+       int i;
+
+       if (dev_priv->mm.bit_6_swizzle_x != I915_BIT_6_SWIZZLE_9_10_17)
+               return;
+
+       if (obj_priv->bit_17 == NULL) {
+               obj_priv->bit_17 = kmalloc(BITS_TO_LONGS(page_count) *
+                                          sizeof(long), GFP_KERNEL);
+               if (obj_priv->bit_17 == NULL) {
+                       DRM_ERROR("Failed to allocate memory for bit 17 "
+                                 "record\n");
+                       return;
+               }
+       }
+
+       for (i = 0; i < page_count; i++) {
+               if (page_to_phys(obj_priv->pages[i]) & (1 << 17))
+                       __set_bit(i, obj_priv->bit_17);
+               else
+                       __clear_bit(i, obj_priv->bit_17);
+       }
+}
index 67e3353a56d6e1190e7f2ee8aad3c975d9f6b55c..95962fa8398a2851b33214f9d1eedf6a69ef2a67 100644 (file)
@@ -594,6 +594,9 @@ struct drm_i915_gem_busy {
 #define I915_BIT_6_SWIZZLE_9_10_11     4
 /* Not seen by userland */
 #define I915_BIT_6_SWIZZLE_UNKNOWN     5
+/* Seen by userland. */
+#define I915_BIT_6_SWIZZLE_9_17                6
+#define I915_BIT_6_SWIZZLE_9_10_17     7
 
 struct drm_i915_gem_set_tiling {
        /** Handle of the buffer to have its tiling state updated */