drm/radeon: make VM flushes a ring operation
author Christian König <deathsimple@vodafone.de>
Wed, 8 Aug 2012 10:22:43 +0000 (12:22 +0200)
committer Alex Deucher <alexander.deucher@amd.com>
Thu, 20 Sep 2012 17:10:39 +0000 (13:10 -0400)
Move VM flushing into the rings as a per-ring function.
This is the first step towards making VM operations asynchronous.

Signed-off-by: Christian König <deathsimple@vodafone.de>
Reviewed-by: Jerome Glisse <jglisse@redhat.com>
drivers/gpu/drm/radeon/ni.c
drivers/gpu/drm/radeon/radeon.h
drivers/gpu/drm/radeon/radeon_asic.c
drivers/gpu/drm/radeon/radeon_asic.h
drivers/gpu/drm/radeon/radeon_cs.c
drivers/gpu/drm/radeon/radeon_gart.c
drivers/gpu/drm/radeon/radeon_ring.c
drivers/gpu/drm/radeon/si.c

index 7786c4a60d6c6819cb04f8810712f5a48bb646df..8e3d70c7c9b72ec218a61d6aae27555b81e6d81b 100644 (file)
@@ -1502,24 +1502,9 @@ int cayman_vm_bind(struct radeon_device *rdev, struct radeon_vm *vm, int id)
        WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR + (id << 2), 0);
        WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR + (id << 2), vm->last_pfn);
        WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (id << 2), vm->pt_gpu_addr >> 12);
-       /* flush hdp cache */
-       WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1);
-       /* bits 0-7 are the VM contexts0-7 */
-       WREG32(VM_INVALIDATE_REQUEST, 1 << id);
        return 0;
 }
 
-void cayman_vm_tlb_flush(struct radeon_device *rdev, struct radeon_vm *vm)
-{
-       if (vm->id == -1)
-               return;
-
-       /* flush hdp cache */
-       WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1);
-       /* bits 0-7 are the VM contexts0-7 */
-       WREG32(VM_INVALIDATE_REQUEST, 1 << vm->id);
-}
-
 #define R600_PTE_VALID     (1 << 0)
 #define R600_PTE_SYSTEM    (1 << 1)
 #define R600_PTE_SNOOPED   (1 << 2)
@@ -1551,3 +1536,19 @@ void cayman_vm_set_page(struct radeon_device *rdev, struct radeon_vm *vm,
        addr |= flags;
        writeq(addr, ptr + (pfn * 8));
 }
+
+void cayman_vm_flush(struct radeon_device *rdev, struct radeon_ib *ib)
+{
+       struct radeon_ring *ring = &rdev->ring[ib->ring];
+
+       if (!ib->vm || ib->vm->id == -1)
+               return;
+
+       /* flush hdp cache */
+       radeon_ring_write(ring, PACKET0(HDP_MEM_COHERENCY_FLUSH_CNTL, 0));
+       radeon_ring_write(ring, 0x1);
+
+       /* bits 0-7 are the VM contexts0-7 */
+       radeon_ring_write(ring, PACKET0(VM_INVALIDATE_REQUEST, 0));
+       radeon_ring_write(ring, 1 << ib->vm->id);
+}
index 19a29db7c1f546de9942e04035b58e5a35f48712..320355de9c5494d6ca50f0ca1e8a40b7fd605655 100644 (file)
@@ -639,6 +639,8 @@ struct radeon_vm {
        struct mutex                    mutex;
        /* last fence for cs using this vm */
        struct radeon_fence             *fence;
+       /* last flush or NULL if we still need to flush */
+       struct radeon_fence             *last_flush;
 };
 
 struct radeon_vm_manager {
@@ -1116,7 +1118,6 @@ struct radeon_asic {
                int (*init)(struct radeon_device *rdev);
                void (*fini)(struct radeon_device *rdev);
                int (*bind)(struct radeon_device *rdev, struct radeon_vm *vm, int id);
-               void (*tlb_flush)(struct radeon_device *rdev, struct radeon_vm *vm);
                uint32_t (*page_flags)(struct radeon_device *rdev,
                                       struct radeon_vm *vm,
                                       uint32_t flags);
@@ -1135,6 +1136,7 @@ struct radeon_asic {
                int (*ring_test)(struct radeon_device *rdev, struct radeon_ring *cp);
                int (*ib_test)(struct radeon_device *rdev, struct radeon_ring *cp);
                bool (*is_lockup)(struct radeon_device *rdev, struct radeon_ring *cp);
+               void (*vm_flush)(struct radeon_device *rdev, struct radeon_ib *ib);
        } ring[RADEON_NUM_RINGS];
        /* irqs */
        struct {
@@ -1733,7 +1735,6 @@ void radeon_ring_write(struct radeon_ring *ring, uint32_t v);
 #define radeon_asic_vm_init(rdev) (rdev)->asic->vm.init((rdev))
 #define radeon_asic_vm_fini(rdev) (rdev)->asic->vm.fini((rdev))
 #define radeon_asic_vm_bind(rdev, v, id) (rdev)->asic->vm.bind((rdev), (v), (id))
-#define radeon_asic_vm_tlb_flush(rdev, v) (rdev)->asic->vm.tlb_flush((rdev), (v))
 #define radeon_asic_vm_page_flags(rdev, v, flags) (rdev)->asic->vm.page_flags((rdev), (v), (flags))
 #define radeon_asic_vm_set_page(rdev, v, pfn, addr, flags) (rdev)->asic->vm.set_page((rdev), (v), (pfn), (addr), (flags))
 #define radeon_ring_start(rdev, r, cp) (rdev)->asic->ring[(r)].ring_start((rdev), (cp))
@@ -1742,6 +1743,7 @@ void radeon_ring_write(struct radeon_ring *ring, uint32_t v);
 #define radeon_ring_ib_execute(rdev, r, ib) (rdev)->asic->ring[(r)].ib_execute((rdev), (ib))
 #define radeon_ring_ib_parse(rdev, r, ib) (rdev)->asic->ring[(r)].ib_parse((rdev), (ib))
 #define radeon_ring_is_lockup(rdev, r, cp) (rdev)->asic->ring[(r)].is_lockup((rdev), (cp))
+#define radeon_ring_vm_flush(rdev, r, ib) (rdev)->asic->ring[(r)].vm_flush((rdev), (ib))
 #define radeon_irq_set(rdev) (rdev)->asic->irq.set((rdev))
 #define radeon_irq_process(rdev) (rdev)->asic->irq.process((rdev))
 #define radeon_get_vblank_counter(rdev, crtc) (rdev)->asic->display.get_vblank_counter((rdev), (crtc))
index f524735f05adf66912fd057b300fe87ed786e30a..4a6e39f7ffd130bcbcdb042798809dabfe1a7643 100644 (file)
@@ -1376,7 +1376,6 @@ static struct radeon_asic cayman_asic = {
                .init = &cayman_vm_init,
                .fini = &cayman_vm_fini,
                .bind = &cayman_vm_bind,
-               .tlb_flush = &cayman_vm_tlb_flush,
                .page_flags = &cayman_vm_page_flags,
                .set_page = &cayman_vm_set_page,
        },
@@ -1390,6 +1389,7 @@ static struct radeon_asic cayman_asic = {
                        .ring_test = &r600_ring_test,
                        .ib_test = &r600_ib_test,
                        .is_lockup = &evergreen_gpu_is_lockup,
+                       .vm_flush = &cayman_vm_flush,
                },
                [CAYMAN_RING_TYPE_CP1_INDEX] = {
                        .ib_execute = &cayman_ring_ib_execute,
@@ -1400,6 +1400,7 @@ static struct radeon_asic cayman_asic = {
                        .ring_test = &r600_ring_test,
                        .ib_test = &r600_ib_test,
                        .is_lockup = &evergreen_gpu_is_lockup,
+                       .vm_flush = &cayman_vm_flush,
                },
                [CAYMAN_RING_TYPE_CP2_INDEX] = {
                        .ib_execute = &cayman_ring_ib_execute,
@@ -1410,6 +1411,7 @@ static struct radeon_asic cayman_asic = {
                        .ring_test = &r600_ring_test,
                        .ib_test = &r600_ib_test,
                        .is_lockup = &evergreen_gpu_is_lockup,
+                       .vm_flush = &cayman_vm_flush,
                }
        },
        .irq = {
@@ -1479,7 +1481,6 @@ static struct radeon_asic trinity_asic = {
                .init = &cayman_vm_init,
                .fini = &cayman_vm_fini,
                .bind = &cayman_vm_bind,
-               .tlb_flush = &cayman_vm_tlb_flush,
                .page_flags = &cayman_vm_page_flags,
                .set_page = &cayman_vm_set_page,
        },
@@ -1493,6 +1494,7 @@ static struct radeon_asic trinity_asic = {
                        .ring_test = &r600_ring_test,
                        .ib_test = &r600_ib_test,
                        .is_lockup = &evergreen_gpu_is_lockup,
+                       .vm_flush = &cayman_vm_flush,
                },
                [CAYMAN_RING_TYPE_CP1_INDEX] = {
                        .ib_execute = &cayman_ring_ib_execute,
@@ -1503,6 +1505,7 @@ static struct radeon_asic trinity_asic = {
                        .ring_test = &r600_ring_test,
                        .ib_test = &r600_ib_test,
                        .is_lockup = &evergreen_gpu_is_lockup,
+                       .vm_flush = &cayman_vm_flush,
                },
                [CAYMAN_RING_TYPE_CP2_INDEX] = {
                        .ib_execute = &cayman_ring_ib_execute,
@@ -1513,6 +1516,7 @@ static struct radeon_asic trinity_asic = {
                        .ring_test = &r600_ring_test,
                        .ib_test = &r600_ib_test,
                        .is_lockup = &evergreen_gpu_is_lockup,
+                       .vm_flush = &cayman_vm_flush,
                }
        },
        .irq = {
@@ -1582,7 +1586,6 @@ static struct radeon_asic si_asic = {
                .init = &si_vm_init,
                .fini = &si_vm_fini,
                .bind = &si_vm_bind,
-               .tlb_flush = &si_vm_tlb_flush,
                .page_flags = &cayman_vm_page_flags,
                .set_page = &cayman_vm_set_page,
        },
@@ -1596,6 +1599,7 @@ static struct radeon_asic si_asic = {
                        .ring_test = &r600_ring_test,
                        .ib_test = &r600_ib_test,
                        .is_lockup = &si_gpu_is_lockup,
+                       .vm_flush = &cayman_vm_flush,
                },
                [CAYMAN_RING_TYPE_CP1_INDEX] = {
                        .ib_execute = &si_ring_ib_execute,
@@ -1606,6 +1610,7 @@ static struct radeon_asic si_asic = {
                        .ring_test = &r600_ring_test,
                        .ib_test = &r600_ib_test,
                        .is_lockup = &si_gpu_is_lockup,
+                       .vm_flush = &cayman_vm_flush,
                },
                [CAYMAN_RING_TYPE_CP2_INDEX] = {
                        .ib_execute = &si_ring_ib_execute,
@@ -1616,6 +1621,7 @@ static struct radeon_asic si_asic = {
                        .ring_test = &r600_ring_test,
                        .ib_test = &r600_ib_test,
                        .is_lockup = &si_gpu_is_lockup,
+                       .vm_flush = &cayman_vm_flush,
                }
        },
        .irq = {
index 75d6c9e6aa8fef6eb5a9605414cfc9d882f8c40d..11a31d64bacc40bf2ac71c0b3e60b70f3cf81820 100644 (file)
@@ -442,7 +442,7 @@ int cayman_vm_init(struct radeon_device *rdev);
 void cayman_vm_fini(struct radeon_device *rdev);
 int cayman_vm_bind(struct radeon_device *rdev, struct radeon_vm *vm, int id);
 void cayman_vm_unbind(struct radeon_device *rdev, struct radeon_vm *vm);
-void cayman_vm_tlb_flush(struct radeon_device *rdev, struct radeon_vm *vm);
+void cayman_vm_flush(struct radeon_device *rdev, struct radeon_ib *ib);
 uint32_t cayman_vm_page_flags(struct radeon_device *rdev,
                              struct radeon_vm *vm,
                              uint32_t flags);
@@ -472,7 +472,6 @@ int si_vm_init(struct radeon_device *rdev);
 void si_vm_fini(struct radeon_device *rdev);
 int si_vm_bind(struct radeon_device *rdev, struct radeon_vm *vm, int id);
 void si_vm_unbind(struct radeon_device *rdev, struct radeon_vm *vm);
-void si_vm_tlb_flush(struct radeon_device *rdev, struct radeon_vm *vm);
 int si_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib);
 uint64_t si_get_gpu_clock(struct radeon_device *rdev);
 
index 85a80e467482dcad4d304dc838e7e12b97a0da73..d4a804b58feba7d998d2ddd249f4d6a01f73d0ab 100644 (file)
@@ -484,6 +484,7 @@ static int radeon_cs_ib_vm_chunk(struct radeon_device *rdev,
                goto out;
        }
        radeon_cs_sync_rings(parser);
+       radeon_cs_sync_to(parser, vm->last_flush);
 
        if ((rdev->family >= CHIP_TAHITI) &&
            (parser->chunk_const_ib_idx != -1)) {
index 1bcf26ff6a36ffdb7a4525b4e516545e1779a092..125b7c31fafc48bcbbfa26c5269d2de013e4746b 100644 (file)
@@ -520,6 +520,7 @@ static void radeon_vm_unbind_locked(struct radeon_device *rdev,
                break;
        }
        radeon_fence_unref(&vm->fence);
+       radeon_fence_unref(&vm->last_flush);
 
        /* hw unbind */
        rdev->vm_manager.use_bitmap &= ~(1 << vm->id);
@@ -639,6 +640,7 @@ retry_id:
 
        /* do hw bind */
        r = radeon_asic_vm_bind(rdev, vm, id);
+       radeon_fence_unref(&vm->last_flush);
        if (r) {
                radeon_sa_bo_free(rdev, &vm->sa_bo, NULL);
                return r;
@@ -836,7 +838,7 @@ int radeon_vm_bo_update_pte(struct radeon_device *rdev,
                }
                radeon_asic_vm_set_page(rdev, bo_va->vm, i + pfn, addr, flags);
        }
-       radeon_asic_vm_tlb_flush(rdev, bo_va->vm);
+       radeon_fence_unref(&vm->last_flush);
        return 0;
 }
 
index b9b1eddcd09757e8e0b77cbf560f847ea41f2a34..b4df25e2ac88f308213660a036bf43bab3c1ee11 100644 (file)
@@ -160,6 +160,10 @@ int radeon_ib_schedule(struct radeon_device *rdev, struct radeon_ib *ib,
        if (!need_sync) {
                radeon_semaphore_free(rdev, &ib->semaphore, NULL);
        }
+       /* if we can't remember our last VM flush then flush now! */
+       if (ib->vm && !ib->vm->last_flush) {
+               radeon_ring_vm_flush(rdev, ib->ring, ib);
+       }
        if (const_ib) {
                radeon_ring_ib_execute(rdev, const_ib->ring, const_ib);
                radeon_semaphore_free(rdev, &const_ib->semaphore, NULL);
@@ -174,6 +178,10 @@ int radeon_ib_schedule(struct radeon_device *rdev, struct radeon_ib *ib,
        if (const_ib) {
                const_ib->fence = radeon_fence_ref(ib->fence);
        }
+       /* we just flushed the VM, remember that */
+       if (ib->vm && !ib->vm->last_flush) {
+               ib->vm->last_flush = radeon_fence_ref(ib->fence);
+       }
        radeon_ring_unlock_commit(rdev, ring);
        return 0;
 }
index 1a459ad485c9ec912e48f07270dd51d0c614ab1e..4016927b268dd83a1dda4c255298c3b555ca7b0f 100644 (file)
@@ -2796,24 +2796,9 @@ int si_vm_bind(struct radeon_device *rdev, struct radeon_vm *vm, int id)
        else
                WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((id - 8) << 2),
                       vm->pt_gpu_addr >> 12);
-       /* flush hdp cache */
-       WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1);
-       /* bits 0-15 are the VM contexts0-15 */
-       WREG32(VM_INVALIDATE_REQUEST, 1 << id);
        return 0;
 }
 
-void si_vm_tlb_flush(struct radeon_device *rdev, struct radeon_vm *vm)
-{
-       if (vm->id == -1)
-               return;
-
-       /* flush hdp cache */
-       WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1);
-       /* bits 0-15 are the VM contexts0-15 */
-       WREG32(VM_INVALIDATE_REQUEST, 1 << vm->id);
-}
-
 /*
  * RLC
  */