From ee60e29f1dc650bf2239b757038973ef32f10878 Mon Sep 17 00:00:00 2001
From: =?utf8?q?Christian=20K=C3=B6nig?=
Date: Thu, 9 Aug 2012 16:21:08 +0200
Subject: [PATCH] drm/radeon: rework VMID handling
MIME-Version: 1.0
Content-Type: text/plain; charset=utf8
Content-Transfer-Encoding: 8bit

Move binding onto the ring, simplifying the handling a bit. Instead of
programming the page table registers with MMIO writes at bind time, a
VMID is now grabbed for each command submission (radeon_vm_grab_id) and
the page table setup is emitted from the ring in the vm_flush callback.
A per-VMID fence array replaces the use_bitmap and tells us when an ID
can be reused.

Signed-off-by: Christian König
Reviewed-by: Jerome Glisse
---
 drivers/gpu/drm/radeon/ni.c            |  20 ++--
 drivers/gpu/drm/radeon/radeon.h        |  30 +++++-
 drivers/gpu/drm/radeon/radeon_asic.c   |   9 +-
 drivers/gpu/drm/radeon/radeon_asic.h   |   4 +-
 drivers/gpu/drm/radeon/radeon_cs.c     |   9 +-
 drivers/gpu/drm/radeon/radeon_device.c |   1 -
 drivers/gpu/drm/radeon/radeon_gart.c   | 126 +++++++++++++++++--------
 drivers/gpu/drm/radeon/si.c            |  30 ++++--
 8 files changed, 153 insertions(+), 76 deletions(-)

diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c
index 8e3d70c7c9b7..de378d685803 100644
--- a/drivers/gpu/drm/radeon/ni.c
+++ b/drivers/gpu/drm/radeon/ni.c
@@ -1497,14 +1497,6 @@ void cayman_vm_fini(struct radeon_device *rdev)
 {
 }
 
-int cayman_vm_bind(struct radeon_device *rdev, struct radeon_vm *vm, int id)
-{
-	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR + (id << 2), 0);
-	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR + (id << 2), vm->last_pfn);
-	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (id << 2), vm->pt_gpu_addr >> 12);
-	return 0;
-}
-
 #define R600_PTE_VALID	(1 << 0)
 #define R600_PTE_SYSTEM	(1 << 1)
 #define R600_PTE_SNOOPED	(1 << 2)
@@ -1540,10 +1532,20 @@ void cayman_vm_set_page(struct radeon_device *rdev, struct radeon_vm *vm,
 void cayman_vm_flush(struct radeon_device *rdev, struct radeon_ib *ib)
 {
 	struct radeon_ring *ring = &rdev->ring[ib->ring];
+	struct radeon_vm *vm = ib->vm;
 
-	if (!ib->vm || ib->vm->id == -1)
+	if (vm == NULL)
 		return;
 
+	radeon_ring_write(ring, PACKET0(VM_CONTEXT0_PAGE_TABLE_START_ADDR + (vm->id << 2), 0));
+	radeon_ring_write(ring, 0);
+
+	radeon_ring_write(ring, PACKET0(VM_CONTEXT0_PAGE_TABLE_END_ADDR + (vm->id << 2), 0));
+	radeon_ring_write(ring, vm->last_pfn);
+
+	radeon_ring_write(ring, PACKET0(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2), 0));
+	radeon_ring_write(ring, vm->pt_gpu_addr >> 12);
+
 	/* flush hdp cache */
 	radeon_ring_write(ring, PACKET0(HDP_MEM_COHERENCY_FLUSH_CNTL, 0));
 	radeon_ring_write(ring, 0x1);
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index 320355de9c54..617ca45734de 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -253,6 +253,22 @@ static inline struct radeon_fence *radeon_fence_later(struct radeon_fence *a,
 	}
 }
 
+static inline bool radeon_fence_is_earlier(struct radeon_fence *a,
+					   struct radeon_fence *b)
+{
+	if (!a) {
+		return false;
+	}
+
+	if (!b) {
+		return true;
+	}
+
+	BUG_ON(a->ring != b->ring);
+
+	return a->seq < b->seq;
+}
+
 /*
  * Tiling registers
  */
@@ -628,10 +644,13 @@ struct radeon_ring {
 /*
  * VM
  */
+
+#define RADEON_NUM_VM	16
+
 struct radeon_vm {
 	struct list_head		list;
 	struct list_head		va;
-	int				id;
+	unsigned			id;
 	unsigned			last_pfn;
 	u64				pt_gpu_addr;
 	u64				*pt;
@@ -646,7 +665,7 @@ struct radeon_vm {
 struct radeon_vm_manager {
 	struct mutex			lock;
 	struct list_head		lru_vm;
-	uint32_t			use_bitmap;
+	struct radeon_fence		*active[RADEON_NUM_VM];
 	struct radeon_sa_manager	sa_manager;
 	uint32_t			max_pfn;
 	/* number of VMIDs */
@@ -1117,7 +1136,6 @@ struct radeon_asic {
 	struct {
 		int (*init)(struct radeon_device *rdev);
 		void (*fini)(struct radeon_device *rdev);
-		int (*bind)(struct radeon_device *rdev, struct radeon_vm *vm, int id);
 		uint32_t (*page_flags)(struct radeon_device *rdev,
				       struct radeon_vm *vm,
				       uint32_t flags);
@@ -1734,7 +1752,6 @@ void radeon_ring_write(struct radeon_ring *ring, uint32_t v);
 #define radeon_gart_set_page(rdev, i, p) (rdev)->asic->gart.set_page((rdev), (i), (p))
 #define radeon_asic_vm_init(rdev) (rdev)->asic->vm.init((rdev))
 #define radeon_asic_vm_fini(rdev) (rdev)->asic->vm.fini((rdev))
-#define radeon_asic_vm_bind(rdev, v, id) (rdev)->asic->vm.bind((rdev), (v), (id))
 #define radeon_asic_vm_page_flags(rdev, v, flags) (rdev)->asic->vm.page_flags((rdev), (v), (flags))
 #define radeon_asic_vm_set_page(rdev, v, pfn, addr, flags) (rdev)->asic->vm.set_page((rdev), (v), (pfn), (addr), (flags))
 #define radeon_ring_start(rdev, r, cp) (rdev)->asic->ring[(r)].ring_start((rdev), (cp))
@@ -1817,6 +1834,11 @@ int radeon_vm_init(struct radeon_device *rdev, struct radeon_vm *vm);
 void radeon_vm_fini(struct radeon_device *rdev, struct radeon_vm *vm);
 int radeon_vm_bind(struct radeon_device *rdev, struct radeon_vm *vm);
 void radeon_vm_unbind(struct radeon_device *rdev, struct radeon_vm *vm);
+struct radeon_fence *radeon_vm_grab_id(struct radeon_device *rdev,
+				       struct radeon_vm *vm, int ring);
+void radeon_vm_fence(struct radeon_device *rdev,
+		     struct radeon_vm *vm,
+		     struct radeon_fence *fence);
 int radeon_vm_bo_update_pte(struct radeon_device *rdev,
			    struct radeon_vm *vm,
			    struct radeon_bo *bo,
diff --git a/drivers/gpu/drm/radeon/radeon_asic.c b/drivers/gpu/drm/radeon/radeon_asic.c
index 4a6e39f7ffd1..98c586ac1999 100644
--- a/drivers/gpu/drm/radeon/radeon_asic.c
+++ b/drivers/gpu/drm/radeon/radeon_asic.c
@@ -1375,7 +1375,6 @@ static struct radeon_asic cayman_asic = {
 	.vm = {
 		.init = &cayman_vm_init,
 		.fini = &cayman_vm_fini,
-		.bind = &cayman_vm_bind,
 		.page_flags = &cayman_vm_page_flags,
 		.set_page = &cayman_vm_set_page,
 	},
@@ -1480,7 +1479,6 @@ static struct radeon_asic trinity_asic = {
 	.vm = {
 		.init = &cayman_vm_init,
 		.fini = &cayman_vm_fini,
-		.bind = &cayman_vm_bind,
 		.page_flags = &cayman_vm_page_flags,
 		.set_page = &cayman_vm_set_page,
 	},
@@ -1585,7 +1583,6 @@ static struct radeon_asic si_asic = {
 	.vm = {
 		.init = &si_vm_init,
 		.fini = &si_vm_fini,
-		.bind = &si_vm_bind,
 		.page_flags = &cayman_vm_page_flags,
 		.set_page = &cayman_vm_set_page,
 	},
@@ -1599,7 +1596,7 @@ static struct radeon_asic si_asic = {
 		.ring_test = &r600_ring_test,
 		.ib_test = &r600_ib_test,
 		.is_lockup = &si_gpu_is_lockup,
-		.vm_flush = &cayman_vm_flush,
+		.vm_flush = &si_vm_flush,
 	},
 	[CAYMAN_RING_TYPE_CP1_INDEX] = {
 		.ib_execute = &si_ring_ib_execute,
@@ -1610,7 +1607,7 @@ static struct radeon_asic si_asic = {
 		.ring_test = &r600_ring_test,
 		.ib_test = &r600_ib_test,
 		.is_lockup = &si_gpu_is_lockup,
-		.vm_flush = &cayman_vm_flush,
+		.vm_flush = &si_vm_flush,
 	},
 	[CAYMAN_RING_TYPE_CP2_INDEX] = {
 		.ib_execute = &si_ring_ib_execute,
@@ -1621,7 +1618,7 @@ static struct radeon_asic si_asic = {
 		.ring_test = &r600_ring_test,
 		.ib_test = &r600_ib_test,
 		.is_lockup = &si_gpu_is_lockup,
-		.vm_flush = &cayman_vm_flush,
+		.vm_flush = &si_vm_flush,
 	}
 },
 .irq = {
diff --git a/drivers/gpu/drm/radeon/radeon_asic.h b/drivers/gpu/drm/radeon/radeon_asic.h
index 11a31d64bacc..25e8d000dac9 100644
--- a/drivers/gpu/drm/radeon/radeon_asic.h
+++ b/drivers/gpu/drm/radeon/radeon_asic.h
@@ -440,7 +440,6 @@ int cayman_asic_reset(struct radeon_device *rdev);
 void cayman_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib);
 int cayman_vm_init(struct radeon_device *rdev);
 void cayman_vm_fini(struct radeon_device *rdev);
-int cayman_vm_bind(struct radeon_device *rdev, struct radeon_vm *vm, int id);
 void cayman_vm_unbind(struct radeon_device *rdev, struct radeon_vm *vm);
 void cayman_vm_flush(struct radeon_device *rdev, struct radeon_ib *ib);
 uint32_t cayman_vm_page_flags(struct radeon_device *rdev,
@@ -470,8 +469,7 @@ int si_irq_set(struct radeon_device *rdev);
 int si_irq_process(struct radeon_device *rdev);
 int si_vm_init(struct radeon_device *rdev);
 void si_vm_fini(struct radeon_device *rdev);
-int si_vm_bind(struct radeon_device *rdev, struct radeon_vm *vm, int id);
-void si_vm_unbind(struct radeon_device *rdev, struct radeon_vm *vm);
+void si_vm_flush(struct radeon_device *rdev, struct radeon_ib *ib);
 int si_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib);
 uint64_t si_get_gpu_clock(struct radeon_device *rdev);
 
diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c
index d4a804b58feb..dc4554e0a711 100644
--- a/drivers/gpu/drm/radeon/radeon_cs.c
+++ b/drivers/gpu/drm/radeon/radeon_cs.c
@@ -485,6 +485,7 @@ static int radeon_cs_ib_vm_chunk(struct radeon_device *rdev,
 	}
 	radeon_cs_sync_rings(parser);
 	radeon_cs_sync_to(parser, vm->last_flush);
+	radeon_cs_sync_to(parser, radeon_vm_grab_id(rdev, vm, parser->ring));
 
 	if ((rdev->family >= CHIP_TAHITI) &&
	    (parser->chunk_const_ib_idx != -1)) {
@@ -493,13 +494,11 @@ static int radeon_cs_ib_vm_chunk(struct radeon_device *rdev,
 		r = radeon_ib_schedule(rdev, &parser->ib, NULL);
 	}
 
-out:
 	if (!r) {
-		if (vm->fence) {
-			radeon_fence_unref(&vm->fence);
-		}
-		vm->fence = radeon_fence_ref(parser->ib.fence);
+		radeon_vm_fence(rdev, vm, parser->ib.fence);
 	}
+
+out:
 	mutex_unlock(&vm->mutex);
 	mutex_unlock(&rdev->vm_manager.lock);
 	return r;
diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c
index c78f0346dfe4..331a952c9b53 100644
--- a/drivers/gpu/drm/radeon/radeon_device.c
+++ b/drivers/gpu/drm/radeon/radeon_device.c
@@ -1018,7 +1018,6 @@ int radeon_device_init(struct radeon_device *rdev,
 		return r;
 	/* initialize vm here */
 	mutex_init(&rdev->vm_manager.lock);
-	rdev->vm_manager.use_bitmap = 1;
 	rdev->vm_manager.max_pfn = 1 << 20;
 	INIT_LIST_HEAD(&rdev->vm_manager.lru_vm);
 
diff --git a/drivers/gpu/drm/radeon/radeon_gart.c b/drivers/gpu/drm/radeon/radeon_gart.c
index 125b7c31fafc..0fd0ba9236a6 100644
--- a/drivers/gpu/drm/radeon/radeon_gart.c
+++ b/drivers/gpu/drm/radeon/radeon_gart.c
@@ -437,7 +437,6 @@ int radeon_vm_manager_init(struct radeon_device *rdev)
 	int r;
 
 	if (!rdev->vm_manager.enabled) {
-		/* mark first vm as always in use, it's the system one */
 		/* allocate enough for 2 full VM pts */
 		r = radeon_sa_bo_manager_init(rdev, &rdev->vm_manager.sa_manager,
					      rdev->vm_manager.max_pfn * 8 * 2,
@@ -461,7 +460,7 @@ int radeon_vm_manager_init(struct radeon_device *rdev)
 
 	/* restore page table */
 	list_for_each_entry(vm, &rdev->vm_manager.lru_vm, list) {
-		if (vm->id == -1)
+		if (vm->sa_bo == NULL)
 			continue;
 
 		list_for_each_entry(bo_va, &vm->va, vm_list) {
@@ -475,11 +474,6 @@ int radeon_vm_manager_init(struct radeon_device *rdev)
 				DRM_ERROR("Failed to update pte for vm %d!\n", vm->id);
 			}
 		}
-
-		r = radeon_asic_vm_bind(rdev, vm, vm->id);
-		if (r) {
-			DRM_ERROR("Failed to bind vm %d!\n", vm->id);
-		}
 	}
 	return 0;
 }
@@ -500,10 +494,6 @@ static void radeon_vm_unbind_locked(struct radeon_device *rdev,
 {
 	struct radeon_bo_va *bo_va;
 
-	if (vm->id == -1) {
-		return;
-	}
-
 	/* wait for vm use to end */
 	while (vm->fence) {
 		int r;
@@ -523,9 +513,7 @@ static void radeon_vm_unbind_locked(struct radeon_device *rdev,
 	radeon_fence_unref(&vm->last_flush);
 
 	/* hw unbind */
-	rdev->vm_manager.use_bitmap &= ~(1 << vm->id);
 	list_del_init(&vm->list);
-	vm->id = -1;
 	radeon_sa_bo_free(rdev, &vm->sa_bo, NULL);
 	vm->pt = NULL;
 
@@ -544,6 +532,7 @@ static void radeon_vm_unbind_locked(struct radeon_device *rdev,
 void radeon_vm_manager_fini(struct radeon_device *rdev)
 {
 	struct radeon_vm *vm, *tmp;
+	int i;
 
 	if (!rdev->vm_manager.enabled)
 		return;
@@ -553,6 +542,9 @@ void radeon_vm_manager_fini(struct radeon_device *rdev)
 	list_for_each_entry_safe(vm, tmp, &rdev->vm_manager.lru_vm, list) {
 		radeon_vm_unbind_locked(rdev, vm);
 	}
+	for (i = 0; i < RADEON_NUM_VM; ++i) {
+		radeon_fence_unref(&rdev->vm_manager.active[i]);
+	}
 	radeon_asic_vm_fini(rdev);
 	mutex_unlock(&rdev->vm_manager.lock);
 
@@ -593,14 +585,13 @@ void radeon_vm_unbind(struct radeon_device *rdev, struct radeon_vm *vm)
 int radeon_vm_bind(struct radeon_device *rdev, struct radeon_vm *vm)
 {
 	struct radeon_vm *vm_evict;
-	unsigned i;
-	int id = -1, r;
+	int r;
 
 	if (vm == NULL) {
 		return -EINVAL;
 	}
 
-	if (vm->id != -1) {
+	if (vm->sa_bo != NULL) {
 		/* update lru */
 		list_del_init(&vm->list);
 		list_add_tail(&vm->list, &rdev->vm_manager.lru_vm);
@@ -623,33 +614,86 @@ retry:
 	vm->pt_gpu_addr = radeon_sa_bo_gpu_addr(vm->sa_bo);
 	memset(vm->pt, 0, RADEON_GPU_PAGE_ALIGN(vm->last_pfn * 8));
 
-retry_id:
-	/* search for free vm */
-	for (i = 0; i < rdev->vm_manager.nvm; i++) {
-		if (!(rdev->vm_manager.use_bitmap & (1 << i))) {
-			id = i;
-			break;
+	list_add_tail(&vm->list, &rdev->vm_manager.lru_vm);
+	return radeon_vm_bo_update_pte(rdev, vm, rdev->ring_tmp_bo.bo,
+				       &rdev->ring_tmp_bo.bo->tbo.mem);
+}
+
+/**
+ * radeon_vm_grab_id - allocate the next free VMID
+ *
+ * @rdev: radeon_device pointer
+ * @vm: vm to allocate id for
+ * @ring: ring we want to submit job to
+ *
+ * Allocate an id for the vm (cayman+).
+ * Returns the fence we need to sync to (if any).
+ *
+ * Global and local mutex must be locked!
+ */
+struct radeon_fence *radeon_vm_grab_id(struct radeon_device *rdev,
+				       struct radeon_vm *vm, int ring)
+{
+	struct radeon_fence *best[RADEON_NUM_RINGS] = {};
+	unsigned choices[2] = {};
+	unsigned i;
+
+	/* check if the id is still valid */
+	if (vm->fence && vm->fence == rdev->vm_manager.active[vm->id])
+		return NULL;
+
+	/* we definitely need to flush */
+	radeon_fence_unref(&vm->last_flush);
+
+	/* skip over VMID 0, since it is the system VM */
+	for (i = 1; i < rdev->vm_manager.nvm; ++i) {
+		struct radeon_fence *fence = rdev->vm_manager.active[i];
+
+		if (fence == NULL) {
+			/* found a free one */
+			vm->id = i;
+			return NULL;
+		}
+
+		if (radeon_fence_is_earlier(fence, best[fence->ring])) {
+			best[fence->ring] = fence;
+			choices[fence->ring == ring ? 0 : 1] = i;
 		}
-	}
-	/* evict vm if necessary */
-	if (id == -1) {
-		vm_evict = list_first_entry(&rdev->vm_manager.lru_vm, struct radeon_vm, list);
-		radeon_vm_unbind(rdev, vm_evict);
-		goto retry_id;
 	}
 
-	/* do hw bind */
-	r = radeon_asic_vm_bind(rdev, vm, id);
-	radeon_fence_unref(&vm->last_flush);
-	if (r) {
-		radeon_sa_bo_free(rdev, &vm->sa_bo, NULL);
-		return r;
+	for (i = 0; i < 2; ++i) {
+		if (choices[i]) {
+			vm->id = choices[i];
+			return rdev->vm_manager.active[choices[i]];
+		}
 	}
-	rdev->vm_manager.use_bitmap |= 1 << id;
-	vm->id = id;
-	list_add_tail(&vm->list, &rdev->vm_manager.lru_vm);
-	return radeon_vm_bo_update_pte(rdev, vm, rdev->ring_tmp_bo.bo,
-				       &rdev->ring_tmp_bo.bo->tbo.mem);
+
+	/* should never happen */
+	BUG();
+	return NULL;
+}
+
+/**
+ * radeon_vm_fence - remember fence for vm
+ *
+ * @rdev: radeon_device pointer
+ * @vm: vm we want to fence
+ * @fence: fence to remember
+ *
+ * Fence the vm (cayman+).
+ * Set the fence used to protect page table and id.
+ *
+ * Global and local mutex must be locked!
+ */
+void radeon_vm_fence(struct radeon_device *rdev,
+		     struct radeon_vm *vm,
+		     struct radeon_fence *fence)
+{
+	radeon_fence_unref(&rdev->vm_manager.active[vm->id]);
+	rdev->vm_manager.active[vm->id] = radeon_fence_ref(fence);
+
+	radeon_fence_unref(&vm->fence);
+	vm->fence = radeon_fence_ref(fence);
 }
 
 /* object have to be reserved */
@@ -806,7 +850,7 @@ int radeon_vm_bo_update_pte(struct radeon_device *rdev,
 	uint32_t flags;
 
 	/* nothing to do if vm isn't bound */
-	if (vm->id == -1)
+	if (vm->sa_bo == NULL)
 		return 0;
 
 	bo_va = radeon_bo_va(bo, vm);
@@ -928,7 +972,7 @@ int radeon_vm_init(struct radeon_device *rdev, struct radeon_vm *vm)
 {
 	int r;
 
-	vm->id = -1;
+	vm->id = 0;
 	vm->fence = NULL;
 	mutex_init(&vm->mutex);
 	INIT_LIST_HEAD(&vm->list);
diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c
index 4016927b268d..51a471dc319c 100644
--- a/drivers/gpu/drm/radeon/si.c
+++ b/drivers/gpu/drm/radeon/si.c
@@ -2789,14 +2789,30 @@ void si_vm_fini(struct radeon_device *rdev)
 {
 }
 
-int si_vm_bind(struct radeon_device *rdev, struct radeon_vm *vm, int id)
+void si_vm_flush(struct radeon_device *rdev, struct radeon_ib *ib)
 {
-	if (id < 8)
-		WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (id << 2), vm->pt_gpu_addr >> 12);
-	else
-		WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((id - 8) << 2),
-		       vm->pt_gpu_addr >> 12);
-	return 0;
+	struct radeon_ring *ring = &rdev->ring[ib->ring];
+	struct radeon_vm *vm = ib->vm;
+
+	if (vm == NULL)
+		return;
+
+	if (vm->id < 8) {
+		radeon_ring_write(ring, PACKET0(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR
+						+ (vm->id << 2), 0));
+	} else {
+		radeon_ring_write(ring, PACKET0(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR
+						+ ((vm->id - 8) << 2), 0));
+	}
+	radeon_ring_write(ring, vm->pt_gpu_addr >> 12);
+
+	/* flush hdp cache */
+	radeon_ring_write(ring, PACKET0(HDP_MEM_COHERENCY_FLUSH_CNTL, 0));
+	radeon_ring_write(ring, 0x1);
+
+	/* bits 0-7 are the VM contexts 0-7 */
+	radeon_ring_write(ring, PACKET0(VM_INVALIDATE_REQUEST, 0));
+	radeon_ring_write(ring, 1 << ib->vm->id);
 }
 
 /*
-- 
2.34.1