From: Ben Skeggs Date: Wed, 15 Dec 2010 01:04:39 +0000 (+1000) Subject: drm/nouveau: modify vm to accomodate dual page tables for nvc0 X-Git-Tag: firefly_0821_release~7613^2~1688^2~40^2~93 X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=3ee0128140eed7d32b785a335099a2ec38258283;p=firefly-linux-kernel-4.4.55.git drm/nouveau: modify vm to accomodate dual page tables for nvc0 Signed-off-by: Ben Skeggs --- diff --git a/drivers/gpu/drm/nouveau/nouveau_vm.c b/drivers/gpu/drm/nouveau/nouveau_vm.c index 07ab1749cf7d..b023a64c27d8 100644 --- a/drivers/gpu/drm/nouveau/nouveau_vm.c +++ b/drivers/gpu/drm/nouveau/nouveau_vm.c @@ -32,6 +32,7 @@ nouveau_vm_map_at(struct nouveau_vma *vma, u64 delta, struct nouveau_vram *vram) { struct nouveau_vm *vm = vma->vm; struct nouveau_mm_node *r; + int big = vma->node->type != vm->spg_shift; u32 offset = vma->node->offset + (delta >> 12); u32 bits = vma->node->type - 12; u32 pde = (offset >> vm->pgt_bits) - vm->fpde; @@ -44,7 +45,7 @@ nouveau_vm_map_at(struct nouveau_vma *vma, u64 delta, struct nouveau_vram *vram) u32 num = r->length >> bits; while (num) { - struct nouveau_gpuobj *pgt = vm->pgt[pde].obj; + struct nouveau_gpuobj *pgt = vm->pgt[pde].obj[big]; end = (pte + num); if (unlikely(end >= max)) @@ -76,6 +77,7 @@ nouveau_vm_map_sg(struct nouveau_vma *vma, u64 delta, u64 length, dma_addr_t *list) { struct nouveau_vm *vm = vma->vm; + int big = vma->node->type != vm->spg_shift; u32 offset = vma->node->offset + (delta >> 12); u32 bits = vma->node->type - 12; u32 num = length >> vma->node->type; @@ -85,7 +87,7 @@ nouveau_vm_map_sg(struct nouveau_vma *vma, u64 delta, u64 length, u32 end, len; while (num) { - struct nouveau_gpuobj *pgt = vm->pgt[pde].obj; + struct nouveau_gpuobj *pgt = vm->pgt[pde].obj[big]; end = (pte + num); if (unlikely(end >= max)) @@ -110,6 +112,7 @@ void nouveau_vm_unmap_at(struct nouveau_vma *vma, u64 delta, u64 length) { struct nouveau_vm *vm = vma->vm; + int big = vma->node->type != vm->spg_shift; u32 offset = vma->node->offset + (delta >> 12); u32 bits = vma->node->type - 12; u32 num = length >> vma->node->type; @@ -119,7 +122,7 @@ nouveau_vm_unmap_at(struct nouveau_vma *vma, u64 delta, u64 length) u32 end, len; while (num) { - struct nouveau_gpuobj *pgt = vm->pgt[pde].obj; + struct nouveau_gpuobj *pgt = vm->pgt[pde].obj[big]; end = (pte + num); if (unlikely(end >= max)) @@ -146,7 +149,7 @@ nouveau_vm_unmap(struct nouveau_vma *vma) } static void -nouveau_vm_unmap_pgt(struct nouveau_vm *vm, u32 fpde, u32 lpde) +nouveau_vm_unmap_pgt(struct nouveau_vm *vm, int big, u32 fpde, u32 lpde) { struct nouveau_vm_pgd *vpgd; struct nouveau_vm_pgt *vpgt; @@ -155,16 +158,16 @@ nouveau_vm_unmap_pgt(struct nouveau_vm *vm, u32 fpde, u32 lpde) for (pde = fpde; pde <= lpde; pde++) { vpgt = &vm->pgt[pde - vm->fpde]; - if (--vpgt->refcount) + if (--vpgt->refcount[big]) continue; + pgt = vpgt->obj[big]; + vpgt->obj[big] = NULL; + list_for_each_entry(vpgd, &vm->pgd_list, head) { - vm->unmap_pgt(vpgd->obj, pde); + vm->map_pgt(vpgd->obj, pde, vpgt->obj); } - pgt = vpgt->obj; - vpgt->obj = NULL; - mutex_unlock(&vm->mm->mutex); nouveau_gpuobj_ref(NULL, &pgt); mutex_lock(&vm->mm->mutex); @@ -177,6 +180,7 @@ nouveau_vm_map_pgt(struct nouveau_vm *vm, u32 pde, u32 type) struct nouveau_vm_pgt *vpgt = &vm->pgt[pde - vm->fpde]; struct nouveau_vm_pgd *vpgd; struct nouveau_gpuobj *pgt; + int big = (type != vm->spg_shift); u32 pgt_size; int ret; @@ -191,19 +195,18 @@ nouveau_vm_map_pgt(struct nouveau_vm *vm, u32 pde, u32 type) return ret; /* someone beat us to filling the PDE while we didn't have the lock */ - if (unlikely(vpgt->refcount++)) { + if (unlikely(vpgt->refcount[big]++)) { mutex_unlock(&vm->mm->mutex); nouveau_gpuobj_ref(NULL, &pgt); mutex_lock(&vm->mm->mutex); return 0; } + vpgt->obj[big] = pgt; list_for_each_entry(vpgd, &vm->pgd_list, head) { - vm->map_pgt(vpgd->obj, type, pde, pgt); + vm->map_pgt(vpgd->obj, pde, vpgt->obj); } - vpgt->page_shift = type; - vpgt->obj = pgt; return 0; } @@ -227,16 +230,17 @@ nouveau_vm_get(struct nouveau_vm *vm, u64 size, u32 page_shift, lpde = (vma->node->offset + vma->node->length - 1) >> vm->pgt_bits; for (pde = fpde; pde <= lpde; pde++) { struct nouveau_vm_pgt *vpgt = &vm->pgt[pde - vm->fpde]; + int big = (vma->node->type != vm->spg_shift); - if (likely(vpgt->refcount)) { - vpgt->refcount++; + if (likely(vpgt->refcount[big])) { + vpgt->refcount[big]++; continue; } ret = nouveau_vm_map_pgt(vm, pde, vma->node->type); if (ret) { if (pde != fpde) - nouveau_vm_unmap_pgt(vm, fpde, pde - 1); + nouveau_vm_unmap_pgt(vm, big, fpde, pde - 1); nouveau_mm_put(vm->mm, vma->node); mutex_unlock(&vm->mm->mutex); vma->node = NULL; @@ -263,21 +267,20 @@ nouveau_vm_put(struct nouveau_vma *vma) lpde = (vma->node->offset + vma->node->length - 1) >> vm->pgt_bits; mutex_lock(&vm->mm->mutex); + nouveau_vm_unmap_pgt(vm, vma->node->type != vm->spg_shift, fpde, lpde); nouveau_mm_put(vm->mm, vma->node); vma->node = NULL; - nouveau_vm_unmap_pgt(vm, fpde, lpde); mutex_unlock(&vm->mm->mutex); } int nouveau_vm_new(struct drm_device *dev, u64 offset, u64 length, u64 mm_offset, - u8 pgt_bits, u8 spg_shift, u8 lpg_shift, struct nouveau_vm **pvm) { struct drm_nouveau_private *dev_priv = dev->dev_private; struct nouveau_vm *vm; u64 mm_length = (offset + length) - mm_offset; - u32 block; + u32 block, pgt_bits; int ret; vm = kzalloc(sizeof(*vm), GFP_KERNEL); @@ -286,11 +289,13 @@ nouveau_vm_new(struct drm_device *dev, u64 offset, u64 length, u64 mm_offset, if (dev_priv->card_type == NV_50) { vm->map_pgt = nv50_vm_map_pgt; - vm->unmap_pgt = nv50_vm_unmap_pgt; vm->map = nv50_vm_map; vm->map_sg = nv50_vm_map_sg; vm->unmap = nv50_vm_unmap; vm->flush = nv50_vm_flush; + vm->spg_shift = 12; + vm->lpg_shift = 16; + pgt_bits = 29; } else { kfree(vm); return -ENOSYS; @@ -308,8 +313,6 @@ nouveau_vm_new(struct drm_device *dev, u64 offset, u64 length, u64 mm_offset, vm->dev = dev; vm->refcount = 1; vm->pgt_bits = pgt_bits - 12; - vm->spg_shift = spg_shift; - vm->lpg_shift = lpg_shift; block = (1 << pgt_bits); if (length < block) @@ -342,16 +345,8 @@ nouveau_vm_link(struct nouveau_vm *vm, struct nouveau_gpuobj *pgd) nouveau_gpuobj_ref(pgd, &vpgd->obj); mutex_lock(&vm->mm->mutex); - for (i = vm->fpde; i <= vm->lpde; i++) { - struct nouveau_vm_pgt *vpgt = &vm->pgt[i - vm->fpde]; - - if (!vpgt->obj) { - vm->unmap_pgt(pgd, i); - continue; - } - - vm->map_pgt(pgd, vpgt->page_shift, i, vpgt->obj); - } + for (i = vm->fpde; i <= vm->lpde; i++) + vm->map_pgt(pgd, i, vm->pgt[i - vm->fpde].obj); list_add(&vpgd->head, &vm->pgd_list); mutex_unlock(&vm->mm->mutex); return 0; diff --git a/drivers/gpu/drm/nouveau/nouveau_vm.h b/drivers/gpu/drm/nouveau/nouveau_vm.h index b6755cfa7b71..105b6f65f19d 100644 --- a/drivers/gpu/drm/nouveau/nouveau_vm.h +++ b/drivers/gpu/drm/nouveau/nouveau_vm.h @@ -31,9 +31,8 @@ #include "nouveau_mm.h" struct nouveau_vm_pgt { - struct nouveau_gpuobj *obj; - u32 page_shift; - u32 refcount; + struct nouveau_gpuobj *obj[2]; + u32 refcount[2]; }; struct nouveau_vm_pgd { @@ -65,9 +64,8 @@ struct nouveau_vm { u8 spg_shift; u8 lpg_shift; - void (*map_pgt)(struct nouveau_gpuobj *pgd, u32 type, u32 pde, - struct nouveau_gpuobj *pgt); - void (*unmap_pgt)(struct nouveau_gpuobj *pgd, u32 pde); + void (*map_pgt)(struct nouveau_gpuobj *pgd, u32 pde, + struct nouveau_gpuobj *pgt[2]); void (*map)(struct nouveau_vma *, struct nouveau_gpuobj *, struct nouveau_vram *, u32 pte, u32 cnt, u64 phys); void (*map_sg)(struct nouveau_vma *, struct nouveau_gpuobj *, @@ -78,7 +76,6 @@ struct nouveau_vm { /* nouveau_vm.c */ int nouveau_vm_new(struct drm_device *, u64 offset, u64 length, u64 mm_offset, - u8 pgt_bits, u8 spg_shift, u8 lpg_shift, struct nouveau_vm **); int nouveau_vm_ref(struct nouveau_vm *, struct nouveau_vm **, struct nouveau_gpuobj *pgd); @@ -93,9 +90,8 @@ void nouveau_vm_map_sg(struct nouveau_vma *, u64 offset, u64 length, dma_addr_t *); /* nv50_vm.c */ -void nv50_vm_map_pgt(struct nouveau_gpuobj *pgd, u32 type, u32 pde, - struct nouveau_gpuobj *pgt); -void nv50_vm_unmap_pgt(struct nouveau_gpuobj *pgd, u32 pde); +void nv50_vm_map_pgt(struct nouveau_gpuobj *pgd, u32 pde, + struct nouveau_gpuobj *pgt[2]); void nv50_vm_map(struct nouveau_vma *, struct nouveau_gpuobj *, struct nouveau_vram *, u32 pte, u32 cnt, u64 phys); void nv50_vm_map_sg(struct nouveau_vma *, struct nouveau_gpuobj *, diff --git a/drivers/gpu/drm/nouveau/nv50_instmem.c b/drivers/gpu/drm/nouveau/nv50_instmem.c index adac4da98f7e..2e1b1cd19a4b 100644 --- a/drivers/gpu/drm/nouveau/nv50_instmem.c +++ b/drivers/gpu/drm/nouveau/nv50_instmem.c @@ -151,20 +151,19 @@ nv50_instmem_init(struct drm_device *dev) /* BAR3 */ ret = nouveau_vm_new(dev, BAR3_VM_BASE, BAR3_VM_SIZE, BAR3_VM_BASE, - 29, 12, 16, &dev_priv->bar3_vm); + &dev_priv->bar3_vm); if (ret) goto error; ret = nouveau_gpuobj_new(dev, NULL, (BAR3_VM_SIZE >> 12) * 8, 0x1000, NVOBJ_FLAG_DONT_MAP | NVOBJ_FLAG_ZERO_ALLOC, - &dev_priv->bar3_vm->pgt[0].obj); + &dev_priv->bar3_vm->pgt[0].obj[0]); if (ret) goto error; - dev_priv->bar3_vm->pgt[0].page_shift = 12; - dev_priv->bar3_vm->pgt[0].refcount = 1; + dev_priv->bar3_vm->pgt[0].refcount[0] = 1; - nv50_instmem_map(dev_priv->bar3_vm->pgt[0].obj); + nv50_instmem_map(dev_priv->bar3_vm->pgt[0].obj[0]); ret = nv50_channel_new(dev, 128 * 1024, dev_priv->bar3_vm, &chan); if (ret) @@ -195,8 +194,7 @@ nv50_instmem_init(struct drm_device *dev) nv_wo32(chan->ramin, 0, tmp); /* BAR1 */ - ret = nouveau_vm_new(dev, BAR1_VM_BASE, BAR1_VM_SIZE, BAR1_VM_BASE, - 29, 12, 16, &vm); + ret = nouveau_vm_new(dev, BAR1_VM_BASE, BAR1_VM_SIZE, BAR1_VM_BASE, &vm); if (ret) goto error; @@ -220,7 +218,7 @@ nv50_instmem_init(struct drm_device *dev) * to catch "NULL pointer" references */ ret = nouveau_vm_new(dev, 0, (1ULL << 40), 0x0020000000ULL, - 29, 12, 16, &dev_priv->chan_vm); + &dev_priv->chan_vm); if (ret) return ret; @@ -258,7 +256,7 @@ nv50_instmem_takedown(struct drm_device *dev) dev_priv->channels.ptr[127] = 0; nv50_channel_del(&dev_priv->channels.ptr[0]); - nouveau_gpuobj_ref(NULL, &dev_priv->bar3_vm->pgt[0].obj); + nouveau_gpuobj_ref(NULL, &dev_priv->bar3_vm->pgt[0].obj[0]); nouveau_vm_ref(NULL, &dev_priv->bar3_vm, NULL); if (dev_priv->ramin_heap.free_stack.next) diff --git a/drivers/gpu/drm/nouveau/nv50_vm.c b/drivers/gpu/drm/nouveau/nv50_vm.c index 7939387f7f80..38e523e10995 100644 --- a/drivers/gpu/drm/nouveau/nv50_vm.c +++ b/drivers/gpu/drm/nouveau/nv50_vm.c @@ -28,39 +28,40 @@ #include "nouveau_vm.h" void -nv50_vm_map_pgt(struct nouveau_gpuobj *pgd, u32 type, u32 pde, - struct nouveau_gpuobj *pgt) +nv50_vm_map_pgt(struct nouveau_gpuobj *pgd, u32 pde, + struct nouveau_gpuobj *pgt[2]) { struct drm_nouveau_private *dev_priv = pgd->dev->dev_private; - u32 coverage = (pgt->size >> 3) << type; - u64 phys; - - phys = pgt->vinst; - phys |= 0x01; /* present */ - phys |= (type == 12) ? 0x02 : 0x00; /* 4KiB pages */ - if (dev_priv->vram_sys_base) { - phys += dev_priv->vram_sys_base; - phys |= 0x30; + u64 phys = 0xdeadcafe00000000ULL; + u32 coverage = 0; + + if (pgt[0]) { + phys = 0x00000003 | pgt[0]->vinst; /* present, 4KiB pages */ + coverage = (pgt[0]->size >> 3) << 12; + } else + if (pgt[1]) { + phys = 0x00000001 | pgt[1]->vinst; /* present */ + coverage = (pgt[1]->size >> 3) << 16; } - if (coverage <= 32 * 1024 * 1024) - phys |= 0x60; - else if (coverage <= 64 * 1024 * 1024) - phys |= 0x40; - else if (coverage < 128 * 1024 * 1024) - phys |= 0x20; + if (phys & 1) { + if (dev_priv->vram_sys_base) { + phys += dev_priv->vram_sys_base; + phys |= 0x30; + } + + if (coverage <= 32 * 1024 * 1024) + phys |= 0x60; + else if (coverage <= 64 * 1024 * 1024) + phys |= 0x40; + else if (coverage < 128 * 1024 * 1024) + phys |= 0x20; + } nv_wo32(pgd, (pde * 8) + 0, lower_32_bits(phys)); nv_wo32(pgd, (pde * 8) + 4, upper_32_bits(phys)); } -void -nv50_vm_unmap_pgt(struct nouveau_gpuobj *pgd, u32 pde) -{ - nv_wo32(pgd, (pde * 8) + 0, 0x00000000); - nv_wo32(pgd, (pde * 8) + 4, 0xdeadcafe); -} - static inline u64 nv50_vm_addr(struct nouveau_vma *vma, struct nouveau_gpuobj *pgt, u64 phys, u32 memtype, u32 target)