From: Paul Mackerras Date: Mon, 12 Dec 2011 12:31:41 +0000 (+0000) Subject: KVM: PPC: Allow use of small pages to back Book3S HV guests X-Git-Tag: firefly_0821_release~3680^2~3220^2~76 X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=da9d1d7f2875cc8c1ffbce8f3501d0b33f4e7a4d;p=firefly-linux-kernel-4.4.55.git KVM: PPC: Allow use of small pages to back Book3S HV guests This relaxes the requirement that the guest memory be provided as 16MB huge pages, allowing it to be provided as normal memory, i.e. in pages of PAGE_SIZE bytes (4k or 64k). To allow this, we index the kvm->arch.slot_phys[] arrays with a small page index, even if huge pages are being used, and use the low-order 5 bits of each entry to store the order of the enclosing page with respect to normal pages, i.e. log_2(enclosing_page_size / PAGE_SIZE). Signed-off-by: Paul Mackerras Signed-off-by: Alexander Graf Signed-off-by: Avi Kivity --- diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h index 7e6f2ede44ac..10920f7a2b8d 100644 --- a/arch/powerpc/include/asm/kvm_book3s_64.h +++ b/arch/powerpc/include/asm/kvm_book3s_64.h @@ -113,4 +113,14 @@ static inline unsigned long hpte_page_size(unsigned long h, unsigned long l) return 0; /* error */ } +static inline bool slot_is_aligned(struct kvm_memory_slot *memslot, + unsigned long pagesize) +{ + unsigned long mask = (pagesize >> PAGE_SHIFT) - 1; + + if (pagesize <= PAGE_SIZE) + return 1; + return !(memslot->base_gfn & mask) && !(memslot->npages & mask); +} + #endif /* __ASM_KVM_BOOK3S_64_H__ */ diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index beb22ba71e26..9252d5e3758d 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -177,14 +177,13 @@ struct revmap_entry { }; /* Low-order bits in kvm->arch.slot_phys[][] */ +#define KVMPPC_PAGE_ORDER_MASK 0x1f #define KVMPPC_GOT_PAGE 0x80 struct kvm_arch { #ifdef CONFIG_KVM_BOOK3S_64_HV unsigned long hpt_virt; struct revmap_entry *revmap; - unsigned long ram_psize; - unsigned long ram_porder; unsigned int lpid; unsigned int host_lpid; unsigned long host_lpcr; diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index 1458c6740ea3..fb70414db90c 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -122,7 +122,7 @@ extern void kvmppc_free_hpt(struct kvm *kvm); extern long kvmppc_prepare_vrma(struct kvm *kvm, struct kvm_userspace_memory_region *mem); extern void kvmppc_map_vrma(struct kvm_vcpu *vcpu, - struct kvm_memory_slot *memslot); + struct kvm_memory_slot *memslot, unsigned long porder); extern int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu); extern long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm, struct kvm_create_spapr_tce *args); diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index 7fdc2c0b7fa0..64447f6c049a 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -237,6 +237,7 @@ #define LPCR_ISL (1ul << (63-2)) #define LPCR_VC_SH (63-2) #define LPCR_DPFD_SH (63-11) +#define LPCR_VRMASD (0x1ful << (63-16)) #define LPCR_VRMA_L (1ul << (63-12)) #define LPCR_VRMA_LP0 (1ul << (63-15)) #define LPCR_VRMA_LP1 (1ul << (63-16)) diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index 87016ccd8648..cc18f3d67a57 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -34,8 +34,6 @@ #include #include -/* Pages in the VRMA are 16MB pages */ -#define VRMA_PAGE_ORDER 24 #define VRMA_VSID 0x1ffffffUL /* 1TB VSID reserved for VRMA */ /* POWER7 has 10-bit LPIDs, PPC970 has 6-bit LPIDs */ @@ -95,17 +93,31 @@ void kvmppc_free_hpt(struct kvm *kvm) free_pages(kvm->arch.hpt_virt, HPT_ORDER - PAGE_SHIFT); } -void kvmppc_map_vrma(struct kvm_vcpu *vcpu, struct kvm_memory_slot *memslot) +/* Bits in first HPTE dword for pagesize 4k, 64k or 16M */ +static inline unsigned long hpte0_pgsize_encoding(unsigned long pgsize) +{ + return (pgsize > 0x1000) ? HPTE_V_LARGE : 0; +} + +/* Bits in second HPTE dword for pagesize 4k, 64k or 16M */ +static inline unsigned long hpte1_pgsize_encoding(unsigned long pgsize) +{ + return (pgsize == 0x10000) ? 0x1000 : 0; +} + +void kvmppc_map_vrma(struct kvm_vcpu *vcpu, struct kvm_memory_slot *memslot, + unsigned long porder) { - struct kvm *kvm = vcpu->kvm; unsigned long i; unsigned long npages; unsigned long hp_v, hp_r; unsigned long addr, hash; - unsigned long porder = kvm->arch.ram_porder; + unsigned long psize; + unsigned long hp0, hp1; long ret; - npages = kvm->arch.slot_npages[memslot->id]; + psize = 1ul << porder; + npages = memslot->npages >> (porder - PAGE_SHIFT); /* VRMA can't be > 1TB */ if (npages > 1ul << (40 - porder)) @@ -114,6 +126,11 @@ void kvmppc_map_vrma(struct kvm_vcpu *vcpu, struct kvm_memory_slot *memslot) if (npages > HPT_NPTEG) npages = HPT_NPTEG; + hp0 = HPTE_V_1TB_SEG | (VRMA_VSID << (40 - 16)) | + HPTE_V_BOLTED | hpte0_pgsize_encoding(psize); + hp1 = hpte1_pgsize_encoding(psize) | + HPTE_R_R | HPTE_R_C | HPTE_R_M | PP_RWXX; + for (i = 0; i < npages; ++i) { addr = i << porder; /* can't use hpt_hash since va > 64 bits */ @@ -125,10 +142,8 @@ void kvmppc_map_vrma(struct kvm_vcpu *vcpu, struct kvm_memory_slot *memslot) * is available and use it. */ hash = (hash << 3) + 7; - hp_v = HPTE_V_1TB_SEG | (VRMA_VSID << (40 - 16)) | - (i << (VRMA_PAGE_ORDER - 16)) | HPTE_V_BOLTED | - HPTE_V_LARGE | HPTE_V_VALID; - hp_r = addr | HPTE_R_R | HPTE_R_C | HPTE_R_M | PP_RWXX; + hp_v = hp0 | ((addr >> 16) & ~0x7fUL); + hp_r = hp1 | addr; ret = kvmppc_virtmode_h_enter(vcpu, H_EXACT, hash, hp_v, hp_r); if (ret != H_SUCCESS) { pr_err("KVM: map_vrma at %lx failed, ret=%ld\n", @@ -176,22 +191,25 @@ static void kvmppc_mmu_book3s_64_hv_reset_msr(struct kvm_vcpu *vcpu) * one already in the kvm->arch.slot_phys[][] arrays. */ static long kvmppc_get_guest_page(struct kvm *kvm, unsigned long gfn, - struct kvm_memory_slot *memslot) + struct kvm_memory_slot *memslot, + unsigned long psize) { unsigned long start; - long np; - struct page *page, *pages[1]; + long np, err; + struct page *page, *hpage, *pages[1]; + unsigned long s, pgsize; unsigned long *physp; - unsigned long pfn, i; + unsigned int got, pgorder; + unsigned long pfn, i, npages; physp = kvm->arch.slot_phys[memslot->id]; if (!physp) return -EINVAL; - i = (gfn - memslot->base_gfn) >> (kvm->arch.ram_porder - PAGE_SHIFT); - if (physp[i]) + if (physp[gfn - memslot->base_gfn]) return 0; page = NULL; + pgsize = psize; start = gfn_to_hva_memslot(memslot, gfn); /* Instantiate and get the page we want access to */ @@ -199,25 +217,46 @@ static long kvmppc_get_guest_page(struct kvm *kvm, unsigned long gfn, if (np != 1) return -EINVAL; page = pages[0]; - - /* Check it's a 16MB page */ - if (!PageHead(page) || - compound_order(page) != (kvm->arch.ram_porder - PAGE_SHIFT)) { - pr_err("page at %lx isn't 16MB (o=%d)\n", - start, compound_order(page)); - put_page(page); - return -EINVAL; + got = KVMPPC_GOT_PAGE; + + /* See if this is a large page */ + s = PAGE_SIZE; + if (PageHuge(page)) { + hpage = compound_head(page); + s <<= compound_order(hpage); + /* Get the whole large page if slot alignment is ok */ + if (s > psize && slot_is_aligned(memslot, s) && + !(memslot->userspace_addr & (s - 1))) { + start &= ~(s - 1); + pgsize = s; + page = hpage; + } } + err = -EINVAL; + if (s < psize) + goto out; pfn = page_to_pfn(page); + npages = pgsize >> PAGE_SHIFT; + pgorder = __ilog2(npages); + physp += (gfn - memslot->base_gfn) & ~(npages - 1); spin_lock(&kvm->arch.slot_phys_lock); - if (!physp[i]) - physp[i] = (pfn << PAGE_SHIFT) | KVMPPC_GOT_PAGE; - else - put_page(page); + for (i = 0; i < npages; ++i) { + if (!physp[i]) { + physp[i] = ((pfn + i) << PAGE_SHIFT) + got + pgorder; + got = 0; + } + } spin_unlock(&kvm->arch.slot_phys_lock); + err = 0; - return 0; + out: + if (got) { + if (PageHuge(page)) + page = compound_head(page); + put_page(page); + } + return err; } /* @@ -242,7 +281,9 @@ long kvmppc_virtmode_h_enter(struct kvm_vcpu *vcpu, unsigned long flags, memslot = gfn_to_memslot(kvm, gfn); if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID)) return H_PARAMETER; - if (kvmppc_get_guest_page(kvm, gfn, memslot) < 0) + if (!slot_is_aligned(memslot, psize)) + return H_PARAMETER; + if (kvmppc_get_guest_page(kvm, gfn, memslot, psize) < 0) return H_PARAMETER; preempt_disable(); @@ -269,8 +310,8 @@ void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long gpa, struct kvm_memory_slot *memslot; unsigned long gfn = gpa >> PAGE_SHIFT; struct page *page; - unsigned long offset; - unsigned long pfn, pa; + unsigned long psize, offset; + unsigned long pa; unsigned long *physp; memslot = gfn_to_memslot(kvm, gfn); @@ -279,20 +320,23 @@ void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long gpa, physp = kvm->arch.slot_phys[memslot->id]; if (!physp) return NULL; - physp += (gfn - memslot->base_gfn) >> - (kvm->arch.ram_porder - PAGE_SHIFT); + physp += gfn - memslot->base_gfn; pa = *physp; if (!pa) { - if (kvmppc_get_guest_page(kvm, gfn, memslot) < 0) + if (kvmppc_get_guest_page(kvm, gfn, memslot, PAGE_SIZE) < 0) return NULL; pa = *physp; } - pfn = pa >> PAGE_SHIFT; - page = pfn_to_page(pfn); + page = pfn_to_page(pa >> PAGE_SHIFT); + psize = PAGE_SIZE; + if (PageHuge(page)) { + page = compound_head(page); + psize <<= compound_order(page); + } get_page(page); - offset = gpa & (kvm->arch.ram_psize - 1); + offset = gpa & (psize - 1); if (nb_ret) - *nb_ret = kvm->arch.ram_psize - offset; + *nb_ret = psize - offset; return page_address(page) + offset; } diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index ce5a13fb974b..6ed0a84ef91c 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -51,8 +51,6 @@ #include #include -#define LARGE_PAGE_ORDER 24 /* 16MB pages */ - /* #define EXIT_DEBUG */ /* #define EXIT_DEBUG_SIMPLE */ /* #define EXIT_DEBUG_INT */ @@ -1074,24 +1072,26 @@ long kvm_vm_ioctl_allocate_rma(struct kvm *kvm, struct kvm_allocate_rma *ret) return fd; } +static unsigned long slb_pgsize_encoding(unsigned long psize) +{ + unsigned long senc = 0; + + if (psize > 0x1000) { + senc = SLB_VSID_L; + if (psize == 0x10000) + senc |= SLB_VSID_LP_01; + } + return senc; +} + int kvmppc_core_prepare_memory_region(struct kvm *kvm, struct kvm_userspace_memory_region *mem) { - unsigned long psize; unsigned long npages; unsigned long *phys; - /* For now, only allow 16MB-aligned slots */ - psize = kvm->arch.ram_psize; - if ((mem->memory_size & (psize - 1)) || - (mem->guest_phys_addr & (psize - 1))) { - pr_err("bad memory_size=%llx @ %llx\n", - mem->memory_size, mem->guest_phys_addr); - return -EINVAL; - } - /* Allocate a slot_phys array */ - npages = mem->memory_size >> kvm->arch.ram_porder; + npages = mem->memory_size >> PAGE_SHIFT; phys = kvm->arch.slot_phys[mem->slot]; if (!phys) { phys = vzalloc(npages * sizeof(unsigned long)); @@ -1119,6 +1119,8 @@ static void unpin_slot(struct kvm *kvm, int slot_id) continue; pfn = physp[j] >> PAGE_SHIFT; page = pfn_to_page(pfn); + if (PageHuge(page)) + page = compound_head(page); SetPageDirty(page); put_page(page); } @@ -1141,12 +1143,12 @@ static int kvmppc_hv_setup_rma(struct kvm_vcpu *vcpu) unsigned long hva; struct kvm_memory_slot *memslot; struct vm_area_struct *vma; - unsigned long lpcr; + unsigned long lpcr, senc; unsigned long psize, porder; unsigned long rma_size; unsigned long rmls; unsigned long *physp; - unsigned long i, npages, pa; + unsigned long i, npages; mutex_lock(&kvm->lock); if (kvm->arch.rma_setup_done) @@ -1168,8 +1170,7 @@ static int kvmppc_hv_setup_rma(struct kvm_vcpu *vcpu) goto up_out; psize = vma_kernel_pagesize(vma); - if (psize != kvm->arch.ram_psize) - goto up_out; + porder = __ilog2(psize); /* Is this one of our preallocated RMAs? */ if (vma->vm_file && vma->vm_file->f_op == &kvm_rma_fops && @@ -1186,13 +1187,20 @@ static int kvmppc_hv_setup_rma(struct kvm_vcpu *vcpu) goto out; } + /* We can handle 4k, 64k or 16M pages in the VRMA */ + err = -EINVAL; + if (!(psize == 0x1000 || psize == 0x10000 || + psize == 0x1000000)) + goto out; + /* Update VRMASD field in the LPCR */ - lpcr = kvm->arch.lpcr & ~(0x1fUL << LPCR_VRMASD_SH); - lpcr |= LPCR_VRMA_L; + senc = slb_pgsize_encoding(psize); + lpcr = kvm->arch.lpcr & ~LPCR_VRMASD; + lpcr |= senc << (LPCR_VRMASD_SH - 4); kvm->arch.lpcr = lpcr; /* Create HPTEs in the hash page table for the VRMA */ - kvmppc_map_vrma(vcpu, memslot); + kvmppc_map_vrma(vcpu, memslot, porder); } else { /* Set up to use an RMO region */ @@ -1231,13 +1239,12 @@ static int kvmppc_hv_setup_rma(struct kvm_vcpu *vcpu) ri->base_pfn << PAGE_SHIFT, rma_size, lpcr); /* Initialize phys addrs of pages in RMO */ - porder = kvm->arch.ram_porder; - npages = rma_size >> porder; - pa = ri->base_pfn << PAGE_SHIFT; + npages = ri->npages; + porder = __ilog2(npages); physp = kvm->arch.slot_phys[memslot->id]; spin_lock(&kvm->arch.slot_phys_lock); for (i = 0; i < npages; ++i) - physp[i] = pa + (i << porder); + physp[i] = ((ri->base_pfn + i) << PAGE_SHIFT) + porder; spin_unlock(&kvm->arch.slot_phys_lock); } @@ -1266,8 +1273,6 @@ int kvmppc_core_init_vm(struct kvm *kvm) INIT_LIST_HEAD(&kvm->arch.spapr_tce_tables); - kvm->arch.ram_psize = 1ul << LARGE_PAGE_ORDER; - kvm->arch.ram_porder = LARGE_PAGE_ORDER; kvm->arch.rma = NULL; kvm->arch.host_sdr1 = mfspr(SPRN_SDR1); diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c index 047c5e1fd70f..c086eb0fa992 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c +++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c @@ -77,6 +77,10 @@ long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags, memslot = builtin_gfn_to_memslot(kvm, gfn); if (!(memslot && !(memslot->flags & KVM_MEMSLOT_INVALID))) return H_PARAMETER; + + /* Check if the requested page fits entirely in the memslot. */ + if (!slot_is_aligned(memslot, psize)) + return H_PARAMETER; slot_fn = gfn - memslot->base_gfn; physp = kvm->arch.slot_phys[memslot->id]; @@ -88,9 +92,9 @@ long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags, pa = *physp; if (!pa) return H_TOO_HARD; + pte_size = PAGE_SIZE << (pa & KVMPPC_PAGE_ORDER_MASK); pa &= PAGE_MASK; - pte_size = kvm->arch.ram_psize; if (pte_size < psize) return H_PARAMETER; if (pa && pte_size > psize)