/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License, version 2, as
 * published by the Free Software Foundation.
 *
 * Copyright 2010-2011 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
 */

#include <linux/types.h>
#include <linux/string.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/hugetlb.h>
#include <linux/module.h>
#include <linux/log2.h>

#include <asm/tlbflush.h>
#include <asm/kvm_ppc.h>
#include <asm/kvm_book3s.h>
#include <asm/mmu-hash64.h>
#include <asm/hvcall.h>
#include <asm/synch.h>
#include <asm/ppc-opcode.h>

/* Translate the address of a vmalloc'd thing to a linear map address */
static void *real_vmalloc_addr(void *x)
{
	unsigned long addr = (unsigned long) x;
	pte_t *p;
	/*
	 * Assume we don't have huge pages in vmalloc space, so we don't
	 * need to worry about THP collapse/split.  This is called only
	 * in real mode, hence no irq_save/restore is needed.
	 */
	p = __find_linux_pte_or_hugepte(swapper_pg_dir, addr, NULL);
	if (!p || !pte_present(*p))
		return NULL;
	addr = (pte_pfn(*p) << PAGE_SHIFT) | (addr & ~PAGE_MASK);
	return __va(addr);
}

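/*
 * Note on TLB invalidation: tlbie broadcasts the invalidation to all
 * CPUs, whereas tlbiel only invalidates the local core's TLB.  Using
 * tlbiel is therefore only safe when no other core can be running
 * vcpus of this guest, or when all other cores are marked as needing
 * a TLB flush before they next run the guest.
 */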
/* Return 1 if we need to do a global tlbie, 0 if we can use tlbiel */
static int global_invalidates(struct kvm *kvm, unsigned long flags)
{
	int global;

	/*
	 * If there is only one vcore, and it's currently running,
	 * as indicated by local_paca->kvm_hstate.kvm_vcpu being set,
	 * we can use tlbiel as long as we mark all other physical
	 * cores as potentially having stale TLB entries for this lpid.
	 * Otherwise, don't use tlbiel.
	 */
	if (kvm->arch.online_vcores == 1 && local_paca->kvm_hstate.kvm_vcpu)
		global = 0;
	else
		global = 1;

	if (!global) {
		/* any other core might now have stale TLB entries... */
		smp_wmb();
		cpumask_setall(&kvm->arch.need_tlb_flush);
		cpumask_clear_cpu(local_paca->kvm_hstate.kvm_vcore->pcpu,
				  &kvm->arch.need_tlb_flush);
	}

	return global;
}

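/*
 * Reverse-mapping chains: for each guest real page, the rmap word in the
 * memslot's arch.rmap[] array records whether any HPTEs map that page
 * (KVMPPC_RMAP_PRESENT) and, if so, the index of one such HPTE
 * (KVMPPC_RMAP_INDEX).  The revmap_entry structs for all HPTEs mapping
 * the page are linked into a circular doubly-linked list through their
 * forw/back fields, so they can all be found when the host unmaps or
 * ages the page.
 */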
/*
 * Add this HPTE into the chain for the real page.
 * Must be called with the chain locked; it unlocks the chain.
 */
void kvmppc_add_revmap_chain(struct kvm *kvm, struct revmap_entry *rev,
			     unsigned long *rmap, long pte_index, int realmode)
{
	struct revmap_entry *head, *tail;
	unsigned long i;

	if (*rmap & KVMPPC_RMAP_PRESENT) {
		i = *rmap & KVMPPC_RMAP_INDEX;
		head = &kvm->arch.revmap[i];
		if (realmode)
			head = real_vmalloc_addr(head);
		tail = &kvm->arch.revmap[head->back];
		if (realmode)
			tail = real_vmalloc_addr(tail);
		rev->forw = i;
		rev->back = head->back;
		tail->forw = pte_index;
		head->back = pte_index;
	} else {
		rev->forw = rev->back = pte_index;
		*rmap = (*rmap & ~KVMPPC_RMAP_INDEX) |
			pte_index | KVMPPC_RMAP_PRESENT;
	}
	unlock_rmap(rmap);
}
EXPORT_SYMBOL_GPL(kvmppc_add_revmap_chain);

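/*
 * The CHG_ORDER field of an rmap entry remembers the largest page order
 * for which a change (C) bit has been seen, so that the dirty-page
 * tracking code can tell how large a region to mark dirty when it
 * harvests the rmap entry.
 */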
/* Update the changed page order field of an rmap entry */
void kvmppc_update_rmap_change(unsigned long *rmap, unsigned long psize)
{
	unsigned long order;

	if (!psize)
		return;
	order = ilog2(psize);
	order <<= KVMPPC_RMAP_CHG_SHIFT;
	if (order > (*rmap & KVMPPC_RMAP_CHG_ORDER))
		*rmap = (*rmap & ~KVMPPC_RMAP_CHG_ORDER) | order;
}
EXPORT_SYMBOL_GPL(kvmppc_update_rmap_change);

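/*
 * The pointer returned below is passed through real_vmalloc_addr() so
 * that it can be dereferenced from real mode; a NULL return means the
 * guest page is not backed by a memslot (e.g. emulated MMIO).
 */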
/* Returns a pointer to the revmap entry for the page mapped by a HPTE */
static unsigned long *revmap_for_hpte(struct kvm *kvm, unsigned long hpte_v,
				      unsigned long hpte_gr)
{
	struct kvm_memory_slot *memslot;
	unsigned long *rmap;
	unsigned long gfn;

	gfn = hpte_rpn(hpte_gr, hpte_page_size(hpte_v, hpte_gr));
	memslot = __gfn_to_memslot(kvm_memslots_raw(kvm), gfn);
	if (!memslot)
		return NULL;

	rmap = real_vmalloc_addr(&memslot->arch.rmap[gfn - memslot->base_gfn]);
	return rmap;
}

/* Remove this HPTE from the chain for a real page */
static void remove_revmap_chain(struct kvm *kvm, long pte_index,
				struct revmap_entry *rev,
				unsigned long hpte_v, unsigned long hpte_r)
{
	struct revmap_entry *next, *prev;
	unsigned long ptel, head;
	unsigned long *rmap;
	unsigned long rcbits;

	rcbits = hpte_r & (HPTE_R_R | HPTE_R_C);
	ptel = rev->guest_rpte |= rcbits;
	rmap = revmap_for_hpte(kvm, hpte_v, ptel);
	if (!rmap)
		return;
	lock_rmap(rmap);

	head = *rmap & KVMPPC_RMAP_INDEX;
	next = real_vmalloc_addr(&kvm->arch.revmap[rev->forw]);
	prev = real_vmalloc_addr(&kvm->arch.revmap[rev->back]);
	next->back = rev->back;
	prev->forw = rev->forw;
	if (head == pte_index) {
		head = rev->forw;
		if (head == pte_index)
			*rmap &= ~(KVMPPC_RMAP_PRESENT | KVMPPC_RMAP_INDEX);
		else
			*rmap = (*rmap & ~KVMPPC_RMAP_INDEX) | head;
	}
	*rmap |= rcbits << KVMPPC_RMAP_RC_SHIFT;
	if (rcbits & HPTE_R_C)
		kvmppc_update_rmap_change(rmap, hpte_page_size(hpte_v, hpte_r));
	unlock_rmap(rmap);
}

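/*
 * Real-mode handler for the H_ENTER hypercall: validate the guest's
 * proposed HPTE, translate the guest real address to a host page via
 * the memslot and the host Linux page tables, pick and lock a free
 * slot in the hash page table group (HPTEG), link the entry into the
 * reverse-map chain, and finally write and unlock the HPTE.
 */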
long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
		       long pte_index, unsigned long pteh, unsigned long ptel,
		       pgd_t *pgdir, bool realmode, unsigned long *pte_idx_ret)
{
	unsigned long i, pa, gpa, gfn, psize;
	unsigned long slot_fn, hva;
	__be64 *hpte;
	struct revmap_entry *rev;
	unsigned long g_ptel;
	struct kvm_memory_slot *memslot;
	unsigned hpage_shift;
	unsigned long is_io;
	unsigned long *rmap;
	pte_t *ptep;
	unsigned int writing;
	unsigned long mmu_seq;
	unsigned long rcbits, irq_flags = 0;

	psize = hpte_page_size(pteh, ptel);
	if (!psize)
		return H_PARAMETER;
	writing = hpte_is_writable(ptel);
	pteh &= ~(HPTE_V_HVLOCK | HPTE_V_ABSENT | HPTE_V_VALID);
	ptel &= ~HPTE_GR_RESERVED;
	g_ptel = ptel;

	/* used later to detect if we might have been invalidated */
	mmu_seq = kvm->mmu_notifier_seq;
	smp_rmb();

	/* Find the memslot (if any) for this address */
	gpa = (ptel & HPTE_R_RPN) & ~(psize - 1);
	gfn = gpa >> PAGE_SHIFT;
	memslot = __gfn_to_memslot(kvm_memslots_raw(kvm), gfn);
	pa = 0;
	is_io = ~0ul;
	rmap = NULL;
	if (!(memslot && !(memslot->flags & KVM_MEMSLOT_INVALID))) {
		/* Emulated MMIO - mark this with key=31 */
		pteh |= HPTE_V_ABSENT;
		ptel |= HPTE_R_KEY_HI | HPTE_R_KEY_LO;
		goto do_insert;
	}

	/* Check if the requested page fits entirely in the memslot. */
	if (!slot_is_aligned(memslot, psize))
		return H_PARAMETER;
	slot_fn = gfn - memslot->base_gfn;
	rmap = &memslot->arch.rmap[slot_fn];

	/* Translate to host virtual address */
	hva = __gfn_to_hva_memslot(memslot, gfn);
	/*
	 * If we had a page table change after the lookup, we would
	 * retry via mmu_notifier_retry.
	 */
	if (realmode)
		ptep = __find_linux_pte_or_hugepte(pgdir, hva, &hpage_shift);
	else {
		local_irq_save(irq_flags);
		ptep = find_linux_pte_or_hugepte(pgdir, hva, &hpage_shift);
	}
	if (ptep) {
		pte_t pte;
		unsigned int host_pte_size;

		if (hpage_shift)
			host_pte_size = 1ul << hpage_shift;
		else
			host_pte_size = PAGE_SIZE;
		/*
		 * The guest page size should always be <= the host page
		 * size, even if the host is using hugepages.
		 */
		if (host_pte_size < psize) {
			if (!realmode)
				local_irq_restore(irq_flags);
			return H_PARAMETER;
		}
		pte = kvmppc_read_update_linux_pte(ptep, writing);
		if (pte_present(pte) && !pte_protnone(pte)) {
			if (writing && !pte_write(pte))
				/* make the actual HPTE be read-only */
				ptel = hpte_make_readonly(ptel);
			is_io = hpte_cache_bits(pte_val(pte));
			pa = pte_pfn(pte) << PAGE_SHIFT;
			pa |= hva & (host_pte_size - 1);
			pa |= gpa & ~PAGE_MASK;
		}
	}
	if (!realmode)
		local_irq_restore(irq_flags);

	ptel &= ~(HPTE_R_PP0 - psize);
	ptel |= pa;

	if (pa)
		pteh |= HPTE_V_VALID;
	else
		pteh |= HPTE_V_ABSENT;

	/* Check WIMG */
	if (is_io != ~0ul && !hpte_cache_flags_ok(ptel, is_io)) {
		if (is_io)
			return H_PARAMETER;
		/*
		 * Allow guest to map emulated device memory as
		 * uncacheable, but actually make it cacheable.
		 */
		ptel &= ~(HPTE_R_W|HPTE_R_I|HPTE_R_G);
		ptel |= HPTE_R_M;
	}

	/* Find and lock the HPTEG slot to use */
 do_insert:
	if (pte_index >= kvm->arch.hpt_npte)
		return H_PARAMETER;
	if (likely((flags & H_EXACT) == 0)) {
		pte_index &= ~7UL;
		hpte = (__be64 *)(kvm->arch.hpt_virt + (pte_index << 4));
		for (i = 0; i < 8; ++i) {
			if ((be64_to_cpu(*hpte) & HPTE_V_VALID) == 0 &&
			    try_lock_hpte(hpte, HPTE_V_HVLOCK | HPTE_V_VALID |
					  HPTE_V_ABSENT))
				break;
			hpte += 2;
		}
		if (i == 8) {
			/*
			 * Since try_lock_hpte doesn't retry (not even stdcx.
			 * failures), it could be that there is a free slot
			 * but we transiently failed to lock it.  Try again,
			 * actually locking each slot and checking it.
			 */
			hpte -= 16;
			for (i = 0; i < 8; ++i) {
				u64 pte;

				while (!try_lock_hpte(hpte, HPTE_V_HVLOCK))
					cpu_relax();
				pte = be64_to_cpu(hpte[0]);
				if (!(pte & (HPTE_V_VALID | HPTE_V_ABSENT)))
					break;
				__unlock_hpte(hpte, pte);
				hpte += 2;
			}
			if (i == 8)
				return H_PTEG_FULL;
		}
		pte_index += i;
	} else {
		hpte = (__be64 *)(kvm->arch.hpt_virt + (pte_index << 4));
		if (!try_lock_hpte(hpte, HPTE_V_HVLOCK | HPTE_V_VALID |
				   HPTE_V_ABSENT)) {
			/* Lock the slot and check again */
			u64 pte;

			while (!try_lock_hpte(hpte, HPTE_V_HVLOCK))
				cpu_relax();
			pte = be64_to_cpu(hpte[0]);
			if (pte & (HPTE_V_VALID | HPTE_V_ABSENT)) {
				__unlock_hpte(hpte, pte);
				return H_PTEG_FULL;
			}
		}
	}

	/* Save away the guest's idea of the second HPTE dword */
	rev = &kvm->arch.revmap[pte_index];
	if (realmode)
		rev = real_vmalloc_addr(rev);
	if (rev) {
		rev->guest_rpte = g_ptel;
		note_hpte_modification(kvm, rev);
	}

	/* Link HPTE into reverse-map chain */
	if (pteh & HPTE_V_VALID) {
		if (realmode)
			rmap = real_vmalloc_addr(rmap);
		lock_rmap(rmap);
		/* Check for pending invalidations under the rmap chain lock */
		if (mmu_notifier_retry(kvm, mmu_seq)) {
			/* inval in progress, write a non-present HPTE */
			pteh |= HPTE_V_ABSENT;
			pteh &= ~HPTE_V_VALID;
			unlock_rmap(rmap);
		} else {
			kvmppc_add_revmap_chain(kvm, rev, rmap, pte_index,
						realmode);
			/* Only set R/C in real HPTE if already set in *rmap */
			rcbits = *rmap >> KVMPPC_RMAP_RC_SHIFT;
			ptel &= rcbits | ~(HPTE_R_R | HPTE_R_C);
		}
	}

	hpte[1] = cpu_to_be64(ptel);

	/* Write the first HPTE dword, unlocking the HPTE and making it valid */
	eieio();
	__unlock_hpte(hpte, pteh);
	asm volatile("ptesync" : : : "memory");

	*pte_idx_ret = pte_index;
	return H_SUCCESS;
}
EXPORT_SYMBOL_GPL(kvmppc_do_h_enter);

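/*
 * H_ENTER hypercall entry point, called in real mode from the hcall
 * dispatch path.  The index of the slot actually used is returned to
 * the guest in GPR4, which is why &vcpu->arch.gpr[4] is passed as
 * pte_idx_ret.
 */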
long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
		    long pte_index, unsigned long pteh, unsigned long ptel)
{
	return kvmppc_do_h_enter(vcpu->kvm, flags, pte_index, pteh, ptel,
				 vcpu->arch.pgdir, true, &vcpu->arch.gpr[4]);
}

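/*
 * A non-zero per-CPU token marks the tlbie lock as held; try_lock_tlbie
 * below implements a simple trylock on that word using lwarx/stwcx.
 */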
#ifdef __BIG_ENDIAN__
#define LOCK_TOKEN	(*(u32 *)(&get_paca()->lock_token))
#else
#define LOCK_TOKEN	(*(u32 *)(&get_paca()->paca_index))
#endif

static inline int try_lock_tlbie(unsigned int *lock)
{
	unsigned int tmp, old;
	unsigned int token = LOCK_TOKEN;

	asm volatile("1:lwarx	%1,0,%2\n"
		     "	cmpwi	cr0,%1,0\n"
		     "	bne	2f\n"
		     "	stwcx.	%3,0,%2\n"
		     "	bne-	1b\n"
		     "	isync\n"
		     "2:"
		     : "=&r" (tmp), "=&r" (old)
		     : "r" (lock), "r" (token)
		     : "cc", "memory");
	return old == 0;
}

static void do_tlbies(struct kvm *kvm, unsigned long *rbvalues,
		      long npages, int global, bool need_sync)
{
	long i;

	if (global) {
		while (!try_lock_tlbie(&kvm->arch.tlbie_lock))
			cpu_relax();
		if (need_sync)
			asm volatile("ptesync" : : : "memory");
		for (i = 0; i < npages; ++i)
			asm volatile(PPC_TLBIE(%1,%0) : :
				     "r" (rbvalues[i]), "r" (kvm->arch.lpid));
		asm volatile("eieio; tlbsync; ptesync" : : : "memory");
		kvm->arch.tlbie_lock = 0;
	} else {
		if (need_sync)
			asm volatile("ptesync" : : : "memory");
		for (i = 0; i < npages; ++i)
			asm volatile("tlbiel %0" : : "r" (rbvalues[i]));
		asm volatile("ptesync" : : : "memory");
	}
}

long kvmppc_do_h_remove(struct kvm *kvm, unsigned long flags,
			unsigned long pte_index, unsigned long avpn,
			unsigned long *hpret)
{
	__be64 *hpte;
	unsigned long v, r, rb;
	struct revmap_entry *rev;
	u64 pte;

	if (pte_index >= kvm->arch.hpt_npte)
		return H_PARAMETER;
	hpte = (__be64 *)(kvm->arch.hpt_virt + (pte_index << 4));
	while (!try_lock_hpte(hpte, HPTE_V_HVLOCK))
		cpu_relax();
	pte = be64_to_cpu(hpte[0]);
	if ((pte & (HPTE_V_ABSENT | HPTE_V_VALID)) == 0 ||
	    ((flags & H_AVPN) && (pte & ~0x7fUL) != avpn) ||
	    ((flags & H_ANDCOND) && (pte & avpn) != 0)) {
		__unlock_hpte(hpte, pte);
		return H_NOT_FOUND;
	}

	rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]);
	v = pte & ~HPTE_V_HVLOCK;
	if (v & HPTE_V_VALID) {
		hpte[0] &= ~cpu_to_be64(HPTE_V_VALID);
		rb = compute_tlbie_rb(v, be64_to_cpu(hpte[1]), pte_index);
		do_tlbies(kvm, &rb, 1, global_invalidates(kvm, flags), true);
		/*
		 * The reference (R) and change (C) bits in a HPT
		 * entry can be set by hardware at any time up until
		 * the HPTE is invalidated and the TLB invalidation
		 * sequence has completed.  This means that when
		 * removing a HPTE, we need to re-read the HPTE after
		 * the invalidation sequence has completed in order to
		 * obtain reliable values of R and C.
		 */
		remove_revmap_chain(kvm, pte_index, rev, v,
				    be64_to_cpu(hpte[1]));
	}
	r = rev->guest_rpte & ~HPTE_GR_RESERVED;
	note_hpte_modification(kvm, rev);
	unlock_hpte(hpte, 0);

	hpret[0] = v;
	hpret[1] = r;
	return H_SUCCESS;
}
EXPORT_SYMBOL_GPL(kvmppc_do_h_remove);

long kvmppc_h_remove(struct kvm_vcpu *vcpu, unsigned long flags,
		     unsigned long pte_index, unsigned long avpn)
{
	return kvmppc_do_h_remove(vcpu->kvm, flags, pte_index, avpn,
				  &vcpu->arch.gpr[4]);
}

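/*
 * H_BULK_REMOVE: up to four remove requests are passed as (flags|index,
 * avpn) pairs in GPR4..GPR11.  Each request's completion code is written
 * back into the top byte of its first parameter word: 0x80 | flags for
 * success, 0x90 | flags for "not found", 0xa0 | flags for a parameter
 * error.  The TLB invalidations for a batch are issued together via
 * do_tlbies().
 */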
long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu)
{
	struct kvm *kvm = vcpu->kvm;
	unsigned long *args = &vcpu->arch.gpr[4];
	__be64 *hp, *hptes[4];
	unsigned long tlbrb[4];
	long int i, j, k, n, found, indexes[4];
	unsigned long flags, req, pte_index, rcbits;
	int global;
	long int ret = H_SUCCESS;
	struct revmap_entry *rev, *revs[4];
	u64 hp0;

	global = global_invalidates(kvm, 0);
	for (i = 0; i < 4 && ret == H_SUCCESS; ) {
		n = 0;
		for (; i < 4; ++i) {
			j = i * 2;
			pte_index = args[j];
			flags = pte_index >> 56;
			pte_index &= ((1ul << 56) - 1);
			req = flags >> 6;
			flags &= 3;
			if (req == 3) {		/* no more requests */
				i = 4;
				break;
			}
			if (req != 1 || flags == 3 ||
			    pte_index >= kvm->arch.hpt_npte) {
				/* parameter error */
				args[j] = ((0xa0 | flags) << 56) + pte_index;
				ret = H_PARAMETER;
				break;
			}
			hp = (__be64 *) (kvm->arch.hpt_virt + (pte_index << 4));
			/* to avoid deadlock, don't spin except for first */
			if (!try_lock_hpte(hp, HPTE_V_HVLOCK)) {
				if (n)
					break;
				while (!try_lock_hpte(hp, HPTE_V_HVLOCK))
					cpu_relax();
			}
			found = 0;
			hp0 = be64_to_cpu(hp[0]);
			if (hp0 & (HPTE_V_ABSENT | HPTE_V_VALID)) {
				switch (flags & 3) {
				case 0:		/* absolute */
					found = 1;
					break;
				case 1:		/* andcond */
					if (!(hp0 & args[j + 1]))
						found = 1;
					break;
				case 2:		/* AVPN */
					if ((hp0 & ~0x7fUL) == args[j + 1])
						found = 1;
					break;
				}
			}
			if (!found) {
				hp[0] &= ~cpu_to_be64(HPTE_V_HVLOCK);
				args[j] = ((0x90 | flags) << 56) + pte_index;
				continue;
			}

			args[j] = ((0x80 | flags) << 56) + pte_index;
			rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]);
			note_hpte_modification(kvm, rev);

			if (!(hp0 & HPTE_V_VALID)) {
				/* insert R and C bits from PTE */
				rcbits = rev->guest_rpte & (HPTE_R_R|HPTE_R_C);
				args[j] |= rcbits << (56 - 5);
				hp[0] = 0;
				continue;
			}

			/* leave it locked */
			hp[0] &= ~cpu_to_be64(HPTE_V_VALID);
			tlbrb[n] = compute_tlbie_rb(be64_to_cpu(hp[0]),
				be64_to_cpu(hp[1]), pte_index);
			indexes[n] = j;
			hptes[n] = hp;
			revs[n] = rev;
			++n;
		}

		if (!n)
			break;

		/* Now that we've collected a batch, do the tlbies */
		do_tlbies(kvm, tlbrb, n, global, true);

		/* Read PTE low words after tlbie to get final R/C values */
		for (k = 0; k < n; ++k) {
			j = indexes[k];
			pte_index = args[j] & ((1ul << 56) - 1);
			hp = hptes[k];
			rev = revs[k];
			remove_revmap_chain(kvm, pte_index, rev,
				be64_to_cpu(hp[0]), be64_to_cpu(hp[1]));
			rcbits = rev->guest_rpte & (HPTE_R_R|HPTE_R_C);
			args[j] |= rcbits << (56 - 5);
			__unlock_hpte(hp, 0);
		}
	}

	return ret;
}

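/*
 * H_PROTECT: change the protection (pp), no-execute and key bits of an
 * existing HPTE.  The guest's view in guest_rpte is always updated; the
 * real HPTE is only rewritten if it is currently valid, and a read-only
 * to writable upgrade is deliberately deferred to the page fault path
 * (see the comment in the function body).
 */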
long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags,
		      unsigned long pte_index, unsigned long avpn,
		      unsigned long va)
{
	struct kvm *kvm = vcpu->kvm;
	__be64 *hpte;
	struct revmap_entry *rev;
	unsigned long v, r, rb, mask, bits;
	u64 pte;

	if (pte_index >= kvm->arch.hpt_npte)
		return H_PARAMETER;

	hpte = (__be64 *)(kvm->arch.hpt_virt + (pte_index << 4));
	while (!try_lock_hpte(hpte, HPTE_V_HVLOCK))
		cpu_relax();
	pte = be64_to_cpu(hpte[0]);
	if ((pte & (HPTE_V_ABSENT | HPTE_V_VALID)) == 0 ||
	    ((flags & H_AVPN) && (pte & ~0x7fUL) != avpn)) {
		__unlock_hpte(hpte, pte);
		return H_NOT_FOUND;
	}

	v = pte;
	bits = (flags << 55) & HPTE_R_PP0;
	bits |= (flags << 48) & HPTE_R_KEY_HI;
	bits |= flags & (HPTE_R_PP | HPTE_R_N | HPTE_R_KEY_LO);

	/* Update guest view of 2nd HPTE dword */
	mask = HPTE_R_PP0 | HPTE_R_PP | HPTE_R_N |
		HPTE_R_KEY_HI | HPTE_R_KEY_LO;
	rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]);
	if (rev) {
		r = (rev->guest_rpte & ~mask) | bits;
		rev->guest_rpte = r;
		note_hpte_modification(kvm, rev);
	}

	/* Update HPTE */
	if (v & HPTE_V_VALID) {
		/*
		 * If the page is valid, don't let it transition from
		 * readonly to writable.  If it should be writable, we'll
		 * take a trap and let the page fault code sort it out.
		 */
		pte = be64_to_cpu(hpte[1]);
		r = (pte & ~mask) | bits;
		if (hpte_is_writable(r) && !hpte_is_writable(pte))
			r = hpte_make_readonly(r);
		/* If the PTE is changing, invalidate it first */
		if (r != pte) {
			rb = compute_tlbie_rb(v, r, pte_index);
			hpte[0] = cpu_to_be64((v & ~HPTE_V_VALID) |
					      HPTE_V_ABSENT);
			do_tlbies(kvm, &rb, 1, global_invalidates(kvm, flags),
				  true);
			hpte[1] = cpu_to_be64(r);
		}
	}
	unlock_hpte(hpte, v & ~HPTE_V_HVLOCK);
	asm volatile("ptesync" : : : "memory");
	return H_SUCCESS;
}

long kvmppc_h_read(struct kvm_vcpu *vcpu, unsigned long flags,
		   unsigned long pte_index)
{
	struct kvm *kvm = vcpu->kvm;
	__be64 *hpte;
	unsigned long v, r;
	int i, n = 1;
	struct revmap_entry *rev = NULL;

	if (pte_index >= kvm->arch.hpt_npte)
		return H_PARAMETER;
	if (flags & H_READ_4) {
		pte_index &= ~3;
		n = 4;
	}
	rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]);
	for (i = 0; i < n; ++i, ++pte_index) {
		hpte = (__be64 *)(kvm->arch.hpt_virt + (pte_index << 4));
		v = be64_to_cpu(hpte[0]) & ~HPTE_V_HVLOCK;
		r = be64_to_cpu(hpte[1]);
		if (v & HPTE_V_ABSENT) {
			v &= ~HPTE_V_ABSENT;
			v |= HPTE_V_VALID;
		}
		if (v & HPTE_V_VALID) {
			r = rev[i].guest_rpte | (r & (HPTE_R_R | HPTE_R_C));
			r &= ~HPTE_GR_RESERVED;
		}
		vcpu->arch.gpr[4 + i * 2] = v;
		vcpu->arch.gpr[5 + i * 2] = r;
	}
	return H_SUCCESS;
}

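/*
 * H_CLEAR_REF and H_CLEAR_MOD return the page's previous combined R/C
 * state in GPR4 and clear the reference or change bit respectively,
 * transferring the harvested bit into the rmap entry so that the host's
 * page aging and dirty tracking still see it.
 */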
long kvmppc_h_clear_ref(struct kvm_vcpu *vcpu, unsigned long flags,
			unsigned long pte_index)
{
	struct kvm *kvm = vcpu->kvm;
	__be64 *hpte;
	unsigned long v, r, gr;
	struct revmap_entry *rev;
	unsigned long *rmap;
	long ret = H_NOT_FOUND;

	if (pte_index >= kvm->arch.hpt_npte)
		return H_PARAMETER;

	rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]);
	hpte = (__be64 *)(kvm->arch.hpt_virt + (pte_index << 4));
	while (!try_lock_hpte(hpte, HPTE_V_HVLOCK))
		cpu_relax();
	v = be64_to_cpu(hpte[0]);
	r = be64_to_cpu(hpte[1]);
	if (!(v & (HPTE_V_VALID | HPTE_V_ABSENT)))
		goto out;

	gr = rev->guest_rpte;
	if (rev->guest_rpte & HPTE_R_R) {
		rev->guest_rpte &= ~HPTE_R_R;
		note_hpte_modification(kvm, rev);
	}
	if (v & HPTE_V_VALID) {
		gr |= r & (HPTE_R_R | HPTE_R_C);
		if (r & HPTE_R_R) {
			kvmppc_clear_ref_hpte(kvm, hpte, pte_index);
			rmap = revmap_for_hpte(kvm, v, gr);
			if (rmap) {
				lock_rmap(rmap);
				*rmap |= KVMPPC_RMAP_REFERENCED;
				unlock_rmap(rmap);
			}
		}
	}
	vcpu->arch.gpr[4] = gr;
	ret = H_SUCCESS;
 out:
	unlock_hpte(hpte, v & ~HPTE_V_HVLOCK);
	return ret;
}

long kvmppc_h_clear_mod(struct kvm_vcpu *vcpu, unsigned long flags,
			unsigned long pte_index)
{
	struct kvm *kvm = vcpu->kvm;
	__be64 *hpte;
	unsigned long v, r, gr;
	struct revmap_entry *rev;
	unsigned long *rmap;
	long ret = H_NOT_FOUND;

	if (pte_index >= kvm->arch.hpt_npte)
		return H_PARAMETER;

	rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]);
	hpte = (__be64 *)(kvm->arch.hpt_virt + (pte_index << 4));
	while (!try_lock_hpte(hpte, HPTE_V_HVLOCK))
		cpu_relax();
	v = be64_to_cpu(hpte[0]);
	r = be64_to_cpu(hpte[1]);
	if (!(v & (HPTE_V_VALID | HPTE_V_ABSENT)))
		goto out;

	gr = rev->guest_rpte;
	if (gr & HPTE_R_C) {
		rev->guest_rpte &= ~HPTE_R_C;
		note_hpte_modification(kvm, rev);
	}
	if (v & HPTE_V_VALID) {
		/* need to make it temporarily absent so C is stable */
		hpte[0] |= cpu_to_be64(HPTE_V_ABSENT);
		kvmppc_invalidate_hpte(kvm, hpte, pte_index);
		r = be64_to_cpu(hpte[1]);
		gr |= r & (HPTE_R_R | HPTE_R_C);
		if (r & HPTE_R_C) {
			unsigned long psize = hpte_page_size(v, r);
			hpte[1] = cpu_to_be64(r & ~HPTE_R_C);
			eieio();
			rmap = revmap_for_hpte(kvm, v, gr);
			if (rmap) {
				lock_rmap(rmap);
				*rmap |= KVMPPC_RMAP_CHANGED;
				kvmppc_update_rmap_change(rmap, psize);
				unlock_rmap(rmap);
			}
		}
	}
	vcpu->arch.gpr[4] = gr;
	ret = H_SUCCESS;
 out:
	unlock_hpte(hpte, v & ~HPTE_V_HVLOCK);
	return ret;
}

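/*
 * The two helpers below expect the caller to hold HPTE_V_HVLOCK on the
 * entry being modified; they only clear the valid bit or the reference
 * bit and issue the required TLB invalidation.
 */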
void kvmppc_invalidate_hpte(struct kvm *kvm, __be64 *hptep,
			unsigned long pte_index)
{
	unsigned long rb;

	hptep[0] &= ~cpu_to_be64(HPTE_V_VALID);
	rb = compute_tlbie_rb(be64_to_cpu(hptep[0]), be64_to_cpu(hptep[1]),
			      pte_index);
	do_tlbies(kvm, &rb, 1, 1, true);
}
EXPORT_SYMBOL_GPL(kvmppc_invalidate_hpte);

void kvmppc_clear_ref_hpte(struct kvm *kvm, __be64 *hptep,
			unsigned long pte_index)
{
	unsigned long rb;
	unsigned char rbyte;

	rb = compute_tlbie_rb(be64_to_cpu(hptep[0]), be64_to_cpu(hptep[1]),
			      pte_index);
	rbyte = (be64_to_cpu(hptep[1]) & ~HPTE_R_R) >> 8;
	/* modify only the second-last byte, which contains the ref bit */
	*((char *)hptep + 14) = rbyte;
	do_tlbies(kvm, &rb, 1, 1, false);
}
EXPORT_SYMBOL_GPL(kvmppc_clear_ref_hpte);

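/*
 * Base page shift implied by the L/LP bits of an SLB entry, indexed by
 * the LP field: 16M, 64k, 16G and (unsupported) 1M pages.
 */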
static int slb_base_page_shift[4] = {
	24,	/* 16M */
	16,	/* 64k */
	34,	/* 16G */
	20,	/* 1M, unsupported */
};

/*
 * When called from virtmode, this function should be protected by
 * preempt_disable(); otherwise, holding HPTE_V_HVLOCK could trigger a
 * deadlock.
 */
long kvmppc_hv_find_lock_hpte(struct kvm *kvm, gva_t eaddr, unsigned long slb_v,
			      unsigned long valid)
{
	unsigned int i;
	unsigned int pshift;
	unsigned long somask;
	unsigned long vsid, hash;
	unsigned long avpn;
	__be64 *hpte;
	unsigned long mask, val;
	unsigned long v, r;

	/* Get page shift, work out hash and AVPN etc. */
	mask = SLB_VSID_B | HPTE_V_AVPN | HPTE_V_SECONDARY;
	val = 0;
	pshift = 12;
	if (slb_v & SLB_VSID_L) {
		mask |= HPTE_V_LARGE;
		val |= HPTE_V_LARGE;
		pshift = slb_base_page_shift[(slb_v & SLB_VSID_LP) >> 4];
	}
	if (slb_v & SLB_VSID_B_1T) {
		somask = (1UL << 40) - 1;
		vsid = (slb_v & ~SLB_VSID_B) >> SLB_VSID_SHIFT_1T;
		vsid ^= vsid << 25;
	} else {
		somask = (1UL << 28) - 1;
		vsid = (slb_v & ~SLB_VSID_B) >> SLB_VSID_SHIFT;
	}
	hash = (vsid ^ ((eaddr & somask) >> pshift)) & kvm->arch.hpt_mask;
	avpn = slb_v & ~(somask >> 16);	/* also includes B */
	avpn |= (eaddr & somask) >> 16;

	if (pshift >= 24)
		avpn &= ~((1UL << (pshift - 16)) - 1);
	else
		avpn &= ~0x7fUL;
	val |= avpn;

	for (;;) {
		hpte = (__be64 *)(kvm->arch.hpt_virt + (hash << 7));

		for (i = 0; i < 16; i += 2) {
			/* Read the PTE racily */
			v = be64_to_cpu(hpte[i]) & ~HPTE_V_HVLOCK;

			/* Check valid/absent, hash, segment size and AVPN */
			if (!(v & valid) || (v & mask) != val)
				continue;

			/* Lock the PTE and read it under the lock */
			while (!try_lock_hpte(&hpte[i], HPTE_V_HVLOCK))
				cpu_relax();
			v = be64_to_cpu(hpte[i]) & ~HPTE_V_HVLOCK;
			r = be64_to_cpu(hpte[i+1]);

			/*
			 * Check the HPTE again, including base page size
			 */
			if ((v & valid) && (v & mask) == val &&
			    hpte_base_page_size(v, r) == (1ul << pshift))
				/* Return with the HPTE still locked */
				return (hash << 3) + (i >> 1);

			__unlock_hpte(&hpte[i], v);
		}

		if (val & HPTE_V_SECONDARY)
			break;
		val |= HPTE_V_SECONDARY;
		hash = hash ^ kvm->arch.hpt_mask;
	}
	return -1;
}
EXPORT_SYMBOL(kvmppc_hv_find_lock_hpte);

/*
 * Called in real mode to check whether an HPTE not found fault
 * is due to accessing a paged-out page or an emulated MMIO page,
 * or if a protection fault is due to accessing a page that the
 * guest wanted read/write access to but which we made read-only.
 * Returns a possibly modified status (DSISR) value if not one of
 * those cases (i.e. pass the interrupt to the guest),
 * -1 to pass the fault up to host kernel mode code, -2 to do that
 * and also load the instruction word (for MMIO emulation),
 * or 0 if we should make the guest retry the access.
 */
long kvmppc_hpte_hv_fault(struct kvm_vcpu *vcpu, unsigned long addr,
			  unsigned long slb_v, unsigned int status, bool data)
{
	struct kvm *kvm = vcpu->kvm;
	long int index;
	unsigned long v, r, gr;
	__be64 *hpte;
	unsigned long valid;
	struct revmap_entry *rev;
	unsigned long pp, key;

	/* For protection fault, expect to find a valid HPTE */
	valid = HPTE_V_VALID;
	if (status & DSISR_NOHPTE)
		valid |= HPTE_V_ABSENT;

	index = kvmppc_hv_find_lock_hpte(kvm, addr, slb_v, valid);
	if (index < 0) {
		if (status & DSISR_NOHPTE)
			return status;	/* there really was no HPTE */
		return 0;		/* for prot fault, HPTE disappeared */
	}
	hpte = (__be64 *)(kvm->arch.hpt_virt + (index << 4));
	v = be64_to_cpu(hpte[0]) & ~HPTE_V_HVLOCK;
	r = be64_to_cpu(hpte[1]);
	rev = real_vmalloc_addr(&kvm->arch.revmap[index]);
	gr = rev->guest_rpte;

	unlock_hpte(hpte, v);

	/* For not found, if the HPTE is valid by now, retry the instruction */
	if ((status & DSISR_NOHPTE) && (v & HPTE_V_VALID))
		return 0;

	/* Check access permissions to the page */
	pp = gr & (HPTE_R_PP0 | HPTE_R_PP);
	key = (vcpu->arch.shregs.msr & MSR_PR) ? SLB_VSID_KP : SLB_VSID_KS;
	status &= ~DSISR_NOHPTE;	/* DSISR_NOHPTE == SRR1_ISI_NOPT */
	if (!data) {
		if (gr & (HPTE_R_N | HPTE_R_G))
			return status | SRR1_ISI_N_OR_G;
		if (!hpte_read_permission(pp, slb_v & key))
			return status | SRR1_ISI_PROT;
	} else if (status & DSISR_ISSTORE) {
		/* check write permission */
		if (!hpte_write_permission(pp, slb_v & key))
			return status | DSISR_PROTFAULT;
	} else {
		if (!hpte_read_permission(pp, slb_v & key))
			return status | DSISR_PROTFAULT;
	}

	/* Check storage key, if applicable */
	if (data && (vcpu->arch.shregs.msr & MSR_DR)) {
		unsigned int perm = hpte_get_skey_perm(gr, vcpu->arch.amr);
		if (status & DSISR_ISSTORE)
			perm >>= 1;
		if (perm & 1)
			return status | DSISR_KEYFAULT;
	}

	/* Save HPTE info for virtual-mode handler */
	vcpu->arch.pgfault_addr = addr;
	vcpu->arch.pgfault_index = index;
	vcpu->arch.pgfault_hpte[0] = v;
	vcpu->arch.pgfault_hpte[1] = r;

	/* Check the storage key to see if it is possibly emulated MMIO */
	if (data && (vcpu->arch.shregs.msr & MSR_IR) &&
	    (r & (HPTE_R_KEY_HI | HPTE_R_KEY_LO)) ==
	     (HPTE_R_KEY_HI | HPTE_R_KEY_LO))
		return -2;	/* MMIO emulation - load instr word */

	return -1;		/* send fault up to host kernel mode */
}