KVM: MMU: hypercall based pte updates and TLB flushes

author Marcelo Tosatti <mtosatti@redhat.com>

Fri, 22 Feb 2008 17:21:37 +0000 (12:21 -0500)

committer Avi Kivity <avi@qumranet.com>

Sun, 27 Apr 2008 09:00:27 +0000 (12:00 +0300)
author Marcelo Tosatti <mtosatti@redhat.com>
Fri, 22 Feb 2008 17:21:37 +0000 (12:21 -0500)
committer Avi Kivity <avi@qumranet.com>
Sun, 27 Apr 2008 09:00:27 +0000 (12:00 +0300)
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c

index 414405b6ec13388aaa120e4697c9b327dcfe3461..072e9422c9145ab682296b0a239002211e2fc399 100644 (file)
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -28,6 +28,7 @@
  #include <linux/module.h>
  #include <linux/swap.h>
  #include <linux/hugetlb.h>
+#include <linux/compiler.h>
  
  #include <asm/page.h>
  #include <asm/cmpxchg.h>
@@ -40,7 +41,7 @@
   * 2. while doing 1. it walks guest-physical to host-physical
   * If the hardware supports that we don't need to do shadow paging.
   */
-static bool tdp_enabled = false;
+bool tdp_enabled = false;
  
  #undef MMU_DEBUG
  
@@ -167,6 +168,13 @@ static int dbg = 1;
  #define ACC_USER_MASK    PT_USER_MASK
  #define ACC_ALL          (ACC_EXEC_MASK | ACC_WRITE_MASK | ACC_USER_MASK)
  
+struct kvm_pv_mmu_op_buffer {
+       void *ptr;              /* read cursor; starts at buf */
+       unsigned len;           /* bytes not yet consumed */
+       unsigned processed;     /* bytes consumed so far */
+       char buf[512] __aligned(sizeof(long));  /* ops copied from the guest */
+};
+
  struct kvm_rmap_desc {
         u64 *shadow_ptes[RMAP_EXT];
         struct kvm_rmap_desc *more;
@@ -2003,6 +2011,132 @@ unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm)
         return nr_mmu_pages;
  }
  
+/* Peek at the next @len unread bytes without consuming them. */
+static void *pv_mmu_peek_buffer(struct kvm_pv_mmu_op_buffer *buffer,
+                               unsigned len)
+{
+       /* NULL when fewer than @len bytes are left in the buffer. */
+       return len <= buffer->len ? buffer->ptr : NULL;
+}
+
+/* Consume @len bytes: return their start and advance the cursor, or NULL. */
+static void *pv_mmu_read_buffer(struct kvm_pv_mmu_op_buffer *buffer,
+                               unsigned len)
+{
+       void *start = pv_mmu_peek_buffer(buffer, len);
+
+       if (start) {
+               buffer->processed += len;
+               buffer->len -= len;
+               buffer->ptr += len;
+       }
+       return start;
+}
+
+/*
+ * Write @value at guest-physical @addr: 8 bytes if is_long_mode() or
+ * is_pae(), else 4.  Returns 1, -EFAULT, or the cache topup error.
+ */
+static int kvm_pv_mmu_write(struct kvm_vcpu *vcpu,
+                            gpa_t addr, gpa_t value)
+{
+       int width = (is_long_mode(vcpu) || is_pae(vcpu)) ? 8 : 4;
+       int err = mmu_topup_memory_caches(vcpu);
+
+       if (err)
+               return err;
+
+       /* A zero return from the write helper is reported as a fault. */
+       if (__emulator_write_phys(vcpu, addr, &value, width))
+               return 1;
+       return -EFAULT;
+}
+
+static int kvm_pv_mmu_flush_tlb(struct kvm_vcpu *vcpu)
+{
+       kvm_x86_ops->tlb_flush(vcpu);   /* flush through the kvm_x86_ops hook */
+       return 1;                       /* unconditionally reports 1 */
+}
+
+static int kvm_pv_mmu_release_pt(struct kvm_vcpu *vcpu, gpa_t addr)
+{
+       spin_lock(&vcpu->kvm->mmu_lock);        /* held across mmu_unshadow() */
+       mmu_unshadow(vcpu->kvm, addr >> PAGE_SHIFT);    /* gpa -> frame number */
+       spin_unlock(&vcpu->kvm->mmu_lock);
+       return 1;       /* unconditionally reports 1 */
+}
+
+/*
+ * Decode and execute the op at the head of @buffer.
+ * Returns 0 for a truncated or unknown op, else the handler's result.
+ */
+static int kvm_pv_mmu_op_one(struct kvm_vcpu *vcpu,
+                            struct kvm_pv_mmu_op_buffer *buffer)
+{
+       struct kvm_mmu_op_header *hdr;
+       struct kvm_mmu_op_write_pte *wpte;
+       struct kvm_mmu_op_flush_tlb *ftlb;
+       struct kvm_mmu_op_release_pt *rpt;
+
+       /* Only peek: the op-specific read below consumes the header too. */
+       hdr = pv_mmu_peek_buffer(buffer, sizeof *hdr);
+       if (!hdr)
+               return 0;
+
+       switch (hdr->op) {
+       case KVM_MMU_OP_WRITE_PTE:
+               wpte = pv_mmu_read_buffer(buffer, sizeof *wpte);
+               if (!wpte)
+                       return 0;
+               return kvm_pv_mmu_write(vcpu, wpte->pte_phys, wpte->pte_val);
+       case KVM_MMU_OP_FLUSH_TLB:
+               ftlb = pv_mmu_read_buffer(buffer, sizeof *ftlb);
+               if (!ftlb)
+                       return 0;
+               return kvm_pv_mmu_flush_tlb(vcpu);
+       case KVM_MMU_OP_RELEASE_PT:
+               rpt = pv_mmu_read_buffer(buffer, sizeof *rpt);
+               if (!rpt)
+                       return 0;
+               return kvm_pv_mmu_release_pt(vcpu, rpt->pt_phys);
+       default:
+               return 0;
+       }
+}
+
+/*
+ * Read up to sizeof(buffer.buf) bytes of ops from guest address @addr,
+ * run them in order and store the number of bytes consumed in *@ret.
+ * Returns 1, or the nonzero kvm_read_guest() / negative op result.
+ */
+int kvm_pv_mmu_op(struct kvm_vcpu *vcpu, unsigned long bytes,
+                 gpa_t addr, unsigned long *ret)
+{
+       struct kvm_pv_mmu_op_buffer buffer;
+       int r;
+
+       down_read(&vcpu->kvm->slots_lock);
+       down_read(&current->mm->mmap_sem);
+
+       buffer.processed = 0;
+       buffer.len = min_t(unsigned long, bytes, sizeof buffer.buf);
+       buffer.ptr = buffer.buf;
+
+       r = kvm_read_guest(vcpu->kvm, addr, buffer.buf, buffer.len);
+       if (!r) {
+               /* Stop at end of data, a 0 (bad op) or an error (< 0). */
+               for (r = 1; r > 0 && buffer.len; )
+                       r = kvm_pv_mmu_op_one(vcpu, &buffer);
+               if (r >= 0)
+                       r = 1;
+       }
+
+       *ret = buffer.processed;
+       up_read(&current->mm->mmap_sem);
+       up_read(&vcpu->kvm->slots_lock);
+       return r;
+}
+
  #ifdef AUDIT
  
  static const char *audit_msg;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c

index 03ba402c476ad202c857eb9aa5e26652ba44b483..63afca1c295f7b7b2362e89259775918558daae4 100644 (file)
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -832,6 +832,9 @@ int kvm_dev_ioctl_check_extension(long ext)
         case KVM_CAP_NR_MEMSLOTS:
                 r = KVM_MEMORY_SLOTS;
                 break;
+       case KVM_CAP_PV_MMU:
+               r = !tdp_enabled;
+               break;
         default:
                 r = 0;
                 break;
@@ -2452,9 +2455,19 @@ int kvm_emulate_halt(struct kvm_vcpu *vcpu)
  }
  EXPORT_SYMBOL_GPL(kvm_emulate_halt);
  
+/* Build a gpa from hypercall args: @a0 alone in long mode, else @a1:@a0. */
+static inline gpa_t hc_gpa(struct kvm_vcpu *vcpu, unsigned long a0,
+                          unsigned long a1)
+{
+       gpa_t high = is_long_mode(vcpu) ? 0 : (gpa_t)a1 << 32;
+
+       return a0 | high;
+}
+
  int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
  {
         unsigned long nr, a0, a1, a2, a3, ret;
+       int r = 1;
  
         kvm_x86_ops->cache_regs(vcpu);
  
@@ -2476,6 +2489,9 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
         case KVM_HC_VAPIC_POLL_IRQ:
                 ret = 0;
                 break;
+       case KVM_HC_MMU_OP:
+               r = kvm_pv_mmu_op(vcpu, a0, hc_gpa(vcpu, a1, a2), &ret);
+               break;
         default:
                 ret = -KVM_ENOSYS;
                 break;
@@ -2483,7 +2499,7 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
         vcpu->arch.regs[VCPU_REGS_RAX] = ret;
         kvm_x86_ops->decache_regs(vcpu);
         ++vcpu->stat.hypercalls;
-       return 0;
+       return r;
  }
  EXPORT_SYMBOL_GPL(kvm_emulate_hypercall);
  
diff --git a/include/asm-x86/kvm_host.h b/include/asm-x86/kvm_host.h

index 99d31f5ed9ff14f90e0ed3078c7bb6be4e3c4b5a..772ba95f0a0ef6c0585e789727d6f52fe643276c 100644 (file)
--- a/include/asm-x86/kvm_host.h
+++ b/include/asm-x86/kvm_host.h
@@ -434,6 +434,10 @@ int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3);
  
  int __emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa,
                           const void *val, int bytes);
+int kvm_pv_mmu_op(struct kvm_vcpu *vcpu, unsigned long bytes,
+                 gpa_t addr, unsigned long *ret);
+
+extern bool tdp_enabled;
  
  enum emulation_result {
         EMULATE_DONE,       /* no further processing */
diff --git a/include/asm-x86/kvm_para.h b/include/asm-x86/kvm_para.h

index ed5df3a54aab6340b2cf6194e3efcd09de057ee9..5098459420705ea53f41c2d05b0a91b688980334 100644 (file)
--- a/include/asm-x86/kvm_para.h
+++ b/include/asm-x86/kvm_para.h
@@ -12,10 +12,39 @@
  #define KVM_CPUID_FEATURES     0x40000001
  #define KVM_FEATURE_CLOCKSOURCE                0
  #define KVM_FEATURE_NOP_IO_DELAY       1
+#define KVM_FEATURE_MMU_OP             2
  
  #define MSR_KVM_WALL_CLOCK  0x11
  #define MSR_KVM_SYSTEM_TIME 0x12
  
+#define KVM_MAX_MMU_OP_BATCH           32
+
+/* Operations for KVM_HC_MMU_OP */
+#define KVM_MMU_OP_WRITE_PTE            1
+#define KVM_MMU_OP_FLUSH_TLB           2
+#define KVM_MMU_OP_RELEASE_PT          3
+
+/* Payload for KVM_HC_MMU_OP */
+struct kvm_mmu_op_header {
+       __u32 op;       /* KVM_MMU_OP_* code selecting the handler */
+       __u32 pad;      /* not read by the handlers visible in this patch */
+};
+
+struct kvm_mmu_op_write_pte {
+       struct kvm_mmu_op_header header;        /* op = KVM_MMU_OP_WRITE_PTE */
+       __u64 pte_phys; /* guest-physical address the value is written to */
+       __u64 pte_val;  /* value to write */
+};
+
+struct kvm_mmu_op_flush_tlb {
+       struct kvm_mmu_op_header header;        /* op = KVM_MMU_OP_FLUSH_TLB */
+};
+
+struct kvm_mmu_op_release_pt {
+       struct kvm_mmu_op_header header;        /* op = KVM_MMU_OP_RELEASE_PT */
+       __u64 pt_phys;  /* guest-physical address; host uses its page frame */
+};
+
  #ifdef __KERNEL__
  #include <asm/processor.h>
  
diff --git a/include/linux/kvm.h b/include/linux/kvm.h

index 76f09474be98d22b6a95c8ce49047a4e9045dcff..c1b502a50a01c0af8b8cbef0b48bf9b58b77f419 100644 (file)
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -238,6 +238,7 @@ struct kvm_vapic_addr {
  #define KVM_CAP_NR_MEMSLOTS 10   /* returns max memory slots per vm */
  #define KVM_CAP_PIT 11
  #define KVM_CAP_NOP_IO_DELAY 12
+#define KVM_CAP_PV_MMU 13
  
  /*
   * ioctls for VM fds
diff --git a/include/linux/kvm_para.h b/include/linux/kvm_para.h

index 9c462c91a6b12f36574691407afc33dc60c209e5..3ddce03766caf5796f84f71bd7462f117c9a064a 100644 (file)
--- a/include/linux/kvm_para.h
+++ b/include/linux/kvm_para.h
@@ -11,8 +11,11 @@
  
  /* Return values for hypercalls */
  #define KVM_ENOSYS             1000
+#define KVM_EFAULT             EFAULT
+#define KVM_E2BIG              E2BIG
  
-#define KVM_HC_VAPIC_POLL_IRQ            1
+#define KVM_HC_VAPIC_POLL_IRQ          1
+#define KVM_HC_MMU_OP                  2
  
  /*
   * hypercalls use architecture specific
author	Marcelo Tosatti <mtosatti@redhat.com>
	Fri, 22 Feb 2008 17:21:37 +0000 (12:21 -0500)
committer	Avi Kivity <avi@qumranet.com>
	Sun, 27 Apr 2008 09:00:27 +0000 (12:00 +0300)
arch/x86/kvm/mmu.c		patch | blob | history
arch/x86/kvm/x86.c		patch | blob | history
include/asm-x86/kvm_host.h		patch | blob | history
include/asm-x86/kvm_para.h		patch | blob | history
include/linux/kvm.h		patch | blob | history
include/linux/kvm_para.h		patch | blob | history