kvm: x86: vmx: move down hardware_setup() and hardware_unsetup()
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 0acac81f198b0b03c83b4f4f9ef86178fa84f821..576b01b7b6588ab6d0d39847f0e09d40dd3b4389 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -105,7 +105,7 @@ module_param(nested, bool, S_IRUGO);
        (KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST | X86_CR0_PG | X86_CR0_PE)
 #define KVM_CR4_GUEST_OWNED_BITS                                     \
        (X86_CR4_PVI | X86_CR4_DE | X86_CR4_PCE | X86_CR4_OSFXSR      \
-        | X86_CR4_OSXMMEXCPT)
+        | X86_CR4_OSXMMEXCPT | X86_CR4_TSD)
 
 #define KVM_PMODE_VM_CR4_ALWAYS_ON (X86_CR4_PAE | X86_CR4_VMXE)
 #define KVM_RMODE_VM_CR4_ALWAYS_ON (X86_CR4_VME | X86_CR4_PAE | X86_CR4_VMXE)
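
Adding X86_CR4_TSD (bit 2) to KVM_CR4_GUEST_OWNED_BITS lets the guest toggle
CR4.TSD without a VM exit; a guest kernel flips this bit on every context
switch of a task that has RDTSC disabled via prctl(PR_SET_TSC), so trapping
each write is presumably needless overhead. VMX traps writes to exactly the
CR4 bits that are set in CR4_GUEST_HOST_MASK, so the guest-owned bits are the
inverse of that mask. A minimal userspace sketch of the bit arithmetic (bit
positions from the Intel SDM; this is not kernel code):

	#include <stdio.h>

	/* CR4 bit positions, as in the Intel SDM / arch/x86 headers. */
	#define X86_CR4_PVI         (1UL << 1)  /* virtual interrupts     */
	#define X86_CR4_TSD         (1UL << 2)  /* time-stamp disable     */
	#define X86_CR4_DE          (1UL << 3)  /* debugging extensions   */
	#define X86_CR4_PCE         (1UL << 8)  /* perf counter enable    */
	#define X86_CR4_OSFXSR      (1UL << 9)  /* FXSAVE/FXRSTOR support */
	#define X86_CR4_OSXMMEXCPT  (1UL << 10) /* unmasked SSE exceptions */

	int main(void)
	{
		unsigned long owned = X86_CR4_PVI | X86_CR4_DE | X86_CR4_PCE |
				      X86_CR4_OSFXSR | X86_CR4_OSXMMEXCPT |
				      X86_CR4_TSD;

		/* Guest writes to bits *set* in the guest/host mask trap,
		 * so the mask written to the VMCS is the inverse. */
		printf("CR4_GUEST_HOST_MASK = 0x%lx\n", ~owned);
		return 0;
	}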
@@ -2659,12 +2659,15 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
        default:
                msr = find_msr_entry(vmx, msr_index);
                if (msr) {
+                       u64 old_msr_data = msr->data;
                        msr->data = data;
                        if (msr - vmx->guest_msrs < vmx->save_nmsrs) {
                                preempt_disable();
-                               kvm_set_shared_msr(msr->index, msr->data,
-                                                  msr->mask);
+                               ret = kvm_set_shared_msr(msr->index, msr->data,
+                                                        msr->mask);
                                preempt_enable();
+                               if (ret)
+                                       msr->data = old_msr_data;
                        }
                        break;
                }
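
kvm_set_shared_msr() can now report failure (the matching x86.c change
presumably switches it to a checking WRMSR variant), so vmx_set_msr()
snapshots the cached value and rolls it back when the hardware write is
refused, keeping vmx->guest_msrs consistent with what will actually be
restored on return to userspace. The ret variable is declared earlier in the
function, outside this hunk. A self-contained sketch of the
save/try/roll-back pattern (set_shared_msr() here is a hypothetical
stand-in, not the kernel function):

	#include <stdio.h>

	struct shared_msr { unsigned int index; unsigned long long data; };

	/* Hypothetical stand-in for kvm_set_shared_msr(): nonzero means
	 * the underlying WRMSR was refused (pretend high bits are reserved). */
	static int set_shared_msr(unsigned int index, unsigned long long data)
	{
		(void)index;
		return data >> 32 ? 1 : 0;
	}

	static int update_msr(struct shared_msr *msr, unsigned long long data)
	{
		unsigned long long old_msr_data = msr->data; /* for rollback */
		int ret;

		msr->data = data;
		ret = set_shared_msr(msr->index, msr->data);
		if (ret)
			msr->data = old_msr_data; /* refused: undo cache */
		return ret;
	}

	int main(void)
	{
		struct shared_msr msr = { .index = 0x100, .data = 0 };

		printf("ret=%d data=%#llx\n", update_msr(&msr, 0x10), msr.data);
		printf("ret=%d data=%#llx\n", update_msr(&msr, 1ULL << 32), msr.data);
		return 0;
	}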
@@ -3107,76 +3110,6 @@ static __init int alloc_kvm_area(void)
        return 0;
 }
 
-static __init int hardware_setup(void)
-{
-       if (setup_vmcs_config(&vmcs_config) < 0)
-               return -EIO;
-
-       if (boot_cpu_has(X86_FEATURE_NX))
-               kvm_enable_efer_bits(EFER_NX);
-
-       if (!cpu_has_vmx_vpid())
-               enable_vpid = 0;
-       if (!cpu_has_vmx_shadow_vmcs())
-               enable_shadow_vmcs = 0;
-       if (enable_shadow_vmcs)
-               init_vmcs_shadow_fields();
-
-       if (!cpu_has_vmx_ept() ||
-           !cpu_has_vmx_ept_4levels()) {
-               enable_ept = 0;
-               enable_unrestricted_guest = 0;
-               enable_ept_ad_bits = 0;
-       }
-
-       if (!cpu_has_vmx_ept_ad_bits())
-               enable_ept_ad_bits = 0;
-
-       if (!cpu_has_vmx_unrestricted_guest())
-               enable_unrestricted_guest = 0;
-
-       if (!cpu_has_vmx_flexpriority()) {
-               flexpriority_enabled = 0;
-
-               /*
-                * set_apic_access_page_addr() is used to reload apic access
-                * page upon invalidation.  No need to do anything if the
-                * processor does not have the APIC_ACCESS_ADDR VMCS field.
-                */
-               kvm_x86_ops->set_apic_access_page_addr = NULL;
-       }
-
-       if (!cpu_has_vmx_tpr_shadow())
-               kvm_x86_ops->update_cr8_intercept = NULL;
-
-       if (enable_ept && !cpu_has_vmx_ept_2m_page())
-               kvm_disable_largepages();
-
-       if (!cpu_has_vmx_ple())
-               ple_gap = 0;
-
-       if (!cpu_has_vmx_apicv())
-               enable_apicv = 0;
-
-       if (enable_apicv)
-               kvm_x86_ops->update_cr8_intercept = NULL;
-       else {
-               kvm_x86_ops->hwapic_irr_update = NULL;
-               kvm_x86_ops->deliver_posted_interrupt = NULL;
-               kvm_x86_ops->sync_pir_to_irr = vmx_sync_pir_to_irr_dummy;
-       }
-
-       if (nested)
-               nested_vmx_setup_ctls_msrs();
-
-       return alloc_kvm_area();
-}
-
-static __exit void hardware_unsetup(void)
-{
-       free_kvm_area();
-}
-
 static bool emulation_required(struct kvm_vcpu *vcpu)
 {
        return emulate_invalid_guest_state && !guest_state_valid(vcpu);
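
These two hunks are pure code motion: hardware_setup() and
hardware_unsetup() are deleted here and re-added verbatim after
update_ple_window_actual_max() in the large + hunk further down, presumably
so that follow-up patches can let hardware_setup() call the PLE-window
helpers without forward declarations.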
@@ -4576,7 +4509,7 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu)
                vmcs_write32(TPR_THRESHOLD, 0);
        }
 
-       kvm_vcpu_reload_apic_access_page(vcpu);
+       kvm_make_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu);
 
        if (vmx_vm_has_apicv(vcpu->kvm))
                memset(&vmx->pi_desc, 0, sizeof(struct pi_desc));
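
vmx_vcpu_reset() no longer reloads the APIC access page synchronously; it
raises KVM_REQ_APIC_PAGE_RELOAD, which vcpu_enter_guest() services just
before the next guest entry, where the vCPU context is guaranteed to be
loaded. A minimal userspace model of the request pattern (a simplification:
real KVM uses atomic bitops on vcpu->requests):

	#include <stdio.h>

	enum { REQ_APIC_PAGE_RELOAD };

	static unsigned long requests;

	static void make_request(int req)
	{
		requests |= 1UL << req;		/* kvm_make_request() side */
	}

	/* Test-and-clear, like kvm_check_request(). */
	static int check_request(int req)
	{
		if (requests & (1UL << req)) {
			requests &= ~(1UL << req);
			return 1;
		}
		return 0;
	}

	int main(void)
	{
		make_request(REQ_APIC_PAGE_RELOAD);	  /* vmx_vcpu_reset() */
		if (check_request(REQ_APIC_PAGE_RELOAD))  /* vcpu_enter_guest() */
			printf("reload APIC page before guest entry\n");
		return 0;
	}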
@@ -5160,13 +5093,20 @@ static int handle_cr(struct kvm_vcpu *vcpu)
 static int handle_dr(struct kvm_vcpu *vcpu)
 {
        unsigned long exit_qualification;
-       int dr, reg;
+       int dr, dr7, reg;
+
+       exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
+       dr = exit_qualification & DEBUG_REG_ACCESS_NUM;
+
+       /* First, if DR does not exist, trigger UD */
+       if (!kvm_require_dr(vcpu, dr))
+               return 1;
 
        /* Do not handle if the CPL > 0, will trigger GP on re-entry */
        if (!kvm_require_cpl(vcpu, 0))
                return 1;
-       dr = vmcs_readl(GUEST_DR7);
-       if (dr & DR7_GD) {
+       dr7 = vmcs_readl(GUEST_DR7);
+       if (dr7 & DR7_GD) {
                /*
                 * As the vm-exit takes precedence over the debug trap, we
                 * need to emulate the latter, either for the host or the
@@ -5174,7 +5114,7 @@ static int handle_dr(struct kvm_vcpu *vcpu)
                 */
                if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) {
                        vcpu->run->debug.arch.dr6 = vcpu->arch.dr6;
-                       vcpu->run->debug.arch.dr7 = dr;
+                       vcpu->run->debug.arch.dr7 = dr7;
                        vcpu->run->debug.arch.pc =
                                vmcs_readl(GUEST_CS_BASE) +
                                vmcs_readl(GUEST_RIP);
@@ -5182,9 +5122,7 @@ static int handle_dr(struct kvm_vcpu *vcpu)
                        vcpu->run->exit_reason = KVM_EXIT_DEBUG;
                        return 0;
                } else {
-                       vcpu->arch.dr7 &= ~DR7_GD;
                        vcpu->arch.dr6 |= DR6_BD | DR6_RTM;
-                       vmcs_writel(GUEST_DR7, vcpu->arch.dr7);
                        kvm_queue_exception(vcpu, DB_VECTOR);
                        return 1;
                }
@@ -5206,8 +5144,6 @@ static int handle_dr(struct kvm_vcpu *vcpu)
                return 1;
        }
 
-       exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
-       dr = exit_qualification & DEBUG_REG_ACCESS_NUM;
        reg = DEBUG_REG_ACCESS_REG(exit_qualification);
        if (exit_qualification & TYPE_MOV_FROM_DR) {
                unsigned long val;
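
handle_dr() now decodes the exit qualification at function entry so that
kvm_require_dr() can run first: DR4/DR5 are aliases of DR6/DR7 only while
CR4.DE is clear, so with CR4.DE set they do not exist and access raises #UD
before any CPL or DR7.GD processing. Renaming the GUEST_DR7 temporary to dr7
untangles the earlier conflation of the register number and the DR7 value,
and the manual clearing of DR7.GD when queueing the #DB is dropped here,
presumably because the common exception/DR code now takes care of it. A
standalone decoder for the MOV DR exit qualification, using the same field
layout as above (from the Intel SDM):

	#include <stdio.h>

	/* MOV DR exit qualification layout; macro names as in vmx.c. */
	#define DEBUG_REG_ACCESS_NUM	0x7		    /* bits 2:0 - DR number */
	#define TYPE_MOV_FROM_DR	(1 << 4)	    /* bit 4 - direction    */
	#define DEBUG_REG_ACCESS_REG(q)	(((q) >> 8) & 0xf)  /* bits 11:8 - GP reg   */

	int main(void)
	{
		unsigned long q = 0x710;	/* example: mov %dr0, %rdi */

		printf("dr=%lu reg=%lu dir=%s\n",
		       q & DEBUG_REG_ACCESS_NUM, DEBUG_REG_ACCESS_REG(q),
		       (q & TYPE_MOV_FROM_DR) ? "from DR (read)"
					      : "to DR (write)");
		return 0;
	}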
@@ -5291,7 +5227,7 @@ static int handle_wrmsr(struct kvm_vcpu *vcpu)
        msr.data = data;
        msr.index = ecx;
        msr.host_initiated = false;
-       if (vmx_set_msr(vcpu, &msr) != 0) {
+       if (kvm_set_msr(vcpu, &msr) != 0) {
                trace_kvm_msr_write_ex(ecx, data);
                kvm_inject_gp(vcpu, 0);
                return 1;
@@ -5489,7 +5425,7 @@ static int handle_task_switch(struct kvm_vcpu *vcpu)
        }
 
        /* clear all local breakpoint enable flags */
-       vmcs_writel(GUEST_DR7, vmcs_readl(GUEST_DR7) & ~0x55);
+       vmcs_writel(GUEST_DR7, vmcs_readl(GUEST_DR7) & ~0x155);
 
        /*
         * TODO: What about debug traps on tss switch?
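
The widened mask additionally clears DR7.LE, the local-exact bit 8: the SDM
has the processor clear LE together with the local enables L0-L3 on a task
switch, which the old ~0x55 mask missed (the unchanged comment above still
mentions only the local breakpoint enables). The two masks, spelled out:

	#include <stdio.h>

	int main(void)
	{
		/* DR7.L0-L3 sit in bits 0, 2, 4, 6; DR7.LE is bit 8. */
		unsigned int local_enables = (1 << 0) | (1 << 2) |
					     (1 << 4) | (1 << 6);
		unsigned int le = 1 << 8;

		printf("old mask cleared 0x%x, new mask clears 0x%x\n",
		       local_enables, local_enables | le); /* 0x55 vs 0x155 */
		return 0;
	}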
@@ -5782,6 +5718,76 @@ static void update_ple_window_actual_max(void)
                                            ple_window_grow, INT_MIN);
 }
 
+static __init int hardware_setup(void)
+{
+       if (setup_vmcs_config(&vmcs_config) < 0)
+               return -EIO;
+
+       if (boot_cpu_has(X86_FEATURE_NX))
+               kvm_enable_efer_bits(EFER_NX);
+
+       if (!cpu_has_vmx_vpid())
+               enable_vpid = 0;
+       if (!cpu_has_vmx_shadow_vmcs())
+               enable_shadow_vmcs = 0;
+       if (enable_shadow_vmcs)
+               init_vmcs_shadow_fields();
+
+       if (!cpu_has_vmx_ept() ||
+           !cpu_has_vmx_ept_4levels()) {
+               enable_ept = 0;
+               enable_unrestricted_guest = 0;
+               enable_ept_ad_bits = 0;
+       }
+
+       if (!cpu_has_vmx_ept_ad_bits())
+               enable_ept_ad_bits = 0;
+
+       if (!cpu_has_vmx_unrestricted_guest())
+               enable_unrestricted_guest = 0;
+
+       if (!cpu_has_vmx_flexpriority()) {
+               flexpriority_enabled = 0;
+
+               /*
+                * set_apic_access_page_addr() is used to reload apic access
+                * page upon invalidation.  No need to do anything if the
+                * processor does not have the APIC_ACCESS_ADDR VMCS field.
+                */
+               kvm_x86_ops->set_apic_access_page_addr = NULL;
+       }
+
+       if (!cpu_has_vmx_tpr_shadow())
+               kvm_x86_ops->update_cr8_intercept = NULL;
+
+       if (enable_ept && !cpu_has_vmx_ept_2m_page())
+               kvm_disable_largepages();
+
+       if (!cpu_has_vmx_ple())
+               ple_gap = 0;
+
+       if (!cpu_has_vmx_apicv())
+               enable_apicv = 0;
+
+       if (enable_apicv)
+               kvm_x86_ops->update_cr8_intercept = NULL;
+       else {
+               kvm_x86_ops->hwapic_irr_update = NULL;
+               kvm_x86_ops->deliver_posted_interrupt = NULL;
+               kvm_x86_ops->sync_pir_to_irr = vmx_sync_pir_to_irr_dummy;
+       }
+
+       if (nested)
+               nested_vmx_setup_ctls_msrs();
+
+       return alloc_kvm_area();
+}
+
+static __exit void hardware_unsetup(void)
+{
+       free_kvm_area();
+}
+
 /*
  * Indicate a busy-waiting vcpu in spinlock. We do not enable the PAUSE
  * exiting, so only get here on cpu with PAUSE-Loop-Exiting.
@@ -6423,6 +6429,8 @@ static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx)
        const unsigned long *fields = shadow_read_write_fields;
        const int num_fields = max_shadow_read_write_fields;
 
+       preempt_disable();
+
        vmcs_load(shadow_vmcs);
 
        for (i = 0; i < num_fields; i++) {
@@ -6446,6 +6454,8 @@ static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx)
 
        vmcs_clear(shadow_vmcs);
        vmcs_load(vmx->loaded_vmcs->vmcs);
+
+       preempt_enable();
 }
 
 static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx)
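
vmcs_load(shadow_vmcs) makes the shadow VMCS current on this CPU, so a
preemption anywhere between it and the final
vmcs_load(vmx->loaded_vmcs->vmcs) could migrate the task and leave the
VMREADs running on a CPU where the shadow VMCS is not current, with the
per-CPU loaded-VMCS bookkeeping inconsistent as well. Disabling preemption
across the whole copy closes that window.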
@@ -6743,6 +6753,12 @@ static int handle_invept(struct kvm_vcpu *vcpu)
        return 1;
 }
 
+static int handle_invvpid(struct kvm_vcpu *vcpu)
+{
+       kvm_queue_exception(vcpu, UD_VECTOR);
+       return 1;
+}
+
 /*
  * The exit handlers return 1 if the exit was handled fully and guest execution
  * may resume.  Otherwise they set the kvm_run parameter to indicate what needs
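
INVVPID exits unconditionally when the instruction is supported, but nested
VMX here does not advertise VPID in the VMX capability MSRs, so an L1 guest
executing it gets the #UD that real hardware raises for an unsupported
instruction. Without this handler the exit would fall into the
unexpected-exit path reworked at the end of this patch; the new entry in
kvm_vmx_exit_handlers[] and the EXIT_REASON_INVVPID case in
nested_vmx_exit_handled() below wire it up.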
@@ -6788,6 +6804,7 @@ static int (*const kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
        [EXIT_REASON_MWAIT_INSTRUCTION]       = handle_mwait,
        [EXIT_REASON_MONITOR_INSTRUCTION]     = handle_monitor,
        [EXIT_REASON_INVEPT]                  = handle_invept,
+       [EXIT_REASON_INVVPID]                 = handle_invvpid,
 };
 
 static const int kvm_vmx_max_exit_handlers =
@@ -7023,7 +7040,7 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
        case EXIT_REASON_VMPTRST: case EXIT_REASON_VMREAD:
        case EXIT_REASON_VMRESUME: case EXIT_REASON_VMWRITE:
        case EXIT_REASON_VMOFF: case EXIT_REASON_VMON:
-       case EXIT_REASON_INVEPT:
+       case EXIT_REASON_INVEPT: case EXIT_REASON_INVVPID:
                /*
                 * VMX instructions trap unconditionally. This allows L1 to
                 * emulate them for its L2 guest, i.e., allows 3-level nesting!
@@ -7164,10 +7181,10 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu)
            && kvm_vmx_exit_handlers[exit_reason])
                return kvm_vmx_exit_handlers[exit_reason](vcpu);
        else {
-               vcpu->run->exit_reason = KVM_EXIT_UNKNOWN;
-               vcpu->run->hw.hardware_exit_reason = exit_reason;
+               WARN_ONCE(1, "vmx: unexpected exit reason 0x%x\n", exit_reason);
+               kvm_queue_exception(vcpu, UD_VECTOR);
+               return 1;
        }
-       return 0;
 }
 
 static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr)
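
Previously an exit reason without a handler was punted to userspace as
KVM_EXIT_UNKNOWN, and the return 0 stopped the vcpu loop; now the host warns
once and injects #UD, so a stray exit degrades inside the guest instead of
terminating the VM. Every path in the else branch now returns, which is why
the trailing return 0 is removed.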