KVM: async_pf: avoid async pf injection when in guest mode

[firefly-linux-kernel-4.4.55.git] / arch / x86 / kvm / vmx.c
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c

index af823a388c1994ba244e3ef0098f1a578408101e..50ca8f409a7ce692e6a09afb09f85f3b774bbe95 100644 (file)
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -408,6 +408,7 @@ struct nested_vmx {
         struct list_head vmcs02_pool;
         int vmcs02_num;
         u64 vmcs01_tsc_offset;
+       bool change_vmcs01_virtual_x2apic_mode;
         /* L2 must run next, and mustn't decide to exit to L1. */
         bool nested_run_pending;
         /*
@@ -595,6 +596,8 @@ struct vcpu_vmx {
         /* Support for PML */
  #define PML_ENTITY_NUM         512
         struct page *pml_pg;
+
+       u64 current_tsc_ratio;
  };
  
  enum segment_cache_field {
@@ -1244,10 +1247,10 @@ static inline bool nested_cpu_has_posted_intr(struct vmcs12 *vmcs12)
         return vmcs12->pin_based_vm_exec_control & PIN_BASED_POSTED_INTR;
  }
  
-static inline bool is_exception(u32 intr_info)
+static inline bool is_nmi(u32 intr_info)
  {
         return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VALID_MASK))
-               == (INTR_TYPE_HARD_EXCEPTION | INTR_INFO_VALID_MASK);
+               == (INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK);
  }
  
  static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
@@ -1746,6 +1749,13 @@ static void add_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr,
                         return;
                 }
                 break;
+       case MSR_IA32_PEBS_ENABLE:
+               /* PEBS needs a quiescent period after being disabled (to write
+                * a record).  Disabling PEBS through VMX MSR swapping doesn't
+                * provide that period, so a CPU could write host's record into
+                * guest's memory.
+                */
+               wrmsrl(MSR_IA32_PEBS_ENABLE, 0);
         }
  
         for (i = 0; i < m->nr; ++i)
@@ -1783,26 +1793,31 @@ static void reload_tss(void)
  
  static bool update_transition_efer(struct vcpu_vmx *vmx, int efer_offset)
  {
-       u64 guest_efer;
-       u64 ignore_bits;
+       u64 guest_efer = vmx->vcpu.arch.efer;
+       u64 ignore_bits = 0;
  
-       guest_efer = vmx->vcpu.arch.efer;
+       if (!enable_ept) {
+               /*
+                * NX is needed to handle CR0.WP=1, CR4.SMEP=1.  Testing
+                * host CPUID is more efficient than testing guest CPUID
+                * or CR4.  Host SMEP is anyway a requirement for guest SMEP.
+                */
+               if (boot_cpu_has(X86_FEATURE_SMEP))
+                       guest_efer |= EFER_NX;
+               else if (!(guest_efer & EFER_NX))
+                       ignore_bits |= EFER_NX;
+       }
  
         /*
-        * NX is emulated; LMA and LME handled by hardware; SCE meaningless
-        * outside long mode
+        * LMA and LME handled by hardware; SCE meaningless outside long mode.
          */
-       ignore_bits = EFER_NX | EFER_SCE;
+       ignore_bits |= EFER_SCE;
  #ifdef CONFIG_X86_64
         ignore_bits |= EFER_LMA | EFER_LME;
         /* SCE is meaningful only in long mode on Intel */
         if (guest_efer & EFER_LMA)
                 ignore_bits &= ~(u64)EFER_SCE;
  #endif
-       guest_efer &= ~ignore_bits;
-       guest_efer |= host_efer & ignore_bits;
-       vmx->guest_msrs[efer_offset].data = guest_efer;
-       vmx->guest_msrs[efer_offset].mask = ~ignore_bits;
  
         clear_atomic_switch_msr(vmx, MSR_EFER);
  
@@ -1813,16 +1828,21 @@ static bool update_transition_efer(struct vcpu_vmx *vmx, int efer_offset)
          */
         if (cpu_has_load_ia32_efer ||
             (enable_ept && ((vmx->vcpu.arch.efer ^ host_efer) & EFER_NX))) {
-               guest_efer = vmx->vcpu.arch.efer;
                 if (!(guest_efer & EFER_LMA))
                         guest_efer &= ~EFER_LME;
                 if (guest_efer != host_efer)
                         add_atomic_switch_msr(vmx, MSR_EFER,
                                               guest_efer, host_efer);
                 return false;
-       }
+       } else {
+               guest_efer &= ~ignore_bits;
+               guest_efer |= host_efer & ignore_bits;
  
-       return true;
+               vmx->guest_msrs[efer_offset].data = guest_efer;
+               vmx->guest_msrs[efer_offset].mask = ~ignore_bits;
+
+               return true;
+       }
  }
  
  static unsigned long segment_base(u16 selector)
@@ -2062,14 +2082,16 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
                 rdmsrl(MSR_IA32_SYSENTER_ESP, sysenter_esp);
                 vmcs_writel(HOST_IA32_SYSENTER_ESP, sysenter_esp); /* 22.2.3 */
  
-               /* Setup TSC multiplier */
-               if (cpu_has_vmx_tsc_scaling())
-                       vmcs_write64(TSC_MULTIPLIER,
-                                    vcpu->arch.tsc_scaling_ratio);
-
                 vmx->loaded_vmcs->cpu = cpu;
         }
  
+       /* Setup TSC multiplier */
+       if (kvm_has_tsc_control &&
+           vmx->current_tsc_ratio != vcpu->arch.tsc_scaling_ratio) {
+               vmx->current_tsc_ratio = vcpu->arch.tsc_scaling_ratio;
+               vmcs_write64(TSC_MULTIPLIER, vmx->current_tsc_ratio);
+       }
+
         vmx_vcpu_pi_load(vcpu, cpu);
  }
  
@@ -2616,8 +2638,15 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx)
         } else
                 vmx->nested.nested_vmx_ept_caps = 0;
  
+       /*
+        * Old versions of KVM use the single-context version without
+        * checking for support, so declare that it is supported even
+        * though it is treated as global context.  The alternative is
+        * not failing the single-context invvpid, and it is worse.
+        */
         if (enable_vpid)
                 vmx->nested.nested_vmx_vpid_caps = VMX_VPID_INVVPID_BIT |
+                               VMX_VPID_EXTENT_SINGLE_CONTEXT_BIT |
                                 VMX_VPID_EXTENT_GLOBAL_CONTEXT_BIT;
         else
                 vmx->nested.nested_vmx_vpid_caps = 0;
@@ -2803,7 +2832,7 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
                 msr_info->data = vcpu->arch.ia32_xss;
                 break;
         case MSR_TSC_AUX:
-               if (!guest_cpuid_has_rdtscp(vcpu))
+               if (!guest_cpuid_has_rdtscp(vcpu) && !msr_info->host_initiated)
                         return 1;
                 /* Otherwise falls through */
         default:
@@ -2909,7 +2938,7 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
                         clear_atomic_switch_msr(vmx, MSR_IA32_XSS);
                 break;
         case MSR_TSC_AUX:
-               if (!guest_cpuid_has_rdtscp(vcpu))
+               if (!guest_cpuid_has_rdtscp(vcpu) && !msr_info->host_initiated)
                         return 1;
                 /* Check reserved bit, higher 32 bits should be zero */
                 if ((data >> 32) != 0)
@@ -3470,7 +3499,7 @@ static void fix_rmode_seg(int seg, struct kvm_segment *save)
         }
  
         vmcs_write16(sf->selector, var.selector);
-       vmcs_write32(sf->base, var.base);
+       vmcs_writel(sf->base, var.base);
         vmcs_write32(sf->limit, var.limit);
         vmcs_write32(sf->ar_bytes, vmx_segment_access_rights(&var));
  }
@@ -4838,6 +4867,12 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
         if (vmx_xsaves_supported())
                 vmcs_write64(XSS_EXIT_BITMAP, VMX_XSS_EXIT_BITMAP);
  
+       if (enable_pml) {
+               ASSERT(vmx->pml_pg);
+               vmcs_write64(PML_ADDRESS, page_to_phys(vmx->pml_pg));
+               vmcs_write16(GUEST_PML_INDEX, PML_ENTITY_NUM - 1);
+       }
+
         return 0;
  }
  
@@ -4926,8 +4961,8 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
                 vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid);
  
         cr0 = X86_CR0_NW | X86_CR0_CD | X86_CR0_ET;
-       vmx_set_cr0(vcpu, cr0); /* enter rmode */
         vmx->vcpu.arch.cr0 = cr0;
+       vmx_set_cr0(vcpu, cr0); /* enter rmode */
         vmx_set_cr4(vcpu, 0);
         vmx_set_efer(vcpu, 0);
         vmx_fpu_activate(vcpu);
@@ -5205,7 +5240,7 @@ static int handle_exception(struct kvm_vcpu *vcpu)
         if (is_machine_check(intr_info))
                 return handle_machine_check(vcpu);
  
-       if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == INTR_TYPE_NMI_INTR)
+       if (is_nmi(intr_info))
                 return 1;  /* already handled by vmx_vcpu_run() */
  
         if (is_no_device(intr_info)) {
@@ -6551,7 +6586,13 @@ static int get_vmx_mem_address(struct kvm_vcpu *vcpu,
  
         /* Checks for #GP/#SS exceptions. */
         exn = false;
-       if (is_protmode(vcpu)) {
+       if (is_long_mode(vcpu)) {
+               /* Long mode: #GP(0)/#SS(0) if the memory address is in a
+                * non-canonical form. This is the only check on the memory
+                * destination for long mode!
+                */
+               exn = is_noncanonical_address(*ret);
+       } else if (is_protmode(vcpu)) {
                 /* Protected mode: apply checks for segment validity in the
                  * following order:
                  * - segment type check (#GP(0) may be thrown)
@@ -6568,17 +6609,10 @@ static int get_vmx_mem_address(struct kvm_vcpu *vcpu,
                          * execute-only code segment
                          */
                         exn = ((s.type & 0xa) == 8);
-       }
-       if (exn) {
-               kvm_queue_exception_e(vcpu, GP_VECTOR, 0);
-               return 1;
-       }
-       if (is_long_mode(vcpu)) {
-               /* Long mode: #GP(0)/#SS(0) if the memory address is in a
-                * non-canonical form. This is an only check for long mode.
-                */
-               exn = is_noncanonical_address(*ret);
-       } else if (is_protmode(vcpu)) {
+               if (exn) {
+                       kvm_queue_exception_e(vcpu, GP_VECTOR, 0);
+                       return 1;
+               }
                 /* Protected mode: #GP(0)/#SS(0) if the segment is unusable.
                  */
                 exn = (s.unusable != 0);
@@ -6644,14 +6678,20 @@ static int nested_vmx_check_vmptr(struct kvm_vcpu *vcpu, int exit_reason,
                 }
  
                 page = nested_get_page(vcpu, vmptr);
-               if (page == NULL ||
-                   *(u32 *)kmap(page) != VMCS12_REVISION) {
+               if (page == NULL) {
                         nested_vmx_failInvalid(vcpu);
+                       skip_emulated_instruction(vcpu);
+                       return 1;
+               }
+               if (*(u32 *)kmap(page) != VMCS12_REVISION) {
                         kunmap(page);
+                       nested_release_page_clean(page);
+                       nested_vmx_failInvalid(vcpu);
                         skip_emulated_instruction(vcpu);
                         return 1;
                 }
                 kunmap(page);
+               nested_release_page_clean(page);
                 vmx->nested.vmxon_ptr = vmptr;
                 break;
         case EXIT_REASON_VMCLEAR:
@@ -7319,6 +7359,7 @@ static int handle_invept(struct kvm_vcpu *vcpu)
         if (!(types & (1UL << type))) {
                 nested_vmx_failValid(vcpu,
                                 VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
+               skip_emulated_instruction(vcpu);
                 return 1;
         }
  
@@ -7377,6 +7418,7 @@ static int handle_invvpid(struct kvm_vcpu *vcpu)
         if (!(types & (1UL << type))) {
                 nested_vmx_failValid(vcpu,
                         VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
+               skip_emulated_instruction(vcpu);
                 return 1;
         }
  
@@ -7393,12 +7435,17 @@ static int handle_invvpid(struct kvm_vcpu *vcpu)
         }
  
         switch (type) {
+       case VMX_VPID_EXTENT_SINGLE_CONTEXT:
+               /*
+                * Old versions of KVM use the single-context version so we
+                * have to support it; just treat it the same as all-context.
+                */
         case VMX_VPID_EXTENT_ALL_CONTEXT:
                 __vmx_flush_tlb(vcpu, to_vmx(vcpu)->nested.vpid02);
                 nested_vmx_succeed(vcpu);
                 break;
         default:
-               /* Trap single context invalidation invvpid calls */
+               /* Trap individual address invalidation invvpid calls */
                 BUG_ON(1);
                 break;
         }
@@ -7687,7 +7734,7 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
  
         switch (exit_reason) {
         case EXIT_REASON_EXCEPTION_NMI:
-               if (!is_exception(intr_info))
+               if (is_nmi(intr_info))
                         return false;
                 else if (is_page_fault(intr_info))
                         return enable_ept;
@@ -7707,8 +7754,6 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
         case EXIT_REASON_TASK_SWITCH:
                 return true;
         case EXIT_REASON_CPUID:
-               if (kvm_register_read(vcpu, VCPU_REGS_RAX) == 0xa)
-                       return false;
                 return true;
         case EXIT_REASON_HLT:
                 return nested_cpu_has(vmcs12, CPU_BASED_HLT_EXITING);
@@ -7793,6 +7838,9 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
                 return nested_cpu_has2(vmcs12, SECONDARY_EXEC_XSAVES);
         case EXIT_REASON_PCOMMIT:
                 return nested_cpu_has2(vmcs12, SECONDARY_EXEC_PCOMMIT);
+       case EXIT_REASON_PML_FULL:
+               /* We don't expose PML support to L1. */
+               return false;
         default:
                 return true;
         }
@@ -7804,22 +7852,6 @@ static void vmx_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2)
         *info2 = vmcs_read32(VM_EXIT_INTR_INFO);
  }
  
-static int vmx_create_pml_buffer(struct vcpu_vmx *vmx)
-{
-       struct page *pml_pg;
-
-       pml_pg = alloc_page(GFP_KERNEL | __GFP_ZERO);
-       if (!pml_pg)
-               return -ENOMEM;
-
-       vmx->pml_pg = pml_pg;
-
-       vmcs_write64(PML_ADDRESS, page_to_phys(vmx->pml_pg));
-       vmcs_write16(GUEST_PML_INDEX, PML_ENTITY_NUM - 1);
-
-       return 0;
-}
-
  static void vmx_destroy_pml_buffer(struct vcpu_vmx *vmx)
  {
         if (vmx->pml_pg) {
@@ -7880,7 +7912,7 @@ static void kvm_flush_pml_buffers(struct kvm *kvm)
  static void vmx_dump_sel(char *name, uint32_t sel)
  {
         pr_err("%s sel=0x%04x, attr=0x%05x, limit=0x%08x, base=0x%016lx\n",
-              name, vmcs_read32(sel),
+              name, vmcs_read16(sel),
                vmcs_read32(sel + GUEST_ES_AR_BYTES - GUEST_ES_SELECTOR),
                vmcs_read32(sel + GUEST_ES_LIMIT - GUEST_ES_SELECTOR),
                vmcs_readl(sel + GUEST_ES_BASE - GUEST_ES_SELECTOR));
@@ -8042,6 +8074,8 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu)
         u32 exit_reason = vmx->exit_reason;
         u32 vectoring_info = vmx->idt_vectoring_info;
  
+       trace_kvm_exit(exit_reason, vcpu, KVM_ISA_VMX);
+
         /*
          * Flush logged GPAs PML buffer, this will make dirty_bitmap more
          * updated. Another good is, in kvm_vm_ioctl_get_dirty_log, before
@@ -8088,6 +8122,7 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu)
         if ((vectoring_info & VECTORING_INFO_VALID_MASK) &&
                         (exit_reason != EXIT_REASON_EXCEPTION_NMI &&
                         exit_reason != EXIT_REASON_EPT_VIOLATION &&
+                       exit_reason != EXIT_REASON_PML_FULL &&
                         exit_reason != EXIT_REASON_TASK_SWITCH)) {
                 vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
                 vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_DELIVERY_EV;
@@ -8147,6 +8182,12 @@ static void vmx_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool set)
  {
         u32 sec_exec_control;
  
+       /* Postpone execution until vmcs01 is the current VMCS. */
+       if (is_guest_mode(vcpu)) {
+               to_vmx(vcpu)->nested.change_vmcs01_virtual_x2apic_mode = true;
+               return;
+       }
+
         /*
          * There is not point to enable virtualize x2apic without enable
          * apicv
@@ -8285,8 +8326,7 @@ static void vmx_complete_atomic_exit(struct vcpu_vmx *vmx)
                 kvm_machine_check();
  
         /* We need to handle NMIs before interrupts are enabled */
-       if ((exit_intr_info & INTR_INFO_INTR_TYPE_MASK) == INTR_TYPE_NMI_INTR &&
-           (exit_intr_info & INTR_INFO_VALID_MASK)) {
+       if (is_nmi(exit_intr_info)) {
                 kvm_before_handle_nmi(&vmx->vcpu);
                 asm("int $2");
                 kvm_after_handle_nmi(&vmx->vcpu);
@@ -8668,7 +8708,6 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
         vmx->loaded_vmcs->launched = 1;
  
         vmx->exit_reason = vmcs_read32(VM_EXIT_REASON);
-       trace_kvm_exit(vmx->exit_reason, vcpu, KVM_ISA_VMX);
  
         /*
          * the KVM_REQ_EVENT optimization bit is only on for one entry, and if
@@ -8701,6 +8740,22 @@ static void vmx_load_vmcs01(struct kvm_vcpu *vcpu)
         put_cpu();
  }
  
+/*
+ * Ensure that the current vmcs of the logical processor is the
+ * vmcs01 of the vcpu before calling free_nested().
+ */
+static void vmx_free_vcpu_nested(struct kvm_vcpu *vcpu)
+{
+       struct vcpu_vmx *vmx = to_vmx(vcpu);
+       int r;
+
+       r = vcpu_load(vcpu);
+       BUG_ON(r);
+       vmx_load_vmcs01(vcpu);
+       free_nested(vmx);
+       vcpu_put(vcpu);
+}
+
  static void vmx_free_vcpu(struct kvm_vcpu *vcpu)
  {
         struct vcpu_vmx *vmx = to_vmx(vcpu);
@@ -8709,8 +8764,7 @@ static void vmx_free_vcpu(struct kvm_vcpu *vcpu)
                 vmx_destroy_pml_buffer(vmx);
         free_vpid(vmx->vpid);
         leave_guest_mode(vcpu);
-       vmx_load_vmcs01(vcpu);
-       free_nested(vmx);
+       vmx_free_vcpu_nested(vcpu);
         free_loaded_vmcs(vmx->loaded_vmcs);
         kfree(vmx->guest_msrs);
         kvm_vcpu_uninit(vcpu);
@@ -8732,14 +8786,26 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
         if (err)
                 goto free_vcpu;
  
+       err = -ENOMEM;
+
+       /*
+        * If PML is turned on, failure on enabling PML just results in failure
+        * of creating the vcpu, therefore we can simplify PML logic (by
+        * avoiding dealing with cases such as enabling PML partially on vcpus
+        * for the guest, etc.).
+        */
+       if (enable_pml) {
+               vmx->pml_pg = alloc_page(GFP_KERNEL | __GFP_ZERO);
+               if (!vmx->pml_pg)
+                       goto uninit_vcpu;
+       }
+
         vmx->guest_msrs = kmalloc(PAGE_SIZE, GFP_KERNEL);
         BUILD_BUG_ON(ARRAY_SIZE(vmx_msr_index) * sizeof(vmx->guest_msrs[0])
                      > PAGE_SIZE);
  
-       err = -ENOMEM;
-       if (!vmx->guest_msrs) {
-               goto uninit_vcpu;
-       }
+       if (!vmx->guest_msrs)
+               goto free_pml;
  
         vmx->loaded_vmcs = &vmx->vmcs01;
         vmx->loaded_vmcs->vmcs = alloc_vmcs();
@@ -8783,18 +8849,6 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
         vmx->nested.current_vmptr = -1ull;
         vmx->nested.current_vmcs12 = NULL;
  
-       /*
-        * If PML is turned on, failure on enabling PML just results in failure
-        * of creating the vcpu, therefore we can simplify PML logic (by
-        * avoiding dealing with cases, such as enabling PML partially on vcpus
-        * for the guest, etc.
-        */
-       if (enable_pml) {
-               err = vmx_create_pml_buffer(vmx);
-               if (err)
-                       goto free_vmcs;
-       }
-
         return &vmx->vcpu;
  
  free_vmcs:
@@ -8802,6 +8856,8 @@ free_vmcs:
         free_loaded_vmcs(vmx->loaded_vmcs);
  free_msrs:
         kfree(vmx->guest_msrs);
+free_pml:
+       vmx_destroy_pml_buffer(vmx);
  uninit_vcpu:
         kvm_vcpu_uninit(&vmx->vcpu);
  free_vcpu:
@@ -8931,7 +8987,8 @@ static void vmx_cpuid_update(struct kvm_vcpu *vcpu)
                         best->ebx &= ~bit(X86_FEATURE_INVPCID);
         }
  
-       vmcs_set_secondary_exec_control(secondary_exec_ctl);
+       if (cpu_has_secondary_exec_ctrls())
+               vmcs_set_secondary_exec_control(secondary_exec_ctl);
  
         if (static_cpu_has(X86_FEATURE_PCOMMIT) && nested) {
                 if (guest_cpuid_has_pcommit(vcpu))
@@ -9703,6 +9760,18 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
  
         }
  
+       if (enable_pml) {
+               /*
+                * Conceptually we want to copy the PML address and index from
+                * vmcs01 here, and then back to vmcs01 on nested vmexit. But,
+                * since we always flush the log on each vmexit, this happens
+                * to be equivalent to simply resetting the fields in vmcs02.
+                */
+               ASSERT(vmx->pml_pg);
+               vmcs_write64(PML_ADDRESS, page_to_phys(vmx->pml_pg));
+               vmcs_write16(GUEST_PML_INDEX, PML_ENTITY_NUM - 1);
+       }
+
         if (nested_cpu_has_ept(vmcs12)) {
                 kvm_mmu_unload(vcpu);
                 nested_ept_init_mmu_context(vcpu);
@@ -10431,6 +10500,12 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
         /* Update TSC_OFFSET if TSC was changed while L2 ran */
         vmcs_write64(TSC_OFFSET, vmx->nested.vmcs01_tsc_offset);
  
+       if (vmx->nested.change_vmcs01_virtual_x2apic_mode) {
+               vmx->nested.change_vmcs01_virtual_x2apic_mode = false;
+               vmx_set_virtual_x2apic_mode(vcpu,
+                               vcpu->arch.apic_base & X2APIC_ENABLE);
+       }
+
         /* This is needed for same reason as it was needed in prepare_vmcs02 */
         vmx->host_rsp = 0;