x86: Save cr2 in NMI in case NMIs take a page fault (for i386)
authorSteven Rostedt <srostedt@redhat.com>
Thu, 7 Jun 2012 15:54:37 +0000 (11:54 -0400)
committerSteven Rostedt <rostedt@goodmis.org>
Fri, 8 Jun 2012 22:51:12 +0000 (18:51 -0400)
Avi Kivity reported that page faults in NMIs could cause havic if
the NMI preempted another page fault handler:

   The recent changes to NMI allow exceptions to take place in NMI
   handlers, but I think that a #PF (say, due to access to vmalloc space)
   is still problematic.  Consider the sequence

    #PF  (cr2 set by processor)
      NMI
        ...
        #PF (cr2 clobbered)
          do_page_fault()
          IRET
        ...
        IRET
      do_page_fault()
        address = read_cr2()

   The last line reads the overwritten cr2 value.

This is the i386 version, which has the luxury of doing the work
in C code.

Link: http://lkml.kernel.org/r/4FBB8C40.6080304@redhat.com
Reported-by: Avi Kivity <avi@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
arch/x86/kernel/nmi.c

index a15a888006616b7621c6135a34607e5d10d3a2b6..f84f5c57de35b0bb5693067c3799de8093ad237e 100644 (file)
@@ -395,6 +395,14 @@ static __kprobes void default_do_nmi(struct pt_regs *regs)
  * thus there is no race between the first check of state for NOT_RUNNING
  * and setting it to NMI_EXECUTING. The HW will prevent nested NMIs
  * at this point.
+ *
+ * In case the NMI takes a page fault, we need to save off the CR2
+ * because the NMI could have preempted another page fault and corrupt
+ * the CR2 that is about to be read. As nested NMIs must be restarted
+ * and they can not take breakpoints or page faults, the update of the
+ * CR2 must be done before converting the nmi state back to NOT_RUNNING.
+ * Otherwise, there would be a race of another nested NMI coming in
+ * after setting state to NOT_RUNNING but before updating the nmi_cr2.
  */
 enum nmi_states {
        NMI_NOT_RUNNING = 0,
@@ -402,6 +410,7 @@ enum nmi_states {
        NMI_LATCHED,
 };
 static DEFINE_PER_CPU(enum nmi_states, nmi_state);
+static DEFINE_PER_CPU(unsigned long, nmi_cr2);
 
 #define nmi_nesting_preprocess(regs)                                   \
        do {                                                            \
@@ -410,11 +419,14 @@ static DEFINE_PER_CPU(enum nmi_states, nmi_state);
                        return;                                         \
                }                                                       \
                this_cpu_write(nmi_state, NMI_EXECUTING);               \
+               this_cpu_write(nmi_cr2, read_cr2());                    \
        } while (0);                                                    \
        nmi_restart:
 
 #define nmi_nesting_postprocess()                                      \
        do {                                                            \
+               if (unlikely(this_cpu_read(nmi_cr2) != read_cr2()))     \
+                       write_cr2(this_cpu_read(nmi_cr2));              \
                if (this_cpu_dec_return(nmi_state))                     \
                        goto nmi_restart;                               \
        } while (0)