From 553f265fe883a23502ee351845f09334790f18b8 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Fri, 7 Apr 2006 19:49:57 +0200 Subject: [PATCH] [PATCH] x86_64: Don't run NMI watchdog during machine checks Machine checks can stall the machine for a long time and it's not good to trigger the nmi watchdog during that. Signed-off-by: Andi Kleen Signed-off-by: Linus Torvalds --- arch/x86_64/kernel/mce.c | 8 +++++++- arch/x86_64/kernel/nmi.c | 7 +++++++ include/asm-x86_64/mce.h | 7 +++++++ 3 files changed, 21 insertions(+), 1 deletion(-) diff --git a/arch/x86_64/kernel/mce.c b/arch/x86_64/kernel/mce.c index 10b3e348fc99..6f0790e8b6d3 100644 --- a/arch/x86_64/kernel/mce.c +++ b/arch/x86_64/kernel/mce.c @@ -29,6 +29,8 @@ #define MISC_MCELOG_MINOR 227 #define NR_BANKS 6 +atomic_t mce_entry; + static int mce_dont_init; /* 0: always panic, 1: panic if deadlock possible, 2: try to avoid panic, @@ -172,10 +174,12 @@ void do_machine_check(struct pt_regs * regs, long error_code) int i; int panicm_found = 0; + atomic_inc(&mce_entry); + if (regs) notify_die(DIE_NMI, "machine check", regs, error_code, 18, SIGKILL); if (!banks) - return; + goto out2; memset(&m, 0, sizeof(struct mce)); m.cpu = safe_smp_processor_id(); @@ -266,6 +270,8 @@ void do_machine_check(struct pt_regs * regs, long error_code) out: /* Last thing done in the machine check exception to clear state. */ wrmsrl(MSR_IA32_MCG_STATUS, 0); + out2: + atomic_dec(&mce_entry); } /* diff --git a/arch/x86_64/kernel/nmi.c b/arch/x86_64/kernel/nmi.c index d9e4067faf05..4e6357fe0ec3 100644 --- a/arch/x86_64/kernel/nmi.c +++ b/arch/x86_64/kernel/nmi.c @@ -34,6 +34,7 @@ #include #include #include +#include /* * lapic_nmi_owner tracks the ownership of the lapic NMI hardware: @@ -480,6 +481,12 @@ void __kprobes nmi_watchdog_tick(struct pt_regs * regs, unsigned reason) __get_cpu_var(nmi_touch) = 0; touched = 1; } +#ifdef CONFIG_X86_MCE + /* Could check oops_in_progress here too, but it's safer + not too */ + if (atomic_read(&mce_entry) > 0) + touched = 1; +#endif if (!touched && __get_cpu_var(last_irq_sum) == sum) { /* * Ayiee, looks like this CPU is stuck ... diff --git a/include/asm-x86_64/mce.h b/include/asm-x86_64/mce.h index 5d298b799a9f..7229785094e3 100644 --- a/include/asm-x86_64/mce.h +++ b/include/asm-x86_64/mce.h @@ -70,6 +70,9 @@ struct mce_log { #define MCE_THRESHOLD_BASE MCE_EXTENDED_BANK + 1 /* MCE_AMD */ #define MCE_THRESHOLD_DRAM_ECC MCE_THRESHOLD_BASE + 4 +#ifdef __KERNEL__ +#include + void mce_log(struct mce *m); #ifdef CONFIG_X86_MCE_INTEL void mce_intel_feature_init(struct cpuinfo_x86 *c); @@ -87,4 +90,8 @@ static inline void mce_amd_feature_init(struct cpuinfo_x86 *c) } #endif +extern atomic_t mce_entry; + +#endif + #endif -- 2.34.1