#endif
#define NCAPINTS 10 /* N 32-bit words worth of info */
+ #define NBUGINTS 1 /* N 32-bit bug flags */
/*
* Note: If the comment begins with a quoted string, that string is used
#define X86_FEATURE_AMD_DCM (3*32+27) /* multi-node processor */
#define X86_FEATURE_APERFMPERF (3*32+28) /* APERFMPERF */
#define X86_FEATURE_EAGER_FPU (3*32+29) /* "eagerfpu" Non lazy FPU restore */
+#define X86_FEATURE_NONSTOP_TSC_S3 (3*32+30) /* TSC doesn't stop in S3 state */
/* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */
#define X86_FEATURE_XMM3 (4*32+ 0) /* "pni" SSE-3 */
#define X86_FEATURE_TOPOEXT (6*32+22) /* topology extensions CPUID leafs */
#define X86_FEATURE_PERFCTR_CORE (6*32+23) /* core performance counter extensions */
#define X86_FEATURE_PERFCTR_NB (6*32+24) /* NB performance counter extensions */
+#define X86_FEATURE_PERFCTR_L2 (6*32+28) /* L2 performance counter extensions */
/*
* Auxiliary flags: Linux defined - For features scattered in various
#define X86_FEATURE_ADX (9*32+19) /* The ADCX and ADOX instructions */
#define X86_FEATURE_SMAP (9*32+20) /* Supervisor Mode Access Prevention */
+ /*
+ * BUG word(s)
+ */
+ #define X86_BUG(x) (NCAPINTS*32 + (x))
+
+ #define X86_BUG_F00F X86_BUG(0) /* Intel F00F */
+ #define X86_BUG_FDIV X86_BUG(1) /* FPU FDIV */
+ #define X86_BUG_COMA X86_BUG(2) /* Cyrix 6x86 coma */
+ #define X86_BUG_AMD_TLB_MMATCH X86_BUG(3) /* AMD Erratum 383 */
+ #define X86_BUG_AMD_APIC_C1E X86_BUG(4) /* AMD Erratum 400 */
+
#if defined(__KERNEL__) && !defined(__ASSEMBLY__)
#include <asm/asm.h>
#define cpu_has_pclmulqdq boot_cpu_has(X86_FEATURE_PCLMULQDQ)
#define cpu_has_perfctr_core boot_cpu_has(X86_FEATURE_PERFCTR_CORE)
#define cpu_has_perfctr_nb boot_cpu_has(X86_FEATURE_PERFCTR_NB)
+#define cpu_has_perfctr_l2 boot_cpu_has(X86_FEATURE_PERFCTR_L2)
#define cpu_has_cx8 boot_cpu_has(X86_FEATURE_CX8)
#define cpu_has_cx16 boot_cpu_has(X86_FEATURE_CX16)
#define cpu_has_eager_fpu boot_cpu_has(X86_FEATURE_EAGER_FPU)
#define static_cpu_has(bit) boot_cpu_has(bit)
#endif
+ #define cpu_has_bug(c, bit) cpu_has(c, (bit))
+ #define set_cpu_bug(c, bit) set_cpu_cap(c, (bit))
+ #define clear_cpu_bug(c, bit) clear_cpu_cap(c, (bit));
+
+ #define static_cpu_has_bug(bit) static_cpu_has((bit))
+ #define boot_cpu_has_bug(bit) cpu_has_bug(&boot_cpu_data, (bit))
+
#endif /* defined(__KERNEL__) && !defined(__ASSEMBLY__) */
#endif /* _ASM_X86_CPUFEATURE_H */
sched_clock_stable = 1;
}
+ /* Penwell and Cloverview have the TSC which doesn't sleep on S3 */
+ if (c->x86 == 6) {
+ switch (c->x86_model) {
+ case 0x27: /* Penwell */
+ case 0x35: /* Cloverview */
+ set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC_S3);
+ break;
+ default:
+ break;
+ }
+ }
+
/*
* There is a known erratum on Pentium III and Core Solo
* and Core Duo CPUs.
* system.
* Note that the workaround only should be initialized once...
*/
- c->f00f_bug = 0;
+ clear_cpu_bug(c, X86_BUG_F00F);
if (!paravirt_enabled() && c->x86 == 5) {
static int f00f_workaround_enabled;
- c->f00f_bug = 1;
+ set_cpu_bug(c, X86_BUG_F00F);
if (!f00f_workaround_enabled) {
trap_init_f00f_bug();
printk(KERN_NOTICE "Intel Pentium with F0 0F bug - workaround enabled.\n");
}
#endif
-/*
- * The idle thread. There's no useful work to be
- * done, so just try to conserve power and have a
- * low exit latency (ie sit in a loop waiting for
- * somebody to say that they'd like to reschedule)
- */
-void cpu_idle(void)
+void arch_cpu_idle_prepare(void)
{
/*
* If we're the non-boot CPU, nothing set the stack canary up
* canaries already on the stack wont ever trigger).
*/
boot_init_stack_canary();
- current_thread_info()->status |= TS_POLLING;
-
- while (1) {
- tick_nohz_idle_enter();
-
- while (!need_resched()) {
- rmb();
-
- if (cpu_is_offline(smp_processor_id()))
- play_dead();
-
- /*
- * Idle routines should keep interrupts disabled
- * from here on, until they go to idle.
- * Otherwise, idle callbacks can misfire.
- */
- local_touch_nmi();
- local_irq_disable();
-
- enter_idle();
-
- /* Don't trace irqs off for idle */
- stop_critical_timings();
-
- /* enter_idle() needs rcu for notifiers */
- rcu_idle_enter();
+}
- if (cpuidle_idle_call())
- x86_idle();
+void arch_cpu_idle_enter(void)
+{
+ local_touch_nmi();
+ enter_idle();
+}
- rcu_idle_exit();
- start_critical_timings();
+void arch_cpu_idle_exit(void)
+{
+ __exit_idle();
+}
- /* In many cases the interrupt that ended idle
- has already called exit_idle. But some idle
- loops can be woken up without interrupt. */
- __exit_idle();
- }
+void arch_cpu_idle_dead(void)
+{
+ play_dead();
+}
- tick_nohz_idle_exit();
- preempt_enable_no_resched();
- schedule();
- preempt_disable();
- }
+/*
+ * Called from the generic idle code.
+ */
+void arch_cpu_idle(void)
+{
+ if (cpuidle_idle_call())
+ x86_idle();
}
/*
- * We use this if we don't have any better
- * idle routine..
+ * We use this if we don't have any better idle routine..
*/
void default_idle(void)
{
trace_cpu_idle_rcuidle(1, smp_processor_id());
- current_thread_info()->status &= ~TS_POLLING;
- /*
- * TS_POLLING-cleared state must be visible before we
- * test NEED_RESCHED:
- */
- smp_mb();
-
- if (!need_resched())
- safe_halt(); /* enables interrupts racelessly */
- else
- local_irq_enable();
- current_thread_info()->status |= TS_POLLING;
+ safe_halt();
trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id());
}
#ifdef CONFIG_APM_MODULE
halt();
}
-/*
- * On SMP it's slightly faster (but much more power-consuming!)
- * to poll the ->work.need_resched flag instead of waiting for the
- * cross-CPU IPI to arrive. Use this option with caution.
- */
-static void poll_idle(void)
-{
- trace_cpu_idle_rcuidle(0, smp_processor_id());
- local_irq_enable();
- while (!need_resched())
- cpu_relax();
- trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id());
-}
-
bool amd_e400_c1e_detected;
EXPORT_SYMBOL(amd_e400_c1e_detected);
void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)
{
#ifdef CONFIG_SMP
- if (x86_idle == poll_idle && smp_num_siblings > 1)
+ if (boot_option_idle_override == IDLE_POLL && smp_num_siblings > 1)
pr_warn_once("WARNING: polling idle and HT enabled, performance may degrade\n");
#endif
- if (x86_idle)
+ if (x86_idle || boot_option_idle_override == IDLE_POLL)
return;
- if (cpu_has_amd_erratum(amd_erratum_400)) {
+ if (cpu_has_bug(c, X86_BUG_AMD_APIC_C1E)) {
/* E400: APIC timer interrupt does not wake up CPU from C1e */
pr_info("using AMD E400 aware idle routine\n");
x86_idle = amd_e400_idle;
if (!strcmp(str, "poll")) {
pr_info("using polling idle threads\n");
- x86_idle = poll_idle;
boot_option_idle_override = IDLE_POLL;
+ cpu_idle_poll_ctrl(true);
} else if (!strcmp(str, "halt")) {
/*
* When the boot option of idle=halt is added, halt is
/* cpu data as detected by the assembly code in head.S */
struct cpuinfo_x86 new_cpu_data __cpuinitdata = {
.wp_works_ok = -1,
- .fdiv_bug = -1,
};
/* common cpu data for all cpus */
struct cpuinfo_x86 boot_cpu_data __read_mostly = {
.wp_works_ok = -1,
- .fdiv_bug = -1,
};
EXPORT_SYMBOL(boot_cpu_data);
/*
* Keep the crash kernel below this limit. On 32 bits earlier kernels
* would limit the kernel to the low 512 MiB due to mapping restrictions.
+ * On 64bit, old kexec-tools need to under 896MiB.
*/
#ifdef CONFIG_X86_32
-# define CRASH_KERNEL_ADDR_MAX (512 << 20)
+# define CRASH_KERNEL_ADDR_LOW_MAX (512 << 20)
+# define CRASH_KERNEL_ADDR_HIGH_MAX (512 << 20)
#else
-# define CRASH_KERNEL_ADDR_MAX MAXMEM
+# define CRASH_KERNEL_ADDR_LOW_MAX (896UL<<20)
+# define CRASH_KERNEL_ADDR_HIGH_MAX MAXMEM
#endif
static void __init reserve_crashkernel_low(void)
unsigned long long low_base = 0, low_size = 0;
unsigned long total_low_mem;
unsigned long long base;
+ bool auto_set = false;
int ret;
total_low_mem = memblock_mem_size(1UL<<(32-PAGE_SHIFT));
+ /* crashkernel=Y,low */
ret = parse_crashkernel_low(boot_command_line, total_low_mem,
&low_size, &base);
- if (ret != 0 || low_size <= 0)
- return;
+ if (ret != 0) {
+ /*
+ * two parts from lib/swiotlb.c:
+ * swiotlb size: user specified with swiotlb= or default.
+ * swiotlb overflow buffer: now is hardcoded to 32k.
+ * We round it to 8M for other buffers that
+ * may need to stay low too.
+ */
+ low_size = swiotlb_size_or_default() + (8UL<<20);
+ auto_set = true;
+ } else {
+ /* passed with crashkernel=0,low ? */
+ if (!low_size)
+ return;
+ }
low_base = memblock_find_in_range(low_size, (1ULL<<32),
low_size, alignment);
if (!low_base) {
- pr_info("crashkernel low reservation failed - No suitable area found.\n");
+ if (!auto_set)
+ pr_info("crashkernel low reservation failed - No suitable area found.\n");
return;
}
const unsigned long long alignment = 16<<20; /* 16M */
unsigned long long total_mem;
unsigned long long crash_size, crash_base;
+ bool high = false;
int ret;
total_mem = memblock_phys_mem_size();
+ /* crashkernel=XM */
ret = parse_crashkernel(boot_command_line, total_mem,
&crash_size, &crash_base);
- if (ret != 0 || crash_size <= 0)
- return;
+ if (ret != 0 || crash_size <= 0) {
+ /* crashkernel=X,high */
+ ret = parse_crashkernel_high(boot_command_line, total_mem,
+ &crash_size, &crash_base);
+ if (ret != 0 || crash_size <= 0)
+ return;
+ high = true;
+ }
/* 0 means: find the address automatically */
if (crash_base <= 0) {
* kexec want bzImage is below CRASH_KERNEL_ADDR_MAX
*/
crash_base = memblock_find_in_range(alignment,
- CRASH_KERNEL_ADDR_MAX, crash_size, alignment);
+ high ? CRASH_KERNEL_ADDR_HIGH_MAX :
+ CRASH_KERNEL_ADDR_LOW_MAX,
+ crash_size, alignment);
if (!crash_base) {
pr_info("crashkernel reservation failed - No suitable area found.\n");
#include <linux/perf_event.h> /* perf_sw_event */
#include <linux/hugetlb.h> /* hstate_index_to_shift */
#include <linux/prefetch.h> /* prefetchw */
+#include <linux/context_tracking.h> /* exception_enter(), ... */
#include <asm/traps.h> /* dotraplinkage, ... */
#include <asm/pgalloc.h> /* pgd_*(), ... */
#include <asm/kmemcheck.h> /* kmemcheck_*(), ... */
#include <asm/fixmap.h> /* VSYSCALL_START */
-#include <asm/context_tracking.h> /* exception_enter(), ... */
/*
* Page fault error code bits:
if (pgd_none(*pgd_ref))
return -1;
- if (pgd_none(*pgd))
+ if (pgd_none(*pgd)) {
set_pgd(pgd, *pgd_ref);
- else
+ arch_flush_lazy_mmu_mode();
+ } else {
BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref));
+ }
/*
* Below here mismatches are bugs because these lower tables
/*
* Pentium F0 0F C7 C8 bug workaround:
*/
- if (boot_cpu_data.f00f_bug) {
+ if (boot_cpu_has_bug(X86_BUG_F00F)) {
nr = (address - idt_descr.address) >> 3;
if (nr == 6) {
dotraplinkage void __kprobes
do_page_fault(struct pt_regs *regs, unsigned long error_code)
{
- exception_enter(regs);
+ enum ctx_state prev_state;
+
+ prev_state = exception_enter();
__do_page_fault(regs, error_code);
- exception_exit(regs);
+ exception_exit(prev_state);
}