From 3e5d8f978435bb9ba4dfe3f4514e65e7885db1a9 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 13 Jan 2009 20:41:35 +0900 Subject: [PATCH] x86: make percpu symbols zerobased on SMP [ Based on original patch from Christoph Lameter and Mike Travis. ] This patch makes percpu symbols zerobased on x86_64 SMP by adding PERCPU_VADDR() to vmlinux.lds.h which helps setting explicit vaddr on the percpu output section and using it in vmlinux_64.lds.S. A new PHDR is added as existing ones cannot contain sections near address zero. PERCPU_VADDR() also adds a new symbol __per_cpu_load which always points to the vaddr of the loaded percpu data.init region. The following adjustments have been made to accomodate the address change. * code to locate percpu gdt_page in head_64.S is updated to add the load address to the gdt_page offset. * __per_cpu_load is used in places where access to the init data area is necessary. * pda->data_offset is initialized soon after C code is entered as zero value doesn't work anymore. This patch is mostly taken from Mike Travis' "x86_64: Base percpu variables at zero" patch. Signed-off-by: Tejun Heo Signed-off-by: Ingo Molnar --- arch/x86/kernel/head64.c | 2 ++ arch/x86/kernel/head_64.S | 24 ++++++++++++++- arch/x86/kernel/setup_percpu.c | 2 +- arch/x86/kernel/vmlinux_64.lds.S | 17 ++++++++++- include/asm-generic/sections.h | 2 +- include/asm-generic/vmlinux.lds.h | 51 +++++++++++++++++++++++++++---- 6 files changed, 88 insertions(+), 10 deletions(-) diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index b9a4d8c4b935..bc2900ca82c7 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -44,6 +44,8 @@ void __init x86_64_init_pda(void) { _cpu_pda = __cpu_pda; cpu_pda(0) = &_boot_cpu_pda; + cpu_pda(0)->data_offset = + (unsigned long)(__per_cpu_load - __per_cpu_start); pda_init(0); } diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index 0e275d495563..7ee0363871e8 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -204,6 +204,23 @@ ENTRY(secondary_startup_64) pushq $0 popfq +#ifdef CONFIG_SMP + /* + * early_gdt_base should point to the gdt_page in static percpu init + * data area. Computing this requires two symbols - __per_cpu_load + * and per_cpu__gdt_page. As linker can't do no such relocation, do + * it by hand. As early_gdt_descr is manipulated by C code for + * secondary CPUs, this should be done only once for the boot CPU + * when early_gdt_descr_base contains zero. + */ + movq early_gdt_descr_base(%rip), %rax + testq %rax, %rax + jnz 1f + movq $__per_cpu_load, %rax + addq $per_cpu__gdt_page, %rax + movq %rax, early_gdt_descr_base(%rip) +1: +#endif /* * We must switch to a new descriptor in kernel space for the GDT * because soon the kernel won't have access anymore to the userspace @@ -401,7 +418,12 @@ NEXT_PAGE(level2_spare_pgt) .globl early_gdt_descr early_gdt_descr: .word GDT_ENTRIES*8-1 - .quad per_cpu__gdt_page +#ifdef CONFIG_SMP +early_gdt_descr_base: + .quad 0x0000000000000000 +#else + .quad per_cpu__gdt_page +#endif ENTRY(phys_base) /* This must match the first entry in level2_kernel_pgt */ diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index 56c63ac62b10..44845842e722 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -213,7 +213,7 @@ void __init setup_per_cpu_areas(void) } #endif per_cpu_offset(cpu) = ptr - __per_cpu_start; - memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start); + memcpy(ptr, __per_cpu_load, __per_cpu_end - __per_cpu_start); DBG("PERCPU: cpu %4d %p\n", cpu, ptr); } diff --git a/arch/x86/kernel/vmlinux_64.lds.S b/arch/x86/kernel/vmlinux_64.lds.S index 1a614c0e6bef..f50280db0dfe 100644 --- a/arch/x86/kernel/vmlinux_64.lds.S +++ b/arch/x86/kernel/vmlinux_64.lds.S @@ -19,6 +19,9 @@ PHDRS { data PT_LOAD FLAGS(7); /* RWE */ user PT_LOAD FLAGS(7); /* RWE */ data.init PT_LOAD FLAGS(7); /* RWE */ +#ifdef CONFIG_SMP + percpu PT_LOAD FLAGS(7); /* RWE */ +#endif note PT_NOTE FLAGS(0); /* ___ */ } SECTIONS @@ -208,14 +211,26 @@ SECTIONS __initramfs_end = .; #endif +#ifdef CONFIG_SMP + /* + * percpu offsets are zero-based on SMP. PERCPU_VADDR() changes the + * output PHDR, so the next output section - __data_nosave - should + * switch it back to data.init. + */ + . = ALIGN(PAGE_SIZE); + PERCPU_VADDR(0, :percpu) +#else PERCPU(PAGE_SIZE) +#endif . = ALIGN(PAGE_SIZE); __init_end = .; . = ALIGN(PAGE_SIZE); __nosave_begin = .; - .data_nosave : AT(ADDR(.data_nosave) - LOAD_OFFSET) { *(.data.nosave) } + .data_nosave : AT(ADDR(.data_nosave) - LOAD_OFFSET) { + *(.data.nosave) + } :data.init /* switch back to data.init, see PERCPU_VADDR() above */ . = ALIGN(PAGE_SIZE); __nosave_end = .; diff --git a/include/asm-generic/sections.h b/include/asm-generic/sections.h index 79a7ff925bf8..4ce48e878530 100644 --- a/include/asm-generic/sections.h +++ b/include/asm-generic/sections.h @@ -9,7 +9,7 @@ extern char __bss_start[], __bss_stop[]; extern char __init_begin[], __init_end[]; extern char _sinittext[], _einittext[]; extern char _end[]; -extern char __per_cpu_start[], __per_cpu_end[]; +extern char __per_cpu_load[], __per_cpu_start[], __per_cpu_end[]; extern char __kprobes_text_start[], __kprobes_text_end[]; extern char __initdata_begin[], __initdata_end[]; extern char __start_rodata[], __end_rodata[]; diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index c61fab1dd2f8..fc2f55f2dcd6 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -430,12 +430,51 @@ *(.initcall7.init) \ *(.initcall7s.init) -#define PERCPU(align) \ - . = ALIGN(align); \ - VMLINUX_SYMBOL(__per_cpu_start) = .; \ - .data.percpu : AT(ADDR(.data.percpu) - LOAD_OFFSET) { \ +#define PERCPU_PROLOG(vaddr) \ + VMLINUX_SYMBOL(__per_cpu_load) = .; \ + .data.percpu vaddr : AT(__per_cpu_load - LOAD_OFFSET) { \ + VMLINUX_SYMBOL(__per_cpu_start) = .; + +#define PERCPU_EPILOG(phdr) \ + VMLINUX_SYMBOL(__per_cpu_end) = .; \ + } phdr \ + . = __per_cpu_load + SIZEOF(.data.percpu); + +/** + * PERCPU_VADDR - define output section for percpu area + * @vaddr: explicit base address (optional) + * @phdr: destination PHDR (optional) + * + * Macro which expands to output section for percpu area. If @vaddr + * is not blank, it specifies explicit base address and all percpu + * symbols will be offset from the given address. If blank, @vaddr + * always equals @laddr + LOAD_OFFSET. + * + * @phdr defines the output PHDR to use if not blank. Be warned that + * output PHDR is sticky. If @phdr is specified, the next output + * section in the linker script will go there too. @phdr should have + * a leading colon. + * + * This macro defines three symbols, __per_cpu_load, __per_cpu_start + * and __per_cpu_end. The first one is the vaddr of loaded percpu + * init data. __per_cpu_start equals @vaddr and __per_cpu_end is the + * end offset. + */ +#define PERCPU_VADDR(vaddr, phdr) \ + PERCPU_PROLOG(vaddr) \ *(.data.percpu.page_aligned) \ *(.data.percpu) \ *(.data.percpu.shared_aligned) \ - } \ - VMLINUX_SYMBOL(__per_cpu_end) = .; + PERCPU_EPILOG(phdr) + +/** + * PERCPU - define output section for percpu area, simple version + * @align: required alignment + * + * Align to @align and outputs output section for percpu area. This + * macro doesn't maniuplate @vaddr or @phdr and __per_cpu_load and + * __per_cpu_start will be identical. + */ +#define PERCPU(align) \ + . = ALIGN(align); \ + PERCPU_VADDR( , ) -- 2.34.1