/*
 * Copyright 2007 Andi Kleen, SUSE Labs.
 * Subject to the GPL, v.2
 *
 * This contains most of the x86 vDSO kernel-side code.
 */

#include <linux/mm.h>
#include <linux/err.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/random.h>
#include <linux/elf.h>
#include <linux/cpu.h>
#include <asm/vgtod.h>
#include <asm/proto.h>
#include <asm/vdso.h>
#include <asm/page.h>
#include <asm/hpet.h>
#include <asm/desc.h>

#if defined(CONFIG_X86_64)
unsigned int __read_mostly vdso64_enabled = 1;
#endif

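/*
 * Pre-compute the struct pages backing the vdso text and patch in any
 * boot-time instruction alternatives before the image is first mapped.
 */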
void __init init_vdso_image(const struct vdso_image *image)
{
	int i;
	int npages = (image->size) / PAGE_SIZE;

	BUG_ON(image->size % PAGE_SIZE != 0);
	for (i = 0; i < npages; i++)
		image->text_mapping.pages[i] =
			virt_to_page(image->data + i*PAGE_SIZE);

	apply_alternatives((struct alt_instr *)(image->data + image->alt),
			   (struct alt_instr *)(image->data + image->alt +
						image->alt_len));
}

struct linux_binprm;

/*
 * Put the vdso above the (randomized) stack with another randomized
 * offset.  This way there is no hole in the middle of the address space.
 * To save memory, make sure it is still in the same PTE as the stack top.
 * This doesn't give that many random bits.
 *
 * Only used for the 64-bit and x32 vdsos.
 */
static unsigned long vdso_addr(unsigned long start, unsigned len)
{
	unsigned long addr, end;
	unsigned offset;

	end = (start + PMD_SIZE - 1) & PMD_MASK;
	if (end >= TASK_SIZE_MAX)
		end = TASK_SIZE_MAX;
	end -= len;

	/* This loses some more bits than a modulo, but is cheaper */
	offset = get_random_int() & (PTRS_PER_PTE - 1);
	addr = start + (offset << PAGE_SHIFT);
	if (addr >= end)
		addr = end;

	/*
	 * page-align it here so that get_unmapped_area doesn't
	 * align it wrongfully again to the next page. addr can come in 4K
	 * unaligned here as a result of stack start randomization.
	 */
	addr = PAGE_ALIGN(addr);
	addr = align_vdso_addr(addr);

	return addr;
}

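/*
 * Worked example (assuming x86-64 with 4K pages): PTRS_PER_PTE is 512,
 * so the "& (PTRS_PER_PTE - 1)" above supplies at most 9 bits of
 * randomness, and the result always stays within one 2MB PMD of the
 * stack top.  The chosen placement is visible from userspace as the
 * [vdso] and [vvar] lines in /proc/self/maps.
 */
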
static int map_vdso(const struct vdso_image *image, bool calculate_addr)
{
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma;
	unsigned long addr, text_start;
	int ret = 0;
	static struct page *no_pages[] = {NULL};
	static struct vm_special_mapping vvar_mapping = {
		.name = "[vvar]",
		.pages = no_pages,
	};

	if (calculate_addr)
		addr = vdso_addr(current->mm->start_stack,
				 image->size - image->sym_vvar_start);
	else
		addr = 0;

	down_write(&mm->mmap_sem);

	addr = get_unmapped_area(NULL, addr,
				 image->size - image->sym_vvar_start, 0, 0);
	if (IS_ERR_VALUE(addr)) {
		ret = addr;
		goto up_fail;
	}

	text_start = addr - image->sym_vvar_start;
	current->mm->context.vdso = (void __user *)text_start;

	/*
	 * MAYWRITE to allow gdb to COW and set breakpoints
	 */
	vma = _install_special_mapping(mm, text_start, image->size,
				       VM_READ|VM_EXEC|
				       VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
				       &image->text_mapping);
	if (IS_ERR(vma)) {
		ret = PTR_ERR(vma);
		goto up_fail;
	}

	vma = _install_special_mapping(mm, addr, -image->sym_vvar_start,
				       VM_READ|VM_MAYREAD, &vvar_mapping);
	if (IS_ERR(vma)) {
		ret = PTR_ERR(vma);
		goto up_fail;
	}

	if (image->sym_vvar_page)
		ret = remap_pfn_range(vma,
				      text_start + image->sym_vvar_page,
				      __pa_symbol(&__vvar_page) >> PAGE_SHIFT,
				      PAGE_SIZE, PAGE_READONLY);
	if (ret)
		goto up_fail;

#ifdef CONFIG_HPET_TIMER
	if (hpet_address && image->sym_hpet_page) {
		ret = io_remap_pfn_range(vma,
					 text_start + image->sym_hpet_page,
					 hpet_address >> PAGE_SHIFT, PAGE_SIZE,
					 pgprot_noncached(PAGE_READONLY));
		if (ret)
			goto up_fail;
	}
#endif

up_fail:
	if (ret)
		current->mm->context.vdso = NULL;

	up_write(&mm->mmap_sem);
	return ret;
}

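/*
 * Userspace sketch (illustrative, not part of this file): the text
 * mapping installed above is advertised to the new process through the
 * auxiliary vector, so the vdso can be located without parsing
 * /proc/self/maps:
 *
 *	#include <stdio.h>
 *	#include <sys/auxv.h>
 *
 *	int main(void)
 *	{
 *		// ELF header of the vdso text mapping, or 0 if unmapped
 *		void *vdso = (void *)getauxval(AT_SYSINFO_EHDR);
 *		printf("vdso at %p\n", vdso);
 *		return 0;
 *	}
 */
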
#if defined(CONFIG_X86_32) || defined(CONFIG_COMPAT)
static int load_vdso32(void)
{
	int ret;

	if (vdso32_enabled != 1)  /* Other values all mean "disabled" */
		return 0;

	ret = map_vdso(selected_vdso32, false);
	if (ret)
		return ret;

	/* Tell the SYSENTER return path where to come back to in the vdso */
	if (selected_vdso32->sym_VDSO32_SYSENTER_RETURN)
		current_thread_info()->sysenter_return =
			current->mm->context.vdso +
			selected_vdso32->sym_VDSO32_SYSENTER_RETURN;

	return 0;
}
#endif

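/*
 * Note: 32-bit userspace locates the vdso's __kernel_vsyscall entry via
 * the AT_SYSINFO auxiliary vector entry, which the ELF loader derives
 * from the mapping established by load_vdso32() above.
 */
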
#ifdef CONFIG_X86_64
int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
{
	if (!vdso64_enabled)
		return 0;

	return map_vdso(&vdso_image_64, true);
}

#ifdef CONFIG_COMPAT
int compat_arch_setup_additional_pages(struct linux_binprm *bprm,
				       int uses_interp)
{
#ifdef CONFIG_X86_X32_ABI
	if (test_thread_flag(TIF_X32)) {
		if (!vdso64_enabled)
			return 0;

		return map_vdso(&vdso_image_x32, true);
	}
#endif

	return load_vdso32();
}
#endif

#else
int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
{
	return load_vdso32();
}
#endif

#ifdef CONFIG_X86_64
static __init int vdso_setup(char *s)
{
	vdso64_enabled = simple_strtoul(s, NULL, 0);
	return 0;
}
__setup("vdso=", vdso_setup);
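
/*
 * Example: booting with "vdso=0" clears vdso64_enabled, which makes
 * arch_setup_additional_pages() above skip the 64-bit vdso mapping.
 */
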
static void vgetcpu_cpu_init(void *arg)
{
	int cpu = smp_processor_id();
	struct desc_struct d = { };
	unsigned long node = 0;
#ifdef CONFIG_NUMA
	node = cpu_to_node(cpu);
#endif
	if (cpu_has(&cpu_data(cpu), X86_FEATURE_RDTSCP))
		write_rdtscp_aux((node << 12) | cpu);

	/*
	 * Store the CPU number in the segment limit so that user space can
	 * load it quickly in vgetcpu: the low 12 bits hold the CPU and the
	 * next 8 bits hold the node.
	 */
	d.limit0 = cpu | ((node & 0xf) << 12);
	d.limit = node >> 4;
	d.type = 5;		/* RO data, expand down, accessed */
	d.dpl = 3;		/* Visible to user code */
	d.s = 1;		/* Not a system segment */
	d.p = 1;		/* Present */
	d.d = 1;		/* 32-bit */

	write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_PER_CPU, &d,
			DESCTYPE_S);
}

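/*
 * Userspace sketch (illustrative, not part of this file): with the
 * descriptor above installed, the cpu/node pair can be read back with a
 * single unprivileged LSL instruction, the same trick the vdso's getcpu
 * code falls back on when RDTSCP is unavailable:
 *
 *	unsigned int p;
 *	// 0x7b assumes __PER_CPU_SEG == GDT_ENTRY_PER_CPU*8 + 3 with
 *	// GDT_ENTRY_PER_CPU == 15; see asm/segment.h for the real value.
 *	asm("lsl %1, %0" : "=r" (p) : "r" (0x7bu));
 *	unsigned int cpu  = p & 0xfff;	// low 12 bits
 *	unsigned int node = p >> 12;	// next 8 bits
 */
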
static int
vgetcpu_cpu_notifier(struct notifier_block *n, unsigned long action, void *arg)
{
	long cpu = (long)arg;

	if (action == CPU_ONLINE || action == CPU_ONLINE_FROZEN)
		smp_call_function_single(cpu, vgetcpu_cpu_init, NULL, 1);

	return NOTIFY_DONE;
}

static int __init init_vdso(void)
{
	init_vdso_image(&vdso_image_64);

#ifdef CONFIG_X86_X32_ABI
	init_vdso_image(&vdso_image_x32);
#endif

	cpu_notifier_register_begin();

	on_each_cpu(vgetcpu_cpu_init, NULL, 1);
	/* notifier priority > KVM */
	__hotcpu_notifier(vgetcpu_cpu_notifier, 30);

	cpu_notifier_register_done();

	return 0;
}
subsys_initcall(init_vdso);
#endif /* CONFIG_X86_64 */