From 06d67d54741a5bfefa31945ef195dfa748c29025 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Mon, 10 Oct 2005 22:29:05 +1000 Subject: [PATCH] powerpc: make process.c suitable for both 32-bit and 64-bit Signed-off-by: Paul Mackerras --- arch/powerpc/kernel/init_task.c | 36 +++ arch/powerpc/kernel/process.c | 436 ++++++++++++++++++++++---------- include/asm-powerpc/elf.h | 6 +- 3 files changed, 347 insertions(+), 131 deletions(-) create mode 100644 arch/powerpc/kernel/init_task.c diff --git a/arch/powerpc/kernel/init_task.c b/arch/powerpc/kernel/init_task.c new file mode 100644 index 000000000000..941043ae040f --- /dev/null +++ b/arch/powerpc/kernel/init_task.c @@ -0,0 +1,36 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +static struct fs_struct init_fs = INIT_FS; +static struct files_struct init_files = INIT_FILES; +static struct signal_struct init_signals = INIT_SIGNALS(init_signals); +static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); +struct mm_struct init_mm = INIT_MM(init_mm); + +EXPORT_SYMBOL(init_mm); + +/* + * Initial thread structure. + * + * We need to make sure that this is 16384-byte aligned due to the + * way process stacks are handled. This is done by having a special + * "init_task" linker map entry.. + */ +union thread_union init_thread_union + __attribute__((__section__(".data.init_task"))) = + { INIT_THREAD_INFO(init_task) }; + +/* + * Initial task structure. 
+ * + * All other task structs will be allocated on slabs in fork.c + */ +struct task_struct init_task = INIT_TASK(init_task); + +EXPORT_SYMBOL(init_task); diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index ae316e9ed581..f09908a0beea 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -36,6 +36,8 @@ #include #include #include +#include +#include #include #include @@ -44,6 +46,11 @@ #include #include #include +#ifdef CONFIG_PPC64 +#include +#include +#include +#endif extern unsigned long _get_SP(void); @@ -53,26 +60,6 @@ struct task_struct *last_task_used_altivec = NULL; struct task_struct *last_task_used_spe = NULL; #endif -static struct fs_struct init_fs = INIT_FS; -static struct files_struct init_files = INIT_FILES; -static struct signal_struct init_signals = INIT_SIGNALS(init_signals); -static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); -struct mm_struct init_mm = INIT_MM(init_mm); -EXPORT_SYMBOL(init_mm); - -/* this is 8kB-aligned so we can get to the thread_info struct - at the base of it from the stack pointer with 1 integer instruction. */ -union thread_union init_thread_union - __attribute__((__section__(".data.init_task"))) = -{ INIT_THREAD_INFO(init_task) }; - -/* initial task structure */ -struct task_struct init_task = INIT_TASK(init_task); -EXPORT_SYMBOL(init_task); - -/* only used to get secondary processor up */ -struct task_struct *current_set[NR_CPUS] = {&init_task, }; - /* * Make sure the floating-point register state in the * the thread_struct is up to date for task tsk. 
@@ -237,7 +224,10 @@ int set_dabr(unsigned long dabr) return ret; } +#ifdef CONFIG_PPC64 +DEFINE_PER_CPU(struct cpu_usage, cpu_usage_array); static DEFINE_PER_CPU(unsigned long, current_dabr); +#endif struct task_struct *__switch_to(struct task_struct *prev, struct task_struct *new) @@ -308,10 +298,27 @@ struct task_struct *__switch_to(struct task_struct *prev, set_dabr(new->thread.dabr); __get_cpu_var(current_dabr) = new->thread.dabr; } + + flush_tlb_pending(); #endif new_thread = &new->thread; old_thread = &current->thread; + +#ifdef CONFIG_PPC64 + /* + * Collect processor utilization data per process + */ + if (firmware_has_feature(FW_FEATURE_SPLPAR)) { + struct cpu_usage *cu = &__get_cpu_var(cpu_usage_array); + long unsigned start_tb, current_tb; + start_tb = old_thread->start_tb; + cu->current_tb = current_tb = mfspr(SPRN_PURR); + old_thread->accum_tb += (current_tb - start_tb); + new_thread->start_tb = current_tb; + } +#endif + local_irq_save(flags); last = _switch(old_thread, new_thread); @@ -320,37 +327,106 @@ struct task_struct *__switch_to(struct task_struct *prev, return last; } +static int instructions_to_print = 16; + +#ifdef CONFIG_PPC64 +#define BAD_PC(pc) ((REGION_ID(pc) != KERNEL_REGION_ID) && \ + (REGION_ID(pc) != VMALLOC_REGION_ID)) +#else +#define BAD_PC(pc) ((pc) < KERNELBASE) +#endif + +static void show_instructions(struct pt_regs *regs) +{ + int i; + unsigned long pc = regs->nip - (instructions_to_print * 3 / 4 * + sizeof(int)); + + printk("Instruction dump:"); + + for (i = 0; i < instructions_to_print; i++) { + int instr; + + if (!(i % 8)) + printk("\n"); + + if (BAD_PC(pc) || __get_user(instr, (unsigned int *)pc)) { + printk("XXXXXXXX "); + } else { + if (regs->nip == pc) + printk("<%08x> ", instr); + else + printk("%08x ", instr); + } + + pc += sizeof(int); + } + + printk("\n"); +} + +static struct regbit { + unsigned long bit; + const char *name; +} msr_bits[] = { + {MSR_EE, "EE"}, + {MSR_PR, "PR"}, + {MSR_FP, "FP"}, + {MSR_ME, "ME"}, + 
{MSR_IR, "IR"}, + {MSR_DR, "DR"}, + {0, NULL} +}; + +static void printbits(unsigned long val, struct regbit *bits) +{ + const char *sep = ""; + + printk("<"); + for (; bits->bit; ++bits) + if (val & bits->bit) { + printk("%s%s", sep, bits->name); + sep = ","; + } + printk(">"); +} + +#ifdef CONFIG_PPC64 +#define REG "%016lX" +#define REGS_PER_LINE 4 +#define LAST_VOLATILE 13 +#else +#define REG "%08lX" +#define REGS_PER_LINE 8 +#define LAST_VOLATILE 12 +#endif + void show_regs(struct pt_regs * regs) { int i, trap; - printk("NIP: %08lX LR: %08lX SP: %08lX REGS: %p TRAP: %04lx %s\n", - regs->nip, regs->link, regs->gpr[1], regs, regs->trap, - print_tainted()); - printk("MSR: %08lx EE: %01x PR: %01x FP: %01x ME: %01x IR/DR: %01x%01x\n", - regs->msr, regs->msr&MSR_EE ? 1 : 0, regs->msr&MSR_PR ? 1 : 0, - regs->msr & MSR_FP ? 1 : 0,regs->msr&MSR_ME ? 1 : 0, - regs->msr&MSR_IR ? 1 : 0, - regs->msr&MSR_DR ? 1 : 0); + printk("NIP: "REG" LR: "REG" CTR: "REG"\n", + regs->nip, regs->link, regs->ctr); + printk("REGS: %p TRAP: %04lx %s (%s)\n", + regs, regs->trap, print_tainted(), system_utsname.release); + printk("MSR: "REG" ", regs->msr); + printbits(regs->msr, msr_bits); + printk(" CR: %08lX XER: %08lX\n", regs->ccr, regs->xer); trap = TRAP(regs); if (trap == 0x300 || trap == 0x600) - printk("DAR: %08lX, DSISR: %08lX\n", regs->dar, regs->dsisr); - printk("TASK = %p[%d] '%s' THREAD: %p\n", + printk("DAR: "REG", DSISR: "REG"\n", regs->dar, regs->dsisr); + printk("TASK = %p[%d] '%s' THREAD: %p", current, current->pid, current->comm, current->thread_info); - printk("Last syscall: %ld ", current->thread.last_syscall); #ifdef CONFIG_SMP printk(" CPU: %d", smp_processor_id()); #endif /* CONFIG_SMP */ for (i = 0; i < 32; i++) { - long r; - if ((i % 8) == 0) + if ((i % REGS_PER_LINE) == 0) printk("\n" KERN_INFO "GPR%02d: ", i); - if (__get_user(r, &regs->gpr[i])) - break; - printk("%08lX ", r); - if (i == 12 && !FULL_REGS(regs)) + printk(REG " ", regs->gpr[i]); + if (i == LAST_VOLATILE && 
!FULL_REGS(regs)) break; } printk("\n"); @@ -359,16 +435,20 @@ void show_regs(struct pt_regs * regs) * Lookup NIP late so we have the best change of getting the * above info out without failing */ - printk("NIP [%08lx] ", regs->nip); + printk("NIP ["REG"] ", regs->nip); print_symbol("%s\n", regs->nip); - printk("LR [%08lx] ", regs->link); + printk("LR ["REG"] ", regs->link); print_symbol("%s\n", regs->link); #endif show_stack(current, (unsigned long *) regs->gpr[1]); + if (!user_mode(regs)) + show_instructions(regs); } void exit_thread(void) { + kprobe_flush_task(current); + #ifndef CONFIG_SMP if (last_task_used_math == current) last_task_used_math = NULL; @@ -385,6 +465,14 @@ void exit_thread(void) void flush_thread(void) { +#ifdef CONFIG_PPC64 + struct thread_info *t = current_thread_info(); + + if (t->flags & _TIF_ABI_PENDING) + t->flags ^= (_TIF_ABI_PENDING | _TIF_32BIT); +#endif + kprobe_flush_task(current); + #ifndef CONFIG_SMP if (last_task_used_math == current) last_task_used_math = NULL; @@ -425,15 +513,13 @@ void prepare_to_copy(struct task_struct *tsk) /* * Copy a thread.. 
*/ -int -copy_thread(int nr, unsigned long clone_flags, unsigned long usp, - unsigned long unused, - struct task_struct *p, struct pt_regs *regs) +int copy_thread(int nr, unsigned long clone_flags, unsigned long usp, + unsigned long unused, struct task_struct *p, + struct pt_regs *regs) { struct pt_regs *childregs, *kregs; extern void ret_from_fork(void); unsigned long sp = (unsigned long)p->thread_info + THREAD_SIZE; - unsigned long childframe; CHECK_FULL_REGS(regs); /* Copy registers */ @@ -443,17 +529,26 @@ copy_thread(int nr, unsigned long clone_flags, unsigned long usp, if ((childregs->msr & MSR_PR) == 0) { /* for kernel thread, set `current' and stackptr in new task */ childregs->gpr[1] = sp + sizeof(struct pt_regs); +#ifdef CONFIG_PPC32 childregs->gpr[2] = (unsigned long) p; +#else + clear_ti_thread_flag(p->thread_info, TIF_32BIT); +#endif p->thread.regs = NULL; /* no user register state */ } else { childregs->gpr[1] = usp; p->thread.regs = childregs; - if (clone_flags & CLONE_SETTLS) - childregs->gpr[2] = childregs->gpr[6]; + if (clone_flags & CLONE_SETTLS) { +#ifdef CONFIG_PPC64 + if (!test_thread_flag(TIF_32BIT)) + childregs->gpr[13] = childregs->gpr[6]; + else +#endif + childregs->gpr[2] = childregs->gpr[6]; + } } childregs->gpr[3] = 0; /* Result from fork() */ sp -= STACK_FRAME_OVERHEAD; - childframe = sp; /* * The way this works is that at some point in the future @@ -467,9 +562,30 @@ copy_thread(int nr, unsigned long clone_flags, unsigned long usp, kregs = (struct pt_regs *) sp; sp -= STACK_FRAME_OVERHEAD; p->thread.ksp = sp; - kregs->nip = (unsigned long)ret_from_fork; +#ifdef CONFIG_PPC64 + if (cpu_has_feature(CPU_FTR_SLB)) { + unsigned long sp_vsid = get_kernel_vsid(sp); + + sp_vsid <<= SLB_VSID_SHIFT; + sp_vsid |= SLB_VSID_KERNEL; + if (cpu_has_feature(CPU_FTR_16M_PAGE)) + sp_vsid |= SLB_VSID_L; + + p->thread.ksp_vsid = sp_vsid; + } + + /* + * The PPC64 ABI makes use of a TOC to contain function + * pointers. 
The function (ret_from_fork) is actually a pointer + * to the TOC entry. The first entry is a pointer to the actual + * function. + */ + kregs->nip = *((unsigned long *)ret_from_fork); +#else + kregs->nip = (unsigned long)ret_from_fork; p->thread.last_syscall = -1; +#endif return 0; } @@ -477,18 +593,66 @@ copy_thread(int nr, unsigned long clone_flags, unsigned long usp, /* * Set up a thread for executing a new program */ -void start_thread(struct pt_regs *regs, unsigned long nip, unsigned long sp) +void start_thread(struct pt_regs *regs, unsigned long start, unsigned long sp) { +#ifdef CONFIG_PPC64 + /* ELF_PLAT_INIT left load_addr in gpr[2]; grab it before the memset */ + unsigned long load_addr = regs->gpr[2]; +#endif + set_fs(USER_DS); + + /* + * If we exec out of a kernel thread then thread.regs will not be + * set. Do it now. + */ + if (!current->thread.regs) { + unsigned long childregs = (unsigned long)current->thread_info + + THREAD_SIZE; + childregs -= sizeof(struct pt_regs); + current->thread.regs = (struct pt_regs *)childregs; + } + memset(regs->gpr, 0, sizeof(regs->gpr)); regs->ctr = 0; regs->link = 0; regs->xer = 0; regs->ccr = 0; - regs->mq = 0; - regs->nip = nip; regs->gpr[1] = sp; + +#ifdef CONFIG_PPC32 + regs->mq = 0; + regs->nip = start; regs->msr = MSR_USER; +#else + if (!test_thread_flag(TIF_32BIT)) { + unsigned long entry, toc; + + /* start is a relocated pointer to the function descriptor for + * the elf _start routine. The first entry in the function + * descriptor is the entry address of _start and the second + * entry is the TOC value we need to use. + */ + __get_user(entry, (unsigned long __user *)start); + __get_user(toc, (unsigned long __user *)start+1); + + /* Check whether the e_entry function descriptor entries + * need to be relocated before we can use them. 
+ */ + if (load_addr != 0) { + entry += load_addr; + toc += load_addr; + } + regs->nip = entry; + regs->gpr[2] = toc; + regs->msr = MSR_USER64; + } else { + regs->nip = start; + regs->gpr[2] = 0; + regs->msr = MSR_USER32; + } +#endif + #ifndef CONFIG_SMP if (last_task_used_math == current) last_task_used_math = NULL; @@ -506,6 +665,7 @@ void start_thread(struct pt_regs *regs, unsigned long nip, unsigned long sp) #ifdef CONFIG_ALTIVEC memset(current->thread.vr, 0, sizeof(current->thread.vr)); memset(&current->thread.vscr, 0, sizeof(current->thread.vscr)); + current->thread.vscr.u[3] = 0x00010000; /* Java mode disabled */ current->thread.vrsave = 0; current->thread.used_vr = 0; #endif /* CONFIG_ALTIVEC */ @@ -532,22 +692,23 @@ int set_fpexc_mode(struct task_struct *tsk, unsigned int val) #ifdef CONFIG_SPE tsk->thread.fpexc_mode = val & (PR_FP_EXC_SW_ENABLE | PR_FP_ALL_EXCEPT); + return 0; #else return -EINVAL; #endif - } else { - /* on a CONFIG_SPE this does not hurt us. The bits that - * __pack_fe01 use do not overlap with bits used for - * PR_FP_EXC_SW_ENABLE. Additionally, the MSR[FE0,FE1] bits - * on CONFIG_SPE implementations are reserved so writing to - * them does not change anything */ - if (val > PR_FP_EXC_PRECISE) - return -EINVAL; - tsk->thread.fpexc_mode = __pack_fe01(val); - if (regs != NULL && (regs->msr & MSR_FP) != 0) - regs->msr = (regs->msr & ~(MSR_FE0|MSR_FE1)) - | tsk->thread.fpexc_mode; } + + /* on a CONFIG_SPE this does not hurt us. The bits that + * __pack_fe01 use do not overlap with bits used for + * PR_FP_EXC_SW_ENABLE. 
Additionally, the MSR[FE0,FE1] bits + * on CONFIG_SPE implementations are reserved so writing to + * them does not change anything */ + if (val > PR_FP_EXC_PRECISE) + return -EINVAL; + tsk->thread.fpexc_mode = __pack_fe01(val); + if (regs != NULL && (regs->msr & MSR_FP) != 0) + regs->msr = (regs->msr & ~(MSR_FE0|MSR_FE1)) + | tsk->thread.fpexc_mode; return 0; } @@ -566,6 +727,8 @@ int get_fpexc_mode(struct task_struct *tsk, unsigned long adr) return put_user(val, (unsigned int __user *) adr); } +#define TRUNC_PTR(x) ((typeof(x))(((unsigned long)(x)) & 0xffffffff)) + int sys_clone(unsigned long clone_flags, unsigned long usp, int __user *parent_tidp, void __user *child_threadptr, int __user *child_tidp, int p6, @@ -574,6 +737,12 @@ int sys_clone(unsigned long clone_flags, unsigned long usp, CHECK_FULL_REGS(regs); if (usp == 0) usp = regs->gpr[1]; /* stack pointer for child */ +#ifdef CONFIG_PPC64 + if (test_thread_flag(TIF_32BIT)) { + parent_tidp = TRUNC_PTR(parent_tidp); + child_tidp = TRUNC_PTR(child_tidp); + } +#endif return do_fork(clone_flags, usp, regs, 0, parent_tidp, child_tidp); } @@ -599,7 +768,7 @@ int sys_execve(unsigned long a0, unsigned long a1, unsigned long a2, struct pt_regs *regs) { int error; - char * filename; + char *filename; filename = getname((char __user *) a0); error = PTR_ERR(filename); @@ -644,67 +813,19 @@ static int validate_sp(unsigned long sp, struct task_struct *p, return 0; } -void dump_stack(void) -{ - show_stack(current, NULL); -} - -EXPORT_SYMBOL(dump_stack); - -void show_stack(struct task_struct *tsk, unsigned long *stack) -{ - unsigned long sp, stack_top, prev_sp, ret; - int count = 0; - unsigned long next_exc = 0; - struct pt_regs *regs; - extern char ret_from_except, ret_from_except_full, ret_from_syscall; - - sp = (unsigned long) stack; - if (tsk == NULL) - tsk = current; - if (sp == 0) { - if (tsk == current) - asm("mr %0,1" : "=r" (sp)); - else - sp = tsk->thread.ksp; - } - - prev_sp = (unsigned long) (tsk->thread_info + 
1); - stack_top = (unsigned long) tsk->thread_info + THREAD_SIZE; - while (count < 16 && sp > prev_sp && sp < stack_top && (sp & 3) == 0) { - if (count == 0) { - printk("Call trace:"); -#ifdef CONFIG_KALLSYMS - printk("\n"); -#endif - } else { - if (next_exc) { - ret = next_exc; - next_exc = 0; - } else - ret = *(unsigned long *)(sp + 4); - printk(" [%08lx] ", ret); -#ifdef CONFIG_KALLSYMS - print_symbol("%s", ret); - printk("\n"); -#endif - if (ret == (unsigned long) &ret_from_except - || ret == (unsigned long) &ret_from_except_full - || ret == (unsigned long) &ret_from_syscall) { - /* sp + 16 points to an exception frame */ - regs = (struct pt_regs *) (sp + 16); - if (sp + 16 + sizeof(*regs) <= stack_top) - next_exc = regs->nip; - } - } - ++count; - sp = *(unsigned long *)sp; - } -#ifndef CONFIG_KALLSYMS - if (count > 0) - printk("\n"); +#ifdef CONFIG_PPC64 +#define MIN_STACK_FRAME 112 /* same as STACK_FRAME_OVERHEAD, in fact */ +#define FRAME_LR_SAVE 2 +#define INT_FRAME_SIZE (sizeof(struct pt_regs) + STACK_FRAME_OVERHEAD + 288) +#define REGS_MARKER 0x7265677368657265ul +#define FRAME_MARKER 12 +#else +#define MIN_STACK_FRAME 16 +#define FRAME_LR_SAVE 1 +#define INT_FRAME_SIZE (sizeof(struct pt_regs) + STACK_FRAME_OVERHEAD) +#define REGS_MARKER 0x72656773ul +#define FRAME_MARKER 2 #endif -} unsigned long get_wchan(struct task_struct *p) { @@ -715,15 +836,15 @@ unsigned long get_wchan(struct task_struct *p) return 0; sp = p->thread.ksp; - if (!validate_sp(sp, p, 16)) + if (!validate_sp(sp, p, MIN_STACK_FRAME)) return 0; do { sp = *(unsigned long *)sp; - if (!validate_sp(sp, p, 16)) + if (!validate_sp(sp, p, MIN_STACK_FRAME)) return 0; if (count > 0) { - ip = *(unsigned long *)(sp + 4); + ip = ((unsigned long *)sp)[FRAME_LR_SAVE]; if (!in_sched_functions(ip)) return ip; } @@ -731,3 +852,64 @@ unsigned long get_wchan(struct task_struct *p) return 0; } EXPORT_SYMBOL(get_wchan); + +static int kstack_depth_to_print = 64; + +void show_stack(struct task_struct *tsk, 
unsigned long *stack) +{ + unsigned long sp, ip, lr, newsp; + int count = 0; + int firstframe = 1; + + sp = (unsigned long) stack; + if (tsk == NULL) + tsk = current; + if (sp == 0) { + if (tsk == current) + asm("mr %0,1" : "=r" (sp)); + else + sp = tsk->thread.ksp; + } + + lr = 0; + printk("Call Trace:\n"); + do { + if (!validate_sp(sp, tsk, MIN_STACK_FRAME)) + return; + + stack = (unsigned long *) sp; + newsp = stack[0]; + ip = stack[FRAME_LR_SAVE]; + if (!firstframe || ip != lr) { + printk("["REG"] ["REG"] ", sp, ip); + print_symbol("%s", ip); + if (firstframe) + printk(" (unreliable)"); + printk("\n"); + } + firstframe = 0; + + /* + * See if this is an exception frame. + * We look for the "regshere" marker in the current frame. + */ + if (validate_sp(sp, tsk, INT_FRAME_SIZE) + && stack[FRAME_MARKER] == REGS_MARKER) { + struct pt_regs *regs = (struct pt_regs *) + (sp + STACK_FRAME_OVERHEAD); + printk("--- Exception: %lx", regs->trap); + print_symbol(" at %s\n", regs->nip); + lr = regs->link; + print_symbol(" LR = %s\n", lr); + firstframe = 1; + } + + sp = newsp; + } while (count++ < kstack_depth_to_print); +} + +void dump_stack(void) +{ + show_stack(current, NULL); +} +EXPORT_SYMBOL(dump_stack); diff --git a/include/asm-powerpc/elf.h b/include/asm-powerpc/elf.h index f0a6779fbe52..d22b10021b5d 100644 --- a/include/asm-powerpc/elf.h +++ b/include/asm-powerpc/elf.h @@ -214,10 +214,8 @@ extern int dump_task_fpu(struct task_struct *, elf_fpregset_t *); but it's not easy, and we've already done it here. */ # define ELF_HWCAP (cur_cpu_spec->cpu_user_features) #ifdef __powerpc64__ -# define ELF_PLAT_INIT(_r, load_addr) do { \ - memset(_r->gpr, 0, sizeof(_r->gpr)); \ - _r->ctr = _r->link = _r->xer = _r->ccr = 0; \ - _r->gpr[2] = load_addr; \ +# define ELF_PLAT_INIT(_r, load_addr) do { \ + _r->gpr[2] = load_addr; \ } while (0) #endif /* __powerpc64__ */ -- 2.34.1