From aec5e0e1c179fac4bbca4007a3f0d3107275a73c Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Mon, 25 Dec 2006 09:51:47 +0900 Subject: [PATCH] sh: Use a per-cpu ASID cache. Previously this was implemented using a global cache, cache this per-CPU instead and bump up the number of context IDs to match NR_CPUS. Signed-off-by: Paul Mundt --- arch/sh/kernel/cpu/init.c | 11 ++++-- arch/sh/kernel/process.c | 66 +++++++++++++++--------------------- arch/sh/mm/init.c | 5 --- arch/sh/mm/tlb-flush.c | 26 ++++++++------ include/asm-sh/mmu.h | 20 ++++------- include/asm-sh/mmu_context.h | 61 +++++++++++++++++++-------------- include/asm-sh/processor.h | 1 + 7 files changed, 97 insertions(+), 93 deletions(-) diff --git a/arch/sh/kernel/cpu/init.c b/arch/sh/kernel/cpu/init.c index 48121766e8d2..6c3c7687e81f 100644 --- a/arch/sh/kernel/cpu/init.c +++ b/arch/sh/kernel/cpu/init.c @@ -3,7 +3,7 @@ * * CPU init code * - * Copyright (C) 2002, 2003 Paul Mundt + * Copyright (C) 2002 - 2006 Paul Mundt * Copyright (C) 2003 Richard Curnow * * This file is subject to the terms and conditions of the GNU General Public @@ -12,6 +12,8 @@ */ #include #include +#include +#include #include #include #include @@ -218,6 +220,12 @@ asmlinkage void __init sh_cpu_init(void) clear_used_math(); } + /* + * Initialize the per-CPU ASID cache very early, since the + * TLB flushing routines depend on this being setup. + */ + current_cpu_data.asid_cache = NO_CONTEXT; + #ifdef CONFIG_SH_DSP /* Probe for DSP */ dsp_init(); @@ -240,4 +248,3 @@ asmlinkage void __init sh_cpu_init(void) ubc_wakeup(); #endif } - diff --git a/arch/sh/kernel/process.c b/arch/sh/kernel/process.c index cc8f306fd682..0298f0faa6e6 100644 --- a/arch/sh/kernel/process.c +++ b/arch/sh/kernel/process.c @@ -1,42 +1,30 @@ -/* $Id: process.c,v 1.28 2004/05/05 16:54:23 lethal Exp $ +/* + * arch/sh/kernel/process.c * - * linux/arch/sh/kernel/process.c + * This file handles the architecture-dependent parts of process handling.. * * Copyright (C) 1995 Linus Torvalds * * SuperH version: Copyright (C) 1999, 2000 Niibe Yutaka & Kaz Kojima * Copyright (C) 2006 Lineo Solutions Inc. support SH4A UBC + * Copyright (C) 2002 - 2006 Paul Mundt */ - -/* - * This file handles the architecture-dependent parts of process handling.. - */ - #include -#include #include #include -#include -#include #include -#include #include #include - -#include #include #include -#include #include -static int hlt_counter=0; - +static int hlt_counter; int ubc_usercnt = 0; #define HARD_IDLE_TIMEOUT (HZ / 3) void (*pm_idle)(void); - void (*pm_power_off)(void); EXPORT_SYMBOL(pm_power_off); @@ -44,14 +32,12 @@ void disable_hlt(void) { hlt_counter++; } - EXPORT_SYMBOL(disable_hlt); void enable_hlt(void) { hlt_counter--; } - EXPORT_SYMBOL(enable_hlt); void default_idle(void) @@ -152,19 +138,21 @@ __asm__(".align 5\n" ".align 2\n\t" "1:.long do_exit"); +/* Don't use this in BL=1(cli). Or else, CPU resets! */ int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags) -{ /* Don't use this in BL=1(cli). Or else, CPU resets! */ +{ struct pt_regs regs; memset(®s, 0, sizeof(regs)); - regs.regs[4] = (unsigned long) arg; - regs.regs[5] = (unsigned long) fn; + regs.regs[4] = (unsigned long)arg; + regs.regs[5] = (unsigned long)fn; - regs.pc = (unsigned long) kernel_thread_helper; + regs.pc = (unsigned long)kernel_thread_helper; regs.sr = (1 << 30); /* Ok, create the new process.. */ - return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0, ®s, 0, NULL, NULL); + return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0, + ®s, 0, NULL, NULL); } /* @@ -211,21 +199,20 @@ int dump_fpu(struct pt_regs *regs, elf_fpregset_t *fpu) return fpvalid; } -/* +/* * Capture the user space registers if the task is not running (in user space) */ int dump_task_regs(struct task_struct *tsk, elf_gregset_t *regs) { struct pt_regs ptregs; - + ptregs = *task_pt_regs(tsk); elf_core_copy_regs(regs, &ptregs); return 1; } -int -dump_task_fpu (struct task_struct *tsk, elf_fpregset_t *fpu) +int dump_task_fpu(struct task_struct *tsk, elf_fpregset_t *fpu) { int fpvalid = 0; @@ -263,12 +250,14 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long usp, childregs->regs[15] = usp; ti->addr_limit = USER_DS; } else { - childregs->regs[15] = (unsigned long)task_stack_page(p) + THREAD_SIZE; + childregs->regs[15] = (unsigned long)task_stack_page(p) + + THREAD_SIZE; ti->addr_limit = KERNEL_DS; } - if (clone_flags & CLONE_SETTLS) { + + if (clone_flags & CLONE_SETTLS) childregs->gbr = childregs->regs[0]; - } + childregs->regs[0] = 0; /* Set return value for child */ p->thread.sp = (unsigned long) childregs; @@ -280,8 +269,7 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long usp, } /* Tracing by user break controller. */ -static void -ubc_set_tracing(int asid, unsigned long pc) +static void ubc_set_tracing(int asid, unsigned long pc) { #if defined(CONFIG_CPU_SH4A) unsigned long val; @@ -297,7 +285,7 @@ ubc_set_tracing(int asid, unsigned long pc) val = (UBC_CRR_RES | UBC_CRR_PCB | UBC_CRR_BIE); ctrl_outl(val, UBC_CRR0); - /* Read UBC register that we writed last. For chekking UBC Register changed */ + /* Read UBC register that we wrote last, for checking update */ val = ctrl_inl(UBC_CRR0); #else /* CONFIG_CPU_SH4A */ @@ -325,7 +313,8 @@ ubc_set_tracing(int asid, unsigned long pc) * switch_to(x,y) should switch tasks from x to y. * */ -struct task_struct *__switch_to(struct task_struct *prev, struct task_struct *next) +struct task_struct *__switch_to(struct task_struct *prev, + struct task_struct *next) { #if defined(CONFIG_SH_FPU) unlazy_fpu(prev, task_pt_regs(prev)); @@ -354,7 +343,7 @@ struct task_struct *__switch_to(struct task_struct *prev, struct task_struct *ne #ifdef CONFIG_MMU /* * Restore the kernel mode register - * k7 (r7_bank1) + * k7 (r7_bank1) */ asm volatile("ldc %0, r7_bank" : /* no output */ @@ -367,7 +356,7 @@ struct task_struct *__switch_to(struct task_struct *prev, struct task_struct *ne else if (next->thread.ubc_pc && next->mm) { int asid = 0; #ifdef CONFIG_MMU - asid |= next->mm->context.id & MMU_CONTEXT_ASID_MASK; + asid |= cpu_asid(smp_processor_id(), next->mm); #endif ubc_set_tracing(asid, next->thread.ubc_pc); } else { @@ -405,7 +394,8 @@ asmlinkage int sys_clone(unsigned long clone_flags, unsigned long newsp, if (!newsp) newsp = regs->regs[15]; return do_fork(clone_flags, newsp, regs, 0, - (int __user *)parent_tidptr, (int __user *)child_tidptr); + (int __user *)parent_tidptr, + (int __user *)child_tidptr); } /* diff --git a/arch/sh/mm/init.c b/arch/sh/mm/init.c index bf0c263cb6fd..d172065182fb 100644 --- a/arch/sh/mm/init.c +++ b/arch/sh/mm/init.c @@ -39,11 +39,6 @@ DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); pgd_t swapper_pg_dir[PTRS_PER_PGD]; -/* - * Cache of MMU context last used. - */ -unsigned long mmu_context_cache = NO_CONTEXT; - #ifdef CONFIG_MMU /* It'd be good if these lines were in the standard header file. */ #define START_PFN (NODE_DATA(0)->bdata->node_boot_start >> PAGE_SHIFT) diff --git a/arch/sh/mm/tlb-flush.c b/arch/sh/mm/tlb-flush.c index ef3e4d477864..b829c17c1d17 100644 --- a/arch/sh/mm/tlb-flush.c +++ b/arch/sh/mm/tlb-flush.c @@ -16,12 +16,14 @@ void flush_tlb_page(struct vm_area_struct *vma, unsigned long page) { - if (vma->vm_mm && vma->vm_mm->context.id != NO_CONTEXT) { + unsigned int cpu = smp_processor_id(); + + if (vma->vm_mm && cpu_context(cpu, vma->vm_mm) != NO_CONTEXT) { unsigned long flags; unsigned long asid; unsigned long saved_asid = MMU_NO_ASID; - asid = vma->vm_mm->context.id & MMU_CONTEXT_ASID_MASK; + asid = cpu_asid(cpu, vma->vm_mm); page &= PAGE_MASK; local_irq_save(flags); @@ -40,22 +42,23 @@ void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end) { struct mm_struct *mm = vma->vm_mm; + unsigned int cpu = smp_processor_id(); - if (mm->context.id != NO_CONTEXT) { + if (cpu_context(cpu, mm) != NO_CONTEXT) { unsigned long flags; int size; local_irq_save(flags); size = (end - start + (PAGE_SIZE - 1)) >> PAGE_SHIFT; if (size > (MMU_NTLB_ENTRIES/4)) { /* Too many TLB to flush */ - mm->context.id = NO_CONTEXT; + cpu_context(cpu, mm) = NO_CONTEXT; if (mm == current->mm) - activate_context(mm); + activate_context(mm, cpu); } else { unsigned long asid; unsigned long saved_asid = MMU_NO_ASID; - asid = mm->context.id & MMU_CONTEXT_ASID_MASK; + asid = cpu_asid(cpu, mm); start &= PAGE_MASK; end += (PAGE_SIZE - 1); end &= PAGE_MASK; @@ -76,6 +79,7 @@ void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, void flush_tlb_kernel_range(unsigned long start, unsigned long end) { + unsigned int cpu = smp_processor_id(); unsigned long flags; int size; @@ -87,7 +91,7 @@ void flush_tlb_kernel_range(unsigned long start, unsigned long end) unsigned long asid; unsigned long saved_asid = get_asid(); - asid = init_mm.context.id & MMU_CONTEXT_ASID_MASK; + asid = cpu_asid(cpu, &init_mm); start &= PAGE_MASK; end += (PAGE_SIZE - 1); end &= PAGE_MASK; @@ -103,15 +107,17 @@ void flush_tlb_kernel_range(unsigned long start, unsigned long end) void flush_tlb_mm(struct mm_struct *mm) { + unsigned int cpu = smp_processor_id(); + /* Invalidate all TLB of this process. */ /* Instead of invalidating each TLB, we get new MMU context. */ - if (mm->context.id != NO_CONTEXT) { + if (cpu_context(cpu, mm) != NO_CONTEXT) { unsigned long flags; local_irq_save(flags); - mm->context.id = NO_CONTEXT; + cpu_context(cpu, mm) = NO_CONTEXT; if (mm == current->mm) - activate_context(mm); + activate_context(mm, cpu); local_irq_restore(flags); } } diff --git a/include/asm-sh/mmu.h b/include/asm-sh/mmu.h index cf47df79bb94..eb0358c097d0 100644 --- a/include/asm-sh/mmu.h +++ b/include/asm-sh/mmu.h @@ -1,25 +1,19 @@ #ifndef __MMU_H #define __MMU_H -#if !defined(CONFIG_MMU) +/* Default "unsigned long" context */ +typedef unsigned long mm_context_id_t[NR_CPUS]; typedef struct { +#ifdef CONFIG_MMU + mm_context_id_t id; + void *vdso; +#else struct vm_list_struct *vmlist; unsigned long end_brk; +#endif } mm_context_t; -#else - -/* Default "unsigned long" context */ -typedef unsigned long mm_context_id_t; - -typedef struct { - mm_context_id_t id; - void *vdso; -} mm_context_t; - -#endif /* CONFIG_MMU */ - /* * Privileged Space Mapping Buffer (PMB) definitions */ diff --git a/include/asm-sh/mmu_context.h b/include/asm-sh/mmu_context.h index 46f04e23bd45..342024425b7d 100644 --- a/include/asm-sh/mmu_context.h +++ b/include/asm-sh/mmu_context.h @@ -1,6 +1,6 @@ /* * Copyright (C) 1999 Niibe Yutaka - * Copyright (C) 2003 Paul Mundt + * Copyright (C) 2003 - 2006 Paul Mundt * * ASID handling idea taken from MIPS implementation. */ @@ -19,11 +19,6 @@ * (b) ASID (Address Space IDentifier) */ -/* - * Cache of MMU context last used. - */ -extern unsigned long mmu_context_cache; - #define MMU_CONTEXT_ASID_MASK 0x000000ff #define MMU_CONTEXT_VERSION_MASK 0xffffff00 #define MMU_CONTEXT_FIRST_VERSION 0x00000100 @@ -32,6 +27,11 @@ extern unsigned long mmu_context_cache; /* ASID is 8-bit value, so it can't be 0x100 */ #define MMU_NO_ASID 0x100 +#define cpu_context(cpu, mm) ((mm)->context.id[cpu]) +#define cpu_asid(cpu, mm) (cpu_context((cpu), (mm)) & \ + MMU_CONTEXT_ASID_MASK) +#define asid_cache(cpu) (cpu_data[cpu].asid_cache) + /* * Virtual Page Number mask */ @@ -41,18 +41,17 @@ extern unsigned long mmu_context_cache; /* * Get MMU context if needed. */ -static inline void get_mmu_context(struct mm_struct *mm) +static inline void get_mmu_context(struct mm_struct *mm, unsigned int cpu) { - unsigned long mc = mmu_context_cache; + unsigned long asid = asid_cache(cpu); /* Check if we have old version of context. */ - if (((mm->context.id ^ mc) & MMU_CONTEXT_VERSION_MASK) == 0) + if (((cpu_context(cpu, mm) ^ asid) & MMU_CONTEXT_VERSION_MASK) == 0) /* It's up to date, do nothing */ return; /* It's old, we need to get new context with new version. */ - mc = ++mmu_context_cache; - if (!(mc & MMU_CONTEXT_ASID_MASK)) { + if (!(++asid & MMU_CONTEXT_ASID_MASK)) { /* * We exhaust ASID of this version. * Flush all TLB and start new cycle. @@ -63,10 +62,11 @@ static inline void get_mmu_context(struct mm_struct *mm) * Fix version; Note that we avoid version #0 * to distingush NO_CONTEXT. */ - if (!mc) - mmu_context_cache = mc = MMU_CONTEXT_FIRST_VERSION; + if (!asid) + asid = MMU_CONTEXT_FIRST_VERSION; } - mm->context.id = mc; + + cpu_context(cpu, mm) = asid_cache(cpu) = asid; } /* @@ -74,9 +74,13 @@ static inline void get_mmu_context(struct mm_struct *mm) * instance. */ static inline int init_new_context(struct task_struct *tsk, - struct mm_struct *mm) + struct mm_struct *mm) { - mm->context.id = NO_CONTEXT; + int i; + + for (i = 0; i < num_online_cpus(); i++) + cpu_context(i, mm) = NO_CONTEXT; + return 0; } @@ -117,10 +121,10 @@ static inline unsigned long get_asid(void) * After we have set current->mm to a new value, this activates * the context for the new mm so we see the new mappings. */ -static inline void activate_context(struct mm_struct *mm) +static inline void activate_context(struct mm_struct *mm, unsigned int cpu) { - get_mmu_context(mm); - set_asid(mm->context.id & MMU_CONTEXT_ASID_MASK); + get_mmu_context(mm, cpu); + set_asid(cpu_asid(cpu, mm)); } /* MMU_TTB is used for optimizing the fault handling. */ @@ -138,10 +142,15 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, struct task_struct *tsk) { + unsigned int cpu = smp_processor_id(); + if (likely(prev != next)) { + cpu_set(cpu, next->cpu_vm_mask); set_TTB(next->pgd); - activate_context(next); - } + activate_context(next, cpu); + } else + if (!cpu_test_and_set(cpu, next->cpu_vm_mask)) + activate_context(next, cpu); } #define deactivate_mm(tsk,mm) do { } while (0) @@ -159,7 +168,7 @@ enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk) #define destroy_context(mm) do { } while (0) #define set_asid(asid) do { } while (0) #define get_asid() (0) -#define activate_context(mm) do { } while (0) +#define activate_context(mm,cpu) do { } while (0) #define switch_mm(prev,next,tsk) do { } while (0) #define deactivate_mm(tsk,mm) do { } while (0) #define activate_mm(prev,next) do { } while (0) @@ -174,14 +183,16 @@ enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk) */ static inline void enable_mmu(void) { + unsigned int cpu = smp_processor_id(); + /* Enable MMU */ ctrl_outl(MMU_CONTROL_INIT, MMUCR); ctrl_barrier(); - if (mmu_context_cache == NO_CONTEXT) - mmu_context_cache = MMU_CONTEXT_FIRST_VERSION; + if (asid_cache(cpu) == NO_CONTEXT) + asid_cache(cpu) = MMU_CONTEXT_FIRST_VERSION; - set_asid(mmu_context_cache & MMU_CONTEXT_ASID_MASK); + set_asid(asid_cache(cpu) & MMU_CONTEXT_ASID_MASK); } static inline void disable_mmu(void) diff --git a/include/asm-sh/processor.h b/include/asm-sh/processor.h index e29f2abb92de..da229aae8e0f 100644 --- a/include/asm-sh/processor.h +++ b/include/asm-sh/processor.h @@ -66,6 +66,7 @@ enum cpu_type { struct sh_cpuinfo { unsigned int type; unsigned long loops_per_jiffy; + unsigned long asid_cache; struct cache_info icache; /* Primary I-cache */ struct cache_info dcache; /* Primary D-cache */ -- 2.34.1