From: Frederik Deweerdt Date: Mon, 12 Jan 2009 21:35:42 +0000 (+0100) Subject: x86, tlb flush_data: replace per_cpu with an array X-Git-Tag: firefly_0821_release~13991^2~247^2~60^2~3 X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=09b3ec7315a18d885127544204f1e389d41058d0;p=firefly-linux-kernel-4.4.55.git x86, tlb flush_data: replace per_cpu with an array Impact: micro-optimization, memory reduction On x86_64 flush tlb data is stored in per_cpu variables. This is unnecessary because only the first NUM_INVALIDATE_TLB_VECTORS entries are accessed. This patch aims at making the code less confusing (there's nothing really "per_cpu") by using a plain array. It also would save some memory on most distros out there (Ubuntu x86_64 has NR_CPUS=64 by default). [ Ravikiran G Thirumalai also pointed out that the correct alignment is ____cacheline_internodealigned_in_smp, so that there's no bouncing on vsmp. ] Signed-off-by: Frederik Deweerdt Acked-by: Ravikiran Thirumalai Signed-off-by: Ingo Molnar --- diff --git a/arch/x86/kernel/tlb_64.c b/arch/x86/kernel/tlb_64.c index f8be6f1d2e48..8cfea5d14517 100644 --- a/arch/x86/kernel/tlb_64.c +++ b/arch/x86/kernel/tlb_64.c @@ -33,7 +33,7 @@ * To avoid global state use 8 different call vectors. * Each CPU uses a specific vector to trigger flushes on other * CPUs. Depending on the received vector the target CPUs look into - * the right per cpu variable for the flush data. + * the right array slot for the flush data. * * With more than 8 CPUs they are hashed to the 8 available * vectors. The limited global vector space forces us to this right now. @@ -48,13 +48,13 @@ union smp_flush_state { unsigned long flush_va; spinlock_t tlbstate_lock; }; - char pad[SMP_CACHE_BYTES]; -} ____cacheline_aligned; + char pad[CONFIG_X86_INTERNODE_CACHE_BYTES]; +} ____cacheline_internodealigned_in_smp; /* State is put into the per CPU data section, but padded to a full cache line because other CPUs can access it and we don't want false sharing in the per cpu data segment. */ -static DEFINE_PER_CPU(union smp_flush_state, flush_state); +static union smp_flush_state flush_state[NUM_INVALIDATE_TLB_VECTORS]; /* * We cannot call mmdrop() because we are in interrupt context, @@ -129,7 +129,7 @@ asmlinkage void smp_invalidate_interrupt(struct pt_regs *regs) * Use that to determine where the sender put the data. */ sender = ~regs->orig_ax - INVALIDATE_TLB_VECTOR_START; - f = &per_cpu(flush_state, sender); + f = &flush_state[sender]; if (!cpu_isset(cpu, f->flush_cpumask)) goto out; @@ -169,7 +169,7 @@ void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm, /* Caller has disabled preemption */ sender = smp_processor_id() % NUM_INVALIDATE_TLB_VECTORS; - f = &per_cpu(flush_state, sender); + f = &flush_state[sender]; /* * Could avoid this lock when @@ -205,8 +205,8 @@ static int __cpuinit init_smp_flush(void) { int i; - for_each_possible_cpu(i) - spin_lock_init(&per_cpu(flush_state, i).tlbstate_lock); + for (i = 0; i < ARRAY_SIZE(flush_state); i++) + spin_lock_init(&flush_state[i].tlbstate_lock); return 0; }