x86: Spread tlb flush vector between nodes

author Shaohua Li <shaohua.li@intel.com>

Wed, 20 Oct 2010 03:07:03 +0000 (11:07 +0800)

committer H. Peter Anvin <hpa@linux.intel.com>

Wed, 20 Oct 2010 21:44:42 +0000 (14:44 -0700)
author Shaohua Li <shaohua.li@intel.com>
Wed, 20 Oct 2010 03:07:03 +0000 (11:07 +0800)
committer H. Peter Anvin <hpa@linux.intel.com>
Wed, 20 Oct 2010 21:44:42 +0000 (14:44 -0700)
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c

index c03f14ab666742d6960ff3339ebcfe28a003308b..49358481c733235918cde7c576a3332fba50c364 100644 (file)
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -5,6 +5,7 @@
  #include <linux/smp.h>
  #include <linux/interrupt.h>
  #include <linux/module.h>
+#include <linux/cpu.h>
  
  #include <asm/tlbflush.h>
  #include <asm/mmu_context.h>
@@ -52,6 +53,8 @@ union smp_flush_state {
     want false sharing in the per cpu data segment. */
  static union smp_flush_state flush_state[NUM_INVALIDATE_TLB_VECTORS];
  
+static DEFINE_PER_CPU_READ_MOSTLY(int, tlb_vector_offset);
+
  /*
   * We cannot call mmdrop() because we are in interrupt context,
   * instead update mm->cpu_vm_mask.
@@ -173,7 +176,7 @@ static void flush_tlb_others_ipi(const struct cpumask *cpumask,
         union smp_flush_state *f;
  
         /* Caller has disabled preemption */
-       sender = smp_processor_id() % NUM_INVALIDATE_TLB_VECTORS;
+       sender = this_cpu_read(tlb_vector_offset);
         f = &flush_state[sender];
  
         /*
@@ -218,6 +221,47 @@ void native_flush_tlb_others(const struct cpumask *cpumask,
         flush_tlb_others_ipi(cpumask, mm, va);
  }
  
+static void __cpuinit calculate_tlb_offset(void)
+{
+       int cpu, node, nr_node_vecs, idx = 0;
+       /*
+        * Each online node gets nr_node_vecs consecutive vectors, handed out
+        * round-robin to its CPUs; an inexact division leaves vectors unused.
+        * Offsets are rewritten at runtime with no lock taken; the original
+        * note says the store is atomic on x86, so at worst there is brief
+        * extra lock contention until every CPU has its new offset.
+        * Slices are keyed by idx, the node's rank among online nodes, not by
+        * node id: sparse ids would index past the end of flush_state[].
+        */
+       if (nr_online_nodes > NUM_INVALIDATE_TLB_VECTORS)
+               nr_node_vecs = 1;
+       else
+               nr_node_vecs = NUM_INVALIDATE_TLB_VECTORS/nr_online_nodes;
+
+       for_each_online_node(node) {
+               int node_offset = (idx % NUM_INVALIDATE_TLB_VECTORS) *
+                       nr_node_vecs;
+               int cpu_offset = 0;
+               for_each_cpu(cpu, cpumask_of_node(node)) {
+                       per_cpu(tlb_vector_offset, cpu) = node_offset +
+                               cpu_offset;
+                       cpu_offset = (cpu_offset + 1) % nr_node_vecs;
+               }
+               idx++;
+       }
+}
+
+/* Hotplug callback: redo the per-CPU vector spread on CPU_ONLINE/CPU_DEAD. */
+static int tlb_cpuhp_notify(struct notifier_block *n,
+               unsigned long action, void *hcpu)
+{
+       unsigned long event = action & 0xf;     /* low four bits only */
+
+       if (event == CPU_ONLINE || event == CPU_DEAD)
+               calculate_tlb_offset();
+       return NOTIFY_OK;
+}
+
  static int __cpuinit init_smp_flush(void)
  {
         int i;
@@ -225,6 +269,8 @@ static int __cpuinit init_smp_flush(void)
         for (i = 0; i < ARRAY_SIZE(flush_state); i++)
                 raw_spin_lock_init(&flush_state[i].tlbstate_lock);
  
+       calculate_tlb_offset();
+       hotcpu_notifier(tlb_cpuhp_notify, 0);
         return 0;
  }
  core_initcall(init_smp_flush);
author	Shaohua Li <shaohua.li@intel.com>
	Wed, 20 Oct 2010 03:07:03 +0000 (11:07 +0800)
committer	H. Peter Anvin <hpa@linux.intel.com>
	Wed, 20 Oct 2010 21:44:42 +0000 (14:44 -0700)