From 3a6d7c64d78a78d279851524d39999637a549363 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra <peterz@infradead.org> Date: Thu, 25 Jun 2015 11:27:10 -0700 Subject: [PATCH] rcu: Make expedited GP CPU stoppage asynchronous Sequentially stopping the CPUs slows down expedited grace periods by at least a factor of two, based on rcutorture's grace-period-per-second rate. This is a conservative measure because rcutorture uses unusually long RCU read-side critical sections and because rcutorture periodically quiesces the system in order to test RCU's ability to ramp down to and up from the idle state. This commit therefore replaces the stop_one_cpu() with stop_one_cpu_nowait(), using an atomic-counter scheme to determine when all CPUs have passed through the stopped state. Signed-off-by: Peter Zijlstra <peterz@infradead.org> Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com> --- kernel/rcu/tree.c | 31 +++++++++++++++++-------------- kernel/rcu/tree.h | 6 ++++++ kernel/rcu/tree_trace.c | 3 ++- 3 files changed, 25 insertions(+), 15 deletions(-) diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index b310b40a49a2..c5c8509054ef 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -3257,18 +3257,11 @@ EXPORT_SYMBOL_GPL(cond_synchronize_rcu); static int synchronize_sched_expedited_cpu_stop(void *data) { - /* - * There must be a full memory barrier on each affected CPU - * between the time that try_stop_cpus() is called and the - * time that it returns. - * - * In the current initial implementation of cpu_stop, the - * above condition is already met when the control reaches - * this point and the following smp_mb() is not strictly - * necessary. Do smp_mb() anyway for documentation and - * robustness against future implementation changes. - */ - smp_mb(); /* See above comment block. */ + struct rcu_state *rsp = data; + + /* We are here: If we are last, do the wakeup. */ + if (atomic_dec_and_test(&rsp->expedited_need_qs)) + wake_up(&rsp->expedited_wq); return 0; } @@ -3308,9 +3301,9 @@ void synchronize_sched_expedited(void) { int cpu; long s; - struct rcu_state *rsp = &rcu_sched_state; struct rcu_node *rnp0; struct rcu_node *rnp1 = NULL; + struct rcu_state *rsp = &rcu_sched_state; /* Take a snapshot of the sequence number. */ smp_mb(); /* Caller's modifications seen first by other CPUs. */ @@ -3351,16 +3344,26 @@ void synchronize_sched_expedited(void) WARN_ON_ONCE(!(rsp->expedited_sequence & 0x1)); /* Stop each CPU that is online, non-idle, and not us. */ + init_waitqueue_head(&rsp->expedited_wq); + atomic_set(&rsp->expedited_need_qs, 1); /* Extra count avoids race. */ for_each_online_cpu(cpu) { + struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu); struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu); /* Skip our CPU and any idle CPUs. */ if (raw_smp_processor_id() == cpu || !(atomic_add_return(0, &rdtp->dynticks) & 0x1)) continue; - stop_one_cpu(cpu, synchronize_sched_expedited_cpu_stop, NULL); + atomic_inc(&rsp->expedited_need_qs); + stop_one_cpu_nowait(cpu, synchronize_sched_expedited_cpu_stop, + rsp, &rdp->exp_stop_work); } + /* Remove extra count and, if necessary, wait for CPUs to stop. */ + if (!atomic_dec_and_test(&rsp->expedited_need_qs)) + wait_event(rsp->expedited_wq, + !atomic_read(&rsp->expedited_need_qs)); + smp_mb(); /* Ensure expedited GP seen before counter increment. */ WRITE_ONCE(rsp->expedited_sequence, rsp->expedited_sequence + 1); WARN_ON_ONCE(rsp->expedited_sequence & 0x1); diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h index 2ef036b356f7..4edc277d08eb 100644 --- a/kernel/rcu/tree.h +++ b/kernel/rcu/tree.h @@ -27,6 +27,7 @@ #include <linux/threads.h> #include <linux/cpumask.h> #include <linux/seqlock.h> +#include <linux/stop_machine.h> /* * Define shape of hierarchy based on NR_CPUS, CONFIG_RCU_FANOUT, and @@ -298,6 +299,9 @@ struct rcu_data { /* ticks this CPU has handled */ /* during and after the last grace */ /* period it is aware of. */ + struct cpu_stop_work exp_stop_work; + /* Expedited grace-period control */ + /* for CPU stopping. */ /* 2) batch handling */ /* @@ -491,6 +495,8 @@ struct rcu_state { atomic_long_t expedited_workdone1; /* # done by others #1. */ atomic_long_t expedited_workdone2; /* # done by others #2. */ atomic_long_t expedited_normal; /* # fallbacks to normal. */ + atomic_t expedited_need_qs; /* # CPUs left to check in. */ + wait_queue_head_t expedited_wq; /* Wait for check-ins. */ unsigned long jiffies_force_qs; /* Time at which to invoke */ /* force_quiescent_state(). */ diff --git a/kernel/rcu/tree_trace.c b/kernel/rcu/tree_trace.c index d2aab8dcd58e..36c04b46d3b8 100644 --- a/kernel/rcu/tree_trace.c +++ b/kernel/rcu/tree_trace.c @@ -185,12 +185,13 @@ static int show_rcuexp(struct seq_file *m, void *v) { struct rcu_state *rsp = (struct rcu_state *)m->private; - seq_printf(m, "t=%lu tf=%lu wd1=%lu wd2=%lu n=%lu sc=%lu\n", + seq_printf(m, "t=%lu tf=%lu wd1=%lu wd2=%lu n=%lu enq=%d sc=%lu\n", rsp->expedited_sequence, atomic_long_read(&rsp->expedited_tryfail), atomic_long_read(&rsp->expedited_workdone1), atomic_long_read(&rsp->expedited_workdone2), atomic_long_read(&rsp->expedited_normal), + atomic_read(&rsp->expedited_need_qs), rsp->expedited_sequence / 2); return 0; } -- 2.34.1