smp: Fix smp_call_function_single_async() locking
author Linus Torvalds <torvalds@linux-foundation.org>
Wed, 11 Feb 2015 20:42:10 +0000 (12:42 -0800)
committer Ingo Molnar <mingo@kernel.org>
Fri, 17 Apr 2015 07:57:52 +0000 (09:57 +0200)
The current smp_function_call code suffers from a number of problems;
most notably, smp_call_function_single_async() is broken.

The problem is that flush_smp_call_function_queue() does csd_unlock()
_after_ calling csd->func(). This means that a caller cannot properly
synchronize the csd usage as it has to.

Change the code to release the csd before calling ->func() for the
async case, and put a WARN_ON_ONCE(csd->flags & CSD_FLAG_LOCK) in
smp_call_function_single_async() to warn us of improper serialization,
because any waiting there can result in deadlocks when called with
IRQs disabled.

Rename the (currently) unused WAIT flag to SYNCHRONOUS and (re)use it
such that we know what to do in flush_smp_call_function_queue().

Rework csd_{,un}lock() to use smp_load_acquire() / smp_store_release()
to avoid some full barriers while more clearly providing lock
semantics.

Finally move the csd maintenance out of generic_exec_single() into its
callers for clearer code.

Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
[ Added changelog. ]
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Rafael David Tinoco <inaddy@ubuntu.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/CA+55aFz492bzLFhdbKN-Hygjcreup7CjMEYk3nTSfRWjppz-OA@mail.gmail.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
kernel/smp.c

index f38a1e6922599e037d3f76622f3d7dfd50c874e6..2aaac2c47683d38efa407c139cbe1b6f4e18e678 100644 (file)
@@ -19,7 +19,7 @@
 
 enum {
        CSD_FLAG_LOCK           = 0x01,
-       CSD_FLAG_WAIT           = 0x02,
+       CSD_FLAG_SYNCHRONOUS    = 0x02,
 };
 
 struct call_function_data {
@@ -107,7 +107,7 @@ void __init call_function_init(void)
  */
 static void csd_lock_wait(struct call_single_data *csd)
 {
-       while (csd->flags & CSD_FLAG_LOCK)
+       while (smp_load_acquire(&csd->flags) & CSD_FLAG_LOCK)
                cpu_relax();
 }
 
@@ -121,19 +121,17 @@ static void csd_lock(struct call_single_data *csd)
         * to ->flags with any subsequent assignments to other
         * fields of the specified call_single_data structure:
         */
-       smp_mb();
+       smp_wmb();
 }
 
 static void csd_unlock(struct call_single_data *csd)
 {
-       WARN_ON((csd->flags & CSD_FLAG_WAIT) && !(csd->flags & CSD_FLAG_LOCK));
+       WARN_ON(!(csd->flags & CSD_FLAG_LOCK));
 
        /*
         * ensure we're all done before releasing data:
         */
-       smp_mb();
-
-       csd->flags &= ~CSD_FLAG_LOCK;
+       smp_store_release(&csd->flags, 0);
 }
 
 static DEFINE_PER_CPU_SHARED_ALIGNED(struct call_single_data, csd_data);
@@ -144,13 +142,16 @@ static DEFINE_PER_CPU_SHARED_ALIGNED(struct call_single_data, csd_data);
  * ->func, ->info, and ->flags set.
  */
 static int generic_exec_single(int cpu, struct call_single_data *csd,
-                              smp_call_func_t func, void *info, int wait)
+                              smp_call_func_t func, void *info)
 {
-       struct call_single_data csd_stack = { .flags = 0 };
-       unsigned long flags;
-
-
        if (cpu == smp_processor_id()) {
+               unsigned long flags;
+
+               /*
+                * We can unlock early even for the synchronous on-stack case,
+                * since we're doing this from the same CPU..
+                */
+               csd_unlock(csd);
                local_irq_save(flags);
                func(info);
                local_irq_restore(flags);
@@ -161,21 +162,9 @@ static int generic_exec_single(int cpu, struct call_single_data *csd,
        if ((unsigned)cpu >= nr_cpu_ids || !cpu_online(cpu))
                return -ENXIO;
 
-
-       if (!csd) {
-               csd = &csd_stack;
-               if (!wait)
-                       csd = this_cpu_ptr(&csd_data);
-       }
-
-       csd_lock(csd);
-
        csd->func = func;
        csd->info = info;
 
-       if (wait)
-               csd->flags |= CSD_FLAG_WAIT;
-
        /*
         * The list addition should be visible before sending the IPI
         * handler locks the list to pull the entry off it because of
@@ -190,9 +179,6 @@ static int generic_exec_single(int cpu, struct call_single_data *csd,
        if (llist_add(&csd->llist, &per_cpu(call_single_queue, cpu)))
                arch_send_call_function_single_ipi(cpu);
 
-       if (wait)
-               csd_lock_wait(csd);
-
        return 0;
 }
 
@@ -250,8 +236,17 @@ static void flush_smp_call_function_queue(bool warn_cpu_offline)
        }
 
        llist_for_each_entry_safe(csd, csd_next, entry, llist) {
-               csd->func(csd->info);
-               csd_unlock(csd);
+               smp_call_func_t func = csd->func;
+               void *info = csd->info;
+
+               /* Do we wait until *after* callback? */
+               if (csd->flags & CSD_FLAG_SYNCHRONOUS) {
+                       func(info);
+                       csd_unlock(csd);
+               } else {
+                       csd_unlock(csd);
+                       func(info);
+               }
        }
 
        /*
@@ -274,6 +269,8 @@ static void flush_smp_call_function_queue(bool warn_cpu_offline)
 int smp_call_function_single(int cpu, smp_call_func_t func, void *info,
                             int wait)
 {
+       struct call_single_data *csd;
+       struct call_single_data csd_stack = { .flags = CSD_FLAG_LOCK | CSD_FLAG_SYNCHRONOUS };
        int this_cpu;
        int err;
 
@@ -292,7 +289,16 @@ int smp_call_function_single(int cpu, smp_call_func_t func, void *info,
        WARN_ON_ONCE(cpu_online(this_cpu) && irqs_disabled()
                     && !oops_in_progress);
 
-       err = generic_exec_single(cpu, NULL, func, info, wait);
+       csd = &csd_stack;
+       if (!wait) {
+               csd = this_cpu_ptr(&csd_data);
+               csd_lock(csd);
+       }
+
+       err = generic_exec_single(cpu, csd, func, info);
+
+       if (wait)
+               csd_lock_wait(csd);
 
        put_cpu();
 
@@ -321,7 +327,15 @@ int smp_call_function_single_async(int cpu, struct call_single_data *csd)
        int err = 0;
 
        preempt_disable();
-       err = generic_exec_single(cpu, csd, csd->func, csd->info, 0);
+
+       /* We could deadlock if we have to wait here with interrupts disabled! */
+       if (WARN_ON_ONCE(csd->flags & CSD_FLAG_LOCK))
+               csd_lock_wait(csd);
+
+       csd->flags = CSD_FLAG_LOCK;
+       smp_wmb();
+
+       err = generic_exec_single(cpu, csd, csd->func, csd->info);
        preempt_enable();
 
        return err;
@@ -433,6 +447,8 @@ void smp_call_function_many(const struct cpumask *mask,
                struct call_single_data *csd = per_cpu_ptr(cfd->csd, cpu);
 
                csd_lock(csd);
+               if (wait)
+                       csd->flags |= CSD_FLAG_SYNCHRONOUS;
                csd->func = func;
                csd->info = info;
                llist_add(&csd->llist, &per_cpu(call_single_queue, cpu));