ARM: rockchip_defconfig: enable DEVFREQ_GOV_SIMPLE_ONDEMAND

[firefly-linux-kernel-4.4.55.git] / kernel / workqueue.c
diff --git a/kernel/workqueue.c b/kernel/workqueue.c

index bcb14cafe007148b15edb5cfed5adc041a6d966c..2c2f971f3e759df3c812d749740047f05864931b 100644 (file)
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -568,6 +568,16 @@ static struct pool_workqueue *unbound_pwq_by_node(struct workqueue_struct *wq,
                                                   int node)
  {
         assert_rcu_or_wq_mutex_or_pool_mutex(wq);
+
+       /*
+        * XXX: @node can be NUMA_NO_NODE if CPU goes offline while a
+        * delayed item is pending.  The plan is to keep CPU -> NODE
+        * mapping valid and stable across CPU on/offlines.  Once that
+        * happens, this workaround can be removed.
+        */
+       if (unlikely(node == NUMA_NO_NODE))
+               return wq->dfl_pwq;
+
         return rcu_dereference_raw(wq->numa_pwq_tbl[node]);
  }
  
@@ -639,6 +649,35 @@ static void set_work_pool_and_clear_pending(struct work_struct *work,
          */
         smp_wmb();
         set_work_data(work, (unsigned long)pool_id << WORK_OFFQ_POOL_SHIFT, 0);
+       /*
+        * The following mb guarantees that previous clear of a PENDING bit
+        * will not be reordered with any speculative LOADS or STORES from
+        * work->current_func, which is executed afterwards.  This possible
+        * reordering can lead to a missed execution on attempt to queue
+        * the same @work.  E.g. consider this case:
+        *
+        *   CPU#0                         CPU#1
+        *   ----------------------------  --------------------------------
+        *
+        * 1  STORE event_indicated
+        * 2  queue_work_on() {
+        * 3    test_and_set_bit(PENDING)
+        * 4 }                             set_..._and_clear_pending() {
+        * 5                                 set_work_data() # clear bit
+        * 6                                 smp_mb()
+        * 7                               work->current_func() {
+        * 8                                  LOAD event_indicated
+        *                                 }
+        *
+        * Without an explicit full barrier, the speculative LOAD on line 8
+        * can be executed before CPU#0 does the STORE on line 1.  If that
+        * happens, CPU#0 observes that the PENDING bit is still set and does
+        * not queue a new execution of @work, in the hope that CPU#1 will
+        * eventually finish the queued @work.  Meanwhile CPU#1 does not see
+        * that event_indicated is set, because the speculative LOAD was
+        * executed before the actual STORE.
+        */
+       smp_mb();
  }
  
  static void clear_work_data(struct work_struct *work)
@@ -1458,13 +1497,13 @@ static void __queue_delayed_work(int cpu, struct workqueue_struct *wq,
         timer_stats_timer_set_start_info(&dwork->timer);
  
         dwork->wq = wq;
-       /* timer isn't guaranteed to run in this cpu, record earlier */
-       if (cpu == WORK_CPU_UNBOUND)
-               cpu = raw_smp_processor_id();
         dwork->cpu = cpu;
         timer->expires = jiffies + delay;
  
-       add_timer_on(timer, cpu);
+       if (unlikely(cpu != WORK_CPU_UNBOUND))
+               add_timer_on(timer, cpu);
+       else
+               add_timer(timer);
  }
  
  /**
@@ -3199,6 +3238,7 @@ static struct worker_pool *get_unbound_pool(const struct workqueue_attrs *attrs)
         u32 hash = wqattrs_hash(attrs);
         struct worker_pool *pool;
         int node;
+       int target_node = NUMA_NO_NODE;
  
         lockdep_assert_held(&wq_pool_mutex);
  
@@ -3210,13 +3250,25 @@ static struct worker_pool *get_unbound_pool(const struct workqueue_attrs *attrs)
                 }
         }
  
+       /* if cpumask is contained inside a NUMA node, we belong to that node */
+       if (wq_numa_enabled) {
+               for_each_node(node) {
+                       if (cpumask_subset(attrs->cpumask,
+                                          wq_numa_possible_cpumask[node])) {
+                               target_node = node;
+                               break;
+                       }
+               }
+       }
+
         /* nope, create a new one */
-       pool = kzalloc(sizeof(*pool), GFP_KERNEL);
+       pool = kzalloc_node(sizeof(*pool), GFP_KERNEL, target_node);
         if (!pool || init_worker_pool(pool) < 0)
                 goto fail;
  
         lockdep_set_subclass(&pool->lock, 1);   /* see put_pwq() */
         copy_workqueue_attrs(pool->attrs, attrs);
+       pool->node = target_node;
  
         /*
          * no_numa isn't a worker_pool attribute, always clear it.  See
@@ -3224,17 +3276,6 @@ static struct worker_pool *get_unbound_pool(const struct workqueue_attrs *attrs)
          */
         pool->attrs->no_numa = false;
  
-       /* if cpumask is contained inside a NUMA node, we belong to that node */
-       if (wq_numa_enabled) {
-               for_each_node(node) {
-                       if (cpumask_subset(pool->attrs->cpumask,
-                                          wq_numa_possible_cpumask[node])) {
-                               pool->node = node;
-                               break;
-                       }
-               }
-       }
-
         if (worker_pool_assign_id(pool) < 0)
                 goto fail;
  
@@ -4416,6 +4457,17 @@ static void rebind_workers(struct worker_pool *pool)
                                                   pool->attrs->cpumask) < 0);
  
         spin_lock_irq(&pool->lock);
+
+       /*
+        * XXX: CPU hotplug notifiers are weird and can call DOWN_FAILED
+        * w/o preceding DOWN_PREPARE.  Work around it.  CPU hotplug is
+        * being reworked and this can go away in time.
+        */
+       if (!(pool->flags & POOL_DISASSOCIATED)) {
+               spin_unlock_irq(&pool->lock);
+               return;
+       }
+
         pool->flags &= ~POOL_DISASSOCIATED;
  
         for_each_pool_worker(worker, pool) {