Merge tag 'v3.10.49' into linux-linaro-lsk

[firefly-linux-kernel-4.4.55.git] / kernel / workqueue.c
diff --git a/kernel/workqueue.c b/kernel/workqueue.c

index ee8e29a2320c7c76d67df8f4816af52cd9da5f68..a2c7e437796039b7e24fc992222379486b9a1d9c 100644 (file)
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -272,6 +272,15 @@ static cpumask_var_t *wq_numa_possible_cpumask;
  static bool wq_disable_numa;
  module_param_named(disable_numa, wq_disable_numa, bool, 0444);
  
+/* see the comment above the definition of WQ_POWER_EFFICIENT */
+#ifdef CONFIG_WQ_POWER_EFFICIENT_DEFAULT
+static bool wq_power_efficient = true;
+#else
+static bool wq_power_efficient;
+#endif
+
+module_param_named(power_efficient, wq_power_efficient, bool, 0444);
+
  static bool wq_numa_enabled;           /* unbound NUMA affinity enabled */
  
  /* buf for wq_update_unbound_numa_attrs(), protected by CPU hotplug exclusion */
@@ -295,6 +304,9 @@ static DEFINE_HASHTABLE(unbound_pool_hash, UNBOUND_POOL_HASH_ORDER);
  /* I: attributes used when instantiating standard unbound pools on demand */
  static struct workqueue_attrs *unbound_std_wq_attrs[NR_STD_WORKER_POOLS];
  
+/* I: attributes used when instantiating ordered pools on demand */
+static struct workqueue_attrs *ordered_wq_attrs[NR_STD_WORKER_POOLS];
+
  struct workqueue_struct *system_wq __read_mostly;
  EXPORT_SYMBOL(system_wq);
  struct workqueue_struct *system_highpri_wq __read_mostly;
@@ -305,6 +317,10 @@ struct workqueue_struct *system_unbound_wq __read_mostly;
  EXPORT_SYMBOL_GPL(system_unbound_wq);
  struct workqueue_struct *system_freezable_wq __read_mostly;
  EXPORT_SYMBOL_GPL(system_freezable_wq);
+struct workqueue_struct *system_power_efficient_wq __read_mostly;
+EXPORT_SYMBOL_GPL(system_power_efficient_wq);
+struct workqueue_struct *system_freezable_power_efficient_wq __read_mostly;
+EXPORT_SYMBOL_GPL(system_freezable_power_efficient_wq);
  
  static int worker_thread(void *__worker);
  static void copy_workqueue_attrs(struct workqueue_attrs *to,
@@ -1820,6 +1836,12 @@ static void destroy_worker(struct worker *worker)
         if (worker->flags & WORKER_IDLE)
                 pool->nr_idle--;
  
+       /*
+        * Once WORKER_DIE is set, the kworker may destroy itself at any
+        * point.  Pin to ensure the task stays until we're done with it.
+        */
+       get_task_struct(worker->task);
+
         list_del_init(&worker->entry);
         worker->flags |= WORKER_DIE;
  
@@ -1828,6 +1850,7 @@ static void destroy_worker(struct worker *worker)
         spin_unlock_irq(&pool->lock);
  
         kthread_stop(worker->task);
+       put_task_struct(worker->task);
         kfree(worker);
  
         spin_lock_irq(&pool->lock);
@@ -1871,6 +1894,12 @@ static void send_mayday(struct work_struct *work)
  
         /* mayday mayday mayday */
         if (list_empty(&pwq->mayday_node)) {
+               /*
+                * If @pwq is for an unbound wq, its base ref may be put at
+                * any time due to an attribute change.  Pin @pwq until the
+                * rescuer is done with it.
+                */
+               get_pwq(pwq);
                 list_add_tail(&pwq->mayday_node, &wq->maydays);
                 wake_up_process(wq->rescuer->task);
         }
@@ -2188,6 +2217,15 @@ __acquires(&pool->lock)
                 dump_stack();
         }
  
+       /*
+        * The following prevents a kworker from hogging CPU on !PREEMPT
+        * kernels, where a requeueing work item waiting for something to
+        * happen could deadlock with stop_machine as such a work item could
+        * indefinitely requeue itself while all other CPUs are trapped in
+        * stop_machine.
+        */
+       cond_resched();
+
         spin_lock_irq(&pool->lock);
  
         /* clear cpu intensive status */
@@ -2337,6 +2375,7 @@ static int rescuer_thread(void *__rescuer)
         struct worker *rescuer = __rescuer;
         struct workqueue_struct *wq = rescuer->rescue_wq;
         struct list_head *scheduled = &rescuer->scheduled;
+       bool should_stop;
  
         set_user_nice(current, RESCUER_NICE_LEVEL);
  
@@ -2348,11 +2387,15 @@ static int rescuer_thread(void *__rescuer)
  repeat:
         set_current_state(TASK_INTERRUPTIBLE);
  
-       if (kthread_should_stop()) {
-               __set_current_state(TASK_RUNNING);
-               rescuer->task->flags &= ~PF_WQ_WORKER;
-               return 0;
-       }
+       /*
+        * By the time the rescuer is requested to stop, the workqueue
+        * shouldn't have any work pending, but @wq->maydays may still have
+        * pwq(s) queued.  This can happen when non-rescuer workers consume
+        * all the work items before the rescuer gets to them.  Go through
+        * @wq->maydays processing before acting on should_stop so that the
+        * list is always empty on exit.
+        */
+       should_stop = kthread_should_stop();
  
         /* see whether any pwq is asking for help */
         spin_lock_irq(&wq_mayday_lock);
@@ -2383,6 +2426,12 @@ repeat:
  
                 process_scheduled_works(rescuer);
  
+               /*
+                * Put the reference grabbed by send_mayday().  @pool won't
+                * go away while we're holding its lock.
+                */
+               put_pwq(pwq);
+
                 /*
                  * Leave this pool.  If keep_working() is %true, notify a
                  * regular worker; otherwise, we end up with 0 concurrency
@@ -2398,6 +2447,12 @@ repeat:
  
         spin_unlock_irq(&wq_mayday_lock);
  
+       if (should_stop) {
+               __set_current_state(TASK_RUNNING);
+               rescuer->task->flags &= ~PF_WQ_WORKER;
+               return 0;
+       }
+
         /* rescuers should never participate in concurrency management */
         WARN_ON_ONCE(!(rescuer->flags & WORKER_NOT_RUNNING));
         schedule();
@@ -3331,6 +3386,7 @@ int workqueue_sysfs_register(struct workqueue_struct *wq)
                 }
         }
  
+       dev_set_uevent_suppress(&wq_dev->dev, false);
         kobject_uevent(&wq_dev->dev.kobj, KOBJ_ADD);
         return 0;
  }
@@ -3398,6 +3454,12 @@ static void copy_workqueue_attrs(struct workqueue_attrs *to,
  {
         to->nice = from->nice;
         cpumask_copy(to->cpumask, from->cpumask);
+       /*
+        * Unlike hash and equality test, this function doesn't ignore
+        * ->no_numa as it is used for both pool and wq attrs.  Instead,
+        * get_unbound_pool() explicitly clears ->no_numa after copying.
+        */
+       to->no_numa = from->no_numa;
  }
  
  /* hash value of the content of @attr */
@@ -3565,6 +3627,12 @@ static struct worker_pool *get_unbound_pool(const struct workqueue_attrs *attrs)
         lockdep_set_subclass(&pool->lock, 1);   /* see put_pwq() */
         copy_workqueue_attrs(pool->attrs, attrs);
  
+       /*
+        * no_numa isn't a worker_pool attribute, always clear it.  See
+        * 'struct workqueue_attrs' comments for detail.
+        */
+       pool->attrs->no_numa = false;
+
         /* if cpumask is contained inside a NUMA node, we belong to that node */
         if (wq_numa_enabled) {
                 for_each_node(node) {
@@ -4012,7 +4080,8 @@ static void wq_update_unbound_numa(struct workqueue_struct *wq, int cpu,
         if (!pwq) {
                 pr_warning("workqueue: allocation failed while updating NUMA affinity of \"%s\"\n",
                            wq->name);
-               goto out_unlock;
+               mutex_lock(&wq->mutex);
+               goto use_dfl_pwq;
         }
  
         /*
@@ -4038,7 +4107,7 @@ out_unlock:
  static int alloc_and_link_pwqs(struct workqueue_struct *wq)
  {
         bool highpri = wq->flags & WQ_HIGHPRI;
-       int cpu;
+       int cpu, ret;
  
         if (!(wq->flags & WQ_UNBOUND)) {
                 wq->cpu_pwqs = alloc_percpu(struct pool_workqueue);
@@ -4058,6 +4127,13 @@ static int alloc_and_link_pwqs(struct workqueue_struct *wq)
                         mutex_unlock(&wq->mutex);
                 }
                 return 0;
+       } else if (wq->flags & __WQ_ORDERED) {
+               ret = apply_workqueue_attrs(wq, ordered_wq_attrs[highpri]);
+               /* there should only be a single pwq for ordering guarantee */
+               WARN(!ret && (wq->pwqs.next != &wq->dfl_pwq->pwqs_node ||
+                             wq->pwqs.prev != &wq->dfl_pwq->pwqs_node),
+                    "ordering guarantee broken for workqueue %s\n", wq->name);
+               return ret;
         } else {
                 return apply_workqueue_attrs(wq, unbound_std_wq_attrs[highpri]);
         }
@@ -4086,6 +4162,10 @@ struct workqueue_struct *__alloc_workqueue_key(const char *fmt,
         struct workqueue_struct *wq;
         struct pool_workqueue *pwq;
  
+       /* see the comment above the definition of WQ_POWER_EFFICIENT */
+       if ((flags & WQ_POWER_EFFICIENT) && wq_power_efficient)
+               flags |= WQ_UNBOUND;
+
         /* allocate wq and format name */
         if (flags & WQ_UNBOUND)
                 tbl_size = wq_numa_tbl_len * sizeof(wq->numa_pwq_tbl[0]);
@@ -4905,7 +4985,7 @@ static void __init wq_numa_init(void)
         BUG_ON(!tbl);
  
         for_each_node(node)
-               BUG_ON(!alloc_cpumask_var_node(&tbl[node], GFP_KERNEL,
+               BUG_ON(!zalloc_cpumask_var_node(&tbl[node], GFP_KERNEL,
                                 node_online(node) ? node : NUMA_NO_NODE));
  
         for_each_possible_cpu(cpu) {
@@ -4969,13 +5049,23 @@ static int __init init_workqueues(void)
                 }
         }
  
-       /* create default unbound wq attrs */
+       /* create default unbound and ordered wq attrs */
         for (i = 0; i < NR_STD_WORKER_POOLS; i++) {
                 struct workqueue_attrs *attrs;
  
                 BUG_ON(!(attrs = alloc_workqueue_attrs(GFP_KERNEL)));
                 attrs->nice = std_nice[i];
                 unbound_std_wq_attrs[i] = attrs;
+
+               /*
+                * An ordered wq should have only one pwq as ordering is
+                * guaranteed by max_active which is enforced by pwqs.
+                * Turn off NUMA so that dfl_pwq is used for all nodes.
+                */
+               BUG_ON(!(attrs = alloc_workqueue_attrs(GFP_KERNEL)));
+               attrs->nice = std_nice[i];
+               attrs->no_numa = true;
+               ordered_wq_attrs[i] = attrs;
         }
  
         system_wq = alloc_workqueue("events", 0, 0);
@@ -4985,8 +5075,15 @@ static int __init init_workqueues(void)
                                             WQ_UNBOUND_MAX_ACTIVE);
         system_freezable_wq = alloc_workqueue("events_freezable",
                                               WQ_FREEZABLE, 0);
+       system_power_efficient_wq = alloc_workqueue("events_power_efficient",
+                                             WQ_POWER_EFFICIENT, 0);
+       system_freezable_power_efficient_wq = alloc_workqueue("events_freezable_power_efficient",
+                                             WQ_FREEZABLE | WQ_POWER_EFFICIENT,
+                                             0);
         BUG_ON(!system_wq || !system_highpri_wq || !system_long_wq ||
-              !system_unbound_wq || !system_freezable_wq);
+              !system_unbound_wq || !system_freezable_wq ||
+              !system_power_efficient_wq ||
+              !system_freezable_power_efficient_wq);
         return 0;
  }
  early_initcall(init_workqueues);