workqueue: implement attribute-based unbound worker_pool management
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 2db1532b09dcb25334a01c8b0204ea3e23708a0e..3fe2c79bf166ccaf2fa2bbae401c4d75b1c63459 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -41,7 +41,9 @@
 #include <linux/debug_locks.h>
 #include <linux/lockdep.h>
 #include <linux/idr.h>
+#include <linux/jhash.h>
 #include <linux/hashtable.h>
+#include <linux/rculist.h>
 
 #include "workqueue_internal.h"
 
@@ -63,7 +65,6 @@ enum {
         * create_worker() is in progress.
         */
        POOL_MANAGE_WORKERS     = 1 << 0,       /* need to manage workers */
-       POOL_MANAGING_WORKERS   = 1 << 1,       /* managing workers */
        POOL_DISASSOCIATED      = 1 << 2,       /* cpu can't serve workers */
        POOL_FREEZING           = 1 << 3,       /* freeze in progress */
 
@@ -80,6 +81,7 @@ enum {
 
        NR_STD_WORKER_POOLS     = 2,            /* # standard pools per cpu */
 
+       UNBOUND_POOL_HASH_ORDER = 6,            /* hashed by pool->attrs */
        BUSY_WORKER_HASH_ORDER  = 6,            /* 64 pointers */
 
        MAX_IDLE_WORKERS_RATIO  = 4,            /* 1/4 of busy can be idle */
@@ -118,13 +120,15 @@ enum {
  * F: wq->flush_mutex protected.
  *
  * W: workqueue_lock protected.
+ *
+ * R: workqueue_lock protected for writes.  Sched-RCU protected for reads.
  */
 
 /* struct worker is defined in workqueue_internal.h */
 
 struct worker_pool {
        spinlock_t              lock;           /* the pool lock */
-       unsigned int            cpu;            /* I: the associated cpu */
+       int                     cpu;            /* I: the associated cpu */
        int                     id;             /* I: pool ID */
        unsigned int            flags;          /* X: flags */
 
@@ -142,15 +146,26 @@ struct worker_pool {
        DECLARE_HASHTABLE(busy_hash, BUSY_WORKER_HASH_ORDER);
                                                /* L: hash of busy workers */
 
+       struct mutex            manager_arb;    /* manager arbitration */
        struct mutex            assoc_mutex;    /* protect POOL_DISASSOCIATED */
        struct ida              worker_ida;     /* L: for worker IDs */
 
+       struct workqueue_attrs  *attrs;         /* I: worker attributes */
+       struct hlist_node       hash_node;      /* R: unbound_pool_hash node */
+       int                     refcnt;         /* refcnt for unbound pools */
+
        /*
         * The current concurrency level.  As it's likely to be accessed
         * from other CPUs during try_to_wake_up(), put it in a separate
         * cacheline.
         */
        atomic_t                nr_running ____cacheline_aligned_in_smp;
+
+       /*
+        * Destruction of pool is sched-RCU protected to allow dereferences
+        * from get_work_pool().
+        */
+       struct rcu_head         rcu;
 } ____cacheline_aligned_in_smp;
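
The new attrs/hash_node/refcnt/rcu members reference struct workqueue_attrs, whose header-side definition is added elsewhere in this series and is not part of this hunk.  For reference, a minimal sketch of what include/linux/workqueue.h is assumed to provide (the authoritative definition lives in that header, not here):

struct workqueue_attrs {
	int			nice;		/* nice level */
	cpumask_var_t		cpumask;	/* allowed CPUs */
};

struct workqueue_attrs *alloc_workqueue_attrs(gfp_t gfp_mask);
void free_workqueue_attrs(struct workqueue_attrs *attrs);
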
 
 /*
@@ -169,7 +184,8 @@ struct pool_workqueue {
        int                     nr_active;      /* L: nr of active works */
        int                     max_active;     /* L: max active works */
        struct list_head        delayed_works;  /* L: delayed works */
-       struct list_head        pwqs_node;      /* I: node on wq->pwqs */
+       struct list_head        pwqs_node;      /* R: node on wq->pwqs */
+       struct list_head        mayday_node;    /* W: node on wq->maydays */
 } __aligned(1 << WORK_STRUCT_FLAG_BITS);
 
 /*
@@ -181,39 +197,14 @@ struct wq_flusher {
        struct completion       done;           /* flush completion */
 };
 
-/*
- * All cpumasks are assumed to be always set on UP and thus can't be
- * used to determine whether there's something to be done.
- */
-#ifdef CONFIG_SMP
-typedef cpumask_var_t mayday_mask_t;
-#define mayday_test_and_set_cpu(cpu, mask)     \
-       cpumask_test_and_set_cpu((cpu), (mask))
-#define mayday_clear_cpu(cpu, mask)            cpumask_clear_cpu((cpu), (mask))
-#define for_each_mayday_cpu(cpu, mask)         for_each_cpu((cpu), (mask))
-#define alloc_mayday_mask(maskp, gfp)          zalloc_cpumask_var((maskp), (gfp))
-#define free_mayday_mask(mask)                 free_cpumask_var((mask))
-#else
-typedef unsigned long mayday_mask_t;
-#define mayday_test_and_set_cpu(cpu, mask)     test_and_set_bit(0, &(mask))
-#define mayday_clear_cpu(cpu, mask)            clear_bit(0, &(mask))
-#define for_each_mayday_cpu(cpu, mask)         if ((cpu) = 0, (mask))
-#define alloc_mayday_mask(maskp, gfp)          true
-#define free_mayday_mask(mask)                 do { } while (0)
-#endif
-
 /*
  * The externally visible workqueue abstraction is an array of
  * per-CPU workqueues:
  */
 struct workqueue_struct {
        unsigned int            flags;          /* W: WQ_* flags */
-       union {
-               struct pool_workqueue __percpu          *pcpu;
-               struct pool_workqueue                   *single;
-               unsigned long                           v;
-       } pool_wq;                              /* I: pwq's */
-       struct list_head        pwqs;           /* I: all pwqs of this wq */
+       struct pool_workqueue __percpu *cpu_pwqs; /* I: per-cpu pwq's */
+       struct list_head        pwqs;           /* R: all pwqs of this wq */
        struct list_head        list;           /* W: list of all workqueues */
 
        struct mutex            flush_mutex;    /* protects wq flushing */
@@ -224,7 +215,7 @@ struct workqueue_struct {
        struct list_head        flusher_queue;  /* F: flush waiters */
        struct list_head        flusher_overflow; /* F: flush overflow list */
 
-       mayday_mask_t           mayday_mask;    /* cpus requesting rescue */
+       struct list_head        maydays;        /* W: pwqs requesting rescue */
        struct worker           *rescuer;       /* I: rescue worker */
 
        int                     nr_drainers;    /* W: drain in progress */
@@ -237,6 +228,11 @@ struct workqueue_struct {
 
 static struct kmem_cache *pwq_cache;
 
+/* hash of all unbound pools keyed by pool->attrs */
+static DEFINE_HASHTABLE(unbound_pool_hash, UNBOUND_POOL_HASH_ORDER);
+
+static struct workqueue_attrs *unbound_std_wq_attrs[NR_STD_WORKER_POOLS];
+
 struct workqueue_struct *system_wq __read_mostly;
 EXPORT_SYMBOL_GPL(system_wq);
 struct workqueue_struct *system_highpri_wq __read_mostly;
@@ -251,6 +247,11 @@ EXPORT_SYMBOL_GPL(system_freezable_wq);
 #define CREATE_TRACE_POINTS
 #include <trace/events/workqueue.h>
 
+#define assert_rcu_or_wq_lock()                                                \
+       rcu_lockdep_assert(rcu_read_lock_sched_held() ||                \
+                          lockdep_is_held(&workqueue_lock),            \
+                          "sched RCU or workqueue lock should be held")
+
 #define for_each_std_worker_pool(pool, cpu)                            \
        for ((pool) = &std_worker_pools(cpu)[0];                        \
             (pool) < &std_worker_pools(cpu)[NR_STD_WORKER_POOLS]; (pool)++)
@@ -294,13 +295,39 @@ static inline int __next_wq_cpu(int cpu, const struct cpumask *mask,
             (cpu) < WORK_CPU_END;                                      \
             (cpu) = __next_wq_cpu((cpu), cpu_online_mask, 3))
 
+/**
+ * for_each_pool - iterate through all worker_pools in the system
+ * @pool: iteration cursor
+ * @id: integer used for iteration
+ *
+ * This must be called either with workqueue_lock held or sched RCU read
+ * locked.  If the pool needs to be used beyond the locking in effect, the
+ * caller is responsible for guaranteeing that the pool stays online.
+ *
+ * The if/else clause exists only for the lockdep assertion and can be
+ * ignored.
+ */
+#define for_each_pool(pool, id)                                                \
+       idr_for_each_entry(&worker_pool_idr, pool, id)                  \
+               if (({ assert_rcu_or_wq_lock(); false; })) { }          \
+               else
+
 /**
  * for_each_pwq - iterate through all pool_workqueues of the specified workqueue
  * @pwq: iteration cursor
  * @wq: the target workqueue
+ *
+ * This must be called either with workqueue_lock held or sched RCU read
+ * locked.  If the pwq needs to be used beyond the locking in effect, the
+ * caller is responsible for guaranteeing that the pwq stays online.
+ *
+ * The if/else clause exists only for the lockdep assertion and can be
+ * ignored.
  */
 #define for_each_pwq(pwq, wq)                                          \
-       list_for_each_entry((pwq), &(wq)->pwqs, pwqs_node)
+       list_for_each_entry_rcu((pwq), &(wq)->pwqs, pwqs_node)          \
+               if (({ assert_rcu_or_wq_lock(); false; })) { }          \
+               else
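
As a usage sketch (not part of the patch), a read-side walk that satisfies assert_rcu_or_wq_lock() only needs a sched-RCU critical section; pwq_show() below is a hypothetical helper:

static void pwq_show(struct workqueue_struct *wq)
{
	struct pool_workqueue *pwq;

	rcu_read_lock_sched();			/* or hold workqueue_lock */
	for_each_pwq(pwq, wq)
		pr_info("pwq %p on pool %d\n", pwq, pwq->pool->id);
	rcu_read_unlock_sched();
}
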
 
 #ifdef CONFIG_DEBUG_OBJECTS_WORK
 
@@ -431,8 +458,10 @@ static DEFINE_PER_CPU_SHARED_ALIGNED(struct worker_pool [NR_STD_WORKER_POOLS],
                                     cpu_std_worker_pools);
 static struct worker_pool unbound_std_worker_pools[NR_STD_WORKER_POOLS];
 
-/* idr of all pools */
-static DEFINE_MUTEX(worker_pool_idr_mutex);
+/*
+ * idr of all pools.  Modifications are protected by workqueue_lock.  Read
+ * accesses are sched-RCU protected.
+ */
 static DEFINE_IDR(worker_pool_idr);
 
 static int worker_thread(void *__worker);
@@ -455,21 +484,16 @@ static int worker_pool_assign_id(struct worker_pool *pool)
 {
        int ret;
 
-       mutex_lock(&worker_pool_idr_mutex);
-       idr_pre_get(&worker_pool_idr, GFP_KERNEL);
-       ret = idr_get_new(&worker_pool_idr, pool, &pool->id);
-       mutex_unlock(&worker_pool_idr_mutex);
+       do {
+               if (!idr_pre_get(&worker_pool_idr, GFP_KERNEL))
+                       return -ENOMEM;
 
-       return ret;
-}
+               spin_lock_irq(&workqueue_lock);
+               ret = idr_get_new(&worker_pool_idr, pool, &pool->id);
+               spin_unlock_irq(&workqueue_lock);
+       } while (ret == -EAGAIN);
 
-/*
- * Lookup worker_pool by id.  The idr currently is built during boot and
- * never modified.  Don't worry about locking for now.
- */
-static struct worker_pool *worker_pool_by_id(int pool_id)
-{
-       return idr_find(&worker_pool_idr, pool_id);
+       return ret;
 }
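
The pre-get/retry dance above is what the old idr interface requires: idr_get_new() returns -EAGAIN whenever the preallocated layer is consumed.  Purely for comparison (this is not what the patch does), on kernels that already have idr_alloc()/idr_preload() the same assignment collapses to something like:

static int worker_pool_assign_id_new_api(struct worker_pool *pool)
{
	int ret;

	idr_preload(GFP_KERNEL);
	spin_lock_irq(&workqueue_lock);
	ret = idr_alloc(&worker_pool_idr, pool, 0, 0, GFP_NOWAIT);
	if (ret >= 0)
		pool->id = ret;
	spin_unlock_irq(&workqueue_lock);
	idr_preload_end();

	return ret < 0 ? ret : 0;
}
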
 
 static struct worker_pool *get_std_worker_pool(int cpu, bool highpri)
@@ -479,15 +503,19 @@ static struct worker_pool *get_std_worker_pool(int cpu, bool highpri)
        return &pools[highpri];
 }
 
-static struct pool_workqueue *get_pwq(unsigned int cpu,
-                                     struct workqueue_struct *wq)
+/**
+ * first_pwq - return the first pool_workqueue of the specified workqueue
+ * @wq: the target workqueue
+ *
+ * This must be called either with workqueue_lock held or sched RCU read
+ * locked.  If the pwq needs to be used beyond the locking in effect, the
+ * caller is responsible for guaranteeing that the pwq stays online.
+ */
+static struct pool_workqueue *first_pwq(struct workqueue_struct *wq)
 {
-       if (!(wq->flags & WQ_UNBOUND)) {
-               if (likely(cpu < nr_cpu_ids))
-                       return per_cpu_ptr(wq->pool_wq.pcpu, cpu);
-       } else if (likely(cpu == WORK_CPU_UNBOUND))
-               return wq->pool_wq.single;
-       return NULL;
+       assert_rcu_or_wq_lock();
+       return list_first_or_null_rcu(&wq->pwqs, struct pool_workqueue,
+                                     pwqs_node);
 }
 
 static unsigned int work_color_to_flags(int color)
@@ -581,13 +609,23 @@ static struct pool_workqueue *get_work_pwq(struct work_struct *work)
  * @work: the work item of interest
  *
  * Return the worker_pool @work was last associated with.  %NULL if none.
+ *
+ * Pools are created and destroyed under workqueue_lock, and allow read
+ * access under sched-RCU read lock.  As such, this function should be
+ * called under workqueue_lock or with preemption disabled.
+ *
+ * All fields of the returned pool are accessible as long as the above
+ * mentioned locking is in effect.  If the returned pool needs to be used
+ * beyond the critical section, the caller is responsible for ensuring the
+ * returned pool is and stays online.
  */
 static struct worker_pool *get_work_pool(struct work_struct *work)
 {
        unsigned long data = atomic_long_read(&work->data);
-       struct worker_pool *pool;
        int pool_id;
 
+       assert_rcu_or_wq_lock();
+
        if (data & WORK_STRUCT_PWQ)
                return ((struct pool_workqueue *)
                        (data & WORK_STRUCT_WQ_DATA_MASK))->pool;
@@ -596,9 +634,7 @@ static struct worker_pool *get_work_pool(struct work_struct *work)
        if (pool_id == WORK_OFFQ_POOL_NONE)
                return NULL;
 
-       pool = worker_pool_by_id(pool_id);
-       WARN_ON_ONCE(!pool);
-       return pool;
+       return idr_find(&worker_pool_idr, pool_id);
 }
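
A hedged caller sketch honoring the rule spelled out in the comment above; work_pool_has_pending() is hypothetical, but the same disable-irqs-then-lookup pattern appears later in start_flush_work() and work_busy():

static bool work_pool_has_pending(struct work_struct *work)
{
	struct worker_pool *pool;
	bool busy = false;

	local_irq_disable();			/* implies a sched-RCU read side */
	pool = get_work_pool(work);
	if (pool) {
		spin_lock(&pool->lock);
		busy = !list_empty(&pool->worklist);
		spin_unlock(&pool->lock);
	}
	local_irq_enable();
	return busy;
}
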
 
 /**
@@ -687,7 +723,7 @@ static bool need_to_manage_workers(struct worker_pool *pool)
 /* Do we have too many workers and should some go away? */
 static bool too_many_workers(struct worker_pool *pool)
 {
-       bool managing = pool->flags & POOL_MANAGING_WORKERS;
+       bool managing = mutex_is_locked(&pool->manager_arb);
        int nr_idle = pool->nr_idle + managing; /* manager is considered idle */
        int nr_busy = pool->nr_workers - nr_idle;
 
@@ -742,7 +778,7 @@ static void wake_up_worker(struct worker_pool *pool)
  * CONTEXT:
  * spin_lock_irq(rq->lock)
  */
-void wq_worker_waking_up(struct task_struct *task, unsigned int cpu)
+void wq_worker_waking_up(struct task_struct *task, int cpu)
 {
        struct worker *worker = kthread_data(task);
 
@@ -767,8 +803,7 @@ void wq_worker_waking_up(struct task_struct *task, unsigned int cpu)
  * RETURNS:
  * Worker task on @cpu to wake up, %NULL if none.
  */
-struct task_struct *wq_worker_sleeping(struct task_struct *task,
-                                      unsigned int cpu)
+struct task_struct *wq_worker_sleeping(struct task_struct *task, int cpu)
 {
        struct worker *worker = kthread_data(task), *to_wakeup = NULL;
        struct worker_pool *pool;
@@ -1171,7 +1206,7 @@ static bool is_chained_work(struct workqueue_struct *wq)
        return worker && worker->current_pwq->wq == wq;
 }
 
-static void __queue_work(unsigned int cpu, struct workqueue_struct *wq,
+static void __queue_work(int cpu, struct workqueue_struct *wq,
                         struct work_struct *work)
 {
        struct pool_workqueue *pwq;
@@ -1207,7 +1242,7 @@ static void __queue_work(unsigned int cpu, struct workqueue_struct *wq,
                 * work needs to be queued on that cpu to guarantee
                 * non-reentrancy.
                 */
-               pwq = get_pwq(cpu, wq);
+               pwq = per_cpu_ptr(wq->cpu_pwqs, cpu);
                last_pool = get_work_pool(work);
 
                if (last_pool && last_pool != pwq->pool) {
@@ -1218,7 +1253,7 @@ static void __queue_work(unsigned int cpu, struct workqueue_struct *wq,
                        worker = find_worker_executing_work(last_pool, work);
 
                        if (worker && worker->current_pwq->wq == wq) {
-                               pwq = get_pwq(last_pool->cpu, wq);
+                               pwq = per_cpu_ptr(wq->cpu_pwqs, last_pool->cpu);
                        } else {
                                /* meh... not running there, queue here */
                                spin_unlock(&last_pool->lock);
@@ -1228,7 +1263,7 @@ static void __queue_work(unsigned int cpu, struct workqueue_struct *wq,
                        spin_lock(&pwq->pool->lock);
                }
        } else {
-               pwq = get_pwq(WORK_CPU_UNBOUND, wq);
+               pwq = first_pwq(wq);
                spin_lock(&pwq->pool->lock);
        }
 
@@ -1548,14 +1583,13 @@ __acquires(&pool->lock)
                 * against POOL_DISASSOCIATED.
                 */
                if (!(pool->flags & POOL_DISASSOCIATED))
-                       set_cpus_allowed_ptr(current, get_cpu_mask(pool->cpu));
+                       set_cpus_allowed_ptr(current, pool->attrs->cpumask);
 
                spin_lock_irq(&pool->lock);
                if (pool->flags & POOL_DISASSOCIATED)
                        return false;
                if (task_cpu(current) == pool->cpu &&
-                   cpumask_equal(&current->cpus_allowed,
-                                 get_cpu_mask(pool->cpu)))
+                   cpumask_equal(&current->cpus_allowed, pool->attrs->cpumask))
                        return true;
                spin_unlock_irq(&pool->lock);
 
@@ -1661,12 +1695,12 @@ static void rebind_workers(struct worker_pool *pool)
                 * wq doesn't really matter but let's keep @worker->pool
                 * and @pwq->pool consistent for sanity.
                 */
-               if (std_worker_pool_pri(worker->pool))
+               if (worker->pool->attrs->nice < 0)
                        wq = system_highpri_wq;
                else
                        wq = system_wq;
 
-               insert_work(get_pwq(pool->cpu, wq), rebind_work,
+               insert_work(per_cpu_ptr(wq->cpu_pwqs, pool->cpu), rebind_work,
                            worker->scheduled.next,
                            work_color_to_flags(WORK_NO_COLOR));
        }
@@ -1703,7 +1737,7 @@ static struct worker *alloc_worker(void)
  */
 static struct worker *create_worker(struct worker_pool *pool)
 {
-       const char *pri = std_worker_pool_pri(pool) ? "H" : "";
+       const char *pri = pool->attrs->nice < 0 ? "H" : "";
        struct worker *worker = NULL;
        int id = -1;
 
@@ -1723,34 +1757,33 @@ static struct worker *create_worker(struct worker_pool *pool)
        worker->pool = pool;
        worker->id = id;
 
-       if (pool->cpu != WORK_CPU_UNBOUND)
+       if (pool->cpu >= 0)
                worker->task = kthread_create_on_node(worker_thread,
                                        worker, cpu_to_node(pool->cpu),
-                                       "kworker/%u:%d%s", pool->cpu, id, pri);
+                                       "kworker/%d:%d%s", pool->cpu, id, pri);
        else
                worker->task = kthread_create(worker_thread, worker,
                                              "kworker/u:%d%s", id, pri);
        if (IS_ERR(worker->task))
                goto fail;
 
-       if (std_worker_pool_pri(pool))
-               set_user_nice(worker->task, HIGHPRI_NICE_LEVEL);
+       set_user_nice(worker->task, pool->attrs->nice);
+       set_cpus_allowed_ptr(worker->task, pool->attrs->cpumask);
 
        /*
-        * Determine CPU binding of the new worker depending on
-        * %POOL_DISASSOCIATED.  The caller is responsible for ensuring the
-        * flag remains stable across this function.  See the comments
-        * above the flag definition for details.
-        *
-        * As an unbound worker may later become a regular one if CPU comes
-        * online, make sure every worker has %PF_THREAD_BOUND set.
+        * %PF_THREAD_BOUND is used to prevent userland from meddling with
+        * cpumask of workqueue workers.  This is an abuse.  We need
+        * %PF_NO_SETAFFINITY.
         */
-       if (!(pool->flags & POOL_DISASSOCIATED)) {
-               kthread_bind(worker->task, pool->cpu);
-       } else {
-               worker->task->flags |= PF_THREAD_BOUND;
+       worker->task->flags |= PF_THREAD_BOUND;
+
+       /*
+        * The caller is responsible for ensuring %POOL_DISASSOCIATED
+        * remains stable across this function.  See the comments above the
+        * flag definition for details.
+        */
+       if (pool->flags & POOL_DISASSOCIATED)
                worker->flags |= WORKER_UNBOUND;
-       }
 
        return worker;
 fail:
@@ -1842,23 +1875,21 @@ static void idle_worker_timeout(unsigned long __pool)
        spin_unlock_irq(&pool->lock);
 }
 
-static bool send_mayday(struct work_struct *work)
+static void send_mayday(struct work_struct *work)
 {
        struct pool_workqueue *pwq = get_work_pwq(work);
        struct workqueue_struct *wq = pwq->wq;
-       unsigned int cpu;
+
+       lockdep_assert_held(&workqueue_lock);
 
        if (!(wq->flags & WQ_RESCUER))
-               return false;
+               return;
 
        /* mayday mayday mayday */
-       cpu = pwq->pool->cpu;
-       /* WORK_CPU_UNBOUND can't be set in cpumask, use cpu 0 instead */
-       if (cpu == WORK_CPU_UNBOUND)
-               cpu = 0;
-       if (!mayday_test_and_set_cpu(cpu, wq->mayday_mask))
+       if (list_empty(&pwq->mayday_node)) {
+               list_add_tail(&pwq->mayday_node, &wq->maydays);
                wake_up_process(wq->rescuer->task);
-       return true;
+       }
 }
 
 static void pool_mayday_timeout(unsigned long __pool)
@@ -1866,7 +1897,8 @@ static void pool_mayday_timeout(unsigned long __pool)
        struct worker_pool *pool = (void *)__pool;
        struct work_struct *work;
 
-       spin_lock_irq(&pool->lock);
+       spin_lock_irq(&workqueue_lock);         /* for wq->maydays */
+       spin_lock(&pool->lock);
 
        if (need_to_create_worker(pool)) {
                /*
@@ -1879,7 +1911,8 @@ static void pool_mayday_timeout(unsigned long __pool)
                        send_mayday(work);
        }
 
-       spin_unlock_irq(&pool->lock);
+       spin_unlock(&pool->lock);
+       spin_unlock_irq(&workqueue_lock);
 
        mod_timer(&pool->mayday_timer, jiffies + MAYDAY_INTERVAL);
 }
@@ -2011,19 +2044,17 @@ static bool manage_workers(struct worker *worker)
        struct worker_pool *pool = worker->pool;
        bool ret = false;
 
-       if (pool->flags & POOL_MANAGING_WORKERS)
+       if (!mutex_trylock(&pool->manager_arb))
                return ret;
 
-       pool->flags |= POOL_MANAGING_WORKERS;
-
        /*
         * To simplify both worker management and CPU hotplug, hold off
         * management while hotplug is in progress.  CPU hotplug path can't
-        * grab %POOL_MANAGING_WORKERS to achieve this because that can
-        * lead to idle worker depletion (all become busy thinking someone
-        * else is managing) which in turn can result in deadlock under
-        * extreme circumstances.  Use @pool->assoc_mutex to synchronize
-        * manager against CPU hotplug.
+        * grab @pool->manager_arb to achieve this because that can lead to
+        * idle worker depletion (all become busy thinking someone else is
+        * managing) which in turn can result in deadlock under extreme
+        * circumstances.  Use @pool->assoc_mutex to synchronize manager
+        * against CPU hotplug.
         *
         * assoc_mutex would always be free unless CPU hotplug is in
         * progress.  trylock first without dropping @pool->lock.
@@ -2059,8 +2090,8 @@ static bool manage_workers(struct worker *worker)
        ret |= maybe_destroy_workers(pool);
        ret |= maybe_create_worker(pool);
 
-       pool->flags &= ~POOL_MANAGING_WORKERS;
        mutex_unlock(&pool->assoc_mutex);
+       mutex_unlock(&pool->manager_arb);
        return ret;
 }
 
@@ -2328,8 +2359,6 @@ static int rescuer_thread(void *__rescuer)
        struct worker *rescuer = __rescuer;
        struct workqueue_struct *wq = rescuer->rescue_wq;
        struct list_head *scheduled = &rescuer->scheduled;
-       bool is_unbound = wq->flags & WQ_UNBOUND;
-       unsigned int cpu;
 
        set_user_nice(current, RESCUER_NICE_LEVEL);
 
@@ -2347,18 +2376,19 @@ repeat:
                return 0;
        }
 
-       /*
-        * See whether any cpu is asking for help.  Unbounded
-        * workqueues use cpu 0 in mayday_mask for CPU_UNBOUND.
-        */
-       for_each_mayday_cpu(cpu, wq->mayday_mask) {
-               unsigned int tcpu = is_unbound ? WORK_CPU_UNBOUND : cpu;
-               struct pool_workqueue *pwq = get_pwq(tcpu, wq);
+       /* see whether any pwq is asking for help */
+       spin_lock_irq(&workqueue_lock);
+
+       while (!list_empty(&wq->maydays)) {
+               struct pool_workqueue *pwq = list_first_entry(&wq->maydays,
+                                       struct pool_workqueue, mayday_node);
                struct worker_pool *pool = pwq->pool;
                struct work_struct *work, *n;
 
                __set_current_state(TASK_RUNNING);
-               mayday_clear_cpu(cpu, wq->mayday_mask);
+               list_del_init(&pwq->mayday_node);
+
+               spin_unlock_irq(&workqueue_lock);
 
                /* migrate to the target cpu if possible */
                worker_maybe_bind_and_lock(pool);
@@ -2384,9 +2414,12 @@ repeat:
                        wake_up_worker(pool);
 
                rescuer->pool = NULL;
-               spin_unlock_irq(&pool->lock);
+               spin_unlock(&pool->lock);
+               spin_lock(&workqueue_lock);
        }
 
+       spin_unlock_irq(&workqueue_lock);
+
        /* rescuers should never participate in concurrency management */
        WARN_ON_ONCE(!(rescuer->flags & WORKER_NOT_RUNNING));
        schedule();
@@ -2507,10 +2540,12 @@ static bool flush_workqueue_prep_pwqs(struct workqueue_struct *wq,
                atomic_set(&wq->nr_pwqs_to_flush, 1);
        }
 
+       local_irq_disable();
+
        for_each_pwq(pwq, wq) {
                struct worker_pool *pool = pwq->pool;
 
-               spin_lock_irq(&pool->lock);
+               spin_lock(&pool->lock);
 
                if (flush_color >= 0) {
                        WARN_ON_ONCE(pwq->flush_color != -1);
@@ -2527,9 +2562,11 @@ static bool flush_workqueue_prep_pwqs(struct workqueue_struct *wq,
                        pwq->work_color = work_color;
                }
 
-               spin_unlock_irq(&pool->lock);
+               spin_unlock(&pool->lock);
        }
 
+       local_irq_enable();
+
        if (flush_color >= 0 && atomic_dec_and_test(&wq->nr_pwqs_to_flush))
                complete(&wq->first_flusher->done);
 
@@ -2720,12 +2757,14 @@ void drain_workqueue(struct workqueue_struct *wq)
 reflush:
        flush_workqueue(wq);
 
+       local_irq_disable();
+
        for_each_pwq(pwq, wq) {
                bool drained;
 
-               spin_lock_irq(&pwq->pool->lock);
+               spin_lock(&pwq->pool->lock);
                drained = !pwq->nr_active && list_empty(&pwq->delayed_works);
-               spin_unlock_irq(&pwq->pool->lock);
+               spin_unlock(&pwq->pool->lock);
 
                if (drained)
                        continue;
@@ -2734,13 +2773,17 @@ reflush:
                    (flush_cnt % 100 == 0 && flush_cnt <= 1000))
                        pr_warn("workqueue %s: flush on destruction isn't complete after %u tries\n",
                                wq->name, flush_cnt);
+
+               local_irq_enable();
                goto reflush;
        }
 
-       spin_lock_irq(&workqueue_lock);
+       spin_lock(&workqueue_lock);
        if (!--wq->nr_drainers)
                wq->flags &= ~WQ_DRAINING;
-       spin_unlock_irq(&workqueue_lock);
+       spin_unlock(&workqueue_lock);
+
+       local_irq_enable();
 }
 EXPORT_SYMBOL_GPL(drain_workqueue);
 
@@ -2751,11 +2794,15 @@ static bool start_flush_work(struct work_struct *work, struct wq_barrier *barr)
        struct pool_workqueue *pwq;
 
        might_sleep();
+
+       local_irq_disable();
        pool = get_work_pool(work);
-       if (!pool)
+       if (!pool) {
+               local_irq_enable();
                return false;
+       }
 
-       spin_lock_irq(&pool->lock);
+       spin_lock(&pool->lock);
        /* see the comment in try_to_grab_pending() with the same code */
        pwq = get_work_pwq(work);
        if (pwq) {
@@ -3091,21 +3138,268 @@ int keventd_up(void)
        return system_wq != NULL;
 }
 
+/**
+ * free_workqueue_attrs - free a workqueue_attrs
+ * @attrs: workqueue_attrs to free
+ *
+ * Undo alloc_workqueue_attrs().
+ */
+void free_workqueue_attrs(struct workqueue_attrs *attrs)
+{
+       if (attrs) {
+               free_cpumask_var(attrs->cpumask);
+               kfree(attrs);
+       }
+}
+
+/**
+ * alloc_workqueue_attrs - allocate a workqueue_attrs
+ * @gfp_mask: allocation mask to use
+ *
+ * Allocate a new workqueue_attrs, initialize with default settings and
+ * return it.  Returns NULL on failure.
+ */
+struct workqueue_attrs *alloc_workqueue_attrs(gfp_t gfp_mask)
+{
+       struct workqueue_attrs *attrs;
+
+       attrs = kzalloc(sizeof(*attrs), gfp_mask);
+       if (!attrs)
+               goto fail;
+       if (!alloc_cpumask_var(&attrs->cpumask, gfp_mask))
+               goto fail;
+
+       cpumask_setall(attrs->cpumask);
+       return attrs;
+fail:
+       free_workqueue_attrs(attrs);
+       return NULL;
+}
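
For illustration only, a caller would typically build a private attrs object with the two helpers above and the copy/assign pattern shown below; make_pinned_attrs() is hypothetical:

static struct workqueue_attrs *make_pinned_attrs(int cpu, int nice)
{
	struct workqueue_attrs *attrs;

	attrs = alloc_workqueue_attrs(GFP_KERNEL);
	if (!attrs)
		return NULL;

	attrs->nice = nice;
	cpumask_copy(attrs->cpumask, cpumask_of(cpu));
	return attrs;		/* caller releases with free_workqueue_attrs() */
}
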
+
+static void copy_workqueue_attrs(struct workqueue_attrs *to,
+                                const struct workqueue_attrs *from)
+{
+       to->nice = from->nice;
+       cpumask_copy(to->cpumask, from->cpumask);
+}
+
+/*
+ * Hacky implementation of jhash of bitmaps which only considers the
+ * specified number of bits.  We probably want a proper implementation in
+ * include/linux/jhash.h.
+ */
+static u32 jhash_bitmap(const unsigned long *bitmap, int bits, u32 hash)
+{
+       int nr_longs = bits / BITS_PER_LONG;
+       int nr_leftover = bits % BITS_PER_LONG;
+       unsigned long leftover = 0;
+
+       if (nr_longs)
+               hash = jhash(bitmap, nr_longs * sizeof(long), hash);
+       if (nr_leftover) {
+               bitmap_copy(&leftover, bitmap + nr_longs, nr_leftover);
+               hash = jhash(&leftover, sizeof(long), hash);
+       }
+       return hash;
+}
+
+/* hash value of the content of @attr */
+static u32 wqattrs_hash(const struct workqueue_attrs *attrs)
+{
+       u32 hash = 0;
+
+       hash = jhash_1word(attrs->nice, hash);
+       hash = jhash_bitmap(cpumask_bits(attrs->cpumask), nr_cpu_ids, hash);
+       return hash;
+}
+
+/* content equality test */
+static bool wqattrs_equal(const struct workqueue_attrs *a,
+                         const struct workqueue_attrs *b)
+{
+       if (a->nice != b->nice)
+               return false;
+       if (!cpumask_equal(a->cpumask, b->cpumask))
+               return false;
+       return true;
+}
+
+/**
+ * init_worker_pool - initialize a newly zalloc'd worker_pool
+ * @pool: worker_pool to initialize
+ *
+ * Initialize a newly zalloc'd @pool.  It also allocates @pool->attrs.
+ * Returns 0 on success, -errno on failure.  Even on failure, all fields
+ * inside @pool proper are initialized and put_unbound_pool() can be called
+ * on @pool safely to release it.
+ */
+static int init_worker_pool(struct worker_pool *pool)
+{
+       spin_lock_init(&pool->lock);
+       pool->id = -1;
+       pool->cpu = -1;
+       pool->flags |= POOL_DISASSOCIATED;
+       INIT_LIST_HEAD(&pool->worklist);
+       INIT_LIST_HEAD(&pool->idle_list);
+       hash_init(pool->busy_hash);
+
+       init_timer_deferrable(&pool->idle_timer);
+       pool->idle_timer.function = idle_worker_timeout;
+       pool->idle_timer.data = (unsigned long)pool;
+
+       setup_timer(&pool->mayday_timer, pool_mayday_timeout,
+                   (unsigned long)pool);
+
+       mutex_init(&pool->manager_arb);
+       mutex_init(&pool->assoc_mutex);
+       ida_init(&pool->worker_ida);
+
+       INIT_HLIST_NODE(&pool->hash_node);
+       pool->refcnt = 1;
+
+       /* shouldn't fail above this point */
+       pool->attrs = alloc_workqueue_attrs(GFP_KERNEL);
+       if (!pool->attrs)
+               return -ENOMEM;
+       return 0;
+}
+
+static void rcu_free_pool(struct rcu_head *rcu)
+{
+       struct worker_pool *pool = container_of(rcu, struct worker_pool, rcu);
+
+       ida_destroy(&pool->worker_ida);
+       free_workqueue_attrs(pool->attrs);
+       kfree(pool);
+}
+
+/**
+ * put_unbound_pool - put a worker_pool
+ * @pool: worker_pool to put
+ *
+ * Put @pool.  If its refcnt reaches zero, it gets destroyed in sched-RCU
+ * safe manner.
+ */
+static void put_unbound_pool(struct worker_pool *pool)
+{
+       struct worker *worker;
+
+       spin_lock_irq(&workqueue_lock);
+       if (--pool->refcnt) {
+               spin_unlock_irq(&workqueue_lock);
+               return;
+       }
+
+       /* sanity checks */
+       if (WARN_ON(!(pool->flags & POOL_DISASSOCIATED)) ||
+           WARN_ON(!list_empty(&pool->worklist))) {
+               spin_unlock_irq(&workqueue_lock);
+               return;
+       }
+
+       /* release id and unhash */
+       if (pool->id >= 0)
+               idr_remove(&worker_pool_idr, pool->id);
+       hash_del(&pool->hash_node);
+
+       spin_unlock_irq(&workqueue_lock);
+
+       /* lock out manager and destroy all workers */
+       mutex_lock(&pool->manager_arb);
+       spin_lock_irq(&pool->lock);
+
+       while ((worker = first_worker(pool)))
+               destroy_worker(worker);
+       WARN_ON(pool->nr_workers || pool->nr_idle);
+
+       spin_unlock_irq(&pool->lock);
+       mutex_unlock(&pool->manager_arb);
+
+       /* shut down the timers */
+       del_timer_sync(&pool->idle_timer);
+       del_timer_sync(&pool->mayday_timer);
+
+       /* sched-RCU protected to allow dereferences from get_work_pool() */
+       call_rcu_sched(&pool->rcu, rcu_free_pool);
+}
+
+/**
+ * get_unbound_pool - get a worker_pool with the specified attributes
+ * @attrs: the attributes of the worker_pool to get
+ *
+ * Obtain a worker_pool which has the same attributes as @attrs, bump the
+ * reference count and return it.  If there already is a matching
+ * worker_pool, it will be used; otherwise, this function attempts to
+ * create a new one.  On failure, returns NULL.
+ */
+static struct worker_pool *get_unbound_pool(const struct workqueue_attrs *attrs)
+{
+       static DEFINE_MUTEX(create_mutex);
+       u32 hash = wqattrs_hash(attrs);
+       struct worker_pool *pool;
+       struct worker *worker;
+
+       mutex_lock(&create_mutex);
+
+       /* do we already have a matching pool? */
+       spin_lock_irq(&workqueue_lock);
+       hash_for_each_possible(unbound_pool_hash, pool, hash_node, hash) {
+               if (wqattrs_equal(pool->attrs, attrs)) {
+                       pool->refcnt++;
+                       goto out_unlock;
+               }
+       }
+       spin_unlock_irq(&workqueue_lock);
+
+       /* nope, create a new one */
+       pool = kzalloc(sizeof(*pool), GFP_KERNEL);
+       if (!pool || init_worker_pool(pool) < 0)
+               goto fail;
+
+       copy_workqueue_attrs(pool->attrs, attrs);
+
+       if (worker_pool_assign_id(pool) < 0)
+               goto fail;
+
+       /* create and start the initial worker */
+       worker = create_worker(pool);
+       if (!worker)
+               goto fail;
+
+       spin_lock_irq(&pool->lock);
+       start_worker(worker);
+       spin_unlock_irq(&pool->lock);
+
+       /* install */
+       spin_lock_irq(&workqueue_lock);
+       hash_add(unbound_pool_hash, &pool->hash_node, hash);
+out_unlock:
+       spin_unlock_irq(&workqueue_lock);
+       mutex_unlock(&create_mutex);
+       return pool;
+fail:
+       mutex_unlock(&create_mutex);
+       if (pool)
+               put_unbound_pool(pool);
+       return NULL;
+}
+
 static int alloc_and_link_pwqs(struct workqueue_struct *wq)
 {
        bool highpri = wq->flags & WQ_HIGHPRI;
        int cpu;
 
        if (!(wq->flags & WQ_UNBOUND)) {
-               wq->pool_wq.pcpu = alloc_percpu(struct pool_workqueue);
-               if (!wq->pool_wq.pcpu)
+               wq->cpu_pwqs = alloc_percpu(struct pool_workqueue);
+               if (!wq->cpu_pwqs)
                        return -ENOMEM;
 
                for_each_possible_cpu(cpu) {
-                       struct pool_workqueue *pwq = get_pwq(cpu, wq);
+                       struct pool_workqueue *pwq =
+                               per_cpu_ptr(wq->cpu_pwqs, cpu);
 
                        pwq->pool = get_std_worker_pool(cpu, highpri);
-                       list_add_tail(&pwq->pwqs_node, &wq->pwqs);
+                       list_add_tail_rcu(&pwq->pwqs_node, &wq->pwqs);
                }
        } else {
                struct pool_workqueue *pwq;
@@ -3114,9 +3408,13 @@ static int alloc_and_link_pwqs(struct workqueue_struct *wq)
                if (!pwq)
                        return -ENOMEM;
 
-               wq->pool_wq.single = pwq;
-               pwq->pool = get_std_worker_pool(WORK_CPU_UNBOUND, highpri);
-               list_add_tail(&pwq->pwqs_node, &wq->pwqs);
+               pwq->pool = get_unbound_pool(unbound_std_wq_attrs[highpri]);
+               if (!pwq->pool) {
+                       kmem_cache_free(pwq_cache, pwq);
+                       return -ENOMEM;
+               }
+
+               list_add_tail_rcu(&pwq->pwqs_node, &wq->pwqs);
        }
 
        return 0;
@@ -3125,9 +3423,10 @@ static int alloc_and_link_pwqs(struct workqueue_struct *wq)
 static void free_pwqs(struct workqueue_struct *wq)
 {
        if (!(wq->flags & WQ_UNBOUND))
-               free_percpu(wq->pool_wq.pcpu);
-       else
-               kmem_cache_free(pwq_cache, wq->pool_wq.single);
+               free_percpu(wq->cpu_pwqs);
+       else if (!list_empty(&wq->pwqs))
+               kmem_cache_free(pwq_cache, list_first_entry(&wq->pwqs,
+                                       struct pool_workqueue, pwqs_node));
 }
 
 static int wq_clamp_max_active(int max_active, unsigned int flags,
@@ -3184,6 +3483,7 @@ struct workqueue_struct *__alloc_workqueue_key(const char *fmt,
        INIT_LIST_HEAD(&wq->pwqs);
        INIT_LIST_HEAD(&wq->flusher_queue);
        INIT_LIST_HEAD(&wq->flusher_overflow);
+       INIT_LIST_HEAD(&wq->maydays);
 
        lockdep_init_map(&wq->lockdep_map, lock_name, key, 0);
        INIT_LIST_HEAD(&wq->list);
@@ -3191,20 +3491,20 @@ struct workqueue_struct *__alloc_workqueue_key(const char *fmt,
        if (alloc_and_link_pwqs(wq) < 0)
                goto err;
 
+       local_irq_disable();
        for_each_pwq(pwq, wq) {
                BUG_ON((unsigned long)pwq & WORK_STRUCT_FLAG_MASK);
                pwq->wq = wq;
                pwq->flush_color = -1;
                pwq->max_active = max_active;
                INIT_LIST_HEAD(&pwq->delayed_works);
+               INIT_LIST_HEAD(&pwq->mayday_node);
        }
+       local_irq_enable();
 
        if (flags & WQ_RESCUER) {
                struct worker *rescuer;
 
-               if (!alloc_mayday_mask(&wq->mayday_mask, GFP_KERNEL))
-                       goto err;
-
                wq->rescuer = rescuer = alloc_worker();
                if (!rescuer)
                        goto err;
@@ -3238,7 +3538,6 @@ struct workqueue_struct *__alloc_workqueue_key(const char *fmt,
 err:
        if (wq) {
                free_pwqs(wq);
-               free_mayday_mask(wq->mayday_mask);
                kfree(wq->rescuer);
                kfree(wq);
        }
@@ -3259,32 +3558,48 @@ void destroy_workqueue(struct workqueue_struct *wq)
        /* drain it before proceeding with destruction */
        drain_workqueue(wq);
 
+       spin_lock_irq(&workqueue_lock);
+
        /* sanity checks */
        for_each_pwq(pwq, wq) {
                int i;
 
-               for (i = 0; i < WORK_NR_COLORS; i++)
-                       if (WARN_ON(pwq->nr_in_flight[i]))
+               for (i = 0; i < WORK_NR_COLORS; i++) {
+                       if (WARN_ON(pwq->nr_in_flight[i])) {
+                               spin_unlock_irq(&workqueue_lock);
                                return;
+                       }
+               }
+
                if (WARN_ON(pwq->nr_active) ||
-                   WARN_ON(!list_empty(&pwq->delayed_works)))
+                   WARN_ON(!list_empty(&pwq->delayed_works))) {
+                       spin_unlock_irq(&workqueue_lock);
                        return;
+               }
        }
 
        /*
         * wq list is used to freeze wq, remove from list after
         * flushing is complete in case freeze races us.
         */
-       spin_lock_irq(&workqueue_lock);
        list_del(&wq->list);
+
        spin_unlock_irq(&workqueue_lock);
 
        if (wq->flags & WQ_RESCUER) {
                kthread_stop(wq->rescuer->task);
-               free_mayday_mask(wq->mayday_mask);
                kfree(wq->rescuer);
        }
 
+       /*
+        * We're the sole accessor of @wq at this point.  Directly access
+        * the first pwq and put its pool.
+        */
+       if (wq->flags & WQ_UNBOUND) {
+               pwq = list_first_entry(&wq->pwqs, struct pool_workqueue,
+                                      pwqs_node);
+               put_unbound_pool(pwq->pool);
+       }
        free_pwqs(wq);
        kfree(wq);
 }
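
Tying the pieces together: from the API side an unbound user looks the same as before, but alloc_workqueue() now reaches get_unbound_pool() via alloc_and_link_pwqs(), and destroy_workqueue() drops that pool reference as shown above.  A minimal sketch with a hypothetical example_fn():

static void example_fn(struct work_struct *work)
{
	pr_info("ran on an unbound worker\n");
}

static DECLARE_WORK(example_work, example_fn);

static int example_init(void)
{
	struct workqueue_struct *wq;

	wq = alloc_workqueue("example_unbound", WQ_UNBOUND, 0);
	if (!wq)
		return -ENOMEM;

	queue_work(wq, &example_work);
	flush_workqueue(wq);
	destroy_workqueue(wq);		/* puts the attrs-hashed pool */
	return 0;
}
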
@@ -3358,11 +3673,22 @@ EXPORT_SYMBOL_GPL(workqueue_set_max_active);
  * RETURNS:
  * %true if congested, %false otherwise.
  */
-bool workqueue_congested(unsigned int cpu, struct workqueue_struct *wq)
+bool workqueue_congested(int cpu, struct workqueue_struct *wq)
 {
-       struct pool_workqueue *pwq = get_pwq(cpu, wq);
+       struct pool_workqueue *pwq;
+       bool ret;
+
+       preempt_disable();
+
+       if (!(wq->flags & WQ_UNBOUND))
+               pwq = per_cpu_ptr(wq->cpu_pwqs, cpu);
+       else
+               pwq = first_pwq(wq);
+
+       ret = !list_empty(&pwq->delayed_works);
+       preempt_enable();
 
-       return !list_empty(&pwq->delayed_works);
+       return ret;
 }
 EXPORT_SYMBOL_GPL(workqueue_congested);
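
Note that for WQ_UNBOUND workqueues the @cpu argument is effectively ignored now: the sole pwq returned by first_pwq() is checked instead.  A trivial caller sketch (should_defer() is hypothetical):

static bool should_defer(int cpu)
{
	/* delayed works piling up on @cpu's pwq => producer backs off */
	return workqueue_congested(cpu, system_wq);
}
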
 
@@ -3379,19 +3705,22 @@ EXPORT_SYMBOL_GPL(workqueue_congested);
  */
 unsigned int work_busy(struct work_struct *work)
 {
-       struct worker_pool *pool = get_work_pool(work);
+       struct worker_pool *pool;
        unsigned long flags;
        unsigned int ret = 0;
 
        if (work_pending(work))
                ret |= WORK_BUSY_PENDING;
 
+       local_irq_save(flags);
+       pool = get_work_pool(work);
        if (pool) {
-               spin_lock_irqsave(&pool->lock, flags);
+               spin_lock(&pool->lock);
                if (find_worker_executing_work(pool, work))
                        ret |= WORK_BUSY_RUNNING;
-               spin_unlock_irqrestore(&pool->lock, flags);
+               spin_unlock(&pool->lock);
        }
+       local_irq_restore(flags);
 
        return ret;
 }
@@ -3474,7 +3803,7 @@ static int __cpuinit workqueue_cpu_up_callback(struct notifier_block *nfb,
                                               unsigned long action,
                                               void *hcpu)
 {
-       unsigned int cpu = (unsigned long)hcpu;
+       int cpu = (unsigned long)hcpu;
        struct worker_pool *pool;
 
        switch (action & ~CPU_TASKS_FROZEN) {
@@ -3520,7 +3849,7 @@ static int __cpuinit workqueue_cpu_down_callback(struct notifier_block *nfb,
                                                 unsigned long action,
                                                 void *hcpu)
 {
-       unsigned int cpu = (unsigned long)hcpu;
+       int cpu = (unsigned long)hcpu;
        struct work_struct unbind_work;
 
        switch (action & ~CPU_TASKS_FROZEN) {
@@ -3560,7 +3889,7 @@ static void work_for_cpu_fn(struct work_struct *work)
  * It is up to the caller to ensure that the cpu doesn't go offline.
  * The caller must not hold any locks which would prevent @fn from completing.
  */
-long work_on_cpu(unsigned int cpu, long (*fn)(void *), void *arg)
+long work_on_cpu(int cpu, long (*fn)(void *), void *arg)
 {
        struct work_for_cpu wfc = { .fn = fn, .arg = arg };
 
@@ -3586,32 +3915,33 @@ EXPORT_SYMBOL_GPL(work_on_cpu);
  */
 void freeze_workqueues_begin(void)
 {
-       unsigned int cpu;
+       struct worker_pool *pool;
+       struct workqueue_struct *wq;
+       struct pool_workqueue *pwq;
+       int id;
 
        spin_lock_irq(&workqueue_lock);
 
        WARN_ON_ONCE(workqueue_freezing);
        workqueue_freezing = true;
 
-       for_each_wq_cpu(cpu) {
-               struct worker_pool *pool;
-               struct workqueue_struct *wq;
-
-               for_each_std_worker_pool(pool, cpu) {
-                       spin_lock(&pool->lock);
-
-                       WARN_ON_ONCE(pool->flags & POOL_FREEZING);
-                       pool->flags |= POOL_FREEZING;
-
-                       list_for_each_entry(wq, &workqueues, list) {
-                               struct pool_workqueue *pwq = get_pwq(cpu, wq);
+       /* set FREEZING */
+       for_each_pool(pool, id) {
+               spin_lock(&pool->lock);
+               WARN_ON_ONCE(pool->flags & POOL_FREEZING);
+               pool->flags |= POOL_FREEZING;
+               spin_unlock(&pool->lock);
+       }
 
-                               if (pwq && pwq->pool == pool &&
-                                   (wq->flags & WQ_FREEZABLE))
-                                       pwq->max_active = 0;
-                       }
+       /* suppress further executions by setting max_active to zero */
+       list_for_each_entry(wq, &workqueues, list) {
+               if (!(wq->flags & WQ_FREEZABLE))
+                       continue;
 
-                       spin_unlock(&pool->lock);
+               for_each_pwq(pwq, wq) {
+                       spin_lock(&pwq->pool->lock);
+                       pwq->max_active = 0;
+                       spin_unlock(&pwq->pool->lock);
                }
        }
 
@@ -3633,25 +3963,22 @@ void freeze_workqueues_begin(void)
  */
 bool freeze_workqueues_busy(void)
 {
-       unsigned int cpu;
        bool busy = false;
+       struct workqueue_struct *wq;
+       struct pool_workqueue *pwq;
 
        spin_lock_irq(&workqueue_lock);
 
        WARN_ON_ONCE(!workqueue_freezing);
 
-       for_each_wq_cpu(cpu) {
-               struct workqueue_struct *wq;
+       list_for_each_entry(wq, &workqueues, list) {
+               if (!(wq->flags & WQ_FREEZABLE))
+                       continue;
                /*
                 * nr_active is monotonically decreasing.  It's safe
                 * to peek without lock.
                 */
-               list_for_each_entry(wq, &workqueues, list) {
-                       struct pool_workqueue *pwq = get_pwq(cpu, wq);
-
-                       if (!pwq || !(wq->flags & WQ_FREEZABLE))
-                               continue;
-
+               for_each_pwq(pwq, wq) {
                        WARN_ON_ONCE(pwq->nr_active < 0);
                        if (pwq->nr_active) {
                                busy = true;
@@ -3675,40 +4002,43 @@ out_unlock:
  */
 void thaw_workqueues(void)
 {
-       unsigned int cpu;
+       struct workqueue_struct *wq;
+       struct pool_workqueue *pwq;
+       struct worker_pool *pool;
+       int id;
 
        spin_lock_irq(&workqueue_lock);
 
        if (!workqueue_freezing)
                goto out_unlock;
 
-       for_each_wq_cpu(cpu) {
-               struct worker_pool *pool;
-               struct workqueue_struct *wq;
-
-               for_each_std_worker_pool(pool, cpu) {
-                       spin_lock(&pool->lock);
-
-                       WARN_ON_ONCE(!(pool->flags & POOL_FREEZING));
-                       pool->flags &= ~POOL_FREEZING;
-
-                       list_for_each_entry(wq, &workqueues, list) {
-                               struct pool_workqueue *pwq = get_pwq(cpu, wq);
-
-                               if (!pwq || pwq->pool != pool ||
-                                   !(wq->flags & WQ_FREEZABLE))
-                                       continue;
-
-                               /* restore max_active and repopulate worklist */
-                               pwq_set_max_active(pwq, wq->saved_max_active);
-                       }
+       /* clear FREEZING */
+       for_each_pool(pool, id) {
+               spin_lock(&pool->lock);
+               WARN_ON_ONCE(!(pool->flags & POOL_FREEZING));
+               pool->flags &= ~POOL_FREEZING;
+               spin_unlock(&pool->lock);
+       }
 
-                       wake_up_worker(pool);
+       /* restore max_active and repopulate worklist */
+       list_for_each_entry(wq, &workqueues, list) {
+               if (!(wq->flags & WQ_FREEZABLE))
+                       continue;
 
-                       spin_unlock(&pool->lock);
+               for_each_pwq(pwq, wq) {
+                       spin_lock(&pwq->pool->lock);
+                       pwq_set_max_active(pwq, wq->saved_max_active);
+                       spin_unlock(&pwq->pool->lock);
                }
        }
 
+       /* kick workers */
+       for_each_pool(pool, id) {
+               spin_lock(&pool->lock);
+               wake_up_worker(pool);
+               spin_unlock(&pool->lock);
+       }
+
        workqueue_freezing = false;
 out_unlock:
        spin_unlock_irq(&workqueue_lock);
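
For context, the PM freezer drives these three entry points roughly as sketched below (heavily simplified from kernel/power/process.c; the real code interleaves this with task freezing):

	freeze_workqueues_begin();		/* max_active -> 0 on freezable wqs */

	while (freeze_workqueues_busy())	/* any freezable pwq still active? */
		msleep(10);

	/* ... create/restore the system image ... */

	thaw_workqueues();			/* restore max_active, kick workers */
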
@@ -3717,7 +4047,8 @@ out_unlock:
 
 static int __init init_workqueues(void)
 {
-       unsigned int cpu;
+       int std_nice[NR_STD_WORKER_POOLS] = { 0, HIGHPRI_NICE_LEVEL };
+       int i, cpu;
 
        /* make sure we have enough bits for OFFQ pool ID */
        BUILD_BUG_ON((1LU << (BITS_PER_LONG - WORK_OFFQ_POOL_SHIFT)) <
@@ -3731,26 +4062,15 @@ static int __init init_workqueues(void)
        hotcpu_notifier(workqueue_cpu_down_callback, CPU_PRI_WORKQUEUE_DOWN);
 
        /* initialize CPU pools */
-       for_each_wq_cpu(cpu) {
+       for_each_possible_cpu(cpu) {
                struct worker_pool *pool;
 
+               i = 0;
                for_each_std_worker_pool(pool, cpu) {
-                       spin_lock_init(&pool->lock);
+                       BUG_ON(init_worker_pool(pool));
                        pool->cpu = cpu;
-                       pool->flags |= POOL_DISASSOCIATED;
-                       INIT_LIST_HEAD(&pool->worklist);
-                       INIT_LIST_HEAD(&pool->idle_list);
-                       hash_init(pool->busy_hash);
-
-                       init_timer_deferrable(&pool->idle_timer);
-                       pool->idle_timer.function = idle_worker_timeout;
-                       pool->idle_timer.data = (unsigned long)pool;
-
-                       setup_timer(&pool->mayday_timer, pool_mayday_timeout,
-                                   (unsigned long)pool);
-
-                       mutex_init(&pool->assoc_mutex);
-                       ida_init(&pool->worker_ida);
+                       cpumask_copy(pool->attrs->cpumask, cpumask_of(cpu));
+                       pool->attrs->nice = std_nice[i++];
 
                        /* alloc pool ID */
                        BUG_ON(worker_pool_assign_id(pool));
@@ -3758,14 +4078,13 @@ static int __init init_workqueues(void)
        }
 
        /* create the initial worker */
-       for_each_online_wq_cpu(cpu) {
+       for_each_online_cpu(cpu) {
                struct worker_pool *pool;
 
                for_each_std_worker_pool(pool, cpu) {
                        struct worker *worker;
 
-                       if (cpu != WORK_CPU_UNBOUND)
-                               pool->flags &= ~POOL_DISASSOCIATED;
+                       pool->flags &= ~POOL_DISASSOCIATED;
 
                        worker = create_worker(pool);
                        BUG_ON(!worker);
@@ -3775,6 +4094,18 @@ static int __init init_workqueues(void)
                }
        }
 
+       /* create default unbound wq attrs */
+       for (i = 0; i < NR_STD_WORKER_POOLS; i++) {
+               struct workqueue_attrs *attrs;
+
+               BUG_ON(!(attrs = alloc_workqueue_attrs(GFP_KERNEL)));
+
+               attrs->nice = std_nice[i];
+               cpumask_setall(attrs->cpumask);
+
+               unbound_std_wq_attrs[i] = attrs;
+       }
+
        system_wq = alloc_workqueue("events", 0, 0);
        system_highpri_wq = alloc_workqueue("events_highpri", WQ_HIGHPRI, 0);
        system_long_wq = alloc_workqueue("events_long", 0, 0);