blkcg: let blkcg core manage per-queue blkg list and counter
author: Tejun Heo <tj@kernel.org>
Mon, 5 Mar 2012 21:15:19 +0000 (13:15 -0800)
committer: Jens Axboe <axboe@kernel.dk>
Tue, 6 Mar 2012 20:27:23 +0000 (21:27 +0100)
With the previous patch to move blkg list heads and counters to
request_queue and blkg, the logic to manage them in both policies is
almost identical and can be moved to blkcg core.

This patch moves blkg link logic into blkg_lookup_create(), implements
common blkg unlink code in blkg_destroy(), and updates
blkg_destroy_all() so that it's policy specific and can skip the root
group.  The updated blkg_destroy_all() is now used both to clear a queue
for bypassing and elv switching, and to release all blkgs on q exit.

This patch introduces a race window where policy [de]registration may
race against queue blkg clearing.  This can only be a problem on cfq
unload and shouldn't be a real problem in practice (and we have many
other places where this race already exists).  Future patches will
remove these unlikely races.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Vivek Goyal <vgoyal@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
block/blk-cgroup.c
block/blk-cgroup.h
block/blk-throttle.c
block/cfq-iosched.c
block/elevator.c
include/linux/blkdev.h

index e940972ccd66d9e717c4466fa623d861f7cddb5d..2ca9a15db0f7d2063dc7fb6cd24aae9363fa9383 100644 (file)
@@ -596,8 +596,11 @@ struct blkio_group *blkg_lookup_create(struct blkio_cgroup *blkcg,
        /* insert */
        spin_lock(&blkcg->lock);
        swap(blkg, new_blkg);
+
        hlist_add_head_rcu(&blkg->blkcg_node, &blkcg->blkg_list);
-       pol->ops.blkio_link_group_fn(q, blkg);
+       list_add(&blkg->q_node[plid], &q->blkg_list[plid]);
+       q->nr_blkgs[plid]++;
+
        spin_unlock(&blkcg->lock);
 out:
        blkg_free(new_blkg);
@@ -646,36 +649,69 @@ struct blkio_group *blkg_lookup(struct blkio_cgroup *blkcg,
 }
 EXPORT_SYMBOL_GPL(blkg_lookup);
 
-void blkg_destroy_all(struct request_queue *q)
+static void blkg_destroy(struct blkio_group *blkg, enum blkio_policy_id plid)
+{
+       struct request_queue *q = blkg->q;
+
+       lockdep_assert_held(q->queue_lock);
+
+       /* Something wrong if we are trying to remove same group twice */
+       WARN_ON_ONCE(list_empty(&blkg->q_node[plid]));
+       list_del_init(&blkg->q_node[plid]);
+
+       WARN_ON_ONCE(q->nr_blkgs[plid] <= 0);
+       q->nr_blkgs[plid]--;
+
+       /*
+        * Put the reference taken at the time of creation so that when all
+        * queues are gone, group can be destroyed.
+        */
+       blkg_put(blkg);
+}
+
+void blkg_destroy_all(struct request_queue *q, enum blkio_policy_id plid,
+                     bool destroy_root)
 {
-       struct blkio_policy_type *pol;
+       struct blkio_group *blkg, *n;
 
        while (true) {
                bool done = true;
 
-               spin_lock(&blkio_list_lock);
                spin_lock_irq(q->queue_lock);
 
-               /*
-                * clear_queue_fn() might return with non-empty group list
-                * if it raced cgroup removal and lost.  cgroup removal is
-                * guaranteed to make forward progress and retrying after a
-                * while is enough.  This ugliness is scheduled to be
-                * removed after locking update.
-                */
-               list_for_each_entry(pol, &blkio_list, list)
-                       if (!pol->ops.blkio_clear_queue_fn(q))
+               list_for_each_entry_safe(blkg, n, &q->blkg_list[plid],
+                                        q_node[plid]) {
+                       /* skip root? */
+                       if (!destroy_root && blkg->blkcg == &blkio_root_cgroup)
+                               continue;
+
+                       /*
+                        * If cgroup removal path got to blk_group first
+                        * and removed it from cgroup list, then it will
+                        * take care of destroying the blkg also.
+                        */
+                       if (!blkiocg_del_blkio_group(blkg))
+                               blkg_destroy(blkg, plid);
+                       else
                                done = false;
+               }
 
                spin_unlock_irq(q->queue_lock);
-               spin_unlock(&blkio_list_lock);
 
+               /*
+                * Group list may not be empty if we raced cgroup removal
+                * and lost.  cgroup removal is guaranteed to make forward
+                * progress and retrying after a while is enough.  This
+                * ugliness is scheduled to be removed after locking
+                * update.
+                */
                if (done)
                        break;
 
                msleep(10);     /* just some random duration I like */
        }
 }
+EXPORT_SYMBOL_GPL(blkg_destroy_all);
 
 static void blkg_rcu_free(struct rcu_head *rcu_head)
 {
@@ -1549,11 +1585,13 @@ static int blkiocg_pre_destroy(struct cgroup_subsys *subsys,
                 * this event.
                 */
                spin_lock(&blkio_list_lock);
+               spin_lock_irqsave(q->queue_lock, flags);
                list_for_each_entry(blkiop, &blkio_list, list) {
                        if (blkiop->plid != blkg->plid)
                                continue;
-                       blkiop->ops.blkio_unlink_group_fn(q, blkg);
+                       blkg_destroy(blkg, blkiop->plid);
                }
+               spin_unlock_irqrestore(q->queue_lock, flags);
                spin_unlock(&blkio_list_lock);
        } while (1);
 
@@ -1695,12 +1733,14 @@ static void blkcg_bypass_start(void)
        __acquires(&all_q_mutex)
 {
        struct request_queue *q;
+       int i;
 
        mutex_lock(&all_q_mutex);
 
        list_for_each_entry(q, &all_q_list, all_q_node) {
                blk_queue_bypass_start(q);
-               blkg_destroy_all(q);
+               for (i = 0; i < BLKIO_NR_POLICIES; i++)
+                       blkg_destroy_all(q, i, false);
        }
 }
 
index ae96f196d4695e8ca245864b453edb48be6159f3..83ce5fa0a6043b53d31dee61b33566a5a0c07648 100644 (file)
@@ -196,11 +196,6 @@ struct blkio_group {
 };
 
 typedef void (blkio_init_group_fn)(struct blkio_group *blkg);
-typedef void (blkio_link_group_fn)(struct request_queue *q,
-                       struct blkio_group *blkg);
-typedef void (blkio_unlink_group_fn)(struct request_queue *q,
-                       struct blkio_group *blkg);
-typedef bool (blkio_clear_queue_fn)(struct request_queue *q);
 typedef void (blkio_update_group_weight_fn)(struct request_queue *q,
                        struct blkio_group *blkg, unsigned int weight);
 typedef void (blkio_update_group_read_bps_fn)(struct request_queue *q,
@@ -214,9 +209,6 @@ typedef void (blkio_update_group_write_iops_fn)(struct request_queue *q,
 
 struct blkio_policy_ops {
        blkio_init_group_fn *blkio_init_group_fn;
-       blkio_link_group_fn *blkio_link_group_fn;
-       blkio_unlink_group_fn *blkio_unlink_group_fn;
-       blkio_clear_queue_fn *blkio_clear_queue_fn;
        blkio_update_group_weight_fn *blkio_update_group_weight_fn;
        blkio_update_group_read_bps_fn *blkio_update_group_read_bps_fn;
        blkio_update_group_write_bps_fn *blkio_update_group_write_bps_fn;
@@ -238,7 +230,8 @@ extern void blkcg_exit_queue(struct request_queue *q);
 /* Blkio controller policy registration */
 extern void blkio_policy_register(struct blkio_policy_type *);
 extern void blkio_policy_unregister(struct blkio_policy_type *);
-extern void blkg_destroy_all(struct request_queue *q);
+extern void blkg_destroy_all(struct request_queue *q,
+                            enum blkio_policy_id plid, bool destroy_root);
 
 /**
  * blkg_to_pdata - get policy private data
@@ -319,7 +312,9 @@ static inline void blkcg_drain_queue(struct request_queue *q) { }
 static inline void blkcg_exit_queue(struct request_queue *q) { }
 static inline void blkio_policy_register(struct blkio_policy_type *blkiop) { }
 static inline void blkio_policy_unregister(struct blkio_policy_type *blkiop) { }
-static inline void blkg_destroy_all(struct request_queue *q) { }
+static inline void blkg_destroy_all(struct request_queue *q,
+                                   enum blkio_policy_id plid,
+                                   bool destroy_root) { }
 
 static inline void *blkg_to_pdata(struct blkio_group *blkg,
                                struct blkio_policy_type *pol) { return NULL; }
index c15d38307e1d196cf690c113ea370c783b40f37e..132941260e58c42717933be939f01c0cad64bae4 100644 (file)
@@ -157,14 +157,6 @@ static void throtl_init_blkio_group(struct blkio_group *blkg)
        tg->iops[WRITE] = -1;
 }
 
-static void throtl_link_blkio_group(struct request_queue *q,
-                                   struct blkio_group *blkg)
-{
-       list_add(&blkg->q_node[BLKIO_POLICY_THROTL],
-                &q->blkg_list[BLKIO_POLICY_THROTL]);
-       q->nr_blkgs[BLKIO_POLICY_THROTL]++;
-}
-
 static struct
 throtl_grp *throtl_lookup_tg(struct throtl_data *td, struct blkio_cgroup *blkcg)
 {
@@ -813,89 +805,6 @@ throtl_schedule_delayed_work(struct throtl_data *td, unsigned long delay)
        }
 }
 
-static void
-throtl_destroy_tg(struct throtl_data *td, struct throtl_grp *tg)
-{
-       struct blkio_group *blkg = tg_to_blkg(tg);
-
-       /* Something wrong if we are trying to remove same group twice */
-       WARN_ON_ONCE(list_empty(&blkg->q_node[BLKIO_POLICY_THROTL]));
-
-       list_del_init(&blkg->q_node[BLKIO_POLICY_THROTL]);
-
-       /*
-        * Put the reference taken at the time of creation so that when all
-        * queues are gone, group can be destroyed.
-        */
-       blkg_put(tg_to_blkg(tg));
-       td->queue->nr_blkgs[BLKIO_POLICY_THROTL]--;
-}
-
-static bool throtl_release_tgs(struct throtl_data *td, bool release_root)
-{
-       struct request_queue *q = td->queue;
-       struct blkio_group *blkg, *n;
-       bool empty = true;
-
-       list_for_each_entry_safe(blkg, n, &q->blkg_list[BLKIO_POLICY_THROTL],
-                                q_node[BLKIO_POLICY_THROTL]) {
-               struct throtl_grp *tg = blkg_to_tg(blkg);
-
-               /* skip root? */
-               if (!release_root && tg == td->root_tg)
-                       continue;
-
-               /*
-                * If cgroup removal path got to blk_group first and removed
-                * it from cgroup list, then it will take care of destroying
-                * cfqg also.
-                */
-               if (!blkiocg_del_blkio_group(blkg))
-                       throtl_destroy_tg(td, tg);
-               else
-                       empty = false;
-       }
-       return empty;
-}
-
-/*
- * Blk cgroup controller notification saying that blkio_group object is being
- * delinked as associated cgroup object is going away. That also means that
- * no new IO will come in this group. So get rid of this group as soon as
- * any pending IO in the group is finished.
- *
- * This function is called under rcu_read_lock(). @q is the rcu protected
- * pointer. That means @q is a valid request_queue pointer as long as we
- * are rcu read lock.
- *
- * @q was fetched from blkio_group under blkio_cgroup->lock. That means
- * it should not be NULL as even if queue was going away, cgroup deltion
- * path got to it first.
- */
-void throtl_unlink_blkio_group(struct request_queue *q,
-                              struct blkio_group *blkg)
-{
-       unsigned long flags;
-
-       spin_lock_irqsave(q->queue_lock, flags);
-       throtl_destroy_tg(q->td, blkg_to_tg(blkg));
-       spin_unlock_irqrestore(q->queue_lock, flags);
-}
-
-static bool throtl_clear_queue(struct request_queue *q)
-{
-       lockdep_assert_held(q->queue_lock);
-
-       /*
-        * Clear tgs but leave the root one alone.  This is necessary
-        * because root_tg is expected to be persistent and safe because
-        * blk-throtl can never be disabled while @q is alive.  This is a
-        * kludge to prepare for unified blkg.  This whole function will be
-        * removed soon.
-        */
-       return throtl_release_tgs(q->td, false);
-}
-
 static void throtl_update_blkio_group_common(struct throtl_data *td,
                                struct throtl_grp *tg)
 {
@@ -960,9 +869,6 @@ static void throtl_shutdown_wq(struct request_queue *q)
 static struct blkio_policy_type blkio_policy_throtl = {
        .ops = {
                .blkio_init_group_fn = throtl_init_blkio_group,
-               .blkio_link_group_fn = throtl_link_blkio_group,
-               .blkio_unlink_group_fn = throtl_unlink_blkio_group,
-               .blkio_clear_queue_fn = throtl_clear_queue,
                .blkio_update_group_read_bps_fn =
                                        throtl_update_blkio_group_read_bps,
                .blkio_update_group_write_bps_fn =
@@ -1148,12 +1054,11 @@ void blk_throtl_exit(struct request_queue *q)
 
        throtl_shutdown_wq(q);
 
-       spin_lock_irq(q->queue_lock);
-       throtl_release_tgs(td, true);
+       blkg_destroy_all(q, BLKIO_POLICY_THROTL, true);
 
        /* If there are other groups */
+       spin_lock_irq(q->queue_lock);
        wait = q->nr_blkgs[BLKIO_POLICY_THROTL];
-
        spin_unlock_irq(q->queue_lock);
 
        /*
index e846803280a6bb866a0dbc580e39afd2d5a19cd6..dc73690dec44407e86c338d80581d15e93a8fe16 100644 (file)
@@ -1045,14 +1045,6 @@ static void cfq_update_blkio_group_weight(struct request_queue *q,
        cfqg->needs_update = true;
 }
 
-static void cfq_link_blkio_group(struct request_queue *q,
-                                struct blkio_group *blkg)
-{
-       list_add(&blkg->q_node[BLKIO_POLICY_PROP],
-                &q->blkg_list[BLKIO_POLICY_PROP]);
-       q->nr_blkgs[BLKIO_POLICY_PROP]++;
-}
-
 static void cfq_init_blkio_group(struct blkio_group *blkg)
 {
        struct cfq_group *cfqg = blkg_to_cfqg(blkg);
@@ -1096,84 +1088,6 @@ static void cfq_link_cfqq_cfqg(struct cfq_queue *cfqq, struct cfq_group *cfqg)
        blkg_get(cfqg_to_blkg(cfqg));
 }
 
-static void cfq_destroy_cfqg(struct cfq_data *cfqd, struct cfq_group *cfqg)
-{
-       struct blkio_group *blkg = cfqg_to_blkg(cfqg);
-
-       /* Something wrong if we are trying to remove same group twice */
-       BUG_ON(list_empty(&blkg->q_node[BLKIO_POLICY_PROP]));
-
-       list_del_init(&blkg->q_node[BLKIO_POLICY_PROP]);
-
-       BUG_ON(cfqd->queue->nr_blkgs[BLKIO_POLICY_PROP] <= 0);
-       cfqd->queue->nr_blkgs[BLKIO_POLICY_PROP]--;
-
-       /*
-        * Put the reference taken at the time of creation so that when all
-        * queues are gone, group can be destroyed.
-        */
-       blkg_put(cfqg_to_blkg(cfqg));
-}
-
-static bool cfq_release_cfq_groups(struct cfq_data *cfqd)
-{
-       struct request_queue *q = cfqd->queue;
-       struct blkio_group *blkg, *n;
-       bool empty = true;
-
-       list_for_each_entry_safe(blkg, n, &q->blkg_list[BLKIO_POLICY_PROP],
-                                q_node[BLKIO_POLICY_PROP]) {
-               /*
-                * If cgroup removal path got to blk_group first and removed
-                * it from cgroup list, then it will take care of destroying
-                * cfqg also.
-                */
-               if (!cfq_blkiocg_del_blkio_group(blkg))
-                       cfq_destroy_cfqg(cfqd, blkg_to_cfqg(blkg));
-               else
-                       empty = false;
-       }
-       return empty;
-}
-
-/*
- * Blk cgroup controller notification saying that blkio_group object is being
- * delinked as associated cgroup object is going away. That also means that
- * no new IO will come in this group. So get rid of this group as soon as
- * any pending IO in the group is finished.
- *
- * This function is called under rcu_read_lock(). key is the rcu protected
- * pointer. That means @q is a valid request_queue pointer as long as we
- * are rcu read lock.
- *
- * @q was fetched from blkio_group under blkio_cgroup->lock. That means
- * it should not be NULL as even if elevator was exiting, cgroup deltion
- * path got to it first.
- */
-static void cfq_unlink_blkio_group(struct request_queue *q,
-                                  struct blkio_group *blkg)
-{
-       struct cfq_data *cfqd = q->elevator->elevator_data;
-       unsigned long flags;
-
-       spin_lock_irqsave(q->queue_lock, flags);
-       cfq_destroy_cfqg(cfqd, blkg_to_cfqg(blkg));
-       spin_unlock_irqrestore(q->queue_lock, flags);
-}
-
-static struct elevator_type iosched_cfq;
-
-static bool cfq_clear_queue(struct request_queue *q)
-{
-       lockdep_assert_held(q->queue_lock);
-
-       /* shoot down blkgs iff the current elevator is cfq */
-       if (!q->elevator || q->elevator->type != &iosched_cfq)
-               return true;
-
-       return cfq_release_cfq_groups(q->elevator->elevator_data);
-}
-
 #else /* GROUP_IOSCHED */
 static struct cfq_group *cfq_lookup_create_cfqg(struct cfq_data *cfqd,
                                                struct blkio_cgroup *blkcg)
@@ -1186,8 +1100,6 @@ cfq_link_cfqq_cfqg(struct cfq_queue *cfqq, struct cfq_group *cfqg) {
        cfqq->cfqg = cfqg;
 }
 
-static void cfq_release_cfq_groups(struct cfq_data *cfqd) {}
-
 #endif /* GROUP_IOSCHED */
 
 /*
@@ -3547,17 +3459,20 @@ static void cfq_exit_queue(struct elevator_queue *e)
                __cfq_slice_expired(cfqd, cfqd->active_queue, 0);
 
        cfq_put_async_queues(cfqd);
-       cfq_release_cfq_groups(cfqd);
+
+       spin_unlock_irq(q->queue_lock);
+
+       blkg_destroy_all(q, BLKIO_POLICY_PROP, true);
 
 #ifdef CONFIG_BLK_CGROUP
        /*
         * If there are groups which we could not unlink from blkcg list,
         * wait for a rcu period for them to be freed.
         */
+       spin_lock_irq(q->queue_lock);
        wait = q->nr_blkgs[BLKIO_POLICY_PROP];
-#endif
        spin_unlock_irq(q->queue_lock);
-
+#endif
        cfq_shutdown_timer_wq(cfqd);
 
        /*
@@ -3794,9 +3709,6 @@ static struct elevator_type iosched_cfq = {
 static struct blkio_policy_type blkio_policy_cfq = {
        .ops = {
                .blkio_init_group_fn =          cfq_init_blkio_group,
-               .blkio_link_group_fn =          cfq_link_blkio_group,
-               .blkio_unlink_group_fn =        cfq_unlink_blkio_group,
-               .blkio_clear_queue_fn = cfq_clear_queue,
                .blkio_update_group_weight_fn = cfq_update_blkio_group_weight,
        },
        .plid = BLKIO_POLICY_PROP,
index 8c7561fd2c7983f5934d84113401646c445a37e2..d4d39dab841a2d5530aa48ecb26f7c49d7dd71d8 100644 (file)
@@ -876,7 +876,7 @@ static int elevator_switch(struct request_queue *q, struct elevator_type *new_e)
 {
        struct elevator_queue *old = q->elevator;
        bool registered = old->registered;
-       int err;
+       int i, err;
 
        /*
         * Turn on BYPASS and drain all requests w/ elevator private data.
@@ -895,7 +895,8 @@ static int elevator_switch(struct request_queue *q, struct elevator_type *new_e)
        ioc_clear_queue(q);
        spin_unlock_irq(q->queue_lock);
 
-       blkg_destroy_all(q);
+       for (i = 0; i < BLKIO_NR_POLICIES; i++)
+               blkg_destroy_all(q, i, false);
 
        /* allocate, init and register new elevator */
        err = -ENOMEM;
index f4e35edea70f48264e9e632bcc4ba97edf50d8f1..b4d1d4bfc1680a4b880ef77ee7a6e39364eed283 100644 (file)
@@ -364,8 +364,8 @@ struct request_queue {
        struct list_head        icq_list;
 #ifdef CONFIG_BLK_CGROUP
        /* XXX: array size hardcoded to avoid include dependency (temporary) */
-       struct list_head        blkg_list[2];
-       int                     nr_blkgs[2];
+       struct list_head        blkg_list[2];
+       int                     nr_blkgs[2];
 #endif
 
        struct queue_limits     limits;