#include <linux/kthread.h>
#include <linux/ktime.h>
#include <linux/elevator.h> /* for rq_end_sector() */
+#include <linux/blk-mq.h>
#include <trace/events/block.h>
int last_rq_rw;
sector_t last_rq_pos;
ktime_t last_rq_start_time;
+
+ /* for blk-mq request-based DM support */
+ struct blk_mq_tag_set tag_set;
};
/*
blk_update_request(tio->orig, 0, nr_bytes);
}
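+/*
+ * For blk-mq the dm_rq_target_io is embedded in the request pdu
+ * (allocated via tag_set.cmd_size); the old request_fn path stores
+ * it in rq->special at prep time.
+ */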
+static struct dm_rq_target_io *tio_from_request(struct request *rq)
+{
+ return (rq->q->mq_ops ? blk_mq_rq_to_pdu(rq) : rq->special);
+}
+
/*
* Don't touch any member of the md after calling this function because
* the md may be freed in dm_put() at the end of this function.
* queue lock again.
*/
if (run_queue) {
- if (!nr_requests_pending ||
- (nr_requests_pending >= md->queue->nr_congestion_on))
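+ /* blk-mq drives its hardware queues directly; the congestion heuristic only applies to the old request_fn path */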
+ if (md->queue->mq_ops)
+ blk_mq_run_hw_queues(md->queue, true);
+ else if (!nr_requests_pending ||
+ (nr_requests_pending >= md->queue->nr_congestion_on))
blk_run_queue_async(md->queue);
}
static void free_rq_clone(struct request *clone)
{
struct dm_rq_target_io *tio = clone->end_io_data;
+ struct mapped_device *md = tio->md;
blk_rq_unprep_clone(clone);
+
if (clone->q && clone->q->mq_ops)
tio->ti->type->release_clone_rq(clone);
else
- free_clone_request(tio->md, clone);
- free_rq_tio(tio);
+ free_clone_request(md, clone);
+
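+ /* for blk-mq the tio is part of the request pdu and is freed with the request */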
+ if (!md->queue->mq_ops)
+ free_rq_tio(tio);
}
/*
}
free_rq_clone(clone);
- blk_end_request_all(rq, error);
+ if (!rq->q->mq_ops)
+ blk_end_request_all(rq, error);
+ else
+ blk_mq_end_request(rq, error);
rq_completed(md, rw, true);
}
static void dm_unprep_request(struct request *rq)
{
- struct dm_rq_target_io *tio = rq->special;
+ struct dm_rq_target_io *tio = tio_from_request(rq);
struct request *clone = tio->clone;
- rq->special = NULL;
- rq->cmd_flags &= ~REQ_DONTPREP;
+ if (!rq->q->mq_ops) {
+ rq->special = NULL;
+ rq->cmd_flags &= ~REQ_DONTPREP;
+ }
if (clone)
free_rq_clone(clone);
/*
* Requeue the original request of a clone.
*/
-static void dm_requeue_unmapped_original_request(struct mapped_device *md,
- struct request *rq)
+static void old_requeue_request(struct request *rq)
{
- int rw = rq_data_dir(rq);
struct request_queue *q = rq->q;
unsigned long flags;
- dm_unprep_request(rq);
-
spin_lock_irqsave(q->queue_lock, flags);
blk_requeue_request(q, rq);
spin_unlock_irqrestore(q->queue_lock, flags);
+}
+
+static void dm_requeue_unmapped_original_request(struct mapped_device *md,
+ struct request *rq)
+{
+ int rw = rq_data_dir(rq);
+
+ dm_unprep_request(rq);
+
+ if (!rq->q->mq_ops)
+ old_requeue_request(rq);
+ else {
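+ /* blk_mq_requeue_request() only queues; kick the list to re-dispatch */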
+ blk_mq_requeue_request(rq);
+ blk_mq_kick_requeue_list(rq->q);
+ }
rq_completed(md, rw, false);
}
dm_requeue_unmapped_original_request(tio->md, tio->orig);
}
-static void __stop_queue(struct request_queue *q)
-{
- blk_stop_queue(q);
-}
-
-static void stop_queue(struct request_queue *q)
+static void old_stop_queue(struct request_queue *q)
{
unsigned long flags;
+ if (blk_queue_stopped(q))
+ return;
+
spin_lock_irqsave(q->queue_lock, flags);
- __stop_queue(q);
+ blk_stop_queue(q);
spin_unlock_irqrestore(q->queue_lock, flags);
}
-static void __start_queue(struct request_queue *q)
+static void stop_queue(struct request_queue *q)
{
- if (blk_queue_stopped(q))
- blk_start_queue(q);
+ if (!q->mq_ops)
+ old_stop_queue(q);
+ else
+ blk_mq_stop_hw_queues(q);
}
-static void start_queue(struct request_queue *q)
+static void old_start_queue(struct request_queue *q)
{
unsigned long flags;
spin_lock_irqsave(q->queue_lock, flags);
- __start_queue(q);
+ if (blk_queue_stopped(q))
+ blk_start_queue(q);
spin_unlock_irqrestore(q->queue_lock, flags);
}
+static void start_queue(struct request_queue *q)
+{
+ if (!q->mq_ops)
+ old_start_queue(q);
+ else
+ blk_mq_start_stopped_hw_queues(q, true);
+}
+
static void dm_done(struct request *clone, int error, bool mapped)
{
int r = error;
static void dm_softirq_done(struct request *rq)
{
bool mapped = true;
- struct dm_rq_target_io *tio = rq->special;
+ struct dm_rq_target_io *tio = tio_from_request(rq);
struct request *clone = tio->clone;
+ int rw;
if (!clone) {
- blk_end_request_all(rq, tio->error);
- rq_completed(tio->md, rq_data_dir(rq), false);
- free_rq_tio(tio);
+ rw = rq_data_dir(rq);
+ if (!rq->q->mq_ops) {
+ blk_end_request_all(rq, tio->error);
+ rq_completed(tio->md, rw, false);
+ free_rq_tio(tio);
+ } else {
+ blk_mq_end_request(rq, tio->error);
+ rq_completed(tio->md, rw, false);
+ }
return;
}
*/
static void dm_complete_request(struct request *rq, int error)
{
- struct dm_rq_target_io *tio = rq->special;
+ struct dm_rq_target_io *tio = tio_from_request(rq);
tio->error = error;
blk_complete_request(rq);
}
/*
- * Called with the clone's queue lock held
+ * Called with the clone's queue lock held (for non-blk-mq)
*/
static void end_clone_request(struct request *clone, int error)
{
static void map_tio_request(struct kthread_work *work);
+static void init_tio(struct dm_rq_target_io *tio, struct request *rq,
+ struct mapped_device *md)
+{
+ tio->md = md;
+ tio->ti = NULL;
+ tio->clone = NULL;
+ tio->orig = rq;
+ tio->error = 0;
+ memset(&tio->info, 0, sizeof(tio->info));
+ init_kthread_work(&tio->work, map_tio_request);
+}
+
static struct dm_rq_target_io *prep_tio(struct request *rq,
struct mapped_device *md, gfp_t gfp_mask)
{
if (!tio)
return NULL;
- tio->md = md;
- tio->ti = NULL;
- tio->clone = NULL;
- tio->orig = rq;
- tio->error = 0;
- memset(&tio->info, 0, sizeof(tio->info));
- init_kthread_work(&tio->work, map_tio_request);
+ init_tio(tio, rq, md);
table = dm_get_live_table(md, &srcu_idx);
if (!dm_table_mq_request_based(table)) {
* DM_MAPIO_REQUEUE : the original request needs to be requeued
* < 0 : the request was completed due to failure
*/
-static int map_request(struct dm_target *ti, struct request *rq,
+static int map_request(struct dm_rq_target_io *tio, struct request *rq,
struct mapped_device *md)
{
int r;
- struct dm_rq_target_io *tio = rq->special;
+ struct dm_target *ti = tio->ti;
struct request *clone = NULL;
if (tio->clone) {
}
if (IS_ERR(clone))
return DM_MAPIO_REQUEUE;
- if (setup_clone(clone, rq, tio, GFP_KERNEL)) {
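+ /* map_request() runs from the kthread worker, so a sleeping GFP_NOIO allocation is safe */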
+ if (setup_clone(clone, rq, tio, GFP_NOIO)) {
/* -ENOMEM */
ti->type->release_clone_rq(clone);
return DM_MAPIO_REQUEUE;
struct request *rq = tio->orig;
struct mapped_device *md = tio->md;
- if (map_request(tio->ti, rq, md) == DM_MAPIO_REQUEUE)
+ if (map_request(tio, rq, md) == DM_MAPIO_REQUEUE)
dm_requeue_unmapped_original_request(md, rq);
}
static void dm_start_request(struct mapped_device *md, struct request *orig)
{
- blk_start_request(orig);
+ if (!orig->q->mq_ops)
+ blk_start_request(orig);
+ else
+ blk_mq_start_request(orig);
atomic_inc(&md->pending[rq_data_dir(orig)]);
if (md->seq_rq_merge_deadline_usecs) {
dm_start_request(md, rq);
- tio = rq->special;
+ tio = tio_from_request(rq);
/* Establish tio->ti before queuing work (map_tio_request) */
tio->ti = ti;
queue_kthread_work(&md->kworker, &tio->work);
{
/*
* Request-based dm devices cannot be stacked on top of bio-based dm
- * devices. The type of this dm device has not been decided yet.
+ * devices. The type of this dm device may not have been decided yet.
* The type is decided at the first table loading time.
* To prevent problematic device stacking, clear the queue flag
* for request stacking support until then.
* This queue is new, so no concurrency on the queue_flags.
*/
queue_flag_clear_unlocked(QUEUE_FLAG_STACKABLE, md->queue);
+}
+
+static void dm_init_old_md_queue(struct mapped_device *md)
+{
+ dm_init_md_queue(md);
+ /*
+ * Initialize aspects of the queue that aren't relevant for blk-mq
+ */
md->queue->queuedata = md;
md->queue->backing_dev_info.congested_fn = dm_any_congested;
md->queue->backing_dev_info.congested_data = md;
static void free_dev(struct mapped_device *md)
{
int minor = MINOR(disk_devt(md->disk));
+ bool using_blk_mq = !!md->queue->mq_ops;
unlock_fs(md);
destroy_workqueue(md->wq);
del_gendisk(md->disk);
put_disk(md->disk);
blk_cleanup_queue(md->queue);
+ if (using_blk_mq)
+ blk_mq_free_tag_set(&md->tag_set);
bdput(md->bdev);
free_minor(minor);
* This must be done before setting the queue restrictions,
* because request-based dm may be run just after the setting.
*/
- if (dm_table_request_based(t) && !blk_queue_stopped(q))
+ if (dm_table_request_based(t))
stop_queue(q);
__bind_mempools(md, t);
return md->type;
}
-static bool dm_md_type_request_based(struct mapped_device *md)
-{
- unsigned table_type = dm_get_md_type(md);
-
- return (table_type == DM_TYPE_REQUEST_BASED ||
- table_type == DM_TYPE_MQ_REQUEST_BASED);
-}
-
struct target_type *dm_get_immutable_target_type(struct mapped_device *md)
{
return md->immutable_target_type;
}
EXPORT_SYMBOL_GPL(dm_get_queue_limits);
+static void init_rq_based_worker_thread(struct mapped_device *md)
+{
+ /* Initialize the request-based DM worker thread */
+ init_kthread_worker(&md->kworker);
+ md->kworker_task = kthread_run(kthread_worker_fn, &md->kworker,
+ "kdmwork-%s", dm_device_name(md));
+}
+
/*
* Fully initialize a request-based queue (->elevator, ->request_fn, etc).
*/
struct request_queue *q = NULL;
if (md->queue->elevator)
- return 1;
+ return 0;
/* Fully initialize the queue */
q = blk_init_allocated_queue(md->queue, dm_request_fn, NULL);
if (!q)
- return 0;
+ return -EINVAL;
/* disable dm_request_fn's merge heuristic by default */
md->seq_rq_merge_deadline_usecs = 0;
md->queue = q;
- dm_init_md_queue(md);
+ dm_init_old_md_queue(md);
blk_queue_softirq_done(md->queue, dm_softirq_done);
blk_queue_prep_rq(md->queue, dm_prep_fn);
- /* Also initialize the request-based DM worker thread */
- init_kthread_worker(&md->kworker);
- md->kworker_task = kthread_run(kthread_worker_fn, &md->kworker,
- "kdmwork-%s", dm_device_name(md));
+ init_rq_based_worker_thread(md);
elv_register_queue(md->queue);
- return 1;
+ return 0;
+}
+
+static int dm_mq_init_request(void *data, struct request *rq,
+ unsigned int hctx_idx, unsigned int request_idx,
+ unsigned int numa_node)
+{
+ struct mapped_device *md = data;
+ struct dm_rq_target_io *tio = blk_mq_rq_to_pdu(rq);
+
+ /*
+ * Must initialize the md member of tio; otherwise it won't
+ * be available in dm_mq_queue_rq.
+ */
+ tio->md = md;
+
+ return 0;
+}
+
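+/*
+ * blk-mq .queue_rq handler: resolve the target for the request, start it,
+ * and defer the actual mapping to the kthread worker (map_tio_request).
+ */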
+static int dm_mq_queue_rq(struct blk_mq_hw_ctx *hctx,
+ const struct blk_mq_queue_data *bd)
+{
+ struct request *rq = bd->rq;
+ struct dm_rq_target_io *tio = blk_mq_rq_to_pdu(rq);
+ struct mapped_device *md = tio->md;
+ int srcu_idx;
+ struct dm_table *map = dm_get_live_table(md, &srcu_idx);
+ struct dm_target *ti;
+ sector_t pos;
+
+ /* for now, always use block 0 to find the target for flushes */
+ pos = 0;
+ if (!(rq->cmd_flags & REQ_FLUSH))
+ pos = blk_rq_pos(rq);
+
+ ti = dm_table_find_target(map, pos);
+ if (!dm_target_is_valid(ti)) {
+ dm_put_live_table(md, srcu_idx);
+ DMERR_LIMIT("request attempted access beyond the end of device");
+ /*
+ * Must perform the setup that rq_completed() requires
+ * before returning BLK_MQ_RQ_QUEUE_ERROR.
+ */
+ dm_start_request(md, rq);
+ return BLK_MQ_RQ_QUEUE_ERROR;
+ }
+ dm_put_live_table(md, srcu_idx);
+
+ if (ti->type->busy && ti->type->busy(ti))
+ return BLK_MQ_RQ_QUEUE_BUSY;
+
+ dm_start_request(md, rq);
+
+ /* Init tio using md established in .init_request */
+ init_tio(tio, rq, md);
+
+ /* Establish tio->ti before queuing work (map_tio_request) */
+ tio->ti = ti;
+ queue_kthread_work(&md->kworker, &tio->work);
+
+ return BLK_MQ_RQ_QUEUE_OK;
+}
+
+static struct blk_mq_ops dm_mq_ops = {
+ .queue_rq = dm_mq_queue_rq,
+ .map_queue = blk_mq_map_queue,
+ .complete = dm_softirq_done,
+ .init_request = dm_mq_init_request,
+};
+
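+/*
+ * Fully initialize a blk-mq request-based queue (tag set, hw queues, kworker).
+ */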
+static int dm_init_request_based_blk_mq_queue(struct mapped_device *md)
+{
+ struct request_queue *q;
+ int err;
+
+ memset(&md->tag_set, 0, sizeof(md->tag_set));
+ md->tag_set.ops = &dm_mq_ops;
+ md->tag_set.queue_depth = BLKDEV_MAX_RQ;
+ md->tag_set.numa_node = NUMA_NO_NODE;
+ md->tag_set.flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_SG_MERGE;
+ md->tag_set.nr_hw_queues = 1;
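+ /* per-request pdu carries the dm_rq_target_io (see tio_from_request) */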
+ md->tag_set.cmd_size = sizeof(struct dm_rq_target_io);
+ md->tag_set.driver_data = md;
+
+ err = blk_mq_alloc_tag_set(&md->tag_set);
+ if (err)
+ return err;
+
+ q = blk_mq_init_allocated_queue(&md->tag_set, md->queue);
+ if (IS_ERR(q)) {
+ err = PTR_ERR(q);
+ goto out_tag_set;
+ }
+ md->queue = q;
+ dm_init_md_queue(md);
+
+ /* backfill 'mq' sysfs registration normally done in blk_register_queue */
+ blk_mq_register_disk(md->disk);
+
+ init_rq_based_worker_thread(md);
+
+ return 0;
+
+out_tag_set:
+ blk_mq_free_tag_set(&md->tag_set);
+ return err;
}
/*
*/
int dm_setup_md_queue(struct mapped_device *md)
{
- if (dm_md_type_request_based(md)) {
- if (!dm_init_request_based_queue(md)) {
+ int r;
+ unsigned md_type = dm_get_md_type(md);
+
+ switch (md_type) {
+ case DM_TYPE_REQUEST_BASED:
+ r = dm_init_request_based_queue(md);
+ if (r) {
DMWARN("Cannot initialize queue for request-based mapped device");
- return -EINVAL;
+ return r;
}
- } else {
- /* bio-based specific initialization */
+ break;
+ case DM_TYPE_MQ_REQUEST_BASED:
+ r = dm_init_request_based_blk_mq_queue(md);
+ if (r) {
+ DMWARN("Cannot initialize queue for request-based blk-mq mapped device");
+ return r;
+ }
+ break;
+ case DM_TYPE_BIO_BASED:
+ dm_init_old_md_queue(md);
blk_queue_make_request(md->queue, dm_make_request);
blk_queue_merge_bvec(md->queue, dm_merge_bvec);
+ break;
}
return 0;