blk-mq: Don't reserve a tag for flush request
authorShaohua Li <shli@kernel.org>
Tue, 31 Dec 2013 03:38:50 +0000 (11:38 +0800)
committerJens Axboe <axboe@kernel.dk>
Thu, 30 Jan 2014 19:57:25 +0000 (12:57 -0700)
Reserving a tag (request) for flush to avoid deadlock is overkill. A
tag is a valuable resource. We can track the number of flush requests and
disallow having too many pending flush requests allocated. With this
patch, blk_mq_alloc_request_pinned() could do a busy nop (but not a dead
loop) if too many pending requests are allocated and a new flush request
is allocated. But this should not be a problem; too many pending flush
requests are a very rare case.

I verified this can fix the deadlock caused by too many pending flush
requests.

Signed-off-by: Shaohua Li <shli@fusionio.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
block/blk-flush.c
block/blk-mq.c
include/linux/blk-mq.h

index 9288aaf35c21fc8c0f579fa001f316e697a4dfbb..9143e85226c7fcfd393b8e2b98f5e445af9a24ab 100644 (file)
@@ -284,9 +284,8 @@ static void mq_flush_work(struct work_struct *work)
 
        q = container_of(work, struct request_queue, mq_flush_work);
 
-       /* We don't need set REQ_FLUSH_SEQ, it's for consistency */
        rq = blk_mq_alloc_request(q, WRITE_FLUSH|REQ_FLUSH_SEQ,
-               __GFP_WAIT|GFP_ATOMIC, true);
+               __GFP_WAIT|GFP_ATOMIC, false);
        rq->cmd_type = REQ_TYPE_FS;
        rq->end_io = flush_end_io;
 
@@ -408,8 +407,11 @@ void blk_insert_flush(struct request *rq)
        /*
         * @policy now records what operations need to be done.  Adjust
         * REQ_FLUSH and FUA for the driver.
+        * We keep REQ_FLUSH for mq to track flush requests. For !FUA,
+        * we never dispatch the request directly.
         */
-       rq->cmd_flags &= ~REQ_FLUSH;
+       if (rq->cmd_flags & REQ_FUA)
+               rq->cmd_flags &= ~REQ_FLUSH;
        if (!(fflags & REQ_FUA))
                rq->cmd_flags &= ~REQ_FUA;
 
index 57039fcd9c93e7c3e014842fbbcaf2fc6550edd1..9072d0ab184f217124407b0417242db625bf8718 100644 (file)
@@ -194,9 +194,27 @@ static void blk_mq_rq_ctx_init(struct request_queue *q, struct blk_mq_ctx *ctx,
 }
 
 static struct request *__blk_mq_alloc_request(struct blk_mq_hw_ctx *hctx,
-                                             gfp_t gfp, bool reserved)
+                                             gfp_t gfp, bool reserved,
+                                             int rw)
 {
-       return blk_mq_alloc_rq(hctx, gfp, reserved);
+       struct request *req;
+       bool is_flush = false;
+       /*
+        * flush need allocate a request, leave at least one request for
+        * non-flush IO to avoid deadlock
+        */
+       if ((rw & REQ_FLUSH) && !(rw & REQ_FLUSH_SEQ)) {
+               if (atomic_inc_return(&hctx->pending_flush) >=
+                   hctx->queue_depth - hctx->reserved_tags - 1) {
+                       atomic_dec(&hctx->pending_flush);
+                       return NULL;
+               }
+               is_flush = true;
+       }
+       req = blk_mq_alloc_rq(hctx, gfp, reserved);
+       if (!req && is_flush)
+               atomic_dec(&hctx->pending_flush);
+       return req;
 }
 
 static struct request *blk_mq_alloc_request_pinned(struct request_queue *q,
@@ -209,7 +227,7 @@ static struct request *blk_mq_alloc_request_pinned(struct request_queue *q,
                struct blk_mq_ctx *ctx = blk_mq_get_ctx(q);
                struct blk_mq_hw_ctx *hctx = q->mq_ops->map_queue(q, ctx->cpu);
 
-               rq = __blk_mq_alloc_request(hctx, gfp & ~__GFP_WAIT, reserved);
+               rq = __blk_mq_alloc_request(hctx, gfp & ~__GFP_WAIT, reserved, rw);
                if (rq) {
                        blk_mq_rq_ctx_init(q, ctx, rq, rw);
                        break;
@@ -272,6 +290,9 @@ static void __blk_mq_free_request(struct blk_mq_hw_ctx *hctx,
        const int tag = rq->tag;
        struct request_queue *q = rq->q;
 
+       if ((rq->cmd_flags & REQ_FLUSH) && !(rq->cmd_flags & REQ_FLUSH_SEQ))
+               atomic_dec(&hctx->pending_flush);
+
        blk_mq_rq_init(hctx, rq);
        blk_mq_put_tag(hctx->tags, tag);
 
@@ -900,14 +921,14 @@ static void blk_mq_make_request(struct request_queue *q, struct bio *bio)
        hctx = q->mq_ops->map_queue(q, ctx->cpu);
 
        trace_block_getrq(q, bio, rw);
-       rq = __blk_mq_alloc_request(hctx, GFP_ATOMIC, false);
+       rq = __blk_mq_alloc_request(hctx, GFP_ATOMIC, false, bio->bi_rw);
        if (likely(rq))
-               blk_mq_rq_ctx_init(q, ctx, rq, rw);
+               blk_mq_rq_ctx_init(q, ctx, rq, bio->bi_rw);
        else {
                blk_mq_put_ctx(ctx);
                trace_block_sleeprq(q, bio, rw);
-               rq = blk_mq_alloc_request_pinned(q, rw, __GFP_WAIT|GFP_ATOMIC,
-                                                       false);
+               rq = blk_mq_alloc_request_pinned(q, bio->bi_rw,
+                               __GFP_WAIT|GFP_ATOMIC, false);
                ctx = rq->mq_ctx;
                hctx = q->mq_ops->map_queue(q, ctx->cpu);
        }
@@ -1184,7 +1205,9 @@ static int blk_mq_init_hw_queues(struct request_queue *q,
                hctx->queue_num = i;
                hctx->flags = reg->flags;
                hctx->queue_depth = reg->queue_depth;
+               hctx->reserved_tags = reg->reserved_tags;
                hctx->cmd_size = reg->cmd_size;
+               atomic_set(&hctx->pending_flush, 0);
 
                blk_mq_init_cpu_notifier(&hctx->cpu_notifier,
                                                blk_mq_hctx_notify, hctx);
@@ -1309,15 +1332,6 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_reg *reg,
                reg->queue_depth = BLK_MQ_MAX_DEPTH;
        }
 
-       /*
-        * Set aside a tag for flush requests.  It will only be used while
-        * another flush request is in progress but outside the driver.
-        *
-        * TODO: only allocate if flushes are supported
-        */
-       reg->queue_depth++;
-       reg->reserved_tags++;
-
        if (reg->queue_depth < (reg->reserved_tags + BLK_MQ_TAG_MIN))
                return ERR_PTR(-EINVAL);
 
index 161b23105b1ec9d90f3520f08fb66d0d9be66358..1e8f16f65af4551a3e6a52248ac85d4b310c9bf3 100644 (file)
@@ -36,12 +36,15 @@ struct blk_mq_hw_ctx {
        struct list_head        page_list;
        struct blk_mq_tags      *tags;
 
+       atomic_t                pending_flush;
+
        unsigned long           queued;
        unsigned long           run;
 #define BLK_MQ_MAX_DISPATCH_ORDER      10
        unsigned long           dispatched[BLK_MQ_MAX_DISPATCH_ORDER];
 
        unsigned int            queue_depth;
+       unsigned int            reserved_tags;
        unsigned int            numa_node;
        unsigned int            cmd_size;       /* per-request extra data */