From: Tejun Heo <tj@kernel.org>
Date: Fri, 4 Mar 2011 18:09:02 +0000 (+0100)
Subject: Merge branch 'for-linus' of ../linux-2.6-block into block-for-2.6.39/core
X-Git-Tag: firefly_0821_release~7613^2~1808^2~41^2
X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=e83a46bbb1d4c03defd733a64b727632a40059ad;p=firefly-linux-kernel-4.4.55.git

Merge branch 'for-linus' of ../linux-2.6-block into block-for-2.6.39/core

This merge creates two sets of conflicts.  One is a set of simple
context conflicts caused by removal of throtl_schedule_delayed_work()
in for-linus and removal of throtl_shutdown_timer_wq() in
for-2.6.39/core.

The other is caused by commit 255bb490c8 (block: blk-flush shouldn't
call directly into q->request_fn() __blk_run_queue()) in for-linus
clashing with the FLUSH reimplementation in for-2.6.39/core.  The
conflict isn't trivial but the resolution is straightforward.

* __blk_run_queue() in flush_end_io() and flush_data_end_io()
  should be called with @force_kblockd set to %true.

* elv_insert() in blk_kick_flush() should use
  %ELEVATOR_INSERT_REQUEUE.

Both changes are to avoid invoking ->request_fn() directly from the
request completion path and closely match the changes in commit
255bb490c8.

Signed-off-by: Tejun Heo <tj@kernel.org>
---

e83a46bbb1d4c03defd733a64b727632a40059ad
diff --cc block/blk-flush.c
index a867e3f524f3,b27d0208611b..0bd8c9c5d6e5
--- a/block/blk-flush.c
+++ b/block/blk-flush.c
@@@ -74,276 -11,153 +74,284 @@@
  
  /* FLUSH/FUA sequences */
  enum {
 -	QUEUE_FSEQ_STARTED	= (1 << 0), /* flushing in progress */
 -	QUEUE_FSEQ_PREFLUSH	= (1 << 1), /* pre-flushing in progress */
 -	QUEUE_FSEQ_DATA		= (1 << 2), /* data write in progress */
 -	QUEUE_FSEQ_POSTFLUSH	= (1 << 3), /* post-flushing in progress */
 -	QUEUE_FSEQ_DONE		= (1 << 4),
 +	REQ_FSEQ_PREFLUSH	= (1 << 0), /* pre-flushing in progress */
 +	REQ_FSEQ_DATA		= (1 << 1), /* data write in progress */
 +	REQ_FSEQ_POSTFLUSH	= (1 << 2), /* post-flushing in progress */
 +	REQ_FSEQ_DONE		= (1 << 3),
 +
 +	REQ_FSEQ_ACTIONS	= REQ_FSEQ_PREFLUSH | REQ_FSEQ_DATA |
 +				  REQ_FSEQ_POSTFLUSH,
 +
 +	/*
 +	 * If flush has been pending longer than the following timeout,
 +	 * it's issued even if flush_data requests are still in flight.
 +	 */
 +	FLUSH_PENDING_TIMEOUT	= 5 * HZ,
  };
  
 -static struct request *queue_next_fseq(struct request_queue *q);
 +static bool blk_kick_flush(struct request_queue *q);
  
 -unsigned blk_flush_cur_seq(struct request_queue *q)
 +static unsigned int blk_flush_policy(unsigned int fflags, struct request *rq)
  {
 -	if (!q->flush_seq)
 -		return 0;
 -	return 1 << ffz(q->flush_seq);
 +	unsigned int policy = 0;
 +
 +	if (fflags & REQ_FLUSH) {
 +		if (rq->cmd_flags & REQ_FLUSH)
 +			policy |= REQ_FSEQ_PREFLUSH;
 +		if (blk_rq_sectors(rq))
 +			policy |= REQ_FSEQ_DATA;
 +		if (!(fflags & REQ_FUA) && (rq->cmd_flags & REQ_FUA))
 +			policy |= REQ_FSEQ_POSTFLUSH;
 +	}
 +	return policy;
  }
  
 -static struct request *blk_flush_complete_seq(struct request_queue *q,
 -					      unsigned seq, int error)
 +static unsigned int blk_flush_cur_seq(struct request *rq)
  {
 -	struct request *next_rq = NULL;
 -
 -	if (error && !q->flush_err)
 -		q->flush_err = error;
 -
 -	BUG_ON(q->flush_seq & seq);
 -	q->flush_seq |= seq;
 -
 -	if (blk_flush_cur_seq(q) != QUEUE_FSEQ_DONE) {
 -		/* not complete yet, queue the next flush sequence */
 -		next_rq = queue_next_fseq(q);
 -	} else {
 -		/* complete this flush request */
 -		__blk_end_request_all(q->orig_flush_rq, q->flush_err);
 -		q->orig_flush_rq = NULL;
 -		q->flush_seq = 0;
 -
 -		/* dispatch the next flush if there's one */
 -		if (!list_empty(&q->pending_flushes)) {
 -			next_rq = list_entry_rq(q->pending_flushes.next);
 -			list_move(&next_rq->queuelist, &q->queue_head);
 -		}
 +	return 1 << ffz(rq->flush.seq);
 +}
 +
 +static void blk_flush_restore_request(struct request *rq)
 +{
 +	/*
 +	 * After flush data completion, @rq->bio is %NULL but we need to
 +	 * complete the bio again.  @rq->biotail is guaranteed to equal the
 +	 * original @rq->bio.  Restore it.
 +	 */
 +	rq->bio = rq->biotail;
 +
 +	/* make @rq a normal request */
 +	rq->cmd_flags &= ~REQ_FLUSH_SEQ;
 +	rq->end_io = NULL;
 +}
 +
 +/**
 + * blk_flush_complete_seq - complete flush sequence
 + * @rq: FLUSH/FUA request being sequenced
 + * @seq: sequences to complete (mask of %REQ_FSEQ_*, can be zero)
 + * @error: whether an error occurred
 + *
 + * @rq just completed @seq part of its flush sequence, record the
 + * completion and trigger the next step.
 + *
 + * CONTEXT:
 + * spin_lock_irq(q->queue_lock)
 + *
 + * RETURNS:
 + * %true if requests were added to the dispatch queue, %false otherwise.
 + */
 +static bool blk_flush_complete_seq(struct request *rq, unsigned int seq,
 +				   int error)
 +{
 +	struct request_queue *q = rq->q;
 +	struct list_head *pending = &q->flush_queue[q->flush_pending_idx];
 +	bool queued = false;
 +
 +	BUG_ON(rq->flush.seq & seq);
 +	rq->flush.seq |= seq;
 +
 +	if (likely(!error))
 +		seq = blk_flush_cur_seq(rq);
 +	else
 +		seq = REQ_FSEQ_DONE;
 +
 +	switch (seq) {
 +	case REQ_FSEQ_PREFLUSH:
 +	case REQ_FSEQ_POSTFLUSH:
 +		/* queue for flush */
 +		if (list_empty(pending))
 +			q->flush_pending_since = jiffies;
 +		list_move_tail(&rq->flush.list, pending);
 +		break;
 +
 +	case REQ_FSEQ_DATA:
 +		list_move_tail(&rq->flush.list, &q->flush_data_in_flight);
 +		list_add(&rq->queuelist, &q->queue_head);
 +		queued = true;
 +		break;
 +
 +	case REQ_FSEQ_DONE:
 +		/*
 +		 * @rq was previously adjusted by blk_flush_issue() for
 +		 * flush sequencing and may already have gone through the
 +		 * flush data request completion path.  Restore @rq for
 +		 * normal completion and end it.
 +		 */
 +		BUG_ON(!list_empty(&rq->queuelist));
 +		list_del_init(&rq->flush.list);
 +		blk_flush_restore_request(rq);
 +		__blk_end_request_all(rq, error);
 +		break;
 +
 +	default:
 +		BUG();
  	}
 -	return next_rq;
 +
 +	return blk_kick_flush(q) | queued;
  }
  
 -static void blk_flush_complete_seq_end_io(struct request_queue *q,
 -					  unsigned seq, int error)
 +static void flush_end_io(struct request *flush_rq, int error)
  {
 +	struct request_queue *q = flush_rq->q;
 +	struct list_head *running = &q->flush_queue[q->flush_running_idx];
  	bool was_empty = elv_queue_empty(q);
 -	struct request *next_rq;
 +	bool queued = false;
 +	struct request *rq, *n;
 +
 +	BUG_ON(q->flush_pending_idx == q->flush_running_idx);
 +
 +	/* account completion of the flush request */
 +	q->flush_running_idx ^= 1;
 +	elv_completed_request(q, flush_rq);
  
 -	next_rq = blk_flush_complete_seq(q, seq, error);
 +	/* and push the waiting requests to the next stage */
 +	list_for_each_entry_safe(rq, n, running, flush.list) {
 +		unsigned int seq = blk_flush_cur_seq(rq);
 +
 +		BUG_ON(seq != REQ_FSEQ_PREFLUSH && seq != REQ_FSEQ_POSTFLUSH);
 +		queued |= blk_flush_complete_seq(rq, seq, error);
 +	}
  
- 	/* after populating an empty queue, kick it to avoid stall */
+ 	/*
+ 	 * Moving a request silently to empty queue_head may stall the
+ 	 * queue.  Kick the queue in those cases.  This function is called
+ 	 * from request completion path and calling directly into
+ 	 * request_fn may confuse the driver.  Always use kblockd.
+ 	 */
 -	if (was_empty && next_rq)
 +	if (queued && was_empty)
- 		__blk_run_queue(q);
+ 		__blk_run_queue(q, true);
  }
  
 -static void pre_flush_end_io(struct request *rq, int error)
 +/**
 + * blk_kick_flush - consider issuing flush request
 + * @q: request_queue being kicked
 + *
 + * Flush related states of @q have changed, consider issuing flush request.
 + * Please read the comment at the top of this file for more info.
 + *
 + * CONTEXT:
 + * spin_lock_irq(q->queue_lock)
 + *
 + * RETURNS:
 + * %true if flush was issued, %false otherwise.
 + */
 +static bool blk_kick_flush(struct request_queue *q)
  {
 -	elv_completed_request(rq->q, rq);
 -	blk_flush_complete_seq_end_io(rq->q, QUEUE_FSEQ_PREFLUSH, error);
 +	struct list_head *pending = &q->flush_queue[q->flush_pending_idx];
 +	struct request *first_rq =
 +		list_first_entry(pending, struct request, flush.list);
 +
 +	/* C1 described at the top of this file */
 +	if (q->flush_pending_idx != q->flush_running_idx || list_empty(pending))
 +		return false;
 +
 +	/* C2 and C3 */
 +	if (!list_empty(&q->flush_data_in_flight) &&
 +	    time_before(jiffies,
 +			q->flush_pending_since + FLUSH_PENDING_TIMEOUT))
 +		return false;
 +
 +	/*
 +	 * Issue flush and toggle pending_idx.  This makes pending_idx
 +	 * different from running_idx, which means flush is in flight.
 +	 */
 +	blk_rq_init(q, &q->flush_rq);
 +	q->flush_rq.cmd_type = REQ_TYPE_FS;
 +	q->flush_rq.cmd_flags = WRITE_FLUSH | REQ_FLUSH_SEQ;
 +	q->flush_rq.rq_disk = first_rq->rq_disk;
 +	q->flush_rq.end_io = flush_end_io;
 +
 +	q->flush_pending_idx ^= 1;
- 	elv_insert(q, &q->flush_rq, ELEVATOR_INSERT_FRONT);
++	elv_insert(q, &q->flush_rq, ELEVATOR_INSERT_REQUEUE);
 +	return true;
  }
  
  static void flush_data_end_io(struct request *rq, int error)
  {
 -	elv_completed_request(rq->q, rq);
 -	blk_flush_complete_seq_end_io(rq->q, QUEUE_FSEQ_DATA, error);
 -}
 +	struct request_queue *q = rq->q;
 +	bool was_empty = elv_queue_empty(q);
  
- 	/* after populating an empty queue, kick it to avoid stall */
 -static void post_flush_end_io(struct request *rq, int error)
 -{
 -	elv_completed_request(rq->q, rq);
 -	blk_flush_complete_seq_end_io(rq->q, QUEUE_FSEQ_POSTFLUSH, error);
++	/*
++	 * After populating an empty queue, kick it to avoid stall.  Read
++	 * the comment in flush_end_io().
++	 */
 +	if (blk_flush_complete_seq(rq, REQ_FSEQ_DATA, error) && was_empty)
- 		__blk_run_queue(q);
++		__blk_run_queue(q, true);
  }
  
 -static void init_flush_request(struct request *rq, struct gendisk *disk)
 +/**
 + * blk_insert_flush - insert a new FLUSH/FUA request
 + * @rq: request to insert
 + *
 + * To be called from elv_insert() for %ELEVATOR_INSERT_FLUSH insertions.
 + * @rq is being submitted.  Analyze what needs to be done and put it on the
 + * right queue.
 + *
 + * CONTEXT:
 + * spin_lock_irq(q->queue_lock)
 + */
 +void blk_insert_flush(struct request *rq)
  {
 -	rq->cmd_type = REQ_TYPE_FS;
 -	rq->cmd_flags = WRITE_FLUSH;
 -	rq->rq_disk = disk;
 -}
 +	struct request_queue *q = rq->q;
 +	unsigned int fflags = q->flush_flags;	/* may change, cache */
 +	unsigned int policy = blk_flush_policy(fflags, rq);
  
 -static struct request *queue_next_fseq(struct request_queue *q)
 -{
 -	struct request *orig_rq = q->orig_flush_rq;
 -	struct request *rq = &q->flush_rq;
 +	BUG_ON(rq->end_io);
 +	BUG_ON(!rq->bio || rq->bio != rq->biotail);
  
 -	blk_rq_init(q, rq);
 +	/*
 +	 * @policy now records what operations need to be done.  Adjust
 +	 * REQ_FLUSH and FUA for the driver.
 +	 */
 +	rq->cmd_flags &= ~REQ_FLUSH;
 +	if (!(fflags & REQ_FUA))
 +		rq->cmd_flags &= ~REQ_FUA;
  
 -	switch (blk_flush_cur_seq(q)) {
 -	case QUEUE_FSEQ_PREFLUSH:
 -		init_flush_request(rq, orig_rq->rq_disk);
 -		rq->end_io = pre_flush_end_io;
 -		break;
 -	case QUEUE_FSEQ_DATA:
 -		init_request_from_bio(rq, orig_rq->bio);
 -		/*
 -		 * orig_rq->rq_disk may be different from
 -		 * bio->bi_bdev->bd_disk if orig_rq got here through
 -		 * remapping drivers.  Make sure rq->rq_disk points
 -		 * to the same one as orig_rq.
 -		 */
 -		rq->rq_disk = orig_rq->rq_disk;
 -		rq->cmd_flags &= ~(REQ_FLUSH | REQ_FUA);
 -		rq->cmd_flags |= orig_rq->cmd_flags & (REQ_FLUSH | REQ_FUA);
 -		rq->end_io = flush_data_end_io;
 -		break;
 -	case QUEUE_FSEQ_POSTFLUSH:
 -		init_flush_request(rq, orig_rq->rq_disk);
 -		rq->end_io = post_flush_end_io;
 -		break;
 -	default:
 -		BUG();
 +	/*
 +	 * If there's data but flush is not necessary, the request can be
 +	 * processed directly without going through flush machinery.  Queue
 +	 * for normal execution.
 +	 */
 +	if ((policy & REQ_FSEQ_DATA) &&
 +	    !(policy & (REQ_FSEQ_PREFLUSH | REQ_FSEQ_POSTFLUSH))) {
 +		list_add(&rq->queuelist, &q->queue_head);
 +		return;
  	}
  
 -	elv_insert(q, rq, ELEVATOR_INSERT_REQUEUE);
 -	return rq;
 +	/*
 +	 * @rq should go through flush machinery.  Mark it part of flush
 +	 * sequence and submit for further processing.
 +	 */
 +	memset(&rq->flush, 0, sizeof(rq->flush));
 +	INIT_LIST_HEAD(&rq->flush.list);
 +	rq->cmd_flags |= REQ_FLUSH_SEQ;
 +	rq->end_io = flush_data_end_io;
 +
 +	blk_flush_complete_seq(rq, REQ_FSEQ_ACTIONS & ~policy, 0);
  }
  
 -struct request *blk_do_flush(struct request_queue *q, struct request *rq)
 +/**
 + * blk_abort_flushes - @q is being aborted, abort flush requests
 + * @q: request_queue being aborted
 + *
 + * To be called from elv_abort_queue().  @q is being aborted.  Prepare all
 + * FLUSH/FUA requests for abortion.
 + *
 + * CONTEXT:
 + * spin_lock_irq(q->queue_lock)
 + */
 +void blk_abort_flushes(struct request_queue *q)
  {
 -	unsigned int fflags = q->flush_flags; /* may change, cache it */
 -	bool has_flush = fflags & REQ_FLUSH, has_fua = fflags & REQ_FUA;
 -	bool do_preflush = has_flush && (rq->cmd_flags & REQ_FLUSH);
 -	bool do_postflush = has_flush && !has_fua && (rq->cmd_flags & REQ_FUA);
 -	unsigned skip = 0;
 +	struct request *rq, *n;
 +	int i;
  
  	/*
 -	 * Special case.  If there's data but flush is not necessary,
 -	 * the request can be issued directly.
 -	 *
 -	 * Flush w/o data should be able to be issued directly too but
 -	 * currently some drivers assume that rq->bio contains
 -	 * non-zero data if it isn't NULL and empty FLUSH requests
 -	 * getting here usually have bio's without data.
 +	 * Requests in flight for data are already owned by the dispatch
 +	 * queue or the device driver.  Just restore for normal completion.
  	 */
 -	if (blk_rq_sectors(rq) && !do_preflush && !do_postflush) {
 -		rq->cmd_flags &= ~REQ_FLUSH;
 -		if (!has_fua)
 -			rq->cmd_flags &= ~REQ_FUA;
 -		return rq;
 +	list_for_each_entry_safe(rq, n, &q->flush_data_in_flight, flush.list) {
 +		list_del_init(&rq->flush.list);
 +		blk_flush_restore_request(rq);
  	}
  
  	/*
diff --cc block/blk-throttle.c
index c0f623742165,e36cc10a346c..061dee66e2a6
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@@ -962,10 -965,10 +965,10 @@@ static void throtl_update_blkio_group_w
  	smp_mb__before_atomic_inc();
  	atomic_inc(&td->limits_changed);
  	smp_mb__after_atomic_inc();
- 	throtl_schedule_delayed_work(td->queue, 0);
+ 	throtl_schedule_delayed_work(td, 0);
  }
  
 -void throtl_shutdown_timer_wq(struct request_queue *q)
 +static void throtl_shutdown_wq(struct request_queue *q)
  {
  	struct throtl_data *td = q->td;
  
diff --cc include/linux/blkdev.h
index 23fb92506c31,d5063e1b5555..13b75ca62181
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@@ -1143,7 -1135,7 +1142,6 @@@ static inline uint64_t rq_io_start_time
  extern int blk_throtl_init(struct request_queue *q);
  extern void blk_throtl_exit(struct request_queue *q);
  extern int blk_throtl_bio(struct request_queue *q, struct bio **bio);
- extern void throtl_schedule_delayed_work(struct request_queue *q, unsigned long delay);
 -extern void throtl_shutdown_timer_wq(struct request_queue *q);
  #else /* CONFIG_BLK_DEV_THROTTLING */
  static inline int blk_throtl_bio(struct request_queue *q, struct bio **bio)
  {
@@@ -1152,7 -1144,7 +1150,6 @@@
  
  static inline int blk_throtl_init(struct request_queue *q) { return 0; }
  static inline int blk_throtl_exit(struct request_queue *q) { return 0; }
- static inline void throtl_schedule_delayed_work(struct request_queue *q, unsigned long delay) {}
 -static inline void throtl_shutdown_timer_wq(struct request_queue *q) {}
  #endif /* CONFIG_BLK_DEV_THROTTLING */
  
  #define MODULE_ALIAS_BLOCKDEV(major,minor) \