From: Jens Axboe
Date: Wed, 19 Oct 2011 12:30:42 +0000 (+0200)
Subject: Merge branch 'v3.1-rc10' into for-3.2/core
X-Git-Tag: firefly_0821_release~3680^2~4193^2~19
X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=5c04b426f2e8b46cfc7969a35b2631063a3c646c;hp=-c;p=firefly-linux-kernel-4.4.55.git

Merge branch 'v3.1-rc10' into for-3.2/core

Conflicts:
	block/blk-core.c
	include/linux/blkdev.h

Signed-off-by: Jens Axboe
---

5c04b426f2e8b46cfc7969a35b2631063a3c646c
diff --combined block/blk-core.c
index 97e9e5405b83,d34433ae7917..79e41a76d96a
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@@ -38,6 -38,8 +38,6 @@@ EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_
 EXPORT_TRACEPOINT_SYMBOL_GPL(block_rq_remap);
 EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_complete);

-static int __make_request(struct request_queue *q, struct bio *bio);
-
 /*
  * For the allocated request tables
  */
@@@ -346,9 -348,10 +346,10 @@@ void blk_put_queue(struct request_queu
 EXPORT_SYMBOL(blk_put_queue);

 /*
- * Note: If a driver supplied the queue lock, it should not zap that lock
- * unexpectedly as some queue cleanup components like elevator_exit() and
- * blk_throtl_exit() need queue lock.
+ * Note: If a driver supplied the queue lock, it is disconnected
+ * by this function. The actual state of the lock doesn't matter
+ * here as the request_queue isn't accessible after this point
+ * (QUEUE_FLAG_DEAD is set) and no other requests will be queued.
  */
 void blk_cleanup_queue(struct request_queue *q)
 {
@@@ -365,10 -368,8 +366,8 @@@
 	queue_flag_set_unlocked(QUEUE_FLAG_DEAD, q);
 	mutex_unlock(&q->sysfs_lock);

-	if (q->elevator)
-		elevator_exit(q->elevator);
-
-	blk_throtl_exit(q);
+	if (q->queue_lock != &q->__queue_lock)
+		q->queue_lock = &q->__queue_lock;

 	blk_put_queue(q);
 }
@@@ -540,7 -541,7 +539,7 @@@ blk_init_allocated_queue_node(struct re
 	/*
 	 * This also sets hw/phys segments, boundary and size
 	 */
-	blk_queue_make_request(q, __make_request);
+	blk_queue_make_request(q, blk_queue_bio);

 	q->sg_reserved_size = INT_MAX;

@@@ -1165,7 -1166,7 +1164,7 @@@ static bool bio_attempt_front_merge(str
  * true if merge was successful, otherwise false.
  */
 static bool attempt_plug_merge(struct task_struct *tsk, struct request_queue *q,
-			       struct bio *bio)
+			       struct bio *bio, unsigned int *request_count)
 {
 	struct blk_plug *plug;
 	struct request *rq;
@@@ -1174,10 -1175,13 +1173,13 @@@
 	plug = tsk->plug;
 	if (!plug)
 		goto out;
+	*request_count = 0;

 	list_for_each_entry_reverse(rq, &plug->list, queuelist) {
 		int el_ret;

+		(*request_count)++;
+
 		if (rq->q != q)
 			continue;

@@@ -1211,12 -1215,13 +1213,13 @@@ void init_request_from_bio(struct reque
 	blk_rq_bio_prep(req->q, req, bio);
 }

-static int __make_request(struct request_queue *q, struct bio *bio)
+void blk_queue_bio(struct request_queue *q, struct bio *bio)
 {
 	const bool sync = !!(bio->bi_rw & REQ_SYNC);
 	struct blk_plug *plug;
 	int el_ret, rw_flags, where = ELEVATOR_INSERT_SORT;
 	struct request *req;
+	unsigned int request_count = 0;

 	/*
 	 * low level driver can indicate that it wants pages above a
@@@ -1235,8 -1240,8 +1238,8 @@@
 	 * Check if we can merge with the plugged list before grabbing
 	 * any locks.
 	 */
-	if (attempt_plug_merge(current, q, bio))
+	if (attempt_plug_merge(current, q, bio, &request_count))
-		goto out;
+		return;

 	spin_lock_irq(q->queue_lock);

@@@ -1300,11 -1305,10 +1303,10 @@@ get_rq
 			if (__rq->q != q)
 				plug->should_sort = 1;
 		}
+		if (request_count >= BLK_MAX_REQUEST_COUNT)
+			blk_flush_plug_list(plug, false);
 		list_add_tail(&req->queuelist, &plug->list);
-		plug->count++;
 		drive_stat_acct(req, 1);
-		if (plug->count >= BLK_MAX_REQUEST_COUNT)
-			blk_flush_plug_list(plug, false);
 	} else {
 		spin_lock_irq(q->queue_lock);
 		add_acct_request(q, req, where);
@@@ -1312,8 -1316,9 +1314,8 @@@
 out_unlock:
 		spin_unlock_irq(q->queue_lock);
 	}
-out:
-	return 0;
 }
+EXPORT_SYMBOL_GPL(blk_queue_bio);	/* for device mapper only */

 /*
  * If bio->bi_dev is a partition, remap the location
@@@ -1412,142 -1417,165 +1414,142 @@@ static inline int bio_check_eod(struct
 	return 0;
 }

-/**
- * generic_make_request - hand a buffer to its device driver for I/O
- * @bio: The bio describing the location in memory and on the device.
- *
- * generic_make_request() is used to make I/O requests of block
- * devices. It is passed a &struct bio, which describes the I/O that needs
- * to be done.
- *
- * generic_make_request() does not return any status. The
- * success/failure status of the request, along with notification of
- * completion, is delivered asynchronously through the bio->bi_end_io
- * function described (one day) else where.
- *
- * The caller of generic_make_request must make sure that bi_io_vec
- * are set to describe the memory buffer, and that bi_dev and bi_sector are
- * set to describe the device address, and the
- * bi_end_io and optionally bi_private are set to describe how
- * completion notification should be signaled.
- *
- * generic_make_request and the drivers it calls may use bi_next if this
- * bio happens to be merged with someone else, and may change bi_dev and
- * bi_sector for remaps as it sees fit. So the values of these fields
- * should NOT be depended on after the call to generic_make_request.
- */
-static inline void __generic_make_request(struct bio *bio)
+static noinline_for_stack bool
+generic_make_request_checks(struct bio *bio)
 {
 	struct request_queue *q;
-	sector_t old_sector;
-	int ret, nr_sectors = bio_sectors(bio);
-	dev_t old_dev;
+	int nr_sectors = bio_sectors(bio);
 	int err = -EIO;
+	char b[BDEVNAME_SIZE];
+	struct hd_struct *part;

 	might_sleep();

 	if (bio_check_eod(bio, nr_sectors))
 		goto end_io;

-	/*
-	 * Resolve the mapping until finished. (drivers are
-	 * still free to implement/resolve their own stacking
-	 * by explicitly returning 0)
-	 *
-	 * NOTE: we don't repeat the blk_size check for each new device.
-	 * Stacking drivers are expected to know what they are doing.
-	 */
-	old_sector = -1;
-	old_dev = 0;
-	do {
-		char b[BDEVNAME_SIZE];
-		struct hd_struct *part;
-
-		q = bdev_get_queue(bio->bi_bdev);
-		if (unlikely(!q)) {
-			printk(KERN_ERR
-			       "generic_make_request: Trying to access "
-			       "nonexistent block-device %s (%Lu)\n",
-			       bdevname(bio->bi_bdev, b),
-			       (long long) bio->bi_sector);
-			goto end_io;
-		}
-
-		if (unlikely(!(bio->bi_rw & REQ_DISCARD) &&
-			     nr_sectors > queue_max_hw_sectors(q))) {
-			printk(KERN_ERR "bio too big device %s (%u > %u)\n",
-			       bdevname(bio->bi_bdev, b),
-			       bio_sectors(bio),
-			       queue_max_hw_sectors(q));
-			goto end_io;
-		}
-
-		if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags)))
-			goto end_io;
-
-		part = bio->bi_bdev->bd_part;
-		if (should_fail_request(part, bio->bi_size) ||
-		    should_fail_request(&part_to_disk(part)->part0,
-					bio->bi_size))
-			goto end_io;
+	q = bdev_get_queue(bio->bi_bdev);
+	if (unlikely(!q)) {
+		printk(KERN_ERR
+		       "generic_make_request: Trying to access "
+		       "nonexistent block-device %s (%Lu)\n",
+		       bdevname(bio->bi_bdev, b),
+		       (long long) bio->bi_sector);
+		goto end_io;
+	}

-		/*
-		 * If this device has partitions, remap block n
-		 * of partition p to block n+start(p) of the disk.
-		 */
-		blk_partition_remap(bio);
+	if (unlikely(!(bio->bi_rw & REQ_DISCARD) &&
+		     nr_sectors > queue_max_hw_sectors(q))) {
+		printk(KERN_ERR "bio too big device %s (%u > %u)\n",
+		       bdevname(bio->bi_bdev, b),
+		       bio_sectors(bio),
+		       queue_max_hw_sectors(q));
+		goto end_io;
+	}

-		if (bio_integrity_enabled(bio) && bio_integrity_prep(bio))
-			goto end_io;
+	if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags)))
+		goto end_io;

-		if (old_sector != -1)
-			trace_block_bio_remap(q, bio, old_dev, old_sector);
+	part = bio->bi_bdev->bd_part;
+	if (should_fail_request(part, bio->bi_size) ||
+	    should_fail_request(&part_to_disk(part)->part0,
+				bio->bi_size))
+		goto end_io;

-		old_sector = bio->bi_sector;
-		old_dev = bio->bi_bdev->bd_dev;
+	/*
+	 * If this device has partitions, remap block n
+	 * of partition p to block n+start(p) of the disk.
+	 */
+	blk_partition_remap(bio);

-		if (bio_check_eod(bio, nr_sectors))
-			goto end_io;
+	if (bio_integrity_enabled(bio) && bio_integrity_prep(bio))
+		goto end_io;

-		/*
-		 * Filter flush bio's early so that make_request based
-		 * drivers without flush support don't have to worry
-		 * about them.
-		 */
-		if ((bio->bi_rw & (REQ_FLUSH | REQ_FUA)) && !q->flush_flags) {
-			bio->bi_rw &= ~(REQ_FLUSH | REQ_FUA);
-			if (!nr_sectors) {
-				err = 0;
-				goto end_io;
-			}
-		}
+	if (bio_check_eod(bio, nr_sectors))
+		goto end_io;

-		if ((bio->bi_rw & REQ_DISCARD) &&
-		    (!blk_queue_discard(q) ||
-		     ((bio->bi_rw & REQ_SECURE) &&
-		      !blk_queue_secdiscard(q)))) {
-			err = -EOPNOTSUPP;
+	/*
+	 * Filter flush bio's early so that make_request based
+	 * drivers without flush support don't have to worry
+	 * about them.
+	 */
+	if ((bio->bi_rw & (REQ_FLUSH | REQ_FUA)) && !q->flush_flags) {
+		bio->bi_rw &= ~(REQ_FLUSH | REQ_FUA);
+		if (!nr_sectors) {
+			err = 0;
 			goto end_io;
 		}
+	}

-		if (blk_throtl_bio(q, &bio))
-			goto end_io;
-
-		/*
-		 * If bio = NULL, bio has been throttled and will be submitted
-		 * later.
-		 */
-		if (!bio)
-			break;
+	if ((bio->bi_rw & REQ_DISCARD) &&
+	    (!blk_queue_discard(q) ||
+	     ((bio->bi_rw & REQ_SECURE) &&
+	      !blk_queue_secdiscard(q)))) {
+		err = -EOPNOTSUPP;
+		goto end_io;
+	}

-		trace_block_bio_queue(q, bio);
+	if (blk_throtl_bio(q, &bio))
+		goto end_io;

-		ret = q->make_request_fn(q, bio);
-	} while (ret);
+	/* if bio = NULL, bio has been throttled and will be submitted later.
+	 */
+	if (!bio)
+		return false;

-	return;
+	trace_block_bio_queue(q, bio);
+	return true;

end_io:
 	bio_endio(bio, err);
+	return false;
 }

-/*
- * We only want one ->make_request_fn to be active at a time,
- * else stack usage with stacked devices could be a problem.
- * So use current->bio_list to keep a list of requests
- * submited by a make_request_fn function.
- * current->bio_list is also used as a flag to say if
- * generic_make_request is currently active in this task or not.
- * If it is NULL, then no make_request is active. If it is non-NULL,
- * then a make_request is active, and new requests should be added
- * at the tail
+/**
+ * generic_make_request - hand a buffer to its device driver for I/O
+ * @bio: The bio describing the location in memory and on the device.
+ *
+ * generic_make_request() is used to make I/O requests of block
+ * devices. It is passed a &struct bio, which describes the I/O that needs
+ * to be done.
+ *
+ * generic_make_request() does not return any status. The
+ * success/failure status of the request, along with notification of
+ * completion, is delivered asynchronously through the bio->bi_end_io
+ * function described (one day) elsewhere.
+ *
+ * The caller of generic_make_request must make sure that bi_io_vec
+ * are set to describe the memory buffer, and that bi_dev and bi_sector are
+ * set to describe the device address, and the
+ * bi_end_io and optionally bi_private are set to describe how
+ * completion notification should be signaled.
+ *
+ * generic_make_request and the drivers it calls may use bi_next if this
+ * bio happens to be merged with someone else, and may resubmit the bio to
+ * a lower device by calling into generic_make_request recursively, which
+ * means the bio should NOT be touched after the call to ->make_request_fn.
  */
 void generic_make_request(struct bio *bio)
 {
 	struct bio_list bio_list_on_stack;

+	if (!generic_make_request_checks(bio))
+		return;
+
+	/*
+	 * We only want one ->make_request_fn to be active at a time, else
+	 * stack usage with stacked devices could be a problem. So use
+	 * current->bio_list to keep a list of requests submitted by a
+	 * make_request_fn function. current->bio_list is also used as a
+	 * flag to say if generic_make_request is currently active in this
+	 * task or not. If it is NULL, then no make_request is active. If
+	 * it is non-NULL, then a make_request is active, and new requests
+	 * should be added at the tail
+	 */
 	if (current->bio_list) {
-		/* make_request is active */
 		bio_list_add(current->bio_list, bio);
 		return;
 	}
+
 	/* following loop may be a bit non-obvious, and so deserves some
 	 * explanation.
 	 * Before entering the loop, bio->bi_next is NULL (as all callers
@@@ -1555,21 -1583,22 +1557,21 @@@
 	 * We pretend that we have just taken it off a longer list, so
 	 * we assign bio_list to a pointer to the bio_list_on_stack,
 	 * thus initialising the bio_list of new bios to be
-	 * added. __generic_make_request may indeed add some more bios
+	 * added. ->make_request() may indeed add some more bios
 	 * through a recursive call to generic_make_request. If it
 	 * did, we find a non-NULL value in bio_list and re-enter the loop
 	 * from the top. In this case we really did just take the bio
 	 * of the top of the list (no pretending) and so remove it from
-	 * bio_list, and call into __generic_make_request again.
-	 *
-	 * The loop was structured like this to make only one call to
-	 * __generic_make_request (which is important as it is large and
-	 * inlined) and to keep the structure simple.
+	 * bio_list, and call into ->make_request() again.
 	 */
 	BUG_ON(bio->bi_next);
 	bio_list_init(&bio_list_on_stack);
 	current->bio_list = &bio_list_on_stack;
 	do {
-		__generic_make_request(bio);
+		struct request_queue *q = bdev_get_queue(bio->bi_bdev);
+
+		q->make_request_fn(q, bio);
+
 		bio = bio_list_pop(current->bio_list);
 	} while (bio);
 	current->bio_list = NULL; /* deactivate */
@@@ -1675,6 -1704,7 +1677,7 @@@ EXPORT_SYMBOL_GPL(blk_rq_check_limits)
 int blk_insert_cloned_request(struct request_queue *q, struct request *rq)
 {
 	unsigned long flags;
+	int where = ELEVATOR_INSERT_BACK;

 	if (blk_rq_check_limits(q, rq))
 		return -EIO;
@@@ -1691,7 -1721,10 +1694,10 @@@
 	 */
 	BUG_ON(blk_queued_rq(rq));

-	add_acct_request(q, rq, ELEVATOR_INSERT_BACK);
+	if (rq->cmd_flags & (REQ_FLUSH|REQ_FUA))
+		where = ELEVATOR_INSERT_FLUSH;
+
+	add_acct_request(q, rq, where);
 	spin_unlock_irqrestore(q->queue_lock, flags);

 	return 0;
@@@ -2248,7 -2281,7 +2254,7 @@@ static bool blk_end_bidi_request(struc
  * %false - we are done with this request
  * %true - still buffers pending for this request
  **/
- static bool __blk_end_bidi_request(struct request *rq, int error,
+ bool __blk_end_bidi_request(struct request *rq, int error,
 				   unsigned int nr_bytes, unsigned int bidi_bytes)
 {
 	if (blk_update_bidi_request(rq, error, nr_bytes, bidi_bytes))
@@@ -2595,20 -2628,6 +2601,20 @@@ EXPORT_SYMBOL(kblockd_schedule_delayed_

 #define PLUG_MAGIC	0x91827364

+/**
+ * blk_start_plug - initialize blk_plug and track it inside the task_struct
+ * @plug:	The &struct blk_plug that needs to be initialized
+ *
+ * Description:
+ *   Tracking blk_plug inside the task_struct will help with auto-flushing the
+ *   pending I/O should the task end up blocking between blk_start_plug() and
+ *   blk_finish_plug(). This is important from a performance perspective, but
+ *   also ensures that we don't deadlock. For instance, if the task is blocking
+ *   for a memory allocation, memory reclaim could end up wanting to free a
+ *   page belonging to that request that is currently residing in our private
+ *   plug. By flushing the pending I/O when the process goes to sleep, we avoid
+ *   this kind of deadlock.
+ */
 void blk_start_plug(struct blk_plug *plug)
 {
 	struct task_struct *tsk = current;
@@@ -2617,7 -2636,6 +2623,6 @@@
 	INIT_LIST_HEAD(&plug->list);
 	INIT_LIST_HEAD(&plug->cb_list);
 	plug->should_sort = 0;
-	plug->count = 0;

 	/*
 	 * If this is a nested plug, don't actually assign it. It will be
@@@ -2701,7 -2719,6 +2706,6 @@@ void blk_flush_plug_list(struct blk_plu
 		return;

 	list_splice_init(&plug->list, &list);
-	plug->count = 0;

 	if (plug->should_sort) {
 		list_sort(NULL, &list, plug_rq_cmp);
diff --combined block/blk-sysfs.c
index adc923e9d1f8,60fda88c57f0..a8eff5f8b9c5
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@@ -258,11 -258,13 +258,13 @@@ queue_rq_affinity_store(struct request_
 	ret = queue_var_store(&val, page, count);
 	spin_lock_irq(q->queue_lock);
-	if (val) {
+	if (val == 2) {
 		queue_flag_set(QUEUE_FLAG_SAME_COMP, q);
-		if (val == 2)
-			queue_flag_set(QUEUE_FLAG_SAME_FORCE, q);
-	} else {
+		queue_flag_set(QUEUE_FLAG_SAME_FORCE, q);
+	} else if (val == 1) {
+		queue_flag_set(QUEUE_FLAG_SAME_COMP, q);
+		queue_flag_clear(QUEUE_FLAG_SAME_FORCE, q);
+	} else if (val == 0) {
 		queue_flag_clear(QUEUE_FLAG_SAME_COMP, q);
 		queue_flag_clear(QUEUE_FLAG_SAME_FORCE, q);
 	}
@@@ -455,11 -457,11 +457,11 @@@ queue_attr_store(struct kobject *kobj
 }

 /**
- * blk_cleanup_queue: - release a &struct request_queue when it is no longer needed
- * @kobj: the kobj belonging of the request queue to be released
+ * blk_release_queue: - release a &struct request_queue when it is no longer needed
+ * @kobj: the kobj belonging to the request queue to be released
  *
  * Description:
- *     blk_cleanup_queue is the pair to blk_init_queue() or
+ *     blk_release_queue is the pair to blk_init_queue() or
  *     blk_queue_make_request(). It should be called when a request queue is
  *     being released; typically when a block device is being de-registered.
  *     Currently, its primary task is to free all the &struct request
@@@ -477,6 -479,11 +479,11 @@@ static void blk_release_queue(struct ko

 	blk_sync_queue(q);

+	if (q->elevator)
+		elevator_exit(q->elevator);
+
+	blk_throtl_exit(q);
+
 	if (rl->rq_pool)
 		mempool_destroy(rl->rq_pool);
diff --combined drivers/block/loop.c
index 8360239d553c,4720c7ade0ae..157ddcb9d0a5
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@@ -75,11 -75,11 +75,11 @@@
 #include
 #include
 #include
-
+ #include
 #include

- static LIST_HEAD(loop_devices);
- static DEFINE_MUTEX(loop_devices_mutex);
+ static DEFINE_IDR(loop_index_idr);
+ static DEFINE_MUTEX(loop_index_mutex);

 static int max_part;
 static int part_shift;
@@@ -514,7 -514,7 +514,7 @@@ static struct bio *loop_get_bio(struct
 	return bio_list_pop(&lo->lo_bio_list);
 }

-static int loop_make_request(struct request_queue *q, struct bio *old_bio)
+static void loop_make_request(struct request_queue *q, struct bio *old_bio)
 {
 	struct loop_device *lo = q->queuedata;
 	int rw = bio_rw(old_bio);
@@@ -532,11 -532,12 +532,11 @@@
 	loop_add_bio(lo, old_bio);
 	wake_up(&lo->lo_event);
 	spin_unlock_irq(&lo->lo_lock);
-	return 0;
+	return;

out:
 	spin_unlock_irq(&lo->lo_lock);
 	bio_io_error(old_bio);
-	return 0;
 }

 struct switch_request {
@@@ -721,17 -722,10 +721,10 @@@ static inline int is_loop_device(struc
 static ssize_t loop_attr_show(struct device *dev, char *page,
 			      ssize_t (*callback)(struct loop_device *, char *))
 {
-	struct loop_device *l, *lo = NULL;
-
-	mutex_lock(&loop_devices_mutex);
-	list_for_each_entry(l, &loop_devices, lo_list)
-		if (disk_to_dev(l->lo_disk) == dev) {
-			lo = l;
-			break;
-		}
-	mutex_unlock(&loop_devices_mutex);
+	struct gendisk *disk = dev_to_disk(dev);
+	struct loop_device *lo = disk->private_data;

-	return lo ? callback(lo, page) : -EIO;
+	return callback(lo, page);
 }

 #define LOOP_ATTR_RO(_name)						\
@@@ -749,10 -743,10 +742,10 @@@ static ssize_t loop_attr_backing_file_s
 	ssize_t ret;
 	char *p = NULL;

-	mutex_lock(&lo->lo_ctl_mutex);
+	spin_lock_irq(&lo->lo_lock);
 	if (lo->lo_backing_file)
 		p = d_path(&lo->lo_backing_file->f_path, buf, PAGE_SIZE - 1);
-	mutex_unlock(&lo->lo_ctl_mutex);
+	spin_unlock_irq(&lo->lo_lock);

 	if (IS_ERR_OR_NULL(p))
 		ret = PTR_ERR(p);
@@@ -1006,7 -1000,9 +999,9 @@@ static int loop_clr_fd(struct loop_devi

 	kthread_stop(lo->lo_thread);

+	spin_lock_irq(&lo->lo_lock);
 	lo->lo_backing_file = NULL;
+	spin_unlock_irq(&lo->lo_lock);

 	loop_release_xfer(lo);
 	lo->transfer = NULL;
@@@ -1484,13 -1480,22 +1479,22 @@@ static int lo_compat_ioctl(struct block
 static int lo_open(struct block_device *bdev, fmode_t mode)
 {
-	struct loop_device *lo = bdev->bd_disk->private_data;
+	struct loop_device *lo;
+	int err = 0;
+
+	mutex_lock(&loop_index_mutex);
+	lo = bdev->bd_disk->private_data;
+	if (!lo) {
+		err = -ENXIO;
+		goto out;
+	}

 	mutex_lock(&lo->lo_ctl_mutex);
 	lo->lo_refcnt++;
 	mutex_unlock(&lo->lo_ctl_mutex);
-
-	return 0;
+ out:
+	mutex_unlock(&loop_index_mutex);
+	return err;
 }

 static int lo_release(struct gendisk *disk, fmode_t mode)
@@@ -1556,40 -1561,71 +1560,71 @@@ int loop_register_transfer(struct loop_
 	return 0;
 }

+ static int unregister_transfer_cb(int id, void *ptr, void *data)
+ {
+	struct loop_device *lo = ptr;
+	struct loop_func_table *xfer = data;
+
+	mutex_lock(&lo->lo_ctl_mutex);
+	if (lo->lo_encryption == xfer)
+		loop_release_xfer(lo);
+	mutex_unlock(&lo->lo_ctl_mutex);
+	return 0;
+ }
+
 int loop_unregister_transfer(int number)
 {
 	unsigned int n = number;
-	struct loop_device *lo;
 	struct loop_func_table *xfer;

 	if (n == 0 || n >= MAX_LO_CRYPT || (xfer = xfer_funcs[n]) == NULL)
 		return -EINVAL;

 	xfer_funcs[n] = NULL;
-
-	list_for_each_entry(lo, &loop_devices, lo_list) {
-		mutex_lock(&lo->lo_ctl_mutex);
-
-		if (lo->lo_encryption == xfer)
-			loop_release_xfer(lo);
-
-		mutex_unlock(&lo->lo_ctl_mutex);
-	}
-
+	idr_for_each(&loop_index_idr, &unregister_transfer_cb, xfer);
 	return 0;
 }

 EXPORT_SYMBOL(loop_register_transfer);
 EXPORT_SYMBOL(loop_unregister_transfer);

- static struct loop_device *loop_alloc(int i)
+ static int loop_add(struct loop_device **l, int i)
 {
 	struct loop_device *lo;
 	struct gendisk *disk;
+	int err;

 	lo = kzalloc(sizeof(*lo), GFP_KERNEL);
-	if (!lo)
+	if (!lo) {
+		err = -ENOMEM;
 		goto out;
+	}
+
+	err = idr_pre_get(&loop_index_idr, GFP_KERNEL);
+	if (err < 0)
+		goto out_free_dev;
+
+	if (i >= 0) {
+		int m;
+
+		/* create specific i in the index */
+		err = idr_get_new_above(&loop_index_idr, lo, i, &m);
+		if (err >= 0 && i != m) {
+			idr_remove(&loop_index_idr, m);
+			err = -EEXIST;
+		}
+	} else if (i == -1) {
+		int m;
+
+		/* get next free nr */
+		err = idr_get_new(&loop_index_idr, lo, &m);
+		if (err >= 0)
+			i = m;
+	} else {
+		err = -EINVAL;
+	}
+	if (err < 0)
+		goto out_free_dev;

 	lo->lo_queue = blk_alloc_queue(GFP_KERNEL);
 	if (!lo->lo_queue)
@@@ -1610,81 -1646,158 +1645,158 @@@
 	disk->private_data	= lo;
 	disk->queue		= lo->lo_queue;
 	sprintf(disk->disk_name, "loop%d", i);
-	return lo;
+	add_disk(disk);
+	*l = lo;
+	return lo->lo_number;

out_free_queue:
 	blk_cleanup_queue(lo->lo_queue);
out_free_dev:
 	kfree(lo);
out:
-	return NULL;
+	return err;
 }

- static void loop_free(struct loop_device *lo)
+ static void loop_remove(struct loop_device *lo)
 {
+	del_gendisk(lo->lo_disk);
 	blk_cleanup_queue(lo->lo_queue);
 	put_disk(lo->lo_disk);
-	list_del(&lo->lo_list);
 	kfree(lo);
 }

- static struct loop_device *loop_init_one(int i)
+ static int find_free_cb(int id, void *ptr, void *data)
+ {
+	struct loop_device *lo = ptr;
+	struct loop_device **l = data;
+
+	if (lo->lo_state == Lo_unbound) {
+		*l = lo;
+		return 1;
+	}
+	return 0;
+ }
+
+ static int loop_lookup(struct loop_device **l, int i)
 {
 	struct loop_device *lo;
+	int ret = -ENODEV;

-	list_for_each_entry(lo, &loop_devices, lo_list) {
-		if (lo->lo_number == i)
-			return lo;
+	if (i < 0) {
+		int err;
+
+		err = idr_for_each(&loop_index_idr, &find_free_cb, &lo);
+		if (err == 1) {
+			*l = lo;
+			ret = lo->lo_number;
+		}
+		goto out;
 	}

-	lo = loop_alloc(i);
+	/* lookup and return a specific i */
+	lo = idr_find(&loop_index_idr, i);
 	if (lo) {
-		add_disk(lo->lo_disk);
-		list_add_tail(&lo->lo_list, &loop_devices);
+		*l = lo;
+		ret = lo->lo_number;
 	}
-	return lo;
- }
-
- static void loop_del_one(struct loop_device *lo)
- {
-	del_gendisk(lo->lo_disk);
-	loop_free(lo);
+ out:
+	return ret;
 }

 static struct kobject *loop_probe(dev_t dev, int *part, void *data)
 {
 	struct loop_device *lo;
 	struct kobject *kobj;
+	int err;

-	mutex_lock(&loop_devices_mutex);
-	lo = loop_init_one(MINOR(dev) >> part_shift);
-	kobj = lo ? get_disk(lo->lo_disk) : ERR_PTR(-ENOMEM);
-	mutex_unlock(&loop_devices_mutex);
+	mutex_lock(&loop_index_mutex);
+	err = loop_lookup(&lo, MINOR(dev) >> part_shift);
+	if (err < 0)
+		err = loop_add(&lo, MINOR(dev) >> part_shift);
+	if (err < 0)
+		kobj = ERR_PTR(err);
+	else
+		kobj = get_disk(lo->lo_disk);
+	mutex_unlock(&loop_index_mutex);

 	*part = 0;
 	return kobj;
 }

+ static long loop_control_ioctl(struct file *file, unsigned int cmd,
+			       unsigned long parm)
+ {
+	struct loop_device *lo;
+	int ret = -ENOSYS;
+
+	mutex_lock(&loop_index_mutex);
+	switch (cmd) {
+	case LOOP_CTL_ADD:
+		ret = loop_lookup(&lo, parm);
+		if (ret >= 0) {
+			ret = -EEXIST;
+			break;
+		}
+		ret = loop_add(&lo, parm);
+		break;
+	case LOOP_CTL_REMOVE:
+		ret = loop_lookup(&lo, parm);
+		if (ret < 0)
+			break;
+		mutex_lock(&lo->lo_ctl_mutex);
+		if (lo->lo_state != Lo_unbound) {
+			ret = -EBUSY;
+			mutex_unlock(&lo->lo_ctl_mutex);
+			break;
+		}
+		if (lo->lo_refcnt > 0) {
+			ret = -EBUSY;
+			mutex_unlock(&lo->lo_ctl_mutex);
+			break;
+		}
+		lo->lo_disk->private_data = NULL;
+		mutex_unlock(&lo->lo_ctl_mutex);
+		idr_remove(&loop_index_idr, lo->lo_number);
+		loop_remove(lo);
+		break;
+	case LOOP_CTL_GET_FREE:
+		ret = loop_lookup(&lo, -1);
+		if (ret >= 0)
+			break;
+		ret = loop_add(&lo, -1);
+	}
+	mutex_unlock(&loop_index_mutex);
+
+	return ret;
+ }
+
+ static const struct file_operations loop_ctl_fops = {
+	.open		= nonseekable_open,
+	.unlocked_ioctl	= loop_control_ioctl,
+	.compat_ioctl	= loop_control_ioctl,
+	.owner		= THIS_MODULE,
+	.llseek		= noop_llseek,
+ };
+
+ static struct miscdevice loop_misc = {
+	.minor		= LOOP_CTRL_MINOR,
+	.name		= "loop-control",
+	.fops		= &loop_ctl_fops,
+ };
+
+ MODULE_ALIAS_MISCDEV(LOOP_CTRL_MINOR);
+ MODULE_ALIAS("devname:loop-control");
+
 static int __init loop_init(void)
 {
 	int i, nr;
 	unsigned long range;
-	struct loop_device *lo, *next;
+	struct loop_device *lo;
+	int err;

-	/*
-	 * loop module now has a feature to instantiate underlying device
-	 * structure on-demand, provided that there is an access dev node.
-	 * However, this will not work well with user space tool that doesn't
-	 * know about such "feature". In order to not break any existing
-	 * tool, we do the following:
-	 *
-	 * (1) if max_loop is specified, create that many upfront, and this
-	 *     also becomes a hard limit.
-	 * (2) if max_loop is not specified, create 8 loop device on module
-	 *     load, user can further extend loop device by create dev node
-	 *     themselves and have kernel automatically instantiate actual
-	 *     device on-demand.
-	 */
+	err = misc_register(&loop_misc);
+	if (err < 0)
+		return err;

 	part_shift = 0;
 	if (max_part > 0) {
@@@ -1707,57 -1820,60 +1819,60 @@@
 	if (max_loop > 1UL << (MINORBITS - part_shift))
 		return -EINVAL;

+	/*
+	 * If max_loop is specified, create that many devices upfront.
+	 * This also becomes a hard limit. If max_loop is not specified,
+	 * create CONFIG_BLK_DEV_LOOP_MIN_COUNT loop devices at module
+	 * init time. Loop devices can be requested on-demand with the
+	 * /dev/loop-control interface, or be instantiated by accessing
+	 * a 'dead' device node.
+	 */
 	if (max_loop) {
 		nr = max_loop;
 		range = max_loop << part_shift;
 	} else {
-		nr = 8;
+		nr = CONFIG_BLK_DEV_LOOP_MIN_COUNT;
 		range = 1UL << MINORBITS;
 	}

 	if (register_blkdev(LOOP_MAJOR, "loop"))
 		return -EIO;

-	for (i = 0; i < nr; i++) {
-		lo = loop_alloc(i);
-		if (!lo)
-			goto Enomem;
-		list_add_tail(&lo->lo_list, &loop_devices);
-	}
-
-	/* point of no return */
-
-	list_for_each_entry(lo, &loop_devices, lo_list)
-		add_disk(lo->lo_disk);
-
 	blk_register_region(MKDEV(LOOP_MAJOR, 0), range,
 				  THIS_MODULE, loop_probe, NULL, NULL);

+	/* pre-create number of devices given by config or max_loop */
+	mutex_lock(&loop_index_mutex);
+	for (i = 0; i < nr; i++)
+		loop_add(&lo, i);
+	mutex_unlock(&loop_index_mutex);
+
 	printk(KERN_INFO "loop: module loaded\n");
 	return 0;
+ }

- Enomem:
-	printk(KERN_INFO "loop: out of memory\n");
-
-	list_for_each_entry_safe(lo, next, &loop_devices, lo_list)
-		loop_free(lo);
+ static int loop_exit_cb(int id, void *ptr, void *data)
+ {
+	struct loop_device *lo = ptr;

-	unregister_blkdev(LOOP_MAJOR, "loop");
-	return -ENOMEM;
+	loop_remove(lo);
+	return 0;
 }

 static void __exit loop_exit(void)
 {
 	unsigned long range;
-	struct loop_device *lo, *next;

 	range = max_loop ? max_loop << part_shift : 1UL << MINORBITS;

-	list_for_each_entry_safe(lo, next, &loop_devices, lo_list)
-		loop_del_one(lo);
+	idr_for_each(&loop_index_idr, &loop_exit_cb, NULL);
+	idr_remove_all(&loop_index_idr);
+	idr_destroy(&loop_index_idr);

 	blk_unregister_region(MKDEV(LOOP_MAJOR, 0), range);
 	unregister_blkdev(LOOP_MAJOR, "loop");
+
+	misc_deregister(&loop_misc);
 }

 module_init(loop_init);
diff --combined drivers/md/md.c
index 5c2178562c96,5c95ccb59500..8f52d4eb78a0
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@@ -61,6 -61,11 +61,11 @@@
 static void autostart_arrays(int part);
 #endif

+ /* pers_list is a list of registered personalities protected
+  * by pers_lock.
+  * pers_lock does extra service to protect accesses to
+  * mddev->thread when the mutex cannot be held.
+  */
 static LIST_HEAD(pers_list);
 static DEFINE_SPINLOCK(pers_lock);

@@@ -330,17 -335,18 +335,17 @@@ static DEFINE_SPINLOCK(all_mddevs_lock
  * call has finished, the bio has been linked into some internal structure
  * and so is visible to ->quiesce(), so we don't need the refcount any more.
  */
-static int md_make_request(struct request_queue *q, struct bio *bio)
+static void md_make_request(struct request_queue *q, struct bio *bio)
 {
 	const int rw = bio_data_dir(bio);
 	mddev_t *mddev = q->queuedata;
-	int rv;
 	int cpu;
 	unsigned int sectors;

 	if (mddev == NULL || mddev->pers == NULL
 	    || !mddev->ready) {
 		bio_io_error(bio);
-		return 0;
+		return;
 	}
 	smp_rmb(); /* Ensure implications of 'active' are visible */
 	rcu_read_lock();
@@@ -365,7 -371,7 +370,7 @@@
 	 * go away inside make_request
 	 */
 	sectors = bio_sectors(bio);
-	rv = mddev->pers->make_request(mddev, bio);
+	mddev->pers->make_request(mddev, bio);

 	cpu = part_stat_lock();
 	part_stat_inc(cpu, &mddev->gendisk->part0, ios[rw]);
@@@ -374,6 -380,8 +379,6 @@@
 	if (atomic_dec_and_test(&mddev->active_io) && mddev->suspended)
 		wake_up(&mddev->sb_wait);
-
-	return rv;
 }

 /* mddev_suspend makes sure no new requests are submitted
@@@ -472,7 -480,8 +477,7 @@@ static void md_submit_flush_data(struc
 		bio_endio(bio, 0);
 	else {
 		bio->bi_rw &= ~REQ_FLUSH;
-		if (mddev->pers->make_request(mddev, bio))
-			generic_make_request(bio);
+		mddev->pers->make_request(mddev, bio);
 	}

 	mddev->flush_bio = NULL;
@@@ -735,7 -744,12 +740,12 @@@ static void mddev_unlock(mddev_t * mdde
 	} else
 		mutex_unlock(&mddev->reconfig_mutex);

+	/* As we've dropped the mutex we need a spinlock to
+	 * make sure the thread doesn't disappear
+	 */
+	spin_lock(&pers_lock);
 	md_wakeup_thread(mddev->thread);
+	spin_unlock(&pers_lock);
 }

 static mdk_rdev_t * find_rdev_nr(mddev_t *mddev, int nr)
@@@ -844,7 -858,7 +854,7 @@@ void md_super_write(mddev_t *mddev, mdk
 	bio->bi_end_io = super_written;

 	atomic_inc(&mddev->pending_writes);
-	submit_bio(REQ_WRITE | REQ_SYNC | REQ_FLUSH | REQ_FUA, bio);
+	submit_bio(WRITE_FLUSH_FUA, bio);
 }

 void md_super_wait(mddev_t *mddev)
@@@ -1134,8 -1148,11 +1144,11 @@@ static int super_90_load(mdk_rdev_t *rd
 		ret = 0;
 	}
 	rdev->sectors = rdev->sb_start;
+	/* Limit to 4TB as metadata cannot record more than that */
+	if (rdev->sectors >= (2ULL << 32))
+		rdev->sectors = (2ULL << 32) - 2;

-	if (rdev->sectors < sb->size * 2 && sb->level > 1)
+	if (rdev->sectors < ((sector_t)sb->size) * 2 && sb->level >= 1)
 		/* "this cannot possibly happen" ... */
 		ret = -EINVAL;

@@@ -1169,7 -1186,7 +1182,7 @@@ static int super_90_validate(mddev_t *m
 		mddev->clevel[0] = 0;
 		mddev->layout = sb->layout;
 		mddev->raid_disks = sb->raid_disks;
-		mddev->dev_sectors = sb->size * 2;
+		mddev->dev_sectors = ((sector_t)sb->size) * 2;
 		mddev->events = ev1;
 		mddev->bitmap_info.offset = 0;
 		mddev->bitmap_info.default_offset = MD_SB_BYTES >> 9;
@@@ -1411,6 -1428,11 +1424,11 @@@ super_90_rdev_size_change(mdk_rdev_t *r
 	rdev->sb_start = calc_dev_sboffset(rdev);
 	if (!num_sectors || num_sectors > rdev->sb_start)
 		num_sectors = rdev->sb_start;
+	/* Limit to 4TB as metadata cannot record more than that.
+	 * 4TB == 2^32 KB, or 2*2^32 sectors.
+	 */
+	if (num_sectors >= (2ULL << 32))
+		num_sectors = (2ULL << 32) - 2;
 	md_super_write(rdev->mddev, rdev, rdev->sb_start, rdev->sb_size,
 		       rdev->sb_page);
 	md_super_wait(rdev->mddev);
@@@ -1734,6 -1756,11 +1752,11 @@@ static void super_1_sync(mddev_t *mddev
 	sb->level = cpu_to_le32(mddev->level);
 	sb->layout = cpu_to_le32(mddev->layout);

+	if (test_bit(WriteMostly, &rdev->flags))
+		sb->devflags |= WriteMostly1;
+	else
+		sb->devflags &= ~WriteMostly1;
+
 	if (mddev->bitmap && mddev->bitmap_info.file == NULL) {
 		sb->bitmap_offset = cpu_to_le32((__u32)mddev->bitmap_info.offset);
 		sb->feature_map = cpu_to_le32(MD_FEATURE_BITMAP_OFFSET);
@@@ -2557,7 -2584,10 +2580,10 @@@ state_store(mdk_rdev_t *rdev, const cha
 	int err = -EINVAL;
 	if (cmd_match(buf, "faulty") && rdev->mddev->pers) {
 		md_error(rdev->mddev, rdev);
-		err = 0;
+		if (test_bit(Faulty, &rdev->flags))
+			err = 0;
+		else
+			err = -EBUSY;
 	} else if (cmd_match(buf, "remove")) {
 		if (rdev->raid_disk >= 0)
 			err = -EBUSY;
@@@ -2580,7 -2610,7 +2606,7 @@@
 		err = 0;
 	} else if (cmd_match(buf, "-blocked")) {
 		if (!test_bit(Faulty, &rdev->flags) &&
-		    test_bit(BlockedBadBlocks, &rdev->flags)) {
+		    rdev->badblocks.unacked_exist) {
 			/* metadata handler doesn't understand badblocks,
 			 * so we need to fail the device
 			 */
@@@ -5979,6 -6009,8 +6005,8 @@@ static int set_disk_faulty(mddev_t *mdd
 		return -ENODEV;

 	md_error(mddev, rdev);
+	if (!test_bit(Faulty, &rdev->flags))
+		return -EBUSY;
 	return 0;
 }

@@@ -6407,11 -6439,18 +6435,18 @@@ mdk_thread_t *md_register_thread(void (
 	return thread;
 }

- void md_unregister_thread(mdk_thread_t *thread)
+ void md_unregister_thread(mdk_thread_t **threadp)
 {
+	mdk_thread_t *thread = *threadp;
 	if (!thread)
 		return;
 	dprintk("interrupting MD-thread pid %d\n", task_pid_nr(thread->tsk));
+	/* Locking ensures that mddev_unlock does not wake_up a
+	 * non-existent thread
+	 */
+	spin_lock(&pers_lock);
+	*threadp = NULL;
+	spin_unlock(&pers_lock);

 	kthread_stop(thread->tsk);
 	kfree(thread);
@@@ -7318,8 -7357,7 +7353,7 @@@ static void reap_sync_thread(mddev_t *m
 	mdk_rdev_t *rdev;

 	/* resync has finished, collect result */
-	md_unregister_thread(mddev->sync_thread);
-	mddev->sync_thread = NULL;
+	md_unregister_thread(&mddev->sync_thread);
 	if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
 	    !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) {
 		/* success...*/
diff --combined drivers/md/md.h
index bd47847cf7ca,0a309dc29b45..1509a3eb9ae1
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@@ -424,7 -424,7 +424,7 @@@ struct mdk_personalit
 	int level;
 	struct list_head list;
 	struct module *owner;
-	int (*make_request)(mddev_t *mddev, struct bio *bio);
+	void (*make_request)(mddev_t *mddev, struct bio *bio);
 	int (*run)(mddev_t *mddev);
 	int (*stop)(mddev_t *mddev);
 	void (*status)(struct seq_file *seq, mddev_t *mddev);
@@@ -560,7 -560,7 +560,7 @@@ extern int register_md_personality(stru
 extern int unregister_md_personality(struct mdk_personality *p);
 extern mdk_thread_t * md_register_thread(void (*run) (mddev_t *mddev),
 				mddev_t *mddev, const char *name);
- extern void md_unregister_thread(mdk_thread_t *thread);
+ extern void md_unregister_thread(mdk_thread_t **threadp);
 extern void md_wakeup_thread(mdk_thread_t *thread);
 extern void md_check_recovery(mddev_t *mddev);
 extern void md_write_start(mddev_t *mddev, struct bio *bi);
diff --combined drivers/md/multipath.c
index 407cb5691425,d5b5fb300171..618dd9e22513
--- a/drivers/md/multipath.c
+++ b/drivers/md/multipath.c
@@@ -106,7 -106,7 +106,7 @@@ static void multipath_end_request(struc
 	rdev_dec_pending(rdev, conf->mddev);
 }

-static int multipath_make_request(mddev_t *mddev, struct bio * bio)
+static void multipath_make_request(mddev_t *mddev, struct bio * bio)
 {
 	multipath_conf_t *conf = mddev->private;
 	struct multipath_bh * mp_bh;
@@@ -114,7 -114,7 +114,7 @@@
 	if (unlikely(bio->bi_rw & REQ_FLUSH)) {
 		md_flush_request(mddev, bio);
-		return 0;
+		return;
 	}

 	mp_bh = mempool_alloc(conf->pool, GFP_NOIO);
@@@ -126,7 -126,7 +126,7 @@@
 	if (mp_bh->path < 0) {
 		bio_endio(bio, -EIO);
 		mempool_free(mp_bh, conf->pool);
-		return 0;
+		return;
 	}
 	multipath = conf->multipaths + mp_bh->path;
@@@ -137,7 -137,7 +137,7 @@@
 	mp_bh->bio.bi_end_io = multipath_end_request;
 	mp_bh->bio.bi_private = mp_bh;
 	generic_make_request(&mp_bh->bio);
-	return 0;
+	return;
 }

 static void multipath_status (struct seq_file *seq, mddev_t *mddev)
@@@ -514,8 -514,7 +514,7 @@@ static int multipath_stop (mddev_t *mdd
 {
 	multipath_conf_t *conf = mddev->private;

-	md_unregister_thread(mddev->thread);
-	mddev->thread = NULL;
+	md_unregister_thread(&mddev->thread);
 	blk_sync_queue(mddev->queue); /* the unplug fn references 'conf'*/
 	mempool_destroy(conf->pool);
 	kfree(conf->multipaths);
diff --combined drivers/md/raid1.c
index 97f2a5f977b1,d9587dffe533..d4ddfa627301
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@@ -785,7 -785,7 +785,7 @@@ do_sync_io
 	PRINTK("%dB behind alloc failed, doing sync I/O\n", bio->bi_size);
 }

-static int make_request(mddev_t *mddev, struct bio * bio)
+static void make_request(mddev_t *mddev, struct bio * bio)
 {
 	conf_t *conf = mddev->private;
 	mirror_info_t *mirror;
@@@ -870,7 -870,7 +870,7 @@@ read_again
 		if (rdisk < 0) {
 			/* couldn't find anywhere to read from */
 			raid_end_bio_io(r1_bio);
-			return 0;
+			return;
 		}
 		mirror = conf->mirrors + rdisk;
@@@ -928,7 -928,7 +928,7 @@@
 			goto read_again;
 		} else
 			generic_make_request(read_bio);
-		return 0;
+		return;
 	}

 	/*
@@@ -1099,12 -1099,11 +1099,11 @@@
 			bio_list_add(&conf->pending_bio_list, mbio);
 			spin_unlock_irqrestore(&conf->device_lock, flags);
 		}
-		r1_bio_write_done(r1_bio);
-
-		/* In case raid1d snuck in to freeze_array */
-		wake_up(&conf->wait_barrier);
-
+		/* Mustn't call r1_bio_write_done before this next test,
+		 * as it could result in the bio being freed.
+		 */
 		if (sectors_handled < (bio->bi_size >> 9)) {
+			r1_bio_write_done(r1_bio);
 			/* We need another r1_bio. It has already been counted
 			 * in bio->bi_phys_segments
 			 */
@@@ -1117,8 -1116,15 +1116,13 @@@
 		goto retry_write;
 	}

+	r1_bio_write_done(r1_bio);
+
+	/* In case raid1d snuck in to freeze_array */
+	wake_up(&conf->wait_barrier);
+
 	if (do_sync || !bitmap || !plugged)
 		md_wakeup_thread(mddev->thread);
-
-	return 0;
 }

 static void status(struct seq_file *seq, mddev_t *mddev)
@@@ -2556,8 -2562,7 +2560,7 @@@ static int stop(mddev_t *mddev
 	raise_barrier(conf);
 	lower_barrier(conf);

-	md_unregister_thread(mddev->thread);
-	mddev->thread = NULL;
+	md_unregister_thread(&mddev->thread);
 	if (conf->r1bio_pool)
 		mempool_destroy(conf->r1bio_pool);
 	kfree(conf->mirrors);
diff --combined drivers/md/raid10.c
index 04b625e1cb60,0cd9672cf9cb..ea5fc0b6a84c
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@@ -337,6 -337,21 +337,21 @@@ static void close_write(r10bio_t *r10_b
 	md_write_end(r10_bio->mddev);
 }

+ static void one_write_done(r10bio_t *r10_bio)
+ {
+	if (atomic_dec_and_test(&r10_bio->remaining)) {
+		if (test_bit(R10BIO_WriteError, &r10_bio->state))
+			reschedule_retry(r10_bio);
+		else {
+			close_write(r10_bio);
+			if (test_bit(R10BIO_MadeGood, &r10_bio->state))
+				reschedule_retry(r10_bio);
+			else
+				raid_end_bio_io(r10_bio);
+		}
+	}
+ }
+
 static void raid10_end_write_request(struct bio *bio, int error)
 {
 	int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
@@@ -387,17 -402,7 +402,7 @@@
 	 * Let's see if all mirrored write operations have finished
 	 * already.
 	 */
-	if (atomic_dec_and_test(&r10_bio->remaining)) {
-		if (test_bit(R10BIO_WriteError, &r10_bio->state))
-			reschedule_retry(r10_bio);
-		else {
-			close_write(r10_bio);
-			if (test_bit(R10BIO_MadeGood, &r10_bio->state))
-				reschedule_retry(r10_bio);
-			else
-				raid_end_bio_io(r10_bio);
-		}
-	}
+	one_write_done(r10_bio);
 	if (dec_rdev)
 		rdev_dec_pending(conf->mirrors[dev].rdev, conf->mddev);
 }
@@@ -825,7 -830,7 +830,7 @@@ static void unfreeze_array(conf_t *conf
 	spin_unlock_irq(&conf->resync_lock);
 }

-static int make_request(mddev_t *mddev, struct bio * bio)
+static void make_request(mddev_t *mddev, struct bio * bio)
 {
 	conf_t *conf = mddev->private;
 	mirror_info_t *mirror;
@@@ -844,7 -849,7 +849,7 @@@
 	if (unlikely(bio->bi_rw & REQ_FLUSH)) {
 		md_flush_request(mddev, bio);
-		return 0;
+		return;
 	}

 	/* If this request crosses a chunk boundary, we need to
@@@ -876,8 -881,10 +881,8 @@@
 		conf->nr_waiting++;
 		spin_unlock_irq(&conf->resync_lock);

-		if (make_request(mddev, &bp->bio1))
-			generic_make_request(&bp->bio1);
-		if (make_request(mddev, &bp->bio2))
-			generic_make_request(&bp->bio2);
+		make_request(mddev, &bp->bio1);
+		make_request(mddev, &bp->bio2);

 		spin_lock_irq(&conf->resync_lock);
 		conf->nr_waiting--;
@@@ -885,14 -892,14 +890,14 @@@
 		spin_unlock_irq(&conf->resync_lock);

 		bio_pair_release(bp);
-		return 0;
+		return;
bad_map:
 		printk("md/raid10:%s: make_request bug: can't convert block across chunks"
 		       " or bigger than %dk %llu %d\n", mdname(mddev), chunk_sects/2,
 		       (unsigned long long)bio->bi_sector, bio->bi_size >> 10);

 		bio_io_error(bio);
-		return 0;
+		return;
 	}

 	md_write_start(mddev, bio);
@@@ -935,7 -942,7 +940,7 @@@ read_again
 		slot = r10_bio->read_slot;
 		if (disk < 0) {
 			raid_end_bio_io(r10_bio);
-			return 0;
+			return;
 		}
 		mirror = conf->mirrors + disk;
@@@ -983,7 -990,7 +988,7 @@@
 			goto read_again;
 		} else
 			generic_make_request(read_bio);
-		return 0;
+		return;
 	}

 	/*
@@@ -1125,20 -1132,12 +1130,12 @@@ retry_write
 		spin_unlock_irqrestore(&conf->device_lock, flags);
 	}

-	if (atomic_dec_and_test(&r10_bio->remaining)) {
-		/* This matches the end of raid10_end_write_request() */
-		bitmap_endwrite(r10_bio->mddev->bitmap, r10_bio->sector,
-				r10_bio->sectors,
-				!test_bit(R10BIO_Degraded, &r10_bio->state),
-				0);
-		md_write_end(mddev);
-		raid_end_bio_io(r10_bio);
-	}
-
-	/* In case raid10d snuck in to freeze_array */
-	wake_up(&conf->wait_barrier);
+	/* Don't remove the bias on 'remaining' (one_write_done) until
+	 * after checking if we need to go around again.
+	 */

 	if (sectors_handled < (bio->bi_size >> 9)) {
+		one_write_done(r10_bio);
 		/* We need another r10_bio. It has already been counted
 		 * in bio->bi_phys_segments.
 		 */
@@@ -1152,9 -1151,14 +1149,13 @@@
 		r10_bio->state = 0;
 		goto retry_write;
 	}
+	one_write_done(r10_bio);
+
+	/* In case raid10d snuck in to freeze_array */
+	wake_up(&conf->wait_barrier);

 	if (do_sync || !mddev->bitmap || !plugged)
 		md_wakeup_thread(mddev->thread);
-	return 0;
 }

 static void status(struct seq_file *seq, mddev_t *mddev)
@@@ -2951,7 -2955,7 +2952,7 @@@ static int run(mddev_t *mddev
 	return 0;

out_free_conf:
-	md_unregister_thread(mddev->thread);
+	md_unregister_thread(&mddev->thread);
 	if (conf->r10bio_pool)
 		mempool_destroy(conf->r10bio_pool);
 	safe_put_page(conf->tmppage);
@@@ -2969,8 -2973,7 +2970,7 @@@ static int stop(mddev_t *mddev
 	raise_barrier(conf, 0);
 	lower_barrier(conf);

-	md_unregister_thread(mddev->thread);
-	mddev->thread = NULL;
+	md_unregister_thread(&mddev->thread);
 	blk_sync_queue(mddev->queue); /* the unplug fn references 'conf'*/
 	if (conf->r10bio_pool)
 		mempool_destroy(conf->r10bio_pool);
diff --combined drivers/md/raid5.c
index 96b7f6a1b6f2,ac5e8b57e50f..83f2c44e170f
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@@ -3336,7 -3336,7 +3336,7 @@@ static void handle_stripe(struct stripe

finish:
 	/* wait for this device to become unblocked */
-	if (unlikely(s.blocked_rdev))
+	if (conf->mddev->external && unlikely(s.blocked_rdev))
 		md_wait_for_blocked_rdev(s.blocked_rdev, conf->mddev);

 	if (s.handle_bad_blocks)
@@@ -3695,7 -3695,7 +3695,7 @@@ static struct stripe_head *__get_priori
 	return sh;
 }

-static int make_request(mddev_t *mddev, struct bio * bi)
+static void make_request(mddev_t *mddev, struct bio * bi)
 {
 	raid5_conf_t *conf = mddev->private;
 	int dd_idx;
@@@ -3708,7 -3708,7 +3708,7 @@@
 	if (unlikely(bi->bi_rw & REQ_FLUSH)) {
 		md_flush_request(mddev, bi);
-		return 0;
+		return;
 	}

 	md_write_start(mddev, bi);
@@@ -3716,7 -3716,7 +3716,7 @@@
 	if (rw == READ &&
 	     mddev->reshape_position == MaxSector &&
 	     chunk_aligned_read(mddev,bi))
-		return 0;
+		return;

 	logical_sector = bi->bi_sector & ~((sector_t)STRIPE_SECTORS-1);
 	last_sector = bi->bi_sector + (bi->bi_size>>9);
@@@ -3851,6 -3851,8 +3851,6 @@@
 		bio_endio(bi, 0);
 	}
-
-	return 0;
 }

 static sector_t raid5_size(mddev_t *mddev, sector_t sectors, int raid_disks);
@@@ -4939,8 -4941,7 +4939,7 @@@ static int run(mddev_t *mddev
 	return 0;
abort:
-	md_unregister_thread(mddev->thread);
-	mddev->thread = NULL;
+	md_unregister_thread(&mddev->thread);
 	if (conf) {
 		print_raid5_conf(conf);
 		free_conf(conf);
@@@ -4954,8 -4955,7 +4953,7 @@@ static int stop(mddev_t *mddev
 {
 	raid5_conf_t *conf = mddev->private;

-	md_unregister_thread(mddev->thread);
-	mddev->thread = NULL;
+	md_unregister_thread(&mddev->thread);
 	if (mddev->queue)
 		mddev->queue->backing_dev_info.congested_fn = NULL;
 	free_conf(conf);
diff --combined include/linux/blkdev.h
index 1978655faa3b,7fbaa9103344..0b68044e7abb
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@@ -30,6 -30,7 +30,7 @@@ struct request_pm_state
 struct blk_trace;
 struct request;
 struct sg_io_hdr;
+ struct bsg_job;

 #define BLKDEV_MIN_RQ	4
 #define BLKDEV_MAX_RQ	128	/* Default maximum */
@@@ -117,6 -118,7 +118,7 @@@ struct reques
 		struct {
 			unsigned int		seq;
 			struct list_head	list;
+			rq_end_io_fn		*saved_end_io;
 		} flush;
 	};
@@@ -193,7 -195,7 +195,7 @@@ struct request_pm_stat
 #include

 typedef void (request_fn_proc) (struct request_queue *q);
-typedef int (make_request_fn) (struct request_queue *q, struct bio *bio);
+typedef void (make_request_fn) (struct request_queue *q, struct bio *bio);
 typedef int (prep_rq_fn) (struct request_queue *, struct request *);
 typedef void (unprep_rq_fn) (struct request_queue *, struct request *);
@@@ -209,6 -211,7 +211,7 @@@ typedef int (merge_bvec_fn) (struct req
 typedef void (softirq_done_fn)(struct request *);
 typedef int (dma_drain_needed_fn)(struct request *);
 typedef int (lld_busy_fn) (struct request_queue *q);
+ typedef int (bsg_job_fn) (struct bsg_job *);

 enum blk_eh_timer_return {
 	BLK_EH_NOT_HANDLED,
@@@ -375,6 -378,8 +378,8 @@@ struct request_queue
 	struct mutex		sysfs_lock;

 #if defined(CONFIG_BLK_DEV_BSG)
+	bsg_job_fn		*bsg_job_fn;
+	int			bsg_job_size;
 	struct bsg_class_device bsg_dev;
 #endif
@@@ -675,8 -680,6 +680,8 @@@ extern int scsi_cmd_ioctl(struct reques
 extern int sg_scsi_ioctl(struct request_queue *, struct gendisk *, fmode_t,
 			 struct scsi_ioctl_command __user *);

+extern void blk_queue_bio(struct request_queue *q, struct bio *bio);
+
 /*
  * A queue has just exited congestion. Note this in the global counter of
  * congested queues, and wake up anyone who was waiting for requests to be
  * put back.
  */
@@@ -860,23 -863,16 +865,22 @@@ struct request_queue *blk_alloc_queue_n
 extern void blk_put_queue(struct request_queue *);

 /*
- * Note: Code in between changing the blk_plug list/cb_list or element of such
- * lists is preemptable, but such code can't do sleep (or be very careful),
- * otherwise data is corrupted. For details, please check schedule() where
- * blk_schedule_flush_plug() is called.
+ * blk_plug permits building a queue of related requests by holding the I/O
+ * fragments for a short period. This allows merging of sequential requests
+ * into a single larger request. As the requests are moved from a per-task
+ * list to the device's request_queue in a batch, this results in improved
+ * scalability as the lock contention for the request_queue lock is reduced.
+ *
+ * It is ok not to disable preemption when adding the request to the plug list
+ * or when attempting a merge, because blk_schedule_flush_plug() will only
+ * flush the plug list when the task sleeps by itself. For details, please see
+ * schedule() where blk_schedule_flush_plug() is called.
  */
 struct blk_plug {
-	unsigned long magic;
-	struct list_head list;
-	struct list_head cb_list;
-	unsigned int should_sort;
+	unsigned long magic;		/* detect uninitialized use-cases */
+	struct list_head list;		/* requests */
+	struct list_head cb_list;	/* md requires an unplug callback */
+	unsigned int should_sort;	/* list to be sorted before flushing? */
-	unsigned int count;		/* number of queued requests */
 };

 #define BLK_MAX_REQUEST_COUNT 16
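
The thread running through every hunk above is the make_request_fn prototype change from int to void: status no longer travels through a return value. A minimal sketch of a driver-side make_request_fn under the new signature (the example_dev type and its fields are hypothetical; only the signature, the bio_io_error() failure path, and resubmission via generic_make_request() follow the patch):

static void example_make_request(struct request_queue *q, struct bio *bio)
{
	struct example_dev *dev = q->queuedata;	/* hypothetical per-device state */

	if (!dev || !dev->ready) {
		/* errors are reported through the bio; there is no return value */
		bio_io_error(bio);
		return;
	}

	/* a stacking driver remaps and resubmits instead of returning non-zero */
	bio->bi_bdev = dev->lower_bdev;
	generic_make_request(bio);
}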
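The plug accounting changes (request_count in attempt_plug_merge(), the BLK_MAX_REQUEST_COUNT flush in blk_queue_bio(), and the removal of plug->count) do not change how submitters plug. A typical caller-side pattern, sketched under the assumption that bios[] and nr are prepared elsewhere:

	struct blk_plug plug;
	int i;

	blk_start_plug(&plug);
	for (i = 0; i < nr; i++)
		submit_bio(READ, bios[i]);	/* held on the per-task plug list */
	blk_finish_plug(&plug);			/* hands the whole batch to the driver */

blk_queue_bio() now flushes automatically once BLK_MAX_REQUEST_COUNT (16) requests accumulate, and schedule() flushes via blk_schedule_flush_plug() if the task blocks, which is the deadlock-avoidance property the blk_start_plug() kernel-doc above describes.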
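The loop_control_ioctl() added above is reachable from user space through the new /dev/loop-control node. A user-space sketch of the LOOP_CTL_GET_FREE flow (error handling trimmed; the LOOP_CTL_* constants come from <linux/loop.h> as introduced by this series):

#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/loop.h>

int main(void)
{
	int fd = open("/dev/loop-control", O_RDWR);
	if (fd < 0)
		return 1;

	/* returns the index of the first unbound device, allocating a new
	 * one via loop_add() when every existing device is in use */
	int nr = ioctl(fd, LOOP_CTL_GET_FREE);
	if (nr >= 0)
		printf("free device: /dev/loop%d\n", nr);

	close(fd);
	return nr < 0;
}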
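The (2ULL << 32) - 2 clamp in super_90_load() and super_90_rdev_size_change() follows from the 0.90 superblock storing the device size as a 32-bit count of kilobytes, as the patch comment says: 4TB == 2^32 KB, or 2*2^32 sectors. A standalone check of the arithmetic:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	/* 0.90 metadata: size is a u32 in KB, so at most 2^32 KB = 4TB,
	 * i.e. 2 * 2^32 sectors of 512 bytes */
	uint64_t max_sectors = 2ULL << 32;
	uint64_t clamp = max_sectors - 2;	/* the value used in the patch */

	printf("4TB = %llu sectors = %llu bytes\n",
	       (unsigned long long)max_sectors,
	       (unsigned long long)(max_sectors * 512));
	printf("clamped: %llu sectors -> sb->size = %llu KB (u32 max is 4294967295)\n",
	       (unsigned long long)clamp,
	       (unsigned long long)(clamp / 2));
	return 0;
}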
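Finally, why md_unregister_thread() now takes mdk_thread_t **: the waker in mddev_unlock() and the teardown path must agree, under pers_lock, on whether the thread still exists. Condensed from the two md.c hunks above (an illustrative excerpt of the ordering, not a standalone program):

	/* teardown side (md_unregister_thread) */
	spin_lock(&pers_lock);
	*threadp = NULL;		/* e.g. &mddev->sync_thread; wakers now see NULL */
	spin_unlock(&pers_lock);
	kthread_stop(thread->tsk);	/* safe: no waker can reach the stale pointer */
	kfree(thread);

	/* waker side (mddev_unlock), bracketed by the same lock */
	spin_lock(&pers_lock);
	md_wakeup_thread(mddev->thread);	/* md_wakeup_thread(NULL) is a no-op */
	spin_unlock(&pers_lock);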