md: deal with merge_bvec_fn in component devices better.
authorNeilBrown <neilb@suse.de>
Wed, 31 Mar 2010 01:07:16 +0000 (12:07 +1100)
committerGreg Kroah-Hartman <gregkh@suse.de>
Mon, 26 Apr 2010 14:41:35 +0000 (07:41 -0700)
commit 627a2d3c29427637f4c5d31ccc7fcbd8d312cd71 upstream.

If a component device has a merge_bvec_fn then as we never call it
we must ensure we never need to.  Currently this is done by setting
max_sector to 1 PAGE, however this does not stop a bio being created
with several sub-page iovecs that would violate the merge_bvec_fn.

So instead set max_phys_segments to 1 and set the segment boundary to the
same as a page boundary to ensure there is only ever one single-page
segment of IO requested at a time.

This can particularly be an issue when 'xen' is used as it is
known to submit multiple small buffers in a single bio.

Signed-off-by: NeilBrown <neilb@suse.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
drivers/md/linear.c
drivers/md/multipath.c
drivers/md/raid0.c
drivers/md/raid10.c

index 1ceceb334d5ebe8f5ce637d29898fc984bfcc797..dff9d2f449c380363287b567c9b5e1ad79ee7ee1 100644 (file)
@@ -172,12 +172,14 @@ static linear_conf_t *linear_conf(mddev_t *mddev, int raid_disks)
                disk_stack_limits(mddev->gendisk, rdev->bdev,
                                  rdev->data_offset << 9);
                /* as we don't honour merge_bvec_fn, we must never risk
-                * violating it, so limit ->max_sector to one PAGE, as
-                * a one page request is never in violation.
+                * violating it, so limit max_phys_segments to 1 lying within
+                * a single page.
                 */
-               if (rdev->bdev->bd_disk->queue->merge_bvec_fn &&
-                   queue_max_sectors(mddev->queue) > (PAGE_SIZE>>9))
-                       blk_queue_max_sectors(mddev->queue, PAGE_SIZE>>9);
+               if (rdev->bdev->bd_disk->queue->merge_bvec_fn) {
+                       blk_queue_max_phys_segments(mddev->queue, 1);
+                       blk_queue_segment_boundary(mddev->queue,
+                                                  PAGE_CACHE_SIZE - 1);
+               }
 
                conf->array_sectors += rdev->sectors;
                cnt++;
index ee7646f974a07165bd368deb3a18f6390e5553dc..e4b11f18adc7d978947d4da9bb992b8cec334559 100644 (file)
@@ -301,14 +301,16 @@ static int multipath_add_disk(mddev_t *mddev, mdk_rdev_t *rdev)
                                          rdev->data_offset << 9);
 
                /* as we don't honour merge_bvec_fn, we must never risk
-                * violating it, so limit ->max_sector to one PAGE, as
-                * a one page request is never in violation.
+                * violating it, so limit ->max_phys_segments to one, lying
+                * within a single page.
                 * (Note: it is very unlikely that a device with
                 * merge_bvec_fn will be involved in multipath.)
                 */
-                       if (q->merge_bvec_fn &&
-                           queue_max_sectors(q) > (PAGE_SIZE>>9))
-                               blk_queue_max_sectors(mddev->queue, PAGE_SIZE>>9);
+                       if (q->merge_bvec_fn) {
+                               blk_queue_max_phys_segments(mddev->queue, 1);
+                               blk_queue_segment_boundary(mddev->queue,
+                                                          PAGE_CACHE_SIZE - 1);
+                       }
 
                        conf->working_disks++;
                        mddev->degraded--;
@@ -476,9 +478,11 @@ static int multipath_run (mddev_t *mddev)
                /* as we don't honour merge_bvec_fn, we must never risk
                 * violating it, not that we ever expect a device with
                 * a merge_bvec_fn to be involved in multipath */
-               if (rdev->bdev->bd_disk->queue->merge_bvec_fn &&
-                   queue_max_sectors(mddev->queue) > (PAGE_SIZE>>9))
-                       blk_queue_max_sectors(mddev->queue, PAGE_SIZE>>9);
+               if (rdev->bdev->bd_disk->queue->merge_bvec_fn) {
+                       blk_queue_max_phys_segments(mddev->queue, 1);
+                       blk_queue_segment_boundary(mddev->queue,
+                                                  PAGE_CACHE_SIZE - 1);
+               }
 
                if (!test_bit(Faulty, &rdev->flags))
                        conf->working_disks++;
index d3a4ce06015a300e9d4df5e95ee63669613db17e..3db857cdd33e14bc0b6430df75b1da8783bad435 100644 (file)
@@ -176,14 +176,15 @@ static int create_strip_zones(mddev_t *mddev)
                disk_stack_limits(mddev->gendisk, rdev1->bdev,
                                  rdev1->data_offset << 9);
                /* as we don't honour merge_bvec_fn, we must never risk
-                * violating it, so limit ->max_sector to one PAGE, as
-                * a one page request is never in violation.
+                * violating it, so limit ->max_phys_segments to 1, lying within
+                * a single page.
                 */
 
-               if (rdev1->bdev->bd_disk->queue->merge_bvec_fn &&
-                   queue_max_sectors(mddev->queue) > (PAGE_SIZE>>9))
-                       blk_queue_max_sectors(mddev->queue, PAGE_SIZE>>9);
-
+               if (rdev1->bdev->bd_disk->queue->merge_bvec_fn) {
+                       blk_queue_max_phys_segments(mddev->queue, 1);
+                       blk_queue_segment_boundary(mddev->queue,
+                                                  PAGE_CACHE_SIZE - 1);
+               }
                if (!smallest || (rdev1->sectors < smallest->sectors))
                        smallest = rdev1;
                cnt++;
index c2cb7b87b440dce777d92bb6c215d1c82efddbe1..3860ac7c0f5b2bb72f4f3d48328603b2b6660d5d 100644 (file)
@@ -1155,13 +1155,17 @@ static int raid10_add_disk(mddev_t *mddev, mdk_rdev_t *rdev)
 
                        disk_stack_limits(mddev->gendisk, rdev->bdev,
                                          rdev->data_offset << 9);
-                       /* as we don't honour merge_bvec_fn, we must never risk
-                        * violating it, so limit ->max_sector to one PAGE, as
-                        * a one page request is never in violation.
+                       /* as we don't honour merge_bvec_fn, we must
+                        * never risk violating it, so limit
+                        * ->max_phys_segments to one lying with a single
+                        * page, as a one page request is never in
+                        * violation.
                         */
-                       if (rdev->bdev->bd_disk->queue->merge_bvec_fn &&
-                           queue_max_sectors(mddev->queue) > (PAGE_SIZE>>9))
-                               blk_queue_max_sectors(mddev->queue, PAGE_SIZE>>9);
+                       if (rdev->bdev->bd_disk->queue->merge_bvec_fn) {
+                               blk_queue_max_phys_segments(mddev->queue, 1);
+                               blk_queue_segment_boundary(mddev->queue,
+                                                          PAGE_CACHE_SIZE - 1);
+                       }
 
                        p->head_position = 0;
                        rdev->raid_disk = mirror;
@@ -2155,12 +2159,14 @@ static int run(mddev_t *mddev)
                disk_stack_limits(mddev->gendisk, rdev->bdev,
                                  rdev->data_offset << 9);
                /* as we don't honour merge_bvec_fn, we must never risk
-                * violating it, so limit ->max_sector to one PAGE, as
-                * a one page request is never in violation.
+                * violating it, so limit max_phys_segments to 1 lying
+                * within a single page.
                 */
-               if (rdev->bdev->bd_disk->queue->merge_bvec_fn &&
-                   queue_max_sectors(mddev->queue) > (PAGE_SIZE>>9))
-                       blk_queue_max_sectors(mddev->queue, PAGE_SIZE>>9);
+               if (rdev->bdev->bd_disk->queue->merge_bvec_fn) {
+                       blk_queue_max_phys_segments(mddev->queue, 1);
+                       blk_queue_segment_boundary(mddev->queue,
+                                                  PAGE_CACHE_SIZE - 1);
+               }
 
                disk->head_position = 0;
        }