if (!sc)
return -ENOMEM;
+ /* Need to ensure auto-resizing doesn't interfere */
+ mutex_lock(&conf->cache_size_mutex);
+
for (i = conf->max_nr_stripes; i; i--) {
nsh = alloc_stripe(sc, GFP_KERNEL);
if (!nsh)
kmem_cache_free(sc, nsh);
}
kmem_cache_destroy(sc);
+ mutex_unlock(&conf->cache_size_mutex);
return -ENOMEM;
}
/* Step 2 - Must use GFP_NOIO now.
} else
err = -ENOMEM;
+ mutex_unlock(&conf->cache_size_mutex);
/* Step 4, return new stripes to service */
while(!list_empty(&newstripes)) {
nsh = list_entry(newstripes.next, struct stripe_head, lru);
static int drop_one_stripe(struct r5conf *conf)
{
struct stripe_head *sh;
- int hash = (conf->max_nr_stripes - 1) % NR_STRIPE_HASH_LOCKS;
+ int hash = (conf->max_nr_stripes - 1) & STRIPE_HASH_LOCKS_MASK;
spin_lock_irq(conf->hash_locks + hash);
sh = get_free_stripe(conf, hash);
&first_bad, &bad_sectors))
set_bit(R5_ReadRepl, &dev->flags);
else {
- if (rdev)
+ if (rdev && !test_bit(Faulty, &rdev->flags))
set_bit(R5_NeedReplace, &dev->flags);
+ else
+ clear_bit(R5_NeedReplace, &dev->flags);
rdev = rcu_dereference(conf->disks[i].rdev);
clear_bit(R5_ReadRepl, &dev->flags);
}
struct bvec_merge_data *bvm,
struct bio_vec *biovec)
{
+ struct r5conf *conf = mddev->private;
sector_t sector = bvm->bi_sector + get_start_sect(bvm->bi_bdev);
int max;
- unsigned int chunk_sectors = mddev->chunk_sectors;
+ unsigned int chunk_sectors;
unsigned int bio_sectors = bvm->bi_size >> 9;
/*
if ((bvm->bi_rw & 1) == WRITE || mddev->degraded)
return biovec->bv_len;
- if (mddev->new_chunk_sectors < mddev->chunk_sectors)
- chunk_sectors = mddev->new_chunk_sectors;
+ chunk_sectors = min(conf->chunk_sectors, conf->prev_chunk_sectors);
max = (chunk_sectors - ((sector & (chunk_sectors - 1)) + bio_sectors)) << 9;
if (max < 0) max = 0;
if (max <= biovec->bv_len && bio_sectors == 0)
static int in_chunk_boundary(struct mddev *mddev, struct bio *bio)
{
+ struct r5conf *conf = mddev->private;
sector_t sector = bio->bi_iter.bi_sector + get_start_sect(bio->bi_bdev);
- unsigned int chunk_sectors = mddev->chunk_sectors;
+ unsigned int chunk_sectors;
unsigned int bio_sectors = bio_sectors(bio);
- if (mddev->new_chunk_sectors < mddev->chunk_sectors)
- chunk_sectors = mddev->new_chunk_sectors;
+ chunk_sectors = min(conf->chunk_sectors, conf->prev_chunk_sectors);
return chunk_sectors >=
((sector & (chunk_sectors - 1)) + bio_sectors);
}
sector_t stripe_addr;
int reshape_sectors;
struct list_head stripes;
+ sector_t retn;
if (sector_nr == 0) {
/* If restarting in the middle, skip the initial sectors */
mddev->curr_resync_completed = sector_nr;
sysfs_notify(&mddev->kobj, NULL, "sync_completed");
*skipped = 1;
- return sector_nr;
+ retn = sector_nr;
+ goto finish;
}
}
* If old and new chunk sizes differ, we need to process the
* largest of these
*/
- if (mddev->new_chunk_sectors > mddev->chunk_sectors)
- reshape_sectors = mddev->new_chunk_sectors;
- else
- reshape_sectors = mddev->chunk_sectors;
+
+ reshape_sectors = max(conf->chunk_sectors, conf->prev_chunk_sectors);
/* We update the metadata at least every 10 seconds, or when
* the data about to be copied would over-write the source of
safepos = conf->reshape_safe;
sector_div(safepos, data_disks);
if (mddev->reshape_backwards) {
- writepos -= min_t(sector_t, reshape_sectors, writepos);
+ BUG_ON(writepos < reshape_sectors);
+ writepos -= reshape_sectors;
readpos += reshape_sectors;
safepos += reshape_sectors;
} else {
writepos += reshape_sectors;
+ /* readpos and safepos are worst-case calculations.
+ * A negative number is overly pessimistic, and causes
+ * obvious problems for unsigned storage. So clip to 0.
+ */
readpos -= min_t(sector_t, reshape_sectors, readpos);
safepos -= min_t(sector_t, reshape_sectors, safepos);
}
* then we need to write out the superblock.
*/
sector_nr += reshape_sectors;
- if ((sector_nr - mddev->curr_resync_completed) * 2
+ retn = reshape_sectors;
+finish:
+ if (mddev->curr_resync_completed > mddev->resync_max ||
+ (sector_nr - mddev->curr_resync_completed) * 2
>= mddev->resync_max - mddev->curr_resync_completed) {
/* Cannot proceed until we've updated the superblock... */
wait_event(conf->wait_for_overlap,
sysfs_notify(&mddev->kobj, NULL, "sync_completed");
}
ret:
- return reshape_sectors;
+ return retn;
}
static inline sector_t sync_request(struct mddev *mddev, sector_t sector_nr, int *skipped)
pr_debug("%d stripes handled\n", handled);
spin_unlock_irq(&conf->device_lock);
- if (test_and_clear_bit(R5_ALLOC_MORE, &conf->cache_state)) {
+ if (test_and_clear_bit(R5_ALLOC_MORE, &conf->cache_state) &&
+ mutex_trylock(&conf->cache_size_mutex)) {
grow_one_stripe(conf, __GFP_NOWARN);
/* Set flag even if allocation failed. This helps
* slow down allocation requests when mem is short
*/
set_bit(R5_DID_ALLOC, &conf->cache_state);
+ mutex_unlock(&conf->cache_size_mutex);
}
async_tx_issue_pending_all();
return -EINVAL;
conf->min_nr_stripes = size;
+ mutex_lock(&conf->cache_size_mutex);
while (size < conf->max_nr_stripes &&
drop_one_stripe(conf))
;
+ mutex_unlock(&conf->cache_size_mutex);
err = md_allow_write(mddev);
if (err)
return err;
+ mutex_lock(&conf->cache_size_mutex);
while (size > conf->max_nr_stripes)
if (!grow_one_stripe(conf, GFP_KERNEL))
break;
+ mutex_unlock(&conf->cache_size_mutex);
return 0;
}
/* size is defined by the smallest of previous and new size */
raid_disks = min(conf->raid_disks, conf->previous_raid_disks);
- sectors &= ~((sector_t)mddev->chunk_sectors - 1);
- sectors &= ~((sector_t)mddev->new_chunk_sectors - 1);
+ sectors &= ~((sector_t)conf->chunk_sectors - 1);
+ sectors &= ~((sector_t)conf->prev_chunk_sectors - 1);
return sectors * (raid_disks - conf->max_degraded);
}
struct shrink_control *sc)
{
struct r5conf *conf = container_of(shrink, struct r5conf, shrinker);
- int ret = 0;
- while (ret < sc->nr_to_scan) {
- if (drop_one_stripe(conf) == 0)
- return SHRINK_STOP;
- ret++;
+ unsigned long ret = SHRINK_STOP;
+
+ if (mutex_trylock(&conf->cache_size_mutex)) {
+ ret= 0;
+ while (ret < sc->nr_to_scan &&
+ conf->max_nr_stripes > conf->min_nr_stripes) {
+ if (drop_one_stripe(conf) == 0) {
+ ret = SHRINK_STOP;
+ break;
+ }
+ ret++;
+ }
+ mutex_unlock(&conf->cache_size_mutex);
}
return ret;
}
goto abort;
spin_lock_init(&conf->device_lock);
seqcount_init(&conf->gen_lock);
+ mutex_init(&conf->cache_size_mutex);
init_waitqueue_head(&conf->wait_for_quiescent);
for (i = 0; i < NR_STRIPE_HASH_LOCKS; i++) {
init_waitqueue_head(&conf->wait_for_stripe[i]);
if (conf->reshape_progress != MaxSector) {
conf->prev_chunk_sectors = mddev->chunk_sectors;
conf->prev_algo = mddev->layout;
+ } else {
+ conf->prev_chunk_sectors = conf->chunk_sectors;
+ conf->prev_algo = conf->algorithm;
}
conf->min_nr_stripes = NR_STRIPES;
sector_t here_new, here_old;
int old_disks;
int max_degraded = (mddev->level == 6 ? 2 : 1);
+ int chunk_sectors;
+ int new_data_disks;
if (mddev->new_level != mddev->level) {
printk(KERN_ERR "md/raid:%s: unsupported reshape "
/* reshape_position must be on a new-stripe boundary, and one
* further up in new geometry must map after here in old
* geometry.
+ * If the chunk sizes are different, then as we perform reshape
+ * in units of the largest of the two, reshape_position needs
+ * be a multiple of the largest chunk size times new data disks.
*/
here_new = mddev->reshape_position;
- if (sector_div(here_new, mddev->new_chunk_sectors *
- (mddev->raid_disks - max_degraded))) {
+ chunk_sectors = max(mddev->chunk_sectors, mddev->new_chunk_sectors);
+ new_data_disks = mddev->raid_disks - max_degraded;
+ if (sector_div(here_new, chunk_sectors * new_data_disks)) {
printk(KERN_ERR "md/raid:%s: reshape_position not "
"on a stripe boundary\n", mdname(mddev));
return -EINVAL;
}
- reshape_offset = here_new * mddev->new_chunk_sectors;
+ reshape_offset = here_new * chunk_sectors;
/* here_new is the stripe we will write to */
here_old = mddev->reshape_position;
- sector_div(here_old, mddev->chunk_sectors *
- (old_disks-max_degraded));
+ sector_div(here_old, chunk_sectors * (old_disks-max_degraded));
/* here_old is the first stripe that we might need to read
* from */
if (mddev->delta_disks == 0) {
- if ((here_new * mddev->new_chunk_sectors !=
- here_old * mddev->chunk_sectors)) {
- printk(KERN_ERR "md/raid:%s: reshape position is"
- " confused - aborting\n", mdname(mddev));
- return -EINVAL;
- }
/* We cannot be sure it is safe to start an in-place
* reshape. It is only safe if user-space is monitoring
* and taking constant backups.
return -EINVAL;
}
} else if (mddev->reshape_backwards
- ? (here_new * mddev->new_chunk_sectors + min_offset_diff <=
- here_old * mddev->chunk_sectors)
- : (here_new * mddev->new_chunk_sectors >=
- here_old * mddev->chunk_sectors + (-min_offset_diff))) {
+ ? (here_new * chunk_sectors + min_offset_diff <=
+ here_old * chunk_sectors)
+ : (here_new * chunk_sectors >=
+ here_old * chunk_sectors + (-min_offset_diff))) {
/* Reading from the same stripe as writing to - bad */
printk(KERN_ERR "md/raid:%s: reshape_position too early for "
"auto-recovery - aborting.\n",
int i;
seq_printf(seq, " level %d, %dk chunk, algorithm %d", mddev->level,
- mddev->chunk_sectors / 2, mddev->layout);
+ conf->chunk_sectors / 2, mddev->layout);
seq_printf (seq, " [%d/%d] [", conf->raid_disks, conf->raid_disks - mddev->degraded);
for (i = 0; i < conf->raid_disks; i++)
seq_printf (seq, "%s",
* worth it.
*/
sector_t newsize;
- sectors &= ~((sector_t)mddev->chunk_sectors - 1);
+ struct r5conf *conf = mddev->private;
+
+ sectors &= ~((sector_t)conf->chunk_sectors - 1);
newsize = raid5_size(mddev, sectors, mddev->raid_disks);
if (mddev->external_size &&
mddev->array_sectors > newsize)