#include <linux/rcupdate.h>
#include <linux/kthread.h>
#include <linux/slab.h>
+#include <linux/ratelimit.h>
#include "compat.h"
#include "hash.h"
#include "ctree.h"
struct btrfs_root *root,
int load_cache_only)
{
+ DEFINE_WAIT(wait);
struct btrfs_fs_info *fs_info = cache->fs_info;
struct btrfs_caching_control *caching_ctl;
int ret = 0;
- smp_mb();
- if (cache->cached != BTRFS_CACHE_NO)
+ caching_ctl = kzalloc(sizeof(*caching_ctl), GFP_NOFS);
+ BUG_ON(!caching_ctl);
+
+ INIT_LIST_HEAD(&caching_ctl->list);
+ mutex_init(&caching_ctl->mutex);
+ init_waitqueue_head(&caching_ctl->wait);
+ caching_ctl->block_group = cache;
+ caching_ctl->progress = cache->key.objectid;
+ atomic_set(&caching_ctl->count, 1);
+ caching_ctl->work.func = caching_thread;
+
+ spin_lock(&cache->lock);
+ /*
+ * This should be a rare occasion, but this could happen I think in the
+ * case where one thread starts to load the space cache info, and then
+ * some other thread starts a transaction commit which tries to do an
+ * allocation while the other thread is still loading the space cache
+ * info. The previous loop should have kept us from choosing this block
+ * group, but if we've moved to the state where we will wait on caching
+ * block groups we need to first check if we're doing a fast load here,
+ * so we can wait for it to finish, otherwise we could end up allocating
+ * from a block group who's cache gets evicted for one reason or
+ * another.
+ */
+ while (cache->cached == BTRFS_CACHE_FAST) {
+ struct btrfs_caching_control *ctl;
+
+ ctl = cache->caching_ctl;
+ atomic_inc(&ctl->count);
+ prepare_to_wait(&ctl->wait, &wait, TASK_UNINTERRUPTIBLE);
+ spin_unlock(&cache->lock);
+
+ schedule();
+
+ finish_wait(&ctl->wait, &wait);
+ put_caching_control(ctl);
+ spin_lock(&cache->lock);
+ }
+
+ if (cache->cached != BTRFS_CACHE_NO) {
+ spin_unlock(&cache->lock);
+ kfree(caching_ctl);
return 0;
+ }
+ WARN_ON(cache->caching_ctl);
+ cache->caching_ctl = caching_ctl;
+ cache->cached = BTRFS_CACHE_FAST;
+ spin_unlock(&cache->lock);
/*
* We can't do the read from on-disk cache during a commit since we need
if (trans && (!trans->transaction->in_commit) &&
(root && root != root->fs_info->tree_root) &&
btrfs_test_opt(root, SPACE_CACHE)) {
- spin_lock(&cache->lock);
- if (cache->cached != BTRFS_CACHE_NO) {
- spin_unlock(&cache->lock);
- return 0;
- }
- cache->cached = BTRFS_CACHE_STARTED;
- spin_unlock(&cache->lock);
-
ret = load_free_space_cache(fs_info, cache);
spin_lock(&cache->lock);
if (ret == 1) {
+ cache->caching_ctl = NULL;
cache->cached = BTRFS_CACHE_FINISHED;
cache->last_byte_to_unpin = (u64)-1;
} else {
- cache->cached = BTRFS_CACHE_NO;
+ if (load_cache_only) {
+ cache->caching_ctl = NULL;
+ cache->cached = BTRFS_CACHE_NO;
+ } else {
+ cache->cached = BTRFS_CACHE_STARTED;
+ }
}
spin_unlock(&cache->lock);
+ wake_up(&caching_ctl->wait);
if (ret == 1) {
+ put_caching_control(caching_ctl);
free_excluded_extents(fs_info->extent_root, cache);
return 0;
}
+ } else {
+ /*
+ * We are not going to do the fast caching, set cached to the
+ * appropriate value and wakeup any waiters.
+ */
+ spin_lock(&cache->lock);
+ if (load_cache_only) {
+ cache->caching_ctl = NULL;
+ cache->cached = BTRFS_CACHE_NO;
+ } else {
+ cache->cached = BTRFS_CACHE_STARTED;
+ }
+ spin_unlock(&cache->lock);
+ wake_up(&caching_ctl->wait);
}
- if (load_cache_only)
- return 0;
-
- caching_ctl = kzalloc(sizeof(*caching_ctl), GFP_NOFS);
- BUG_ON(!caching_ctl);
-
- INIT_LIST_HEAD(&caching_ctl->list);
- mutex_init(&caching_ctl->mutex);
- init_waitqueue_head(&caching_ctl->wait);
- caching_ctl->block_group = cache;
- caching_ctl->progress = cache->key.objectid;
- /* one for caching kthread, one for caching block group list */
- atomic_set(&caching_ctl->count, 2);
- caching_ctl->work.func = caching_thread;
-
- spin_lock(&cache->lock);
- if (cache->cached != BTRFS_CACHE_NO) {
- spin_unlock(&cache->lock);
- kfree(caching_ctl);
+ if (load_cache_only) {
+ put_caching_control(caching_ctl);
return 0;
}
- cache->caching_ctl = caching_ctl;
- cache->cached = BTRFS_CACHE_STARTED;
- spin_unlock(&cache->lock);
down_write(&fs_info->extent_commit_sem);
+ atomic_inc(&caching_ctl->count);
list_add_tail(&caching_ctl->list, &fs_info->caching_block_groups);
up_write(&fs_info->extent_commit_sem);
{
int ret;
u64 discarded_bytes = 0;
- struct btrfs_multi_bio *multi = NULL;
+ struct btrfs_bio *bbio = NULL;
/* Tell the block device(s) that the sectors can be discarded */
ret = btrfs_map_block(&root->fs_info->mapping_tree, REQ_DISCARD,
- bytenr, &num_bytes, &multi, 0);
+ bytenr, &num_bytes, &bbio, 0);
if (!ret) {
- struct btrfs_bio_stripe *stripe = multi->stripes;
+ struct btrfs_bio_stripe *stripe = bbio->stripes;
int i;
- for (i = 0; i < multi->num_stripes; i++, stripe++) {
+ for (i = 0; i < bbio->num_stripes; i++, stripe++) {
if (!stripe->dev->can_discard)
continue;
*/
ret = 0;
}
- kfree(multi);
+ kfree(bbio);
}
if (actual_bytes)
goto again;
}
+ /* We've already setup this transaction, go ahead and exit */
+ if (block_group->cache_generation == trans->transid &&
+ i_size_read(inode)) {
+ dcs = BTRFS_DC_SETUP;
+ goto out_put;
+ }
+
/*
* We want to set the generation to 0, that way if anything goes wrong
* from here on out we know not to trust this cache when we load up next
num_pages *= 16;
num_pages *= PAGE_CACHE_SIZE;
- ret = btrfs_delalloc_reserve_space(inode, num_pages);
+ ret = btrfs_check_data_free_space(inode, num_pages);
if (ret)
goto out_put;
ret = btrfs_prealloc_file_range_trans(inode, trans, 0, 0, num_pages,
num_pages, num_pages,
&alloc_hint);
- if (!ret) {
+ if (!ret)
dcs = BTRFS_DC_SETUP;
- btrfs_free_reserved_data_space(inode, num_pages);
- } else {
- btrfs_delalloc_release_space(inode, num_pages);
- }
+ btrfs_free_reserved_data_space(inode, num_pages);
out_put:
iput(inode);
btrfs_release_path(path);
out:
spin_lock(&block_group->lock);
+ if (!ret && dcs == BTRFS_DC_SETUP)
+ block_group->cache_generation = trans->transid;
block_group->disk_cache_state = dcs;
spin_unlock(&block_group->lock);
* about 1% of the FS size.
*/
if (force == CHUNK_ALLOC_LIMITED) {
- thresh = btrfs_super_total_bytes(&root->fs_info->super_copy);
+ thresh = btrfs_super_total_bytes(root->fs_info->super_copy);
thresh = max_t(u64, 64 * 1024 * 1024,
div_factor_fine(thresh, 1));
if (num_allocated + alloc_bytes < div_factor(num_bytes, 8))
return 0;
- thresh = btrfs_super_total_bytes(&root->fs_info->super_copy);
+ thresh = btrfs_super_total_bytes(root->fs_info->super_copy);
/* 256MB or 5% of the FS */
thresh = max_t(u64, 256 * 1024 * 1024, div_factor_fine(thresh, 5));
/*
* shrink metadata reservation for delalloc
*/
-static int shrink_delalloc(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, u64 to_reclaim, int sync)
+static int shrink_delalloc(struct btrfs_root *root, u64 to_reclaim,
+ bool wait_ordered)
{
struct btrfs_block_rsv *block_rsv;
struct btrfs_space_info *space_info;
+ struct btrfs_trans_handle *trans;
u64 reserved;
u64 max_reclaim;
u64 reclaimed = 0;
long time_left;
- int nr_pages = (2 * 1024 * 1024) >> PAGE_CACHE_SHIFT;
+ unsigned long nr_pages = (2 * 1024 * 1024) >> PAGE_CACHE_SHIFT;
int loops = 0;
unsigned long progress;
+ trans = (struct btrfs_trans_handle *)current->journal_info;
block_rsv = &root->fs_info->delalloc_block_rsv;
space_info = block_rsv->space_info;
}
max_reclaim = min(reserved, to_reclaim);
-
+ nr_pages = max_t(unsigned long, nr_pages,
+ max_reclaim >> PAGE_CACHE_SHIFT);
while (loops < 1024) {
/* have the flusher threads jump in and do some IO */
smp_mb();
if (trans && trans->transaction->blocked)
return -EAGAIN;
- time_left = schedule_timeout_interruptible(1);
+ if (wait_ordered && !trans) {
+ btrfs_wait_ordered_extents(root, 0, 0);
+ } else {
+ time_left = schedule_timeout_interruptible(1);
- /* We were interrupted, exit */
- if (time_left)
- break;
+ /* We were interrupted, exit */
+ if (time_left)
+ break;
+ }
/* we've kicked the IO a few times, if anything has been freed,
* exit. There is no sense in looping here for a long time
}
}
- if (reclaimed >= to_reclaim && !trans)
- btrfs_wait_ordered_extents(root, 0, 0);
+
return reclaimed >= to_reclaim;
}
+/**
+ * maybe_commit_transaction - possibly commit the transaction if its ok to
+ * @root - the root we're allocating for
+ * @bytes - the number of bytes we want to reserve
+ * @force - force the commit
+ *
+ * This will check to make sure that committing the transaction will actually
+ * get us somewhere and then commit the transaction if it does. Otherwise it
+ * will return -ENOSPC.
+ */
+static int may_commit_transaction(struct btrfs_root *root,
+ struct btrfs_space_info *space_info,
+ u64 bytes, int force)
+{
+ struct btrfs_block_rsv *delayed_rsv = &root->fs_info->delayed_block_rsv;
+ struct btrfs_trans_handle *trans;
+
+ trans = (struct btrfs_trans_handle *)current->journal_info;
+ if (trans)
+ return -EAGAIN;
+
+ if (force)
+ goto commit;
+
+ /* See if there is enough pinned space to make this reservation */
+ spin_lock(&space_info->lock);
+ if (space_info->bytes_pinned >= bytes) {
+ spin_unlock(&space_info->lock);
+ goto commit;
+ }
+ spin_unlock(&space_info->lock);
+
+ /*
+ * See if there is some space in the delayed insertion reservation for
+ * this reservation.
+ */
+ if (space_info != delayed_rsv->space_info)
+ return -ENOSPC;
+
+ spin_lock(&delayed_rsv->lock);
+ if (delayed_rsv->size < bytes) {
+ spin_unlock(&delayed_rsv->lock);
+ return -ENOSPC;
+ }
+ spin_unlock(&delayed_rsv->lock);
+
+commit:
+ trans = btrfs_join_transaction(root);
+ if (IS_ERR(trans))
+ return -ENOSPC;
+
+ return btrfs_commit_transaction(trans, root);
+}
+
/**
* reserve_metadata_bytes - try to reserve bytes from the block_rsv's space
* @root - the root we're allocating for
* @block_rsv - the block_rsv we're allocating for
* @orig_bytes - the number of bytes we want
* @flush - wether or not we can flush to make our reservation
- * @check - wether this is just to check if we have enough space or not
*
* This will reserve orgi_bytes number of bytes from the space info associated
* with the block_rsv. If there is not enough space it will make an attempt to
*/
static int reserve_metadata_bytes(struct btrfs_root *root,
struct btrfs_block_rsv *block_rsv,
- u64 orig_bytes, int flush, int check)
+ u64 orig_bytes, int flush)
{
struct btrfs_space_info *space_info = block_rsv->space_info;
- struct btrfs_trans_handle *trans;
u64 used;
u64 num_bytes = orig_bytes;
int retries = 0;
int ret = 0;
bool committed = false;
bool flushing = false;
+ bool wait_ordered = false;
- trans = (struct btrfs_trans_handle *)current->journal_info;
again:
ret = 0;
spin_lock(&space_info->lock);
* deadlock since we are waiting for the flusher to finish, but
* hold the current transaction open.
*/
- if (trans)
+ if (current->journal_info)
return -EAGAIN;
ret = wait_event_interruptible(space_info->wait,
!space_info->flush);
* amount plus the amount of bytes that we need for this
* reservation.
*/
+ wait_ordered = true;
num_bytes = used - space_info->total_bytes +
(orig_bytes * (retries + 1));
}
- if (ret && !check) {
+ if (ret) {
u64 profile = btrfs_get_alloc_profile(root, 0);
u64 avail;
+ /*
+ * If we have a lot of space that's pinned, don't bother doing
+ * the overcommit dance yet and just commit the transaction.
+ */
+ avail = (space_info->total_bytes - space_info->bytes_used) * 8;
+ do_div(avail, 10);
+ if (space_info->bytes_pinned >= avail && flush && !committed) {
+ space_info->flush = 1;
+ flushing = true;
+ spin_unlock(&space_info->lock);
+ ret = may_commit_transaction(root, space_info,
+ orig_bytes, 1);
+ if (ret)
+ goto out;
+ committed = true;
+ goto again;
+ }
+
spin_lock(&root->fs_info->free_chunk_lock);
avail = root->fs_info->free_chunk_space;
avail >>= 1;
spin_unlock(&root->fs_info->free_chunk_lock);
- if (used + orig_bytes < space_info->total_bytes + avail) {
+ if (used + num_bytes < space_info->total_bytes + avail) {
space_info->bytes_may_use += orig_bytes;
ret = 0;
+ } else {
+ wait_ordered = true;
}
}
* We do synchronous shrinking since we don't actually unreserve
* metadata until after the IO is completed.
*/
- ret = shrink_delalloc(trans, root, num_bytes, 1);
+ ret = shrink_delalloc(root, num_bytes, wait_ordered);
if (ret < 0)
goto out;
* so go back around and try again.
*/
if (retries < 2) {
+ wait_ordered = true;
retries++;
goto again;
}
- /*
- * Not enough space to be reclaimed, don't bother committing the
- * transaction.
- */
- spin_lock(&space_info->lock);
- if (space_info->bytes_pinned < orig_bytes)
- ret = -ENOSPC;
- spin_unlock(&space_info->lock);
- if (ret)
- goto out;
-
- ret = -EAGAIN;
- if (trans)
- goto out;
-
ret = -ENOSPC;
if (committed)
goto out;
- trans = btrfs_join_transaction(root);
- if (IS_ERR(trans))
- goto out;
- ret = btrfs_commit_transaction(trans, root);
+ ret = may_commit_transaction(root, space_info, orig_bytes, 0);
if (!ret) {
- trans = NULL;
committed = true;
goto again;
}
kfree(rsv);
}
-int btrfs_block_rsv_add(struct btrfs_root *root,
- struct btrfs_block_rsv *block_rsv,
- u64 num_bytes)
+static inline int __block_rsv_add(struct btrfs_root *root,
+ struct btrfs_block_rsv *block_rsv,
+ u64 num_bytes, int flush)
{
int ret;
if (num_bytes == 0)
return 0;
- ret = reserve_metadata_bytes(root, block_rsv, num_bytes, 1, 0);
+ ret = reserve_metadata_bytes(root, block_rsv, num_bytes, flush);
if (!ret) {
block_rsv_add_bytes(block_rsv, num_bytes, 1);
return 0;
return ret;
}
+int btrfs_block_rsv_add(struct btrfs_root *root,
+ struct btrfs_block_rsv *block_rsv,
+ u64 num_bytes)
+{
+ return __block_rsv_add(root, block_rsv, num_bytes, 1);
+}
+
+int btrfs_block_rsv_add_noflush(struct btrfs_root *root,
+ struct btrfs_block_rsv *block_rsv,
+ u64 num_bytes)
+{
+ return __block_rsv_add(root, block_rsv, num_bytes, 0);
+}
+
int btrfs_block_rsv_check(struct btrfs_root *root,
- struct btrfs_block_rsv *block_rsv,
- u64 min_reserved, int min_factor, int flush)
+ struct btrfs_block_rsv *block_rsv, int min_factor)
{
u64 num_bytes = 0;
int ret = -ENOSPC;
return 0;
spin_lock(&block_rsv->lock);
- if (min_factor > 0)
- num_bytes = div_factor(block_rsv->size, min_factor);
- if (min_reserved > num_bytes)
- num_bytes = min_reserved;
+ num_bytes = div_factor(block_rsv->size, min_factor);
+ if (block_rsv->reserved >= num_bytes)
+ ret = 0;
+ spin_unlock(&block_rsv->lock);
+
+ return ret;
+}
+static inline int __btrfs_block_rsv_refill(struct btrfs_root *root,
+ struct btrfs_block_rsv *block_rsv,
+ u64 min_reserved, int flush)
+{
+ u64 num_bytes = 0;
+ int ret = -ENOSPC;
+
+ if (!block_rsv)
+ return 0;
+
+ spin_lock(&block_rsv->lock);
+ num_bytes = min_reserved;
if (block_rsv->reserved >= num_bytes)
ret = 0;
else
if (!ret)
return 0;
- ret = reserve_metadata_bytes(root, block_rsv, num_bytes, flush, !flush);
+ ret = reserve_metadata_bytes(root, block_rsv, num_bytes, flush);
if (!ret) {
block_rsv_add_bytes(block_rsv, num_bytes, 0);
return 0;
return ret;
}
+int btrfs_block_rsv_refill(struct btrfs_root *root,
+ struct btrfs_block_rsv *block_rsv,
+ u64 min_reserved)
+{
+ return __btrfs_block_rsv_refill(root, block_rsv, min_reserved, 1);
+}
+
+int btrfs_block_rsv_refill_noflush(struct btrfs_root *root,
+ struct btrfs_block_rsv *block_rsv,
+ u64 min_reserved)
+{
+ return __btrfs_block_rsv_refill(root, block_rsv, min_reserved, 0);
+}
+
int btrfs_block_rsv_migrate(struct btrfs_block_rsv *src_rsv,
struct btrfs_block_rsv *dst_rsv,
u64 num_bytes)
u64 num_bytes;
u64 meta_used;
u64 data_used;
- int csum_size = btrfs_super_csum_size(&fs_info->super_copy);
+ int csum_size = btrfs_super_csum_size(fs_info->super_copy);
sinfo = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_DATA);
spin_lock(&sinfo->lock);
fs_info->delalloc_block_rsv.space_info = space_info;
fs_info->trans_block_rsv.space_info = space_info;
fs_info->empty_block_rsv.space_info = space_info;
+ fs_info->delayed_block_rsv.space_info = space_info;
fs_info->extent_root->block_rsv = &fs_info->global_block_rsv;
fs_info->csum_root->block_rsv = &fs_info->global_block_rsv;
WARN_ON(fs_info->trans_block_rsv.reserved > 0);
WARN_ON(fs_info->chunk_block_rsv.size > 0);
WARN_ON(fs_info->chunk_block_rsv.reserved > 0);
+ WARN_ON(fs_info->delayed_block_rsv.size > 0);
+ WARN_ON(fs_info->delayed_block_rsv.reserved > 0);
}
void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans,
struct btrfs_root *root)
{
- struct btrfs_block_rsv *block_rsv;
-
if (!trans->bytes_reserved)
return;
- block_rsv = &root->fs_info->trans_block_rsv;
- btrfs_block_rsv_release(root, block_rsv, trans->bytes_reserved);
+ btrfs_block_rsv_release(root, trans->block_rsv, trans->bytes_reserved);
trans->bytes_reserved = 0;
}
*/
static unsigned drop_outstanding_extent(struct inode *inode)
{
+ unsigned drop_inode_space = 0;
unsigned dropped_extents = 0;
BUG_ON(!BTRFS_I(inode)->outstanding_extents);
BTRFS_I(inode)->outstanding_extents--;
+ if (BTRFS_I(inode)->outstanding_extents == 0 &&
+ BTRFS_I(inode)->delalloc_meta_reserved) {
+ drop_inode_space = 1;
+ BTRFS_I(inode)->delalloc_meta_reserved = 0;
+ }
+
/*
* If we have more or the same amount of outsanding extents than we have
* reserved then we need to leave the reserved extents count alone.
*/
if (BTRFS_I(inode)->outstanding_extents >=
BTRFS_I(inode)->reserved_extents)
- return 0;
+ return drop_inode_space;
dropped_extents = BTRFS_I(inode)->reserved_extents -
BTRFS_I(inode)->outstanding_extents;
BTRFS_I(inode)->reserved_extents -= dropped_extents;
- return dropped_extents;
+ return dropped_extents + drop_inode_space;
}
/**
struct btrfs_root *root = BTRFS_I(inode)->root;
struct btrfs_block_rsv *block_rsv = &root->fs_info->delalloc_block_rsv;
u64 to_reserve = 0;
+ u64 csum_bytes;
unsigned nr_extents = 0;
+ int extra_reserve = 0;
int flush = 1;
int ret;
+ /* Need to be holding the i_mutex here if we aren't free space cache */
if (btrfs_is_free_space_inode(root, inode))
flush = 0;
+ else
+ WARN_ON(!mutex_is_locked(&inode->i_mutex));
if (flush && btrfs_transaction_in_commit(root->fs_info))
schedule_timeout(1);
BTRFS_I(inode)->outstanding_extents++;
if (BTRFS_I(inode)->outstanding_extents >
- BTRFS_I(inode)->reserved_extents) {
+ BTRFS_I(inode)->reserved_extents)
nr_extents = BTRFS_I(inode)->outstanding_extents -
BTRFS_I(inode)->reserved_extents;
- BTRFS_I(inode)->reserved_extents += nr_extents;
- to_reserve = btrfs_calc_trans_metadata_size(root, nr_extents);
+ /*
+ * Add an item to reserve for updating the inode when we complete the
+ * delalloc io.
+ */
+ if (!BTRFS_I(inode)->delalloc_meta_reserved) {
+ nr_extents++;
+ extra_reserve = 1;
}
+
+ to_reserve = btrfs_calc_trans_metadata_size(root, nr_extents);
to_reserve += calc_csum_metadata_size(inode, num_bytes, 1);
+ csum_bytes = BTRFS_I(inode)->csum_bytes;
spin_unlock(&BTRFS_I(inode)->lock);
- ret = reserve_metadata_bytes(root, block_rsv, to_reserve, flush, 0);
+ ret = reserve_metadata_bytes(root, block_rsv, to_reserve, flush);
if (ret) {
u64 to_free = 0;
unsigned dropped;
spin_lock(&BTRFS_I(inode)->lock);
dropped = drop_outstanding_extent(inode);
- to_free = calc_csum_metadata_size(inode, num_bytes, 0);
- spin_unlock(&BTRFS_I(inode)->lock);
- to_free += btrfs_calc_trans_metadata_size(root, dropped);
-
/*
- * Somebody could have come in and twiddled with the
- * reservation, so if we have to free more than we would have
- * reserved from this reservation go ahead and release those
- * bytes.
+ * If the inodes csum_bytes is the same as the original
+ * csum_bytes then we know we haven't raced with any free()ers
+ * so we can just reduce our inodes csum bytes and carry on.
+ * Otherwise we have to do the normal free thing to account for
+ * the case that the free side didn't free up its reserve
+ * because of this outstanding reservation.
*/
- to_free -= to_reserve;
+ if (BTRFS_I(inode)->csum_bytes == csum_bytes)
+ calc_csum_metadata_size(inode, num_bytes, 0);
+ else
+ to_free = calc_csum_metadata_size(inode, num_bytes, 0);
+ spin_unlock(&BTRFS_I(inode)->lock);
+ if (dropped)
+ to_free += btrfs_calc_trans_metadata_size(root, dropped);
+
if (to_free)
btrfs_block_rsv_release(root, block_rsv, to_free);
return ret;
}
+ spin_lock(&BTRFS_I(inode)->lock);
+ if (extra_reserve) {
+ BTRFS_I(inode)->delalloc_meta_reserved = 1;
+ nr_extents--;
+ }
+ BTRFS_I(inode)->reserved_extents += nr_extents;
+ spin_unlock(&BTRFS_I(inode)->lock);
+
block_rsv_add_bytes(block_rsv, to_reserve, 1);
return 0;
/* block accounting for super block */
spin_lock(&info->delalloc_lock);
- old_val = btrfs_super_bytes_used(&info->super_copy);
+ old_val = btrfs_super_bytes_used(info->super_copy);
if (alloc)
old_val += num_bytes;
else
old_val -= num_bytes;
- btrfs_set_super_bytes_used(&info->super_copy, old_val);
+ btrfs_set_super_bytes_used(info->super_copy, old_val);
spin_unlock(&info->delalloc_lock);
while (total) {
return 0;
}
+/*
+ * this function must be called within transaction
+ */
+int btrfs_pin_extent_for_log_replay(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ u64 bytenr, u64 num_bytes)
+{
+ struct btrfs_block_group_cache *cache;
+
+ cache = btrfs_lookup_block_group(root->fs_info, bytenr);
+ BUG_ON(!cache);
+
+ /*
+ * pull in the free space cache (if any) so that our pin
+ * removes the free space from the cache. We have load_only set
+ * to one because the slow code to read in the free extents does check
+ * the pinned extents.
+ */
+ cache_block_group(cache, trans, root, 1);
+
+ pin_down_extent(root, cache, bytenr, num_bytes, 0);
+
+ /* remove us from the free space cache (if we're there at all) */
+ btrfs_remove_free_space(cache, bytenr, num_bytes);
+ btrfs_put_block_group(cache);
+ return 0;
+}
+
/**
* btrfs_update_reserved_bytes - update the block_group and space info counters
* @cache: The cache we are manipulating
struct btrfs_root *root = orig_root->fs_info->extent_root;
struct btrfs_free_cluster *last_ptr = NULL;
struct btrfs_block_group_cache *block_group = NULL;
+ struct btrfs_block_group_cache *used_block_group;
int empty_cluster = 2 * 1024 * 1024;
int allowed_chunk_alloc = 0;
int done_chunk_alloc = 0;
struct btrfs_space_info *space_info;
- int last_ptr_loop = 0;
int loop = 0;
int index = 0;
int alloc_type = (data & BTRFS_BLOCK_GROUP_DATA) ?
bool failed_cluster_refill = false;
bool failed_alloc = false;
bool use_cluster = true;
+ bool have_caching_bg = false;
u64 ideal_cache_percent = 0;
u64 ideal_cache_offset = 0;
ideal_cache:
block_group = btrfs_lookup_block_group(root->fs_info,
search_start);
+ used_block_group = block_group;
/*
* we don't want to use the block group if it doesn't match our
* allocation bits, or if its not cached.
}
}
search:
+ have_caching_bg = false;
down_read(&space_info->groups_sem);
list_for_each_entry(block_group, &space_info->block_groups[index],
list) {
u64 offset;
int cached;
+ used_block_group = block_group;
btrfs_get_block_group(block_group);
search_start = block_group->key.objectid;
}
have_block_group:
- if (unlikely(block_group->cached == BTRFS_CACHE_NO)) {
+ cached = block_group_cache_done(block_group);
+ if (unlikely(!cached)) {
u64 free_percent;
+ found_uncached_bg = true;
ret = cache_block_group(block_group, trans,
orig_root, 1);
if (block_group->cached == BTRFS_CACHE_FINISHED)
- goto have_block_group;
+ goto alloc;
free_percent = btrfs_block_group_used(&block_group->item);
free_percent *= 100;
orig_root, 0);
BUG_ON(ret);
}
- found_uncached_bg = true;
/*
* If loop is set for cached only, try the next block
goto loop;
}
- cached = block_group_cache_done(block_group);
- if (unlikely(!cached))
- found_uncached_bg = true;
-
+alloc:
if (unlikely(block_group->ro))
goto loop;
spin_lock(&block_group->free_space_ctl->tree_lock);
if (cached &&
block_group->free_space_ctl->free_space <
- num_bytes + empty_size) {
+ num_bytes + empty_cluster + empty_size) {
spin_unlock(&block_group->free_space_ctl->tree_lock);
goto loop;
}
spin_unlock(&block_group->free_space_ctl->tree_lock);
/*
- * Ok we want to try and use the cluster allocator, so lets look
- * there, unless we are on LOOP_NO_EMPTY_SIZE, since we will
- * have tried the cluster allocator plenty of times at this
- * point and not have found anything, so we are likely way too
- * fragmented for the clustering stuff to find anything, so lets
- * just skip it and let the allocator find whatever block it can
- * find
+ * Ok we want to try and use the cluster allocator, so
+ * lets look there
*/
- if (last_ptr && loop < LOOP_NO_EMPTY_SIZE) {
+ if (last_ptr) {
/*
* the refill lock keeps out other
* people trying to start a new cluster
*/
spin_lock(&last_ptr->refill_lock);
- if (last_ptr->block_group &&
- (last_ptr->block_group->ro ||
- !block_group_bits(last_ptr->block_group, data))) {
- offset = 0;
+ used_block_group = last_ptr->block_group;
+ if (used_block_group != block_group &&
+ (!used_block_group ||
+ used_block_group->ro ||
+ !block_group_bits(used_block_group, data))) {
+ used_block_group = block_group;
goto refill_cluster;
}
- offset = btrfs_alloc_from_cluster(block_group, last_ptr,
- num_bytes, search_start);
+ if (used_block_group != block_group)
+ btrfs_get_block_group(used_block_group);
+
+ offset = btrfs_alloc_from_cluster(used_block_group,
+ last_ptr, num_bytes, used_block_group->key.objectid);
if (offset) {
/* we have a block, we're done */
spin_unlock(&last_ptr->refill_lock);
goto checks;
}
- spin_lock(&last_ptr->lock);
- /*
- * whoops, this cluster doesn't actually point to
- * this block group. Get a ref on the block
- * group is does point to and try again
- */
- if (!last_ptr_loop && last_ptr->block_group &&
- last_ptr->block_group != block_group &&
- index <=
- get_block_group_index(last_ptr->block_group)) {
-
- btrfs_put_block_group(block_group);
- block_group = last_ptr->block_group;
- btrfs_get_block_group(block_group);
- spin_unlock(&last_ptr->lock);
- spin_unlock(&last_ptr->refill_lock);
-
- last_ptr_loop = 1;
- search_start = block_group->key.objectid;
- /*
- * we know this block group is properly
- * in the list because
- * btrfs_remove_block_group, drops the
- * cluster before it removes the block
- * group from the list
- */
- goto have_block_group;
+ WARN_ON(last_ptr->block_group != used_block_group);
+ if (used_block_group != block_group) {
+ btrfs_put_block_group(used_block_group);
+ used_block_group = block_group;
}
- spin_unlock(&last_ptr->lock);
refill_cluster:
+ BUG_ON(used_block_group != block_group);
+ /* If we are on LOOP_NO_EMPTY_SIZE, we can't
+ * set up a new clusters, so lets just skip it
+ * and let the allocator find whatever block
+ * it can find. If we reach this point, we
+ * will have tried the cluster allocator
+ * plenty of times and not have found
+ * anything, so we are likely way too
+ * fragmented for the clustering stuff to find
+ * anything. */
+ if (loop >= LOOP_NO_EMPTY_SIZE) {
+ spin_unlock(&last_ptr->refill_lock);
+ goto unclustered_alloc;
+ }
+
/*
* this cluster didn't work out, free it and
* start over
*/
btrfs_return_cluster_to_free_space(NULL, last_ptr);
- last_ptr_loop = 0;
-
/* allocate a cluster in this block group */
ret = btrfs_find_space_cluster(trans, root,
block_group, last_ptr,
- offset, num_bytes,
+ search_start, num_bytes,
empty_cluster + empty_size);
if (ret == 0) {
/*
goto loop;
}
+unclustered_alloc:
offset = btrfs_find_space_for_alloc(block_group, search_start,
num_bytes, empty_size);
/*
failed_alloc = true;
goto have_block_group;
} else if (!offset) {
+ if (!cached)
+ have_caching_bg = true;
goto loop;
}
checks:
search_start = stripe_align(root, offset);
/* move on to the next group */
if (search_start + num_bytes >= search_end) {
- btrfs_add_free_space(block_group, offset, num_bytes);
+ btrfs_add_free_space(used_block_group, offset, num_bytes);
goto loop;
}
/* move on to the next group */
if (search_start + num_bytes >
- block_group->key.objectid + block_group->key.offset) {
- btrfs_add_free_space(block_group, offset, num_bytes);
+ used_block_group->key.objectid + used_block_group->key.offset) {
+ btrfs_add_free_space(used_block_group, offset, num_bytes);
goto loop;
}
ins->offset = num_bytes;
if (offset < search_start)
- btrfs_add_free_space(block_group, offset,
+ btrfs_add_free_space(used_block_group, offset,
search_start - offset);
BUG_ON(offset > search_start);
- ret = btrfs_update_reserved_bytes(block_group, num_bytes,
+ ret = btrfs_update_reserved_bytes(used_block_group, num_bytes,
alloc_type);
if (ret == -EAGAIN) {
- btrfs_add_free_space(block_group, offset, num_bytes);
+ btrfs_add_free_space(used_block_group, offset, num_bytes);
goto loop;
}
ins->offset = num_bytes;
if (offset < search_start)
- btrfs_add_free_space(block_group, offset,
+ btrfs_add_free_space(used_block_group, offset,
search_start - offset);
BUG_ON(offset > search_start);
+ if (used_block_group != block_group)
+ btrfs_put_block_group(used_block_group);
btrfs_put_block_group(block_group);
break;
loop:
failed_cluster_refill = false;
failed_alloc = false;
BUG_ON(index != get_block_group_index(block_group));
+ if (used_block_group != block_group)
+ btrfs_put_block_group(used_block_group);
btrfs_put_block_group(block_group);
}
up_read(&space_info->groups_sem);
+ if (!ins->objectid && loop >= LOOP_CACHING_WAIT && have_caching_bg)
+ goto search;
+
if (!ins->objectid && ++index < BTRFS_NR_RAID_TYPES)
goto search;
return ret;
}
-int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len)
+static int __btrfs_free_reserved_extent(struct btrfs_root *root,
+ u64 start, u64 len, int pin)
{
struct btrfs_block_group_cache *cache;
int ret = 0;
if (btrfs_test_opt(root, DISCARD))
ret = btrfs_discard_extent(root, start, len, NULL);
- btrfs_add_free_space(cache, start, len);
- btrfs_update_reserved_bytes(cache, len, RESERVE_FREE);
+ if (pin)
+ pin_down_extent(root, cache, start, len, 1);
+ else {
+ btrfs_add_free_space(cache, start, len);
+ btrfs_update_reserved_bytes(cache, len, RESERVE_FREE);
+ }
btrfs_put_block_group(cache);
trace_btrfs_reserved_extent_free(root, start, len);
return ret;
}
+int btrfs_free_reserved_extent(struct btrfs_root *root,
+ u64 start, u64 len)
+{
+ return __btrfs_free_reserved_extent(root, start, len, 0);
+}
+
+int btrfs_free_and_pin_reserved_extent(struct btrfs_root *root,
+ u64 start, u64 len)
+{
+ return __btrfs_free_reserved_extent(root, start, len, 1);
+}
+
static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
u64 parent, u64 root_objectid,
block_rsv = get_block_rsv(trans, root);
if (block_rsv->size == 0) {
- ret = reserve_metadata_bytes(root, block_rsv, blocksize, 0, 0);
+ ret = reserve_metadata_bytes(root, block_rsv, blocksize, 0);
/*
* If we couldn't reserve metadata bytes try and use some from
* the global reserve.
if (!ret)
return block_rsv;
if (ret) {
- WARN_ON(1);
- ret = reserve_metadata_bytes(root, block_rsv, blocksize, 0, 0);
+ static DEFINE_RATELIMIT_STATE(_rs,
+ DEFAULT_RATELIMIT_INTERVAL,
+ /*DEFAULT_RATELIMIT_BURST*/ 2);
+ if (__ratelimit(&_rs)) {
+ printk(KERN_DEBUG "btrfs: block rsv returned %d\n", ret);
+ WARN_ON(1);
+ }
+ ret = reserve_metadata_bytes(root, block_rsv, blocksize, 0);
if (!ret) {
return block_rsv;
} else if (ret && block_rsv != global_rsv) {
* space to fit our block group in.
*/
if (device->total_bytes > device->bytes_used + min_free) {
- ret = find_free_dev_extent(NULL, device, min_free,
+ ret = find_free_dev_extent(device, min_free,
&dev_offset, NULL);
if (!ret)
dev_nr++;
return -ENOMEM;
path->reada = 1;
- cache_gen = btrfs_super_cache_generation(&root->fs_info->super_copy);
+ cache_gen = btrfs_super_cache_generation(root->fs_info->super_copy);
if (btrfs_test_opt(root, SPACE_CACHE) &&
- btrfs_super_generation(&root->fs_info->super_copy) != cache_gen)
+ btrfs_super_generation(root->fs_info->super_copy) != cache_gen)
need_clear = 1;
if (btrfs_test_opt(root, CLEAR_CACHE))
need_clear = 1;
ret = update_space_info(root->fs_info, cache->flags, size, bytes_used,
&cache->space_info);
BUG_ON(ret);
+ update_global_block_rsv(root->fs_info);
spin_lock(&cache->space_info->lock);
cache->space_info->bytes_readonly += cache->bytes_super;
goto out;
}
- inode = lookup_free_space_inode(root, block_group, path);
+ inode = lookup_free_space_inode(tree_root, block_group, path);
if (!IS_ERR(inode)) {
ret = btrfs_orphan_add(trans, inode);
BUG_ON(ret);
int mixed = 0;
int ret;
- disk_super = &fs_info->super_copy;
+ disk_super = fs_info->super_copy;
if (!btrfs_super_root(disk_super))
return 1;