From 459931eca5f4b8c9ad259d07cc1ca49afed54804 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 10 Dec 2008 09:10:46 -0500 Subject: [PATCH] Btrfs: Delete csum items when freeing extents This finishes off the new checksumming code by removing csum items for extents that are no longer in use. The trick is doing it without racing because a single csum item may hold csums for more than one extent. Extra checks are added to btrfs_csum_file_blocks to make sure that we are using the correct csum item after dropping locks. A new btrfs_split_item is added to split a single csum item so it can be split without dropping the leaf lock. This is used to remove csum bytes from the middle of an item. Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 131 ++++++++++++++++++++++-- fs/btrfs/ctree.h | 13 +++ fs/btrfs/extent-tree.c | 6 +- fs/btrfs/file-item.c | 226 ++++++++++++++++++++++++++++++++++------- 4 files changed, 335 insertions(+), 41 deletions(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 19c0dd33b1e8..c0c95cccbb5b 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -1512,7 +1512,8 @@ cow_done: if (ret && slot > 0) slot -= 1; p->slots[level] = slot; - if (ins_len > 0 && btrfs_header_nritems(b) >= + if ((p->search_for_split || ins_len > 0) && + btrfs_header_nritems(b) >= BTRFS_NODEPTRS_PER_BLOCK(root) - 3) { int sret = split_node(trans, root, p, level); BUG_ON(sret > 0); @@ -1596,7 +1597,8 @@ cow_done: goto done; } } - unlock_up(p, level, lowest_unlock); + if (!p->search_for_split) + unlock_up(p, level, lowest_unlock); goto done; } } @@ -2636,11 +2638,11 @@ static noinline int split_leaf(struct btrfs_trans_handle *trans, int num_doubles = 0; struct btrfs_disk_key disk_key; - if (extend) + if (extend && data_size) space_needed = data_size; /* first try to make some room by pushing left and right */ - if (ins_key->type != BTRFS_DIR_ITEM_KEY) { + if (data_size && ins_key->type != BTRFS_DIR_ITEM_KEY) { wret = push_leaf_right(trans, root, path, data_size, 0); if (wret < 0) { return wret; @@ -2721,7 +2723,7 @@ again: } else { if (leaf_space_used(l, 0, mid + 1) + space_needed > BTRFS_LEAF_DATA_SIZE(root)) { - if (!extend && slot == 0) { + if (!extend && data_size && slot == 0) { btrfs_cpu_key_to_disk(&disk_key, ins_key); btrfs_set_header_nritems(right, 0); wret = insert_ptr(trans, root, path, @@ -2742,7 +2744,7 @@ again: } btrfs_mark_buffer_dirty(right); return ret; - } else if (extend && slot == 0) { + } else if ((extend || !data_size) && slot == 0) { mid = 1; } else { mid = slot; @@ -2827,6 +2829,123 @@ again: return ret; } +/* + * This function splits a single item into two items, + * giving 'new_key' to the new item and splitting the + * old one at split_offset (from the start of the item). + * + * The path may be released by this operation. After + * the split, the path is pointing to the old item. The + * new item is going to be in the same node as the old one. + * + * Note, the item being split must be smaller enough to live alone on + * a tree block with room for one extra struct btrfs_item + * + * This allows us to split the item in place, keeping a lock on the + * leaf the entire time. + */ +int btrfs_split_item(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, + struct btrfs_key *new_key, + unsigned long split_offset) +{ + u32 item_size; + struct extent_buffer *leaf; + struct btrfs_key orig_key; + struct btrfs_item *item; + struct btrfs_item *new_item; + int ret = 0; + int slot; + u32 nritems; + u32 orig_offset; + struct btrfs_disk_key disk_key; + char *buf; + + leaf = path->nodes[0]; + btrfs_item_key_to_cpu(leaf, &orig_key, path->slots[0]); + if (btrfs_leaf_free_space(root, leaf) >= sizeof(struct btrfs_item)) + goto split; + + item_size = btrfs_item_size_nr(leaf, path->slots[0]); + btrfs_release_path(root, path); + + path->search_for_split = 1; + path->keep_locks = 1; + + ret = btrfs_search_slot(trans, root, &orig_key, path, 0, 1); + path->search_for_split = 0; + + /* if our item isn't there or got smaller, return now */ + if (ret != 0 || item_size != btrfs_item_size_nr(path->nodes[0], + path->slots[0])) { + path->keep_locks = 0; + return -EAGAIN; + } + + ret = split_leaf(trans, root, &orig_key, path, 0, 0); + path->keep_locks = 0; + BUG_ON(ret); + + BUG_ON(btrfs_leaf_free_space(root, leaf) < sizeof(struct btrfs_item)); + leaf = path->nodes[0]; + +split: + item = btrfs_item_nr(leaf, path->slots[0]); + orig_offset = btrfs_item_offset(leaf, item); + item_size = btrfs_item_size(leaf, item); + + + buf = kmalloc(item_size, GFP_NOFS); + read_extent_buffer(leaf, buf, btrfs_item_ptr_offset(leaf, + path->slots[0]), item_size); + slot = path->slots[0] + 1; + leaf = path->nodes[0]; + + nritems = btrfs_header_nritems(leaf); + + if (slot != nritems) { + /* shift the items */ + memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + 1), + btrfs_item_nr_offset(slot), + (nritems - slot) * sizeof(struct btrfs_item)); + + } + + btrfs_cpu_key_to_disk(&disk_key, new_key); + btrfs_set_item_key(leaf, &disk_key, slot); + + new_item = btrfs_item_nr(leaf, slot); + + btrfs_set_item_offset(leaf, new_item, orig_offset); + btrfs_set_item_size(leaf, new_item, item_size - split_offset); + + btrfs_set_item_offset(leaf, item, + orig_offset + item_size - split_offset); + btrfs_set_item_size(leaf, item, split_offset); + + btrfs_set_header_nritems(leaf, nritems + 1); + + /* write the data for the start of the original item */ + write_extent_buffer(leaf, buf, + btrfs_item_ptr_offset(leaf, path->slots[0]), + split_offset); + + /* write the data for the new item */ + write_extent_buffer(leaf, buf + split_offset, + btrfs_item_ptr_offset(leaf, slot), + item_size - split_offset); + btrfs_mark_buffer_dirty(leaf); + + ret = 0; + if (btrfs_leaf_free_space(root, leaf) < 0) { + btrfs_print_leaf(root, leaf); + BUG(); + } + kfree(buf); + return ret; +} + /* * make the item pointed to by the path smaller. new_size indicates * how small to make it, and from_end tells us if we just chop bytes diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index f72b43819349..5b0c79d22c09 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -409,6 +409,12 @@ struct btrfs_path { int keep_locks; int skip_locking; int lowest_level; + + /* + * set by btrfs_split_item, tells search_slot to keep all locks + * and to force calls to keep space in the nodes + */ + int search_for_split; }; /* @@ -1816,6 +1822,11 @@ int btrfs_truncate_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, u32 new_size, int from_end); +int btrfs_split_item(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, + struct btrfs_key *new_key, + unsigned long split_offset); int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_key *key, struct btrfs_path *p, int ins_len, int cow); @@ -1953,6 +1964,8 @@ int btrfs_lookup_inode(struct btrfs_trans_handle *trans, struct btrfs_root struct btrfs_key *location, int mod); /* file-item.c */ +int btrfs_del_csums(struct btrfs_trans_handle *trans, + struct btrfs_root *root, u64 bytenr, u64 len); int btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode, struct bio *bio, u32 *dst); int btrfs_insert_file_extent(struct btrfs_trans_handle *trans, diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 803647bc8400..cc74316dc426 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -2484,7 +2484,6 @@ static int __free_extent(struct btrfs_trans_handle *trans, mark_free = 1; BUG_ON(ret < 0); } - /* block accounting for super block */ spin_lock_irq(&info->delalloc_lock); super_used = btrfs_super_bytes_used(&info->super_copy); @@ -2504,6 +2503,11 @@ static int __free_extent(struct btrfs_trans_handle *trans, mark_free); BUG_ON(ret); + if (owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) { + ret = btrfs_del_csums(trans, root, bytenr, num_bytes); + BUG_ON(ret); + } + #ifdef BIO_RW_DISCARD /* Tell the block device(s) that the sectors can be discarded */ ret = btrfs_map_block(&root->fs_info->mapping_tree, READ, diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index a3ad2ce00116..3ebef871ee6c 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -310,6 +310,179 @@ int btrfs_csum_one_bio(struct btrfs_root *root, struct inode *inode, return 0; } +/* + * helper function for csum removal, this expects the + * key to describe the csum pointed to by the path, and it expects + * the csum to overlap the range [bytenr, len] + * + * The csum should not be entirely contained in the range and the + * range should not be entirely contained in the csum. + * + * This calls btrfs_truncate_item with the correct args based on the + * overlap, and fixes up the key as required. + */ +static noinline int truncate_one_csum(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, + struct btrfs_key *key, + u64 bytenr, u64 len) +{ + struct extent_buffer *leaf; + u16 csum_size = + btrfs_super_csum_size(&root->fs_info->super_copy); + u64 csum_end; + u64 end_byte = bytenr + len; + u32 blocksize_bits = root->fs_info->sb->s_blocksize_bits; + int ret; + + leaf = path->nodes[0]; + csum_end = btrfs_item_size_nr(leaf, path->slots[0]) / csum_size; + csum_end <<= root->fs_info->sb->s_blocksize_bits; + csum_end += key->offset; + + if (key->offset < bytenr && csum_end <= end_byte) { + /* + * [ bytenr - len ] + * [ ] + * [csum ] + * A simple truncate off the end of the item + */ + u32 new_size = (bytenr - key->offset) >> blocksize_bits; + new_size *= csum_size; + ret = btrfs_truncate_item(trans, root, path, new_size, 1); + BUG_ON(ret); + } else if (key->offset >= bytenr && csum_end > end_byte && + end_byte > key->offset) { + /* + * [ bytenr - len ] + * [ ] + * [csum ] + * we need to truncate from the beginning of the csum + */ + u32 new_size = (csum_end - end_byte) >> blocksize_bits; + new_size *= csum_size; + + ret = btrfs_truncate_item(trans, root, path, new_size, 0); + BUG_ON(ret); + + key->offset = end_byte; + ret = btrfs_set_item_key_safe(trans, root, path, key); + BUG_ON(ret); + } else { + BUG(); + } + return 0; +} + +/* + * deletes the csum items from the csum tree for a given + * range of bytes. + */ +int btrfs_del_csums(struct btrfs_trans_handle *trans, + struct btrfs_root *root, u64 bytenr, u64 len) +{ + struct btrfs_path *path; + struct btrfs_key key; + u64 end_byte = bytenr + len; + u64 csum_end; + struct extent_buffer *leaf; + int ret; + u16 csum_size = + btrfs_super_csum_size(&root->fs_info->super_copy); + int blocksize_bits = root->fs_info->sb->s_blocksize_bits; + + root = root->fs_info->csum_root; + + path = btrfs_alloc_path(); + + while(1) { + key.objectid = BTRFS_EXTENT_CSUM_OBJECTID; + key.offset = end_byte - 1; + key.type = BTRFS_EXTENT_CSUM_KEY; + + ret = btrfs_search_slot(trans, root, &key, path, -1, 1); + if (ret > 0) { + if (path->slots[0] == 0) + goto out; + path->slots[0]--; + } + leaf = path->nodes[0]; + btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); + + if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID || + key.type != BTRFS_EXTENT_CSUM_KEY) { + break; + } + + if (key.offset >= end_byte) + break; + + csum_end = btrfs_item_size_nr(leaf, path->slots[0]) / csum_size; + csum_end <<= blocksize_bits; + csum_end += key.offset; + + /* this csum ends before we start, we're done */ + if (csum_end <= bytenr) + break; + + /* delete the entire item, it is inside our range */ + if (key.offset >= bytenr && csum_end <= end_byte) { + ret = btrfs_del_item(trans, root, path); + BUG_ON(ret); + } else if (key.offset < bytenr && csum_end > end_byte) { + unsigned long offset; + unsigned long shift_len; + unsigned long item_offset; + /* + * [ bytenr - len ] + * [csum ] + * + * Our bytes are in the middle of the csum, + * we need to split this item and insert a new one. + * + * But we can't drop the path because the + * csum could change, get removed, extended etc. + * + * The trick here is the max size of a csum item leaves + * enough room in the tree block for a single + * item header. So, we split the item in place, + * adding a new header pointing to the existing + * bytes. Then we loop around again and we have + * a nicely formed csum item that we can neatly + * truncate. + */ + offset = (bytenr - key.offset) >> blocksize_bits; + offset *= csum_size; + + shift_len = (len >> blocksize_bits) * csum_size; + + item_offset = btrfs_item_ptr_offset(leaf, + path->slots[0]); + + memset_extent_buffer(leaf, 0, item_offset + offset, + shift_len); + key.offset = bytenr; + + /* + * btrfs_split_item returns -EAGAIN when the + * item changed size or key + */ + ret = btrfs_split_item(trans, root, path, &key, offset); + BUG_ON(ret && ret != -EAGAIN); + + key.offset = end_byte - 1; + } else { + ret = truncate_one_csum(trans, root, path, + &key, bytenr, len); + BUG_ON(ret); + } + btrfs_release_path(root, path); + } +out: + btrfs_free_path(path); + return 0; +} + int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_ordered_sum *sums) @@ -396,28 +569,40 @@ again: csum_size, 1); if (ret < 0) goto fail_unlock; - if (ret == 0) { - BUG(); - } - if (path->slots[0] == 0) { - goto insert; + + if (ret > 0) { + if (path->slots[0] == 0) + goto insert; + path->slots[0]--; } - path->slots[0]--; + leaf = path->nodes[0]; btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); csum_offset = (bytenr - found_key.offset) >> root->fs_info->sb->s_blocksize_bits; + if (btrfs_key_type(&found_key) != BTRFS_EXTENT_CSUM_KEY || found_key.objectid != BTRFS_EXTENT_CSUM_OBJECTID || csum_offset >= MAX_CSUM_ITEMS(root, csum_size)) { goto insert; } + if (csum_offset >= btrfs_item_size_nr(leaf, path->slots[0]) / csum_size) { u32 diff = (csum_offset + 1) * csum_size; + + /* + * is the item big enough already? we dropped our lock + * before and need to recheck + */ + if (diff < btrfs_item_size_nr(leaf, path->slots[0])) + goto csum; + diff = diff - btrfs_item_size_nr(leaf, path->slots[0]); - if (diff != csum_size) + if (diff != csum_size) { goto insert; + } + ret = btrfs_extend_item(trans, root, path, diff); BUG_ON(ret); goto csum; @@ -518,30 +703,3 @@ out: fail_unlock: goto out; } - -int btrfs_csum_truncate(struct btrfs_trans_handle *trans, - struct btrfs_root *root, struct btrfs_path *path, - u64 isize) -{ - struct btrfs_key key; - struct extent_buffer *leaf = path->nodes[0]; - int slot = path->slots[0]; - int ret; - u32 new_item_size; - u64 new_item_span; - u64 blocks; - - btrfs_item_key_to_cpu(leaf, &key, slot); - if (isize <= key.offset) - return 0; - new_item_span = isize - key.offset; - blocks = (new_item_span + root->sectorsize - 1) >> - root->fs_info->sb->s_blocksize_bits; - new_item_size = blocks * - btrfs_super_csum_size(&root->fs_info->super_copy); - if (new_item_size >= btrfs_item_size_nr(leaf, slot)) - return 0; - ret = btrfs_truncate_item(trans, root, path, new_item_size, 1); - BUG_ON(ret); - return ret; -} -- 2.34.1