2 * Copyright (C) 2007 Oracle. All rights reserved.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 02111-1307, USA.
19 #include <linux/sched.h>
22 #include "print-tree.h"
23 #include "transaction.h"
/*
 * Forward declarations.  find_free_extent() is the core allocator search;
 * finish_current_insert() and del_pending_extents() apply extent-tree
 * insertions/deletions that were queued (in extent_tree_insert[] and
 * pending_del_radix) instead of being done immediately.
 */
25 static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
26 *orig_root, u64 num_blocks, u64 empty_size,
28 u64 search_end, u64 hint_block,
29 struct btrfs_key *ins, u64 exclude_start,
30 u64 exclude_nr, int data);
31 static int finish_current_insert(struct btrfs_trans_handle *trans, struct
32 btrfs_root *extent_root);
33 static int del_pending_extents(struct btrfs_trans_handle *trans, struct
34 btrfs_root *extent_root);
/*
 * cache_block_group - build the in-memory free-space cache for one
 * block group.
 *
 * Walks the extent tree across the group's [objectid, objectid+offset)
 * range and sets one bit in fs_info->extent_map_radix for every free
 * block, i.e. for every hole between allocated extent items.  Groups
 * that are already cached, and data groups, are skipped early.
 */
36 static int cache_block_group(struct btrfs_root *root,
37 struct btrfs_block_group_cache *block_group)
39 struct btrfs_path *path;
42 struct btrfs_leaf *leaf;
43 struct radix_tree_root *extent_radix;
51 root = root->fs_info->extent_root;
52 extent_radix = &root->fs_info->extent_map_radix;
54 if (block_group->cached)
56 if (block_group->data)
58 path = btrfs_alloc_path();
/* start the search at the group's first block */
62 key.objectid = block_group->key.objectid;
65 btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
66 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
69 if (ret && path->slots[0] > 0)
71 limit = block_group->key.objectid + block_group->key.offset;
73 leaf = btrfs_buffer_leaf(path->nodes[0]);
74 slot = path->slots[0];
/* ran off the end of this leaf: advance to the next one */
75 if (slot >= btrfs_header_nritems(&leaf->header)) {
76 ret = btrfs_next_leaf(root, path);
/* no more items: everything from 'last' to the end of the group is free */
83 hole_size = block_group->key.objectid +
84 block_group->key.offset - last;
86 last = block_group->key.objectid;
87 hole_size = block_group->key.offset;
89 for (i = 0; i < hole_size; i++) {
90 set_radix_bit(extent_radix,
/* examine the extent item at 'slot' */
96 btrfs_disk_key_to_cpu(&key, &leaf->items[slot].key);
/* item lies past the group: mark the trailing hole free and stop */
97 if (key.objectid >= block_group->key.objectid +
98 block_group->key.offset) {
100 hole_size = block_group->key.objectid +
101 block_group->key.offset - last;
103 last = block_group->key.objectid;
104 hole_size = block_group->key.offset;
106 for (i = 0; i < hole_size; i++) {
107 set_radix_bit(extent_radix, last + i);
/* an allocated extent: blocks between 'last' and key.objectid were free */
111 if (btrfs_key_type(&key) == BTRFS_EXTENT_ITEM_KEY) {
113 last = key.objectid + key.offset;
116 hole_size = key.objectid - last;
117 for (i = 0; i < hole_size; i++) {
118 set_radix_bit(extent_radix, last + i);
120 last = key.objectid + key.offset;
126 block_group->cached = 1;
128 btrfs_free_path(path);
/*
 * btrfs_lookup_block_group - find the block group cache entry whose
 * range contains @blocknr.  The metadata radix is consulted first, then
 * the data radix (groups are indexed by their last block, so a gang
 * lookup starting at blocknr returns the candidate group).
 *
 * NOTE(review): the range test uses blocknr <= objectid + offset, which
 * also accepts the block one *past* the end of the group — confirm this
 * inclusive upper bound is intentional.
 */
132 struct btrfs_block_group_cache *btrfs_lookup_block_group(struct
136 struct btrfs_block_group_cache *block_group;
139 ret = radix_tree_gang_lookup(&info->block_group_radix,
140 (void **)&block_group,
143 if (block_group->key.objectid <= blocknr && blocknr <=
144 block_group->key.objectid + block_group->key.offset)
147 ret = radix_tree_gang_lookup(&info->block_group_data_radix,
148 (void **)&block_group,
151 if (block_group->key.objectid <= blocknr && blocknr <=
152 block_group->key.objectid + block_group->key.offset)
/*
 * leaf_range - how many extent items fit in a single leaf:
 * usable leaf data size divided by (item header + extent item).
 * Used as a "close enough" distance bound when scanning free bits.
 */
158 static u64 leaf_range(struct btrfs_root *root)
160 u64 size = BTRFS_LEAF_DATA_SIZE(root);
161 do_div(size, sizeof(struct btrfs_extent_item) +
162 sizeof(struct btrfs_item));
/*
 * find_search_start - pick a starting block for an allocation inside
 * (or after) the block group in *cache_ret.
 *
 * Uses the free-block bits in fs_info->extent_map_radix (populated by
 * cache_block_group()) to find a clustered run of free blocks at or
 * after 'search_start'.  If the current group is exhausted, moves on to
 * the next group and updates *cache_ret.  Falls back to the group's
 * last_alloc cursor when the radix scan cannot be used.
 */
166 static u64 find_search_start(struct btrfs_root *root,
167 struct btrfs_block_group_cache **cache_ret,
168 u64 search_start, int num)
170 unsigned long gang[8];
172 struct btrfs_block_group_cache *cache = *cache_ret;
173 u64 last = max(search_start, cache->key.objectid);
/* never hand out blocks below the preallocation cursor */
178 last = max(last, cache->last_prealloc);
181 ret = cache_block_group(root, cache);
185 ret = find_first_radix_bit(&root->fs_info->extent_map_radix,
186 gang, last, ARRAY_SIZE(gang));
189 last = gang[ret-1] + 1;
/* fewer than a full gang of free bits: group is nearly exhausted */
191 if (ret != ARRAY_SIZE(gang)) {
/* free bits are too spread out to be a useful cluster */
194 if (gang[ret-1] - gang[0] > leaf_range(root)) {
/* the run starts past the end of this group: switch groups */
198 if (gang[0] >= cache->key.objectid + cache->key.offset) {
204 return max(cache->last_alloc, search_start);
207 cache = btrfs_lookup_block_group(root->fs_info,
208 last + cache->key.offset - 1);
210 return max((*cache_ret)->last_alloc, search_start);
212 cache = btrfs_find_block_group(root, cache,
213 last + cache->key.offset - 1, 0, 0);
/*
 * div_factor - scale @num by @factor (body elided in this excerpt;
 * presumably num * factor / 10, giving the "factor tenths full"
 * thresholds used by btrfs_find_block_group() — TODO confirm).
 */
218 static u64 div_factor(u64 num, int factor)
/*
 * btrfs_find_block_group - choose a block group to allocate from.
 *
 * Preference order: the group containing 'search_start', then the
 * caller's 'hint' group, then any group tagged BTRFS_BLOCK_GROUP_AVAIL
 * that is less than 'factor' tenths full, then *any* group with room,
 * and finally (after swapping the data/metadata radixes) whatever can
 * be found at all.  Groups found to be too full have their AVAIL tag
 * cleared along the way.
 */
225 struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root,
226 struct btrfs_block_group_cache
227 *hint, u64 search_start,
230 struct btrfs_block_group_cache *cache[8];
231 struct btrfs_block_group_cache *found_group = NULL;
232 struct btrfs_fs_info *info = root->fs_info;
233 struct radix_tree_root *radix;
234 struct radix_tree_root *swap_radix;
/* data allocations search the data radix first; metadata the reverse */
248 radix = &info->block_group_data_radix;
249 swap_radix = &info->block_group_radix;
251 radix = &info->block_group_radix;
252 swap_radix = &info->block_group_data_radix;
/* fast path: try the group that already contains search_start */
256 struct btrfs_block_group_cache *shint;
257 shint = btrfs_lookup_block_group(info, search_start);
258 if (shint->data == data) {
259 used = btrfs_block_group_used(&shint->item);
260 if (used + shint->pinned <
261 div_factor(shint->key.offset, factor)) {
/* next: the hint group supplied by the caller */
266 if (hint && hint->data == data) {
267 used = btrfs_block_group_used(&hint->item);
268 if (used + hint->pinned <
269 div_factor(hint->key.offset, factor)) {
/* hint is mostly full: stop advertising it as available */
272 if (used >= div_factor(hint->key.offset, 8)) {
273 radix_tree_tag_clear(radix,
275 hint->key.offset - 1,
276 BTRFS_BLOCK_GROUP_AVAIL);
/* start scanning a few group-sizes before the hint */
278 last = hint->key.offset * 3;
279 if (hint->key.objectid >= last)
280 last = max(search_start + hint->key.offset - 1,
281 hint->key.objectid - last);
283 last = hint->key.objectid + hint->key.offset;
287 hint_last = max(hint->key.objectid, search_start);
289 hint_last = search_start;
/* scan groups tagged as having free space */
294 ret = radix_tree_gang_lookup_tag(radix, (void **)cache,
295 last, ARRAY_SIZE(cache),
296 BTRFS_BLOCK_GROUP_AVAIL);
299 for (i = 0; i < ret; i++) {
300 last = cache[i]->key.objectid +
301 cache[i]->key.offset;
302 used = btrfs_block_group_used(&cache[i]->item);
303 if (used + cache[i]->pinned <
304 div_factor(cache[i]->key.offset, factor)) {
305 found_group = cache[i];
/* too full: clear the AVAIL tag so later scans skip it */
308 if (used >= div_factor(cache[i]->key.offset, 8)) {
309 radix_tree_tag_clear(radix,
310 cache[i]->key.objectid +
311 cache[i]->key.offset - 1,
312 BTRFS_BLOCK_GROUP_AVAIL);
/* no tagged group fit: fall back to scanning every group */
320 ret = radix_tree_gang_lookup(radix, (void **)cache,
321 last, ARRAY_SIZE(cache));
324 for (i = 0; i < ret; i++) {
325 last = cache[i]->key.objectid +
326 cache[i]->key.offset;
327 used = btrfs_block_group_used(&cache[i]->item);
328 if (used + cache[i]->pinned < cache[i]->key.offset) {
329 found_group = cache[i];
332 if (used >= cache[i]->key.offset) {
333 radix_tree_tag_clear(radix,
334 cache[i]->key.objectid +
335 cache[i]->key.offset - 1,
336 BTRFS_BLOCK_GROUP_AVAIL);
/* last resort: swap the data/metadata radixes and take anything */
347 struct radix_tree_root *tmp = radix;
355 ret = radix_tree_gang_lookup(radix,
356 (void **)&found_group, 0, 1);
358 ret = radix_tree_gang_lookup(swap_radix,
359 (void **)&found_group,
/*
 * btrfs_inc_extent_ref - bump the reference count on the extent item
 * covering [blocknr, blocknr + num_blocks).
 *
 * The initial find_free_extent() call with num_blocks == 0 refills the
 * extent-tree preallocation array before the extent tree is modified
 * (num_blocks == 0 is the "fill prealloc" mode of find_free_extent).
 * After the ref bump, any queued extent-tree inserts and pending
 * deletes are flushed.
 */
368 int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
369 struct btrfs_root *root,
370 u64 blocknr, u64 num_blocks)
372 struct btrfs_path *path;
374 struct btrfs_key key;
375 struct btrfs_leaf *l;
376 struct btrfs_extent_item *item;
377 struct btrfs_key ins;
380 path = btrfs_alloc_path();
/* make sure the extent tree has preallocated blocks for its own COW */
383 ret = find_free_extent(trans, root->fs_info->extent_root, 0, 0, 0,
384 (u64)-1, 0, &ins, 0, 0, 0);
386 btrfs_free_path(path);
389 key.objectid = blocknr;
391 btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
392 key.offset = num_blocks;
393 ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path,
/* found the extent item: increment its refcount in place */
401 l = btrfs_buffer_leaf(path->nodes[0]);
402 item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item);
403 refs = btrfs_extent_refs(item);
404 btrfs_set_extent_refs(item, refs + 1);
405 btrfs_mark_buffer_dirty(path->nodes[0]);
407 btrfs_release_path(root->fs_info->extent_root, path);
408 btrfs_free_path(path);
409 finish_current_insert(trans, root->fs_info->extent_root);
410 del_pending_extents(trans, root->fs_info->extent_root);
/*
 * lookup_extent_ref - read-only helper: fetch the current reference
 * count of the extent item covering [blocknr, blocknr + num_blocks)
 * into *refs.  Does not modify the tree.
 */
414 static int lookup_extent_ref(struct btrfs_trans_handle *trans,
415 struct btrfs_root *root, u64 blocknr,
416 u64 num_blocks, u32 *refs)
418 struct btrfs_path *path;
420 struct btrfs_key key;
421 struct btrfs_leaf *l;
422 struct btrfs_extent_item *item;
424 path = btrfs_alloc_path();
425 key.objectid = blocknr;
426 key.offset = num_blocks;
428 btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
429 ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path,
435 l = btrfs_buffer_leaf(path->nodes[0]);
436 item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item);
437 *refs = btrfs_extent_refs(item);
439 btrfs_free_path(path);
/*
 * btrfs_inc_root_ref - convenience wrapper: take one extra reference on
 * the single block that is the root node of @root.
 */
443 int btrfs_inc_root_ref(struct btrfs_trans_handle *trans,
444 struct btrfs_root *root)
446 return btrfs_inc_extent_ref(trans, root, bh_blocknr(root->node), 1);
/*
 * btrfs_inc_ref - take a reference on everything @buf points to: for a
 * leaf, the on-disk blocks of every real (non-inline, non-hole) file
 * extent item; for an interior node, every child block pointer.
 *
 * If a ref bump fails partway through, the error path walks the first
 * 'faili' items again and drops the references already taken, so the
 * operation is all-or-nothing.
 */
449 int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
450 struct buffer_head *buf)
453 struct btrfs_node *buf_node;
454 struct btrfs_leaf *buf_leaf;
455 struct btrfs_disk_key *key;
456 struct btrfs_file_extent_item *fi;
465 buf_node = btrfs_buffer_node(buf);
466 leaf = btrfs_is_leaf(buf_node);
467 buf_leaf = btrfs_buffer_leaf(buf);
468 for (i = 0; i < btrfs_header_nritems(&buf_node->header); i++) {
/* leaf: only EXTENT_DATA items reference other disk blocks */
471 key = &buf_leaf->items[i].key;
472 if (btrfs_disk_key_type(key) != BTRFS_EXTENT_DATA_KEY)
474 fi = btrfs_item_ptr(buf_leaf, i,
475 struct btrfs_file_extent_item);
/* inline extents store data in the leaf itself: nothing to ref */
476 if (btrfs_file_extent_type(fi) ==
477 BTRFS_FILE_EXTENT_INLINE)
479 disk_blocknr = btrfs_file_extent_disk_blocknr(fi);
/* blocknr 0 marks a hole */
480 if (disk_blocknr == 0)
482 ret = btrfs_inc_extent_ref(trans, root, disk_blocknr,
483 btrfs_file_extent_disk_num_blocks(fi));
/* interior node: ref each child block */
489 blocknr = btrfs_node_blockptr(buf_node, i);
490 ret = btrfs_inc_extent_ref(trans, root, blocknr, 1);
/* error path: undo the refs taken on the first 'faili' items */
500 for (i =0; i < faili; i++) {
503 key = &buf_leaf->items[i].key;
504 if (btrfs_disk_key_type(key) != BTRFS_EXTENT_DATA_KEY)
506 fi = btrfs_item_ptr(buf_leaf, i,
507 struct btrfs_file_extent_item);
508 if (btrfs_file_extent_type(fi) ==
509 BTRFS_FILE_EXTENT_INLINE)
511 disk_blocknr = btrfs_file_extent_disk_blocknr(fi);
512 if (disk_blocknr == 0)
514 err = btrfs_free_extent(trans, root, disk_blocknr,
515 btrfs_file_extent_disk_num_blocks(fi), 0);
518 blocknr = btrfs_node_blockptr(buf_node, i);
519 err = btrfs_free_extent(trans, root, blocknr, 1, 0);
/*
 * write_one_cache_group - copy the in-memory block group item
 * (cache->item) back into its slot in the extent tree.
 *
 * Preallocates extent-tree space first (find_free_extent with
 * num_blocks == 0), then searches for the group's key with cow=1 and
 * memcpy's the item in place.  Queued inserts/deletes are flushed
 * afterwards, and the group's allocation cursor is rewound to
 * first_free.
 */
526 static int write_one_cache_group(struct btrfs_trans_handle *trans,
527 struct btrfs_root *root,
528 struct btrfs_path *path,
529 struct btrfs_block_group_cache *cache)
533 struct btrfs_root *extent_root = root->fs_info->extent_root;
534 struct btrfs_block_group_item *bi;
535 struct btrfs_key ins;
537 ret = find_free_extent(trans, extent_root, 0, 0, 0, (u64)-1, 0, &ins,
539 /* FIXME, set bit to recalc cache groups on next mount */
542 ret = btrfs_search_slot(trans, extent_root, &cache->key, path, 0, 1);
546 bi = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), path->slots[0],
547 struct btrfs_block_group_item);
548 memcpy(bi, &cache->item, sizeof(*bi));
549 btrfs_mark_buffer_dirty(path->nodes[0]);
550 btrfs_release_path(extent_root, path);
552 finish_current_insert(trans, extent_root);
553 pending_ret = del_pending_extents(trans, extent_root);
559 cache->last_alloc = cache->first_free;
/*
 * write_dirty_block_radix - flush every block group tagged
 * BTRFS_BLOCK_GROUP_DIRTY in @radix to disk via
 * write_one_cache_group(), clearing the dirty tag on success.
 * Gang-lookups in batches of 8, advancing 'off' past each batch.
 */
564 static int write_dirty_block_radix(struct btrfs_trans_handle *trans,
565 struct btrfs_root *root,
566 struct radix_tree_root *radix)
568 struct btrfs_block_group_cache *cache[8];
573 struct btrfs_path *path;
574 unsigned long off = 0;
576 path = btrfs_alloc_path();
581 ret = radix_tree_gang_lookup_tag(radix, (void **)cache,
582 off, ARRAY_SIZE(cache),
583 BTRFS_BLOCK_GROUP_DIRTY);
586 for (i = 0; i < ret; i++) {
587 err = write_one_cache_group(trans, root,
590 * if we fail to write the cache group, we want
591 * to keep it marked dirty in hopes that a later
596 off = cache[i]->key.objectid +
597 cache[i]->key.offset;
601 radix_tree_tag_clear(radix, cache[i]->key.objectid +
602 cache[i]->key.offset - 1,
603 BTRFS_BLOCK_GROUP_DIRTY);
606 btrfs_free_path(path);
/*
 * btrfs_write_dirty_block_groups - flush dirty block group items from
 * both the metadata and the data radix trees.
 */
610 int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
611 struct btrfs_root *root)
615 ret = write_dirty_block_radix(trans, root,
616 &root->fs_info->block_group_radix);
617 ret2 = write_dirty_block_radix(trans, root,
618 &root->fs_info->block_group_data_radix);
/*
 * update_block_group - account an allocation (alloc != 0) or a free
 * (alloc == 0) of [blocknr, blocknr + num) against the owning block
 * group(s), marking each touched group dirty.
 *
 * On allocation: advance last_alloc and clear the free-space bits; if
 * a mostly-empty group is being used for the "wrong" kind of data, it
 * is moved between the data and metadata radix trees.  On free: lower
 * first_free, optionally re-set the free-space bits (mark_free), and
 * re-tag the group AVAIL once it drops back under half full.
 */
626 static int update_block_group(struct btrfs_trans_handle *trans,
627 struct btrfs_root *root,
628 u64 blocknr, u64 num, int alloc, int mark_free,
631 struct btrfs_block_group_cache *cache;
632 struct btrfs_fs_info *info = root->fs_info;
640 cache = btrfs_lookup_block_group(info, blocknr);
644 block_in_group = blocknr - cache->key.objectid;
645 WARN_ON(block_in_group > cache->key.offset);
/* any used-count change must be written back at commit time */
646 radix_tree_tag_set(cache->radix, cache->key.objectid +
647 cache->key.offset - 1,
648 BTRFS_BLOCK_GROUP_DIRTY);
650 old_val = btrfs_block_group_used(&cache->item);
/* clamp to this group; the remainder is handled on a later pass */
651 num = min(total, cache->key.offset - block_in_group);
653 if (blocknr > cache->last_alloc)
654 cache->last_alloc = blocknr;
656 for (i = 0; i < num; i++) {
657 clear_radix_bit(&info->extent_map_radix,
/* mostly-empty group of the wrong type: re-home it on the other radix */
661 if (cache->data != data &&
662 old_val < (cache->key.offset >> 1)) {
664 radix_tree_delete(cache->radix,
665 cache->key.objectid +
666 cache->key.offset - 1);
670 &info->block_group_data_radix;
672 BTRFS_BLOCK_GROUP_DATA;
674 cache->radix = &info->block_group_radix;
676 ~BTRFS_BLOCK_GROUP_DATA;
678 ret = radix_tree_insert(cache->radix,
679 cache->key.objectid +
680 cache->key.offset - 1,
/* free path: track the lowest free block and restore free-space bits */
686 if (blocknr < cache->first_free)
687 cache->first_free = blocknr;
688 if (!cache->data && mark_free) {
689 for (i = 0; i < num; i++) {
690 set_radix_bit(&info->extent_map_radix,
/* crossing back under half full makes the group allocatable again */
694 if (old_val < (cache->key.offset >> 1) &&
695 old_val + num >= (cache->key.offset >> 1)) {
696 radix_tree_tag_set(cache->radix,
697 cache->key.objectid +
698 cache->key.offset - 1,
699 BTRFS_BLOCK_GROUP_AVAIL);
702 btrfs_set_block_group_used(&cache->item, old_val);
/*
 * try_remove_page - best-effort eviction of a single page from the
 * btree inode's page cache (range [index, index]).
 */
709 static int try_remove_page(struct address_space *mapping, unsigned long index)
713 ret = invalidate_mapping_pages(mapping, index, index);
/*
 * btrfs_copy_pinned - snapshot the currently pinned blocks: copy every
 * set bit from fs_info->pinned_radix into @copy, in gangs of 8.
 */
717 int btrfs_copy_pinned(struct btrfs_root *root, struct radix_tree_root *copy)
719 unsigned long gang[8];
721 struct radix_tree_root *pinned_radix = &root->fs_info->pinned_radix;
726 ret = find_first_radix_bit(pinned_radix, gang, last,
730 for (i = 0 ; i < ret; i++) {
731 set_radix_bit(copy, gang[i]);
/*
 * btrfs_finish_extent_commit - after a transaction commits, unpin the
 * blocks recorded in @unpin_radix.
 *
 * For each block: clear its pinned and unpin bits, drop the owning
 * block group's pinned count, rewind the group's last_alloc /
 * last_prealloc cursors so the block can be reused, re-set the
 * free-space bit for metadata groups, and evict the stale page from
 * the btree inode's page cache.
 */
738 int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
739 struct btrfs_root *root,
740 struct radix_tree_root *unpin_radix)
742 unsigned long gang[8];
743 struct inode *btree_inode = root->fs_info->btree_inode;
744 struct btrfs_block_group_cache *block_group;
748 struct radix_tree_root *pinned_radix = &root->fs_info->pinned_radix;
749 struct radix_tree_root *extent_radix = &root->fs_info->extent_map_radix;
752 ret = find_first_radix_bit(unpin_radix, gang, 0,
758 for (i = 0; i < ret; i++) {
759 clear_radix_bit(pinned_radix, gang[i]);
760 clear_radix_bit(unpin_radix, gang[i]);
761 block_group = btrfs_lookup_block_group(root->fs_info,
764 WARN_ON(block_group->pinned == 0);
765 block_group->pinned--;
/* let the allocator reuse this block on its next pass */
766 if (gang[i] < block_group->last_alloc)
767 block_group->last_alloc = gang[i];
768 if (gang[i] < block_group->last_prealloc)
769 block_group->last_prealloc = gang[i];
770 if (!block_group->data)
771 set_radix_bit(extent_radix, gang[i]);
/* block number -> page index conversion for the btree address space */
773 try_remove_page(btree_inode->i_mapping,
774 gang[i] << (PAGE_CACHE_SHIFT -
775 btree_inode->i_blkbits));
/*
 * finish_current_insert - insert an extent item (refs = 1, owned by the
 * extent root) for every block number queued in
 * fs_info->extent_tree_insert[], bumping the superblock's blocks_used
 * by one per item, then reset the queue.
 */
781 static int finish_current_insert(struct btrfs_trans_handle *trans, struct
782 btrfs_root *extent_root)
784 struct btrfs_key ins;
785 struct btrfs_extent_item extent_item;
788 u64 super_blocks_used;
789 struct btrfs_fs_info *info = extent_root->fs_info;
791 btrfs_set_extent_refs(&extent_item, 1);
794 btrfs_set_key_type(&ins, BTRFS_EXTENT_ITEM_KEY);
795 btrfs_set_extent_owner(&extent_item, extent_root->root_key.objectid);
797 for (i = 0; i < extent_root->fs_info->extent_tree_insert_nr; i++) {
798 ins.objectid = extent_root->fs_info->extent_tree_insert[i];
799 super_blocks_used = btrfs_super_blocks_used(&info->super_copy);
800 btrfs_set_super_blocks_used(&info->super_copy,
801 super_blocks_used + 1);
802 ret = btrfs_insert_item(trans, extent_root, &ins, &extent_item,
803 sizeof(extent_item));
806 extent_root->fs_info->extent_tree_insert_nr = 0;
/*
 * pin_down_block - record @blocknr as pinned (pending == 0, via
 * pinned_radix plus the owning group's pinned count) or as a pending
 * delete (pending != 0, via pending_del_radix).
 *
 * If the block's buffer is cached and its header generation matches the
 * running transaction, the block was created in this transaction and is
 * simply released instead (the elided lines appear to implement that
 * generation check — confirm against the full source).
 */
810 static int pin_down_block(struct btrfs_root *root, u64 blocknr, int pending)
813 struct btrfs_header *header;
814 struct buffer_head *bh;
817 bh = btrfs_find_tree_block(root, blocknr);
819 if (buffer_uptodate(bh)) {
821 root->fs_info->running_transaction->transid;
822 header = btrfs_buffer_header(bh);
823 if (btrfs_header_generation(header) ==
825 btrfs_block_release(root, bh);
829 btrfs_block_release(root, bh);
831 err = set_radix_bit(&root->fs_info->pinned_radix, blocknr);
833 struct btrfs_block_group_cache *cache;
834 cache = btrfs_lookup_block_group(root->fs_info,
840 err = set_radix_bit(&root->fs_info->pending_del_radix, blocknr);
847 * remove an extent from the root, returns 0 on success
/*
 * __free_extent - drop one reference on the extent item for
 * [blocknr, blocknr + num_blocks).
 *
 * Preallocates extent-tree room first (the tree will be COW'd), finds
 * the item with cow=1, and decrements its refcount.  When the count
 * reaches zero the block is pinned (if @pin), the superblock's
 * blocks_used is reduced, the item is deleted, and the owning block
 * group's accounting is updated via update_block_group().
 */
849 static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
850 *root, u64 blocknr, u64 num_blocks, int pin,
853 struct btrfs_path *path;
854 struct btrfs_key key;
855 struct btrfs_fs_info *info = root->fs_info;
856 struct btrfs_root *extent_root = info->extent_root;
858 struct btrfs_extent_item *ei;
859 struct btrfs_key ins;
862 key.objectid = blocknr;
864 btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
865 key.offset = num_blocks;
867 path = btrfs_alloc_path();
/* refill the extent-tree preallocation before modifying the tree */
871 ret = find_free_extent(trans, root, 0, 0, 0, (u64)-1, 0, &ins, 0, 0, 0);
873 btrfs_free_path(path);
877 ret = btrfs_search_slot(trans, extent_root, &key, path, -1, 1);
881 ei = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), path->slots[0],
882 struct btrfs_extent_item);
883 BUG_ON(ei->refs == 0);
884 refs = btrfs_extent_refs(ei) - 1;
885 btrfs_set_extent_refs(ei, refs);
886 btrfs_mark_buffer_dirty(path->nodes[0]);
/* last reference gone: actually release the extent */
888 u64 super_blocks_used;
891 ret = pin_down_block(root, blocknr, 0);
895 super_blocks_used = btrfs_super_blocks_used(&info->super_copy);
896 btrfs_set_super_blocks_used(&info->super_copy,
897 super_blocks_used - num_blocks);
898 ret = btrfs_del_item(trans, extent_root, path);
902 ret = update_block_group(trans, root, blocknr, num_blocks, 0,
906 btrfs_free_path(path);
907 finish_current_insert(trans, extent_root);
912 * find all the blocks marked as pending in the radix tree and remove
913 * them from the extent map
/*
 * del_pending_extents - drain pending_del_radix: for each queued block,
 * move its bit into pinned_radix (bumping the owning block group's
 * pinned count), clear the pending bit, and drop the extent reference
 * via __free_extent().
 */
915 static int del_pending_extents(struct btrfs_trans_handle *trans, struct
916 btrfs_root *extent_root)
921 unsigned long gang[4];
923 struct radix_tree_root *pending_radix;
924 struct radix_tree_root *pinned_radix;
925 struct btrfs_block_group_cache *cache;
927 pending_radix = &extent_root->fs_info->pending_del_radix;
928 pinned_radix = &extent_root->fs_info->pinned_radix;
931 ret = find_first_radix_bit(pending_radix, gang, 0,
935 for (i = 0; i < ret; i++) {
/* pending -> pinned: the block stays unusable until commit finishes */
936 wret = set_radix_bit(pinned_radix, gang[i]);
939 btrfs_lookup_block_group(extent_root->fs_info,
945 printk(KERN_CRIT "set_radix_bit, err %d\n",
949 wret = clear_radix_bit(pending_radix, gang[i]);
951 wret = __free_extent(trans, extent_root,
961 * remove an extent from the root, returns 0 on success
/*
 * btrfs_free_extent - public entry point for dropping a reference on
 * [blocknr, blocknr + num_blocks).
 *
 * Blocks belonging to the extent root itself are only queued as a
 * pending delete (pin_down_block with pending=1) rather than freed
 * inline; everything else goes through __free_extent(), after which
 * any pending deletes are drained.  Returns the first error seen.
 */
963 int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
964 *root, u64 blocknr, u64 num_blocks, int pin)
966 struct btrfs_root *extent_root = root->fs_info->extent_root;
970 if (root == extent_root) {
971 pin_down_block(root, blocknr, 1);
974 ret = __free_extent(trans, root, blocknr, num_blocks, pin, pin == 0);
975 pending_ret = del_pending_extents(trans, root->fs_info->extent_root);
976 return ret ? ret : pending_ret;
980 * walks the btree of allocated extents and find a hole of a given size.
981 * The key ins is changed to record the hole:
982 * ins->objectid == block start
983 * ins->flags = BTRFS_EXTENT_ITEM_KEY
984 * ins->offset == number of blocks
985 * Any available blocks before search_start are skipped.
/*
 * find_free_extent - the allocator core: walk the extent tree looking
 * for a hole of at least num_blocks blocks at or after search_start,
 * recording it in *ins.
 *
 * num_blocks == 0 selects "fill_prealloc" mode: instead of returning a
 * hole, the free blocks found are stored in
 * info->extent_tree_prealloc[] for later use by the extent tree's own
 * COW allocations.  Candidate holes that overlap pinned blocks, queued
 * inserts, the prealloc array, or the caller's exclude range are
 * rejected and the search resumes; if the search space is exhausted it
 * restarts from orig_search_start as a full scan.
 */
987 static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
988 *orig_root, u64 num_blocks, u64 empty_size,
989 u64 search_start, u64 search_end, u64 hint_block,
990 struct btrfs_key *ins, u64 exclude_start,
991 u64 exclude_nr, int data)
993 struct btrfs_path *path;
994 struct btrfs_key key;
1000 u64 orig_search_start = search_start;
1002 struct btrfs_leaf *l;
1003 struct btrfs_root * root = orig_root->fs_info->extent_root;
1004 struct btrfs_fs_info *info = root->fs_info;
1005 int total_needed = num_blocks;
1006 int total_found = 0;
1007 int fill_prealloc = 0;
1009 struct btrfs_block_group_cache *block_group;
1015 btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY);
1017 level = btrfs_header_level(btrfs_buffer_header(root->node));
/* num_blocks == 0: refill the extent-tree preallocation array instead */
1018 if (num_blocks == 0) {
1021 total_needed = (min(level + 1, BTRFS_MAX_LEVEL) + 2) * 3;
1023 if (fill_prealloc) {
/* prealloc already full and usable: answer from it directly */
1025 int nr = info->extent_tree_prealloc_nr;
1026 first = info->extent_tree_prealloc[nr - 1];
1027 if (info->extent_tree_prealloc_nr >= total_needed &&
1028 first >= search_start) {
1029 ins->objectid = info->extent_tree_prealloc[0];
1033 info->extent_tree_prealloc_nr = 0;
/* clamp the search to the end of the filesystem */
1035 if (search_end == (u64)-1)
1036 search_end = btrfs_super_total_blocks(&info->super_copy);
1038 block_group = btrfs_lookup_block_group(info, hint_block);
1039 block_group = btrfs_find_block_group(root, block_group,
1040 hint_block, data, 1);
1042 block_group = btrfs_find_block_group(root,
1043 trans->block_group, 0,
1047 total_needed += empty_size;
1048 path = btrfs_alloc_path();
/* metadata groups use the cached free-bit radix; data groups use the
 * group's last_alloc cursor */
1051 if (!block_group->data)
1052 search_start = find_search_start(root, &block_group,
1053 search_start, total_needed);
1054 else if (!full_scan)
1055 search_start = max(block_group->last_alloc, search_start);
1057 btrfs_init_path(path);
1058 ins->objectid = search_start;
1062 ret = btrfs_search_slot(trans, root, ins, path, 0, 0);
1066 if (path->slots[0] > 0) {
1070 l = btrfs_buffer_leaf(path->nodes[0]);
1071 btrfs_disk_key_to_cpu(&key, &l->items[path->slots[0]].key);
1073 * a rare case, go back one key if we hit a block group item
1074 * instead of an extent item
1076 if (btrfs_key_type(&key) != BTRFS_EXTENT_ITEM_KEY &&
1077 key.objectid + key.offset >= search_start) {
1078 ins->objectid = key.objectid;
1079 ins->offset = key.offset - 1;
1080 btrfs_release_path(root, path);
1081 ret = btrfs_search_slot(trans, root, ins, path, 0, 0);
1085 if (path->slots[0] > 0) {
/* main scan loop: walk items looking for a big enough gap */
1091 l = btrfs_buffer_leaf(path->nodes[0]);
1092 slot = path->slots[0];
1093 if (slot >= btrfs_header_nritems(&l->header)) {
1094 if (fill_prealloc) {
1095 info->extent_tree_prealloc_nr = 0;
1099 limit = last_block +
1100 (block_group->key.offset >> 1);
1102 limit = search_start +
1103 (block_group->key.offset >> 1);
1104 ret = btrfs_next_leaf(root, path);
/* ran past the last item: everything up to search_end is free */
1110 ins->objectid = search_start;
1111 ins->offset = search_end - search_start;
1115 ins->objectid = last_block > search_start ?
1116 last_block : search_start;
1117 ins->offset = search_end - ins->objectid;
1121 btrfs_disk_key_to_cpu(&key, &l->items[slot].key);
1122 if (key.objectid >= search_start && key.objectid > last_block &&
/* gap between last_block and this item: is it big enough? */
1124 if (last_block < search_start)
1125 last_block = search_start;
1126 hole_size = key.objectid - last_block;
1127 if (hole_size >= num_blocks) {
1128 ins->objectid = last_block;
1129 ins->offset = hole_size;
1134 if (btrfs_key_type(&key) != BTRFS_EXTENT_ITEM_KEY)
1138 last_block = key.objectid + key.offset;
/* drifted past this block group: hop to the next one */
1139 if (!full_scan && last_block >= block_group->key.objectid +
1140 block_group->key.offset) {
1141 btrfs_release_path(root, path);
1142 search_start = block_group->key.objectid +
1143 block_group->key.offset * 2;
1151 /* we have to make sure we didn't find an extent that has already
1152 * been allocated by the map tree or the original allocation
1154 btrfs_release_path(root, path);
1155 BUG_ON(ins->objectid < search_start);
1157 if (ins->objectid + num_blocks >= search_end) {
1162 search_start = orig_search_start;
1165 total_needed -= empty_size;
/* reject candidates overlapping pinned blocks */
1171 for (test_block = ins->objectid;
1172 test_block < ins->objectid + num_blocks; test_block++) {
1173 if (test_radix_bit(&info->pinned_radix, test_block)) {
1174 search_start = test_block + 1;
/* reject overlap with queued extent-tree inserts */
1178 if (!fill_prealloc && info->extent_tree_insert_nr) {
1180 info->extent_tree_insert[info->extent_tree_insert_nr - 1];
1181 if (ins->objectid + num_blocks >
1182 info->extent_tree_insert[0] &&
1183 ins->objectid <= last) {
1184 search_start = last + 1;
1185 WARN_ON(!full_scan);
/* reject overlap with already-preallocated extent-tree blocks */
1189 if (!fill_prealloc && info->extent_tree_prealloc_nr) {
1191 info->extent_tree_prealloc[info->extent_tree_prealloc_nr - 1];
1192 if (ins->objectid + num_blocks > first &&
1193 ins->objectid <= info->extent_tree_prealloc[0]) {
1194 search_start = info->extent_tree_prealloc[0] + 1;
/* honour the caller's exclude range */
1198 if (exclude_nr > 0 && (ins->objectid + num_blocks > exclude_start &&
1199 ins->objectid < exclude_nr + exclude_start)) {
1200 search_start = exclude_start + exclude_nr;
/* fill_prealloc mode: pack free blocks into the prealloc array,
 * writing backwards from the end so prealloc[0] is the highest */
1203 if (fill_prealloc) {
1205 test_block = ins->objectid;
1206 if (test_block - info->extent_tree_prealloc[total_needed - 1] >=
1209 info->extent_tree_prealloc_nr = total_found;
1211 while(test_block < ins->objectid + ins->offset &&
1212 total_found < total_needed) {
1213 nr = total_needed - total_found - 1;
1215 info->extent_tree_prealloc[nr] = test_block;
1219 if (total_found < total_needed) {
1220 search_start = test_block;
1223 info->extent_tree_prealloc_nr = total_found;
1226 block_group = btrfs_lookup_block_group(info, ins->objectid);
1229 block_group->last_prealloc =
1230 info->extent_tree_prealloc[total_needed-1];
1232 trans->block_group = block_group;
1235 ins->offset = num_blocks;
1236 btrfs_free_path(path);
/* retry path: wrap to the start and eventually do a full scan */
1240 if (search_start + num_blocks >= search_end) {
1241 search_start = orig_search_start;
1248 total_needed -= empty_size;
1253 block_group = btrfs_lookup_block_group(info, search_start);
1256 block_group = btrfs_find_block_group(root, block_group,
1257 search_start, data, 0);
1261 btrfs_release_path(root, path);
1262 btrfs_free_path(path);
1266 * finds a free extent and does all the dirty work required for allocation
1267 * returns the key for the extent through ins, and a tree buffer for
1268 * the first block of the extent through buf.
1270 * returns 0 if everything worked, non-zero otherwise.
/*
 * btrfs_alloc_extent - allocate num_blocks blocks, insert the matching
 * extent item (refs = 1, owned by @owner), and record the result in
 * *ins.
 *
 * Allocations for the extent root itself are satisfied straight from
 * the preallocation array (no tree search, which would recurse).  For
 * everyone else, extent-tree room is preallocated either before (data)
 * or after (metadata) the real allocation so the extent tree's own
 * blocks land in sensible block groups, then the item is inserted and
 * block group accounting is updated.
 */
1272 int btrfs_alloc_extent(struct btrfs_trans_handle *trans,
1273 struct btrfs_root *root, u64 owner,
1274 u64 num_blocks, u64 empty_size, u64 hint_block,
1275 u64 search_end, struct btrfs_key *ins, int data)
1279 u64 super_blocks_used;
1280 u64 search_start = 0;
1281 u64 exclude_start = 0;
1283 struct btrfs_fs_info *info = root->fs_info;
1284 struct btrfs_root *extent_root = info->extent_root;
1285 struct btrfs_extent_item extent_item;
1286 struct btrfs_key prealloc_key;
1288 btrfs_set_extent_refs(&extent_item, 1);
1289 btrfs_set_extent_owner(&extent_item, owner);
/* extent root: pop one block off the prealloc array and queue the
 * extent item for finish_current_insert() */
1291 if (root == extent_root) {
1293 BUG_ON(info->extent_tree_prealloc_nr == 0);
1294 BUG_ON(num_blocks != 1);
1296 info->extent_tree_prealloc_nr--;
1297 nr = info->extent_tree_prealloc_nr;
1298 ins->objectid = info->extent_tree_prealloc[nr];
1299 info->extent_tree_insert[info->extent_tree_insert_nr++] =
1301 ret = update_block_group(trans, root,
1302 ins->objectid, ins->offset, 1, 0, 0);
1308 * if we're doing a data allocation, preallocate room in the
1309 * extent tree first. This way the extent tree blocks end up
1310 * in the correct block group.
1313 ret = find_free_extent(trans, root, 0, 0, 0,
1314 search_end, 0, &prealloc_key, 0, 0, 0);
/* keep the real allocation away from the blocks just preallocated */
1318 exclude_nr = info->extent_tree_prealloc_nr;
1319 exclude_start = info->extent_tree_prealloc[exclude_nr - 1];
1322 /* do the real allocation */
1323 ret = find_free_extent(trans, root, num_blocks, empty_size,
1324 search_start, search_end, hint_block, ins,
1325 exclude_start, exclude_nr, data);
1331 * if we're doing a metadata allocation, preallocate space in the
1332 * extent tree second. This way, we don't create a tiny hole
1333 * in the allocation map between any unused preallocation blocks
1334 * and the metadata block we're actually allocating. On disk,
1336 * [block we've allocated], [used prealloc 1], [ unused prealloc ]
1337 * The unused prealloc will get reused the next time around.
1340 exclude_start = ins->objectid;
1341 exclude_nr = ins->offset;
1342 hint_block = exclude_start + exclude_nr;
1343 ret = find_free_extent(trans, root, 0, 0, search_start,
1344 search_end, hint_block,
1345 &prealloc_key, exclude_start,
/* account the allocation and write the extent item */
1352 super_blocks_used = btrfs_super_blocks_used(&info->super_copy);
1353 btrfs_set_super_blocks_used(&info->super_copy, super_blocks_used +
1355 ret = btrfs_insert_item(trans, extent_root, ins, &extent_item,
1356 sizeof(extent_item));
1359 finish_current_insert(trans, extent_root);
1360 pending_ret = del_pending_extents(trans, extent_root);
1367 ret = update_block_group(trans, root, ins->objectid, ins->offset, 1, 0,
1374 * helper function to allocate a block for a given tree
1375 * returns the tree buffer or NULL.
/*
 * btrfs_alloc_free_block - allocate a single tree block near @hint and
 * return its buffer head, marked uptodate/checked and queued in the
 * transaction's dirty_pages radix.  On failure the extent is released
 * and an ERR_PTR is returned.
 */
1377 struct buffer_head *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
1378 struct btrfs_root *root, u64 hint,
1381 struct btrfs_key ins;
1383 struct buffer_head *buf;
1385 ret = btrfs_alloc_extent(trans, root, root->root_key.objectid,
1386 1, empty_size, hint,
1387 (unsigned long)-1, &ins, 0);
1390 return ERR_PTR(ret);
1392 buf = btrfs_find_create_tree_block(root, ins.objectid);
/* couldn't get a buffer: give the block back before failing */
1394 btrfs_free_extent(trans, root, ins.objectid, 1, 0);
1395 return ERR_PTR(-ENOMEM);
1397 WARN_ON(buffer_dirty(buf));
1398 set_buffer_uptodate(buf);
1399 set_buffer_checked(buf);
1400 set_radix_bit(&trans->transaction->dirty_pages, buf->b_page->index);
/*
 * drop_leaf_ref - for a leaf being freed, drop one reference on the
 * disk blocks of every real file extent it contains (inline extents
 * and holes, disk_blocknr == 0, reference nothing).
 */
1404 static int drop_leaf_ref(struct btrfs_trans_handle *trans,
1405 struct btrfs_root *root, struct buffer_head *cur)
1407 struct btrfs_disk_key *key;
1408 struct btrfs_leaf *leaf;
1409 struct btrfs_file_extent_item *fi;
1414 BUG_ON(!btrfs_is_leaf(btrfs_buffer_node(cur)));
1415 leaf = btrfs_buffer_leaf(cur);
1416 nritems = btrfs_header_nritems(&leaf->header);
1417 for (i = 0; i < nritems; i++) {
1419 key = &leaf->items[i].key;
1420 if (btrfs_disk_key_type(key) != BTRFS_EXTENT_DATA_KEY)
1422 fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item);
1423 if (btrfs_file_extent_type(fi) == BTRFS_FILE_EXTENT_INLINE)
1426 * FIXME make sure to insert a trans record that
1427 * repeats the snapshot del on crash
1429 disk_blocknr = btrfs_file_extent_disk_blocknr(fi);
1430 if (disk_blocknr == 0)
1432 ret = btrfs_free_extent(trans, root, disk_blocknr,
1433 btrfs_file_extent_disk_num_blocks(fi),
/*
 * reada_walk_down - issue readahead for the children of @node that the
 * snapshot-drop walk is about to visit (only those whose extent
 * refcount makes descent necessary — the condition on 'refs' is
 * partially elided in this excerpt).
 */
1440 static void reada_walk_down(struct btrfs_root *root,
1441 struct btrfs_node *node)
1449 nritems = btrfs_header_nritems(&node->header);
1450 for (i = 0; i < nritems; i++) {
1451 blocknr = btrfs_node_blockptr(node, i);
1452 ret = lookup_extent_ref(NULL, root, blocknr, 1, &refs);
1456 ret = readahead_tree_block(root, blocknr);
1463 * helper function for drop_snapshot, this walks down the tree dropping ref
1464 * counts as it goes.
/*
 * walk_down_tree - descend from path->nodes[*level], dropping extent
 * references as it goes.
 *
 * Children whose extent refcount is above one only lose a reference
 * and are skipped; children down to refcount one are read and entered
 * (leaves via drop_leaf_ref()).  On the way out, the current node's own
 * extent is freed and its path slot cleared.  *level tracks the current
 * depth throughout.
 */
1466 static int walk_down_tree(struct btrfs_trans_handle *trans, struct btrfs_root
1467 *root, struct btrfs_path *path, int *level)
1469 struct buffer_head *next;
1470 struct buffer_head *cur;
1475 WARN_ON(*level < 0);
1476 WARN_ON(*level >= BTRFS_MAX_LEVEL);
1477 ret = lookup_extent_ref(trans, root, bh_blocknr(path->nodes[*level]),
1484 * walk down to the last node level and free all the leaves
1486 while(*level >= 0) {
1487 WARN_ON(*level < 0);
1488 WARN_ON(*level >= BTRFS_MAX_LEVEL);
1489 cur = path->nodes[*level];
/* prefetch children when entering a node for the first time */
1491 if (*level > 0 && path->slots[*level] == 0)
1492 reada_walk_down(root, btrfs_buffer_node(cur));
1494 if (btrfs_header_level(btrfs_buffer_header(cur)) != *level)
1497 if (path->slots[*level] >=
1498 btrfs_header_nritems(btrfs_buffer_header(cur)))
1501 ret = drop_leaf_ref(trans, root, cur);
1505 blocknr = btrfs_node_blockptr(btrfs_buffer_node(cur),
1506 path->slots[*level]);
1507 ret = lookup_extent_ref(trans, root, blocknr, 1, &refs);
/* shared child: just drop our reference and move on */
1510 path->slots[*level]++;
1511 ret = btrfs_free_extent(trans, root, blocknr, 1, 1);
/* sole owner: read the child and descend into it */
1515 next = read_tree_block(root, blocknr);
1516 WARN_ON(*level <= 0);
1517 if (path->nodes[*level-1])
1518 btrfs_block_release(root, path->nodes[*level-1]);
1519 path->nodes[*level-1] = next;
1520 *level = btrfs_header_level(btrfs_buffer_header(next));
1521 path->slots[*level] = 0;
/* done with this node: free its block and pop it off the path */
1524 WARN_ON(*level < 0);
1525 WARN_ON(*level >= BTRFS_MAX_LEVEL);
1526 ret = btrfs_free_extent(trans, root,
1527 bh_blocknr(path->nodes[*level]), 1, 1);
1528 btrfs_block_release(root, path->nodes[*level]);
1529 path->nodes[*level] = NULL;
1536 * helper for dropping snapshots. This walks back up the tree in the path
1537 * to find the first node higher up where we haven't yet gone through all of the slots
/*
 * Helper for btrfs_drop_snapshot: after walk_down_tree exhausts a
 * subtree, climb back up the path looking for the first ancestor that
 * still has unvisited slots.  If one is found, the next key to process
 * is recorded in root_item->drop_progress / drop_level so the drop can
 * resume after a commit or crash.  Fully-consumed nodes on the way up
 * have their extents freed and their path references released.
 *
 * NOTE(review): lines are elided in this listing (the slot++ /
 * *level = i bookkeeping and the return statements); comments describe
 * only what the visible statements establish.
 */
1540 static int walk_up_tree(struct btrfs_trans_handle *trans, struct btrfs_root
1541 *root, struct btrfs_path *path, int *level)
1546 struct btrfs_root_item *root_item = &root->root_item;
/* climb from the current level toward the root while path nodes exist */
1548 for(i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
1549 slot = path->slots[i];
/* this node still has slots beyond the current one */
1550 if (slot < btrfs_header_nritems(
1551 btrfs_buffer_header(path->nodes[i])) - 1) {
1552 struct btrfs_node *node;
1553 node = btrfs_buffer_node(path->nodes[i]);
1556 WARN_ON(*level == 0);
/* persist resume point: key of the slot to continue from */
1557 memcpy(&root_item->drop_progress,
1558 &node->ptrs[path->slots[i]].key,
1559 sizeof(root_item->drop_progress));
1560 root_item->drop_level = i;
/* node fully walked: free its extent and drop the path reference */
1563 ret = btrfs_free_extent(trans, root,
1564 bh_blocknr(path->nodes[*level]),
1567 btrfs_block_release(root, path->nodes[*level]);
1568 path->nodes[*level] = NULL;
1576 * drop the reference count on the tree rooted at 'snap'. This traverses
1577 * the tree freeing any blocks that have a ref count of zero after being decremented.
/*
 * Drop all references held by the snapshot root 'root', freeing blocks
 * whose ref count reaches zero.  The walk is restartable: progress is
 * saved in root_item->drop_progress / drop_level by walk_up_tree, and a
 * fresh invocation resumes from that saved key instead of the tree top.
 * The walk is also bounded per call (num_walks check) so very large
 * drops are spread across transactions.
 *
 * NOTE(review): lines are elided in this listing (variable
 * declarations, error checks on btrfs_alloc_path/btrfs_search_slot,
 * the loop increment and return paths); comments describe only what
 * the visible statements establish.
 */
1580 int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root
1586 struct btrfs_path *path;
1590 struct btrfs_root_item *root_item = &root->root_item;
1592 path = btrfs_alloc_path();
1595 level = btrfs_header_level(btrfs_buffer_header(root->node));
/* objectid 0 in drop_progress means no saved progress: start at the top */
1597 if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
1598 path->nodes[level] = root->node;
1599 path->slots[level] = 0;
1601 struct btrfs_key key;
1602 struct btrfs_disk_key *found_key;
1603 struct btrfs_node *node;
/* resume: search down to the saved key at the saved level */
1605 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
1606 level = root_item->drop_level;
/* stop btrfs_search_slot at the recorded level, not at the leaf */
1607 path->lowest_level = level;
1608 wret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
/* the key found must match the recorded resume point */
1613 node = btrfs_buffer_node(path->nodes[level]);
1614 found_key = &node->ptrs[path->slots[level]].key;
1615 WARN_ON(memcmp(found_key, &root_item->drop_progress,
1616 sizeof(*found_key)));
/* alternate descending and ascending until the tree is consumed */
1619 wret = walk_down_tree(trans, root, path, &level);
1625 wret = walk_up_tree(trans, root, path, &level);
/* limit work per transaction; resume later via drop_progress */
1631 if (num_walks > 10) {
/* release any buffers the path still holds */
1637 for (i = 0; i <= orig_level; i++) {
1638 if (path->nodes[i]) {
1639 btrfs_block_release(root, path->nodes[i]);
1644 btrfs_free_path(path);
/*
 * Empty a block-group radix tree: repeatedly gang-lookup up to 8 cache
 * entries at a time and delete each one.  Entries are indexed by the
 * last block of the group (key.objectid + key.offset - 1), matching the
 * insertion in btrfs_read_block_groups.
 *
 * NOTE(review): elided lines presumably free each cache[i] after
 * deletion and loop until the gang lookup returns 0 — confirm against
 * the full source.
 */
1648 static int free_block_group_radix(struct radix_tree_root *radix)
1651 struct btrfs_block_group_cache *cache[8];
1655 ret = radix_tree_gang_lookup(radix, (void **)cache, 0,
1659 for (i = 0; i < ret; i++) {
/* keyed by the group's last block number */
1660 radix_tree_delete(radix, cache[i]->key.objectid +
1661 cache[i]->key.offset - 1);
/*
 * Tear down all in-memory block-group state at unmount: drain both the
 * metadata and data block-group radix trees, then clear every bit left
 * in the free-extent map radix (scanned in batches of 16 indices).
 *
 * NOTE(review): elided lines presumably loop until find_first_radix_bit
 * returns 0 and combine ret/ret2 into the return value — confirm
 * against the full source.
 */
1668 int btrfs_free_block_groups(struct btrfs_fs_info *info)
1672 unsigned long gang[16];
1675 ret = free_block_group_radix(&info->block_group_radix);
1676 ret2 = free_block_group_radix(&info->block_group_data_radix);
/* sweep the extent-map bitmap in gangs of up to 16 set bits */
1683 ret = find_first_radix_bit(&info->extent_map_radix,
1684 gang, 0, ARRAY_SIZE(gang));
1687 for (i = 0; i < ret; i++) {
1688 clear_radix_bit(&info->extent_map_radix, gang[i]);
1694 int btrfs_read_block_groups(struct btrfs_root *root)
1696 struct btrfs_path *path;
1699 struct btrfs_block_group_item *bi;
1700 struct btrfs_block_group_cache *cache;
1701 struct btrfs_fs_info *info = root->fs_info;
1702 struct radix_tree_root *radix;
1703 struct btrfs_key key;
1704 struct btrfs_key found_key;
1705 struct btrfs_leaf *leaf;
1706 u64 group_size_blocks;
1709 group_size_blocks = BTRFS_BLOCK_GROUP_SIZE >>
1710 root->fs_info->sb->s_blocksize_bits;
1711 root = info->extent_root;
1713 key.offset = group_size_blocks;
1715 btrfs_set_key_type(&key, BTRFS_BLOCK_GROUP_ITEM_KEY);
1717 path = btrfs_alloc_path();
1722 ret = btrfs_search_slot(NULL, info->extent_root,
1728 leaf = btrfs_buffer_leaf(path->nodes[0]);
1729 btrfs_disk_key_to_cpu(&found_key,
1730 &leaf->items[path->slots[0]].key);
1731 cache = kmalloc(sizeof(*cache), GFP_NOFS);
1737 bi = btrfs_item_ptr(leaf, path->slots[0],
1738 struct btrfs_block_group_item);
1739 if (bi->flags & BTRFS_BLOCK_GROUP_DATA) {
1740 radix = &info->block_group_data_radix;
1743 radix = &info->block_group_radix;
1747 memcpy(&cache->item, bi, sizeof(*bi));
1748 memcpy(&cache->key, &found_key, sizeof(found_key));
1749 cache->last_alloc = cache->key.objectid;
1750 cache->first_free = cache->key.objectid;
1751 cache->last_prealloc = cache->key.objectid;
1755 cache->radix = radix;
1757 key.objectid = found_key.objectid + found_key.offset;
1758 btrfs_release_path(root, path);
1759 ret = radix_tree_insert(radix, found_key.objectid +
1760 found_key.offset - 1,
1763 used = btrfs_block_group_used(bi);
1764 if (used < div_factor(key.offset, 8)) {
1765 radix_tree_tag_set(radix, found_key.objectid +
1766 found_key.offset - 1,
1767 BTRFS_BLOCK_GROUP_AVAIL);
1770 btrfs_super_total_blocks(&info->super_copy))
1774 btrfs_free_path(path);