Btrfs: cache the extent tree preallocation
[firefly-linux-kernel-4.4.55.git] / fs / btrfs / extent-tree.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 021110-1307, USA.
17  */
18
19 #include <linux/module.h>
20 #include "ctree.h"
21 #include "disk-io.h"
22 #include "print-tree.h"
23 #include "transaction.h"
24
25 static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
26                             *orig_root, u64 num_blocks, u64 search_start,
27                             u64 search_end, u64 hint_block,
28                             struct btrfs_key *ins, int data);
29 static int finish_current_insert(struct btrfs_trans_handle *trans, struct
30                                  btrfs_root *extent_root);
31 static int del_pending_extents(struct btrfs_trans_handle *trans, struct
32                                btrfs_root *extent_root);
33
34 static void reada_extent_leaves(struct btrfs_root *root,
35                                 struct btrfs_path *path, u64 limit)
36 {
37         struct btrfs_node *node;
38         int i;
39         int nritems;
40         u64 item_objectid;
41         u64 blocknr;
42         int slot;
43         int ret;
44
45         if (!path->nodes[1])
46                 return;
47         node = btrfs_buffer_node(path->nodes[1]);
48         slot = path->slots[1] + 1;
49         nritems = btrfs_header_nritems(&node->header);
50         for (i = slot; i < nritems && i < slot + 8; i++) {
51                 item_objectid = btrfs_disk_key_objectid(&node->ptrs[i].key);
52                 if (item_objectid > limit)
53                         break;
54                 blocknr = btrfs_node_blockptr(node, i);
55                 ret = readahead_tree_block(root, blocknr);
56                 if (ret)
57                         break;
58         }
59 }
60
61 static int cache_block_group(struct btrfs_root *root,
62                              struct btrfs_block_group_cache *block_group)
63 {
64         struct btrfs_path *path;
65         int ret;
66         struct btrfs_key key;
67         struct btrfs_leaf *leaf;
68         struct radix_tree_root *extent_radix;
69         int slot;
70         u64 i;
71         u64 last = 0;
72         u64 hole_size;
73         u64 limit;
74         int found = 0;
75
76         root = root->fs_info->extent_root;
77         extent_radix = &root->fs_info->extent_map_radix;
78
79         if (block_group->cached)
80                 return 0;
81         if (block_group->data)
82                 return 0;
83         path = btrfs_alloc_path();
84         if (!path)
85                 return -ENOMEM;
86         key.objectid = block_group->key.objectid;
87         key.flags = 0;
88         key.offset = 0;
89         btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
90         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
91         if (ret < 0)
92                 return ret;
93         if (ret && path->slots[0] > 0)
94                 path->slots[0]--;
95         limit = block_group->key.objectid + block_group->key.offset;
96         reada_extent_leaves(root, path, limit);
97         while(1) {
98                 leaf = btrfs_buffer_leaf(path->nodes[0]);
99                 slot = path->slots[0];
100                 if (slot >= btrfs_header_nritems(&leaf->header)) {
101                         reada_extent_leaves(root, path, limit);
102                         ret = btrfs_next_leaf(root, path);
103                         if (ret == 0) {
104                                 continue;
105                         } else {
106                                 if (found) {
107                                         hole_size = block_group->key.objectid +
108                                                 block_group->key.offset - last;
109                                 } else {
110                                         last = block_group->key.objectid;
111                                         hole_size = block_group->key.offset;
112                                 }
113                                 for (i = 0; i < hole_size; i++) {
114                                         set_radix_bit(extent_radix,
115                                                       last + i);
116                                 }
117                                 break;
118                         }
119                 }
120                 btrfs_disk_key_to_cpu(&key, &leaf->items[slot].key);
121                 if (key.objectid >= block_group->key.objectid +
122                     block_group->key.offset) {
123                         if (found) {
124                                 hole_size = block_group->key.objectid +
125                                         block_group->key.offset - last;
126                         } else {
127                                 last = block_group->key.objectid;
128                                 hole_size = block_group->key.offset;
129                         }
130                         for (i = 0; i < hole_size; i++) {
131                                 set_radix_bit(extent_radix, last + i);
132                         }
133                         break;
134                 }
135                 if (btrfs_key_type(&key) == BTRFS_EXTENT_ITEM_KEY) {
136                         if (!found) {
137                                 last = key.objectid + key.offset;
138                                 found = 1;
139                         } else {
140                                 hole_size = key.objectid - last;
141                                 for (i = 0; i < hole_size; i++) {
142                                         set_radix_bit(extent_radix, last + i);
143                                 }
144                                 last = key.objectid + key.offset;
145                         }
146                 }
147                 path->slots[0]++;
148         }
149
150         block_group->cached = 1;
151         btrfs_free_path(path);
152         return 0;
153 }
154
155 struct btrfs_block_group_cache *btrfs_lookup_block_group(struct
156                                                          btrfs_fs_info *info,
157                                                          u64 blocknr)
158 {
159         struct btrfs_block_group_cache *block_group;
160         int ret;
161
162         ret = radix_tree_gang_lookup(&info->block_group_radix,
163                                      (void **)&block_group,
164                                      blocknr, 1);
165         if (ret) {
166                 if (block_group->key.objectid <= blocknr && blocknr <=
167                     block_group->key.objectid + block_group->key.offset)
168                         return block_group;
169         }
170         ret = radix_tree_gang_lookup(&info->block_group_data_radix,
171                                      (void **)&block_group,
172                                      blocknr, 1);
173         if (ret) {
174                 if (block_group->key.objectid <= blocknr && blocknr <=
175                     block_group->key.objectid + block_group->key.offset)
176                         return block_group;
177         }
178         return NULL;
179 }
180
181 static u64 leaf_range(struct btrfs_root *root)
182 {
183         u64 size = BTRFS_LEAF_DATA_SIZE(root);
184         do_div(size, sizeof(struct btrfs_extent_item) +
185                 sizeof(struct btrfs_item));
186         return size;
187 }
188
189 static u64 find_search_start(struct btrfs_root *root,
190                              struct btrfs_block_group_cache **cache_ret,
191                              u64 search_start, int num)
192 {
193         unsigned long gang[8];
194         int ret;
195         struct btrfs_block_group_cache *cache = *cache_ret;
196         u64 last = max(search_start, cache->key.objectid);
197
198         if (cache->data)
199                 goto out;
200         if (num > 1) {
201                 last = max(last, cache->last_prealloc);
202         }
203 again:
204         cache_block_group(root, cache);
205         while(1) {
206                 ret = find_first_radix_bit(&root->fs_info->extent_map_radix,
207                                            gang, last, ARRAY_SIZE(gang));
208                 if (!ret)
209                         goto out;
210                 last = gang[ret-1] + 1;
211                 if (num > 1) {
212                         if (ret != ARRAY_SIZE(gang)) {
213                                 goto new_group;
214                         }
215                         if (gang[ret-1] - gang[0] > leaf_range(root)) {
216                                 continue;
217                         }
218                 }
219                 if (gang[0] >= cache->key.objectid + cache->key.offset) {
220                         goto new_group;
221                 }
222                 return gang[0];
223         }
224 out:
225         return max(cache->last_alloc, search_start);
226
227 new_group:
228         cache = btrfs_lookup_block_group(root->fs_info,
229                                          last + cache->key.offset - 1);
230         if (!cache) {
231                 return max((*cache_ret)->last_alloc, search_start);
232         }
233         cache = btrfs_find_block_group(root, cache,
234                                        last + cache->key.offset - 1, 0, 0);
235         *cache_ret = cache;
236         goto again;
237 }
238
239 static u64 div_factor(u64 num, int factor)
240 {
241         num *= factor;
242         do_div(num, 10);
243         return num;
244 }
245
246 struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root,
247                                                  struct btrfs_block_group_cache
248                                                  *hint, u64 search_start,
249                                                  int data, int owner)
250 {
251         struct btrfs_block_group_cache *cache[8];
252         struct btrfs_block_group_cache *found_group = NULL;
253         struct btrfs_fs_info *info = root->fs_info;
254         struct radix_tree_root *radix;
255         struct radix_tree_root *swap_radix;
256         u64 used;
257         u64 last = 0;
258         u64 hint_last;
259         int i;
260         int ret;
261         int full_search = 0;
262         int factor = 8;
263         int data_swap = 0;
264
265         if (!owner)
266                 factor = 5;
267
268         if (data) {
269                 radix = &info->block_group_data_radix;
270                 swap_radix = &info->block_group_radix;
271         } else {
272                 radix = &info->block_group_radix;
273                 swap_radix = &info->block_group_data_radix;
274         }
275
276         if (search_start) {
277                 struct btrfs_block_group_cache *shint;
278                 shint = btrfs_lookup_block_group(info, search_start);
279                 if (shint->data == data) {
280                         used = btrfs_block_group_used(&shint->item);
281                         if (used + shint->pinned <
282                             div_factor(shint->key.offset, factor)) {
283                                 return shint;
284                         }
285                 }
286         }
287         if (hint && hint->data == data) {
288                 used = btrfs_block_group_used(&hint->item);
289                 if (used + hint->pinned <
290                     div_factor(hint->key.offset, factor)) {
291                         return hint;
292                 }
293                 if (used >= div_factor(hint->key.offset, 8)) {
294                         radix_tree_tag_clear(radix,
295                                              hint->key.objectid +
296                                              hint->key.offset - 1,
297                                              BTRFS_BLOCK_GROUP_AVAIL);
298                 }
299                 last = hint->key.offset * 3;
300                 if (hint->key.objectid >= last)
301                         last = max(search_start + hint->key.offset - 1,
302                                    hint->key.objectid - last);
303                 else
304                         last = hint->key.objectid + hint->key.offset;
305                 hint_last = last;
306         } else {
307                 if (hint)
308                         hint_last = max(hint->key.objectid, search_start);
309                 else
310                         hint_last = search_start;
311
312                 last = hint_last;
313         }
314         while(1) {
315                 ret = radix_tree_gang_lookup_tag(radix, (void **)cache,
316                                                  last, ARRAY_SIZE(cache),
317                                                  BTRFS_BLOCK_GROUP_AVAIL);
318                 if (!ret)
319                         break;
320                 for (i = 0; i < ret; i++) {
321                         last = cache[i]->key.objectid +
322                                 cache[i]->key.offset;
323                         used = btrfs_block_group_used(&cache[i]->item);
324                         if (used + cache[i]->pinned <
325                             div_factor(cache[i]->key.offset, factor)) {
326                                 found_group = cache[i];
327                                 goto found;
328                         }
329                         if (used >= div_factor(cache[i]->key.offset, 8)) {
330                                 radix_tree_tag_clear(radix,
331                                                      cache[i]->key.objectid +
332                                                      cache[i]->key.offset - 1,
333                                                      BTRFS_BLOCK_GROUP_AVAIL);
334                         }
335                 }
336                 cond_resched();
337         }
338         last = hint_last;
339 again:
340         while(1) {
341                 ret = radix_tree_gang_lookup(radix, (void **)cache,
342                                              last, ARRAY_SIZE(cache));
343                 if (!ret)
344                         break;
345                 for (i = 0; i < ret; i++) {
346                         last = cache[i]->key.objectid +
347                                 cache[i]->key.offset;
348                         used = btrfs_block_group_used(&cache[i]->item);
349                         if (used + cache[i]->pinned < cache[i]->key.offset) {
350                                 found_group = cache[i];
351                                 goto found;
352                         }
353                         if (used >= cache[i]->key.offset) {
354                                 radix_tree_tag_clear(radix,
355                                                      cache[i]->key.objectid +
356                                                      cache[i]->key.offset - 1,
357                                                      BTRFS_BLOCK_GROUP_AVAIL);
358                         }
359                 }
360                 cond_resched();
361         }
362         if (!full_search) {
363                 last = search_start;
364                 full_search = 1;
365                 goto again;
366         }
367         if (!data_swap) {
368                 struct radix_tree_root *tmp = radix;
369                 data_swap = 1;
370                 radix = swap_radix;
371                 swap_radix = tmp;
372                 last = search_start;
373                 goto again;
374         }
375         if (!found_group) {
376                 ret = radix_tree_gang_lookup(radix,
377                                              (void **)&found_group, 0, 1);
378                 if (ret == 0) {
379                         ret = radix_tree_gang_lookup(swap_radix,
380                                                      (void **)&found_group,
381                                                      0, 1);
382                 }
383                 BUG_ON(ret != 1);
384         }
385 found:
386         return found_group;
387 }
388
389 int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
390                                 struct btrfs_root *root,
391                                 u64 blocknr, u64 num_blocks)
392 {
393         struct btrfs_path *path;
394         int ret;
395         struct btrfs_key key;
396         struct btrfs_leaf *l;
397         struct btrfs_extent_item *item;
398         struct btrfs_key ins;
399         u32 refs;
400
401         find_free_extent(trans, root->fs_info->extent_root, 0, 0, (u64)-1, 0,
402                          &ins, 0);
403         path = btrfs_alloc_path();
404         BUG_ON(!path);
405         key.objectid = blocknr;
406         key.flags = 0;
407         btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
408         key.offset = num_blocks;
409         ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path,
410                                 0, 1);
411         if (ret != 0) {
412                 BUG();
413         }
414         BUG_ON(ret != 0);
415         l = btrfs_buffer_leaf(path->nodes[0]);
416         item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item);
417         refs = btrfs_extent_refs(item);
418         btrfs_set_extent_refs(item, refs + 1);
419         btrfs_mark_buffer_dirty(path->nodes[0]);
420
421         btrfs_release_path(root->fs_info->extent_root, path);
422         btrfs_free_path(path);
423         finish_current_insert(trans, root->fs_info->extent_root);
424         del_pending_extents(trans, root->fs_info->extent_root);
425         return 0;
426 }
427
428 static int lookup_extent_ref(struct btrfs_trans_handle *trans,
429                              struct btrfs_root *root, u64 blocknr,
430                              u64 num_blocks, u32 *refs)
431 {
432         struct btrfs_path *path;
433         int ret;
434         struct btrfs_key key;
435         struct btrfs_leaf *l;
436         struct btrfs_extent_item *item;
437
438         path = btrfs_alloc_path();
439         key.objectid = blocknr;
440         key.offset = num_blocks;
441         key.flags = 0;
442         btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
443         ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path,
444                                 0, 0);
445         if (ret != 0)
446                 BUG();
447         l = btrfs_buffer_leaf(path->nodes[0]);
448         item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item);
449         *refs = btrfs_extent_refs(item);
450         btrfs_release_path(root->fs_info->extent_root, path);
451         btrfs_free_path(path);
452         return 0;
453 }
454
455 int btrfs_inc_root_ref(struct btrfs_trans_handle *trans,
456                        struct btrfs_root *root)
457 {
458         return btrfs_inc_extent_ref(trans, root, bh_blocknr(root->node), 1);
459 }
460
461 int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
462                   struct buffer_head *buf)
463 {
464         u64 blocknr;
465         struct btrfs_node *buf_node;
466         struct btrfs_leaf *buf_leaf;
467         struct btrfs_disk_key *key;
468         struct btrfs_file_extent_item *fi;
469         int i;
470         int leaf;
471         int ret;
472
473         if (!root->ref_cows)
474                 return 0;
475         buf_node = btrfs_buffer_node(buf);
476         leaf = btrfs_is_leaf(buf_node);
477         buf_leaf = btrfs_buffer_leaf(buf);
478         for (i = 0; i < btrfs_header_nritems(&buf_node->header); i++) {
479                 if (leaf) {
480                         u64 disk_blocknr;
481                         key = &buf_leaf->items[i].key;
482                         if (btrfs_disk_key_type(key) != BTRFS_EXTENT_DATA_KEY)
483                                 continue;
484                         fi = btrfs_item_ptr(buf_leaf, i,
485                                             struct btrfs_file_extent_item);
486                         if (btrfs_file_extent_type(fi) ==
487                             BTRFS_FILE_EXTENT_INLINE)
488                                 continue;
489                         disk_blocknr = btrfs_file_extent_disk_blocknr(fi);
490                         if (disk_blocknr == 0)
491                                 continue;
492                         ret = btrfs_inc_extent_ref(trans, root, disk_blocknr,
493                                     btrfs_file_extent_disk_num_blocks(fi));
494                         BUG_ON(ret);
495                 } else {
496                         blocknr = btrfs_node_blockptr(buf_node, i);
497                         ret = btrfs_inc_extent_ref(trans, root, blocknr, 1);
498                         BUG_ON(ret);
499                 }
500         }
501         return 0;
502 }
503
504 static int write_one_cache_group(struct btrfs_trans_handle *trans,
505                                  struct btrfs_root *root,
506                                  struct btrfs_path *path,
507                                  struct btrfs_block_group_cache *cache)
508 {
509         int ret;
510         int pending_ret;
511         struct btrfs_root *extent_root = root->fs_info->extent_root;
512         struct btrfs_block_group_item *bi;
513         struct btrfs_key ins;
514
515         find_free_extent(trans, extent_root, 0, 0, (u64)-1, 0, &ins, 0);
516         ret = btrfs_search_slot(trans, extent_root, &cache->key, path, 0, 1);
517         BUG_ON(ret);
518         bi = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), path->slots[0],
519                             struct btrfs_block_group_item);
520         memcpy(bi, &cache->item, sizeof(*bi));
521         mark_buffer_dirty(path->nodes[0]);
522         btrfs_release_path(extent_root, path);
523
524         finish_current_insert(trans, extent_root);
525         pending_ret = del_pending_extents(trans, extent_root);
526         if (ret)
527                 return ret;
528         if (pending_ret)
529                 return pending_ret;
530         if (cache->data)
531                 cache->last_alloc = cache->first_free;
532         return 0;
533
534 }
535
536 static int write_dirty_block_radix(struct btrfs_trans_handle *trans,
537                                    struct btrfs_root *root,
538                                    struct radix_tree_root *radix)
539 {
540         struct btrfs_block_group_cache *cache[8];
541         int ret;
542         int err = 0;
543         int werr = 0;
544         int i;
545         struct btrfs_path *path;
546
547         path = btrfs_alloc_path();
548         if (!path)
549                 return -ENOMEM;
550
551         while(1) {
552                 ret = radix_tree_gang_lookup_tag(radix, (void **)cache,
553                                                  0, ARRAY_SIZE(cache),
554                                                  BTRFS_BLOCK_GROUP_DIRTY);
555                 if (!ret)
556                         break;
557                 for (i = 0; i < ret; i++) {
558                         radix_tree_tag_clear(radix, cache[i]->key.objectid +
559                                              cache[i]->key.offset - 1,
560                                              BTRFS_BLOCK_GROUP_DIRTY);
561                         err = write_one_cache_group(trans, root,
562                                                     path, cache[i]);
563                         if (err)
564                                 werr = err;
565                 }
566         }
567         btrfs_free_path(path);
568         return werr;
569 }
570
571 int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
572                                    struct btrfs_root *root)
573 {
574         int ret;
575         int ret2;
576         ret = write_dirty_block_radix(trans, root,
577                                       &root->fs_info->block_group_radix);
578         ret2 = write_dirty_block_radix(trans, root,
579                                       &root->fs_info->block_group_data_radix);
580         if (ret)
581                 return ret;
582         if (ret2)
583                 return ret2;
584         return 0;
585 }
586
587 static int update_block_group(struct btrfs_trans_handle *trans,
588                               struct btrfs_root *root,
589                               u64 blocknr, u64 num, int alloc, int mark_free,
590                               int data)
591 {
592         struct btrfs_block_group_cache *cache;
593         struct btrfs_fs_info *info = root->fs_info;
594         u64 total = num;
595         u64 old_val;
596         u64 block_in_group;
597         u64 i;
598         int ret;
599
600         while(total) {
601                 cache = btrfs_lookup_block_group(info, blocknr);
602                 if (!cache) {
603                         return -1;
604                 }
605                 block_in_group = blocknr - cache->key.objectid;
606                 WARN_ON(block_in_group > cache->key.offset);
607                 radix_tree_tag_set(cache->radix, cache->key.objectid +
608                                    cache->key.offset - 1,
609                                    BTRFS_BLOCK_GROUP_DIRTY);
610
611                 old_val = btrfs_block_group_used(&cache->item);
612                 num = min(total, cache->key.offset - block_in_group);
613                 if (alloc) {
614                         if (blocknr > cache->last_alloc)
615                                 cache->last_alloc = blocknr;
616                         if (!cache->data) {
617                                 for (i = 0; i < num; i++) {
618                                         clear_radix_bit(&info->extent_map_radix,
619                                                         blocknr + i);
620                                 }
621                         }
622                         if (cache->data != data &&
623                             old_val < (cache->key.offset >> 1)) {
624                                 cache->data = data;
625                                 radix_tree_delete(cache->radix,
626                                                   cache->key.objectid +
627                                                   cache->key.offset - 1);
628
629                                 if (data) {
630                                         cache->radix =
631                                                 &info->block_group_data_radix;
632                                         cache->item.flags |=
633                                                 BTRFS_BLOCK_GROUP_DATA;
634                                 } else {
635                                         cache->radix = &info->block_group_radix;
636                                         cache->item.flags &=
637                                                 ~BTRFS_BLOCK_GROUP_DATA;
638                                 }
639                                 ret = radix_tree_insert(cache->radix,
640                                                         cache->key.objectid +
641                                                         cache->key.offset - 1,
642                                                         (void *)cache);
643                         }
644                         old_val += num;
645                 } else {
646                         old_val -= num;
647                         if (blocknr < cache->first_free)
648                                 cache->first_free = blocknr;
649                         if (!cache->data && mark_free) {
650                                 for (i = 0; i < num; i++) {
651                                         set_radix_bit(&info->extent_map_radix,
652                                                       blocknr + i);
653                                 }
654                         }
655                         if (old_val < (cache->key.offset >> 1) &&
656                             old_val + num >= (cache->key.offset >> 1)) {
657                                 radix_tree_tag_set(cache->radix,
658                                                    cache->key.objectid +
659                                                    cache->key.offset - 1,
660                                                    BTRFS_BLOCK_GROUP_AVAIL);
661                         }
662                 }
663                 btrfs_set_block_group_used(&cache->item, old_val);
664                 total -= num;
665                 blocknr += num;
666         }
667         return 0;
668 }
669
670 static int try_remove_page(struct address_space *mapping, unsigned long index)
671 {
672         int ret;
673         ret = invalidate_mapping_pages(mapping, index, index);
674         return ret;
675 }
676
677 int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, struct
678                                btrfs_root *root)
679 {
680         unsigned long gang[8];
681         struct inode *btree_inode = root->fs_info->btree_inode;
682         struct btrfs_block_group_cache *block_group;
683         u64 first = 0;
684         int ret;
685         int i;
686         struct radix_tree_root *pinned_radix = &root->fs_info->pinned_radix;
687         struct radix_tree_root *extent_radix = &root->fs_info->extent_map_radix;
688
689         while(1) {
690                 ret = find_first_radix_bit(pinned_radix, gang, 0,
691                                            ARRAY_SIZE(gang));
692                 if (!ret)
693                         break;
694                 if (!first)
695                         first = gang[0];
696                 for (i = 0; i < ret; i++) {
697                         clear_radix_bit(pinned_radix, gang[i]);
698                         block_group = btrfs_lookup_block_group(root->fs_info,
699                                                                gang[i]);
700                         if (block_group) {
701                                 WARN_ON(block_group->pinned == 0);
702                                 block_group->pinned--;
703                                 if (gang[i] < block_group->last_alloc)
704                                         block_group->last_alloc = gang[i];
705                                 if (gang[i] < block_group->last_prealloc)
706                                         block_group->last_prealloc = gang[i];
707                                 if (!block_group->data)
708                                         set_radix_bit(extent_radix, gang[i]);
709                         }
710                         try_remove_page(btree_inode->i_mapping,
711                                         gang[i] << (PAGE_CACHE_SHIFT -
712                                                     btree_inode->i_blkbits));
713                 }
714         }
715         return 0;
716 }
717
718 static int finish_current_insert(struct btrfs_trans_handle *trans, struct
719                                  btrfs_root *extent_root)
720 {
721         struct btrfs_key ins;
722         struct btrfs_extent_item extent_item;
723         int i;
724         int ret;
725         u64 super_blocks_used;
726         struct btrfs_fs_info *info = extent_root->fs_info;
727
728         btrfs_set_extent_refs(&extent_item, 1);
729         ins.offset = 1;
730         ins.flags = 0;
731         btrfs_set_key_type(&ins, BTRFS_EXTENT_ITEM_KEY);
732         btrfs_set_extent_owner(&extent_item, extent_root->root_key.objectid);
733
734         for (i = 0; i < extent_root->fs_info->extent_tree_insert_nr; i++) {
735                 ins.objectid = extent_root->fs_info->extent_tree_insert[i];
736                 super_blocks_used = btrfs_super_blocks_used(info->disk_super);
737                 btrfs_set_super_blocks_used(info->disk_super,
738                                             super_blocks_used + 1);
739                 ret = btrfs_insert_item(trans, extent_root, &ins, &extent_item,
740                                         sizeof(extent_item));
741                 BUG_ON(ret);
742         }
743         extent_root->fs_info->extent_tree_insert_nr = 0;
744         return 0;
745 }
746
747 static int pin_down_block(struct btrfs_root *root, u64 blocknr, int pending)
748 {
749         int err;
750         struct btrfs_header *header;
751         struct buffer_head *bh;
752
753         if (!pending) {
754                 bh = btrfs_find_tree_block(root, blocknr);
755                 if (bh) {
756                         if (buffer_uptodate(bh)) {
757                                 u64 transid =
758                                     root->fs_info->running_transaction->transid;
759                                 header = btrfs_buffer_header(bh);
760                                 if (btrfs_header_generation(header) ==
761                                     transid) {
762                                         btrfs_block_release(root, bh);
763                                         return 0;
764                                 }
765                         }
766                         btrfs_block_release(root, bh);
767                 }
768                 err = set_radix_bit(&root->fs_info->pinned_radix, blocknr);
769                 if (!err) {
770                         struct btrfs_block_group_cache *cache;
771                         cache = btrfs_lookup_block_group(root->fs_info,
772                                                          blocknr);
773                         if (cache)
774                                 cache->pinned++;
775                 }
776         } else {
777                 err = set_radix_bit(&root->fs_info->pending_del_radix, blocknr);
778         }
779         BUG_ON(err < 0);
780         return 0;
781 }
782
783 /*
784  * remove an extent from the root, returns 0 on success
785  */
786 static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
787                          *root, u64 blocknr, u64 num_blocks, int pin,
788                          int mark_free)
789 {
790         struct btrfs_path *path;
791         struct btrfs_key key;
792         struct btrfs_fs_info *info = root->fs_info;
793         struct btrfs_root *extent_root = info->extent_root;
794         int ret;
795         struct btrfs_extent_item *ei;
796         struct btrfs_key ins;
797         u32 refs;
798
799         key.objectid = blocknr;
800         key.flags = 0;
801         btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
802         key.offset = num_blocks;
803
804         find_free_extent(trans, root, 0, 0, (u64)-1, 0, &ins, 0);
805         path = btrfs_alloc_path();
806         BUG_ON(!path);
807
808         ret = btrfs_search_slot(trans, extent_root, &key, path, -1, 1);
809         if (ret) {
810                 BUG();
811         }
812         ei = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), path->slots[0],
813                             struct btrfs_extent_item);
814         BUG_ON(ei->refs == 0);
815         refs = btrfs_extent_refs(ei) - 1;
816         btrfs_set_extent_refs(ei, refs);
817         btrfs_mark_buffer_dirty(path->nodes[0]);
818         if (refs == 0) {
819                 u64 super_blocks_used;
820
821                 if (pin) {
822                         ret = pin_down_block(root, blocknr, 0);
823                         BUG_ON(ret);
824                 }
825
826                 super_blocks_used = btrfs_super_blocks_used(info->disk_super);
827                 btrfs_set_super_blocks_used(info->disk_super,
828                                             super_blocks_used - num_blocks);
829                 ret = btrfs_del_item(trans, extent_root, path);
830                 if (ret)
831                         BUG();
832                 ret = update_block_group(trans, root, blocknr, num_blocks, 0,
833                                          mark_free, 0);
834                 BUG_ON(ret);
835         }
836         btrfs_free_path(path);
837         finish_current_insert(trans, extent_root);
838         return ret;
839 }
840
841 /*
842  * find all the blocks marked as pending in the radix tree and remove
843  * them from the extent map
844  */
845 static int del_pending_extents(struct btrfs_trans_handle *trans, struct
846                                btrfs_root *extent_root)
847 {
848         int ret;
849         int wret;
850         int err = 0;
851         unsigned long gang[4];
852         int i;
853         struct radix_tree_root *pending_radix;
854         struct radix_tree_root *pinned_radix;
855         struct btrfs_block_group_cache *cache;
856
857         pending_radix = &extent_root->fs_info->pending_del_radix;
858         pinned_radix = &extent_root->fs_info->pinned_radix;
859
860         while(1) {
861                 ret = find_first_radix_bit(pending_radix, gang, 0,
862                                            ARRAY_SIZE(gang));
863                 if (!ret)
864                         break;
865                 for (i = 0; i < ret; i++) {
866                         wret = set_radix_bit(pinned_radix, gang[i]);
867                         if (wret == 0) {
868                                 cache =
869                                   btrfs_lookup_block_group(extent_root->fs_info,
870                                                            gang[i]);
871                                 if (cache)
872                                         cache->pinned++;
873                         }
874                         if (wret < 0) {
875                                 printk(KERN_CRIT "set_radix_bit, err %d\n",
876                                        wret);
877                                 BUG_ON(wret < 0);
878                         }
879                         wret = clear_radix_bit(pending_radix, gang[i]);
880                         BUG_ON(wret);
881                         wret = __free_extent(trans, extent_root,
882                                              gang[i], 1, 0, 0);
883                         if (wret)
884                                 err = wret;
885                 }
886         }
887         return err;
888 }
889
890 /*
891  * remove an extent from the root, returns 0 on success
892  */
893 int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
894                       *root, u64 blocknr, u64 num_blocks, int pin)
895 {
896         struct btrfs_root *extent_root = root->fs_info->extent_root;
897         int pending_ret;
898         int ret;
899
900         if (root == extent_root) {
901                 pin_down_block(root, blocknr, 1);
902                 return 0;
903         }
904         ret = __free_extent(trans, root, blocknr, num_blocks, pin, pin == 0);
905         pending_ret = del_pending_extents(trans, root->fs_info->extent_root);
906         return ret ? ret : pending_ret;
907 }
908
909 /*
910  * walks the btree of allocated extents and find a hole of a given size.
911  * The key ins is changed to record the hole:
912  * ins->objectid == block start
913  * ins->flags = BTRFS_EXTENT_ITEM_KEY
914  * ins->offset == number of blocks
915  * Any available blocks before search_start are skipped.
916  */
917 static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
918                             *orig_root, u64 num_blocks, u64 search_start, u64
919                             search_end, u64 hint_block,
920                             struct btrfs_key *ins, int data)
921 {
922         struct btrfs_path *path;
923         struct btrfs_key key;
924         int ret;
925         u64 hole_size = 0;
926         int slot = 0;
927         u64 last_block = 0;
928         u64 test_block;
929         u64 orig_search_start = search_start;
930         int start_found;
931         struct btrfs_leaf *l;
932         struct btrfs_root * root = orig_root->fs_info->extent_root;
933         struct btrfs_fs_info *info = root->fs_info;
934         int total_needed = num_blocks;
935         int total_found = 0;
936         int fill_prealloc = 0;
937         int level;
938         struct btrfs_block_group_cache *block_group;
939         int full_scan = 0;
940         int wrapped = 0;
941         u64 limit;
942
943         path = btrfs_alloc_path();
944         ins->flags = 0;
945         btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY);
946
947         level = btrfs_header_level(btrfs_buffer_header(root->node));
948         if (num_blocks == 0) {
949                 fill_prealloc = 1;
950                 num_blocks = 1;
951                 total_needed = (min(level + 1, BTRFS_MAX_LEVEL) + 2) * 3;
952         }
953         if (fill_prealloc) {
954                 u64 first;
955                 int nr = info->extent_tree_prealloc_nr;
956                 first = info->extent_tree_prealloc[nr - 1];
957                 if (info->extent_tree_prealloc_nr >= total_needed &&
958                     first >= search_start) {
959                         ins->objectid = info->extent_tree_prealloc[0];
960                         ins->offset = 1;
961                         return 0;
962                 }
963                 info->extent_tree_prealloc_nr = 0;
964         }
965         if (search_end == (u64)-1)
966                 search_end = btrfs_super_total_blocks(info->disk_super);
967         if (hint_block) {
968                 block_group = btrfs_lookup_block_group(info, hint_block);
969                 block_group = btrfs_find_block_group(root, block_group,
970                                                      hint_block, data, 1);
971         } else {
972                 block_group = btrfs_find_block_group(root,
973                                                      trans->block_group, 0,
974                                                      data, 1);
975         }
976
977 check_failed:
978         if (!block_group->data)
979                 search_start = find_search_start(root, &block_group,
980                                                  search_start, total_needed);
981         else if (!full_scan)
982                 search_start = max(block_group->last_alloc, search_start);
983
984         btrfs_init_path(path);
985         ins->objectid = search_start;
986         ins->offset = 0;
987         start_found = 0;
988
989         ret = btrfs_search_slot(trans, root, ins, path, 0, 0);
990         if (ret < 0)
991                 goto error;
992
993         if (path->slots[0] > 0) {
994                 path->slots[0]--;
995         }
996
997         l = btrfs_buffer_leaf(path->nodes[0]);
998         btrfs_disk_key_to_cpu(&key, &l->items[path->slots[0]].key);
999         /*
1000          * a rare case, go back one key if we hit a block group item
1001          * instead of an extent item
1002          */
1003         if (btrfs_key_type(&key) != BTRFS_EXTENT_ITEM_KEY &&
1004             key.objectid + key.offset >= search_start) {
1005                 ins->objectid = key.objectid;
1006                 ins->offset = key.offset - 1;
1007                 btrfs_release_path(root, path);
1008                 ret = btrfs_search_slot(trans, root, ins, path, 0, 0);
1009                 if (ret < 0)
1010                         goto error;
1011
1012                 if (path->slots[0] > 0) {
1013                         path->slots[0]--;
1014                 }
1015         }
1016
1017         while (1) {
1018                 l = btrfs_buffer_leaf(path->nodes[0]);
1019                 slot = path->slots[0];
1020                 if (slot >= btrfs_header_nritems(&l->header)) {
1021                         if (fill_prealloc) {
1022                                 info->extent_tree_prealloc_nr = 0;
1023                                 total_found = 0;
1024                         }
1025                         if (start_found)
1026                                 limit = last_block +
1027                                         (block_group->key.offset >> 1);
1028                         else
1029                                 limit = search_start +
1030                                         (block_group->key.offset >> 1);
1031                         ret = btrfs_next_leaf(root, path);
1032                         if (ret == 0)
1033                                 continue;
1034                         if (ret < 0)
1035                                 goto error;
1036                         if (!start_found) {
1037                                 ins->objectid = search_start;
1038                                 ins->offset = search_end - search_start;
1039                                 start_found = 1;
1040                                 goto check_pending;
1041                         }
1042                         ins->objectid = last_block > search_start ?
1043                                         last_block : search_start;
1044                         ins->offset = search_end - ins->objectid;
1045                         goto check_pending;
1046                 }
1047
1048                 btrfs_disk_key_to_cpu(&key, &l->items[slot].key);
1049                 if (key.objectid >= search_start && key.objectid > last_block &&
1050                     start_found) {
1051                         if (last_block < search_start)
1052                                 last_block = search_start;
1053                         hole_size = key.objectid - last_block;
1054                         if (hole_size >= num_blocks) {
1055                                 ins->objectid = last_block;
1056                                 ins->offset = hole_size;
1057                                 goto check_pending;
1058                         }
1059                 }
1060
1061                 if (btrfs_key_type(&key) != BTRFS_EXTENT_ITEM_KEY)
1062                         goto next;
1063
1064                 start_found = 1;
1065                 last_block = key.objectid + key.offset;
1066                 if (!full_scan && last_block >= block_group->key.objectid +
1067                     block_group->key.offset) {
1068                         btrfs_release_path(root, path);
1069                         search_start = block_group->key.objectid +
1070                                 block_group->key.offset * 2;
1071                         goto new_group;
1072                 }
1073 next:
1074                 path->slots[0]++;
1075                 cond_resched();
1076         }
1077         // FIXME -ENOSPC
1078 check_pending:
1079         /* we have to make sure we didn't find an extent that has already
1080          * been allocated by the map tree or the original allocation
1081          */
1082         btrfs_release_path(root, path);
1083         BUG_ON(ins->objectid < search_start);
1084
1085         if (ins->objectid + num_blocks >= search_end) {
1086                 if (full_scan) {
1087                         ret = -ENOSPC;
1088                         goto error;
1089                 }
1090                 search_start = orig_search_start;
1091                 if (wrapped)
1092                         full_scan = 1;
1093                 else
1094                         wrapped = 1;
1095                 goto new_group;
1096         }
1097         for (test_block = ins->objectid;
1098              test_block < ins->objectid + num_blocks; test_block++) {
1099                 if (test_radix_bit(&info->pinned_radix, test_block)) {
1100                         search_start = test_block + 1;
1101                         goto new_group;
1102                 }
1103         }
1104         if (!fill_prealloc && info->extent_tree_insert_nr) {
1105                 u64 last =
1106                   info->extent_tree_insert[info->extent_tree_insert_nr - 1];
1107                 if (ins->objectid + num_blocks >
1108                     info->extent_tree_insert[0] &&
1109                     ins->objectid <= last) {
1110                         search_start = last + 1;
1111                         WARN_ON(!full_scan);
1112                         goto new_group;
1113                 }
1114         }
1115         if (!fill_prealloc && info->extent_tree_prealloc_nr) {
1116                 u64 first =
1117                   info->extent_tree_prealloc[info->extent_tree_prealloc_nr - 1];
1118                 if (ins->objectid + num_blocks > first &&
1119                     ins->objectid <= info->extent_tree_prealloc[0]) {
1120                         search_start = info->extent_tree_prealloc[0] + 1;
1121                         goto new_group;
1122                 }
1123         }
1124         if (fill_prealloc) {
1125                 int nr;
1126                 test_block = ins->objectid;
1127                 if (test_block - info->extent_tree_prealloc[total_needed - 1] >=
1128                     leaf_range(root)) {
1129                         total_found = 0;
1130                         info->extent_tree_prealloc_nr = total_found;
1131                 }
1132                 while(test_block < ins->objectid + ins->offset &&
1133                       total_found < total_needed) {
1134                         nr = total_needed - total_found - 1;
1135                         BUG_ON(nr < 0);
1136                         info->extent_tree_prealloc[nr] = test_block;
1137                         total_found++;
1138                         test_block++;
1139                 }
1140                 if (total_found < total_needed) {
1141                         search_start = test_block;
1142                         goto new_group;
1143                 }
1144                 info->extent_tree_prealloc_nr = total_found;
1145         }
1146         if (!data) {
1147                 block_group = btrfs_lookup_block_group(info, ins->objectid);
1148                 if (block_group) {
1149                         if (fill_prealloc)
1150                                 block_group->last_prealloc =
1151                                      info->extent_tree_prealloc[total_needed-1];
1152                         else
1153                                 trans->block_group = block_group;
1154                 }
1155         }
1156         ins->offset = num_blocks;
1157         btrfs_free_path(path);
1158         return 0;
1159
1160 new_group:
1161         if (search_start + num_blocks >= search_end) {
1162                 search_start = orig_search_start;
1163                 if (full_scan) {
1164                         ret = -ENOSPC;
1165                         goto error;
1166                 }
1167                 if (wrapped)
1168                         full_scan = 1;
1169                 else
1170                         wrapped = 1;
1171         }
1172         block_group = btrfs_lookup_block_group(info, search_start);
1173         cond_resched();
1174         if (!full_scan)
1175                 block_group = btrfs_find_block_group(root, block_group,
1176                                                      search_start, data, 0);
1177         goto check_failed;
1178
1179 error:
1180         btrfs_release_path(root, path);
1181         btrfs_free_path(path);
1182         return ret;
1183 }
1184 /*
1185  * finds a free extent and does all the dirty work required for allocation
1186  * returns the key for the extent through ins, and a tree buffer for
1187  * the first block of the extent through buf.
1188  *
1189  * returns 0 if everything worked, non-zero otherwise.
1190  */
1191 int btrfs_alloc_extent(struct btrfs_trans_handle *trans,
1192                        struct btrfs_root *root, u64 owner,
1193                        u64 num_blocks, u64 hint_block,
1194                        u64 search_end, struct btrfs_key *ins, int data)
1195 {
1196         int ret;
1197         int pending_ret;
1198         u64 super_blocks_used;
1199         u64 search_start = 0;
1200         struct btrfs_fs_info *info = root->fs_info;
1201         struct btrfs_root *extent_root = info->extent_root;
1202         struct btrfs_extent_item extent_item;
1203         struct btrfs_key prealloc_key;
1204
1205         btrfs_set_extent_refs(&extent_item, 1);
1206         btrfs_set_extent_owner(&extent_item, owner);
1207
1208         if (root == extent_root) {
1209                 int nr;
1210                 BUG_ON(info->extent_tree_prealloc_nr == 0);
1211                 BUG_ON(num_blocks != 1);
1212                 ins->offset = 1;
1213                 info->extent_tree_prealloc_nr--;
1214                 nr = info->extent_tree_prealloc_nr;
1215                 ins->objectid = info->extent_tree_prealloc[nr];
1216                 info->extent_tree_insert[info->extent_tree_insert_nr++] =
1217                         ins->objectid;
1218                 ret = update_block_group(trans, root,
1219                                          ins->objectid, ins->offset, 1, 0, 0);
1220                 BUG_ON(ret);
1221                 return 0;
1222         }
1223
1224         /*
1225          * if we're doing a data allocation, preallocate room in the
1226          * extent tree first.  This way the extent tree blocks end up
1227          * in the correct block group.
1228          */
1229         if (data) {
1230                 ret = find_free_extent(trans, root, 0, 0,
1231                                        search_end, 0, &prealloc_key, 0);
1232                 if (ret) {
1233                         return ret;
1234                 }
1235                 if (prealloc_key.objectid + prealloc_key.offset >= search_end) {
1236                         int nr = info->extent_tree_prealloc_nr;
1237                         search_end = info->extent_tree_prealloc[nr - 1] - 1;
1238                 } else {
1239                         search_start = info->extent_tree_prealloc[0] + 1;
1240                 }
1241         }
1242         if (hint_block < search_start)
1243                 hint_block = search_start;
1244         /* do the real allocation */
1245         ret = find_free_extent(trans, root, num_blocks, search_start,
1246                                search_end, hint_block, ins, data);
1247         if (ret) {
1248                 return ret;
1249         }
1250
1251         /*
1252          * if we're doing a metadata allocation, preallocate space in the
1253          * extent tree second.  This way, we don't create a tiny hole
1254          * in the allocation map between any unused preallocation blocks
1255          * and the metadata block we're actually allocating.  On disk,
1256          * it'll go:
1257          * [block we've allocated], [used prealloc 1], [ unused prealloc ]
1258          * The unused prealloc will get reused the next time around.
1259          */
1260         if (!data) {
1261                 if (ins->objectid + ins->offset >= search_end)
1262                         search_end = ins->objectid - 1;
1263                 else
1264                         search_start = ins->objectid + ins->offset;
1265
1266                 if (hint_block < search_start)
1267                         hint_block = search_start;
1268
1269                 ret = find_free_extent(trans, root, 0, search_start,
1270                                        search_end, hint_block,
1271                                        &prealloc_key, 0);
1272                 if (ret) {
1273                         return ret;
1274                 }
1275         }
1276
1277         super_blocks_used = btrfs_super_blocks_used(info->disk_super);
1278         btrfs_set_super_blocks_used(info->disk_super, super_blocks_used +
1279                                     num_blocks);
1280         ret = btrfs_insert_item(trans, extent_root, ins, &extent_item,
1281                                 sizeof(extent_item));
1282
1283         finish_current_insert(trans, extent_root);
1284         pending_ret = del_pending_extents(trans, extent_root);
1285         if (ret) {
1286                 return ret;
1287         }
1288         if (pending_ret) {
1289                 return pending_ret;
1290         }
1291         ret = update_block_group(trans, root, ins->objectid, ins->offset, 1, 0,
1292                                  data);
1293         BUG_ON(ret);
1294         return 0;
1295 }
1296
1297 /*
1298  * helper function to allocate a block for a given tree
1299  * returns the tree buffer or NULL.
1300  */
1301 struct buffer_head *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
1302                                            struct btrfs_root *root, u64 hint)
1303 {
1304         struct btrfs_key ins;
1305         int ret;
1306         struct buffer_head *buf;
1307
1308         ret = btrfs_alloc_extent(trans, root, root->root_key.objectid,
1309                                  1, hint, (unsigned long)-1, &ins, 0);
1310         if (ret) {
1311                 BUG();
1312                 return NULL;
1313         }
1314         BUG_ON(ret);
1315         buf = btrfs_find_create_tree_block(root, ins.objectid);
1316         set_buffer_uptodate(buf);
1317         set_buffer_checked(buf);
1318         set_radix_bit(&trans->transaction->dirty_pages, buf->b_page->index);
1319         return buf;
1320 }
1321
1322 static int drop_leaf_ref(struct btrfs_trans_handle *trans,
1323                          struct btrfs_root *root, struct buffer_head *cur)
1324 {
1325         struct btrfs_disk_key *key;
1326         struct btrfs_leaf *leaf;
1327         struct btrfs_file_extent_item *fi;
1328         int i;
1329         int nritems;
1330         int ret;
1331
1332         BUG_ON(!btrfs_is_leaf(btrfs_buffer_node(cur)));
1333         leaf = btrfs_buffer_leaf(cur);
1334         nritems = btrfs_header_nritems(&leaf->header);
1335         for (i = 0; i < nritems; i++) {
1336                 u64 disk_blocknr;
1337                 key = &leaf->items[i].key;
1338                 if (btrfs_disk_key_type(key) != BTRFS_EXTENT_DATA_KEY)
1339                         continue;
1340                 fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item);
1341                 if (btrfs_file_extent_type(fi) == BTRFS_FILE_EXTENT_INLINE)
1342                         continue;
1343                 /*
1344                  * FIXME make sure to insert a trans record that
1345                  * repeats the snapshot del on crash
1346                  */
1347                 disk_blocknr = btrfs_file_extent_disk_blocknr(fi);
1348                 if (disk_blocknr == 0)
1349                         continue;
1350                 ret = btrfs_free_extent(trans, root, disk_blocknr,
1351                                         btrfs_file_extent_disk_num_blocks(fi),
1352                                         0);
1353                 BUG_ON(ret);
1354         }
1355         return 0;
1356 }
1357
1358 /*
1359  * helper function for drop_snapshot, this walks down the tree dropping ref
1360  * counts as it goes.
1361  */
1362 static int walk_down_tree(struct btrfs_trans_handle *trans, struct btrfs_root
1363                           *root, struct btrfs_path *path, int *level)
1364 {
1365         struct buffer_head *next;
1366         struct buffer_head *cur;
1367         u64 blocknr;
1368         int ret;
1369         u32 refs;
1370
1371         WARN_ON(*level < 0);
1372         WARN_ON(*level >= BTRFS_MAX_LEVEL);
1373         ret = lookup_extent_ref(trans, root, bh_blocknr(path->nodes[*level]),
1374                                1, &refs);
1375         BUG_ON(ret);
1376         if (refs > 1)
1377                 goto out;
1378         /*
1379          * walk down to the last node level and free all the leaves
1380          */
1381         while(*level >= 0) {
1382                 WARN_ON(*level < 0);
1383                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
1384                 cur = path->nodes[*level];
1385                 if (btrfs_header_level(btrfs_buffer_header(cur)) != *level)
1386                         WARN_ON(1);
1387                 if (path->slots[*level] >=
1388                     btrfs_header_nritems(btrfs_buffer_header(cur)))
1389                         break;
1390                 if (*level == 0) {
1391                         ret = drop_leaf_ref(trans, root, cur);
1392                         BUG_ON(ret);
1393                         break;
1394                 }
1395                 blocknr = btrfs_node_blockptr(btrfs_buffer_node(cur),
1396                                               path->slots[*level]);
1397                 ret = lookup_extent_ref(trans, root, blocknr, 1, &refs);
1398                 BUG_ON(ret);
1399                 if (refs != 1) {
1400                         path->slots[*level]++;
1401                         ret = btrfs_free_extent(trans, root, blocknr, 1, 1);
1402                         BUG_ON(ret);
1403                         continue;
1404                 }
1405                 next = read_tree_block(root, blocknr);
1406                 WARN_ON(*level <= 0);
1407                 if (path->nodes[*level-1])
1408                         btrfs_block_release(root, path->nodes[*level-1]);
1409                 path->nodes[*level-1] = next;
1410                 *level = btrfs_header_level(btrfs_buffer_header(next));
1411                 path->slots[*level] = 0;
1412         }
1413 out:
1414         WARN_ON(*level < 0);
1415         WARN_ON(*level >= BTRFS_MAX_LEVEL);
1416         ret = btrfs_free_extent(trans, root,
1417                                 bh_blocknr(path->nodes[*level]), 1, 1);
1418         btrfs_block_release(root, path->nodes[*level]);
1419         path->nodes[*level] = NULL;
1420         *level += 1;
1421         BUG_ON(ret);
1422         return 0;
1423 }
1424
1425 /*
1426  * helper for dropping snapshots.  This walks back up the tree in the path
1427  * to find the first node higher up where we haven't yet gone through
1428  * all the slots
1429  */
1430 static int walk_up_tree(struct btrfs_trans_handle *trans, struct btrfs_root
1431                         *root, struct btrfs_path *path, int *level)
1432 {
1433         int i;
1434         int slot;
1435         int ret;
1436         for(i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
1437                 slot = path->slots[i];
1438                 if (slot < btrfs_header_nritems(
1439                     btrfs_buffer_header(path->nodes[i])) - 1) {
1440                         path->slots[i]++;
1441                         *level = i;
1442                         return 0;
1443                 } else {
1444                         ret = btrfs_free_extent(trans, root,
1445                                                 bh_blocknr(path->nodes[*level]),
1446                                                 1, 1);
1447                         BUG_ON(ret);
1448                         btrfs_block_release(root, path->nodes[*level]);
1449                         path->nodes[*level] = NULL;
1450                         *level = i + 1;
1451                 }
1452         }
1453         return 1;
1454 }
1455
1456 /*
1457  * drop the reference count on the tree rooted at 'snap'.  This traverses
1458  * the tree freeing any blocks that have a ref count of zero after being
1459  * decremented.
1460  */
1461 int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root
1462                         *root, struct buffer_head *snap)
1463 {
1464         int ret = 0;
1465         int wret;
1466         int level;
1467         struct btrfs_path *path;
1468         int i;
1469         int orig_level;
1470
1471         path = btrfs_alloc_path();
1472         BUG_ON(!path);
1473
1474         level = btrfs_header_level(btrfs_buffer_header(snap));
1475         orig_level = level;
1476         path->nodes[level] = snap;
1477         path->slots[level] = 0;
1478         while(1) {
1479                 wret = walk_down_tree(trans, root, path, &level);
1480                 if (wret > 0)
1481                         break;
1482                 if (wret < 0)
1483                         ret = wret;
1484
1485                 wret = walk_up_tree(trans, root, path, &level);
1486                 if (wret > 0)
1487                         break;
1488                 if (wret < 0)
1489                         ret = wret;
1490         }
1491         for (i = 0; i <= orig_level; i++) {
1492                 if (path->nodes[i]) {
1493                         btrfs_block_release(root, path->nodes[i]);
1494                 }
1495         }
1496         btrfs_free_path(path);
1497         return ret;
1498 }
1499
1500 static int free_block_group_radix(struct radix_tree_root *radix)
1501 {
1502         int ret;
1503         struct btrfs_block_group_cache *cache[8];
1504         int i;
1505
1506         while(1) {
1507                 ret = radix_tree_gang_lookup(radix, (void **)cache, 0,
1508                                              ARRAY_SIZE(cache));
1509                 if (!ret)
1510                         break;
1511                 for (i = 0; i < ret; i++) {
1512                         radix_tree_delete(radix, cache[i]->key.objectid +
1513                                           cache[i]->key.offset - 1);
1514                         kfree(cache[i]);
1515                 }
1516         }
1517         return 0;
1518 }
1519
1520 int btrfs_free_block_groups(struct btrfs_fs_info *info)
1521 {
1522         int ret;
1523         int ret2;
1524         unsigned long gang[16];
1525         int i;
1526
1527         ret = free_block_group_radix(&info->block_group_radix);
1528         ret2 = free_block_group_radix(&info->block_group_data_radix);
1529         if (ret)
1530                 return ret;
1531         if (ret2)
1532                 return ret2;
1533
1534         while(1) {
1535                 ret = find_first_radix_bit(&info->extent_map_radix,
1536                                            gang, 0, ARRAY_SIZE(gang));
1537                 if (!ret)
1538                         break;
1539                 for (i = 0; i < ret; i++) {
1540                         clear_radix_bit(&info->extent_map_radix, gang[i]);
1541                 }
1542         }
1543         return 0;
1544 }
1545
1546 int btrfs_read_block_groups(struct btrfs_root *root)
1547 {
1548         struct btrfs_path *path;
1549         int ret;
1550         int err = 0;
1551         struct btrfs_block_group_item *bi;
1552         struct btrfs_block_group_cache *cache;
1553         struct btrfs_fs_info *info = root->fs_info;
1554         struct radix_tree_root *radix;
1555         struct btrfs_key key;
1556         struct btrfs_key found_key;
1557         struct btrfs_leaf *leaf;
1558         u64 group_size_blocks;
1559         u64 used;
1560
1561         group_size_blocks = BTRFS_BLOCK_GROUP_SIZE >>
1562                 root->fs_info->sb->s_blocksize_bits;
1563         root = info->extent_root;
1564         key.objectid = 0;
1565         key.offset = group_size_blocks;
1566         key.flags = 0;
1567         btrfs_set_key_type(&key, BTRFS_BLOCK_GROUP_ITEM_KEY);
1568
1569         path = btrfs_alloc_path();
1570         if (!path)
1571                 return -ENOMEM;
1572
1573         while(1) {
1574                 ret = btrfs_search_slot(NULL, info->extent_root,
1575                                         &key, path, 0, 0);
1576                 if (ret != 0) {
1577                         err = ret;
1578                         break;
1579                 }
1580                 leaf = btrfs_buffer_leaf(path->nodes[0]);
1581                 btrfs_disk_key_to_cpu(&found_key,
1582                                       &leaf->items[path->slots[0]].key);
1583                 cache = kmalloc(sizeof(*cache), GFP_NOFS);
1584                 if (!cache) {
1585                         err = -1;
1586                         break;
1587                 }
1588
1589                 bi = btrfs_item_ptr(leaf, path->slots[0],
1590                                     struct btrfs_block_group_item);
1591                 if (bi->flags & BTRFS_BLOCK_GROUP_DATA) {
1592                         radix = &info->block_group_data_radix;
1593                         cache->data = 1;
1594                 } else {
1595                         radix = &info->block_group_radix;
1596                         cache->data = 0;
1597                 }
1598
1599                 memcpy(&cache->item, bi, sizeof(*bi));
1600                 memcpy(&cache->key, &found_key, sizeof(found_key));
1601                 cache->last_alloc = cache->key.objectid;
1602                 cache->first_free = cache->key.objectid;
1603                 cache->last_prealloc = cache->key.objectid;
1604                 cache->pinned = 0;
1605                 cache->cached = 0;
1606
1607                 cache->radix = radix;
1608
1609                 key.objectid = found_key.objectid + found_key.offset;
1610                 btrfs_release_path(root, path);
1611                 ret = radix_tree_insert(radix, found_key.objectid +
1612                                         found_key.offset - 1,
1613                                         (void *)cache);
1614                 BUG_ON(ret);
1615                 used = btrfs_block_group_used(bi);
1616                 if (used < div_factor(key.offset, 8)) {
1617                         radix_tree_tag_set(radix, found_key.objectid +
1618                                            found_key.offset - 1,
1619                                            BTRFS_BLOCK_GROUP_AVAIL);
1620                 }
1621                 if (key.objectid >=
1622                     btrfs_super_total_blocks(info->disk_super))
1623                         break;
1624         }
1625
1626         btrfs_free_path(path);
1627         return 0;
1628 }