Btrfs: reada while dropping snapshots
[firefly-linux-kernel-4.4.55.git] / fs / btrfs / extent-tree.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 021110-1307, USA.
17  */
18
19 #include <linux/module.h>
20 #include "ctree.h"
21 #include "disk-io.h"
22 #include "print-tree.h"
23 #include "transaction.h"
24
25 static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
26                             *orig_root, u64 num_blocks, u64 search_start,
27                             u64 search_end, u64 hint_block,
28                             struct btrfs_key *ins, int data);
29 static int finish_current_insert(struct btrfs_trans_handle *trans, struct
30                                  btrfs_root *extent_root);
31 static int del_pending_extents(struct btrfs_trans_handle *trans, struct
32                                btrfs_root *extent_root);
33
34 static void reada_extent_leaves(struct btrfs_root *root,
35                                 struct btrfs_path *path, u64 limit)
36 {
37         struct btrfs_node *node;
38         int i;
39         int nritems;
40         u64 item_objectid;
41         u64 blocknr;
42         int slot;
43         int ret;
44
45         if (!path->nodes[1])
46                 return;
47         node = btrfs_buffer_node(path->nodes[1]);
48         slot = path->slots[1] + 1;
49         nritems = btrfs_header_nritems(&node->header);
50         for (i = slot; i < nritems && i < slot + 8; i++) {
51                 item_objectid = btrfs_disk_key_objectid(&node->ptrs[i].key);
52                 if (item_objectid > limit)
53                         break;
54                 blocknr = btrfs_node_blockptr(node, i);
55                 ret = readahead_tree_block(root, blocknr);
56                 if (ret)
57                         break;
58         }
59 }
60
61 static int cache_block_group(struct btrfs_root *root,
62                              struct btrfs_block_group_cache *block_group)
63 {
64         struct btrfs_path *path;
65         int ret;
66         struct btrfs_key key;
67         struct btrfs_leaf *leaf;
68         struct radix_tree_root *extent_radix;
69         int slot;
70         u64 i;
71         u64 last = 0;
72         u64 hole_size;
73         u64 limit;
74         int found = 0;
75
76         root = root->fs_info->extent_root;
77         extent_radix = &root->fs_info->extent_map_radix;
78
79         if (block_group->cached)
80                 return 0;
81         if (block_group->data)
82                 return 0;
83         path = btrfs_alloc_path();
84         if (!path)
85                 return -ENOMEM;
86         key.objectid = block_group->key.objectid;
87         key.flags = 0;
88         key.offset = 0;
89         btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
90         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
91         if (ret < 0)
92                 return ret;
93         if (ret && path->slots[0] > 0)
94                 path->slots[0]--;
95         limit = block_group->key.objectid + block_group->key.offset;
96         reada_extent_leaves(root, path, limit);
97         while(1) {
98                 leaf = btrfs_buffer_leaf(path->nodes[0]);
99                 slot = path->slots[0];
100                 if (slot >= btrfs_header_nritems(&leaf->header)) {
101                         reada_extent_leaves(root, path, limit);
102                         ret = btrfs_next_leaf(root, path);
103                         if (ret == 0) {
104                                 continue;
105                         } else {
106                                 if (found) {
107                                         hole_size = block_group->key.objectid +
108                                                 block_group->key.offset - last;
109                                 } else {
110                                         last = block_group->key.objectid;
111                                         hole_size = block_group->key.offset;
112                                 }
113                                 for (i = 0; i < hole_size; i++) {
114                                         set_radix_bit(extent_radix,
115                                                       last + i);
116                                 }
117                                 break;
118                         }
119                 }
120                 btrfs_disk_key_to_cpu(&key, &leaf->items[slot].key);
121                 if (key.objectid >= block_group->key.objectid +
122                     block_group->key.offset) {
123                         if (found) {
124                                 hole_size = block_group->key.objectid +
125                                         block_group->key.offset - last;
126                         } else {
127                                 last = block_group->key.objectid;
128                                 hole_size = block_group->key.offset;
129                         }
130                         for (i = 0; i < hole_size; i++) {
131                                 set_radix_bit(extent_radix, last + i);
132                         }
133                         break;
134                 }
135                 if (btrfs_key_type(&key) == BTRFS_EXTENT_ITEM_KEY) {
136                         if (!found) {
137                                 last = key.objectid + key.offset;
138                                 found = 1;
139                         } else {
140                                 hole_size = key.objectid - last;
141                                 for (i = 0; i < hole_size; i++) {
142                                         set_radix_bit(extent_radix, last + i);
143                                 }
144                                 last = key.objectid + key.offset;
145                         }
146                 }
147                 path->slots[0]++;
148         }
149
150         block_group->cached = 1;
151         btrfs_free_path(path);
152         return 0;
153 }
154
155 struct btrfs_block_group_cache *btrfs_lookup_block_group(struct
156                                                          btrfs_fs_info *info,
157                                                          u64 blocknr)
158 {
159         struct btrfs_block_group_cache *block_group;
160         int ret;
161
162         ret = radix_tree_gang_lookup(&info->block_group_radix,
163                                      (void **)&block_group,
164                                      blocknr, 1);
165         if (ret) {
166                 if (block_group->key.objectid <= blocknr && blocknr <=
167                     block_group->key.objectid + block_group->key.offset)
168                         return block_group;
169         }
170         ret = radix_tree_gang_lookup(&info->block_group_data_radix,
171                                      (void **)&block_group,
172                                      blocknr, 1);
173         if (ret) {
174                 if (block_group->key.objectid <= blocknr && blocknr <=
175                     block_group->key.objectid + block_group->key.offset)
176                         return block_group;
177         }
178         return NULL;
179 }
180
181 static u64 leaf_range(struct btrfs_root *root)
182 {
183         u64 size = BTRFS_LEAF_DATA_SIZE(root);
184         do_div(size, sizeof(struct btrfs_extent_item) +
185                 sizeof(struct btrfs_item));
186         return size;
187 }
188
189 static u64 find_search_start(struct btrfs_root *root,
190                              struct btrfs_block_group_cache **cache_ret,
191                              u64 search_start, int num)
192 {
193         unsigned long gang[8];
194         int ret;
195         struct btrfs_block_group_cache *cache = *cache_ret;
196         u64 last = max(search_start, cache->key.objectid);
197
198         if (cache->data)
199                 goto out;
200         if (num > 1) {
201                 last = max(last, cache->last_prealloc);
202         }
203 again:
204         cache_block_group(root, cache);
205         while(1) {
206                 ret = find_first_radix_bit(&root->fs_info->extent_map_radix,
207                                            gang, last, ARRAY_SIZE(gang));
208                 if (!ret)
209                         goto out;
210                 last = gang[ret-1] + 1;
211                 if (num > 1) {
212                         if (ret != ARRAY_SIZE(gang)) {
213                                 goto new_group;
214                         }
215                         if (gang[ret-1] - gang[0] > leaf_range(root)) {
216                                 continue;
217                         }
218                 }
219                 if (gang[0] >= cache->key.objectid + cache->key.offset) {
220                         goto new_group;
221                 }
222                 return gang[0];
223         }
224 out:
225         return max(cache->last_alloc, search_start);
226
227 new_group:
228         cache = btrfs_lookup_block_group(root->fs_info,
229                                          last + cache->key.offset - 1);
230         if (!cache) {
231                 return max((*cache_ret)->last_alloc, search_start);
232         }
233         cache = btrfs_find_block_group(root, cache,
234                                        last + cache->key.offset - 1, 0, 0);
235         *cache_ret = cache;
236         goto again;
237 }
238
239 static u64 div_factor(u64 num, int factor)
240 {
241         num *= factor;
242         do_div(num, 10);
243         return num;
244 }
245
246 struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root,
247                                                  struct btrfs_block_group_cache
248                                                  *hint, u64 search_start,
249                                                  int data, int owner)
250 {
251         struct btrfs_block_group_cache *cache[8];
252         struct btrfs_block_group_cache *found_group = NULL;
253         struct btrfs_fs_info *info = root->fs_info;
254         struct radix_tree_root *radix;
255         struct radix_tree_root *swap_radix;
256         u64 used;
257         u64 last = 0;
258         u64 hint_last;
259         int i;
260         int ret;
261         int full_search = 0;
262         int factor = 8;
263         int data_swap = 0;
264
265         if (!owner)
266                 factor = 5;
267
268         if (data) {
269                 radix = &info->block_group_data_radix;
270                 swap_radix = &info->block_group_radix;
271         } else {
272                 radix = &info->block_group_radix;
273                 swap_radix = &info->block_group_data_radix;
274         }
275
276         if (search_start) {
277                 struct btrfs_block_group_cache *shint;
278                 shint = btrfs_lookup_block_group(info, search_start);
279                 if (shint->data == data) {
280                         used = btrfs_block_group_used(&shint->item);
281                         if (used + shint->pinned <
282                             div_factor(shint->key.offset, factor)) {
283                                 return shint;
284                         }
285                 }
286         }
287         if (hint && hint->data == data) {
288                 used = btrfs_block_group_used(&hint->item);
289                 if (used + hint->pinned <
290                     div_factor(hint->key.offset, factor)) {
291                         return hint;
292                 }
293                 if (used >= div_factor(hint->key.offset, 8)) {
294                         radix_tree_tag_clear(radix,
295                                              hint->key.objectid +
296                                              hint->key.offset - 1,
297                                              BTRFS_BLOCK_GROUP_AVAIL);
298                 }
299                 last = hint->key.offset * 3;
300                 if (hint->key.objectid >= last)
301                         last = max(search_start + hint->key.offset - 1,
302                                    hint->key.objectid - last);
303                 else
304                         last = hint->key.objectid + hint->key.offset;
305                 hint_last = last;
306         } else {
307                 if (hint)
308                         hint_last = max(hint->key.objectid, search_start);
309                 else
310                         hint_last = search_start;
311
312                 last = hint_last;
313         }
314         while(1) {
315                 ret = radix_tree_gang_lookup_tag(radix, (void **)cache,
316                                                  last, ARRAY_SIZE(cache),
317                                                  BTRFS_BLOCK_GROUP_AVAIL);
318                 if (!ret)
319                         break;
320                 for (i = 0; i < ret; i++) {
321                         last = cache[i]->key.objectid +
322                                 cache[i]->key.offset;
323                         used = btrfs_block_group_used(&cache[i]->item);
324                         if (used + cache[i]->pinned <
325                             div_factor(cache[i]->key.offset, factor)) {
326                                 found_group = cache[i];
327                                 goto found;
328                         }
329                         if (used >= div_factor(cache[i]->key.offset, 8)) {
330                                 radix_tree_tag_clear(radix,
331                                                      cache[i]->key.objectid +
332                                                      cache[i]->key.offset - 1,
333                                                      BTRFS_BLOCK_GROUP_AVAIL);
334                         }
335                 }
336                 cond_resched();
337         }
338         last = hint_last;
339 again:
340         while(1) {
341                 ret = radix_tree_gang_lookup(radix, (void **)cache,
342                                              last, ARRAY_SIZE(cache));
343                 if (!ret)
344                         break;
345                 for (i = 0; i < ret; i++) {
346                         last = cache[i]->key.objectid +
347                                 cache[i]->key.offset;
348                         used = btrfs_block_group_used(&cache[i]->item);
349                         if (used + cache[i]->pinned < cache[i]->key.offset) {
350                                 found_group = cache[i];
351                                 goto found;
352                         }
353                         if (used >= cache[i]->key.offset) {
354                                 radix_tree_tag_clear(radix,
355                                                      cache[i]->key.objectid +
356                                                      cache[i]->key.offset - 1,
357                                                      BTRFS_BLOCK_GROUP_AVAIL);
358                         }
359                 }
360                 cond_resched();
361         }
362         if (!full_search) {
363                 last = search_start;
364                 full_search = 1;
365                 goto again;
366         }
367         if (!data_swap) {
368                 struct radix_tree_root *tmp = radix;
369                 data_swap = 1;
370                 radix = swap_radix;
371                 swap_radix = tmp;
372                 last = search_start;
373                 goto again;
374         }
375         if (!found_group) {
376                 ret = radix_tree_gang_lookup(radix,
377                                              (void **)&found_group, 0, 1);
378                 if (ret == 0) {
379                         ret = radix_tree_gang_lookup(swap_radix,
380                                                      (void **)&found_group,
381                                                      0, 1);
382                 }
383                 BUG_ON(ret != 1);
384         }
385 found:
386         return found_group;
387 }
388
389 int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
390                                 struct btrfs_root *root,
391                                 u64 blocknr, u64 num_blocks)
392 {
393         struct btrfs_path *path;
394         int ret;
395         struct btrfs_key key;
396         struct btrfs_leaf *l;
397         struct btrfs_extent_item *item;
398         struct btrfs_key ins;
399         u32 refs;
400
401         find_free_extent(trans, root->fs_info->extent_root, 0, 0, (u64)-1, 0,
402                          &ins, 0);
403         path = btrfs_alloc_path();
404         BUG_ON(!path);
405         key.objectid = blocknr;
406         key.flags = 0;
407         btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
408         key.offset = num_blocks;
409         ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path,
410                                 0, 1);
411         if (ret != 0) {
412                 BUG();
413         }
414         BUG_ON(ret != 0);
415         l = btrfs_buffer_leaf(path->nodes[0]);
416         item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item);
417         refs = btrfs_extent_refs(item);
418         btrfs_set_extent_refs(item, refs + 1);
419         btrfs_mark_buffer_dirty(path->nodes[0]);
420
421         btrfs_release_path(root->fs_info->extent_root, path);
422         btrfs_free_path(path);
423         finish_current_insert(trans, root->fs_info->extent_root);
424         del_pending_extents(trans, root->fs_info->extent_root);
425         return 0;
426 }
427
428 static int lookup_extent_ref(struct btrfs_trans_handle *trans,
429                              struct btrfs_root *root, u64 blocknr,
430                              u64 num_blocks, u32 *refs)
431 {
432         struct btrfs_path *path;
433         int ret;
434         struct btrfs_key key;
435         struct btrfs_leaf *l;
436         struct btrfs_extent_item *item;
437
438         path = btrfs_alloc_path();
439         key.objectid = blocknr;
440         key.offset = num_blocks;
441         key.flags = 0;
442         btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
443         ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path,
444                                 0, 0);
445         if (ret != 0)
446                 BUG();
447         l = btrfs_buffer_leaf(path->nodes[0]);
448         item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item);
449         *refs = btrfs_extent_refs(item);
450         btrfs_release_path(root->fs_info->extent_root, path);
451         btrfs_free_path(path);
452         return 0;
453 }
454
455 int btrfs_inc_root_ref(struct btrfs_trans_handle *trans,
456                        struct btrfs_root *root)
457 {
458         return btrfs_inc_extent_ref(trans, root, bh_blocknr(root->node), 1);
459 }
460
461 int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
462                   struct buffer_head *buf)
463 {
464         u64 blocknr;
465         struct btrfs_node *buf_node;
466         struct btrfs_leaf *buf_leaf;
467         struct btrfs_disk_key *key;
468         struct btrfs_file_extent_item *fi;
469         int i;
470         int leaf;
471         int ret;
472
473         if (!root->ref_cows)
474                 return 0;
475         buf_node = btrfs_buffer_node(buf);
476         leaf = btrfs_is_leaf(buf_node);
477         buf_leaf = btrfs_buffer_leaf(buf);
478         for (i = 0; i < btrfs_header_nritems(&buf_node->header); i++) {
479                 if (leaf) {
480                         u64 disk_blocknr;
481                         key = &buf_leaf->items[i].key;
482                         if (btrfs_disk_key_type(key) != BTRFS_EXTENT_DATA_KEY)
483                                 continue;
484                         fi = btrfs_item_ptr(buf_leaf, i,
485                                             struct btrfs_file_extent_item);
486                         if (btrfs_file_extent_type(fi) ==
487                             BTRFS_FILE_EXTENT_INLINE)
488                                 continue;
489                         disk_blocknr = btrfs_file_extent_disk_blocknr(fi);
490                         if (disk_blocknr == 0)
491                                 continue;
492                         ret = btrfs_inc_extent_ref(trans, root, disk_blocknr,
493                                     btrfs_file_extent_disk_num_blocks(fi));
494                         BUG_ON(ret);
495                 } else {
496                         blocknr = btrfs_node_blockptr(buf_node, i);
497                         ret = btrfs_inc_extent_ref(trans, root, blocknr, 1);
498                         BUG_ON(ret);
499                 }
500         }
501         return 0;
502 }
503
504 static int write_one_cache_group(struct btrfs_trans_handle *trans,
505                                  struct btrfs_root *root,
506                                  struct btrfs_path *path,
507                                  struct btrfs_block_group_cache *cache)
508 {
509         int ret;
510         int pending_ret;
511         struct btrfs_root *extent_root = root->fs_info->extent_root;
512         struct btrfs_block_group_item *bi;
513         struct btrfs_key ins;
514
515         find_free_extent(trans, extent_root, 0, 0, (u64)-1, 0, &ins, 0);
516         ret = btrfs_search_slot(trans, extent_root, &cache->key, path, 0, 1);
517         BUG_ON(ret);
518         bi = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), path->slots[0],
519                             struct btrfs_block_group_item);
520         memcpy(bi, &cache->item, sizeof(*bi));
521         mark_buffer_dirty(path->nodes[0]);
522         btrfs_release_path(extent_root, path);
523
524         finish_current_insert(trans, extent_root);
525         pending_ret = del_pending_extents(trans, extent_root);
526         if (ret)
527                 return ret;
528         if (pending_ret)
529                 return pending_ret;
530         if (cache->data)
531                 cache->last_alloc = cache->first_free;
532         return 0;
533
534 }
535
536 static int write_dirty_block_radix(struct btrfs_trans_handle *trans,
537                                    struct btrfs_root *root,
538                                    struct radix_tree_root *radix)
539 {
540         struct btrfs_block_group_cache *cache[8];
541         int ret;
542         int err = 0;
543         int werr = 0;
544         int i;
545         struct btrfs_path *path;
546
547         path = btrfs_alloc_path();
548         if (!path)
549                 return -ENOMEM;
550
551         while(1) {
552                 ret = radix_tree_gang_lookup_tag(radix, (void **)cache,
553                                                  0, ARRAY_SIZE(cache),
554                                                  BTRFS_BLOCK_GROUP_DIRTY);
555                 if (!ret)
556                         break;
557                 for (i = 0; i < ret; i++) {
558                         radix_tree_tag_clear(radix, cache[i]->key.objectid +
559                                              cache[i]->key.offset - 1,
560                                              BTRFS_BLOCK_GROUP_DIRTY);
561                         err = write_one_cache_group(trans, root,
562                                                     path, cache[i]);
563                         if (err)
564                                 werr = err;
565                 }
566         }
567         btrfs_free_path(path);
568         return werr;
569 }
570
571 int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
572                                    struct btrfs_root *root)
573 {
574         int ret;
575         int ret2;
576         ret = write_dirty_block_radix(trans, root,
577                                       &root->fs_info->block_group_radix);
578         ret2 = write_dirty_block_radix(trans, root,
579                                       &root->fs_info->block_group_data_radix);
580         if (ret)
581                 return ret;
582         if (ret2)
583                 return ret2;
584         return 0;
585 }
586
587 static int update_block_group(struct btrfs_trans_handle *trans,
588                               struct btrfs_root *root,
589                               u64 blocknr, u64 num, int alloc, int mark_free,
590                               int data)
591 {
592         struct btrfs_block_group_cache *cache;
593         struct btrfs_fs_info *info = root->fs_info;
594         u64 total = num;
595         u64 old_val;
596         u64 block_in_group;
597         u64 i;
598         int ret;
599
600         while(total) {
601                 cache = btrfs_lookup_block_group(info, blocknr);
602                 if (!cache) {
603                         return -1;
604                 }
605                 block_in_group = blocknr - cache->key.objectid;
606                 WARN_ON(block_in_group > cache->key.offset);
607                 radix_tree_tag_set(cache->radix, cache->key.objectid +
608                                    cache->key.offset - 1,
609                                    BTRFS_BLOCK_GROUP_DIRTY);
610
611                 old_val = btrfs_block_group_used(&cache->item);
612                 num = min(total, cache->key.offset - block_in_group);
613                 if (alloc) {
614                         if (blocknr > cache->last_alloc)
615                                 cache->last_alloc = blocknr;
616                         if (!cache->data) {
617                                 for (i = 0; i < num; i++) {
618                                         clear_radix_bit(&info->extent_map_radix,
619                                                         blocknr + i);
620                                 }
621                         }
622                         if (cache->data != data &&
623                             old_val < (cache->key.offset >> 1)) {
624                                 cache->data = data;
625                                 radix_tree_delete(cache->radix,
626                                                   cache->key.objectid +
627                                                   cache->key.offset - 1);
628
629                                 if (data) {
630                                         cache->radix =
631                                                 &info->block_group_data_radix;
632                                         cache->item.flags |=
633                                                 BTRFS_BLOCK_GROUP_DATA;
634                                 } else {
635                                         cache->radix = &info->block_group_radix;
636                                         cache->item.flags &=
637                                                 ~BTRFS_BLOCK_GROUP_DATA;
638                                 }
639                                 ret = radix_tree_insert(cache->radix,
640                                                         cache->key.objectid +
641                                                         cache->key.offset - 1,
642                                                         (void *)cache);
643                         }
644                         old_val += num;
645                 } else {
646                         old_val -= num;
647                         if (blocknr < cache->first_free)
648                                 cache->first_free = blocknr;
649                         if (!cache->data && mark_free) {
650                                 for (i = 0; i < num; i++) {
651                                         set_radix_bit(&info->extent_map_radix,
652                                                       blocknr + i);
653                                 }
654                         }
655                         if (old_val < (cache->key.offset >> 1) &&
656                             old_val + num >= (cache->key.offset >> 1)) {
657                                 radix_tree_tag_set(cache->radix,
658                                                    cache->key.objectid +
659                                                    cache->key.offset - 1,
660                                                    BTRFS_BLOCK_GROUP_AVAIL);
661                         }
662                 }
663                 btrfs_set_block_group_used(&cache->item, old_val);
664                 total -= num;
665                 blocknr += num;
666         }
667         return 0;
668 }
669
670 static int try_remove_page(struct address_space *mapping, unsigned long index)
671 {
672         int ret;
673         ret = invalidate_mapping_pages(mapping, index, index);
674         return ret;
675 }
676
677 int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, struct
678                                btrfs_root *root)
679 {
680         unsigned long gang[8];
681         struct inode *btree_inode = root->fs_info->btree_inode;
682         struct btrfs_block_group_cache *block_group;
683         u64 first = 0;
684         int ret;
685         int i;
686         struct radix_tree_root *pinned_radix = &root->fs_info->pinned_radix;
687         struct radix_tree_root *extent_radix = &root->fs_info->extent_map_radix;
688
689         while(1) {
690                 ret = find_first_radix_bit(pinned_radix, gang, 0,
691                                            ARRAY_SIZE(gang));
692                 if (!ret)
693                         break;
694                 if (!first)
695                         first = gang[0];
696                 for (i = 0; i < ret; i++) {
697                         clear_radix_bit(pinned_radix, gang[i]);
698                         block_group = btrfs_lookup_block_group(root->fs_info,
699                                                                gang[i]);
700                         if (block_group) {
701                                 WARN_ON(block_group->pinned == 0);
702                                 block_group->pinned--;
703                                 if (gang[i] < block_group->last_alloc)
704                                         block_group->last_alloc = gang[i];
705                                 if (gang[i] < block_group->last_prealloc)
706                                         block_group->last_prealloc = gang[i];
707                                 if (!block_group->data)
708                                         set_radix_bit(extent_radix, gang[i]);
709                         }
710                         try_remove_page(btree_inode->i_mapping,
711                                         gang[i] << (PAGE_CACHE_SHIFT -
712                                                     btree_inode->i_blkbits));
713                 }
714         }
715         return 0;
716 }
717
718 static int finish_current_insert(struct btrfs_trans_handle *trans, struct
719                                  btrfs_root *extent_root)
720 {
721         struct btrfs_key ins;
722         struct btrfs_extent_item extent_item;
723         int i;
724         int ret;
725         u64 super_blocks_used;
726         struct btrfs_fs_info *info = extent_root->fs_info;
727
728         btrfs_set_extent_refs(&extent_item, 1);
729         ins.offset = 1;
730         ins.flags = 0;
731         btrfs_set_key_type(&ins, BTRFS_EXTENT_ITEM_KEY);
732         btrfs_set_extent_owner(&extent_item, extent_root->root_key.objectid);
733
734         for (i = 0; i < extent_root->fs_info->extent_tree_insert_nr; i++) {
735                 ins.objectid = extent_root->fs_info->extent_tree_insert[i];
736                 super_blocks_used = btrfs_super_blocks_used(info->disk_super);
737                 btrfs_set_super_blocks_used(info->disk_super,
738                                             super_blocks_used + 1);
739                 ret = btrfs_insert_item(trans, extent_root, &ins, &extent_item,
740                                         sizeof(extent_item));
741                 BUG_ON(ret);
742         }
743         extent_root->fs_info->extent_tree_insert_nr = 0;
744         return 0;
745 }
746
747 static int pin_down_block(struct btrfs_root *root, u64 blocknr, int pending)
748 {
749         int err;
750         struct btrfs_header *header;
751         struct buffer_head *bh;
752
753         if (!pending) {
754                 bh = btrfs_find_tree_block(root, blocknr);
755                 if (bh) {
756                         if (buffer_uptodate(bh)) {
757                                 u64 transid =
758                                     root->fs_info->running_transaction->transid;
759                                 header = btrfs_buffer_header(bh);
760                                 if (btrfs_header_generation(header) ==
761                                     transid) {
762                                         btrfs_block_release(root, bh);
763                                         return 0;
764                                 }
765                         }
766                         btrfs_block_release(root, bh);
767                 }
768                 err = set_radix_bit(&root->fs_info->pinned_radix, blocknr);
769                 if (!err) {
770                         struct btrfs_block_group_cache *cache;
771                         cache = btrfs_lookup_block_group(root->fs_info,
772                                                          blocknr);
773                         if (cache)
774                                 cache->pinned++;
775                 }
776         } else {
777                 err = set_radix_bit(&root->fs_info->pending_del_radix, blocknr);
778         }
779         BUG_ON(err < 0);
780         return 0;
781 }
782
783 /*
784  * remove an extent from the root, returns 0 on success
785  */
786 static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
787                          *root, u64 blocknr, u64 num_blocks, int pin,
788                          int mark_free)
789 {
790         struct btrfs_path *path;
791         struct btrfs_key key;
792         struct btrfs_fs_info *info = root->fs_info;
793         struct btrfs_root *extent_root = info->extent_root;
794         int ret;
795         struct btrfs_extent_item *ei;
796         struct btrfs_key ins;
797         u32 refs;
798
799         key.objectid = blocknr;
800         key.flags = 0;
801         btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
802         key.offset = num_blocks;
803
804         find_free_extent(trans, root, 0, 0, (u64)-1, 0, &ins, 0);
805         path = btrfs_alloc_path();
806         BUG_ON(!path);
807
808         ret = btrfs_search_slot(trans, extent_root, &key, path, -1, 1);
809         if (ret) {
810                 BUG();
811         }
812         ei = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), path->slots[0],
813                             struct btrfs_extent_item);
814         BUG_ON(ei->refs == 0);
815         refs = btrfs_extent_refs(ei) - 1;
816         btrfs_set_extent_refs(ei, refs);
817         btrfs_mark_buffer_dirty(path->nodes[0]);
818         if (refs == 0) {
819                 u64 super_blocks_used;
820
821                 if (pin) {
822                         ret = pin_down_block(root, blocknr, 0);
823                         BUG_ON(ret);
824                 }
825
826                 super_blocks_used = btrfs_super_blocks_used(info->disk_super);
827                 btrfs_set_super_blocks_used(info->disk_super,
828                                             super_blocks_used - num_blocks);
829                 ret = btrfs_del_item(trans, extent_root, path);
830                 if (ret)
831                         BUG();
832                 ret = update_block_group(trans, root, blocknr, num_blocks, 0,
833                                          mark_free, 0);
834                 BUG_ON(ret);
835         }
836         btrfs_free_path(path);
837         finish_current_insert(trans, extent_root);
838         return ret;
839 }
840
841 /*
842  * find all the blocks marked as pending in the radix tree and remove
843  * them from the extent map
844  */
845 static int del_pending_extents(struct btrfs_trans_handle *trans, struct
846                                btrfs_root *extent_root)
847 {
848         int ret;
849         int wret;
850         int err = 0;
851         unsigned long gang[4];
852         int i;
853         struct radix_tree_root *pending_radix;
854         struct radix_tree_root *pinned_radix;
855         struct btrfs_block_group_cache *cache;
856
857         pending_radix = &extent_root->fs_info->pending_del_radix;
858         pinned_radix = &extent_root->fs_info->pinned_radix;
859
860         while(1) {
861                 ret = find_first_radix_bit(pending_radix, gang, 0,
862                                            ARRAY_SIZE(gang));
863                 if (!ret)
864                         break;
865                 for (i = 0; i < ret; i++) {
866                         wret = set_radix_bit(pinned_radix, gang[i]);
867                         if (wret == 0) {
868                                 cache =
869                                   btrfs_lookup_block_group(extent_root->fs_info,
870                                                            gang[i]);
871                                 if (cache)
872                                         cache->pinned++;
873                         }
874                         if (wret < 0) {
875                                 printk(KERN_CRIT "set_radix_bit, err %d\n",
876                                        wret);
877                                 BUG_ON(wret < 0);
878                         }
879                         wret = clear_radix_bit(pending_radix, gang[i]);
880                         BUG_ON(wret);
881                         wret = __free_extent(trans, extent_root,
882                                              gang[i], 1, 0, 0);
883                         if (wret)
884                                 err = wret;
885                 }
886         }
887         return err;
888 }
889
890 /*
891  * remove an extent from the root, returns 0 on success
892  */
893 int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
894                       *root, u64 blocknr, u64 num_blocks, int pin)
895 {
896         struct btrfs_root *extent_root = root->fs_info->extent_root;
897         int pending_ret;
898         int ret;
899
900         if (root == extent_root) {
901                 pin_down_block(root, blocknr, 1);
902                 return 0;
903         }
904         ret = __free_extent(trans, root, blocknr, num_blocks, pin, pin == 0);
905         pending_ret = del_pending_extents(trans, root->fs_info->extent_root);
906         return ret ? ret : pending_ret;
907 }
908
909 /*
910  * walks the btree of allocated extents and find a hole of a given size.
911  * The key ins is changed to record the hole:
912  * ins->objectid == block start
913  * ins->flags = BTRFS_EXTENT_ITEM_KEY
914  * ins->offset == number of blocks
915  * Any available blocks before search_start are skipped.
916  */
917 static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
918                             *orig_root, u64 num_blocks, u64 search_start, u64
919                             search_end, u64 hint_block,
920                             struct btrfs_key *ins, int data)
921 {
922         struct btrfs_path *path;
923         struct btrfs_key key;
924         int ret;
925         u64 hole_size = 0;
926         int slot = 0;
927         u64 last_block = 0;
928         u64 test_block;
929         u64 orig_search_start = search_start;
930         int start_found;
931         struct btrfs_leaf *l;
932         struct btrfs_root * root = orig_root->fs_info->extent_root;
933         struct btrfs_fs_info *info = root->fs_info;
934         int total_needed = num_blocks;
935         int total_found = 0;
936         int fill_prealloc = 0;
937         int level;
938         struct btrfs_block_group_cache *block_group;
939         int full_scan = 0;
940         int wrapped = 0;
941         u64 limit;
942
943         ins->flags = 0;
944         btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY);
945
946         level = btrfs_header_level(btrfs_buffer_header(root->node));
947         if (num_blocks == 0) {
948                 fill_prealloc = 1;
949                 num_blocks = 1;
950                 total_needed = (min(level + 1, BTRFS_MAX_LEVEL) + 2) * 3;
951         }
952         if (fill_prealloc) {
953                 u64 first;
954                 int nr = info->extent_tree_prealloc_nr;
955                 first = info->extent_tree_prealloc[nr - 1];
956                 if (info->extent_tree_prealloc_nr >= total_needed &&
957                     first >= search_start) {
958                         ins->objectid = info->extent_tree_prealloc[0];
959                         ins->offset = 1;
960                         return 0;
961                 }
962                 info->extent_tree_prealloc_nr = 0;
963         }
964         if (search_end == (u64)-1)
965                 search_end = btrfs_super_total_blocks(info->disk_super);
966         if (hint_block) {
967                 block_group = btrfs_lookup_block_group(info, hint_block);
968                 block_group = btrfs_find_block_group(root, block_group,
969                                                      hint_block, data, 1);
970         } else {
971                 block_group = btrfs_find_block_group(root,
972                                                      trans->block_group, 0,
973                                                      data, 1);
974         }
975
976         path = btrfs_alloc_path();
977
978 check_failed:
979         if (!block_group->data)
980                 search_start = find_search_start(root, &block_group,
981                                                  search_start, total_needed);
982         else if (!full_scan)
983                 search_start = max(block_group->last_alloc, search_start);
984
985         btrfs_init_path(path);
986         ins->objectid = search_start;
987         ins->offset = 0;
988         start_found = 0;
989
990         ret = btrfs_search_slot(trans, root, ins, path, 0, 0);
991         if (ret < 0)
992                 goto error;
993
994         if (path->slots[0] > 0) {
995                 path->slots[0]--;
996         }
997
998         l = btrfs_buffer_leaf(path->nodes[0]);
999         btrfs_disk_key_to_cpu(&key, &l->items[path->slots[0]].key);
1000         /*
1001          * a rare case, go back one key if we hit a block group item
1002          * instead of an extent item
1003          */
1004         if (btrfs_key_type(&key) != BTRFS_EXTENT_ITEM_KEY &&
1005             key.objectid + key.offset >= search_start) {
1006                 ins->objectid = key.objectid;
1007                 ins->offset = key.offset - 1;
1008                 btrfs_release_path(root, path);
1009                 ret = btrfs_search_slot(trans, root, ins, path, 0, 0);
1010                 if (ret < 0)
1011                         goto error;
1012
1013                 if (path->slots[0] > 0) {
1014                         path->slots[0]--;
1015                 }
1016         }
1017
1018         while (1) {
1019                 l = btrfs_buffer_leaf(path->nodes[0]);
1020                 slot = path->slots[0];
1021                 if (slot >= btrfs_header_nritems(&l->header)) {
1022                         if (fill_prealloc) {
1023                                 info->extent_tree_prealloc_nr = 0;
1024                                 total_found = 0;
1025                         }
1026                         if (start_found)
1027                                 limit = last_block +
1028                                         (block_group->key.offset >> 1);
1029                         else
1030                                 limit = search_start +
1031                                         (block_group->key.offset >> 1);
1032                         ret = btrfs_next_leaf(root, path);
1033                         if (ret == 0)
1034                                 continue;
1035                         if (ret < 0)
1036                                 goto error;
1037                         if (!start_found) {
1038                                 ins->objectid = search_start;
1039                                 ins->offset = search_end - search_start;
1040                                 start_found = 1;
1041                                 goto check_pending;
1042                         }
1043                         ins->objectid = last_block > search_start ?
1044                                         last_block : search_start;
1045                         ins->offset = search_end - ins->objectid;
1046                         goto check_pending;
1047                 }
1048
1049                 btrfs_disk_key_to_cpu(&key, &l->items[slot].key);
1050                 if (key.objectid >= search_start && key.objectid > last_block &&
1051                     start_found) {
1052                         if (last_block < search_start)
1053                                 last_block = search_start;
1054                         hole_size = key.objectid - last_block;
1055                         if (hole_size >= num_blocks) {
1056                                 ins->objectid = last_block;
1057                                 ins->offset = hole_size;
1058                                 goto check_pending;
1059                         }
1060                 }
1061
1062                 if (btrfs_key_type(&key) != BTRFS_EXTENT_ITEM_KEY)
1063                         goto next;
1064
1065                 start_found = 1;
1066                 last_block = key.objectid + key.offset;
1067                 if (!full_scan && last_block >= block_group->key.objectid +
1068                     block_group->key.offset) {
1069                         btrfs_release_path(root, path);
1070                         search_start = block_group->key.objectid +
1071                                 block_group->key.offset * 2;
1072                         goto new_group;
1073                 }
1074 next:
1075                 path->slots[0]++;
1076                 cond_resched();
1077         }
1078         // FIXME -ENOSPC
1079 check_pending:
1080         /* we have to make sure we didn't find an extent that has already
1081          * been allocated by the map tree or the original allocation
1082          */
1083         btrfs_release_path(root, path);
1084         BUG_ON(ins->objectid < search_start);
1085
1086         if (ins->objectid + num_blocks >= search_end) {
1087                 if (full_scan) {
1088                         ret = -ENOSPC;
1089                         goto error;
1090                 }
1091                 search_start = orig_search_start;
1092                 if (wrapped)
1093                         full_scan = 1;
1094                 else
1095                         wrapped = 1;
1096                 goto new_group;
1097         }
1098         for (test_block = ins->objectid;
1099              test_block < ins->objectid + num_blocks; test_block++) {
1100                 if (test_radix_bit(&info->pinned_radix, test_block)) {
1101                         search_start = test_block + 1;
1102                         goto new_group;
1103                 }
1104         }
1105         if (!fill_prealloc && info->extent_tree_insert_nr) {
1106                 u64 last =
1107                   info->extent_tree_insert[info->extent_tree_insert_nr - 1];
1108                 if (ins->objectid + num_blocks >
1109                     info->extent_tree_insert[0] &&
1110                     ins->objectid <= last) {
1111                         search_start = last + 1;
1112                         WARN_ON(!full_scan);
1113                         goto new_group;
1114                 }
1115         }
1116         if (!fill_prealloc && info->extent_tree_prealloc_nr) {
1117                 u64 first =
1118                   info->extent_tree_prealloc[info->extent_tree_prealloc_nr - 1];
1119                 if (ins->objectid + num_blocks > first &&
1120                     ins->objectid <= info->extent_tree_prealloc[0]) {
1121                         search_start = info->extent_tree_prealloc[0] + 1;
1122                         goto new_group;
1123                 }
1124         }
1125         if (fill_prealloc) {
1126                 int nr;
1127                 test_block = ins->objectid;
1128                 if (test_block - info->extent_tree_prealloc[total_needed - 1] >=
1129                     leaf_range(root)) {
1130                         total_found = 0;
1131                         info->extent_tree_prealloc_nr = total_found;
1132                 }
1133                 while(test_block < ins->objectid + ins->offset &&
1134                       total_found < total_needed) {
1135                         nr = total_needed - total_found - 1;
1136                         BUG_ON(nr < 0);
1137                         info->extent_tree_prealloc[nr] = test_block;
1138                         total_found++;
1139                         test_block++;
1140                 }
1141                 if (total_found < total_needed) {
1142                         search_start = test_block;
1143                         goto new_group;
1144                 }
1145                 info->extent_tree_prealloc_nr = total_found;
1146         }
1147         if (!data) {
1148                 block_group = btrfs_lookup_block_group(info, ins->objectid);
1149                 if (block_group) {
1150                         if (fill_prealloc)
1151                                 block_group->last_prealloc =
1152                                      info->extent_tree_prealloc[total_needed-1];
1153                         else
1154                                 trans->block_group = block_group;
1155                 }
1156         }
1157         ins->offset = num_blocks;
1158         btrfs_free_path(path);
1159         return 0;
1160
1161 new_group:
1162         if (search_start + num_blocks >= search_end) {
1163                 search_start = orig_search_start;
1164                 if (full_scan) {
1165                         ret = -ENOSPC;
1166                         goto error;
1167                 }
1168                 if (wrapped)
1169                         full_scan = 1;
1170                 else
1171                         wrapped = 1;
1172         }
1173         block_group = btrfs_lookup_block_group(info, search_start);
1174         cond_resched();
1175         if (!full_scan)
1176                 block_group = btrfs_find_block_group(root, block_group,
1177                                                      search_start, data, 0);
1178         goto check_failed;
1179
1180 error:
1181         btrfs_release_path(root, path);
1182         btrfs_free_path(path);
1183         return ret;
1184 }
1185 /*
1186  * finds a free extent and does all the dirty work required for allocation
1187  * returns the key for the extent through ins, and a tree buffer for
1188  * the first block of the extent through buf.
1189  *
1190  * returns 0 if everything worked, non-zero otherwise.
1191  */
1192 int btrfs_alloc_extent(struct btrfs_trans_handle *trans,
1193                        struct btrfs_root *root, u64 owner,
1194                        u64 num_blocks, u64 hint_block,
1195                        u64 search_end, struct btrfs_key *ins, int data)
1196 {
1197         int ret;
1198         int pending_ret;
1199         u64 super_blocks_used;
1200         u64 search_start = 0;
1201         struct btrfs_fs_info *info = root->fs_info;
1202         struct btrfs_root *extent_root = info->extent_root;
1203         struct btrfs_extent_item extent_item;
1204         struct btrfs_key prealloc_key;
1205
1206         btrfs_set_extent_refs(&extent_item, 1);
1207         btrfs_set_extent_owner(&extent_item, owner);
1208
1209         if (root == extent_root) {
1210                 int nr;
1211                 BUG_ON(info->extent_tree_prealloc_nr == 0);
1212                 BUG_ON(num_blocks != 1);
1213                 ins->offset = 1;
1214                 info->extent_tree_prealloc_nr--;
1215                 nr = info->extent_tree_prealloc_nr;
1216                 ins->objectid = info->extent_tree_prealloc[nr];
1217                 info->extent_tree_insert[info->extent_tree_insert_nr++] =
1218                         ins->objectid;
1219                 ret = update_block_group(trans, root,
1220                                          ins->objectid, ins->offset, 1, 0, 0);
1221                 BUG_ON(ret);
1222                 return 0;
1223         }
1224
1225         /*
1226          * if we're doing a data allocation, preallocate room in the
1227          * extent tree first.  This way the extent tree blocks end up
1228          * in the correct block group.
1229          */
1230         if (data) {
1231                 ret = find_free_extent(trans, root, 0, 0,
1232                                        search_end, 0, &prealloc_key, 0);
1233                 if (ret) {
1234                         return ret;
1235                 }
1236                 if (prealloc_key.objectid + prealloc_key.offset >= search_end) {
1237                         int nr = info->extent_tree_prealloc_nr;
1238                         search_end = info->extent_tree_prealloc[nr - 1] - 1;
1239                 } else {
1240                         search_start = info->extent_tree_prealloc[0] + 1;
1241                 }
1242         }
1243         if (hint_block < search_start)
1244                 hint_block = search_start;
1245         /* do the real allocation */
1246         ret = find_free_extent(trans, root, num_blocks, search_start,
1247                                search_end, hint_block, ins, data);
1248         if (ret) {
1249                 return ret;
1250         }
1251
1252         /*
1253          * if we're doing a metadata allocation, preallocate space in the
1254          * extent tree second.  This way, we don't create a tiny hole
1255          * in the allocation map between any unused preallocation blocks
1256          * and the metadata block we're actually allocating.  On disk,
1257          * it'll go:
1258          * [block we've allocated], [used prealloc 1], [ unused prealloc ]
1259          * The unused prealloc will get reused the next time around.
1260          */
1261         if (!data) {
1262                 if (ins->objectid + ins->offset >= search_end)
1263                         search_end = ins->objectid - 1;
1264                 else
1265                         search_start = ins->objectid + ins->offset;
1266
1267                 if (hint_block < search_start)
1268                         hint_block = search_start;
1269
1270                 ret = find_free_extent(trans, root, 0, search_start,
1271                                        search_end, hint_block,
1272                                        &prealloc_key, 0);
1273                 if (ret) {
1274                         return ret;
1275                 }
1276         }
1277
1278         super_blocks_used = btrfs_super_blocks_used(info->disk_super);
1279         btrfs_set_super_blocks_used(info->disk_super, super_blocks_used +
1280                                     num_blocks);
1281         ret = btrfs_insert_item(trans, extent_root, ins, &extent_item,
1282                                 sizeof(extent_item));
1283
1284         finish_current_insert(trans, extent_root);
1285         pending_ret = del_pending_extents(trans, extent_root);
1286         if (ret) {
1287                 return ret;
1288         }
1289         if (pending_ret) {
1290                 return pending_ret;
1291         }
1292         ret = update_block_group(trans, root, ins->objectid, ins->offset, 1, 0,
1293                                  data);
1294         BUG_ON(ret);
1295         return 0;
1296 }
1297
1298 /*
1299  * helper function to allocate a block for a given tree
1300  * returns the tree buffer or NULL.
1301  */
1302 struct buffer_head *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
1303                                            struct btrfs_root *root, u64 hint)
1304 {
1305         struct btrfs_key ins;
1306         int ret;
1307         struct buffer_head *buf;
1308
1309         ret = btrfs_alloc_extent(trans, root, root->root_key.objectid,
1310                                  1, hint, (unsigned long)-1, &ins, 0);
1311         if (ret) {
1312                 BUG();
1313                 return NULL;
1314         }
1315         BUG_ON(ret);
1316         buf = btrfs_find_create_tree_block(root, ins.objectid);
1317         set_buffer_uptodate(buf);
1318         set_buffer_checked(buf);
1319         set_radix_bit(&trans->transaction->dirty_pages, buf->b_page->index);
1320         return buf;
1321 }
1322
1323 static int drop_leaf_ref(struct btrfs_trans_handle *trans,
1324                          struct btrfs_root *root, struct buffer_head *cur)
1325 {
1326         struct btrfs_disk_key *key;
1327         struct btrfs_leaf *leaf;
1328         struct btrfs_file_extent_item *fi;
1329         int i;
1330         int nritems;
1331         int ret;
1332
1333         BUG_ON(!btrfs_is_leaf(btrfs_buffer_node(cur)));
1334         leaf = btrfs_buffer_leaf(cur);
1335         nritems = btrfs_header_nritems(&leaf->header);
1336         for (i = 0; i < nritems; i++) {
1337                 u64 disk_blocknr;
1338                 key = &leaf->items[i].key;
1339                 if (btrfs_disk_key_type(key) != BTRFS_EXTENT_DATA_KEY)
1340                         continue;
1341                 fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item);
1342                 if (btrfs_file_extent_type(fi) == BTRFS_FILE_EXTENT_INLINE)
1343                         continue;
1344                 /*
1345                  * FIXME make sure to insert a trans record that
1346                  * repeats the snapshot del on crash
1347                  */
1348                 disk_blocknr = btrfs_file_extent_disk_blocknr(fi);
1349                 if (disk_blocknr == 0)
1350                         continue;
1351                 ret = btrfs_free_extent(trans, root, disk_blocknr,
1352                                         btrfs_file_extent_disk_num_blocks(fi),
1353                                         0);
1354                 BUG_ON(ret);
1355         }
1356         return 0;
1357 }
1358
1359 static void reada_walk_down(struct btrfs_root *root,
1360                             struct btrfs_node *node)
1361 {
1362         int i;
1363         u32 nritems;
1364         u64 blocknr;
1365         int ret;
1366         u32 refs;
1367
1368         nritems = btrfs_header_nritems(&node->header);
1369         for (i = 0; i < nritems; i++) {
1370                 blocknr = btrfs_node_blockptr(node, i);
1371                 ret = lookup_extent_ref(NULL, root, blocknr, 1, &refs);
1372                 BUG_ON(ret);
1373                 if (refs != 1)
1374                         continue;
1375                 ret = readahead_tree_block(root, blocknr);
1376                 if (ret)
1377                         break;
1378         }
1379 }
1380
1381 /*
1382  * helper function for drop_snapshot, this walks down the tree dropping ref
1383  * counts as it goes.
1384  */
1385 static int walk_down_tree(struct btrfs_trans_handle *trans, struct btrfs_root
1386                           *root, struct btrfs_path *path, int *level)
1387 {
1388         struct buffer_head *next;
1389         struct buffer_head *cur;
1390         u64 blocknr;
1391         int ret;
1392         u32 refs;
1393
1394         WARN_ON(*level < 0);
1395         WARN_ON(*level >= BTRFS_MAX_LEVEL);
1396         ret = lookup_extent_ref(trans, root, bh_blocknr(path->nodes[*level]),
1397                                1, &refs);
1398         BUG_ON(ret);
1399         if (refs > 1)
1400                 goto out;
1401
1402         /*
1403          * walk down to the last node level and free all the leaves
1404          */
1405         while(*level >= 0) {
1406                 WARN_ON(*level < 0);
1407                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
1408                 cur = path->nodes[*level];
1409
1410                 if (*level > 0 && path->slots[*level] == 0)
1411                         reada_walk_down(root, btrfs_buffer_node(cur));
1412
1413                 if (btrfs_header_level(btrfs_buffer_header(cur)) != *level)
1414                         WARN_ON(1);
1415
1416                 if (path->slots[*level] >=
1417                     btrfs_header_nritems(btrfs_buffer_header(cur)))
1418                         break;
1419                 if (*level == 0) {
1420                         ret = drop_leaf_ref(trans, root, cur);
1421                         BUG_ON(ret);
1422                         break;
1423                 }
1424                 blocknr = btrfs_node_blockptr(btrfs_buffer_node(cur),
1425                                               path->slots[*level]);
1426                 ret = lookup_extent_ref(trans, root, blocknr, 1, &refs);
1427                 BUG_ON(ret);
1428                 if (refs != 1) {
1429                         path->slots[*level]++;
1430                         ret = btrfs_free_extent(trans, root, blocknr, 1, 1);
1431                         BUG_ON(ret);
1432                         continue;
1433                 }
1434                 next = read_tree_block(root, blocknr);
1435                 WARN_ON(*level <= 0);
1436                 if (path->nodes[*level-1])
1437                         btrfs_block_release(root, path->nodes[*level-1]);
1438                 path->nodes[*level-1] = next;
1439                 *level = btrfs_header_level(btrfs_buffer_header(next));
1440                 path->slots[*level] = 0;
1441         }
1442 out:
1443         WARN_ON(*level < 0);
1444         WARN_ON(*level >= BTRFS_MAX_LEVEL);
1445         ret = btrfs_free_extent(trans, root,
1446                                 bh_blocknr(path->nodes[*level]), 1, 1);
1447         btrfs_block_release(root, path->nodes[*level]);
1448         path->nodes[*level] = NULL;
1449         *level += 1;
1450         BUG_ON(ret);
1451         return 0;
1452 }
1453
1454 /*
1455  * helper for dropping snapshots.  This walks back up the tree in the path
1456  * to find the first node higher up where we haven't yet gone through
1457  * all the slots
1458  */
1459 static int walk_up_tree(struct btrfs_trans_handle *trans, struct btrfs_root
1460                         *root, struct btrfs_path *path, int *level)
1461 {
1462         int i;
1463         int slot;
1464         int ret;
1465         for(i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
1466                 slot = path->slots[i];
1467                 if (slot < btrfs_header_nritems(
1468                     btrfs_buffer_header(path->nodes[i])) - 1) {
1469                         path->slots[i]++;
1470                         *level = i;
1471                         return 0;
1472                 } else {
1473                         ret = btrfs_free_extent(trans, root,
1474                                                 bh_blocknr(path->nodes[*level]),
1475                                                 1, 1);
1476                         BUG_ON(ret);
1477                         btrfs_block_release(root, path->nodes[*level]);
1478                         path->nodes[*level] = NULL;
1479                         *level = i + 1;
1480                 }
1481         }
1482         return 1;
1483 }
1484
1485 /*
1486  * drop the reference count on the tree rooted at 'snap'.  This traverses
1487  * the tree freeing any blocks that have a ref count of zero after being
1488  * decremented.
1489  */
1490 int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root
1491                         *root, struct buffer_head *snap)
1492 {
1493         int ret = 0;
1494         int wret;
1495         int level;
1496         struct btrfs_path *path;
1497         int i;
1498         int orig_level;
1499
1500         path = btrfs_alloc_path();
1501         BUG_ON(!path);
1502
1503         level = btrfs_header_level(btrfs_buffer_header(snap));
1504         orig_level = level;
1505         path->nodes[level] = snap;
1506         path->slots[level] = 0;
1507         while(1) {
1508                 wret = walk_down_tree(trans, root, path, &level);
1509                 if (wret > 0)
1510                         break;
1511                 if (wret < 0)
1512                         ret = wret;
1513
1514                 wret = walk_up_tree(trans, root, path, &level);
1515                 if (wret > 0)
1516                         break;
1517                 if (wret < 0)
1518                         ret = wret;
1519         }
1520         for (i = 0; i <= orig_level; i++) {
1521                 if (path->nodes[i]) {
1522                         btrfs_block_release(root, path->nodes[i]);
1523                 }
1524         }
1525         btrfs_free_path(path);
1526         return ret;
1527 }
1528
1529 static int free_block_group_radix(struct radix_tree_root *radix)
1530 {
1531         int ret;
1532         struct btrfs_block_group_cache *cache[8];
1533         int i;
1534
1535         while(1) {
1536                 ret = radix_tree_gang_lookup(radix, (void **)cache, 0,
1537                                              ARRAY_SIZE(cache));
1538                 if (!ret)
1539                         break;
1540                 for (i = 0; i < ret; i++) {
1541                         radix_tree_delete(radix, cache[i]->key.objectid +
1542                                           cache[i]->key.offset - 1);
1543                         kfree(cache[i]);
1544                 }
1545         }
1546         return 0;
1547 }
1548
1549 int btrfs_free_block_groups(struct btrfs_fs_info *info)
1550 {
1551         int ret;
1552         int ret2;
1553         unsigned long gang[16];
1554         int i;
1555
1556         ret = free_block_group_radix(&info->block_group_radix);
1557         ret2 = free_block_group_radix(&info->block_group_data_radix);
1558         if (ret)
1559                 return ret;
1560         if (ret2)
1561                 return ret2;
1562
1563         while(1) {
1564                 ret = find_first_radix_bit(&info->extent_map_radix,
1565                                            gang, 0, ARRAY_SIZE(gang));
1566                 if (!ret)
1567                         break;
1568                 for (i = 0; i < ret; i++) {
1569                         clear_radix_bit(&info->extent_map_radix, gang[i]);
1570                 }
1571         }
1572         return 0;
1573 }
1574
1575 int btrfs_read_block_groups(struct btrfs_root *root)
1576 {
1577         struct btrfs_path *path;
1578         int ret;
1579         int err = 0;
1580         struct btrfs_block_group_item *bi;
1581         struct btrfs_block_group_cache *cache;
1582         struct btrfs_fs_info *info = root->fs_info;
1583         struct radix_tree_root *radix;
1584         struct btrfs_key key;
1585         struct btrfs_key found_key;
1586         struct btrfs_leaf *leaf;
1587         u64 group_size_blocks;
1588         u64 used;
1589
1590         group_size_blocks = BTRFS_BLOCK_GROUP_SIZE >>
1591                 root->fs_info->sb->s_blocksize_bits;
1592         root = info->extent_root;
1593         key.objectid = 0;
1594         key.offset = group_size_blocks;
1595         key.flags = 0;
1596         btrfs_set_key_type(&key, BTRFS_BLOCK_GROUP_ITEM_KEY);
1597
1598         path = btrfs_alloc_path();
1599         if (!path)
1600                 return -ENOMEM;
1601
1602         while(1) {
1603                 ret = btrfs_search_slot(NULL, info->extent_root,
1604                                         &key, path, 0, 0);
1605                 if (ret != 0) {
1606                         err = ret;
1607                         break;
1608                 }
1609                 leaf = btrfs_buffer_leaf(path->nodes[0]);
1610                 btrfs_disk_key_to_cpu(&found_key,
1611                                       &leaf->items[path->slots[0]].key);
1612                 cache = kmalloc(sizeof(*cache), GFP_NOFS);
1613                 if (!cache) {
1614                         err = -1;
1615                         break;
1616                 }
1617
1618                 bi = btrfs_item_ptr(leaf, path->slots[0],
1619                                     struct btrfs_block_group_item);
1620                 if (bi->flags & BTRFS_BLOCK_GROUP_DATA) {
1621                         radix = &info->block_group_data_radix;
1622                         cache->data = 1;
1623                 } else {
1624                         radix = &info->block_group_radix;
1625                         cache->data = 0;
1626                 }
1627
1628                 memcpy(&cache->item, bi, sizeof(*bi));
1629                 memcpy(&cache->key, &found_key, sizeof(found_key));
1630                 cache->last_alloc = cache->key.objectid;
1631                 cache->first_free = cache->key.objectid;
1632                 cache->last_prealloc = cache->key.objectid;
1633                 cache->pinned = 0;
1634                 cache->cached = 0;
1635
1636                 cache->radix = radix;
1637
1638                 key.objectid = found_key.objectid + found_key.offset;
1639                 btrfs_release_path(root, path);
1640                 ret = radix_tree_insert(radix, found_key.objectid +
1641                                         found_key.offset - 1,
1642                                         (void *)cache);
1643                 BUG_ON(ret);
1644                 used = btrfs_block_group_used(bi);
1645                 if (used < div_factor(key.offset, 8)) {
1646                         radix_tree_tag_set(radix, found_key.objectid +
1647                                            found_key.offset - 1,
1648                                            BTRFS_BLOCK_GROUP_AVAIL);
1649                 }
1650                 if (key.objectid >=
1651                     btrfs_super_total_blocks(info->disk_super))
1652                         break;
1653         }
1654
1655         btrfs_free_path(path);
1656         return 0;
1657 }