Btrfs: Fix super block updates during transaction commit
[firefly-linux-kernel-4.4.55.git] / fs / btrfs / extent-tree.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 021110-1307, USA.
17  */
18
19 #include <linux/module.h>
20 #include "ctree.h"
21 #include "disk-io.h"
22 #include "print-tree.h"
23 #include "transaction.h"
24
25 static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
26                             *orig_root, u64 num_blocks, u64 search_start,
27                             u64 search_end, u64 hint_block,
28                             struct btrfs_key *ins, int data);
29 static int finish_current_insert(struct btrfs_trans_handle *trans, struct
30                                  btrfs_root *extent_root);
31 static int del_pending_extents(struct btrfs_trans_handle *trans, struct
32                                btrfs_root *extent_root);
33
34 static void reada_extent_leaves(struct btrfs_root *root,
35                                 struct btrfs_path *path, u64 limit)
36 {
37         struct btrfs_node *node;
38         int i;
39         int nritems;
40         u64 item_objectid;
41         u64 blocknr;
42         int slot;
43         int ret;
44
45         if (!path->nodes[1])
46                 return;
47         node = btrfs_buffer_node(path->nodes[1]);
48         slot = path->slots[1] + 1;
49         nritems = btrfs_header_nritems(&node->header);
50         for (i = slot; i < nritems && i < slot + 8; i++) {
51                 item_objectid = btrfs_disk_key_objectid(&node->ptrs[i].key);
52                 if (item_objectid > limit)
53                         break;
54                 blocknr = btrfs_node_blockptr(node, i);
55                 ret = readahead_tree_block(root, blocknr);
56                 if (ret)
57                         break;
58         }
59 }
60
61 static int cache_block_group(struct btrfs_root *root,
62                              struct btrfs_block_group_cache *block_group)
63 {
64         struct btrfs_path *path;
65         int ret;
66         struct btrfs_key key;
67         struct btrfs_leaf *leaf;
68         struct radix_tree_root *extent_radix;
69         int slot;
70         u64 i;
71         u64 last = 0;
72         u64 hole_size;
73         u64 limit;
74         int found = 0;
75
76         root = root->fs_info->extent_root;
77         extent_radix = &root->fs_info->extent_map_radix;
78
79         if (block_group->cached)
80                 return 0;
81         if (block_group->data)
82                 return 0;
83         path = btrfs_alloc_path();
84         if (!path)
85                 return -ENOMEM;
86         key.objectid = block_group->key.objectid;
87         key.flags = 0;
88         key.offset = 0;
89         btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
90         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
91         if (ret < 0)
92                 return ret;
93         if (ret && path->slots[0] > 0)
94                 path->slots[0]--;
95         limit = block_group->key.objectid + block_group->key.offset;
96         reada_extent_leaves(root, path, limit);
97         while(1) {
98                 leaf = btrfs_buffer_leaf(path->nodes[0]);
99                 slot = path->slots[0];
100                 if (slot >= btrfs_header_nritems(&leaf->header)) {
101                         reada_extent_leaves(root, path, limit);
102                         ret = btrfs_next_leaf(root, path);
103                         if (ret < 0)
104                                 goto err;
105                         if (ret == 0) {
106                                 continue;
107                         } else {
108                                 if (found) {
109                                         hole_size = block_group->key.objectid +
110                                                 block_group->key.offset - last;
111                                 } else {
112                                         last = block_group->key.objectid;
113                                         hole_size = block_group->key.offset;
114                                 }
115                                 for (i = 0; i < hole_size; i++) {
116                                         set_radix_bit(extent_radix,
117                                                       last + i);
118                                 }
119                                 break;
120                         }
121                 }
122                 btrfs_disk_key_to_cpu(&key, &leaf->items[slot].key);
123                 if (key.objectid >= block_group->key.objectid +
124                     block_group->key.offset) {
125                         if (found) {
126                                 hole_size = block_group->key.objectid +
127                                         block_group->key.offset - last;
128                         } else {
129                                 last = block_group->key.objectid;
130                                 hole_size = block_group->key.offset;
131                         }
132                         for (i = 0; i < hole_size; i++) {
133                                 set_radix_bit(extent_radix, last + i);
134                         }
135                         break;
136                 }
137                 if (btrfs_key_type(&key) == BTRFS_EXTENT_ITEM_KEY) {
138                         if (!found) {
139                                 last = key.objectid + key.offset;
140                                 found = 1;
141                         } else {
142                                 hole_size = key.objectid - last;
143                                 for (i = 0; i < hole_size; i++) {
144                                         set_radix_bit(extent_radix, last + i);
145                                 }
146                                 last = key.objectid + key.offset;
147                         }
148                 }
149                 path->slots[0]++;
150         }
151
152         block_group->cached = 1;
153 err:
154         btrfs_free_path(path);
155         return 0;
156 }
157
158 struct btrfs_block_group_cache *btrfs_lookup_block_group(struct
159                                                          btrfs_fs_info *info,
160                                                          u64 blocknr)
161 {
162         struct btrfs_block_group_cache *block_group;
163         int ret;
164
165         ret = radix_tree_gang_lookup(&info->block_group_radix,
166                                      (void **)&block_group,
167                                      blocknr, 1);
168         if (ret) {
169                 if (block_group->key.objectid <= blocknr && blocknr <=
170                     block_group->key.objectid + block_group->key.offset)
171                         return block_group;
172         }
173         ret = radix_tree_gang_lookup(&info->block_group_data_radix,
174                                      (void **)&block_group,
175                                      blocknr, 1);
176         if (ret) {
177                 if (block_group->key.objectid <= blocknr && blocknr <=
178                     block_group->key.objectid + block_group->key.offset)
179                         return block_group;
180         }
181         return NULL;
182 }
183
184 static u64 leaf_range(struct btrfs_root *root)
185 {
186         u64 size = BTRFS_LEAF_DATA_SIZE(root);
187         do_div(size, sizeof(struct btrfs_extent_item) +
188                 sizeof(struct btrfs_item));
189         return size;
190 }
191
192 static u64 find_search_start(struct btrfs_root *root,
193                              struct btrfs_block_group_cache **cache_ret,
194                              u64 search_start, int num)
195 {
196         unsigned long gang[8];
197         int ret;
198         struct btrfs_block_group_cache *cache = *cache_ret;
199         u64 last = max(search_start, cache->key.objectid);
200
201         if (cache->data)
202                 goto out;
203         if (num > 1) {
204                 last = max(last, cache->last_prealloc);
205         }
206 again:
207         ret = cache_block_group(root, cache);
208         if (ret)
209                 goto out;
210         while(1) {
211                 ret = find_first_radix_bit(&root->fs_info->extent_map_radix,
212                                            gang, last, ARRAY_SIZE(gang));
213                 if (!ret)
214                         goto out;
215                 last = gang[ret-1] + 1;
216                 if (num > 1) {
217                         if (ret != ARRAY_SIZE(gang)) {
218                                 goto new_group;
219                         }
220                         if (gang[ret-1] - gang[0] > leaf_range(root)) {
221                                 continue;
222                         }
223                 }
224                 if (gang[0] >= cache->key.objectid + cache->key.offset) {
225                         goto new_group;
226                 }
227                 return gang[0];
228         }
229 out:
230         return max(cache->last_alloc, search_start);
231
232 new_group:
233         cache = btrfs_lookup_block_group(root->fs_info,
234                                          last + cache->key.offset - 1);
235         if (!cache) {
236                 return max((*cache_ret)->last_alloc, search_start);
237         }
238         cache = btrfs_find_block_group(root, cache,
239                                        last + cache->key.offset - 1, 0, 0);
240         *cache_ret = cache;
241         goto again;
242 }
243
244 static u64 div_factor(u64 num, int factor)
245 {
246         num *= factor;
247         do_div(num, 10);
248         return num;
249 }
250
251 struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root,
252                                                  struct btrfs_block_group_cache
253                                                  *hint, u64 search_start,
254                                                  int data, int owner)
255 {
256         struct btrfs_block_group_cache *cache[8];
257         struct btrfs_block_group_cache *found_group = NULL;
258         struct btrfs_fs_info *info = root->fs_info;
259         struct radix_tree_root *radix;
260         struct radix_tree_root *swap_radix;
261         u64 used;
262         u64 last = 0;
263         u64 hint_last;
264         int i;
265         int ret;
266         int full_search = 0;
267         int factor = 8;
268         int data_swap = 0;
269
270         if (!owner)
271                 factor = 5;
272
273         if (data) {
274                 radix = &info->block_group_data_radix;
275                 swap_radix = &info->block_group_radix;
276         } else {
277                 radix = &info->block_group_radix;
278                 swap_radix = &info->block_group_data_radix;
279         }
280
281         if (search_start) {
282                 struct btrfs_block_group_cache *shint;
283                 shint = btrfs_lookup_block_group(info, search_start);
284                 if (shint->data == data) {
285                         used = btrfs_block_group_used(&shint->item);
286                         if (used + shint->pinned <
287                             div_factor(shint->key.offset, factor)) {
288                                 return shint;
289                         }
290                 }
291         }
292         if (hint && hint->data == data) {
293                 used = btrfs_block_group_used(&hint->item);
294                 if (used + hint->pinned <
295                     div_factor(hint->key.offset, factor)) {
296                         return hint;
297                 }
298                 if (used >= div_factor(hint->key.offset, 8)) {
299                         radix_tree_tag_clear(radix,
300                                              hint->key.objectid +
301                                              hint->key.offset - 1,
302                                              BTRFS_BLOCK_GROUP_AVAIL);
303                 }
304                 last = hint->key.offset * 3;
305                 if (hint->key.objectid >= last)
306                         last = max(search_start + hint->key.offset - 1,
307                                    hint->key.objectid - last);
308                 else
309                         last = hint->key.objectid + hint->key.offset;
310                 hint_last = last;
311         } else {
312                 if (hint)
313                         hint_last = max(hint->key.objectid, search_start);
314                 else
315                         hint_last = search_start;
316
317                 last = hint_last;
318         }
319         while(1) {
320                 ret = radix_tree_gang_lookup_tag(radix, (void **)cache,
321                                                  last, ARRAY_SIZE(cache),
322                                                  BTRFS_BLOCK_GROUP_AVAIL);
323                 if (!ret)
324                         break;
325                 for (i = 0; i < ret; i++) {
326                         last = cache[i]->key.objectid +
327                                 cache[i]->key.offset;
328                         used = btrfs_block_group_used(&cache[i]->item);
329                         if (used + cache[i]->pinned <
330                             div_factor(cache[i]->key.offset, factor)) {
331                                 found_group = cache[i];
332                                 goto found;
333                         }
334                         if (used >= div_factor(cache[i]->key.offset, 8)) {
335                                 radix_tree_tag_clear(radix,
336                                                      cache[i]->key.objectid +
337                                                      cache[i]->key.offset - 1,
338                                                      BTRFS_BLOCK_GROUP_AVAIL);
339                         }
340                 }
341                 cond_resched();
342         }
343         last = hint_last;
344 again:
345         while(1) {
346                 ret = radix_tree_gang_lookup(radix, (void **)cache,
347                                              last, ARRAY_SIZE(cache));
348                 if (!ret)
349                         break;
350                 for (i = 0; i < ret; i++) {
351                         last = cache[i]->key.objectid +
352                                 cache[i]->key.offset;
353                         used = btrfs_block_group_used(&cache[i]->item);
354                         if (used + cache[i]->pinned < cache[i]->key.offset) {
355                                 found_group = cache[i];
356                                 goto found;
357                         }
358                         if (used >= cache[i]->key.offset) {
359                                 radix_tree_tag_clear(radix,
360                                                      cache[i]->key.objectid +
361                                                      cache[i]->key.offset - 1,
362                                                      BTRFS_BLOCK_GROUP_AVAIL);
363                         }
364                 }
365                 cond_resched();
366         }
367         if (!full_search) {
368                 last = search_start;
369                 full_search = 1;
370                 goto again;
371         }
372         if (!data_swap) {
373                 struct radix_tree_root *tmp = radix;
374                 data_swap = 1;
375                 radix = swap_radix;
376                 swap_radix = tmp;
377                 last = search_start;
378                 goto again;
379         }
380         if (!found_group) {
381                 ret = radix_tree_gang_lookup(radix,
382                                              (void **)&found_group, 0, 1);
383                 if (ret == 0) {
384                         ret = radix_tree_gang_lookup(swap_radix,
385                                                      (void **)&found_group,
386                                                      0, 1);
387                 }
388                 BUG_ON(ret != 1);
389         }
390 found:
391         return found_group;
392 }
393
394 int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
395                                 struct btrfs_root *root,
396                                 u64 blocknr, u64 num_blocks)
397 {
398         struct btrfs_path *path;
399         int ret;
400         struct btrfs_key key;
401         struct btrfs_leaf *l;
402         struct btrfs_extent_item *item;
403         struct btrfs_key ins;
404         u32 refs;
405
406         path = btrfs_alloc_path();
407         if (!path)
408                 return -ENOMEM;
409         ret = find_free_extent(trans, root->fs_info->extent_root, 0, 0,
410                                (u64)-1, 0, &ins, 0);
411         if (ret) {
412                 btrfs_free_path(path);
413                 return ret;
414         }
415         key.objectid = blocknr;
416         key.flags = 0;
417         btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
418         key.offset = num_blocks;
419         ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path,
420                                 0, 1);
421         if (ret < 0)
422                 return ret;
423         if (ret != 0) {
424                 BUG();
425         }
426         BUG_ON(ret != 0);
427         l = btrfs_buffer_leaf(path->nodes[0]);
428         item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item);
429         refs = btrfs_extent_refs(item);
430         btrfs_set_extent_refs(item, refs + 1);
431         btrfs_mark_buffer_dirty(path->nodes[0]);
432
433         btrfs_release_path(root->fs_info->extent_root, path);
434         btrfs_free_path(path);
435         finish_current_insert(trans, root->fs_info->extent_root);
436         del_pending_extents(trans, root->fs_info->extent_root);
437         return 0;
438 }
439
440 static int lookup_extent_ref(struct btrfs_trans_handle *trans,
441                              struct btrfs_root *root, u64 blocknr,
442                              u64 num_blocks, u32 *refs)
443 {
444         struct btrfs_path *path;
445         int ret;
446         struct btrfs_key key;
447         struct btrfs_leaf *l;
448         struct btrfs_extent_item *item;
449
450         path = btrfs_alloc_path();
451         key.objectid = blocknr;
452         key.offset = num_blocks;
453         key.flags = 0;
454         btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
455         ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path,
456                                 0, 0);
457         if (ret < 0)
458                 goto out;
459         if (ret != 0)
460                 BUG();
461         l = btrfs_buffer_leaf(path->nodes[0]);
462         item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item);
463         *refs = btrfs_extent_refs(item);
464 out:
465         btrfs_free_path(path);
466         return 0;
467 }
468
469 int btrfs_inc_root_ref(struct btrfs_trans_handle *trans,
470                        struct btrfs_root *root)
471 {
472         return btrfs_inc_extent_ref(trans, root, bh_blocknr(root->node), 1);
473 }
474
475 int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
476                   struct buffer_head *buf)
477 {
478         u64 blocknr;
479         struct btrfs_node *buf_node;
480         struct btrfs_leaf *buf_leaf;
481         struct btrfs_disk_key *key;
482         struct btrfs_file_extent_item *fi;
483         int i;
484         int leaf;
485         int ret;
486         int faili;
487         int err;
488
489         if (!root->ref_cows)
490                 return 0;
491         buf_node = btrfs_buffer_node(buf);
492         leaf = btrfs_is_leaf(buf_node);
493         buf_leaf = btrfs_buffer_leaf(buf);
494         for (i = 0; i < btrfs_header_nritems(&buf_node->header); i++) {
495                 if (leaf) {
496                         u64 disk_blocknr;
497                         key = &buf_leaf->items[i].key;
498                         if (btrfs_disk_key_type(key) != BTRFS_EXTENT_DATA_KEY)
499                                 continue;
500                         fi = btrfs_item_ptr(buf_leaf, i,
501                                             struct btrfs_file_extent_item);
502                         if (btrfs_file_extent_type(fi) ==
503                             BTRFS_FILE_EXTENT_INLINE)
504                                 continue;
505                         disk_blocknr = btrfs_file_extent_disk_blocknr(fi);
506                         if (disk_blocknr == 0)
507                                 continue;
508                         ret = btrfs_inc_extent_ref(trans, root, disk_blocknr,
509                                     btrfs_file_extent_disk_num_blocks(fi));
510                         if (ret) {
511                                 faili = i;
512                                 goto fail;
513                         }
514                 } else {
515                         blocknr = btrfs_node_blockptr(buf_node, i);
516                         ret = btrfs_inc_extent_ref(trans, root, blocknr, 1);
517                         if (ret) {
518                                 faili = i;
519                                 goto fail;
520                         }
521                 }
522         }
523         return 0;
524 fail:
525         for (i =0; i < faili; i++) {
526                 if (leaf) {
527                         u64 disk_blocknr;
528                         key = &buf_leaf->items[i].key;
529                         if (btrfs_disk_key_type(key) != BTRFS_EXTENT_DATA_KEY)
530                                 continue;
531                         fi = btrfs_item_ptr(buf_leaf, i,
532                                             struct btrfs_file_extent_item);
533                         if (btrfs_file_extent_type(fi) ==
534                             BTRFS_FILE_EXTENT_INLINE)
535                                 continue;
536                         disk_blocknr = btrfs_file_extent_disk_blocknr(fi);
537                         if (disk_blocknr == 0)
538                                 continue;
539                         err = btrfs_free_extent(trans, root, disk_blocknr,
540                                     btrfs_file_extent_disk_num_blocks(fi), 0);
541                         BUG_ON(err);
542                 } else {
543                         blocknr = btrfs_node_blockptr(buf_node, i);
544                         err = btrfs_free_extent(trans, root, blocknr, 1, 0);
545                         BUG_ON(err);
546                 }
547         }
548         return ret;
549 }
550
551 static int write_one_cache_group(struct btrfs_trans_handle *trans,
552                                  struct btrfs_root *root,
553                                  struct btrfs_path *path,
554                                  struct btrfs_block_group_cache *cache)
555 {
556         int ret;
557         int pending_ret;
558         struct btrfs_root *extent_root = root->fs_info->extent_root;
559         struct btrfs_block_group_item *bi;
560         struct btrfs_key ins;
561
562         ret = find_free_extent(trans, extent_root, 0, 0, (u64)-1, 0, &ins, 0);
563         /* FIXME, set bit to recalc cache groups on next mount */
564         if (ret)
565                 return ret;
566         ret = btrfs_search_slot(trans, extent_root, &cache->key, path, 0, 1);
567         if (ret < 0)
568                 goto fail;
569         BUG_ON(ret);
570         bi = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), path->slots[0],
571                             struct btrfs_block_group_item);
572         memcpy(bi, &cache->item, sizeof(*bi));
573         mark_buffer_dirty(path->nodes[0]);
574         btrfs_release_path(extent_root, path);
575 fail:
576         finish_current_insert(trans, extent_root);
577         pending_ret = del_pending_extents(trans, extent_root);
578         if (ret)
579                 return ret;
580         if (pending_ret)
581                 return pending_ret;
582         if (cache->data)
583                 cache->last_alloc = cache->first_free;
584         return 0;
585
586 }
587
588 static int write_dirty_block_radix(struct btrfs_trans_handle *trans,
589                                    struct btrfs_root *root,
590                                    struct radix_tree_root *radix)
591 {
592         struct btrfs_block_group_cache *cache[8];
593         int ret;
594         int err = 0;
595         int werr = 0;
596         int i;
597         struct btrfs_path *path;
598         unsigned long off = 0;
599
600         path = btrfs_alloc_path();
601         if (!path)
602                 return -ENOMEM;
603
604         while(1) {
605                 ret = radix_tree_gang_lookup_tag(radix, (void **)cache,
606                                                  off, ARRAY_SIZE(cache),
607                                                  BTRFS_BLOCK_GROUP_DIRTY);
608                 if (!ret)
609                         break;
610                 for (i = 0; i < ret; i++) {
611                         err = write_one_cache_group(trans, root,
612                                                     path, cache[i]);
613                         /*
614                          * if we fail to write the cache group, we want
615                          * to keep it marked dirty in hopes that a later
616                          * write will work
617                          */
618                         if (err) {
619                                 werr = err;
620                                 off = cache[i]->key.objectid +
621                                         cache[i]->key.offset;
622                                 continue;
623                         }
624
625                         radix_tree_tag_clear(radix, cache[i]->key.objectid +
626                                              cache[i]->key.offset - 1,
627                                              BTRFS_BLOCK_GROUP_DIRTY);
628                 }
629         }
630         btrfs_free_path(path);
631         return werr;
632 }
633
634 int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
635                                    struct btrfs_root *root)
636 {
637         int ret;
638         int ret2;
639         ret = write_dirty_block_radix(trans, root,
640                                       &root->fs_info->block_group_radix);
641         ret2 = write_dirty_block_radix(trans, root,
642                                       &root->fs_info->block_group_data_radix);
643         if (ret)
644                 return ret;
645         if (ret2)
646                 return ret2;
647         return 0;
648 }
649
650 static int update_block_group(struct btrfs_trans_handle *trans,
651                               struct btrfs_root *root,
652                               u64 blocknr, u64 num, int alloc, int mark_free,
653                               int data)
654 {
655         struct btrfs_block_group_cache *cache;
656         struct btrfs_fs_info *info = root->fs_info;
657         u64 total = num;
658         u64 old_val;
659         u64 block_in_group;
660         u64 i;
661         int ret;
662
663         while(total) {
664                 cache = btrfs_lookup_block_group(info, blocknr);
665                 if (!cache) {
666                         return -1;
667                 }
668                 block_in_group = blocknr - cache->key.objectid;
669                 WARN_ON(block_in_group > cache->key.offset);
670                 radix_tree_tag_set(cache->radix, cache->key.objectid +
671                                    cache->key.offset - 1,
672                                    BTRFS_BLOCK_GROUP_DIRTY);
673
674                 old_val = btrfs_block_group_used(&cache->item);
675                 num = min(total, cache->key.offset - block_in_group);
676                 if (alloc) {
677                         if (blocknr > cache->last_alloc)
678                                 cache->last_alloc = blocknr;
679                         if (!cache->data) {
680                                 for (i = 0; i < num; i++) {
681                                         clear_radix_bit(&info->extent_map_radix,
682                                                         blocknr + i);
683                                 }
684                         }
685                         if (cache->data != data &&
686                             old_val < (cache->key.offset >> 1)) {
687                                 cache->data = data;
688                                 radix_tree_delete(cache->radix,
689                                                   cache->key.objectid +
690                                                   cache->key.offset - 1);
691
692                                 if (data) {
693                                         cache->radix =
694                                                 &info->block_group_data_radix;
695                                         cache->item.flags |=
696                                                 BTRFS_BLOCK_GROUP_DATA;
697                                 } else {
698                                         cache->radix = &info->block_group_radix;
699                                         cache->item.flags &=
700                                                 ~BTRFS_BLOCK_GROUP_DATA;
701                                 }
702                                 ret = radix_tree_insert(cache->radix,
703                                                         cache->key.objectid +
704                                                         cache->key.offset - 1,
705                                                         (void *)cache);
706                         }
707                         old_val += num;
708                 } else {
709                         old_val -= num;
710                         if (blocknr < cache->first_free)
711                                 cache->first_free = blocknr;
712                         if (!cache->data && mark_free) {
713                                 for (i = 0; i < num; i++) {
714                                         set_radix_bit(&info->extent_map_radix,
715                                                       blocknr + i);
716                                 }
717                         }
718                         if (old_val < (cache->key.offset >> 1) &&
719                             old_val + num >= (cache->key.offset >> 1)) {
720                                 radix_tree_tag_set(cache->radix,
721                                                    cache->key.objectid +
722                                                    cache->key.offset - 1,
723                                                    BTRFS_BLOCK_GROUP_AVAIL);
724                         }
725                 }
726                 btrfs_set_block_group_used(&cache->item, old_val);
727                 total -= num;
728                 blocknr += num;
729         }
730         return 0;
731 }
732
733 static int try_remove_page(struct address_space *mapping, unsigned long index)
734 {
735         int ret;
736         ret = invalidate_mapping_pages(mapping, index, index);
737         return ret;
738 }
739
740 int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, struct
741                                btrfs_root *root)
742 {
743         unsigned long gang[8];
744         struct inode *btree_inode = root->fs_info->btree_inode;
745         struct btrfs_block_group_cache *block_group;
746         u64 first = 0;
747         int ret;
748         int i;
749         struct radix_tree_root *pinned_radix = &root->fs_info->pinned_radix;
750         struct radix_tree_root *extent_radix = &root->fs_info->extent_map_radix;
751
752         while(1) {
753                 ret = find_first_radix_bit(pinned_radix, gang, 0,
754                                            ARRAY_SIZE(gang));
755                 if (!ret)
756                         break;
757                 if (!first)
758                         first = gang[0];
759                 for (i = 0; i < ret; i++) {
760                         clear_radix_bit(pinned_radix, gang[i]);
761                         block_group = btrfs_lookup_block_group(root->fs_info,
762                                                                gang[i]);
763                         if (block_group) {
764                                 WARN_ON(block_group->pinned == 0);
765                                 block_group->pinned--;
766                                 if (gang[i] < block_group->last_alloc)
767                                         block_group->last_alloc = gang[i];
768                                 if (gang[i] < block_group->last_prealloc)
769                                         block_group->last_prealloc = gang[i];
770                                 if (!block_group->data)
771                                         set_radix_bit(extent_radix, gang[i]);
772                         }
773                         try_remove_page(btree_inode->i_mapping,
774                                         gang[i] << (PAGE_CACHE_SHIFT -
775                                                     btree_inode->i_blkbits));
776                 }
777         }
778         return 0;
779 }
780
781 static int finish_current_insert(struct btrfs_trans_handle *trans, struct
782                                  btrfs_root *extent_root)
783 {
784         struct btrfs_key ins;
785         struct btrfs_extent_item extent_item;
786         int i;
787         int ret;
788         u64 super_blocks_used;
789         struct btrfs_fs_info *info = extent_root->fs_info;
790
791         btrfs_set_extent_refs(&extent_item, 1);
792         ins.offset = 1;
793         ins.flags = 0;
794         btrfs_set_key_type(&ins, BTRFS_EXTENT_ITEM_KEY);
795         btrfs_set_extent_owner(&extent_item, extent_root->root_key.objectid);
796
797         for (i = 0; i < extent_root->fs_info->extent_tree_insert_nr; i++) {
798                 ins.objectid = extent_root->fs_info->extent_tree_insert[i];
799                 super_blocks_used = btrfs_super_blocks_used(&info->super_copy);
800                 btrfs_set_super_blocks_used(&info->super_copy,
801                                             super_blocks_used + 1);
802                 ret = btrfs_insert_item(trans, extent_root, &ins, &extent_item,
803                                         sizeof(extent_item));
804                 BUG_ON(ret);
805         }
806         extent_root->fs_info->extent_tree_insert_nr = 0;
807         return 0;
808 }
809
810 static int pin_down_block(struct btrfs_root *root, u64 blocknr, int pending)
811 {
812         int err;
813         struct btrfs_header *header;
814         struct buffer_head *bh;
815
816         if (!pending) {
817                 bh = btrfs_find_tree_block(root, blocknr);
818                 if (bh) {
819                         if (buffer_uptodate(bh)) {
820                                 u64 transid =
821                                     root->fs_info->running_transaction->transid;
822                                 header = btrfs_buffer_header(bh);
823                                 if (btrfs_header_generation(header) ==
824                                     transid) {
825                                         btrfs_block_release(root, bh);
826                                         return 0;
827                                 }
828                         }
829                         btrfs_block_release(root, bh);
830                 }
831                 err = set_radix_bit(&root->fs_info->pinned_radix, blocknr);
832                 if (!err) {
833                         struct btrfs_block_group_cache *cache;
834                         cache = btrfs_lookup_block_group(root->fs_info,
835                                                          blocknr);
836                         if (cache)
837                                 cache->pinned++;
838                 }
839         } else {
840                 err = set_radix_bit(&root->fs_info->pending_del_radix, blocknr);
841         }
842         BUG_ON(err < 0);
843         return 0;
844 }
845
846 /*
847  * remove an extent from the root, returns 0 on success
848  */
849 static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
850                          *root, u64 blocknr, u64 num_blocks, int pin,
851                          int mark_free)
852 {
853         struct btrfs_path *path;
854         struct btrfs_key key;
855         struct btrfs_fs_info *info = root->fs_info;
856         struct btrfs_root *extent_root = info->extent_root;
857         int ret;
858         struct btrfs_extent_item *ei;
859         struct btrfs_key ins;
860         u32 refs;
861
862         key.objectid = blocknr;
863         key.flags = 0;
864         btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
865         key.offset = num_blocks;
866
867         path = btrfs_alloc_path();
868         if (!path)
869                 return -ENOMEM;
870
871         ret = find_free_extent(trans, root, 0, 0, (u64)-1, 0, &ins, 0);
872         if (ret) {
873                 btrfs_free_path(path);
874                 return ret;
875         }
876
877         ret = btrfs_search_slot(trans, extent_root, &key, path, -1, 1);
878         if (ret < 0)
879                 return ret;
880         BUG_ON(ret);
881         ei = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), path->slots[0],
882                             struct btrfs_extent_item);
883         BUG_ON(ei->refs == 0);
884         refs = btrfs_extent_refs(ei) - 1;
885         btrfs_set_extent_refs(ei, refs);
886         btrfs_mark_buffer_dirty(path->nodes[0]);
887         if (refs == 0) {
888                 u64 super_blocks_used;
889
890                 if (pin) {
891                         ret = pin_down_block(root, blocknr, 0);
892                         BUG_ON(ret);
893                 }
894
895                 super_blocks_used = btrfs_super_blocks_used(&info->super_copy);
896                 btrfs_set_super_blocks_used(&info->super_copy,
897                                             super_blocks_used - num_blocks);
898                 ret = btrfs_del_item(trans, extent_root, path);
899                 if (ret) {
900                         return ret;
901                 }
902                 ret = update_block_group(trans, root, blocknr, num_blocks, 0,
903                                          mark_free, 0);
904                 BUG_ON(ret);
905         }
906         btrfs_free_path(path);
907         finish_current_insert(trans, extent_root);
908         return ret;
909 }
910
911 /*
912  * find all the blocks marked as pending in the radix tree and remove
913  * them from the extent map
914  */
915 static int del_pending_extents(struct btrfs_trans_handle *trans, struct
916                                btrfs_root *extent_root)
917 {
918         int ret;
919         int wret;
920         int err = 0;
921         unsigned long gang[4];
922         int i;
923         struct radix_tree_root *pending_radix;
924         struct radix_tree_root *pinned_radix;
925         struct btrfs_block_group_cache *cache;
926
927         pending_radix = &extent_root->fs_info->pending_del_radix;
928         pinned_radix = &extent_root->fs_info->pinned_radix;
929
930         while(1) {
931                 ret = find_first_radix_bit(pending_radix, gang, 0,
932                                            ARRAY_SIZE(gang));
933                 if (!ret)
934                         break;
935                 for (i = 0; i < ret; i++) {
936                         wret = set_radix_bit(pinned_radix, gang[i]);
937                         if (wret == 0) {
938                                 cache =
939                                   btrfs_lookup_block_group(extent_root->fs_info,
940                                                            gang[i]);
941                                 if (cache)
942                                         cache->pinned++;
943                         }
944                         if (wret < 0) {
945                                 printk(KERN_CRIT "set_radix_bit, err %d\n",
946                                        wret);
947                                 BUG_ON(wret < 0);
948                         }
949                         wret = clear_radix_bit(pending_radix, gang[i]);
950                         BUG_ON(wret);
951                         wret = __free_extent(trans, extent_root,
952                                              gang[i], 1, 0, 0);
953                         if (wret)
954                                 err = wret;
955                 }
956         }
957         return err;
958 }
959
960 /*
961  * remove an extent from the root, returns 0 on success
962  */
963 int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
964                       *root, u64 blocknr, u64 num_blocks, int pin)
965 {
966         struct btrfs_root *extent_root = root->fs_info->extent_root;
967         int pending_ret;
968         int ret;
969
970         if (root == extent_root) {
971                 pin_down_block(root, blocknr, 1);
972                 return 0;
973         }
974         ret = __free_extent(trans, root, blocknr, num_blocks, pin, pin == 0);
975         pending_ret = del_pending_extents(trans, root->fs_info->extent_root);
976         return ret ? ret : pending_ret;
977 }
978
979 /*
980  * walks the btree of allocated extents and find a hole of a given size.
981  * The key ins is changed to record the hole:
982  * ins->objectid == block start
983  * ins->flags = BTRFS_EXTENT_ITEM_KEY
984  * ins->offset == number of blocks
985  * Any available blocks before search_start are skipped.
986  */
987 static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
988                             *orig_root, u64 num_blocks, u64 search_start, u64
989                             search_end, u64 hint_block,
990                             struct btrfs_key *ins, int data)
991 {
992         struct btrfs_path *path;
993         struct btrfs_key key;
994         int ret;
995         u64 hole_size = 0;
996         int slot = 0;
997         u64 last_block = 0;
998         u64 test_block;
999         u64 orig_search_start = search_start;
1000         int start_found;
1001         struct btrfs_leaf *l;
1002         struct btrfs_root * root = orig_root->fs_info->extent_root;
1003         struct btrfs_fs_info *info = root->fs_info;
1004         int total_needed = num_blocks;
1005         int total_found = 0;
1006         int fill_prealloc = 0;
1007         int level;
1008         struct btrfs_block_group_cache *block_group;
1009         int full_scan = 0;
1010         int wrapped = 0;
1011         u64 limit;
1012
1013         ins->flags = 0;
1014         btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY);
1015
1016         level = btrfs_header_level(btrfs_buffer_header(root->node));
1017         if (num_blocks == 0) {
1018                 fill_prealloc = 1;
1019                 num_blocks = 1;
1020                 total_needed = (min(level + 1, BTRFS_MAX_LEVEL) + 2) * 3;
1021         }
1022         if (fill_prealloc) {
1023                 u64 first;
1024                 int nr = info->extent_tree_prealloc_nr;
1025                 first = info->extent_tree_prealloc[nr - 1];
1026                 if (info->extent_tree_prealloc_nr >= total_needed &&
1027                     first >= search_start) {
1028                         ins->objectid = info->extent_tree_prealloc[0];
1029                         ins->offset = 1;
1030                         return 0;
1031                 }
1032                 info->extent_tree_prealloc_nr = 0;
1033         }
1034         if (search_end == (u64)-1)
1035                 search_end = btrfs_super_total_blocks(&info->super_copy);
1036         if (hint_block) {
1037                 block_group = btrfs_lookup_block_group(info, hint_block);
1038                 block_group = btrfs_find_block_group(root, block_group,
1039                                                      hint_block, data, 1);
1040         } else {
1041                 block_group = btrfs_find_block_group(root,
1042                                                      trans->block_group, 0,
1043                                                      data, 1);
1044         }
1045
1046         path = btrfs_alloc_path();
1047
1048 check_failed:
1049         if (!block_group->data)
1050                 search_start = find_search_start(root, &block_group,
1051                                                  search_start, total_needed);
1052         else if (!full_scan)
1053                 search_start = max(block_group->last_alloc, search_start);
1054
1055         btrfs_init_path(path);
1056         ins->objectid = search_start;
1057         ins->offset = 0;
1058         start_found = 0;
1059
1060         ret = btrfs_search_slot(trans, root, ins, path, 0, 0);
1061         if (ret < 0)
1062                 goto error;
1063
1064         if (path->slots[0] > 0) {
1065                 path->slots[0]--;
1066         }
1067
1068         l = btrfs_buffer_leaf(path->nodes[0]);
1069         btrfs_disk_key_to_cpu(&key, &l->items[path->slots[0]].key);
1070         /*
1071          * a rare case, go back one key if we hit a block group item
1072          * instead of an extent item
1073          */
1074         if (btrfs_key_type(&key) != BTRFS_EXTENT_ITEM_KEY &&
1075             key.objectid + key.offset >= search_start) {
1076                 ins->objectid = key.objectid;
1077                 ins->offset = key.offset - 1;
1078                 btrfs_release_path(root, path);
1079                 ret = btrfs_search_slot(trans, root, ins, path, 0, 0);
1080                 if (ret < 0)
1081                         goto error;
1082
1083                 if (path->slots[0] > 0) {
1084                         path->slots[0]--;
1085                 }
1086         }
1087
1088         while (1) {
1089                 l = btrfs_buffer_leaf(path->nodes[0]);
1090                 slot = path->slots[0];
1091                 if (slot >= btrfs_header_nritems(&l->header)) {
1092                         if (fill_prealloc) {
1093                                 info->extent_tree_prealloc_nr = 0;
1094                                 total_found = 0;
1095                         }
1096                         if (start_found)
1097                                 limit = last_block +
1098                                         (block_group->key.offset >> 1);
1099                         else
1100                                 limit = search_start +
1101                                         (block_group->key.offset >> 1);
1102                         ret = btrfs_next_leaf(root, path);
1103                         if (ret == 0)
1104                                 continue;
1105                         if (ret < 0)
1106                                 goto error;
1107                         if (!start_found) {
1108                                 ins->objectid = search_start;
1109                                 ins->offset = search_end - search_start;
1110                                 start_found = 1;
1111                                 goto check_pending;
1112                         }
1113                         ins->objectid = last_block > search_start ?
1114                                         last_block : search_start;
1115                         ins->offset = search_end - ins->objectid;
1116                         goto check_pending;
1117                 }
1118
1119                 btrfs_disk_key_to_cpu(&key, &l->items[slot].key);
1120                 if (key.objectid >= search_start && key.objectid > last_block &&
1121                     start_found) {
1122                         if (last_block < search_start)
1123                                 last_block = search_start;
1124                         hole_size = key.objectid - last_block;
1125                         if (hole_size >= num_blocks) {
1126                                 ins->objectid = last_block;
1127                                 ins->offset = hole_size;
1128                                 goto check_pending;
1129                         }
1130                 }
1131
1132                 if (btrfs_key_type(&key) != BTRFS_EXTENT_ITEM_KEY)
1133                         goto next;
1134
1135                 start_found = 1;
1136                 last_block = key.objectid + key.offset;
1137                 if (!full_scan && last_block >= block_group->key.objectid +
1138                     block_group->key.offset) {
1139                         btrfs_release_path(root, path);
1140                         search_start = block_group->key.objectid +
1141                                 block_group->key.offset * 2;
1142                         goto new_group;
1143                 }
1144 next:
1145                 path->slots[0]++;
1146                 cond_resched();
1147         }
1148 check_pending:
1149         /* we have to make sure we didn't find an extent that has already
1150          * been allocated by the map tree or the original allocation
1151          */
1152         btrfs_release_path(root, path);
1153         BUG_ON(ins->objectid < search_start);
1154
1155         if (ins->objectid + num_blocks >= search_end) {
1156                 if (full_scan) {
1157                         ret = -ENOSPC;
1158                         goto error;
1159                 }
1160                 search_start = orig_search_start;
1161                 if (wrapped)
1162                         full_scan = 1;
1163                 else
1164                         wrapped = 1;
1165                 goto new_group;
1166         }
1167         for (test_block = ins->objectid;
1168              test_block < ins->objectid + num_blocks; test_block++) {
1169                 if (test_radix_bit(&info->pinned_radix, test_block)) {
1170                         search_start = test_block + 1;
1171                         goto new_group;
1172                 }
1173         }
1174         if (!fill_prealloc && info->extent_tree_insert_nr) {
1175                 u64 last =
1176                   info->extent_tree_insert[info->extent_tree_insert_nr - 1];
1177                 if (ins->objectid + num_blocks >
1178                     info->extent_tree_insert[0] &&
1179                     ins->objectid <= last) {
1180                         search_start = last + 1;
1181                         WARN_ON(!full_scan);
1182                         goto new_group;
1183                 }
1184         }
1185         if (!fill_prealloc && info->extent_tree_prealloc_nr) {
1186                 u64 first =
1187                   info->extent_tree_prealloc[info->extent_tree_prealloc_nr - 1];
1188                 if (ins->objectid + num_blocks > first &&
1189                     ins->objectid <= info->extent_tree_prealloc[0]) {
1190                         search_start = info->extent_tree_prealloc[0] + 1;
1191                         goto new_group;
1192                 }
1193         }
1194         if (fill_prealloc) {
1195                 int nr;
1196                 test_block = ins->objectid;
1197                 if (test_block - info->extent_tree_prealloc[total_needed - 1] >=
1198                     leaf_range(root)) {
1199                         total_found = 0;
1200                         info->extent_tree_prealloc_nr = total_found;
1201                 }
1202                 while(test_block < ins->objectid + ins->offset &&
1203                       total_found < total_needed) {
1204                         nr = total_needed - total_found - 1;
1205                         BUG_ON(nr < 0);
1206                         info->extent_tree_prealloc[nr] = test_block;
1207                         total_found++;
1208                         test_block++;
1209                 }
1210                 if (total_found < total_needed) {
1211                         search_start = test_block;
1212                         goto new_group;
1213                 }
1214                 info->extent_tree_prealloc_nr = total_found;
1215         }
1216         if (!data) {
1217                 block_group = btrfs_lookup_block_group(info, ins->objectid);
1218                 if (block_group) {
1219                         if (fill_prealloc)
1220                                 block_group->last_prealloc =
1221                                      info->extent_tree_prealloc[total_needed-1];
1222                         else
1223                                 trans->block_group = block_group;
1224                 }
1225         }
1226         ins->offset = num_blocks;
1227         btrfs_free_path(path);
1228         return 0;
1229
1230 new_group:
1231         if (search_start + num_blocks >= search_end) {
1232                 search_start = orig_search_start;
1233                 if (full_scan) {
1234                         ret = -ENOSPC;
1235                         goto error;
1236                 }
1237                 if (wrapped)
1238                         full_scan = 1;
1239                 else
1240                         wrapped = 1;
1241         }
1242         block_group = btrfs_lookup_block_group(info, search_start);
1243         cond_resched();
1244         if (!full_scan)
1245                 block_group = btrfs_find_block_group(root, block_group,
1246                                                      search_start, data, 0);
1247         goto check_failed;
1248
1249 error:
1250         btrfs_release_path(root, path);
1251         btrfs_free_path(path);
1252         return ret;
1253 }
1254 /*
1255  * finds a free extent and does all the dirty work required for allocation
1256  * returns the key for the extent through ins, and a tree buffer for
1257  * the first block of the extent through buf.
1258  *
1259  * returns 0 if everything worked, non-zero otherwise.
1260  */
1261 int btrfs_alloc_extent(struct btrfs_trans_handle *trans,
1262                        struct btrfs_root *root, u64 owner,
1263                        u64 num_blocks, u64 hint_block,
1264                        u64 search_end, struct btrfs_key *ins, int data)
1265 {
1266         int ret;
1267         int pending_ret;
1268         u64 super_blocks_used;
1269         u64 search_start = 0;
1270         struct btrfs_fs_info *info = root->fs_info;
1271         struct btrfs_root *extent_root = info->extent_root;
1272         struct btrfs_extent_item extent_item;
1273         struct btrfs_key prealloc_key;
1274
1275         btrfs_set_extent_refs(&extent_item, 1);
1276         btrfs_set_extent_owner(&extent_item, owner);
1277
1278         if (root == extent_root) {
1279                 int nr;
1280                 BUG_ON(info->extent_tree_prealloc_nr == 0);
1281                 BUG_ON(num_blocks != 1);
1282                 ins->offset = 1;
1283                 info->extent_tree_prealloc_nr--;
1284                 nr = info->extent_tree_prealloc_nr;
1285                 ins->objectid = info->extent_tree_prealloc[nr];
1286                 info->extent_tree_insert[info->extent_tree_insert_nr++] =
1287                         ins->objectid;
1288                 ret = update_block_group(trans, root,
1289                                          ins->objectid, ins->offset, 1, 0, 0);
1290                 BUG_ON(ret);
1291                 return 0;
1292         }
1293
1294         /*
1295          * if we're doing a data allocation, preallocate room in the
1296          * extent tree first.  This way the extent tree blocks end up
1297          * in the correct block group.
1298          */
1299         if (data) {
1300                 ret = find_free_extent(trans, root, 0, 0,
1301                                        search_end, 0, &prealloc_key, 0);
1302                 if (ret) {
1303                         return ret;
1304                 }
1305                 if (prealloc_key.objectid + prealloc_key.offset >= search_end) {
1306                         int nr = info->extent_tree_prealloc_nr;
1307                         search_end = info->extent_tree_prealloc[nr - 1] - 1;
1308                 } else {
1309                         search_start = info->extent_tree_prealloc[0] + 1;
1310                 }
1311         }
1312         if (hint_block < search_start)
1313                 hint_block = search_start;
1314         /* do the real allocation */
1315         ret = find_free_extent(trans, root, num_blocks, search_start,
1316                                search_end, hint_block, ins, data);
1317         if (ret) {
1318                 if (search_start == 0)
1319                         return ret;
1320                 search_end = search_start - 1;
1321                 search_start = 0;
1322                 hint_block = search_start;
1323                 ret = find_free_extent(trans, root, num_blocks, search_start,
1324                                        search_end, hint_block, ins, data);
1325                 if (ret)
1326                         return ret;
1327         }
1328
1329         /*
1330          * if we're doing a metadata allocation, preallocate space in the
1331          * extent tree second.  This way, we don't create a tiny hole
1332          * in the allocation map between any unused preallocation blocks
1333          * and the metadata block we're actually allocating.  On disk,
1334          * it'll go:
1335          * [block we've allocated], [used prealloc 1], [ unused prealloc ]
1336          * The unused prealloc will get reused the next time around.
1337          */
1338         if (!data) {
1339                 if (ins->objectid + ins->offset >= search_end)
1340                         search_end = ins->objectid - 1;
1341                 else
1342                         search_start = ins->objectid + ins->offset;
1343
1344                 if (hint_block < search_start)
1345                         hint_block = search_start;
1346
1347                 ret = find_free_extent(trans, root, 0, search_start,
1348                                        search_end, hint_block,
1349                                        &prealloc_key, 0);
1350                 if (ret) {
1351                         if (search_start == 0)
1352                                 return ret;
1353                         search_end = search_start - 1;
1354                         search_start = 0;
1355                         hint_block = search_start;
1356                         ret = find_free_extent(trans, root, 0, search_start,
1357                                                search_end, hint_block,
1358                                                &prealloc_key, 0);
1359                         if (ret)
1360                                 return ret;
1361                 }
1362         }
1363
1364         super_blocks_used = btrfs_super_blocks_used(&info->super_copy);
1365         btrfs_set_super_blocks_used(&info->super_copy, super_blocks_used +
1366                                     num_blocks);
1367         ret = btrfs_insert_item(trans, extent_root, ins, &extent_item,
1368                                 sizeof(extent_item));
1369
1370         finish_current_insert(trans, extent_root);
1371         pending_ret = del_pending_extents(trans, extent_root);
1372         if (ret) {
1373                 return ret;
1374         }
1375         if (pending_ret) {
1376                 return pending_ret;
1377         }
1378         ret = update_block_group(trans, root, ins->objectid, ins->offset, 1, 0,
1379                                  data);
1380         BUG_ON(ret);
1381         return 0;
1382 }
1383
1384 /*
1385  * helper function to allocate a block for a given tree
1386  * returns the tree buffer or NULL.
1387  */
1388 struct buffer_head *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
1389                                            struct btrfs_root *root, u64 hint)
1390 {
1391         struct btrfs_key ins;
1392         int ret;
1393         struct buffer_head *buf;
1394
1395         ret = btrfs_alloc_extent(trans, root, root->root_key.objectid,
1396                                  1, hint, (unsigned long)-1, &ins, 0);
1397         if (ret) {
1398                 BUG_ON(ret > 0);
1399                 return ERR_PTR(ret);
1400         }
1401         buf = btrfs_find_create_tree_block(root, ins.objectid);
1402         if (!buf) {
1403                 btrfs_free_extent(trans, root, ins.objectid, 1, 0);
1404                 return ERR_PTR(-ENOMEM);
1405         }
1406         set_buffer_uptodate(buf);
1407         set_buffer_checked(buf);
1408         set_radix_bit(&trans->transaction->dirty_pages, buf->b_page->index);
1409         return buf;
1410 }
1411
1412 static int drop_leaf_ref(struct btrfs_trans_handle *trans,
1413                          struct btrfs_root *root, struct buffer_head *cur)
1414 {
1415         struct btrfs_disk_key *key;
1416         struct btrfs_leaf *leaf;
1417         struct btrfs_file_extent_item *fi;
1418         int i;
1419         int nritems;
1420         int ret;
1421
1422         BUG_ON(!btrfs_is_leaf(btrfs_buffer_node(cur)));
1423         leaf = btrfs_buffer_leaf(cur);
1424         nritems = btrfs_header_nritems(&leaf->header);
1425         for (i = 0; i < nritems; i++) {
1426                 u64 disk_blocknr;
1427                 key = &leaf->items[i].key;
1428                 if (btrfs_disk_key_type(key) != BTRFS_EXTENT_DATA_KEY)
1429                         continue;
1430                 fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item);
1431                 if (btrfs_file_extent_type(fi) == BTRFS_FILE_EXTENT_INLINE)
1432                         continue;
1433                 /*
1434                  * FIXME make sure to insert a trans record that
1435                  * repeats the snapshot del on crash
1436                  */
1437                 disk_blocknr = btrfs_file_extent_disk_blocknr(fi);
1438                 if (disk_blocknr == 0)
1439                         continue;
1440                 ret = btrfs_free_extent(trans, root, disk_blocknr,
1441                                         btrfs_file_extent_disk_num_blocks(fi),
1442                                         0);
1443                 BUG_ON(ret);
1444         }
1445         return 0;
1446 }
1447
1448 static void reada_walk_down(struct btrfs_root *root,
1449                             struct btrfs_node *node)
1450 {
1451         int i;
1452         u32 nritems;
1453         u64 blocknr;
1454         int ret;
1455         u32 refs;
1456
1457         nritems = btrfs_header_nritems(&node->header);
1458         for (i = 0; i < nritems; i++) {
1459                 blocknr = btrfs_node_blockptr(node, i);
1460                 ret = lookup_extent_ref(NULL, root, blocknr, 1, &refs);
1461                 BUG_ON(ret);
1462                 if (refs != 1)
1463                         continue;
1464                 ret = readahead_tree_block(root, blocknr);
1465                 if (ret)
1466                         break;
1467         }
1468 }
1469
1470 /*
1471  * helper function for drop_snapshot, this walks down the tree dropping ref
1472  * counts as it goes.
1473  */
1474 static int walk_down_tree(struct btrfs_trans_handle *trans, struct btrfs_root
1475                           *root, struct btrfs_path *path, int *level)
1476 {
1477         struct buffer_head *next;
1478         struct buffer_head *cur;
1479         u64 blocknr;
1480         int ret;
1481         u32 refs;
1482
1483         WARN_ON(*level < 0);
1484         WARN_ON(*level >= BTRFS_MAX_LEVEL);
1485         ret = lookup_extent_ref(trans, root, bh_blocknr(path->nodes[*level]),
1486                                1, &refs);
1487         BUG_ON(ret);
1488         if (refs > 1)
1489                 goto out;
1490
1491         /*
1492          * walk down to the last node level and free all the leaves
1493          */
1494         while(*level >= 0) {
1495                 WARN_ON(*level < 0);
1496                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
1497                 cur = path->nodes[*level];
1498
1499                 if (*level > 0 && path->slots[*level] == 0)
1500                         reada_walk_down(root, btrfs_buffer_node(cur));
1501
1502                 if (btrfs_header_level(btrfs_buffer_header(cur)) != *level)
1503                         WARN_ON(1);
1504
1505                 if (path->slots[*level] >=
1506                     btrfs_header_nritems(btrfs_buffer_header(cur)))
1507                         break;
1508                 if (*level == 0) {
1509                         ret = drop_leaf_ref(trans, root, cur);
1510                         BUG_ON(ret);
1511                         break;
1512                 }
1513                 blocknr = btrfs_node_blockptr(btrfs_buffer_node(cur),
1514                                               path->slots[*level]);
1515                 ret = lookup_extent_ref(trans, root, blocknr, 1, &refs);
1516                 BUG_ON(ret);
1517                 if (refs != 1) {
1518                         path->slots[*level]++;
1519                         ret = btrfs_free_extent(trans, root, blocknr, 1, 1);
1520                         BUG_ON(ret);
1521                         continue;
1522                 }
1523                 next = read_tree_block(root, blocknr);
1524                 WARN_ON(*level <= 0);
1525                 if (path->nodes[*level-1])
1526                         btrfs_block_release(root, path->nodes[*level-1]);
1527                 path->nodes[*level-1] = next;
1528                 *level = btrfs_header_level(btrfs_buffer_header(next));
1529                 path->slots[*level] = 0;
1530         }
1531 out:
1532         WARN_ON(*level < 0);
1533         WARN_ON(*level >= BTRFS_MAX_LEVEL);
1534         ret = btrfs_free_extent(trans, root,
1535                                 bh_blocknr(path->nodes[*level]), 1, 1);
1536         btrfs_block_release(root, path->nodes[*level]);
1537         path->nodes[*level] = NULL;
1538         *level += 1;
1539         BUG_ON(ret);
1540         return 0;
1541 }
1542
1543 /*
1544  * helper for dropping snapshots.  This walks back up the tree in the path
1545  * to find the first node higher up where we haven't yet gone through
1546  * all the slots
1547  */
1548 static int walk_up_tree(struct btrfs_trans_handle *trans, struct btrfs_root
1549                         *root, struct btrfs_path *path, int *level)
1550 {
1551         int i;
1552         int slot;
1553         int ret;
1554         for(i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
1555                 slot = path->slots[i];
1556                 if (slot < btrfs_header_nritems(
1557                     btrfs_buffer_header(path->nodes[i])) - 1) {
1558                         path->slots[i]++;
1559                         *level = i;
1560                         return 0;
1561                 } else {
1562                         ret = btrfs_free_extent(trans, root,
1563                                                 bh_blocknr(path->nodes[*level]),
1564                                                 1, 1);
1565                         BUG_ON(ret);
1566                         btrfs_block_release(root, path->nodes[*level]);
1567                         path->nodes[*level] = NULL;
1568                         *level = i + 1;
1569                 }
1570         }
1571         return 1;
1572 }
1573
1574 /*
1575  * drop the reference count on the tree rooted at 'snap'.  This traverses
1576  * the tree freeing any blocks that have a ref count of zero after being
1577  * decremented.
1578  */
1579 int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root
1580                         *root, struct buffer_head *snap)
1581 {
1582         int ret = 0;
1583         int wret;
1584         int level;
1585         struct btrfs_path *path;
1586         int i;
1587         int orig_level;
1588
1589         path = btrfs_alloc_path();
1590         BUG_ON(!path);
1591
1592         level = btrfs_header_level(btrfs_buffer_header(snap));
1593         orig_level = level;
1594         path->nodes[level] = snap;
1595         path->slots[level] = 0;
1596         while(1) {
1597                 wret = walk_down_tree(trans, root, path, &level);
1598                 if (wret > 0)
1599                         break;
1600                 if (wret < 0)
1601                         ret = wret;
1602
1603                 wret = walk_up_tree(trans, root, path, &level);
1604                 if (wret > 0)
1605                         break;
1606                 if (wret < 0)
1607                         ret = wret;
1608         }
1609         for (i = 0; i <= orig_level; i++) {
1610                 if (path->nodes[i]) {
1611                         btrfs_block_release(root, path->nodes[i]);
1612                 }
1613         }
1614         btrfs_free_path(path);
1615         return ret;
1616 }
1617
1618 static int free_block_group_radix(struct radix_tree_root *radix)
1619 {
1620         int ret;
1621         struct btrfs_block_group_cache *cache[8];
1622         int i;
1623
1624         while(1) {
1625                 ret = radix_tree_gang_lookup(radix, (void **)cache, 0,
1626                                              ARRAY_SIZE(cache));
1627                 if (!ret)
1628                         break;
1629                 for (i = 0; i < ret; i++) {
1630                         radix_tree_delete(radix, cache[i]->key.objectid +
1631                                           cache[i]->key.offset - 1);
1632                         kfree(cache[i]);
1633                 }
1634         }
1635         return 0;
1636 }
1637
1638 int btrfs_free_block_groups(struct btrfs_fs_info *info)
1639 {
1640         int ret;
1641         int ret2;
1642         unsigned long gang[16];
1643         int i;
1644
1645         ret = free_block_group_radix(&info->block_group_radix);
1646         ret2 = free_block_group_radix(&info->block_group_data_radix);
1647         if (ret)
1648                 return ret;
1649         if (ret2)
1650                 return ret2;
1651
1652         while(1) {
1653                 ret = find_first_radix_bit(&info->extent_map_radix,
1654                                            gang, 0, ARRAY_SIZE(gang));
1655                 if (!ret)
1656                         break;
1657                 for (i = 0; i < ret; i++) {
1658                         clear_radix_bit(&info->extent_map_radix, gang[i]);
1659                 }
1660         }
1661         return 0;
1662 }
1663
1664 int btrfs_read_block_groups(struct btrfs_root *root)
1665 {
1666         struct btrfs_path *path;
1667         int ret;
1668         int err = 0;
1669         struct btrfs_block_group_item *bi;
1670         struct btrfs_block_group_cache *cache;
1671         struct btrfs_fs_info *info = root->fs_info;
1672         struct radix_tree_root *radix;
1673         struct btrfs_key key;
1674         struct btrfs_key found_key;
1675         struct btrfs_leaf *leaf;
1676         u64 group_size_blocks;
1677         u64 used;
1678
1679         group_size_blocks = BTRFS_BLOCK_GROUP_SIZE >>
1680                 root->fs_info->sb->s_blocksize_bits;
1681         root = info->extent_root;
1682         key.objectid = 0;
1683         key.offset = group_size_blocks;
1684         key.flags = 0;
1685         btrfs_set_key_type(&key, BTRFS_BLOCK_GROUP_ITEM_KEY);
1686
1687         path = btrfs_alloc_path();
1688         if (!path)
1689                 return -ENOMEM;
1690
1691         while(1) {
1692                 ret = btrfs_search_slot(NULL, info->extent_root,
1693                                         &key, path, 0, 0);
1694                 if (ret != 0) {
1695                         err = ret;
1696                         break;
1697                 }
1698                 leaf = btrfs_buffer_leaf(path->nodes[0]);
1699                 btrfs_disk_key_to_cpu(&found_key,
1700                                       &leaf->items[path->slots[0]].key);
1701                 cache = kmalloc(sizeof(*cache), GFP_NOFS);
1702                 if (!cache) {
1703                         err = -1;
1704                         break;
1705                 }
1706
1707                 bi = btrfs_item_ptr(leaf, path->slots[0],
1708                                     struct btrfs_block_group_item);
1709                 if (bi->flags & BTRFS_BLOCK_GROUP_DATA) {
1710                         radix = &info->block_group_data_radix;
1711                         cache->data = 1;
1712                 } else {
1713                         radix = &info->block_group_radix;
1714                         cache->data = 0;
1715                 }
1716
1717                 memcpy(&cache->item, bi, sizeof(*bi));
1718                 memcpy(&cache->key, &found_key, sizeof(found_key));
1719                 cache->last_alloc = cache->key.objectid;
1720                 cache->first_free = cache->key.objectid;
1721                 cache->last_prealloc = cache->key.objectid;
1722                 cache->pinned = 0;
1723                 cache->cached = 0;
1724
1725                 cache->radix = radix;
1726
1727                 key.objectid = found_key.objectid + found_key.offset;
1728                 btrfs_release_path(root, path);
1729                 ret = radix_tree_insert(radix, found_key.objectid +
1730                                         found_key.offset - 1,
1731                                         (void *)cache);
1732                 BUG_ON(ret);
1733                 used = btrfs_block_group_used(bi);
1734                 if (used < div_factor(key.offset, 8)) {
1735                         radix_tree_tag_set(radix, found_key.objectid +
1736                                            found_key.offset - 1,
1737                                            BTRFS_BLOCK_GROUP_AVAIL);
1738                 }
1739                 if (key.objectid >=
1740                     btrfs_super_total_blocks(&info->super_copy))
1741                         break;
1742         }
1743
1744         btrfs_free_path(path);
1745         return 0;
1746 }