Btrfs: ratelimit the generation printk for the free space cache
[firefly-linux-kernel-4.4.55.git] / fs / btrfs / free-space-cache.c
1 /*
2  * Copyright (C) 2008 Red Hat.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 02111-1307, USA.
17  */
18
19 #include <linux/pagemap.h>
20 #include <linux/sched.h>
21 #include <linux/slab.h>
22 #include <linux/math64.h>
23 #include <linux/ratelimit.h>
24 #include "ctree.h"
25 #include "free-space-cache.h"
26 #include "transaction.h"
27 #include "disk-io.h"
28 #include "extent_io.h"
29 #include "inode-map.h"
30
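/*
 * One bitmap occupies a single page worth of bits; MAX_CACHE_BYTES_PER_GIG
 * caps how much memory the in-memory free space tracking may use per GiB of
 * block group, which recalculate_thresholds() uses to balance extent entries
 * against bitmaps.
 */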
31 #define BITS_PER_BITMAP         (PAGE_CACHE_SIZE * 8)
32 #define MAX_CACHE_BYTES_PER_GIG (32 * 1024)
33
34 static int link_free_space(struct btrfs_free_space_ctl *ctl,
35                            struct btrfs_free_space *info);
36
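/*
 * Look up the free space header item keyed by the block group's start
 * offset, read back the location key of the inode that backs the on-disk
 * cache, and grab that inode.
 */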
37 static struct inode *__lookup_free_space_inode(struct btrfs_root *root,
38                                                struct btrfs_path *path,
39                                                u64 offset)
40 {
41         struct btrfs_key key;
42         struct btrfs_key location;
43         struct btrfs_disk_key disk_key;
44         struct btrfs_free_space_header *header;
45         struct extent_buffer *leaf;
46         struct inode *inode = NULL;
47         int ret;
48
49         key.objectid = BTRFS_FREE_SPACE_OBJECTID;
50         key.offset = offset;
51         key.type = 0;
52
53         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
54         if (ret < 0)
55                 return ERR_PTR(ret);
56         if (ret > 0) {
57                 btrfs_release_path(path);
58                 return ERR_PTR(-ENOENT);
59         }
60
61         leaf = path->nodes[0];
62         header = btrfs_item_ptr(leaf, path->slots[0],
63                                 struct btrfs_free_space_header);
64         btrfs_free_space_key(leaf, header, &disk_key);
65         btrfs_disk_key_to_cpu(&location, &disk_key);
66         btrfs_release_path(path);
67
68         inode = btrfs_iget(root->fs_info->sb, &location, root, NULL);
69         if (!inode)
70                 return ERR_PTR(-ENOENT);
71         if (IS_ERR(inode))
72                 return inode;
73         if (is_bad_inode(inode)) {
74                 iput(inode);
75                 return ERR_PTR(-ENOENT);
76         }
77
78         inode->i_mapping->flags &= ~__GFP_FS;
79
80         return inode;
81 }
82
83 struct inode *lookup_free_space_inode(struct btrfs_root *root,
84                                       struct btrfs_block_group_cache
85                                       *block_group, struct btrfs_path *path)
86 {
87         struct inode *inode = NULL;
88
89         spin_lock(&block_group->lock);
90         if (block_group->inode)
91                 inode = igrab(block_group->inode);
92         spin_unlock(&block_group->lock);
93         if (inode)
94                 return inode;
95
96         inode = __lookup_free_space_inode(root, path,
97                                           block_group->key.objectid);
98         if (IS_ERR(inode))
99                 return inode;
100
101         spin_lock(&block_group->lock);
102         if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM) {
103                 printk(KERN_INFO "Old style space inode found, converting.\n");
104                 BTRFS_I(inode)->flags &= ~BTRFS_INODE_NODATASUM;
105                 block_group->disk_cache_state = BTRFS_DC_CLEAR;
106         }
107
108         if (!btrfs_fs_closing(root->fs_info)) {
109                 block_group->inode = igrab(inode);
110                 block_group->iref = 1;
111         }
112         spin_unlock(&block_group->lock);
113
114         return inode;
115 }
116
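/*
 * Create the inode item that will back a free space cache file and the
 * matching free space header item keyed by the block group offset.
 */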
117 int __create_free_space_inode(struct btrfs_root *root,
118                               struct btrfs_trans_handle *trans,
119                               struct btrfs_path *path, u64 ino, u64 offset)
120 {
121         struct btrfs_key key;
122         struct btrfs_disk_key disk_key;
123         struct btrfs_free_space_header *header;
124         struct btrfs_inode_item *inode_item;
125         struct extent_buffer *leaf;
126         int ret;
127
128         ret = btrfs_insert_empty_inode(trans, root, path, ino);
129         if (ret)
130                 return ret;
131
132         leaf = path->nodes[0];
133         inode_item = btrfs_item_ptr(leaf, path->slots[0],
134                                     struct btrfs_inode_item);
135         btrfs_item_key(leaf, &disk_key, path->slots[0]);
136         memset_extent_buffer(leaf, 0, (unsigned long)inode_item,
137                              sizeof(*inode_item));
138         btrfs_set_inode_generation(leaf, inode_item, trans->transid);
139         btrfs_set_inode_size(leaf, inode_item, 0);
140         btrfs_set_inode_nbytes(leaf, inode_item, 0);
141         btrfs_set_inode_uid(leaf, inode_item, 0);
142         btrfs_set_inode_gid(leaf, inode_item, 0);
143         btrfs_set_inode_mode(leaf, inode_item, S_IFREG | 0600);
144         btrfs_set_inode_flags(leaf, inode_item, BTRFS_INODE_NOCOMPRESS |
145                               BTRFS_INODE_PREALLOC);
146         btrfs_set_inode_nlink(leaf, inode_item, 1);
147         btrfs_set_inode_transid(leaf, inode_item, trans->transid);
148         btrfs_set_inode_block_group(leaf, inode_item, offset);
149         btrfs_mark_buffer_dirty(leaf);
150         btrfs_release_path(path);
151
152         key.objectid = BTRFS_FREE_SPACE_OBJECTID;
153         key.offset = offset;
154         key.type = 0;
155
156         ret = btrfs_insert_empty_item(trans, root, path, &key,
157                                       sizeof(struct btrfs_free_space_header));
158         if (ret < 0) {
159                 btrfs_release_path(path);
160                 return ret;
161         }
162         leaf = path->nodes[0];
163         header = btrfs_item_ptr(leaf, path->slots[0],
164                                 struct btrfs_free_space_header);
165         memset_extent_buffer(leaf, 0, (unsigned long)header, sizeof(*header));
166         btrfs_set_free_space_key(leaf, header, &disk_key);
167         btrfs_mark_buffer_dirty(leaf);
168         btrfs_release_path(path);
169
170         return 0;
171 }
172
173 int create_free_space_inode(struct btrfs_root *root,
174                             struct btrfs_trans_handle *trans,
175                             struct btrfs_block_group_cache *block_group,
176                             struct btrfs_path *path)
177 {
178         int ret;
179         u64 ino;
180
181         ret = btrfs_find_free_objectid(root, &ino);
182         if (ret < 0)
183                 return ret;
184
185         return __create_free_space_inode(root, trans, path, ino,
186                                          block_group->key.objectid);
187 }
188
189 int btrfs_truncate_free_space_cache(struct btrfs_root *root,
190                                     struct btrfs_trans_handle *trans,
191                                     struct btrfs_path *path,
192                                     struct inode *inode)
193 {
194         struct btrfs_block_rsv *rsv;
195         loff_t oldsize;
196         int ret = 0;
197
198         rsv = trans->block_rsv;
199         trans->block_rsv = root->orphan_block_rsv;
200         ret = btrfs_block_rsv_check(trans, root,
201                                     root->orphan_block_rsv,
202                                     0, 5);
203         if (ret)
204                 return ret;
205
206         oldsize = i_size_read(inode);
207         btrfs_i_size_write(inode, 0);
208         truncate_pagecache(inode, oldsize, 0);
209
210         /*
211          * We don't need an orphan item because truncating the free space cache
212          * will never be split across transactions.
213          */
214         ret = btrfs_truncate_inode_items(trans, root, inode,
215                                          0, BTRFS_EXTENT_DATA_KEY);
216
217         trans->block_rsv = rsv;
218         if (ret) {
219                 WARN_ON(1);
220                 return ret;
221         }
222
223         ret = btrfs_update_inode(trans, root, inode);
224         return ret;
225 }
226
227 static int readahead_cache(struct inode *inode)
228 {
229         struct file_ra_state *ra;
230         unsigned long last_index;
231
232         ra = kzalloc(sizeof(*ra), GFP_NOFS);
233         if (!ra)
234                 return -ENOMEM;
235
236         file_ra_state_init(ra, inode->i_mapping);
237         last_index = (i_size_read(inode) - 1) >> PAGE_CACHE_SHIFT;
238
239         page_cache_sync_readahead(inode->i_mapping, ra, NULL, 0, last_index);
240
241         kfree(ra);
242
243         return 0;
244 }
245
246 int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
247                             struct btrfs_free_space_ctl *ctl,
248                             struct btrfs_path *path, u64 offset)
249 {
250         struct btrfs_free_space_header *header;
251         struct extent_buffer *leaf;
252         struct page *page;
253         struct btrfs_key key;
254         struct list_head bitmaps;
255         u64 num_entries;
256         u64 num_bitmaps;
257         u64 generation;
258         pgoff_t index = 0;
259         int ret = 0;
260
261         INIT_LIST_HEAD(&bitmaps);
262
263         /* Nothing in the space cache, goodbye */
264         if (!i_size_read(inode))
265                 goto out;
266
267         key.objectid = BTRFS_FREE_SPACE_OBJECTID;
268         key.offset = offset;
269         key.type = 0;
270
271         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
272         if (ret < 0)
273                 goto out;
274         else if (ret > 0) {
275                 btrfs_release_path(path);
276                 ret = 0;
277                 goto out;
278         }
279
280         ret = -1;
281
282         leaf = path->nodes[0];
283         header = btrfs_item_ptr(leaf, path->slots[0],
284                                 struct btrfs_free_space_header);
285         num_entries = btrfs_free_space_entries(leaf, header);
286         num_bitmaps = btrfs_free_space_bitmaps(leaf, header);
287         generation = btrfs_free_space_generation(leaf, header);
288         btrfs_release_path(path);
289
290         if (BTRFS_I(inode)->generation != generation) {
291                 printk(KERN_ERR "btrfs: free space inode generation (%llu) did"
292                        " not match free space cache generation (%llu)\n",
293                        (unsigned long long)BTRFS_I(inode)->generation,
294                        (unsigned long long)generation);
295                 goto out;
296         }
297
298         if (!num_entries)
299                 goto out;
300
301         ret = readahead_cache(inode);
302         if (ret)
303                 goto out;
304
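        /*
         * Walk the cache pages in order: the first page starts with a
         * placeholder crc and the generation, followed by packed free space
         * entries; each bitmap entry queued below is filled in from one of
         * the full pages that follow the entry pages.
         */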
305         while (1) {
306                 struct btrfs_free_space_entry *entry;
307                 struct btrfs_free_space *e;
308                 void *addr;
309                 unsigned long offset = 0;
310                 int need_loop = 0;
311
312                 if (!num_entries && !num_bitmaps)
313                         break;
314
315                 page = find_or_create_page(inode->i_mapping, index, GFP_NOFS);
316                 if (!page)
317                         goto free_cache;
318
319                 if (!PageUptodate(page)) {
320                         btrfs_readpage(NULL, page);
321                         lock_page(page);
322                         if (!PageUptodate(page)) {
323                                 unlock_page(page);
324                                 page_cache_release(page);
325                                 printk(KERN_ERR "btrfs: error reading free "
326                                        "space cache\n");
327                                 goto free_cache;
328                         }
329                 }
330                 addr = kmap(page);
331
332                 if (index == 0) {
333                         u64 *gen;
334
335                         /*
336                          * We put a bogus crc in the front of the first page in
337                          * case old kernels try to mount a fs with the new
338                          * format to make sure they discard the cache.
339                          */
340                         addr += sizeof(u64);
341                         offset += sizeof(u64);
342
343                         gen = addr;
344                         if (*gen != BTRFS_I(inode)->generation) {
345                                 printk_ratelimited(KERN_ERR "btrfs: space cache"
346                                         " generation (%llu) does not match "
347                                         "inode (%llu)\n",
348                                         (unsigned long long)*gen,
349                                         (unsigned long long)
350                                         BTRFS_I(inode)->generation);
351                                 kunmap(page);
352                                 unlock_page(page);
353                                 page_cache_release(page);
354                                 goto free_cache;
355                         }
356                         addr += sizeof(u64);
357                         offset += sizeof(u64);
358                 }
359                 entry = addr;
360
361                 while (1) {
362                         if (!num_entries)
363                                 break;
364
365                         need_loop = 1;
366                         e = kmem_cache_zalloc(btrfs_free_space_cachep,
367                                               GFP_NOFS);
368                         if (!e) {
369                                 kunmap(page);
370                                 unlock_page(page);
371                                 page_cache_release(page);
372                                 goto free_cache;
373                         }
374
375                         e->offset = le64_to_cpu(entry->offset);
376                         e->bytes = le64_to_cpu(entry->bytes);
377                         if (!e->bytes) {
378                                 kunmap(page);
379                                 kmem_cache_free(btrfs_free_space_cachep, e);
380                                 unlock_page(page);
381                                 page_cache_release(page);
382                                 goto free_cache;
383                         }
384
385                         if (entry->type == BTRFS_FREE_SPACE_EXTENT) {
386                                 spin_lock(&ctl->tree_lock);
387                                 ret = link_free_space(ctl, e);
388                                 spin_unlock(&ctl->tree_lock);
389                                 if (ret) {
390                                         printk(KERN_ERR "Duplicate entries in "
391                                                "free space cache, dumping\n");
392                                         kunmap(page);
393                                         unlock_page(page);
394                                         page_cache_release(page);
395                                         goto free_cache;
396                                 }
397                         } else {
398                                 e->bitmap = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS);
399                                 if (!e->bitmap) {
400                                         kunmap(page);
401                                         kmem_cache_free(
402                                                 btrfs_free_space_cachep, e);
403                                         unlock_page(page);
404                                         page_cache_release(page);
405                                         goto free_cache;
406                                 }
407                                 spin_lock(&ctl->tree_lock);
408                                 ret = link_free_space(ctl, e);
409                                 ctl->total_bitmaps++;
410                                 ctl->op->recalc_thresholds(ctl);
411                                 spin_unlock(&ctl->tree_lock);
412                                 if (ret) {
413                                         printk(KERN_ERR "Duplicate entries in "
414                                                "free space cache, dumping\n");
415                                         kunmap(page);
416                                         unlock_page(page);
417                                         page_cache_release(page);
418                                         goto free_cache;
419                                 }
420                                 list_add_tail(&e->list, &bitmaps);
421                         }
422
423                         num_entries--;
424                         offset += sizeof(struct btrfs_free_space_entry);
425                         if (offset + sizeof(struct btrfs_free_space_entry) >=
426                             PAGE_CACHE_SIZE)
427                                 break;
428                         entry++;
429                 }
430
431                 /*
432                  * We read an entry out of this page, so we need to move on to the
433                  * next page.
434                  */
435                 if (need_loop) {
436                         kunmap(page);
437                         goto next;
438                 }
439
440                 /*
441                  * The bitmap pages come after the entry pages, in the order the
442                  * bitmap entries were queued above; fill the next queued bitmap.
443                  */
444                 e = list_entry(bitmaps.next, struct btrfs_free_space, list);
445                 list_del_init(&e->list);
446                 memcpy(e->bitmap, addr, PAGE_CACHE_SIZE);
447                 kunmap(page);
448                 num_bitmaps--;
449 next:
450                 unlock_page(page);
451                 page_cache_release(page);
452                 index++;
453         }
454
455         ret = 1;
456 out:
457         return ret;
458 free_cache:
459         __btrfs_remove_free_space_cache(ctl);
460         goto out;
461 }
462
463 int load_free_space_cache(struct btrfs_fs_info *fs_info,
464                           struct btrfs_block_group_cache *block_group)
465 {
466         struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
467         struct btrfs_root *root = fs_info->tree_root;
468         struct inode *inode;
469         struct btrfs_path *path;
470         int ret;
471         bool matched;
472         u64 used = btrfs_block_group_used(&block_group->item);
473
474         /*
475          * If we're unmounting then just return, since this does a search on the
476          * normal root and not the commit root and we could deadlock.
477          */
478         if (btrfs_fs_closing(fs_info))
479                 return 0;
480
481         /*
482          * If this block group has been marked to be cleared for one reason or
483          * another then we can't trust the on disk cache, so just return.
484          */
485         spin_lock(&block_group->lock);
486         if (block_group->disk_cache_state != BTRFS_DC_WRITTEN) {
487                 spin_unlock(&block_group->lock);
488                 return 0;
489         }
490         spin_unlock(&block_group->lock);
491
492         path = btrfs_alloc_path();
493         if (!path)
494                 return 0;
495
496         inode = lookup_free_space_inode(root, block_group, path);
497         if (IS_ERR(inode)) {
498                 btrfs_free_path(path);
499                 return 0;
500         }
501
502         ret = __load_free_space_cache(fs_info->tree_root, inode, ctl,
503                                       path, block_group->key.objectid);
504         btrfs_free_path(path);
505         if (ret <= 0)
506                 goto out;
507
508         spin_lock(&ctl->tree_lock);
509         matched = (ctl->free_space == (block_group->key.offset - used -
510                                        block_group->bytes_super));
511         spin_unlock(&ctl->tree_lock);
512
513         if (!matched) {
514                 __btrfs_remove_free_space_cache(ctl);
515                 printk(KERN_ERR "block group %llu has the wrong amount of free "
516                        "space\n", block_group->key.objectid);
517                 ret = -1;
518         }
519 out:
520         if (ret < 0) {
521                 /* This cache is bogus, make sure it gets cleared */
522                 spin_lock(&block_group->lock);
523                 block_group->disk_cache_state = BTRFS_DC_CLEAR;
524                 spin_unlock(&block_group->lock);
525                 ret = 0;
526
527                 printk(KERN_ERR "btrfs: failed to load free space cache "
528                        "for block group %llu\n", block_group->key.objectid);
529         }
530
531         iput(inode);
532         return ret;
533 }
534
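/*
 * Write the in-memory free space for this block group into its cache inode:
 * a generation header on the first page, packed extent and bitmap entries
 * (including any still-pinned extents in the group), one full page per
 * bitmap at the end, and finally an updated free space header item.
 */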
535 int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
536                             struct btrfs_free_space_ctl *ctl,
537                             struct btrfs_block_group_cache *block_group,
538                             struct btrfs_trans_handle *trans,
539                             struct btrfs_path *path, u64 offset)
540 {
541         struct btrfs_free_space_header *header;
542         struct extent_buffer *leaf;
543         struct rb_node *node;
544         struct list_head *pos, *n;
545         struct page **pages;
546         struct page *page;
547         struct extent_state *cached_state = NULL;
548         struct btrfs_free_cluster *cluster = NULL;
549         struct extent_io_tree *unpin = NULL;
550         struct list_head bitmap_list;
551         struct btrfs_key key;
552         u64 start, end, len;
553         u64 bytes = 0;
554         u32 crc = ~(u32)0;
555         int index = 0, num_pages = 0;
556         int entries = 0;
557         int bitmaps = 0;
558         int ret = -1;
559         bool next_page = false;
560         bool out_of_space = false;
561
562         INIT_LIST_HEAD(&bitmap_list);
563
564         node = rb_first(&ctl->free_space_offset);
565         if (!node)
566                 return 0;
567
568         if (!i_size_read(inode))
569                 return -1;
570
571         num_pages = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >>
572                 PAGE_CACHE_SHIFT;
573
574         filemap_write_and_wait(inode->i_mapping);
575         btrfs_wait_ordered_range(inode, inode->i_size &
576                                  ~(root->sectorsize - 1), (u64)-1);
577
578         pages = kzalloc(sizeof(struct page *) * num_pages, GFP_NOFS);
579         if (!pages)
580                 return -1;
581
582         /* Get the cluster for this block_group if it exists */
583         if (block_group && !list_empty(&block_group->cluster_list))
584                 cluster = list_entry(block_group->cluster_list.next,
585                                      struct btrfs_free_cluster,
586                                      block_group_list);
587
588         /*
589          * We shouldn't have switched the pinned extents yet so this is the
590          * right one
591          */
592         unpin = root->fs_info->pinned_extents;
593
594         /*
595          * Lock all pages first so we can lock the extent safely.
596          *
597          * NOTE: Because we hold the ref the entire time we're going to write to
598          * the page, find_get_page should never fail, so we don't do a check
599          * after find_get_page at this point.  Just putting this here so people
600          * know and don't freak out.
601          */
602         while (index < num_pages) {
603                 page = find_or_create_page(inode->i_mapping, index, GFP_NOFS);
604                 if (!page) {
605                         int i;
606
607                         for (i = 0; i < index; i++) {
608                                 unlock_page(pages[i]);
609                                 page_cache_release(pages[i]);
610                         }
611                         goto out;
612                 }
613                 pages[index] = page;
614                 index++;
615         }
616
617         index = 0;
618         lock_extent_bits(&BTRFS_I(inode)->io_tree, 0, i_size_read(inode) - 1,
619                          0, &cached_state, GFP_NOFS);
620
621         /*
622          * When searching for pinned extents, we need to start at our start
623          * offset.
624          */
625         if (block_group)
626                 start = block_group->key.objectid;
627
628         /* Write out the extent entries */
629         do {
630                 struct btrfs_free_space_entry *entry;
631                 void *addr, *orig;
632                 unsigned long offset = 0;
633
634                 next_page = false;
635
636                 if (index >= num_pages) {
637                         out_of_space = true;
638                         break;
639                 }
640
641                 page = pages[index];
642
643                 orig = addr = kmap(page);
644                 if (index == 0) {
645                         u64 *gen;
646
647                         /*
648                          * We're going to put in a bogus crc for this page to
649                          * make sure that old kernels that aren't aware of this
650                          * format discard the cache.
651                          */
652                         addr += sizeof(u64);
653                         offset += sizeof(u64);
654
655                         gen = addr;
656                         *gen = trans->transid;
657                         addr += sizeof(u64);
658                         offset += sizeof(u64);
659                 }
660                 entry = addr;
661
662                 memset(addr, 0, PAGE_CACHE_SIZE - offset);
663                 while (node && !next_page) {
664                         struct btrfs_free_space *e;
665
666                         e = rb_entry(node, struct btrfs_free_space, offset_index);
667                         entries++;
668
669                         entry->offset = cpu_to_le64(e->offset);
670                         entry->bytes = cpu_to_le64(e->bytes);
671                         if (e->bitmap) {
672                                 entry->type = BTRFS_FREE_SPACE_BITMAP;
673                                 list_add_tail(&e->list, &bitmap_list);
674                                 bitmaps++;
675                         } else {
676                                 entry->type = BTRFS_FREE_SPACE_EXTENT;
677                         }
678                         node = rb_next(node);
679                         if (!node && cluster) {
680                                 node = rb_first(&cluster->root);
681                                 cluster = NULL;
682                         }
683                         offset += sizeof(struct btrfs_free_space_entry);
684                         if (offset + sizeof(struct btrfs_free_space_entry) >=
685                             PAGE_CACHE_SIZE)
686                                 next_page = true;
687                         entry++;
688                 }
689
690                 /*
691                  * We want to add any pinned extents to our free space cache
692                  * so we don't leak the space
693                  */
694                 while (block_group && !next_page &&
695                        (start < block_group->key.objectid +
696                         block_group->key.offset)) {
697                         ret = find_first_extent_bit(unpin, start, &start, &end,
698                                                     EXTENT_DIRTY);
699                         if (ret) {
700                                 ret = 0;
701                                 break;
702                         }
703
704                         /* This pinned extent is out of our range */
705                         if (start >= block_group->key.objectid +
706                             block_group->key.offset)
707                                 break;
708
709                         len = block_group->key.objectid +
710                                 block_group->key.offset - start;
711                         len = min(len, end + 1 - start);
712
713                         entries++;
714                         entry->offset = cpu_to_le64(start);
715                         entry->bytes = cpu_to_le64(len);
716                         entry->type = BTRFS_FREE_SPACE_EXTENT;
717
718                         start = end + 1;
719                         offset += sizeof(struct btrfs_free_space_entry);
720                         if (offset + sizeof(struct btrfs_free_space_entry) >=
721                             PAGE_CACHE_SIZE)
722                                 next_page = true;
723                         entry++;
724                 }
725
726                 /* Generate bogus crc value */
727                 if (index == 0) {
728                         u32 *tmp;
729                         crc = btrfs_csum_data(root, orig + sizeof(u64), crc,
730                                               PAGE_CACHE_SIZE - sizeof(u64));
731                         btrfs_csum_final(crc, (char *)&crc);
732                         crc++;
733                         tmp = orig;
734                         *tmp = crc;
735                 }
736
737                 kunmap(page);
738
739                 bytes += PAGE_CACHE_SIZE;
740
741                 index++;
742         } while (node || next_page);
743
744         /* Write out the bitmaps */
745         list_for_each_safe(pos, n, &bitmap_list) {
746                 void *addr;
747                 struct btrfs_free_space *entry =
748                         list_entry(pos, struct btrfs_free_space, list);
749
750                 if (index >= num_pages) {
751                         out_of_space = true;
752                         break;
753                 }
754                 page = pages[index];
755
756                 addr = kmap(page);
757                 memcpy(addr, entry->bitmap, PAGE_CACHE_SIZE);
758                 kunmap(page);
759                 bytes += PAGE_CACHE_SIZE;
760
761                 list_del_init(&entry->list);
762                 index++;
763         }
764
765         if (out_of_space) {
766                 btrfs_drop_pages(pages, num_pages);
767                 unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0,
768                                      i_size_read(inode) - 1, &cached_state,
769                                      GFP_NOFS);
770                 ret = 0;
771                 goto out;
772         }
773
774         /* Zero out the rest of the pages just to make sure */
775         while (index < num_pages) {
776                 void *addr;
777
778                 page = pages[index];
779                 addr = kmap(page);
780                 memset(addr, 0, PAGE_CACHE_SIZE);
781                 kunmap(page);
782                 bytes += PAGE_CACHE_SIZE;
783                 index++;
784         }
785
786         ret = btrfs_dirty_pages(root, inode, pages, num_pages, 0,
787                                             bytes, &cached_state);
788         btrfs_drop_pages(pages, num_pages);
789         unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0,
790                              i_size_read(inode) - 1, &cached_state, GFP_NOFS);
791
792         if (ret) {
793                 ret = 0;
794                 goto out;
795         }
796
797         BTRFS_I(inode)->generation = trans->transid;
798
799         filemap_write_and_wait(inode->i_mapping);
800
801         key.objectid = BTRFS_FREE_SPACE_OBJECTID;
802         key.offset = offset;
803         key.type = 0;
804
805         ret = btrfs_search_slot(trans, root, &key, path, 1, 1);
806         if (ret < 0) {
807                 ret = -1;
808                 clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, bytes - 1,
809                                  EXTENT_DIRTY | EXTENT_DELALLOC |
810                                  EXTENT_DO_ACCOUNTING, 0, 0, NULL, GFP_NOFS);
811                 goto out;
812         }
813         leaf = path->nodes[0];
814         if (ret > 0) {
815                 struct btrfs_key found_key;
816                 BUG_ON(!path->slots[0]);
817                 path->slots[0]--;
818                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
819                 if (found_key.objectid != BTRFS_FREE_SPACE_OBJECTID ||
820                     found_key.offset != offset) {
821                         ret = -1;
822                         clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, bytes - 1,
823                                          EXTENT_DIRTY | EXTENT_DELALLOC |
824                                          EXTENT_DO_ACCOUNTING, 0, 0, NULL,
825                                          GFP_NOFS);
826                         btrfs_release_path(path);
827                         goto out;
828                 }
829         }
830         header = btrfs_item_ptr(leaf, path->slots[0],
831                                 struct btrfs_free_space_header);
832         btrfs_set_free_space_entries(leaf, header, entries);
833         btrfs_set_free_space_bitmaps(leaf, header, bitmaps);
834         btrfs_set_free_space_generation(leaf, header, trans->transid);
835         btrfs_mark_buffer_dirty(leaf);
836         btrfs_release_path(path);
837
838         ret = 1;
839
840 out:
841         kfree(pages);
842         if (ret != 1) {
843                 invalidate_inode_pages2_range(inode->i_mapping, 0, index);
844                 BTRFS_I(inode)->generation = 0;
845         }
846         btrfs_update_inode(trans, root, inode);
847         return ret;
848 }
849
850 int btrfs_write_out_cache(struct btrfs_root *root,
851                           struct btrfs_trans_handle *trans,
852                           struct btrfs_block_group_cache *block_group,
853                           struct btrfs_path *path)
854 {
855         struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
856         struct inode *inode;
857         int ret = 0;
858
859         root = root->fs_info->tree_root;
860
861         spin_lock(&block_group->lock);
862         if (block_group->disk_cache_state < BTRFS_DC_SETUP) {
863                 spin_unlock(&block_group->lock);
864                 return 0;
865         }
866         spin_unlock(&block_group->lock);
867
868         inode = lookup_free_space_inode(root, block_group, path);
869         if (IS_ERR(inode))
870                 return 0;
871
872         ret = __btrfs_write_out_cache(root, inode, ctl, block_group, trans,
873                                       path, block_group->key.objectid);
874         if (ret < 0) {
875                 spin_lock(&block_group->lock);
876                 block_group->disk_cache_state = BTRFS_DC_ERROR;
877                 spin_unlock(&block_group->lock);
878                 ret = 0;
879
880                 printk(KERN_ERR "btrfs: failed to write free space cache "
881                        "for block group %llu\n", block_group->key.objectid);
882         }
883
884         iput(inode);
885         return ret;
886 }
887
888 static inline unsigned long offset_to_bit(u64 bitmap_start, u32 unit,
889                                           u64 offset)
890 {
891         BUG_ON(offset < bitmap_start);
892         offset -= bitmap_start;
893         return (unsigned long)(div_u64(offset, unit));
894 }
895
896 static inline unsigned long bytes_to_bits(u64 bytes, u32 unit)
897 {
898         return (unsigned long)(div_u64(bytes, unit));
899 }
900
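/*
 * Round an offset down to the start of the bitmap that covers it; each
 * bitmap spans BITS_PER_BITMAP * ctl->unit bytes counted from ctl->start.
 */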
901 static inline u64 offset_to_bitmap(struct btrfs_free_space_ctl *ctl,
902                                    u64 offset)
903 {
904         u64 bitmap_start;
905         u64 bytes_per_bitmap;
906
907         bytes_per_bitmap = BITS_PER_BITMAP * ctl->unit;
908         bitmap_start = offset - ctl->start;
909         bitmap_start = div64_u64(bitmap_start, bytes_per_bitmap);
910         bitmap_start *= bytes_per_bitmap;
911         bitmap_start += ctl->start;
912
913         return bitmap_start;
914 }
915
916 static int tree_insert_offset(struct rb_root *root, u64 offset,
917                               struct rb_node *node, int bitmap)
918 {
919         struct rb_node **p = &root->rb_node;
920         struct rb_node *parent = NULL;
921         struct btrfs_free_space *info;
922
923         while (*p) {
924                 parent = *p;
925                 info = rb_entry(parent, struct btrfs_free_space, offset_index);
926
927                 if (offset < info->offset) {
928                         p = &(*p)->rb_left;
929                 } else if (offset > info->offset) {
930                         p = &(*p)->rb_right;
931                 } else {
932                         /*
933                          * we could have a bitmap entry and an extent entry
934                          * share the same offset.  If this is the case, we want
935                          * the extent entry to always be found first if we do a
936                          * linear search through the tree, since we want to have
937                          * the quickest allocation time, and allocating from an
938                          * extent is faster than allocating from a bitmap.  So
939                          * if we're inserting a bitmap and we find an entry at
940                          * this offset, we want to go right, or after this entry
941                          * logically.  If we are inserting an extent and we've
942                          * found a bitmap, we want to go left, or before
943                          * logically.
944                          */
945                         if (bitmap) {
946                                 if (info->bitmap) {
947                                         WARN_ON_ONCE(1);
948                                         return -EEXIST;
949                                 }
950                                 p = &(*p)->rb_right;
951                         } else {
952                                 if (!info->bitmap) {
953                                         WARN_ON_ONCE(1);
954                                         return -EEXIST;
955                                 }
956                                 p = &(*p)->rb_left;
957                         }
958                 }
959         }
960
961         rb_link_node(node, parent, p);
962         rb_insert_color(node, root);
963
964         return 0;
965 }
966
967 /*
968  * searches the tree for the given offset.
969  *
970  * fuzzy - If this is set, then we are trying to make an allocation, and we just
971  * want a section that has at least bytes size and comes at or after the given
972  * offset.
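 * bitmap_only - If this is set, only a bitmap entry that starts exactly at
 * the given offset is returned; an extent entry at that offset is skipped.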
973  */
974 static struct btrfs_free_space *
975 tree_search_offset(struct btrfs_free_space_ctl *ctl,
976                    u64 offset, int bitmap_only, int fuzzy)
977 {
978         struct rb_node *n = ctl->free_space_offset.rb_node;
979         struct btrfs_free_space *entry, *prev = NULL;
980
981         /* find entry that is closest to the 'offset' */
982         while (1) {
983                 if (!n) {
984                         entry = NULL;
985                         break;
986                 }
987
988                 entry = rb_entry(n, struct btrfs_free_space, offset_index);
989                 prev = entry;
990
991                 if (offset < entry->offset)
992                         n = n->rb_left;
993                 else if (offset > entry->offset)
994                         n = n->rb_right;
995                 else
996                         break;
997         }
998
999         if (bitmap_only) {
1000                 if (!entry)
1001                         return NULL;
1002                 if (entry->bitmap)
1003                         return entry;
1004
1005                 /*
1006                  * a bitmap entry and an extent entry may share the same offset;
1007                  * in that case, the bitmap entry comes after the extent entry.
1008                  */
1009                 n = rb_next(n);
1010                 if (!n)
1011                         return NULL;
1012                 entry = rb_entry(n, struct btrfs_free_space, offset_index);
1013                 if (entry->offset != offset)
1014                         return NULL;
1015
1016                 WARN_ON(!entry->bitmap);
1017                 return entry;
1018         } else if (entry) {
1019                 if (entry->bitmap) {
1020                         /*
1021                          * if previous extent entry covers the offset,
1022                          * we should return it instead of the bitmap entry
1023                          */
1024                         n = &entry->offset_index;
1025                         while (1) {
1026                                 n = rb_prev(n);
1027                                 if (!n)
1028                                         break;
1029                                 prev = rb_entry(n, struct btrfs_free_space,
1030                                                 offset_index);
1031                                 if (!prev->bitmap) {
1032                                         if (prev->offset + prev->bytes > offset)
1033                                                 entry = prev;
1034                                         break;
1035                                 }
1036                         }
1037                 }
1038                 return entry;
1039         }
1040
1041         if (!prev)
1042                 return NULL;
1043
1044         /* find last entry before the 'offset' */
1045         entry = prev;
1046         if (entry->offset > offset) {
1047                 n = rb_prev(&entry->offset_index);
1048                 if (n) {
1049                         entry = rb_entry(n, struct btrfs_free_space,
1050                                         offset_index);
1051                         BUG_ON(entry->offset > offset);
1052                 } else {
1053                         if (fuzzy)
1054                                 return entry;
1055                         else
1056                                 return NULL;
1057                 }
1058         }
1059
1060         if (entry->bitmap) {
1061                 n = &entry->offset_index;
1062                 while (1) {
1063                         n = rb_prev(n);
1064                         if (!n)
1065                                 break;
1066                         prev = rb_entry(n, struct btrfs_free_space,
1067                                         offset_index);
1068                         if (!prev->bitmap) {
1069                                 if (prev->offset + prev->bytes > offset)
1070                                         return prev;
1071                                 break;
1072                         }
1073                 }
1074                 if (entry->offset + BITS_PER_BITMAP * ctl->unit > offset)
1075                         return entry;
1076         } else if (entry->offset + entry->bytes > offset)
1077                 return entry;
1078
1079         if (!fuzzy)
1080                 return NULL;
1081
1082         while (1) {
1083                 if (entry->bitmap) {
1084                         if (entry->offset + BITS_PER_BITMAP *
1085                             ctl->unit > offset)
1086                                 break;
1087                 } else {
1088                         if (entry->offset + entry->bytes > offset)
1089                                 break;
1090                 }
1091
1092                 n = rb_next(&entry->offset_index);
1093                 if (!n)
1094                         return NULL;
1095                 entry = rb_entry(n, struct btrfs_free_space, offset_index);
1096         }
1097         return entry;
1098 }
1099
1100 static inline void
1101 __unlink_free_space(struct btrfs_free_space_ctl *ctl,
1102                     struct btrfs_free_space *info)
1103 {
1104         rb_erase(&info->offset_index, &ctl->free_space_offset);
1105         ctl->free_extents--;
1106 }
1107
1108 static void unlink_free_space(struct btrfs_free_space_ctl *ctl,
1109                               struct btrfs_free_space *info)
1110 {
1111         __unlink_free_space(ctl, info);
1112         ctl->free_space -= info->bytes;
1113 }
1114
1115 static int link_free_space(struct btrfs_free_space_ctl *ctl,
1116                            struct btrfs_free_space *info)
1117 {
1118         int ret = 0;
1119
1120         BUG_ON(!info->bitmap && !info->bytes);
1121         ret = tree_insert_offset(&ctl->free_space_offset, info->offset,
1122                                  &info->offset_index, (info->bitmap != NULL));
1123         if (ret)
1124                 return ret;
1125
1126         ctl->free_space += info->bytes;
1127         ctl->free_extents++;
1128         return ret;
1129 }
1130
1131 static void recalculate_thresholds(struct btrfs_free_space_ctl *ctl)
1132 {
1133         struct btrfs_block_group_cache *block_group = ctl->private;
1134         u64 max_bytes;
1135         u64 bitmap_bytes;
1136         u64 extent_bytes;
1137         u64 size = block_group->key.offset;
1138         u64 bytes_per_bg = BITS_PER_BITMAP * block_group->sectorsize;
1139         int max_bitmaps = div64_u64(size + bytes_per_bg - 1, bytes_per_bg);
1140
1141         BUG_ON(ctl->total_bitmaps > max_bitmaps);
1142
1143         /*
1144          * The goal is to keep the total amount of memory used per 1gb of space
1145          * at or below 32k, so we need to adjust how much memory we allow to be
1146          * used by extent based free space tracking
1147          */
1148         if (size < 1024 * 1024 * 1024)
1149                 max_bytes = MAX_CACHE_BYTES_PER_GIG;
1150         else
1151                 max_bytes = MAX_CACHE_BYTES_PER_GIG *
1152                         div64_u64(size, 1024 * 1024 * 1024);
1153
1154         /*
1155          * we want to account for 1 more bitmap than what we have so we can make
1156          * sure we don't go over our overall goal of MAX_CACHE_BYTES_PER_GIG as
1157          * we add more bitmaps.
1158          */
1159         bitmap_bytes = (ctl->total_bitmaps + 1) * PAGE_CACHE_SIZE;
1160
1161         if (bitmap_bytes >= max_bytes) {
1162                 ctl->extents_thresh = 0;
1163                 return;
1164         }
1165
1166         /*
1167          * we want the extent entry threshold to always be at most 1/2 the max
1168          * bytes we can have, or whatever is less than that.
1169          */
1170         extent_bytes = max_bytes - bitmap_bytes;
1171         extent_bytes = min_t(u64, extent_bytes, div64_u64(max_bytes, 2));
1172
1173         ctl->extents_thresh =
1174                 div64_u64(extent_bytes, (sizeof(struct btrfs_free_space)));
1175 }
1176
1177 static inline void __bitmap_clear_bits(struct btrfs_free_space_ctl *ctl,
1178                                        struct btrfs_free_space *info,
1179                                        u64 offset, u64 bytes)
1180 {
1181         unsigned long start, count;
1182
1183         start = offset_to_bit(info->offset, ctl->unit, offset);
1184         count = bytes_to_bits(bytes, ctl->unit);
1185         BUG_ON(start + count > BITS_PER_BITMAP);
1186
1187         bitmap_clear(info->bitmap, start, count);
1188
1189         info->bytes -= bytes;
1190 }
1191
1192 static void bitmap_clear_bits(struct btrfs_free_space_ctl *ctl,
1193                               struct btrfs_free_space *info, u64 offset,
1194                               u64 bytes)
1195 {
1196         __bitmap_clear_bits(ctl, info, offset, bytes);
1197         ctl->free_space -= bytes;
1198 }
1199
1200 static void bitmap_set_bits(struct btrfs_free_space_ctl *ctl,
1201                             struct btrfs_free_space *info, u64 offset,
1202                             u64 bytes)
1203 {
1204         unsigned long start, count;
1205
1206         start = offset_to_bit(info->offset, ctl->unit, offset);
1207         count = bytes_to_bits(bytes, ctl->unit);
1208         BUG_ON(start + count > BITS_PER_BITMAP);
1209
1210         bitmap_set(info->bitmap, start, count);
1211
1212         info->bytes += bytes;
1213         ctl->free_space += bytes;
1214 }
1215
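/*
 * Find a run of set bits at least *bytes long starting at or after *offset
 * within this bitmap; on success *offset and *bytes are updated to describe
 * the run that was found, otherwise -1 is returned.
 */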
1216 static int search_bitmap(struct btrfs_free_space_ctl *ctl,
1217                          struct btrfs_free_space *bitmap_info, u64 *offset,
1218                          u64 *bytes)
1219 {
1220         unsigned long found_bits = 0;
1221         unsigned long bits, i;
1222         unsigned long next_zero;
1223
1224         i = offset_to_bit(bitmap_info->offset, ctl->unit,
1225                           max_t(u64, *offset, bitmap_info->offset));
1226         bits = bytes_to_bits(*bytes, ctl->unit);
1227
1228         for (i = find_next_bit(bitmap_info->bitmap, BITS_PER_BITMAP, i);
1229              i < BITS_PER_BITMAP;
1230              i = find_next_bit(bitmap_info->bitmap, BITS_PER_BITMAP, i + 1)) {
1231                 next_zero = find_next_zero_bit(bitmap_info->bitmap,
1232                                                BITS_PER_BITMAP, i);
1233                 if ((next_zero - i) >= bits) {
1234                         found_bits = next_zero - i;
1235                         break;
1236                 }
1237                 i = next_zero;
1238         }
1239
1240         if (found_bits) {
1241                 *offset = (u64)(i * ctl->unit) + bitmap_info->offset;
1242                 *bytes = (u64)(found_bits) * ctl->unit;
1243                 return 0;
1244         }
1245
1246         return -1;
1247 }
1248
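/*
 * Walk the offset tree starting near *offset and return the first entry,
 * extent or bitmap, that can satisfy the request, updating *offset and
 * *bytes to the space that was found.
 */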
1249 static struct btrfs_free_space *
1250 find_free_space(struct btrfs_free_space_ctl *ctl, u64 *offset, u64 *bytes)
1251 {
1252         struct btrfs_free_space *entry;
1253         struct rb_node *node;
1254         int ret;
1255
1256         if (!ctl->free_space_offset.rb_node)
1257                 return NULL;
1258
1259         entry = tree_search_offset(ctl, offset_to_bitmap(ctl, *offset), 0, 1);
1260         if (!entry)
1261                 return NULL;
1262
1263         for (node = &entry->offset_index; node; node = rb_next(node)) {
1264                 entry = rb_entry(node, struct btrfs_free_space, offset_index);
1265                 if (entry->bytes < *bytes)
1266                         continue;
1267
1268                 if (entry->bitmap) {
1269                         ret = search_bitmap(ctl, entry, offset, bytes);
1270                         if (!ret)
1271                                 return entry;
1272                         continue;
1273                 }
1274
1275                 *offset = entry->offset;
1276                 *bytes = entry->bytes;
1277                 return entry;
1278         }
1279
1280         return NULL;
1281 }
1282
1283 static void add_new_bitmap(struct btrfs_free_space_ctl *ctl,
1284                            struct btrfs_free_space *info, u64 offset)
1285 {
1286         info->offset = offset_to_bitmap(ctl, offset);
1287         info->bytes = 0;
1288         link_free_space(ctl, info);
1289         ctl->total_bitmaps++;
1290
1291         ctl->op->recalc_thresholds(ctl);
1292 }
1293
1294 static void free_bitmap(struct btrfs_free_space_ctl *ctl,
1295                         struct btrfs_free_space *bitmap_info)
1296 {
1297         unlink_free_space(ctl, bitmap_info);
1298         kfree(bitmap_info->bitmap);
1299         kmem_cache_free(btrfs_free_space_cachep, bitmap_info);
1300         ctl->total_bitmaps--;
1301         ctl->op->recalc_thresholds(ctl);
1302 }
1303
1304 static noinline int remove_from_bitmap(struct btrfs_free_space_ctl *ctl,
1305                               struct btrfs_free_space *bitmap_info,
1306                               u64 *offset, u64 *bytes)
1307 {
1308         u64 end;
1309         u64 search_start, search_bytes;
1310         int ret;
1311
1312 again:
1313         end = bitmap_info->offset + (u64)(BITS_PER_BITMAP * ctl->unit) - 1;
1314
1315         /*
1316          * XXX - this can go away after a few releases.
1317          *
1318          * since the only user of btrfs_remove_free_space is the tree logging
1319          * stuff, and the only way to test that is under crash conditions, we
1320          * want to have this debug stuff here just in case something's not
1321          * working.  Search the bitmap for the space we are trying to use to
1322          * make sure it's actually there.  If it's not there then we need to stop
1323          * because something has gone wrong.
1324          */
1325         search_start = *offset;
1326         search_bytes = *bytes;
1327         search_bytes = min(search_bytes, end - search_start + 1);
1328         ret = search_bitmap(ctl, bitmap_info, &search_start, &search_bytes);
1329         BUG_ON(ret < 0 || search_start != *offset);
1330
1331         if (*offset > bitmap_info->offset && *offset + *bytes > end) {
1332                 bitmap_clear_bits(ctl, bitmap_info, *offset, end - *offset + 1);
1333                 *bytes -= end - *offset + 1;
1334                 *offset = end + 1;
1335         } else if (*offset >= bitmap_info->offset && *offset + *bytes <= end) {
1336                 bitmap_clear_bits(ctl, bitmap_info, *offset, *bytes);
1337                 *bytes = 0;
1338         }
1339
1340         if (*bytes) {
1341                 struct rb_node *next = rb_next(&bitmap_info->offset_index);
1342                 if (!bitmap_info->bytes)
1343                         free_bitmap(ctl, bitmap_info);
1344
1345                 /*
1346                  * no entry after this bitmap, but we still have bytes to
1347                  * remove, so something has gone wrong.
1348                  */
1349                 if (!next)
1350                         return -EINVAL;
1351
1352                 bitmap_info = rb_entry(next, struct btrfs_free_space,
1353                                        offset_index);
1354
1355                 /*
1356                  * if the next entry isn't a bitmap we need to return to let the
1357                  * extent stuff do its work.
1358                  */
1359                 if (!bitmap_info->bitmap)
1360                         return -EAGAIN;
1361
1362                 /*
1363                  * Ok the next item is a bitmap, but it may not actually hold
1364                  * the information for the rest of this free space stuff, so
1365                  * look for it, and if we don't find it return so we can try
1366                  * everything over again.
1367                  */
1368                 search_start = *offset;
1369                 search_bytes = *bytes;
1370                 ret = search_bitmap(ctl, bitmap_info, &search_start,
1371                                     &search_bytes);
1372                 if (ret < 0 || search_start != *offset)
1373                         return -EAGAIN;
1374
1375                 goto again;
1376         } else if (!bitmap_info->bytes)
1377                 free_bitmap(ctl, bitmap_info);
1378
1379         return 0;
1380 }
1381
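/*
 * Mark as much of [offset, offset + bytes) free in this bitmap as fits
 * within it and return the number of bytes actually set.
 */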
1382 static u64 add_bytes_to_bitmap(struct btrfs_free_space_ctl *ctl,
1383                                struct btrfs_free_space *info, u64 offset,
1384                                u64 bytes)
1385 {
1386         u64 bytes_to_set = 0;
1387         u64 end;
1388
1389         end = info->offset + (u64)(BITS_PER_BITMAP * ctl->unit);
1390
1391         bytes_to_set = min(end - offset, bytes);
1392
1393         bitmap_set_bits(ctl, info, offset, bytes_to_set);
1394
1395         return bytes_to_set;
1396
1397 }
1398
1399 static bool use_bitmap(struct btrfs_free_space_ctl *ctl,
1400                       struct btrfs_free_space *info)
1401 {
1402         struct btrfs_block_group_cache *block_group = ctl->private;
1403
1404         /*
1405          * If we are below the extents threshold then we can add this as an
1406          * extent, and don't have to deal with the bitmap
1407          */
1408         if (ctl->free_extents < ctl->extents_thresh) {
1409                 /*
1410                  * If this block group has some small extents we don't want to
1411                  * use up all of our free slots in the cache with them, we want
1412                  * to reserve them for larger extents.  However, if we have plenty
1413                  * of cache left then go ahead and add them, no sense in adding
1414                  * the overhead of a bitmap if we don't have to.
1415                  */
1416                 if (info->bytes <= block_group->sectorsize * 4) {
1417                         if (ctl->free_extents * 2 <= ctl->extents_thresh)
1418                                 return false;
1419                 } else {
1420                         return false;
1421                 }
1422         }
1423
1424         /*
1425          * some block groups are so tiny they can't be enveloped by a bitmap, so
1426          * don't even bother to create a bitmap for this
1427          */
1428         if (BITS_PER_BITMAP * block_group->sectorsize >
1429             block_group->key.offset)
1430                 return false;
1431
1432         return true;
1433 }
1434
1435 static struct btrfs_free_space_op free_space_op = {
1436         .recalc_thresholds      = recalculate_thresholds,
1437         .use_bitmap             = use_bitmap,
1438 };
1439
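     /*
      * Try to account the free space described by @info in one or more
      * bitmaps, allocating a new bitmap page when needed.  Returns 1 if the
      * space was fully added to bitmaps, 0 if the caller should link @info as
      * a regular extent entry instead, or a negative errno on allocation
      * failure.
      */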
1440 static int insert_into_bitmap(struct btrfs_free_space_ctl *ctl,
1441                               struct btrfs_free_space *info)
1442 {
1443         struct btrfs_free_space *bitmap_info;
1444         struct btrfs_block_group_cache *block_group = NULL;
1445         int added = 0;
1446         u64 bytes, offset, bytes_added;
1447         int ret;
1448
1449         bytes = info->bytes;
1450         offset = info->offset;
1451
1452         if (!ctl->op->use_bitmap(ctl, info))
1453                 return 0;
1454
1455         if (ctl->op == &free_space_op)
1456                 block_group = ctl->private;
1457 again:
1458         /*
1459          * Since we link bitmaps right into the cluster we need to see if we
1460          * have a cluster here, and if so and it has our bitmap we need to add
1461          * the free space to that bitmap.
1462          */
1463         if (block_group && !list_empty(&block_group->cluster_list)) {
1464                 struct btrfs_free_cluster *cluster;
1465                 struct rb_node *node;
1466                 struct btrfs_free_space *entry;
1467
1468                 cluster = list_entry(block_group->cluster_list.next,
1469                                      struct btrfs_free_cluster,
1470                                      block_group_list);
1471                 spin_lock(&cluster->lock);
1472                 node = rb_first(&cluster->root);
1473                 if (!node) {
1474                         spin_unlock(&cluster->lock);
1475                         goto no_cluster_bitmap;
1476                 }
1477
1478                 entry = rb_entry(node, struct btrfs_free_space, offset_index);
1479                 if (!entry->bitmap) {
1480                         spin_unlock(&cluster->lock);
1481                         goto no_cluster_bitmap;
1482                 }
1483
1484                 if (entry->offset == offset_to_bitmap(ctl, offset)) {
1485                         bytes_added = add_bytes_to_bitmap(ctl, entry,
1486                                                           offset, bytes);
1487                         bytes -= bytes_added;
1488                         offset += bytes_added;
1489                 }
1490                 spin_unlock(&cluster->lock);
1491                 if (!bytes) {
1492                         ret = 1;
1493                         goto out;
1494                 }
1495         }
1496
1497 no_cluster_bitmap:
1498         bitmap_info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset),
1499                                          1, 0);
1500         if (!bitmap_info) {
1501                 BUG_ON(added);
1502                 goto new_bitmap;
1503         }
1504
1505         bytes_added = add_bytes_to_bitmap(ctl, bitmap_info, offset, bytes);
1506         bytes -= bytes_added;
1507         offset += bytes_added;
1508         added = 0;
1509
1510         if (!bytes) {
1511                 ret = 1;
1512                 goto out;
1513         } else
1514                 goto again;
1515
1516 new_bitmap:
1517         if (info && info->bitmap) {
1518                 add_new_bitmap(ctl, info, offset);
1519                 added = 1;
1520                 info = NULL;
1521                 goto again;
1522         } else {
1523                 spin_unlock(&ctl->tree_lock);
1524
1525                 /* no pre-allocated info, allocate a new one */
1526                 if (!info) {
1527                         info = kmem_cache_zalloc(btrfs_free_space_cachep,
1528                                                  GFP_NOFS);
1529                         if (!info) {
1530                                 spin_lock(&ctl->tree_lock);
1531                                 ret = -ENOMEM;
1532                                 goto out;
1533                         }
1534                 }
1535
1536                 /* allocate the bitmap */
1537                 info->bitmap = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS);
1538                 spin_lock(&ctl->tree_lock);
1539                 if (!info->bitmap) {
1540                         ret = -ENOMEM;
1541                         goto out;
1542                 }
1543                 goto again;
1544         }
1545
1546 out:
1547         if (info) {
1548                 if (info->bitmap)
1549                         kfree(info->bitmap);
1550                 kmem_cache_free(btrfs_free_space_cachep, info);
1551         }
1552
1553         return ret;
1554 }
1555
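     /*
      * Merge @info with any non-bitmap entry that is directly adjacent to it
      * on either side.  Adjacent neighbours are unlinked and freed and @info
      * grows to cover them.  Returns true if anything was merged.
      */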
1556 static bool try_merge_free_space(struct btrfs_free_space_ctl *ctl,
1557                           struct btrfs_free_space *info, bool update_stat)
1558 {
1559         struct btrfs_free_space *left_info;
1560         struct btrfs_free_space *right_info;
1561         bool merged = false;
1562         u64 offset = info->offset;
1563         u64 bytes = info->bytes;
1564
1565         /*
1566          * first we want to see if there is free space adjacent to the range we
1567          * are adding; if there is, remove that struct and add a new one to
1568          * cover the entire range
1569          */
1570         right_info = tree_search_offset(ctl, offset + bytes, 0, 0);
1571         if (right_info && rb_prev(&right_info->offset_index))
1572                 left_info = rb_entry(rb_prev(&right_info->offset_index),
1573                                      struct btrfs_free_space, offset_index);
1574         else
1575                 left_info = tree_search_offset(ctl, offset - 1, 0, 0);
1576
1577         if (right_info && !right_info->bitmap) {
1578                 if (update_stat)
1579                         unlink_free_space(ctl, right_info);
1580                 else
1581                         __unlink_free_space(ctl, right_info);
1582                 info->bytes += right_info->bytes;
1583                 kmem_cache_free(btrfs_free_space_cachep, right_info);
1584                 merged = true;
1585         }
1586
1587         if (left_info && !left_info->bitmap &&
1588             left_info->offset + left_info->bytes == offset) {
1589                 if (update_stat)
1590                         unlink_free_space(ctl, left_info);
1591                 else
1592                         __unlink_free_space(ctl, left_info);
1593                 info->offset = left_info->offset;
1594                 info->bytes += left_info->bytes;
1595                 kmem_cache_free(btrfs_free_space_cachep, left_info);
1596                 merged = true;
1597         }
1598
1599         return merged;
1600 }
1601
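     /*
      * Add a free space range to the cache: merge it with its neighbours if
      * possible, otherwise either fold it into a bitmap or link it as a new
      * extent entry.
      */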
1602 int __btrfs_add_free_space(struct btrfs_free_space_ctl *ctl,
1603                            u64 offset, u64 bytes)
1604 {
1605         struct btrfs_free_space *info;
1606         int ret = 0;
1607
1608         info = kmem_cache_zalloc(btrfs_free_space_cachep, GFP_NOFS);
1609         if (!info)
1610                 return -ENOMEM;
1611
1612         info->offset = offset;
1613         info->bytes = bytes;
1614
1615         spin_lock(&ctl->tree_lock);
1616
1617         if (try_merge_free_space(ctl, info, true))
1618                 goto link;
1619
1620         /*
1621          * There was no extent directly to the left or right of this new
1622          * extent, so we know we're going to have to allocate a new extent;
1623          * before we do that, see if we need to drop this into a bitmap.
1624          */
1625         ret = insert_into_bitmap(ctl, info);
1626         if (ret < 0) {
1627                 goto out;
1628         } else if (ret) {
1629                 ret = 0;
1630                 goto out;
1631         }
1632 link:
1633         ret = link_free_space(ctl, info);
1634         if (ret)
1635                 kmem_cache_free(btrfs_free_space_cachep, info);
1636 out:
1637         spin_unlock(&ctl->tree_lock);
1638
1639         if (ret) {
1640                 printk(KERN_CRIT "btrfs: unable to add free space: %d\n", ret);
1641                 BUG_ON(ret == -EEXIST);
1642         }
1643
1644         return ret;
1645 }
1646
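     /*
      * Remove a range from the free space cache.  The range may span extent
      * entries and bitmaps, so we keep trimming entries (splitting an extent
      * entry in two if the range falls in its middle) until all of the
      * requested bytes are gone.
      */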
1647 int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group,
1648                             u64 offset, u64 bytes)
1649 {
1650         struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
1651         struct btrfs_free_space *info;
1652         struct btrfs_free_space *next_info = NULL;
1653         int ret = 0;
1654
1655         spin_lock(&ctl->tree_lock);
1656
1657 again:
1658         info = tree_search_offset(ctl, offset, 0, 0);
1659         if (!info) {
1660                 /*
1661                  * oops, we didn't find an extent that matched the space we
1662                  * wanted to remove; look for a bitmap instead
1663                  */
1664                 info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset),
1665                                           1, 0);
1666                 if (!info) {
1667                         WARN_ON(1);
1668                         goto out_lock;
1669                 }
1670         }
1671
1672         if (info->bytes < bytes && rb_next(&info->offset_index)) {
1673                 u64 end;
1674                 next_info = rb_entry(rb_next(&info->offset_index),
1675                                              struct btrfs_free_space,
1676                                              offset_index);
1677
1678                 if (next_info->bitmap)
1679                         end = next_info->offset +
1680                               BITS_PER_BITMAP * ctl->unit - 1;
1681                 else
1682                         end = next_info->offset + next_info->bytes;
1683
1684                 if (next_info->bytes < bytes ||
1685                     next_info->offset > offset || offset > end) {
1686                         printk(KERN_CRIT "Found free space at %llu, size %llu,"
1687                               " trying to use %llu\n",
1688                               (unsigned long long)info->offset,
1689                               (unsigned long long)info->bytes,
1690                               (unsigned long long)bytes);
1691                         WARN_ON(1);
1692                         ret = -EINVAL;
1693                         goto out_lock;
1694                 }
1695
1696                 info = next_info;
1697         }
1698
1699         if (info->bytes == bytes) {
1700                 unlink_free_space(ctl, info);
1701                 if (info->bitmap) {
1702                         kfree(info->bitmap);
1703                         ctl->total_bitmaps--;
1704                 }
1705                 kmem_cache_free(btrfs_free_space_cachep, info);
1706                 goto out_lock;
1707         }
1708
1709         if (!info->bitmap && info->offset == offset) {
1710                 unlink_free_space(ctl, info);
1711                 info->offset += bytes;
1712                 info->bytes -= bytes;
1713                 link_free_space(ctl, info);
1714                 goto out_lock;
1715         }
1716
1717         if (!info->bitmap && info->offset <= offset &&
1718             info->offset + info->bytes >= offset + bytes) {
1719                 u64 old_start = info->offset;
1720                 /*
1721                  * we're freeing space in the middle of the info,
1722                  * this can happen during tree log replay
1723                  *
1724                  * first unlink the old info and then
1725                  * insert it again after the hole we're creating
1726                  */
1727                 unlink_free_space(ctl, info);
1728                 if (offset + bytes < info->offset + info->bytes) {
1729                         u64 old_end = info->offset + info->bytes;
1730
1731                         info->offset = offset + bytes;
1732                         info->bytes = old_end - info->offset;
1733                         ret = link_free_space(ctl, info);
1734                         WARN_ON(ret);
1735                         if (ret)
1736                                 goto out_lock;
1737                 } else {
1738                         /* the hole we're creating ends at the end
1739                          * of the info struct, just free the info
1740                          */
1741                         kmem_cache_free(btrfs_free_space_cachep, info);
1742                 }
1743                 spin_unlock(&ctl->tree_lock);
1744
1745                 /* step two, insert a new info struct to cover
1746                  * anything before the hole
1747                  */
1748                 ret = btrfs_add_free_space(block_group, old_start,
1749                                            offset - old_start);
1750                 WARN_ON(ret);
1751                 goto out;
1752         }
1753
1754         ret = remove_from_bitmap(ctl, info, &offset, &bytes);
1755         if (ret == -EAGAIN)
1756                 goto again;
1757         BUG_ON(ret);
1758 out_lock:
1759         spin_unlock(&ctl->tree_lock);
1760 out:
1761         return ret;
1762 }
1763
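     /*
      * Debugging helper: dump every free space entry in the block group and
      * count how many of them are at least @bytes in size.
      */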
1764 void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group,
1765                            u64 bytes)
1766 {
1767         struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
1768         struct btrfs_free_space *info;
1769         struct rb_node *n;
1770         int count = 0;
1771
1772         for (n = rb_first(&ctl->free_space_offset); n; n = rb_next(n)) {
1773                 info = rb_entry(n, struct btrfs_free_space, offset_index);
1774                 if (info->bytes >= bytes)
1775                         count++;
1776                 printk(KERN_CRIT "entry offset %llu, bytes %llu, bitmap %s\n",
1777                        (unsigned long long)info->offset,
1778                        (unsigned long long)info->bytes,
1779                        (info->bitmap) ? "yes" : "no");
1780         }
1781         printk(KERN_INFO "block group has cluster?: %s\n",
1782                list_empty(&block_group->cluster_list) ? "no" : "yes");
1783         printk(KERN_INFO "%d blocks of free space at or bigger than %llu bytes\n",
1784                count, (unsigned long long)bytes);
1785 }
1786
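     /*
      * Initialize the free space ctl for a block group and wire up the
      * block-group-specific callbacks and thresholds.
      */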
1787 void btrfs_init_free_space_ctl(struct btrfs_block_group_cache *block_group)
1788 {
1789         struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
1790
1791         spin_lock_init(&ctl->tree_lock);
1792         ctl->unit = block_group->sectorsize;
1793         ctl->start = block_group->key.objectid;
1794         ctl->private = block_group;
1795         ctl->op = &free_space_op;
1796
1797         /*
1798          * we only want to have 32k of ram per block group for keeping
1799          * track of free space, and if we pass 1/2 of that we want to
1800          * start converting things over to using bitmaps
1801          */
1802         ctl->extents_thresh = ((1024 * 32) / 2) /
1803                                 sizeof(struct btrfs_free_space);
1804 }
1805
1806 /*
1807  * for a given cluster, put all of its extents back into the free
1808  * space cache.  If the block group passed doesn't match the block group
1809  * pointed to by the cluster, someone else raced in and freed the
1810  * cluster already.  In that case, we just return without changing anything
1811  */
1812 static int
1813 __btrfs_return_cluster_to_free_space(
1814                              struct btrfs_block_group_cache *block_group,
1815                              struct btrfs_free_cluster *cluster)
1816 {
1817         struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
1818         struct btrfs_free_space *entry;
1819         struct rb_node *node;
1820
1821         spin_lock(&cluster->lock);
1822         if (cluster->block_group != block_group)
1823                 goto out;
1824
1825         cluster->block_group = NULL;
1826         cluster->window_start = 0;
1827         list_del_init(&cluster->block_group_list);
1828
1829         node = rb_first(&cluster->root);
1830         while (node) {
1831                 bool bitmap;
1832
1833                 entry = rb_entry(node, struct btrfs_free_space, offset_index);
1834                 node = rb_next(&entry->offset_index);
1835                 rb_erase(&entry->offset_index, &cluster->root);
1836
1837                 bitmap = (entry->bitmap != NULL);
1838                 if (!bitmap)
1839                         try_merge_free_space(ctl, entry, false);
1840                 tree_insert_offset(&ctl->free_space_offset,
1841                                    entry->offset, &entry->offset_index, bitmap);
1842         }
1843         cluster->root = RB_ROOT;
1844
1845 out:
1846         spin_unlock(&cluster->lock);
1847         btrfs_put_block_group(block_group);
1848         return 0;
1849 }
1850
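     /*
      * Drop every entry in the cache.  Must be called with ctl->tree_lock
      * held; the lock is briefly released to reschedule if needed.
      */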
1851 void __btrfs_remove_free_space_cache_locked(struct btrfs_free_space_ctl *ctl)
1852 {
1853         struct btrfs_free_space *info;
1854         struct rb_node *node;
1855
1856         while ((node = rb_last(&ctl->free_space_offset)) != NULL) {
1857                 info = rb_entry(node, struct btrfs_free_space, offset_index);
1858                 if (!info->bitmap) {
1859                         unlink_free_space(ctl, info);
1860                         kmem_cache_free(btrfs_free_space_cachep, info);
1861                 } else {
1862                         free_bitmap(ctl, info);
1863                 }
1864                 if (need_resched()) {
1865                         spin_unlock(&ctl->tree_lock);
1866                         cond_resched();
1867                         spin_lock(&ctl->tree_lock);
1868                 }
1869         }
1870 }
1871
1872 void __btrfs_remove_free_space_cache(struct btrfs_free_space_ctl *ctl)
1873 {
1874         spin_lock(&ctl->tree_lock);
1875         __btrfs_remove_free_space_cache_locked(ctl);
1876         spin_unlock(&ctl->tree_lock);
1877 }
1878
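     /*
      * Tear down the free space cache for a block group: return every cluster
      * on its cluster_list to the cache and then drop all remaining entries.
      */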
1879 void btrfs_remove_free_space_cache(struct btrfs_block_group_cache *block_group)
1880 {
1881         struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
1882         struct btrfs_free_cluster *cluster;
1883         struct list_head *head;
1884
1885         spin_lock(&ctl->tree_lock);
1886         while ((head = block_group->cluster_list.next) !=
1887                &block_group->cluster_list) {
1888                 cluster = list_entry(head, struct btrfs_free_cluster,
1889                                      block_group_list);
1890
1891                 WARN_ON(cluster->block_group != block_group);
1892                 __btrfs_return_cluster_to_free_space(block_group, cluster);
1893                 if (need_resched()) {
1894                         spin_unlock(&ctl->tree_lock);
1895                         cond_resched();
1896                         spin_lock(&ctl->tree_lock);
1897                 }
1898         }
1899         __btrfs_remove_free_space_cache_locked(ctl);
1900         spin_unlock(&ctl->tree_lock);
1902 }
1903
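     /*
      * Allocate @bytes of free space from the block group.  We search for a
      * region of at least @bytes + @empty_size but only consume @bytes of it.
      * Returns the logical start of the allocation, or 0 on failure.
      */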
1904 u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group,
1905                                u64 offset, u64 bytes, u64 empty_size)
1906 {
1907         struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
1908         struct btrfs_free_space *entry = NULL;
1909         u64 bytes_search = bytes + empty_size;
1910         u64 ret = 0;
1911
1912         spin_lock(&ctl->tree_lock);
1913         entry = find_free_space(ctl, &offset, &bytes_search);
1914         if (!entry)
1915                 goto out;
1916
1917         ret = offset;
1918         if (entry->bitmap) {
1919                 bitmap_clear_bits(ctl, entry, offset, bytes);
1920                 if (!entry->bytes)
1921                         free_bitmap(ctl, entry);
1922         } else {
1923                 unlink_free_space(ctl, entry);
1924                 entry->offset += bytes;
1925                 entry->bytes -= bytes;
1926                 if (!entry->bytes)
1927                         kmem_cache_free(btrfs_free_space_cachep, entry);
1928                 else
1929                         link_free_space(ctl, entry);
1930         }
1931
1932 out:
1933         spin_unlock(&ctl->tree_lock);
1934
1935         return ret;
1936 }
1937
1938 /*
1939  * given a cluster, put all of its extents back into the free space
1940  * cache.  If a block group is passed, this function will only free
1941  * a cluster that belongs to the passed block group.
1942  *
1943  * Otherwise, it'll get a reference on the block group pointed to by the
1944  * cluster and remove the cluster from it.
1945  */
1946 int btrfs_return_cluster_to_free_space(
1947                                struct btrfs_block_group_cache *block_group,
1948                                struct btrfs_free_cluster *cluster)
1949 {
1950         struct btrfs_free_space_ctl *ctl;
1951         int ret;
1952
1953         /* first, get a safe pointer to the block group */
1954         spin_lock(&cluster->lock);
1955         if (!block_group) {
1956                 block_group = cluster->block_group;
1957                 if (!block_group) {
1958                         spin_unlock(&cluster->lock);
1959                         return 0;
1960                 }
1961         } else if (cluster->block_group != block_group) {
1962                 /* someone else has already freed it, don't redo their work */
1963                 spin_unlock(&cluster->lock);
1964                 return 0;
1965         }
1966         atomic_inc(&block_group->count);
1967         spin_unlock(&cluster->lock);
1968
1969         ctl = block_group->free_space_ctl;
1970
1971         /* now return any extents the cluster had on it */
1972         spin_lock(&ctl->tree_lock);
1973         ret = __btrfs_return_cluster_to_free_space(block_group, cluster);
1974         spin_unlock(&ctl->tree_lock);
1975
1976         /* finally drop our ref */
1977         btrfs_put_block_group(block_group);
1978         return ret;
1979 }
1980
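     /*
      * Carve @bytes out of a bitmap entry that belongs to the cluster,
      * starting no lower than @min_start.  Returns the start offset of the
      * allocation, or 0 if the bitmap has no large enough run of free bits.
      */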
1981 static u64 btrfs_alloc_from_bitmap(struct btrfs_block_group_cache *block_group,
1982                                    struct btrfs_free_cluster *cluster,
1983                                    struct btrfs_free_space *entry,
1984                                    u64 bytes, u64 min_start)
1985 {
1986         struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
1987         int err;
1988         u64 search_start = min_start;
1989         u64 search_bytes = bytes;
1990         u64 ret = 0;
1994
1995         err = search_bitmap(ctl, entry, &search_start, &search_bytes);
1996         if (err)
1997                 return 0;
1998
1999         ret = search_start;
2000         __bitmap_clear_bits(ctl, entry, ret, bytes);
2001
2002         return ret;
2003 }
2004
2005 /*
2006  * given a cluster, try to allocate 'bytes' from it, returns 0
2007  * if it couldn't find anything suitably large, or a logical disk offset
2008  * if things worked out
2009  */
2010 u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group,
2011                              struct btrfs_free_cluster *cluster, u64 bytes,
2012                              u64 min_start)
2013 {
2014         struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2015         struct btrfs_free_space *entry = NULL;
2016         struct rb_node *node;
2017         u64 ret = 0;
2018
2019         spin_lock(&cluster->lock);
2020         if (bytes > cluster->max_size)
2021                 goto out;
2022
2023         if (cluster->block_group != block_group)
2024                 goto out;
2025
2026         node = rb_first(&cluster->root);
2027         if (!node)
2028                 goto out;
2029
2030         entry = rb_entry(node, struct btrfs_free_space, offset_index);
2031         while (1) {
2032                 if (entry->bytes < bytes ||
2033                     (!entry->bitmap && entry->offset < min_start)) {
2034                         node = rb_next(&entry->offset_index);
2035                         if (!node)
2036                                 break;
2037                         entry = rb_entry(node, struct btrfs_free_space,
2038                                          offset_index);
2039                         continue;
2040                 }
2041
2042                 if (entry->bitmap) {
2043                         ret = btrfs_alloc_from_bitmap(block_group,
2044                                                       cluster, entry, bytes,
2045                                                       min_start);
2046                         if (ret == 0) {
2047                                 node = rb_next(&entry->offset_index);
2048                                 if (!node)
2049                                         break;
2050                                 entry = rb_entry(node, struct btrfs_free_space,
2051                                                  offset_index);
2052                                 continue;
2053                         }
2054                 } else {
2055                         ret = entry->offset;
2056
2057                         entry->offset += bytes;
2058                         entry->bytes -= bytes;
2059                 }
2060
2061                 if (entry->bytes == 0)
2062                         rb_erase(&entry->offset_index, &cluster->root);
2063                 break;
2064         }
2065 out:
2066         spin_unlock(&cluster->lock);
2067
2068         if (!ret)
2069                 return 0;
2070
2071         spin_lock(&ctl->tree_lock);
2072
2073         ctl->free_space -= bytes;
2074         if (entry->bytes == 0) {
2075                 ctl->free_extents--;
2076                 if (entry->bitmap) {
2077                         kfree(entry->bitmap);
2078                         ctl->total_bitmaps--;
2079                         ctl->op->recalc_thresholds(ctl);
2080                 }
2081                 kmem_cache_free(btrfs_free_space_cachep, entry);
2082         }
2083
2084         spin_unlock(&ctl->tree_lock);
2085
2086         return ret;
2087 }
2088
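     /*
      * Try to set up the cluster from a single bitmap entry: gather runs of
      * free bits until at least min_bytes worth are found in a reasonably
      * small window, record the window start and move the bitmap entry from
      * the free space tree into the cluster's rbtree.
      */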
2089 static int btrfs_bitmap_cluster(struct btrfs_block_group_cache *block_group,
2090                                 struct btrfs_free_space *entry,
2091                                 struct btrfs_free_cluster *cluster,
2092                                 u64 offset, u64 bytes, u64 min_bytes)
2093 {
2094         struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2095         unsigned long next_zero;
2096         unsigned long i;
2097         unsigned long search_bits;
2098         unsigned long total_bits;
2099         unsigned long found_bits;
2100         unsigned long start = 0;
2101         unsigned long total_found = 0;
2102         int ret;
2103         bool found = false;
2104
2105         i = offset_to_bit(entry->offset, block_group->sectorsize,
2106                           max_t(u64, offset, entry->offset));
2107         search_bits = bytes_to_bits(bytes, block_group->sectorsize);
2108         total_bits = bytes_to_bits(min_bytes, block_group->sectorsize);
2109
2110 again:
2111         found_bits = 0;
2112         for (i = find_next_bit(entry->bitmap, BITS_PER_BITMAP, i);
2113              i < BITS_PER_BITMAP;
2114              i = find_next_bit(entry->bitmap, BITS_PER_BITMAP, i + 1)) {
2115                 next_zero = find_next_zero_bit(entry->bitmap,
2116                                                BITS_PER_BITMAP, i);
2117                 if (next_zero - i >= search_bits) {
2118                         found_bits = next_zero - i;
2119                         break;
2120                 }
2121                 i = next_zero;
2122         }
2123
2124         if (!found_bits)
2125                 return -ENOSPC;
2126
2127         if (!found) {
2128                 start = i;
2129                 found = true;
2130         }
2131
2132         total_found += found_bits;
2133
2134         if (cluster->max_size < found_bits * block_group->sectorsize)
2135                 cluster->max_size = found_bits * block_group->sectorsize;
2136
2137         if (total_found < total_bits) {
2138                 i = find_next_bit(entry->bitmap, BITS_PER_BITMAP, next_zero);
2139                 if (i - start > total_bits * 2) {
2140                         total_found = 0;
2141                         cluster->max_size = 0;
2142                         found = false;
2143                 }
2144                 goto again;
2145         }
2146
2147         cluster->window_start = start * block_group->sectorsize +
2148                 entry->offset;
2149         rb_erase(&entry->offset_index, &ctl->free_space_offset);
2150         ret = tree_insert_offset(&cluster->root, entry->offset,
2151                                  &entry->offset_index, 1);
2152         BUG_ON(ret);
2153
2154         return 0;
2155 }
2156
2157 /*
2158  * This searches the block group for just extents to fill the cluster with.
2159  */
2160 static noinline int
2161 setup_cluster_no_bitmap(struct btrfs_block_group_cache *block_group,
2162                         struct btrfs_free_cluster *cluster,
2163                         struct list_head *bitmaps, u64 offset, u64 bytes,
2164                         u64 min_bytes)
2165 {
2166         struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2167         struct btrfs_free_space *first = NULL;
2168         struct btrfs_free_space *entry = NULL;
2169         struct btrfs_free_space *prev = NULL;
2170         struct btrfs_free_space *last;
2171         struct rb_node *node;
2172         u64 window_start;
2173         u64 window_free;
2174         u64 max_extent;
2175         u64 max_gap = 128 * 1024;
2176
2177         entry = tree_search_offset(ctl, offset, 0, 1);
2178         if (!entry)
2179                 return -ENOSPC;
2180
2181         /*
2182          * We don't want bitmaps, so just move along until we find a normal
2183          * extent entry.
2184          */
2185         while (entry->bitmap) {
2186                 if (list_empty(&entry->list))
2187                         list_add_tail(&entry->list, bitmaps);
2188                 node = rb_next(&entry->offset_index);
2189                 if (!node)
2190                         return -ENOSPC;
2191                 entry = rb_entry(node, struct btrfs_free_space, offset_index);
2192         }
2193
2194         window_start = entry->offset;
2195         window_free = entry->bytes;
2196         max_extent = entry->bytes;
2197         first = entry;
2198         last = entry;
2199         prev = entry;
2200
2201         while (window_free <= min_bytes) {
2202                 node = rb_next(&entry->offset_index);
2203                 if (!node)
2204                         return -ENOSPC;
2205                 entry = rb_entry(node, struct btrfs_free_space, offset_index);
2206
2207                 if (entry->bitmap) {
2208                         if (list_empty(&entry->list))
2209                                 list_add_tail(&entry->list, bitmaps);
2210                         continue;
2211                 }
2212
2213                 /*
2214                  * we haven't filled the empty size and the window is
2215                  * very large.  reset and try again
2216                  */
2217                 if (entry->offset - (prev->offset + prev->bytes) > max_gap ||
2218                     entry->offset - window_start > (min_bytes * 2)) {
2219                         first = entry;
2220                         window_start = entry->offset;
2221                         window_free = entry->bytes;
2222                         last = entry;
2223                         max_extent = entry->bytes;
2224                 } else {
2225                         last = entry;
2226                         window_free += entry->bytes;
2227                         if (entry->bytes > max_extent)
2228                                 max_extent = entry->bytes;
2229                 }
2230                 prev = entry;
2231         }
2232
2233         cluster->window_start = first->offset;
2234
2235         node = &first->offset_index;
2236
2237         /*
2238          * now we've found our entries, pull them out of the free space
2239          * cache and put them into the cluster rbtree
2240          */
2241         do {
2242                 int ret;
2243
2244                 entry = rb_entry(node, struct btrfs_free_space, offset_index);
2245                 node = rb_next(&entry->offset_index);
2246                 if (entry->bitmap)
2247                         continue;
2248
2249                 rb_erase(&entry->offset_index, &ctl->free_space_offset);
2250                 ret = tree_insert_offset(&cluster->root, entry->offset,
2251                                          &entry->offset_index, 0);
2252                 BUG_ON(ret);
2253         } while (node && entry != last);
2254
2255         cluster->max_size = max_extent;
2256
2257         return 0;
2258 }
2259
2260 /*
2261  * This specifically looks for bitmaps that may work in the cluster; we assume
2262  * that we have already failed to find extents that will work.
2263  */
2264 static noinline int
2265 setup_cluster_bitmap(struct btrfs_block_group_cache *block_group,
2266                      struct btrfs_free_cluster *cluster,
2267                      struct list_head *bitmaps, u64 offset, u64 bytes,
2268                      u64 min_bytes)
2269 {
2270         struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2271         struct btrfs_free_space *entry;
2272         struct rb_node *node;
2273         int ret = -ENOSPC;
2274
2275         if (ctl->total_bitmaps == 0)
2276                 return -ENOSPC;
2277
2278         /*
2279          * First check our cached list of bitmaps and see if there is an entry
2280          * here that will work.
2281          */
2282         list_for_each_entry(entry, bitmaps, list) {
2283                 if (entry->bytes < min_bytes)
2284                         continue;
2285                 ret = btrfs_bitmap_cluster(block_group, entry, cluster, offset,
2286                                            bytes, min_bytes);
2287                 if (!ret)
2288                         return 0;
2289         }
2290
2291         /*
2292          * If we do have entries on our list and we are here then we didn't find
2293          * anything, so go ahead and get the next entry after the last entry in
2294          * this list and start the search from there.
2295          */
2296         if (!list_empty(bitmaps)) {
2297                 entry = list_entry(bitmaps->prev, struct btrfs_free_space,
2298                                    list);
2299                 node = rb_next(&entry->offset_index);
2300                 if (!node)
2301                         return -ENOSPC;
2302                 entry = rb_entry(node, struct btrfs_free_space, offset_index);
2303                 goto search;
2304         }
2305
2306         entry = tree_search_offset(ctl, offset_to_bitmap(ctl, offset), 0, 1);
2307         if (!entry)
2308                 return -ENOSPC;
2309
2310 search:
2311         node = &entry->offset_index;
2312         do {
2313                 entry = rb_entry(node, struct btrfs_free_space, offset_index);
2314                 node = rb_next(&entry->offset_index);
2315                 if (!entry->bitmap)
2316                         continue;
2317                 if (entry->bytes < min_bytes)
2318                         continue;
2319                 ret = btrfs_bitmap_cluster(block_group, entry, cluster, offset,
2320                                            bytes, min_bytes);
2321         } while (ret && node);
2322
2323         return ret;
2324 }
2325
2326 /*
2327  * here we try to find a cluster of blocks in a block group.  The goal
2328  * is to find at least bytes free and up to empty_size + bytes free.
2329  * We might not find them all in one contiguous area.
2330  *
2331  * returns zero and sets up cluster if things worked out, otherwise
2332  * it returns -ENOSPC
2333  */
2334 int btrfs_find_space_cluster(struct btrfs_trans_handle *trans,
2335                              struct btrfs_root *root,
2336                              struct btrfs_block_group_cache *block_group,
2337                              struct btrfs_free_cluster *cluster,
2338                              u64 offset, u64 bytes, u64 empty_size)
2339 {
2340         struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2341         struct list_head bitmaps;
2342         struct btrfs_free_space *entry, *tmp;
2343         u64 min_bytes;
2344         int ret;
2345
2346         /* for metadata, allow allocations with more holes */
2347         if (btrfs_test_opt(root, SSD_SPREAD)) {
2348                 min_bytes = bytes + empty_size;
2349         } else if (block_group->flags & BTRFS_BLOCK_GROUP_METADATA) {
2350                 /*
2351                  * we want to do larger allocations when we are
2352                  * flushing out the delayed refs; it helps prevent
2353                  * making more work as we go along.
2354                  */
2355                 if (trans->transaction->delayed_refs.flushing)
2356                         min_bytes = max(bytes, (bytes + empty_size) >> 1);
2357                 else
2358                         min_bytes = max(bytes, (bytes + empty_size) >> 4);
2359         } else
2360                 min_bytes = max(bytes, (bytes + empty_size) >> 2);
2361
2362         spin_lock(&ctl->tree_lock);
2363
2364         /*
2365          * If we know we don't have enough space to make a cluster don't even
2366          * bother doing all the work to try and find one.
2367          */
2368         if (ctl->free_space < min_bytes) {
2369                 spin_unlock(&ctl->tree_lock);
2370                 return -ENOSPC;
2371         }
2372
2373         spin_lock(&cluster->lock);
2374
2375         /* someone already found a cluster, hooray */
2376         if (cluster->block_group) {
2377                 ret = 0;
2378                 goto out;
2379         }
2380
2381         INIT_LIST_HEAD(&bitmaps);
2382         ret = setup_cluster_no_bitmap(block_group, cluster, &bitmaps, offset,
2383                                       bytes, min_bytes);
2384         if (ret)
2385                 ret = setup_cluster_bitmap(block_group, cluster, &bitmaps,
2386                                            offset, bytes, min_bytes);
2387
2388         /* Clear our temporary list */
2389         list_for_each_entry_safe(entry, tmp, &bitmaps, list)
2390                 list_del_init(&entry->list);
2391
2392         if (!ret) {
2393                 atomic_inc(&block_group->count);
2394                 list_add_tail(&cluster->block_group_list,
2395                               &block_group->cluster_list);
2396                 cluster->block_group = block_group;
2397         }
2398 out:
2399         spin_unlock(&cluster->lock);
2400         spin_unlock(&ctl->tree_lock);
2401
2402         return ret;
2403 }
2404
2405 /*
2406  * simple code to zero out a cluster
2407  */
2408 void btrfs_init_free_cluster(struct btrfs_free_cluster *cluster)
2409 {
2410         spin_lock_init(&cluster->lock);
2411         spin_lock_init(&cluster->refill_lock);
2412         cluster->root = RB_ROOT;
2413         cluster->max_size = 0;
2414         INIT_LIST_HEAD(&cluster->block_group_list);
2415         cluster->block_group = NULL;
2416 }
2417
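     /*
      * Walk the free space in [start, end): temporarily remove each chunk
      * from the cache, issue a discard for any piece at least @minlen long
      * and then add it back, accumulating the bytes actually discarded in
      * *trimmed.
      */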
2418 int btrfs_trim_block_group(struct btrfs_block_group_cache *block_group,
2419                            u64 *trimmed, u64 start, u64 end, u64 minlen)
2420 {
2421         struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2422         struct btrfs_free_space *entry = NULL;
2423         struct btrfs_fs_info *fs_info = block_group->fs_info;
2424         u64 bytes = 0;
2425         u64 actually_trimmed;
2426         int ret = 0;
2427
2428         *trimmed = 0;
2429
2430         while (start < end) {
2431                 spin_lock(&ctl->tree_lock);
2432
2433                 if (ctl->free_space < minlen) {
2434                         spin_unlock(&ctl->tree_lock);
2435                         break;
2436                 }
2437
2438                 entry = tree_search_offset(ctl, start, 0, 1);
2439                 if (!entry)
2440                         entry = tree_search_offset(ctl,
2441                                                    offset_to_bitmap(ctl, start),
2442                                                    1, 1);
2443
2444                 if (!entry || entry->offset >= end) {
2445                         spin_unlock(&ctl->tree_lock);
2446                         break;
2447                 }
2448
2449                 if (entry->bitmap) {
2450                         ret = search_bitmap(ctl, entry, &start, &bytes);
2451                         if (!ret) {
2452                                 if (start >= end) {
2453                                         spin_unlock(&ctl->tree_lock);
2454                                         break;
2455                                 }
2456                                 bytes = min(bytes, end - start);
2457                                 bitmap_clear_bits(ctl, entry, start, bytes);
2458                                 if (entry->bytes == 0)
2459                                         free_bitmap(ctl, entry);
2460                         } else {
2461                                 start = entry->offset + BITS_PER_BITMAP *
2462                                         block_group->sectorsize;
2463                                 spin_unlock(&ctl->tree_lock);
2464                                 ret = 0;
2465                                 continue;
2466                         }
2467                 } else {
2468                         start = entry->offset;
2469                         bytes = min(entry->bytes, end - start);
2470                         unlink_free_space(ctl, entry);
2471                         kmem_cache_free(btrfs_free_space_cachep, entry);
2472                 }
2473
2474                 spin_unlock(&ctl->tree_lock);
2475
2476                 if (bytes >= minlen) {
2477                         struct btrfs_space_info *space_info;
2478                         int update = 0;
2479
2480                         space_info = block_group->space_info;
2481                         spin_lock(&space_info->lock);
2482                         spin_lock(&block_group->lock);
2483                         if (!block_group->ro) {
2484                                 block_group->reserved += bytes;
2485                                 space_info->bytes_reserved += bytes;
2486                                 update = 1;
2487                         }
2488                         spin_unlock(&block_group->lock);
2489                         spin_unlock(&space_info->lock);
2490
2491                         ret = btrfs_error_discard_extent(fs_info->extent_root,
2492                                                          start,
2493                                                          bytes,
2494                                                          &actually_trimmed);
2495
2496                         btrfs_add_free_space(block_group, start, bytes);
2497                         if (update) {
2498                                 spin_lock(&space_info->lock);
2499                                 spin_lock(&block_group->lock);
2500                                 if (block_group->ro)
2501                                         space_info->bytes_readonly += bytes;
2502                                 block_group->reserved -= bytes;
2503                                 space_info->bytes_reserved -= bytes;
2504                                 spin_unlock(&space_info->lock);
2505                                 spin_unlock(&block_group->lock);
2506                         }
2507
2508                         if (ret)
2509                                 break;
2510                         *trimmed += actually_trimmed;
2511                 }
2512                 start += bytes;
2513                 bytes = 0;
2514
2515                 if (fatal_signal_pending(current)) {
2516                         ret = -ERESTARTSYS;
2517                         break;
2518                 }
2519
2520                 cond_resched();
2521         }
2522
2523         return ret;
2524 }
2525
2526 /*
2527  * Find the left-most item in the cache tree, and then return the
2528  * smallest inode number in the item.
2529  *
2530  * Note: the returned inode number may not be the smallest one in
2531  * the tree, if the left-most item is a bitmap.
2532  */
2533 u64 btrfs_find_ino_for_alloc(struct btrfs_root *fs_root)
2534 {
2535         struct btrfs_free_space_ctl *ctl = fs_root->free_ino_ctl;
2536         struct btrfs_free_space *entry = NULL;
2537         u64 ino = 0;
2538
2539         spin_lock(&ctl->tree_lock);
2540
2541         if (RB_EMPTY_ROOT(&ctl->free_space_offset))
2542                 goto out;
2543
2544         entry = rb_entry(rb_first(&ctl->free_space_offset),
2545                          struct btrfs_free_space, offset_index);
2546
2547         if (!entry->bitmap) {
2548                 ino = entry->offset;
2549
2550                 unlink_free_space(ctl, entry);
2551                 entry->offset++;
2552                 entry->bytes--;
2553                 if (!entry->bytes)
2554                         kmem_cache_free(btrfs_free_space_cachep, entry);
2555                 else
2556                         link_free_space(ctl, entry);
2557         } else {
2558                 u64 offset = 0;
2559                 u64 count = 1;
2560                 int ret;
2561
2562                 ret = search_bitmap(ctl, entry, &offset, &count);
2563                 BUG_ON(ret);
2564
2565                 ino = offset;
2566                 bitmap_clear_bits(ctl, entry, offset, 1);
2567                 if (entry->bytes == 0)
2568                         free_bitmap(ctl, entry);
2569         }
2570 out:
2571         spin_unlock(&ctl->tree_lock);
2572
2573         return ino;
2574 }
2575
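     /*
      * Return the inode used for the free ino cache of @root, looking it up
      * on disk and caching it in root->cache_inode if it isn't held already.
      */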
2576 struct inode *lookup_free_ino_inode(struct btrfs_root *root,
2577                                     struct btrfs_path *path)
2578 {
2579         struct inode *inode = NULL;
2580
2581         spin_lock(&root->cache_lock);
2582         if (root->cache_inode)
2583                 inode = igrab(root->cache_inode);
2584         spin_unlock(&root->cache_lock);
2585         if (inode)
2586                 return inode;
2587
2588         inode = __lookup_free_space_inode(root, path, 0);
2589         if (IS_ERR(inode))
2590                 return inode;
2591
2592         spin_lock(&root->cache_lock);
2593         if (!btrfs_fs_closing(root->fs_info))
2594                 root->cache_inode = igrab(inode);
2595         spin_unlock(&root->cache_lock);
2596
2597         return inode;
2598 }
2599
2600 int create_free_ino_inode(struct btrfs_root *root,
2601                           struct btrfs_trans_handle *trans,
2602                           struct btrfs_path *path)
2603 {
2604         return __create_free_space_inode(root, trans, path,
2605                                          BTRFS_FREE_INO_OBJECTID, 0);
2606 }
2607
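     /*
      * Load the on-disk free ino cache for @root into its free_ino_ctl, but
      * only if the inode map cache is enabled and the cached generation
      * matches the root generation.
      */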
2608 int load_free_ino_cache(struct btrfs_fs_info *fs_info, struct btrfs_root *root)
2609 {
2610         struct btrfs_free_space_ctl *ctl = root->free_ino_ctl;
2611         struct btrfs_path *path;
2612         struct inode *inode;
2613         int ret = 0;
2614         u64 root_gen = btrfs_root_generation(&root->root_item);
2615
2616         if (!btrfs_test_opt(root, INODE_MAP_CACHE))
2617                 return 0;
2618
2619         /*
2620          * If we're unmounting then just return, since this does a search on the
2621          * normal root and not the commit root and we could deadlock.
2622          */
2623         if (btrfs_fs_closing(fs_info))
2624                 return 0;
2625
2626         path = btrfs_alloc_path();
2627         if (!path)
2628                 return 0;
2629
2630         inode = lookup_free_ino_inode(root, path);
2631         if (IS_ERR(inode))
2632                 goto out;
2633
2634         if (root_gen != BTRFS_I(inode)->generation)
2635                 goto out_put;
2636
2637         ret = __load_free_space_cache(root, inode, ctl, path, 0);
2638
2639         if (ret < 0)
2640                 printk(KERN_ERR "btrfs: failed to load free ino cache for "
2641                        "root %llu\n", root->root_key.objectid);
2642 out_put:
2643         iput(inode);
2644 out:
2645         btrfs_free_path(path);
2646         return ret;
2647 }
2648
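     /*
      * Write the in-memory free ino cache for @root back to its cache inode,
      * if the inode map cache is enabled.
      */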
2649 int btrfs_write_out_ino_cache(struct btrfs_root *root,
2650                               struct btrfs_trans_handle *trans,
2651                               struct btrfs_path *path)
2652 {
2653         struct btrfs_free_space_ctl *ctl = root->free_ino_ctl;
2654         struct inode *inode;
2655         int ret;
2656
2657         if (!btrfs_test_opt(root, INODE_MAP_CACHE))
2658                 return 0;
2659
2660         inode = lookup_free_ino_inode(root, path);
2661         if (IS_ERR(inode))
2662                 return 0;
2663
2664         ret = __btrfs_write_out_cache(root, inode, ctl, NULL, trans, path, 0);
2665         if (ret < 0)
2666                 printk(KERN_ERR "btrfs: failed to write free ino cache "
2667                        "for root %llu\n", root->root_key.objectid);
2668
2669         iput(inode);
2670         return ret;
2671 }