Merge branch 'x86-seccomp-for-linus' of git://git.kernel.org/pub/scm/linux/kernel...
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 09b3c8a0c790097be00ba378e83d4470db9335cb..fa45e3cae40db660c6170811cfc54c66d81556c1 100644
@@ -72,21 +72,41 @@ static int btrfs_cleanup_transaction(struct btrfs_root *root);
 static void btrfs_error_commit_super(struct btrfs_root *root);
 
 /*
- * end_io_wq structs are used to do processing in task context when an IO is
- * complete.  This is used during reads to verify checksums, and it is used
+ * btrfs_end_io_wq structs are used to do processing in task context when an IO
+ * is complete.  This is used during reads to verify checksums, and it is used
  * by writes to insert metadata for new file extents after IO is complete.
  */
-struct end_io_wq {
+struct btrfs_end_io_wq {
        struct bio *bio;
        bio_end_io_t *end_io;
        void *private;
        struct btrfs_fs_info *info;
        int error;
-       int metadata;
+       enum btrfs_wq_endio_type metadata;
        struct list_head list;
        struct btrfs_work work;
 };
 
+static struct kmem_cache *btrfs_end_io_wq_cache;
+
+int __init btrfs_end_io_wq_init(void)
+{
+       btrfs_end_io_wq_cache = kmem_cache_create("btrfs_end_io_wq",
+                                       sizeof(struct btrfs_end_io_wq),
+                                       0,
+                                       SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD,
+                                       NULL);
+       if (!btrfs_end_io_wq_cache)
+               return -ENOMEM;
+       return 0;
+}
+
+void btrfs_end_io_wq_exit(void)
+{
+       if (btrfs_end_io_wq_cache)
+               kmem_cache_destroy(btrfs_end_io_wq_cache);
+}
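/*
 * Illustrative aside, not part of this diff: a minimal sketch of how an
 * init/exit pair like the one above is usually wired into a module's
 * load/unload path. The function names below are hypothetical; the real
 * call sites live elsewhere in the btrfs module setup code.
 */
static int __init example_module_init(void)
{
        int ret;

        ret = btrfs_end_io_wq_init();
        if (ret)
                return ret;
        /* ... register the filesystem and set up other caches here ... */
        return 0;
}

static void __exit example_module_exit(void)
{
        /* Tear down in reverse order, then free the slab cache. */
        btrfs_end_io_wq_exit();
}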
+
 /*
  * async submit bios are used to offload expensive checksumming
  * onto the worker threads.  They checksum file and metadata bios
@@ -327,8 +347,7 @@ static int verify_parent_transid(struct extent_io_tree *io_tree,
 {
        struct extent_state *cached_state = NULL;
        int ret;
-       bool need_lock = (current->journal_info ==
-                         (void *)BTRFS_SEND_TRANS_STUB);
+       bool need_lock = (current->journal_info == BTRFS_SEND_TRANS_STUB);
 
        if (!parent_transid || btrfs_header_generation(eb) == parent_transid)
                return 0;
@@ -690,7 +709,7 @@ static int btree_io_failed_hook(struct page *page, int failed_mirror)
 
 static void end_workqueue_bio(struct bio *bio, int err)
 {
-       struct end_io_wq *end_io_wq = bio->bi_private;
+       struct btrfs_end_io_wq *end_io_wq = bio->bi_private;
        struct btrfs_fs_info *fs_info;
        struct btrfs_workqueue *wq;
        btrfs_work_func_t func;
@@ -733,20 +752,12 @@ static void end_workqueue_bio(struct bio *bio, int err)
        btrfs_queue_work(wq, &end_io_wq->work);
 }
 
-/*
- * For the metadata arg you want
- *
- * 0 - if data
- * 1 - if normal metadta
- * 2 - if writing to the free space cache area
- * 3 - raid parity work
- */
 int btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio,
-                       int metadata)
+                       enum btrfs_wq_endio_type metadata)
 {
-       struct end_io_wq *end_io_wq;
+       struct btrfs_end_io_wq *end_io_wq;
 
-       end_io_wq = kmalloc(sizeof(*end_io_wq), GFP_NOFS);
+       end_io_wq = kmem_cache_alloc(btrfs_end_io_wq_cache, GFP_NOFS);
        if (!end_io_wq)
                return -ENOMEM;
 
@@ -930,7 +941,7 @@ static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
                 * can happen in the async kernel threads
                 */
                ret = btrfs_bio_wq_end_io(BTRFS_I(inode)->root->fs_info,
-                                         bio, 1);
+                                         bio, BTRFS_WQ_ENDIO_METADATA);
                if (ret)
                        goto out_w_error;
                ret = btrfs_map_bio(BTRFS_I(inode)->root, rw, bio,
@@ -1062,20 +1073,17 @@ static const struct address_space_operations btree_aops = {
        .set_page_dirty = btree_set_page_dirty,
 };
 
-int readahead_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize,
-                        u64 parent_transid)
+void readahead_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize)
 {
        struct extent_buffer *buf = NULL;
        struct inode *btree_inode = root->fs_info->btree_inode;
-       int ret = 0;
 
        buf = btrfs_find_create_tree_block(root, bytenr, blocksize);
        if (!buf)
-               return 0;
+               return;
        read_extent_buffer_pages(&BTRFS_I(btree_inode)->io_tree,
                                 buf, 0, WAIT_NONE, btree_get_extent, 0);
        free_extent_buffer(buf);
-       return ret;
 }
 
 int reada_tree_block_flagged(struct btrfs_root *root, u64 bytenr, u32 blocksize,
@@ -1111,7 +1119,7 @@ int reada_tree_block_flagged(struct btrfs_root *root, u64 bytenr, u32 blocksize,
 }
 
 struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root,
-                                           u64 bytenr, u32 blocksize)
+                                           u64 bytenr)
 {
        return find_extent_buffer(root->fs_info, bytenr);
 }
@@ -1119,11 +1127,9 @@ struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root,
 struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root,
                                                 u64 bytenr, u32 blocksize)
 {
-#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
-       if (unlikely(test_bit(BTRFS_ROOT_DUMMY_ROOT, &root->state)))
+       if (btrfs_test_is_dummy_root(root))
                return alloc_test_extent_buffer(root->fs_info, bytenr,
                                                blocksize);
-#endif
        return alloc_extent_buffer(root->fs_info, bytenr, blocksize);
 }
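/*
 * Illustrative aside, not part of this diff: btrfs_test_is_dummy_root()
 * presumably folds the old #ifdef'd sanity-test check into a helper along
 * these lines. This is a sketch under that assumption, not the actual
 * definition from the btrfs headers.
 */
static inline int btrfs_test_is_dummy_root(struct btrfs_root *root)
{
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
        if (unlikely(test_bit(BTRFS_ROOT_DUMMY_ROOT, &root->state)))
                return 1;
#endif
        return 0;
}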
 
@@ -1141,12 +1147,12 @@ int btrfs_wait_tree_block_writeback(struct extent_buffer *buf)
 }
 
 struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr,
-                                     u32 blocksize, u64 parent_transid)
+                                     u64 parent_transid)
 {
        struct extent_buffer *buf = NULL;
        int ret;
 
-       buf = btrfs_find_create_tree_block(root, bytenr, blocksize);
+       buf = btrfs_find_create_tree_block(root, bytenr, root->nodesize);
        if (!buf)
                return NULL;
 
@@ -1188,7 +1194,7 @@ static struct btrfs_subvolume_writers *btrfs_alloc_subvolume_writers(void)
        if (!writers)
                return ERR_PTR(-ENOMEM);
 
-       ret = percpu_counter_init(&writers->counter, 0);
+       ret = percpu_counter_init(&writers->counter, 0, GFP_KERNEL);
        if (ret < 0) {
                kfree(writers);
                return ERR_PTR(ret);
@@ -1327,8 +1333,7 @@ struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans,
        root->root_key.type = BTRFS_ROOT_ITEM_KEY;
        root->root_key.offset = 0;
 
-       leaf = btrfs_alloc_free_block(trans, root, root->nodesize,
-                                     0, objectid, NULL, 0, 0, 0);
+       leaf = btrfs_alloc_tree_block(trans, root, 0, objectid, NULL, 0, 0, 0);
        if (IS_ERR(leaf)) {
                ret = PTR_ERR(leaf);
                leaf = NULL;
@@ -1415,9 +1420,8 @@ static struct btrfs_root *alloc_log_tree(struct btrfs_trans_handle *trans,
         * updated (along with back refs to the log tree).
         */
 
-       leaf = btrfs_alloc_free_block(trans, root, root->nodesize, 0,
-                                     BTRFS_TREE_LOG_OBJECTID, NULL,
-                                     0, 0, 0);
+       leaf = btrfs_alloc_tree_block(trans, root, 0, BTRFS_TREE_LOG_OBJECTID,
+                       NULL, 0, 0, 0);
        if (IS_ERR(leaf)) {
                kfree(root);
                return ERR_CAST(leaf);
@@ -1487,7 +1491,6 @@ static struct btrfs_root *btrfs_read_tree_root(struct btrfs_root *tree_root,
        struct btrfs_fs_info *fs_info = tree_root->fs_info;
        struct btrfs_path *path;
        u64 generation;
-       u32 blocksize;
        int ret;
 
        path = btrfs_alloc_path();
@@ -1512,9 +1515,8 @@ static struct btrfs_root *btrfs_read_tree_root(struct btrfs_root *tree_root,
        }
 
        generation = btrfs_root_generation(&root->root_item);
-       blocksize = root->nodesize;
        root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item),
-                                    blocksize, generation);
+                                    generation);
        if (!root->node) {
                ret = -ENOMEM;
                goto find_fail;
@@ -1731,16 +1733,16 @@ static int setup_bdi(struct btrfs_fs_info *info, struct backing_dev_info *bdi)
 static void end_workqueue_fn(struct btrfs_work *work)
 {
        struct bio *bio;
-       struct end_io_wq *end_io_wq;
+       struct btrfs_end_io_wq *end_io_wq;
        int error;
 
-       end_io_wq = container_of(work, struct end_io_wq, work);
+       end_io_wq = container_of(work, struct btrfs_end_io_wq, work);
        bio = end_io_wq->bio;
 
        error = end_io_wq->error;
        bio->bi_private = end_io_wq->private;
        bio->bi_end_io = end_io_wq->end_io;
-       kfree(end_io_wq);
+       kmem_cache_free(btrfs_end_io_wq_cache, end_io_wq);
        bio_endio_nodec(bio, error);
 }
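/*
 * Illustrative aside, not part of this diff: the deferred-completion flow
 * that end_workqueue_fn() above finishes, condensed using only identifiers
 * already visible in this file.
 *
 *   btrfs_bio_wq_end_io()   (submit time, task context)
 *     - kmem_cache_alloc() a btrfs_end_io_wq
 *     - save the bio's bi_private / bi_end_io in it
 *     - redirect bi_end_io to end_workqueue_bio
 *
 *   end_workqueue_bio()     (IO completion, possibly interrupt context)
 *     - record the error, pick a workqueue, btrfs_queue_work() the item
 *
 *   end_workqueue_fn()      (worker thread, task context)
 *     - restore the saved bi_private / bi_end_io
 *     - kmem_cache_free() the btrfs_end_io_wq
 *     - hand the bio to the original completion via bio_endio_nodec()
 */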
 
@@ -2142,7 +2144,6 @@ int open_ctree(struct super_block *sb,
 {
        u32 sectorsize;
        u32 nodesize;
-       u32 blocksize;
        u32 stripesize;
        u64 generation;
        u64 features;
@@ -2186,7 +2187,7 @@ int open_ctree(struct super_block *sb,
                goto fail_srcu;
        }
 
-       ret = percpu_counter_init(&fs_info->dirty_metadata_bytes, 0);
+       ret = percpu_counter_init(&fs_info->dirty_metadata_bytes, 0, GFP_KERNEL);
        if (ret) {
                err = ret;
                goto fail_bdi;
@@ -2194,13 +2195,13 @@ int open_ctree(struct super_block *sb,
        fs_info->dirty_metadata_batch = PAGE_CACHE_SIZE *
                                        (1 + ilog2(nr_cpu_ids));
 
-       ret = percpu_counter_init(&fs_info->delalloc_bytes, 0);
+       ret = percpu_counter_init(&fs_info->delalloc_bytes, 0, GFP_KERNEL);
        if (ret) {
                err = ret;
                goto fail_dirty_metadata_bytes;
        }
 
-       ret = percpu_counter_init(&fs_info->bio_counter, 0);
+       ret = percpu_counter_init(&fs_info->bio_counter, 0, GFP_KERNEL);
        if (ret) {
                err = ret;
                goto fail_delalloc_bytes;
@@ -2260,7 +2261,7 @@ int open_ctree(struct super_block *sb,
        atomic_set(&fs_info->qgroup_op_seq, 0);
        atomic64_set(&fs_info->tree_mod_seq, 0);
        fs_info->sb = sb;
-       fs_info->max_inline = 8192 * 1024;
+       fs_info->max_inline = BTRFS_DEFAULT_MAX_INLINE;
        fs_info->metadata_ratio = 0;
        fs_info->defrag_inodes = RB_ROOT;
        fs_info->free_chunk_space = 0;
@@ -2646,7 +2647,6 @@ int open_ctree(struct super_block *sb,
                goto fail_sb_buffer;
        }
 
-       blocksize = tree_root->nodesize;
        generation = btrfs_super_chunk_root_generation(disk_super);
 
        __setup_root(nodesize, sectorsize, stripesize, chunk_root,
@@ -2654,7 +2654,7 @@ int open_ctree(struct super_block *sb,
 
        chunk_root->node = read_tree_block(chunk_root,
                                           btrfs_super_chunk_root(disk_super),
-                                          blocksize, generation);
+                                          generation);
        if (!chunk_root->node ||
            !test_bit(EXTENT_BUFFER_UPTODATE, &chunk_root->node->bflags)) {
                printk(KERN_WARNING "BTRFS: failed to read chunk root on %s\n",
@@ -2687,12 +2687,11 @@ int open_ctree(struct super_block *sb,
        }
 
 retry_root_backup:
-       blocksize = tree_root->nodesize;
        generation = btrfs_super_generation(disk_super);
 
        tree_root->node = read_tree_block(tree_root,
                                          btrfs_super_root(disk_super),
-                                         blocksize, generation);
+                                         generation);
        if (!tree_root->node ||
            !test_bit(EXTENT_BUFFER_UPTODATE, &tree_root->node->bflags)) {
                printk(KERN_WARNING "BTRFS: failed to read tree root on %s\n",
@@ -2861,7 +2860,6 @@ retry_root_backup:
                        err = -EIO;
                        goto fail_qgroup;
                }
-               blocksize = tree_root->nodesize;
 
                log_tree_root = btrfs_alloc_root(fs_info);
                if (!log_tree_root) {
@@ -2873,7 +2871,6 @@ retry_root_backup:
                             log_tree_root, fs_info, BTRFS_TREE_LOG_OBJECTID);
 
                log_tree_root->node = read_tree_block(tree_root, bytenr,
-                                                     blocksize,
                                                      generation + 1);
                if (!log_tree_root->node ||
                    !extent_buffer_uptodate(log_tree_root->node)) {
@@ -3817,10 +3814,73 @@ int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid)
 static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info,
                              int read_only)
 {
+       struct btrfs_super_block *sb = fs_info->super_copy;
+       int ret = 0;
+
+       if (sb->root_level > BTRFS_MAX_LEVEL) {
+               printk(KERN_ERR "BTRFS: tree_root level too big: %d > %d\n",
+                               sb->root_level, BTRFS_MAX_LEVEL);
+               ret = -EINVAL;
+       }
+       if (sb->chunk_root_level > BTRFS_MAX_LEVEL) {
+               printk(KERN_ERR "BTRFS: chunk_root level too big: %d > %d\n",
+                               sb->chunk_root_level, BTRFS_MAX_LEVEL);
+               ret = -EINVAL;
+       }
+       if (sb->log_root_level > BTRFS_MAX_LEVEL) {
+               printk(KERN_ERR "BTRFS: log_root level too big: %d > %d\n",
+                               sb->log_root_level, BTRFS_MAX_LEVEL);
+               ret = -EINVAL;
+       }
+
        /*
-        * Placeholder for checks
+        * The common minimum; we don't know whether we can trust the
+        * nodesize/sectorsize items yet, as they'll be verified later.
+        * Issue just a warning.
         */
-       return 0;
+       if (!IS_ALIGNED(sb->root, 4096))
+               printk(KERN_WARNING "BTRFS: tree_root block unaligned: %llu\n",
+                               sb->root);
+       if (!IS_ALIGNED(sb->chunk_root, 4096))
+               printk(KERN_WARNING "BTRFS: chunk_root block unaligned: %llu\n",
+                               sb->chunk_root);
+       if (!IS_ALIGNED(sb->log_root, 4096))
+               printk(KERN_WARNING "BTRFS: log_root block unaligned: %llu\n",
+                               sb->log_root);
+
+       if (memcmp(fs_info->fsid, sb->dev_item.fsid, BTRFS_UUID_SIZE) != 0) {
+               printk(KERN_ERR "BTRFS: dev_item UUID does not match fsid: %pU != %pU\n",
+                               fs_info->fsid, sb->dev_item.fsid);
+               ret = -EINVAL;
+       }
+
+       /*
+        * Hint to catch really bogus numbers or bitflips; more exact checks
+        * are done later.
+        */
+       if (sb->num_devices > (1UL << 31))
+               printk(KERN_WARNING "BTRFS: suspicious number of devices: %llu\n",
+                               sb->num_devices);
+
+       if (sb->bytenr != BTRFS_SUPER_INFO_OFFSET) {
+               printk(KERN_ERR "BTRFS: super offset mismatch %llu != %u\n",
+                               sb->bytenr, BTRFS_SUPER_INFO_OFFSET);
+               ret = -EINVAL;
+       }
+
+       /*
+        * The generation is a global counter; we'll trust it more than the
+        * other fields, but it's still possible that it's the one that's wrong.
+        */
+       if (sb->generation < sb->chunk_root_generation)
+               printk(KERN_WARNING
+                       "BTRFS: suspicious: generation < chunk_root_generation: %llu < %llu\n",
+                       sb->generation, sb->chunk_root_generation);
+       if (sb->generation < sb->cache_generation && sb->cache_generation != (u64)-1)
+               printk(KERN_WARNING
+                       "BTRFS: suspicious: generation < cache_generation: %llu < %llu\n",
+                       sb->generation, sb->cache_generation);
+
+       return ret;
 }
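/*
 * Illustrative aside, not part of this diff: the unaligned-block warnings
 * above are plain bitmask tests; IS_ALIGNED(x, 4096) expands to roughly
 * ((x) & 4095) == 0, so a tree_root bytenr of 16384 passes while 16388
 * only warns rather than errors, because the real sectorsize is not
 * trusted at this point.
 */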
 
 static void btrfs_error_commit_super(struct btrfs_root *root)
@@ -4012,8 +4072,7 @@ static int btrfs_destroy_marked_extents(struct btrfs_root *root,
 
                clear_extent_bits(dirty_pages, start, end, mark, GFP_NOFS);
                while (start <= end) {
-                       eb = btrfs_find_tree_block(root, start,
-                                                  root->nodesize);
+                       eb = btrfs_find_tree_block(root, start);
                        start += root->nodesize;
                        if (!eb)
                                continue;