Btrfs: fix race in check-integrity caused by usage of bitfield
[firefly-linux-kernel-4.4.55.git] / fs / btrfs / ctree.c
index b334362110003165a72b63433b192f2b481c3b01..5c2cf992e717b1bf5549bbff20178940444a6fc1 100644 (file)
@@ -38,8 +38,7 @@ static int balance_node_right(struct btrfs_trans_handle *trans,
                              struct extent_buffer *dst_buf,
                              struct extent_buffer *src_buf);
 static void del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
-                   struct btrfs_path *path, int level, int slot,
-                   int tree_mod_log);
+                   struct btrfs_path *path, int level, int slot);
 static void tree_mod_log_free_eb(struct btrfs_fs_info *fs_info,
                                 struct extent_buffer *eb);
 struct extent_buffer *read_old_tree_block(struct btrfs_root *root, u64 bytenr,
@@ -596,6 +595,11 @@ tree_mod_log_insert_move(struct btrfs_fs_info *fs_info,
        if (tree_mod_dont_log(fs_info, eb))
                return 0;
 
+       /*
+        * When we override something during the move, we log these removals.
+        * This can only happen when we move towards the beginning of the
+        * buffer, i.e. dst_slot < src_slot.
+        */
        for (i = 0; i + dst_slot < src_slot && i < nr_items; i++) {
                ret = tree_mod_log_insert_key_locked(fs_info, eb, i + dst_slot,
                                              MOD_LOG_KEY_REMOVE_WHILE_MOVING);
@@ -647,8 +651,6 @@ tree_mod_log_insert_root(struct btrfs_fs_info *fs_info,
        if (tree_mod_dont_log(fs_info, NULL))
                return 0;
 
-       __tree_mod_log_free_eb(fs_info, old_root);
-
        ret = tree_mod_alloc(fs_info, flags, &tm);
        if (ret < 0)
                goto out;
@@ -773,8 +775,7 @@ tree_mod_log_eb_move(struct btrfs_fs_info *fs_info, struct extent_buffer *dst,
 
 static noinline void
 tree_mod_log_set_node_key(struct btrfs_fs_info *fs_info,
-                         struct extent_buffer *eb,
-                         struct btrfs_disk_key *disk_key, int slot, int atomic)
+                         struct extent_buffer *eb, int slot, int atomic)
 {
        int ret;
 
@@ -926,12 +927,7 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
                        ret = btrfs_dec_ref(trans, root, buf, 1, 1);
                        BUG_ON(ret); /* -ENOMEM */
                }
-               /*
-                * don't log freeing in case we're freeing the root node, this
-                * is done by tree_mod_log_set_root_pointer later
-                */
-               if (buf != root->node && btrfs_header_level(buf) != 0)
-                       tree_mod_log_free_eb(root->fs_info, buf);
+               tree_mod_log_free_eb(root->fs_info, buf);
                clean_tree_block(trans, root, buf);
                *last_ref = 1;
        }
@@ -1142,13 +1138,13 @@ __tree_mod_log_rewind(struct extent_buffer *eb, u64 time_seq,
                switch (tm->op) {
                case MOD_LOG_KEY_REMOVE_WHILE_FREEING:
                        BUG_ON(tm->slot < n);
-               case MOD_LOG_KEY_REMOVE_WHILE_MOVING:
                case MOD_LOG_KEY_REMOVE:
+                       n++;
+               case MOD_LOG_KEY_REMOVE_WHILE_MOVING:
                        btrfs_set_node_key(eb, &tm->key, tm->slot);
                        btrfs_set_node_blockptr(eb, tm->slot, tm->blockptr);
                        btrfs_set_node_ptr_generation(eb, tm->slot,
                                                      tm->generation);
-                       n++;
                        break;
                case MOD_LOG_KEY_REPLACE:
                        BUG_ON(tm->slot >= n);
@@ -1225,6 +1221,8 @@ tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct extent_buffer *eb,
        free_extent_buffer(eb);
 
        __tree_mod_log_rewind(eb_rewin, time_seq, tm);
+       WARN_ON(btrfs_header_nritems(eb_rewin) >
+               BTRFS_NODEPTRS_PER_BLOCK(fs_info->fs_root));
 
        return eb_rewin;
 }
@@ -1241,9 +1239,11 @@ get_old_root(struct btrfs_root *root, u64 time_seq)
 {
        struct tree_mod_elem *tm;
        struct extent_buffer *eb;
+       struct extent_buffer *old;
        struct tree_mod_root *old_root = NULL;
        u64 old_generation = 0;
        u64 logical;
+       u32 blocksize;
 
        eb = btrfs_read_lock_root_node(root);
        tm = __tree_mod_log_oldest_root(root->fs_info, root, time_seq);
@@ -1259,14 +1259,32 @@ get_old_root(struct btrfs_root *root, u64 time_seq)
        }
 
        tm = tree_mod_log_search(root->fs_info, logical, time_seq);
-       if (old_root)
+       if (old_root && tm && tm->op != MOD_LOG_KEY_REMOVE_WHILE_FREEING) {
+               btrfs_tree_read_unlock(root->node);
+               free_extent_buffer(root->node);
+               blocksize = btrfs_level_size(root, old_root->level);
+               old = read_tree_block(root, logical, blocksize, 0);
+               if (!old) {
+                       pr_warn("btrfs: failed to read tree block %llu from get_old_root\n",
+                               logical);
+                       WARN_ON(1);
+               } else {
+                       eb = btrfs_clone_extent_buffer(old);
+                       free_extent_buffer(old);
+               }
+       } else if (old_root) {
+               btrfs_tree_read_unlock(root->node);
+               free_extent_buffer(root->node);
                eb = alloc_dummy_extent_buffer(logical, root->nodesize);
-       else
+       } else {
                eb = btrfs_clone_extent_buffer(root->node);
-       btrfs_tree_read_unlock(root->node);
-       free_extent_buffer(root->node);
+               btrfs_tree_read_unlock(root->node);
+               free_extent_buffer(root->node);
+       }
+
        if (!eb)
                return NULL;
+       extent_buffer_get(eb);
        btrfs_tree_read_lock(eb);
        if (old_root) {
                btrfs_set_header_bytenr(eb, eb->start);
@@ -1279,11 +1297,28 @@ get_old_root(struct btrfs_root *root, u64 time_seq)
                __tree_mod_log_rewind(eb, time_seq, tm);
        else
                WARN_ON(btrfs_header_level(eb) != 0);
-       extent_buffer_get(eb);
+       WARN_ON(btrfs_header_nritems(eb) > BTRFS_NODEPTRS_PER_BLOCK(root));
 
        return eb;
 }
 
+int btrfs_old_root_level(struct btrfs_root *root, u64 time_seq)
+{
+       struct tree_mod_elem *tm;
+       int level;
+
+       tm = __tree_mod_log_oldest_root(root->fs_info, root, time_seq);
+       if (tm && tm->op == MOD_LOG_ROOT_REPLACE) {
+               level = tm->old_root.level;
+       } else {
+               rcu_read_lock();
+               level = btrfs_header_level(root->node);
+               rcu_read_unlock();
+       }
+
+       return level;
+}
+
 static inline int should_cow_block(struct btrfs_trans_handle *trans,
                                   struct btrfs_root *root,
                                   struct extent_buffer *buf)
@@ -1324,19 +1359,16 @@ noinline int btrfs_cow_block(struct btrfs_trans_handle *trans,
        u64 search_start;
        int ret;
 
-       if (trans->transaction != root->fs_info->running_transaction) {
-               printk(KERN_CRIT "trans %llu running %llu\n",
+       if (trans->transaction != root->fs_info->running_transaction)
+               WARN(1, KERN_CRIT "trans %llu running %llu\n",
                       (unsigned long long)trans->transid,
                       (unsigned long long)
                       root->fs_info->running_transaction->transid);
-               WARN_ON(1);
-       }
-       if (trans->transid != root->fs_info->generation) {
-               printk(KERN_CRIT "trans %llu running %llu\n",
+
+       if (trans->transid != root->fs_info->generation)
+               WARN(1, KERN_CRIT "trans %llu running %llu\n",
                       (unsigned long long)trans->transid,
                       (unsigned long long)root->fs_info->generation);
-               WARN_ON(1);
-       }
 
        if (!should_cow_block(trans, root, buf)) {
                *cow_ret = buf;
@@ -1432,10 +1464,8 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans,
        if (cache_only && parent_level != 1)
                return 0;
 
-       if (trans->transaction != root->fs_info->running_transaction)
-               WARN_ON(1);
-       if (trans->transid != root->fs_info->generation)
-               WARN_ON(1);
+       WARN_ON(trans->transaction != root->fs_info->running_transaction);
+       WARN_ON(trans->transid != root->fs_info->generation);
 
        parent_nritems = btrfs_header_nritems(parent);
        blocksize = btrfs_level_size(root, parent_level - 1);
@@ -1725,6 +1755,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
                        goto enospc;
                }
 
+               tree_mod_log_free_eb(root->fs_info, root->node);
                tree_mod_log_set_root_pointer(root, child);
                rcu_assign_pointer(root->node, child);
 
@@ -1789,7 +1820,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
                if (btrfs_header_nritems(right) == 0) {
                        clean_tree_block(trans, root, right);
                        btrfs_tree_unlock(right);
-                       del_ptr(trans, root, path, level + 1, pslot + 1, 1);
+                       del_ptr(trans, root, path, level + 1, pslot + 1);
                        root_sub_used(root, right->len);
                        btrfs_free_tree_block(trans, root, right, 0, 1);
                        free_extent_buffer_stale(right);
@@ -1798,7 +1829,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
                        struct btrfs_disk_key right_key;
                        btrfs_node_key(right, &right_key, 0);
                        tree_mod_log_set_node_key(root->fs_info, parent,
-                                                 &right_key, pslot + 1, 0);
+                                                 pslot + 1, 0);
                        btrfs_set_node_key(parent, &right_key, pslot + 1);
                        btrfs_mark_buffer_dirty(parent);
                }
@@ -1833,7 +1864,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
        if (btrfs_header_nritems(mid) == 0) {
                clean_tree_block(trans, root, mid);
                btrfs_tree_unlock(mid);
-               del_ptr(trans, root, path, level + 1, pslot, 1);
+               del_ptr(trans, root, path, level + 1, pslot);
                root_sub_used(root, mid->len);
                btrfs_free_tree_block(trans, root, mid, 0, 1);
                free_extent_buffer_stale(mid);
@@ -1842,7 +1873,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
                /* update the parent key to reflect our changes */
                struct btrfs_disk_key mid_key;
                btrfs_node_key(mid, &mid_key, 0);
-               tree_mod_log_set_node_key(root->fs_info, parent, &mid_key,
+               tree_mod_log_set_node_key(root->fs_info, parent,
                                          pslot, 0);
                btrfs_set_node_key(parent, &mid_key, pslot);
                btrfs_mark_buffer_dirty(parent);
@@ -1942,7 +1973,7 @@ static noinline int push_nodes_for_insert(struct btrfs_trans_handle *trans,
                        orig_slot += left_nr;
                        btrfs_node_key(mid, &disk_key, 0);
                        tree_mod_log_set_node_key(root->fs_info, parent,
-                                                 &disk_key, pslot, 0);
+                                                 pslot, 0);
                        btrfs_set_node_key(parent, &disk_key, pslot);
                        btrfs_mark_buffer_dirty(parent);
                        if (btrfs_header_nritems(left) > orig_slot) {
@@ -1995,7 +2026,7 @@ static noinline int push_nodes_for_insert(struct btrfs_trans_handle *trans,
 
                        btrfs_node_key(right, &disk_key, 0);
                        tree_mod_log_set_node_key(root->fs_info, parent,
-                                                 &disk_key, pslot + 1, 0);
+                                                 pslot + 1, 0);
                        btrfs_set_node_key(parent, &disk_key, pslot + 1);
                        btrfs_mark_buffer_dirty(parent);
 
@@ -2879,7 +2910,7 @@ static void fixup_low_keys(struct btrfs_trans_handle *trans,
                if (!path->nodes[i])
                        break;
                t = path->nodes[i];
-               tree_mod_log_set_node_key(root->fs_info, t, key, tslot, 1);
+               tree_mod_log_set_node_key(root->fs_info, t, tslot, 1);
                btrfs_set_node_key(t, key, tslot);
                btrfs_mark_buffer_dirty(path->nodes[i]);
                if (tslot != 0)
@@ -2970,8 +3001,10 @@ static int push_node_left(struct btrfs_trans_handle *trans,
                           push_items * sizeof(struct btrfs_key_ptr));
 
        if (push_items < src_nritems) {
-               tree_mod_log_eb_move(root->fs_info, src, 0, push_items,
-                                    src_nritems - push_items);
+               /*
+                * don't call tree_mod_log_eb_move here, key removal was already
+                * fully logged by tree_mod_log_eb_copy above.
+                */
                memmove_extent_buffer(src, btrfs_node_key_ptr_offset(0),
                                      btrfs_node_key_ptr_offset(push_items),
                                      (src_nritems - push_items) *
@@ -3363,8 +3396,7 @@ static noinline int __push_leaf_right(struct btrfs_trans_handle *trans,
        if (push_items == 0)
                goto out_unlock;
 
-       if (!empty && push_items == left_nritems)
-               WARN_ON(1);
+       WARN_ON(!empty && push_items == left_nritems);
 
        /* push left to right */
        right_nritems = btrfs_header_nritems(right);
@@ -3602,11 +3634,9 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans,
        btrfs_set_header_nritems(left, old_left_nritems + push_items);
 
        /* fixup right node */
-       if (push_items > right_nritems) {
-               printk(KERN_CRIT "push items %d nr %u\n", push_items,
+       if (push_items > right_nritems)
+               WARN(1, KERN_CRIT "push items %d nr %u\n", push_items,
                       right_nritems);
-               WARN_ON(1);
-       }
 
        if (push_items < right_nritems) {
                push_space = btrfs_item_offset_nr(right, push_items - 1) -
@@ -4562,16 +4592,21 @@ int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root
  * empty a node.
  */
 static void del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
-                   struct btrfs_path *path, int level, int slot,
-                   int tree_mod_log)
+                   struct btrfs_path *path, int level, int slot)
 {
        struct extent_buffer *parent = path->nodes[level];
        u32 nritems;
        int ret;
 
+       if (level) {
+               ret = tree_mod_log_insert_key(root->fs_info, parent, slot,
+                                             MOD_LOG_KEY_REMOVE);
+               BUG_ON(ret < 0);
+       }
+
        nritems = btrfs_header_nritems(parent);
        if (slot != nritems - 1) {
-               if (tree_mod_log && level)
+               if (level)
                        tree_mod_log_eb_move(root->fs_info, parent, slot,
                                             slot + 1, nritems - slot - 1);
                memmove_extent_buffer(parent,
@@ -4579,10 +4614,6 @@ static void del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
                              btrfs_node_key_ptr_offset(slot + 1),
                              sizeof(struct btrfs_key_ptr) *
                              (nritems - slot - 1));
-       } else if (tree_mod_log && level) {
-               ret = tree_mod_log_insert_key(root->fs_info, parent, slot,
-                                             MOD_LOG_KEY_REMOVE);
-               BUG_ON(ret < 0);
        }
 
        nritems--;
@@ -4616,7 +4647,7 @@ static noinline void btrfs_del_leaf(struct btrfs_trans_handle *trans,
                                    struct extent_buffer *leaf)
 {
        WARN_ON(btrfs_header_generation(leaf) != trans->transid);
-       del_ptr(trans, root, path, 1, path->slots[1], 1);
+       del_ptr(trans, root, path, 1, path->slots[1]);
 
        /*
         * btrfs_free_extent is expensive, we want to make sure we