Btrfs: avoid delayed metadata items during commits
authorChris Mason <chris.mason@oracle.com>
Fri, 17 Jun 2011 20:14:09 +0000 (16:14 -0400)
committerChris Mason <chris.mason@oracle.com>
Fri, 17 Jun 2011 20:38:47 +0000 (16:38 -0400)
Snapshot creation has two phases.  One is the initial snapshot setup,
and the second is done during commit, while nobody is allowed to modify
the root we are snapshotting.

The delayed metadata insertion code can break that rule, it does a
delayed inode update on the inode of the parent of the snapshot,
and delayed directory item insertion.

This makes sure to run the pending delayed operations before we
record the snapshot root, which avoids corruptions.

Signed-off-by: Chris Mason <chris.mason@oracle.com>
fs/btrfs/delayed-inode.c
fs/btrfs/delayed-inode.h
fs/btrfs/transaction.c

index fc515b787e8cf3860bd142b7e2af4c43b33b3294..f1cbd028f7b33e2a693292641cacd21b535a5e75 100644 (file)
@@ -1237,6 +1237,13 @@ again:
        return 0;
 }
 
+void btrfs_assert_delayed_root_empty(struct btrfs_root *root)
+{
+       struct btrfs_delayed_root *delayed_root;
+       delayed_root = btrfs_get_delayed_root(root);
+       WARN_ON(btrfs_first_delayed_node(delayed_root));
+}
+
 void btrfs_balance_delayed_items(struct btrfs_root *root)
 {
        struct btrfs_delayed_root *delayed_root;
index cb79b6771e8257e6ba1fb18a163c63ee864dbe0b..d1a6a2915c668afe2500c187d731da0ea1f6bdaa 100644 (file)
@@ -137,4 +137,8 @@ int btrfs_readdir_delayed_dir_index(struct file *filp, void *dirent,
 /* for init */
 int __init btrfs_delayed_inode_init(void);
 void btrfs_delayed_inode_exit(void);
+
+/* for debugging */
+void btrfs_assert_delayed_root_empty(struct btrfs_root *root);
+
 #endif
index c073d85e14f315bd8efb38585d5cd97cf3557663..51dcec86757f071654bc3866123e65157ef0286b 100644 (file)
@@ -957,6 +957,15 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
        ret = btrfs_update_inode(trans, parent_root, parent_inode);
        BUG_ON(ret);
 
+       /*
+        * pull in the delayed directory update
+        * and the delayed inode item
+        * otherwise we corrupt the FS during
+        * snapshot
+        */
+       ret = btrfs_run_delayed_items(trans, root);
+       BUG_ON(ret);
+
        record_root_in_trans(trans, root);
        btrfs_set_root_last_snapshot(&root->root_item, trans->transid);
        memcpy(new_root_item, &root->root_item, sizeof(*new_root_item));
@@ -1018,14 +1027,6 @@ static noinline int create_pending_snapshots(struct btrfs_trans_handle *trans,
        int ret;
 
        list_for_each_entry(pending, head, list) {
-               /*
-                * We must deal with the delayed items before creating
-                * snapshots, or we will create a snapthot with inconsistent
-                * information.
-               */
-               ret = btrfs_run_delayed_items(trans, fs_info->fs_root);
-               BUG_ON(ret);
-
                ret = create_pending_snapshot(trans, fs_info, pending);
                BUG_ON(ret);
        }
@@ -1319,15 +1320,21 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
         */
        mutex_lock(&root->fs_info->reloc_mutex);
 
-       ret = create_pending_snapshots(trans, root->fs_info);
+       ret = btrfs_run_delayed_items(trans, root);
        BUG_ON(ret);
 
-       ret = btrfs_run_delayed_items(trans, root);
+       ret = create_pending_snapshots(trans, root->fs_info);
        BUG_ON(ret);
 
        ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
        BUG_ON(ret);
 
+       /*
+        * make sure none of the code above managed to slip in a
+        * delayed item
+        */
+       btrfs_assert_delayed_root_empty(root);
+
        WARN_ON(cur_trans != trans->transaction);
 
        btrfs_scrub_pause(root);