btrfs: delayed_ref: release and free qgroup reserved at proper timing
author Qu Wenruo <quwenruo@cn.fujitsu.com>
Tue, 8 Sep 2015 09:08:37 +0000 (17:08 +0800)
committer Chris Mason <clm@fb.com>
Thu, 22 Oct 2015 01:37:47 +0000 (18:37 -0700)
Qgroup reserved space needs to be released from the inode dirty range map
and freed at two different times:

1) Release when the metadata is written into the tree
After the corresponding metadata is written into the tree, any newer write
will be COWed (the NOCOW case is not covered yet).
So we must release its range from the inode dirty range map, or we will
forget to reserve the needed range, causing the accounting to exceed the limit.

2) Free reserved bytes when the delayed ref is run
When delayed refs are run, qgroup accounting will follow soon and turn
the reserved bytes into rfer/excl numbers.
As run_delayed_refs and qgroup accounting are both done at
commit_transaction() time, it is safe to free the reserved space at
run_delayed_ref() time.

With this timing for releasing/freeing reserved space, we should be able to
resolve the long-standing qgroup reserved space leak problem.

Signed-off-by: Qu Wenruo <quwenruo@cn.fujitsu.com>
Signed-off-by: Chris Mason <clm@fb.com>
fs/btrfs/extent-tree.c
fs/btrfs/inode.c
fs/btrfs/qgroup.c
fs/btrfs/qgroup.h

index 2df4bc77f5b477e6d0f403b4962256707462d6b7..6c7927cd4f411fc4302c1ce99394a2d7e33100e3 100644 (file)
@@ -2345,6 +2345,11 @@ static int run_one_delayed_ref(struct btrfs_trans_handle *trans,
                                                      node->num_bytes);
                        }
                }
+
+               /* Also free its reserved qgroup space */
+               btrfs_qgroup_free_delayed_ref(root->fs_info,
+                                             head->qgroup_ref_root,
+                                             head->qgroup_reserved);
                return ret;
        }
 
index 5ce55f6eefceb8103ab6d8c7194c1797f0668ce3..c21ed7b1469131d23bc786bd5b28b603217ee852 100644 (file)
@@ -2119,6 +2119,16 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
        ret = btrfs_alloc_reserved_file_extent(trans, root,
                                        root->root_key.objectid,
                                        btrfs_ino(inode), file_pos, &ins);
+       if (ret < 0)
+               goto out;
+       /*
+        * Release the reserved range from inode dirty range map, and
+        * move it to delayed ref codes, as now accounting only happens at
+        * commit_transaction() time.
+        */
+       btrfs_qgroup_release_data(inode, file_pos, ram_bytes);
+       ret = btrfs_add_delayed_qgroup_reserve(root->fs_info, trans,
+                       root->objectid, disk_bytenr, ram_bytes);
 out:
        btrfs_free_path(path);
 
index e05d1f6aa293f0e95768032c96ba225777036f10..75d8e584c3e52bc02bca18e19352fc9a2ede50f8 100644 (file)
@@ -2116,14 +2116,13 @@ out:
        return ret;
 }
 
-void btrfs_qgroup_free(struct btrfs_root *root, u64 num_bytes)
+void btrfs_qgroup_free_refroot(struct btrfs_fs_info *fs_info,
+                              u64 ref_root, u64 num_bytes)
 {
        struct btrfs_root *quota_root;
        struct btrfs_qgroup *qgroup;
-       struct btrfs_fs_info *fs_info = root->fs_info;
        struct ulist_node *unode;
        struct ulist_iterator uiter;
-       u64 ref_root = root->root_key.objectid;
        int ret = 0;
 
        if (!is_fstree(ref_root))
index 564eb214774091da9ba03499eac3415059d15981..80924aeceb09cc7b39f29c845bc51bb85775537a 100644 (file)
@@ -72,7 +72,23 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans,
                         struct btrfs_fs_info *fs_info, u64 srcid, u64 objectid,
                         struct btrfs_qgroup_inherit *inherit);
 int btrfs_qgroup_reserve(struct btrfs_root *root, u64 num_bytes);
-void btrfs_qgroup_free(struct btrfs_root *root, u64 num_bytes);
+void btrfs_qgroup_free_refroot(struct btrfs_fs_info *fs_info,
+                              u64 ref_root, u64 num_bytes);
+/* Wrapper: forwards to btrfs_qgroup_free_refroot() with @root's objectid. */
+static inline void btrfs_qgroup_free(struct btrfs_root *root, u64 num_bytes)
+{
+       btrfs_qgroup_free_refroot(root->fs_info, root->objectid, num_bytes);
+}
+
+/*
+ * TODO: Add a proper trace point here. btrfs_qgroup_free() is called from
+ * everywhere, so it can't provide a good trace for the delayed ref case.
+ */
+static inline void btrfs_qgroup_free_delayed_ref(struct btrfs_fs_info *fs_info,
+                                                u64 ref_root, u64 num_bytes)
+{
+       btrfs_qgroup_free_refroot(fs_info, ref_root, num_bytes);
+}
 
 void assert_qgroups_uptodate(struct btrfs_trans_handle *trans);