Merge branch 'next' of git://git.kernel.org/pub/scm/linux/kernel/git/rzhang/linux
[firefly-linux-kernel-4.4.55.git] / fs / btrfs / qgroup.c
index d904ee1c53497034ed6685b8e12f40d0880aaaca..46476c226395e777045a2303a1ada87b9e4b4a78 100644 (file)
@@ -1652,10 +1652,6 @@ static int qgroup_update_counters(struct btrfs_fs_info *fs_info,
                        }
                }
 
-               /* For exclusive extent, free its reserved bytes too */
-               if (nr_old_roots == 0 && nr_new_roots == 1 &&
-                   cur_new_count == nr_new_roots)
-                       qg->reserved -= num_bytes;
                if (dirty)
                        qgroup_dirty(fs_info, qg);
        }
@@ -2035,7 +2031,7 @@ out:
        return ret;
 }
 
-int btrfs_qgroup_reserve(struct btrfs_root *root, u64 num_bytes)
+static int qgroup_reserve(struct btrfs_root *root, u64 num_bytes)
 {
        struct btrfs_root *quota_root;
        struct btrfs_qgroup *qgroup;
@@ -2116,14 +2112,13 @@ out:
        return ret;
 }
 
-void btrfs_qgroup_free(struct btrfs_root *root, u64 num_bytes)
+void btrfs_qgroup_free_refroot(struct btrfs_fs_info *fs_info,
+                              u64 ref_root, u64 num_bytes)
 {
        struct btrfs_root *quota_root;
        struct btrfs_qgroup *qgroup;
-       struct btrfs_fs_info *fs_info = root->fs_info;
        struct ulist_node *unode;
        struct ulist_iterator uiter;
-       u64 ref_root = root->root_key.objectid;
        int ret = 0;
 
        if (!is_fstree(ref_root))
@@ -2169,6 +2164,11 @@ out:
        spin_unlock(&fs_info->qgroup_lock);
 }
 
+/* Convenience wrapper: return @num_bytes of reserved space to the qgroups
+ * owned by @root (identified by its objectid).
+ */
+static inline void qgroup_free(struct btrfs_root *root, u64 num_bytes)
+{
+	/* Don't "return" a void expression from a void function (ISO C
+	 * constraint violation, flagged by checkpatch); just call it.
+	 */
+	btrfs_qgroup_free_refroot(root->fs_info, root->objectid, num_bytes);
+}
+
 void assert_qgroups_uptodate(struct btrfs_trans_handle *trans)
 {
        if (list_empty(&trans->qgroup_ref_list) && !trans->delayed_ref_elem.seq)
@@ -2188,10 +2188,10 @@ void assert_qgroups_uptodate(struct btrfs_trans_handle *trans)
  */
 static int
 qgroup_rescan_leaf(struct btrfs_fs_info *fs_info, struct btrfs_path *path,
-                  struct btrfs_trans_handle *trans,
-                  struct extent_buffer *scratch_leaf)
+                  struct btrfs_trans_handle *trans)
 {
        struct btrfs_key found;
+       struct extent_buffer *scratch_leaf = NULL;
        struct ulist *roots = NULL;
        struct seq_list tree_mod_seq_elem = SEQ_LIST_INIT(tree_mod_seq_elem);
        u64 num_bytes;
@@ -2229,7 +2229,15 @@ qgroup_rescan_leaf(struct btrfs_fs_info *fs_info, struct btrfs_path *path,
        fs_info->qgroup_rescan_progress.objectid = found.objectid + 1;
 
        btrfs_get_tree_mod_seq(fs_info, &tree_mod_seq_elem);
-       memcpy(scratch_leaf, path->nodes[0], sizeof(*scratch_leaf));
+       scratch_leaf = btrfs_clone_extent_buffer(path->nodes[0]);
+       if (!scratch_leaf) {
+               ret = -ENOMEM;
+               mutex_unlock(&fs_info->qgroup_rescan_lock);
+               goto out;
+       }
+       extent_buffer_get(scratch_leaf);
+       btrfs_tree_read_lock(scratch_leaf);
+       btrfs_set_lock_blocking_rw(scratch_leaf, BTRFS_READ_LOCK);
        slot = path->slots[0];
        btrfs_release_path(path);
        mutex_unlock(&fs_info->qgroup_rescan_lock);
@@ -2255,6 +2263,10 @@ qgroup_rescan_leaf(struct btrfs_fs_info *fs_info, struct btrfs_path *path,
                        goto out;
        }
 out:
+       if (scratch_leaf) {
+               btrfs_tree_read_unlock_blocking(scratch_leaf);
+               free_extent_buffer(scratch_leaf);
+       }
        btrfs_put_tree_mod_seq(fs_info, &tree_mod_seq_elem);
 
        return ret;
@@ -2266,16 +2278,12 @@ static void btrfs_qgroup_rescan_worker(struct btrfs_work *work)
                                                     qgroup_rescan_work);
        struct btrfs_path *path;
        struct btrfs_trans_handle *trans = NULL;
-       struct extent_buffer *scratch_leaf = NULL;
        int err = -ENOMEM;
        int ret = 0;
 
        path = btrfs_alloc_path();
        if (!path)
                goto out;
-       scratch_leaf = kmalloc(sizeof(*scratch_leaf), GFP_NOFS);
-       if (!scratch_leaf)
-               goto out;
 
        err = 0;
        while (!err) {
@@ -2287,8 +2295,7 @@ static void btrfs_qgroup_rescan_worker(struct btrfs_work *work)
                if (!fs_info->quota_enabled) {
                        err = -EINTR;
                } else {
-                       err = qgroup_rescan_leaf(fs_info, path, trans,
-                                                scratch_leaf);
+                       err = qgroup_rescan_leaf(fs_info, path, trans);
                }
                if (err > 0)
                        btrfs_commit_transaction(trans, fs_info->fs_root);
@@ -2297,7 +2304,6 @@ static void btrfs_qgroup_rescan_worker(struct btrfs_work *work)
        }
 
 out:
-       kfree(scratch_leaf);
        btrfs_free_path(path);
 
        mutex_lock(&fs_info->qgroup_rescan_lock);
@@ -2486,3 +2492,190 @@ btrfs_qgroup_rescan_resume(struct btrfs_fs_info *fs_info)
                btrfs_queue_work(fs_info->qgroup_rescan_workers,
                                 &fs_info->qgroup_rescan_work);
 }
+
+/*
+ * Reserve qgroup space for range [start, start + len).
+ *
+ * This function will either reserve space from related qgroups or do
+ * nothing if the range is already reserved.
+ *
+ * Return 0 for successful reserve
+ * Return <0 for error (including -EQUOT and -ENOMEM)
+ *
+ * NOTE: this function may sleep for memory allocation.
+ */
+int btrfs_qgroup_reserve_data(struct inode *inode, u64 start, u64 len)
+{
+	struct btrfs_root *root = BTRFS_I(inode)->root;
+	struct extent_changeset changeset;
+	struct ulist_node *unode;
+	struct ulist_iterator uiter;
+	int ret;
+
+	if (!root->fs_info->quota_enabled || !is_fstree(root->objectid) ||
+	    len == 0)
+		return 0;
+
+	changeset.bytes_changed = 0;
+	changeset.range_changed = ulist_alloc(GFP_NOFS);
+	/*
+	 * Bail out before using the ulist; without this check a failed
+	 * allocation leads to a NULL dereference in set_record_extent_bits()
+	 * (the release path below already checks this).
+	 */
+	if (!changeset.range_changed)
+		return -ENOMEM;
+
+	ret = set_record_extent_bits(&BTRFS_I(inode)->io_tree, start,
+			start + len - 1, EXTENT_QGROUP_RESERVED, GFP_NOFS,
+			&changeset);
+	trace_btrfs_qgroup_reserve_data(inode, start, len,
+					changeset.bytes_changed,
+					QGROUP_RESERVE);
+	if (ret < 0)
+		goto cleanup;
+	ret = qgroup_reserve(root, changeset.bytes_changed);
+	if (ret < 0)
+		goto cleanup;
+
+	ulist_free(changeset.range_changed);
+	return ret;
+
+cleanup:
+	/* Roll back any EXTENT_QGROUP_RESERVED bits we managed to set above */
+	ULIST_ITER_INIT(&uiter);
+	while ((unode = ulist_next(changeset.range_changed, &uiter)))
+		clear_extent_bit(&BTRFS_I(inode)->io_tree, unode->val,
+				 unode->aux, EXTENT_QGROUP_RESERVED, 0, 0, NULL,
+				 GFP_NOFS);
+	ulist_free(changeset.range_changed);
+	return ret;
+}
+
+/*
+ * Clear EXTENT_QGROUP_RESERVED for range [start, start + len) of the
+ * inode's io_tree.  If @free is non-zero, the cleared bytes are also
+ * handed back to the qgroups of the inode's root.
+ *
+ * Return <0 on error, otherwise the return value of
+ * clear_record_extent_bits().
+ */
+static int __btrfs_qgroup_release_data(struct inode *inode, u64 start, u64 len,
+				       int free)
+{
+	struct extent_changeset changeset;
+	int trace_op = QGROUP_RELEASE;
+	int ret;
+
+	changeset.bytes_changed = 0;
+	changeset.range_changed = ulist_alloc(GFP_NOFS);
+	if (!changeset.range_changed)
+		return -ENOMEM;
+
+	ret = clear_record_extent_bits(&BTRFS_I(inode)->io_tree, start,
+			start + len - 1, EXTENT_QGROUP_RESERVED, GFP_NOFS,
+			&changeset);
+	if (ret < 0)
+		goto out;
+
+	if (free) {
+		/* Only the free path returns the bytes to the qgroups */
+		qgroup_free(BTRFS_I(inode)->root, changeset.bytes_changed);
+		trace_op = QGROUP_FREE;
+	}
+	trace_btrfs_qgroup_release_data(inode, start, len,
+					changeset.bytes_changed, trace_op);
+out:
+	ulist_free(changeset.range_changed);
+	return ret;
+}
+
+/*
+ * Drop the EXTENT_QGROUP_RESERVED bits for [start, start + len) and hand
+ * the bytes back to the qgroups.
+ *
+ * Intended for pages that get invalidated before they ever reach disk,
+ * and for error-cleanup paths.  Data that does hit disk should go
+ * through btrfs_qgroup_release_data() instead.
+ *
+ * NOTE: may sleep due to memory allocation.
+ */
+int btrfs_qgroup_free_data(struct inode *inode, u64 start, u64 len)
+{
+	/* free == 1: also return the bytes to the qgroup counters */
+	return __btrfs_qgroup_release_data(inode, start, len, 1);
+}
+
+/*
+ * Drop the EXTENT_QGROUP_RESERVED bits for [start, start + len) without
+ * touching the qgroup counters.
+ *
+ * Call this once the range has been written out and its FILE_EXTENT item
+ * inserted: the new qgroup accounting only updates counters at
+ * commit_transaction() time, so the reservation itself must stay.
+ * Clearing the io_tree bits is still required so the range can be COWed
+ * again by later writes.
+ *
+ * NOTE: may sleep due to memory allocation.
+ */
+int btrfs_qgroup_release_data(struct inode *inode, u64 start, u64 len)
+{
+	/* free == 0: leave the qgroup counters untouched */
+	return __btrfs_qgroup_release_data(inode, start, len, 0);
+}
+
+/*
+ * Reserve @num_bytes of metadata space from the qgroups of @root and
+ * track it in root->qgroup_meta_rsv.
+ *
+ * @num_bytes must be nodesize-aligned.
+ * Return 0 on success (or when quota is disabled / root is not an
+ * fs tree), otherwise the negative error from qgroup_reserve().
+ */
+int btrfs_qgroup_reserve_meta(struct btrfs_root *root, int num_bytes)
+{
+	int err;
+
+	if (num_bytes == 0 || !root->fs_info->quota_enabled ||
+	    !is_fstree(root->objectid))
+		return 0;
+
+	BUG_ON(num_bytes != round_down(num_bytes, root->nodesize));
+	err = qgroup_reserve(root, num_bytes);
+	if (err < 0)
+		return err;
+	/* Remember how much we hold so free_meta*() can give it back */
+	atomic_add(num_bytes, &root->qgroup_meta_rsv);
+	return 0;
+}
+
+/*
+ * Return every byte tracked in root->qgroup_meta_rsv back to the qgroups
+ * and reset the counter to zero.
+ */
+void btrfs_qgroup_free_meta_all(struct btrfs_root *root)
+{
+	int rsv;
+
+	if (!root->fs_info->quota_enabled || !is_fstree(root->objectid))
+		return;
+
+	/* Atomically take the whole reservation so it is freed only once */
+	rsv = atomic_xchg(&root->qgroup_meta_rsv, 0);
+	if (rsv)
+		qgroup_free(root, rsv);
+}
+
+/*
+ * Give @num_bytes of previously reserved metadata space back to the
+ * qgroups of @root, decreasing root->qgroup_meta_rsv accordingly.
+ *
+ * @num_bytes must be nodesize-aligned.
+ */
+void btrfs_qgroup_free_meta(struct btrfs_root *root, int num_bytes)
+{
+	if (!root->fs_info->quota_enabled || !is_fstree(root->objectid))
+		return;
+
+	BUG_ON(num_bytes != round_down(num_bytes, root->nodesize));
+	/* Freeing more than is currently reserved indicates an accounting
+	 * bug; warn (read before sub, so the check sees the pre-free value).
+	 */
+	WARN_ON(atomic_read(&root->qgroup_meta_rsv) < num_bytes);
+	atomic_sub(num_bytes, &root->qgroup_meta_rsv);
+	qgroup_free(root, num_bytes);
+}
+
+/*
+ * Check qgroup reserved space leaking, normally at destroy inode
+ * time
+ */
+void btrfs_qgroup_check_reserved_leak(struct inode *inode)
+{
+	struct extent_changeset changeset;
+	struct ulist_node *unode;
+	struct ulist_iterator iter;
+	int ret;
+
+	changeset.bytes_changed = 0;
+	changeset.range_changed = ulist_alloc(GFP_NOFS);
+	if (WARN_ON(!changeset.range_changed))
+		return;
+
+	/* Clear any remaining EXTENT_QGROUP_RESERVED bits over the whole
+	 * file range; a clean inode should have none left at this point.
+	 */
+	ret = clear_record_extent_bits(&BTRFS_I(inode)->io_tree, 0, (u64)-1,
+			EXTENT_QGROUP_RESERVED, GFP_NOFS, &changeset);
+
+	WARN_ON(ret < 0);
+	if (WARN_ON(changeset.bytes_changed)) {
+		/* Report each leaked range, then return the bytes so the
+		 * qgroup counters don't stay inflated forever.
+		 */
+		ULIST_ITER_INIT(&iter);
+		while ((unode = ulist_next(changeset.range_changed, &iter))) {
+			btrfs_warn(BTRFS_I(inode)->root->fs_info,
+				"leaking qgroup reserved space, ino: %lu, start: %llu, end: %llu",
+				inode->i_ino, unode->val, unode->aux);
+		}
+		qgroup_free(BTRFS_I(inode)->root, changeset.bytes_changed);
+	}
+	ulist_free(changeset.range_changed);
+}