Btrfs: fix scrub preventing unused block groups from being deleted
author    Filipe Manana <fdmanana@suse.com>
          Thu, 19 Nov 2015 11:45:48 +0000 (11:45 +0000)
committer Chris Mason <clm@fb.com>
          Wed, 25 Nov 2015 13:22:08 +0000 (05:22 -0800)

Currently scrub can race with the cleaner kthread when the latter attempts
to delete an unused block group, and the result is that the cleaner kthread
is prevented from ever deleting the block group later - unless the block
group becomes used and unused again. The following diagram illustrates that
race:

              CPU 1                                 CPU 2

 cleaner kthread
   btrfs_delete_unused_bgs()

     gets block group X from
     fs_info->unused_bgs and
     removes it from that list

                                             scrub_enumerate_chunks()

                                               searches device tree using
                                               its commit root

                                               finds device extent for
                                               block group X

                                               gets block group X from the tree
                                               fs_info->block_group_cache_tree
                                               (via btrfs_lookup_block_group())

                                               sets bg X to RO

     sees the block group is
     already RO and therefore
     neither deletes it nor adds
     it back to the unused list
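
For context, the cleaner-side check that makes the block group get skipped
lives in btrfs_delete_unused_bgs() (fs/btrfs/extent-tree.c). The following is
a simplified, abridged sketch of that check - not the verbatim kernel code -
showing why an RO block group is silently dropped: by this point the block
group has already been removed from fs_info->unused_bgs, so bailing out here
leaves it on neither list:

    spin_lock(&block_group->lock);
    if (block_group->reserved ||
        btrfs_block_group_used(&block_group->item) ||
        block_group->ro) {
            /*
             * Busy, still used, or read-only (e.g. scrub or balance set it
             * RO). Skip it - but it was already taken off
             * fs_info->unused_bgs, so nothing re-adds it.
             */
            spin_unlock(&block_group->lock);
            goto next;
    }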

So fix this by making scrub add the block group back to the list of unused
block groups if the block group is still unused once scrub has finished with
it and it hasn't already been removed.

Signed-off-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: Chris Mason <clm@fb.com>
fs/btrfs/ctree.h
fs/btrfs/extent-tree.c
fs/btrfs/scrub.c

index d88994f71eae5fb0c2d9f23da045153674d5f198..a0165c6e6243768aff240add47dfe0ba52a589d2 100644 (file)
@@ -3416,6 +3416,7 @@ int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans,
 struct btrfs_block_group_cache *btrfs_lookup_block_group(
                                                 struct btrfs_fs_info *info,
                                                 u64 bytenr);
+void btrfs_get_block_group(struct btrfs_block_group_cache *cache);
 void btrfs_put_block_group(struct btrfs_block_group_cache *cache);
 int get_block_group_index(struct btrfs_block_group_cache *cache);
 struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans,
index e97d6d61cd4215ec66e549ba9d4af7d8554d6f9d..8fd14b6f1d332d2254e057e1a7c9d3f842d44e5b 100644 (file)
@@ -124,7 +124,7 @@ static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits)
        return (cache->flags & bits) == bits;
 }
 
-static void btrfs_get_block_group(struct btrfs_block_group_cache *cache)
+void btrfs_get_block_group(struct btrfs_block_group_cache *cache)
 {
        atomic_inc(&cache->count);
 }
index 68af3169d527c1f8cc5d1df49ff7c2f5598161a4..b091d94ceef68013e992161b0aaf31b47cc645dc 100644 (file)
@@ -3641,6 +3641,28 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx,
                if (ro_set)
                        btrfs_dec_block_group_ro(root, cache);
 
+               /*
+                * We might have prevented the cleaner kthread from deleting
+                * this block group if it was already unused because we raced
+                * and set it to RO mode first. So add it back to the unused
+                * list, otherwise it might not ever be deleted unless a manual
+                * balance is triggered or it becomes used and unused again.
+                */
+               spin_lock(&cache->lock);
+               if (!cache->removed && !cache->ro && cache->reserved == 0 &&
+                   btrfs_block_group_used(&cache->item) == 0) {
+                       spin_unlock(&cache->lock);
+                       spin_lock(&fs_info->unused_bgs_lock);
+                       if (list_empty(&cache->bg_list)) {
+                               btrfs_get_block_group(cache);
+                               list_add_tail(&cache->bg_list,
+                                             &fs_info->unused_bgs);
+                       }
+                       spin_unlock(&fs_info->unused_bgs_lock);
+               } else {
+                       spin_unlock(&cache->lock);
+               }
+
                btrfs_put_block_group(cache);
                if (ret)
                        break;