Merge branches 'irq-urgent-for-linus' and 'timers-urgent-for-linus' of git://git...
[firefly-linux-kernel-4.4.55.git] / fs / btrfs / volumes.c
index dd644d7677821ab90544db5f8fbd0b763dde389f..a6df8fdc1312ce78e97f9f90037ff5b236a58a92 100644 (file)
 #include "dev-replace.h"
 #include "sysfs.h"
 
+/*
+ * Per-profile RAID attributes, indexed by BTRFS_RAID_* type.
+ * A devs_max of 0 means "as many devices as possible".
+ */
+const struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES] = {
+       [BTRFS_RAID_RAID10] = {
+               .devs_min       = 4,
+               .devs_max       = 0,    /* 0 == as many as possible */
+               .devs_increment = 2,
+               .dev_stripes    = 1,
+               .sub_stripes    = 2,
+               .ncopies        = 2,
+               .tolerated_failures = 1,
+       },
+       [BTRFS_RAID_RAID1] = {
+               .devs_min       = 2,
+               .devs_max       = 2,
+               .devs_increment = 2,
+               .dev_stripes    = 1,
+               .sub_stripes    = 1,
+               .ncopies        = 2,
+               .tolerated_failures = 1,
+       },
+       [BTRFS_RAID_DUP] = {
+               .devs_min       = 1,
+               .devs_max       = 1,
+               .devs_increment = 1,
+               .dev_stripes    = 2,
+               .sub_stripes    = 1,
+               .ncopies        = 2,
+               .tolerated_failures = 0,
+       },
+       [BTRFS_RAID_RAID0] = {
+               .devs_min       = 2,
+               .devs_max       = 0,
+               .devs_increment = 1,
+               .dev_stripes    = 1,
+               .sub_stripes    = 1,
+               .ncopies        = 1,
+               .tolerated_failures = 0,
+       },
+       [BTRFS_RAID_SINGLE] = {
+               .devs_min       = 1,
+               .devs_max       = 1,
+               .devs_increment = 1,
+               .dev_stripes    = 1,
+               .sub_stripes    = 1,
+               .ncopies        = 1,
+               .tolerated_failures = 0,
+       },
+       [BTRFS_RAID_RAID5] = {
+               .devs_min       = 2,
+               .devs_max       = 0,
+               .devs_increment = 1,
+               .dev_stripes    = 1,
+               .sub_stripes    = 1,
+               .ncopies        = 2,
+               .tolerated_failures = 1,
+       },
+       [BTRFS_RAID_RAID6] = {
+               .devs_min       = 3,
+               .devs_max       = 0,
+               .devs_increment = 1,
+               .dev_stripes    = 1,
+               .sub_stripes    = 1,
+               .ncopies        = 3,
+               .tolerated_failures = 2,
+       },
+};
+
+/*
+ * Block group profile flag for each BTRFS_RAID_* type.  BTRFS_RAID_SINGLE
+ * has no profile bit of its own and therefore maps to 0.
+ */
+const u64 btrfs_raid_group[BTRFS_NR_RAID_TYPES] = {
+       [BTRFS_RAID_RAID10] = BTRFS_BLOCK_GROUP_RAID10,
+       [BTRFS_RAID_RAID1]  = BTRFS_BLOCK_GROUP_RAID1,
+       [BTRFS_RAID_DUP]    = BTRFS_BLOCK_GROUP_DUP,
+       [BTRFS_RAID_RAID0]  = BTRFS_BLOCK_GROUP_RAID0,
+       [BTRFS_RAID_SINGLE] = 0,
+       [BTRFS_RAID_RAID5]  = BTRFS_BLOCK_GROUP_RAID5,
+       [BTRFS_RAID_RAID6]  = BTRFS_BLOCK_GROUP_RAID6,
+};
+
 static int init_first_rw_device(struct btrfs_trans_handle *trans,
                                struct btrfs_root *root,
                                struct btrfs_device *device);
@@ -156,8 +232,8 @@ static struct btrfs_device *__alloc_device(void)
        spin_lock_init(&dev->reada_lock);
        atomic_set(&dev->reada_in_flight, 0);
        atomic_set(&dev->dev_stats_ccnt, 0);
-       INIT_RADIX_TREE(&dev->reada_zones, GFP_NOFS & ~__GFP_WAIT);
-       INIT_RADIX_TREE(&dev->reada_extents, GFP_NOFS & ~__GFP_WAIT);
+       INIT_RADIX_TREE(&dev->reada_zones, GFP_NOFS & ~__GFP_DIRECT_RECLAIM);
+       INIT_RADIX_TREE(&dev->reada_extents, GFP_NOFS & ~__GFP_DIRECT_RECLAIM);
 
        return dev;
 }
@@ -344,6 +420,9 @@ loop_lock:
                pending = pending->bi_next;
                cur->bi_next = NULL;
 
+               /*
+                * atomic_dec_return implies a barrier for waitqueue_active
+                */
                if (atomic_dec_return(&fs_info->nr_async_bios) < limit &&
                    waitqueue_active(&fs_info->async_submit_wait))
                        wake_up(&fs_info->async_submit_wait);
@@ -1383,7 +1462,7 @@ again:
                btrfs_std_error(root->fs_info, ret,
                            "Failed to remove dev extent item");
        } else {
-               trans->transaction->have_free_bgs = 1;
+               set_bit(BTRFS_TRANS_HAVE_FREE_BGS, &trans->transaction->flags);
        }
 out:
        btrfs_free_path(path);
@@ -2980,16 +3059,19 @@ static void update_balance_args(struct btrfs_balance_control *bctl)
         * (albeit full) chunks.
         */
        if (!(bctl->data.flags & BTRFS_BALANCE_ARGS_USAGE) &&
+           !(bctl->data.flags & BTRFS_BALANCE_ARGS_USAGE_RANGE) &&
            !(bctl->data.flags & BTRFS_BALANCE_ARGS_CONVERT)) {
                bctl->data.flags |= BTRFS_BALANCE_ARGS_USAGE;
                bctl->data.usage = 90;
        }
        if (!(bctl->sys.flags & BTRFS_BALANCE_ARGS_USAGE) &&
+           !(bctl->sys.flags & BTRFS_BALANCE_ARGS_USAGE_RANGE) &&
            !(bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT)) {
                bctl->sys.flags |= BTRFS_BALANCE_ARGS_USAGE;
                bctl->sys.usage = 90;
        }
        if (!(bctl->meta.flags & BTRFS_BALANCE_ARGS_USAGE) &&
+           !(bctl->meta.flags & BTRFS_BALANCE_ARGS_USAGE_RANGE) &&
            !(bctl->meta.flags & BTRFS_BALANCE_ARGS_CONVERT)) {
                bctl->meta.flags |= BTRFS_BALANCE_ARGS_USAGE;
                bctl->meta.usage = 90;
@@ -3043,6 +3125,39 @@ static int chunk_profiles_filter(u64 chunk_type,
 
+/*
+ * Single-value usage filter, applied when BTRFS_BALANCE_ARGS_USAGE is set.
+ *
+ * Looks up the block group at @chunk_offset and compares its used bytes
+ * against a threshold derived from bargs->usage:
+ *   - usage == 0:  threshold is 1, so only chunks with no used bytes match;
+ *   - usage > 100: threshold is the block group's key.offset;
+ *   - otherwise:   threshold is div_factor_fine(key.offset, usage).
+ *
+ * Returns 0 if the chunk passes the filter (used bytes below the threshold)
+ * and 1 if it should be skipped.
+ */
 static int chunk_usage_filter(struct btrfs_fs_info *fs_info, u64 chunk_offset,
                               struct btrfs_balance_args *bargs)
+{
+       struct btrfs_block_group_cache *cache;
+       u64 chunk_used;
+       u64 user_thresh;
+       int ret = 1;
+
+       cache = btrfs_lookup_block_group(fs_info, chunk_offset);
+       chunk_used = btrfs_block_group_used(&cache->item);
+
+       /*
+        * This filter is selected by BTRFS_BALANCE_ARGS_USAGE, so it must
+        * read the single bargs->usage value rather than the
+        * usage_min/usage_max pair used by the range variant.
+        */
+       if (bargs->usage == 0)
+               user_thresh = 1;
+       else if (bargs->usage > 100)
+               user_thresh = cache->key.offset;
+       else
+               user_thresh = div_factor_fine(cache->key.offset,
+                                             bargs->usage);
+
+       if (chunk_used < user_thresh)
+               ret = 0;
+
+       /* Drop the reference taken by btrfs_lookup_block_group(). */
+       btrfs_put_block_group(cache);
+       return ret;
+}
+
+static int chunk_usage_range_filter(struct btrfs_fs_info *fs_info,
+               u64 chunk_offset, struct btrfs_balance_args *bargs)
 {
        struct btrfs_block_group_cache *cache;
        u64 chunk_used, user_thresh;
@@ -3051,7 +3166,7 @@ static int chunk_usage_filter(struct btrfs_fs_info *fs_info, u64 chunk_offset,
        cache = btrfs_lookup_block_group(fs_info, chunk_offset);
        chunk_used = btrfs_block_group_used(&cache->item);
 
-       if (bargs->usage == 0)
+       if (bargs->usage_min == 0)
                user_thresh = 1;
        else if (bargs->usage > 100)
                user_thresh = cache->key.offset;
@@ -3141,6 +3256,19 @@ static int chunk_vrange_filter(struct extent_buffer *leaf,
        return 1;
 }
 
+/*
+ * Stripe-count range filter.
+ *
+ * Returns 0 when the chunk's stripe count lies within the inclusive range
+ * [bargs->stripes_min, bargs->stripes_max], and 1 when it falls outside.
+ */
+static int chunk_stripes_range_filter(struct extent_buffer *leaf,
+                              struct btrfs_chunk *chunk,
+                              struct btrfs_balance_args *bargs)
+{
+       int nr_stripes = btrfs_chunk_num_stripes(leaf, chunk);
+
+       /* Outside the range on either side: the chunk does not match. */
+       if (nr_stripes < bargs->stripes_min || nr_stripes > bargs->stripes_max)
+               return 1;
+
+       return 0;
+}
+
 static int chunk_soft_convert_filter(u64 chunk_type,
                                     struct btrfs_balance_args *bargs)
 {
@@ -3187,6 +3315,9 @@ static int should_balance_chunk(struct btrfs_root *root,
        if ((bargs->flags & BTRFS_BALANCE_ARGS_USAGE) &&
            chunk_usage_filter(bctl->fs_info, chunk_offset, bargs)) {
                return 0;
+       } else if ((bargs->flags & BTRFS_BALANCE_ARGS_USAGE_RANGE) &&
+           chunk_usage_range_filter(bctl->fs_info, chunk_offset, bargs)) {
+               return 0;
        }
 
        /* devid filter */
@@ -3207,6 +3338,12 @@ static int should_balance_chunk(struct btrfs_root *root,
                return 0;
        }
 
+       /* stripes filter */
+       if ((bargs->flags & BTRFS_BALANCE_ARGS_STRIPES_RANGE) &&
+           chunk_stripes_range_filter(leaf, chunk, bargs)) {
+               return 0;
+       }
+
        /* soft profile changing mode */
        if ((bargs->flags & BTRFS_BALANCE_ARGS_SOFT) &&
            chunk_soft_convert_filter(chunk_type, bargs)) {
@@ -3221,6 +3358,16 @@ static int should_balance_chunk(struct btrfs_root *root,
                        return 0;
                else
                        bargs->limit--;
+       } else if ((bargs->flags & BTRFS_BALANCE_ARGS_LIMIT_RANGE)) {
+               /*
+                * Same logic as the 'limit' filter; the minimum cannot be
+                * determined here because we do not have the global information
+                * about the count of all chunks that satisfy the filters.
+                */
+               if (bargs->limit_max == 0)
+                       return 0;
+               else
+                       bargs->limit_max--;
        }
 
        return 1;
@@ -3235,6 +3382,7 @@ static int __btrfs_balance(struct btrfs_fs_info *fs_info)
        struct btrfs_device *device;
        u64 old_size;
        u64 size_to_free;
+       u64 chunk_type;
        struct btrfs_chunk *chunk;
        struct btrfs_path *path;
        struct btrfs_key key;
@@ -3245,9 +3393,14 @@ static int __btrfs_balance(struct btrfs_fs_info *fs_info)
        int ret;
        int enospc_errors = 0;
        bool counting = true;
+       /*
+        * The single value limit and min/max limits use the same bytes in
+        * struct btrfs_balance_args.
+        */
        u64 limit_data = bctl->data.limit;
        u64 limit_meta = bctl->meta.limit;
        u64 limit_sys = bctl->sys.limit;
+       u32 count_data = 0;
+       u32 count_meta = 0;
+       u32 count_sys = 0;
+       int chunk_reserved = 0;
 
        /* step one make some room on all the devices */
        devices = &fs_info->fs_devices->devices;
@@ -3288,6 +3441,10 @@ static int __btrfs_balance(struct btrfs_fs_info *fs_info)
        spin_unlock(&fs_info->balance_lock);
 again:
        if (!counting) {
+               /*
+                * The single value limit and min/max limits use the same bytes
+                * in struct btrfs_balance_args.
+                */
                bctl->data.limit = limit_data;
                bctl->meta.limit = limit_meta;
                bctl->sys.limit = limit_sys;
@@ -3335,6 +3492,7 @@ again:
                }
 
                chunk = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
+               chunk_type = btrfs_chunk_type(leaf, chunk);
 
                if (!counting) {
                        spin_lock(&fs_info->balance_lock);
@@ -3344,6 +3502,7 @@ again:
 
                ret = should_balance_chunk(chunk_root, leaf, chunk,
                                           found_key.offset);
+
                btrfs_release_path(path);
                if (!ret) {
                        mutex_unlock(&fs_info->delete_unused_bgs_mutex);
@@ -3355,9 +3514,50 @@ again:
                        spin_lock(&fs_info->balance_lock);
                        bctl->stat.expected++;
                        spin_unlock(&fs_info->balance_lock);
+
+                       if (chunk_type & BTRFS_BLOCK_GROUP_DATA)
+                               count_data++;
+                       else if (chunk_type & BTRFS_BLOCK_GROUP_SYSTEM)
+                               count_sys++;
+                       else if (chunk_type & BTRFS_BLOCK_GROUP_METADATA)
+                               count_meta++;
+
                        goto loop;
                }
 
+               /*
+                * Apply limit_min filter, no need to check if the LIMITS
+                * filter is used, limit_min is 0 by default
+                */
+               if (((chunk_type & BTRFS_BLOCK_GROUP_DATA) &&
+                                       count_data < bctl->data.limit_min)
+                               || ((chunk_type & BTRFS_BLOCK_GROUP_METADATA) &&
+                                       count_meta < bctl->meta.limit_min)
+                               || ((chunk_type & BTRFS_BLOCK_GROUP_SYSTEM) &&
+                                       count_sys < bctl->sys.limit_min)) {
+                       mutex_unlock(&fs_info->delete_unused_bgs_mutex);
+                       goto loop;
+               }
+
+               if ((chunk_type & BTRFS_BLOCK_GROUP_DATA) && !chunk_reserved) {
+                       trans = btrfs_start_transaction(chunk_root, 0);
+                       if (IS_ERR(trans)) {
+                               mutex_unlock(&fs_info->delete_unused_bgs_mutex);
+                               ret = PTR_ERR(trans);
+                               goto error;
+                       }
+
+                       ret = btrfs_force_chunk_alloc(trans, chunk_root,
+                                                     BTRFS_BLOCK_GROUP_DATA);
+                       if (ret < 0) {
+                               mutex_unlock(&fs_info->delete_unused_bgs_mutex);
+                               goto error;
+                       }
+
+                       btrfs_end_transaction(trans, chunk_root);
+                       chunk_reserved = 1;
+               }
+
                ret = btrfs_relocate_chunk(chunk_root,
                                           found_key.offset);
                mutex_unlock(&fs_info->delete_unused_bgs_mutex);
@@ -3437,6 +3637,15 @@ static void __cancel_balance(struct btrfs_fs_info *fs_info)
        atomic_set(&fs_info->mutually_exclusive_operation_running, 0);
 }
 
+/*
+ * Validate the convert target of one set of balance args against @allowed.
+ *
+ * Returns non-zero if the args are invalid: BTRFS_BALANCE_ARGS_CONVERT is
+ * set and the target either fails alloc_profile_is_valid() or contains bits
+ * outside @allowed.  Returns 0 otherwise.
+ */
+static inline int validate_convert_profile(struct btrfs_balance_args *bctl_arg,
+               u64 allowed)
+{
+       /* Nothing to validate unless a conversion was requested. */
+       if (!(bctl_arg->flags & BTRFS_BALANCE_ARGS_CONVERT))
+               return 0;
+
+       if (!alloc_profile_is_valid(bctl_arg->target, 1))
+               return 1;
+
+       /* The target must not carry bits that are not in @allowed. */
+       return (bctl_arg->target & ~allowed) != 0;
+}
+
 /*
  * Should be called with both balance and volume mutexes held
  */
@@ -3494,27 +3703,21 @@ int btrfs_balance(struct btrfs_balance_control *bctl,
        if (num_devices > 3)
                allowed |= (BTRFS_BLOCK_GROUP_RAID10 |
                            BTRFS_BLOCK_GROUP_RAID6);
-       if ((bctl->data.flags & BTRFS_BALANCE_ARGS_CONVERT) &&
-           (!alloc_profile_is_valid(bctl->data.target, 1) ||
-            (bctl->data.target & ~allowed))) {
+       if (validate_convert_profile(&bctl->data, allowed)) {
                btrfs_err(fs_info, "unable to start balance with target "
                           "data profile %llu",
                       bctl->data.target);
                ret = -EINVAL;
                goto out;
        }
-       if ((bctl->meta.flags & BTRFS_BALANCE_ARGS_CONVERT) &&
-           (!alloc_profile_is_valid(bctl->meta.target, 1) ||
-            (bctl->meta.target & ~allowed))) {
+       if (validate_convert_profile(&bctl->meta, allowed)) {
                btrfs_err(fs_info,
                           "unable to start balance with target metadata profile %llu",
                       bctl->meta.target);
                ret = -EINVAL;
                goto out;
        }
-       if ((bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT) &&
-           (!alloc_profile_is_valid(bctl->sys.target, 1) ||
-            (bctl->sys.target & ~allowed))) {
+       if (validate_convert_profile(&bctl->sys, allowed)) {
                btrfs_err(fs_info,
                           "unable to start balance with target system profile %llu",
                       bctl->sys.target);
@@ -4256,65 +4459,6 @@ static int btrfs_cmp_device_info(const void *a, const void *b)
        return 0;
 }
 
-static const struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES] = {
-       [BTRFS_RAID_RAID10] = {
-               .sub_stripes    = 2,
-               .dev_stripes    = 1,
-               .devs_max       = 0,    /* 0 == as many as possible */
-               .devs_min       = 4,
-               .devs_increment = 2,
-               .ncopies        = 2,
-       },
-       [BTRFS_RAID_RAID1] = {
-               .sub_stripes    = 1,
-               .dev_stripes    = 1,
-               .devs_max       = 2,
-               .devs_min       = 2,
-               .devs_increment = 2,
-               .ncopies        = 2,
-       },
-       [BTRFS_RAID_DUP] = {
-               .sub_stripes    = 1,
-               .dev_stripes    = 2,
-               .devs_max       = 1,
-               .devs_min       = 1,
-               .devs_increment = 1,
-               .ncopies        = 2,
-       },
-       [BTRFS_RAID_RAID0] = {
-               .sub_stripes    = 1,
-               .dev_stripes    = 1,
-               .devs_max       = 0,
-               .devs_min       = 2,
-               .devs_increment = 1,
-               .ncopies        = 1,
-       },
-       [BTRFS_RAID_SINGLE] = {
-               .sub_stripes    = 1,
-               .dev_stripes    = 1,
-               .devs_max       = 1,
-               .devs_min       = 1,
-               .devs_increment = 1,
-               .ncopies        = 1,
-       },
-       [BTRFS_RAID_RAID5] = {
-               .sub_stripes    = 1,
-               .dev_stripes    = 1,
-               .devs_max       = 0,
-               .devs_min       = 2,
-               .devs_increment = 1,
-               .ncopies        = 2,
-       },
-       [BTRFS_RAID_RAID6] = {
-               .sub_stripes    = 1,
-               .dev_stripes    = 1,
-               .devs_max       = 0,
-               .devs_min       = 3,
-               .devs_increment = 1,
-               .ncopies        = 3,
-       },
-};
-
 static u32 find_raid56_stripe_len(u32 data_devices, u32 dev_stripe_target)
 {
        /* TODO allow them to set a preferred stripe size */