Merge branch 'jeffm-discard-4.3' into for-linus-4.3
authorChris Mason <clm@fb.com>
Sun, 9 Aug 2015 14:35:33 +0000 (07:35 -0700)
committerChris Mason <clm@fb.com>
Sun, 9 Aug 2015 14:35:33 +0000 (07:35 -0700)
1  2 
fs/btrfs/ctree.h
fs/btrfs/disk-io.c
fs/btrfs/extent-tree.c
fs/btrfs/super.c
fs/btrfs/transaction.c
fs/btrfs/volumes.c
fs/btrfs/volumes.h

diff --combined fs/btrfs/ctree.h
index d4042c89d29bb74a5fc93a68ed2ad08cb4bf997a,19ef3f30655978dc8b92b99431ccb8c7dce723b7..938efe33be809240bc19bdf81d854c623191a3d8
@@@ -1300,7 -1300,7 +1300,7 @@@ struct btrfs_block_group_cache 
        /* for raid56, this is a full stripe, without parity */
        unsigned long full_stripe_len;
  
 -      unsigned int ro:1;
 +      unsigned int ro;
        unsigned int iref:1;
        unsigned int has_caching_ctl:1;
        unsigned int removed:1;
@@@ -1518,6 -1518,12 +1518,6 @@@ struct btrfs_fs_info 
         */
        struct mutex ordered_operations_mutex;
  
 -      /*
 -       * Same as ordered_operations_mutex except this is for ordered extents
 -       * and not the operations.
 -       */
 -      struct mutex ordered_extent_flush_mutex;
 -
        struct rw_semaphore commit_root_sem;
  
        struct rw_semaphore cleanup_work_sem;
@@@ -3431,6 -3437,8 +3431,8 @@@ int btrfs_remove_block_group(struct btr
                             struct btrfs_root *root, u64 group_start,
                             struct extent_map *em);
  void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info);
+ void btrfs_get_block_group_trimming(struct btrfs_block_group_cache *cache);
+ void btrfs_put_block_group_trimming(struct btrfs_block_group_cache *cache);
  void btrfs_create_pending_block_groups(struct btrfs_trans_handle *trans,
                                       struct btrfs_root *root);
  u64 btrfs_get_alloc_profile(struct btrfs_root *root, int data);
@@@ -3489,9 -3497,9 +3491,9 @@@ int btrfs_cond_migrate_bytes(struct btr
  void btrfs_block_rsv_release(struct btrfs_root *root,
                             struct btrfs_block_rsv *block_rsv,
                             u64 num_bytes);
 -int btrfs_set_block_group_ro(struct btrfs_root *root,
 +int btrfs_inc_block_group_ro(struct btrfs_root *root,
                             struct btrfs_block_group_cache *cache);
 -void btrfs_set_block_group_rw(struct btrfs_root *root,
 +void btrfs_dec_block_group_ro(struct btrfs_root *root,
                              struct btrfs_block_group_cache *cache);
  void btrfs_put_block_group_cache(struct btrfs_fs_info *info);
  u64 btrfs_account_ro_block_groups_free_space(struct btrfs_space_info *sinfo);
@@@ -4067,6 -4075,7 +4069,7 @@@ __col
  void __btrfs_std_error(struct btrfs_fs_info *fs_info, const char *function,
                     unsigned int line, int errno, const char *fmt, ...);
  
+ const char *btrfs_decode_error(int errno);
  
  __cold
  void __btrfs_abort_transaction(struct btrfs_trans_handle *trans,
@@@ -4179,7 -4188,8 +4182,7 @@@ int btrfs_reloc_clone_csums(struct inod
  int btrfs_reloc_cow_block(struct btrfs_trans_handle *trans,
                          struct btrfs_root *root, struct extent_buffer *buf,
                          struct extent_buffer *cow);
 -void btrfs_reloc_pre_snapshot(struct btrfs_trans_handle *trans,
 -                            struct btrfs_pending_snapshot *pending,
 +void btrfs_reloc_pre_snapshot(struct btrfs_pending_snapshot *pending,
                              u64 *bytes_to_reserve);
  int btrfs_reloc_post_snapshot(struct btrfs_trans_handle *trans,
                              struct btrfs_pending_snapshot *pending);
diff --combined fs/btrfs/disk-io.c
index 230546b45474253a16e8db8f8f7b24c1659ad726,053109ba26b7e22a8ce995d2485977dc58dbd9ba..cc15514b4a7675a9a2493f1d8198dd0c9e236850
@@@ -1724,7 -1724,6 +1724,7 @@@ static int setup_bdi(struct btrfs_fs_in
        bdi->ra_pages = VM_MAX_READAHEAD * 1024 / PAGE_CACHE_SIZE;
        bdi->congested_fn       = btrfs_congested_fn;
        bdi->congested_data     = info;
 +      bdi->capabilities |= BDI_CAP_CGROUP_WRITEBACK;
        return 0;
  }
  
@@@ -1745,7 -1744,7 +1745,7 @@@ static void end_workqueue_fn(struct btr
        bio->bi_private = end_io_wq->private;
        bio->bi_end_io = end_io_wq->end_io;
        kmem_cache_free(btrfs_end_io_wq_cache, end_io_wq);
 -      bio_endio_nodec(bio, error);
 +      bio_endio(bio, error);
  }
  
  static int cleaner_kthread(void *arg)
@@@ -2609,6 -2608,7 +2609,6 @@@ int open_ctree(struct super_block *sb
  
  
        mutex_init(&fs_info->ordered_operations_mutex);
 -      mutex_init(&fs_info->ordered_extent_flush_mutex);
        mutex_init(&fs_info->tree_log_mutex);
        mutex_init(&fs_info->chunk_mutex);
        mutex_init(&fs_info->transaction_kthread_mutex);
@@@ -2950,9 -2950,8 +2950,9 @@@ retry_root_backup
        if (fs_info->fs_devices->missing_devices >
             fs_info->num_tolerated_disk_barrier_failures &&
            !(sb->s_flags & MS_RDONLY)) {
 -              printk(KERN_WARNING "BTRFS: "
 -                      "too many missing devices, writeable mount is not allowed\n");
 +              pr_warn("BTRFS: missing devices(%llu) exceeds the limit(%d), writeable mount is not allowed\n",
 +                      fs_info->fs_devices->missing_devices,
 +                      fs_info->num_tolerated_disk_barrier_failures);
                goto fail_sysfs;
        }
  
@@@ -3327,8 -3326,11 +3327,8 @@@ static int write_dev_supers(struct btrf
   */
  static void btrfs_end_empty_barrier(struct bio *bio, int err)
  {
 -      if (err) {
 -              if (err == -EOPNOTSUPP)
 -                      set_bit(BIO_EOPNOTSUPP, &bio->bi_flags);
 +      if (err)
                clear_bit(BIO_UPTODATE, &bio->bi_flags);
 -      }
        if (bio->bi_private)
                complete(bio->bi_private);
        bio_put(bio);
@@@ -3356,7 -3358,11 +3356,7 @@@ static int write_dev_flush(struct btrfs
  
                wait_for_completion(&device->flush_wait);
  
 -              if (bio_flagged(bio, BIO_EOPNOTSUPP)) {
 -                      printk_in_rcu("BTRFS: disabling barriers on dev %s\n",
 -                                    rcu_str_deref(device->name));
 -                      device->nobarriers = 1;
 -              } else if (!bio_flagged(bio, BIO_UPTODATE)) {
 +              if (!bio_flagged(bio, BIO_UPTODATE)) {
                        ret = -EIO;
                        btrfs_dev_stat_inc_and_print(device,
                                BTRFS_DEV_STAT_FLUSH_ERRS);
@@@ -3761,6 -3767,15 +3761,15 @@@ void close_ctree(struct btrfs_root *roo
        cancel_work_sync(&fs_info->async_reclaim_work);
  
        if (!(fs_info->sb->s_flags & MS_RDONLY)) {
+               /*
+                * If the cleaner thread is stopped and there are
+                * block groups queued for removal, the deletion will be
+                * skipped when we quit the cleaner thread.
+                */
+               mutex_lock(&root->fs_info->cleaner_mutex);
+               btrfs_delete_unused_bgs(root->fs_info);
+               mutex_unlock(&root->fs_info->cleaner_mutex);
                ret = btrfs_commit_super(root);
                if (ret)
                        btrfs_err(fs_info, "commit super ret %d", ret);
diff --combined fs/btrfs/extent-tree.c
index 59d59d98bca141bd4665730911cd5ec95ed25863,6b791f39469827721e0b465a8c9105ad3949ac98..5411f0ab56831aa3923f5d4b2d1d53adf9a88991
@@@ -1316,7 -1316,8 +1316,7 @@@ static noinline int remove_extent_data_
        return ret;
  }
  
 -static noinline u32 extent_data_ref_count(struct btrfs_root *root,
 -                                        struct btrfs_path *path,
 +static noinline u32 extent_data_ref_count(struct btrfs_path *path,
                                          struct btrfs_extent_inline_ref *iref)
  {
        struct btrfs_key key;
@@@ -1882,10 -1883,77 +1882,77 @@@ static int remove_extent_backref(struc
        return ret;
  }
  
- static int btrfs_issue_discard(struct block_device *bdev,
-                               u64 start, u64 len)
+ #define in_range(b, first, len)        ((b) >= (first) && (b) < (first) + (len))
+ static int btrfs_issue_discard(struct block_device *bdev, u64 start, u64 len,
+                              u64 *discarded_bytes)
  {
-       return blkdev_issue_discard(bdev, start >> 9, len >> 9, GFP_NOFS, 0);
+       int j, ret = 0;
+       u64 bytes_left, end;
+       u64 aligned_start = ALIGN(start, 1 << 9);
+       if (WARN_ON(start != aligned_start)) {
+               len -= aligned_start - start;
+               len = round_down(len, 1 << 9);
+               start = aligned_start;
+       }
+       *discarded_bytes = 0;
+       if (!len)
+               return 0;
+       end = start + len;
+       bytes_left = len;
+       /* Skip any superblocks on this device. */
+       for (j = 0; j < BTRFS_SUPER_MIRROR_MAX; j++) {
+               u64 sb_start = btrfs_sb_offset(j);
+               u64 sb_end = sb_start + BTRFS_SUPER_INFO_SIZE;
+               u64 size = sb_start - start;
+               if (!in_range(sb_start, start, bytes_left) &&
+                   !in_range(sb_end, start, bytes_left) &&
+                   !in_range(start, sb_start, BTRFS_SUPER_INFO_SIZE))
+                       continue;
+               /*
+                * Superblock spans beginning of range.  Adjust start and
+                * try again.
+                */
+               if (sb_start <= start) {
+                       start += sb_end - start;
+                       if (start > end) {
+                               bytes_left = 0;
+                               break;
+                       }
+                       bytes_left = end - start;
+                       continue;
+               }
+               if (size) {
+                       ret = blkdev_issue_discard(bdev, start >> 9, size >> 9,
+                                                  GFP_NOFS, 0);
+                       if (!ret)
+                               *discarded_bytes += size;
+                       else if (ret != -EOPNOTSUPP)
+                               return ret;
+               }
+               start = sb_end;
+               if (start > end) {
+                       bytes_left = 0;
+                       break;
+               }
+               bytes_left = end - start;
+       }
+       if (bytes_left) {
+               ret = blkdev_issue_discard(bdev, start >> 9, bytes_left >> 9,
+                                          GFP_NOFS, 0);
+               if (!ret)
+                       *discarded_bytes += bytes_left;
+       }
+       return ret;
  }
  
  int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr,
  
  
                for (i = 0; i < bbio->num_stripes; i++, stripe++) {
+                       u64 bytes;
                        if (!stripe->dev->can_discard)
                                continue;
  
                        ret = btrfs_issue_discard(stripe->dev->bdev,
                                                  stripe->physical,
-                                                 stripe->length);
+                                                 stripe->length,
+                                                 &bytes);
                        if (!ret)
-                               discarded_bytes += stripe->length;
+                               discarded_bytes += bytes;
                        else if (ret != -EOPNOTSUPP)
                                break; /* Logic errors or -ENOMEM, or -EIO but I don't know how that could happen JDM */
  
@@@ -6061,20 -6131,19 +6130,19 @@@ int btrfs_finish_extent_commit(struct b
                               struct btrfs_root *root)
  {
        struct btrfs_fs_info *fs_info = root->fs_info;
+       struct btrfs_block_group_cache *block_group, *tmp;
+       struct list_head *deleted_bgs;
        struct extent_io_tree *unpin;
        u64 start;
        u64 end;
        int ret;
  
-       if (trans->aborted)
-               return 0;
        if (fs_info->pinned_extents == &fs_info->freed_extents[0])
                unpin = &fs_info->freed_extents[1];
        else
                unpin = &fs_info->freed_extents[0];
  
-       while (1) {
+       while (!trans->aborted) {
                mutex_lock(&fs_info->unused_bg_unpin_mutex);
                ret = find_first_extent_bit(unpin, 0, &start, &end,
                                            EXTENT_DIRTY, NULL);
                cond_resched();
        }
  
+       /*
+        * Transaction is finished.  We don't need the lock anymore.  We
+        * do need to clean up the block groups in case of a transaction
+        * abort.
+        */
+       deleted_bgs = &trans->transaction->deleted_bgs;
+       list_for_each_entry_safe(block_group, tmp, deleted_bgs, bg_list) {
+               u64 trimmed = 0;
+               ret = -EROFS;
+               if (!trans->aborted)
+                       ret = btrfs_discard_extent(root,
+                                                  block_group->key.objectid,
+                                                  block_group->key.offset,
+                                                  &trimmed);
+               list_del_init(&block_group->bg_list);
+               btrfs_put_block_group_trimming(block_group);
+               btrfs_put_block_group(block_group);
+               if (ret) {
+                       const char *errstr = btrfs_decode_error(ret);
+                       btrfs_warn(fs_info,
+                                  "Discard failed while removing blockgroup: errno=%d %s\n",
+                                  ret, errstr);
+               }
+       }
        return 0;
  }
  
@@@ -6348,7 -6445,7 +6444,7 @@@ static int __btrfs_free_extent(struct b
        } else {
                if (found_extent) {
                        BUG_ON(is_data && refs_to_drop !=
 -                             extent_data_ref_count(root, path, iref));
 +                             extent_data_ref_count(path, iref));
                        if (iref) {
                                BUG_ON(path->slots[0] != extent_slot);
                        } else {
@@@ -7566,6 -7663,9 +7662,6 @@@ static void unuse_block_rsv(struct btrf
  
  /*
   * finds a free extent and does all the dirty work required for allocation
 - * returns the key for the extent through ins, and a tree buffer for
 - * the first block of the extent through buf.
 - *
   * returns the tree buffer or an ERR_PTR on error.
   */
  struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans,
@@@ -8719,13 -8819,14 +8815,13 @@@ static u64 update_block_group_flags(str
        return flags;
  }
  
 -static int set_block_group_ro(struct btrfs_block_group_cache *cache, int force)
 +static int inc_block_group_ro(struct btrfs_block_group_cache *cache, int force)
  {
        struct btrfs_space_info *sinfo = cache->space_info;
        u64 num_bytes;
        u64 min_allocable_bytes;
        int ret = -ENOSPC;
  
 -
        /*
         * We need some metadata space and system metadata space for
         * allocating chunks in some corner cases until we force to set
        spin_lock(&cache->lock);
  
        if (cache->ro) {
 +              cache->ro++;
                ret = 0;
                goto out;
        }
            sinfo->bytes_may_use + sinfo->bytes_readonly + num_bytes +
            min_allocable_bytes <= sinfo->total_bytes) {
                sinfo->bytes_readonly += num_bytes;
 -              cache->ro = 1;
 +              cache->ro++;
                list_add_tail(&cache->ro_list, &sinfo->ro_bgs);
                ret = 0;
        }
@@@ -8764,7 -8864,7 +8860,7 @@@ out
        return ret;
  }
  
 -int btrfs_set_block_group_ro(struct btrfs_root *root,
 +int btrfs_inc_block_group_ro(struct btrfs_root *root,
                             struct btrfs_block_group_cache *cache)
  
  {
        u64 alloc_flags;
        int ret;
  
 -      BUG_ON(cache->ro);
 -
  again:
        trans = btrfs_join_transaction(root);
        if (IS_ERR(trans))
                        goto out;
        }
  
 -      ret = set_block_group_ro(cache, 0);
 +      ret = inc_block_group_ro(cache, 0);
        if (!ret)
                goto out;
        alloc_flags = get_alloc_profile(root, cache->space_info->flags);
                             CHUNK_ALLOC_FORCE);
        if (ret < 0)
                goto out;
 -      ret = set_block_group_ro(cache, 0);
 +      ret = inc_block_group_ro(cache, 0);
  out:
        if (cache->flags & BTRFS_BLOCK_GROUP_SYSTEM) {
                alloc_flags = update_block_group_flags(root, cache->flags);
@@@ -8885,7 -8987,7 +8981,7 @@@ u64 btrfs_account_ro_block_groups_free_
        return free_bytes;
  }
  
 -void btrfs_set_block_group_rw(struct btrfs_root *root,
 +void btrfs_dec_block_group_ro(struct btrfs_root *root,
                              struct btrfs_block_group_cache *cache)
  {
        struct btrfs_space_info *sinfo = cache->space_info;
  
        spin_lock(&sinfo->lock);
        spin_lock(&cache->lock);
 -      num_bytes = cache->key.offset - cache->reserved - cache->pinned -
 -                  cache->bytes_super - btrfs_block_group_used(&cache->item);
 -      sinfo->bytes_readonly -= num_bytes;
 -      cache->ro = 0;
 -      list_del_init(&cache->ro_list);
 +      if (!--cache->ro) {
 +              num_bytes = cache->key.offset - cache->reserved -
 +                          cache->pinned - cache->bytes_super -
 +                          btrfs_block_group_used(&cache->item);
 +              sinfo->bytes_readonly -= num_bytes;
 +              list_del_init(&cache->ro_list);
 +      }
        spin_unlock(&cache->lock);
        spin_unlock(&sinfo->lock);
  }
@@@ -9417,7 -9517,7 +9513,7 @@@ int btrfs_read_block_groups(struct btrf
  
                set_avail_alloc_bits(root->fs_info, cache->flags);
                if (btrfs_chunk_readonly(root, cache->key.objectid)) {
 -                      set_block_group_ro(cache, 1);
 +                      inc_block_group_ro(cache, 1);
                } else if (btrfs_block_group_used(&cache->item) == 0) {
                        spin_lock(&info->unused_bgs_lock);
                        /* Should always be true but just in case. */
                list_for_each_entry(cache,
                                &space_info->block_groups[BTRFS_RAID_RAID0],
                                list)
 -                      set_block_group_ro(cache, 1);
 +                      inc_block_group_ro(cache, 1);
                list_for_each_entry(cache,
                                &space_info->block_groups[BTRFS_RAID_SINGLE],
                                list)
 -                      set_block_group_ro(cache, 1);
 +                      inc_block_group_ro(cache, 1);
        }
  
        init_global_block_rsv(info);
@@@ -9830,6 -9930,11 +9926,11 @@@ int btrfs_remove_block_group(struct btr
         * currently running transaction might finish and a new one start,
         * allowing for new block groups to be created that can reuse the same
         * physical device locations unless we take this special care.
+        *
+        * There may also be an implicit trim operation if the file system
+        * is mounted with -odiscard. The same protections must remain
+        * in place until the extents have been discarded completely when
+        * the transaction commit has completed.
         */
        remove_em = (atomic_read(&block_group->trimming) == 0);
        /*
@@@ -9904,6 -10009,7 +10005,7 @@@ void btrfs_delete_unused_bgs(struct btr
        spin_lock(&fs_info->unused_bgs_lock);
        while (!list_empty(&fs_info->unused_bgs)) {
                u64 start, end;
+               int trimming;
  
                block_group = list_first_entry(&fs_info->unused_bgs,
                                               struct btrfs_block_group_cache,
                spin_unlock(&block_group->lock);
  
                /* We don't want to force the issue, only flip if it's ok. */
 -              ret = set_block_group_ro(block_group, 0);
 +              ret = inc_block_group_ro(block_group, 0);
                up_write(&space_info->groups_sem);
                if (ret < 0) {
                        ret = 0;
                /* 1 for btrfs_orphan_reserve_metadata() */
                trans = btrfs_start_transaction(root, 1);
                if (IS_ERR(trans)) {
 -                      btrfs_set_block_group_rw(root, block_group);
 +                      btrfs_dec_block_group_ro(root, block_group);
                        ret = PTR_ERR(trans);
                        goto next;
                }
                                  EXTENT_DIRTY, GFP_NOFS);
                if (ret) {
                        mutex_unlock(&fs_info->unused_bg_unpin_mutex);
 -                      btrfs_set_block_group_rw(root, block_group);
 +                      btrfs_dec_block_group_ro(root, block_group);
                        goto end_trans;
                }
                ret = clear_extent_bits(&fs_info->freed_extents[1], start, end,
                                  EXTENT_DIRTY, GFP_NOFS);
                if (ret) {
                        mutex_unlock(&fs_info->unused_bg_unpin_mutex);
 -                      btrfs_set_block_group_rw(root, block_group);
 +                      btrfs_dec_block_group_ro(root, block_group);
                        goto end_trans;
                }
                mutex_unlock(&fs_info->unused_bg_unpin_mutex);
                spin_unlock(&block_group->lock);
                spin_unlock(&space_info->lock);
  
+               /* DISCARD can flip during remount */
+               trimming = btrfs_test_opt(root, DISCARD);
+               /* Implicit trim during transaction commit. */
+               if (trimming)
+                       btrfs_get_block_group_trimming(block_group);
                /*
                 * Btrfs_remove_chunk will abort the transaction if things go
                 * horribly wrong.
                 */
                ret = btrfs_remove_chunk(trans, root,
                                         block_group->key.objectid);
+               if (ret) {
+                       if (trimming)
+                               btrfs_put_block_group_trimming(block_group);
+                       goto end_trans;
+               }
+               /*
+                * If we're not mounted with -odiscard, we can just forget
+                * about this block group. Otherwise we'll need to wait
+                * until transaction commit to do the actual discard.
+                */
+               if (trimming) {
+                       WARN_ON(!list_empty(&block_group->bg_list));
+                       spin_lock(&trans->transaction->deleted_bgs_lock);
+                       list_move(&block_group->bg_list,
+                                 &trans->transaction->deleted_bgs);
+                       spin_unlock(&trans->transaction->deleted_bgs_lock);
+                       btrfs_get_block_group(block_group);
+               }
  end_trans:
                btrfs_end_transaction(trans, root);
  next:
@@@ -10062,10 -10195,99 +10191,99 @@@ int btrfs_error_unpin_extent_range(stru
        return unpin_extent_range(root, start, end, false);
  }
  
+ /*
+  * It used to be that old block groups would be left around forever.
+  * Iterating over them would be enough to trim unused space.  Since we
+  * now automatically remove them, we also need to iterate over unallocated
+  * space.
+  *
+  * We don't want a transaction for this since the discard may take a
+  * substantial amount of time.  We don't require that a transaction be
+  * running, but we do need to take a running transaction into account
+  * to ensure that we're not discarding chunks that were released in
+  * the current transaction.
+  *
+  * Holding the chunks lock will prevent other threads from allocating
+  * or releasing chunks, but it won't prevent a running transaction
+  * from committing and releasing the memory that the pending chunks
+  * list head uses.  For that, we need to take a reference to the
+  * transaction.
+  */
+ static int btrfs_trim_free_extents(struct btrfs_device *device,
+                                  u64 minlen, u64 *trimmed)
+ {
+       u64 start = 0, len = 0;
+       int ret;
+       *trimmed = 0;
+       /* Not writeable = nothing to do. */
+       if (!device->writeable)
+               return 0;
+       /* No free space = nothing to do. */
+       if (device->total_bytes <= device->bytes_used)
+               return 0;
+       ret = 0;
+       while (1) {
+               struct btrfs_fs_info *fs_info = device->dev_root->fs_info;
+               struct btrfs_transaction *trans;
+               u64 bytes;
+               ret = mutex_lock_interruptible(&fs_info->chunk_mutex);
+               if (ret)
+                       return ret;
+               down_read(&fs_info->commit_root_sem);
+               spin_lock(&fs_info->trans_lock);
+               trans = fs_info->running_transaction;
+               if (trans)
+                       atomic_inc(&trans->use_count);
+               spin_unlock(&fs_info->trans_lock);
+               ret = find_free_dev_extent_start(trans, device, minlen, start,
+                                                &start, &len);
+               if (trans)
+                       btrfs_put_transaction(trans);
+               if (ret) {
+                       up_read(&fs_info->commit_root_sem);
+                       mutex_unlock(&fs_info->chunk_mutex);
+                       if (ret == -ENOSPC)
+                               ret = 0;
+                       break;
+               }
+               ret = btrfs_issue_discard(device->bdev, start, len, &bytes);
+               up_read(&fs_info->commit_root_sem);
+               mutex_unlock(&fs_info->chunk_mutex);
+               if (ret)
+                       break;
+               start += len;
+               *trimmed += bytes;
+               if (fatal_signal_pending(current)) {
+                       ret = -ERESTARTSYS;
+                       break;
+               }
+               cond_resched();
+       }
+       return ret;
+ }
  int btrfs_trim_fs(struct btrfs_root *root, struct fstrim_range *range)
  {
        struct btrfs_fs_info *fs_info = root->fs_info;
        struct btrfs_block_group_cache *cache = NULL;
+       struct btrfs_device *device;
+       struct list_head *devices;
        u64 group_trimmed;
        u64 start;
        u64 end;
                cache = next_block_group(fs_info->tree_root, cache);
        }
  
+       mutex_lock(&root->fs_info->fs_devices->device_list_mutex);
+       devices = &root->fs_info->fs_devices->alloc_list;
+       list_for_each_entry(device, devices, dev_alloc_list) {
+               ret = btrfs_trim_free_extents(device, range->minlen,
+                                             &group_trimmed);
+               if (ret)
+                       break;
+               trimmed += group_trimmed;
+       }
+       mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
        range->len = trimmed;
        return ret;
  }
diff --combined fs/btrfs/super.c
index d366dd4664d066269e9394403c060fece1a9100d,8da24e24289630631d0e4930380de96e996c5e33..c389c13f0f383c4e086bb12045a47c4cfb343ef8
@@@ -69,7 -69,7 +69,7 @@@ static struct file_system_type btrfs_fs
  
  static int btrfs_remount(struct super_block *sb, int *flags, char *data);
  
- static const char *btrfs_decode_error(int errno)
+ const char *btrfs_decode_error(int errno)
  {
        char *errstr = "unknown";
  
@@@ -1033,7 -1033,6 +1033,7 @@@ static int btrfs_fill_super(struct supe
        sb->s_flags |= MS_POSIXACL;
  #endif
        sb->s_flags |= MS_I_VERSION;
 +      sb->s_iflags |= SB_I_CGROUPWB;
        err = open_ctree(sb, fs_devices, (char *)data);
        if (err) {
                printk(KERN_ERR "BTRFS: open_ctree failed\n");
@@@ -1651,6 -1650,17 +1651,17 @@@ static int btrfs_remount(struct super_b
  
                sb->s_flags |= MS_RDONLY;
  
+               /*
+                * Setting MS_RDONLY will put the cleaner thread to
+                * sleep at the next loop if it's already active.
+                * If it's already asleep, we'll leave unused block
+                * groups on disk until we're mounted read-write again
+                * unless we clean them up here.
+                */
+               mutex_lock(&root->fs_info->cleaner_mutex);
+               btrfs_delete_unused_bgs(fs_info);
+               mutex_unlock(&root->fs_info->cleaner_mutex);
                btrfs_dev_replace_suspend_for_unmount(fs_info);
                btrfs_scrub_cancel(fs_info);
                btrfs_pause_balance(fs_info);
diff --combined fs/btrfs/transaction.c
index 91f44c9f7ebcd5ce2ead5ca05f7fb677179be920,44da9299a25bf0e4d35942428a1be933880ba576..20267d47dbcd501dda238e7ff1ce0381137473a7
@@@ -258,6 -258,8 +258,8 @@@ loop
        mutex_init(&cur_trans->cache_write_mutex);
        cur_trans->num_dirty_bgs = 0;
        spin_lock_init(&cur_trans->dirty_bgs_lock);
+       INIT_LIST_HEAD(&cur_trans->deleted_bgs);
+       spin_lock_init(&cur_trans->deleted_bgs_lock);
        list_add_tail(&cur_trans->list, &fs_info->trans_list);
        extent_io_tree_init(&cur_trans->dirty_pages,
                             fs_info->btree_inode->i_mapping);
@@@ -1301,7 -1303,7 +1303,7 @@@ static noinline int create_pending_snap
         */
        btrfs_set_skip_qgroup(trans, objectid);
  
 -      btrfs_reloc_pre_snapshot(trans, pending, &to_reserve);
 +      btrfs_reloc_pre_snapshot(pending, &to_reserve);
  
        if (to_reserve > 0) {
                pending->error = btrfs_block_rsv_add(root,
diff --combined fs/btrfs/volumes.c
index 88e2fe931bde2320b4df5a7e9310dff76e2a3bca,141c6051cf58ac40b43466d82419035b51542b88..7c84a8122c37847c0c0ccb6cfb9818954db1e1df
@@@ -349,7 -349,7 +349,7 @@@ loop_lock
                    waitqueue_active(&fs_info->async_submit_wait))
                        wake_up(&fs_info->async_submit_wait);
  
 -              BUG_ON(atomic_read(&cur->bi_cnt) == 0);
 +              BUG_ON(atomic_read(&cur->__bi_cnt) == 0);
  
                /*
                 * if we're doing the sync list, record that our
        return ret;
  }
  
- static int contains_pending_extent(struct btrfs_trans_handle *trans,
+ static int contains_pending_extent(struct btrfs_transaction *transaction,
                                   struct btrfs_device *device,
                                   u64 *start, u64 len)
  {
+       struct btrfs_fs_info *fs_info = device->dev_root->fs_info;
        struct extent_map *em;
-       struct list_head *search_list = &trans->transaction->pending_chunks;
+       struct list_head *search_list = &fs_info->pinned_chunks;
        int ret = 0;
        u64 physical_start = *start;
  
+       if (transaction)
+               search_list = &transaction->pending_chunks;
  again:
        list_for_each_entry(em, search_list, list) {
                struct map_lookup *map;
                        }
                }
        }
-       if (search_list == &trans->transaction->pending_chunks) {
-               search_list = &trans->root->fs_info->pinned_chunks;
+       if (search_list != &fs_info->pinned_chunks) {
+               search_list = &fs_info->pinned_chunks;
                goto again;
        }
  
  
  
  /*
-  * find_free_dev_extent - find free space in the specified device
-  * @device:   the device which we search the free space in
-  * @num_bytes:        the size of the free space that we need
-  * @start:    store the start of the free space.
-  * @len:      the size of the free space. that we find, or the size of the max
-  *            free space if we don't find suitable free space
+  * find_free_dev_extent_start - find free space in the specified device
+  * @device:     the device which we search the free space in
+  * @num_bytes:          the size of the free space that we need
+  * @search_start: the position from which to begin the search
+  * @start:      store the start of the free space.
+  * @len:        the size of the free space. that we find, or the size
+  *              of the max free space if we don't find suitable free space
   *
   * this uses a pretty simple search, the expectation is that it is
   * called very infrequently and that a given device has a small number
   * But if we don't find suitable free space, it is used to store the size of
   * the max free space.
   */
- int find_free_dev_extent(struct btrfs_trans_handle *trans,
-                        struct btrfs_device *device, u64 num_bytes,
-                        u64 *start, u64 *len)
+ int find_free_dev_extent_start(struct btrfs_transaction *transaction,
+                              struct btrfs_device *device, u64 num_bytes,
+                              u64 search_start, u64 *start, u64 *len)
  {
        struct btrfs_key key;
        struct btrfs_root *root = device->dev_root;
        u64 max_hole_start;
        u64 max_hole_size;
        u64 extent_end;
-       u64 search_start;
        u64 search_end = device->total_bytes;
        int ret;
        int slot;
        struct extent_buffer *l;
  
-       /* FIXME use last free of some kind */
-       /* we don't want to overwrite the superblock on the drive,
-        * so we make sure to start at an offset of at least 1MB
-        */
-       search_start = max(root->fs_info->alloc_start, 1024ull * 1024);
        path = btrfs_alloc_path();
        if (!path)
                return -ENOMEM;
@@@ -1273,7 -1269,7 +1269,7 @@@ again
                         * Have to check before we set max_hole_start, otherwise
                         * we could end up sending back this offset anyway.
                         */
-                       if (contains_pending_extent(trans, device,
+                       if (contains_pending_extent(transaction, device,
                                                    &search_start,
                                                    hole_size)) {
                                if (key.offset >= search_start) {
@@@ -1322,7 -1318,7 +1318,7 @@@ next
        if (search_end > search_start) {
                hole_size = search_end - search_start;
  
-               if (contains_pending_extent(trans, device, &search_start,
+               if (contains_pending_extent(transaction, device, &search_start,
                                            hole_size)) {
                        btrfs_release_path(path);
                        goto again;
        return ret;
  }
  
+ int find_free_dev_extent(struct btrfs_trans_handle *trans,
+                        struct btrfs_device *device, u64 num_bytes,
+                        u64 *start, u64 *len)
+ {
+       struct btrfs_root *root = device->dev_root;
+       u64 search_start;
+       /* FIXME use last free of some kind */
+       /*
+        * we don't want to overwrite the superblock on the drive,
+        * so we make sure to start at an offset of at least 1MB
+        */
+       search_start = max(root->fs_info->alloc_start, 1024ull * 1024);
+       return find_free_dev_extent_start(trans->transaction, device,
+                                         num_bytes, search_start, start, len);
+ }
  static int btrfs_free_dev_extent(struct btrfs_trans_handle *trans,
                          struct btrfs_device *device,
                          u64 start, u64 *dev_extent_len)
@@@ -2755,7 -2769,9 +2769,7 @@@ out
        return ret;
  }
  
 -static int btrfs_relocate_chunk(struct btrfs_root *root,
 -                              u64 chunk_objectid,
 -                              u64 chunk_offset)
 +static int btrfs_relocate_chunk(struct btrfs_root *root, u64 chunk_offset)
  {
        struct btrfs_root *extent_root;
        struct btrfs_trans_handle *trans;
                return -ENOSPC;
  
        /* step one, relocate all the extents inside this chunk */
 +      btrfs_scrub_pause(root);
        ret = btrfs_relocate_block_group(extent_root, chunk_offset);
 +      btrfs_scrub_continue(root);
        if (ret)
                return ret;
  
@@@ -2855,6 -2869,7 +2869,6 @@@ again
  
                if (chunk_type & BTRFS_BLOCK_GROUP_SYSTEM) {
                        ret = btrfs_relocate_chunk(chunk_root,
 -                                                 found_key.objectid,
                                                   found_key.offset);
                        if (ret == -ENOSPC)
                                failed++;
@@@ -3374,6 -3389,7 +3388,6 @@@ again
                }
  
                ret = btrfs_relocate_chunk(chunk_root,
 -                                         found_key.objectid,
                                           found_key.offset);
                mutex_unlock(&fs_info->delete_unused_bgs_mutex);
                if (ret && ret != -ENOSPC)
@@@ -4075,6 -4091,7 +4089,6 @@@ int btrfs_shrink_device(struct btrfs_de
        struct btrfs_dev_extent *dev_extent = NULL;
        struct btrfs_path *path;
        u64 length;
 -      u64 chunk_objectid;
        u64 chunk_offset;
        int ret;
        int slot;
@@@ -4151,10 -4168,11 +4165,10 @@@ again
                        break;
                }
  
 -              chunk_objectid = btrfs_dev_extent_chunk_objectid(l, dev_extent);
                chunk_offset = btrfs_dev_extent_chunk_offset(l, dev_extent);
                btrfs_release_path(path);
  
 -              ret = btrfs_relocate_chunk(root, chunk_objectid, chunk_offset);
 +              ret = btrfs_relocate_chunk(root, chunk_offset);
                mutex_unlock(&root->fs_info->delete_unused_bgs_mutex);
                if (ret && ret != -ENOSPC)
                        goto done;
                u64 start = new_size;
                u64 len = old_size - new_size;
  
-               if (contains_pending_extent(trans, device, &start, len)) {
+               if (contains_pending_extent(trans->transaction, device,
+                                           &start, len)) {
                        unlock_chunks(root);
                        checked_pending_chunks = true;
                        failed = 0;
@@@ -5739,10 -5758,10 +5754,10 @@@ int btrfs_rmap_block(struct btrfs_mappi
  
  static inline void btrfs_end_bbio(struct btrfs_bio *bbio, struct bio *bio, int err)
  {
 -      if (likely(bbio->flags & BTRFS_BIO_ORIG_BIO_SUBMITTED))
 -              bio_endio_nodec(bio, err);
 -      else
 -              bio_endio(bio, err);
 +      bio->bi_private = bbio->private;
 +      bio->bi_end_io = bbio->end_io;
 +      bio_endio(bio, err);
 +
        btrfs_put_bbio(bbio);
  }
  
@@@ -5786,6 -5805,8 +5801,6 @@@ static void btrfs_end_bio(struct bio *b
                        bio = bbio->orig_bio;
                }
  
 -              bio->bi_private = bbio->private;
 -              bio->bi_end_io = bbio->end_io;
                btrfs_io_bio(bio)->mirror_num = bbio->mirror_num;
                /* only send an error to the higher layers if it is
                 * beyond the tolerance of the btrfs bio
@@@ -5942,14 -5963,6 +5957,14 @@@ again
        if (!bio)
                return -ENOMEM;
  
 +      if (first_bio->bi_ioc) {
 +              get_io_context_active(first_bio->bi_ioc);
 +              bio->bi_ioc = first_bio->bi_ioc;
 +      }
 +      if (first_bio->bi_css) {
 +              css_get(first_bio->bi_css);
 +              bio->bi_css = first_bio->bi_css;
 +      }
        while (bvec <= (first_bio->bi_io_vec + first_bio->bi_vcnt - 1)) {
                if (bio_add_page(bio, bvec->bv_page, bvec->bv_len,
                                 bvec->bv_offset) < bvec->bv_len) {
@@@ -5975,6 -5988,8 +5990,6 @@@ static void bbio_error(struct btrfs_bi
                /* Shoud be the original bio. */
                WARN_ON(bio != bbio->orig_bio);
  
 -              bio->bi_private = bbio->private;
 -              bio->bi_end_io = bbio->end_io;
                btrfs_io_bio(bio)->mirror_num = bbio->mirror_num;
                bio->bi_iter.bi_sector = logical >> 9;
  
@@@ -6055,8 -6070,10 +6070,8 @@@ int btrfs_map_bio(struct btrfs_root *ro
                if (dev_nr < total_devs - 1) {
                        bio = btrfs_bio_clone(first_bio, GFP_NOFS);
                        BUG_ON(!bio); /* -ENOMEM */
 -              } else {
 +              } else
                        bio = first_bio;
 -                      bbio->flags |= BTRFS_BIO_ORIG_BIO_SUBMITTED;
 -              }
  
                submit_stripe_bio(root, bbio, bio,
                                  bbio->stripes[dev_nr].physical, dev_nr, rw,
diff --combined fs/btrfs/volumes.h
index 95842a909e7f7cbbb9be2a70e24376db7f08baf7,57b0217b5300748e3b181e0cc2987ff8e3cfece4..2ca784a14e84bc2a00d0c3d1ec1a15290128edfc
@@@ -298,6 -298,8 +298,6 @@@ struct btrfs_bio_stripe 
  struct btrfs_bio;
  typedef void (btrfs_bio_end_io_t) (struct btrfs_bio *bio, int err);
  
 -#define BTRFS_BIO_ORIG_BIO_SUBMITTED  (1 << 0)
 -
  struct btrfs_bio {
        atomic_t refs;
        atomic_t stripes_pending;
@@@ -453,6 -455,9 +453,9 @@@ int btrfs_cancel_balance(struct btrfs_f
  int btrfs_create_uuid_tree(struct btrfs_fs_info *fs_info);
  int btrfs_check_uuid_tree(struct btrfs_fs_info *fs_info);
  int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset);
+ int find_free_dev_extent_start(struct btrfs_transaction *transaction,
+                        struct btrfs_device *device, u64 num_bytes,
+                        u64 search_start, u64 *start, u64 *max_avail);
  int find_free_dev_extent(struct btrfs_trans_handle *trans,
                         struct btrfs_device *device, u64 num_bytes,
                         u64 *start, u64 *max_avail);