Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/wfg/writeback
author Linus Torvalds <torvalds@linux-foundation.org>
Tue, 26 Jul 2011 17:39:54 +0000 (10:39 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Tue, 26 Jul 2011 17:39:54 +0000 (10:39 -0700)
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/wfg/writeback: (27 commits)
  mm: properly reflect task dirty limits in dirty_exceeded logic
  writeback: don't busy retry writeback on new/freeing inodes
  writeback: scale IO chunk size up to half device bandwidth
  writeback: trace global_dirty_state
  writeback: introduce max-pause and pass-good dirty limits
  writeback: introduce smoothed global dirty limit
  writeback: consolidate variable names in balance_dirty_pages()
  writeback: show bdi write bandwidth in debugfs
  writeback: bdi write bandwidth estimation
  writeback: account per-bdi accumulated written pages
  writeback: make writeback_control.nr_to_write straight
  writeback: skip tmpfs early in balance_dirty_pages_ratelimited_nr()
  writeback: trace event writeback_queue_io
  writeback: trace event writeback_single_inode
  writeback: remove .nonblocking and .encountered_congestion
  writeback: remove writeback_control.more_io
  writeback: skip balance_dirty_pages() for in-memory fs
  writeback: add bdi_dirty_limit() kernel-doc
  writeback: avoid extra sync work at enqueue time
  writeback: elevate queue_io() into wb_writeback()
  ...

Fix up trivial conflicts in fs/fs-writeback.c and mm/filemap.c

1  2 
fs/block_dev.c
fs/ext4/inode.c
fs/fs-writeback.c
fs/inode.c
fs/nfs/write.c
include/trace/events/ext4.h
mm/backing-dev.c
mm/filemap.c
mm/page-writeback.c
mm/rmap.c

diff --cc fs/block_dev.c
Simple merge
diff --cc fs/ext4/inode.c
Simple merge
index b8c507ca42f76141c31fc8405f7d2274966adb9a,6d49439ca31dc2b0b89f659b55e46e361e303c1a..1599aa985fe2accf6dcc9456a570ba942201685d
@@@ -460,6 -480,63 +480,37 @@@ writeback_single_inode(struct inode *in
        return ret;
  }
  
 -/*
 - * For background writeback the caller does not have the sb pinned
 - * before calling writeback. So make sure that we do pin it, so it doesn't
 - * go away while we are writing inodes from it.
 - */
 -static bool pin_sb_for_writeback(struct super_block *sb)
 -{
 -      spin_lock(&sb_lock);
 -      if (list_empty(&sb->s_instances)) {
 -              spin_unlock(&sb_lock);
 -              return false;
 -      }
 -
 -      sb->s_count++;
 -      spin_unlock(&sb_lock);
 -
 -      if (down_read_trylock(&sb->s_umount)) {
 -              if (sb->s_root)
 -                      return true;
 -              up_read(&sb->s_umount);
 -      }
 -
 -      put_super(sb);
 -      return false;
 -}
 -
+ static long writeback_chunk_size(struct backing_dev_info *bdi,
+                                struct wb_writeback_work *work)
+ {
+       long pages;
+       /*
+        * WB_SYNC_ALL mode does livelock avoidance by syncing dirty
+        * inodes/pages in one big loop. Setting wbc.nr_to_write=LONG_MAX
+        * here avoids calling into writeback_inodes_wb() more than once.
+        *
+        * The intended call sequence for WB_SYNC_ALL writeback is:
+        *
+        *      wb_writeback()
+        *          writeback_sb_inodes()       <== called only once
+        *              write_cache_pages()     <== called once for each inode
+        *                   (quickly) tag currently dirty pages
+        *                   (maybe slowly) sync all tagged pages
+        */
+       if (work->sync_mode == WB_SYNC_ALL || work->tagged_writepages)
+               pages = LONG_MAX;
+       else {
+               pages = min(bdi->avg_write_bandwidth / 2,
+                           global_dirty_limit / DIRTY_SCOPE);
+               pages = min(pages, work->nr_pages);
+               pages = round_down(pages + MIN_WRITEBACK_PAGES,
+                                  MIN_WRITEBACK_PAGES);
+       }
+       return pages;
+ }
  /*
   * Write a portion of b_io inodes which belong to @sb.
   *
@@@ -559,40 -643,41 +617,41 @@@ static long __writeback_inodes_wb(struc
                struct inode *inode = wb_inode(wb->b_io.prev);
                struct super_block *sb = inode->i_sb;
  
 -              if (!pin_sb_for_writeback(sb)) {
 +              if (!grab_super_passive(sb)) {
-                       requeue_io(inode);
+                       requeue_io(inode, wb);
                        continue;
                }
-               ret = writeback_sb_inodes(sb, wb, wbc, false);
+               wrote += writeback_sb_inodes(sb, wb, work);
                drop_super(sb);
  
-               if (ret)
-                       break;
+               /* refer to the same tests at the end of writeback_sb_inodes */
+               if (wrote) {
+                       if (time_is_before_jiffies(start_time + HZ / 10UL))
+                               break;
+                       if (work->nr_pages <= 0)
+                               break;
+               }
        }
-       spin_unlock(&inode_wb_list_lock);
        /* Leave any unwritten inodes on b_io */
+       return wrote;
  }
  
- static void __writeback_inodes_sb(struct super_block *sb,
-               struct bdi_writeback *wb, struct writeback_control *wbc)
+ long writeback_inodes_wb(struct bdi_writeback *wb, long nr_pages)
  {
-       WARN_ON(!rwsem_is_locked(&sb->s_umount));
+       struct wb_writeback_work work = {
+               .nr_pages       = nr_pages,
+               .sync_mode      = WB_SYNC_NONE,
+               .range_cyclic   = 1,
+       };
  
-       spin_lock(&inode_wb_list_lock);
-       if (!wbc->for_kupdate || list_empty(&wb->b_io))
-               queue_io(wb, wbc->older_than_this);
-       writeback_sb_inodes(sb, wb, wbc, true);
-       spin_unlock(&inode_wb_list_lock);
- }
+       spin_lock(&wb->list_lock);
+       if (list_empty(&wb->b_io))
+               queue_io(wb, NULL);
+       __writeback_inodes_wb(wb, &work);
+       spin_unlock(&wb->list_lock);
  
- /*
-  * The maximum number of pages to writeout in a single bdi flush/kupdate
-  * operation.  We do this so we don't hold I_SYNC against an inode for
-  * enormous amounts of time, which would block a userspace task which has
-  * been forced to throttle against that inode.  Also, the code reevaluates
-  * the dirty each time it has written this many pages.
-  */
- #define MAX_WRITEBACK_PAGES     1024
+       return nr_pages - work.nr_pages;
+ }
  
  static inline bool over_bground_thresh(void)
  {
diff --cc fs/inode.c
index 96c77b81167c81b75bfdbaf6f8c1ea0566892748,4be128cbc7543e36d33ad86304fda15ffe7851fe..a48fa5355fb44ef5a8e5a3a1acbcdaea095d829e
   *
   * inode->i_lock protects:
   *   inode->i_state, inode->i_hash, __iget()
 - * inode_lru_lock protects:
 - *   inode_lru, inode->i_lru
 + * inode->i_sb->s_inode_lru_lock protects:
 + *   inode->i_sb->s_inode_lru, inode->i_lru
   * inode_sb_list_lock protects:
   *   sb->s_inodes, inode->i_sb_list
-  * inode_wb_list_lock protects:
+  * bdi->wb.list_lock protects:
   *   bdi->wb.b_{dirty,io,more_io}, inode->i_wb_list
   * inode_hash_lock protects:
   *   inode_hashtable, inode->i_hash
@@@ -46,9 -46,9 +46,9 @@@
   *
   * inode_sb_list_lock
   *   inode->i_lock
 - *     inode_lru_lock
 + *     inode->i_sb->s_inode_lru_lock
   *
-  * inode_wb_list_lock
+  * bdi->wb.list_lock
   *   inode->i_lock
   *
   * inode_hash_lock
@@@ -64,9 -64,22 +64,8 @@@ static unsigned int i_hash_shift __read
  static struct hlist_head *inode_hashtable __read_mostly;
  static __cacheline_aligned_in_smp DEFINE_SPINLOCK(inode_hash_lock);
  
 -static LIST_HEAD(inode_lru);
 -static DEFINE_SPINLOCK(inode_lru_lock);
 -
  __cacheline_aligned_in_smp DEFINE_SPINLOCK(inode_sb_list_lock);
- __cacheline_aligned_in_smp DEFINE_SPINLOCK(inode_wb_list_lock);
  
 -/*
 - * iprune_sem provides exclusion between the icache shrinking and the
 - * umount path.
 - *
 - * We don't actually need it to protect anything in the umount path,
 - * but only need to cycle through it to make sure any inode that
 - * prune_icache took off the LRU list has been fully torn down by the
 - * time we are past evict_inodes.
 - */
 -static DECLARE_RWSEM(iprune_sem);
 -
  /*
   * Empty aops. Can be used for the cases where the user does not
   * define any of the address_space operations.
diff --cc fs/nfs/write.c
Simple merge
Simple merge
Simple merge
diff --cc mm/filemap.c
index 10a17111327362f9b0d5f30dc5f8519983e8f1e8,1e492c3dd6f893e1fece7daa10f675e80a99d7e2..867d40222ec798ce99a8332b991e8d7c4bdfb8ec
   *  ->i_mutex                 (generic_file_buffered_write)
   *    ->mmap_sem              (fault_in_pages_readable->do_page_fault)
   *
-  *  inode_wb_list_lock
 - *  ->i_mutex
 - *    ->i_alloc_sem             (various)
 - *
+  *  bdi->wb.list_lock
   *    sb_lock                 (fs/fs-writeback.c)
   *    ->mapping->tree_lock    (__sync_single_inode)
   *
Simple merge
diff --cc mm/rmap.c
index 9701574bb67a2501ea15ec0c82601c0cb6e9a987,d04e36a7cc9fb474c1b8dee5b766243958a77b1c..8005080fb9e361316870e684c4057a569d86acf3
+++ b/mm/rmap.c
   *                 sb_lock (within inode_lock in fs/fs-writeback.c)
   *                 mapping->tree_lock (widely used, in set_page_dirty,
   *                           in arch-dependent flush_dcache_mmap_lock,
-  *                           within inode_wb_list_lock in __sync_single_inode)
+  *                           within bdi.wb->list_lock in __sync_single_inode)
   *
 - * (code doesn't rely on that order so it could be switched around)
 - * ->tasklist_lock
 - *   anon_vma->mutex      (memory_failure, collect_procs_anon)
 + * anon_vma->mutex,mapping->i_mutex      (memory_failure, collect_procs_anon)
 + *   ->tasklist_lock
   *     pte map lock
   */