Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/wfg/writeback

author Linus Torvalds <torvalds@linux-foundation.org>
Tue, 26 Jul 2011 17:39:54 +0000 (10:39 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Tue, 26 Jul 2011 17:39:54 +0000 (10:39 -0700)
diff --cc fs/block_dev.c
Simple merge
diff --cc fs/ext4/inode.c
Simple merge
diff --cc fs/fs-writeback.c

index b8c507ca42f76141c31fc8405f7d2274966adb9a,6d49439ca31dc2b0b89f659b55e46e361e303c1a..1599aa985fe2accf6dcc9456a570ba942201685d
--- 1/fs/fs-writeback.c
--- 2/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@@ -460,6 -480,63 +480,37 @@@ writeback_single_inode(struct inode *in
         return ret;
   }
   
- -/*
- - * For background writeback the caller does not have the sb pinned
- - * before calling writeback. So make sure that we do pin it, so it doesn't
- - * go away while we are writing inodes from it.
- - */
- -static bool pin_sb_for_writeback(struct super_block *sb)
- -{
- -      spin_lock(&sb_lock);
- -      if (list_empty(&sb->s_instances)) {
- -              spin_unlock(&sb_lock);
- -              return false;
- -      }
- -
- -      sb->s_count++;
- -      spin_unlock(&sb_lock);
- -
- -      if (down_read_trylock(&sb->s_umount)) {
- -              if (sb->s_root)
- -                      return true;
- -              up_read(&sb->s_umount);
- -      }
- -
- -      put_super(sb);
- -      return false;
- -}
- -
+ static long writeback_chunk_size(struct backing_dev_info *bdi,
+                                struct wb_writeback_work *work)
+ {
+       long pages;
+ 
+       /*
+        * WB_SYNC_ALL mode does livelock avoidance by syncing dirty
+        * inodes/pages in one big loop. Setting wbc.nr_to_write=LONG_MAX
+        * here avoids calling into writeback_inodes_wb() more than once.
+        *
+        * The intended call sequence for WB_SYNC_ALL writeback is:
+        *
+        *      wb_writeback()
+        *          writeback_sb_inodes()       <== called only once
+        *              write_cache_pages()     <== called once for each inode
+        *                   (quickly) tag currently dirty pages
+        *                   (maybe slowly) sync all tagged pages
+        */
+       if (work->sync_mode == WB_SYNC_ALL || work->tagged_writepages)
+               pages = LONG_MAX;
+       else {
+               pages = min(bdi->avg_write_bandwidth / 2,
+                           global_dirty_limit / DIRTY_SCOPE);
+               pages = min(pages, work->nr_pages);
+               pages = round_down(pages + MIN_WRITEBACK_PAGES,
+                                  MIN_WRITEBACK_PAGES);
+       }
+ 
+       return pages;
+ }
+ 
   /*
    * Write a portion of b_io inodes which belong to @sb.
    *
@@@ -559,40 -643,41 +617,41 @@@ static long __writeback_inodes_wb(struc
                 struct inode *inode = wb_inode(wb->b_io.prev);
                 struct super_block *sb = inode->i_sb;
   
- -              if (!pin_sb_for_writeback(sb)) {
+ +              if (!grab_super_passive(sb)) {
-                       requeue_io(inode);
+                       requeue_io(inode, wb);
                         continue;
                 }
-               ret = writeback_sb_inodes(sb, wb, wbc, false);
+               wrote += writeback_sb_inodes(sb, wb, work);
                 drop_super(sb);
   
-               if (ret)
-                       break;
+               /* refer to the same tests at the end of writeback_sb_inodes */
+               if (wrote) {
+                       if (time_is_before_jiffies(start_time + HZ / 10UL))
+                               break;
+                       if (work->nr_pages <= 0)
+                               break;
+               }
         }
-       spin_unlock(&inode_wb_list_lock);
         /* Leave any unwritten inodes on b_io */
+       return wrote;
   }
   
- static void __writeback_inodes_sb(struct super_block *sb,
-               struct bdi_writeback *wb, struct writeback_control *wbc)
+ long writeback_inodes_wb(struct bdi_writeback *wb, long nr_pages)
   {
-       WARN_ON(!rwsem_is_locked(&sb->s_umount));
+       struct wb_writeback_work work = {
+               .nr_pages       = nr_pages,
+               .sync_mode      = WB_SYNC_NONE,
+               .range_cyclic   = 1,
+       };
   
-       spin_lock(&inode_wb_list_lock);
-       if (!wbc->for_kupdate || list_empty(&wb->b_io))
-               queue_io(wb, wbc->older_than_this);
-       writeback_sb_inodes(sb, wb, wbc, true);
-       spin_unlock(&inode_wb_list_lock);
- }
+       spin_lock(&wb->list_lock);
+       if (list_empty(&wb->b_io))
+               queue_io(wb, NULL);
+       __writeback_inodes_wb(wb, &work);
+       spin_unlock(&wb->list_lock);
   
- /*
-  * The maximum number of pages to writeout in a single bdi flush/kupdate
-  * operation.  We do this so we don't hold I_SYNC against an inode for
-  * enormous amounts of time, which would block a userspace task which has
-  * been forced to throttle against that inode.  Also, the code reevaluates
-  * the dirty each time it has written this many pages.
-  */
- #define MAX_WRITEBACK_PAGES     1024
+       return nr_pages - work.nr_pages;
+ }
   
   static inline bool over_bground_thresh(void)
   {
diff --cc fs/inode.c

index 96c77b81167c81b75bfdbaf6f8c1ea0566892748,4be128cbc7543e36d33ad86304fda15ffe7851fe..a48fa5355fb44ef5a8e5a3a1acbcdaea095d829e
--- 1/fs/inode.c
--- 2/fs/inode.c
+++ b/fs/inode.c
@@@ -33,11 -33,11 +33,11 @@@
    *
    * inode->i_lock protects:
    *   inode->i_state, inode->i_hash, __iget()
- - * inode_lru_lock protects:
- - *   inode_lru, inode->i_lru
+ + * inode->i_sb->s_inode_lru_lock protects:
+ + *   inode->i_sb->s_inode_lru, inode->i_lru
    * inode_sb_list_lock protects:
    *   sb->s_inodes, inode->i_sb_list
-  * inode_wb_list_lock protects:
+  * bdi->wb.list_lock protects:
    *   bdi->wb.b_{dirty,io,more_io}, inode->i_wb_list
    * inode_hash_lock protects:
    *   inode_hashtable, inode->i_hash
@@@ -46,9 -46,9 +46,9 @@@
    *
    * inode_sb_list_lock
    *   inode->i_lock
- - *     inode_lru_lock
+ + *     inode->i_sb->s_inode_lru_lock
    *
-  * inode_wb_list_lock
+  * bdi->wb.list_lock
    *   inode->i_lock
    *
    * inode_hash_lock
@@@ -64,9 -64,22 +64,8 @@@ static unsigned int i_hash_shift __read
   static struct hlist_head *inode_hashtable __read_mostly;
   static __cacheline_aligned_in_smp DEFINE_SPINLOCK(inode_hash_lock);
   
- -static LIST_HEAD(inode_lru);
- -static DEFINE_SPINLOCK(inode_lru_lock);
- -
   __cacheline_aligned_in_smp DEFINE_SPINLOCK(inode_sb_list_lock);
- __cacheline_aligned_in_smp DEFINE_SPINLOCK(inode_wb_list_lock);
   
- -/*
- - * iprune_sem provides exclusion between the icache shrinking and the
- - * umount path.
- - *
- - * We don't actually need it to protect anything in the umount path,
- - * but only need to cycle through it to make sure any inode that
- - * prune_icache took off the LRU list has been fully torn down by the
- - * time we are past evict_inodes.
- - */
- -static DECLARE_RWSEM(iprune_sem);
- -
   /*
    * Empty aops. Can be used for the cases where the user does not
    * define any of the address_space operations.
diff --cc fs/nfs/write.c
Simple merge
diff --cc include/trace/events/ext4.h
Simple merge
diff --cc mm/backing-dev.c
Simple merge
diff --cc mm/filemap.c

index 10a17111327362f9b0d5f30dc5f8519983e8f1e8,1e492c3dd6f893e1fece7daa10f675e80a99d7e2..867d40222ec798ce99a8332b991e8d7c4bdfb8ec
--- 1/mm/filemap.c
--- 2/mm/filemap.c
+++ b/mm/filemap.c
@@@ -78,7 -78,10 +78,7 @@@
    *  ->i_mutex                 (generic_file_buffered_write)
    *    ->mmap_sem              (fault_in_pages_readable->do_page_fault)
    *
-  *  inode_wb_list_lock
- - *  ->i_mutex
- - *    ->i_alloc_sem             (various)
- - *
+  *  bdi->wb.list_lock
    *    sb_lock                 (fs/fs-writeback.c)
    *    ->mapping->tree_lock    (__sync_single_inode)
    *
diff --cc mm/page-writeback.c
Simple merge
diff --cc mm/rmap.c

index 9701574bb67a2501ea15ec0c82601c0cb6e9a987,d04e36a7cc9fb474c1b8dee5b766243958a77b1c..8005080fb9e361316870e684c4057a569d86acf3
--- 1/mm/rmap.c
--- 2/mm/rmap.c
+++ b/mm/rmap.c
@@@ -35,10 -36,11 +35,10 @@@
    *                 sb_lock (within inode_lock in fs/fs-writeback.c)
    *                 mapping->tree_lock (widely used, in set_page_dirty,
    *                           in arch-dependent flush_dcache_mmap_lock,
-  *                           within inode_wb_list_lock in __sync_single_inode)
+  *                           within bdi->wb.list_lock in __sync_single_inode)
    *
- - * (code doesn't rely on that order so it could be switched around)
- - * ->tasklist_lock
- - *   anon_vma->mutex      (memory_failure, collect_procs_anon)
+ + * anon_vma->mutex,mapping->i_mutex      (memory_failure, collect_procs_anon)
+ + *   ->tasklist_lock
    *     pte map lock
    */
author	Linus Torvalds <torvalds@linux-foundation.org>
	Tue, 26 Jul 2011 17:39:54 +0000 (10:39 -0700)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Tue, 26 Jul 2011 17:39:54 +0000 (10:39 -0700)
		1	2
fs/block_dev.c	patch \|	diff1 \|	diff2 \|	blob \| history
fs/ext4/inode.c	patch \|	diff1 \|	diff2 \|	blob \| history
fs/fs-writeback.c	patch \|	diff1 \|	diff2 \|	blob \| history
fs/inode.c	patch \|	diff1 \|	diff2 \|	blob \| history
fs/nfs/write.c	patch \|	diff1 \|	diff2 \|	blob \| history
include/trace/events/ext4.h	patch \|	diff1 \|	diff2 \|	blob \| history
mm/backing-dev.c	patch \|	diff1 \|	diff2 \|	blob \| history
mm/filemap.c	patch \|	diff1 \|	diff2 \|	blob \| history
mm/page-writeback.c	patch \|	diff1 \|	diff2 \|	blob \| history
mm/rmap.c	patch \|	diff1 \|	diff2 \|	blob \| history