xfs: force buffer writeback before blocking on the ilock in inode reclaim
author Christoph Hellwig <hch@infradead.org>
Tue, 6 Dec 2011 21:21:15 +0000 (16:21 -0500)
committer Greg Kroah-Hartman <gregkh@suse.de>
Fri, 9 Dec 2011 16:52:46 +0000 (08:52 -0800)
commit 4dd2cb4a28b7ab1f37163a4eba280926a13a8749 upstream.

If we are doing synchronous inode reclaim we block the VM from making
progress in memory reclaim.  So if we encounter a flush-locked inode,
promote it in the delwri list and wake up xfsbufd to write it out now.
Without this we can get hangs of up to 30 seconds during workloads hitting
synchronous inode reclaim.

The scheme is copied from what we do for dquot reclaims.

Reported-by: Simon Kirby <sim@hostway.ca>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Tested-by: Simon Kirby <sim@hostway.ca>
Signed-off-by: Ben Myers <bpm@sgi.com>
Acked-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
fs/xfs/linux-2.6/xfs_sync.c
fs/xfs/xfs_inode.c
fs/xfs/xfs_inode.h

index 8ecad5ff9f9b0e0bdb5a4f76dd97b694147a5a55..b69688d0776e61adb3222c9bd49e50925fa33f09 100644 (file)
@@ -772,6 +772,17 @@ restart:
        if (!xfs_iflock_nowait(ip)) {
                if (!(sync_mode & SYNC_WAIT))
                        goto out;
+
+               /*
+                * If we only have a single dirty inode in a cluster there is
+                * a fair chance that the AIL push may have pushed it into
+                * the buffer, but xfsbufd won't touch it until 30 seconds
+                * from now, and thus we will lock up here.
+                *
+                * Promote the inode buffer to the front of the delwri list
+                * and wake up xfsbufd now.
+                */
+               xfs_promote_inode(ip);
                xfs_iflock(ip);
        }
 
index a098a20ca63e29bbd021a266e287d87ca796fd8c..c6888a420c516b78bb90562988d061dada4abfb4 100644 (file)
@@ -3099,6 +3099,27 @@ corrupt_out:
        return XFS_ERROR(EFSCORRUPTED);
 }
 
+void
+xfs_promote_inode(
+       struct xfs_inode        *ip)
+{
+       struct xfs_buf          *bp;
+
+       ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
+
+       bp = xfs_incore(ip->i_mount->m_ddev_targp, ip->i_imap.im_blkno,
+                       ip->i_imap.im_len, XBF_TRYLOCK);
+       if (!bp)
+               return;
+
+       if (XFS_BUF_ISDELAYWRITE(bp)) {
+               xfs_buf_delwri_promote(bp);
+               wake_up_process(ip->i_mount->m_ddev_targp->bt_task);
+       }
+
+       xfs_buf_relse(bp);
+}
+
 /*
  * Return a pointer to the extent record at file index idx.
  */
index 964cfea776868684afb26f818b8a761ce652b1b5..28b3596453e031e189f72125a5be5cb03adea8a4 100644 (file)
@@ -509,6 +509,7 @@ int         xfs_iunlink(struct xfs_trans *, xfs_inode_t *);
 void           xfs_iext_realloc(xfs_inode_t *, int, int);
 void           xfs_iunpin_wait(xfs_inode_t *);
 int            xfs_iflush(xfs_inode_t *, uint);
+void           xfs_promote_inode(struct xfs_inode *);
 void           xfs_lock_inodes(xfs_inode_t **, int, uint);
 void           xfs_lock_two_inodes(xfs_inode_t *, xfs_inode_t *, uint);