xfs: force buffer writeback before blocking on the ilock in inode reclaim
authorChristoph Hellwig <hch@lst.de>
Tue, 29 Nov 2011 18:06:14 +0000 (12:06 -0600)
committerBen Myers <bpm@sgi.com>
Tue, 29 Nov 2011 18:06:14 +0000 (12:06 -0600)
If we are doing synchronous inode reclaim we block the VM from making
progress in memory reclaim.  So if we encouter a flush locked inode
promote it in the delwri list and wake up xfsbufd to write it out now.
Without this we can get hangs of up to 30 seconds during workloads hitting
synchronous inode reclaim.

The scheme is copied from what we do for dquot reclaims.

Reported-by: Simon Kirby <sim@hostway.ca>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Tested-by: Simon Kirby <sim@hostway.ca>
Signed-off-by: Ben Myers <bpm@sgi.com>
fs/xfs/xfs_inode.c
fs/xfs/xfs_inode.h
fs/xfs/xfs_sync.c

index c0237c602f11deb92fa5f533f74a647005fbf1b8..755ee8164880fe4122bac9de94119f1c7086a9b7 100644 (file)
@@ -2835,6 +2835,27 @@ corrupt_out:
        return XFS_ERROR(EFSCORRUPTED);
 }
 
+void
+xfs_promote_inode(
+       struct xfs_inode        *ip)
+{
+       struct xfs_buf          *bp;
+
+       ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
+
+       bp = xfs_incore(ip->i_mount->m_ddev_targp, ip->i_imap.im_blkno,
+                       ip->i_imap.im_len, XBF_TRYLOCK);
+       if (!bp)
+               return;
+
+       if (XFS_BUF_ISDELAYWRITE(bp)) {
+               xfs_buf_delwri_promote(bp);
+               wake_up_process(ip->i_mount->m_ddev_targp->bt_task);
+       }
+
+       xfs_buf_relse(bp);
+}
+
 /*
  * Return a pointer to the extent record at file index idx.
  */
index 760140d1dd661f42e653576a7c947b11eb8b6fbd..b4cd4739f98e74b2e256295b48fe64e285b320b0 100644 (file)
@@ -498,6 +498,7 @@ int         xfs_iunlink(struct xfs_trans *, xfs_inode_t *);
 void           xfs_iext_realloc(xfs_inode_t *, int, int);
 void           xfs_iunpin_wait(xfs_inode_t *);
 int            xfs_iflush(xfs_inode_t *, uint);
+void           xfs_promote_inode(struct xfs_inode *);
 void           xfs_lock_inodes(xfs_inode_t **, int, uint);
 void           xfs_lock_two_inodes(xfs_inode_t *, xfs_inode_t *, uint);
 
index aa3dc1a4d53d4f85f97a38f0db217bfcc4e3c953..be5c51d8f7572d715fc51f0c86563b46aba380c8 100644 (file)
@@ -770,6 +770,17 @@ restart:
        if (!xfs_iflock_nowait(ip)) {
                if (!(sync_mode & SYNC_WAIT))
                        goto out;
+
+               /*
+                * If we only have a single dirty inode in a cluster there is
+                * a fair chance that the AIL push may have pushed it into
+                * the buffer, but xfsbufd won't touch it until 30 seconds
+                * from now, and thus we will lock up here.
+                *
+                * Promote the inode buffer to the front of the delwri list
+                * and wake up xfsbufd now.
+                */
+               xfs_promote_inode(ip);
                xfs_iflock(ip);
        }