xfs: introduce inode cluster buffer trylocks for xfs_iflush

author Dave Chinner <dchinner@redhat.com>

Fri, 25 Mar 2011 22:13:55 +0000 (09:13 +1100)

committer Dave Chinner <david@fromorbit.com>

Fri, 25 Mar 2011 22:13:55 +0000 (09:13 +1100)
author Dave Chinner <dchinner@redhat.com>
Fri, 25 Mar 2011 22:13:55 +0000 (09:13 +1100)
committer Dave Chinner <david@fromorbit.com>
Fri, 25 Mar 2011 22:13:55 +0000 (09:13 +1100)
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c

index 818c4cf2de863e5450ac75ad300a1c78bfdbca60..8a70b2a17d6f410c3d61ee6e5845722058ed4588 100644 (file)
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -1078,7 +1078,7 @@ xfs_fs_write_inode(
                         error = 0;
                         goto out_unlock;
                 }
-               error = xfs_iflush(ip, 0);
+               error = xfs_iflush(ip, SYNC_TRYLOCK);
         }
  
   out_unlock:
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c

index 6c10f1d2e3d3a8e4e094c5153d90b5c8ac60973f..594cd822d84de225755006321e5b97ebfe35cf70 100644 (file)
--- a/fs/xfs/linux-2.6/xfs_sync.c
+++ b/fs/xfs/linux-2.6/xfs_sync.c
@@ -761,8 +761,10 @@ xfs_reclaim_inode(
         struct xfs_perag        *pag,
         int                     sync_mode)
  {
-       int     error = 0;
+       int     error;
  
+restart:
+       error = 0;
         xfs_ilock(ip, XFS_ILOCK_EXCL);
         if (!xfs_iflock_nowait(ip)) {
                 if (!(sync_mode & SYNC_WAIT))
@@ -788,9 +790,31 @@ xfs_reclaim_inode(
         if (xfs_inode_clean(ip))
                 goto reclaim;
  
-       /* Now we have an inode that needs flushing */
-       error = xfs_iflush(ip, sync_mode);
+       /*
+        * Now we have an inode that needs flushing.
+        *
+        * We do a nonblocking flush here even if we are doing a SYNC_WAIT
+        * reclaim as we can deadlock with inode cluster removal.
+        * xfs_ifree_cluster() can lock the inode buffer before it locks the
+        * ip->i_lock, and we are doing the exact opposite here. As a result,
+        * doing a blocking xfs_itobp() to get the cluster buffer will result
+        * in an ABBA deadlock with xfs_ifree_cluster().
+        *
+        * As xfs_ifree_cluster() must gather all inodes that are active in the
+        * cache to mark them stale, if we hit this case we don't actually want
+        * to do IO here - we want the inode marked stale so we can simply
+        * reclaim it. Hence if we get an EAGAIN error on a SYNC_WAIT flush,
+        * just unlock the inode, back off and try again. Hopefully the next
+        * pass through will see the stale flag set on the inode.
+        */
+       error = xfs_iflush(ip, SYNC_TRYLOCK | sync_mode);
         if (sync_mode & SYNC_WAIT) {
+               if (error == EAGAIN) {
+                       xfs_iunlock(ip, XFS_ILOCK_EXCL);
+                       /* backoff longer than in xfs_ifree_cluster */
+                       delay(2);
+                       goto restart;
+               }
                 xfs_iflock(ip);
                 goto reclaim;
         }
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c

index da871f5322368dffac20f1514a5966a45f26061e..742c8330994a82ff09bbc22f68695743751bc3eb 100644 (file)
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -2835,7 +2835,7 @@ xfs_iflush(
          * Get the buffer containing the on-disk inode.
          */
         error = xfs_itobp(mp, NULL, ip, &dip, &bp,
-                               (flags & SYNC_WAIT) ? XBF_LOCK : XBF_TRYLOCK);
+                               (flags & SYNC_TRYLOCK) ? XBF_TRYLOCK : XBF_LOCK);
         if (error || !bp) {
                 xfs_ifunlock(ip);
                 return error;
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c

index fd4f398bd6f1d8d01cca49046a65991d4d696d77..46cc40131d4a43c91ec5eb1d188c46e967c5193c 100644 (file)
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -760,11 +760,11 @@ xfs_inode_item_push(
          * Push the inode to it's backing buffer. This will not remove the
          * inode from the AIL - a further push will be required to trigger a
          * buffer push. However, this allows all the dirty inodes to be pushed
-        * to the buffer before it is pushed to disk. THe buffer IO completion
-        * will pull th einode from the AIL, mark it clean and unlock the flush
+        * to the buffer before it is pushed to disk. The buffer IO completion
+        * will pull the inode from the AIL, mark it clean and unlock the flush
          * lock.
          */
-       (void) xfs_iflush(ip, 0);
+       (void) xfs_iflush(ip, SYNC_TRYLOCK);
         xfs_iunlock(ip, XFS_ILOCK_SHARED);
  }
author	Dave Chinner <dchinner@redhat.com>
	Fri, 25 Mar 2011 22:13:55 +0000 (09:13 +1100)
committer	Dave Chinner <david@fromorbit.com>
	Fri, 25 Mar 2011 22:13:55 +0000 (09:13 +1100)
fs/xfs/linux-2.6/xfs_super.c		patch \| blob \| history
fs/xfs/linux-2.6/xfs_sync.c		patch \| blob \| history
fs/xfs/xfs_inode.c		patch \| blob \| history
fs/xfs/xfs_inode_item.c		patch \| blob \| history