* write. If zeroing is needed and we are currently holding the
* iolock shared, we need to update it to exclusive which implies
* having to redo all checks before.
+ *
+ * We need to serialise against EOF updates that occur in IO
+ * completions here. We want to make sure that nobody is changing the
+ * size while we do this check until we have placed an IO barrier (i.e.
+ * hold the XFS_IOLOCK_EXCL) that prevents new IO from being dispatched.
+ * The spinlock effectively forms a memory barrier once we have the
+ * XFS_IOLOCK_EXCL so we are guaranteed to see the latest EOF value
+ * and hence be able to correctly determine if we need to run zeroing.
*/
+ spin_lock(&ip->i_flags_lock);
if (*pos > i_size_read(inode)) {
+ bool zero = false;
+
+ spin_unlock(&ip->i_flags_lock);
if (*iolock == XFS_IOLOCK_SHARED) {
xfs_rw_iunlock(ip, *iolock);
*iolock = XFS_IOLOCK_EXCL;
xfs_rw_ilock(ip, *iolock);
+
+ /*
+ * We now have an IO submission barrier in place, but
+ * AIO can do EOF updates during IO completion and hence
+ * we now need to wait for all of them to drain. Non-AIO
+ * DIO will have drained before we are given the
+ * XFS_IOLOCK_EXCL, and so for most cases this wait is a
+ * no-op.
+ */
+ inode_dio_wait(inode);
goto restart;
}
- error = xfs_zero_eof(ip, *pos, i_size_read(inode));
+ error = xfs_zero_eof(ip, *pos, i_size_read(inode), &zero);
if (error)
return error;
- }
+ } else
+ spin_unlock(&ip->i_flags_lock);
/*
* Updating the timestamps will grab the ilock again from