}
/*
- * When we map a DIO buffer, we need to attach an ioend that describes the type
- * of write IO we are doing. This passes to the completion function the
- * operations it needs to perform.
+ * When we map a DIO buffer, we may need to attach an ioend that describes the
+ * type of write IO we are doing. This passes to the completion function the
+ * operations it needs to perform. If the mapping is for an overwrite wholly
+ * within the EOF then we don't need an ioend and so we don't allocate one.
+ * This avoids the unnecessary overhead of allocating and freeing ioends for
+ * workloads that don't require transactions on IO completion.
*
* If we get multiple mappings in a single IO, we might be mapping different
* types. But because the direct IO can only have a single private pointer, we
* need to ensure that:
*
- * a) the ioend spans the entire region of the IO; and
+ * a) i) the ioend spans the entire region of unwritten mappings; or
+ * ii) the ioend spans all the mappings that cross or are beyond EOF; and
* b) if it contains unwritten extents, it is *permanently* marked as such
*
* We could do this by chaining ioends like buffered IO does, but we only
trace_xfs_gbmap_direct_update(XFS_I(inode), ioend->io_offset,
ioend->io_size, ioend->io_type,
imap);
- } else {
+ } else if (type == XFS_IO_UNWRITTEN ||
+ offset + size > i_size_read(inode)) {
ioend = xfs_alloc_ioend(inode, type);
ioend->io_offset = offset;
ioend->io_size = size;
+
bh_result->b_private = ioend;
+ set_buffer_defer_completion(bh_result);
trace_xfs_gbmap_direct_new(XFS_I(inode), offset, size, type,
imap);
+ } else {
+ trace_xfs_gbmap_direct_none(XFS_I(inode), offset, size, type,
+ imap);
}
-
- if (ioend->io_type == XFS_IO_UNWRITTEN || xfs_ioend_is_append(ioend))
- set_buffer_defer_completion(bh_result);
}
-
/*
* If this is O_DIRECT or the mpage code calling tell them how large the mapping
* is, so that we can avoid repeated get_blocks calls.
/*
* Complete a direct I/O write request.
*
- * If the private argument is non-NULL __xfs_get_blocks signals us that we
- * need to issue a transaction to convert the range from unwritten to written
- * extents.
+ * The ioend structure is passed from __xfs_get_blocks() to tell us what to do.
+ * If no ioend exists (i.e. @private == NULL) then the write IO is an overwrite
+ * wholly within the EOF and so there is nothing for us to do. Note that in this
+ * case the completion can be called in interrupt context, whereas if we have an
+ * ioend we will always be called in task context (i.e. from a workqueue).
*/
STATIC void
xfs_end_io_direct_write(
struct xfs_mount *mp = ip->i_mount;
struct xfs_ioend *ioend = private;
- trace_xfs_gbmap_direct_endio(ip, offset, size, ioend->io_type, NULL);
+ trace_xfs_gbmap_direct_endio(ip, offset, size,
+ ioend ? ioend->io_type : 0, NULL);
+
+ if (!ioend) {
+ ASSERT(offset + size <= i_size_read(inode));
+ return;
+ }
if (XFS_FORCED_SHUTDOWN(mp))
goto out_end_io;
/*
* The ioend only maps whole blocks, while the IO may be sector aligned.
- * Hence the ioend offset/size may not match the IO offset/size exactly,
- * but should span it completely. Write the IO sizes into the ioend so
- * that completion processing does the right thing.
+ * Hence the ioend offset/size may not match the IO offset/size exactly.
+ * Because we don't map overwrites within EOF into the ioend, the offset
+ * may not match, but only if the endio spans EOF. Either way, write
+ * the IO sizes into the ioend so that completion processing does the
+ * right thing.
*/
- ASSERT(size <= ioend->io_size);
- ASSERT(offset >= ioend->io_offset);
ASSERT(offset + size <= ioend->io_offset + ioend->io_size);
ioend->io_size = size;
ioend->io_offset = offset;
* The ioend tells us whether we are doing unwritten extent conversion
* or an append transaction that updates the on-disk file size. These
* cases are the only cases where we should *potentially* be needing
- * to update the VFS inode size. When the ioend indicates this, we
- * are *guaranteed* to be running in non-interrupt context.
+ * to update the VFS inode size.
*
* We need to update the in-core inode size here so that we don't end up
- * with the on-disk inode size being outside the in-core inode size.
- * While we can do this in the process context after the IO has
- * completed, this does not work for AIO and hence we always update
- * the in-core inode size here if necessary.
+ * with the on-disk inode size being outside the in-core inode size. We
+ * have no other method of updating EOF for AIO, so always do it here
+ * if necessary.
*/
- if (ioend->io_type == XFS_IO_UNWRITTEN || xfs_ioend_is_append(ioend)) {
- if (offset + size > i_size_read(inode))
- i_size_write(inode, offset + size);
- } else
- ASSERT(offset + size <= i_size_read(inode));
+ if (offset + size > i_size_read(inode))
+ i_size_write(inode, offset + size);
/*
* If we are doing an append IO that needs to update the EOF on disk,
* result in the ioend processing passing on the error if it is
* possible as we can't return it from here.
*/
- if (ioend->io_type == XFS_IO_OVERWRITE && xfs_ioend_is_append(ioend))
+ if (ioend->io_type == XFS_IO_OVERWRITE)
ioend->io_error = xfs_setfilesize_trans_alloc(ioend);
out_end_io: