xfs: Add support FALLOC_FL_COLLAPSE_RANGE for fallocate

author Namjae Jeon <namjae.jeon@samsung.com>

Sun, 23 Feb 2014 23:58:19 +0000 (10:58 +1100)

committer Dave Chinner <david@fromorbit.com>

Sun, 23 Feb 2014 23:58:19 +0000 (10:58 +1100)
author Namjae Jeon <namjae.jeon@samsung.com>
Sun, 23 Feb 2014 23:58:19 +0000 (10:58 +1100)
committer Dave Chinner <david@fromorbit.com>
Sun, 23 Feb 2014 23:58:19 +0000 (10:58 +1100)
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c

index 152543c4ca7031e718bc921b82e9e0a72955de11..5b6092ef51efa9eb6e02c980980c6aa99e486170 100644 (file)
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -5378,3 +5378,196 @@ error0:
         }
         return error;
  }
+
+/*
+ * Shift data fork extent records to the left to cover a hole.
+ *
+ * At most @num_exts extents are shifted per call, each by @offset_shift_fsb
+ * blocks. @current_ext tracks the index of the extent to shift next; if it
+ * is 0 on entry, the starting extent is looked up from @start_fsb. *done is
+ * set to 1 once there are no more extents to shift. If the hole is too small
+ * to shift an extent into, the operation is invalid and EINVAL is returned.
+ */
+int
+xfs_bmap_shift_extents(
+       struct xfs_trans        *tp,
+       struct xfs_inode        *ip,
+       int                     *done,
+       xfs_fileoff_t           start_fsb,
+       xfs_fileoff_t           offset_shift_fsb,
+       xfs_extnum_t            *current_ext,
+       xfs_fsblock_t           *firstblock,
+       struct xfs_bmap_free    *flist,
+       int                     num_exts)
+{
+       struct xfs_btree_cur            *cur;
+       struct xfs_bmbt_rec_host        *gotp;
+       struct xfs_bmbt_irec            got;
+       struct xfs_bmbt_irec            left;
+       struct xfs_mount                *mp = ip->i_mount;
+       struct xfs_ifork                *ifp;
+       xfs_extnum_t                    nexts = 0;
+       xfs_fileoff_t                   startoff;
+       int                             error = 0;
+       int                             i;
+       int                             whichfork = XFS_DATA_FORK;
+       int                             logflags;
+       xfs_filblks_t                   blockcount = 0;
+
+       if (unlikely(XFS_TEST_ERROR(
+           (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
+            XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE),
+            mp, XFS_ERRTAG_BMAPIFORMAT, XFS_RANDOM_BMAPIFORMAT))) {
+               XFS_ERROR_REPORT("xfs_bmap_shift_extents",
+                                XFS_ERRLEVEL_LOW, mp);
+               return XFS_ERROR(EFSCORRUPTED);
+       }
+
+       if (XFS_FORCED_SHUTDOWN(mp))
+               return XFS_ERROR(EIO);
+
+       ASSERT(current_ext != NULL);
+
+       ifp = XFS_IFORK_PTR(ip, whichfork);
+
+       if (!(ifp->if_flags & XFS_IFEXTENTS)) {
+               /* Read in all the extents */
+               error = xfs_iread_extents(tp, ip, whichfork);
+               if (error)
+                       return error;
+       }
+
+       /*
+        * If *current_ext is 0, we need to look up the extent from
+        * which we start shifting and store it in gotp.
+        */
+       if (!*current_ext) {
+               gotp = xfs_iext_bno_to_ext(ifp, start_fsb, current_ext);
+               /*
+                * gotp can be NULL in 2 cases: 1) there are no extents,
+                * or 2) start_fsb lies in a hole beyond which there are
+                * no extents. Either way, we are done.
+                */
+               if (!gotp) {
+                       *done = 1;
+                       return 0;
+               }
+       }
+
+       /* We are going to change the inode core */
+       logflags = XFS_ILOG_CORE;
+
+       if (ifp->if_flags & XFS_IFBROOT) {
+               cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
+               cur->bc_private.b.firstblock = *firstblock;
+               cur->bc_private.b.flist = flist;
+               cur->bc_private.b.flags = 0;
+       } else {
+               cur = NULL;
+               logflags |= XFS_ILOG_DEXT;
+       }
+
+       while (nexts++ < num_exts &&
+              *current_ext <  XFS_IFORK_NEXTENTS(ip, whichfork)) {
+
+               gotp = xfs_iext_get_ext(ifp, *current_ext);
+               xfs_bmbt_get_all(gotp, &got);
+               startoff = got.br_startoff - offset_shift_fsb;
+
+               /*
+                * Before shifting the extent into the hole, make sure that
+                * the hole is large enough to accommodate the shift.
+                */
+               if (*current_ext) {
+                       xfs_bmbt_get_all(xfs_iext_get_ext(ifp,
+                                               *current_ext - 1), &left);
+
+                       if (startoff < left.br_startoff + left.br_blockcount)
+                               error = XFS_ERROR(EINVAL);
+               } else if (offset_shift_fsb > got.br_startoff) {
+                       /*
+                        * When the first extent is shifted, offset_shift_fsb
+                        * must not be greater than the starting offset of
+                        * the first extent.
+                        */
+                       error = XFS_ERROR(EINVAL);
+               }
+
+               if (error)
+                       goto del_cursor;
+
+               if (cur) {
+                       error = xfs_bmbt_lookup_eq(cur, got.br_startoff,
+                                                  got.br_startblock,
+                                                  got.br_blockcount,
+                                                  &i);
+                       if (error)
+                               goto del_cursor;
+                       XFS_WANT_CORRUPTED_GOTO(i == 1, del_cursor);
+               }
+
+               /* Check if the shifted extent can be merged with its left neighbour */
+               if (*current_ext &&
+                   left.br_startoff + left.br_blockcount == startoff &&
+                   left.br_startblock + left.br_blockcount ==
+                               got.br_startblock &&
+                   left.br_state == got.br_state &&
+                   left.br_blockcount + got.br_blockcount <= MAXEXTLEN) {
+                       blockcount = left.br_blockcount +
+                               got.br_blockcount;
+                       xfs_iext_remove(ip, *current_ext, 1, 0);
+                       if (cur) {
+                               error = xfs_btree_delete(cur, &i);
+                               if (error)
+                                       goto del_cursor;
+                               XFS_WANT_CORRUPTED_GOTO(i == 1, del_cursor);
+                       }
+                       XFS_IFORK_NEXT_SET(ip, whichfork,
+                               XFS_IFORK_NEXTENTS(ip, whichfork) - 1);
+                       gotp = xfs_iext_get_ext(ifp, --*current_ext);
+                       xfs_bmbt_get_all(gotp, &got);
+
+                       /* Make the cursor point to the extent we will update */
+                       if (cur) {
+                               error = xfs_bmbt_lookup_eq(cur, got.br_startoff,
+                                                          got.br_startblock,
+                                                          got.br_blockcount,
+                                                          &i);
+                               if (error)
+                                       goto del_cursor;
+                               XFS_WANT_CORRUPTED_GOTO(i == 1, del_cursor);
+                       }
+
+                       xfs_bmbt_set_blockcount(gotp, blockcount);
+                       got.br_blockcount = blockcount;
+               } else {
+                       /* No merge possible: only the startoff has to be updated */
+                       xfs_bmbt_set_startoff(gotp, startoff);
+                       got.br_startoff = startoff;
+               }
+
+               if (cur) {
+                       error = xfs_bmbt_update(cur, got.br_startoff,
+                                               got.br_startblock,
+                                               got.br_blockcount,
+                                               got.br_state);
+                       if (error)
+                               goto del_cursor;
+               }
+
+               (*current_ext)++;
+       }
+
+       /* We are done once the last extent of the fork has been processed */
+       if (*current_ext ==  XFS_IFORK_NEXTENTS(ip, whichfork))
+               *done = 1;
+
+del_cursor:
+       if (cur)
+               xfs_btree_del_cursor(cur,
+                       error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
+
+       xfs_trans_log_inode(tp, ip, logflags);
+
+       return error;
+}
diff --git a/fs/xfs/xfs_bmap.h b/fs/xfs/xfs_bmap.h

index 33b41f35122574e0b1cf7ad7a2a9ae23ecfadddb..f84bd7af43bec38bd4493c473f95d32e7f590337 100644 (file)
--- a/fs/xfs/xfs_bmap.h
+++ b/fs/xfs/xfs_bmap.h
@@ -127,6 +127,16 @@ static inline void xfs_bmap_init(xfs_bmap_free_t *flp, xfs_fsblock_t *fbp)
         { BMAP_RIGHT_FILLING,   "RF" }, \
         { BMAP_ATTRFORK,        "ATTR" }
  
+
+/*
+ * This macro determines how many extents will be shifted in one
+ * write transaction. Shifting one extent could require two splits,
+ * an extent move on the first and an extent merge on the second,
+ * so it is proper that only one extent is shifted inside a write
+ * transaction at a time.
+ */
+#define XFS_BMAP_MAX_SHIFT_EXTENTS     1
+
  #ifdef DEBUG
  void   xfs_bmap_trace_exlist(struct xfs_inode *ip, xfs_extnum_t cnt,
                 int whichfork, unsigned long caller_ip);
@@ -169,5 +179,10 @@ int        xfs_bunmapi(struct xfs_trans *tp, struct xfs_inode *ip,
  int    xfs_check_nostate_extents(struct xfs_ifork *ifp, xfs_extnum_t idx,
                 xfs_extnum_t num);
  uint   xfs_default_attroffset(struct xfs_inode *ip);
+int    xfs_bmap_shift_extents(struct xfs_trans *tp, struct xfs_inode *ip,
+               int *done, xfs_fileoff_t start_fsb,
+               xfs_fileoff_t offset_shift_fsb, xfs_extnum_t *current_ext,
+               xfs_fsblock_t *firstblock, struct xfs_bmap_free *flist,
+               int num_exts);
  
  #endif /* __XFS_BMAP_H__ */
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c

index f264616080cac0c6bdfdc084c188b15505aa92e7..01f6a646caa121895265cfe33a7d97860f786a18 100644 (file)
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -1349,7 +1349,6 @@ xfs_free_file_space(
                  * the freeing of the space succeeds at ENOSPC.
                  */
                 tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT);
-               tp->t_flags |= XFS_TRANS_RESERVE;
                 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_write, resblks, 0);
  
                 /*
@@ -1467,6 +1466,102 @@ out:
  
  }
  
+/*
+ * xfs_collapse_file_space()
+ *     This routine frees disk space and shifts extents for the given file.
+ *     The first thing we do is to free the data blocks in the specified
+ *     range by calling xfs_free_file_space(). It is also expected to sync
+ *     dirty data and invalidate the page cache over the collapsed range.
+ *     Then the extent records are shifted to the left to cover the hole.
+ * RETURNS:
+ *     0 on success
+ *     errno on error
+ *
+ */
+int
+xfs_collapse_file_space(
+       struct xfs_inode        *ip,
+       xfs_off_t               offset,
+       xfs_off_t               len)
+{
+       int                     done = 0;
+       struct xfs_mount        *mp = ip->i_mount;
+       struct xfs_trans        *tp;
+       int                     error;
+       xfs_extnum_t            current_ext = 0;
+       struct xfs_bmap_free    free_list;
+       xfs_fsblock_t           first_block;
+       int                     committed;
+       xfs_fileoff_t           start_fsb;
+       xfs_fileoff_t           shift_fsb;
+
+       ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
+
+       trace_xfs_collapse_file_space(ip);
+
+       start_fsb = XFS_B_TO_FSB(mp, offset + len);
+       shift_fsb = XFS_B_TO_FSB(mp, len);
+
+       error = xfs_free_file_space(ip, offset, len);
+       if (error)
+               return error;
+
+       while (!error && !done) {
+               tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT);
+               tp->t_flags |= XFS_TRANS_RESERVE;
+               /*
+                * We need a block reservation for this transaction:
+                * after shifting an extent into the hole we may find
+                * that adjacent extents can be merged, which may lead
+                * to the freeing of a block during the record update.
+                */
+               error = xfs_trans_reserve(tp, &M_RES(mp)->tr_write,
+                               XFS_DIOSTRAT_SPACE_RES(mp, 0), 0);
+               if (error) {
+                       ASSERT(error == ENOSPC || XFS_FORCED_SHUTDOWN(mp));
+                       xfs_trans_cancel(tp, 0);
+                       break;
+               }
+
+               xfs_ilock(ip, XFS_ILOCK_EXCL);
+               error = xfs_trans_reserve_quota(tp, mp, ip->i_udquot,
+                               ip->i_gdquot, ip->i_pdquot,
+                               XFS_DIOSTRAT_SPACE_RES(mp, 0), 0,
+                               XFS_QMOPT_RES_REGBLKS);
+               if (error)
+                       goto out;
+
+               xfs_trans_ijoin(tp, ip, 0);
+
+               xfs_bmap_init(&free_list, &first_block);
+
+               /*
+                * We are using the write transaction, in which at most
+                * 2 bmbt updates are allowed.
+                */
+               error = xfs_bmap_shift_extents(tp, ip, &done, start_fsb,
+                                              shift_fsb, &current_ext,
+                                              &first_block, &free_list,
+                                              XFS_BMAP_MAX_SHIFT_EXTENTS);
+               if (error)
+                       goto out;
+
+               error = xfs_bmap_finish(&tp, &free_list, &committed);
+               if (error)
+                       goto out;
+
+               error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
+               xfs_iunlock(ip, XFS_ILOCK_EXCL);
+       }
+
+       return error;
+
+out:
+       xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT);
+       xfs_iunlock(ip, XFS_ILOCK_EXCL);
+       return error;
+}
+
  /*
   * We need to check that the format of the data fork in the temporary inode is
   * valid for the target inode before doing the swap. This is not a problem with
diff --git a/fs/xfs/xfs_bmap_util.h b/fs/xfs/xfs_bmap_util.h

index 900747b25772c2b1a41821fba8e99c3cde2b3ffc..935ed2b24edfb05b4d5893dccf7cebdb09a374ed 100644 (file)
--- a/fs/xfs/xfs_bmap_util.h
+++ b/fs/xfs/xfs_bmap_util.h
@@ -99,6 +99,8 @@ int   xfs_free_file_space(struct xfs_inode *ip, xfs_off_t offset,
                             xfs_off_t len);
  int    xfs_zero_file_space(struct xfs_inode *ip, xfs_off_t offset,
                             xfs_off_t len);
+int    xfs_collapse_file_space(struct xfs_inode *, xfs_off_t offset,
+                               xfs_off_t len);
  
  /* EOF block manipulation functions */
  bool   xfs_can_free_eofblocks(struct xfs_inode *ip, bool force);
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c

index 2e7989e3a2d67374d17e5086ec3b15bfbcb32e2d..52f96e16694c1ef6b3a3a8e4889a9a136ae61e6a 100644 (file)
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -823,7 +823,8 @@ xfs_file_fallocate(
  
         if (!S_ISREG(inode->i_mode))
                 return -EINVAL;
-       if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
+       if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE |
+                    FALLOC_FL_COLLAPSE_RANGE))
                 return -EOPNOTSUPP;
  
         xfs_ilock(ip, XFS_IOLOCK_EXCL);
@@ -831,6 +832,20 @@ xfs_file_fallocate(
                 error = xfs_free_file_space(ip, offset, len);
                 if (error)
                         goto out_unlock;
+       } else if (mode & FALLOC_FL_COLLAPSE_RANGE) {
+               unsigned blksize_mask = (1 << inode->i_blkbits) - 1;
+
+               if (offset & blksize_mask || len & blksize_mask) {
+                       error = -EINVAL;
+                       goto out_unlock;
+               }
+
+               ASSERT(offset + len < i_size_read(inode));
+               new_size = i_size_read(inode) - len;
+
+               error = xfs_collapse_file_space(ip, offset, len);
+               if (error)
+                       goto out_unlock;
         } else {
                 if (!(mode & FALLOC_FL_KEEP_SIZE) &&
                     offset + len > i_size_read(inode)) {
@@ -859,7 +874,7 @@ xfs_file_fallocate(
         if (ip->i_d.di_mode & S_IXGRP)
                 ip->i_d.di_mode &= ~S_ISGID;
  
-       if (!(mode & FALLOC_FL_PUNCH_HOLE))
+       if (!(mode & (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_COLLAPSE_RANGE)))
                 ip->i_d.di_flags |= XFS_DIFLAG_PREALLOC;
  
         xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h

index 425dfa45b9a087472676cf4f832138316d4b0fa4..a4ae41c179a8a66a5772914a61642b8a53be1c4b 100644 (file)
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -603,6 +603,7 @@ DEFINE_INODE_EVENT(xfs_readlink);
  DEFINE_INODE_EVENT(xfs_inactive_symlink);
  DEFINE_INODE_EVENT(xfs_alloc_file_space);
  DEFINE_INODE_EVENT(xfs_free_file_space);
+DEFINE_INODE_EVENT(xfs_collapse_file_space);
  DEFINE_INODE_EVENT(xfs_readdir);
  #ifdef CONFIG_XFS_POSIX_ACL
  DEFINE_INODE_EVENT(xfs_get_acl);
author	Namjae Jeon <namjae.jeon@samsung.com>
	Sun, 23 Feb 2014 23:58:19 +0000 (10:58 +1100)
committer	Dave Chinner <david@fromorbit.com>
	Sun, 23 Feb 2014 23:58:19 +0000 (10:58 +1100)
fs/xfs/xfs_bmap.c		patch \| blob \| history
fs/xfs/xfs_bmap.h		patch \| blob \| history
fs/xfs/xfs_bmap_util.c		patch \| blob \| history
fs/xfs/xfs_bmap_util.h		patch \| blob \| history
fs/xfs/xfs_file.c		patch \| blob \| history
fs/xfs/xfs_trace.h		patch \| blob \| history