ext4: collapse ext4_convert_initialized_extents()
[firefly-linux-kernel-4.4.55.git] / fs / ext4 / extents.c
index 76c2df382b7d05619bbe4093d2115ca6dafa617e..5fc5e2b6e3a7fb6f10a1439678b004569079e38f 100644 (file)
@@ -291,6 +291,19 @@ static inline int ext4_ext_space_root_idx(struct inode *inode, int check)
        return size;
 }
 
+static inline int
+ext4_force_split_extent_at(handle_t *handle, struct inode *inode,
+                          struct ext4_ext_path *path, ext4_lblk_t lblk,
+                          int nofail)
+{
+       int unwritten = ext4_ext_is_unwritten(path[path->p_depth].p_ext);
+
+       return ext4_split_extent_at(handle, inode, path, lblk, unwritten ?
+                       EXT4_EXT_MARK_UNWRIT1|EXT4_EXT_MARK_UNWRIT2 : 0,
+                       EXT4_EX_NOCACHE | EXT4_GET_BLOCKS_PRE_IO |
+                       (nofail ? EXT4_GET_BLOCKS_METADATA_NOFAIL:0));
+}
+
 /*
  * Calculate the number of metadata blocks needed
  * to allocate @blocks
@@ -842,11 +855,13 @@ int ext4_ext_tree_init(handle_t *handle, struct inode *inode)
 
 struct ext4_ext_path *
 ext4_ext_find_extent(struct inode *inode, ext4_lblk_t block,
-                    struct ext4_ext_path *path, int flags)
+                    struct ext4_ext_path **orig_path, int flags)
 {
        struct ext4_extent_header *eh;
        struct buffer_head *bh;
-       short int depth, i, ppos = 0, alloc = 0;
+       struct ext4_ext_path *path = orig_path ? *orig_path : NULL;
+       short int depth, i, ppos = 0;
+       short free_on_err = (flags & EXT4_EX_NOFREE_ON_ERR) == 0;
        int ret;
 
        eh = ext_inode_hdr(inode);
@@ -856,9 +871,9 @@ ext4_ext_find_extent(struct inode *inode, ext4_lblk_t block,
        if (!path) {
                path = kzalloc(sizeof(struct ext4_ext_path) * (depth + 2),
                                GFP_NOFS);
-               if (!path)
+               if (unlikely(!path))
                        return ERR_PTR(-ENOMEM);
-               alloc = 1;
+               free_on_err = 1;
        }
        path[0].p_hdr = eh;
        path[0].p_bh = NULL;
@@ -876,7 +891,7 @@ ext4_ext_find_extent(struct inode *inode, ext4_lblk_t block,
 
                bh = read_extent_tree_block(inode, path[ppos].p_block, --i,
                                            flags);
-               if (IS_ERR(bh)) {
+               if (unlikely(IS_ERR(bh))) {
                        ret = PTR_ERR(bh);
                        goto err;
                }
@@ -910,8 +925,11 @@ ext4_ext_find_extent(struct inode *inode, ext4_lblk_t block,
 
 err:
        ext4_ext_drop_refs(path);
-       if (alloc)
+       if (free_on_err) {
                kfree(path);
+               if (orig_path)
+                       *orig_path = NULL;
+       }
        return ERR_PTR(ret);
 }
 
@@ -1343,7 +1361,7 @@ repeat:
                ext4_ext_drop_refs(path);
                path = ext4_ext_find_extent(inode,
                                    (ext4_lblk_t)le32_to_cpu(newext->ee_block),
-                                   path, gb_flags);
+                                   &path, gb_flags | EXT4_EX_NOFREE_ON_ERR);
                if (IS_ERR(path))
                        err = PTR_ERR(path);
        } else {
@@ -1356,7 +1374,7 @@ repeat:
                ext4_ext_drop_refs(path);
                path = ext4_ext_find_extent(inode,
                                   (ext4_lblk_t)le32_to_cpu(newext->ee_block),
-                                   path, gb_flags);
+                                   &path, gb_flags | EXT4_EX_NOFREE_ON_ERR);
                if (IS_ERR(path)) {
                        err = PTR_ERR(path);
                        goto out;
@@ -1559,7 +1577,7 @@ found_extent:
  * allocated block. Thus, index entries have to be consistent
  * with leaves.
  */
-static ext4_lblk_t
+ext4_lblk_t
 ext4_ext_next_allocated_block(struct ext4_ext_path *path)
 {
        int depth;
@@ -2139,7 +2157,7 @@ static int ext4_fill_fiemap_extents(struct inode *inode,
                        path = NULL;
                }
 
-               path = ext4_ext_find_extent(inode, block, path, 0);
+               path = ext4_ext_find_extent(inode, block, &path, 0);
                if (IS_ERR(path)) {
                        up_read(&EXT4_I(inode)->i_data_sem);
                        err = PTR_ERR(path);
@@ -2854,24 +2872,14 @@ again:
                 */
                if (end >= ee_block &&
                    end < ee_block + ext4_ext_get_actual_len(ex) - 1) {
-                       int split_flag = 0;
-
-                       if (ext4_ext_is_unwritten(ex))
-                               split_flag = EXT4_EXT_MARK_UNWRIT1 |
-                                            EXT4_EXT_MARK_UNWRIT2;
-
                        /*
                         * Split the extent in two so that 'end' is the last
                         * block in the first new extent. Also we should not
                         * fail removing space due to ENOSPC so try to use
                         * reserved block if that happens.
                         */
-                       err = ext4_split_extent_at(handle, inode, path,
-                                       end + 1, split_flag,
-                                       EXT4_EX_NOCACHE |
-                                       EXT4_GET_BLOCKS_PRE_IO |
-                                       EXT4_GET_BLOCKS_METADATA_NOFAIL);
-
+                       err = ext4_force_split_extent_at(handle, inode, path,
+                                                        end + 1, 1);
                        if (err < 0)
                                goto out;
                }
@@ -3310,7 +3318,8 @@ static int ext4_split_extent(handle_t *handle,
         * result in split of original leaf or extent zeroout.
         */
        ext4_ext_drop_refs(path);
-       path = ext4_ext_find_extent(inode, map->m_lblk, path, 0);
+       path = ext4_ext_find_extent(inode, map->m_lblk, &path,
+                                   EXT4_EX_NOFREE_ON_ERR);
        if (IS_ERR(path))
                return PTR_ERR(path);
        depth = ext_depth(inode);
@@ -3668,66 +3677,6 @@ static int ext4_split_convert_extents(handle_t *handle,
        return ext4_split_extent(handle, inode, path, map, split_flag, flags);
 }
 
-static int ext4_convert_initialized_extents(handle_t *handle,
-                                           struct inode *inode,
-                                           struct ext4_map_blocks *map,
-                                           struct ext4_ext_path *path)
-{
-       struct ext4_extent *ex;
-       ext4_lblk_t ee_block;
-       unsigned int ee_len;
-       int depth;
-       int err = 0;
-
-       depth = ext_depth(inode);
-       ex = path[depth].p_ext;
-       ee_block = le32_to_cpu(ex->ee_block);
-       ee_len = ext4_ext_get_actual_len(ex);
-
-       ext_debug("%s: inode %lu, logical"
-               "block %llu, max_blocks %u\n", __func__, inode->i_ino,
-                 (unsigned long long)ee_block, ee_len);
-
-       if (ee_block != map->m_lblk || ee_len > map->m_len) {
-               err = ext4_split_convert_extents(handle, inode, map, path,
-                               EXT4_GET_BLOCKS_CONVERT_UNWRITTEN);
-               if (err < 0)
-                       goto out;
-               ext4_ext_drop_refs(path);
-               path = ext4_ext_find_extent(inode, map->m_lblk, path, 0);
-               if (IS_ERR(path)) {
-                       err = PTR_ERR(path);
-                       goto out;
-               }
-               depth = ext_depth(inode);
-               ex = path[depth].p_ext;
-               if (!ex) {
-                       EXT4_ERROR_INODE(inode, "unexpected hole at %lu",
-                                        (unsigned long) map->m_lblk);
-                       err = -EIO;
-                       goto out;
-               }
-       }
-
-       err = ext4_ext_get_access(handle, inode, path + depth);
-       if (err)
-               goto out;
-       /* first mark the extent as unwritten */
-       ext4_ext_mark_unwritten(ex);
-
-       /* note: ext4_ext_correct_indexes() isn't needed here because
-        * borders are not changed
-        */
-       ext4_ext_try_to_merge(handle, inode, path, ex);
-
-       /* Mark modified extent as dirty */
-       err = ext4_ext_dirty(handle, inode, path + path->p_depth);
-out:
-       ext4_ext_show_leaf(inode, path);
-       return err;
-}
-
-
 static int ext4_convert_unwritten_extents_endio(handle_t *handle,
                                                struct inode *inode,
                                                struct ext4_map_blocks *map,
@@ -3766,7 +3715,8 @@ static int ext4_convert_unwritten_extents_endio(handle_t *handle,
                if (err < 0)
                        goto out;
                ext4_ext_drop_refs(path);
-               path = ext4_ext_find_extent(inode, map->m_lblk, path, 0);
+               path = ext4_ext_find_extent(inode, map->m_lblk, &path,
+                                           EXT4_EX_NOFREE_ON_ERR);
                if (IS_ERR(path)) {
                        err = PTR_ERR(path);
                        goto out;
@@ -3963,12 +3913,15 @@ get_reserved_cluster_alloc(struct inode *inode, ext4_lblk_t lblk_start,
 }
 
 static int
-ext4_ext_convert_initialized_extent(handle_t *handle, struct inode *inode,
-                       struct ext4_map_blocks *map,
-                       struct ext4_ext_path *path, int flags,
-                       unsigned int allocated, ext4_fsblk_t newblock)
+convert_initialized_extent(handle_t *handle, struct inode *inode,
+                          struct ext4_map_blocks *map,
+                          struct ext4_ext_path *path, int flags,
+                          unsigned int allocated, ext4_fsblk_t newblock)
 {
-       int ret = 0;
+       struct ext4_extent *ex;
+       ext4_lblk_t ee_block;
+       unsigned int ee_len;
+       int depth;
        int err = 0;
 
        /*
@@ -3978,20 +3931,60 @@ ext4_ext_convert_initialized_extent(handle_t *handle, struct inode *inode,
        if (map->m_len > EXT_UNWRITTEN_MAX_LEN)
                map->m_len = EXT_UNWRITTEN_MAX_LEN / 2;
 
-       ret = ext4_convert_initialized_extents(handle, inode, map,
-                                               path);
-       if (ret >= 0) {
-               ext4_update_inode_fsync_trans(handle, inode, 1);
-               err = check_eofblocks_fl(handle, inode, map->m_lblk,
-                                        path, map->m_len);
-       } else
-               err = ret;
+       depth = ext_depth(inode);
+       ex = path[depth].p_ext;
+       ee_block = le32_to_cpu(ex->ee_block);
+       ee_len = ext4_ext_get_actual_len(ex);
+
+       ext_debug("%s: inode %lu, logical"
+               "block %llu, max_blocks %u\n", __func__, inode->i_ino,
+                 (unsigned long long)ee_block, ee_len);
+
+       if (ee_block != map->m_lblk || ee_len > map->m_len) {
+               err = ext4_split_convert_extents(handle, inode, map, path,
+                               EXT4_GET_BLOCKS_CONVERT_UNWRITTEN);
+               if (err < 0)
+                       return err;
+               ext4_ext_drop_refs(path);
+               path = ext4_ext_find_extent(inode, map->m_lblk, &path,
+                                           EXT4_EX_NOFREE_ON_ERR);
+               if (IS_ERR(path))
+                       return PTR_ERR(path);
+               depth = ext_depth(inode);
+               ex = path[depth].p_ext;
+               if (!ex) {
+                       EXT4_ERROR_INODE(inode, "unexpected hole at %lu",
+                                        (unsigned long) map->m_lblk);
+                       return -EIO;
+               }
+       }
+
+       err = ext4_ext_get_access(handle, inode, path + depth);
+       if (err)
+               return err;
+       /* first mark the extent as unwritten */
+       ext4_ext_mark_unwritten(ex);
+
+       /* note: ext4_ext_correct_indexes() isn't needed here because
+        * borders are not changed
+        */
+       ext4_ext_try_to_merge(handle, inode, path, ex);
+
+       /* Mark modified extent as dirty */
+       err = ext4_ext_dirty(handle, inode, path + path->p_depth);
+       if (err)
+               return err;
+       ext4_ext_show_leaf(inode, path);
+
+       ext4_update_inode_fsync_trans(handle, inode, 1);
+       err = check_eofblocks_fl(handle, inode, map->m_lblk, path, map->m_len);
+       if (err)
+               return err;
        map->m_flags |= EXT4_MAP_UNWRITTEN;
        if (allocated > map->m_len)
                allocated = map->m_len;
        map->m_len = allocated;
-
-       return err ? err : allocated;
+       return allocated;
 }
 
 static int
@@ -4331,7 +4324,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
                         */
                        if ((!ext4_ext_is_unwritten(ex)) &&
                            (flags & EXT4_GET_BLOCKS_CONVERT_UNWRITTEN)) {
-                               allocated = ext4_ext_convert_initialized_extent(
+                               allocated = convert_initialized_extent(
                                                handle, inode, map, path, flags,
                                                allocated, newblock);
                                goto out2;
@@ -4665,7 +4658,8 @@ retry:
 }
 
 static int ext4_alloc_file_blocks(struct file *file, ext4_lblk_t offset,
-                                 ext4_lblk_t len, int flags, int mode)
+                                 ext4_lblk_t len, loff_t new_size,
+                                 int flags, int mode)
 {
        struct inode *inode = file_inode(file);
        handle_t *handle;
@@ -4674,8 +4668,10 @@ static int ext4_alloc_file_blocks(struct file *file, ext4_lblk_t offset,
        int retries = 0;
        struct ext4_map_blocks map;
        unsigned int credits;
+       loff_t epos;
 
        map.m_lblk = offset;
+       map.m_len = len;
        /*
         * Don't normalize the request if it can fit in one extent so
         * that it doesn't get unnecessarily split into multiple
@@ -4690,9 +4686,7 @@ static int ext4_alloc_file_blocks(struct file *file, ext4_lblk_t offset,
        credits = ext4_chunk_trans_blocks(inode, len);
 
 retry:
-       while (ret >= 0 && ret < len) {
-               map.m_lblk = map.m_lblk + ret;
-               map.m_len = len = len - ret;
+       while (ret >= 0 && len) {
                handle = ext4_journal_start(inode, EXT4_HT_MAP_BLOCKS,
                                            credits);
                if (IS_ERR(handle)) {
@@ -4709,6 +4703,21 @@ retry:
                        ret2 = ext4_journal_stop(handle);
                        break;
                }
+               map.m_lblk += ret;
+               map.m_len = len = len - ret;
+               epos = (loff_t)map.m_lblk << inode->i_blkbits;
+               inode->i_ctime = ext4_current_time(inode);
+               if (new_size) {
+                       if (epos > new_size)
+                               epos = new_size;
+                       if (ext4_update_inode_size(inode, epos) & 0x1)
+                               inode->i_mtime = inode->i_ctime;
+               } else {
+                       if (epos > inode->i_size)
+                               ext4_set_inode_flag(inode,
+                                                   EXT4_INODE_EOFBLOCKS);
+               }
+               ext4_mark_inode_dirty(handle, inode);
                ret2 = ext4_journal_stop(handle);
                if (ret2)
                        break;
@@ -4731,7 +4740,8 @@ static long ext4_zero_range(struct file *file, loff_t offset,
        loff_t new_size = 0;
        int ret = 0;
        int flags;
-       int partial;
+       int credits;
+       int partial_begin, partial_end;
        loff_t start, end;
        ext4_lblk_t lblk;
        struct address_space *mapping = inode->i_mapping;
@@ -4771,7 +4781,8 @@ static long ext4_zero_range(struct file *file, loff_t offset,
 
        if (start < offset || end > offset + len)
                return -EINVAL;
-       partial = (offset + len) & ((1 << blkbits) - 1);
+       partial_begin = offset & ((1 << blkbits) - 1);
+       partial_end = (offset + len) & ((1 << blkbits) - 1);
 
        lblk = start >> blkbits;
        max_blocks = (end >> blkbits);
@@ -4781,7 +4792,8 @@ static long ext4_zero_range(struct file *file, loff_t offset,
                max_blocks -= lblk;
 
        flags = EXT4_GET_BLOCKS_CREATE_UNWRIT_EXT |
-               EXT4_GET_BLOCKS_CONVERT_UNWRITTEN;
+               EXT4_GET_BLOCKS_CONVERT_UNWRITTEN |
+               EXT4_EX_NOCACHE;
        if (mode & FALLOC_FL_KEEP_SIZE)
                flags |= EXT4_GET_BLOCKS_KEEP_SIZE;
 
@@ -4805,7 +4817,7 @@ static long ext4_zero_range(struct file *file, loff_t offset,
                 * If we have a partial block after EOF we have to allocate
                 * the entire block.
                 */
-               if (partial)
+               if (partial_end)
                        max_blocks += 1;
        }
 
@@ -4813,25 +4825,41 @@ static long ext4_zero_range(struct file *file, loff_t offset,
 
                /* Now release the pages and zero block aligned part of pages*/
                truncate_pagecache_range(inode, start, end - 1);
+               inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
 
                /* Wait all existing dio workers, newcomers will block on i_mutex */
                ext4_inode_block_unlocked_dio(inode);
                inode_dio_wait(inode);
 
+               ret = ext4_alloc_file_blocks(file, lblk, max_blocks, new_size,
+                                            flags, mode);
+               if (ret)
+                       goto out_dio;
                /*
                 * Remove entire range from the extent status tree.
+                *
+                * ext4_es_remove_extent(inode, lblk, max_blocks) is
+                * NOT sufficient.  I'm not sure why this is the case,
+                * but let's be conservative and remove the extent
+                * status tree for the entire inode.  There should be
+                * no outstanding delalloc extents thanks to the
+                * filemap_write_and_wait_range() call above.
                 */
-               ret = ext4_es_remove_extent(inode, lblk, max_blocks);
-               if (ret)
-                       goto out_dio;
-
-               ret = ext4_alloc_file_blocks(file, lblk, max_blocks, flags,
-                                            mode);
+               ret = ext4_es_remove_extent(inode, 0, EXT_MAX_BLOCKS);
                if (ret)
                        goto out_dio;
        }
+       if (!partial_begin && !partial_end)
+               goto out_dio;
 
-       handle = ext4_journal_start(inode, EXT4_HT_MISC, 4);
+       /*
+        * In worst case we have to writeout two nonadjacent unwritten
+        * blocks and update the inode
+        */
+       credits = (2 * ext4_ext_index_trans_blocks(inode, 2)) + 1;
+       if (ext4_should_journal_data(inode))
+               credits += 2;
+       handle = ext4_journal_start(inode, EXT4_HT_MISC, credits);
        if (IS_ERR(handle)) {
                ret = PTR_ERR(handle);
                ext4_std_error(inode->i_sb, ret);
@@ -4839,12 +4867,8 @@ static long ext4_zero_range(struct file *file, loff_t offset,
        }
 
        inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
-
        if (new_size) {
-               if (new_size > i_size_read(inode))
-                       i_size_write(inode, new_size);
-               if (new_size > EXT4_I(inode)->i_disksize)
-                       ext4_update_i_disksize(inode, new_size);
+               ext4_update_inode_size(inode, new_size);
        } else {
                /*
                * Mark that we allocate beyond EOF so the subsequent truncate
@@ -4853,7 +4877,6 @@ static long ext4_zero_range(struct file *file, loff_t offset,
                if ((offset + len) > i_size_read(inode))
                        ext4_set_inode_flag(inode, EXT4_INODE_EOFBLOCKS);
        }
-
        ext4_mark_inode_dirty(handle, inode);
 
        /* Zero out partial block at the edges of the range */
@@ -4880,13 +4903,11 @@ out_mutex:
 long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
 {
        struct inode *inode = file_inode(file);
-       handle_t *handle;
        loff_t new_size = 0;
        unsigned int max_blocks;
        int ret = 0;
        int flags;
        ext4_lblk_t lblk;
-       struct timespec tv;
        unsigned int blkbits = inode->i_blkbits;
 
        /* Return error if mode is not supported */
@@ -4937,36 +4958,15 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
                        goto out;
        }
 
-       ret = ext4_alloc_file_blocks(file, lblk, max_blocks, flags, mode);
+       ret = ext4_alloc_file_blocks(file, lblk, max_blocks, new_size,
+                                    flags, mode);
        if (ret)
                goto out;
 
-       handle = ext4_journal_start(inode, EXT4_HT_INODE, 2);
-       if (IS_ERR(handle))
-               goto out;
-
-       tv = inode->i_ctime = ext4_current_time(inode);
-
-       if (new_size) {
-               if (new_size > i_size_read(inode)) {
-                       i_size_write(inode, new_size);
-                       inode->i_mtime = tv;
-               }
-               if (new_size > EXT4_I(inode)->i_disksize)
-                       ext4_update_i_disksize(inode, new_size);
-       } else {
-               /*
-               * Mark that we allocate beyond EOF so the subsequent truncate
-               * can proceed even if the new size is the same as i_size.
-               */
-               if ((offset + len) > i_size_read(inode))
-                       ext4_set_inode_flag(inode, EXT4_INODE_EOFBLOCKS);
+       if (file->f_flags & O_SYNC && EXT4_SB(inode->i_sb)->s_journal) {
+               ret = jbd2_complete_transaction(EXT4_SB(inode->i_sb)->s_journal,
+                                               EXT4_I(inode)->i_sync_tid);
        }
-       ext4_mark_inode_dirty(handle, inode);
-       if (file->f_flags & O_SYNC)
-               ext4_handle_sync(handle);
-
-       ext4_journal_stop(handle);
 out:
        mutex_unlock(&inode->i_mutex);
        trace_ext4_fallocate_exit(inode, offset, max_blocks, ret);
@@ -5304,7 +5304,7 @@ ext4_ext_shift_extents(struct inode *inode, handle_t *handle,
        struct ext4_ext_path *path;
        int ret = 0, depth;
        struct ext4_extent *extent;
-       ext4_lblk_t stop_block, current_block;
+       ext4_lblk_t stop_block;
        ext4_lblk_t ex_start, ex_end;
 
        /* Let path point to the last extent */
@@ -5365,17 +5365,15 @@ ext4_ext_shift_extents(struct inode *inode, handle_t *handle,
                                         (unsigned long) start);
                        return -EIO;
                }
-
-               current_block = le32_to_cpu(extent->ee_block);
-               if (start > current_block) {
+               if (start > le32_to_cpu(extent->ee_block)) {
                        /* Hole, move to the next extent */
-                       ret = mext_next_extent(inode, path, &extent);
-                       if (ret != 0) {
+                       if (extent < EXT_LAST_EXTENT(path[depth].p_hdr)) {
+                               path[depth].p_ext++;
+                       } else {
+                               start = ext4_ext_next_allocated_block(path);
                                ext4_ext_drop_refs(path);
                                kfree(path);
-                               if (ret == 1)
-                                       ret = 0;
-                               break;
+                               continue;
                        }
                }
                ret = ext4_ext_shift_path_extents(path, shift, inode,
@@ -5508,3 +5506,204 @@ out_mutex:
        mutex_unlock(&inode->i_mutex);
        return ret;
 }
+
+/**
+ * ext4_swap_extents - Swap extents between two inodes
+ *
+ * @inode1:    First inode
+ * @inode2:    Second inode
+ * @lblk1:     Start block for first inode
+ * @lblk2:     Start block for second inode
+ * @count:     Number of blocks to swap
+ * @mark_unwritten: Mark second inode's extents as unwritten after swap
+ * @erp:       Pointer to save error value
+ *
+ * This helper routine does exactly what is promise "swap extents". All other
+ * stuff such as page-cache locking consistency, bh mapping consistency or
+ * extent's data copying must be performed by caller.
+ * Locking:
+ *             i_mutex is held for both inodes
+ *             i_data_sem is locked for write for both inodes
+ * Assumptions:
+ *             All pages from requested range are locked for both inodes
+ */
+int
+ext4_swap_extents(handle_t *handle, struct inode *inode1,
+                    struct inode *inode2, ext4_lblk_t lblk1, ext4_lblk_t lblk2,
+                 ext4_lblk_t count, int unwritten, int *erp)
+{
+       struct ext4_ext_path *path1 = NULL;
+       struct ext4_ext_path *path2 = NULL;
+       int replaced_count = 0;
+
+       BUG_ON(!rwsem_is_locked(&EXT4_I(inode1)->i_data_sem));
+       BUG_ON(!rwsem_is_locked(&EXT4_I(inode2)->i_data_sem));
+       BUG_ON(!mutex_is_locked(&inode1->i_mutex));
+       BUG_ON(!mutex_is_locked(&inode1->i_mutex));
+
+       *erp = ext4_es_remove_extent(inode1, lblk1, count);
+       if (unlikely(*erp))
+               return 0;
+       *erp = ext4_es_remove_extent(inode2, lblk2, count);
+       if (unlikely(*erp))
+               return 0;
+
+       while (count) {
+               struct ext4_extent *ex1, *ex2, tmp_ex;
+               ext4_lblk_t e1_blk, e2_blk;
+               int e1_len, e2_len, len;
+               int split = 0;
+
+               path1 = ext4_ext_find_extent(inode1, lblk1, NULL, EXT4_EX_NOCACHE);
+               if (unlikely(IS_ERR(path1))) {
+                       *erp = PTR_ERR(path1);
+                       path1 = NULL;
+               finish:
+                       count = 0;
+                       goto repeat;
+               }
+               path2 = ext4_ext_find_extent(inode2, lblk2, NULL, EXT4_EX_NOCACHE);
+               if (unlikely(IS_ERR(path2))) {
+                       *erp = PTR_ERR(path2);
+                       path2 = NULL;
+                       goto finish;
+               }
+               ex1 = path1[path1->p_depth].p_ext;
+               ex2 = path2[path2->p_depth].p_ext;
+               /* Do we have somthing to swap ? */
+               if (unlikely(!ex2 || !ex1))
+                       goto finish;
+
+               e1_blk = le32_to_cpu(ex1->ee_block);
+               e2_blk = le32_to_cpu(ex2->ee_block);
+               e1_len = ext4_ext_get_actual_len(ex1);
+               e2_len = ext4_ext_get_actual_len(ex2);
+
+               /* Hole handling */
+               if (!in_range(lblk1, e1_blk, e1_len) ||
+                   !in_range(lblk2, e2_blk, e2_len)) {
+                       ext4_lblk_t next1, next2;
+
+                       /* if hole after extent, then go to next extent */
+                       next1 = ext4_ext_next_allocated_block(path1);
+                       next2 = ext4_ext_next_allocated_block(path2);
+                       /* If hole before extent, then shift to that extent */
+                       if (e1_blk > lblk1)
+                               next1 = e1_blk;
+                       if (e2_blk > lblk2)
+                               next2 = e1_blk;
+                       /* Do we have something to swap */
+                       if (next1 == EXT_MAX_BLOCKS || next2 == EXT_MAX_BLOCKS)
+                               goto finish;
+                       /* Move to the rightest boundary */
+                       len = next1 - lblk1;
+                       if (len < next2 - lblk2)
+                               len = next2 - lblk2;
+                       if (len > count)
+                               len = count;
+                       lblk1 += len;
+                       lblk2 += len;
+                       count -= len;
+                       goto repeat;
+               }
+
+               /* Prepare left boundary */
+               if (e1_blk < lblk1) {
+                       split = 1;
+                       *erp = ext4_force_split_extent_at(handle, inode1,
+                                               path1, lblk1, 0);
+                       if (unlikely(*erp))
+                               goto finish;
+               }
+               if (e2_blk < lblk2) {
+                       split = 1;
+                       *erp = ext4_force_split_extent_at(handle, inode2,
+                                               path2,  lblk2, 0);
+                       if (unlikely(*erp))
+                               goto finish;
+               }
+               /* ext4_split_extent_at() may retult in leaf extent split,
+                * path must to be revalidated. */
+               if (split)
+                       goto repeat;
+
+               /* Prepare right boundary */
+               len = count;
+               if (len > e1_blk + e1_len - lblk1)
+                       len = e1_blk + e1_len - lblk1;
+               if (len > e2_blk + e2_len - lblk2)
+                       len = e2_blk + e2_len - lblk2;
+
+               if (len != e1_len) {
+                       split = 1;
+                       *erp = ext4_force_split_extent_at(handle, inode1,
+                                               path1, lblk1 + len, 0);
+                       if (unlikely(*erp))
+                               goto finish;
+               }
+               if (len != e2_len) {
+                       split = 1;
+                       *erp = ext4_force_split_extent_at(handle, inode2,
+                                               path2, lblk2 + len, 0);
+                       if (*erp)
+                               goto finish;
+               }
+               /* ext4_split_extent_at() may retult in leaf extent split,
+                * path must to be revalidated. */
+               if (split)
+                       goto repeat;
+
+               BUG_ON(e2_len != e1_len);
+               *erp = ext4_ext_get_access(handle, inode1, path1 + path1->p_depth);
+               if (unlikely(*erp))
+                       goto finish;
+               *erp = ext4_ext_get_access(handle, inode2, path2 + path2->p_depth);
+               if (unlikely(*erp))
+                       goto finish;
+
+               /* Both extents are fully inside boundaries. Swap it now */
+               tmp_ex = *ex1;
+               ext4_ext_store_pblock(ex1, ext4_ext_pblock(ex2));
+               ext4_ext_store_pblock(ex2, ext4_ext_pblock(&tmp_ex));
+               ex1->ee_len = cpu_to_le16(e2_len);
+               ex2->ee_len = cpu_to_le16(e1_len);
+               if (unwritten)
+                       ext4_ext_mark_unwritten(ex2);
+               if (ext4_ext_is_unwritten(&tmp_ex))
+                       ext4_ext_mark_unwritten(ex1);
+
+               ext4_ext_try_to_merge(handle, inode2, path2, ex2);
+               ext4_ext_try_to_merge(handle, inode1, path1, ex1);
+               *erp = ext4_ext_dirty(handle, inode2, path2 +
+                                     path2->p_depth);
+               if (unlikely(*erp))
+                       goto finish;
+               *erp = ext4_ext_dirty(handle, inode1, path1 +
+                                     path1->p_depth);
+               /*
+                * Looks scarry ah..? second inode already points to new blocks,
+                * and it was successfully dirtied. But luckily error may happen
+                * only due to journal error, so full transaction will be
+                * aborted anyway.
+                */
+               if (unlikely(*erp))
+                       goto finish;
+               lblk1 += len;
+               lblk2 += len;
+               replaced_count += len;
+               count -= len;
+
+       repeat:
+               if (path1) {
+                       ext4_ext_drop_refs(path1);
+                       kfree(path1);
+                       path1 = NULL;
+               }
+               if (path2) {
+                       ext4_ext_drop_refs(path2);
+                       kfree(path2);
+                       path2 = NULL;
+               }
+       }
+       return replaced_count;
+}