ext4: fix unwritten counter leakage
[firefly-linux-kernel-4.4.55.git] / fs / ext4 / extents.c
index 91341ec6e06a94f2f400d10039a64585cd17ed2e..54a94426ef7b2ef91d4b9105280399e3addc8144 100644 (file)
@@ -1177,7 +1177,7 @@ static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode,
                  le32_to_cpu(EXT_FIRST_INDEX(neh)->ei_block),
                  ext4_idx_pblock(EXT_FIRST_INDEX(neh)));
 
-       neh->eh_depth = cpu_to_le16(le16_to_cpu(neh->eh_depth) + 1);
+       le16_add_cpu(&neh->eh_depth, 1);
        ext4_mark_inode_dirty(handle, inode);
 out:
        brelse(bh);
@@ -1655,17 +1655,61 @@ static int ext4_ext_try_to_merge_right(struct inode *inode,
        return merge_done;
 }
 
+/*
+ * This function does a very simple check to see if we can collapse
+ * an extent tree with a single extent tree leaf block into the inode.
+ */
+static void ext4_ext_try_to_merge_up(handle_t *handle,
+                                    struct inode *inode,
+                                    struct ext4_ext_path *path)
+{
+       size_t s;
+       unsigned max_root = ext4_ext_space_root(inode, 0);
+       ext4_fsblk_t blk;
+
+       if ((path[0].p_depth != 1) ||
+           (le16_to_cpu(path[0].p_hdr->eh_entries) != 1) ||
+           (le16_to_cpu(path[1].p_hdr->eh_entries) > max_root))
+               return;
+
+       /*
+        * We need to modify the block allocation bitmap and the block
+        * group descriptor to release the extent tree block.  If we
+        * can't get the journal credits, give up.
+        */
+       if (ext4_journal_extend(handle, 2))
+               return;
+
+       /*
+        * Copy the extent data up to the inode
+        */
+       blk = ext4_idx_pblock(path[0].p_idx);
+       s = le16_to_cpu(path[1].p_hdr->eh_entries) *
+               sizeof(struct ext4_extent_idx);
+       s += sizeof(struct ext4_extent_header);
+
+       memcpy(path[0].p_hdr, path[1].p_hdr, s);
+       path[0].p_depth = 0;
+       path[0].p_ext = EXT_FIRST_EXTENT(path[0].p_hdr) +
+               (path[1].p_ext - EXT_FIRST_EXTENT(path[1].p_hdr));
+       path[0].p_hdr->eh_max = cpu_to_le16(max_root);
+
+       brelse(path[1].p_bh);
+       ext4_free_blocks(handle, inode, NULL, blk, 1,
+                        EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET);
+}
+
 /*
  * This function tries to merge the @ex extent to neighbours in the tree.
  * return 1 if merge left else 0.
  */
-static int ext4_ext_try_to_merge(struct inode *inode,
+static void ext4_ext_try_to_merge(handle_t *handle,
+                                 struct inode *inode,
                                  struct ext4_ext_path *path,
                                  struct ext4_extent *ex) {
        struct ext4_extent_header *eh;
        unsigned int depth;
        int merge_done = 0;
-       int ret = 0;
 
        depth = ext_depth(inode);
        BUG_ON(path[depth].p_hdr == NULL);
@@ -1675,9 +1719,9 @@ static int ext4_ext_try_to_merge(struct inode *inode,
                merge_done = ext4_ext_try_to_merge_right(inode, path, ex - 1);
 
        if (!merge_done)
-               ret = ext4_ext_try_to_merge_right(inode, path, ex);
+               (void) ext4_ext_try_to_merge_right(inode, path, ex);
 
-       return ret;
+       ext4_ext_try_to_merge_up(handle, inode, path);
 }
 
 /*
@@ -1891,18 +1935,17 @@ has_space:
        nearex->ee_len = newext->ee_len;
 
 merge:
-       /* try to merge extents to the right */
+       /* try to merge extents */
        if (!(flag & EXT4_GET_BLOCKS_PRE_IO))
-               ext4_ext_try_to_merge(inode, path, nearex);
+               ext4_ext_try_to_merge(handle, inode, path, nearex);
 
-       /* try to merge extents to the left */
 
        /* time to correct all indexes above */
        err = ext4_ext_correct_indexes(handle, inode, path);
        if (err)
                goto cleanup;
 
-       err = ext4_ext_dirty(handle, inode, path + depth);
+       err = ext4_ext_dirty(handle, inode, path + path->p_depth);
 
 cleanup:
        if (npath) {
@@ -2093,13 +2136,10 @@ ext4_ext_put_gap_in_cache(struct inode *inode, struct ext4_ext_path *path,
 }
 
 /*
- * ext4_ext_check_cache()
+ * ext4_ext_in_cache()
  * Checks to see if the given block is in the cache.
  * If it is, the cached extent is stored in the given
- * cache extent pointer.  If the cached extent is a hole,
- * this routine should be used instead of
- * ext4_ext_in_cache if the calling function needs to
- * know the size of the hole.
+ * cache extent pointer.
  *
  * @inode: The files inode
  * @block: The block to look for in the cache
@@ -2108,8 +2148,10 @@ ext4_ext_put_gap_in_cache(struct inode *inode, struct ext4_ext_path *path,
  *
  * Return 0 if cache is invalid; 1 if the cache is valid
  */
-static int ext4_ext_check_cache(struct inode *inode, ext4_lblk_t block,
-       struct ext4_ext_cache *ex){
+static int
+ext4_ext_in_cache(struct inode *inode, ext4_lblk_t block,
+                 struct ext4_extent *ex)
+{
        struct ext4_ext_cache *cex;
        struct ext4_sb_info *sbi;
        int ret = 0;
@@ -2126,7 +2168,9 @@ static int ext4_ext_check_cache(struct inode *inode, ext4_lblk_t block,
                goto errout;
 
        if (in_range(block, cex->ec_block, cex->ec_len)) {
-               memcpy(ex, cex, sizeof(struct ext4_ext_cache));
+               ex->ee_block = cpu_to_le32(cex->ec_block);
+               ext4_ext_store_pblock(ex, cex->ec_start);
+               ex->ee_len = cpu_to_le16(cex->ec_len);
                ext_debug("%u cached by %u:%u:%llu\n",
                                block,
                                cex->ec_block, cex->ec_len, cex->ec_start);
@@ -2138,37 +2182,6 @@ errout:
        return ret;
 }
 
-/*
- * ext4_ext_in_cache()
- * Checks to see if the given block is in the cache.
- * If it is, the cached extent is stored in the given
- * extent pointer.
- *
- * @inode: The files inode
- * @block: The block to look for in the cache
- * @ex:    Pointer where the cached extent will be stored
- *         if it contains block
- *
- * Return 0 if cache is invalid; 1 if the cache is valid
- */
-static int
-ext4_ext_in_cache(struct inode *inode, ext4_lblk_t block,
-                       struct ext4_extent *ex)
-{
-       struct ext4_ext_cache cex;
-       int ret = 0;
-
-       if (ext4_ext_check_cache(inode, block, &cex)) {
-               ex->ee_block = cpu_to_le32(cex.ec_block);
-               ext4_ext_store_pblock(ex, cex.ec_start);
-               ex->ee_len = cpu_to_le16(cex.ec_len);
-               ret = 1;
-       }
-
-       return ret;
-}
-
-
 /*
  * ext4_ext_rm_idx:
  * removes index from the index block.
@@ -2275,10 +2288,13 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
        struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
        unsigned short ee_len =  ext4_ext_get_actual_len(ex);
        ext4_fsblk_t pblk;
-       int flags = EXT4_FREE_BLOCKS_FORGET;
+       int flags = 0;
 
        if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))
-               flags |= EXT4_FREE_BLOCKS_METADATA;
+               flags |= EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET;
+       else if (ext4_should_journal_data(inode))
+               flags |= EXT4_FREE_BLOCKS_FORGET;
+
        /*
         * For bigalloc file systems, we never free a partial cluster
         * at the beginning of the extent.  Instead, we make a note
@@ -2570,10 +2586,10 @@ static int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start,
 {
        struct super_block *sb = inode->i_sb;
        int depth = ext_depth(inode);
-       struct ext4_ext_path *path;
+       struct ext4_ext_path *path = NULL;
        ext4_fsblk_t partial_cluster = 0;
        handle_t *handle;
-       int i, err;
+       int i = 0, err;
 
        ext_debug("truncate since %u to %u\n", start, end);
 
@@ -2606,8 +2622,12 @@ again:
                }
                depth = ext_depth(inode);
                ex = path[depth].p_ext;
-               if (!ex)
+               if (!ex) {
+                       ext4_ext_drop_refs(path);
+                       kfree(path);
+                       path = NULL;
                        goto cont;
+               }
 
                ee_block = le32_to_cpu(ex->ee_block);
 
@@ -2637,8 +2657,6 @@ again:
                        if (err < 0)
                                goto out;
                }
-               ext4_ext_drop_refs(path);
-               kfree(path);
        }
 cont:
 
@@ -2647,19 +2665,28 @@ cont:
         * after i_size and walking into the tree depth-wise.
         */
        depth = ext_depth(inode);
-       path = kzalloc(sizeof(struct ext4_ext_path) * (depth + 1), GFP_NOFS);
-       if (path == NULL) {
-               ext4_journal_stop(handle);
-               return -ENOMEM;
-       }
-       path[0].p_depth = depth;
-       path[0].p_hdr = ext_inode_hdr(inode);
+       if (path) {
+               int k = i = depth;
+               while (--k > 0)
+                       path[k].p_block =
+                               le16_to_cpu(path[k].p_hdr->eh_entries)+1;
+       } else {
+               path = kzalloc(sizeof(struct ext4_ext_path) * (depth + 1),
+                              GFP_NOFS);
+               if (path == NULL) {
+                       ext4_journal_stop(handle);
+                       return -ENOMEM;
+               }
+               path[0].p_depth = depth;
+               path[0].p_hdr = ext_inode_hdr(inode);
+               i = 0;
 
-       if (ext4_ext_check(inode, path[0].p_hdr, depth)) {
-               err = -EIO;
-               goto out;
+               if (ext4_ext_check(inode, path[0].p_hdr, depth)) {
+                       err = -EIO;
+                       goto out;
+               }
        }
-       i = err = 0;
+       err = 0;
 
        while (i >= 0 && err == 0) {
                if (i == depth) {
@@ -2773,8 +2800,10 @@ cont:
 out:
        ext4_ext_drop_refs(path);
        kfree(path);
-       if (err == -EAGAIN)
+       if (err == -EAGAIN) {
+               path = NULL;
                goto again;
+       }
        ext4_journal_stop(handle);
 
        return err;
@@ -2912,9 +2941,9 @@ static int ext4_split_extent_at(handle_t *handle,
                        ext4_ext_mark_initialized(ex);
 
                if (!(flags & EXT4_GET_BLOCKS_PRE_IO))
-                       ext4_ext_try_to_merge(inode, path, ex);
+                       ext4_ext_try_to_merge(handle, inode, path, ex);
 
-               err = ext4_ext_dirty(handle, inode, path + depth);
+               err = ext4_ext_dirty(handle, inode, path + path->p_depth);
                goto out;
        }
 
@@ -2946,8 +2975,8 @@ static int ext4_split_extent_at(handle_t *handle,
                        goto fix_extent_len;
                /* update the extent length and mark as initialized */
                ex->ee_len = cpu_to_le16(ee_len);
-               ext4_ext_try_to_merge(inode, path, ex);
-               err = ext4_ext_dirty(handle, inode, path + depth);
+               ext4_ext_try_to_merge(handle, inode, path, ex);
+               err = ext4_ext_dirty(handle, inode, path + path->p_depth);
                goto out;
        } else if (err)
                goto fix_extent_len;
@@ -3029,7 +3058,6 @@ out:
        return err ? err : map->m_len;
 }
 
-#define EXT4_EXT_ZERO_LEN 7
 /*
  * This function is called by ext4_ext_map_blocks() if someone tries to write
  * to an uninitialized extent. It may result in splitting the uninitialized
@@ -3055,13 +3083,14 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
                                           struct ext4_map_blocks *map,
                                           struct ext4_ext_path *path)
 {
+       struct ext4_sb_info *sbi;
        struct ext4_extent_header *eh;
        struct ext4_map_blocks split_map;
        struct ext4_extent zero_ex;
        struct ext4_extent *ex;
        ext4_lblk_t ee_block, eof_block;
        unsigned int ee_len, depth;
-       int allocated;
+       int allocated, max_zeroout = 0;
        int err = 0;
        int split_flag = 0;
 
@@ -3069,6 +3098,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
                "block %llu, max_blocks %u\n", inode->i_ino,
                (unsigned long long)map->m_lblk, map->m_len);
 
+       sbi = EXT4_SB(inode->i_sb);
        eof_block = (inode->i_size + inode->i_sb->s_blocksize - 1) >>
                inode->i_sb->s_blocksize_bits;
        if (eof_block < map->m_lblk + map->m_len)
@@ -3168,9 +3198,12 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
         */
        split_flag |= ee_block + ee_len <= eof_block ? EXT4_EXT_MAY_ZEROOUT : 0;
 
-       /* If extent has less than 2*EXT4_EXT_ZERO_LEN zerout directly */
-       if (ee_len <= 2*EXT4_EXT_ZERO_LEN &&
-           (EXT4_EXT_MAY_ZEROOUT & split_flag)) {
+       if (EXT4_EXT_MAY_ZEROOUT & split_flag)
+               max_zeroout = sbi->s_extent_max_zeroout_kb >>
+                       inode->i_sb->s_blocksize_bits;
+
+       /* If extent is less than s_max_zeroout_kb, zeroout directly */
+       if (max_zeroout && (ee_len <= max_zeroout)) {
                err = ext4_ext_zeroout(inode, ex);
                if (err)
                        goto out;
@@ -3179,8 +3212,8 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
                if (err)
                        goto out;
                ext4_ext_mark_initialized(ex);
-               ext4_ext_try_to_merge(inode, path, ex);
-               err = ext4_ext_dirty(handle, inode, path + depth);
+               ext4_ext_try_to_merge(handle, inode, path, ex);
+               err = ext4_ext_dirty(handle, inode, path + path->p_depth);
                goto out;
        }
 
@@ -3194,9 +3227,8 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
        split_map.m_lblk = map->m_lblk;
        split_map.m_len = map->m_len;
 
-       if (allocated > map->m_len) {
-               if (allocated <= EXT4_EXT_ZERO_LEN &&
-                   (EXT4_EXT_MAY_ZEROOUT & split_flag)) {
+       if (max_zeroout && (allocated > map->m_len)) {
+               if (allocated <= max_zeroout) {
                        /* case 3 */
                        zero_ex.ee_block =
                                         cpu_to_le32(map->m_lblk);
@@ -3208,9 +3240,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
                                goto out;
                        split_map.m_lblk = map->m_lblk;
                        split_map.m_len = allocated;
-               } else if ((map->m_lblk - ee_block + map->m_len <
-                          EXT4_EXT_ZERO_LEN) &&
-                          (EXT4_EXT_MAY_ZEROOUT & split_flag)) {
+               } else if (map->m_lblk - ee_block + map->m_len < max_zeroout) {
                        /* case 2 */
                        if (map->m_lblk != ee_block) {
                                zero_ex.ee_block = ex->ee_block;
@@ -3230,7 +3260,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
        }
 
        allocated = ext4_split_extent(handle, inode, path,
-                                      &split_map, split_flag, 0);
+                                     &split_map, split_flag, 0);
        if (allocated < 0)
                err = allocated;
 
@@ -3244,7 +3274,7 @@ out:
  * to an uninitialized extent.
  *
  * Writing to an uninitialized extent may result in splitting the uninitialized
- * extent into multiple /initialized uninitialized extents (up to three)
+ * extent into multiple initialized/uninitialized extents (up to three)
  * There are three possibilities:
  *   a> There is no split required: Entire extent should be uninitialized
  *   b> Splits in two extents: Write is happening at either end of the extent
@@ -3321,10 +3351,10 @@ static int ext4_convert_unwritten_extents_endio(handle_t *handle,
        /* note: ext4_ext_correct_indexes() isn't needed here because
         * borders are not changed
         */
-       ext4_ext_try_to_merge(inode, path, ex);
+       ext4_ext_try_to_merge(handle, inode, path, ex);
 
        /* Mark modified extent as dirty */
-       err = ext4_ext_dirty(handle, inode, path + depth);
+       err = ext4_ext_dirty(handle, inode, path + path->p_depth);
 out:
        ext4_ext_show_leaf(inode, path);
        return err;
@@ -3588,7 +3618,7 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
 {
        int ret = 0;
        int err = 0;
-       ext4_io_end_t *io = EXT4_I(inode)->cur_aio_dio;
+       ext4_io_end_t *io = ext4_inode_aio(inode);
 
        ext_debug("ext4_ext_handle_uninitialized_extents: inode %lu, logical "
                  "block %llu, max_blocks %u, flags %x, allocated %u\n",
@@ -3603,6 +3633,8 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
        if ((flags & EXT4_GET_BLOCKS_PRE_IO)) {
                ret = ext4_split_unwritten_extents(handle, inode, map,
                                                   path, flags);
+               if (ret <= 0)
+                       goto out;
                /*
                 * Flag the inode(non aio case) or end_io struct (aio case)
                 * that this IO needs to conversion to written when IO is
@@ -3846,8 +3878,9 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
        unsigned int allocated = 0, offset = 0;
        unsigned int allocated_clusters = 0;
        struct ext4_allocation_request ar;
-       ext4_io_end_t *io = EXT4_I(inode)->cur_aio_dio;
+       ext4_io_end_t *io = ext4_inode_aio(inode);
        ext4_lblk_t cluster_offset;
+       int set_unwritten = 0;
 
        ext_debug("blocks %u/%u requested for inode %lu\n",
                  map->m_lblk, map->m_len, inode->i_ino);
@@ -4070,13 +4103,8 @@ got_allocated_blocks:
                 * For non asycn direct IO case, flag the inode state
                 * that we need to perform conversion when IO is done.
                 */
-               if ((flags & EXT4_GET_BLOCKS_PRE_IO)) {
-                       if (io)
-                               ext4_set_io_unwritten_flag(inode, io);
-                       else
-                               ext4_set_inode_state(inode,
-                                                    EXT4_STATE_DIO_UNWRITTEN);
-               }
+               if ((flags & EXT4_GET_BLOCKS_PRE_IO))
+                       set_unwritten = 1;
                if (ext4_should_dioread_nolock(inode))
                        map->m_flags |= EXT4_MAP_UNINIT;
        }
@@ -4088,6 +4116,15 @@ got_allocated_blocks:
        if (!err)
                err = ext4_ext_insert_extent(handle, inode, path,
                                             &newex, flags);
+
+       if (!err && set_unwritten) {
+               if (io)
+                       ext4_set_io_unwritten_flag(inode, io);
+               else
+                       ext4_set_inode_state(inode,
+                                            EXT4_STATE_DIO_UNWRITTEN);
+       }
+
        if (err && free_on_err) {
                int fb_flags = flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE ?
                        EXT4_FREE_BLOCKS_NO_QUOT_UPDATE : 0;
@@ -4420,6 +4457,8 @@ retry:
                ext4_falloc_update_inode(inode, mode, new_size,
                                         (map.m_flags & EXT4_MAP_NEW));
                ext4_mark_inode_dirty(handle, inode);
+               if ((file->f_flags & O_SYNC) && ret >= max_blocks)
+                       ext4_handle_sync(handle);
                ret2 = ext4_journal_stop(handle);
                if (ret2)
                        break;
@@ -4801,9 +4840,6 @@ int ext4_ext_punch_hole(struct file *file, loff_t offset, loff_t length)
        if (IS_ERR(handle))
                return PTR_ERR(handle);
 
-       err = ext4_orphan_add(handle, inode);
-       if (err)
-               goto out;
 
        /*
         * Now we need to zero out the non-page-aligned data in the
@@ -4889,7 +4925,6 @@ int ext4_ext_punch_hole(struct file *file, loff_t offset, loff_t length)
        up_write(&EXT4_I(inode)->i_data_sem);
 
 out:
-       ext4_orphan_del(handle, inode);
        inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
        ext4_mark_inode_dirty(handle, inode);
        ext4_journal_stop(handle);