ext4: revert "ext4: use io_end for multiple bios"
[firefly-linux-kernel-4.4.55.git] fs/ext4/inode.c
index 9bda50aa34e2f45ff4402fff2301d5e6b17a6c63..d666569923589dcbc01355226144a24233904627 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -55,21 +55,21 @@ static __u32 ext4_inode_csum(struct inode *inode, struct ext4_inode *raw,
        __u16 csum_hi = 0;
        __u32 csum;
 
-       csum_lo = raw->i_checksum_lo;
+       csum_lo = le16_to_cpu(raw->i_checksum_lo);
        raw->i_checksum_lo = 0;
        if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE &&
            EXT4_FITS_IN_INODE(raw, ei, i_checksum_hi)) {
-               csum_hi = raw->i_checksum_hi;
+               csum_hi = le16_to_cpu(raw->i_checksum_hi);
                raw->i_checksum_hi = 0;
        }
 
        csum = ext4_chksum(sbi, ei->i_csum_seed, (__u8 *)raw,
                           EXT4_INODE_SIZE(inode->i_sb));
 
-       raw->i_checksum_lo = csum_lo;
+       raw->i_checksum_lo = cpu_to_le16(csum_lo);
        if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE &&
            EXT4_FITS_IN_INODE(raw, ei, i_checksum_hi))
-               raw->i_checksum_hi = csum_hi;
+               raw->i_checksum_hi = cpu_to_le16(csum_hi);
 
        return csum;
 }
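
The conversions above keep the temporary variables in CPU byte order while preserving the familiar save/zero/checksum/restore pattern for excluding a structure's own checksum field from the sum. A minimal user-space sketch of that pattern follows; struct record, toy_csum() and the field names are illustrative stand-ins, not ext4 code, and the le16 conversions are noted only as comments.

/*
 * User-space sketch (not ext4 code): checksum a structure while
 * excluding its embedded checksum field, by saving the field, zeroing
 * it, computing the sum, and restoring the saved value -- the same
 * pattern ext4_inode_csum() uses in the hunk above.
 */
#include <stdint.h>
#include <stdio.h>

struct record {
	uint32_t payload[4];
	uint16_t checksum_lo;		/* stored little-endian on disk */
};

/* toy stand-in for a real CRC; only the exclusion pattern matters */
static uint32_t toy_csum(const uint8_t *p, size_t len)
{
	uint32_t c = 0;

	while (len--)
		c = (c << 1) ^ *p++;
	return c;
}

static uint32_t record_csum(struct record *r)
{
	uint16_t saved = r->checksum_lo;	/* le16_to_cpu() in the kernel */
	uint32_t csum;

	r->checksum_lo = 0;			/* exclude the field from the sum */
	csum = toy_csum((const uint8_t *)r, sizeof(*r));
	r->checksum_lo = saved;			/* cpu_to_le16() in the kernel */
	return csum;
}

int main(void)
{
	struct record r = { .payload = { 1, 2, 3, 4 }, .checksum_lo = 0xffff };

	printf("csum=%08x, checksum_lo preserved: %04x\n",
	       (unsigned)record_csum(&r), r.checksum_lo);
	return 0;
}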
@@ -210,8 +210,7 @@ void ext4_evict_inode(struct inode *inode)
                        journal_t *journal = EXT4_SB(inode->i_sb)->s_journal;
                        tid_t commit_tid = EXT4_I(inode)->i_datasync_tid;
 
-                       jbd2_log_start_commit(journal, commit_tid);
-                       jbd2_log_wait_commit(journal, commit_tid);
+                       jbd2_complete_transaction(journal, commit_tid);
                        filemap_write_and_wait(&inode->i_data);
                }
                truncate_inode_pages(&inode->i_data, 0);
@@ -1081,10 +1080,14 @@ retry_journal:
 /* For write_end() in data=journal mode */
 static int write_end_fn(handle_t *handle, struct buffer_head *bh)
 {
+       int ret;
        if (!buffer_mapped(bh) || buffer_freed(bh))
                return 0;
        set_buffer_uptodate(bh);
-       return ext4_handle_dirty_metadata(handle, NULL, bh);
+       ret = ext4_handle_dirty_metadata(handle, NULL, bh);
+       clear_buffer_meta(bh);
+       clear_buffer_prio(bh);
+       return ret;
 }
 
 /*
@@ -1616,22 +1619,25 @@ static void ext4_print_free_blocks(struct inode *inode)
 {
        struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
        struct super_block *sb = inode->i_sb;
+       struct ext4_inode_info *ei = EXT4_I(inode);
 
        ext4_msg(sb, KERN_CRIT, "Total free blocks count %lld",
               EXT4_C2B(EXT4_SB(inode->i_sb),
-                       ext4_count_free_clusters(inode->i_sb)));
+                       ext4_count_free_clusters(sb)));
        ext4_msg(sb, KERN_CRIT, "Free/Dirty block details");
        ext4_msg(sb, KERN_CRIT, "free_blocks=%lld",
-              (long long) EXT4_C2B(EXT4_SB(inode->i_sb),
+              (long long) EXT4_C2B(EXT4_SB(sb),
                percpu_counter_sum(&sbi->s_freeclusters_counter)));
        ext4_msg(sb, KERN_CRIT, "dirty_blocks=%lld",
-              (long long) EXT4_C2B(EXT4_SB(inode->i_sb),
+              (long long) EXT4_C2B(EXT4_SB(sb),
                percpu_counter_sum(&sbi->s_dirtyclusters_counter)));
        ext4_msg(sb, KERN_CRIT, "Block reservation details");
        ext4_msg(sb, KERN_CRIT, "i_reserved_data_blocks=%u",
-                EXT4_I(inode)->i_reserved_data_blocks);
+                ei->i_reserved_data_blocks);
        ext4_msg(sb, KERN_CRIT, "i_reserved_meta_blocks=%u",
-              EXT4_I(inode)->i_reserved_meta_blocks);
+              ei->i_reserved_meta_blocks);
+       ext4_msg(sb, KERN_CRIT, "i_allocated_meta_blocks=%u",
+              ei->i_allocated_meta_blocks);
        return;
 }
 
@@ -1686,12 +1692,21 @@ static void mpage_da_map_and_submit(struct mpage_da_data *mpd)
         */
        map.m_lblk = next;
        map.m_len = max_blocks;
-       get_blocks_flags = EXT4_GET_BLOCKS_CREATE;
+       /*
+        * We're in delalloc path and it is possible that we're going to
+        * need more metadata blocks than previously reserved. However
+        * we must not fail because we're in writeback and there is
+        * nothing we can do about it so it might result in data loss.
+        * So use reserved blocks to allocate metadata if possible.
+        */
+       get_blocks_flags = EXT4_GET_BLOCKS_CREATE |
+                          EXT4_GET_BLOCKS_METADATA_NOFAIL;
        if (ext4_should_dioread_nolock(mpd->inode))
                get_blocks_flags |= EXT4_GET_BLOCKS_IO_CREATE_EXT;
        if (mpd->b_state & (1 << BH_Delay))
                get_blocks_flags |= EXT4_GET_BLOCKS_DELALLOC_RESERVE;
 
+
        blks = ext4_map_blocks(handle, mpd->inode, &map, get_blocks_flags);
        if (blks < 0) {
                struct super_block *sb = mpd->inode->i_sb;
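
The comment added above captures the key constraint: in writeback there is no way to surface ENOSPC to the writer, so metadata allocations in this path are allowed to dip into reserved space. A rough conceptual sketch of that idea in plain C; the pools and the ALLOC_NOFAIL flag are invented for illustration and are not ext4's reservation machinery.

/*
 * Conceptual sketch only: a best-effort allocation fails with "no
 * space", while a NOFAIL allocation may also consume a held-back
 * reserve, mirroring the intent of EXT4_GET_BLOCKS_METADATA_NOFAIL.
 */
#include <stdbool.h>
#include <stdio.h>

#define ALLOC_NOFAIL	0x1

static long normal_pool = 2;	/* blocks left in the regular pool */
static long reserve_pool = 8;	/* blocks held back for emergencies */

static bool alloc_blocks(long want, unsigned int flags)
{
	if (normal_pool >= want) {
		normal_pool -= want;
		return true;
	}
	if ((flags & ALLOC_NOFAIL) && normal_pool + reserve_pool >= want) {
		reserve_pool -= want - normal_pool;
		normal_pool = 0;
		return true;
	}
	return false;			/* ordinary callers just see ENOSPC */
}

int main(void)
{
	printf("plain alloc of 4:  %d\n", alloc_blocks(4, 0));			/* 0 */
	printf("nofail alloc of 4: %d\n", alloc_blocks(4, ALLOC_NOFAIL));	/* 1 */
	return 0;
}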
@@ -2607,7 +2622,7 @@ out_writepages:
 
 static int ext4_nonda_switch(struct super_block *sb)
 {
-       s64 free_blocks, dirty_blocks;
+       s64 free_clusters, dirty_clusters;
        struct ext4_sb_info *sbi = EXT4_SB(sb);
 
        /*
@@ -2618,17 +2633,18 @@ static int ext4_nonda_switch(struct super_block *sb)
         * Delalloc need an accurate free block accounting. So switch
         * to non delalloc when we are near to error range.
         */
-       free_blocks  = EXT4_C2B(sbi,
-               percpu_counter_read_positive(&sbi->s_freeclusters_counter));
-       dirty_blocks = percpu_counter_read_positive(&sbi->s_dirtyclusters_counter);
+       free_clusters =
+               percpu_counter_read_positive(&sbi->s_freeclusters_counter);
+       dirty_clusters =
+               percpu_counter_read_positive(&sbi->s_dirtyclusters_counter);
        /*
         * Start pushing delalloc when 1/2 of free blocks are dirty.
         */
-       if (dirty_blocks && (free_blocks < 2 * dirty_blocks))
+       if (dirty_clusters && (free_clusters < 2 * dirty_clusters))
                try_to_writeback_inodes_sb(sb, WB_REASON_FS_FREE_SPACE);
 
-       if (2 * free_blocks < 3 * dirty_blocks ||
-               free_blocks < (dirty_blocks + EXT4_FREECLUSTERS_WATERMARK)) {
+       if (2 * free_clusters < 3 * dirty_clusters ||
+           free_clusters < (dirty_clusters + EXT4_FREECLUSTERS_WATERMARK)) {
                /*
                 * free block count is less than 150% of dirty blocks
                 * or free blocks is less than watermark
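
Spelled out, the switch-over test above disables delayed allocation once free clusters drop below 150% of dirty clusters, or below dirty clusters plus the watermark. A small worked example in user-space C; the watermark constant is a placeholder, not the real EXT4_FREECLUSTERS_WATERMARK.

#include <stdbool.h>
#include <stdio.h>

#define FREECLUSTERS_WATERMARK	1024	/* placeholder value */

static bool nonda_switch(long long free_clusters, long long dirty_clusters)
{
	return 2 * free_clusters < 3 * dirty_clusters ||
	       free_clusters < dirty_clusters + FREECLUSTERS_WATERMARK;
}

int main(void)
{
	/* 100000 free vs 50000 dirty: exactly 200%, keep delalloc */
	printf("%d\n", nonda_switch(100000, 50000));	/* prints 0 */
	/* 70000 free vs 50000 dirty: only 140%, fall back to nondelalloc */
	printf("%d\n", nonda_switch(70000, 50000));	/* prints 1 */
	return 0;
}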
@@ -3738,9 +3754,9 @@ int ext4_punch_hole(struct file *file, loff_t offset, loff_t length)
                                            stop_block);
 
        ext4_discard_preallocations(inode);
+       up_write(&EXT4_I(inode)->i_data_sem);
        if (IS_SYNC(inode))
                ext4_handle_sync(handle);
-       up_write(&EXT4_I(inode)->i_data_sem);
        inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
        ext4_mark_inode_dirty(handle, inode);
 out_stop:
@@ -3782,6 +3798,19 @@ out_mutex:
  */
 void ext4_truncate(struct inode *inode)
 {
+       struct ext4_inode_info *ei = EXT4_I(inode);
+       unsigned int credits;
+       handle_t *handle;
+       struct address_space *mapping = inode->i_mapping;
+       loff_t page_len;
+
+       /*
+        * There is a possibility that we're either freeing the inode
+        * or it's a completely new inode. In those cases we might not
+        * have i_mutex locked because it's not necessary.
+        */
+       if (!(inode->i_state & (I_NEW|I_FREEING)))
+               WARN_ON(!mutex_is_locked(&inode->i_mutex));
        trace_ext4_truncate_enter(inode);
 
        if (!ext4_can_truncate(inode))
@@ -3800,10 +3829,72 @@ void ext4_truncate(struct inode *inode)
                        return;
        }
 
+       /*
+        * finish any pending end_io work so we won't run the risk of
+        * converting any truncated blocks to initialized later
+        */
+       ext4_flush_unwritten_io(inode);
+
        if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
-               ext4_ext_truncate(inode);
+               credits = ext4_writepage_trans_blocks(inode);
        else
-               ext4_ind_truncate(inode);
+               credits = ext4_blocks_for_truncate(inode);
+
+       handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, credits);
+       if (IS_ERR(handle)) {
+               ext4_std_error(inode->i_sb, PTR_ERR(handle));
+               return;
+       }
+
+       if (inode->i_size % PAGE_CACHE_SIZE != 0) {
+               page_len = PAGE_CACHE_SIZE -
+                       (inode->i_size & (PAGE_CACHE_SIZE - 1));
+
+               if (ext4_discard_partial_page_buffers(handle,
+                               mapping, inode->i_size, page_len, 0))
+                       goto out_stop;
+       }
+
+       /*
+        * We add the inode to the orphan list, so that if this
+        * truncate spans multiple transactions, and we crash, we will
+        * resume the truncate when the filesystem recovers.  It also
+        * marks the inode dirty, to catch the new size.
+        *
+        * Implication: the file must always be in a sane, consistent
+        * truncatable state while each transaction commits.
+        */
+       if (ext4_orphan_add(handle, inode))
+               goto out_stop;
+
+       down_write(&EXT4_I(inode)->i_data_sem);
+
+       ext4_discard_preallocations(inode);
+
+       if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
+               ext4_ext_truncate(handle, inode);
+       else
+               ext4_ind_truncate(handle, inode);
+
+       up_write(&ei->i_data_sem);
+
+       if (IS_SYNC(inode))
+               ext4_handle_sync(handle);
+
+out_stop:
+       /*
+        * If this was a simple ftruncate() and the file will remain alive,
+        * then we need to clear up the orphan record which we created above.
+        * However, if this was a real unlink then we were called by
+        * ext4_delete_inode(), and we allow that function to clean up the
+        * orphan info for us.
+        */
+       if (inode->i_nlink)
+               ext4_orphan_del(handle, inode);
+
+       inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
+       ext4_mark_inode_dirty(handle, inode);
+       ext4_journal_stop(handle);
 
        trace_ext4_truncate_exit(inode);
 }
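
One detail worth spelling out from the hunk above is the partial-page computation: when i_size is not page aligned, the tail of the last page, PAGE_CACHE_SIZE - (i_size & (PAGE_CACHE_SIZE - 1)) bytes, is discarded. A worked example in plain C, assuming an illustrative 4096-byte page size rather than the kernel's PAGE_CACHE_SIZE.

#include <stdio.h>

#define PAGE_SIZE_EXAMPLE 4096ULL	/* stand-in for PAGE_CACHE_SIZE */

int main(void)
{
	unsigned long long i_size = 10000;	/* ends 1808 bytes into its last page */
	unsigned long long page_len;

	if (i_size % PAGE_SIZE_EXAMPLE != 0) {
		page_len = PAGE_SIZE_EXAMPLE -
			(i_size & (PAGE_SIZE_EXAMPLE - 1));
		/* prints: discard 2288 bytes starting at offset 10000 */
		printf("discard %llu bytes starting at offset %llu\n",
		       page_len, i_size);
	}
	return 0;
}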
@@ -3911,13 +4002,14 @@ make_io:
                if (EXT4_SB(sb)->s_inode_readahead_blks) {
                        ext4_fsblk_t b, end, table;
                        unsigned num;
+                       __u32 ra_blks = EXT4_SB(sb)->s_inode_readahead_blks;
 
                        table = ext4_inode_table(sb, gdp);
                        /* s_inode_readahead_blks is always a power of 2 */
-                       b = block & ~(EXT4_SB(sb)->s_inode_readahead_blks-1);
+                       b = block & ~((ext4_fsblk_t) ra_blks - 1);
                        if (table > b)
                                b = table;
-                       end = b + EXT4_SB(sb)->s_inode_readahead_blks;
+                       end = b + ra_blks;
                        num = EXT4_INODES_PER_GROUP(sb);
                        if (ext4_has_group_desc_csum(sb))
                                num -= ext4_itable_unused_count(sb, gdp);
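
The cast to ext4_fsblk_t in the hunk above is the point of the change: with a 32-bit readahead count, ~(ra_blks - 1) is a 32-bit mask, and ANDing it with a 64-bit block number silently clears the upper 32 bits on filesystems larger than 2^32 blocks. A short demonstration with stand-in fixed-width types instead of the kernel's:

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t block = 0x100000123ULL;	/* a block beyond 2^32 */
	uint32_t ra_blks = 32;			/* power-of-two readahead window */

	uint64_t wrong = block & ~(ra_blks - 1);		/* 32-bit mask, high bits lost */
	uint64_t right = block & ~((uint64_t)ra_blks - 1);	/* widened before negating */

	printf("wrong: %#" PRIx64 "\n", wrong);	/* 0x120 */
	printf("right: %#" PRIx64 "\n", right);	/* 0x100000120 */
	return 0;
}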
@@ -4114,8 +4206,9 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
         * NeilBrown 1999oct15
         */
        if (inode->i_nlink == 0) {
-               if (inode->i_mode == 0 ||
-                   !(EXT4_SB(inode->i_sb)->s_mount_state & EXT4_ORPHAN_FS)) {
+               if ((inode->i_mode == 0 ||
+                    !(EXT4_SB(inode->i_sb)->s_mount_state & EXT4_ORPHAN_FS)) &&
+                   ino != EXT4_BOOT_LOADER_INO) {
                        /* this inode is deleted */
                        ret = -ESTALE;
                        goto bad_inode;
@@ -4123,7 +4216,9 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
                /* The only unlinked inodes we let through here have
                 * valid i_mode and are being read by the orphan
                 * recovery code: that's fine, we're about to complete
-                * the process of deleting those. */
+                * the process of deleting those.
+                * OR it is the EXT4_BOOT_LOADER_INO which is
+                * not initialized on a new filesystem. */
        }
        ei->i_flags = le32_to_cpu(raw_inode->i_flags);
        inode->i_blocks = ext4_inode_blocks(raw_inode, ei);
@@ -4243,6 +4338,8 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
                else
                        init_special_inode(inode, inode->i_mode,
                           new_decode_dev(le32_to_cpu(raw_inode->i_block[1])));
+       } else if (ino == EXT4_BOOT_LOADER_INO) {
+               make_bad_inode(inode);
        } else {
                ret = -EIO;
                EXT4_ERROR_INODE(inode, "bogus i_mode (%o)", inode->i_mode);