ext4: fold ext4_claim_inode into ext4_new_inode
authorTheodore Ts'o <tytso@mit.edu>
Tue, 7 Feb 2012 01:12:03 +0000 (20:12 -0500)
committerTheodore Ts'o <tytso@mit.edu>
Tue, 7 Feb 2012 01:12:03 +0000 (20:12 -0500)
The function ext4_claim_inode() is only called by one function,
ext4_new_inode(), and by folding the functionality into
ext4_new_inode(), we can remove almost 50 lines of code, and put all
of the logic of allocating a new inode into a single place.

Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
fs/ext4/ialloc.c

index 25d8c9781ad94ea758781f906a34412743b3cda0..84e6e9a3986b65ea0f7d65882ab9acced4c69613 100644 (file)
@@ -592,94 +592,6 @@ static int find_group_other(struct super_block *sb, struct inode *parent,
        return -1;
 }
 
-/*
- * claim the inode from the inode bitmap. If the group
- * is uninit we need to take the groups's ext4_group_lock
- * and clear the uninit flag. The inode bitmap update
- * and group desc uninit flag clear should be done
- * after holding ext4_group_lock so that ext4_read_inode_bitmap
- * doesn't race with the ext4_claim_inode
- */
-static int ext4_claim_inode(struct super_block *sb,
-                       struct buffer_head *inode_bitmap_bh,
-                       unsigned long ino, ext4_group_t group, umode_t mode)
-{
-       int free = 0, retval = 0, count;
-       struct ext4_sb_info *sbi = EXT4_SB(sb);
-       struct ext4_group_info *grp = ext4_get_group_info(sb, group);
-       struct ext4_group_desc *gdp = ext4_get_group_desc(sb, group, NULL);
-
-       /*
-        * We have to be sure that new inode allocation does not race with
-        * inode table initialization, because otherwise we may end up
-        * allocating and writing new inode right before sb_issue_zeroout
-        * takes place and overwriting our new inode with zeroes. So we
-        * take alloc_sem to prevent it.
-        */
-       down_read(&grp->alloc_sem);
-       ext4_lock_group(sb, group);
-       if (ext4_test_and_set_bit(ino, inode_bitmap_bh->b_data)) {
-               /* not a free inode */
-               retval = 1;
-               goto err_ret;
-       }
-       ino++;
-       if ((group == 0 && ino < EXT4_FIRST_INO(sb)) ||
-                       ino > EXT4_INODES_PER_GROUP(sb)) {
-               ext4_unlock_group(sb, group);
-               up_read(&grp->alloc_sem);
-               ext4_error(sb, "reserved inode or inode > inodes count - "
-                          "block_group = %u, inode=%lu", group,
-                          ino + group * EXT4_INODES_PER_GROUP(sb));
-               return 1;
-       }
-       /* If we didn't allocate from within the initialized part of the inode
-        * table then we need to initialize up to this inode. */
-       if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) {
-
-               if (gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) {
-                       gdp->bg_flags &= cpu_to_le16(~EXT4_BG_INODE_UNINIT);
-                       /* When marking the block group with
-                        * ~EXT4_BG_INODE_UNINIT we don't want to depend
-                        * on the value of bg_itable_unused even though
-                        * mke2fs could have initialized the same for us.
-                        * Instead we calculated the value below
-                        */
-
-                       free = 0;
-               } else {
-                       free = EXT4_INODES_PER_GROUP(sb) -
-                               ext4_itable_unused_count(sb, gdp);
-               }
-
-               /*
-                * Check the relative inode number against the last used
-                * relative inode number in this group. if it is greater
-                * we need to  update the bg_itable_unused count
-                *
-                */
-               if (ino > free)
-                       ext4_itable_unused_set(sb, gdp,
-                                       (EXT4_INODES_PER_GROUP(sb) - ino));
-       }
-       count = ext4_free_inodes_count(sb, gdp) - 1;
-       ext4_free_inodes_set(sb, gdp, count);
-       if (S_ISDIR(mode)) {
-               count = ext4_used_dirs_count(sb, gdp) + 1;
-               ext4_used_dirs_set(sb, gdp, count);
-               if (sbi->s_log_groups_per_flex) {
-                       ext4_group_t f = ext4_flex_group(sbi, group);
-
-                       atomic_inc(&sbi->s_flex_groups[f].used_dirs);
-               }
-       }
-       gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp);
-err_ret:
-       ext4_unlock_group(sb, group);
-       up_read(&grp->alloc_sem);
-       return retval;
-}
-
 /*
  * There are two policies for allocating an inode.  If the new inode is
  * a directory, then a forward search is made for a block group with both
@@ -741,6 +653,11 @@ got_group:
        if (ret2 == -1)
                goto out;
 
+       /*
+        * Normally we will only go through one pass of this loop,
+        * unless we get unlucky and it turns out the group we selected
+        * had its last inode grabbed by someone else.
+        */
        for (i = 0; i < ngroups; i++, ino = 0) {
                err = -EIO;
 
@@ -757,51 +674,24 @@ repeat_in_this_group:
                ino = ext4_find_next_zero_bit((unsigned long *)
                                              inode_bitmap_bh->b_data,
                                              EXT4_INODES_PER_GROUP(sb), ino);
-
-               if (ino < EXT4_INODES_PER_GROUP(sb)) {
-
-                       BUFFER_TRACE(inode_bitmap_bh, "get_write_access");
-                       err = ext4_journal_get_write_access(handle,
-                                                           inode_bitmap_bh);
-                       if (err)
-                               goto fail;
-
-                       BUFFER_TRACE(group_desc_bh, "get_write_access");
-                       err = ext4_journal_get_write_access(handle,
-                                                               group_desc_bh);
-                       if (err)
-                               goto fail;
-                       if (!ext4_claim_inode(sb, inode_bitmap_bh,
-                                               ino, group, mode)) {
-                               /* we won it */
-                               BUFFER_TRACE(inode_bitmap_bh,
-                                       "call ext4_handle_dirty_metadata");
-                               err = ext4_handle_dirty_metadata(handle,
-                                                                NULL,
-                                                       inode_bitmap_bh);
-                               if (err)
-                                       goto fail;
-                               /* zero bit is inode number 1*/
-                               ino++;
-                               goto got;
-                       }
-                       /* we lost it */
-                       ext4_handle_release_buffer(handle, inode_bitmap_bh);
-                       ext4_handle_release_buffer(handle, group_desc_bh);
-
-                       if (++ino < EXT4_INODES_PER_GROUP(sb))
-                               goto repeat_in_this_group;
+               if (ino >= EXT4_INODES_PER_GROUP(sb)) {
+                       if (++group == ngroups)
+                               group = 0;
+                       continue;
                }
-
-               /*
-                * This case is possible in concurrent environment.  It is very
-                * rare.  We cannot repeat the find_group_xxx() call because
-                * that will simply return the same blockgroup, because the
-                * group descriptor metadata has not yet been updated.
-                * So we just go onto the next blockgroup.
-                */
-               if (++group == ngroups)
-                       group = 0;
+               if (group == 0 && (ino+1) < EXT4_FIRST_INO(sb)) {
+                       ext4_error(sb, "reserved inode found cleared - "
+                                  "inode=%lu", ino + 1);
+                       continue;
+               }
+               ext4_lock_group(sb, group);
+               ret2 = ext4_test_and_set_bit(ino, inode_bitmap_bh->b_data);
+               ext4_unlock_group(sb, group);
+               ino++;          /* the inode bitmap is zero-based */
+               if (!ret2)
+                       goto got; /* we grabbed the inode! */
+               if (ino < EXT4_INODES_PER_GROUP(sb))
+                       goto repeat_in_this_group;
        }
        err = -ENOSPC;
        goto out;
@@ -838,6 +728,59 @@ got:
                if (err)
                        goto fail;
        }
+
+       BUFFER_TRACE(inode_bitmap_bh, "get_write_access");
+       err = ext4_journal_get_write_access(handle, inode_bitmap_bh);
+       if (err)
+               goto fail;
+
+       BUFFER_TRACE(group_desc_bh, "get_write_access");
+       err = ext4_journal_get_write_access(handle, group_desc_bh);
+       if (err)
+               goto fail;
+
+       /* Update the relevant bg descriptor fields */
+       if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) {
+               int free;
+               struct ext4_group_info *grp = ext4_get_group_info(sb, group);
+
+               down_read(&grp->alloc_sem); /* protect vs itable lazyinit */
+               ext4_lock_group(sb, group); /* while we modify the bg desc */
+               free = EXT4_INODES_PER_GROUP(sb) -
+                       ext4_itable_unused_count(sb, gdp);
+               if (gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) {
+                       gdp->bg_flags &= cpu_to_le16(~EXT4_BG_INODE_UNINIT);
+                       free = 0;
+               }
+               /*
+                * Check the relative inode number against the last used
+                * relative inode number in this group. if it is greater
+                * we need to update the bg_itable_unused count
+                */
+               if (ino > free)
+                       ext4_itable_unused_set(sb, gdp,
+                                       (EXT4_INODES_PER_GROUP(sb) - ino));
+               up_read(&grp->alloc_sem);
+       }
+       ext4_free_inodes_set(sb, gdp, ext4_free_inodes_count(sb, gdp) - 1);
+       if (S_ISDIR(mode)) {
+               ext4_used_dirs_set(sb, gdp, ext4_used_dirs_count(sb, gdp) + 1);
+               if (sbi->s_log_groups_per_flex) {
+                       ext4_group_t f = ext4_flex_group(sbi, group);
+
+                       atomic_inc(&sbi->s_flex_groups[f].used_dirs);
+               }
+       }
+       if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) {
+               gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp);
+               ext4_unlock_group(sb, group);
+       }
+
+       BUFFER_TRACE(inode_bitmap_bh, "call ext4_handle_dirty_metadata");
+       err = ext4_handle_dirty_metadata(handle, NULL, inode_bitmap_bh);
+       if (err)
+               goto fail;
+
        BUFFER_TRACE(group_desc_bh, "call ext4_handle_dirty_metadata");
        err = ext4_handle_dirty_metadata(handle, NULL, group_desc_bh);
        if (err)
@@ -1101,7 +1044,7 @@ unsigned long ext4_count_dirs(struct super_block * sb)
  * where it is called from on active part of filesystem is ext4lazyinit
  * thread, so we do not need any special locks, however we have to prevent
  * inode allocation from the current group, so we take alloc_sem lock, to
- * block ext4_claim_inode until we are finished.
+ * block ext4_new_inode() until we are finished.
  */
 int ext4_init_inode_table(struct super_block *sb, ext4_group_t group,
                                 int barrier)