ext4: Add nanosecond timestamps
authorKalpak Shah <kalpak@clusterfs.com>
Wed, 18 Jul 2007 13:15:20 +0000 (09:15 -0400)
committerTheodore Ts'o <tytso@mit.edu>
Wed, 18 Jul 2007 13:15:20 +0000 (09:15 -0400)
This patch adds nanosecond timestamps for ext4. This involves adding
*time_extra fields to the ext4_inode to extend the timestamps to
64-bits.  Creation time is also added by this patch.

These extended fields will fit into an inode if the filesystem was
formatted with large inodes (-I 256 or larger) and there are currently
no EAs consuming all of the available space. For new inodes we always
reserve enough space for the kernel's known extended fields, but for
inodes created with an old kernel this might not have been the case. So
this patch also adds the EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE feature
flag(ro-compat so that older kernels can't create inodes with a smaller
extra_isize). which indicates if the fields fitting inside
s_min_extra_isize are available or not.  If the expansion of inodes if
unsuccessful then this feature will be disabled.  This feature is only
enabled if requested by the sysadmin.

None of the extended inode fields is critical for correct filesystem
operation.

Signed-off-by: Andreas Dilger <adilger@clusterfs.com>
Signed-off-by: Kalpak Shah <kalpak@clusterfs.com>
Signed-off-by: Eric Sandeen <sandeen@redhat.com>
Signed-off-by: Dave Kleikamp <shaggy@linux.vnet.ibm.com>
Signed-off-by: Mingming Cao <cmm@us.ibm.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
fs/ext4/ialloc.c
fs/ext4/inode.c
fs/ext4/ioctl.c
fs/ext4/namei.c
fs/ext4/super.c
fs/ext4/xattr.c
include/linux/ext4_fs.h
include/linux/ext4_fs_i.h
include/linux/ext4_fs_sb.h

index c88b439ba5cd5838d264450746c450cc45245c16..427f83066a0da425b75757c68430768bba7428ea 100644 (file)
@@ -563,7 +563,8 @@ got:
        inode->i_ino = ino;
        /* This is the optimal IO size (for stat), not the fs block size */
        inode->i_blocks = 0;
-       inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC;
+       inode->i_mtime = inode->i_atime = inode->i_ctime = ei->i_crtime =
+                                                      ext4_current_time(inode);
 
        memset(ei->i_data, 0, sizeof(ei->i_data));
        ei->i_dir_start_lookup = 0;
@@ -595,9 +596,8 @@ got:
        spin_unlock(&sbi->s_next_gen_lock);
 
        ei->i_state = EXT4_STATE_NEW;
-       ei->i_extra_isize =
-               (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE) ?
-               sizeof(struct ext4_inode) - EXT4_GOOD_OLD_INODE_SIZE : 0;
+
+       ei->i_extra_isize = EXT4_SB(sb)->s_want_extra_isize;
 
        ret = inode;
        if(DQUOT_ALLOC_INODE(inode)) {
index 49035c5a2c43ac024557f728e5526c49f7b5fa4d..b83f91edebd1ee659cdcc832b65a3d4ba46488be 100644 (file)
@@ -726,7 +726,7 @@ static int ext4_splice_branch(handle_t *handle, struct inode *inode,
 
        /* We are done with atomic stuff, now do the rest of housekeeping */
 
-       inode->i_ctime = CURRENT_TIME_SEC;
+       inode->i_ctime = ext4_current_time(inode);
        ext4_mark_inode_dirty(handle, inode);
 
        /* had we spliced it onto indirect block? */
@@ -2375,7 +2375,7 @@ do_indirects:
        ext4_discard_reservation(inode);
 
        mutex_unlock(&ei->truncate_mutex);
-       inode->i_mtime = inode->i_ctime = CURRENT_TIME_SEC;
+       inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
        ext4_mark_inode_dirty(handle, inode);
 
        /*
@@ -2629,10 +2629,6 @@ void ext4_read_inode(struct inode * inode)
        }
        inode->i_nlink = le16_to_cpu(raw_inode->i_links_count);
        inode->i_size = le32_to_cpu(raw_inode->i_size);
-       inode->i_atime.tv_sec = (signed)le32_to_cpu(raw_inode->i_atime);
-       inode->i_ctime.tv_sec = (signed)le32_to_cpu(raw_inode->i_ctime);
-       inode->i_mtime.tv_sec = (signed)le32_to_cpu(raw_inode->i_mtime);
-       inode->i_atime.tv_nsec = inode->i_ctime.tv_nsec = inode->i_mtime.tv_nsec = 0;
 
        ei->i_state = 0;
        ei->i_dir_start_lookup = 0;
@@ -2710,6 +2706,11 @@ void ext4_read_inode(struct inode * inode)
        } else
                ei->i_extra_isize = 0;
 
+       EXT4_INODE_GET_XTIME(i_ctime, inode, raw_inode);
+       EXT4_INODE_GET_XTIME(i_mtime, inode, raw_inode);
+       EXT4_INODE_GET_XTIME(i_atime, inode, raw_inode);
+       EXT4_EINODE_GET_XTIME(i_crtime, ei, raw_inode);
+
        if (S_ISREG(inode->i_mode)) {
                inode->i_op = &ext4_file_inode_operations;
                inode->i_fop = &ext4_file_operations;
@@ -2791,9 +2792,12 @@ static int ext4_do_update_inode(handle_t *handle,
        }
        raw_inode->i_links_count = cpu_to_le16(inode->i_nlink);
        raw_inode->i_size = cpu_to_le32(ei->i_disksize);
-       raw_inode->i_atime = cpu_to_le32(inode->i_atime.tv_sec);
-       raw_inode->i_ctime = cpu_to_le32(inode->i_ctime.tv_sec);
-       raw_inode->i_mtime = cpu_to_le32(inode->i_mtime.tv_sec);
+
+       EXT4_INODE_SET_XTIME(i_ctime, inode, raw_inode);
+       EXT4_INODE_SET_XTIME(i_mtime, inode, raw_inode);
+       EXT4_INODE_SET_XTIME(i_atime, inode, raw_inode);
+       EXT4_EINODE_SET_XTIME(i_crtime, ei, raw_inode);
+
        raw_inode->i_blocks = cpu_to_le32(inode->i_blocks);
        raw_inode->i_dtime = cpu_to_le32(ei->i_dtime);
        raw_inode->i_flags = cpu_to_le32(ei->i_flags);
index 5b00775d50961ed7d8acbb664daaa5544a799841..c04c7ccba9e3f140ed5243a6ccdcc58d6afcaa15 100644 (file)
@@ -97,7 +97,7 @@ int ext4_ioctl (struct inode * inode, struct file * filp, unsigned int cmd,
                ei->i_flags = flags;
 
                ext4_set_inode_flags(inode);
-               inode->i_ctime = CURRENT_TIME_SEC;
+               inode->i_ctime = ext4_current_time(inode);
 
                err = ext4_mark_iloc_dirty(handle, inode, &iloc);
 flags_err:
@@ -134,7 +134,7 @@ flags_err:
                        return PTR_ERR(handle);
                err = ext4_reserve_inode_write(handle, inode, &iloc);
                if (err == 0) {
-                       inode->i_ctime = CURRENT_TIME_SEC;
+                       inode->i_ctime = ext4_current_time(inode);
                        inode->i_generation = generation;
                        err = ext4_mark_iloc_dirty(handle, inode, &iloc);
                }
index 2de339dd755431fdde691de2f299d5fe62a4d28a..40106b7ea4b883d37eae792edafd0c77f682d115 100644 (file)
@@ -1295,7 +1295,7 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry,
         * happen is that the times are slightly out of date
         * and/or different from the directory change time.
         */
-       dir->i_mtime = dir->i_ctime = CURRENT_TIME_SEC;
+       dir->i_mtime = dir->i_ctime = ext4_current_time(dir);
        ext4_update_dx_flag(dir);
        dir->i_version++;
        ext4_mark_inode_dirty(handle, dir);
@@ -2056,7 +2056,7 @@ static int ext4_rmdir (struct inode * dir, struct dentry *dentry)
         * recovery. */
        inode->i_size = 0;
        ext4_orphan_add(handle, inode);
-       inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME_SEC;
+       inode->i_ctime = dir->i_ctime = dir->i_mtime = ext4_current_time(inode);
        ext4_mark_inode_dirty(handle, inode);
        drop_nlink(dir);
        ext4_update_dx_flag(dir);
@@ -2106,13 +2106,13 @@ static int ext4_unlink(struct inode * dir, struct dentry *dentry)
        retval = ext4_delete_entry(handle, dir, de, bh);
        if (retval)
                goto end_unlink;
-       dir->i_ctime = dir->i_mtime = CURRENT_TIME_SEC;
+       dir->i_ctime = dir->i_mtime = ext4_current_time(dir);
        ext4_update_dx_flag(dir);
        ext4_mark_inode_dirty(handle, dir);
        drop_nlink(inode);
        if (!inode->i_nlink)
                ext4_orphan_add(handle, inode);
-       inode->i_ctime = dir->i_ctime;
+       inode->i_ctime = ext4_current_time(inode);
        ext4_mark_inode_dirty(handle, inode);
        retval = 0;
 
@@ -2203,7 +2203,7 @@ retry:
        if (IS_DIRSYNC(dir))
                handle->h_sync = 1;
 
-       inode->i_ctime = CURRENT_TIME_SEC;
+       inode->i_ctime = ext4_current_time(inode);
        inc_nlink(inode);
        atomic_inc(&inode->i_count);
 
@@ -2305,7 +2305,7 @@ static int ext4_rename (struct inode * old_dir, struct dentry *old_dentry,
         * Like most other Unix systems, set the ctime for inodes on a
         * rename.
         */
-       old_inode->i_ctime = CURRENT_TIME_SEC;
+       old_inode->i_ctime = ext4_current_time(old_inode);
        ext4_mark_inode_dirty(handle, old_inode);
 
        /*
@@ -2338,9 +2338,9 @@ static int ext4_rename (struct inode * old_dir, struct dentry *old_dentry,
 
        if (new_inode) {
                drop_nlink(new_inode);
-               new_inode->i_ctime = CURRENT_TIME_SEC;
+               new_inode->i_ctime = ext4_current_time(new_inode);
        }
-       old_dir->i_ctime = old_dir->i_mtime = CURRENT_TIME_SEC;
+       old_dir->i_ctime = old_dir->i_mtime = ext4_current_time(old_dir);
        ext4_update_dx_flag(old_dir);
        if (dir_bh) {
                BUFFER_TRACE(dir_bh, "get_write_access");
index af0835187e76130ae57dc6d29783de3d8a418d75..b47259f6f39cd3e31db251b7c3eb904fdf4f083d 100644 (file)
@@ -1651,6 +1651,8 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
                                sbi->s_inode_size);
                        goto failed_mount;
                }
+               if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE)
+                       sb->s_time_gran = 1 << (EXT4_EPOCH_BITS - 2);
        }
        sbi->s_frag_size = EXT4_MIN_FRAG_SIZE <<
                                   le32_to_cpu(es->s_log_frag_size);
@@ -1874,6 +1876,32 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
        }
 
        ext4_setup_super (sb, es, sb->s_flags & MS_RDONLY);
+
+       /* determine the minimum size of new large inodes, if present */
+       if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE) {
+               sbi->s_want_extra_isize = sizeof(struct ext4_inode) -
+                                                    EXT4_GOOD_OLD_INODE_SIZE;
+               if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
+                                      EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE)) {
+                       if (sbi->s_want_extra_isize <
+                           le16_to_cpu(es->s_want_extra_isize))
+                               sbi->s_want_extra_isize =
+                                       le16_to_cpu(es->s_want_extra_isize);
+                       if (sbi->s_want_extra_isize <
+                           le16_to_cpu(es->s_min_extra_isize))
+                               sbi->s_want_extra_isize =
+                                       le16_to_cpu(es->s_min_extra_isize);
+               }
+       }
+       /* Check if enough inode space is available */
+       if (EXT4_GOOD_OLD_INODE_SIZE + sbi->s_want_extra_isize >
+                                                       sbi->s_inode_size) {
+               sbi->s_want_extra_isize = sizeof(struct ext4_inode) -
+                                                      EXT4_GOOD_OLD_INODE_SIZE;
+               printk(KERN_INFO "EXT4-fs: required extra inode space not"
+                       "available.\n");
+       }
+
        /*
         * akpm: core read_super() calls in here with the superblock locked.
         * That deadlocks, because orphan cleanup needs to lock the superblock
index e832e96095b33c177e880db6370e1686153439f6..fe16a569d06b0fd36af0add8160eff73e9810b32 100644 (file)
@@ -1013,7 +1013,7 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index,
        }
        if (!error) {
                ext4_xattr_update_super_block(handle, inode->i_sb);
-               inode->i_ctime = CURRENT_TIME_SEC;
+               inode->i_ctime = ext4_current_time(inode);
                error = ext4_mark_iloc_dirty(handle, inode, &is.iloc);
                /*
                 * The bh is consumed by ext4_mark_iloc_dirty, even with
index 45ec7258b2b1938b92ba0e991d3170090d8eebe1..df5e38faa15fbac9f7d4fe4813c609e769d68e48 100644 (file)
@@ -288,7 +288,7 @@ struct ext4_inode {
        __le16  i_uid;          /* Low 16 bits of Owner Uid */
        __le32  i_size;         /* Size in bytes */
        __le32  i_atime;        /* Access time */
-       __le32  i_ctime;        /* Creation time */
+       __le32  i_ctime;        /* Inode Change time */
        __le32  i_mtime;        /* Modification time */
        __le32  i_dtime;        /* Deletion Time */
        __le16  i_gid;          /* Low 16 bits of Group Id */
@@ -337,10 +337,85 @@ struct ext4_inode {
        } osd2;                         /* OS dependent 2 */
        __le16  i_extra_isize;
        __le16  i_pad1;
+       __le32  i_ctime_extra;  /* extra Change time      (nsec << 2 | epoch) */
+       __le32  i_mtime_extra;  /* extra Modification time(nsec << 2 | epoch) */
+       __le32  i_atime_extra;  /* extra Access time      (nsec << 2 | epoch) */
+       __le32  i_crtime;       /* File Creation time */
+       __le32  i_crtime_extra; /* extra FileCreationtime (nsec << 2 | epoch) */
 };
 
 #define i_size_high    i_dir_acl
 
+#define EXT4_EPOCH_BITS 2
+#define EXT4_EPOCH_MASK ((1 << EXT4_EPOCH_BITS) - 1)
+#define EXT4_NSEC_MASK  (~0UL << EXT4_EPOCH_BITS)
+
+/*
+ * Extended fields will fit into an inode if the filesystem was formatted
+ * with large inodes (-I 256 or larger) and there are not currently any EAs
+ * consuming all of the available space. For new inodes we always reserve
+ * enough space for the kernel's known extended fields, but for inodes
+ * created with an old kernel this might not have been the case. None of
+ * the extended inode fields is critical for correct filesystem operation.
+ * This macro checks if a certain field fits in the inode. Note that
+ * inode-size = GOOD_OLD_INODE_SIZE + i_extra_isize
+ */
+#define EXT4_FITS_IN_INODE(ext4_inode, einode, field)  \
+       ((offsetof(typeof(*ext4_inode), field) +        \
+         sizeof((ext4_inode)->field))                  \
+       <= (EXT4_GOOD_OLD_INODE_SIZE +                  \
+           (einode)->i_extra_isize))                   \
+
+static inline __le32 ext4_encode_extra_time(struct timespec *time)
+{
+       return cpu_to_le32((sizeof(time->tv_sec) > 4 ?
+                          time->tv_sec >> 32 : 0) |
+                          ((time->tv_nsec << 2) & EXT4_NSEC_MASK));
+}
+
+static inline void ext4_decode_extra_time(struct timespec *time, __le32 extra)
+{
+       if (sizeof(time->tv_sec) > 4)
+              time->tv_sec |= (__u64)(le32_to_cpu(extra) & EXT4_EPOCH_MASK)
+                              << 32;
+       time->tv_nsec = (le32_to_cpu(extra) & EXT4_NSEC_MASK) >> 2;
+}
+
+#define EXT4_INODE_SET_XTIME(xtime, inode, raw_inode)                         \
+do {                                                                          \
+       (raw_inode)->xtime = cpu_to_le32((inode)->xtime.tv_sec);               \
+       if (EXT4_FITS_IN_INODE(raw_inode, EXT4_I(inode), xtime ## _extra))     \
+               (raw_inode)->xtime ## _extra =                                 \
+                               ext4_encode_extra_time(&(inode)->xtime);       \
+} while (0)
+
+#define EXT4_EINODE_SET_XTIME(xtime, einode, raw_inode)                               \
+do {                                                                          \
+       if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime))                      \
+               (raw_inode)->xtime = cpu_to_le32((einode)->xtime.tv_sec);      \
+       if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime ## _extra))            \
+               (raw_inode)->xtime ## _extra =                                 \
+                               ext4_encode_extra_time(&(einode)->xtime);      \
+} while (0)
+
+#define EXT4_INODE_GET_XTIME(xtime, inode, raw_inode)                         \
+do {                                                                          \
+       (inode)->xtime.tv_sec = (signed)le32_to_cpu((raw_inode)->xtime);       \
+       if (EXT4_FITS_IN_INODE(raw_inode, EXT4_I(inode), xtime ## _extra))     \
+               ext4_decode_extra_time(&(inode)->xtime,                        \
+                                      raw_inode->xtime ## _extra);            \
+} while (0)
+
+#define EXT4_EINODE_GET_XTIME(xtime, einode, raw_inode)                               \
+do {                                                                          \
+       if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime))                      \
+               (einode)->xtime.tv_sec =                                       \
+                       (signed)le32_to_cpu((raw_inode)->xtime);               \
+       if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime ## _extra))            \
+               ext4_decode_extra_time(&(einode)->xtime,                       \
+                                      raw_inode->xtime ## _extra);            \
+} while (0)
+
 #if defined(__KERNEL__) || defined(__linux__)
 #define i_reserved1    osd1.linux1.l_i_reserved1
 #define i_frag         osd2.linux2.l_i_frag
@@ -539,6 +614,13 @@ static inline struct ext4_inode_info *EXT4_I(struct inode *inode)
        return container_of(inode, struct ext4_inode_info, vfs_inode);
 }
 
+static inline struct timespec ext4_current_time(struct inode *inode)
+{
+       return (inode->i_sb->s_time_gran < NSEC_PER_SEC) ?
+               current_fs_time(inode->i_sb) : CURRENT_TIME_SEC;
+}
+
+
 static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino)
 {
        return ino == EXT4_ROOT_INO ||
@@ -609,6 +691,7 @@ static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino)
 #define EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER    0x0001
 #define EXT4_FEATURE_RO_COMPAT_LARGE_FILE      0x0002
 #define EXT4_FEATURE_RO_COMPAT_BTREE_DIR       0x0004
+#define EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE     0x0040
 
 #define EXT4_FEATURE_INCOMPAT_COMPRESSION      0x0001
 #define EXT4_FEATURE_INCOMPAT_FILETYPE         0x0002
@@ -626,6 +709,7 @@ static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino)
                                         EXT4_FEATURE_INCOMPAT_64BIT)
 #define EXT4_FEATURE_RO_COMPAT_SUPP    (EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER| \
                                         EXT4_FEATURE_RO_COMPAT_LARGE_FILE| \
+                                        EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE | \
                                         EXT4_FEATURE_RO_COMPAT_BTREE_DIR)
 
 /*
index 9de4944069955a46e0183fbfe0a00adf04163270..1a511e9905aa2ae7d5c38a23da98644632eb6977 100644 (file)
@@ -153,6 +153,11 @@ struct ext4_inode_info {
 
        unsigned long i_ext_generation;
        struct ext4_ext_cache i_cached_extent;
+       /*
+        * File creation time. Its function is same as that of
+        * struct timespec i_{a,c,m}time in the generic inode.
+        */
+       struct timespec i_crtime;
 };
 
 #endif /* _LINUX_EXT4_FS_I */
index 0f7dc15924bff0b9e41f9f62c4e7ee3bc48f8073..1b2ffee12be910095b846d0ca18a856c5ef75e82 100644 (file)
@@ -81,6 +81,7 @@ struct ext4_sb_info {
        char *s_qf_names[MAXQUOTAS];            /* Names of quota files with journalled quota */
        int s_jquota_fmt;                       /* Format of quota to use */
 #endif
+       unsigned int s_want_extra_isize; /* New inodes should reserve # bytes */
 
 #ifdef EXTENTS_STATS
        /* ext4 extents stats */