Merge branch 'for-linus' of git://neil.brown.name/md
[firefly-linux-kernel-4.4.55.git] / mm / shmem.c
index dfc7069102ee638215d7f494a0893c26e7687325..fcedf5464eb79dba02ad485681fcc67530bdb720 100644 (file)
@@ -99,6 +99,13 @@ static struct vfsmount *shm_mnt;
 /* Pretend that each entry is of this size in directory's i_size */
 #define BOGO_DIRENT_SIZE 20
 
+struct shmem_xattr {
+       struct list_head list;  /* anchored by shmem_inode_info->xattr_list */
+       char *name;             /* xattr name */
+       size_t size;
+       char value[0];
+};
+
 /* Flag allocation requirements to shmem_getpage and shmem_swp_alloc */
 enum sgp_type {
        SGP_READ,       /* don't exceed i_size, don't allocate page */
@@ -532,7 +539,7 @@ static void shmem_free_pages(struct list_head *next)
        } while (next);
 }
 
-static void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end)
+void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end)
 {
        struct shmem_inode_info *info = SHMEM_I(inode);
        unsigned long idx;
@@ -555,6 +562,8 @@ static void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end)
        spinlock_t *punch_lock;
        unsigned long upper_limit;
 
+       truncate_inode_pages_range(inode->i_mapping, start, end);
+
        inode->i_ctime = inode->i_mtime = CURRENT_TIME;
        idx = (start + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
        if (idx >= info->next_index)
@@ -731,16 +740,8 @@ done2:
                 * lowered next_index.  Also, though shmem_getpage checks
                 * i_size before adding to cache, no recheck after: so fix the
                 * narrow window there too.
-                *
-                * Recalling truncate_inode_pages_range and unmap_mapping_range
-                * every time for punch_hole (which never got a chance to clear
-                * SHMEM_PAGEIN at the start of vmtruncate_range) is expensive,
-                * yet hardly ever necessary: try to optimize them out later.
                 */
                truncate_inode_pages_range(inode->i_mapping, start, end);
-               if (punch_hole)
-                       unmap_mapping_range(inode->i_mapping, start,
-                                                       end - start, 1);
        }
 
        spin_lock(&info->lock);
@@ -759,22 +760,23 @@ done2:
                shmem_free_pages(pages_to_free.next);
        }
 }
+EXPORT_SYMBOL_GPL(shmem_truncate_range);
 
-static int shmem_notify_change(struct dentry *dentry, struct iattr *attr)
+static int shmem_setattr(struct dentry *dentry, struct iattr *attr)
 {
        struct inode *inode = dentry->d_inode;
-       loff_t newsize = attr->ia_size;
        int error;
 
        error = inode_change_ok(inode, attr);
        if (error)
                return error;
 
-       if (S_ISREG(inode->i_mode) && (attr->ia_valid & ATTR_SIZE)
-                                       && newsize != inode->i_size) {
+       if (S_ISREG(inode->i_mode) && (attr->ia_valid & ATTR_SIZE)) {
+               loff_t oldsize = inode->i_size;
+               loff_t newsize = attr->ia_size;
                struct page *page = NULL;
 
-               if (newsize < inode->i_size) {
+               if (newsize < oldsize) {
                        /*
                         * If truncating down to a partial page, then
                         * if that page is already allocated, hold it
@@ -803,12 +805,19 @@ static int shmem_notify_change(struct dentry *dentry, struct iattr *attr)
                                spin_unlock(&info->lock);
                        }
                }
-
-               /* XXX(truncate): truncate_setsize should be called last */
-               truncate_setsize(inode, newsize);
+               if (newsize != oldsize) {
+                       i_size_write(inode, newsize);
+                       inode->i_ctime = inode->i_mtime = CURRENT_TIME;
+               }
+               if (newsize < oldsize) {
+                       loff_t holebegin = round_up(newsize, PAGE_SIZE);
+                       unmap_mapping_range(inode->i_mapping, holebegin, 0, 1);
+                       shmem_truncate_range(inode, newsize, (loff_t)-1);
+                       /* unmap again to remove racily COWed private pages */
+                       unmap_mapping_range(inode->i_mapping, holebegin, 0, 1);
+               }
                if (page)
                        page_cache_release(page);
-               shmem_truncate_range(inode, newsize, (loff_t)-1);
        }
 
        setattr_copy(inode, attr);
@@ -822,9 +831,9 @@ static int shmem_notify_change(struct dentry *dentry, struct iattr *attr)
 static void shmem_evict_inode(struct inode *inode)
 {
        struct shmem_inode_info *info = SHMEM_I(inode);
+       struct shmem_xattr *xattr, *nxattr;
 
        if (inode->i_mapping->a_ops == &shmem_aops) {
-               truncate_inode_pages(inode->i_mapping, 0);
                shmem_unacct_size(info->flags, inode->i_size);
                inode->i_size = 0;
                shmem_truncate_range(inode, 0, (loff_t)-1);
@@ -834,6 +843,11 @@ static void shmem_evict_inode(struct inode *inode)
                        mutex_unlock(&shmem_swaplist_mutex);
                }
        }
+
+       list_for_each_entry_safe(xattr, nxattr, &info->xattr_list, list) {
+               kfree(xattr->name);
+               kfree(xattr);
+       }
        BUG_ON(inode->i_blocks);
        shmem_free_inode(inode->i_sb);
        end_writeback(inode);
@@ -916,11 +930,12 @@ static int shmem_unuse_inode(struct shmem_inode_info *info, swp_entry_t entry, s
                        if (size > ENTRIES_PER_PAGE)
                                size = ENTRIES_PER_PAGE;
                        offset = shmem_find_swp(entry, ptr, ptr+size);
+                       shmem_swp_unmap(ptr);
                        if (offset >= 0) {
                                shmem_dir_unmap(dir);
+                               ptr = shmem_swp_map(subdir);
                                goto found;
                        }
-                       shmem_swp_unmap(ptr);
                }
        }
 lost1:
@@ -1100,8 +1115,8 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc)
                delete_from_page_cache(page);
                shmem_swp_set(info, entry, swap.val);
                shmem_swp_unmap(entry);
-               spin_unlock(&info->lock);
                swap_shmem_alloc(swap);
+               spin_unlock(&info->lock);
                BUG_ON(page_mapped(page));
                swap_writepage(page, wbc);
                return 0;
@@ -1291,12 +1306,10 @@ repeat:
                swappage = lookup_swap_cache(swap);
                if (!swappage) {
                        shmem_swp_unmap(entry);
+                       spin_unlock(&info->lock);
                        /* here we actually do the io */
-                       if (type && !(*type & VM_FAULT_MAJOR)) {
-                               __count_vm_event(PGMAJFAULT);
+                       if (type)
                                *type |= VM_FAULT_MAJOR;
-                       }
-                       spin_unlock(&info->lock);
                        swappage = shmem_swapin(swap, gfp, info, idx);
                        if (!swappage) {
                                spin_lock(&info->lock);
@@ -1535,7 +1548,10 @@ static int shmem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
        error = shmem_getpage(inode, vmf->pgoff, &vmf->page, SGP_CACHE, &ret);
        if (error)
                return ((error == -ENOMEM) ? VM_FAULT_OOM : VM_FAULT_SIGBUS);
-
+       if (ret & VM_FAULT_MAJOR) {
+               count_vm_event(PGMAJFAULT);
+               mem_cgroup_count_vm_event(vma->vm_mm, PGMAJFAULT);
+       }
        return ret | VM_FAULT_LOCKED;
 }
 
@@ -1614,6 +1630,7 @@ static struct inode *shmem_get_inode(struct super_block *sb, const struct inode
                spin_lock_init(&info->lock);
                info->flags = flags & VM_NORESERVE;
                INIT_LIST_HEAD(&info->swaplist);
+               INIT_LIST_HEAD(&info->xattr_list);
                cache_no_acl(inode);
 
                switch (mode & S_IFMT) {
@@ -2013,9 +2030,9 @@ static int shmem_symlink(struct inode *dir, struct dentry *dentry, const char *s
 
        info = SHMEM_I(inode);
        inode->i_size = len-1;
-       if (len <= (char *)inode - (char *)info) {
+       if (len <= SHMEM_SYMLINK_INLINE_LEN) {
                /* do it inline */
-               memcpy(info, symname, len);
+               memcpy(info->inline_symlink, symname, len);
                inode->i_op = &shmem_symlink_inline_operations;
        } else {
                error = shmem_getpage(inode, 0, &page, SGP_WRITE, NULL);
@@ -2041,7 +2058,7 @@ static int shmem_symlink(struct inode *dir, struct dentry *dentry, const char *s
 
 static void *shmem_follow_link_inline(struct dentry *dentry, struct nameidata *nd)
 {
-       nd_set_link(nd, (char *)SHMEM_I(dentry->d_inode));
+       nd_set_link(nd, SHMEM_I(dentry->d_inode)->inline_symlink);
        return NULL;
 }
 
@@ -2065,63 +2082,253 @@ static void shmem_put_link(struct dentry *dentry, struct nameidata *nd, void *co
        }
 }
 
-static const struct inode_operations shmem_symlink_inline_operations = {
-       .readlink       = generic_readlink,
-       .follow_link    = shmem_follow_link_inline,
-};
-
-static const struct inode_operations shmem_symlink_inode_operations = {
-       .readlink       = generic_readlink,
-       .follow_link    = shmem_follow_link,
-       .put_link       = shmem_put_link,
-};
-
-#ifdef CONFIG_TMPFS_POSIX_ACL
+#ifdef CONFIG_TMPFS_XATTR
 /*
- * Superblocks without xattr inode operations will get security.* xattr
- * support from the VFS "for free". As soon as we have any other xattrs
+ * Superblocks without xattr inode operations may get some security.* xattr
+ * support from the LSM "for free". As soon as we have any other xattrs
  * like ACLs, we also need to implement the security.* handlers at
  * filesystem level, though.
  */
 
-static size_t shmem_xattr_security_list(struct dentry *dentry, char *list,
-                                       size_t list_len, const char *name,
-                                       size_t name_len, int handler_flags)
+static int shmem_xattr_get(struct dentry *dentry, const char *name,
+                          void *buffer, size_t size)
 {
-       return security_inode_listsecurity(dentry->d_inode, list, list_len);
-}
+       struct shmem_inode_info *info;
+       struct shmem_xattr *xattr;
+       int ret = -ENODATA;
 
-static int shmem_xattr_security_get(struct dentry *dentry, const char *name,
-               void *buffer, size_t size, int handler_flags)
-{
-       if (strcmp(name, "") == 0)
-               return -EINVAL;
-       return xattr_getsecurity(dentry->d_inode, name, buffer, size);
+       info = SHMEM_I(dentry->d_inode);
+
+       spin_lock(&info->lock);
+       list_for_each_entry(xattr, &info->xattr_list, list) {
+               if (strcmp(name, xattr->name))
+                       continue;
+
+               ret = xattr->size;
+               if (buffer) {
+                       if (size < xattr->size)
+                               ret = -ERANGE;
+                       else
+                               memcpy(buffer, xattr->value, xattr->size);
+               }
+               break;
+       }
+       spin_unlock(&info->lock);
+       return ret;
 }
 
-static int shmem_xattr_security_set(struct dentry *dentry, const char *name,
-               const void *value, size_t size, int flags, int handler_flags)
+static int shmem_xattr_set(struct dentry *dentry, const char *name,
+                          const void *value, size_t size, int flags)
 {
-       if (strcmp(name, "") == 0)
-               return -EINVAL;
-       return security_inode_setsecurity(dentry->d_inode, name, value,
-                                         size, flags);
+       struct inode *inode = dentry->d_inode;
+       struct shmem_inode_info *info = SHMEM_I(inode);
+       struct shmem_xattr *xattr;
+       struct shmem_xattr *new_xattr = NULL;
+       size_t len;
+       int err = 0;
+
+       /* value == NULL means remove */
+       if (value) {
+               /* wrap around? */
+               len = sizeof(*new_xattr) + size;
+               if (len <= sizeof(*new_xattr))
+                       return -ENOMEM;
+
+               new_xattr = kmalloc(len, GFP_KERNEL);
+               if (!new_xattr)
+                       return -ENOMEM;
+
+               new_xattr->name = kstrdup(name, GFP_KERNEL);
+               if (!new_xattr->name) {
+                       kfree(new_xattr);
+                       return -ENOMEM;
+               }
+
+               new_xattr->size = size;
+               memcpy(new_xattr->value, value, size);
+       }
+
+       spin_lock(&info->lock);
+       list_for_each_entry(xattr, &info->xattr_list, list) {
+               if (!strcmp(name, xattr->name)) {
+                       if (flags & XATTR_CREATE) {
+                               xattr = new_xattr;
+                               err = -EEXIST;
+                       } else if (new_xattr) {
+                               list_replace(&xattr->list, &new_xattr->list);
+                       } else {
+                               list_del(&xattr->list);
+                       }
+                       goto out;
+               }
+       }
+       if (flags & XATTR_REPLACE) {
+               xattr = new_xattr;
+               err = -ENODATA;
+       } else {
+               list_add(&new_xattr->list, &info->xattr_list);
+               xattr = NULL;
+       }
+out:
+       spin_unlock(&info->lock);
+       if (xattr)
+               kfree(xattr->name);
+       kfree(xattr);
+       return err;
 }
 
-static const struct xattr_handler shmem_xattr_security_handler = {
-       .prefix = XATTR_SECURITY_PREFIX,
-       .list   = shmem_xattr_security_list,
-       .get    = shmem_xattr_security_get,
-       .set    = shmem_xattr_security_set,
-};
 
 static const struct xattr_handler *shmem_xattr_handlers[] = {
+#ifdef CONFIG_TMPFS_POSIX_ACL
        &generic_acl_access_handler,
        &generic_acl_default_handler,
-       &shmem_xattr_security_handler,
+#endif
        NULL
 };
+
+static int shmem_xattr_validate(const char *name)
+{
+       struct { const char *prefix; size_t len; } arr[] = {
+               { XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN },
+               { XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN }
+       };
+       int i;
+
+       for (i = 0; i < ARRAY_SIZE(arr); i++) {
+               size_t preflen = arr[i].len;
+               if (strncmp(name, arr[i].prefix, preflen) == 0) {
+                       if (!name[preflen])
+                               return -EINVAL;
+                       return 0;
+               }
+       }
+       return -EOPNOTSUPP;
+}
+
+static ssize_t shmem_getxattr(struct dentry *dentry, const char *name,
+                             void *buffer, size_t size)
+{
+       int err;
+
+       /*
+        * If this is a request for a synthetic attribute in the system.*
+        * namespace use the generic infrastructure to resolve a handler
+        * for it via sb->s_xattr.
+        */
+       if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN))
+               return generic_getxattr(dentry, name, buffer, size);
+
+       err = shmem_xattr_validate(name);
+       if (err)
+               return err;
+
+       return shmem_xattr_get(dentry, name, buffer, size);
+}
+
+static int shmem_setxattr(struct dentry *dentry, const char *name,
+                         const void *value, size_t size, int flags)
+{
+       int err;
+
+       /*
+        * If this is a request for a synthetic attribute in the system.*
+        * namespace use the generic infrastructure to resolve a handler
+        * for it via sb->s_xattr.
+        */
+       if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN))
+               return generic_setxattr(dentry, name, value, size, flags);
+
+       err = shmem_xattr_validate(name);
+       if (err)
+               return err;
+
+       if (size == 0)
+               value = "";  /* empty EA, do not remove */
+
+       return shmem_xattr_set(dentry, name, value, size, flags);
+
+}
+
+static int shmem_removexattr(struct dentry *dentry, const char *name)
+{
+       int err;
+
+       /*
+        * If this is a request for a synthetic attribute in the system.*
+        * namespace use the generic infrastructure to resolve a handler
+        * for it via sb->s_xattr.
+        */
+       if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN))
+               return generic_removexattr(dentry, name);
+
+       err = shmem_xattr_validate(name);
+       if (err)
+               return err;
+
+       return shmem_xattr_set(dentry, name, NULL, 0, XATTR_REPLACE);
+}
+
+static bool xattr_is_trusted(const char *name)
+{
+       return !strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN);
+}
+
+static ssize_t shmem_listxattr(struct dentry *dentry, char *buffer, size_t size)
+{
+       bool trusted = capable(CAP_SYS_ADMIN);
+       struct shmem_xattr *xattr;
+       struct shmem_inode_info *info;
+       size_t used = 0;
+
+       info = SHMEM_I(dentry->d_inode);
+
+       spin_lock(&info->lock);
+       list_for_each_entry(xattr, &info->xattr_list, list) {
+               size_t len;
+
+               /* skip "trusted." attributes for unprivileged callers */
+               if (!trusted && xattr_is_trusted(xattr->name))
+                       continue;
+
+               len = strlen(xattr->name) + 1;
+               used += len;
+               if (buffer) {
+                       if (size < used) {
+                               used = -ERANGE;
+                               break;
+                       }
+                       memcpy(buffer, xattr->name, len);
+                       buffer += len;
+               }
+       }
+       spin_unlock(&info->lock);
+
+       return used;
+}
+#endif /* CONFIG_TMPFS_XATTR */
+
+static const struct inode_operations shmem_symlink_inline_operations = {
+       .readlink       = generic_readlink,
+       .follow_link    = shmem_follow_link_inline,
+#ifdef CONFIG_TMPFS_XATTR
+       .setxattr       = shmem_setxattr,
+       .getxattr       = shmem_getxattr,
+       .listxattr      = shmem_listxattr,
+       .removexattr    = shmem_removexattr,
+#endif
+};
+
+static const struct inode_operations shmem_symlink_inode_operations = {
+       .readlink       = generic_readlink,
+       .follow_link    = shmem_follow_link,
+       .put_link       = shmem_put_link,
+#ifdef CONFIG_TMPFS_XATTR
+       .setxattr       = shmem_setxattr,
+       .getxattr       = shmem_getxattr,
+       .listxattr      = shmem_listxattr,
+       .removexattr    = shmem_removexattr,
 #endif
+};
 
 static struct dentry *shmem_get_parent(struct dentry *child)
 {
@@ -2401,8 +2608,10 @@ int shmem_fill_super(struct super_block *sb, void *data, int silent)
        sb->s_magic = TMPFS_MAGIC;
        sb->s_op = &shmem_ops;
        sb->s_time_gran = 1;
-#ifdef CONFIG_TMPFS_POSIX_ACL
+#ifdef CONFIG_TMPFS_XATTR
        sb->s_xattr = shmem_xattr_handlers;
+#endif
+#ifdef CONFIG_TMPFS_POSIX_ACL
        sb->s_flags |= MS_POSIXACL;
 #endif
 
@@ -2498,13 +2707,15 @@ static const struct file_operations shmem_file_operations = {
 };
 
 static const struct inode_operations shmem_inode_operations = {
-       .setattr        = shmem_notify_change,
+       .setattr        = shmem_setattr,
        .truncate_range = shmem_truncate_range,
+#ifdef CONFIG_TMPFS_XATTR
+       .setxattr       = shmem_setxattr,
+       .getxattr       = shmem_getxattr,
+       .listxattr      = shmem_listxattr,
+       .removexattr    = shmem_removexattr,
+#endif
 #ifdef CONFIG_TMPFS_POSIX_ACL
-       .setxattr       = generic_setxattr,
-       .getxattr       = generic_getxattr,
-       .listxattr      = generic_listxattr,
-       .removexattr    = generic_removexattr,
        .check_acl      = generic_check_acl,
 #endif
 
@@ -2522,23 +2733,27 @@ static const struct inode_operations shmem_dir_inode_operations = {
        .mknod          = shmem_mknod,
        .rename         = shmem_rename,
 #endif
+#ifdef CONFIG_TMPFS_XATTR
+       .setxattr       = shmem_setxattr,
+       .getxattr       = shmem_getxattr,
+       .listxattr      = shmem_listxattr,
+       .removexattr    = shmem_removexattr,
+#endif
 #ifdef CONFIG_TMPFS_POSIX_ACL
-       .setattr        = shmem_notify_change,
-       .setxattr       = generic_setxattr,
-       .getxattr       = generic_getxattr,
-       .listxattr      = generic_listxattr,
-       .removexattr    = generic_removexattr,
+       .setattr        = shmem_setattr,
        .check_acl      = generic_check_acl,
 #endif
 };
 
 static const struct inode_operations shmem_special_inode_operations = {
+#ifdef CONFIG_TMPFS_XATTR
+       .setxattr       = shmem_setxattr,
+       .getxattr       = shmem_getxattr,
+       .listxattr      = shmem_listxattr,
+       .removexattr    = shmem_removexattr,
+#endif
 #ifdef CONFIG_TMPFS_POSIX_ACL
-       .setattr        = shmem_notify_change,
-       .setxattr       = generic_setxattr,
-       .getxattr       = generic_getxattr,
-       .listxattr      = generic_listxattr,
-       .removexattr    = generic_removexattr,
+       .setattr        = shmem_setattr,
        .check_acl      = generic_check_acl,
 #endif
 };
@@ -2694,6 +2909,12 @@ int shmem_lock(struct file *file, int lock, struct user_struct *user)
        return 0;
 }
 
+void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end)
+{
+       truncate_inode_pages_range(inode->i_mapping, start, end);
+}
+EXPORT_SYMBOL_GPL(shmem_truncate_range);
+
 #ifdef CONFIG_CGROUP_MEM_RES_CTLR
 /**
  * mem_cgroup_get_shmem_target - find a page or entry assigned to the shmem file
@@ -2814,3 +3035,26 @@ int shmem_zero_setup(struct vm_area_struct *vma)
        vma->vm_flags |= VM_CAN_NONLINEAR;
        return 0;
 }
+
+/**
+ * shmem_read_mapping_page_gfp - read into page cache, using specified page allocation flags.
+ * @mapping:   the page's address_space
+ * @index:     the page index
+ * @gfp:       the page allocator flags to use if allocating
+ *
+ * This behaves as a tmpfs "read_cache_page_gfp(mapping, index, gfp)",
+ * with any new page allocations done using the specified allocation flags.
+ * But read_cache_page_gfp() uses the ->readpage() method: which does not
+ * suit tmpfs, since it may have pages in swapcache, and needs to find those
+ * for itself; although drivers/gpu/drm i915 and ttm rely upon this support.
+ *
+ * Provide a stub for those callers to start using now, then later
+ * flesh it out to call shmem_getpage() with additional gfp mask, when
+ * shmem_file_splice_read() is added and shmem_readpage() is removed.
+ */
+struct page *shmem_read_mapping_page_gfp(struct address_space *mapping,
+                                        pgoff_t index, gfp_t gfp)
+{
+       return read_cache_page_gfp(mapping, index, gfp);
+}
+EXPORT_SYMBOL_GPL(shmem_read_mapping_page_gfp);