Merge branch 'for-linus' of git://neil.brown.name/md

[firefly-linux-kernel-4.4.55.git] / mm / shmem.c
diff --git a/mm/shmem.c b/mm/shmem.c

index 8fa27e4e582a236886eba7d875e6c612130468fc..fcedf5464eb79dba02ad485681fcc67530bdb720 100644 (file)
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -99,6 +99,13 @@ static struct vfsmount *shm_mnt;
  /* Pretend that each entry is of this size in directory's i_size */
  #define BOGO_DIRENT_SIZE 20
  
+struct shmem_xattr {
+       struct list_head list;  /* anchored by shmem_inode_info->xattr_list */
+       char *name;             /* xattr name */
+       size_t size;
+       char value[0];
+};
+
  /* Flag allocation requirements to shmem_getpage and shmem_swp_alloc */
  enum sgp_type {
         SGP_READ,       /* don't exceed i_size, don't allocate page */
@@ -532,7 +539,7 @@ static void shmem_free_pages(struct list_head *next)
         } while (next);
  }
  
-static void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end)
+void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end)
  {
         struct shmem_inode_info *info = SHMEM_I(inode);
         unsigned long idx;
@@ -555,6 +562,8 @@ static void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end)
         spinlock_t *punch_lock;
         unsigned long upper_limit;
  
+       truncate_inode_pages_range(inode->i_mapping, start, end);
+
         inode->i_ctime = inode->i_mtime = CURRENT_TIME;
         idx = (start + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
         if (idx >= info->next_index)
@@ -731,16 +740,8 @@ done2:
                  * lowered next_index.  Also, though shmem_getpage checks
                  * i_size before adding to cache, no recheck after: so fix the
                  * narrow window there too.
-                *
-                * Recalling truncate_inode_pages_range and unmap_mapping_range
-                * every time for punch_hole (which never got a chance to clear
-                * SHMEM_PAGEIN at the start of vmtruncate_range) is expensive,
-                * yet hardly ever necessary: try to optimize them out later.
                  */
                 truncate_inode_pages_range(inode->i_mapping, start, end);
-               if (punch_hole)
-                       unmap_mapping_range(inode->i_mapping, start,
-                                                       end - start, 1);
         }
  
         spin_lock(&info->lock);
@@ -759,22 +760,23 @@ done2:
                 shmem_free_pages(pages_to_free.next);
         }
  }
+EXPORT_SYMBOL_GPL(shmem_truncate_range);
  
-static int shmem_notify_change(struct dentry *dentry, struct iattr *attr)
+static int shmem_setattr(struct dentry *dentry, struct iattr *attr)
  {
         struct inode *inode = dentry->d_inode;
-       loff_t newsize = attr->ia_size;
         int error;
  
         error = inode_change_ok(inode, attr);
         if (error)
                 return error;
  
-       if (S_ISREG(inode->i_mode) && (attr->ia_valid & ATTR_SIZE)
-                                       && newsize != inode->i_size) {
+       if (S_ISREG(inode->i_mode) && (attr->ia_valid & ATTR_SIZE)) {
+               loff_t oldsize = inode->i_size;
+               loff_t newsize = attr->ia_size;
                 struct page *page = NULL;
  
-               if (newsize < inode->i_size) {
+               if (newsize < oldsize) {
                         /*
                          * If truncating down to a partial page, then
                          * if that page is already allocated, hold it
@@ -803,12 +805,19 @@ static int shmem_notify_change(struct dentry *dentry, struct iattr *attr)
                                 spin_unlock(&info->lock);
                         }
                 }
-
-               /* XXX(truncate): truncate_setsize should be called last */
-               truncate_setsize(inode, newsize);
+               if (newsize != oldsize) {
+                       i_size_write(inode, newsize);
+                       inode->i_ctime = inode->i_mtime = CURRENT_TIME;
+               }
+               if (newsize < oldsize) {
+                       loff_t holebegin = round_up(newsize, PAGE_SIZE);
+                       unmap_mapping_range(inode->i_mapping, holebegin, 0, 1);
+                       shmem_truncate_range(inode, newsize, (loff_t)-1);
+                       /* unmap again to remove racily COWed private pages */
+                       unmap_mapping_range(inode->i_mapping, holebegin, 0, 1);
+               }
                 if (page)
                         page_cache_release(page);
-               shmem_truncate_range(inode, newsize, (loff_t)-1);
         }
  
         setattr_copy(inode, attr);
@@ -822,9 +831,9 @@ static int shmem_notify_change(struct dentry *dentry, struct iattr *attr)
  static void shmem_evict_inode(struct inode *inode)
  {
         struct shmem_inode_info *info = SHMEM_I(inode);
+       struct shmem_xattr *xattr, *nxattr;
  
         if (inode->i_mapping->a_ops == &shmem_aops) {
-               truncate_inode_pages(inode->i_mapping, 0);
                 shmem_unacct_size(info->flags, inode->i_size);
                 inode->i_size = 0;
                 shmem_truncate_range(inode, 0, (loff_t)-1);
@@ -834,6 +843,11 @@ static void shmem_evict_inode(struct inode *inode)
                         mutex_unlock(&shmem_swaplist_mutex);
                 }
         }
+
+       list_for_each_entry_safe(xattr, nxattr, &info->xattr_list, list) {
+               kfree(xattr->name);
+               kfree(xattr);
+       }
         BUG_ON(inode->i_blocks);
         shmem_free_inode(inode->i_sb);
         end_writeback(inode);
@@ -852,7 +866,7 @@ static inline int shmem_find_swp(swp_entry_t entry, swp_entry_t *dir, swp_entry_
  
  static int shmem_unuse_inode(struct shmem_inode_info *info, swp_entry_t entry, struct page *page)
  {
-       struct inode *inode;
+       struct address_space *mapping;
         unsigned long idx;
         unsigned long size;
         unsigned long limit;
@@ -875,8 +889,10 @@ static int shmem_unuse_inode(struct shmem_inode_info *info, swp_entry_t entry, s
         if (size > SHMEM_NR_DIRECT)
                 size = SHMEM_NR_DIRECT;
         offset = shmem_find_swp(entry, ptr, ptr+size);
-       if (offset >= 0)
+       if (offset >= 0) {
+               shmem_swp_balance_unmap();
                 goto found;
+       }
         if (!info->i_indirect)
                 goto lost2;
  
@@ -917,6 +933,7 @@ static int shmem_unuse_inode(struct shmem_inode_info *info, swp_entry_t entry, s
                         shmem_swp_unmap(ptr);
                         if (offset >= 0) {
                                 shmem_dir_unmap(dir);
+                               ptr = shmem_swp_map(subdir);
                                 goto found;
                         }
                 }
@@ -928,8 +945,7 @@ lost2:
         return 0;
  found:
         idx += offset;
-       inode = igrab(&info->vfs_inode);
-       spin_unlock(&info->lock);
+       ptr += offset;
  
         /*
          * Move _head_ to start search for next from here.
@@ -940,37 +956,18 @@ found:
          */
         if (shmem_swaplist.next != &info->swaplist)
                 list_move_tail(&shmem_swaplist, &info->swaplist);
-       mutex_unlock(&shmem_swaplist_mutex);
  
-       error = 1;
-       if (!inode)
-               goto out;
         /*
-        * Charge page using GFP_KERNEL while we can wait.
-        * Charged back to the user(not to caller) when swap account is used.
-        * add_to_page_cache() will be called with GFP_NOWAIT.
+        * We rely on shmem_swaplist_mutex, not only to protect the swaplist,
+        * but also to hold up shmem_evict_inode(): so inode cannot be freed
+        * beneath us (pagelock doesn't help until the page is in pagecache).
          */
-       error = mem_cgroup_cache_charge(page, current->mm, GFP_KERNEL);
-       if (error)
-               goto out;
-       error = radix_tree_preload(GFP_KERNEL);
-       if (error) {
-               mem_cgroup_uncharge_cache_page(page);
-               goto out;
-       }
-       error = 1;
-
-       spin_lock(&info->lock);
-       ptr = shmem_swp_entry(info, idx, NULL);
-       if (ptr && ptr->val == entry.val) {
-               error = add_to_page_cache_locked(page, inode->i_mapping,
-                                               idx, GFP_NOWAIT);
-               /* does mem_cgroup_uncharge_cache_page on error */
-       } else  /* we must compensate for our precharge above */
-               mem_cgroup_uncharge_cache_page(page);
+       mapping = info->vfs_inode.i_mapping;
+       error = add_to_page_cache_locked(page, mapping, idx, GFP_NOWAIT);
+       /* which does mem_cgroup_uncharge_cache_page on error */
  
         if (error == -EEXIST) {
-               struct page *filepage = find_get_page(inode->i_mapping, idx);
+               struct page *filepage = find_get_page(mapping, idx);
                 error = 1;
                 if (filepage) {
                         /*
@@ -990,14 +987,8 @@ found:
                 swap_free(entry);
                 error = 1;      /* not an error, but entry was found */
         }
-       if (ptr)
-               shmem_swp_unmap(ptr);
+       shmem_swp_unmap(ptr);
         spin_unlock(&info->lock);
-       radix_tree_preload_end();
-out:
-       unlock_page(page);
-       page_cache_release(page);
-       iput(inode);            /* allows for NULL */
         return error;
  }
  
@@ -1009,6 +1000,26 @@ int shmem_unuse(swp_entry_t entry, struct page *page)
         struct list_head *p, *next;
         struct shmem_inode_info *info;
         int found = 0;
+       int error;
+
+       /*
+        * Charge page using GFP_KERNEL while we can wait, before taking
+        * the shmem_swaplist_mutex which might hold up shmem_writepage().
+        * Charged back to the user (not to caller) when swap account is used.
+        * add_to_page_cache() will be called with GFP_NOWAIT.
+        */
+       error = mem_cgroup_cache_charge(page, current->mm, GFP_KERNEL);
+       if (error)
+               goto out;
+       /*
+        * Try to preload while we can wait, to not make a habit of
+        * draining atomic reserves; but don't latch on to this cpu,
+        * it's okay if sometimes we get rescheduled after this.
+        */
+       error = radix_tree_preload(GFP_KERNEL);
+       if (error)
+               goto uncharge;
+       radix_tree_preload_end();
  
         mutex_lock(&shmem_swaplist_mutex);
         list_for_each_safe(p, next, &shmem_swaplist) {
@@ -1016,17 +1027,19 @@ int shmem_unuse(swp_entry_t entry, struct page *page)
                 found = shmem_unuse_inode(info, entry, page);
                 cond_resched();
                 if (found)
-                       goto out;
+                       break;
         }
         mutex_unlock(&shmem_swaplist_mutex);
-       /*
-        * Can some race bring us here?  We've been holding page lock,
-        * so I think not; but would rather try again later than BUG()
-        */
+
+uncharge:
+       if (!found)
+               mem_cgroup_uncharge_cache_page(page);
+       if (found < 0)
+               error = found;
+out:
         unlock_page(page);
         page_cache_release(page);
-out:
-       return (found < 0) ? found : 0;
+       return error;
  }
  
  /*
@@ -1064,7 +1077,25 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc)
         else
                 swap.val = 0;
  
+       /*
+        * Add inode to shmem_unuse()'s list of swapped-out inodes,
+        * if it's not already there.  Do it now because we cannot take
+        * mutex while holding spinlock, and must do so before the page
+        * is moved to swap cache, when its pagelock no longer protects
+        * the inode from eviction.  But don't unlock the mutex until
+        * we've taken the spinlock, because shmem_unuse_inode() will
+        * prune a !swapped inode from the swaplist under both locks.
+        */
+       if (swap.val) {
+               mutex_lock(&shmem_swaplist_mutex);
+               if (list_empty(&info->swaplist))
+                       list_add_tail(&info->swaplist, &shmem_swaplist);
+       }
+
         spin_lock(&info->lock);
+       if (swap.val)
+               mutex_unlock(&shmem_swaplist_mutex);
+
         if (index >= info->next_index) {
                 BUG_ON(!(info->flags & SHMEM_TRUNCATE));
                 goto unlock;
@@ -1084,21 +1115,10 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc)
                 delete_from_page_cache(page);
                 shmem_swp_set(info, entry, swap.val);
                 shmem_swp_unmap(entry);
-               if (list_empty(&info->swaplist))
-                       inode = igrab(inode);
-               else
-                       inode = NULL;
-               spin_unlock(&info->lock);
                 swap_shmem_alloc(swap);
+               spin_unlock(&info->lock);
                 BUG_ON(page_mapped(page));
                 swap_writepage(page, wbc);
-               if (inode) {
-                       mutex_lock(&shmem_swaplist_mutex);
-                       /* move instead of add in case we're racing */
-                       list_move_tail(&info->swaplist, &shmem_swaplist);
-                       mutex_unlock(&shmem_swaplist_mutex);
-                       iput(inode);
-               }
                 return 0;
         }
  
@@ -1286,12 +1306,10 @@ repeat:
                 swappage = lookup_swap_cache(swap);
                 if (!swappage) {
                         shmem_swp_unmap(entry);
+                       spin_unlock(&info->lock);
                         /* here we actually do the io */
-                       if (type && !(*type & VM_FAULT_MAJOR)) {
-                               __count_vm_event(PGMAJFAULT);
+                       if (type)
                                 *type |= VM_FAULT_MAJOR;
-                       }
-                       spin_unlock(&info->lock);
                         swappage = shmem_swapin(swap, gfp, info, idx);
                         if (!swappage) {
                                 spin_lock(&info->lock);
@@ -1400,20 +1418,14 @@ repeat:
                 if (sbinfo->max_blocks) {
                         if (percpu_counter_compare(&sbinfo->used_blocks,
                                                 sbinfo->max_blocks) >= 0 ||
-                           shmem_acct_block(info->flags)) {
-                               spin_unlock(&info->lock);
-                               error = -ENOSPC;
-                               goto failed;
-                       }
+                           shmem_acct_block(info->flags))
+                               goto nospace;
                         percpu_counter_inc(&sbinfo->used_blocks);
                         spin_lock(&inode->i_lock);
                         inode->i_blocks += BLOCKS_PER_PAGE;
                         spin_unlock(&inode->i_lock);
-               } else if (shmem_acct_block(info->flags)) {
-                       spin_unlock(&info->lock);
-                       error = -ENOSPC;
-                       goto failed;
-               }
+               } else if (shmem_acct_block(info->flags))
+                       goto nospace;
  
                 if (!filepage) {
                         int ret;
@@ -1493,6 +1505,24 @@ done:
         error = 0;
         goto out;
  
+nospace:
+       /*
+        * Perhaps the page was brought in from swap between find_lock_page
+        * and taking info->lock?  We allow for that at add_to_page_cache_lru,
+        * but must also avoid reporting a spurious ENOSPC while working on a
+        * full tmpfs.  (When filepage has been passed in to shmem_getpage, it
+        * is already in page cache, which prevents this race from occurring.)
+        */
+       if (!filepage) {
+               struct page *page = find_get_page(mapping, idx);
+               if (page) {
+                       spin_unlock(&info->lock);
+                       page_cache_release(page);
+                       goto repeat;
+               }
+       }
+       spin_unlock(&info->lock);
+       error = -ENOSPC;
  failed:
         if (*pagep != filepage) {
                 unlock_page(filepage);
@@ -1518,7 +1548,10 @@ static int shmem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
         error = shmem_getpage(inode, vmf->pgoff, &vmf->page, SGP_CACHE, &ret);
         if (error)
                 return ((error == -ENOMEM) ? VM_FAULT_OOM : VM_FAULT_SIGBUS);
-
+       if (ret & VM_FAULT_MAJOR) {
+               count_vm_event(PGMAJFAULT);
+               mem_cgroup_count_vm_event(vma->vm_mm, PGMAJFAULT);
+       }
         return ret | VM_FAULT_LOCKED;
  }
  
@@ -1597,6 +1630,7 @@ static struct inode *shmem_get_inode(struct super_block *sb, const struct inode
                 spin_lock_init(&info->lock);
                 info->flags = flags & VM_NORESERVE;
                 INIT_LIST_HEAD(&info->swaplist);
+               INIT_LIST_HEAD(&info->xattr_list);
                 cache_no_acl(inode);
  
                 switch (mode & S_IFMT) {
@@ -1996,9 +2030,9 @@ static int shmem_symlink(struct inode *dir, struct dentry *dentry, const char *s
  
         info = SHMEM_I(inode);
         inode->i_size = len-1;
-       if (len <= (char *)inode - (char *)info) {
+       if (len <= SHMEM_SYMLINK_INLINE_LEN) {
                 /* do it inline */
-               memcpy(info, symname, len);
+               memcpy(info->inline_symlink, symname, len);
                 inode->i_op = &shmem_symlink_inline_operations;
         } else {
                 error = shmem_getpage(inode, 0, &page, SGP_WRITE, NULL);
@@ -2024,7 +2058,7 @@ static int shmem_symlink(struct inode *dir, struct dentry *dentry, const char *s
  
  static void *shmem_follow_link_inline(struct dentry *dentry, struct nameidata *nd)
  {
-       nd_set_link(nd, (char *)SHMEM_I(dentry->d_inode));
+       nd_set_link(nd, SHMEM_I(dentry->d_inode)->inline_symlink);
         return NULL;
  }
  
@@ -2048,63 +2082,253 @@ static void shmem_put_link(struct dentry *dentry, struct nameidata *nd, void *co
         }
  }
  
-static const struct inode_operations shmem_symlink_inline_operations = {
-       .readlink       = generic_readlink,
-       .follow_link    = shmem_follow_link_inline,
-};
-
-static const struct inode_operations shmem_symlink_inode_operations = {
-       .readlink       = generic_readlink,
-       .follow_link    = shmem_follow_link,
-       .put_link       = shmem_put_link,
-};
-
-#ifdef CONFIG_TMPFS_POSIX_ACL
+#ifdef CONFIG_TMPFS_XATTR
  /*
- * Superblocks without xattr inode operations will get security.* xattr
- * support from the VFS "for free". As soon as we have any other xattrs
+ * Superblocks without xattr inode operations may get some security.* xattr
+ * support from the LSM "for free". As soon as we have any other xattrs
   * like ACLs, we also need to implement the security.* handlers at
   * filesystem level, though.
   */
  
-static size_t shmem_xattr_security_list(struct dentry *dentry, char *list,
-                                       size_t list_len, const char *name,
-                                       size_t name_len, int handler_flags)
+static int shmem_xattr_get(struct dentry *dentry, const char *name,
+                          void *buffer, size_t size)
  {
-       return security_inode_listsecurity(dentry->d_inode, list, list_len);
-}
+       struct shmem_inode_info *info;
+       struct shmem_xattr *xattr;
+       int ret = -ENODATA;
  
-static int shmem_xattr_security_get(struct dentry *dentry, const char *name,
-               void *buffer, size_t size, int handler_flags)
-{
-       if (strcmp(name, "") == 0)
-               return -EINVAL;
-       return xattr_getsecurity(dentry->d_inode, name, buffer, size);
+       info = SHMEM_I(dentry->d_inode);
+
+       spin_lock(&info->lock);
+       list_for_each_entry(xattr, &info->xattr_list, list) {
+               if (strcmp(name, xattr->name))
+                       continue;
+
+               ret = xattr->size;
+               if (buffer) {
+                       if (size < xattr->size)
+                               ret = -ERANGE;
+                       else
+                               memcpy(buffer, xattr->value, xattr->size);
+               }
+               break;
+       }
+       spin_unlock(&info->lock);
+       return ret;
  }
  
-static int shmem_xattr_security_set(struct dentry *dentry, const char *name,
-               const void *value, size_t size, int flags, int handler_flags)
+static int shmem_xattr_set(struct dentry *dentry, const char *name,
+                          const void *value, size_t size, int flags)
  {
-       if (strcmp(name, "") == 0)
-               return -EINVAL;
-       return security_inode_setsecurity(dentry->d_inode, name, value,
-                                         size, flags);
+       struct inode *inode = dentry->d_inode;
+       struct shmem_inode_info *info = SHMEM_I(inode);
+       struct shmem_xattr *xattr;
+       struct shmem_xattr *new_xattr = NULL;
+       size_t len;
+       int err = 0;
+
+       /* value == NULL means remove */
+       if (value) {
+               /* wrap around? */
+               len = sizeof(*new_xattr) + size;
+               if (len <= sizeof(*new_xattr))
+                       return -ENOMEM;
+
+               new_xattr = kmalloc(len, GFP_KERNEL);
+               if (!new_xattr)
+                       return -ENOMEM;
+
+               new_xattr->name = kstrdup(name, GFP_KERNEL);
+               if (!new_xattr->name) {
+                       kfree(new_xattr);
+                       return -ENOMEM;
+               }
+
+               new_xattr->size = size;
+               memcpy(new_xattr->value, value, size);
+       }
+
+       spin_lock(&info->lock);
+       list_for_each_entry(xattr, &info->xattr_list, list) {
+               if (!strcmp(name, xattr->name)) {
+                       if (flags & XATTR_CREATE) {
+                               xattr = new_xattr;
+                               err = -EEXIST;
+                       } else if (new_xattr) {
+                               list_replace(&xattr->list, &new_xattr->list);
+                       } else {
+                               list_del(&xattr->list);
+                       }
+                       goto out;
+               }
+       }
+       if (flags & XATTR_REPLACE) {
+               xattr = new_xattr;
+               err = -ENODATA;
+       } else {
+               list_add(&new_xattr->list, &info->xattr_list);
+               xattr = NULL;
+       }
+out:
+       spin_unlock(&info->lock);
+       if (xattr)
+               kfree(xattr->name);
+       kfree(xattr);
+       return err;
  }
  
-static const struct xattr_handler shmem_xattr_security_handler = {
-       .prefix = XATTR_SECURITY_PREFIX,
-       .list   = shmem_xattr_security_list,
-       .get    = shmem_xattr_security_get,
-       .set    = shmem_xattr_security_set,
-};
  
  static const struct xattr_handler *shmem_xattr_handlers[] = {
+#ifdef CONFIG_TMPFS_POSIX_ACL
         &generic_acl_access_handler,
         &generic_acl_default_handler,
-       &shmem_xattr_security_handler,
+#endif
         NULL
  };
+
+static int shmem_xattr_validate(const char *name)
+{
+       struct { const char *prefix; size_t len; } arr[] = {
+               { XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN },
+               { XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN }
+       };
+       int i;
+
+       for (i = 0; i < ARRAY_SIZE(arr); i++) {
+               size_t preflen = arr[i].len;
+               if (strncmp(name, arr[i].prefix, preflen) == 0) {
+                       if (!name[preflen])
+                               return -EINVAL;
+                       return 0;
+               }
+       }
+       return -EOPNOTSUPP;
+}
+
+static ssize_t shmem_getxattr(struct dentry *dentry, const char *name,
+                             void *buffer, size_t size)
+{
+       int err;
+
+       /*
+        * If this is a request for a synthetic attribute in the system.*
+        * namespace use the generic infrastructure to resolve a handler
+        * for it via sb->s_xattr.
+        */
+       if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN))
+               return generic_getxattr(dentry, name, buffer, size);
+
+       err = shmem_xattr_validate(name);
+       if (err)
+               return err;
+
+       return shmem_xattr_get(dentry, name, buffer, size);
+}
+
+static int shmem_setxattr(struct dentry *dentry, const char *name,
+                         const void *value, size_t size, int flags)
+{
+       int err;
+
+       /*
+        * If this is a request for a synthetic attribute in the system.*
+        * namespace use the generic infrastructure to resolve a handler
+        * for it via sb->s_xattr.
+        */
+       if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN))
+               return generic_setxattr(dentry, name, value, size, flags);
+
+       err = shmem_xattr_validate(name);
+       if (err)
+               return err;
+
+       if (size == 0)
+               value = "";  /* empty EA, do not remove */
+
+       return shmem_xattr_set(dentry, name, value, size, flags);
+
+}
+
+static int shmem_removexattr(struct dentry *dentry, const char *name)
+{
+       int err;
+
+       /*
+        * If this is a request for a synthetic attribute in the system.*
+        * namespace use the generic infrastructure to resolve a handler
+        * for it via sb->s_xattr.
+        */
+       if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN))
+               return generic_removexattr(dentry, name);
+
+       err = shmem_xattr_validate(name);
+       if (err)
+               return err;
+
+       return shmem_xattr_set(dentry, name, NULL, 0, XATTR_REPLACE);
+}
+
+static bool xattr_is_trusted(const char *name)
+{
+       return !strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN);
+}
+
+static ssize_t shmem_listxattr(struct dentry *dentry, char *buffer, size_t size)
+{
+       bool trusted = capable(CAP_SYS_ADMIN);
+       struct shmem_xattr *xattr;
+       struct shmem_inode_info *info;
+       size_t used = 0;
+
+       info = SHMEM_I(dentry->d_inode);
+
+       spin_lock(&info->lock);
+       list_for_each_entry(xattr, &info->xattr_list, list) {
+               size_t len;
+
+               /* skip "trusted." attributes for unprivileged callers */
+               if (!trusted && xattr_is_trusted(xattr->name))
+                       continue;
+
+               len = strlen(xattr->name) + 1;
+               used += len;
+               if (buffer) {
+                       if (size < used) {
+                               used = -ERANGE;
+                               break;
+                       }
+                       memcpy(buffer, xattr->name, len);
+                       buffer += len;
+               }
+       }
+       spin_unlock(&info->lock);
+
+       return used;
+}
+#endif /* CONFIG_TMPFS_XATTR */
+
+static const struct inode_operations shmem_symlink_inline_operations = {
+       .readlink       = generic_readlink,
+       .follow_link    = shmem_follow_link_inline,
+#ifdef CONFIG_TMPFS_XATTR
+       .setxattr       = shmem_setxattr,
+       .getxattr       = shmem_getxattr,
+       .listxattr      = shmem_listxattr,
+       .removexattr    = shmem_removexattr,
+#endif
+};
+
+static const struct inode_operations shmem_symlink_inode_operations = {
+       .readlink       = generic_readlink,
+       .follow_link    = shmem_follow_link,
+       .put_link       = shmem_put_link,
+#ifdef CONFIG_TMPFS_XATTR
+       .setxattr       = shmem_setxattr,
+       .getxattr       = shmem_getxattr,
+       .listxattr      = shmem_listxattr,
+       .removexattr    = shmem_removexattr,
  #endif
+};
  
  static struct dentry *shmem_get_parent(struct dentry *child)
  {
@@ -2384,8 +2608,10 @@ int shmem_fill_super(struct super_block *sb, void *data, int silent)
         sb->s_magic = TMPFS_MAGIC;
         sb->s_op = &shmem_ops;
         sb->s_time_gran = 1;
-#ifdef CONFIG_TMPFS_POSIX_ACL
+#ifdef CONFIG_TMPFS_XATTR
         sb->s_xattr = shmem_xattr_handlers;
+#endif
+#ifdef CONFIG_TMPFS_POSIX_ACL
         sb->s_flags |= MS_POSIXACL;
  #endif
  
@@ -2481,13 +2707,15 @@ static const struct file_operations shmem_file_operations = {
  };
  
  static const struct inode_operations shmem_inode_operations = {
-       .setattr        = shmem_notify_change,
+       .setattr        = shmem_setattr,
         .truncate_range = shmem_truncate_range,
+#ifdef CONFIG_TMPFS_XATTR
+       .setxattr       = shmem_setxattr,
+       .getxattr       = shmem_getxattr,
+       .listxattr      = shmem_listxattr,
+       .removexattr    = shmem_removexattr,
+#endif
  #ifdef CONFIG_TMPFS_POSIX_ACL
-       .setxattr       = generic_setxattr,
-       .getxattr       = generic_getxattr,
-       .listxattr      = generic_listxattr,
-       .removexattr    = generic_removexattr,
         .check_acl      = generic_check_acl,
  #endif
  
@@ -2505,23 +2733,27 @@ static const struct inode_operations shmem_dir_inode_operations = {
         .mknod          = shmem_mknod,
         .rename         = shmem_rename,
  #endif
+#ifdef CONFIG_TMPFS_XATTR
+       .setxattr       = shmem_setxattr,
+       .getxattr       = shmem_getxattr,
+       .listxattr      = shmem_listxattr,
+       .removexattr    = shmem_removexattr,
+#endif
  #ifdef CONFIG_TMPFS_POSIX_ACL
-       .setattr        = shmem_notify_change,
-       .setxattr       = generic_setxattr,
-       .getxattr       = generic_getxattr,
-       .listxattr      = generic_listxattr,
-       .removexattr    = generic_removexattr,
+       .setattr        = shmem_setattr,
         .check_acl      = generic_check_acl,
  #endif
  };
  
  static const struct inode_operations shmem_special_inode_operations = {
+#ifdef CONFIG_TMPFS_XATTR
+       .setxattr       = shmem_setxattr,
+       .getxattr       = shmem_getxattr,
+       .listxattr      = shmem_listxattr,
+       .removexattr    = shmem_removexattr,
+#endif
  #ifdef CONFIG_TMPFS_POSIX_ACL
-       .setattr        = shmem_notify_change,
-       .setxattr       = generic_setxattr,
-       .getxattr       = generic_getxattr,
-       .listxattr      = generic_listxattr,
-       .removexattr    = generic_removexattr,
+       .setattr        = shmem_setattr,
         .check_acl      = generic_check_acl,
  #endif
  };
@@ -2677,6 +2909,12 @@ int shmem_lock(struct file *file, int lock, struct user_struct *user)
         return 0;
  }
  
+void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end)
+{
+       truncate_inode_pages_range(inode->i_mapping, start, end);
+}
+EXPORT_SYMBOL_GPL(shmem_truncate_range);
+
  #ifdef CONFIG_CGROUP_MEM_RES_CTLR
  /**
   * mem_cgroup_get_shmem_target - find a page or entry assigned to the shmem file
@@ -2797,3 +3035,26 @@ int shmem_zero_setup(struct vm_area_struct *vma)
         vma->vm_flags |= VM_CAN_NONLINEAR;
         return 0;
  }
+
+/**
+ * shmem_read_mapping_page_gfp - read into page cache, using specified page allocation flags.
+ * @mapping:   the page's address_space
+ * @index:     the page index
+ * @gfp:       the page allocator flags to use if allocating
+ *
+ * This behaves as a tmpfs "read_cache_page_gfp(mapping, index, gfp)",
+ * with any new page allocations done using the specified allocation flags.
+ * But read_cache_page_gfp() uses the ->readpage() method: which does not
+ * suit tmpfs, since it may have pages in swapcache, and needs to find those
+ * for itself; although drivers/gpu/drm i915 and ttm rely upon this support.
+ *
+ * Provide a stub for those callers to start using now, then later
+ * flesh it out to call shmem_getpage() with additional gfp mask, when
+ * shmem_file_splice_read() is added and shmem_readpage() is removed.
+ */
+struct page *shmem_read_mapping_page_gfp(struct address_space *mapping,
+                                        pgoff_t index, gfp_t gfp)
+{
+       return read_cache_page_gfp(mapping, index, gfp);
+}
+EXPORT_SYMBOL_GPL(shmem_read_mapping_page_gfp);