Merge branch 'acpi-hotplug'

[firefly-linux-kernel-4.4.55.git] / fs / namespace.c
diff --git a/fs/namespace.c b/fs/namespace.c

index 500202ce10dbf0a7b10ebd0b5d418fe1e52470c6..ac2ce8a766e1a9c6250cdac616f9a951dddee45e 100644 (file)
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -53,7 +53,7 @@ EXPORT_SYMBOL_GPL(fs_kobj);
   * It should be taken for write in all cases where the vfsmount
   * tree or hash is modified or when a vfsmount structure is modified.
   */
-DEFINE_BRLOCK(vfsmount_lock);
+__cacheline_aligned_in_smp DEFINE_SEQLOCK(mount_lock);
  
  static inline unsigned long hash(struct vfsmount *mnt, struct dentry *dentry)
  {
@@ -547,16 +547,38 @@ static void free_vfsmnt(struct mount *mnt)
         kmem_cache_free(mnt_cache, mnt);
  }
  
+/*
+ * Try to pin @bastard, which was looked up while mount_lock had the
+ * sequence value @seq.  Call under rcu_read_lock.
+ *
+ * Returns true when mount_lock has not changed since @seq was sampled:
+ * the mount count of @bastard has then been raised by one (nothing is
+ * taken when @bastard is NULL).  Returns false when the sequence has
+ * changed; any count added here has been given back, either directly or
+ * through mntput().  lookup_mnt() reacts to false by redoing its lookup.
+ */
+bool legitimize_mnt(struct vfsmount *bastard, unsigned seq)
+{
+       struct mount *mnt;
+       if (read_seqretry(&mount_lock, seq)) /* already stale: do not touch the mount */
+               return false;
+       if (bastard == NULL)
+               return true;
+       mnt = real_mount(bastard);
+       mnt_add_count(mnt, 1); /* take the count first, then recheck the sequence */
+       if (likely(!read_seqretry(&mount_lock, seq)))
+               return true;
+       if (bastard->mnt_flags & MNT_SYNC_UMOUNT) { /* flag is set by umount_tree() when how < 2 */
+               mnt_add_count(mnt, -1); /* plain decrement, no mntput() on this path */
+               return false;
+       }
+       rcu_read_unlock(); /* NOTE(review): presumably dropped because mntput() may block - confirm */
+       mntput(bastard);
+       rcu_read_lock(); /* restore the caller's RCU read-side section before returning */
+       return false;
+}
+
  /*
   * find the first mount at @dentry on vfsmount @mnt.
- * vfsmount_lock must be held for read or write.
+ * call under rcu_read_lock()
   */
  struct mount *__lookup_mnt(struct vfsmount *mnt, struct dentry *dentry)
  {
         struct list_head *head = mount_hashtable + hash(mnt, dentry);
         struct mount *p;
  
-       list_for_each_entry(p, head, mnt_hash)
+       list_for_each_entry_rcu(p, head, mnt_hash)
                 if (&p->mnt_parent->mnt == mnt && p->mnt_mountpoint == dentry)
                         return p;
         return NULL;
@@ -564,7 +586,7 @@ struct mount *__lookup_mnt(struct vfsmount *mnt, struct dentry *dentry)
  
  /*
   * find the last mount at @dentry on vfsmount @mnt.
- * vfsmount_lock must be held for read or write.
+ * mount_lock must be held.
   */
  struct mount *__lookup_mnt_last(struct vfsmount *mnt, struct dentry *dentry)
  {
@@ -596,17 +618,17 @@ struct mount *__lookup_mnt_last(struct vfsmount *mnt, struct dentry *dentry)
  struct vfsmount *lookup_mnt(struct path *path)
  {
         struct mount *child_mnt;
+       struct vfsmount *m;
+       unsigned seq; /* mount_lock sequence sampled for the current attempt */
  
-       br_read_lock(&vfsmount_lock);
-       child_mnt = __lookup_mnt(path->mnt, path->dentry);
-       if (child_mnt) {
-               mnt_add_count(child_mnt, 1);
-               br_read_unlock(&vfsmount_lock);
-               return &child_mnt->mnt;
-       } else {
-               br_read_unlock(&vfsmount_lock);
-               return NULL;
-       }
+       rcu_read_lock(); /* __lookup_mnt() and legitimize_mnt() are both documented as needing this */
+       do {
+               seq = read_seqbegin(&mount_lock);
+               child_mnt = __lookup_mnt(path->mnt, path->dentry);
+               m = child_mnt ? &child_mnt->mnt : NULL; /* NULL when __lookup_mnt() found no match */
+       } while (!legitimize_mnt(m, seq)); /* redo the lookup until it is validated against seq */
+       rcu_read_unlock();
+       return m; /* child vfsmount with its count raised by legitimize_mnt(), or NULL */
  }
  
  static struct mountpoint *new_mountpoint(struct dentry *dentry)
@@ -874,38 +896,46 @@ static struct mount *clone_mnt(struct mount *old, struct dentry *root,
         return ERR_PTR(err);
  }
  
+static void delayed_free(struct rcu_head *head) /* RCU callback: passed to call_rcu() with &mnt->mnt_rcu */
+{
+       struct mount *mnt = container_of(head, struct mount, mnt_rcu); /* recover the mount that embeds @head */
+       kfree(mnt->mnt_devname);
+#ifdef CONFIG_SMP
+       free_percpu(mnt->mnt_pcp); /* only freed when CONFIG_SMP is defined */
+#endif
+       kmem_cache_free(mnt_cache, mnt); /* last step: return the struct mount itself to mnt_cache */
+}
+
  static void mntput_no_expire(struct mount *mnt)
  {
  put_again:
-#ifdef CONFIG_SMP
-       br_read_lock(&vfsmount_lock);
-       if (likely(mnt->mnt_ns)) {
-               /* shouldn't be the last one */
-               mnt_add_count(mnt, -1);
-               br_read_unlock(&vfsmount_lock);
+       rcu_read_lock();
+       mnt_add_count(mnt, -1);
+       if (likely(mnt->mnt_ns)) { /* shouldn't be the last one */
+               rcu_read_unlock();
                 return;
         }
-       br_read_unlock(&vfsmount_lock);
-
         lock_mount_hash();
-       mnt_add_count(mnt, -1);
         if (mnt_get_count(mnt)) {
+               rcu_read_unlock();
                 unlock_mount_hash();
                 return;
         }
-#else
-       mnt_add_count(mnt, -1);
-       if (likely(mnt_get_count(mnt)))
-               return;
-       lock_mount_hash();
-#endif
         if (unlikely(mnt->mnt_pinned)) {
                 mnt_add_count(mnt, mnt->mnt_pinned + 1);
                 mnt->mnt_pinned = 0;
+               rcu_read_unlock();
                 unlock_mount_hash();
                 acct_auto_close_mnt(&mnt->mnt);
                 goto put_again;
         }
+       if (unlikely(mnt->mnt.mnt_flags & MNT_DOOMED)) {
+               rcu_read_unlock();
+               unlock_mount_hash();
+               return;
+       }
+       mnt->mnt.mnt_flags |= MNT_DOOMED;
+       rcu_read_unlock();
  
         list_del(&mnt->mnt_instance);
         unlock_mount_hash();
@@ -924,7 +954,8 @@ put_again:
         fsnotify_vfsmount_delete(&mnt->mnt);
         dput(mnt->mnt.mnt_root);
         deactivate_super(mnt->mnt.mnt_sb);
-       free_vfsmnt(mnt);
+       mnt_free_id(mnt);
+       call_rcu(&mnt->mnt_rcu, delayed_free);
  }
  
  void mntput(struct vfsmount *mnt)
@@ -1137,6 +1168,8 @@ static void namespace_unlock(void)
         list_splice_init(&unmounted, &head);
         up_write(&namespace_sem);
  
+       synchronize_rcu();
+
         while (!list_empty(&head)) {
                 mnt = list_first_entry(&head, struct mount, mnt_hash);
                 list_del_init(&mnt->mnt_hash);
@@ -1152,10 +1185,13 @@ static inline void namespace_lock(void)
  }
  
  /*
- * vfsmount lock must be held for write
+ * mount_lock must be held
   * namespace_sem must be held for write
+ * how = 0 => just this tree, don't propagate
+ * how = 1 => propagate; we know that nobody else has reference to any victims
+ * how = 2 => lazy umount
   */
-void umount_tree(struct mount *mnt, int propagate)
+void umount_tree(struct mount *mnt, int how)
  {
         LIST_HEAD(tmp_list);
         struct mount *p;
@@ -1163,7 +1199,7 @@ void umount_tree(struct mount *mnt, int propagate)
         for (p = mnt; p; p = next_mnt(p, mnt))
                 list_move(&p->mnt_hash, &tmp_list);
  
-       if (propagate)
+       if (how)
                 propagate_umount(&tmp_list);
  
         list_for_each_entry(p, &tmp_list, mnt_hash) {
@@ -1171,6 +1207,8 @@ void umount_tree(struct mount *mnt, int propagate)
                 list_del_init(&p->mnt_list);
                 __touch_mnt_namespace(p->mnt_ns);
                 p->mnt_ns = NULL;
+               if (how < 2)
+                       p->mnt.mnt_flags |= MNT_SYNC_UMOUNT;
                 list_del_init(&p->mnt_child);
                 if (mnt_has_parent(p)) {
                         put_mountpoint(p->mnt_mp);
@@ -1262,14 +1300,18 @@ static int do_umount(struct mount *mnt, int flags)
         lock_mount_hash();
         event++;
  
-       if (!(flags & MNT_DETACH))
-               shrink_submounts(mnt);
-
-       retval = -EBUSY;
-       if (flags & MNT_DETACH || !propagate_mount_busy(mnt, 2)) {
+       if (flags & MNT_DETACH) {
                 if (!list_empty(&mnt->mnt_list))
-                       umount_tree(mnt, 1);
+                       umount_tree(mnt, 2);
                 retval = 0;
+       } else {
+               shrink_submounts(mnt);
+               retval = -EBUSY;
+               if (!propagate_mount_busy(mnt, 2)) {
+                       if (!list_empty(&mnt->mnt_list))
+                               umount_tree(mnt, 1);
+                       retval = 0;
+               }
         }
         unlock_mount_hash();
         namespace_unlock();
@@ -1955,7 +1997,7 @@ static int do_add_mount(struct mount *newmnt, struct path *path, int mnt_flags)
         struct mount *parent;
         int err;
  
-       mnt_flags &= ~(MNT_SHARED | MNT_WRITE_HOLD | MNT_INTERNAL);
+       mnt_flags &= ~(MNT_SHARED | MNT_WRITE_HOLD | MNT_INTERNAL | MNT_DOOMED | MNT_SYNC_UMOUNT);
  
         mp = lock_mount(path);
         if (IS_ERR(mp))
@@ -2172,7 +2214,7 @@ resume:
   * process a list of expirable mountpoints with the intent of discarding any
   * submounts of a specific parent mountpoint
   *
- * vfsmount_lock must be held for write
+ * mount_lock must be held for write
   */
  static void shrink_submounts(struct mount *mnt)
  {
@@ -2558,7 +2600,7 @@ out_type:
  /*
   * Return true if path is reachable from root
   *
- * namespace_sem or vfsmount_lock is held
+ * namespace_sem or mount_lock is held
   */
  bool is_path_reachable(struct mount *mnt, struct dentry *dentry,
                          const struct path *root)
@@ -2573,9 +2615,9 @@ bool is_path_reachable(struct mount *mnt, struct dentry *dentry,
  int path_is_under(struct path *path1, struct path *path2)
  {
         int res;
-       br_read_lock(&vfsmount_lock);
+       read_seqlock_excl(&mount_lock); /* is_path_reachable() is documented as needing namespace_sem or mount_lock held */
         res = is_path_reachable(real_mount(path1->mnt), path1->dentry, path2);
-       br_read_unlock(&vfsmount_lock);
+       read_sequnlock_excl(&mount_lock);
         return res;
  }
  EXPORT_SYMBOL(path_is_under);
@@ -2748,8 +2790,6 @@ void __init mnt_init(void)
         for (u = 0; u < HASH_SIZE; u++)
                 INIT_LIST_HEAD(&mountpoint_hashtable[u]);
  
-       br_lock_init(&vfsmount_lock);
-
         err = sysfs_init();
         if (err)
                 printk(KERN_WARNING "%s: sysfs_init error: %d\n",
@@ -2788,9 +2828,8 @@ void kern_unmount(struct vfsmount *mnt)
  {
         /* release long term mount so mount point can be released */
         if (!IS_ERR_OR_NULL(mnt)) {
-               lock_mount_hash();
                 real_mount(mnt)->mnt_ns = NULL;
-               unlock_mount_hash();
+               synchronize_rcu();      /* yecchhh... */
                 mntput(mnt);
         }
  }