cgroup: enable task_cg_lists on the first cgroup mount

[firefly-linux-kernel-4.4.55.git] / kernel / cgroup.c
diff --git a/kernel/cgroup.c b/kernel/cgroup.c

index 506ebd61d1c2865f3ff226eb679d21f304380140..506f6da67ad1c0e1361db63d11c0ecd91e340802 100644 (file)
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -53,6 +53,7 @@
  #include <linux/vmalloc.h> /* TODO: replace with more sophisticated array */
  #include <linux/flex_array.h> /* used in cgroup_attach_task */
  #include <linux/kthread.h>
+#include <linux/delay.h>
  
  #include <linux/atomic.h>
  
@@ -145,8 +146,6 @@ static int cgroup_root_count;
  /* hierarchy ID allocation and mapping, protected by cgroup_mutex */
  static DEFINE_IDR(cgroup_hierarchy_idr);
  
-static struct cgroup_name root_cgroup_name = { .name = "/" };
-
  /*
   * Assign a monotonically increasing serial number to cgroups.  It
   * guarantees cgroups with bigger numbers are newer than those with smaller
@@ -174,6 +173,7 @@ static int cgroup_destroy_locked(struct cgroup *cgrp);
  static int cgroup_addrm_files(struct cgroup *cgrp, struct cftype cfts[],
                               bool is_add);
  static void cgroup_pidlist_destroy_all(struct cgroup *cgrp);
+static void cgroup_enable_task_cg_lists(void);
  
  /**
   * cgroup_css - obtain a cgroup's css for the specified subsystem
@@ -376,7 +376,7 @@ static unsigned long css_set_hash(struct cgroup_subsys_state *css[])
   * fork()/exit() overhead for people who have cgroups compiled into their
   * kernel but not actually in use.
   */
-static int use_task_css_set_links __read_mostly;
+static bool use_task_css_set_links __read_mostly;
  
  static void __put_css_set(struct css_set *cset, int taskexit)
  {
@@ -730,45 +730,19 @@ static void cgroup_free_root(struct cgroupfs_root *root)
         }
  }
  
-static void cgroup_get_root(struct cgroupfs_root *root)
-{
-       /*
-        * The caller must ensure that @root is alive, which can be
-        * achieved by holding a ref on one of the member cgroups or
-        * following a registered reference to @root while holding
-        * cgroup_tree_mutex.
-        */
-       WARN_ON_ONCE(atomic_read(&root->refcnt) <= 0);
-       atomic_inc(&root->refcnt);
-}
-
-static void cgroup_put_root(struct cgroupfs_root *root)
+static void cgroup_destroy_root(struct cgroupfs_root *root)
  {
         struct cgroup *cgrp = &root->top_cgroup;
         struct cgrp_cset_link *link, *tmp_link;
-       int ret;
  
-       /*
-        * @root's refcnt reaching zero and its deregistration should be
-        * atomic w.r.t. cgroup_tree_mutex.  This ensures that
-        * cgroup_get_root() is safe to invoke if @root is registered.
-        */
         mutex_lock(&cgroup_tree_mutex);
-       if (!atomic_dec_and_test(&root->refcnt)) {
-               mutex_unlock(&cgroup_tree_mutex);
-               return;
-       }
         mutex_lock(&cgroup_mutex);
  
-       BUG_ON(root->number_of_cgroups != 1);
+       BUG_ON(atomic_read(&root->nr_cgrps));
         BUG_ON(!list_empty(&cgrp->children));
  
         /* Rebind all subsystems back to the default hierarchy */
-       if (root->flags & CGRP_ROOT_SUBSYS_BOUND) {
-               ret = rebind_subsystems(root, 0, root->subsys_mask);
-               /* Shouldn't be able to fail ... */
-               BUG_ON(ret);
-       }
+       WARN_ON(rebind_subsystems(root, 0, root->subsys_mask));
  
         /*
          * Release all the links from cset_links to this hierarchy's
@@ -888,17 +862,6 @@ static int cgroup_populate_dir(struct cgroup *cgrp, unsigned long subsys_mask);
  static struct kernfs_syscall_ops cgroup_kf_syscall_ops;
  static const struct file_operations proc_cgroupstats_operations;
  
-static struct cgroup_name *cgroup_alloc_name(const char *name_str)
-{
-       struct cgroup_name *name;
-
-       name = kmalloc(sizeof(*name) + strlen(name_str) + 1, GFP_KERNEL);
-       if (!name)
-               return NULL;
-       strcpy(name->name, name_str);
-       return name;
-}
-
  static char *cgroup_file_name(struct cgroup *cgrp, const struct cftype *cft,
                               char *buf)
  {
@@ -941,24 +904,25 @@ static void cgroup_free_fn(struct work_struct *work)
  {
         struct cgroup *cgrp = container_of(work, struct cgroup, destroy_work);
  
-       mutex_lock(&cgroup_mutex);
-       cgrp->root->number_of_cgroups--;
-       mutex_unlock(&cgroup_mutex);
-
-       /*
-        * We get a ref to the parent, and put the ref when this cgroup is
-        * being freed, so it's guaranteed that the parent won't be
-        * destroyed before its children.
-        */
-       cgroup_put(cgrp->parent);
-
-       /* put the root reference that we took when we created the cgroup */
-       cgroup_put_root(cgrp->root);
-
+       atomic_dec(&cgrp->root->nr_cgrps);
         cgroup_pidlist_destroy_all(cgrp);
  
-       kfree(rcu_dereference_raw(cgrp->name));
-       kfree(cgrp);
+       if (cgrp->parent) {
+               /*
+                * We get a ref to the parent, and put the ref when this
+                * cgroup is being freed, so it's guaranteed that the
+                * parent won't be destroyed before its children.
+                */
+               cgroup_put(cgrp->parent);
+               kernfs_put(cgrp->kn);
+               kfree(cgrp);
+       } else {
+               /*
+                * This is the top cgroup's refcnt reaching zero, which
+                * indicates that the root should be released.
+                */
+               cgroup_destroy_root(cgrp->root);
+       }
  }
  
  static void cgroup_free_rcu(struct rcu_head *head)
@@ -980,7 +944,7 @@ static void cgroup_put(struct cgroup *cgrp)
  {
         if (!atomic_dec_and_test(&cgrp->refcnt))
                 return;
-       if (WARN_ON_ONCE(!cgroup_is_dead(cgrp)))
+       if (WARN_ON_ONCE(cgrp->parent && !cgroup_is_dead(cgrp)))
                 return;
  
         /*
@@ -1087,13 +1051,7 @@ static int rebind_subsystems(struct cgroupfs_root *root,
                 }
         }
  
-       /*
-        * Mark @root has finished binding subsystems.  @root->subsys_mask
-        * now matches the bound subsystems.
-        */
-       root->flags |= CGRP_ROOT_SUBSYS_BOUND;
         kernfs_activate(cgrp->kn);
-
         return 0;
  }
  
@@ -1258,18 +1216,12 @@ static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts)
         if (opts->flags & CGRP_ROOT_SANE_BEHAVIOR) {
                 pr_warning("cgroup: sane_behavior: this is still under development and its behaviors will change, proceed at your own risk\n");
  
-               if (opts->flags & CGRP_ROOT_NOPREFIX) {
-                       pr_err("cgroup: sane_behavior: noprefix is not allowed\n");
+               if ((opts->flags & (CGRP_ROOT_NOPREFIX | CGRP_ROOT_XATTR)) ||
+                   opts->cpuset_clone_children || opts->release_agent ||
+                   opts->name) {
+                       pr_err("cgroup: sane_behavior: noprefix, xattr, clone_children, release_agent and name are not allowed\n");
                         return -EINVAL;
                 }
-
-               if (opts->cpuset_clone_children) {
-                       pr_err("cgroup: sane_behavior: clone_children is not allowed\n");
-                       return -EINVAL;
-               }
-
-               if (opts->flags & CGRP_ROOT_XATTR)
-                       pr_warning("cgroup: sane_behavior: xattr is always available, flag unnecessary\n");
         }
  
         /*
@@ -1333,7 +1285,7 @@ static int cgroup_remount(struct kernfs_root *kf_root, int *flags, char *data)
         }
  
         /* remounting is not allowed for populated hierarchies */
-       if (root->number_of_cgroups > 1) {
+       if (!list_empty(&root->top_cgroup.children)) {
                 ret = -EBUSY;
                 goto out_unlock;
         }
@@ -1371,11 +1323,9 @@ static void init_cgroup_root(struct cgroupfs_root *root)
  {
         struct cgroup *cgrp = &root->top_cgroup;
  
-       atomic_set(&root->refcnt, 1);
         INIT_LIST_HEAD(&root->root_list);
-       root->number_of_cgroups = 1;
+       atomic_set(&root->nr_cgrps, 1);
         cgrp->root = root;
-       RCU_INIT_POINTER(cgrp->name, &root_cgroup_name);
         init_cgroup_housekeeping(cgrp);
         idr_init(&root->cgroup_idr);
  }
@@ -1393,15 +1343,6 @@ static struct cgroupfs_root *cgroup_root_from_opts(struct cgroup_sb_opts *opts)
  
         init_cgroup_root(root);
  
-       /*
-        * We need to set @root->subsys_mask now so that @root can be
-        * matched by cgroup_test_super() before it finishes
-        * initialization; otherwise, competing mounts with the same
-        * options may try to bind the same subsystems instead of waiting
-        * for the first one leading to unexpected mount errors.
-        * SUBSYS_BOUND will be set once actual binding is complete.
-        */
-       root->subsys_mask = opts->subsys_mask;
         root->flags = opts->flags;
         if (opts->release_agent)
                 strcpy(root->release_agent_path, opts->release_agent);
@@ -1412,7 +1353,7 @@ static struct cgroupfs_root *cgroup_root_from_opts(struct cgroup_sb_opts *opts)
         return root;
  }
  
-static int cgroup_setup_root(struct cgroupfs_root *root)
+static int cgroup_setup_root(struct cgroupfs_root *root, unsigned long ss_mask)
  {
         LIST_HEAD(tmp_links);
         struct cgroup *root_cgrp = &root->top_cgroup;
@@ -1455,7 +1396,7 @@ static int cgroup_setup_root(struct cgroupfs_root *root)
         if (ret)
                 goto destroy_root;
  
-       ret = rebind_subsystems(root, root->subsys_mask, 0);
+       ret = rebind_subsystems(root, ss_mask, 0);
         if (ret)
                 goto destroy_root;
  
@@ -1477,7 +1418,7 @@ static int cgroup_setup_root(struct cgroupfs_root *root)
         write_unlock(&css_set_lock);
  
         BUG_ON(!list_empty(&root_cgrp->children));
-       BUG_ON(root->number_of_cgroups != 1);
+       BUG_ON(atomic_read(&root->nr_cgrps) != 1);
  
         kernfs_activate(root_cgrp->kn);
         ret = 0;
@@ -1502,6 +1443,13 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
         struct dentry *dentry;
         int ret;
  
+       /*
+        * The first time anyone tries to mount a cgroup, enable the list
+        * linking each css_set to its tasks and fix up all existing tasks.
+        */
+       if (!use_task_css_set_links)
+               cgroup_enable_task_cg_lists();
+retry:
         mutex_lock(&cgroup_tree_mutex);
         mutex_lock(&cgroup_mutex);
  
@@ -1547,7 +1495,21 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
                         }
                 }
  
-               cgroup_get_root(root);
+               /*
+                * A root's lifetime is governed by its top cgroup.  A zero
+                * ref indicates that the root is being destroyed.  Wait for
+                * destruction to complete so that the subsystems are free.
+                * We could use a wait_queue for the wait but this path is
+                * super cold.  Let's just sleep for a bit and retry.
+                */
+               if (!atomic_inc_not_zero(&root->top_cgroup.refcnt)) {
+                       mutex_unlock(&cgroup_mutex);
+                       mutex_unlock(&cgroup_tree_mutex);
+                       msleep(10);
+                       goto retry;
+               }
+
+               ret = 0;
                 goto out_unlock;
         }
  
@@ -1558,7 +1520,7 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
                 goto out_unlock;
         }
  
-       ret = cgroup_setup_root(root);
+       ret = cgroup_setup_root(root, opts.subsys_mask);
         if (ret)
                 cgroup_free_root(root);
  
@@ -1574,7 +1536,7 @@ out_unlock:
  
         dentry = kernfs_mount(fs_type, flags, root->kf_root);
         if (IS_ERR(dentry))
-               cgroup_put_root(root);
+               cgroup_put(&root->top_cgroup);
         return dentry;
  }
  
@@ -1583,7 +1545,7 @@ static void cgroup_kill_sb(struct super_block *sb)
         struct kernfs_root *kf_root = kernfs_root_from_sb(sb);
         struct cgroupfs_root *root = cgroup_root_from_kf(kf_root);
  
-       cgroup_put_root(root);
+       cgroup_put(&root->top_cgroup);
         kernfs_kill_sb(sb);
  }
  
@@ -1595,57 +1557,6 @@ static struct file_system_type cgroup_fs_type = {
  
  static struct kobject *cgroup_kobj;
  
-/**
- * cgroup_path - generate the path of a cgroup
- * @cgrp: the cgroup in question
- * @buf: the buffer to write the path into
- * @buflen: the length of the buffer
- *
- * Writes path of cgroup into buf.  Returns 0 on success, -errno on error.
- *
- * We can't generate cgroup path using dentry->d_name, as accessing
- * dentry->name must be protected by irq-unsafe dentry->d_lock or parent
- * inode's i_mutex, while on the other hand cgroup_path() can be called
- * with some irq-safe spinlocks held.
- */
-int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen)
-{
-       int ret = -ENAMETOOLONG;
-       char *start;
-
-       if (!cgrp->parent) {
-               if (strlcpy(buf, "/", buflen) >= buflen)
-                       return -ENAMETOOLONG;
-               return 0;
-       }
-
-       start = buf + buflen - 1;
-       *start = '\0';
-
-       rcu_read_lock();
-       do {
-               const char *name = cgroup_name(cgrp);
-               int len;
-
-               len = strlen(name);
-               if ((start -= len) < buf)
-                       goto out;
-               memcpy(start, name, len);
-
-               if (--start < buf)
-                       goto out;
-               *start = '/';
-
-               cgrp = cgrp->parent;
-       } while (cgrp->parent);
-       ret = 0;
-       memmove(buf, start, buf + buflen - start);
-out:
-       rcu_read_unlock();
-       return ret;
-}
-EXPORT_SYMBOL_GPL(cgroup_path);
-
  /**
   * task_cgroup_path - cgroup path of a task in the first cgroup hierarchy
   * @task: target task
@@ -1657,16 +1568,14 @@ EXPORT_SYMBOL_GPL(cgroup_path);
   * function grabs cgroup_mutex and shouldn't be used inside locks used by
   * cgroup controller callbacks.
   *
- * Returns 0 on success, fails with -%ENAMETOOLONG if @buflen is too short.
+ * Return value is the same as kernfs_path().
   */
-int task_cgroup_path(struct task_struct *task, char *buf, size_t buflen)
+char *task_cgroup_path(struct task_struct *task, char *buf, size_t buflen)
  {
         struct cgroupfs_root *root;
         struct cgroup *cgrp;
-       int hierarchy_id = 1, ret = 0;
-
-       if (buflen < 2)
-               return -ENAMETOOLONG;
+       int hierarchy_id = 1;
+       char *path = NULL;
  
         mutex_lock(&cgroup_mutex);
  
@@ -1674,14 +1583,15 @@ int task_cgroup_path(struct task_struct *task, char *buf, size_t buflen)
  
         if (root) {
                 cgrp = task_cgroup_from_root(task, root);
-               ret = cgroup_path(cgrp, buf, buflen);
+               path = cgroup_path(cgrp, buf, buflen);
         } else {
                 /* if no hierarchy exists, everyone is in "/" */
-               memcpy(buf, "/", 2);
+               if (strlcpy(buf, "/", buflen) < buflen)
+                       path = buf;
         }
  
         mutex_unlock(&cgroup_mutex);
-       return ret;
+       return path;
  }
  EXPORT_SYMBOL_GPL(task_cgroup_path);
  
@@ -1790,10 +1700,8 @@ static void cgroup_task_migrate(struct cgroup *old_cgrp,
         rcu_assign_pointer(tsk->cgroups, new_cset);
         task_unlock(tsk);
  
-       /* Update the css_set linked lists if we're using them */
         write_lock(&css_set_lock);
-       if (!list_empty(&tsk->cg_list))
-               list_move(&tsk->cg_list, &new_cset->tasks);
+       list_move(&tsk->cg_list, &new_cset->tasks);
         write_unlock(&css_set_lock);
  
         /*
@@ -2209,7 +2117,6 @@ static int cgroup_rename(struct kernfs_node *kn, struct kernfs_node *new_parent,
                          const char *new_name_str)
  {
         struct cgroup *cgrp = kn->priv;
-       struct cgroup_name *name, *old_name;
         int ret;
  
         if (kernfs_type(kn) != KERNFS_DIR)
@@ -2224,25 +2131,13 @@ static int cgroup_rename(struct kernfs_node *kn, struct kernfs_node *new_parent,
         if (cgroup_sane_behavior(cgrp))
                 return -EPERM;
  
-       name = cgroup_alloc_name(new_name_str);
-       if (!name)
-               return -ENOMEM;
-
         mutex_lock(&cgroup_tree_mutex);
         mutex_lock(&cgroup_mutex);
  
         ret = kernfs_rename(kn, new_parent, new_name_str);
-       if (!ret) {
-               old_name = rcu_dereference_protected(cgrp->name, true);
-               rcu_assign_pointer(cgrp->name, name);
-       } else {
-               old_name = name;
-       }
  
         mutex_unlock(&cgroup_mutex);
         mutex_unlock(&cgroup_tree_mutex);
-
-       kfree_rcu(old_name, rcu_head);
         return ret;
  }
  
@@ -2305,46 +2200,19 @@ static int cgroup_addrm_files(struct cgroup *cgrp, struct cftype cfts[],
         return 0;
  }
  
-static void cgroup_cfts_prepare(void)
-       __acquires(&cgroup_mutex)
-{
-       /*
-        * Thanks to the entanglement with vfs inode locking, we can't walk
-        * the existing cgroups under cgroup_mutex and create files.
-        * Instead, we use css_for_each_descendant_pre() and drop RCU read
-        * lock before calling cgroup_addrm_files().
-        */
-       mutex_lock(&cgroup_tree_mutex);
-       mutex_lock(&cgroup_mutex);
-}
-
-static int cgroup_cfts_commit(struct cftype *cfts, bool is_add)
-       __releases(&cgroup_mutex)
+static int cgroup_apply_cftypes(struct cftype *cfts, bool is_add)
  {
         LIST_HEAD(pending);
         struct cgroup_subsys *ss = cfts[0].ss;
         struct cgroup *root = &ss->root->top_cgroup;
-       struct cgroup *prev = NULL;
         struct cgroup_subsys_state *css;
-       u64 update_before;
         int ret = 0;
  
-       mutex_unlock(&cgroup_mutex);
+       lockdep_assert_held(&cgroup_tree_mutex);
  
-       /* %NULL @cfts indicates abort and don't bother if @ss isn't attached */
-       if (!cfts || ss->root == &cgroup_dummy_root) {
-               mutex_unlock(&cgroup_tree_mutex);
+       /* don't bother if @ss isn't attached */
+       if (ss->root == &cgroup_dummy_root)
                 return 0;
-       }
-
-       cgroup_get_root(ss->root);
-
-       /*
-        * All cgroups which are created after we drop cgroup_mutex will
-        * have the updated set of files, so we only need to update the
-        * cgroups created before the current @cgroup_serial_nr_next.
-        */
-       update_before = cgroup_serial_nr_next;
  
         /* add/rm files for all cgroups created before */
         css_for_each_descendant_pre(css, cgroup_css(root, ss)) {
@@ -2353,22 +2221,13 @@ static int cgroup_cfts_commit(struct cftype *cfts, bool is_add)
                 if (cgroup_is_dead(cgrp))
                         continue;
  
-               cgroup_get(cgrp);
-               if (prev)
-                       cgroup_put(prev);
-               prev = cgrp;
-
-               if (cgrp->serial_nr < update_before && !cgroup_is_dead(cgrp)) {
-                       ret = cgroup_addrm_files(cgrp, cfts, is_add);
-                       if (is_add)
-                               kernfs_activate(cgrp->kn);
-               }
+               ret = cgroup_addrm_files(cgrp, cfts, is_add);
                 if (ret)
                         break;
         }
-       mutex_unlock(&cgroup_tree_mutex);
-       cgroup_put(prev);
-       cgroup_put_root(ss->root);
+
+       if (is_add && !ret)
+               kernfs_activate(root->kn);
         return ret;
  }
  
@@ -2419,6 +2278,19 @@ static int cgroup_init_cftypes(struct cgroup_subsys *ss, struct cftype *cfts)
         return 0;
  }
  
+static int cgroup_rm_cftypes_locked(struct cftype *cfts)
+{
+       lockdep_assert_held(&cgroup_tree_mutex);
+
+       if (!cfts || !cfts[0].ss)
+               return -ENOENT;
+
+       list_del(&cfts->node);
+       cgroup_apply_cftypes(cfts, false);
+       cgroup_exit_cftypes(cfts);
+       return 0;
+}
+
  /**
   * cgroup_rm_cftypes - remove an array of cftypes from a subsystem
   * @cfts: zero-length name terminated array of cftypes
@@ -2432,15 +2304,12 @@ static int cgroup_init_cftypes(struct cgroup_subsys *ss, struct cftype *cfts)
   */
  int cgroup_rm_cftypes(struct cftype *cfts)
  {
-       if (!cfts || !cfts[0].ss)
-               return -ENOENT;
-
-       cgroup_cfts_prepare();
-       list_del(&cfts->node);
-       cgroup_cfts_commit(cfts, false);
+       int ret;
  
-       cgroup_exit_cftypes(cfts);
-       return 0;
+       mutex_lock(&cgroup_tree_mutex);
+       ret = cgroup_rm_cftypes_locked(cfts);
+       mutex_unlock(&cgroup_tree_mutex);
+       return ret;
  }
  
  /**
@@ -2465,11 +2334,14 @@ int cgroup_add_cftypes(struct cgroup_subsys *ss, struct cftype *cfts)
         if (ret)
                 return ret;
  
-       cgroup_cfts_prepare();
+       mutex_lock(&cgroup_tree_mutex);
+
         list_add_tail(&cfts->node, &ss->cfts);
-       ret = cgroup_cfts_commit(cfts, true);
+       ret = cgroup_apply_cftypes(cfts, true);
         if (ret)
-               cgroup_rm_cftypes(cfts);
+               cgroup_rm_cftypes_locked(cfts);
+
+       mutex_unlock(&cgroup_tree_mutex);
         return ret;
  }
  EXPORT_SYMBOL_GPL(cgroup_add_cftypes);
@@ -2496,13 +2368,19 @@ int cgroup_task_count(const struct cgroup *cgrp)
   * To reduce the fork() overhead for systems that are not actually using
   * their cgroups capability, we don't maintain the lists running through
   * each css_set to its tasks until we see the list actually used - in other
- * words after the first call to css_task_iter_start().
+ * words after the first mount.
   */
  static void cgroup_enable_task_cg_lists(void)
  {
         struct task_struct *p, *g;
+
         write_lock(&css_set_lock);
-       use_task_css_set_links = 1;
+
+       if (use_task_css_set_links)
+               goto out_unlock;
+
+       use_task_css_set_links = true;
+
         /*
          * We need tasklist_lock because RCU is not safe against
          * while_each_thread(). Besides, a forking task that has passed
@@ -2513,16 +2391,22 @@ static void cgroup_enable_task_cg_lists(void)
         read_lock(&tasklist_lock);
         do_each_thread(g, p) {
                 task_lock(p);
+
+               WARN_ON_ONCE(!list_empty(&p->cg_list) ||
+                            task_css_set(p) != &init_css_set);
+
                 /*
                  * We should check if the process is exiting, otherwise
                  * it will race with cgroup_exit() in that the list
                  * entry won't be deleted though the process has exited.
                  */
-               if (!(p->flags & PF_EXITING) && list_empty(&p->cg_list))
+               if (!(p->flags & PF_EXITING))
                         list_add(&p->cg_list, &task_css_set(p)->tasks);
+
                 task_unlock(p);
         } while_each_thread(g, p);
         read_unlock(&tasklist_lock);
+out_unlock:
         write_unlock(&css_set_lock);
  }
  
@@ -2755,13 +2639,8 @@ void css_task_iter_start(struct cgroup_subsys_state *css,
                          struct css_task_iter *it)
         __acquires(css_set_lock)
  {
-       /*
-        * The first time anyone tries to iterate across a css, we need to
-        * enable the list linking each css_set to its tasks, and fix up
-        * all existing tasks.
-        */
-       if (!use_task_css_set_links)
-               cgroup_enable_task_cg_lists();
+       /* no one should try to iterate before mounting cgroups */
+       WARN_ON_ONCE(!use_task_css_set_links);
  
         read_lock(&css_set_lock);
  
@@ -3740,14 +3619,13 @@ err_free:
  /**
   * cgroup_create - create a cgroup
   * @parent: cgroup that will be parent of the new cgroup
- * @name_str: name of the new cgroup
+ * @name: name of the new cgroup
   * @mode: mode to set on new cgroup
   */
-static long cgroup_create(struct cgroup *parent, const char *name_str,
+static long cgroup_create(struct cgroup *parent, const char *name,
                           umode_t mode)
  {
         struct cgroup *cgrp;
-       struct cgroup_name *name;
         struct cgroupfs_root *root = parent->root;
         int ssid, err;
         struct cgroup_subsys *ss;
@@ -3758,13 +3636,6 @@ static long cgroup_create(struct cgroup *parent, const char *name_str,
         if (!cgrp)
                 return -ENOMEM;
  
-       name = cgroup_alloc_name(name_str);
-       if (!name) {
-               err = -ENOMEM;
-               goto err_free_cgrp;
-       }
-       rcu_assign_pointer(cgrp->name, name);
-
         mutex_lock(&cgroup_tree_mutex);
  
         /*
@@ -3802,24 +3673,24 @@ static long cgroup_create(struct cgroup *parent, const char *name_str,
                 set_bit(CGRP_CPUSET_CLONE_CHILDREN, &cgrp->flags);
  
         /* create the directory */
-       kn = kernfs_create_dir(parent->kn, name->name, mode, cgrp);
+       kn = kernfs_create_dir(parent->kn, name, mode, cgrp);
         if (IS_ERR(kn)) {
                 err = PTR_ERR(kn);
                 goto err_free_id;
         }
         cgrp->kn = kn;
  
+       /*
+        * This extra ref will be put in cgroup_free_fn() and guarantees
+        * that @cgrp->kn is always accessible.
+        */
+       kernfs_get(kn);
+
         cgrp->serial_nr = cgroup_serial_nr_next++;
  
         /* allocation complete, commit to creation */
         list_add_tail_rcu(&cgrp->sibling, &cgrp->parent->children);
-       root->number_of_cgroups++;
-
-       /*
-        * Grab a reference on the root and parent so that they don't get
-        * deleted while there are child cgroups.
-        */
-       cgroup_get_root(root);
+       atomic_inc(&root->nr_cgrps);
         cgroup_get(parent);
  
         /*
@@ -3854,8 +3725,6 @@ err_unlock:
         mutex_unlock(&cgroup_mutex);
  err_unlock_tree:
         mutex_unlock(&cgroup_tree_mutex);
-       kfree(rcu_dereference_raw(cgrp->name));
-err_free_cgrp:
         kfree(cgrp);
         return err;
  
@@ -3989,7 +3858,6 @@ static int cgroup_destroy_locked(struct cgroup *cgrp)
  {
         struct cgroup *child;
         struct cgroup_subsys_state *css;
-       struct kernfs_node *kn;
         bool empty;
         int ssid;
  
@@ -4067,13 +3935,8 @@ static int cgroup_destroy_locked(struct cgroup *cgrp)
          * clearing of cgrp->kn->priv backpointer, which should happen
          * after all files under it have been removed.
          */
-       kn = cgrp->kn;
-       kernfs_get(kn);
-
-       kernfs_remove(cgrp->kn);
-
+       kernfs_remove(cgrp->kn);        /* @cgrp has an extra ref on its kn */
         RCU_INIT_POINTER(*(void __rcu __force **)&cgrp->kn->priv, NULL);
-       kernfs_put(kn);
  
         mutex_lock(&cgroup_mutex);
  
@@ -4325,12 +4188,12 @@ int proc_cgroup_show(struct seq_file *m, void *v)
  {
         struct pid *pid;
         struct task_struct *tsk;
-       char *buf;
+       char *buf, *path;
         int retval;
         struct cgroupfs_root *root;
  
         retval = -ENOMEM;
-       buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
+       buf = kmalloc(PATH_MAX, GFP_KERNEL);
         if (!buf)
                 goto out;
  
@@ -4358,10 +4221,12 @@ int proc_cgroup_show(struct seq_file *m, void *v)
                                    root->name);
                 seq_putc(m, ':');
                 cgrp = task_cgroup_from_root(tsk, root);
-               retval = cgroup_path(cgrp, buf, PAGE_SIZE);
-               if (retval < 0)
+               path = cgroup_path(cgrp, buf, PATH_MAX);
+               if (!path) {
+                       retval = -ENAMETOOLONG;
                         goto out_unlock;
-               seq_puts(m, buf);
+               }
+               seq_puts(m, path);
                 seq_putc(m, '\n');
         }
  
@@ -4391,7 +4256,7 @@ static int proc_cgroupstats_show(struct seq_file *m, void *v)
         for_each_subsys(ss, i)
                 seq_printf(m, "%s\t%d\t%d\t%d\n",
                            ss->name, ss->root->hierarchy_id,
-                          ss->root->number_of_cgroups, !ss->disabled);
+                          atomic_read(&ss->root->nr_cgrps), !ss->disabled);
  
         mutex_unlock(&cgroup_mutex);
         return 0;
@@ -4609,16 +4474,17 @@ static void cgroup_release_agent(struct work_struct *work)
         while (!list_empty(&release_list)) {
                 char *argv[3], *envp[3];
                 int i;
-               char *pathbuf = NULL, *agentbuf = NULL;
+               char *pathbuf = NULL, *agentbuf = NULL, *path;
                 struct cgroup *cgrp = list_entry(release_list.next,
                                                     struct cgroup,
                                                     release_list);
                 list_del_init(&cgrp->release_list);
                 raw_spin_unlock(&release_list_lock);
-               pathbuf = kmalloc(PAGE_SIZE, GFP_KERNEL);
+               pathbuf = kmalloc(PATH_MAX, GFP_KERNEL);
                 if (!pathbuf)
                         goto continue_free;
-               if (cgroup_path(cgrp, pathbuf, PAGE_SIZE) < 0)
+               path = cgroup_path(cgrp, pathbuf, PATH_MAX);
+               if (!path)
                         goto continue_free;
                 agentbuf = kstrdup(cgrp->root->release_agent_path, GFP_KERNEL);
                 if (!agentbuf)
@@ -4626,7 +4492,7 @@ static void cgroup_release_agent(struct work_struct *work)
  
                 i = 0;
                 argv[i++] = agentbuf;
-               argv[i++] = pathbuf;
+               argv[i++] = path;
                 argv[i] = NULL;
  
                 i = 0;
@@ -4776,6 +4642,11 @@ static int current_css_set_cg_links_read(struct seq_file *seq, void *v)
  {
         struct cgrp_cset_link *link;
         struct css_set *cset;
+       char *name_buf;
+
+       name_buf = kmalloc(NAME_MAX + 1, GFP_KERNEL);
+       if (!name_buf)
+               return -ENOMEM;
  
         read_lock(&css_set_lock);
         rcu_read_lock();
@@ -4784,14 +4655,17 @@ static int current_css_set_cg_links_read(struct seq_file *seq, void *v)
                 struct cgroup *c = link->cgrp;
                 const char *name = "?";
  
-               if (c != cgroup_dummy_top)
-                       name = cgroup_name(c);
+               if (c != cgroup_dummy_top) {
+                       cgroup_name(c, name_buf, NAME_MAX + 1);
+                       name = name_buf;
+               }
  
                 seq_printf(seq, "Root %d group %s\n",
                            c->root->hierarchy_id, name);
         }
         rcu_read_unlock();
         read_unlock(&css_set_lock);
+       kfree(name_buf);
         return 0;
  }