X-Git-Url: http://demsky.eecs.uci.edu/git/?a=blobdiff_plain;f=kernel%2Fcgroup.c;h=3948f0af58f9f5dbb92af93932efcf21da4ae068;hb=eb8bf67cd55b218b98a88f6105e2115efbc94d17;hp=2731d115d725c0a216f06d18cd60367eab4210da;hpb=1acc9309eb2674533944f48dbaaa53e7750e3947;p=firefly-linux-kernel-4.4.55.git diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 2731d115d725..3948f0af58f9 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -268,6 +268,33 @@ static void cgroup_release_agent(struct work_struct *work); static DECLARE_WORK(release_agent_work, cgroup_release_agent); static void check_for_release(struct cgroup *cgrp); +/* + * A queue for tasks waiting to rmdir() a cgroup. A task will sleep when + * cgroup->count == 0 && list_empty(&cgroup->children) but a subsystem still + * holds a reference to css->refcnt. In general, this refcnt is expected to + * drop to zero soon. + * + * The CGRP_WAIT_ON_RMDIR flag is set under the cgroup's inode->i_mutex. + */ +DECLARE_WAIT_QUEUE_HEAD(cgroup_rmdir_waitq); + +static void cgroup_wakeup_rmdir_waiter(struct cgroup *cgrp) +{ + if (unlikely(test_and_clear_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags))) + wake_up_all(&cgroup_rmdir_waitq); +} + +void cgroup_exclude_rmdir(struct cgroup_subsys_state *css) +{ + css_get(css); +} + +void cgroup_release_and_wakeup_rmdir(struct cgroup_subsys_state *css) +{ + cgroup_wakeup_rmdir_waiter(css->cgroup); + css_put(css); +} + /* Link structure for associating css_set objects with cgroups */ struct cg_cgroup_link { /* @@ -327,52 +354,43 @@ static struct hlist_head *css_set_hash(struct cgroup_subsys_state *css[]) return &css_set_table[index]; } -/* We don't maintain the lists running through each css_set to its - * task until after the first call to cgroup_iter_start(). 
This - * reduces the fork()/exit() overhead for people who have cgroups - * compiled into their kernel but not actually in use */ -static int use_task_css_set_links __read_mostly; - -static void __put_css_set(struct css_set *cg, int taskexit) +static void free_css_set_work(struct work_struct *work) { + struct css_set *cg = container_of(work, struct css_set, work); struct cg_cgroup_link *link; struct cg_cgroup_link *saved_link; - /* - * Ensure that the refcount doesn't hit zero while any readers - * can see it. Similar to atomic_dec_and_lock(), but for an - * rwlock - */ - if (atomic_add_unless(&cg->refcount, -1, 1)) - return; - write_lock(&css_set_lock); - if (!atomic_dec_and_test(&cg->refcount)) { - write_unlock(&css_set_lock); - return; - } - - /* This css_set is dead. unlink it and release cgroup refcounts */ - hlist_del(&cg->hlist); - css_set_count--; + write_lock(&css_set_lock); list_for_each_entry_safe(link, saved_link, &cg->cg_links, cg_link_list) { struct cgroup *cgrp = link->cgrp; list_del(&link->cg_link_list); list_del(&link->cgrp_link_list); - if (atomic_dec_and_test(&cgrp->count) && - notify_on_release(cgrp)) { - if (taskexit) - set_bit(CGRP_RELEASABLE, &cgrp->flags); + if (atomic_dec_and_test(&cgrp->count)) { check_for_release(cgrp); + cgroup_wakeup_rmdir_waiter(cgrp); } - kfree(link); } - write_unlock(&css_set_lock); - kfree_rcu(cg, rcu_head); + + kfree(cg); +} + +static void free_css_set_rcu(struct rcu_head *obj) +{ + struct css_set *cg = container_of(obj, struct css_set, rcu_head); + + INIT_WORK(&cg->work, free_css_set_work); + schedule_work(&cg->work); } +/* We don't maintain the lists running through each css_set to its + * task until after the first call to cgroup_iter_start(). 
This + * reduces the fork()/exit() overhead for people who have cgroups + * compiled into their kernel but not actually in use */ +static int use_task_css_set_links __read_mostly; + /* * refcounted get/put for css_set objects */ @@ -381,14 +399,26 @@ static inline void get_css_set(struct css_set *cg) atomic_inc(&cg->refcount); } -static inline void put_css_set(struct css_set *cg) +static void put_css_set(struct css_set *cg) { - __put_css_set(cg, 0); -} + /* + * Ensure that the refcount doesn't hit zero while any readers + * can see it. Similar to atomic_dec_and_lock(), but for an + * rwlock + */ + if (atomic_add_unless(&cg->refcount, -1, 1)) + return; + write_lock(&css_set_lock); + if (!atomic_dec_and_test(&cg->refcount)) { + write_unlock(&css_set_lock); + return; + } -static inline void put_css_set_taskexit(struct css_set *cg) -{ - __put_css_set(cg, 1); + hlist_del(&cg->hlist); + css_set_count--; + + write_unlock(&css_set_lock); + call_rcu(&cg->rcu_head, free_css_set_rcu); } /* @@ -720,9 +750,9 @@ static struct cgroup *task_cgroup_from_root(struct task_struct *task, * cgroup_attach_task(), which overwrites one tasks cgroup pointer with * another. It does so using cgroup_mutex, however there are * several performance critical places that need to reference - * task->cgroup without the expense of grabbing a system global + * task->cgroups without the expense of grabbing a system global * mutex. Therefore except as noted below, when dereferencing or, as - * in cgroup_attach_task(), modifying a task'ss cgroup pointer we use + * in cgroup_attach_task(), modifying a task's cgroups pointer we use * task_lock(), which acts on a spinlock (task->alloc_lock) already in * the task_struct routinely used for such matters. * @@ -911,33 +941,6 @@ static void cgroup_d_remove_dir(struct dentry *dentry) remove_dir(dentry); } -/* - * A queue for waiters to do rmdir() cgroup. 
A tasks will sleep when - * cgroup->count == 0 && list_empty(&cgroup->children) && subsys has some - * reference to css->refcnt. In general, this refcnt is expected to goes down - * to zero, soon. - * - * CGRP_WAIT_ON_RMDIR flag is set under cgroup's inode->i_mutex; - */ -DECLARE_WAIT_QUEUE_HEAD(cgroup_rmdir_waitq); - -static void cgroup_wakeup_rmdir_waiter(struct cgroup *cgrp) -{ - if (unlikely(test_and_clear_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags))) - wake_up_all(&cgroup_rmdir_waitq); -} - -void cgroup_exclude_rmdir(struct cgroup_subsys_state *css) -{ - css_get(css); -} - -void cgroup_release_and_wakeup_rmdir(struct cgroup_subsys_state *css) -{ - cgroup_wakeup_rmdir_waiter(css->cgroup); - css_put(css); -} - /* * Call with cgroup_mutex held. Drops reference counts on modules, including * any duplicate ones that parse_cgroupfs_options took. If this function @@ -1173,10 +1176,10 @@ static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts) /* * If the 'all' option was specified select all the subsystems, - * otherwise 'all, 'none' and a subsystem name options were not - * specified, let's default to 'all' + * otherwise if 'none', 'name=' and a subsystem name options + * were not specified, let's default to 'all' */ - if (all_ss || (!all_ss && !one_ss && !opts->none)) { + if (all_ss || (!one_ss && !opts->none && !opts->name)) { for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { struct cgroup_subsys *ss = subsys[i]; if (ss == NULL) @@ -1800,9 +1803,8 @@ static int cgroup_task_migrate(struct cgroup *cgrp, struct cgroup *oldcgrp, * trading it for newcg is protected by cgroup_mutex, we're safe to drop * it here; it will be freed under RCU. 
*/ - put_css_set(oldcg); - set_bit(CGRP_RELEASABLE, &oldcgrp->flags); + put_css_set(oldcg); return 0; } @@ -1820,6 +1822,7 @@ int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk) struct cgroup_subsys *ss, *failed_ss = NULL; struct cgroup *oldcgrp; struct cgroupfs_root *root = cgrp->root; + struct css_set *cg; /* Nothing to do if the task is already in that cgroup */ oldcgrp = task_cgroup_from_root(tsk, root); @@ -1849,6 +1852,11 @@ int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk) } } + task_lock(tsk); + cg = tsk->cgroups; + get_css_set(cg); + task_unlock(tsk); + retval = cgroup_task_migrate(cgrp, oldcgrp, tsk, false); if (retval) goto out; @@ -1861,8 +1869,9 @@ int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk) if (ss->attach) ss->attach(ss, cgrp, oldcgrp, tsk); } - - synchronize_rcu(); + set_bit(CGRP_RELEASABLE, &cgrp->flags); + /* put_css_set will not destroy cg until after an RCU grace period */ + put_css_set(cg); /* * wake up rmdir() waiter. the rmdir should fail since the cgroup @@ -2095,11 +2104,6 @@ int cgroup_attach_proc(struct cgroup *cgrp, struct task_struct *leader) continue; /* get old css_set pointer */ task_lock(tsk); - if (tsk->flags & PF_EXITING) { - /* ignore this task if it's going away */ - task_unlock(tsk); - continue; - } oldcg = tsk->cgroups; get_css_set(oldcg); task_unlock(tsk); @@ -2189,6 +2193,24 @@ out_free_group_list: return retval; } +/* Ask each subsystem of cgrp->root, via ->allow_attach, to permit attaching + * tsk; -EACCES if any lacks the hook or if no subsystem was visited at all. */ +static int cgroup_allow_attach(struct cgroup *cgrp, struct task_struct *tsk) +{ + struct cgroup_subsys *ss; + int ret = -EACCES; + + for_each_subsys(cgrp->root, ss) { + if (!ss->allow_attach) + return -EACCES; + ret = ss->allow_attach(cgrp, tsk); + if (ret) + return ret; + } + + return ret; +} + /* * Find the task_struct of the task to attach by vpid and pass it along to the * function to attach either it or all tasks in its threadgroup. 
Will take @@ -2234,9 +2256,16 @@ static int attach_task_by_pid(struct cgroup *cgrp, u64 pid, bool threadgroup) if (cred->euid && cred->euid != tcred->uid && cred->euid != tcred->suid) { - rcu_read_unlock(); - cgroup_unlock(); - return -EACCES; + /* + * if the default permission check fails, give each + * cgroup a chance to extend the permission check + */ + ret = cgroup_allow_attach(cgrp, tsk); + if (ret) { + rcu_read_unlock(); + cgroup_unlock(); + return ret; + } } get_task_struct(tsk); rcu_read_unlock(); @@ -2636,9 +2665,7 @@ static int cgroup_create_dir(struct cgroup *cgrp, struct dentry *dentry, dentry->d_fsdata = cgrp; inc_nlink(parent->d_inode); rcu_assign_pointer(cgrp->dentry, dentry); - dget(dentry); } - dput(dentry); return error; } @@ -3810,6 +3837,8 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry, if (err < 0) goto err_remove; + set_bit(CGRP_RELEASABLE, &parent->flags); + /* The cgroup directory was pre-locked for us */ BUG_ON(!mutex_is_locked(&cgrp->dentry->d_inode->i_mutex)); @@ -3941,6 +3970,21 @@ static int cgroup_clear_css_refs(struct cgroup *cgrp) return !failed; } +/* checks if all of the css_sets attached to a cgroup have a refcount of 0. 
+ * Takes css_set_lock for reading itself, so callers must not hold it. */ +static int cgroup_css_sets_empty(struct cgroup *cgrp) +{ + struct cg_cgroup_link *link; + int empty = 1; + + read_lock(&css_set_lock); + list_for_each_entry(link, &cgrp->css_sets, cgrp_link_list) + if (atomic_read(&link->cg->refcount) > 0) + empty = 0; + read_unlock(&css_set_lock); + return empty; +} + static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry) { struct cgroup *cgrp = dentry->d_fsdata; @@ -3953,7 +3997,7 @@ static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry) /* the vfs holds both inode->i_mutex already */ again: mutex_lock(&cgroup_mutex); - if (atomic_read(&cgrp->count) != 0) { + if (!cgroup_css_sets_empty(cgrp)) { mutex_unlock(&cgroup_mutex); return -EBUSY; } @@ -3986,7 +4030,7 @@ again: mutex_lock(&cgroup_mutex); parent = cgrp->parent; - if (atomic_read(&cgrp->count) || !list_empty(&cgrp->children)) { + if (!cgroup_css_sets_empty(cgrp) || !list_empty(&cgrp->children)) { clear_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags); mutex_unlock(&cgroup_mutex); return -EBUSY; @@ -4026,7 +4070,6 @@ again: cgroup_d_remove_dir(d); dput(d); - set_bit(CGRP_RELEASABLE, &parent->flags); check_for_release(parent); /* @@ -4626,7 +4669,7 @@ void cgroup_exit(struct task_struct *tsk, int run_callbacks) task_unlock(tsk); if (cg) - put_css_set_taskexit(cg); + put_css_set(cg); } /** @@ -4679,6 +4722,14 @@ static void check_for_release(struct cgroup *cgrp) } } +/* Caller must verify that the css is not for root cgroup */ +void __css_get(struct cgroup_subsys_state *css, int count) +{ + atomic_add(count, &css->refcnt); + set_bit(CGRP_RELEASABLE, &css->cgroup->flags); +} +EXPORT_SYMBOL_GPL(__css_get); + /* Caller must verify that the css is not for root cgroup */ void __css_put(struct cgroup_subsys_state *css, int count) { @@ -4687,10 +4738,7 @@ void __css_put(struct cgroup_subsys_state *css, int count) rcu_read_lock(); val = atomic_sub_return(count, &css->refcnt); if (val == 1) { - if (notify_on_release(cgrp)) { - set_bit(CGRP_RELEASABLE, 
&cgrp->flags); - check_for_release(cgrp); - } + check_for_release(cgrp); cgroup_wakeup_rmdir_waiter(cgrp); } rcu_read_unlock();