#include <linux/cred.h>
#include <linux/ctype.h>
#include <linux/errno.h>
-#include <linux/fs.h>
#include <linux/init_task.h>
#include <linux/kernel.h>
#include <linux/list.h>
#include <linux/cgroup_subsys.h>
};
-#define MAX_CGROUP_ROOT_NAMELEN 64
-
-/*
- * A cgroupfs_root represents the root of a cgroup hierarchy,
- * and may be associated with a superblock to form an active
- * hierarchy
- */
-struct cgroupfs_root {
- struct super_block *sb;
-
- /*
- * The bitmask of subsystems intended to be attached to this
- * hierarchy
- */
- unsigned long subsys_mask;
-
- /* Unique id for this hierarchy. */
- int hierarchy_id;
-
- /* The bitmask of subsystems currently attached to this hierarchy */
- unsigned long actual_subsys_mask;
-
- /* A list running through the attached subsystems */
- struct list_head subsys_list;
-
- /* The root cgroup for this hierarchy */
- struct cgroup top_cgroup;
-
- /* Tracks how many cgroups are currently defined in hierarchy.*/
- int number_of_cgroups;
-
- /* A list running through the active hierarchies */
- struct list_head root_list;
-
- /* All cgroups on this root, cgroup_mutex protected */
- struct list_head allcg_list;
-
- /* Hierarchy-specific flags */
- unsigned long flags;
-
- /* IDs for cgroups in this hierarchy */
- struct ida cgroup_ida;
-
- /* The path to use for release notifications. */
- char release_agent_path[PATH_MAX];
-
- /* The name for this hierarchy - may be empty */
- char name[MAX_CGROUP_ROOT_NAMELEN];
-};
-
/*
* The "rootnode" hierarchy is the "dummy hierarchy", reserved for the
* subsystems that are otherwise unattached - it never has more than a
struct list_head node;
struct dentry *dentry;
struct cftype *type;
+
+ /* file xattrs */
+ struct simple_xattrs xattrs;
};
/*
}
EXPORT_SYMBOL_GPL(cgroup_is_descendant);
-/* bits in struct cgroupfs_root flags field */
-enum {
- ROOT_NOPREFIX, /* mounted subsystems have no named prefix */
- ROOT_XATTR, /* supports extended attributes */
-};
-
static int cgroup_is_releasable(const struct cgroup *cgrp)
{
const int bits =
*/
dput(cgrp->parent->dentry);
+ ida_simple_remove(&cgrp->root->cgroup_ida, cgrp->id);
+
/*
* Drop the active superblock reference that we took when we
- * created the cgroup
+ * created the cgroup. This will free cgrp->root, if we are
+ * holding the last reference to @sb.
*/
deactivate_super(cgrp->root->sb);
simple_xattrs_free(&cgrp->xattrs);
- ida_simple_remove(&cgrp->root->cgroup_ida, cgrp->id);
kfree(rcu_dereference_raw(cgrp->name));
kfree(cgrp);
}
} else {
struct cfent *cfe = __d_cfe(dentry);
struct cgroup *cgrp = dentry->d_parent->d_fsdata;
- struct cftype *cft = cfe->type;
WARN_ONCE(!list_empty(&cfe->node) &&
cgrp != &cgrp->root->top_cgroup,
"cfe still linked for %s\n", cfe->type->name);
+ simple_xattrs_free(&cfe->xattrs);
kfree(cfe);
- simple_xattrs_free(&cft->xattrs);
}
iput(inode);
}
mutex_lock(&cgroup_root_mutex);
for_each_subsys(root, ss)
seq_printf(seq, ",%s", ss->name);
- if (test_bit(ROOT_NOPREFIX, &root->flags))
+ if (root->flags & CGRP_ROOT_SANE_BEHAVIOR)
+ seq_puts(seq, ",sane_behavior");
+ if (root->flags & CGRP_ROOT_NOPREFIX)
seq_puts(seq, ",noprefix");
- if (test_bit(ROOT_XATTR, &root->flags))
+ if (root->flags & CGRP_ROOT_XATTR)
seq_puts(seq, ",xattr");
if (strlen(root->release_agent_path))
seq_printf(seq, ",release_agent=%s", root->release_agent_path);
all_ss = true;
continue;
}
+ if (!strcmp(token, "__DEVEL__sane_behavior")) {
+ opts->flags |= CGRP_ROOT_SANE_BEHAVIOR;
+ continue;
+ }
if (!strcmp(token, "noprefix")) {
- set_bit(ROOT_NOPREFIX, &opts->flags);
+ opts->flags |= CGRP_ROOT_NOPREFIX;
continue;
}
if (!strcmp(token, "clone_children")) {
continue;
}
if (!strcmp(token, "xattr")) {
- set_bit(ROOT_XATTR, &opts->flags);
+ opts->flags |= CGRP_ROOT_XATTR;
continue;
}
if (!strncmp(token, "release_agent=", 14)) {
/* Consistency checks */
+ if (opts->flags & CGRP_ROOT_SANE_BEHAVIOR) {
+ pr_warning("cgroup: sane_behavior: this is still under development and its behaviors will change, proceed at your own risk\n");
+
+ if (opts->flags & CGRP_ROOT_NOPREFIX) {
+ pr_err("cgroup: sane_behavior: noprefix is not allowed\n");
+ return -EINVAL;
+ }
+
+ if (opts->cpuset_clone_children) {
+ pr_err("cgroup: sane_behavior: clone_children is not allowed\n");
+ return -EINVAL;
+ }
+ }
+
/*
* Option noprefix was introduced just for backward compatibility
* with the old cpuset, so we allow noprefix only if mounting just
* the cpuset subsystem.
*/
- if (test_bit(ROOT_NOPREFIX, &opts->flags) &&
- (opts->subsys_mask & mask))
+ if ((opts->flags & CGRP_ROOT_NOPREFIX) && (opts->subsys_mask & mask))
return -EINVAL;
struct cgroup_sb_opts opts;
unsigned long added_mask, removed_mask;
+ if (root->flags & CGRP_ROOT_SANE_BEHAVIOR) {
+ pr_err("cgroup: sane_behavior: remount is not allowed\n");
+ return -EINVAL;
+ }
+
mutex_lock(&cgrp->dentry->d_inode->i_mutex);
mutex_lock(&cgroup_mutex);
mutex_lock(&cgroup_root_mutex);
root->number_of_cgroups = 1;
cgrp->root = root;
cgrp->name = &root_cgroup_name;
- cgrp->top_cgroup = cgrp;
init_cgroup_housekeeping(cgrp);
list_add_tail(&cgrp->allcg_node, &root->allcg_list);
}
* any) is not needed
*/
cgroup_drop_root(opts.new_root);
+
+ if (((root->flags | opts.flags) & CGRP_ROOT_SANE_BEHAVIOR) &&
+ root->flags != opts.flags) {
+ pr_err("cgroup: sane_behavior: new mount options should match the existing superblock\n");
+ ret = -EINVAL;
+ goto drop_new_super;
+ }
+
/* no subsys rebinding, so refcounts don't change */
drop_parsed_module_refcounts(opts.subsys_mask);
}
int ret = -ENAMETOOLONG;
char *start;
+ if (!cgrp->parent) {
+ if (strlcpy(buf, "/", buflen) >= buflen)
+ return -ENAMETOOLONG;
+ return 0;
+ }
+
start = buf + buflen - 1;
*start = '\0';
rcu_read_lock();
- while (cgrp) {
+ do {
const char *name = cgroup_name(cgrp);
int len;
goto out;
memcpy(start, name, len);
- if (!cgrp->parent)
- break;
-
if (--start < buf)
goto out;
*start = '/';
cgrp = cgrp->parent;
- }
+ } while (cgrp->parent);
ret = 0;
memmove(buf, start, buf + buflen - start);
out:
tsk = tsk->group_leader;
/*
- * Workqueue threads may acquire PF_THREAD_BOUND and become
+ * Workqueue threads may acquire PF_NO_SETAFFINITY and become
* trapped in a cpuset, or RT worker may be born in a cgroup
* with no rt_runtime allocated. Just say no.
*/
- if (tsk == kthreadd_task || (tsk->flags & PF_THREAD_BOUND)) {
+ if (tsk == kthreadd_task || (tsk->flags & PF_NO_SETAFFINITY)) {
ret = -EINVAL;
rcu_read_unlock();
goto out_unlock_cgroup;
return 0;
}
+static int cgroup_sane_behavior_show(struct cgroup *cgrp, struct cftype *cft,
+ struct seq_file *seq)
+{
+ seq_printf(seq, "%d\n", cgroup_sane_behavior(cgrp));
+ return 0;
+}
+
/* A buffer size big enough for numbers or short strings */
#define CGROUP_LOCAL_BUFFER_SIZE 64
if (S_ISDIR(dentry->d_inode->i_mode))
return &__d_cgrp(dentry)->xattrs;
else
- return &__d_cft(dentry)->xattrs;
+ return &__d_cfe(dentry)->xattrs;
}
static inline int xattr_enabled(struct dentry *dentry)
{
struct cgroupfs_root *root = dentry->d_sb->s_fs_info;
- return test_bit(ROOT_XATTR, &root->flags);
+ return root->flags & CGRP_ROOT_XATTR;
}
static bool is_valid_xattr(const char *name)
umode_t mode;
char name[MAX_CGROUP_TYPE_NAMELEN + MAX_CFTYPE_NAME + 2] = { 0 };
- simple_xattrs_init(&cft->xattrs);
-
- if (subsys && !test_bit(ROOT_NOPREFIX, &cgrp->root->flags)) {
+ if (subsys && !(cgrp->root->flags & CGRP_ROOT_NOPREFIX)) {
strcpy(name, subsys->name);
strcat(name, ".");
}
cfe->type = (void *)cft;
cfe->dentry = dentry;
dentry->d_fsdata = cfe;
+ simple_xattrs_init(&cfe->xattrs);
list_add_tail(&cfe->node, &parent->files);
cfe = NULL;
}
for (cft = cfts; cft->name[0] != '\0'; cft++) {
/* does cft->flags tell us to skip this file on @cgrp? */
+ if ((cft->flags & CFTYPE_INSANE) && cgroup_sane_behavior(cgrp))
+ continue;
if ((cft->flags & CFTYPE_NOT_ON_ROOT) && !cgrp->parent)
continue;
if ((cft->flags & CFTYPE_ONLY_ON_ROOT) && cgrp->parent)
if (ret)
goto fail;
+ efile->f_op->poll(efile, &event->pt);
+
/*
* Events should be removed after rmdir of cgroup directory, but before
* destroying subsystem state objects. Let's take reference to cgroup
},
{
.name = "cgroup.clone_children",
+ .flags = CFTYPE_INSANE,
.read_u64 = cgroup_clone_children_read,
.write_u64 = cgroup_clone_children_write,
},
+ {
+ .name = "cgroup.sane_behavior",
+ .flags = CFTYPE_ONLY_ON_ROOT,
+ .read_seq_string = cgroup_sane_behavior_show,
+ },
{
.name = "release_agent",
.flags = CFTYPE_ONLY_ON_ROOT,
cgrp->parent = parent;
cgrp->root = parent->root;
- cgrp->top_cgroup = parent->top_cgroup;
if (notify_on_release(parent))
set_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags);
* need to invoke fork callbacks here. */
BUG_ON(!list_empty(&init_task.tasks));
- ss->active = 1;
BUG_ON(online_css(ss, dummytop));
mutex_unlock(&cgroup_mutex);
}
write_unlock(&css_set_lock);
- ss->active = 1;
ret = online_css(ss, dummytop);
if (ret)
goto err_unload;
mutex_lock(&cgroup_mutex);
offline_css(ss, dummytop);
- ss->active = 0;
if (ss->use_id)
idr_destroy(&ss->idr);
*/
/* TODO: Use a proper seq_file iterator */
-static int proc_cgroup_show(struct seq_file *m, void *v)
+int proc_cgroup_show(struct seq_file *m, void *v)
{
struct pid *pid;
struct task_struct *tsk;
return retval;
}
-static int cgroup_open(struct inode *inode, struct file *file)
-{
- struct pid *pid = PROC_I(inode)->pid;
- return single_open(file, proc_cgroup_show, pid);
-}
-
-const struct file_operations proc_cgroup_operations = {
- .open = cgroup_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
/* Display information about each subsystem and each hierarchy */
static int proc_cgroupstats_show(struct seq_file *m, void *v)
{
}
EXPORT_SYMBOL_GPL(css_lookup);
-/**
- * css_get_next - lookup next cgroup under specified hierarchy.
- * @ss: pointer to subsystem
- * @id: current position of iteration.
- * @root: pointer to css. search tree under this.
- * @foundid: position of found object.
- *
- * Search next css under the specified hierarchy of rootid. Calling under
- * rcu_read_lock() is necessary. Returns NULL if it reaches the end.
- */
-struct cgroup_subsys_state *
-css_get_next(struct cgroup_subsys *ss, int id,
- struct cgroup_subsys_state *root, int *foundid)
-{
- struct cgroup_subsys_state *ret = NULL;
- struct css_id *tmp;
- int tmpid;
- int rootid = css_id(root);
- int depth = css_depth(root);
-
- if (!rootid)
- return NULL;
-
- BUG_ON(!ss->use_id);
- WARN_ON_ONCE(!rcu_read_lock_held());
-
- /* fill start point for scan */
- tmpid = id;
- while (1) {
- /*
- * scan next entry from bitmap(tree), tmpid is updated after
- * idr_get_next().
- */
- tmp = idr_get_next(&ss->idr, &tmpid);
- if (!tmp)
- break;
- if (tmp->depth >= depth && tmp->stack[depth] == rootid) {
- ret = rcu_dereference(tmp->css);
- if (ret) {
- *foundid = tmpid;
- break;
- }
- }
- /* continue to scan from next id */
- tmpid = tmpid + 1;
- }
- return ret;
-}
-
/*
* get corresponding css from file open on cgroupfs directory
*/