From: Balbir Singh Date: Thu, 7 Feb 2008 08:13:50 +0000 (-0800) Subject: Memory controller: cgroups setup X-Git-Tag: firefly_0821_release~22970 X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=8cdea7c05454260c0d4d83503949c358eb131d17;p=firefly-linux-kernel-4.4.55.git Memory controller: cgroups setup Setup the memory cgroup and add basic hooks and controls to integrate and work with the cgroup. Signed-off-by: Balbir Singh Cc: Pavel Emelianov Cc: Paul Menage Cc: Peter Zijlstra Cc: "Eric W. Biederman" Cc: Nick Piggin Cc: Kirill Korotaev Cc: Herbert Poetzl Cc: David Rientjes Cc: Vaidyanathan Srinivasan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- diff --git a/include/linux/cgroup_subsys.h b/include/linux/cgroup_subsys.h index 9ec43186ba80..228235c5ae53 100644 --- a/include/linux/cgroup_subsys.h +++ b/include/linux/cgroup_subsys.h @@ -37,3 +37,8 @@ SUBSYS(cpuacct) /* */ +#ifdef CONFIG_CGROUP_MEM_CONT +SUBSYS(mem_cgroup) +#endif + +/* */ diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h new file mode 100644 index 000000000000..3f121b27677a --- /dev/null +++ b/include/linux/memcontrol.h @@ -0,0 +1,21 @@ +/* memcontrol.h - Memory Controller + * + * Copyright IBM Corporation, 2007 + * Author Balbir Singh + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#ifndef _LINUX_MEMCONTROL_H +#define _LINUX_MEMCONTROL_H + +#endif /* _LINUX_MEMCONTROL_H */ + diff --git a/init/Kconfig b/init/Kconfig index d372bd616b0c..95ac2657b0f4 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -397,6 +397,13 @@ config SYSFS_DEPRECATED If you are using a distro that was released in 2006 or later, it should be safe to say N here. +config CGROUP_MEM_CONT + bool "Memory controller for cgroups" + depends on CGROUPS && RESOURCE_COUNTERS + help + Provides a memory controller that manages both page cache and + RSS memory. + config PROC_PID_CPUSET bool "Include legacy /proc//cpuset file" depends on CPUSETS diff --git a/mm/Makefile b/mm/Makefile index 4af5dff37277..9f117bab5322 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -32,4 +32,5 @@ obj-$(CONFIG_FS_XIP) += filemap_xip.o obj-$(CONFIG_MIGRATION) += migrate.o obj-$(CONFIG_SMP) += allocpercpu.o obj-$(CONFIG_QUICKLIST) += quicklist.o +obj-$(CONFIG_CGROUP_MEM_CONT) += memcontrol.o diff --git a/mm/memcontrol.c b/mm/memcontrol.c new file mode 100644 index 000000000000..80e48cd9d0c7 --- /dev/null +++ b/mm/memcontrol.c @@ -0,0 +1,127 @@ +/* memcontrol.c - Memory Controller + * + * Copyright IBM Corporation, 2007 + * Author Balbir Singh + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include +#include +#include + +struct cgroup_subsys mem_cgroup_subsys; + +/* + * The memory controller data structure. The memory controller controls both + * page cache and RSS per cgroup. We would eventually like to provide + * statistics based on the statistics developed by Rik Van Riel for clock-pro, + * to help the administrator determine what knobs to tune. + * + * TODO: Add a water mark for the memory controller. Reclaim will begin when + * we hit the water mark. + */ +struct mem_cgroup { + struct cgroup_subsys_state css; + /* + * the counter to account for memory usage + */ + struct res_counter res; +}; + +/* + * A page_cgroup page is associated with every page descriptor. The + * page_cgroup helps us identify information about the cgroup + */ +struct page_cgroup { + struct list_head lru; /* per cgroup LRU list */ + struct page *page; + struct mem_cgroup *mem_cgroup; +}; + + +static inline +struct mem_cgroup *mem_cgroup_from_cont(struct cgroup *cont) +{ + return container_of(cgroup_subsys_state(cont, + mem_cgroup_subsys_id), struct mem_cgroup, + css); +} + +static ssize_t mem_cgroup_read(struct cgroup *cont, struct cftype *cft, + struct file *file, char __user *userbuf, size_t nbytes, + loff_t *ppos) +{ + return res_counter_read(&mem_cgroup_from_cont(cont)->res, + cft->private, userbuf, nbytes, ppos); +} + +static ssize_t mem_cgroup_write(struct cgroup *cont, struct cftype *cft, + struct file *file, const char __user *userbuf, + size_t nbytes, loff_t *ppos) +{ + return res_counter_write(&mem_cgroup_from_cont(cont)->res, + cft->private, userbuf, nbytes, ppos); +} + +static struct cftype mem_cgroup_files[] = { + { + .name = "usage", + .private = RES_USAGE, + .read = mem_cgroup_read, + }, + { + .name = "limit", + .private = RES_LIMIT, + .write = mem_cgroup_write, + .read = mem_cgroup_read, + }, + { + .name = "failcnt", + .private = RES_FAILCNT, + .read = mem_cgroup_read, + }, +}; + +static struct cgroup_subsys_state * +mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont) +{ + struct mem_cgroup *mem; + + mem = kzalloc(sizeof(struct mem_cgroup), GFP_KERNEL); + if (!mem) + return -ENOMEM; + + res_counter_init(&mem->res); + return &mem->css; +} + +static void mem_cgroup_destroy(struct cgroup_subsys *ss, + struct cgroup *cont) +{ + kfree(mem_cgroup_from_cont(cont)); +} + +static int mem_cgroup_populate(struct cgroup_subsys *ss, + struct cgroup *cont) +{ + return cgroup_add_files(cont, ss, mem_cgroup_files, + ARRAY_SIZE(mem_cgroup_files)); +} + +struct cgroup_subsys mem_cgroup_subsys = { + .name = "memory", + .subsys_id = mem_cgroup_subsys_id, + .create = mem_cgroup_create, + .destroy = mem_cgroup_destroy, + .populate = mem_cgroup_populate, + .early_init = 0, +};