grows down?
make bind policy root only? It can trigger oom much faster and the
kernel is not always graceful with that.
- could replace all the switch()es with a mempolicy_ops structure.
*/
#include <linux/mempolicy.h>
.policy = MPOL_DEFAULT,
};
-static void mpol_rebind_policy(struct mempolicy *pol,
- const nodemask_t *newmask);
+static const struct mempolicy_operations {
+ int (*create)(struct mempolicy *pol, const nodemask_t *nodes);
+ void (*rebind)(struct mempolicy *pol, const nodemask_t *nodes);
+} mpol_ops[MPOL_MAX];
-/* Do sanity checking on a policy */
-static int mpol_check_policy(int mode, nodemask_t *nodes)
+/* Check that the nodemask contains at least one populated zone */
+static int is_valid_nodemask(const nodemask_t *nodemask)
{
- int empty = nodes_empty(*nodes);
+ int nd, k;
- switch (mode) {
- case MPOL_DEFAULT:
- if (!empty)
- return -EINVAL;
- break;
- case MPOL_BIND:
- case MPOL_INTERLEAVE:
- /* Preferred will only use the first bit, but allow
- more for now. */
- if (empty)
- return -EINVAL;
- break;
+ /* Check that there is something useful in this mask */
+ k = policy_zone;
+
+ for_each_node_mask(nd, *nodemask) {
+ struct zone *z;
+
+ for (k = 0; k <= policy_zone; k++) {
+ z = &NODE_DATA(nd)->node_zones[k];
+ if (z->present_pages > 0)
+ return 1;
+ }
}
- return nodes_subset(*nodes, node_states[N_HIGH_MEMORY]) ? 0 : -EINVAL;
+
+ return 0;
}
-/* Generate a custom zonelist for the BIND policy. */
-static struct zonelist *bind_zonelist(nodemask_t *nodes)
+static inline int mpol_store_user_nodemask(const struct mempolicy *pol)
{
- struct zonelist *zl;
- int num, max, nd;
- enum zone_type k;
+ return pol->flags & (MPOL_F_STATIC_NODES | MPOL_F_RELATIVE_NODES);
+}
- max = 1 + MAX_NR_ZONES * nodes_weight(*nodes);
- max++; /* space for zlcache_ptr (see mmzone.h) */
- zl = kmalloc(sizeof(struct zone *) * max, GFP_KERNEL);
- if (!zl)
- return ERR_PTR(-ENOMEM);
- zl->zlcache_ptr = NULL;
- num = 0;
- /* First put in the highest zones from all nodes, then all the next
- lower zones etc. Avoid empty zones because the memory allocator
- doesn't like them. If you implement node hot removal you
- have to fix that. */
- k = MAX_NR_ZONES - 1;
- while (1) {
- for_each_node_mask(nd, *nodes) {
- struct zone *z = &NODE_DATA(nd)->node_zones[k];
- if (z->present_pages > 0)
- zl->zones[num++] = z;
- }
- if (k == 0)
- break;
- k--;
- }
- if (num == 0) {
- kfree(zl);
- return ERR_PTR(-EINVAL);
- }
- zl->zones[num] = NULL;
- return zl;
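+/*
+ * mpol_relative_nodemask() maps a user-supplied relative nodemask onto
+ * the set bits of @rel. Worked example (illustrative): with rel = {4,5,6},
+ * orig = {0,2} is first folded modulo nodes_weight(rel) by nodes_fold(),
+ * then nodes_onto() selects the 0th and 2nd set bits of rel, giving
+ * ret = {4,6}.
+ */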
+static void mpol_relative_nodemask(nodemask_t *ret, const nodemask_t *orig,
+ const nodemask_t *rel)
+{
+ nodemask_t tmp;
+ nodes_fold(tmp, *orig, nodes_weight(*rel));
+ nodes_onto(*ret, tmp, *rel);
+}
+
+static int mpol_new_interleave(struct mempolicy *pol, const nodemask_t *nodes)
+{
+ if (nodes_empty(*nodes))
+ return -EINVAL;
+ pol->v.nodes = *nodes;
+ return 0;
+}
+
+static int mpol_new_preferred(struct mempolicy *pol, const nodemask_t *nodes)
+{
+ if (!nodes)
+ pol->v.preferred_node = -1; /* local allocation */
+ else if (nodes_empty(*nodes))
+ return -EINVAL; /* no allowed nodes */
+ else
+ pol->v.preferred_node = first_node(*nodes);
+ return 0;
+}
+
+static int mpol_new_bind(struct mempolicy *pol, const nodemask_t *nodes)
+{
+ if (!is_valid_nodemask(nodes))
+ return -EINVAL;
+ pol->v.nodes = *nodes;
+ return 0;
}
/* Create a new policy */
-static struct mempolicy *mpol_new(int mode, nodemask_t *nodes)
+static struct mempolicy *mpol_new(unsigned short mode, unsigned short flags,
+ nodemask_t *nodes)
{
struct mempolicy *policy;
+ nodemask_t cpuset_context_nmask;
+ int ret;
- pr_debug("setting mode %d nodes[0] %lx\n",
- mode, nodes ? nodes_addr(*nodes)[0] : -1);
+ pr_debug("setting mode %d flags %d nodes[0] %lx\n",
+ mode, flags, nodes ? nodes_addr(*nodes)[0] : -1);
- if (mode == MPOL_DEFAULT)
+ if (mode == MPOL_DEFAULT) {
+ if (nodes && !nodes_empty(*nodes))
+ return ERR_PTR(-EINVAL);
return NULL;
+ }
+ VM_BUG_ON(!nodes);
+
+ /*
+ * MPOL_PREFERRED cannot be used with MPOL_F_STATIC_NODES or
+ * MPOL_F_RELATIVE_NODES if the nodemask is empty (local allocation).
+ * All other modes require a valid pointer to a non-empty nodemask.
+ */
+ if (mode == MPOL_PREFERRED) {
+ if (nodes_empty(*nodes)) {
+ if (flags & (MPOL_F_STATIC_NODES | MPOL_F_RELATIVE_NODES))
+ return ERR_PTR(-EINVAL);
+ nodes = NULL; /* flag local alloc */
+ }
+ } else if (nodes_empty(*nodes))
+ return ERR_PTR(-EINVAL);
policy = kmem_cache_alloc(policy_cache, GFP_KERNEL);
if (!policy)
return ERR_PTR(-ENOMEM);
atomic_set(&policy->refcnt, 1);
- switch (mode) {
- case MPOL_INTERLEAVE:
- policy->v.nodes = *nodes;
- nodes_and(policy->v.nodes, policy->v.nodes,
- node_states[N_HIGH_MEMORY]);
- if (nodes_weight(policy->v.nodes) == 0) {
- kmem_cache_free(policy_cache, policy);
- return ERR_PTR(-EINVAL);
- }
- break;
- case MPOL_PREFERRED:
- policy->v.preferred_node = first_node(*nodes);
- if (policy->v.preferred_node >= MAX_NUMNODES)
- policy->v.preferred_node = -1;
- break;
- case MPOL_BIND:
- policy->v.zonelist = bind_zonelist(nodes);
- if (IS_ERR(policy->v.zonelist)) {
- void *error_code = policy->v.zonelist;
- kmem_cache_free(policy_cache, policy);
- return error_code;
- }
- break;
- }
policy->policy = mode;
- policy->cpuset_mems_allowed = cpuset_mems_allowed(current);
+ policy->flags = flags;
+
+ if (nodes) {
+ /*
+ * cpuset related setup doesn't apply to local allocation
+ */
+ cpuset_update_task_memory_state();
+ if (flags & MPOL_F_RELATIVE_NODES)
+ mpol_relative_nodemask(&cpuset_context_nmask, nodes,
+ &cpuset_current_mems_allowed);
+ else
+ nodes_and(cpuset_context_nmask, *nodes,
+ cpuset_current_mems_allowed);
+ if (mpol_store_user_nodemask(policy))
+ policy->w.user_nodemask = *nodes;
+ else
+ policy->w.cpuset_mems_allowed =
+ cpuset_mems_allowed(current);
+ }
+
+ ret = mpol_ops[mode].create(policy,
+ nodes ? &cpuset_context_nmask : NULL);
+ if (ret < 0) {
+ kmem_cache_free(policy_cache, policy);
+ return ERR_PTR(ret);
+ }
return policy;
}
+static void mpol_rebind_default(struct mempolicy *pol, const nodemask_t *nodes)
+{
+}
+
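+/*
+ * Example of the three rebind behaviours (illustrative): a policy
+ * interleaving over {0,1} whose cpuset mems move from {0,1} to {2,3}
+ * is remapped to {2,3} by default (nodes_remap()), restricted to
+ * user_nodemask & {2,3} = {} under MPOL_F_STATIC_NODES, and remapped
+ * relative to the new mems, i.e. {2,3}, under MPOL_F_RELATIVE_NODES.
+ */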
+static void mpol_rebind_nodemask(struct mempolicy *pol,
+ const nodemask_t *nodes)
+{
+ nodemask_t tmp;
+
+ if (pol->flags & MPOL_F_STATIC_NODES)
+ nodes_and(tmp, pol->w.user_nodemask, *nodes);
+ else if (pol->flags & MPOL_F_RELATIVE_NODES)
+ mpol_relative_nodemask(&tmp, &pol->w.user_nodemask, nodes);
+ else {
+ nodes_remap(tmp, pol->v.nodes, pol->w.cpuset_mems_allowed,
+ *nodes);
+ pol->w.cpuset_mems_allowed = *nodes;
+ }
+
+ pol->v.nodes = tmp;
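+ /*
+ * If the current interleave target fell out of the rebound mask,
+ * advance to the next allowed node, wrapping to the first one;
+ * fall back to the local node if tmp is empty.
+ */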
+ if (!node_isset(current->il_next, tmp)) {
+ current->il_next = next_node(current->il_next, tmp);
+ if (current->il_next >= MAX_NUMNODES)
+ current->il_next = first_node(tmp);
+ if (current->il_next >= MAX_NUMNODES)
+ current->il_next = numa_node_id();
+ }
+}
+
+static void mpol_rebind_preferred(struct mempolicy *pol,
+ const nodemask_t *nodes)
+{
+ nodemask_t tmp;
+
+ if (pol->flags & MPOL_F_STATIC_NODES) {
+ int node = first_node(pol->w.user_nodemask);
+
+ if (node_isset(node, *nodes))
+ pol->v.preferred_node = node;
+ else
+ pol->v.preferred_node = -1;
+ } else if (pol->flags & MPOL_F_RELATIVE_NODES) {
+ mpol_relative_nodemask(&tmp, &pol->w.user_nodemask, nodes);
+ pol->v.preferred_node = first_node(tmp);
+ } else if (pol->v.preferred_node != -1) {
+ pol->v.preferred_node = node_remap(pol->v.preferred_node,
+ pol->w.cpuset_mems_allowed,
+ *nodes);
+ pol->w.cpuset_mems_allowed = *nodes;
+ }
+}
+
+/* Migrate a policy to a different set of nodes */
+static void mpol_rebind_policy(struct mempolicy *pol,
+ const nodemask_t *newmask)
+{
+ if (!pol)
+ return;
+ if (!mpol_store_user_nodemask(pol) &&
+ nodes_equal(pol->w.cpuset_mems_allowed, *newmask))
+ return;
+ mpol_ops[pol->policy].rebind(pol, newmask);
+}
+
+/*
+ * Wrapper for mpol_rebind_policy() that just requires task
+ * pointer, and updates task mempolicy.
+ */
+
+void mpol_rebind_task(struct task_struct *tsk, const nodemask_t *new)
+{
+ mpol_rebind_policy(tsk->mempolicy, new);
+}
+
+/*
+ * Rebind each vma in mm to new nodemask.
+ *
+ * Call holding a reference to mm. Takes mm->mmap_sem during call.
+ */
+
+void mpol_rebind_mm(struct mm_struct *mm, nodemask_t *new)
+{
+ struct vm_area_struct *vma;
+
+ down_write(&mm->mmap_sem);
+ for (vma = mm->mmap; vma; vma = vma->vm_next)
+ mpol_rebind_policy(vma->vm_policy, new);
+ up_write(&mm->mmap_sem);
+}
+
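+/*
+ * mpol_ops[] centralizes the per-mode create and rebind behaviour that
+ * previously lived in switch() statements; MPOL_DEFAULT needs no
+ * .create hook because mpol_new() returns NULL before consulting it.
+ */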
+static const struct mempolicy_operations mpol_ops[MPOL_MAX] = {
+ [MPOL_DEFAULT] = {
+ .rebind = mpol_rebind_default,
+ },
+ [MPOL_INTERLEAVE] = {
+ .create = mpol_new_interleave,
+ .rebind = mpol_rebind_nodemask,
+ },
+ [MPOL_PREFERRED] = {
+ .create = mpol_new_preferred,
+ .rebind = mpol_rebind_preferred,
+ },
+ [MPOL_BIND] = {
+ .create = mpol_new_bind,
+ .rebind = mpol_rebind_nodemask,
+ },
+};
+
static void gather_stats(struct page *, void *, int pte_dirty);
static void migrate_page_add(struct page *page, struct list_head *pagelist,
unsigned long flags);
if (!err) {
mpol_get(new);
vma->vm_policy = new;
- mpol_free(old);
+ mpol_put(old);
}
return err;
}
return err;
}
-static int contextualize_policy(int mode, nodemask_t *nodes)
-{
- if (!nodes)
- return 0;
-
- cpuset_update_task_memory_state();
- if (!cpuset_nodes_subset_current_mems_allowed(*nodes))
- return -EINVAL;
- return mpol_check_policy(mode, nodes);
-}
-
-
/*
* Update task->flags PF_MEMPOLICY bit: set iff non-default
* mempolicy. Allows more rapid checking of this (combined perhaps
}
/* Set the process memory policy */
-static long do_set_mempolicy(int mode, nodemask_t *nodes)
+static long do_set_mempolicy(unsigned short mode, unsigned short flags,
+ nodemask_t *nodes)
{
struct mempolicy *new;
- if (contextualize_policy(mode, nodes))
- return -EINVAL;
- new = mpol_new(mode, nodes);
+ new = mpol_new(mode, flags, nodes);
if (IS_ERR(new))
return PTR_ERR(new);
- mpol_free(current->mempolicy);
+ mpol_put(current->mempolicy);
current->mempolicy = new;
mpol_set_task_struct_flag();
- if (new && new->policy == MPOL_INTERLEAVE)
+ if (new && new->policy == MPOL_INTERLEAVE &&
+ nodes_weight(new->v.nodes))
current->il_next = first_node(new->v.nodes);
return 0;
}
/* Fill a zone bitmap for a policy */
static void get_zonemask(struct mempolicy *p, nodemask_t *nodes)
{
- int i;
-
nodes_clear(*nodes);
switch (p->policy) {
- case MPOL_BIND:
- for (i = 0; p->v.zonelist->zones[i]; i++)
- node_set(zone_to_nid(p->v.zonelist->zones[i]),
- *nodes);
- break;
case MPOL_DEFAULT:
break;
+ case MPOL_BIND:
+ /* Fall through */
case MPOL_INTERLEAVE:
*nodes = p->v.nodes;
break;
goto out;
}
} else
- *policy = pol->policy;
+ *policy = pol->policy | pol->flags;
if (vma) {
up_read(&current->mm->mmap_sem);
#endif
static long do_mbind(unsigned long start, unsigned long len,
- unsigned long mode, nodemask_t *nmask,
- unsigned long flags)
+ unsigned short mode, unsigned short mode_flags,
+ nodemask_t *nmask, unsigned long flags)
{
struct vm_area_struct *vma;
struct mm_struct *mm = current->mm;
int err;
LIST_HEAD(pagelist);
- if ((flags & ~(unsigned long)(MPOL_MF_STRICT |
- MPOL_MF_MOVE | MPOL_MF_MOVE_ALL))
- || mode > MPOL_MAX)
+ if (flags & ~(unsigned long)(MPOL_MF_STRICT |
+ MPOL_MF_MOVE | MPOL_MF_MOVE_ALL))
return -EINVAL;
if ((flags & MPOL_MF_MOVE_ALL) && !capable(CAP_SYS_NICE))
return -EPERM;
if (end == start)
return 0;
- if (mpol_check_policy(mode, nmask))
- return -EINVAL;
-
- new = mpol_new(mode, nmask);
+ new = mpol_new(mode, mode_flags, nmask);
if (IS_ERR(new))
return PTR_ERR(new);
if (!new)
flags |= MPOL_MF_DISCONTIG_OK;
- pr_debug("mbind %lx-%lx mode:%ld nodes:%lx\n",start,start+len,
- mode, nmask ? nodes_addr(*nmask)[0] : -1);
+ pr_debug("mbind %lx-%lx mode:%d flags:%d nodes:%lx\n",
+ start, start + len, mode, mode_flags,
+ nmask ? nodes_addr(*nmask)[0] : -1);
down_write(&mm->mmap_sem);
vma = check_range(mm, start, end, nmask,
}
up_write(&mm->mmap_sem);
- mpol_free(new);
+ mpol_put(new);
return err;
}
{
nodemask_t nodes;
int err;
+ unsigned short mode_flags;
+ mode_flags = mode & MPOL_MODE_FLAGS;
+ mode &= ~MPOL_MODE_FLAGS;
+ if (mode >= MPOL_MAX)
+ return -EINVAL;
+ if ((mode_flags & MPOL_F_STATIC_NODES) &&
+ (mode_flags & MPOL_F_RELATIVE_NODES))
+ return -EINVAL;
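+ /*
+ * The optional mode flags arrive OR'd into the mode argument,
+ * e.g. from userspace (illustrative):
+ * mbind(addr, len, MPOL_INTERLEAVE | MPOL_F_STATIC_NODES,
+ * nodemask, maxnode, 0);
+ */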
err = get_nodes(&nodes, nmask, maxnode);
if (err)
return err;
-#ifdef CONFIG_CPUSETS
- /* Restrict the nodes to the allowed nodes in the cpuset */
- nodes_and(nodes, nodes, current->mems_allowed);
-#endif
- return do_mbind(start, len, mode, &nodes, flags);
+ return do_mbind(start, len, mode, mode_flags, &nodes, flags);
}
/* Set the process memory policy */
{
int err;
nodemask_t nodes;
+ unsigned short flags;
- if (mode < 0 || mode > MPOL_MAX)
+ flags = mode & MPOL_MODE_FLAGS;
+ mode &= ~MPOL_MODE_FLAGS;
+ if ((unsigned int)mode >= MPOL_MAX)
+ return -EINVAL;
+ if ((flags & MPOL_F_STATIC_NODES) && (flags & MPOL_F_RELATIVE_NODES))
return -EINVAL;
err = get_nodes(&nodes, nmask, maxnode);
if (err)
return err;
- return do_set_mempolicy(mode, &nodes);
+ return do_set_mempolicy(mode, flags, &nodes);
}
asmlinkage long sys_migrate_pages(pid_t pid, unsigned long maxnode,
return pol;
}
+/* Return a nodemask representing a mempolicy */
+static nodemask_t *nodemask_policy(gfp_t gfp, struct mempolicy *policy)
+{
+ /* Lower zones don't get a nodemask applied for MPOL_BIND */
+ if (unlikely(policy->policy == MPOL_BIND) &&
+ gfp_zone(gfp) >= policy_zone &&
+ cpuset_nodemask_valid_mems_allowed(&policy->v.nodes))
+ return &policy->v.nodes;
+
+ return NULL;
+}
+
/* Return a zonelist representing a mempolicy */
static struct zonelist *zonelist_policy(gfp_t gfp, struct mempolicy *policy)
{
nd = numa_node_id();
break;
case MPOL_BIND:
- /* Lower zones don't get a policy applied */
- /* Careful: current->mems_allowed might have moved */
- if (gfp_zone(gfp) >= policy_zone)
- if (cpuset_zonelist_valid_mems_allowed(policy->v.zonelist))
- return policy->v.zonelist;
- /*FALL THROUGH*/
+ /*
+ * Normally, MPOL_BIND allocations are node-local within the
+ * allowed nodemask. However, if __GFP_THISNODE is set and the
+ * current node isn't part of the mask, we use the zonelist for
+ * the first node in the mask instead.
+ */
+ nd = numa_node_id();
+ if (unlikely(gfp & __GFP_THISNODE) &&
+ unlikely(!node_isset(nd, policy->v.nodes)))
+ nd = first_node(policy->v.nodes);
+ break;
case MPOL_INTERLEAVE: /* should not happen */
case MPOL_DEFAULT:
nd = numa_node_id();
nd = 0;
BUG();
}
- return NODE_DATA(nd)->node_zonelists + gfp_zone(gfp);
+ return node_zonelist(nd, gfp);
}
/* Do dynamic interleaving for a process */
next = next_node(nid, policy->v.nodes);
if (next >= MAX_NUMNODES)
next = first_node(policy->v.nodes);
- me->il_next = next;
+ if (next < MAX_NUMNODES)
+ me->il_next = next;
return nid;
}
*/
unsigned slab_node(struct mempolicy *policy)
{
- int pol = policy ? policy->policy : MPOL_DEFAULT;
+ unsigned short pol = policy ? policy->policy : MPOL_DEFAULT;
switch (pol) {
case MPOL_INTERLEAVE:
return interleave_nodes(policy);
- case MPOL_BIND:
+ case MPOL_BIND: {
/*
* Follow bind policy behavior and start allocation at the
* first node.
*/
- return zone_to_nid(policy->v.zonelist->zones[0]);
+ struct zonelist *zonelist;
+ struct zone *zone;
+ enum zone_type highest_zoneidx = gfp_zone(GFP_KERNEL);
+ zonelist = &NODE_DATA(numa_node_id())->node_zonelists[0];
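+ /*
+ * Find the first zone at or below highest_zoneidx on the local
+ * zonelist that lies on a node allowed by the policy.
+ */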
+ (void)first_zones_zonelist(zonelist, highest_zoneidx,
+ &policy->v.nodes,
+ &zone);
+ return zone->node;
+ }
case MPOL_PREFERRED:
if (policy->v.preferred_node >= 0)
struct vm_area_struct *vma, unsigned long off)
{
unsigned nnodes = nodes_weight(pol->v.nodes);
- unsigned target = (unsigned)off % nnodes;
+ unsigned target;
int c;
int nid = -1;
+ if (!nnodes)
+ return numa_node_id();
+ target = (unsigned int)off % nnodes;
c = 0;
do {
nid = next_node(nid, pol->v.nodes);
* @vma = virtual memory area whose policy is sought
* @addr = address in @vma for shared policy lookup and interleave policy
* @gfp_flags = for requested zone
- * @mpol = pointer to mempolicy pointer for reference counted 'BIND policy
+ * @mpol = pointer to mempolicy pointer for reference counted mempolicy
+ * @nodemask = pointer to nodemask pointer for MPOL_BIND nodemask
*
* Returns a zonelist suitable for a huge page allocation.
- * If the effective policy is 'BIND, returns pointer to policy's zonelist.
+ * If the effective policy is 'BIND, returns pointer to local node's zonelist,
+ * and a pointer to the mempolicy's @nodemask for filtering the zonelist.
* If it is also a policy for which get_vma_policy() returns an extra
- * reference, we must hold that reference until after allocation.
+ * reference, we must hold that reference until after the allocation.
* In that case, return policy via @mpol so hugetlb allocation can drop
- * the reference. For non-'BIND referenced policies, we can/do drop the
+ * the reference. For non-'BIND referenced policies, we can/do drop the
* reference here, so the caller doesn't need to know about the special case
* for default and current task policy.
*/
struct zonelist *huge_zonelist(struct vm_area_struct *vma, unsigned long addr,
- gfp_t gfp_flags, struct mempolicy **mpol)
+ gfp_t gfp_flags, struct mempolicy **mpol,
+ nodemask_t **nodemask)
{
struct mempolicy *pol = get_vma_policy(current, vma, addr);
struct zonelist *zl;
*mpol = NULL; /* probably no unref needed */
- if (pol->policy == MPOL_INTERLEAVE) {
+ *nodemask = NULL; /* assume !MPOL_BIND */
+ if (pol->policy == MPOL_BIND) {
+ *nodemask = &pol->v.nodes;
+ } else if (pol->policy == MPOL_INTERLEAVE) {
unsigned nid;
nid = interleave_nid(pol, vma, addr, HPAGE_SHIFT);
- __mpol_free(pol); /* finished with pol */
- return NODE_DATA(nid)->node_zonelists + gfp_zone(gfp_flags);
+ if (unlikely(pol != &default_policy &&
+ pol != current->mempolicy))
+ __mpol_put(pol); /* finished with pol */
+ return node_zonelist(nid, gfp_flags);
}
zl = zonelist_policy(GFP_HIGHUSER, pol);
if (unlikely(pol != &default_policy && pol != current->mempolicy)) {
if (pol->policy != MPOL_BIND)
- __mpol_free(pol); /* finished with pol */
+ __mpol_put(pol); /* finished with pol */
else
*mpol = pol; /* unref needed after allocation */
}
struct zonelist *zl;
struct page *page;
- zl = NODE_DATA(nid)->node_zonelists + gfp_zone(gfp);
+ zl = node_zonelist(nid, gfp);
page = __alloc_pages(gfp, order, zl);
- if (page && page_zone(page) == zl->zones[0])
+ if (page && page_zone(page) == zonelist_zone(&zl->_zonerefs[0]))
inc_zone_page_state(page, NUMA_INTERLEAVE_HIT);
return page;
}
unsigned nid;
nid = interleave_nid(pol, vma, addr, PAGE_SHIFT);
+ if (unlikely(pol != &default_policy &&
+ pol != current->mempolicy))
+ __mpol_put(pol); /* finished with pol */
return alloc_page_interleave(gfp, 0, nid);
}
zl = zonelist_policy(gfp, pol);
/*
* slow path: ref counted policy -- shared or vma
*/
- struct page *page = __alloc_pages(gfp, 0, zl);
- __mpol_free(pol);
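+ /*
+ * nodemask_policy() returns a filter nodemask only for MPOL_BIND
+ * allocations at or above policy_zone; otherwise NULL.
+ */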
+ struct page *page = __alloc_pages_nodemask(gfp, 0,
+ zl, nodemask_policy(gfp, pol));
+ __mpol_put(pol);
return page;
}
/*
* fast path: default or task policy
*/
- return __alloc_pages(gfp, 0, zl);
+ return __alloc_pages_nodemask(gfp, 0, zl, nodemask_policy(gfp, pol));
}
/**
pol = &default_policy;
if (pol->policy == MPOL_INTERLEAVE)
return alloc_page_interleave(gfp, order, interleave_nodes(pol));
- return __alloc_pages(gfp, order, zonelist_policy(gfp, pol));
+ return __alloc_pages_nodemask(gfp, order,
+ zonelist_policy(gfp, pol), nodemask_policy(gfp, pol));
}
EXPORT_SYMBOL(alloc_pages_current);
}
*new = *old;
atomic_set(&new->refcnt, 1);
- if (new->policy == MPOL_BIND) {
- int sz = ksize(old->v.zonelist);
- new->v.zonelist = kmemdup(old->v.zonelist, sz, GFP_KERNEL);
- if (!new->v.zonelist) {
- kmem_cache_free(policy_cache, new);
- return ERR_PTR(-ENOMEM);
- }
- }
return new;
}
+static int mpol_match_intent(const struct mempolicy *a,
+ const struct mempolicy *b)
+{
+ if (a->flags != b->flags)
+ return 0;
+ if (!mpol_store_user_nodemask(a))
+ return 1;
+ return nodes_equal(a->w.user_nodemask, b->w.user_nodemask);
+}
+
/* Slow path of a mempolicy comparison */
int __mpol_equal(struct mempolicy *a, struct mempolicy *b)
{
return 0;
if (a->policy != b->policy)
return 0;
+ if (a->policy != MPOL_DEFAULT && !mpol_match_intent(a, b))
+ return 0;
switch (a->policy) {
case MPOL_DEFAULT:
return 1;
+ case MPOL_BIND:
+ /* Fall through */
case MPOL_INTERLEAVE:
return nodes_equal(a->v.nodes, b->v.nodes);
case MPOL_PREFERRED:
return a->v.preferred_node == b->v.preferred_node;
- case MPOL_BIND: {
- int i;
- for (i = 0; a->v.zonelist->zones[i]; i++)
- if (a->v.zonelist->zones[i] != b->v.zonelist->zones[i])
- return 0;
- return b->v.zonelist->zones[i] == NULL;
- }
default:
BUG();
return 0;
}
/* Slow path of a mpol destructor. */
-void __mpol_free(struct mempolicy *p)
+void __mpol_put(struct mempolicy *p)
{
if (!atomic_dec_and_test(&p->refcnt))
return;
- if (p->policy == MPOL_BIND)
- kfree(p->v.zonelist);
p->policy = MPOL_DEFAULT;
kmem_cache_free(policy_cache, p);
}
{
pr_debug("deleting %lx-l%lx\n", n->start, n->end);
rb_erase(&n->nd, &sp->root);
- mpol_free(n->policy);
+ mpol_put(n->policy);
kmem_cache_free(sn_cache, n);
}
sp_insert(sp, new);
spin_unlock(&sp->lock);
if (new2) {
- mpol_free(new2->policy);
+ mpol_put(new2->policy);
kmem_cache_free(sn_cache, new2);
}
return 0;
}
-void mpol_shared_policy_init(struct shared_policy *info, int policy,
- nodemask_t *policy_nodes)
+void mpol_shared_policy_init(struct shared_policy *info, unsigned short policy,
+ unsigned short flags, nodemask_t *policy_nodes)
{
info->root = RB_ROOT;
spin_lock_init(&info->lock);
struct mempolicy *newpol;
/* Falls back to MPOL_DEFAULT on any error */
- newpol = mpol_new(policy, policy_nodes);
+ newpol = mpol_new(policy, flags, policy_nodes);
if (!IS_ERR(newpol)) {
/* Create pseudo-vma that contains just the policy */
struct vm_area_struct pvma;
/* Policy covers entire file */
pvma.vm_end = TASK_SIZE;
mpol_set_shared_policy(info, &pvma, newpol);
- mpol_free(newpol);
+ mpol_put(newpol);
}
}
}
struct sp_node *new = NULL;
unsigned long sz = vma_pages(vma);
- pr_debug("set_shared_policy %lx sz %lu %d %lx\n",
+ pr_debug("set_shared_policy %lx sz %lu %d %d %lx\n",
vma->vm_pgoff,
- sz, npol? npol->policy : -1,
+ sz, npol ? npol->policy : -1,
+ npol ? npol->flags : -1,
npol ? nodes_addr(npol->v.nodes)[0] : -1);
if (npol) {
n = rb_entry(next, struct sp_node, nd);
next = rb_next(&n->nd);
rb_erase(&n->nd, &p->root);
- mpol_free(n->policy);
+ mpol_put(n->policy);
kmem_cache_free(sn_cache, n);
}
spin_unlock(&p->lock);
if (unlikely(nodes_empty(interleave_nodes)))
node_set(prefer, interleave_nodes);
- if (do_set_mempolicy(MPOL_INTERLEAVE, &interleave_nodes))
+ if (do_set_mempolicy(MPOL_INTERLEAVE, 0, &interleave_nodes))
printk("numa_policy_init: interleaving failed\n");
}
/* Reset policy of current process to default */
void numa_default_policy(void)
{
- do_set_mempolicy(MPOL_DEFAULT, NULL);
-}
-
-/* Migrate a policy to a different set of nodes */
-static void mpol_rebind_policy(struct mempolicy *pol,
- const nodemask_t *newmask)
-{
- nodemask_t *mpolmask;
- nodemask_t tmp;
-
- if (!pol)
- return;
- mpolmask = &pol->cpuset_mems_allowed;
- if (nodes_equal(*mpolmask, *newmask))
- return;
-
- switch (pol->policy) {
- case MPOL_DEFAULT:
- break;
- case MPOL_INTERLEAVE:
- nodes_remap(tmp, pol->v.nodes, *mpolmask, *newmask);
- pol->v.nodes = tmp;
- *mpolmask = *newmask;
- current->il_next = node_remap(current->il_next,
- *mpolmask, *newmask);
- break;
- case MPOL_PREFERRED:
- pol->v.preferred_node = node_remap(pol->v.preferred_node,
- *mpolmask, *newmask);
- *mpolmask = *newmask;
- break;
- case MPOL_BIND: {
- nodemask_t nodes;
- struct zone **z;
- struct zonelist *zonelist;
-
- nodes_clear(nodes);
- for (z = pol->v.zonelist->zones; *z; z++)
- node_set(zone_to_nid(*z), nodes);
- nodes_remap(tmp, nodes, *mpolmask, *newmask);
- nodes = tmp;
-
- zonelist = bind_zonelist(&nodes);
-
- /* If no mem, then zonelist is NULL and we keep old zonelist.
- * If that old zonelist has no remaining mems_allowed nodes,
- * then zonelist_policy() will "FALL THROUGH" to MPOL_DEFAULT.
- */
-
- if (!IS_ERR(zonelist)) {
- /* Good - got mem - substitute new zonelist */
- kfree(pol->v.zonelist);
- pol->v.zonelist = zonelist;
- }
- *mpolmask = *newmask;
- break;
- }
- default:
- BUG();
- break;
- }
-}
-
-/*
- * Wrapper for mpol_rebind_policy() that just requires task
- * pointer, and updates task mempolicy.
- */
-
-void mpol_rebind_task(struct task_struct *tsk, const nodemask_t *new)
-{
- mpol_rebind_policy(tsk->mempolicy, new);
-}
-
-/*
- * Rebind each vma in mm to new nodemask.
- *
- * Call holding a reference to mm. Takes mm->mmap_sem during call.
- */
-
-void mpol_rebind_mm(struct mm_struct *mm, nodemask_t *new)
-{
- struct vm_area_struct *vma;
-
- down_write(&mm->mmap_sem);
- for (vma = mm->mmap; vma; vma = vma->vm_next)
- mpol_rebind_policy(vma->vm_policy, new);
- up_write(&mm->mmap_sem);
+ do_set_mempolicy(MPOL_DEFAULT, 0, NULL);
}
/*
* Display pages allocated per node and memory policy via /proc.
*/
-
static const char * const policy_types[] =
{ "default", "prefer", "bind", "interleave" };
char *p = buffer;
int l;
nodemask_t nodes;
- int mode = pol ? pol->policy : MPOL_DEFAULT;
+ unsigned short mode = pol ? pol->policy : MPOL_DEFAULT;
+ unsigned short flags = pol ? pol->flags : 0;
switch (mode) {
case MPOL_DEFAULT:
break;
case MPOL_BIND:
- get_zonemask(pol, &nodes);
- break;
-
+ /* Fall through */
case MPOL_INTERLEAVE:
nodes = pol->v.nodes;
break;
strcpy(p, policy_types[mode]);
p += l;
+ if (flags) {
+ int need_bar = 0;
+
+ if (buffer + maxlen < p + 2)
+ return -ENOSPC;
+ *p++ = '=';
+
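+ /* e.g. "interleave=static" or "bind=relative" */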
+ if (flags & MPOL_F_STATIC_NODES)
+ p += sprintf(p, "%sstatic", need_bar++ ? "|" : "");
+ if (flags & MPOL_F_RELATIVE_NODES)
+ p += sprintf(p, "%srelative", need_bar++ ? "|" : "");
+ }
+
if (!nodes_empty(nodes)) {
if (buffer + maxlen < p + 2)
return -ENOSPC;
* unref shared or other task's mempolicy
*/
if (pol != &default_policy && pol != current->mempolicy)
- __mpol_free(pol);
+ __mpol_put(pol);
seq_printf(m, "%08lx %s", vma->vm_start, buffer);
if (file) {
seq_printf(m, " file=");
- seq_path(m, file->f_path.mnt, file->f_path.dentry, "\n\t= ");
+ seq_path(m, &file->f_path, "\n\t= ");
} else if (vma->vm_start <= mm->brk && vma->vm_end >= mm->start_brk) {
seq_printf(m, " heap");
} else if (vma->vm_start <= mm->start_stack &&