return first;
}
-/* Apply policy to a single VMA */
-static int policy_vma(struct vm_area_struct *vma, struct mempolicy *new)
+/*
+ * Apply policy to a single VMA
+ * This must be called with the mmap_sem held for writing.
+ */
+static int vma_replace_policy(struct vm_area_struct *vma,
+ struct mempolicy *pol)
{
- int err = 0;
- struct mempolicy *old = vma->vm_policy;
+ int err;
+ struct mempolicy *old;
+ struct mempolicy *new;
pr_debug("vma %lx-%lx/%lx vm_ops %p vm_file %p set_policy %p\n",
vma->vm_start, vma->vm_end, vma->vm_pgoff,
vma->vm_ops, vma->vm_file,
vma->vm_ops ? vma->vm_ops->set_policy : NULL);
- if (vma->vm_ops && vma->vm_ops->set_policy)
+ new = mpol_dup(pol);
+ if (IS_ERR(new))
+ return PTR_ERR(new);
+
+ if (vma->vm_ops && vma->vm_ops->set_policy) {
err = vma->vm_ops->set_policy(vma, new);
- if (!err) {
- mpol_get(new);
- vma->vm_policy = new;
- mpol_put(old);
+ if (err)
+ goto err_out;
}
+
+ old = vma->vm_policy;
+ vma->vm_policy = new; /* protected by mmap_sem */
+ mpol_put(old);
+
+ return 0;
+ err_out:
+ mpol_put(new);
return err;
}
if (err)
goto out;
}
- err = policy_vma(vma, new_pol);
+ err = vma_replace_policy(vma, new_pol);
if (err)
goto out;
}
nodemask_t nmask;
LIST_HEAD(pagelist);
int err = 0;
- struct vm_area_struct *vma;
nodes_clear(nmask);
node_set(source, nmask);
- vma = check_range(mm, mm->mmap->vm_start, mm->task_size, &nmask,
+ /*
+ * This does not "check" the range but isolates all pages that
+ * need migration. Between passing in the full user address
+ * space range and MPOL_MF_DISCONTIG_OK, this call can not fail.
+ */
+ VM_BUG_ON(!(flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)));
+ check_range(mm, mm->mmap->vm_start, mm->task_size, &nmask,
flags | MPOL_MF_DISCONTIG_OK, &pagelist);
- if (IS_ERR(vma))
- return PTR_ERR(vma);
if (!list_empty(&pagelist)) {
err = migrate_pages(&pagelist, new_node_page, dest,
addr);
if (vpol)
pol = vpol;
- } else if (vma->vm_policy)
+ } else if (vma->vm_policy) {
pol = vma->vm_policy;
+
+ /*
+ * shmem_alloc_page() passes MPOL_F_SHARED policy with
+ * a pseudo vma whose vma->vm_ops=NULL. Take a reference
+ * count on these policies which will be dropped by
+ * mpol_cond_put() later
+ */
+ if (mpol_needs_cond_ref(pol))
+ mpol_get(pol);
+ }
}
if (!pol)
pol = &default_policy;
*/
/* lookup first element intersecting start-end */
-/* Caller holds sp->lock */
+/* Caller holds sp->mutex */
static struct sp_node *
sp_lookup(struct shared_policy *sp, unsigned long start, unsigned long end)
{
if (!sp->root.rb_node)
return NULL;
- spin_lock(&sp->lock);
+ mutex_lock(&sp->mutex);
sn = sp_lookup(sp, idx, idx+1);
if (sn) {
mpol_get(sn->policy);
pol = sn->policy;
}
- spin_unlock(&sp->lock);
+ mutex_unlock(&sp->mutex);
return pol;
}
+static void sp_free(struct sp_node *n)
+{
+ mpol_put(n->policy);
+ kmem_cache_free(sn_cache, n);
+}
+
static void sp_delete(struct shared_policy *sp, struct sp_node *n)
{
pr_debug("deleting %lx-l%lx\n", n->start, n->end);
rb_erase(&n->nd, &sp->root);
- mpol_put(n->policy);
- kmem_cache_free(sn_cache, n);
+ sp_free(n);
}
static struct sp_node *sp_alloc(unsigned long start, unsigned long end,
struct mempolicy *pol)
{
- struct sp_node *n = kmem_cache_alloc(sn_cache, GFP_KERNEL);
+ struct sp_node *n;
+ struct mempolicy *newpol;
+ n = kmem_cache_alloc(sn_cache, GFP_KERNEL);
if (!n)
return NULL;
+
+ newpol = mpol_dup(pol);
+ if (IS_ERR(newpol)) {
+ kmem_cache_free(sn_cache, n);
+ return NULL;
+ }
+ newpol->flags |= MPOL_F_SHARED;
+
n->start = start;
n->end = end;
- mpol_get(pol);
- pol->flags |= MPOL_F_SHARED; /* for unref */
- n->policy = pol;
+ n->policy = newpol;
+
return n;
}
static int shared_policy_replace(struct shared_policy *sp, unsigned long start,
unsigned long end, struct sp_node *new)
{
- struct sp_node *n, *new2 = NULL;
+ struct sp_node *n;
+ int ret = 0;
-restart:
- spin_lock(&sp->lock);
+ mutex_lock(&sp->mutex);
n = sp_lookup(sp, start, end);
/* Take care of old policies in the same range. */
while (n && n->start < end) {
} else {
/* Old policy spanning whole new range. */
if (n->end > end) {
+ struct sp_node *new2;
+ new2 = sp_alloc(end, n->end, n->policy);
if (!new2) {
- spin_unlock(&sp->lock);
- new2 = sp_alloc(end, n->end, n->policy);
- if (!new2)
- return -ENOMEM;
- goto restart;
+ ret = -ENOMEM;
+ goto out;
}
n->end = start;
sp_insert(sp, new2);
- new2 = NULL;
break;
} else
n->end = start;
}
if (new)
sp_insert(sp, new);
- spin_unlock(&sp->lock);
- if (new2) {
- mpol_put(new2->policy);
- kmem_cache_free(sn_cache, new2);
- }
- return 0;
+out:
+ mutex_unlock(&sp->mutex);
+ return ret;
}
/**
int ret;
sp->root = RB_ROOT; /* empty tree == default mempolicy */
- spin_lock_init(&sp->lock);
+ mutex_init(&sp->mutex);
if (mpol) {
struct vm_area_struct pvma;
}
err = shared_policy_replace(info, vma->vm_pgoff, vma->vm_pgoff+sz, new);
if (err && new)
- kmem_cache_free(sn_cache, new);
+ sp_free(new);
return err;
}
if (!p->root.rb_node)
return;
- spin_lock(&p->lock);
+ mutex_lock(&p->mutex);
next = rb_first(&p->root);
while (next) {
n = rb_entry(next, struct sp_node, nd);
next = rb_next(&n->nd);
- rb_erase(&n->nd, &p->root);
- mpol_put(n->policy);
- kmem_cache_free(sn_cache, n);
+ sp_delete(p, n);
}
- spin_unlock(&p->lock);
+ mutex_unlock(&p->mutex);
}
/* assumes fs == KERNEL_DS */