struct zonelist *zonelist;
struct zone *zone;
struct zoneref *z;
+ unsigned int cpuset_mems_cookie;
- get_mems_allowed();
+retry_cpuset:
+ cpuset_mems_cookie = get_mems_allowed();
zonelist = huge_zonelist(vma, address,
htlb_alloc_mask, &mpol, &nodemask);
/*
}
}
}
-err:
+
mpol_cond_put(mpol);
- put_mems_allowed();
+ if (unlikely(!put_mems_allowed(cpuset_mems_cookie) && !page))
+ goto retry_cpuset;
return page;
+
+err:
+ mpol_cond_put(mpol);
+ return NULL;
}
static void update_and_free_page(struct hstate *h, struct page *page)
{
mutex_lock(&vma->vm_file->f_mapping->i_mmap_mutex);
__unmap_hugepage_range(vma, start, end, ref_page);
+ /*
+ * Clear this flag so that x86's huge_pmd_share page_table_shareable
+ * test will fail on a vma being torn down, and not grab a page table
+ * on its way out. We're lucky that the flag has such an appropriate
+ * name, and can in fact be safely cleared here. We could clear it
+ * before the __unmap_hugepage_range above, but all that's necessary
+ * is to clear it before releasing the i_mmap_mutex below.
+ *
+ * This works because in the contexts this is called, the VMA is
+ * going to be destroyed. It is not vunerable to madvise(DONTNEED)
+ * because madvise is not supported on hugetlbfs. The same applies
+ * for direct IO. unmap_hugepage_range() is only being called just
+ * before free_pgtables() so clearing VM_MAYSHARE will not cause
+ * surprises later.
+ */
+ vma->vm_flags &= ~VM_MAYSHARE;
mutex_unlock(&vma->vm_file->f_mapping->i_mmap_mutex);
}
}
}
spin_unlock(&mm->page_table_lock);
- mutex_unlock(&vma->vm_file->f_mapping->i_mmap_mutex);
-
+ /*
+ * Must flush TLB before releasing i_mmap_mutex: x86's huge_pmd_unshare
+ * may have cleared our pud entry and done put_page on the page table:
+ * once we release i_mmap_mutex, another task can do the final put_page
+ * and that page table be reused and filled with junk.
+ */
flush_tlb_range(vma, start, end);
+ mutex_unlock(&vma->vm_file->f_mapping->i_mmap_mutex);
}
int hugetlb_reserve_pages(struct inode *inode,