mm, thp: respect MPOL_PREFERRED policy with non-local node

author Vlastimil Babka <vbabka@suse.cz>
Wed, 24 Jun 2015 23:58:48 +0000 (16:58 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Thu, 25 Jun 2015 00:49:46 +0000 (17:49 -0700)
diff --git a/mm/mempolicy.c b/mm/mempolicy.c

index 747743237d9f4d3ead6117d4ee152c00659cd362..99d4c1d0b8583dc453ef992582074ef015f1fb49 100644 (file)
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -1972,35 +1972,41 @@ retry_cpuset:
         pol = get_vma_policy(vma, addr);
         cpuset_mems_cookie = read_mems_allowed_begin();
  
-       if (unlikely(IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) && hugepage &&
-                                       pol->mode != MPOL_INTERLEAVE)) {
+       if (pol->mode == MPOL_INTERLEAVE) {
+               unsigned nid;
+
+               nid = interleave_nid(pol, vma, addr, PAGE_SHIFT + order);
+               mpol_cond_put(pol);
+               page = alloc_page_interleave(gfp, order, nid);
+               goto out;
+       }
+
+       if (unlikely(IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) && hugepage)) {
+               int hpage_node = node;
+
                 /*
                  * For hugepage allocation and non-interleave policy which
-                * allows the current node, we only try to allocate from the
-                * current node and don't fall back to other nodes, as the
-                * cost of remote accesses would likely offset THP benefits.
+                * allows the current node (or other explicitly preferred
+                * node) we only try to allocate from the current/preferred
+                * node and don't fall back to other nodes, as the cost of
+                * remote accesses would likely offset THP benefits.
                  *
                  * If the policy is interleave, or does not allow the current
                  * node in its nodemask, we allocate the standard way.
                  */
+               if (pol->mode == MPOL_PREFERRED &&
+                                               !(pol->flags & MPOL_F_LOCAL))
+                       hpage_node = pol->v.preferred_node;
+
                 nmask = policy_nodemask(gfp, pol);
-               if (!nmask || node_isset(node, *nmask)) {
+               if (!nmask || node_isset(hpage_node, *nmask)) {
                         mpol_cond_put(pol);
-                       page = alloc_pages_exact_node(node,
+                       page = alloc_pages_exact_node(hpage_node,
                                                 gfp | __GFP_THISNODE, order);
                         goto out;
                 }
         }
  
-       if (pol->mode == MPOL_INTERLEAVE) {
-               unsigned nid;
-
-               nid = interleave_nid(pol, vma, addr, PAGE_SHIFT + order);
-               mpol_cond_put(pol);
-               page = alloc_page_interleave(gfp, order, nid);
-               goto out;
-       }
-
         nmask = policy_nodemask(gfp, pol);
         zl = policy_zonelist(gfp, pol, node);
         mpol_cond_put(pol);
author	Vlastimil Babka <vbabka@suse.cz>
	Wed, 24 Jun 2015 23:58:48 +0000 (16:58 -0700)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Thu, 25 Jun 2015 00:49:46 +0000 (17:49 -0700)