Merge tag 'blackfin-for-linus' of http://git.kernel.org/pub/scm/linux/kernel/git...

[firefly-linux-kernel-4.4.55.git] / mm / page_alloc.c
diff --git a/mm/page_alloc.c b/mm/page_alloc.c

index dfe954fbb48aeda3c6a2ec91bd1c18c158ec29da..4f59fa29eda8b9b22e9532f27a3ffc5535893452 100644 (file)
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -261,8 +261,9 @@ static int page_outside_zone_boundaries(struct zone *zone, struct page *page)
         } while (zone_span_seqretry(zone, seq));
  
         if (ret)
-               pr_err("page %lu outside zone [ %lu - %lu ]\n",
-                       pfn, start_pfn, start_pfn + sp);
+               pr_err("page 0x%lx outside node %d zone %s [ 0x%lx - 0x%lx ]\n",
+                       pfn, zone_to_nid(zone), zone->name,
+                       start_pfn, start_pfn + sp);
  
         return ret;
  }
@@ -408,7 +409,8 @@ static int destroy_compound_page(struct page *page, unsigned long order)
         return bad;
  }
  
-static inline void prep_zero_page(struct page *page, int order, gfp_t gfp_flags)
+static inline void prep_zero_page(struct page *page, unsigned int order,
+                                                       gfp_t gfp_flags)
  {
         int i;
  
@@ -452,7 +454,7 @@ static inline void set_page_guard_flag(struct page *page) { }
  static inline void clear_page_guard_flag(struct page *page) { }
  #endif
  
-static inline void set_page_order(struct page *page, int order)
+static inline void set_page_order(struct page *page, unsigned int order)
  {
         set_page_private(page, order);
         __SetPageBuddy(page);
@@ -503,21 +505,31 @@ __find_buddy_index(unsigned long page_idx, unsigned int order)
   * For recording page's order, we use page_private(page).
   */
  static inline int page_is_buddy(struct page *page, struct page *buddy,
-                                                               int order)
+                                                       unsigned int order)
  {
         if (!pfn_valid_within(page_to_pfn(buddy)))
                 return 0;
  
-       if (page_zone_id(page) != page_zone_id(buddy))
-               return 0;
-
         if (page_is_guard(buddy) && page_order(buddy) == order) {
                 VM_BUG_ON_PAGE(page_count(buddy) != 0, buddy);
+
+               if (page_zone_id(page) != page_zone_id(buddy))
+                       return 0;
+
                 return 1;
         }
  
         if (PageBuddy(buddy) && page_order(buddy) == order) {
                 VM_BUG_ON_PAGE(page_count(buddy) != 0, buddy);
+
+               /*
+                * zone check is done late to avoid uselessly
+                * calculating zone/node ids for pages that could
+                * never merge.
+                */
+               if (page_zone_id(page) != page_zone_id(buddy))
+                       return 0;
+
                 return 1;
         }
         return 0;
@@ -549,6 +561,7 @@ static inline int page_is_buddy(struct page *page, struct page *buddy,
   */
  
  static inline void __free_one_page(struct page *page,
+               unsigned long pfn,
                 struct zone *zone, unsigned int order,
                 int migratetype)
  {
@@ -565,7 +578,7 @@ static inline void __free_one_page(struct page *page,
  
         VM_BUG_ON(migratetype == -1);
  
-       page_idx = page_to_pfn(page) & ((1 << MAX_ORDER) - 1);
+       page_idx = pfn & ((1 << MAX_ORDER) - 1);
  
         VM_BUG_ON_PAGE(page_idx & ((1 << order) - 1), page);
         VM_BUG_ON_PAGE(bad_range(zone, page), page);
@@ -700,7 +713,7 @@ static void free_pcppages_bulk(struct zone *zone, int count,
                         list_del(&page->lru);
                         mt = get_freepage_migratetype(page);
                         /* MIGRATE_MOVABLE list may include MIGRATE_RESERVEs */
-                       __free_one_page(page, zone, 0, mt);
+                       __free_one_page(page, page_to_pfn(page), zone, 0, mt);
                         trace_mm_page_pcpu_drain(page, 0, mt);
                         if (likely(!is_migrate_isolate_page(page))) {
                                 __mod_zone_page_state(zone, NR_FREE_PAGES, 1);
@@ -712,13 +725,15 @@ static void free_pcppages_bulk(struct zone *zone, int count,
         spin_unlock(&zone->lock);
  }
  
-static void free_one_page(struct zone *zone, struct page *page, int order,
+static void free_one_page(struct zone *zone,
+                               struct page *page, unsigned long pfn,
+                               unsigned int order,
                                 int migratetype)
  {
         spin_lock(&zone->lock);
         zone->pages_scanned = 0;
  
-       __free_one_page(page, zone, order, migratetype);
+       __free_one_page(page, pfn, zone, order, migratetype);
         if (unlikely(!is_migrate_isolate(migratetype)))
                 __mod_zone_freepage_state(zone, 1 << order, migratetype);
         spin_unlock(&zone->lock);
@@ -755,15 +770,16 @@ static void __free_pages_ok(struct page *page, unsigned int order)
  {
         unsigned long flags;
         int migratetype;
+       unsigned long pfn = page_to_pfn(page);
  
         if (!free_pages_prepare(page, order))
                 return;
  
+       migratetype = get_pfnblock_migratetype(page, pfn);
         local_irq_save(flags);
         __count_vm_events(PGFREE, 1 << order);
-       migratetype = get_pageblock_migratetype(page);
         set_freepage_migratetype(page, migratetype);
-       free_one_page(page_zone(page), page, order, migratetype);
+       free_one_page(page_zone(page), page, pfn, order, migratetype);
         local_irq_restore(flags);
  }
  
@@ -882,7 +898,7 @@ static inline int check_new_page(struct page *page)
         return 0;
  }
  
-static int prep_new_page(struct page *page, int order, gfp_t gfp_flags)
+static int prep_new_page(struct page *page, unsigned int order, gfp_t gfp_flags)
  {
         int i;
  
@@ -931,6 +947,7 @@ struct page *__rmqueue_smallest(struct zone *zone, unsigned int order,
                 rmv_page_order(page);
                 area->nr_free--;
                 expand(zone, page, order, current_order, area, migratetype);
+               set_freepage_migratetype(page, migratetype);
                 return page;
         }
  
@@ -1057,7 +1074,9 @@ static int try_to_steal_freepages(struct zone *zone, struct page *page,
  
         /*
          * When borrowing from MIGRATE_CMA, we need to release the excess
-        * buddy pages to CMA itself.
+        * buddy pages to CMA itself. We also ensure the freepage_migratetype
+        * is set to CMA so it is returned to the correct freelist in case
+        * the page ends up not actually being allocated from the pcp lists.
          */
         if (is_migrate_cma(fallback_type))
                 return fallback_type;
@@ -1090,16 +1109,17 @@ static int try_to_steal_freepages(struct zone *zone, struct page *page,
  
  /* Remove an element from the buddy allocator from the fallback list */
  static inline struct page *
-__rmqueue_fallback(struct zone *zone, int order, int start_migratetype)
+__rmqueue_fallback(struct zone *zone, unsigned int order, int start_migratetype)
  {
         struct free_area *area;
-       int current_order;
+       unsigned int current_order;
         struct page *page;
         int migratetype, new_type, i;
  
         /* Find the largest possible block of pages in the other list */
-       for (current_order = MAX_ORDER-1; current_order >= order;
-                                               --current_order) {
+       for (current_order = MAX_ORDER-1;
+                               current_order >= order && current_order <= MAX_ORDER-1;
+                               --current_order) {
                 for (i = 0;; i++) {
                         migratetype = fallbacks[start_migratetype][i];
  
@@ -1125,6 +1145,12 @@ __rmqueue_fallback(struct zone *zone, int order, int start_migratetype)
  
                         expand(zone, page, order, current_order, area,
                                new_type);
+                       /* The freepage_migratetype may differ from pageblock's
+                        * migratetype depending on the decisions in
+                        * try_to_steal_freepages. This is OK as long as it does
+                        * not differ for MIGRATE_CMA type.
+                        */
+                       set_freepage_migratetype(page, new_type);
  
                         trace_mm_page_alloc_extfrag(page, order, current_order,
                                 start_migratetype, migratetype, new_type);
@@ -1173,9 +1199,9 @@ retry_reserve:
   */
  static int rmqueue_bulk(struct zone *zone, unsigned int order,
                         unsigned long count, struct list_head *list,
-                       int migratetype, int cold)
+                       int migratetype, bool cold)
  {
-       int mt = migratetype, i;
+       int i;
  
         spin_lock(&zone->lock);
         for (i = 0; i < count; ++i) {
@@ -1192,18 +1218,12 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order,
                  * merge IO requests if the physical pages are ordered
                  * properly.
                  */
-               if (likely(cold == 0))
+               if (likely(!cold))
                         list_add(&page->lru, list);
                 else
                         list_add_tail(&page->lru, list);
-               if (IS_ENABLED(CONFIG_CMA)) {
-                       mt = get_pageblock_migratetype(page);
-                       if (!is_migrate_cma(mt) && !is_migrate_isolate(mt))
-                               mt = migratetype;
-               }
-               set_freepage_migratetype(page, mt);
                 list = &page->lru;
-               if (is_migrate_cma(mt))
+               if (is_migrate_cma(get_freepage_migratetype(page)))
                         __mod_zone_page_state(zone, NR_FREE_CMA_PAGES,
                                               -(1 << order));
         }
@@ -1327,7 +1347,7 @@ void mark_free_pages(struct zone *zone)
  {
         unsigned long pfn, max_zone_pfn;
         unsigned long flags;
-       int order, t;
+       unsigned int order, t;
         struct list_head *curr;
  
         if (zone_is_empty(zone))
@@ -1359,19 +1379,20 @@ void mark_free_pages(struct zone *zone)
  
  /*
   * Free a 0-order page
- * cold == 1 ? free a cold page : free a hot page
+ * cold == true ? free a cold page : free a hot page
   */
-void free_hot_cold_page(struct page *page, int cold)
+void free_hot_cold_page(struct page *page, bool cold)
  {
         struct zone *zone = page_zone(page);
         struct per_cpu_pages *pcp;
         unsigned long flags;
+       unsigned long pfn = page_to_pfn(page);
         int migratetype;
  
         if (!free_pages_prepare(page, 0))
                 return;
  
-       migratetype = get_pageblock_migratetype(page);
+       migratetype = get_pfnblock_migratetype(page, pfn);
         set_freepage_migratetype(page, migratetype);
         local_irq_save(flags);
         __count_vm_event(PGFREE);
@@ -1385,17 +1406,17 @@ void free_hot_cold_page(struct page *page, int cold)
          */
         if (migratetype >= MIGRATE_PCPTYPES) {
                 if (unlikely(is_migrate_isolate(migratetype))) {
-                       free_one_page(zone, page, 0, migratetype);
+                       free_one_page(zone, page, pfn, 0, migratetype);
                         goto out;
                 }
                 migratetype = MIGRATE_MOVABLE;
         }
  
         pcp = &this_cpu_ptr(zone->pageset)->pcp;
-       if (cold)
-               list_add_tail(&page->lru, &pcp->lists[migratetype]);
-       else
+       if (!cold)
                 list_add(&page->lru, &pcp->lists[migratetype]);
+       else
+               list_add_tail(&page->lru, &pcp->lists[migratetype]);
         pcp->count++;
         if (pcp->count >= pcp->high) {
                 unsigned long batch = ACCESS_ONCE(pcp->batch);
@@ -1410,7 +1431,7 @@ out:
  /*
   * Free a list of 0-order pages
   */
-void free_hot_cold_page_list(struct list_head *list, int cold)
+void free_hot_cold_page_list(struct list_head *list, bool cold)
  {
         struct page *page, *next;
  
@@ -1522,12 +1543,12 @@ int split_free_page(struct page *page)
   */
  static inline
  struct page *buffered_rmqueue(struct zone *preferred_zone,
-                       struct zone *zone, int order, gfp_t gfp_flags,
-                       int migratetype)
+                       struct zone *zone, unsigned int order,
+                       gfp_t gfp_flags, int migratetype)
  {
         unsigned long flags;
         struct page *page;
-       int cold = !!(gfp_flags & __GFP_COLD);
+       bool cold = ((gfp_flags & __GFP_COLD) != 0);
  
  again:
         if (likely(order == 0)) {
@@ -1572,7 +1593,7 @@ again:
                 if (!page)
                         goto failed;
                 __mod_zone_freepage_state(zone, -(1 << order),
-                                         get_pageblock_migratetype(page));
+                                         get_freepage_migratetype(page));
         }
  
         __mod_zone_page_state(zone, NR_ALLOC_BATCH, -(1 << order));
@@ -1672,8 +1693,9 @@ static inline bool should_fail_alloc_page(gfp_t gfp_mask, unsigned int order)
   * Return true if free pages are above 'mark'. This takes into account the order
   * of the allocation.
   */
-static bool __zone_watermark_ok(struct zone *z, int order, unsigned long mark,
-                     int classzone_idx, int alloc_flags, long free_pages)
+static bool __zone_watermark_ok(struct zone *z, unsigned int order,
+                       unsigned long mark, int classzone_idx, int alloc_flags,
+                       long free_pages)
  {
         /* free_pages my go negative - that's OK */
         long min = mark;
@@ -1707,15 +1729,15 @@ static bool __zone_watermark_ok(struct zone *z, int order, unsigned long mark,
         return true;
  }
  
-bool zone_watermark_ok(struct zone *z, int order, unsigned long mark,
+bool zone_watermark_ok(struct zone *z, unsigned int order, unsigned long mark,
                       int classzone_idx, int alloc_flags)
  {
         return __zone_watermark_ok(z, order, mark, classzone_idx, alloc_flags,
                                         zone_page_state(z, NR_FREE_PAGES));
  }
  
-bool zone_watermark_ok_safe(struct zone *z, int order, unsigned long mark,
-                     int classzone_idx, int alloc_flags)
+bool zone_watermark_ok_safe(struct zone *z, unsigned int order,
+                       unsigned long mark, int classzone_idx, int alloc_flags)
  {
         long free_pages = zone_page_state(z, NR_FREE_PAGES);
  
@@ -1850,16 +1872,8 @@ static bool zone_local(struct zone *local_zone, struct zone *zone)
  
  static bool zone_allows_reclaim(struct zone *local_zone, struct zone *zone)
  {
-       return node_isset(local_zone->node, zone->zone_pgdat->reclaim_nodes);
-}
-
-static void __paginginit init_zone_allows_reclaim(int nid)
-{
-       int i;
-
-       for_each_node_state(i, N_MEMORY)
-               if (node_distance(nid, i) <= RECLAIM_DISTANCE)
-                       node_set(i, NODE_DATA(nid)->reclaim_nodes);
+       return node_distance(zone_to_nid(local_zone), zone_to_nid(zone)) <
+                               RECLAIM_DISTANCE;
  }
  
  #else  /* CONFIG_NUMA */
@@ -1893,9 +1907,6 @@ static bool zone_allows_reclaim(struct zone *local_zone, struct zone *zone)
         return true;
  }
  
-static inline void init_zone_allows_reclaim(int nid)
-{
-}
  #endif /* CONFIG_NUMA */
  
  /*
@@ -1905,17 +1916,17 @@ static inline void init_zone_allows_reclaim(int nid)
  static struct page *
  get_page_from_freelist(gfp_t gfp_mask, nodemask_t *nodemask, unsigned int order,
                 struct zonelist *zonelist, int high_zoneidx, int alloc_flags,
-               struct zone *preferred_zone, int migratetype)
+               struct zone *preferred_zone, int classzone_idx, int migratetype)
  {
         struct zoneref *z;
         struct page *page = NULL;
-       int classzone_idx;
         struct zone *zone;
         nodemask_t *allowednodes = NULL;/* zonelist_cache approximation */
         int zlc_active = 0;             /* set if using zonelist_cache */
         int did_zlc_setup = 0;          /* just call zlc_setup() one time */
+       bool consider_zone_dirty = (alloc_flags & ALLOC_WMARK_LOW) &&
+                               (gfp_mask & __GFP_WRITE);
  
-       classzone_idx = zone_idx(preferred_zone);
  zonelist_scan:
         /*
          * Scan zonelist, looking for a zone with enough free.
@@ -1928,12 +1939,10 @@ zonelist_scan:
                 if (IS_ENABLED(CONFIG_NUMA) && zlc_active &&
                         !zlc_zone_worth_trying(zonelist, z, allowednodes))
                                 continue;
-               if ((alloc_flags & ALLOC_CPUSET) &&
+               if (cpusets_enabled() &&
+                       (alloc_flags & ALLOC_CPUSET) &&
                         !cpuset_zone_allowed_softwall(zone, gfp_mask))
                                 continue;
-               BUILD_BUG_ON(ALLOC_NO_WATERMARKS < NR_WMARK);
-               if (unlikely(alloc_flags & ALLOC_NO_WATERMARKS))
-                       goto try_this_zone;
                 /*
                  * Distribute pages in proportion to the individual
                  * zone size to ensure fair page aging.  The zone a
@@ -1972,15 +1981,19 @@ zonelist_scan:
                  * will require awareness of zones in the
                  * dirty-throttling and the flusher threads.
                  */
-               if ((alloc_flags & ALLOC_WMARK_LOW) &&
-                   (gfp_mask & __GFP_WRITE) && !zone_dirty_ok(zone))
-                       goto this_zone_full;
+               if (consider_zone_dirty && !zone_dirty_ok(zone))
+                       continue;
  
                 mark = zone->watermark[alloc_flags & ALLOC_WMARK_MASK];
                 if (!zone_watermark_ok(zone, order, mark,
                                        classzone_idx, alloc_flags)) {
                         int ret;
  
+                       /* Checked here to keep the fast path fast */
+                       BUILD_BUG_ON(ALLOC_NO_WATERMARKS < NR_WMARK);
+                       if (alloc_flags & ALLOC_NO_WATERMARKS)
+                               goto try_this_zone;
+
                         if (IS_ENABLED(CONFIG_NUMA) &&
                                         !did_zlc_setup && nr_online_nodes > 1) {
                                 /*
@@ -2042,7 +2055,7 @@ try_this_zone:
                 if (page)
                         break;
  this_zone_full:
-               if (IS_ENABLED(CONFIG_NUMA))
+               if (IS_ENABLED(CONFIG_NUMA) && zlc_active)
                         zlc_mark_zone_full(zonelist, z);
         }
  
@@ -2171,7 +2184,7 @@ static inline struct page *
  __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order,
         struct zonelist *zonelist, enum zone_type high_zoneidx,
         nodemask_t *nodemask, struct zone *preferred_zone,
-       int migratetype)
+       int classzone_idx, int migratetype)
  {
         struct page *page;
  
@@ -2189,7 +2202,7 @@ __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order,
         page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, nodemask,
                 order, zonelist, high_zoneidx,
                 ALLOC_WMARK_HIGH|ALLOC_CPUSET,
-               preferred_zone, migratetype);
+               preferred_zone, classzone_idx, migratetype);
         if (page)
                 goto out;
  
@@ -2224,7 +2237,7 @@ static struct page *
  __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
         struct zonelist *zonelist, enum zone_type high_zoneidx,
         nodemask_t *nodemask, int alloc_flags, struct zone *preferred_zone,
-       int migratetype, bool sync_migration,
+       int classzone_idx, int migratetype, enum migrate_mode mode,
         bool *contended_compaction, bool *deferred_compaction,
         unsigned long *did_some_progress)
  {
@@ -2238,7 +2251,7 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
  
         current->flags |= PF_MEMALLOC;
         *did_some_progress = try_to_compact_pages(zonelist, order, gfp_mask,
-                                               nodemask, sync_migration,
+                                               nodemask, mode,
                                                 contended_compaction);
         current->flags &= ~PF_MEMALLOC;
  
@@ -2252,7 +2265,7 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
                 page = get_page_from_freelist(gfp_mask, nodemask,
                                 order, zonelist, high_zoneidx,
                                 alloc_flags & ~ALLOC_NO_WATERMARKS,
-                               preferred_zone, migratetype);
+                               preferred_zone, classzone_idx, migratetype);
                 if (page) {
                         preferred_zone->compact_blockskip_flush = false;
                         compaction_defer_reset(preferred_zone, order, true);
@@ -2271,7 +2284,7 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
                  * As async compaction considers a subset of pageblocks, only
                  * defer if the failure was a sync compaction failure.
                  */
-               if (sync_migration)
+               if (mode != MIGRATE_ASYNC)
                         defer_compaction(preferred_zone, order);
  
                 cond_resched();
@@ -2284,9 +2297,9 @@ static inline struct page *
  __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
         struct zonelist *zonelist, enum zone_type high_zoneidx,
         nodemask_t *nodemask, int alloc_flags, struct zone *preferred_zone,
-       int migratetype, bool sync_migration,
-       bool *contended_compaction, bool *deferred_compaction,
-       unsigned long *did_some_progress)
+       int classzone_idx, int migratetype,
+       enum migrate_mode mode, bool *contended_compaction,
+       bool *deferred_compaction, unsigned long *did_some_progress)
  {
         return NULL;
  }
@@ -2325,7 +2338,7 @@ static inline struct page *
  __alloc_pages_direct_reclaim(gfp_t gfp_mask, unsigned int order,
         struct zonelist *zonelist, enum zone_type high_zoneidx,
         nodemask_t *nodemask, int alloc_flags, struct zone *preferred_zone,
-       int migratetype, unsigned long *did_some_progress)
+       int classzone_idx, int migratetype, unsigned long *did_some_progress)
  {
         struct page *page = NULL;
         bool drained = false;
@@ -2343,7 +2356,8 @@ retry:
         page = get_page_from_freelist(gfp_mask, nodemask, order,
                                         zonelist, high_zoneidx,
                                         alloc_flags & ~ALLOC_NO_WATERMARKS,
-                                       preferred_zone, migratetype);
+                                       preferred_zone, classzone_idx,
+                                       migratetype);
  
         /*
          * If an allocation failed after direct reclaim, it could be because
@@ -2366,14 +2380,14 @@ static inline struct page *
  __alloc_pages_high_priority(gfp_t gfp_mask, unsigned int order,
         struct zonelist *zonelist, enum zone_type high_zoneidx,
         nodemask_t *nodemask, struct zone *preferred_zone,
-       int migratetype)
+       int classzone_idx, int migratetype)
  {
         struct page *page;
  
         do {
                 page = get_page_from_freelist(gfp_mask, nodemask, order,
                         zonelist, high_zoneidx, ALLOC_NO_WATERMARKS,
-                       preferred_zone, migratetype);
+                       preferred_zone, classzone_idx, migratetype);
  
                 if (!page && gfp_mask & __GFP_NOFAIL)
                         wait_iff_congested(preferred_zone, BLK_RW_ASYNC, HZ/50);
@@ -2474,14 +2488,14 @@ static inline struct page *
  __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
         struct zonelist *zonelist, enum zone_type high_zoneidx,
         nodemask_t *nodemask, struct zone *preferred_zone,
-       int migratetype)
+       int classzone_idx, int migratetype)
  {
         const gfp_t wait = gfp_mask & __GFP_WAIT;
         struct page *page = NULL;
         int alloc_flags;
         unsigned long pages_reclaimed = 0;
         unsigned long did_some_progress;
-       bool sync_migration = false;
+       enum migrate_mode migration_mode = MIGRATE_ASYNC;
         bool deferred_compaction = false;
         bool contended_compaction = false;
  
@@ -2523,15 +2537,18 @@ restart:
          * Find the true preferred zone if the allocation is unconstrained by
          * cpusets.
          */
-       if (!(alloc_flags & ALLOC_CPUSET) && !nodemask)
-               first_zones_zonelist(zonelist, high_zoneidx, NULL,
-                                       &preferred_zone);
+       if (!(alloc_flags & ALLOC_CPUSET) && !nodemask) {
+               struct zoneref *preferred_zoneref;
+               preferred_zoneref = first_zones_zonelist(zonelist, high_zoneidx,
+                               NULL, &preferred_zone);
+               classzone_idx = zonelist_zone_idx(preferred_zoneref);
+       }
  
  rebalance:
         /* This is the last chance, in general, before the goto nopage. */
         page = get_page_from_freelist(gfp_mask, nodemask, order, zonelist,
                         high_zoneidx, alloc_flags & ~ALLOC_NO_WATERMARKS,
-                       preferred_zone, migratetype);
+                       preferred_zone, classzone_idx, migratetype);
         if (page)
                 goto got_pg;
  
@@ -2546,7 +2563,7 @@ rebalance:
  
                 page = __alloc_pages_high_priority(gfp_mask, order,
                                 zonelist, high_zoneidx, nodemask,
-                               preferred_zone, migratetype);
+                               preferred_zone, classzone_idx, migratetype);
                 if (page) {
                         goto got_pg;
                 }
@@ -2575,17 +2592,23 @@ rebalance:
          * Try direct compaction. The first pass is asynchronous. Subsequent
          * attempts after direct reclaim are synchronous
          */
-       page = __alloc_pages_direct_compact(gfp_mask, order,
-                                       zonelist, high_zoneidx,
-                                       nodemask,
-                                       alloc_flags, preferred_zone,
-                                       migratetype, sync_migration,
-                                       &contended_compaction,
+       page = __alloc_pages_direct_compact(gfp_mask, order, zonelist,
+                                       high_zoneidx, nodemask, alloc_flags,
+                                       preferred_zone,
+                                       classzone_idx, migratetype,
+                                       migration_mode, &contended_compaction,
                                         &deferred_compaction,
                                         &did_some_progress);
         if (page)
                 goto got_pg;
-       sync_migration = true;
+
+       /*
+        * It can become very expensive to allocate transparent hugepages at
+        * fault, so use asynchronous memory compaction for THP unless it is
+        * khugepaged trying to collapse.
+        */
+       if (!(gfp_mask & __GFP_NO_KSWAPD) || (current->flags & PF_KTHREAD))
+               migration_mode = MIGRATE_SYNC_LIGHT;
  
         /*
          * If compaction is deferred for high-order allocations, it is because
@@ -2602,7 +2625,8 @@ rebalance:
                                         zonelist, high_zoneidx,
                                         nodemask,
                                         alloc_flags, preferred_zone,
-                                       migratetype, &did_some_progress);
+                                       classzone_idx, migratetype,
+                                       &did_some_progress);
         if (page)
                 goto got_pg;
  
@@ -2621,7 +2645,7 @@ rebalance:
                         page = __alloc_pages_may_oom(gfp_mask, order,
                                         zonelist, high_zoneidx,
                                         nodemask, preferred_zone,
-                                       migratetype);
+                                       classzone_idx, migratetype);
                         if (page)
                                 goto got_pg;
  
@@ -2660,12 +2684,11 @@ rebalance:
                  * direct reclaim and reclaim/compaction depends on compaction
                  * being called after reclaim so call directly if necessary
                  */
-               page = __alloc_pages_direct_compact(gfp_mask, order,
-                                       zonelist, high_zoneidx,
-                                       nodemask,
-                                       alloc_flags, preferred_zone,
-                                       migratetype, sync_migration,
-                                       &contended_compaction,
+               page = __alloc_pages_direct_compact(gfp_mask, order, zonelist,
+                                       high_zoneidx, nodemask, alloc_flags,
+                                       preferred_zone,
+                                       classzone_idx, migratetype,
+                                       migration_mode, &contended_compaction,
                                         &deferred_compaction,
                                         &did_some_progress);
                 if (page)
@@ -2691,10 +2714,12 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
  {
         enum zone_type high_zoneidx = gfp_zone(gfp_mask);
         struct zone *preferred_zone;
+       struct zoneref *preferred_zoneref;
         struct page *page = NULL;
         int migratetype = allocflags_to_migratetype(gfp_mask);
         unsigned int cpuset_mems_cookie;
         int alloc_flags = ALLOC_WMARK_LOW|ALLOC_CPUSET|ALLOC_FAIR;
+       int classzone_idx;
  
         gfp_mask &= gfp_allowed_mask;
  
@@ -2717,11 +2742,12 @@ retry_cpuset:
         cpuset_mems_cookie = read_mems_allowed_begin();
  
         /* The preferred zone is used for statistics later */
-       first_zones_zonelist(zonelist, high_zoneidx,
+       preferred_zoneref = first_zones_zonelist(zonelist, high_zoneidx,
                                 nodemask ? : &cpuset_current_mems_allowed,
                                 &preferred_zone);
         if (!preferred_zone)
                 goto out;
+       classzone_idx = zonelist_zone_idx(preferred_zoneref);
  
  #ifdef CONFIG_CMA
         if (allocflags_to_migratetype(gfp_mask) == MIGRATE_MOVABLE)
@@ -2731,7 +2757,7 @@ retry:
         /* First allocation attempt */
         page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, nodemask, order,
                         zonelist, high_zoneidx, alloc_flags,
-                       preferred_zone, migratetype);
+                       preferred_zone, classzone_idx, migratetype);
         if (unlikely(!page)) {
                 /*
                  * The first pass makes sure allocations are spread
@@ -2757,7 +2783,7 @@ retry:
                 gfp_mask = memalloc_noio_flags(gfp_mask);
                 page = __alloc_pages_slowpath(gfp_mask, order,
                                 zonelist, high_zoneidx, nodemask,
-                               preferred_zone, migratetype);
+                               preferred_zone, classzone_idx, migratetype);
         }
  
         trace_mm_page_alloc(page, order, gfp_mask, migratetype);
@@ -2806,7 +2832,7 @@ void __free_pages(struct page *page, unsigned int order)
  {
         if (put_page_testzero(page)) {
                 if (order == 0)
-                       free_hot_cold_page(page, 0);
+                       free_hot_cold_page(page, false);
                 else
                         __free_pages_ok(page, order);
         }
@@ -3363,7 +3389,7 @@ early_param("numa_zonelist_order", setup_numa_zonelist_order);
  /*
   * sysctl handler for numa_zonelist_order
   */
-int numa_zonelist_order_handler(ctl_table *table, int write,
+int numa_zonelist_order_handler(struct ctl_table *table, int write,
                 void __user *buffer, size_t *length,
                 loff_t *ppos)
  {
@@ -4107,7 +4133,7 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
  
  static void __meminit zone_init_free_lists(struct zone *zone)
  {
-       int order, t;
+       unsigned int order, t;
         for_each_migratetype_order(order, t) {
                 INIT_LIST_HEAD(&zone->free_area[order].free_list[t]);
                 zone->free_area[order].nr_free = 0;
@@ -4361,9 +4387,6 @@ int __meminit init_currently_empty_zone(struct zone *zone,
  #ifndef CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID
  /*
   * Required by SPARSEMEM. Given a PFN, return what node the PFN is on.
- * Architectures may implement their own version but if add_active_range()
- * was used and there are no special requirements, this is a convenient
- * alternative
   */
  int __meminit __early_pfn_to_nid(unsigned long pfn)
  {
@@ -4418,10 +4441,9 @@ bool __meminit early_pfn_in_nid(unsigned long pfn, int node)
   * @nid: The node to free memory on. If MAX_NUMNODES, all nodes are freed.
   * @max_low_pfn: The highest PFN that will be passed to memblock_free_early_nid
   *
- * If an architecture guarantees that all ranges registered with
- * add_active_ranges() contain no holes and may be freed, this
- * this function may be used instead of calling memblock_free_early_nid()
- * manually.
+ * If an architecture guarantees that all ranges registered contain no holes
+ * and may be freed, this function may be used instead of calling
+ * memblock_free_early_nid() manually.
   */
  void __init free_bootmem_with_active_regions(int nid, unsigned long max_low_pfn)
  {
@@ -4443,9 +4465,8 @@ void __init free_bootmem_with_active_regions(int nid, unsigned long max_low_pfn)
   * sparse_memory_present_with_active_regions - Call memory_present for each active range
   * @nid: The node to call memory_present for. If MAX_NUMNODES, all nodes will be used.
   *
- * If an architecture guarantees that all ranges registered with
- * add_active_ranges() contain no holes and may be freed, this
- * function may be used instead of calling memory_present() manually.
+ * If an architecture guarantees that all ranges registered contain no holes and may
+ * be freed, this function may be used instead of calling memory_present() manually.
   */
  void __init sparse_memory_present_with_active_regions(int nid)
  {
@@ -4463,7 +4484,7 @@ void __init sparse_memory_present_with_active_regions(int nid)
   * @end_pfn: Passed by reference. On return, it will have the node end_pfn.
   *
   * It returns the start and end page frame of a node based on information
- * provided by an arch calling add_active_range(). If called for a node
+ * provided by memblock_set_node(). If called for a node
   * with no available memory, a warning is printed and the start and end
   * PFNs will be 0.
   */
@@ -4933,8 +4954,6 @@ void __paginginit free_area_init_node(int nid, unsigned long *zones_size,
  
         pgdat->node_id = nid;
         pgdat->node_start_pfn = node_start_pfn;
-       if (node_state(nid, N_MEMORY))
-               init_zone_allows_reclaim(nid);
  #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
         get_pfn_range_for_nid(nid, &start_pfn, &end_pfn);
  #endif
@@ -5042,7 +5061,7 @@ static unsigned long __init find_min_pfn_for_node(int nid)
   * find_min_pfn_with_active_regions - Find the minimum PFN registered
   *
   * It returns the minimum PFN based on information provided via
- * add_active_range().
+ * memblock_set_node().
   */
  unsigned long __init find_min_pfn_with_active_regions(void)
  {
@@ -5263,7 +5282,7 @@ static void check_for_memory(pg_data_t *pgdat, int nid)
   * @max_zone_pfn: an array of max PFNs for each zone
   *
   * This will call free_area_init_node() for each active node in the system.
- * Using the page ranges provided by add_active_range(), the size of each
+ * Using the page ranges provided by memblock_set_node(), the size of each
   * zone in each node and their holes is calculated. If the maximum PFN
   * between two adjacent zones match, it is assumed that the zone is empty.
   * For example, if arch_max_dma_pfn == arch_max_dma32_pfn, it is assumed
@@ -5786,7 +5805,7 @@ module_init(init_per_zone_wmark_min)
   *     that we can call two helper functions whenever min_free_kbytes
   *     changes.
   */
-int min_free_kbytes_sysctl_handler(ctl_table *table, int write,
+int min_free_kbytes_sysctl_handler(struct ctl_table *table, int write,
         void __user *buffer, size_t *length, loff_t *ppos)
  {
         int rc;
@@ -5803,7 +5822,7 @@ int min_free_kbytes_sysctl_handler(ctl_table *table, int write,
  }
  
  #ifdef CONFIG_NUMA
-int sysctl_min_unmapped_ratio_sysctl_handler(ctl_table *table, int write,
+int sysctl_min_unmapped_ratio_sysctl_handler(struct ctl_table *table, int write,
         void __user *buffer, size_t *length, loff_t *ppos)
  {
         struct zone *zone;
@@ -5819,7 +5838,7 @@ int sysctl_min_unmapped_ratio_sysctl_handler(ctl_table *table, int write,
         return 0;
  }
  
-int sysctl_min_slab_ratio_sysctl_handler(ctl_table *table, int write,
+int sysctl_min_slab_ratio_sysctl_handler(struct ctl_table *table, int write,
         void __user *buffer, size_t *length, loff_t *ppos)
  {
         struct zone *zone;
@@ -5845,7 +5864,7 @@ int sysctl_min_slab_ratio_sysctl_handler(ctl_table *table, int write,
   * minimum watermarks. The lowmem reserve ratio can only make sense
   * if in function of the boot time zone sizes.
   */
-int lowmem_reserve_ratio_sysctl_handler(ctl_table *table, int write,
+int lowmem_reserve_ratio_sysctl_handler(struct ctl_table *table, int write,
         void __user *buffer, size_t *length, loff_t *ppos)
  {
         proc_dointvec_minmax(table, write, buffer, length, ppos);
@@ -5858,7 +5877,7 @@ int lowmem_reserve_ratio_sysctl_handler(ctl_table *table, int write,
   * cpu.  It is the fraction of total pages in each zone that a hot per cpu
   * pagelist can have before it gets flushed back to buddy allocator.
   */
-int percpu_pagelist_fraction_sysctl_handler(ctl_table *table, int write,
+int percpu_pagelist_fraction_sysctl_handler(struct ctl_table *table, int write,
         void __user *buffer, size_t *length, loff_t *ppos)
  {
         struct zone *zone;
@@ -6021,53 +6040,64 @@ static inline int pfn_to_bitidx(struct zone *zone, unsigned long pfn)
   * @end_bitidx: The last bit of interest
   * returns pageblock_bits flags
   */
-unsigned long get_pageblock_flags_group(struct page *page,
-                                       int start_bitidx, int end_bitidx)
+unsigned long get_pfnblock_flags_mask(struct page *page, unsigned long pfn,
+                                       unsigned long end_bitidx,
+                                       unsigned long mask)
  {
         struct zone *zone;
         unsigned long *bitmap;
-       unsigned long pfn, bitidx;
-       unsigned long flags = 0;
-       unsigned long value = 1;
+       unsigned long bitidx, word_bitidx;
+       unsigned long word;
  
         zone = page_zone(page);
-       pfn = page_to_pfn(page);
         bitmap = get_pageblock_bitmap(zone, pfn);
         bitidx = pfn_to_bitidx(zone, pfn);
+       word_bitidx = bitidx / BITS_PER_LONG;
+       bitidx &= (BITS_PER_LONG-1);
  
-       for (; start_bitidx <= end_bitidx; start_bitidx++, value <<= 1)
-               if (test_bit(bitidx + start_bitidx, bitmap))
-                       flags |= value;
-
-       return flags;
+       word = bitmap[word_bitidx];
+       bitidx += end_bitidx;
+       return (word >> (BITS_PER_LONG - bitidx - 1)) & mask;
  }
  
  /**
- * set_pageblock_flags_group - Set the requested group of flags for a pageblock_nr_pages block of pages
+ * set_pfnblock_flags_mask - Set the requested group of flags for a pageblock_nr_pages block of pages
   * @page: The page within the block of interest
   * @start_bitidx: The first bit of interest
   * @end_bitidx: The last bit of interest
   * @flags: The flags to set
   */
-void set_pageblock_flags_group(struct page *page, unsigned long flags,
-                                       int start_bitidx, int end_bitidx)
+void set_pfnblock_flags_mask(struct page *page, unsigned long flags,
+                                       unsigned long pfn,
+                                       unsigned long end_bitidx,
+                                       unsigned long mask)
  {
         struct zone *zone;
         unsigned long *bitmap;
-       unsigned long pfn, bitidx;
-       unsigned long value = 1;
+       unsigned long bitidx, word_bitidx;
+       unsigned long old_word, word;
+
+       BUILD_BUG_ON(NR_PAGEBLOCK_BITS != 4);
  
         zone = page_zone(page);
-       pfn = page_to_pfn(page);
         bitmap = get_pageblock_bitmap(zone, pfn);
         bitidx = pfn_to_bitidx(zone, pfn);
+       word_bitidx = bitidx / BITS_PER_LONG;
+       bitidx &= (BITS_PER_LONG-1);
+
         VM_BUG_ON_PAGE(!zone_spans_pfn(zone, pfn), page);
  
-       for (; start_bitidx <= end_bitidx; start_bitidx++, value <<= 1)
-               if (flags & value)
-                       __set_bit(bitidx + start_bitidx, bitmap);
-               else
-                       __clear_bit(bitidx + start_bitidx, bitmap);
+       bitidx += end_bitidx;
+       mask <<= (BITS_PER_LONG - bitidx - 1);
+       flags <<= (BITS_PER_LONG - bitidx - 1);
+
+       word = ACCESS_ONCE(bitmap[word_bitidx]);
+       for (;;) {
+               old_word = cmpxchg(&bitmap[word_bitidx], word, (word & ~mask) | flags);
+               if (word == old_word)
+                       break;
+               word = old_word;
+       }
  }
  
  /*
@@ -6227,7 +6257,7 @@ static int __alloc_contig_migrate_range(struct compact_control *cc,
                 cc->nr_migratepages -= nr_reclaimed;
  
                 ret = migrate_pages(&cc->migratepages, alloc_migrate_target,
-                                   0, MIGRATE_SYNC, MR_CMA);
+                                   NULL, 0, cc->mode, MR_CMA);
         }
         if (ret < 0) {
                 putback_movable_pages(&cc->migratepages);
@@ -6266,7 +6296,7 @@ int alloc_contig_range(unsigned long start, unsigned long end,
                 .nr_migratepages = 0,
                 .order = -1,
                 .zone = page_zone(pfn_to_page(start)),
-               .sync = true,
+               .mode = MIGRATE_SYNC,
                 .ignore_skip_hint = true,
         };
         INIT_LIST_HEAD(&cc.migratepages);
@@ -6421,7 +6451,7 @@ __offline_isolated_pages(unsigned long start_pfn, unsigned long end_pfn)
  {
         struct page *page;
         struct zone *zone;
-       int order, i;
+       unsigned int order, i;
         unsigned long pfn;
         unsigned long flags;
         /* find the first valid pfn */
@@ -6473,7 +6503,7 @@ bool is_free_buddy_page(struct page *page)
         struct zone *zone = page_zone(page);
         unsigned long pfn = page_to_pfn(page);
         unsigned long flags;
-       int order;
+       unsigned int order;
  
         spin_lock_irqsave(&zone->lock, flags);
         for (order = 0; order < MAX_ORDER; order++) {