mm,numa: reorganize change_pmd_range()
diff --git a/mm/vmscan.c b/mm/vmscan.c
index a9c74b409681a460f2c2ef5f45b3f2283a81a4b8..1f56a80a7c41442eefa86d7fe82387570cb3bfd0 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -224,15 +224,15 @@ shrink_slab_node(struct shrink_control *shrinkctl, struct shrinker *shrinker,
        unsigned long freed = 0;
        unsigned long long delta;
        long total_scan;
-       long max_pass;
+       long freeable;
        long nr;
        long new_nr;
        int nid = shrinkctl->nid;
        long batch_size = shrinker->batch ? shrinker->batch
                                          : SHRINK_BATCH;
 
-       max_pass = shrinker->count_objects(shrinker, shrinkctl);
-       if (max_pass == 0)
+       freeable = shrinker->count_objects(shrinker, shrinkctl);
+       if (freeable == 0)
                return 0;
 
        /*
@@ -244,14 +244,14 @@ shrink_slab_node(struct shrink_control *shrinkctl, struct shrinker *shrinker,
 
        total_scan = nr;
        delta = (4 * nr_pages_scanned) / shrinker->seeks;
-       delta *= max_pass;
+       delta *= freeable;
        do_div(delta, lru_pages + 1);
        total_scan += delta;
        if (total_scan < 0) {
                printk(KERN_ERR
                "shrink_slab: %pF negative objects to delete nr=%ld\n",
                       shrinker->scan_objects, total_scan);
-               total_scan = max_pass;
+               total_scan = freeable;
        }
 
        /*
@@ -260,26 +260,26 @@ shrink_slab_node(struct shrink_control *shrinkctl, struct shrinker *shrinker,
         * shrinkers to return -1 all the time. This results in a large
         * nr being built up so when a shrink that can do some work
         * comes along it empties the entire cache due to nr >>>
-        * max_pass.  This is bad for sustaining a working set in
+        * freeable. This is bad for sustaining a working set in
         * memory.
         *
         * Hence only allow the shrinker to scan the entire cache when
         * a large delta change is calculated directly.
         */
-       if (delta < max_pass / 4)
-               total_scan = min(total_scan, max_pass / 2);
+       if (delta < freeable / 4)
+               total_scan = min(total_scan, freeable / 2);
 
        /*
         * Avoid risking looping forever due to too large nr value:
         * never try to free more than twice the estimate number of
         * freeable entries.
         */
-       if (total_scan > max_pass * 2)
-               total_scan = max_pass * 2;
+       if (total_scan > freeable * 2)
+               total_scan = freeable * 2;
 
        trace_mm_shrink_slab_start(shrinker, shrinkctl, nr,
                                nr_pages_scanned, lru_pages,
-                               max_pass, delta, total_scan);
+                               freeable, delta, total_scan);
 
        /*
         * Normally, we should not scan less than batch_size objects in one
@@ -292,12 +292,12 @@ shrink_slab_node(struct shrink_control *shrinkctl, struct shrinker *shrinker,
         *
         * We detect the "tight on memory" situations by looking at the total
         * number of objects we want to scan (total_scan). If it is greater
-        * than the total number of objects on slab (max_pass), we must be
+        * than the total number of objects on slab (freeable), we must be
         * scanning at high prio and therefore should try to reclaim as much as
         * possible.
         */
        while (total_scan >= batch_size ||
-              total_scan >= max_pass) {
+              total_scan >= freeable) {
                unsigned long ret;
                unsigned long nr_to_scan = min(batch_size, total_scan);
 
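Taken together, the hunks above compute a per-node scan target and then drain it in batches. As a point of reference, the sketch below condenses that arithmetic into a hypothetical standalone program; it is not vmscan.c code (do_div(), SHRINK_BATCH, SHRINK_STOP and the deferred-count bookkeeping are omitted, and scan_target(), drain_cache() and scan_one_batch() are made-up names). scan_target() mirrors the proportional-pressure math and its two clamps; drain_cache() mirrors the batching loop, whose "|| total_scan >= freeable" condition lets a request covering the whole of a small cache run even when it is below one batch.

    /*
     * Hypothetical userspace sketch of the shrink_slab_node() arithmetic.
     * Plain C integer math; not kernel code.
     */
    #include <stdio.h>

    static long scan_target(unsigned long nr_pages_scanned,
                            unsigned long lru_pages,
                            long freeable, int seeks, long deferred)
    {
            long long delta;
            long long total_scan = deferred;

            /* pressure proportional to LRU scanning, weighted by object cost */
            delta = (4LL * nr_pages_scanned) / seeks;
            delta *= freeable;
            delta /= lru_pages + 1;
            total_scan += delta;

            if (total_scan < 0)             /* overflow: fall back to freeable */
                    total_scan = freeable;

            /* small delta: don't let deferred work empty the whole cache */
            if (delta < freeable / 4 && total_scan > freeable / 2)
                    total_scan = freeable / 2;

            /* never aim at more than twice the freeable estimate */
            if (total_scan > freeable * 2)
                    total_scan = freeable * 2;

            return (long)total_scan;
    }

    /* made-up scan callback: pretend every scanned object was freed */
    static unsigned long scan_one_batch(unsigned long nr_to_scan)
    {
            return nr_to_scan;
    }

    static unsigned long drain_cache(long total_scan, long freeable,
                                     long batch_size)
    {
            unsigned long freed = 0;

            while (total_scan >= batch_size || total_scan >= freeable) {
                    long nr_to_scan =
                            total_scan < batch_size ? total_scan : batch_size;

                    freed += scan_one_batch(nr_to_scan);
                    total_scan -= nr_to_scan;
            }
            return freed;
    }

    int main(void)
    {
            /* 1024 of 100000 LRU pages scanned, 5000 freeable, seeks = 2 */
            long target = scan_target(1024, 100000, 5000, 2, 0);

            /* 102 objects wanted, batch of 128: below one batch and below
             * freeable, so nothing is scanned here (the kernel would defer
             * the remainder); a tiny cache (freeable 50) is still scanned
             * because 60 >= 50 */
            printf("target %ld, freed %lu, small cache freed %lu\n",
                   target, drain_cache(target, 5000, 128),
                   drain_cache(60, 50, 128));
            return 0;
    }

With these numbers the sketch prints "target 102, freed 0, small cache freed 60": a modest scan target stays well inside both clamps, and the small cache is scanned rather than skipped.
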
@@ -523,7 +523,8 @@ static pageout_t pageout(struct page *page, struct address_space *mapping,
  * Same as remove_mapping, but if the page is removed from the mapping, it
  * gets returned with a refcount of 0.
  */
-static int __remove_mapping(struct address_space *mapping, struct page *page)
+static int __remove_mapping(struct address_space *mapping, struct page *page,
+                           bool reclaimed)
 {
        BUG_ON(!PageLocked(page));
        BUG_ON(mapping != page_mapping(page));
@@ -569,10 +570,23 @@ static int __remove_mapping(struct address_space *mapping, struct page *page)
                swapcache_free(swap, page);
        } else {
                void (*freepage)(struct page *);
+               void *shadow = NULL;
 
                freepage = mapping->a_ops->freepage;
-
-               __delete_from_page_cache(page);
+               /*
+                * Remember a shadow entry for reclaimed file cache in
+                * order to detect refaults, thus thrashing, later on.
+                *
+                * But don't store shadows in an address space that is
+                * already exiting.  This is not just an optimization,
+                * inode reclaim needs to empty out the radix tree or
+                * the nodes are lost.  Don't plant shadows behind its
+                * back.
+                */
+               if (reclaimed && page_is_file_cache(page) &&
+                   !mapping_exiting(mapping))
+                       shadow = workingset_eviction(mapping, page);
+               __delete_from_page_cache(page, shadow);
                spin_unlock_irq(&mapping->tree_lock);
                mem_cgroup_uncharge_cache_page(page);
 
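The shadow entries introduced here are easiest to picture outside the kernel: when an entry is reclaimed, its lookup slot is not simply cleared but overwritten with a small token recording when the eviction happened, so that a later miss on the same key (a refault) can be recognized and its distance measured. The toy below is purely conceptual, using a hypothetical flat table and a global eviction counter (evict() and fault_in() are made up); it does not reflect the radix-tree encoding behind workingset_eviction() and __delete_from_page_cache().

    /*
     * Conceptual sketch of shadow entries for refault detection.
     * Hypothetical flat table instead of the page cache radix tree.
     */
    #include <stdio.h>

    #define SLOTS 8
    #define SHADOW_FLAG (1ULL << 63)    /* tag: slot holds a shadow, not data */

    static unsigned long long slots[SLOTS];   /* 0 = empty, tagged = shadow */
    static unsigned long long eviction_clock; /* advances on every eviction */

    static void evict(unsigned int idx)
    {
            /* leave a shadow: an eviction timestamp instead of an empty slot */
            slots[idx] = SHADOW_FLAG | eviction_clock++;
    }

    static void fault_in(unsigned int idx, unsigned long long data)
    {
            if (slots[idx] & SHADOW_FLAG) {
                    /* refault: how much eviction activity happened meanwhile? */
                    unsigned long long distance =
                            eviction_clock - (slots[idx] & ~SHADOW_FLAG);
                    printf("refault on slot %u, distance %llu\n", idx, distance);
            }
            slots[idx] = data;
    }

    int main(void)
    {
            fault_in(3, 0x1000);   /* cold fault, no shadow yet */
            evict(3);
            evict(5);              /* unrelated eviction advances the clock */
            fault_in(3, 0x1000);   /* refault */
            return 0;
    }

Running it prints "refault on slot 3, distance 2": two evictions (its own and one other) happened before the entry came back, which is the kind of information a shadow entry preserves.
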
@@ -595,7 +609,7 @@ cannot_free:
  */
 int remove_mapping(struct address_space *mapping, struct page *page)
 {
-       if (__remove_mapping(mapping, page)) {
+       if (__remove_mapping(mapping, page, false)) {
                /*
                 * Unfreezing the refcount with 1 rather than 2 effectively
                 * drops the pagecache ref for us without requiring another
@@ -1065,7 +1079,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
                        }
                }
 
-               if (!mapping || !__remove_mapping(mapping, page))
+               if (!mapping || !__remove_mapping(mapping, page, true))
                        goto keep_locked;
 
                /*
@@ -2297,7 +2311,12 @@ static bool shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
        struct zone *zone;
        unsigned long nr_soft_reclaimed;
        unsigned long nr_soft_scanned;
+       unsigned long lru_pages = 0;
        bool aborted_reclaim = false;
+       struct reclaim_state *reclaim_state = current->reclaim_state;
+       struct shrink_control shrink = {
+               .gfp_mask = sc->gfp_mask,
+       };
 
        /*
         * If the number of buffer_heads in the machine exceeds the maximum
@@ -2307,6 +2326,8 @@ static bool shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
        if (buffer_heads_over_limit)
                sc->gfp_mask |= __GFP_HIGHMEM;
 
+       nodes_clear(shrink.nodes_to_scan);
+
        for_each_zone_zonelist_nodemask(zone, z, zonelist,
                                        gfp_zone(sc->gfp_mask), sc->nodemask) {
                if (!populated_zone(zone))
@@ -2318,6 +2339,10 @@ static bool shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
                if (global_reclaim(sc)) {
                        if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL))
                                continue;
+
+                       lru_pages += zone_reclaimable_pages(zone);
+                       node_set(zone_to_nid(zone), shrink.nodes_to_scan);
+
                        if (sc->priority != DEF_PRIORITY &&
                            !zone_reclaimable(zone))
                                continue;       /* Let kswapd poll it */
@@ -2354,6 +2379,20 @@ static bool shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
                shrink_zone(zone, sc);
        }
 
+       /*
+        * Don't shrink slabs when reclaiming memory from over limit cgroups
+        * but do shrink slab at least once when aborting reclaim for
+        * compaction to avoid unevenly scanning file/anon LRU pages over slab
+        * pages.
+        */
+       if (global_reclaim(sc)) {
+               shrink_slab(&shrink, sc->nr_scanned, lru_pages);
+               if (reclaim_state) {
+                       sc->nr_reclaimed += reclaim_state->reclaimed_slab;
+                       reclaim_state->reclaimed_slab = 0;
+               }
+       }
+
        return aborted_reclaim;
 }
 
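Together with the block removed from do_try_to_free_pages() further down, this hunk gives shrink_zones() the following shape: per-zone LRU sizes and node IDs are accumulated only for zones the cpuset and nodemask actually allow, and shrink_slab() then runs once per pass over exactly those nodes. The stand-in below uses made-up types and numbers (struct fake_zone, shrink_slab_once()) purely to show that control flow:

    /*
     * Condensed, hypothetical outline of the new shrink_zones() flow:
     * gather LRU size and a node mask for eligible zones, then shrink
     * slab caches once per reclaim pass.  Stand-in types only.
     */
    #include <stdio.h>

    struct fake_zone {
            int nid;
            unsigned long reclaimable_pages;
            int allowed;            /* cpuset/nodemask allows this zone */
    };

    static void shrink_slab_once(unsigned long nodes_to_scan,
                                 unsigned long nr_scanned, unsigned long lru_pages)
    {
            printf("shrink slab: nodes=0x%lx scanned=%lu lru=%lu\n",
                   nodes_to_scan, nr_scanned, lru_pages);
    }

    int main(void)
    {
            struct fake_zone zones[] = {
                    { .nid = 0, .reclaimable_pages = 4000, .allowed = 1 },
                    { .nid = 1, .reclaimable_pages = 9000, .allowed = 0 },
            };
            unsigned long lru_pages = 0, nodes_to_scan = 0;
            unsigned long i;

            for (i = 0; i < sizeof(zones) / sizeof(zones[0]); i++) {
                    if (!zones[i].allowed)
                            continue;   /* skipped zones contribute nothing */
                    lru_pages += zones[i].reclaimable_pages;
                    nodes_to_scan |= 1UL << zones[i].nid;
                    /* ...per-zone LRU reclaim would run here... */
            }

            /* one slab pass, limited to the nodes actually being reclaimed */
            shrink_slab_once(nodes_to_scan, 512, lru_pages);
            return 0;
    }

With the sample data only node 0 contributes, so the output is "shrink slab: nodes=0x1 scanned=512 lru=4000"; the disallowed zone neither inflates lru_pages nor widens the node mask.
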
@@ -2394,13 +2433,9 @@ static bool all_unreclaimable(struct zonelist *zonelist,
  *             else, the number of pages reclaimed
  */
 static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
-                                       struct scan_control *sc,
-                                       struct shrink_control *shrink)
+                                         struct scan_control *sc)
 {
        unsigned long total_scanned = 0;
-       struct reclaim_state *reclaim_state = current->reclaim_state;
-       struct zoneref *z;
-       struct zone *zone;
        unsigned long writeback_threshold;
        bool aborted_reclaim;
 
@@ -2415,32 +2450,6 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
                sc->nr_scanned = 0;
                aborted_reclaim = shrink_zones(zonelist, sc);
 
-               /*
-                * Don't shrink slabs when reclaiming memory from over limit
-                * cgroups but do shrink slab at least once when aborting
-                * reclaim for compaction to avoid unevenly scanning file/anon
-                * LRU pages over slab pages.
-                */
-               if (global_reclaim(sc)) {
-                       unsigned long lru_pages = 0;
-
-                       nodes_clear(shrink->nodes_to_scan);
-                       for_each_zone_zonelist(zone, z, zonelist,
-                                       gfp_zone(sc->gfp_mask)) {
-                               if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL))
-                                       continue;
-
-                               lru_pages += zone_reclaimable_pages(zone);
-                               node_set(zone_to_nid(zone),
-                                        shrink->nodes_to_scan);
-                       }
-
-                       shrink_slab(shrink, sc->nr_scanned, lru_pages);
-                       if (reclaim_state) {
-                               sc->nr_reclaimed += reclaim_state->reclaimed_slab;
-                               reclaim_state->reclaimed_slab = 0;
-                       }
-               }
                total_scanned += sc->nr_scanned;
                if (sc->nr_reclaimed >= sc->nr_to_reclaim)
                        goto out;
@@ -2602,9 +2611,6 @@ unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
                .target_mem_cgroup = NULL,
                .nodemask = nodemask,
        };
-       struct shrink_control shrink = {
-               .gfp_mask = sc.gfp_mask,
-       };
 
        /*
         * Do not enter reclaim if fatal signal was delivered while throttled.
@@ -2618,7 +2624,7 @@ unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
                                sc.may_writepage,
                                gfp_mask);
 
-       nr_reclaimed = do_try_to_free_pages(zonelist, &sc, &shrink);
+       nr_reclaimed = do_try_to_free_pages(zonelist, &sc);
 
        trace_mm_vmscan_direct_reclaim_end(nr_reclaimed);
 
@@ -2685,9 +2691,6 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg,
                .gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) |
                                (GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK),
        };
-       struct shrink_control shrink = {
-               .gfp_mask = sc.gfp_mask,
-       };
 
        /*
         * Unlike direct reclaim via alloc_pages(), memcg's reclaim doesn't
@@ -2702,7 +2705,7 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg,
                                            sc.may_writepage,
                                            sc.gfp_mask);
 
-       nr_reclaimed = do_try_to_free_pages(zonelist, &sc, &shrink);
+       nr_reclaimed = do_try_to_free_pages(zonelist, &sc);
 
        trace_mm_vmscan_memcg_reclaim_end(nr_reclaimed);
 
@@ -3337,9 +3340,6 @@ unsigned long shrink_all_memory(unsigned long nr_to_reclaim)
                .order = 0,
                .priority = DEF_PRIORITY,
        };
-       struct shrink_control shrink = {
-               .gfp_mask = sc.gfp_mask,
-       };
        struct zonelist *zonelist = node_zonelist(numa_node_id(), sc.gfp_mask);
        struct task_struct *p = current;
        unsigned long nr_reclaimed;
@@ -3349,7 +3349,7 @@ unsigned long shrink_all_memory(unsigned long nr_to_reclaim)
        reclaim_state.reclaimed_slab = 0;
        p->reclaim_state = &reclaim_state;
 
-       nr_reclaimed = do_try_to_free_pages(zonelist, &sc, &shrink);
+       nr_reclaimed = do_try_to_free_pages(zonelist, &sc);
 
        p->reclaim_state = NULL;
        lockdep_clear_current_reclaim_state();