mm,numa: reorganize change_pmd_range()
diff --git a/mm/vmscan.c b/mm/vmscan.c
index a9c74b409681a460f2c2ef5f45b3f2283a81a4b8..1f56a80a7c41442eefa86d7fe82387570cb3bfd0 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -224,15 +224,15 @@ shrink_slab_node(struct shrink_control *shrinkctl, struct shrinker *shrinker,
        unsigned long freed = 0;
        unsigned long long delta;
        long total_scan;
-       long max_pass;
+       long freeable;
        long nr;
        long new_nr;
        int nid = shrinkctl->nid;
        long batch_size = shrinker->batch ? shrinker->batch
                                          : SHRINK_BATCH;
 
-       max_pass = shrinker->count_objects(shrinker, shrinkctl);
-       if (max_pass == 0)
+       freeable = shrinker->count_objects(shrinker, shrinkctl);
+       if (freeable == 0)
                return 0;
 
        /*
@@ -244,14 +244,14 @@ shrink_slab_node(struct shrink_control *shrinkctl, struct shrinker *shrinker,
 
        total_scan = nr;
        delta = (4 * nr_pages_scanned) / shrinker->seeks;
-       delta *= max_pass;
+       delta *= freeable;
        do_div(delta, lru_pages + 1);
        total_scan += delta;
        if (total_scan < 0) {
                printk(KERN_ERR
                "shrink_slab: %pF negative objects to delete nr=%ld\n",
                       shrinker->scan_objects, total_scan);
-               total_scan = max_pass;
+               total_scan = freeable;
        }
 
        /*
@@ -260,26 +260,26 @@ shrink_slab_node(struct shrink_control *shrinkctl, struct shrinker *shrinker,
         * shrinkers to return -1 all the time. This results in a large
         * nr being built up so when a shrink that can do some work
         * comes along it empties the entire cache due to nr >>>
-        * max_pass.  This is bad for sustaining a working set in
+        * freeable. This is bad for sustaining a working set in
         * memory.
         *
         * Hence only allow the shrinker to scan the entire cache when
         * a large delta change is calculated directly.
         */
-       if (delta < max_pass / 4)
-               total_scan = min(total_scan, max_pass / 2);
+       if (delta < freeable / 4)
+               total_scan = min(total_scan, freeable / 2);
 
        /*
         * Avoid risking looping forever due to too large nr value:
         * never try to free more than twice the estimate number of
         * freeable entries.
         */
-       if (total_scan > max_pass * 2)
-               total_scan = max_pass * 2;
+       if (total_scan > freeable * 2)
+               total_scan = freeable * 2;
 
        trace_mm_shrink_slab_start(shrinker, shrinkctl, nr,
                                nr_pages_scanned, lru_pages,
-                               max_pass, delta, total_scan);
+                               freeable, delta, total_scan);
 
        /*
         * Normally, we should not scan less than batch_size objects in one
@@ -292,12 +292,12 @@ shrink_slab_node(struct shrink_control *shrinkctl, struct shrinker *shrinker,
         *
         * We detect the "tight on memory" situations by looking at the total
         * number of objects we want to scan (total_scan). If it is greater
-        * than the total number of objects on slab (max_pass), we must be
+        * than the total number of objects on slab (freeable), we must be
         * scanning at high prio and therefore should try to reclaim as much as
         * possible.
         */
        while (total_scan >= batch_size ||
-              total_scan >= max_pass) {
+              total_scan >= freeable) {
                unsigned long ret;
                unsigned long nr_to_scan = min(batch_size, total_scan);
 
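Taken together, the hunks above compute a per-node scan target and then drain it in batches. As a point of reference, the sketch below condenses that arithmetic into a hypothetical standalone program; it is not vmscan.c code (do_div(), SHRINK_BATCH, SHRINK_STOP and the deferred-count bookkeeping are omitted, and scan_target(), drain_cache() and scan_one_batch() are made-up names). scan_target() mirrors the proportional-pressure math and its two clamps; drain_cache() mirrors the batching loop, whose "|| total_scan >= freeable" condition lets a request covering the whole of a small cache run even when it is below one batch.

    /*
     * Hypothetical userspace sketch of the shrink_slab_node() arithmetic.
     * Plain C integer math; not kernel code.
     */
    #include <stdio.h>

    static long scan_target(unsigned long nr_pages_scanned,
                            unsigned long lru_pages,
                            long freeable, int seeks, long deferred)
    {
            long long delta;
            long long total_scan = deferred;

            /* pressure proportional to LRU scanning, weighted by object cost */
            delta = (4LL * nr_pages_scanned) / seeks;
            delta *= freeable;
            delta /= lru_pages + 1;
            total_scan += delta;

            if (total_scan < 0)             /* overflow: fall back to freeable */
                    total_scan = freeable;

            /* small delta: don't let deferred work empty the whole cache */
            if (delta < freeable / 4 && total_scan > freeable / 2)
                    total_scan = freeable / 2;

            /* never aim at more than twice the freeable estimate */
            if (total_scan > freeable * 2)
                    total_scan = freeable * 2;

            return (long)total_scan;
    }

    /* made-up scan callback: pretend every scanned object was freed */
    static unsigned long scan_one_batch(unsigned long nr_to_scan)
    {
            return nr_to_scan;
    }

    static unsigned long drain_cache(long total_scan, long freeable,
                                     long batch_size)
    {
            unsigned long freed = 0;

            while (total_scan >= batch_size || total_scan >= freeable) {
                    long nr_to_scan =
                            total_scan < batch_size ? total_scan : batch_size;

                    freed += scan_one_batch(nr_to_scan);
                    total_scan -= nr_to_scan;
            }
            return freed;
    }

    int main(void)
    {
            /* 1024 of 100000 LRU pages scanned, 5000 freeable, seeks = 2 */
            long target = scan_target(1024, 100000, 5000, 2, 0);

            /* 102 objects wanted, batch of 128: below one batch and below
             * freeable, so nothing is scanned here (the kernel would defer
             * the remainder); a tiny cache (freeable 50) is still scanned
             * because 60 >= 50 */
            printf("target %ld, freed %lu, small cache freed %lu\n",
                   target, drain_cache(target, 5000, 128),
                   drain_cache(60, 50, 128));
            return 0;
    }

With these numbers the sketch prints "target 102, freed 0, small cache freed 60": a modest scan target stays well inside both clamps, and the small cache is scanned rather than skipped.
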
@@ -523,7 +523,8 @@ static pageout_t pageout(struct page *page, struct address_space *mapping,
  * Same as remove_mapping, but if the page is removed from the mapping, it
  * gets returned with a refcount of 0.
  */
-static int __remove_mapping(struct address_space *mapping, struct page *page)
+static int __remove_mapping(struct address_space *mapping, struct page *page,
+                           bool reclaimed)
 {
        BUG_ON(!PageLocked(page));
        BUG_ON(mapping != page_mapping(page));
@@ -569,10 +570,23 @@ static int __remove_mapping(struct address_space *mapping, struct page *page)
                swapcache_free(swap, page);
        } else {
                void (*freepage)(struct page *);
+               void *shadow = NULL;
 
                freepage = mapping->a_ops->freepage;
-
-               __delete_from_page_cache(page);
+               /*
+                * Remember a shadow entry for reclaimed file cache in
+                * order to detect refaults, thus thrashing, later on.
+                *
+                * But don't store shadows in an address space that is
+                * already exiting.  This is not just an optimization,
+                * inode reclaim needs to empty out the radix tree or
+                * the nodes are lost.  Don't plant shadows behind its
+                * back.
+                */
+               if (reclaimed && page_is_file_cache(page) &&
+                   !mapping_exiting(mapping))
+                       shadow = workingset_eviction(mapping, page);
+               __delete_from_page_cache(page, shadow);
                spin_unlock_irq(&mapping->tree_lock);
                mem_cgroup_uncharge_cache_page(page);
 
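The shadow entries introduced here are easiest to picture outside the kernel: when an entry is reclaimed, its lookup slot is not simply cleared but overwritten with a small token recording when the eviction happened, so that a later miss on the same key (a refault) can be recognized and its distance measured. The toy below is purely conceptual, using a hypothetical flat table and a global eviction counter (evict() and fault_in() are made up); it does not reflect the radix-tree encoding behind workingset_eviction() and __delete_from_page_cache().

    /*
     * Conceptual sketch of shadow entries for refault detection.
     * Hypothetical flat table instead of the page cache radix tree.
     */
    #include <stdio.h>

    #define SLOTS 8
    #define SHADOW_FLAG (1ULL << 63)    /* tag: slot holds a shadow, not data */

    static unsigned long long slots[SLOTS];   /* 0 = empty, tagged = shadow */
    static unsigned long long eviction_clock; /* advances on every eviction */

    static void evict(unsigned int idx)
    {
            /* leave a shadow: an eviction timestamp instead of an empty slot */
            slots[idx] = SHADOW_FLAG | eviction_clock++;
    }

    static void fault_in(unsigned int idx, unsigned long long data)
    {
            if (slots[idx] & SHADOW_FLAG) {
                    /* refault: how much eviction activity happened meanwhile? */
                    unsigned long long distance =
                            eviction_clock - (slots[idx] & ~SHADOW_FLAG);
                    printf("refault on slot %u, distance %llu\n", idx, distance);
            }
            slots[idx] = data;
    }

    int main(void)
    {
            fault_in(3, 0x1000);   /* cold fault, no shadow yet */
            evict(3);
            evict(5);              /* unrelated eviction advances the clock */
            fault_in(3, 0x1000);   /* refault */
            return 0;
    }

Running it prints "refault on slot 3, distance 2": two evictions (its own and one other) happened before the entry came back, which is the kind of information a shadow entry preserves.
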
@@ -595,7 +609,7 @@ cannot_free:
  */
 int remove_mapping(struct address_space *mapping, struct page *page)
 {
-       if (__remove_mapping(mapping, page)) {
+       if (__remove_mapping(mapping, page, false)) {
                /*
                 * Unfreezing the refcount with 1 rather than 2 effectively
                 * drops the pagecache ref for us without requiring another
@@ -1065,7 +1079,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
                        }
                }
 
-               if (!mapping || !__remove_mapping(mapping, page))
+               if (!mapping || !__remove_mapping(mapping, page, true))
                        goto keep_locked;
 
                /*
@@ -2297,7 +2311,12 @@ static bool shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
        struct zone *zone;
        unsigned long nr_soft_reclaimed;
        unsigned long nr_soft_scanned;
+       unsigned long lru_pages = 0;
        bool aborted_reclaim = false;
+       struct reclaim_state *reclaim_state = current->reclaim_state;
+       struct shrink_control shrink = {
+               .gfp_mask = sc->gfp_mask,
+       };
 
        /*
         * If the number of buffer_heads in the machine exceeds the maximum
@@ -2307,6 +2326,8 @@ static bool shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
        if (buffer_heads_over_limit)
                sc->gfp_mask |= __GFP_HIGHMEM;
 
+       nodes_clear(shrink.nodes_to_scan);
+
        for_each_zone_zonelist_nodemask(zone, z, zonelist,
                                        gfp_zone(sc->gfp_mask), sc->nodemask) {
                if (!populated_zone(zone))
@@ -2318,6 +2339,10 @@ static bool shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
                if (global_reclaim(sc)) {
                        if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL))
                                continue;
+
+                       lru_pages += zone_reclaimable_pages(zone);
+                       node_set(zone_to_nid(zone), shrink.nodes_to_scan);
+
                        if (sc->priority != DEF_PRIORITY &&
                            !zone_reclaimable(zone))
                                continue;       /* Let kswapd poll it */
@@ -2354,6 +2379,20 @@ static bool shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
                shrink_zone(zone, sc);
        }
 
+       /*
+        * Don't shrink slabs when reclaiming memory from over limit cgroups
+        * but do shrink slab at least once when aborting reclaim for
+        * compaction to avoid unevenly scanning file/anon LRU pages over slab
+        * pages.
+        */
+       if (global_reclaim(sc)) {
+               shrink_slab(&shrink, sc->nr_scanned, lru_pages);
+               if (reclaim_state) {
+                       sc->nr_reclaimed += reclaim_state->reclaimed_slab;
+                       reclaim_state->reclaimed_slab = 0;
+               }
+       }
+
        return aborted_reclaim;
 }
 
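Together with the block removed from do_try_to_free_pages() further down, this hunk gives shrink_zones() the following shape: per-zone LRU sizes and node IDs are accumulated only for zones the cpuset and nodemask actually allow, and shrink_slab() then runs once per pass over exactly those nodes. The stand-in below uses made-up types and numbers (struct fake_zone, shrink_slab_once()) purely to show that control flow:

    /*
     * Condensed, hypothetical outline of the new shrink_zones() flow:
     * gather LRU size and a node mask for eligible zones, then shrink
     * slab caches once per reclaim pass.  Stand-in types only.
     */
    #include <stdio.h>

    struct fake_zone {
            int nid;
            unsigned long reclaimable_pages;
            int allowed;            /* cpuset/nodemask allows this zone */
    };

    static void shrink_slab_once(unsigned long nodes_to_scan,
                                 unsigned long nr_scanned, unsigned long lru_pages)
    {
            printf("shrink slab: nodes=0x%lx scanned=%lu lru=%lu\n",
                   nodes_to_scan, nr_scanned, lru_pages);
    }

    int main(void)
    {
            struct fake_zone zones[] = {
                    { .nid = 0, .reclaimable_pages = 4000, .allowed = 1 },
                    { .nid = 1, .reclaimable_pages = 9000, .allowed = 0 },
            };
            unsigned long lru_pages = 0, nodes_to_scan = 0;
            unsigned long i;

            for (i = 0; i < sizeof(zones) / sizeof(zones[0]); i++) {
                    if (!zones[i].allowed)
                            continue;   /* skipped zones contribute nothing */
                    lru_pages += zones[i].reclaimable_pages;
                    nodes_to_scan |= 1UL << zones[i].nid;
                    /* ...per-zone LRU reclaim would run here... */
            }

            /* one slab pass, limited to the nodes actually being reclaimed */
            shrink_slab_once(nodes_to_scan, 512, lru_pages);
            return 0;
    }

With the sample data only node 0 contributes, so the output is "shrink slab: nodes=0x1 scanned=512 lru=4000"; the disallowed zone neither inflates lru_pages nor widens the node mask.
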
@@ -2394,13 +2433,9 @@ static bool all_unreclaimable(struct zonelist *zonelist,
  *             else, the number of pages reclaimed
  */
 static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
-                                       struct scan_control *sc,
-                                       struct shrink_control *shrink)
+                                         struct scan_control *sc)
 {
        unsigned long total_scanned = 0;
-       struct reclaim_state *reclaim_state = current->reclaim_state;
-       struct zoneref *z;
-       struct zone *zone;
        unsigned long writeback_threshold;
        bool aborted_reclaim;
 
@@ -2415,32 +2450,6 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
                sc->nr_scanned = 0;
                aborted_reclaim = shrink_zones(zonelist, sc);
 
-               /*
-                * Don't shrink slabs when reclaiming memory from over limit
-                * cgroups but do shrink slab at least once when aborting
-                * reclaim for compaction to avoid unevenly scanning file/anon
-                * LRU pages over slab pages.
-                */
-               if (global_reclaim(sc)) {
-                       unsigned long lru_pages = 0;
-
-                       nodes_clear(shrink->nodes_to_scan);
-                       for_each_zone_zonelist(zone, z, zonelist,
-                                       gfp_zone(sc->gfp_mask)) {
-                               if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL))
-                                       continue;
-
-                               lru_pages += zone_reclaimable_pages(zone);
-                               node_set(zone_to_nid(zone),
-                                        shrink->nodes_to_scan);
-                       }
-
-                       shrink_slab(shrink, sc->nr_scanned, lru_pages);
-                       if (reclaim_state) {
-                               sc->nr_reclaimed += reclaim_state->reclaimed_slab;
-                               reclaim_state->reclaimed_slab = 0;
-                       }
-               }
                total_scanned += sc->nr_scanned;
                if (sc->nr_reclaimed >= sc->nr_to_reclaim)
                        goto out;
@@ -2602,9 +2611,6 @@ unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
                .target_mem_cgroup = NULL,
                .nodemask = nodemask,
        };
-       struct shrink_control shrink = {
-               .gfp_mask = sc.gfp_mask,
-       };
 
        /*
         * Do not enter reclaim if fatal signal was delivered while throttled.
@@ -2618,7 +2624,7 @@ unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
                                sc.may_writepage,
                                gfp_mask);
 
-       nr_reclaimed = do_try_to_free_pages(zonelist, &sc, &shrink);
+       nr_reclaimed = do_try_to_free_pages(zonelist, &sc);
 
        trace_mm_vmscan_direct_reclaim_end(nr_reclaimed);
 
@@ -2685,9 +2691,6 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg,
                .gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) |
                                (GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK),
        };
-       struct shrink_control shrink = {
-               .gfp_mask = sc.gfp_mask,
-       };
 
        /*
         * Unlike direct reclaim via alloc_pages(), memcg's reclaim doesn't
@@ -2702,7 +2705,7 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg,
                                            sc.may_writepage,
                                            sc.gfp_mask);
 
-       nr_reclaimed = do_try_to_free_pages(zonelist, &sc, &shrink);
+       nr_reclaimed = do_try_to_free_pages(zonelist, &sc);
 
        trace_mm_vmscan_memcg_reclaim_end(nr_reclaimed);
 
@@ -3337,9 +3340,6 @@ unsigned long shrink_all_memory(unsigned long nr_to_reclaim)
                .order = 0,
                .priority = DEF_PRIORITY,
        };
-       struct shrink_control shrink = {
-               .gfp_mask = sc.gfp_mask,
-       };
        struct zonelist *zonelist = node_zonelist(numa_node_id(), sc.gfp_mask);
        struct task_struct *p = current;
        unsigned long nr_reclaimed;
@@ -3349,7 +3349,7 @@ unsigned long shrink_all_memory(unsigned long nr_to_reclaim)
        reclaim_state.reclaimed_slab = 0;
        p->reclaim_state = &reclaim_state;
 
-       nr_reclaimed = do_try_to_free_pages(zonelist, &sc, &shrink);
+       nr_reclaimed = do_try_to_free_pages(zonelist, &sc);
 
        p->reclaim_state = NULL;
        lockdep_clear_current_reclaim_state();