Merge tag 'fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/arm...
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index b11fb2fe77c15775718b5d07379ddf543e58dbaa..795e525afaba8914f3f8863de6a7299d84898ada 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -379,7 +379,6 @@ static bool move_file(void)
 enum charge_type {
        MEM_CGROUP_CHARGE_TYPE_CACHE = 0,
        MEM_CGROUP_CHARGE_TYPE_ANON,
-       MEM_CGROUP_CHARGE_TYPE_SHMEM,   /* used by page migration of shmem */
        MEM_CGROUP_CHARGE_TYPE_SWAPOUT, /* for accounting swapcache */
        MEM_CGROUP_CHARGE_TYPE_DROP,    /* a page was unused swap cache */
        NR_CHARGE_TYPE,
@@ -406,6 +405,12 @@ enum charge_type {
 static void mem_cgroup_get(struct mem_cgroup *memcg);
 static void mem_cgroup_put(struct mem_cgroup *memcg);
 
+static inline
+struct mem_cgroup *mem_cgroup_from_css(struct cgroup_subsys_state *s)
+{
+       return container_of(s, struct mem_cgroup, css);
+}
+
 /* Writing them here to avoid exposing memcg's inner layout */
 #ifdef CONFIG_MEMCG_KMEM
 #include <net/sock.h>
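
For reference, a minimal user-space sketch (not part of the patch) of the container_of() arithmetic that the new mem_cgroup_from_css() helper wraps: given a pointer to an embedded member, subtract the member's offset to recover the enclosing object. The struct names below are hypothetical, and the simplified container_of() omits the kernel's type checking.

#include <stddef.h>
#include <stdio.h>

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct css { int refcnt; };

struct memcg {
	long usage;
	struct css css;			/* embedded, like mem_cgroup.css */
};

int main(void)
{
	struct memcg m = { .usage = 42 };
	struct css *s = &m.css;

	/* the step mem_cgroup_from_css() performs: member -> container */
	struct memcg *back = container_of(s, struct memcg, css);

	printf("%ld\n", back->usage);	/* prints 42 */
	return 0;
}
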
@@ -863,9 +868,8 @@ static void memcg_check_events(struct mem_cgroup *memcg, struct page *page)
 
 struct mem_cgroup *mem_cgroup_from_cont(struct cgroup *cont)
 {
-       return container_of(cgroup_subsys_state(cont,
-                               mem_cgroup_subsys_id), struct mem_cgroup,
-                               css);
+       return mem_cgroup_from_css(
+               cgroup_subsys_state(cont, mem_cgroup_subsys_id));
 }
 
 struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p)
@@ -878,8 +882,7 @@ struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p)
        if (unlikely(!p))
                return NULL;
 
-       return container_of(task_subsys_state(p, mem_cgroup_subsys_id),
-                               struct mem_cgroup, css);
+       return mem_cgroup_from_css(task_subsys_state(p, mem_cgroup_subsys_id));
 }
 
 struct mem_cgroup *try_get_mem_cgroup_from_mm(struct mm_struct *mm)
@@ -965,8 +968,7 @@ struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *root,
                css = css_get_next(&mem_cgroup_subsys, id + 1, &root->css, &id);
                if (css) {
                        if (css == &root->css || css_tryget(css))
-                               memcg = container_of(css,
-                                                    struct mem_cgroup, css);
+                               memcg = mem_cgroup_from_css(css);
                } else
                        id = 0;
                rcu_read_unlock();
@@ -1453,7 +1455,7 @@ static int mem_cgroup_count_children(struct mem_cgroup *memcg)
 /*
  * Return the memory (and swap, if configured) limit for a memcg.
  */
-u64 mem_cgroup_get_limit(struct mem_cgroup *memcg)
+static u64 mem_cgroup_get_limit(struct mem_cgroup *memcg)
 {
        u64 limit;
        u64 memsw;
@@ -1469,6 +1471,73 @@ u64 mem_cgroup_get_limit(struct mem_cgroup *memcg)
        return min(limit, memsw);
 }
 
+void mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask,
+                             int order)
+{
+       struct mem_cgroup *iter;
+       unsigned long chosen_points = 0;
+       unsigned long totalpages;
+       unsigned int points = 0;
+       struct task_struct *chosen = NULL;
+
+       /*
+        * If current has a pending SIGKILL, then automatically select it.  The
+        * goal is to allow it to allocate so that it may quickly exit and free
+        * its memory.
+        */
+       if (fatal_signal_pending(current)) {
+               set_thread_flag(TIF_MEMDIE);
+               return;
+       }
+
+       check_panic_on_oom(CONSTRAINT_MEMCG, gfp_mask, order, NULL);
+       totalpages = mem_cgroup_get_limit(memcg) >> PAGE_SHIFT ? : 1;
+       for_each_mem_cgroup_tree(iter, memcg) {
+               struct cgroup *cgroup = iter->css.cgroup;
+               struct cgroup_iter it;
+               struct task_struct *task;
+
+               cgroup_iter_start(cgroup, &it);
+               while ((task = cgroup_iter_next(cgroup, &it))) {
+                       switch (oom_scan_process_thread(task, totalpages, NULL,
+                                                       false)) {
+                       case OOM_SCAN_SELECT:
+                               if (chosen)
+                                       put_task_struct(chosen);
+                               chosen = task;
+                               chosen_points = ULONG_MAX;
+                               get_task_struct(chosen);
+                               /* fall through */
+                       case OOM_SCAN_CONTINUE:
+                               continue;
+                       case OOM_SCAN_ABORT:
+                               cgroup_iter_end(cgroup, &it);
+                               mem_cgroup_iter_break(memcg, iter);
+                               if (chosen)
+                                       put_task_struct(chosen);
+                               return;
+                       case OOM_SCAN_OK:
+                               break;
+                       }
+                       points = oom_badness(task, memcg, NULL, totalpages);
+                       if (points > chosen_points) {
+                               if (chosen)
+                                       put_task_struct(chosen);
+                               chosen = task;
+                               chosen_points = points;
+                               get_task_struct(chosen);
+                       }
+               }
+               cgroup_iter_end(cgroup, &it);
+       }
+
+       if (!chosen)
+               return;
+       points = chosen_points * 1000 / totalpages;
+       oom_kill_process(chosen, gfp_mask, order, points, totalpages, memcg,
+                        NULL, "Memory cgroup out of memory");
+}
+
 static unsigned long mem_cgroup_reclaim(struct mem_cgroup *memcg,
                                        gfp_t gfp_mask,
                                        unsigned long flags)
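
A note on the selection arithmetic above, with a stand-alone sketch: oom_badness() returns a task's footprint in pages, the ULONG_MAX set for OOM_SCAN_SELECT trumps every other candidate, and the winner's score is normalized to the 0..1000 range that oom_kill_process() reports. The numbers below are made up purely for illustration.

#include <stdio.h>

int main(void)
{
	unsigned long totalpages = 262144;	/* e.g. a 1G limit in 4K pages */
	unsigned long chosen_points = 131072;	/* victim uses about half of it */
	unsigned int points;

	/* same normalization as above: chosen_points * 1000 / totalpages */
	points = chosen_points * 1000 / totalpages;
	printf("badness %u/1000\n", points);	/* -> 500 */
	return 0;
}
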
@@ -1898,7 +1967,7 @@ again:
                return;
        /*
         * If this memory cgroup is not under account moving, we don't
-        * need to take move_lock_page_cgroup(). Because we already hold
+        * need to take move_lock_mem_cgroup(). Because we already hold
         * rcu_read_lock(), any calls to move_account will be delayed until
         * rcu_read_unlock() if mem_cgroup_stolen() == true.
         */
@@ -1920,7 +1989,7 @@ void __mem_cgroup_end_update_page_stat(struct page *page, unsigned long *flags)
        /*
         * It's guaranteed that pc->mem_cgroup never changes while the
         * lock is held, because any routine modifying pc->mem_cgroup
-        * should take move_lock_page_cgroup().
+        * should take move_lock_mem_cgroup().
         */
        move_unlock_mem_cgroup(pc->mem_cgroup, flags);
 }
@@ -2267,7 +2336,7 @@ static int __mem_cgroup_try_charge(struct mm_struct *mm,
         * We always charge the cgroup the mm_struct belongs to.
         * The mm_struct's mem_cgroup changes on task migration if the
         * thread group leader migrates. It's possible that mm is not
-        * set, if so charge the init_mm (happens for pagecache usage).
+        * set; if so, charge the root memcg (happens for pagecache usage).
         */
        if (!*ptr && !mm)
                *ptr = root_mem_cgroup;
@@ -2428,7 +2497,7 @@ static struct mem_cgroup *mem_cgroup_lookup(unsigned short id)
        css = css_lookup(&mem_cgroup_subsys, id);
        if (!css)
                return NULL;
-       return container_of(css, struct mem_cgroup, css);
+       return mem_cgroup_from_css(css);
 }
 
 struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page)
@@ -2472,11 +2541,7 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *memcg,
        bool anon;
 
        lock_page_cgroup(pc);
-       if (unlikely(PageCgroupUsed(pc))) {
-               unlock_page_cgroup(pc);
-               __mem_cgroup_cancel_charge(memcg, nr_pages);
-               return;
-       }
+       VM_BUG_ON(PageCgroupUsed(pc));
        /*
         * we don't need the page_cgroup lock for tail pages, because they are not
         * accessed by any other context at this point.
@@ -2729,64 +2794,32 @@ int mem_cgroup_newpage_charge(struct page *page,
                                        MEM_CGROUP_CHARGE_TYPE_ANON);
 }
 
-static void
-__mem_cgroup_commit_charge_swapin(struct page *page, struct mem_cgroup *ptr,
-                                       enum charge_type ctype);
-
-int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
-                               gfp_t gfp_mask)
-{
-       struct mem_cgroup *memcg = NULL;
-       enum charge_type type = MEM_CGROUP_CHARGE_TYPE_CACHE;
-       int ret;
-
-       if (mem_cgroup_disabled())
-               return 0;
-       if (PageCompound(page))
-               return 0;
-
-       if (unlikely(!mm))
-               mm = &init_mm;
-       if (!page_is_file_cache(page))
-               type = MEM_CGROUP_CHARGE_TYPE_SHMEM;
-
-       if (!PageSwapCache(page))
-               ret = mem_cgroup_charge_common(page, mm, gfp_mask, type);
-       else { /* page is swapcache/shmem */
-               ret = mem_cgroup_try_charge_swapin(mm, page, gfp_mask, &memcg);
-               if (!ret)
-                       __mem_cgroup_commit_charge_swapin(page, memcg, type);
-       }
-       return ret;
-}
-
 /*
  * During swap-in the sequence is try_charge -> commit or cancel, with
  * the page locked.  When try_charge() returns successfully, one refcnt
  * to the memcg is held without any struct page_cgroup; it is consumed
  * by "commit()" or released by "cancel()".
  */
-int mem_cgroup_try_charge_swapin(struct mm_struct *mm,
-                                struct page *page,
-                                gfp_t mask, struct mem_cgroup **memcgp)
+static int __mem_cgroup_try_charge_swapin(struct mm_struct *mm,
+                                         struct page *page,
+                                         gfp_t mask,
+                                         struct mem_cgroup **memcgp)
 {
        struct mem_cgroup *memcg;
+       struct page_cgroup *pc;
        int ret;
 
-       *memcgp = NULL;
-
-       if (mem_cgroup_disabled())
-               return 0;
-
-       if (!do_swap_account)
-               goto charge_cur_mm;
+       pc = lookup_page_cgroup(page);
        /*
-        * A racing thread's fault, or swapoff, may have already updated
-        * the pte, and even removed page from swap cache: in those cases
-        * do_swap_page()'s pte_same() test will fail; but there's also a
-        * KSM case which does need to charge the page.
+        * Every swap fault against a single page tries to charge the
+        * page, so bail out as early as possible.  shmem_unuse() encounters
+        * already charged pages, too.  The USED bit is protected by
+        * the page lock, which serializes swap cache removal, which
+        * in turn serializes uncharging.
         */
-       if (!PageSwapCache(page))
+       if (PageCgroupUsed(pc))
+               return 0;
+       if (!do_swap_account)
                goto charge_cur_mm;
        memcg = try_get_mem_cgroup_from_page(page);
        if (!memcg)
@@ -2798,14 +2831,44 @@ int mem_cgroup_try_charge_swapin(struct mm_struct *mm,
                ret = 0;
        return ret;
 charge_cur_mm:
-       if (unlikely(!mm))
-               mm = &init_mm;
        ret = __mem_cgroup_try_charge(mm, mask, 1, memcgp, true);
        if (ret == -EINTR)
                ret = 0;
        return ret;
 }
 
+int mem_cgroup_try_charge_swapin(struct mm_struct *mm, struct page *page,
+                                gfp_t gfp_mask, struct mem_cgroup **memcgp)
+{
+       *memcgp = NULL;
+       if (mem_cgroup_disabled())
+               return 0;
+       /*
+        * A racing thread's fault, or swapoff, may have already
+        * updated the pte, and even removed the page from swap cache: in
+        * those cases unuse_pte()'s pte_same() test will fail; but
+        * there's also a KSM case which does need to charge the page.
+        */
+       if (!PageSwapCache(page)) {
+               int ret;
+
+               ret = __mem_cgroup_try_charge(mm, gfp_mask, 1, memcgp, true);
+               if (ret == -EINTR)
+                       ret = 0;
+               return ret;
+       }
+       return __mem_cgroup_try_charge_swapin(mm, page, gfp_mask, memcgp);
+}
+
+void mem_cgroup_cancel_charge_swapin(struct mem_cgroup *memcg)
+{
+       if (mem_cgroup_disabled())
+               return;
+       if (!memcg)
+               return;
+       __mem_cgroup_cancel_charge(memcg, 1);
+}
+
 static void
 __mem_cgroup_commit_charge_swapin(struct page *page, struct mem_cgroup *memcg,
                                        enum charge_type ctype)
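
The callers of these hooks follow a try -> commit-or-cancel protocol: try_charge acquires a provisional charge, and exactly one of commit() or cancel() resolves it, depending on whether the swap-in raced. Below is a stand-alone model of that pattern on a plain counter; all names are hypothetical and it stands in for the res_counter only loosely.

#include <stdbool.h>
#include <stdio.h>

static long charged;			/* stand-in for res_counter usage */
static const long limit = 4;

static bool try_charge(void)
{
	if (charged >= limit)
		return false;		/* over limit, caller must bail */
	charged++;			/* provisional charge, no owner yet */
	return true;
}

static void commit_charge(void)
{
	/* the page now owns the charge; nothing to undo */
}

static void cancel_charge(void)
{
	charged--;			/* raced, e.g. pte_same() failed: undo */
}

int main(void)
{
	if (try_charge()) {
		bool raced = false;

		if (raced)
			cancel_charge();
		else
			commit_charge();
	}
	printf("charged = %ld\n", charged);	/* -> 1 */
	return 0;
}
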
@@ -2843,13 +2906,27 @@ void mem_cgroup_commit_charge_swapin(struct page *page,
                                          MEM_CGROUP_CHARGE_TYPE_ANON);
 }
 
-void mem_cgroup_cancel_charge_swapin(struct mem_cgroup *memcg)
+int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
+                               gfp_t gfp_mask)
 {
+       struct mem_cgroup *memcg = NULL;
+       enum charge_type type = MEM_CGROUP_CHARGE_TYPE_CACHE;
+       int ret;
+
        if (mem_cgroup_disabled())
-               return;
-       if (!memcg)
-               return;
-       __mem_cgroup_cancel_charge(memcg, 1);
+               return 0;
+       if (PageCompound(page))
+               return 0;
+
+       if (!PageSwapCache(page))
+               ret = mem_cgroup_charge_common(page, mm, gfp_mask, type);
+       else { /* page is swapcache/shmem */
+               ret = __mem_cgroup_try_charge_swapin(mm, page,
+                                                    gfp_mask, &memcg);
+               if (!ret)
+                       __mem_cgroup_commit_charge_swapin(page, memcg, type);
+       }
+       return ret;
 }
 
 static void mem_cgroup_do_uncharge(struct mem_cgroup *memcg,
@@ -2909,7 +2986,8 @@ direct_uncharge:
  * uncharge if !page_mapped(page)
  */
 static struct mem_cgroup *
-__mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype)
+__mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype,
+                            bool end_migration)
 {
        struct mem_cgroup *memcg = NULL;
        unsigned int nr_pages = 1;
@@ -2919,8 +2997,7 @@ __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype)
        if (mem_cgroup_disabled())
                return NULL;
 
-       if (PageSwapCache(page))
-               return NULL;
+       VM_BUG_ON(PageSwapCache(page));
 
        if (PageTransHuge(page)) {
                nr_pages <<= compound_order(page);
@@ -2953,7 +3030,16 @@ __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype)
                /* fallthrough */
        case MEM_CGROUP_CHARGE_TYPE_DROP:
                /* See mem_cgroup_prepare_migration() */
-               if (page_mapped(page) || PageCgroupMigration(pc))
+               if (page_mapped(page))
+                       goto unlock_out;
+               /*
+                * Pages under migration may not be uncharged.  But
+                * end_migration() /must/ be the one uncharging the
+                * unused post-migration page, and so it has to get here
+                * with the migration bit still set.  See the
+                * res_counter handling below.
+                */
+               if (!end_migration && PageCgroupMigration(pc))
                        goto unlock_out;
                break;
        case MEM_CGROUP_CHARGE_TYPE_SWAPOUT:
@@ -2987,7 +3073,12 @@ __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype)
                mem_cgroup_swap_statistics(memcg, true);
                mem_cgroup_get(memcg);
        }
-       if (!mem_cgroup_is_root(memcg))
+       /*
+        * Migration does not charge the res_counter for the
+        * replacement page, so leave it alone when phasing out the
+        * page that is unused after the migration.
+        */
+       if (!end_migration && !mem_cgroup_is_root(memcg))
                mem_cgroup_do_uncharge(memcg, nr_pages, ctype);
 
        return memcg;
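
To make the end_migration reasoning above concrete: one res_counter charge backs the old/new page pair for the whole migration, so phasing out the loser page must clear its page state without paying the charge back. A toy model under those assumptions (hypothetical names, illustration only):

#include <stdbool.h>
#include <stdio.h>

static long res_usage = 1;		/* one charge for the migration pair */

struct page { bool cgroup_used; };	/* roughly PageCgroupUsed */

static void uncharge_common(struct page *pg, bool end_migration)
{
	pg->cgroup_used = false;	/* always drop the page->memcg link */
	if (!end_migration)
		res_usage--;		/* a normal uncharge pays back */
	/* end_migration: the charge stays with the surviving page */
}

int main(void)
{
	struct page oldpage = { true }, newpage = { true };

	uncharge_common(&oldpage, true);	/* page unused after migration */
	printf("usage=%ld new.used=%d\n", res_usage, newpage.cgroup_used);
	/* -> usage=1 new.used=1: the single charge now backs only newpage */
	return 0;
}
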
@@ -3003,14 +3094,16 @@ void mem_cgroup_uncharge_page(struct page *page)
        if (page_mapped(page))
                return;
        VM_BUG_ON(page->mapping && !PageAnon(page));
-       __mem_cgroup_uncharge_common(page, MEM_CGROUP_CHARGE_TYPE_ANON);
+       if (PageSwapCache(page))
+               return;
+       __mem_cgroup_uncharge_common(page, MEM_CGROUP_CHARGE_TYPE_ANON, false);
 }
 
 void mem_cgroup_uncharge_cache_page(struct page *page)
 {
        VM_BUG_ON(page_mapped(page));
        VM_BUG_ON(page->mapping);
-       __mem_cgroup_uncharge_common(page, MEM_CGROUP_CHARGE_TYPE_CACHE);
+       __mem_cgroup_uncharge_common(page, MEM_CGROUP_CHARGE_TYPE_CACHE, false);
 }
 
 /*
@@ -3074,7 +3167,7 @@ mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent, bool swapout)
        if (!swapout) /* this was a swap cache but the swap is unused ! */
                ctype = MEM_CGROUP_CHARGE_TYPE_DROP;
 
-       memcg = __mem_cgroup_uncharge_common(page, ctype);
+       memcg = __mem_cgroup_uncharge_common(page, ctype, false);
 
        /*
         * record memcg information; if swapout && memcg != NULL,
@@ -3164,19 +3257,18 @@ static inline int mem_cgroup_move_swap_account(swp_entry_t entry,
  * Before starting migration, account PAGE_SIZE to the mem_cgroup that the old
  * page belongs to.
  */
-int mem_cgroup_prepare_migration(struct page *page,
-       struct page *newpage, struct mem_cgroup **memcgp, gfp_t gfp_mask)
+void mem_cgroup_prepare_migration(struct page *page, struct page *newpage,
+                                 struct mem_cgroup **memcgp)
 {
        struct mem_cgroup *memcg = NULL;
        struct page_cgroup *pc;
        enum charge_type ctype;
-       int ret = 0;
 
        *memcgp = NULL;
 
        VM_BUG_ON(PageTransHuge(page));
        if (mem_cgroup_disabled())
-               return 0;
+               return;
 
        pc = lookup_page_cgroup(page);
        lock_page_cgroup(pc);
@@ -3221,24 +3313,9 @@ int mem_cgroup_prepare_migration(struct page *page,
         * we return here.
         */
        if (!memcg)
-               return 0;
+               return;
 
        *memcgp = memcg;
-       ret = __mem_cgroup_try_charge(NULL, gfp_mask, 1, memcgp, false);
-       css_put(&memcg->css);/* drop extra refcnt */
-       if (ret) {
-               if (PageAnon(page)) {
-                       lock_page_cgroup(pc);
-                       ClearPageCgroupMigration(pc);
-                       unlock_page_cgroup(pc);
-                       /*
-                        * The old page may be fully unmapped while we kept it.
-                        */
-                       mem_cgroup_uncharge_page(page);
-               }
-               /* we'll need to revisit this error code (we have -EINTR) */
-               return -ENOMEM;
-       }
        /*
         * We charge the new page before it's used/mapped. So, even if unlock_page()
         * is called before end_migration, we can catch all events on this new
@@ -3247,12 +3324,14 @@ int mem_cgroup_prepare_migration(struct page *page,
         */
        if (PageAnon(page))
                ctype = MEM_CGROUP_CHARGE_TYPE_ANON;
-       else if (page_is_file_cache(page))
-               ctype = MEM_CGROUP_CHARGE_TYPE_CACHE;
        else
-               ctype = MEM_CGROUP_CHARGE_TYPE_SHMEM;
+               ctype = MEM_CGROUP_CHARGE_TYPE_CACHE;
+       /*
+        * The page is committed to the memcg, but it's not actually
+        * charged to the res_counter since we plan on replacing the
+        * old one and only one page is going to be left afterwards.
+        */
        __mem_cgroup_commit_charge(memcg, newpage, 1, ctype, false);
-       return ret;
 }
 
 /* remove redundant charge if migration failed */
@@ -3274,6 +3353,12 @@ void mem_cgroup_end_migration(struct mem_cgroup *memcg,
                used = newpage;
                unused = oldpage;
        }
+       anon = PageAnon(used);
+       __mem_cgroup_uncharge_common(unused,
+                                    anon ? MEM_CGROUP_CHARGE_TYPE_ANON
+                                    : MEM_CGROUP_CHARGE_TYPE_CACHE,
+                                    true);
+       css_put(&memcg->css);
        /*
         * We disallowed uncharge of pages under migration because the
         * mapcount of the page temporarily drops to zero.
@@ -3283,10 +3368,6 @@ void mem_cgroup_end_migration(struct mem_cgroup *memcg,
        lock_page_cgroup(pc);
        ClearPageCgroupMigration(pc);
        unlock_page_cgroup(pc);
-       anon = PageAnon(used);
-       __mem_cgroup_uncharge_common(unused,
-               anon ? MEM_CGROUP_CHARGE_TYPE_ANON
-                    : MEM_CGROUP_CHARGE_TYPE_CACHE);
 
        /*
         * If a page is a file cache, radix-tree replacement is very atomic
@@ -3338,10 +3419,6 @@ void mem_cgroup_replace_page_cache(struct page *oldpage,
         */
        if (!memcg)
                return;
-
-       if (PageSwapBacked(oldpage))
-               type = MEM_CGROUP_CHARGE_TYPE_SHMEM;
-
        /*
         * Even if newpage->mapping was NULL before starting replacement,
         * the newpage may be on the LRU (or a pagevec for the LRU) already. We lock
@@ -3416,7 +3493,7 @@ static int mem_cgroup_resize_limit(struct mem_cgroup *memcg,
                /*
                 * Rather than hiding all of this in some function, do it
                 * open-coded here so it is obvious what really happens.
-                * We have to guarantee memcg->res.limit < memcg->memsw.limit.
+                * We have to guarantee memcg->res.limit <= memcg->memsw.limit.
                 */
                mutex_lock(&set_limit_mutex);
                memswlimit = res_counter_read_u64(&memcg->memsw, RES_LIMIT);
@@ -3477,7 +3554,7 @@ static int mem_cgroup_resize_memsw_limit(struct mem_cgroup *memcg,
                /*
                 * Rather than hiding all of this in some function, do it
                 * open-coded here so it is obvious what really happens.
-                * We have to guarantee memcg->res.limit < memcg->memsw.limit.
+                * We have to guarantee memcg->res.limit <= memcg->memsw.limit.
                 */
                mutex_lock(&set_limit_mutex);
                memlimit = res_counter_read_u64(&memcg->res, RES_LIMIT);
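
The corrected comments matter because equal limits are legitimate: memsw accounts memory+swap, so res.limit == memsw.limit simply leaves no headroom for swap, while res.limit > memsw.limit would be inconsistent. A minimal sketch of the invariant on plain integers (hypothetical names, not the res_counter API):

#include <stdio.h>

static long res_limit = 512;		/* memcg->res.limit, made-up units */
static long memsw_limit = 768;		/* memcg->memsw.limit */

static int resize_res_limit(long val)
{
	if (val > memsw_limit)
		return -1;		/* would break res.limit <= memsw.limit */
	res_limit = val;
	return 0;
}

int main(void)
{
	printf("%d\n", resize_res_limit(768));	/* 0: equal is allowed */
	printf("%d\n", resize_res_limit(1024));	/* -1: rejected */
	return 0;
}
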