mm/memblock.c: call kmemleak directly from memblock_(alloc|free)

[firefly-linux-kernel-4.4.55.git] / mm / memcontrol.c
diff --git a/mm/memcontrol.c b/mm/memcontrol.c

index 86a2078805e5ca0d7f3bdeb8f6d803ee285519e4..9bf8a84bcaae5d90f10cb478b5233a3cbe25be3a 100644 (file)
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -80,7 +80,7 @@ int do_swap_account __read_mostly;
  #ifdef CONFIG_MEMCG_SWAP_ENABLED
  static int really_do_swap_account __initdata = 1;
  #else
-static int really_do_swap_account __initdata = 0;
+static int really_do_swap_account __initdata;
  #endif
  
  #else
@@ -357,10 +357,9 @@ struct mem_cgroup {
         struct cg_proto tcp_mem;
  #endif
  #if defined(CONFIG_MEMCG_KMEM)
-       /* analogous to slab_common's slab_caches list. per-memcg */
+       /* analogous to slab_common's slab_caches list, but per-memcg;
+        * protected by memcg_slab_mutex */
         struct list_head memcg_slab_caches;
-       /* Not a spinlock, we can take a lot of time walking the list */
-       struct mutex slab_caches_mutex;
          /* Index in the kmem_cache->memcg_params->memcg_caches array */
         int kmemcg_id;
  #endif
@@ -1551,7 +1550,7 @@ static unsigned long mem_cgroup_margin(struct mem_cgroup *memcg)
  int mem_cgroup_swappiness(struct mem_cgroup *memcg)
  {
         /* root ? */
-       if (!css_parent(&memcg->css))
+       if (mem_cgroup_disabled() || !css_parent(&memcg->css))
                 return vm_swappiness;
  
         return memcg->swappiness;
@@ -1595,23 +1594,12 @@ static void mem_cgroup_end_move(struct mem_cgroup *memcg)
  }
  
  /*
- * 2 routines for checking "mem" is under move_account() or not.
+ * A routine for checking whether "mem" is under move_account() or not.
   *
- * mem_cgroup_stolen() -  checking whether a cgroup is mc.from or not. This
- *                       is used for avoiding races in accounting.  If true,
- *                       pc->mem_cgroup may be overwritten.
- *
- * mem_cgroup_under_move() - checking a cgroup is mc.from or mc.to or
- *                       under hierarchy of moving cgroups. This is for
- *                       waiting at hith-memory prressure caused by "move".
+ * It checks whether a cgroup is mc.from or mc.to, or is under the
+ * hierarchy of moving cgroups. This is used for waiting at high-memory
+ * pressure caused by "move".
   */
-
-static bool mem_cgroup_stolen(struct mem_cgroup *memcg)
-{
-       VM_BUG_ON(!rcu_read_lock_held());
-       return atomic_read(&memcg->moving_account) > 0;
-}
-
  static bool mem_cgroup_under_move(struct mem_cgroup *memcg)
  {
         struct mem_cgroup *from;
@@ -1654,7 +1642,6 @@ static bool mem_cgroup_wait_acct_move(struct mem_cgroup *memcg)
   * Take this lock when
   * - a code tries to modify page's memcg while it's USED.
   * - a code tries to modify page state accounting in a memcg.
- * see mem_cgroup_stolen(), too.
   */
  static void move_lock_mem_cgroup(struct mem_cgroup *memcg,
                                   unsigned long *flags)
@@ -2289,12 +2276,11 @@ cleanup:
  }
  
  /*
- * Currently used to update mapped file statistics, but the routine can be
- * generalized to update other statistics as well.
+ * Used to update mapped file, writeback, or other statistics.
   *
   * Notes: Race condition
   *
- * We usually use page_cgroup_lock() for accessing page_cgroup member but
+ * We usually use lock_page_cgroup() for accessing page_cgroup member but
   * it tends to be costly. But considering some conditions, we doesn't need
   * to do so _always_.
   *
@@ -2308,8 +2294,8 @@ cleanup:
   * by flags.
   *
   * Considering "move", this is an only case we see a race. To make the race
- * small, we check mm->moving_account and detect there are possibility of race
- * If there is, we take a lock.
+ * small, we check memcg->moving_account to detect whether there is a
+ * possibility of a race. If there is, we take a lock.
   */
  
  void __mem_cgroup_begin_update_page_stat(struct page *page,
@@ -2327,9 +2313,10 @@ again:
          * If this memory cgroup is not under account moving, we don't
          * need to take move_lock_mem_cgroup(). Because we already hold
          * rcu_read_lock(), any calls to move_account will be delayed until
-        * rcu_read_unlock() if mem_cgroup_stolen() == true.
+        * rcu_read_unlock().
          */
-       if (!mem_cgroup_stolen(memcg))
+       VM_BUG_ON(!rcu_read_lock_held());
+       if (atomic_read(&memcg->moving_account) <= 0)
                 return;
  
         move_lock_mem_cgroup(memcg, flags);
@@ -2437,7 +2424,7 @@ static void drain_stock(struct memcg_stock_pcp *stock)
   */
  static void drain_local_stock(struct work_struct *dummy)
  {
-       struct memcg_stock_pcp *stock = &__get_cpu_var(memcg_stock);
+       struct memcg_stock_pcp *stock = this_cpu_ptr(&memcg_stock);
         drain_stock(stock);
         clear_bit(FLUSHING_CACHED_CHARGE, &stock->flags);
  }
@@ -2913,6 +2900,12 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *memcg,
  static DEFINE_MUTEX(set_limit_mutex);
  
  #ifdef CONFIG_MEMCG_KMEM
+/*
+ * The memcg_slab_mutex is held whenever a per memcg kmem cache is created or
+ * destroyed. It protects memcg_caches arrays and memcg_slab_caches lists.
+ */
+static DEFINE_MUTEX(memcg_slab_mutex);
+
  static DEFINE_MUTEX(activate_kmem_mutex);
  
  static inline bool memcg_can_account_kmem(struct mem_cgroup *memcg)
@@ -2945,10 +2938,10 @@ static int mem_cgroup_slabinfo_read(struct seq_file *m, void *v)
  
         print_slabinfo_header(m);
  
-       mutex_lock(&memcg->slab_caches_mutex);
+       mutex_lock(&memcg_slab_mutex);
         list_for_each_entry(params, &memcg->memcg_slab_caches, list)
                 cache_show(memcg_params_to_cache(params), m);
-       mutex_unlock(&memcg->slab_caches_mutex);
+       mutex_unlock(&memcg_slab_mutex);
  
         return 0;
  }
@@ -3050,8 +3043,6 @@ void memcg_update_array_size(int num)
                 memcg_limited_groups_array_size = memcg_caches_array_size(num);
  }
  
-static void kmem_cache_destroy_work_func(struct work_struct *w);
-
  int memcg_update_cache_size(struct kmem_cache *s, int num_groups)
  {
         struct memcg_cache_params *cur_params = s->memcg_params;
@@ -3104,29 +3095,6 @@ int memcg_update_cache_size(struct kmem_cache *s, int num_groups)
         return 0;
  }
  
-char *memcg_create_cache_name(struct mem_cgroup *memcg,
-                             struct kmem_cache *root_cache)
-{
-       static char *buf = NULL;
-
-       /*
-        * We need a mutex here to protect the shared buffer. Since this is
-        * expected to be called only on cache creation, we can employ the
-        * slab_mutex for that purpose.
-        */
-       lockdep_assert_held(&slab_mutex);
-
-       if (!buf) {
-               buf = kmalloc(NAME_MAX + 1, GFP_KERNEL);
-               if (!buf)
-                       return NULL;
-       }
-
-       cgroup_name(memcg->css.cgroup, buf, NAME_MAX + 1);
-       return kasprintf(GFP_KERNEL, "%s(%d:%s)", root_cache->name,
-                        memcg_cache_id(memcg), buf);
-}
-
  int memcg_alloc_cache_params(struct mem_cgroup *memcg, struct kmem_cache *s,
                              struct kmem_cache *root_cache)
  {
@@ -3148,8 +3116,6 @@ int memcg_alloc_cache_params(struct mem_cgroup *memcg, struct kmem_cache *s,
         if (memcg) {
                 s->memcg_params->memcg = memcg;
                 s->memcg_params->root_cache = root_cache;
-               INIT_WORK(&s->memcg_params->destroy,
-                               kmem_cache_destroy_work_func);
                 css_get(&memcg->css);
         } else
                 s->memcg_params->is_root_cache = true;
@@ -3166,24 +3132,37 @@ void memcg_free_cache_params(struct kmem_cache *s)
         kfree(s->memcg_params);
  }
  
-void memcg_register_cache(struct kmem_cache *s)
+static void memcg_register_cache(struct mem_cgroup *memcg,
+                                struct kmem_cache *root_cache)
  {
-       struct kmem_cache *root;
-       struct mem_cgroup *memcg;
+       static char memcg_name_buf[NAME_MAX + 1]; /* protected by
+                                                    memcg_slab_mutex */
+       struct kmem_cache *cachep;
         int id;
  
-       if (is_root_cache(s))
+       lockdep_assert_held(&memcg_slab_mutex);
+
+       id = memcg_cache_id(memcg);
+
+       /*
+        * Since per-memcg caches are created asynchronously on first
+        * allocation (see memcg_kmem_get_cache()), several threads can try to
+        * create the same cache, but only one of them may succeed.
+        */
+       if (cache_from_memcg_idx(root_cache, id))
                 return;
  
+       cgroup_name(memcg->css.cgroup, memcg_name_buf, NAME_MAX + 1);
+       cachep = memcg_create_kmem_cache(memcg, root_cache, memcg_name_buf);
         /*
-        * Holding the slab_mutex assures nobody will touch the memcg_caches
-        * array while we are modifying it.
+        * If we could not create a memcg cache, do not complain, because
+        * that's not critical at all as we can always proceed with the root
+        * cache.
          */
-       lockdep_assert_held(&slab_mutex);
+       if (!cachep)
+               return;
  
-       root = s->memcg_params->root_cache;
-       memcg = s->memcg_params->memcg;
-       id = memcg_cache_id(memcg);
+       list_add(&cachep->memcg_params->list, &memcg->memcg_slab_caches);
  
         /*
          * Since readers won't lock (see cache_from_memcg_idx()), we need a
@@ -3192,49 +3171,30 @@ void memcg_register_cache(struct kmem_cache *s)
          */
         smp_wmb();
  
-       /*
-        * Initialize the pointer to this cache in its parent's memcg_params
-        * before adding it to the memcg_slab_caches list, otherwise we can
-        * fail to convert memcg_params_to_cache() while traversing the list.
-        */
-       VM_BUG_ON(root->memcg_params->memcg_caches[id]);
-       root->memcg_params->memcg_caches[id] = s;
-
-       mutex_lock(&memcg->slab_caches_mutex);
-       list_add(&s->memcg_params->list, &memcg->memcg_slab_caches);
-       mutex_unlock(&memcg->slab_caches_mutex);
+       BUG_ON(root_cache->memcg_params->memcg_caches[id]);
+       root_cache->memcg_params->memcg_caches[id] = cachep;
  }
  
-void memcg_unregister_cache(struct kmem_cache *s)
+static void memcg_unregister_cache(struct kmem_cache *cachep)
  {
-       struct kmem_cache *root;
+       struct kmem_cache *root_cache;
         struct mem_cgroup *memcg;
         int id;
  
-       if (is_root_cache(s))
-               return;
+       lockdep_assert_held(&memcg_slab_mutex);
  
-       /*
-        * Holding the slab_mutex assures nobody will touch the memcg_caches
-        * array while we are modifying it.
-        */
-       lockdep_assert_held(&slab_mutex);
+       BUG_ON(is_root_cache(cachep));
  
-       root = s->memcg_params->root_cache;
-       memcg = s->memcg_params->memcg;
+       root_cache = cachep->memcg_params->root_cache;
+       memcg = cachep->memcg_params->memcg;
         id = memcg_cache_id(memcg);
  
-       mutex_lock(&memcg->slab_caches_mutex);
-       list_del(&s->memcg_params->list);
-       mutex_unlock(&memcg->slab_caches_mutex);
+       BUG_ON(root_cache->memcg_params->memcg_caches[id] != cachep);
+       root_cache->memcg_params->memcg_caches[id] = NULL;
  
-       /*
-        * Clear the pointer to this cache in its parent's memcg_params only
-        * after removing it from the memcg_slab_caches list, otherwise we can
-        * fail to convert memcg_params_to_cache() while traversing the list.
-        */
-       VM_BUG_ON(root->memcg_params->memcg_caches[id] != s);
-       root->memcg_params->memcg_caches[id] = NULL;
+       list_del(&cachep->memcg_params->list);
+
+       kmem_cache_destroy(cachep);
  }
  
  /*
@@ -3268,85 +3228,61 @@ static inline void memcg_resume_kmem_account(void)
         current->memcg_kmem_skip_account--;
  }
  
-static void kmem_cache_destroy_work_func(struct work_struct *w)
-{
-       struct kmem_cache *cachep;
-       struct memcg_cache_params *p;
-
-       p = container_of(w, struct memcg_cache_params, destroy);
-
-       cachep = memcg_params_to_cache(p);
-
-       kmem_cache_shrink(cachep);
-       if (atomic_read(&cachep->memcg_params->nr_pages) == 0)
-               kmem_cache_destroy(cachep);
-}
-
-int __kmem_cache_destroy_memcg_children(struct kmem_cache *s)
+int __memcg_cleanup_cache_params(struct kmem_cache *s)
  {
         struct kmem_cache *c;
         int i, failed = 0;
  
-       /*
-        * If the cache is being destroyed, we trust that there is no one else
-        * requesting objects from it. Even if there are, the sanity checks in
-        * kmem_cache_destroy should caught this ill-case.
-        *
-        * Still, we don't want anyone else freeing memcg_caches under our
-        * noses, which can happen if a new memcg comes to life. As usual,
-        * we'll take the activate_kmem_mutex to protect ourselves against
-        * this.
-        */
-       mutex_lock(&activate_kmem_mutex);
+       mutex_lock(&memcg_slab_mutex);
         for_each_memcg_cache_index(i) {
                 c = cache_from_memcg_idx(s, i);
                 if (!c)
                         continue;
  
-               /*
-                * We will now manually delete the caches, so to avoid races
-                * we need to cancel all pending destruction workers and
-                * proceed with destruction ourselves.
-                */
-               cancel_work_sync(&c->memcg_params->destroy);
-               kmem_cache_destroy(c);
+               memcg_unregister_cache(c);
  
                 if (cache_from_memcg_idx(s, i))
                         failed++;
         }
-       mutex_unlock(&activate_kmem_mutex);
+       mutex_unlock(&memcg_slab_mutex);
         return failed;
  }
  
-static void mem_cgroup_destroy_all_caches(struct mem_cgroup *memcg)
+static void memcg_unregister_all_caches(struct mem_cgroup *memcg)
  {
         struct kmem_cache *cachep;
-       struct memcg_cache_params *params;
+       struct memcg_cache_params *params, *tmp;
  
         if (!memcg_kmem_is_active(memcg))
                 return;
  
-       mutex_lock(&memcg->slab_caches_mutex);
-       list_for_each_entry(params, &memcg->memcg_slab_caches, list) {
+       mutex_lock(&memcg_slab_mutex);
+       list_for_each_entry_safe(params, tmp, &memcg->memcg_slab_caches, list) {
                 cachep = memcg_params_to_cache(params);
-               schedule_work(&cachep->memcg_params->destroy);
+               kmem_cache_shrink(cachep);
+               if (atomic_read(&cachep->memcg_params->nr_pages) == 0)
+                       memcg_unregister_cache(cachep);
         }
-       mutex_unlock(&memcg->slab_caches_mutex);
+       mutex_unlock(&memcg_slab_mutex);
  }
  
-struct create_work {
+struct memcg_register_cache_work {
         struct mem_cgroup *memcg;
         struct kmem_cache *cachep;
         struct work_struct work;
  };
  
-static void memcg_create_cache_work_func(struct work_struct *w)
+static void memcg_register_cache_func(struct work_struct *w)
  {
-       struct create_work *cw = container_of(w, struct create_work, work);
+       struct memcg_register_cache_work *cw =
+               container_of(w, struct memcg_register_cache_work, work);
         struct mem_cgroup *memcg = cw->memcg;
         struct kmem_cache *cachep = cw->cachep;
  
-       kmem_cache_create_memcg(memcg, cachep);
+       mutex_lock(&memcg_slab_mutex);
+       memcg_register_cache(memcg, cachep);
+       mutex_unlock(&memcg_slab_mutex);
+
         css_put(&memcg->css);
         kfree(cw);
  }
@@ -3354,12 +3290,12 @@ static void memcg_create_cache_work_func(struct work_struct *w)
  /*
   * Enqueue the creation of a per-memcg kmem_cache.
   */
-static void __memcg_create_cache_enqueue(struct mem_cgroup *memcg,
-                                        struct kmem_cache *cachep)
+static void __memcg_schedule_register_cache(struct mem_cgroup *memcg,
+                                           struct kmem_cache *cachep)
  {
-       struct create_work *cw;
+       struct memcg_register_cache_work *cw;
  
-       cw = kmalloc(sizeof(struct create_work), GFP_NOWAIT);
+       cw = kmalloc(sizeof(*cw), GFP_NOWAIT);
         if (cw == NULL) {
                 css_put(&memcg->css);
                 return;
@@ -3368,17 +3304,17 @@ static void __memcg_create_cache_enqueue(struct mem_cgroup *memcg,
         cw->memcg = memcg;
         cw->cachep = cachep;
  
-       INIT_WORK(&cw->work, memcg_create_cache_work_func);
+       INIT_WORK(&cw->work, memcg_register_cache_func);
         schedule_work(&cw->work);
  }
  
-static void memcg_create_cache_enqueue(struct mem_cgroup *memcg,
-                                      struct kmem_cache *cachep)
+static void memcg_schedule_register_cache(struct mem_cgroup *memcg,
+                                         struct kmem_cache *cachep)
  {
         /*
          * We need to stop accounting when we kmalloc, because if the
          * corresponding kmalloc cache is not yet created, the first allocation
-        * in __memcg_create_cache_enqueue will recurse.
+        * in __memcg_schedule_register_cache will recurse.
          *
          * However, it is better to enclose the whole function. Depending on
          * the debugging options enabled, INIT_WORK(), for instance, can
@@ -3387,7 +3323,7 @@ static void memcg_create_cache_enqueue(struct mem_cgroup *memcg,
          * the safest choice is to do it like this, wrapping the whole function.
          */
         memcg_stop_kmem_account();
-       __memcg_create_cache_enqueue(memcg, cachep);
+       __memcg_schedule_register_cache(memcg, cachep);
         memcg_resume_kmem_account();
  }
  
@@ -3458,16 +3394,11 @@ struct kmem_cache *__memcg_kmem_get_cache(struct kmem_cache *cachep,
          *
          * However, there are some clashes that can arrive from locking.
          * For instance, because we acquire the slab_mutex while doing
-        * kmem_cache_dup, this means no further allocation could happen
-        * with the slab_mutex held.
-        *
-        * Also, because cache creation issue get_online_cpus(), this
-        * creates a lock chain: memcg_slab_mutex -> cpu_hotplug_mutex,
-        * that ends up reversed during cpu hotplug. (cpuset allocates
-        * a bunch of GFP_KERNEL memory during cpuup). Due to all that,
-        * better to defer everything.
+        * memcg_create_kmem_cache, this means no further allocation
+        * could happen with the slab_mutex held. So it's better to
+        * defer everything.
          */
-       memcg_create_cache_enqueue(memcg, cachep);
+       memcg_schedule_register_cache(memcg, cachep);
         return cachep;
  out:
         rcu_read_unlock();
@@ -3591,7 +3522,7 @@ void __memcg_kmem_uncharge_pages(struct page *page, int order)
         memcg_uncharge_kmem(memcg, PAGE_SIZE << order);
  }
  #else
-static inline void mem_cgroup_destroy_all_caches(struct mem_cgroup *memcg)
+static inline void memcg_unregister_all_caches(struct mem_cgroup *memcg)
  {
  }
  #endif /* CONFIG_MEMCG_KMEM */
@@ -4744,9 +4675,9 @@ static void mem_cgroup_force_empty_list(struct mem_cgroup *memcg,
                 if (mem_cgroup_move_parent(page, pc, memcg)) {
                         /* found lock contention or "pc" is obsolete. */
                         busy = page;
-                       cond_resched();
                 } else
                         busy = NULL;
+               cond_resched();
         } while (!list_empty(list));
  }
  
@@ -5022,13 +4953,14 @@ static int __memcg_activate_kmem(struct mem_cgroup *memcg,
          * Make sure we have enough space for this cgroup in each root cache's
          * memcg_params.
          */
+       mutex_lock(&memcg_slab_mutex);
         err = memcg_update_all_caches(memcg_id + 1);
+       mutex_unlock(&memcg_slab_mutex);
         if (err)
                 goto out_rmid;
  
         memcg->kmemcg_id = memcg_id;
         INIT_LIST_HEAD(&memcg->memcg_slab_caches);
-       mutex_init(&memcg->slab_caches_mutex);
  
         /*
          * We couldn't have accounted to this cgroup, because it hasn't got the
@@ -6436,7 +6368,7 @@ static void mem_cgroup_css_offline(struct cgroup_subsys_state *css)
         css_for_each_descendant_post(iter, css)
                 mem_cgroup_reparent_charges(mem_cgroup_from_css(iter));
  
-       mem_cgroup_destroy_all_caches(memcg);
+       memcg_unregister_all_caches(memcg);
         vmpressure_cleanup(&memcg->vmpressure);
  }