slab allocators: Remove SLAB_CTOR_ATOMIC
[firefly-linux-kernel-4.4.55.git] / mm / slub.c
index a6231963cae5b3aa69760d19f6f5efe869c24384..347e44821bcb8b7e5b10a77a1c79c7debed1caa9 100644 (file)
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -97,9 +97,6 @@
  *
  * - Support PAGE_ALLOC_DEBUG. Should be easy to do.
  *
- * - SLAB_DEBUG_INITIAL is not supported but I have never seen a use of
- *   it.
- *
  * - Variable sizing of the per node arrays
  */
 
 #endif
 
 /*
- * Flags from the regular SLAB that SLUB does not support:
+ * Minimum number of partial slabs. These will be left on the partial
+ * lists even if they are empty. kmem_cache_shrink may reclaim them.
  */
-#define SLUB_UNIMPLEMENTED (SLAB_DEBUG_INITIAL)
-
-/* Mininum number of partial slabs */
 #define MIN_PARTIAL 2
 
+/*
+ * Maximum number of desirable partial slabs.
+ * The existence of more partial slabs makes kmem_cache_shrink
+ * sort the partial list by the number of objects in use.
+ */
+#define MAX_PARTIAL 10
+
 #define DEBUG_DEFAULT_FLAGS (SLAB_DEBUG_FREE | SLAB_RED_ZONE | \
                                SLAB_POISON | SLAB_STORE_USER)
 /*
@@ -449,7 +451,7 @@ static int check_valid_pointer(struct kmem_cache *s, struct page *page,
 static void restore_bytes(struct kmem_cache *s, char *message, u8 data,
                                                void *from, void *to)
 {
-       printk(KERN_ERR "@@@ SLUB: %s Restoring %s (0x%x) from 0x%p-0x%p\n",
+       printk(KERN_ERR "@@@ SLUB %s: Restoring %s (0x%x) from 0x%p-0x%p\n",
                s->name, message, data, from, to - 1);
        memset(from, data, to - from);
 }
@@ -496,9 +498,7 @@ static int slab_pad_check(struct kmem_cache *s, struct page *page)
                return 1;
 
        if (!check_bytes(p + length, POISON_INUSE, remainder)) {
-               printk(KERN_ERR "SLUB: %s slab 0x%p: Padding fails check\n",
-                       s->name, p);
-               dump_stack();
+               slab_err(s, page, "Padding check failed");
                restore_bytes(s, "slab padding", POISON_INUSE, p + length,
                        p + length + remainder);
                return 0;
@@ -584,30 +584,25 @@ static int check_slab(struct kmem_cache *s, struct page *page)
        VM_BUG_ON(!irqs_disabled());
 
        if (!PageSlab(page)) {
-               printk(KERN_ERR "SLUB: %s Not a valid slab page @0x%p "
-                       "flags=%lx mapping=0x%p count=%d \n",
-                       s->name, page, page->flags, page->mapping,
+               slab_err(s, page, "Not a valid slab page flags=%lx "
+                       "mapping=0x%p count=%d", page->flags, page->mapping,
                        page_count(page));
                return 0;
        }
        if (page->offset * sizeof(void *) != s->offset) {
-               printk(KERN_ERR "SLUB: %s Corrupted offset %lu in slab @0x%p"
-                       " flags=0x%lx mapping=0x%p count=%d\n",
-                       s->name,
+               slab_err(s, page, "Corrupted offset %lu flags=0x%lx "
+                       "mapping=0x%p count=%d",
                        (unsigned long)(page->offset * sizeof(void *)),
-                       page,
                        page->flags,
                        page->mapping,
                        page_count(page));
-               dump_stack();
                return 0;
        }
        if (page->inuse > s->objects) {
-               printk(KERN_ERR "SLUB: %s Inuse %u > max %u in slab "
-                       "page @0x%p flags=%lx mapping=0x%p count=%d\n",
-                       s->name, page->inuse, s->objects, page, page->flags,
+               slab_err(s, page, "inuse %u > max %u @0x%p flags=%lx "
+                       "mapping=0x%p count=%d",
+                       s->name, page->inuse, s->objects, page->flags,
                        page->mapping, page_count(page));
-               dump_stack();
                return 0;
        }
        /* Slab_pad_check fixes things up after itself */
@@ -636,12 +631,13 @@ static int on_freelist(struct kmem_cache *s, struct page *page, void *search)
                                set_freepointer(s, object, NULL);
                                break;
                        } else {
-                               printk(KERN_ERR "SLUB: %s slab 0x%p "
-                                       "freepointer 0x%p corrupted.\n",
-                                       s->name, page, fp);
-                               dump_stack();
+                               slab_err(s, page, "Freepointer 0x%p corrupt",
+                                                                       fp);
                                page->freelist = NULL;
                                page->inuse = s->objects;
+                               printk(KERN_ERR "@@@ SLUB %s: Freelist "
+                                       "cleared. Slab 0x%p\n",
+                                       s->name, page);
                                return 0;
                        }
                        break;
@@ -652,11 +648,12 @@ static int on_freelist(struct kmem_cache *s, struct page *page, void *search)
        }
 
        if (page->inuse != s->objects - nr) {
-               printk(KERN_ERR "slab %s: page 0x%p wrong object count."
-                       " counter is %d but counted were %d\n",
-                       s->name, page, page->inuse,
-                       s->objects - nr);
+               slab_err(s, page, "Wrong object count. Counter is %d but "
+                       "counted were %d", s, page, page->inuse,
+                                                       s->objects - nr);
                page->inuse = s->objects - nr;
+               printk(KERN_ERR "@@@ SLUB %s: Object count adjusted. "
+                       "Slab @0x%p\n", s->name, page);
        }
        return search == NULL;
 }
@@ -692,15 +689,13 @@ static int alloc_object_checks(struct kmem_cache *s, struct page *page,
                goto bad;
 
        if (object && !on_freelist(s, page, object)) {
-               printk(KERN_ERR "SLUB: %s Object 0x%p@0x%p "
-                       "already allocated.\n",
-                       s->name, object, page);
-               goto dump;
+               slab_err(s, page, "Object 0x%p already allocated", object);
+               goto bad;
        }
 
        if (!check_valid_pointer(s, page, object)) {
                object_err(s, page, object, "Freelist Pointer check fails");
-               goto dump;
+               goto bad;
        }
 
        if (!object)
@@ -708,17 +703,8 @@ static int alloc_object_checks(struct kmem_cache *s, struct page *page,
 
        if (!check_object(s, page, object, 0))
                goto bad;
-       init_object(s, object, 1);
 
-       if (s->flags & SLAB_TRACE) {
-               printk(KERN_INFO "TRACE %s alloc 0x%p inuse=%d fp=0x%p\n",
-                       s->name, object, page->inuse,
-                       page->freelist);
-               dump_stack();
-       }
        return 1;
-dump:
-       dump_stack();
 bad:
        if (PageSlab(page)) {
                /*
@@ -743,15 +729,12 @@ static int free_object_checks(struct kmem_cache *s, struct page *page,
                goto fail;
 
        if (!check_valid_pointer(s, page, object)) {
-               printk(KERN_ERR "SLUB: %s slab 0x%p invalid "
-                       "object pointer 0x%p\n",
-                       s->name, page, object);
+               slab_err(s, page, "Invalid object pointer 0x%p", object);
                goto fail;
        }
 
        if (on_freelist(s, page, object)) {
-               printk(KERN_ERR "SLUB: %s slab 0x%p object "
-                       "0x%p already free.\n", s->name, page, object);
+               slab_err(s, page, "Object 0x%p already free", object);
                goto fail;
        }
 
@@ -760,32 +743,22 @@ static int free_object_checks(struct kmem_cache *s, struct page *page,
 
        if (unlikely(s != page->slab)) {
                if (!PageSlab(page))
-                       printk(KERN_ERR "slab_free %s size %d: attempt to"
-                               "free object(0x%p) outside of slab.\n",
-                               s->name, s->size, object);
+                       slab_err(s, page, "Attempt to free object(0x%p) "
+                               "outside of slab", object);
                else
-               if (!page->slab)
+               if (!page->slab) {
                        printk(KERN_ERR
-                               "slab_free : no slab(NULL) for object 0x%p.\n",
+                               "SLUB <none>: no slab for object 0x%p.\n",
                                                object);
+                       dump_stack();
+               }
                else
-               printk(KERN_ERR "slab_free %s(%d): object at 0x%p"
-                               " belongs to slab %s(%d)\n",
-                               s->name, s->size, object,
-                               page->slab->name, page->slab->size);
+                       slab_err(s, page, "object at 0x%p belongs "
+                               "to slab %s", object, page->slab->name);
                goto fail;
        }
-       if (s->flags & SLAB_TRACE) {
-               printk(KERN_INFO "TRACE %s free 0x%p inuse=%d fp=0x%p\n",
-                       s->name, object, page->inuse,
-                       page->freelist);
-               print_section("Object", object, s->objsize);
-               dump_stack();
-       }
-       init_object(s, object, 0);
        return 1;
 fail:
-       dump_stack();
        printk(KERN_ERR "@@@ SLUB: %s slab 0x%p object at 0x%p not freed.\n",
                s->name, page, object);
        return 0;
@@ -829,14 +802,8 @@ static void setup_object(struct kmem_cache *s, struct page *page,
                init_tracking(s, object);
        }
 
-       if (unlikely(s->ctor)) {
-               int mode = SLAB_CTOR_CONSTRUCTOR;
-
-               if (!(s->flags & __GFP_WAIT))
-                       mode |= SLAB_CTOR_ATOMIC;
-
-               s->ctor(object, s, mode);
-       }
+       if (unlikely(s->ctor))
+               s->ctor(object, s, SLAB_CTOR_CONSTRUCTOR);
 }
 
 static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node)
@@ -1284,6 +1251,13 @@ debug:
                goto another_slab;
        if (s->flags & SLAB_STORE_USER)
                set_track(s, object, TRACK_ALLOC, addr);
+       if (s->flags & SLAB_TRACE) {
+               printk(KERN_INFO "TRACE %s alloc 0x%p inuse=%d fp=0x%p\n",
+                       s->name, object, page->inuse,
+                       page->freelist);
+               dump_stack();
+       }
+       init_object(s, object, 1);
        goto have_object;
 }
 
@@ -1366,6 +1340,14 @@ debug:
                remove_full(s, page);
        if (s->flags & SLAB_STORE_USER)
                set_track(s, x, TRACK_FREE, addr);
+       if (s->flags & SLAB_TRACE) {
+               printk(KERN_INFO "TRACE %s free 0x%p inuse=%d fp=0x%p\n",
+                       s->name, object, page->inuse,
+                       page->freelist);
+               print_section("Object", (void *)object, s->objsize);
+               dump_stack();
+       }
+       init_object(s, object, 0);
        goto checks_ok;
 }
 
@@ -1500,7 +1482,7 @@ static unsigned long calculate_alignment(unsigned long flags,
         * specified alignment though. If that is greater
         * then use it.
         */
-       if ((flags & (SLAB_MUST_HWCACHE_ALIGN | SLAB_HWCACHE_ALIGN)) &&
+       if ((flags & SLAB_HWCACHE_ALIGN) &&
                        size > L1_CACHE_BYTES / 2)
                return max_t(unsigned long, align, L1_CACHE_BYTES);
 
@@ -1752,8 +1734,6 @@ static int kmem_cache_open(struct kmem_cache *s, gfp_t gfpflags,
        s->flags = flags;
        s->align = align;
 
-       BUG_ON(flags & SLUB_UNIMPLEMENTED);
-
        /*
         * The page->offset field is only 16 bit wide. This is an offset
         * in units of words from the beginning of an object. If the slab
@@ -1882,7 +1862,7 @@ static int kmem_cache_close(struct kmem_cache *s)
        for_each_online_node(node) {
                struct kmem_cache_node *n = get_node(s, node);
 
-               free_list(s, n, &n->partial);
+               n->nr_partial -= free_list(s, n, &n->partial);
                if (atomic_long_read(&n->nr_slabs))
                        return 1;
        }
@@ -2130,6 +2110,86 @@ void kfree(const void *x)
 }
 EXPORT_SYMBOL(kfree);
 
+/*
+ *  kmem_cache_shrink removes empty slabs from the partial lists
+ *  and then sorts the partially allocated slabs by the number
+ *  of items in use. The slabs with the most items in use
+ *  come first. New allocations will remove these from the
+ *  partial list because they are full. The slabs with the
+ *  least items are placed last. If it happens that the objects
+ *  are freed then the page can be returned to the page allocator.
+ *  Returns 0 on success, or -ENOMEM if the temporary array of
+ *  list heads (one per possible inuse count) cannot be allocated.
+ */
+int kmem_cache_shrink(struct kmem_cache *s)
+{
+       int node;
+       int i;
+       struct kmem_cache_node *n;
+       struct page *page;
+       struct page *t;
+       struct list_head *slabs_by_inuse =
+               kmalloc(sizeof(struct list_head) * s->objects, GFP_KERNEL);
+       unsigned long flags;
+
+       if (!slabs_by_inuse)
+               return -ENOMEM;
+
+       flush_all(s);
+       for_each_online_node(node) {
+               n = get_node(s, node);
+
+               if (!n->nr_partial)
+                       continue;
+
+               for (i = 0; i < s->objects; i++)
+                       INIT_LIST_HEAD(slabs_by_inuse + i);
+
+               spin_lock_irqsave(&n->list_lock, flags);
+
+               /*
+                * Build lists indexed by the items in use in
+                * each slab or free slabs if empty.
+                *
+                * Note that concurrent frees may occur while
+                * we hold the list_lock. page->inuse here is
+                * the upper limit.
+                */
+               list_for_each_entry_safe(page, t, &n->partial, lru) {
+                       if (!page->inuse && slab_trylock(page)) {
+                               /*
+                                * Must hold slab lock here because slab_free
+                                * may have freed the last object and be
+                                * waiting to release the slab.
+                                */
+                               list_del(&page->lru);
+                               n->nr_partial--;
+                               slab_unlock(page);
+                               discard_slab(s, page);
+                       } else {
+                               if (n->nr_partial > MAX_PARTIAL)
+                                       list_move(&page->lru,
+                                       slabs_by_inuse + page->inuse);
+                       }
+               }
+
+               /*
+                * Rebuild the partial list with the slabs filled up
+                * most first and the least used slabs at the end.
+                * Always splice, even if nr_partial has since dropped:
+                * a slab left on slabs_by_inuse would be lost at kfree.
+                */
+               for (i = s->objects - 1; i >= 0; i--)
+                       list_splice(slabs_by_inuse + i, n->partial.prev);
+
+               spin_unlock_irqrestore(&n->list_lock, flags);
+       }
+
+       kfree(slabs_by_inuse);
+       return 0;
+}
+EXPORT_SYMBOL(kmem_cache_shrink);
+
 /**
  * krealloc - reallocate memory. The contents will remain unchanged.
  *
@@ -2382,17 +2442,6 @@ static struct notifier_block __cpuinitdata slab_notifier =
 
 #endif
 
-/***************************************************************
- *     Compatiblility definitions
- **************************************************************/
-
-int kmem_cache_shrink(struct kmem_cache *s)
-{
-       flush_all(s);
-       return 0;
-}
-EXPORT_SYMBOL(kmem_cache_shrink);
-
 #ifdef CONFIG_NUMA
 
 /*****************************************************************
@@ -3077,8 +3126,7 @@ SLAB_ATTR(reclaim_account);
 
 static ssize_t hwcache_align_show(struct kmem_cache *s, char *buf)
 {
-       return sprintf(buf, "%d\n", !!(s->flags &
-               (SLAB_HWCACHE_ALIGN|SLAB_MUST_HWCACHE_ALIGN)));
+       return sprintf(buf, "%d\n", !!(s->flags & SLAB_HWCACHE_ALIGN));
 }
 SLAB_ATTR_RO(hwcache_align);
 
@@ -3169,6 +3217,25 @@ static ssize_t validate_store(struct kmem_cache *s,
 }
 SLAB_ATTR(validate);
 
+static ssize_t shrink_show(struct kmem_cache *s, char *buf)
+{
+       return 0;       /* writes nothing to buf; reports zero bytes */
+}
+
+static ssize_t shrink_store(struct kmem_cache *s,
+                       const char *buf, size_t length)
+{
+       int err;                /* result of kmem_cache_shrink() */
+
+       if (buf[0] != '1')      /* only a leading '1' requests a shrink */
+               return -EINVAL;
+       err = kmem_cache_shrink(s);
+       if (err)
+               return err;     /* hand the shrink failure to the writer */
+       return length;          /* report the whole write as consumed */
+}
+SLAB_ATTR(shrink);
+
 static ssize_t alloc_calls_show(struct kmem_cache *s, char *buf)
 {
        if (!(s->flags & SLAB_STORE_USER))
@@ -3225,6 +3292,7 @@ static struct attribute * slab_attrs[] = {
        &poison_attr.attr,
        &store_user_attr.attr,
        &validate_attr.attr,
+       &shrink_attr.attr,
        &alloc_calls_attr.attr,
        &free_calls_attr.attr,
 #ifdef CONFIG_ZONE_DMA