#endif
};
+/*
+ * Try to keep at least this much lowmem free. Do not allow normal
+ * allocations below this point, only high priority ones. Automatically
+ * tuned according to the amount of memory in the system.
+ */
int min_free_kbytes = 1024;
int user_min_free_kbytes = -1;
+/*
+ * Extra memory for the system to try freeing. Used to temporarily
+ * free memory, to make space for new workloads. Anyone can allocate
+ * down to the min watermarks controlled by min_free_kbytes above.
+ */
+int extra_free_kbytes = 0;
+
static unsigned long __meminitdata nr_kernel_pages;
static unsigned long __meminitdata nr_all_pages;
static unsigned long __meminitdata dma_reserve;
unsigned long combined_idx;
unsigned long uninitialized_var(buddy_idx);
struct page *buddy;
- unsigned int max_order = MAX_ORDER;
+ unsigned int max_order;
+
+ max_order = min_t(unsigned int, MAX_ORDER, pageblock_order + 1);
VM_BUG_ON(!zone_is_initialized(zone));
VM_BUG_ON_PAGE(page->flags & PAGE_FLAGS_CHECK_AT_PREP, page);
VM_BUG_ON(migratetype == -1);
- if (is_migrate_isolate(migratetype)) {
- /*
- * We restrict max order of merging to prevent merge
- * between freepages on isolate pageblock and normal
- * pageblock. Without this, pageblock isolation
- * could cause incorrect freepage accounting.
- */
- max_order = min_t(unsigned int, MAX_ORDER, pageblock_order + 1);
- } else {
+ if (likely(!is_migrate_isolate(migratetype)))
__mod_zone_freepage_state(zone, 1 << order, migratetype);
- }
- page_idx = pfn & ((1 << max_order) - 1);
+ page_idx = pfn & ((1 << MAX_ORDER) - 1);
VM_BUG_ON_PAGE(page_idx & ((1 << order) - 1), page);
VM_BUG_ON_PAGE(bad_range(zone, page), page);
+continue_merging:
while (order < max_order - 1) {
buddy_idx = __find_buddy_index(page_idx, order);
buddy = page + (buddy_idx - page_idx);
if (!page_is_buddy(page, buddy, order))
- break;
+ goto done_merging;
/*
* Our buddy is free or it is CONFIG_DEBUG_PAGEALLOC guard page,
* merge with it and move up one order.
page_idx = combined_idx;
order++;
}
+ if (max_order < MAX_ORDER) {
+ /* If we are here, it means order is >= pageblock_order.
+ * We want to prevent merge between freepages on isolate
+ * pageblock and normal pageblock. Without this, pageblock
+ * isolation could cause incorrect freepage or CMA accounting.
+ *
+ * We don't want to hit this code for the more frequent
+ * low-order merging.
+ */
+ if (unlikely(has_isolate_pageblock(zone))) {
+ int buddy_mt;
+
+ buddy_idx = __find_buddy_index(page_idx, order);
+ buddy = page + (buddy_idx - page_idx);
+ buddy_mt = get_pageblock_migratetype(buddy);
+
+ if (migratetype != buddy_mt
+ && (is_migrate_isolate(migratetype) ||
+ is_migrate_isolate(buddy_mt)))
+ goto done_merging;
+ }
+ max_order++;
+ goto continue_merging;
+ }
+
+done_merging:
set_page_order(page, order);
/*
static void __setup_per_zone_wmarks(void)
{
unsigned long pages_min = min_free_kbytes >> (PAGE_SHIFT - 10);
+ unsigned long pages_low = extra_free_kbytes >> (PAGE_SHIFT - 10);
unsigned long lowmem_pages = 0;
struct zone *zone;
unsigned long flags;
}
for_each_zone(zone) {
- u64 tmp;
+ u64 min, low;
spin_lock_irqsave(&zone->lock, flags);
- tmp = (u64)pages_min * zone->managed_pages;
- do_div(tmp, lowmem_pages);
+ min = (u64)pages_min * zone->managed_pages;
+ do_div(min, lowmem_pages);
+ low = (u64)pages_low * zone->managed_pages;
+ do_div(low, vm_total_pages);
+
if (is_highmem(zone)) {
/*
* __GFP_HIGH and PF_MEMALLOC allocations usually don't
* If it's a lowmem zone, reserve a number of pages
* proportionate to the zone's size.
*/
- zone->watermark[WMARK_MIN] = tmp;
+ zone->watermark[WMARK_MIN] = min;
}
- zone->watermark[WMARK_LOW] = min_wmark_pages(zone) + (tmp >> 2);
- zone->watermark[WMARK_HIGH] = min_wmark_pages(zone) + (tmp >> 1);
+ zone->watermark[WMARK_LOW] = min_wmark_pages(zone) +
+ low + (min >> 2);
+ zone->watermark[WMARK_HIGH] = min_wmark_pages(zone) +
+ low + (min >> 1);
__mod_zone_page_state(zone, NR_ALLOC_BATCH,
high_wmark_pages(zone) - low_wmark_pages(zone) -
setup_per_zone_inactive_ratio();
return 0;
}
-module_init(init_per_zone_wmark_min)
+core_initcall(init_per_zone_wmark_min)
/*
* min_free_kbytes_sysctl_handler - just a wrapper around proc_dointvec() so
* that we can call two helper functions whenever min_free_kbytes
- * changes.
+ * or extra_free_kbytes changes.
*/
int min_free_kbytes_sysctl_handler(struct ctl_table *table, int write,
void __user *buffer, size_t *length, loff_t *ppos)