Merge branch 'memblock-kill-early_node_map' of git://git.kernel.org/pub/scm/linux...
author	Ingo Molnar <mingo@elte.hu>
	Tue, 20 Dec 2011 11:14:26 +0000 (12:14 +0100)
committer	Ingo Molnar <mingo@elte.hu>
	Tue, 20 Dec 2011 11:14:26 +0000 (12:14 +0100)
arch/arm/kernel/setup.c
arch/s390/kernel/setup.c
arch/x86/Kconfig
arch/x86/kernel/mpparse.c
arch/x86/xen/setup.c
drivers/iommu/intel-iommu.c
include/linux/mm.h
kernel/printk.c
mm/page_alloc.c
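
This merge removes the early_node_map[] / add_active_range() bookkeeping and has architectures feed their memory layout directly into memblock with node information. A minimal sketch of the registration pattern described by the updated include/linux/mm.h comment follows; the helper name, PFN values and node id are illustrative placeholders, not code from any architecture touched in this merge.

/*
 * Illustrative sketch only (hypothetical helper, placeholder PFNs and
 * node id); real setup code derives these from firmware or boot data.
 */
#include <linux/init.h>
#include <linux/memblock.h>
#include <linux/mm.h>
#include <linux/pfn.h>

static void __init example_register_node_memory(void)
{
	unsigned long start_pfn = 0x100;	/* placeholder: 1 MiB */
	unsigned long end_pfn = 0x80000;	/* placeholder: 2 GiB */
	unsigned long max_zone_pfns[MAX_NR_ZONES] = { 0 };

	/* Register the RAM range with memblock, tagged as node 0. */
	memblock_add_node(PFN_PHYS(start_pfn),
			  PFN_PHYS(end_pfn - start_pfn), 0);

	/* Let the core compute zone sizes and holes from the memblock map. */
	max_zone_pfns[ZONE_NORMAL] = end_pfn;
	free_area_init_nodes(max_zone_pfns);
}

free_area_init_nodes() then walks the memblock regions (via for_each_mem_pfn_range()) instead of early_node_map[], which is what the bulk of the mm/page_alloc.c hunks below convert.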

diff --combined arch/arm/kernel/setup.c
index 8fc2c8fcbdc646a4a8babecbf4a758f88e6d8d60,6701ba99dea47080871c72e24f96e2656ece5142..c0b59bff6be640a7ee23a0443020d62dce591465
@@@ -52,6 -52,7 +52,7 @@@
  #include <asm/mach/time.h>
  #include <asm/traps.h>
  #include <asm/unwind.h>
+ #include <asm/memblock.h>
  
  #if defined(CONFIG_DEPRECATED_PARAM_STRUCT)
  #include "compat.h"
@@@ -895,6 -896,8 +896,6 @@@ void __init setup_arch(char **cmdline_p
  {
        struct machine_desc *mdesc;
  
 -      unwind_init();
 -
        setup_processor();
        mdesc = setup_machine_fdt(__atags_pointer);
        if (!mdesc)
        machine_desc = mdesc;
        machine_name = mdesc->name;
  
 +#ifdef CONFIG_ZONE_DMA
 +      if (mdesc->dma_zone_size) {
 +              extern unsigned long arm_dma_zone_size;
 +              arm_dma_zone_size = mdesc->dma_zone_size;
 +      }
 +#endif
        if (mdesc->soft_reboot)
                reboot_setup("s");
  
  
        tcm_init();
  
 -#ifdef CONFIG_ZONE_DMA
 -      if (mdesc->dma_zone_size) {
 -              extern unsigned long arm_dma_zone_size;
 -              arm_dma_zone_size = mdesc->dma_zone_size;
 -      }
 -#endif
  #ifdef CONFIG_MULTI_IRQ_HANDLER
        handle_arch_irq = mdesc->handle_irq;
  #endif
diff --combined arch/s390/kernel/setup.c
index e54c4ff8abaaa3d1a34efd34decd0d12c713e4ef,a2850df006ef320964d251780c227d385ac4a071..f11d1b037c50d31c48573730c8354dee330f8be0
@@@ -21,6 -21,7 +21,7 @@@
  #include <linux/module.h>
  #include <linux/sched.h>
  #include <linux/kernel.h>
+ #include <linux/memblock.h>
  #include <linux/mm.h>
  #include <linux/stddef.h>
  #include <linux/unistd.h>
@@@ -579,7 -580,7 +580,7 @@@ static unsigned long __init find_crash_
                *msg = "first memory chunk must be at least crashkernel size";
                return 0;
        }
 -      if (is_kdump_kernel() && (crash_size == OLDMEM_SIZE))
 +      if (OLDMEM_BASE && crash_size == OLDMEM_SIZE)
                return OLDMEM_BASE;
  
        for (i = MEMORY_CHUNKS - 1; i >= 0; i--) {
@@@ -820,7 -821,8 +821,8 @@@ setup_memory(void
                end_chunk = min(end_chunk, end_pfn);
                if (start_chunk >= end_chunk)
                        continue;
-               add_active_range(0, start_chunk, end_chunk);
+               memblock_add_node(PFN_PHYS(start_chunk),
+                                 PFN_PHYS(end_chunk - start_chunk), 0);
                pfn = max(start_chunk, start_pfn);
                for (; pfn < end_chunk; pfn++)
                        page_set_storage_key(PFN_PHYS(pfn),
diff --combined arch/x86/Kconfig
index efb42949cc09349e37246baa07f018648186386f,9bab4a90d7a1eb71dab83e7129e959117168f350..67d6af3581bc288073f77a3fd35372aa4893c7f8
@@@ -26,6 -26,8 +26,8 @@@ config X8
        select HAVE_IOREMAP_PROT
        select HAVE_KPROBES
        select HAVE_MEMBLOCK
+       select HAVE_MEMBLOCK_NODE_MAP
+       select ARCH_DISCARD_MEMBLOCK
        select ARCH_WANT_OPTIONAL_GPIOLIB
        select ARCH_WANT_FRAME_POINTERS
        select HAVE_DMA_ATTRS
@@@ -204,9 -206,6 +206,6 @@@ config ZONE_DMA3
        bool
        default X86_64
  
- config ARCH_POPULATES_NODE_MAP
-       def_bool y
  config AUDIT_ARCH
        bool
        default X86_64
@@@ -390,7 -389,7 +389,7 @@@ config X86_INTEL_C
          This option compiles in support for the CE4100 SOC for settop
          boxes and media devices.
  
 -config X86_INTEL_MID
 +config X86_WANT_INTEL_MID
        bool "Intel MID platform support"
        depends on X86_32
        depends on X86_EXTENDED_PLATFORM
          systems which do not have the PCI legacy interfaces (Moorestown,
          Medfield). If you are building for a PC class system say N here.
  
 -if X86_INTEL_MID
 +if X86_WANT_INTEL_MID
 +
 +config X86_INTEL_MID
 +      bool
  
  config X86_MRST
         bool "Moorestown MID platform"
        select SPI
        select INTEL_SCU_IPC
        select X86_PLATFORM_DEVICES
 +      select X86_INTEL_MID
        ---help---
          Moorestown is Intel's Low Power Intel Architecture (LPIA) based Moblin
          Internet Device(MID) platform. Moorestown consists of two chips:
diff --combined arch/x86/kernel/mpparse.c
index 0741b062a3048a6e2b1b5bd0eb4edbbf3d5bb9cf,a6b79c16ec78cfaef1bd48c77ac701b37ee88523..ca470e4c92dc10c5ff70af33a268305a24f8ca74
@@@ -95,8 -95,8 +95,8 @@@ static void __init MP_bus_info(struct m
        }
  #endif
  
 +      set_bit(m->busid, mp_bus_not_pci);
        if (strncmp(str, BUSTYPE_ISA, sizeof(BUSTYPE_ISA) - 1) == 0) {
 -              set_bit(m->busid, mp_bus_not_pci);
  #if defined(CONFIG_EISA) || defined(CONFIG_MCA)
                mp_bus_id_to_type[m->busid] = MP_BUS_ISA;
  #endif
@@@ -564,9 -564,7 +564,7 @@@ void __init default_get_smp_config(unsi
  
  static void __init smp_reserve_memory(struct mpf_intel *mpf)
  {
-       unsigned long size = get_mpc_size(mpf->physptr);
-       memblock_x86_reserve_range(mpf->physptr, mpf->physptr+size, "* MP-table mpc");
+       memblock_reserve(mpf->physptr, get_mpc_size(mpf->physptr));
  }
  
  static int __init smp_scan_config(unsigned long base, unsigned long length)
                               mpf, (u64)virt_to_phys(mpf));
  
                        mem = virt_to_phys(mpf);
-                       memblock_x86_reserve_range(mem, mem + sizeof(*mpf), "* MP-table mpf");
+                       memblock_reserve(mem, sizeof(*mpf));
                        if (mpf->physptr)
                                smp_reserve_memory(mpf);
  
@@@ -836,10 -834,8 +834,8 @@@ early_param("alloc_mptable", parse_allo
  
  void __init early_reserve_e820_mpc_new(void)
  {
-       if (enable_update_mptable && alloc_mptable) {
-               u64 startt = 0;
-               mpc_new_phys = early_reserve_e820(startt, mpc_new_length, 4);
-       }
+       if (enable_update_mptable && alloc_mptable)
+               mpc_new_phys = early_reserve_e820(mpc_new_length, 4);
  }
  
  static int __init update_mp_table(void)
diff --combined arch/x86/xen/setup.c
index b2c7179fa26343d5cedfb7846d2f1b92ceb97390,f5e1362550e76130eb4b770e926825ec76cb56f1..e03c63692176f1e5f2ce38fda9cce20e389ce183
@@@ -75,7 -75,7 +75,7 @@@ static void __init xen_add_extra_mem(u6
        if (i == XEN_EXTRA_MEM_MAX_REGIONS)
                printk(KERN_WARNING "Warning: not enough extra memory regions\n");
  
-       memblock_x86_reserve_range(start, start + size, "XEN EXTRA");
+       memblock_reserve(start, size);
  
        xen_max_p2m_pfn = PFN_DOWN(start + size);
  
@@@ -173,21 -173,9 +173,21 @@@ static unsigned long __init xen_get_max
        domid_t domid = DOMID_SELF;
        int ret;
  
 -      ret = HYPERVISOR_memory_op(XENMEM_maximum_reservation, &domid);
 -      if (ret > 0)
 -              max_pages = ret;
 +      /*
 +       * For the initial domain we use the maximum reservation as
 +       * the maximum page.
 +       *
 +       * For guest domains the current maximum reservation reflects
 +       * the current maximum rather than the static maximum. In this
 +       * case the e820 map provided to us will cover the static
 +       * maximum region.
 +       */
 +      if (xen_initial_domain()) {
 +              ret = HYPERVISOR_memory_op(XENMEM_maximum_reservation, &domid);
 +              if (ret > 0)
 +                      max_pages = ret;
 +      }
 +
        return min(max_pages, MAX_DOMAIN_PAGES);
  }
  
@@@ -311,9 -299,8 +311,8 @@@ char * __init xen_memory_setup(void
         *  - xen_start_info
         * See comment above "struct start_info" in <xen/interface/xen.h>
         */
-       memblock_x86_reserve_range(__pa(xen_start_info->mfn_list),
-                     __pa(xen_start_info->pt_base),
-                       "XEN START INFO");
+       memblock_reserve(__pa(xen_start_info->mfn_list),
+                        xen_start_info->pt_base - xen_start_info->mfn_list);
  
        sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
  
@@@ -422,6 -409,6 +421,6 @@@ void __init xen_arch_setup(void
  #endif
        disable_cpuidle();
        boot_option_idle_override = IDLE_HALT;
 -
 +      WARN_ON(set_pm_idle_to_default());
        fiddle_vdso();
  }
diff --combined drivers/iommu/intel-iommu.c
index bdc447fd4766fbba47f46f49301a3a4758639289,d1c17934d66f1e4070a00800d87e8b78d36d5758..31053a951c3452640d7dd4e2a6f4c9c2460dee63
@@@ -41,6 -41,7 +41,7 @@@
  #include <linux/tboot.h>
  #include <linux/dmi.h>
  #include <linux/pci-ats.h>
+ #include <linux/memblock.h>
  #include <asm/cacheflush.h>
  #include <asm/iommu.h>
  
@@@ -405,9 -406,6 +406,9 @@@ int dmar_disabled = 0
  int dmar_disabled = 1;
  #endif /*CONFIG_INTEL_IOMMU_DEFAULT_ON*/
  
 +int intel_iommu_enabled = 0;
 +EXPORT_SYMBOL_GPL(intel_iommu_enabled);
 +
  static int dmar_map_gfx = 1;
  static int dmar_forcedac;
  static int intel_iommu_strict;
@@@ -2188,18 -2186,6 +2189,6 @@@ static inline void iommu_prepare_isa(vo
  
  static int md_domain_init(struct dmar_domain *domain, int guest_width);
  
- static int __init si_domain_work_fn(unsigned long start_pfn,
-                                   unsigned long end_pfn, void *datax)
- {
-       int *ret = datax;
-       *ret = iommu_domain_identity_map(si_domain,
-                                        (uint64_t)start_pfn << PAGE_SHIFT,
-                                        (uint64_t)end_pfn << PAGE_SHIFT);
-       return *ret;
- }
  static int __init si_domain_init(int hw)
  {
        struct dmar_drhd_unit *drhd;
                return 0;
  
        for_each_online_node(nid) {
-               work_with_active_regions(nid, si_domain_work_fn, &ret);
-               if (ret)
-                       return ret;
+               unsigned long start_pfn, end_pfn;
+               int i;
+               for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL) {
+                       ret = iommu_domain_identity_map(si_domain,
+                                       PFN_PHYS(start_pfn), PFN_PHYS(end_pfn));
+                       if (ret)
+                               return ret;
+               }
        }
  
        return 0;
@@@ -3527,7 -3519,7 +3522,7 @@@ found
        return 0;
  }
  
 -int dmar_parse_rmrr_atsr_dev(void)
 +int __init dmar_parse_rmrr_atsr_dev(void)
  {
        struct dmar_rmrr_unit *rmrr, *rmrr_n;
        struct dmar_atsr_unit *atsr, *atsr_n;
@@@ -3650,8 -3642,6 +3645,8 @@@ int __init intel_iommu_init(void
  
        bus_register_notifier(&pci_bus_type, &device_nb);
  
 +      intel_iommu_enabled = 1;
 +
        return 0;
  }
  
diff --combined include/linux/mm.h
index 4baadd18f4ad3402f47fbd2ac919bafba519bed4,c6f49bea52a34e8eb0c0ec2daaddcd2e29d5b981..5d9b4c9813bdb7ae5700c3f078dc5629371c5f4d
@@@ -10,7 -10,6 +10,7 @@@
  #include <linux/mmzone.h>
  #include <linux/rbtree.h>
  #include <linux/prio_tree.h>
 +#include <linux/atomic.h>
  #include <linux/debug_locks.h>
  #include <linux/mm_types.h>
  #include <linux/range.h>
@@@ -1253,41 -1252,34 +1253,34 @@@ static inline void pgtable_page_dtor(st
  extern void free_area_init(unsigned long * zones_size);
  extern void free_area_init_node(int nid, unsigned long * zones_size,
                unsigned long zone_start_pfn, unsigned long *zholes_size);
- #ifdef CONFIG_ARCH_POPULATES_NODE_MAP
+ #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
  /*
-  * With CONFIG_ARCH_POPULATES_NODE_MAP set, an architecture may initialise its
+  * With CONFIG_HAVE_MEMBLOCK_NODE_MAP set, an architecture may initialise its
   * zones, allocate the backing mem_map and account for memory holes in a more
   * architecture independent manner. This is a substitute for creating the
   * zone_sizes[] and zholes_size[] arrays and passing them to
   * free_area_init_node()
   *
   * An architecture is expected to register range of page frames backed by
-  * physical memory with add_active_range() before calling
+  * physical memory with memblock_add[_node]() before calling
   * free_area_init_nodes() passing in the PFN each zone ends at. At a basic
   * usage, an architecture is expected to do something like
   *
   * unsigned long max_zone_pfns[MAX_NR_ZONES] = {max_dma, max_normal_pfn,
   *                                                     max_highmem_pfn};
   * for_each_valid_physical_page_range()
-  *    add_active_range(node_id, start_pfn, end_pfn)
+  *    memblock_add_node(base, size, nid)
   * free_area_init_nodes(max_zone_pfns);
   *
-  * If the architecture guarantees that there are no holes in the ranges
-  * registered with add_active_range(), free_bootmem_active_regions()
-  * will call free_bootmem_node() for each registered physical page range.
-  * Similarly sparse_memory_present_with_active_regions() calls
-  * memory_present() for each range when SPARSEMEM is enabled.
+  * free_bootmem_with_active_regions() calls free_bootmem_node() for each
+  * registered physical page range.  Similarly
+  * sparse_memory_present_with_active_regions() calls memory_present() for
+  * each range when SPARSEMEM is enabled.
   *
   * See mm/page_alloc.c for more information on each function exposed by
-  * CONFIG_ARCH_POPULATES_NODE_MAP
+  * CONFIG_HAVE_MEMBLOCK_NODE_MAP.
   */
  extern void free_area_init_nodes(unsigned long *max_zone_pfn);
- extern void add_active_range(unsigned int nid, unsigned long start_pfn,
-                                       unsigned long end_pfn);
- extern void remove_active_range(unsigned int nid, unsigned long start_pfn,
-                                       unsigned long end_pfn);
- extern void remove_all_active_ranges(void);
- void sort_node_map(void);
  unsigned long node_map_pfn_alignment(void);
  unsigned long __absent_pages_in_range(int nid, unsigned long start_pfn,
                                                unsigned long end_pfn);
@@@ -1300,14 -1292,11 +1293,11 @@@ extern void free_bootmem_with_active_re
                                                unsigned long max_low_pfn);
  int add_from_early_node_map(struct range *range, int az,
                                   int nr_range, int nid);
- u64 __init find_memory_core_early(int nid, u64 size, u64 align,
-                                       u64 goal, u64 limit);
- typedef int (*work_fn_t)(unsigned long, unsigned long, void *);
- extern void work_with_active_regions(int nid, work_fn_t work_fn, void *data);
  extern void sparse_memory_present_with_active_regions(int nid);
- #endif /* CONFIG_ARCH_POPULATES_NODE_MAP */
  
- #if !defined(CONFIG_ARCH_POPULATES_NODE_MAP) && \
+ #endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */
+ #if !defined(CONFIG_HAVE_MEMBLOCK_NODE_MAP) && \
      !defined(CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID)
  static inline int __early_pfn_to_nid(unsigned long pfn)
  {
diff --combined kernel/printk.c
index 7982a0a841eaf082fe929e24a1f4bd5aefd4f015,baf2aebd6970af85547a865276785557b4066e27..afc8310c462533fcc86def288e89e1e6f37cf7f4
@@@ -199,7 -199,7 +199,7 @@@ void __init setup_log_buf(int early
                unsigned long mem;
  
                mem = memblock_alloc(new_log_buf_len, PAGE_SIZE);
-               if (mem == MEMBLOCK_ERROR)
+               if (!mem)
                        return;
                new_log_buf = __va(mem);
        } else {
@@@ -1293,11 -1293,10 +1293,11 @@@ again
        raw_spin_lock(&logbuf_lock);
        if (con_start != log_end)
                retry = 1;
 +      raw_spin_unlock_irqrestore(&logbuf_lock, flags);
 +
        if (retry && console_trylock())
                goto again;
  
 -      raw_spin_unlock_irqrestore(&logbuf_lock, flags);
        if (wake_klogd)
                wake_up_klogd();
  }
diff --combined mm/page_alloc.c
index 2b8ba3aebf6e2c6b46b0d12dfea058ee3ab022fe,63ff8dab433a5dc40733ad512246564a6a48c3ad..bdc804c2d99cd8186497f14cbd3d71cf7175f69d
@@@ -181,39 -181,17 +181,17 @@@ static unsigned long __meminitdata nr_k
  static unsigned long __meminitdata nr_all_pages;
  static unsigned long __meminitdata dma_reserve;
  
- #ifdef CONFIG_ARCH_POPULATES_NODE_MAP
-   /*
-    * MAX_ACTIVE_REGIONS determines the maximum number of distinct
-    * ranges of memory (RAM) that may be registered with add_active_range().
-    * Ranges passed to add_active_range() will be merged if possible
-    * so the number of times add_active_range() can be called is
-    * related to the number of nodes and the number of holes
-    */
-   #ifdef CONFIG_MAX_ACTIVE_REGIONS
-     /* Allow an architecture to set MAX_ACTIVE_REGIONS to save memory */
-     #define MAX_ACTIVE_REGIONS CONFIG_MAX_ACTIVE_REGIONS
-   #else
-     #if MAX_NUMNODES >= 32
-       /* If there can be many nodes, allow up to 50 holes per node */
-       #define MAX_ACTIVE_REGIONS (MAX_NUMNODES*50)
-     #else
-       /* By default, allow up to 256 distinct regions */
-       #define MAX_ACTIVE_REGIONS 256
-     #endif
-   #endif
-   static struct node_active_region __meminitdata early_node_map[MAX_ACTIVE_REGIONS];
-   static int __meminitdata nr_nodemap_entries;
-   static unsigned long __meminitdata arch_zone_lowest_possible_pfn[MAX_NR_ZONES];
-   static unsigned long __meminitdata arch_zone_highest_possible_pfn[MAX_NR_ZONES];
-   static unsigned long __initdata required_kernelcore;
-   static unsigned long __initdata required_movablecore;
-   static unsigned long __meminitdata zone_movable_pfn[MAX_NUMNODES];
-   /* movable_zone is the "real" zone pages in ZONE_MOVABLE are taken from */
-   int movable_zone;
-   EXPORT_SYMBOL(movable_zone);
- #endif /* CONFIG_ARCH_POPULATES_NODE_MAP */
+ #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
+ static unsigned long __meminitdata arch_zone_lowest_possible_pfn[MAX_NR_ZONES];
+ static unsigned long __meminitdata arch_zone_highest_possible_pfn[MAX_NR_ZONES];
+ static unsigned long __initdata required_kernelcore;
+ static unsigned long __initdata required_movablecore;
+ static unsigned long __meminitdata zone_movable_pfn[MAX_NUMNODES];
+ /* movable_zone is the "real" zone pages in ZONE_MOVABLE are taken from */
+ int movable_zone;
+ EXPORT_SYMBOL(movable_zone);
+ #endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */
  
  #if MAX_NUMNODES > 1
  int nr_node_ids __read_mostly = MAX_NUMNODES;
@@@ -356,8 -334,8 +334,8 @@@ void prep_compound_page(struct page *pa
        __SetPageHead(page);
        for (i = 1; i < nr_pages; i++) {
                struct page *p = page + i;
 -
                __SetPageTail(p);
 +              set_page_count(p, 0);
                p->first_page = page;
        }
  }
@@@ -706,10 -684,10 +684,10 @@@ void __meminit __free_pages_bootmem(str
                int loop;
  
                prefetchw(page);
-               for (loop = 0; loop < BITS_PER_LONG; loop++) {
+               for (loop = 0; loop < (1 << order); loop++) {
                        struct page *p = &page[loop];
  
-                       if (loop + 1 < BITS_PER_LONG)
+                       if (loop + 1 < (1 << order))
                                prefetchw(p + 1);
                        __ClearPageReserved(p);
                        set_page_count(p, 0);
@@@ -3377,15 -3355,9 +3355,15 @@@ static void setup_zone_migrate_reserve(
        unsigned long block_migratetype;
        int reserve;
  
 -      /* Get the start pfn, end pfn and the number of blocks to reserve */
 +      /*
 +       * Get the start pfn, end pfn and the number of blocks to reserve
 +       * We have to be careful to be aligned to pageblock_nr_pages to
 +       * make sure that we always check pfn_valid for the first page in
 +       * the block.
 +       */
        start_pfn = zone->zone_start_pfn;
        end_pfn = start_pfn + zone->spanned_pages;
 +      start_pfn = roundup(start_pfn, pageblock_nr_pages);
        reserve = roundup(min_wmark_pages(zone), pageblock_nr_pages) >>
                                                        pageblock_order;
  
@@@ -3737,35 -3709,7 +3715,7 @@@ __meminit int init_currently_empty_zone
        return 0;
  }
  
- #ifdef CONFIG_ARCH_POPULATES_NODE_MAP
- /*
-  * Basic iterator support. Return the first range of PFNs for a node
-  * Note: nid == MAX_NUMNODES returns first region regardless of node
-  */
- static int __meminit first_active_region_index_in_nid(int nid)
- {
-       int i;
-       for (i = 0; i < nr_nodemap_entries; i++)
-               if (nid == MAX_NUMNODES || early_node_map[i].nid == nid)
-                       return i;
-       return -1;
- }
- /*
-  * Basic iterator support. Return the next active range of PFNs for a node
-  * Note: nid == MAX_NUMNODES returns next region regardless of node
-  */
- static int __meminit next_active_region_index_in_nid(int index, int nid)
- {
-       for (index = index + 1; index < nr_nodemap_entries; index++)
-               if (nid == MAX_NUMNODES || early_node_map[index].nid == nid)
-                       return index;
-       return -1;
- }
+ #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
  #ifndef CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID
  /*
   * Required by SPARSEMEM. Given a PFN, return what node the PFN is on.
   */
  int __meminit __early_pfn_to_nid(unsigned long pfn)
  {
-       int i;
-       for (i = 0; i < nr_nodemap_entries; i++) {
-               unsigned long start_pfn = early_node_map[i].start_pfn;
-               unsigned long end_pfn = early_node_map[i].end_pfn;
+       unsigned long start_pfn, end_pfn;
+       int i, nid;
  
+       for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, &nid)
                if (start_pfn <= pfn && pfn < end_pfn)
-                       return early_node_map[i].nid;
-       }
+                       return nid;
        /* This is a memory hole */
        return -1;
  }
@@@ -3812,11 -3753,6 +3759,6 @@@ bool __meminit early_pfn_in_nid(unsigne
  }
  #endif
  
- /* Basic iterator support to walk early_node_map[] */
- #define for_each_active_range_index_in_nid(i, nid) \
-       for (i = first_active_region_index_in_nid(nid); i != -1; \
-                               i = next_active_region_index_in_nid(i, nid))
  /**
   * free_bootmem_with_active_regions - Call free_bootmem_node for each active range
   * @nid: The node to free memory on. If MAX_NUMNODES, all nodes are freed.
   * add_active_ranges() contain no holes and may be freed, this
   * this function may be used instead of calling free_bootmem() manually.
   */
- void __init free_bootmem_with_active_regions(int nid,
-                                               unsigned long max_low_pfn)
- {
-       int i;
-       for_each_active_range_index_in_nid(i, nid) {
-               unsigned long size_pages = 0;
-               unsigned long end_pfn = early_node_map[i].end_pfn;
-               if (early_node_map[i].start_pfn >= max_low_pfn)
-                       continue;
-               if (end_pfn > max_low_pfn)
-                       end_pfn = max_low_pfn;
-               size_pages = end_pfn - early_node_map[i].start_pfn;
-               free_bootmem_node(NODE_DATA(early_node_map[i].nid),
-                               PFN_PHYS(early_node_map[i].start_pfn),
-                               size_pages << PAGE_SHIFT);
-       }
- }
- #ifdef CONFIG_HAVE_MEMBLOCK
- /*
-  * Basic iterator support. Return the last range of PFNs for a node
-  * Note: nid == MAX_NUMNODES returns last region regardless of node
-  */
- static int __meminit last_active_region_index_in_nid(int nid)
+ void __init free_bootmem_with_active_regions(int nid, unsigned long max_low_pfn)
  {
-       int i;
-       for (i = nr_nodemap_entries - 1; i >= 0; i--)
-               if (nid == MAX_NUMNODES || early_node_map[i].nid == nid)
-                       return i;
-       return -1;
- }
- /*
-  * Basic iterator support. Return the previous active range of PFNs for a node
-  * Note: nid == MAX_NUMNODES returns next region regardless of node
-  */
- static int __meminit previous_active_region_index_in_nid(int index, int nid)
- {
-       for (index = index - 1; index >= 0; index--)
-               if (nid == MAX_NUMNODES || early_node_map[index].nid == nid)
-                       return index;
-       return -1;
- }
- #define for_each_active_range_index_in_nid_reverse(i, nid) \
-       for (i = last_active_region_index_in_nid(nid); i != -1; \
-                               i = previous_active_region_index_in_nid(i, nid))
- u64 __init find_memory_core_early(int nid, u64 size, u64 align,
-                                       u64 goal, u64 limit)
- {
-       int i;
-       /* Need to go over early_node_map to find out good range for node */
-       for_each_active_range_index_in_nid_reverse(i, nid) {
-               u64 addr;
-               u64 ei_start, ei_last;
-               u64 final_start, final_end;
-               ei_last = early_node_map[i].end_pfn;
-               ei_last <<= PAGE_SHIFT;
-               ei_start = early_node_map[i].start_pfn;
-               ei_start <<= PAGE_SHIFT;
-               final_start = max(ei_start, goal);
-               final_end = min(ei_last, limit);
-               if (final_start >= final_end)
-                       continue;
-               addr = memblock_find_in_range(final_start, final_end, size, align);
+       unsigned long start_pfn, end_pfn;
+       int i, this_nid;
  
-               if (addr == MEMBLOCK_ERROR)
-                       continue;
+       for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, &this_nid) {
+               start_pfn = min(start_pfn, max_low_pfn);
+               end_pfn = min(end_pfn, max_low_pfn);
  
-               return addr;
+               if (start_pfn < end_pfn)
+                       free_bootmem_node(NODE_DATA(this_nid),
+                                         PFN_PHYS(start_pfn),
+                                         (end_pfn - start_pfn) << PAGE_SHIFT);
        }
-       return MEMBLOCK_ERROR;
  }
- #endif
  
  int __init add_from_early_node_map(struct range *range, int az,
                                   int nr_range, int nid)
  {
+       unsigned long start_pfn, end_pfn;
        int i;
-       u64 start, end;
  
        /* need to go over early_node_map to find out good range for node */
-       for_each_active_range_index_in_nid(i, nid) {
-               start = early_node_map[i].start_pfn;
-               end = early_node_map[i].end_pfn;
-               nr_range = add_range(range, az, nr_range, start, end);
-       }
+       for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL)
+               nr_range = add_range(range, az, nr_range, start_pfn, end_pfn);
        return nr_range;
  }
  
- void __init work_with_active_regions(int nid, work_fn_t work_fn, void *data)
- {
-       int i;
-       int ret;
-       for_each_active_range_index_in_nid(i, nid) {
-               ret = work_fn(early_node_map[i].start_pfn,
-                             early_node_map[i].end_pfn, data);
-               if (ret)
-                       break;
-       }
- }
  /**
   * sparse_memory_present_with_active_regions - Call memory_present for each active range
   * @nid: The node to call memory_present for. If MAX_NUMNODES, all nodes will be used.
   */
  void __init sparse_memory_present_with_active_regions(int nid)
  {
-       int i;
+       unsigned long start_pfn, end_pfn;
+       int i, this_nid;
  
-       for_each_active_range_index_in_nid(i, nid)
-               memory_present(early_node_map[i].nid,
-                               early_node_map[i].start_pfn,
-                               early_node_map[i].end_pfn);
+       for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, &this_nid)
+               memory_present(this_nid, start_pfn, end_pfn);
  }
  
  /**
  void __meminit get_pfn_range_for_nid(unsigned int nid,
                        unsigned long *start_pfn, unsigned long *end_pfn)
  {
+       unsigned long this_start_pfn, this_end_pfn;
        int i;
        *start_pfn = -1UL;
        *end_pfn = 0;
  
-       for_each_active_range_index_in_nid(i, nid) {
-               *start_pfn = min(*start_pfn, early_node_map[i].start_pfn);
-               *end_pfn = max(*end_pfn, early_node_map[i].end_pfn);
+       for_each_mem_pfn_range(i, nid, &this_start_pfn, &this_end_pfn, NULL) {
+               *start_pfn = min(*start_pfn, this_start_pfn);
+               *end_pfn = max(*end_pfn, this_end_pfn);
        }
  
        if (*start_pfn == -1UL)
@@@ -4083,46 -3932,16 +3938,16 @@@ unsigned long __meminit __absent_pages_
                                unsigned long range_start_pfn,
                                unsigned long range_end_pfn)
  {
-       int i = 0;
-       unsigned long prev_end_pfn = 0, hole_pages = 0;
-       unsigned long start_pfn;
-       /* Find the end_pfn of the first active range of pfns in the node */
-       i = first_active_region_index_in_nid(nid);
-       if (i == -1)
-               return 0;
-       prev_end_pfn = min(early_node_map[i].start_pfn, range_end_pfn);
-       /* Account for ranges before physical memory on this node */
-       if (early_node_map[i].start_pfn > range_start_pfn)
-               hole_pages = prev_end_pfn - range_start_pfn;
-       /* Find all holes for the zone within the node */
-       for (; i != -1; i = next_active_region_index_in_nid(i, nid)) {
-               /* No need to continue if prev_end_pfn is outside the zone */
-               if (prev_end_pfn >= range_end_pfn)
-                       break;
-               /* Make sure the end of the zone is not within the hole */
-               start_pfn = min(early_node_map[i].start_pfn, range_end_pfn);
-               prev_end_pfn = max(prev_end_pfn, range_start_pfn);
+       unsigned long nr_absent = range_end_pfn - range_start_pfn;
+       unsigned long start_pfn, end_pfn;
+       int i;
  
-               /* Update the hole size cound and move on */
-               if (start_pfn > range_start_pfn) {
-                       BUG_ON(prev_end_pfn > start_pfn);
-                       hole_pages += start_pfn - prev_end_pfn;
-               }
-               prev_end_pfn = early_node_map[i].end_pfn;
+       for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL) {
+               start_pfn = clamp(start_pfn, range_start_pfn, range_end_pfn);
+               end_pfn = clamp(end_pfn, range_start_pfn, range_end_pfn);
+               nr_absent -= end_pfn - start_pfn;
        }
-       /* Account for ranges past physical memory on this node */
-       if (range_end_pfn > prev_end_pfn)
-               hole_pages += range_end_pfn -
-                               max(range_start_pfn, prev_end_pfn);
-       return hole_pages;
+       return nr_absent;
  }
  
  /**
@@@ -4143,14 -3962,14 +3968,14 @@@ static unsigned long __meminit zone_abs
                                        unsigned long zone_type,
                                        unsigned long *ignored)
  {
+       unsigned long zone_low = arch_zone_lowest_possible_pfn[zone_type];
+       unsigned long zone_high = arch_zone_highest_possible_pfn[zone_type];
        unsigned long node_start_pfn, node_end_pfn;
        unsigned long zone_start_pfn, zone_end_pfn;
  
        get_pfn_range_for_nid(nid, &node_start_pfn, &node_end_pfn);
-       zone_start_pfn = max(arch_zone_lowest_possible_pfn[zone_type],
-                                                       node_start_pfn);
-       zone_end_pfn = min(arch_zone_highest_possible_pfn[zone_type],
-                                                       node_end_pfn);
+       zone_start_pfn = clamp(node_start_pfn, zone_low, zone_high);
+       zone_end_pfn = clamp(node_end_pfn, zone_low, zone_high);
  
        adjust_zone_range_for_zone_movable(nid, zone_type,
                        node_start_pfn, node_end_pfn,
        return __absent_pages_in_range(nid, zone_start_pfn, zone_end_pfn);
  }
  
- #else
+ #else /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */
  static inline unsigned long __meminit zone_spanned_pages_in_node(int nid,
                                        unsigned long zone_type,
                                        unsigned long *zones_size)
@@@ -4176,7 -3995,7 +4001,7 @@@ static inline unsigned long __meminit z
        return zholes_size[zone_type];
  }
  
- #endif
+ #endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */
  
  static void __meminit calculate_node_totalpages(struct pglist_data *pgdat,
                unsigned long *zones_size, unsigned long *zholes_size)
@@@ -4399,10 -4218,10 +4224,10 @@@ static void __init_refok alloc_node_mem
         */
        if (pgdat == NODE_DATA(0)) {
                mem_map = NODE_DATA(0)->node_mem_map;
- #ifdef CONFIG_ARCH_POPULATES_NODE_MAP
+ #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
                if (page_to_pfn(mem_map) != pgdat->node_start_pfn)
                        mem_map -= (pgdat->node_start_pfn - ARCH_PFN_OFFSET);
- #endif /* CONFIG_ARCH_POPULATES_NODE_MAP */
+ #endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */
        }
  #endif
  #endif /* CONFIG_FLAT_NODE_MEM_MAP */
@@@ -4427,7 -4246,7 +4252,7 @@@ void __paginginit free_area_init_node(i
        free_area_init_core(pgdat, zones_size, zholes_size);
  }
  
- #ifdef CONFIG_ARCH_POPULATES_NODE_MAP
+ #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
  
  #if MAX_NUMNODES > 1
  /*
@@@ -4448,170 -4267,6 +4273,6 @@@ static inline void setup_nr_node_ids(vo
  }
  #endif
  
- /**
-  * add_active_range - Register a range of PFNs backed by physical memory
-  * @nid: The node ID the range resides on
-  * @start_pfn: The start PFN of the available physical memory
-  * @end_pfn: The end PFN of the available physical memory
-  *
-  * These ranges are stored in an early_node_map[] and later used by
-  * free_area_init_nodes() to calculate zone sizes and holes. If the
-  * range spans a memory hole, it is up to the architecture to ensure
-  * the memory is not freed by the bootmem allocator. If possible
-  * the range being registered will be merged with existing ranges.
-  */
- void __init add_active_range(unsigned int nid, unsigned long start_pfn,
-                                               unsigned long end_pfn)
- {
-       int i;
-       mminit_dprintk(MMINIT_TRACE, "memory_register",
-                       "Entering add_active_range(%d, %#lx, %#lx) "
-                       "%d entries of %d used\n",
-                       nid, start_pfn, end_pfn,
-                       nr_nodemap_entries, MAX_ACTIVE_REGIONS);
-       mminit_validate_memmodel_limits(&start_pfn, &end_pfn);
-       /* Merge with existing active regions if possible */
-       for (i = 0; i < nr_nodemap_entries; i++) {
-               if (early_node_map[i].nid != nid)
-                       continue;
-               /* Skip if an existing region covers this new one */
-               if (start_pfn >= early_node_map[i].start_pfn &&
-                               end_pfn <= early_node_map[i].end_pfn)
-                       return;
-               /* Merge forward if suitable */
-               if (start_pfn <= early_node_map[i].end_pfn &&
-                               end_pfn > early_node_map[i].end_pfn) {
-                       early_node_map[i].end_pfn = end_pfn;
-                       return;
-               }
-               /* Merge backward if suitable */
-               if (start_pfn < early_node_map[i].start_pfn &&
-                               end_pfn >= early_node_map[i].start_pfn) {
-                       early_node_map[i].start_pfn = start_pfn;
-                       return;
-               }
-       }
-       /* Check that early_node_map is large enough */
-       if (i >= MAX_ACTIVE_REGIONS) {
-               printk(KERN_CRIT "More than %d memory regions, truncating\n",
-                                                       MAX_ACTIVE_REGIONS);
-               return;
-       }
-       early_node_map[i].nid = nid;
-       early_node_map[i].start_pfn = start_pfn;
-       early_node_map[i].end_pfn = end_pfn;
-       nr_nodemap_entries = i + 1;
- }
- /**
-  * remove_active_range - Shrink an existing registered range of PFNs
-  * @nid: The node id the range is on that should be shrunk
-  * @start_pfn: The new PFN of the range
-  * @end_pfn: The new PFN of the range
-  *
-  * i386 with NUMA use alloc_remap() to store a node_mem_map on a local node.
-  * The map is kept near the end physical page range that has already been
-  * registered. This function allows an arch to shrink an existing registered
-  * range.
-  */
- void __init remove_active_range(unsigned int nid, unsigned long start_pfn,
-                               unsigned long end_pfn)
- {
-       int i, j;
-       int removed = 0;
-       printk(KERN_DEBUG "remove_active_range (%d, %lu, %lu)\n",
-                         nid, start_pfn, end_pfn);
-       /* Find the old active region end and shrink */
-       for_each_active_range_index_in_nid(i, nid) {
-               if (early_node_map[i].start_pfn >= start_pfn &&
-                   early_node_map[i].end_pfn <= end_pfn) {
-                       /* clear it */
-                       early_node_map[i].start_pfn = 0;
-                       early_node_map[i].end_pfn = 0;
-                       removed = 1;
-                       continue;
-               }
-               if (early_node_map[i].start_pfn < start_pfn &&
-                   early_node_map[i].end_pfn > start_pfn) {
-                       unsigned long temp_end_pfn = early_node_map[i].end_pfn;
-                       early_node_map[i].end_pfn = start_pfn;
-                       if (temp_end_pfn > end_pfn)
-                               add_active_range(nid, end_pfn, temp_end_pfn);
-                       continue;
-               }
-               if (early_node_map[i].start_pfn >= start_pfn &&
-                   early_node_map[i].end_pfn > end_pfn &&
-                   early_node_map[i].start_pfn < end_pfn) {
-                       early_node_map[i].start_pfn = end_pfn;
-                       continue;
-               }
-       }
-       if (!removed)
-               return;
-       /* remove the blank ones */
-       for (i = nr_nodemap_entries - 1; i > 0; i--) {
-               if (early_node_map[i].nid != nid)
-                       continue;
-               if (early_node_map[i].end_pfn)
-                       continue;
-               /* we found it, get rid of it */
-               for (j = i; j < nr_nodemap_entries - 1; j++)
-                       memcpy(&early_node_map[j], &early_node_map[j+1],
-                               sizeof(early_node_map[j]));
-               j = nr_nodemap_entries - 1;
-               memset(&early_node_map[j], 0, sizeof(early_node_map[j]));
-               nr_nodemap_entries--;
-       }
- }
- /**
-  * remove_all_active_ranges - Remove all currently registered regions
-  *
-  * During discovery, it may be found that a table like SRAT is invalid
-  * and an alternative discovery method must be used. This function removes
-  * all currently registered regions.
-  */
- void __init remove_all_active_ranges(void)
- {
-       memset(early_node_map, 0, sizeof(early_node_map));
-       nr_nodemap_entries = 0;
- }
- /* Compare two active node_active_regions */
- static int __init cmp_node_active_region(const void *a, const void *b)
- {
-       struct node_active_region *arange = (struct node_active_region *)a;
-       struct node_active_region *brange = (struct node_active_region *)b;
-       /* Done this way to avoid overflows */
-       if (arange->start_pfn > brange->start_pfn)
-               return 1;
-       if (arange->start_pfn < brange->start_pfn)
-               return -1;
-       return 0;
- }
- /* sort the node_map by start_pfn */
- void __init sort_node_map(void)
- {
-       sort(early_node_map, (size_t)nr_nodemap_entries,
-                       sizeof(struct node_active_region),
-                       cmp_node_active_region, NULL);
- }
  /**
   * node_map_pfn_alignment - determine the maximum internode alignment
   *
  unsigned long __init node_map_pfn_alignment(void)
  {
        unsigned long accl_mask = 0, last_end = 0;
+       unsigned long start, end, mask;
        int last_nid = -1;
-       int i;
-       for_each_active_range_index_in_nid(i, MAX_NUMNODES) {
-               int nid = early_node_map[i].nid;
-               unsigned long start = early_node_map[i].start_pfn;
-               unsigned long end = early_node_map[i].end_pfn;
-               unsigned long mask;
+       int i, nid;
  
+       for_each_mem_pfn_range(i, MAX_NUMNODES, &start, &end, &nid) {
                if (!start || last_nid < 0 || last_nid == nid) {
                        last_nid = nid;
                        last_end = end;
  /* Find the lowest pfn for a node */
  static unsigned long __init find_min_pfn_for_node(int nid)
  {
-       int i;
        unsigned long min_pfn = ULONG_MAX;
+       unsigned long start_pfn;
+       int i;
  
-       /* Assuming a sorted map, the first range found has the starting pfn */
-       for_each_active_range_index_in_nid(i, nid)
-               min_pfn = min(min_pfn, early_node_map[i].start_pfn);
+       for_each_mem_pfn_range(i, nid, &start_pfn, NULL, NULL)
+               min_pfn = min(min_pfn, start_pfn);
  
        if (min_pfn == ULONG_MAX) {
                printk(KERN_WARNING
@@@ -4703,15 -4354,16 +4360,16 @@@ unsigned long __init find_min_pfn_with_
   */
  static unsigned long __init early_calculate_totalpages(void)
  {
-       int i;
        unsigned long totalpages = 0;
+       unsigned long start_pfn, end_pfn;
+       int i, nid;
+       for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, &nid) {
+               unsigned long pages = end_pfn - start_pfn;
  
-       for (i = 0; i < nr_nodemap_entries; i++) {
-               unsigned long pages = early_node_map[i].end_pfn -
-                                               early_node_map[i].start_pfn;
                totalpages += pages;
                if (pages)
-                       node_set_state(early_node_map[i].nid, N_HIGH_MEMORY);
+                       node_set_state(nid, N_HIGH_MEMORY);
        }
        return totalpages;
  }
@@@ -4766,6 -4418,8 +4424,8 @@@ restart
        /* Spread kernelcore memory as evenly as possible throughout nodes */
        kernelcore_node = required_kernelcore / usable_nodes;
        for_each_node_state(nid, N_HIGH_MEMORY) {
+               unsigned long start_pfn, end_pfn;
                /*
                 * Recalculate kernelcore_node if the division per node
                 * now exceeds what is necessary to satisfy the requested
                kernelcore_remaining = kernelcore_node;
  
                /* Go through each range of PFNs within this node */
-               for_each_active_range_index_in_nid(i, nid) {
-                       unsigned long start_pfn, end_pfn;
+               for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL) {
                        unsigned long size_pages;
  
-                       start_pfn = max(early_node_map[i].start_pfn,
-                                               zone_movable_pfn[nid]);
-                       end_pfn = early_node_map[i].end_pfn;
+                       start_pfn = max(start_pfn, zone_movable_pfn[nid]);
                        if (start_pfn >= end_pfn)
                                continue;
  
@@@ -4890,11 -4541,8 +4547,8 @@@ static void check_for_regular_memory(pg
   */
  void __init free_area_init_nodes(unsigned long *max_zone_pfn)
  {
-       unsigned long nid;
-       int i;
-       /* Sort early_node_map as initialisation assumes it is sorted */
-       sort_node_map();
+       unsigned long start_pfn, end_pfn;
+       int i, nid;
  
        /* Record where the zone boundaries are */
        memset(arch_zone_lowest_possible_pfn, 0,
        }
  
        /* Print out the early_node_map[] */
-       printk("early_node_map[%d] active PFN ranges\n", nr_nodemap_entries);
-       for (i = 0; i < nr_nodemap_entries; i++)
-               printk("  %3d: %0#10lx -> %0#10lx\n", early_node_map[i].nid,
-                                               early_node_map[i].start_pfn,
-                                               early_node_map[i].end_pfn);
+       printk("Early memory PFN ranges\n");
+       for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, &nid)
+               printk("  %3d: %0#10lx -> %0#10lx\n", nid, start_pfn, end_pfn);
  
        /* Initialise every node */
        mminit_verify_pageflags_layout();
@@@ -4998,7 -4644,7 +4650,7 @@@ static int __init cmdline_parse_movable
  early_param("kernelcore", cmdline_parse_kernelcore);
  early_param("movablecore", cmdline_parse_movablecore);
  
- #endif /* CONFIG_ARCH_POPULATES_NODE_MAP */
+ #endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */
  
  /**
   * set_dma_reserve - set the specified number of pages reserved in the first zone