/*
 *  linux/arch/arm/mm/mmu.c
 *
 *  Copyright (C) 1995-2005 Russell King
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/init.h>
#include <linux/mman.h>
#include <linux/nodemask.h>
#include <linux/memblock.h>
#include <linux/fs.h>
#include <linux/vmalloc.h>

#include <asm/cputype.h>
#include <asm/sections.h>
#include <asm/cachetype.h>
#include <asm/setup.h>
#include <asm/sizes.h>
#include <asm/smp_plat.h>
#include <asm/tlb.h>
#include <asm/highmem.h>
#include <asm/traps.h>

#include <asm/mach/arch.h>
#include <asm/mach/map.h>

#include "mm.h"
/*
 * empty_zero_page is a special page that is used for
 * zero-initialized data and COW.
 */
struct page *empty_zero_page;
EXPORT_SYMBOL(empty_zero_page);
/*
 * The pmd table for the upper-most set of pages.
 */
pmd_t *top_pmd;
#define CPOLICY_UNCACHED	0
#define CPOLICY_BUFFERED	1
#define CPOLICY_WRITETHROUGH	2
#define CPOLICY_WRITEBACK	3
#define CPOLICY_WRITEALLOC	4
static unsigned int cachepolicy __initdata = CPOLICY_WRITEBACK;
static unsigned int ecc_mask __initdata = 0;
pgprot_t pgprot_user;
pgprot_t pgprot_kernel;

EXPORT_SYMBOL(pgprot_user);
EXPORT_SYMBOL(pgprot_kernel);
struct cachepolicy {
	const char	policy[16];
	unsigned int	cr_mask;
	pmdval_t	pmd;
	pteval_t	pte;
};
static struct cachepolicy cache_policies[] __initdata = {
	{
		.policy		= "uncached",
		.cr_mask	= CR_W|CR_C,
		.pmd		= PMD_SECT_UNCACHED,
		.pte		= L_PTE_MT_UNCACHED,
	}, {
		.policy		= "buffered",
		.cr_mask	= CR_C,
		.pmd		= PMD_SECT_BUFFERED,
		.pte		= L_PTE_MT_BUFFERABLE,
	}, {
		.policy		= "writethrough",
		.cr_mask	= 0,
		.pmd		= PMD_SECT_WT,
		.pte		= L_PTE_MT_WRITETHROUGH,
	}, {
		.policy		= "writeback",
		.cr_mask	= 0,
		.pmd		= PMD_SECT_WB,
		.pte		= L_PTE_MT_WRITEBACK,
	}, {
		.policy		= "writealloc",
		.cr_mask	= 0,
		.pmd		= PMD_SECT_WBWA,
		.pte		= L_PTE_MT_WRITEALLOC,
	}
};
/*
 * These are useful for identifying cache coherency
 * problems by allowing the cache or the cache and
 * writebuffer to be turned off.  (Note: the write
 * buffer should not be on and the cache off).
 */
static int __init early_cachepolicy(char *p)
{
	int i;

	for (i = 0; i < ARRAY_SIZE(cache_policies); i++) {
		int len = strlen(cache_policies[i].policy);

		if (memcmp(p, cache_policies[i].policy, len) == 0) {
			cachepolicy = i;
			cr_alignment &= ~cache_policies[i].cr_mask;
			cr_no_alignment &= ~cache_policies[i].cr_mask;
			break;
		}
	}
	if (i == ARRAY_SIZE(cache_policies))
		printk(KERN_ERR "ERROR: unknown or unsupported cache policy\n");
	/*
	 * This restriction is partly to do with the way we boot; it is
	 * unpredictable to have memory mapped using two different sets of
	 * memory attributes (shared, type, and cache attribs).  We can not
	 * change these attributes once the initial assembly has setup the
	 * page tables.
	 */
	if (cpu_architecture() >= CPU_ARCH_ARMv6) {
		printk(KERN_WARNING "Only cachepolicy=writeback supported on ARMv6 and later\n");
		cachepolicy = CPOLICY_WRITEBACK;
	}
	flush_cache_all();
	set_cr(cr_alignment);
	return 0;
}
early_param("cachepolicy", early_cachepolicy);
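/*
 * Illustrative note (not part of the original source): "cachepolicy" is an
 * early kernel parameter, so it is driven from the boot command line, e.g.
 *
 *	console=ttyS0 root=/dev/mmcblk0p2 cachepolicy=writethrough
 *
 * which selects CPOLICY_WRITETHROUGH on pre-ARMv6 CPUs before
 * build_mem_type_table() runs; ARMv6 and later are forced back to
 * writeback as warned above.
 */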
static int __init early_nocache(char *__unused)
{
	char *p = "buffered";
	printk(KERN_WARNING "nocache is deprecated; use cachepolicy=%s\n", p);
	early_cachepolicy(p);
	return 0;
}
early_param("nocache", early_nocache);

static int __init early_nowrite(char *__unused)
{
	char *p = "uncached";
	printk(KERN_WARNING "nowb is deprecated; use cachepolicy=%s\n", p);
	early_cachepolicy(p);
	return 0;
}
early_param("nowb", early_nowrite);
#ifndef CONFIG_ARM_LPAE
static int __init early_ecc(char *p)
{
	if (memcmp(p, "on", 2) == 0)
		ecc_mask = PMD_PROTECTION;
	else if (memcmp(p, "off", 3) == 0)
		ecc_mask = 0;
	return 0;
}
early_param("ecc", early_ecc);
#endif /* !CONFIG_ARM_LPAE */
static int __init noalign_setup(char *__unused)
{
	cr_alignment &= ~CR_A;
	cr_no_alignment &= ~CR_A;
	set_cr(cr_alignment);
	return 1;
}
__setup("noalign", noalign_setup);
void adjust_cr(unsigned long mask, unsigned long set)
{
	unsigned long flags;

	mask &= ~CR_A;
	set &= mask;

	local_irq_save(flags);

	cr_no_alignment = (cr_no_alignment & ~mask) | set;
	cr_alignment = (cr_alignment & ~mask) | set;

	set_cr((get_cr() & ~mask) | set);

	local_irq_restore(flags);
}
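/*
 * Illustrative usage sketch (not part of the original file): callers pass a
 * mask of control register bits and, within that mask, the bits to set, e.g.
 *
 *	adjust_cr(CR_RR, CR_RR);
 *
 * to enable round-robin cache replacement. Note that CR_A is masked out
 * above, so this helper cannot be used to change alignment faulting.
 */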
#define PROT_PTE_DEVICE		L_PTE_PRESENT|L_PTE_YOUNG|L_PTE_DIRTY|L_PTE_XN
#define PROT_SECT_DEVICE	PMD_TYPE_SECT|PMD_SECT_AP_WRITE
static struct mem_type mem_types[] = {
	[MT_DEVICE] = {		  /* Strongly ordered / ARMv6 shared device */
		.prot_pte	= PROT_PTE_DEVICE | L_PTE_MT_DEV_SHARED |
				  L_PTE_SHARED,
		.prot_l1	= PMD_TYPE_TABLE,
		.prot_sect	= PROT_SECT_DEVICE | PMD_SECT_S,
		.domain		= DOMAIN_IO,
	},
	[MT_DEVICE_NONSHARED] = { /* ARMv6 non-shared device */
		.prot_pte	= PROT_PTE_DEVICE | L_PTE_MT_DEV_NONSHARED,
		.prot_l1	= PMD_TYPE_TABLE,
		.prot_sect	= PROT_SECT_DEVICE,
		.domain		= DOMAIN_IO,
	},
	[MT_DEVICE_CACHED] = {	  /* ioremap_cached */
		.prot_pte	= PROT_PTE_DEVICE | L_PTE_MT_DEV_CACHED,
		.prot_l1	= PMD_TYPE_TABLE,
		.prot_sect	= PROT_SECT_DEVICE | PMD_SECT_WB,
		.domain		= DOMAIN_IO,
	},
	[MT_DEVICE_WC] = {	/* ioremap_wc */
		.prot_pte	= PROT_PTE_DEVICE | L_PTE_MT_DEV_WC,
		.prot_l1	= PMD_TYPE_TABLE,
		.prot_sect	= PROT_SECT_DEVICE,
		.domain		= DOMAIN_IO,
	},
	[MT_UNCACHED] = {
		.prot_pte	= PROT_PTE_DEVICE,
		.prot_l1	= PMD_TYPE_TABLE,
		.prot_sect	= PMD_TYPE_SECT | PMD_SECT_XN,
		.domain		= DOMAIN_IO,
	},
	[MT_CACHECLEAN] = {
		.prot_sect	= PMD_TYPE_SECT | PMD_SECT_XN,
		.domain		= DOMAIN_KERNEL,
	},
#ifndef CONFIG_ARM_LPAE
	[MT_MINICLEAN] = {
		.prot_sect	= PMD_TYPE_SECT | PMD_SECT_XN | PMD_SECT_MINICACHE,
		.domain		= DOMAIN_KERNEL,
	},
#endif
	[MT_LOW_VECTORS] = {
		.prot_pte	= L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY |
				  L_PTE_RDONLY,
		.prot_l1	= PMD_TYPE_TABLE,
		.domain		= DOMAIN_USER,
	},
	[MT_HIGH_VECTORS] = {
		.prot_pte	= L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY |
				  L_PTE_USER | L_PTE_RDONLY,
		.prot_l1	= PMD_TYPE_TABLE,
		.domain		= DOMAIN_USER,
	},
	[MT_MEMORY] = {
		.prot_pte	= L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY,
		.prot_l1	= PMD_TYPE_TABLE,
		.prot_sect	= PMD_TYPE_SECT | PMD_SECT_AP_WRITE,
		.domain		= DOMAIN_KERNEL,
	},
	[MT_ROM] = {
		.prot_sect	= PMD_TYPE_SECT,
		.domain		= DOMAIN_KERNEL,
	},
	[MT_MEMORY_NONCACHED] = {
		.prot_pte	= L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY |
				  L_PTE_MT_BUFFERABLE,
		.prot_l1	= PMD_TYPE_TABLE,
		.prot_sect	= PMD_TYPE_SECT | PMD_SECT_AP_WRITE,
		.domain		= DOMAIN_KERNEL,
	},
	[MT_MEMORY_DTCM] = {
		.prot_pte	= L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY |
				  L_PTE_XN,
		.prot_l1	= PMD_TYPE_TABLE,
		.prot_sect	= PMD_TYPE_SECT | PMD_SECT_XN,
		.domain		= DOMAIN_KERNEL,
	},
	[MT_MEMORY_ITCM] = {
		.prot_pte	= L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY,
		.prot_l1	= PMD_TYPE_TABLE,
		.domain		= DOMAIN_KERNEL,
	},
	[MT_MEMORY_SO] = {
		.prot_pte	= L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY |
				  L_PTE_MT_UNCACHED,
		.prot_l1	= PMD_TYPE_TABLE,
		.prot_sect	= PMD_TYPE_SECT | PMD_SECT_AP_WRITE | PMD_SECT_S |
				  PMD_SECT_UNCACHED | PMD_SECT_XN,
		.domain		= DOMAIN_KERNEL,
	},
};
const struct mem_type *get_mem_type(unsigned int type)
{
	return type < ARRAY_SIZE(mem_types) ? &mem_types[type] : NULL;
}
EXPORT_SYMBOL(get_mem_type);
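/*
 * Illustrative sketch (not part of the original file): platform code that
 * builds mappings by hand can look up the attribute set for a memory type
 * through get_mem_type(). The function name below is hypothetical and the
 * block is compiled out.
 */
#if 0
static void __init example_show_device_type(void)
{
	const struct mem_type *mt = get_mem_type(MT_DEVICE);

	if (mt)
		pr_info("MT_DEVICE: prot_sect=%08lx prot_pte=%08lx\n",
			(unsigned long)mt->prot_sect,
			(unsigned long)mt->prot_pte);
}
#endif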
/*
 * Adjust the PMD section entries according to the CPU in use.
 */
static void __init build_mem_type_table(void)
{
	struct cachepolicy *cp;
	unsigned int cr = get_cr();
	pteval_t user_pgprot, kern_pgprot, vecs_pgprot;
	int cpu_arch = cpu_architecture();
	int i;
	if (cpu_arch < CPU_ARCH_ARMv6) {
#if defined(CONFIG_CPU_DCACHE_DISABLE)
		if (cachepolicy > CPOLICY_BUFFERED)
			cachepolicy = CPOLICY_BUFFERED;
#elif defined(CONFIG_CPU_DCACHE_WRITETHROUGH)
		if (cachepolicy > CPOLICY_WRITETHROUGH)
			cachepolicy = CPOLICY_WRITETHROUGH;
#endif
	}
	if (cpu_arch < CPU_ARCH_ARMv5) {
		if (cachepolicy >= CPOLICY_WRITEALLOC)
			cachepolicy = CPOLICY_WRITEBACK;
		ecc_mask = 0;
	}
	if (is_smp())
		cachepolicy = CPOLICY_WRITEALLOC;
	/*
	 * Strip out features not present on earlier architectures.
	 * Pre-ARMv5 CPUs don't have TEX bits.  Pre-ARMv6 CPUs or those
	 * without extended page tables don't have the 'Shared' bit.
	 */
	if (cpu_arch < CPU_ARCH_ARMv5)
		for (i = 0; i < ARRAY_SIZE(mem_types); i++)
			mem_types[i].prot_sect &= ~PMD_SECT_TEX(7);
	if ((cpu_arch < CPU_ARCH_ARMv6 || !(cr & CR_XP)) && !cpu_is_xsc3())
		for (i = 0; i < ARRAY_SIZE(mem_types); i++)
			mem_types[i].prot_sect &= ~PMD_SECT_S;
	/*
	 * ARMv5 and lower, bit 4 must be set for page tables (was: cache
	 * "update-able on write" bit on ARM610).  However, Xscale and
	 * Xscale3 require this bit to be cleared.
	 */
	if (cpu_is_xscale() || cpu_is_xsc3()) {
		for (i = 0; i < ARRAY_SIZE(mem_types); i++) {
			mem_types[i].prot_sect &= ~PMD_BIT4;
			mem_types[i].prot_l1 &= ~PMD_BIT4;
		}
	} else if (cpu_arch < CPU_ARCH_ARMv6) {
		for (i = 0; i < ARRAY_SIZE(mem_types); i++) {
			if (mem_types[i].prot_l1)
				mem_types[i].prot_l1 |= PMD_BIT4;
			if (mem_types[i].prot_sect)
				mem_types[i].prot_sect |= PMD_BIT4;
		}
	}
	/*
	 * Mark the device areas according to the CPU/architecture.
	 */
	if (cpu_is_xsc3() || (cpu_arch >= CPU_ARCH_ARMv6 && (cr & CR_XP))) {
		if (!cpu_is_xsc3()) {
			/*
			 * Mark device regions on ARMv6+ as execute-never
			 * to prevent speculative instruction fetches.
			 */
			mem_types[MT_DEVICE].prot_sect |= PMD_SECT_XN;
			mem_types[MT_DEVICE_NONSHARED].prot_sect |= PMD_SECT_XN;
			mem_types[MT_DEVICE_CACHED].prot_sect |= PMD_SECT_XN;
			mem_types[MT_DEVICE_WC].prot_sect |= PMD_SECT_XN;
		}
		if (cpu_arch >= CPU_ARCH_ARMv7 && (cr & CR_TRE)) {
			/*
			 * For ARMv7 with TEX remapping,
			 * - shared device is SXCB=1100
			 * - nonshared device is SXCB=0100
			 * - write combine device mem is SXCB=0001
			 * (Uncached Normal memory)
			 */
			mem_types[MT_DEVICE].prot_sect |= PMD_SECT_TEX(1);
			mem_types[MT_DEVICE_NONSHARED].prot_sect |= PMD_SECT_TEX(1);
			mem_types[MT_DEVICE_WC].prot_sect |= PMD_SECT_BUFFERABLE;
		} else if (cpu_is_xsc3()) {
			/*
			 * For Xscale3,
			 * - shared device is TEXCB=00101
			 * - nonshared device is TEXCB=01000
			 * - write combine device mem is TEXCB=00100
			 * (Inner/Outer Uncacheable in xsc3 parlance)
			 */
			mem_types[MT_DEVICE].prot_sect |= PMD_SECT_TEX(1) | PMD_SECT_BUFFERED;
			mem_types[MT_DEVICE_NONSHARED].prot_sect |= PMD_SECT_TEX(2);
			mem_types[MT_DEVICE_WC].prot_sect |= PMD_SECT_TEX(1);
		} else {
			/*
			 * For ARMv6 and ARMv7 without TEX remapping,
			 * - shared device is TEXCB=00001
			 * - nonshared device is TEXCB=01000
			 * - write combine device mem is TEXCB=00100
			 * (Uncached Normal in ARMv6 parlance).
			 */
			mem_types[MT_DEVICE].prot_sect |= PMD_SECT_BUFFERED;
			mem_types[MT_DEVICE_NONSHARED].prot_sect |= PMD_SECT_TEX(2);
			mem_types[MT_DEVICE_WC].prot_sect |= PMD_SECT_TEX(1);
		}
	} else {
		/*
		 * On others, write combining is "Uncached/Buffered"
		 */
		mem_types[MT_DEVICE_WC].prot_sect |= PMD_SECT_BUFFERABLE;
	}
	/*
	 * Now deal with the memory-type mappings
	 */
	cp = &cache_policies[cachepolicy];
	vecs_pgprot = kern_pgprot = user_pgprot = cp->pte;

	/*
	 * Only use write-through for non-SMP systems
	 */
	if (!is_smp() && cpu_arch >= CPU_ARCH_ARMv5 && cachepolicy > CPOLICY_WRITETHROUGH)
		vecs_pgprot = cache_policies[CPOLICY_WRITETHROUGH].pte;
	/*
	 * Enable CPU-specific coherency if supported.
	 * (Only available on XSC3 at the moment.)
	 */
	if (arch_is_coherent() && cpu_is_xsc3()) {
		mem_types[MT_MEMORY].prot_sect |= PMD_SECT_S;
		mem_types[MT_MEMORY].prot_pte |= L_PTE_SHARED;
		mem_types[MT_MEMORY_NONCACHED].prot_sect |= PMD_SECT_S;
		mem_types[MT_MEMORY_NONCACHED].prot_pte |= L_PTE_SHARED;
	}
	/*
	 * ARMv6 and above have extended page tables.
	 */
	if (cpu_arch >= CPU_ARCH_ARMv6 && (cr & CR_XP)) {
#ifndef CONFIG_ARM_LPAE
		/*
		 * Mark cache clean areas and XIP ROM read only
		 * from SVC mode and no access from userspace.
		 */
		mem_types[MT_ROM].prot_sect |= PMD_SECT_APX|PMD_SECT_AP_WRITE;
		mem_types[MT_MINICLEAN].prot_sect |= PMD_SECT_APX|PMD_SECT_AP_WRITE;
		mem_types[MT_CACHECLEAN].prot_sect |= PMD_SECT_APX|PMD_SECT_AP_WRITE;
#endif

		if (is_smp()) {
			/*
			 * Mark memory with the "shared" attribute
			 * for SMP systems
			 */
			user_pgprot |= L_PTE_SHARED;
			kern_pgprot |= L_PTE_SHARED;
			vecs_pgprot |= L_PTE_SHARED;
			mem_types[MT_DEVICE_WC].prot_sect |= PMD_SECT_S;
			mem_types[MT_DEVICE_WC].prot_pte |= L_PTE_SHARED;
			mem_types[MT_DEVICE_CACHED].prot_sect |= PMD_SECT_S;
			mem_types[MT_DEVICE_CACHED].prot_pte |= L_PTE_SHARED;
			mem_types[MT_MEMORY].prot_sect |= PMD_SECT_S;
			mem_types[MT_MEMORY].prot_pte |= L_PTE_SHARED;
			mem_types[MT_MEMORY_NONCACHED].prot_sect |= PMD_SECT_S;
			mem_types[MT_MEMORY_NONCACHED].prot_pte |= L_PTE_SHARED;
		}
	}
	/*
	 * Non-cacheable Normal - intended for memory areas that must
	 * not cause dirty cache line writebacks when used
	 */
	if (cpu_arch >= CPU_ARCH_ARMv6) {
		if (cpu_arch >= CPU_ARCH_ARMv7 && (cr & CR_TRE)) {
			/* Non-cacheable Normal is XCB = 001 */
			mem_types[MT_MEMORY_NONCACHED].prot_sect |=
				PMD_SECT_BUFFERED;
		} else {
			/* For both ARMv6 and non-TEX-remapping ARMv7 */
			mem_types[MT_MEMORY_NONCACHED].prot_sect |=
				PMD_SECT_TEX(1);
		}
	} else {
		mem_types[MT_MEMORY_NONCACHED].prot_sect |= PMD_SECT_BUFFERABLE;
	}
#ifdef CONFIG_ARM_LPAE
	/*
	 * Do not generate access flag faults for the kernel mappings.
	 */
	for (i = 0; i < ARRAY_SIZE(mem_types); i++) {
		mem_types[i].prot_pte |= PTE_EXT_AF;
		mem_types[i].prot_sect |= PMD_SECT_AF;
	}
	kern_pgprot |= PTE_EXT_AF;
	vecs_pgprot |= PTE_EXT_AF;
#endif
	for (i = 0; i < 16; i++) {
		unsigned long v = pgprot_val(protection_map[i]);
		protection_map[i] = __pgprot(v | user_pgprot);
	}

	mem_types[MT_LOW_VECTORS].prot_pte |= vecs_pgprot;
	mem_types[MT_HIGH_VECTORS].prot_pte |= vecs_pgprot;

	pgprot_user   = __pgprot(L_PTE_PRESENT | L_PTE_YOUNG | user_pgprot);
	pgprot_kernel = __pgprot(L_PTE_PRESENT | L_PTE_YOUNG |
				 L_PTE_DIRTY | kern_pgprot);

	mem_types[MT_LOW_VECTORS].prot_l1 |= ecc_mask;
	mem_types[MT_HIGH_VECTORS].prot_l1 |= ecc_mask;
	mem_types[MT_MEMORY].prot_sect |= ecc_mask | cp->pmd;
	mem_types[MT_MEMORY].prot_pte |= kern_pgprot;
	mem_types[MT_MEMORY_NONCACHED].prot_sect |= ecc_mask;
	mem_types[MT_ROM].prot_sect |= cp->pmd;
	switch (cp->pmd) {
	case PMD_SECT_WT:
		mem_types[MT_CACHECLEAN].prot_sect |= PMD_SECT_WT;
		break;
	case PMD_SECT_WB:
	case PMD_SECT_WBWA:
		mem_types[MT_CACHECLEAN].prot_sect |= PMD_SECT_WB;
		break;
	}
	printk("Memory policy: ECC %sabled, Data cache %s\n",
		ecc_mask ? "en" : "dis", cp->policy);
	for (i = 0; i < ARRAY_SIZE(mem_types); i++) {
		struct mem_type *t = &mem_types[i];
		if (t->prot_l1)
			t->prot_l1 |= PMD_DOMAIN(t->domain);
		if (t->prot_sect)
			t->prot_sect |= PMD_DOMAIN(t->domain);
	}
}
#ifdef CONFIG_ARM_DMA_MEM_BUFFERABLE
pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
			      unsigned long size, pgprot_t vma_prot)
{
	if (!pfn_valid(pfn))
		return pgprot_noncached(vma_prot);
	else if (file->f_flags & O_SYNC)
		return pgprot_writecombine(vma_prot);
	return vma_prot;
}
EXPORT_SYMBOL(phys_mem_access_prot);
#endif
#define vectors_base()	(vectors_high() ? 0xffff0000 : 0)
static void __init *early_alloc(unsigned long sz)
{
	void *ptr = __va(memblock_alloc(sz, sz));
	memset(ptr, 0, sz);
	return ptr;
}
static pte_t * __init early_pte_alloc(pmd_t *pmd, unsigned long addr, unsigned long prot)
{
	if (pmd_none(*pmd)) {
		pte_t *pte = early_alloc(PTE_HWTABLE_OFF + PTE_HWTABLE_SIZE);
		__pmd_populate(pmd, __pa(pte), prot);
	}
	BUG_ON(pmd_bad(*pmd));
	return pte_offset_kernel(pmd, addr);
}
static void __init alloc_init_pte(pmd_t *pmd, unsigned long addr,
				  unsigned long end, unsigned long pfn,
				  const struct mem_type *type)
{
	pte_t *pte = early_pte_alloc(pmd, addr, type->prot_l1);
	do {
		set_pte_ext(pte, pfn_pte(pfn, __pgprot(type->prot_pte)), 0);
		pfn++;
	} while (pte++, addr += PAGE_SIZE, addr != end);
}
static void __init alloc_init_section(pud_t *pud, unsigned long addr,
				      unsigned long end, phys_addr_t phys,
				      const struct mem_type *type)
{
	pmd_t *pmd = pmd_offset(pud, addr);

	/*
	 * Try a section mapping - end, addr and phys must all be aligned
	 * to a section boundary.  Note that PMDs refer to the individual
	 * L1 entries, whereas PGDs refer to a group of L1 entries making
	 * up one logical pointer to an L2 table.
	 */
	if (((addr | end | phys) & ~SECTION_MASK) == 0) {
		pmd_t *p = pmd;

#ifndef CONFIG_ARM_LPAE
		if (addr & SECTION_SIZE)
			pmd++;
#endif

		do {
			*pmd = __pmd(phys | type->prot_sect);
			phys += SECTION_SIZE;
		} while (pmd++, addr += SECTION_SIZE, addr != end);

		flush_pmd_entry(p);
	} else {
		/*
		 * No need to loop; pte's aren't interested in the
		 * individual L1 entries.
		 */
		alloc_init_pte(pmd, addr, end, __phys_to_pfn(phys), type);
	}
}
static void alloc_init_pud(pgd_t *pgd, unsigned long addr, unsigned long end,
	unsigned long phys, const struct mem_type *type)
{
	pud_t *pud = pud_offset(pgd, addr);
	unsigned long next;

	do {
		next = pud_addr_end(addr, end);
		alloc_init_section(pud, addr, next, phys, type);
		phys += next - addr;
	} while (pud++, addr = next, addr != end);
}
#ifndef CONFIG_ARM_LPAE
static void __init create_36bit_mapping(struct map_desc *md,
					const struct mem_type *type)
{
	unsigned long addr, length, end;
	phys_addr_t phys;
	pgd_t *pgd;

	addr = md->virtual;
	phys = __pfn_to_phys(md->pfn);
	length = PAGE_ALIGN(md->length);

	if (!(cpu_architecture() >= CPU_ARCH_ARMv6 || cpu_is_xsc3())) {
		printk(KERN_ERR "MM: CPU does not support supersection "
		       "mapping for 0x%08llx at 0x%08lx\n",
		       (long long)__pfn_to_phys((u64)md->pfn), addr);
		return;
	}

	/* N.B.	ARMv6 supersections are only defined to work with domain 0.
	 *	Since domain assignments can in fact be arbitrary, the
	 *	'domain == 0' check below is required to ensure that ARMv6
	 *	supersections are only allocated for domain 0 regardless
	 *	of the actual domain assignments in use.
	 */
	if (type->domain) {
		printk(KERN_ERR "MM: invalid domain in supersection "
		       "mapping for 0x%08llx at 0x%08lx\n",
		       (long long)__pfn_to_phys((u64)md->pfn), addr);
		return;
	}

	if ((addr | length | __pfn_to_phys(md->pfn)) & ~SUPERSECTION_MASK) {
		printk(KERN_ERR "MM: cannot create mapping for 0x%08llx"
		       " at 0x%08lx invalid alignment\n",
		       (long long)__pfn_to_phys((u64)md->pfn), addr);
		return;
	}

	/*
	 * Shift bits [35:32] of address into bits [23:20] of PMD
	 * (See ARMv6 spec).
	 */
	phys |= (((md->pfn >> (32 - PAGE_SHIFT)) & 0xF) << 20);

	pgd = pgd_offset_k(addr);
	end = addr + length;
	do {
		pud_t *pud = pud_offset(pgd, addr);
		pmd_t *pmd = pmd_offset(pud, addr);
		int i;

		for (i = 0; i < 16; i++)
			*pmd++ = __pmd(phys | type->prot_sect | PMD_SECT_SUPER);

		addr += SUPERSECTION_SIZE;
		phys += SUPERSECTION_SIZE;
		pgd += SUPERSECTION_SIZE >> PGDIR_SHIFT;
	} while (addr != end);
}
#endif	/* !CONFIG_ARM_LPAE */
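/*
 * Illustrative worked example (not part of the original source): with 4K
 * pages, a map_desc carrying md->pfn == 0x110000 describes physical address
 * 0x1_10000000, i.e. bit 32 set. In create_36bit_mapping() above,
 * (md->pfn >> (32 - PAGE_SHIFT)) & 0xF == 0x1, and shifting that left by 20
 * places PA[35:32] into bits [23:20] of the section descriptor, which is
 * where the ARMv6 supersection format expects them.
 */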
/*
 * Create the page directory entries and any necessary
 * page tables for the mapping specified by `md'.  We
 * are able to cope here with varying sizes and address
 * offsets, and we take full advantage of sections and
 * supersections.
 */
static void __init create_mapping(struct map_desc *md)
{
	unsigned long addr, length, end;
	phys_addr_t phys;
	const struct mem_type *type;
	pgd_t *pgd;
	if (md->virtual != vectors_base() && md->virtual < TASK_SIZE) {
		printk(KERN_WARNING "BUG: not creating mapping for 0x%08llx"
		       " at 0x%08lx in user region\n",
		       (long long)__pfn_to_phys((u64)md->pfn), md->virtual);
		return;
	}

	if ((md->type == MT_DEVICE || md->type == MT_ROM) &&
	    md->virtual >= PAGE_OFFSET && md->virtual < VMALLOC_END) {
		printk(KERN_WARNING "BUG: mapping for 0x%08llx"
		       " at 0x%08lx overlaps vmalloc space\n",
		       (long long)__pfn_to_phys((u64)md->pfn), md->virtual);
	}
	type = &mem_types[md->type];

#ifndef CONFIG_ARM_LPAE
	/*
	 * Catch 36-bit addresses
	 */
	if (md->pfn >= 0x100000) {
		create_36bit_mapping(md, type);
		return;
	}
#endif
	addr = md->virtual & PAGE_MASK;
	phys = __pfn_to_phys(md->pfn);
	length = PAGE_ALIGN(md->length + (md->virtual & ~PAGE_MASK));

	if (type->prot_l1 == 0 && ((addr | phys | length) & ~SECTION_MASK)) {
		printk(KERN_WARNING "BUG: map for 0x%08llx at 0x%08lx can not "
		       "be mapped using pages, ignoring.\n",
		       (long long)__pfn_to_phys(md->pfn), addr);
		return;
	}

	pgd = pgd_offset_k(addr);
	end = addr + length;
	do {
		unsigned long next = pgd_addr_end(addr, end);

		alloc_init_pud(pgd, addr, next, phys, type);

		phys += next - addr;
		addr = next;
	} while (pgd++, addr != end);
}
/*
 * Create the architecture specific mappings
 */
void __init iotable_init(struct map_desc *io_desc, int nr)
{
	int i;

	for (i = 0; i < nr; i++)
		create_mapping(io_desc + i);
}
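/*
 * Illustrative sketch (not part of the original file): a machine's map_io
 * callback typically hands iotable_init() a static table of map_desc
 * entries. All names and addresses below are hypothetical, and the block is
 * compiled out.
 */
#if 0
static struct map_desc example_io_desc[] __initdata = {
	{
		.virtual	= 0xf8000000,			/* hypothetical virtual base */
		.pfn		= __phys_to_pfn(0x10000000),	/* hypothetical physical base */
		.length		= SZ_1M,
		.type		= MT_DEVICE,
	},
};

static void __init example_map_io(void)
{
	iotable_init(example_io_desc, ARRAY_SIZE(example_io_desc));
}
#endif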
static void * __initdata vmalloc_min = (void *)(VMALLOC_END - SZ_128M);
/*
 * vmalloc=size forces the vmalloc area to be exactly 'size'
 * bytes. This can be used to increase (or decrease) the vmalloc
 * area - the default is 128m.
 */
static int __init early_vmalloc(char *arg)
{
	unsigned long vmalloc_reserve = memparse(arg, NULL);

	if (vmalloc_reserve < SZ_16M) {
		vmalloc_reserve = SZ_16M;
		printk(KERN_WARNING
			"vmalloc area too small, limiting to %luMB\n",
			vmalloc_reserve >> 20);
	}

	if (vmalloc_reserve > VMALLOC_END - (PAGE_OFFSET + SZ_32M)) {
		vmalloc_reserve = VMALLOC_END - (PAGE_OFFSET + SZ_32M);
		printk(KERN_WARNING
			"vmalloc area is too big, limiting to %luMB\n",
			vmalloc_reserve >> 20);
	}

	vmalloc_min = (void *)(VMALLOC_END - vmalloc_reserve);
	return 0;
}
early_param("vmalloc", early_vmalloc);
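/*
 * Illustrative note (not part of the original source): with the default
 * 128MB vmalloc area, booting with "vmalloc=256M" moves vmalloc_min down by
 * a further 128MB, shrinking lowmem by the same amount; on a CONFIG_HIGHMEM
 * kernel the displaced RAM is handled as highmem by sanity_check_meminfo()
 * below, otherwise it is truncated away with a "vmalloc region overlap"
 * notice.
 */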
static phys_addr_t lowmem_limit __initdata = 0;
void __init sanity_check_meminfo(void)
{
	int i, j, highmem = 0;

	for (i = 0, j = 0; i < meminfo.nr_banks; i++) {
		struct membank *bank = &meminfo.bank[j];
		*bank = meminfo.bank[i];

#ifdef CONFIG_HIGHMEM
		if (__va(bank->start) >= vmalloc_min ||
		    __va(bank->start) < (void *)PAGE_OFFSET)
			highmem = 1;

		bank->highmem = highmem;

		/*
		 * Split those memory banks which are partially overlapping
		 * the vmalloc area, greatly simplifying things later.
		 */
		if (__va(bank->start) < vmalloc_min &&
		    bank->size > vmalloc_min - __va(bank->start)) {
			if (meminfo.nr_banks >= NR_BANKS) {
				printk(KERN_CRIT "NR_BANKS too low, "
						 "ignoring high memory\n");
			} else {
				memmove(bank + 1, bank,
					(meminfo.nr_banks - i) * sizeof(*bank));
				meminfo.nr_banks++;
				i++;
				bank[1].size -= vmalloc_min - __va(bank->start);
				bank[1].start = __pa(vmalloc_min - 1) + 1;
				bank[1].highmem = highmem = 1;
				j++;
			}
			bank->size = vmalloc_min - __va(bank->start);
		}
#else
		bank->highmem = highmem;

		/*
		 * Check whether this memory bank would entirely overlap
		 * the vmalloc area.
		 */
		if (__va(bank->start) >= vmalloc_min ||
		    __va(bank->start) < (void *)PAGE_OFFSET) {
			printk(KERN_NOTICE "Ignoring RAM at %.8llx-%.8llx "
			       "(vmalloc region overlap).\n",
			       (unsigned long long)bank->start,
			       (unsigned long long)bank->start + bank->size - 1);
			continue;
		}

		/*
		 * Check whether this memory bank would partially overlap
		 * the vmalloc area.
		 */
		if (__va(bank->start + bank->size) > vmalloc_min ||
		    __va(bank->start + bank->size) < __va(bank->start)) {
			unsigned long newsize = vmalloc_min - __va(bank->start);
			printk(KERN_NOTICE "Truncating RAM at %.8llx-%.8llx "
			       "to -%.8llx (vmalloc region overlap).\n",
			       (unsigned long long)bank->start,
			       (unsigned long long)bank->start + bank->size - 1,
			       (unsigned long long)bank->start + newsize - 1);
			bank->size = newsize;
		}
#endif
		if (!bank->highmem && bank->start + bank->size > lowmem_limit)
			lowmem_limit = bank->start + bank->size;

		j++;
	}
#ifdef CONFIG_HIGHMEM
	if (highmem) {
		const char *reason = NULL;

		if (cache_is_vipt_aliasing()) {
			/*
			 * Interactions between kmap and other mappings
			 * make highmem support with aliasing VIPT caches
			 * non-trivial.
			 */
			reason = "with VIPT aliasing cache";
		}
		if (reason) {
			printk(KERN_CRIT "HIGHMEM is not supported %s, ignoring high memory\n",
				reason);
			while (j > 0 && meminfo.bank[j - 1].highmem)
				j--;
		}
	}
#endif
	meminfo.nr_banks = j;
	memblock_set_current_limit(lowmem_limit);
}
static inline void prepare_page_table(void)
{
	unsigned long addr;
	phys_addr_t end;

	/*
	 * Clear out all the mappings below the kernel image.
	 */
	for (addr = 0; addr < MODULES_VADDR; addr += PMD_SIZE)
		pmd_clear(pmd_off_k(addr));

#ifdef CONFIG_XIP_KERNEL
	/* The XIP kernel is mapped in the module area -- skip over it */
	addr = ((unsigned long)_etext + PMD_SIZE - 1) & PMD_MASK;
#endif
	for ( ; addr < PAGE_OFFSET; addr += PMD_SIZE)
		pmd_clear(pmd_off_k(addr));

	/*
	 * Find the end of the first block of lowmem.
	 */
	end = memblock.memory.regions[0].base + memblock.memory.regions[0].size;
	if (end >= lowmem_limit)
		end = lowmem_limit;

	/*
	 * Clear out all the kernel space mappings, except for the first
	 * memory bank, up to the end of the vmalloc region.
	 */
	for (addr = __phys_to_virt(end);
	     addr < VMALLOC_END; addr += PMD_SIZE)
		pmd_clear(pmd_off_k(addr));
}
#ifdef CONFIG_ARM_LPAE
/* the first page is reserved for pgd */
#define SWAPPER_PG_DIR_SIZE	(PAGE_SIZE + \
				 PTRS_PER_PGD * PTRS_PER_PMD * sizeof(pmd_t))
#else
#define SWAPPER_PG_DIR_SIZE	(PTRS_PER_PGD * sizeof(pgd_t))
#endif
/*
 * Reserve the special regions of memory
 */
void __init arm_mm_memblock_reserve(void)
{
	/*
	 * Reserve the page tables.  These are already in use,
	 * and can only be in node 0.
	 */
	memblock_reserve(__pa(swapper_pg_dir), SWAPPER_PG_DIR_SIZE);

#ifdef CONFIG_SA1111
	/*
	 * Because of the SA1111 DMA bug, we want to preserve our
	 * precious DMA-able memory...
	 */
	memblock_reserve(PHYS_OFFSET, __pa(swapper_pg_dir) - PHYS_OFFSET);
#endif
}
/*
 * Set up the device mappings.  Since we clear out the page tables for all
 * mappings above VMALLOC_END, we will remove any debug device mappings.
 * This means you have to be careful how you debug this function, or any
 * called function.  This means you can't use any function or debugging
 * method which may touch any device, otherwise the kernel _will_ crash.
 */
static void __init devicemaps_init(struct machine_desc *mdesc)
{
	struct map_desc map;
	unsigned long addr;

	/*
	 * Allocate the vector page early.
	 */
	vectors_page = early_alloc(PAGE_SIZE);

	for (addr = VMALLOC_END; addr; addr += PMD_SIZE)
		pmd_clear(pmd_off_k(addr));

	/*
	 * Map the kernel if it is XIP.
	 * It is always first in the modulearea.
	 */
#ifdef CONFIG_XIP_KERNEL
	map.pfn = __phys_to_pfn(CONFIG_XIP_PHYS_ADDR & SECTION_MASK);
	map.virtual = MODULES_VADDR;
	map.length = ((unsigned long)_etext - map.virtual + ~SECTION_MASK) & SECTION_MASK;
	map.type = MT_ROM;
	create_mapping(&map);
#endif

	/*
	 * Map the cache flushing regions.
	 */
#ifdef FLUSH_BASE
	map.pfn = __phys_to_pfn(FLUSH_BASE_PHYS);
	map.virtual = FLUSH_BASE;
	map.length = SZ_1M;
	map.type = MT_CACHECLEAN;
	create_mapping(&map);
#endif

#ifdef FLUSH_BASE_MINICACHE
	map.pfn = __phys_to_pfn(FLUSH_BASE_PHYS + SZ_1M);
	map.virtual = FLUSH_BASE_MINICACHE;
	map.length = SZ_1M;
	map.type = MT_MINICLEAN;
	create_mapping(&map);
#endif

	/*
	 * Create a mapping for the machine vectors at the high-vectors
	 * location (0xffff0000).  If we aren't using high-vectors, also
	 * create a mapping at the low-vectors virtual address.
	 */
	map.pfn = __phys_to_pfn(virt_to_phys(vectors_page));
	map.virtual = 0xffff0000;
	map.length = PAGE_SIZE;
	map.type = MT_HIGH_VECTORS;
	create_mapping(&map);

	if (!vectors_high()) {
		map.virtual = 0;
		map.type = MT_LOW_VECTORS;
		create_mapping(&map);
	}

	/*
	 * Ask the machine support to map in the statically mapped devices.
	 */
	if (mdesc->map_io)
		mdesc->map_io();

	/*
	 * Finally flush the caches and tlb to ensure that we're in a
	 * consistent state wrt the writebuffer.  This also ensures that
	 * any write-allocated cache lines in the vector page are written
	 * back.  After this point, we can start to touch devices again.
	 */
	local_flush_tlb_all();
	flush_cache_all();
}
static void __init kmap_init(void)
{
#ifdef CONFIG_HIGHMEM
	pkmap_page_table = early_pte_alloc(pmd_off_k(PKMAP_BASE),
		PKMAP_BASE, _PAGE_KERNEL_TABLE);
#endif
}
static void __init map_lowmem(void)
{
	struct memblock_region *reg;

	/* Map all the lowmem memory banks. */
	for_each_memblock(memory, reg) {
		phys_addr_t start = reg->base;
		phys_addr_t end = start + reg->size;
		struct map_desc map;

		if (end > lowmem_limit)
			end = lowmem_limit;
		if (start >= end)
			break;

		map.pfn = __phys_to_pfn(start);
		map.virtual = __phys_to_virt(start);
		map.length = end - start;
		map.type = MT_MEMORY;

		create_mapping(&map);
	}
}
/*
 * paging_init() sets up the page tables, initialises the zone memory
 * maps, and sets up the zero page, bad page and bad page tables.
 */
void __init paging_init(struct machine_desc *mdesc)
{
	void *zero_page;

	memblock_set_current_limit(lowmem_limit);

	build_mem_type_table();
	prepare_page_table();
	map_lowmem();
	devicemaps_init(mdesc);
	kmap_init();

	top_pmd = pmd_off_k(0xffff0000);

	/* allocate the zero page. */
	zero_page = early_alloc(PAGE_SIZE);

	bootmem_init();

	empty_zero_page = virt_to_page(zero_page);
	__flush_dcache_page(NULL, empty_zero_page);
}