arch/x86/mm/pageattr.c

   1 /*
   2  * Copyright 2002 Andi Kleen, SuSE Labs.
   3  * Thanks to Ben LaHaise for precious feedback.
   4  */
   5
   6 #include <linux/highmem.h>
   7 #include <linux/module.h>
   8 #include <linux/sched.h>
   9 #include <linux/slab.h>
  10 #include <linux/mm.h>
  11
  12 void clflush_cache_range(void *addr, int size)
  13 {
  14         int i;
  15
  16         for (i = 0; i < size; i += boot_cpu_data.x86_clflush_size)
  17                 clflush(addr+i);
  18 }
  19
  20 #include <asm/processor.h>
  21 #include <asm/tlbflush.h>
  22 #include <asm/sections.h>
  23 #include <asm/uaccess.h>
  24 #include <asm/pgalloc.h>
  25
  26 pte_t *lookup_address(unsigned long address, int *level)
  27 {
  28         pgd_t *pgd = pgd_offset_k(address);
  29         pud_t *pud;
  30         pmd_t *pmd;
  31
  32         if (pgd_none(*pgd))
  33                 return NULL;
  34         pud = pud_offset(pgd, address);
  35         if (pud_none(*pud))
  36                 return NULL;
  37         pmd = pmd_offset(pud, address);
  38         if (pmd_none(*pmd))
  39                 return NULL;
  40         *level = 3;
  41         if (pmd_large(*pmd))
  42                 return (pte_t *)pmd;
  43         *level = 4;
  44
  45         return pte_offset_kernel(pmd, address);
  46 }
  47
  48 static void __set_pmd_pte(pte_t *kpte, unsigned long address, pte_t pte)
  49 {
  50         /* change init_mm */
  51         set_pte_atomic(kpte, pte);
  52 #ifdef CONFIG_X86_32
  53         if (SHARED_KERNEL_PMD)
  54                 return;
  55         {
  56                 struct page *page;
  57
  58                 for (page = pgd_list; page; page = (struct page *)page->index) {
  59                         pgd_t *pgd;
  60                         pud_t *pud;
  61                         pmd_t *pmd;
  62
  63                         pgd = (pgd_t *)page_address(page) + pgd_index(address);
  64                         pud = pud_offset(pgd, address);
  65                         pmd = pmd_offset(pud, address);
  66                         set_pte_atomic((pte_t *)pmd, pte);
  67                 }
  68         }
  69 #endif
  70 }
  71
  72 static int split_large_page(pte_t *kpte, unsigned long address)
  73 {
  74         pgprot_t ref_prot = pte_pgprot(pte_clrhuge(*kpte));
  75         gfp_t gfp_flags = GFP_KERNEL;
  76         unsigned long flags;
  77         unsigned long addr;
  78         pte_t *pbase, *tmp;
  79         struct page *base;
  80         int i, level;
  81
  82 #ifdef CONFIG_DEBUG_PAGEALLOC
  83         gfp_flags = GFP_ATOMIC;
  84 #endif
  85         base = alloc_pages(gfp_flags, 0);
  86         if (!base)
  87                 return -ENOMEM;
  88
  89         spin_lock_irqsave(&pgd_lock, flags);
  90         /*
  91          * Check for races, another CPU might have split this page
  92          * up for us already:
  93          */
  94         tmp = lookup_address(address, &level);
  95         if (tmp != kpte) {
  96                 WARN_ON_ONCE(1);
  97                 goto out_unlock;
  98         }
  99
 100         address = __pa(address);
 101         addr = address & LARGE_PAGE_MASK;
 102         pbase = (pte_t *)page_address(base);
 103 #ifdef CONFIG_X86_32
 104         paravirt_alloc_pt(&init_mm, page_to_pfn(base));
 105 #endif
 106
 107         for (i = 0; i < PTRS_PER_PTE; i++, addr += PAGE_SIZE)
 108                 set_pte(&pbase[i], pfn_pte(addr >> PAGE_SHIFT, ref_prot));
 109
 110         /*
 111          * Install the new, split up pagetable:
 112          */
 113         __set_pmd_pte(kpte, address, mk_pte(base, ref_prot));
 114         base = NULL;
 115
 116 out_unlock:
 117         spin_unlock_irqrestore(&pgd_lock, flags);
 118
 119         if (base)
 120                 __free_pages(base, 0);
 121
 122         return 0;
 123 }
 124
 125 static int
 126 __change_page_attr(unsigned long address, struct page *page, pgprot_t prot)
 127 {
 128         struct page *kpte_page;
 129         int level, err = 0;
 130         pte_t *kpte;
 131
 132         BUG_ON(PageHighMem(page));
 133
 134 repeat:
 135         kpte = lookup_address(address, &level);
 136         if (!kpte)
 137                 return -EINVAL;
 138
 139         kpte_page = virt_to_page(kpte);
 140         BUG_ON(PageLRU(kpte_page));
 141         BUG_ON(PageCompound(kpte_page));
 142
 143         /*
 144          * Better fail early if someone sets the kernel text to NX.
 145          * Does not cover __inittext
 146          */
 147         BUG_ON(address >= (unsigned long)&_text &&
 148                 address < (unsigned long)&_etext &&
 149                (pgprot_val(prot) & _PAGE_NX));
 150
 151         if (level == 4) {
 152                 set_pte_atomic(kpte, mk_pte(page, canon_pgprot(prot)));
 153         } else {
 154                 err = split_large_page(kpte, address);
 155                 if (!err)
 156                         goto repeat;
 157         }
 158         return err;
 159 }
 160
 161 /**
 162  * change_page_attr_addr - Change page table attributes in linear mapping
 163  * @address: Virtual address in linear mapping.
 164  * @numpages: Number of pages to change
 165  * @prot:    New page table attribute (PAGE_*)
 166  *
 167  * Change page attributes of a page in the direct mapping. This is a variant
 168  * of change_page_attr() that also works on memory holes that do not have
 169  * mem_map entry (pfn_valid() is false).
 170  *
 171  * See change_page_attr() documentation for more details.
 172  */
 173
 174 int change_page_attr_addr(unsigned long address, int numpages, pgprot_t prot)
 175 {
 176         int err = 0, kernel_map = 0, i;
 177
 178 #ifdef CONFIG_X86_64
 179         if (address >= __START_KERNEL_map &&
 180                         address < __START_KERNEL_map + KERNEL_TEXT_SIZE) {
 181
 182                 address = (unsigned long)__va(__pa(address));
 183                 kernel_map = 1;
 184         }
 185 #endif
 186
 187         for (i = 0; i < numpages; i++, address += PAGE_SIZE) {
 188                 unsigned long pfn = __pa(address) >> PAGE_SHIFT;
 189
 190                 if (!kernel_map || pte_present(pfn_pte(0, prot))) {
 191                         err = __change_page_attr(address, pfn_to_page(pfn), prot);
 192                         if (err)
 193                                 break;
 194                 }
 195 #ifdef CONFIG_X86_64
 196                 /*
 197                  * Handle kernel mapping too which aliases part of
 198                  * lowmem:
 199                  */
 200                 if (__pa(address) < KERNEL_TEXT_SIZE) {
 201                         unsigned long addr2;
 202                         pgprot_t prot2;
 203
 204                         addr2 = __START_KERNEL_map + __pa(address);
 205                         /* Make sure the kernel mappings stay executable */
 206                         prot2 = pte_pgprot(pte_mkexec(pfn_pte(0, prot)));
 207                         err = __change_page_attr(addr2, pfn_to_page(pfn), prot2);
 208                 }
 209 #endif
 210         }
 211
 212         return err;
 213 }
 214
 215 /**
 216  * change_page_attr - Change page table attributes in the linear mapping.
 217  * @page: First page to change
 218  * @numpages: Number of pages to change
 219  * @prot: New protection/caching type (PAGE_*)
 220  *
 221  * Returns 0 on success, otherwise a negated errno.
 222  *
 223  * This should be used when a page is mapped with a different caching policy
 224  * than write-back somewhere - some CPUs do not like it when mappings with
 225  * different caching policies exist. This changes the page attributes of the
 226  * in kernel linear mapping too.
 227  *
 228  * Caller must call global_flush_tlb() later to make the changes active.
 229  *
 230  * The caller needs to ensure that there are no conflicting mappings elsewhere
 231  * (e.g. in user space) * This function only deals with the kernel linear map.
 232  *
 233  * For MMIO areas without mem_map use change_page_attr_addr() instead.
 234  */
 235 int change_page_attr(struct page *page, int numpages, pgprot_t prot)
 236 {
 237         unsigned long addr = (unsigned long)page_address(page);
 238
 239         return change_page_attr_addr(addr, numpages, prot);
 240 }
 241 EXPORT_SYMBOL(change_page_attr);
 242
 243 static void flush_kernel_map(void *arg)
 244 {
 245         /*
 246          * Flush all to work around Errata in early athlons regarding
 247          * large page flushing.
 248          */
 249         __flush_tlb_all();
 250
 251         if (boot_cpu_data.x86_model >= 4)
 252                 wbinvd();
 253 }
 254
 255 void global_flush_tlb(void)
 256 {
 257         BUG_ON(irqs_disabled());
 258
 259         on_each_cpu(flush_kernel_map, NULL, 1, 1);
 260 }
 261 EXPORT_SYMBOL(global_flush_tlb);
 262
 263 #ifdef CONFIG_DEBUG_PAGEALLOC
 264 void kernel_map_pages(struct page *page, int numpages, int enable)
 265 {
 266         if (PageHighMem(page))
 267                 return;
 268         if (!enable) {
 269                 debug_check_no_locks_freed(page_address(page),
 270                                            numpages * PAGE_SIZE);
 271         }
 272
 273         /*
 274          * If page allocator is not up yet then do not call c_p_a():
 275          */
 276         if (!debug_pagealloc_enabled)
 277                 return;
 278
 279         /*
 280          * the return value is ignored - the calls cannot fail,
 281          * large pages are disabled at boot time.
 282          */
 283         change_page_attr(page, numpages, enable ? PAGE_KERNEL : __pgprot(0));
 284
 285         /*
 286          * we should perform an IPI and flush all tlbs,
 287          * but that can deadlock->flush only current cpu.
 288          */
 289         __flush_tlb_all();
 290 }
 291 #endif