arm64: kdump: protect crash dump kernel memory
author		Takahiro Akashi <takahiro.akashi@linaro.org>
		Mon, 3 Apr 2017 02:24:34 +0000 (11:24 +0900)
committer	AKASHI Takahiro <takahiro.akashi@linaro.org>
		Mon, 19 Jun 2017 06:09:04 +0000 (15:09 +0900)
arch_kexec_protect_crashkres() and arch_kexec_unprotect_crashkres()
are meant to be called by kexec_load() in order to protect the memory
allocated for the crash dump kernel once the image is loaded.
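
For reference, this is roughly how the generic kexec code drives these
hooks (a sketch abridged from do_kexec_load() in kernel/kexec.c, which
gained the protection hooks around v4.8; the exact flow varies across
kernel versions):

	if (flags & KEXEC_ON_CRASH) {
		dest_image = &kexec_crash_image;
		/* make the region accessible again before reloading */
		if (kexec_crash_image)
			arch_kexec_unprotect_crashkres();
	}

	/* ... allocate the image and load its segments ... */

	if (flags & KEXEC_ON_CRASH)
		/* loading done: unmap the region until it is needed */
		arch_kexec_protect_crashkres();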

The protection is implemented by unmapping the relevant segments of the
crash dump kernel memory, rather than making them read-only as other
architectures do, in order to prevent coherency issues caused by
potential cache aliasing with mismatched attributes.

Page-level mappings are used consistently here so that we can change
the attributes of segments at page granularity, and also shrink the
region at page granularity through /sys/kernel/kexec_crash_size,
returning the freed memory to the buddy system.

Signed-off-by: AKASHI Takahiro <takahiro.akashi@linaro.org>
Reviewed-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Conflicts:
	arch/arm64/mm/mmu.c
	The file has been heavily refactored in v4.12, in particular by:
	commit 951366d4b7 ("arm64/mmu: replace 'page_mappings_only'
	parameter with flags argument")
	commit f14c66ce81b5 ("arm64: mm: replace 'block_mappings_allowed'
	with 'page_mappings_only'")
	commit 5ea5306c3235 ("arm64: alternatives: apply boot time fixups
	via the linear mapping")

arch/arm64/kernel/machine_kexec.c
arch/arm64/mm/mmu.c

diff --git a/arch/arm64/kernel/machine_kexec.c b/arch/arm64/kernel/machine_kexec.c
index bc96c8a7fc79b60bd2f9f4a248d46a71dda36e58..b63baa7496090099b0ed5975140d99ca322e715e 100644
--- a/arch/arm64/kernel/machine_kexec.c
+++ b/arch/arm64/kernel/machine_kexec.c
@@ -14,7 +14,9 @@
 
 #include <asm/cacheflush.h>
 #include <asm/cpu_ops.h>
+#include <asm/mmu.h>
 #include <asm/mmu_context.h>
+#include <asm/page.h>
 
 #include "cpu-reset.h"
 
@@ -22,8 +24,6 @@
 extern const unsigned char arm64_relocate_new_kernel[];
 extern const unsigned long arm64_relocate_new_kernel_size;
 
-static unsigned long kimage_start;
-
 /**
  * kexec_image_info - For debugging output.
  */
@@ -64,8 +64,6 @@ void machine_kexec_cleanup(struct kimage *kimage)
  */
 int machine_kexec_prepare(struct kimage *kimage)
 {
-       kimage_start = kimage->start;
-
        kexec_image_info(kimage);
 
        if (kimage->type != KEXEC_TYPE_CRASH && cpus_are_stuck_in_kernel()) {
@@ -183,7 +181,7 @@ void machine_kexec(struct kimage *kimage)
        kexec_list_flush(kimage);
 
        /* Flush the new image if already in place. */
-       if (kimage->head & IND_DONE)
+       if ((kimage != kexec_crash_image) && (kimage->head & IND_DONE))
                kexec_segment_flush(kimage);
 
        pr_info("Bye!\n");
@@ -201,7 +199,7 @@ void machine_kexec(struct kimage *kimage)
         */
 
        cpu_soft_restart(1, reboot_code_buffer_phys, kimage->head,
-               kimage_start, 0);
+               kimage->start, 0);
 
        BUG(); /* Should never get here. */
 }
@@ -210,3 +208,25 @@ void machine_crash_shutdown(struct pt_regs *regs)
 {
        /* Empty routine needed to avoid build errors. */
 }
+
+void arch_kexec_protect_crashkres(void)
+{
+       int i;
+
+       kexec_segment_flush(kexec_crash_image);
+
+       for (i = 0; i < kexec_crash_image->nr_segments; i++)
+               set_memory_valid(
+                       __phys_to_virt(kexec_crash_image->segment[i].mem),
+                       kexec_crash_image->segment[i].memsz >> PAGE_SHIFT, 0);
+}
+
+void arch_kexec_unprotect_crashkres(void)
+{
+       int i;
+
+       for (i = 0; i < kexec_crash_image->nr_segments; i++)
+               set_memory_valid(
+                       __phys_to_virt(kexec_crash_image->segment[i].mem),
+                       kexec_crash_image->segment[i].memsz >> PAGE_SHIFT, 1);
+}
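
For context, the set_memory_valid() helper used above (added alongside
this series in arch/arm64/mm/pageattr.c) sets or clears PTE_VALID on
the page-level linear map entries covering the region; a sketch of its
core logic:

	int set_memory_valid(unsigned long addr, int numpages, int enable)
	{
		if (enable)
			return __change_memory_common(addr, PAGE_SIZE * numpages,
						      __pgprot(PTE_VALID),
						      __pgprot(0));
		else
			return __change_memory_common(addr, PAGE_SIZE * numpages,
						      __pgprot(0),
						      __pgprot(PTE_VALID));
	}

With enable == 0 the entries become invalid, so any stray access to the
loaded crash dump image faults rather than going through a cacheable
alias; this is also why arch_kexec_protect_crashkres() flushes the
segments before unmapping them.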
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index f93dcea6e686d2de247550ff15c40283898513e5..d23342804230ac53ebc9eb9b030b5831bf6602ce 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -21,6 +21,8 @@
 #include <linux/kernel.h>
 #include <linux/errno.h>
 #include <linux/init.h>
+#include <linux/ioport.h>
+#include <linux/kexec.h>
 #include <linux/libfdt.h>
 #include <linux/mman.h>
 #include <linux/nodemask.h>
@@ -368,41 +370,46 @@ static void create_mapping_late(phys_addr_t phys, unsigned long virt,
                             late_pgtable_alloc, !debug_pagealloc_enabled());
 }
 
-static void __init __map_memblock(pgd_t *pgd, phys_addr_t start, phys_addr_t end)
+static void __init __map_memblock(pgd_t *pgd, phys_addr_t start,
+                                 phys_addr_t end, pgprot_t prot,
+                                 bool allow_block_mappings)
+{
+       __create_pgd_mapping(pgd, start, __phys_to_virt(start), end - start,
+                            prot, early_pgtable_alloc, allow_block_mappings);
+}
+
+static void __init map_mem(pgd_t *pgd)
 {
        unsigned long kernel_start = __pa(_text);
        unsigned long kernel_end = __pa(_etext);
+       struct memblock_region *reg;
 
        /*
         * Take care not to create a writable alias for the
         * read-only text and rodata sections of the kernel image.
+        * So temporarily mark them as NOMAP to skip mappings in
+        * the following for-loop
         */
+       memblock_mark_nomap(kernel_start, kernel_end - kernel_start);
+#ifdef CONFIG_KEXEC_CORE
+       if (crashk_res.end)
+               memblock_mark_nomap(crashk_res.start,
+                                   resource_size(&crashk_res));
+#endif
 
-       /* No overlap with the kernel text */
-       if (end < kernel_start || start >= kernel_end) {
-               __create_pgd_mapping(pgd, start, __phys_to_virt(start),
-                                    end - start, PAGE_KERNEL,
-                                    early_pgtable_alloc,
-                                    !debug_pagealloc_enabled());
-               return;
-       }
+       /* map all the memory banks */
+       for_each_memblock(memory, reg) {
+               phys_addr_t start = reg->base;
+               phys_addr_t end = start + reg->size;
 
-       /*
-        * This block overlaps the kernel text mapping.
-        * Map the portion(s) which don't overlap.
-        */
-       if (start < kernel_start)
-               __create_pgd_mapping(pgd, start,
-                                    __phys_to_virt(start),
-                                    kernel_start - start, PAGE_KERNEL,
-                                    early_pgtable_alloc,
-                                    !debug_pagealloc_enabled());
-       if (kernel_end < end)
-               __create_pgd_mapping(pgd, kernel_end,
-                                    __phys_to_virt(kernel_end),
-                                    end - kernel_end, PAGE_KERNEL,
-                                    early_pgtable_alloc,
-                                    !debug_pagealloc_enabled());
+               if (start >= end)
+                       break;
+               if (memblock_is_nomap(reg))
+                       continue;
+
+               __map_memblock(pgd, start, end,
+                              PAGE_KERNEL, !debug_pagealloc_enabled());
+       }
 
        /*
         * Map the linear alias of the [_text, _etext) interval as
@@ -410,25 +417,23 @@ static void __init __map_memblock(pgd_t *pgd, phys_addr_t start, phys_addr_t end
         * region accessible to subsystems such as hibernate, but
         * protects it from inadvertent modification or execution.
         */
-       __create_pgd_mapping(pgd, kernel_start, __phys_to_virt(kernel_start),
-                            kernel_end - kernel_start, PAGE_KERNEL_RO,
-                            early_pgtable_alloc, !debug_pagealloc_enabled());
-}
+       __map_memblock(pgd, kernel_start, kernel_end,
+                      PAGE_KERNEL_RO, !debug_pagealloc_enabled());
+       memblock_clear_nomap(kernel_start, kernel_end - kernel_start);
 
-static void __init map_mem(pgd_t *pgd)
-{
-       struct memblock_region *reg;
-
-       /* map all the memory banks */
-       for_each_memblock(memory, reg) {
-               phys_addr_t start = reg->base;
-               phys_addr_t end = start + reg->size;
-
-               if (start >= end)
-                       break;
-
-               __map_memblock(pgd, start, end);
+#ifdef CONFIG_KEXEC_CORE
+       /*
+        * Use page-level mappings here so that we can shrink the region
+        * in page granularity and put back unused memory to buddy system
+        * through /sys/kernel/kexec_crash_size interface.
+        */
+       if (crashk_res.end) {
+               __map_memblock(pgd, crashk_res.start, crashk_res.end + 1,
+                              PAGE_KERNEL, false);
+               memblock_clear_nomap(crashk_res.start,
+                                    resource_size(&crashk_res));
        }
+#endif
 }
 
 void mark_rodata_ro(void)
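
As a usage note, mapping the crash kernel region with page mappings is
what lets the generic /sys/kernel/kexec_crash_size handler return unused
pages to the buddy allocator. A sketch of that path, abridged from
kernel/kexec_core.c (the pfn-to-page helper name differs across kernel
versions):

	void __weak crash_free_reserved_phys_range(unsigned long begin,
						   unsigned long end)
	{
		unsigned long addr;

		/* hand each freed page back to the buddy allocator */
		for (addr = begin; addr < end; addr += PAGE_SIZE)
			free_reserved_page(pfn_to_page(addr >> PAGE_SHIFT));
	}

crash_shrink_memory() invokes this on the tail of the reservation, which
only works if the linear map there can be modified at page granularity.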