mm: pagewalk: fix misbehavior of walk_page_range for vma(VM_PFNMAP)
[firefly-linux-kernel-4.4.55.git] / fs / proc / task_mmu.c
index eeab30fcffcc1d09b95bfbfc9d423b1d379a2969..f5ca96524f5f9f48c577cd05c15ea2fac37d4304 100644 (file)
@@ -436,7 +436,6 @@ const struct file_operations proc_tid_maps_operations = {
 
 #ifdef CONFIG_PROC_PAGE_MONITOR
 struct mem_size_stats {
-       struct vm_area_struct *vma;
        unsigned long resident;
        unsigned long shared_clean;
        unsigned long shared_dirty;
@@ -485,7 +484,7 @@ static void smaps_pte_entry(pte_t *pte, unsigned long addr,
                struct mm_walk *walk)
 {
        struct mem_size_stats *mss = walk->private;
-       struct vm_area_struct *vma = mss->vma;
+       struct vm_area_struct *vma = walk->vma;
        struct page *page = NULL;
 
        if (pte_present(*pte)) {
@@ -509,7 +508,7 @@ static void smaps_pmd_entry(pmd_t *pmd, unsigned long addr,
                struct mm_walk *walk)
 {
        struct mem_size_stats *mss = walk->private;
-       struct vm_area_struct *vma = mss->vma;
+       struct vm_area_struct *vma = walk->vma;
        struct page *page;
 
        /* FOLL_DUMP will return -EFAULT on huge zero page */
@@ -530,8 +529,7 @@ static void smaps_pmd_entry(pmd_t *pmd, unsigned long addr,
 static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
                           struct mm_walk *walk)
 {
-       struct mem_size_stats *mss = walk->private;
-       struct vm_area_struct *vma = mss->vma;
+       struct vm_area_struct *vma = walk->vma;
        pte_t *pte;
        spinlock_t *ptl;
 
@@ -623,10 +621,8 @@ static int show_smap(struct seq_file *m, void *v, int is_pid)
        };
 
        memset(&mss, 0, sizeof mss);
-       mss.vma = vma;
        /* mmap_sem is held in m_start */
-       if (vma->vm_mm && !is_vm_hugetlb_page(vma))
-               walk_page_range(vma->vm_start, vma->vm_end, &smaps_walk);
+       walk_page_vma(vma, &smaps_walk);
 
        show_map_vma(m, vma, is_pid);
 
@@ -740,7 +736,6 @@ enum clear_refs_types {
 };
 
 struct clear_refs_private {
-       struct vm_area_struct *vma;
        enum clear_refs_types type;
 };
 
@@ -771,7 +766,7 @@ static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr,
                                unsigned long end, struct mm_walk *walk)
 {
        struct clear_refs_private *cp = walk->private;
-       struct vm_area_struct *vma = cp->vma;
+       struct vm_area_struct *vma = walk->vma;
        pte_t *pte, ptent;
        spinlock_t *ptl;
        struct page *page;
@@ -805,6 +800,28 @@ static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr,
        return 0;
 }
 
+static int clear_refs_test_walk(unsigned long start, unsigned long end,
+                               struct mm_walk *walk)
+{
+       struct clear_refs_private *cp = walk->private;
+       struct vm_area_struct *vma = walk->vma;
+
+       if (vma->vm_flags & VM_PFNMAP)
+               return 1;
+
+       /*
+        * The value written to /proc/pid/clear_refs selects the pages:
+        * 1 and 4 affect all pages, 2 only anonymous pages, 3 only file
+        * mapped pages.  vmas of the other kind return 1 below, like the
+        * VM_PFNMAP case above (NOTE: presumably "skip this vma").
+        */
+       if (cp->type == CLEAR_REFS_ANON && vma->vm_file)
+               return 1;
+       if (cp->type == CLEAR_REFS_MAPPED && !vma->vm_file)
+               return 1;
+       return 0;
+}
+
 static ssize_t clear_refs_write(struct file *file, const char __user *buf,
                                size_t count, loff_t *ppos)
 {
@@ -845,6 +862,7 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf,
                };
                struct mm_walk clear_refs_walk = {
                        .pmd_entry = clear_refs_pte_range,
+                       .test_walk = clear_refs_test_walk,
                        .mm = mm,
                        .private = &cp,
                };
@@ -864,28 +882,7 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf,
                        }
                        mmu_notifier_invalidate_range_start(mm, 0, -1);
                }
-               for (vma = mm->mmap; vma; vma = vma->vm_next) {
-                       cp.vma = vma;
-                       if (is_vm_hugetlb_page(vma))
-                               continue;
-                       /*
-                        * Writing 1 to /proc/pid/clear_refs affects all pages.
-                        *
-                        * Writing 2 to /proc/pid/clear_refs only affects
-                        * Anonymous pages.
-                        *
-                        * Writing 3 to /proc/pid/clear_refs only affects file
-                        * mapped pages.
-                        *
-                        * Writing 4 to /proc/pid/clear_refs affects all pages.
-                        */
-                       if (type == CLEAR_REFS_ANON && vma->vm_file)
-                               continue;
-                       if (type == CLEAR_REFS_MAPPED && !vma->vm_file)
-                               continue;
-                       walk_page_range(vma->vm_start, vma->vm_end,
-                                       &clear_refs_walk);
-               }
+               walk_page_range(0, ~0UL, &clear_refs_walk);
                if (type == CLEAR_REFS_SOFT_DIRTY)
                        mmu_notifier_invalidate_range_end(mm, 0, -1);
                flush_tlb_mm(mm);
@@ -1053,15 +1050,13 @@ static inline void thp_pmd_to_pagemap_entry(pagemap_entry_t *pme, struct pagemap
 static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
                             struct mm_walk *walk)
 {
-       struct vm_area_struct *vma;
+       struct vm_area_struct *vma = walk->vma;
        struct pagemapread *pm = walk->private;
        spinlock_t *ptl;
        pte_t *pte, *orig_pte;
        int err = 0;
 
-       /* find the first VMA at or above 'addr' */
-       vma = find_vma(walk->mm, addr);
-       if (vma && pmd_trans_huge_lock(pmd, vma, &ptl) == 1) {
+       if (pmd_trans_huge_lock(pmd, vma, &ptl) == 1) {
                int pmd_flags2;
 
                if ((vma->vm_flags & VM_SOFTDIRTY) || pmd_soft_dirty(*pmd))
@@ -1087,55 +1082,20 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
        if (pmd_trans_unstable(pmd))
                return 0;
 
-       while (1) {
-               /* End of address space hole, which we mark as non-present. */
-               unsigned long hole_end;
-
-               if (vma)
-                       hole_end = min(end, vma->vm_start);
-               else
-                       hole_end = end;
-
-               for (; addr < hole_end; addr += PAGE_SIZE) {
-                       pagemap_entry_t pme = make_pme(PM_NOT_PRESENT(pm->v2));
-
-                       err = add_to_pagemap(addr, &pme, pm);
-                       if (err)
-                               return err;
-               }
-
-               if (!vma || vma->vm_start >= end)
-                       break;
-               /*
-                * We can't possibly be in a hugetlb VMA. In general,
-                * for a mm_walk with a pmd_entry and a hugetlb_entry,
-                * the pmd_entry can only be called on addresses in a
-                * hugetlb if the walk starts in a non-hugetlb VMA and
-                * spans a hugepage VMA. Since pagemap_read walks are
-                * PMD-sized and PMD-aligned, this will never be true.
-                */
-               BUG_ON(is_vm_hugetlb_page(vma));
-
-               /* Addresses in the VMA. */
-               orig_pte = pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);
-               for (; addr < min(end, vma->vm_end); pte++, addr += PAGE_SIZE) {
-                       pagemap_entry_t pme;
-
-                       pte_to_pagemap_entry(&pme, pm, vma, addr, *pte);
-                       err = add_to_pagemap(addr, &pme, pm);
-                       if (err)
-                               break;
-               }
-               pte_unmap_unlock(orig_pte, ptl);
+       /*
+        * We can assume that @vma always points to a valid vma and that
+        * @end never goes beyond vma->vm_end.
+        */
+       orig_pte = pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);
+       for (; addr < end; pte++, addr += PAGE_SIZE) {
+               pagemap_entry_t pme;
 
+               pte_to_pagemap_entry(&pme, pm, vma, addr, *pte);
+               err = add_to_pagemap(addr, &pme, pm);
                if (err)
-                       return err;
-
-               if (addr == end)
                        break;
-
-               vma = find_vma(walk->mm, addr);
        }
+       pte_unmap_unlock(orig_pte, ptl);
 
        cond_resched();
 
@@ -1161,15 +1121,12 @@ static int pagemap_hugetlb_range(pte_t *pte, unsigned long hmask,
                                 struct mm_walk *walk)
 {
        struct pagemapread *pm = walk->private;
-       struct vm_area_struct *vma;
+       struct vm_area_struct *vma = walk->vma;
        int err = 0;
        int flags2;
        pagemap_entry_t pme;
 
-       vma = find_vma(walk->mm, addr);
-       WARN_ON_ONCE(!vma);
-
-       if (vma && (vma->vm_flags & VM_SOFTDIRTY))
+       if (vma->vm_flags & VM_SOFTDIRTY)
                flags2 = __PM_SOFT_DIRTY;
        else
                flags2 = 0;
@@ -1329,7 +1286,6 @@ const struct file_operations proc_pagemap_operations = {
 #ifdef CONFIG_NUMA
 
 struct numa_maps {
-       struct vm_area_struct *vma;
        unsigned long pages;
        unsigned long anon;
        unsigned long active;
@@ -1398,18 +1354,17 @@ static struct page *can_gather_numa_stats(pte_t pte, struct vm_area_struct *vma,
 static int gather_pte_stats(pmd_t *pmd, unsigned long addr,
                unsigned long end, struct mm_walk *walk)
 {
-       struct numa_maps *md;
+       struct numa_maps *md = walk->private;
+       struct vm_area_struct *vma = walk->vma;
        spinlock_t *ptl;
        pte_t *orig_pte;
        pte_t *pte;
 
-       md = walk->private;
-
-       if (pmd_trans_huge_lock(pmd, md->vma, &ptl) == 1) {
+       if (pmd_trans_huge_lock(pmd, vma, &ptl) == 1) {
                pte_t huge_pte = *(pte_t *)pmd;
                struct page *page;
 
-               page = can_gather_numa_stats(huge_pte, md->vma, addr);
+               page = can_gather_numa_stats(huge_pte, vma, addr);
                if (page)
                        gather_stats(page, md, pte_dirty(huge_pte),
                                     HPAGE_PMD_SIZE/PAGE_SIZE);
@@ -1421,7 +1376,7 @@ static int gather_pte_stats(pmd_t *pmd, unsigned long addr,
                return 0;
        orig_pte = pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);
        do {
-               struct page *page = can_gather_numa_stats(*pte, md->vma, addr);
+               struct page *page = can_gather_numa_stats(*pte, vma, addr);
                if (!page)
                        continue;
                gather_stats(page, md, pte_dirty(*pte), 1);
@@ -1431,7 +1386,7 @@ static int gather_pte_stats(pmd_t *pmd, unsigned long addr,
        return 0;
 }
 #ifdef CONFIG_HUGETLB_PAGE
-static int gather_hugetbl_stats(pte_t *pte, unsigned long hmask,
+static int gather_hugetlb_stats(pte_t *pte, unsigned long hmask,
                unsigned long addr, unsigned long end, struct mm_walk *walk)
 {
        struct numa_maps *md;
@@ -1450,7 +1405,7 @@ static int gather_hugetbl_stats(pte_t *pte, unsigned long hmask,
 }
 
 #else
-static int gather_hugetbl_stats(pte_t *pte, unsigned long hmask,
+static int gather_hugetlb_stats(pte_t *pte, unsigned long hmask,
                unsigned long addr, unsigned long end, struct mm_walk *walk)
 {
        return 0;
@@ -1468,7 +1423,12 @@ static int show_numa_map(struct seq_file *m, void *v, int is_pid)
        struct numa_maps *md = &numa_priv->md;
        struct file *file = vma->vm_file;
        struct mm_struct *mm = vma->vm_mm;
-       struct mm_walk walk = {};
+       struct mm_walk walk = {
+               .hugetlb_entry = gather_hugetlb_stats,
+               .pmd_entry = gather_pte_stats,
+               .private = md,
+               .mm = mm,
+       };
        struct mempolicy *pol;
        char buffer[64];
        int nid;
@@ -1479,13 +1439,6 @@ static int show_numa_map(struct seq_file *m, void *v, int is_pid)
        /* Ensure we start with an empty set of numa_maps statistics. */
        memset(md, 0, sizeof(*md));
 
-       md->vma = vma;
-
-       walk.hugetlb_entry = gather_hugetbl_stats;
-       walk.pmd_entry = gather_pte_stats;
-       walk.private = md;
-       walk.mm = mm;
-
        pol = __get_vma_policy(vma, vma->vm_start);
        if (pol) {
                mpol_to_str(buffer, sizeof(buffer), pol);
@@ -1519,7 +1472,8 @@ static int show_numa_map(struct seq_file *m, void *v, int is_pid)
        if (is_vm_hugetlb_page(vma))
                seq_puts(m, " huge");
 
-       walk_page_range(vma->vm_start, vma->vm_end, &walk);
+       /* mmap_sem is held by m_start */
+       walk_page_vma(vma, &walk);
 
        if (!md->pages)
                goto out;