mm: pagewalk: fix misbehavior of walk_page_range for vma(VM_PFNMAP)
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 246eae84b13b1be4bf36bdf48fe50b178a9cb407..f5ca96524f5f9f48c577cd05c15ea2fac37d4304 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
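
For orientation, these are the struct mm_walk fields the hunks below rely on, paraphrased from include/linux/mm.h of this kernel generation (a sketch, not the verbatim header). The point of the series is visible here already: the pagewalk core records the current vma in walk->vma before invoking the entry callbacks, and an optional test_walk() callback lets each caller accept or skip a vma instead of open-coding its own vma loop.

struct mm_walk {
	int (*pmd_entry)(pmd_t *pmd, unsigned long addr,
			 unsigned long next, struct mm_walk *walk);
	int (*pte_entry)(pte_t *pte, unsigned long addr,
			 unsigned long next, struct mm_walk *walk);
	int (*pte_hole)(unsigned long addr, unsigned long next,
			struct mm_walk *walk);
	int (*hugetlb_entry)(pte_t *pte, unsigned long hmask,
			     unsigned long addr, unsigned long next,
			     struct mm_walk *walk);
	int (*test_walk)(unsigned long addr, unsigned long next,
			 struct mm_walk *walk); /* 0: walk, >0: skip, <0: abort */
	struct mm_struct *mm;
	struct vm_area_struct *vma;	/* set by the core for each vma */
	void *private;
};
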
@@ -21,7 +21,7 @@
 
 void task_mem(struct seq_file *m, struct mm_struct *mm)
 {
-       unsigned long data, text, lib, swap;
+       unsigned long data, text, lib, swap, ptes, pmds;
        unsigned long hiwater_vm, total_vm, hiwater_rss, total_rss;
 
        /*
@@ -42,6 +42,8 @@ void task_mem(struct seq_file *m, struct mm_struct *mm)
        text = (PAGE_ALIGN(mm->end_code) - (mm->start_code & PAGE_MASK)) >> 10;
        lib = (mm->exec_vm << (PAGE_SHIFT-10)) - text;
        swap = get_mm_counter(mm, MM_SWAPENTS);
+       ptes = PTRS_PER_PTE * sizeof(pte_t) * atomic_long_read(&mm->nr_ptes);
+       pmds = PTRS_PER_PMD * sizeof(pmd_t) * mm_nr_pmds(mm);
        seq_printf(m,
                "VmPeak:\t%8lu kB\n"
                "VmSize:\t%8lu kB\n"
@@ -54,6 +56,7 @@ void task_mem(struct seq_file *m, struct mm_struct *mm)
                "VmExe:\t%8lu kB\n"
                "VmLib:\t%8lu kB\n"
                "VmPTE:\t%8lu kB\n"
+               "VmPMD:\t%8lu kB\n"
                "VmSwap:\t%8lu kB\n",
                hiwater_vm << (PAGE_SHIFT-10),
                total_vm << (PAGE_SHIFT-10),
@@ -63,8 +66,8 @@ void task_mem(struct seq_file *m, struct mm_struct *mm)
                total_rss << (PAGE_SHIFT-10),
                data << (PAGE_SHIFT-10),
                mm->stack_vm << (PAGE_SHIFT-10), text, lib,
-               (PTRS_PER_PTE * sizeof(pte_t) *
-                atomic_long_read(&mm->nr_ptes)) >> 10,
+               ptes >> 10,
+               pmds >> 10,
                swap << (PAGE_SHIFT-10));
 }
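
The new VmPTE/VmPMD lines are plain bookkeeping: one page-table page holds PTRS_PER_PTE (or PTRS_PER_PMD) entries of sizeof(pte_t) (or sizeof(pmd_t)) bytes, and mm->nr_ptes / mm_nr_pmds(mm) count how many such pages the process has allocated. A worked example with x86_64 values (512 entries of 8 bytes each, so every table is exactly one 4 KiB page; the counts are made up for illustration):

	unsigned long nr_ptes = 100, nr_pmds = 3;	/* illustrative */
	unsigned long ptes = 512 * 8 * nr_ptes;		/* 409600 bytes */
	unsigned long pmds = 512 * 8 * nr_pmds;		/*  12288 bytes */

	/* printed as ptes >> 10 and pmds >> 10, i.e. in /proc/pid/status:
	 *	VmPTE:	     400 kB
	 *	VmPMD:	      12 kB
	 */
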
 
@@ -433,7 +436,6 @@ const struct file_operations proc_tid_maps_operations = {
 
 #ifdef CONFIG_PROC_PAGE_MONITOR
 struct mem_size_stats {
-       struct vm_area_struct *vma;
        unsigned long resident;
        unsigned long shared_clean;
        unsigned long shared_dirty;
@@ -443,7 +445,6 @@ struct mem_size_stats {
        unsigned long anonymous;
        unsigned long anonymous_thp;
        unsigned long swap;
-       unsigned long nonlinear;
        u64 pss;
 };
 
@@ -483,8 +484,7 @@ static void smaps_pte_entry(pte_t *pte, unsigned long addr,
                struct mm_walk *walk)
 {
        struct mem_size_stats *mss = walk->private;
-       struct vm_area_struct *vma = mss->vma;
-       pgoff_t pgoff = linear_page_index(vma, addr);
+       struct vm_area_struct *vma = walk->vma;
        struct page *page = NULL;
 
        if (pte_present(*pte)) {
@@ -496,17 +496,10 @@ static void smaps_pte_entry(pte_t *pte, unsigned long addr,
                        mss->swap += PAGE_SIZE;
                else if (is_migration_entry(swpent))
                        page = migration_entry_to_page(swpent);
-       } else if (pte_file(*pte)) {
-               if (pte_to_pgoff(*pte) != pgoff)
-                       mss->nonlinear += PAGE_SIZE;
        }
 
        if (!page)
                return;
-
-       if (page->index != pgoff)
-               mss->nonlinear += PAGE_SIZE;
-
        smaps_account(mss, page, PAGE_SIZE, pte_young(*pte), pte_dirty(*pte));
 }
 
@@ -515,7 +508,7 @@ static void smaps_pmd_entry(pmd_t *pmd, unsigned long addr,
                struct mm_walk *walk)
 {
        struct mem_size_stats *mss = walk->private;
-       struct vm_area_struct *vma = mss->vma;
+       struct vm_area_struct *vma = walk->vma;
        struct page *page;
 
        /* FOLL_DUMP will return -EFAULT on huge zero page */
@@ -536,8 +529,7 @@ static void smaps_pmd_entry(pmd_t *pmd, unsigned long addr,
 static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
                           struct mm_walk *walk)
 {
-       struct mem_size_stats *mss = walk->private;
-       struct vm_area_struct *vma = mss->vma;
+       struct vm_area_struct *vma = walk->vma;
        pte_t *pte;
        spinlock_t *ptl;
 
@@ -596,7 +588,6 @@ static void show_smap_vma_flags(struct seq_file *m, struct vm_area_struct *vma)
                [ilog2(VM_ACCOUNT)]     = "ac",
                [ilog2(VM_NORESERVE)]   = "nr",
                [ilog2(VM_HUGETLB)]     = "ht",
-               [ilog2(VM_NONLINEAR)]   = "nl",
                [ilog2(VM_ARCH_1)]      = "ar",
                [ilog2(VM_DONTDUMP)]    = "dd",
 #ifdef CONFIG_MEM_SOFT_DIRTY
@@ -630,10 +621,8 @@ static int show_smap(struct seq_file *m, void *v, int is_pid)
        };
 
        memset(&mss, 0, sizeof mss);
-       mss.vma = vma;
        /* mmap_sem is held in m_start */
-       if (vma->vm_mm && !is_vm_hugetlb_page(vma))
-               walk_page_range(vma->vm_start, vma->vm_end, &smaps_walk);
+       walk_page_vma(vma, &smaps_walk);
 
        show_map_vma(m, vma, is_pid);
 
@@ -668,10 +657,6 @@ static int show_smap(struct seq_file *m, void *v, int is_pid)
                   (vma->vm_flags & VM_LOCKED) ?
                        (unsigned long)(mss.pss >> (10 + PSS_SHIFT)) : 0);
 
-       if (vma->vm_flags & VM_NONLINEAR)
-               seq_printf(m, "Nonlinear:      %8lu kB\n",
-                               mss.nonlinear >> 10);
-
        show_smap_vma_flags(m, vma);
        m_cache_vma(m, vma);
        return 0;
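
walk_page_vma() is the single-vma entry point of the reworked pagewalk core. Paraphrased from mm/pagewalk.c of this era (a sketch, not the exact source): it publishes the vma in walk->vma, runs the test_walk filter, then walks the vma's page tables, which is why the hand-rolled vm_mm/hugetlb guard above could be deleted.

int walk_page_vma(struct vm_area_struct *vma, struct mm_walk *walk)
{
	int err;

	if (!walk->mm)
		return -EINVAL;
	/* caller holds mmap_sem; for smaps, m_start() took it */
	walk->vma = vma;
	err = walk_page_test(vma->vm_start, vma->vm_end, walk);
	if (err > 0)
		return 0;	/* test_walk asked to skip this vma */
	if (err < 0)
		return err;
	return __walk_page_range(vma->vm_start, vma->vm_end, walk);
}
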
@@ -751,7 +736,6 @@ enum clear_refs_types {
 };
 
 struct clear_refs_private {
-       struct vm_area_struct *vma;
        enum clear_refs_types type;
 };
 
@@ -772,8 +756,6 @@ static inline void clear_soft_dirty(struct vm_area_struct *vma,
                ptent = pte_clear_flags(ptent, _PAGE_SOFT_DIRTY);
        } else if (is_swap_pte(ptent)) {
                ptent = pte_swp_clear_soft_dirty(ptent);
-       } else if (pte_file(ptent)) {
-               ptent = pte_file_clear_soft_dirty(ptent);
        }
 
        set_pte_at(vma->vm_mm, addr, pte, ptent);
@@ -784,7 +766,7 @@ static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr,
                                unsigned long end, struct mm_walk *walk)
 {
        struct clear_refs_private *cp = walk->private;
-       struct vm_area_struct *vma = cp->vma;
+       struct vm_area_struct *vma = walk->vma;
        pte_t *pte, ptent;
        spinlock_t *ptl;
        struct page *page;
@@ -818,6 +800,28 @@ static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr,
        return 0;
 }
 
+static int clear_refs_test_walk(unsigned long start, unsigned long end,
+                               struct mm_walk *walk)
+{
+       struct clear_refs_private *cp = walk->private;
+       struct vm_area_struct *vma = walk->vma;
+
+       if (vma->vm_flags & VM_PFNMAP)
+               return 1;
+
+       /*
+        * Writing 1 to /proc/pid/clear_refs affects all pages.
+        * Writing 2 to /proc/pid/clear_refs only affects anonymous pages.
+        * Writing 3 to /proc/pid/clear_refs only affects file mapped pages.
+        * Writing 4 to /proc/pid/clear_refs affects all pages.
+        */
+       if (cp->type == CLEAR_REFS_ANON && vma->vm_file)
+               return 1;
+       if (cp->type == CLEAR_REFS_MAPPED && !vma->vm_file)
+               return 1;
+       return 0;
+}
+
 static ssize_t clear_refs_write(struct file *file, const char __user *buf,
                                size_t count, loff_t *ppos)
 {
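
clear_refs_test_walk() follows the test_walk() contract: return 0 to walk the vma's page tables, a positive value to skip the vma and continue with the next one, or a negative errno to abort the whole walk. The core consults it once per vma; when no test_walk is supplied, the default policy is what the subject line refers to: vma(VM_PFNMAP) ranges have no struct pages behind them, so generic walkers must not touch them. Roughly (paraphrased from mm/pagewalk.c):

static int walk_page_test(unsigned long start, unsigned long end,
			  struct mm_walk *walk)
{
	struct vm_area_struct *vma = walk->vma;

	if (walk->test_walk)
		return walk->test_walk(start, end, walk);

	/*
	 * Default: don't walk over vma(VM_PFNMAP), but let callers that
	 * care about holes see the range through ->pte_hole.
	 */
	if (vma->vm_flags & VM_PFNMAP) {
		int err = 1;

		if (walk->pte_hole)
			err = walk->pte_hole(start, end, walk);
		return err ? err : 1;
	}
	return 0;
}
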
@@ -858,6 +862,7 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf,
                };
                struct mm_walk clear_refs_walk = {
                        .pmd_entry = clear_refs_pte_range,
+                       .test_walk = clear_refs_test_walk,
                        .mm = mm,
                        .private = &cp,
                };
@@ -877,28 +882,7 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf,
                        }
                        mmu_notifier_invalidate_range_start(mm, 0, -1);
                }
-               for (vma = mm->mmap; vma; vma = vma->vm_next) {
-                       cp.vma = vma;
-                       if (is_vm_hugetlb_page(vma))
-                               continue;
-                       /*
-                        * Writing 1 to /proc/pid/clear_refs affects all pages.
-                        *
-                        * Writing 2 to /proc/pid/clear_refs only affects
-                        * Anonymous pages.
-                        *
-                        * Writing 3 to /proc/pid/clear_refs only affects file
-                        * mapped pages.
-                        *
-                        * Writing 4 to /proc/pid/clear_refs affects all pages.
-                        */
-                       if (type == CLEAR_REFS_ANON && vma->vm_file)
-                               continue;
-                       if (type == CLEAR_REFS_MAPPED && !vma->vm_file)
-                               continue;
-                       walk_page_range(vma->vm_start, vma->vm_end,
-                                       &clear_refs_walk);
-               }
+               walk_page_range(0, ~0UL, &clear_refs_walk);
                if (type == CLEAR_REFS_SOFT_DIRTY)
                        mmu_notifier_invalidate_range_end(mm, 0, -1);
                flush_tlb_mm(mm);
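
The deleted per-vma loop moved into walk_page_range() itself, which is why a single walk_page_range(0, ~0UL, ...) call now covers the whole address space. A sketch of the reworked core loop (paraphrased; the real code lives in mm/pagewalk.c): the core visits each vma in turn, fills walk->vma before calling the entry callbacks (so they never see a NULL vma), honours test_walk, and treats gaps between vmas as holes with walk->vma == NULL.

int walk_page_range(unsigned long start, unsigned long end,
		    struct mm_walk *walk)
{
	int err = 0;
	unsigned long next;
	struct vm_area_struct *vma = find_vma(walk->mm, start);

	do {
		if (!vma) {			/* after the last vma */
			walk->vma = NULL;
			next = end;
		} else if (start < vma->vm_start) {	/* hole before vma */
			walk->vma = NULL;
			next = min(end, vma->vm_start);
		} else {			/* inside vma */
			walk->vma = vma;
			next = min(end, vma->vm_end);
			vma = vma->vm_next;
			err = walk_page_test(start, next, walk);
			if (err > 0) {
				err = 0;	/* skip this vma */
				continue;
			}
			if (err < 0)
				break;
		}
		if (walk->vma || walk->pte_hole)
			err = __walk_page_range(start, next, walk);
		if (err)
			break;
	} while (start = next, start < end);

	return err;
}
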
@@ -1066,15 +1050,13 @@ static inline void thp_pmd_to_pagemap_entry(pagemap_entry_t *pme, struct pagemap
 static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
                             struct mm_walk *walk)
 {
-       struct vm_area_struct *vma;
+       struct vm_area_struct *vma = walk->vma;
        struct pagemapread *pm = walk->private;
        spinlock_t *ptl;
-       pte_t *pte;
+       pte_t *pte, *orig_pte;
        int err = 0;
 
-       /* find the first VMA at or above 'addr' */
-       vma = find_vma(walk->mm, addr);
-       if (vma && pmd_trans_huge_lock(pmd, vma, &ptl) == 1) {
+       if (pmd_trans_huge_lock(pmd, vma, &ptl) == 1) {
                int pmd_flags2;
 
                if ((vma->vm_flags & VM_SOFTDIRTY) || pmd_soft_dirty(*pmd))
@@ -1100,51 +1082,20 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
        if (pmd_trans_unstable(pmd))
                return 0;
 
-       while (1) {
-               /* End of address space hole, which we mark as non-present. */
-               unsigned long hole_end;
-
-               if (vma)
-                       hole_end = min(end, vma->vm_start);
-               else
-                       hole_end = end;
-
-               for (; addr < hole_end; addr += PAGE_SIZE) {
-                       pagemap_entry_t pme = make_pme(PM_NOT_PRESENT(pm->v2));
-
-                       err = add_to_pagemap(addr, &pme, pm);
-                       if (err)
-                               return err;
-               }
-
-               if (!vma || vma->vm_start >= end)
-                       break;
-               /*
-                * We can't possibly be in a hugetlb VMA. In general,
-                * for a mm_walk with a pmd_entry and a hugetlb_entry,
-                * the pmd_entry can only be called on addresses in a
-                * hugetlb if the walk starts in a non-hugetlb VMA and
-                * spans a hugepage VMA. Since pagemap_read walks are
-                * PMD-sized and PMD-aligned, this will never be true.
-                */
-               BUG_ON(is_vm_hugetlb_page(vma));
-
-               /* Addresses in the VMA. */
-               for (; addr < min(end, vma->vm_end); addr += PAGE_SIZE) {
-                       pagemap_entry_t pme;
-                       pte = pte_offset_map(pmd, addr);
-                       pte_to_pagemap_entry(&pme, pm, vma, addr, *pte);
-                       pte_unmap(pte);
-                       err = add_to_pagemap(addr, &pme, pm);
-                       if (err)
-                               return err;
-               }
+       /*
+        * We can assume that @vma always points to a valid one and @end never
+        * goes beyond vma->vm_end.
+        */
+       orig_pte = pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);
+       for (; addr < end; pte++, addr += PAGE_SIZE) {
+               pagemap_entry_t pme;
 
-               if (addr == end)
+               pte_to_pagemap_entry(&pme, pm, vma, addr, *pte);
+               err = add_to_pagemap(addr, &pme, pm);
+               if (err)
                        break;
-
-               vma = find_vma(walk->mm, addr);
        }
+       pte_unmap_unlock(orig_pte, ptl);
 
        cond_resched();
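
Two behavioural notes on the simplified walker: the explicit hole loop is gone because unmapped ranges are now reported through the walk's ->pte_hole callback, and the PTE scan runs under the page-table lock via pte_offset_map_lock() rather than the bare pte_offset_map(). A hole callback equivalent to the deleted loop would look roughly like this (reconstructed from the removed lines; the function name here is hypothetical, pagemap's real callback is wired up elsewhere in this file):

static int pagemap_fill_hole(unsigned long start, unsigned long end,
			     struct mm_walk *walk)
{
	struct pagemapread *pm = walk->private;
	unsigned long addr;
	int err = 0;

	/* mark every page of the unmapped range as not present */
	for (addr = start; addr < end; addr += PAGE_SIZE) {
		pagemap_entry_t pme = make_pme(PM_NOT_PRESENT(pm->v2));

		err = add_to_pagemap(addr, &pme, pm);
		if (err)
			break;
	}
	return err;
}
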
 
@@ -1170,15 +1121,12 @@ static int pagemap_hugetlb_range(pte_t *pte, unsigned long hmask,
                                 struct mm_walk *walk)
 {
        struct pagemapread *pm = walk->private;
-       struct vm_area_struct *vma;
+       struct vm_area_struct *vma = walk->vma;
        int err = 0;
        int flags2;
        pagemap_entry_t pme;
 
-       vma = find_vma(walk->mm, addr);
-       WARN_ON_ONCE(!vma);
-
-       if (vma && (vma->vm_flags & VM_SOFTDIRTY))
+       if (vma->vm_flags & VM_SOFTDIRTY)
                flags2 = __PM_SOFT_DIRTY;
        else
                flags2 = 0;
@@ -1338,7 +1286,6 @@ const struct file_operations proc_pagemap_operations = {
 #ifdef CONFIG_NUMA
 
 struct numa_maps {
-       struct vm_area_struct *vma;
        unsigned long pages;
        unsigned long anon;
        unsigned long active;
@@ -1407,18 +1354,17 @@ static struct page *can_gather_numa_stats(pte_t pte, struct vm_area_struct *vma,
 static int gather_pte_stats(pmd_t *pmd, unsigned long addr,
                unsigned long end, struct mm_walk *walk)
 {
-       struct numa_maps *md;
+       struct numa_maps *md = walk->private;
+       struct vm_area_struct *vma = walk->vma;
        spinlock_t *ptl;
        pte_t *orig_pte;
        pte_t *pte;
 
-       md = walk->private;
-
-       if (pmd_trans_huge_lock(pmd, md->vma, &ptl) == 1) {
+       if (pmd_trans_huge_lock(pmd, vma, &ptl) == 1) {
                pte_t huge_pte = *(pte_t *)pmd;
                struct page *page;
 
-               page = can_gather_numa_stats(huge_pte, md->vma, addr);
+               page = can_gather_numa_stats(huge_pte, vma, addr);
                if (page)
                        gather_stats(page, md, pte_dirty(huge_pte),
                                     HPAGE_PMD_SIZE/PAGE_SIZE);
@@ -1430,7 +1376,7 @@ static int gather_pte_stats(pmd_t *pmd, unsigned long addr,
                return 0;
        orig_pte = pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);
        do {
-               struct page *page = can_gather_numa_stats(*pte, md->vma, addr);
+               struct page *page = can_gather_numa_stats(*pte, vma, addr);
                if (!page)
                        continue;
                gather_stats(page, md, pte_dirty(*pte), 1);
@@ -1440,7 +1386,7 @@ static int gather_pte_stats(pmd_t *pmd, unsigned long addr,
        return 0;
 }
 #ifdef CONFIG_HUGETLB_PAGE
-static int gather_hugetbl_stats(pte_t *pte, unsigned long hmask,
+static int gather_hugetlb_stats(pte_t *pte, unsigned long hmask,
                unsigned long addr, unsigned long end, struct mm_walk *walk)
 {
        struct numa_maps *md;
@@ -1459,7 +1405,7 @@ static int gather_hugetbl_stats(pte_t *pte, unsigned long hmask,
 }
 
 #else
-static int gather_hugetbl_stats(pte_t *pte, unsigned long hmask,
+static int gather_hugetlb_stats(pte_t *pte, unsigned long hmask,
                unsigned long addr, unsigned long end, struct mm_walk *walk)
 {
        return 0;
@@ -1477,7 +1423,12 @@ static int show_numa_map(struct seq_file *m, void *v, int is_pid)
        struct numa_maps *md = &numa_priv->md;
        struct file *file = vma->vm_file;
        struct mm_struct *mm = vma->vm_mm;
-       struct mm_walk walk = {};
+       struct mm_walk walk = {
+               .hugetlb_entry = gather_hugetlb_stats,
+               .pmd_entry = gather_pte_stats,
+               .private = md,
+               .mm = mm,
+       };
        struct mempolicy *pol;
        char buffer[64];
        int nid;
@@ -1488,13 +1439,6 @@ static int show_numa_map(struct seq_file *m, void *v, int is_pid)
        /* Ensure we start with an empty set of numa_maps statistics. */
        memset(md, 0, sizeof(*md));
 
-       md->vma = vma;
-
-       walk.hugetlb_entry = gather_hugetbl_stats;
-       walk.pmd_entry = gather_pte_stats;
-       walk.private = md;
-       walk.mm = mm;
-
        pol = __get_vma_policy(vma, vma->vm_start);
        if (pol) {
                mpol_to_str(buffer, sizeof(buffer), pol);
@@ -1528,7 +1472,8 @@ static int show_numa_map(struct seq_file *m, void *v, int is_pid)
        if (is_vm_hugetlb_page(vma))
                seq_puts(m, " huge");
 
-       walk_page_range(vma->vm_start, vma->vm_end, &walk);
+       /* mmap_sem is held by m_start */
+       walk_page_vma(vma, &walk);
 
        if (!md->pages)
                goto out;
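
Taken together, every walker in this file now follows the same shape: fill in a struct mm_walk, then hand either one vma or the whole address space to the core. An illustrative pattern, using the names from the hunks above:

	/* one vma at a time (smaps, numa_maps); mmap_sem held by m_start */
	walk_page_vma(vma, &walk);

	/* the whole address space (clear_refs); vma iteration, hugetlb
	 * and VM_PFNMAP filtering all happen inside the core via
	 * test_walk */
	walk_page_range(0, ~0UL, &clear_refs_walk);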