proc/pagemap: walk page tables under pte lock
author Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
Wed, 11 Feb 2015 23:27:31 +0000 (15:27 -0800)
committer Linus Torvalds <torvalds@linux-foundation.org>
Thu, 12 Feb 2015 01:06:05 +0000 (17:06 -0800)
Lockless access to pte in pagemap_pte_range() might race with page
migration and trigger BUG_ON(!PageLocked()) in migration_entry_to_page():

CPU A (pagemap)                           CPU B (migration)
                                          lock_page()
                                          try_to_unmap(page, TTU_MIGRATION...)
                                               make_migration_entry()
                                               set_pte_at()
<read *pte>
pte_to_pagemap_entry()
                                          remove_migration_ptes()
                                          unlock_page()
    if(is_migration_entry())
        migration_entry_to_page()
            BUG_ON(!PageLocked(page))

Also, a lockless read might be non-atomic if the pte is larger than the word size.
Other pte walkers (smaps, numa_maps, clear_refs) already lock ptes.

Fixes: 052fb0d635df ("proc: report file/anon bit in /proc/pid/pagemap")
Signed-off-by: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
Reported-by: Andrey Ryabinin <a.ryabinin@samsung.com>
Reviewed-by: Cyrill Gorcunov <gorcunov@openvz.org>
Acked-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: <stable@vger.kernel.org> [3.5+]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
fs/proc/task_mmu.c

index e6e0abeb5d12405594274c9119b0c6d97ed750a3..eeab30fcffcc1d09b95bfbfc9d423b1d379a2969 100644 (file)
@@ -1056,7 +1056,7 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
        struct vm_area_struct *vma;
        struct pagemapread *pm = walk->private;
        spinlock_t *ptl;
-       pte_t *pte;
+       pte_t *pte, *orig_pte;
        int err = 0;
 
        /* find the first VMA at or above 'addr' */
@@ -1117,15 +1117,19 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
                BUG_ON(is_vm_hugetlb_page(vma));
 
                /* Addresses in the VMA. */
-               for (; addr < min(end, vma->vm_end); addr += PAGE_SIZE) {
+               orig_pte = pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);
+               for (; addr < min(end, vma->vm_end); pte++, addr += PAGE_SIZE) {
                        pagemap_entry_t pme;
-                       pte = pte_offset_map(pmd, addr);
+
                        pte_to_pagemap_entry(&pme, pm, vma, addr, *pte);
-                       pte_unmap(pte);
                        err = add_to_pagemap(addr, &pme, pm);
                        if (err)
-                               return err;
+                               break;
                }
+               pte_unmap_unlock(orig_pte, ptl);
+
+               if (err)
+                       return err;
 
                if (addr == end)
                        break;