Revert "proc: pagemap: Hold mmap_sem during page walk"
author黄涛 <huangtao@rock-chips.com>
Sat, 30 Jul 2011 14:16:43 +0000 (22:16 +0800)
committer黄涛 <huangtao@rock-chips.com>
Sat, 30 Jul 2011 14:16:43 +0000 (22:16 +0800)
This reverts commit 49ff3070f5ecc45ce45cc4e840cd0f20b768e3d3.

fs/proc/task_mmu.c

index 8e5fae2f7aed8f7fdeb3c02dd01c2dcddf3509f7..2a1bef9203c6e73734e7fa359873bbb45456d0ae 100644 (file)
@@ -404,7 +404,6 @@ static int show_smap(struct seq_file *m, void *v)
 
        memset(&mss, 0, sizeof mss);
        mss.vma = vma;
-       /* mmap_sem is held in m_start */
        if (vma->vm_mm && !is_vm_hugetlb_page(vma))
                walk_page_range(vma->vm_start, vma->vm_end, &smaps_walk);
 
@@ -551,8 +550,7 @@ const struct file_operations proc_clear_refs_operations = {
 };
 
 struct pagemapread {
-       int pos, len;
-       u64 *buffer;
+       u64 __user *out, *end;
 };
 
 #define PM_ENTRY_BYTES      sizeof(u64)
@@ -575,8 +573,10 @@ struct pagemapread {
 static int add_to_pagemap(unsigned long addr, u64 pfn,
                          struct pagemapread *pm)
 {
-       pm->buffer[pm->pos++] = pfn;
-       if (pm->pos >= pm->len)
+       if (put_user(pfn, pm->out))
+               return -EFAULT;
+       pm->out++;
+       if (pm->out >= pm->end)
                return PM_END_OF_BUFFER;
        return 0;
 }
@@ -674,20 +674,21 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
  * determine which areas of memory are actually mapped and llseek to
  * skip over unmapped regions.
  */
-#define PAGEMAP_WALK_SIZE      (PMD_SIZE)
 static ssize_t pagemap_read(struct file *file, char __user *buf,
                            size_t count, loff_t *ppos)
 {
        struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode);
+       struct page **pages, *page;
+       unsigned long uaddr, uend;
        struct mm_struct *mm;
        struct pagemapread pm;
+       int pagecount;
        int ret = -ESRCH;
        struct mm_walk pagemap_walk = {};
        unsigned long src;
        unsigned long svpfn;
        unsigned long start_vaddr;
        unsigned long end_vaddr;
-       int copied = 0;
 
        if (!task)
                goto out;
@@ -710,12 +711,35 @@ static ssize_t pagemap_read(struct file *file, char __user *buf,
        if (!mm)
                goto out_task;
 
-       pm.len = PM_ENTRY_BYTES * (PAGEMAP_WALK_SIZE >> PAGE_SHIFT);
-       pm.buffer = kmalloc(pm.len, GFP_TEMPORARY);
+
+       uaddr = (unsigned long)buf & PAGE_MASK;
+       uend = (unsigned long)(buf + count);
+       pagecount = (PAGE_ALIGN(uend) - uaddr) / PAGE_SIZE;
+       ret = 0;
+       if (pagecount == 0)
+               goto out_mm;
+       pages = kcalloc(pagecount, sizeof(struct page *), GFP_KERNEL);
        ret = -ENOMEM;
-       if (!pm.buffer)
+       if (!pages)
                goto out_mm;
 
+       down_read(&current->mm->mmap_sem);
+       ret = get_user_pages(current, current->mm, uaddr, pagecount,
+                            1, 0, pages, NULL);
+       up_read(&current->mm->mmap_sem);
+
+       if (ret < 0)
+               goto out_free;
+
+       if (ret != pagecount) {
+               pagecount = ret;
+               ret = -EFAULT;
+               goto out_pages;
+       }
+
+       pm.out = (u64 __user *)buf;
+       pm.end = (u64 __user *)(buf + count);
+
        pagemap_walk.pmd_entry = pagemap_pte_range;
        pagemap_walk.pte_hole = pagemap_pte_hole;
        pagemap_walk.mm = mm;
@@ -736,36 +760,23 @@ static ssize_t pagemap_read(struct file *file, char __user *buf,
         * user buffer is tracked in "pm", and the walk
         * will stop when we hit the end of the buffer.
         */
-       ret = 0;
-       while (count && (start_vaddr < end_vaddr)) {
-               int len;
-               unsigned long end;
-
-               pm.pos = 0;
-               end = start_vaddr + PAGEMAP_WALK_SIZE;
-               /* overflow ? */
-               if (end < start_vaddr || end > end_vaddr)
-                       end = end_vaddr;
-               down_read(&mm->mmap_sem);
-               ret = walk_page_range(start_vaddr, end, &pagemap_walk);
-               up_read(&mm->mmap_sem);
-               start_vaddr = end;
-
-               len = min(count, PM_ENTRY_BYTES * pm.pos);
-               if (copy_to_user(buf, pm.buffer, len) < 0) {
-                       ret = -EFAULT;
-                       goto out_free;
-               }
-               copied += len;
-               buf += len;
-               count -= len;
+       ret = walk_page_range(start_vaddr, end_vaddr, &pagemap_walk);
+       if (ret == PM_END_OF_BUFFER)
+               ret = 0;
+       /* don't need mmap_sem for these, but this looks cleaner */
+       *ppos += (char __user *)pm.out - buf;
+       if (!ret)
+               ret = (char __user *)pm.out - buf;
+
+out_pages:
+       for (; pagecount; pagecount--) {
+               page = pages[pagecount-1];
+               if (!PageReserved(page))
+                       SetPageDirty(page);
+               page_cache_release(page);
        }
-       *ppos += copied;
-       if (!ret || ret == PM_END_OF_BUFFER)
-               ret = copied;
-
 out_free:
-       kfree(pm.buffer);
+       kfree(pages);
 out_mm:
        mmput(mm);
 out_task: