Merge branch 'develop' of 10.10.10.29:/home/rockchip/kernel into develop

[firefly-linux-kernel-4.4.55.git] / mm / memory.c
diff --git a/mm/memory.c b/mm/memory.c

index b1443ac07c00a4f1a46de6bb260d00e8f52f99db..53c1da0d04a68d02bd1fd9eba8c925ceda59fec8 100644 (file)
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -297,7 +297,8 @@ void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *vma,
                 unsigned long addr = vma->vm_start;
  
                 /*
-                * Hide vma from rmap and vmtruncate before freeing pgtables
+                * Hide vma from rmap and truncate_pagecache before freeing
+                * pgtables
                  */
                 anon_vma_unlink(vma);
                 unlink_file_vma(vma);
@@ -640,6 +641,7 @@ static int copy_pte_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
                 pmd_t *dst_pmd, pmd_t *src_pmd, struct vm_area_struct *vma,
                 unsigned long addr, unsigned long end)
  {
+       pte_t *orig_src_pte, *orig_dst_pte;
         pte_t *src_pte, *dst_pte;
         spinlock_t *src_ptl, *dst_ptl;
         int progress = 0;
@@ -653,6 +655,8 @@ again:
         src_pte = pte_offset_map_nested(src_pmd, addr);
         src_ptl = pte_lockptr(src_mm, src_pmd);
         spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING);
+       orig_src_pte = src_pte;
+       orig_dst_pte = dst_pte;
         arch_enter_lazy_mmu_mode();
  
         do {
@@ -676,9 +680,9 @@ again:
  
         arch_leave_lazy_mmu_mode();
         spin_unlock(src_ptl);
-       pte_unmap_nested(src_pte - 1);
+       pte_unmap_nested(orig_src_pte);
         add_mm_rss(dst_mm, rss[0], rss[1]);
-       pte_unmap_unlock(dst_pte - 1, dst_ptl);
+       pte_unmap_unlock(orig_dst_pte, dst_ptl);
         cond_resched();
         if (addr != end)
                 goto again;
@@ -1278,10 +1282,20 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
                                 return i ? : -EFAULT;
                         }
                         if (pages) {
-                               struct page *page = vm_normal_page(gate_vma, start, *pte);
+                               struct page *page;
+
+                               page = vm_normal_page(gate_vma, start, *pte);
+                               if (!page) {
+                                       if (!(gup_flags & FOLL_DUMP) &&
+                                            is_zero_pfn(pte_pfn(*pte)))
+                                               page = pte_page(*pte);
+                                       else {
+                                               pte_unmap(pte);
+                                               return i ? : -EFAULT;
+                                       }
+                               }
                                 pages[i] = page;
-                               if (page)
-                                       get_page(page);
+                               get_page(page);
                         }
                         pte_unmap(pte);
                         if (vmas)
@@ -1325,7 +1339,8 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
                                 if (ret & VM_FAULT_ERROR) {
                                         if (ret & VM_FAULT_OOM)
                                                 return i ? i : -ENOMEM;
-                                       else if (ret & VM_FAULT_SIGBUS)
+                                       if (ret &
+                                           (VM_FAULT_HWPOISON|VM_FAULT_SIGBUS))
                                                 return i ? i : -EFAULT;
                                         BUG();
                                 }
@@ -1818,10 +1833,10 @@ static int apply_to_pte_range(struct mm_struct *mm, pmd_t *pmd,
         token = pmd_pgtable(*pmd);
  
         do {
-               err = fn(pte, token, addr, data);
+               err = fn(pte++, token, addr, data);
                 if (err)
                         break;
-       } while (pte++, addr += PAGE_SIZE, addr != end);
+       } while (addr += PAGE_SIZE, addr != end);
  
         arch_leave_lazy_mmu_mode();
  
@@ -2407,7 +2422,7 @@ restart:
   * @mapping: the address space containing mmaps to be unmapped.
   * @holebegin: byte in first page to unmap, relative to the start of
   * the underlying file.  This will be rounded down to a PAGE_SIZE
- * boundary.  Note that this is different from vmtruncate(), which
+ * boundary.  Note that this is different from truncate_pagecache(), which
   * must keep the partial page.  In contrast, we must get rid of
   * partial pages.
   * @holelen: size of prospective hole in bytes.  This will be rounded
@@ -2458,63 +2473,6 @@ void unmap_mapping_range(struct address_space *mapping,
  }
  EXPORT_SYMBOL(unmap_mapping_range);
  
-/**
- * vmtruncate - unmap mappings "freed" by truncate() syscall
- * @inode: inode of the file used
- * @offset: file offset to start truncating
- *
- * NOTE! We have to be ready to update the memory sharing
- * between the file and the memory map for a potential last
- * incomplete page.  Ugly, but necessary.
- */
-int vmtruncate(struct inode * inode, loff_t offset)
-{
-       if (inode->i_size < offset) {
-               unsigned long limit;
-
-               limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur;
-               if (limit != RLIM_INFINITY && offset > limit)
-                       goto out_sig;
-               if (offset > inode->i_sb->s_maxbytes)
-                       goto out_big;
-               i_size_write(inode, offset);
-       } else {
-               struct address_space *mapping = inode->i_mapping;
-
-               /*
-                * truncation of in-use swapfiles is disallowed - it would
-                * cause subsequent swapout to scribble on the now-freed
-                * blocks.
-                */
-               if (IS_SWAPFILE(inode))
-                       return -ETXTBSY;
-               i_size_write(inode, offset);
-
-               /*
-                * unmap_mapping_range is called twice, first simply for
-                * efficiency so that truncate_inode_pages does fewer
-                * single-page unmaps.  However after this first call, and
-                * before truncate_inode_pages finishes, it is possible for
-                * private pages to be COWed, which remain after
-                * truncate_inode_pages finishes, hence the second
-                * unmap_mapping_range call must be made for correctness.
-                */
-               unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1);
-               truncate_inode_pages(mapping, offset);
-               unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1);
-       }
-
-       if (inode->i_op->truncate)
-               inode->i_op->truncate(inode);
-       return 0;
-
-out_sig:
-       send_sig(SIGXFSZ, current, 0);
-out_big:
-       return -EFBIG;
-}
-EXPORT_SYMBOL(vmtruncate);
-
  int vmtruncate_range(struct inode *inode, loff_t offset, loff_t end)
  {
         struct address_space *mapping = inode->i_mapping;
@@ -2559,8 +2517,15 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
                 goto out;
  
         entry = pte_to_swp_entry(orig_pte);
-       if (is_migration_entry(entry)) {
-               migration_entry_wait(mm, pmd, address);
+       if (unlikely(non_swap_entry(entry))) {
+               if (is_migration_entry(entry)) {
+                       migration_entry_wait(mm, pmd, address);
+               } else if (is_hwpoison_entry(entry)) {
+                       ret = VM_FAULT_HWPOISON;
+               } else {
+                       print_bad_pte(vma, address, orig_pte, NULL);
+                       ret = VM_FAULT_SIGBUS;
+               }
                 goto out;
         }
         delayacct_set_flag(DELAYACCT_PF_SWAPIN);
@@ -2584,6 +2549,10 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
                 /* Had to read the page from swap area: Major fault */
                 ret = VM_FAULT_MAJOR;
                 count_vm_event(PGMAJFAULT);
+       } else if (PageHWPoison(page)) {
+               ret = VM_FAULT_HWPOISON;
+               delayacct_clear_flag(DELAYACCT_PF_SWAPIN);
+               goto out_release;
         }
  
         lock_page(page);
@@ -2655,10 +2624,45 @@ out_nomap:
         pte_unmap_unlock(page_table, ptl);
  out_page:
         unlock_page(page);
+out_release:
         page_cache_release(page);
         return ret;
  }
  
+/*
+ * Stack guard check for page-aligned 'address' in 'vma': like a single-page
+ * "expand_{down|up}wards()", except we first make sure 'address{-|+}PAGE_SIZE'
+ * doesn't hit another vma.  -ENOMEM if the abutting vma lacks VM_GROWS*, else 0.
+ */
+static inline int check_stack_guard_page(struct vm_area_struct *vma, unsigned long address)
+{
+       address &= PAGE_MASK;
+       if ((vma->vm_flags & VM_GROWSDOWN) && address == vma->vm_start) {
+               struct vm_area_struct *prev = vma->vm_prev;
+
+               /*
+                * Is there a mapping abutting this one below?  That's only
+                * ok if it's the same stack mapping that has gotten split.
+                * NOTE(review): any status from expand_stack() below is
+                * discarded here - confirm that ignoring it is intended.
+                */
+               if (prev && prev->vm_end == address)
+                       return prev->vm_flags & VM_GROWSDOWN ? 0 : -ENOMEM;
+
+               expand_stack(vma, address - PAGE_SIZE);
+       }
+       if ((vma->vm_flags & VM_GROWSUP) && address + PAGE_SIZE == vma->vm_end) {
+               struct vm_area_struct *next = vma->vm_next;
+
+               /* As VM_GROWSDOWN but s/below/above/ (status likewise unused) */
+               if (next && next->vm_start == address + PAGE_SIZE)
+                       return next->vm_flags & VM_GROWSUP ? 0 : -ENOMEM;
+
+               expand_upwards(vma, address + PAGE_SIZE);
+       }
+       return 0;
+}
+
  /*
   * We enter with non-exclusive mmap_sem (to exclude vma changes,
   * but allow concurrent faults), and pte mapped but not yet locked.
@@ -2672,19 +2676,23 @@ static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
         spinlock_t *ptl;
         pte_t entry;
  
+       pte_unmap(page_table);
+
+       /* Check if we need to add a guard page to the stack */
+       if (check_stack_guard_page(vma, address) < 0)
+               return VM_FAULT_SIGBUS;
+
+       /* Use the zero-page for reads */
         if (!(flags & FAULT_FLAG_WRITE)) {
                 entry = pte_mkspecial(pfn_pte(my_zero_pfn(address),
                                                 vma->vm_page_prot));
-               ptl = pte_lockptr(mm, pmd);
-               spin_lock(ptl);
+               page_table = pte_offset_map_lock(mm, pmd, address, &ptl);
                 if (!pte_none(*page_table))
                         goto unlock;
                 goto setpte;
         }
  
         /* Allocate our own private page. */
-       pte_unmap(page_table);
-
         if (unlikely(anon_vma_prepare(vma)))
                 goto oom;
         page = alloc_zeroed_user_highpage_movable(vma, address);
@@ -2760,6 +2768,12 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma,
         if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE)))
                 return ret;
  
+       if (unlikely(PageHWPoison(vmf.page))) {
+               if (ret & VM_FAULT_LOCKED)
+                       unlock_page(vmf.page);
+               return VM_FAULT_HWPOISON;
+       }
+
         /*
          * For consistency in subsequent calls, make the faulted page always
          * locked.
@@ -2944,7 +2958,7 @@ static int do_nonlinear_fault(struct mm_struct *mm, struct vm_area_struct *vma,
                  * Page table corrupted: show pte and kill process.
                  */
                 print_bad_pte(vma, address, orig_pte, NULL);
-               return VM_FAULT_OOM;
+               return VM_FAULT_SIGBUS;
         }
  
         pgoff = pte_to_pgoff(orig_pte);