unsigned long addr = vma->vm_start;
/*
- * Hide vma from rmap and vmtruncate before freeing pgtables
+ * Hide vma from rmap and truncate_pagecache before freeing
+ * pgtables
*/
anon_vma_unlink(vma);
unlink_file_vma(vma);
pmd_t *dst_pmd, pmd_t *src_pmd, struct vm_area_struct *vma,
unsigned long addr, unsigned long end)
{
+ pte_t *orig_src_pte, *orig_dst_pte;
pte_t *src_pte, *dst_pte;
spinlock_t *src_ptl, *dst_ptl;
int progress = 0;
src_pte = pte_offset_map_nested(src_pmd, addr);
src_ptl = pte_lockptr(src_mm, src_pmd);
spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING);
+ orig_src_pte = src_pte;
+ orig_dst_pte = dst_pte;
arch_enter_lazy_mmu_mode();
do {
arch_leave_lazy_mmu_mode();
spin_unlock(src_ptl);
- pte_unmap_nested(src_pte - 1);
+ pte_unmap_nested(orig_src_pte);
add_mm_rss(dst_mm, rss[0], rss[1]);
- pte_unmap_unlock(dst_pte - 1, dst_ptl);
+ pte_unmap_unlock(orig_dst_pte, dst_ptl);
cond_resched();
if (addr != end)
goto again;
return i ? : -EFAULT;
}
if (pages) {
- struct page *page = vm_normal_page(gate_vma, start, *pte);
+ struct page *page;
+
+ page = vm_normal_page(gate_vma, start, *pte);
+ if (!page) {
+ if (!(gup_flags & FOLL_DUMP) &&
+ is_zero_pfn(pte_pfn(*pte)))
+ page = pte_page(*pte);
+ else {
+ pte_unmap(pte);
+ return i ? : -EFAULT;
+ }
+ }
pages[i] = page;
- if (page)
- get_page(page);
+ get_page(page);
}
pte_unmap(pte);
if (vmas)
if (ret & VM_FAULT_ERROR) {
if (ret & VM_FAULT_OOM)
return i ? i : -ENOMEM;
- else if (ret & VM_FAULT_SIGBUS)
+ if (ret &
+ (VM_FAULT_HWPOISON|VM_FAULT_SIGBUS))
return i ? i : -EFAULT;
BUG();
}
token = pmd_pgtable(*pmd);
do {
- err = fn(pte, token, addr, data);
+ err = fn(pte++, token, addr, data);
if (err)
break;
- } while (pte++, addr += PAGE_SIZE, addr != end);
+ } while (addr += PAGE_SIZE, addr != end);
arch_leave_lazy_mmu_mode();
* @mapping: the address space containing mmaps to be unmapped.
* @holebegin: byte in first page to unmap, relative to the start of
* the underlying file. This will be rounded down to a PAGE_SIZE
- * boundary. Note that this is different from vmtruncate(), which
+ * boundary. Note that this is different from truncate_pagecache(), which
* must keep the partial page. In contrast, we must get rid of
* partial pages.
* @holelen: size of prospective hole in bytes. This will be rounded
}
EXPORT_SYMBOL(unmap_mapping_range);
-/**
- * vmtruncate - unmap mappings "freed" by truncate() syscall
- * @inode: inode of the file used
- * @offset: file offset to start truncating
- *
- * NOTE! We have to be ready to update the memory sharing
- * between the file and the memory map for a potential last
- * incomplete page. Ugly, but necessary.
- */
-int vmtruncate(struct inode * inode, loff_t offset)
-{
- if (inode->i_size < offset) {
- unsigned long limit;
-
- limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur;
- if (limit != RLIM_INFINITY && offset > limit)
- goto out_sig;
- if (offset > inode->i_sb->s_maxbytes)
- goto out_big;
- i_size_write(inode, offset);
- } else {
- struct address_space *mapping = inode->i_mapping;
-
- /*
- * truncation of in-use swapfiles is disallowed - it would
- * cause subsequent swapout to scribble on the now-freed
- * blocks.
- */
- if (IS_SWAPFILE(inode))
- return -ETXTBSY;
- i_size_write(inode, offset);
-
- /*
- * unmap_mapping_range is called twice, first simply for
- * efficiency so that truncate_inode_pages does fewer
- * single-page unmaps. However after this first call, and
- * before truncate_inode_pages finishes, it is possible for
- * private pages to be COWed, which remain after
- * truncate_inode_pages finishes, hence the second
- * unmap_mapping_range call must be made for correctness.
- */
- unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1);
- truncate_inode_pages(mapping, offset);
- unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1);
- }
-
- if (inode->i_op->truncate)
- inode->i_op->truncate(inode);
- return 0;
-
-out_sig:
- send_sig(SIGXFSZ, current, 0);
-out_big:
- return -EFBIG;
-}
-EXPORT_SYMBOL(vmtruncate);
-
int vmtruncate_range(struct inode *inode, loff_t offset, loff_t end)
{
struct address_space *mapping = inode->i_mapping;
goto out;
entry = pte_to_swp_entry(orig_pte);
- if (is_migration_entry(entry)) {
- migration_entry_wait(mm, pmd, address);
+ if (unlikely(non_swap_entry(entry))) {
+ if (is_migration_entry(entry)) {
+ migration_entry_wait(mm, pmd, address);
+ } else if (is_hwpoison_entry(entry)) {
+ ret = VM_FAULT_HWPOISON;
+ } else {
+ print_bad_pte(vma, address, orig_pte, NULL);
+ ret = VM_FAULT_SIGBUS;
+ }
goto out;
}
delayacct_set_flag(DELAYACCT_PF_SWAPIN);
/* Had to read the page from swap area: Major fault */
ret = VM_FAULT_MAJOR;
count_vm_event(PGMAJFAULT);
+ } else if (PageHWPoison(page)) {
+ ret = VM_FAULT_HWPOISON;
+ delayacct_clear_flag(DELAYACCT_PF_SWAPIN);
+ goto out_release;
}
lock_page(page);
pte_unmap_unlock(page_table, ptl);
out_page:
unlock_page(page);
+out_release:
page_cache_release(page);
return ret;
}
+/*
+ * This is like a special single-page "expand_{down|up}wards()",
+ * except we must first make sure that 'address{-|+}PAGE_SIZE'
+ * doesn't hit another vma.
+ */
+static inline int check_stack_guard_page(struct vm_area_struct *vma, unsigned long address)
+{
+ address &= PAGE_MASK;
+ if ((vma->vm_flags & VM_GROWSDOWN) && address == vma->vm_start) {
+ struct vm_area_struct *prev = vma->vm_prev;
+
+ /*
+ * Is there a mapping abutting this one below?
+ *
+ * That's only ok if it's the same stack mapping
+ * that has gotten split..
+ */
+ if (prev && prev->vm_end == address)
+ return prev->vm_flags & VM_GROWSDOWN ? 0 : -ENOMEM;
+
+ expand_stack(vma, address - PAGE_SIZE);
+ }
+ if ((vma->vm_flags & VM_GROWSUP) && address + PAGE_SIZE == vma->vm_end) {
+ struct vm_area_struct *next = vma->vm_next;
+
+ /* As VM_GROWSDOWN but s/below/above/ */
+ if (next && next->vm_start == address + PAGE_SIZE)
+ return next->vm_flags & VM_GROWSUP ? 0 : -ENOMEM;
+
+ expand_upwards(vma, address + PAGE_SIZE);
+ }
+ return 0;
+}
+
/*
* We enter with non-exclusive mmap_sem (to exclude vma changes,
* but allow concurrent faults), and pte mapped but not yet locked.
spinlock_t *ptl;
pte_t entry;
+ pte_unmap(page_table);
+
+ /* Check if we need to add a guard page to the stack */
+ if (check_stack_guard_page(vma, address) < 0)
+ return VM_FAULT_SIGBUS;
+
+ /* Use the zero-page for reads */
if (!(flags & FAULT_FLAG_WRITE)) {
entry = pte_mkspecial(pfn_pte(my_zero_pfn(address),
vma->vm_page_prot));
- ptl = pte_lockptr(mm, pmd);
- spin_lock(ptl);
+ page_table = pte_offset_map_lock(mm, pmd, address, &ptl);
if (!pte_none(*page_table))
goto unlock;
goto setpte;
}
/* Allocate our own private page. */
- pte_unmap(page_table);
-
if (unlikely(anon_vma_prepare(vma)))
goto oom;
page = alloc_zeroed_user_highpage_movable(vma, address);
if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE)))
return ret;
+ if (unlikely(PageHWPoison(vmf.page))) {
+ if (ret & VM_FAULT_LOCKED)
+ unlock_page(vmf.page);
+ return VM_FAULT_HWPOISON;
+ }
+
/*
* For consistency in subsequent calls, make the faulted page always
* locked.
* Page table corrupted: show pte and kill process.
*/
print_bad_pte(vma, address, orig_pte, NULL);
- return VM_FAULT_OOM;
+ return VM_FAULT_SIGBUS;
}
pgoff = pte_to_pgoff(orig_pte);