X-Git-Url: http://demsky.eecs.uci.edu/git/?a=blobdiff_plain;f=mm%2Fmemory.c;h=7e91b5f9f690e4ae9d7c3e58bc1530c995311089;hb=ffe0d5a575459ffe664b0762130b557f826fcace;hp=5c694f2b9c12d02d6a9ca129afb702d68237a9c9;hpb=58fa879e1e640a1856f736b418984ebeccee1c95;p=firefly-linux-kernel-4.4.55.git diff --git a/mm/memory.c b/mm/memory.c index 5c694f2b9c12..7e91b5f9f690 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -108,7 +108,8 @@ static int __init disable_randmaps(char *s) } __setup("norandmaps", disable_randmaps); -static unsigned long zero_pfn __read_mostly; +unsigned long zero_pfn __read_mostly; +unsigned long highest_memmap_pfn __read_mostly; /* * CONFIG_MMU architectures set up ZERO_PAGE in their paging_init() @@ -296,7 +297,8 @@ void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *vma, unsigned long addr = vma->vm_start; /* - * Hide vma from rmap and vmtruncate before freeing pgtables + * Hide vma from rmap and truncate_pagecache before freeing + * pgtables */ anon_vma_unlink(vma); unlink_file_vma(vma); @@ -455,6 +457,20 @@ static inline int is_cow_mapping(unsigned int flags) return (flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE; } +#ifndef is_zero_pfn +static inline int is_zero_pfn(unsigned long pfn) +{ + return pfn == zero_pfn; +} +#endif + +#ifndef my_zero_pfn +static inline unsigned long my_zero_pfn(unsigned long addr) +{ + return zero_pfn; +} +#endif + /* * vm_normal_page -- This function gets the "struct page" associated with a pte. * @@ -512,7 +528,7 @@ struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr, goto check_pfn; if (vma->vm_flags & (VM_PFNMAP | VM_MIXEDMAP)) return NULL; - if (pfn != zero_pfn) + if (!is_zero_pfn(pfn)) print_bad_pte(vma, addr, pte, NULL); return NULL; } @@ -534,6 +550,8 @@ struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr, } } + if (is_zero_pfn(pfn)) + return NULL; check_pfn: if (unlikely(pfn > highest_memmap_pfn)) { print_bad_pte(vma, addr, pte, NULL); @@ -1161,7 +1179,7 @@ struct page *follow_page(struct vm_area_struct *vma, unsigned long address, page = vm_normal_page(vma, address, pte); if (unlikely(!page)) { if ((flags & FOLL_DUMP) || - pte_pfn(pte) != zero_pfn) + !is_zero_pfn(pte_pfn(pte))) goto bad_page; page = pte_page(pte); } @@ -1308,7 +1326,8 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm, if (ret & VM_FAULT_ERROR) { if (ret & VM_FAULT_OOM) return i ? i : -ENOMEM; - else if (ret & VM_FAULT_SIGBUS) + if (ret & + (VM_FAULT_HWPOISON|VM_FAULT_SIGBUS)) return i ? i : -EFAULT; BUG(); } @@ -1443,10 +1462,6 @@ struct page *get_dump_page(unsigned long addr) if (__get_user_pages(current, current->mm, addr, 1, FOLL_FORCE | FOLL_DUMP | FOLL_GET, &page, &vma) < 1) return NULL; - if (page == ZERO_PAGE(0)) { - page_cache_release(page); - return NULL; - } flush_cache_page(vma, addr, page_to_pfn(page)); return page; } @@ -1629,7 +1644,8 @@ int vm_insert_mixed(struct vm_area_struct *vma, unsigned long addr, * If we don't have pte special, then we have to use the pfn_valid() * based VM_MIXEDMAP scheme (see vm_normal_page), and thus we *must* * refcount the page if pfn_valid is true (hence insert_page rather - * than insert_pfn). + * than insert_pfn). If a zero_pfn were inserted into a VM_MIXEDMAP + * without pte special, it would there be refcounted as a normal page. */ if (!HAVE_PTE_SPECIAL && pfn_valid(pfn)) { struct page *page; @@ -2097,7 +2113,7 @@ gotten: if (unlikely(anon_vma_prepare(vma))) goto oom; - if (pte_pfn(orig_pte) == zero_pfn) { + if (is_zero_pfn(pte_pfn(orig_pte))) { new_page = alloc_zeroed_user_highpage_movable(vma, address); if (!new_page) goto oom; @@ -2393,7 +2409,7 @@ restart: * @mapping: the address space containing mmaps to be unmapped. * @holebegin: byte in first page to unmap, relative to the start of * the underlying file. This will be rounded down to a PAGE_SIZE - * boundary. Note that this is different from vmtruncate(), which + * boundary. Note that this is different from truncate_pagecache(), which * must keep the partial page. In contrast, we must get rid of * partial pages. * @holelen: size of prospective hole in bytes. This will be rounded @@ -2444,63 +2460,6 @@ void unmap_mapping_range(struct address_space *mapping, } EXPORT_SYMBOL(unmap_mapping_range); -/** - * vmtruncate - unmap mappings "freed" by truncate() syscall - * @inode: inode of the file used - * @offset: file offset to start truncating - * - * NOTE! We have to be ready to update the memory sharing - * between the file and the memory map for a potential last - * incomplete page. Ugly, but necessary. - */ -int vmtruncate(struct inode * inode, loff_t offset) -{ - if (inode->i_size < offset) { - unsigned long limit; - - limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur; - if (limit != RLIM_INFINITY && offset > limit) - goto out_sig; - if (offset > inode->i_sb->s_maxbytes) - goto out_big; - i_size_write(inode, offset); - } else { - struct address_space *mapping = inode->i_mapping; - - /* - * truncation of in-use swapfiles is disallowed - it would - * cause subsequent swapout to scribble on the now-freed - * blocks. - */ - if (IS_SWAPFILE(inode)) - return -ETXTBSY; - i_size_write(inode, offset); - - /* - * unmap_mapping_range is called twice, first simply for - * efficiency so that truncate_inode_pages does fewer - * single-page unmaps. However after this first call, and - * before truncate_inode_pages finishes, it is possible for - * private pages to be COWed, which remain after - * truncate_inode_pages finishes, hence the second - * unmap_mapping_range call must be made for correctness. - */ - unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1); - truncate_inode_pages(mapping, offset); - unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1); - } - - if (inode->i_op->truncate) - inode->i_op->truncate(inode); - return 0; - -out_sig: - send_sig(SIGXFSZ, current, 0); -out_big: - return -EFBIG; -} -EXPORT_SYMBOL(vmtruncate); - int vmtruncate_range(struct inode *inode, loff_t offset, loff_t end) { struct address_space *mapping = inode->i_mapping; @@ -2545,8 +2504,15 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma, goto out; entry = pte_to_swp_entry(orig_pte); - if (is_migration_entry(entry)) { - migration_entry_wait(mm, pmd, address); + if (unlikely(non_swap_entry(entry))) { + if (is_migration_entry(entry)) { + migration_entry_wait(mm, pmd, address); + } else if (is_hwpoison_entry(entry)) { + ret = VM_FAULT_HWPOISON; + } else { + print_bad_pte(vma, address, orig_pte, NULL); + ret = VM_FAULT_OOM; + } goto out; } delayacct_set_flag(DELAYACCT_PF_SWAPIN); @@ -2570,6 +2536,10 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma, /* Had to read the page from swap area: Major fault */ ret = VM_FAULT_MAJOR; count_vm_event(PGMAJFAULT); + } else if (PageHWPoison(page)) { + ret = VM_FAULT_HWPOISON; + delayacct_clear_flag(DELAYACCT_PF_SWAPIN); + goto out; } lock_page(page); @@ -2658,8 +2628,9 @@ static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma, spinlock_t *ptl; pte_t entry; - if (HAVE_PTE_SPECIAL && !(flags & FAULT_FLAG_WRITE)) { - entry = pte_mkspecial(pfn_pte(zero_pfn, vma->vm_page_prot)); + if (!(flags & FAULT_FLAG_WRITE)) { + entry = pte_mkspecial(pfn_pte(my_zero_pfn(address), + vma->vm_page_prot)); ptl = pte_lockptr(mm, pmd); spin_lock(ptl); if (!pte_none(*page_table)) @@ -2745,6 +2716,12 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma, if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE))) return ret; + if (unlikely(PageHWPoison(vmf.page))) { + if (ret & VM_FAULT_LOCKED) + unlock_page(vmf.page); + return VM_FAULT_HWPOISON; + } + /* * For consistency in subsequent calls, make the faulted page always * locked.