rk_sdmmc: recalculate audib rx_wmark

[firefly-linux-kernel-4.4.55.git] / mm / nommu.c
diff --git a/mm/nommu.c b/mm/nommu.c

index 9edc897a3970e3a22753e6bbed4ce0bdde9d0f19..298884dcd6e71e4723a203a82f1613cbfcec19ec 100644 (file)
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -13,7 +13,7 @@
   *  Copyright (c) 2007-2010 Paul Mundt <lethal@linux-sh.org>
   */
  
-#include <linux/module.h>
+#include <linux/export.h>
  #include <linux/mm.h>
  #include <linux/mman.h>
  #include <linux/swap.h>
@@ -22,7 +22,6 @@
  #include <linux/pagemap.h>
  #include <linux/slab.h>
  #include <linux/vmalloc.h>
-#include <linux/tracehook.h>
  #include <linux/blkdev.h>
  #include <linux/backing-dev.h>
  #include <linux/mount.h>
@@ -30,6 +29,7 @@
  #include <linux/security.h>
  #include <linux/syscalls.h>
  #include <linux/audit.h>
+#include <linux/sched/sysctl.h>
  
  #include <asm/uaccess.h>
  #include <asm/tlb.h>
@@ -63,10 +63,27 @@ int sysctl_overcommit_memory = OVERCOMMIT_GUESS; /* heuristic overcommit */
  int sysctl_overcommit_ratio = 50; /* default is 50% */
  int sysctl_max_map_count = DEFAULT_MAX_MAP_COUNT;
  int sysctl_nr_trim_pages = CONFIG_NOMMU_INITIAL_TRIM_EXCESS;
+unsigned long sysctl_user_reserve_kbytes __read_mostly = 1UL << 17; /* 128MB */
+unsigned long sysctl_admin_reserve_kbytes __read_mostly = 1UL << 13; /* 8MB */
  int heap_stack_gap = 0;
  
  atomic_long_t mmap_pages_allocated;
  
+/*
+ * The global memory commitment made in the system can be a metric
+ * that can be used to drive ballooning decisions when Linux is hosted
+ * as a guest. On Hyper-V, the host implements a policy engine for dynamically
+ * balancing memory across competing virtual machines that are hosted.
+ * Several metrics drive this policy engine including the guest reported
+ * memory commitment.
+ */
+unsigned long vm_memory_committed(void)
+{
+       return percpu_counter_read_positive(&vm_committed_as);
+}
+
+EXPORT_SYMBOL_GPL(vm_memory_committed);
+
  EXPORT_SYMBOL(mem_map);
  EXPORT_SYMBOL(num_physpages);
  
@@ -125,10 +142,10 @@ unsigned int kobjsize(const void *objp)
         return PAGE_SIZE << compound_order(page);
  }
  
-int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
-                    unsigned long start, int nr_pages, unsigned int foll_flags,
-                    struct page **pages, struct vm_area_struct **vmas,
-                    int *retry)
+long __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
+                     unsigned long start, unsigned long nr_pages,
+                     unsigned int foll_flags, struct page **pages,
+                     struct vm_area_struct **vmas, int *nonblocking)
  {
         struct vm_area_struct *vma;
         unsigned long vm_flags;
@@ -175,9 +192,10 @@ finish_or_fault:
   *   slab page or a secondary page from a compound page
   * - don't permit access to VMAs that don't support it, such as I/O mappings
   */
-int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
-       unsigned long start, int nr_pages, int write, int force,
-       struct page **pages, struct vm_area_struct **vmas)
+long get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
+                   unsigned long start, unsigned long nr_pages,
+                   int write, int force, struct page **pages,
+                   struct vm_area_struct **vmas)
  {
         int flags = 0;
  
@@ -212,8 +230,7 @@ int follow_pfn(struct vm_area_struct *vma, unsigned long address,
  }
  EXPORT_SYMBOL(follow_pfn);
  
-DEFINE_RWLOCK(vmlist_lock);
-struct vm_struct *vmlist;
+LIST_HEAD(vmap_area_list);
  
  void vfree(const void *addr)
  {
@@ -455,7 +472,7 @@ void  __attribute__((weak)) vmalloc_sync_all(void)
   *     between processes, it syncs the pagetable across all
   *     processes.
   */
-struct vm_struct *alloc_vm_area(size_t size)
+struct vm_struct *alloc_vm_area(size_t size, pte_t **ptes)
  {
         BUG();
         return NULL;
@@ -697,9 +714,11 @@ static void add_vma_to_mm(struct mm_struct *mm, struct vm_area_struct *vma)
         if (vma->vm_file) {
                 mapping = vma->vm_file->f_mapping;
  
+               mutex_lock(&mapping->i_mmap_mutex);
                 flush_dcache_mmap_lock(mapping);
-               vma_prio_tree_insert(vma, &mapping->i_mmap);
+               vma_interval_tree_insert(vma, &mapping->i_mmap);
                 flush_dcache_mmap_unlock(mapping);
+               mutex_unlock(&mapping->i_mmap_mutex);
         }
  
         /* add the VMA to the tree */
@@ -761,9 +780,11 @@ static void delete_vma_from_mm(struct vm_area_struct *vma)
         if (vma->vm_file) {
                 mapping = vma->vm_file->f_mapping;
  
+               mutex_lock(&mapping->i_mmap_mutex);
                 flush_dcache_mmap_lock(mapping);
-               vma_prio_tree_remove(vma, &mapping->i_mmap);
+               vma_interval_tree_remove(vma, &mapping->i_mmap);
                 flush_dcache_mmap_unlock(mapping);
+               mutex_unlock(&mapping->i_mmap_mutex);
         }
  
         /* remove from the MM's tree and list */
@@ -776,8 +797,6 @@ static void delete_vma_from_mm(struct vm_area_struct *vma)
  
         if (vma->vm_next)
                 vma->vm_next->vm_prev = vma->vm_prev;
-
-       vma->vm_mm = NULL;
  }
  
  /*
@@ -788,11 +807,8 @@ static void delete_vma(struct mm_struct *mm, struct vm_area_struct *vma)
         kenter("%p", vma);
         if (vma->vm_ops && vma->vm_ops->close)
                 vma->vm_ops->close(vma);
-       if (vma->vm_file) {
+       if (vma->vm_file)
                 fput(vma->vm_file);
-               if (vma->vm_flags & VM_EXECUTABLE)
-                       removed_exe_file_vma(mm);
-       }
         put_nommu_region(vma->vm_region);
         kmem_cache_free(vm_area_cachep, vma);
  }
@@ -806,7 +822,7 @@ struct vm_area_struct *find_vma(struct mm_struct *mm, unsigned long addr)
         struct vm_area_struct *vma;
  
         /* check the cache first */
-       vma = mm->mmap_cache;
+       vma = ACCESS_ONCE(mm->mmap_cache);
         if (vma && vma->vm_start <= addr && vma->vm_end > addr)
                 return vma;
  
@@ -888,7 +904,6 @@ static int validate_mmap_request(struct file *file,
                                  unsigned long *_capabilities)
  {
         unsigned long capabilities, rlen;
-       unsigned long reqprot = prot;
         int ret;
  
         /* do the simple checks first */
@@ -929,7 +944,7 @@ static int validate_mmap_request(struct file *file,
                  */
                 mapping = file->f_mapping;
                 if (!mapping)
-                       mapping = file->f_path.dentry->d_inode->i_mapping;
+                       mapping = file_inode(file)->i_mapping;
  
                 capabilities = 0;
                 if (mapping && mapping->backing_dev_info)
@@ -938,7 +953,7 @@ static int validate_mmap_request(struct file *file,
                 if (!capabilities) {
                         /* no explicit capabilities set, so assume some
                          * defaults */
-                       switch (file->f_path.dentry->d_inode->i_mode & S_IFMT) {
+                       switch (file_inode(file)->i_mode & S_IFMT) {
                         case S_IFREG:
                         case S_IFBLK:
                                 capabilities = BDI_CAP_MAP_COPY;
@@ -973,11 +988,11 @@ static int validate_mmap_request(struct file *file,
                             !(file->f_mode & FMODE_WRITE))
                                 return -EACCES;
  
-                       if (IS_APPEND(file->f_path.dentry->d_inode) &&
+                       if (IS_APPEND(file_inode(file)) &&
                             (file->f_mode & FMODE_WRITE))
                                 return -EACCES;
  
-                       if (locks_verify_locked(file->f_path.dentry->d_inode))
+                       if (locks_verify_locked(file_inode(file)))
                                 return -EAGAIN;
  
                         if (!(capabilities & BDI_CAP_MAP_DIRECT))
@@ -1046,7 +1061,7 @@ static int validate_mmap_request(struct file *file,
         }
  
         /* allow the security API to have its say */
-       ret = security_file_mmap(file, reqprot, prot, flags, addr, 0);
+       ret = security_mmap_addr(addr);
         if (ret < 0)
                 return ret;
  
@@ -1087,7 +1102,7 @@ static unsigned long determine_vm_flags(struct file *file,
          * it's being traced - otherwise breakpoints set in it may interfere
          * with another untraced process
          */
-       if ((flags & MAP_PRIVATE) && tracehook_expect_breakpoints(current))
+       if ((flags & MAP_PRIVATE) && current->ptrace)
                 vm_flags &= ~VM_MAYSHARE;
  
         return vm_flags;
@@ -1237,7 +1252,8 @@ unsigned long do_mmap_pgoff(struct file *file,
                             unsigned long len,
                             unsigned long prot,
                             unsigned long flags,
-                           unsigned long pgoff)
+                           unsigned long pgoff,
+                           unsigned long *populate)
  {
         struct vm_area_struct *vma;
         struct vm_region *region;
@@ -1247,6 +1263,8 @@ unsigned long do_mmap_pgoff(struct file *file,
  
         kenter(",%lx,%lx,%lx,%lx,%lx", addr, len, prot, flags, pgoff);
  
+       *populate = 0;
+
         /* decide whether we should attempt the mapping, and if so what sort of
          * mapping */
         ret = validate_mmap_request(file, addr, len, prot, flags, pgoff,
@@ -1282,14 +1300,8 @@ unsigned long do_mmap_pgoff(struct file *file,
         vma->vm_pgoff = pgoff;
  
         if (file) {
-               region->vm_file = file;
-               get_file(file);
-               vma->vm_file = file;
-               get_file(file);
-               if (vm_flags & VM_EXECUTABLE) {
-                       added_exe_file_vma(current->mm);
-                       vma->vm_mm = current->mm;
-               }
+               region->vm_file = get_file(file);
+               vma->vm_file = get_file(file);
         }
  
         down_write(&nommu_region_sem);
@@ -1316,8 +1328,8 @@ unsigned long do_mmap_pgoff(struct file *file,
                                 continue;
  
                         /* search for overlapping mappings on the same file */
-                       if (pregion->vm_file->f_path.dentry->d_inode !=
-                           file->f_path.dentry->d_inode)
+                       if (file_inode(pregion->vm_file) !=
+                           file_inode(file))
                                 continue;
  
                         if (pregion->vm_pgoff >= pgend)
@@ -1442,8 +1454,6 @@ error:
         kmem_cache_free(vm_region_jar, region);
         if (vma->vm_file)
                 fput(vma->vm_file);
-       if (vma->vm_flags & VM_EXECUTABLE)
-               removed_exe_file_vma(vma->vm_mm);
         kmem_cache_free(vm_area_cachep, vma);
         kleave(" = %d", ret);
         return ret;
@@ -1469,7 +1479,6 @@ error_getting_region:
         show_free_areas(0);
         return -ENOMEM;
  }
-EXPORT_SYMBOL(do_mmap_pgoff);
  
  SYSCALL_DEFINE6(mmap_pgoff, unsigned long, addr, unsigned long, len,
                 unsigned long, prot, unsigned long, flags,
@@ -1487,9 +1496,7 @@ SYSCALL_DEFINE6(mmap_pgoff, unsigned long, addr, unsigned long, len,
  
         flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE);
  
-       down_write(&current->mm->mmap_sem);
-       retval = do_mmap_pgoff(file, addr, len, prot, flags, pgoff);
-       up_write(&current->mm->mmap_sem);
+       retval = vm_mmap_pgoff(file, addr, len, prot, flags, pgoff);
  
         if (file)
                 fput(file);
@@ -1708,16 +1715,22 @@ erase_whole_vma:
  }
  EXPORT_SYMBOL(do_munmap);
  
-SYSCALL_DEFINE2(munmap, unsigned long, addr, size_t, len)
+int vm_munmap(unsigned long addr, size_t len)
  {
-       int ret;
         struct mm_struct *mm = current->mm;
+       int ret;
  
         down_write(&mm->mmap_sem);
         ret = do_munmap(mm, addr, len);
         up_write(&mm->mmap_sem);
         return ret;
  }
+EXPORT_SYMBOL(vm_munmap);
+
+SYSCALL_DEFINE2(munmap, unsigned long, addr, size_t, len)
+{
+       return vm_munmap(addr, len);
+}
  
  /*
   * release all the mappings made in a process's VM space
@@ -1743,7 +1756,7 @@ void exit_mmap(struct mm_struct *mm)
         kleave("");
  }
  
-unsigned long do_brk(unsigned long addr, unsigned long len)
+unsigned long vm_brk(unsigned long addr, unsigned long len)
  {
         return -ENOMEM;
  }
@@ -1758,7 +1771,7 @@ unsigned long do_brk(unsigned long addr, unsigned long len)
   *
   * MREMAP_FIXED is not supported under NOMMU conditions
   */
-unsigned long do_mremap(unsigned long addr,
+static unsigned long do_mremap(unsigned long addr,
                         unsigned long old_len, unsigned long new_len,
                         unsigned long flags, unsigned long new_addr)
  {
@@ -1793,7 +1806,6 @@ unsigned long do_mremap(unsigned long addr,
         vma->vm_end = vma->vm_start + new_len;
         return vma->vm_start;
  }
-EXPORT_SYMBOL(do_mremap);
  
  SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len,
                 unsigned long, new_len, unsigned long, flags,
@@ -1807,9 +1819,11 @@ SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len,
         return ret;
  }
  
-struct page *follow_page(struct vm_area_struct *vma, unsigned long address,
-                       unsigned int foll_flags)
+struct page *follow_page_mask(struct vm_area_struct *vma,
+                             unsigned long address, unsigned int flags,
+                             unsigned int *page_mask)
  {
+       *page_mask = 0;
         return NULL;
  }
  
@@ -1819,11 +1833,21 @@ int remap_pfn_range(struct vm_area_struct *vma, unsigned long addr,
         if (addr != (pfn << PAGE_SHIFT))
                 return -EINVAL;
  
-       vma->vm_flags |= VM_IO | VM_RESERVED | VM_PFNMAP;
+       vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP;
         return 0;
  }
  EXPORT_SYMBOL(remap_pfn_range);
  
+int vm_iomap_memory(struct vm_area_struct *vma, phys_addr_t start, unsigned long len)
+{
+       unsigned long pfn = start >> PAGE_SHIFT;
+       unsigned long vm_len = vma->vm_end - vma->vm_start;
+
+       pfn += vma->vm_pgoff;
+       return io_remap_pfn_range(vma, vma->vm_start, pfn, vm_len, vma->vm_page_prot);
+}
+EXPORT_SYMBOL(vm_iomap_memory);
+
  int remap_vmalloc_range(struct vm_area_struct *vma, void *addr,
                         unsigned long pgoff)
  {
@@ -1874,7 +1898,7 @@ EXPORT_SYMBOL(unmap_mapping_range);
   */
  int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin)
  {
-       unsigned long free, allowed;
+       unsigned long free, allowed, reserve;
  
         vm_acct_memory(pages);
  
@@ -1885,10 +1909,18 @@ int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin)
                 return 0;
  
         if (sysctl_overcommit_memory == OVERCOMMIT_GUESS) {
-               unsigned long n;
+               free = global_page_state(NR_FREE_PAGES);
+               free += global_page_state(NR_FILE_PAGES);
  
-               free = global_page_state(NR_FILE_PAGES);
-               free += nr_swap_pages;
+               /*
+                * shmem pages shouldn't be counted as free in this
+                * case, they can't be purged, only swapped out, and
+                * that won't affect the overall amount of available
+                * memory in the system.
+                */
+               free -= global_page_state(NR_SHMEM);
+
+               free += get_nr_swap_pages();
  
                 /*
                  * Any slabs which are created with the
@@ -1898,35 +1930,19 @@ int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin)
                  */
                 free += global_page_state(NR_SLAB_RECLAIMABLE);
  
-               /*
-                * Leave the last 3% for root
-                */
-               if (!cap_sys_admin)
-                       free -= free / 32;
-
-               if (free > pages)
-                       return 0;
-
-               /*
-                * nr_free_pages() is very expensive on large systems,
-                * only call if we're about to fail.
-                */
-               n = nr_free_pages();
-
                 /*
                  * Leave reserved pages. The pages are not for anonymous pages.
                  */
-               if (n <= totalreserve_pages)
+               if (free <= totalreserve_pages)
                         goto error;
                 else
-                       n -= totalreserve_pages;
+                       free -= totalreserve_pages;
  
                 /*
-                * Leave the last 3% for root
+                * Reserve some for root
                  */
                 if (!cap_sys_admin)
-                       n -= n / 32;
-               free += n;
+                       free -= sysctl_admin_reserve_kbytes >> (PAGE_SHIFT - 10);
  
                 if (free > pages)
                         return 0;
@@ -1936,16 +1952,19 @@ int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin)
  
         allowed = totalram_pages * sysctl_overcommit_ratio / 100;
         /*
-        * Leave the last 3% for root
+        * Reserve some for root
          */
         if (!cap_sys_admin)
-               allowed -= allowed / 32;
+               allowed -= sysctl_admin_reserve_kbytes >> (PAGE_SHIFT - 10);
         allowed += total_swap_pages;
  
-       /* Don't let a single process grow too big:
-          leave 3% of the size of this process for other processes */
-       if (mm)
-               allowed -= mm->total_vm / 32;
+       /*
+        * Don't let a single process grow so big a user can't recover
+        */
+       if (mm) {
+               reserve = sysctl_user_reserve_kbytes >> (PAGE_SHIFT - 10);
+               allowed -= min(mm->total_vm / 32, reserve);
+       }
  
         if (percpu_counter_read_positive(&vm_committed_as) < allowed)
                 return 0;
@@ -1968,6 +1987,14 @@ int filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
  }
  EXPORT_SYMBOL(filemap_fault);
  
+int generic_file_remap_pages(struct vm_area_struct *vma, unsigned long addr,
+                            unsigned long size, pgoff_t pgoff)
+{
+       BUG();
+       return 0;
+}
+EXPORT_SYMBOL(generic_file_remap_pages);
+
  static int __access_remote_vm(struct task_struct *tsk, struct mm_struct *mm,
                 unsigned long addr, void *buf, int len, int write)
  {
@@ -2052,7 +2079,6 @@ int nommu_shrink_inode_mappings(struct inode *inode, size_t size,
                                 size_t newsize)
  {
         struct vm_area_struct *vma;
-       struct prio_tree_iter iter;
         struct vm_region *region;
         pgoff_t low, high;
         size_t r_size, r_top;
@@ -2061,13 +2087,14 @@ int nommu_shrink_inode_mappings(struct inode *inode, size_t size,
         high = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
  
         down_write(&nommu_region_sem);
+       mutex_lock(&inode->i_mapping->i_mmap_mutex);
  
         /* search for VMAs that fall within the dead zone */
-       vma_prio_tree_foreach(vma, &iter, &inode->i_mapping->i_mmap,
-                             low, high) {
+       vma_interval_tree_foreach(vma, &inode->i_mapping->i_mmap, low, high) {
                 /* found one - only interested if it's shared out of the page
                  * cache */
                 if (vma->vm_flags & VM_SHARED) {
+                       mutex_unlock(&inode->i_mapping->i_mmap_mutex);
                         up_write(&nommu_region_sem);
                         return -ETXTBSY; /* not quite true, but near enough */
                 }
@@ -2079,8 +2106,8 @@ int nommu_shrink_inode_mappings(struct inode *inode, size_t size,
          * we don't check for any regions that start beyond the EOF as there
          * shouldn't be any
          */
-       vma_prio_tree_foreach(vma, &iter, &inode->i_mapping->i_mmap,
-                             0, ULONG_MAX) {
+       vma_interval_tree_foreach(vma, &inode->i_mapping->i_mmap,
+                                 0, ULONG_MAX) {
                 if (!(vma->vm_flags & VM_SHARED))
                         continue;
  
@@ -2095,6 +2122,49 @@ int nommu_shrink_inode_mappings(struct inode *inode, size_t size,
                 }
         }
  
+       mutex_unlock(&inode->i_mapping->i_mmap_mutex);
         up_write(&nommu_region_sem);
         return 0;
  }
+
+/*
+ * Initialise sysctl_user_reserve_kbytes.
+ *
+ * This is intended to prevent a user from starting a single memory hogging
+ * process, such that they cannot recover (kill the hog) in OVERCOMMIT_NEVER
+ * mode.
+ *
+ * The default value, in kbytes, is min(3% of free memory, 128MB).
+ * 128MB is enough to recover with sshd/login, bash, and top/kill.
+ */
+static int __meminit init_user_reserve(void)
+{
+       unsigned long free_kbytes;
+
+       free_kbytes = global_page_state(NR_FREE_PAGES) << (PAGE_SHIFT - 10);
+
+       sysctl_user_reserve_kbytes = min(free_kbytes / 32, 1UL << 17);
+       return 0;
+}
+module_init(init_user_reserve)
+
+/*
+ * Initialise sysctl_admin_reserve_kbytes.
+ *
+ * The purpose of sysctl_admin_reserve_kbytes is to allow the sys admin
+ * to log in and kill a memory hogging process.
+ *
+ * Systems with more than 256MB will reserve 8MB, enough to recover
+ * with sshd, bash, and top in OVERCOMMIT_GUESS. Smaller systems will
+ * only reserve 3% of free pages by default.
+ */
+static int __meminit init_admin_reserve(void)
+{
+       unsigned long free_kbytes;
+
+       free_kbytes = global_page_state(NR_FREE_PAGES) << (PAGE_SHIFT - 10);
+
+       sysctl_admin_reserve_kbytes = min(free_kbytes / 32, 1UL << 13);
+       return 0;
+}
+module_init(init_admin_reserve)