Allow stack to grow up to address space limit
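This diff replaces the old single guard page, which lived inside the stack vma, with a separately enforced gap (stack_guard_gap, 256 pages by default, tunable on the kernel command line) kept outside the vma, and it lets expand_upwards() grow a stack up to just below TASK_SIZE. Most call sites switch from raw vm_start/vm_end to the vm_start_gap()/vm_end_gap() helpers that the same change adds to include/linux/mm.h. A rough sketch of those helpers, paraphrased rather than copied from this tree (the real versions also clamp on address overflow):

static inline unsigned long vm_start_gap(struct vm_area_struct *vma)
{
	unsigned long vm_start = vma->vm_start;

	/* a grows-down stack reserves stack_guard_gap bytes below its start */
	if (vma->vm_flags & VM_GROWSDOWN)
		vm_start -= stack_guard_gap;
	return vm_start;
}

static inline unsigned long vm_end_gap(struct vm_area_struct *vma)
{
	unsigned long vm_end = vma->vm_end;

	/* a grows-up stack reserves stack_guard_gap bytes above its end */
	if (vma->vm_flags & VM_GROWSUP)
		vm_end += stack_guard_gap;
	return vm_end;
}

Wherever free space is measured below, a stack vma therefore appears stack_guard_gap bytes larger on its growing side, which is what keeps other mappings (and brk) out of the gap.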
[firefly-linux-kernel-4.4.55.git] / mm / mmap.c
index 2ce04a649f6b4977e54b76a29be9d5bac5e71dab..fcf4c883e21ae508ed72a64aa10fe656d33c91cd 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -288,6 +288,7 @@ SYSCALL_DEFINE1(brk, unsigned long, brk)
        unsigned long retval;
        unsigned long newbrk, oldbrk;
        struct mm_struct *mm = current->mm;
+       struct vm_area_struct *next;
        unsigned long min_brk;
        bool populate;
 
@@ -332,7 +333,8 @@ SYSCALL_DEFINE1(brk, unsigned long, brk)
        }
 
        /* Check against existing mmap mappings. */
-       if (find_vma_intersection(mm, oldbrk, newbrk+PAGE_SIZE))
+       next = find_vma(mm, oldbrk);
+       if (next && newbrk + PAGE_SIZE > vm_start_gap(next))
                goto out;
 
        /* Ok, looks good - let it rip. */
@@ -355,10 +357,22 @@ out:
 
 static long vma_compute_subtree_gap(struct vm_area_struct *vma)
 {
-       unsigned long max, subtree_gap;
-       max = vma->vm_start;
-       if (vma->vm_prev)
-               max -= vma->vm_prev->vm_end;
+       unsigned long max, prev_end, subtree_gap;
+
+       /*
+        * Note: in the rare case of a VM_GROWSDOWN above a VM_GROWSUP, we
+        * allow two stack_guard_gaps between them here, and when choosing
+        * an unmapped area; whereas when expanding we only require one.
+        * That's a little inconsistent, but keeps the code here simpler.
+        */
+       max = vm_start_gap(vma);
+       if (vma->vm_prev) {
+               prev_end = vm_end_gap(vma->vm_prev);
+               if (max > prev_end)
+                       max -= prev_end;
+               else
+                       max = 0;
+       }
        if (vma->vm_rb.rb_left) {
                subtree_gap = rb_entry(vma->vm_rb.rb_left,
                                struct vm_area_struct, vm_rb)->rb_subtree_gap;
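The comment above concerns the rare layout where both neighbours extend toward each other, so the computed gap can go "negative"; the new code clamps it to zero instead of underflowing. A minimal userspace model of that arithmetic, using hypothetical addresses and the default 1 MB gap with 4 KB pages, purely to illustrate the clamp:

#include <stdio.h>

int main(void)
{
	unsigned long stack_guard_gap = 256UL << 12;	/* 1 MB with 4 KB pages */
	unsigned long prev_end = 0x7f0000100000UL;	/* end of a VM_GROWSUP vma (hypothetical) */
	unsigned long vm_start = 0x7f0000200000UL;	/* start of a VM_GROWSDOWN vma (hypothetical) */

	/* vm_end_gap(prev) and vm_start_gap(vma): each side extends by one gap */
	unsigned long prev_end_gap = prev_end + stack_guard_gap;
	unsigned long start_gap = vm_start - stack_guard_gap;

	/* like the patched code, clamp to 0 rather than letting it underflow */
	unsigned long gap = start_gap > prev_end_gap ? start_gap - prev_end_gap : 0;

	printf("usable gap: %lu bytes\n", gap);	/* 0: the two guard gaps overlap */
	return 0;
}

With the vmas 1 MB apart, the two reserved gaps overlap entirely, so the recorded subtree gap is 0; this is the "two stack_guard_gaps" case the comment calls a little inconsistent, since expansion itself only requires one gap.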
@@ -441,13 +455,17 @@ static void validate_mm(struct mm_struct *mm)
        struct vm_area_struct *vma = mm->mmap;
 
        while (vma) {
+               struct anon_vma *anon_vma = vma->anon_vma;
                struct anon_vma_chain *avc;
 
-               vma_lock_anon_vma(vma);
-               list_for_each_entry(avc, &vma->anon_vma_chain, same_vma)
-                       anon_vma_interval_tree_verify(avc);
-               vma_unlock_anon_vma(vma);
-               highest_address = vma->vm_end;
+               if (anon_vma) {
+                       anon_vma_lock_read(anon_vma);
+                       list_for_each_entry(avc, &vma->anon_vma_chain, same_vma)
+                               anon_vma_interval_tree_verify(avc);
+                       anon_vma_unlock_read(anon_vma);
+               }
+
+               highest_address = vm_end_gap(vma);
                vma = vma->vm_next;
                i++;
        }
@@ -616,7 +634,7 @@ void __vma_link_rb(struct mm_struct *mm, struct vm_area_struct *vma,
        if (vma->vm_next)
                vma_gap_update(vma->vm_next);
        else
-               mm->highest_vm_end = vma->vm_end;
+               mm->highest_vm_end = vm_end_gap(vma);
 
        /*
         * vma->vm_prev wasn't known when we followed the rbtree to find the
@@ -862,7 +880,7 @@ again:                      remove_next = 1 + (end > next->vm_end);
                        vma_gap_update(vma);
                if (end_changed) {
                        if (!next)
-                               mm->highest_vm_end = end;
+                               mm->highest_vm_end = vm_end_gap(vma);
                        else if (!adjust_next)
                                vma_gap_update(next);
                }
@@ -905,7 +923,7 @@ again:                      remove_next = 1 + (end > next->vm_end);
                else if (next)
                        vma_gap_update(next);
                else
-                       mm->highest_vm_end = end;
+                       VM_WARN_ON(mm->highest_vm_end != vm_end_gap(vma));
        }
        if (insert && file)
                uprobe_mmap(insert);
@@ -1737,7 +1755,7 @@ unsigned long unmapped_area(struct vm_unmapped_area_info *info)
 
        while (true) {
                /* Visit left subtree if it looks promising */
-               gap_end = vma->vm_start;
+               gap_end = vm_start_gap(vma);
                if (gap_end >= low_limit && vma->vm_rb.rb_left) {
                        struct vm_area_struct *left =
                                rb_entry(vma->vm_rb.rb_left,
@@ -1748,7 +1766,7 @@ unsigned long unmapped_area(struct vm_unmapped_area_info *info)
                        }
                }
 
-               gap_start = vma->vm_prev ? vma->vm_prev->vm_end : 0;
+               gap_start = vma->vm_prev ? vm_end_gap(vma->vm_prev) : 0;
 check_current:
                /* Check if current node has a suitable gap */
                if (gap_start > high_limit)
@@ -1775,8 +1793,8 @@ check_current:
                        vma = rb_entry(rb_parent(prev),
                                       struct vm_area_struct, vm_rb);
                        if (prev == vma->vm_rb.rb_left) {
-                               gap_start = vma->vm_prev->vm_end;
-                               gap_end = vma->vm_start;
+                               gap_start = vm_end_gap(vma->vm_prev);
+                               gap_end = vm_start_gap(vma);
                                goto check_current;
                        }
                }
@@ -1840,7 +1858,7 @@ unsigned long unmapped_area_topdown(struct vm_unmapped_area_info *info)
 
        while (true) {
                /* Visit right subtree if it looks promising */
-               gap_start = vma->vm_prev ? vma->vm_prev->vm_end : 0;
+               gap_start = vma->vm_prev ? vm_end_gap(vma->vm_prev) : 0;
                if (gap_start <= high_limit && vma->vm_rb.rb_right) {
                        struct vm_area_struct *right =
                                rb_entry(vma->vm_rb.rb_right,
@@ -1853,7 +1871,7 @@ unsigned long unmapped_area_topdown(struct vm_unmapped_area_info *info)
 
 check_current:
                /* Check if current node has a suitable gap */
-               gap_end = vma->vm_start;
+               gap_end = vm_start_gap(vma);
                if (gap_end < low_limit)
                        return -ENOMEM;
                if (gap_start <= high_limit && gap_end - gap_start >= length)
@@ -1879,7 +1897,7 @@ check_current:
                                       struct vm_area_struct, vm_rb);
                        if (prev == vma->vm_rb.rb_right) {
                                gap_start = vma->vm_prev ?
-                                       vma->vm_prev->vm_end : 0;
+                                       vm_end_gap(vma->vm_prev) : 0;
                                goto check_current;
                        }
                }
@@ -1917,7 +1935,7 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr,
                unsigned long len, unsigned long pgoff, unsigned long flags)
 {
        struct mm_struct *mm = current->mm;
-       struct vm_area_struct *vma;
+       struct vm_area_struct *vma, *prev;
        struct vm_unmapped_area_info info;
 
        if (len > TASK_SIZE - mmap_min_addr)
@@ -1928,9 +1946,10 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr,
 
        if (addr) {
                addr = PAGE_ALIGN(addr);
-               vma = find_vma(mm, addr);
+               vma = find_vma_prev(mm, addr, &prev);
                if (TASK_SIZE - len >= addr && addr >= mmap_min_addr &&
-                   (!vma || addr + len <= vma->vm_start))
+                   (!vma || addr + len <= vm_start_gap(vma)) &&
+                   (!prev || addr >= vm_end_gap(prev)))
                        return addr;
        }
 
@@ -1953,7 +1972,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
                          const unsigned long len, const unsigned long pgoff,
                          const unsigned long flags)
 {
-       struct vm_area_struct *vma;
+       struct vm_area_struct *vma, *prev;
        struct mm_struct *mm = current->mm;
        unsigned long addr = addr0;
        struct vm_unmapped_area_info info;
@@ -1968,9 +1987,10 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
        /* requesting a specific address */
        if (addr) {
                addr = PAGE_ALIGN(addr);
-               vma = find_vma(mm, addr);
+               vma = find_vma_prev(mm, addr, &prev);
                if (TASK_SIZE - len >= addr && addr >= mmap_min_addr &&
-                               (!vma || addr + len <= vma->vm_start))
+                               (!vma || addr + len <= vm_start_gap(vma)) &&
+                               (!prev || addr >= vm_end_gap(prev)))
                        return addr;
        }
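The unmapped-area searches and the explicit address-hint paths above now measure free space from vm_end_gap() of the previous vma to vm_start_gap() of the next, so a hint that would land inside a neighbour's guard gap is rejected and the kernel falls back to its normal search. A hedged userspace illustration (the hint address is made up, and a non-MAP_FIXED hint is only a hint, so mmap() simply returns a different address rather than failing):

#include <stdio.h>
#include <sys/mman.h>

int main(void)
{
	/* hypothetical hint: an address just below the process's stack vma */
	void *hint = (void *)0x7ffffffdd000UL;
	void *p = mmap(hint, 4096, PROT_READ | PROT_WRITE,
		       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

	if (p == MAP_FAILED)
		return 1;

	/* if the hint falls inside a guard gap, p will differ from hint */
	printf("requested %p, got %p\n", hint, p);
	return 0;
}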
 
@@ -2095,21 +2115,19 @@ find_vma_prev(struct mm_struct *mm, unsigned long addr,
  * update accounting. This is shared with both the
  * grow-up and grow-down cases.
  */
-static int acct_stack_growth(struct vm_area_struct *vma, unsigned long size, unsigned long grow)
+static int acct_stack_growth(struct vm_area_struct *vma,
+                            unsigned long size, unsigned long grow)
 {
        struct mm_struct *mm = vma->vm_mm;
        struct rlimit *rlim = current->signal->rlim;
-       unsigned long new_start, actual_size;
+       unsigned long new_start;
 
        /* address space limit tests */
        if (!may_expand_vm(mm, grow))
                return -ENOMEM;
 
        /* Stack limit test */
-       actual_size = size;
-       if (size && (vma->vm_flags & (VM_GROWSUP | VM_GROWSDOWN)))
-               actual_size -= PAGE_SIZE;
-       if (actual_size > READ_ONCE(rlim[RLIMIT_STACK].rlim_cur))
+       if (size > READ_ONCE(rlim[RLIMIT_STACK].rlim_cur))
                return -ENOMEM;
 
        /* mlock limit tests */
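acct_stack_growth() used to subtract one page ("actual_size") before comparing against RLIMIT_STACK, because the old guard page was carved out of the stack vma itself; with the gap now held outside the vma, the full requested size is charged against the limit. The limit being tested here is the ordinary per-process stack rlimit, which can be inspected from userspace, e.g. with this small sketch:

#include <stdio.h>
#include <sys/resource.h>

int main(void)
{
	struct rlimit rl;

	if (getrlimit(RLIMIT_STACK, &rl))
		return 1;

	printf("stack rlimit: cur=%llu max=%llu\n",
	       (unsigned long long)rl.rlim_cur,
	       (unsigned long long)rl.rlim_max);
	return 0;
}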
@@ -2147,32 +2165,43 @@ static int acct_stack_growth(struct vm_area_struct *vma, unsigned long size, uns
 int expand_upwards(struct vm_area_struct *vma, unsigned long address)
 {
        struct mm_struct *mm = vma->vm_mm;
-       int error;
+       struct vm_area_struct *next;
+       unsigned long gap_addr;
+       int error = 0;
 
        if (!(vma->vm_flags & VM_GROWSUP))
                return -EFAULT;
 
-       /*
-        * We must make sure the anon_vma is allocated
-        * so that the anon_vma locking is not a noop.
-        */
+       /* Guard against exceeding limits of the address space. */
+       address &= PAGE_MASK;
+       if (address >= TASK_SIZE)
+               return -ENOMEM;
+       address += PAGE_SIZE;
+
+       /* Enforce stack_guard_gap */
+       gap_addr = address + stack_guard_gap;
+
+       /* Guard against overflow */
+       if (gap_addr < address || gap_addr > TASK_SIZE)
+               gap_addr = TASK_SIZE;
+
+       next = vma->vm_next;
+       if (next && next->vm_start < gap_addr) {
+               if (!(next->vm_flags & VM_GROWSUP))
+                       return -ENOMEM;
+               /* Check that both stack segments have the same anon_vma? */
+       }
+
+       /* We must make sure the anon_vma is allocated. */
        if (unlikely(anon_vma_prepare(vma)))
                return -ENOMEM;
-       vma_lock_anon_vma(vma);
 
        /*
         * vma->vm_start/vm_end cannot change under us because the caller
         * is required to hold the mmap_sem in read mode.  We need the
         * anon_vma lock to serialize against concurrent expand_stacks.
-        * Also guard against wrapping around to address 0.
         */
-       if (address < PAGE_ALIGN(address+4))
-               address = PAGE_ALIGN(address+4);
-       else {
-               vma_unlock_anon_vma(vma);
-               return -ENOMEM;
-       }
-       error = 0;
+       anon_vma_lock_write(vma->anon_vma);
 
        /* Somebody else might have raced and expanded it already */
        if (address > vma->vm_end) {
@@ -2190,7 +2219,7 @@ int expand_upwards(struct vm_area_struct *vma, unsigned long address)
                                 * updates, but we only hold a shared mmap_sem
                                 * lock here, so we need to protect against
                                 * concurrent vma expansions.
-                                * vma_lock_anon_vma() doesn't help here, as
+                                * anon_vma_lock_write() doesn't help here, as
                                 * we don't guarantee that all growable vmas
                                 * in a mm share the same root anon vma.
                                 * So, we reuse mm->page_table_lock to guard
@@ -2207,14 +2236,14 @@ int expand_upwards(struct vm_area_struct *vma, unsigned long address)
                                if (vma->vm_next)
                                        vma_gap_update(vma->vm_next);
                                else
-                                       mm->highest_vm_end = address;
+                                       mm->highest_vm_end = vm_end_gap(vma);
                                spin_unlock(&mm->page_table_lock);
 
                                perf_event_mmap(vma);
                        }
                }
        }
-       vma_unlock_anon_vma(vma);
+       anon_vma_unlock_write(vma->anon_vma);
        khugepaged_enter_vma_merge(vma, vma->vm_flags);
        validate_mm(mm);
        return error;
@@ -2228,27 +2257,36 @@ int expand_downwards(struct vm_area_struct *vma,
                                   unsigned long address)
 {
        struct mm_struct *mm = vma->vm_mm;
+       struct vm_area_struct *prev;
+       unsigned long gap_addr;
        int error;
 
-       /*
-        * We must make sure the anon_vma is allocated
-        * so that the anon_vma locking is not a noop.
-        */
-       if (unlikely(anon_vma_prepare(vma)))
-               return -ENOMEM;
-
        address &= PAGE_MASK;
        error = security_mmap_addr(address);
        if (error)
                return error;
 
-       vma_lock_anon_vma(vma);
+       /* Enforce stack_guard_gap */
+       gap_addr = address - stack_guard_gap;
+       if (gap_addr > address)
+               return -ENOMEM;
+       prev = vma->vm_prev;
+       if (prev && prev->vm_end > gap_addr) {
+               if (!(prev->vm_flags & VM_GROWSDOWN))
+                       return -ENOMEM;
+               /* Check that both stack segments have the same anon_vma? */
+       }
+
+       /* We must make sure the anon_vma is allocated. */
+       if (unlikely(anon_vma_prepare(vma)))
+               return -ENOMEM;
 
        /*
         * vma->vm_start/vm_end cannot change under us because the caller
         * is required to hold the mmap_sem in read mode.  We need the
         * anon_vma lock to serialize against concurrent expand_stacks.
         */
+       anon_vma_lock_write(vma->anon_vma);
 
        /* Somebody else might have raced and expanded it already */
        if (address < vma->vm_start) {
@@ -2266,7 +2304,7 @@ int expand_downwards(struct vm_area_struct *vma,
                                 * updates, but we only hold a shared mmap_sem
                                 * lock here, so we need to protect against
                                 * concurrent vma expansions.
-                                * vma_lock_anon_vma() doesn't help here, as
+                                * anon_vma_lock_write() doesn't help here, as
                                 * we don't guarantee that all growable vmas
                                 * in a mm share the same root anon vma.
                                 * So, we reuse mm->page_table_lock to guard
@@ -2288,34 +2326,31 @@ int expand_downwards(struct vm_area_struct *vma,
                        }
                }
        }
-       vma_unlock_anon_vma(vma);
+       anon_vma_unlock_write(vma->anon_vma);
        khugepaged_enter_vma_merge(vma, vma->vm_flags);
        validate_mm(mm);
        return error;
 }
 
-/*
- * Note how expand_stack() refuses to expand the stack all the way to
- * abut the next virtual mapping, *unless* that mapping itself is also
- * a stack mapping. We want to leave room for a guard page, after all
- * (the guard page itself is not added here, that is done by the
- * actual page faulting logic)
- *
- * This matches the behavior of the guard page logic (see mm/memory.c:
- * check_stack_guard_page()), which only allows the guard page to be
- * removed under these circumstances.
- */
+/* enforced gap between the expanding stack and other mappings. */
+unsigned long stack_guard_gap = 256UL<<PAGE_SHIFT;
+
+static int __init cmdline_parse_stack_guard_gap(char *p)
+{
+       unsigned long val;
+       char *endptr;
+
+       val = simple_strtoul(p, &endptr, 10);
+       if (!*endptr)
+               stack_guard_gap = val << PAGE_SHIFT;
+
+       return 0;
+}
+__setup("stack_guard_gap=", cmdline_parse_stack_guard_gap);
+
 #ifdef CONFIG_STACK_GROWSUP
 int expand_stack(struct vm_area_struct *vma, unsigned long address)
 {
-       struct vm_area_struct *next;
-
-       address &= PAGE_MASK;
-       next = vma->vm_next;
-       if (next && next->vm_start == address + PAGE_SIZE) {
-               if (!(next->vm_flags & VM_GROWSUP))
-                       return -ENOMEM;
-       }
        return expand_upwards(vma, address);
 }
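Because expand_upwards() and expand_downwards() now perform the neighbour checks themselves (refusing to expand within stack_guard_gap of a non-stack mapping), the open-coded guard-page checks in the expand_stack() wrappers here and in the grows-down variant below are dropped. The gap defaults to 256 pages and is set in pages via the new boot parameter added above; for example, on a 4 KB page kernel the command line stack_guard_gap=512 would roughly double the gap to 2 MB, and stack_guard_gap=1 would fall back to a single-page spacing (not identical to the old behaviour, since the old guard page lived inside the vma).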
 
@@ -2337,14 +2372,6 @@ find_extend_vma(struct mm_struct *mm, unsigned long addr)
 #else
 int expand_stack(struct vm_area_struct *vma, unsigned long address)
 {
-       struct vm_area_struct *prev;
-
-       address &= PAGE_MASK;
-       prev = vma->vm_prev;
-       if (prev && prev->vm_end == address) {
-               if (!(prev->vm_flags & VM_GROWSDOWN))
-                       return -ENOMEM;
-       }
        return expand_downwards(vma, address);
 }
 
@@ -2442,7 +2469,7 @@ detach_vmas_to_be_unmapped(struct mm_struct *mm, struct vm_area_struct *vma,
                vma->vm_prev = prev;
                vma_gap_update(vma);
        } else
-               mm->highest_vm_end = prev ? prev->vm_end : 0;
+               mm->highest_vm_end = prev ? vm_end_gap(prev) : 0;
        tail_vma->vm_next = NULL;
 
        /* Kill the cache */
@@ -2673,12 +2700,29 @@ SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size,
        if (!vma || !(vma->vm_flags & VM_SHARED))
                goto out;
 
-       if (start < vma->vm_start || start + size > vma->vm_end)
+       if (start < vma->vm_start)
                goto out;
 
-       if (pgoff == linear_page_index(vma, start)) {
-               ret = 0;
-               goto out;
+       if (start + size > vma->vm_end) {
+               struct vm_area_struct *next;
+
+               for (next = vma->vm_next; next; next = next->vm_next) {
+                       /* hole between vmas ? */
+                       if (next->vm_start != next->vm_prev->vm_end)
+                               goto out;
+
+                       if (next->vm_file != vma->vm_file)
+                               goto out;
+
+                       if (next->vm_flags != vma->vm_flags)
+                               goto out;
+
+                       if (start + size <= next->vm_end)
+                               break;
+               }
+
+               if (!next)
+                       goto out;
        }
 
        prot |= vma->vm_flags & VM_READ ? PROT_READ : 0;
@@ -2688,9 +2732,16 @@ SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size,
        flags &= MAP_NONBLOCK;
        flags |= MAP_SHARED | MAP_FIXED | MAP_POPULATE;
        if (vma->vm_flags & VM_LOCKED) {
+               struct vm_area_struct *tmp;
                flags |= MAP_LOCKED;
+
                /* drop PG_Mlocked flag for over-mapped range */
-               munlock_vma_pages_range(vma, start, start + size);
+               for (tmp = vma; tmp->vm_start >= start + size;
+                               tmp = tmp->vm_next) {
+                       munlock_vma_pages_range(tmp,
+                                       max(tmp->vm_start, start),
+                                       min(tmp->vm_end, start + size));
+               }
        }
 
        file = get_file(vma->vm_file);
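The final hunks adjust the remap_file_pages() emulation: a source range may now span several back-to-back vmas of the same file with identical flags, and the VM_LOCKED case munlocks each overlapped piece instead of assuming a single vma. For context, a hedged userspace sketch of the syscall this code emulates; file names and contents are hypothetical, and remap_file_pages() is deprecated, so this is illustrative only:

#define _GNU_SOURCE
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
	long page = sysconf(_SC_PAGESIZE);
	FILE *f = tmpfile();

	if (!f || ftruncate(fileno(f), 2 * page))
		return 1;

	char *map = mmap(NULL, 2 * page, PROT_READ | PROT_WRITE,
			 MAP_SHARED, fileno(f), 0);
	if (map == MAP_FAILED)
		return 1;

	strcpy(map, "page0");
	strcpy(map + page, "page1");

	/* show file page 1 at the start of the mapping (a nonlinear view) */
	if (remap_file_pages(map, page, 0, 1, 0))
		perror("remap_file_pages");

	printf("%s\n", map);	/* "page1" if the remap took effect */
	return 0;
}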