diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 9ccef39a9de261c96f4e5775d7dca48b63d4d133..cd8989c1027ea6e2446af6ff148b5a947c8cbc0f 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -204,9 +204,9 @@ static int kill_proc(struct task_struct *t, unsigned long addr, int trapno,
 #endif
        si.si_addr_lsb = compound_order(compound_head(page)) + PAGE_SHIFT;
 
-       if ((flags & MF_ACTION_REQUIRED) && t == current) {
+       if ((flags & MF_ACTION_REQUIRED) && t->mm == current->mm) {
                si.si_code = BUS_MCEERR_AR;
-               ret = force_sig_info(SIGBUS, &si, t);
+               ret = force_sig_info(SIGBUS, &si, current);
        } else {
                /*
                 * Don't use force here, it's convenient if the signal
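As context for the BUS_MCEERR_AR path above, here is a minimal userspace sketch (not part of the patch; it assumes a glibc recent enough to expose si_addr_lsb and the BUS_MCEERR_* codes) of how a process consumes the siginfo fields that kill_proc() fills in:

```c
#define _GNU_SOURCE
#include <signal.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

static volatile sig_atomic_t mce_code;
static void *volatile mce_addr;
static volatile int mce_lsb;

static void sigbus_handler(int sig, siginfo_t *si, void *ctx)
{
	(void)sig; (void)ctx;
	/* Only async-signal-safe work here: stash the fields and return. */
	mce_code = si->si_code;
	mce_addr = si->si_addr;
	mce_lsb  = si->si_addr_lsb;	/* log2 of the poisoned range */
	if (si->si_code == BUS_MCEERR_AR)
		_exit(1);		/* we just consumed poisoned data */
}

int main(void)
{
	struct sigaction sa;

	memset(&sa, 0, sizeof(sa));
	sa.sa_sigaction = sigbus_handler;
	sa.sa_flags = SA_SIGINFO;
	sigemptyset(&sa.sa_mask);
	sigaction(SIGBUS, &sa, NULL);

	pause();	/* returns once an action-optional (AO) SIGBUS arrives */
	if (mce_code == BUS_MCEERR_AO)
		printf("hwpoison: %zu bytes at %p are being offlined\n",
		       (size_t)1 << mce_lsb, mce_addr);
	return 0;
}
```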
@@ -380,20 +380,51 @@ static void kill_procs(struct list_head *to_kill, int forcekill, int trapno,
        }
 }
 
-static int task_early_kill(struct task_struct *tsk)
+/*
+ * Find a dedicated thread which is supposed to handle SIGBUS(BUS_MCEERR_AO)
+ * on behalf of the thread group. Return the task_struct of the first such
+ * dedicated thread found, or NULL if there is none.
+ *
+ * We already hold read_lock(&tasklist_lock) in the caller, so we don't
+ * have to call rcu_read_lock/unlock() in this function.
+ */
+static struct task_struct *find_early_kill_thread(struct task_struct *tsk)
 {
+       struct task_struct *t;
+
+       for_each_thread(tsk, t)
+               if ((t->flags & PF_MCE_PROCESS) && (t->flags & PF_MCE_EARLY))
+                       return t;
+       return NULL;
+}
+
+/*
+ * Determine whether a given process is an "early kill" process that expects
+ * to be signaled when some page under the process is hwpoisoned.
+ * Return the task_struct of the dedicated thread (the main thread unless one is
+ * explicitly specified) if the process is "early kill", and NULL otherwise.
+ */
+static struct task_struct *task_early_kill(struct task_struct *tsk,
+                                          int force_early)
+{
+       struct task_struct *t;
        if (!tsk->mm)
-               return 0;
-       if (tsk->flags & PF_MCE_PROCESS)
-               return !!(tsk->flags & PF_MCE_EARLY);
-       return sysctl_memory_failure_early_kill;
+               return NULL;
+       if (force_early)
+               return tsk;
+       t = find_early_kill_thread(tsk);
+       if (t)
+               return t;
+       if (sysctl_memory_failure_early_kill)
+               return tsk;
+       return NULL;
 }
 
 /*
  * Collect processes when the error hit an anonymous page.
  */
 static void collect_procs_anon(struct page *page, struct list_head *to_kill,
-                             struct to_kill **tkc)
+                             struct to_kill **tkc, int force_early)
 {
        struct vm_area_struct *vma;
        struct task_struct *tsk;
@@ -408,16 +439,17 @@ static void collect_procs_anon(struct page *page, struct list_head *to_kill,
        read_lock(&tasklist_lock);
        for_each_process (tsk) {
                struct anon_vma_chain *vmac;
+               struct task_struct *t = task_early_kill(tsk, force_early);
 
-               if (!task_early_kill(tsk))
+               if (!t)
                        continue;
                anon_vma_interval_tree_foreach(vmac, &av->rb_root,
                                               pgoff, pgoff) {
                        vma = vmac->vma;
                        if (!page_mapped_in_vma(page, vma))
                                continue;
-                       if (vma->vm_mm == tsk->mm)
-                               add_to_kill(tsk, page, vma, to_kill, tkc);
+                       if (vma->vm_mm == t->mm)
+                               add_to_kill(t, page, vma, to_kill, tkc);
                }
        }
        read_unlock(&tasklist_lock);
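The thread returned by task_early_kill() is whichever one has set PF_MCE_PROCESS | PF_MCE_EARLY on itself. From userspace that is done with the documented prctl(PR_MCE_KILL) interface; the following hedged sketch (thread layout and names are illustrative, not from the patch) dedicates one thread to the early SIGBUS(BUS_MCEERR_AO) notifications that the hunks above now route to it:

```c
#define _GNU_SOURCE
#include <pthread.h>
#include <signal.h>
#include <stdio.h>
#include <sys/prctl.h>
#include <unistd.h>

static void *mce_listener(void *arg)
{
	sigset_t set;
	siginfo_t si;

	(void)arg;
	/* Opt this thread, and only this thread, in to early kill:
	 * the kernel sets PF_MCE_PROCESS | PF_MCE_EARLY on it. */
	if (prctl(PR_MCE_KILL, PR_MCE_KILL_SET, PR_MCE_KILL_EARLY, 0, 0))
		perror("prctl(PR_MCE_KILL)");

	sigemptyset(&set);
	sigaddset(&set, SIGBUS);
	for (;;) {
		/* AO notifications are ordinary (non-forced) signals, so the
		 * dedicated thread can receive them synchronously. */
		if (sigwaitinfo(&set, &si) < 0)
			continue;
		if (si.si_code == BUS_MCEERR_AO)
			printf("page at %p is being offlined\n", si.si_addr);
	}
	return NULL;
}

int main(void)
{
	sigset_t set;
	pthread_t tid;

	/* Block SIGBUS up front so the listener can sigwaitinfo() on it;
	 * the mask is inherited by threads created afterwards. */
	sigemptyset(&set);
	sigaddset(&set, SIGBUS);
	pthread_sigmask(SIG_BLOCK, &set, NULL);

	pthread_create(&tid, NULL, mce_listener, NULL);
	for (;;)
		pause();	/* the rest of the process does its real work */
}
```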
@@ -428,7 +460,7 @@ static void collect_procs_anon(struct page *page, struct list_head *to_kill,
  * Collect processes when the error hit a file mapped page.
  */
 static void collect_procs_file(struct page *page, struct list_head *to_kill,
-                             struct to_kill **tkc)
+                             struct to_kill **tkc, int force_early)
 {
        struct vm_area_struct *vma;
        struct task_struct *tsk;
@@ -438,10 +470,10 @@ static void collect_procs_file(struct page *page, struct list_head *to_kill,
        read_lock(&tasklist_lock);
        for_each_process(tsk) {
                pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
+               struct task_struct *t = task_early_kill(tsk, force_early);
 
-               if (!task_early_kill(tsk))
+               if (!t)
                        continue;
-
                vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff,
                                      pgoff) {
                        /*
@@ -451,8 +483,8 @@ static void collect_procs_file(struct page *page, struct list_head *to_kill,
                         * Assume applications who requested early kill want
                         * to be informed of all such data corruptions.
                         */
-                       if (vma->vm_mm == tsk->mm)
-                               add_to_kill(tsk, page, vma, to_kill, tkc);
+                       if (vma->vm_mm == t->mm)
+                               add_to_kill(t, page, vma, to_kill, tkc);
                }
        }
        read_unlock(&tasklist_lock);
@@ -465,7 +497,8 @@ static void collect_procs_file(struct page *page, struct list_head *to_kill,
  * First preallocate one tokill structure outside the spin locks,
  * so that we can kill at least one process reasonably reliably.
  */
-static void collect_procs(struct page *page, struct list_head *tokill)
+static void collect_procs(struct page *page, struct list_head *tokill,
+                               int force_early)
 {
        struct to_kill *tk;
 
@@ -476,9 +509,9 @@ static void collect_procs(struct page *page, struct list_head *tokill)
        if (!tk)
                return;
        if (PageAnon(page))
-               collect_procs_anon(page, tokill, &tk);
+               collect_procs_anon(page, tokill, &tk, force_early);
        else
-               collect_procs_file(page, tokill, &tk);
+               collect_procs_file(page, tokill, &tk, force_early);
        kfree(tk);
 }
 
@@ -963,7 +996,7 @@ static int hwpoison_user_mappings(struct page *p, unsigned long pfn,
         * there's nothing that can be done.
         */
        if (kill)
-               collect_procs(ppage, &tokill);
+               collect_procs(ppage, &tokill, flags & MF_ACTION_REQUIRED);
 
        ret = try_to_unmap(ppage, ttu);
        if (ret != SWAP_SUCCESS)
@@ -1132,11 +1165,6 @@ int memory_failure(unsigned long pfn, int trapno, int flags)
                }
        }
 
-       /*
-        * Lock the page and wait for writeback to finish.
-        * It's very difficult to mess with pages currently under IO
-        * and in many cases impossible, so we just avoid it here.
-        */
        lock_page(hpage);
 
        /*
@@ -1186,6 +1214,10 @@ int memory_failure(unsigned long pfn, int trapno, int flags)
        if (PageHuge(p))
                set_page_hwpoison_huge_page(hpage);
 
+       /*
+        * It's very difficult to mess with pages currently under IO
+        * and in many cases impossible, so we just avoid it here.
+        */
        wait_on_page_writeback(p);
 
        /*
@@ -1298,7 +1330,7 @@ static void memory_failure_work_func(struct work_struct *work)
        unsigned long proc_flags;
        int gotten;
 
-       mf_cpu = &__get_cpu_var(memory_failure_cpu);
+       mf_cpu = this_cpu_ptr(&memory_failure_cpu);
        for (;;) {
                spin_lock_irqsave(&mf_cpu->lock, proc_flags);
                gotten = kfifo_get(&mf_cpu->fifo, &entry);
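For reference, a kernel-style sketch of the per-CPU access pattern this hunk converts (struct and function names are made up; the real memory_failure_cpu also carries a kfifo of pending entries and a work_struct, as the surrounding code shows). __get_cpu_var() yielded the lvalue of this CPU's copy, while this_cpu_ptr() takes the variable's address and returns the equivalent pointer:

```c
#include <linux/percpu.h>
#include <linux/spinlock.h>

/* Illustrative per-CPU state; the lock must be initialised at boot,
 * e.g. with spin_lock_init(). */
struct example_cpu_state {
	spinlock_t lock;
};
static DEFINE_PER_CPU(struct example_cpu_state, example_cpu_state);

static void example_drain(void)
{
	struct example_cpu_state *st;
	unsigned long flags;

	/* was: st = &__get_cpu_var(example_cpu_state); */
	st = this_cpu_ptr(&example_cpu_state);
	spin_lock_irqsave(&st->lock, flags);
	/* ... drain this CPU's queue ... */
	spin_unlock_irqrestore(&st->lock, flags);
}
```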
@@ -1503,7 +1535,7 @@ static int soft_offline_huge_page(struct page *page, int flags)
 
        /* Keep page count to indicate a given hugepage is isolated. */
        list_move(&hpage->lru, &pagelist);
-       ret = migrate_pages(&pagelist, new_page, MPOL_MF_MOVE_ALL,
+       ret = migrate_pages(&pagelist, new_page, NULL, MPOL_MF_MOVE_ALL,
                                MIGRATE_SYNC, MR_MEMORY_FAILURE);
        if (ret) {
                pr_info("soft offline: %#lx: migration failed %d, type %lx\n",
@@ -1584,7 +1616,7 @@ static int __soft_offline_page(struct page *page, int flags)
                inc_zone_page_state(page, NR_ISOLATED_ANON +
                                        page_is_file_cache(page));
                list_add(&page->lru, &pagelist);
-               ret = migrate_pages(&pagelist, new_page, MPOL_MF_MOVE_ALL,
+               ret = migrate_pages(&pagelist, new_page, NULL, MPOL_MF_MOVE_ALL,
                                        MIGRATE_SYNC, MR_MEMORY_FAILURE);
                if (ret) {
                        if (!list_empty(&pagelist)) {
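The extra NULL in these two hunks is the new optional free_page_t callback through which migrate_pages() hands back destination pages it obtained from the new_page_t callback but never used. A hedged kernel-style sketch, assuming the 3.16-era callback prototypes; the example_* names are made up and this file simply passes NULL to keep the default putback behaviour:

```c
#include <linux/gfp.h>
#include <linux/migrate.h>

/* Destination allocator: called for every source page to be migrated. */
static struct page *example_get_page(struct page *page, unsigned long private,
				     int **reason)
{
	/* A NUMA-aware caller might honour page_to_nid(page) here. */
	return alloc_page(GFP_HIGHUSER_MOVABLE);
}

/* Destination "undo": called only for pages example_get_page() returned
 * that migration ended up not consuming. */
static void example_put_page(struct page *page, unsigned long private)
{
	__free_page(page);
}

/*
 * Usage (compare with the calls above):
 *
 *	ret = migrate_pages(&pagelist, example_get_page, example_put_page,
 *			    0, MIGRATE_SYNC, MR_MEMORY_FAILURE);
 */
```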
@@ -1664,11 +1696,7 @@ int soft_offline_page(struct page *page, int flags)
                }
        }
 
-       /*
-        * The lock_memory_hotplug prevents a race with memory hotplug.
-        * This is a big hammer, a better would be nicer.
-        */
-       lock_memory_hotplug();
+       get_online_mems();
 
        /*
         * Isolate the page, so that it doesn't get reallocated if it
@@ -1679,7 +1707,7 @@ int soft_offline_page(struct page *page, int flags)
                set_migratetype_isolate(page, true);
 
        ret = get_any_page(page, pfn, flags);
-       unlock_memory_hotplug();
+       put_online_mems();
        if (ret > 0) { /* for in-use pages */
                if (PageHuge(page))
                        ret = soft_offline_huge_page(page, flags);
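get_online_mems()/put_online_mems() take a reference that holds off memory hot-remove for the duration, replacing the global lock_memory_hotplug() "big hammer" removed above. A minimal kernel-style sketch of the bracketing pattern (the function name is illustrative):

```c
#include <linux/memory_hotplug.h>

static void example_walk_stable_memory(void)
{
	get_online_mems();	/* memory cannot be hot-removed in here */
	/* ... look up and isolate pages, as soft_offline_page() does ... */
	put_online_mems();
}
```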