oom: remove oom_disable_count
authorDavid Rientjes <rientjes@google.com>
Tue, 1 Nov 2011 00:07:15 +0000 (17:07 -0700)
committerLinus Torvalds <torvalds@linux-foundation.org>
Tue, 1 Nov 2011 00:30:45 +0000 (17:30 -0700)
This removes mm->oom_disable_count entirely since it's unnecessary and
currently buggy.  The counter was intended to be per-process but it's
currently decremented in the exit path for each thread that exits, causing
it to underflow.

The count was originally intended to prevent oom killing threads that
share memory with threads that cannot be killed since it doesn't lead to
future memory freeing.  The counter could be fixed to represent all
threads sharing the same mm, but it's better to remove the count since:

 - it is possible that the OOM_DISABLE thread sharing memory with the
   victim is waiting on that thread to exit and will actually cause
   future memory freeing, and

 - there is no guarantee that a thread is disabled from oom killing just
   because another thread sharing its mm is oom disabled.

Signed-off-by: David Rientjes <rientjes@google.com>
Reported-by: Oleg Nesterov <oleg@redhat.com>
Reviewed-by: Oleg Nesterov <oleg@redhat.com>
Cc: Ying Han <yinghan@google.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
fs/exec.c
fs/proc/base.c
include/linux/mm_types.h
kernel/exit.c
kernel/fork.c
mm/oom_kill.c

index 25dcbe5fc35664d6f389ce48f051d6528d607014..36254645b7cc4d81f7c73b2940767fe06de1642c 100644 (file)
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -841,10 +841,6 @@ static int exec_mmap(struct mm_struct *mm)
        tsk->mm = mm;
        tsk->active_mm = mm;
        activate_mm(active_mm, mm);
-       if (old_mm && tsk->signal->oom_score_adj == OOM_SCORE_ADJ_MIN) {
-               atomic_dec(&old_mm->oom_disable_count);
-               atomic_inc(&tsk->mm->oom_disable_count);
-       }
        task_unlock(tsk);
        arch_pick_mmap_layout(mm);
        if (old_mm) {
index 5eb02069e1b817730050b7b46304a9244c064bde..8f0087e20e168ee04e6b14e7ef05a68f13e4b58e 100644 (file)
@@ -1107,13 +1107,6 @@ static ssize_t oom_adjust_write(struct file *file, const char __user *buf,
                goto err_sighand;
        }
 
-       if (oom_adjust != task->signal->oom_adj) {
-               if (oom_adjust == OOM_DISABLE)
-                       atomic_inc(&task->mm->oom_disable_count);
-               if (task->signal->oom_adj == OOM_DISABLE)
-                       atomic_dec(&task->mm->oom_disable_count);
-       }
-
        /*
         * Warn that /proc/pid/oom_adj is deprecated, see
         * Documentation/feature-removal-schedule.txt.
@@ -1215,12 +1208,6 @@ static ssize_t oom_score_adj_write(struct file *file, const char __user *buf,
                goto err_sighand;
        }
 
-       if (oom_score_adj != task->signal->oom_score_adj) {
-               if (oom_score_adj == OOM_SCORE_ADJ_MIN)
-                       atomic_inc(&task->mm->oom_disable_count);
-               if (task->signal->oom_score_adj == OOM_SCORE_ADJ_MIN)
-                       atomic_dec(&task->mm->oom_disable_count);
-       }
        task->signal->oom_score_adj = oom_score_adj;
        if (has_capability_noaudit(current, CAP_SYS_RESOURCE))
                task->signal->oom_score_adj_min = oom_score_adj;
index c93d00a6e95d19ca347ab52761de58bf91141c74..6456624aa9644573ea5298e114a2230564f5b54e 100644 (file)
@@ -336,9 +336,6 @@ struct mm_struct {
        unsigned int token_priority;
        unsigned int last_interval;
 
-       /* How many tasks sharing this mm are OOM_DISABLE */
-       atomic_t oom_disable_count;
-
        unsigned long flags; /* Must use atomic bitops to access the bits */
 
        struct core_state *core_state; /* coredumping support */
index 2913b3509d4288026990cc087894e69eace6c564..d0b7d988f8735beb6e1e7b5f7b42ecbd04b91bf5 100644 (file)
@@ -681,8 +681,6 @@ static void exit_mm(struct task_struct * tsk)
        enter_lazy_tlb(mm, current);
        /* We don't want this task to be frozen prematurely */
        clear_freeze_flag(tsk);
-       if (tsk->signal->oom_score_adj == OOM_SCORE_ADJ_MIN)
-               atomic_dec(&mm->oom_disable_count);
        task_unlock(tsk);
        mm_update_next_owner(mm);
        mmput(mm);
index 8e6b6f4fb272ba498a4acdbcb6a29c8f7e6c87dd..70d76191afb9449033f09ef2c3532d450defdd49 100644 (file)
@@ -501,7 +501,6 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p)
        mm->cached_hole_size = ~0UL;
        mm_init_aio(mm);
        mm_init_owner(mm, p);
-       atomic_set(&mm->oom_disable_count, 0);
 
        if (likely(!mm_alloc_pgd(mm))) {
                mm->def_flags = 0;
@@ -816,8 +815,6 @@ good_mm:
        /* Initializing for Swap token stuff */
        mm->token_priority = 0;
        mm->last_interval = 0;
-       if (tsk->signal->oom_score_adj == OOM_SCORE_ADJ_MIN)
-               atomic_inc(&mm->oom_disable_count);
 
        tsk->mm = mm;
        tsk->active_mm = mm;
@@ -1391,13 +1388,8 @@ bad_fork_cleanup_io:
 bad_fork_cleanup_namespaces:
        exit_task_namespaces(p);
 bad_fork_cleanup_mm:
-       if (p->mm) {
-               task_lock(p);
-               if (p->signal->oom_score_adj == OOM_SCORE_ADJ_MIN)
-                       atomic_dec(&p->mm->oom_disable_count);
-               task_unlock(p);
+       if (p->mm)
                mmput(p->mm);
-       }
 bad_fork_cleanup_signal:
        if (!(clone_flags & CLONE_THREAD))
                free_signal_struct(p->signal);
index b0d8943bc9fd798530e8c5b5899da026ecc74f56..2b97e8f04607b2ccb0011a3af5996aa41efe069b 100644 (file)
@@ -54,13 +54,7 @@ int test_set_oom_score_adj(int new_val)
 
        spin_lock_irq(&sighand->siglock);
        old_val = current->signal->oom_score_adj;
-       if (new_val != old_val) {
-               if (new_val == OOM_SCORE_ADJ_MIN)
-                       atomic_inc(&current->mm->oom_disable_count);
-               else if (old_val == OOM_SCORE_ADJ_MIN)
-                       atomic_dec(&current->mm->oom_disable_count);
-               current->signal->oom_score_adj = new_val;
-       }
+       current->signal->oom_score_adj = new_val;
        spin_unlock_irq(&sighand->siglock);
 
        return old_val;
@@ -172,16 +166,6 @@ unsigned int oom_badness(struct task_struct *p, struct mem_cgroup *mem,
        if (!p)
                return 0;
 
-       /*
-        * Shortcut check for a thread sharing p->mm that is OOM_SCORE_ADJ_MIN
-        * so the entire heuristic doesn't need to be executed for something
-        * that cannot be killed.
-        */
-       if (atomic_read(&p->mm->oom_disable_count)) {
-               task_unlock(p);
-               return 0;
-       }
-
        /*
         * The memory controller may have a limit of 0 bytes, so avoid a divide
         * by zero, if necessary.
@@ -451,6 +435,9 @@ static int oom_kill_task(struct task_struct *p, struct mem_cgroup *mem)
        for_each_process(q)
                if (q->mm == mm && !same_thread_group(q, p) &&
                    !(q->flags & PF_KTHREAD)) {
+                       if (q->signal->oom_score_adj == OOM_SCORE_ADJ_MIN)
+                               continue;
+
                        task_lock(q);   /* Protect ->comm from prctl() */
                        pr_err("Kill process %d (%s) sharing same memory\n",
                                task_pid_nr(q), q->comm);
@@ -727,7 +714,7 @@ void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask,
        read_lock(&tasklist_lock);
        if (sysctl_oom_kill_allocating_task &&
            !oom_unkillable_task(current, NULL, nodemask) &&
-           current->mm && !atomic_read(&current->mm->oom_disable_count)) {
+           current->mm) {
                /*
                 * oom_kill_process() needs tasklist_lock held.  If it returns
                 * non-zero, current could not be killed so we must fallback to