futex: Make lookup_pi_state more robust
authorThomas Gleixner <tglx@linutronix.de>
Tue, 3 Jun 2014 12:27:08 +0000 (12:27 +0000)
committerJP Abgrall <jpa@google.com>
Fri, 6 Jun 2014 21:53:48 +0000 (14:53 -0700)
The current implementation of lookup_pi_state has ambigous handling of
the TID value 0 in the user space futex. We can get into the kernel
even if the TID value is 0, because either there is a stale waiters
bit or the owner died bit is set or we are called from the requeue_pi
path or from user space just for fun.

The current code avoids an explicit sanity check for pid = 0 in case
that kernel internal state (waiters) are found for the user space
address. This can lead to state leakage and worse under some
circumstances.

Handle the cases explicit:

     Waiter | pi_state | pi->owner | uTID      | uODIED | ?

[1]  NULL   | ---      | ---       | 0         | 0/1    | Valid
[2]  NULL   | ---      | ---       | >0        | 0/1    | Valid

[3]  Found  | NULL     | --        | Any       | 0/1    | Invalid

[4]  Found  | Found    | NULL      | 0         | 1      | Valid
[5]  Found  | Found    | NULL      | >0        | 1      | Invalid

[6]  Found  | Found    | task      | 0         | 1      | Valid

[7]  Found  | Found    | NULL      | Any       | 0      | Invalid

[8]  Found  | Found    | task      | ==taskTID | 0/1    | Valid
[9]  Found  | Found    | task      | 0         | 0      | Invalid
[10] Found  | Found    | task      | !=taskTID | 0/1    | Invalid

[1]  Indicates that the kernel can acquire the futex atomically. We
     came came here due to a stale FUTEX_WAITERS/FUTEX_OWNER_DIED bit.

[2]  Valid, if TID does not belong to a kernel thread. If no matching
     thread is found then it indicates that the owner TID has died.

[3]  Invalid. The waiter is queued on a non PI futex

[4]  Valid state after exit_robust_list(), which sets the user space
     value to FUTEX_WAITERS | FUTEX_OWNER_DIED.

[5]  The user space value got manipulated between exit_robust_list()
     and exit_pi_state_list()

[6]  Valid state after exit_pi_state_list() which sets the new owner in
     the pi_state but cannot access the user space value.

[7]  pi_state->owner can only be NULL when the OWNER_DIED bit is set.

[8]  Owner and user space value match

[9]  There is no transient state which sets the user space TID to 0
     except exit_robust_list(), but this is indicated by the
     FUTEX_OWNER_DIED bit. See [4]

[10] There is no transient state which leaves owner and user space
     TID out of sync.

Backport to 3.13
  conflicts: kernel/futex.c

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: John Johansen <john.johansen@canonical.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Will Drewry <wad@chromium.org>
Cc: Darren Hart <dvhart@linux.intel.com>
Cc: stable@vger.kernel.org
kernel/futex.c

index 092fa090ac65ef5eaba0efdd4c2d9b5c8b3b4eec..590483b726eb0cc81bc383fabd15076e1618ede0 100644 (file)
@@ -590,6 +590,55 @@ void exit_pi_state_list(struct task_struct *curr)
        raw_spin_unlock_irq(&curr->pi_lock);
 }
 
+/*
+ * We need to check the following states:
+ *
+ *      Waiter | pi_state | pi->owner | uTID      | uODIED | ?
+ *
+ * [1]  NULL   | ---      | ---       | 0         | 0/1    | Valid
+ * [2]  NULL   | ---      | ---       | >0        | 0/1    | Valid
+ *
+ * [3]  Found  | NULL     | --        | Any       | 0/1    | Invalid
+ *
+ * [4]  Found  | Found    | NULL      | 0         | 1      | Valid
+ * [5]  Found  | Found    | NULL      | >0        | 1      | Invalid
+ *
+ * [6]  Found  | Found    | task      | 0         | 1      | Valid
+ *
+ * [7]  Found  | Found    | NULL      | Any       | 0      | Invalid
+ *
+ * [8]  Found  | Found    | task      | ==taskTID | 0/1    | Valid
+ * [9]  Found  | Found    | task      | 0         | 0      | Invalid
+ * [10] Found  | Found    | task      | !=taskTID | 0/1    | Invalid
+ *
+ * [1] Indicates that the kernel can acquire the futex atomically. We
+ *     came came here due to a stale FUTEX_WAITERS/FUTEX_OWNER_DIED bit.
+ *
+ * [2] Valid, if TID does not belong to a kernel thread. If no matching
+ *      thread is found then it indicates that the owner TID has died.
+ *
+ * [3] Invalid. The waiter is queued on a non PI futex
+ *
+ * [4] Valid state after exit_robust_list(), which sets the user space
+ *     value to FUTEX_WAITERS | FUTEX_OWNER_DIED.
+ *
+ * [5] The user space value got manipulated between exit_robust_list()
+ *     and exit_pi_state_list()
+ *
+ * [6] Valid state after exit_pi_state_list() which sets the new owner in
+ *     the pi_state but cannot access the user space value.
+ *
+ * [7] pi_state->owner can only be NULL when the OWNER_DIED bit is set.
+ *
+ * [8] Owner and user space value match
+ *
+ * [9] There is no transient state which sets the user space TID to 0
+ *     except exit_robust_list(), but this is indicated by the
+ *     FUTEX_OWNER_DIED bit. See [4]
+ *
+ * [10] There is no transient state which leaves owner and user space
+ *     TID out of sync.
+ */
 static int
 lookup_pi_state(u32 uval, struct futex_hash_bucket *hb,
                union futex_key *key, struct futex_pi_state **ps)
@@ -605,12 +654,13 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb,
        plist_for_each_entry_safe(this, next, head, list) {
                if (match_futex(&this->key, key)) {
                        /*
-                        * Another waiter already exists - bump up
-                        * the refcount and return its pi_state:
+                        * Sanity check the waiter before increasing
+                        * the refcount and attaching to it.
                         */
                        pi_state = this->pi_state;
                        /*
-                        * Userspace might have messed up non-PI and PI futexes
+                        * Userspace might have messed up non-PI and
+                        * PI futexes [3]
                         */
                        if (unlikely(!pi_state))
                                return -EINVAL;
@@ -618,34 +668,70 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb,
                        WARN_ON(!atomic_read(&pi_state->refcount));
 
                        /*
-                        * When pi_state->owner is NULL then the owner died
-                        * and another waiter is on the fly. pi_state->owner
-                        * is fixed up by the task which acquires
-                        * pi_state->rt_mutex.
-                        *
-                        * We do not check for pid == 0 which can happen when
-                        * the owner died and robust_list_exit() cleared the
-                        * TID.
+                        * Handle the owner died case:
                         */
-                       if (pid && pi_state->owner) {
+                       if (uval & FUTEX_OWNER_DIED) {
+                               /*
+                                * exit_pi_state_list sets owner to NULL and
+                                * wakes the topmost waiter. The task which
+                                * acquires the pi_state->rt_mutex will fixup
+                                * owner.
+                                */
+                               if (!pi_state->owner) {
+                                       /*
+                                        * No pi state owner, but the user
+                                        * space TID is not 0. Inconsistent
+                                        * state. [5]
+                                        */
+                                       if (pid)
+                                               return -EINVAL;
+                                       /*
+                                        * Take a ref on the state and
+                                        * return. [4]
+                                        */
+                                       goto out_state;
+                               }
+
+                               /*
+                                * If TID is 0, then either the dying owner
+                                * has not yet executed exit_pi_state_list()
+                                * or some waiter acquired the rtmutex in the
+                                * pi state, but did not yet fixup the TID in
+                                * user space.
+                                *
+                                * Take a ref on the state and return. [6]
+                                */
+                               if (!pid)
+                                       goto out_state;
+                       } else {
                                /*
-                                * Bail out if user space manipulated the
-                                * futex value.
+                                * If the owner died bit is not set,
+                                * then the pi_state must have an
+                                * owner. [7]
                                 */
-                               if (pid != task_pid_vnr(pi_state->owner))
+                               if (!pi_state->owner)
                                        return -EINVAL;
                        }
 
+                       /*
+                        * Bail out if user space manipulated the
+                        * futex value. If pi state exists then the
+                        * owner TID must be the same as the user
+                        * space TID. [9/10]
+                        */
+                       if (pid != task_pid_vnr(pi_state->owner))
+                               return -EINVAL;
+
+               out_state:
                        atomic_inc(&pi_state->refcount);
                        *ps = pi_state;
-
                        return 0;
                }
        }
 
        /*
         * We are the first waiter - try to look up the real owner and attach
-        * the new pi_state to it, but bail out when TID = 0
+        * the new pi_state to it, but bail out when TID = 0 [1]
         */
        if (!pid)
                return -ESRCH;
@@ -673,6 +759,9 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb,
                return ret;
        }
 
+       /*
+        * No existing pi state. First waiter. [2]
+        */
        pi_state = alloc_pi_state();
 
        /*