drbd: Fix a race condition that can lead to a BUG()
author Philipp Reisner <philipp.reisner@linbit.com>
Tue, 21 Aug 2012 18:34:07 +0000 (20:34 +0200)
committer Philipp Reisner <philipp.reisner@linbit.com>
Fri, 9 Nov 2012 13:08:20 +0000 (14:08 +0100)
If the preconditions for a state change change after the wait_event() we
might hit the BUG() statement in conn_set_state().

By holding the spin_lock while evaluating the condition AND until the
actual state change, we ensure that the preconditions cannot change anymore.

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
drivers/block/drbd/drbd_int.h
drivers/block/drbd/drbd_state.c

index 37ae87e468ae66af17c58f3c212b116871316ac2..1c1576b942b6d110b65101b6e2711de182ea8a22 100644 (file)
@@ -2301,3 +2301,30 @@ static inline void drbd_md_flush(struct drbd_conf *mdev)
 }
 
 #endif
+
+/*
+ * __wait_event_lock_irq - sleep uninterruptibly on @wq until @condition holds
+ *
+ * @condition is evaluated with @lock held. While it is false, @lock is
+ * released with spin_unlock_irq(), @cmd is executed, the task calls
+ * schedule(), and @lock is re-acquired with spin_lock_irq() before
+ * @condition is tested again. The macro unlocks before it ever locks, so
+ * the caller must enter with @lock held; @lock is held again when the
+ * macro completes.
+ *
+ * @cmd may be left empty, in which case only an empty statement is emitted
+ * between the unlock and schedule().
+ *
+ * This is defined in drivers/md/md.h as well. Should go into wait.h
+ */
+#define __wait_event_lock_irq(wq, condition, lock, cmd)                \
+do {                                                                   \
+       wait_queue_t __wait;                                            \
+       init_waitqueue_entry(&__wait, current);                         \
+       /* enqueue on wq first; condition is tested in the loop */      \
+       add_wait_queue(&wq, &__wait);                                   \
+       for (;;) {                                                      \
+               set_current_state(TASK_UNINTERRUPTIBLE);                \
+               if (condition)                                          \
+                       break;                                          \
+               spin_unlock_irq(&lock);                                 \
+               cmd;                                                    \
+               schedule();                                             \
+               spin_lock_irq(&lock);                                   \
+       }                                                               \
+       current->state = TASK_RUNNING; /* condition met, lock held */   \
+       remove_wait_queue(&wq, &__wait);                                \
+} while (0)
+/*
+ * wait_event_lock_irq - wait for @condition, dropping @lock while asleep
+ *
+ * Tests @condition once up front and finishes immediately if it is already
+ * true; otherwise hands all four arguments to __wait_event_lock_irq().
+ * @condition is an expression that is re-evaluated on every test, so any
+ * side effects it has occur each time it is checked.
+ */
+#define wait_event_lock_irq(wq, condition, lock, cmd)                  \
+do {                                                                   \
+       if (condition)  /* already true: skip the wait-queue setup */   \
+               break;                                                  \
+       __wait_event_lock_irq(wq, condition, lock, cmd);                \
+} while (0)
index 4fda4e2024ecc24c5c5cd4590e51c94b6bab3ff1..ce1495187f023079bf0321d319d5df7cff5b5666 100644 (file)
@@ -1710,7 +1710,6 @@ _conn_rq_cond(struct drbd_tconn *tconn, union drbd_state mask, union drbd_state
        if (test_and_clear_bit(CONN_WD_ST_CHG_FAIL, &tconn->flags))
                return SS_CW_FAILED_BY_PEER;
 
-       spin_lock_irq(&tconn->req_lock);
        rv = tconn->cstate != C_WF_REPORT_PARAMS ? SS_CW_NO_NEED : SS_UNKNOWN_ERROR;
 
        if (rv == SS_UNKNOWN_ERROR)
@@ -1719,8 +1718,6 @@ _conn_rq_cond(struct drbd_tconn *tconn, union drbd_state mask, union drbd_state
        if (rv == SS_SUCCESS)
                rv = SS_UNKNOWN_ERROR; /* cont waiting, otherwise fail. */
 
-       spin_unlock_irq(&tconn->req_lock);
-
        return rv;
 }
 
@@ -1736,21 +1733,22 @@ conn_cl_wide(struct drbd_tconn *tconn, union drbd_state mask, union drbd_state v
        set_bit(CONN_WD_ST_CHG_REQ, &tconn->flags);
        if (conn_send_state_req(tconn, mask, val)) {
                clear_bit(CONN_WD_ST_CHG_REQ, &tconn->flags);
-               rv = SS_CW_FAILED_BY_PEER;
                /* if (f & CS_VERBOSE)
                   print_st_err(mdev, os, ns, rv); */
-               goto abort;
+               mutex_unlock(&tconn->cstate_mutex);
+               spin_lock_irq(&tconn->req_lock);
+               return SS_CW_FAILED_BY_PEER;
        }
 
        if (val.conn == C_DISCONNECTING)
                set_bit(DISCONNECT_SENT, &tconn->flags);
 
-       wait_event(tconn->ping_wait, (rv = _conn_rq_cond(tconn, mask, val)));
+       spin_lock_irq(&tconn->req_lock);
+
+       wait_event_lock_irq(tconn->ping_wait, (rv = _conn_rq_cond(tconn, mask, val)), tconn->req_lock,);
        clear_bit(CONN_WD_ST_CHG_REQ, &tconn->flags);
 
-abort:
        mutex_unlock(&tconn->cstate_mutex);
-       spin_lock_irq(&tconn->req_lock);
 
        return rv;
 }