From 599377acb7cf3e1bdec13285096adac7ebaaaac5 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Fri, 17 Aug 2012 14:50:22 +0200 Subject: [PATCH] drbd: Avoid NetworkFailure state during disconnect Disconnecting is a cluster-wide state change. In case the peer node agrees to the state transition, it sends back the fact on the meta-data connection and closes both sockets. In case the node that initiated the state transfer sees the closing action on the data-socket before the P_STATE_CHG_REPLY packet, it was going into one of the network failure states. At least with the fencing option set to something else than "dont-care", the unclean shutdown of the connection causes a short IO freeze or a fence operation. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg Signed-off-by: Jens Axboe --- drivers/block/drbd/drbd_int.h | 1 + drivers/block/drbd/drbd_main.c | 3 +++ drivers/block/drbd/drbd_receiver.c | 21 ++++++++++++++++++++- 3 files changed, 24 insertions(+), 1 deletion(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 3cce7357402b..3b378124bac8 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -857,6 +857,7 @@ enum { * so shrink_page_list() would not recurse into, * and potentially deadlock on, this drbd worker. 
*/ + DISCONNECT_SENT, /* Currently the last bit in this 32bit word */ }; struct drbd_bitmap; /* opaque for drbd_conf */ diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index df9965d820c9..7b48653d1c8f 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -659,6 +659,9 @@ drbd_req_state(struct drbd_conf *mdev, union drbd_state mask, goto abort; } + if (mask.conn == C_MASK && val.conn == C_DISCONNECTING) + set_bit(DISCONNECT_SENT, &mdev->flags); + wait_event(mdev->state_wait, (rv = _req_st_cond(mdev, mask, val))); diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 55c359a1a052..64e6a619241d 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -534,7 +534,6 @@ static int drbd_recv(struct drbd_conf *mdev, void *buf, size_t size) dev_err(DEV, "sock_recvmsg returned %d\n", rv); break; } else if (rv == 0) { - dev_info(DEV, "sock was shut down by peer\n"); break; } else { /* signal came in, or peer/link went down, @@ -547,9 +546,21 @@ static int drbd_recv(struct drbd_conf *mdev, void *buf, size_t size) set_fs(oldfs); + if (rv == 0) { + if (test_bit(DISCONNECT_SENT, &mdev->flags)) { + long t; /* time_left */ + t = wait_event_timeout(mdev->state_wait, mdev->state.conn < C_CONNECTED, + mdev->net_conf->ping_timeo * HZ/10); + if (t) + goto out; + } + dev_info(DEV, "sock was shut down by peer\n"); + } + if (rv != size) drbd_force_state(mdev, NS(conn, C_BROKEN_PIPE)); +out: return rv; } @@ -760,6 +771,7 @@ static int drbd_connect(struct drbd_conf *mdev) D_ASSERT(!mdev->data.socket); + clear_bit(DISCONNECT_SENT, &mdev->flags); if (drbd_request_state(mdev, NS(conn, C_WF_CONNECTION)) < SS_SUCCESS) return -2; @@ -4680,6 +4692,13 @@ int drbd_asender(struct drbd_thread *thi) received += rv; buf += rv; } else if (rv == 0) { + if (test_bit(DISCONNECT_SENT, &mdev->flags)) { + long t; /* time_left */ + t = wait_event_timeout(mdev->state_wait, 
mdev->state.conn < C_CONNECTED, + mdev->net_conf->ping_timeo * HZ/10); + if (t) + break; + } dev_err(DEV, "meta connection shut down by peer.\n"); goto reconnect; } else if (rv == -EAGAIN) { -- 2.34.1