From: Lars Ellenberg Date: Thu, 27 Oct 2011 14:52:30 +0000 (+0200) Subject: drbd: send intermediate state change results to the peer X-Git-Tag: firefly_0821_release~3680^2~2719^2^2~32 X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=f479ea06613514814449f28cba6488e31698e406;p=firefly-linux-kernel-4.4.55.git drbd: send intermediate state change results to the peer DRBD state changes schedule after_state_ch() actions to a worker thread, which decides, based on the old and new states of that change, whether to send an informational state update packet (P_STATE) to the peer. If it decides to drbd_send_state(), it would however always send the _current_ state, which, if a second state change happens before the after_state_ch() of the first ran, may "fast-forward" the peer's view about this node. In most cases that is harmless, but sometimes this can confuse DRBD, for example into not actually starting a necessary resync if you do a very tight detach/attach loop on a Connected Secondary. Fix this by always sending the "new" state of the respective state transition which scheduled this after_state_ch() work. 
Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index c7976a77dfba..31dee20f3411 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1229,8 +1229,8 @@ extern int drbd_send_uuids(struct drbd_conf *mdev); extern int drbd_send_uuids_skip_initial_sync(struct drbd_conf *mdev); extern int drbd_gen_and_send_sync_uuid(struct drbd_conf *mdev); extern int drbd_send_sizes(struct drbd_conf *mdev, int trigger_reply, enum dds_flags flags); -extern int _drbd_send_state(struct drbd_conf *mdev); -extern int drbd_send_state(struct drbd_conf *mdev); +extern int drbd_send_state(struct drbd_conf *mdev, union drbd_state s); +extern int drbd_send_current_state(struct drbd_conf *mdev); extern int _drbd_send_cmd(struct drbd_conf *mdev, struct socket *sock, enum drbd_packets cmd, struct p_header80 *h, size_t size, unsigned msg_flags); diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 64318d4ca9ec..3a5b4dec529f 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -1487,7 +1487,7 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, /* Do not change the order of the if above and the two below... */ if (os.pdsk == D_DISKLESS && ns.pdsk > D_DISKLESS) { /* attach on the peer */ drbd_send_uuids(mdev); - drbd_send_state(mdev); + drbd_send_state(mdev, ns); } /* No point in queuing send_bitmap if we don't have a connection * anymore, so check also the _current_ state, not only the new state @@ -1552,14 +1552,14 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, os.disk == D_ATTACHING && ns.disk == D_NEGOTIATING) { drbd_send_sizes(mdev, 0, 0); /* to start sync... */ drbd_send_uuids(mdev); - drbd_send_state(mdev); + drbd_send_state(mdev, ns); } /* We want to pause/continue resync, tell peer. 
*/ if (ns.conn >= C_CONNECTED && ((os.aftr_isp != ns.aftr_isp) || (os.user_isp != ns.user_isp))) - drbd_send_state(mdev); + drbd_send_state(mdev, ns); /* In case one of the isp bits got set, suspend other devices. */ if ((!os.aftr_isp && !os.peer_isp && !os.user_isp) && @@ -1569,10 +1569,10 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, /* Make sure the peer gets informed about eventual state changes (ISP bits) while we were in WFReportParams. */ if (os.conn == C_WF_REPORT_PARAMS && ns.conn >= C_CONNECTED) - drbd_send_state(mdev); + drbd_send_state(mdev, ns); if (os.conn != C_AHEAD && ns.conn == C_AHEAD) - drbd_send_state(mdev); + drbd_send_state(mdev, ns); /* We are in the progress to start a full sync... */ if ((os.conn != C_STARTING_SYNC_T && ns.conn == C_STARTING_SYNC_T) || @@ -1612,7 +1612,7 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, "ASSERT FAILED: disk is %s during detach\n", drbd_disk_str(mdev->state.disk)); - if (drbd_send_state(mdev)) + if (drbd_send_state(mdev, ns)) dev_info(DEV, "Notified peer that I am detaching my disk\n"); drbd_rs_cancel_all(mdev); @@ -1642,7 +1642,7 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, mdev->rs_failed = 0; atomic_set(&mdev->rs_pending_cnt, 0); - if (drbd_send_state(mdev)) + if (drbd_send_state(mdev, ns)) dev_info(DEV, "Notified peer that I'm now diskless.\n"); /* corresponding get_ldev in __drbd_set_state * this may finally trigger drbd_ldev_destroy. */ @@ -1651,7 +1651,7 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, /* Notify peer that I had a local IO error, and did not detached.. 
*/ if (os.disk == D_UP_TO_DATE && ns.disk == D_INCONSISTENT) - drbd_send_state(mdev); + drbd_send_state(mdev, ns); /* Disks got bigger while they were detached */ if (ns.disk > D_NEGOTIATING && ns.pdsk > D_NEGOTIATING && @@ -1669,7 +1669,7 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, /* sync target done with resync. Explicitly notify peer, even though * it should (at least for non-empty resyncs) already know itself. */ if (os.disk < D_UP_TO_DATE && os.conn >= C_SYNC_SOURCE && ns.conn == C_CONNECTED) - drbd_send_state(mdev); + drbd_send_state(mdev, ns); /* This triggers bitmap writeout of potentially still unwritten pages * if the resync finished cleanly, or aborted because of peer disk @@ -2191,10 +2191,10 @@ int drbd_send_sizes(struct drbd_conf *mdev, int trigger_reply, enum dds_flags fl } /** - * drbd_send_state() - Sends the drbd state to the peer + * drbd_send_current_state() - Sends the drbd state to the peer * @mdev: DRBD device. */ -int drbd_send_state(struct drbd_conf *mdev) +int drbd_send_current_state(struct drbd_conf *mdev) { struct socket *sock; struct p_state p; @@ -2220,6 +2220,37 @@ int drbd_send_state(struct drbd_conf *mdev) return ok; } +/** + * drbd_send_state() - After a state change, sends the new state to the peer + * @mdev: DRBD device. + * @state: the state to send, not necessarily the current state. + * + * Each state change queues an "after_state_ch" work, which will eventually + * send the resulting new state to the peer. If more state changes happen + * between queuing and processing of the after_state_ch work, we still + * want to send each intermediary state in the order it occurred. 
+ */ +int drbd_send_state(struct drbd_conf *mdev, union drbd_state state) +{ + struct socket *sock; + struct p_state p; + int ok = 0; + + mutex_lock(&mdev->data.mutex); + + p.state = cpu_to_be32(state.i); + sock = mdev->data.socket; + + if (likely(sock != NULL)) { + ok = _drbd_send_cmd(mdev, sock, P_STATE, + (struct p_header80 *)&p, sizeof(p), 0); + } + + mutex_unlock(&mdev->data.mutex); + + return ok; +} + int drbd_send_state_req(struct drbd_conf *mdev, union drbd_state mask, union drbd_state val) { diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 00a82ab7ab98..1bbbad302ae7 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -432,7 +432,7 @@ drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, int force) /* if this was forced, we should consider sync */ if (forced) drbd_send_uuids(mdev); - drbd_send_state(mdev); + drbd_send_current_state(mdev); } drbd_md_sync(mdev); diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 1a48e02b83bc..f0d86cb300cf 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -899,7 +899,7 @@ retry: drbd_send_sync_param(mdev, &mdev->sync_conf); drbd_send_sizes(mdev, 0, 0); drbd_send_uuids(mdev); - drbd_send_state(mdev); + drbd_send_current_state(mdev); clear_bit(USE_DEGR_WFC_T, &mdev->flags); clear_bit(RESIZE_PENDING, &mdev->flags); @@ -3294,7 +3294,7 @@ static int receive_state(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned /* Nowadays only used when forcing a node into primary role and setting its disk to UpToDate with that */ drbd_send_uuids(mdev); - drbd_send_state(mdev); + drbd_send_current_state(mdev); } }