From 11b58e73a3a3d1bbb582370d59f9b2c4d0136b42 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Wed, 12 May 2010 17:08:26 +0200 Subject: [PATCH] drbd: factored tl_restart() out of tl_clear(). If IO was frozen for a temporary network outage, resend the content of the transfer-log into the newly established connection. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 2 + drivers/block/drbd/drbd_main.c | 116 ++++++++++++++++++++--------- drivers/block/drbd/drbd_receiver.c | 3 - drivers/block/drbd/drbd_req.c | 14 ++++ drivers/block/drbd/drbd_req.h | 8 ++ 5 files changed, 103 insertions(+), 40 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 11b7c6f84cd3..bef9138f1975 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1138,6 +1138,8 @@ extern void drbd_free_resources(struct drbd_conf *mdev); extern void tl_release(struct drbd_conf *mdev, unsigned int barrier_nr, unsigned int set_size); extern void tl_clear(struct drbd_conf *mdev); +enum drbd_req_event; +extern void tl_restart(struct drbd_conf *mdev, enum drbd_req_event what); extern void _tl_add_barrier(struct drbd_conf *, struct drbd_tl_epoch *); extern void drbd_free_sock(struct drbd_conf *mdev); extern int drbd_send(struct drbd_conf *mdev, struct socket *sock, diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index a86e6f1ff7f4..a8a0341fce53 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -333,59 +333,94 @@ bail: drbd_force_state(mdev, NS(conn, C_PROTOCOL_ERROR)); } - /** - * tl_clear() - Clears all requests and &struct drbd_tl_epoch objects out of the TL + * _tl_restart() - Walks the transfer log, and applies an action to all requests * @mdev: DRBD device. + * @what: The action/event to perform with all request objects * - * This is called after the connection to the peer was lost. 
The storage covered - * by the requests on the transfer gets marked as our of sync. Called from the - * receiver thread and the worker thread. + * @what might be one of connection_lost_while_pending, resend, fail_frozen_disk_io, + * restart_frozen_disk_io. */ -void tl_clear(struct drbd_conf *mdev) +static void _tl_restart(struct drbd_conf *mdev, enum drbd_req_event what) { - struct drbd_tl_epoch *b, *tmp; + struct drbd_tl_epoch *b, *tmp, **pn; struct list_head *le, *tle; - struct drbd_request *r; - int new_initial_bnr = net_random(); - - spin_lock_irq(&mdev->req_lock); + struct drbd_request *req; + int rv, n_writes, n_reads; b = mdev->oldest_tle; + pn = &mdev->oldest_tle; while (b) { + n_writes = 0; + n_reads = 0; list_for_each_safe(le, tle, &b->requests) { - r = list_entry(le, struct drbd_request, tl_requests); - /* It would be nice to complete outside of spinlock. - * But this is easier for now. */ - _req_mod(r, connection_lost_while_pending); + req = list_entry(le, struct drbd_request, tl_requests); + rv = _req_mod(req, what); + + n_writes += (rv & MR_WRITE) >> MR_WRITE_SHIFT; + n_reads += (rv & MR_READ) >> MR_READ_SHIFT; } tmp = b->next; - /* there could still be requests on that ring list, - * in case local io is still pending */ - list_del(&b->requests); - - /* dec_ap_pending corresponding to queue_barrier. - * the newest barrier may not have been queued yet, - * in which case w.cb is still NULL. */ - if (b->w.cb != NULL) - dec_ap_pending(mdev); - - if (b == mdev->newest_tle) { - /* recycle, but reinit! 
*/ - D_ASSERT(tmp == NULL); - INIT_LIST_HEAD(&b->requests); - INIT_LIST_HEAD(&b->w.list); - b->w.cb = NULL; - b->br_number = new_initial_bnr; - b->n_writes = 0; - - mdev->oldest_tle = b; - break; + if (n_writes + n_reads) { + if (what == resend) { + b->n_writes = n_writes; + if (b->w.cb == NULL) { + b->w.cb = w_send_barrier; + inc_ap_pending(mdev); + set_bit(CREATE_BARRIER, &mdev->flags); + } + + drbd_queue_work(&mdev->data.work, &b->w); + } + pn = &b->next; + } else { + /* there could still be requests on that ring list, + * in case local io is still pending */ + list_del(&b->requests); + + /* dec_ap_pending corresponding to queue_barrier. + * the newest barrier may not have been queued yet, + * in which case w.cb is still NULL. */ + if (b->w.cb != NULL) + dec_ap_pending(mdev); + + if (b == mdev->newest_tle) { + /* recycle, but reinit! */ + D_ASSERT(tmp == NULL); + INIT_LIST_HEAD(&b->requests); + INIT_LIST_HEAD(&b->w.list); + b->w.cb = NULL; + b->br_number = net_random(); + b->n_writes = 0; + + *pn = b; + break; + } + *pn = tmp; + kfree(b); } - kfree(b); b = tmp; } +} + + +/** + * tl_clear() - Clears all requests and &struct drbd_tl_epoch objects out of the TL + * @mdev: DRBD device. + * + * This is called after the connection to the peer was lost. The storage covered + * by the requests on the transfer gets marked as out of sync. Called from the + * receiver thread and the worker thread. + */ +void tl_clear(struct drbd_conf *mdev) +{ + struct list_head *le, *tle; + struct drbd_request *r; + + spin_lock_irq(&mdev->req_lock); + + _tl_restart(mdev, connection_lost_while_pending); /* we expect this list to be empty. 
*/ D_ASSERT(list_empty(&mdev->out_of_sequence_requests)); @@ -406,6 +441,13 @@ void tl_clear(struct drbd_conf *mdev) spin_unlock_irq(&mdev->req_lock); } +void tl_restart(struct drbd_conf *mdev, enum drbd_req_event what) +{ + spin_lock_irq(&mdev->req_lock); + _tl_restart(mdev, what); + spin_unlock_irq(&mdev->req_lock); +} + /** * cl_wide_st_chg() - TRUE if the state change is a cluster wide one * @mdev: DRBD device. diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 88a5e1f4ec1d..8daa920c40a4 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -776,9 +776,6 @@ static int drbd_connect(struct drbd_conf *mdev) D_ASSERT(!mdev->data.socket); - if (test_and_clear_bit(CREATE_BARRIER, &mdev->flags)) - dev_err(DEV, "CREATE_BARRIER flag was set in drbd_connect - now cleared!\n"); - if (drbd_request_state(mdev, NS(conn, C_WF_CONNECTION)) < SS_SUCCESS) return -2; diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index d9df1a1c40b9..39c2cc3614e4 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -634,6 +634,20 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, /* else: done by handed_over_to_network */ break; + case resend: + /* If RQ_NET_OK is already set, we got a P_WRITE_ACK or P_RECV_ACK + before the connection loss; only P_BARRIER_ACK was missing. + Throwing them out of the TL here by pretending we got a BARRIER_ACK + TODO: Either resync them, or ensure peer was not rebooted. */ + if (!(req->rq_state & RQ_NET_OK)) { + if (req->w.cb) { + drbd_queue_work(&mdev->data.work, &req->w); + rv = req->rq_state & RQ_WRITE ? 
MR_WRITE : MR_READ; + } + break; + } + /* else, fall through to barrier_acked */ + case barrier_acked: if (!(req->rq_state & RQ_WRITE)) break; diff --git a/drivers/block/drbd/drbd_req.h b/drivers/block/drbd/drbd_req.h index db37c6e47fa9..1bcb85539735 100644 --- a/drivers/block/drbd/drbd_req.h +++ b/drivers/block/drbd/drbd_req.h @@ -104,6 +104,7 @@ enum drbd_req_event { read_ahead_completed_with_error, write_completed_with_error, completed_ok, + resend, nothing, /* for tracing only */ }; @@ -206,6 +207,13 @@ enum drbd_req_state_bits { #define RQ_WRITE (1UL << __RQ_WRITE) +/* For waking up the frozen transfer log, _req_mod() has to return whether the request + should be counted in the epoch object. */ +#define MR_WRITE_SHIFT 0 +#define MR_WRITE (1 << MR_WRITE_SHIFT) +#define MR_READ_SHIFT 1 +#define MR_READ (1 << MR_READ_SHIFT) + /* epoch entries */ static inline struct hlist_head *ee_hash_slot(struct drbd_conf *mdev, sector_t sector) -- 2.34.1