From dfa8bedbfe881caf6676704ab1aae18dfe8e430a Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Wed, 29 Jun 2011 14:06:08 +0200 Subject: [PATCH] drbd: Implemented the disk-timeout option When the disk-timeout is active, and it expires for a single request, we consider the local disk as D_FAILED. Note: With this change, I made both timeout based state transitions HARD state transitions. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_main.c | 5 +++++ drivers/block/drbd/drbd_receiver.c | 2 -- drivers/block/drbd/drbd_req.c | 32 +++++++++++++++++++----------- include/linux/drbd_limits.h | 5 +++++ include/linux/drbd_nl.h | 1 + 5 files changed, 31 insertions(+), 14 deletions(-) diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index bc8a8a7556da..4bd636524dd1 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -1404,6 +1404,9 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, /* Here we have the actions that are performed after a state change. This function might sleep */ + if (os.disk <= D_NEGOTIATING && ns.disk > D_NEGOTIATING) + mod_timer(&mdev->request_timer, jiffies + HZ); + nsm.i = -1; if (ns.susp_nod) { if (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED) @@ -3318,6 +3321,8 @@ static void drbd_delete_device(unsigned int minor) if (!mdev) return; + del_timer_sync(&mdev->request_timer); + /* paranoia asserts */ if (mdev->open_cnt != 0) dev_err(DEV, "open_cnt = %d in %s:%u", mdev->open_cnt, diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index e7ed0aa93a16..a85bbe1bbc2b 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -3803,8 +3803,6 @@ static void drbd_disconnect(struct drbd_conf *mdev) atomic_set(&mdev->rs_pending_cnt, 0); wake_up(&mdev->misc_wait); - del_timer(&mdev->request_timer); - /* make sure syncer is stopped and w_resume_next_sg queued */ del_timer_sync(&mdev->resync_timer); resync_timer_fn((unsigned long)mdev); diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index 1a8aac4b0c2f..ef145e33a647 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -1208,13 +1208,19 @@ void request_timer_fn(unsigned long data) struct drbd_conf *mdev = (struct drbd_conf *) data; struct drbd_request *req; /* oldest request */ struct list_head *le; - unsigned long et = 0; /* effective timeout = ko_count * timeout */ + unsigned long ent = 0, dt = 0, et, nt; /* effective timeout = ko_count * timeout */ if (get_net_conf(mdev)) { - et = mdev->net_conf->timeout*HZ/10 * mdev->net_conf->ko_count; + ent = mdev->net_conf->timeout*HZ/10 * mdev->net_conf->ko_count; put_net_conf(mdev); } - if (!et || mdev->state.conn < C_WF_REPORT_PARAMS) + if (get_ldev(mdev)) { + dt = mdev->ldev->dc.disk_timeout * HZ / 10; + put_ldev(mdev); + } + et = min_not_zero(dt, ent); + + if (!et || (mdev->state.conn < C_WF_REPORT_PARAMS && mdev->state.disk <= D_FAILED)) return; /* Recurring timer stopped */ spin_lock_irq(&mdev->req_lock); @@ -1227,17 +1233,19 @@ void request_timer_fn(unsigned long data) le = le->prev; req = list_entry(le, struct drbd_request, tl_requests); - if (time_is_before_eq_jiffies(req->start_time + et)) { - if (req->rq_state & RQ_NET_PENDING) { + if (ent && req->rq_state & RQ_NET_PENDING) { + if (time_is_before_eq_jiffies(req->start_time + ent)) { dev_warn(DEV, "Remote failed to finish a request within ko-count * timeout\n"); - _drbd_set_state(_NS(mdev, conn, C_TIMEOUT), CS_VERBOSE, NULL); - } else { - dev_warn(DEV, "Local backing block device frozen?\n"); - mod_timer(&mdev->request_timer, jiffies + et); + _drbd_set_state(_NS(mdev, conn, C_TIMEOUT), CS_VERBOSE | CS_HARD, NULL); } - } else { - mod_timer(&mdev->request_timer, req->start_time + et); } - + if (dt && req->rq_state & RQ_LOCAL_PENDING) { + if (time_is_before_eq_jiffies(req->start_time + dt)) { + dev_warn(DEV, "Local backing device failed to meet the disk-timeout\n"); + __drbd_chk_io_error(mdev, 1); + } + } + nt = (time_is_before_eq_jiffies(req->start_time + et) ? jiffies : req->start_time) + et; spin_unlock_irq(&mdev->req_lock); + mod_timer(&mdev->request_timer, nt); } diff --git a/include/linux/drbd_limits.h b/include/linux/drbd_limits.h index 928c84dfaf42..fb670bf603f7 100644 --- a/include/linux/drbd_limits.h +++ b/include/linux/drbd_limits.h @@ -48,6 +48,11 @@ #define DRBD_TIMEOUT_MAX 600 #define DRBD_TIMEOUT_DEF 60 /* 6 seconds */ + /* If backing disk takes longer than disk_timeout, mark the disk as failed */ +#define DRBD_DISK_TIMEOUT_MIN 0 /* 0 = disabled */ +#define DRBD_DISK_TIMEOUT_MAX 6000 /* 10 Minutes */ +#define DRBD_DISK_TIMEOUT_DEF 0 /* disabled */ + /* active connection retries when C_WF_CONNECTION */ #define DRBD_CONNECT_INT_MIN 1 #define DRBD_CONNECT_INT_MAX 120 diff --git a/include/linux/drbd_nl.h b/include/linux/drbd_nl.h index 7203c9ead233..a8706f08ab36 100644 --- a/include/linux/drbd_nl.h +++ b/include/linux/drbd_nl.h @@ -31,6 +31,7 @@ NL_PACKET(disk_conf, 3, NL_INTEGER( 56, T_MAY_IGNORE, max_bio_bvecs) NL_BIT( 57, T_MAY_IGNORE, no_disk_barrier) NL_BIT( 58, T_MAY_IGNORE, no_disk_drain) + NL_INTEGER( 89, T_MAY_IGNORE, disk_timeout) ) NL_PACKET(detach, 4, -- 2.34.1