drbd: fix resend/resubmit of frozen IO
author Lars Ellenberg <lars.ellenberg@linbit.com>
Wed, 25 Apr 2012 09:46:14 +0000 (11:46 +0200)
committer Philipp Reisner <philipp.reisner@linbit.com>
Wed, 9 May 2012 13:16:58 +0000 (15:16 +0200)
DRBD can freeze IO, due to fencing policy (fencing resource-and-stonith),
or because we lost access to data (on-no-data-accessible suspend-io).

Resuming from there (re-connect, or re-attach, or explicit admin
intervention) should "just work".

Unfortunately, since the commit
  drbd: Implemented real timeout checking for request processing time
(if a timeout is so configured), when the re-attach/re-connect did not
happen within that timeout, request_timer_fn() would time out and
detach/disconnect again virtually immediately.

This change tracks the time of the most recent attach and connect, and
does not time out within <configured timeout interval> after attach/connect.

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
drivers/block/drbd/drbd_int.h
drivers/block/drbd/drbd_main.c
drivers/block/drbd/drbd_req.c

index f215ad430bb81716f807ca43bb852ff53dc73b53..302a6e786f76906ceb2c48a3707254cb9418b566 100644 (file)
@@ -1049,6 +1049,8 @@ struct drbd_conf {
        struct crypto_hash *csums_tfm;
        struct crypto_hash *verify_tfm;
 
+       unsigned long last_reattach_jif;
+       unsigned long last_reconnect_jif;
        struct drbd_thread receiver;
        struct drbd_thread worker;
        struct drbd_thread asender;
index deccff3af774e17529642336139e00a88f38d78c..ab501b23b50e1cd16afc76c3fdad95c991f3797c 100644 (file)
@@ -1326,6 +1326,15 @@ __drbd_set_state(struct drbd_conf *mdev, union drbd_state ns,
        if (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED)
                drbd_resume_al(mdev);
 
+       /* remember last connect and attach times so request_timer_fn() won't
+        * kill newly established sessions while we are still trying to thaw
+        * previously frozen IO */
+       if (os.conn != C_WF_REPORT_PARAMS && ns.conn == C_WF_REPORT_PARAMS)
+               mdev->last_reconnect_jif = jiffies;
+       if ((os.disk == D_ATTACHING || os.disk == D_NEGOTIATING) &&
+           ns.disk > D_NEGOTIATING)
+               mdev->last_reattach_jif = jiffies;
+
        ascw = kmalloc(sizeof(*ascw), GFP_ATOMIC);
        if (ascw) {
                ascw->os = os;
index 340d57b985652e2f961323d56ac6247ff3fa439e..4a642ce62bae6f865cda16c7d96b788391a24989 100644 (file)
@@ -1200,43 +1200,65 @@ void request_timer_fn(unsigned long data)
        struct drbd_request *req; /* oldest request */
        struct list_head *le;
        unsigned long ent = 0, dt = 0, et, nt; /* effective timeout = ko_count * timeout */
+       unsigned long now;
 
        if (get_net_conf(mdev)) {
-               ent = mdev->net_conf->timeout*HZ/10 * mdev->net_conf->ko_count;
+               if (mdev->state.conn >= C_WF_REPORT_PARAMS)
+                       ent = mdev->net_conf->timeout*HZ/10
+                               * mdev->net_conf->ko_count;
                put_net_conf(mdev);
        }
-       if (get_ldev(mdev)) {
+       if (get_ldev(mdev)) { /* implicit state.disk >= D_INCONSISTENT */
                dt = mdev->ldev->dc.disk_timeout * HZ / 10;
                put_ldev(mdev);
        }
        et = min_not_zero(dt, ent);
 
-       if (!et || (mdev->state.conn < C_WF_REPORT_PARAMS && mdev->state.disk <= D_FAILED))
+       if (!et)
                return; /* Recurring timer stopped */
 
+       now = jiffies;
+
        spin_lock_irq(&mdev->req_lock);
        le = &mdev->oldest_tle->requests;
        if (list_empty(le)) {
                spin_unlock_irq(&mdev->req_lock);
-               mod_timer(&mdev->request_timer, jiffies + et);
+               mod_timer(&mdev->request_timer, now + et);
                return;
        }
 
        le = le->prev;
        req = list_entry(le, struct drbd_request, tl_requests);
-       if (ent && req->rq_state & RQ_NET_PENDING) {
-               if (time_is_before_eq_jiffies(req->start_time + ent)) {
-                       dev_warn(DEV, "Remote failed to finish a request within ko-count * timeout\n");
-                       _drbd_set_state(_NS(mdev, conn, C_TIMEOUT), CS_VERBOSE | CS_HARD, NULL);
-               }
+
+       /* The request is considered timed out, if
+        * - we have some effective timeout from the configuration,
+        *   with above state restrictions applied,
+        * - the oldest request is waiting for a response from the network
+        *   resp. the local disk,
+        * - the oldest request is in fact older than the effective timeout,
+        * - the connection was established (resp. disk was attached)
+        *   for longer than the timeout already.
+        * Note that for 32bit jiffies and very stable connections/disks,
+        * we may have a wrap around, which is caught by
+        *   !time_in_range(now, last_..._jif, last_..._jif + timeout).
+        *
+        * Side effect: once per 32bit wrap-around interval, which means every
+        * ~198 days with 250 HZ, we have a window where the timeout would need
+        * to expire twice (worst case) to become effective. Good enough.
+        */
+       if (ent && req->rq_state & RQ_NET_PENDING &&
+                time_after(now, req->start_time + ent) &&
+               !time_in_range(now, mdev->last_reconnect_jif, mdev->last_reconnect_jif + ent)) {
+               dev_warn(DEV, "Remote failed to finish a request within ko-count * timeout\n");
+               _drbd_set_state(_NS(mdev, conn, C_TIMEOUT), CS_VERBOSE | CS_HARD, NULL);
        }
-       if (dt && req->rq_state & RQ_LOCAL_PENDING) {
-               if (time_is_before_eq_jiffies(req->start_time + dt)) {
-                       dev_warn(DEV, "Local backing device failed to meet the disk-timeout\n");
-                       __drbd_chk_io_error(mdev, 1);
-               }
+       if (dt && req->rq_state & RQ_LOCAL_PENDING &&
+                time_after(now, req->start_time + dt) &&
+               !time_in_range(now, mdev->last_reattach_jif, mdev->last_reattach_jif + dt)) {
+               dev_warn(DEV, "Local backing device failed to meet the disk-timeout\n");
+               __drbd_chk_io_error(mdev, 1);
        }
-       nt = (time_is_before_eq_jiffies(req->start_time + et) ? jiffies : req->start_time) + et;
+       nt = (time_after(now, req->start_time + et) ? now : req->start_time) + et;
        spin_unlock_irq(&mdev->req_lock);
        mod_timer(&mdev->request_timer, nt);
 }