drbd: wait for meta data IO completion even with failed disk, unless force-detached
author: Lars Ellenberg <lars.ellenberg@linbit.com>
Thu, 27 Sep 2012 11:03:45 +0000 (13:03 +0200)
committer: Jens Axboe <axboe@kernel.dk>
Tue, 30 Oct 2012 07:39:18 +0000 (08:39 +0100)
The intention of force-detach is to be able to deal with a completely
unresponsive lower level IO stack, one which no longer delivers even error
completions, that is, delivers no completions at all.

In all other cases, we must still wait for the meta data IO completion.

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
drivers/block/drbd/drbd_actlog.c
drivers/block/drbd/drbd_bitmap.c
drivers/block/drbd/drbd_int.h

index 3fbef018ce555fe47a716a0de01c2df837a2cc9e..ec9b10cd65dde7e2c59b1f9670def77c477d2ce0 100644 (file)
@@ -82,22 +82,19 @@ void drbd_md_put_buffer(struct drbd_conf *mdev)
                wake_up(&mdev->misc_wait);
 }
 
-static bool md_io_allowed(struct drbd_conf *mdev)
-{
-       enum drbd_disk_state ds = mdev->state.disk;
-       return ds >= D_NEGOTIATING || ds == D_ATTACHING;
-}
-
-void wait_until_done_or_disk_failure(struct drbd_conf *mdev, struct drbd_backing_dev *bdev,
+void wait_until_done_or_force_detached(struct drbd_conf *mdev, struct drbd_backing_dev *bdev,
                                     unsigned int *done)
 {
        long dt = bdev->dc.disk_timeout * HZ / 10;
        if (dt == 0)
                dt = MAX_SCHEDULE_TIMEOUT;
 
-       dt = wait_event_timeout(mdev->misc_wait, *done || !md_io_allowed(mdev), dt);
-       if (dt == 0)
+       dt = wait_event_timeout(mdev->misc_wait,
+                       *done || test_bit(FORCE_DETACH, &mdev->flags), dt);
+       if (dt == 0) {
                dev_err(DEV, "meta-data IO operation timed out\n");
+               drbd_chk_io_error(mdev, 1, DRBD_FORCE_DETACH);
+       }
 }
 
 static int _drbd_md_sync_page_io(struct drbd_conf *mdev,
@@ -137,7 +134,7 @@ static int _drbd_md_sync_page_io(struct drbd_conf *mdev,
                bio_endio(bio, -EIO);
        else
                submit_bio(rw, bio);
-       wait_until_done_or_disk_failure(mdev, bdev, &mdev->md_io.done);
+       wait_until_done_or_force_detached(mdev, bdev, &mdev->md_io.done);
        ok = bio_flagged(bio, BIO_UPTODATE) && mdev->md_io.error == 0;
 
  out:
index dda4e384929efad25c17f15d18ccb7b4b0126b6d..8d8069758042601f5a02c060a265e43e3d1401d8 100644 (file)
@@ -1090,7 +1090,7 @@ static int bm_rw(struct drbd_conf *mdev, int rw, unsigned flags, unsigned lazy_w
         * "in_flight reached zero, all done" event.
         */
        if (!atomic_dec_and_test(&ctx->in_flight))
-               wait_until_done_or_disk_failure(mdev, mdev->ldev, &ctx->done);
+               wait_until_done_or_force_detached(mdev, mdev->ldev, &ctx->done);
        else
                kref_put(&ctx->kref, &bm_aio_ctx_destroy);
 
@@ -1105,7 +1105,7 @@ static int bm_rw(struct drbd_conf *mdev, int rw, unsigned flags, unsigned lazy_w
        }
 
        if (atomic_read(&ctx->in_flight))
-               err = -EIO; /* Disk failed during IO... */
+               err = -EIO; /* Disk timeout/force-detach during IO... */
 
        now = jiffies;
        if (rw == WRITE) {
@@ -1224,11 +1224,11 @@ int drbd_bm_write_page(struct drbd_conf *mdev, unsigned int idx) __must_hold(loc
        }
 
        bm_page_io_async(ctx, idx, WRITE_SYNC);
-       wait_until_done_or_disk_failure(mdev, mdev->ldev, &ctx->done);
+       wait_until_done_or_force_detached(mdev, mdev->ldev, &ctx->done);
 
        if (ctx->error)
                drbd_chk_io_error(mdev, 1, DRBD_META_IO_ERROR);
-               /* that should force detach, so the in memory bitmap will be
+               /* that causes us to detach, so the in memory bitmap will be
                 * gone in a moment as well. */
 
        mdev->bm_writ_cnt++;
index 557dd5a2c0cc2b60f346b7a93111e1803d86ec5d..619a4944feeae00b624b2ea495b500280252fbad 100644 (file)
@@ -1581,8 +1581,8 @@ extern void *drbd_md_get_buffer(struct drbd_conf *mdev);
 extern void drbd_md_put_buffer(struct drbd_conf *mdev);
 extern int drbd_md_sync_page_io(struct drbd_conf *mdev,
                                struct drbd_backing_dev *bdev, sector_t sector, int rw);
-extern void wait_until_done_or_disk_failure(struct drbd_conf *mdev, struct drbd_backing_dev *bdev,
-                                           unsigned int *done);
+extern void wait_until_done_or_force_detached(struct drbd_conf *mdev,
+               struct drbd_backing_dev *bdev, unsigned int *done);
 extern void drbd_ov_oos_found(struct drbd_conf*, sector_t, int);
 extern void drbd_rs_controller_reset(struct drbd_conf *mdev);