drbd: Fix a potential write ordering issue on SyncTarget nodes
authorPhilipp Reisner <philipp.reisner@linbit.com>
Sun, 19 Feb 2012 00:27:53 +0000 (01:27 +0100)
committerPhilipp Reisner <philipp.reisner@linbit.com>
Wed, 9 May 2012 13:16:38 +0000 (15:16 +0200)
If a SyncTarget node gets a P_RS_DATA_REPLY before a P_DATA packet
for the same sector, it simply submits these two IO requests.

  This is possible because on the SyncSource node, the data of the
  P_RS_DATA_REPLY packet was read from disk.  Immediately after that a
  write request from upper layers came in.

The disk scheduler or even the "hardware" queues on the disk drive might
reorder these writes.

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
drivers/block/drbd/drbd_receiver.c

index 6b0505a3c4fc2adeab29c7b4a7a2403b867a7931..d601501c336a3bb8696ff41da1306abe209ed579 100644 (file)
@@ -1585,6 +1585,24 @@ static int e_send_discard_ack(struct drbd_conf *mdev, struct drbd_work *w, int u
        return ok;
 }
 
+static bool overlapping_resync_write(struct drbd_conf *mdev, struct drbd_epoch_entry *data_e)
+{
+       /* Returns true if data_e's sector/size range overlaps() any entry currently on mdev->sync_ee. */
+       struct drbd_epoch_entry *rs_e;
+       bool rv = 0; /* stays 0 (false) unless an overlapping entry is found */
+
+       spin_lock_irq(&mdev->req_lock); /* hold req_lock while walking the sync_ee list */
+       list_for_each_entry(rs_e, &mdev->sync_ee, w.list) { /* NOTE(review): sync_ee presumably holds in-flight resync writes - confirm */
+               if (overlaps(data_e->sector, data_e->size, rs_e->sector, rs_e->size)) {
+                       rv = 1;
+                       break; /* a single overlap is enough, stop scanning */
+               }
+       }
+       spin_unlock_irq(&mdev->req_lock);
+
+       return rv;
+}
+
 /* Called from receive_Data.
  * Synchronize packets on sock with packets on msock.
  *
@@ -1828,6 +1846,9 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned
        list_add(&e->w.list, &mdev->active_ee);
        spin_unlock_irq(&mdev->req_lock);
 
+       if (mdev->state.conn == C_SYNC_TARGET)
+               wait_event(mdev->ee_wait, !overlapping_resync_write(mdev, e));
+
        switch (mdev->net_conf->wire_protocol) {
        case DRBD_PROT_C:
                inc_unacked(mdev);