drbd: Use interval tree for overlapping epoch entry detection
author Andreas Gruenbacher <agruen@linbit.com>
Thu, 20 Jan 2011 14:23:07 +0000 (15:23 +0100)
committer Philipp Reisner <philipp.reisner@linbit.com>
Mon, 29 Aug 2011 09:26:53 +0000 (11:26 +0200)
Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
drivers/block/drbd/drbd_int.h
drivers/block/drbd/drbd_main.c
drivers/block/drbd/drbd_receiver.c
drivers/block/drbd/drbd_req.c

index fa722a986e071b5f73eb4e06559b8f2d597a45d1..751a4d4ff070ae60fb76aa8156e74302831cb382 100644 (file)
@@ -1080,6 +1080,9 @@ struct drbd_conf {
        struct hlist_head *ee_hash; /* is proteced by req_lock! */
        unsigned int ee_hash_s;
 
+       /* Interval tree of pending remote write requests (struct drbd_epoch_entry) */
+       struct rb_root epoch_entries;
+
        /* this one is protected by ee_lock, single thread */
        struct drbd_epoch_entry *last_write_w_barrier;
 
index 003313711ef253f1cb1049c1bacdc4a345ef7ef8..18f27afab81e7c18cf9dd716d51f74ba3945de18 100644 (file)
@@ -3475,6 +3475,7 @@ struct drbd_conf *drbd_new_device(unsigned int minor)
                goto out_no_tl;
        mdev->read_requests = RB_ROOT;
        mdev->write_requests = RB_ROOT;
+       mdev->epoch_entries = RB_ROOT;
 
        mdev->app_reads_hash = kzalloc(APP_R_HSIZE*sizeof(void *), GFP_KERNEL);
        if (!mdev->app_reads_hash)
index 42c0ffabad72b434ec5e839641b39611aa9f61f7..a0fbbfc77d8561478b5a7446ea0159b9bc15b49a 100644 (file)
@@ -334,6 +334,7 @@ struct drbd_epoch_entry *drbd_alloc_ee(struct drbd_conf *mdev,
                goto fail;
 
        INIT_HLIST_NODE(&e->collision);
+       drbd_clear_interval(&e->i);
        e->epoch = NULL;
        e->mdev = mdev;
        e->pages = page;
@@ -361,6 +362,7 @@ void drbd_free_some_ee(struct drbd_conf *mdev, struct drbd_epoch_entry *e, int i
        drbd_pp_free(mdev, e->pages, is_net);
        D_ASSERT(atomic_read(&e->pending_bios) == 0);
        D_ASSERT(hlist_unhashed(&e->collision));
+       D_ASSERT(drbd_interval_empty(&e->i));
        mempool_free(e, drbd_ee_mempool);
 }
 
@@ -1418,6 +1420,7 @@ static int e_end_resync_block(struct drbd_conf *mdev, struct drbd_work *w, int u
        int ok;
 
        D_ASSERT(hlist_unhashed(&e->collision));
+       D_ASSERT(drbd_interval_empty(&e->i));
 
        if (likely((e->flags & EE_WAS_ERROR) == 0)) {
                drbd_set_in_sync(mdev, sector, e->i.size);
@@ -1574,9 +1577,13 @@ static int e_end_block(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
                spin_lock_irq(&mdev->req_lock);
                D_ASSERT(!hlist_unhashed(&e->collision));
                hlist_del_init(&e->collision);
+               D_ASSERT(!drbd_interval_empty(&e->i));
+               drbd_remove_interval(&mdev->epoch_entries, &e->i);
+               drbd_clear_interval(&e->i);
                spin_unlock_irq(&mdev->req_lock);
        } else {
                D_ASSERT(hlist_unhashed(&e->collision));
+               D_ASSERT(drbd_interval_empty(&e->i));
        }
 
        drbd_may_finish_epoch(mdev, e->epoch, EV_PUT + (cancel ? EV_CLEANUP : 0));
@@ -1595,6 +1602,9 @@ static int e_send_discard_ack(struct drbd_conf *mdev, struct drbd_work *w, int u
        spin_lock_irq(&mdev->req_lock);
        D_ASSERT(!hlist_unhashed(&e->collision));
        hlist_del_init(&e->collision);
+       D_ASSERT(!drbd_interval_empty(&e->i));
+       drbd_remove_interval(&mdev->epoch_entries, &e->i);
+       drbd_clear_interval(&e->i);
        spin_unlock_irq(&mdev->req_lock);
 
        dec_unacked(mdev);
@@ -1767,6 +1777,7 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned
                spin_lock_irq(&mdev->req_lock);
 
                hlist_add_head(&e->collision, ee_hash_slot(mdev, sector));
+               drbd_insert_interval(&mdev->epoch_entries, &e->i);
 
                first = 1;
                for (;;) {
@@ -1817,6 +1828,8 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned
 
                        if (signal_pending(current)) {
                                hlist_del_init(&e->collision);
+                               drbd_remove_interval(&mdev->epoch_entries, &e->i);
+                               drbd_clear_interval(&e->i);
 
                                spin_unlock_irq(&mdev->req_lock);
 
@@ -1875,6 +1888,8 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned
        spin_lock_irq(&mdev->req_lock);
        list_del(&e->w.list);
        hlist_del_init(&e->collision);
+       drbd_remove_interval(&mdev->epoch_entries, &e->i);
+       drbd_clear_interval(&e->i);
        spin_unlock_irq(&mdev->req_lock);
        if (e->flags & EE_CALL_AL_COMPLETE_IO)
                drbd_al_complete_io(mdev, e->i.sector);
index 5bf93a7c91b0fb693f81d917a070879e41b5b38d..b81ce82eb1593a993ba8000b5a5a4d94bafdd885 100644 (file)
@@ -135,9 +135,6 @@ static void _about_to_complete_local_write(struct drbd_conf *mdev,
        struct drbd_request *req)
 {
        const unsigned long s = req->rq_state;
-       struct drbd_epoch_entry *e;
-       struct hlist_node *n;
-       struct hlist_head *slot;
 
        /* Before we can signal completion to the upper layers,
         * we may need to close the current epoch.
@@ -185,16 +182,10 @@ static void _about_to_complete_local_write(struct drbd_conf *mdev,
                 *
                 * anyways, if we found one,
                 * we just have to do a wake_up.  */
-#define OVERLAPS overlaps(sector, size, e->i.sector, e->i.size)
-               slot = ee_hash_slot(mdev, req->i.sector);
-               hlist_for_each_entry(e, n, slot, collision) {
-                       if (OVERLAPS) {
-                               wake_up(&mdev->misc_wait);
-                               break;
-                       }
-               }
+               i = drbd_find_overlap(&mdev->epoch_entries, sector, size);
+               if (i)
+                       wake_up(&mdev->misc_wait);
        }
-#undef OVERLAPS
 }
 
 void complete_master_bio(struct drbd_conf *mdev,
@@ -332,9 +323,6 @@ static int _req_conflicts(struct drbd_request *req)
        const sector_t sector = req->i.sector;
        const int size = req->i.size;
        struct drbd_interval *i;
-       struct drbd_epoch_entry *e;
-       struct hlist_node *n;
-       struct hlist_head *slot;
 
        D_ASSERT(hlist_unhashed(&req->collision));
        D_ASSERT(drbd_interval_empty(&req->i));
@@ -364,21 +352,21 @@ static int _req_conflicts(struct drbd_request *req)
        if (mdev->ee_hash_s) {
                /* now, check for overlapping requests with remote origin */
                BUG_ON(mdev->ee_hash == NULL);
-#define OVERLAPS overlaps(e->i.sector, e->i.size, sector, size)
-               slot = ee_hash_slot(mdev, sector);
-               hlist_for_each_entry(e, n, slot, collision) {
-                       if (OVERLAPS) {
-                               dev_alert(DEV, "%s[%u] Concurrent remote write detected!"
-                                     " [DISCARD L] new: %llus +%u; "
-                                     "pending: %llus +%u\n",
-                                     current->comm, current->pid,
-                                     (unsigned long long)sector, size,
-                                     (unsigned long long)e->i.sector, e->i.size);
-                               goto out_conflict;
-                       }
+
+               i = drbd_find_overlap(&mdev->epoch_entries, sector, size);
+               if (i) {
+                       struct drbd_epoch_entry *e =
+                               container_of(i, struct drbd_epoch_entry, i);
+
+                       dev_alert(DEV, "%s[%u] Concurrent remote write detected!"
+                             " [DISCARD L] new: %llus +%u; "
+                             "pending: %llus +%u\n",
+                             current->comm, current->pid,
+                             (unsigned long long)sector, size,
+                             (unsigned long long)e->i.sector, e->i.size);
+                       goto out_conflict;
                }
        }
-#undef OVERLAPS
 
 out_no_conflict:
        /* this is like it should be, and what we expected.