dm snapshot: make bio optional in __origin_write
[firefly-linux-kernel-4.4.55.git] / drivers / md / dm-snap.c
index 8bd77cbd7e4542302b5d4c9892e2f06aa6c533c3..5e553c50c215948082c5a929cadbed22949d8e55 100644 (file)
@@ -59,6 +59,9 @@ struct dm_snapshot {
        struct rw_semaphore lock;
 
        struct dm_dev *origin;
+       struct dm_dev *cow;
+
+       struct dm_target *ti;
 
        /* List of snapshots per Origin */
        struct list_head list;
@@ -69,6 +72,9 @@ struct dm_snapshot {
        /* Origin writes don't trigger exceptions until this is set */
        int active;
 
+       /* Whether or not owning mapped_device is suspended */
+       int suspended;
+
        mempool_t *pending_pool;
 
        atomic_t pending_exceptions_count;
@@ -97,6 +103,12 @@ struct dm_snapshot {
        struct hlist_head tracked_chunk_hash[DM_TRACKED_CHUNK_HASH_SIZE];
 };
 
+struct dm_dev *dm_snap_cow(struct dm_snapshot *s)
+{
+       return s->cow;
+}
+EXPORT_SYMBOL(dm_snap_cow);
+
 static struct workqueue_struct *ksnapd;
 static void flush_queued_bios(struct work_struct *work);
 
@@ -290,23 +302,117 @@ static void __insert_origin(struct origin *o)
        list_add_tail(&o->hash_list, sl);
 }
 
+/*
+ * _origins_lock must be held when calling this function.
+ * Returns number of snapshots registered using the supplied cow device, and
+ * fills in (when non-NULL):
+ * snap_src - a snapshot suitable for use as a source of exception handover
+ * snap_dest - a snapshot capable of receiving exception handover.
+ *
+ * Possible return values and states:
+ *   0: NULL, NULL  - first new snapshot
+ *   1: snap_src, NULL - normal snapshot
+ *   2: snap_src, snap_dest  - waiting for handover
+ *   2: snap_src, NULL - handed over, waiting for old to be deleted
+ *   1: NULL, snap_dest - source got destroyed without handover
+ */
+static int __find_snapshots_sharing_cow(struct dm_snapshot *snap,
+                                       struct dm_snapshot **snap_src,
+                                       struct dm_snapshot **snap_dest)
+{
+       struct dm_snapshot *s;
+       struct origin *o;
+       int count = 0;
+       int active;
+
+       o = __lookup_origin(snap->origin->bdev);
+       if (!o)
+               goto out;
+
+       list_for_each_entry(s, &o->snapshots, list) {
+               if (!bdev_equal(s->cow->bdev, snap->cow->bdev))
+                       continue;
+
+               down_read(&s->lock);
+               active = s->active;
+               up_read(&s->lock);
+
+               if (active) {
+                       if (snap_src)
+                               *snap_src = s;
+               } else if (snap_dest)
+                       *snap_dest = s;
+
+               count++;
+       }
+
+out:
+       return count;
+}
+
+/*
+ * Returns 1 if this snapshot is a handover destination and 0 if it is not.
+ * On an invalid cow pairing, sets snap->ti->error and returns -EINVAL.
+ */
+static int __validate_exception_handover(struct dm_snapshot *snap)
+{
+       struct dm_snapshot *snap_src = NULL, *snap_dest = NULL;
+
+       /* Does snapshot need exceptions handed over to it? */
+       if ((__find_snapshots_sharing_cow(snap, &snap_src, &snap_dest) == 2) ||
+           snap_dest) {
+               snap->ti->error = "Snapshot cow pairing for exception "
+                                 "table handover failed";
+               return -EINVAL;
+       }
+
+       /*
+        * If no snap_src was found, snap cannot become a handover
+        * destination.
+        */
+       if (!snap_src)
+               return 0;
+
+       return 1;
+}
+
+static void __insert_snapshot(struct origin *o, struct dm_snapshot *s)
+{
+       struct dm_snapshot *l;
+
+       /* Sort the list according to chunk size, largest-first smallest-last */
+       list_for_each_entry(l, &o->snapshots, list)
+               if (l->store->chunk_size < s->store->chunk_size)
+                       break;
+       list_add_tail(&s->list, &l->list);
+}
+
 /*
  * Make a note of the snapshot and its origin so we can look it
  * up when the origin has a write on it.
+ *
+ * Also validate snapshot exception store handovers.
+ * On success, returns 1 if this registration is a handover destination,
+ * otherwise returns 0.
  */
 static int register_snapshot(struct dm_snapshot *snap)
 {
-       struct dm_snapshot *l;
-       struct origin *o, *new_o;
+       struct origin *o, *new_o = NULL;
        struct block_device *bdev = snap->origin->bdev;
+       int r = 0;
 
        new_o = kmalloc(sizeof(*new_o), GFP_KERNEL);
        if (!new_o)
                return -ENOMEM;
 
        down_write(&_origins_lock);
-       o = __lookup_origin(bdev);
 
+       r = __validate_exception_handover(snap);
+       if (r < 0) {
+               kfree(new_o);
+               goto out;
+       }
+
+       o = __lookup_origin(bdev);
        if (o)
                kfree(new_o);
        else {
@@ -320,14 +426,27 @@ static int register_snapshot(struct dm_snapshot *snap)
                __insert_origin(o);
        }
 
-       /* Sort the list according to chunk size, largest-first smallest-last */
-       list_for_each_entry(l, &o->snapshots, list)
-               if (l->store->chunk_size < snap->store->chunk_size)
-                       break;
-       list_add_tail(&snap->list, &l->list);
+       __insert_snapshot(o, snap);
+
+out:
+       up_write(&_origins_lock);
+
+       return r;
+}
+
+/*
+ * Move snapshot to correct place in list according to chunk size.
+ */
+static void reregister_snapshot(struct dm_snapshot *s)
+{
+       struct block_device *bdev = s->origin->bdev;
+
+       down_write(&_origins_lock);
+
+       list_del(&s->list);
+       __insert_snapshot(__lookup_origin(bdev), s);
 
        up_write(&_origins_lock);
-       return 0;
 }
 
 static void unregister_snapshot(struct dm_snapshot *s)
@@ -338,7 +457,7 @@ static void unregister_snapshot(struct dm_snapshot *s)
        o = __lookup_origin(s->origin->bdev);
 
        list_del(&s->list);
-       if (list_empty(&o->snapshots)) {
+       if (o && list_empty(&o->snapshots)) {
                list_del(&o->hash_list);
                kfree(o);
        }
@@ -558,7 +677,7 @@ static int init_hash_tables(struct dm_snapshot *s)
         * Calculate based on the size of the original volume or
         * the COW volume...
         */
-       cow_dev_size = get_dev_size(s->store->cow->bdev);
+       cow_dev_size = get_dev_size(s->cow->bdev);
        origin_dev_size = get_dev_size(s->origin->bdev);
        max_buckets = calc_max_buckets();
 
@@ -596,49 +715,61 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv)
        struct dm_snapshot *s;
        int i;
        int r = -EINVAL;
-       char *origin_path;
-       struct dm_exception_store *store;
+       char *origin_path, *cow_path;
        unsigned args_used;
 
        if (argc != 4) {
                ti->error = "requires exactly 4 arguments";
                r = -EINVAL;
-               goto bad_args;
+               goto bad;
        }
 
        origin_path = argv[0];
        argv++;
        argc--;
 
-       r = dm_exception_store_create(ti, argc, argv, &args_used, &store);
+       s = kmalloc(sizeof(*s), GFP_KERNEL);
+       if (!s) {
+               ti->error = "Cannot allocate snapshot context private "
+                   "structure";
+               r = -ENOMEM;
+               goto bad;
+       }
+
+       cow_path = argv[0];
+       argv++;
+       argc--;
+
+       r = dm_get_device(ti, cow_path, 0, 0,
+                         FMODE_READ | FMODE_WRITE, &s->cow);
+       if (r) {
+               ti->error = "Cannot get COW device";
+               goto bad_cow;
+       }
+
+       r = dm_exception_store_create(ti, argc, argv, s, &args_used, &s->store);
        if (r) {
                ti->error = "Couldn't create exception store";
                r = -EINVAL;
-               goto bad_args;
+               goto bad_store;
        }
 
        argv += args_used;
        argc -= args_used;
 
-       s = kmalloc(sizeof(*s), GFP_KERNEL);
-       if (!s) {
-               ti->error = "Cannot allocate snapshot context private "
-                   "structure";
-               r = -ENOMEM;
-               goto bad_snap;
-       }
-
        r = dm_get_device(ti, origin_path, 0, ti->len, FMODE_READ, &s->origin);
        if (r) {
                ti->error = "Cannot get origin device";
                goto bad_origin;
        }
 
-       s->store = store;
+       s->ti = ti;
        s->valid = 1;
        s->active = 0;
+       s->suspended = 0;
        atomic_set(&s->pending_exceptions_count, 0);
        init_rwsem(&s->lock);
+       INIT_LIST_HEAD(&s->list);
        spin_lock_init(&s->pe_lock);
 
        /* Allocate hash table for COW data */
@@ -673,39 +804,55 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 
        spin_lock_init(&s->tracked_chunk_lock);
 
-       /* Metadata must only be loaded into one table at once */
+       bio_list_init(&s->queued_bios);
+       INIT_WORK(&s->queued_bios_work, flush_queued_bios);
+
+       ti->private = s;
+       ti->num_flush_requests = 1;
+
+       /* Add snapshot to the list of snapshots for this origin */
+       /* Exceptions aren't triggered till snapshot_resume() is called */
+       r = register_snapshot(s);
+       if (r == -ENOMEM) {
+               ti->error = "Snapshot origin struct allocation failed";
+               goto bad_load_and_register;
+       } else if (r < 0) {
+               /* invalid handover, register_snapshot has set ti->error */
+               goto bad_load_and_register;
+       }
+
+       /*
+        * Metadata must only be loaded into one table at once, so skip this
+        * if metadata will be handed over during resume.
+        * Chunk size will be set during the handover - set it to zero to
+        * ensure it's ignored.
+        */
+       if (r > 0) {
+               s->store->chunk_size = 0;
+               return 0;
+       }
+
        r = s->store->type->read_metadata(s->store, dm_add_exception,
                                          (void *)s);
        if (r < 0) {
                ti->error = "Failed to read snapshot metadata";
-               goto bad_load_and_register;
+               goto bad_read_metadata;
        } else if (r > 0) {
                s->valid = 0;
                DMWARN("Snapshot is marked invalid.");
        }
 
-       bio_list_init(&s->queued_bios);
-       INIT_WORK(&s->queued_bios_work, flush_queued_bios);
-
        if (!s->store->chunk_size) {
                ti->error = "Chunk size not set";
-               goto bad_load_and_register;
-       }
-
-       /* Add snapshot to the list of snapshots for this origin */
-       /* Exceptions aren't triggered till snapshot_resume() is called */
-       if (register_snapshot(s)) {
-               r = -EINVAL;
-               ti->error = "Cannot register snapshot origin";
-               goto bad_load_and_register;
+               goto bad_read_metadata;
        }
-
-       ti->private = s;
        ti->split_io = s->store->chunk_size;
-       ti->num_flush_requests = 1;
 
        return 0;
 
+bad_read_metadata:
+       unregister_snapshot(s);
+
 bad_load_and_register:
        mempool_destroy(s->tracked_chunk_pool);
 
@@ -723,12 +870,15 @@ bad_hash_tables:
        dm_put_device(ti, s->origin);
 
 bad_origin:
-       kfree(s);
+       dm_exception_store_destroy(s->store);
 
-bad_snap:
-       dm_exception_store_destroy(store);
+bad_store:
+       dm_put_device(ti, s->cow);
 
-bad_args:
+bad_cow:
+       kfree(s);
+
+bad:
        return r;
 }
 
@@ -741,15 +891,58 @@ static void __free_exceptions(struct dm_snapshot *s)
        dm_exception_table_exit(&s->complete, exception_cache);
 }
 
+static void __handover_exceptions(struct dm_snapshot *snap_src,
+                                 struct dm_snapshot *snap_dest)
+{
+       union {
+               struct dm_exception_table table_swap;
+               struct dm_exception_store *store_swap;
+       } u;
+
+       /*
+        * Swap all snapshot context information between the two instances.
+        */
+       u.table_swap = snap_dest->complete;
+       snap_dest->complete = snap_src->complete;
+       snap_src->complete = u.table_swap;
+
+       u.store_swap = snap_dest->store;
+       snap_dest->store = snap_src->store;
+       snap_src->store = u.store_swap;
+
+       snap_dest->store->snap = snap_dest;
+       snap_src->store->snap = snap_src;
+
+       snap_dest->ti->split_io = snap_dest->store->chunk_size;
+       snap_dest->valid = snap_src->valid;
+
+       /*
+        * Set source invalid to ensure it receives no further I/O.
+        */
+       snap_src->valid = 0;
+}
+
 static void snapshot_dtr(struct dm_target *ti)
 {
 #ifdef CONFIG_DM_DEBUG
        int i;
 #endif
        struct dm_snapshot *s = ti->private;
+       struct dm_snapshot *snap_src = NULL, *snap_dest = NULL;
 
        flush_workqueue(ksnapd);
 
+       down_read(&_origins_lock);
+       /* Check whether exception handover must be cancelled */
+       (void) __find_snapshots_sharing_cow(s, &snap_src, &snap_dest);
+       if (snap_src && snap_dest && (s == snap_src)) {
+               down_write(&snap_dest->lock);
+               snap_dest->valid = 0;
+               up_write(&snap_dest->lock);
+               DMERR("Cancelling snapshot handover.");
+       }
+       up_read(&_origins_lock);
+
        /* Prevent further origin writes from using this snapshot. */
        /* After this returns there can be no new kcopyd jobs. */
        unregister_snapshot(s);
@@ -777,6 +970,8 @@ static void snapshot_dtr(struct dm_target *ti)
 
        dm_exception_store_destroy(s->store);
 
+       dm_put_device(ti, s->cow);
+
        kfree(s);
 }
 
@@ -839,7 +1034,7 @@ static void __invalidate_snapshot(struct dm_snapshot *s, int err)
 
        s->valid = 0;
 
-       dm_table_event(s->store->ti->table);
+       dm_table_event(s->ti->table);
 }
 
 static void get_pending_exception(struct dm_snap_pending_exception *pe)
@@ -977,7 +1172,7 @@ static void start_copy(struct dm_snap_pending_exception *pe)
        src.sector = chunk_to_sector(s->store, pe->e.old_chunk);
        src.count = min((sector_t)s->store->chunk_size, dev_size - src.sector);
 
-       dest.bdev = s->store->cow->bdev;
+       dest.bdev = s->cow->bdev;
        dest.sector = chunk_to_sector(s->store, pe->e.new_chunk);
        dest.count = src.count;
 
@@ -1038,7 +1233,7 @@ __find_pending_exception(struct dm_snapshot *s,
 static void remap_exception(struct dm_snapshot *s, struct dm_exception *e,
                            struct bio *bio, chunk_t chunk)
 {
-       bio->bi_bdev = s->store->cow->bdev;
+       bio->bi_bdev = s->cow->bdev;
        bio->bi_sector = chunk_to_sector(s->store,
                                         dm_chunk_number(e->new_chunk) +
                                         (chunk - e->old_chunk)) +
@@ -1056,7 +1251,7 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio,
        struct dm_snap_pending_exception *pe = NULL;
 
        if (unlikely(bio_empty_barrier(bio))) {
-               bio->bi_bdev = s->store->cow->bdev;
+               bio->bi_bdev = s->cow->bdev;
                return DM_MAPIO_REMAPPED;
        }
 
@@ -1151,12 +1346,63 @@ static int snapshot_end_io(struct dm_target *ti, struct bio *bio,
        return 0;
 }
 
+static void snapshot_postsuspend(struct dm_target *ti)
+{
+       struct dm_snapshot *s = ti->private;
+
+       down_write(&s->lock);
+       s->suspended = 1;
+       up_write(&s->lock);
+}
+
+static int snapshot_preresume(struct dm_target *ti)
+{
+       int r = 0;
+       struct dm_snapshot *s = ti->private;
+       struct dm_snapshot *snap_src = NULL, *snap_dest = NULL;
+
+       down_read(&_origins_lock);
+       (void) __find_snapshots_sharing_cow(s, &snap_src, &snap_dest);
+       if (snap_src && snap_dest) {
+               down_read(&snap_src->lock);
+               if (s == snap_src) {
+                       DMERR("Unable to resume snapshot source until "
+                             "handover completes.");
+                       r = -EINVAL;
+               } else if (!snap_src->suspended) {
+                       DMERR("Unable to perform snapshot handover until "
+                             "source is suspended.");
+                       r = -EINVAL;
+               }
+               up_read(&snap_src->lock);
+       }
+       up_read(&_origins_lock);
+
+       return r;
+}
+
 static void snapshot_resume(struct dm_target *ti)
 {
        struct dm_snapshot *s = ti->private;
+       struct dm_snapshot *snap_src = NULL, *snap_dest = NULL;
+
+       down_read(&_origins_lock);
+       (void) __find_snapshots_sharing_cow(s, &snap_src, &snap_dest);
+       if (snap_src && snap_dest) {
+               down_write(&snap_src->lock);
+               down_write_nested(&snap_dest->lock, SINGLE_DEPTH_NESTING);
+               __handover_exceptions(snap_src, snap_dest);
+               up_write(&snap_dest->lock);
+               up_write(&snap_src->lock);
+       }
+       up_read(&_origins_lock);
+
+       /* Now we have correct chunk size, reregister */
+       reregister_snapshot(s);
 
        down_write(&s->lock);
        s->active = 1;
+       s->suspended = 0;
        up_write(&s->lock);
 }
 
@@ -1200,7 +1446,7 @@ static int snapshot_status(struct dm_target *ti, status_type_t type,
                 * to make private copies if the output is to
                 * make sense.
                 */
-               DMEMIT("%s", snap->origin->name);
+               DMEMIT("%s %s", snap->origin->name, snap->cow->name);
                snap->store->type->status(snap->store, type, result + sz,
                                          maxlen - sz);
                break;
@@ -1221,7 +1467,19 @@ static int snapshot_iterate_devices(struct dm_target *ti,
 /*-----------------------------------------------------------------
  * Origin methods
  *---------------------------------------------------------------*/
-static int __origin_write(struct list_head *snapshots, struct bio *bio)
+
+/*
+ * If no exceptions need creating, DM_MAPIO_REMAPPED is returned and any
+ * supplied bio is ignored.  The caller may submit it immediately.
+ * (No remapping actually occurs as the origin is always a direct linear
+ * map.)
+ *
+ * If further exceptions are required, DM_MAPIO_SUBMITTED is returned
+ * and any supplied bio is added to a list to be submitted once all
+ * the necessary exceptions exist.
+ */
+static int __origin_write(struct list_head *snapshots, sector_t sector,
+                         struct bio *bio)
 {
        int r = DM_MAPIO_REMAPPED, first = 0;
        struct dm_snapshot *snap;
@@ -1240,14 +1498,14 @@ static int __origin_write(struct list_head *snapshots, struct bio *bio)
                        goto next_snapshot;
 
                /* Nothing to do if writing beyond end of snapshot */
-               if (bio->bi_sector >= dm_table_get_size(snap->store->ti->table))
+               if (sector >= dm_table_get_size(snap->ti->table))
                        goto next_snapshot;
 
                /*
                 * Remember, different snapshots can have
                 * different chunk sizes.
                 */
-               chunk = sector_to_chunk(snap->store, bio->bi_sector);
+               chunk = sector_to_chunk(snap->store, sector);
 
                /*
                 * Check exception table to see if block
@@ -1297,7 +1555,8 @@ static int __origin_write(struct list_head *snapshots, struct bio *bio)
                                first = 1;
                        }
 
-                       bio_list_add(&primary_pe->origin_bios, bio);
+                       if (bio)
+                               bio_list_add(&primary_pe->origin_bios, bio);
 
                        r = DM_MAPIO_SUBMITTED;
                }
@@ -1353,7 +1612,7 @@ static int do_origin(struct dm_dev *origin, struct bio *bio)
        down_read(&_origins_lock);
        o = __lookup_origin(origin->bdev);
        if (o)
-               r = __origin_write(&o->snapshots, bio);
+               r = __origin_write(&o->snapshots, bio->bi_sector, bio);
        up_read(&_origins_lock);
 
        return r;
@@ -1465,12 +1724,14 @@ static struct target_type origin_target = {
 
 static struct target_type snapshot_target = {
        .name    = "snapshot",
-       .version = {1, 8, 0},
+       .version = {1, 9, 0},
        .module  = THIS_MODULE,
        .ctr     = snapshot_ctr,
        .dtr     = snapshot_dtr,
        .map     = snapshot_map,
        .end_io  = snapshot_end_io,
+       .postsuspend = snapshot_postsuspend,
+       .preresume  = snapshot_preresume,
        .resume  = snapshot_resume,
        .status  = snapshot_status,
        .iterate_devices = snapshot_iterate_devices,