bcache: FUA fixes
author Kent Overstreet <koverstreet@google.com>
Thu, 27 Jun 2013 00:25:38 +0000 (17:25 -0700)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Thu, 29 Aug 2013 16:47:40 +0000 (09:47 -0700)
commit e49c7c374e7aacd1f04ecbc21d9dbbeeea4a77d6 upstream.

Journal writes need to be marked FUA, not just REQ_FLUSH. And btree node
writes have... weird ordering requirements.

Signed-off-by: Kent Overstreet <koverstreet@google.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
drivers/md/bcache/btree.c
drivers/md/bcache/journal.c
drivers/md/bcache/request.c

index 7b687a6f3dec1d310e1c8fe3297d78244f2b691d..833c590806ba6b4bcd39e2cf453e245b3d4ae373 100644 (file)
@@ -326,10 +326,25 @@ static void do_btree_write(struct btree *b)
        i->csum         = btree_csum_set(b, i);
 
        btree_bio_init(b);
-       b->bio->bi_rw   = REQ_META|WRITE_SYNC;
+       b->bio->bi_rw   = REQ_META|WRITE_SYNC|REQ_FUA;
        b->bio->bi_size = set_blocks(i, b->c) * block_bytes(b->c);
        bch_bio_map(b->bio, i);
 
+       /*
+        * If we're appending to a leaf node, we don't technically need FUA -
+        * this write just needs to be persisted before the next journal write,
+        * which will be marked FLUSH|FUA.
+        *
+        * Similarly if we're writing a new btree root - the pointer is going to
+        * be in the next journal entry.
+        *
+        * But if we're writing a new btree node (that isn't a root) or
+        * appending to a non leaf btree node, we need either FUA or a flush
+        * when we write the parent with the new pointer. FUA is cheaper than a
+        * flush, and writes appending to leaf nodes aren't blocking anything so
+        * just make all btree node writes FUA to keep things sane.
+        */
+
        bkey_copy(&k.key, &b->key);
        SET_PTR_OFFSET(&k.key, 0, PTR_OFFSET(&k.key, 0) + bset_offset(b, i));
 
@@ -2142,6 +2157,9 @@ int bch_btree_insert(struct btree_op *op, struct cache_set *c)
 void bch_btree_set_root(struct btree *b)
 {
        unsigned i;
+       struct closure cl;
+
+       closure_init_stack(&cl);
 
        BUG_ON(!b->written);
 
@@ -2155,8 +2173,9 @@ void bch_btree_set_root(struct btree *b)
        b->c->root = b;
        __bkey_put(b->c, &b->key);
 
-       bch_journal_meta(b->c, NULL);
+       bch_journal_meta(b->c, &cl);
        pr_debug("%s for %pf", pbtree(b), __builtin_return_address(0));
+       closure_sync(&cl);
 }
 
 /* Cache lookup */
index 8a54d3b4f517def99e5f918d697bf05ab1e907bc..b49abb246bb69f61e4d865843d597e55dc8409c4 100644 (file)
@@ -622,7 +622,7 @@ static void journal_write_unlocked(struct closure *cl)
                bio_reset(bio);
                bio->bi_sector  = PTR_OFFSET(k, i);
                bio->bi_bdev    = ca->bdev;
-               bio->bi_rw      = REQ_WRITE|REQ_SYNC|REQ_META|REQ_FLUSH;
+               bio->bi_rw      = REQ_WRITE|REQ_SYNC|REQ_META|REQ_FLUSH|REQ_FUA;
                bio->bi_size    = sectors << 9;
 
                bio->bi_end_io  = journal_write_endio;
index 2f36743ce7087526783923368981b6ac7167f889..afb9a998a737d205024bf06d755756686f5d3b19 100644 (file)
@@ -1053,9 +1053,20 @@ static void request_write(struct cached_dev *dc, struct search *s)
                trace_bcache_writethrough(s->orig_bio);
                closure_bio_submit(bio, cl, s->d);
        } else {
-               s->op.cache_bio = bio;
                trace_bcache_writeback(s->orig_bio);
                bch_writeback_add(dc, bio_sectors(bio));
+
+               if (s->op.flush_journal) {
+                       /* Also need to send a flush to the backing device */
+                       s->op.cache_bio = bio_clone_bioset(bio, GFP_NOIO,
+                                                          dc->disk.bio_split);
+
+                       bio->bi_size = 0;
+                       bio->bi_vcnt = 0;
+                       closure_bio_submit(bio, cl, s->d);
+               } else {
+                       s->op.cache_bio = bio;
+               }
        }
 out:
        closure_call(&s->op.cl, bch_insert_data, NULL, cl);