static void bm_store_page_idx(struct page *page, unsigned long idx)
{
BUG_ON(0 != (idx & ~BM_PAGE_IDX_MASK));
- page_private(page) |= idx;
+ set_page_private(page, idx);
}
static unsigned long bm_page_to_idx(struct page *page)
return page_nr;
}
-static unsigned long *__bm_map_pidx(struct drbd_bitmap *b, unsigned int idx, const enum km_type km)
+static unsigned long *__bm_map_pidx(struct drbd_bitmap *b, unsigned int idx)
{
struct page *page = b->bm_pages[idx];
- return (unsigned long *) kmap_atomic(page, km);
+ return (unsigned long *) kmap_atomic(page);
}
static unsigned long *bm_map_pidx(struct drbd_bitmap *b, unsigned int idx)
{
- return __bm_map_pidx(b, idx, KM_IRQ1);
+ return __bm_map_pidx(b, idx);
}
-static void __bm_unmap(unsigned long *p_addr, const enum km_type km)
+static void __bm_unmap(unsigned long *p_addr)
{
- kunmap_atomic(p_addr, km);
+ kunmap_atomic(p_addr);
};
static void bm_unmap(unsigned long *p_addr)
{
- return __bm_unmap(p_addr, KM_IRQ1);
+ return __bm_unmap(p_addr);
}
/* long word offset of _bitmap_ sector */
return old_pages;
/* Trying kmalloc first, falling back to vmalloc.
- * GFP_KERNEL is ok, as this is done when a lower level disk is
- * "attached" to the drbd. Context is receiver thread or drbdsetup /
- * netlink process. As we have no disk yet, we are not in the IO path,
- * not even the IO path of the peer. */
+ * Use GFP_NOIO: this is called while drbd IO is "suspended",
+ * and during resize or attach on a diskless Primary
+ * we must not block on IO to ourselves.
+ * Context is receiver thread or dmsetup. */
bytes = sizeof(struct page *)*want;
- new_pages = kmalloc(bytes, GFP_KERNEL);
+ new_pages = kzalloc(bytes, GFP_NOIO);
if (!new_pages) {
- new_pages = vmalloc(bytes);
+ new_pages = __vmalloc(bytes,
+ GFP_NOIO | __GFP_HIGHMEM | __GFP_ZERO,
+ PAGE_KERNEL);
if (!new_pages)
return NULL;
vmalloced = 1;
}
- memset(new_pages, 0, bytes);
if (want >= have) {
for (i = 0; i < have; i++)
new_pages[i] = old_pages[i];
for (; i < want; i++) {
- page = alloc_page(GFP_HIGHUSER);
+ page = alloc_page(GFP_NOIO | __GFP_HIGHMEM);
if (!page) {
bm_free_pages(new_pages + have, i - have);
bm_vk_free(new_pages, vmalloced);
/* all but last page */
for (idx = 0; idx < b->bm_number_of_pages - 1; idx++) {
- p_addr = __bm_map_pidx(b, idx, KM_USER0);
+ p_addr = __bm_map_pidx(b, idx);
for (i = 0; i < LWPP; i++)
bits += hweight_long(p_addr[i]);
- __bm_unmap(p_addr, KM_USER0);
+ __bm_unmap(p_addr);
cond_resched();
}
/* last (or only) page */
last_word = ((b->bm_bits - 1) & BITS_PER_PAGE_MASK) >> LN2_BPL;
- p_addr = __bm_map_pidx(b, idx, KM_USER0);
+ p_addr = __bm_map_pidx(b, idx);
for (i = 0; i < last_word; i++)
bits += hweight_long(p_addr[i]);
p_addr[last_word] &= cpu_to_lel(mask);
/* 32bit arch, may have an unused padding long */
if (BITS_PER_LONG == 32 && (last_word & 1) == 0)
p_addr[last_word+1] = 0;
- __bm_unmap(p_addr, KM_USER0);
+ __bm_unmap(p_addr);
return bits;
}
unsigned flags;
#define BM_AIO_COPY_PAGES 1
#define BM_AIO_WRITE_HINTED 2
+#define BM_WRITE_ALL_PAGES 4
int error;
struct kref kref;
};
if (ctx->flags & BM_AIO_COPY_PAGES) {
void *src, *dest;
page = mempool_alloc(drbd_md_io_page_pool, __GFP_HIGHMEM|__GFP_WAIT);
- dest = kmap_atomic(page, KM_USER0);
- src = kmap_atomic(b->bm_pages[page_nr], KM_USER1);
+ dest = kmap_atomic(page);
+ src = kmap_atomic(b->bm_pages[page_nr]);
memcpy(dest, src, PAGE_SIZE);
- kunmap_atomic(src, KM_USER1);
- kunmap_atomic(dest, KM_USER0);
+ kunmap_atomic(src);
+ kunmap_atomic(dest);
bm_store_page_idx(page, page_nr);
} else
page = b->bm_pages[page_nr];
if (!get_ldev_if_state(mdev, D_ATTACHING)) { /* put is in bm_aio_ctx_destroy() */
dev_err(DEV, "ASSERT FAILED: get_ldev_if_state() == 1 in bm_rw()\n");
- err = -ENODEV;
- goto out;
+ kfree(ctx);
+ return -ENODEV;
}
if (!ctx->flags)
!test_and_clear_bit(BM_PAGE_HINT_WRITEOUT,
&page_private(b->bm_pages[i])))
continue;
- if (bm_test_page_unchanged(b->bm_pages[i])) {
+
+ if (!(flags & BM_WRITE_ALL_PAGES) &&
+ bm_test_page_unchanged(b->bm_pages[i])) {
dynamic_dev_dbg(DEV, "skipped bm write for idx %u\n", i);
continue;
}
* "in_flight reached zero, all done" event.
*/
if (!atomic_dec_and_test(&ctx->in_flight))
- wait_until_done_or_disk_failure(mdev, &ctx->done);
+ wait_until_done_or_force_detached(mdev, mdev->ldev, &ctx->done);
else
kref_put(&ctx->kref, &bm_aio_ctx_destroy);
if (ctx->error) {
dev_alert(DEV, "we had at least one MD IO ERROR during bitmap IO\n");
- drbd_chk_io_error(mdev, 1, true);
+ drbd_chk_io_error(mdev, 1, DRBD_META_IO_ERROR);
err = -EIO; /* ctx->error ? */
}
if (atomic_read(&ctx->in_flight))
- err = -EIO; /* Disk failed during IO... */
+ err = -EIO; /* Disk timeout/force-detach during IO... */
now = jiffies;
if (rw == WRITE) {
dev_info(DEV, "%s (%lu bits) marked out-of-sync by on disk bit-map.\n",
ppsize(ppb, now << (BM_BLOCK_SHIFT-10)), now);
-out:
kref_put(&ctx->kref, &bm_aio_ctx_destroy);
return err;
}
return bm_rw(mdev, WRITE, 0, 0);
}
+/**
+ * drbd_bm_write_all() - Write the whole bitmap to its on disk location.
+ * @mdev: DRBD device.
+ *
+ * Will write all pages: passing BM_WRITE_ALL_PAGES keeps bm_rw() from
+ * skipping pages that bm_test_page_unchanged() reports as unchanged.
+ *
+ * Returns whatever bm_rw() returns. The error paths visible in bm_rw()
+ * yield -ENODEV (no local disk reference could be taken) or -EIO (meta data
+ * IO error, or IO still in flight after the wait).
+ */
+int drbd_bm_write_all(struct drbd_conf *mdev) __must_hold(local)
+{
+ return bm_rw(mdev, WRITE, BM_WRITE_ALL_PAGES, 0);
+}
+
/**
* drbd_bm_lazy_write_out() - Write bitmap pages 0 to @upper_idx-1, if they have changed.
* @mdev: DRBD device.
return bm_rw(mdev, WRITE, BM_AIO_COPY_PAGES, upper_idx);
}
+/**
+ * drbd_bm_write_copy_pages() - Write the whole bitmap to its on disk location.
+ * @mdev: DRBD device.
+ *
+ * Will only write pages that have changed since last IO.
+ * In contrast to drbd_bm_write(), this passes BM_AIO_COPY_PAGES, so each
+ * bitmap page is first copied to a temporary writeout page and the copy is
+ * what gets submitted. It is intended to trigger a full write-out
+ * while still allowing the bitmap to change, for example if a resync or online
+ * verify is aborted due to a failed peer disk, while local IO continues, or
+ * pending resync acks are still being processed.
+ *
+ * NOTE(review): the trailing 0 is passed where drbd_bm_lazy_write_out()
+ * passes its upper_idx; presumably 0 means "no upper page limit" -- confirm
+ * against bm_rw().
+ *
+ * Returns whatever bm_rw() returns.
+ */
+int drbd_bm_write_copy_pages(struct drbd_conf *mdev) __must_hold(local)
+{
+ return bm_rw(mdev, WRITE, BM_AIO_COPY_PAGES, 0);
+}
+
/**
* drbd_bm_write_hinted() - Write bitmap pages with "hint" marks, if they have changed.
* @mdev: DRBD device.
if (!get_ldev_if_state(mdev, D_ATTACHING)) { /* put is in bm_aio_ctx_destroy() */
dev_err(DEV, "ASSERT FAILED: get_ldev_if_state() == 1 in drbd_bm_write_page()\n");
- err = -ENODEV;
- goto out;
+ kfree(ctx);
+ return -ENODEV;
}
bm_page_io_async(ctx, idx, WRITE_SYNC);
- wait_until_done_or_disk_failure(mdev, &ctx->done);
+ wait_until_done_or_force_detached(mdev, mdev->ldev, &ctx->done);
if (ctx->error)
- drbd_chk_io_error(mdev, 1, true);
- /* that should force detach, so the in memory bitmap will be
+ drbd_chk_io_error(mdev, 1, DRBD_META_IO_ERROR);
+ /* that causes us to detach, so the in memory bitmap will be
* gone in a moment as well. */
mdev->bm_writ_cnt++;
err = atomic_read(&ctx->in_flight) ? -EIO : ctx->error;
- out:
kref_put(&ctx->kref, &bm_aio_ctx_destroy);
return err;
}
* this returns a bit number, NOT a sector!
*/
static unsigned long __bm_find_next(struct drbd_conf *mdev, unsigned long bm_fo,
- const int find_zero_bit, const enum km_type km)
+ const int find_zero_bit)
{
struct drbd_bitmap *b = mdev->bitmap;
unsigned long *p_addr;
while (bm_fo < b->bm_bits) {
/* bit offset of the first bit in the page */
bit_offset = bm_fo & ~BITS_PER_PAGE_MASK;
- p_addr = __bm_map_pidx(b, bm_bit_to_page_idx(b, bm_fo), km);
+ p_addr = __bm_map_pidx(b, bm_bit_to_page_idx(b, bm_fo));
if (find_zero_bit)
i = find_next_zero_bit_le(p_addr,
i = find_next_bit_le(p_addr,
PAGE_SIZE*8, bm_fo & BITS_PER_PAGE_MASK);
- __bm_unmap(p_addr, km);
+ __bm_unmap(p_addr);
if (i < PAGE_SIZE*8) {
bm_fo = bit_offset + i;
if (bm_fo >= b->bm_bits)
if (BM_DONT_TEST & b->bm_flags)
bm_print_lock_info(mdev);
- i = __bm_find_next(mdev, bm_fo, find_zero_bit, KM_IRQ1);
+ i = __bm_find_next(mdev, bm_fo, find_zero_bit);
spin_unlock_irq(&b->bm_lock);
return i;
unsigned long _drbd_bm_find_next(struct drbd_conf *mdev, unsigned long bm_fo)
{
/* WARN_ON(!(BM_DONT_SET & mdev->b->bm_flags)); */
- return __bm_find_next(mdev, bm_fo, 0, KM_USER1);
+ return __bm_find_next(mdev, bm_fo, 0);
}
unsigned long _drbd_bm_find_next_zero(struct drbd_conf *mdev, unsigned long bm_fo)
{
/* WARN_ON(!(BM_DONT_SET & mdev->b->bm_flags)); */
- return __bm_find_next(mdev, bm_fo, 1, KM_USER1);
+ return __bm_find_next(mdev, bm_fo, 1);
}
/* returns number of bits actually changed.
unsigned int page_nr = bm_bit_to_page_idx(b, bitnr);
if (page_nr != last_page_nr) {
if (p_addr)
- __bm_unmap(p_addr, KM_IRQ1);
+ __bm_unmap(p_addr);
if (c < 0)
bm_set_page_lazy_writeout(b->bm_pages[last_page_nr]);
else if (c > 0)
bm_set_page_need_writeout(b->bm_pages[last_page_nr]);
changed_total += c;
c = 0;
- p_addr = __bm_map_pidx(b, page_nr, KM_IRQ1);
+ p_addr = __bm_map_pidx(b, page_nr);
last_page_nr = page_nr;
}
if (val)
c -= (0 != __test_and_clear_bit_le(bitnr & BITS_PER_PAGE_MASK, p_addr));
}
if (p_addr)
- __bm_unmap(p_addr, KM_IRQ1);
+ __bm_unmap(p_addr);
if (c < 0)
bm_set_page_lazy_writeout(b->bm_pages[last_page_nr]);
else if (c > 0)
int i;
int bits;
int changed = 0;
- unsigned long *paddr = kmap_atomic(b->bm_pages[page_nr], KM_IRQ1);
+ unsigned long *paddr = kmap_atomic(b->bm_pages[page_nr]);
for (i = first_word; i < last_word; i++) {
bits = hweight_long(paddr[i]);
paddr[i] = ~0UL;
changed += BITS_PER_LONG - bits;
}
- kunmap_atomic(paddr, KM_IRQ1);
+ kunmap_atomic(paddr);
if (changed) {
/* We only need lazy writeout, the information is still in the
* remote bitmap as well, and is reconstructed during the next
first_word = 0;
spin_lock_irq(&b->bm_lock);
}
-
/* last page (respectively only page, for first page == last page) */
last_word = MLPP(el >> LN2_BPL);
- bm_set_full_words_within_one_page(mdev->bitmap, last_page, first_word, last_word);
+
+ /* Consider bitmap->bm_bits = 32768 and bitmap->bm_number_of_pages = 1
+ * (or multiples thereof):
+ * ==> e = 32767, el = 32768, last_page = 2,
+ * and now last_word = 0.
+ * We do not want to touch last_page in this case:
+ * we did not allocate it, so it is not present in bitmap->bm_pages.
+ */
+ if (last_word)
+ bm_set_full_words_within_one_page(mdev->bitmap, last_page, first_word, last_word);
/* possibly trailing bits.
* example: (e & 63) == 63, el will be e+1.