2 * Copyright (C) 2012 Red Hat, Inc.
4 * Author: Mikulas Patocka <mpatocka@redhat.com>
6 * Based on Chromium dm-verity driver (C) 2011 The Chromium OS Authors
8 * This file is released under the GPLv2.
10 * In the file "/sys/module/dm_verity/parameters/prefetch_cluster" you can set
11 * default prefetch value. Data are read in "prefetch_cluster" chunks from the
12 * hash device. Setting this greatly improves performance when data and hash
13 * are on the same disk on different partitions on devices with poor random
17 #include "dm-verity.h"
19 #include <linux/module.h>
20 #include <linux/reboot.h>
/* Message prefix for this target (presumably consumed by the DMERR* logging macros - confirm in the dm headers). */
22 #define DM_MSG_PREFIX "verity"
/* Size of the on-stack buffer verity_handle_err() formats the uevent variable into. */
24 #define DM_VERITY_ENV_LENGTH 42
/* Name of the uevent environment variable emitted by verity_handle_err(). */
25 #define DM_VERITY_ENV_VAR_NAME "DM_VERITY_ERR_BLOCK_NR"
/* Initial value of the prefetch_cluster module parameter defined below. */
27 #define DM_VERITY_DEFAULT_PREFETCH_SIZE 262144
/* Threshold that verity_handle_err() compares v->corrupted_errs against. */
29 #define DM_VERITY_MAX_CORRUPTED_ERRS 100
/* Optional table arguments; verity_parse_opt_args() maps each one to a v->mode value. */
31 #define DM_VERITY_OPT_LOGGING "ignore_corruption"
32 #define DM_VERITY_OPT_RESTART "restart_on_corruption"
/* Upper bound of the optional-argument count accepted by verity_parse_opt_args(). */
34 #define DM_VERITY_OPTS_MAX 1
/*
 * Prefetch cluster size. verity_prefetch_io() shifts this right by
 * data_dev_block_bits to turn it into a number of blocks.
 */
36 static unsigned dm_verity_prefetch_cluster = DM_VERITY_DEFAULT_PREFETCH_SIZE;
/* Exposed as module parameter "prefetch_cluster" with mode S_IRUGO | S_IWUSR. */
38 module_param_named(prefetch_cluster, dm_verity_prefetch_cluster, uint, S_IRUGO | S_IWUSR);
/*
 * Work item for a deferred hash prefetch; handled by verity_prefetch_io().
 * NOTE(review): only the embedded work_struct is visible in this excerpt.
 * Code in this file also reads ->v, ->block and ->n_blocks from this
 * structure, so those members are presumably declared on lines not shown.
 */
40 struct dm_verity_prefetch_work {
41 struct work_struct work;
48 * Auxiliary structure appended to each dm-bufio buffer. If the value
49 * hash_verified is nonzero, hash of the block has been verified.
51 * The variable hash_verified is set to 0 when allocating the buffer, then
52 * it can be changed to 1 and it is never reset to 0 again.
54 * There is no lock around this value, a race condition can at worst cause
55 * that multiple processes verify the hash of the same buffer simultaneously
56 * and write 1 to hash_verified simultaneously.
57 * This condition is harmless, so we don't need locking.
64 * Initialize struct buffer_aux for a freshly created buffer.
/*
 * Allocation callback handed to dm_bufio_client_create() in verity_ctr().
 * Clears hash_verified in the auxiliary data attached to @buf.
 */
66 static void dm_bufio_alloc_callback(struct dm_buffer *buf)
68 struct buffer_aux *aux = dm_bufio_get_aux_data(buf);
70 aux->hash_verified = 0;
74 * Translate input sector number to the sector number on the target device.
/*
 * @v:         verity target instance.
 * @bi_sector: sector number as seen on the mapped device.
 *
 * Returns v->data_start plus the target-relative offset of @bi_sector
 * (as computed by dm_target_offset()).
 */
76 static sector_t verity_map_sector(struct dm_verity *v, sector_t bi_sector)
78 return v->data_start + dm_target_offset(v->ti, bi_sector);
82 * Return hash position of a specified block at a specified tree level
83 * (0 is the lowest level).
84 * The lowest "hash_per_block_bits"-bits of the result denote hash position
85 * inside a hash block. The remaining bits denote location of the hash block.
/*
 * Returns @block shifted right by (level * v->hash_per_block_bits).
 * NOTE(review): the declaration of "level" is on a signature line that is
 * not visible in this excerpt; callers pass an int tree level.
 */
87 static sector_t verity_position_at_level(struct dm_verity *v, sector_t block,
90 return block >> (level * v->hash_per_block_bits);
94 * Wrapper for crypto_shash_init, which handles verity salting.
/*
 * Start a hash computation on @desc.
 *
 * Marks the descriptor as allowed to sleep, calls crypto_shash_init() and,
 * for format version >= 1, feeds v->salt (v->salt_size bytes) into the hash
 * before any data. Failures of either crypto call are logged with DMERR.
 * NOTE(review): the return statements are not visible in this excerpt;
 * callers treat the result as an int status.
 */
96 static int verity_hash_init(struct dm_verity *v, struct shash_desc *desc)
101 desc->flags = CRYPTO_TFM_REQ_MAY_SLEEP;
103 r = crypto_shash_init(desc);
105 if (unlikely(r < 0)) {
106 DMERR("crypto_shash_init failed: %d", r);
/* Version >= 1 salts the hash up front; version 0 is handled in verity_hash_final(). */
110 if (likely(v->version >= 1)) {
111 r = crypto_shash_update(desc, v->salt, v->salt_size);
113 if (unlikely(r < 0)) {
114 DMERR("crypto_shash_update failed: %d", r);
/*
 * Feed @len bytes at @data into the running hash in @desc via
 * crypto_shash_update(), logging a DMERR message for the failure case.
 * NOTE(review): the failure test and the return statement are on lines not
 * visible in this excerpt.
 */
122 static int verity_hash_update(struct dm_verity *v, struct shash_desc *desc,
123 const u8 *data, size_t len)
125 int r = crypto_shash_update(desc, data, len);
128 DMERR("crypto_shash_update failed: %d", r);
/*
 * Finish the hash computation in @desc and write the result to "digest".
 *
 * For format version 0 the salt is fed into the hash here, i.e. after the
 * data, before crypto_shash_final() is called.
 * NOTE(review): the "digest" parameter declaration, the error checks and
 * the return statements are on lines not visible in this excerpt.
 */
133 static int verity_hash_final(struct dm_verity *v, struct shash_desc *desc,
138 if (unlikely(!v->version)) {
139 r = crypto_shash_update(desc, v->salt, v->salt_size);
142 DMERR("crypto_shash_update failed: %d", r);
147 r = crypto_shash_final(desc, digest);
150 DMERR("crypto_shash_final failed: %d", r);
/*
 * One-shot hash of a buffer: runs verity_hash_init(), verity_hash_update()
 * over @len bytes of @data, then returns the result of verity_hash_final(),
 * which writes into @digest.
 * NOTE(review): the checks on the intermediate "r" values are not visible
 * in this excerpt.
 */
155 int verity_hash(struct dm_verity *v, struct shash_desc *desc,
156 const u8 *data, size_t len, u8 *digest)
160 r = verity_hash_init(v, desc);
164 r = verity_hash_update(v, desc, data, len);
168 return verity_hash_final(v, desc, digest);
/*
 * Locate the hash that covers @block at tree level @level.
 *
 * *hash_block receives v->hash_level_block[level] plus the position (from
 * verity_position_at_level()) shifted right by hash_per_block_bits. The low
 * hash_per_block_bits bits of the position form an index from which
 * *offset is derived.
 * NOTE(review): two different *offset formulas are visible below; the
 * conditions selecting between them are not visible in this excerpt.
 * verity_prefetch_io() passes NULL for @offset, so there is presumably a
 * guard for that case on an unseen line - confirm.
 */
171 static void verity_hash_at_level(struct dm_verity *v, sector_t block, int level,
172 sector_t *hash_block, unsigned *offset)
174 sector_t position = verity_position_at_level(v, block, level);
177 *hash_block = v->hash_level_block[level] + (position >> v->hash_per_block_bits);
182 idx = position & ((1 << v->hash_per_block_bits) - 1);
184 *offset = idx * v->digest_size;
186 *offset = idx << (v->hash_dev_block_bits - v->hash_per_block_bits);
190 * Handle verification errors.
/*
 * Report a corrupted block.
 *
 * @type:  whether a data or a metadata (hash) block failed verification.
 * @block: number of the failing block.
 *
 * Logs the corruption, logs once more when v->corrupted_errs equals
 * DM_VERITY_MAX_CORRUPTED_ERRS, and sends a KOBJ_CHANGE uevent on the mapped
 * device's disk carrying DM_VERITY_ENV_VAR_NAME=<type>,<block>. In restart
 * mode it then calls kernel_restart().
 * NOTE(review): the statements that update v->corrupted_errs, the action
 * taken once the error limit is exceeded, and all return values are on
 * lines not visible in this excerpt. Callers test the result for non-zero.
 */
192 static int verity_handle_err(struct dm_verity *v, enum verity_block_type type,
193 unsigned long long block)
195 char verity_env[DM_VERITY_ENV_LENGTH];
196 char *envp[] = { verity_env, NULL };
197 const char *type_str = "";
198 struct mapped_device *md = dm_table_get_md(v->ti->table);
200 /* Corruption should be visible in device status in all modes */
203 if (v->corrupted_errs >= DM_VERITY_MAX_CORRUPTED_ERRS)
209 case DM_VERITY_BLOCK_TYPE_DATA:
212 case DM_VERITY_BLOCK_TYPE_METADATA:
213 type_str = "metadata";
219 DMERR("%s: %s block %llu is corrupted", v->data_dev->name, type_str,
222 if (v->corrupted_errs == DM_VERITY_MAX_CORRUPTED_ERRS)
223 DMERR("%s: reached maximum errors", v->data_dev->name);
/* snprintf() bounds the write to the verity_env buffer. */
225 snprintf(verity_env, DM_VERITY_ENV_LENGTH, "%s=%d,%llu",
226 DM_VERITY_ENV_VAR_NAME, type, block);
228 kobject_uevent_env(&disk_to_dev(dm_disk(md))->kobj, KOBJ_CHANGE, envp);
231 if (v->mode == DM_VERITY_MODE_LOGGING)
234 if (v->mode == DM_VERITY_MODE_RESTART)
235 kernel_restart("dm-verity device corrupted");
241 * Verify hash of a metadata block pertaining to the specified data block
242 * ("block" argument) at a specified level ("level" argument).
244 * On successful return, verity_io_want_digest(v, io) contains the hash value
245 * for a lower tree level or for the data block (if we're at the lowest level).
247 * If "skip_unverified" is true, unverified buffer is skipped and 1 is returned.
248 * If "skip_unverified" is false, unverified buffer is hashed and verified
249 * against current value of verity_io_want_digest(v, io).
/*
 * Verify the hash block covering @block at tree level @level and fetch the
 * next expected digest from it.
 *
 * Reads the hash block through dm-bufio. If its buffer is not yet marked
 * hash_verified and @skip_unverified is false, the whole hash block
 * (1 << hash_dev_block_bits bytes) is hashed and compared with want_digest:
 * a match sets hash_verified, a mismatch is reported through
 * verity_handle_err() as a metadata error. Finally digest_size bytes are
 * copied from the block into want_digest and the buffer is released.
 * NOTE(review): the want_digest parameter declaration, the read-error test,
 * the skip_unverified branch body, any adjustment of "data" by "offset" and
 * the return statements are on lines not visible in this excerpt.
 */
251 static int verity_verify_level(struct dm_verity *v, struct dm_verity_io *io,
252 sector_t block, int level, bool skip_unverified,
255 struct dm_buffer *buf;
256 struct buffer_aux *aux;
262 verity_hash_at_level(v, block, level, &hash_block, &offset);
264 data = dm_bufio_read(v->bufio, hash_block, &buf);
266 return PTR_ERR(data);
268 aux = dm_bufio_get_aux_data(buf);
270 if (!aux->hash_verified) {
271 if (skip_unverified) {
276 r = verity_hash(v, verity_io_hash_desc(v, io),
277 data, 1 << v->hash_dev_block_bits,
278 verity_io_real_digest(v, io));
282 if (likely(memcmp(verity_io_real_digest(v, io), want_digest,
283 v->digest_size) == 0))
284 aux->hash_verified = 1;
285 else if (verity_handle_err(v,
286 DM_VERITY_BLOCK_TYPE_METADATA,
294 memcpy(want_digest, data, v->digest_size);
298 dm_bufio_release(buf);
303 * Find a hash for a given block, write it to digest and verify the integrity
304 * of the hash tree if necessary.
/*
 * Obtain the expected digest of data block @block in @digest.
 *
 * When the tree has levels, a fast path first asks level 0 with
 * skip_unverified = true. The full path seeds @digest with v->root_digest
 * and calls verity_verify_level() for every level from v->levels - 1 down
 * to 0 with skip_unverified = false.
 * NOTE(review): the tests on "r" that decide between the fast path, the
 * full walk and an early return are on lines not visible in this excerpt.
 */
306 int verity_hash_for_block(struct dm_verity *v, struct dm_verity_io *io,
307 sector_t block, u8 *digest)
312 if (likely(v->levels)) {
314 * First, we try to get the requested hash for
315 * the current block. If the hash block itself is
316 * verified, zero is returned. If it isn't, this
317 * function returns 1 and we fall back to whole
318 * chain verification.
320 r = verity_verify_level(v, io, block, 0, true, digest);
325 memcpy(digest, v->root_digest, v->digest_size);
327 for (i = v->levels - 1; i >= 0; i--) {
328 r = verity_verify_level(v, io, block, i, false, digest);
337 * Calls function process for 1 << v->data_dev_block_bits bytes in the bio_vec
338 * starting from iter.
/*
 * Run @process over one data block's worth of bio payload starting at @iter.
 *
 * "todo" starts at 1 << data_dev_block_bits. Each visible step takes the
 * bio_vec at *iter, maps its page with kmap_atomic(), passes the mapped
 * address plus bv_offset and a length to @process, and advances *iter by
 * that length.
 * NOTE(review): the loop construct, the computation and clamping of "len",
 * the unmap call, the error handling of "r" and the update of "todo" are on
 * lines not visible in this excerpt.
 */
340 int verity_for_bv_block(struct dm_verity *v, struct dm_verity_io *io,
341 struct bvec_iter *iter,
342 int (*process)(struct dm_verity *v,
343 struct dm_verity_io *io, u8 *data,
346 unsigned todo = 1 << v->data_dev_block_bits;
347 struct bio *bio = dm_bio_from_per_bio_data(io, v->ti->per_bio_data_size);
353 struct bio_vec bv = bio_iter_iovec(bio, *iter);
355 page = kmap_atomic(bv.bv_page);
358 if (likely(len >= todo))
361 r = process(v, io, page + bv.bv_offset, len);
367 bio_advance_iter(bio, iter, len);
/*
 * verity_for_bv_block() callback: feeds @len bytes at @data into the
 * per-io hash descriptor and returns the verity_hash_update() result.
 */
374 static int verity_bv_hash_update(struct dm_verity *v, struct dm_verity_io *io,
375 u8 *data, size_t len)
377 return verity_hash_update(v, verity_io_hash_desc(v, io), data, len);
381 * Verify one "dm_verity_io" structure.
/*
 * Verify every data block covered by @io.
 *
 * For each of the io->n_blocks blocks: fetch the expected digest with
 * verity_hash_for_block(), hash the block's payload from the bio
 * (verity_hash_init -> verity_for_bv_block -> verity_hash_final) and compare
 * the real digest with the wanted one. A mismatch is reported through
 * verity_handle_err() as a data-block error.
 * NOTE(review): the checks on "r", the use of "start" and the return
 * statements are on lines not visible in this excerpt.
 */
383 static int verity_verify_io(struct dm_verity_io *io)
385 struct dm_verity *v = io->v;
386 struct bvec_iter start;
389 for (b = 0; b < io->n_blocks; b++) {
391 struct shash_desc *desc = verity_io_hash_desc(v, io);
393 r = verity_hash_for_block(v, io, io->block + b,
394 verity_io_want_digest(v, io));
398 r = verity_hash_init(v, desc);
/* io->iter is advanced by one block per iteration inside verity_for_bv_block(). */
403 r = verity_for_bv_block(v, io, &io->iter, verity_bv_hash_update);
407 r = verity_hash_final(v, desc, verity_io_real_digest(v, io));
411 if (likely(memcmp(verity_io_real_digest(v, io),
412 verity_io_want_digest(v, io), v->digest_size) == 0))
414 else if (verity_handle_err(v, DM_VERITY_BLOCK_TYPE_DATA,
423 * End one "io" structure with a given error.
/*
 * Restore the bio's original completion handler (saved in
 * io->orig_bi_end_io by verity_map()) and store @error in bio->bi_error.
 * NOTE(review): the call that actually completes the bio is presumably on a
 * line not visible in this excerpt - confirm.
 */
425 static void verity_finish_io(struct dm_verity_io *io, int error)
427 struct dm_verity *v = io->v;
428 struct bio *bio = dm_bio_from_per_bio_data(io, v->ti->per_bio_data_size);
430 bio->bi_end_io = io->orig_bi_end_io;
431 bio->bi_error = error;
/*
 * Work handler installed by verity_end_io(): recovers the dm_verity_io that
 * embeds @w, verifies it and finishes the io with the verification result.
 */
436 static void verity_work(struct work_struct *w)
438 struct dm_verity_io *io = container_of(w, struct dm_verity_io, work);
440 verity_finish_io(io, verity_verify_io(io));
/*
 * Completion handler installed on the bio by verity_map().
 *
 * Two outcomes are visible: finishing the io immediately with
 * bio->bi_error, or queueing verity_work() on the target's verify_wq.
 * NOTE(review): the condition choosing between them (presumably a non-zero
 * bio->bi_error) is on a line not visible in this excerpt.
 */
443 static void verity_end_io(struct bio *bio)
445 struct dm_verity_io *io = bio->bi_private;
448 verity_finish_io(io, bio->bi_error);
452 INIT_WORK(&io->work, verity_work);
453 queue_work(io->v->verify_wq, &io->work);
457 * Prefetch buffers for the specified io.
458 * The root buffer is not prefetched, it is assumed that it will be cached
/*
 * Work handler that prefetches hash blocks for a queued request.
 *
 * For every level from v->levels - 2 down to 0 it computes the first and
 * last hash block covering data blocks [pw->block, pw->block +
 * pw->n_blocks - 1] and calls dm_bufio_prefetch() on that range.
 * The cluster logic reads the module parameter once, converts it to a block
 * count by shifting with data_dev_block_bits, rounds it down to a power of
 * two, then widens the range to cluster boundaries and clamps the end to
 * v->hash_blocks - 1.
 * NOTE(review): the condition under which the cluster widening applies, the
 * no_prefetch_cluster label and the release of "pw" are on lines not
 * visible in this excerpt.
 */
461 static void verity_prefetch_io(struct work_struct *work)
463 struct dm_verity_prefetch_work *pw =
464 container_of(work, struct dm_verity_prefetch_work, work);
465 struct dm_verity *v = pw->v;
468 for (i = v->levels - 2; i >= 0; i--) {
469 sector_t hash_block_start;
470 sector_t hash_block_end;
471 verity_hash_at_level(v, pw->block, i, &hash_block_start, NULL);
472 verity_hash_at_level(v, pw->block + pw->n_blocks - 1, i, &hash_block_end, NULL);
474 unsigned cluster = ACCESS_ONCE(dm_verity_prefetch_cluster);
476 cluster >>= v->data_dev_block_bits;
477 if (unlikely(!cluster))
478 goto no_prefetch_cluster;
/* Not a power of two: keep only the highest set bit. */
480 if (unlikely(cluster & (cluster - 1)))
481 cluster = 1 << __fls(cluster);
483 hash_block_start &= ~(sector_t)(cluster - 1);
484 hash_block_end |= cluster - 1;
485 if (unlikely(hash_block_end >= v->hash_blocks))
486 hash_block_end = v->hash_blocks - 1;
489 dm_bufio_prefetch(v->bufio, hash_block_start,
490 hash_block_end - hash_block_start + 1);
/*
 * Queue a prefetch work item for @io on v->verify_wq.
 *
 * The work item is allocated with GFP_NOIO plus __GFP_NORETRY,
 * __GFP_NOMEMALLOC and __GFP_NOWARN, then filled with the io's block range.
 * NOTE(review): the allocation-failure check and the assignment of pw->v
 * are on lines not visible in this excerpt.
 */
496 static void verity_submit_prefetch(struct dm_verity *v, struct dm_verity_io *io)
498 struct dm_verity_prefetch_work *pw;
500 pw = kmalloc(sizeof(struct dm_verity_prefetch_work),
501 GFP_NOIO | __GFP_NORETRY | __GFP_NOMEMALLOC | __GFP_NOWARN);
506 INIT_WORK(&pw->work, verity_prefetch_io);
508 pw->block = io->block;
509 pw->n_blocks = io->n_blocks;
510 queue_work(v->verify_wq, &pw->work);
514 * Bio map function. It allocates dm_verity_io structure and bio vector and
515 * fills them. Then it issues prefetches and the I/O.
/*
 * Map callback of the verity target.
 *
 * Redirects @bio to the data device and remaps its sector, logs
 * "unaligned io" when the sector or size is not a multiple of the data
 * block size, logs "io out of range" when the bio ends beyond
 * v->data_blocks, and tests for WRITE bios. For accepted bios it fills
 * the per-bio dm_verity_io (block range, saved end_io, saved iterator),
 * installs verity_end_io() as completion handler, submits a hash prefetch
 * and issues the bio itself, returning DM_MAPIO_SUBMITTED.
 * NOTE(review): the return values of the three rejection paths and the
 * assignment of io->v are on lines not visible in this excerpt.
 */
517 static int verity_map(struct dm_target *ti, struct bio *bio)
519 struct dm_verity *v = ti->private;
520 struct dm_verity_io *io;
522 bio->bi_bdev = v->data_dev->bdev;
523 bio->bi_iter.bi_sector = verity_map_sector(v, bio->bi_iter.bi_sector);
525 if (((unsigned)bio->bi_iter.bi_sector | bio_sectors(bio)) &
526 ((1 << (v->data_dev_block_bits - SECTOR_SHIFT)) - 1)) {
527 DMERR_LIMIT("unaligned io");
531 if (bio_end_sector(bio) >>
532 (v->data_dev_block_bits - SECTOR_SHIFT) > v->data_blocks) {
533 DMERR_LIMIT("io out of range");
537 if (bio_data_dir(bio) == WRITE)
540 io = dm_per_bio_data(bio, ti->per_bio_data_size);
542 io->orig_bi_end_io = bio->bi_end_io;
543 io->block = bio->bi_iter.bi_sector >> (v->data_dev_block_bits - SECTOR_SHIFT);
544 io->n_blocks = bio->bi_iter.bi_size >> v->data_dev_block_bits;
546 bio->bi_end_io = verity_end_io;
547 bio->bi_private = io;
/* Snapshot the iterator so verification can later walk the payload. */
548 io->iter = bio->bi_iter;
550 verity_submit_prefetch(v, io);
552 generic_make_request(bio);
554 return DM_MAPIO_SUBMITTED;
558 * Status: V (valid) or C (corruption found)
/*
 * Status callback.
 *
 * STATUSTYPE_INFO emits a single character: 'C' when v->hash_failed is set,
 * otherwise 'V'. STATUSTYPE_TABLE emits the table parameters, including both
 * block sizes, the data block count, the hash start, the root digest and
 * the salt as lowercase hex, and - when the mode is not DM_VERITY_MODE_EIO -
 * the option string matching the current mode.
 * NOTE(review): several DMEMIT arguments, the switch statements and the
 * output used for an empty salt are on lines not visible in this excerpt.
 */
560 static void verity_status(struct dm_target *ti, status_type_t type,
561 unsigned status_flags, char *result, unsigned maxlen)
563 struct dm_verity *v = ti->private;
568 case STATUSTYPE_INFO:
569 DMEMIT("%c", v->hash_failed ? 'C' : 'V');
571 case STATUSTYPE_TABLE:
572 DMEMIT("%u %s %s %u %u %llu %llu %s ",
576 1 << v->data_dev_block_bits,
577 1 << v->hash_dev_block_bits,
578 (unsigned long long)v->data_blocks,
579 (unsigned long long)v->hash_start,
582 for (x = 0; x < v->digest_size; x++)
583 DMEMIT("%02x", v->root_digest[x]);
588 for (x = 0; x < v->salt_size; x++)
589 DMEMIT("%02x", v->salt[x]);
590 if (v->mode != DM_VERITY_MODE_EIO) {
593 case DM_VERITY_MODE_LOGGING:
594 DMEMIT(DM_VERITY_OPT_LOGGING);
596 case DM_VERITY_MODE_RESTART:
597 DMEMIT(DM_VERITY_OPT_RESTART);
/*
 * prepare_ioctl callback: hands the data device's block_device back through
 * @bdev and compares ti->len with the data device size in sectors.
 * NOTE(review): the first half of that condition and both return values
 * are on lines not visible in this excerpt.
 */
607 static int verity_prepare_ioctl(struct dm_target *ti,
608 struct block_device **bdev, fmode_t *mode)
610 struct dm_verity *v = ti->private;
612 *bdev = v->data_dev->bdev;
615 ti->len != i_size_read(v->data_dev->bdev->bd_inode) >> SECTOR_SHIFT)
/*
 * iterate_devices callback: invokes @fn once for the data device, covering
 * ti->len sectors starting at v->data_start, and returns its result.
 * Only the data device is passed to @fn here.
 */
620 static int verity_iterate_devices(struct dm_target *ti,
621 iterate_devices_callout_fn fn, void *data)
623 struct dm_verity *v = ti->private;
625 return fn(ti, v->data_dev, v->data_start, ti->len, data);
/*
 * io_hints callback: raises the logical and physical block sizes in
 * @limits to at least the data block size (1 << data_dev_block_bits) and
 * sets the minimum I/O size to the resulting logical block size.
 */
628 static void verity_io_hints(struct dm_target *ti, struct queue_limits *limits)
630 struct dm_verity *v = ti->private;
632 if (limits->logical_block_size < 1 << v->data_dev_block_bits)
633 limits->logical_block_size = 1 << v->data_dev_block_bits;
635 if (limits->physical_block_size < 1 << v->data_dev_block_bits)
636 limits->physical_block_size = 1 << v->data_dev_block_bits;
638 blk_limits_io_min(limits, limits->logical_block_size);
/*
 * Destructor: tears down, in this order, the verification workqueue, the
 * dm-bufio client, the root digest buffer, the hash transform, and the
 * references to the hash and data devices.
 * NOTE(review): the guards around each release and the freeing of the
 * remaining members (e.g. salt, alg_name, v itself) are on lines not
 * visible in this excerpt.
 */
641 static void verity_dtr(struct dm_target *ti)
643 struct dm_verity *v = ti->private;
646 destroy_workqueue(v->verify_wq);
649 dm_bufio_client_destroy(v->bufio);
652 kfree(v->root_digest);
655 crypto_free_shash(v->tfm);
660 dm_put_device(ti, v->hash_dev);
663 dm_put_device(ti, v->data_dev);
/*
 * Parse the optional feature arguments of the table line.
 *
 * Reads the argument count (bounded to 0..DM_VERITY_OPTS_MAX by _args) with
 * dm_read_arg_group(), then consumes one argument per loop iteration.
 * Names are matched case-insensitively: DM_VERITY_OPT_LOGGING selects
 * DM_VERITY_MODE_LOGGING and DM_VERITY_OPT_RESTART selects
 * DM_VERITY_MODE_RESTART. Anything else sets ti->error to "Unrecognized
 * verity feature request".
 * NOTE(review): the handling of "r", the decrement of argc and the return
 * values are on lines not visible in this excerpt.
 */
668 static int verity_parse_opt_args(struct dm_arg_set *as, struct dm_verity *v)
672 struct dm_target *ti = v->ti;
673 const char *arg_name;
675 static struct dm_arg _args[] = {
676 {0, DM_VERITY_OPTS_MAX, "Invalid number of feature args"},
679 r = dm_read_arg_group(_args, as, &argc, &ti->error);
687 arg_name = dm_shift_arg(as);
690 if (!strcasecmp(arg_name, DM_VERITY_OPT_LOGGING)) {
691 v->mode = DM_VERITY_MODE_LOGGING;
694 } else if (!strcasecmp(arg_name, DM_VERITY_OPT_RESTART)) {
695 v->mode = DM_VERITY_MODE_RESTART;
699 ti->error = "Unrecognized verity feature request";
701 } while (argc && !r);
708 * <version> The current format is version 1.
709 * Vsn 0 is compatible with original Chromium OS releases.
714 * <the number of data blocks>
718 * <salt> Hex string or "-" if no salt.
/*
 * Constructor: parse the table line in @argv and set up the target.
 *
 * Visible steps, in order: allocate the dm_verity structure; require a
 * read-only table; parse the version (argv[0]); open the data device
 * (argv[1]) and the hash device (argv[2]); parse both block sizes (argv[3],
 * argv[4], each a power of two not smaller than the device's logical block
 * size); parse the data block count (argv[5]) and hash start (argv[6]) with
 * overflow checks; allocate the hash transform named by argv[7]; decode the
 * root digest (argv[8]) and the salt (argv[9], "-" meaning no salt); parse
 * optional arguments; derive hash_per_block_bits, the number of tree levels
 * and the starting block of each level; create the dm-bufio client; size
 * the per-bio data; create the verification workqueue.
 *
 * Every failure sets ti->error to a message naming the failing step.
 * NOTE(review): the error-path jumps, the cleanup label and the return
 * values are on lines not visible in this excerpt.
 */
720 static int verity_ctr(struct dm_target *ti, unsigned argc, char **argv)
723 struct dm_arg_set as;
725 unsigned long long num_ll;
728 sector_t hash_position;
731 v = kzalloc(sizeof(struct dm_verity), GFP_KERNEL);
733 ti->error = "Cannot allocate verity structure";
739 if ((dm_table_get_mode(ti->table) & ~FMODE_READ)) {
740 ti->error = "Device must be readonly";
746 ti->error = "Not enough arguments";
/* The trailing %c makes sscanf() return 2 on trailing garbage, so "!= 1" rejects it. */
751 if (sscanf(argv[0], "%u%c", &num, &dummy) != 1 ||
753 ti->error = "Invalid version";
759 r = dm_get_device(ti, argv[1], FMODE_READ, &v->data_dev);
761 ti->error = "Data device lookup failed";
765 r = dm_get_device(ti, argv[2], FMODE_READ, &v->hash_dev);
/* This lookup is for the hash device, so the message must name it. */
767 ti->error = "Hash device lookup failed";
771 if (sscanf(argv[3], "%u%c", &num, &dummy) != 1 ||
772 !num || (num & (num - 1)) ||
773 num < bdev_logical_block_size(v->data_dev->bdev) ||
775 ti->error = "Invalid data device block size";
779 v->data_dev_block_bits = __ffs(num);
781 if (sscanf(argv[4], "%u%c", &num, &dummy) != 1 ||
782 !num || (num & (num - 1)) ||
783 num < bdev_logical_block_size(v->hash_dev->bdev) ||
785 ti->error = "Invalid hash device block size";
789 v->hash_dev_block_bits = __ffs(num);
/* Shift left then right and compare: detects a block count that overflows sector_t. */
791 if (sscanf(argv[5], "%llu%c", &num_ll, &dummy) != 1 ||
792 (sector_t)(num_ll << (v->data_dev_block_bits - SECTOR_SHIFT))
793 >> (v->data_dev_block_bits - SECTOR_SHIFT) != num_ll) {
794 ti->error = "Invalid data blocks";
798 v->data_blocks = num_ll;
800 if (ti->len > (v->data_blocks << (v->data_dev_block_bits - SECTOR_SHIFT))) {
801 ti->error = "Data device is too small";
806 if (sscanf(argv[6], "%llu%c", &num_ll, &dummy) != 1 ||
807 (sector_t)(num_ll << (v->hash_dev_block_bits - SECTOR_SHIFT))
808 >> (v->hash_dev_block_bits - SECTOR_SHIFT) != num_ll) {
809 ti->error = "Invalid hash start";
813 v->hash_start = num_ll;
815 v->alg_name = kstrdup(argv[7], GFP_KERNEL);
817 ti->error = "Cannot allocate algorithm name";
822 v->tfm = crypto_alloc_shash(v->alg_name, 0, 0);
823 if (IS_ERR(v->tfm)) {
824 ti->error = "Cannot initialize hash function";
829 v->digest_size = crypto_shash_digestsize(v->tfm);
/* A hash block must hold at least two digests. */
830 if ((1 << v->hash_dev_block_bits) < v->digest_size * 2) {
831 ti->error = "Digest size too big";
836 sizeof(struct shash_desc) + crypto_shash_descsize(v->tfm);
838 v->root_digest = kmalloc(v->digest_size, GFP_KERNEL);
839 if (!v->root_digest) {
840 ti->error = "Cannot allocate root digest";
844 if (strlen(argv[8]) != v->digest_size * 2 ||
845 hex2bin(v->root_digest, argv[8], v->digest_size)) {
846 ti->error = "Invalid root digest";
851 if (strcmp(argv[9], "-")) {
852 v->salt_size = strlen(argv[9]) / 2;
853 v->salt = kmalloc(v->salt_size, GFP_KERNEL);
855 ti->error = "Cannot allocate salt";
/* The length comparison also rejects an odd number of hex digits. */
859 if (strlen(argv[9]) != v->salt_size * 2 ||
860 hex2bin(v->salt, argv[9], v->salt_size)) {
861 ti->error = "Invalid salt";
870 /* Optional parameters */
875 r = verity_parse_opt_args(&as, v);
880 v->hash_per_block_bits =
881 __fls((1 << v->hash_dev_block_bits) / v->digest_size);
885 while (v->hash_per_block_bits * v->levels < 64 &&
886 (unsigned long long)(v->data_blocks - 1) >>
887 (v->hash_per_block_bits * v->levels))
890 if (v->levels > DM_VERITY_MAX_LEVELS) {
891 ti->error = "Too many tree levels";
/* Record where each level starts on the hash device, walking from the top level down. */
896 hash_position = v->hash_start;
897 for (i = v->levels - 1; i >= 0; i--) {
899 v->hash_level_block[i] = hash_position;
900 s = (v->data_blocks + ((sector_t)1 << ((i + 1) * v->hash_per_block_bits)) - 1)
901 >> ((i + 1) * v->hash_per_block_bits);
902 if (hash_position + s < hash_position) {
903 ti->error = "Hash device offset overflow";
909 v->hash_blocks = hash_position;
911 v->bufio = dm_bufio_client_create(v->hash_dev->bdev,
912 1 << v->hash_dev_block_bits, 1, sizeof(struct buffer_aux),
913 dm_bufio_alloc_callback, NULL);
914 if (IS_ERR(v->bufio)) {
915 ti->error = "Cannot initialize dm-bufio";
916 r = PTR_ERR(v->bufio);
921 if (dm_bufio_get_device_size(v->bufio) < v->hash_blocks) {
922 ti->error = "Hash device is too small";
/* Per-bio area: the io structure, the hash descriptor and two digests. */
927 ti->per_bio_data_size = roundup(sizeof(struct dm_verity_io) + v->shash_descsize + v->digest_size * 2, __alignof__(struct dm_verity_io));
929 /* WQ_UNBOUND greatly improves performance when running on ramdisk */
930 v->verify_wq = alloc_workqueue("kverityd", WQ_CPU_INTENSIVE | WQ_MEM_RECLAIM | WQ_UNBOUND, num_online_cpus());
932 ti->error = "Cannot allocate workqueue";
/*
 * Target descriptor registered by dm_verity_init(); reports version 1.2.0
 * and wires up the status, prepare_ioctl, iterate_devices and io_hints
 * callbacks defined in this file.
 * NOTE(review): further members (e.g. the target name and the ctr/dtr/map
 * callbacks) are presumably set on lines not visible in this excerpt.
 */
945 static struct target_type verity_target = {
947 .version = {1, 2, 0},
948 .module = THIS_MODULE,
952 .status = verity_status,
953 .prepare_ioctl = verity_prepare_ioctl,
954 .iterate_devices = verity_iterate_devices,
955 .io_hints = verity_io_hints,
/*
 * Module init: registers verity_target with device-mapper and logs
 * "register failed" together with the result code.
 * NOTE(review): the failure test and the return statement are on lines not
 * visible in this excerpt.
 */
958 static int __init dm_verity_init(void)
962 r = dm_register_target(&verity_target);
964 DMERR("register failed %d", r);
/* Module exit: unregisters verity_target from device-mapper. */
969 static void __exit dm_verity_exit(void)
971 dm_unregister_target(&verity_target);
/* Module entry and exit points. */
974 module_init(dm_verity_init);
975 module_exit(dm_verity_exit);
/* Module metadata: authors, description and license. */
977 MODULE_AUTHOR("Mikulas Patocka <mpatocka@redhat.com>");
978 MODULE_AUTHOR("Mandeep Baines <msb@chromium.org>");
979 MODULE_AUTHOR("Will Drewry <wad@chromium.org>");
980 MODULE_DESCRIPTION(DM_NAME " target for transparent disk integrity checking");
981 MODULE_LICENSE("GPL");