From e025344c56e08b155f43ea09647969286c78377c Mon Sep 17 00:00:00 2001 From: Shane Michael Matthews Date: Thu, 10 Feb 2011 08:51:24 -0500 Subject: [PATCH] NVMe: Initial PRP List support Add a pointer to the nvme_req_info to hold a new data structure (nvme_prps) which contains a list of the pages allocated to this particular request for holding PRP list entries. nvme_setup_prps() now returns this pointer. To allocate and free the memory used for PRP lists, we need a struct device, so we need to pass the nvme_queue pointer to many functions which didn't use to need it. Signed-off-by: Matthew Wilcox --- drivers/block/nvme.c | 104 +++++++++++++++++++++++++++++++++++++------ 1 file changed, 91 insertions(+), 13 deletions(-) diff --git a/drivers/block/nvme.c b/drivers/block/nvme.c index 903e7f15b60d..b1e8445985a2 100644 --- a/drivers/block/nvme.c +++ b/drivers/block/nvme.c @@ -247,21 +247,55 @@ static int nvme_submit_cmd(struct nvme_queue *nvmeq, struct nvme_command *cmd) return 0; } +static __le64 *alloc_prp_list(struct nvme_queue *nvmeq, int length, + dma_addr_t *addr) +{ + return dma_alloc_coherent(nvmeq->q_dmadev, PAGE_SIZE, addr, GFP_ATOMIC); +} + +struct nvme_prps { + int npages; + dma_addr_t first_dma; + __le64 *list[0]; +}; + +static void nvme_free_prps(struct nvme_queue *nvmeq, struct nvme_prps *prps) +{ + const int last_prp = PAGE_SIZE / 8 - 1; + int i; + dma_addr_t prp_dma; + + if (!prps) + return; + + prp_dma = prps->first_dma; + for (i = 0; i < prps->npages; i++) { + __le64 *prp_list = prps->list[i]; + dma_addr_t next_prp_dma = le64_to_cpu(prp_list[last_prp]); + dma_free_coherent(nvmeq->q_dmadev, PAGE_SIZE, prp_list, + prp_dma); + prp_dma = next_prp_dma; + } + kfree(prps); +} + struct nvme_req_info { struct bio *bio; int nents; + struct nvme_prps *prps; struct scatterlist sg[0]; }; /* XXX: use a mempool */ static struct nvme_req_info *alloc_info(unsigned nseg, gfp_t gfp) { - return kmalloc(sizeof(struct nvme_req_info) + + return kzalloc(sizeof(struct nvme_req_info) + sizeof(struct scatterlist) * nseg, gfp); } -static void free_info(struct nvme_req_info *info) +static void free_info(struct nvme_queue *nvmeq, struct nvme_req_info *info) { + nvme_free_prps(nvmeq, info->prps); kfree(info); } @@ -274,7 +308,7 @@ static void bio_completion(struct nvme_queue *nvmeq, void *ctx, dma_unmap_sg(nvmeq->q_dmadev, info->sg, info->nents, bio_data_dir(bio) ? DMA_TO_DEVICE : DMA_FROM_DEVICE); - free_info(info); + free_info(nvmeq, info); bio_endio(bio, status ? -EIO : 0); bio = bio_list_pop(&nvmeq->sq_cong); if (bio) @@ -282,17 +316,22 @@ static void bio_completion(struct nvme_queue *nvmeq, void *ctx, } /* length is in bytes */ -static void nvme_setup_prps(struct nvme_common_command *cmd, +static struct nvme_prps *nvme_setup_prps(struct nvme_queue *nvmeq, + struct nvme_common_command *cmd, struct scatterlist *sg, int length) { int dma_len = sg_dma_len(sg); u64 dma_addr = sg_dma_address(sg); int offset = offset_in_page(dma_addr); + __le64 *prp_list; + dma_addr_t prp_dma; + int nprps, npages, i, prp_page; + struct nvme_prps *prps = NULL; cmd->prp1 = cpu_to_le64(dma_addr); length -= (PAGE_SIZE - offset); if (length <= 0) - return; + return prps; dma_len -= (PAGE_SIZE - offset); if (dma_len) { @@ -305,10 +344,42 @@ static void nvme_setup_prps(struct nvme_common_command *cmd, if (length <= PAGE_SIZE) { cmd->prp2 = cpu_to_le64(dma_addr); - return; + return prps; } - /* XXX: support PRP lists */ + nprps = DIV_ROUND_UP(length, PAGE_SIZE); + npages = DIV_ROUND_UP(8 * nprps, PAGE_SIZE); + prps = kmalloc(sizeof(*prps) + sizeof(__le64 *) * npages, GFP_ATOMIC); + prps->npages = npages; + prp_page = 0; + prp_list = alloc_prp_list(nvmeq, length, &prp_dma); + prps->list[prp_page++] = prp_list; + prps->first_dma = prp_dma; + cmd->prp2 = cpu_to_le64(prp_dma); + i = 0; + for (;;) { + if (i == PAGE_SIZE / 8 - 1) { + __le64 *old_prp_list = prp_list; + prp_list = alloc_prp_list(nvmeq, length, &prp_dma); + prps->list[prp_page++] = prp_list; + old_prp_list[i] = cpu_to_le64(prp_dma); + i = 0; + } + prp_list[i++] = cpu_to_le64(dma_addr); + dma_len -= PAGE_SIZE; + dma_addr += PAGE_SIZE; + length -= PAGE_SIZE; + if (length <= 0) + break; + if (dma_len > 0) + continue; + BUG_ON(dma_len < 0); + sg = sg_next(sg); + dma_addr = sg_dma_address(sg); + dma_len = sg_dma_len(sg); + } + + return prps; } static int nvme_map_bio(struct device *dev, struct nvme_req_info *info, @@ -378,7 +449,8 @@ static int nvme_submit_bio_queue(struct nvme_queue *nvmeq, struct nvme_ns *ns, cmnd->rw.flags = 1; cmnd->rw.command_id = cmdid; cmnd->rw.nsid = cpu_to_le32(ns->ns_id); - nvme_setup_prps(&cmnd->common, info->sg, bio->bi_size); + info->prps = nvme_setup_prps(nvmeq, &cmnd->common, info->sg, + bio->bi_size); cmnd->rw.slba = cpu_to_le64(bio->bi_sector >> (ns->lba_shift - 9)); cmnd->rw.length = cpu_to_le16((bio->bi_size >> ns->lba_shift) - 1); cmnd->rw.control = cpu_to_le16(control); @@ -393,7 +465,7 @@ static int nvme_submit_bio_queue(struct nvme_queue *nvmeq, struct nvme_ns *ns, return 0; free_info: - free_info(info); + free_info(nvmeq, info); congestion: return -EBUSY; } @@ -852,13 +924,15 @@ static int nvme_submit_user_admin_command(struct nvme_dev *dev, { int err, nents; struct scatterlist *sg; + struct nvme_prps *prps; nents = nvme_map_user_pages(dev, 0, addr, length, &sg); if (nents < 0) return nents; - nvme_setup_prps(&cmd->common, sg, length); + prps = nvme_setup_prps(dev->queues[0], &cmd->common, sg, length); err = nvme_submit_admin_cmd(dev, cmd, NULL); nvme_unmap_user_pages(dev, 0, addr, length, sg, nents); + nvme_free_prps(dev->queues[0], prps); return err ? -EIO : 0; } @@ -896,6 +970,7 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio) u32 result; int nents, status; struct scatterlist *sg; + struct nvme_prps *prps; if (copy_from_user(&io, uio, sizeof(io))) return -EFAULT; @@ -915,10 +990,10 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio) c.rw.reftag = cpu_to_le32(io.reftag); /* XXX: endian? */ c.rw.apptag = cpu_to_le16(io.apptag); c.rw.appmask = cpu_to_le16(io.appmask); + nvmeq = get_nvmeq(ns); /* XXX: metadata */ - nvme_setup_prps(&c.common, sg, length); + prps = nvme_setup_prps(nvmeq, &c.common, sg, length); - nvmeq = get_nvmeq(ns); /* Since nvme_submit_sync_cmd sleeps, we can't keep preemption * disabled. We may be preempted at any point, and be rescheduled * to a different CPU. That will cause cacheline bouncing, but no @@ -928,6 +1003,7 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio) status = nvme_submit_sync_cmd(nvmeq, &c, &result, IO_TIMEOUT); nvme_unmap_user_pages(dev, io.opcode & 1, io.addr, length, sg, nents); + nvme_free_prps(nvmeq, prps); put_user(result, &uio->result); return status; } @@ -940,6 +1016,7 @@ static int nvme_download_firmware(struct nvme_ns *ns, struct nvme_command c; int nents, status; struct scatterlist *sg; + struct nvme_prps *prps; if (copy_from_user(&dlfw, udlfw, sizeof(dlfw))) return -EFAULT; @@ -954,10 +1031,11 @@ static int nvme_download_firmware(struct nvme_ns *ns, c.dlfw.opcode = nvme_admin_download_fw; c.dlfw.numd = cpu_to_le32(dlfw.length); c.dlfw.offset = cpu_to_le32(dlfw.offset); - nvme_setup_prps(&c.common, sg, dlfw.length * 4); + prps = nvme_setup_prps(dev->queues[0], &c.common, sg, dlfw.length * 4); status = nvme_submit_admin_cmd(dev, &c, NULL); nvme_unmap_user_pages(dev, 0, dlfw.addr, dlfw.length * 4, sg, nents); + nvme_free_prps(dev->queues[0], prps); return status; } -- 2.34.1