From ba6abf1352dc83e500a71e3ad9b39de0337f0c6b Mon Sep 17 00:00:00 2001 From: Pete Zaitcev Date: Sat, 30 Jul 2005 22:38:30 -0700 Subject: [PATCH] [PATCH] USB: ub 1/3: Axboe's quasi-S/G This the quasi-S/G patch for ub as suggested by Jens Axboe at OLS and implemented that night before 4 a.m. Surprisingly, it worked right away... Alas, I had to skip some OLS partying, but it was for the good cause. Now the speed of ub is quite acceptable even on partitions with small block size. The ub does not really support S/G. Instead, it just tells the block layer that it does. Then, most of the time, the block layer merges requests and passes single-segmnent requests down to ub; everything works as before. Very rarely ub gets an unmerged S/G request. In such case, it issues several commands to the device. I added a small array of counters to monitor the merging (sg_stat). This may be dropped later. Signed-off-by: Pete Zaitcev Signed-off-by: Greg Kroah-Hartman --- drivers/block/ub.c | 185 ++++++++++++++++++++++++++++++--------------- 1 file changed, 126 insertions(+), 59 deletions(-) diff --git a/drivers/block/ub.c b/drivers/block/ub.c index a026567f5d18..fb3d1e9bc407 100644 --- a/drivers/block/ub.c +++ b/drivers/block/ub.c @@ -16,9 +16,10 @@ * -- verify the 13 conditions and do bulk resets * -- kill last_pipe and simply do two-state clearing on both pipes * -- verify protocol (bulk) from USB descriptors (maybe...) - * -- highmem and sg + * -- highmem * -- move top_sense and work_bcs into separate allocations (if they survive) * for cache purists and esoteric architectures. + * -- Allocate structure for LUN 0 before the first ub_sync_tur, avoid NULL. ? * -- prune comments, they are too volumnous * -- Exterminate P3 printks * -- Resove XXX's @@ -171,7 +172,7 @@ struct bulk_cs_wrap { */ struct ub_dev; -#define UB_MAX_REQ_SG 1 +#define UB_MAX_REQ_SG 4 #define UB_MAX_SECTORS 64 /* @@ -240,13 +241,21 @@ struct ub_scsi_cmd { */ char *data; /* Requested buffer */ unsigned int len; /* Requested length */ - // struct scatterlist sgv[UB_MAX_REQ_SG]; struct ub_lun *lun; void (*done)(struct ub_dev *, struct ub_scsi_cmd *); void *back; }; +struct ub_request { + struct request *rq; + unsigned char dir; + unsigned int current_block; + unsigned int current_sg; + unsigned int nsg; /* sgv[nsg] */ + struct scatterlist sgv[UB_MAX_REQ_SG]; +}; + /* */ struct ub_capacity { @@ -342,6 +351,8 @@ struct ub_lun { int readonly; int first_open; /* Kludge. See ub_bd_open. */ + struct ub_request urq; + /* Use Ingo's mempool if or when we have more than one command. */ /* * Currently we never need more than one command for the whole device. @@ -389,6 +400,7 @@ struct ub_dev { struct bulk_cs_wrap work_bcs; struct usb_ctrlrequest work_cr; + int sg_stat[UB_MAX_REQ_SG+1]; struct ub_scsi_trace tr; }; @@ -398,10 +410,14 @@ static void ub_cleanup(struct ub_dev *sc); static int ub_bd_rq_fn_1(struct ub_lun *lun, struct request *rq); static int ub_cmd_build_block(struct ub_dev *sc, struct ub_lun *lun, struct ub_scsi_cmd *cmd, struct request *rq); -static int ub_cmd_build_packet(struct ub_dev *sc, struct ub_scsi_cmd *cmd, - struct request *rq); +static void ub_scsi_build_block(struct ub_lun *lun, + struct ub_scsi_cmd *cmd, struct ub_request *urq); +static int ub_cmd_build_packet(struct ub_dev *sc, struct ub_lun *lun, + struct ub_scsi_cmd *cmd, struct request *rq); static void ub_rw_cmd_done(struct ub_dev *sc, struct ub_scsi_cmd *cmd); static void ub_end_rq(struct request *rq, int uptodate); +static int ub_request_advance(struct ub_dev *sc, struct ub_lun *lun, + struct ub_request *urq, struct ub_scsi_cmd *cmd); static int ub_submit_scsi(struct ub_dev *sc, struct ub_scsi_cmd *cmd); static void ub_urb_complete(struct urb *urb, struct pt_regs *pt); static void ub_scsi_action(unsigned long _dev); @@ -523,6 +539,13 @@ static ssize_t ub_diag_show(struct device *dev, struct device_attribute *attr, c cnt += sprintf(page + cnt, "qlen %d qmax %d\n", sc->cmd_queue.qlen, sc->cmd_queue.qmax); + cnt += sprintf(page + cnt, + "sg %d %d %d %d %d\n", + sc->sg_stat[0], + sc->sg_stat[1], + sc->sg_stat[2], + sc->sg_stat[3], + sc->sg_stat[4]); list_for_each (p, &sc->luns) { lun = list_entry(p, struct ub_lun, link); @@ -769,14 +792,15 @@ static int ub_bd_rq_fn_1(struct ub_lun *lun, struct request *rq) return 0; } + if (lun->urq.rq != NULL) + return -1; if ((cmd = ub_get_cmd(lun)) == NULL) return -1; memset(cmd, 0, sizeof(struct ub_scsi_cmd)); blkdev_dequeue_request(rq); - if (blk_pc_request(rq)) { - rc = ub_cmd_build_packet(sc, cmd, rq); + rc = ub_cmd_build_packet(sc, lun, cmd, rq); } else { rc = ub_cmd_build_block(sc, lun, cmd, rq); } @@ -788,10 +812,10 @@ static int ub_bd_rq_fn_1(struct ub_lun *lun, struct request *rq) cmd->state = UB_CMDST_INIT; cmd->lun = lun; cmd->done = ub_rw_cmd_done; - cmd->back = rq; + cmd->back = &lun->urq; cmd->tag = sc->tagcnt++; - if ((rc = ub_submit_scsi(sc, cmd)) != 0) { + if (ub_submit_scsi(sc, cmd) != 0) { ub_put_cmd(lun, cmd); ub_end_rq(rq, 0); return 0; @@ -803,12 +827,12 @@ static int ub_bd_rq_fn_1(struct ub_lun *lun, struct request *rq) static int ub_cmd_build_block(struct ub_dev *sc, struct ub_lun *lun, struct ub_scsi_cmd *cmd, struct request *rq) { + struct ub_request *urq; int ub_dir; -#if 0 /* We use rq->buffer for now */ - struct scatterlist *sg; int n_elem; -#endif - unsigned int block, nblks; + + urq = &lun->urq; + memset(urq, 0, sizeof(struct ub_request)); if (rq_data_dir(rq) == WRITE) ub_dir = UB_DIR_WRITE; @@ -818,44 +842,19 @@ static int ub_cmd_build_block(struct ub_dev *sc, struct ub_lun *lun, /* * get scatterlist from block layer */ -#if 0 /* We use rq->buffer for now */ - sg = &cmd->sgv[0]; - n_elem = blk_rq_map_sg(q, rq, sg); + n_elem = blk_rq_map_sg(lun->disk->queue, rq, &urq->sgv[0]); if (n_elem <= 0) { - ub_put_cmd(lun, cmd); - ub_end_rq(rq, 0); - blk_start_queue(q); - return 0; /* request with no s/g entries? */ + printk(KERN_INFO "%s: failed request map (%d)\n", + sc->name, n_elem); /* P3 */ + return -1; /* request with no s/g entries? */ } - - if (n_elem != 1) { /* Paranoia */ + if (n_elem > UB_MAX_REQ_SG) { /* Paranoia */ printk(KERN_WARNING "%s: request with %d segments\n", sc->name, n_elem); - ub_put_cmd(lun, cmd); - ub_end_rq(rq, 0); - blk_start_queue(q); - return 0; - } -#endif - - /* - * XXX Unfortunately, this check does not work. It is quite possible - * to get bogus non-null rq->buffer if you allow sg by mistake. - */ - if (rq->buffer == NULL) { - /* - * This must not happen if we set the queue right. - * The block level must create bounce buffers for us. - */ - static int do_print = 1; - if (do_print) { - printk(KERN_WARNING "%s: unmapped block request" - " flags 0x%lx sectors %lu\n", - sc->name, rq->flags, rq->nr_sectors); - do_print = 0; - } return -1; } + urq->nsg = n_elem; + sc->sg_stat[n_elem]++; /* * build the command @@ -863,10 +862,29 @@ static int ub_cmd_build_block(struct ub_dev *sc, struct ub_lun *lun, * The call to blk_queue_hardsect_size() guarantees that request * is aligned, but it is given in terms of 512 byte units, always. */ - block = rq->sector >> lun->capacity.bshift; - nblks = rq->nr_sectors >> lun->capacity.bshift; + urq->current_block = rq->sector >> lun->capacity.bshift; + // nblks = rq->nr_sectors >> lun->capacity.bshift; + + urq->rq = rq; + urq->current_sg = 0; + urq->dir = ub_dir; + + ub_scsi_build_block(lun, cmd, urq); + return 0; +} + +static void ub_scsi_build_block(struct ub_lun *lun, + struct ub_scsi_cmd *cmd, struct ub_request *urq) +{ + struct scatterlist *sg; + unsigned int block, nblks; + + sg = &urq->sgv[urq->current_sg]; + + block = urq->current_block; + nblks = sg->length >> (lun->capacity.bshift + 9); - cmd->cdb[0] = (ub_dir == UB_DIR_READ)? READ_10: WRITE_10; + cmd->cdb[0] = (urq->dir == UB_DIR_READ)? READ_10: WRITE_10; /* 10-byte uses 4 bytes of LBA: 2147483648KB, 2097152MB, 2048GB */ cmd->cdb[2] = block >> 24; cmd->cdb[3] = block >> 16; @@ -876,16 +894,20 @@ static int ub_cmd_build_block(struct ub_dev *sc, struct ub_lun *lun, cmd->cdb[8] = nblks; cmd->cdb_len = 10; - cmd->dir = ub_dir; - cmd->data = rq->buffer; - cmd->len = rq->nr_sectors * 512; - - return 0; + cmd->dir = urq->dir; + cmd->data = page_address(sg->page) + sg->offset; + cmd->len = sg->length; } -static int ub_cmd_build_packet(struct ub_dev *sc, struct ub_scsi_cmd *cmd, - struct request *rq) +static int ub_cmd_build_packet(struct ub_dev *sc, struct ub_lun *lun, + struct ub_scsi_cmd *cmd, struct request *rq) { + struct ub_request *urq; + + urq = &lun->urq; + memset(urq, 0, sizeof(struct ub_request)); + urq->rq = rq; + sc->sg_stat[0]++; if (rq->data_len != 0 && rq->data == NULL) { static int do_print = 1; @@ -917,12 +939,13 @@ static int ub_cmd_build_packet(struct ub_dev *sc, struct ub_scsi_cmd *cmd, static void ub_rw_cmd_done(struct ub_dev *sc, struct ub_scsi_cmd *cmd) { - struct request *rq = cmd->back; struct ub_lun *lun = cmd->lun; - struct gendisk *disk = lun->disk; - request_queue_t *q = disk->queue; + struct ub_request *urq = cmd->back; + struct request *rq; int uptodate; + rq = urq->rq; + if (blk_pc_request(rq)) { /* UB_SENSE_SIZE is smaller than SCSI_SENSE_BUFFERSIZE */ memcpy(rq->sense, sc->top_sense, UB_SENSE_SIZE); @@ -934,9 +957,19 @@ static void ub_rw_cmd_done(struct ub_dev *sc, struct ub_scsi_cmd *cmd) else uptodate = 0; + if (cmd->error == 0 && urq->current_sg+1 < urq->nsg) { + if (ub_request_advance(sc, lun, urq, cmd) == 0) { + /* Stay on target... */ + return; + } + uptodate = 0; + } + + urq->rq = NULL; + ub_put_cmd(lun, cmd); ub_end_rq(rq, uptodate); - blk_start_queue(q); + blk_start_queue(lun->disk->queue); } static void ub_end_rq(struct request *rq, int uptodate) @@ -948,6 +981,40 @@ static void ub_end_rq(struct request *rq, int uptodate) end_that_request_last(rq); } +static int ub_request_advance(struct ub_dev *sc, struct ub_lun *lun, + struct ub_request *urq, struct ub_scsi_cmd *cmd) +{ + struct scatterlist *sg; + unsigned int nblks; + + /* XXX This is temporary, until we sort out S/G in packet requests. */ + if (blk_pc_request(urq->rq)) { + printk(KERN_WARNING + "2-segment packet request completed\n"); /* P3 */ + return -1; + } + + sg = &urq->sgv[urq->current_sg]; + nblks = sg->length >> (lun->capacity.bshift + 9); + urq->current_block += nblks; + urq->current_sg++; + sg++; + + memset(cmd, 0, sizeof(struct ub_scsi_cmd)); + ub_scsi_build_block(lun, cmd, urq); + cmd->state = UB_CMDST_INIT; + cmd->lun = lun; + cmd->done = ub_rw_cmd_done; + cmd->back = &lun->urq; + + cmd->tag = sc->tagcnt++; + if (ub_submit_scsi(sc, cmd) != 0) { + return -1; + } + + return 0; +} + /* * Submit a regular SCSI operation (not an auto-sense). * -- 2.34.1