From 85617dc7ee1caa68a8d078b5dd4328d0ef373282 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Tue, 17 Feb 2015 08:02:49 +0100 Subject: [PATCH] xen: support suspend/resume in pvscsi frontend Up to now the pvscsi frontend hasn't supported domain suspend and resume. When a domain with an assigned pvscsi device was suspended and resumed again, it was not able to use the device any more: trying to do so resulted in hanging processes. Support suspend and resume of pvscsi devices. Signed-off-by: Juergen Gross Signed-off-by: David Vrabel --- drivers/scsi/xen-scsifront.c | 214 +++++++++++++++++++++++++++++------ 1 file changed, 179 insertions(+), 35 deletions(-) diff --git a/drivers/scsi/xen-scsifront.c b/drivers/scsi/xen-scsifront.c index 34199d206ba6..78d95069ac6a 100644 --- a/drivers/scsi/xen-scsifront.c +++ b/drivers/scsi/xen-scsifront.c @@ -63,6 +63,7 @@ #define VSCSIFRONT_OP_ADD_LUN 1 #define VSCSIFRONT_OP_DEL_LUN 2 +#define VSCSIFRONT_OP_READD_LUN 3 /* Tuning point. */ #define VSCSIIF_DEFAULT_CMD_PER_LUN 10 @@ -113,8 +114,13 @@ struct vscsifrnt_info { DECLARE_BITMAP(shadow_free_bitmap, VSCSIIF_MAX_REQS); struct vscsifrnt_shadow *shadow[VSCSIIF_MAX_REQS]; + /* Following items are protected by the host lock. */ wait_queue_head_t wq_sync; + wait_queue_head_t wq_pause; unsigned int wait_ring_available:1; + unsigned int waiting_pause:1; + unsigned int pause:1; + unsigned callers; char dev_state_path[64]; struct task_struct *curr; @@ -274,31 +280,31 @@ static void scsifront_sync_cmd_done(struct vscsifrnt_info *info, wake_up(&shadow->wq_reset); } -static int scsifront_cmd_done(struct vscsifrnt_info *info) +static void scsifront_do_response(struct vscsifrnt_info *info, + struct vscsiif_response *ring_rsp) +{ + if (WARN(ring_rsp->rqid >= VSCSIIF_MAX_REQS || + test_bit(ring_rsp->rqid, info->shadow_free_bitmap), + "illegal rqid %u returned by backend!\n", ring_rsp->rqid)) + return; + + if (info->shadow[ring_rsp->rqid]->act == VSCSIIF_ACT_SCSI_CDB) + scsifront_cdb_cmd_done(info, ring_rsp); + else + scsifront_sync_cmd_done(info, ring_rsp); +} + +static int scsifront_ring_drain(struct vscsifrnt_info *info) { struct vscsiif_response *ring_rsp; RING_IDX i, rp; int more_to_do = 0; - unsigned long flags; - - spin_lock_irqsave(info->host->host_lock, flags); rp = info->ring.sring->rsp_prod; rmb(); /* ordering required respective to dom0 */ for (i = info->ring.rsp_cons; i != rp; i++) { - ring_rsp = RING_GET_RESPONSE(&info->ring, i); - - if (WARN(ring_rsp->rqid >= VSCSIIF_MAX_REQS || - test_bit(ring_rsp->rqid, info->shadow_free_bitmap), - "illegal rqid %u returned by backend!\n", - ring_rsp->rqid)) - continue; - - if (info->shadow[ring_rsp->rqid]->act == VSCSIIF_ACT_SCSI_CDB) - scsifront_cdb_cmd_done(info, ring_rsp); - else - scsifront_sync_cmd_done(info, ring_rsp); + scsifront_do_response(info, ring_rsp); } info->ring.rsp_cons = i; @@ -308,6 +314,18 @@ static int scsifront_cmd_done(struct vscsifrnt_info *info) else info->ring.sring->rsp_event = i + 1; + return more_to_do; +} + +static int scsifront_cmd_done(struct vscsifrnt_info *info) +{ + int more_to_do; + unsigned long flags; + + spin_lock_irqsave(info->host->host_lock, flags); + + more_to_do = scsifront_ring_drain(info); + info->wait_ring_available = 0; spin_unlock_irqrestore(info->host->host_lock, flags); @@ -328,6 +346,24 @@ static irqreturn_t scsifront_irq_fn(int irq, void *dev_id) return IRQ_HANDLED; } +static void scsifront_finish_all(struct vscsifrnt_info *info) +{ + unsigned i; + struct vscsiif_response resp; + + scsifront_ring_drain(info); + + for (i = 0; i < VSCSIIF_MAX_REQS; i++) { + if (test_bit(i, info->shadow_free_bitmap)) + continue; + resp.rqid = i; + resp.sense_len = 0; + resp.rslt = DID_RESET << 16; + resp.residual_len = 0; + scsifront_do_response(info, &resp); + } +} + static int map_data_for_request(struct vscsifrnt_info *info, struct scsi_cmnd *sc, struct vscsiif_request *ring_req, @@ -475,6 +511,27 @@ static struct vscsiif_request *scsifront_command2ring( return ring_req; } +static int scsifront_enter(struct vscsifrnt_info *info) +{ + if (info->pause) + return 1; + info->callers++; + return 0; +} + +static void scsifront_return(struct vscsifrnt_info *info) +{ + info->callers--; + if (info->callers) + return; + + if (!info->waiting_pause) + return; + + info->waiting_pause = 0; + wake_up(&info->wq_pause); +} + static int scsifront_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *sc) { @@ -486,6 +543,10 @@ static int scsifront_queuecommand(struct Scsi_Host *shost, uint16_t rqid; spin_lock_irqsave(shost->host_lock, flags); + if (scsifront_enter(info)) { + spin_unlock_irqrestore(shost->host_lock, flags); + return SCSI_MLQUEUE_HOST_BUSY; + } if (RING_FULL(&info->ring)) goto busy; @@ -505,6 +566,7 @@ static int scsifront_queuecommand(struct Scsi_Host *shost, if (err < 0) { pr_debug("%s: err %d\n", __func__, err); scsifront_put_rqid(info, rqid); + scsifront_return(info); spin_unlock_irqrestore(shost->host_lock, flags); if (err == -ENOMEM) return SCSI_MLQUEUE_HOST_BUSY; @@ -514,11 +576,13 @@ static int scsifront_queuecommand(struct Scsi_Host *shost, } scsifront_do_request(info); + scsifront_return(info); spin_unlock_irqrestore(shost->host_lock, flags); return 0; busy: + scsifront_return(info); spin_unlock_irqrestore(shost->host_lock, flags); pr_debug("%s: busy\n", __func__); return SCSI_MLQUEUE_HOST_BUSY; @@ -549,7 +613,7 @@ static int scsifront_action_handler(struct scsi_cmnd *sc, uint8_t act) if (ring_req) break; } - if (err) { + if (err || info->pause) { spin_unlock_irq(host->host_lock); kfree(shadow); return FAILED; @@ -561,6 +625,11 @@ static int scsifront_action_handler(struct scsi_cmnd *sc, uint8_t act) spin_lock_irq(host->host_lock); } + if (scsifront_enter(info)) { + spin_unlock_irq(host->host_lock); + return FAILED; + } + ring_req->act = act; ring_req->ref_rqid = s->rqid; @@ -587,6 +656,7 @@ static int scsifront_action_handler(struct scsi_cmnd *sc, uint8_t act) err = FAILED; } + scsifront_return(info); spin_unlock_irq(host->host_lock); return err; } @@ -698,6 +768,13 @@ free_gnttab: return err; } +static void scsifront_free_ring(struct vscsifrnt_info *info) +{ + unbind_from_irqhandler(info->irq, info); + gnttab_end_foreign_access(info->ring_ref, 0, + (unsigned long)info->ring.sring); +} + static int scsifront_init_ring(struct vscsifrnt_info *info) { struct xenbus_device *dev = info->dev; @@ -744,9 +821,7 @@ again: fail: xenbus_transaction_end(xbt, 1); free_sring: - unbind_from_irqhandler(info->irq, info); - gnttab_end_foreign_access(info->ring_ref, 0, - (unsigned long)info->ring.sring); + scsifront_free_ring(info); return err; } @@ -779,6 +854,7 @@ static int scsifront_probe(struct xenbus_device *dev, } init_waitqueue_head(&info->wq_sync); + init_waitqueue_head(&info->wq_pause); spin_lock_init(&info->shadow_lock); snprintf(name, TASK_COMM_LEN, "vscsiif.%d", host->host_no); @@ -802,13 +878,60 @@ static int scsifront_probe(struct xenbus_device *dev, return 0; free_sring: - unbind_from_irqhandler(info->irq, info); - gnttab_end_foreign_access(info->ring_ref, 0, - (unsigned long)info->ring.sring); + scsifront_free_ring(info); scsi_host_put(host); return err; } +static int scsifront_resume(struct xenbus_device *dev) +{ + struct vscsifrnt_info *info = dev_get_drvdata(&dev->dev); + struct Scsi_Host *host = info->host; + int err; + + spin_lock_irq(host->host_lock); + + /* Finish all still pending commands. */ + scsifront_finish_all(info); + + spin_unlock_irq(host->host_lock); + + /* Reconnect to dom0. */ + scsifront_free_ring(info); + err = scsifront_init_ring(info); + if (err) { + dev_err(&dev->dev, "fail to resume %d\n", err); + scsi_host_put(host); + return err; + } + + xenbus_switch_state(dev, XenbusStateInitialised); + + return 0; +} + +static int scsifront_suspend(struct xenbus_device *dev) +{ + struct vscsifrnt_info *info = dev_get_drvdata(&dev->dev); + struct Scsi_Host *host = info->host; + int err = 0; + + /* No new commands for the backend. */ + spin_lock_irq(host->host_lock); + info->pause = 1; + while (info->callers && !err) { + info->waiting_pause = 1; + info->wait_ring_available = 0; + spin_unlock_irq(host->host_lock); + wake_up(&info->wq_sync); + err = wait_event_interruptible(info->wq_pause, + !info->waiting_pause); + spin_lock_irq(host->host_lock); + } + spin_unlock_irq(host->host_lock); + return err; +} + static int scsifront_remove(struct xenbus_device *dev) { struct vscsifrnt_info *info = dev_get_drvdata(&dev->dev); @@ -823,10 +946,7 @@ static int scsifront_remove(struct xenbus_device *dev) } mutex_unlock(&scsifront_mutex); - gnttab_end_foreign_access(info->ring_ref, 0, - (unsigned long)info->ring.sring); - unbind_from_irqhandler(info->irq, info); - + scsifront_free_ring(info); scsi_host_put(info->host); return 0; @@ -919,6 +1039,12 @@ static void scsifront_do_lun_hotplug(struct vscsifrnt_info *info, int op) scsi_device_put(sdev); } break; + case VSCSIFRONT_OP_READD_LUN: + if (device_state == XenbusStateConnected) + xenbus_printf(XBT_NIL, dev->nodename, + info->dev_state_path, + "%d", XenbusStateConnected); + break; default: break; } @@ -932,21 +1058,29 @@ static void scsifront_do_lun_hotplug(struct vscsifrnt_info *info, int op) static void scsifront_read_backend_params(struct xenbus_device *dev, struct vscsifrnt_info *info) { - unsigned int sg_grant; + unsigned int sg_grant, nr_segs; int ret; struct Scsi_Host *host = info->host; ret = xenbus_scanf(XBT_NIL, dev->otherend, "feature-sg-grant", "%u", &sg_grant); - if (ret == 1 && sg_grant) { - sg_grant = min_t(unsigned int, sg_grant, SG_ALL); - sg_grant = max_t(unsigned int, sg_grant, VSCSIIF_SG_TABLESIZE); - host->sg_tablesize = min_t(unsigned int, sg_grant, + if (ret != 1) + sg_grant = 0; + nr_segs = min_t(unsigned int, sg_grant, SG_ALL); + nr_segs = max_t(unsigned int, nr_segs, VSCSIIF_SG_TABLESIZE); + nr_segs = min_t(unsigned int, nr_segs, VSCSIIF_SG_TABLESIZE * PAGE_SIZE / sizeof(struct scsiif_request_segment)); - host->max_sectors = (host->sg_tablesize - 1) * PAGE_SIZE / 512; - } - dev_info(&dev->dev, "using up to %d SG entries\n", host->sg_tablesize); + + if (!info->pause && sg_grant) + dev_info(&dev->dev, "using up to %d SG entries\n", nr_segs); + else if (info->pause && nr_segs < host->sg_tablesize) + dev_warn(&dev->dev, + "SG entries decreased from %d to %u - device may not work properly anymore\n", + host->sg_tablesize, nr_segs); + + host->sg_tablesize = nr_segs; + host->max_sectors = (nr_segs - 1) * PAGE_SIZE / 512; } static void scsifront_backend_changed(struct xenbus_device *dev, @@ -965,6 +1099,14 @@ static void scsifront_backend_changed(struct xenbus_device *dev, case XenbusStateConnected: scsifront_read_backend_params(dev, info); + + if (info->pause) { + scsifront_do_lun_hotplug(info, VSCSIFRONT_OP_READD_LUN); + xenbus_switch_state(dev, XenbusStateConnected); + info->pause = 0; + return; + } + if (xenbus_read_driver_state(dev->nodename) == XenbusStateInitialised) scsifront_do_lun_hotplug(info, VSCSIFRONT_OP_ADD_LUN); @@ -1002,6 +1144,8 @@ static struct xenbus_driver scsifront_driver = { .ids = scsifront_ids, .probe = scsifront_probe, .remove = scsifront_remove, + .resume = scsifront_resume, + .suspend = scsifront_suspend, .otherend_changed = scsifront_backend_changed, }; -- 2.34.1