[SCSI] mptfusion: Fix for device offline while doing aggressive HBA reset
author kashyap.desai@lsi.com <kashyap.desai@lsi.com>
Fri, 5 Aug 2011 05:34:37 +0000 (11:04 +0530)
committer James Bottomley <JBottomley@Parallels.com>
Sat, 27 Aug 2011 14:35:53 +0000 (08:35 -0600)
[Resending the patch as per Bernd Schubert's comment]

Issue:

The device goes offline when aggressive HBA resets are issued from a
utility while IO is running.

Root cause:

The FW goes into a bad state due to the aggressive resets, and a
SoftReset does not help the FW recover. The aggressive resets also open
up a window for the error-handling thread to be kicked off while the
HBA is in a constant RESET loop (as part of the aggressive reset test
case), which can lead to the device going offline.

Changes:

1. Added an extra check inside the eh_timed_out callback, as below:
   if (ioc->ioc_reset_in_progress) rc = BLK_EH_RESET_TIMER

2. Removed the "DOORBELL_ACTIVE" check for SAS controllers from the
   task management context. Since SAS controllers use the high-priority
   queue for task management, this check is not required for them.

3. Replaced the SoftReset call with a HardReset in the Task Mgmt
   context for SAS controllers.

Signed-off-by: Kashyap Desai <kashyap.desai@lsi.com>
Signed-off-by: James Bottomley <JBottomley@Parallels.com>
drivers/message/fusion/mptbase.c
drivers/message/fusion/mptbase.h
drivers/message/fusion/mptsas.c
drivers/message/fusion/mptscsih.c

index 517621fa8bca3ffebc0206e9b7cdee5524c9f33c..e9c6a6047a00cfcf5c28306226e62ef148a35531 100644 (file)
@@ -6474,8 +6474,19 @@ mpt_config(MPT_ADAPTER *ioc, CONFIGPARMS *pCfg)
                        pReq->Action, ioc->mptbase_cmds.status, timeleft));
                if (ioc->mptbase_cmds.status & MPT_MGMT_STATUS_DID_IOCRESET)
                        goto out;
-               if (!timeleft)
+               if (!timeleft) {
+                       spin_lock_irqsave(&ioc->taskmgmt_lock, flags);
+                       if (ioc->ioc_reset_in_progress) {
+                               spin_unlock_irqrestore(&ioc->taskmgmt_lock,
+                                       flags);
+                               printk(MYIOC_s_INFO_FMT "%s: host reset in"
+                                       " progress mpt_config timed out.!!\n",
+                                       __func__, ioc->name);
+                               return -EFAULT;
+                       }
+                       spin_unlock_irqrestore(&ioc->taskmgmt_lock, flags);
                        issue_hard_reset = 1;
+               }
                goto out;
        }
 
@@ -7189,7 +7200,18 @@ mpt_HardResetHandler(MPT_ADAPTER *ioc, int sleepFlag)
        spin_lock_irqsave(&ioc->taskmgmt_lock, flags);
        if (ioc->ioc_reset_in_progress) {
                spin_unlock_irqrestore(&ioc->taskmgmt_lock, flags);
-               return 0;
+               ioc->wait_on_reset_completion = 1;
+               do {
+                       ssleep(1);
+               } while (ioc->ioc_reset_in_progress == 1);
+               ioc->wait_on_reset_completion = 0;
+               return ioc->reset_status;
+       }
+       if (ioc->wait_on_reset_completion) {
+               spin_unlock_irqrestore(&ioc->taskmgmt_lock, flags);
+               rc = 0;
+               time_count = jiffies;
+               goto exit;
        }
        ioc->ioc_reset_in_progress = 1;
        if (ioc->alt_ioc)
@@ -7226,6 +7248,7 @@ mpt_HardResetHandler(MPT_ADAPTER *ioc, int sleepFlag)
        ioc->ioc_reset_in_progress = 0;
        ioc->taskmgmt_quiesce_io = 0;
        ioc->taskmgmt_in_progress = 0;
+       ioc->reset_status = rc;
        if (ioc->alt_ioc) {
                ioc->alt_ioc->ioc_reset_in_progress = 0;
                ioc->alt_ioc->taskmgmt_quiesce_io = 0;
@@ -7241,7 +7264,7 @@ mpt_HardResetHandler(MPT_ADAPTER *ioc, int sleepFlag)
                                        ioc->alt_ioc, MPT_IOC_POST_RESET);
                }
        }
-
+exit:
        dtmprintk(ioc,
            printk(MYIOC_s_DEBUG_FMT
                "HardResetHandler: completed (%d seconds): %s\n", ioc->name,
index a4048ea45c9251f0957c860e45cce12ed34b5844..b4d24dc081ae5de35c3d4d0c3765ddf63235b517 100644 (file)
@@ -753,6 +753,8 @@ typedef struct _MPT_ADAPTER
        int                      taskmgmt_in_progress;
        u8                       taskmgmt_quiesce_io;
        u8                       ioc_reset_in_progress;
+       u8                       reset_status;
+       u8                       wait_on_reset_completion;
        MPT_SCHEDULE_TARGET_RESET schedule_target_reset;
        MPT_FLUSH_RUNNING_CMDS schedule_dead_ioc_flush_running_cmds;
        struct work_struct       sas_persist_task;
index 074e52254fcd22573708548ff5742f922c6d8fbd..9d9504298549d147133e107a1d66c1a74440c3eb 100644 (file)
@@ -1950,6 +1950,15 @@ static enum blk_eh_timer_return mptsas_eh_timed_out(struct scsi_cmnd *sc)
                goto done;
        }
 
+       /* In case if IOC is in reset from internal context.
+       *  Do not execute EEH for the same IOC. SML should to reset timer.
+       */
+       if (ioc->ioc_reset_in_progress) {
+               dtmprintk(ioc, printk(MYIOC_s_WARN_FMT ": %s: ioc is in reset,"
+                   "SML need to reset the timer (sc=%p)\n",
+                   ioc->name, __func__, sc));
+               rc = BLK_EH_RESET_TIMER;
+       }
        vdevice = sc->device->hostdata;
        if (vdevice && vdevice->vtarget && (vdevice->vtarget->inDMD
                || vdevice->vtarget->deleted)) {
index de8cf92d8614e42fe5252dde6fb51991c8ff84ec..ced6e4dc0847a1dc76fa2d677f4e5962506322f0 100644 (file)
@@ -1630,7 +1630,13 @@ mptscsih_IssueTaskMgmt(MPT_SCSI_HOST *hd, u8 type, u8 channel, u8 id, int lun,
                return 0;
        }
 
-       if (ioc_raw_state & MPI_DOORBELL_ACTIVE) {
+       /* DOORBELL ACTIVE check is not required if
+       *  MPI_IOCFACTS_CAPABILITY_HIGH_PRI_Q is supported.
+       */
+
+       if (!((ioc->facts.IOCCapabilities & MPI_IOCFACTS_CAPABILITY_HIGH_PRI_Q)
+                && (ioc->facts.MsgVersion >= MPI_VERSION_01_05)) &&
+               (ioc_raw_state & MPI_DOORBELL_ACTIVE)) {
                printk(MYIOC_s_WARN_FMT
                        "TaskMgmt type=%x: ioc_state: "
                        "DOORBELL_ACTIVE (0x%x)!\n",
@@ -1729,7 +1735,9 @@ mptscsih_IssueTaskMgmt(MPT_SCSI_HOST *hd, u8 type, u8 channel, u8 id, int lun,
                printk(MYIOC_s_WARN_FMT
                       "Issuing Reset from %s!! doorbell=0x%08x\n",
                       ioc->name, __func__, mpt_GetIocState(ioc, 0));
-               retval = mpt_Soft_Hard_ResetHandler(ioc, CAN_SLEEP);
+               retval = (ioc->bus_type == SAS) ?
+                       mpt_HardResetHandler(ioc, CAN_SLEEP) :
+                       mpt_Soft_Hard_ResetHandler(ioc, CAN_SLEEP);
                mpt_free_msg_frame(ioc, mf);
        }