[SCSI] qla2xxx: Extend base EEH support in qla2xxx.
author Andrew Vasquez <andrew.vasquez@qlogic.com>
Wed, 16 Dec 2009 05:29:46 +0000 (21:29 -0800)
committer James Bottomley <James.Bottomley@suse.de>
Wed, 30 Dec 2009 17:09:49 +0000 (11:09 -0600)
Signed-off-by: Giridhar Malavali <giridhar.malavali@qlogic.com>
Signed-off-by: James Bottomley <James.Bottomley@suse.de>
drivers/scsi/qla2xxx/qla_attr.c
drivers/scsi/qla2xxx/qla_dbg.h
drivers/scsi/qla2xxx/qla_def.h
drivers/scsi/qla2xxx/qla_init.c
drivers/scsi/qla2xxx/qla_isr.c
drivers/scsi/qla2xxx/qla_mbx.c
drivers/scsi/qla2xxx/qla_os.c

index 21e2bc4d74013caa1446385a72f53c17eaa58391..3a9f5b288aee40f6553b3dc2362efab9f0632c47 100644 (file)
@@ -232,6 +232,9 @@ qla2x00_sysfs_write_optrom_ctl(struct kobject *kobj,
        if (off)
                return 0;
 
+       if (unlikely(pci_channel_offline(ha->pdev)))
+               return 0;
+
        if (sscanf(buf, "%d:%x:%x", &val, &start, &size) < 1)
                return -EINVAL;
        if (start > ha->optrom_size)
@@ -379,6 +382,9 @@ qla2x00_sysfs_read_vpd(struct kobject *kobj,
            struct device, kobj)));
        struct qla_hw_data *ha = vha->hw;
 
+       if (unlikely(pci_channel_offline(ha->pdev)))
+               return 0;
+
        if (!capable(CAP_SYS_ADMIN))
                return 0;
 
@@ -398,6 +404,9 @@ qla2x00_sysfs_write_vpd(struct kobject *kobj,
        struct qla_hw_data *ha = vha->hw;
        uint8_t *tmp_data;
 
+       if (unlikely(pci_channel_offline(ha->pdev)))
+               return 0;
+
        if (!capable(CAP_SYS_ADMIN) || off != 0 || count != ha->vpd_size ||
            !ha->isp_ops->write_nvram)
                return 0;
@@ -1238,10 +1247,11 @@ qla2x00_fw_state_show(struct device *dev, struct device_attribute *attr,
     char *buf)
 {
        scsi_qla_host_t *vha = shost_priv(class_to_shost(dev));
-       int rval;
+       int rval = QLA_FUNCTION_FAILED;
        uint16_t state[5];
 
-       rval = qla2x00_get_firmware_state(vha, state);
+       if (!vha->hw->flags.eeh_busy)
+               rval = qla2x00_get_firmware_state(vha, state);
        if (rval != QLA_SUCCESS)
                memset(state, -1, sizeof(state));
 
@@ -1452,10 +1462,13 @@ qla2x00_dev_loss_tmo_callbk(struct fc_rport *rport)
        if (!fcport)
                return;
 
-       if (unlikely(pci_channel_offline(fcport->vha->hw->pdev)))
+       if (test_bit(ABORT_ISP_ACTIVE, &fcport->vha->dpc_flags))
+               return;
+
+       if (unlikely(pci_channel_offline(fcport->vha->hw->pdev))) {
                qla2x00_abort_all_cmds(fcport->vha, DID_NO_CONNECT << 16);
-       else
-               qla2x00_abort_fcport_cmds(fcport);
+               return;
+       }
 
        /*
         * Transport has effectively 'deleted' the rport, clear
@@ -1475,6 +1488,9 @@ qla2x00_terminate_rport_io(struct fc_rport *rport)
        if (!fcport)
                return;
 
+       if (test_bit(ABORT_ISP_ACTIVE, &fcport->vha->dpc_flags))
+               return;
+
        if (unlikely(pci_channel_offline(fcport->vha->hw->pdev))) {
                qla2x00_abort_all_cmds(fcport->vha, DID_NO_CONNECT << 16);
                return;
@@ -1515,6 +1531,12 @@ qla2x00_get_fc_host_stats(struct Scsi_Host *shost)
        pfc_host_stat = &ha->fc_host_stat;
        memset(pfc_host_stat, -1, sizeof(struct fc_host_statistics));
 
+       if (test_bit(UNLOADING, &vha->dpc_flags))
+               goto done;
+
+       if (unlikely(pci_channel_offline(ha->pdev)))
+               goto done;
+
        stats = dma_pool_alloc(ha->s_dma_pool, GFP_KERNEL, &stats_dma);
        if (stats == NULL) {
                DEBUG2_3_11(printk("%s(%ld): Failed to allocate memory.\n",
index f660dd70b72e460671a00cc7342224197d8a4cc3..d6d9c86cb05826b8ddaf54cd465c160ad6d0d2c0 100644 (file)
@@ -26,7 +26,7 @@
 /* #define QL_DEBUG_LEVEL_14 */ /* Output RSCN trace msgs */
 /* #define QL_DEBUG_LEVEL_15 */ /* Output NPIV trace msgs */
 /* #define QL_DEBUG_LEVEL_16 */ /* Output ISP84XX trace msgs */
-/* #define QL_DEBUG_LEVEL_17 */ /* Output MULTI-Q trace messages */
+/* #define QL_DEBUG_LEVEL_17 */ /* Output EEH trace messages */
 
 /*
 * Macros use for debugging the driver.
 #else
 #define DEBUG16(x)     do {} while (0)
 #endif
+
+#if defined(QL_DEBUG_LEVEL_17)
+#define DEBUG17(x)     do {x;} while (0)
+#else
+#define DEBUG17(x)     do {} while (0)
+#endif
+
 /*
  * Firmware Dump structure definition
  */
index 384afda7dbe942f2ade0f189be0da5e4e7d71685..608e675f68c8569b79ccfc2fb79a2965824de347 100644 (file)
@@ -2256,11 +2256,13 @@ struct qla_hw_data {
                uint32_t        disable_serdes          :1;
                uint32_t        gpsc_supported          :1;
                uint32_t        npiv_supported          :1;
+               uint32_t        pci_channel_io_perm_failure     :1;
                uint32_t        fce_enabled             :1;
                uint32_t        fac_supported           :1;
                uint32_t        chip_reset_done         :1;
                uint32_t        port0                   :1;
                uint32_t        running_gold_fw         :1;
+               uint32_t        eeh_busy                :1;
                uint32_t        cpu_affinity_enabled    :1;
                uint32_t        disable_msix_handshake  :1;
        } flags;
index 0f7ea6cc02f615b58356c34b815a1705f91a8834..b4a0eac8f96d845f34043da8e9eb7e65cd5e2b98 100644 (file)
@@ -269,6 +269,8 @@ qla2x00_initialize_adapter(scsi_qla_host_t *vha)
        vha->flags.online = 0;
        ha->flags.chip_reset_done = 0;
        vha->flags.reset_active = 0;
+       ha->flags.pci_channel_io_perm_failure = 0;
+       ha->flags.eeh_busy = 0;
        atomic_set(&vha->loop_down_timer, LOOP_DOWN_TIME);
        atomic_set(&vha->loop_state, LOOP_DOWN);
        vha->device_flags = DFLG_NO_CABLE;
@@ -581,6 +583,9 @@ qla2x00_reset_chip(scsi_qla_host_t *vha)
        uint32_t        cnt;
        uint16_t        cmd;
 
+       if (unlikely(pci_channel_offline(ha->pdev)))
+               return;
+
        ha->isp_ops->disable_intrs(ha);
 
        spin_lock_irqsave(&ha->hardware_lock, flags);
@@ -786,6 +791,12 @@ void
 qla24xx_reset_chip(scsi_qla_host_t *vha)
 {
        struct qla_hw_data *ha = vha->hw;
+
+       if (pci_channel_offline(ha->pdev) &&
+           ha->flags.pci_channel_io_perm_failure) {
+               return;
+       }
+
        ha->isp_ops->disable_intrs(ha);
 
        /* Perform RISC reset. */
@@ -3562,6 +3573,13 @@ qla2x00_abort_isp(scsi_qla_host_t *vha)
                /* Requeue all commands in outstanding command list. */
                qla2x00_abort_all_cmds(vha, DID_RESET << 16);
 
+               if (unlikely(pci_channel_offline(ha->pdev) &&
+                   ha->flags.pci_channel_io_perm_failure)) {
+                       clear_bit(ISP_ABORT_RETRY, &vha->dpc_flags);
+                       status = 0;
+                       return status;
+               }
+
                ha->isp_ops->get_flash_version(vha, req->ring);
 
                ha->isp_ops->nvram_config(vha);
@@ -4460,6 +4478,8 @@ qla2x00_try_to_stop_firmware(scsi_qla_host_t *vha)
        int ret, retries;
        struct qla_hw_data *ha = vha->hw;
 
+       if (ha->flags.pci_channel_io_perm_failure)
+               return;
        if (!IS_FWI2_CAPABLE(ha))
                return;
        if (!ha->fw_major_version)
index 1692a883f4de837b7cd386bd6ea02e4de4c278ef..ffd0efdff40e5f04504e2d148349e0909172364f 100644 (file)
@@ -152,7 +152,7 @@ qla2300_intr_handler(int irq, void *dev_id)
        for (iter = 50; iter--; ) {
                stat = RD_REG_DWORD(&reg->u.isp2300.host_status);
                if (stat & HSR_RISC_PAUSED) {
-                       if (pci_channel_offline(ha->pdev))
+                       if (unlikely(pci_channel_offline(ha->pdev)))
                                break;
 
                        hccr = RD_REG_WORD(&reg->hccr);
@@ -1846,12 +1846,15 @@ qla24xx_intr_handler(int irq, void *dev_id)
        reg = &ha->iobase->isp24;
        status = 0;
 
+       if (unlikely(pci_channel_offline(ha->pdev)))
+               return IRQ_HANDLED;
+
        spin_lock_irqsave(&ha->hardware_lock, flags);
        vha = pci_get_drvdata(ha->pdev);
        for (iter = 50; iter--; ) {
                stat = RD_REG_DWORD(&reg->host_status);
                if (stat & HSRX_RISC_PAUSED) {
-                       if (pci_channel_offline(ha->pdev))
+                       if (unlikely(pci_channel_offline(ha->pdev)))
                                break;
 
                        hccr = RD_REG_DWORD(&reg->hccr);
@@ -1992,7 +1995,7 @@ qla24xx_msix_default(int irq, void *dev_id)
        do {
                stat = RD_REG_DWORD(&reg->host_status);
                if (stat & HSRX_RISC_PAUSED) {
-                       if (pci_channel_offline(ha->pdev))
+                       if (unlikely(pci_channel_offline(ha->pdev)))
                                break;
 
                        hccr = RD_REG_DWORD(&reg->hccr);
index e91f3d82b2fdea7e61f4901a1dd7db342144a0d7..056e4d4505f369852788409a8b56ba913086d546 100644 (file)
@@ -56,6 +56,12 @@ qla2x00_mailbox_command(scsi_qla_host_t *vha, mbx_cmd_t *mcp)
 
        DEBUG11(printk("%s(%ld): entered.\n", __func__, base_vha->host_no));
 
+       if (ha->flags.pci_channel_io_perm_failure) {
+               DEBUG(printk("%s(%ld): Perm failure on EEH, timeout MBX "
+                            "Exiting.\n", __func__, vha->host_no));
+               return QLA_FUNCTION_TIMEOUT;
+       }
+
        /*
         * Wait for active mailbox commands to finish by waiting at most tov
         * seconds. This is to serialize actual issuing of mailbox cmds during
@@ -154,10 +160,14 @@ qla2x00_mailbox_command(scsi_qla_host_t *vha, mbx_cmd_t *mcp)
                        /* Check for pending interrupts. */
                        qla2x00_poll(ha->rsp_q_map[0]);
 
-                       if (command != MBC_LOAD_RISC_RAM_EXTENDED &&
-                           !ha->flags.mbox_int)
+                       if (!ha->flags.mbox_int &&
+                           !(IS_QLA2200(ha) &&
+                           command == MBC_LOAD_RISC_RAM_EXTENDED))
                                msleep(10);
                } /* while */
+               DEBUG17(qla_printk(KERN_WARNING, ha,
+                       "Waited %d sec\n",
+                       (uint)((jiffies - (wait_time - (mcp->tov * HZ)))/HZ)));
        }
 
        /* Check whether we timed out */
@@ -227,7 +237,8 @@ qla2x00_mailbox_command(scsi_qla_host_t *vha, mbx_cmd_t *mcp)
 
        if (rval == QLA_FUNCTION_TIMEOUT &&
            mcp->mb[0] != MBC_GEN_SYSTEM_ERROR) {
-               if (!io_lock_on || (mcp->flags & IOCTL_CMD)) {
+               if (!io_lock_on || (mcp->flags & IOCTL_CMD) ||
+                   ha->flags.eeh_busy) {
                        /* not in dpc. schedule it for dpc to take over. */
                        DEBUG(printk("%s(%ld): timeout schedule "
                        "isp_abort_needed.\n", __func__,
@@ -237,7 +248,7 @@ qla2x00_mailbox_command(scsi_qla_host_t *vha, mbx_cmd_t *mcp)
                        base_vha->host_no));
                        qla_printk(KERN_WARNING, ha,
                            "Mailbox command timeout occurred. Scheduling ISP "
-                           "abort.\n");
+                           "abort. eeh_busy: 0x%x\n", ha->flags.eeh_busy);
                        set_bit(ISP_ABORT_NEEDED, &base_vha->dpc_flags);
                        qla2xxx_wake_dpc(vha);
                } else if (!abort_active) {
@@ -2530,6 +2541,9 @@ qla2x00_enable_eft_trace(scsi_qla_host_t *vha, dma_addr_t eft_dma,
        if (!IS_FWI2_CAPABLE(vha->hw))
                return QLA_FUNCTION_FAILED;
 
+       if (unlikely(pci_channel_offline(vha->hw->pdev)))
+               return QLA_FUNCTION_FAILED;
+
        DEBUG11(printk("%s(%ld): entered.\n", __func__, vha->host_no));
 
        mcp->mb[0] = MBC_TRACE_CONTROL;
@@ -2565,6 +2579,9 @@ qla2x00_disable_eft_trace(scsi_qla_host_t *vha)
        if (!IS_FWI2_CAPABLE(vha->hw))
                return QLA_FUNCTION_FAILED;
 
+       if (unlikely(pci_channel_offline(vha->hw->pdev)))
+               return QLA_FUNCTION_FAILED;
+
        DEBUG11(printk("%s(%ld): entered.\n", __func__, vha->host_no));
 
        mcp->mb[0] = MBC_TRACE_CONTROL;
@@ -2595,6 +2612,9 @@ qla2x00_enable_fce_trace(scsi_qla_host_t *vha, dma_addr_t fce_dma,
        if (!IS_QLA25XX(vha->hw) && !IS_QLA81XX(vha->hw))
                return QLA_FUNCTION_FAILED;
 
+       if (unlikely(pci_channel_offline(vha->hw->pdev)))
+               return QLA_FUNCTION_FAILED;
+
        DEBUG11(printk("%s(%ld): entered.\n", __func__, vha->host_no));
 
        mcp->mb[0] = MBC_TRACE_CONTROL;
@@ -2639,6 +2659,9 @@ qla2x00_disable_fce_trace(scsi_qla_host_t *vha, uint64_t *wr, uint64_t *rd)
        if (!IS_FWI2_CAPABLE(vha->hw))
                return QLA_FUNCTION_FAILED;
 
+       if (unlikely(pci_channel_offline(vha->hw->pdev)))
+               return QLA_FUNCTION_FAILED;
+
        DEBUG11(printk("%s(%ld): entered.\n", __func__, vha->host_no));
 
        mcp->mb[0] = MBC_TRACE_CONTROL;
index 2f873d23732584e334be93a64e6ae6e2ba204744..1ab358210c6a9ac302af2b6f0cee1fbf4e1f3485 100644 (file)
@@ -475,11 +475,11 @@ qla2xxx_queuecommand(struct scsi_cmnd *cmd, void (*done)(struct scsi_cmnd *))
        srb_t *sp;
        int rval;
 
-       if (unlikely(pci_channel_offline(ha->pdev))) {
-               if (ha->pdev->error_state == pci_channel_io_frozen)
-                       cmd->result = DID_REQUEUE << 16;
-               else
+       if (ha->flags.eeh_busy) {
+               if (ha->flags.pci_channel_io_perm_failure)
                        cmd->result = DID_NO_CONNECT << 16;
+               else
+                       cmd->result = DID_REQUEUE << 16;
                goto qc24_fail_command;
        }
 
@@ -552,8 +552,15 @@ qla2x00_eh_wait_on_command(struct scsi_cmnd *cmd)
 #define ABORT_POLLING_PERIOD   1000
 #define ABORT_WAIT_ITER                ((10 * 1000) / (ABORT_POLLING_PERIOD))
        unsigned long wait_iter = ABORT_WAIT_ITER;
+       scsi_qla_host_t *vha = shost_priv(cmd->device->host);
+       struct qla_hw_data *ha = vha->hw;
        int ret = QLA_SUCCESS;
 
+       if (unlikely(pci_channel_offline(ha->pdev)) || ha->flags.eeh_busy) {
+               DEBUG17(qla_printk(KERN_WARNING, ha, "return:eh_wait\n"));
+               return ret;
+       }
+
        while (CMD_SP(cmd) && wait_iter--) {
                msleep(ABORT_POLLING_PERIOD);
        }
@@ -2174,6 +2181,24 @@ qla2x00_free_device(scsi_qla_host_t *vha)
 {
        struct qla_hw_data *ha = vha->hw;
 
+       qla2x00_abort_all_cmds(vha, DID_NO_CONNECT << 16);
+
+       /* Disable timer */
+       if (vha->timer_active)
+               qla2x00_stop_timer(vha);
+
+       /* Kill the kernel thread for this host */
+       if (ha->dpc_thread) {
+               struct task_struct *t = ha->dpc_thread;
+
+               /*
+                * qla2xxx_wake_dpc checks for ->dpc_thread
+                * so we need to zero it out.
+                */
+               ha->dpc_thread = NULL;
+               kthread_stop(t);
+       }
+
        qla25xx_delete_queues(vha);
 
        if (ha->flags.fce_enabled)
@@ -2185,6 +2210,8 @@ qla2x00_free_device(scsi_qla_host_t *vha)
        /* Stop currently executing firmware. */
        qla2x00_try_to_stop_firmware(vha);
 
+       vha->flags.online = 0;
+
        /* turn-off interrupts on the card */
        if (ha->interrupts_on)
                ha->isp_ops->disable_intrs(ha);
@@ -2859,6 +2886,13 @@ qla2x00_do_dpc(void *data)
                if (!base_vha->flags.init_done)
                        continue;
 
+               if (ha->flags.eeh_busy) {
+                       DEBUG17(qla_printk(KERN_WARNING, ha,
+                           "qla2x00_do_dpc: dpc_flags: %lx\n",
+                           base_vha->dpc_flags));
+                       continue;
+               }
+
                DEBUG3(printk("scsi(%ld): DPC handler\n", base_vha->host_no));
 
                ha->dpc_active = 1;
@@ -3049,8 +3083,13 @@ qla2x00_timer(scsi_qla_host_t *vha)
        int             index;
        srb_t           *sp;
        int             t;
+       uint16_t        w;
        struct qla_hw_data *ha = vha->hw;
        struct req_que *req;
+
+       /* Hardware read to raise pending EEH errors during mailbox waits. */
+       if (!pci_channel_offline(ha->pdev))
+               pci_read_config_word(ha->pdev, PCI_VENDOR_ID, &w);
        /*
         * Ports - Port down timer.
         *
@@ -3252,16 +3291,23 @@ qla2x00_release_firmware(void)
 static pci_ers_result_t
 qla2xxx_pci_error_detected(struct pci_dev *pdev, pci_channel_state_t state)
 {
-       scsi_qla_host_t *base_vha = pci_get_drvdata(pdev);
+       scsi_qla_host_t *vha = pci_get_drvdata(pdev);
+       struct qla_hw_data *ha = vha->hw;
+
+       DEBUG2(qla_printk(KERN_WARNING, ha, "error_detected:state %x\n",
+           state));
 
        switch (state) {
        case pci_channel_io_normal:
+               ha->flags.eeh_busy = 0;
                return PCI_ERS_RESULT_CAN_RECOVER;
        case pci_channel_io_frozen:
+               ha->flags.eeh_busy = 1;
                pci_disable_device(pdev);
                return PCI_ERS_RESULT_NEED_RESET;
        case pci_channel_io_perm_failure:
-               qla2x00_abort_all_cmds(base_vha, DID_NO_CONNECT << 16);
+               ha->flags.pci_channel_io_perm_failure = 1;
+               qla2x00_abort_all_cmds(vha, DID_NO_CONNECT << 16);
                return PCI_ERS_RESULT_DISCONNECT;
        }
        return PCI_ERS_RESULT_NEED_RESET;
@@ -3312,6 +3358,8 @@ qla2xxx_pci_slot_reset(struct pci_dev *pdev)
        struct qla_hw_data *ha = base_vha->hw;
        int rc;
 
+       DEBUG17(qla_printk(KERN_WARNING, ha, "slot_reset\n"));
+
        if (ha->mem_only)
                rc = pci_enable_device_mem(pdev);
        else
@@ -3320,19 +3368,33 @@ qla2xxx_pci_slot_reset(struct pci_dev *pdev)
        if (rc) {
                qla_printk(KERN_WARNING, ha,
                    "Can't re-enable PCI device after reset.\n");
-
                return ret;
        }
-       pci_set_master(pdev);
 
        if (ha->isp_ops->pci_config(base_vha))
                return ret;
 
+#ifdef QL_DEBUG_LEVEL_17
+       {
+               uint8_t b;
+               uint32_t i;
+
+               printk("slot_reset_1: ");
+               for (i = 0; i < 256; i++) {
+                       pci_read_config_byte(ha->pdev, i, &b);
+                       printk("%s%02x", (i%16) ? " " : "\n", b);
+               }
+               printk("\n");
+       }
+#endif
        set_bit(ABORT_ISP_ACTIVE, &base_vha->dpc_flags);
        if (qla2x00_abort_isp(base_vha) == QLA_SUCCESS)
                ret =  PCI_ERS_RESULT_RECOVERED;
        clear_bit(ABORT_ISP_ACTIVE, &base_vha->dpc_flags);
 
+       DEBUG17(qla_printk(KERN_WARNING, ha,
+           "slot_reset-return:ret=%x\n", ret));
+
        return ret;
 }
 
@@ -3343,12 +3405,17 @@ qla2xxx_pci_resume(struct pci_dev *pdev)
        struct qla_hw_data *ha = base_vha->hw;
        int ret;
 
+       DEBUG17(qla_printk(KERN_WARNING, ha, "pci_resume\n"));
+
        ret = qla2x00_wait_for_hba_online(base_vha);
        if (ret != QLA_SUCCESS) {
                qla_printk(KERN_ERR, ha,
                    "the device failed to resume I/O "
                    "from slot/link_reset");
        }
+
+       ha->flags.eeh_busy = 0;
+
        pci_cleanup_aer_uncorrect_error_status(pdev);
 }