From 39a985547cbfcbb0b23667b69b8ae82a6cf312ac Mon Sep 17 00:00:00 2001 From: bo yang Date: Wed, 22 Sep 2010 22:36:29 -0400 Subject: [PATCH] [SCSI] megaraid_sas: Add Online Controller Reset to MegaRAID SAS drive To add the Online controller reset support, driver need to do: a). reset the controller chips -- Xscale and Gen2 which will change the function calls and add the reset function related to this two chips. b). during the reset, driver will store the pending cmds which not returned by FW to driver's pending queue. Driver will re-issue those pending cmds again to FW after the OCR finished. c). In driver's timeout routine, driver will report to OS as reset. Also driver's queue routine will block the cmds until the OCR finished. d). in Driver's ISR routine, if driver get the FW state as state change, FW in Failure status and FW support online controller reset (OCR), driver will start to do the controller reset. e). In driver's IOCTL routine, the application cmds will wait for the OCR to finish, then issue the cmds to FW. Signed-off-by Bo Yang Signed-off-by: James Bottomley --- drivers/scsi/megaraid/megaraid_sas.c | 756 +++++++++++++++++++++++++-- drivers/scsi/megaraid/megaraid_sas.h | 88 +++- 2 files changed, 787 insertions(+), 57 deletions(-) diff --git a/drivers/scsi/megaraid/megaraid_sas.c b/drivers/scsi/megaraid/megaraid_sas.c index 99e4478c3f3e..55951f4d3260 100644 --- a/drivers/scsi/megaraid/megaraid_sas.c +++ b/drivers/scsi/megaraid/megaraid_sas.c @@ -62,6 +62,11 @@ MODULE_VERSION(MEGASAS_VERSION); MODULE_AUTHOR("megaraidlinux@lsi.com"); MODULE_DESCRIPTION("LSI MegaRAID SAS Driver"); +static int megasas_transition_to_ready(struct megasas_instance *instance); +static int megasas_get_pd_list(struct megasas_instance *instance); +static int megasas_issue_init_mfi(struct megasas_instance *instance); +static int megasas_register_aen(struct megasas_instance *instance, + u32 seq_num, u32 class_locale_word); /* * PCI ID table for all supported controllers */ @@ -164,7 +169,7 @@ megasas_return_cmd(struct megasas_instance *instance, struct megasas_cmd *cmd) static inline void megasas_enable_intr_xscale(struct megasas_register_set __iomem * regs) { - writel(1, &(regs)->outbound_intr_mask); + writel(0, &(regs)->outbound_intr_mask); /* Dummy readl to force pci flush */ readl(®s->outbound_intr_mask); @@ -200,24 +205,27 @@ static int megasas_clear_intr_xscale(struct megasas_register_set __iomem * regs) { u32 status; + u32 mfiStatus = 0; /* * Check if it is our interrupt */ status = readl(®s->outbound_intr_status); - if (!(status & MFI_OB_INTR_STATUS_MASK)) { - return 1; - } + if (status & MFI_OB_INTR_STATUS_MASK) + mfiStatus = MFI_INTR_FLAG_REPLY_MESSAGE; + if (status & MFI_XSCALE_OMR0_CHANGE_INTERRUPT) + mfiStatus |= MFI_INTR_FLAG_FIRMWARE_STATE_CHANGE; /* * Clear the interrupt by writing back the same value */ - writel(status, ®s->outbound_intr_status); + if (mfiStatus) + writel(status, ®s->outbound_intr_status); /* Dummy readl to force pci flush */ readl(®s->outbound_intr_status); - return 0; + return mfiStatus; } /** @@ -232,8 +240,69 @@ megasas_fire_cmd_xscale(struct megasas_instance *instance, u32 frame_count, struct megasas_register_set __iomem *regs) { + unsigned long flags; + spin_lock_irqsave(&instance->hba_lock, flags); writel((frame_phys_addr >> 3)|(frame_count), &(regs)->inbound_queue_port); + spin_unlock_irqrestore(&instance->hba_lock, flags); +} + +/** + * megasas_adp_reset_xscale - For controller reset + * @regs: MFI register set + */ +static int +megasas_adp_reset_xscale(struct megasas_instance *instance, + struct megasas_register_set __iomem *regs) +{ + u32 i; + u32 pcidata; + writel(MFI_ADP_RESET, ®s->inbound_doorbell); + + for (i = 0; i < 3; i++) + msleep(1000); /* sleep for 3 secs */ + pcidata = 0; + pci_read_config_dword(instance->pdev, MFI_1068_PCSR_OFFSET, &pcidata); + printk(KERN_NOTICE "pcidata = %x\n", pcidata); + if (pcidata & 0x2) { + printk(KERN_NOTICE "mfi 1068 offset read=%x\n", pcidata); + pcidata &= ~0x2; + pci_write_config_dword(instance->pdev, + MFI_1068_PCSR_OFFSET, pcidata); + + for (i = 0; i < 2; i++) + msleep(1000); /* need to wait 2 secs again */ + + pcidata = 0; + pci_read_config_dword(instance->pdev, + MFI_1068_FW_HANDSHAKE_OFFSET, &pcidata); + printk(KERN_NOTICE "1068 offset handshake read=%x\n", pcidata); + if ((pcidata & 0xffff0000) == MFI_1068_FW_READY) { + printk(KERN_NOTICE "1068 offset pcidt=%x\n", pcidata); + pcidata = 0; + pci_write_config_dword(instance->pdev, + MFI_1068_FW_HANDSHAKE_OFFSET, pcidata); + } + } + return 0; +} + +/** + * megasas_check_reset_xscale - For controller reset check + * @regs: MFI register set + */ +static int +megasas_check_reset_xscale(struct megasas_instance *instance, + struct megasas_register_set __iomem *regs) +{ + u32 consumer; + consumer = *instance->consumer; + + if ((instance->adprecovery != MEGASAS_HBA_OPERATIONAL) && + (*instance->consumer == MEGASAS_ADPRESET_INPROG_SIGN)) { + return 1; + } + return 0; } static struct megasas_instance_template megasas_instance_template_xscale = { @@ -243,6 +312,8 @@ static struct megasas_instance_template megasas_instance_template_xscale = { .disable_intr = megasas_disable_intr_xscale, .clear_intr = megasas_clear_intr_xscale, .read_fw_status_reg = megasas_read_fw_status_reg_xscale, + .adp_reset = megasas_adp_reset_xscale, + .check_reset = megasas_check_reset_xscale, }; /** @@ -264,7 +335,7 @@ megasas_enable_intr_ppc(struct megasas_register_set __iomem * regs) { writel(0xFFFFFFFF, &(regs)->outbound_doorbell_clear); - writel(~0x80000004, &(regs)->outbound_intr_mask); + writel(~0x80000000, &(regs)->outbound_intr_mask); /* Dummy readl to force pci flush */ readl(®s->outbound_intr_mask); @@ -307,7 +378,7 @@ megasas_clear_intr_ppc(struct megasas_register_set __iomem * regs) status = readl(®s->outbound_intr_status); if (!(status & MFI_REPLY_1078_MESSAGE_INTERRUPT)) { - return 1; + return 0; } /* @@ -318,7 +389,7 @@ megasas_clear_intr_ppc(struct megasas_register_set __iomem * regs) /* Dummy readl to force pci flush */ readl(®s->outbound_doorbell_clear); - return 0; + return 1; } /** * megasas_fire_cmd_ppc - Sends command to the FW @@ -332,10 +403,34 @@ megasas_fire_cmd_ppc(struct megasas_instance *instance, u32 frame_count, struct megasas_register_set __iomem *regs) { + unsigned long flags; + spin_lock_irqsave(&instance->hba_lock, flags); writel((frame_phys_addr | (frame_count<<1))|1, &(regs)->inbound_queue_port); + spin_unlock_irqrestore(&instance->hba_lock, flags); +} + +/** + * megasas_adp_reset_ppc - For controller reset + * @regs: MFI register set + */ +static int +megasas_adp_reset_ppc(struct megasas_instance *instance, + struct megasas_register_set __iomem *regs) +{ + return 0; } +/** + * megasas_check_reset_ppc - For controller reset check + * @regs: MFI register set + */ +static int +megasas_check_reset_ppc(struct megasas_instance *instance, + struct megasas_register_set __iomem *regs) +{ + return 0; +} static struct megasas_instance_template megasas_instance_template_ppc = { .fire_cmd = megasas_fire_cmd_ppc, @@ -343,6 +438,8 @@ static struct megasas_instance_template megasas_instance_template_ppc = { .disable_intr = megasas_disable_intr_ppc, .clear_intr = megasas_clear_intr_ppc, .read_fw_status_reg = megasas_read_fw_status_reg_ppc, + .adp_reset = megasas_adp_reset_ppc, + .check_reset = megasas_check_reset_ppc, }; /** @@ -397,7 +494,7 @@ megasas_clear_intr_skinny(struct megasas_register_set __iomem *regs) status = readl(®s->outbound_intr_status); if (!(status & MFI_SKINNY_ENABLE_INTERRUPT_MASK)) { - return 1; + return 0; } /* @@ -410,7 +507,7 @@ megasas_clear_intr_skinny(struct megasas_register_set __iomem *regs) */ readl(®s->outbound_intr_status); - return 0; + return 1; } /** @@ -426,11 +523,33 @@ megasas_fire_cmd_skinny(struct megasas_instance *instance, struct megasas_register_set __iomem *regs) { unsigned long flags; - spin_lock_irqsave(&instance->fire_lock, flags); + spin_lock_irqsave(&instance->hba_lock, flags); writel(0, &(regs)->inbound_high_queue_port); writel((frame_phys_addr | (frame_count<<1))|1, &(regs)->inbound_low_queue_port); - spin_unlock_irqrestore(&instance->fire_lock, flags); + spin_unlock_irqrestore(&instance->hba_lock, flags); +} + +/** + * megasas_adp_reset_skinny - For controller reset + * @regs: MFI register set + */ +static int +megasas_adp_reset_skinny(struct megasas_instance *instance, + struct megasas_register_set __iomem *regs) +{ + return 0; +} + +/** + * megasas_check_reset_skinny - For controller reset check + * @regs: MFI register set + */ +static int +megasas_check_reset_skinny(struct megasas_instance *instance, + struct megasas_register_set __iomem *regs) +{ + return 0; } static struct megasas_instance_template megasas_instance_template_skinny = { @@ -440,6 +559,8 @@ static struct megasas_instance_template megasas_instance_template_skinny = { .disable_intr = megasas_disable_intr_skinny, .clear_intr = megasas_clear_intr_skinny, .read_fw_status_reg = megasas_read_fw_status_reg_skinny, + .adp_reset = megasas_adp_reset_skinny, + .check_reset = megasas_check_reset_skinny, }; @@ -495,23 +616,29 @@ static int megasas_clear_intr_gen2(struct megasas_register_set __iomem *regs) { u32 status; + u32 mfiStatus = 0; /* * Check if it is our interrupt */ status = readl(®s->outbound_intr_status); - if (!(status & MFI_GEN2_ENABLE_INTERRUPT_MASK)) - return 1; + if (status & MFI_GEN2_ENABLE_INTERRUPT_MASK) { + mfiStatus = MFI_INTR_FLAG_REPLY_MESSAGE; + } + if (status & MFI_G2_OUTBOUND_DOORBELL_CHANGE_INTERRUPT) { + mfiStatus |= MFI_INTR_FLAG_FIRMWARE_STATE_CHANGE; + } /* * Clear the interrupt by writing back the same value */ - writel(status, ®s->outbound_doorbell_clear); + if (mfiStatus) + writel(status, ®s->outbound_doorbell_clear); /* Dummy readl to force pci flush */ readl(®s->outbound_intr_status); - return 0; + return mfiStatus; } /** * megasas_fire_cmd_gen2 - Sends command to the FW @@ -525,8 +652,74 @@ megasas_fire_cmd_gen2(struct megasas_instance *instance, u32 frame_count, struct megasas_register_set __iomem *regs) { + unsigned long flags; + spin_lock_irqsave(&instance->hba_lock, flags); writel((frame_phys_addr | (frame_count<<1))|1, &(regs)->inbound_queue_port); + spin_unlock_irqrestore(&instance->hba_lock, flags); +} + +/** + * megasas_adp_reset_gen2 - For controller reset + * @regs: MFI register set + */ +static int +megasas_adp_reset_gen2(struct megasas_instance *instance, + struct megasas_register_set __iomem *reg_set) +{ + u32 retry = 0 ; + u32 HostDiag; + + writel(0, ®_set->seq_offset); + writel(4, ®_set->seq_offset); + writel(0xb, ®_set->seq_offset); + writel(2, ®_set->seq_offset); + writel(7, ®_set->seq_offset); + writel(0xd, ®_set->seq_offset); + msleep(1000); + + HostDiag = (u32)readl(®_set->host_diag); + + while ( !( HostDiag & DIAG_WRITE_ENABLE) ) { + msleep(100); + HostDiag = (u32)readl(®_set->host_diag); + printk(KERN_NOTICE "RESETGEN2: retry=%x, hostdiag=%x\n", + retry, HostDiag); + + if (retry++ >= 100) + return 1; + + } + + printk(KERN_NOTICE "ADP_RESET_GEN2: HostDiag=%x\n", HostDiag); + + writel((HostDiag | DIAG_RESET_ADAPTER), ®_set->host_diag); + + ssleep(10); + + HostDiag = (u32)readl(®_set->host_diag); + while ( ( HostDiag & DIAG_RESET_ADAPTER) ) { + msleep(100); + HostDiag = (u32)readl(®_set->host_diag); + printk(KERN_NOTICE "RESET_GEN2: retry=%x, hostdiag=%x\n", + retry, HostDiag); + + if (retry++ >= 1000) + return 1; + + } + return 0; +} + +/** + * megasas_check_reset_gen2 - For controller reset check + * @regs: MFI register set + */ +static int +megasas_check_reset_gen2(struct megasas_instance *instance, + struct megasas_register_set __iomem *regs) +{ + return 0; } static struct megasas_instance_template megasas_instance_template_gen2 = { @@ -536,11 +729,13 @@ static struct megasas_instance_template megasas_instance_template_gen2 = { .disable_intr = megasas_disable_intr_gen2, .clear_intr = megasas_clear_intr_gen2, .read_fw_status_reg = megasas_read_fw_status_reg_gen2, + .adp_reset = megasas_adp_reset_gen2, + .check_reset = megasas_check_reset_gen2, }; /** * This is the end of set of functions & definitions -* specific to ppc (deviceid : 0x60) controllers +* specific to gen2 (deviceid : 0x78, 0x79) controllers */ /** @@ -599,8 +794,7 @@ megasas_issue_blocked_cmd(struct megasas_instance *instance, instance->instancet->fire_cmd(instance, cmd->frame_phys_addr, 0, instance->reg_set); - wait_event_timeout(instance->int_cmd_wait_q, (cmd->cmd_status != ENODATA), - MEGASAS_INTERNAL_CMD_WAIT_TIME*HZ); + wait_event(instance->int_cmd_wait_q, cmd->cmd_status != ENODATA); return 0; } @@ -648,8 +842,8 @@ megasas_issue_blocked_abort_cmd(struct megasas_instance *instance, /* * Wait for this cmd to complete */ - wait_event_timeout(instance->abort_cmd_wait_q, (cmd->cmd_status != 0xFF), - MEGASAS_INTERNAL_CMD_WAIT_TIME*HZ); + wait_event(instance->abort_cmd_wait_q, cmd->cmd_status != 0xFF); + cmd->sync_cmd = 0; megasas_return_cmd(instance, cmd); return 0; @@ -1131,14 +1325,22 @@ megasas_queue_command(struct scsi_cmnd *scmd, void (*done) (struct scsi_cmnd *)) u32 frame_count; struct megasas_cmd *cmd; struct megasas_instance *instance; + unsigned long flags; instance = (struct megasas_instance *) scmd->device->host->hostdata; - /* Don't process if we have already declared adapter dead */ - if (instance->hw_crit_error) + if (instance->issuepend_done == 0) return SCSI_MLQUEUE_HOST_BUSY; + spin_lock_irqsave(&instance->hba_lock, flags); + if (instance->adprecovery != MEGASAS_HBA_OPERATIONAL) { + spin_unlock_irqrestore(&instance->hba_lock, flags); + return SCSI_MLQUEUE_HOST_BUSY; + } + + spin_unlock_irqrestore(&instance->hba_lock, flags); + scmd->scsi_done = done; scmd->result = 0; @@ -1274,6 +1476,18 @@ static int megasas_slave_alloc(struct scsi_device *sdev) return 0; } +static void megaraid_sas_kill_hba(struct megasas_instance *instance) +{ + if ((instance->pdev->device == PCI_DEVICE_ID_LSI_SAS0073SKINNY) || + (instance->pdev->device == PCI_DEVICE_ID_LSI_SAS0071SKINNY)) { + writel(MFI_STOP_ADP, + &instance->reg_set->reserved_0[0]); + } else { + writel(MFI_STOP_ADP, + &instance->reg_set->inbound_doorbell); + } +} + /** * megasas_complete_cmd_dpc - Returns FW's controller structure * @instance_addr: Address of adapter soft state @@ -1291,7 +1505,7 @@ static void megasas_complete_cmd_dpc(unsigned long instance_addr) unsigned long flags; /* If we have already declared adapter dead, donot complete cmds */ - if (instance->hw_crit_error) + if (instance->adprecovery == MEGASAS_HW_CRITICAL_ERROR ) return; spin_lock_irqsave(&instance->completion_lock, flags); @@ -1301,6 +1515,11 @@ static void megasas_complete_cmd_dpc(unsigned long instance_addr) while (consumer != producer) { context = instance->reply_queue[consumer]; + if (context >= instance->max_fw_cmds) { + printk(KERN_ERR "Unexpected context value %x\n", + context); + BUG(); + } cmd = instance->cmd_list[context]; @@ -1350,7 +1569,76 @@ static void megasas_complete_cmd_dpc(unsigned long instance_addr) static int megasas_wait_for_outstanding(struct megasas_instance *instance) { int i; + u32 reset_index; u32 wait_time = MEGASAS_RESET_WAIT_TIME; + u8 adprecovery; + unsigned long flags; + struct list_head clist_local; + struct megasas_cmd *reset_cmd; + + spin_lock_irqsave(&instance->hba_lock, flags); + adprecovery = instance->adprecovery; + spin_unlock_irqrestore(&instance->hba_lock, flags); + + if (adprecovery != MEGASAS_HBA_OPERATIONAL) { + + INIT_LIST_HEAD(&clist_local); + spin_lock_irqsave(&instance->hba_lock, flags); + list_splice_init(&instance->internal_reset_pending_q, + &clist_local); + spin_unlock_irqrestore(&instance->hba_lock, flags); + + printk(KERN_NOTICE "megasas: HBA reset wait ...\n"); + for (i = 0; i < wait_time; i++) { + msleep(1000); + spin_lock_irqsave(&instance->hba_lock, flags); + adprecovery = instance->adprecovery; + spin_unlock_irqrestore(&instance->hba_lock, flags); + if (adprecovery == MEGASAS_HBA_OPERATIONAL) + break; + } + + if (adprecovery != MEGASAS_HBA_OPERATIONAL) { + printk(KERN_NOTICE "megasas: reset: Stopping HBA.\n"); + spin_lock_irqsave(&instance->hba_lock, flags); + instance->adprecovery = MEGASAS_HW_CRITICAL_ERROR; + spin_unlock_irqrestore(&instance->hba_lock, flags); + return FAILED; + } + + reset_index = 0; + while (!list_empty(&clist_local)) { + reset_cmd = list_entry((&clist_local)->next, + struct megasas_cmd, list); + list_del_init(&reset_cmd->list); + if (reset_cmd->scmd) { + reset_cmd->scmd->result = DID_RESET << 16; + printk(KERN_NOTICE "%d:%p reset [%02x], %#lx\n", + reset_index, reset_cmd, + reset_cmd->scmd->cmnd[0], + reset_cmd->scmd->serial_number); + + reset_cmd->scmd->scsi_done(reset_cmd->scmd); + megasas_return_cmd(instance, reset_cmd); + } else if (reset_cmd->sync_cmd) { + printk(KERN_NOTICE "megasas:%p synch cmds" + "reset queue\n", + reset_cmd); + + reset_cmd->cmd_status = ENODATA; + instance->instancet->fire_cmd(instance, + reset_cmd->frame_phys_addr, + 0, instance->reg_set); + } else { + printk(KERN_NOTICE "megasas: %p unexpected" + "cmds lst\n", + reset_cmd); + } + reset_index++; + } + + return SUCCESS; + } for (i = 0; i < wait_time; i++) { @@ -1373,6 +1661,7 @@ static int megasas_wait_for_outstanding(struct megasas_instance *instance) } if (atomic_read(&instance->fw_outstanding)) { + printk(KERN_NOTICE "megaraid_sas: pending cmds after reset\n"); /* * Send signal to FW to stop processing any pending cmds. * The controller will be taken offline by the OS now. @@ -1388,10 +1677,14 @@ static int megasas_wait_for_outstanding(struct megasas_instance *instance) &instance->reg_set->inbound_doorbell); } megasas_dump_pending_frames(instance); - instance->hw_crit_error = 1; + spin_lock_irqsave(&instance->hba_lock, flags); + instance->adprecovery = MEGASAS_HW_CRITICAL_ERROR; + spin_unlock_irqrestore(&instance->hba_lock, flags); return FAILED; } + printk(KERN_NOTICE "megaraid_sas: no pending cmds after reset\n"); + return SUCCESS; } @@ -1413,7 +1706,7 @@ static int megasas_generic_reset(struct scsi_cmnd *scmd) scmd_printk(KERN_NOTICE, scmd, "megasas: RESET -%ld cmd=%x retries=%x\n", scmd->serial_number, scmd->cmnd[0], scmd->retries); - if (instance->hw_crit_error) { + if (instance->adprecovery == MEGASAS_HW_CRITICAL_ERROR) { printk(KERN_ERR "megasas: cannot recover from previous reset " "failures\n"); return FAILED; @@ -1568,7 +1861,8 @@ megasas_service_aen(struct megasas_instance *instance, struct megasas_cmd *cmd) instance->aen_cmd = NULL; megasas_return_cmd(instance, cmd); - if (instance->unload == 0) { + if ((instance->unload == 0) && + ((instance->issuepend_done == 1))) { struct megasas_aen_event *ev; ev = kzalloc(sizeof(*ev), GFP_ATOMIC); if (!ev) { @@ -1663,6 +1957,9 @@ megasas_complete_cmd(struct megasas_instance *instance, struct megasas_cmd *cmd, struct megasas_header *hdr = &cmd->frame->hdr; unsigned long flags; + /* flag for the retry reset */ + cmd->retry_for_fw_reset = 0; + if (cmd->scmd) cmd->scmd->SCp.ptr = NULL; @@ -1782,40 +2079,302 @@ megasas_complete_cmd(struct megasas_instance *instance, struct megasas_cmd *cmd, } } +/** + * megasas_issue_pending_cmds_again - issue all pending cmds + * in FW again because of the fw reset + * @instance: Adapter soft state + */ +static inline void +megasas_issue_pending_cmds_again(struct megasas_instance *instance) +{ + struct megasas_cmd *cmd; + struct list_head clist_local; + union megasas_evt_class_locale class_locale; + unsigned long flags; + u32 seq_num; + + INIT_LIST_HEAD(&clist_local); + spin_lock_irqsave(&instance->hba_lock, flags); + list_splice_init(&instance->internal_reset_pending_q, &clist_local); + spin_unlock_irqrestore(&instance->hba_lock, flags); + + while (!list_empty(&clist_local)) { + cmd = list_entry((&clist_local)->next, + struct megasas_cmd, list); + list_del_init(&cmd->list); + + if (cmd->sync_cmd || cmd->scmd) { + printk(KERN_NOTICE "megaraid_sas: command %p, %p:%d" + "detected to be pending while HBA reset.\n", + cmd, cmd->scmd, cmd->sync_cmd); + + cmd->retry_for_fw_reset++; + + if (cmd->retry_for_fw_reset == 3) { + printk(KERN_NOTICE "megaraid_sas: cmd %p, %p:%d" + "was tried multiple times during reset." + "Shutting down the HBA\n", + cmd, cmd->scmd, cmd->sync_cmd); + megaraid_sas_kill_hba(instance); + + instance->adprecovery = + MEGASAS_HW_CRITICAL_ERROR; + return; + } + } + + if (cmd->sync_cmd == 1) { + if (cmd->scmd) { + printk(KERN_NOTICE "megaraid_sas: unexpected" + "cmd attached to internal command!\n"); + } + printk(KERN_NOTICE "megasas: %p synchronous cmd" + "on the internal reset queue," + "issue it again.\n", cmd); + cmd->cmd_status = ENODATA; + instance->instancet->fire_cmd(instance, + cmd->frame_phys_addr , + 0, instance->reg_set); + } else if (cmd->scmd) { + printk(KERN_NOTICE "megasas: %p scsi cmd [%02x],%#lx" + "detected on the internal queue, issue again.\n", + cmd, cmd->scmd->cmnd[0], cmd->scmd->serial_number); + + atomic_inc(&instance->fw_outstanding); + instance->instancet->fire_cmd(instance, + cmd->frame_phys_addr, + cmd->frame_count-1, instance->reg_set); + } else { + printk(KERN_NOTICE "megasas: %p unexpected cmd on the" + "internal reset defer list while re-issue!!\n", + cmd); + } + } + + if (instance->aen_cmd) { + printk(KERN_NOTICE "megaraid_sas: aen_cmd in def process\n"); + megasas_return_cmd(instance, instance->aen_cmd); + + instance->aen_cmd = NULL; + } + + /* + * Initiate AEN (Asynchronous Event Notification) + */ + seq_num = instance->last_seq_num; + class_locale.members.reserved = 0; + class_locale.members.locale = MR_EVT_LOCALE_ALL; + class_locale.members.class = MR_EVT_CLASS_DEBUG; + + megasas_register_aen(instance, seq_num, class_locale.word); +} + +/** + * Move the internal reset pending commands to a deferred queue. + * + * We move the commands pending at internal reset time to a + * pending queue. This queue would be flushed after successful + * completion of the internal reset sequence. if the internal reset + * did not complete in time, the kernel reset handler would flush + * these commands. + **/ +static void +megasas_internal_reset_defer_cmds(struct megasas_instance *instance) +{ + struct megasas_cmd *cmd; + int i; + u32 max_cmd = instance->max_fw_cmds; + u32 defer_index; + unsigned long flags; + + defer_index = 0; + spin_lock_irqsave(&instance->cmd_pool_lock, flags); + for (i = 0; i < max_cmd; i++) { + cmd = instance->cmd_list[i]; + if (cmd->sync_cmd == 1 || cmd->scmd) { + printk(KERN_NOTICE "megasas: moving cmd[%d]:%p:%d:%p" + "on the defer queue as internal\n", + defer_index, cmd, cmd->sync_cmd, cmd->scmd); + + if (!list_empty(&cmd->list)) { + printk(KERN_NOTICE "megaraid_sas: ERROR while" + " moving this cmd:%p, %d %p, it was" + "discovered on some list?\n", + cmd, cmd->sync_cmd, cmd->scmd); + + list_del_init(&cmd->list); + } + defer_index++; + list_add_tail(&cmd->list, + &instance->internal_reset_pending_q); + } + } + spin_unlock_irqrestore(&instance->cmd_pool_lock, flags); +} + + +static void +process_fw_state_change_wq(struct work_struct *work) +{ + struct megasas_instance *instance = + container_of(work, struct megasas_instance, work_init); + u32 wait; + unsigned long flags; + + if (instance->adprecovery != MEGASAS_ADPRESET_SM_INFAULT) { + printk(KERN_NOTICE "megaraid_sas: error, recovery st %x \n", + instance->adprecovery); + return ; + } + + if (instance->adprecovery == MEGASAS_ADPRESET_SM_INFAULT) { + printk(KERN_NOTICE "megaraid_sas: FW detected to be in fault" + "state, restarting it...\n"); + + instance->instancet->disable_intr(instance->reg_set); + atomic_set(&instance->fw_outstanding, 0); + + atomic_set(&instance->fw_reset_no_pci_access, 1); + instance->instancet->adp_reset(instance, instance->reg_set); + atomic_set(&instance->fw_reset_no_pci_access, 0 ); + + printk(KERN_NOTICE "megaraid_sas: FW restarted successfully," + "initiating next stage...\n"); + + printk(KERN_NOTICE "megaraid_sas: HBA recovery state machine," + "state 2 starting...\n"); + + /*waitting for about 20 second before start the second init*/ + for (wait = 0; wait < 30; wait++) { + msleep(1000); + } + + if (megasas_transition_to_ready(instance)) { + printk(KERN_NOTICE "megaraid_sas:adapter not ready\n"); + + megaraid_sas_kill_hba(instance); + instance->adprecovery = MEGASAS_HW_CRITICAL_ERROR; + return ; + } + + if ((instance->pdev->device == PCI_DEVICE_ID_LSI_SAS1064R) || + (instance->pdev->device == PCI_DEVICE_ID_DELL_PERC5) || + (instance->pdev->device == PCI_DEVICE_ID_LSI_VERDE_ZCR) + ) { + *instance->consumer = *instance->producer; + } else { + *instance->consumer = 0; + *instance->producer = 0; + } + + megasas_issue_init_mfi(instance); + + spin_lock_irqsave(&instance->hba_lock, flags); + instance->adprecovery = MEGASAS_HBA_OPERATIONAL; + spin_unlock_irqrestore(&instance->hba_lock, flags); + instance->instancet->enable_intr(instance->reg_set); + + megasas_issue_pending_cmds_again(instance); + instance->issuepend_done = 1; + } + return ; +} + /** * megasas_deplete_reply_queue - Processes all completed commands * @instance: Adapter soft state * @alt_status: Alternate status to be returned to * SCSI mid-layer instead of the status * returned by the FW + * Note: this must be called with hba lock held */ static int -megasas_deplete_reply_queue(struct megasas_instance *instance, u8 alt_status) +megasas_deplete_reply_queue(struct megasas_instance *instance, + u8 alt_status) { - /* - * Check if it is our interrupt - * Clear the interrupt - */ - if(instance->instancet->clear_intr(instance->reg_set)) + u32 mfiStatus; + u32 fw_state; + + if ((mfiStatus = instance->instancet->check_reset(instance, + instance->reg_set)) == 1) { + return IRQ_HANDLED; + } + + if ((mfiStatus = instance->instancet->clear_intr( + instance->reg_set) + ) == 0) { return IRQ_NONE; + } + + instance->mfiStatus = mfiStatus; + + if ((mfiStatus & MFI_INTR_FLAG_FIRMWARE_STATE_CHANGE)) { + fw_state = instance->instancet->read_fw_status_reg( + instance->reg_set) & MFI_STATE_MASK; + + if (fw_state != MFI_STATE_FAULT) { + printk(KERN_NOTICE "megaraid_sas: fw state:%x\n", + fw_state); + } + + if ((fw_state == MFI_STATE_FAULT) && + (instance->disableOnlineCtrlReset == 0)) { + printk(KERN_NOTICE "megaraid_sas: wait adp restart\n"); + + if ((instance->pdev->device == + PCI_DEVICE_ID_LSI_SAS1064R) || + (instance->pdev->device == + PCI_DEVICE_ID_DELL_PERC5) || + (instance->pdev->device == + PCI_DEVICE_ID_LSI_VERDE_ZCR)) { + + *instance->consumer = + MEGASAS_ADPRESET_INPROG_SIGN; + } + + + instance->instancet->disable_intr(instance->reg_set); + instance->adprecovery = MEGASAS_ADPRESET_SM_INFAULT; + instance->issuepend_done = 0; + + atomic_set(&instance->fw_outstanding, 0); + megasas_internal_reset_defer_cmds(instance); + + printk(KERN_NOTICE "megasas: fwState=%x, stage:%d\n", + fw_state, instance->adprecovery); + + schedule_work(&instance->work_init); + return IRQ_HANDLED; + + } else { + printk(KERN_NOTICE "megasas: fwstate:%x, dis_OCR=%x\n", + fw_state, instance->disableOnlineCtrlReset); + } + } - if (instance->hw_crit_error) - goto out_done; - /* - * Schedule the tasklet for cmd completion - */ tasklet_schedule(&instance->isr_tasklet); -out_done: return IRQ_HANDLED; } - /** * megasas_isr - isr entry point */ static irqreturn_t megasas_isr(int irq, void *devp) { - return megasas_deplete_reply_queue((struct megasas_instance *)devp, - DID_OK); + struct megasas_instance *instance; + unsigned long flags; + irqreturn_t rc; + + if (atomic_read( + &(((struct megasas_instance *)devp)->fw_reset_no_pci_access))) + return IRQ_HANDLED; + + instance = (struct megasas_instance *)devp; + + spin_lock_irqsave(&instance->hba_lock, flags); + rc = megasas_deplete_reply_queue(instance, DID_OK); + spin_unlock_irqrestore(&instance->hba_lock, flags); + + return rc; } /** @@ -1972,7 +2531,7 @@ megasas_transition_to_ready(struct megasas_instance* instance) "in %d secs\n", fw_state, max_wait); return -ENODEV; } - }; + } printk(KERN_INFO "megasas: FW now in Ready state\n"); return 0; @@ -2054,6 +2613,7 @@ static int megasas_create_frame_pool(struct megasas_instance *instance) */ sgl_sz = sge_sz * instance->max_num_sge; frame_count = (sgl_sz + MEGAMFI_FRAME_SIZE - 1) / MEGAMFI_FRAME_SIZE; + frame_count = 15; /* * We need one extra frame for the MFI command @@ -2201,6 +2761,7 @@ static int megasas_alloc_cmds(struct megasas_instance *instance) cmd = instance->cmd_list[i]; memset(cmd, 0, sizeof(struct megasas_cmd)); cmd->index = i; + cmd->scmd = NULL; cmd->instance = instance; list_add_tail(&cmd->list, &instance->cmd_pool); @@ -2368,7 +2929,7 @@ megasas_get_ld_list(struct megasas_instance *instance) /* the following function will get the instance PD LIST */ - if ((ret == 0) && (ci->ldCount < MAX_LOGICAL_DRIVES)) { + if ((ret == 0) && (ci->ldCount <= MAX_LOGICAL_DRIVES)) { memset(instance->ld_ids, 0xff, MEGASAS_MAX_LD_IDS); for (ld_index = 0; ld_index < ci->ldCount; ld_index++) { @@ -2682,6 +3243,21 @@ static int megasas_init_mfi(struct megasas_instance *instance) if (megasas_issue_init_mfi(instance)) goto fail_fw_init; + instance->fw_support_ieee = 0; + instance->fw_support_ieee = + (instance->instancet->read_fw_status_reg(reg_set) & + 0x04000000); + + printk(KERN_NOTICE "megasas_init_mfi: fw_support_ieee=%d", + instance->fw_support_ieee); + + if (instance->fw_support_ieee) + instance->flag_ieee = 1; + + /** for passthrough + * the following function will get the PD LIST. + */ + memset(instance->pd_list, 0 , (MEGASAS_MAX_PD * sizeof(struct megasas_pd_list))); megasas_get_pd_list(instance); @@ -2708,6 +3284,8 @@ static int megasas_init_mfi(struct megasas_instance *instance) max_sectors_2 = ctrl_info->max_request_size; tmp_sectors = min_t(u32, max_sectors_1 , max_sectors_2); + instance->disableOnlineCtrlReset = + ctrl_info->properties.OnOffProperties.disableOnlineCtrlReset; } instance->max_sectors_per_req = instance->max_num_sge * @@ -2929,6 +3507,7 @@ megasas_register_aen(struct megasas_instance *instance, u32 seq_num, dcmd->flags = MFI_FRAME_DIR_READ; dcmd->timeout = 0; dcmd->pad_0 = 0; + instance->last_seq_num = seq_num; dcmd->data_xfer_len = sizeof(struct megasas_evt_detail); dcmd->opcode = MR_DCMD_CTRL_EVENT_WAIT; dcmd->mbox.w[0] = seq_num; @@ -3097,6 +3676,7 @@ megasas_probe_one(struct pci_dev *pdev, const struct pci_device_id *id) instance = (struct megasas_instance *)host->hostdata; memset(instance, 0, sizeof(*instance)); + atomic_set( &instance->fw_reset_no_pci_access, 0 ); instance->producer = pci_alloc_consistent(pdev, sizeof(u32), &instance->producer_h); @@ -3114,6 +3694,9 @@ megasas_probe_one(struct pci_dev *pdev, const struct pci_device_id *id) megasas_poll_wait_aen = 0; instance->flag_ieee = 0; instance->ev = NULL; + instance->issuepend_done = 1; + instance->adprecovery = MEGASAS_HBA_OPERATIONAL; + megasas_poll_wait_aen = 0; instance->evt_detail = pci_alloc_consistent(pdev, sizeof(struct @@ -3130,6 +3713,7 @@ megasas_probe_one(struct pci_dev *pdev, const struct pci_device_id *id) * Initialize locks and queues */ INIT_LIST_HEAD(&instance->cmd_pool); + INIT_LIST_HEAD(&instance->internal_reset_pending_q); atomic_set(&instance->fw_outstanding,0); @@ -3137,7 +3721,7 @@ megasas_probe_one(struct pci_dev *pdev, const struct pci_device_id *id) init_waitqueue_head(&instance->abort_cmd_wait_q); spin_lock_init(&instance->cmd_pool_lock); - spin_lock_init(&instance->fire_lock); + spin_lock_init(&instance->hba_lock); spin_lock_init(&instance->completion_lock); spin_lock_init(&poll_aen_lock); @@ -3162,6 +3746,9 @@ megasas_probe_one(struct pci_dev *pdev, const struct pci_device_id *id) instance->flag = 0; instance->unload = 1; instance->last_time = 0; + instance->disableOnlineCtrlReset = 1; + + INIT_WORK(&instance->work_init, process_fw_state_change_wq); /* * Initialize MFI Firmware @@ -3253,6 +3840,9 @@ static void megasas_flush_cache(struct megasas_instance *instance) struct megasas_cmd *cmd; struct megasas_dcmd_frame *dcmd; + if (instance->adprecovery == MEGASAS_HW_CRITICAL_ERROR) + return; + cmd = megasas_get_cmd(instance); if (!cmd) @@ -3290,6 +3880,9 @@ static void megasas_shutdown_controller(struct megasas_instance *instance, struct megasas_cmd *cmd; struct megasas_dcmd_frame *dcmd; + if (instance->adprecovery == MEGASAS_HW_CRITICAL_ERROR) + return; + cmd = megasas_get_cmd(instance); if (!cmd) @@ -3781,6 +4374,9 @@ static int megasas_mgmt_ioctl_fw(struct file *file, unsigned long arg) struct megasas_iocpacket *ioc; struct megasas_instance *instance; int error; + int i; + unsigned long flags; + u32 wait_time = MEGASAS_RESET_WAIT_TIME; ioc = kmalloc(sizeof(*ioc), GFP_KERNEL); if (!ioc) @@ -3797,8 +4393,8 @@ static int megasas_mgmt_ioctl_fw(struct file *file, unsigned long arg) goto out_kfree_ioc; } - if (instance->hw_crit_error == 1) { - printk(KERN_DEBUG "Controller in Crit ERROR\n"); + if (instance->adprecovery == MEGASAS_HW_CRITICAL_ERROR) { + printk(KERN_ERR "Controller in crit error\n"); error = -ENODEV; goto out_kfree_ioc; } @@ -3815,6 +4411,35 @@ static int megasas_mgmt_ioctl_fw(struct file *file, unsigned long arg) error = -ERESTARTSYS; goto out_kfree_ioc; } + + for (i = 0; i < wait_time; i++) { + + spin_lock_irqsave(&instance->hba_lock, flags); + if (instance->adprecovery == MEGASAS_HBA_OPERATIONAL) { + spin_unlock_irqrestore(&instance->hba_lock, flags); + break; + } + spin_unlock_irqrestore(&instance->hba_lock, flags); + + if (!(i % MEGASAS_RESET_NOTICE_INTERVAL)) { + printk(KERN_NOTICE "megasas: waiting" + "for controller reset to finish\n"); + } + + msleep(1000); + } + + spin_lock_irqsave(&instance->hba_lock, flags); + if (instance->adprecovery != MEGASAS_HBA_OPERATIONAL) { + spin_unlock_irqrestore(&instance->hba_lock, flags); + + printk(KERN_ERR "megaraid_sas: timed out while" + "waiting for HBA to recover\n"); + error = -ENODEV; + goto out_kfree_ioc; + } + spin_unlock_irqrestore(&instance->hba_lock, flags); + error = megasas_mgmt_fw_ioctl(instance, user_ioc, ioc); up(&instance->ioctl_sem); @@ -3828,6 +4453,9 @@ static int megasas_mgmt_ioctl_aen(struct file *file, unsigned long arg) struct megasas_instance *instance; struct megasas_aen aen; int error; + int i; + unsigned long flags; + u32 wait_time = MEGASAS_RESET_WAIT_TIME; if (file->private_data != file) { printk(KERN_DEBUG "megasas: fasync_helper was not " @@ -3843,14 +4471,42 @@ static int megasas_mgmt_ioctl_aen(struct file *file, unsigned long arg) if (!instance) return -ENODEV; - if (instance->hw_crit_error == 1) { - error = -ENODEV; + if (instance->adprecovery == MEGASAS_HW_CRITICAL_ERROR) { + return -ENODEV; } if (instance->unload == 1) { return -ENODEV; } + for (i = 0; i < wait_time; i++) { + + spin_lock_irqsave(&instance->hba_lock, flags); + if (instance->adprecovery == MEGASAS_HBA_OPERATIONAL) { + spin_unlock_irqrestore(&instance->hba_lock, + flags); + break; + } + + spin_unlock_irqrestore(&instance->hba_lock, flags); + + if (!(i % MEGASAS_RESET_NOTICE_INTERVAL)) { + printk(KERN_NOTICE "megasas: waiting for" + "controller reset to finish\n"); + } + + msleep(1000); + } + + spin_lock_irqsave(&instance->hba_lock, flags); + if (instance->adprecovery != MEGASAS_HBA_OPERATIONAL) { + spin_unlock_irqrestore(&instance->hba_lock, flags); + printk(KERN_ERR "megaraid_sas: timed out while waiting" + "for HBA to recover.\n"); + return -ENODEV; + } + spin_unlock_irqrestore(&instance->hba_lock, flags); + mutex_lock(&instance->aen_mutex); error = megasas_register_aen(instance, aen.seq_num, aen.class_locale_word); diff --git a/drivers/scsi/megaraid/megaraid_sas.h b/drivers/scsi/megaraid/megaraid_sas.h index 9d8b6bf605aa..16a4f68a34b0 100644 --- a/drivers/scsi/megaraid/megaraid_sas.h +++ b/drivers/scsi/megaraid/megaraid_sas.h @@ -60,6 +60,7 @@ #define MFI_STATE_READY 0xB0000000 #define MFI_STATE_OPERATIONAL 0xC0000000 #define MFI_STATE_FAULT 0xF0000000 +#define MFI_RESET_REQUIRED 0x00000001 #define MEGAMFI_FRAME_SIZE 64 @@ -73,6 +74,12 @@ * HOTPLUG : Resume from Hotplug * MFI_STOP_ADP : Send signal to FW to stop processing */ +#define WRITE_SEQUENCE_OFFSET (0x0000000FC) /* I20 */ +#define HOST_DIAGNOSTIC_OFFSET (0x000000F8) /* I20 */ +#define DIAG_WRITE_ENABLE (0x00000080) +#define DIAG_RESET_ADAPTER (0x00000004) + +#define MFI_ADP_RESET 0x00000040 #define MFI_INIT_ABORT 0x00000001 #define MFI_INIT_READY 0x00000002 #define MFI_INIT_MFIMODE 0x00000004 @@ -402,8 +409,40 @@ struct megasas_ctrl_prop { u16 ecc_bucket_leak_rate; u8 restore_hotspare_on_insertion; u8 expose_encl_devices; - u8 reserved[38]; + u8 maintainPdFailHistory; + u8 disallowHostRequestReordering; + u8 abortCCOnError; + u8 loadBalanceMode; + u8 disableAutoDetectBackplane; + + u8 snapVDSpace; + + /* + * Add properties that can be controlled by + * a bit in the following structure. + */ + struct { + u32 copyBackDisabled : 1; + u32 SMARTerEnabled : 1; + u32 prCorrectUnconfiguredAreas : 1; + u32 useFdeOnly : 1; + u32 disableNCQ : 1; + u32 SSDSMARTerEnabled : 1; + u32 SSDPatrolReadEnabled : 1; + u32 enableSpinDownUnconfigured : 1; + u32 autoEnhancedImport : 1; + u32 enableSecretKeyControl : 1; + u32 disableOnlineCtrlReset : 1; + u32 allowBootWithPinnedCache : 1; + u32 disableSpinDownHS : 1; + u32 enableJBOD : 1; + u32 reserved :18; + } OnOffProperties; + u8 autoSnapVDSpace; + u8 viewSpace; + u16 spinDownTime; + u8 reserved[24]; } __packed; /* @@ -704,6 +743,12 @@ struct megasas_ctrl_info { */ #define IS_DMA64 (sizeof(dma_addr_t) == 8) +#define MFI_XSCALE_OMR0_CHANGE_INTERRUPT 0x00000001 + +#define MFI_INTR_FLAG_REPLY_MESSAGE 0x00000001 +#define MFI_INTR_FLAG_FIRMWARE_STATE_CHANGE 0x00000002 +#define MFI_G2_OUTBOUND_DOORBELL_CHANGE_INTERRUPT 0x00000004 + #define MFI_OB_INTR_STATUS_MASK 0x00000002 #define MFI_POLL_TIMEOUT_SECS 60 #define MEGASAS_COMPLETION_TIMER_INTERVAL (HZ/10) @@ -714,6 +759,9 @@ struct megasas_ctrl_info { #define MFI_REPLY_SKINNY_MESSAGE_INTERRUPT 0x40000000 #define MFI_SKINNY_ENABLE_INTERRUPT_MASK (0x00000001) +#define MFI_1068_PCSR_OFFSET 0x84 +#define MFI_1068_FW_HANDSHAKE_OFFSET 0x64 +#define MFI_1068_FW_READY 0xDDDD0000 /* * register set for both 1068 and 1078 controllers * structure extended for 1078 registers @@ -755,8 +803,10 @@ struct megasas_register_set { u32 inbound_high_queue_port ; /*00C4h*/ u32 reserved_5; /*00C8h*/ - u32 index_registers[820]; /*00CCh*/ - + u32 res_6[11]; /*CCh*/ + u32 host_diag; + u32 seq_offset; + u32 index_registers[807]; /*00CCh*/ } __attribute__ ((packed)); struct megasas_sge32 { @@ -1226,11 +1276,12 @@ struct megasas_instance { struct megasas_cmd **cmd_list; struct list_head cmd_pool; + /* used to sync fire the cmd to fw */ spinlock_t cmd_pool_lock; + /* used to sync fire the cmd to fw */ + spinlock_t hba_lock; /* used to synch producer, consumer ptrs in dpc */ spinlock_t completion_lock; - /* used to sync fire the cmd to fw */ - spinlock_t fire_lock; struct dma_pool *frame_dma_pool; struct dma_pool *sense_dma_pool; @@ -1247,19 +1298,36 @@ struct megasas_instance { struct pci_dev *pdev; u32 unique_id; + u32 fw_support_ieee; atomic_t fw_outstanding; - u32 hw_crit_error; + atomic_t fw_reset_no_pci_access; struct megasas_instance_template *instancet; struct tasklet_struct isr_tasklet; + struct work_struct work_init; u8 flag; u8 unload; u8 flag_ieee; + u8 issuepend_done; + u8 disableOnlineCtrlReset; + u8 adprecovery; unsigned long last_time; + u32 mfiStatus; + u32 last_seq_num; struct timer_list io_completion_timer; + struct list_head internal_reset_pending_q; +}; + +enum { + MEGASAS_HBA_OPERATIONAL = 0, + MEGASAS_ADPRESET_SM_INFAULT = 1, + MEGASAS_ADPRESET_SM_FW_RESET_SUCCESS = 2, + MEGASAS_ADPRESET_SM_OPERATIONAL = 3, + MEGASAS_HW_CRITICAL_ERROR = 4, + MEGASAS_ADPRESET_INPROG_SIGN = 0xDEADDEAD, }; struct megasas_instance_template { @@ -1272,6 +1340,10 @@ struct megasas_instance_template { int (*clear_intr)(struct megasas_register_set __iomem *); u32 (*read_fw_status_reg)(struct megasas_register_set __iomem *); + int (*adp_reset)(struct megasas_instance *, \ + struct megasas_register_set __iomem *); + int (*check_reset)(struct megasas_instance *, \ + struct megasas_register_set __iomem *); }; #define MEGASAS_IS_LOGICAL(scp) \ @@ -1291,7 +1363,9 @@ struct megasas_cmd { u32 index; u8 sync_cmd; u8 cmd_status; - u16 abort_aen; + u8 abort_aen; + u8 retry_for_fw_reset; + struct list_head list; struct scsi_cmnd *scmd; -- 2.34.1