[SCSI] scsi_transport_fc: remove invalid BUG_ON
authorMichael Reed <mdr@sgi.com>
Fri, 9 Oct 2009 19:15:59 +0000 (14:15 -0500)
committerJames Bottomley <James.Bottomley@suse.de>
Thu, 29 Oct 2009 17:03:28 +0000 (13:03 -0400)
I was doing some large lun count testing with 2.6.31 and hit
a BUG_ON() in fc_timeout_deleted_rport(), and it seems like it
should have been just a matter of time before someone did.

It seems invalid to set port_state under lock, then expect it to
remain set after releasing the lock.  Another thread called
fc_remote_port_add() when the lock was released, changing the
port_state.

This patch removes the BUG_ON and moves the test of the
port_state to inside the host_lock.  It's been running for
several weeks now with no ill effect.

Signed-off-by: Michael Reed <mdr@sgi.com>
Acked-by: James Smart <james.smart@emulex.com>
Signed-off-by: James Bottomley <James.Bottomley@suse.de>
drivers/scsi/scsi_transport_fc.c

index a67fed10598acfc3dd00234d3f3222470d18a9f9..f436e033adaf27089103a29bfcb7d85bd7fc7008 100644 (file)
@@ -2384,6 +2384,7 @@ fc_rport_final_delete(struct work_struct *work)
        struct Scsi_Host *shost = rport_to_shost(rport);
        struct fc_internal *i = to_fc_internal(shost->transportt);
        unsigned long flags;
+       int do_callback = 0;
 
        /*
         * if a scan is pending, flush the SCSI Host work_q so that
@@ -2422,8 +2423,15 @@ fc_rport_final_delete(struct work_struct *work)
         * Avoid this call if we already called it when we preserved the
         * rport for the binding.
         */
+       spin_lock_irqsave(shost->host_lock, flags);
        if (!(rport->flags & FC_RPORT_DEVLOSS_CALLBK_DONE) &&
-           (i->f->dev_loss_tmo_callbk))
+           (i->f->dev_loss_tmo_callbk)) {
+               rport->flags |= FC_RPORT_DEVLOSS_CALLBK_DONE;
+               do_callback = 1;
+       }
+       spin_unlock_irqrestore(shost->host_lock, flags);
+
+       if (do_callback)
                i->f->dev_loss_tmo_callbk(rport);
 
        fc_bsg_remove(rport->rqst_q);
@@ -2970,6 +2978,7 @@ fc_timeout_deleted_rport(struct work_struct *work)
        struct fc_internal *i = to_fc_internal(shost->transportt);
        struct fc_host_attrs *fc_host = shost_to_fc_host(shost);
        unsigned long flags;
+       int do_callback = 0;
 
        spin_lock_irqsave(shost->host_lock, flags);
 
@@ -3035,7 +3044,6 @@ fc_timeout_deleted_rport(struct work_struct *work)
        rport->roles = FC_PORT_ROLE_UNKNOWN;
        rport->port_state = FC_PORTSTATE_NOTPRESENT;
        rport->flags &= ~FC_RPORT_FAST_FAIL_TIMEDOUT;
-       rport->flags |= FC_RPORT_DEVLOSS_CALLBK_DONE;
 
        /*
         * Pre-emptively kill I/O rather than waiting for the work queue
@@ -3045,32 +3053,40 @@ fc_timeout_deleted_rport(struct work_struct *work)
        spin_unlock_irqrestore(shost->host_lock, flags);
        fc_terminate_rport_io(rport);
 
-       BUG_ON(rport->port_state != FC_PORTSTATE_NOTPRESENT);
+       spin_lock_irqsave(shost->host_lock, flags);
 
-       /* remove the identifiers that aren't used in the consisting binding */
-       switch (fc_host->tgtid_bind_type) {
-       case FC_TGTID_BIND_BY_WWPN:
-               rport->node_name = -1;
-               rport->port_id = -1;
-               break;
-       case FC_TGTID_BIND_BY_WWNN:
-               rport->port_name = -1;
-               rport->port_id = -1;
-               break;
-       case FC_TGTID_BIND_BY_ID:
-               rport->node_name = -1;
-               rport->port_name = -1;
-               break;
-       case FC_TGTID_BIND_NONE:        /* to keep compiler happy */
-               break;
+       if (rport->port_state == FC_PORTSTATE_NOTPRESENT) {     /* still missing */
+
+               /* remove the identifiers that aren't used in the consisting binding */
+               switch (fc_host->tgtid_bind_type) {
+               case FC_TGTID_BIND_BY_WWPN:
+                       rport->node_name = -1;
+                       rport->port_id = -1;
+                       break;
+               case FC_TGTID_BIND_BY_WWNN:
+                       rport->port_name = -1;
+                       rport->port_id = -1;
+                       break;
+               case FC_TGTID_BIND_BY_ID:
+                       rport->node_name = -1;
+                       rport->port_name = -1;
+                       break;
+               case FC_TGTID_BIND_NONE:        /* to keep compiler happy */
+                       break;
+               }
+
+               /*
+                * As this only occurs if the remote port (scsi target)
+                * went away and didn't come back - we'll remove
+                * all attached scsi devices.
+                */
+               rport->flags |= FC_RPORT_DEVLOSS_CALLBK_DONE;
+               fc_queue_work(shost, &rport->stgt_delete_work);
+
+               do_callback = 1;
        }
 
-       /*
-        * As this only occurs if the remote port (scsi target)
-        * went away and didn't come back - we'll remove
-        * all attached scsi devices.
-        */
-       fc_queue_work(shost, &rport->stgt_delete_work);
+       spin_unlock_irqrestore(shost->host_lock, flags);
 
        /*
         * Notify the driver that the rport is now dead. The LLDD will
@@ -3078,7 +3094,7 @@ fc_timeout_deleted_rport(struct work_struct *work)
         *
         * Note: we set the CALLBK_DONE flag above to correspond
         */
-       if (i->f->dev_loss_tmo_callbk)
+       if (do_callback && i->f->dev_loss_tmo_callbk)
                i->f->dev_loss_tmo_callbk(rport);
 }