From 848e7d5b46b9b0ee613a106bc460acf6a09a8546 Mon Sep 17 00:00:00 2001 From: Robert Love Date: Thu, 25 Aug 2011 12:40:47 -0700 Subject: [PATCH] [SCSI] fcoe: Fix deadlock between fip's recv_work and rtnl The rtnl cannot be held durrng the fcoe_interface_put. If it is the last reference on the fcoe_interface the fcoe_ctlr_destroy will be called as a part of the cleanup, ultimately calling cancel_work_sync(&fip->recv_work); If we are processing a flogi response we will be in the recv_work context and we will lock the rtnl to add a new unicast MAC address. This is how the deadlock can occur. The fix is simply to move the rtnl_lock/unlock into fcoe_interface_cleanup so that it can be unlocked before fcoe_interface_put is called. Here is the lockdep report: Jul 21 11:26:35 bubba [ 223.870702] ul 21 11:26:35 bubba [ 223.870704] ======================================================= Jul 21 11:26:35 bubba [ 223.871255] [ INFO: possible circular locking dependency detected ] Jul 21 11:26:35 bubba [ 223.871530] 3.0.0-rc7+ #1 Jul 21 11:26:35 bubba [ 223.871797] ------------------------------------------------------- Jul 21 11:26:35 bubba [ 223.872072] lockdeptest.sh/3464 is trying to acquire lock: Jul 21 11:26:35 bubba [ 223.872345] ((&fip->recv_work) Jul 21 11:26:35 bubba ){+.+.+.} Jul 21 11:26:35 bubba , at: Jul 21 11:26:35 bubba [] wait_on_work+0x0/0xbd Jul 21 11:26:35 bubba [ 223.873022] Jul 21 11:26:35 bubba [ 223.873023] but task is already holding lock: Jul 21 11:26:35 bubba [ 223.873555] (rtnl_mutex Jul 21 11:26:35 bubba ){+.+.+.} Jul 21 11:26:35 bubba , at: Jul 21 11:26:35 bubba [] rtnl_lock+0x12/0x14 Jul 21 11:26:35 bubba [ 223.874229] Jul 21 11:26:35 bubba [ 223.874230] which lock already depends on the new lock. Jul 21 11:26:35 bubba [ 223.874231] Jul 21 11:26:35 bubba [ 223.875032] Jul 21 11:26:35 bubba [ 223.875033] the existing dependency chain (in reverse order) is: Jul 21 11:26:35 bubba [ 223.875573] Jul 21 11:26:35 bubba [ 223.875573] -> #1 Jul 21 11:26:35 bubba (rtnl_mutex Jul 21 11:26:35 bubba ){+.+.+.} Jul 21 11:26:35 bubba : Jul 21 11:26:35 bubba [ 223.876301] Jul 21 11:26:35 bubba [] lock_acquire+0xd2/0xf7 Jul 21 11:26:35 bubba [ 223.876645] Jul 21 11:26:35 bubba [] __mutex_lock_common+0x47/0x30d Jul 21 11:26:35 bubba [ 223.876991] Jul 21 11:26:35 bubba [] mutex_lock_nested+0x3b/0x40 Jul 21 11:26:35 bubba [ 223.877334] Jul 21 11:26:35 bubba [] rtnl_lock+0x12/0x14 Jul 21 11:26:35 bubba [ 223.877675] Jul 21 11:26:35 bubba [] fcoe_update_src_mac+0x2b/0x80 [fcoe] Jul 21 11:26:35 bubba [ 223.878022] Jul 21 11:26:35 bubba [] fcoe_flogi_resp+0x5e/0x79 [fcoe] Jul 21 11:26:35 bubba [ 223.878366] Jul 21 11:26:35 bubba [] fc_exch_recv+0x7f5/0x9da [libfc] Jul 21 11:26:35 bubba [ 223.878713] Jul 21 11:26:35 bubba [] fcoe_ctlr_recv_work+0x71f/0x10dc [libfcoe] Jul 21 11:26:35 bubba [ 223.879258] Jul 21 11:26:35 bubba [] process_one_work+0x1d7/0x347 Jul 21 11:26:35 bubba [ 223.879601] Jul 21 11:26:35 bubba [] worker_thread+0xf8/0x17c Jul 21 11:26:35 bubba [ 223.879944] Jul 21 11:26:35 bubba [] kthread+0x7d/0x85 Jul 21 11:26:35 bubba [ 223.880287] Jul 21 11:26:35 bubba [] kernel_thread_helper+0x4/0x10 Jul 21 11:26:35 bubba [ 223.880634] Jul 21 11:26:35 bubba [ 223.880635] -> #0 Jul 21 11:26:35 bubba ((&fip->recv_work) Jul 21 11:26:35 bubba ){+.+.+.} Jul 21 11:26:35 bubba : Jul 21 11:26:35 bubba [ 223.881357] Jul 21 11:26:35 bubba [] __lock_acquire+0xb1d/0xe2c Jul 21 11:26:35 bubba [ 223.881695] Jul 21 11:26:35 bubba [] lock_acquire+0xd2/0xf7 Jul 21 11:26:35 bubba [ 223.882033] Jul 21 11:26:35 bubba [] wait_on_work+0x50/0xbd Jul 21 11:26:35 bubba [ 223.882378] Jul 21 11:26:35 bubba [] __cancel_work_timer+0xb6/0xf4 Jul 21 11:26:35 bubba [ 223.882718] Jul 21 11:26:35 bubba [] cancel_work_sync+0xb/0xd Jul 21 11:26:35 bubba [ 223.883057] Jul 21 11:26:35 bubba [] fcoe_ctlr_destroy+0x1d/0x67 [libfcoe] Jul 21 11:26:35 bubba [ 223.883399] Jul 21 11:26:35 bubba [] fcoe_interface_release+0x21/0x45 [fcoe] Jul 21 11:26:35 bubba [ 223.883940] Jul 21 11:26:35 bubba [] kref_put+0x43/0x4d Jul 21 11:26:35 bubba [ 223.884280] Jul 21 11:26:35 bubba [] fcoe_interface_put+0x17/0x19 [fcoe] Jul 21 11:26:35 bubba [ 223.884624] Jul 21 11:26:35 bubba [] fcoe_interface_cleanup+0x188/0x193 [fcoe] Jul 21 11:26:35 bubba [ 223.885163] Jul 21 11:26:35 bubba [] fcoe_destroy+0x52/0x72 [fcoe] Jul 21 11:26:35 bubba [ 223.885502] Jul 21 11:26:35 bubba [] fcoe_transport_destroy+0xab/0x110 [libfcoe] Jul 21 11:26:35 bubba [ 223.886045] Jul 21 11:26:35 bubba [] param_attr_store+0x43/0x62 Jul 21 11:26:35 bubba [ 223.886385] Jul 21 11:26:35 bubba [] module_attr_store+0x21/0x25 Jul 21 11:26:35 bubba [ 223.886728] Jul 21 11:26:35 bubba [] sysfs_write_file+0x103/0x13f Jul 21 11:26:35 bubba [ 223.887068] Jul 21 11:26:35 bubba [] vfs_write+0xa7/0xfa Jul 21 11:26:35 bubba [ 223.887406] Jul 21 11:26:35 bubba [] sys_write+0x45/0x69 Jul 21 11:26:35 bubba [ 223.887742] Jul 21 11:26:35 bubba [] system_call_fastpath+0x16/0x1b Jul 21 11:26:35 bubba [ 223.888083] Jul 21 11:26:35 bubba [ 223.888084] other info that might help us debug this: Jul 21 11:26:35 bubba [ 223.888085] Jul 21 11:26:35 bubba [ 223.888879] Possible unsafe locking scenario: Jul 21 11:26:35 bubba [ 223.888881] Jul 21 11:26:35 bubba [ 223.889411] CPU0 CPU1 Jul 21 11:26:35 bubba [ 223.889683] ---- ---- Jul 21 11:26:35 bubba [ 223.889955] lock( Jul 21 11:26:35 bubba rtnl_mutex Jul 21 11:26:35 bubba ); Jul 21 11:26:35 bubba [ 223.890349] lock( Jul 21 11:26:35 bubba (&fip->recv_work) Jul 21 11:26:35 bubba ); Jul 21 11:26:35 bubba [ 223.890751] lock( Jul 21 11:26:35 bubba rtnl_mutex Jul 21 11:26:35 bubba ); Jul 21 11:26:35 bubba [ 223.891154] lock( Jul 21 11:26:35 bubba (&fip->recv_work) Jul 21 11:26:35 bubba ); Jul 21 11:26:35 bubba [ 223.891549] Jul 21 11:26:35 bubba [ 223.891550] *** DEADLOCK *** Jul 21 11:26:35 bubba [ 223.891551] Jul 21 11:26:35 bubba [ 223.892347] 6 locks held by lockdeptest.sh/3464: Jul 21 11:26:35 bubba [ 223.892621] #0: Jul 21 11:26:35 bubba (&buffer->mutex Jul 21 11:26:35 bubba ){+.+.+.} Jul 21 11:26:35 bubba , at: Jul 21 11:26:35 bubba [] sysfs_write_file+0x37/0x13f Jul 21 11:26:35 bubba [ 223.893359] #1: Jul 21 11:26:35 bubba (s_active Jul 21 11:26:35 bubba ){++++.+} Jul 21 11:26:35 bubba , at: Jul 21 11:26:35 bubba [] sysfs_write_file+0xe2/0x13f Jul 21 11:26:35 bubba [ 223.894094] #2: Jul 21 11:26:35 bubba (param_lock Jul 21 11:26:35 bubba ){+.+.+.} Jul 21 11:26:35 bubba , at: Jul 21 11:26:35 bubba [] param_attr_store+0x36/0x62 Jul 21 11:26:35 bubba [ 223.894835] #3: Jul 21 11:26:35 bubba (ft_mutex Jul 21 11:26:35 bubba ){+.+.+.} Jul 21 11:26:35 bubba , at: Jul 21 11:26:35 bubba [] fcoe_transport_destroy+0x1e/0x110 [libfcoe] Jul 21 11:26:35 bubba [ 223.895574] #4: Jul 21 11:26:35 bubba (fcoe_config_mutex Jul 21 11:26:35 bubba ){+.+.+.} Jul 21 11:26:35 bubba , at: Jul 21 11:26:35 bubba [] fcoe_destroy+0x18/0x72 [fcoe] Jul 21 11:26:35 bubba [ 223.896314] #5: Jul 21 11:26:35 bubba (rtnl_mutex Jul 21 11:26:35 bubba ){+.+.+.} Jul 21 11:26:35 bubba , at: Jul 21 11:26:35 bubba [] rtnl_lock+0x12/0x14 Jul 21 11:26:35 bubba [ 223.897047] Jul 21 11:26:35 bubba [ 223.897048] stack backtrace: Jul 21 11:26:35 bubba [ 223.897578] Pid: 3464, comm: lockdeptest.sh Not tainted 3.0.0-rc7+ #1 Jul 21 11:26:35 bubba [ 223.897853] Call Trace: Jul 21 11:26:35 bubba [ 223.898128] [] print_circular_bug+0x1f8/0x209 Jul 21 11:26:35 bubba [ 223.898416] [] __lock_acquire+0xb1d/0xe2c Jul 21 11:26:35 bubba [ 223.898699] [] ? wait_on_cpu_work+0xe6/0xe6 Jul 21 11:26:35 bubba [ 223.898982] [] lock_acquire+0xd2/0xf7 Jul 21 11:26:35 bubba [ 223.899263] [] ? wait_on_cpu_work+0xe6/0xe6 Jul 21 11:26:35 bubba [ 223.899547] [] ? mod_timer+0x8f/0x98 Jul 21 11:26:35 bubba [ 223.899827] [] wait_on_work+0x50/0xbd Jul 21 11:26:35 bubba [ 223.900108] [] ? wait_on_cpu_work+0xe6/0xe6 Jul 21 11:26:35 bubba [ 223.900390] [] __cancel_work_timer+0xb6/0xf4 Jul 21 11:26:35 bubba [ 223.900671] [] cancel_work_sync+0xb/0xd Jul 21 11:26:35 bubba [ 223.900953] [] fcoe_ctlr_destroy+0x1d/0x67 [libfcoe] Jul 21 11:26:35 bubba [ 223.901237] [] fcoe_interface_release+0x21/0x45 [fcoe] Jul 21 11:26:35 bubba [ 223.901522] [] ? fcoe_enable+0x6b/0x6b [fcoe] Jul 21 11:26:35 bubba [ 223.901803] [] kref_put+0x43/0x4d Jul 21 11:26:35 bubba [ 223.902083] [] fcoe_interface_put+0x17/0x19 [fcoe] Jul 21 11:26:35 bubba [ 223.902367] [] fcoe_interface_cleanup+0x188/0x193 [fcoe] Jul 21 11:26:35 bubba [ 223.902653] [] ? mutex_lock_nested+0x3b/0x40 Jul 21 11:26:35 bubba [ 223.902939] [] fcoe_destroy+0x52/0x72 [fcoe] Jul 21 11:26:35 bubba [ 223.903223] [] fcoe_transport_destroy+0xab/0x110 [libfcoe] Jul 21 11:26:35 bubba [ 223.903508] [] param_attr_store+0x43/0x62 Jul 21 11:26:35 bubba [ 223.903792] [] module_attr_store+0x21/0x25 Jul 21 11:26:35 bubba [ 223.904075] [] sysfs_write_file+0x103/0x13f Jul 21 11:26:35 bubba [ 223.904357] [] vfs_write+0xa7/0xfa Jul 21 11:26:35 bubba [ 223.904642] [] ? fget_light+0x35/0x96 Jul 21 11:26:35 bubba [ 223.904923] [] sys_write+0x45/0x69 Jul 21 11:26:35 bubba [ 223.905204] [] system_call_fastpath+0x16/0x1b Jul 21 11:26:36 bubba [ 223.964438] ixgbe 0000:05:00.0: eth3: detected SFP+: 5 Jul 21 11:26:37 bubba [ 225.196702] ixgbe 0000:05:00.0: eth3: NIC Link is Up 10 Gbps, Flow Control: None Signed-off-by: Robert Love Tested-by: Ross Brattain Reviewed-by: Yi Zou Signed-off-by: James Bottomley --- drivers/scsi/fcoe/fcoe.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/scsi/fcoe/fcoe.c b/drivers/scsi/fcoe/fcoe.c index ba710e350ac5..5d0e9a24ae94 100644 --- a/drivers/scsi/fcoe/fcoe.c +++ b/drivers/scsi/fcoe/fcoe.c @@ -432,6 +432,8 @@ void fcoe_interface_cleanup(struct fcoe_interface *fcoe) u8 flogi_maddr[ETH_ALEN]; const struct net_device_ops *ops; + rtnl_lock(); + /* * Don't listen for Ethernet packets anymore. * synchronize_net() ensures that the packet handlers are not running @@ -461,6 +463,8 @@ void fcoe_interface_cleanup(struct fcoe_interface *fcoe) " specific feature for LLD.\n"); } + rtnl_unlock(); + /* Release the self-reference taken during fcoe_interface_create() */ fcoe_interface_put(fcoe); } @@ -1951,11 +1955,8 @@ static void fcoe_destroy_work(struct work_struct *work) fcoe_if_destroy(port->lport); /* Do not tear down the fcoe interface for NPIV port */ - if (!npiv) { - rtnl_lock(); + if (!npiv) fcoe_interface_cleanup(fcoe); - rtnl_unlock(); - } mutex_unlock(&fcoe_config_mutex); } @@ -2009,8 +2010,9 @@ static int fcoe_create(struct net_device *netdev, enum fip_state fip_mode) printk(KERN_ERR "fcoe: Failed to create interface (%s)\n", netdev->name); rc = -EIO; + rtnl_unlock(); fcoe_interface_cleanup(fcoe); - goto out_nodev; + goto out_nortnl; } /* Make this the "master" N_Port */ @@ -2027,6 +2029,7 @@ static int fcoe_create(struct net_device *netdev, enum fip_state fip_mode) out_nodev: rtnl_unlock(); +out_nortnl: mutex_unlock(&fcoe_config_mutex); return rc; } -- 2.34.1