From 8ceaf361ffd131e835aef1e6cdb1d5ba70702617 Mon Sep 17 00:00:00 2001 From: "Karicheri, Muralidharan" Date: Wed, 23 Sep 2015 13:37:11 -0400 Subject: [PATCH] net: netcp: fix deadlock reported by lockup detector A deadlock trace is seen in netcp driver with lockup detector enabled. The trace log is provided below for reference. This patch fixes the bug by removing the usage of netcp_modules_lock within ndo_ops functions. ndo_{open/close/ioctl)() is already called with rtnl_lock held. So there is no need to hold another mutex for serialization across processes on multiple cores. So remove use of netcp_modules_lock mutex from these ndo ops functions. ndo_set_rx_mode() shouldn't be using a mutex as it is called from atomic context. In the case of ndo_set_rx_mode(), there can be call to this API without rtnl_lock held from an atomic context. As the underlying modules are expected to add address to a hardware table, it is to be protected across concurrent updates and hence a spin lock is used to synchronize the access. Same with ndo_vlan_rx_add_vid() & ndo_vlan_rx_kill_vid(). Probably the netcp_modules_lock is used to protect the module not being removed as part of rmmod. Currently this is not fully implemented and assumes the interface is brought down before doing rmmod of modules. The support for rmmmod while interface is up is expected in a future patch set when additional modules such as pa, qos are added. For now all of the tests such as if up/down, reboot, iperf works fine with this patch applied. Deadlock trace seen with lockup detector enabled is shown below for reference. [ 16.863014] ====================================================== [ 16.869183] [ INFO: possible circular locking dependency detected ] [ 16.875441] 4.1.6-01265-gfb1e101 #1 Tainted: G W [ 16.881176] ------------------------------------------------------- [ 16.887432] ifconfig/1662 is trying to acquire lock: [ 16.892386] (netcp_modules_lock){+.+.+.}, at: [] netcp_ndo_open+0x168/0x518 [ 16.900321] [ 16.900321] but task is already holding lock: [ 16.906144] (rtnl_mutex){+.+.+.}, at: [] devinet_ioctl+0xf8/0x7e4 [ 16.913206] [ 16.913206] which lock already depends on the new lock. [ 16.913206] [ 16.921372] [ 16.921372] the existing dependency chain (in reverse order) is: [ 16.928844] -> #1 (rtnl_mutex){+.+.+.}: [ 16.932865] [] mutex_lock_nested+0x68/0x4a8 [ 16.938521] [] register_netdev+0xc/0x24 [ 16.943831] [] netcp_module_probe+0x214/0x2ec [ 16.949660] [] netcp_register_module+0xd4/0x140 [ 16.955663] [] keystone_gbe_init+0x10/0x28 [ 16.961233] [] do_one_initcall+0xb8/0x1f8 [ 16.966714] [] kernel_init_freeable+0x148/0x1e8 [ 16.972720] [] kernel_init+0xc/0xe8 [ 16.977682] [] ret_from_fork+0x14/0x3c [ 16.982905] -> #0 (netcp_modules_lock){+.+.+.}: [ 16.987619] [] lock_acquire+0x118/0x320 [ 16.992928] [] mutex_lock_nested+0x68/0x4a8 [ 16.998582] [] netcp_ndo_open+0x168/0x518 [ 17.004064] [] __dev_open+0xa8/0x10c [ 17.009112] [] __dev_change_flags+0x94/0x144 [ 17.014853] [] dev_change_flags+0x18/0x48 [ 17.020334] [] devinet_ioctl+0x6dc/0x7e4 [ 17.025729] [] sock_ioctl+0x1d0/0x2a8 [ 17.030865] [] do_vfs_ioctl+0x41c/0x688 [ 17.036173] [] SyS_ioctl+0x34/0x5c [ 17.041046] [] ret_fast_syscall+0x0/0x54 [ 17.046441] [ 17.046441] other info that might help us debug this: [ 17.046441] [ 17.054434] Possible unsafe locking scenario: [ 17.054434] [ 17.060343] CPU0 CPU1 [ 17.064862] ---- ---- [ 17.069381] lock(rtnl_mutex); [ 17.072522] lock(netcp_modules_lock); [ 17.078875] lock(rtnl_mutex); [ 17.084532] lock(netcp_modules_lock); [ 17.088366] [ 17.088366] *** DEADLOCK *** [ 17.088366] [ 17.094279] 1 lock held by ifconfig/1662: [ 17.098278] #0: (rtnl_mutex){+.+.+.}, at: [] devinet_ioctl+0xf8/0x7e4 [ 17.105774] [ 17.105774] stack backtrace: [ 17.110124] CPU: 1 PID: 1662 Comm: ifconfig Tainted: G W 4.1.6-01265-gfb1e101 #1 [ 17.118637] Hardware name: Keystone [ 17.122123] [] (unwind_backtrace) from [] (show_stack+0x10/0x14) [ 17.129862] [] (show_stack) from [] (dump_stack+0x84/0xc4) [ 17.137079] [] (dump_stack) from [] (print_circular_bug+0x210/0x330) [ 17.145161] [] (print_circular_bug) from [] (validate_chain.isra.35+0xf98/0x13ac) [ 17.154372] [] (validate_chain.isra.35) from [] (__lock_acquire+0x52c/0xcc0) [ 17.163149] [] (__lock_acquire) from [] (lock_acquire+0x118/0x320) [ 17.171058] [] (lock_acquire) from [] (mutex_lock_nested+0x68/0x4a8) [ 17.179140] [] (mutex_lock_nested) from [] (netcp_ndo_open+0x168/0x518) [ 17.187484] [] (netcp_ndo_open) from [] (__dev_open+0xa8/0x10c) [ 17.195133] [] (__dev_open) from [] (__dev_change_flags+0x94/0x144) [ 17.203129] [] (__dev_change_flags) from [] (dev_change_flags+0x18/0x48) [ 17.211560] [] (dev_change_flags) from [] (devinet_ioctl+0x6dc/0x7e4) [ 17.219729] [] (devinet_ioctl) from [] (sock_ioctl+0x1d0/0x2a8) [ 17.227378] [] (sock_ioctl) from [] (do_vfs_ioctl+0x41c/0x688) [ 17.234939] [] (do_vfs_ioctl) from [] (SyS_ioctl+0x34/0x5c) [ 17.242242] [] (SyS_ioctl) from [] (ret_fast_syscall+0x0/0x54) [ 17.258855] netcp-1.0 2620110.netcp eth0: Link is Up - 1Gbps/Full - flow control off [ 17.271282] BUG: sleeping function called from invalid context at kernel/locking/mutex.c:616 [ 17.279712] in_atomic(): 1, irqs_disabled(): 0, pid: 1662, name: ifconfig [ 17.286500] INFO: lockdep is turned off. [ 17.290413] Preemption disabled at:[< (null)>] (null) [ 17.295728] [ 17.297214] CPU: 1 PID: 1662 Comm: ifconfig Tainted: G W 4.1.6-01265-gfb1e101 #1 [ 17.305735] Hardware name: Keystone [ 17.309223] [] (unwind_backtrace) from [] (show_stack+0x10/0x14) [ 17.316970] [] (show_stack) from [] (dump_stack+0x84/0xc4) [ 17.324194] [] (dump_stack) from [] (mutex_lock_nested+0x28/0x4a8) [ 17.332112] [] (mutex_lock_nested) from [] (netcp_set_rx_mode+0x160/0x210) [ 17.340724] [] (netcp_set_rx_mode) from [] (dev_set_rx_mode+0x1c/0x28) [ 17.348982] [] (dev_set_rx_mode) from [] (__dev_open+0xc4/0x10c) [ 17.356724] [] (__dev_open) from [] (__dev_change_flags+0x94/0x144) [ 17.364729] [] (__dev_change_flags) from [] (dev_change_flags+0x18/0x48) [ 17.373166] [] (dev_change_flags) from [] (devinet_ioctl+0x6dc/0x7e4) [ 17.381344] [] (devinet_ioctl) from [] (sock_ioctl+0x1d0/0x2a8) [ 17.388994] [] (sock_ioctl) from [] (do_vfs_ioctl+0x41c/0x688) [ 17.396563] [] (do_vfs_ioctl) from [] (SyS_ioctl+0x34/0x5c) [ 17.403873] [] (SyS_ioctl) from [] (ret_fast_syscall+0x0/0x54) [ 17.413772] IPv6: ADDRCONF(NETDEV_UP): eth0: link is not ready udhcpc (v1.20.2) started Sending discover... [ 18.690666] netcp-1.0 2620110.netcp eth0: Link is Up - 1Gbps/Full - flow control off Sending discover... [ 22.250972] netcp-1.0 2620110.netcp eth0: Link is Up - 1Gbps/Full - flow control off [ 22.258721] IPv6: ADDRCONF(NETDEV_CHANGE): eth0: link becomes ready [ 22.265458] BUG: sleeping function called from invalid context at kernel/locking/mutex.c:616 [ 22.273896] in_atomic(): 1, irqs_disabled(): 0, pid: 342, name: kworker/1:1 [ 22.280854] INFO: lockdep is turned off. [ 22.284767] Preemption disabled at:[< (null)>] (null) [ 22.290074] [ 22.291568] CPU: 1 PID: 342 Comm: kworker/1:1 Tainted: G W 4.1.6-01265-gfb1e101 #1 [ 22.300255] Hardware name: Keystone [ 22.303750] Workqueue: ipv6_addrconf addrconf_dad_work [ 22.308895] [] (unwind_backtrace) from [] (show_stack+0x10/0x14) [ 22.316643] [] (show_stack) from [] (dump_stack+0x84/0xc4) [ 22.323867] [] (dump_stack) from [] (mutex_lock_nested+0x28/0x4a8) [ 22.331786] [] (mutex_lock_nested) from [] (netcp_set_rx_mode+0x160/0x210) [ 22.340394] [] (netcp_set_rx_mode) from [] (__dev_mc_add+0x54/0x68) [ 22.348401] [] (__dev_mc_add) from [] (igmp6_group_added+0x168/0x1b4) [ 22.356580] [] (igmp6_group_added) from [] (ipv6_dev_mc_inc+0x4f0/0x5a8) [ 22.365019] [] (ipv6_dev_mc_inc) from [] (addrconf_dad_work+0x21c/0x33c) [ 22.373460] [] (addrconf_dad_work) from [] (process_one_work+0x214/0x8d0) [ 22.381986] [] (process_one_work) from [] (worker_thread+0x48/0x4bc) [ 22.390071] [] (worker_thread) from [] (kthread+0xf0/0x108) [ 22.397381] [] (kthread) from [] Trace related to incorrect usage of mutex inside ndo_set_rx_mode [ 24.086066] BUG: sleeping function called from invalid context at kernel/locking/mutex.c:616 [ 24.094506] in_atomic(): 1, irqs_disabled(): 0, pid: 1682, name: ifconfig [ 24.101291] INFO: lockdep is turned off. [ 24.105203] Preemption disabled at:[< (null)>] (null) [ 24.110511] [ 24.112005] CPU: 2 PID: 1682 Comm: ifconfig Tainted: G W 4.1.6-01265-gfb1e101 #1 [ 24.120518] Hardware name: Keystone [ 24.124018] [] (unwind_backtrace) from [] (show_stack+0x10/0x14) [ 24.131772] [] (show_stack) from [] (dump_stack+0x84/0xc4) [ 24.138989] [] (dump_stack) from [] (mutex_lock_nested+0x28/0x4a8) [ 24.146908] [] (mutex_lock_nested) from [] (netcp_set_rx_mode+0x160/0x210) [ 24.155523] [] (netcp_set_rx_mode) from [] (dev_set_rx_mode+0x1c/0x28) [ 24.163787] [] (dev_set_rx_mode) from [] (__dev_open+0xc4/0x10c) [ 24.171531] [] (__dev_open) from [] (__dev_change_flags+0x94/0x144) [ 24.179528] [] (__dev_change_flags) from [] (dev_change_flags+0x18/0x48) [ 24.187966] [] (dev_change_flags) from [] (devinet_ioctl+0x6dc/0x7e4) [ 24.196145] [] (devinet_ioctl) from [] (sock_ioctl+0x1d0/0x2a8) [ 24.203803] [] (sock_ioctl) from [] (do_vfs_ioctl+0x41c/0x688) [ 24.211373] [] (do_vfs_ioctl) from [] (SyS_ioctl+0x34/0x5c) [ 24.218676] [] (SyS_ioctl) from [] (ret_fast_syscall+0x0/0x54) [ 24.227156] IPv6: ADDRCONF(NETDEV_UP): eth1: link is not ready Signed-off-by: Murali Karicheri Signed-off-by: David S. Miller --- drivers/net/ethernet/ti/netcp_core.c | 26 ++++++++++---------------- 1 file changed, 10 insertions(+), 16 deletions(-) diff --git a/drivers/net/ethernet/ti/netcp_core.c b/drivers/net/ethernet/ti/netcp_core.c index 8026daaf940b..9f9832f0dea9 100644 --- a/drivers/net/ethernet/ti/netcp_core.c +++ b/drivers/net/ethernet/ti/netcp_core.c @@ -367,7 +367,6 @@ int netcp_register_module(struct netcp_module *module) if (ret < 0) goto fail; } - mutex_unlock(&netcp_modules_lock); return 0; @@ -1395,7 +1394,6 @@ static void netcp_addr_sweep_del(struct netcp_intf *netcp) continue; dev_dbg(netcp->ndev_dev, "deleting address %pM, type %x\n", naddr->addr, naddr->type); - mutex_lock(&netcp_modules_lock); for_each_module(netcp, priv) { module = priv->netcp_module; if (!module->del_addr) @@ -1404,7 +1402,6 @@ static void netcp_addr_sweep_del(struct netcp_intf *netcp) naddr); WARN_ON(error); } - mutex_unlock(&netcp_modules_lock); netcp_addr_del(netcp, naddr); } } @@ -1421,7 +1418,7 @@ static void netcp_addr_sweep_add(struct netcp_intf *netcp) continue; dev_dbg(netcp->ndev_dev, "adding address %pM, type %x\n", naddr->addr, naddr->type); - mutex_lock(&netcp_modules_lock); + for_each_module(netcp, priv) { module = priv->netcp_module; if (!module->add_addr) @@ -1429,7 +1426,6 @@ static void netcp_addr_sweep_add(struct netcp_intf *netcp) error = module->add_addr(priv->module_priv, naddr); WARN_ON(error); } - mutex_unlock(&netcp_modules_lock); } } @@ -1443,6 +1439,7 @@ static void netcp_set_rx_mode(struct net_device *ndev) ndev->flags & IFF_ALLMULTI || netdev_mc_count(ndev) > NETCP_MAX_MCAST_ADDR); + spin_lock(&netcp->lock); /* first clear all marks */ netcp_addr_clear_mark(netcp); @@ -1461,6 +1458,7 @@ static void netcp_set_rx_mode(struct net_device *ndev) /* finally sweep and callout into modules */ netcp_addr_sweep_del(netcp); netcp_addr_sweep_add(netcp); + spin_unlock(&netcp->lock); } static void netcp_free_navigator_resources(struct netcp_intf *netcp) @@ -1625,7 +1623,6 @@ static int netcp_ndo_open(struct net_device *ndev) goto fail; } - mutex_lock(&netcp_modules_lock); for_each_module(netcp, intf_modpriv) { module = intf_modpriv->netcp_module; if (module->open) { @@ -1636,7 +1633,6 @@ static int netcp_ndo_open(struct net_device *ndev) } } } - mutex_unlock(&netcp_modules_lock); napi_enable(&netcp->rx_napi); napi_enable(&netcp->tx_napi); @@ -1653,7 +1649,6 @@ fail_open: if (module->close) module->close(intf_modpriv->module_priv, ndev); } - mutex_unlock(&netcp_modules_lock); fail: netcp_free_navigator_resources(netcp); @@ -1677,7 +1672,6 @@ static int netcp_ndo_stop(struct net_device *ndev) napi_disable(&netcp->rx_napi); napi_disable(&netcp->tx_napi); - mutex_lock(&netcp_modules_lock); for_each_module(netcp, intf_modpriv) { module = intf_modpriv->netcp_module; if (module->close) { @@ -1686,7 +1680,6 @@ static int netcp_ndo_stop(struct net_device *ndev) dev_err(netcp->ndev_dev, "Close failed\n"); } } - mutex_unlock(&netcp_modules_lock); /* Recycle Rx descriptors from completion queue */ netcp_empty_rx_queue(netcp); @@ -1714,7 +1707,6 @@ static int netcp_ndo_ioctl(struct net_device *ndev, if (!netif_running(ndev)) return -EINVAL; - mutex_lock(&netcp_modules_lock); for_each_module(netcp, intf_modpriv) { module = intf_modpriv->netcp_module; if (!module->ioctl) @@ -1730,7 +1722,6 @@ static int netcp_ndo_ioctl(struct net_device *ndev, } out: - mutex_unlock(&netcp_modules_lock); return (ret == 0) ? 0 : err; } @@ -1765,11 +1756,12 @@ static int netcp_rx_add_vid(struct net_device *ndev, __be16 proto, u16 vid) struct netcp_intf *netcp = netdev_priv(ndev); struct netcp_intf_modpriv *intf_modpriv; struct netcp_module *module; + unsigned long flags; int err = 0; dev_dbg(netcp->ndev_dev, "adding rx vlan id: %d\n", vid); - mutex_lock(&netcp_modules_lock); + spin_lock_irqsave(&netcp->lock, flags); for_each_module(netcp, intf_modpriv) { module = intf_modpriv->netcp_module; if ((module->add_vid) && (vid != 0)) { @@ -1781,7 +1773,8 @@ static int netcp_rx_add_vid(struct net_device *ndev, __be16 proto, u16 vid) } } } - mutex_unlock(&netcp_modules_lock); + spin_unlock_irqrestore(&netcp->lock, flags); + return err; } @@ -1790,11 +1783,12 @@ static int netcp_rx_kill_vid(struct net_device *ndev, __be16 proto, u16 vid) struct netcp_intf *netcp = netdev_priv(ndev); struct netcp_intf_modpriv *intf_modpriv; struct netcp_module *module; + unsigned long flags; int err = 0; dev_dbg(netcp->ndev_dev, "removing rx vlan id: %d\n", vid); - mutex_lock(&netcp_modules_lock); + spin_lock_irqsave(&netcp->lock, flags); for_each_module(netcp, intf_modpriv) { module = intf_modpriv->netcp_module; if (module->del_vid) { @@ -1806,7 +1800,7 @@ static int netcp_rx_kill_vid(struct net_device *ndev, __be16 proto, u16 vid) } } } - mutex_unlock(&netcp_modules_lock); + spin_unlock_irqrestore(&netcp->lock, flags); return err; } -- 2.34.1