From: Benoit Goby Date: Tue, 15 Feb 2011 05:32:43 +0000 (-0800) Subject: PM: Add watchdog to catch lockup during device resume X-Git-Tag: firefly_0821_release~4090^2~694 X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=6893020d33ee9de7fc6e56c44e281ce1dc1a64c1;p=firefly-linux-kernel-4.4.55.git PM: Add watchdog to catch lockup during device resume Refactor the dpm suspend watchdog code and add watchdogs on resume too. The dpm watchdog prints the stack trace and reboots the system if a device takes more than 12 seconds to suspend or resume. Change-Id: If00c047a17b80bdc13a8426393c698bc450a7347 Signed-off-by: Benoit Goby --- diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index 70312e7cacbe..6a33dd85c044 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -56,10 +56,10 @@ struct suspend_stats suspend_stats; static DEFINE_MUTEX(dpm_list_mtx); static pm_message_t pm_transition; -static void dpm_drv_timeout(unsigned long data); -struct dpm_drv_wd_data { - struct device *dev; - struct task_struct *tsk; +struct dpm_watchdog { + struct device *dev; + struct task_struct *tsk; + struct timer_list timer; }; static int async_error; @@ -392,6 +392,56 @@ static int dpm_run_callback(pm_callback_t cb, struct device *dev, return error; } +/** + * dpm_wd_handler - Driver suspend / resume watchdog handler. + * + * Called when a driver has timed out suspending or resuming. + * There's not much we can do here to recover so BUG() out for + * a crash-dump + */ +static void dpm_wd_handler(unsigned long data) +{ + struct dpm_watchdog *wd = (void *)data; + struct device *dev = wd->dev; + struct task_struct *tsk = wd->tsk; + + dev_emerg(dev, "**** DPM device timeout ****\n"); + show_stack(tsk, NULL); + + BUG(); +} + +/** + * dpm_wd_set - Enable pm watchdog for given device. + * @wd: Watchdog. Must be allocated on the stack. + * @dev: Device to handle. 
+ */ +static void dpm_wd_set(struct dpm_watchdog *wd, struct device *dev) +{ + struct timer_list *timer = &wd->timer; + + wd->dev = dev; + wd->tsk = get_current(); + + init_timer_on_stack(timer); + timer->expires = jiffies + HZ * 12; + timer->function = dpm_wd_handler; + timer->data = (unsigned long)wd; + add_timer(timer); +} + +/** + * dpm_wd_clear - Disable pm watchdog. + * @wd: Watchdog to disable. + */ +static void dpm_wd_clear(struct dpm_watchdog *wd) +{ + struct timer_list *timer = &wd->timer; + + del_timer_sync(timer); + destroy_timer_on_stack(timer); +} + /*------------------------- Resume routines -------------------------*/ /** @@ -578,6 +628,7 @@ static int device_resume(struct device *dev, pm_message_t state, bool async) pm_callback_t callback = NULL; char *info = NULL; int error = 0; + struct dpm_watchdog wd; TRACE_DEVICE(dev); TRACE_RESUME(0); @@ -593,6 +644,7 @@ static int device_resume(struct device *dev, pm_message_t state, bool async) * a resumed device, even if the device hasn't been completed yet. */ dev->power.is_prepared = false; + dpm_wd_set(&wd, dev); if (!dev->power.is_suspended) goto Unlock; @@ -644,6 +696,7 @@ static int device_resume(struct device *dev, pm_message_t state, bool async) Unlock: device_unlock(dev); + dpm_wd_clear(&wd); Complete: complete_all(&dev->power.completion); @@ -670,30 +723,6 @@ static bool is_async(struct device *dev) && !pm_trace_is_enabled(); } -/** - * dpm_drv_timeout - Driver suspend / resume watchdog handler - * @data: struct device which timed out - * - * Called when a driver has timed out suspending or resuming. - * There's not much we can do here to recover so - * BUG() out for a crash-dump - * - */ -static void dpm_drv_timeout(unsigned long data) -{ - struct dpm_drv_wd_data *wd_data = (void *)data; - struct device *dev = wd_data->dev; - struct task_struct *tsk = wd_data->tsk; - - printk(KERN_EMERG "**** DPM device timeout: %s (%s)\n", dev_name(dev), - (dev->driver ? 
dev->driver->name : "no driver")); - - printk(KERN_EMERG "dpm suspend stack:\n"); - show_stack(tsk, NULL); - - BUG(); -} - /** * dpm_resume - Execute "resume" callbacks for non-sysdev devices. * @state: PM transition of the system being carried out. @@ -1085,8 +1114,7 @@ static int __device_suspend(struct device *dev, pm_message_t state, bool async) pm_callback_t callback = NULL; char *info = NULL; int error = 0; - struct timer_list timer; - struct dpm_drv_wd_data data; + struct dpm_watchdog wd; dpm_wait_for_children(dev, async); @@ -1110,13 +1138,7 @@ static int __device_suspend(struct device *dev, pm_message_t state, bool async) if (dev->power.syscore) goto Complete; - data.dev = dev; - data.tsk = get_current(); - init_timer_on_stack(&timer); - timer.expires = jiffies + HZ * 12; - timer.function = dpm_drv_timeout; - timer.data = (unsigned long)&data; - add_timer(&timer); + dpm_wd_set(&wd, dev); device_lock(dev); @@ -1173,8 +1195,7 @@ static int __device_suspend(struct device *dev, pm_message_t state, bool async) device_unlock(dev); - del_timer_sync(&timer); - destroy_timer_on_stack(&timer); + dpm_wd_clear(&wd); Complete: complete_all(&dev->power.completion);