drm/i915: Remove nested work in gpu error handling

author Mika Kuoppala <mika.kuoppala@linux.intel.com>
Wed, 28 Jan 2015 15:03:14 +0000 (17:03 +0200)
committer Daniel Vetter <daniel.vetter@ffwll.ch>
Thu, 29 Jan 2015 17:03:07 +0000 (18:03 +0100)
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c

index 3b332a49367474210946e7222abd85fba1a4a48b..211d4949a675866924d3707b0f56d4db82dc803c 100644 (file)
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -3969,6 +3969,17 @@ i915_wedged_set(void *data, u64 val)
         struct drm_device *dev = data;
         struct drm_i915_private *dev_priv = dev->dev_private;
  
+       /*
+        * There is no safeguard against this debugfs entry colliding
+        * with the hangcheck calling the same i915_handle_error() in
+        * parallel, causing an explosion. For now we assume that the
+        * test harness is responsible enough not to inject gpu hangs
+        * while it is writing to 'i915_wedged'
+        */
+
+       if (i915_reset_in_progress(&dev_priv->gpu_error))
+               return -EAGAIN;
+
         intel_runtime_pm_get(dev_priv);
  
         i915_handle_error(dev, val,
diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c

index 6eaf79504b582b977545eaf740b9115c59ce9562..1a46787129e7a2c5022a0ecf7ced69418afa8320 100644 (file)
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -945,7 +945,6 @@ int i915_driver_unload(struct drm_device *dev)
  
         /* Free error state after interrupts are fully disabled. */
         cancel_delayed_work_sync(&dev_priv->gpu_error.hangcheck_work);
-       cancel_work_sync(&dev_priv->gpu_error.work);
         i915_destroy_error_state(dev);
  
         if (dev->pdev->msi_enabled)
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h

index 7add8cd6a54f33e8c7fb0d59fbcd9a78ebb576ec..1c01034a9b0a9062103859e017a2772b7002c95f 100644 (file)
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1352,8 +1352,6 @@ struct i915_gpu_error {
         spinlock_t lock;
         /* Protected by the above dev->gpu_error.lock. */
         struct drm_i915_error_state *first_error;
-       struct work_struct work;
-
  
         unsigned long missed_irq_rings;
  
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c

index 23bfe2232b6a830d195a153ac858d0f8f966efcc..53c5f9e39fe3d4485ccd976f6c4b84cc94209fe4 100644 (file)
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -2421,19 +2421,15 @@ static void i915_error_wake_up(struct drm_i915_private *dev_priv,
  }
  
  /**
- * i915_error_work_func - do process context error handling work
- * @work: work struct
+ * i915_reset_and_wakeup - do process context error handling work
   *
   * Fire an error uevent so userspace can see that a hang or error
   * was detected.
   */
-static void i915_error_work_func(struct work_struct *work)
+static void i915_reset_and_wakeup(struct drm_device *dev)
  {
-       struct i915_gpu_error *error = container_of(work, struct i915_gpu_error,
-                                                   work);
-       struct drm_i915_private *dev_priv =
-               container_of(error, struct drm_i915_private, gpu_error);
-       struct drm_device *dev = dev_priv->dev;
+       struct drm_i915_private *dev_priv = to_i915(dev);
+       struct i915_gpu_error *error = &dev_priv->gpu_error;
         char *error_event[] = { I915_ERROR_UEVENT "=1", NULL };
         char *reset_event[] = { I915_RESET_UEVENT "=1", NULL };
         char *reset_done_event[] = { I915_ERROR_UEVENT "=0", NULL };
@@ -2600,10 +2596,10 @@ static void i915_report_and_clear_eir(struct drm_device *dev)
  }
  
  /**
- * i915_handle_error - handle an error interrupt
+ * i915_handle_error - handle a gpu error
   * @dev: drm device
   *
- * Do some basic checking of regsiter state at error interrupt time and
+ * Do some basic checking of register state at error time and
   * dump it to the syslog.  Also call i915_capture_error_state() to make
   * sure we get a record and make it available in debugfs.  Fire a uevent
   * so userspace knows something bad happened (should trigger collection
@@ -2616,6 +2612,9 @@ void i915_handle_error(struct drm_device *dev, bool wedged,
         va_list args;
         char error_msg[80];
  
+       if (WARN_ON(mutex_is_locked(&dev_priv->dev->struct_mutex)))
+               return;
+
         va_start(args, fmt);
         vscnprintf(error_msg, sizeof(error_msg), fmt, args);
         va_end(args);
@@ -2628,9 +2627,9 @@ void i915_handle_error(struct drm_device *dev, bool wedged,
                                 &dev_priv->gpu_error.reset_counter);
  
                 /*
-                * Wakeup waiting processes so that the reset work function
-                * i915_error_work_func doesn't deadlock trying to grab various
-                * locks. By bumping the reset counter first, the woken
+                * Wakeup waiting processes so that the reset function
+                * i915_reset_and_wakeup doesn't deadlock trying to grab
+                * various locks. By bumping the reset counter first, the woken
                  * processes will see a reset in progress and back off,
                  * releasing their locks and then wait for the reset completion.
                  * We must do this for _all_ gpu waiters that might hold locks
@@ -2643,13 +2642,7 @@ void i915_handle_error(struct drm_device *dev, bool wedged,
                 i915_error_wake_up(dev_priv, false);
         }
  
-       /*
-        * Our reset work can grab modeset locks (since it needs to reset the
-        * state of outstanding pagelips). Hence it must not be run on our own
-        * dev-priv->wq work queue for otherwise the flush_work in the pageflip
-        * code will deadlock.
-        */
-       schedule_work(&dev_priv->gpu_error.work);
+       i915_reset_and_wakeup(dev);
  }
  
  /* Called from drm generic code, passed 'crtc' which
@@ -4345,7 +4338,6 @@ void intel_irq_init(struct drm_i915_private *dev_priv)
  
         INIT_WORK(&dev_priv->hotplug_work, i915_hotplug_work_func);
         INIT_WORK(&dev_priv->dig_port_work, i915_digport_work_func);
-       INIT_WORK(&dev_priv->gpu_error.work, i915_error_work_func);
         INIT_WORK(&dev_priv->rps.work, gen6_pm_rps_work);
         INIT_WORK(&dev_priv->l3_parity.error_work, ivybridge_parity_work);
author	Mika Kuoppala <mika.kuoppala@linux.intel.com>
	Wed, 28 Jan 2015 15:03:14 +0000 (17:03 +0200)
committer	Daniel Vetter <daniel.vetter@ffwll.ch>
	Thu, 29 Jan 2015 17:03:07 +0000 (18:03 +0100)
drivers/gpu/drm/i915/i915_debugfs.c		patch \| blob \| history
drivers/gpu/drm/i915/i915_dma.c		patch \| blob \| history
drivers/gpu/drm/i915/i915_drv.h		patch \| blob \| history
drivers/gpu/drm/i915/i915_irq.c		patch \| blob \| history