Skip to content

Commit 14b730f

Browse files
arun.siluvery@linux.intel.com authored and mikuint committed
drm/i915/tdr: Prepare error handler to accept mask of hung engines
In preparation for engine reset, the wedged argument of i915_handle_error() is extended into a mask of the engines that are hung. This mask is further passed down to the error state capture functions, which are also updated. The engine reset recovery mechanism uses this mask and schedules recovery work for those particular engines.

Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Mika Kuoppala <mika.kuoppala@intel.com>
Signed-off-by: Tomas Elf <tomas.elf@intel.com>
Signed-off-by: Arun Siluvery <arun.siluvery@linux.intel.com>
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
Signed-off-by: Mika Kuoppala <mika.kuoppala@intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/1458331676-567-3-git-send-email-arun.siluvery@linux.intel.com
1 parent fc0768c commit 14b730f

File tree

3 files changed

+14
-14
lines changed

3 files changed

+14
-14
lines changed

drivers/gpu/drm/i915/i915_drv.h

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -2757,7 +2757,7 @@ bool intel_hpd_pin_to_port(enum hpd_pin pin, enum port *port);
27572757
/* i915_irq.c */
27582758
void i915_queue_hangcheck(struct drm_device *dev);
27592759
__printf(3, 4)
2760-
void i915_handle_error(struct drm_device *dev, bool wedged,
2760+
void i915_handle_error(struct drm_device *dev, u32 engine_mask,
27612761
const char *fmt, ...);
27622762

27632763
extern void intel_irq_init(struct drm_i915_private *dev_priv);
@@ -3343,7 +3343,7 @@ static inline void i915_error_state_buf_release(
33433343
{
33443344
kfree(eb->buf);
33453345
}
3346-
void i915_capture_error_state(struct drm_device *dev, bool wedge,
3346+
void i915_capture_error_state(struct drm_device *dev, u32 engine_mask,
33473347
const char *error_msg);
33483348
void i915_error_state_get(struct drm_device *dev,
33493349
struct i915_error_state_file_priv *error_priv);

drivers/gpu/drm/i915/i915_gpu_error.c

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1301,7 +1301,7 @@ static void i915_capture_reg_state(struct drm_i915_private *dev_priv,
13011301

13021302
static void i915_error_capture_msg(struct drm_device *dev,
13031303
struct drm_i915_error_state *error,
1304-
bool wedged,
1304+
u32 engine_mask,
13051305
const char *error_msg)
13061306
{
13071307
struct drm_i915_private *dev_priv = dev->dev_private;
@@ -1324,7 +1324,7 @@ static void i915_error_capture_msg(struct drm_device *dev,
13241324
scnprintf(error->error_msg + len, sizeof(error->error_msg) - len,
13251325
", reason: %s, action: %s",
13261326
error_msg,
1327-
wedged ? "reset" : "continue");
1327+
engine_mask ? "reset" : "continue");
13281328
}
13291329

13301330
static void i915_capture_gen_state(struct drm_i915_private *dev_priv,
@@ -1347,7 +1347,7 @@ static void i915_capture_gen_state(struct drm_i915_private *dev_priv,
13471347
* out a structure which becomes available in debugfs for user level tools
13481348
* to pick up.
13491349
*/
1350-
void i915_capture_error_state(struct drm_device *dev, bool wedged,
1350+
void i915_capture_error_state(struct drm_device *dev, u32 engine_mask,
13511351
const char *error_msg)
13521352
{
13531353
static bool warned;
@@ -1375,7 +1375,7 @@ void i915_capture_error_state(struct drm_device *dev, bool wedged,
13751375
error->overlay = intel_overlay_capture_error_state(dev);
13761376
error->display = intel_display_capture_error_state(dev);
13771377

1378-
i915_error_capture_msg(dev, error, wedged, error_msg);
1378+
i915_error_capture_msg(dev, error, engine_mask, error_msg);
13791379
DRM_INFO("%s\n", error->error_msg);
13801380

13811381
spin_lock_irqsave(&dev_priv->gpu_error.lock, flags);

drivers/gpu/drm/i915/i915_irq.c

Lines changed: 8 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -2653,14 +2653,14 @@ static void i915_report_and_clear_eir(struct drm_device *dev)
26532653
/**
26542654
* i915_handle_error - handle a gpu error
26552655
* @dev: drm device
2656-
*
2656+
* @engine_mask: mask representing engines that are hung
26572657
* Do some basic checking of register state at error time and
26582658
* dump it to the syslog. Also call i915_capture_error_state() to make
26592659
* sure we get a record and make it available in debugfs. Fire a uevent
26602660
* so userspace knows something bad happened (should trigger collection
26612661
* of a ring dump etc.).
26622662
*/
2663-
void i915_handle_error(struct drm_device *dev, bool wedged,
2663+
void i915_handle_error(struct drm_device *dev, u32 engine_mask,
26642664
const char *fmt, ...)
26652665
{
26662666
struct drm_i915_private *dev_priv = dev->dev_private;
@@ -2671,10 +2671,10 @@ void i915_handle_error(struct drm_device *dev, bool wedged,
26712671
vscnprintf(error_msg, sizeof(error_msg), fmt, args);
26722672
va_end(args);
26732673

2674-
i915_capture_error_state(dev, wedged, error_msg);
2674+
i915_capture_error_state(dev, engine_mask, error_msg);
26752675
i915_report_and_clear_eir(dev);
26762676

2677-
if (wedged) {
2677+
if (engine_mask) {
26782678
atomic_or(I915_RESET_IN_PROGRESS_FLAG,
26792679
&dev_priv->gpu_error.reset_counter);
26802680

@@ -3033,7 +3033,7 @@ ring_stuck(struct intel_engine_cs *engine, u64 acthd)
30333033
*/
30343034
tmp = I915_READ_CTL(engine);
30353035
if (tmp & RING_WAIT) {
3036-
i915_handle_error(dev, false,
3036+
i915_handle_error(dev, 0,
30373037
"Kicking stuck wait on %s",
30383038
engine->name);
30393039
I915_WRITE_CTL(engine, tmp);
@@ -3045,7 +3045,7 @@ ring_stuck(struct intel_engine_cs *engine, u64 acthd)
30453045
default:
30463046
return HANGCHECK_HUNG;
30473047
case 1:
3048-
i915_handle_error(dev, false,
3048+
i915_handle_error(dev, 0,
30493049
"Kicking stuck semaphore on %s",
30503050
engine->name);
30513051
I915_WRITE_CTL(engine, tmp);
@@ -3189,12 +3189,12 @@ static void i915_hangcheck_elapsed(struct work_struct *work)
31893189
DRM_INFO("%s on %s\n",
31903190
stuck[i] ? "stuck" : "no progress",
31913191
engine->name);
3192-
rings_hung++;
3192+
rings_hung |= intel_engine_flag(engine);
31933193
}
31943194
}
31953195

31963196
if (rings_hung) {
3197-
i915_handle_error(dev, true, "Ring hung");
3197+
i915_handle_error(dev, rings_hung, "Engine(s) hung");
31983198
goto out;
31993199
}
32003200

0 commit comments

Comments
 (0)