Skip to content

Commit ec431ea

Browse files
llandwerlin-intel and tursulin
authored and committed
drm/i915/perf: lock powergating configuration to default when active
If some of the contexts submitting workloads to the GPU have been configured to shut down slices/subslices, we might lose the NOA configurations written in the NOA muxes. One possible solution to this problem is to reprogram the NOA muxes when we switch to a new context. We initially tried this in the workaround batchbuffer but some concerns were raised about the cost of reprogramming at every context switch. This solution is also not without consequences from the userspace point of view. Reprogramming of the muxes can only happen once the powergating configuration has changed (which happens after context switch). This means for a window of time during the recording, counters recorded by the OA unit might be invalid. This requires userspace dealing with OA reports to discard the invalid values. Minimizing the reprogramming could be implemented by tracking the last programmed configuration somewhere in GGTT and using MI_PREDICATE to discard some of the programming commands, but the command streamer would still have to parse all the MI_LRI instructions in the workaround batchbuffer. Another solution, which this change implements, is to simply disregard the user requested configuration for the period of time when i915/perf is active. On most platforms there are no issues with this apart from a performance penalty for some media workloads that benefit from running on a partially powergated GPU. We already prevent RC6 from affecting the programming so it doesn't sound completely unreasonable to hold on powergating for the same reason. On Icelake however there would be a functional problem if the slices not containing the VME block were left enabled with a running media workload which explicitly disabled them. To avoid a GPU hang in this case, on Icelake we lock the enablement to only slices which contain VME blocks. Downside is that it means degraded GPU performance when OA is active but there is no known alternative solution for this.
v2: Leave RPCS programming in intel_lrc.c (Lionel) v3: Update for s/union intel_sseu/struct intel_sseu/ (Lionel) More to_intel_context() (Tvrtko) s/dev_priv/i915/ (Tvrtko) Tvrtko Ursulin: v4: * Rebase for make_rpcs changes. v5: * Apply OA restriction from make_rpcs directly. v6: * Rebase for context image setup changes. v7: * Move stream assignment before metric enable. v8-9: * Rebase. v10: * Squashed with ICL support patch. Bspec: 21140 Co-developed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Cc: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> # v9 Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20190205095032.22673-2-tvrtko.ursulin@linux.intel.com
1 parent 87f1ef2 commit ec431ea

File tree

3 files changed

+46
-15
lines changed

3 files changed

+46
-15
lines changed

drivers/gpu/drm/i915/i915_perf.c

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1677,6 +1677,11 @@ static void gen8_update_reg_state_unlocked(struct i915_gem_context *ctx,
16771677

16781678
CTX_REG(reg_state, state_offset, flex_regs[i], value);
16791679
}
1680+
1681+
CTX_REG(reg_state, CTX_R_PWR_CLK_STATE, GEN8_R_PWR_CLK_STATE,
1682+
gen8_make_rpcs(dev_priv,
1683+
&to_intel_context(ctx,
1684+
dev_priv->engine[RCS])->sseu));
16801685
}
16811686

16821687
/*
@@ -2098,21 +2103,21 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
20982103
if (ret)
20992104
goto err_lock;
21002105

2106+
stream->ops = &i915_oa_stream_ops;
2107+
dev_priv->perf.oa.exclusive_stream = stream;
2108+
21012109
ret = dev_priv->perf.oa.ops.enable_metric_set(stream);
21022110
if (ret) {
21032111
DRM_DEBUG("Unable to enable metric set\n");
21042112
goto err_enable;
21052113
}
21062114

2107-
stream->ops = &i915_oa_stream_ops;
2108-
2109-
dev_priv->perf.oa.exclusive_stream = stream;
2110-
21112115
mutex_unlock(&dev_priv->drm.struct_mutex);
21122116

21132117
return 0;
21142118

21152119
err_enable:
2120+
dev_priv->perf.oa.exclusive_stream = NULL;
21162121
dev_priv->perf.oa.ops.disable_metric_set(dev_priv);
21172122
mutex_unlock(&dev_priv->drm.struct_mutex);
21182123

drivers/gpu/drm/i915/intel_lrc.c

Lines changed: 35 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1266,9 +1266,6 @@ static int __context_pin(struct i915_gem_context *ctx, struct i915_vma *vma)
12661266
return i915_vma_pin(vma, 0, 0, flags);
12671267
}
12681268

1269-
static u32
1270-
make_rpcs(struct drm_i915_private *i915, struct intel_sseu *ctx_sseu);
1271-
12721269
static void
12731270
__execlists_update_reg_state(struct intel_engine_cs *engine,
12741271
struct intel_context *ce)
@@ -1282,8 +1279,8 @@ __execlists_update_reg_state(struct intel_engine_cs *engine,
12821279

12831280
/* RPCS */
12841281
if (engine->class == RENDER_CLASS)
1285-
regs[CTX_R_PWR_CLK_STATE + 1] = make_rpcs(engine->i915,
1286-
&ce->sseu);
1282+
regs[CTX_R_PWR_CLK_STATE + 1] = gen8_make_rpcs(engine->i915,
1283+
&ce->sseu);
12871284
}
12881285

12891286
static struct intel_context *
@@ -2433,13 +2430,12 @@ int logical_xcs_ring_init(struct intel_engine_cs *engine)
24332430
return logical_ring_init(engine);
24342431
}
24352432

2436-
static u32
2437-
make_rpcs(struct drm_i915_private *i915, struct intel_sseu *ctx_sseu)
2433+
u32 gen8_make_rpcs(struct drm_i915_private *i915, struct intel_sseu *req_sseu)
24382434
{
24392435
const struct sseu_dev_info *sseu = &RUNTIME_INFO(i915)->sseu;
24402436
bool subslice_pg = sseu->has_subslice_pg;
2441-
u8 slices = hweight8(ctx_sseu->slice_mask);
2442-
u8 subslices = hweight8(ctx_sseu->subslice_mask);
2437+
struct intel_sseu ctx_sseu;
2438+
u8 slices, subslices;
24432439
u32 rpcs = 0;
24442440

24452441
/*
@@ -2449,6 +2445,34 @@ make_rpcs(struct drm_i915_private *i915, struct intel_sseu *ctx_sseu)
24492445
if (INTEL_GEN(i915) < 9)
24502446
return 0;
24512447

2448+
/*
2449+
* If i915/perf is active, we want a stable powergating configuration
2450+
* on the system.
2451+
*
2452+
* We could choose full enablement, but on ICL we know there are use
2453+
* cases which disable slices for functional, apart for performance
2454+
* reasons. So in this case we select a known stable subset.
2455+
*/
2456+
if (!i915->perf.oa.exclusive_stream) {
2457+
ctx_sseu = *req_sseu;
2458+
} else {
2459+
ctx_sseu = intel_device_default_sseu(i915);
2460+
2461+
if (IS_GEN(i915, 11)) {
2462+
/*
2463+
* We only need subslice count so it doesn't matter
2464+
* which ones we select - just turn off low bits in the
2465+
* amount of half of all available subslices per slice.
2466+
*/
2467+
ctx_sseu.subslice_mask =
2468+
~(~0 << (hweight8(ctx_sseu.subslice_mask) / 2));
2469+
ctx_sseu.slice_mask = 0x1;
2470+
}
2471+
}
2472+
2473+
slices = hweight8(ctx_sseu.slice_mask);
2474+
subslices = hweight8(ctx_sseu.subslice_mask);
2475+
24522476
/*
24532477
* Since the SScount bitfield in GEN8_R_PWR_CLK_STATE is only three bits
24542478
* wide and Icelake has up to eight subslices, specfial programming is
@@ -2518,13 +2542,13 @@ make_rpcs(struct drm_i915_private *i915, struct intel_sseu *ctx_sseu)
25182542
if (sseu->has_eu_pg) {
25192543
u32 val;
25202544

2521-
val = ctx_sseu->min_eus_per_subslice << GEN8_RPCS_EU_MIN_SHIFT;
2545+
val = ctx_sseu.min_eus_per_subslice << GEN8_RPCS_EU_MIN_SHIFT;
25222546
GEM_BUG_ON(val & ~GEN8_RPCS_EU_MIN_MASK);
25232547
val &= GEN8_RPCS_EU_MIN_MASK;
25242548

25252549
rpcs |= val;
25262550

2527-
val = ctx_sseu->max_eus_per_subslice << GEN8_RPCS_EU_MAX_SHIFT;
2551+
val = ctx_sseu.max_eus_per_subslice << GEN8_RPCS_EU_MAX_SHIFT;
25282552
GEM_BUG_ON(val & ~GEN8_RPCS_EU_MAX_MASK);
25292553
val &= GEN8_RPCS_EU_MAX_MASK;
25302554

drivers/gpu/drm/i915/intel_lrc.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -112,4 +112,6 @@ void intel_execlists_show_requests(struct intel_engine_cs *engine,
112112
const char *prefix),
113113
unsigned int max);
114114

115+
u32 gen8_make_rpcs(struct drm_i915_private *i915, struct intel_sseu *ctx_sseu);
116+
115117
#endif /* _INTEL_LRC_H_ */

0 commit comments

Comments
 (0)