
Commit c41937f

mwiniars authored and ickle committed
drm/i915/guc: Preemption! With GuC
Pretty similar to what we have on execlists. We're reusing most of the
GEM code, however, due to GuC quirks we need a couple of extra bits.

Preemption is implemented as a GuC action, and actions can be pretty
slow. Because of that, we're using a mutex to serialize them. Since
we're requesting preemption from the tasklet, the task of creating a
workitem and wrapping it in a GuC action is delegated to a worker.

To tell that preemption has finished, we're using an additional piece
of the HWSP, and since we're not getting context switch interrupts,
we're also adding a user interrupt.

The fact that our special preempt context has completed unfortunately
doesn't mean that we're ready to submit new work. We also need to wait
for GuC to finish its own processing.

v2: Don't compile out the wait for GuC, handle workqueue flush on
reset, no need for ordered workqueue, put on a reviewer hat when
looking at my own patches (Chris)
Move struct work around in intel_guc, move user interrupt outside of
conditional (Michał)
Keep ring around rather than chase through intel_context

v3: Extract WA for flushing ggtt writes to a helper (Chris)
Keep work_struct in intel_guc rather than engine (Michał)
Use ordered workqueue for inject_preempt worker to avoid GuC quirks.

v4: Drop now unused INTEL_GUC_PREEMPT_OPTION_IMMEDIATE (Daniele)
Drop stray newlines, use container_of for intel_guc in worker, check
for presence of workqueue when flushing it, rather than
enable_guc_submission modparam, reorder preempt postprocessing (Chris)

v5: Make wq NULL after destroying it

v6: Swap struct guc_preempt_work members (Michał)

Signed-off-by: Michał Winiarski <michal.winiarski@intel.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Jeff McGee <jeff.mcgee@intel.com>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Oscar Mateo <oscar.mateo@intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Link: https://patchwork.freedesktop.org/patch/msgid/20171026133558.19580-1-michal.winiarski@intel.com
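An editor's aside on the constraint the message describes: the submission tasklet runs in atomic context and must not sleep, while GuC actions are slow and mutex-serialized, hence the handoff to a worker. Below is a minimal userspace analogy of that handoff, not driver code; the names (guc_action_send, worker, engine) are illustrative stand-ins. Compile with -lpthread.

#include <pthread.h>
#include <stdio.h>
#include <unistd.h>

static pthread_mutex_t action_mutex = PTHREAD_MUTEX_INITIALIZER;

static void guc_action_send(int engine)   /* slow and mutex-serialized */
{
	pthread_mutex_lock(&action_mutex);
	usleep(1000);                      /* "actions can be pretty slow" */
	printf("preempt action sent for engine %d\n", engine);
	pthread_mutex_unlock(&action_mutex);
}

static void *worker(void *arg)             /* may sleep: does the send */
{
	guc_action_send(*(int *)arg);
	return NULL;
}

int main(void)
{
	pthread_t t;
	int engine = 0;

	/* "tasklet": must not block, so it only hands off to the worker */
	pthread_create(&t, NULL, worker, &engine);
	pthread_join(t, NULL);
	return 0;
}

The same shape appears in the patch below: i915_guc_dequeue() only calls queue_work(), and inject_preempt_context() does the sleeping intel_guc_send().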
1 parent a4598d1 commit c41937f

7 files changed: +222 −18 lines changed

drivers/gpu/drm/i915/i915_drv.c

Lines changed: 1 addition & 2 deletions
@@ -373,8 +373,7 @@ static int i915_getparam(struct drm_device *dev, void *data,
 			value |= I915_SCHEDULER_CAP_PRIORITY;
 
 			if (HAS_LOGICAL_RING_PREEMPTION(dev_priv) &&
-			    i915_modparams.enable_execlists &&
-			    !i915_modparams.enable_guc_submission)
+			    i915_modparams.enable_execlists)
 				value |= I915_SCHEDULER_CAP_PREEMPTION;
 		}
 		break;

drivers/gpu/drm/i915/i915_gem.c

Lines changed: 10 additions & 0 deletions
@@ -2921,6 +2921,16 @@ i915_gem_reset_prepare_engine(struct intel_engine_cs *engine)
 	tasklet_kill(&engine->execlists.irq_tasklet);
 	tasklet_disable(&engine->execlists.irq_tasklet);
 
+	/*
+	 * In GuC submission mode, we're using a worker to queue preemption
+	 * requests from the tasklet. Even though the tasklet was disabled,
+	 * we may still have a worker queued. Let's make sure that all
+	 * workers scheduled before disabling the tasklet are completed
+	 * before continuing with the reset.
+	 */
+	if (engine->i915->guc.preempt_wq)
+		flush_workqueue(engine->i915->guc.preempt_wq);
+
 	if (engine->irq_seqno_barrier)
 		engine->irq_seqno_barrier(engine);
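For readers unfamiliar with the primitive: the hunk above leans on the guarantee that flush_workqueue() does not return until every work item queued before the call has finished executing. A hedged, self-contained sketch of that guarantee as a hypothetical out-of-tree module — not part of this patch, though the workqueue APIs are real:

#include <linux/module.h>
#include <linux/workqueue.h>
#include <linux/delay.h>

static struct workqueue_struct *wq;
static struct work_struct slow_work;

static void slow_fn(struct work_struct *work)
{
	msleep(100);            /* stand-in for inject_preempt_context() */
	pr_info("work done\n");
}

static int __init demo_init(void)
{
	wq = alloc_workqueue("flush-demo", 0, 0);
	if (!wq)
		return -ENOMEM;
	INIT_WORK(&slow_work, slow_fn);
	queue_work(wq, &slow_work);
	flush_workqueue(wq);    /* returns only after slow_fn() completed */
	pr_info("flush returned - no worker can still be running\n");
	return 0;
}

static void __exit demo_exit(void)
{
	destroy_workqueue(wq);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");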

drivers/gpu/drm/i915/i915_guc_submission.c

Lines changed: 195 additions & 13 deletions
@@ -565,6 +565,108 @@ static void guc_add_request(struct intel_guc *guc,
 	spin_unlock(&client->wq_lock);
 }
 
+/*
+ * When we're doing submissions using the regular execlists backend, writing
+ * to the ELSP from the CPU side is enough to make sure that writes to
+ * ringbuffer pages pinned in the mappable aperture portion of the GGTT are
+ * visible to the command streamer. Writes done by GuC on our behalf do not
+ * guarantee such ordering; therefore, to ensure the flush, we're issuing a
+ * POSTING READ.
+ */
+static void flush_ggtt_writes(struct i915_vma *vma)
+{
+	struct drm_i915_private *dev_priv = to_i915(vma->obj->base.dev);
+
+	if (i915_vma_is_map_and_fenceable(vma))
+		POSTING_READ_FW(GUC_STATUS);
+}
+
+#define GUC_PREEMPT_FINISHED 0x1
+#define GUC_PREEMPT_BREADCRUMB_DWORDS 0x8
+static void inject_preempt_context(struct work_struct *work)
+{
+	struct guc_preempt_work *preempt_work =
+		container_of(work, typeof(*preempt_work), work);
+	struct intel_engine_cs *engine = preempt_work->engine;
+	struct intel_guc *guc = container_of(preempt_work, typeof(*guc),
+					     preempt_work[engine->id]);
+	struct i915_guc_client *client = guc->preempt_client;
+	struct intel_ring *ring = client->owner->engine[engine->id].ring;
+	u32 ctx_desc = lower_32_bits(intel_lr_context_descriptor(client->owner,
+								 engine));
+	u32 *cs = ring->vaddr + ring->tail;
+	u32 data[7];
+
+	if (engine->id == RCS) {
+		cs = gen8_emit_ggtt_write_rcs(cs, GUC_PREEMPT_FINISHED,
+				intel_hws_preempt_done_address(engine));
+	} else {
+		cs = gen8_emit_ggtt_write(cs, GUC_PREEMPT_FINISHED,
+				intel_hws_preempt_done_address(engine));
+		*cs++ = MI_NOOP;
+		*cs++ = MI_NOOP;
+	}
+	*cs++ = MI_USER_INTERRUPT;
+	*cs++ = MI_NOOP;
+
+	GEM_BUG_ON(!IS_ALIGNED(ring->size,
+			       GUC_PREEMPT_BREADCRUMB_DWORDS * sizeof(u32)));
+	GEM_BUG_ON((void *)cs - (ring->vaddr + ring->tail) !=
+		   GUC_PREEMPT_BREADCRUMB_DWORDS * sizeof(u32));
+
+	ring->tail += GUC_PREEMPT_BREADCRUMB_DWORDS * sizeof(u32);
+	ring->tail &= (ring->size - 1);
+
+	flush_ggtt_writes(ring->vma);
+
+	spin_lock_irq(&client->wq_lock);
+	guc_wq_item_append(client, engine->guc_id, ctx_desc,
+			   ring->tail / sizeof(u64), 0);
+	spin_unlock_irq(&client->wq_lock);
+
+	data[0] = INTEL_GUC_ACTION_REQUEST_PREEMPTION;
+	data[1] = client->stage_id;
+	data[2] = INTEL_GUC_PREEMPT_OPTION_DROP_WORK_Q |
+		  INTEL_GUC_PREEMPT_OPTION_DROP_SUBMIT_Q;
+	data[3] = engine->guc_id;
+	data[4] = guc->execbuf_client->priority;
+	data[5] = guc->execbuf_client->stage_id;
+	data[6] = guc_ggtt_offset(guc->shared_data);
+
+	if (WARN_ON(intel_guc_send(guc, data, ARRAY_SIZE(data)))) {
+		execlists_clear_active(&engine->execlists,
+				       EXECLISTS_ACTIVE_PREEMPT);
+		tasklet_schedule(&engine->execlists.irq_tasklet);
+	}
+}
+
+/*
+ * We're using a user interrupt and a HWSP value to mark that preemption has
+ * finished and the GPU is idle. Normally, we could unwind and continue
+ * similarly to the execlists submission path. Unfortunately, with GuC we
+ * also need to wait for it to finish its own postprocessing before
+ * attempting to submit. Otherwise GuC may silently ignore our submissions,
+ * and thus we risk losing a request at best, executing out-of-order and
+ * causing a kernel panic at worst.
+ */
+#define GUC_PREEMPT_POSTPROCESS_DELAY_MS 10
+static void wait_for_guc_preempt_report(struct intel_engine_cs *engine)
+{
+	struct intel_guc *guc = &engine->i915->guc;
+	struct guc_shared_ctx_data *data = guc->shared_data_vaddr;
+	struct guc_ctx_report *report =
+		&data->preempt_ctx_report[engine->guc_id];
+
+	WARN_ON(wait_for_atomic(report->report_return_status ==
+				INTEL_GUC_REPORT_STATUS_COMPLETE,
+				GUC_PREEMPT_POSTPROCESS_DELAY_MS));
+	/*
+	 * GuC is expecting that we're also going to clear the affected
+	 * context counter; let's also reset the return status to not depend
+	 * on GuC resetting it after receiving another preempt action.
+	 */
+	report->affected_count = 0;
+	report->report_return_status = INTEL_GUC_REPORT_STATUS_UNKNOWN;
+}
+
 /**
  * i915_guc_submit() - Submit commands through GuC
  * @engine: engine associated with the commands
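A small, runnable aside on the tail arithmetic in inject_preempt_context() above: ring->size is a power of two, so masking with (size - 1) wraps the tail, and the IS_ALIGNED check guarantees the 8-dword breadcrumb never straddles the wrap point. A userspace sketch with illustrative values:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define BREADCRUMB_BYTES (8 * sizeof(uint32_t)) /* GUC_PREEMPT_BREADCRUMB_DWORDS */

int main(void)
{
	uint32_t size = 4096;                  /* any power of two */
	uint32_t tail = size - BREADCRUMB_BYTES;

	assert(size % BREADCRUMB_BYTES == 0);  /* mirrors the IS_ALIGNED check */

	tail += BREADCRUMB_BYTES;
	tail &= (size - 1);                    /* wraps 4096 -> 0 */
	printf("tail after wrap: %u\n", (unsigned)tail);
	return 0;
}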
@@ -574,8 +676,7 @@ static void guc_add_request(struct intel_guc *guc,
  */
 static void i915_guc_submit(struct intel_engine_cs *engine)
 {
-	struct drm_i915_private *dev_priv = engine->i915;
-	struct intel_guc *guc = &dev_priv->guc;
+	struct intel_guc *guc = &engine->i915->guc;
 	struct intel_engine_execlists * const execlists = &engine->execlists;
 	struct execlist_port *port = execlists->port;
 	unsigned int n;
@@ -588,8 +689,7 @@ static void i915_guc_submit(struct intel_engine_cs *engine)
 		if (rq && count == 0) {
 			port_set(&port[n], port_pack(rq, ++count));
 
-			if (i915_vma_is_map_and_fenceable(rq->ring->vma))
-				POSTING_READ_FW(GUC_STATUS);
+			flush_ggtt_writes(rq->ring->vma);
 
 			guc_add_request(guc, rq);
 		}
@@ -617,13 +717,32 @@ static void i915_guc_dequeue(struct intel_engine_cs *engine)
 	bool submit = false;
 	struct rb_node *rb;
 
-	if (port_isset(port))
-		port++;
-
 	spin_lock_irq(&engine->timeline->lock);
 	rb = execlists->first;
 	GEM_BUG_ON(rb_first(&execlists->queue) != rb);
-	while (rb) {
+
+	if (!rb)
+		goto unlock;
+
+	if (HAS_LOGICAL_RING_PREEMPTION(engine->i915) && port_isset(port)) {
+		struct guc_preempt_work *preempt_work =
+			&engine->i915->guc.preempt_work[engine->id];
+
+		if (rb_entry(rb, struct i915_priolist, node)->priority >
+		    max(port_request(port)->priotree.priority, 0)) {
+			execlists_set_active(execlists,
+					     EXECLISTS_ACTIVE_PREEMPT);
+			queue_work(engine->i915->guc.preempt_wq,
+				   &preempt_work->work);
+			goto unlock;
+		} else if (port_isset(last_port)) {
+			goto unlock;
+		}
+
+		port++;
+	}
+
+	do {
 		struct i915_priolist *p = rb_entry(rb, typeof(*p), node);
 		struct drm_i915_gem_request *rq, *rn;

@@ -653,14 +772,15 @@ static void i915_guc_dequeue(struct intel_engine_cs *engine)
 		INIT_LIST_HEAD(&p->requests);
 		if (p->priority != I915_PRIORITY_NORMAL)
 			kmem_cache_free(engine->i915->priorities, p);
-	}
+	} while (rb);
 done:
 	execlists->first = rb;
 	if (submit) {
 		port_assign(port, last);
 		execlists_set_active(execlists, EXECLISTS_ACTIVE_USER);
 		i915_guc_submit(engine);
 	}
+unlock:
 	spin_unlock_irq(&engine->timeline->lock);
 }

@@ -669,8 +789,6 @@ static void i915_guc_irq_handler(unsigned long data)
 	struct intel_engine_cs * const engine = (struct intel_engine_cs *)data;
 	struct intel_engine_execlists * const execlists = &engine->execlists;
 	struct execlist_port *port = execlists->port;
-	const struct execlist_port * const last_port =
-		&execlists->port[execlists->port_mask];
 	struct drm_i915_gem_request *rq;
 
 	rq = port_request(&port[0]);
@@ -685,7 +803,19 @@ static void i915_guc_irq_handler(unsigned long data)
 	if (!rq)
 		execlists_clear_active(execlists, EXECLISTS_ACTIVE_USER);
 
-	if (!port_isset(last_port))
+	if (execlists_is_active(execlists, EXECLISTS_ACTIVE_PREEMPT) &&
+	    intel_read_status_page(engine, I915_GEM_HWS_PREEMPT_INDEX) ==
+	    GUC_PREEMPT_FINISHED) {
+		execlists_cancel_port_requests(&engine->execlists);
+		execlists_unwind_incomplete_requests(execlists);
+
+		wait_for_guc_preempt_report(engine);
+
+		execlists_clear_active(execlists, EXECLISTS_ACTIVE_PREEMPT);
+		intel_write_status_page(engine, I915_GEM_HWS_PREEMPT_INDEX, 0);
+	}
+
+	if (!execlists_is_active(execlists, EXECLISTS_ACTIVE_PREEMPT))
 		i915_guc_dequeue(engine);
 }
@@ -1059,6 +1189,51 @@ static void guc_ads_destroy(struct intel_guc *guc)
 	i915_vma_unpin_and_release(&guc->ads_vma);
 }
 
+static int guc_preempt_work_create(struct intel_guc *guc)
+{
+	struct drm_i915_private *dev_priv = guc_to_i915(guc);
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+
+	/*
+	 * Even though both sending a GuC action and adding a new workitem to
+	 * the GuC workqueue are serialized (each with its own locking), since
+	 * we're using multiple engines, it's possible that we're going to
+	 * issue a preempt request with two (or more - each for a different
+	 * engine) workitems in the GuC queue. In this situation, GuC may
+	 * submit all of them, which will make us very confused.
+	 * Our preemption contexts may even already be complete - before we
+	 * even had the chance to send the preempt action to GuC! Rather
+	 * than introducing yet another lock, we can just use an ordered
+	 * workqueue to make sure we're always sending a single preemption
+	 * request with a single workitem.
+	 */
+	guc->preempt_wq = alloc_ordered_workqueue("i915-guc_preempt",
+						  WQ_HIGHPRI);
+	if (!guc->preempt_wq)
+		return -ENOMEM;
+
+	for_each_engine(engine, dev_priv, id) {
+		guc->preempt_work[id].engine = engine;
+		INIT_WORK(&guc->preempt_work[id].work, inject_preempt_context);
+	}
+
+	return 0;
+}
+
+static void guc_preempt_work_destroy(struct intel_guc *guc)
+{
+	struct drm_i915_private *dev_priv = guc_to_i915(guc);
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+
+	for_each_engine(engine, dev_priv, id)
+		cancel_work_sync(&guc->preempt_work[id].work);
+
+	destroy_workqueue(guc->preempt_wq);
+	guc->preempt_wq = NULL;
+}
+
 /*
  * Set up the memory resources to be shared with the GuC (via the GGTT)
  * at firmware loading time.
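A note on the primitive chosen in guc_preempt_work_create() above: an ordered workqueue executes at most one work item at a time, in queueing order, which is what lets the driver avoid introducing another lock. A hedged illustration as a hypothetical module — the workqueue APIs are real, the demo logic is not from the patch:

#include <linux/module.h>
#include <linux/workqueue.h>

static struct workqueue_struct *owq;
static struct work_struct first, second;
static int token;

static void first_fn(struct work_struct *work)
{
	token = 1;
}

static void second_fn(struct work_struct *work)
{
	/* On an ordered workqueue, first_fn has fully finished by now,
	 * so no lock is needed to observe its side effects. */
	WARN_ON(token != 1);
}

static int __init demo_init(void)
{
	owq = alloc_ordered_workqueue("ordered-demo", 0);
	if (!owq)
		return -ENOMEM;
	INIT_WORK(&first, first_fn);
	INIT_WORK(&second, second_fn);
	queue_work(owq, &first);
	queue_work(owq, &second);  /* runs strictly after first_fn */
	return 0;
}

static void __exit demo_exit(void)
{
	flush_workqueue(owq);
	destroy_workqueue(owq);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");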
@@ -1083,12 +1258,18 @@ int i915_guc_submission_init(struct drm_i915_private *dev_priv)
 	if (ret < 0)
 		goto err_shared_data;
 
+	ret = guc_preempt_work_create(guc);
+	if (ret)
+		goto err_log;
+
 	ret = guc_ads_create(guc);
 	if (ret < 0)
-		goto err_log;
+		goto err_wq;
 
 	return 0;
 
+err_wq:
+	guc_preempt_work_destroy(guc);
 err_log:
 	intel_guc_log_destroy(guc);
 err_shared_data:
@@ -1103,6 +1284,7 @@ void i915_guc_submission_fini(struct drm_i915_private *dev_priv)
 	struct intel_guc *guc = &dev_priv->guc;
 
 	guc_ads_destroy(guc);
+	guc_preempt_work_destroy(guc);
 	intel_guc_log_destroy(guc);
 	guc_shared_data_destroy(guc);
 	guc_stage_desc_pool_destroy(guc);

drivers/gpu/drm/i915/intel_guc.h

Lines changed: 8 additions & 0 deletions
@@ -34,6 +34,11 @@
 #include "i915_guc_reg.h"
 #include "i915_vma.h"
 
+struct guc_preempt_work {
+	struct work_struct work;
+	struct intel_engine_cs *engine;
+};
+
 /*
  * Top level structure of GuC. It handles firmware loading and manages client
  * pool and doorbells. intel_guc owns a i915_guc_client to replace the legacy
@@ -60,6 +65,9 @@ struct intel_guc {
 	struct i915_guc_client *execbuf_client;
 	struct i915_guc_client *preempt_client;
 
+	struct guc_preempt_work preempt_work[I915_NUM_ENGINES];
+	struct workqueue_struct *preempt_wq;
+
 	DECLARE_BITMAP(doorbell_bitmap, GUC_NUM_DOORBELLS);
 	/* Cyclic counter mod pagesize */
 	u32 db_cacheline;
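Because struct guc_preempt_work is embedded in struct intel_guc as an array, the worker can recover its intel_guc with container_of(preempt_work, typeof(*guc), preempt_work[engine->id]) instead of storing a back-pointer (the v4 change). A runnable userspace demo of that trick, with simplified stand-in types:

#include <stddef.h>
#include <stdio.h>

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct work { int id; };
struct guc  { int magic; struct work preempt_work[2]; };

int main(void)
{
	struct guc g = { .magic = 42 };
	struct work *w = &g.preempt_work[1];

	/* recover the containing struct from the embedded array element */
	struct guc *back = container_of(w, struct guc, preempt_work[1]);

	printf("%d\n", back->magic);  /* 42 */
	return 0;
}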

drivers/gpu/drm/i915/intel_lrc.c

Lines changed: 2 additions & 2 deletions
@@ -385,7 +385,7 @@ static void __unwind_incomplete_requests(struct intel_engine_cs *engine)
 	}
 }
 
-static void
+void
 execlists_unwind_incomplete_requests(struct intel_engine_execlists *execlists)
 {
 	struct intel_engine_cs *engine =
@@ -696,7 +696,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 	}
 }
 
-static void
+void
 execlists_cancel_port_requests(struct intel_engine_execlists * const execlists)
 {
 	struct execlist_port *port = execlists->port;

drivers/gpu/drm/i915/intel_lrc.h

Lines changed: 0 additions & 1 deletion
@@ -107,7 +107,6 @@ intel_lr_context_descriptor(struct i915_gem_context *ctx,
 	return ctx->engine[engine->id].lrc_desc;
 }
 
-
 /* Execlists */
 int intel_sanitize_enable_execlists(struct drm_i915_private *dev_priv,
 				    int enable_execlists);

drivers/gpu/drm/i915/intel_ringbuffer.h

Lines changed: 6 additions & 0 deletions
@@ -557,6 +557,12 @@ execlists_is_active(const struct intel_engine_execlists *execlists,
 	return test_bit(bit, (unsigned long *)&execlists->active);
 }
 
+void
+execlists_cancel_port_requests(struct intel_engine_execlists * const execlists);
+
+void
+execlists_unwind_incomplete_requests(struct intel_engine_execlists *execlists);
+
 static inline unsigned int
 execlists_num_ports(const struct intel_engine_execlists * const execlists)
 {
