Commit f6322ed

drm/i915/preemption: Allow preemption between submission ports
Sometimes we need to boost the priority of an in-flight request, which
may lead to the situation where the second submission port then contains
a higher priority context than the first and so we need to inject a
preemption event. To do so we must always check inside execlists_dequeue()
whether there is a priority inversion between the ports themselves as well
as the head of the priority sorted queue, and we cannot just skip dequeuing
if the queue is empty.

As Michał noted, this doesn't simply extend to handling more than 2-port
submission, as we may need to reorder within the array of executing
requests which themselves are lower priority than the first. A task for
later!

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Michał Winiarski <michal.winiarski@intel.com>
Cc: Michel Thierry <michel.thierry@intel.com>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20180222142229.14517-1-chris@chris-wilson.co.uk
Reviewed-by: Michał Winiarski <michal.winiarski@intel.com>
Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
1 parent e532be8 commit f6322ed
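
For illustration only, here is a minimal standalone sketch (not the kernel code itself) of the decision this patch centralises: the scheduler tracks the highest pending priority and preempts only when it strictly exceeds the effective priority of the request already on the hardware, clamped to at least the default priority of 0. The names queue_priority and need_preempt mirror the patch; the struct and the main() harness around them are hypothetical.

#include <limits.h>
#include <stdbool.h>
#include <stdio.h>

/* Mirrors the patch: INT_MIN means "nothing queued", so never preempt. */
struct execlists_model {
	int queue_priority;   /* highest pending priority, INT_MIN when the queue is empty */
	bool preempt_context; /* does the engine have a preempt context available? */
};

static int max_int(int a, int b) { return a > b ? a : b; }

/*
 * A request already on the hardware counts as at least priority 0, so only
 * a strictly higher queued priority forces a preemption event.
 */
static bool need_preempt(const struct execlists_model *el, int last_prio)
{
	return el->preempt_context && el->queue_priority > max_int(last_prio, 0);
}

int main(void)
{
	struct execlists_model el = { .queue_priority = INT_MIN, .preempt_context = true };

	printf("empty queue: %d\n", need_preempt(&el, 0));            /* 0: nothing queued */
	el.queue_priority = 5;
	printf("boosted request queued: %d\n", need_preempt(&el, 0)); /* 1: inversion, preempt */
	printf("already running prio 10: %d\n", need_preempt(&el, 10)); /* 0: no inversion */
	return 0;
}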

File tree

4 files changed: +112 / -78 lines

drivers/gpu/drm/i915/intel_engine_cs.c

Lines changed: 2 additions & 0 deletions
@@ -423,6 +423,7 @@ static void intel_engine_init_execlist(struct intel_engine_cs *engine)
     BUILD_BUG_ON_NOT_POWER_OF_2(execlists_num_ports(execlists));
     GEM_BUG_ON(execlists_num_ports(execlists) > EXECLIST_MAX_PORTS);
 
+    execlists->queue_priority = INT_MIN;
     execlists->queue = RB_ROOT;
     execlists->first = NULL;
 }
@@ -1903,6 +1904,7 @@ void intel_engine_dump(struct intel_engine_cs *engine,
     spin_lock_irq(&engine->timeline->lock);
     list_for_each_entry(rq, &engine->timeline->requests, link)
         print_request(m, rq, "\t\tE ");
+    drm_printf(m, "\t\tQueue priority: %d\n", execlists->queue_priority);
     for (rb = execlists->first; rb; rb = rb_next(rb)) {
         struct i915_priolist *p =
             rb_entry(rb, typeof(*p), node);

drivers/gpu/drm/i915/intel_guc_submission.c

Lines changed: 10 additions & 7 deletions
@@ -75,6 +75,11 @@
  *
  */
 
+static inline struct i915_priolist *to_priolist(struct rb_node *rb)
+{
+    return rb_entry(rb, struct i915_priolist, node);
+}
+
 static inline bool is_high_priority(struct intel_guc_client *client)
 {
     return (client->priority == GUC_CLIENT_PRIORITY_KMD_HIGH ||
@@ -682,15 +687,12 @@ static void guc_dequeue(struct intel_engine_cs *engine)
     rb = execlists->first;
     GEM_BUG_ON(rb_first(&execlists->queue) != rb);
 
-    if (!rb)
-        goto unlock;
-
     if (port_isset(port)) {
         if (engine->i915->preempt_context) {
             struct guc_preempt_work *preempt_work =
                 &engine->i915->guc.preempt_work[engine->id];
 
-            if (rb_entry(rb, struct i915_priolist, node)->priority >
+            if (execlists->queue_priority >
                 max(port_request(port)->priotree.priority, 0)) {
                 execlists_set_active(execlists,
                              EXECLISTS_ACTIVE_PREEMPT);
@@ -706,8 +708,8 @@ static void guc_dequeue(struct intel_engine_cs *engine)
     }
     GEM_BUG_ON(port_isset(port));
 
-    do {
-        struct i915_priolist *p = rb_entry(rb, typeof(*p), node);
+    while (rb) {
+        struct i915_priolist *p = to_priolist(rb);
         struct i915_request *rq, *rn;
 
         list_for_each_entry_safe(rq, rn, &p->requests, priotree.link) {
@@ -736,8 +738,9 @@ static void guc_dequeue(struct intel_engine_cs *engine)
         INIT_LIST_HEAD(&p->requests);
         if (p->priority != I915_PRIORITY_NORMAL)
             kmem_cache_free(engine->i915->priorities, p);
-    } while (rb);
+    }
 done:
+    execlists->queue_priority = rb ? to_priolist(rb)->priority : INT_MIN;
     execlists->first = rb;
     if (submit) {
         port_assign(port, last);

drivers/gpu/drm/i915/intel_lrc.c

Lines changed: 90 additions & 71 deletions
@@ -169,6 +169,23 @@ static void execlists_init_reg_state(u32 *reg_state,
                     struct intel_engine_cs *engine,
                     struct intel_ring *ring);
 
+static inline struct i915_priolist *to_priolist(struct rb_node *rb)
+{
+    return rb_entry(rb, struct i915_priolist, node);
+}
+
+static inline int rq_prio(const struct i915_request *rq)
+{
+    return rq->priotree.priority;
+}
+
+static inline bool need_preempt(const struct intel_engine_cs *engine,
+                const struct i915_request *last,
+                int prio)
+{
+    return engine->i915->preempt_context && prio > max(rq_prio(last), 0);
+}
+
 /**
  * intel_lr_context_descriptor_update() - calculate & cache the descriptor
  * descriptor for a pinned context
@@ -224,7 +241,7 @@ lookup_priolist(struct intel_engine_cs *engine,
     parent = &execlists->queue.rb_node;
     while (*parent) {
         rb = *parent;
-        p = rb_entry(rb, typeof(*p), node);
+        p = to_priolist(rb);
         if (prio > p->priority) {
             parent = &rb->rb_left;
         } else if (prio < p->priority) {
@@ -264,7 +281,7 @@ lookup_priolist(struct intel_engine_cs *engine,
     if (first)
         execlists->first = &p->node;
 
-    return ptr_pack_bits(p, first, 1);
+    return p;
 }
 
 static void unwind_wa_tail(struct i915_request *rq)
@@ -290,14 +307,10 @@ static void __unwind_incomplete_requests(struct intel_engine_cs *engine)
         __i915_request_unsubmit(rq);
         unwind_wa_tail(rq);
 
-        GEM_BUG_ON(rq->priotree.priority == I915_PRIORITY_INVALID);
-        if (rq->priotree.priority != last_prio) {
-            p = lookup_priolist(engine,
-                        &rq->priotree,
-                        rq->priotree.priority);
-            p = ptr_mask_bits(p, 1);
-
-            last_prio = rq->priotree.priority;
+        GEM_BUG_ON(rq_prio(rq) == I915_PRIORITY_INVALID);
+        if (rq_prio(rq) != last_prio) {
+            last_prio = rq_prio(rq);
+            p = lookup_priolist(engine, &rq->priotree, last_prio);
         }
 
         list_add(&rq->priotree.link, &p->requests);
@@ -397,10 +410,11 @@ static void execlists_submit_ports(struct intel_engine_cs *engine)
             desc = execlists_update_context(rq);
             GEM_DEBUG_EXEC(port[n].context_id = upper_32_bits(desc));
 
-            GEM_TRACE("%s in[%d]: ctx=%d.%d, seqno=%x\n",
+            GEM_TRACE("%s in[%d]: ctx=%d.%d, seqno=%x, prio=%d\n",
                   engine->name, n,
                   port[n].context_id, count,
-                  rq->global_seqno);
+                  rq->global_seqno,
+                  rq_prio(rq));
         } else {
             GEM_BUG_ON(!n);
             desc = 0;
@@ -453,12 +467,17 @@ static void inject_preempt_context(struct intel_engine_cs *engine)
            _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT |
                       CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT));
 
+    /*
+     * Switch to our empty preempt context so
+     * the state of the GPU is known (idle).
+     */
     GEM_TRACE("%s\n", engine->name);
     for (n = execlists_num_ports(&engine->execlists); --n; )
         elsp_write(0, engine->execlists.elsp);
 
     elsp_write(ce->lrc_desc, engine->execlists.elsp);
     execlists_clear_active(&engine->execlists, EXECLISTS_ACTIVE_HWACK);
+    execlists_set_active(&engine->execlists, EXECLISTS_ACTIVE_PREEMPT);
 }
 
 static void execlists_dequeue(struct intel_engine_cs *engine)
@@ -495,8 +514,6 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
     spin_lock_irq(&engine->timeline->lock);
     rb = execlists->first;
     GEM_BUG_ON(rb_first(&execlists->queue) != rb);
-    if (!rb)
-        goto unlock;
 
     if (last) {
         /*
@@ -519,54 +536,48 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
         if (!execlists_is_active(execlists, EXECLISTS_ACTIVE_HWACK))
             goto unlock;
 
-        if (engine->i915->preempt_context &&
-            rb_entry(rb, struct i915_priolist, node)->priority >
-            max(last->priotree.priority, 0)) {
-            /*
-             * Switch to our empty preempt context so
-             * the state of the GPU is known (idle).
-             */
+        if (need_preempt(engine, last, execlists->queue_priority)) {
             inject_preempt_context(engine);
-            execlists_set_active(execlists,
-                         EXECLISTS_ACTIVE_PREEMPT);
             goto unlock;
-        } else {
-            /*
-             * In theory, we could coalesce more requests onto
-             * the second port (the first port is active, with
-             * no preemptions pending). However, that means we
-             * then have to deal with the possible lite-restore
-             * of the second port (as we submit the ELSP, there
-             * may be a context-switch) but also we may complete
-             * the resubmission before the context-switch. Ergo,
-             * coalescing onto the second port will cause a
-             * preemption event, but we cannot predict whether
-             * that will affect port[0] or port[1].
-             *
-             * If the second port is already active, we can wait
-             * until the next context-switch before contemplating
-             * new requests. The GPU will be busy and we should be
-             * able to resubmit the new ELSP before it idles,
-             * avoiding pipeline bubbles (momentary pauses where
-             * the driver is unable to keep up the supply of new
-             * work).
-             */
-            if (port_count(&port[1]))
-                goto unlock;
-
-            /* WaIdleLiteRestore:bdw,skl
-             * Apply the wa NOOPs to prevent
-             * ring:HEAD == rq:TAIL as we resubmit the
-             * request. See gen8_emit_breadcrumb() for
-             * where we prepare the padding after the
-             * end of the request.
-             */
-            last->tail = last->wa_tail;
         }
+
+        /*
+         * In theory, we could coalesce more requests onto
+         * the second port (the first port is active, with
+         * no preemptions pending). However, that means we
+         * then have to deal with the possible lite-restore
+         * of the second port (as we submit the ELSP, there
+         * may be a context-switch) but also we may complete
+         * the resubmission before the context-switch. Ergo,
+         * coalescing onto the second port will cause a
+         * preemption event, but we cannot predict whether
+         * that will affect port[0] or port[1].
+         *
+         * If the second port is already active, we can wait
+         * until the next context-switch before contemplating
+         * new requests. The GPU will be busy and we should be
+         * able to resubmit the new ELSP before it idles,
+         * avoiding pipeline bubbles (momentary pauses where
+         * the driver is unable to keep up the supply of new
+         * work). However, we have to double check that the
+         * priorities of the ports haven't been switch.
+         */
+        if (port_count(&port[1]))
+            goto unlock;
+
+        /*
+         * WaIdleLiteRestore:bdw,skl
+         * Apply the wa NOOPs to prevent
+         * ring:HEAD == rq:TAIL as we resubmit the
+         * request. See gen8_emit_breadcrumb() for
+         * where we prepare the padding after the
+         * end of the request.
+         */
+        last->tail = last->wa_tail;
     }
 
-    do {
-        struct i915_priolist *p = rb_entry(rb, typeof(*p), node);
+    while (rb) {
+        struct i915_priolist *p = to_priolist(rb);
         struct i915_request *rq, *rn;
 
         list_for_each_entry_safe(rq, rn, &p->requests, priotree.link) {
@@ -628,8 +639,9 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
         INIT_LIST_HEAD(&p->requests);
         if (p->priority != I915_PRIORITY_NORMAL)
             kmem_cache_free(engine->i915->priorities, p);
-    } while (rb);
+    }
 done:
+    execlists->queue_priority = rb ? to_priolist(rb)->priority : INT_MIN;
     execlists->first = rb;
     if (submit)
         port_assign(port, last);
@@ -690,7 +702,7 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)
     /* Flush the queued requests to the timeline list (for retiring). */
     rb = execlists->first;
     while (rb) {
-        struct i915_priolist *p = rb_entry(rb, typeof(*p), node);
+        struct i915_priolist *p = to_priolist(rb);
 
         list_for_each_entry_safe(rq, rn, &p->requests, priotree.link) {
             INIT_LIST_HEAD(&rq->priotree.link);
@@ -708,7 +720,7 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)
 
     /* Remaining _unready_ requests will be nop'ed when submitted */
 
-
+    execlists->queue_priority = INT_MIN;
     execlists->queue = RB_ROOT;
     execlists->first = NULL;
     GEM_BUG_ON(port_isset(execlists->port));
@@ -864,10 +876,11 @@ static void execlists_submission_tasklet(unsigned long data)
                         EXECLISTS_ACTIVE_USER));
 
             rq = port_unpack(port, &count);
-            GEM_TRACE("%s out[0]: ctx=%d.%d, seqno=%x\n",
+            GEM_TRACE("%s out[0]: ctx=%d.%d, seqno=%x, prio=%d\n",
                   engine->name,
                   port->context_id, count,
-                  rq ? rq->global_seqno : 0);
+                  rq ? rq->global_seqno : 0,
+                  rq ? rq_prio(rq) : 0);
 
             /* Check the context/desc id for this event matches */
             GEM_DEBUG_BUG_ON(buf[2 * head + 1] != port->context_id);
@@ -912,15 +925,19 @@ static void execlists_submission_tasklet(unsigned long data)
         intel_uncore_forcewake_put(dev_priv, execlists->fw_domains);
 }
 
-static void insert_request(struct intel_engine_cs *engine,
-               struct i915_priotree *pt,
-               int prio)
+static void queue_request(struct intel_engine_cs *engine,
+              struct i915_priotree *pt,
+              int prio)
 {
-    struct i915_priolist *p = lookup_priolist(engine, pt, prio);
+    list_add_tail(&pt->link, &lookup_priolist(engine, pt, prio)->requests);
+}
 
-    list_add_tail(&pt->link, &ptr_mask_bits(p, 1)->requests);
-    if (ptr_unmask_bits(p, 1))
+static void submit_queue(struct intel_engine_cs *engine, int prio)
+{
+    if (prio > engine->execlists.queue_priority) {
+        engine->execlists.queue_priority = prio;
         tasklet_hi_schedule(&engine->execlists.tasklet);
+    }
 }
 
 static void execlists_submit_request(struct i915_request *request)
@@ -931,7 +948,8 @@ static void execlists_submit_request(struct i915_request *request)
     /* Will be called from irq-context when using foreign fences. */
    spin_lock_irqsave(&engine->timeline->lock, flags);
 
-    insert_request(engine, &request->priotree, request->priotree.priority);
+    queue_request(engine, &request->priotree, rq_prio(request));
+    submit_queue(engine, rq_prio(request));
 
     GEM_BUG_ON(!engine->execlists.first);
     GEM_BUG_ON(list_empty(&request->priotree.link));
@@ -987,7 +1005,7 @@ static void execlists_schedule(struct i915_request *request, int prio)
      * static void update_priorities(struct i915_priotree *pt, prio) {
      *    list_for_each_entry(dep, &pt->signalers_list, signal_link)
      *        update_priorities(dep->signal, prio)
-     *    insert_request(pt);
+     *    queue_request(pt);
      * }
      * but that may have unlimited recursion depth and so runs a very
     * real risk of overunning the kernel stack. Instead, we build
@@ -1050,8 +1068,9 @@ static void execlists_schedule(struct i915_request *request, int prio)
         pt->priority = prio;
         if (!list_empty(&pt->link)) {
             __list_del_entry(&pt->link);
-            insert_request(engine, pt, prio);
+            queue_request(engine, pt, prio);
         }
+        submit_queue(engine, prio);
     }
 
     spin_unlock_irq(&engine->timeline->lock);

drivers/gpu/drm/i915/intel_ringbuffer.h

Lines changed: 10 additions & 0 deletions
@@ -257,6 +257,16 @@ struct intel_engine_execlists {
      */
     unsigned int port_mask;
 
+    /**
+     * @queue_priority: Highest pending priority.
+     *
+     * When we add requests into the queue, or adjust the priority of
+     * executing requests, we compute the maximum priority of those
+     * pending requests. We can then use this value to determine if
+     * we need to preempt the executing requests to service the queue.
+     */
+    int queue_priority;
+
     /**
     * @queue: queue of requests, in priority lists
     */
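
Taken together, the hunks above give queue_priority a simple lifecycle: initialised to INT_MIN, raised by submit_queue() when a higher-priority request is queued (which also kicks the tasklet), and recomputed from the head of the priority tree at the end of each dequeue. The following is a rough standalone sketch of that flow, not the driver code; the kick() helper is a hypothetical stand-in for tasklet_hi_schedule().

#include <limits.h>
#include <stdbool.h>
#include <stdio.h>

struct sched_model {
	int queue_priority; /* highest pending priority, INT_MIN when the queue is empty */
};

static void init_execlist(struct sched_model *s)
{
	s->queue_priority = INT_MIN;            /* as in intel_engine_init_execlist() */
}

static void kick(void)                          /* stand-in for tasklet_hi_schedule() */
{
	printf("tasklet scheduled\n");
}

static void submit_queue(struct sched_model *s, int prio)
{
	if (prio > s->queue_priority) {         /* only kick on a new maximum */
		s->queue_priority = prio;
		kick();
	}
}

/* After dequeue drains the tree, the field is reset from whatever remains. */
static void dequeue_done(struct sched_model *s, bool queue_empty, int head_prio)
{
	s->queue_priority = queue_empty ? INT_MIN : head_prio;
}

int main(void)
{
	struct sched_model s;

	init_execlist(&s);
	submit_queue(&s, 0);   /* first request: kicks the tasklet */
	submit_queue(&s, -1);  /* lower priority: no extra kick needed */
	submit_queue(&s, 7);   /* priority boost: kick again, may preempt */
	dequeue_done(&s, true, 0);
	printf("after dequeue: %d (INT_MIN=%d)\n", s.queue_priority, INT_MIN);
	return 0;
}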
