@@ -169,6 +169,23 @@ static void execlists_init_reg_state(u32 *reg_state,
 				     struct intel_engine_cs *engine,
 				     struct intel_ring *ring);
 
+static inline struct i915_priolist *to_priolist(struct rb_node *rb)
+{
+	return rb_entry(rb, struct i915_priolist, node);
+}
+
+static inline int rq_prio(const struct i915_request *rq)
+{
+	return rq->priotree.priority;
+}
+
+static inline bool need_preempt(const struct intel_engine_cs *engine,
+				const struct i915_request *last,
+				int prio)
+{
+	return engine->i915->preempt_context && prio > max(rq_prio(last), 0);
+}
+
 /**
  * intel_lr_context_descriptor_update() - calculate & cache the descriptor
  * descriptor for a pinned context
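The three helpers added above centralise the priority bookkeeping used by the rest of the patch. For reference, here is a rough, self-contained userspace model of the need_preempt() rule (hypothetical names model_need_preempt and PRIO_NORMAL; it assumes I915_PRIORITY_NORMAL is 0): preemption is only considered when the engine has a preempt context and the queued priority strictly beats both the last in-flight request and the normal priority, so work at or below normal priority never triggers a preempt cycle.

/* Hypothetical, standalone model of the need_preempt() threshold above. */
#include <assert.h>
#include <stdbool.h>

#define PRIO_NORMAL 0	/* assumed stand-in for I915_PRIORITY_NORMAL */

static int max_int(int a, int b)
{
	return a > b ? a : b;
}

/* Preempt only if a preempt context exists and the queued priority beats
 * both the currently running request and the normal priority. */
static bool model_need_preempt(bool have_preempt_ctx, int last_prio, int queued_prio)
{
	return have_preempt_ctx && queued_prio > max_int(last_prio, PRIO_NORMAL);
}

int main(void)
{
	assert(!model_need_preempt(false, -10, 100));	/* no preempt context */
	assert(!model_need_preempt(true, 0, 0));	/* equal priority */
	assert(!model_need_preempt(true, -5, 0));	/* normal work never preempts */
	assert(model_need_preempt(true, 0, 1));		/* strictly higher priority */
	return 0;
}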
@@ -224,7 +241,7 @@ lookup_priolist(struct intel_engine_cs *engine,
 	parent = &execlists->queue.rb_node;
 	while (*parent) {
 		rb = *parent;
-		p = rb_entry(rb, typeof(*p), node);
+		p = to_priolist(rb);
 		if (prio > p->priority) {
 			parent = &rb->rb_left;
 		} else if (prio < p->priority) {
@@ -264,7 +281,7 @@ lookup_priolist(struct intel_engine_cs *engine,
 	if (first)
 		execlists->first = &p->node;
 
-	return ptr_pack_bits(p, first, 1);
+	return p;
 }
 
 static void unwind_wa_tail(struct i915_request *rq)
@@ -290,14 +307,10 @@ static void __unwind_incomplete_requests(struct intel_engine_cs *engine)
 		__i915_request_unsubmit(rq);
 		unwind_wa_tail(rq);
 
-		GEM_BUG_ON(rq->priotree.priority == I915_PRIORITY_INVALID);
-		if (rq->priotree.priority != last_prio) {
-			p = lookup_priolist(engine,
-					    &rq->priotree,
-					    rq->priotree.priority);
-			p = ptr_mask_bits(p, 1);
-
-			last_prio = rq->priotree.priority;
+		GEM_BUG_ON(rq_prio(rq) == I915_PRIORITY_INVALID);
+		if (rq_prio(rq) != last_prio) {
+			last_prio = rq_prio(rq);
+			p = lookup_priolist(engine, &rq->priotree, last_prio);
 		}
 
 		list_add(&rq->priotree.link, &p->requests);
@@ -397,10 +410,11 @@ static void execlists_submit_ports(struct intel_engine_cs *engine)
 			desc = execlists_update_context(rq);
 			GEM_DEBUG_EXEC(port[n].context_id = upper_32_bits(desc));
 
-			GEM_TRACE("%s in[%d]: ctx=%d.%d, seqno=%x\n",
+			GEM_TRACE("%s in[%d]: ctx=%d.%d, seqno=%x, prio=%d\n",
 				  engine->name, n,
 				  port[n].context_id, count,
-				  rq->global_seqno);
+				  rq->global_seqno,
+				  rq_prio(rq));
 		} else {
 			GEM_BUG_ON(!n);
 			desc = 0;
@@ -453,12 +467,17 @@ static void inject_preempt_context(struct intel_engine_cs *engine)
 		   _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT |
 				      CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT));
 
+	/*
+	 * Switch to our empty preempt context so
+	 * the state of the GPU is known (idle).
+	 */
 	GEM_TRACE("%s\n", engine->name);
 	for (n = execlists_num_ports(&engine->execlists); --n; )
 		elsp_write(0, engine->execlists.elsp);
 
 	elsp_write(ce->lrc_desc, engine->execlists.elsp);
 	execlists_clear_active(&engine->execlists, EXECLISTS_ACTIVE_HWACK);
+	execlists_set_active(&engine->execlists, EXECLISTS_ACTIVE_PREEMPT);
 }
 
 static void execlists_dequeue(struct intel_engine_cs *engine)
@@ -495,8 +514,6 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 	spin_lock_irq(&engine->timeline->lock);
 	rb = execlists->first;
 	GEM_BUG_ON(rb_first(&execlists->queue) != rb);
-	if (!rb)
-		goto unlock;
 
 	if (last) {
 		/*
@@ -519,54 +536,48 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 		if (!execlists_is_active(execlists, EXECLISTS_ACTIVE_HWACK))
 			goto unlock;
 
-		if (engine->i915->preempt_context &&
-		    rb_entry(rb, struct i915_priolist, node)->priority >
-		    max(last->priotree.priority, 0)) {
-			/*
-			 * Switch to our empty preempt context so
-			 * the state of the GPU is known (idle).
-			 */
+		if (need_preempt(engine, last, execlists->queue_priority)) {
 			inject_preempt_context(engine);
-			execlists_set_active(execlists,
-					     EXECLISTS_ACTIVE_PREEMPT);
 			goto unlock;
-		} else {
-			/*
-			 * In theory, we could coalesce more requests onto
-			 * the second port (the first port is active, with
-			 * no preemptions pending). However, that means we
-			 * then have to deal with the possible lite-restore
-			 * of the second port (as we submit the ELSP, there
-			 * may be a context-switch) but also we may complete
-			 * the resubmission before the context-switch. Ergo,
-			 * coalescing onto the second port will cause a
-			 * preemption event, but we cannot predict whether
-			 * that will affect port[0] or port[1].
-			 *
-			 * If the second port is already active, we can wait
-			 * until the next context-switch before contemplating
-			 * new requests. The GPU will be busy and we should be
-			 * able to resubmit the new ELSP before it idles,
-			 * avoiding pipeline bubbles (momentary pauses where
-			 * the driver is unable to keep up the supply of new
-			 * work).
-			 */
-			if (port_count(&port[1]))
-				goto unlock;
-
-			/* WaIdleLiteRestore:bdw,skl
-			 * Apply the wa NOOPs to prevent
-			 * ring:HEAD == rq:TAIL as we resubmit the
-			 * request. See gen8_emit_breadcrumb() for
-			 * where we prepare the padding after the
-			 * end of the request.
-			 */
-			last->tail = last->wa_tail;
 		}
+
+		/*
+		 * In theory, we could coalesce more requests onto
+		 * the second port (the first port is active, with
+		 * no preemptions pending). However, that means we
+		 * then have to deal with the possible lite-restore
+		 * of the second port (as we submit the ELSP, there
+		 * may be a context-switch) but also we may complete
+		 * the resubmission before the context-switch. Ergo,
+		 * coalescing onto the second port will cause a
+		 * preemption event, but we cannot predict whether
+		 * that will affect port[0] or port[1].
+		 *
+		 * If the second port is already active, we can wait
+		 * until the next context-switch before contemplating
+		 * new requests. The GPU will be busy and we should be
+		 * able to resubmit the new ELSP before it idles,
+		 * avoiding pipeline bubbles (momentary pauses where
+		 * the driver is unable to keep up the supply of new
+		 * work). However, we have to double check that the
+		 * priorities of the ports haven't been switched.
+		 */
+		if (port_count(&port[1]))
+			goto unlock;
+
+		/*
+		 * WaIdleLiteRestore:bdw,skl
+		 * Apply the wa NOOPs to prevent
+		 * ring:HEAD == rq:TAIL as we resubmit the
+		 * request. See gen8_emit_breadcrumb() for
+		 * where we prepare the padding after the
+		 * end of the request.
+		 */
+		last->tail = last->wa_tail;
 	}
 
-	do {
-		struct i915_priolist *p = rb_entry(rb, typeof(*p), node);
+	while (rb) {
+		struct i915_priolist *p = to_priolist(rb);
 		struct i915_request *rq, *rn;
 
 		list_for_each_entry_safe(rq, rn, &p->requests, priotree.link) {
@@ -628,8 +639,9 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 		INIT_LIST_HEAD(&p->requests);
 		if (p->priority != I915_PRIORITY_NORMAL)
 			kmem_cache_free(engine->i915->priorities, p);
-	} while (rb);
+	}
 done:
+	execlists->queue_priority = rb ? to_priolist(rb)->priority : INT_MIN;
 	execlists->first = rb;
 	if (submit)
 		port_assign(port, last);
@@ -690,7 +702,7 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)
 	/* Flush the queued requests to the timeline list (for retiring). */
 	rb = execlists->first;
 	while (rb) {
-		struct i915_priolist *p = rb_entry(rb, typeof(*p), node);
+		struct i915_priolist *p = to_priolist(rb);
 
 		list_for_each_entry_safe(rq, rn, &p->requests, priotree.link) {
 			INIT_LIST_HEAD(&rq->priotree.link);
@@ -708,7 +720,7 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)
 
 	/* Remaining _unready_ requests will be nop'ed when submitted */
 
-
+	execlists->queue_priority = INT_MIN;
 	execlists->queue = RB_ROOT;
 	execlists->first = NULL;
 	GEM_BUG_ON(port_isset(execlists->port));
@@ -864,10 +876,11 @@ static void execlists_submission_tasklet(unsigned long data)
 							EXECLISTS_ACTIVE_USER));
 
 			rq = port_unpack(port, &count);
-			GEM_TRACE("%s out[0]: ctx=%d.%d, seqno=%x\n",
+			GEM_TRACE("%s out[0]: ctx=%d.%d, seqno=%x, prio=%d\n",
 				  engine->name,
 				  port->context_id, count,
-				  rq ? rq->global_seqno : 0);
+				  rq ? rq->global_seqno : 0,
+				  rq ? rq_prio(rq) : 0);
 
 			/* Check the context/desc id for this event matches */
 			GEM_DEBUG_BUG_ON(buf[2 * head + 1] != port->context_id);
@@ -912,15 +925,19 @@ static void execlists_submission_tasklet(unsigned long data)
 		intel_uncore_forcewake_put(dev_priv, execlists->fw_domains);
 }
 
-static void insert_request(struct intel_engine_cs *engine,
-			   struct i915_priotree *pt,
-			   int prio)
+static void queue_request(struct intel_engine_cs *engine,
+			  struct i915_priotree *pt,
+			  int prio)
 {
-	struct i915_priolist *p = lookup_priolist(engine, pt, prio);
+	list_add_tail(&pt->link, &lookup_priolist(engine, pt, prio)->requests);
+}
 
-	list_add_tail(&pt->link, &ptr_mask_bits(p, 1)->requests);
-	if (ptr_unmask_bits(p, 1))
+static void submit_queue(struct intel_engine_cs *engine, int prio)
+{
+	if (prio > engine->execlists.queue_priority) {
+		engine->execlists.queue_priority = prio;
 		tasklet_hi_schedule(&engine->execlists.tasklet);
+	}
 }
 
 static void execlists_submit_request(struct i915_request *request)
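The queue_request()/submit_queue() split above pairs with the queue_priority bookkeeping added in execlists_dequeue(): dequeue caches the priority of the first remaining priolist (or INT_MIN when the queue is empty), and submit_queue() only schedules the submission tasklet when a newly queued priority exceeds that cached value. Below is a rough, self-contained userspace model of that gate (hypothetical names model_execlists and model_submit_queue), not the driver code itself:

/* Hypothetical, standalone model of the submit_queue()/queue_priority gate. */
#include <assert.h>
#include <limits.h>

struct model_execlists {
	int queue_priority;	/* highest pending priority, INT_MIN when empty */
	int tasklet_kicks;	/* stand-in for tasklet_hi_schedule() calls */
};

/* Mirror of submit_queue(): only kick the tasklet on a priority bump. */
static void model_submit_queue(struct model_execlists *el, int prio)
{
	if (prio > el->queue_priority) {
		el->queue_priority = prio;
		el->tasklet_kicks++;
	}
}

int main(void)
{
	struct model_execlists el = { .queue_priority = INT_MIN, .tasklet_kicks = 0 };

	model_submit_queue(&el, 0);	/* empty queue: kicks the tasklet */
	model_submit_queue(&el, 0);	/* no priority bump: no extra kick */
	model_submit_queue(&el, 1);	/* higher priority: kicks again */
	assert(el.tasklet_kicks == 2);
	return 0;
}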
@@ -931,7 +948,8 @@ static void execlists_submit_request(struct i915_request *request)
 	/* Will be called from irq-context when using foreign fences. */
 	spin_lock_irqsave(&engine->timeline->lock, flags);
 
-	insert_request(engine, &request->priotree, request->priotree.priority);
+	queue_request(engine, &request->priotree, rq_prio(request));
+	submit_queue(engine, rq_prio(request));
 
 	GEM_BUG_ON(!engine->execlists.first);
 	GEM_BUG_ON(list_empty(&request->priotree.link));
@@ -987,7 +1005,7 @@ static void execlists_schedule(struct i915_request *request, int prio)
 	 * static void update_priorities(struct i915_priotree *pt, prio) {
 	 *	list_for_each_entry(dep, &pt->signalers_list, signal_link)
 	 *		update_priorities(dep->signal, prio)
-	 *	insert_request(pt);
+	 *	queue_request(pt);
 	 * }
 	 * but that may have unlimited recursion depth and so runs a very
 	 * real risk of overrunning the kernel stack. Instead, we build
@@ -1050,8 +1068,9 @@ static void execlists_schedule(struct i915_request *request, int prio)
 		pt->priority = prio;
 		if (!list_empty(&pt->link)) {
 			__list_del_entry(&pt->link);
-			insert_request(engine, pt, prio);
+			queue_request(engine, pt, prio);
 		}
+		submit_queue(engine, prio);
 	}
 
 	spin_unlock_irq(&engine->timeline->lock);