@@ -565,6 +565,108 @@ static void guc_add_request(struct intel_guc *guc,
 	spin_unlock(&client->wq_lock);
 }
 
+/*
+ * When we're doing submissions using the regular execlists backend, writing to
+ * ELSP from the CPU side is enough to make sure that writes to ringbuffer pages
+ * pinned in the mappable aperture portion of GGTT are visible to the command
+ * streamer. Writes done by GuC on our behalf do not guarantee such ordering,
+ * therefore, to ensure the flush, we're issuing a POSTING_READ.
+ */
+static void flush_ggtt_writes(struct i915_vma *vma)
+{
+	struct drm_i915_private *dev_priv = to_i915(vma->obj->base.dev);
+
+	if (i915_vma_is_map_and_fenceable(vma))
+		POSTING_READ_FW(GUC_STATUS);
+}
+
+#define GUC_PREEMPT_FINISHED 0x1
+#define GUC_PREEMPT_BREADCRUMB_DWORDS 0x8
+static void inject_preempt_context(struct work_struct *work)
+{
+	struct guc_preempt_work *preempt_work =
+		container_of(work, typeof(*preempt_work), work);
+	struct intel_engine_cs *engine = preempt_work->engine;
+	struct intel_guc *guc = container_of(preempt_work, typeof(*guc),
+					     preempt_work[engine->id]);
+	struct i915_guc_client *client = guc->preempt_client;
+	struct intel_ring *ring = client->owner->engine[engine->id].ring;
+	u32 ctx_desc = lower_32_bits(intel_lr_context_descriptor(client->owner,
+								 engine));
+	u32 *cs = ring->vaddr + ring->tail;
+	u32 data[7];
+
+	if (engine->id == RCS) {
+		cs = gen8_emit_ggtt_write_rcs(cs, GUC_PREEMPT_FINISHED,
+				intel_hws_preempt_done_address(engine));
+	} else {
+		cs = gen8_emit_ggtt_write(cs, GUC_PREEMPT_FINISHED,
+				intel_hws_preempt_done_address(engine));
+		*cs++ = MI_NOOP;
+		*cs++ = MI_NOOP;
+	}
+	*cs++ = MI_USER_INTERRUPT;
+	*cs++ = MI_NOOP;
+
+	GEM_BUG_ON(!IS_ALIGNED(ring->size,
+			       GUC_PREEMPT_BREADCRUMB_DWORDS * sizeof(u32)));
+	GEM_BUG_ON((void *)cs - (ring->vaddr + ring->tail) !=
+		   GUC_PREEMPT_BREADCRUMB_DWORDS * sizeof(u32));
+
+	ring->tail += GUC_PREEMPT_BREADCRUMB_DWORDS * sizeof(u32);
+	ring->tail &= (ring->size - 1);
+
+	flush_ggtt_writes(ring->vma);
+
+	spin_lock_irq(&client->wq_lock);
+	guc_wq_item_append(client, engine->guc_id, ctx_desc,
+			   ring->tail / sizeof(u64), 0);
+	spin_unlock_irq(&client->wq_lock);
+
+	data[0] = INTEL_GUC_ACTION_REQUEST_PREEMPTION;
+	data[1] = client->stage_id;
+	data[2] = INTEL_GUC_PREEMPT_OPTION_DROP_WORK_Q |
+		  INTEL_GUC_PREEMPT_OPTION_DROP_SUBMIT_Q;
+	data[3] = engine->guc_id;
+	data[4] = guc->execbuf_client->priority;
+	data[5] = guc->execbuf_client->stage_id;
+	data[6] = guc_ggtt_offset(guc->shared_data);
+
+	if (WARN_ON(intel_guc_send(guc, data, ARRAY_SIZE(data)))) {
+		execlists_clear_active(&engine->execlists,
+				       EXECLISTS_ACTIVE_PREEMPT);
+		tasklet_schedule(&engine->execlists.irq_tasklet);
+	}
+}
+
+/*
+ * We're using the user interrupt and a HWSP value to mark that preemption has
+ * finished and the GPU is idle. Normally, we could unwind and continue, similar
+ * to the execlists submission path. Unfortunately, with GuC we also need to
+ * wait for it to finish its own postprocessing before attempting to submit.
+ * Otherwise GuC may silently ignore our submissions, and thus we risk losing
+ * requests at best, executing out-of-order and causing kernel panic at worst.
+ */
+#define GUC_PREEMPT_POSTPROCESS_DELAY_MS 10
+static void wait_for_guc_preempt_report(struct intel_engine_cs *engine)
+{
+	struct intel_guc *guc = &engine->i915->guc;
+	struct guc_shared_ctx_data *data = guc->shared_data_vaddr;
+	struct guc_ctx_report *report =
+		&data->preempt_ctx_report[engine->guc_id];
+
+	WARN_ON(wait_for_atomic(report->report_return_status ==
+				INTEL_GUC_REPORT_STATUS_COMPLETE,
+				GUC_PREEMPT_POSTPROCESS_DELAY_MS));
+	/*
+	 * GuC is expecting that we're also going to clear the affected context
+	 * counter, so let's also reset the return status rather than depend on
+	 * GuC resetting it after receiving another preempt action.
+	 */
+	report->affected_count = 0;
+	report->report_return_status = INTEL_GUC_REPORT_STATUS_UNKNOWN;
+}
+
 /**
  * i915_guc_submit() - Submit commands through GuC
  * @engine: engine associated with the commands
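The idea behind the two GEM_BUG_ON()s in inject_preempt_context() seems to be that the preempt client's ring only ever carries these fixed 8-dword (32-byte) breadcrumbs, so the tail stays 32-byte aligned; since the ring size is asserted to be a multiple of that size, the breadcrumb never straddles the ring wrap and masking with (ring->size - 1) is enough to wrap the tail. A standalone sketch (plain userspace C, not part of the patch; the ring size below is illustrative) that works through the arithmetic:

/*
 * Walk every aligned tail the breadcrumb could start at and check that it
 * always fits before the end of the ring, so a power-of-two mask wraps the
 * tail correctly after the bump.
 */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define GUC_PREEMPT_BREADCRUMB_DWORDS	0x8
#define BREADCRUMB_BYTES	(GUC_PREEMPT_BREADCRUMB_DWORDS * sizeof(uint32_t))

int main(void)
{
	const uint32_t ring_size = 4096;	/* illustrative: any power of two multiple of 32 */
	uint32_t tail;

	assert((ring_size & (ring_size - 1)) == 0);	/* power of two */
	assert(ring_size % BREADCRUMB_BYTES == 0);	/* the IS_ALIGNED() check */

	for (tail = 0; tail < ring_size; tail += BREADCRUMB_BYTES) {
		uint32_t new_tail;

		/* The 8 dwords all fit before the end of the ring... */
		assert(tail + BREADCRUMB_BYTES <= ring_size);

		/* ...so masking after the bump is all the wrapping needed. */
		new_tail = (tail + BREADCRUMB_BYTES) & (ring_size - 1);
		assert(new_tail == (tail + BREADCRUMB_BYTES) % ring_size);
	}

	printf("breadcrumb never crosses the wrap for ring_size=%u\n", ring_size);
	return 0;
}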
@@ -574,8 +676,7 @@ static void guc_add_request(struct intel_guc *guc,
  */
 static void i915_guc_submit(struct intel_engine_cs *engine)
 {
-	struct drm_i915_private *dev_priv = engine->i915;
-	struct intel_guc *guc = &dev_priv->guc;
+	struct intel_guc *guc = &engine->i915->guc;
 	struct intel_engine_execlists * const execlists = &engine->execlists;
 	struct execlist_port *port = execlists->port;
 	unsigned int n;
@@ -588,8 +689,7 @@ static void i915_guc_submit(struct intel_engine_cs *engine)
 		if (rq && count == 0) {
 			port_set(&port[n], port_pack(rq, ++count));
 
-			if (i915_vma_is_map_and_fenceable(rq->ring->vma))
-				POSTING_READ_FW(GUC_STATUS);
+			flush_ggtt_writes(rq->ring->vma);
 
 			guc_add_request(guc, rq);
 		}
@@ -617,13 +717,32 @@ static void i915_guc_dequeue(struct intel_engine_cs *engine)
 	bool submit = false;
 	struct rb_node *rb;
 
-	if (port_isset(port))
-		port++;
-
 	spin_lock_irq(&engine->timeline->lock);
 	rb = execlists->first;
 	GEM_BUG_ON(rb_first(&execlists->queue) != rb);
-	while (rb) {
+
+	if (!rb)
+		goto unlock;
+
+	if (HAS_LOGICAL_RING_PREEMPTION(engine->i915) && port_isset(port)) {
+		struct guc_preempt_work *preempt_work =
+			&engine->i915->guc.preempt_work[engine->id];
+
+		if (rb_entry(rb, struct i915_priolist, node)->priority >
+		    max(port_request(port)->priotree.priority, 0)) {
+			execlists_set_active(execlists,
+					     EXECLISTS_ACTIVE_PREEMPT);
+			queue_work(engine->i915->guc.preempt_wq,
+				   &preempt_work->work);
+			goto unlock;
+		} else if (port_isset(last_port)) {
+			goto unlock;
+		}
+
+		port++;
+	}
+
+	do {
 		struct i915_priolist *p = rb_entry(rb, typeof(*p), node);
 		struct drm_i915_gem_request *rq, *rn;
 
@@ -653,14 +772,15 @@ static void i915_guc_dequeue(struct intel_engine_cs *engine)
 		INIT_LIST_HEAD(&p->requests);
 		if (p->priority != I915_PRIORITY_NORMAL)
 			kmem_cache_free(engine->i915->priorities, p);
-	}
+	} while (rb);
 done:
 	execlists->first = rb;
 	if (submit) {
 		port_assign(port, last);
 		execlists_set_active(execlists, EXECLISTS_ACTIVE_USER);
 		i915_guc_submit(engine);
 	}
+unlock:
 	spin_unlock_irq(&engine->timeline->lock);
 }
 
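The preemption trigger above compares the best queued priority against max(priority of the request currently in the port, 0), so only work boosted above the default priority ever preempts something that is already running. A standalone sketch (userspace C, not part of the patch; the helper name and the assumption that I915_PRIORITY_NORMAL is 0 are mine) of that decision rule:

/* Tiny model of the priority check made before queueing the preempt work. */
#include <assert.h>
#include <stdbool.h>

#define I915_PRIORITY_NORMAL 0	/* assumed default priority */

static int max_int(int a, int b)
{
	return a > b ? a : b;
}

/* Mirrors: rb_priority > max(port_request(port)->priotree.priority, 0) */
static bool should_preempt(int queued_prio, int running_prio)
{
	return queued_prio > max_int(running_prio, 0);
}

int main(void)
{
	/* A boosted request preempts default-priority work... */
	assert(should_preempt(1024, I915_PRIORITY_NORMAL));
	/* ...but default-priority work never preempts, even low-priority work. */
	assert(!should_preempt(I915_PRIORITY_NORMAL, -1000));
	/* Equal elevated priorities do not preempt each other either. */
	assert(!should_preempt(512, 512));
	return 0;
}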
@@ -669,8 +789,6 @@ static void i915_guc_irq_handler(unsigned long data)
 	struct intel_engine_cs * const engine = (struct intel_engine_cs *)data;
 	struct intel_engine_execlists * const execlists = &engine->execlists;
 	struct execlist_port *port = execlists->port;
-	const struct execlist_port * const last_port =
-		&execlists->port[execlists->port_mask];
 	struct drm_i915_gem_request *rq;
 
 	rq = port_request(&port[0]);
@@ -685,7 +803,19 @@ static void i915_guc_irq_handler(unsigned long data)
 	if (!rq)
 		execlists_clear_active(execlists, EXECLISTS_ACTIVE_USER);
 
-	if (!port_isset(last_port))
+	if (execlists_is_active(execlists, EXECLISTS_ACTIVE_PREEMPT) &&
+	    intel_read_status_page(engine, I915_GEM_HWS_PREEMPT_INDEX) ==
+	    GUC_PREEMPT_FINISHED) {
+		execlists_cancel_port_requests(&engine->execlists);
+		execlists_unwind_incomplete_requests(execlists);
+
+		wait_for_guc_preempt_report(engine);
+
+		execlists_clear_active(execlists, EXECLISTS_ACTIVE_PREEMPT);
+		intel_write_status_page(engine, I915_GEM_HWS_PREEMPT_INDEX, 0);
+	}
+
+	if (!execlists_is_active(execlists, EXECLISTS_ACTIVE_PREEMPT))
 		i915_guc_dequeue(engine);
 }
 
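The handler above treats preemption as complete only when both conditions hold: the software EXECLISTS_ACTIVE_PREEMPT flag is set and the HWSP dword written by the preempt breadcrumb reads GUC_PREEMPT_FINISHED; it then clears both, and dequeueing stays blocked while a preemption is still in flight. A standalone sketch (userspace C, not part of the patch; the struct and helpers are invented) of that handshake:

/* Minimal model of the HWSP preempt-done flag and the software state bit. */
#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

#define GUC_PREEMPT_FINISHED 0x1

struct fake_engine {
	uint32_t hws_preempt;	/* stands in for the HWSP status dword */
	bool preempt_active;	/* stands in for EXECLISTS_ACTIVE_PREEMPT */
};

/* What the breadcrumb emitted by inject_preempt_context() achieves. */
static void gpu_completes_preemption(struct fake_engine *e)
{
	e->hws_preempt = GUC_PREEMPT_FINISHED;
}

/* What the tasklet does: returns true when dequeueing may run again. */
static bool irq_handler(struct fake_engine *e)
{
	if (e->preempt_active && e->hws_preempt == GUC_PREEMPT_FINISHED) {
		/* ...cancel ports, unwind, wait for the GuC report... */
		e->preempt_active = false;
		e->hws_preempt = 0;
	}
	return !e->preempt_active;
}

int main(void)
{
	struct fake_engine e = { .preempt_active = true };

	assert(!irq_handler(&e));	/* preemption in flight: no dequeue */
	gpu_completes_preemption(&e);
	assert(irq_handler(&e));	/* completion seen: dequeue allowed again */
	assert(e.hws_preempt == 0);	/* dword cleared for the next round */
	return 0;
}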
@@ -1059,6 +1189,51 @@ static void guc_ads_destroy(struct intel_guc *guc)
 	i915_vma_unpin_and_release(&guc->ads_vma);
 }
 
+static int guc_preempt_work_create(struct intel_guc *guc)
+{
+	struct drm_i915_private *dev_priv = guc_to_i915(guc);
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+
+	/*
+	 * Even though both sending a GuC action and adding a new workitem to
+	 * the GuC workqueue are serialized (each with its own locking), since
+	 * we're using multiple engines, it's possible that we're going to
+	 * issue a preempt request with two (or more - each for a different
+	 * engine) workitems in the GuC queue. In this situation, GuC may
+	 * submit all of them, which will make us very confused.
+	 * Our preemption contexts may even already be complete - before we
+	 * even had the chance to send the preempt action to GuC! Rather than
+	 * introducing yet another lock, we can just use an ordered workqueue
+	 * to make sure we're always sending a single preemption request with
+	 * a single workitem.
+	 */
+	guc->preempt_wq = alloc_ordered_workqueue("i915-guc_preempt",
+						  WQ_HIGHPRI);
+	if (!guc->preempt_wq)
+		return -ENOMEM;
+
+	for_each_engine(engine, dev_priv, id) {
+		guc->preempt_work[id].engine = engine;
+		INIT_WORK(&guc->preempt_work[id].work, inject_preempt_context);
+	}
+
+	return 0;
+}
+
+static void guc_preempt_work_destroy(struct intel_guc *guc)
+{
+	struct drm_i915_private *dev_priv = guc_to_i915(guc);
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+
+	for_each_engine(engine, dev_priv, id)
+		cancel_work_sync(&guc->preempt_work[id].work);
+
+	destroy_workqueue(guc->preempt_wq);
+	guc->preempt_wq = NULL;
+}
+
 /*
  * Set up the memory resources to be shared with the GuC (via the GGTT)
  * at firmware loading time.
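The comment above leans on a property of ordered workqueues: work items queued to one are executed strictly one at a time, in queueing order, no matter how many CPUs or engines feed it, so at most one preemption request is ever being built and sent. A minimal kernel-module-style sketch (not part of the patch; module name and messages are invented) of the same alloc_ordered_workqueue()/INIT_WORK()/queue_work() pattern:

/* Two work items queued to an ordered workqueue never run concurrently. */
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/workqueue.h>

static struct workqueue_struct *demo_wq;
static struct work_struct demo_work[2];

static void demo_work_fn(struct work_struct *work)
{
	/* On an ordered workqueue, bodies of queued items never overlap. */
	pr_info("ordered-wq demo: running work %d\n", (int)(work - demo_work));
}

static int __init demo_init(void)
{
	int i;

	demo_wq = alloc_ordered_workqueue("ordered-wq-demo", WQ_HIGHPRI);
	if (!demo_wq)
		return -ENOMEM;

	for (i = 0; i < ARRAY_SIZE(demo_work); i++) {
		INIT_WORK(&demo_work[i], demo_work_fn);
		queue_work(demo_wq, &demo_work[i]);	/* runs strictly in order */
	}

	return 0;
}

static void __exit demo_exit(void)
{
	int i;

	for (i = 0; i < ARRAY_SIZE(demo_work); i++)
		cancel_work_sync(&demo_work[i]);
	destroy_workqueue(demo_wq);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");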
@@ -1083,12 +1258,18 @@ int i915_guc_submission_init(struct drm_i915_private *dev_priv)
 	if (ret < 0)
 		goto err_shared_data;
 
+	ret = guc_preempt_work_create(guc);
+	if (ret)
+		goto err_log;
+
 	ret = guc_ads_create(guc);
 	if (ret < 0)
-		goto err_log;
+		goto err_wq;
 
 	return 0;
 
+err_wq:
+	guc_preempt_work_destroy(guc);
 err_log:
 	intel_guc_log_destroy(guc);
 err_shared_data:
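The error path above follows the usual goto-unwind idiom: inserting guc_preempt_work_create() means the existing goto err_log still covers failures of the new step, while the later guc_ads_create() failure is retargeted to a new err_wq label so the workqueue is torn down too. A standalone sketch (userspace C, not part of the patch; the create/destroy helpers are invented and one is made to fail on purpose) of the idiom:

/* Cleanup labels run in reverse order of the init steps that succeeded. */
#include <stdio.h>

static int create_log(void)           { puts("log created"); return 0; }
static void destroy_log(void)         { puts("log destroyed"); }
static int create_preempt_wq(void)    { puts("wq created"); return 0; }
static void destroy_preempt_wq(void)  { puts("wq destroyed"); }
static int create_ads(void)           { puts("ads create failed"); return -1; /* forced failure */ }

static int submission_init(void)
{
	int ret;

	ret = create_log();
	if (ret)
		return ret;

	ret = create_preempt_wq();
	if (ret)
		goto err_log;

	ret = create_ads();
	if (ret)
		goto err_wq;	/* new step => new label; earlier gotos unchanged */

	return 0;

err_wq:
	destroy_preempt_wq();
err_log:
	destroy_log();
	return ret;
}

int main(void)
{
	return submission_init() ? 1 : 0;
}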
@@ -1103,6 +1284,7 @@ void i915_guc_submission_fini(struct drm_i915_private *dev_priv)
 	struct intel_guc *guc = &dev_priv->guc;
 
 	guc_ads_destroy(guc);
+	guc_preempt_work_destroy(guc);
 	intel_guc_log_destroy(guc);
 	guc_shared_data_destroy(guc);
 	guc_stage_desc_pool_destroy(guc);