Skip to content

Commit 5013eb8

Browse files
committed
drm/i915: Track the context's seqno in its own timeline HWSP
Now that we have allocated ourselves a cacheline to store a breadcrumb, we can emit a write from the GPU into the timeline's HWSP of the per-context seqno as we complete each request. This drops the mirroring of the per-engine HWSP and allows each context to operate independently. We do not need to unwind the per-context timeline, and so requests are always consistent with the timeline breadcrumb, greatly simplifying the completion checks as we no longer need to be concerned about the global_seqno changing mid check. One complication though is that we have to be wary that the request may outlive the HWSP and so avoid touching the potentially dangling pointer after we have retired the fence. We also have to guard our access of the HWSP with RCU; the release of the obj->mm.pages should already be RCU-safe. At this point, we are emitting both per-context and global seqno and still using the single per-engine execution timeline for resolving interrupts. v2: s/fake_complete/mark_complete/ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20190128181812.22804-5-chris@chris-wilson.co.uk
1 parent 8ba306a commit 5013eb8

File tree

10 files changed

+139
-60
lines changed

10 files changed

+139
-60
lines changed

drivers/gpu/drm/i915/i915_gem.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2890,7 +2890,7 @@ i915_gem_find_active_request(struct intel_engine_cs *engine)
28902890
*/
28912891
spin_lock_irqsave(&engine->timeline.lock, flags);
28922892
list_for_each_entry(request, &engine->timeline.requests, link) {
2893-
if (__i915_request_completed(request, request->global_seqno))
2893+
if (i915_request_completed(request))
28942894
continue;
28952895

28962896
active = request;

drivers/gpu/drm/i915/i915_request.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -199,6 +199,7 @@ static void __retire_engine_request(struct intel_engine_cs *engine,
199199
spin_unlock(&engine->timeline.lock);
200200

201201
spin_lock(&rq->lock);
202+
i915_request_mark_complete(rq);
202203
if (!i915_request_signaled(rq))
203204
dma_fence_signal_locked(&rq->fence);
204205
if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &rq->fence.flags))
@@ -621,7 +622,7 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
621622
rq->ring = ce->ring;
622623
rq->timeline = ce->ring->timeline;
623624
GEM_BUG_ON(rq->timeline == &engine->timeline);
624-
rq->hwsp_seqno = &engine->status_page.addr[I915_GEM_HWS_INDEX];
625+
rq->hwsp_seqno = rq->timeline->hwsp_seqno;
625626

626627
spin_lock_init(&rq->lock);
627628
dma_fence_init(&rq->fence,

drivers/gpu/drm/i915/i915_request.h

Lines changed: 11 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -289,6 +289,7 @@ long i915_request_wait(struct i915_request *rq,
289289

290290
static inline bool i915_request_signaled(const struct i915_request *rq)
291291
{
292+
/* The request may live longer than its HWSP, so check flags first! */
292293
return test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &rq->fence.flags);
293294
}
294295

@@ -340,32 +341,23 @@ static inline u32 hwsp_seqno(const struct i915_request *rq)
340341
*/
341342
static inline bool i915_request_started(const struct i915_request *rq)
342343
{
343-
u32 seqno;
344-
345-
seqno = i915_request_global_seqno(rq);
346-
if (!seqno) /* not yet submitted to HW */
347-
return false;
344+
if (i915_request_signaled(rq))
345+
return true;
348346

349-
return i915_seqno_passed(hwsp_seqno(rq), seqno - 1);
350-
}
351-
352-
static inline bool
353-
__i915_request_completed(const struct i915_request *rq, u32 seqno)
354-
{
355-
GEM_BUG_ON(!seqno);
356-
return i915_seqno_passed(hwsp_seqno(rq), seqno) &&
357-
seqno == i915_request_global_seqno(rq);
347+
return i915_seqno_passed(hwsp_seqno(rq), rq->fence.seqno - 1);
358348
}
359349

360350
static inline bool i915_request_completed(const struct i915_request *rq)
361351
{
362-
u32 seqno;
352+
if (i915_request_signaled(rq))
353+
return true;
363354

364-
seqno = i915_request_global_seqno(rq);
365-
if (!seqno)
366-
return false;
355+
return i915_seqno_passed(hwsp_seqno(rq), rq->fence.seqno);
356+
}
367357

368-
return __i915_request_completed(rq, seqno);
358+
static inline void i915_request_mark_complete(struct i915_request *rq)
359+
{
360+
rq->hwsp_seqno = (u32 *)&rq->fence.seqno; /* decouple from HWSP */
369361
}
370362

371363
void i915_retire_requests(struct drm_i915_private *i915);

drivers/gpu/drm/i915/i915_reset.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -760,6 +760,7 @@ static void nop_submit_request(struct i915_request *request)
760760

761761
spin_lock_irqsave(&request->engine->timeline.lock, flags);
762762
__i915_request_submit(request);
763+
i915_request_mark_complete(request);
763764
intel_engine_write_global_seqno(request->engine, request->global_seqno);
764765
spin_unlock_irqrestore(&request->engine->timeline.lock, flags);
765766
}

drivers/gpu/drm/i915/i915_timeline.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -270,6 +270,10 @@ int i915_timeline_pin(struct i915_timeline *tl)
270270
if (err)
271271
goto unpin;
272272

273+
tl->hwsp_offset =
274+
i915_ggtt_offset(tl->hwsp_ggtt) +
275+
offset_in_page(tl->hwsp_offset);
276+
273277
return 0;
274278

275279
unpin:

drivers/gpu/drm/i915/intel_engine_cs.c

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -660,10 +660,16 @@ static int measure_breadcrumb_dw(struct intel_engine_cs *engine)
660660
frame->rq.ring = &frame->ring;
661661
frame->rq.timeline = &frame->timeline;
662662

663+
dw = i915_timeline_pin(&frame->timeline);
664+
if (dw < 0)
665+
goto out_timeline;
666+
663667
dw = engine->emit_breadcrumb(&frame->rq, frame->cs) - frame->cs;
664668

665-
i915_timeline_fini(&frame->timeline);
669+
i915_timeline_unpin(&frame->timeline);
666670

671+
out_timeline:
672+
i915_timeline_fini(&frame->timeline);
667673
out_frame:
668674
kfree(frame);
669675
return dw;
@@ -1426,9 +1432,10 @@ static void intel_engine_print_registers(const struct intel_engine_cs *engine,
14261432
char hdr[80];
14271433

14281434
snprintf(hdr, sizeof(hdr),
1429-
"\t\tELSP[%d] count=%d, ring->start=%08x, rq: ",
1435+
"\t\tELSP[%d] count=%d, ring:{start:%08x, hwsp:%08x}, rq: ",
14301436
idx, count,
1431-
i915_ggtt_offset(rq->ring->vma));
1437+
i915_ggtt_offset(rq->ring->vma),
1438+
rq->timeline->hwsp_offset);
14321439
print_request(m, rq, hdr);
14331440
} else {
14341441
drm_printf(m, "\t\tELSP[%d] idle\n", idx);
@@ -1538,6 +1545,8 @@ void intel_engine_dump(struct intel_engine_cs *engine,
15381545
rq->ring->emit);
15391546
drm_printf(m, "\t\tring->space: 0x%08x\n",
15401547
rq->ring->space);
1548+
drm_printf(m, "\t\tring->hwsp: 0x%08x\n",
1549+
rq->timeline->hwsp_offset);
15411550

15421551
print_request_ring(m, rq);
15431552
}

drivers/gpu/drm/i915/intel_lrc.c

Lines changed: 20 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -832,10 +832,10 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)
832832
list_for_each_entry(rq, &engine->timeline.requests, link) {
833833
GEM_BUG_ON(!rq->global_seqno);
834834

835-
if (i915_request_signaled(rq))
836-
continue;
835+
if (!i915_request_signaled(rq))
836+
dma_fence_set_error(&rq->fence, -EIO);
837837

838-
dma_fence_set_error(&rq->fence, -EIO);
838+
i915_request_mark_complete(rq);
839839
}
840840

841841
/* Flush the queued requests to the timeline list (for retiring). */
@@ -845,9 +845,9 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)
845845

846846
priolist_for_each_request_consume(rq, rn, p, i) {
847847
list_del_init(&rq->sched.link);
848-
849-
dma_fence_set_error(&rq->fence, -EIO);
850848
__i915_request_submit(rq);
849+
dma_fence_set_error(&rq->fence, -EIO);
850+
i915_request_mark_complete(rq);
851851
}
852852

853853
rb_erase_cached(&p->node, &execlists->queue);
@@ -2044,10 +2044,17 @@ static u32 *gen8_emit_breadcrumb(struct i915_request *request, u32 *cs)
20442044
/* w/a: bit 5 needs to be zero for MI_FLUSH_DW address. */
20452045
BUILD_BUG_ON(I915_GEM_HWS_INDEX_ADDR & (1 << 5));
20462046

2047-
cs = gen8_emit_ggtt_write(cs, request->global_seqno,
2047+
cs = gen8_emit_ggtt_write(cs,
2048+
request->fence.seqno,
2049+
request->timeline->hwsp_offset);
2050+
2051+
cs = gen8_emit_ggtt_write(cs,
2052+
request->global_seqno,
20482053
intel_hws_seqno_address(request->engine));
2054+
20492055
*cs++ = MI_USER_INTERRUPT;
20502056
*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
2057+
20512058
request->tail = intel_ring_offset(request, cs);
20522059
assert_ring_tail_valid(request->ring, request->tail);
20532060

@@ -2056,18 +2063,20 @@ static u32 *gen8_emit_breadcrumb(struct i915_request *request, u32 *cs)
20562063

20572064
static u32 *gen8_emit_breadcrumb_rcs(struct i915_request *request, u32 *cs)
20582065
{
2059-
/* We're using qword write, seqno should be aligned to 8 bytes. */
2060-
BUILD_BUG_ON(I915_GEM_HWS_INDEX & 1);
2061-
20622066
cs = gen8_emit_ggtt_write_rcs(cs,
2063-
request->global_seqno,
2064-
intel_hws_seqno_address(request->engine),
2067+
request->fence.seqno,
2068+
request->timeline->hwsp_offset,
20652069
PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
20662070
PIPE_CONTROL_DEPTH_CACHE_FLUSH |
20672071
PIPE_CONTROL_DC_FLUSH_ENABLE |
20682072
PIPE_CONTROL_FLUSH_ENABLE |
20692073
PIPE_CONTROL_CS_STALL);
20702074

2075+
cs = gen8_emit_ggtt_write_rcs(cs,
2076+
request->global_seqno,
2077+
intel_hws_seqno_address(request->engine),
2078+
PIPE_CONTROL_CS_STALL);
2079+
20712080
*cs++ = MI_USER_INTERRUPT;
20722081
*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
20732082

0 commit comments

Comments
 (0)