Skip to content

Commit 767a983

Browse files
committed
drm/i915/execlists: Read the context-status HEAD from the HWSP
The engine also provides a mirror of the CSB write pointer in the HWSP, but not of our read pointer. To take advantage of this we need to remember where we read up to on the last interrupt and continue off from there. This poses a problem following a reset, as we don't know where the hw will start writing from, and due to the use of power contexts we cannot perform that query during the reset itself. So we continue the current modus operandi of delaying the first read of the context-status read/write pointers until after the first interrupt. With this we should now have eliminated all uncached mmio reads in handling the context-status interrupt, though we still have the uncached mmio writes for submitting new work, and many uncached mmio reads in the global interrupt handler itself. Still a step in the right direction towards reducing our resubmit latency, although it appears lost in the noise!

v2: Cannonlake moved the CSB write index
v3: Include the sw/hwsp state in debugfs/i915_engine_info
v4: Also revert to using CSB mmio for GVT-g
v5: Prevent the compiler reloading tail (Mika)

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Michel Thierry <michel.thierry@intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Mika Kuoppala <mika.kuoppala@intel.com>
Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Cc: Zhenyu Wang <zhenyuw@linux.intel.com>
Cc: Zhi Wang <zhi.a.wang@intel.com>
Acked-by: Michel Thierry <michel.thierry@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20170913085605.18299-6-chris@chris-wilson.co.uk
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
1 parent 6d2cb5a commit 767a983

File tree

4 files changed

+35
-9
lines changed

4 files changed

+35
-9
lines changed

drivers/gpu/drm/i915/i915_debugfs.c

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3326,8 +3326,10 @@ static int i915_engine_info(struct seq_file *m, void *unused)
33263326
ptr = I915_READ(RING_CONTEXT_STATUS_PTR(engine));
33273327
read = GEN8_CSB_READ_PTR(ptr);
33283328
write = GEN8_CSB_WRITE_PTR(ptr);
3329-
seq_printf(m, "\tExeclist CSB read %d, write %d, interrupt posted? %s\n",
3330-
read, write,
3329+
seq_printf(m, "\tExeclist CSB read %d [%d cached], write %d [%d from hws], interrupt posted? %s\n",
3330+
read, engine->csb_head,
3331+
write,
3332+
intel_read_status_page(engine, intel_hws_csb_write_index(engine->i915)),
33313333
yesno(test_bit(ENGINE_IRQ_EXECLIST,
33323334
&engine->irq_posted)));
33333335
if (read >= GEN8_CSB_ENTRIES)

drivers/gpu/drm/i915/i915_drv.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4400,4 +4400,12 @@ int remap_io_mapping(struct vm_area_struct *vma,
44004400
unsigned long addr, unsigned long pfn, unsigned long size,
44014401
struct io_mapping *iomap);
44024402

4403+
static inline int intel_hws_csb_write_index(struct drm_i915_private *i915)
4404+
{
4405+
if (INTEL_GEN(i915) >= 10)
4406+
return CNL_HWS_CSB_WRITE_INDEX;
4407+
else
4408+
return I915_HWS_CSB_WRITE_INDEX;
4409+
}
4410+
44034411
#endif

drivers/gpu/drm/i915/intel_lrc.c

Lines changed: 20 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -539,8 +539,6 @@ static void intel_lrc_irq_handler(unsigned long data)
539539
* new request (outside of the context-switch interrupt).
540540
*/
541541
while (test_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted)) {
542-
u32 __iomem *csb_mmio =
543-
dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine));
544542
/* The HWSP contains a (cacheable) mirror of the CSB */
545543
const u32 *buf =
546544
&engine->status_page.page_addr[I915_HWS_CSB_BUF0_INDEX];
@@ -550,6 +548,7 @@ static void intel_lrc_irq_handler(unsigned long data)
550548
if (unlikely(engine->csb_use_mmio)) {
551549
buf = (u32 * __force)
552550
(dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_BUF_LO(engine, 0)));
551+
engine->csb_head = -1; /* force mmio read of CSB ptrs */
553552
}
554553

555554
/* The write will be ordered by the uncached read (itself
@@ -563,9 +562,19 @@ static void intel_lrc_irq_handler(unsigned long data)
563562
* is set and we do a new loop.
564563
*/
565564
__clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
566-
head = readl(csb_mmio);
567-
tail = GEN8_CSB_WRITE_PTR(head);
568-
head = GEN8_CSB_READ_PTR(head);
565+
if (unlikely(engine->csb_head == -1)) { /* following a reset */
566+
head = readl(dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)));
567+
tail = GEN8_CSB_WRITE_PTR(head);
568+
head = GEN8_CSB_READ_PTR(head);
569+
engine->csb_head = head;
570+
} else {
571+
const int write_idx =
572+
intel_hws_csb_write_index(dev_priv) -
573+
I915_HWS_CSB_BUF0_INDEX;
574+
575+
head = engine->csb_head;
576+
tail = READ_ONCE(buf[write_idx]);
577+
}
569578
while (head != tail) {
570579
struct drm_i915_gem_request *rq;
571580
unsigned int status;
@@ -619,8 +628,11 @@ static void intel_lrc_irq_handler(unsigned long data)
619628
!(status & GEN8_CTX_STATUS_ACTIVE_IDLE));
620629
}
621630

622-
writel(_MASKED_FIELD(GEN8_CSB_READ_PTR_MASK, head << 8),
623-
csb_mmio);
631+
if (head != engine->csb_head) {
632+
engine->csb_head = head;
633+
writel(_MASKED_FIELD(GEN8_CSB_READ_PTR_MASK, head << 8),
634+
dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)));
635+
}
624636
}
625637

626638
if (execlists_elsp_ready(engine))
@@ -1269,6 +1281,7 @@ static int gen8_init_common_ring(struct intel_engine_cs *engine)
12691281
I915_WRITE(GEN8_GT_IIR(gtiir[engine->id]),
12701282
GT_CONTEXT_SWITCH_INTERRUPT << engine->irq_shift);
12711283
clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
1284+
engine->csb_head = -1;
12721285

12731286
/* After a GPU reset, we may have requests to replay */
12741287
submit = false;

drivers/gpu/drm/i915/intel_ringbuffer.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -391,6 +391,7 @@ struct intel_engine_cs {
391391
struct rb_root execlist_queue;
392392
struct rb_node *execlist_first;
393393
unsigned int fw_domains;
394+
unsigned int csb_head;
394395
bool csb_use_mmio;
395396

396397
/* Contexts are pinned whilst they are active on the GPU. The last
@@ -498,6 +499,8 @@ intel_write_status_page(struct intel_engine_cs *engine, int reg, u32 value)
498499
#define I915_GEM_HWS_SCRATCH_ADDR (I915_GEM_HWS_SCRATCH_INDEX << MI_STORE_DWORD_INDEX_SHIFT)
499500

500501
#define I915_HWS_CSB_BUF0_INDEX 0x10
502+
#define I915_HWS_CSB_WRITE_INDEX 0x1f
503+
#define CNL_HWS_CSB_WRITE_INDEX 0x2f
501504

502505
struct intel_ring *
503506
intel_engine_create_ring(struct intel_engine_cs *engine, int size);

0 commit comments

Comments
 (0)