Skip to content

Commit 0ca88ba

Browse files
committed
drm/i915: Always allocate an object/vma for the HWSP
Currently we allocate an object and vma only when using a GGTT virtual HWSP, and use a plain struct page for a physical HWSP. For convenience later on with global timelines, it will be useful to always have the status page tracked by a struct i915_vma. Make it so.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20190128102356.15037-4-chris@chris-wilson.co.uk
1 parent 528cbd1 commit 0ca88ba

File tree

6 files changed

+93
-80
lines changed

6 files changed

+93
-80
lines changed

drivers/gpu/drm/i915/intel_engine_cs.c

Lines changed: 55 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -506,27 +506,61 @@ void intel_engine_setup_common(struct intel_engine_cs *engine)
506506

507507
static void cleanup_status_page(struct intel_engine_cs *engine)
508508
{
509+
struct i915_vma *vma;
510+
509511
/* Prevent writes into HWSP after returning the page to the system */
510512
intel_engine_set_hwsp_writemask(engine, ~0u);
511513

512-
if (HWS_NEEDS_PHYSICAL(engine->i915)) {
513-
void *addr = fetch_and_zero(&engine->status_page.page_addr);
514+
vma = fetch_and_zero(&engine->status_page.vma);
515+
if (!vma)
516+
return;
514517

515-
__free_page(virt_to_page(addr));
516-
}
518+
if (!HWS_NEEDS_PHYSICAL(engine->i915))
519+
i915_vma_unpin(vma);
520+
521+
i915_gem_object_unpin_map(vma->obj);
522+
__i915_gem_object_release_unless_active(vma->obj);
523+
}
524+
525+
static int pin_ggtt_status_page(struct intel_engine_cs *engine,
526+
struct i915_vma *vma)
527+
{
528+
unsigned int flags;
529+
530+
flags = PIN_GLOBAL;
531+
if (!HAS_LLC(engine->i915))
532+
/*
533+
* On g33, we cannot place HWS above 256MiB, so
534+
* restrict its pinning to the low mappable arena.
535+
* Though this restriction is not documented for
536+
* gen4, gen5, or byt, they also behave similarly
537+
* and hang if the HWS is placed at the top of the
538+
* GTT. To generalise, it appears that all !llc
539+
* platforms have issues with us placing the HWS
540+
* above the mappable region (even though we never
541+
* actually map it).
542+
*/
543+
flags |= PIN_MAPPABLE;
544+
else
545+
flags |= PIN_HIGH;
517546

518-
i915_vma_unpin_and_release(&engine->status_page.vma,
519-
I915_VMA_RELEASE_MAP);
547+
return i915_vma_pin(vma, 0, 0, flags);
520548
}
521549

522550
static int init_status_page(struct intel_engine_cs *engine)
523551
{
524552
struct drm_i915_gem_object *obj;
525553
struct i915_vma *vma;
526-
unsigned int flags;
527554
void *vaddr;
528555
int ret;
529556

557+
/*
558+
* Though the HWS register does support 36bit addresses, historically
559+
* we have had hangs and corruption reported due to wild writes if
560+
* the HWS is placed above 4G. We only allow objects to be allocated
561+
* in GFP_DMA32 for i965, and no earlier physical address users had
562+
* access to more than 4G.
563+
*/
530564
obj = i915_gem_object_create_internal(engine->i915, PAGE_SIZE);
531565
if (IS_ERR(obj)) {
532566
DRM_ERROR("Failed to allocate status page\n");
@@ -543,61 +577,30 @@ static int init_status_page(struct intel_engine_cs *engine)
543577
goto err;
544578
}
545579

546-
flags = PIN_GLOBAL;
547-
if (!HAS_LLC(engine->i915))
548-
/* On g33, we cannot place HWS above 256MiB, so
549-
* restrict its pinning to the low mappable arena.
550-
* Though this restriction is not documented for
551-
* gen4, gen5, or byt, they also behave similarly
552-
* and hang if the HWS is placed at the top of the
553-
* GTT. To generalise, it appears that all !llc
554-
* platforms have issues with us placing the HWS
555-
* above the mappable region (even though we never
556-
* actually map it).
557-
*/
558-
flags |= PIN_MAPPABLE;
559-
else
560-
flags |= PIN_HIGH;
561-
ret = i915_vma_pin(vma, 0, 0, flags);
562-
if (ret)
563-
goto err;
564-
565580
vaddr = i915_gem_object_pin_map(obj, I915_MAP_WB);
566581
if (IS_ERR(vaddr)) {
567582
ret = PTR_ERR(vaddr);
568-
goto err_unpin;
583+
goto err;
569584
}
570585

586+
engine->status_page.addr = memset(vaddr, 0, PAGE_SIZE);
571587
engine->status_page.vma = vma;
572-
engine->status_page.ggtt_offset = i915_ggtt_offset(vma);
573-
engine->status_page.page_addr = memset(vaddr, 0, PAGE_SIZE);
588+
589+
if (!HWS_NEEDS_PHYSICAL(engine->i915)) {
590+
ret = pin_ggtt_status_page(engine, vma);
591+
if (ret)
592+
goto err_unpin;
593+
}
594+
574595
return 0;
575596

576597
err_unpin:
577-
i915_vma_unpin(vma);
598+
i915_gem_object_unpin_map(obj);
578599
err:
579600
i915_gem_object_put(obj);
580601
return ret;
581602
}
582603

583-
static int init_phys_status_page(struct intel_engine_cs *engine)
584-
{
585-
struct page *page;
586-
587-
/*
588-
* Though the HWS register does support 36bit addresses, historically
589-
* we have had hangs and corruption reported due to wild writes if
590-
* the HWS is placed above 4G.
591-
*/
592-
page = alloc_page(GFP_KERNEL | __GFP_DMA32 | __GFP_ZERO);
593-
if (!page)
594-
return -ENOMEM;
595-
596-
engine->status_page.page_addr = page_address(page);
597-
598-
return 0;
599-
}
600-
601604
static void __intel_context_unpin(struct i915_gem_context *ctx,
602605
struct intel_engine_cs *engine)
603606
{
@@ -690,10 +693,7 @@ int intel_engine_init_common(struct intel_engine_cs *engine)
690693
if (ret)
691694
goto err_unpin_preempt;
692695

693-
if (HWS_NEEDS_PHYSICAL(i915))
694-
ret = init_phys_status_page(engine);
695-
else
696-
ret = init_status_page(engine);
696+
ret = init_status_page(engine);
697697
if (ret)
698698
goto err_breadcrumbs;
699699

@@ -1366,7 +1366,8 @@ static void intel_engine_print_registers(const struct intel_engine_cs *engine,
13661366
}
13671367

13681368
if (HAS_EXECLISTS(dev_priv)) {
1369-
const u32 *hws = &engine->status_page.page_addr[I915_HWS_CSB_BUF0_INDEX];
1369+
const u32 *hws =
1370+
&engine->status_page.addr[I915_HWS_CSB_BUF0_INDEX];
13701371
unsigned int idx;
13711372
u8 read, write;
13721373

@@ -1549,7 +1550,7 @@ void intel_engine_dump(struct intel_engine_cs *engine,
15491550
spin_unlock_irqrestore(&b->rb_lock, flags);
15501551

15511552
drm_printf(m, "HWSP:\n");
1552-
hexdump(m, engine->status_page.page_addr, PAGE_SIZE);
1553+
hexdump(m, engine->status_page.addr, PAGE_SIZE);
15531554

15541555
drm_printf(m, "Idle? %s\n", yesno(intel_engine_is_idle(engine)));
15551556
}

drivers/gpu/drm/i915/intel_guc_submission.c

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,12 @@
8181
*
8282
*/
8383

84+
static inline u32 intel_hws_preempt_done_address(struct intel_engine_cs *engine)
85+
{
86+
return (i915_ggtt_offset(engine->status_page.vma) +
87+
I915_GEM_HWS_PREEMPT_ADDR);
88+
}
89+
8490
static inline struct i915_priolist *to_priolist(struct rb_node *rb)
8591
{
8692
return rb_entry(rb, struct i915_priolist, node);

drivers/gpu/drm/i915/intel_lrc.c

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -172,6 +172,12 @@ static void execlists_init_reg_state(u32 *reg_state,
172172
struct intel_engine_cs *engine,
173173
struct intel_ring *ring);
174174

175+
static inline u32 intel_hws_seqno_address(struct intel_engine_cs *engine)
176+
{
177+
return (i915_ggtt_offset(engine->status_page.vma) +
178+
I915_GEM_HWS_INDEX_ADDR);
179+
}
180+
175181
static inline struct i915_priolist *to_priolist(struct rb_node *rb)
176182
{
177183
return rb_entry(rb, struct i915_priolist, node);
@@ -1699,7 +1705,7 @@ static void enable_execlists(struct intel_engine_cs *engine)
16991705
_MASKED_BIT_DISABLE(STOP_RING));
17001706

17011707
I915_WRITE(RING_HWS_PGA(engine->mmio_base),
1702-
engine->status_page.ggtt_offset);
1708+
i915_ggtt_offset(engine->status_page.vma));
17031709
POSTING_READ(RING_HWS_PGA(engine->mmio_base));
17041710
}
17051711

@@ -2244,10 +2250,10 @@ static int logical_ring_init(struct intel_engine_cs *engine)
22442250
}
22452251

22462252
execlists->csb_status =
2247-
&engine->status_page.page_addr[I915_HWS_CSB_BUF0_INDEX];
2253+
&engine->status_page.addr[I915_HWS_CSB_BUF0_INDEX];
22482254

22492255
execlists->csb_write =
2250-
&engine->status_page.page_addr[intel_hws_csb_write_index(i915)];
2256+
&engine->status_page.addr[intel_hws_csb_write_index(i915)];
22512257

22522258
reset_csb_pointers(execlists);
22532259

drivers/gpu/drm/i915/intel_ringbuffer.c

Lines changed: 16 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,12 @@
4343
*/
4444
#define LEGACY_REQUEST_SIZE 200
4545

46+
static inline u32 intel_hws_seqno_address(struct intel_engine_cs *engine)
47+
{
48+
return (i915_ggtt_offset(engine->status_page.vma) +
49+
I915_GEM_HWS_INDEX_ADDR);
50+
}
51+
4652
static unsigned int __intel_ring_space(unsigned int head,
4753
unsigned int tail,
4854
unsigned int size)
@@ -503,12 +509,17 @@ static void set_hws_pga(struct intel_engine_cs *engine, phys_addr_t phys)
503509
I915_WRITE(HWS_PGA, addr);
504510
}
505511

506-
static void ring_setup_phys_status_page(struct intel_engine_cs *engine)
512+
static struct page *status_page(struct intel_engine_cs *engine)
507513
{
508-
struct page *page = virt_to_page(engine->status_page.page_addr);
509-
phys_addr_t phys = PFN_PHYS(page_to_pfn(page));
514+
struct drm_i915_gem_object *obj = engine->status_page.vma->obj;
510515

511-
set_hws_pga(engine, phys);
516+
GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
517+
return sg_page(obj->mm.pages->sgl);
518+
}
519+
520+
static void ring_setup_phys_status_page(struct intel_engine_cs *engine)
521+
{
522+
set_hws_pga(engine, PFN_PHYS(page_to_pfn(status_page(engine))));
512523
set_hwstam(engine, ~0u);
513524
}
514525

@@ -575,7 +586,7 @@ static void flush_cs_tlb(struct intel_engine_cs *engine)
575586

576587
static void ring_setup_status_page(struct intel_engine_cs *engine)
577588
{
578-
set_hwsp(engine, engine->status_page.ggtt_offset);
589+
set_hwsp(engine, i915_ggtt_offset(engine->status_page.vma));
579590
set_hwstam(engine, ~0u);
580591

581592
flush_cs_tlb(engine);

drivers/gpu/drm/i915/intel_ringbuffer.h

Lines changed: 6 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -32,8 +32,7 @@ struct i915_sched_attr;
3232

3333
struct intel_hw_status_page {
3434
struct i915_vma *vma;
35-
u32 *page_addr;
36-
u32 ggtt_offset;
35+
u32 *addr;
3736
};
3837

3938
#define I915_READ_TAIL(engine) I915_READ(RING_TAIL((engine)->mmio_base))
@@ -671,7 +670,7 @@ static inline u32
671670
intel_read_status_page(const struct intel_engine_cs *engine, int reg)
672671
{
673672
/* Ensure that the compiler doesn't optimize away the load. */
674-
return READ_ONCE(engine->status_page.page_addr[reg]);
673+
return READ_ONCE(engine->status_page.addr[reg]);
675674
}
676675

677676
static inline void
@@ -684,12 +683,12 @@ intel_write_status_page(struct intel_engine_cs *engine, int reg, u32 value)
684683
*/
685684
if (static_cpu_has(X86_FEATURE_CLFLUSH)) {
686685
mb();
687-
clflush(&engine->status_page.page_addr[reg]);
688-
engine->status_page.page_addr[reg] = value;
689-
clflush(&engine->status_page.page_addr[reg]);
686+
clflush(&engine->status_page.addr[reg]);
687+
engine->status_page.addr[reg] = value;
688+
clflush(&engine->status_page.addr[reg]);
690689
mb();
691690
} else {
692-
WRITE_ONCE(engine->status_page.page_addr[reg], value);
691+
WRITE_ONCE(engine->status_page.addr[reg], value);
693692
}
694693
}
695694

@@ -877,16 +876,6 @@ static inline bool intel_engine_has_started(struct intel_engine_cs *engine,
877876
void intel_engine_get_instdone(struct intel_engine_cs *engine,
878877
struct intel_instdone *instdone);
879878

880-
static inline u32 intel_hws_seqno_address(struct intel_engine_cs *engine)
881-
{
882-
return engine->status_page.ggtt_offset + I915_GEM_HWS_INDEX_ADDR;
883-
}
884-
885-
static inline u32 intel_hws_preempt_done_address(struct intel_engine_cs *engine)
886-
{
887-
return engine->status_page.ggtt_offset + I915_GEM_HWS_PREEMPT_ADDR;
888-
}
889-
890879
/* intel_breadcrumbs.c -- user interrupt bottom-half for waiters */
891880
int intel_engine_init_breadcrumbs(struct intel_engine_cs *engine);
892881

drivers/gpu/drm/i915/selftests/mock_engine.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -200,7 +200,7 @@ struct intel_engine_cs *mock_engine(struct drm_i915_private *i915,
200200
engine->base.i915 = i915;
201201
snprintf(engine->base.name, sizeof(engine->base.name), "%s", name);
202202
engine->base.id = id;
203-
engine->base.status_page.page_addr = (void *)(engine + 1);
203+
engine->base.status_page.addr = (void *)(engine + 1);
204204

205205
engine->base.context_pin = mock_context_pin;
206206
engine->base.request_alloc = mock_request_alloc;

0 commit comments

Comments
 (0)