Skip to content

Commit 52954ed

Browse files
committed
drm/i915: Allocate a status page for each timeline
Allocate a page for use as a status page by a group of timelines, as we only need a dword of storage for each (rounded up to the cacheline for safety) we can pack multiple timelines into the same page. Each timeline will then be able to track its own HW seqno. v2: Reuse the common per-engine HWSP for the solitary ringbuffer timeline, so that we do not have to emit (using per-gen specialised vfuncs) the breadcrumb into the distinct timeline HWSP and instead can keep on using the common MI_STORE_DWORD_INDEX. However, to maintain the sleight-of-hand for the global/per-context seqno switchover, we will store both temporarily (and so use a custom offset for the shared timeline HWSP until the switch over). v3: Keep things simple and allocate a page for each timeline, page sharing comes next. v4: I was caught repeating the same MI_STORE_DWORD_IMM over and over again in selftests. v5: And caught red handed copying create timeline + check. Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20190128181812.22804-3-chris@chris-wilson.co.uk
1 parent b18fe4b commit 52954ed

File tree

10 files changed

+543
-56
lines changed

10 files changed

+543
-56
lines changed

drivers/gpu/drm/i915/i915_timeline.c

Lines changed: 112 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -9,28 +9,78 @@
99
#include "i915_timeline.h"
1010
#include "i915_syncmap.h"
1111

12-
void i915_timeline_init(struct drm_i915_private *i915,
13-
struct i915_timeline *timeline,
14-
const char *name)
12+
static struct i915_vma *__hwsp_alloc(struct drm_i915_private *i915)
13+
{
14+
struct drm_i915_gem_object *obj;
15+
struct i915_vma *vma;
16+
17+
obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
18+
if (IS_ERR(obj))
19+
return ERR_CAST(obj);
20+
21+
i915_gem_object_set_cache_coherency(obj, I915_CACHE_LLC);
22+
23+
vma = i915_vma_instance(obj, &i915->ggtt.vm, NULL);
24+
if (IS_ERR(vma))
25+
i915_gem_object_put(obj);
26+
27+
return vma;
28+
}
29+
30+
static int hwsp_alloc(struct i915_timeline *timeline)
31+
{
32+
struct i915_vma *vma;
33+
34+
vma = __hwsp_alloc(timeline->i915);
35+
if (IS_ERR(vma))
36+
return PTR_ERR(vma);
37+
38+
timeline->hwsp_ggtt = vma;
39+
timeline->hwsp_offset = 0;
40+
41+
return 0;
42+
}
43+
44+
int i915_timeline_init(struct drm_i915_private *i915,
45+
struct i915_timeline *timeline,
46+
const char *name,
47+
struct i915_vma *global_hwsp)
1548
{
1649
struct i915_gt_timelines *gt = &i915->gt.timelines;
50+
void *vaddr;
51+
int err;
1752

1853
/*
1954
* Ideally we want a set of engines on a single leaf as we expect
2055
* to mostly be tracking synchronisation between engines. It is not
2156
* a huge issue if this is not the case, but we may want to mitigate
2257
* any page crossing penalties if they become an issue.
58+
*
59+
* Called during early_init before we know how many engines there are.
2360
*/
2461
BUILD_BUG_ON(KSYNCMAP < I915_NUM_ENGINES);
2562

2663
timeline->i915 = i915;
2764
timeline->name = name;
65+
timeline->pin_count = 0;
66+
67+
if (global_hwsp) {
68+
timeline->hwsp_ggtt = i915_vma_get(global_hwsp);
69+
timeline->hwsp_offset = I915_GEM_HWS_SEQNO_ADDR;
70+
} else {
71+
err = hwsp_alloc(timeline);
72+
if (err)
73+
return err;
74+
}
2875

29-
mutex_lock(&gt->mutex);
30-
list_add(&timeline->link, &gt->list);
31-
mutex_unlock(&gt->mutex);
76+
vaddr = i915_gem_object_pin_map(timeline->hwsp_ggtt->obj, I915_MAP_WB);
77+
if (IS_ERR(vaddr)) {
78+
i915_vma_put(timeline->hwsp_ggtt);
79+
return PTR_ERR(vaddr);
80+
}
3281

33-
/* Called during early_init before we know how many engines there are */
82+
timeline->hwsp_seqno =
83+
memset(vaddr + timeline->hwsp_offset, 0, CACHELINE_BYTES);
3484

3585
timeline->fence_context = dma_fence_context_alloc(1);
3686

@@ -40,6 +90,12 @@ void i915_timeline_init(struct drm_i915_private *i915,
4090
INIT_LIST_HEAD(&timeline->requests);
4191

4292
i915_syncmap_init(&timeline->sync);
93+
94+
mutex_lock(&gt->mutex);
95+
list_add(&timeline->link, &gt->list);
96+
mutex_unlock(&gt->mutex);
97+
98+
return 0;
4399
}
44100

45101
void i915_timelines_init(struct drm_i915_private *i915)
@@ -85,30 +141,77 @@ void i915_timeline_fini(struct i915_timeline *timeline)
85141
{
86142
struct i915_gt_timelines *gt = &timeline->i915->gt.timelines;
87143

144+
GEM_BUG_ON(timeline->pin_count);
88145
GEM_BUG_ON(!list_empty(&timeline->requests));
89146

90147
i915_syncmap_free(&timeline->sync);
91148

92149
mutex_lock(&gt->mutex);
93150
list_del(&timeline->link);
94151
mutex_unlock(&gt->mutex);
152+
153+
i915_gem_object_unpin_map(timeline->hwsp_ggtt->obj);
154+
i915_vma_put(timeline->hwsp_ggtt);
95155
}
96156

97157
struct i915_timeline *
98-
i915_timeline_create(struct drm_i915_private *i915, const char *name)
158+
i915_timeline_create(struct drm_i915_private *i915,
159+
const char *name,
160+
struct i915_vma *global_hwsp)
99161
{
100162
struct i915_timeline *timeline;
163+
int err;
101164

102165
timeline = kzalloc(sizeof(*timeline), GFP_KERNEL);
103166
if (!timeline)
104167
return ERR_PTR(-ENOMEM);
105168

106-
i915_timeline_init(i915, timeline, name);
169+
err = i915_timeline_init(i915, timeline, name, global_hwsp);
170+
if (err) {
171+
kfree(timeline);
172+
return ERR_PTR(err);
173+
}
174+
107175
kref_init(&timeline->kref);
108176

109177
return timeline;
110178
}
111179

180+
int i915_timeline_pin(struct i915_timeline *tl)
181+
{
182+
int err;
183+
184+
if (tl->pin_count++)
185+
return 0;
186+
GEM_BUG_ON(!tl->pin_count);
187+
188+
err = i915_vma_pin(tl->hwsp_ggtt, 0, 0, PIN_GLOBAL | PIN_HIGH);
189+
if (err)
190+
goto unpin;
191+
192+
return 0;
193+
194+
unpin:
195+
tl->pin_count = 0;
196+
return err;
197+
}
198+
199+
void i915_timeline_unpin(struct i915_timeline *tl)
200+
{
201+
GEM_BUG_ON(!tl->pin_count);
202+
if (--tl->pin_count)
203+
return;
204+
205+
/*
206+
* Since this timeline is idle, all barriers upon which we were waiting
207+
* must also be complete and so we can discard the last used barriers
208+
* without loss of information.
209+
*/
210+
i915_syncmap_free(&tl->sync);
211+
212+
__i915_vma_unpin(tl->hwsp_ggtt);
213+
}
214+
112215
void __i915_timeline_free(struct kref *kref)
113216
{
114217
struct i915_timeline *timeline =

drivers/gpu/drm/i915/i915_timeline.h

Lines changed: 17 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,8 @@
3232
#include "i915_syncmap.h"
3333
#include "i915_utils.h"
3434

35+
struct i915_vma;
36+
3537
struct i915_timeline {
3638
u64 fence_context;
3739
u32 seqno;
@@ -40,6 +42,11 @@ struct i915_timeline {
4042
#define TIMELINE_CLIENT 0 /* default subclass */
4143
#define TIMELINE_ENGINE 1
4244

45+
unsigned int pin_count;
46+
const u32 *hwsp_seqno;
47+
struct i915_vma *hwsp_ggtt;
48+
u32 hwsp_offset;
49+
4350
/**
4451
* List of breadcrumbs associated with GPU requests currently
4552
* outstanding.
@@ -71,9 +78,10 @@ struct i915_timeline {
7178
struct kref kref;
7279
};
7380

74-
void i915_timeline_init(struct drm_i915_private *i915,
75-
struct i915_timeline *tl,
76-
const char *name);
81+
int i915_timeline_init(struct drm_i915_private *i915,
82+
struct i915_timeline *tl,
83+
const char *name,
84+
struct i915_vma *hwsp);
7785
void i915_timeline_fini(struct i915_timeline *tl);
7886

7987
static inline void
@@ -96,7 +104,9 @@ i915_timeline_set_subclass(struct i915_timeline *timeline,
96104
}
97105

98106
struct i915_timeline *
99-
i915_timeline_create(struct drm_i915_private *i915, const char *name);
107+
i915_timeline_create(struct drm_i915_private *i915,
108+
const char *name,
109+
struct i915_vma *global_hwsp);
100110

101111
static inline struct i915_timeline *
102112
i915_timeline_get(struct i915_timeline *timeline)
@@ -135,6 +145,9 @@ static inline bool i915_timeline_sync_is_later(struct i915_timeline *tl,
135145
return __i915_timeline_sync_is_later(tl, fence->context, fence->seqno);
136146
}
137147

148+
int i915_timeline_pin(struct i915_timeline *tl);
149+
void i915_timeline_unpin(struct i915_timeline *tl);
150+
138151
void i915_timelines_init(struct drm_i915_private *i915);
139152
void i915_timelines_park(struct drm_i915_private *i915);
140153
void i915_timelines_fini(struct drm_i915_private *i915);

drivers/gpu/drm/i915/intel_engine_cs.c

Lines changed: 46 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -484,26 +484,6 @@ static void intel_engine_init_execlist(struct intel_engine_cs *engine)
484484
execlists->queue = RB_ROOT_CACHED;
485485
}
486486

487-
/**
488-
* intel_engines_setup_common - setup engine state not requiring hw access
489-
* @engine: Engine to setup.
490-
*
491-
* Initializes @engine@ structure members shared between legacy and execlists
492-
* submission modes which do not require hardware access.
493-
*
494-
* Typically done early in the submission mode specific engine setup stage.
495-
*/
496-
void intel_engine_setup_common(struct intel_engine_cs *engine)
497-
{
498-
i915_timeline_init(engine->i915, &engine->timeline, engine->name);
499-
i915_timeline_set_subclass(&engine->timeline, TIMELINE_ENGINE);
500-
501-
intel_engine_init_execlist(engine);
502-
intel_engine_init_hangcheck(engine);
503-
intel_engine_init_batch_pool(engine);
504-
intel_engine_init_cmd_parser(engine);
505-
}
506-
507487
static void cleanup_status_page(struct intel_engine_cs *engine)
508488
{
509489
struct i915_vma *vma;
@@ -601,6 +581,44 @@ static int init_status_page(struct intel_engine_cs *engine)
601581
return ret;
602582
}
603583

584+
/**
585+
* intel_engine_setup_common - setup engine state not requiring hw access
586+
* @engine: Engine to setup.
587+
*
588+
* Initializes @engine structure members shared between legacy and execlists
589+
* submission modes which do not require hardware access.
590+
*
591+
* Typically done early in the submission mode specific engine setup stage.
592+
*/
593+
int intel_engine_setup_common(struct intel_engine_cs *engine)
594+
{
595+
int err;
596+
597+
err = init_status_page(engine);
598+
if (err)
599+
return err;
600+
601+
err = i915_timeline_init(engine->i915,
602+
&engine->timeline,
603+
engine->name,
604+
engine->status_page.vma);
605+
if (err)
606+
goto err_hwsp;
607+
608+
i915_timeline_set_subclass(&engine->timeline, TIMELINE_ENGINE);
609+
610+
intel_engine_init_execlist(engine);
611+
intel_engine_init_hangcheck(engine);
612+
intel_engine_init_batch_pool(engine);
613+
intel_engine_init_cmd_parser(engine);
614+
615+
return 0;
616+
617+
err_hwsp:
618+
cleanup_status_page(engine);
619+
return err;
620+
}
621+
604622
static void __intel_context_unpin(struct i915_gem_context *ctx,
605623
struct intel_engine_cs *engine)
606624
{
@@ -617,15 +635,18 @@ struct measure_breadcrumb {
617635
static int measure_breadcrumb_dw(struct intel_engine_cs *engine)
618636
{
619637
struct measure_breadcrumb *frame;
620-
unsigned int dw;
638+
int dw = -ENOMEM;
621639

622640
GEM_BUG_ON(!engine->i915->gt.scratch);
623641

624642
frame = kzalloc(sizeof(*frame), GFP_KERNEL);
625643
if (!frame)
626644
return -ENOMEM;
627645

628-
i915_timeline_init(engine->i915, &frame->timeline, "measure");
646+
if (i915_timeline_init(engine->i915,
647+
&frame->timeline, "measure",
648+
engine->status_page.vma))
649+
goto out_frame;
629650

630651
INIT_LIST_HEAD(&frame->ring.request_list);
631652
frame->ring.timeline = &frame->timeline;
@@ -642,8 +663,9 @@ static int measure_breadcrumb_dw(struct intel_engine_cs *engine)
642663
dw = engine->emit_breadcrumb(&frame->rq, frame->cs) - frame->cs;
643664

644665
i915_timeline_fini(&frame->timeline);
645-
kfree(frame);
646666

667+
out_frame:
668+
kfree(frame);
647669
return dw;
648670
}
649671

@@ -693,20 +715,14 @@ int intel_engine_init_common(struct intel_engine_cs *engine)
693715
if (ret)
694716
goto err_unpin_preempt;
695717

696-
ret = init_status_page(engine);
697-
if (ret)
698-
goto err_breadcrumbs;
699-
700718
ret = measure_breadcrumb_dw(engine);
701719
if (ret < 0)
702-
goto err_status_page;
720+
goto err_breadcrumbs;
703721

704722
engine->emit_breadcrumb_dw = ret;
705723

706724
return 0;
707725

708-
err_status_page:
709-
cleanup_status_page(engine);
710726
err_breadcrumbs:
711727
intel_engine_fini_breadcrumbs(engine);
712728
err_unpin_preempt:

0 commit comments

Comments
 (0)