
Commit 0a53bc0

fred1gao authored and zhenyw committed
drm/i915/gvt: Separate cmd scan from request allocation
Currently the i915 request structure and shadow ring buffer are allocated before the command scan, so once any error happens later in the long dispatch_workload path, everything has to be rolled back to its previous state.

This patch introduces a reserved ring buffer that is created at the beginning of vGPU initialization. The workload is first copied into this reserved buffer and scanned there; the i915 request and shadow ring buffer are allocated only after the scan succeeds. To balance memory usage against allocation time, the reserved buffer is reallocated whenever a bigger ring buffer arrives and kept at that size until an even bigger one is needed.

v2:
- use kmalloc for the smaller ring buffer, realloc if required. (Zhenyu)

v3:
- remove the dynamically allocated ring buffer. (Zhenyu)

v4:
- code style polish.
- kfree the previously allocated buffer once kmalloc fails. (Zhenyu)

Signed-off-by: fred gao <fred.gao@intel.com>
Signed-off-by: Zhenyu Wang <zhenyuw@linux.intel.com>
1 parent f090a00 commit 0a53bc0
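
The core change is a grow-only, per-engine reserved buffer that the guest ring contents are copied into and scanned before any i915 request is allocated. Below is a minimal userspace sketch of that grow-only buffer pattern; realloc() stands in for the kernel's krealloc(), and the struct and function names here are illustrative only, not taken from the patch.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct reserve_buf {
	void *va;     /* buffer kept alive until teardown */
	size_t size;  /* current capacity; it only ever grows */
};

/* Grow the reserved buffer only when the incoming ring buffer is larger. */
static int reserve_buf_fit(struct reserve_buf *buf, size_t rb_len)
{
	if (rb_len > buf->size) {
		void *va = realloc(buf->va, rb_len);

		if (!va)
			return -1;  /* old buffer is still valid on failure */
		buf->va = va;
		buf->size = rb_len;
	}
	return 0;
}

int main(void)
{
	/* start small, as the patch does with one fixed allocation per engine */
	struct reserve_buf buf = { .va = malloc(512), .size = 512 };
	char payload[1024];

	memset(payload, 0xab, sizeof(payload));  /* stand-in for guest ring contents */

	if (buf.va && reserve_buf_fit(&buf, sizeof(payload)) == 0) {
		memcpy(buf.va, payload, sizeof(payload));  /* copy first, then scan */
		printf("reserved buffer grown to %zu bytes\n", buf.size);
	}

	free(buf.va);
	return 0;
}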

File tree: 4 files changed, +110 -38 lines

drivers/gpu/drm/i915/gvt/cmd_parser.c (20 additions, 11 deletions)

@@ -2603,7 +2603,8 @@ static int shadow_workload_ring_buffer(struct intel_vgpu_workload *workload)
 {
 	struct intel_vgpu *vgpu = workload->vgpu;
 	unsigned long gma_head, gma_tail, gma_top, guest_rb_size;
-	u32 *cs;
+	void *shadow_ring_buffer_va;
+	int ring_id = workload->ring_id;
 	int ret;
 
 	guest_rb_size = _RING_CTL_BUF_SIZE(workload->rb_ctl);
@@ -2616,34 +2617,42 @@ static int shadow_workload_ring_buffer(struct intel_vgpu_workload *workload)
 	gma_tail = workload->rb_start + workload->rb_tail;
 	gma_top = workload->rb_start + guest_rb_size;
 
-	/* allocate shadow ring buffer */
-	cs = intel_ring_begin(workload->req, workload->rb_len / sizeof(u32));
-	if (IS_ERR(cs))
-		return PTR_ERR(cs);
+	if (workload->rb_len > vgpu->reserve_ring_buffer_size[ring_id]) {
+		void *va = vgpu->reserve_ring_buffer_va[ring_id];
+		/* realloc the new ring buffer if needed */
+		vgpu->reserve_ring_buffer_va[ring_id] =
+			krealloc(va, workload->rb_len, GFP_KERNEL);
+		if (!vgpu->reserve_ring_buffer_va[ring_id]) {
+			gvt_vgpu_err("fail to alloc reserve ring buffer\n");
+			return -ENOMEM;
+		}
+		vgpu->reserve_ring_buffer_size[ring_id] = workload->rb_len;
+	}
+
+	shadow_ring_buffer_va = vgpu->reserve_ring_buffer_va[ring_id];
 
 	/* get shadow ring buffer va */
-	workload->shadow_ring_buffer_va = cs;
+	workload->shadow_ring_buffer_va = shadow_ring_buffer_va;
 
 	/* head > tail --> copy head <-> top */
 	if (gma_head > gma_tail) {
 		ret = copy_gma_to_hva(vgpu, vgpu->gtt.ggtt_mm,
-				gma_head, gma_top, cs);
+				gma_head, gma_top, shadow_ring_buffer_va);
 		if (ret < 0) {
 			gvt_vgpu_err("fail to copy guest ring buffer\n");
 			return ret;
 		}
-		cs += ret / sizeof(u32);
+		shadow_ring_buffer_va += ret;
 		gma_head = workload->rb_start;
 	}
 
 	/* copy head or start <-> tail */
-	ret = copy_gma_to_hva(vgpu, vgpu->gtt.ggtt_mm, gma_head, gma_tail, cs);
+	ret = copy_gma_to_hva(vgpu, vgpu->gtt.ggtt_mm, gma_head, gma_tail,
+			shadow_ring_buffer_va);
 	if (ret < 0) {
 		gvt_vgpu_err("fail to copy guest ring buffer\n");
 		return ret;
 	}
-	cs += ret / sizeof(u32);
-	intel_ring_advance(workload->req, cs);
 	return 0;
 }
 

drivers/gpu/drm/i915/gvt/execlist.c (30 additions, 0 deletions)

@@ -820,10 +820,21 @@ static void clean_workloads(struct intel_vgpu *vgpu, unsigned long engine_mask)
 
 void intel_vgpu_clean_execlist(struct intel_vgpu *vgpu)
 {
+	enum intel_engine_id i;
+	struct intel_engine_cs *engine;
+
 	clean_workloads(vgpu, ALL_ENGINES);
 	kmem_cache_destroy(vgpu->workloads);
+
+	for_each_engine(engine, vgpu->gvt->dev_priv, i) {
+		kfree(vgpu->reserve_ring_buffer_va[i]);
+		vgpu->reserve_ring_buffer_va[i] = NULL;
+		vgpu->reserve_ring_buffer_size[i] = 0;
+	}
+
 }
 
+#define RESERVE_RING_BUFFER_SIZE		((1 * PAGE_SIZE)/8)
 int intel_vgpu_init_execlist(struct intel_vgpu *vgpu)
 {
 	enum intel_engine_id i;
@@ -843,7 +854,26 @@ int intel_vgpu_init_execlist(struct intel_vgpu *vgpu)
 	if (!vgpu->workloads)
 		return -ENOMEM;
 
+	/* each ring has a shadow ring buffer until vgpu destroyed */
+	for_each_engine(engine, vgpu->gvt->dev_priv, i) {
+		vgpu->reserve_ring_buffer_va[i] =
+			kmalloc(RESERVE_RING_BUFFER_SIZE, GFP_KERNEL);
+		if (!vgpu->reserve_ring_buffer_va[i]) {
+			gvt_vgpu_err("fail to alloc reserve ring buffer\n");
+			goto out;
+		}
+		vgpu->reserve_ring_buffer_size[i] = RESERVE_RING_BUFFER_SIZE;
+	}
 	return 0;
+out:
+	for_each_engine(engine, vgpu->gvt->dev_priv, i) {
+		if (vgpu->reserve_ring_buffer_size[i]) {
+			kfree(vgpu->reserve_ring_buffer_va[i]);
+			vgpu->reserve_ring_buffer_va[i] = NULL;
+			vgpu->reserve_ring_buffer_size[i] = 0;
+		}
+	}
+	return -ENOMEM;
 }
 
 void intel_vgpu_reset_execlist(struct intel_vgpu *vgpu,

drivers/gpu/drm/i915/gvt/gvt.h (3 additions, 0 deletions)

@@ -166,6 +166,9 @@ struct intel_vgpu {
 	struct list_head workload_q_head[I915_NUM_ENGINES];
 	struct kmem_cache *workloads;
 	atomic_t running_workload_num;
+	/* 1/2K for each reserve ring buffer */
+	void *reserve_ring_buffer_va[I915_NUM_ENGINES];
+	int reserve_ring_buffer_size[I915_NUM_ENGINES];
 	DECLARE_BITMAP(tlb_handle_pending, I915_NUM_ENGINES);
 	struct i915_gem_context *shadow_ctx;
 	DECLARE_BITMAP(shadow_ctx_desc_updated, I915_NUM_ENGINES);

drivers/gpu/drm/i915/gvt/scheduler.c (57 additions, 27 deletions)

@@ -201,6 +201,34 @@ static void shadow_context_descriptor_update(struct i915_gem_context *ctx,
 	ce->lrc_desc = desc;
 }
 
+static int copy_workload_to_ring_buffer(struct intel_vgpu_workload *workload)
+{
+	struct intel_vgpu *vgpu = workload->vgpu;
+	void *shadow_ring_buffer_va;
+	u32 *cs;
+
+	/* allocate shadow ring buffer */
+	cs = intel_ring_begin(workload->req, workload->rb_len / sizeof(u32));
+	if (IS_ERR(cs)) {
+		gvt_vgpu_err("fail to alloc size =%ld shadow ring buffer\n",
+			workload->rb_len);
+		return PTR_ERR(cs);
+	}
+
+	shadow_ring_buffer_va = workload->shadow_ring_buffer_va;
+
+	/* get shadow ring buffer va */
+	workload->shadow_ring_buffer_va = cs;
+
+	memcpy(cs, shadow_ring_buffer_va,
+			workload->rb_len);
+
+	cs += workload->rb_len / sizeof(u32);
+	intel_ring_advance(workload->req, cs);
+
+	return 0;
+}
+
 /**
  * intel_gvt_scan_and_shadow_workload - audit the workload by scanning and
  * shadow it as well, include ringbuffer,wa_ctx and ctx.
@@ -214,8 +242,10 @@ int intel_gvt_scan_and_shadow_workload(struct intel_vgpu_workload *workload)
 	int ring_id = workload->ring_id;
 	struct i915_gem_context *shadow_ctx = workload->vgpu->shadow_ctx;
 	struct drm_i915_private *dev_priv = workload->vgpu->gvt->dev_priv;
+	struct intel_engine_cs *engine = dev_priv->engine[ring_id];
 	struct drm_i915_gem_request *rq;
 	struct intel_vgpu *vgpu = workload->vgpu;
+	struct intel_ring *ring;
 	int ret;
 
 	lockdep_assert_held(&dev_priv->drm.struct_mutex);
@@ -231,17 +261,6 @@ int intel_gvt_scan_and_shadow_workload(struct intel_vgpu_workload *workload)
 	shadow_context_descriptor_update(shadow_ctx,
 					dev_priv->engine[ring_id]);
 
-	rq = i915_gem_request_alloc(dev_priv->engine[ring_id], shadow_ctx);
-	if (IS_ERR(rq)) {
-		gvt_vgpu_err("fail to allocate gem request\n");
-		ret = PTR_ERR(rq);
-		goto out;
-	}
-
-	gvt_dbg_sched("ring id %d get i915 gem request %p\n", ring_id, rq);
-
-	workload->req = i915_gem_request_get(rq);
-
 	ret = intel_gvt_scan_and_shadow_ringbuffer(workload);
 	if (ret)
 		goto out;
@@ -253,10 +272,37 @@ int intel_gvt_scan_and_shadow_workload(struct intel_vgpu_workload *workload)
 		goto out;
 	}
 
+	/* pin shadow context by gvt even the shadow context will be pinned
+	 * when i915 alloc request. That is because gvt will update the guest
+	 * context from shadow context when workload is completed, and at that
+	 * moment, i915 may already unpined the shadow context to make the
+	 * shadow_ctx pages invalid. So gvt need to pin itself. After update
+	 * the guest context, gvt can unpin the shadow_ctx safely.
+	 */
+	ring = engine->context_pin(engine, shadow_ctx);
+	if (IS_ERR(ring)) {
+		ret = PTR_ERR(ring);
+		gvt_vgpu_err("fail to pin shadow context\n");
+		goto out;
+	}
+
 	ret = populate_shadow_context(workload);
 	if (ret)
 		goto out;
 
+	rq = i915_gem_request_alloc(dev_priv->engine[ring_id], shadow_ctx);
+	if (IS_ERR(rq)) {
+		gvt_vgpu_err("fail to allocate gem request\n");
+		ret = PTR_ERR(rq);
+		goto out;
+	}
+
+	gvt_dbg_sched("ring id %d get i915 gem request %p\n", ring_id, rq);
+
+	workload->req = i915_gem_request_get(rq);
+	ret = copy_workload_to_ring_buffer(workload);
+	if (ret)
+		goto out;
 	workload->shadowed = true;
 
 out:
@@ -269,8 +315,6 @@ static int dispatch_workload(struct intel_vgpu_workload *workload)
 	struct i915_gem_context *shadow_ctx = workload->vgpu->shadow_ctx;
 	struct drm_i915_private *dev_priv = workload->vgpu->gvt->dev_priv;
 	struct intel_engine_cs *engine = dev_priv->engine[ring_id];
-	struct intel_vgpu *vgpu = workload->vgpu;
-	struct intel_ring *ring;
 	int ret = 0;
 
 	gvt_dbg_sched("ring id %d prepare to dispatch workload %p\n",
@@ -288,20 +332,6 @@ static int dispatch_workload(struct intel_vgpu_workload *workload)
 		goto out;
 	}
 
-	/* pin shadow context by gvt even the shadow context will be pinned
-	 * when i915 alloc request. That is because gvt will update the guest
-	 * context from shadow context when workload is completed, and at that
-	 * moment, i915 may already unpined the shadow context to make the
-	 * shadow_ctx pages invalid. So gvt need to pin itself. After update
-	 * the guest context, gvt can unpin the shadow_ctx safely.
-	 */
-	ring = engine->context_pin(engine, shadow_ctx);
-	if (IS_ERR(ring)) {
-		ret = PTR_ERR(ring);
-		gvt_vgpu_err("fail to pin shadow context\n");
-		goto out;
-	}
-
 out:
 	if (ret)
 		workload->status = ret;