Skip to content

Commit 78a4237

Browse files
Brad Volkin authored and danvet committed
drm/i915: Use batch pools with the command parser
This patch sets up all of the tracking and copying necessary to use batch pools with the command parser and dispatches the copied (shadow) batch to the hardware.

After this patch, the parser is in 'enabling' mode.

Note that performance takes a hit from the copy in some cases and will likely need some work. At a rough pass, the memcpy appears to be the bottleneck. Without having done a deeper analysis, two ideas that come to mind are:

1) Copy sections of the batch at a time, as they are reached by parsing. Might improve cache locality.
2) Copy only up to the userspace-supplied batch length and memset the rest of the buffer. Reduces the number of reads.

v2:
- Remove setting the capacity of the pool
- One global pool instead of per-ring pools
- Replace batch_obj with shadow_batch_obj and hook into eb->vmas
- Memset any space in the shadow batch beyond what gets copied
- Rebased on execlist prep refactoring

v3:
- Rebase on chained batch handling
- Squash in setting the secure dispatch flag
- Add a note about the interaction w/secure dispatch pinning
- Check for request->batch_obj == NULL in i915_gem_free_request

v4:
- Fix read domains for shadow_batch_obj
- Remove the set_to_gtt_domain call from i915_parse_cmds
- ggtt_pin/unpin in the parser block to simplify error handling
- Check USES_FULL_PPGTT before setting DISPATCH_SECURE flag
- Remove i915_gem_batch_pool_put calls

v5:
- Move 'pending_read_domains |= I915_GEM_DOMAIN_COMMAND' after the parser (danvet, from v4 0/7 feedback)

Issue: VIZ-4719
Signed-off-by: Brad Volkin <bradley.d.volkin@intel.com>
Reviewed-By: Jon Bloomfield <jon.bloomfield@intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
1 parent 493018d commit 78a4237

File tree

5 files changed

+112
-23
lines changed

5 files changed

+112
-23
lines changed

drivers/gpu/drm/i915/i915_cmd_parser.c

Lines changed: 62 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -848,6 +848,56 @@ static u32 *vmap_batch(struct drm_i915_gem_object *obj)
848848
return (u32*)addr;
849849
}
850850

851+
/* Returns a vmap'd pointer to dest_obj, which the caller must unmap */
852+
static u32 *copy_batch(struct drm_i915_gem_object *dest_obj,
853+
struct drm_i915_gem_object *src_obj)
854+
{
855+
int ret = 0;
856+
int needs_clflush = 0;
857+
u32 *src_addr, *dest_addr = NULL;
858+
859+
ret = i915_gem_obj_prepare_shmem_read(src_obj, &needs_clflush);
860+
if (ret) {
861+
DRM_DEBUG_DRIVER("CMD: failed to prep read\n");
862+
return ERR_PTR(ret);
863+
}
864+
865+
src_addr = vmap_batch(src_obj);
866+
if (!src_addr) {
867+
DRM_DEBUG_DRIVER("CMD: Failed to vmap batch\n");
868+
ret = -ENOMEM;
869+
goto unpin_src;
870+
}
871+
872+
if (needs_clflush)
873+
drm_clflush_virt_range((char *)src_addr, src_obj->base.size);
874+
875+
ret = i915_gem_object_set_to_cpu_domain(dest_obj, true);
876+
if (ret) {
877+
DRM_DEBUG_DRIVER("CMD: Failed to set batch CPU domain\n");
878+
goto unmap_src;
879+
}
880+
881+
dest_addr = vmap_batch(dest_obj);
882+
if (!dest_addr) {
883+
DRM_DEBUG_DRIVER("CMD: Failed to vmap shadow batch\n");
884+
ret = -ENOMEM;
885+
goto unmap_src;
886+
}
887+
888+
memcpy(dest_addr, src_addr, src_obj->base.size);
889+
if (dest_obj->base.size > src_obj->base.size)
890+
memset((u8 *)dest_addr + src_obj->base.size, 0,
891+
dest_obj->base.size - src_obj->base.size);
892+
893+
unmap_src:
894+
vunmap(src_addr);
895+
unpin_src:
896+
i915_gem_object_unpin_pages(src_obj);
897+
898+
return ret ? ERR_PTR(ret) : dest_addr;
899+
}
900+
851901
/**
852902
* i915_needs_cmd_parser() - should a given ring use software command parsing?
853903
* @ring: the ring in question
@@ -964,6 +1014,7 @@ static bool check_cmd(const struct intel_engine_cs *ring,
9641014
* i915_parse_cmds() - parse a submitted batch buffer for privilege violations
9651015
* @ring: the ring on which the batch is to execute
9661016
* @batch_obj: the batch buffer in question
1017+
* @shadow_batch_obj: copy of the batch buffer in question
9671018
* @batch_start_offset: byte offset in the batch at which execution starts
9681019
* @is_master: is the submitting process the drm master?
9691020
*
@@ -975,32 +1026,28 @@ static bool check_cmd(const struct intel_engine_cs *ring,
9751026
*/
9761027
int i915_parse_cmds(struct intel_engine_cs *ring,
9771028
struct drm_i915_gem_object *batch_obj,
1029+
struct drm_i915_gem_object *shadow_batch_obj,
9781030
u32 batch_start_offset,
9791031
bool is_master)
9801032
{
9811033
int ret = 0;
9821034
u32 *cmd, *batch_base, *batch_end;
9831035
struct drm_i915_cmd_descriptor default_desc = { 0 };
984-
int needs_clflush = 0;
9851036
bool oacontrol_set = false; /* OACONTROL tracking. See check_cmd() */
9861037

987-
ret = i915_gem_obj_prepare_shmem_read(batch_obj, &needs_clflush);
988-
if (ret) {
989-
DRM_DEBUG_DRIVER("CMD: failed to prep read\n");
990-
return ret;
1038+
batch_base = copy_batch(shadow_batch_obj, batch_obj);
1039+
if (IS_ERR(batch_base)) {
1040+
DRM_DEBUG_DRIVER("CMD: Failed to copy batch\n");
1041+
return PTR_ERR(batch_base);
9911042
}
9921043

993-
batch_base = vmap_batch(batch_obj);
994-
if (!batch_base) {
995-
DRM_DEBUG_DRIVER("CMD: Failed to vmap batch\n");
996-
i915_gem_object_unpin_pages(batch_obj);
997-
return -ENOMEM;
998-
}
999-
1000-
if (needs_clflush)
1001-
drm_clflush_virt_range((char *)batch_base, batch_obj->base.size);
1002-
10031044
cmd = batch_base + (batch_start_offset / sizeof(*cmd));
1045+
1046+
/*
1047+
* We use the source object's size because the shadow object is as
1048+
* large or larger and copy_batch() will write MI_NOPs to the extra
1049+
* space. Parsing should be faster in some cases this way.
1050+
*/
10041051
batch_end = cmd + (batch_obj->base.size / sizeof(*batch_end));
10051052

10061053
while (cmd < batch_end) {
@@ -1062,8 +1109,6 @@ int i915_parse_cmds(struct intel_engine_cs *ring,
10621109

10631110
vunmap(batch_base);
10641111

1065-
i915_gem_object_unpin_pages(batch_obj);
1066-
10671112
return ret;
10681113
}
10691114

drivers/gpu/drm/i915/i915_dma.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -928,6 +928,7 @@ int i915_driver_unload(struct drm_device *dev)
928928

929929
mutex_lock(&dev->struct_mutex);
930930
i915_gem_cleanup_ringbuffer(dev);
931+
i915_gem_batch_pool_fini(&dev_priv->mm.batch_pool);
931932
i915_gem_context_fini(dev);
932933
mutex_unlock(&dev->struct_mutex);
933934
i915_gem_cleanup_stolen(dev);

drivers/gpu/drm/i915/i915_drv.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2963,6 +2963,7 @@ void i915_cmd_parser_fini_ring(struct intel_engine_cs *ring);
29632963
bool i915_needs_cmd_parser(struct intel_engine_cs *ring);
29642964
int i915_parse_cmds(struct intel_engine_cs *ring,
29652965
struct drm_i915_gem_object *batch_obj,
2966+
struct drm_i915_gem_object *shadow_batch_obj,
29662967
u32 batch_start_offset,
29672968
bool is_master);
29682969

drivers/gpu/drm/i915/i915_gem.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5007,6 +5007,8 @@ i915_gem_load(struct drm_device *dev)
50075007
dev_priv->mm.oom_notifier.notifier_call = i915_gem_shrinker_oom;
50085008
register_oom_notifier(&dev_priv->mm.oom_notifier);
50095009

5010+
i915_gem_batch_pool_init(dev, &dev_priv->mm.batch_pool);
5011+
50105012
mutex_init(&dev_priv->fb_tracking.lock);
50115013
}
50125014

drivers/gpu/drm/i915/i915_gem_execbuffer.c

Lines changed: 46 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1283,6 +1283,8 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
12831283
struct drm_i915_private *dev_priv = dev->dev_private;
12841284
struct eb_vmas *eb;
12851285
struct drm_i915_gem_object *batch_obj;
1286+
struct drm_i915_gem_object *shadow_batch_obj = NULL;
1287+
struct drm_i915_gem_exec_object2 shadow_exec_entry;
12861288
struct intel_engine_cs *ring;
12871289
struct intel_context *ctx;
12881290
struct i915_address_space *vm;
@@ -1399,28 +1401,66 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
13991401
ret = -EINVAL;
14001402
goto err;
14011403
}
1402-
batch_obj->base.pending_read_domains |= I915_GEM_DOMAIN_COMMAND;
14031404

14041405
if (i915_needs_cmd_parser(ring)) {
1406+
shadow_batch_obj =
1407+
i915_gem_batch_pool_get(&dev_priv->mm.batch_pool,
1408+
batch_obj->base.size);
1409+
if (IS_ERR(shadow_batch_obj)) {
1410+
ret = PTR_ERR(shadow_batch_obj);
1411+
/* Don't try to clean up the obj in the error path */
1412+
shadow_batch_obj = NULL;
1413+
goto err;
1414+
}
1415+
1416+
ret = i915_gem_obj_ggtt_pin(shadow_batch_obj, 4096, 0);
1417+
if (ret)
1418+
goto err;
1419+
14051420
ret = i915_parse_cmds(ring,
14061421
batch_obj,
1422+
shadow_batch_obj,
14071423
args->batch_start_offset,
14081424
file->is_master);
1425+
i915_gem_object_ggtt_unpin(shadow_batch_obj);
1426+
14091427
if (ret) {
14101428
if (ret != -EACCES)
14111429
goto err;
14121430
} else {
1431+
struct i915_vma *vma;
1432+
1433+
memset(&shadow_exec_entry, 0,
1434+
sizeof(shadow_exec_entry));
1435+
1436+
vma = i915_gem_obj_to_ggtt(shadow_batch_obj);
1437+
vma->exec_entry = &shadow_exec_entry;
1438+
drm_gem_object_reference(&shadow_batch_obj->base);
1439+
list_add_tail(&vma->exec_list, &eb->vmas);
1440+
1441+
shadow_batch_obj->base.pending_read_domains =
1442+
batch_obj->base.pending_read_domains;
1443+
1444+
batch_obj = shadow_batch_obj;
1445+
14131446
/*
1414-
* XXX: Actually do this when enabling batch copy...
1447+
* Set the DISPATCH_SECURE bit to remove the NON_SECURE
1448+
* bit from MI_BATCH_BUFFER_START commands issued in the
1449+
* dispatch_execbuffer implementations. We specifically
1450+
* don't want that set when the command parser is
1451+
* enabled.
14151452
*
1416-
* Set the DISPATCH_SECURE bit to remove the NON_SECURE bit
1417-
* from MI_BATCH_BUFFER_START commands issued in the
1418-
* dispatch_execbuffer implementations. We specifically don't
1419-
* want that set when the command parser is enabled.
1453+
* FIXME: with aliasing ppgtt, buffers that should only
1454+
* be in ggtt still end up in the aliasing ppgtt. remove
1455+
* this check when that is fixed.
14201456
*/
1457+
if (USES_FULL_PPGTT(dev))
1458+
flags |= I915_DISPATCH_SECURE;
14211459
}
14221460
}
14231461

1462+
batch_obj->base.pending_read_domains |= I915_GEM_DOMAIN_COMMAND;
1463+
14241464
/* snb/ivb/vlv conflate the "batch in ppgtt" bit with the "non-secure
14251465
* batch" bit. Hence we need to pin secure batches into the global gtt.
14261466
* hsw should have this fixed, but bdw mucks it up again. */

0 commit comments

Comments
 (0)