Skip to content

Commit edf6b76

Browse files
committed
drm/i915: Add smp_rmb() to busy ioctl's RCU dance
In the debate as to whether the second read of active->request is ordered after the dependent reads of the first read of active->request, just give in and throw a smp_rmb() in there so that ordering of loads is assured. v2: Explain the manual smp_rmb() Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Cc: Daniel Vetter <daniel.vetter@ffwll.ch> Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch> Link: http://patchwork.freedesktop.org/patch/msgid/1470731014-6894-1-git-send-email-chris@chris-wilson.co.uk
1 parent 87b723a commit edf6b76

File tree

2 files changed

+34
-5
lines changed

2 files changed

+34
-5
lines changed

drivers/gpu/drm/i915/i915_gem.c

Lines changed: 31 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3733,7 +3733,7 @@ i915_gem_object_ggtt_unpin_view(struct drm_i915_gem_object *obj,
37333733
i915_vma_unpin(i915_gem_obj_to_ggtt_view(obj, view));
37343734
}
37353735

3736-
static __always_inline unsigned __busy_read_flag(unsigned int id)
3736+
static __always_inline unsigned int __busy_read_flag(unsigned int id)
37373737
{
37383738
/* Note that we could alias engines in the execbuf API, but
37393739
* that would be very unwise as it prevents userspace from
@@ -3751,7 +3751,7 @@ static __always_inline unsigned int __busy_write_id(unsigned int id)
37513751
return id;
37523752
}
37533753

3754-
static __always_inline unsigned
3754+
static __always_inline unsigned int
37553755
__busy_set_if_active(const struct i915_gem_active *active,
37563756
unsigned int (*flag)(unsigned int id))
37573757
{
@@ -3768,19 +3768,45 @@ __busy_set_if_active(const struct i915_gem_active *active,
37683768

37693769
id = request->engine->exec_id;
37703770

3771-
/* Check that the pointer wasn't reassigned and overwritten. */
3771+
/* Check that the pointer wasn't reassigned and overwritten.
3772+
*
3773+
* In __i915_gem_active_get_rcu(), we enforce ordering between
3774+
* the first rcu pointer dereference (imposing a
3775+
* read-dependency only on access through the pointer) and
3776+
* the second lockless access through the memory barrier
3777+
* following a successful atomic_inc_not_zero(). Here there
3778+
* is no such barrier, and so we must manually insert an
3779+
* explicit read barrier to ensure that the following
3780+
* access occurs after all the loads through the first
3781+
* pointer.
3782+
*
3783+
* It is worth comparing this sequence with
3784+
* raw_write_seqcount_latch() which operates very similarly.
3785+
* The challenge here is the visibility of the other CPU
3786+
* writes to the reallocated request vs the local CPU ordering.
3787+
* Before the other CPU can overwrite the request, it will
3788+
* have updated our active->request and gone through a wmb.
3789+
* During the read here, we want to make sure that the values
3790+
* we see have not been overwritten as we do so - and we do
3791+
* that by serialising the second pointer check with the writes
3792+
* on other CPUs.
3793+
*
3794+
* The corresponding write barrier is part of
3795+
* rcu_assign_pointer().
3796+
*/
3797+
smp_rmb();
37723798
if (request == rcu_access_pointer(active->request))
37733799
return flag(id);
37743800
} while (1);
37753801
}
37763802

3777-
static inline unsigned
3803+
static __always_inline unsigned int
37783804
busy_check_reader(const struct i915_gem_active *active)
37793805
{
37803806
return __busy_set_if_active(active, __busy_read_flag);
37813807
}
37823808

3783-
static inline unsigned
3809+
static __always_inline unsigned int
37843810
busy_check_writer(const struct i915_gem_active *active)
37853811
{
37863812
return __busy_set_if_active(active, __busy_write_id);

drivers/gpu/drm/i915/i915_gem_request.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -490,6 +490,9 @@ __i915_gem_active_get_rcu(const struct i915_gem_active *active)
490490
* incremented) then the following read for rcu_access_pointer()
491491
* must occur after the atomic operation and so confirm
492492
* that this request is the one currently being tracked.
493+
*
494+
* The corresponding write barrier is part of
495+
* rcu_assign_pointer().
493496
*/
494497
if (!request || request == rcu_access_pointer(active->request))
495498
return rcu_pointer_handoff(request);

0 commit comments

Comments (0)