Skip to content

Commit a2b7861

Browse files
fbqrkrcmar
authored and committed
kvm/x86: Avoid async PF preempting the kernel incorrectly
Currently, in a PREEMPT_COUNT=n kernel, kvm_async_pf_task_wait() could call schedule() to reschedule in some cases. This could result in accidentally ending the current RCU read-side critical section early, causing random memory corruption in the guest, or otherwise preempting the currently running task between preempt_disable() and preempt_enable(). This is difficult to handle well because we don't know whether an async PF was delivered in a preemptible section or an RCU read-side critical section for PREEMPT_COUNT=n, since preempt_disable()/enable() and rcu_read_lock()/unlock() are both no-ops in that case. To cure this, we treat any async PF interrupting a kernel context as one that cannot be preempted, preventing kvm_async_pf_task_wait() from choosing the schedule() path in that case. To do so, a second parameter for kvm_async_pf_task_wait() is introduced, so that we know whether it's called from a context interrupting the kernel, and the parameter is set properly at all the call sites. Cc: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Wanpeng Li <wanpeng.li@hotmail.com> Cc: stable@vger.kernel.org Signed-off-by: Boqun Feng <boqun.feng@gmail.com> Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
1 parent 2fb1e94 commit a2b7861

File tree

3 files changed

+13
-7
lines changed

3 files changed

+13
-7
lines changed

arch/x86/include/asm/kvm_para.h

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -88,7 +88,7 @@ static inline long kvm_hypercall4(unsigned int nr, unsigned long p1,
8888
bool kvm_para_available(void);
8989
unsigned int kvm_arch_para_features(void);
9090
void __init kvm_guest_init(void);
91-
void kvm_async_pf_task_wait(u32 token);
91+
void kvm_async_pf_task_wait(u32 token, int interrupt_kernel);
9292
void kvm_async_pf_task_wake(u32 token);
9393
u32 kvm_read_and_reset_pf_reason(void);
9494
extern void kvm_disable_steal_time(void);
@@ -103,7 +103,7 @@ static inline void kvm_spinlock_init(void)
103103

104104
#else /* CONFIG_KVM_GUEST */
105105
#define kvm_guest_init() do {} while (0)
106-
#define kvm_async_pf_task_wait(T) do {} while(0)
106+
#define kvm_async_pf_task_wait(T, I) do {} while(0)
107107
#define kvm_async_pf_task_wake(T) do {} while(0)
108108

109109
static inline bool kvm_para_available(void)

arch/x86/kernel/kvm.c

Lines changed: 10 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -117,7 +117,11 @@ static struct kvm_task_sleep_node *_find_apf_task(struct kvm_task_sleep_head *b,
117117
return NULL;
118118
}
119119

120-
void kvm_async_pf_task_wait(u32 token)
120+
/*
121+
* @interrupt_kernel: Is this called from a routine which interrupts the kernel
122+
* (other than user space)?
123+
*/
124+
void kvm_async_pf_task_wait(u32 token, int interrupt_kernel)
121125
{
122126
u32 key = hash_32(token, KVM_TASK_SLEEP_HASHBITS);
123127
struct kvm_task_sleep_head *b = &async_pf_sleepers[key];
@@ -140,8 +144,10 @@ void kvm_async_pf_task_wait(u32 token)
140144

141145
n.token = token;
142146
n.cpu = smp_processor_id();
143-
n.halted = is_idle_task(current) || preempt_count() > 1 ||
144-
rcu_preempt_depth();
147+
n.halted = is_idle_task(current) ||
148+
(IS_ENABLED(CONFIG_PREEMPT_COUNT)
149+
? preempt_count() > 1 || rcu_preempt_depth()
150+
: interrupt_kernel);
145151
init_swait_queue_head(&n.wq);
146152
hlist_add_head(&n.link, &b->list);
147153
raw_spin_unlock(&b->lock);
@@ -269,7 +275,7 @@ do_async_page_fault(struct pt_regs *regs, unsigned long error_code)
269275
case KVM_PV_REASON_PAGE_NOT_PRESENT:
270276
/* page is swapped out by the host. */
271277
prev_state = exception_enter();
272-
kvm_async_pf_task_wait((u32)read_cr2());
278+
kvm_async_pf_task_wait((u32)read_cr2(), !user_mode(regs));
273279
exception_exit(prev_state);
274280
break;
275281
case KVM_PV_REASON_PAGE_READY:

arch/x86/kvm/mmu.c

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -3837,7 +3837,7 @@ int kvm_handle_page_fault(struct kvm_vcpu *vcpu, u64 error_code,
38373837
case KVM_PV_REASON_PAGE_NOT_PRESENT:
38383838
vcpu->arch.apf.host_apf_reason = 0;
38393839
local_irq_disable();
3840-
kvm_async_pf_task_wait(fault_address);
3840+
kvm_async_pf_task_wait(fault_address, 0);
38413841
local_irq_enable();
38423842
break;
38433843
case KVM_PV_REASON_PAGE_READY:

0 commit comments

Comments
 (0)