
Commit d264ee0

Sean Christopherson authored and bonzini committed
KVM: VMX: use preemption timer to force immediate VMExit
A VMX preemption timer value of '0' is guaranteed to cause a VMExit prior to the CPU executing any instructions in the guest. Use the preemption timer (if it's supported) to trigger immediate VMExit in place of the current method of sending a self-IPI. This ensures that pending VMExit injection to L1 occurs prior to executing any instructions in the guest (regardless of nesting level).

When deferring VMExit injection, KVM generates an immediate VMExit from the (possibly nested) guest by sending itself an IPI. Because hardware interrupts are blocked prior to VMEnter and are unblocked (in hardware) after VMEnter, this results in taking a VMExit(INTR) before any guest instruction is executed. But, as this approach relies on the IPI being received before VMEnter executes, it only works as intended when KVM is running as L0. Because there are no architectural guarantees regarding when IPIs are delivered, when running nested the INTR may "arrive" long after L2 is running, e.g. L0 KVM doesn't force an immediate switch to L1 to deliver an INTR.

For the most part, this unintended delay is not an issue, since the events being injected to L1 also do not have architectural guarantees regarding their timing. The notable exception is the VMX preemption timer[1], which is architecturally guaranteed to cause a VMExit prior to executing any instructions in the guest if the timer value is '0' at VMEnter. Specifically, the delay in injecting the VMExit causes the preemption timer KVM unit test to fail when run in a nested guest.

Note: this approach is viable even on CPUs with a broken preemption timer, as "broken" in this context only means the timer counts at the wrong rate. There are no known errata affecting a timer value of '0'.

[1] I/O SMIs also have guarantees on when they arrive, but I have no idea if/how those are emulated in KVM.

Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
[Use a hook for SVM instead of leaving the default in x86.c - Paolo]
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
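In outline, the patch turns the hard-coded self-IPI into a per-vendor hook: SVM keeps the IPI default, while VMX (when the preemption timer is usable) only sets a flag that later arms the timer with '0' at VMEnter. Below is a minimal, runnable userspace sketch of that dispatch pattern; the real hook is request_immediate_exit in struct kvm_x86_ops, and every other name and type here is a simplified stand-in, not KVM code.

/* Sketch of the commit's dispatch pattern, with stand-in types. */
#include <stdbool.h>
#include <stdio.h>

struct vcpu {
        int cpu;
        bool req_immediate_exit;        /* mirrors vcpu_vmx.req_immediate_exit */
};

struct x86_ops {
        void (*request_immediate_exit)(struct vcpu *vcpu);
};

/* Default, kept by SVM (and by VMX without a preemption timer): self-IPI. */
static void default_request_immediate_exit(struct vcpu *vcpu)
{
        printf("cpu%d: self-IPI; VMExit(INTR) fires when VMEnter unblocks IRQs\n",
               vcpu->cpu);
}

/* VMX override: just record the request; it is honored at VMEnter. */
static void vmx_request_immediate_exit_sketch(struct vcpu *vcpu)
{
        vcpu->req_immediate_exit = true;
}

/* Stand-in for vmx_update_hv_timer(), which runs just before VMEnter. */
static void update_hv_timer(struct vcpu *vcpu)
{
        if (vcpu->req_immediate_exit)
                printf("cpu%d: VMX_PREEMPTION_TIMER_VALUE = 0 -> exit before any guest insn\n",
                       vcpu->cpu);
}

int main(void)
{
        struct vcpu vcpu = { .cpu = 0, .req_immediate_exit = false };
        struct x86_ops ops = { .request_immediate_exit = default_request_immediate_exit };
        bool has_preemption_timer = true;       /* pretend cpu_has_vmx_preemption_timer() */

        if (has_preemption_timer)
                ops.request_immediate_exit = vmx_request_immediate_exit_sketch;

        ops.request_immediate_exit(&vcpu);      /* what vcpu_enter_guest() now calls */
        update_hv_timer(&vcpu);
        return 0;
}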
1 parent f459a70 commit d264ee0

File tree

4 files changed: 31 additions & 2 deletions


arch/x86/include/asm/kvm_host.h

Lines changed: 2 additions & 0 deletions
@@ -1055,6 +1055,7 @@ struct kvm_x86_ops {
        bool (*umip_emulated)(void);
 
        int (*check_nested_events)(struct kvm_vcpu *vcpu, bool external_intr);
+       void (*request_immediate_exit)(struct kvm_vcpu *vcpu);
 
        void (*sched_in)(struct kvm_vcpu *kvm, int cpu);

@@ -1482,6 +1483,7 @@ extern bool kvm_find_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn);
 
 int kvm_skip_emulated_instruction(struct kvm_vcpu *vcpu);
 int kvm_complete_insn_gp(struct kvm_vcpu *vcpu, int err);
+void __kvm_request_immediate_exit(struct kvm_vcpu *vcpu);
 
 int kvm_is_in_guest(void);

arch/x86/kvm/svm.c

Lines changed: 2 additions & 0 deletions
@@ -7148,6 +7148,8 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
        .check_intercept = svm_check_intercept,
        .handle_external_intr = svm_handle_external_intr,
 
+       .request_immediate_exit = __kvm_request_immediate_exit,
+
        .sched_in = svm_sched_in,
 
        .pmu_ops = &amd_pmu_ops,

arch/x86/kvm/vmx.c

Lines changed: 20 additions & 1 deletion
@@ -1020,6 +1020,8 @@ struct vcpu_vmx {
        int ple_window;
        bool ple_window_dirty;
 
+       bool req_immediate_exit;
+
        /* Support for PML */
 #define PML_ENTITY_NUM 512
        struct page *pml_pg;
@@ -2865,6 +2867,8 @@ static void vmx_prepare_switch_to_guest(struct kvm_vcpu *vcpu)
        u16 fs_sel, gs_sel;
        int i;
 
+       vmx->req_immediate_exit = false;
+
        if (vmx->loaded_cpu_state)
                return;

@@ -7967,6 +7971,9 @@ static __init int hardware_setup(void)
                kvm_x86_ops->enable_log_dirty_pt_masked = NULL;
        }
 
+       if (!cpu_has_vmx_preemption_timer())
+               kvm_x86_ops->request_immediate_exit = __kvm_request_immediate_exit;
+
        if (cpu_has_vmx_preemption_timer() && enable_preemption_timer) {
                u64 vmx_msr;

@@ -9209,7 +9216,8 @@ static int handle_pml_full(struct kvm_vcpu *vcpu)
 
 static int handle_preemption_timer(struct kvm_vcpu *vcpu)
 {
-       kvm_lapic_expired_hv_timer(vcpu);
+       if (!to_vmx(vcpu)->req_immediate_exit)
+               kvm_lapic_expired_hv_timer(vcpu);
        return 1;
 }

@@ -10611,6 +10619,11 @@ static void vmx_update_hv_timer(struct kvm_vcpu *vcpu)
        u64 tscl;
        u32 delta_tsc;
 
+       if (vmx->req_immediate_exit) {
+               vmx_arm_hv_timer(vmx, 0);
+               return;
+       }
+
        if (vmx->hv_deadline_tsc != -1) {
                tscl = rdtsc();
                if (vmx->hv_deadline_tsc > tscl)
@@ -12879,6 +12892,11 @@ static int vmx_check_nested_events(struct kvm_vcpu *vcpu, bool external_intr)
        return 0;
 }
 
+static void vmx_request_immediate_exit(struct kvm_vcpu *vcpu)
+{
+       to_vmx(vcpu)->req_immediate_exit = true;
+}
+
 static u32 vmx_get_preemption_timer_value(struct kvm_vcpu *vcpu)
 {
        ktime_t remaining =
@@ -14135,6 +14153,7 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = {
        .umip_emulated = vmx_umip_emulated,
 
        .check_nested_events = vmx_check_nested_events,
+       .request_immediate_exit = vmx_request_immediate_exit,
 
        .sched_in = vmx_sched_in,
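For context, vmx_arm_hv_timer() itself is untouched by this diff. As of this series it roughly writes the countdown value into the VMCS and enables the pin-based preemption-timer control. The body below is a paraphrased sketch from the surrounding tree, not part of this commit, so treat the exact details as approximate; VMX_PREEMPTION_TIMER_VALUE and PIN_BASED_VMX_PREEMPTION_TIMER are the real VMCS field/control names.

/* Paraphrased sketch of vmx_arm_hv_timer() as of this series (not in this
 * diff); arming with val == 0 is what guarantees the immediate exit. */
static void vmx_arm_hv_timer(struct vcpu_vmx *vmx, u32 val)
{
        vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, val);
        if (!vmx->loaded_vmcs->hv_timer_armed)
                vmcs_set_bits(PIN_BASED_VM_EXEC_CONTROL,
                              PIN_BASED_VMX_PREEMPTION_TIMER);
        vmx->loaded_vmcs->hv_timer_armed = true;
}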

arch/x86/kvm/x86.c

Lines changed: 7 additions & 1 deletion
@@ -7361,6 +7361,12 @@ void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu)
 }
 EXPORT_SYMBOL_GPL(kvm_vcpu_reload_apic_access_page);
 
+void __kvm_request_immediate_exit(struct kvm_vcpu *vcpu)
+{
+       smp_send_reschedule(vcpu->cpu);
+}
+EXPORT_SYMBOL_GPL(__kvm_request_immediate_exit);
+
 /*
  * Returns 1 to let vcpu_run() continue the guest execution loop without
  * exiting to the userspace. Otherwise, the value will be returned to the
@@ -7565,7 +7571,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 
        if (req_immediate_exit) {
                kvm_make_request(KVM_REQ_EVENT, vcpu);
-               smp_send_reschedule(vcpu->cpu);
+               kvm_x86_ops->request_immediate_exit(vcpu);
        }
 
        trace_kvm_entry(vcpu->vcpu_id);
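The IPI fallback above works at L0 because the self-IPI is sent while interrupts are blocked and stays pending until hardware unblocks interrupts at VMEnter, producing a VMExit(INTR) before any guest instruction runs. A purely illustrative userspace analogy of that "pending until unblocked" behavior, using POSIX signal blocking (no KVM code involved):

/* Analogy: a signal raised while blocked stays pending and is delivered the
 * instant it is unblocked, just as a self-IPI sent with IRQs off is delivered
 * the instant VMEnter unblocks interrupts. */
#include <signal.h>
#include <stdio.h>
#include <unistd.h>

static void intr_handler(int sig)
{
        (void)sig;
        static const char msg[] = "delivered at unblock (~VMExit(INTR) at VMEnter)\n";
        write(STDOUT_FILENO, msg, sizeof(msg) - 1);     /* async-signal-safe */
}

int main(void)
{
        sigset_t set;

        signal(SIGUSR1, intr_handler);
        sigemptyset(&set);
        sigaddset(&set, SIGUSR1);

        sigprocmask(SIG_BLOCK, &set, NULL);     /* ~ IRQs blocked before VMEnter */
        raise(SIGUSR1);                         /* ~ smp_send_reschedule(vcpu->cpu) */
        printf("pending but not delivered while blocked (~IRQs off)\n");
        sigprocmask(SIG_UNBLOCK, &set, NULL);   /* ~ hardware unblocking at VMEnter */
        return 0;
}

As the commit message explains, this ordering only holds when KVM runs as L0; when KVM itself is nested, the INTR may arrive long after L2 has started, which is precisely the gap the preemption-timer path closes.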
