Skip to content

Commit da998b4

Browse files
jsmattsonjr and bonzini
authored and committed
kvm: x86: Defer setting of CR2 until #PF delivery
When exception payloads are enabled by userspace (which is not yet possible) and a #PF is raised in L2, defer the setting of CR2 until the #PF is delivered. This allows the L1 hypervisor to intercept the fault before CR2 is modified. For backwards compatibility, when exception payloads are not enabled by userspace, kvm_multiple_exception modifies CR2 when the #PF exception is raised. Reported-by: Jim Mattson <jmattson@google.com> Suggested-by: Paolo Bonzini <pbonzini@redhat.com> Signed-off-by: Jim Mattson <jmattson@google.com> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
1 parent 91e86d2 commit da998b4

File tree

4 files changed

+62
-21
lines changed

4 files changed

+62
-21
lines changed

arch/x86/kvm/svm.c

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -805,6 +805,8 @@ static void svm_queue_exception(struct kvm_vcpu *vcpu)
805805
nested_svm_check_exception(svm, nr, has_error_code, error_code))
806806
return;
807807

808+
kvm_deliver_exception_payload(&svm->vcpu);
809+
808810
if (nr == BP_VECTOR && !static_cpu_has(X86_FEATURE_NRIPS)) {
809811
unsigned long rip, old_rip = kvm_rip_read(&svm->vcpu);
810812

@@ -2965,16 +2967,13 @@ static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr,
29652967
svm->vmcb->control.exit_info_1 = error_code;
29662968

29672969
/*
2968-
* FIXME: we should not write CR2 when L1 intercepts an L2 #PF exception.
2969-
* The fix is to add the ancillary datum (CR2 or DR6) to structs
2970-
* kvm_queued_exception and kvm_vcpu_events, so that CR2 and DR6 can be
2971-
* written only when inject_pending_event runs (DR6 would written here
2972-
* too). This should be conditional on a new capability---if the
2973-
* capability is disabled, kvm_multiple_exception would write the
2974-
* ancillary information to CR2 or DR6, for backwards ABI-compatibility.
2970+
* EXITINFO2 is undefined for all exception intercepts other
2971+
* than #PF.
29752972
*/
29762973
if (svm->vcpu.arch.exception.nested_apf)
29772974
svm->vmcb->control.exit_info_2 = svm->vcpu.arch.apf.nested_apf_token;
2975+
else if (svm->vcpu.arch.exception.has_payload)
2976+
svm->vmcb->control.exit_info_2 = svm->vcpu.arch.exception.payload;
29782977
else
29792978
svm->vmcb->control.exit_info_2 = svm->vcpu.arch.cr2;
29802979

arch/x86/kvm/vmx.c

Lines changed: 9 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -3292,27 +3292,24 @@ static int nested_vmx_check_exception(struct kvm_vcpu *vcpu, unsigned long *exit
32923292
{
32933293
struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
32943294
unsigned int nr = vcpu->arch.exception.nr;
3295+
bool has_payload = vcpu->arch.exception.has_payload;
3296+
unsigned long payload = vcpu->arch.exception.payload;
32953297

32963298
if (nr == PF_VECTOR) {
32973299
if (vcpu->arch.exception.nested_apf) {
32983300
*exit_qual = vcpu->arch.apf.nested_apf_token;
32993301
return 1;
33003302
}
3301-
/*
3302-
* FIXME: we must not write CR2 when L1 intercepts an L2 #PF exception.
3303-
* The fix is to add the ancillary datum (CR2 or DR6) to structs
3304-
* kvm_queued_exception and kvm_vcpu_events, so that CR2 and DR6
3305-
* can be written only when inject_pending_event runs. This should be
3306-
* conditional on a new capability---if the capability is disabled,
3307-
* kvm_multiple_exception would write the ancillary information to
3308-
* CR2 or DR6, for backwards ABI-compatibility.
3309-
*/
33103303
if (nested_vmx_is_page_fault_vmexit(vmcs12,
33113304
vcpu->arch.exception.error_code)) {
3312-
*exit_qual = vcpu->arch.cr2;
3305+
*exit_qual = has_payload ? payload : vcpu->arch.cr2;
33133306
return 1;
33143307
}
33153308
} else {
3309+
/*
3310+
* FIXME: we must not write DR6 when L1 intercepts an
3311+
* L2 #DB exception.
3312+
*/
33163313
if (vmcs12->exception_bitmap & (1u << nr)) {
33173314
if (nr == DB_VECTOR) {
33183315
*exit_qual = vcpu->arch.dr6;
@@ -3349,6 +3346,8 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu)
33493346
u32 error_code = vcpu->arch.exception.error_code;
33503347
u32 intr_info = nr | INTR_INFO_VALID_MASK;
33513348

3349+
kvm_deliver_exception_payload(vcpu);
3350+
33523351
if (has_error_code) {
33533352
vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE, error_code);
33543353
intr_info |= INTR_INFO_DELIVER_CODE_MASK;

arch/x86/kvm/x86.c

Lines changed: 45 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -400,6 +400,26 @@ static int exception_type(int vector)
400400
return EXCPT_FAULT;
401401
}
402402

403+
void kvm_deliver_exception_payload(struct kvm_vcpu *vcpu)
404+
{
405+
unsigned nr = vcpu->arch.exception.nr;
406+
bool has_payload = vcpu->arch.exception.has_payload;
407+
unsigned long payload = vcpu->arch.exception.payload;
408+
409+
if (!has_payload)
410+
return;
411+
412+
switch (nr) {
413+
case PF_VECTOR:
414+
vcpu->arch.cr2 = payload;
415+
break;
416+
}
417+
418+
vcpu->arch.exception.has_payload = false;
419+
vcpu->arch.exception.payload = 0;
420+
}
421+
EXPORT_SYMBOL_GPL(kvm_deliver_exception_payload);
422+
403423
static void kvm_multiple_exception(struct kvm_vcpu *vcpu,
404424
unsigned nr, bool has_error, u32 error_code,
405425
bool has_payload, unsigned long payload, bool reinject)
@@ -441,6 +461,18 @@ static void kvm_multiple_exception(struct kvm_vcpu *vcpu,
441461
vcpu->arch.exception.error_code = error_code;
442462
vcpu->arch.exception.has_payload = has_payload;
443463
vcpu->arch.exception.payload = payload;
464+
/*
465+
* In guest mode, payload delivery should be deferred,
466+
* so that the L1 hypervisor can intercept #PF before
467+
* CR2 is modified. However, for ABI compatibility
468+
* with KVM_GET_VCPU_EVENTS and KVM_SET_VCPU_EVENTS,
469+
* we can't delay payload delivery unless userspace
470+
* has enabled this functionality via the per-VM
471+
* capability, KVM_CAP_EXCEPTION_PAYLOAD.
472+
*/
473+
if (!vcpu->kvm->arch.exception_payload_enabled ||
474+
!is_guest_mode(vcpu))
475+
kvm_deliver_exception_payload(vcpu);
444476
return;
445477
}
446478

@@ -486,6 +518,13 @@ void kvm_requeue_exception(struct kvm_vcpu *vcpu, unsigned nr)
486518
}
487519
EXPORT_SYMBOL_GPL(kvm_requeue_exception);
488520

521+
static void kvm_queue_exception_e_p(struct kvm_vcpu *vcpu, unsigned nr,
522+
u32 error_code, unsigned long payload)
523+
{
524+
kvm_multiple_exception(vcpu, nr, true, error_code,
525+
true, payload, false);
526+
}
527+
489528
int kvm_complete_insn_gp(struct kvm_vcpu *vcpu, int err)
490529
{
491530
if (err)
@@ -502,11 +541,13 @@ void kvm_inject_page_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault)
502541
++vcpu->stat.pf_guest;
503542
vcpu->arch.exception.nested_apf =
504543
is_guest_mode(vcpu) && fault->async_page_fault;
505-
if (vcpu->arch.exception.nested_apf)
544+
if (vcpu->arch.exception.nested_apf) {
506545
vcpu->arch.apf.nested_apf_token = fault->address;
507-
else
508-
vcpu->arch.cr2 = fault->address;
509-
kvm_queue_exception_e(vcpu, PF_VECTOR, fault->error_code);
546+
kvm_queue_exception_e(vcpu, PF_VECTOR, fault->error_code);
547+
} else {
548+
kvm_queue_exception_e_p(vcpu, PF_VECTOR, fault->error_code,
549+
fault->address);
550+
}
510551
}
511552
EXPORT_SYMBOL_GPL(kvm_inject_page_fault);
512553

arch/x86/kvm/x86.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -266,6 +266,8 @@ int kvm_write_guest_virt_system(struct kvm_vcpu *vcpu,
266266

267267
int handle_ud(struct kvm_vcpu *vcpu);
268268

269+
void kvm_deliver_exception_payload(struct kvm_vcpu *vcpu);
270+
269271
void kvm_vcpu_mtrr_init(struct kvm_vcpu *vcpu);
270272
u8 kvm_mtrr_get_guest_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn);
271273
bool kvm_mtrr_valid(struct kvm_vcpu *vcpu, u32 msr, u64 data);

0 commit comments

Comments (0)