Skip to content

Commit d6e41f1

Browse files
amluto authored and Ingo Molnar committed
x86/mm, KVM: Teach KVM's VMX code that CR3 isn't a constant
When PCID is enabled, CR3's PCID bits can change during context switches, so KVM won't be able to treat CR3 as a per-mm constant any more. I structured this like the existing CR4 handling. Under ordinary circumstances (PCID disabled, or the current PCID and the value that's already in the VMCS match), we won't do an extra VMCS write, and we'll never do an extra direct CR3 read. The overhead should be minimal. I disallowed using the new helper in non-atomic context because PCID support will cause CR3 to stop being constant in non-atomic process context. (Frankly, it also scares me a bit that KVM ever treated CR3 as constant, but it looks like it was okay before.) Signed-off-by: Andy Lutomirski <luto@kernel.org> Cc: Andrew Morton <akpm@linux-foundation.org> Cc: Arjan van de Ven <arjan@linux.intel.com> Cc: Borislav Petkov <bpetkov@suse.de> Cc: Dave Hansen <dave.hansen@intel.com> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Mel Gorman <mgorman@suse.de> Cc: Michal Hocko <mhocko@suse.com> Cc: Nadav Amit <nadav.amit@gmail.com> Cc: Nadav Amit <namit@vmware.com> Cc: Paolo Bonzini <pbonzini@redhat.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Radim Krčmář <rkrcmar@redhat.com> Cc: Rik van Riel <riel@redhat.com> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: kvm@vger.kernel.org Cc: linux-mm@kvack.org Signed-off-by: Ingo Molnar <mingo@kernel.org>
1 parent be4ffc0 commit d6e41f1

File tree

2 files changed

+37
-3
lines changed

2 files changed

+37
-3
lines changed

arch/x86/include/asm/mmu_context.h

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -266,4 +266,23 @@ static inline bool arch_vma_access_permitted(struct vm_area_struct *vma,
266266
return __pkru_allows_pkey(vma_pkey(vma), write);
267267
}
268268

269+
270+
/*
271+
* This can be used from process context to figure out what the value of
272+
* CR3 is without needing to do a (slow) read_cr3().
273+
*
274+
* It's intended to be used for code like KVM that sneakily changes CR3
275+
* and needs to restore it. It needs to be used very carefully.
276+
*/
277+
static inline unsigned long __get_current_cr3_fast(void)
278+
{
279+
unsigned long cr3 = __pa(this_cpu_read(cpu_tlbstate.loaded_mm)->pgd);
280+
281+
/* For now, be very restrictive about when this can be called. */
282+
VM_WARN_ON(in_nmi() || !in_atomic());
283+
284+
VM_BUG_ON(cr3 != read_cr3());
285+
return cr3;
286+
}
287+
269288
#endif /* _ASM_X86_MMU_CONTEXT_H */

arch/x86/kvm/vmx.c

Lines changed: 18 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@
4848
#include <asm/kexec.h>
4949
#include <asm/apic.h>
5050
#include <asm/irq_remapping.h>
51+
#include <asm/mmu_context.h>
5152

5253
#include "trace.h"
5354
#include "pmu.h"
@@ -596,6 +597,7 @@ struct vcpu_vmx {
596597
int gs_ldt_reload_needed;
597598
int fs_reload_needed;
598599
u64 msr_host_bndcfgs;
600+
unsigned long vmcs_host_cr3; /* May not match real cr3 */
599601
unsigned long vmcs_host_cr4; /* May not match real cr4 */
600602
} host_state;
601603
struct {
@@ -5012,12 +5014,19 @@ static void vmx_set_constant_host_state(struct vcpu_vmx *vmx)
50125014
u32 low32, high32;
50135015
unsigned long tmpl;
50145016
struct desc_ptr dt;
5015-
unsigned long cr0, cr4;
5017+
unsigned long cr0, cr3, cr4;
50165018

50175019
cr0 = read_cr0();
50185020
WARN_ON(cr0 & X86_CR0_TS);
50195021
vmcs_writel(HOST_CR0, cr0); /* 22.2.3 */
5020-
vmcs_writel(HOST_CR3, read_cr3()); /* 22.2.3 FIXME: shadow tables */
5022+
5023+
/*
5024+
* Save the most likely value for this task's CR3 in the VMCS.
5025+
* We can't use __get_current_cr3_fast() because we're not atomic.
5026+
*/
5027+
cr3 = read_cr3();
5028+
vmcs_writel(HOST_CR3, cr3); /* 22.2.3 FIXME: shadow tables */
5029+
vmx->host_state.vmcs_host_cr3 = cr3;
50215030

50225031
/* Save the most likely value for this task's CR4 in the VMCS. */
50235032
cr4 = cr4_read_shadow();
@@ -8820,7 +8829,7 @@ static void vmx_arm_hv_timer(struct kvm_vcpu *vcpu)
88208829
static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
88218830
{
88228831
struct vcpu_vmx *vmx = to_vmx(vcpu);
8823-
unsigned long debugctlmsr, cr4;
8832+
unsigned long debugctlmsr, cr3, cr4;
88248833

88258834
/* Don't enter VMX if guest state is invalid, let the exit handler
88268835
start emulation until we arrive back to a valid state */
@@ -8842,6 +8851,12 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
88428851
if (test_bit(VCPU_REGS_RIP, (unsigned long *)&vcpu->arch.regs_dirty))
88438852
vmcs_writel(GUEST_RIP, vcpu->arch.regs[VCPU_REGS_RIP]);
88448853

8854+
cr3 = __get_current_cr3_fast();
8855+
if (unlikely(cr3 != vmx->host_state.vmcs_host_cr3)) {
8856+
vmcs_writel(HOST_CR3, cr3);
8857+
vmx->host_state.vmcs_host_cr3 = cr3;
8858+
}
8859+
88458860
cr4 = cr4_read_shadow();
88468861
if (unlikely(cr4 != vmx->host_state.vmcs_host_cr4)) {
88478862
vmcs_writel(HOST_CR4, cr4);

0 commit comments

Comments
 (0)