Skip to content

Commit 42b00f1

Browse files
committed
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull KVM updates from Paolo Bonzini: "ARM: - selftests improvements - large PUD support for HugeTLB - single-stepping fixes - improved tracing - various timer and vGIC fixes x86: - Processor Tracing virtualization - STIBP support - some correctness fixes - refactorings and splitting of vmx.c - use the Hyper-V range TLB flush hypercall - reduce order of vcpu struct - WBNOINVD support - do not use -ftrace for __noclone functions - nested guest support for PAUSE filtering on AMD - more Hyper-V enlightenments (direct mode for synthetic timers) PPC: - nested VFIO s390: - bugfixes only this time" * tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (171 commits) KVM: x86: Add CPUID support for new instruction WBNOINVD kvm: selftests: ucall: fix exit mmio address guessing Revert "compiler-gcc: disable -ftracer for __noclone functions" KVM: VMX: Move VM-Enter + VM-Exit handling to non-inline sub-routines KVM: VMX: Explicitly reference RCX as the vmx_vcpu pointer in asm blobs KVM: x86: Use jmp to invoke kvm_spurious_fault() from .fixup MAINTAINERS: Add arch/x86/kvm sub-directories to existing KVM/x86 entry KVM/x86: Use SVM assembly instruction mnemonics instead of .byte streams KVM/MMU: Flush tlb directly in the kvm_zap_gfn_range() KVM/MMU: Flush tlb directly in kvm_set_pte_rmapp() KVM/MMU: Move tlb flush in kvm_set_pte_rmapp() to kvm_mmu_notifier_change_pte() KVM: Make kvm_set_spte_hva() return int KVM: Replace old tlb flush function with new one to flush a specified range. KVM/MMU: Add tlb flush with range helper function KVM/VMX: Add hv tlb range flush support x86/hyper-v: Add HvFlushGuestAddressList hypercall support KVM: Add tlb_remote_flush_with_range callback in kvm_x86_ops KVM: x86: Disable Intel PT when VMXON in L1 guest KVM: x86: Set intercept for Intel PT MSRs read/write KVM: x86: Implement Intel PT MSRs read/write emulation ...
2 parents 460023a + a0aea13 commit 42b00f1

File tree

119 files changed

+19024
-16329
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

119 files changed

+19024
-16329
lines changed

Documentation/virtual/kvm/api.txt

Lines changed: 132 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -305,6 +305,9 @@ the address space for which you want to return the dirty bitmap.
305305
They must be less than the value that KVM_CHECK_EXTENSION returns for
306306
the KVM_CAP_MULTI_ADDRESS_SPACE capability.
307307

308+
The bits in the dirty bitmap are cleared before the ioctl returns, unless
309+
KVM_CAP_MANUAL_DIRTY_LOG_PROTECT is enabled. For more information,
310+
see the description of the capability.
308311

309312
4.9 KVM_SET_MEMORY_ALIAS
310313

@@ -1129,10 +1132,15 @@ documentation when it pops into existence).
11291132

11301133
4.37 KVM_ENABLE_CAP
11311134

1132-
Capability: KVM_CAP_ENABLE_CAP, KVM_CAP_ENABLE_CAP_VM
1133-
Architectures: x86 (only KVM_CAP_ENABLE_CAP_VM),
1134-
mips (only KVM_CAP_ENABLE_CAP), ppc, s390
1135-
Type: vcpu ioctl, vm ioctl (with KVM_CAP_ENABLE_CAP_VM)
1135+
Capability: KVM_CAP_ENABLE_CAP
1136+
Architectures: mips, ppc, s390
1137+
Type: vcpu ioctl
1138+
Parameters: struct kvm_enable_cap (in)
1139+
Returns: 0 on success; -1 on error
1140+
1141+
Capability: KVM_CAP_ENABLE_CAP_VM
1142+
Architectures: all
1143+
Type: vm ioctl
11361144
Parameters: struct kvm_enable_cap (in)
11371145
Returns: 0 on success; -1 on error
11381146

@@ -3753,6 +3761,102 @@ Coalesced pio is based on coalesced mmio. There is little difference
37533761
between coalesced mmio and pio except that coalesced pio records accesses
37543762
to I/O ports.
37553763

3764+
4.117 KVM_CLEAR_DIRTY_LOG (vm ioctl)
3765+
3766+
Capability: KVM_CAP_MANUAL_DIRTY_LOG_PROTECT
3767+
Architectures: x86
3768+
Type: vm ioctl
3769+
Parameters: struct kvm_clear_dirty_log (in)
3770+
Returns: 0 on success, -1 on error
3771+
3772+
/* for KVM_CLEAR_DIRTY_LOG */
3773+
struct kvm_clear_dirty_log {
3774+
__u32 slot;
3775+
__u32 num_pages;
3776+
__u64 first_page;
3777+
union {
3778+
void __user *dirty_bitmap; /* one bit per page */
3779+
__u64 padding;
3780+
};
3781+
};
3782+
3783+
The ioctl clears the dirty status of pages in a memory slot, according to
3784+
the bitmap that is passed in struct kvm_clear_dirty_log's dirty_bitmap
3785+
field. Bit 0 of the bitmap corresponds to page "first_page" in the
3786+
memory slot, and num_pages is the size in bits of the input bitmap.
3787+
Both first_page and num_pages must be a multiple of 64. For each bit
3788+
that is set in the input bitmap, the corresponding page is marked "clean"
3789+
in KVM's dirty bitmap, and dirty tracking is re-enabled for that page
3790+
(for example via write-protection, or by clearing the dirty bit in
3791+
a page table entry).
3792+
3793+
If KVM_CAP_MULTI_ADDRESS_SPACE is available, bits 16-31 of the slot field specify
3794+
the address space for which you want to clear the dirty status.
3795+
They must be less than the value that KVM_CHECK_EXTENSION returns for
3796+
the KVM_CAP_MULTI_ADDRESS_SPACE capability.
3797+
3798+
This ioctl is mostly useful when KVM_CAP_MANUAL_DIRTY_LOG_PROTECT
3799+
is enabled; for more information, see the description of the capability.
3800+
However, it can always be used as long as KVM_CHECK_EXTENSION confirms
3801+
that KVM_CAP_MANUAL_DIRTY_LOG_PROTECT is present.
3802+
3803+
4.118 KVM_GET_SUPPORTED_HV_CPUID
3804+
3805+
Capability: KVM_CAP_HYPERV_CPUID
3806+
Architectures: x86
3807+
Type: vcpu ioctl
3808+
Parameters: struct kvm_cpuid2 (in/out)
3809+
Returns: 0 on success, -1 on error
3810+
3811+
struct kvm_cpuid2 {
3812+
__u32 nent;
3813+
__u32 padding;
3814+
struct kvm_cpuid_entry2 entries[0];
3815+
};
3816+
3817+
struct kvm_cpuid_entry2 {
3818+
__u32 function;
3819+
__u32 index;
3820+
__u32 flags;
3821+
__u32 eax;
3822+
__u32 ebx;
3823+
__u32 ecx;
3824+
__u32 edx;
3825+
__u32 padding[3];
3826+
};
3827+
3828+
This ioctl returns x86 cpuid feature leaves related to Hyper-V emulation in
3829+
KVM. Userspace can use the information returned by this ioctl to construct
3830+
cpuid information presented to guests consuming Hyper-V enlightenments (e.g.
3831+
Windows or Hyper-V guests).
3832+
3833+
CPUID feature leaves returned by this ioctl are defined by Hyper-V Top Level
3834+
Functional Specification (TLFS). These leaves can't be obtained with
3835+
KVM_GET_SUPPORTED_CPUID ioctl because some of them intersect with KVM feature
3836+
leaves (0x40000000, 0x40000001).
3837+
3838+
Currently, the following CPUID leaves are returned:
3839+
HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS
3840+
HYPERV_CPUID_INTERFACE
3841+
HYPERV_CPUID_VERSION
3842+
HYPERV_CPUID_FEATURES
3843+
HYPERV_CPUID_ENLIGHTMENT_INFO
3844+
HYPERV_CPUID_IMPLEMENT_LIMITS
3845+
HYPERV_CPUID_NESTED_FEATURES
3846+
3847+
HYPERV_CPUID_NESTED_FEATURES leaf is only exposed when Enlightened VMCS was
3848+
enabled on the corresponding vCPU (KVM_CAP_HYPERV_ENLIGHTENED_VMCS).
3849+
3850+
Userspace invokes KVM_GET_SUPPORTED_HV_CPUID by passing a kvm_cpuid2 structure
3851+
with the 'nent' field indicating the number of entries in the variable-size
3852+
array 'entries'. If the number of entries is too low to describe all Hyper-V
3853+
feature leaves, an error (E2BIG) is returned. If the number is greater than or equal
3854+
to the number of Hyper-V feature leaves, the 'nent' field is adjusted to the
3855+
number of valid entries in the 'entries' array, which is then filled.
3856+
3857+
'index' and 'flags' fields in 'struct kvm_cpuid_entry2' are currently reserved;
3858+
userspace should not expect to get any particular value there.
3859+
37563860
5. The kvm_run structure
37573861
------------------------
37583862

@@ -4647,6 +4751,30 @@ and injected exceptions.
46474751
* For the new DR6 bits, note that bit 16 is set iff the #DB exception
46484752
will clear DR6.RTM.
46494753

4754+
7.18 KVM_CAP_MANUAL_DIRTY_LOG_PROTECT
4755+
4756+
Architectures: all
4757+
Parameters: args[0] whether feature should be enabled or not
4758+
4759+
With this capability enabled, KVM_GET_DIRTY_LOG will not automatically
4760+
clear and write-protect all pages that are returned as dirty.
4761+
Rather, userspace will have to do this operation separately using
4762+
KVM_CLEAR_DIRTY_LOG.
4763+
4764+
At the cost of a slightly more complicated operation, this provides better
4765+
scalability and responsiveness for two reasons. First,
4766+
KVM_CLEAR_DIRTY_LOG ioctl can operate on a 64-page granularity rather
4767+
than requiring a full memslot to be synced; this ensures that KVM does not
4768+
take spinlocks for an extended period of time. Second, in some cases a
4769+
large amount of time can pass between a call to KVM_GET_DIRTY_LOG and
4770+
userspace actually using the data in the page. Pages can be modified
4771+
during this time, which is inefficient for both the guest and userspace:
4772+
the guest will incur a higher penalty due to write protection faults,
4773+
while userspace can see false reports of dirty pages. Manual reprotection
4774+
helps reduce this time, improving guest performance and reducing the
4775+
number of dirty log false positives.
4776+
4777+
46504778
8. Other capabilities.
46514779
----------------------
46524780

MAINTAINERS

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8309,6 +8309,7 @@ W: http://www.linux-kvm.org
83098309
T: git git://git.kernel.org/pub/scm/virt/kvm/kvm.git
83108310
S: Supported
83118311
F: arch/x86/kvm/
8312+
F: arch/x86/kvm/*/
83128313
F: arch/x86/include/uapi/asm/kvm*
83138314
F: arch/x86/include/asm/kvm*
83148315
F: arch/x86/include/asm/pvclock-abi.h

arch/arm/include/asm/kvm_asm.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,10 @@
2323

2424
#define ARM_EXIT_WITH_ABORT_BIT 31
2525
#define ARM_EXCEPTION_CODE(x) ((x) & ~(1U << ARM_EXIT_WITH_ABORT_BIT))
26+
#define ARM_EXCEPTION_IS_TRAP(x) \
27+
(ARM_EXCEPTION_CODE((x)) == ARM_EXCEPTION_PREF_ABORT || \
28+
ARM_EXCEPTION_CODE((x)) == ARM_EXCEPTION_DATA_ABORT || \
29+
ARM_EXCEPTION_CODE((x)) == ARM_EXCEPTION_HVC)
2630
#define ARM_ABORT_PENDING(x) !!((x) & (1U << ARM_EXIT_WITH_ABORT_BIT))
2731

2832
#define ARM_EXCEPTION_RESET 0

arch/arm/include/asm/kvm_host.h

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -225,7 +225,7 @@ int __kvm_arm_vcpu_set_events(struct kvm_vcpu *vcpu,
225225
#define KVM_ARCH_WANT_MMU_NOTIFIER
226226
int kvm_unmap_hva_range(struct kvm *kvm,
227227
unsigned long start, unsigned long end);
228-
void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
228+
int kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
229229

230230
unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu);
231231
int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices);
@@ -296,11 +296,6 @@ static inline void kvm_arm_init_debug(void) {}
296296
static inline void kvm_arm_setup_debug(struct kvm_vcpu *vcpu) {}
297297
static inline void kvm_arm_clear_debug(struct kvm_vcpu *vcpu) {}
298298
static inline void kvm_arm_reset_debug_ptr(struct kvm_vcpu *vcpu) {}
299-
static inline bool kvm_arm_handle_step_debug(struct kvm_vcpu *vcpu,
300-
struct kvm_run *run)
301-
{
302-
return false;
303-
}
304299

305300
int kvm_arm_vcpu_arch_set_attr(struct kvm_vcpu *vcpu,
306301
struct kvm_device_attr *attr);

arch/arm/include/asm/kvm_mmu.h

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,67 @@ void kvm_clear_hyp_idmap(void);
8282
#define kvm_mk_pud(pmdp) __pud(__pa(pmdp) | PMD_TYPE_TABLE)
8383
#define kvm_mk_pgd(pudp) ({ BUILD_BUG(); 0; })
8484

85+
#define kvm_pfn_pte(pfn, prot) pfn_pte(pfn, prot)
86+
#define kvm_pfn_pmd(pfn, prot) pfn_pmd(pfn, prot)
87+
#define kvm_pfn_pud(pfn, prot) (__pud(0))
88+
89+
#define kvm_pud_pfn(pud) ({ WARN_ON(1); 0; })
90+
91+
92+
#define kvm_pmd_mkhuge(pmd) pmd_mkhuge(pmd)
93+
/* No support for pud hugepages */
94+
#define kvm_pud_mkhuge(pud) ( {WARN_ON(1); pud; })
95+
96+
/*
97+
* The following kvm_*pud*() functions are provided strictly to allow
98+
* sharing code with arm64. They should never be called in practice.
99+
*/
100+
static inline void kvm_set_s2pud_readonly(pud_t *pud)
101+
{
102+
WARN_ON(1);
103+
}
104+
105+
static inline bool kvm_s2pud_readonly(pud_t *pud)
106+
{
107+
WARN_ON(1);
108+
return false;
109+
}
110+
111+
static inline void kvm_set_pud(pud_t *pud, pud_t new_pud)
112+
{
113+
WARN_ON(1);
114+
}
115+
116+
static inline pud_t kvm_s2pud_mkwrite(pud_t pud)
117+
{
118+
WARN_ON(1);
119+
return pud;
120+
}
121+
122+
static inline pud_t kvm_s2pud_mkexec(pud_t pud)
123+
{
124+
WARN_ON(1);
125+
return pud;
126+
}
127+
128+
static inline bool kvm_s2pud_exec(pud_t *pud)
129+
{
130+
WARN_ON(1);
131+
return false;
132+
}
133+
134+
static inline pud_t kvm_s2pud_mkyoung(pud_t pud)
135+
{
136+
BUG();
137+
return pud;
138+
}
139+
140+
static inline bool kvm_s2pud_young(pud_t pud)
141+
{
142+
WARN_ON(1);
143+
return false;
144+
}
145+
85146
static inline pte_t kvm_s2pte_mkwrite(pte_t pte)
86147
{
87148
pte_val(pte) |= L_PTE_S2_RDWR;

arch/arm/include/asm/stage2_pgtable.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,4 +68,12 @@ stage2_pmd_addr_end(struct kvm *kvm, phys_addr_t addr, phys_addr_t end)
6868
#define stage2_pmd_table_empty(kvm, pmdp) kvm_page_empty(pmdp)
6969
#define stage2_pud_table_empty(kvm, pudp) false
7070

71+
static inline bool kvm_stage2_has_pud(struct kvm *kvm)
72+
{
73+
return false;
74+
}
75+
76+
#define S2_PMD_MASK PMD_MASK
77+
#define S2_PMD_SIZE PMD_SIZE
78+
7179
#endif /* __ARM_S2_PGTABLE_H_ */

arch/arm/kvm/coproc.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -602,8 +602,8 @@ static int emulate_cp15(struct kvm_vcpu *vcpu,
602602
}
603603
} else {
604604
/* If access function fails, it should complain. */
605-
kvm_err("Unsupported guest CP15 access at: %08lx\n",
606-
*vcpu_pc(vcpu));
605+
kvm_err("Unsupported guest CP15 access at: %08lx [%08lx]\n",
606+
*vcpu_pc(vcpu), *vcpu_cpsr(vcpu));
607607
print_cp_instr(params);
608608
kvm_inject_undefined(vcpu);
609609
}

arch/arm64/include/asm/kvm_arm.h

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -107,7 +107,7 @@
107107
TCR_EL2_ORGN0_MASK | TCR_EL2_IRGN0_MASK | TCR_EL2_T0SZ_MASK)
108108

109109
/* VTCR_EL2 Registers bits */
110-
#define VTCR_EL2_RES1 (1 << 31)
110+
#define VTCR_EL2_RES1 (1U << 31)
111111
#define VTCR_EL2_HD (1 << 22)
112112
#define VTCR_EL2_HA (1 << 21)
113113
#define VTCR_EL2_PS_SHIFT TCR_EL2_PS_SHIFT
@@ -323,10 +323,6 @@
323323
#define PAR_TO_HPFAR(par) \
324324
(((par) & GENMASK_ULL(PHYS_MASK_SHIFT - 1, 12)) >> 8)
325325

326-
#define kvm_arm_exception_type \
327-
{0, "IRQ" }, \
328-
{1, "TRAP" }
329-
330326
#define ECN(x) { ESR_ELx_EC_##x, #x }
331327

332328
#define kvm_arm_exception_class \

arch/arm64/include/asm/kvm_asm.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525

2626
#define ARM_EXIT_WITH_SERROR_BIT 31
2727
#define ARM_EXCEPTION_CODE(x) ((x) & ~(1U << ARM_EXIT_WITH_SERROR_BIT))
28+
#define ARM_EXCEPTION_IS_TRAP(x) (ARM_EXCEPTION_CODE((x)) == ARM_EXCEPTION_TRAP)
2829
#define ARM_SERROR_PENDING(x) !!((x) & (1U << ARM_EXIT_WITH_SERROR_BIT))
2930

3031
#define ARM_EXCEPTION_IRQ 0
@@ -34,6 +35,12 @@
3435
/* The hyp-stub will return this for any kvm_call_hyp() call */
3536
#define ARM_EXCEPTION_HYP_GONE HVC_STUB_ERR
3637

38+
#define kvm_arm_exception_type \
39+
{ARM_EXCEPTION_IRQ, "IRQ" }, \
40+
{ARM_EXCEPTION_EL1_SERROR, "SERROR" }, \
41+
{ARM_EXCEPTION_TRAP, "TRAP" }, \
42+
{ARM_EXCEPTION_HYP_GONE, "HYP_GONE" }
43+
3744
#ifndef __ASSEMBLY__
3845

3946
#include <linux/mm.h>

arch/arm64/include/asm/kvm_emulate.h

Lines changed: 27 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424

2525
#include <linux/kvm_host.h>
2626

27+
#include <asm/debug-monitors.h>
2728
#include <asm/esr.h>
2829
#include <asm/kvm_arm.h>
2930
#include <asm/kvm_hyp.h>
@@ -147,14 +148,6 @@ static inline bool kvm_condition_valid(const struct kvm_vcpu *vcpu)
147148
return true;
148149
}
149150

150-
static inline void kvm_skip_instr(struct kvm_vcpu *vcpu, bool is_wide_instr)
151-
{
152-
if (vcpu_mode_is_32bit(vcpu))
153-
kvm_skip_instr32(vcpu, is_wide_instr);
154-
else
155-
*vcpu_pc(vcpu) += 4;
156-
}
157-
158151
static inline void vcpu_set_thumb(struct kvm_vcpu *vcpu)
159152
{
160153
*vcpu_cpsr(vcpu) |= PSR_AA32_T_BIT;
@@ -424,4 +417,30 @@ static inline unsigned long vcpu_data_host_to_guest(struct kvm_vcpu *vcpu,
424417
return data; /* Leave LE untouched */
425418
}
426419

420+
static inline void kvm_skip_instr(struct kvm_vcpu *vcpu, bool is_wide_instr)
421+
{
422+
if (vcpu_mode_is_32bit(vcpu))
423+
kvm_skip_instr32(vcpu, is_wide_instr);
424+
else
425+
*vcpu_pc(vcpu) += 4;
426+
427+
/* advance the singlestep state machine */
428+
*vcpu_cpsr(vcpu) &= ~DBG_SPSR_SS;
429+
}
430+
431+
/*
432+
* Skip an instruction which has been emulated at hyp while most guest sysregs
433+
* are live.
434+
*/
435+
static inline void __hyp_text __kvm_skip_instr(struct kvm_vcpu *vcpu)
436+
{
437+
*vcpu_pc(vcpu) = read_sysreg_el2(elr);
438+
vcpu->arch.ctxt.gp_regs.regs.pstate = read_sysreg_el2(spsr);
439+
440+
kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu));
441+
442+
write_sysreg_el2(vcpu->arch.ctxt.gp_regs.regs.pstate, spsr);
443+
write_sysreg_el2(*vcpu_pc(vcpu), elr);
444+
}
445+
427446
#endif /* __ARM64_KVM_EMULATE_H__ */

0 commit comments

Comments
 (0)