Skip to content

Commit e61cf2e

Browse files
committed
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull first set of KVM updates from Paolo Bonzini: "PPC: - minor code cleanups x86: - PCID emulation and CR3 caching for shadow page tables - nested VMX live migration - nested VMCS shadowing - optimized IPI hypercall - some optimizations ARM will come next week" * tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (85 commits) kvm: x86: Set highest physical address bits in non-present/reserved SPTEs KVM/x86: Use CC_SET()/CC_OUT in arch/x86/kvm/vmx.c KVM: X86: Implement PV IPIs in linux guest KVM: X86: Add kvm hypervisor init time platform setup callback KVM: X86: Implement "send IPI" hypercall KVM/x86: Move X86_CR4_OSXSAVE check into kvm_valid_sregs() KVM: x86: Skip pae_root shadow allocation if tdp enabled KVM/MMU: Combine flushing remote tlb in mmu_set_spte() KVM: vmx: skip VMWRITE of HOST_{FS,GS}_BASE when possible KVM: vmx: skip VMWRITE of HOST_{FS,GS}_SEL when possible KVM: vmx: always initialize HOST_{FS,GS}_BASE to zero during setup KVM: vmx: move struct host_state usage to struct loaded_vmcs KVM: vmx: compute need to reload FS/GS/LDT on demand KVM: nVMX: remove a misleading comment regarding vmcs02 fields KVM: vmx: rename __vmx_load_host_state() and vmx_save_host_state() KVM: vmx: add dedicated utility to access guest's kernel_gs_base KVM: vmx: track host_state.loaded using a loaded_vmcs pointer KVM: vmx: refactor segmentation code in vmx_save_host_state() kvm: nVMX: Fix fault priority for VMX operations kvm: nVMX: Fix fault vector for VMX operation at CPL > 0 ...
2 parents 1009aa1 + 28a1f3a commit e61cf2e

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

53 files changed

+3110
-726
lines changed

Documentation/virtual/kvm/api.txt

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3561,6 +3561,62 @@ Returns: 0 on success,
35613561
-ENOENT on deassign if the conn_id isn't registered
35623562
-EEXIST on assign if the conn_id is already registered
35633563

3564+
4.114 KVM_GET_NESTED_STATE
3565+
3566+
Capability: KVM_CAP_NESTED_STATE
3567+
Architectures: x86
3568+
Type: vcpu ioctl
3569+
Parameters: struct kvm_nested_state (in/out)
3570+
Returns: 0 on success, -1 on error
3571+
Errors:
3572+
E2BIG: the total state size (including the fixed-size part of struct
3573+
kvm_nested_state) exceeds the value of 'size' specified by
3574+
the user; the size required will be written into size.
3575+
3576+
struct kvm_nested_state {
3577+
__u16 flags;
3578+
__u16 format;
3579+
__u32 size;
3580+
union {
3581+
struct kvm_vmx_nested_state vmx;
3582+
struct kvm_svm_nested_state svm;
3583+
__u8 pad[120];
3584+
};
3585+
__u8 data[0];
3586+
};
3587+
3588+
#define KVM_STATE_NESTED_GUEST_MODE 0x00000001
3589+
#define KVM_STATE_NESTED_RUN_PENDING 0x00000002
3590+
3591+
#define KVM_STATE_NESTED_SMM_GUEST_MODE 0x00000001
3592+
#define KVM_STATE_NESTED_SMM_VMXON 0x00000002
3593+
3594+
struct kvm_vmx_nested_state {
3595+
__u64 vmxon_pa;
3596+
__u64 vmcs_pa;
3597+
3598+
struct {
3599+
__u16 flags;
3600+
} smm;
3601+
};
3602+
3603+
This ioctl copies the vcpu's nested virtualization state from the kernel to
3604+
userspace.
3605+
3606+
The maximum size of the state, including the fixed-size part of struct
3607+
kvm_nested_state, can be retrieved by passing KVM_CAP_NESTED_STATE to
3608+
the KVM_CHECK_EXTENSION ioctl().
3609+
3610+
4.115 KVM_SET_NESTED_STATE
3611+
3612+
Capability: KVM_CAP_NESTED_STATE
3613+
Architectures: x86
3614+
Type: vcpu ioctl
3615+
Parameters: struct kvm_nested_state (in)
3616+
Returns: 0 on success, -1 on error
3617+
3618+
This copies the vcpu's kvm_nested_state struct from userspace to the kernel. For
3619+
the definition of struct kvm_nested_state, see KVM_GET_NESTED_STATE.
35643620

35653621
5. The kvm_run structure
35663622
------------------------

Documentation/virtual/kvm/cpuid.txt

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,10 @@ KVM_FEATURE_ASYNC_PF_VMEXIT || 10 || paravirtualized async PF VM exit
6262
|| || can be enabled by setting bit 2
6363
|| || when writing to msr 0x4b564d02
6464
------------------------------------------------------------------------------
65+
KVM_FEATURE_PV_SEND_IPI || 11 || guest checks this feature bit
66+
|| || before using paravirtualized
67+
|| || send IPIs.
68+
------------------------------------------------------------------------------
6569
KVM_FEATURE_CLOCKSOURCE_STABLE_BIT || 24 || host will warn if no guest-side
6670
|| || per-cpu warps are expected in
6771
|| || kvmclock.

Documentation/virtual/kvm/hypercalls.txt

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -121,3 +121,23 @@ compute the CLOCK_REALTIME for its clock, at the same instant.
121121

122122
Returns KVM_EOPNOTSUPP if the host does not use TSC clocksource,
123123
or if clock type is different than KVM_CLOCK_PAIRING_WALLCLOCK.
124+
125+
6. KVM_HC_SEND_IPI
126+
------------------------
127+
Architecture: x86
128+
Status: active
129+
Purpose: Send IPIs to multiple vCPUs.
130+
131+
a0: lower part of the bitmap of destination APIC IDs
132+
a1: higher part of the bitmap of destination APIC IDs
133+
a2: the lowest APIC ID in bitmap
134+
a3: APIC ICR
135+
136+
The hypercall lets a guest send multicast IPIs, with at most 128
137+
destinations per hypercall in 64-bit mode and 64 vCPUs per
138+
hypercall in 32-bit mode. The destinations are represented by a
139+
bitmap contained in the first two arguments (a0 and a1). Bit 0 of
140+
a0 corresponds to the APIC ID in the third argument (a2), bit 1
141+
corresponds to the APIC ID a2+1, and so on.
142+
143+
Returns the number of CPUs to which the IPIs were delivered successfully.

arch/powerpc/include/asm/kvm_book3s.h

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -390,4 +390,51 @@ extern int kvmppc_h_logical_ci_store(struct kvm_vcpu *vcpu);
390390
#define SPLIT_HACK_MASK 0xff000000
391391
#define SPLIT_HACK_OFFS 0xfb000000
392392

393+
/*
394+
* This packs a VCPU ID from the [0..KVM_MAX_VCPU_ID) space down to the
395+
* [0..KVM_MAX_VCPUS) space, using knowledge of the guest's core stride
396+
* (but not its actual threading mode, which is not available) to avoid
397+
* collisions.
398+
*
399+
* The implementation leaves VCPU IDs from the range [0..KVM_MAX_VCPUS) (block
400+
* 0) unchanged: if the guest is filling each VCORE completely then it will be
401+
* using consecutive IDs and it will fill the space without any packing.
402+
*
403+
* For higher VCPU IDs, the packed ID is based on the VCPU ID modulo
404+
* KVM_MAX_VCPUS (effectively masking off the top bits) and then an offset is
405+
* added to avoid collisions.
406+
*
407+
* VCPU IDs in the range [KVM_MAX_VCPUS..(KVM_MAX_VCPUS*2)) (block 1) are only
408+
* possible if the guest is leaving at least 1/2 of each VCORE empty, so IDs
409+
* can be safely packed into the second half of each VCORE by adding an offset
410+
* of (stride / 2).
411+
*
412+
* Similarly, if VCPU IDs in the range [(KVM_MAX_VCPUS*2)..(KVM_MAX_VCPUS*4))
413+
* (blocks 2 and 3) are seen, the guest must be leaving at least 3/4 of each
414+
* VCORE empty so packed IDs can be offset by (stride / 4) and (stride * 3 / 4).
415+
*
416+
* Finally, VCPU IDs from blocks 4..7 will only be seen if the guest is using a
417+
* stride of 8 and 1 thread per core so the remaining offsets of 1, 5, 3 and 7
418+
* must be free to use.
419+
*
420+
* (The offsets for each block are stored in block_offsets[], indexed by the
421+
* block number if the stride is 8. For cases where the guest's stride is less
422+
* than 8, we can re-use the block_offsets array by multiplying the block
423+
* number by (MAX_SMT_THREADS / stride) to reach the correct entry.)
424+
*/
425+
static inline u32 kvmppc_pack_vcpu_id(struct kvm *kvm, u32 id)
426+
{
427+
const int block_offsets[MAX_SMT_THREADS] = {0, 4, 2, 6, 1, 5, 3, 7};
428+
int stride = kvm->arch.emul_smt_mode;
429+
int block = (id / KVM_MAX_VCPUS) * (MAX_SMT_THREADS / stride);
430+
u32 packed_id;
431+
432+
if (WARN_ONCE(block >= MAX_SMT_THREADS, "VCPU ID too large to pack"))
433+
return 0;
434+
packed_id = (id % KVM_MAX_VCPUS) + block_offsets[block];
435+
if (WARN_ONCE(packed_id >= KVM_MAX_VCPUS, "VCPU ID packing failed"))
436+
return 0;
437+
return packed_id;
438+
}
439+
393440
#endif /* __ASM_KVM_BOOK3S_H__ */

arch/powerpc/include/asm/kvm_host.h

Lines changed: 16 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,14 @@
4242
#define KVM_USER_MEM_SLOTS 512
4343

4444
#include <asm/cputhreads.h>
45-
#define KVM_MAX_VCPU_ID (threads_per_subcore * KVM_MAX_VCORES)
45+
46+
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
47+
#include <asm/kvm_book3s_asm.h> /* for MAX_SMT_THREADS */
48+
#define KVM_MAX_VCPU_ID (MAX_SMT_THREADS * KVM_MAX_VCORES)
49+
50+
#else
51+
#define KVM_MAX_VCPU_ID KVM_MAX_VCPUS
52+
#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
4653

4754
#define __KVM_HAVE_ARCH_INTC_INITIALIZED
4855

@@ -672,7 +679,7 @@ struct kvm_vcpu_arch {
672679
gva_t vaddr_accessed;
673680
pgd_t *pgdir;
674681

675-
u8 io_gpr; /* GPR used as IO source/target */
682+
u16 io_gpr; /* GPR used as IO source/target */
676683
u8 mmio_host_swabbed;
677684
u8 mmio_sign_extend;
678685
/* conversion between single and double precision */
@@ -688,7 +695,6 @@ struct kvm_vcpu_arch {
688695
*/
689696
u8 mmio_vsx_copy_nums;
690697
u8 mmio_vsx_offset;
691-
u8 mmio_vsx_tx_sx_enabled;
692698
u8 mmio_vmx_copy_nums;
693699
u8 mmio_vmx_offset;
694700
u8 mmio_copy_type;
@@ -801,14 +807,14 @@ struct kvm_vcpu_arch {
801807
#define KVMPPC_VCPU_BUSY_IN_HOST 2
802808

803809
/* Values for vcpu->arch.io_gpr */
804-
#define KVM_MMIO_REG_MASK 0x001f
805-
#define KVM_MMIO_REG_EXT_MASK 0xffe0
810+
#define KVM_MMIO_REG_MASK 0x003f
811+
#define KVM_MMIO_REG_EXT_MASK 0xffc0
806812
#define KVM_MMIO_REG_GPR 0x0000
807-
#define KVM_MMIO_REG_FPR 0x0020
808-
#define KVM_MMIO_REG_QPR 0x0040
809-
#define KVM_MMIO_REG_FQPR 0x0060
810-
#define KVM_MMIO_REG_VSX 0x0080
811-
#define KVM_MMIO_REG_VMX 0x00c0
813+
#define KVM_MMIO_REG_FPR 0x0040
814+
#define KVM_MMIO_REG_QPR 0x0080
815+
#define KVM_MMIO_REG_FQPR 0x00c0
816+
#define KVM_MMIO_REG_VSX 0x0100
817+
#define KVM_MMIO_REG_VMX 0x0180
812818

813819
#define __KVM_HAVE_ARCH_WQP
814820
#define __KVM_HAVE_CREATE_DEVICE

arch/powerpc/include/asm/reg.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -163,7 +163,7 @@
163163
#define PSSCR_ESL 0x00200000 /* Enable State Loss */
164164
#define PSSCR_SD 0x00400000 /* Status Disable */
165165
#define PSSCR_PLS 0xf000000000000000 /* Power-saving Level Status */
166-
#define PSSCR_GUEST_VIS 0xf0000000000003ff /* Guest-visible PSSCR fields */
166+
#define PSSCR_GUEST_VIS 0xf0000000000003ffUL /* Guest-visible PSSCR fields */
167167
#define PSSCR_FAKE_SUSPEND 0x00000400 /* Fake-suspend bit (P9 DD2.2) */
168168
#define PSSCR_FAKE_SUSPEND_LG 10 /* Fake-suspend bit position */
169169

arch/powerpc/kvm/book3s_64_vio.c

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -179,7 +179,7 @@ extern long kvm_spapr_tce_attach_iommu_group(struct kvm *kvm, int tablefd,
179179
if ((tbltmp->it_page_shift <= stt->page_shift) &&
180180
(tbltmp->it_offset << tbltmp->it_page_shift ==
181181
stt->offset << stt->page_shift) &&
182-
(tbltmp->it_size << tbltmp->it_page_shift ==
182+
(tbltmp->it_size << tbltmp->it_page_shift >=
183183
stt->size << stt->page_shift)) {
184184
/*
185185
* Reference the table to avoid races with
@@ -295,15 +295,14 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
295295
{
296296
struct kvmppc_spapr_tce_table *stt = NULL;
297297
struct kvmppc_spapr_tce_table *siter;
298-
unsigned long npages, size;
298+
unsigned long npages, size = args->size;
299299
int ret = -ENOMEM;
300300
int i;
301301

302302
if (!args->size || args->page_shift < 12 || args->page_shift > 34 ||
303303
(args->offset + args->size > (ULLONG_MAX >> args->page_shift)))
304304
return -EINVAL;
305305

306-
size = _ALIGN_UP(args->size, PAGE_SIZE >> 3);
307306
npages = kvmppc_tce_pages(size);
308307
ret = kvmppc_account_memlimit(kvmppc_stt_pages(npages), true);
309308
if (ret)

arch/powerpc/kvm/book3s_hv.c

Lines changed: 29 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -127,14 +127,14 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu);
127127
* and SPURR count and should be set according to the number of
128128
* online threads in the vcore being run.
129129
*/
130-
#define RWMR_RPA_P8_1THREAD 0x164520C62609AECA
131-
#define RWMR_RPA_P8_2THREAD 0x7FFF2908450D8DA9
132-
#define RWMR_RPA_P8_3THREAD 0x164520C62609AECA
133-
#define RWMR_RPA_P8_4THREAD 0x199A421245058DA9
134-
#define RWMR_RPA_P8_5THREAD 0x164520C62609AECA
135-
#define RWMR_RPA_P8_6THREAD 0x164520C62609AECA
136-
#define RWMR_RPA_P8_7THREAD 0x164520C62609AECA
137-
#define RWMR_RPA_P8_8THREAD 0x164520C62609AECA
130+
#define RWMR_RPA_P8_1THREAD 0x164520C62609AECAUL
131+
#define RWMR_RPA_P8_2THREAD 0x7FFF2908450D8DA9UL
132+
#define RWMR_RPA_P8_3THREAD 0x164520C62609AECAUL
133+
#define RWMR_RPA_P8_4THREAD 0x199A421245058DA9UL
134+
#define RWMR_RPA_P8_5THREAD 0x164520C62609AECAUL
135+
#define RWMR_RPA_P8_6THREAD 0x164520C62609AECAUL
136+
#define RWMR_RPA_P8_7THREAD 0x164520C62609AECAUL
137+
#define RWMR_RPA_P8_8THREAD 0x164520C62609AECAUL
138138

139139
static unsigned long p8_rwmr_values[MAX_SMT_THREADS + 1] = {
140140
RWMR_RPA_P8_1THREAD,
@@ -1807,7 +1807,7 @@ static int threads_per_vcore(struct kvm *kvm)
18071807
return threads_per_subcore;
18081808
}
18091809

1810-
static struct kvmppc_vcore *kvmppc_vcore_create(struct kvm *kvm, int core)
1810+
static struct kvmppc_vcore *kvmppc_vcore_create(struct kvm *kvm, int id)
18111811
{
18121812
struct kvmppc_vcore *vcore;
18131813

@@ -1821,7 +1821,7 @@ static struct kvmppc_vcore *kvmppc_vcore_create(struct kvm *kvm, int core)
18211821
init_swait_queue_head(&vcore->wq);
18221822
vcore->preempt_tb = TB_NIL;
18231823
vcore->lpcr = kvm->arch.lpcr;
1824-
vcore->first_vcpuid = core * kvm->arch.smt_mode;
1824+
vcore->first_vcpuid = id;
18251825
vcore->kvm = kvm;
18261826
INIT_LIST_HEAD(&vcore->preempt_list);
18271827

@@ -2037,12 +2037,26 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_hv(struct kvm *kvm,
20372037
mutex_lock(&kvm->lock);
20382038
vcore = NULL;
20392039
err = -EINVAL;
2040-
core = id / kvm->arch.smt_mode;
2040+
if (cpu_has_feature(CPU_FTR_ARCH_300)) {
2041+
if (id >= (KVM_MAX_VCPUS * kvm->arch.emul_smt_mode)) {
2042+
pr_devel("KVM: VCPU ID too high\n");
2043+
core = KVM_MAX_VCORES;
2044+
} else {
2045+
BUG_ON(kvm->arch.smt_mode != 1);
2046+
core = kvmppc_pack_vcpu_id(kvm, id);
2047+
}
2048+
} else {
2049+
core = id / kvm->arch.smt_mode;
2050+
}
20412051
if (core < KVM_MAX_VCORES) {
20422052
vcore = kvm->arch.vcores[core];
2043-
if (!vcore) {
2053+
if (vcore && cpu_has_feature(CPU_FTR_ARCH_300)) {
2054+
pr_devel("KVM: collision on id %u", id);
2055+
vcore = NULL;
2056+
} else if (!vcore) {
20442057
err = -ENOMEM;
2045-
vcore = kvmppc_vcore_create(kvm, core);
2058+
vcore = kvmppc_vcore_create(kvm,
2059+
id & ~(kvm->arch.smt_mode - 1));
20462060
kvm->arch.vcores[core] = vcore;
20472061
kvm->arch.online_vcores++;
20482062
}
@@ -4550,6 +4564,8 @@ static int kvmppc_book3s_init_hv(void)
45504564
pr_err("KVM-HV: Cannot determine method for accessing XICS\n");
45514565
return -ENODEV;
45524566
}
4567+
/* presence of intc confirmed - node can be dropped again */
4568+
of_node_put(np);
45534569
}
45544570
#endif
45554571

arch/powerpc/kvm/book3s_xive.c

Lines changed: 12 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -317,6 +317,11 @@ static int xive_select_target(struct kvm *kvm, u32 *server, u8 prio)
317317
return -EBUSY;
318318
}
319319

320+
static u32 xive_vp(struct kvmppc_xive *xive, u32 server)
321+
{
322+
return xive->vp_base + kvmppc_pack_vcpu_id(xive->kvm, server);
323+
}
324+
320325
static u8 xive_lock_and_mask(struct kvmppc_xive *xive,
321326
struct kvmppc_xive_src_block *sb,
322327
struct kvmppc_xive_irq_state *state)
@@ -362,7 +367,7 @@ static u8 xive_lock_and_mask(struct kvmppc_xive *xive,
362367
*/
363368
if (xd->flags & OPAL_XIVE_IRQ_MASK_VIA_FW) {
364369
xive_native_configure_irq(hw_num,
365-
xive->vp_base + state->act_server,
370+
xive_vp(xive, state->act_server),
366371
MASKED, state->number);
367372
/* set old_p so we can track if an H_EOI was done */
368373
state->old_p = true;
@@ -418,7 +423,7 @@ static void xive_finish_unmask(struct kvmppc_xive *xive,
418423
*/
419424
if (xd->flags & OPAL_XIVE_IRQ_MASK_VIA_FW) {
420425
xive_native_configure_irq(hw_num,
421-
xive->vp_base + state->act_server,
426+
xive_vp(xive, state->act_server),
422427
state->act_priority, state->number);
423428
/* If an EOI is needed, do it here */
424429
if (!state->old_p)
@@ -495,7 +500,7 @@ static int xive_target_interrupt(struct kvm *kvm,
495500
kvmppc_xive_select_irq(state, &hw_num, NULL);
496501

497502
return xive_native_configure_irq(hw_num,
498-
xive->vp_base + server,
503+
xive_vp(xive, server),
499504
prio, state->number);
500505
}
501506

@@ -883,7 +888,7 @@ int kvmppc_xive_set_mapped(struct kvm *kvm, unsigned long guest_irq,
883888
* which is fine for a never started interrupt.
884889
*/
885890
xive_native_configure_irq(hw_irq,
886-
xive->vp_base + state->act_server,
891+
xive_vp(xive, state->act_server),
887892
state->act_priority, state->number);
888893

889894
/*
@@ -959,7 +964,7 @@ int kvmppc_xive_clr_mapped(struct kvm *kvm, unsigned long guest_irq,
959964

960965
/* Reconfigure the IPI */
961966
xive_native_configure_irq(state->ipi_number,
962-
xive->vp_base + state->act_server,
967+
xive_vp(xive, state->act_server),
963968
state->act_priority, state->number);
964969

965970
/*
@@ -1084,7 +1089,7 @@ int kvmppc_xive_connect_vcpu(struct kvm_device *dev,
10841089
pr_devel("Duplicate !\n");
10851090
return -EEXIST;
10861091
}
1087-
if (cpu >= KVM_MAX_VCPUS) {
1092+
if (cpu >= (KVM_MAX_VCPUS * vcpu->kvm->arch.emul_smt_mode)) {
10881093
pr_devel("Out of bounds !\n");
10891094
return -EINVAL;
10901095
}
@@ -1098,7 +1103,7 @@ int kvmppc_xive_connect_vcpu(struct kvm_device *dev,
10981103
xc->xive = xive;
10991104
xc->vcpu = vcpu;
11001105
xc->server_num = cpu;
1101-
xc->vp_id = xive->vp_base + cpu;
1106+
xc->vp_id = xive_vp(xive, cpu);
11021107
xc->mfrr = 0xff;
11031108
xc->valid = true;
11041109

0 commit comments

Comments
 (0)