
Commit a27fb6d

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Paolo writes: "It's mostly small bugfixes and cleanups, mostly around x86 nested virtualization. One important change, not related to nested virtualization, is that the ability for the guest kernel to trap CPUID instructions (in Linux that's the ARCH_SET_CPUID arch_prctl) is now masked by default. This is because the feature is detected through an MSR; a very bad idea that Intel seems to like more and more. Some applications choke if the other fields of that MSR are not initialized as on real hardware, hence we have to disable the whole MSR by default, as was the case before Linux 4.12."

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (23 commits)
  KVM: nVMX: Fix bad cleanup on error of get/set nested state IOCTLs
  kvm: selftests: Add platform_info_test
  KVM: x86: Control guest reads of MSR_PLATFORM_INFO
  KVM: x86: Turbo bits in MSR_PLATFORM_INFO
  nVMX x86: Check VPID value on vmentry of L2 guests
  nVMX x86: check posted-interrupt descriptor addresss on vmentry of L2
  KVM: nVMX: Wake blocked vCPU in guest-mode if pending interrupt in virtual APICv
  KVM: VMX: check nested state and CR4.VMXE against SMM
  kvm: x86: make kvm_{load|put}_guest_fpu() static
  x86/hyper-v: rename ipi_arg_{ex,non_ex} structures
  KVM: VMX: use preemption timer to force immediate VMExit
  KVM: VMX: modify preemption timer bit only when arming timer
  KVM: VMX: immediately mark preemption timer expired only for zero value
  KVM: SVM: Switch to bitmap_zalloc()
  KVM/MMU: Fix comment in walk_shadow_page_lockless_end()
  kvm: selftests: use -pthread instead of -lpthread
  KVM: x86: don't reset root in kvm_mmu_setup()
  kvm: mmu: Don't read PDPTEs when paging is not enabled
  x86/kvm/lapic: always disable MMIO interface in x2APIC mode
  KVM: s390: Make huge pages unavailable in ucontrol VMs
  ...
2 parents 0eba869 + 26b471c commit a27fb6d
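For readers unfamiliar with the arch_prctl mentioned in the merge message, here is a hypothetical user-space sketch (not part of this merge) of how a guest process would probe the CPUID-faulting feature that is now hidden from guests by default. It attempts to turn the CPUID instruction off and back on; ENODEV is what a guest now sees unless the VMM opts in.

#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <asm/prctl.h>          /* ARCH_SET_CPUID */
#include <sys/syscall.h>
#include <unistd.h>

int main(void)
{
	/* ARCH_SET_CPUID, 0 asks the kernel to make CPUID fault (SIGSEGV);
	 * it fails with ENODEV when CPUID faulting is not exposed. */
	if (syscall(SYS_arch_prctl, ARCH_SET_CPUID, 0) < 0) {
		printf("CPUID faulting unavailable: %s\n", strerror(errno));
		return 0;
	}

	/* Re-enable CPUID so the rest of the process keeps working. */
	syscall(SYS_arch_prctl, ARCH_SET_CPUID, 1);
	printf("CPUID faulting is available\n");
	return 0;
}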

27 files changed, +537 -244 lines

Documentation/virtual/kvm/api.txt

Lines changed: 11 additions & 1 deletion
@@ -4510,7 +4510,8 @@ Do not enable KVM_FEATURE_PV_UNHALT if you disable HLT exits.
 Architectures: s390
 Parameters: none
 Returns: 0 on success, -EINVAL if hpage module parameter was not set
-         or cmma is enabled
+         or cmma is enabled, or the VM has the KVM_VM_S390_UCONTROL
+         flag set
 
 With this capability the KVM support for memory backing with 1m pages
 through hugetlbfs can be enabled for a VM. After the capability is
@@ -4521,6 +4522,15 @@ hpage module parameter is not set to 1, -EINVAL is returned.
 While it is generally possible to create a huge page backed VM without
 this capability, the VM will not be able to run.
 
+7.14 KVM_CAP_MSR_PLATFORM_INFO
+
+Architectures: x86
+Parameters: args[0] whether feature should be enabled or not
+
+With this capability, a guest may read the MSR_PLATFORM_INFO MSR. Otherwise,
+a #GP would be raised when the guest tries to access. Currently, this
+capability does not enable write permissions of this MSR for the guest.
+
 8. Other capabilities.
 ----------------------
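The new section 7.14 above describes a per-VM capability toggled through args[0]. As a rough illustration only — assuming a VM file descriptor obtained from KVM_CREATE_VM and a linux/kvm.h recent enough to define KVM_CAP_MSR_PLATFORM_INFO — a VMM would flip it with the standard KVM_ENABLE_CAP ioctl:

#include <linux/kvm.h>
#include <string.h>
#include <sys/ioctl.h>

/* vm_fd: file descriptor returned by the KVM_CREATE_VM ioctl (assumed). */
static int allow_platform_info_reads(int vm_fd, int enable)
{
	struct kvm_enable_cap cap;

	memset(&cap, 0, sizeof(cap));
	cap.cap = KVM_CAP_MSR_PLATFORM_INFO;
	cap.args[0] = enable;   /* args[0]: whether the feature should be enabled */

	/* Returns 0 on success, -1 with errno set on failure. */
	return ioctl(vm_fd, KVM_ENABLE_CAP, &cap);
}

With the capability left disabled (the default), a guest read of MSR_PLATFORM_INFO raises #GP, as the documentation text above states.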

arch/powerpc/include/asm/book3s/64/pgtable.h

Lines changed: 0 additions & 1 deletion
@@ -1051,7 +1051,6 @@ static inline void vmemmap_remove_mapping(unsigned long start,
 	return hash__vmemmap_remove_mapping(start, page_size);
 }
 #endif
-struct page *realmode_pfn_to_page(unsigned long pfn);
 
 static inline pte_t pmd_pte(pmd_t pmd)
 {

arch/powerpc/include/asm/iommu.h

Lines changed: 0 additions & 2 deletions
@@ -220,8 +220,6 @@ extern void iommu_del_device(struct device *dev);
 extern int __init tce_iommu_bus_notifier_init(void);
 extern long iommu_tce_xchg(struct iommu_table *tbl, unsigned long entry,
 		unsigned long *hpa, enum dma_data_direction *direction);
-extern long iommu_tce_xchg_rm(struct iommu_table *tbl, unsigned long entry,
-		unsigned long *hpa, enum dma_data_direction *direction);
 #else
 static inline void iommu_register_group(struct iommu_table_group *table_group,
 				int pci_domain_number,

arch/powerpc/include/asm/mmu_context.h

Lines changed: 1 addition & 0 deletions
@@ -38,6 +38,7 @@ extern long mm_iommu_ua_to_hpa(struct mm_iommu_table_group_mem_t *mem,
 		unsigned long ua, unsigned int pageshift, unsigned long *hpa);
 extern long mm_iommu_ua_to_hpa_rm(struct mm_iommu_table_group_mem_t *mem,
 		unsigned long ua, unsigned int pageshift, unsigned long *hpa);
+extern void mm_iommu_ua_mark_dirty_rm(struct mm_struct *mm, unsigned long ua);
 extern long mm_iommu_mapped_inc(struct mm_iommu_table_group_mem_t *mem);
 extern void mm_iommu_mapped_dec(struct mm_iommu_table_group_mem_t *mem);
 #endif

arch/powerpc/kernel/iommu.c

Lines changed: 0 additions & 25 deletions
@@ -1013,31 +1013,6 @@ long iommu_tce_xchg(struct iommu_table *tbl, unsigned long entry,
 }
 EXPORT_SYMBOL_GPL(iommu_tce_xchg);
 
-#ifdef CONFIG_PPC_BOOK3S_64
-long iommu_tce_xchg_rm(struct iommu_table *tbl, unsigned long entry,
-		unsigned long *hpa, enum dma_data_direction *direction)
-{
-	long ret;
-
-	ret = tbl->it_ops->exchange_rm(tbl, entry, hpa, direction);
-
-	if (!ret && ((*direction == DMA_FROM_DEVICE) ||
-			(*direction == DMA_BIDIRECTIONAL))) {
-		struct page *pg = realmode_pfn_to_page(*hpa >> PAGE_SHIFT);
-
-		if (likely(pg)) {
-			SetPageDirty(pg);
-		} else {
-			tbl->it_ops->exchange_rm(tbl, entry, hpa, direction);
-			ret = -EFAULT;
-		}
-	}
-
-	return ret;
-}
-EXPORT_SYMBOL_GPL(iommu_tce_xchg_rm);
-#endif
-
 int iommu_take_ownership(struct iommu_table *tbl)
 {
 	unsigned long flags, i, sz = (tbl->it_size + 7) >> 3;

arch/powerpc/kvm/book3s_64_mmu_radix.c

Lines changed: 37 additions & 54 deletions
@@ -525,8 +525,8 @@ int kvmppc_book3s_radix_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
 				   unsigned long ea, unsigned long dsisr)
 {
 	struct kvm *kvm = vcpu->kvm;
-	unsigned long mmu_seq, pte_size;
-	unsigned long gpa, gfn, hva, pfn;
+	unsigned long mmu_seq;
+	unsigned long gpa, gfn, hva;
 	struct kvm_memory_slot *memslot;
 	struct page *page = NULL;
 	long ret;
@@ -623,9 +623,10 @@ int kvmppc_book3s_radix_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
 	 */
 	hva = gfn_to_hva_memslot(memslot, gfn);
 	if (upgrade_p && __get_user_pages_fast(hva, 1, 1, &page) == 1) {
-		pfn = page_to_pfn(page);
 		upgrade_write = true;
 	} else {
+		unsigned long pfn;
+
 		/* Call KVM generic code to do the slow-path check */
 		pfn = __gfn_to_pfn_memslot(memslot, gfn, false, NULL,
 					   writing, upgrade_p);
@@ -639,63 +640,45 @@ int kvmppc_book3s_radix_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
 		}
 	}
 
-	/* See if we can insert a 1GB or 2MB large PTE here */
-	level = 0;
-	if (page && PageCompound(page)) {
-		pte_size = PAGE_SIZE << compound_order(compound_head(page));
-		if (pte_size >= PUD_SIZE &&
-		    (gpa & (PUD_SIZE - PAGE_SIZE)) ==
-		    (hva & (PUD_SIZE - PAGE_SIZE))) {
-			level = 2;
-			pfn &= ~((PUD_SIZE >> PAGE_SHIFT) - 1);
-		} else if (pte_size >= PMD_SIZE &&
-			   (gpa & (PMD_SIZE - PAGE_SIZE)) ==
-			   (hva & (PMD_SIZE - PAGE_SIZE))) {
-			level = 1;
-			pfn &= ~((PMD_SIZE >> PAGE_SHIFT) - 1);
-		}
-	}
-
 	/*
-	 * Compute the PTE value that we need to insert.
+	 * Read the PTE from the process' radix tree and use that
+	 * so we get the shift and attribute bits.
 	 */
-	if (page) {
-		pgflags = _PAGE_READ | _PAGE_EXEC | _PAGE_PRESENT | _PAGE_PTE |
-			_PAGE_ACCESSED;
-		if (writing || upgrade_write)
-			pgflags |= _PAGE_WRITE | _PAGE_DIRTY;
-		pte = pfn_pte(pfn, __pgprot(pgflags));
+	local_irq_disable();
+	ptep = __find_linux_pte(vcpu->arch.pgdir, hva, NULL, &shift);
+	pte = *ptep;
+	local_irq_enable();
+
+	/* Get pte level from shift/size */
+	if (shift == PUD_SHIFT &&
+	    (gpa & (PUD_SIZE - PAGE_SIZE)) ==
+	    (hva & (PUD_SIZE - PAGE_SIZE))) {
+		level = 2;
+	} else if (shift == PMD_SHIFT &&
+		   (gpa & (PMD_SIZE - PAGE_SIZE)) ==
+		   (hva & (PMD_SIZE - PAGE_SIZE))) {
+		level = 1;
 	} else {
-		/*
-		 * Read the PTE from the process' radix tree and use that
-		 * so we get the attribute bits.
-		 */
-		local_irq_disable();
-		ptep = __find_linux_pte(vcpu->arch.pgdir, hva, NULL, &shift);
-		pte = *ptep;
-		local_irq_enable();
-		if (shift == PUD_SHIFT &&
-		    (gpa & (PUD_SIZE - PAGE_SIZE)) ==
-		    (hva & (PUD_SIZE - PAGE_SIZE))) {
-			level = 2;
-		} else if (shift == PMD_SHIFT &&
-			   (gpa & (PMD_SIZE - PAGE_SIZE)) ==
-			   (hva & (PMD_SIZE - PAGE_SIZE))) {
-			level = 1;
-		} else if (shift && shift != PAGE_SHIFT) {
-			/* Adjust PFN */
-			unsigned long mask = (1ul << shift) - PAGE_SIZE;
-			pte = __pte(pte_val(pte) | (hva & mask));
-		}
-		pte = __pte(pte_val(pte) | _PAGE_EXEC | _PAGE_ACCESSED);
-		if (writing || upgrade_write) {
-			if (pte_val(pte) & _PAGE_WRITE)
-				pte = __pte(pte_val(pte) | _PAGE_DIRTY);
-		} else {
-			pte = __pte(pte_val(pte) & ~(_PAGE_WRITE | _PAGE_DIRTY));
+		level = 0;
+		if (shift > PAGE_SHIFT) {
+			/*
+			 * If the pte maps more than one page, bring over
+			 * bits from the virtual address to get the real
+			 * address of the specific single page we want.
+			 */
+			unsigned long rpnmask = (1ul << shift) - PAGE_SIZE;
+			pte = __pte(pte_val(pte) | (hva & rpnmask));
 		}
 	}
 
+	pte = __pte(pte_val(pte) | _PAGE_EXEC | _PAGE_ACCESSED);
+	if (writing || upgrade_write) {
+		if (pte_val(pte) & _PAGE_WRITE)
+			pte = __pte(pte_val(pte) | _PAGE_DIRTY);
+	} else {
+		pte = __pte(pte_val(pte) & ~(_PAGE_WRITE | _PAGE_DIRTY));
+	}
+
 	/* Allocate space in the tree and write the PTE */
 	ret = kvmppc_create_pte(kvm, pte, gpa, level, mmu_seq);
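As a rough illustration of the rpnmask arithmetic introduced above — this is plain user-space C rather than kernel code, and every value below is a made-up example — the low bits of the host virtual address select the single base page inside a larger mapping:

#include <stdio.h>

int main(void)
{
	const unsigned long page_size = 1UL << 12;   /* assume 4K base pages */
	const unsigned long shift = 21;              /* assume the PTE maps a 2MB region */

	/* Same arithmetic as rpnmask in the hunk above: keep only the bits
	 * that select a base page within the large mapping. */
	const unsigned long rpnmask = (1UL << shift) - page_size;

	const unsigned long hva = 0x7f2a00345000UL;  /* made-up host virtual address */
	const unsigned long pte_rpn = 0x40000000UL;  /* made-up 2MB-aligned real address */

	printf("rpnmask          = 0x%lx\n", rpnmask);
	printf("offset within 2M = 0x%lx\n", hva & rpnmask);
	printf("single-page addr = 0x%lx\n", pte_rpn | (hva & rpnmask));
	return 0;
}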

arch/powerpc/kvm/book3s_64_vio_hv.c

Lines changed: 31 additions & 8 deletions
@@ -187,12 +187,35 @@ long kvmppc_gpa_to_ua(struct kvm *kvm, unsigned long gpa,
 EXPORT_SYMBOL_GPL(kvmppc_gpa_to_ua);
 
 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
-static void kvmppc_rm_clear_tce(struct iommu_table *tbl, unsigned long entry)
+static long iommu_tce_xchg_rm(struct mm_struct *mm, struct iommu_table *tbl,
+		unsigned long entry, unsigned long *hpa,
+		enum dma_data_direction *direction)
+{
+	long ret;
+
+	ret = tbl->it_ops->exchange_rm(tbl, entry, hpa, direction);
+
+	if (!ret && ((*direction == DMA_FROM_DEVICE) ||
+			(*direction == DMA_BIDIRECTIONAL))) {
+		__be64 *pua = IOMMU_TABLE_USERSPACE_ENTRY_RM(tbl, entry);
+		/*
+		 * kvmppc_rm_tce_iommu_do_map() updates the UA cache after
+		 * calling this so we still get here a valid UA.
+		 */
+		if (pua && *pua)
+			mm_iommu_ua_mark_dirty_rm(mm, be64_to_cpu(*pua));
+	}
+
+	return ret;
+}
+
+static void kvmppc_rm_clear_tce(struct kvm *kvm, struct iommu_table *tbl,
+		unsigned long entry)
 {
 	unsigned long hpa = 0;
 	enum dma_data_direction dir = DMA_NONE;
 
-	iommu_tce_xchg_rm(tbl, entry, &hpa, &dir);
+	iommu_tce_xchg_rm(kvm->mm, tbl, entry, &hpa, &dir);
 }
 
 static long kvmppc_rm_tce_iommu_mapped_dec(struct kvm *kvm,
@@ -224,7 +247,7 @@ static long kvmppc_rm_tce_iommu_do_unmap(struct kvm *kvm,
 	unsigned long hpa = 0;
 	long ret;
 
-	if (iommu_tce_xchg_rm(tbl, entry, &hpa, &dir))
+	if (iommu_tce_xchg_rm(kvm->mm, tbl, entry, &hpa, &dir))
 		/*
 		 * real mode xchg can fail if struct page crosses
 		 * a page boundary
@@ -236,7 +259,7 @@ static long kvmppc_rm_tce_iommu_do_unmap(struct kvm *kvm,
 
 	ret = kvmppc_rm_tce_iommu_mapped_dec(kvm, tbl, entry);
 	if (ret)
-		iommu_tce_xchg_rm(tbl, entry, &hpa, &dir);
+		iommu_tce_xchg_rm(kvm->mm, tbl, entry, &hpa, &dir);
 
 	return ret;
 }
@@ -282,7 +305,7 @@ static long kvmppc_rm_tce_iommu_do_map(struct kvm *kvm, struct iommu_table *tbl,
 	if (WARN_ON_ONCE_RM(mm_iommu_mapped_inc(mem)))
 		return H_CLOSED;
 
-	ret = iommu_tce_xchg_rm(tbl, entry, &hpa, &dir);
+	ret = iommu_tce_xchg_rm(kvm->mm, tbl, entry, &hpa, &dir);
 	if (ret) {
 		mm_iommu_mapped_dec(mem);
 		/*
@@ -371,7 +394,7 @@ long kvmppc_rm_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
 			return ret;
 
 		WARN_ON_ONCE_RM(1);
-		kvmppc_rm_clear_tce(stit->tbl, entry);
+		kvmppc_rm_clear_tce(vcpu->kvm, stit->tbl, entry);
 	}
 
 	kvmppc_tce_put(stt, entry, tce);
@@ -520,7 +543,7 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu,
 				goto unlock_exit;
 
 			WARN_ON_ONCE_RM(1);
-			kvmppc_rm_clear_tce(stit->tbl, entry);
+			kvmppc_rm_clear_tce(vcpu->kvm, stit->tbl, entry);
 		}
 
 		kvmppc_tce_put(stt, entry + i, tce);
@@ -571,7 +594,7 @@ long kvmppc_rm_h_stuff_tce(struct kvm_vcpu *vcpu,
 				return ret;
 
 			WARN_ON_ONCE_RM(1);
-			kvmppc_rm_clear_tce(stit->tbl, entry);
+			kvmppc_rm_clear_tce(vcpu->kvm, stit->tbl, entry);
 		}
 	}

arch/powerpc/mm/init_64.c

Lines changed: 0 additions & 49 deletions
@@ -308,55 +308,6 @@ void register_page_bootmem_memmap(unsigned long section_nr,
 {
 }
 
-/*
- * We do not have access to the sparsemem vmemmap, so we fallback to
- * walking the list of sparsemem blocks which we already maintain for
- * the sake of crashdump. In the long run, we might want to maintain
- * a tree if performance of that linear walk becomes a problem.
- *
- * realmode_pfn_to_page functions can fail due to:
- * 1) As real sparsemem blocks do not lay in RAM continously (they
- * are in virtual address space which is not available in the real mode),
- * the requested page struct can be split between blocks so get_page/put_page
- * may fail.
- * 2) When huge pages are used, the get_page/put_page API will fail
- * in real mode as the linked addresses in the page struct are virtual
- * too.
- */
-struct page *realmode_pfn_to_page(unsigned long pfn)
-{
-	struct vmemmap_backing *vmem_back;
-	struct page *page;
-	unsigned long page_size = 1 << mmu_psize_defs[mmu_vmemmap_psize].shift;
-	unsigned long pg_va = (unsigned long) pfn_to_page(pfn);
-
-	for (vmem_back = vmemmap_list; vmem_back; vmem_back = vmem_back->list) {
-		if (pg_va < vmem_back->virt_addr)
-			continue;
-
-		/* After vmemmap_list entry free is possible, need check all */
-		if ((pg_va + sizeof(struct page)) <=
-				(vmem_back->virt_addr + page_size)) {
-			page = (struct page *) (vmem_back->phys + pg_va -
-						vmem_back->virt_addr);
-			return page;
-		}
-	}
-
-	/* Probably that page struct is split between real pages */
-	return NULL;
-}
-EXPORT_SYMBOL_GPL(realmode_pfn_to_page);
-
-#else
-
-struct page *realmode_pfn_to_page(unsigned long pfn)
-{
-	struct page *page = pfn_to_page(pfn);
-	return page;
-}
-EXPORT_SYMBOL_GPL(realmode_pfn_to_page);
-
 #endif /* CONFIG_SPARSEMEM_VMEMMAP */
 
 #ifdef CONFIG_PPC_BOOK3S_64
