
Commit f710a12

Marcelo Tosatti (matosatti) authored and committed
Merge tag 'kvm-arm-fixes-4.0-rc5' of git://git.kernel.org/pub/scm/linux/kernel/git/kvmarm/kvmarm
Fixes for KVM/ARM for 4.0-rc5. Fixes page refcounting issues in our Stage-2 page table management code, fixes a missing unlock in a gicv3 error path, and fixes a race that can cause lost interrupts if signals are pending just prior to entering the guest.
2 parents: 670125b + ae70593

File tree: 8 files changed, 105 additions, 75 deletions

arch/arm/include/asm/kvm_mmu.h (6 additions, 7 deletions)

@@ -149,29 +149,28 @@ static inline bool kvm_s2pmd_readonly(pmd_t *pmd)
 	(__boundary - 1 < (end) - 1)? __boundary: (end);	\
 })
 
+#define kvm_pgd_index(addr)	pgd_index(addr)
+
 static inline bool kvm_page_empty(void *ptr)
 {
 	struct page *ptr_page = virt_to_page(ptr);
 	return page_count(ptr_page) == 1;
 }
 
-
 #define kvm_pte_table_empty(kvm, ptep) kvm_page_empty(ptep)
 #define kvm_pmd_table_empty(kvm, pmdp) kvm_page_empty(pmdp)
 #define kvm_pud_table_empty(kvm, pudp) (0)
 
 #define KVM_PREALLOC_LEVEL	0
 
-static inline int kvm_prealloc_hwpgd(struct kvm *kvm, pgd_t *pgd)
+static inline void *kvm_get_hwpgd(struct kvm *kvm)
 {
-	return 0;
+	return kvm->arch.pgd;
 }
 
-static inline void kvm_free_hwpgd(struct kvm *kvm) { }
-
-static inline void *kvm_get_hwpgd(struct kvm *kvm)
+static inline unsigned int kvm_get_hwpgd_size(void)
 {
-	return kvm->arch.pgd;
+	return PTRS_PER_S2_PGD * sizeof(pgd_t);
 }
 
 struct kvm;
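
Note on the refcounting fix: the page_count(ptr_page) == 1 test in
kvm_page_empty() above is why every page backing a stage-2 table must
carry its own reference count. Table pages are accounted with
get_page()/put_page() as entries come and go; a minimal sketch of that
convention (helper names are illustrative, the real calls live in
arch/arm/kvm/mmu.c and are not part of this hunk):

/* Sketch only: a fresh table page has page_count == 1; each installed
 * entry takes a reference, each removed entry drops one, so the table
 * is empty again exactly when the count is back to 1. */
static void install_entry_sketch(pte_t *pte, pte_t new)
{
	*pte = new;
	get_page(virt_to_page(pte));
}

static void remove_entry_sketch(pte_t *pte)
{
	*pte = __pte(0);
	put_page(virt_to_page(pte));
}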

arch/arm/kvm/mmu.c (53 additions, 22 deletions)

@@ -290,7 +290,7 @@ static void unmap_range(struct kvm *kvm, pgd_t *pgdp,
 	phys_addr_t addr = start, end = start + size;
 	phys_addr_t next;
 
-	pgd = pgdp + pgd_index(addr);
+	pgd = pgdp + kvm_pgd_index(addr);
 	do {
 		next = kvm_pgd_addr_end(addr, end);
 		if (!pgd_none(*pgd))
@@ -355,7 +355,7 @@ static void stage2_flush_memslot(struct kvm *kvm,
 	phys_addr_t next;
 	pgd_t *pgd;
 
-	pgd = kvm->arch.pgd + pgd_index(addr);
+	pgd = kvm->arch.pgd + kvm_pgd_index(addr);
 	do {
 		next = kvm_pgd_addr_end(addr, end);
 		stage2_flush_puds(kvm, pgd, addr, next);
@@ -632,6 +632,20 @@ int create_hyp_io_mappings(void *from, void *to, phys_addr_t phys_addr)
 				 __phys_to_pfn(phys_addr), PAGE_HYP_DEVICE);
 }
 
+/* Free the HW pgd, one page at a time */
+static void kvm_free_hwpgd(void *hwpgd)
+{
+	free_pages_exact(hwpgd, kvm_get_hwpgd_size());
+}
+
+/* Allocate the HW PGD, making sure that each page gets its own refcount */
+static void *kvm_alloc_hwpgd(void)
+{
+	unsigned int size = kvm_get_hwpgd_size();
+
+	return alloc_pages_exact(size, GFP_KERNEL | __GFP_ZERO);
+}
+
 /**
  * kvm_alloc_stage2_pgd - allocate level-1 table for stage-2 translation.
  * @kvm:	The KVM struct pointer for the VM.
@@ -645,46 +659,64 @@ int create_hyp_io_mappings(void *from, void *to, phys_addr_t phys_addr)
  */
 int kvm_alloc_stage2_pgd(struct kvm *kvm)
 {
-	int ret;
 	pgd_t *pgd;
+	void *hwpgd;
 
 	if (kvm->arch.pgd != NULL) {
 		kvm_err("kvm_arch already initialized?\n");
 		return -EINVAL;
 	}
 
+	hwpgd = kvm_alloc_hwpgd();
+	if (!hwpgd)
+		return -ENOMEM;
+
+	/* When the kernel uses more levels of page tables than the
+	 * guest, we allocate a fake PGD and pre-populate it to point
+	 * to the next-level page table, which will be the real
+	 * initial page table pointed to by the VTTBR.
+	 *
+	 * When KVM_PREALLOC_LEVEL==2, we allocate a single page for
	 * the PMD and the kernel will use folded pud.
+	 * When KVM_PREALLOC_LEVEL==1, we allocate 2 consecutive PUD
+	 * pages.
+	 */
 	if (KVM_PREALLOC_LEVEL > 0) {
+		int i;
+
 		/*
 		 * Allocate fake pgd for the page table manipulation macros to
 		 * work.  This is not used by the hardware and we have no
 		 * alignment requirement for this allocation.
 		 */
 		pgd = (pgd_t *)kmalloc(PTRS_PER_S2_PGD * sizeof(pgd_t),
 				       GFP_KERNEL | __GFP_ZERO);
+
+		if (!pgd) {
+			kvm_free_hwpgd(hwpgd);
+			return -ENOMEM;
+		}
+
+		/* Plug the HW PGD into the fake one. */
+		for (i = 0; i < PTRS_PER_S2_PGD; i++) {
+			if (KVM_PREALLOC_LEVEL == 1)
+				pgd_populate(NULL, pgd + i,
+					     (pud_t *)hwpgd + i * PTRS_PER_PUD);
+			else if (KVM_PREALLOC_LEVEL == 2)
+				pud_populate(NULL, pud_offset(pgd, 0) + i,
+					     (pmd_t *)hwpgd + i * PTRS_PER_PMD);
+		}
 	} else {
 		/*
 		 * Allocate actual first-level Stage-2 page table used by the
 		 * hardware for Stage-2 page table walks.
 		 */
-		pgd = (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, S2_PGD_ORDER);
+		pgd = (pgd_t *)hwpgd;
 	}
 
-	if (!pgd)
-		return -ENOMEM;
-
-	ret = kvm_prealloc_hwpgd(kvm, pgd);
-	if (ret)
-		goto out_err;
-
 	kvm_clean_pgd(pgd);
 	kvm->arch.pgd = pgd;
 	return 0;
-out_err:
-	if (KVM_PREALLOC_LEVEL > 0)
-		kfree(pgd);
-	else
-		free_pages((unsigned long)pgd, S2_PGD_ORDER);
-	return ret;
 }
 
 /**
@@ -785,11 +817,10 @@ void kvm_free_stage2_pgd(struct kvm *kvm)
 		return;
 
 	unmap_stage2_range(kvm, 0, KVM_PHYS_SIZE);
-	kvm_free_hwpgd(kvm);
+	kvm_free_hwpgd(kvm_get_hwpgd(kvm));
 	if (KVM_PREALLOC_LEVEL > 0)
 		kfree(kvm->arch.pgd);
-	else
-		free_pages((unsigned long)kvm->arch.pgd, S2_PGD_ORDER);
+
 	kvm->arch.pgd = NULL;
 }
 
@@ -799,7 +830,7 @@ static pud_t *stage2_get_pud(struct kvm *kvm, struct kvm_mmu_memory_cache *cache
 	pgd_t *pgd;
 	pud_t *pud;
 
-	pgd = kvm->arch.pgd + pgd_index(addr);
+	pgd = kvm->arch.pgd + kvm_pgd_index(addr);
 	if (WARN_ON(pgd_none(*pgd))) {
 		if (!cache)
 			return NULL;
@@ -1089,7 +1120,7 @@ static void stage2_wp_range(struct kvm *kvm, phys_addr_t addr, phys_addr_t end)
 	pgd_t *pgd;
 	phys_addr_t next;
 
-	pgd = kvm->arch.pgd + pgd_index(addr);
+	pgd = kvm->arch.pgd + kvm_pgd_index(addr);
 	do {
 		/*
 		 * Release kvm_mmu_lock periodically if the memory region is
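
The allocator switch above is the heart of the fix: a multi-page,
non-compound allocation from __get_free_pages() leaves page_count == 1
only on its head page, so put_page() on any other constituent page
corrupts the accounting that kvm_page_empty() depends on.
alloc_pages_exact() splits the high-order block internally, giving
every page its own count of 1. A hedged sketch for a hypothetical
two-page (8K) hardware PGD:

static void *alloc_hwpgd_sketch(void)
{
	/* Old approach, for contrast: an order-1 allocation where only
	 * the first page has page_count == 1:
	 *	return (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, 1);
	 */

	/* New approach: the block is split, so both pages end up with
	 * their own page_count == 1. */
	return alloc_pages_exact(2 * PAGE_SIZE, GFP_KERNEL | __GFP_ZERO);
}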

arch/arm64/include/asm/kvm_arm.h (3 additions, 2 deletions)

@@ -129,6 +129,9 @@
  * 40 bits wide (T0SZ = 24).  Systems with a PARange smaller than 40 bits are
  * not known to exist and will break with this configuration.
  *
+ * VTCR_EL2.PS is extracted from ID_AA64MMFR0_EL1.PARange at boot time
+ * (see hyp-init.S).
+ *
  * Note that when using 4K pages, we concatenate two first level page tables
  * together.
  *
@@ -138,7 +141,6 @@
 #ifdef CONFIG_ARM64_64K_PAGES
 /*
  * Stage2 translation configuration:
- * 40bits output (PS = 2)
  * 40bits input  (T0SZ = 24)
  * 64kB pages (TG0 = 1)
  * 2 level page tables (SL = 1)
@@ -150,7 +152,6 @@
 #else
 /*
  * Stage2 translation configuration:
- * 40bits output (PS = 2)
  * 40bits input  (T0SZ = 24)
  * 4kB pages (TG0 = 0)
  * 3 level page tables (SL = 1)
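
For context on the new comment: ID_AA64MMFR0_EL1.PARange sits in the
low bits of that ID register and VTCR_EL2.PS in bits [18:16]. The
boot-time extraction is done in assembly in hyp-init.S; rendered here
as a hedged C sketch (the sysreg accessors are the modern C spellings,
the 4.0-era code uses mrs/bfi directly):

static void setup_vtcr_sketch(void)
{
	u64 vtcr = VTCR_EL2_FLAGS;

	/* Copy the CPU's reported PA range into the stage-2 PS field. */
	vtcr |= (read_sysreg(id_aa64mmfr0_el1) & 0x7) << 16;
	write_sysreg(vtcr, vtcr_el2);
}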

arch/arm64/include/asm/kvm_mmu.h (6 additions, 42 deletions)

@@ -158,6 +158,8 @@ static inline bool kvm_s2pmd_readonly(pmd_t *pmd)
 #define PTRS_PER_S2_PGD		(1 << PTRS_PER_S2_PGD_SHIFT)
 #define S2_PGD_ORDER		get_order(PTRS_PER_S2_PGD * sizeof(pgd_t))
 
+#define kvm_pgd_index(addr)	(((addr) >> PGDIR_SHIFT) & (PTRS_PER_S2_PGD - 1))
+
 /*
  * If we are concatenating first level stage-2 page tables, we would have less
  * than or equal to 16 pointers in the fake PGD, because that's what the
@@ -171,43 +173,6 @@ static inline bool kvm_s2pmd_readonly(pmd_t *pmd)
 #define KVM_PREALLOC_LEVEL	(0)
 #endif
 
-/**
- * kvm_prealloc_hwpgd - allocate inital table for VTTBR
- * @kvm:	The KVM struct pointer for the VM.
- * @pgd:	The kernel pseudo pgd
- *
- * When the kernel uses more levels of page tables than the guest, we allocate
- * a fake PGD and pre-populate it to point to the next-level page table, which
- * will be the real initial page table pointed to by the VTTBR.
- *
- * When KVM_PREALLOC_LEVEL==2, we allocate a single page for the PMD and
- * the kernel will use folded pud.  When KVM_PREALLOC_LEVEL==1, we
- * allocate 2 consecutive PUD pages.
- */
-static inline int kvm_prealloc_hwpgd(struct kvm *kvm, pgd_t *pgd)
-{
-	unsigned int i;
-	unsigned long hwpgd;
-
-	if (KVM_PREALLOC_LEVEL == 0)
-		return 0;
-
-	hwpgd = __get_free_pages(GFP_KERNEL | __GFP_ZERO, PTRS_PER_S2_PGD_SHIFT);
-	if (!hwpgd)
-		return -ENOMEM;
-
-	for (i = 0; i < PTRS_PER_S2_PGD; i++) {
-		if (KVM_PREALLOC_LEVEL == 1)
-			pgd_populate(NULL, pgd + i,
-				     (pud_t *)hwpgd + i * PTRS_PER_PUD);
-		else if (KVM_PREALLOC_LEVEL == 2)
-			pud_populate(NULL, pud_offset(pgd, 0) + i,
-				     (pmd_t *)hwpgd + i * PTRS_PER_PMD);
-	}
-
-	return 0;
-}
-
 static inline void *kvm_get_hwpgd(struct kvm *kvm)
 {
 	pgd_t *pgd = kvm->arch.pgd;
@@ -224,12 +189,11 @@ static inline void *kvm_get_hwpgd(struct kvm *kvm)
 	return pmd_offset(pud, 0);
 }
 
-static inline void kvm_free_hwpgd(struct kvm *kvm)
+static inline unsigned int kvm_get_hwpgd_size(void)
 {
-	if (KVM_PREALLOC_LEVEL > 0) {
-		unsigned long hwpgd = (unsigned long)kvm_get_hwpgd(kvm);
-		free_pages(hwpgd, PTRS_PER_S2_PGD_SHIFT);
-	}
+	if (KVM_PREALLOC_LEVEL > 0)
+		return PTRS_PER_S2_PGD * PAGE_SIZE;
+	return PTRS_PER_S2_PGD * sizeof(pgd_t);
 }
 
 static inline bool kvm_page_empty(void *ptr)
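
The masked kvm_pgd_index() added above matters because the host's
pgd_index() is sized for the host VA space, not for the (possibly
concatenated) stage-2 PGD. A worked example, assuming 4K pages, a
39-bit host VA space and the 40-bit IPA space used here:

/*
 * host:    PTRS_PER_PGD    = 512,  pgd_index(a)     = (a >> 30) & 511
 * stage-2: PTRS_PER_S2_PGD = 1024, kvm_pgd_index(a) = (a >> 30) & 1023
 *
 * For IPA a = 1 << 39:
 *   pgd_index(a)     = 512 & 511  = 0    (wraps to the wrong slot)
 *   kvm_pgd_index(a) = 512 & 1023 = 512  (correct slot in the
 *                                         concatenated stage-2 PGD)
 */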

include/kvm/arm_vgic.h (1 addition, 0 deletions)

@@ -114,6 +114,7 @@ struct vgic_ops {
 	void	(*sync_lr_elrsr)(struct kvm_vcpu *, int, struct vgic_lr);
 	u64	(*get_elrsr)(const struct kvm_vcpu *vcpu);
 	u64	(*get_eisr)(const struct kvm_vcpu *vcpu);
+	void	(*clear_eisr)(struct kvm_vcpu *vcpu);
 	u32	(*get_interrupt_status)(const struct kvm_vcpu *vcpu);
 	void	(*enable_underflow)(struct kvm_vcpu *vcpu);
 	void	(*disable_underflow)(struct kvm_vcpu *vcpu);
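
The new clear_eisr hook is dispatched through the vgic_ops table like
its neighbours; the call site is in virt/kvm/arm/vgic.c, which this
merge also touches but which is not shown in this excerpt. Presumably
the wrapper has the usual shape:

/* Assumed shape of the dispatch wrapper (not shown in this diff). */
static inline void vgic_clear_eisr(struct kvm_vcpu *vcpu)
{
	vgic_ops->clear_eisr(vcpu);
}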

virt/kvm/arm/vgic-v2.c (8 additions, 0 deletions)

@@ -72,6 +72,8 @@ static void vgic_v2_sync_lr_elrsr(struct kvm_vcpu *vcpu, int lr,
 {
 	if (!(lr_desc.state & LR_STATE_MASK))
 		vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr |= (1ULL << lr);
+	else
+		vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr &= ~(1ULL << lr);
 }
 
 static u64 vgic_v2_get_elrsr(const struct kvm_vcpu *vcpu)
@@ -84,6 +86,11 @@ static u64 vgic_v2_get_eisr(const struct kvm_vcpu *vcpu)
 	return vcpu->arch.vgic_cpu.vgic_v2.vgic_eisr;
 }
 
+static void vgic_v2_clear_eisr(struct kvm_vcpu *vcpu)
+{
+	vcpu->arch.vgic_cpu.vgic_v2.vgic_eisr = 0;
+}
+
 static u32 vgic_v2_get_interrupt_status(const struct kvm_vcpu *vcpu)
 {
 	u32 misr = vcpu->arch.vgic_cpu.vgic_v2.vgic_misr;
@@ -148,6 +155,7 @@ static const struct vgic_ops vgic_v2_ops = {
 	.sync_lr_elrsr		= vgic_v2_sync_lr_elrsr,
 	.get_elrsr		= vgic_v2_get_elrsr,
 	.get_eisr		= vgic_v2_get_eisr,
+	.clear_eisr		= vgic_v2_clear_eisr,
 	.get_interrupt_status	= vgic_v2_get_interrupt_status,
 	.enable_underflow	= vgic_v2_enable_underflow,
 	.disable_underflow	= vgic_v2_disable_underflow,
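
The new else branch keeps the software copy of ELRSR honest when a
list register still holds state. Without it, a vcpu that backs out to
handle a pending signal before guest entry could see a stale "LR
empty" bit on the next entry and overwrite a still-live interrupt,
which is the lost-interrupt race named in the commit message. A sketch
of the invariant the flush path relies on (helper name illustrative):

/* A set bit in the software elrsr copy must mean "this LR is free". */
static bool lr_is_free_sketch(struct kvm_vcpu *vcpu, int lr)
{
	return vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr & (1ULL << lr);
}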

virt/kvm/arm/vgic-v3.c (8 additions, 0 deletions)

@@ -104,6 +104,8 @@ static void vgic_v3_sync_lr_elrsr(struct kvm_vcpu *vcpu, int lr,
 {
 	if (!(lr_desc.state & LR_STATE_MASK))
 		vcpu->arch.vgic_cpu.vgic_v3.vgic_elrsr |= (1U << lr);
+	else
+		vcpu->arch.vgic_cpu.vgic_v3.vgic_elrsr &= ~(1U << lr);
 }
 
 static u64 vgic_v3_get_elrsr(const struct kvm_vcpu *vcpu)
@@ -116,6 +118,11 @@ static u64 vgic_v3_get_eisr(const struct kvm_vcpu *vcpu)
 	return vcpu->arch.vgic_cpu.vgic_v3.vgic_eisr;
 }
 
+static void vgic_v3_clear_eisr(struct kvm_vcpu *vcpu)
+{
+	vcpu->arch.vgic_cpu.vgic_v3.vgic_eisr = 0;
+}
+
 static u32 vgic_v3_get_interrupt_status(const struct kvm_vcpu *vcpu)
 {
 	u32 misr = vcpu->arch.vgic_cpu.vgic_v3.vgic_misr;
@@ -192,6 +199,7 @@ static const struct vgic_ops vgic_v3_ops = {
 	.sync_lr_elrsr		= vgic_v3_sync_lr_elrsr,
 	.get_elrsr		= vgic_v3_get_elrsr,
 	.get_eisr		= vgic_v3_get_eisr,
+	.clear_eisr		= vgic_v3_clear_eisr,
 	.get_interrupt_status	= vgic_v3_get_interrupt_status,
 	.enable_underflow	= vgic_v3_enable_underflow,
 	.disable_underflow	= vgic_v3_disable_underflow,
