Skip to content

Commit 3c3736c

Browse files
Suzuki K Poulose authored and Marc Zyngier committed
KVM: arm/arm64: Fix handling of stage2 huge mappings
We rely on the mmu_notifier callbacks to handle the split/merge of huge pages, and thus we are guaranteed that, while creating a block mapping, either the entire block is unmapped at stage2 or it is missing permission. However, we miss a case where the block mapping is split for the dirty logging case and could later be made a block mapping again if we cancel the dirty logging. This not only creates inconsistent TLB entries for the pages in the block, but also leaks the table pages for the PMD level. Handle this corner case for the huge mappings at stage2 by unmapping the non-huge mapping for the block. This could potentially release the upper level table, so we need to restart the table walk once we unmap the range. Fixes: ad361f0 ("KVM: ARM: Support hugetlbfs backed huge pages") Reported-by: Zheng Xiang <zhengxiang9@huawei.com> Cc: Zheng Xiang <zhengxiang9@huawei.com> Cc: Zenghui Yu <yuzenghui@huawei.com> Cc: Christoffer Dall <christoffer.dall@arm.com> Signed-off-by: Suzuki K Poulose <suzuki.poulose@arm.com> Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
1 parent a80868f commit 3c3736c

File tree

2 files changed

+45
-16
lines changed

2 files changed

+45
-16
lines changed

arch/arm/include/asm/stage2_pgtable.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,8 @@ static inline bool kvm_stage2_has_pud(struct kvm *kvm)
7575

7676
#define S2_PMD_MASK PMD_MASK
7777
#define S2_PMD_SIZE PMD_SIZE
78+
#define S2_PUD_MASK PUD_MASK
79+
#define S2_PUD_SIZE PUD_SIZE
7880

7981
static inline bool kvm_stage2_has_pmd(struct kvm *kvm)
8082
{

virt/kvm/arm/mmu.c

Lines changed: 43 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1067,25 +1067,43 @@ static int stage2_set_pmd_huge(struct kvm *kvm, struct kvm_mmu_memory_cache
10671067
{
10681068
pmd_t *pmd, old_pmd;
10691069

1070+
retry:
10701071
pmd = stage2_get_pmd(kvm, cache, addr);
10711072
VM_BUG_ON(!pmd);
10721073

10731074
old_pmd = *pmd;
1075+
/*
1076+
* Multiple vcpus faulting on the same PMD entry, can
1077+
* lead to them sequentially updating the PMD with the
1078+
* same value. Following the break-before-make
1079+
* (pmd_clear() followed by tlb_flush()) process can
1080+
* hinder forward progress due to refaults generated
1081+
* on missing translations.
1082+
*
1083+
* Skip updating the page table if the entry is
1084+
* unchanged.
1085+
*/
1086+
if (pmd_val(old_pmd) == pmd_val(*new_pmd))
1087+
return 0;
1088+
10741089
if (pmd_present(old_pmd)) {
10751090
/*
1076-
* Multiple vcpus faulting on the same PMD entry, can
1077-
* lead to them sequentially updating the PMD with the
1078-
* same value. Following the break-before-make
1079-
* (pmd_clear() followed by tlb_flush()) process can
1080-
* hinder forward progress due to refaults generated
1081-
* on missing translations.
1091+
* If we already have PTE level mapping for this block,
1092+
* we must unmap it to avoid inconsistent TLB state and
1093+
* leaking the table page. We could end up in this situation
1094+
* if the memory slot was marked for dirty logging and was
1095+
* reverted, leaving PTE level mappings for the pages accessed
1096+
* during the period. So, unmap the PTE level mapping for this
1097+
* block and retry, as we could have released the upper level
1098+
* table in the process.
10821099
*
1083-
* Skip updating the page table if the entry is
1084-
* unchanged.
1100+
* Normal THP split/merge follows mmu_notifier callbacks and do
1101+
* get handled accordingly.
10851102
*/
1086-
if (pmd_val(old_pmd) == pmd_val(*new_pmd))
1087-
return 0;
1088-
1103+
if (!pmd_thp_or_huge(old_pmd)) {
1104+
unmap_stage2_range(kvm, addr & S2_PMD_MASK, S2_PMD_SIZE);
1105+
goto retry;
1106+
}
10891107
/*
10901108
* Mapping in huge pages should only happen through a
10911109
* fault. If a page is merged into a transparent huge
@@ -1097,8 +1115,7 @@ static int stage2_set_pmd_huge(struct kvm *kvm, struct kvm_mmu_memory_cache
10971115
* should become splitting first, unmapped, merged,
10981116
* and mapped back in on-demand.
10991117
*/
1100-
VM_BUG_ON(pmd_pfn(old_pmd) != pmd_pfn(*new_pmd));
1101-
1118+
WARN_ON_ONCE(pmd_pfn(old_pmd) != pmd_pfn(*new_pmd));
11021119
pmd_clear(pmd);
11031120
kvm_tlb_flush_vmid_ipa(kvm, addr);
11041121
} else {
@@ -1114,21 +1131,31 @@ static int stage2_set_pud_huge(struct kvm *kvm, struct kvm_mmu_memory_cache *cac
11141131
{
11151132
pud_t *pudp, old_pud;
11161133

1134+
retry:
11171135
pudp = stage2_get_pud(kvm, cache, addr);
11181136
VM_BUG_ON(!pudp);
11191137

11201138
old_pud = *pudp;
11211139

11221140
/*
11231141
* A large number of vcpus faulting on the same stage 2 entry,
1124-
* can lead to a refault due to the
1125-
* stage2_pud_clear()/tlb_flush(). Skip updating the page
1126-
* tables if there is no change.
1142+
* can lead to a refault due to the stage2_pud_clear()/tlb_flush().
1143+
* Skip updating the page tables if there is no change.
11271144
*/
11281145
if (pud_val(old_pud) == pud_val(*new_pudp))
11291146
return 0;
11301147

11311148
if (stage2_pud_present(kvm, old_pud)) {
1149+
/*
1150+
* If we already have table level mapping for this block, unmap
1151+
* the range for this block and retry.
1152+
*/
1153+
if (!stage2_pud_huge(kvm, old_pud)) {
1154+
unmap_stage2_range(kvm, addr & S2_PUD_MASK, S2_PUD_SIZE);
1155+
goto retry;
1156+
}
1157+
1158+
WARN_ON_ONCE(kvm_pud_pfn(old_pud) != kvm_pud_pfn(*new_pudp));
11321159
stage2_pud_clear(kvm, pudp);
11331160
kvm_tlb_flush_vmid_ipa(kvm, addr);
11341161
} else {

0 commit comments

Comments
 (0)