Commit cd4a4e5

avikivity authored and Linus Torvalds committed
[PATCH] KVM: MMU: Implement simple reverse mapping
Keep in each host page frame's page->private a pointer to the shadow pte which maps it. If there are multiple shadow ptes mapping the page, set bit 0 of page->private, and use the rest as a pointer to a linked list of all such mappings.

Reverse mappings are needed because when we cache shadow page tables, we must protect the guest page tables from being modified by the guest, as that would invalidate the cached ptes.

Signed-off-by: Avi Kivity <avi@qumranet.com>
Acked-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
1 parent 399badf commit cd4a4e5

File tree

4 files changed: 142 additions, 13 deletions

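The commit message describes a tagged-pointer trick: a frame with a single shadow pte mapping stores that pte's address directly in page->private, while a frame with several mappings sets bit 0 and points the remaining bits at a chain of descriptor blocks. The program below is a minimal, user-space sketch of that encoding and its decoding, not the kernel code; the simplified types (struct fake_page, struct rmap_desc) and the use of calloc/assert in place of kzalloc/BUG are assumptions made only so it compiles standalone.

/* rmap_tag_sketch.c — illustrative only; names and types are not the kernel's. */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define RMAP_EXT 4

struct rmap_desc {
	uint64_t *shadow_ptes[RMAP_EXT];
	struct rmap_desc *more;
};

/* Stand-in for the kernel's struct page, keeping only the private word. */
struct fake_page {
	unsigned long private;
};

static void rmap_add_sketch(struct fake_page *page, uint64_t *spte)
{
	struct rmap_desc *desc;
	int i;

	if (!page->private) {
		/* 0 -> 1: store the lone shadow pte pointer directly. */
		page->private = (unsigned long)spte;
	} else if (!(page->private & 1)) {
		/* 1 -> many: promote to a descriptor and tag it with bit 0. */
		desc = calloc(1, sizeof(*desc));
		assert(desc);
		desc->shadow_ptes[0] = (uint64_t *)page->private;
		desc->shadow_ptes[1] = spte;
		page->private = (unsigned long)desc | 1;
	} else {
		/* many -> many: append to the first descriptor with a free slot. */
		desc = (struct rmap_desc *)(page->private & ~1ul);
		while (desc->shadow_ptes[RMAP_EXT - 1] && desc->more)
			desc = desc->more;
		if (desc->shadow_ptes[RMAP_EXT - 1]) {
			desc->more = calloc(1, sizeof(*desc->more));
			assert(desc->more);
			desc = desc->more;
		}
		for (i = 0; desc->shadow_ptes[i]; ++i)
			;
		desc->shadow_ptes[i] = spte;
	}
}

/* Decode side: count every shadow pte recorded for the frame. */
static int rmap_count_sketch(const struct fake_page *page)
{
	const struct rmap_desc *desc;
	int i, n = 0;

	if (!page->private)
		return 0;
	if (!(page->private & 1))
		return 1;
	for (desc = (const struct rmap_desc *)(page->private & ~1ul);
	     desc; desc = desc->more)
		for (i = 0; i < RMAP_EXT && desc->shadow_ptes[i]; ++i)
			++n;
	return n;
}

int main(void)
{
	struct fake_page page = { 0 };
	uint64_t sptes[6];
	int i;

	for (i = 0; i < 6; ++i)
		rmap_add_sketch(&page, &sptes[i]);
	printf("mappings: %d, tagged: %lu\n",
	       rmap_count_sketch(&page), page.private & 1);
	return 0;
}

The same structure appears in the real rmap_add/rmap_remove added to drivers/kvm/mmu.c below; the kernel version additionally filters on is_rmap_pte (only present, writable ptes are tracked) and handles descriptor compaction on removal.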

drivers/kvm/kvm.h

Lines changed: 1 addition & 0 deletions
@@ -236,6 +236,7 @@ struct kvm {
 	struct kvm_vcpu vcpus[KVM_MAX_VCPUS];
 	int memory_config_version;
 	int busy;
+	unsigned long rmap_overflow;
 };
 
 struct kvm_stat {

drivers/kvm/kvm_main.c

Lines changed: 1 addition & 0 deletions
@@ -638,6 +638,7 @@ static int kvm_dev_ioctl_set_memory_region(struct kvm *kvm,
 						   | __GFP_ZERO);
 			if (!new.phys_mem[i])
 				goto out_free;
+			new.phys_mem[i]->private = 0;
 		}
 	}
 
drivers/kvm/mmu.c

Lines changed: 139 additions & 13 deletions
@@ -27,6 +27,7 @@
 #include "kvm.h"
 
 #define pgprintk(x...) do { } while (0)
+#define rmap_printk(x...) do { } while (0)
 
 #define ASSERT(x) \
 	if (!(x)) { \
@@ -125,6 +126,13 @@
 #define PT_DIRECTORY_LEVEL 2
 #define PT_PAGE_TABLE_LEVEL 1
 
+#define RMAP_EXT 4
+
+struct kvm_rmap_desc {
+	u64 *shadow_ptes[RMAP_EXT];
+	struct kvm_rmap_desc *more;
+};
+
 static int is_write_protection(struct kvm_vcpu *vcpu)
 {
 	return vcpu->cr0 & CR0_WP_MASK;
@@ -150,6 +158,120 @@ static int is_io_pte(unsigned long pte)
 	return pte & PT_SHADOW_IO_MARK;
 }
 
+static int is_rmap_pte(u64 pte)
+{
+	return (pte & (PT_WRITABLE_MASK | PT_PRESENT_MASK))
+		== (PT_WRITABLE_MASK | PT_PRESENT_MASK);
+}
+
+/*
+ * Reverse mapping data structures:
+ *
+ * If page->private bit zero is zero, then page->private points to the
+ * shadow page table entry that points to page_address(page).
+ *
+ * If page->private bit zero is one, (then page->private & ~1) points
+ * to a struct kvm_rmap_desc containing more mappings.
+ */
+static void rmap_add(struct kvm *kvm, u64 *spte)
+{
+	struct page *page;
+	struct kvm_rmap_desc *desc;
+	int i;
+
+	if (!is_rmap_pte(*spte))
+		return;
+	page = pfn_to_page((*spte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT);
+	if (!page->private) {
+		rmap_printk("rmap_add: %p %llx 0->1\n", spte, *spte);
+		page->private = (unsigned long)spte;
+	} else if (!(page->private & 1)) {
+		rmap_printk("rmap_add: %p %llx 1->many\n", spte, *spte);
+		desc = kzalloc(sizeof *desc, GFP_NOWAIT);
+		if (!desc)
+			BUG(); /* FIXME: return error */
+		desc->shadow_ptes[0] = (u64 *)page->private;
+		desc->shadow_ptes[1] = spte;
+		page->private = (unsigned long)desc | 1;
+	} else {
+		rmap_printk("rmap_add: %p %llx many->many\n", spte, *spte);
+		desc = (struct kvm_rmap_desc *)(page->private & ~1ul);
+		while (desc->shadow_ptes[RMAP_EXT-1] && desc->more)
+			desc = desc->more;
+		if (desc->shadow_ptes[RMAP_EXT-1]) {
+			desc->more = kzalloc(sizeof *desc->more, GFP_NOWAIT);
+			if (!desc->more)
+				BUG(); /* FIXME: return error */
+			desc = desc->more;
+		}
+		for (i = 0; desc->shadow_ptes[i]; ++i)
+			;
+		desc->shadow_ptes[i] = spte;
+	}
+}
+
+static void rmap_desc_remove_entry(struct page *page,
+				   struct kvm_rmap_desc *desc,
+				   int i,
+				   struct kvm_rmap_desc *prev_desc)
+{
+	int j;
+
+	for (j = RMAP_EXT - 1; !desc->shadow_ptes[j] && j > i; --j)
+		;
+	desc->shadow_ptes[i] = desc->shadow_ptes[j];
+	desc->shadow_ptes[j] = 0;
+	if (j != 0)
+		return;
+	if (!prev_desc && !desc->more)
+		page->private = (unsigned long)desc->shadow_ptes[0];
+	else
+		if (prev_desc)
+			prev_desc->more = desc->more;
+		else
+			page->private = (unsigned long)desc->more | 1;
+	kfree(desc);
+}
+
+static void rmap_remove(struct kvm *kvm, u64 *spte)
+{
+	struct page *page;
+	struct kvm_rmap_desc *desc;
+	struct kvm_rmap_desc *prev_desc;
+	int i;
+
+	if (!is_rmap_pte(*spte))
+		return;
+	page = pfn_to_page((*spte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT);
+	if (!page->private) {
+		printk(KERN_ERR "rmap_remove: %p %llx 0->BUG\n", spte, *spte);
+		BUG();
+	} else if (!(page->private & 1)) {
+		rmap_printk("rmap_remove: %p %llx 1->0\n", spte, *spte);
+		if ((u64 *)page->private != spte) {
+			printk(KERN_ERR "rmap_remove: %p %llx 1->BUG\n",
+			       spte, *spte);
+			BUG();
+		}
+		page->private = 0;
+	} else {
+		rmap_printk("rmap_remove: %p %llx many->many\n", spte, *spte);
+		desc = (struct kvm_rmap_desc *)(page->private & ~1ul);
+		prev_desc = NULL;
+		while (desc) {
+			for (i = 0; i < RMAP_EXT && desc->shadow_ptes[i]; ++i)
+				if (desc->shadow_ptes[i] == spte) {
+					rmap_desc_remove_entry(page, desc, i,
+							       prev_desc);
+					return;
+				}
+			prev_desc = desc;
+			desc = desc->more;
+		}
+		BUG();
+	}
+}
+
 static void kvm_mmu_free_page(struct kvm_vcpu *vcpu, hpa_t page_hpa)
 {
 	struct kvm_mmu_page *page_head = page_header(page_hpa);
@@ -229,27 +351,27 @@ hpa_t gva_to_hpa(struct kvm_vcpu *vcpu, gva_t gva)
 static void release_pt_page_64(struct kvm_vcpu *vcpu, hpa_t page_hpa,
 			       int level)
 {
+	u64 *pos;
+	u64 *end;
+
 	ASSERT(vcpu);
 	ASSERT(VALID_PAGE(page_hpa));
 	ASSERT(level <= PT64_ROOT_LEVEL && level > 0);
 
-	if (level == 1)
-		memset(__va(page_hpa), 0, PAGE_SIZE);
-	else {
-		u64 *pos;
-		u64 *end;
+	for (pos = __va(page_hpa), end = pos + PT64_ENT_PER_PAGE;
+	     pos != end; pos++) {
+		u64 current_ent = *pos;
 
-		for (pos = __va(page_hpa), end = pos + PT64_ENT_PER_PAGE;
-		     pos != end; pos++) {
-			u64 current_ent = *pos;
-
-			*pos = 0;
-			if (is_present_pte(current_ent))
+		if (is_present_pte(current_ent)) {
+			if (level != 1)
 				release_pt_page_64(vcpu,
 						   current_ent &
 						   PT64_BASE_ADDR_MASK,
 						   level - 1);
+			else
+				rmap_remove(vcpu->kvm, pos);
 		}
+		*pos = 0;
 	}
 	kvm_mmu_free_page(vcpu, page_hpa);
 }
@@ -275,6 +397,7 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, hpa_t p)
 			page_header_update_slot(vcpu->kvm, table, v);
 			table[index] = p | PT_PRESENT_MASK | PT_WRITABLE_MASK |
 								PT_USER_MASK;
+			rmap_add(vcpu->kvm, &table[index]);
 			return 0;
 		}
 
@@ -437,6 +560,7 @@ static inline void set_pte_common(struct kvm_vcpu *vcpu,
 	} else {
 		*shadow_pte |= paddr;
 		page_header_update_slot(vcpu->kvm, shadow_pte, gaddr);
+		rmap_add(vcpu->kvm, shadow_pte);
 	}
 }
 
@@ -489,6 +613,7 @@ static void paging_inval_page(struct kvm_vcpu *vcpu, gva_t addr)
 		u64 *table = __va(page_addr);
 
 		if (level == PT_PAGE_TABLE_LEVEL ) {
+			rmap_remove(vcpu->kvm, &table[index]);
 			table[index] = 0;
 			return;
 		}
@@ -679,8 +804,9 @@ void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot)
 		pt = __va(page->page_hpa);
 		for (i = 0; i < PT64_ENT_PER_PAGE; ++i)
 			/* avoid RMW */
-			if (pt[i] & PT_WRITABLE_MASK)
+			if (pt[i] & PT_WRITABLE_MASK) {
+				rmap_remove(kvm, &pt[i]);
 				pt[i] &= ~PT_WRITABLE_MASK;
-
+			}
 	}
 }

drivers/kvm/paging_tmpl.h

Lines changed: 1 addition & 0 deletions
@@ -261,6 +261,7 @@ static int FNAME(fix_write_pf)(struct kvm_vcpu *vcpu,
 		mark_page_dirty(vcpu->kvm, gfn);
 		*shadow_ent |= PT_WRITABLE_MASK;
 		*guest_ent |= PT_DIRTY_MASK;
+		rmap_add(vcpu->kvm, shadow_ent);
 
 		return 1;
 	}
