
Commit 290fc38

Izik Eidus authored and Avi Kivity committed
KVM: Remove the usage of page->private field by rmap
When kvm uses user-allocated pages in the future for the guest, we won't be able to use page->private for rmap, since page->rmap is reserved for the filesystem. So we move the rmap base pointers to the memory slot.

A side effect of this is that we need to store the gfn of each gpte in the shadow pages, since the memory slot is addressed by gfn, instead of hfn like struct page.

Signed-off-by: Izik Eidus <izik@qumranet.com>
Signed-off-by: Avi Kivity <avi@qumranet.com>
1 parent f566e09 commit 290fc38
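
Editor's note: the core of this change is the tagged word that used to live in page->private and now lives in slot->rmap[gfn - slot->base_gfn]. Below is a minimal userspace sketch of that encoding, not code from the patch; rmap_add, struct rmap_desc and RMAP_EXT mirror the driver's names, while calloc and the main() harness are illustrative only. Bit zero clear means the word is a pointer to the single shadow pte mapping the gfn; bit zero set means the word, with the tag stripped, points to a chain of descriptors.

#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define RMAP_EXT 4                      /* sptes per chained descriptor */

struct rmap_desc {
        uint64_t *shadow_ptes[RMAP_EXT];
        struct rmap_desc *more;
};

/* Grow an rmap word 0 -> 1 -> many, mirroring rmap_add() in the patch.
 * Allocation error handling is elided for brevity. */
static void rmap_add(unsigned long *rmapp, uint64_t *spte)
{
        struct rmap_desc *desc;
        int i;

        if (!*rmapp) {
                /* empty: store the lone spte pointer directly, bit 0 clear */
                *rmapp = (unsigned long)spte;
        } else if (!(*rmapp & 1)) {
                /* one entry: spill into a descriptor, tag the word with bit 0 */
                desc = calloc(1, sizeof(*desc));
                desc->shadow_ptes[0] = (uint64_t *)*rmapp;
                desc->shadow_ptes[1] = spte;
                *rmapp = (unsigned long)desc | 1;
        } else {
                /* many: walk to the last descriptor, chain a new one if full */
                desc = (struct rmap_desc *)(*rmapp & ~1ul);
                while (desc->shadow_ptes[RMAP_EXT - 1] && desc->more)
                        desc = desc->more;
                if (desc->shadow_ptes[RMAP_EXT - 1]) {
                        desc->more = calloc(1, sizeof(*desc->more));
                        desc = desc->more;
                }
                for (i = 0; desc->shadow_ptes[i]; ++i)
                        ;
                desc->shadow_ptes[i] = spte;
        }
}

int main(void)
{
        unsigned long rmapp = 0;        /* one slot of slot->rmap[] */
        uint64_t sptes[6];
        int i;

        for (i = 0; i < 6; ++i)
                rmap_add(&rmapp, &sptes[i]);
        assert(rmapp & 1);              /* chained once two or more sptes map the page */
        printf("rmap word: %#lx\n", rmapp);
        return 0;
}

Packing the discriminator into the pointer's low bit keeps the common single-mapping case to one word per guest page; descriptors are allocated only when a page is mapped by more than one spte.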

File tree

4 files changed: 86 additions & 56 deletions


drivers/kvm/kvm.h

Lines changed: 5 additions & 1 deletion
@@ -126,6 +126,8 @@ struct kvm_mmu_page {
 	union kvm_mmu_page_role role;
 
 	u64 *spt;
+	/* hold the gfn of each spte inside spt */
+	gfn_t *gfns;
 	unsigned long slot_bitmap; /* One bit set per slot which has memory
 				    * in this shadow page.
 				    */
@@ -159,7 +161,7 @@ struct kvm_mmu {
 	u64 *pae_root;
 };
 
-#define KVM_NR_MEM_OBJS 20
+#define KVM_NR_MEM_OBJS 40
 
 struct kvm_mmu_memory_cache {
 	int nobjs;
@@ -402,6 +404,7 @@ struct kvm_memory_slot {
 	unsigned long npages;
 	unsigned long flags;
 	struct page **phys_mem;
+	unsigned long *rmap;
 	unsigned long *dirty_bitmap;
 };
 
@@ -554,6 +557,7 @@ struct page *gva_to_page(struct kvm_vcpu *vcpu, gva_t gva);
 
 extern hpa_t bad_page_address;
 
+gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn);
 struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn);
 struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn);
 void mark_page_dirty(struct kvm *kvm, gfn_t gfn);
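
For orientation, the new kvm_memory_slot::rmap array holds one word per guest page and is indexed by the gfn's offset within the slot, exactly as the gfn_to_rmap() helper added in mmu.c below computes it. A small sketch with hypothetical values; struct memslot here is a pared-down stand-in, not the kernel type:

#include <stdlib.h>

typedef unsigned long gfn_t;

/* Pared-down slot: just the fields the rmap lookup touches. */
struct memslot {
        gfn_t base_gfn;
        unsigned long npages;
        unsigned long *rmap;            /* one tagged word per guest page */
};

int main(void)
{
        /* hypothetical slot covering gfns [0x100, 0x120) */
        struct memslot slot = { .base_gfn = 0x100, .npages = 32 };
        slot.rmap = calloc(slot.npages, sizeof(*slot.rmap));

        gfn_t gfn = 0x11f;              /* last page of the slot */
        unsigned long *rmapp = &slot.rmap[gfn - slot.base_gfn];
        (void)rmapp;                    /* index 0x1f, in bounds by construction */

        free(slot.rmap);
        return 0;
}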

drivers/kvm/kvm_main.c

Lines changed: 9 additions & 2 deletions
@@ -309,6 +309,8 @@ static void kvm_free_physmem_slot(struct kvm_memory_slot *free,
 			__free_page(free->phys_mem[i]);
 		vfree(free->phys_mem);
 	}
+	if (!dont || free->rmap != dont->rmap)
+		vfree(free->rmap);
 
 	if (!dont || free->dirty_bitmap != dont->dirty_bitmap)
 		vfree(free->dirty_bitmap);
@@ -719,13 +721,18 @@ static int kvm_vm_ioctl_set_memory_region(struct kvm *kvm,
 		if (!new.phys_mem)
 			goto out_unlock;
 
+		new.rmap = vmalloc(npages * sizeof(struct page*));
+
+		if (!new.rmap)
+			goto out_unlock;
+
 		memset(new.phys_mem, 0, npages * sizeof(struct page *));
+		memset(new.rmap, 0, npages * sizeof(*new.rmap));
 		for (i = 0; i < npages; ++i) {
 			new.phys_mem[i] = alloc_page(GFP_HIGHUSER
 						     | __GFP_ZERO);
 			if (!new.phys_mem[i])
 				goto out_unlock;
-			set_page_private(new.phys_mem[i],0);
 		}
 	}
 
@@ -909,7 +916,7 @@ static int kvm_vm_ioctl_set_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
 	return r;
 }
 
-static gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn)
+gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn)
 {
 	int i;
 	struct kvm_mem_alias *alias;
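
A small nit in the hunk above: new.rmap is sized with sizeof(struct page*) but cleared and used as unsigned long entries. The two types have the same width on the i386 and x86-64 targets of this driver, so the allocation is correct, but sizing by the element type would state the intent directly. A hypothetical follow-up cleanup, not part of this commit:

-		new.rmap = vmalloc(npages * sizeof(struct page*));
+		new.rmap = vmalloc(npages * sizeof(*new.rmap));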

drivers/kvm/mmu.c

Lines changed: 70 additions & 52 deletions
@@ -276,7 +276,7 @@ static int mmu_topup_memory_caches(struct kvm_vcpu *vcpu)
 				   rmap_desc_cache, 1);
 	if (r)
 		goto out;
-	r = mmu_topup_memory_cache_page(&vcpu->mmu_page_cache, 4);
+	r = mmu_topup_memory_cache_page(&vcpu->mmu_page_cache, 8);
 	if (r)
 		goto out;
 	r = mmu_topup_memory_cache(&vcpu->mmu_page_header_cache,
@@ -326,36 +326,53 @@ static void mmu_free_rmap_desc(struct kvm_rmap_desc *rd)
 	kfree(rd);
 }
 
+/*
+ * Take gfn and return the reverse mapping to it.
+ * Note: gfn must be unaliased before this function get called
+ */
+
+static unsigned long *gfn_to_rmap(struct kvm *kvm, gfn_t gfn)
+{
+	struct kvm_memory_slot *slot;
+
+	slot = gfn_to_memslot(kvm, gfn);
+	return &slot->rmap[gfn - slot->base_gfn];
+}
+
 /*
  * Reverse mapping data structures:
  *
- * If page->private bit zero is zero, then page->private points to the
- * shadow page table entry that points to page_address(page).
+ * If rmapp bit zero is zero, then rmapp point to the shadw page table entry
+ * that points to page_address(page).
  *
- * If page->private bit zero is one, (then page->private & ~1) points
- * to a struct kvm_rmap_desc containing more mappings.
+ * If rmapp bit zero is one, (then rmap & ~1) points to a struct kvm_rmap_desc
+ * containing more mappings.
  */
-static void rmap_add(struct kvm_vcpu *vcpu, u64 *spte)
+static void rmap_add(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn)
 {
-	struct page *page;
+	struct kvm_mmu_page *page;
 	struct kvm_rmap_desc *desc;
+	unsigned long *rmapp;
 	int i;
 
 	if (!is_rmap_pte(*spte))
 		return;
-	page = pfn_to_page((*spte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT);
-	if (!page_private(page)) {
+	gfn = unalias_gfn(vcpu->kvm, gfn);
+	page = page_header(__pa(spte));
+	page->gfns[spte - page->spt] = gfn;
+	rmapp = gfn_to_rmap(vcpu->kvm, gfn);
+	if (!*rmapp) {
 		rmap_printk("rmap_add: %p %llx 0->1\n", spte, *spte);
-		set_page_private(page,(unsigned long)spte);
-	} else if (!(page_private(page) & 1)) {
+		*rmapp = (unsigned long)spte;
+	} else if (!(*rmapp & 1)) {
 		rmap_printk("rmap_add: %p %llx 1->many\n", spte, *spte);
 		desc = mmu_alloc_rmap_desc(vcpu);
-		desc->shadow_ptes[0] = (u64 *)page_private(page);
+		desc->shadow_ptes[0] = (u64 *)*rmapp;
 		desc->shadow_ptes[1] = spte;
-		set_page_private(page,(unsigned long)desc | 1);
+		*rmapp = (unsigned long)desc | 1;
 	} else {
 		rmap_printk("rmap_add: %p %llx many->many\n", spte, *spte);
-		desc = (struct kvm_rmap_desc *)(page_private(page) & ~1ul);
+		desc = (struct kvm_rmap_desc *)(*rmapp & ~1ul);
 		while (desc->shadow_ptes[RMAP_EXT-1] && desc->more)
 			desc = desc->more;
 		if (desc->shadow_ptes[RMAP_EXT-1]) {
@@ -368,7 +385,7 @@ static void rmap_add(struct kvm_vcpu *vcpu, u64 *spte)
 	}
 }
 
-static void rmap_desc_remove_entry(struct page *page,
+static void rmap_desc_remove_entry(unsigned long *rmapp,
 				   struct kvm_rmap_desc *desc,
 				   int i,
 				   struct kvm_rmap_desc *prev_desc)
@@ -382,44 +399,46 @@ static void rmap_desc_remove_entry(struct page *page,
 	if (j != 0)
 		return;
 	if (!prev_desc && !desc->more)
-		set_page_private(page,(unsigned long)desc->shadow_ptes[0]);
+		*rmapp = (unsigned long)desc->shadow_ptes[0];
 	else
 		if (prev_desc)
 			prev_desc->more = desc->more;
 		else
-			set_page_private(page,(unsigned long)desc->more | 1);
+			*rmapp = (unsigned long)desc->more | 1;
 	mmu_free_rmap_desc(desc);
 }
 
-static void rmap_remove(u64 *spte)
+static void rmap_remove(struct kvm *kvm, u64 *spte)
 {
-	struct page *page;
 	struct kvm_rmap_desc *desc;
 	struct kvm_rmap_desc *prev_desc;
+	struct kvm_mmu_page *page;
+	unsigned long *rmapp;
 	int i;
 
 	if (!is_rmap_pte(*spte))
 		return;
-	page = pfn_to_page((*spte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT);
-	if (!page_private(page)) {
+	page = page_header(__pa(spte));
+	rmapp = gfn_to_rmap(kvm, page->gfns[spte - page->spt]);
+	if (!*rmapp) {
 		printk(KERN_ERR "rmap_remove: %p %llx 0->BUG\n", spte, *spte);
 		BUG();
-	} else if (!(page_private(page) & 1)) {
+	} else if (!(*rmapp & 1)) {
 		rmap_printk("rmap_remove: %p %llx 1->0\n", spte, *spte);
-		if ((u64 *)page_private(page) != spte) {
+		if ((u64 *)*rmapp != spte) {
 			printk(KERN_ERR "rmap_remove: %p %llx 1->BUG\n",
 			       spte, *spte);
 			BUG();
 		}
-		set_page_private(page,0);
+		*rmapp = 0;
 	} else {
 		rmap_printk("rmap_remove: %p %llx many->many\n", spte, *spte);
-		desc = (struct kvm_rmap_desc *)(page_private(page) & ~1ul);
+		desc = (struct kvm_rmap_desc *)(*rmapp & ~1ul);
 		prev_desc = NULL;
 		while (desc) {
 			for (i = 0; i < RMAP_EXT && desc->shadow_ptes[i]; ++i)
 				if (desc->shadow_ptes[i] == spte) {
-					rmap_desc_remove_entry(page,
+					rmap_desc_remove_entry(rmapp,
 							       desc, i,
 							       prev_desc);
 					return;
@@ -433,28 +452,25 @@ static void rmap_remove(u64 *spte)
 
 static void rmap_write_protect(struct kvm_vcpu *vcpu, u64 gfn)
 {
-	struct kvm *kvm = vcpu->kvm;
-	struct page *page;
 	struct kvm_rmap_desc *desc;
+	unsigned long *rmapp;
 	u64 *spte;
 
-	page = gfn_to_page(kvm, gfn);
-	BUG_ON(!page);
+	gfn = unalias_gfn(vcpu->kvm, gfn);
+	rmapp = gfn_to_rmap(vcpu->kvm, gfn);
 
-	while (page_private(page)) {
-		if (!(page_private(page) & 1))
-			spte = (u64 *)page_private(page);
+	while (*rmapp) {
+		if (!(*rmapp & 1))
			spte = (u64 *)*rmapp;
 		else {
-			desc = (struct kvm_rmap_desc *)(page_private(page) & ~1ul);
+			desc = (struct kvm_rmap_desc *)(*rmapp & ~1ul);
 			spte = desc->shadow_ptes[0];
 		}
 		BUG_ON(!spte);
-		BUG_ON((*spte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT
-		       != page_to_pfn(page));
 		BUG_ON(!(*spte & PT_PRESENT_MASK));
 		BUG_ON(!(*spte & PT_WRITABLE_MASK));
 		rmap_printk("rmap_write_protect: spte %p %llx\n", spte, *spte);
-		rmap_remove(spte);
+		rmap_remove(vcpu->kvm, spte);
 		set_shadow_pte(spte, *spte & ~PT_WRITABLE_MASK);
 		kvm_flush_remote_tlbs(vcpu->kvm);
 	}
@@ -482,6 +498,7 @@ static void kvm_mmu_free_page(struct kvm *kvm,
 	ASSERT(is_empty_shadow_page(page_head->spt));
 	list_del(&page_head->link);
 	__free_page(virt_to_page(page_head->spt));
+	__free_page(virt_to_page(page_head->gfns));
 	kfree(page_head);
 	++kvm->n_free_mmu_pages;
 }
@@ -502,6 +519,7 @@ static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu,
 	page = mmu_memory_cache_alloc(&vcpu->mmu_page_header_cache,
 				      sizeof *page);
 	page->spt = mmu_memory_cache_alloc(&vcpu->mmu_page_cache, PAGE_SIZE);
+	page->gfns = mmu_memory_cache_alloc(&vcpu->mmu_page_cache, PAGE_SIZE);
 	set_page_private(virt_to_page(page->spt), (unsigned long)page);
 	list_add(&page->link, &vcpu->kvm->active_mmu_pages);
 	ASSERT(is_empty_shadow_page(page->spt));
@@ -667,7 +685,7 @@ static void kvm_mmu_page_unlink_children(struct kvm *kvm,
 	if (page->role.level == PT_PAGE_TABLE_LEVEL) {
 		for (i = 0; i < PT64_ENT_PER_PAGE; ++i) {
 			if (is_shadow_present_pte(pt[i]))
-				rmap_remove(&pt[i]);
+				rmap_remove(kvm, &pt[i]);
 			pt[i] = shadow_trap_nonpresent_pte;
 		}
 		kvm_flush_remote_tlbs(kvm);
@@ -832,7 +850,7 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, hpa_t p)
 			page_header_update_slot(vcpu->kvm, table, v);
 			table[index] = p | PT_PRESENT_MASK | PT_WRITABLE_MASK |
 								PT_USER_MASK;
-			rmap_add(vcpu, &table[index]);
+			rmap_add(vcpu, &table[index], v >> PAGE_SHIFT);
 			return 0;
 		}
 
@@ -1123,7 +1141,7 @@ static void mmu_pte_write_zap_pte(struct kvm_vcpu *vcpu,
 	pte = *spte;
 	if (is_shadow_present_pte(pte)) {
 		if (page->role.level == PT_PAGE_TABLE_LEVEL)
-			rmap_remove(spte);
+			rmap_remove(vcpu->kvm, spte);
 		else {
 			child = page_header(pte & PT64_BASE_ADDR_MASK);
 			mmu_page_remove_parent_pte(child, spte);
@@ -1340,7 +1358,7 @@ void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot)
 		for (i = 0; i < PT64_ENT_PER_PAGE; ++i)
 			/* avoid RMW */
 			if (pt[i] & PT_WRITABLE_MASK) {
-				rmap_remove(&pt[i]);
+				rmap_remove(kvm, &pt[i]);
 				pt[i] &= ~PT_WRITABLE_MASK;
 			}
 	}
@@ -1470,15 +1488,15 @@ static int count_rmaps(struct kvm_vcpu *vcpu)
 		struct kvm_rmap_desc *d;
 
 		for (j = 0; j < m->npages; ++j) {
-			struct page *page = m->phys_mem[j];
+			unsigned long *rmapp = &m->rmap[j];
 
-			if (!page->private)
+			if (!*rmapp)
 				continue;
-			if (!(page->private & 1)) {
+			if (!(*rmapp & 1)) {
 				++nmaps;
 				continue;
 			}
-			d = (struct kvm_rmap_desc *)(page->private & ~1ul);
+			d = (struct kvm_rmap_desc *)(*rmapp & ~1ul);
 			while (d) {
 				for (k = 0; k < RMAP_EXT; ++k)
 					if (d->shadow_ptes[k])
@@ -1530,18 +1548,18 @@ static void audit_rmap(struct kvm_vcpu *vcpu)
 static void audit_write_protection(struct kvm_vcpu *vcpu)
 {
 	struct kvm_mmu_page *page;
+	struct kvm_memory_slot *slot;
+	unsigned long *rmapp;
+	gfn_t gfn;
 
 	list_for_each_entry(page, &vcpu->kvm->active_mmu_pages, link) {
-		hfn_t hfn;
-		struct page *pg;
-
 		if (page->role.metaphysical)
 			continue;
 
-		hfn = gpa_to_hpa(vcpu, (gpa_t)page->gfn << PAGE_SHIFT)
-			>> PAGE_SHIFT;
-		pg = pfn_to_page(hfn);
-		if (pg->private)
+		slot = gfn_to_memslot(vcpu->kvm, page->gfn);
+		gfn = unalias_gfn(vcpu->kvm, page->gfn);
+		rmapp = &slot->rmap[gfn - slot->base_gfn];
+		if (*rmapp)
 			printk(KERN_ERR "%s: (%s) shadow page has writable"
 			       " mappings: gfn %lx role %x\n",
 			       __FUNCTION__, audit_msg, page->gfn,
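
To make the audit-side traversal concrete, here is a hedged userspace model of the count_rmaps() walk over a single rmap word. The struct is pared down and the test values in main() are invented; nothing below is patch code.

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define RMAP_EXT 4

struct rmap_desc {
        uint64_t *shadow_ptes[RMAP_EXT];
        struct rmap_desc *more;
};

/* Count the sptes behind one rmap word, mirroring the count_rmaps() loop. */
static int count_rmap(unsigned long word)
{
        const struct rmap_desc *d;
        int k, n = 0;

        if (!word)
                return 0;               /* no mappings */
        if (!(word & 1))
                return 1;               /* bare spte pointer */
        for (d = (const struct rmap_desc *)(word & ~1ul); d; d = d->more)
                for (k = 0; k < RMAP_EXT; ++k)
                        if (d->shadow_ptes[k])
                                ++n;
        return n;
}

int main(void)
{
        uint64_t spte_a, spte_b;
        struct rmap_desc d = { .shadow_ptes = { &spte_a, &spte_b } };

        assert(count_rmap(0) == 0);
        assert(count_rmap((unsigned long)&spte_a) == 1);
        assert(count_rmap((unsigned long)&d | 1) == 2);
        puts("ok");
        return 0;
}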

drivers/kvm/paging_tmpl.h

Lines changed: 2 additions & 1 deletion
@@ -295,7 +295,8 @@ static void FNAME(set_pte_common)(struct kvm_vcpu *vcpu,
 	set_shadow_pte(shadow_pte, spte);
 	page_header_update_slot(vcpu->kvm, shadow_pte, gaddr);
 	if (!was_rmapped)
-		rmap_add(vcpu, shadow_pte);
+		rmap_add(vcpu, shadow_pte, (gaddr & PT64_BASE_ADDR_MASK)
+			 >> PAGE_SHIFT);
 	if (!ptwrite || !*ptwrite)
 		vcpu->last_pte_updated = shadow_pte;
 }
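
The new third argument to rmap_add() here recovers the gfn from the guest physical address: PT64_BASE_ADDR_MASK strips the page-offset bits (and the high bits reserved in a 64-bit pte), and the shift by PAGE_SHIFT turns the masked address into a frame number. A quick self-check, assuming the driver's 4 KiB pages and the usual bits-12..51 mask:

#include <assert.h>
#include <stdint.h>

/* 4 KiB pages; the mask keeps bits 12..51, as the x86-64 pte layout does. */
#define PAGE_SHIFT 12
#define PT64_BASE_ADDR_MASK (((1ull << 52) - 1) & ~((1ull << PAGE_SHIFT) - 1))

int main(void)
{
        uint64_t gaddr = 0x12345678;
        uint64_t gfn = (gaddr & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT;

        assert(gfn == 0x12345);         /* page offset 0x678 is discarded */
        return 0;
}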
