Commit 77da938

Author: Matthew Wilcox

mm: Convert collapse_shmem to XArray

I found another victim of the radix tree being hard to use. Because there
was no call to radix_tree_preload(), khugepaged was allocating
radix_tree_nodes using GFP_ATOMIC. I also converted a
local_irq_save()/restore() pair to disable()/enable().

Signed-off-by: Matthew Wilcox <willy@infradead.org>
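
For context, this is the allocation-retry idiom the patch uses to get rid
of the GFP_ATOMIC allocations: xas_create_range() is attempted under the
lock, and on -ENOMEM the lock is dropped so that xas_nomem() can allocate
the missing node with GFP_KERNEL before retrying. Condensed from the diff
below (the comments are annotations added here; the out label and the rest
of collapse_shmem() are elided):

	XA_STATE_ORDER(xas, &mapping->i_pages, start, HPAGE_PMD_ORDER);

	/* This will be less messy when we use multi-index entries */
	do {
		xas_lock_irq(&xas);
		xas_create_range(&xas);	/* may fail with -ENOMEM */
		if (!xas_error(&xas))
			break;		/* slots exist; lock still held */
		xas_unlock_irq(&xas);
		/* Allocate the missing node with GFP_KERNEL, then retry. */
		if (!xas_nomem(&xas, GFP_KERNEL))
			goto out;	/* really out of memory: give up */
	} while (1);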
1 parent aa5dc07

1 file changed

mm/khugepaged.c

Lines changed: 66 additions & 93 deletions
@@ -1288,30 +1288,29 @@ static void retract_page_tables(struct address_space *mapping, pgoff_t pgoff)
  *
  * Basic scheme is simple, details are more complex:
  *  - allocate and freeze a new huge page;
- *  - scan over radix tree replacing old pages the new one
+ *  - scan page cache replacing old pages with the new one
  *    + swap in pages if necessary;
  *    + fill in gaps;
- *    + keep old pages around in case if rollback is required;
- *  - if replacing succeed:
+ *    + keep old pages around in case rollback is required;
+ *  - if replacing succeeds:
  *    + copy data over;
  *    + free old pages;
  *    + unfreeze huge page;
  *  - if replacing failed;
  *    + put all pages back and unfreeze them;
- *    + restore gaps in the radix-tree;
+ *    + restore gaps in the page cache;
  *    + free huge page;
  */
 static void collapse_shmem(struct mm_struct *mm,
 		struct address_space *mapping, pgoff_t start,
 		struct page **hpage, int node)
 {
 	gfp_t gfp;
-	struct page *page, *new_page, *tmp;
+	struct page *new_page;
 	struct mem_cgroup *memcg;
 	pgoff_t index, end = start + HPAGE_PMD_NR;
 	LIST_HEAD(pagelist);
-	struct radix_tree_iter iter;
-	void **slot;
+	XA_STATE_ORDER(xas, &mapping->i_pages, start, HPAGE_PMD_ORDER);
 	int nr_none = 0, result = SCAN_SUCCEED;
 
 	VM_BUG_ON(start & (HPAGE_PMD_NR - 1));
@@ -1336,48 +1335,49 @@ static void collapse_shmem(struct mm_struct *mm,
 	__SetPageLocked(new_page);
 	BUG_ON(!page_ref_freeze(new_page, 1));
 
-
 	/*
-	 * At this point the new_page is 'frozen' (page_count() is zero), locked
-	 * and not up-to-date. It's safe to insert it into radix tree, because
-	 * nobody would be able to map it or use it in other way until we
-	 * unfreeze it.
+	 * At this point the new_page is 'frozen' (page_count() is zero),
+	 * locked and not up-to-date. It's safe to insert it into the page
+	 * cache, because nobody would be able to map it or use it in other
+	 * way until we unfreeze it.
 	 */
 
-	index = start;
-	xa_lock_irq(&mapping->i_pages);
-	radix_tree_for_each_slot(slot, &mapping->i_pages, &iter, start) {
-		int n = min(iter.index, end) - index;
-
-		/*
-		 * Handle holes in the radix tree: charge it from shmem and
-		 * insert relevant subpage of new_page into the radix-tree.
-		 */
-		if (n && !shmem_charge(mapping->host, n)) {
-			result = SCAN_FAIL;
+	/* This will be less messy when we use multi-index entries */
+	do {
+		xas_lock_irq(&xas);
+		xas_create_range(&xas);
+		if (!xas_error(&xas))
 			break;
-		}
-		nr_none += n;
-		for (; index < min(iter.index, end); index++) {
-			radix_tree_insert(&mapping->i_pages, index,
-					new_page + (index % HPAGE_PMD_NR));
-		}
+		xas_unlock_irq(&xas);
+		if (!xas_nomem(&xas, GFP_KERNEL))
+			goto out;
+	} while (1);
 
-		/* We are done. */
-		if (index >= end)
-			break;
+	xas_set(&xas, start);
+	for (index = start; index < end; index++) {
+		struct page *page = xas_next(&xas);
+
+		VM_BUG_ON(index != xas.xa_index);
+		if (!page) {
+			if (!shmem_charge(mapping->host, 1)) {
+				result = SCAN_FAIL;
+				break;
+			}
+			xas_store(&xas, new_page + (index % HPAGE_PMD_NR));
+			nr_none++;
+			continue;
+		}
 
-		page = radix_tree_deref_slot_protected(slot,
-				&mapping->i_pages.xa_lock);
 		if (xa_is_value(page) || !PageUptodate(page)) {
-			xa_unlock_irq(&mapping->i_pages);
+			xas_unlock_irq(&xas);
 			/* swap in or instantiate fallocated page */
 			if (shmem_getpage(mapping->host, index, &page,
 						SGP_NOHUGE)) {
 				result = SCAN_FAIL;
-				goto tree_unlocked;
+				goto xa_unlocked;
 			}
-			xa_lock_irq(&mapping->i_pages);
+			xas_lock_irq(&xas);
+			xas_set(&xas, index);
 		} else if (trylock_page(page)) {
 			get_page(page);
 		} else {
@@ -1397,7 +1397,7 @@ static void collapse_shmem(struct mm_struct *mm,
 			result = SCAN_TRUNCATED;
 			goto out_unlock;
 		}
-		xa_unlock_irq(&mapping->i_pages);
+		xas_unlock_irq(&xas);
 
 		if (isolate_lru_page(page)) {
 			result = SCAN_DEL_PAGE_LRU;
@@ -1407,17 +1407,16 @@ static void collapse_shmem(struct mm_struct *mm,
 		if (page_mapped(page))
 			unmap_mapping_pages(mapping, index, 1, false);
 
-		xa_lock_irq(&mapping->i_pages);
+		xas_lock_irq(&xas);
+		xas_set(&xas, index);
 
-		slot = radix_tree_lookup_slot(&mapping->i_pages, index);
-		VM_BUG_ON_PAGE(page != radix_tree_deref_slot_protected(slot,
-					&mapping->i_pages.xa_lock), page);
+		VM_BUG_ON_PAGE(page != xas_load(&xas), page);
 		VM_BUG_ON_PAGE(page_mapped(page), page);
 
 		/*
 		 * The page is expected to have page_count() == 3:
 		 *  - we hold a pin on it;
-		 *  - one reference from radix tree;
+		 *  - one reference from page cache;
 		 *  - one from isolate_lru_page;
 		 */
 		if (!page_ref_freeze(page, 3)) {
@@ -1432,56 +1431,30 @@ static void collapse_shmem(struct mm_struct *mm,
 		list_add_tail(&page->lru, &pagelist);
 
 		/* Finally, replace with the new page. */
-		radix_tree_replace_slot(&mapping->i_pages, slot,
-				new_page + (index % HPAGE_PMD_NR));
-
-		slot = radix_tree_iter_resume(slot, &iter);
-		index++;
+		xas_store(&xas, new_page + (index % HPAGE_PMD_NR));
 		continue;
 out_lru:
-		xa_unlock_irq(&mapping->i_pages);
+		xas_unlock_irq(&xas);
 		putback_lru_page(page);
 out_isolate_failed:
 		unlock_page(page);
 		put_page(page);
-		goto tree_unlocked;
+		goto xa_unlocked;
out_unlock:
 		unlock_page(page);
 		put_page(page);
 		break;
 	}
+	xas_unlock_irq(&xas);
 
-	/*
-	 * Handle hole in radix tree at the end of the range.
-	 * This code only triggers if there's nothing in radix tree
-	 * beyond 'end'.
-	 */
-	if (result == SCAN_SUCCEED && index < end) {
-		int n = end - index;
-
-		if (!shmem_charge(mapping->host, n)) {
-			result = SCAN_FAIL;
-			goto tree_locked;
-		}
-
-		for (; index < end; index++) {
-			radix_tree_insert(&mapping->i_pages, index,
-					new_page + (index % HPAGE_PMD_NR));
-		}
-		nr_none += n;
-	}
-
-tree_locked:
-	xa_unlock_irq(&mapping->i_pages);
-tree_unlocked:
-
+xa_unlocked:
 	if (result == SCAN_SUCCEED) {
-		unsigned long flags;
+		struct page *page, *tmp;
 		struct zone *zone = page_zone(new_page);
 
 		/*
-		 * Replacing old pages with new one has succeed, now we need to
-		 * copy the content and free old pages.
+		 * Replacing old pages with new one has succeeded, now we
+		 * need to copy the content and free the old pages.
 		 */
 		list_for_each_entry_safe(page, tmp, &pagelist, lru) {
 			copy_highpage(new_page + (page->index % HPAGE_PMD_NR),
@@ -1495,16 +1468,16 @@ static void collapse_shmem(struct mm_struct *mm,
 			put_page(page);
 		}
 
-		local_irq_save(flags);
+		local_irq_disable();
 		__inc_node_page_state(new_page, NR_SHMEM_THPS);
 		if (nr_none) {
 			__mod_node_page_state(zone->zone_pgdat, NR_FILE_PAGES, nr_none);
 			__mod_node_page_state(zone->zone_pgdat, NR_SHMEM, nr_none);
 		}
-		local_irq_restore(flags);
+		local_irq_enable();
 
 		/*
-		 * Remove pte page tables, so we can re-faulti
+		 * Remove pte page tables, so we can re-fault
 		 * the page as huge.
 		 */
 		retract_page_tables(mapping, start);
@@ -1521,37 +1494,37 @@ static void collapse_shmem(struct mm_struct *mm,
 
 		khugepaged_pages_collapsed++;
 	} else {
-		/* Something went wrong: rollback changes to the radix-tree */
+		struct page *page;
+		/* Something went wrong: roll back page cache changes */
 		shmem_uncharge(mapping->host, nr_none);
-		xa_lock_irq(&mapping->i_pages);
-		radix_tree_for_each_slot(slot, &mapping->i_pages, &iter, start) {
-			if (iter.index >= end)
-				break;
+		xas_lock_irq(&xas);
+		xas_set(&xas, start);
+		xas_for_each(&xas, page, end - 1) {
 			page = list_first_entry_or_null(&pagelist,
 					struct page, lru);
-			if (!page || iter.index < page->index) {
+			if (!page || xas.xa_index < page->index) {
 				if (!nr_none)
 					break;
 				nr_none--;
 				/* Put holes back where they were */
-				radix_tree_delete(&mapping->i_pages, iter.index);
+				xas_store(&xas, NULL);
 				continue;
 			}
 
-			VM_BUG_ON_PAGE(page->index != iter.index, page);
+			VM_BUG_ON_PAGE(page->index != xas.xa_index, page);
 
 			/* Unfreeze the page. */
 			list_del(&page->lru);
 			page_ref_unfreeze(page, 2);
-			radix_tree_replace_slot(&mapping->i_pages, slot, page);
-			slot = radix_tree_iter_resume(slot, &iter);
-			xa_unlock_irq(&mapping->i_pages);
+			xas_store(&xas, page);
+			xas_pause(&xas);
+			xas_unlock_irq(&xas);
 			putback_lru_page(page);
 			unlock_page(page);
-			xa_lock_irq(&mapping->i_pages);
+			xas_lock_irq(&xas);
 		}
 		VM_BUG_ON(nr_none);
-		xa_unlock_irq(&mapping->i_pages);
+		xas_unlock_irq(&xas);
 
 		/* Unfreeze new_page, caller would take care about freeing it */
 		page_ref_unfreeze(new_page, 1);
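
A footnote on the commit message's second point: local_irq_save()/restore()
is needed only when the caller may already have interrupts disabled; this
path always runs with interrupts enabled, so the plain disable/enable pair
suffices. A condensed before/after of the statistics update (only one of
the counter updates shown):

	/* Before: saved and restored the IRQ flags word. */
	unsigned long flags;
	local_irq_save(flags);
	__inc_node_page_state(new_page, NR_SHMEM_THPS);
	local_irq_restore(flags);

	/* After: interrupts are known to be enabled on entry. */
	local_irq_disable();
	__inc_node_page_state(new_page, NR_SHMEM_THPS);
	local_irq_enable();

Note also the rollback loop above: xas_pause() is called before
xas_unlock_irq() so that the xas_for_each() walk can drop the lock
mid-iteration and resume safely once the lock is retaken.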
