
Commit 1fb3f8c

Mel Gorman authored and torvalds committed
mm: compaction: capture a suitable high-order page immediately when it is made available
While compaction is migrating pages to free up large contiguous blocks for allocation, it races with other allocation requests that may steal these blocks or break them up. This patch alters direct compaction to capture a suitable free page as soon as it becomes available, reducing this race. It uses logic similar to split_free_page() to ensure that watermarks are still obeyed.

Signed-off-by: Mel Gorman <mgorman@suse.de>
Reviewed-by: Rik van Riel <riel@redhat.com>
Reviewed-by: Minchan Kim <minchan@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
1 parent 83fde0f commit 1fb3f8c

5 files changed: +130 additions, -29 deletions
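The heart of the patch is compact_capture_page() in mm/compaction.c below: it speculatively scans the zone free lists without the zone lock, and only takes the lock (via trylock, so contention aborts the capture) to re-check and remove a suitable block. The following userspace program is a minimal sketch of that check-then-lock-then-recheck pattern using pthreads; the free-list model and every name in it are illustrative stand-ins, not kernel API.

#include <pthread.h>
#include <stdio.h>

#define MAX_ORDER 11

struct free_area {
	int nr_free;			/* blocks on this order's free list */
};

static struct free_area free_area[MAX_ORDER];
static pthread_mutex_t zone_lock = PTHREAD_MUTEX_INITIALIZER;

/*
 * Try to capture a free block of at least @order. Returns the order
 * captured, or -1 if nothing suitable was found or the lock was contended.
 */
static int capture_block(int order)
{
	int o;

	for (o = order; o < MAX_ORDER; o++) {
		/* Speculative check without the lock */
		if (free_area[o].nr_free == 0)
			continue;

		/* Lock is contended: back off rather than spin */
		if (pthread_mutex_trylock(&zone_lock) != 0)
			return -1;

		/* Re-check under the lock before committing */
		if (free_area[o].nr_free > 0) {
			free_area[o].nr_free--;		/* "capture" it */
			pthread_mutex_unlock(&zone_lock);
			return o;
		}
		pthread_mutex_unlock(&zone_lock);
	}
	return -1;
}

int main(void)
{
	int got;

	free_area[4].nr_free = 1;	/* one order-4 block is free */

	got = capture_block(3);
	if (got >= 0)
		printf("captured an order-%d block\n", got);
	else
		printf("no suitable block captured\n");
	return 0;
}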

include/linux/compaction.h

Lines changed: 2 additions & 2 deletions
@@ -22,7 +22,7 @@ extern int sysctl_extfrag_handler(struct ctl_table *table, int write,
 extern int fragmentation_index(struct zone *zone, unsigned int order);
 extern unsigned long try_to_compact_pages(struct zonelist *zonelist,
 			int order, gfp_t gfp_mask, nodemask_t *mask,
-			bool sync, bool *contended);
+			bool sync, bool *contended, struct page **page);
 extern int compact_pgdat(pg_data_t *pgdat, int order);
 extern unsigned long compaction_suitable(struct zone *zone, int order);
 
@@ -64,7 +64,7 @@ static inline bool compaction_deferred(struct zone *zone, int order)
 #else
 static inline unsigned long try_to_compact_pages(struct zonelist *zonelist,
 			int order, gfp_t gfp_mask, nodemask_t *nodemask,
-			bool sync, bool *contended)
+			bool sync, bool *contended, struct page **page)
 {
 	return COMPACT_CONTINUE;
 }

include/linux/mm.h

Lines changed: 1 addition & 0 deletions
@@ -442,6 +442,7 @@ void put_pages_list(struct list_head *pages);
 
 void split_page(struct page *page, unsigned int order);
 int split_free_page(struct page *page);
+int capture_free_page(struct page *page, int alloc_order, int migratetype);
 
 /*
  * Compound pages have a destructor function. Provide a

mm/compaction.c

Lines changed: 79 additions & 11 deletions
@@ -91,6 +91,60 @@ static inline bool compact_trylock_irqsave(spinlock_t *lock,
 	return compact_checklock_irqsave(lock, flags, false, cc);
 }
 
+static void compact_capture_page(struct compact_control *cc)
+{
+	unsigned long flags;
+	int mtype, mtype_low, mtype_high;
+
+	if (!cc->page || *cc->page)
+		return;
+
+	/*
+	 * For MIGRATE_MOVABLE allocations we capture a suitable page ASAP
+	 * regardless of the migratetype of the freelist it is captured from.
+	 * This is fine because the order for a high-order MIGRATE_MOVABLE
+	 * allocation is typically at least a pageblock size and overall
+	 * fragmentation is not impaired. Other allocation types must
+	 * capture pages from their own migratelist because otherwise they
+	 * could pollute other pageblocks like MIGRATE_MOVABLE with
+	 * difficult-to-move pages and make fragmentation worse overall.
+	 */
+	if (cc->migratetype == MIGRATE_MOVABLE) {
+		mtype_low = 0;
+		mtype_high = MIGRATE_PCPTYPES;
+	} else {
+		mtype_low = cc->migratetype;
+		mtype_high = cc->migratetype + 1;
+	}
+
+	/* Speculatively examine the free lists without zone lock */
+	for (mtype = mtype_low; mtype < mtype_high; mtype++) {
+		int order;
+		for (order = cc->order; order < MAX_ORDER; order++) {
+			struct page *page;
+			struct free_area *area;
+			area = &(cc->zone->free_area[order]);
+			if (list_empty(&area->free_list[mtype]))
+				continue;
+
+			/* Take the lock and attempt capture of the page */
+			if (!compact_trylock_irqsave(&cc->zone->lock, &flags, cc))
+				return;
+			if (!list_empty(&area->free_list[mtype])) {
+				page = list_entry(area->free_list[mtype].next,
+							struct page, lru);
+				if (capture_free_page(page, cc->order, mtype)) {
+					spin_unlock_irqrestore(&cc->zone->lock,
+								flags);
+					*cc->page = page;
+					return;
+				}
+			}
+			spin_unlock_irqrestore(&cc->zone->lock, flags);
+		}
+	}
+}
+
 /*
  * Isolate free pages onto a private freelist. Caller must hold zone->lock.
  * If @strict is true, will abort returning 0 on any invalid PFNs or non-free
@@ -645,7 +699,6 @@ static isolate_migrate_t isolate_migratepages(struct zone *zone,
 static int compact_finished(struct zone *zone,
 			    struct compact_control *cc)
 {
-	unsigned int order;
 	unsigned long watermark;
 
 	if (fatal_signal_pending(current))
@@ -688,14 +741,22 @@ static int compact_finished(struct zone *zone,
 		return COMPACT_CONTINUE;
 
 	/* Direct compactor: Is a suitable page free? */
-	for (order = cc->order; order < MAX_ORDER; order++) {
-		/* Job done if page is free of the right migratetype */
-		if (!list_empty(&zone->free_area[order].free_list[cc->migratetype]))
-			return COMPACT_PARTIAL;
-
-		/* Job done if allocation would set block type */
-		if (order >= pageblock_order && zone->free_area[order].nr_free)
+	if (cc->page) {
+		/* Was a suitable page captured? */
+		if (*cc->page)
 			return COMPACT_PARTIAL;
+	} else {
+		unsigned int order;
+		for (order = cc->order; order < MAX_ORDER; order++) {
+			struct free_area *area = &zone->free_area[order];
+			/* Job done if page is free of the right migratetype */
+			if (!list_empty(&area->free_list[cc->migratetype]))
+				return COMPACT_PARTIAL;
+
+			/* Job done if allocation would set block type */
+			if (order >= pageblock_order && area->nr_free)
+				return COMPACT_PARTIAL;
+		}
 	}
 
 	return COMPACT_CONTINUE;
@@ -817,6 +878,9 @@ static int compact_zone(struct zone *zone, struct compact_control *cc)
 				goto out;
 			}
 		}
+
+		/* Capture a page now if it is a suitable size */
+		compact_capture_page(cc);
 	}
 
 out:
@@ -829,7 +893,8 @@ static int compact_zone(struct zone *zone, struct compact_control *cc)
 
 static unsigned long compact_zone_order(struct zone *zone,
 				 int order, gfp_t gfp_mask,
-				 bool sync, bool *contended)
+				 bool sync, bool *contended,
+				 struct page **page)
 {
 	struct compact_control cc = {
 		.nr_freepages = 0,
@@ -839,6 +904,7 @@ static unsigned long compact_zone_order(struct zone *zone,
 		.zone = zone,
 		.sync = sync,
 		.contended = contended,
+		.page = page,
 	};
 	INIT_LIST_HEAD(&cc.freepages);
 	INIT_LIST_HEAD(&cc.migratepages);
@@ -860,7 +926,7 @@ int sysctl_extfrag_threshold = 500;
  */
 unsigned long try_to_compact_pages(struct zonelist *zonelist,
 			int order, gfp_t gfp_mask, nodemask_t *nodemask,
-			bool sync, bool *contended)
+			bool sync, bool *contended, struct page **page)
 {
 	enum zone_type high_zoneidx = gfp_zone(gfp_mask);
 	int may_enter_fs = gfp_mask & __GFP_FS;
@@ -881,7 +947,7 @@ unsigned long try_to_compact_pages(struct zonelist *zonelist,
 		int status;
 
 		status = compact_zone_order(zone, order, gfp_mask, sync,
-						contended);
+						contended, page);
 		rc = max(status, rc);
 
 		/* If a normal allocation would succeed, stop compacting */
@@ -936,6 +1002,7 @@ int compact_pgdat(pg_data_t *pgdat, int order)
 	struct compact_control cc = {
 		.order = order,
 		.sync = false,
+		.page = NULL,
 	};
 
 	return __compact_pgdat(pgdat, &cc);
@@ -946,6 +1013,7 @@ static int compact_node(int nid)
 	struct compact_control cc = {
 		.order = -1,
 		.sync = true,
+		.page = NULL,
 	};
 
 	return __compact_pgdat(NODE_DATA(nid), &cc);
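One design note on the diff above: capture is opt-in per compact_control. Background compaction (compact_pgdat(), compact_node()) passes .page = NULL and keeps the free-list check in compact_finished(), while direct compaction supplies a struct page ** for compact_capture_page() to fill. A hypothetical sketch of the direct-compaction wiring, mirroring compact_zone_order() in the hunk above (field values beyond those shown in this diff are assumptions):

	struct page *captured = NULL;	/* set only if capture succeeds */
	struct compact_control cc = {
		.nr_freepages = 0,
		.order = order,
		.zone = zone,
		.sync = sync,
		.contended = contended,
		.page = &captured,	/* enables compact_capture_page() */
	};
	/* After compact_zone(zone, &cc), a non-NULL 'captured' is the
	 * high-order page, already removed from the free lists with
	 * watermarks checked by capture_free_page(). */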

mm/internal.h

Lines changed: 1 addition & 0 deletions
@@ -131,6 +131,7 @@ struct compact_control {
 	int migratetype;		/* MOVABLE, RECLAIMABLE etc */
 	struct zone *zone;
 	bool *contended;		/* True if a lock was contended */
+	struct page **page;		/* Page captured of requested size */
 };
 
 unsigned long

mm/page_alloc.c

Lines changed: 47 additions & 16 deletions
@@ -1380,16 +1380,11 @@ void split_page(struct page *page, unsigned int order)
 }
 
 /*
- * Similar to split_page except the page is already free. As this is only
- * being used for migration, the migratetype of the block also changes.
- * As this is called with interrupts disabled, the caller is responsible
- * for calling arch_alloc_page() and kernel_map_page() after interrupts
- * are enabled.
- *
- * Note: this is probably too low level an operation for use in drivers.
- * Please consult with lkml before using this in your driver.
+ * Similar to the split_page family of functions except that the page
+ * is required at the given order and is isolated now to prevent races
+ * with parallel allocators
  */
-int split_free_page(struct page *page)
+int capture_free_page(struct page *page, int alloc_order, int migratetype)
 {
 	unsigned int order;
 	unsigned long watermark;
@@ -1411,10 +1406,11 @@ int split_free_page(struct page *page)
 	rmv_page_order(page);
 	__mod_zone_page_state(zone, NR_FREE_PAGES, -(1UL << order));
 
-	/* Split into individual pages */
-	set_page_refcounted(page);
-	split_page(page, order);
+	if (alloc_order != order)
+		expand(zone, page, alloc_order, order,
+			&zone->free_area[order], migratetype);
 
+	/* Set the pageblock if the captured page is at least a pageblock */
 	if (order >= pageblock_order - 1) {
 		struct page *endpage = page + (1 << order) - 1;
 		for (; page < endpage; page += pageblock_nr_pages) {
@@ -1425,7 +1421,35 @@ int split_free_page(struct page *page)
 		}
 	}
 
-	return 1 << order;
+	return 1UL << order;
+}
+
+/*
+ * Similar to split_page except the page is already free. As this is only
+ * being used for migration, the migratetype of the block also changes.
+ * As this is called with interrupts disabled, the caller is responsible
+ * for calling arch_alloc_page() and kernel_map_page() after interrupts
+ * are enabled.
+ *
+ * Note: this is probably too low level an operation for use in drivers.
+ * Please consult with lkml before using this in your driver.
+ */
+int split_free_page(struct page *page)
+{
+	unsigned int order;
+	int nr_pages;
+
+	BUG_ON(!PageBuddy(page));
+	order = page_order(page);
+
+	nr_pages = capture_free_page(page, order, 0);
+	if (!nr_pages)
+		return 0;
+
+	/* Split into individual pages */
+	set_page_refcounted(page);
+	split_page(page, order);
+	return nr_pages;
 }
 
 /*
@@ -2105,7 +2129,7 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
 	bool *contended_compaction, bool *deferred_compaction,
 	unsigned long *did_some_progress)
 {
-	struct page *page;
+	struct page *page = NULL;
 
 	if (!order)
 		return NULL;
@@ -2118,10 +2142,16 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
 	current->flags |= PF_MEMALLOC;
 	*did_some_progress = try_to_compact_pages(zonelist, order, gfp_mask,
 						nodemask, sync_migration,
-						contended_compaction);
+						contended_compaction, &page);
 	current->flags &= ~PF_MEMALLOC;
-	if (*did_some_progress != COMPACT_SKIPPED) {
 
+	/* If compaction captured a page, prep and use it */
+	if (page) {
+		prep_new_page(page, order, gfp_mask);
+		goto got_page;
+	}
+
+	if (*did_some_progress != COMPACT_SKIPPED) {
 		/* Page migration frees to the PCP lists but we want merging */
 		drain_pages(get_cpu());
 		put_cpu();
@@ -2131,6 +2161,7 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
 				alloc_flags & ~ALLOC_NO_WATERMARKS,
 				preferred_zone, migratetype);
 		if (page) {
+got_page:
 			preferred_zone->compact_considered = 0;
 			preferred_zone->compact_defer_shift = 0;
 			if (order >= preferred_zone->compact_order_failed)
