
Commit 47ecfcb

Mel Gorman authored and Linus Torvalds committed
mm: compaction: Partially revert capture of suitable high-order page
Eric Wong reported on 3.7 and 3.8-rc2 that ppoll() got stuck when waiting for POLLIN on a local TCP socket. It was easier to trigger if there was disk IO and dirty pages at the same time, and he bisected it to commit 1fb3f8c ("mm: compaction: capture a suitable high-order page immediately when it is made available").

The intention of that patch was to improve high-order allocations under memory pressure after changes made to reclaim in 3.6 drastically hurt THP allocations, but the approach was flawed. For Eric, the problem was that page->pfmemalloc was not being cleared for captured pages, leading to a poor interaction with swap-over-NFS support that caused packets to be dropped. However, I identified a few more problems with the patch, including the fact that it can increase contention on zone->lock in some cases, which could result in async direct compaction being aborted early.

In retrospect the capture patch took the wrong approach. What it should have done is mark the pageblock being migrated as MIGRATE_ISOLATE if it was allocating for THP and avoided races that way. While the patch was shown to improve allocation success rates at the time, the benefit is marginal given the relative complexity, and it should be revisited from scratch in the context of the other reclaim-related changes that have taken place since the patch was first written and tested.

This patch partially reverts commit 1fb3f8c ("mm: compaction: capture a suitable high-order page immediately when it is made available").

Reported-and-tested-by: Eric Wong <normalperson@yhbt.net>
Tested-by: Eric Dumazet <eric.dumazet@gmail.com>
Cc: stable@vger.kernel.org
Signed-off-by: Mel Gorman <mgorman@suse.de>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
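For background on the pfmemalloc problem: every page leaving the normal allocator has page->pfmemalloc set or cleared at the end of get_page_from_freelist(), while a page captured by compaction bypassed that path entirely, so a stale value could survive and confuse swap-over-NFS into dropping packets. The following is a simplified, from-memory sketch of that 3.7-era tail in mm/page_alloc.c, not an excerpt from this diff; the surrounding zonelist scan is elided:

	/* Tail of get_page_from_freelist(), approximately as in v3.7 */
	if (page)
		/*
		 * Set when ALLOC_NO_WATERMARKS was needed. Pages captured by
		 * compaction skipped this assignment, keeping whatever stale
		 * value the field held when the page was last freed.
		 */
		page->pfmemalloc = !!(alloc_flags & ALLOC_NO_WATERMARKS);
	return page;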
1 parent 254adaa commit 47ecfcb

5 files changed (+23, -110 lines)

include/linux/compaction.h

Lines changed: 2 additions & 2 deletions
@@ -22,7 +22,7 @@ extern int sysctl_extfrag_handler(struct ctl_table *table, int write,
 extern int fragmentation_index(struct zone *zone, unsigned int order);
 extern unsigned long try_to_compact_pages(struct zonelist *zonelist,
 			int order, gfp_t gfp_mask, nodemask_t *mask,
-			bool sync, bool *contended, struct page **page);
+			bool sync, bool *contended);
 extern int compact_pgdat(pg_data_t *pgdat, int order);
 extern void reset_isolation_suitable(pg_data_t *pgdat);
 extern unsigned long compaction_suitable(struct zone *zone, int order);
@@ -75,7 +75,7 @@ static inline bool compaction_restarting(struct zone *zone, int order)
 #else
 static inline unsigned long try_to_compact_pages(struct zonelist *zonelist,
 			int order, gfp_t gfp_mask, nodemask_t *nodemask,
-			bool sync, bool *contended, struct page **page)
+			bool sync, bool *contended)
 {
 	return COMPACT_CONTINUE;
 }
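For clarity, this is what a call site looks like after the revert, as a minimal caller-side sketch; the surrounding variables (zonelist, order, gfp_mask, nodemask) are assumed to be in scope and the argument values are illustrative only:

	bool contended = false;
	unsigned long rc;

	/* Post-revert signature: the struct page ** capture argument is gone. */
	rc = try_to_compact_pages(zonelist, order, gfp_mask, nodemask,
				  false /* async */, &contended);
	if (rc != COMPACT_SKIPPED) {
		/* some progress was made; retry the free lists */
	}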

include/linux/mm.h

Lines changed: 0 additions & 1 deletion
@@ -455,7 +455,6 @@ void put_pages_list(struct list_head *pages);
 
 void split_page(struct page *page, unsigned int order);
 int split_free_page(struct page *page);
-int capture_free_page(struct page *page, int alloc_order, int migratetype);
 
 /*
  * Compound pages have a destructor function. Provide a

mm/compaction.c

Lines changed: 13 additions & 79 deletions
@@ -816,6 +816,7 @@ static isolate_migrate_t isolate_migratepages(struct zone *zone,
 static int compact_finished(struct zone *zone,
 			    struct compact_control *cc)
 {
+	unsigned int order;
 	unsigned long watermark;
 
 	if (fatal_signal_pending(current))
@@ -850,22 +851,16 @@ static int compact_finished(struct zone *zone,
 		return COMPACT_CONTINUE;
 
 	/* Direct compactor: Is a suitable page free? */
-	if (cc->page) {
-		/* Was a suitable page captured? */
-		if (*cc->page)
+	for (order = cc->order; order < MAX_ORDER; order++) {
+		struct free_area *area = &zone->free_area[order];
+
+		/* Job done if page is free of the right migratetype */
+		if (!list_empty(&area->free_list[cc->migratetype]))
+			return COMPACT_PARTIAL;
+
+		/* Job done if allocation would set block type */
+		if (cc->order >= pageblock_order && area->nr_free)
 			return COMPACT_PARTIAL;
-	} else {
-		unsigned int order;
-		for (order = cc->order; order < MAX_ORDER; order++) {
-			struct free_area *area = &zone->free_area[cc->order];
-			/* Job done if page is free of the right migratetype */
-			if (!list_empty(&area->free_list[cc->migratetype]))
-				return COMPACT_PARTIAL;
-
-			/* Job done if allocation would set block type */
-			if (cc->order >= pageblock_order && area->nr_free)
-				return COMPACT_PARTIAL;
-		}
 	}
 
 	return COMPACT_CONTINUE;
@@ -921,60 +916,6 @@ unsigned long compaction_suitable(struct zone *zone, int order)
 	return COMPACT_CONTINUE;
 }
 
-static void compact_capture_page(struct compact_control *cc)
-{
-	unsigned long flags;
-	int mtype, mtype_low, mtype_high;
-
-	if (!cc->page || *cc->page)
-		return;
-
-	/*
-	 * For MIGRATE_MOVABLE allocations we capture a suitable page ASAP
-	 * regardless of the migratetype of the freelist it is captured from.
-	 * This is fine because the order for a high-order MIGRATE_MOVABLE
-	 * allocation is typically at least a pageblock size and overall
-	 * fragmentation is not impaired. Other allocation types must
-	 * capture pages from their own migratelist because otherwise they
-	 * could pollute other pageblocks like MIGRATE_MOVABLE with
-	 * difficult to move pages and making fragmentation worse overall.
-	 */
-	if (cc->migratetype == MIGRATE_MOVABLE) {
-		mtype_low = 0;
-		mtype_high = MIGRATE_PCPTYPES;
-	} else {
-		mtype_low = cc->migratetype;
-		mtype_high = cc->migratetype + 1;
-	}
-
-	/* Speculatively examine the free lists without zone lock */
-	for (mtype = mtype_low; mtype < mtype_high; mtype++) {
-		int order;
-		for (order = cc->order; order < MAX_ORDER; order++) {
-			struct page *page;
-			struct free_area *area;
-			area = &(cc->zone->free_area[order]);
-			if (list_empty(&area->free_list[mtype]))
-				continue;
-
-			/* Take the lock and attempt capture of the page */
-			if (!compact_trylock_irqsave(&cc->zone->lock, &flags, cc))
-				return;
-			if (!list_empty(&area->free_list[mtype])) {
-				page = list_entry(area->free_list[mtype].next,
-							struct page, lru);
-				if (capture_free_page(page, cc->order, mtype)) {
-					spin_unlock_irqrestore(&cc->zone->lock,
-								flags);
-					*cc->page = page;
-					return;
-				}
-			}
-			spin_unlock_irqrestore(&cc->zone->lock, flags);
-		}
-	}
-}
-
 static int compact_zone(struct zone *zone, struct compact_control *cc)
 {
 	int ret;
@@ -1054,9 +995,6 @@ static int compact_zone(struct zone *zone, struct compact_control *cc)
 				goto out;
 			}
 		}
-
-		/* Capture a page now if it is a suitable size */
-		compact_capture_page(cc);
 	}
 
 out:
@@ -1069,8 +1007,7 @@ static int compact_zone(struct zone *zone, struct compact_control *cc)
 
 static unsigned long compact_zone_order(struct zone *zone,
 				 int order, gfp_t gfp_mask,
-				 bool sync, bool *contended,
-				 struct page **page)
+				 bool sync, bool *contended)
 {
 	unsigned long ret;
 	struct compact_control cc = {
@@ -1080,7 +1017,6 @@ static unsigned long compact_zone_order(struct zone *zone,
 		.migratetype = allocflags_to_migratetype(gfp_mask),
 		.zone = zone,
 		.sync = sync,
-		.page = page,
 	};
 	INIT_LIST_HEAD(&cc.freepages);
 	INIT_LIST_HEAD(&cc.migratepages);
@@ -1110,7 +1046,7 @@ int sysctl_extfrag_threshold = 500;
 */
unsigned long try_to_compact_pages(struct zonelist *zonelist,
			int order, gfp_t gfp_mask, nodemask_t *nodemask,
-			bool sync, bool *contended, struct page **page)
+			bool sync, bool *contended)
{
	enum zone_type high_zoneidx = gfp_zone(gfp_mask);
	int may_enter_fs = gfp_mask & __GFP_FS;
@@ -1136,7 +1072,7 @@ unsigned long try_to_compact_pages(struct zonelist *zonelist,
 		int status;
 
 		status = compact_zone_order(zone, order, gfp_mask, sync,
-						contended, page);
+						contended);
 		rc = max(status, rc);
 
 		/* If a normal allocation would succeed, stop compacting */
@@ -1192,7 +1128,6 @@ int compact_pgdat(pg_data_t *pgdat, int order)
 	struct compact_control cc = {
 		.order = order,
 		.sync = false,
-		.page = NULL,
 	};
 
 	return __compact_pgdat(pgdat, &cc);
@@ -1203,7 +1138,6 @@ static int compact_node(int nid)
 	struct compact_control cc = {
 		.order = -1,
 		.sync = true,
-		.page = NULL,
 	};
 
 	return __compact_pgdat(NODE_DATA(nid), &cc);
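On the alternative the commit message says should have been taken: the pageblock being migrated would be flagged MIGRATE_ISOLATE so parallel allocators cannot race compaction for the pages it frees. What follows is a purely hypothetical sketch; set_pageblock_migratetype() and move_freepages_block() are real 3.7-era helpers, but isolate_target_pageblock() is an invented name and this code appears in no kernel tree:

	/* Hypothetical: fence off the pageblock compaction is migrating. */
	static void isolate_target_pageblock(struct zone *zone, struct page *page)
	{
		unsigned long flags;

		spin_lock_irqsave(&zone->lock, flags);
		if (get_pageblock_migratetype(page) != MIGRATE_ISOLATE) {
			set_pageblock_migratetype(page, MIGRATE_ISOLATE);
			/* Park already-free pages in this block on the isolate
			 * free list so the allocator fast path skips them. */
			move_freepages_block(zone, page, MIGRATE_ISOLATE);
		}
		spin_unlock_irqrestore(&zone->lock, flags);
	}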

mm/internal.h

Lines changed: 0 additions & 1 deletion
@@ -135,7 +135,6 @@ struct compact_control {
 	int migratetype;		/* MOVABLE, RECLAIMABLE etc */
 	struct zone *zone;
 	bool contended;			/* True if a lock was contended */
-	struct page **page;		/* Page captured of requested size */
 };
 
 unsigned long

mm/page_alloc.c

Lines changed: 8 additions & 27 deletions
@@ -1384,22 +1384,15 @@ void split_page(struct page *page, unsigned int order)
 		set_page_refcounted(page + i);
 }
 
-/*
- * Similar to the split_page family of functions except that the page
- * required at the given order and being isolated now to prevent races
- * with parallel allocators
- */
-int capture_free_page(struct page *page, int alloc_order, int migratetype)
+static int __isolate_free_page(struct page *page, unsigned int order)
 {
-	unsigned int order;
 	unsigned long watermark;
 	struct zone *zone;
 	int mt;
 
 	BUG_ON(!PageBuddy(page));
 
 	zone = page_zone(page);
-	order = page_order(page);
 	mt = get_pageblock_migratetype(page);
 
 	if (mt != MIGRATE_ISOLATE) {
@@ -1408,19 +1401,15 @@ int capture_free_page(struct page *page, int alloc_order, int migratetype)
 		if (!zone_watermark_ok(zone, 0, watermark, 0, 0))
 			return 0;
 
-		__mod_zone_freepage_state(zone, -(1UL << alloc_order), mt);
+		__mod_zone_freepage_state(zone, -(1UL << order), mt);
 	}
 
 	/* Remove page from free list */
 	list_del(&page->lru);
 	zone->free_area[order].nr_free--;
 	rmv_page_order(page);
 
-	if (alloc_order != order)
-		expand(zone, page, alloc_order, order,
-			&zone->free_area[order], migratetype);
-
-	/* Set the pageblock if the captured page is at least a pageblock */
+	/* Set the pageblock if the isolated page is at least a pageblock */
 	if (order >= pageblock_order - 1) {
 		struct page *endpage = page + (1 << order) - 1;
 		for (; page < endpage; page += pageblock_nr_pages) {
@@ -1431,7 +1420,7 @@ int capture_free_page(struct page *page, int alloc_order, int migratetype)
 		}
 	}
 
-	return 1UL << alloc_order;
+	return 1UL << order;
 }
 
 /*
@@ -1449,10 +1438,9 @@ int split_free_page(struct page *page)
 	unsigned int order;
 	int nr_pages;
 
-	BUG_ON(!PageBuddy(page));
 	order = page_order(page);
 
-	nr_pages = capture_free_page(page, order, 0);
+	nr_pages = __isolate_free_page(page, order);
 	if (!nr_pages)
 		return 0;
 
@@ -2136,8 +2124,6 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
 	bool *contended_compaction, bool *deferred_compaction,
 	unsigned long *did_some_progress)
 {
-	struct page *page = NULL;
-
 	if (!order)
 		return NULL;
 
@@ -2149,16 +2135,12 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
 	current->flags |= PF_MEMALLOC;
 	*did_some_progress = try_to_compact_pages(zonelist, order, gfp_mask,
 						nodemask, sync_migration,
-						contended_compaction, &page);
+						contended_compaction);
 	current->flags &= ~PF_MEMALLOC;
 
-	/* If compaction captured a page, prep and use it */
-	if (page) {
-		prep_new_page(page, order, gfp_mask);
-		goto got_page;
-	}
-
 	if (*did_some_progress != COMPACT_SKIPPED) {
+		struct page *page;
+
 		/* Page migration frees to the PCP lists but we want merging */
 		drain_pages(get_cpu());
 		put_cpu();
@@ -2168,7 +2150,6 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
 				alloc_flags & ~ALLOC_NO_WATERMARKS,
 				preferred_zone, migratetype);
 		if (page) {
-got_page:
 			preferred_zone->compact_blockskip_flush = false;
 			preferred_zone->compact_considered = 0;
 			preferred_zone->compact_defer_shift = 0;
