
Commit bb13ffe

Mel Gorman authored and torvalds committed
mm: compaction: cache if a pageblock was scanned and no pages were isolated
When compaction was implemented it was known that scanning could
potentially be excessive. The ideal was that a counter be maintained for
each pageblock, but maintaining this information would incur a severe
penalty due to a shared writable cache line. It has reached the point
where the scanning costs are a serious problem, particularly on
long-lived systems where a large process starts and allocates a large
number of THPs at the same time.

Instead of using a shared counter, this patch adds another bit to the
pageblock flags called PG_migrate_skip. If a pageblock is scanned by
either the migrate or free scanner and 0 pages were isolated, the
pageblock is marked to be skipped in the future. When scanning, this bit
is checked before any scanning takes place and the block skipped if set.

The main difficulty with a patch like this is "when to ignore the cached
information?" If it's ignored too often, the scanning rates will still
be excessive. If the information is too stale then allocations will fail
that might otherwise have succeeded. In this patch:

o CMA always ignores the information
o If the migrate and free scanner meet then the cached information will
  be discarded if it's at least 5 seconds since the last time the cache
  was discarded
o If there are a large number of allocation failures, discard the cache.

The time-based heuristic is very clumsy but there are few choices for a
better event. Depending solely on multiple allocation failures still
allows excessive scanning when THP allocations are failing in quick
succession due to memory pressure. Waiting until memory pressure is
relieved would cause compaction to continually fail instead of using
reclaim/compaction to try to allocate the page. The time-based mechanism
is clumsy but a better option is not obvious.

Signed-off-by: Mel Gorman <mgorman@suse.de>
Acked-by: Rik van Riel <riel@redhat.com>
Cc: Richard Davies <richard@arachsys.com>
Cc: Shaohua Li <shli@kernel.org>
Cc: Avi Kivity <avi@redhat.com>
Acked-by: Rafael Aquini <aquini@redhat.com>
Cc: Fengguang Wu <fengguang.wu@intel.com>
Cc: Michal Nazarewicz <mina86@mina86.com>
Cc: Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
Cc: Kyungmin Park <kyungmin.park@samsung.com>
Cc: Mark Brown <broonie@opensource.wolfsonmicro.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
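
The heuristic is small enough to model outside the kernel. The following
userspace C sketch is illustrative only — block_skip[], SKIP_RESET_INTERVAL
and the function names are inventions for this example, not kernel symbols —
but it captures the three rules: consult the cached bit before scanning, set
it when a full scan isolates nothing, and drop the whole cache at most once
per interval:

    #include <stdbool.h>
    #include <string.h>
    #include <time.h>

    #define NR_BLOCKS           1024
    #define SKIP_RESET_INTERVAL 5   /* seconds, mirroring the patch's HZ * 5 */

    static bool   block_skip[NR_BLOCKS];  /* one "PB_migrate_skip" per block */
    static time_t skip_reset_deadline;    /* cf. compact_blockskip_expire */

    /* Scanner side: consult the cached hint before doing any work. */
    static bool should_scan_block(int blk, bool ignore_skip_hint)
    {
            if (ignore_skip_hint)           /* CMA path: always scan */
                    return true;
            return !block_skip[blk];
    }

    /* After fully scanning a block: cache "nothing here" on failure. */
    static void record_scan_result(int blk, unsigned long nr_isolated)
    {
            if (!nr_isolated)
                    block_skip[blk] = true;
    }

    /* Scanners met (or too many failures): maybe invalidate the cache. */
    static void maybe_reset_skip_hints(void)
    {
            time_t now = time(NULL);

            if (now < skip_reset_deadline)  /* throttled: at most every 5s */
                    return;
            skip_reset_deadline = now + SKIP_RESET_INTERVAL;
            memset(block_skip, 0, sizeof(block_skip));
    }

    int main(void)
    {
            /* Block 7 scans empty: cached as skip, honoured next time. */
            record_scan_result(7, 0);
            return should_scan_block(7, false) ? 1 : 0;  /* exits 0 */
    }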
1 parent: 753341a · commit: bb13ffe

5 files changed: 151 additions, 38 deletions

include/linux/mmzone.h
Lines changed: 3 additions & 0 deletions

@@ -369,6 +369,9 @@ struct zone {
 	 */
 	spinlock_t		lock;
 	int			all_unreclaimable; /* All pages pinned */
+#if defined CONFIG_COMPACTION || defined CONFIG_CMA
+	unsigned long		compact_blockskip_expire;
+#endif
 #ifdef CONFIG_MEMORY_HOTPLUG
 	/* see spanned/present_pages for more description */
 	seqlock_t		span_seqlock;
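
The new compact_blockskip_expire field holds a jiffies deadline, compared
with time_before() so the test stays correct even when the jiffies counter
wraps. A minimal sketch of the idiom — simplified from the macros in
<linux/jiffies.h>, which additionally do type-checking:

    #include <limits.h>
    #include <stdio.h>

    /* Wraparound-safe "a is before b" for free-running tick counters. */
    #define time_before(a, b)   ((long)((a) - (b)) < 0)

    int main(void)
    {
            unsigned long jiffies  = ULONG_MAX - 10;  /* about to wrap */
            unsigned long deadline = jiffies + 50;    /* wraps past zero */

            /* Naive "jiffies < deadline" is false here; this is true. */
            printf("before deadline: %d\n", time_before(jiffies, deadline));
            return 0;
    }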

include/linux/pageblock-flags.h
Lines changed: 17 additions & 2 deletions

@@ -30,6 +30,9 @@ enum pageblock_bits {
 	PB_migrate,
 	PB_migrate_end = PB_migrate + 3 - 1,
 			/* 3 bits required for migrate types */
+#ifdef CONFIG_COMPACTION
+	PB_migrate_skip,/* If set the block is skipped by compaction */
+#endif /* CONFIG_COMPACTION */
 	NR_PAGEBLOCK_BITS
 };
 
@@ -65,10 +68,22 @@ unsigned long get_pageblock_flags_group(struct page *page,
 void set_pageblock_flags_group(struct page *page, unsigned long flags,
 					int start_bitidx, int end_bitidx);
 
+#ifdef CONFIG_COMPACTION
+#define get_pageblock_skip(page) \
+			get_pageblock_flags_group(page, PB_migrate_skip,     \
+							PB_migrate_skip + 1)
+#define clear_pageblock_skip(page) \
+			set_pageblock_flags_group(page, 0, PB_migrate_skip,  \
+							PB_migrate_skip + 1)
+#define set_pageblock_skip(page) \
+			set_pageblock_flags_group(page, 1, PB_migrate_skip,  \
+							PB_migrate_skip + 1)
+#endif /* CONFIG_COMPACTION */
+
 #define get_pageblock_flags(page) \
-			get_pageblock_flags_group(page, 0, NR_PAGEBLOCK_BITS-1)
+			get_pageblock_flags_group(page, 0, PB_migrate_end)
 #define set_pageblock_flags(page, flags) \
 			set_pageblock_flags_group(page, flags,	\
-						  0, NR_PAGEBLOCK_BITS-1)
+						  0, PB_migrate_end)
 
 #endif	/* PAGEBLOCK_FLAGS_H */
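
Note that get_pageblock_flags()/set_pageblock_flags() are narrowed from
NR_PAGEBLOCK_BITS-1 to PB_migrate_end, so the existing migratetype helpers
can no longer clobber the new skip bit. Under CONFIG_COMPACTION each
pageblock now owns four packed bits: three migratetype bits plus
PB_migrate_skip. A rough model of how such a packed per-pageblock bitmap is
indexed — illustrative only, not the kernel's actual
get_pageblock_flags_group(), and PAGEBLOCK_ORDER here is just an example
value:

    #include <stdint.h>
    #include <stdio.h>

    #define PAGEBLOCK_ORDER     9   /* example: 2^9 pages per pageblock */
    #define NR_PAGEBLOCK_BITS   4   /* 3 migratetype bits + 1 skip bit */
    #define BITS_PER_WORD       (8 * sizeof(uint64_t))

    /* Locate the word and bit offset holding a pfn's pageblock flags. */
    static void pageblock_bit_index(unsigned long pfn,
                                    size_t *word, unsigned int *shift)
    {
            unsigned long block  = pfn >> PAGEBLOCK_ORDER;     /* block nr */
            unsigned long bitidx = block * NR_PAGEBLOCK_BITS;  /* 1st bit  */

            *word  = bitidx / BITS_PER_WORD;
            *shift = bitidx % BITS_PER_WORD;
    }

    int main(void)
    {
            size_t word;
            unsigned int shift;

            pageblock_bit_index(0x12345, &word, &shift);
            printf("pfn 0x12345 -> word %zu, bit %u\n", word, shift);
            return 0;
    }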

mm/compaction.c
Lines changed: 108 additions & 17 deletions

@@ -50,6 +50,79 @@ static inline bool migrate_async_suitable(int migratetype)
 	return is_migrate_cma(migratetype) || migratetype == MIGRATE_MOVABLE;
 }
 
+#ifdef CONFIG_COMPACTION
+/* Returns true if the pageblock should be scanned for pages to isolate. */
+static inline bool isolation_suitable(struct compact_control *cc,
+					struct page *page)
+{
+	if (cc->ignore_skip_hint)
+		return true;
+
+	return !get_pageblock_skip(page);
+}
+
+/*
+ * This function is called to clear all cached information on pageblocks that
+ * should be skipped for page isolation when the migrate and free page scanner
+ * meet.
+ */
+static void reset_isolation_suitable(struct zone *zone)
+{
+	unsigned long start_pfn = zone->zone_start_pfn;
+	unsigned long end_pfn = zone->zone_start_pfn + zone->spanned_pages;
+	unsigned long pfn;
+
+	/*
+	 * Do not reset more than once every five seconds. If allocations are
+	 * failing sufficiently quickly to allow this to happen then continually
+	 * scanning for compaction is not going to help. The choice of five
+	 * seconds is arbitrary but will mitigate excessive scanning.
+	 */
+	if (time_before(jiffies, zone->compact_blockskip_expire))
+		return;
+	zone->compact_blockskip_expire = jiffies + (HZ * 5);
+
+	/* Walk the zone and mark every pageblock as suitable for isolation */
+	for (pfn = start_pfn; pfn < end_pfn; pfn += pageblock_nr_pages) {
+		struct page *page;
+
+		cond_resched();
+
+		if (!pfn_valid(pfn))
+			continue;
+
+		page = pfn_to_page(pfn);
+		if (zone != page_zone(page))
+			continue;
+
+		clear_pageblock_skip(page);
+	}
+}
+
+/*
+ * If no pages were isolated then mark this pageblock to be skipped in the
+ * future. The information is later cleared by reset_isolation_suitable().
+ */
+static void update_pageblock_skip(struct page *page, unsigned long nr_isolated)
+{
+	if (!page)
+		return;
+
+	if (!nr_isolated)
+		set_pageblock_skip(page);
+}
+#else
+static inline bool isolation_suitable(struct compact_control *cc,
+					struct page *page)
+{
+	return true;
+}
+
+static void update_pageblock_skip(struct page *page, unsigned long nr_isolated)
+{
+}
+#endif /* CONFIG_COMPACTION */
+
 static inline bool should_release_lock(spinlock_t *lock)
 {
 	return need_resched() || spin_is_contended(lock);

@@ -181,7 +254,7 @@ static unsigned long isolate_freepages_block(struct compact_control *cc,
 				bool strict)
 {
 	int nr_scanned = 0, total_isolated = 0;
-	struct page *cursor;
+	struct page *cursor, *valid_page = NULL;
 	unsigned long nr_strict_required = end_pfn - blockpfn;
 	unsigned long flags;
 	bool locked = false;

@@ -196,6 +269,8 @@ static unsigned long isolate_freepages_block(struct compact_control *cc,
 		nr_scanned++;
 		if (!pfn_valid_within(blockpfn))
 			continue;
+		if (!valid_page)
+			valid_page = page;
 		if (!PageBuddy(page))
 			continue;
 

@@ -250,6 +325,10 @@ static unsigned long isolate_freepages_block(struct compact_control *cc,
 	if (locked)
 		spin_unlock_irqrestore(&cc->zone->lock, flags);
 
+	/* Update the pageblock-skip if the whole pageblock was scanned */
+	if (blockpfn == end_pfn)
+		update_pageblock_skip(valid_page, total_isolated);
+
 	return total_isolated;
 }

@@ -267,22 +346,14 @@ static unsigned long isolate_freepages_block(struct compact_control *cc,
  * a free page).
  */
 unsigned long
-isolate_freepages_range(unsigned long start_pfn, unsigned long end_pfn)
+isolate_freepages_range(struct compact_control *cc,
+			unsigned long start_pfn, unsigned long end_pfn)
 {
 	unsigned long isolated, pfn, block_end_pfn;
-	struct zone *zone = NULL;
 	LIST_HEAD(freelist);
 
-	/* cc needed for isolate_freepages_block to acquire zone->lock */
-	struct compact_control cc = {
-		.sync = true,
-	};
-
-	if (pfn_valid(start_pfn))
-		cc.zone = zone = page_zone(pfn_to_page(start_pfn));
-
 	for (pfn = start_pfn; pfn < end_pfn; pfn += isolated) {
-		if (!pfn_valid(pfn) || zone != page_zone(pfn_to_page(pfn)))
+		if (!pfn_valid(pfn) || cc->zone != page_zone(pfn_to_page(pfn)))
 			break;
 
 		/*

@@ -292,7 +363,7 @@ isolate_freepages_range(unsigned long start_pfn, unsigned long end_pfn)
 		block_end_pfn = ALIGN(pfn + 1, pageblock_nr_pages);
 		block_end_pfn = min(block_end_pfn, end_pfn);
 
-		isolated = isolate_freepages_block(&cc, pfn, block_end_pfn,
+		isolated = isolate_freepages_block(cc, pfn, block_end_pfn,
 						   &freelist, true);
 
 		/*

@@ -387,6 +458,7 @@ isolate_migratepages_range(struct zone *zone, struct compact_control *cc,
 	struct lruvec *lruvec;
 	unsigned long flags;
 	bool locked = false;
+	struct page *page = NULL, *valid_page = NULL;
 
 	/*
 	 * Ensure that there are not too many pages isolated from the LRU

@@ -407,8 +479,6 @@ isolate_migratepages_range(struct zone *zone, struct compact_control *cc,
 	/* Time to isolate some pages for migration */
 	cond_resched();
 	for (; low_pfn < end_pfn; low_pfn++) {
-		struct page *page;
-
 		/* give a chance to irqs before checking need_resched() */
 		if (locked && !((low_pfn+1) % SWAP_CLUSTER_MAX)) {
 			if (should_release_lock(&zone->lru_lock)) {

@@ -444,6 +514,14 @@ isolate_migratepages_range(struct zone *zone, struct compact_control *cc,
 		if (page_zone(page) != zone)
 			continue;
 
+		if (!valid_page)
+			valid_page = page;
+
+		/* If isolation recently failed, do not retry */
+		pageblock_nr = low_pfn >> pageblock_order;
+		if (!isolation_suitable(cc, page))
+			goto next_pageblock;
+
 		/* Skip if free */
 		if (PageBuddy(page))
 			continue;

@@ -453,7 +531,6 @@ isolate_migratepages_range(struct zone *zone, struct compact_control *cc,
 		 * migration is optimistic to see if the minimum amount of work
 		 * satisfies the allocation
 		 */
-		pageblock_nr = low_pfn >> pageblock_order;
 		if (!cc->sync && last_pageblock_nr != pageblock_nr &&
 		    !migrate_async_suitable(get_pageblock_migratetype(page))) {
 			goto next_pageblock;

@@ -530,6 +607,10 @@ isolate_migratepages_range(struct zone *zone, struct compact_control *cc,
 	if (locked)
 		spin_unlock_irqrestore(&zone->lru_lock, flags);
 
+	/* Update the pageblock-skip if the whole pageblock was scanned */
+	if (low_pfn == end_pfn)
+		update_pageblock_skip(valid_page, nr_isolated);
+
 	trace_mm_compaction_isolate_migratepages(nr_scanned, nr_isolated);
 
 	return low_pfn;

@@ -593,6 +674,10 @@ static void isolate_freepages(struct zone *zone,
 		if (!suitable_migration_target(page))
 			continue;
 
+		/* If isolation recently failed, do not retry */
+		if (!isolation_suitable(cc, page))
+			continue;
+
 		/* Found a block suitable for isolating free pages from */
 		isolated = 0;
 		end_pfn = min(pfn + pageblock_nr_pages, zone_end_pfn);

@@ -709,8 +794,10 @@ static int compact_finished(struct zone *zone,
 		return COMPACT_PARTIAL;
 
 	/* Compaction run completes if the migrate and free scanner meet */
-	if (cc->free_pfn <= cc->migrate_pfn)
+	if (cc->free_pfn <= cc->migrate_pfn) {
+		reset_isolation_suitable(cc->zone);
 		return COMPACT_COMPLETE;
+	}
 
 	/*
 	 * order == -1 is expected when compacting via

@@ -818,6 +905,10 @@ static int compact_zone(struct zone *zone, struct compact_control *cc)
 	cc->free_pfn = cc->migrate_pfn + zone->spanned_pages;
 	cc->free_pfn &= ~(pageblock_nr_pages-1);
 
+	/* Clear pageblock skip if there are numerous alloc failures */
+	if (zone->compact_defer_shift == COMPACT_MAX_DEFER_SHIFT)
+		reset_isolation_suitable(zone);
+
 	migrate_prep_local();
 
 	while ((ret = compact_finished(zone, cc)) == COMPACT_CONTINUE) {
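
One subtlety worth noting: both scanners only call update_pageblock_skip()
when the scan reached end_pfn, so a block abandoned mid-scan (lock
contention, an async abort) is never penalized. A toy C harness of that
guard — all names here are inventions for illustration, not kernel code:

    #include <stdbool.h>
    #include <stdio.h>

    static bool block_skip;

    /* Toy analogue of update_pageblock_skip(): cache only real failures. */
    static void update_skip_toy(unsigned long nr_isolated)
    {
            if (!nr_isolated)
                    block_skip = true;
    }

    static unsigned long scan_block(unsigned long start, unsigned long end,
                                    bool abort_early)
    {
            unsigned long pfn, nr_isolated = 0;

            for (pfn = start; pfn < end; pfn++) {
                    if (abort_early && pfn == start + 2)
                            break;  /* simulate a contention abort */
                    /* ... isolation attempt; assume nothing isolated ... */
            }

            /* Mirror of "if (blockpfn == end_pfn) update_pageblock_skip()" */
            if (pfn == end)
                    update_skip_toy(nr_isolated);

            return pfn;
    }

    int main(void)
    {
            scan_block(0, 8, true);
            printf("after aborted scan:  skip=%d\n", block_skip);  /* 0 */
            scan_block(0, 8, false);
            printf("after complete scan: skip=%d\n", block_skip);  /* 1 */
            return 0;
    }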

mm/internal.h
Lines changed: 3 additions & 1 deletion

@@ -120,6 +120,7 @@ struct compact_control {
 	unsigned long free_pfn;		/* isolate_freepages search base */
 	unsigned long migrate_pfn;	/* isolate_migratepages search base */
 	bool sync;			/* Synchronous migration */
+	bool ignore_skip_hint;		/* Scan blocks even if marked skip */
 
 	int order;			/* order a direct compactor needs */
 	int migratetype;		/* MOVABLE, RECLAIMABLE etc */

@@ -129,7 +130,8 @@ struct compact_control {
 };
 
 unsigned long
-isolate_freepages_range(unsigned long start_pfn, unsigned long end_pfn);
+isolate_freepages_range(struct compact_control *cc,
+			unsigned long start_pfn, unsigned long end_pfn);
 unsigned long
 isolate_migratepages_range(struct zone *zone, struct compact_control *cc,
 			   unsigned long low_pfn, unsigned long end_pfn);

mm/page_alloc.c
Lines changed: 20 additions & 18 deletions

@@ -5679,33 +5679,26 @@ __alloc_contig_migrate_alloc(struct page *page, unsigned long private,
 }
 
 /* [start, end) must belong to a single zone. */
-static int __alloc_contig_migrate_range(unsigned long start, unsigned long end)
+static int __alloc_contig_migrate_range(struct compact_control *cc,
+					unsigned long start, unsigned long end)
 {
 	/* This function is based on compact_zone() from compaction.c. */
 
 	unsigned long pfn = start;
 	unsigned int tries = 0;
 	int ret = 0;
 
-	struct compact_control cc = {
-		.nr_migratepages = 0,
-		.order = -1,
-		.zone = page_zone(pfn_to_page(start)),
-		.sync = true,
-	};
-	INIT_LIST_HEAD(&cc.migratepages);
-
 	migrate_prep_local();
 
-	while (pfn < end || !list_empty(&cc.migratepages)) {
+	while (pfn < end || !list_empty(&cc->migratepages)) {
 		if (fatal_signal_pending(current)) {
 			ret = -EINTR;
 			break;
 		}
 
-		if (list_empty(&cc.migratepages)) {
-			cc.nr_migratepages = 0;
-			pfn = isolate_migratepages_range(cc.zone, &cc,
+		if (list_empty(&cc->migratepages)) {
+			cc->nr_migratepages = 0;
+			pfn = isolate_migratepages_range(cc->zone, cc,
 							 pfn, end);
 			if (!pfn) {
 				ret = -EINTR;

@@ -5717,14 +5710,14 @@ static int __alloc_contig_migrate_range(unsigned long start, unsigned long end)
 			break;
 		}
 
-		reclaim_clean_pages_from_list(cc.zone, &cc.migratepages);
+		reclaim_clean_pages_from_list(cc->zone, &cc->migratepages);
 
-		ret = migrate_pages(&cc.migratepages,
+		ret = migrate_pages(&cc->migratepages,
 				    __alloc_contig_migrate_alloc,
 				    0, false, MIGRATE_SYNC);
 	}
 
-	putback_lru_pages(&cc.migratepages);
+	putback_lru_pages(&cc->migratepages);
 	return ret > 0 ? 0 : ret;
 }

@@ -5803,6 +5796,15 @@ int alloc_contig_range(unsigned long start, unsigned long end,
 	unsigned long outer_start, outer_end;
 	int ret = 0, order;
 
+	struct compact_control cc = {
+		.nr_migratepages = 0,
+		.order = -1,
+		.zone = page_zone(pfn_to_page(start)),
+		.sync = true,
+		.ignore_skip_hint = true,
+	};
+	INIT_LIST_HEAD(&cc.migratepages);
+
 	/*
 	 * What we do here is we mark all pageblocks in range as
 	 * MIGRATE_ISOLATE. Because pageblock and max order pages may

@@ -5832,7 +5834,7 @@ int alloc_contig_range(unsigned long start, unsigned long end,
 	if (ret)
 		goto done;
 
-	ret = __alloc_contig_migrate_range(start, end);
+	ret = __alloc_contig_migrate_range(&cc, start, end);
 	if (ret)
 		goto done;

@@ -5881,7 +5883,7 @@ int alloc_contig_range(unsigned long start, unsigned long end,
 	__reclaim_pages(zone, GFP_HIGHUSER_MOVABLE, end-start);
 
 	/* Grab isolated pages from freelists. */
-	outer_end = isolate_freepages_range(outer_start, end);
+	outer_end = isolate_freepages_range(&cc, outer_start, end);
 	if (!outer_end) {
 		ret = -EBUSY;
 		goto done;
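
For context, the path that sets ignore_skip_hint is CMA's
alloc_contig_range(). A hedged sketch of how a kernel-side caller of this
era's API might use it — grab_contig_pages() is a made-up helper, error
handling is trimmed, and the pfn range is assumed to cover valid
MIGRATE_CMA pageblocks:

    #include <linux/gfp.h>
    #include <linux/mm.h>

    /* Hypothetical helper: claim [pfn, pfn + count) as CMA pages. */
    static struct page *grab_contig_pages(unsigned long pfn,
                                          unsigned long count)
    {
            /* Compaction inside this call ignores the skip hints. */
            if (alloc_contig_range(pfn, pfn + count, MIGRATE_CMA))
                    return NULL;

            return pfn_to_page(pfn);
    }

    static void release_contig_pages(unsigned long pfn, unsigned long count)
    {
            free_contig_range(pfn, count);
    }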
