Skip to content

Commit 2d070ea

Browse files
Michal Hocko authored and Linus Torvalds (torvalds) committed
mm: consider zone which is not fully populated to have holes
__pageblock_pfn_to_page has two users currently, set_zone_contiguous which checks whether the given zone contains holes and pageblock_pfn_to_page which then carefully returns a first valid page from the given pfn range for the given zone. This doesn't handle zones which are not fully populated though. Memory pageblocks can be offlined or might not have been onlined yet. In such a case the zone should be considered to have holes otherwise pfn walkers can touch and play with offline pages. Current callers of pageblock_pfn_to_page in compaction seem to work properly right now because they only isolate PageBuddy (isolate_freepages_block) or PageLRU resp. __PageMovable (isolate_migratepages_block) which will be always false for these pages. It would be safer to skip these pages altogether, though. In order to do this patch adds a new memory section state (SECTION_IS_ONLINE) which is set in memory_present (during boot time) or in online_pages_range during the memory hotplug. Similarly offline_mem_sections clears the bit and it is called when the memory range is offlined. pfn_to_online_page helper is then added which check the mem section and only returns a page if it is onlined already. Use the new helper in __pageblock_pfn_to_page and skip the whole page block in such a case. 
[mhocko@suse.com: check valid section number in pfn_to_online_page (Vlastimil), mark sections online after all struct pages are initialized in online_pages_range (Vlastimil)] Link: http://lkml.kernel.org/r/20170518164210.GD18333@dhcp22.suse.cz Link: http://lkml.kernel.org/r/20170515085827.16474-8-mhocko@kernel.org Signed-off-by: Michal Hocko <mhocko@suse.com> Acked-by: Vlastimil Babka <vbabka@suse.cz> Cc: Andi Kleen <ak@linux.intel.com> Cc: Andrea Arcangeli <aarcange@redhat.com> Cc: Balbir Singh <bsingharora@gmail.com> Cc: Dan Williams <dan.j.williams@intel.com> Cc: Daniel Kiper <daniel.kiper@oracle.com> Cc: David Rientjes <rientjes@google.com> Cc: Heiko Carstens <heiko.carstens@de.ibm.com> Cc: Igor Mammedov <imammedo@redhat.com> Cc: Jerome Glisse <jglisse@redhat.com> Cc: Joonsoo Kim <js1304@gmail.com> Cc: Martin Schwidefsky <schwidefsky@de.ibm.com> Cc: Mel Gorman <mgorman@suse.de> Cc: Reza Arbab <arbab@linux.vnet.ibm.com> Cc: Tobias Regnery <tobias.regnery@gmail.com> Cc: Toshi Kani <toshi.kani@hpe.com> Cc: Vitaly Kuznetsov <vkuznets@redhat.com> Cc: Xishi Qiu <qiuxishi@huawei.com> Cc: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
1 parent 8b0662f commit 2d070ea

File tree

5 files changed

+103
-8
lines changed

5 files changed

+103
-8
lines changed

include/linux/memory_hotplug.h

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,20 @@ struct memory_block;
1414
struct resource;
1515

1616
#ifdef CONFIG_MEMORY_HOTPLUG
17+
/*
18+
* Return page for the valid pfn only if the page is online. All pfn
19+
* walkers which rely on the fully initialized page->flags and others
20+
* should use this rather than pfn_valid && pfn_to_page
21+
*/
22+
#define pfn_to_online_page(pfn) \
23+
({ \
24+
struct page *___page = NULL; \
25+
unsigned long ___nr = pfn_to_section_nr(pfn); \
26+
\
27+
if (___nr < NR_MEM_SECTIONS && online_section_nr(___nr))\
28+
___page = pfn_to_page(pfn); \
29+
___page; \
30+
})
1731

1832
/*
1933
* Types for free bootmem stored in page->lru.next. These have to be in
@@ -203,6 +217,14 @@ extern void set_zone_contiguous(struct zone *zone);
203217
extern void clear_zone_contiguous(struct zone *zone);
204218

205219
#else /* ! CONFIG_MEMORY_HOTPLUG */
220+
#define pfn_to_online_page(pfn) \
221+
({ \
222+
struct page *___page = NULL; \
223+
if (pfn_valid(pfn)) \
224+
___page = pfn_to_page(pfn); \
225+
___page; \
226+
})
227+
206228
/*
207229
* Stub functions for when hotplug is off
208230
*/

include/linux/mmzone.h

Lines changed: 29 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1144,9 +1144,10 @@ extern unsigned long usemap_size(void);
11441144
*/
11451145
#define SECTION_MARKED_PRESENT (1UL<<0)
11461146
#define SECTION_HAS_MEM_MAP (1UL<<1)
1147-
#define SECTION_MAP_LAST_BIT (1UL<<2)
1147+
#define SECTION_IS_ONLINE (1UL<<2)
1148+
#define SECTION_MAP_LAST_BIT (1UL<<3)
11481149
#define SECTION_MAP_MASK (~(SECTION_MAP_LAST_BIT-1))
1149-
#define SECTION_NID_SHIFT 2
1150+
#define SECTION_NID_SHIFT 3
11501151

11511152
static inline struct page *__section_mem_map_addr(struct mem_section *section)
11521153
{
@@ -1175,6 +1176,23 @@ static inline int valid_section_nr(unsigned long nr)
11751176
return valid_section(__nr_to_section(nr));
11761177
}
11771178

1179+
static inline int online_section(struct mem_section *section)
1180+
{
1181+
return (section && (section->section_mem_map & SECTION_IS_ONLINE));
1182+
}
1183+
1184+
static inline int online_section_nr(unsigned long nr)
1185+
{
1186+
return online_section(__nr_to_section(nr));
1187+
}
1188+
1189+
#ifdef CONFIG_MEMORY_HOTPLUG
1190+
void online_mem_sections(unsigned long start_pfn, unsigned long end_pfn);
1191+
#ifdef CONFIG_MEMORY_HOTREMOVE
1192+
void offline_mem_sections(unsigned long start_pfn, unsigned long end_pfn);
1193+
#endif
1194+
#endif
1195+
11781196
static inline struct mem_section *__pfn_to_section(unsigned long pfn)
11791197
{
11801198
return __nr_to_section(pfn_to_section_nr(pfn));
@@ -1253,10 +1271,15 @@ unsigned long __init node_memmap_size_bytes(int, unsigned long, unsigned long);
12531271
#ifdef CONFIG_ARCH_HAS_HOLES_MEMORYMODEL
12541272
/*
12551273
* pfn_valid() is meant to be able to tell if a given PFN has valid memmap
1256-
* associated with it or not. In FLATMEM, it is expected that holes always
1257-
* have valid memmap as long as there is valid PFNs either side of the hole.
1258-
* In SPARSEMEM, it is assumed that a valid section has a memmap for the
1259-
* entire section.
1274+
* associated with it or not. This means that a struct page exists for this
1275+
* pfn. The caller cannot assume the page is fully initialized in general.
1276+
* Hotplugable pages might not have been onlined yet. pfn_to_online_page()
1277+
* will ensure the struct page is fully online and initialized. Special pages
1278+
* (e.g. ZONE_DEVICE) are never onlined and should be treated accordingly.
1279+
*
1280+
* In FLATMEM, it is expected that holes always have valid memmap as long as
1281+
* there is valid PFNs either side of the hole. In SPARSEMEM, it is assumed
1282+
* that a valid section has a memmap for the entire section.
12601283
*
12611284
* However, an ARM, and maybe other embedded architectures in the future
12621285
* free memmap backing holes to save memory on the assumption the memmap is

mm/memory_hotplug.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -929,12 +929,16 @@ static int online_pages_range(unsigned long start_pfn, unsigned long nr_pages,
929929
unsigned long i;
930930
unsigned long onlined_pages = *(unsigned long *)arg;
931931
struct page *page;
932+
932933
if (PageReserved(pfn_to_page(start_pfn)))
933934
for (i = 0; i < nr_pages; i++) {
934935
page = pfn_to_page(start_pfn + i);
935936
(*online_page_callback)(page);
936937
onlined_pages++;
937938
}
939+
940+
online_mem_sections(start_pfn, start_pfn + nr_pages);
941+
938942
*(unsigned long *)arg = onlined_pages;
939943
return 0;
940944
}

mm/page_alloc.c

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1365,7 +1365,9 @@ struct page *__pageblock_pfn_to_page(unsigned long start_pfn,
13651365
if (!pfn_valid(start_pfn) || !pfn_valid(end_pfn))
13661366
return NULL;
13671367

1368-
start_page = pfn_to_page(start_pfn);
1368+
start_page = pfn_to_online_page(start_pfn);
1369+
if (!start_page)
1370+
return NULL;
13691371

13701372
if (page_zone(start_page) != zone)
13711373
return NULL;
@@ -7656,6 +7658,7 @@ __offline_isolated_pages(unsigned long start_pfn, unsigned long end_pfn)
76567658
break;
76577659
if (pfn == end_pfn)
76587660
return;
7661+
offline_mem_sections(pfn, end_pfn);
76597662
zone = page_zone(pfn_to_page(pfn));
76607663
spin_lock_irqsave(&zone->lock, flags);
76617664
pfn = start_pfn;

mm/sparse.c

Lines changed: 44 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -222,7 +222,8 @@ void __init memory_present(int nid, unsigned long start, unsigned long end)
222222

223223
ms = __nr_to_section(section);
224224
if (!ms->section_mem_map) {
225-
ms->section_mem_map = sparse_encode_early_nid(nid);
225+
ms->section_mem_map = sparse_encode_early_nid(nid) |
226+
SECTION_IS_ONLINE;
226227
section_mark_present(ms);
227228
}
228229
}
@@ -622,6 +623,48 @@ void __init sparse_init(void)
622623
}
623624

624625
#ifdef CONFIG_MEMORY_HOTPLUG
626+
627+
/* Mark all memory sections within the pfn range as online */
628+
void online_mem_sections(unsigned long start_pfn, unsigned long end_pfn)
629+
{
630+
unsigned long pfn;
631+
632+
for (pfn = start_pfn; pfn < end_pfn; pfn += PAGES_PER_SECTION) {
633+
unsigned long section_nr = pfn_to_section_nr(start_pfn);
634+
struct mem_section *ms;
635+
636+
/* onlining code should never touch invalid ranges */
637+
if (WARN_ON(!valid_section_nr(section_nr)))
638+
continue;
639+
640+
ms = __nr_to_section(section_nr);
641+
ms->section_mem_map |= SECTION_IS_ONLINE;
642+
}
643+
}
644+
645+
#ifdef CONFIG_MEMORY_HOTREMOVE
646+
/* Mark all memory sections within the pfn range as online */
647+
void offline_mem_sections(unsigned long start_pfn, unsigned long end_pfn)
648+
{
649+
unsigned long pfn;
650+
651+
for (pfn = start_pfn; pfn < end_pfn; pfn += PAGES_PER_SECTION) {
652+
unsigned long section_nr = pfn_to_section_nr(start_pfn);
653+
struct mem_section *ms;
654+
655+
/*
656+
* TODO this needs some double checking. Offlining code makes
657+
* sure to check pfn_valid but those checks might be just bogus
658+
*/
659+
if (WARN_ON(!valid_section_nr(section_nr)))
660+
continue;
661+
662+
ms = __nr_to_section(section_nr);
663+
ms->section_mem_map &= ~SECTION_IS_ONLINE;
664+
}
665+
}
666+
#endif
667+
625668
#ifdef CONFIG_SPARSEMEM_VMEMMAP
626669
static inline struct page *kmalloc_section_memmap(unsigned long pnum, int nid)
627670
{

0 commit comments

Comments
 (0)