
Commit 80b1f41

Pavel Tatashin authored and torvalds committed
mm: split deferred_init_range into initializing and freeing parts
In deferred_init_range() we initialize struct pages and also free them to the
buddy allocator. We do it in separate loops, because the buddy page is computed
ahead, so we do not want to access a struct page that has not been initialized
yet.

There is still, however, a corner case where it is potentially possible to
access an uninitialized struct page: when the buddy page is from the next
memblock range.

This patch fixes the problem by splitting deferred_init_range() into two
functions: one to initialize struct pages, and another to free them.

In addition, this patch brings the following improvements:

- Gets rid of the __def_free() helper function, and simplifies the loop logic
  by adding a new pfn validity check function: deferred_pfn_valid().
- Reduces the number of variables that we track, so there is a higher chance
  that we will avoid using the stack to store/load variables inside hot loops.
- Enables future multi-threading of these functions: do the initialization in
  multiple threads, wait for all threads to finish, then do the freeing part
  in multiple threads.

Tested on x86 with 1T of memory to make sure no regressions are introduced.

[akpm@linux-foundation.org: fix spello in comment]
Link: http://lkml.kernel.org/r/20171107150446.32055-2-pasha.tatashin@oracle.com
Signed-off-by: Pavel Tatashin <pasha.tatashin@oracle.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Cc: Steven Sistare <steven.sistare@oracle.com>
Cc: Daniel Jordan <daniel.m.jordan@oracle.com>
Cc: Mel Gorman <mgorman@techsingularity.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
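Why the buddy computation can reach ahead of the pfn being freed is easiest to
see from the buddy-finding arithmetic itself. Below is a minimal sketch of that
calculation, modeled on the kernel's __find_buddy_pfn(); the standalone
buddy_pfn() helper here is illustrative and is not part of this commit:

/*
 * The buddy of the page at 'pfn' at a given 'order' differs from it only
 * in bit 'order'. When the XOR flips that bit on, the buddy lies *ahead*
 * of the page being freed, so its struct page must already be initialized;
 * that guarantee breaks if the buddy falls in a memblock range whose
 * struct pages have not been initialized yet.
 */
static unsigned long buddy_pfn(unsigned long pfn, unsigned int order)
{
        return pfn ^ (1UL << order);
}

For example, freeing an order-9 block at pfn 0 makes the allocator look at
buddy_pfn(0, 9) == 512, which may be 512 pages past the range just freed.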
1 parent 9092c71 commit 80b1f41


mm/page_alloc.c

Lines changed: 76 additions & 70 deletions
@@ -1457,92 +1457,87 @@ static inline void __init pgdat_init_report_one_done(void)
 }
 
 /*
- * Helper for deferred_init_range, free the given range, reset the counters, and
- * return number of pages freed.
+ * Returns true if page needs to be initialized or freed to buddy allocator.
+ *
+ * First we check if pfn is valid on architectures where it is possible to have
+ * holes within pageblock_nr_pages. On systems where it is not possible, this
+ * function is optimized out.
+ *
+ * Then, we check if a current large page is valid by only checking the validity
+ * of the head pfn.
+ *
+ * Finally, meminit_pfn_in_nid is checked on systems where pfns can interleave
+ * within a node: a pfn is between start and end of a node, but does not belong
+ * to this memory node.
  */
-static inline unsigned long __init __def_free(unsigned long *nr_free,
-                                              unsigned long *free_base_pfn,
-                                              struct page **page)
+static inline bool __init
+deferred_pfn_valid(int nid, unsigned long pfn,
+                   struct mminit_pfnnid_cache *nid_init_state)
 {
-        unsigned long nr = *nr_free;
+        if (!pfn_valid_within(pfn))
+                return false;
+        if (!(pfn & (pageblock_nr_pages - 1)) && !pfn_valid(pfn))
+                return false;
+        if (!meminit_pfn_in_nid(pfn, nid, nid_init_state))
+                return false;
+        return true;
+}
 
-        deferred_free_range(*free_base_pfn, nr);
-        *free_base_pfn = 0;
-        *nr_free = 0;
-        *page = NULL;
+/*
+ * Free pages to buddy allocator. Try to free aligned pages in
+ * pageblock_nr_pages sizes.
+ */
+static void __init deferred_free_pages(int nid, int zid, unsigned long pfn,
+                                       unsigned long end_pfn)
+{
+        struct mminit_pfnnid_cache nid_init_state = { };
+        unsigned long nr_pgmask = pageblock_nr_pages - 1;
+        unsigned long nr_free = 0;
 
-        return nr;
+        for (; pfn < end_pfn; pfn++) {
+                if (!deferred_pfn_valid(nid, pfn, &nid_init_state)) {
+                        deferred_free_range(pfn - nr_free, nr_free);
+                        nr_free = 0;
+                } else if (!(pfn & nr_pgmask)) {
+                        deferred_free_range(pfn - nr_free, nr_free);
+                        nr_free = 1;
+                        cond_resched();
+                } else {
+                        nr_free++;
+                }
+        }
+        /* Free the last block of pages to allocator */
+        deferred_free_range(pfn - nr_free, nr_free);
 }
 
-static unsigned long __init deferred_init_range(int nid, int zid,
-                                                unsigned long start_pfn,
-                                                unsigned long end_pfn)
+/*
+ * Initialize struct pages. We minimize pfn page lookups and scheduler checks
+ * by performing it only once every pageblock_nr_pages.
+ * Return number of pages initialized.
+ */
+static unsigned long __init deferred_init_pages(int nid, int zid,
+                                                unsigned long pfn,
+                                                unsigned long end_pfn)
 {
         struct mminit_pfnnid_cache nid_init_state = { };
         unsigned long nr_pgmask = pageblock_nr_pages - 1;
-        unsigned long free_base_pfn = 0;
         unsigned long nr_pages = 0;
-        unsigned long nr_free = 0;
         struct page *page = NULL;
-        unsigned long pfn;
 
-        /*
-         * First we check if pfn is valid on architectures where it is possible
-         * to have holes within pageblock_nr_pages. On systems where it is not
-         * possible, this function is optimized out.
-         *
-         * Then, we check if a current large page is valid by only checking the
-         * validity of the head pfn.
-         *
-         * meminit_pfn_in_nid is checked on systems where pfns can interleave
-         * within a node: a pfn is between start and end of a node, but does not
-         * belong to this memory node.
-         *
-         * Finally, we minimize pfn page lookups and scheduler checks by
-         * performing it only once every pageblock_nr_pages.
-         *
-         * We do it in two loops: first we initialize struct page, than free to
-         * buddy allocator, becuse while we are freeing pages we can access
-         * pages that are ahead (computing buddy page in __free_one_page()).
-         */
-        for (pfn = start_pfn; pfn < end_pfn; pfn++) {
-                if (!pfn_valid_within(pfn))
+        for (; pfn < end_pfn; pfn++) {
+                if (!deferred_pfn_valid(nid, pfn, &nid_init_state)) {
+                        page = NULL;
                         continue;
-                if ((pfn & nr_pgmask) || pfn_valid(pfn)) {
-                        if (meminit_pfn_in_nid(pfn, nid, &nid_init_state)) {
-                                if (page && (pfn & nr_pgmask))
-                                        page++;
-                                else
-                                        page = pfn_to_page(pfn);
-                                __init_single_page(page, pfn, zid, nid);
-                                cond_resched();
-                        }
-                }
-        }
-
-        page = NULL;
-        for (pfn = start_pfn; pfn < end_pfn; pfn++) {
-                if (!pfn_valid_within(pfn)) {
-                        nr_pages += __def_free(&nr_free, &free_base_pfn, &page);
-                } else if (!(pfn & nr_pgmask) && !pfn_valid(pfn)) {
-                        nr_pages += __def_free(&nr_free, &free_base_pfn, &page);
-                } else if (!meminit_pfn_in_nid(pfn, nid, &nid_init_state)) {
-                        nr_pages += __def_free(&nr_free, &free_base_pfn, &page);
-                } else if (page && (pfn & nr_pgmask)) {
-                        page++;
-                        nr_free++;
-                } else {
-                        nr_pages += __def_free(&nr_free, &free_base_pfn, &page);
+                } else if (!page || !(pfn & nr_pgmask)) {
                         page = pfn_to_page(pfn);
-                        free_base_pfn = pfn;
-                        nr_free = 1;
                         cond_resched();
+                } else {
+                        page++;
                 }
+                __init_single_page(page, pfn, zid, nid);
+                nr_pages++;
         }
-        /* Free the last block of pages to allocator */
-        nr_pages += __def_free(&nr_free, &free_base_pfn, &page);
-
-        return nr_pages;
+        return (nr_pages);
 }
 
 /* Initialise remaining memory on a node */
@@ -1582,10 +1577,21 @@ static int __init deferred_init_memmap(void *data)
         }
         first_init_pfn = max(zone->zone_start_pfn, first_init_pfn);
 
+        /*
+         * Initialize and free pages. We do it in two loops: first we initialize
+         * struct page, than free to buddy allocator, because while we are
+         * freeing pages we can access pages that are ahead (computing buddy
+         * page in __free_one_page()).
+         */
+        for_each_free_mem_range(i, nid, MEMBLOCK_NONE, &spa, &epa, NULL) {
+                spfn = max_t(unsigned long, first_init_pfn, PFN_UP(spa));
+                epfn = min_t(unsigned long, zone_end_pfn(zone), PFN_DOWN(epa));
+                nr_pages += deferred_init_pages(nid, zid, spfn, epfn);
+        }
         for_each_free_mem_range(i, nid, MEMBLOCK_NONE, &spa, &epa, NULL) {
                 spfn = max_t(unsigned long, first_init_pfn, PFN_UP(spa));
                 epfn = min_t(unsigned long, zone_end_pfn(zone), PFN_DOWN(epa));
-                nr_pages += deferred_init_range(nid, zid, spfn, epfn);
+                deferred_free_pages(nid, zid, spfn, epfn);
         }
 
         /* Sanity check that the next zone really is unpopulated */
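One payoff of the split, called out in the commit message, is that the two
passes can later run under different threading schemes. A hypothetical
userspace-flavoured sketch with pthreads follows; none of it is in the commit,
and struct init_range, init_worker(), and parallel_init_then_free() are
invented purely to illustrate the init-barrier-free ordering:

#include <pthread.h>

/* Hypothetical per-range work item; all names here are invented. */
struct init_range {
        int nid, zid;
        unsigned long spfn, epfn;
};

/* Pass 1 worker: would call deferred_init_pages() on its range. */
static void *init_worker(void *arg)
{
        struct init_range *r = arg;

        (void)r;        /* deferred_init_pages(r->nid, r->zid, r->spfn, r->epfn); */
        return NULL;
}

/*
 * Run the initializing pass in parallel and join every worker before any
 * freeing begins, so no free can touch a struct page that a peer thread
 * has not finished initializing.
 */
static void parallel_init_then_free(struct init_range *ranges, int n)
{
        pthread_t tid[n];

        for (int i = 0; i < n; i++)
                pthread_create(&tid[i], NULL, init_worker, &ranges[i]);
        for (int i = 0; i < n; i++)
                pthread_join(tid[i], NULL);     /* barrier between the passes */

        /* Pass 2: deferred_free_pages() on each range, possibly also threaded. */
}

The join acts as the "wait for all threads to finish" step from the commit
message: only once every struct page is initialized can any thread start
freeing, which is exactly the invariant the two-function split establishes.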
