Skip to content

Commit 7960509

Browse files
Michal Hocko authored and torvalds committed
mm, memory_hotplug: print reason for the offlining failure
The memory offlining failure reporting is inconsistent and insufficient. Some error paths simply do not report the failure to the log at all. When we do report there are no details about the reason of the failure and there are several of them which makes memory offlining failures hard to debug.

Make sure that the

    memory offlining [mem %#010llx-%#010llx] failed

message is printed for all failures and also provide a short textual reason for the failure e.g.

    [ 1984.506184] rac1 kernel: memory offlining [mem 0x82600000000-0x8267fffffff] failed due to signal backoff

this tells us that the offlining has failed because of a signal pending aka user intervention.

[akpm@linux-foundation.org: tweak messages a bit]
Link: http://lkml.kernel.org/r/20181107101830.17405-5-mhocko@kernel.org
Signed-off-by: Michal Hocko <mhocko@suse.com>
Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Reviewed-by: Oscar Salvador <osalvador@suse.de>
Reviewed-by: Anshuman Khandual <anshuman.khandual@arm.com>
Cc: Baoquan He <bhe@redhat.com>
Cc: Oscar Salvador <OSalvador@suse.com>
Cc: William Kucharski <william.kucharski@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
1 parent 6cc2baf commit 7960509

File tree

1 file changed

+23
-11
lines changed

1 file changed

+23
-11
lines changed

mm/memory_hotplug.c

Lines changed: 23 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -1553,6 +1553,7 @@ static int __ref __offline_pages(unsigned long start_pfn,
15531553
unsigned long valid_start, valid_end;
15541554
struct zone *zone;
15551555
struct memory_notify arg;
1556+
char *reason;
15561557

15571558
mem_hotplug_begin();
15581559

@@ -1561,7 +1562,9 @@ static int __ref __offline_pages(unsigned long start_pfn,
15611562
if (!test_pages_in_a_zone(start_pfn, end_pfn, &valid_start,
15621563
&valid_end)) {
15631564
mem_hotplug_done();
1564-
return -EINVAL;
1565+
ret = -EINVAL;
1566+
reason = "multizone range";
1567+
goto failed_removal;
15651568
}
15661569

15671570
zone = page_zone(pfn_to_page(valid_start));
@@ -1573,7 +1576,8 @@ static int __ref __offline_pages(unsigned long start_pfn,
15731576
MIGRATE_MOVABLE, true);
15741577
if (ret) {
15751578
mem_hotplug_done();
1576-
return ret;
1579+
reason = "failure to isolate range";
1580+
goto failed_removal;
15771581
}
15781582

15791583
arg.start_pfn = start_pfn;
@@ -1582,15 +1586,19 @@ static int __ref __offline_pages(unsigned long start_pfn,
15821586

15831587
ret = memory_notify(MEM_GOING_OFFLINE, &arg);
15841588
ret = notifier_to_errno(ret);
1585-
if (ret)
1586-
goto failed_removal;
1589+
if (ret) {
1590+
reason = "notifier failure";
1591+
goto failed_removal_isolated;
1592+
}
15871593

15881594
pfn = start_pfn;
15891595
repeat:
15901596
/* start memory hot removal */
15911597
ret = -EINTR;
1592-
if (signal_pending(current))
1593-
goto failed_removal;
1598+
if (signal_pending(current)) {
1599+
reason = "signal backoff";
1600+
goto failed_removal_isolated;
1601+
}
15941602

15951603
cond_resched();
15961604
lru_add_drain_all();
@@ -1607,8 +1615,10 @@ static int __ref __offline_pages(unsigned long start_pfn,
16071615
* actually in order to make hugetlbfs's object counting consistent.
16081616
*/
16091617
ret = dissolve_free_huge_pages(start_pfn, end_pfn);
1610-
if (ret)
1611-
goto failed_removal;
1618+
if (ret) {
1619+
reason = "failure to dissolve huge pages";
1620+
goto failed_removal_isolated;
1621+
}
16121622
/* check again */
16131623
offlined_pages = check_pages_isolated(start_pfn, end_pfn);
16141624
if (offlined_pages < 0)
@@ -1648,13 +1658,15 @@ static int __ref __offline_pages(unsigned long start_pfn,
16481658
mem_hotplug_done();
16491659
return 0;
16501660

1661+
failed_removal_isolated:
1662+
undo_isolate_page_range(start_pfn, end_pfn, MIGRATE_MOVABLE);
16511663
failed_removal:
1652-
pr_debug("memory offlining [mem %#010llx-%#010llx] failed\n",
1664+
pr_debug("memory offlining [mem %#010llx-%#010llx] failed due to %s\n",
16531665
(unsigned long long) start_pfn << PAGE_SHIFT,
1654-
((unsigned long long) end_pfn << PAGE_SHIFT) - 1);
1666+
((unsigned long long) end_pfn << PAGE_SHIFT) - 1,
1667+
reason);
16551668
memory_notify(MEM_CANCEL_OFFLINE, &arg);
16561669
/* pushback to free area */
1657-
undo_isolate_page_range(start_pfn, end_pfn, MIGRATE_MOVABLE);
16581670
mem_hotplug_done();
16591671
return ret;
16601672
}

0 commit comments

Comments
 (0)