Skip to content

Commit efaffc5

Browse files
gormanm authored and Ingo Molnar committed
mm, sched/numa: Remove rate-limiting of automatic NUMA balancing migration
Rate limiting of page migrations due to automatic NUMA balancing was introduced to mitigate the worst-case scenario of migrating at high frequency due to false sharing or slowly ping-ponging between nodes. Since then, a lot of effort was spent on correctly identifying these pages and avoiding unnecessary migrations and the safety net may no longer be required. Jirka Hladky reported a regression in 4.17 due to a scheduler patch that avoids spreading STREAM tasks wide prematurely. However, once the task was properly placed, it delayed migrating the memory due to rate limiting. Increasing the limit fixed the problem for him. Currently, the limit is hard-coded and does not account for the real capabilities of the hardware. Even if an estimate was attempted, it would not properly account for the number of memory controllers and it could not account for the amount of bandwidth used for normal accesses. Rather than fudging, this patch simply eliminates the rate limiting. However, Jirka reports that a STREAM configuration using multiple processes achieved similar performance to 4.16. In local tests, this patch improved performance of STREAM relative to the baseline but it is somewhat machine-dependent. Most workloads show little or not performance difference implying that there is not a heavily reliance on the throttling mechanism and it is safe to remove. 
STREAM on 2-socket machine 4.19.0-rc5 4.19.0-rc5 numab-v1r1 noratelimit-v1r1 MB/sec copy 43298.52 ( 0.00%) 44673.38 ( 3.18%) MB/sec scale 30115.06 ( 0.00%) 31293.06 ( 3.91%) MB/sec add 32825.12 ( 0.00%) 34883.62 ( 6.27%) MB/sec triad 32549.52 ( 0.00%) 34906.60 ( 7.24%) Signed-off-by: Mel Gorman <mgorman@techsingularity.net> Reviewed-by: Rik van Riel <riel@surriel.com> Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Jirka Hladky <jhladky@redhat.com> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Linux-MM <linux-mm@kvack.org> Cc: Srikar Dronamraju <srikar@linux.vnet.ibm.com> Cc: Thomas Gleixner <tglx@linutronix.de> Link: http://lkml.kernel.org/r/20181001100525.29789-2-mgorman@techsingularity.net Signed-off-by: Ingo Molnar <mingo@kernel.org>
1 parent 6fd98e7 commit efaffc5

File tree

4 files changed

+0
-100
lines changed

4 files changed

+0
-100
lines changed

include/linux/mmzone.h

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -671,12 +671,6 @@ typedef struct pglist_data {
671671
#ifdef CONFIG_NUMA_BALANCING
672672
/* Lock serializing the migrate rate limiting window */
673673
spinlock_t numabalancing_migrate_lock;
674-
675-
/* Rate limiting time interval */
676-
unsigned long numabalancing_migrate_next_window;
677-
678-
/* Number of pages migrated during the rate limiting time interval */
679-
unsigned long numabalancing_migrate_nr_pages;
680674
#endif
681675
/*
682676
* This is a per-node reserve of pages that are not available

include/trace/events/migrate.h

Lines changed: 0 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -70,33 +70,6 @@ TRACE_EVENT(mm_migrate_pages,
7070
__print_symbolic(__entry->mode, MIGRATE_MODE),
7171
__print_symbolic(__entry->reason, MIGRATE_REASON))
7272
);
73-
74-
TRACE_EVENT(mm_numa_migrate_ratelimit,
75-
76-
TP_PROTO(struct task_struct *p, int dst_nid, unsigned long nr_pages),
77-
78-
TP_ARGS(p, dst_nid, nr_pages),
79-
80-
TP_STRUCT__entry(
81-
__array( char, comm, TASK_COMM_LEN)
82-
__field( pid_t, pid)
83-
__field( int, dst_nid)
84-
__field( unsigned long, nr_pages)
85-
),
86-
87-
TP_fast_assign(
88-
memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
89-
__entry->pid = p->pid;
90-
__entry->dst_nid = dst_nid;
91-
__entry->nr_pages = nr_pages;
92-
),
93-
94-
TP_printk("comm=%s pid=%d dst_nid=%d nr_pages=%lu",
95-
__entry->comm,
96-
__entry->pid,
97-
__entry->dst_nid,
98-
__entry->nr_pages)
99-
);
10073
#endif /* _TRACE_MIGRATE_H */
10174

10275
/* This part must be outside protection */

mm/migrate.c

Lines changed: 0 additions & 65 deletions
Original file line numberDiff line numberDiff line change
@@ -1855,54 +1855,6 @@ static struct page *alloc_misplaced_dst_page(struct page *page,
18551855
return newpage;
18561856
}
18571857

1858-
/*
1859-
* page migration rate limiting control.
1860-
* Do not migrate more than @pages_to_migrate in a @migrate_interval_millisecs
1861-
* window of time. Default here says do not migrate more than 1280M per second.
1862-
*/
1863-
static unsigned int migrate_interval_millisecs __read_mostly = 100;
1864-
static unsigned int ratelimit_pages __read_mostly = 128 << (20 - PAGE_SHIFT);
1865-
1866-
/* Returns true if the node is migrate rate-limited after the update */
1867-
static bool numamigrate_update_ratelimit(pg_data_t *pgdat,
1868-
unsigned long nr_pages)
1869-
{
1870-
unsigned long next_window, interval;
1871-
1872-
next_window = READ_ONCE(pgdat->numabalancing_migrate_next_window);
1873-
interval = msecs_to_jiffies(migrate_interval_millisecs);
1874-
1875-
/*
1876-
* Rate-limit the amount of data that is being migrated to a node.
1877-
* Optimal placement is no good if the memory bus is saturated and
1878-
* all the time is being spent migrating!
1879-
*/
1880-
if (time_after(jiffies, next_window) &&
1881-
spin_trylock(&pgdat->numabalancing_migrate_lock)) {
1882-
pgdat->numabalancing_migrate_nr_pages = 0;
1883-
do {
1884-
next_window += interval;
1885-
} while (unlikely(time_after(jiffies, next_window)));
1886-
1887-
WRITE_ONCE(pgdat->numabalancing_migrate_next_window, next_window);
1888-
spin_unlock(&pgdat->numabalancing_migrate_lock);
1889-
}
1890-
if (pgdat->numabalancing_migrate_nr_pages > ratelimit_pages) {
1891-
trace_mm_numa_migrate_ratelimit(current, pgdat->node_id,
1892-
nr_pages);
1893-
return true;
1894-
}
1895-
1896-
/*
1897-
* This is an unlocked non-atomic update so errors are possible.
1898-
* The consequences are failing to migrate when we potentially should
1899-
* have which is not severe enough to warrant locking. If it is ever
1900-
* a problem, it can be converted to a per-cpu counter.
1901-
*/
1902-
pgdat->numabalancing_migrate_nr_pages += nr_pages;
1903-
return false;
1904-
}
1905-
19061858
static int numamigrate_isolate_page(pg_data_t *pgdat, struct page *page)
19071859
{
19081860
int page_lru;
@@ -1975,14 +1927,6 @@ int migrate_misplaced_page(struct page *page, struct vm_area_struct *vma,
19751927
if (page_is_file_cache(page) && PageDirty(page))
19761928
goto out;
19771929

1978-
/*
1979-
* Rate-limit the amount of data that is being migrated to a node.
1980-
* Optimal placement is no good if the memory bus is saturated and
1981-
* all the time is being spent migrating!
1982-
*/
1983-
if (numamigrate_update_ratelimit(pgdat, 1))
1984-
goto out;
1985-
19861930
isolated = numamigrate_isolate_page(pgdat, page);
19871931
if (!isolated)
19881932
goto out;
@@ -2029,14 +1973,6 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm,
20291973
unsigned long mmun_start = address & HPAGE_PMD_MASK;
20301974
unsigned long mmun_end = mmun_start + HPAGE_PMD_SIZE;
20311975

2032-
/*
2033-
* Rate-limit the amount of data that is being migrated to a node.
2034-
* Optimal placement is no good if the memory bus is saturated and
2035-
* all the time is being spent migrating!
2036-
*/
2037-
if (numamigrate_update_ratelimit(pgdat, HPAGE_PMD_NR))
2038-
goto out_dropref;
2039-
20401976
new_page = alloc_pages_node(node,
20411977
(GFP_TRANSHUGE_LIGHT | __GFP_THISNODE),
20421978
HPAGE_PMD_ORDER);
@@ -2133,7 +2069,6 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm,
21332069

21342070
out_fail:
21352071
count_vm_events(PGMIGRATE_FAIL, HPAGE_PMD_NR);
2136-
out_dropref:
21372072
ptl = pmd_lock(mm, pmd);
21382073
if (pmd_same(*pmd, entry)) {
21392074
entry = pmd_modify(entry, vma->vm_page_prot);

mm/page_alloc.c

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6197,8 +6197,6 @@ static unsigned long __init calc_memmap_size(unsigned long spanned_pages,
61976197
static void pgdat_init_numabalancing(struct pglist_data *pgdat)
61986198
{
61996199
spin_lock_init(&pgdat->numabalancing_migrate_lock);
6200-
pgdat->numabalancing_migrate_nr_pages = 0;
6201-
pgdat->numabalancing_migrate_next_window = jiffies;
62026200
}
62036201
#else
62046202
static void pgdat_init_numabalancing(struct pglist_data *pgdat) {}

0 commit comments

Comments
 (0)