Commit 1537b26

Pavel Tatashin authored and davem330 committed
sparc64: use latency groups to improve add_node_ranges speed
add_node_ranges() takes 2.6s-3.6s of boot time per 1T of memory. On a machine with 6T of memory it takes 15.4s; on 32T it would take 82s-115s. This function sets NUMA ids for memory blocks, and it scans the whole memory a page at a time to do so. Instead, we can use the latency group mask and match values to determine the boundaries without checking every single page.

With the fix, the add_node_ranges() time is reduced from 15.4s down to 0.2s on the machine with 6T of memory.

Signed-off-by: Pavel Tatashin <pasha.tatashin@oracle.com>
Reviewed-by: Babu Moger <babu.moger@oracle.com>
Reviewed-by: Bob Picco <bob.picco@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
1 parent dcd1912 commit 1537b26
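
The core idea, as a minimal userspace sketch (not part of the patch): when a NUMA node is described by a rule of the form (pa & mask) == match, the end of the contiguous run of matching addresses containing pa follows directly from the lowest set bit of the mask, so there is no need to test one page at a time. All function names and the mask/match values below are made up for illustration; only the idea mirrors the commit.

/*
 * Sketch: compute the end of a contiguous run of addresses that all
 * satisfy (pa & mask) == match, and compare it with a page-by-page walk.
 * Hypothetical values; 8K pages as on sparc64.
 */
#include <stdint.h>
#include <stdio.h>

/* Each contiguous matching run is 2^(lowest set bit of mask) bytes long. */
static uint64_t run_size(uint64_t mask)
{
	return (uint64_t)1 << __builtin_ctzll(mask);
}

/* O(1): round pa up to the next run boundary. */
static uint64_t node_run_end(uint64_t pa, uint64_t mask)
{
	uint64_t size = run_size(mask);

	return (pa & ~(size - 1)) + size;
}

/* O(pages): the old-style walk, one page at a time, for comparison. */
static uint64_t node_run_end_slow(uint64_t pa, uint64_t end,
				  uint64_t mask, uint64_t match,
				  uint64_t page_size)
{
	while (pa < end && (pa & mask) == match)
		pa += page_size;
	return pa;
}

int main(void)
{
	uint64_t mask  = 0x180000000ULL;   /* bits 31-32 select the node */
	uint64_t match = 0x080000000ULL;
	uint64_t pa    = 0x081230000ULL;   /* matches the rule above     */

	printf("fast end: %#llx\n", (unsigned long long)node_run_end(pa, mask));
	printf("slow end: %#llx\n",
	       (unsigned long long)node_run_end_slow(pa, 1ULL << 40, mask,
						     match, 8192));
	return 0;
}

Both calls print the same boundary (0x100000000 with these numbers); the patch applies the same reasoning per latency group, plus the mblock offset handling shown in the diff below.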

File tree: 1 file changed, +113 −95 lines


arch/sparc/mm/init_64.c

Lines changed: 113 additions & 95 deletions
@@ -829,13 +829,23 @@ static void __init find_ramdisk(unsigned long phys_base)
 
 struct node_mem_mask {
 	unsigned long mask;
-	unsigned long val;
+	unsigned long match;
 };
 static struct node_mem_mask node_masks[MAX_NUMNODES];
 static int num_node_masks;
 
 #ifdef CONFIG_NEED_MULTIPLE_NODES
 
+struct mdesc_mlgroup {
+	u64	node;
+	u64	latency;
+	u64	match;
+	u64	mask;
+};
+
+static struct mdesc_mlgroup *mlgroups;
+static int num_mlgroups;
+
 int numa_cpu_lookup_table[NR_CPUS];
 cpumask_t numa_cpumask_lookup_table[MAX_NUMNODES];
 
@@ -846,78 +856,129 @@ struct mdesc_mblock {
 };
 static struct mdesc_mblock *mblocks;
 static int num_mblocks;
-static int find_numa_node_for_addr(unsigned long pa,
-				   struct node_mem_mask *pnode_mask);
 
-static unsigned long __init ra_to_pa(unsigned long addr)
+static struct mdesc_mblock * __init addr_to_mblock(unsigned long addr)
 {
+	struct mdesc_mblock *m = NULL;
 	int i;
 
 	for (i = 0; i < num_mblocks; i++) {
-		struct mdesc_mblock *m = &mblocks[i];
+		m = &mblocks[i];
 
 		if (addr >= m->base &&
 		    addr < (m->base + m->size)) {
-			addr += m->offset;
 			break;
 		}
 	}
-	return addr;
+
+	return m;
 }
 
-static int __init find_node(unsigned long addr)
+static u64 __init memblock_nid_range_sun4u(u64 start, u64 end, int *nid)
 {
-	static bool search_mdesc = true;
-	static struct node_mem_mask last_mem_mask = { ~0UL, ~0UL };
-	static int last_index;
-	int i;
+	int prev_nid, new_nid;
 
-	addr = ra_to_pa(addr);
-	for (i = 0; i < num_node_masks; i++) {
-		struct node_mem_mask *p = &node_masks[i];
+	prev_nid = -1;
+	for ( ; start < end; start += PAGE_SIZE) {
+		for (new_nid = 0; new_nid < num_node_masks; new_nid++) {
+			struct node_mem_mask *p = &node_masks[new_nid];
 
-		if ((addr & p->mask) == p->val)
-			return i;
-	}
-	/* The following condition has been observed on LDOM guests because
-	 * node_masks only contains the best latency mask and value.
-	 * LDOM guest's mdesc can contain a single latency group to
-	 * cover multiple address range. Print warning message only if the
-	 * address cannot be found in node_masks nor mdesc.
-	 */
-	if ((search_mdesc) &&
-	    ((addr & last_mem_mask.mask) != last_mem_mask.val)) {
-		/* find the available node in the mdesc */
-		last_index = find_numa_node_for_addr(addr, &last_mem_mask);
-		numadbg("find_node: latency group for address 0x%lx is %d\n",
-			addr, last_index);
-		if ((last_index < 0) || (last_index >= num_node_masks)) {
-			/* WARN_ONCE() and use default group 0 */
-			WARN_ONCE(1, "find_node: A physical address doesn't match a NUMA node rule. Some physical memory will be owned by node 0.");
-			search_mdesc = false;
-			last_index = 0;
+			if ((start & p->mask) == p->match) {
+				if (prev_nid == -1)
+					prev_nid = new_nid;
+				break;
+			}
 		}
+
+		if (new_nid == num_node_masks) {
+			prev_nid = 0;
+			WARN_ONCE(1, "addr[%Lx] doesn't match a NUMA node rule. Some memory will be owned by node 0.",
+				  start);
+			break;
+		}
+
+		if (prev_nid != new_nid)
+			break;
 	}
+	*nid = prev_nid;
 
-	return last_index;
+	return start > end ? end : start;
 }
 
 static u64 __init memblock_nid_range(u64 start, u64 end, int *nid)
 {
-	*nid = find_node(start);
-	start += PAGE_SIZE;
-	while (start < end) {
-		int n = find_node(start);
+	u64 ret_end, pa_start, m_mask, m_match, m_end;
+	struct mdesc_mblock *mblock;
+	int _nid, i;
+
+	if (tlb_type != hypervisor)
+		return memblock_nid_range_sun4u(start, end, nid);
+
+	mblock = addr_to_mblock(start);
+	if (!mblock) {
+		WARN_ONCE(1, "memblock_nid_range: Can't find mblock addr[%Lx]",
+			  start);
+
+		_nid = 0;
+		ret_end = end;
+		goto done;
+	}
+
+	pa_start = start + mblock->offset;
+	m_match = 0;
+	m_mask = 0;
+
+	for (_nid = 0; _nid < num_node_masks; _nid++) {
+		struct node_mem_mask *const m = &node_masks[_nid];
 
-		if (n != *nid)
+		if ((pa_start & m->mask) == m->match) {
+			m_match = m->match;
+			m_mask = m->mask;
 			break;
-		start += PAGE_SIZE;
+		}
 	}
 
-	if (start > end)
-		start = end;
+	if (num_node_masks == _nid) {
+		/* We could not find NUMA group, so default to 0, but lets
+		 * search for latency group, so we could calculate the correct
+		 * end address that we return
+		 */
+		_nid = 0;
 
-	return start;
+		for (i = 0; i < num_mlgroups; i++) {
+			struct mdesc_mlgroup *const m = &mlgroups[i];
+
+			if ((pa_start & m->mask) == m->match) {
+				m_match = m->match;
+				m_mask = m->mask;
+				break;
+			}
+		}
+
+		if (i == num_mlgroups) {
+			WARN_ONCE(1, "memblock_nid_range: Can't find latency group addr[%Lx]",
+				  start);
+
+			ret_end = end;
+			goto done;
+		}
+	}
+
+	/*
+	 * Each latency group has match and mask, and each memory block has an
+	 * offset. An address belongs to a latency group if its address matches
+	 * the following formula: ((addr + offset) & mask) == match
+	 * It is, however, slow to check every single page if it matches a
+	 * particular latency group. As optimization we calculate end value by
+	 * using bit arithmetics.
+	 */
+	m_end = m_match + (1ul << __ffs(m_mask)) - mblock->offset;
+	m_end += pa_start & ~((1ul << fls64(m_mask)) - 1);
+	ret_end = m_end > end ? end : m_end;
+
+done:
+	*nid = _nid;
+	return ret_end;
 }
 #endif
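
The comment block above gives the rule ((addr + offset) & mask) == match and the bit arithmetic that replaces the per-page check. Below is a hedged userspace rendering of that m_end computation; the kernel's __ffs() (lowest set bit, 0-based) and fls64() (highest set bit, 1-based) are emulated with GCC builtins, and the mask/match/offset values are invented for the walk-through.

/*
 * Userspace walk-through of the m_end arithmetic in memblock_nid_range().
 * my_ffs()/my_fls64() stand in for the kernel's __ffs()/fls64();
 * all numeric values below are hypothetical.
 */
#include <stdint.h>
#include <stdio.h>

static unsigned int my_ffs(uint64_t x)   { return __builtin_ctzll(x); }       /* lowest set bit, 0-based  */
static unsigned int my_fls64(uint64_t x) { return 64 - __builtin_clzll(x); }  /* highest set bit, 1-based */

int main(void)
{
	uint64_t m_mask   = 0x180000000ULL;  /* latency group mask (bits 31-32) */
	uint64_t m_match  = 0x100000000ULL;  /* latency group match             */
	uint64_t offset   = 0x020000000ULL;  /* mblock RA -> PA offset          */
	uint64_t start    = 0x0f0000000ULL;  /* RA where the range begins       */
	uint64_t pa_start = start + offset;  /* 0x110000000, matches the rule   */
	uint64_t m_end, chunk, check;

	/* Formula from the patch: end of the contiguous run containing
	 * pa_start, converted back to an RA by subtracting the offset. */
	m_end  = m_match + (1ULL << my_ffs(m_mask)) - offset;
	m_end += pa_start & ~((1ULL << my_fls64(m_mask)) - 1);

	/* Equivalent view: round pa_start up to the next 2^__ffs(mask) boundary. */
	chunk = 1ULL << my_ffs(m_mask);
	check = ((pa_start & ~(chunk - 1)) + chunk) - offset;

	printf("m_end = %#llx, check = %#llx\n",
	       (unsigned long long)m_end, (unsigned long long)check);
	return 0;
}

Both values come out the same (0x160000000 with these numbers), which is why the patch can jump straight to the end of each latency-group run instead of testing every page.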

@@ -958,7 +1019,8 @@ static void init_node_masks_nonnuma(void)
 
 	numadbg("Initializing tables for non-numa.\n");
 
-	node_masks[0].mask = node_masks[0].val = 0;
+	node_masks[0].mask = 0;
+	node_masks[0].match = 0;
 	num_node_masks = 1;
 
 #ifdef CONFIG_NEED_MULTIPLE_NODES
@@ -976,15 +1038,6 @@ EXPORT_SYMBOL(numa_cpu_lookup_table);
 EXPORT_SYMBOL(numa_cpumask_lookup_table);
 EXPORT_SYMBOL(node_data);
 
-struct mdesc_mlgroup {
-	u64	node;
-	u64	latency;
-	u64	match;
-	u64	mask;
-};
-static struct mdesc_mlgroup *mlgroups;
-static int num_mlgroups;
-
 static int scan_pio_for_cfg_handle(struct mdesc_handle *md, u64 pio,
 				   u32 cfg_handle)
 {
@@ -1226,49 +1279,14 @@ int __node_distance(int from, int to)
 	return numa_latency[from][to];
 }
 
-static int find_numa_node_for_addr(unsigned long pa,
-				   struct node_mem_mask *pnode_mask)
-{
-	struct mdesc_handle *md = mdesc_grab();
-	u64 node, arc;
-	int i = 0;
-
-	node = mdesc_node_by_name(md, MDESC_NODE_NULL, "latency-groups");
-	if (node == MDESC_NODE_NULL)
-		goto out;
-
-	mdesc_for_each_node_by_name(md, node, "group") {
-		mdesc_for_each_arc(arc, md, node, MDESC_ARC_TYPE_FWD) {
-			u64 target = mdesc_arc_target(md, arc);
-			struct mdesc_mlgroup *m = find_mlgroup(target);
-
-			if (!m)
-				continue;
-			if ((pa & m->mask) == m->match) {
-				if (pnode_mask) {
-					pnode_mask->mask = m->mask;
-					pnode_mask->val = m->match;
-				}
-				mdesc_release(md);
-				return i;
-			}
-		}
-		i++;
-	}
-
-out:
-	mdesc_release(md);
-	return -1;
-}
-
 static int __init find_best_numa_node_for_mlgroup(struct mdesc_mlgroup *grp)
 {
 	int i;
 
 	for (i = 0; i < MAX_NUMNODES; i++) {
 		struct node_mem_mask *n = &node_masks[i];
 
-		if ((grp->mask == n->mask) && (grp->match == n->val))
+		if ((grp->mask == n->mask) && (grp->match == n->match))
 			break;
 	}
 	return i;
@@ -1323,10 +1341,10 @@ static int __init numa_attach_mlgroup(struct mdesc_handle *md, u64 grp,
 	n = &node_masks[num_node_masks++];
 
 	n->mask = candidate->mask;
-	n->val = candidate->match;
+	n->match = candidate->match;
 
-	numadbg("NUMA NODE[%d]: mask[%lx] val[%lx] (latency[%llx])\n",
-		index, n->mask, n->val, candidate->latency);
+	numadbg("NUMA NODE[%d]: mask[%lx] match[%lx] (latency[%llx])\n",
+		index, n->mask, n->match, candidate->latency);
 
 	return 0;
 }
@@ -1423,7 +1441,7 @@ static int __init numa_parse_jbus(void)
 		numa_cpu_lookup_table[cpu] = index;
 		cpumask_copy(&numa_cpumask_lookup_table[index], cpumask_of(cpu));
 		node_masks[index].mask = ~((1UL << 36UL) - 1UL);
-		node_masks[index].val = cpu << 36UL;
+		node_masks[index].match = cpu << 36UL;
 
 		index++;
 	}
