Skip to content

Commit eb78998

Browse files
idosch authored and davem330 committed
mlxsw: spectrum_router: Populate adjacency entries according to weights
Up until now the driver assumed all the nexthops have an equal weight and wrote each to a single adjacency entry. This patch takes the `weight` parameter into account and populates the adjacency group according to the relative weight of each nexthop. Specifically, the weights of all the nexthops that should be offloaded are first normalized and then used to calculate the upper adjacency index of each nexthop. This is done according to the hash-threshold algorithm used by the kernel for IPv4 multi-path routing. Adjacency groups are currently limited to 32 entries which limits the weights that can be used, but follow-up patches will introduce groups of 512 entries. Signed-off-by: Ido Schimmel <idosch@mellanox.com> Signed-off-by: Jiri Pirko <jiri@mellanox.com> Signed-off-by: David S. Miller <davem@davemloft.net>
1 parent 425a08c commit eb78998

File tree

1 file changed

+99
-12
lines changed

1 file changed

+99
-12
lines changed

drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c

Lines changed: 99 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@
4646
#include <linux/if_bridge.h>
4747
#include <linux/socket.h>
4848
#include <linux/route.h>
49+
#include <linux/gcd.h>
4950
#include <net/netevent.h>
5051
#include <net/neighbour.h>
5152
#include <net/arp.h>
@@ -2204,6 +2205,8 @@ struct mlxsw_sp_nexthop {
22042205
unsigned char gw_addr[sizeof(struct in6_addr)];
22052206
int ifindex;
22062207
int nh_weight;
2208+
int norm_nh_weight;
2209+
int num_adj_entries;
22072210
struct mlxsw_sp_rif *rif;
22082211
u8 should_offload:1, /* set indicates this neigh is connected and
22092212
* should be put to KVD linear area of this group.
@@ -2233,6 +2236,7 @@ struct mlxsw_sp_nexthop_group {
22332236
u32 adj_index;
22342237
u16 ecmp_size;
22352238
u16 count;
2239+
int sum_norm_weight;
22362240
struct mlxsw_sp_nexthop nexthops[0];
22372241
#define nh_rif nexthops[0].rif
22382242
};
@@ -2318,7 +2322,7 @@ int mlxsw_sp_nexthop_indexes(struct mlxsw_sp_nexthop *nh, u32 *p_adj_index,
23182322
if (nh_iter == nh)
23192323
break;
23202324
if (nh_iter->offloaded)
2321-
adj_hash_index++;
2325+
adj_hash_index += nh_iter->num_adj_entries;
23222326
}
23232327

23242328
*p_adj_hash_index = adj_hash_index;
@@ -2601,8 +2605,8 @@ static int mlxsw_sp_adj_index_mass_update(struct mlxsw_sp *mlxsw_sp,
26012605
return 0;
26022606
}
26032607

2604-
int mlxsw_sp_nexthop_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
2605-
struct mlxsw_sp_nexthop *nh)
2608+
static int __mlxsw_sp_nexthop_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
2609+
struct mlxsw_sp_nexthop *nh)
26062610
{
26072611
struct mlxsw_sp_neigh_entry *neigh_entry = nh->neigh_entry;
26082612
char ratr_pl[MLXSW_REG_RATR_LEN];
@@ -2619,16 +2623,50 @@ int mlxsw_sp_nexthop_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
26192623
return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ratr), ratr_pl);
26202624
}
26212625

2622-
static int mlxsw_sp_nexthop_ipip_update(struct mlxsw_sp *mlxsw_sp,
2623-
u32 adj_index,
2624-
struct mlxsw_sp_nexthop *nh)
2626+
/* Program every adjacency entry that belongs to @nh.
 *
 * The nexthop occupies nh->num_adj_entries consecutive adjacency indexes
 * starting at @adj_index; __mlxsw_sp_nexthop_update() is invoked once for
 * each of them, in ascending order.
 *
 * Returns 0 when all entries were written (or when the nexthop occupies no
 * entries), otherwise the error code of the first failing write. Entries
 * following a failed write are not attempted.
 */
int mlxsw_sp_nexthop_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
			    struct mlxsw_sp_nexthop *nh)
{
	int offset = 0;
	int err = 0;

	while (!err && offset < nh->num_adj_entries) {
		err = __mlxsw_sp_nexthop_update(mlxsw_sp, adj_index + offset,
						nh);
		offset++;
	}

	return err;
}
2641+
2642+
static int __mlxsw_sp_nexthop_ipip_update(struct mlxsw_sp *mlxsw_sp,
2643+
u32 adj_index,
2644+
struct mlxsw_sp_nexthop *nh)
26252645
{
26262646
const struct mlxsw_sp_ipip_ops *ipip_ops;
26272647

26282648
ipip_ops = mlxsw_sp->router->ipip_ops_arr[nh->ipip_entry->ipipt];
26292649
return ipip_ops->nexthop_update(mlxsw_sp, adj_index, nh->ipip_entry);
26302650
}
26312651

2652+
static int mlxsw_sp_nexthop_ipip_update(struct mlxsw_sp *mlxsw_sp,
2653+
u32 adj_index,
2654+
struct mlxsw_sp_nexthop *nh)
2655+
{
2656+
int i;
2657+
2658+
for (i = 0; i < nh->num_adj_entries; i++) {
2659+
int err;
2660+
2661+
err = __mlxsw_sp_nexthop_ipip_update(mlxsw_sp, adj_index + i,
2662+
nh);
2663+
if (err)
2664+
return err;
2665+
}
2666+
2667+
return 0;
2668+
}
2669+
26322670
static int
26332671
mlxsw_sp_nexthop_group_update(struct mlxsw_sp *mlxsw_sp,
26342672
struct mlxsw_sp_nexthop_group *nh_grp,
@@ -2663,7 +2701,7 @@ mlxsw_sp_nexthop_group_update(struct mlxsw_sp *mlxsw_sp,
26632701
nh->update = 0;
26642702
nh->offloaded = 1;
26652703
}
2666-
adj_index++;
2704+
adj_index += nh->num_adj_entries;
26672705
}
26682706
return 0;
26692707
}
@@ -2761,17 +2799,65 @@ static int mlxsw_sp_fix_adj_grp_size(struct mlxsw_sp *mlxsw_sp,
27612799
return 0;
27622800
}
27632801

2802+
static void
2803+
mlxsw_sp_nexthop_group_normalize(struct mlxsw_sp_nexthop_group *nh_grp)
2804+
{
2805+
int i, g = 0, sum_norm_weight = 0;
2806+
struct mlxsw_sp_nexthop *nh;
2807+
2808+
for (i = 0; i < nh_grp->count; i++) {
2809+
nh = &nh_grp->nexthops[i];
2810+
2811+
if (!nh->should_offload)
2812+
continue;
2813+
if (g > 0)
2814+
g = gcd(nh->nh_weight, g);
2815+
else
2816+
g = nh->nh_weight;
2817+
}
2818+
2819+
for (i = 0; i < nh_grp->count; i++) {
2820+
nh = &nh_grp->nexthops[i];
2821+
2822+
if (!nh->should_offload)
2823+
continue;
2824+
nh->norm_nh_weight = nh->nh_weight / g;
2825+
sum_norm_weight += nh->norm_nh_weight;
2826+
}
2827+
2828+
nh_grp->sum_norm_weight = sum_norm_weight;
2829+
}
2830+
2831+
static void
2832+
mlxsw_sp_nexthop_group_rebalance(struct mlxsw_sp_nexthop_group *nh_grp)
2833+
{
2834+
int total = nh_grp->sum_norm_weight;
2835+
u16 ecmp_size = nh_grp->ecmp_size;
2836+
int i, weight = 0, lower_bound = 0;
2837+
2838+
for (i = 0; i < nh_grp->count; i++) {
2839+
struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i];
2840+
int upper_bound;
2841+
2842+
if (!nh->should_offload)
2843+
continue;
2844+
weight += nh->norm_nh_weight;
2845+
upper_bound = DIV_ROUND_CLOSEST(ecmp_size * weight, total);
2846+
nh->num_adj_entries = upper_bound - lower_bound;
2847+
lower_bound = upper_bound;
2848+
}
2849+
}
2850+
27642851
static void
27652852
mlxsw_sp_nexthop_group_refresh(struct mlxsw_sp *mlxsw_sp,
27662853
struct mlxsw_sp_nexthop_group *nh_grp)
27672854
{
2855+
u16 ecmp_size, old_ecmp_size;
27682856
struct mlxsw_sp_nexthop *nh;
27692857
bool offload_change = false;
27702858
u32 adj_index;
2771-
u16 ecmp_size = 0;
27722859
bool old_adj_index_valid;
27732860
u32 old_adj_index;
2774-
u16 old_ecmp_size;
27752861
int i;
27762862
int err;
27772863

@@ -2788,8 +2874,6 @@ mlxsw_sp_nexthop_group_refresh(struct mlxsw_sp *mlxsw_sp,
27882874
if (nh->should_offload)
27892875
nh->update = 1;
27902876
}
2791-
if (nh->should_offload)
2792-
ecmp_size++;
27932877
}
27942878
if (!offload_change) {
27952879
/* Nothing was added or removed, so no need to reallocate. Just
@@ -2802,12 +2886,14 @@ mlxsw_sp_nexthop_group_refresh(struct mlxsw_sp *mlxsw_sp,
28022886
}
28032887
return;
28042888
}
2805-
if (!ecmp_size)
2889+
mlxsw_sp_nexthop_group_normalize(nh_grp);
2890+
if (!nh_grp->sum_norm_weight)
28062891
/* No neigh of this group is connected so we just set
28072892
* the trap and let everything flow through kernel.
28082893
*/
28092894
goto set_trap;
28102895

2896+
ecmp_size = nh_grp->sum_norm_weight;
28112897
err = mlxsw_sp_fix_adj_grp_size(mlxsw_sp, &ecmp_size);
28122898
if (err)
28132899
/* No valid allocation size available. */
@@ -2827,6 +2913,7 @@ mlxsw_sp_nexthop_group_refresh(struct mlxsw_sp *mlxsw_sp,
28272913
nh_grp->adj_index_valid = 1;
28282914
nh_grp->adj_index = adj_index;
28292915
nh_grp->ecmp_size = ecmp_size;
2916+
mlxsw_sp_nexthop_group_rebalance(nh_grp);
28302917
err = mlxsw_sp_nexthop_group_update(mlxsw_sp, nh_grp, true);
28312918
if (err) {
28322919
dev_warn(mlxsw_sp->bus_info->dev, "Failed to update neigh MAC in adjacency table.\n");

0 commit comments

Comments
 (0)