Skip to content

Commit 0565de2

Browse files
committed
Merge branch 'ipv6-Separate-data-structures-for-FIB-and-data-path'
David Ahern says: ==================== net/ipv6: Separate data structures for FIB and data path IPv6 uses the same data struct for both control plane (FIB entries) and data path (dst entries). This struct has elements needed for both paths adding memory overhead and complexity (taking a dst hold in most places but an additional reference on rt6i_ref in a few). Furthermore, because of the dst_alloc tie, all FIB entries are allocated with GFP_ATOMIC. This patch set separates FIB entries from dst entries, better aligning IPv6 code with IPv4, simplifying the reference counting and allowing FIB entries added by userspace (not autoconf) to use GFP_KERNEL. It is the first step to a number of performance and scalability changes. The end result of this patch set: - FIB entries (fib6_info): /* size: 208, cachelines: 4, members: 25 */ /* sum members: 207, holes: 1, sum holes: 1 */ - dst entries (rt6_info) /* size: 240, cachelines: 4, members: 11 */ Versus the single rt6_info struct today for both paths: /* size: 320, cachelines: 5, members: 28 */ This amounts to a 35% reduction in memory use for FIB entries and a 25% reduction for dst entries. With respect to locking, FIB entries use RCU and a single atomic counter with fib6_info_hold and fib6_info_release helpers to manage the reference counting. dst entries use only the traditional dst refcounts with dst_hold and dst_release. FIB entries for host routes are referenced by inet6_ifaddr and ifacaddr6. In both cases, additional holds are taken -- similar to what is done for devices. This set is the first of many changes to improve the scalability of the IPv6 code. Follow on changes include: - consolidating duplicate fib6_info references like IPv4 does with duplicate fib_info - moving fib6_info into a slab cache to avoid allocation roundups to power of 2 (the 208 size becomes a 256 actual allocation) - Allow FIB lookups without generating a dst (e.g., most rt6_lookup users just want to verify the egress device). 
Means moving dst allocation to the other side of fib6_rule_lookup which again aligns with IPv4 behavior - using separate standalone nexthop objects which have performance benefits beyond fib_info consolidation At this point I am not seeing any refcount leaks or underflows, no oops or bug_ons, or warnings from kasan, so I think it is ready for others to beat up on it finding errors in code paths I have missed. v2 changes - rebased to top of tree - improved commit message on patch 7 v1 changes - rebased to top of tree - fix memory leak of metrics as noted by Ido - MTU fixes based on pmtu tests (thanks Stefano Brivio for writing) RFC v2 changes - improved commit messages - move common metrics code from dst.c to net/ipv4/metrics.c (comment from DaveM) - address comments from Wei Wang and Martin KaFai Lau (let me know if I missed something) - fixes detected by kernel test robots + added fib6_metric_set to change metric on a FIB entry which could be pointing to read-only dst_default_metrics + 0day testing found a problem with an intermediate patch; added dst_hold_safe on rt->from. Code is removed 3 patches later - allow cacheinfo to handle NULL dst; means only expires is pushed to userspace ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
2 parents a2d481b + 77634cc commit 0565de2

File tree

19 files changed

+1218
-1136
lines changed

19 files changed

+1218
-1136
lines changed

drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c

Lines changed: 48 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -442,7 +442,7 @@ struct mlxsw_sp_fib6_entry {
442442

443443
struct mlxsw_sp_rt6 {
444444
struct list_head list;
445-
struct rt6_info *rt;
445+
struct fib6_info *rt;
446446
};
447447

448448
struct mlxsw_sp_lpm_tree {
@@ -2770,9 +2770,9 @@ mlxsw_sp_nexthop6_group_cmp(const struct mlxsw_sp_nexthop_group *nh_grp,
27702770
struct in6_addr *gw;
27712771
int ifindex, weight;
27722772

2773-
ifindex = mlxsw_sp_rt6->rt->dst.dev->ifindex;
2774-
weight = mlxsw_sp_rt6->rt->rt6i_nh_weight;
2775-
gw = &mlxsw_sp_rt6->rt->rt6i_gateway;
2773+
ifindex = mlxsw_sp_rt6->rt->fib6_nh.nh_dev->ifindex;
2774+
weight = mlxsw_sp_rt6->rt->fib6_nh.nh_weight;
2775+
gw = &mlxsw_sp_rt6->rt->fib6_nh.nh_gw;
27762776
if (!mlxsw_sp_nexthop6_group_has_nexthop(nh_grp, gw, ifindex,
27772777
weight))
27782778
return false;
@@ -2838,7 +2838,7 @@ mlxsw_sp_nexthop6_group_hash(struct mlxsw_sp_fib6_entry *fib6_entry, u32 seed)
28382838
struct net_device *dev;
28392839

28402840
list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
2841-
dev = mlxsw_sp_rt6->rt->dst.dev;
2841+
dev = mlxsw_sp_rt6->rt->fib6_nh.nh_dev;
28422842
val ^= dev->ifindex;
28432843
}
28442844

@@ -3834,11 +3834,11 @@ mlxsw_sp_rt6_nexthop(struct mlxsw_sp_nexthop_group *nh_grp,
38343834

38353835
for (i = 0; i < nh_grp->count; i++) {
38363836
struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i];
3837-
struct rt6_info *rt = mlxsw_sp_rt6->rt;
3837+
struct fib6_info *rt = mlxsw_sp_rt6->rt;
38383838

3839-
if (nh->rif && nh->rif->dev == rt->dst.dev &&
3839+
if (nh->rif && nh->rif->dev == rt->fib6_nh.nh_dev &&
38403840
ipv6_addr_equal((const struct in6_addr *) &nh->gw_addr,
3841-
&rt->rt6i_gateway))
3841+
&rt->fib6_nh.nh_gw))
38423842
return nh;
38433843
continue;
38443844
}
@@ -3895,7 +3895,7 @@ mlxsw_sp_fib6_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry)
38953895

38963896
if (fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_LOCAL) {
38973897
list_first_entry(&fib6_entry->rt6_list, struct mlxsw_sp_rt6,
3898-
list)->rt->rt6i_nh_flags |= RTNH_F_OFFLOAD;
3898+
list)->rt->fib6_nh.nh_flags |= RTNH_F_OFFLOAD;
38993899
return;
39003900
}
39013901

@@ -3905,9 +3905,9 @@ mlxsw_sp_fib6_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry)
39053905

39063906
nh = mlxsw_sp_rt6_nexthop(nh_grp, mlxsw_sp_rt6);
39073907
if (nh && nh->offloaded)
3908-
mlxsw_sp_rt6->rt->rt6i_nh_flags |= RTNH_F_OFFLOAD;
3908+
mlxsw_sp_rt6->rt->fib6_nh.nh_flags |= RTNH_F_OFFLOAD;
39093909
else
3910-
mlxsw_sp_rt6->rt->rt6i_nh_flags &= ~RTNH_F_OFFLOAD;
3910+
mlxsw_sp_rt6->rt->fib6_nh.nh_flags &= ~RTNH_F_OFFLOAD;
39113911
}
39123912
}
39133913

@@ -3920,9 +3920,9 @@ mlxsw_sp_fib6_entry_offload_unset(struct mlxsw_sp_fib_entry *fib_entry)
39203920
fib6_entry = container_of(fib_entry, struct mlxsw_sp_fib6_entry,
39213921
common);
39223922
list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
3923-
struct rt6_info *rt = mlxsw_sp_rt6->rt;
3923+
struct fib6_info *rt = mlxsw_sp_rt6->rt;
39243924

3925-
rt->rt6i_nh_flags &= ~RTNH_F_OFFLOAD;
3925+
rt->fib6_nh.nh_flags &= ~RTNH_F_OFFLOAD;
39263926
}
39273927
}
39283928

@@ -4699,7 +4699,7 @@ static void mlxsw_sp_router_fib4_del(struct mlxsw_sp *mlxsw_sp,
46994699
mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
47004700
}
47014701

4702-
static bool mlxsw_sp_fib6_rt_should_ignore(const struct rt6_info *rt)
4702+
static bool mlxsw_sp_fib6_rt_should_ignore(const struct fib6_info *rt)
47034703
{
47044704
/* Packets with link-local destination IP arriving to the router
47054705
* are trapped to the CPU, so no need to program specific routes
@@ -4721,7 +4721,7 @@ static bool mlxsw_sp_fib6_rt_should_ignore(const struct rt6_info *rt)
47214721
return false;
47224722
}
47234723

4724-
static struct mlxsw_sp_rt6 *mlxsw_sp_rt6_create(struct rt6_info *rt)
4724+
static struct mlxsw_sp_rt6 *mlxsw_sp_rt6_create(struct fib6_info *rt)
47254725
{
47264726
struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
47274727

@@ -4734,18 +4734,18 @@ static struct mlxsw_sp_rt6 *mlxsw_sp_rt6_create(struct rt6_info *rt)
47344734
* memory.
47354735
*/
47364736
mlxsw_sp_rt6->rt = rt;
4737-
rt6_hold(rt);
4737+
fib6_info_hold(rt);
47384738

47394739
return mlxsw_sp_rt6;
47404740
}
47414741

47424742
#if IS_ENABLED(CONFIG_IPV6)
4743-
static void mlxsw_sp_rt6_release(struct rt6_info *rt)
4743+
static void mlxsw_sp_rt6_release(struct fib6_info *rt)
47444744
{
4745-
rt6_release(rt);
4745+
fib6_info_release(rt);
47464746
}
47474747
#else
4748-
static void mlxsw_sp_rt6_release(struct rt6_info *rt)
4748+
static void mlxsw_sp_rt6_release(struct fib6_info *rt)
47494749
{
47504750
}
47514751
#endif
@@ -4756,13 +4756,13 @@ static void mlxsw_sp_rt6_destroy(struct mlxsw_sp_rt6 *mlxsw_sp_rt6)
47564756
kfree(mlxsw_sp_rt6);
47574757
}
47584758

4759-
static bool mlxsw_sp_fib6_rt_can_mp(const struct rt6_info *rt)
4759+
static bool mlxsw_sp_fib6_rt_can_mp(const struct fib6_info *rt)
47604760
{
47614761
/* RTF_CACHE routes are ignored */
47624762
return (rt->rt6i_flags & (RTF_GATEWAY | RTF_ADDRCONF)) == RTF_GATEWAY;
47634763
}
47644764

4765-
static struct rt6_info *
4765+
static struct fib6_info *
47664766
mlxsw_sp_fib6_entry_rt(const struct mlxsw_sp_fib6_entry *fib6_entry)
47674767
{
47684768
return list_first_entry(&fib6_entry->rt6_list, struct mlxsw_sp_rt6,
@@ -4771,15 +4771,15 @@ mlxsw_sp_fib6_entry_rt(const struct mlxsw_sp_fib6_entry *fib6_entry)
47714771

47724772
static struct mlxsw_sp_fib6_entry *
47734773
mlxsw_sp_fib6_node_mp_entry_find(const struct mlxsw_sp_fib_node *fib_node,
4774-
const struct rt6_info *nrt, bool replace)
4774+
const struct fib6_info *nrt, bool replace)
47754775
{
47764776
struct mlxsw_sp_fib6_entry *fib6_entry;
47774777

47784778
if (!mlxsw_sp_fib6_rt_can_mp(nrt) || replace)
47794779
return NULL;
47804780

47814781
list_for_each_entry(fib6_entry, &fib_node->entry_list, common.list) {
4782-
struct rt6_info *rt = mlxsw_sp_fib6_entry_rt(fib6_entry);
4782+
struct fib6_info *rt = mlxsw_sp_fib6_entry_rt(fib6_entry);
47834783

47844784
/* RT6_TABLE_LOCAL and RT6_TABLE_MAIN share the same
47854785
* virtual router.
@@ -4802,7 +4802,7 @@ mlxsw_sp_fib6_node_mp_entry_find(const struct mlxsw_sp_fib_node *fib_node,
48024802

48034803
static struct mlxsw_sp_rt6 *
48044804
mlxsw_sp_fib6_entry_rt_find(const struct mlxsw_sp_fib6_entry *fib6_entry,
4805-
const struct rt6_info *rt)
4805+
const struct fib6_info *rt)
48064806
{
48074807
struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
48084808

@@ -4815,21 +4815,21 @@ mlxsw_sp_fib6_entry_rt_find(const struct mlxsw_sp_fib6_entry *fib6_entry,
48154815
}
48164816

48174817
static bool mlxsw_sp_nexthop6_ipip_type(const struct mlxsw_sp *mlxsw_sp,
4818-
const struct rt6_info *rt,
4818+
const struct fib6_info *rt,
48194819
enum mlxsw_sp_ipip_type *ret)
48204820
{
4821-
return rt->dst.dev &&
4822-
mlxsw_sp_netdev_ipip_type(mlxsw_sp, rt->dst.dev, ret);
4821+
return rt->fib6_nh.nh_dev &&
4822+
mlxsw_sp_netdev_ipip_type(mlxsw_sp, rt->fib6_nh.nh_dev, ret);
48234823
}
48244824

48254825
static int mlxsw_sp_nexthop6_type_init(struct mlxsw_sp *mlxsw_sp,
48264826
struct mlxsw_sp_nexthop_group *nh_grp,
48274827
struct mlxsw_sp_nexthop *nh,
4828-
const struct rt6_info *rt)
4828+
const struct fib6_info *rt)
48294829
{
48304830
const struct mlxsw_sp_ipip_ops *ipip_ops;
48314831
struct mlxsw_sp_ipip_entry *ipip_entry;
4832-
struct net_device *dev = rt->dst.dev;
4832+
struct net_device *dev = rt->fib6_nh.nh_dev;
48334833
struct mlxsw_sp_rif *rif;
48344834
int err;
48354835

@@ -4870,13 +4870,13 @@ static void mlxsw_sp_nexthop6_type_fini(struct mlxsw_sp *mlxsw_sp,
48704870
static int mlxsw_sp_nexthop6_init(struct mlxsw_sp *mlxsw_sp,
48714871
struct mlxsw_sp_nexthop_group *nh_grp,
48724872
struct mlxsw_sp_nexthop *nh,
4873-
const struct rt6_info *rt)
4873+
const struct fib6_info *rt)
48744874
{
4875-
struct net_device *dev = rt->dst.dev;
4875+
struct net_device *dev = rt->fib6_nh.nh_dev;
48764876

48774877
nh->nh_grp = nh_grp;
4878-
nh->nh_weight = rt->rt6i_nh_weight;
4879-
memcpy(&nh->gw_addr, &rt->rt6i_gateway, sizeof(nh->gw_addr));
4878+
nh->nh_weight = rt->fib6_nh.nh_weight;
4879+
memcpy(&nh->gw_addr, &rt->fib6_nh.nh_gw, sizeof(nh->gw_addr));
48804880
mlxsw_sp_nexthop_counter_alloc(mlxsw_sp, nh);
48814881

48824882
list_add_tail(&nh->router_list_node, &mlxsw_sp->router->nexthop_list);
@@ -4897,7 +4897,7 @@ static void mlxsw_sp_nexthop6_fini(struct mlxsw_sp *mlxsw_sp,
48974897
}
48984898

48994899
static bool mlxsw_sp_rt6_is_gateway(const struct mlxsw_sp *mlxsw_sp,
4900-
const struct rt6_info *rt)
4900+
const struct fib6_info *rt)
49014901
{
49024902
return rt->rt6i_flags & RTF_GATEWAY ||
49034903
mlxsw_sp_nexthop6_ipip_type(mlxsw_sp, rt, NULL);
@@ -4928,7 +4928,7 @@ mlxsw_sp_nexthop6_group_create(struct mlxsw_sp *mlxsw_sp,
49284928
nh_grp->gateway = mlxsw_sp_rt6_is_gateway(mlxsw_sp, mlxsw_sp_rt6->rt);
49294929
nh_grp->count = fib6_entry->nrt6;
49304930
for (i = 0; i < nh_grp->count; i++) {
4931-
struct rt6_info *rt = mlxsw_sp_rt6->rt;
4931+
struct fib6_info *rt = mlxsw_sp_rt6->rt;
49324932

49334933
nh = &nh_grp->nexthops[i];
49344934
err = mlxsw_sp_nexthop6_init(mlxsw_sp, nh_grp, nh, rt);
@@ -5040,7 +5040,7 @@ mlxsw_sp_nexthop6_group_update(struct mlxsw_sp *mlxsw_sp,
50405040
static int
50415041
mlxsw_sp_fib6_entry_nexthop_add(struct mlxsw_sp *mlxsw_sp,
50425042
struct mlxsw_sp_fib6_entry *fib6_entry,
5043-
struct rt6_info *rt)
5043+
struct fib6_info *rt)
50445044
{
50455045
struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
50465046
int err;
@@ -5068,7 +5068,7 @@ mlxsw_sp_fib6_entry_nexthop_add(struct mlxsw_sp *mlxsw_sp,
50685068
static void
50695069
mlxsw_sp_fib6_entry_nexthop_del(struct mlxsw_sp *mlxsw_sp,
50705070
struct mlxsw_sp_fib6_entry *fib6_entry,
5071-
struct rt6_info *rt)
5071+
struct fib6_info *rt)
50725072
{
50735073
struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
50745074

@@ -5084,7 +5084,7 @@ mlxsw_sp_fib6_entry_nexthop_del(struct mlxsw_sp *mlxsw_sp,
50845084

50855085
static void mlxsw_sp_fib6_entry_type_set(struct mlxsw_sp *mlxsw_sp,
50865086
struct mlxsw_sp_fib_entry *fib_entry,
5087-
const struct rt6_info *rt)
5087+
const struct fib6_info *rt)
50885088
{
50895089
/* Packets hitting RTF_REJECT routes need to be discarded by the
50905090
* stack. We can rely on their destination device not having a
@@ -5118,7 +5118,7 @@ mlxsw_sp_fib6_entry_rt_destroy_all(struct mlxsw_sp_fib6_entry *fib6_entry)
51185118
static struct mlxsw_sp_fib6_entry *
51195119
mlxsw_sp_fib6_entry_create(struct mlxsw_sp *mlxsw_sp,
51205120
struct mlxsw_sp_fib_node *fib_node,
5121-
struct rt6_info *rt)
5121+
struct fib6_info *rt)
51225122
{
51235123
struct mlxsw_sp_fib6_entry *fib6_entry;
51245124
struct mlxsw_sp_fib_entry *fib_entry;
@@ -5168,12 +5168,12 @@ static void mlxsw_sp_fib6_entry_destroy(struct mlxsw_sp *mlxsw_sp,
51685168

51695169
static struct mlxsw_sp_fib6_entry *
51705170
mlxsw_sp_fib6_node_entry_find(const struct mlxsw_sp_fib_node *fib_node,
5171-
const struct rt6_info *nrt, bool replace)
5171+
const struct fib6_info *nrt, bool replace)
51725172
{
51735173
struct mlxsw_sp_fib6_entry *fib6_entry, *fallback = NULL;
51745174

51755175
list_for_each_entry(fib6_entry, &fib_node->entry_list, common.list) {
5176-
struct rt6_info *rt = mlxsw_sp_fib6_entry_rt(fib6_entry);
5176+
struct fib6_info *rt = mlxsw_sp_fib6_entry_rt(fib6_entry);
51775177

51785178
if (rt->rt6i_table->tb6_id > nrt->rt6i_table->tb6_id)
51795179
continue;
@@ -5198,7 +5198,7 @@ mlxsw_sp_fib6_node_list_insert(struct mlxsw_sp_fib6_entry *new6_entry,
51985198
bool replace)
51995199
{
52005200
struct mlxsw_sp_fib_node *fib_node = new6_entry->common.fib_node;
5201-
struct rt6_info *nrt = mlxsw_sp_fib6_entry_rt(new6_entry);
5201+
struct fib6_info *nrt = mlxsw_sp_fib6_entry_rt(new6_entry);
52025202
struct mlxsw_sp_fib6_entry *fib6_entry;
52035203

52045204
fib6_entry = mlxsw_sp_fib6_node_entry_find(fib_node, nrt, replace);
@@ -5213,7 +5213,7 @@ mlxsw_sp_fib6_node_list_insert(struct mlxsw_sp_fib6_entry *new6_entry,
52135213
struct mlxsw_sp_fib6_entry *last;
52145214

52155215
list_for_each_entry(last, &fib_node->entry_list, common.list) {
5216-
struct rt6_info *rt = mlxsw_sp_fib6_entry_rt(last);
5216+
struct fib6_info *rt = mlxsw_sp_fib6_entry_rt(last);
52175217

52185218
if (nrt->rt6i_table->tb6_id > rt->rt6i_table->tb6_id)
52195219
break;
@@ -5268,7 +5268,7 @@ mlxsw_sp_fib6_node_entry_unlink(struct mlxsw_sp *mlxsw_sp,
52685268

52695269
static struct mlxsw_sp_fib6_entry *
52705270
mlxsw_sp_fib6_entry_lookup(struct mlxsw_sp *mlxsw_sp,
5271-
const struct rt6_info *rt)
5271+
const struct fib6_info *rt)
52725272
{
52735273
struct mlxsw_sp_fib6_entry *fib6_entry;
52745274
struct mlxsw_sp_fib_node *fib_node;
@@ -5287,7 +5287,7 @@ mlxsw_sp_fib6_entry_lookup(struct mlxsw_sp *mlxsw_sp,
52875287
return NULL;
52885288

52895289
list_for_each_entry(fib6_entry, &fib_node->entry_list, common.list) {
5290-
struct rt6_info *iter_rt = mlxsw_sp_fib6_entry_rt(fib6_entry);
5290+
struct fib6_info *iter_rt = mlxsw_sp_fib6_entry_rt(fib6_entry);
52915291

52925292
if (rt->rt6i_table->tb6_id == iter_rt->rt6i_table->tb6_id &&
52935293
rt->rt6i_metric == iter_rt->rt6i_metric &&
@@ -5316,7 +5316,7 @@ static void mlxsw_sp_fib6_entry_replace(struct mlxsw_sp *mlxsw_sp,
53165316
}
53175317

53185318
static int mlxsw_sp_router_fib6_add(struct mlxsw_sp *mlxsw_sp,
5319-
struct rt6_info *rt, bool replace)
5319+
struct fib6_info *rt, bool replace)
53205320
{
53215321
struct mlxsw_sp_fib6_entry *fib6_entry;
53225322
struct mlxsw_sp_fib_node *fib_node;
@@ -5373,7 +5373,7 @@ static int mlxsw_sp_router_fib6_add(struct mlxsw_sp *mlxsw_sp,
53735373
}
53745374

53755375
static void mlxsw_sp_router_fib6_del(struct mlxsw_sp *mlxsw_sp,
5376-
struct rt6_info *rt)
5376+
struct fib6_info *rt)
53775377
{
53785378
struct mlxsw_sp_fib6_entry *fib6_entry;
53795379
struct mlxsw_sp_fib_node *fib_node;
@@ -5836,7 +5836,7 @@ static void mlxsw_sp_router_fib6_event(struct mlxsw_sp_fib_event_work *fib_work,
58365836
fen6_info = container_of(info, struct fib6_entry_notifier_info,
58375837
info);
58385838
fib_work->fen6_info = *fen6_info;
5839-
rt6_hold(fib_work->fen6_info.rt);
5839+
fib6_info_hold(fib_work->fen6_info.rt);
58405840
break;
58415841
}
58425842
}

0 commit comments

Comments
 (0)