Skip to content

Commit 5582745

Browse files
committed
Merge branch 'mlxsw-Add-one-armed-router-support'
Ido Schimmel says: ==================== mlxsw: Add one-armed router support Up until now, when a packet was routed by the ASIC through the same router interface (RIF) from which it ingressed, the ASIC passed the sole copy of the packet to the kernel. This allowed the kernel to route the packet and also potentially generate an ICMP redirect. There are scenarios (e.g., "one-armed router") where packets are intentionally routed this way and are therefore not deemed exceptions. In such scenarios the current method of trapping packets to the CPU is problematic, as it results in major packet loss. This patchset solves the problem by having the ASIC forward the packet, but also send a copy to the CPU, which gives the kernel the opportunity to generate the required exceptions. To prevent the kernel from forwarding such packets again, the driver marks them with 'offload_l3_fwd_mark', which causes the kernel to consume them in ip{,6}_forward_finish(). Patch #1 renames 'offload_mr_fwd_mark' to 'offload_l3_fwd_mark'. When set, the field indicates that a packet was already forwarded in L3 (unicast / multicast) by a capable device. Patch #2 teaches the kernel to consume unicast packets that have 'offload_l3_fwd_mark' set. Patch #3 changes mlxsw to mirror loopbacked (iRIF == eRIF) packets, instead of trapping them. Patch #4 adds a test case for the above-mentioned scenario. ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
2 parents d9bbd6a + b6f153d commit 5582745

File tree

8 files changed

+287
-9
lines changed

8 files changed

+287
-9
lines changed

drivers/net/ethernet/mellanox/mlxsw/reg.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5072,6 +5072,7 @@ enum mlxsw_reg_htgt_trap_group {
50725072
MLXSW_REG_HTGT_TRAP_GROUP_SP_EVENT,
50735073
MLXSW_REG_HTGT_TRAP_GROUP_SP_IPV6_MLD,
50745074
MLXSW_REG_HTGT_TRAP_GROUP_SP_IPV6_ND,
5075+
MLXSW_REG_HTGT_TRAP_GROUP_SP_LBERROR,
50755076
};
50765077

50775078
/* reg_htgt_trap_group

drivers/net/ethernet/mellanox/mlxsw/spectrum.c

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3554,10 +3554,10 @@ static void mlxsw_sp_rx_listener_mark_func(struct sk_buff *skb, u8 local_port,
35543554
return mlxsw_sp_rx_listener_no_mark_func(skb, local_port, priv);
35553555
}
35563556

3557-
static void mlxsw_sp_rx_listener_mr_mark_func(struct sk_buff *skb,
3557+
static void mlxsw_sp_rx_listener_l3_mark_func(struct sk_buff *skb,
35583558
u8 local_port, void *priv)
35593559
{
3560-
skb->offload_mr_fwd_mark = 1;
3560+
skb->offload_l3_fwd_mark = 1;
35613561
skb->offload_fwd_mark = 1;
35623562
return mlxsw_sp_rx_listener_no_mark_func(skb, local_port, priv);
35633563
}
@@ -3605,8 +3605,8 @@ static void mlxsw_sp_rx_listener_sample_func(struct sk_buff *skb, u8 local_port,
36053605
MLXSW_RXL(mlxsw_sp_rx_listener_mark_func, _trap_id, _action, \
36063606
_is_ctrl, SP_##_trap_group, DISCARD)
36073607

3608-
#define MLXSW_SP_RXL_MR_MARK(_trap_id, _action, _trap_group, _is_ctrl) \
3609-
MLXSW_RXL(mlxsw_sp_rx_listener_mr_mark_func, _trap_id, _action, \
3608+
#define MLXSW_SP_RXL_L3_MARK(_trap_id, _action, _trap_group, _is_ctrl) \
3609+
MLXSW_RXL(mlxsw_sp_rx_listener_l3_mark_func, _trap_id, _action, \
36103610
_is_ctrl, SP_##_trap_group, DISCARD)
36113611

36123612
#define MLXSW_SP_EVENTL(_func, _trap_id) \
@@ -3639,7 +3639,7 @@ static const struct mlxsw_listener mlxsw_sp_listener[] = {
36393639
/* L3 traps */
36403640
MLXSW_SP_RXL_MARK(MTUERROR, TRAP_TO_CPU, ROUTER_EXP, false),
36413641
MLXSW_SP_RXL_MARK(TTLERROR, TRAP_TO_CPU, ROUTER_EXP, false),
3642-
MLXSW_SP_RXL_MARK(LBERROR, TRAP_TO_CPU, ROUTER_EXP, false),
3642+
MLXSW_SP_RXL_L3_MARK(LBERROR, MIRROR_TO_CPU, LBERROR, false),
36433643
MLXSW_SP_RXL_MARK(IP2ME, TRAP_TO_CPU, IP2ME, false),
36443644
MLXSW_SP_RXL_MARK(IPV6_UNSPECIFIED_ADDRESS, TRAP_TO_CPU, ROUTER_EXP,
36453645
false),
@@ -3683,7 +3683,7 @@ static const struct mlxsw_listener mlxsw_sp_listener[] = {
36833683
MLXSW_SP_RXL_MARK(IPV6_PIM, TRAP_TO_CPU, PIM, false),
36843684
MLXSW_SP_RXL_MARK(RPF, TRAP_TO_CPU, RPF, false),
36853685
MLXSW_SP_RXL_MARK(ACL1, TRAP_TO_CPU, MULTICAST, false),
3686-
MLXSW_SP_RXL_MR_MARK(ACL2, TRAP_TO_CPU, MULTICAST, false),
3686+
MLXSW_SP_RXL_L3_MARK(ACL2, TRAP_TO_CPU, MULTICAST, false),
36873687
/* NVE traps */
36883688
MLXSW_SP_RXL_MARK(NVE_ENCAP_ARP, TRAP_TO_CPU, ARP, false),
36893689
};
@@ -3713,6 +3713,7 @@ static int mlxsw_sp_cpu_policers_set(struct mlxsw_core *mlxsw_core)
37133713
case MLXSW_REG_HTGT_TRAP_GROUP_SP_OSPF:
37143714
case MLXSW_REG_HTGT_TRAP_GROUP_SP_PIM:
37153715
case MLXSW_REG_HTGT_TRAP_GROUP_SP_RPF:
3716+
case MLXSW_REG_HTGT_TRAP_GROUP_SP_LBERROR:
37163717
rate = 128;
37173718
burst_size = 7;
37183719
break;
@@ -3798,6 +3799,7 @@ static int mlxsw_sp_trap_groups_set(struct mlxsw_core *mlxsw_core)
37983799
case MLXSW_REG_HTGT_TRAP_GROUP_SP_ROUTER_EXP:
37993800
case MLXSW_REG_HTGT_TRAP_GROUP_SP_REMOTE_ROUTE:
38003801
case MLXSW_REG_HTGT_TRAP_GROUP_SP_MULTICAST:
3802+
case MLXSW_REG_HTGT_TRAP_GROUP_SP_LBERROR:
38013803
priority = 1;
38023804
tc = 1;
38033805
break;

include/linux/skbuff.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -616,6 +616,8 @@ typedef unsigned char *sk_buff_data_t;
616616
* @pkt_type: Packet class
617617
* @fclone: skbuff clone status
618618
* @ipvs_property: skbuff is owned by ipvs
619+
* @offload_fwd_mark: Packet was L2-forwarded in hardware
620+
* @offload_l3_fwd_mark: Packet was L3-forwarded in hardware
619621
* @tc_skip_classify: do not classify packet. set by IFB device
620622
* @tc_at_ingress: used within tc_classify to distinguish in/egress
621623
* @tc_redirected: packet was redirected by a tc action
@@ -799,7 +801,7 @@ struct sk_buff {
799801
__u8 remcsum_offload:1;
800802
#ifdef CONFIG_NET_SWITCHDEV
801803
__u8 offload_fwd_mark:1;
802-
__u8 offload_mr_fwd_mark:1;
804+
__u8 offload_l3_fwd_mark:1;
803805
#endif
804806
#ifdef CONFIG_NET_CLS_ACT
805807
__u8 tc_skip_classify:1;

net/core/skbuff.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4885,7 +4885,7 @@ void skb_scrub_packet(struct sk_buff *skb, bool xnet)
48854885

48864886
#ifdef CONFIG_NET_SWITCHDEV
48874887
skb->offload_fwd_mark = 0;
4888-
skb->offload_mr_fwd_mark = 0;
4888+
skb->offload_l3_fwd_mark = 0;
48894889
#endif
48904890

48914891
if (!xnet)

net/ipv4/ip_forward.c

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,13 @@ static int ip_forward_finish(struct net *net, struct sock *sk, struct sk_buff *s
6969
__IP_INC_STATS(net, IPSTATS_MIB_OUTFORWDATAGRAMS);
7070
__IP_ADD_STATS(net, IPSTATS_MIB_OUTOCTETS, skb->len);
7171

72+
#ifdef CONFIG_NET_SWITCHDEV
73+
if (skb->offload_l3_fwd_mark) {
74+
consume_skb(skb);
75+
return 0;
76+
}
77+
#endif
78+
7279
if (unlikely(opt->optlen))
7380
ip_forward_options(skb);
7481

net/ipv4/ipmr.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1802,7 +1802,7 @@ static bool ipmr_forward_offloaded(struct sk_buff *skb, struct mr_table *mrt,
18021802
struct vif_device *out_vif = &mrt->vif_table[out_vifi];
18031803
struct vif_device *in_vif = &mrt->vif_table[in_vifi];
18041804

1805-
if (!skb->offload_mr_fwd_mark)
1805+
if (!skb->offload_l3_fwd_mark)
18061806
return false;
18071807
if (!out_vif->dev_parent_id.id_len || !in_vif->dev_parent_id.id_len)
18081808
return false;

net/ipv6/ip6_output.c

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -378,6 +378,13 @@ static inline int ip6_forward_finish(struct net *net, struct sock *sk,
378378
__IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
379379
__IP6_ADD_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTOCTETS, skb->len);
380380

381+
#ifdef CONFIG_NET_SWITCHDEV
382+
if (skb->offload_l3_fwd_mark) {
383+
consume_skb(skb);
384+
return 0;
385+
}
386+
#endif
387+
381388
return dst_output(net, sk, skb);
382389
}
383390

Lines changed: 259 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,259 @@
1+
#!/bin/bash
2+
# SPDX-License-Identifier: GPL-2.0
3+
#
4+
# Test a "one-armed router" [1] scenario. Packets forwarded between H1 and H2
5+
# should be forwarded by the ASIC, but also trapped so that ICMP redirect
6+
# packets could be potentially generated.
7+
#
8+
# 1. https://en.wikipedia.org/wiki/One-armed_router
9+
#
10+
# +---------------------------------+
11+
# | H1 (vrf) |
12+
# | + $h1 |
13+
# | | 192.0.2.1/24 |
14+
# | | 2001:db8:1::1/64 |
15+
# | | |
16+
# | | default via 192.0.2.2 |
17+
# | | default via 2001:db8:1::2 |
18+
# +----|----------------------------+
19+
# |
20+
# +----|----------------------------------------------------------------------+
21+
# | SW | |
22+
# | +--|--------------------------------------------------------------------+ |
23+
# | | + $swp1 BR0 (802.1d) | |
24+
# | | | |
25+
# | | 192.0.2.2/24 | |
26+
# | | 2001:db8:1::2/64 | |
27+
# | | 198.51.100.2/24 | |
28+
# | | 2001:db8:2::2/64 | |
29+
# | | | |
30+
# | | + $swp2 | |
31+
# | +--|--------------------------------------------------------------------+ |
32+
# | | |
33+
# +----|----------------------------------------------------------------------+
34+
# |
35+
# +----|----------------------------+
36+
# | | default via 198.51.100.2 |
37+
# | | default via 2001:db8:2::2 |
38+
# | | |
39+
# | | 2001:db8:2::1/64 |
40+
# | | 198.51.100.1/24 |
41+
# | + $h2 |
42+
# | H2 (vrf) |
43+
# +---------------------------------+
44+
45+
lib_dir=$(dirname $0)/../../../net/forwarding
46+
47+
ALL_TESTS="ping_ipv4 ping_ipv6 fwd_mark_ipv4 fwd_mark_ipv6"
48+
NUM_NETIFS=4
49+
source $lib_dir/tc_common.sh
50+
source $lib_dir/lib.sh
51+
52+
h1_create()
53+
{
54+
simple_if_init $h1 192.0.2.1/24 2001:db8:1::1/64
55+
56+
ip -4 route add default vrf v$h1 nexthop via 192.0.2.2
57+
ip -6 route add default vrf v$h1 nexthop via 2001:db8:1::2
58+
}
59+
60+
h1_destroy()
61+
{
62+
ip -6 route del default vrf v$h1 nexthop via 2001:db8:1::2
63+
ip -4 route del default vrf v$h1 nexthop via 192.0.2.2
64+
65+
simple_if_fini $h1 192.0.2.1/24 2001:db8:1::1/64
66+
}
67+
68+
h2_create()
69+
{
70+
simple_if_init $h2 198.51.100.1/24 2001:db8:2::1/64
71+
72+
ip -4 route add default vrf v$h2 nexthop via 198.51.100.2
73+
ip -6 route add default vrf v$h2 nexthop via 2001:db8:2::2
74+
}
75+
76+
h2_destroy()
77+
{
78+
ip -6 route del default vrf v$h2 nexthop via 2001:db8:2::2
79+
ip -4 route del default vrf v$h2 nexthop via 198.51.100.2
80+
81+
simple_if_fini $h2 198.51.100.1/24 2001:db8:2::1/64
82+
}
83+
84+
switch_create()
85+
{
86+
ip link add name br0 type bridge mcast_snooping 0
87+
ip link set dev br0 up
88+
89+
ip link set dev $swp1 master br0
90+
ip link set dev $swp1 up
91+
ip link set dev $swp2 master br0
92+
ip link set dev $swp2 up
93+
94+
tc qdisc add dev $swp1 clsact
95+
tc qdisc add dev $swp2 clsact
96+
97+
__addr_add_del br0 add 192.0.2.2/24 2001:db8:1::2/64
98+
__addr_add_del br0 add 198.51.100.2/24 2001:db8:2::2/64
99+
}
100+
101+
switch_destroy()
102+
{
103+
__addr_add_del br0 del 198.51.100.2/24 2001:db8:2::2/64
104+
__addr_add_del br0 del 192.0.2.2/24 2001:db8:1::2/64
105+
106+
tc qdisc del dev $swp2 clsact
107+
tc qdisc del dev $swp1 clsact
108+
109+
ip link set dev $swp2 down
110+
ip link set dev $swp2 nomaster
111+
ip link set dev $swp1 down
112+
ip link set dev $swp1 nomaster
113+
114+
ip link set dev br0 down
115+
ip link del dev br0
116+
}
117+
118+
ping_ipv4()
119+
{
120+
ping_test $h1 198.51.100.1 ": h1->h2"
121+
}
122+
123+
ping_ipv6()
124+
{
125+
ping6_test $h1 2001:db8:2::1 ": h1->h2"
126+
}
127+
128+
fwd_mark_ipv4()
129+
{
130+
# Transmit packets from H1 to H2 and make sure they are trapped at
131+
# swp1 due to loopback error, but only forwarded by the ASIC through
132+
# swp2
133+
134+
tc filter add dev $swp1 ingress protocol ip pref 1 handle 101 flower \
135+
skip_hw dst_ip 198.51.100.1 ip_proto udp dst_port 52768 \
136+
action pass
137+
138+
tc filter add dev $swp2 egress protocol ip pref 1 handle 101 flower \
139+
skip_hw dst_ip 198.51.100.1 ip_proto udp dst_port 52768 \
140+
action pass
141+
142+
tc filter add dev $swp2 egress protocol ip pref 2 handle 102 flower \
143+
skip_sw dst_ip 198.51.100.1 ip_proto udp dst_port 52768 \
144+
action pass
145+
146+
ip vrf exec v$h1 $MZ $h1 -c 10 -d 100msec -p 64 -A 192.0.2.1 \
147+
-B 198.51.100.1 -t udp dp=52768,sp=42768 -q
148+
149+
RET=0
150+
151+
tc_check_packets "dev $swp1 ingress" 101 10
152+
check_err $?
153+
154+
log_test "fwd mark: trapping IPv4 packets due to LBERROR"
155+
156+
RET=0
157+
158+
tc_check_packets "dev $swp2 egress" 101 0
159+
check_err $?
160+
161+
log_test "fwd mark: forwarding IPv4 packets in software"
162+
163+
RET=0
164+
165+
tc_check_packets "dev $swp2 egress" 102 10
166+
check_err $?
167+
168+
log_test "fwd mark: forwarding IPv4 packets in hardware"
169+
170+
tc filter del dev $swp2 egress protocol ip pref 2 handle 102 flower
171+
tc filter del dev $swp2 egress protocol ip pref 1 handle 101 flower
172+
tc filter del dev $swp1 ingress protocol ip pref 1 handle 101 flower
173+
}
174+
175+
fwd_mark_ipv6()
176+
{
177+
tc filter add dev $swp1 ingress protocol ipv6 pref 1 handle 101 flower \
178+
skip_hw dst_ip 2001:db8:2::1 ip_proto udp dst_port 52768 \
179+
action pass
180+
181+
tc filter add dev $swp2 egress protocol ipv6 pref 1 handle 101 flower \
182+
skip_hw dst_ip 2001:db8:2::1 ip_proto udp dst_port 52768 \
183+
action pass
184+
185+
tc filter add dev $swp2 egress protocol ipv6 pref 2 handle 102 flower \
186+
skip_sw dst_ip 2001:db8:2::1 ip_proto udp dst_port 52768 \
187+
action pass
188+
189+
ip vrf exec v$h1 $MZ $h1 -6 -c 10 -d 100msec -p 64 -A 2001:db8:1::1 \
190+
-B 2001:db8:2::1 -t udp dp=52768,sp=42768 -q
191+
192+
RET=0
193+
194+
tc_check_packets "dev $swp1 ingress" 101 10
195+
check_err $?
196+
197+
log_test "fwd mark: trapping IPv6 packets due to LBERROR"
198+
199+
RET=0
200+
201+
tc_check_packets "dev $swp2 egress" 101 0
202+
check_err $?
203+
204+
log_test "fwd mark: forwarding IPv6 packets in software"
205+
206+
RET=0
207+
208+
tc_check_packets "dev $swp2 egress" 102 10
209+
check_err $?
210+
211+
log_test "fwd mark: forwarding IPv6 packets in hardware"
212+
213+
tc filter del dev $swp2 egress protocol ipv6 pref 2 handle 102 flower
214+
tc filter del dev $swp2 egress protocol ipv6 pref 1 handle 101 flower
215+
tc filter del dev $swp1 ingress protocol ipv6 pref 1 handle 101 flower
216+
}
217+
218+
setup_prepare()
219+
{
220+
h1=${NETIFS[p1]}
221+
swp1=${NETIFS[p2]}
222+
223+
swp2=${NETIFS[p3]}
224+
h2=${NETIFS[p4]}
225+
226+
vrf_prepare
227+
forwarding_enable
228+
229+
sysctl_set net.ipv4.conf.all.accept_redirects 0
230+
sysctl_set net.ipv6.conf.all.accept_redirects 0
231+
232+
h1_create
233+
h2_create
234+
switch_create
235+
}
236+
237+
cleanup()
238+
{
239+
pre_cleanup
240+
241+
switch_destroy
242+
h2_destroy
243+
h1_destroy
244+
245+
sysctl_restore net.ipv6.conf.all.accept_redirects
246+
sysctl_restore net.ipv4.conf.all.accept_redirects
247+
248+
forwarding_restore
249+
vrf_cleanup
250+
}
251+
252+
trap cleanup EXIT
253+
254+
setup_prepare
255+
setup_wait
256+
257+
tests_run
258+
259+
exit $EXIT_STATUS

0 commit comments

Comments
 (0)