Skip to content

Commit 5e5b066

Browse files
dingtianhongdavem330
authored andcommitted
bonding: Fix RTNL: assertion failed at net/core/rtnetlink.c for 802.3ad mode
The problem was introduced by the commit 1d3ee88 (bonding: add netlink attributes to slave link dev). The bond_set_active_slave() and bond_set_backup_slave() will use rtmsg_ifinfo to send slave's states, so these two functions should be called in RTNL. In 802.3ad mode, acquiring RTNL for the __enable_port and __disable_port cases is difficult, as those calls generally already hold the state machine lock, and cannot unconditionally call rtnl_lock because either they already hold RTNL (for calls via bond_3ad_unbind_slave) or due to the potential for deadlock with bond_3ad_adapter_speed_changed, bond_3ad_adapter_duplex_changed, bond_3ad_link_change, or bond_3ad_update_lacp_rate. All four of those are called with RTNL held, and acquire the state machine lock second. The calling contexts for __enable_port and __disable_port already hold the state machine lock, and may or may not need RTNL. According to the Jay's opinion, I don't think it is a problem that the slave don't send notify message synchronously when the status changed, normally the state machine is running every 100 ms, send the notify message at the end of the state machine if the slave's state changed should be better. I fix the problem through these steps: 1). add a new function bond_set_slave_state() which could change the slave's state and call rtmsg_ifinfo() according to the input parameters called notify. 2). Add a new slave parameter which called should_notify, if the slave's state changed and don't notify yet, the parameter will be set to 1, and then if the slave's state changed again, the param will be set to 0, it indicate that the slave's state has been restored, no need to notify any one. 3). the __enable_port and __disable_port should not call rtmsg_ifinfo in the state machine lock, any change in the state of slave could set a flag in the slave, it will indicated that an rtmsg_ifinfo should be called at the end of the state machine. Cc: Jay Vosburgh <fubar@us.ibm.com> Cc: Veaceslav Falico <vfalico@redhat.com> Cc: Andy Gospodarek <andy@greyhouse.net> Signed-off-by: Ding Tianhong <dingtianhong@huawei.com> Signed-off-by: David S. Miller <davem@davemloft.net>
1 parent bc90d29 commit 5e5b066

File tree

3 files changed

+75
-22
lines changed

3 files changed

+75
-22
lines changed

drivers/net/bonding/bond_3ad.c

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -181,7 +181,7 @@ static inline int __agg_has_partner(struct aggregator *agg)
181181
*/
182182
static inline void __disable_port(struct port *port)
183183
{
184-
bond_set_slave_inactive_flags(port->slave);
184+
bond_set_slave_inactive_flags(port->slave, BOND_SLAVE_NOTIFY_LATER);
185185
}
186186

187187
/**
@@ -193,7 +193,7 @@ static inline void __enable_port(struct port *port)
193193
struct slave *slave = port->slave;
194194

195195
if ((slave->link == BOND_LINK_UP) && IS_UP(slave->dev))
196-
bond_set_slave_active_flags(slave);
196+
bond_set_slave_active_flags(slave, BOND_SLAVE_NOTIFY_LATER);
197197
}
198198

199199
/**
@@ -2062,6 +2062,7 @@ void bond_3ad_state_machine_handler(struct work_struct *work)
20622062
struct list_head *iter;
20632063
struct slave *slave;
20642064
struct port *port;
2065+
bool should_notify_rtnl = BOND_SLAVE_NOTIFY_LATER;
20652066

20662067
read_lock(&bond->lock);
20672068
rcu_read_lock();
@@ -2119,8 +2120,25 @@ void bond_3ad_state_machine_handler(struct work_struct *work)
21192120
}
21202121

21212122
re_arm:
2123+
bond_for_each_slave_rcu(bond, slave, iter) {
2124+
if (slave->should_notify) {
2125+
should_notify_rtnl = BOND_SLAVE_NOTIFY_NOW;
2126+
break;
2127+
}
2128+
}
21222129
rcu_read_unlock();
21232130
read_unlock(&bond->lock);
2131+
2132+
if (should_notify_rtnl && rtnl_trylock()) {
2133+
bond_for_each_slave(bond, slave, iter) {
2134+
if (slave->should_notify) {
2135+
rtmsg_ifinfo(RTM_NEWLINK, slave->dev, 0,
2136+
GFP_KERNEL);
2137+
slave->should_notify = 0;
2138+
}
2139+
}
2140+
rtnl_unlock();
2141+
}
21242142
queue_delayed_work(bond->wq, &bond->ad_work, ad_delta_in_ticks);
21252143
}
21262144

drivers/net/bonding/bond_main.c

Lines changed: 26 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -829,21 +829,25 @@ void bond_change_active_slave(struct bonding *bond, struct slave *new_active)
829829
if (bond_is_lb(bond)) {
830830
bond_alb_handle_active_change(bond, new_active);
831831
if (old_active)
832-
bond_set_slave_inactive_flags(old_active);
832+
bond_set_slave_inactive_flags(old_active,
833+
BOND_SLAVE_NOTIFY_NOW);
833834
if (new_active)
834-
bond_set_slave_active_flags(new_active);
835+
bond_set_slave_active_flags(new_active,
836+
BOND_SLAVE_NOTIFY_NOW);
835837
} else {
836838
rcu_assign_pointer(bond->curr_active_slave, new_active);
837839
}
838840

839841
if (bond->params.mode == BOND_MODE_ACTIVEBACKUP) {
840842
if (old_active)
841-
bond_set_slave_inactive_flags(old_active);
843+
bond_set_slave_inactive_flags(old_active,
844+
BOND_SLAVE_NOTIFY_NOW);
842845

843846
if (new_active) {
844847
bool should_notify_peers = false;
845848

846-
bond_set_slave_active_flags(new_active);
849+
bond_set_slave_active_flags(new_active,
850+
BOND_SLAVE_NOTIFY_NOW);
847851

848852
if (bond->params.fail_over_mac)
849853
bond_do_fail_over_mac(bond, new_active,
@@ -1463,14 +1467,15 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)
14631467

14641468
switch (bond->params.mode) {
14651469
case BOND_MODE_ACTIVEBACKUP:
1466-
bond_set_slave_inactive_flags(new_slave);
1470+
bond_set_slave_inactive_flags(new_slave,
1471+
BOND_SLAVE_NOTIFY_NOW);
14671472
break;
14681473
case BOND_MODE_8023AD:
14691474
/* in 802.3ad mode, the internal mechanism
14701475
* will activate the slaves in the selected
14711476
* aggregator
14721477
*/
1473-
bond_set_slave_inactive_flags(new_slave);
1478+
bond_set_slave_inactive_flags(new_slave, BOND_SLAVE_NOTIFY_NOW);
14741479
/* if this is the first slave */
14751480
if (!prev_slave) {
14761481
SLAVE_AD_INFO(new_slave).id = 1;
@@ -1488,7 +1493,7 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)
14881493
case BOND_MODE_TLB:
14891494
case BOND_MODE_ALB:
14901495
bond_set_active_slave(new_slave);
1491-
bond_set_slave_inactive_flags(new_slave);
1496+
bond_set_slave_inactive_flags(new_slave, BOND_SLAVE_NOTIFY_NOW);
14921497
break;
14931498
default:
14941499
pr_debug("This slave is always active in trunk mode\n");
@@ -2015,7 +2020,8 @@ static void bond_miimon_commit(struct bonding *bond)
20152020

20162021
if (bond->params.mode == BOND_MODE_ACTIVEBACKUP ||
20172022
bond->params.mode == BOND_MODE_8023AD)
2018-
bond_set_slave_inactive_flags(slave);
2023+
bond_set_slave_inactive_flags(slave,
2024+
BOND_SLAVE_NOTIFY_NOW);
20192025

20202026
pr_info("%s: link status definitely down for interface %s, disabling it\n",
20212027
bond->dev->name, slave->dev->name);
@@ -2562,7 +2568,8 @@ static void bond_ab_arp_commit(struct bonding *bond)
25622568
slave->link = BOND_LINK_UP;
25632569
if (bond->current_arp_slave) {
25642570
bond_set_slave_inactive_flags(
2565-
bond->current_arp_slave);
2571+
bond->current_arp_slave,
2572+
BOND_SLAVE_NOTIFY_NOW);
25662573
bond->current_arp_slave = NULL;
25672574
}
25682575

@@ -2582,7 +2589,8 @@ static void bond_ab_arp_commit(struct bonding *bond)
25822589
slave->link_failure_count++;
25832590

25842591
slave->link = BOND_LINK_DOWN;
2585-
bond_set_slave_inactive_flags(slave);
2592+
bond_set_slave_inactive_flags(slave,
2593+
BOND_SLAVE_NOTIFY_NOW);
25862594

25872595
pr_info("%s: link status definitely down for interface %s, disabling it\n",
25882596
bond->dev->name, slave->dev->name);
@@ -2657,7 +2665,7 @@ static bool bond_ab_arp_probe(struct bonding *bond)
26572665
}
26582666
}
26592667

2660-
bond_set_slave_inactive_flags(curr_arp_slave);
2668+
bond_set_slave_inactive_flags(curr_arp_slave, BOND_SLAVE_NOTIFY_NOW);
26612669

26622670
bond_for_each_slave(bond, slave, iter) {
26632671
if (!found && !before && IS_UP(slave->dev))
@@ -2677,7 +2685,8 @@ static bool bond_ab_arp_probe(struct bonding *bond)
26772685
if (slave->link_failure_count < UINT_MAX)
26782686
slave->link_failure_count++;
26792687

2680-
bond_set_slave_inactive_flags(slave);
2688+
bond_set_slave_inactive_flags(slave,
2689+
BOND_SLAVE_NOTIFY_NOW);
26812690

26822691
pr_info("%s: backup interface %s is now down.\n",
26832692
bond->dev->name, slave->dev->name);
@@ -2695,7 +2704,7 @@ static bool bond_ab_arp_probe(struct bonding *bond)
26952704
}
26962705

26972706
new_slave->link = BOND_LINK_BACK;
2698-
bond_set_slave_active_flags(new_slave);
2707+
bond_set_slave_active_flags(new_slave, BOND_SLAVE_NOTIFY_NOW);
26992708
bond_arp_send_all(bond, new_slave);
27002709
new_slave->jiffies = jiffies;
27012710
rcu_assign_pointer(bond->current_arp_slave, new_slave);
@@ -3046,9 +3055,11 @@ static int bond_open(struct net_device *bond_dev)
30463055
bond_for_each_slave(bond, slave, iter) {
30473056
if ((bond->params.mode == BOND_MODE_ACTIVEBACKUP)
30483057
&& (slave != bond->curr_active_slave)) {
3049-
bond_set_slave_inactive_flags(slave);
3058+
bond_set_slave_inactive_flags(slave,
3059+
BOND_SLAVE_NOTIFY_NOW);
30503060
} else {
3051-
bond_set_slave_active_flags(slave);
3061+
bond_set_slave_active_flags(slave,
3062+
BOND_SLAVE_NOTIFY_NOW);
30523063
}
30533064
}
30543065
read_unlock(&bond->curr_slave_lock);

drivers/net/bonding/bonding.h

Lines changed: 29 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -195,7 +195,8 @@ struct slave {
195195
s8 new_link;
196196
u8 backup:1, /* indicates backup slave. Value corresponds with
197197
BOND_STATE_ACTIVE and BOND_STATE_BACKUP */
198-
inactive:1; /* indicates inactive slave */
198+
inactive:1, /* indicates inactive slave */
199+
should_notify:1; /* indicateds whether the state changed */
199200
u8 duplex;
200201
u32 original_mtu;
201202
u32 link_failure_count;
@@ -303,6 +304,24 @@ static inline void bond_set_backup_slave(struct slave *slave)
303304
}
304305
}
305306

307+
static inline void bond_set_slave_state(struct slave *slave,
308+
int slave_state, bool notify)
309+
{
310+
if (slave->backup == slave_state)
311+
return;
312+
313+
slave->backup = slave_state;
314+
if (notify) {
315+
rtmsg_ifinfo(RTM_NEWLINK, slave->dev, 0, GFP_KERNEL);
316+
slave->should_notify = 0;
317+
} else {
318+
if (slave->should_notify)
319+
slave->should_notify = 0;
320+
else
321+
slave->should_notify = 1;
322+
}
323+
}
324+
306325
static inline void bond_slave_state_change(struct bonding *bond)
307326
{
308327
struct list_head *iter;
@@ -343,6 +362,9 @@ static inline bool bond_is_active_slave(struct slave *slave)
343362
#define BOND_ARP_VALIDATE_ALL (BOND_ARP_VALIDATE_ACTIVE | \
344363
BOND_ARP_VALIDATE_BACKUP)
345364

365+
#define BOND_SLAVE_NOTIFY_NOW true
366+
#define BOND_SLAVE_NOTIFY_LATER false
367+
346368
static inline int slave_do_arp_validate(struct bonding *bond,
347369
struct slave *slave)
348370
{
@@ -394,17 +416,19 @@ static inline void bond_netpoll_send_skb(const struct slave *slave,
394416
}
395417
#endif
396418

397-
static inline void bond_set_slave_inactive_flags(struct slave *slave)
419+
static inline void bond_set_slave_inactive_flags(struct slave *slave,
420+
bool notify)
398421
{
399422
if (!bond_is_lb(slave->bond))
400-
bond_set_backup_slave(slave);
423+
bond_set_slave_state(slave, BOND_STATE_BACKUP, notify);
401424
if (!slave->bond->params.all_slaves_active)
402425
slave->inactive = 1;
403426
}
404427

405-
static inline void bond_set_slave_active_flags(struct slave *slave)
428+
static inline void bond_set_slave_active_flags(struct slave *slave,
429+
bool notify)
406430
{
407-
bond_set_active_slave(slave);
431+
bond_set_slave_state(slave, BOND_STATE_ACTIVE, notify);
408432
slave->inactive = 0;
409433
}
410434

0 commit comments

Comments
 (0)