Skip to content

Commit fbb0c41

Browse files
nikolay@redhat.comdavem330
authored andcommitted
bonding: fix miimon and arp_interval delayed work race conditions
First I would give three observations which will be used later. Observation 1: if (delayed_work_pending(wq)) cancel_delayed_work(wq) This usage is wrong because the pending bit is cleared just before the work's fn is executed and if the function re-arms itself we might end up with the work still running. It's safe to call cancel_delayed_work_sync() even if the work is not queued at all. Observation 2: Use of INIT_DELAYED_WORK() Work needs to be initialized only once prior to (de/en)queueing. Observation 3: IFF_UP is set only after ndo_open is called Related race conditions: 1. Race between bonding_store_miimon() and bonding_store_arp_interval() Because of Obs.1 we can end up having both works enqueued. 2. Multiple races with INIT_DELAYED_WORK() Since the works are not protected by anything between INIT_DELAYED_WORK() and calls to (en/de)queue it is possible for races between the following functions: (races are also possible between the calls to INIT_DELAYED_WORK() and workqueue code) bonding_store_miimon() - bonding_store_arp_interval(), bond_close(), bond_open(), enqueued functions bonding_store_arp_interval() - bonding_store_miimon(), bond_close(), bond_open(), enqueued functions 3. By Obs.1 we need to change bond_cancel_all() Bugs 1 and 2 are fixed by moving all work initializations in bond_open which by Obs. 2 and Obs. 3 and the fact that we make sure that all works are cancelled in bond_close(), is guaranteed not to have any work enqueued. Also RTNL lock is now acquired in bonding_store_miimon/arp_interval so they can't race with bond_close and bond_open. The opposing work is cancelled only if the IFF_UP flag is set and it is cancelled unconditionally. The opposing work is already cancelled if the interface is down so no need to cancel it again. This way we don't need new synchronizations for the bonding workqueue. These bugs (and fixes) are tied together and belong in the same patch. Note: I have left 1 line intentionally over 80 characters (84) because I didn't like how it looks broken down. If you'd prefer it otherwise, then simply break it. v2: Make description text < 75 columns Signed-off-by: Nikolay Aleksandrov <nikolay@redhat.com> Signed-off-by: Jay Vosburgh <fubar@us.ibm.com> Signed-off-by: David S. Miller <davem@davemloft.net>
1 parent e9296e8 commit fbb0c41

File tree

2 files changed

+36
-86
lines changed

2 files changed

+36
-86
lines changed

drivers/net/bonding/bond_main.c

Lines changed: 26 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -3459,6 +3459,28 @@ static int bond_xmit_hash_policy_l34(struct sk_buff *skb, int count)
34593459

34603460
/*-------------------------- Device entry points ----------------------------*/
34613461

3462+
static void bond_work_init_all(struct bonding *bond)
3463+
{
3464+
INIT_DELAYED_WORK(&bond->mcast_work,
3465+
bond_resend_igmp_join_requests_delayed);
3466+
INIT_DELAYED_WORK(&bond->alb_work, bond_alb_monitor);
3467+
INIT_DELAYED_WORK(&bond->mii_work, bond_mii_monitor);
3468+
if (bond->params.mode == BOND_MODE_ACTIVEBACKUP)
3469+
INIT_DELAYED_WORK(&bond->arp_work, bond_activebackup_arp_mon);
3470+
else
3471+
INIT_DELAYED_WORK(&bond->arp_work, bond_loadbalance_arp_mon);
3472+
INIT_DELAYED_WORK(&bond->ad_work, bond_3ad_state_machine_handler);
3473+
}
3474+
3475+
static void bond_work_cancel_all(struct bonding *bond)
3476+
{
3477+
cancel_delayed_work_sync(&bond->mii_work);
3478+
cancel_delayed_work_sync(&bond->arp_work);
3479+
cancel_delayed_work_sync(&bond->alb_work);
3480+
cancel_delayed_work_sync(&bond->ad_work);
3481+
cancel_delayed_work_sync(&bond->mcast_work);
3482+
}
3483+
34623484
static int bond_open(struct net_device *bond_dev)
34633485
{
34643486
struct bonding *bond = netdev_priv(bond_dev);
@@ -3481,41 +3503,27 @@ static int bond_open(struct net_device *bond_dev)
34813503
}
34823504
read_unlock(&bond->lock);
34833505

3484-
INIT_DELAYED_WORK(&bond->mcast_work, bond_resend_igmp_join_requests_delayed);
3506+
bond_work_init_all(bond);
34853507

34863508
if (bond_is_lb(bond)) {
34873509
/* bond_alb_initialize must be called before the timer
34883510
* is started.
34893511
*/
3490-
if (bond_alb_initialize(bond, (bond->params.mode == BOND_MODE_ALB))) {
3491-
/* something went wrong - fail the open operation */
3512+
if (bond_alb_initialize(bond, (bond->params.mode == BOND_MODE_ALB)))
34923513
return -ENOMEM;
3493-
}
3494-
3495-
INIT_DELAYED_WORK(&bond->alb_work, bond_alb_monitor);
34963514
queue_delayed_work(bond->wq, &bond->alb_work, 0);
34973515
}
34983516

3499-
if (bond->params.miimon) { /* link check interval, in milliseconds. */
3500-
INIT_DELAYED_WORK(&bond->mii_work, bond_mii_monitor);
3517+
if (bond->params.miimon) /* link check interval, in milliseconds. */
35013518
queue_delayed_work(bond->wq, &bond->mii_work, 0);
3502-
}
35033519

35043520
if (bond->params.arp_interval) { /* arp interval, in milliseconds. */
3505-
if (bond->params.mode == BOND_MODE_ACTIVEBACKUP)
3506-
INIT_DELAYED_WORK(&bond->arp_work,
3507-
bond_activebackup_arp_mon);
3508-
else
3509-
INIT_DELAYED_WORK(&bond->arp_work,
3510-
bond_loadbalance_arp_mon);
3511-
35123521
queue_delayed_work(bond->wq, &bond->arp_work, 0);
35133522
if (bond->params.arp_validate)
35143523
bond->recv_probe = bond_arp_rcv;
35153524
}
35163525

35173526
if (bond->params.mode == BOND_MODE_8023AD) {
3518-
INIT_DELAYED_WORK(&bond->ad_work, bond_3ad_state_machine_handler);
35193527
queue_delayed_work(bond->wq, &bond->ad_work, 0);
35203528
/* register to receive LACPDUs */
35213529
bond->recv_probe = bond_3ad_lacpdu_recv;
@@ -3530,34 +3538,10 @@ static int bond_close(struct net_device *bond_dev)
35303538
struct bonding *bond = netdev_priv(bond_dev);
35313539

35323540
write_lock_bh(&bond->lock);
3533-
35343541
bond->send_peer_notif = 0;
3535-
35363542
write_unlock_bh(&bond->lock);
35373543

3538-
if (bond->params.miimon) { /* link check interval, in milliseconds. */
3539-
cancel_delayed_work_sync(&bond->mii_work);
3540-
}
3541-
3542-
if (bond->params.arp_interval) { /* arp interval, in milliseconds. */
3543-
cancel_delayed_work_sync(&bond->arp_work);
3544-
}
3545-
3546-
switch (bond->params.mode) {
3547-
case BOND_MODE_8023AD:
3548-
cancel_delayed_work_sync(&bond->ad_work);
3549-
break;
3550-
case BOND_MODE_TLB:
3551-
case BOND_MODE_ALB:
3552-
cancel_delayed_work_sync(&bond->alb_work);
3553-
break;
3554-
default:
3555-
break;
3556-
}
3557-
3558-
if (delayed_work_pending(&bond->mcast_work))
3559-
cancel_delayed_work_sync(&bond->mcast_work);
3560-
3544+
bond_work_cancel_all(bond);
35613545
if (bond_is_lb(bond)) {
35623546
/* Must be called only after all
35633547
* slaves have been released
@@ -4436,26 +4420,6 @@ static void bond_setup(struct net_device *bond_dev)
44364420
bond_dev->features |= bond_dev->hw_features;
44374421
}
44384422

4439-
static void bond_work_cancel_all(struct bonding *bond)
4440-
{
4441-
if (bond->params.miimon && delayed_work_pending(&bond->mii_work))
4442-
cancel_delayed_work_sync(&bond->mii_work);
4443-
4444-
if (bond->params.arp_interval && delayed_work_pending(&bond->arp_work))
4445-
cancel_delayed_work_sync(&bond->arp_work);
4446-
4447-
if (bond->params.mode == BOND_MODE_ALB &&
4448-
delayed_work_pending(&bond->alb_work))
4449-
cancel_delayed_work_sync(&bond->alb_work);
4450-
4451-
if (bond->params.mode == BOND_MODE_8023AD &&
4452-
delayed_work_pending(&bond->ad_work))
4453-
cancel_delayed_work_sync(&bond->ad_work);
4454-
4455-
if (delayed_work_pending(&bond->mcast_work))
4456-
cancel_delayed_work_sync(&bond->mcast_work);
4457-
}
4458-
44594423
/*
44604424
* Destroy a bonding device.
44614425
* Must be under rtnl_lock when this function is called.

drivers/net/bonding/bond_sysfs.c

Lines changed: 10 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -513,6 +513,8 @@ static ssize_t bonding_store_arp_interval(struct device *d,
513513
int new_value, ret = count;
514514
struct bonding *bond = to_bond(d);
515515

516+
if (!rtnl_trylock())
517+
return restart_syscall();
516518
if (sscanf(buf, "%d", &new_value) != 1) {
517519
pr_err("%s: no arp_interval value specified.\n",
518520
bond->dev->name);
@@ -539,10 +541,6 @@ static ssize_t bonding_store_arp_interval(struct device *d,
539541
pr_info("%s: ARP monitoring cannot be used with MII monitoring. %s Disabling MII monitoring.\n",
540542
bond->dev->name, bond->dev->name);
541543
bond->params.miimon = 0;
542-
if (delayed_work_pending(&bond->mii_work)) {
543-
cancel_delayed_work(&bond->mii_work);
544-
flush_workqueue(bond->wq);
545-
}
546544
}
547545
if (!bond->params.arp_targets[0]) {
548546
pr_info("%s: ARP monitoring has been set up, but no ARP targets have been specified.\n",
@@ -554,19 +552,12 @@ static ssize_t bonding_store_arp_interval(struct device *d,
554552
* timer will get fired off when the open function
555553
* is called.
556554
*/
557-
if (!delayed_work_pending(&bond->arp_work)) {
558-
if (bond->params.mode == BOND_MODE_ACTIVEBACKUP)
559-
INIT_DELAYED_WORK(&bond->arp_work,
560-
bond_activebackup_arp_mon);
561-
else
562-
INIT_DELAYED_WORK(&bond->arp_work,
563-
bond_loadbalance_arp_mon);
564-
565-
queue_delayed_work(bond->wq, &bond->arp_work, 0);
566-
}
555+
cancel_delayed_work_sync(&bond->mii_work);
556+
queue_delayed_work(bond->wq, &bond->arp_work, 0);
567557
}
568558

569559
out:
560+
rtnl_unlock();
570561
return ret;
571562
}
572563
static DEVICE_ATTR(arp_interval, S_IRUGO | S_IWUSR,
@@ -962,6 +953,8 @@ static ssize_t bonding_store_miimon(struct device *d,
962953
int new_value, ret = count;
963954
struct bonding *bond = to_bond(d);
964955

956+
if (!rtnl_trylock())
957+
return restart_syscall();
965958
if (sscanf(buf, "%d", &new_value) != 1) {
966959
pr_err("%s: no miimon value specified.\n",
967960
bond->dev->name);
@@ -993,10 +986,6 @@ static ssize_t bonding_store_miimon(struct device *d,
993986
bond->params.arp_validate =
994987
BOND_ARP_VALIDATE_NONE;
995988
}
996-
if (delayed_work_pending(&bond->arp_work)) {
997-
cancel_delayed_work(&bond->arp_work);
998-
flush_workqueue(bond->wq);
999-
}
1000989
}
1001990

1002991
if (bond->dev->flags & IFF_UP) {
@@ -1005,15 +994,12 @@ static ssize_t bonding_store_miimon(struct device *d,
1005994
* timer will get fired off when the open function
1006995
* is called.
1007996
*/
1008-
if (!delayed_work_pending(&bond->mii_work)) {
1009-
INIT_DELAYED_WORK(&bond->mii_work,
1010-
bond_mii_monitor);
1011-
queue_delayed_work(bond->wq,
1012-
&bond->mii_work, 0);
1013-
}
997+
cancel_delayed_work_sync(&bond->arp_work);
998+
queue_delayed_work(bond->wq, &bond->mii_work, 0);
1014999
}
10151000
}
10161001
out:
1002+
rtnl_unlock();
10171003
return ret;
10181004
}
10191005
static DEVICE_ATTR(miimon, S_IRUGO | S_IWUSR,

0 commit comments

Comments
 (0)