Skip to content

Commit aaaf598

Browse files
committed
Merge branch 'nic-thunderx-fix-communication-races-between-VF-PF'
Vadim Lomovtsev says: ==================== nic: thunderx: fix communication races between VF & PF The ThunderX CN88XX NIC Virtual Function driver uses mailbox interface to communicate to physical function driver. Each of VF has it's own pair of mailbox registers to read from and write to. The mailbox registers has no protection from possible races, so it has to be implemented at software side. After long term testing by loop of 'ip link set <ifname> up/down' command it was found that there are two possible scenarios when race condition appears: 1. VF receives link change message from PF and VF send RX mode configuration message to PF in the same time from separate thread. 2. PF receives RX mode configuration from VF and in the same time, in separate thread PF detects link status change and sends appropriate message to particular VF. Both cases leads to mailbox data to be rewritten, NIC VF messaging control data to be updated incorrectly and communication sequence gets broken. This patch series is to address race condition with VF & PF communication. Changes: v1 -> v2 - 0000: correct typo in cover letter subject: 'betwen' -> 'between'; - move link state polling request task from pf to vf instead of cheking status of mailbox irq; v2 -> v3 - 0003: change return type of nicvf_send_cfg_done() function from int to void; - 0007: update subject and remove unused variable 'netdev' from nicvf_link_status_check_task() function; ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
2 parents efcc9bc + 2e1c3ff commit aaaf598

File tree

5 files changed

+142
-153
lines changed

5 files changed

+142
-153
lines changed

drivers/net/ethernet/cavium/thunder/nic.h

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -271,7 +271,7 @@ struct xcast_addr_list {
271271
};
272272

273273
struct nicvf_work {
274-
struct delayed_work work;
274+
struct work_struct work;
275275
u8 mode;
276276
struct xcast_addr_list *mc;
277277
};
@@ -327,7 +327,11 @@ struct nicvf {
327327
struct nicvf_work rx_mode_work;
328328
/* spinlock to protect workqueue arguments from concurrent access */
329329
spinlock_t rx_mode_wq_lock;
330-
330+
/* workqueue for handling kernel ndo_set_rx_mode() calls */
331+
struct workqueue_struct *nicvf_rx_mode_wq;
332+
/* mutex to protect VF's mailbox contents from concurrent access */
333+
struct mutex rx_mode_mtx;
334+
struct delayed_work link_change_work;
331335
/* PTP timestamp */
332336
struct cavium_ptp *ptp_clock;
333337
/* Inbound timestamping is on */
@@ -575,10 +579,8 @@ struct set_ptp {
575579

576580
struct xcast {
577581
u8 msg;
578-
union {
579-
u8 mode;
580-
u64 mac;
581-
} data;
582+
u8 mode;
583+
u64 mac:48;
582584
};
583585

584586
/* 128 bit shared memory between PF and each VF */

drivers/net/ethernet/cavium/thunder/nic_main.c

Lines changed: 46 additions & 103 deletions
Original file line numberDiff line numberDiff line change
@@ -57,14 +57,8 @@ struct nicpf {
5757
#define NIC_GET_BGX_FROM_VF_LMAC_MAP(map) ((map >> 4) & 0xF)
5858
#define NIC_GET_LMAC_FROM_VF_LMAC_MAP(map) (map & 0xF)
5959
u8 *vf_lmac_map;
60-
struct delayed_work dwork;
61-
struct workqueue_struct *check_link;
62-
u8 *link;
63-
u8 *duplex;
64-
u32 *speed;
6560
u16 cpi_base[MAX_NUM_VFS_SUPPORTED];
6661
u16 rssi_base[MAX_NUM_VFS_SUPPORTED];
67-
bool mbx_lock[MAX_NUM_VFS_SUPPORTED];
6862

6963
/* MSI-X */
7064
u8 num_vec;
@@ -929,6 +923,35 @@ static void nic_config_timestamp(struct nicpf *nic, int vf, struct set_ptp *ptp)
929923
nic_reg_write(nic, NIC_PF_PKIND_0_15_CFG | (pkind_idx << 3), pkind_val);
930924
}
931925

926+
/* Get BGX LMAC link status and update corresponding VF
927+
* if there is a change, valid only if internal L2 switch
928+
* is not present otherwise VF link is always treated as up
929+
*/
930+
static void nic_link_status_get(struct nicpf *nic, u8 vf)
931+
{
932+
union nic_mbx mbx = {};
933+
struct bgx_link_status link;
934+
u8 bgx, lmac;
935+
936+
mbx.link_status.msg = NIC_MBOX_MSG_BGX_LINK_CHANGE;
937+
938+
/* Get BGX, LMAC indices for the VF */
939+
bgx = NIC_GET_BGX_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vf]);
940+
lmac = NIC_GET_LMAC_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vf]);
941+
942+
/* Get interface link status */
943+
bgx_get_lmac_link_state(nic->node, bgx, lmac, &link);
944+
945+
/* Send a mbox message to VF with current link status */
946+
mbx.link_status.link_up = link.link_up;
947+
mbx.link_status.duplex = link.duplex;
948+
mbx.link_status.speed = link.speed;
949+
mbx.link_status.mac_type = link.mac_type;
950+
951+
/* reply with link status */
952+
nic_send_msg_to_vf(nic, vf, &mbx);
953+
}
954+
932955
/* Interrupt handler to handle mailbox messages from VFs */
933956
static void nic_handle_mbx_intr(struct nicpf *nic, int vf)
934957
{
@@ -941,8 +964,6 @@ static void nic_handle_mbx_intr(struct nicpf *nic, int vf)
941964
int i;
942965
int ret = 0;
943966

944-
nic->mbx_lock[vf] = true;
945-
946967
mbx_addr = nic_get_mbx_addr(vf);
947968
mbx_data = (u64 *)&mbx;
948969

@@ -957,12 +978,7 @@ static void nic_handle_mbx_intr(struct nicpf *nic, int vf)
957978
switch (mbx.msg.msg) {
958979
case NIC_MBOX_MSG_READY:
959980
nic_mbx_send_ready(nic, vf);
960-
if (vf < nic->num_vf_en) {
961-
nic->link[vf] = 0;
962-
nic->duplex[vf] = 0;
963-
nic->speed[vf] = 0;
964-
}
965-
goto unlock;
981+
return;
966982
case NIC_MBOX_MSG_QS_CFG:
967983
reg_addr = NIC_PF_QSET_0_127_CFG |
968984
(mbx.qs.num << NIC_QS_ID_SHIFT);
@@ -1031,15 +1047,15 @@ static void nic_handle_mbx_intr(struct nicpf *nic, int vf)
10311047
break;
10321048
case NIC_MBOX_MSG_RSS_SIZE:
10331049
nic_send_rss_size(nic, vf);
1034-
goto unlock;
1050+
return;
10351051
case NIC_MBOX_MSG_RSS_CFG:
10361052
case NIC_MBOX_MSG_RSS_CFG_CONT:
10371053
nic_config_rss(nic, &mbx.rss_cfg);
10381054
break;
10391055
case NIC_MBOX_MSG_CFG_DONE:
10401056
/* Last message of VF config msg sequence */
10411057
nic_enable_vf(nic, vf, true);
1042-
goto unlock;
1058+
break;
10431059
case NIC_MBOX_MSG_SHUTDOWN:
10441060
/* First msg in VF teardown sequence */
10451061
if (vf >= nic->num_vf_en)
@@ -1049,19 +1065,19 @@ static void nic_handle_mbx_intr(struct nicpf *nic, int vf)
10491065
break;
10501066
case NIC_MBOX_MSG_ALLOC_SQS:
10511067
nic_alloc_sqs(nic, &mbx.sqs_alloc);
1052-
goto unlock;
1068+
return;
10531069
case NIC_MBOX_MSG_NICVF_PTR:
10541070
nic->nicvf[vf] = mbx.nicvf.nicvf;
10551071
break;
10561072
case NIC_MBOX_MSG_PNICVF_PTR:
10571073
nic_send_pnicvf(nic, vf);
1058-
goto unlock;
1074+
return;
10591075
case NIC_MBOX_MSG_SNICVF_PTR:
10601076
nic_send_snicvf(nic, &mbx.nicvf);
1061-
goto unlock;
1077+
return;
10621078
case NIC_MBOX_MSG_BGX_STATS:
10631079
nic_get_bgx_stats(nic, &mbx.bgx_stats);
1064-
goto unlock;
1080+
return;
10651081
case NIC_MBOX_MSG_LOOPBACK:
10661082
ret = nic_config_loopback(nic, &mbx.lbk);
10671083
break;
@@ -1070,7 +1086,7 @@ static void nic_handle_mbx_intr(struct nicpf *nic, int vf)
10701086
break;
10711087
case NIC_MBOX_MSG_PFC:
10721088
nic_pause_frame(nic, vf, &mbx.pfc);
1073-
goto unlock;
1089+
return;
10741090
case NIC_MBOX_MSG_PTP_CFG:
10751091
nic_config_timestamp(nic, vf, &mbx.ptp);
10761092
break;
@@ -1094,7 +1110,7 @@ static void nic_handle_mbx_intr(struct nicpf *nic, int vf)
10941110
bgx = NIC_GET_BGX_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vf]);
10951111
lmac = NIC_GET_LMAC_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vf]);
10961112
bgx_set_dmac_cam_filter(nic->node, bgx, lmac,
1097-
mbx.xcast.data.mac,
1113+
mbx.xcast.mac,
10981114
vf < NIC_VF_PER_MBX_REG ? vf :
10991115
vf - NIC_VF_PER_MBX_REG);
11001116
break;
@@ -1106,8 +1122,15 @@ static void nic_handle_mbx_intr(struct nicpf *nic, int vf)
11061122
}
11071123
bgx = NIC_GET_BGX_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vf]);
11081124
lmac = NIC_GET_LMAC_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vf]);
1109-
bgx_set_xcast_mode(nic->node, bgx, lmac, mbx.xcast.data.mode);
1125+
bgx_set_xcast_mode(nic->node, bgx, lmac, mbx.xcast.mode);
11101126
break;
1127+
case NIC_MBOX_MSG_BGX_LINK_CHANGE:
1128+
if (vf >= nic->num_vf_en) {
1129+
ret = -1; /* NACK */
1130+
break;
1131+
}
1132+
nic_link_status_get(nic, vf);
1133+
return;
11111134
default:
11121135
dev_err(&nic->pdev->dev,
11131136
"Invalid msg from VF%d, msg 0x%x\n", vf, mbx.msg.msg);
@@ -1121,8 +1144,6 @@ static void nic_handle_mbx_intr(struct nicpf *nic, int vf)
11211144
mbx.msg.msg, vf);
11221145
nic_mbx_send_nack(nic, vf);
11231146
}
1124-
unlock:
1125-
nic->mbx_lock[vf] = false;
11261147
}
11271148

11281149
static irqreturn_t nic_mbx_intr_handler(int irq, void *nic_irq)
@@ -1270,52 +1291,6 @@ static int nic_sriov_init(struct pci_dev *pdev, struct nicpf *nic)
12701291
return 0;
12711292
}
12721293

1273-
/* Poll for BGX LMAC link status and update corresponding VF
1274-
* if there is a change, valid only if internal L2 switch
1275-
* is not present otherwise VF link is always treated as up
1276-
*/
1277-
static void nic_poll_for_link(struct work_struct *work)
1278-
{
1279-
union nic_mbx mbx = {};
1280-
struct nicpf *nic;
1281-
struct bgx_link_status link;
1282-
u8 vf, bgx, lmac;
1283-
1284-
nic = container_of(work, struct nicpf, dwork.work);
1285-
1286-
mbx.link_status.msg = NIC_MBOX_MSG_BGX_LINK_CHANGE;
1287-
1288-
for (vf = 0; vf < nic->num_vf_en; vf++) {
1289-
/* Poll only if VF is UP */
1290-
if (!nic->vf_enabled[vf])
1291-
continue;
1292-
1293-
/* Get BGX, LMAC indices for the VF */
1294-
bgx = NIC_GET_BGX_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vf]);
1295-
lmac = NIC_GET_LMAC_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vf]);
1296-
/* Get interface link status */
1297-
bgx_get_lmac_link_state(nic->node, bgx, lmac, &link);
1298-
1299-
/* Inform VF only if link status changed */
1300-
if (nic->link[vf] == link.link_up)
1301-
continue;
1302-
1303-
if (!nic->mbx_lock[vf]) {
1304-
nic->link[vf] = link.link_up;
1305-
nic->duplex[vf] = link.duplex;
1306-
nic->speed[vf] = link.speed;
1307-
1308-
/* Send a mbox message to VF with current link status */
1309-
mbx.link_status.link_up = link.link_up;
1310-
mbx.link_status.duplex = link.duplex;
1311-
mbx.link_status.speed = link.speed;
1312-
mbx.link_status.mac_type = link.mac_type;
1313-
nic_send_msg_to_vf(nic, vf, &mbx);
1314-
}
1315-
}
1316-
queue_delayed_work(nic->check_link, &nic->dwork, HZ * 2);
1317-
}
1318-
13191294
static int nic_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
13201295
{
13211296
struct device *dev = &pdev->dev;
@@ -1384,18 +1359,6 @@ static int nic_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
13841359
if (!nic->vf_lmac_map)
13851360
goto err_release_regions;
13861361

1387-
nic->link = devm_kmalloc_array(dev, max_lmac, sizeof(u8), GFP_KERNEL);
1388-
if (!nic->link)
1389-
goto err_release_regions;
1390-
1391-
nic->duplex = devm_kmalloc_array(dev, max_lmac, sizeof(u8), GFP_KERNEL);
1392-
if (!nic->duplex)
1393-
goto err_release_regions;
1394-
1395-
nic->speed = devm_kmalloc_array(dev, max_lmac, sizeof(u32), GFP_KERNEL);
1396-
if (!nic->speed)
1397-
goto err_release_regions;
1398-
13991362
/* Initialize hardware */
14001363
nic_init_hw(nic);
14011364

@@ -1411,22 +1374,8 @@ static int nic_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
14111374
if (err)
14121375
goto err_unregister_interrupts;
14131376

1414-
/* Register a physical link status poll fn() */
1415-
nic->check_link = alloc_workqueue("check_link_status",
1416-
WQ_UNBOUND | WQ_MEM_RECLAIM, 1);
1417-
if (!nic->check_link) {
1418-
err = -ENOMEM;
1419-
goto err_disable_sriov;
1420-
}
1421-
1422-
INIT_DELAYED_WORK(&nic->dwork, nic_poll_for_link);
1423-
queue_delayed_work(nic->check_link, &nic->dwork, 0);
1424-
14251377
return 0;
14261378

1427-
err_disable_sriov:
1428-
if (nic->flags & NIC_SRIOV_ENABLED)
1429-
pci_disable_sriov(pdev);
14301379
err_unregister_interrupts:
14311380
nic_unregister_interrupts(nic);
14321381
err_release_regions:
@@ -1447,12 +1396,6 @@ static void nic_remove(struct pci_dev *pdev)
14471396
if (nic->flags & NIC_SRIOV_ENABLED)
14481397
pci_disable_sriov(pdev);
14491398

1450-
if (nic->check_link) {
1451-
/* Destroy work Queue */
1452-
cancel_delayed_work_sync(&nic->dwork);
1453-
destroy_workqueue(nic->check_link);
1454-
}
1455-
14561399
nic_unregister_interrupts(nic);
14571400
pci_release_regions(pdev);
14581401

0 commit comments

Comments
 (0)