Skip to content

Commit ce388ff

Browse files
committed
Merge branch 'mlx4-next'
Or Gerlitz says: ==================== Add HA and LAG support to mlx4 RoCE and SRIOV services This series takes advantage of bonding mlx4 Ethernet devices to support a model of High-Availability and Link Aggregation for more environments. The mlx4 driver reacts on netdev events generated by bonding when slave state changes happen by programming a HW V2P (Virt-to-Phys) port table. Bonding was extended to expose these state changes through netdev events. When an mlx4 interface such as the mlx4 IB/RoCE driver is subject to this policy, QPs are created over virtual ports which are mapped to one of the two physical ports. When a failure happens, the re-programming of the V2P table allows traffic to keep flowing. The mlx4 Ethernet driver interfaces are not subject to this policy and act as usual. A 2nd use-case for this model would be to add HA and Link Aggregation support to single ported mlx4 Ethernet VFs. In this case, the PF Ethernet interfaces are bonded, all the VFs see single port devices (which is supported already today), and VF QPs are subject to V2P. ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
2 parents 251c005 + c621574 commit ce388ff

File tree

24 files changed

+756
-64
lines changed

24 files changed

+756
-64
lines changed

drivers/infiniband/hw/mlx4/ah.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@
3636
#include <linux/slab.h>
3737
#include <linux/inet.h>
3838
#include <linux/string.h>
39+
#include <linux/mlx4/driver.h>
3940

4041
#include "mlx4_ib.h"
4142

drivers/infiniband/hw/mlx4/main.c

Lines changed: 143 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -351,6 +351,7 @@ static int eth_link_query_port(struct ib_device *ibdev, u8 port,
351351
enum ib_mtu tmp;
352352
struct mlx4_cmd_mailbox *mailbox;
353353
int err = 0;
354+
int is_bonded = mlx4_is_bonded(mdev->dev);
354355

355356
mailbox = mlx4_alloc_cmd_mailbox(mdev->dev);
356357
if (IS_ERR(mailbox))
@@ -374,8 +375,12 @@ static int eth_link_query_port(struct ib_device *ibdev, u8 port,
374375
props->state = IB_PORT_DOWN;
375376
props->phys_state = state_to_phys_state(props->state);
376377
props->active_mtu = IB_MTU_256;
378+
if (is_bonded)
379+
rtnl_lock(); /* required to get upper dev */
377380
spin_lock_bh(&iboe->lock);
378381
ndev = iboe->netdevs[port - 1];
382+
if (ndev && is_bonded)
383+
ndev = netdev_master_upper_dev_get(ndev);
379384
if (!ndev)
380385
goto out_unlock;
381386

@@ -387,6 +392,8 @@ static int eth_link_query_port(struct ib_device *ibdev, u8 port,
387392
props->phys_state = state_to_phys_state(props->state);
388393
out_unlock:
389394
spin_unlock_bh(&iboe->lock);
395+
if (is_bonded)
396+
rtnl_unlock();
390397
out:
391398
mlx4_free_cmd_mailbox(mdev->dev, mailbox);
392399
return err;
@@ -844,7 +851,7 @@ int mlx4_ib_add_mc(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp,
844851

845852
struct mlx4_ib_steering {
846853
struct list_head list;
847-
u64 reg_id;
854+
struct mlx4_flow_reg_id reg_id;
848855
union ib_gid gid;
849856
};
850857

@@ -1135,9 +1142,11 @@ static struct ib_flow *mlx4_ib_create_flow(struct ib_qp *qp,
11351142
struct ib_flow_attr *flow_attr,
11361143
int domain)
11371144
{
1138-
int err = 0, i = 0;
1145+
int err = 0, i = 0, j = 0;
11391146
struct mlx4_ib_flow *mflow;
11401147
enum mlx4_net_trans_promisc_mode type[2];
1148+
struct mlx4_dev *dev = (to_mdev(qp->device))->dev;
1149+
int is_bonded = mlx4_is_bonded(dev);
11411150

11421151
memset(type, 0, sizeof(type));
11431152

@@ -1172,26 +1181,55 @@ static struct ib_flow *mlx4_ib_create_flow(struct ib_qp *qp,
11721181

11731182
while (i < ARRAY_SIZE(type) && type[i]) {
11741183
err = __mlx4_ib_create_flow(qp, flow_attr, domain, type[i],
1175-
&mflow->reg_id[i]);
1184+
&mflow->reg_id[i].id);
11761185
if (err)
11771186
goto err_create_flow;
11781187
i++;
1188+
if (is_bonded) {
1189+
flow_attr->port = 2;
1190+
err = __mlx4_ib_create_flow(qp, flow_attr,
1191+
domain, type[j],
1192+
&mflow->reg_id[j].mirror);
1193+
flow_attr->port = 1;
1194+
if (err)
1195+
goto err_create_flow;
1196+
j++;
1197+
}
1198+
11791199
}
11801200

11811201
if (i < ARRAY_SIZE(type) && flow_attr->type == IB_FLOW_ATTR_NORMAL) {
1182-
err = mlx4_ib_tunnel_steer_add(qp, flow_attr, &mflow->reg_id[i]);
1202+
err = mlx4_ib_tunnel_steer_add(qp, flow_attr,
1203+
&mflow->reg_id[i].id);
11831204
if (err)
11841205
goto err_create_flow;
11851206
i++;
1207+
if (is_bonded) {
1208+
flow_attr->port = 2;
1209+
err = mlx4_ib_tunnel_steer_add(qp, flow_attr,
1210+
&mflow->reg_id[j].mirror);
1211+
flow_attr->port = 1;
1212+
if (err)
1213+
goto err_create_flow;
1214+
j++;
1215+
}
1216+
/* function to create mirror rule */
11861217
}
11871218

11881219
return &mflow->ibflow;
11891220

11901221
err_create_flow:
11911222
while (i) {
1192-
(void)__mlx4_ib_destroy_flow(to_mdev(qp->device)->dev, mflow->reg_id[i]);
1223+
(void)__mlx4_ib_destroy_flow(to_mdev(qp->device)->dev,
1224+
mflow->reg_id[i].id);
11931225
i--;
11941226
}
1227+
1228+
while (j) {
1229+
(void)__mlx4_ib_destroy_flow(to_mdev(qp->device)->dev,
1230+
mflow->reg_id[j].mirror);
1231+
j--;
1232+
}
11951233
err_free:
11961234
kfree(mflow);
11971235
return ERR_PTR(err);
@@ -1204,10 +1242,16 @@ static int mlx4_ib_destroy_flow(struct ib_flow *flow_id)
12041242
struct mlx4_ib_dev *mdev = to_mdev(flow_id->qp->device);
12051243
struct mlx4_ib_flow *mflow = to_mflow(flow_id);
12061244

1207-
while (i < ARRAY_SIZE(mflow->reg_id) && mflow->reg_id[i]) {
1208-
err = __mlx4_ib_destroy_flow(mdev->dev, mflow->reg_id[i]);
1245+
while (i < ARRAY_SIZE(mflow->reg_id) && mflow->reg_id[i].id) {
1246+
err = __mlx4_ib_destroy_flow(mdev->dev, mflow->reg_id[i].id);
12091247
if (err)
12101248
ret = err;
1249+
if (mflow->reg_id[i].mirror) {
1250+
err = __mlx4_ib_destroy_flow(mdev->dev,
1251+
mflow->reg_id[i].mirror);
1252+
if (err)
1253+
ret = err;
1254+
}
12111255
i++;
12121256
}
12131257

@@ -1219,11 +1263,12 @@ static int mlx4_ib_mcg_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
12191263
{
12201264
int err;
12211265
struct mlx4_ib_dev *mdev = to_mdev(ibqp->device);
1266+
struct mlx4_dev *dev = mdev->dev;
12221267
struct mlx4_ib_qp *mqp = to_mqp(ibqp);
1223-
u64 reg_id;
12241268
struct mlx4_ib_steering *ib_steering = NULL;
12251269
enum mlx4_protocol prot = (gid->raw[1] == 0x0e) ?
12261270
MLX4_PROT_IB_IPV4 : MLX4_PROT_IB_IPV6;
1271+
struct mlx4_flow_reg_id reg_id;
12271272

12281273
if (mdev->dev->caps.steering_mode ==
12291274
MLX4_STEERING_MODE_DEVICE_MANAGED) {
@@ -1235,10 +1280,20 @@ static int mlx4_ib_mcg_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
12351280
err = mlx4_multicast_attach(mdev->dev, &mqp->mqp, gid->raw, mqp->port,
12361281
!!(mqp->flags &
12371282
MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK),
1238-
prot, &reg_id);
1283+
prot, &reg_id.id);
12391284
if (err)
12401285
goto err_malloc;
12411286

1287+
reg_id.mirror = 0;
1288+
if (mlx4_is_bonded(dev)) {
1289+
err = mlx4_multicast_attach(mdev->dev, &mqp->mqp, gid->raw, 2,
1290+
!!(mqp->flags &
1291+
MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK),
1292+
prot, &reg_id.mirror);
1293+
if (err)
1294+
goto err_add;
1295+
}
1296+
12421297
err = add_gid_entry(ibqp, gid);
12431298
if (err)
12441299
goto err_add;
@@ -1254,7 +1309,10 @@ static int mlx4_ib_mcg_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
12541309

12551310
err_add:
12561311
mlx4_multicast_detach(mdev->dev, &mqp->mqp, gid->raw,
1257-
prot, reg_id);
1312+
prot, reg_id.id);
1313+
if (reg_id.mirror)
1314+
mlx4_multicast_detach(mdev->dev, &mqp->mqp, gid->raw,
1315+
prot, reg_id.mirror);
12581316
err_malloc:
12591317
kfree(ib_steering);
12601318

@@ -1281,10 +1339,12 @@ static int mlx4_ib_mcg_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
12811339
{
12821340
int err;
12831341
struct mlx4_ib_dev *mdev = to_mdev(ibqp->device);
1342+
struct mlx4_dev *dev = mdev->dev;
12841343
struct mlx4_ib_qp *mqp = to_mqp(ibqp);
12851344
struct net_device *ndev;
12861345
struct mlx4_ib_gid_entry *ge;
1287-
u64 reg_id = 0;
1346+
struct mlx4_flow_reg_id reg_id = {0, 0};
1347+
12881348
enum mlx4_protocol prot = (gid->raw[1] == 0x0e) ?
12891349
MLX4_PROT_IB_IPV4 : MLX4_PROT_IB_IPV6;
12901350

@@ -1309,10 +1369,17 @@ static int mlx4_ib_mcg_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
13091369
}
13101370

13111371
err = mlx4_multicast_detach(mdev->dev, &mqp->mqp, gid->raw,
1312-
prot, reg_id);
1372+
prot, reg_id.id);
13131373
if (err)
13141374
return err;
13151375

1376+
if (mlx4_is_bonded(dev)) {
1377+
err = mlx4_multicast_detach(mdev->dev, &mqp->mqp, gid->raw,
1378+
prot, reg_id.mirror);
1379+
if (err)
1380+
return err;
1381+
}
1382+
13161383
mutex_lock(&mqp->mutex);
13171384
ge = find_gid_entry(mqp, gid->raw);
13181385
if (ge) {
@@ -1440,6 +1507,7 @@ static void update_gids_task(struct work_struct *work)
14401507
union ib_gid *gids;
14411508
int err;
14421509
struct mlx4_dev *dev = gw->dev->dev;
1510+
int is_bonded = mlx4_is_bonded(dev);
14431511

14441512
if (!gw->dev->ib_active)
14451513
return;
@@ -1459,7 +1527,10 @@ static void update_gids_task(struct work_struct *work)
14591527
if (err)
14601528
pr_warn("set port command failed\n");
14611529
else
1462-
mlx4_ib_dispatch_event(gw->dev, gw->port, IB_EVENT_GID_CHANGE);
1530+
if ((gw->port == 1) || !is_bonded)
1531+
mlx4_ib_dispatch_event(gw->dev,
1532+
is_bonded ? 1 : gw->port,
1533+
IB_EVENT_GID_CHANGE);
14631534

14641535
mlx4_free_cmd_mailbox(dev, mailbox);
14651536
kfree(gw);
@@ -1875,7 +1946,8 @@ static void mlx4_ib_scan_netdevs(struct mlx4_ib_dev *ibdev,
18751946
* don't want the bond IP based gids in the table since
18761947
* flows that select port by gid may get the down port.
18771948
*/
1878-
if (port_state == IB_PORT_DOWN) {
1949+
if (port_state == IB_PORT_DOWN &&
1950+
!mlx4_is_bonded(ibdev->dev)) {
18791951
reset_gid_table(ibdev, port);
18801952
mlx4_ib_set_default_gid(ibdev,
18811953
curr_netdev,
@@ -2047,6 +2119,7 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
20472119
int err;
20482120
struct mlx4_ib_iboe *iboe;
20492121
int ib_num_ports = 0;
2122+
int num_req_counters;
20502123

20512124
pr_info_once("%s", mlx4_ib_version);
20522125

@@ -2080,13 +2153,15 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
20802153
MLX4_INIT_DOORBELL_LOCK(&ibdev->uar_lock);
20812154

20822155
ibdev->dev = dev;
2156+
ibdev->bond_next_port = 0;
20832157

20842158
strlcpy(ibdev->ib_dev.name, "mlx4_%d", IB_DEVICE_NAME_MAX);
20852159
ibdev->ib_dev.owner = THIS_MODULE;
20862160
ibdev->ib_dev.node_type = RDMA_NODE_IB_CA;
20872161
ibdev->ib_dev.local_dma_lkey = dev->caps.reserved_lkey;
20882162
ibdev->num_ports = num_ports;
2089-
ibdev->ib_dev.phys_port_cnt = ibdev->num_ports;
2163+
ibdev->ib_dev.phys_port_cnt = mlx4_is_bonded(dev) ?
2164+
1 : ibdev->num_ports;
20902165
ibdev->ib_dev.num_comp_vectors = dev->caps.num_comp_vectors;
20912166
ibdev->ib_dev.dma_device = &dev->persist->pdev->dev;
20922167

@@ -2207,7 +2282,8 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
22072282
if (init_node_data(ibdev))
22082283
goto err_map;
22092284

2210-
for (i = 0; i < ibdev->num_ports; ++i) {
2285+
num_req_counters = mlx4_is_bonded(dev) ? 1 : ibdev->num_ports;
2286+
for (i = 0; i < num_req_counters; ++i) {
22112287
mutex_init(&ibdev->qp1_proxy_lock[i]);
22122288
if (mlx4_ib_port_link_layer(&ibdev->ib_dev, i + 1) ==
22132289
IB_LINK_LAYER_ETHERNET) {
@@ -2218,6 +2294,10 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
22182294
ibdev->counters[i] = -1;
22192295
}
22202296
}
2297+
if (mlx4_is_bonded(dev))
2298+
for (i = 1; i < ibdev->num_ports ; ++i)
2299+
ibdev->counters[i] = ibdev->counters[0];
2300+
22212301

22222302
mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB)
22232303
ib_num_ports++;
@@ -2538,6 +2618,38 @@ static void do_slave_init(struct mlx4_ib_dev *ibdev, int slave, int do_init)
25382618
return;
25392619
}
25402620

2621+
static void handle_bonded_port_state_event(struct work_struct *work)
2622+
{
2623+
struct ib_event_work *ew =
2624+
container_of(work, struct ib_event_work, work);
2625+
struct mlx4_ib_dev *ibdev = ew->ib_dev;
2626+
enum ib_port_state bonded_port_state = IB_PORT_NOP;
2627+
int i;
2628+
struct ib_event ibev;
2629+
2630+
kfree(ew);
2631+
spin_lock_bh(&ibdev->iboe.lock);
2632+
for (i = 0; i < MLX4_MAX_PORTS; ++i) {
2633+
struct net_device *curr_netdev = ibdev->iboe.netdevs[i];
2634+
2635+
enum ib_port_state curr_port_state =
2636+
(netif_running(curr_netdev) &&
2637+
netif_carrier_ok(curr_netdev)) ?
2638+
IB_PORT_ACTIVE : IB_PORT_DOWN;
2639+
2640+
bonded_port_state = (bonded_port_state != IB_PORT_ACTIVE) ?
2641+
curr_port_state : IB_PORT_ACTIVE;
2642+
}
2643+
spin_unlock_bh(&ibdev->iboe.lock);
2644+
2645+
ibev.device = &ibdev->ib_dev;
2646+
ibev.element.port_num = 1;
2647+
ibev.event = (bonded_port_state == IB_PORT_ACTIVE) ?
2648+
IB_EVENT_PORT_ACTIVE : IB_EVENT_PORT_ERR;
2649+
2650+
ib_dispatch_event(&ibev);
2651+
}
2652+
25412653
static void mlx4_ib_event(struct mlx4_dev *dev, void *ibdev_ptr,
25422654
enum mlx4_dev_event event, unsigned long param)
25432655
{
@@ -2547,6 +2659,18 @@ static void mlx4_ib_event(struct mlx4_dev *dev, void *ibdev_ptr,
25472659
struct ib_event_work *ew;
25482660
int p = 0;
25492661

2662+
if (mlx4_is_bonded(dev) &&
2663+
((event == MLX4_DEV_EVENT_PORT_UP) ||
2664+
(event == MLX4_DEV_EVENT_PORT_DOWN))) {
2665+
ew = kmalloc(sizeof(*ew), GFP_ATOMIC);
2666+
if (!ew)
2667+
return;
2668+
INIT_WORK(&ew->work, handle_bonded_port_state_event);
2669+
ew->ib_dev = ibdev;
2670+
queue_work(wq, &ew->work);
2671+
return;
2672+
}
2673+
25502674
if (event == MLX4_DEV_EVENT_PORT_MGMT_CHANGE)
25512675
eqe = (struct mlx4_eqe *)param;
25522676
else
@@ -2607,7 +2731,7 @@ static void mlx4_ib_event(struct mlx4_dev *dev, void *ibdev_ptr,
26072731
}
26082732

26092733
ibev.device = ibdev_ptr;
2610-
ibev.element.port_num = (u8) p;
2734+
ibev.element.port_num = mlx4_is_bonded(ibdev->dev) ? 1 : (u8)p;
26112735

26122736
ib_dispatch_event(&ibev);
26132737
}
@@ -2616,7 +2740,8 @@ static struct mlx4_interface mlx4_ib_interface = {
26162740
.add = mlx4_ib_add,
26172741
.remove = mlx4_ib_remove,
26182742
.event = mlx4_ib_event,
2619-
.protocol = MLX4_PROT_IB_IPV6
2743+
.protocol = MLX4_PROT_IB_IPV6,
2744+
.flags = MLX4_INTFF_BONDING
26202745
};
26212746

26222747
static int __init mlx4_ib_init(void)

0 commit comments

Comments
 (0)