Skip to content

Commit 10a214d

Browse files
Devesh Sharma, dledford
Devesh Sharma
authored and committed
RDMA/ocrdma: Depend on async link events from CNA
Recently Doug Ledford reported a deadlock between the ocrdma load sequence and the NetworkManager service issuing "open" on the be2net interface. The deadlock happens when any be2net hook (e.g. open/close) is called in parallel with insmod ocrdma.ko. A. be2net sends administrative open/close events to ocrdma while holding device_list_mutex. It does this from the ndo_open/ndo_stop hooks of be2net. So the sequence of locks is rtnl_lock ---> device_list lock. B. When a new ocrdma RoCE device gets registered, the InfiniBand stack now takes rtnl_lock in ib_register_device() in the GID initialization routines. So the sequence of locks in this path is device_list lock ---> rtnl_lock. This improper locking sequence causes the deadlock. With this patch we stop using the administrative open and close events injected by the be2net driver. These events were used to dispatch PORT_ACTIVE and PORT_ERROR events to the IB stack. This patch implements logic to receive the async link events generated by the CNA whenever a link-state change is detected. From now on, these async events will be used to dispatch PORT_ACTIVE and PORT_ERROR events to the IB stack. Depending on async events from the CNA removes the need to hold device_list_mutex and thus breaks the busy-wait scenario. Reported-by: Doug Ledford <dledford@redhat.com> CC: Sathya Perla <sathya.perla@avagotech.com> Signed-off-by: Padmanabh Ratnakar <padmanabh.ratnakar@avagotech.com> Signed-off-by: Selvin Xavier <selvin.xavier@avagotech.com> Signed-off-by: Devesh Sharma <devesh.sharma@avagotech.com> Signed-off-by: Doug Ledford <dledford@redhat.com>
1 parent 36ac0db commit 10a214d

File tree

6 files changed

+119
-22
lines changed

6 files changed

+119
-22
lines changed

drivers/infiniband/hw/ocrdma/ocrdma.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -232,6 +232,10 @@ struct phy_info {
232232
u16 interface_type;
233233
};
234234

235+
enum ocrdma_flags {
236+
OCRDMA_FLAGS_LINK_STATUS_INIT = 0x01
237+
};
238+
235239
struct ocrdma_dev {
236240
struct ib_device ibdev;
237241
struct ocrdma_dev_attr attr;
@@ -287,6 +291,7 @@ struct ocrdma_dev {
287291
atomic_t update_sl;
288292
u16 pvid;
289293
u32 asic_id;
294+
u32 flags;
290295

291296
ulong last_stats_time;
292297
struct mutex stats_lock; /* provide synch for debugfs operations */
@@ -591,4 +596,9 @@ static inline u8 ocrdma_is_enabled_and_synced(u32 state)
591596
(state & OCRDMA_STATE_FLAG_SYNC);
592597
}
593598

599+
static inline u8 ocrdma_get_ae_link_state(u32 ae_state)
600+
{
601+
return ((ae_state & OCRDMA_AE_LSC_LS_MASK) >> OCRDMA_AE_LSC_LS_SHIFT);
602+
}
603+
594604
#endif

drivers/infiniband/hw/ocrdma/ocrdma_hw.c

Lines changed: 35 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -579,6 +579,8 @@ static int ocrdma_mbx_create_mq(struct ocrdma_dev *dev,
579579

580580
cmd->async_event_bitmap = BIT(OCRDMA_ASYNC_GRP5_EVE_CODE);
581581
cmd->async_event_bitmap |= BIT(OCRDMA_ASYNC_RDMA_EVE_CODE);
582+
/* Request link events on this MQ. */
583+
cmd->async_event_bitmap |= BIT(OCRDMA_ASYNC_LINK_EVE_CODE);
582584

583585
cmd->async_cqid_ringsize = cq->id;
584586
cmd->async_cqid_ringsize |= (ocrdma_encoded_q_len(mq->len) <<
@@ -819,20 +821,42 @@ static void ocrdma_process_grp5_aync(struct ocrdma_dev *dev,
819821
}
820822
}
821823

824+
static void ocrdma_process_link_state(struct ocrdma_dev *dev,
825+
struct ocrdma_ae_mcqe *cqe)
826+
{
827+
struct ocrdma_ae_lnkst_mcqe *evt;
828+
u8 lstate;
829+
830+
evt = (struct ocrdma_ae_lnkst_mcqe *)cqe;
831+
lstate = ocrdma_get_ae_link_state(evt->speed_state_ptn);
832+
833+
if (!(lstate & OCRDMA_AE_LSC_LLINK_MASK))
834+
return;
835+
836+
if (dev->flags & OCRDMA_FLAGS_LINK_STATUS_INIT)
837+
ocrdma_update_link_state(dev, (lstate & OCRDMA_LINK_ST_MASK));
838+
}
839+
822840
static void ocrdma_process_acqe(struct ocrdma_dev *dev, void *ae_cqe)
823841
{
824842
/* async CQE processing */
825843
struct ocrdma_ae_mcqe *cqe = ae_cqe;
826844
u32 evt_code = (cqe->valid_ae_event & OCRDMA_AE_MCQE_EVENT_CODE_MASK) >>
827845
OCRDMA_AE_MCQE_EVENT_CODE_SHIFT;
828-
829-
if (evt_code == OCRDMA_ASYNC_RDMA_EVE_CODE)
846+
switch (evt_code) {
847+
case OCRDMA_ASYNC_LINK_EVE_CODE:
848+
ocrdma_process_link_state(dev, cqe);
849+
break;
850+
case OCRDMA_ASYNC_RDMA_EVE_CODE:
830851
ocrdma_dispatch_ibevent(dev, cqe);
831-
else if (evt_code == OCRDMA_ASYNC_GRP5_EVE_CODE)
852+
break;
853+
case OCRDMA_ASYNC_GRP5_EVE_CODE:
832854
ocrdma_process_grp5_aync(dev, cqe);
833-
else
855+
break;
856+
default:
834857
pr_err("%s(%d) invalid evt code=0x%x\n", __func__,
835858
dev->id, evt_code);
859+
}
836860
}
837861

838862
static void ocrdma_process_mcqe(struct ocrdma_dev *dev, struct ocrdma_mcqe *cqe)
@@ -1363,7 +1387,8 @@ static int ocrdma_mbx_query_dev(struct ocrdma_dev *dev)
13631387
return status;
13641388
}
13651389

1366-
int ocrdma_mbx_get_link_speed(struct ocrdma_dev *dev, u8 *lnk_speed)
1390+
int ocrdma_mbx_get_link_speed(struct ocrdma_dev *dev, u8 *lnk_speed,
1391+
u8 *lnk_state)
13671392
{
13681393
int status = -ENOMEM;
13691394
struct ocrdma_get_link_speed_rsp *rsp;
@@ -1384,8 +1409,11 @@ int ocrdma_mbx_get_link_speed(struct ocrdma_dev *dev, u8 *lnk_speed)
13841409
goto mbx_err;
13851410

13861411
rsp = (struct ocrdma_get_link_speed_rsp *)cmd;
1387-
*lnk_speed = (rsp->pflt_pps_ld_pnum & OCRDMA_PHY_PS_MASK)
1388-
>> OCRDMA_PHY_PS_SHIFT;
1412+
if (lnk_speed)
1413+
*lnk_speed = (rsp->pflt_pps_ld_pnum & OCRDMA_PHY_PS_MASK)
1414+
>> OCRDMA_PHY_PS_SHIFT;
1415+
if (lnk_state)
1416+
*lnk_state = (rsp->res_lnk_st & OCRDMA_LINK_ST_MASK);
13891417

13901418
mbx_err:
13911419
kfree(cmd);

drivers/infiniband/hw/ocrdma/ocrdma_hw.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -106,7 +106,8 @@ void ocrdma_ring_cq_db(struct ocrdma_dev *, u16 cq_id, bool armed,
106106
bool solicited, u16 cqe_popped);
107107

108108
/* verbs specific mailbox commands */
109-
int ocrdma_mbx_get_link_speed(struct ocrdma_dev *dev, u8 *lnk_speed);
109+
int ocrdma_mbx_get_link_speed(struct ocrdma_dev *dev, u8 *lnk_speed,
110+
u8 *lnk_st);
110111
int ocrdma_query_config(struct ocrdma_dev *,
111112
struct ocrdma_mbx_query_config *config);
112113

@@ -153,5 +154,6 @@ char *port_speed_string(struct ocrdma_dev *dev);
153154
void ocrdma_init_service_level(struct ocrdma_dev *);
154155
void ocrdma_alloc_pd_pool(struct ocrdma_dev *dev);
155156
void ocrdma_free_pd_range(struct ocrdma_dev *dev);
157+
void ocrdma_update_link_state(struct ocrdma_dev *dev, u8 lstate);
156158

157159
#endif /* __OCRDMA_HW_H__ */

drivers/infiniband/hw/ocrdma/ocrdma_main.c

Lines changed: 25 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -290,6 +290,7 @@ static void ocrdma_remove_sysfiles(struct ocrdma_dev *dev)
290290
static struct ocrdma_dev *ocrdma_add(struct be_dev_info *dev_info)
291291
{
292292
int status = 0, i;
293+
u8 lstate = 0;
293294
struct ocrdma_dev *dev;
294295

295296
dev = (struct ocrdma_dev *)ib_alloc_device(sizeof(struct ocrdma_dev));
@@ -319,6 +320,11 @@ static struct ocrdma_dev *ocrdma_add(struct be_dev_info *dev_info)
319320
if (status)
320321
goto alloc_err;
321322

323+
/* Query Link state and update */
324+
status = ocrdma_mbx_get_link_speed(dev, NULL, &lstate);
325+
if (!status)
326+
ocrdma_update_link_state(dev, lstate);
327+
322328
for (i = 0; i < ARRAY_SIZE(ocrdma_attributes); i++)
323329
if (device_create_file(&dev->ibdev.dev, ocrdma_attributes[i]))
324330
goto sysfs_err;
@@ -373,7 +379,7 @@ static void ocrdma_remove(struct ocrdma_dev *dev)
373379
ocrdma_remove_free(dev);
374380
}
375381

376-
static int ocrdma_open(struct ocrdma_dev *dev)
382+
static int ocrdma_dispatch_port_active(struct ocrdma_dev *dev)
377383
{
378384
struct ib_event port_event;
379385

@@ -384,7 +390,7 @@ static int ocrdma_open(struct ocrdma_dev *dev)
384390
return 0;
385391
}
386392

387-
static int ocrdma_close(struct ocrdma_dev *dev)
393+
static int ocrdma_dispatch_port_error(struct ocrdma_dev *dev)
388394
{
389395
struct ib_event err_event;
390396

@@ -397,7 +403,7 @@ static int ocrdma_close(struct ocrdma_dev *dev)
397403

398404
static void ocrdma_shutdown(struct ocrdma_dev *dev)
399405
{
400-
ocrdma_close(dev);
406+
ocrdma_dispatch_port_error(dev);
401407
ocrdma_remove(dev);
402408
}
403409

@@ -408,18 +414,28 @@ static void ocrdma_shutdown(struct ocrdma_dev *dev)
408414
static void ocrdma_event_handler(struct ocrdma_dev *dev, u32 event)
409415
{
410416
switch (event) {
411-
case BE_DEV_UP:
412-
ocrdma_open(dev);
413-
break;
414-
case BE_DEV_DOWN:
415-
ocrdma_close(dev);
416-
break;
417417
case BE_DEV_SHUTDOWN:
418418
ocrdma_shutdown(dev);
419419
break;
420+
default:
421+
break;
420422
}
421423
}
422424

425+
void ocrdma_update_link_state(struct ocrdma_dev *dev, u8 lstate)
426+
{
427+
if (!(dev->flags & OCRDMA_FLAGS_LINK_STATUS_INIT)) {
428+
dev->flags |= OCRDMA_FLAGS_LINK_STATUS_INIT;
429+
if (!lstate)
430+
return;
431+
}
432+
433+
if (!lstate)
434+
ocrdma_dispatch_port_error(dev);
435+
else
436+
ocrdma_dispatch_port_active(dev);
437+
}
438+
423439
static struct ocrdma_driver ocrdma_drv = {
424440
.name = "ocrdma_driver",
425441
.add = ocrdma_add,

drivers/infiniband/hw/ocrdma/ocrdma_sli.h

Lines changed: 45 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -465,8 +465,11 @@ struct ocrdma_ae_qp_mcqe {
465465
u32 valid_ae_event;
466466
};
467467

468-
#define OCRDMA_ASYNC_RDMA_EVE_CODE 0x14
469-
#define OCRDMA_ASYNC_GRP5_EVE_CODE 0x5
468+
enum ocrdma_async_event_code {
469+
OCRDMA_ASYNC_LINK_EVE_CODE = 0x01,
470+
OCRDMA_ASYNC_GRP5_EVE_CODE = 0x05,
471+
OCRDMA_ASYNC_RDMA_EVE_CODE = 0x14
472+
};
470473

471474
enum ocrdma_async_grp5_events {
472475
OCRDMA_ASYNC_EVENT_QOS_VALUE = 0x01,
@@ -489,6 +492,44 @@ enum OCRDMA_ASYNC_EVENT_TYPE {
489492
OCRDMA_MAX_ASYNC_ERRORS
490493
};
491494

495+
struct ocrdma_ae_lnkst_mcqe {
496+
u32 speed_state_ptn;
497+
u32 qos_reason_falut;
498+
u32 evt_tag;
499+
u32 valid_ae_event;
500+
};
501+
502+
enum {
503+
OCRDMA_AE_LSC_PORT_NUM_MASK = 0x3F,
504+
OCRDMA_AE_LSC_PT_SHIFT = 0x06,
505+
OCRDMA_AE_LSC_PT_MASK = (0x03 <<
506+
OCRDMA_AE_LSC_PT_SHIFT),
507+
OCRDMA_AE_LSC_LS_SHIFT = 0x08,
508+
OCRDMA_AE_LSC_LS_MASK = (0xFF <<
509+
OCRDMA_AE_LSC_LS_SHIFT),
510+
OCRDMA_AE_LSC_LD_SHIFT = 0x10,
511+
OCRDMA_AE_LSC_LD_MASK = (0xFF <<
512+
OCRDMA_AE_LSC_LD_SHIFT),
513+
OCRDMA_AE_LSC_PPS_SHIFT = 0x18,
514+
OCRDMA_AE_LSC_PPS_MASK = (0xFF <<
515+
OCRDMA_AE_LSC_PPS_SHIFT),
516+
OCRDMA_AE_LSC_PPF_MASK = 0xFF,
517+
OCRDMA_AE_LSC_ER_SHIFT = 0x08,
518+
OCRDMA_AE_LSC_ER_MASK = (0xFF <<
519+
OCRDMA_AE_LSC_ER_SHIFT),
520+
OCRDMA_AE_LSC_QOS_SHIFT = 0x10,
521+
OCRDMA_AE_LSC_QOS_MASK = (0xFFFF <<
522+
OCRDMA_AE_LSC_QOS_SHIFT)
523+
};
524+
525+
enum {
526+
OCRDMA_AE_LSC_PLINK_DOWN = 0x00,
527+
OCRDMA_AE_LSC_PLINK_UP = 0x01,
528+
OCRDMA_AE_LSC_LLINK_DOWN = 0x02,
529+
OCRDMA_AE_LSC_LLINK_MASK = 0x02,
530+
OCRDMA_AE_LSC_LLINK_UP = 0x03
531+
};
532+
492533
/* mailbox command request and responses */
493534
enum {
494535
OCRDMA_MBX_QUERY_CFG_CQ_OVERFLOW_SHIFT = 2,
@@ -676,7 +717,7 @@ enum {
676717
OCRDMA_PHY_PFLT_SHIFT = 0x18,
677718
OCRDMA_QOS_LNKSP_MASK = 0xFFFF0000,
678719
OCRDMA_QOS_LNKSP_SHIFT = 0x10,
679-
OCRDMA_LLST_MASK = 0xFF,
720+
OCRDMA_LINK_ST_MASK = 0x01,
680721
OCRDMA_PLFC_MASK = 0x00000400,
681722
OCRDMA_PLFC_SHIFT = 0x8,
682723
OCRDMA_PLRFC_MASK = 0x00000200,
@@ -691,7 +732,7 @@ struct ocrdma_get_link_speed_rsp {
691732

692733
u32 pflt_pps_ld_pnum;
693734
u32 qos_lsp;
694-
u32 res_lls;
735+
u32 res_lnk_st;
695736
};
696737

697738
enum {

drivers/infiniband/hw/ocrdma/ocrdma_verbs.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -171,7 +171,7 @@ static inline void get_link_speed_and_width(struct ocrdma_dev *dev,
171171
int status;
172172
u8 speed;
173173

174-
status = ocrdma_mbx_get_link_speed(dev, &speed);
174+
status = ocrdma_mbx_get_link_speed(dev, &speed, NULL);
175175
if (status)
176176
speed = OCRDMA_PHYS_LINK_SPEED_ZERO;
177177

0 commit comments

Comments
 (0)