Skip to content

Commit b00a92c

Browse files
liuyixian and jgunthorpe
authored and committed
RDMA/hns: Move all prints out of irq handle
Under some configurations, printing inside the AEQ handler can trigger unnecessary interrupts caused by timeouts. Thus, move all prints out of the AEQ handler and into a work queue. Signed-off-by: liuyixian <liuyixian@huawei.com> Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
1 parent 0099103 commit b00a92c

File tree

2 files changed

+97
-132
lines changed

2 files changed

+97
-132
lines changed

drivers/infiniband/hw/hns/hns_roce_device.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -738,6 +738,7 @@ struct hns_roce_work {
738738
struct hns_roce_dev *hr_dev;
739739
struct work_struct work;
740740
u32 qpn;
741+
u32 cqn;
741742
int event_type;
742743
int sub_type;
743744
};

drivers/infiniband/hw/hns/hns_roce_hw_v2.c

Lines changed: 96 additions & 132 deletions
Original file line numberDiff line numberDiff line change
@@ -3995,13 +3995,103 @@ static void hns_roce_irq_work_handle(struct work_struct *work)
39953995
{
39963996
struct hns_roce_work *irq_work =
39973997
container_of(work, struct hns_roce_work, work);
3998+
struct device *dev = irq_work->hr_dev->dev;
39983999
u32 qpn = irq_work->qpn;
4000+
u32 cqn = irq_work->cqn;
39994001

40004002
switch (irq_work->event_type) {
4003+
case HNS_ROCE_EVENT_TYPE_PATH_MIG:
4004+
dev_info(dev, "Path migrated succeeded.\n");
4005+
break;
4006+
case HNS_ROCE_EVENT_TYPE_PATH_MIG_FAILED:
4007+
dev_warn(dev, "Path migration failed.\n");
4008+
break;
4009+
case HNS_ROCE_EVENT_TYPE_COMM_EST:
4010+
dev_info(dev, "Communication established.\n");
4011+
break;
4012+
case HNS_ROCE_EVENT_TYPE_SQ_DRAINED:
4013+
dev_warn(dev, "Send queue drained.\n");
4014+
break;
40014015
case HNS_ROCE_EVENT_TYPE_WQ_CATAS_ERROR:
4016+
dev_err(dev, "Local work queue catastrophic error.\n");
4017+
hns_roce_set_qps_to_err(irq_work->hr_dev, qpn);
4018+
switch (irq_work->sub_type) {
4019+
case HNS_ROCE_LWQCE_QPC_ERROR:
4020+
dev_err(dev, "QP %d, QPC error.\n", qpn);
4021+
break;
4022+
case HNS_ROCE_LWQCE_MTU_ERROR:
4023+
dev_err(dev, "QP %d, MTU error.\n", qpn);
4024+
break;
4025+
case HNS_ROCE_LWQCE_WQE_BA_ADDR_ERROR:
4026+
dev_err(dev, "QP %d, WQE BA addr error.\n", qpn);
4027+
break;
4028+
case HNS_ROCE_LWQCE_WQE_ADDR_ERROR:
4029+
dev_err(dev, "QP %d, WQE addr error.\n", qpn);
4030+
break;
4031+
case HNS_ROCE_LWQCE_SQ_WQE_SHIFT_ERROR:
4032+
dev_err(dev, "QP %d, WQE shift error.\n", qpn);
4033+
break;
4034+
default:
4035+
dev_err(dev, "Unhandled sub_event type %d.\n",
4036+
irq_work->sub_type);
4037+
break;
4038+
}
4039+
break;
40024040
case HNS_ROCE_EVENT_TYPE_INV_REQ_LOCAL_WQ_ERROR:
4041+
dev_err(dev, "Invalid request local work queue error.\n");
4042+
hns_roce_set_qps_to_err(irq_work->hr_dev, qpn);
4043+
break;
40034044
case HNS_ROCE_EVENT_TYPE_LOCAL_WQ_ACCESS_ERROR:
4045+
dev_err(dev, "Local access violation work queue error.\n");
40044046
hns_roce_set_qps_to_err(irq_work->hr_dev, qpn);
4047+
switch (irq_work->sub_type) {
4048+
case HNS_ROCE_LAVWQE_R_KEY_VIOLATION:
4049+
dev_err(dev, "QP %d, R_key violation.\n", qpn);
4050+
break;
4051+
case HNS_ROCE_LAVWQE_LENGTH_ERROR:
4052+
dev_err(dev, "QP %d, length error.\n", qpn);
4053+
break;
4054+
case HNS_ROCE_LAVWQE_VA_ERROR:
4055+
dev_err(dev, "QP %d, VA error.\n", qpn);
4056+
break;
4057+
case HNS_ROCE_LAVWQE_PD_ERROR:
4058+
dev_err(dev, "QP %d, PD error.\n", qpn);
4059+
break;
4060+
case HNS_ROCE_LAVWQE_RW_ACC_ERROR:
4061+
dev_err(dev, "QP %d, rw acc error.\n", qpn);
4062+
break;
4063+
case HNS_ROCE_LAVWQE_KEY_STATE_ERROR:
4064+
dev_err(dev, "QP %d, key state error.\n", qpn);
4065+
break;
4066+
case HNS_ROCE_LAVWQE_MR_OPERATION_ERROR:
4067+
dev_err(dev, "QP %d, MR operation error.\n", qpn);
4068+
break;
4069+
default:
4070+
dev_err(dev, "Unhandled sub_event type %d.\n",
4071+
irq_work->sub_type);
4072+
break;
4073+
}
4074+
break;
4075+
case HNS_ROCE_EVENT_TYPE_SRQ_LIMIT_REACH:
4076+
dev_warn(dev, "SRQ limit reach.\n");
4077+
break;
4078+
case HNS_ROCE_EVENT_TYPE_SRQ_LAST_WQE_REACH:
4079+
dev_warn(dev, "SRQ last wqe reach.\n");
4080+
break;
4081+
case HNS_ROCE_EVENT_TYPE_SRQ_CATAS_ERROR:
4082+
dev_err(dev, "SRQ catas error.\n");
4083+
break;
4084+
case HNS_ROCE_EVENT_TYPE_CQ_ACCESS_ERROR:
4085+
dev_err(dev, "CQ 0x%x access err.\n", cqn);
4086+
break;
4087+
case HNS_ROCE_EVENT_TYPE_CQ_OVERFLOW:
4088+
dev_warn(dev, "CQ 0x%x overflow\n", cqn);
4089+
break;
4090+
case HNS_ROCE_EVENT_TYPE_DB_OVERFLOW:
4091+
dev_warn(dev, "DB overflow.\n");
4092+
break;
4093+
case HNS_ROCE_EVENT_TYPE_FLR:
4094+
dev_warn(dev, "Function level reset.\n");
40054095
break;
40064096
default:
40074097
break;
@@ -4011,7 +4101,8 @@ static void hns_roce_irq_work_handle(struct work_struct *work)
40114101
}
40124102

40134103
static void hns_roce_v2_init_irq_work(struct hns_roce_dev *hr_dev,
4014-
struct hns_roce_eq *eq, u32 qpn)
4104+
struct hns_roce_eq *eq,
4105+
u32 qpn, u32 cqn)
40154106
{
40164107
struct hns_roce_work *irq_work;
40174108

@@ -4022,6 +4113,7 @@ static void hns_roce_v2_init_irq_work(struct hns_roce_dev *hr_dev,
40224113
INIT_WORK(&(irq_work->work), hns_roce_irq_work_handle);
40234114
irq_work->hr_dev = hr_dev;
40244115
irq_work->qpn = qpn;
4116+
irq_work->cqn = cqn;
40254117
irq_work->event_type = eq->event_type;
40264118
irq_work->sub_type = eq->sub_type;
40274119
queue_work(hr_dev->irq_workq, &(irq_work->work));
@@ -4058,124 +4150,6 @@ static void set_eq_cons_index_v2(struct hns_roce_eq *eq)
40584150
hns_roce_write64_k(doorbell, eq->doorbell);
40594151
}
40604152

4061-
static void hns_roce_v2_wq_catas_err_handle(struct hns_roce_dev *hr_dev,
4062-
struct hns_roce_aeqe *aeqe,
4063-
u32 qpn)
4064-
{
4065-
struct device *dev = hr_dev->dev;
4066-
int sub_type;
4067-
4068-
dev_warn(dev, "Local work queue catastrophic error.\n");
4069-
sub_type = roce_get_field(aeqe->asyn, HNS_ROCE_V2_AEQE_SUB_TYPE_M,
4070-
HNS_ROCE_V2_AEQE_SUB_TYPE_S);
4071-
switch (sub_type) {
4072-
case HNS_ROCE_LWQCE_QPC_ERROR:
4073-
dev_warn(dev, "QP %d, QPC error.\n", qpn);
4074-
break;
4075-
case HNS_ROCE_LWQCE_MTU_ERROR:
4076-
dev_warn(dev, "QP %d, MTU error.\n", qpn);
4077-
break;
4078-
case HNS_ROCE_LWQCE_WQE_BA_ADDR_ERROR:
4079-
dev_warn(dev, "QP %d, WQE BA addr error.\n", qpn);
4080-
break;
4081-
case HNS_ROCE_LWQCE_WQE_ADDR_ERROR:
4082-
dev_warn(dev, "QP %d, WQE addr error.\n", qpn);
4083-
break;
4084-
case HNS_ROCE_LWQCE_SQ_WQE_SHIFT_ERROR:
4085-
dev_warn(dev, "QP %d, WQE shift error.\n", qpn);
4086-
break;
4087-
default:
4088-
dev_err(dev, "Unhandled sub_event type %d.\n", sub_type);
4089-
break;
4090-
}
4091-
}
4092-
4093-
static void hns_roce_v2_local_wq_access_err_handle(struct hns_roce_dev *hr_dev,
4094-
struct hns_roce_aeqe *aeqe, u32 qpn)
4095-
{
4096-
struct device *dev = hr_dev->dev;
4097-
int sub_type;
4098-
4099-
dev_warn(dev, "Local access violation work queue error.\n");
4100-
sub_type = roce_get_field(aeqe->asyn, HNS_ROCE_V2_AEQE_SUB_TYPE_M,
4101-
HNS_ROCE_V2_AEQE_SUB_TYPE_S);
4102-
switch (sub_type) {
4103-
case HNS_ROCE_LAVWQE_R_KEY_VIOLATION:
4104-
dev_warn(dev, "QP %d, R_key violation.\n", qpn);
4105-
break;
4106-
case HNS_ROCE_LAVWQE_LENGTH_ERROR:
4107-
dev_warn(dev, "QP %d, length error.\n", qpn);
4108-
break;
4109-
case HNS_ROCE_LAVWQE_VA_ERROR:
4110-
dev_warn(dev, "QP %d, VA error.\n", qpn);
4111-
break;
4112-
case HNS_ROCE_LAVWQE_PD_ERROR:
4113-
dev_err(dev, "QP %d, PD error.\n", qpn);
4114-
break;
4115-
case HNS_ROCE_LAVWQE_RW_ACC_ERROR:
4116-
dev_warn(dev, "QP %d, rw acc error.\n", qpn);
4117-
break;
4118-
case HNS_ROCE_LAVWQE_KEY_STATE_ERROR:
4119-
dev_warn(dev, "QP %d, key state error.\n", qpn);
4120-
break;
4121-
case HNS_ROCE_LAVWQE_MR_OPERATION_ERROR:
4122-
dev_warn(dev, "QP %d, MR operation error.\n", qpn);
4123-
break;
4124-
default:
4125-
dev_err(dev, "Unhandled sub_event type %d.\n", sub_type);
4126-
break;
4127-
}
4128-
}
4129-
4130-
static void hns_roce_v2_qp_err_handle(struct hns_roce_dev *hr_dev,
4131-
struct hns_roce_aeqe *aeqe,
4132-
int event_type, u32 qpn)
4133-
{
4134-
struct device *dev = hr_dev->dev;
4135-
4136-
switch (event_type) {
4137-
case HNS_ROCE_EVENT_TYPE_COMM_EST:
4138-
dev_warn(dev, "Communication established.\n");
4139-
break;
4140-
case HNS_ROCE_EVENT_TYPE_SQ_DRAINED:
4141-
dev_warn(dev, "Send queue drained.\n");
4142-
break;
4143-
case HNS_ROCE_EVENT_TYPE_WQ_CATAS_ERROR:
4144-
hns_roce_v2_wq_catas_err_handle(hr_dev, aeqe, qpn);
4145-
break;
4146-
case HNS_ROCE_EVENT_TYPE_INV_REQ_LOCAL_WQ_ERROR:
4147-
dev_warn(dev, "Invalid request local work queue error.\n");
4148-
break;
4149-
case HNS_ROCE_EVENT_TYPE_LOCAL_WQ_ACCESS_ERROR:
4150-
hns_roce_v2_local_wq_access_err_handle(hr_dev, aeqe, qpn);
4151-
break;
4152-
default:
4153-
break;
4154-
}
4155-
4156-
hns_roce_qp_event(hr_dev, qpn, event_type);
4157-
}
4158-
4159-
static void hns_roce_v2_cq_err_handle(struct hns_roce_dev *hr_dev,
4160-
struct hns_roce_aeqe *aeqe,
4161-
int event_type, u32 cqn)
4162-
{
4163-
struct device *dev = hr_dev->dev;
4164-
4165-
switch (event_type) {
4166-
case HNS_ROCE_EVENT_TYPE_CQ_ACCESS_ERROR:
4167-
dev_warn(dev, "CQ 0x%x access err.\n", cqn);
4168-
break;
4169-
case HNS_ROCE_EVENT_TYPE_CQ_OVERFLOW:
4170-
dev_warn(dev, "CQ 0x%x overflow\n", cqn);
4171-
break;
4172-
default:
4173-
break;
4174-
}
4175-
4176-
hns_roce_cq_event(hr_dev, cqn, event_type);
4177-
}
4178-
41794153
static struct hns_roce_aeqe *get_aeqe_v2(struct hns_roce_eq *eq, u32 entry)
41804154
{
41814155
u32 buf_chk_sz;
@@ -4251,31 +4225,23 @@ static int hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev,
42514225

42524226
switch (event_type) {
42534227
case HNS_ROCE_EVENT_TYPE_PATH_MIG:
4254-
dev_warn(dev, "Path migrated succeeded.\n");
4255-
break;
42564228
case HNS_ROCE_EVENT_TYPE_PATH_MIG_FAILED:
4257-
dev_warn(dev, "Path migration failed.\n");
4258-
break;
42594229
case HNS_ROCE_EVENT_TYPE_COMM_EST:
42604230
case HNS_ROCE_EVENT_TYPE_SQ_DRAINED:
42614231
case HNS_ROCE_EVENT_TYPE_WQ_CATAS_ERROR:
42624232
case HNS_ROCE_EVENT_TYPE_INV_REQ_LOCAL_WQ_ERROR:
42634233
case HNS_ROCE_EVENT_TYPE_LOCAL_WQ_ACCESS_ERROR:
4264-
hns_roce_v2_qp_err_handle(hr_dev, aeqe, event_type,
4265-
qpn);
4234+
hns_roce_qp_event(hr_dev, qpn, event_type);
42664235
break;
42674236
case HNS_ROCE_EVENT_TYPE_SRQ_LIMIT_REACH:
42684237
case HNS_ROCE_EVENT_TYPE_SRQ_LAST_WQE_REACH:
42694238
case HNS_ROCE_EVENT_TYPE_SRQ_CATAS_ERROR:
4270-
dev_warn(dev, "SRQ not support.\n");
42714239
break;
42724240
case HNS_ROCE_EVENT_TYPE_CQ_ACCESS_ERROR:
42734241
case HNS_ROCE_EVENT_TYPE_CQ_OVERFLOW:
4274-
hns_roce_v2_cq_err_handle(hr_dev, aeqe, event_type,
4275-
cqn);
4242+
hns_roce_cq_event(hr_dev, cqn, event_type);
42764243
break;
42774244
case HNS_ROCE_EVENT_TYPE_DB_OVERFLOW:
4278-
dev_warn(dev, "DB overflow.\n");
42794245
break;
42804246
case HNS_ROCE_EVENT_TYPE_MB:
42814247
hns_roce_cmd_event(hr_dev,
@@ -4284,10 +4250,8 @@ static int hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev,
42844250
le64_to_cpu(aeqe->event.cmd.out_param));
42854251
break;
42864252
case HNS_ROCE_EVENT_TYPE_CEQ_OVERFLOW:
4287-
dev_warn(dev, "CEQ overflow.\n");
42884253
break;
42894254
case HNS_ROCE_EVENT_TYPE_FLR:
4290-
dev_warn(dev, "Function level reset.\n");
42914255
break;
42924256
default:
42934257
dev_err(dev, "Unhandled event %d on EQ %d at idx %u.\n",
@@ -4304,7 +4268,7 @@ static int hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev,
43044268
dev_warn(dev, "cons_index overflow, set back to 0.\n");
43054269
eq->cons_index = 0;
43064270
}
4307-
hns_roce_v2_init_irq_work(hr_dev, eq, qpn);
4271+
hns_roce_v2_init_irq_work(hr_dev, eq, qpn, cqn);
43084272
}
43094273

43104274
set_eq_cons_index_v2(eq);

0 commit comments

Comments (0)