Skip to content

Commit 384f881

Browse files
oulijun authored and jgunthorpe committed
RDMA/hns: Add atomic support
This patch adds atomic operations for hip08, including the fetchadd and cmpswap operations. In order to enable atomic, the driver needs to do the following steps:

1. Enable the atomic caps for the RoCE device
2. Post the wqe context of atomic type
3. Configure the atomic type of mtpt

Signed-off-by: Lijun Ou <oulijun@huawei.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
1 parent b9c1ea4 commit 384f881

File tree

4 files changed

+45
-3
lines changed

4 files changed

+45
-3
lines changed

drivers/infiniband/hw/hns/hns_roce_device.h

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -193,6 +193,7 @@ enum {
193193
HNS_ROCE_CAP_FLAG_RQ_INLINE = BIT(2),
194194
HNS_ROCE_CAP_FLAG_RECORD_DB = BIT(3),
195195
HNS_ROCE_CAP_FLAG_SQ_RECORD_DB = BIT(4),
196+
HNS_ROCE_CAP_FLAG_ATOMIC = BIT(10),
196197
};
197198

198199
enum hns_roce_mtt_type {

drivers/infiniband/hw/hns/hns_roce_hw_v2.c

Lines changed: 37 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,18 @@ static void set_data_seg_v2(struct hns_roce_v2_wqe_data_seg *dseg,
5454
dseg->len = cpu_to_le32(sg->length);
5555
}
5656

57+
static void set_atomic_seg(struct hns_roce_wqe_atomic_seg *aseg,
58+
const struct ib_atomic_wr *wr)
59+
{
60+
if (wr->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP) {
61+
aseg->fetchadd_swap_data = cpu_to_le64(wr->swap);
62+
aseg->cmp_data = cpu_to_le64(wr->compare_add);
63+
} else {
64+
aseg->fetchadd_swap_data = cpu_to_le64(wr->compare_add);
65+
aseg->cmp_data = 0;
66+
}
67+
}
68+
5769
static void set_extend_sge(struct hns_roce_qp *qp, const struct ib_send_wr *wr,
5870
unsigned int *sge_ind)
5971
{
@@ -179,6 +191,7 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp,
179191
struct hns_roce_v2_ud_send_wqe *ud_sq_wqe;
180192
struct hns_roce_v2_rc_send_wqe *rc_sq_wqe;
181193
struct hns_roce_qp *qp = to_hr_qp(ibqp);
194+
struct hns_roce_v2_wqe_data_seg *dseg;
182195
struct device *dev = hr_dev->dev;
183196
struct hns_roce_v2_db sq_db;
184197
struct ib_qp_attr attr;
@@ -407,6 +420,7 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp,
407420
roce_set_bit(rc_sq_wqe->byte_4,
408421
V2_RC_SEND_WQE_BYTE_4_OWNER_S, owner_bit);
409422

423+
wqe += sizeof(struct hns_roce_v2_rc_send_wqe);
410424
switch (wr->opcode) {
411425
case IB_WR_RDMA_READ:
412426
hr_op = HNS_ROCE_V2_WQE_OP_RDMA_READ;
@@ -443,9 +457,21 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp,
443457
break;
444458
case IB_WR_ATOMIC_CMP_AND_SWP:
445459
hr_op = HNS_ROCE_V2_WQE_OP_ATOM_CMP_AND_SWAP;
460+
rc_sq_wqe->rkey =
461+
cpu_to_le32(atomic_wr(wr)->rkey);
462+
rc_sq_wqe->va =
463+
cpu_to_le32(atomic_wr(wr)->remote_addr);
464+
wqe += sizeof(struct hns_roce_v2_wqe_data_seg);
465+
set_atomic_seg(wqe, atomic_wr(wr));
446466
break;
447467
case IB_WR_ATOMIC_FETCH_AND_ADD:
448468
hr_op = HNS_ROCE_V2_WQE_OP_ATOM_FETCH_AND_ADD;
469+
rc_sq_wqe->rkey =
470+
cpu_to_le32(atomic_wr(wr)->rkey);
471+
rc_sq_wqe->va =
472+
cpu_to_le32(atomic_wr(wr)->remote_addr);
473+
wqe += sizeof(struct hns_roce_v2_wqe_data_seg);
474+
set_atomic_seg(wqe, atomic_wr(wr));
449475
break;
450476
case IB_WR_MASKED_ATOMIC_CMP_AND_SWP:
451477
hr_op =
@@ -463,7 +489,12 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp,
463489
roce_set_field(rc_sq_wqe->byte_4,
464490
V2_RC_SEND_WQE_BYTE_4_OPCODE_M,
465491
V2_RC_SEND_WQE_BYTE_4_OPCODE_S, hr_op);
466-
wqe += sizeof(struct hns_roce_v2_rc_send_wqe);
492+
if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
493+
wr->opcode == IB_WR_ATOMIC_FETCH_AND_ADD)
494+
dseg =
495+
wqe - sizeof(struct hns_roce_v2_wqe_data_seg);
496+
else
497+
dseg = wqe;
467498

468499
ret = set_rwqe_data_seg(ibqp, wr, rc_sq_wqe, wqe,
469500
&sge_ind, bad_wr);
@@ -1232,6 +1263,9 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev)
12321263
caps->local_ca_ack_delay = 0;
12331264
caps->max_mtu = IB_MTU_4096;
12341265

1266+
if (hr_dev->pci_dev->revision == 0x21)
1267+
caps->flags |= HNS_ROCE_CAP_FLAG_ATOMIC;
1268+
12351269
ret = hns_roce_v2_set_bt(hr_dev);
12361270
if (ret)
12371271
dev_err(hr_dev->dev, "Configure bt attribute fail, ret = %d.\n",
@@ -1663,7 +1697,8 @@ static int hns_roce_v2_write_mtpt(void *mb_buf, struct hns_roce_mr *mr,
16631697
roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_L_INV_EN_S, 0);
16641698
roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_BIND_EN_S,
16651699
(mr->access & IB_ACCESS_MW_BIND ? 1 : 0));
1666-
roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_ATOMIC_EN_S, 0);
1700+
roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_ATOMIC_EN_S,
1701+
mr->access & IB_ACCESS_REMOTE_ATOMIC ? 1 : 0);
16671702
roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_RR_EN_S,
16681703
(mr->access & IB_ACCESS_REMOTE_READ ? 1 : 0));
16691704
roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_RW_EN_S,

drivers/infiniband/hw/hns/hns_roce_hw_v2.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1564,4 +1564,9 @@ struct hns_roce_eq_context {
15641564
#define HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_S 0
15651565
#define HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_M GENMASK(23, 0)
15661566

1567+
struct hns_roce_wqe_atomic_seg {
1568+
__le64 fetchadd_swap_data;
1569+
__le64 cmp_data;
1570+
};
1571+
15671572
#endif

drivers/infiniband/hw/hns/hns_roce_main.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -215,7 +215,8 @@ static int hns_roce_query_device(struct ib_device *ib_dev,
215215
props->max_pd = hr_dev->caps.num_pds;
216216
props->max_qp_rd_atom = hr_dev->caps.max_qp_dest_rdma;
217217
props->max_qp_init_rd_atom = hr_dev->caps.max_qp_init_rdma;
218-
props->atomic_cap = IB_ATOMIC_NONE;
218+
props->atomic_cap = hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_ATOMIC ?
219+
IB_ATOMIC_HCA : IB_ATOMIC_NONE;
219220
props->max_pkeys = 1;
220221
props->local_ca_ack_delay = hr_dev->caps.local_ca_ack_delay;
221222

0 commit comments

Comments
 (0)