Skip to content

Commit be99bb1

Browse files
chucklever authored and J. Bruce Fields committed
svcrdma: Use new CQ API for RPC-over-RDMA server send CQs
Calling ib_poll_cq() to sort through WCs during a completion is a common pattern amongst RDMA consumers. Since commit 14d3a3b ("IB: add a proper completion queue abstraction"), WC sorting can be handled by the IB core.

By converting to this new API, svcrdma is made a better neighbor to other RDMA consumers, as it allows the core to schedule the delivery of completions more fairly amongst all active consumers.

This new API also aims each completion at a function that is specific to the WR's opcode. Thus the ctxt->wr_op field and the switch in process_context are replaced by a set of methods that handle each completion type.

Because each ib_cqe carries a pointer to a completion method, the core can now post operations on a consumer's QP, and handle the completions itself.

The server's rdma_stat_sq_poll and rdma_stat_sq_prod metrics are no longer updated.

As a cleanup, the cq_event_handler, the dto_tasklet, and all associated locking are removed, as they are no longer referenced or used.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Tested-by: Steve Wise <swise@opengridcomputing.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
1 parent 8bd5ba8 commit be99bb1

File tree

5 files changed

+121
-177
lines changed

5 files changed

+121
-177
lines changed

include/linux/sunrpc/svc_rdma.h

Lines changed: 7 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -76,8 +76,9 @@ struct svc_rdma_op_ctxt {
7676
int hdr_count;
7777
struct xdr_buf arg;
7878
struct ib_cqe cqe;
79+
struct ib_cqe reg_cqe;
80+
struct ib_cqe inv_cqe;
7981
struct list_head dto_q;
80-
enum ib_wr_opcode wr_op;
8182
enum ib_wc_status wc_status;
8283
u32 byte_len;
8384
u32 position;
@@ -175,7 +176,6 @@ struct svcxprt_rdma {
175176
struct work_struct sc_work;
176177
};
177178
/* sc_flags */
178-
#define RDMAXPRT_SQ_PENDING 2
179179
#define RDMAXPRT_CONN_PENDING 3
180180

181181
#define RPCRDMA_LISTEN_BACKLOG 10
@@ -232,6 +232,11 @@ extern void svc_rdma_send_error(struct svcxprt_rdma *, struct rpcrdma_msg *,
232232
int);
233233

234234
/* svc_rdma_transport.c */
235+
extern void svc_rdma_wc_send(struct ib_cq *, struct ib_wc *);
236+
extern void svc_rdma_wc_write(struct ib_cq *, struct ib_wc *);
237+
extern void svc_rdma_wc_reg(struct ib_cq *, struct ib_wc *);
238+
extern void svc_rdma_wc_read(struct ib_cq *, struct ib_wc *);
239+
extern void svc_rdma_wc_inv(struct ib_cq *, struct ib_wc *);
235240
extern int svc_rdma_send(struct svcxprt_rdma *, struct ib_send_wr *);
236241
extern int svc_rdma_post_recv(struct svcxprt_rdma *, gfp_t);
237242
extern int svc_rdma_repost_recv(struct svcxprt_rdma *, gfp_t);

net/sunrpc/xprtrdma/svc_rdma_backchannel.c

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -119,7 +119,6 @@ static int svc_rdma_bc_sendto(struct svcxprt_rdma *rdma,
119119
ctxt->pages[0] = virt_to_page(rqst->rq_buffer);
120120
ctxt->count = 1;
121121

122-
ctxt->wr_op = IB_WR_SEND;
123122
ctxt->direction = DMA_TO_DEVICE;
124123
ctxt->sge[0].lkey = rdma->sc_pd->local_dma_lkey;
125124
ctxt->sge[0].length = sndbuf->len;
@@ -133,7 +132,8 @@ static int svc_rdma_bc_sendto(struct svcxprt_rdma *rdma,
133132
atomic_inc(&rdma->sc_dma_used);
134133

135134
memset(&send_wr, 0, sizeof(send_wr));
136-
send_wr.wr_id = (unsigned long)ctxt;
135+
ctxt->cqe.done = svc_rdma_wc_send;
136+
send_wr.wr_cqe = &ctxt->cqe;
137137
send_wr.sg_list = ctxt->sge;
138138
send_wr.num_sge = 1;
139139
send_wr.opcode = IB_WR_SEND;

net/sunrpc/xprtrdma/svc_rdma_recvfrom.c

Lines changed: 8 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -180,9 +180,9 @@ int rdma_read_chunk_lcl(struct svcxprt_rdma *xprt,
180180
clear_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags);
181181

182182
memset(&read_wr, 0, sizeof(read_wr));
183-
read_wr.wr.wr_id = (unsigned long)ctxt;
183+
ctxt->cqe.done = svc_rdma_wc_read;
184+
read_wr.wr.wr_cqe = &ctxt->cqe;
184185
read_wr.wr.opcode = IB_WR_RDMA_READ;
185-
ctxt->wr_op = read_wr.wr.opcode;
186186
read_wr.wr.send_flags = IB_SEND_SIGNALED;
187187
read_wr.rkey = rs_handle;
188188
read_wr.remote_addr = rs_offset;
@@ -299,8 +299,9 @@ int rdma_read_chunk_frmr(struct svcxprt_rdma *xprt,
299299
ctxt->read_hdr = head;
300300

301301
/* Prepare REG WR */
302+
ctxt->reg_cqe.done = svc_rdma_wc_reg;
303+
reg_wr.wr.wr_cqe = &ctxt->reg_cqe;
302304
reg_wr.wr.opcode = IB_WR_REG_MR;
303-
reg_wr.wr.wr_id = 0;
304305
reg_wr.wr.send_flags = IB_SEND_SIGNALED;
305306
reg_wr.wr.num_sge = 0;
306307
reg_wr.mr = frmr->mr;
@@ -310,26 +311,27 @@ int rdma_read_chunk_frmr(struct svcxprt_rdma *xprt,
310311

311312
/* Prepare RDMA_READ */
312313
memset(&read_wr, 0, sizeof(read_wr));
314+
ctxt->cqe.done = svc_rdma_wc_read;
315+
read_wr.wr.wr_cqe = &ctxt->cqe;
313316
read_wr.wr.send_flags = IB_SEND_SIGNALED;
314317
read_wr.rkey = rs_handle;
315318
read_wr.remote_addr = rs_offset;
316319
read_wr.wr.sg_list = ctxt->sge;
317320
read_wr.wr.num_sge = 1;
318321
if (xprt->sc_dev_caps & SVCRDMA_DEVCAP_READ_W_INV) {
319322
read_wr.wr.opcode = IB_WR_RDMA_READ_WITH_INV;
320-
read_wr.wr.wr_id = (unsigned long)ctxt;
321323
read_wr.wr.ex.invalidate_rkey = ctxt->frmr->mr->lkey;
322324
} else {
323325
read_wr.wr.opcode = IB_WR_RDMA_READ;
324326
read_wr.wr.next = &inv_wr;
325327
/* Prepare invalidate */
326328
memset(&inv_wr, 0, sizeof(inv_wr));
327-
inv_wr.wr_id = (unsigned long)ctxt;
329+
ctxt->inv_cqe.done = svc_rdma_wc_inv;
330+
inv_wr.wr_cqe = &ctxt->inv_cqe;
328331
inv_wr.opcode = IB_WR_LOCAL_INV;
329332
inv_wr.send_flags = IB_SEND_SIGNALED | IB_SEND_FENCE;
330333
inv_wr.ex.invalidate_rkey = frmr->mr->lkey;
331334
}
332-
ctxt->wr_op = read_wr.wr.opcode;
333335

334336
/* Post the chain */
335337
ret = svc_rdma_send(xprt, &reg_wr.wr);

net/sunrpc/xprtrdma/svc_rdma_sendto.c

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -297,8 +297,8 @@ static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp,
297297

298298
/* Prepare WRITE WR */
299299
memset(&write_wr, 0, sizeof write_wr);
300-
ctxt->wr_op = IB_WR_RDMA_WRITE;
301-
write_wr.wr.wr_id = (unsigned long)ctxt;
300+
ctxt->cqe.done = svc_rdma_wc_write;
301+
write_wr.wr.wr_cqe = &ctxt->cqe;
302302
write_wr.wr.sg_list = &sge[0];
303303
write_wr.wr.num_sge = sge_no;
304304
write_wr.wr.opcode = IB_WR_RDMA_WRITE;
@@ -549,8 +549,8 @@ static int send_reply(struct svcxprt_rdma *rdma,
549549
goto err;
550550
}
551551
memset(&send_wr, 0, sizeof send_wr);
552-
ctxt->wr_op = IB_WR_SEND;
553-
send_wr.wr_id = (unsigned long)ctxt;
552+
ctxt->cqe.done = svc_rdma_wc_send;
553+
send_wr.wr_cqe = &ctxt->cqe;
554554
send_wr.sg_list = ctxt->sge;
555555
send_wr.num_sge = sge_no;
556556
send_wr.opcode = IB_WR_SEND;
@@ -698,8 +698,8 @@ void svc_rdma_send_error(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rmsgp,
698698

699699
/* Prepare SEND WR */
700700
memset(&err_wr, 0, sizeof(err_wr));
701-
ctxt->wr_op = IB_WR_SEND;
702-
err_wr.wr_id = (unsigned long)ctxt;
701+
ctxt->cqe.done = svc_rdma_wc_send;
702+
err_wr.wr_cqe = &ctxt->cqe;
703703
err_wr.sg_list = ctxt->sge;
704704
err_wr.num_sge = 1;
705705
err_wr.opcode = IB_WR_SEND;

0 commit comments

Comments
 (0)