Skip to content

Commit 01bb35c

Browse files
chuckleveramschuma-ntap
authored andcommitted
xprtrdma: RPC completion should wait for Send completion
When an RPC Call includes a file data payload, that payload can come from pages in the page cache, or a user buffer (for direct I/O). If the payload can fit inline, xprtrdma includes it in the Send using a scatter-gather technique. xprtrdma mustn't allow the RPC consumer to re-use the memory where that payload resides before the Send completes. Otherwise, the new contents of that memory would be exposed by an HCA retransmit of the Send operation. So, block RPC completion on Send completion, but only in the case where a separate file data payload is part of the Send. This prevents the reuse of that memory while it is still part of a Send operation without an undue cost to other cases. Waiting is avoided in the common case because typically the Send will have completed long before the RPC Reply arrives. These days, an RPC timeout will trigger a disconnect, which tears down the QP. The disconnect flushes all waiting Sends. This bounds the amount of time the reply handler has to wait for a Send completion. Signed-off-by: Chuck Lever <chuck.lever@oracle.com> Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
1 parent 0ba6f37 commit 01bb35c

File tree

4 files changed

+34
-4
lines changed

4 files changed

+34
-4
lines changed

net/sunrpc/xprtrdma/rpc_rdma.c

Lines changed: 25 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -534,6 +534,11 @@ rpcrdma_unmap_sendctx(struct rpcrdma_sendctx *sc)
534534
for (count = sc->sc_unmap_count; count; ++sge, --count)
535535
ib_dma_unmap_page(ia->ri_device,
536536
sge->addr, sge->length, DMA_TO_DEVICE);
537+
538+
if (test_and_clear_bit(RPCRDMA_REQ_F_TX_RESOURCES, &sc->sc_req->rl_flags)) {
539+
smp_mb__after_atomic();
540+
wake_up_bit(&sc->sc_req->rl_flags, RPCRDMA_REQ_F_TX_RESOURCES);
541+
}
537542
}
538543

539544
/* Prepare an SGE for the RPC-over-RDMA transport header.
@@ -667,6 +672,8 @@ rpcrdma_prepare_msg_sges(struct rpcrdma_ia *ia, struct rpcrdma_req *req,
667672

668673
out:
669674
sc->sc_wr.num_sge += sge_no;
675+
if (sc->sc_unmap_count)
676+
__set_bit(RPCRDMA_REQ_F_TX_RESOURCES, &req->rl_flags);
670677
return true;
671678

672679
out_regbuf:
@@ -704,6 +711,8 @@ rpcrdma_prepare_send_sges(struct rpcrdma_xprt *r_xprt,
704711
return -ENOBUFS;
705712
req->rl_sendctx->sc_wr.num_sge = 0;
706713
req->rl_sendctx->sc_unmap_count = 0;
714+
req->rl_sendctx->sc_req = req;
715+
__clear_bit(RPCRDMA_REQ_F_TX_RESOURCES, &req->rl_flags);
707716

708717
if (!rpcrdma_prepare_hdr_sge(&r_xprt->rx_ia, req, hdrlen))
709718
return -EIO;
@@ -1305,6 +1314,20 @@ void rpcrdma_release_rqst(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
13051314
if (!list_empty(&req->rl_registered))
13061315
r_xprt->rx_ia.ri_ops->ro_unmap_sync(r_xprt,
13071316
&req->rl_registered);
1317+
1318+
/* Ensure that any DMA mapped pages associated with
1319+
* the Send of the RPC Call have been unmapped before
1320+
* allowing the RPC to complete. This protects argument
1321+
* memory not controlled by the RPC client from being
1322+
* re-used before we're done with it.
1323+
*/
1324+
if (test_bit(RPCRDMA_REQ_F_TX_RESOURCES, &req->rl_flags)) {
1325+
r_xprt->rx_stats.reply_waits_for_send++;
1326+
out_of_line_wait_on_bit(&req->rl_flags,
1327+
RPCRDMA_REQ_F_TX_RESOURCES,
1328+
bit_wait,
1329+
TASK_UNINTERRUPTIBLE);
1330+
}
13081331
}
13091332

13101333
/* Reply handling runs in the poll worker thread. Anything that
@@ -1384,7 +1407,8 @@ void rpcrdma_reply_handler(struct rpcrdma_rep *rep)
13841407
dprintk("RPC: %s: reply %p completes request %p (xid 0x%08x)\n",
13851408
__func__, rep, req, be32_to_cpu(rep->rr_xid));
13861409

1387-
if (list_empty(&req->rl_registered))
1410+
if (list_empty(&req->rl_registered) &&
1411+
!test_bit(RPCRDMA_REQ_F_TX_RESOURCES, &req->rl_flags))
13881412
rpcrdma_complete_rqst(rep);
13891413
else
13901414
queue_work(rpcrdma_receive_wq, &rep->rr_work);

net/sunrpc/xprtrdma/transport.c

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -789,12 +789,13 @@ void xprt_rdma_print_stats(struct rpc_xprt *xprt, struct seq_file *seq)
789789
r_xprt->rx_stats.failed_marshal_count,
790790
r_xprt->rx_stats.bad_reply_count,
791791
r_xprt->rx_stats.nomsg_call_count);
792-
seq_printf(seq, "%lu %lu %lu %lu %lu\n",
792+
seq_printf(seq, "%lu %lu %lu %lu %lu %lu\n",
793793
r_xprt->rx_stats.mrs_recovered,
794794
r_xprt->rx_stats.mrs_orphaned,
795795
r_xprt->rx_stats.mrs_allocated,
796796
r_xprt->rx_stats.local_inv_needed,
797-
r_xprt->rx_stats.empty_sendctx_q);
797+
r_xprt->rx_stats.empty_sendctx_q,
798+
r_xprt->rx_stats.reply_waits_for_send);
798799
}
799800

800801
static int

net/sunrpc/xprtrdma/verbs.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1526,7 +1526,8 @@ rpcrdma_ep_post(struct rpcrdma_ia *ia,
15261526
dprintk("RPC: %s: posting %d s/g entries\n",
15271527
__func__, send_wr->num_sge);
15281528

1529-
if (!ep->rep_send_count) {
1529+
if (!ep->rep_send_count ||
1530+
test_bit(RPCRDMA_REQ_F_TX_RESOURCES, &req->rl_flags)) {
15301531
send_wr->send_flags |= IB_SEND_SIGNALED;
15311532
ep->rep_send_count = ep->rep_send_batch;
15321533
} else {

net/sunrpc/xprtrdma/xprt_rdma.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -236,11 +236,13 @@ struct rpcrdma_rep {
236236

237237
/* struct rpcrdma_sendctx - DMA mapped SGEs to unmap after Send completes
238238
*/
239+
struct rpcrdma_req;
239240
struct rpcrdma_xprt;
240241
struct rpcrdma_sendctx {
241242
struct ib_send_wr sc_wr;
242243
struct ib_cqe sc_cqe;
243244
struct rpcrdma_xprt *sc_xprt;
245+
struct rpcrdma_req *sc_req;
244246
unsigned int sc_unmap_count;
245247
struct ib_sge sc_sges[];
246248
};
@@ -387,6 +389,7 @@ struct rpcrdma_req {
387389
enum {
388390
RPCRDMA_REQ_F_BACKCHANNEL = 0,
389391
RPCRDMA_REQ_F_PENDING,
392+
RPCRDMA_REQ_F_TX_RESOURCES,
390393
};
391394

392395
static inline void
@@ -492,6 +495,7 @@ struct rpcrdma_stats {
492495
/* accessed when receiving a reply */
493496
unsigned long long total_rdma_reply;
494497
unsigned long long fixup_copy_count;
498+
unsigned long reply_waits_for_send;
495499
unsigned long local_inv_needed;
496500
unsigned long nomsg_call_count;
497501
unsigned long bcall_count;

0 commit comments

Comments
 (0)