Commit f41725b

Israel Rukshin authored and Christoph Hellwig committed
nvme-rdma: Use mr pool
Currently, blk_mq_tagset_iter() iterates over the initial hctx tags only. If an I/O scheduler is used, it does not iterate over the hctx scheduler tags, so the static requests are never updated. For example, on an NVMe over Fabrics RDMA host this means the scheduler requests are not reinitialized, and therefore not all memory regions are re-registered during the tagset re-initialization in the reconnect flow. This may lead to a memory registration error:

  "MEMREG for CQE 0xffff88044c14dce8 failed with status memory management operation error (6)"

With this commit we no longer need to reinit the requests at all, which fixes this failure.

Signed-off-by: Israel Rukshin <israelr@mellanox.com>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Reviewed-by: Max Gurtovoy <maxg@mellanox.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
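For context, the <rdma/mr_pool.h> helpers the patch switches to tie MR ownership to the queue pair instead of to each request. Below is a minimal sketch of the lifecycle the diff adopts; the wrapper names (setup_mr_pool, map_get_mr, unmap_put_mr, teardown_mr_pool) are illustrative stand-ins rather than driver code, while the ib_mr_pool_* calls and the qp->rdma_mrs list are the ones used in the patch.

#include <rdma/mr_pool.h>

/* Queue setup: pre-allocate one MR per outstanding request (sketch). */
static int setup_mr_pool(struct ib_qp *qp, int queue_size, u32 max_fr_pages)
{
        return ib_mr_pool_init(qp, &qp->rdma_mrs, queue_size,
                               IB_MR_TYPE_MEM_REG, max_fr_pages);
}

/* I/O path: take an MR only when a request actually registers memory. */
static struct ib_mr *map_get_mr(struct ib_qp *qp)
{
        return ib_mr_pool_get(qp, &qp->rdma_mrs); /* NULL if the pool is empty */
}

/* Unmap path: hand the MR back; a non-NULL req->mr now also plays the
 * role the per-request need_inval flag used to play. */
static void unmap_put_mr(struct ib_qp *qp, struct ib_mr *mr)
{
        ib_mr_pool_put(qp, &qp->rdma_mrs, mr);
}

/* Queue teardown: deregister all pooled MRs before the QP goes away. */
static void teardown_mr_pool(struct ib_qp *qp)
{
        ib_mr_pool_destroy(qp, &qp->rdma_mrs);
}

Because every queue owns queue_size MRs up front, there is nothing left for a reinit_request callback to re-register after a reconnect, which is why the callback and the nvme_reinit_tagset() calls can be dropped in the diff below.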
1 parent 3ef0279 commit f41725b

File tree: 1 file changed (+37, −58 lines)


drivers/nvme/host/rdma.c

Lines changed: 37 additions & 58 deletions
@@ -15,6 +15,7 @@
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/slab.h>
+#include <rdma/mr_pool.h>
 #include <linux/err.h>
 #include <linux/string.h>
 #include <linux/atomic.h>
@@ -260,32 +261,6 @@ static int nvme_rdma_create_qp(struct nvme_rdma_queue *queue, const int factor)
 	return ret;
 }
 
-static int nvme_rdma_reinit_request(void *data, struct request *rq)
-{
-	struct nvme_rdma_ctrl *ctrl = data;
-	struct nvme_rdma_device *dev = ctrl->device;
-	struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq);
-	int ret = 0;
-
-	if (WARN_ON_ONCE(!req->mr))
-		return 0;
-
-	ib_dereg_mr(req->mr);
-
-	req->mr = ib_alloc_mr(dev->pd, IB_MR_TYPE_MEM_REG,
-			ctrl->max_fr_pages);
-	if (IS_ERR(req->mr)) {
-		ret = PTR_ERR(req->mr);
-		req->mr = NULL;
-		goto out;
-	}
-
-	req->mr->need_inval = false;
-
-out:
-	return ret;
-}
-
 static void nvme_rdma_exit_request(struct blk_mq_tag_set *set,
 		struct request *rq, unsigned int hctx_idx)
 {
@@ -295,9 +270,6 @@ static void nvme_rdma_exit_request(struct blk_mq_tag_set *set,
 	struct nvme_rdma_queue *queue = &ctrl->queues[queue_idx];
 	struct nvme_rdma_device *dev = queue->device;
 
-	if (req->mr)
-		ib_dereg_mr(req->mr);
-
 	nvme_rdma_free_qe(dev->dev, &req->sqe, sizeof(struct nvme_command),
 			DMA_TO_DEVICE);
 }
@@ -319,21 +291,9 @@ static int nvme_rdma_init_request(struct blk_mq_tag_set *set,
 	if (ret)
 		return ret;
 
-	req->mr = ib_alloc_mr(dev->pd, IB_MR_TYPE_MEM_REG,
-			ctrl->max_fr_pages);
-	if (IS_ERR(req->mr)) {
-		ret = PTR_ERR(req->mr);
-		goto out_free_qe;
-	}
-
 	req->queue = queue;
 
 	return 0;
-
-out_free_qe:
-	nvme_rdma_free_qe(dev->dev, &req->sqe, sizeof(struct nvme_command),
-			DMA_TO_DEVICE);
-	return -ENOMEM;
 }
 
 static int nvme_rdma_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
@@ -433,6 +393,8 @@ static void nvme_rdma_destroy_queue_ib(struct nvme_rdma_queue *queue)
 	struct nvme_rdma_device *dev = queue->device;
 	struct ib_device *ibdev = dev->dev;
 
+	ib_mr_pool_destroy(queue->qp, &queue->qp->rdma_mrs);
+
 	rdma_destroy_qp(queue->cm_id);
 	ib_free_cq(queue->ib_cq);
 
@@ -442,6 +404,12 @@ static void nvme_rdma_destroy_queue_ib(struct nvme_rdma_queue *queue)
 	nvme_rdma_dev_put(dev);
 }
 
+static int nvme_rdma_get_max_fr_pages(struct ib_device *ibdev)
+{
+	return min_t(u32, NVME_RDMA_MAX_SEGMENTS,
+		     ibdev->attrs.max_fast_reg_page_list_len);
+}
+
 static int nvme_rdma_create_queue_ib(struct nvme_rdma_queue *queue)
 {
 	struct ib_device *ibdev;
@@ -484,8 +452,22 @@ static int nvme_rdma_create_queue_ib(struct nvme_rdma_queue *queue)
 		goto out_destroy_qp;
 	}
 
+	ret = ib_mr_pool_init(queue->qp, &queue->qp->rdma_mrs,
+			      queue->queue_size,
+			      IB_MR_TYPE_MEM_REG,
+			      nvme_rdma_get_max_fr_pages(ibdev));
+	if (ret) {
+		dev_err(queue->ctrl->ctrl.device,
+			"failed to initialize MR pool sized %d for QID %d\n",
+			queue->queue_size, idx);
+		goto out_destroy_ring;
+	}
+
 	return 0;
 
+out_destroy_ring:
+	nvme_rdma_free_ring(ibdev, queue->rsp_ring, queue->queue_size,
+			    sizeof(struct nvme_completion), DMA_FROM_DEVICE);
 out_destroy_qp:
 	rdma_destroy_qp(queue->cm_id);
 out_destroy_ib_cq:
@@ -757,8 +739,7 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl,
 
 	ctrl->device = ctrl->queues[0].device;
 
-	ctrl->max_fr_pages = min_t(u32, NVME_RDMA_MAX_SEGMENTS,
-		ctrl->device->dev->attrs.max_fast_reg_page_list_len);
+	ctrl->max_fr_pages = nvme_rdma_get_max_fr_pages(ctrl->device->dev);
 
 	if (new) {
 		ctrl->ctrl.admin_tagset = nvme_rdma_alloc_tagset(&ctrl->ctrl, true);
@@ -772,10 +753,6 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl,
 			error = PTR_ERR(ctrl->ctrl.admin_q);
 			goto out_free_tagset;
 		}
-	} else {
-		error = nvme_reinit_tagset(&ctrl->ctrl, ctrl->ctrl.admin_tagset);
-		if (error)
-			goto out_free_queue;
 	}
 
 	error = nvme_rdma_start_queue(ctrl, 0);
@@ -855,10 +832,6 @@ static int nvme_rdma_configure_io_queues(struct nvme_rdma_ctrl *ctrl, bool new)
 			goto out_free_tag_set;
 		}
 	} else {
-		ret = nvme_reinit_tagset(&ctrl->ctrl, ctrl->ctrl.tagset);
-		if (ret)
-			goto out_free_io_queues;
-
 		blk_mq_update_nr_hw_queues(&ctrl->tag_set,
 			ctrl->ctrl.queue_count - 1);
 	}
@@ -1061,6 +1034,11 @@ static void nvme_rdma_unmap_data(struct nvme_rdma_queue *queue,
 	if (!blk_rq_bytes(rq))
 		return;
 
+	if (req->mr) {
+		ib_mr_pool_put(queue->qp, &queue->qp->rdma_mrs, req->mr);
+		req->mr = NULL;
+	}
+
 	ib_dma_unmap_sg(ibdev, req->sg_table.sgl,
 			req->nents, rq_data_dir(rq) ==
 			WRITE ? DMA_TO_DEVICE : DMA_FROM_DEVICE);
@@ -1117,12 +1095,18 @@ static int nvme_rdma_map_sg_fr(struct nvme_rdma_queue *queue,
 	struct nvme_keyed_sgl_desc *sg = &c->common.dptr.ksgl;
 	int nr;
 
+	req->mr = ib_mr_pool_get(queue->qp, &queue->qp->rdma_mrs);
+	if (WARN_ON_ONCE(!req->mr))
+		return -EAGAIN;
+
 	/*
 	 * Align the MR to a 4K page size to match the ctrl page size and
 	 * the block virtual boundary.
 	 */
 	nr = ib_map_mr_sg(req->mr, req->sg_table.sgl, count, NULL, SZ_4K);
 	if (unlikely(nr < count)) {
+		ib_mr_pool_put(queue->qp, &queue->qp->rdma_mrs, req->mr);
+		req->mr = NULL;
 		if (nr < 0)
 			return nr;
 		return -EINVAL;
@@ -1141,8 +1125,6 @@ static int nvme_rdma_map_sg_fr(struct nvme_rdma_queue *queue,
 			IB_ACCESS_REMOTE_READ |
 			IB_ACCESS_REMOTE_WRITE;
 
-	req->mr->need_inval = true;
-
 	sg->addr = cpu_to_le64(req->mr->iova);
 	put_unaligned_le24(req->mr->length, sg->length);
 	put_unaligned_le32(req->mr->rkey, sg->key);
@@ -1162,7 +1144,6 @@ static int nvme_rdma_map_data(struct nvme_rdma_queue *queue,
 
 	req->num_sge = 1;
 	req->inline_data = false;
-	req->mr->need_inval = false;
 	refcount_set(&req->ref, 2); /* send and recv completions */
 
 	c->common.flags |= NVME_CMD_SGL_METABUF;
@@ -1341,8 +1322,7 @@ static int nvme_rdma_process_nvme_rsp(struct nvme_rdma_queue *queue,
 				req->mr->rkey);
 			nvme_rdma_error_recovery(queue->ctrl);
 		}
-		req->mr->need_inval = false;
-	} else if (req->mr->need_inval) {
+	} else if (req->mr) {
 		ret = nvme_rdma_inv_rkey(queue, req);
 		if (unlikely(ret < 0)) {
 			dev_err(queue->ctrl->ctrl.device,
@@ -1650,7 +1630,7 @@ static blk_status_t nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx,
 			sizeof(struct nvme_command), DMA_TO_DEVICE);
 
 	err = nvme_rdma_post_send(queue, sqe, req->sge, req->num_sge,
-			req->mr->need_inval ? &req->reg_wr.wr : NULL);
+			req->mr ? &req->reg_wr.wr : NULL);
 	if (unlikely(err)) {
 		nvme_rdma_unmap_data(queue, rq);
 		goto err;
@@ -1798,7 +1778,6 @@ static const struct nvme_ctrl_ops nvme_rdma_ctrl_ops = {
 	.submit_async_event = nvme_rdma_submit_async_event,
 	.delete_ctrl = nvme_rdma_delete_ctrl,
 	.get_address = nvmf_get_address,
-	.reinit_request = nvme_rdma_reinit_request,
 };
 
 static inline bool
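A note on the need_inval removal visible in the last few hunks: since an MR is now attached to a request only while a registration is outstanding, checking req->mr for NULL replaces the old per-MR flag. A condensed sketch of the three decision points follows; the remote_invalidated condition is a simplified stand-in for the completion-flag check in the real code, everything else mirrors the diff.

/* nvme_rdma_queue_rq(): chain the registration WR only if an MR was taken. */
err = nvme_rdma_post_send(queue, sqe, req->sge, req->num_sge,
                req->mr ? &req->reg_wr.wr : NULL);

/* nvme_rdma_process_nvme_rsp(): if the target did not remotely invalidate
 * the rkey, locally invalidate any registered MR. */
if (!remote_invalidated && req->mr)
        ret = nvme_rdma_inv_rkey(queue, req);

/* nvme_rdma_unmap_data(): return the MR to the pool and clear the pointer,
 * so the static request needs no reinit across reconnects. */
if (req->mr) {
        ib_mr_pool_put(queue->qp, &queue->qp->rdma_mrs, req->mr);
        req->mr = NULL;
}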
