Skip to content

Commit c692509

Browse files
lsgunth authored and bjorn-helgaas committed
nvmet: Optionally use PCI P2P memory
Create a configfs attribute in each nvme-fabrics namespace to enable P2P memory use. The attribute may be enabled (with a boolean) or a specific P2P device may be given (with the device's PCI name). When enabled, the namespace will ensure the underlying block device supports P2P and is compatible with any specified P2P device. If no device was specified it will ensure there is compatible P2P memory somewhere in the system. Enabling a namespace with P2P memory will fail with EINVAL (and an appropriate dmesg error) if any of these conditions are not met. Once a controller is set up on a specific port, the P2P device to use for each namespace will be found and stored in a radix tree by namespace ID. When memory is allocated for a request, the tree is used to look up the P2P device to allocate memory against. If no device is in the tree (because no appropriate device was found), or if allocation of P2P memory fails, fall back to using regular memory. Signed-off-by: Stephen Bates <sbates@raithlin.com> Signed-off-by: Steve Wise <swise@opengridcomputing.com> [hch: partial rewrite of the initial code] Signed-off-by: Christoph Hellwig <hch@lst.de> Signed-off-by: Logan Gunthorpe <logang@deltatee.com> Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
1 parent 5b2322e commit c692509

File tree

5 files changed

+230
-1
lines changed

5 files changed

+230
-1
lines changed

drivers/nvme/target/configfs.c

Lines changed: 47 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -17,6 +17,8 @@
1717
#include <linux/slab.h>
1818
#include <linux/stat.h>
1919
#include <linux/ctype.h>
20+
#include <linux/pci.h>
21+
#include <linux/pci-p2pdma.h>
2022

2123
#include "nvmet.h"
2224

@@ -340,6 +342,48 @@ static ssize_t nvmet_ns_device_path_store(struct config_item *item,
340342

341343
CONFIGFS_ATTR(nvmet_ns_, device_path);
342344

345+
#ifdef CONFIG_PCI_P2PDMA
/* Report the namespace's p2pmem setting: off, auto, or a specific device. */
static ssize_t nvmet_ns_p2pmem_show(struct config_item *item, char *page)
{
	struct nvmet_ns *ns = to_nvmet_ns(item);

	return pci_p2pdma_enable_show(page, ns->p2p_dev, ns->use_p2pmem);
}

static ssize_t nvmet_ns_p2pmem_store(struct config_item *item,
		const char *page, size_t count)
{
	struct nvmet_ns *ns = to_nvmet_ns(item);
	struct pci_dev *new_p2p_dev = NULL;
	bool enable_p2pmem;
	int err;
	int ret = count;

	mutex_lock(&ns->subsys->lock);

	/* The setting may only be changed while the namespace is disabled. */
	if (ns->enabled) {
		ret = -EBUSY;
		goto out_unlock;
	}

	err = pci_p2pdma_enable_store(page, &new_p2p_dev, &enable_p2pmem);
	if (err) {
		ret = err;
		goto out_unlock;
	}

	/* Swap in the new configuration, releasing any previous device ref. */
	ns->use_p2pmem = enable_p2pmem;
	pci_dev_put(ns->p2p_dev);
	ns->p2p_dev = new_p2p_dev;

out_unlock:
	mutex_unlock(&ns->subsys->lock);

	return ret;
}

CONFIGFS_ATTR(nvmet_ns_, p2pmem);
#endif /* CONFIG_PCI_P2PDMA */
386+
343387
static ssize_t nvmet_ns_device_uuid_show(struct config_item *item, char *page)
344388
{
345389
return sprintf(page, "%pUb\n", &to_nvmet_ns(item)->uuid);
@@ -509,6 +553,9 @@ static struct configfs_attribute *nvmet_ns_attrs[] = {
509553
&nvmet_ns_attr_ana_grpid,
510554
&nvmet_ns_attr_enable,
511555
&nvmet_ns_attr_buffered_io,
556+
#ifdef CONFIG_PCI_P2PDMA
557+
&nvmet_ns_attr_p2pmem,
558+
#endif
512559
NULL,
513560
};
514561

drivers/nvme/target/core.c

Lines changed: 163 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -15,6 +15,7 @@
1515
#include <linux/module.h>
1616
#include <linux/random.h>
1717
#include <linux/rculist.h>
18+
#include <linux/pci-p2pdma.h>
1819

1920
#include "nvmet.h"
2021

@@ -365,9 +366,93 @@ static void nvmet_ns_dev_disable(struct nvmet_ns *ns)
365366
nvmet_file_ns_disable(ns);
366367
}
367368

369+
static int nvmet_p2pmem_ns_enable(struct nvmet_ns *ns)
370+
{
371+
int ret;
372+
struct pci_dev *p2p_dev;
373+
374+
if (!ns->use_p2pmem)
375+
return 0;
376+
377+
if (!ns->bdev) {
378+
pr_err("peer-to-peer DMA is not supported by non-block device namespaces\n");
379+
return -EINVAL;
380+
}
381+
382+
if (!blk_queue_pci_p2pdma(ns->bdev->bd_queue)) {
383+
pr_err("peer-to-peer DMA is not supported by the driver of %s\n",
384+
ns->device_path);
385+
return -EINVAL;
386+
}
387+
388+
if (ns->p2p_dev) {
389+
ret = pci_p2pdma_distance(ns->p2p_dev, nvmet_ns_dev(ns), true);
390+
if (ret < 0)
391+
return -EINVAL;
392+
} else {
393+
/*
394+
* Right now we just check that there is p2pmem available so
395+
* we can report an error to the user right away if there
396+
* is not. We'll find the actual device to use once we
397+
* setup the controller when the port's device is available.
398+
*/
399+
400+
p2p_dev = pci_p2pmem_find(nvmet_ns_dev(ns));
401+
if (!p2p_dev) {
402+
pr_err("no peer-to-peer memory is available for %s\n",
403+
ns->device_path);
404+
return -EINVAL;
405+
}
406+
407+
pci_dev_put(p2p_dev);
408+
}
409+
410+
return 0;
411+
}
412+
413+
/*
414+
* Note: ctrl->subsys->lock should be held when calling this function
415+
*/
416+
static void nvmet_p2pmem_ns_add_p2p(struct nvmet_ctrl *ctrl,
417+
struct nvmet_ns *ns)
418+
{
419+
struct device *clients[2];
420+
struct pci_dev *p2p_dev;
421+
int ret;
422+
423+
if (!ctrl->p2p_client)
424+
return;
425+
426+
if (ns->p2p_dev) {
427+
ret = pci_p2pdma_distance(ns->p2p_dev, ctrl->p2p_client, true);
428+
if (ret < 0)
429+
return;
430+
431+
p2p_dev = pci_dev_get(ns->p2p_dev);
432+
} else {
433+
clients[0] = ctrl->p2p_client;
434+
clients[1] = nvmet_ns_dev(ns);
435+
436+
p2p_dev = pci_p2pmem_find_many(clients, ARRAY_SIZE(clients));
437+
if (!p2p_dev) {
438+
pr_err("no peer-to-peer memory is available that's supported by %s and %s\n",
439+
dev_name(ctrl->p2p_client), ns->device_path);
440+
return;
441+
}
442+
}
443+
444+
ret = radix_tree_insert(&ctrl->p2p_ns_map, ns->nsid, p2p_dev);
445+
if (ret < 0)
446+
pci_dev_put(p2p_dev);
447+
448+
pr_info("using p2pmem on %s for nsid %d\n", pci_name(p2p_dev),
449+
ns->nsid);
450+
}
451+
368452
int nvmet_ns_enable(struct nvmet_ns *ns)
369453
{
370454
struct nvmet_subsys *subsys = ns->subsys;
455+
struct nvmet_ctrl *ctrl;
371456
int ret;
372457

373458
mutex_lock(&subsys->lock);
@@ -384,6 +469,13 @@ int nvmet_ns_enable(struct nvmet_ns *ns)
384469
if (ret)
385470
goto out_unlock;
386471

472+
ret = nvmet_p2pmem_ns_enable(ns);
473+
if (ret)
474+
goto out_unlock;
475+
476+
list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry)
477+
nvmet_p2pmem_ns_add_p2p(ctrl, ns);
478+
387479
ret = percpu_ref_init(&ns->ref, nvmet_destroy_namespace,
388480
0, GFP_KERNEL);
389481
if (ret)
@@ -418,13 +510,17 @@ int nvmet_ns_enable(struct nvmet_ns *ns)
418510
mutex_unlock(&subsys->lock);
419511
return ret;
420512
out_dev_put:
513+
list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry)
514+
pci_dev_put(radix_tree_delete(&ctrl->p2p_ns_map, ns->nsid));
515+
421516
nvmet_ns_dev_disable(ns);
422517
goto out_unlock;
423518
}
424519

425520
void nvmet_ns_disable(struct nvmet_ns *ns)
426521
{
427522
struct nvmet_subsys *subsys = ns->subsys;
523+
struct nvmet_ctrl *ctrl;
428524

429525
mutex_lock(&subsys->lock);
430526
if (!ns->enabled)
@@ -434,6 +530,10 @@ void nvmet_ns_disable(struct nvmet_ns *ns)
434530
list_del_rcu(&ns->dev_link);
435531
if (ns->nsid == subsys->max_nsid)
436532
subsys->max_nsid = nvmet_max_nsid(subsys);
533+
534+
list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry)
535+
pci_dev_put(radix_tree_delete(&ctrl->p2p_ns_map, ns->nsid));
536+
437537
mutex_unlock(&subsys->lock);
438538

439539
/*
@@ -450,6 +550,7 @@ void nvmet_ns_disable(struct nvmet_ns *ns)
450550
percpu_ref_exit(&ns->ref);
451551

452552
mutex_lock(&subsys->lock);
553+
453554
subsys->nr_namespaces--;
454555
nvmet_ns_changed(subsys, ns->nsid);
455556
nvmet_ns_dev_disable(ns);
@@ -727,6 +828,29 @@ EXPORT_SYMBOL_GPL(nvmet_req_execute);
727828

728829
int nvmet_req_alloc_sgl(struct nvmet_req *req)
729830
{
831+
struct pci_dev *p2p_dev = NULL;
832+
833+
if (IS_ENABLED(CONFIG_PCI_P2PDMA)) {
834+
if (req->sq->ctrl && req->ns)
835+
p2p_dev = radix_tree_lookup(&req->sq->ctrl->p2p_ns_map,
836+
req->ns->nsid);
837+
838+
req->p2p_dev = NULL;
839+
if (req->sq->qid && p2p_dev) {
840+
req->sg = pci_p2pmem_alloc_sgl(p2p_dev, &req->sg_cnt,
841+
req->transfer_len);
842+
if (req->sg) {
843+
req->p2p_dev = p2p_dev;
844+
return 0;
845+
}
846+
}
847+
848+
/*
849+
* If no P2P memory was available we fallback to using
850+
* regular memory
851+
*/
852+
}
853+
730854
req->sg = sgl_alloc(req->transfer_len, GFP_KERNEL, &req->sg_cnt);
731855
if (!req->sg)
732856
return -ENOMEM;
@@ -737,7 +861,11 @@ EXPORT_SYMBOL_GPL(nvmet_req_alloc_sgl);
737861

738862
void nvmet_req_free_sgl(struct nvmet_req *req)
739863
{
740-
sgl_free(req->sg);
864+
if (req->p2p_dev)
865+
pci_p2pmem_free_sgl(req->p2p_dev, req->sg);
866+
else
867+
sgl_free(req->sg);
868+
741869
req->sg = NULL;
742870
req->sg_cnt = 0;
743871
}
@@ -939,6 +1067,37 @@ bool nvmet_host_allowed(struct nvmet_req *req, struct nvmet_subsys *subsys,
9391067
return __nvmet_host_allowed(subsys, hostnqn);
9401068
}
9411069

1070+
/*
1071+
* Note: ctrl->subsys->lock should be held when calling this function
1072+
*/
1073+
static void nvmet_setup_p2p_ns_map(struct nvmet_ctrl *ctrl,
1074+
struct nvmet_req *req)
1075+
{
1076+
struct nvmet_ns *ns;
1077+
1078+
if (!req->p2p_client)
1079+
return;
1080+
1081+
ctrl->p2p_client = get_device(req->p2p_client);
1082+
1083+
list_for_each_entry_rcu(ns, &ctrl->subsys->namespaces, dev_link)
1084+
nvmet_p2pmem_ns_add_p2p(ctrl, ns);
1085+
}
1086+
1087+
/*
1088+
* Note: ctrl->subsys->lock should be held when calling this function
1089+
*/
1090+
static void nvmet_release_p2p_ns_map(struct nvmet_ctrl *ctrl)
1091+
{
1092+
struct radix_tree_iter iter;
1093+
void __rcu **slot;
1094+
1095+
radix_tree_for_each_slot(slot, &ctrl->p2p_ns_map, &iter, 0)
1096+
pci_dev_put(radix_tree_deref_slot(slot));
1097+
1098+
put_device(ctrl->p2p_client);
1099+
}
1100+
9421101
u16 nvmet_alloc_ctrl(const char *subsysnqn, const char *hostnqn,
9431102
struct nvmet_req *req, u32 kato, struct nvmet_ctrl **ctrlp)
9441103
{
@@ -980,6 +1139,7 @@ u16 nvmet_alloc_ctrl(const char *subsysnqn, const char *hostnqn,
9801139

9811140
INIT_WORK(&ctrl->async_event_work, nvmet_async_event_work);
9821141
INIT_LIST_HEAD(&ctrl->async_events);
1142+
INIT_RADIX_TREE(&ctrl->p2p_ns_map, GFP_KERNEL);
9831143

9841144
memcpy(ctrl->subsysnqn, subsysnqn, NVMF_NQN_SIZE);
9851145
memcpy(ctrl->hostnqn, hostnqn, NVMF_NQN_SIZE);
@@ -1044,6 +1204,7 @@ u16 nvmet_alloc_ctrl(const char *subsysnqn, const char *hostnqn,
10441204

10451205
mutex_lock(&subsys->lock);
10461206
list_add_tail(&ctrl->subsys_entry, &subsys->ctrls);
1207+
nvmet_setup_p2p_ns_map(ctrl, req);
10471208
mutex_unlock(&subsys->lock);
10481209

10491210
*ctrlp = ctrl;
@@ -1071,6 +1232,7 @@ static void nvmet_ctrl_free(struct kref *ref)
10711232
struct nvmet_subsys *subsys = ctrl->subsys;
10721233

10731234
mutex_lock(&subsys->lock);
1235+
nvmet_release_p2p_ns_map(ctrl);
10741236
list_del(&ctrl->subsys_entry);
10751237
mutex_unlock(&subsys->lock);
10761238

drivers/nvme/target/io-cmd-bdev.c

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -78,6 +78,9 @@ static void nvmet_bdev_execute_rw(struct nvmet_req *req)
7878
op = REQ_OP_READ;
7979
}
8080

81+
if (is_pci_p2pdma_page(sg_page(req->sg)))
82+
op_flags |= REQ_NOMERGE;
83+
8184
sector = le64_to_cpu(req->cmd->rw.slba);
8285
sector <<= (req->ns->blksize_shift - 9);
8386

drivers/nvme/target/nvmet.h

Lines changed: 15 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -26,6 +26,7 @@
2626
#include <linux/configfs.h>
2727
#include <linux/rcupdate.h>
2828
#include <linux/blkdev.h>
29+
#include <linux/radix-tree.h>
2930

3031
#define NVMET_ASYNC_EVENTS 4
3132
#define NVMET_ERROR_LOG_SLOTS 128
@@ -77,13 +78,21 @@ struct nvmet_ns {
7778
struct completion disable_done;
7879
mempool_t *bvec_pool;
7980
struct kmem_cache *bvec_cache;
81+
82+
int use_p2pmem;
83+
struct pci_dev *p2p_dev;
8084
};
8185

8286
static inline struct nvmet_ns *to_nvmet_ns(struct config_item *item)
8387
{
8488
return container_of(to_config_group(item), struct nvmet_ns, group);
8589
}
8690

91+
static inline struct device *nvmet_ns_dev(struct nvmet_ns *ns)
92+
{
93+
return ns->bdev ? disk_to_dev(ns->bdev->bd_disk) : NULL;
94+
}
95+
8796
struct nvmet_cq {
8897
u16 qid;
8998
u16 size;
@@ -184,6 +193,9 @@ struct nvmet_ctrl {
184193

185194
char subsysnqn[NVMF_NQN_FIELD_LEN];
186195
char hostnqn[NVMF_NQN_FIELD_LEN];
196+
197+
struct device *p2p_client;
198+
struct radix_tree_root p2p_ns_map;
187199
};
188200

189201
struct nvmet_subsys {
@@ -294,6 +306,9 @@ struct nvmet_req {
294306

295307
void (*execute)(struct nvmet_req *req);
296308
const struct nvmet_fabrics_ops *ops;
309+
310+
struct pci_dev *p2p_dev;
311+
struct device *p2p_client;
297312
};
298313

299314
extern struct workqueue_struct *buffered_io_wq;

drivers/nvme/target/rdma.c

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -749,6 +749,8 @@ static void nvmet_rdma_handle_command(struct nvmet_rdma_queue *queue,
749749
cmd->send_sge.addr, cmd->send_sge.length,
750750
DMA_TO_DEVICE);
751751

752+
cmd->req.p2p_client = &queue->dev->device->dev;
753+
752754
if (!nvmet_req_init(&cmd->req, &queue->nvme_cq,
753755
&queue->nvme_sq, &nvmet_rdma_ops))
754756
return;

0 commit comments

Comments
 (0)