Skip to content

Commit 038bd4c

Browse files
sagigrimberg and axboe
authored and committed
nvme: add keep-alive support
Periodic keep-alive is a mandatory feature in NVMe over Fabrics, and optional in NVMe 1.2.1 for PCIe. This patch adds periodic keep-alive sent from the host to verify that the controller is still responsive and vice-versa. The keep-alive timeout is user-defined (with keep_alive_tmo connection parameter) and defaults to 5 seconds. In order to avoid a race condition where the host sends a keep-alive competing with the target side keep-alive timeout expiration, the host adds a grace period of 10 seconds when publishing the keep-alive timeout to the target. In case a keep-alive failed (or timed out), a transport specific error recovery kicks in. For now only NVMe over Fabrics is wired up to support keep alive, but we can add PCIe support easily once controllers actually supporting it become available. Signed-off-by: Sagi Grimberg <sagi@grimberg.me> Reviewed-by: Steve Wise <swise@chelsio.com> Signed-off-by: Christoph Hellwig <hch@lst.de> Reviewed-by: Keith Busch <keith.busch@intel.com> Signed-off-by: Jens Axboe <axboe@fb.com>
1 parent 7b89eae commit 038bd4c

File tree

4 files changed

+119
-1
lines changed

4 files changed

+119
-1
lines changed

drivers/nvme/host/core.c

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
#include <asm/unaligned.h>
3131

3232
#include "nvme.h"
33+
#include "fabrics.h"
3334

3435
#define NVME_MINORS (1U << MINORBITS)
3536

@@ -463,6 +464,74 @@ int nvme_submit_user_cmd(struct request_queue *q, struct nvme_command *cmd,
463464
result, timeout);
464465
}
465466

467+
static void nvme_keep_alive_end_io(struct request *rq, int error)
468+
{
469+
struct nvme_ctrl *ctrl = rq->end_io_data;
470+
471+
blk_mq_free_request(rq);
472+
473+
if (error) {
474+
dev_err(ctrl->device,
475+
"failed nvme_keep_alive_end_io error=%d\n", error);
476+
return;
477+
}
478+
479+
schedule_delayed_work(&ctrl->ka_work, ctrl->kato * HZ);
480+
}
481+
482+
static int nvme_keep_alive(struct nvme_ctrl *ctrl)
483+
{
484+
struct nvme_command c;
485+
struct request *rq;
486+
487+
memset(&c, 0, sizeof(c));
488+
c.common.opcode = nvme_admin_keep_alive;
489+
490+
rq = nvme_alloc_request(ctrl->admin_q, &c, BLK_MQ_REQ_RESERVED,
491+
NVME_QID_ANY);
492+
if (IS_ERR(rq))
493+
return PTR_ERR(rq);
494+
495+
rq->timeout = ctrl->kato * HZ;
496+
rq->end_io_data = ctrl;
497+
498+
blk_execute_rq_nowait(rq->q, NULL, rq, 0, nvme_keep_alive_end_io);
499+
500+
return 0;
501+
}
502+
503+
static void nvme_keep_alive_work(struct work_struct *work)
504+
{
505+
struct nvme_ctrl *ctrl = container_of(to_delayed_work(work),
506+
struct nvme_ctrl, ka_work);
507+
508+
if (nvme_keep_alive(ctrl)) {
509+
/* allocation failure, reset the controller */
510+
dev_err(ctrl->device, "keep-alive failed\n");
511+
ctrl->ops->reset_ctrl(ctrl);
512+
return;
513+
}
514+
}
515+
516+
void nvme_start_keep_alive(struct nvme_ctrl *ctrl)
517+
{
518+
if (unlikely(ctrl->kato == 0))
519+
return;
520+
521+
INIT_DELAYED_WORK(&ctrl->ka_work, nvme_keep_alive_work);
522+
schedule_delayed_work(&ctrl->ka_work, ctrl->kato * HZ);
523+
}
524+
EXPORT_SYMBOL_GPL(nvme_start_keep_alive);
525+
526+
void nvme_stop_keep_alive(struct nvme_ctrl *ctrl)
527+
{
528+
if (unlikely(ctrl->kato == 0))
529+
return;
530+
531+
cancel_delayed_work_sync(&ctrl->ka_work);
532+
}
533+
EXPORT_SYMBOL_GPL(nvme_stop_keep_alive);
534+
466535
int nvme_identify_ctrl(struct nvme_ctrl *dev, struct nvme_id_ctrl **id)
467536
{
468537
struct nvme_command c = { };
@@ -1179,6 +1248,7 @@ int nvme_init_identify(struct nvme_ctrl *ctrl)
11791248

11801249
nvme_set_queue_limits(ctrl, ctrl->admin_q);
11811250
ctrl->sgls = le32_to_cpu(id->sgls);
1251+
ctrl->kas = le16_to_cpu(id->kas);
11821252

11831253
if (ctrl->ops->is_fabrics) {
11841254
ctrl->icdoff = le16_to_cpu(id->icdoff);
@@ -1192,6 +1262,12 @@ int nvme_init_identify(struct nvme_ctrl *ctrl)
11921262
*/
11931263
if (ctrl->cntlid != le16_to_cpu(id->cntlid))
11941264
ret = -EINVAL;
1265+
1266+
if (!ctrl->opts->discovery_nqn && !ctrl->kas) {
1267+
dev_err(ctrl->dev,
1268+
"keep-alive support is mandatory for fabrics\n");
1269+
ret = -EINVAL;
1270+
}
11951271
} else {
11961272
ctrl->cntlid = le16_to_cpu(id->cntlid);
11971273
}

drivers/nvme/host/fabrics.c

Lines changed: 32 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -360,6 +360,12 @@ int nvmf_connect_admin_queue(struct nvme_ctrl *ctrl)
360360
cmd.connect.fctype = nvme_fabrics_type_connect;
361361
cmd.connect.qid = 0;
362362
cmd.connect.sqsize = cpu_to_le16(ctrl->sqsize);
363+
/*
364+
* Set keep-alive timeout in seconds granularity (ms * 1000)
365+
* and add a grace period for controller kato enforcement
366+
*/
367+
cmd.connect.kato = ctrl->opts->discovery_nqn ? 0 :
368+
cpu_to_le32((ctrl->kato + NVME_KATO_GRACE) * 1000);
363369

364370
data = kzalloc(sizeof(*data), GFP_KERNEL);
365371
if (!data)
@@ -499,6 +505,7 @@ static const match_table_t opt_tokens = {
499505
{ NVMF_OPT_NR_IO_QUEUES, "nr_io_queues=%d" },
500506
{ NVMF_OPT_TL_RETRY_COUNT, "tl_retry_count=%d" },
501507
{ NVMF_OPT_RECONNECT_DELAY, "reconnect_delay=%d" },
508+
{ NVMF_OPT_KATO, "keep_alive_tmo=%d" },
502509
{ NVMF_OPT_HOSTNQN, "hostnqn=%s" },
503510
{ NVMF_OPT_ERR, NULL }
504511
};
@@ -610,6 +617,28 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts,
610617
}
611618
opts->tl_retry_count = token;
612619
break;
620+
case NVMF_OPT_KATO:
621+
if (match_int(args, &token)) {
622+
ret = -EINVAL;
623+
goto out;
624+
}
625+
626+
if (opts->discovery_nqn) {
627+
pr_err("Discovery controllers cannot accept keep_alive_tmo != 0\n");
628+
ret = -EINVAL;
629+
goto out;
630+
}
631+
632+
if (token < 0) {
633+
pr_err("Invalid keep_alive_tmo %d\n", token);
634+
ret = -EINVAL;
635+
goto out;
636+
} else if (token == 0) {
637+
/* Allowed for debug */
638+
pr_warn("keep_alive_tmo 0 won't execute keep alives!!!\n");
639+
}
640+
opts->kato = token;
641+
break;
613642
case NVMF_OPT_HOSTNQN:
614643
if (opts->host) {
615644
pr_err("hostnqn already user-assigned: %s\n",
@@ -661,6 +690,8 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts,
661690
}
662691

663692
out:
693+
if (!opts->discovery_nqn && !opts->kato)
694+
opts->kato = NVME_DEFAULT_KATO;
664695
kfree(options);
665696
return ret;
666697
}
@@ -717,7 +748,7 @@ EXPORT_SYMBOL_GPL(nvmf_free_options);
717748

718749
#define NVMF_REQUIRED_OPTS (NVMF_OPT_TRANSPORT | NVMF_OPT_NQN)
719750
#define NVMF_ALLOWED_OPTS (NVMF_OPT_QUEUE_SIZE | NVMF_OPT_NR_IO_QUEUES | \
720-
NVMF_OPT_HOSTNQN)
751+
NVMF_OPT_KATO | NVMF_OPT_HOSTNQN)
721752

722753
static struct nvme_ctrl *
723754
nvmf_create_ctrl(struct device *dev, const char *buf, size_t count)

drivers/nvme/host/fabrics.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@ enum {
4949
NVMF_OPT_QUEUE_SIZE = 1 << 4,
5050
NVMF_OPT_NR_IO_QUEUES = 1 << 5,
5151
NVMF_OPT_TL_RETRY_COUNT = 1 << 6,
52+
NVMF_OPT_KATO = 1 << 7,
5253
NVMF_OPT_HOSTNQN = 1 << 8,
5354
NVMF_OPT_RECONNECT_DELAY = 1 << 9,
5455
};
@@ -72,6 +73,7 @@ enum {
7273
* kicking upper layer(s) error recovery.
7374
* @reconnect_delay: Time between two consecutive reconnect attempts.
7475
* @discovery_nqn: indicates if the subsysnqn is the well-known discovery NQN.
76+
* @kato: Keep-alive timeout.
7577
* @host: Virtual NVMe host, contains the NQN and Host ID.
7678
*/
7779
struct nvmf_ctrl_options {
@@ -85,6 +87,7 @@ struct nvmf_ctrl_options {
8587
unsigned short tl_retry_count;
8688
unsigned int reconnect_delay;
8789
bool discovery_nqn;
90+
unsigned int kato;
8891
struct nvmf_host *host;
8992
};
9093

drivers/nvme/host/nvme.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,9 @@ extern unsigned char admin_timeout;
3838
extern unsigned char shutdown_timeout;
3939
#define SHUTDOWN_TIMEOUT (shutdown_timeout * HZ)
4040

41+
#define NVME_DEFAULT_KATO 5
42+
#define NVME_KATO_GRACE 10
43+
4144
enum {
4245
NVME_NS_LBA = 0,
4346
NVME_NS_LIGHTNVM = 1,
@@ -109,10 +112,13 @@ struct nvme_ctrl {
109112
u8 vwc;
110113
u32 vs;
111114
u32 sgls;
115+
u16 kas;
116+
unsigned int kato;
112117
bool subsystem;
113118
unsigned long quirks;
114119
struct work_struct scan_work;
115120
struct work_struct async_event_work;
121+
struct delayed_work ka_work;
116122

117123
/* Fabrics only */
118124
u16 sqsize;
@@ -273,6 +279,8 @@ int nvme_get_features(struct nvme_ctrl *dev, unsigned fid, unsigned nsid,
273279
int nvme_set_features(struct nvme_ctrl *dev, unsigned fid, unsigned dword11,
274280
dma_addr_t dma_addr, u32 *result);
275281
int nvme_set_queue_count(struct nvme_ctrl *ctrl, int *count);
282+
void nvme_start_keep_alive(struct nvme_ctrl *ctrl);
283+
void nvme_stop_keep_alive(struct nvme_ctrl *ctrl);
276284

277285
struct sg_io_hdr;
278286

0 commit comments

Comments
 (0)