Skip to content

Commit 85d0331

Browse files
committed
Merge branch 'nvme-4.12' of git://git.infradead.org/nvme into for-linus
Christoph writes: "A few NVMe fixes for 4.12-rc, PCIe reset fixes and APST fixes, a RDMA reconnect fix, two FC fixes and a general controller removal fix."
2 parents 6460495 + 9947d6a commit 85d0331

File tree

4 files changed

+67
-31
lines changed

4 files changed

+67
-31
lines changed

drivers/nvme/host/core.c

Lines changed: 14 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@ MODULE_PARM_DESC(max_retries, "max number of retries a command may have");
5656
static int nvme_char_major;
5757
module_param(nvme_char_major, int, 0);
5858

59-
static unsigned long default_ps_max_latency_us = 25000;
59+
static unsigned long default_ps_max_latency_us = 100000;
6060
module_param(default_ps_max_latency_us, ulong, 0644);
6161
MODULE_PARM_DESC(default_ps_max_latency_us,
6262
"max power saving latency for new devices; use PM QOS to change per device");
@@ -1342,7 +1342,7 @@ static void nvme_configure_apst(struct nvme_ctrl *ctrl)
13421342
* transitioning between power states. Therefore, when running
13431343
* in any given state, we will enter the next lower-power
13441344
* non-operational state after waiting 50 * (enlat + exlat)
1345-
* microseconds, as long as that state's total latency is under
1345+
* microseconds, as long as that state's exit latency is under
13461346
* the requested maximum latency.
13471347
*
13481348
* We will not autonomously enter any non-operational state for
@@ -1387,7 +1387,7 @@ static void nvme_configure_apst(struct nvme_ctrl *ctrl)
13871387
* lowest-power state, not the number of states.
13881388
*/
13891389
for (state = (int)ctrl->npss; state >= 0; state--) {
1390-
u64 total_latency_us, transition_ms;
1390+
u64 total_latency_us, exit_latency_us, transition_ms;
13911391

13921392
if (target)
13931393
table->entries[state] = target;
@@ -1408,12 +1408,15 @@ static void nvme_configure_apst(struct nvme_ctrl *ctrl)
14081408
NVME_PS_FLAGS_NON_OP_STATE))
14091409
continue;
14101410

1411-
total_latency_us =
1412-
(u64)le32_to_cpu(ctrl->psd[state].entry_lat) +
1413-
+ le32_to_cpu(ctrl->psd[state].exit_lat);
1414-
if (total_latency_us > ctrl->ps_max_latency_us)
1411+
exit_latency_us =
1412+
(u64)le32_to_cpu(ctrl->psd[state].exit_lat);
1413+
if (exit_latency_us > ctrl->ps_max_latency_us)
14151414
continue;
14161415

1416+
total_latency_us =
1417+
exit_latency_us +
1418+
le32_to_cpu(ctrl->psd[state].entry_lat);
1419+
14171420
/*
14181421
* This state is good. Use it as the APST idle
14191422
* target for higher power states.
@@ -2438,6 +2441,10 @@ void nvme_kill_queues(struct nvme_ctrl *ctrl)
24382441
struct nvme_ns *ns;
24392442

24402443
mutex_lock(&ctrl->namespaces_mutex);
2444+
2445+
/* Forcibly start all queues to avoid having stuck requests */
2446+
blk_mq_start_hw_queues(ctrl->admin_q);
2447+
24412448
list_for_each_entry(ns, &ctrl->namespaces, list) {
24422449
/*
24432450
* Revalidating a dead namespace sets capacity to 0. This will

drivers/nvme/host/fc.c

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1139,6 +1139,7 @@ nvme_fc_xmt_disconnect_assoc(struct nvme_fc_ctrl *ctrl)
11391139
/* *********************** NVME Ctrl Routines **************************** */
11401140

11411141
static void __nvme_fc_final_op_cleanup(struct request *rq);
1142+
static void nvme_fc_error_recovery(struct nvme_fc_ctrl *ctrl, char *errmsg);
11421143

11431144
static int
11441145
nvme_fc_reinit_request(void *data, struct request *rq)
@@ -1265,7 +1266,7 @@ nvme_fc_fcpio_done(struct nvmefc_fcp_req *req)
12651266
struct nvme_command *sqe = &op->cmd_iu.sqe;
12661267
__le16 status = cpu_to_le16(NVME_SC_SUCCESS << 1);
12671268
union nvme_result result;
1268-
bool complete_rq;
1269+
bool complete_rq, terminate_assoc = true;
12691270

12701271
/*
12711272
* WARNING:
@@ -1294,6 +1295,14 @@ nvme_fc_fcpio_done(struct nvmefc_fcp_req *req)
12941295
* fabricate a CQE, the following fields will not be set as they
12951296
* are not referenced:
12961297
* cqe.sqid, cqe.sqhd, cqe.command_id
1298+
*
1299+
* Failure or error of an individual i/o, in a transport
1300+
* detected fashion unrelated to the nvme completion status,
1301+
* potentially causes the initiator and target sides to get out
1302+
* of sync on SQ head/tail (aka outstanding io count allowed).
1303+
* Per FC-NVME spec, failure of an individual command requires
1304+
* the connection to be terminated, which in turn requires the
1305+
* association to be terminated.
12971306
*/
12981307

12991308
fc_dma_sync_single_for_cpu(ctrl->lport->dev, op->fcp_req.rspdma,
@@ -1359,14 +1368,16 @@ nvme_fc_fcpio_done(struct nvmefc_fcp_req *req)
13591368
goto done;
13601369
}
13611370

1371+
terminate_assoc = false;
1372+
13621373
done:
13631374
if (op->flags & FCOP_FLAGS_AEN) {
13641375
nvme_complete_async_event(&queue->ctrl->ctrl, status, &result);
13651376
complete_rq = __nvme_fc_fcpop_chk_teardowns(ctrl, op);
13661377
atomic_set(&op->state, FCPOP_STATE_IDLE);
13671378
op->flags = FCOP_FLAGS_AEN; /* clear other flags */
13681379
nvme_fc_ctrl_put(ctrl);
1369-
return;
1380+
goto check_error;
13701381
}
13711382

13721383
complete_rq = __nvme_fc_fcpop_chk_teardowns(ctrl, op);
@@ -1379,6 +1390,10 @@ nvme_fc_fcpio_done(struct nvmefc_fcp_req *req)
13791390
nvme_end_request(rq, status, result);
13801391
} else
13811392
__nvme_fc_final_op_cleanup(rq);
1393+
1394+
check_error:
1395+
if (terminate_assoc)
1396+
nvme_fc_error_recovery(ctrl, "transport detected io error");
13821397
}
13831398

13841399
static int
@@ -2791,6 +2806,7 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
27912806
ctrl->ctrl.opts = NULL;
27922807
/* initiate nvme ctrl ref counting teardown */
27932808
nvme_uninit_ctrl(&ctrl->ctrl);
2809+
nvme_put_ctrl(&ctrl->ctrl);
27942810

27952811
/* as we're past the point where we transition to the ref
27962812
* counting teardown path, if we return a bad pointer here,

drivers/nvme/host/pci.c

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1367,7 +1367,7 @@ static bool nvme_should_reset(struct nvme_dev *dev, u32 csts)
13671367
bool nssro = dev->subsystem && (csts & NVME_CSTS_NSSRO);
13681368

13691369
/* If there is a reset ongoing, we shouldn't reset again. */
1370-
if (work_busy(&dev->reset_work))
1370+
if (dev->ctrl.state == NVME_CTRL_RESETTING)
13711371
return false;
13721372

13731373
/* We shouldn't reset unless the controller is on fatal error state
@@ -1903,7 +1903,7 @@ static void nvme_reset_work(struct work_struct *work)
19031903
bool was_suspend = !!(dev->ctrl.ctrl_config & NVME_CC_SHN_NORMAL);
19041904
int result = -ENODEV;
19051905

1906-
if (WARN_ON(dev->ctrl.state == NVME_CTRL_RESETTING))
1906+
if (WARN_ON(dev->ctrl.state != NVME_CTRL_RESETTING))
19071907
goto out;
19081908

19091909
/*
@@ -1913,9 +1913,6 @@ static void nvme_reset_work(struct work_struct *work)
19131913
if (dev->ctrl.ctrl_config & NVME_CC_ENABLE)
19141914
nvme_dev_disable(dev, false);
19151915

1916-
if (!nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_RESETTING))
1917-
goto out;
1918-
19191916
result = nvme_pci_enable(dev);
19201917
if (result)
19211918
goto out;
@@ -2009,8 +2006,8 @@ static int nvme_reset(struct nvme_dev *dev)
20092006
{
20102007
if (!dev->ctrl.admin_q || blk_queue_dying(dev->ctrl.admin_q))
20112008
return -ENODEV;
2012-
if (work_busy(&dev->reset_work))
2013-
return -ENODEV;
2009+
if (!nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_RESETTING))
2010+
return -EBUSY;
20142011
if (!queue_work(nvme_workq, &dev->reset_work))
20152012
return -EBUSY;
20162013
return 0;
@@ -2136,6 +2133,7 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
21362133
if (result)
21372134
goto release_pools;
21382135

2136+
nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_RESETTING);
21392137
dev_info(dev->ctrl.device, "pci function %s\n", dev_name(&pdev->dev));
21402138

21412139
queue_work(nvme_workq, &dev->reset_work);
@@ -2179,6 +2177,7 @@ static void nvme_remove(struct pci_dev *pdev)
21792177

21802178
nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_DELETING);
21812179

2180+
cancel_work_sync(&dev->reset_work);
21822181
pci_set_drvdata(pdev, NULL);
21832182

21842183
if (!pci_device_is_present(pdev)) {

drivers/nvme/host/rdma.c

Lines changed: 29 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -753,36 +753,33 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work)
753753
if (ret)
754754
goto requeue;
755755

756-
blk_mq_start_stopped_hw_queues(ctrl->ctrl.admin_q, true);
757-
758756
ret = nvmf_connect_admin_queue(&ctrl->ctrl);
759757
if (ret)
760-
goto stop_admin_q;
758+
goto requeue;
761759

762760
set_bit(NVME_RDMA_Q_LIVE, &ctrl->queues[0].flags);
763761

764762
ret = nvme_enable_ctrl(&ctrl->ctrl, ctrl->cap);
765763
if (ret)
766-
goto stop_admin_q;
764+
goto requeue;
767765

768766
nvme_start_keep_alive(&ctrl->ctrl);
769767

770768
if (ctrl->queue_count > 1) {
771769
ret = nvme_rdma_init_io_queues(ctrl);
772770
if (ret)
773-
goto stop_admin_q;
771+
goto requeue;
774772

775773
ret = nvme_rdma_connect_io_queues(ctrl);
776774
if (ret)
777-
goto stop_admin_q;
775+
goto requeue;
778776
}
779777

780778
changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE);
781779
WARN_ON_ONCE(!changed);
782780
ctrl->ctrl.opts->nr_reconnects = 0;
783781

784782
if (ctrl->queue_count > 1) {
785-
nvme_start_queues(&ctrl->ctrl);
786783
nvme_queue_scan(&ctrl->ctrl);
787784
nvme_queue_async_events(&ctrl->ctrl);
788785
}
@@ -791,8 +788,6 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work)
791788

792789
return;
793790

794-
stop_admin_q:
795-
blk_mq_stop_hw_queues(ctrl->ctrl.admin_q);
796791
requeue:
797792
dev_info(ctrl->ctrl.device, "Failed reconnect attempt %d\n",
798793
ctrl->ctrl.opts->nr_reconnects);
@@ -823,6 +818,13 @@ static void nvme_rdma_error_recovery_work(struct work_struct *work)
823818
blk_mq_tagset_busy_iter(&ctrl->admin_tag_set,
824819
nvme_cancel_request, &ctrl->ctrl);
825820

821+
/*
822+
* queues are not alive anymore, so restart the queues to fail fast
823+
* new IO
824+
*/
825+
blk_mq_start_stopped_hw_queues(ctrl->ctrl.admin_q, true);
826+
nvme_start_queues(&ctrl->ctrl);
827+
826828
nvme_rdma_reconnect_or_remove(ctrl);
827829
}
828830

@@ -1433,19 +1435,30 @@ nvme_rdma_timeout(struct request *rq, bool reserved)
14331435
/*
14341436
* We cannot accept any other command until the Connect command has completed.
14351437
*/
1436-
static inline bool nvme_rdma_queue_is_ready(struct nvme_rdma_queue *queue,
1438+
static inline int nvme_rdma_queue_is_ready(struct nvme_rdma_queue *queue,
14371439
struct request *rq)
14381440
{
14391441
if (unlikely(!test_bit(NVME_RDMA_Q_LIVE, &queue->flags))) {
14401442
struct nvme_command *cmd = nvme_req(rq)->cmd;
14411443

14421444
if (!blk_rq_is_passthrough(rq) ||
14431445
cmd->common.opcode != nvme_fabrics_command ||
1444-
cmd->fabrics.fctype != nvme_fabrics_type_connect)
1445-
return false;
1446+
cmd->fabrics.fctype != nvme_fabrics_type_connect) {
1447+
/*
1448+
* reconnecting state means transport disruption, which
1449+
* can take a long time and even might fail permanently,
1450+
* so we can't let incoming I/O be requeued forever.
1451+
* fail it fast to allow upper layers a chance to
1452+
* failover.
1453+
*/
1454+
if (queue->ctrl->ctrl.state == NVME_CTRL_RECONNECTING)
1455+
return -EIO;
1456+
else
1457+
return -EAGAIN;
1458+
}
14461459
}
14471460

1448-
return true;
1461+
return 0;
14491462
}
14501463

14511464
static int nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx,
@@ -1463,8 +1476,9 @@ static int nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx,
14631476

14641477
WARN_ON_ONCE(rq->tag < 0);
14651478

1466-
if (!nvme_rdma_queue_is_ready(queue, rq))
1467-
return BLK_MQ_RQ_QUEUE_BUSY;
1479+
ret = nvme_rdma_queue_is_ready(queue, rq);
1480+
if (unlikely(ret))
1481+
goto err;
14681482

14691483
dev = queue->device->dev;
14701484
ib_dma_sync_single_for_cpu(dev, sqe->dma,

0 commit comments

Comments
 (0)