Commit 6451fe7
nvme: fix irq vs io_queue calculations
Guenter reported a boot hang issue on HPPA after we default to 0 poll
queues. We have two issues in the queue count calculations:

1) We don't separate the poll queues from the read/write queues. This
   is important, since the former doesn't need interrupts.
2) The adjust logic is broken.

Adjust the poll queue count before doing nvme_calc_io_queues(). The
poll queue count is only limited by the IO queue count we were able to
get from the controller, not failures in the IRQ allocation loop. This
leaves nvme_calc_io_queues() just adjusting the read/write queue map.

Reported-by: Guenter Roeck <linux@roeck-us.net>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
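For illustration, the poll/IRQ split that this patch moves into nvme_setup_irqs() can be exercised in isolation. The sketch below is not the driver code: split_queues() and its main() harness are hypothetical stand-ins for the poll_queues module parameter and the controller-reported IO queue count, and it ignores the admin vector and the IRQ allocation retry loop handled by the real function.

#include <stdio.h>

/*
 * Minimal sketch of the poll/IRQ queue split this patch introduces.
 * nr_io_queues stands in for the IO queue count granted by the
 * controller, poll_queues for the module parameter; the real driver
 * stores the poll count in dev->io_queues[HCTX_TYPE_POLL] and passes
 * irq_queues on to nvme_calc_io_queues().
 */
static void split_queues(unsigned int nr_io_queues, unsigned int poll_queues,
                         unsigned int *polled, unsigned int *irq_queues)
{
        /* Poll queues need no interrupts, but keep one IRQ-driven IO queue. */
        if (poll_queues >= nr_io_queues) {
                *polled = nr_io_queues - 1;
                *irq_queues = 1;
        } else {
                *polled = poll_queues;
                *irq_queues = nr_io_queues - poll_queues;
        }
}

int main(void)
{
        unsigned int polled, irq_queues;

        split_queues(8, 2, &polled, &irq_queues);
        printf("polled=%u irq_queues=%u\n", polled, irq_queues);   /* 2 6 */

        split_queues(4, 8, &polled, &irq_queues);
        printf("polled=%u irq_queues=%u\n", polled, irq_queues);   /* 3 1 */

        return 0;
}

With 8 IO queues and poll_queues=2, two queues are polled and six still need interrupt vectors; if poll_queues is set higher than the available IO queues, one interrupt-driven queue is always kept for non-polled IO.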
1 parent b7934ba commit 6451fe7

1 file changed, 29 insertions(+), 35 deletions(-)
drivers/nvme/host/pci.c

@@ -2030,60 +2030,40 @@ static int nvme_setup_host_mem(struct nvme_dev *dev)
         return ret;
 }
 
-static void nvme_calc_io_queues(struct nvme_dev *dev, unsigned int nr_io_queues)
+static void nvme_calc_io_queues(struct nvme_dev *dev, unsigned int irq_queues)
 {
         unsigned int this_w_queues = write_queues;
-        unsigned int this_p_queues = poll_queues;
 
         /*
          * Setup read/write queue split
          */
-        if (nr_io_queues == 1) {
+        if (irq_queues == 1) {
                 dev->io_queues[HCTX_TYPE_DEFAULT] = 1;
                 dev->io_queues[HCTX_TYPE_READ] = 0;
-                dev->io_queues[HCTX_TYPE_POLL] = 0;
                 return;
         }
 
-        /*
-         * Configure number of poll queues, if set
-         */
-        if (this_p_queues) {
-                /*
-                 * We need at least one queue left. With just one queue, we'll
-                 * have a single shared read/write set.
-                 */
-                if (this_p_queues >= nr_io_queues) {
-                        this_w_queues = 0;
-                        this_p_queues = nr_io_queues - 1;
-                }
-
-                dev->io_queues[HCTX_TYPE_POLL] = this_p_queues;
-                nr_io_queues -= this_p_queues;
-        } else
-                dev->io_queues[HCTX_TYPE_POLL] = 0;
-
         /*
          * If 'write_queues' is set, ensure it leaves room for at least
          * one read queue
          */
-        if (this_w_queues >= nr_io_queues)
-                this_w_queues = nr_io_queues - 1;
+        if (this_w_queues >= irq_queues)
+                this_w_queues = irq_queues - 1;
 
         /*
          * If 'write_queues' is set to zero, reads and writes will share
          * a queue set.
          */
         if (!this_w_queues) {
-                dev->io_queues[HCTX_TYPE_DEFAULT] = nr_io_queues;
+                dev->io_queues[HCTX_TYPE_DEFAULT] = irq_queues;
                 dev->io_queues[HCTX_TYPE_READ] = 0;
         } else {
                 dev->io_queues[HCTX_TYPE_DEFAULT] = this_w_queues;
-                dev->io_queues[HCTX_TYPE_READ] = nr_io_queues - this_w_queues;
+                dev->io_queues[HCTX_TYPE_READ] = irq_queues - this_w_queues;
         }
 }
 
-static int nvme_setup_irqs(struct nvme_dev *dev, int nr_io_queues)
+static int nvme_setup_irqs(struct nvme_dev *dev, unsigned int nr_io_queues)
 {
         struct pci_dev *pdev = to_pci_dev(dev->dev);
         int irq_sets[2];
@@ -2093,14 +2073,28 @@ static int nvme_setup_irqs(struct nvme_dev *dev, int nr_io_queues)
                 .sets = irq_sets,
         };
         int result = 0;
+        unsigned int irq_queues, this_p_queues;
+
+        /*
+         * Poll queues don't need interrupts, but we need at least one IO
+         * queue left over for non-polled IO.
+         */
+        this_p_queues = poll_queues;
+        if (this_p_queues >= nr_io_queues) {
+                this_p_queues = nr_io_queues - 1;
+                irq_queues = 1;
+        } else {
+                irq_queues = nr_io_queues - this_p_queues;
+        }
+        dev->io_queues[HCTX_TYPE_POLL] = this_p_queues;
 
         /*
          * For irq sets, we have to ask for minvec == maxvec. This passes
          * any reduction back to us, so we can adjust our queue counts and
          * IRQ vector needs.
          */
         do {
-                nvme_calc_io_queues(dev, nr_io_queues);
+                nvme_calc_io_queues(dev, irq_queues);
                 irq_sets[0] = dev->io_queues[HCTX_TYPE_DEFAULT];
                 irq_sets[1] = dev->io_queues[HCTX_TYPE_READ];
                 if (!irq_sets[1])
@@ -2111,11 +2105,11 @@ static int nvme_setup_irqs(struct nvme_dev *dev, int nr_io_queues)
                  * 1 + 1 queues, just ask for a single vector. We'll share
                  * that between the single IO queue and the admin queue.
                  */
-                if (!(result < 0 && nr_io_queues == 1))
-                        nr_io_queues = irq_sets[0] + irq_sets[1] + 1;
+                if (result >= 0 && irq_queues > 1)
+                        irq_queues = irq_sets[0] + irq_sets[1] + 1;
 
-                result = pci_alloc_irq_vectors_affinity(pdev, nr_io_queues,
-                                nr_io_queues,
+                result = pci_alloc_irq_vectors_affinity(pdev, irq_queues,
+                                irq_queues,
                                 PCI_IRQ_ALL_TYPES | PCI_IRQ_AFFINITY, &affd);
 
                 /*
@@ -2125,12 +2119,12 @@ static int nvme_setup_irqs(struct nvme_dev *dev, int nr_io_queues)
                  * likely does not. Back down to ask for just one vector.
                  */
                 if (result == -ENOSPC) {
-                        nr_io_queues--;
-                        if (!nr_io_queues)
+                        irq_queues--;
+                        if (!irq_queues)
                                 return result;
                         continue;
                 } else if (result == -EINVAL) {
-                        nr_io_queues = 1;
+                        irq_queues = 1;
                         continue;
                 } else if (result <= 0)
                         return -EIO;
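As a worked example of the new accounting (illustrative numbers, not driver output): with 8 IO queues granted by the controller and poll_queues=2, nvme_setup_irqs() reserves 2 poll queues up front and passes irq_queues=6 to nvme_calc_io_queues(); with write_queues left at 0 those become 6 shared read/write queues, so pci_alloc_irq_vectors_affinity() is asked for 6 + 0 + 1 = 7 vectors, the extra one covering the admin queue. If the allocation comes back short, only irq_queues is reduced in the retry loop; the poll queue count chosen up front is untouched, which is the separation the commit message describes.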
