Skip to content

Commit 34d11ff

Browse files
Jianchao Wang authored and axboe (Jens Axboe) committed
blk-mq: realloc hctx when hw queue is mapped to another node
When the hw queues and mq_map are updated, a hctx could be mapped to a different NUMA node. At that moment, we need to realloc the hctx. If we fail to do that, keep using the previous hctx. Signed-off-by: Jianchao Wang <jianchao.w.wang@oracle.com> Signed-off-by: Jens Axboe <axboe@kernel.dk>
1 parent 5b20285 commit 34d11ff

File tree

1 file changed

+56
-26
lines changed

1 file changed

+56
-26
lines changed

block/blk-mq.c

Lines changed: 56 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -2521,6 +2521,39 @@ static int blk_mq_hw_ctx_size(struct blk_mq_tag_set *tag_set)
25212521
return hw_ctx_size;
25222522
}
25232523

2524+
static struct blk_mq_hw_ctx *blk_mq_alloc_and_init_hctx(
2525+
struct blk_mq_tag_set *set, struct request_queue *q,
2526+
int hctx_idx, int node)
2527+
{
2528+
struct blk_mq_hw_ctx *hctx;
2529+
2530+
hctx = kzalloc_node(blk_mq_hw_ctx_size(set),
2531+
GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY,
2532+
node);
2533+
if (!hctx)
2534+
return NULL;
2535+
2536+
if (!zalloc_cpumask_var_node(&hctx->cpumask,
2537+
GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY,
2538+
node)) {
2539+
kfree(hctx);
2540+
return NULL;
2541+
}
2542+
2543+
atomic_set(&hctx->nr_active, 0);
2544+
hctx->numa_node = node;
2545+
hctx->queue_num = hctx_idx;
2546+
2547+
if (blk_mq_init_hctx(q, set, hctx, hctx_idx)) {
2548+
free_cpumask_var(hctx->cpumask);
2549+
kfree(hctx);
2550+
return NULL;
2551+
}
2552+
blk_mq_hctx_kobj_init(hctx);
2553+
2554+
return hctx;
2555+
}
2556+
25242557
static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set,
25252558
struct request_queue *q)
25262559
{
@@ -2531,37 +2564,34 @@ static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set,
25312564
mutex_lock(&q->sysfs_lock);
25322565
for (i = 0; i < set->nr_hw_queues; i++) {
25332566
int node;
2534-
2535-
if (hctxs[i])
2536-
continue;
2567+
struct blk_mq_hw_ctx *hctx;
25372568

25382569
node = blk_mq_hw_queue_to_node(q->mq_map, i);
2539-
hctxs[i] = kzalloc_node(blk_mq_hw_ctx_size(set),
2540-
GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY,
2541-
node);
2542-
if (!hctxs[i])
2543-
break;
2544-
2545-
if (!zalloc_cpumask_var_node(&hctxs[i]->cpumask,
2546-
GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY,
2547-
node)) {
2548-
kfree(hctxs[i]);
2549-
hctxs[i] = NULL;
2550-
break;
2551-
}
2552-
2553-
atomic_set(&hctxs[i]->nr_active, 0);
2554-
hctxs[i]->numa_node = node;
2555-
hctxs[i]->queue_num = i;
2570+
/*
2571+
* If the hw queue has been mapped to another numa node,
2572+
* we need to realloc the hctx. If allocation fails, fallback
2573+
* to use the previous one.
2574+
*/
2575+
if (hctxs[i] && (hctxs[i]->numa_node == node))
2576+
continue;
25562577

2557-
if (blk_mq_init_hctx(q, set, hctxs[i], i)) {
2558-
free_cpumask_var(hctxs[i]->cpumask);
2559-
kfree(hctxs[i]);
2560-
hctxs[i] = NULL;
2561-
break;
2578+
hctx = blk_mq_alloc_and_init_hctx(set, q, i, node);
2579+
if (hctx) {
2580+
if (hctxs[i]) {
2581+
blk_mq_exit_hctx(q, set, hctxs[i], i);
2582+
kobject_put(&hctxs[i]->kobj);
2583+
}
2584+
hctxs[i] = hctx;
2585+
} else {
2586+
if (hctxs[i])
2587+
pr_warn("Allocate new hctx on node %d fails,\
2588+
fallback to previous one on node %d\n",
2589+
node, hctxs[i]->numa_node);
2590+
else
2591+
break;
25622592
}
2563-
blk_mq_hctx_kobj_init(hctxs[i]);
25642593
}
2594+
25652595
for (j = i; j < q->nr_hw_queues; j++) {
25662596
struct blk_mq_hw_ctx *hctx = hctxs[j];
25672597

0 commit comments

Comments
 (0)