
Commit c16d6b5

Ming Lei authored and Jens Axboe committed
blk-mq: fix dispatch from sw queue
When a request is added to the rq list of a sw queue (ctx), the rq may come from a different type of hctx, especially after multiple queue mappings were introduced. So when dispatching requests from the sw queue via blk_mq_flush_busy_ctxs() or blk_mq_dequeue_from_ctx(), a request belonging to a different hctx queue type can be dispatched to the current hctx when a read queue or poll queue is enabled. This patch fixes the issue by introducing per-queue-type lists.

Cc: Christoph Hellwig <hch@lst.de>
Signed-off-by: Ming Lei <ming.lei@redhat.com>

Changed by me to not use separately cacheline-aligned lists; just place them all in the same cacheline where we previously had just the one list and lock.

Signed-off-by: Jens Axboe <axboe@kernel.dk>
1 parent 7211aef commit c16d6b5
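
The gist of the change, reduced to a self-contained userspace sketch: a hw queue (hctx) now drains only the per-ctx list that matches its own type, so requests staged for a different queue type are never dispatched to the wrong hctx. The enum names mirror the patch; struct sw_ctx, struct hw_ctx, dispatch_from_ctx() and the counter-based "lists" are simplified stand-ins for illustration, not kernel code.

#include <stdio.h>

/* Queue types, mirroring enum hctx_type from the patch. */
enum hctx_type {
	HCTX_TYPE_DEFAULT,
	HCTX_TYPE_READ,
	HCTX_TYPE_POLL,
	HCTX_MAX_TYPES,
};

/* Simplified sw queue context: one "list" (here just a counter)
 * per hctx type, instead of the single shared rq_list that let
 * requests leak across queue types. */
struct sw_ctx {
	int rq_count[HCTX_MAX_TYPES];
};

struct hw_ctx {
	enum hctx_type type;
};

/* Dispatch consults only the list matching this hctx's own type,
 * which is the invariant the patch restores. */
static int dispatch_from_ctx(struct hw_ctx *hctx, struct sw_ctx *ctx)
{
	enum hctx_type type = hctx->type;

	if (!ctx->rq_count[type])
		return 0;	/* nothing of our type is staged */
	ctx->rq_count[type]--;
	return 1;		/* dispatched one request */
}

int main(void)
{
	/* Two default requests and one poll request are staged. */
	struct sw_ctx ctx = { .rq_count = { 2, 0, 1 } };
	struct hw_ctx poll_hctx = { .type = HCTX_TYPE_POLL };
	int n = 0;

	while (dispatch_from_ctx(&poll_hctx, &ctx))
		n++;
	printf("poll hctx dispatched %d request(s)\n", n);	/* prints 1 */
	return 0;
}

Before the patch, the equivalent of dispatch_from_ctx() pulled from one shared list, so the poll hctx in this scenario could also have walked away with the two default requests.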

File tree

4 files changed: +68 −45 lines changed


block/blk-mq-debugfs.c

Lines changed: 39 additions & 30 deletions
@@ -652,36 +652,43 @@ static int hctx_dispatch_busy_show(void *data, struct seq_file *m)
 	return 0;
 }
 
-static void *ctx_rq_list_start(struct seq_file *m, loff_t *pos)
-	__acquires(&ctx->lock)
-{
-	struct blk_mq_ctx *ctx = m->private;
-
-	spin_lock(&ctx->lock);
-	return seq_list_start(&ctx->rq_list, *pos);
-}
-
-static void *ctx_rq_list_next(struct seq_file *m, void *v, loff_t *pos)
-{
-	struct blk_mq_ctx *ctx = m->private;
-
-	return seq_list_next(v, &ctx->rq_list, pos);
-}
+#define CTX_RQ_SEQ_OPS(name, type) \
+static void *ctx_##name##_rq_list_start(struct seq_file *m, loff_t *pos) \
+	__acquires(&ctx->lock) \
+{ \
+	struct blk_mq_ctx *ctx = m->private; \
+ \
+	spin_lock(&ctx->lock); \
+	return seq_list_start(&ctx->rq_lists[type], *pos); \
+} \
+ \
+static void *ctx_##name##_rq_list_next(struct seq_file *m, void *v, \
+				       loff_t *pos) \
+{ \
+	struct blk_mq_ctx *ctx = m->private; \
+ \
+	return seq_list_next(v, &ctx->rq_lists[type], pos); \
+} \
+ \
+static void ctx_##name##_rq_list_stop(struct seq_file *m, void *v) \
+	__releases(&ctx->lock) \
+{ \
+	struct blk_mq_ctx *ctx = m->private; \
+ \
+	spin_unlock(&ctx->lock); \
+} \
+ \
+static const struct seq_operations ctx_##name##_rq_list_seq_ops = { \
+	.start	= ctx_##name##_rq_list_start, \
+	.next	= ctx_##name##_rq_list_next, \
+	.stop	= ctx_##name##_rq_list_stop, \
+	.show	= blk_mq_debugfs_rq_show, \
+}
+
+CTX_RQ_SEQ_OPS(default, HCTX_TYPE_DEFAULT);
+CTX_RQ_SEQ_OPS(read, HCTX_TYPE_READ);
+CTX_RQ_SEQ_OPS(poll, HCTX_TYPE_POLL);
 
-static void ctx_rq_list_stop(struct seq_file *m, void *v)
-	__releases(&ctx->lock)
-{
-	struct blk_mq_ctx *ctx = m->private;
-
-	spin_unlock(&ctx->lock);
-}
-
-static const struct seq_operations ctx_rq_list_seq_ops = {
-	.start	= ctx_rq_list_start,
-	.next	= ctx_rq_list_next,
-	.stop	= ctx_rq_list_stop,
-	.show	= blk_mq_debugfs_rq_show,
-};
 static int ctx_dispatched_show(void *data, struct seq_file *m)
 {
 	struct blk_mq_ctx *ctx = data;
@@ -819,7 +826,9 @@ static const struct blk_mq_debugfs_attr blk_mq_debugfs_hctx_attrs[] = {
 };
 
 static const struct blk_mq_debugfs_attr blk_mq_debugfs_ctx_attrs[] = {
-	{"rq_list", 0400, .seq_ops = &ctx_rq_list_seq_ops},
+	{"default_rq_list", 0400, .seq_ops = &ctx_default_rq_list_seq_ops},
+	{"read_rq_list", 0400, .seq_ops = &ctx_read_rq_list_seq_ops},
+	{"poll_rq_list", 0400, .seq_ops = &ctx_poll_rq_list_seq_ops},
 	{"dispatched", 0600, ctx_dispatched_show, ctx_dispatched_write},
 	{"merged", 0600, ctx_merged_show, ctx_merged_write},
 	{"completed", 0600, ctx_completed_show, ctx_completed_write},

block/blk-mq-sched.c

Lines changed: 8 additions & 3 deletions
@@ -302,11 +302,14 @@ EXPORT_SYMBOL_GPL(blk_mq_bio_list_merge);
  * too much time checking for merges.
  */
 static bool blk_mq_attempt_merge(struct request_queue *q,
+				 struct blk_mq_hw_ctx *hctx,
 				 struct blk_mq_ctx *ctx, struct bio *bio)
 {
+	enum hctx_type type = hctx->type;
+
 	lockdep_assert_held(&ctx->lock);
 
-	if (blk_mq_bio_list_merge(q, &ctx->rq_list, bio)) {
+	if (blk_mq_bio_list_merge(q, &ctx->rq_lists[type], bio)) {
 		ctx->rq_merged++;
 		return true;
 	}
@@ -320,17 +323,19 @@ bool __blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio)
 	struct blk_mq_ctx *ctx = blk_mq_get_ctx(q);
 	struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, bio->bi_opf, ctx->cpu);
 	bool ret = false;
+	enum hctx_type type;
 
 	if (e && e->type->ops.bio_merge) {
 		blk_mq_put_ctx(ctx);
 		return e->type->ops.bio_merge(hctx, bio);
 	}
 
+	type = hctx->type;
 	if ((hctx->flags & BLK_MQ_F_SHOULD_MERGE) &&
-			!list_empty_careful(&ctx->rq_list)) {
+			!list_empty_careful(&ctx->rq_lists[type])) {
 		/* default per sw-queue merge */
 		spin_lock(&ctx->lock);
-		ret = blk_mq_attempt_merge(q, ctx, bio);
+		ret = blk_mq_attempt_merge(q, hctx, ctx, bio);
 		spin_unlock(&ctx->lock);
 	}
 
block/blk-mq.c

Lines changed: 19 additions & 10 deletions
@@ -958,9 +958,10 @@ static bool flush_busy_ctx(struct sbitmap *sb, unsigned int bitnr, void *data)
 	struct flush_busy_ctx_data *flush_data = data;
 	struct blk_mq_hw_ctx *hctx = flush_data->hctx;
 	struct blk_mq_ctx *ctx = hctx->ctxs[bitnr];
+	enum hctx_type type = hctx->type;
 
 	spin_lock(&ctx->lock);
-	list_splice_tail_init(&ctx->rq_list, flush_data->list);
+	list_splice_tail_init(&ctx->rq_lists[type], flush_data->list);
 	sbitmap_clear_bit(sb, bitnr);
 	spin_unlock(&ctx->lock);
 	return true;
@@ -992,12 +993,13 @@ static bool dispatch_rq_from_ctx(struct sbitmap *sb, unsigned int bitnr,
 	struct dispatch_rq_data *dispatch_data = data;
 	struct blk_mq_hw_ctx *hctx = dispatch_data->hctx;
 	struct blk_mq_ctx *ctx = hctx->ctxs[bitnr];
+	enum hctx_type type = hctx->type;
 
 	spin_lock(&ctx->lock);
-	if (!list_empty(&ctx->rq_list)) {
-		dispatch_data->rq = list_entry_rq(ctx->rq_list.next);
+	if (!list_empty(&ctx->rq_lists[type])) {
+		dispatch_data->rq = list_entry_rq(ctx->rq_lists[type].next);
 		list_del_init(&dispatch_data->rq->queuelist);
-		if (list_empty(&ctx->rq_list))
+		if (list_empty(&ctx->rq_lists[type]))
 			sbitmap_clear_bit(sb, bitnr);
 	}
 	spin_unlock(&ctx->lock);
@@ -1608,15 +1610,16 @@ static inline void __blk_mq_insert_req_list(struct blk_mq_hw_ctx *hctx,
 					    bool at_head)
 {
 	struct blk_mq_ctx *ctx = rq->mq_ctx;
+	enum hctx_type type = hctx->type;
 
 	lockdep_assert_held(&ctx->lock);
 
 	trace_block_rq_insert(hctx->queue, rq);
 
 	if (at_head)
-		list_add(&rq->queuelist, &ctx->rq_list);
+		list_add(&rq->queuelist, &ctx->rq_lists[type]);
 	else
-		list_add_tail(&rq->queuelist, &ctx->rq_list);
+		list_add_tail(&rq->queuelist, &ctx->rq_lists[type]);
 }
 
 void __blk_mq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
@@ -1651,6 +1654,7 @@ void blk_mq_insert_requests(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx,
 
 {
 	struct request *rq;
+	enum hctx_type type = hctx->type;
 
 	/*
 	 * preemption doesn't flush plug list, so it's possible ctx->cpu is
@@ -1662,7 +1666,7 @@ void blk_mq_insert_requests(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx,
 	}
 
 	spin_lock(&ctx->lock);
-	list_splice_tail_init(list, &ctx->rq_list);
+	list_splice_tail_init(list, &ctx->rq_lists[type]);
 	blk_mq_hctx_mark_pending(hctx, ctx);
 	spin_unlock(&ctx->lock);
 }
@@ -2200,13 +2204,15 @@ static int blk_mq_hctx_notify_dead(unsigned int cpu, struct hlist_node *node)
 	struct blk_mq_hw_ctx *hctx;
 	struct blk_mq_ctx *ctx;
 	LIST_HEAD(tmp);
+	enum hctx_type type;
 
 	hctx = hlist_entry_safe(node, struct blk_mq_hw_ctx, cpuhp_dead);
 	ctx = __blk_mq_get_ctx(hctx->queue, cpu);
+	type = hctx->type;
 
 	spin_lock(&ctx->lock);
-	if (!list_empty(&ctx->rq_list)) {
-		list_splice_init(&ctx->rq_list, &tmp);
+	if (!list_empty(&ctx->rq_lists[type])) {
+		list_splice_init(&ctx->rq_lists[type], &tmp);
 		blk_mq_hctx_clear_pending(hctx, ctx);
 	}
 	spin_unlock(&ctx->lock);
@@ -2343,10 +2349,13 @@ static void blk_mq_init_cpu_queues(struct request_queue *q,
 	for_each_possible_cpu(i) {
 		struct blk_mq_ctx *__ctx = per_cpu_ptr(q->queue_ctx, i);
 		struct blk_mq_hw_ctx *hctx;
+		int k;
 
 		__ctx->cpu = i;
 		spin_lock_init(&__ctx->lock);
-		INIT_LIST_HEAD(&__ctx->rq_list);
+		for (k = HCTX_TYPE_DEFAULT; k < HCTX_MAX_TYPES; k++)
+			INIT_LIST_HEAD(&__ctx->rq_lists[k]);
+
 		__ctx->queue = q;
 
 		/*

block/blk-mq.h

Lines changed: 2 additions & 2 deletions
@@ -18,8 +18,8 @@ struct blk_mq_ctxs {
 struct blk_mq_ctx {
 	struct {
 		spinlock_t		lock;
-		struct list_head	rq_list;
-	}  ____cacheline_aligned_in_smp;
+		struct list_head	rq_lists[HCTX_MAX_TYPES];
+	} ____cacheline_aligned_in_smp;
 
 	unsigned int		cpu;
 	unsigned short		index_hw[HCTX_MAX_TYPES];
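
Jens's note about cacheline placement is worth a quick illustration. Below is a minimal userspace sketch of the layout, assuming a 64-byte cacheline and using C11's _Alignas as a stand-in for the kernel's ____cacheline_aligned_in_smp; struct ctx_like, the "hot" member name, and the int lock are hypothetical simplifications.

#include <stddef.h>
#include <stdio.h>

#define CACHELINE 64	/* assumed SMP_CACHE_BYTES */

struct list_head {
	struct list_head *next, *prev;
};

enum hctx_type {
	HCTX_TYPE_DEFAULT,
	HCTX_TYPE_READ,
	HCTX_TYPE_POLL,
	HCTX_MAX_TYPES,
};

/* The lock + lists group is cacheline-aligned as a whole,
 * not per list, matching the layout chosen in the commit. */
struct ctx_like {
	_Alignas(CACHELINE) struct {
		int lock;	/* stand-in for spinlock_t */
		struct list_head rq_lists[HCTX_MAX_TYPES];
	} hot;
	unsigned int cpu;
};

int main(void)
{
	struct ctx_like c;

	/* On an LP64 build: 3 list heads (3 * 16 B) plus the lock
	 * pad to 56 B, so all three per-type lists and the lock
	 * still share a single 64-byte line. */
	printf("hot group: %zu bytes, cpu at offset %zu\n",
	       sizeof(c.hot), offsetof(struct ctx_like, cpu));
	return 0;
}

With three queue types the hot group stays within one cacheline, which is why aligning each list separately (three lines instead of one) would have wasted space for no locality gain.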
