Skip to content

Commit 50e1dab

Browse files
committed
blk-mq-sched: fix starvation for multiple hardware queues and shared tags
If we have both multiple hardware queues and shared tag map between devices, we need to ensure that we propagate the hardware queue restart bit higher up. This is because we can get into a situation where we don't have any IO pending on a hardware queue, yet we fail getting a tag to start new IO. If that happens, it's not enough to mark the hardware queue as needing a restart, we need to bubble that up to the higher level queue as well. Signed-off-by: Jens Axboe <axboe@fb.com> Reviewed-by: Omar Sandoval <osandov@fb.com> Tested-by: Hannes Reinecke <hare@suse.com>
1 parent 99cf1dc commit 50e1dab

File tree

5 files changed

+41
-7
lines changed

5 files changed

+41
-7
lines changed

block/blk-mq-sched.c

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -301,6 +301,34 @@ bool blk_mq_sched_bypass_insert(struct blk_mq_hw_ctx *hctx, struct request *rq)
301301
}
302302
EXPORT_SYMBOL_GPL(blk_mq_sched_bypass_insert);
303303

304+
/*
 * Restart a single hardware queue: if this hctx was marked as needing a
 * scheduler restart (BLK_MQ_S_SCHED_RESTART), clear the mark and re-run
 * the queue — but only if it actually has requests pending, so an idle
 * hctx is not kicked for nothing.
 */
static void blk_mq_sched_restart_hctx(struct blk_mq_hw_ctx *hctx)
305+
{
306+
/* Only act if someone previously marked this hctx for restart. */
if (test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state)) {
307+
clear_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);
308+
/* Avoid a pointless run of an empty queue. */
if (blk_mq_hctx_has_pending(hctx))
309+
/* second arg true: presumably an async run — confirm against blk_mq_run_hw_queue() */
blk_mq_run_hw_queue(hctx, true);
310+
}
311+
}
312+
313+
/*
 * Restart the queue(s) associated with @hctx after a request completes.
 *
 * Without shared tags, only @hctx itself can be starved, so restarting it
 * alone suffices.  With a shared tag map (BLK_MQ_F_TAG_SHARED), a tag
 * freed here may unblock any hctx of the queue, so the restart must be
 * propagated: if the queue-wide QUEUE_FLAG_RESTART bit is set, clear it
 * and restart every hardware queue (this is the starvation fix described
 * in the commit message above).
 */
void blk_mq_sched_restart_queues(struct blk_mq_hw_ctx *hctx)
314+
{
315+
unsigned int i;
316+
317+
/* Non-shared tags: starvation is local to this hctx. */
if (!(hctx->flags & BLK_MQ_F_TAG_SHARED))
318+
blk_mq_sched_restart_hctx(hctx);
319+
else {
320+
struct request_queue *q = hctx->queue;
321+
322+
/* Nothing marked at the queue level: nothing to do. */
if (!test_bit(QUEUE_FLAG_RESTART, &q->queue_flags))
323+
return;
324+
325+
clear_bit(QUEUE_FLAG_RESTART, &q->queue_flags);
326+
327+
/* Bubble the restart up: give every hctx of the queue a chance to run. */
queue_for_each_hw_ctx(q, hctx, i)
328+
blk_mq_sched_restart_hctx(hctx);
329+
}
330+
}
331+
304332
static void blk_mq_sched_free_tags(struct blk_mq_tag_set *set,
305333
struct blk_mq_hw_ctx *hctx,
306334
unsigned int hctx_idx)

block/blk-mq-sched.h

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ bool blk_mq_sched_bypass_insert(struct blk_mq_hw_ctx *hctx, struct request *rq);
1919
bool blk_mq_sched_try_merge(struct request_queue *q, struct bio *bio);
2020
bool __blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio);
2121
bool blk_mq_sched_try_insert_merge(struct request_queue *q, struct request *rq);
22+
void blk_mq_sched_restart_queues(struct blk_mq_hw_ctx *hctx);
2223

2324
void blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx);
2425
void blk_mq_sched_move_to_dispatch(struct blk_mq_hw_ctx *hctx,
@@ -123,11 +124,6 @@ blk_mq_sched_completed_request(struct blk_mq_hw_ctx *hctx, struct request *rq)
123124
BUG_ON(rq->internal_tag == -1);
124125

125126
blk_mq_put_tag(hctx, hctx->sched_tags, rq->mq_ctx, rq->internal_tag);
126-
127-
if (test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state)) {
128-
clear_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);
129-
blk_mq_run_hw_queue(hctx, true);
130-
}
131127
}
132128

133129
static inline void blk_mq_sched_started_request(struct request *rq)
@@ -160,8 +156,15 @@ static inline bool blk_mq_sched_has_work(struct blk_mq_hw_ctx *hctx)
160156

161157
static inline void blk_mq_sched_mark_restart(struct blk_mq_hw_ctx *hctx)
162158
{
163-
if (!test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
159+
if (!test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state)) {
164160
set_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);
161+
if (hctx->flags & BLK_MQ_F_TAG_SHARED) {
162+
struct request_queue *q = hctx->queue;
163+
164+
if (!test_bit(QUEUE_FLAG_RESTART, &q->queue_flags))
165+
set_bit(QUEUE_FLAG_RESTART, &q->queue_flags);
166+
}
167+
}
165168
}
166169

167170
static inline bool blk_mq_sched_needs_restart(struct blk_mq_hw_ctx *hctx)

block/blk-mq.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ static LIST_HEAD(all_q_list);
4040
/*
4141
* Check if any of the ctx's have pending work in this hardware queue
4242
*/
43-
static bool blk_mq_hctx_has_pending(struct blk_mq_hw_ctx *hctx)
43+
bool blk_mq_hctx_has_pending(struct blk_mq_hw_ctx *hctx)
4444
{
4545
return sbitmap_any_bit_set(&hctx->ctx_map) ||
4646
!list_empty_careful(&hctx->dispatch) ||
@@ -345,6 +345,7 @@ void __blk_mq_finish_request(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx,
345345
blk_mq_put_tag(hctx, hctx->tags, ctx, rq->tag);
346346
if (sched_tag != -1)
347347
blk_mq_sched_completed_request(hctx, rq);
348+
blk_mq_sched_restart_queues(hctx);
348349
blk_queue_exit(q);
349350
}
350351

block/blk-mq.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr);
3333
void blk_mq_wake_waiters(struct request_queue *q);
3434
bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *, struct list_head *);
3535
void blk_mq_flush_busy_ctxs(struct blk_mq_hw_ctx *hctx, struct list_head *list);
36+
bool blk_mq_hctx_has_pending(struct blk_mq_hw_ctx *hctx);
3637

3738
/*
3839
* Internal helpers for allocating/freeing the request map

include/linux/blkdev.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -607,6 +607,7 @@ struct request_queue {
607607
#define QUEUE_FLAG_FLUSH_NQ 25 /* flush not queueuable */
608608
#define QUEUE_FLAG_DAX 26 /* device supports DAX */
609609
#define QUEUE_FLAG_STATS 27 /* track rq completion times */
610+
#define QUEUE_FLAG_RESTART 28 /* queue needs restart at completion */
610611

611612
#define QUEUE_FLAG_DEFAULT ((1 << QUEUE_FLAG_IO_STAT) | \
612613
(1 << QUEUE_FLAG_STACKABLE) | \

0 commit comments

Comments
 (0)