Skip to content

Commit bd166ef

Browse files
committed
blk-mq-sched: add framework for MQ capable IO schedulers
This adds a set of hooks that intercepts the blk-mq path of allocating/inserting/issuing/completing requests, allowing us to develop a scheduler within that framework. We reuse the existing elevator scheduler API on the registration side, but augment that with the scheduler flagging support for the blk-mq interface, and with a separate set of ops hooks for MQ devices. We split driver and scheduler tags, so we can run the scheduling independently of device queue depth. Signed-off-by: Jens Axboe <axboe@fb.com> Reviewed-by: Bart Van Assche <bart.vanassche@sandisk.com> Reviewed-by: Omar Sandoval <osandov@fb.com>
1 parent 2af8cbe commit bd166ef

17 files changed

+984
-194
lines changed

block/Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ obj-$(CONFIG_BLOCK) := bio.o elevator.o blk-core.o blk-tag.o blk-sysfs.o \
66
blk-flush.o blk-settings.o blk-ioc.o blk-map.o \
77
blk-exec.o blk-merge.o blk-softirq.o blk-timeout.o \
88
blk-lib.o blk-mq.o blk-mq-tag.o blk-stat.o \
9-
blk-mq-sysfs.o blk-mq-cpumap.o ioctl.o \
9+
blk-mq-sysfs.o blk-mq-cpumap.o blk-mq-sched.o ioctl.o \
1010
genhd.o scsi_ioctl.o partition-generic.o ioprio.o \
1111
badblocks.o partitions/
1212

block/blk-cgroup.c

Lines changed: 20 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1223,7 +1223,11 @@ int blkcg_activate_policy(struct request_queue *q,
12231223
if (blkcg_policy_enabled(q, pol))
12241224
return 0;
12251225

1226-
blk_queue_bypass_start(q);
1226+
if (q->mq_ops) {
1227+
blk_mq_freeze_queue(q);
1228+
blk_mq_quiesce_queue(q);
1229+
} else
1230+
blk_queue_bypass_start(q);
12271231
pd_prealloc:
12281232
if (!pd_prealloc) {
12291233
pd_prealloc = pol->pd_alloc_fn(GFP_KERNEL, q->node);
@@ -1261,7 +1265,10 @@ int blkcg_activate_policy(struct request_queue *q,
12611265

12621266
spin_unlock_irq(q->queue_lock);
12631267
out_bypass_end:
1264-
blk_queue_bypass_end(q);
1268+
if (q->mq_ops)
1269+
blk_mq_unfreeze_queue(q);
1270+
else
1271+
blk_queue_bypass_end(q);
12651272
if (pd_prealloc)
12661273
pol->pd_free_fn(pd_prealloc);
12671274
return ret;
@@ -1284,7 +1291,12 @@ void blkcg_deactivate_policy(struct request_queue *q,
12841291
if (!blkcg_policy_enabled(q, pol))
12851292
return;
12861293

1287-
blk_queue_bypass_start(q);
1294+
if (q->mq_ops) {
1295+
blk_mq_freeze_queue(q);
1296+
blk_mq_quiesce_queue(q);
1297+
} else
1298+
blk_queue_bypass_start(q);
1299+
12881300
spin_lock_irq(q->queue_lock);
12891301

12901302
__clear_bit(pol->plid, q->blkcg_pols);
@@ -1304,7 +1316,11 @@ void blkcg_deactivate_policy(struct request_queue *q,
13041316
}
13051317

13061318
spin_unlock_irq(q->queue_lock);
1307-
blk_queue_bypass_end(q);
1319+
1320+
if (q->mq_ops)
1321+
blk_mq_unfreeze_queue(q);
1322+
else
1323+
blk_queue_bypass_end(q);
13081324
}
13091325
EXPORT_SYMBOL_GPL(blkcg_deactivate_policy);
13101326

block/blk-core.c

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@
3939

4040
#include "blk.h"
4141
#include "blk-mq.h"
42+
#include "blk-mq-sched.h"
4243
#include "blk-wbt.h"
4344

4445
EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_remap);
@@ -134,6 +135,7 @@ void blk_rq_init(struct request_queue *q, struct request *rq)
134135
rq->cmd = rq->__cmd;
135136
rq->cmd_len = BLK_MAX_CDB;
136137
rq->tag = -1;
138+
rq->internal_tag = -1;
137139
rq->start_time = jiffies;
138140
set_start_time_ns(rq);
139141
rq->part = NULL;
@@ -2127,7 +2129,7 @@ int blk_insert_cloned_request(struct request_queue *q, struct request *rq)
21272129
if (q->mq_ops) {
21282130
if (blk_queue_io_stat(q))
21292131
blk_account_io_start(rq, true);
2130-
blk_mq_insert_request(rq, false, true, false);
2132+
blk_mq_sched_insert_request(rq, false, true, false);
21312133
return 0;
21322134
}
21332135

block/blk-exec.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
#include <linux/sched/sysctl.h>
1010

1111
#include "blk.h"
12+
#include "blk-mq-sched.h"
1213

1314
/*
1415
* for max sense size
@@ -65,7 +66,7 @@ void blk_execute_rq_nowait(struct request_queue *q, struct gendisk *bd_disk,
6566
* be reused after dying flag is set
6667
*/
6768
if (q->mq_ops) {
68-
blk_mq_insert_request(rq, at_head, true, false);
69+
blk_mq_sched_insert_request(rq, at_head, true, false);
6970
return;
7071
}
7172

block/blk-flush.c

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,7 @@
7474
#include "blk.h"
7575
#include "blk-mq.h"
7676
#include "blk-mq-tag.h"
77+
#include "blk-mq-sched.h"
7778

7879
/* FLUSH/FUA sequences */
7980
enum {
@@ -391,9 +392,10 @@ static void mq_flush_data_end_io(struct request *rq, int error)
391392
* the comment in flush_end_io().
392393
*/
393394
spin_lock_irqsave(&fq->mq_flush_lock, flags);
394-
if (blk_flush_complete_seq(rq, fq, REQ_FSEQ_DATA, error))
395-
blk_mq_run_hw_queue(hctx, true);
395+
blk_flush_complete_seq(rq, fq, REQ_FSEQ_DATA, error);
396396
spin_unlock_irqrestore(&fq->mq_flush_lock, flags);
397+
398+
blk_mq_run_hw_queue(hctx, true);
397399
}
398400

399401
/**
@@ -453,9 +455,9 @@ void blk_insert_flush(struct request *rq)
453455
*/
454456
if ((policy & REQ_FSEQ_DATA) &&
455457
!(policy & (REQ_FSEQ_PREFLUSH | REQ_FSEQ_POSTFLUSH))) {
456-
if (q->mq_ops) {
457-
blk_mq_insert_request(rq, false, true, false);
458-
} else
458+
if (q->mq_ops)
459+
blk_mq_sched_insert_request(rq, false, true, false);
460+
else
459461
list_add_tail(&rq->queuelist, &q->queue_head);
460462
return;
461463
}

block/blk-ioc.c

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,9 @@ static void ioc_exit_icq(struct io_cq *icq)
4343
if (icq->flags & ICQ_EXITED)
4444
return;
4545

46-
if (et->ops.sq.elevator_exit_icq_fn)
46+
if (et->uses_mq && et->ops.mq.exit_icq)
47+
et->ops.mq.exit_icq(icq);
48+
else if (!et->uses_mq && et->ops.sq.elevator_exit_icq_fn)
4749
et->ops.sq.elevator_exit_icq_fn(icq);
4850

4951
icq->flags |= ICQ_EXITED;
@@ -383,7 +385,9 @@ struct io_cq *ioc_create_icq(struct io_context *ioc, struct request_queue *q,
383385
if (likely(!radix_tree_insert(&ioc->icq_tree, q->id, icq))) {
384386
hlist_add_head(&icq->ioc_node, &ioc->icq_list);
385387
list_add(&icq->q_node, &q->icq_list);
386-
if (et->ops.sq.elevator_init_icq_fn)
388+
if (et->uses_mq && et->ops.mq.init_icq)
389+
et->ops.mq.init_icq(icq);
390+
else if (!et->uses_mq && et->ops.sq.elevator_init_icq_fn)
387391
et->ops.sq.elevator_init_icq_fn(icq);
388392
} else {
389393
kmem_cache_free(et->icq_cache, icq);

block/blk-merge.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -763,7 +763,7 @@ int blk_attempt_req_merge(struct request_queue *q, struct request *rq,
763763
{
764764
struct elevator_queue *e = q->elevator;
765765

766-
if (e->type->ops.sq.elevator_allow_rq_merge_fn)
766+
if (!e->uses_mq && e->type->ops.sq.elevator_allow_rq_merge_fn)
767767
if (!e->type->ops.sq.elevator_allow_rq_merge_fn(q, rq, next))
768768
return 0;
769769

0 commit comments

Comments
 (0)