
Commit 6e76871

Ming Lei authored and Jens Axboe committed
blk-mq: dequeue request one by one from sw queue if hctx is busy
It won't be efficient to dequeue requests one by one from the sw queue, but we have to do that when the queue is busy to get better merge performance.

This patch uses an Exponential Weighted Moving Average (EWMA) to figure out whether the queue is busy, and dequeues requests one by one from the sw queue only when it is.

Fixes: b347689 ("blk-mq-sched: improve dispatching from sw queue")
Cc: Kashyap Desai <kashyap.desai@broadcom.com>
Cc: Laurence Oberman <loberman@redhat.com>
Cc: Omar Sandoval <osandov@fb.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Bart Van Assche <bart.vanassche@wdc.com>
Cc: Hannes Reinecke <hare@suse.de>
Reported-by: Kashyap Desai <kashyap.desai@broadcom.com>
Tested-by: Kashyap Desai <kashyap.desai@broadcom.com>
Signed-off-by: Ming Lei <ming.lei@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
1 parent d893ff8 · commit 6e76871
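Concretely, with the constants the patch defines below (weight 8, factor 4), each dispatch run updates the average in integer arithmetic as ewma = (7 * ewma + (busy ? 16 : 0)) / 8: a single out-of-resource dispatch lifts dispatch_busy from 0 to 2, sustained failures saturate it at 9, and about eight clean runs decay it back to 0.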

File tree: 4 files changed, +45 −11 lines

block/blk-mq-debugfs.c (9 additions, 0 deletions)
@@ -622,6 +622,14 @@ static int hctx_active_show(void *data, struct seq_file *m)
 	return 0;
 }
 
+static int hctx_dispatch_busy_show(void *data, struct seq_file *m)
+{
+	struct blk_mq_hw_ctx *hctx = data;
+
+	seq_printf(m, "%u\n", hctx->dispatch_busy);
+	return 0;
+}
+
 static void *ctx_rq_list_start(struct seq_file *m, loff_t *pos)
 	__acquires(&ctx->lock)
 {
@@ -783,6 +791,7 @@ static const struct blk_mq_debugfs_attr blk_mq_debugfs_hctx_attrs[] = {
 	{"queued", 0600, hctx_queued_show, hctx_queued_write},
 	{"run", 0600, hctx_run_show, hctx_run_write},
 	{"active", 0400, hctx_active_show},
+	{"dispatch_busy", 0400, hctx_dispatch_busy_show},
 	{},
 };
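With the attribute wired up, the current EWMA value can be inspected at runtime. Assuming the standard blk-mq debugfs layout, reading /sys/kernel/debug/block/<dev>/hctx0/dispatch_busy should print it.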

block/blk-mq-sched.c (2 additions, 9 deletions)
@@ -206,15 +206,8 @@ void blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx)
 		}
 	} else if (has_sched_dispatch) {
 		blk_mq_do_dispatch_sched(hctx);
-	} else if (q->mq_ops->get_budget) {
-		/*
-		 * If we need to get budget before queuing request, we
-		 * dequeue request one by one from sw queue for avoiding
-		 * to mess up I/O merge when dispatch runs out of resource.
-		 *
-		 * TODO: get more budgets, and dequeue more requests in
-		 * one time.
-		 */
+	} else if (hctx->dispatch_busy) {
+		/* dequeue request one by one from sw queue if queue is busy */
 		blk_mq_do_dispatch_ctx(hctx);
 	} else {
 		blk_mq_flush_busy_ctxs(hctx, &rq_list);
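In effect, the one-by-one slow path is no longer keyed off whether the driver implements .get_budget (a static property of the driver), but off EWMA feedback from actual dispatch results: only a hardware queue whose recent dispatches kept running out of resources pays the per-request dequeue cost in exchange for better merging.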

block/blk-mq.c (32 additions, 1 deletion)
@@ -1074,6 +1074,35 @@ static bool blk_mq_mark_tag_wait(struct blk_mq_hw_ctx *hctx,
 	return true;
 }
 
+#define BLK_MQ_DISPATCH_BUSY_EWMA_WEIGHT  8
+#define BLK_MQ_DISPATCH_BUSY_EWMA_FACTOR  4
+/*
+ * Update dispatch busy with the Exponential Weighted Moving Average(EWMA):
+ * - EWMA is one simple way to compute running average value
+ * - weight(7/8 and 1/8) is applied so that it can decrease exponentially
+ * - take 4 as factor for avoiding to get too small(0) result, and this
+ *   factor doesn't matter because EWMA decreases exponentially
+ */
+static void blk_mq_update_dispatch_busy(struct blk_mq_hw_ctx *hctx, bool busy)
+{
+	unsigned int ewma;
+
+	if (hctx->queue->elevator)
+		return;
+
+	ewma = hctx->dispatch_busy;
+
+	if (!ewma && !busy)
+		return;
+
+	ewma *= BLK_MQ_DISPATCH_BUSY_EWMA_WEIGHT - 1;
+	if (busy)
+		ewma += 1 << BLK_MQ_DISPATCH_BUSY_EWMA_FACTOR;
+	ewma /= BLK_MQ_DISPATCH_BUSY_EWMA_WEIGHT;
+
+	hctx->dispatch_busy = ewma;
+}
+
 #define BLK_MQ_RESOURCE_DELAY	3		/* ms units */
 
 /*
@@ -1210,8 +1239,10 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list,
 		else if (needs_restart && (ret == BLK_STS_RESOURCE))
 			blk_mq_delay_run_hw_queue(hctx, BLK_MQ_RESOURCE_DELAY);
 
+		blk_mq_update_dispatch_busy(hctx, true);
 		return false;
-	}
+	} else
+		blk_mq_update_dispatch_busy(hctx, false);
 
 	/*
 	 * If the host/device is unable to accept more work, inform the
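As a sanity check on the arithmetic, the following standalone userspace sketch (not part of the patch; the file and helper names are made up) replays blk_mq_update_dispatch_busy() with the same constants:

/* ewma_sim.c: hypothetical simulation mirroring blk_mq_update_dispatch_busy() */
#include <stdio.h>
#include <stdbool.h>

#define EWMA_WEIGHT 8	/* BLK_MQ_DISPATCH_BUSY_EWMA_WEIGHT */
#define EWMA_FACTOR 4	/* BLK_MQ_DISPATCH_BUSY_EWMA_FACTOR */

static unsigned int update(unsigned int ewma, bool busy)
{
	/* same integer arithmetic as the kernel function */
	ewma *= EWMA_WEIGHT - 1;
	if (busy)
		ewma += 1 << EWMA_FACTOR;
	return ewma / EWMA_WEIGHT;
}

int main(void)
{
	unsigned int ewma = 0;
	int i;

	/* sustained busy dispatches: 2, 3, 4, 5, 6, 7, 8, 9, 9, ... */
	for (i = 0; i < 10; i++) {
		ewma = update(ewma, true);
		printf("busy #%d -> %u\n", i + 1, ewma);
	}

	/* idle dispatches: 7, 6, 5, 4, 3, 2, 1, 0 -- busy state clears */
	for (i = 0; ewma; i++) {
		ewma = update(ewma, false);
		printf("idle #%d -> %u\n", i + 1, ewma);
	}
	return 0;
}

Since blk_mq_sched_dispatch_requests() treats any nonzero dispatch_busy as busy, a single failed dispatch is enough to switch the hctx to the one-by-one path for its next couple of runs.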

include/linux/blk-mq.h (2 additions, 1 deletion)
@@ -35,9 +35,10 @@ struct blk_mq_hw_ctx {
 	struct sbitmap		ctx_map;
 
 	struct blk_mq_ctx	*dispatch_from;
+	unsigned int		dispatch_busy;
 
-	struct blk_mq_ctx	**ctxs;
 	unsigned int		nr_ctx;
+	struct blk_mq_ctx	**ctxs;
 
 	spinlock_t		dispatch_wait_lock;
 	wait_queue_entry_t	dispatch_wait;
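One layout note: dispatch_busy lands next to dispatch_from, both of which the dispatch path reads, and nr_ctx and ctxs trade places, presumably so the two 4-byte fields (dispatch_busy and nr_ctx) pack into one 8-byte slot instead of leaving a padding hole; the commit message doesn't spell out the rationale.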
