
Commit d48ece2

Jianchao Wang authored and axboe committed
blk-mq: init hctx sched after update ctx and hctx mapping
Currently, when updating nr_hw_queues, the IO scheduler's init_hctx is invoked before the mapping between ctx and hctx has been adapted by blk_mq_map_swqueue. An IO scheduler's init_hctx (kyber's, for example) may depend on this mapping, compute a wrong result, and finally panic.

A simple way to fix this is to switch the IO scheduler to 'none' before updating nr_hw_queues, and then switch it back after the update. blk_mq_sched_init_hctx and blk_mq_sched_exit_hctx are removed since nobody uses them any more.

Signed-off-by: Jianchao Wang <jianchao.w.wang@oracle.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
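For orientation, a minimal sketch, not part of the commit, of the pre-fix ordering inside __blk_mq_update_nr_hw_queues(); the helper names come from the surrounding kernel code of this era, and the comments are ours:

static void old_ordering_sketch(struct blk_mq_tag_set *set,
				struct request_queue *q)
{
	blk_mq_realloc_hw_ctxs(set, q);	/* blk_mq_init_hctx() used to call
					 * blk_mq_sched_init_hctx() in here,
					 * so kyber sampled the old
					 * ctx <-> hctx mapping */
	blk_mq_queue_reinit(q);		/* blk_mq_map_swqueue() rebuilds
					 * the mapping only at this point */
}

After this commit the scheduler is simply detached while the above runs; its init_hctx only executes when the elevator is switched back, with the new mapping already in place.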
1 parent fcedba4 commit d48ece2


5 files changed, +98 -65 lines changed


block/blk-mq-sched.c

Lines changed: 0 additions & 44 deletions
@@ -462,50 +462,6 @@ static void blk_mq_sched_tags_teardown(struct request_queue *q)
 		blk_mq_sched_free_tags(set, hctx, i);
 }
 
-int blk_mq_sched_init_hctx(struct request_queue *q, struct blk_mq_hw_ctx *hctx,
-			   unsigned int hctx_idx)
-{
-	struct elevator_queue *e = q->elevator;
-	int ret;
-
-	if (!e)
-		return 0;
-
-	ret = blk_mq_sched_alloc_tags(q, hctx, hctx_idx);
-	if (ret)
-		return ret;
-
-	if (e->type->ops.mq.init_hctx) {
-		ret = e->type->ops.mq.init_hctx(hctx, hctx_idx);
-		if (ret) {
-			blk_mq_sched_free_tags(q->tag_set, hctx, hctx_idx);
-			return ret;
-		}
-	}
-
-	blk_mq_debugfs_register_sched_hctx(q, hctx);
-
-	return 0;
-}
-
-void blk_mq_sched_exit_hctx(struct request_queue *q, struct blk_mq_hw_ctx *hctx,
-			    unsigned int hctx_idx)
-{
-	struct elevator_queue *e = q->elevator;
-
-	if (!e)
-		return;
-
-	blk_mq_debugfs_unregister_sched_hctx(hctx);
-
-	if (e->type->ops.mq.exit_hctx && hctx->sched_data) {
-		e->type->ops.mq.exit_hctx(hctx, hctx_idx);
-		hctx->sched_data = NULL;
-	}
-
-	blk_mq_sched_free_tags(q->tag_set, hctx, hctx_idx);
-}
-
 int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e)
 {
 	struct blk_mq_hw_ctx *hctx;

block/blk-mq-sched.h

Lines changed: 0 additions & 5 deletions
@@ -28,11 +28,6 @@ void blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx);
 int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e);
 void blk_mq_exit_sched(struct request_queue *q, struct elevator_queue *e);
 
-int blk_mq_sched_init_hctx(struct request_queue *q, struct blk_mq_hw_ctx *hctx,
-			   unsigned int hctx_idx);
-void blk_mq_sched_exit_hctx(struct request_queue *q, struct blk_mq_hw_ctx *hctx,
-			    unsigned int hctx_idx);
-
 static inline bool
 blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio)
 {

block/blk-mq.c

Lines changed: 84 additions & 8 deletions
@@ -2147,8 +2147,6 @@ static void blk_mq_exit_hctx(struct request_queue *q,
 	if (set->ops->exit_request)
 		set->ops->exit_request(set, hctx->fq->flush_rq, hctx_idx);
 
-	blk_mq_sched_exit_hctx(q, hctx, hctx_idx);
-
 	if (set->ops->exit_hctx)
 		set->ops->exit_hctx(hctx, hctx_idx);
 
@@ -2216,12 +2214,9 @@ static int blk_mq_init_hctx(struct request_queue *q,
 	    set->ops->init_hctx(hctx, set->driver_data, hctx_idx))
 		goto free_bitmap;
 
-	if (blk_mq_sched_init_hctx(q, hctx, hctx_idx))
-		goto exit_hctx;
-
 	hctx->fq = blk_alloc_flush_queue(q, hctx->numa_node, set->cmd_size);
 	if (!hctx->fq)
-		goto sched_exit_hctx;
+		goto exit_hctx;
 
 	if (blk_mq_init_request(set, hctx->fq->flush_rq, hctx_idx, node))
 		goto free_fq;
@@ -2235,8 +2230,6 @@ static int blk_mq_init_hctx(struct request_queue *q,
 
 free_fq:
 	kfree(hctx->fq);
-sched_exit_hctx:
-	blk_mq_sched_exit_hctx(q, hctx, hctx_idx);
exit_hctx:
 	if (set->ops->exit_hctx)
 		set->ops->exit_hctx(hctx, hctx_idx);
@@ -2898,10 +2891,81 @@ int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr)
 	return ret;
 }
 
+/*
+ * request_queue and elevator_type pair.
+ * It is just used by __blk_mq_update_nr_hw_queues to cache
+ * the elevator_type associated with a request_queue.
+ */
+struct blk_mq_qe_pair {
+	struct list_head node;
+	struct request_queue *q;
+	struct elevator_type *type;
+};
+
+/*
+ * Cache the elevator_type in qe pair list and switch the
+ * io scheduler to 'none'
+ */
+static bool blk_mq_elv_switch_none(struct list_head *head,
+		struct request_queue *q)
+{
+	struct blk_mq_qe_pair *qe;
+
+	if (!q->elevator)
+		return true;
+
+	qe = kmalloc(sizeof(*qe), GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY);
+	if (!qe)
+		return false;
+
+	INIT_LIST_HEAD(&qe->node);
+	qe->q = q;
+	qe->type = q->elevator->type;
+	list_add(&qe->node, head);
+
+	mutex_lock(&q->sysfs_lock);
+	/*
+	 * After elevator_switch_mq, the previous elevator_queue will be
+	 * released by elevator_release. The reference of the io scheduler
+	 * module get by elevator_get will also be put. So we need to get
+	 * a reference of the io scheduler module here to prevent it to be
+	 * removed.
+	 */
+	__module_get(qe->type->elevator_owner);
+	elevator_switch_mq(q, NULL);
+	mutex_unlock(&q->sysfs_lock);
+
+	return true;
+}
+
+static void blk_mq_elv_switch_back(struct list_head *head,
+		struct request_queue *q)
+{
+	struct blk_mq_qe_pair *qe;
+	struct elevator_type *t = NULL;
+
+	list_for_each_entry(qe, head, node)
+		if (qe->q == q) {
+			t = qe->type;
+			break;
+		}
+
+	if (!t)
+		return;
+
+	list_del(&qe->node);
+	kfree(qe);
+
+	mutex_lock(&q->sysfs_lock);
+	elevator_switch_mq(q, t);
+	mutex_unlock(&q->sysfs_lock);
+}
+
 static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set,
 		int nr_hw_queues)
 {
 	struct request_queue *q;
+	LIST_HEAD(head);
 
 	lockdep_assert_held(&set->tag_list_lock);
 
@@ -2912,6 +2976,14 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set,
 
 	list_for_each_entry(q, &set->tag_list, tag_set_list)
 		blk_mq_freeze_queue(q);
+	/*
+	 * Switch IO scheduler to 'none', cleaning up the data associated
+	 * with the previous scheduler. We will switch back once we are done
+	 * updating the new sw to hw queue mappings.
+	 */
+	list_for_each_entry(q, &set->tag_list, tag_set_list)
+		if (!blk_mq_elv_switch_none(&head, q))
+			goto switch_back;
 
 	set->nr_hw_queues = nr_hw_queues;
 	blk_mq_update_queue_map(set);
@@ -2920,6 +2992,10 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set,
 		blk_mq_queue_reinit(q);
 	}
 
+switch_back:
+	list_for_each_entry(q, &set->tag_list, tag_set_list)
+		blk_mq_elv_switch_back(&head, q);
+
 	list_for_each_entry(q, &set->tag_list, tag_set_list)
 		blk_mq_unfreeze_queue(q);
 }
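Condensing the hunks above, a minimal per-queue sketch of the new update flow, using the helper names from this diff, with error handling and the tag_list iteration elided:

static void update_flow_sketch(struct blk_mq_tag_set *set,
			       struct request_queue *q, int nr_hw_queues)
{
	LIST_HEAD(head);			/* blk_mq_qe_pair cache */

	blk_mq_freeze_queue(q);
	blk_mq_elv_switch_none(&head, q);	/* scheduler -> 'none',
						 * elevator_type cached */

	set->nr_hw_queues = nr_hw_queues;
	blk_mq_update_queue_map(set);
	blk_mq_queue_reinit(q);			/* ctx <-> hctx remapped */

	blk_mq_elv_switch_back(&head, q);	/* sched init_hctx now runs
						 * against the new mapping */
	blk_mq_unfreeze_queue(q);
}

Note the GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY allocation in blk_mq_elv_switch_none(): the queues are already frozen at that point, so the allocation must not recurse into IO, and on failure the update bails out through the switch_back label rather than proceeding with a scheduler still attached.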

block/blk.h

Lines changed: 2 additions & 0 deletions
@@ -234,6 +234,8 @@ static inline void elv_deactivate_rq(struct request_queue *q, struct request *rq
 
 int elevator_init(struct request_queue *);
 int elevator_init_mq(struct request_queue *q);
+int elevator_switch_mq(struct request_queue *q,
+		struct elevator_type *new_e);
 void elevator_exit(struct request_queue *, struct elevator_queue *);
 int elv_register_queue(struct request_queue *q);
 void elv_unregister_queue(struct request_queue *q);

block/elevator.c

Lines changed: 12 additions & 8 deletions
@@ -933,16 +933,13 @@ void elv_unregister(struct elevator_type *e)
 }
 EXPORT_SYMBOL_GPL(elv_unregister);
 
-static int elevator_switch_mq(struct request_queue *q,
+int elevator_switch_mq(struct request_queue *q,
 			      struct elevator_type *new_e)
 {
 	int ret;
 
 	lockdep_assert_held(&q->sysfs_lock);
 
-	blk_mq_freeze_queue(q);
-	blk_mq_quiesce_queue(q);
-
 	if (q->elevator) {
 		if (q->elevator->registered)
 			elv_unregister_queue(q);
@@ -968,8 +965,6 @@ static int elevator_switch_mq(struct request_queue *q,
 	blk_add_trace_msg(q, "elv switch: none");
 
 out:
-	blk_mq_unquiesce_queue(q);
-	blk_mq_unfreeze_queue(q);
 	return ret;
 }
 
@@ -1021,8 +1016,17 @@ static int elevator_switch(struct request_queue *q, struct elevator_type *new_e)
 
 	lockdep_assert_held(&q->sysfs_lock);
 
-	if (q->mq_ops)
-		return elevator_switch_mq(q, new_e);
+	if (q->mq_ops) {
+		blk_mq_freeze_queue(q);
+		blk_mq_quiesce_queue(q);
+
+		err = elevator_switch_mq(q, new_e);
+
+		blk_mq_unquiesce_queue(q);
+		blk_mq_unfreeze_queue(q);
+
+		return err;
+	}
 
 	/*
 	 * Turn on BYPASS and drain all requests w/ elevator private data.
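The freeze/quiesce bracket moves out of elevator_switch_mq() and into its callers: __blk_mq_update_nr_hw_queues() has already frozen every queue in the tag set when it switches elevators, while the sysfs path keeps the old behavior by wrapping the call in elevator_switch(). A minimal sketch, not part of the commit, of the resulting sysfs-path contract:

static int sysfs_switch_sketch(struct request_queue *q,
			       struct elevator_type *new_e)
{
	int err;

	/* q->sysfs_lock is assumed held, as in elevator_switch() */
	blk_mq_freeze_queue(q);
	blk_mq_quiesce_queue(q);

	err = elevator_switch_mq(q, new_e);

	blk_mq_unquiesce_queue(q);
	blk_mq_unfreeze_queue(q);
	return err;
}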
