Skip to content

Commit 18e5a57

Browse files
Algodev-github and axboe
authored and committed
block, bfq: postpone rq preparation to insert or merge
When invoked for an I/O request rq, the prepare_request hook of bfq increments reference counters in the destination bfq_queue for rq. In this respect, after this hook has been invoked, rq may still be transformed into a request with no icq attached, i.e., for bfq, a request not associated with any bfq_queue. No further hook is invoked to signal this transformation to bfq (in general, to the destination elevator for rq). This leads bfq into an inconsistent state, because bfq has no chance to correctly lower these counters back. This inconsistency may in its turn cause incorrect scheduling and hangs. It certainly causes memory leaks, by making it impossible for bfq to free the involved bfq_queue. On the bright side, no transformation can still happen for rq after rq has been inserted into bfq, or merged with another, already inserted, request. Exploiting this fact, this commit addresses the above issue by delaying the preparation of an I/O request to when the request is inserted or merged. This change also gives a performance bonus: a lock-contention point gets removed. To prepare a request, bfq needs to hold its scheduler lock. After postponing request preparation to insertion or merging, no lock needs to be grabbed any longer in the prepare_request hook, while the lock already taken to perform insertion or merging is used to prepare the request as well. Tested-by: Oleksandr Natalenko <oleksandr@natalenko.name> Tested-by: Bart Van Assche <bart.vanassche@wdc.com> Signed-off-by: Paolo Valente <paolo.valente@linaro.org> Signed-off-by: Jens Axboe <axboe@kernel.dk>
1 parent 8e3c283 commit 18e5a57

File tree

1 file changed

+57
-29
lines changed

1 file changed

+57
-29
lines changed

block/bfq-iosched.c

Lines changed: 57 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -1858,6 +1858,8 @@ static int bfq_request_merge(struct request_queue *q, struct request **req,
18581858
return ELEVATOR_NO_MERGE;
18591859
}
18601860

1861+
static struct bfq_queue *bfq_init_rq(struct request *rq);
1862+
18611863
static void bfq_request_merged(struct request_queue *q, struct request *req,
18621864
enum elv_merge type)
18631865
{
@@ -1866,7 +1868,7 @@ static void bfq_request_merged(struct request_queue *q, struct request *req,
18661868
blk_rq_pos(req) <
18671869
blk_rq_pos(container_of(rb_prev(&req->rb_node),
18681870
struct request, rb_node))) {
1869-
struct bfq_queue *bfqq = RQ_BFQQ(req);
1871+
struct bfq_queue *bfqq = bfq_init_rq(req);
18701872
struct bfq_data *bfqd = bfqq->bfqd;
18711873
struct request *prev, *next_rq;
18721874

@@ -1894,7 +1896,8 @@ static void bfq_request_merged(struct request_queue *q, struct request *req,
18941896
static void bfq_requests_merged(struct request_queue *q, struct request *rq,
18951897
struct request *next)
18961898
{
1897-
struct bfq_queue *bfqq = RQ_BFQQ(rq), *next_bfqq = RQ_BFQQ(next);
1899+
struct bfq_queue *bfqq = bfq_init_rq(rq),
1900+
*next_bfqq = bfq_init_rq(next);
18981901

18991902
if (!RB_EMPTY_NODE(&rq->rb_node))
19001903
goto end;
@@ -4540,14 +4543,12 @@ static inline void bfq_update_insert_stats(struct request_queue *q,
45404543
unsigned int cmd_flags) {}
45414544
#endif
45424545

4543-
static void bfq_prepare_request(struct request *rq, struct bio *bio);
4544-
45454546
static void bfq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
45464547
bool at_head)
45474548
{
45484549
struct request_queue *q = hctx->queue;
45494550
struct bfq_data *bfqd = q->elevator->elevator_data;
4550-
struct bfq_queue *bfqq = RQ_BFQQ(rq);
4551+
struct bfq_queue *bfqq;
45514552
bool idle_timer_disabled = false;
45524553
unsigned int cmd_flags;
45534554

@@ -4562,24 +4563,13 @@ static void bfq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
45624563
blk_mq_sched_request_inserted(rq);
45634564

45644565
spin_lock_irq(&bfqd->lock);
4566+
bfqq = bfq_init_rq(rq);
45654567
if (at_head || blk_rq_is_passthrough(rq)) {
45664568
if (at_head)
45674569
list_add(&rq->queuelist, &bfqd->dispatch);
45684570
else
45694571
list_add_tail(&rq->queuelist, &bfqd->dispatch);
4570-
} else {
4571-
if (WARN_ON_ONCE(!bfqq)) {
4572-
/*
4573-
* This should never happen. Most likely rq is
4574-
* a requeued regular request, being
4575-
* re-inserted without being first
4576-
* re-prepared. Do a prepare, to avoid
4577-
* failure.
4578-
*/
4579-
bfq_prepare_request(rq, rq->bio);
4580-
bfqq = RQ_BFQQ(rq);
4581-
}
4582-
4572+
} else { /* bfqq is assumed to be non null here */
45834573
idle_timer_disabled = __bfq_insert_request(bfqd, rq);
45844574
/*
45854575
* Update bfqq, because, if a queue merge has occurred
@@ -4922,32 +4912,70 @@ static struct bfq_queue *bfq_get_bfqq_handle_split(struct bfq_data *bfqd,
49224912
}
49234913

49244914
/*
4925-
* Allocate bfq data structures associated with this request.
4915+
* Only reset private fields. The actual request preparation will be
4916+
* performed by bfq_init_rq, when rq is either inserted or merged. See
4917+
* comments on bfq_init_rq for the reason behind this delayed
4918+
* preparation.
49264919
*/
49274920
static void bfq_prepare_request(struct request *rq, struct bio *bio)
4921+
{
4922+
/*
4923+
* Regardless of whether we have an icq attached, we have to
4924+
* clear the scheduler pointers, as they might point to
4925+
* previously allocated bic/bfqq structs.
4926+
*/
4927+
rq->elv.priv[0] = rq->elv.priv[1] = NULL;
4928+
}
4929+
4930+
/*
4931+
* If needed, init rq, allocate bfq data structures associated with
4932+
* rq, and increment reference counters in the destination bfq_queue
4933+
* for rq. Return the destination bfq_queue for rq, or NULL if rq is
4934+
* not associated with any bfq_queue.
4935+
*
4936+
* This function is invoked by the functions that perform rq insertion
4937+
* or merging. One may have expected the above preparation operations
4938+
* to be performed in bfq_prepare_request, and not delayed to when rq
4939+
* is inserted or merged. The rationale behind this delayed
4940+
* preparation is that, after the prepare_request hook is invoked for
4941+
* rq, rq may still be transformed into a request with no icq, i.e., a
4942+
* request not associated with any queue. No bfq hook is invoked to
4943+
* signal this transformation. As a consequence, should these
4944+
* preparation operations be performed when the prepare_request hook
4945+
* is invoked, and should rq be transformed one moment later, bfq
4946+
* would end up in an inconsistent state, because it would have
4947+
* incremented some queue counters for an rq destined to
4948+
* transformation, without any chance to correctly lower these
4949+
* counters back. In contrast, no transformation can still happen for
4950+
* rq after rq has been inserted or merged. So, it is safe to execute
4951+
* these preparation operations when rq is finally inserted or merged.
4952+
*/
4953+
static struct bfq_queue *bfq_init_rq(struct request *rq)
49284954
{
49294955
struct request_queue *q = rq->q;
4956+
struct bio *bio = rq->bio;
49304957
struct bfq_data *bfqd = q->elevator->elevator_data;
49314958
struct bfq_io_cq *bic;
49324959
const int is_sync = rq_is_sync(rq);
49334960
struct bfq_queue *bfqq;
49344961
bool new_queue = false;
49354962
bool bfqq_already_existing = false, split = false;
49364963

4964+
if (unlikely(!rq->elv.icq))
4965+
return NULL;
4966+
49374967
/*
4938-
* Even if we don't have an icq attached, we should still clear
4939-
* the scheduler pointers, as they might point to previously
4940-
* allocated bic/bfqq structs.
4968+
* Assuming that elv.priv[1] is set only if everything is set
4969+
* for this rq. This holds true, because this function is
4970+
* invoked only for insertion or merging, and, after such
4971+
* events, a request cannot be manipulated any longer before
4972+
* being removed from bfq.
49414973
*/
4942-
if (!rq->elv.icq) {
4943-
rq->elv.priv[0] = rq->elv.priv[1] = NULL;
4944-
return;
4945-
}
4974+
if (rq->elv.priv[1])
4975+
return rq->elv.priv[1];
49464976

49474977
bic = icq_to_bic(rq->elv.icq);
49484978

4949-
spin_lock_irq(&bfqd->lock);
4950-
49514979
bfq_check_ioprio_change(bic, bio);
49524980

49534981
bfq_bic_update_cgroup(bic, bio);
@@ -5006,7 +5034,7 @@ static void bfq_prepare_request(struct request *rq, struct bio *bio)
50065034
if (unlikely(bfq_bfqq_just_created(bfqq)))
50075035
bfq_handle_burst(bfqd, bfqq);
50085036

5009-
spin_unlock_irq(&bfqd->lock);
5037+
return bfqq;
50105038
}
50115039

50125040
static void bfq_idle_slice_timer_body(struct bfq_queue *bfqq)

0 commit comments

Comments
 (0)