Skip to content

Commit 27e6fa9

Browse files
dennisszhou authored and axboe committed
blkcg: fix ref count issue with bio_blkcg using task_css
The accessor function bio_blkcg either returns the blkcg associated with the bio or finds one in the current context. This can cause an issue when trying to associate a bio with a blkcg. Particularly, it's the third case that is problematic:

    return css_to_blkcg(task_css(current, io_cgrp_id));

As the above may race against task migration and the cgroup exiting, it is not always ok to take a reference on the blkcg returned from bio_blkcg.

This patch adds association ahead of calling bio_blkcg rather than after. This makes association a required and explicit step along the code paths for calling bio_blkcg. blk_get_rl is modified as well to get a reference to the blkcg it may use and blk_put_rl will always put the reference back. Association is also moved above the bio_blkcg call to ensure it will not return NULL in blk-iolatency.

BFQ and CFQ utilize this flaw, but due to the complexity, I do not want to address this in this series. I've created a private version of the function with notes not to use it describing the flaw. Hopefully soon, that code can be cleaned up.

Signed-off-by: Dennis Zhou <dennisszhou@gmail.com>
Acked-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
1 parent 9ff0125 commit 27e6fa9

File tree

6 files changed

+107
-16
lines changed

6 files changed

+107
-16
lines changed

block/bfq-cgroup.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -642,7 +642,7 @@ void bfq_bic_update_cgroup(struct bfq_io_cq *bic, struct bio *bio)
642642
uint64_t serial_nr;
643643

644644
rcu_read_lock();
645-
serial_nr = bio_blkcg(bio)->css.serial_nr;
645+
serial_nr = __bio_blkcg(bio)->css.serial_nr;
646646

647647
/*
648648
* Check whether blkcg has changed. The condition may trigger
@@ -651,7 +651,7 @@ void bfq_bic_update_cgroup(struct bfq_io_cq *bic, struct bio *bio)
651651
if (unlikely(!bfqd) || likely(bic->blkcg_serial_nr == serial_nr))
652652
goto out;
653653

654-
bfqg = __bfq_bic_change_cgroup(bfqd, bic, bio_blkcg(bio));
654+
bfqg = __bfq_bic_change_cgroup(bfqd, bic, __bio_blkcg(bio));
655655
/*
656656
* Update blkg_path for bfq_log_* functions. We cache this
657657
* path, and update it here, for the following

block/bfq-iosched.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4359,7 +4359,7 @@ static struct bfq_queue *bfq_get_queue(struct bfq_data *bfqd,
43594359

43604360
rcu_read_lock();
43614361

4362-
bfqg = bfq_find_set_group(bfqd, bio_blkcg(bio));
4362+
bfqg = bfq_find_set_group(bfqd, __bio_blkcg(bio));
43634363
if (!bfqg) {
43644364
bfqq = &bfqd->oom_bfqq;
43654365
goto out;

block/bio.c

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1988,13 +1988,19 @@ int bio_associate_blkcg_from_page(struct bio *bio, struct page *page)
19881988
*
19891989
* This function takes an extra reference of @blkcg_css which will be put
19901990
* when @bio is released. The caller must own @bio and is responsible for
1991-
* synchronizing calls to this function.
1991+
* synchronizing calls to this function. If @blkcg_css is NULL, a call to
1992+
* blkcg_get_css finds the current css from the kthread or task.
19921993
*/
19931994
int bio_associate_blkcg(struct bio *bio, struct cgroup_subsys_state *blkcg_css)
19941995
{
19951996
if (unlikely(bio->bi_css))
19961997
return -EBUSY;
1997-
css_get(blkcg_css);
1998+
1999+
if (blkcg_css)
2000+
css_get(blkcg_css);
2001+
else
2002+
blkcg_css = blkcg_get_css();
2003+
19982004
bio->bi_css = blkcg_css;
19992005
return 0;
20002006
}

block/blk-iolatency.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -401,8 +401,8 @@ static void blkcg_iolatency_throttle(struct rq_qos *rqos, struct bio *bio,
401401
return;
402402

403403
rcu_read_lock();
404+
bio_associate_blkcg(bio, NULL);
404405
blkcg = bio_blkcg(bio);
405-
bio_associate_blkcg(bio, &blkcg->css);
406406
blkg = blkg_lookup(blkcg, q);
407407
if (unlikely(!blkg)) {
408408
if (!lock)

block/cfq-iosched.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3753,7 +3753,7 @@ static void check_blkcg_changed(struct cfq_io_cq *cic, struct bio *bio)
37533753
uint64_t serial_nr;
37543754

37553755
rcu_read_lock();
3756-
serial_nr = bio_blkcg(bio)->css.serial_nr;
3756+
serial_nr = __bio_blkcg(bio)->css.serial_nr;
37573757
rcu_read_unlock();
37583758

37593759
/*
@@ -3818,7 +3818,7 @@ cfq_get_queue(struct cfq_data *cfqd, bool is_sync, struct cfq_io_cq *cic,
38183818
struct cfq_group *cfqg;
38193819

38203820
rcu_read_lock();
3821-
cfqg = cfq_lookup_cfqg(cfqd, bio_blkcg(bio));
3821+
cfqg = cfq_lookup_cfqg(cfqd, __bio_blkcg(bio));
38223822
if (!cfqg) {
38233823
cfqq = &cfqd->oom_cfqq;
38243824
goto out;

include/linux/blk-cgroup.h

Lines changed: 93 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -230,22 +230,100 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
230230
char *input, struct blkg_conf_ctx *ctx);
231231
void blkg_conf_finish(struct blkg_conf_ctx *ctx);
232232

233+
/**
234+
* blkcg_css - find the current css
235+
*
236+
* Find the css associated with either the kthread or the current task.
237+
* This may return a dying css, so it is up to the caller to use tryget logic
238+
* to confirm it is alive and well.
239+
*/
240+
static inline struct cgroup_subsys_state *blkcg_css(void)
241+
{
242+
struct cgroup_subsys_state *css;
243+
244+
css = kthread_blkcg();
245+
if (css)
246+
return css;
247+
return task_css(current, io_cgrp_id);
248+
}
249+
250+
/**
251+
* blkcg_get_css - find and get a reference to the css
252+
*
253+
* Find the css associated with either the kthread or the current task.
254+
* This takes a reference on the blkcg which will need to be managed by the
255+
* caller.
256+
*/
257+
static inline struct cgroup_subsys_state *blkcg_get_css(void)
258+
{
259+
struct cgroup_subsys_state *css;
260+
261+
rcu_read_lock();
262+
263+
css = kthread_blkcg();
264+
if (css) {
265+
css_get(css);
266+
} else {
267+
/*
268+
* This is a bit complicated. It is possible task_css is seeing
269+
* an old css pointer here. This is caused by the current
270+
* thread migrating away from this cgroup and this cgroup dying.
271+
* css_tryget() will fail when trying to take a ref on a cgroup
272+
* whose ref count has hit 0.
273+
*
274+
* Therefore, if it does fail, this means current must have
275+
* been swapped away already and this is waiting for it to
276+
* propagate on the polling cpu. Hence the use of cpu_relax().
277+
*/
278+
while (true) {
279+
css = task_css(current, io_cgrp_id);
280+
if (likely(css_tryget(css)))
281+
break;
282+
cpu_relax();
283+
}
284+
}
285+
286+
rcu_read_unlock();
287+
288+
return css;
289+
}
233290

234291
static inline struct blkcg *css_to_blkcg(struct cgroup_subsys_state *css)
235292
{
236293
return css ? container_of(css, struct blkcg, css) : NULL;
237294
}
238295

239-
static inline struct blkcg *bio_blkcg(struct bio *bio)
296+
/**
297+
* __bio_blkcg - internal version of bio_blkcg for bfq and cfq
298+
*
299+
* DO NOT USE.
300+
* There is a flaw using this version of the function. In particular, this was
301+
* used in a broken paradigm where association was called on the given css. It
302+
* is possible though that the returned css from task_css() is in the process
303+
* of dying due to migration of the current task. So it is improper to assume
304+
* *_get() is going to succeed. Both BFQ and CFQ rely on this logic and will
305+
* take additional work to handle more gracefully.
306+
*/
307+
static inline struct blkcg *__bio_blkcg(struct bio *bio)
240308
{
241-
struct cgroup_subsys_state *css;
309+
if (bio && bio->bi_css)
310+
return css_to_blkcg(bio->bi_css);
311+
return css_to_blkcg(blkcg_css());
312+
}
242313

314+
/**
315+
* bio_blkcg - grab the blkcg associated with a bio
316+
* @bio: target bio
317+
*
318+
* This returns the blkcg associated with a bio, NULL if not associated.
319+
* Callers are expected to either handle NULL or know association has been
320+
* done prior to calling this.
321+
*/
322+
static inline struct blkcg *bio_blkcg(struct bio *bio)
323+
{
243324
if (bio && bio->bi_css)
244325
return css_to_blkcg(bio->bi_css);
245-
css = kthread_blkcg();
246-
if (css)
247-
return css_to_blkcg(css);
248-
return css_to_blkcg(task_css(current, io_cgrp_id));
326+
return NULL;
249327
}
250328

251329
static inline bool blk_cgroup_congested(void)
@@ -534,6 +612,10 @@ static inline struct request_list *blk_get_rl(struct request_queue *q,
534612
rcu_read_lock();
535613

536614
blkcg = bio_blkcg(bio);
615+
if (blkcg)
616+
css_get(&blkcg->css);
617+
else
618+
blkcg = css_to_blkcg(blkcg_get_css());
537619

538620
/* bypass blkg lookup and use @q->root_rl directly for root */
539621
if (blkcg == &blkcg_root)
@@ -565,6 +647,8 @@ static inline struct request_list *blk_get_rl(struct request_queue *q,
565647
*/
566648
static inline void blk_put_rl(struct request_list *rl)
567649
{
650+
/* an additional ref is always taken for rl */
651+
css_put(&rl->blkg->blkcg->css);
568652
if (rl->blkg->blkcg != &blkcg_root)
569653
blkg_put(rl->blkg);
570654
}
@@ -805,10 +889,10 @@ static inline bool blkcg_bio_issue_check(struct request_queue *q,
805889
bool throtl = false;
806890

807891
rcu_read_lock();
808-
blkcg = bio_blkcg(bio);
809892

810893
/* associate blkcg if bio hasn't attached one */
811-
bio_associate_blkcg(bio, &blkcg->css);
894+
bio_associate_blkcg(bio, NULL);
895+
blkcg = bio_blkcg(bio);
812896

813897
blkg = blkg_lookup(blkcg, q);
814898
if (unlikely(!blkg)) {
@@ -930,6 +1014,7 @@ static inline int blkcg_activate_policy(struct request_queue *q,
9301014
static inline void blkcg_deactivate_policy(struct request_queue *q,
9311015
const struct blkcg_policy *pol) { }
9321016

1017+
static inline struct blkcg *__bio_blkcg(struct bio *bio) { return NULL; }
9331018
static inline struct blkcg *bio_blkcg(struct bio *bio) { return NULL; }
9341019

9351020
static inline struct blkg_policy_data *blkg_to_pd(struct blkcg_gq *blkg,

0 commit comments

Comments
 (0)