Commit ec13b1d

htejun authored and axboe committed
blkcg: always create the blkcg_gq for the root blkcg
Currently, blkcg does a minor optimization where the root blkcg is created when the first blkcg policy is activated on a queue and destroyed on the deactivation of the last. On systems where blkcg is configured but not used, this saves one blkcg_gq struct per queue. On systems where blkcg is actually used, there's no difference. The only case where this can lead to any meaningful, albeit still minute, saving in memory consumption is when all blkcg policies are deactivated after being widely used in the system, which is a highly unlikely scenario.

The conditional existence of the root blkcg_gq has already created several bugs in blkcg and became an issue once again for the new per-cgroup wb_congested mechanism for cgroup writeback support, leading to a NULL dereference when no blkcg policy is active. This is really not worth bothering with.

This patch makes blkcg always allocate and link the root blkcg_gq and release it only on queue destruction.

Signed-off-by: Tejun Heo <tj@kernel.org>
Reported-by: Fengguang Wu <fengguang.wu@intel.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
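For readers skimming the diff below, the shape of the change can be summarized with a small user-space sketch. This is not kernel code: toy_queue, root_group and the helper names are hypothetical stand-ins used only to illustrate the lifetime rule the patch adopts, namely that the root group now follows the queue rather than policy activation.

/*
 * Illustrative sketch only (hypothetical names, not kernel APIs): the root
 * group is allocated at queue setup and freed only at queue teardown,
 * independent of how many policies are active.
 */
#include <stdio.h>
#include <stdlib.h>

struct root_group { int dummy; };

struct toy_queue {
        struct root_group *root;        /* always valid between init and exit */
        int policies_active;
};

static int toy_queue_init(struct toy_queue *q)
{
        q->root = calloc(1, sizeof(*q->root));  /* created up front, like the root blkg */
        if (!q->root)
                return -1;
        q->policies_active = 0;
        return 0;
}

static void toy_policy_activate(struct toy_queue *q)
{
        q->policies_active++;           /* no longer responsible for creating q->root */
}

static void toy_policy_deactivate(struct toy_queue *q)
{
        q->policies_active--;           /* no longer frees q->root when it hits zero */
}

static void toy_queue_exit(struct toy_queue *q)
{
        free(q->root);                  /* released only on queue destruction */
        q->root = NULL;
}

int main(void)
{
        struct toy_queue q;

        if (toy_queue_init(&q))
                return 1;
        toy_policy_activate(&q);
        toy_policy_deactivate(&q);
        /* the root group is still there, so nothing dereferences NULL */
        printf("root group present: %s\n", q.root ? "yes" : "no");
        toy_queue_exit(&q);
        return 0;
}

The diff applies the same rule to blkcg: blkcg_init_queue() creates and links the root blkcg_gq, and policy activation/deactivation no longer creates or destroys it.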
1 parent efa7d1c commit ec13b1d

File tree

1 file changed: +41 -55 lines

block/blk-cgroup.c

Lines changed: 41 additions & 55 deletions
@@ -235,13 +235,8 @@ static struct blkcg_gq *blkg_create(struct blkcg *blkcg,
 	blkg->online = true;
 	spin_unlock(&blkcg->lock);
 
-	if (!ret) {
-		if (blkcg == &blkcg_root) {
-			q->root_blkg = blkg;
-			q->root_rl.blkg = blkg;
-		}
+	if (!ret)
 		return blkg;
-	}
 
 	/* @blkg failed fully initialized, use the usual release path */
 	blkg_put(blkg);
@@ -339,15 +334,6 @@ static void blkg_destroy(struct blkcg_gq *blkg)
 	if (rcu_access_pointer(blkcg->blkg_hint) == blkg)
 		rcu_assign_pointer(blkcg->blkg_hint, NULL);
 
-	/*
-	 * If root blkg is destroyed. Just clear the pointer since root_rl
-	 * does not take reference on root blkg.
-	 */
-	if (blkcg == &blkcg_root) {
-		blkg->q->root_blkg = NULL;
-		blkg->q->root_rl.blkg = NULL;
-	}
-
 	/*
 	 * Put the reference taken at the time of creation so that when all
 	 * queues are gone, group can be destroyed.
@@ -855,9 +841,45 @@ blkcg_css_alloc(struct cgroup_subsys_state *parent_css)
  */
 int blkcg_init_queue(struct request_queue *q)
 {
-	might_sleep();
+	struct blkcg_gq *new_blkg, *blkg;
+	bool preloaded;
+	int ret;
+
+	new_blkg = blkg_alloc(&blkcg_root, q, GFP_KERNEL);
+	if (!new_blkg)
+		return -ENOMEM;
+
+	preloaded = !radix_tree_preload(GFP_KERNEL);
 
-	return blk_throtl_init(q);
+	/*
+	 * Make sure the root blkg exists and count the existing blkgs. As
+	 * @q is bypassing at this point, blkg_lookup_create() can't be
+	 * used. Open code insertion.
+	 */
+	rcu_read_lock();
+	spin_lock_irq(q->queue_lock);
+	blkg = blkg_create(&blkcg_root, q, new_blkg);
+	spin_unlock_irq(q->queue_lock);
+	rcu_read_unlock();
+
+	if (preloaded)
+		radix_tree_preload_end();
+
+	if (IS_ERR(blkg)) {
+		kfree(new_blkg);
+		return PTR_ERR(blkg);
+	}
+
+	q->root_blkg = blkg;
+	q->root_rl.blkg = blkg;
+
+	ret = blk_throtl_init(q);
+	if (ret) {
+		spin_lock_irq(q->queue_lock);
+		blkg_destroy_all(q);
+		spin_unlock_irq(q->queue_lock);
+	}
+	return ret;
 }
 
 /**
@@ -958,52 +980,20 @@ int blkcg_activate_policy(struct request_queue *q,
 			  const struct blkcg_policy *pol)
 {
 	LIST_HEAD(pds);
-	struct blkcg_gq *blkg, *new_blkg;
+	struct blkcg_gq *blkg;
 	struct blkg_policy_data *pd, *n;
 	int cnt = 0, ret;
-	bool preloaded;
 
 	if (blkcg_policy_enabled(q, pol))
 		return 0;
 
-	/* preallocations for root blkg */
-	new_blkg = blkg_alloc(&blkcg_root, q, GFP_KERNEL);
-	if (!new_blkg)
-		return -ENOMEM;
-
+	/* count and allocate policy_data for all existing blkgs */
 	blk_queue_bypass_start(q);
-
-	preloaded = !radix_tree_preload(GFP_KERNEL);
-
-	/*
-	 * Make sure the root blkg exists and count the existing blkgs. As
-	 * @q is bypassing at this point, blkg_lookup_create() can't be
-	 * used. Open code it.
-	 */
 	spin_lock_irq(q->queue_lock);
-
-	rcu_read_lock();
-	blkg = __blkg_lookup(&blkcg_root, q, false);
-	if (blkg)
-		blkg_free(new_blkg);
-	else
-		blkg = blkg_create(&blkcg_root, q, new_blkg);
-	rcu_read_unlock();
-
-	if (preloaded)
-		radix_tree_preload_end();
-
-	if (IS_ERR(blkg)) {
-		ret = PTR_ERR(blkg);
-		goto out_unlock;
-	}
-
 	list_for_each_entry(blkg, &q->blkg_list, q_node)
 		cnt++;
-
 	spin_unlock_irq(q->queue_lock);
 
-	/* allocate policy_data for all existing blkgs */
 	while (cnt--) {
 		pd = kzalloc_node(pol->pd_size, GFP_KERNEL, q->node);
 		if (!pd) {
@@ -1072,10 +1062,6 @@ void blkcg_deactivate_policy(struct request_queue *q,
 
 	__clear_bit(pol->plid, q->blkcg_pols);
 
-	/* if no policy is left, no need for blkgs - shoot them down */
-	if (bitmap_empty(q->blkcg_pols, BLKCG_MAX_POLS))
-		blkg_destroy_all(q);
-
 	list_for_each_entry(blkg, &q->blkg_list, q_node) {
 		/* grab blkcg lock too while removing @pd from @blkg */
 		spin_lock(&blkg->blkcg->lock);
