Skip to content

Commit 8c772a9

Browse files
liubogithub authored and axboe committed
blk-iolatency: fix IO hang due to negative inflight counter
Our test reported the following stack, and vmcore showed that the ->inflight counter is -1.

[ffffc9003fcc38d0] __schedule at ffffffff8173d95d
[ffffc9003fcc3958] schedule at ffffffff8173de26
[ffffc9003fcc3970] io_schedule at ffffffff810bb6b6
[ffffc9003fcc3988] blkcg_iolatency_throttle at ffffffff813911cb
[ffffc9003fcc3a20] rq_qos_throttle at ffffffff813847f3
[ffffc9003fcc3a48] blk_mq_make_request at ffffffff8137468a
[ffffc9003fcc3b08] generic_make_request at ffffffff81368b49
[ffffc9003fcc3b68] submit_bio at ffffffff81368d7d
[ffffc9003fcc3bb8] ext4_io_submit at ffffffffa031be00 [ext4]
[ffffc9003fcc3c00] ext4_writepages at ffffffffa03163de [ext4]
[ffffc9003fcc3d68] do_writepages at ffffffff811c49ae
[ffffc9003fcc3d78] __filemap_fdatawrite_range at ffffffff811b6188
[ffffc9003fcc3e30] filemap_write_and_wait_range at ffffffff811b6301
[ffffc9003fcc3e60] ext4_sync_file at ffffffffa030cee8 [ext4]
[ffffc9003fcc3ea8] vfs_fsync_range at ffffffff8128594b
[ffffc9003fcc3ee8] do_fsync at ffffffff81285abd
[ffffc9003fcc3f18] sys_fsync at ffffffff81285d50
[ffffc9003fcc3f28] do_syscall_64 at ffffffff81003c04
[ffffc9003fcc3f50] entry_SYSCALL_64_after_swapgs at ffffffff81742b8e

The ->inflight counter may be negative (-1) if

1) blk-iolatency was disabled when the IO was issued,
2) blk-iolatency was enabled before this IO reached its endio,
3) the ->inflight counter is decreased from 0 to -1 in endio()

In fact the hang can be easily reproduced by the below script,

H=/sys/fs/cgroup/unified/
P=/sys/fs/cgroup/unified/test

echo "+io" > $H/cgroup.subtree_control
mkdir -p $P

echo $$ > $P/cgroup.procs

xfs_io -f -d -c "pwrite 0 4k" /dev/sdg

echo "`cat /sys/block/sdg/dev` target=1000000" > $P/io.latency

xfs_io -f -d -c "pwrite 0 4k" /dev/sdg

This fixes the problem by freezing the queue so that while enabling/disabling iolatency, there is no inflight rq running.

Note that quiesce_queue is not needed, as this is only updating the iolatency configuration, about which the dispatching request_queue doesn't care.
Signed-off-by: Liu Bo <bo.liu@linux.alibaba.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
1 parent 0803de7 commit 8c772a9

File tree

1 file changed

+45
-7
lines changed

1 file changed

+45
-7
lines changed

block/blk-iolatency.c

Lines changed: 45 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,7 @@
7272
#include <linux/sched/loadavg.h>
7373
#include <linux/sched/signal.h>
7474
#include <trace/events/block.h>
75+
#include <linux/blk-mq.h>
7576
#include "blk-rq-qos.h"
7677
#include "blk-stat.h"
7778

@@ -601,6 +602,9 @@ static void blkcg_iolatency_done_bio(struct rq_qos *rqos, struct bio *bio)
601602
return;
602603

603604
enabled = blk_iolatency_enabled(iolat->blkiolat);
605+
if (!enabled)
606+
return;
607+
604608
while (blkg && blkg->parent) {
605609
iolat = blkg_to_lat(blkg);
606610
if (!iolat) {
@@ -610,7 +614,7 @@ static void blkcg_iolatency_done_bio(struct rq_qos *rqos, struct bio *bio)
610614
rqw = &iolat->rq_wait;
611615

612616
atomic_dec(&rqw->inflight);
613-
if (!enabled || iolat->min_lat_nsec == 0)
617+
if (iolat->min_lat_nsec == 0)
614618
goto next;
615619
iolatency_record_time(iolat, &bio->bi_issue, now,
616620
issue_as_root);
@@ -754,10 +758,13 @@ int blk_iolatency_init(struct request_queue *q)
754758
return 0;
755759
}
756760

757-
static void iolatency_set_min_lat_nsec(struct blkcg_gq *blkg, u64 val)
761+
/*
762+
* return 1 for enabling iolatency, return -1 for disabling iolatency, otherwise
763+
* return 0.
764+
*/
765+
static int iolatency_set_min_lat_nsec(struct blkcg_gq *blkg, u64 val)
758766
{
759767
struct iolatency_grp *iolat = blkg_to_lat(blkg);
760-
struct blk_iolatency *blkiolat = iolat->blkiolat;
761768
u64 oldval = iolat->min_lat_nsec;
762769

763770
iolat->min_lat_nsec = val;
@@ -766,9 +773,10 @@ static void iolatency_set_min_lat_nsec(struct blkcg_gq *blkg, u64 val)
766773
BLKIOLATENCY_MAX_WIN_SIZE);
767774

768775
if (!oldval && val)
769-
atomic_inc(&blkiolat->enabled);
776+
return 1;
770777
if (oldval && !val)
771-
atomic_dec(&blkiolat->enabled);
778+
return -1;
779+
return 0;
772780
}
773781

774782
static void iolatency_clear_scaling(struct blkcg_gq *blkg)
@@ -800,6 +808,7 @@ static ssize_t iolatency_set_limit(struct kernfs_open_file *of, char *buf,
800808
u64 lat_val = 0;
801809
u64 oldval;
802810
int ret;
811+
int enable = 0;
803812

804813
ret = blkg_conf_prep(blkcg, &blkcg_policy_iolatency, buf, &ctx);
805814
if (ret)
@@ -834,14 +843,37 @@ static ssize_t iolatency_set_limit(struct kernfs_open_file *of, char *buf,
834843
blkg = ctx.blkg;
835844
oldval = iolat->min_lat_nsec;
836845

837-
iolatency_set_min_lat_nsec(blkg, lat_val);
846+
enable = iolatency_set_min_lat_nsec(blkg, lat_val);
847+
if (enable) {
848+
WARN_ON_ONCE(!blk_get_queue(blkg->q));
849+
blkg_get(blkg);
850+
}
851+
838852
if (oldval != iolat->min_lat_nsec) {
839853
iolatency_clear_scaling(blkg);
840854
}
841855

842856
ret = 0;
843857
out:
844858
blkg_conf_finish(&ctx);
859+
if (ret == 0 && enable) {
860+
struct iolatency_grp *tmp = blkg_to_lat(blkg);
861+
struct blk_iolatency *blkiolat = tmp->blkiolat;
862+
863+
blk_mq_freeze_queue(blkg->q);
864+
865+
if (enable == 1)
866+
atomic_inc(&blkiolat->enabled);
867+
else if (enable == -1)
868+
atomic_dec(&blkiolat->enabled);
869+
else
870+
WARN_ON_ONCE(1);
871+
872+
blk_mq_unfreeze_queue(blkg->q);
873+
874+
blkg_put(blkg);
875+
blk_put_queue(blkg->q);
876+
}
845877
return ret ?: nbytes;
846878
}
847879

@@ -977,8 +1009,14 @@ static void iolatency_pd_offline(struct blkg_policy_data *pd)
9771009
{
9781010
struct iolatency_grp *iolat = pd_to_lat(pd);
9791011
struct blkcg_gq *blkg = lat_to_blkg(iolat);
1012+
struct blk_iolatency *blkiolat = iolat->blkiolat;
1013+
int ret;
9801014

981-
iolatency_set_min_lat_nsec(blkg, 0);
1015+
ret = iolatency_set_min_lat_nsec(blkg, 0);
1016+
if (ret == 1)
1017+
atomic_inc(&blkiolat->enabled);
1018+
if (ret == -1)
1019+
atomic_dec(&blkiolat->enabled);
9821020
iolatency_clear_scaling(blkg);
9831021
}
9841022

0 commit comments

Comments
 (0)