Skip to content

Commit ea8c535

Browse files
Coly Li authored and Jens Axboe committed
bcache: set max writeback rate when I/O request is idle
Commit b1092c9 ("bcache: allow quick writeback when backing idle") allows the writeback rate to be faster if there is no I/O request on a bcache device. It works well if there is only one bcache device attached to the cache set. If there are many bcache devices attached to a cache set, it may introduce a performance regression because multiple faster writeback threads of the idle bcache devices will compete for the btree level locks with the bcache device which has I/O requests coming. This patch fixes the above issue by only permitting fast writeback when all bcache devices attached to the cache set are idle. And if one of the bcache devices has a new I/O request coming, minimize all writeback throughput immediately and let the PI controller __update_writeback_rate() decide the upcoming writeback rate for each bcache device. Also, when all bcache devices are idle, limiting the writeback rate to a small number is a waste of throughput, especially when the backing devices are slower non-rotational devices (e.g. SATA SSD). This patch sets a max writeback rate for each backing device if the whole cache set is idle. A faster writeback rate in idle time means new I/Os may have more available space for dirty data, and people may observe better write performance then. Please note bcache may change its cache mode at run time, and this patch still works if the cache mode is switched from writeback mode and there is still dirty data on the cache. Fixes: Commit b1092c9 ("bcache: allow quick writeback when backing idle") Cc: stable@vger.kernel.org #4.16+ Signed-off-by: Coly Li <colyli@suse.de> Tested-by: Kai Krakow <kai@kaishome.de> Tested-by: Stefan Priebe <s.priebe@profihost.ag> Cc: Michael Lyle <mlyle@lyle.org> Signed-off-by: Jens Axboe <axboe@kernel.dk>
1 parent b467a6a commit ea8c535

File tree

7 files changed

+138
-45
lines changed

7 files changed

+138
-45
lines changed

drivers/md/bcache/bcache.h

Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -328,13 +328,6 @@ struct cached_dev {
328328
*/
329329
atomic_t has_dirty;
330330

331-
/*
332-
* Set to zero by things that touch the backing volume-- except
333-
* writeback. Incremented by writeback. Used to determine when to
334-
* accelerate idle writeback.
335-
*/
336-
atomic_t backing_idle;
337-
338331
struct bch_ratelimit writeback_rate;
339332
struct delayed_work writeback_rate_update;
340333

@@ -515,6 +508,8 @@ struct cache_set {
515508
struct cache_accounting accounting;
516509

517510
unsigned long flags;
511+
atomic_t idle_counter;
512+
atomic_t at_max_writeback_rate;
518513

519514
struct cache_sb sb;
520515

@@ -524,6 +519,7 @@ struct cache_set {
524519

525520
struct bcache_device **devices;
526521
unsigned devices_max_used;
522+
atomic_t attached_dev_nr;
527523
struct list_head cached_devs;
528524
uint64_t cached_dev_sectors;
529525
atomic_long_t flash_dev_dirty_sectors;

drivers/md/bcache/request.c

Lines changed: 57 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1103,6 +1103,44 @@ static void detached_dev_do_request(struct bcache_device *d, struct bio *bio)
11031103
generic_make_request(bio);
11041104
}
11051105

1106+
static void quit_max_writeback_rate(struct cache_set *c,
1107+
struct cached_dev *this_dc)
1108+
{
1109+
int i;
1110+
struct bcache_device *d;
1111+
struct cached_dev *dc;
1112+
1113+
/*
1114+
* mutex bch_register_lock may compete with other parallel requesters,
1115+
* or attach/detach operations on other backing device. Waiting to
1116+
* the mutex lock may increase I/O request latency for seconds or more.
1117+
* To avoid such situation, if mutext_trylock() failed, only writeback
1118+
* rate of current cached device is set to 1, and __update_write_back()
1119+
* will decide writeback rate of other cached devices (remember now
1120+
* c->idle_counter is 0 already).
1121+
*/
1122+
if (mutex_trylock(&bch_register_lock)) {
1123+
for (i = 0; i < c->devices_max_used; i++) {
1124+
if (!c->devices[i])
1125+
continue;
1126+
1127+
if (UUID_FLASH_ONLY(&c->uuids[i]))
1128+
continue;
1129+
1130+
d = c->devices[i];
1131+
dc = container_of(d, struct cached_dev, disk);
1132+
/*
1133+
* set writeback rate to default minimum value,
1134+
* then let update_writeback_rate() to decide the
1135+
* upcoming rate.
1136+
*/
1137+
atomic_long_set(&dc->writeback_rate.rate, 1);
1138+
}
1139+
mutex_unlock(&bch_register_lock);
1140+
} else
1141+
atomic_long_set(&this_dc->writeback_rate.rate, 1);
1142+
}
1143+
11061144
/* Cached devices - read & write stuff */
11071145

11081146
static blk_qc_t cached_dev_make_request(struct request_queue *q,
@@ -1120,8 +1158,25 @@ static blk_qc_t cached_dev_make_request(struct request_queue *q,
11201158
return BLK_QC_T_NONE;
11211159
}
11221160

1123-
atomic_set(&dc->backing_idle, 0);
1124-
generic_start_io_acct(q, bio_op(bio), bio_sectors(bio), &d->disk->part0);
1161+
if (likely(d->c)) {
1162+
if (atomic_read(&d->c->idle_counter))
1163+
atomic_set(&d->c->idle_counter, 0);
1164+
/*
1165+
* If at_max_writeback_rate of cache set is true and new I/O
1166+
* comes, quit max writeback rate of all cached devices
1167+
* attached to this cache set, and set at_max_writeback_rate
1168+
* to false.
1169+
*/
1170+
if (unlikely(atomic_read(&d->c->at_max_writeback_rate) == 1)) {
1171+
atomic_set(&d->c->at_max_writeback_rate, 0);
1172+
quit_max_writeback_rate(d->c, dc);
1173+
}
1174+
}
1175+
1176+
generic_start_io_acct(q,
1177+
bio_op(bio),
1178+
bio_sectors(bio),
1179+
&d->disk->part0);
11251180

11261181
bio_set_dev(bio, dc->bdev);
11271182
bio->bi_iter.bi_sector += dc->sb.data_offset;

drivers/md/bcache/super.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -696,6 +696,8 @@ static void bcache_device_detach(struct bcache_device *d)
696696
{
697697
lockdep_assert_held(&bch_register_lock);
698698

699+
atomic_dec(&d->c->attached_dev_nr);
700+
699701
if (test_bit(BCACHE_DEV_DETACHING, &d->flags)) {
700702
struct uuid_entry *u = d->c->uuids + d->id;
701703

@@ -1144,6 +1146,7 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c,
11441146

11451147
bch_cached_dev_run(dc);
11461148
bcache_device_link(&dc->disk, c, "bdev");
1149+
atomic_inc(&c->attached_dev_nr);
11471150

11481151
/* Allow the writeback thread to proceed */
11491152
up_write(&dc->writeback_lock);
@@ -1696,6 +1699,7 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *sb)
16961699
c->block_bits = ilog2(sb->block_size);
16971700
c->nr_uuids = bucket_bytes(c) / sizeof(struct uuid_entry);
16981701
c->devices_max_used = 0;
1702+
atomic_set(&c->attached_dev_nr, 0);
16991703
c->btree_pages = bucket_pages(c);
17001704
if (c->btree_pages > BTREE_MAX_PAGES)
17011705
c->btree_pages = max_t(int, c->btree_pages / 4,

drivers/md/bcache/sysfs.c

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -171,7 +171,8 @@ SHOW(__bch_cached_dev)
171171
var_printf(writeback_running, "%i");
172172
var_print(writeback_delay);
173173
var_print(writeback_percent);
174-
sysfs_hprint(writeback_rate, wb ? dc->writeback_rate.rate << 9 : 0);
174+
sysfs_hprint(writeback_rate,
175+
wb ? atomic_long_read(&dc->writeback_rate.rate) << 9 : 0);
175176
sysfs_hprint(io_errors, atomic_read(&dc->io_errors));
176177
sysfs_printf(io_error_limit, "%i", dc->error_limit);
177178
sysfs_printf(io_disable, "%i", dc->io_disable);
@@ -193,7 +194,9 @@ SHOW(__bch_cached_dev)
193194
* Except for dirty and target, other values should
194195
* be 0 if writeback is not running.
195196
*/
196-
bch_hprint(rate, wb ? dc->writeback_rate.rate << 9 : 0);
197+
bch_hprint(rate,
198+
wb ? atomic_long_read(&dc->writeback_rate.rate) << 9
199+
: 0);
197200
bch_hprint(dirty, bcache_dev_sectors_dirty(&dc->disk) << 9);
198201
bch_hprint(target, dc->writeback_rate_target << 9);
199202
bch_hprint(proportional,
@@ -261,8 +264,12 @@ STORE(__cached_dev)
261264

262265
sysfs_strtoul_clamp(writeback_percent, dc->writeback_percent, 0, 40);
263266

264-
sysfs_strtoul_clamp(writeback_rate,
265-
dc->writeback_rate.rate, 1, INT_MAX);
267+
if (attr == &sysfs_writeback_rate) {
268+
int v;
269+
270+
sysfs_strtoul_clamp(writeback_rate, v, 1, INT_MAX);
271+
atomic_long_set(&dc->writeback_rate.rate, v);
272+
}
266273

267274
sysfs_strtoul_clamp(writeback_rate_update_seconds,
268275
dc->writeback_rate_update_seconds,

drivers/md/bcache/util.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -200,7 +200,7 @@ uint64_t bch_next_delay(struct bch_ratelimit *d, uint64_t done)
200200
{
201201
uint64_t now = local_clock();
202202

203-
d->next += div_u64(done * NSEC_PER_SEC, d->rate);
203+
d->next += div_u64(done * NSEC_PER_SEC, atomic_long_read(&d->rate));
204204

205205
/* Bound the time. Don't let us fall further than 2 seconds behind
206206
* (this prevents unnecessary backlog that would make it impossible

drivers/md/bcache/util.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -442,7 +442,7 @@ struct bch_ratelimit {
442442
* Rate at which we want to do work, in units per second
443443
* The units here correspond to the units passed to bch_next_delay()
444444
*/
445-
uint32_t rate;
445+
atomic_long_t rate;
446446
};
447447

448448
static inline void bch_ratelimit_reset(struct bch_ratelimit *d)

drivers/md/bcache/writeback.c

Lines changed: 61 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -104,11 +104,56 @@ static void __update_writeback_rate(struct cached_dev *dc)
104104

105105
dc->writeback_rate_proportional = proportional_scaled;
106106
dc->writeback_rate_integral_scaled = integral_scaled;
107-
dc->writeback_rate_change = new_rate - dc->writeback_rate.rate;
108-
dc->writeback_rate.rate = new_rate;
107+
dc->writeback_rate_change = new_rate -
108+
atomic_long_read(&dc->writeback_rate.rate);
109+
atomic_long_set(&dc->writeback_rate.rate, new_rate);
109110
dc->writeback_rate_target = target;
110111
}
111112

113+
static bool set_at_max_writeback_rate(struct cache_set *c,
114+
struct cached_dev *dc)
115+
{
116+
/*
117+
* Idle_counter is increased everytime when update_writeback_rate() is
118+
* called. If all backing devices attached to the same cache set have
119+
* identical dc->writeback_rate_update_seconds values, it is about 6
120+
* rounds of update_writeback_rate() on each backing device before
121+
* c->at_max_writeback_rate is set to 1, and then max wrteback rate set
122+
* to each dc->writeback_rate.rate.
123+
* In order to avoid extra locking cost for counting exact dirty cached
124+
* devices number, c->attached_dev_nr is used to calculate the idle
125+
* throushold. It might be bigger if not all cached device are in write-
126+
* back mode, but it still works well with limited extra rounds of
127+
* update_writeback_rate().
128+
*/
129+
if (atomic_inc_return(&c->idle_counter) <
130+
atomic_read(&c->attached_dev_nr) * 6)
131+
return false;
132+
133+
if (atomic_read(&c->at_max_writeback_rate) != 1)
134+
atomic_set(&c->at_max_writeback_rate, 1);
135+
136+
atomic_long_set(&dc->writeback_rate.rate, INT_MAX);
137+
138+
/* keep writeback_rate_target as existing value */
139+
dc->writeback_rate_proportional = 0;
140+
dc->writeback_rate_integral_scaled = 0;
141+
dc->writeback_rate_change = 0;
142+
143+
/*
144+
* Check c->idle_counter and c->at_max_writeback_rate agagain in case
145+
* new I/O arrives during before set_at_max_writeback_rate() returns.
146+
* Then the writeback rate is set to 1, and its new value should be
147+
* decided via __update_writeback_rate().
148+
*/
149+
if ((atomic_read(&c->idle_counter) <
150+
atomic_read(&c->attached_dev_nr) * 6) ||
151+
!atomic_read(&c->at_max_writeback_rate))
152+
return false;
153+
154+
return true;
155+
}
156+
112157
static void update_writeback_rate(struct work_struct *work)
113158
{
114159
struct cached_dev *dc = container_of(to_delayed_work(work),
@@ -136,13 +181,20 @@ static void update_writeback_rate(struct work_struct *work)
136181
return;
137182
}
138183

139-
down_read(&dc->writeback_lock);
140-
141-
if (atomic_read(&dc->has_dirty) &&
142-
dc->writeback_percent)
143-
__update_writeback_rate(dc);
184+
if (atomic_read(&dc->has_dirty) && dc->writeback_percent) {
185+
/*
186+
* If the whole cache set is idle, set_at_max_writeback_rate()
187+
* will set writeback rate to a max number. Then it is
188+
* unncessary to update writeback rate for an idle cache set
189+
* in maximum writeback rate number(s).
190+
*/
191+
if (!set_at_max_writeback_rate(c, dc)) {
192+
down_read(&dc->writeback_lock);
193+
__update_writeback_rate(dc);
194+
up_read(&dc->writeback_lock);
195+
}
196+
}
144197

145-
up_read(&dc->writeback_lock);
146198

147199
/*
148200
* CACHE_SET_IO_DISABLE might be set via sysfs interface,
@@ -422,27 +474,6 @@ static void read_dirty(struct cached_dev *dc)
422474

423475
delay = writeback_delay(dc, size);
424476

425-
/* If the control system would wait for at least half a
426-
* second, and there's been no reqs hitting the backing disk
427-
* for awhile: use an alternate mode where we have at most
428-
* one contiguous set of writebacks in flight at a time. If
429-
* someone wants to do IO it will be quick, as it will only
430-
* have to contend with one operation in flight, and we'll
431-
* be round-tripping data to the backing disk as quickly as
432-
* it can accept it.
433-
*/
434-
if (delay >= HZ / 2) {
435-
/* 3 means at least 1.5 seconds, up to 7.5 if we
436-
* have slowed way down.
437-
*/
438-
if (atomic_inc_return(&dc->backing_idle) >= 3) {
439-
/* Wait for current I/Os to finish */
440-
closure_sync(&cl);
441-
/* And immediately launch a new set. */
442-
delay = 0;
443-
}
444-
}
445-
446477
while (!kthread_should_stop() &&
447478
!test_bit(CACHE_SET_IO_DISABLE, &dc->disk.c->flags) &&
448479
delay) {
@@ -741,7 +772,7 @@ void bch_cached_dev_writeback_init(struct cached_dev *dc)
741772
dc->writeback_running = true;
742773
dc->writeback_percent = 10;
743774
dc->writeback_delay = 30;
744-
dc->writeback_rate.rate = 1024;
775+
atomic_long_set(&dc->writeback_rate.rate, 1024);
745776
dc->writeback_rate_minimum = 8;
746777

747778
dc->writeback_rate_update_seconds = WRITEBACK_RATE_UPDATE_SECS_DEFAULT;

0 commit comments

Comments
 (0)