
Commit 67f2a93

Merge tag 'for-4.20/dm-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm
Pull device mapper fixes from Mike Snitzer:

 - Fix DM cache metadata to verify that a cache has blocks before
   trying to continue with an operation that requires them.

 - Fix bio-based DM core's dm_make_request() to properly impose device
   limits on individual bios by making use of blk_queue_split().

 - Fix a long-standing race in which DM thinp notified userspace of a
   thin-pool mode state change before it was actually made.

 - Fix the zoned target's bio completion handling; this is a fairly
   invasive fix at this stage, but it is localized to the zoned target.
   Any zoned target users will benefit from this fix.

* tag 'for-4.20/dm-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm:
  dm thin: bump target version
  dm thin: send event about thin-pool state change _after_ making it
  dm zoned: Fix target BIO completion handling
  dm: call blk_queue_split() to impose device limits on bios
  dm cache metadata: verify cache has blocks in blocks_are_clean_separate_dirty()
2 parents (14a996c + 2af6c07), commit 67f2a93

File tree

4 files changed (+81, -119 lines)


drivers/md/dm-cache-metadata.c

Lines changed: 4 additions & 0 deletions
```diff
@@ -930,6 +930,10 @@ static int blocks_are_clean_separate_dirty(struct dm_cache_metadata *cmd,
 	bool dirty_flag;
 	*result = true;
 
+	if (from_cblock(cmd->cache_blocks) == 0)
+		/* Nothing to do */
+		return 0;
+
 	r = dm_bitset_cursor_begin(&cmd->dirty_info, cmd->dirty_root,
 				   from_cblock(cmd->cache_blocks), &cmd->dirty_cursor);
 	if (r) {
```
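The guard matters because dm_bitset_cursor_begin() is asked to open a cursor over from_cblock(cmd->cache_blocks) entries; with a zero-sized cache there is nothing to iterate and the call can only fail, while the "all clean" answer is trivially true. Below is a minimal userspace sketch of that guard pattern; bitset_cursor_begin and blocks_are_clean are hypothetical stand-ins for the dm-bitset and dm-cache-metadata code, not the kernel API.

```c
#include <stdbool.h>
#include <stddef.h>

/* Hypothetical stand-in for the dm-bitset cursor API. */
struct bitset_cursor {
	const unsigned long *bits;
	size_t nr_entries;
	size_t pos;
};

static int bitset_cursor_begin(struct bitset_cursor *c,
			       const unsigned long *bits, size_t nr_entries)
{
	if (nr_entries == 0)
		return -1;	/* a cursor over zero entries is invalid */
	c->bits = bits;
	c->nr_entries = nr_entries;
	c->pos = 0;
	return 0;
}

/* The fix: bail out before opening a cursor on an empty cache, leaving
 * *result as "all clean" (trivially true for zero blocks). */
static int blocks_are_clean(const unsigned long *dirty, size_t nr_blocks,
			    bool *result)
{
	struct bitset_cursor c;

	*result = true;
	if (nr_blocks == 0)
		return 0;	/* nothing to do */

	if (bitset_cursor_begin(&c, dirty, nr_blocks))
		return -1;
	/* ... walk the cursor, clearing *result on any dirty bit ... */
	return 0;
}
```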

drivers/md/dm-thin.c

Lines changed: 37 additions & 35 deletions
```diff
@@ -195,7 +195,7 @@ static void throttle_unlock(struct throttle *t)
 struct dm_thin_new_mapping;
 
 /*
- * The pool runs in 4 modes.  Ordered in degraded order for comparisons.
+ * The pool runs in various modes.  Ordered in degraded order for comparisons.
  */
 enum pool_mode {
 	PM_WRITE,		/* metadata may be changed */
@@ -282,9 +282,38 @@ struct pool {
 	mempool_t mapping_pool;
 };
 
-static enum pool_mode get_pool_mode(struct pool *pool);
 static void metadata_operation_failed(struct pool *pool, const char *op, int r);
 
+static enum pool_mode get_pool_mode(struct pool *pool)
+{
+	return pool->pf.mode;
+}
+
+static void notify_of_pool_mode_change(struct pool *pool)
+{
+	const char *descs[] = {
+		"write",
+		"out-of-data-space",
+		"read-only",
+		"read-only",
+		"fail"
+	};
+	const char *extra_desc = NULL;
+	enum pool_mode mode = get_pool_mode(pool);
+
+	if (mode == PM_OUT_OF_DATA_SPACE) {
+		if (!pool->pf.error_if_no_space)
+			extra_desc = " (queue IO)";
+		else
+			extra_desc = " (error IO)";
+	}
+
+	dm_table_event(pool->ti->table);
+	DMINFO("%s: switching pool to %s%s mode",
+	       dm_device_name(pool->pool_md),
+	       descs[(int)mode], extra_desc ? : "");
+}
+
 /*
  * Target context for a pool.
  */
@@ -2351,8 +2380,6 @@ static void do_waker(struct work_struct *ws)
 	queue_delayed_work(pool->wq, &pool->waker, COMMIT_PERIOD);
 }
 
-static void notify_of_pool_mode_change_to_oods(struct pool *pool);
-
 /*
  * We're holding onto IO to allow userland time to react.  After the
  * timeout either the pool will have been resized (and thus back in
@@ -2365,7 +2392,7 @@ static void do_no_space_timeout(struct work_struct *ws)
 
 	if (get_pool_mode(pool) == PM_OUT_OF_DATA_SPACE && !pool->pf.error_if_no_space) {
 		pool->pf.error_if_no_space = true;
-		notify_of_pool_mode_change_to_oods(pool);
+		notify_of_pool_mode_change(pool);
 		error_retry_list_with_code(pool, BLK_STS_NOSPC);
 	}
 }
@@ -2433,26 +2460,6 @@ static void noflush_work(struct thin_c *tc, void (*fn)(struct work_struct *))
 
 /*----------------------------------------------------------------*/
 
-static enum pool_mode get_pool_mode(struct pool *pool)
-{
-	return pool->pf.mode;
-}
-
-static void notify_of_pool_mode_change(struct pool *pool, const char *new_mode)
-{
-	dm_table_event(pool->ti->table);
-	DMINFO("%s: switching pool to %s mode",
-	       dm_device_name(pool->pool_md), new_mode);
-}
-
-static void notify_of_pool_mode_change_to_oods(struct pool *pool)
-{
-	if (!pool->pf.error_if_no_space)
-		notify_of_pool_mode_change(pool, "out-of-data-space (queue IO)");
-	else
-		notify_of_pool_mode_change(pool, "out-of-data-space (error IO)");
-}
-
 static bool passdown_enabled(struct pool_c *pt)
 {
 	return pt->adjusted_pf.discard_passdown;
@@ -2501,8 +2508,6 @@ static void set_pool_mode(struct pool *pool, enum pool_mode new_mode)
 
 	switch (new_mode) {
 	case PM_FAIL:
-		if (old_mode != new_mode)
-			notify_of_pool_mode_change(pool, "failure");
 		dm_pool_metadata_read_only(pool->pmd);
 		pool->process_bio = process_bio_fail;
 		pool->process_discard = process_bio_fail;
@@ -2516,8 +2521,6 @@ static void set_pool_mode(struct pool *pool, enum pool_mode new_mode)
 
 	case PM_OUT_OF_METADATA_SPACE:
 	case PM_READ_ONLY:
-		if (!is_read_only_pool_mode(old_mode))
-			notify_of_pool_mode_change(pool, "read-only");
 		dm_pool_metadata_read_only(pool->pmd);
 		pool->process_bio = process_bio_read_only;
 		pool->process_discard = process_bio_success;
@@ -2538,8 +2541,6 @@ static void set_pool_mode(struct pool *pool, enum pool_mode new_mode)
 		 * alarming rate.  Adjust your low water mark if you're
 		 * frequently seeing this mode.
 		 */
-		if (old_mode != new_mode)
-			notify_of_pool_mode_change_to_oods(pool);
 		pool->out_of_data_space = true;
 		pool->process_bio = process_bio_read_only;
 		pool->process_discard = process_discard_bio;
@@ -2552,8 +2553,6 @@ static void set_pool_mode(struct pool *pool, enum pool_mode new_mode)
 		break;
 
 	case PM_WRITE:
-		if (old_mode != new_mode)
-			notify_of_pool_mode_change(pool, "write");
 		if (old_mode == PM_OUT_OF_DATA_SPACE)
 			cancel_delayed_work_sync(&pool->no_space_timeout);
 		pool->out_of_data_space = false;
@@ -2573,6 +2572,9 @@ static void set_pool_mode(struct pool *pool, enum pool_mode new_mode)
 	 * doesn't cause an unexpected mode transition on resume.
 	 */
 	pt->adjusted_pf.mode = new_mode;
+
+	if (old_mode != new_mode)
+		notify_of_pool_mode_change(pool);
 }
 
 static void abort_transaction(struct pool *pool)
@@ -4023,7 +4025,7 @@ static struct target_type pool_target = {
 	.name = "thin-pool",
 	.features = DM_TARGET_SINGLETON | DM_TARGET_ALWAYS_WRITEABLE |
 		    DM_TARGET_IMMUTABLE,
-	.version = {1, 20, 0},
+	.version = {1, 21, 0},
 	.module = THIS_MODULE,
 	.ctr = pool_ctr,
 	.dtr = pool_dtr,
@@ -4397,7 +4399,7 @@ static void thin_io_hints(struct dm_target *ti, struct queue_limits *limits)
 
 static struct target_type thin_target = {
 	.name = "thin",
-	.version = {1, 20, 0},
+	.version = {1, 21, 0},
 	.module = THIS_MODULE,
 	.ctr = thin_ctr,
 	.dtr = thin_dtr,
```
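The race these hunks close: notifications used to fire at the top of each set_pool_mode() case, before pool->pf.mode was actually updated, so userspace woken by the event could query the pool and still read the old mode. The rewritten notify_of_pool_mode_change() derives its message from the current mode and is called exactly once, at the end, after the new mode is committed. Here is a minimal userspace analogue of the two orderings, using atomics in place of dm_table_event() and the status query; all names are hypothetical, not the kernel code.

```c
#include <stdatomic.h>

enum mode { MODE_WRITE, MODE_OUT_OF_DATA_SPACE, MODE_READ_ONLY, MODE_FAIL };

static _Atomic int current_mode = MODE_WRITE;	/* read by the monitor */
static _Atomic unsigned event_count;		/* stands in for dm_table_event() */

/* Racy ordering (the old code shape): the event fires while
 * current_mode still holds the old value, so a monitor woken by it
 * can query the state and conclude nothing changed. */
static void set_mode_racy(int new_mode)
{
	atomic_fetch_add(&event_count, 1);	/* notify first... */
	atomic_store(&current_mode, new_mode);	/* ...change afterwards */
}

/* Fixed ordering, matching the commit: commit the change, then
 * notify once if the mode really changed. */
static void set_mode_fixed(int new_mode)
{
	int old_mode = atomic_exchange(&current_mode, new_mode);

	if (old_mode != new_mode)
		atomic_fetch_add(&event_count, 1);
}
```

Since the event ordering is visible to userspace monitors, the fix ships with the target version bump to 1.21.0 in the same series.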

drivers/md/dm-zoned-target.c

Lines changed: 38 additions & 84 deletions
```diff
@@ -20,7 +20,6 @@ struct dmz_bioctx {
 	struct dm_zone		*zone;
 	struct bio		*bio;
 	refcount_t		ref;
-	blk_status_t		status;
 };
 
 /*
@@ -78,65 +77,66 @@ static inline void dmz_bio_endio(struct bio *bio, blk_status_t status)
 {
 	struct dmz_bioctx *bioctx = dm_per_bio_data(bio, sizeof(struct dmz_bioctx));
 
-	if (bioctx->status == BLK_STS_OK && status != BLK_STS_OK)
-		bioctx->status = status;
-	bio_endio(bio);
+	if (status != BLK_STS_OK && bio->bi_status == BLK_STS_OK)
+		bio->bi_status = status;
+
+	if (refcount_dec_and_test(&bioctx->ref)) {
+		struct dm_zone *zone = bioctx->zone;
+
+		if (zone) {
+			if (bio->bi_status != BLK_STS_OK &&
+			    bio_op(bio) == REQ_OP_WRITE &&
+			    dmz_is_seq(zone))
+				set_bit(DMZ_SEQ_WRITE_ERR, &zone->flags);
+			dmz_deactivate_zone(zone);
+		}
+		bio_endio(bio);
+	}
 }
 
 /*
- * Partial clone read BIO completion callback. This terminates the
+ * Completion callback for an internally cloned target BIO. This terminates the
  * target BIO when there are no more references to its context.
  */
-static void dmz_read_bio_end_io(struct bio *bio)
+static void dmz_clone_endio(struct bio *clone)
 {
-	struct dmz_bioctx *bioctx = bio->bi_private;
-	blk_status_t status = bio->bi_status;
+	struct dmz_bioctx *bioctx = clone->bi_private;
+	blk_status_t status = clone->bi_status;
 
-	bio_put(bio);
+	bio_put(clone);
 	dmz_bio_endio(bioctx->bio, status);
 }
 
 /*
- * Issue a BIO to a zone. The BIO may only partially process the
+ * Issue a clone of a target BIO. The clone may only partially process the
  * original target BIO.
  */
-static int dmz_submit_read_bio(struct dmz_target *dmz, struct dm_zone *zone,
-			       struct bio *bio, sector_t chunk_block,
-			       unsigned int nr_blocks)
+static int dmz_submit_bio(struct dmz_target *dmz, struct dm_zone *zone,
+			  struct bio *bio, sector_t chunk_block,
+			  unsigned int nr_blocks)
 {
 	struct dmz_bioctx *bioctx = dm_per_bio_data(bio, sizeof(struct dmz_bioctx));
-	sector_t sector;
 	struct bio *clone;
 
-	/* BIO remap sector */
-	sector = dmz_start_sect(dmz->metadata, zone) + dmz_blk2sect(chunk_block);
-
-	/* If the read is not partial, there is no need to clone the BIO */
-	if (nr_blocks == dmz_bio_blocks(bio)) {
-		/* Setup and submit the BIO */
-		bio->bi_iter.bi_sector = sector;
-		refcount_inc(&bioctx->ref);
-		generic_make_request(bio);
-		return 0;
-	}
-
-	/* Partial BIO: we need to clone the BIO */
 	clone = bio_clone_fast(bio, GFP_NOIO, &dmz->bio_set);
 	if (!clone)
 		return -ENOMEM;
 
-	/* Setup the clone */
-	clone->bi_iter.bi_sector = sector;
+	bio_set_dev(clone, dmz->dev->bdev);
+	clone->bi_iter.bi_sector =
+		dmz_start_sect(dmz->metadata, zone) + dmz_blk2sect(chunk_block);
 	clone->bi_iter.bi_size = dmz_blk2sect(nr_blocks) << SECTOR_SHIFT;
-	clone->bi_end_io = dmz_read_bio_end_io;
+	clone->bi_end_io = dmz_clone_endio;
 	clone->bi_private = bioctx;
 
 	bio_advance(bio, clone->bi_iter.bi_size);
 
-	/* Submit the clone */
 	refcount_inc(&bioctx->ref);
 	generic_make_request(clone);
 
+	if (bio_op(bio) == REQ_OP_WRITE && dmz_is_seq(zone))
+		zone->wp_block += nr_blocks;
+
 	return 0;
 }
 
@@ -214,7 +214,7 @@ static int dmz_handle_read(struct dmz_target *dmz, struct dm_zone *zone,
 	if (nr_blocks) {
 		/* Valid blocks found: read them */
 		nr_blocks = min_t(unsigned int, nr_blocks, end_block - chunk_block);
-		ret = dmz_submit_read_bio(dmz, rzone, bio, chunk_block, nr_blocks);
+		ret = dmz_submit_bio(dmz, rzone, bio, chunk_block, nr_blocks);
 		if (ret)
 			return ret;
 		chunk_block += nr_blocks;
@@ -228,25 +228,6 @@ static int dmz_handle_read(struct dmz_target *dmz, struct dm_zone *zone,
 	return 0;
 }
 
-/*
- * Issue a write BIO to a zone.
- */
-static void dmz_submit_write_bio(struct dmz_target *dmz, struct dm_zone *zone,
-				 struct bio *bio, sector_t chunk_block,
-				 unsigned int nr_blocks)
-{
-	struct dmz_bioctx *bioctx = dm_per_bio_data(bio, sizeof(struct dmz_bioctx));
-
-	/* Setup and submit the BIO */
-	bio_set_dev(bio, dmz->dev->bdev);
-	bio->bi_iter.bi_sector = dmz_start_sect(dmz->metadata, zone) + dmz_blk2sect(chunk_block);
-	refcount_inc(&bioctx->ref);
-	generic_make_request(bio);
-
-	if (dmz_is_seq(zone))
-		zone->wp_block += nr_blocks;
-}
-
 /*
  * Write blocks directly in a data zone, at the write pointer.
  * If a buffer zone is assigned, invalidate the blocks written
@@ -265,7 +246,9 @@ static int dmz_handle_direct_write(struct dmz_target *dmz,
 		return -EROFS;
 
 	/* Submit write */
-	dmz_submit_write_bio(dmz, zone, bio, chunk_block, nr_blocks);
+	ret = dmz_submit_bio(dmz, zone, bio, chunk_block, nr_blocks);
+	if (ret)
+		return ret;
 
 	/*
 	 * Validate the blocks in the data zone and invalidate
@@ -301,7 +284,9 @@ static int dmz_handle_buffered_write(struct dmz_target *dmz,
 		return -EROFS;
 
 	/* Submit write */
-	dmz_submit_write_bio(dmz, bzone, bio, chunk_block, nr_blocks);
+	ret = dmz_submit_bio(dmz, bzone, bio, chunk_block, nr_blocks);
+	if (ret)
+		return ret;
 
 	/*
	 * Validate the blocks in the buffer zone
@@ -600,7 +585,6 @@ static int dmz_map(struct dm_target *ti, struct bio *bio)
 	bioctx->zone = NULL;
 	bioctx->bio = bio;
 	refcount_set(&bioctx->ref, 1);
-	bioctx->status = BLK_STS_OK;
 
 	/* Set the BIO pending in the flush list */
 	if (!nr_sectors && bio_op(bio) == REQ_OP_WRITE) {
@@ -623,35 +607,6 @@ static int dmz_map(struct dm_target *ti, struct bio *bio)
 	return DM_MAPIO_SUBMITTED;
 }
 
-/*
- * Completed target BIO processing.
- */
-static int dmz_end_io(struct dm_target *ti, struct bio *bio, blk_status_t *error)
-{
-	struct dmz_bioctx *bioctx = dm_per_bio_data(bio, sizeof(struct dmz_bioctx));
-
-	if (bioctx->status == BLK_STS_OK && *error)
-		bioctx->status = *error;
-
-	if (!refcount_dec_and_test(&bioctx->ref))
-		return DM_ENDIO_INCOMPLETE;
-
-	/* Done */
-	bio->bi_status = bioctx->status;
-
-	if (bioctx->zone) {
-		struct dm_zone *zone = bioctx->zone;
-
-		if (*error && bio_op(bio) == REQ_OP_WRITE) {
-			if (dmz_is_seq(zone))
-				set_bit(DMZ_SEQ_WRITE_ERR, &zone->flags);
-		}
-		dmz_deactivate_zone(zone);
-	}
-
-	return DM_ENDIO_DONE;
-}
-
 /*
  * Get zoned device information.
  */
@@ -946,7 +901,6 @@ static struct target_type dmz_type = {
 	.ctr = dmz_ctr,
 	.dtr = dmz_dtr,
 	.map = dmz_map,
-	.end_io = dmz_end_io,
 	.io_hints = dmz_io_hints,
 	.prepare_ioctl = dmz_prepare_ioctl,
 	.postsuspend = dmz_suspend,
```
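With this rewrite the target no longer needs an .end_io hook: the parent BIO's per-bio context carries one reference for the parent plus one per in-flight clone, and whoever drops the last reference finishes the parent exactly once in dmz_bio_endio(). A small userspace sketch of that refcount-and-first-error-wins pattern follows; ioctx and its helpers are hypothetical names, not the kernel API.

```c
#include <stdatomic.h>

/* Rough analogue of struct dmz_bioctx: shared by a parent I/O and all
 * clones issued on its behalf. */
struct ioctx {
	atomic_int ref;		/* parent holds 1; each clone takes +1 */
	atomic_int status;	/* 0 = OK; the first error wins */
	void (*complete)(struct ioctx *ctx);
};

static void ioctx_get(struct ioctx *ctx)
{
	atomic_fetch_add(&ctx->ref, 1);	/* taken before submitting a clone */
}

/* Called once by the parent when submission is done and once by each
 * clone's completion callback; the last reference completes the parent. */
static void ioctx_put(struct ioctx *ctx, int status)
{
	int expected = 0;

	if (status)	/* record only the first failure, like bi_status */
		atomic_compare_exchange_strong(&ctx->status, &expected, status);

	if (atomic_fetch_sub(&ctx->ref, 1) == 1)
		ctx->complete(ctx);	/* runs exactly once */
}
```

Taking the extra reference before each generic_make_request(clone) and dropping it in the clone's end_io is what makes dmz_bio_endio() the single completion point.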

drivers/md/dm.c

Lines changed: 2 additions & 0 deletions
```diff
@@ -1593,6 +1593,8 @@ static blk_qc_t __split_and_process_bio(struct mapped_device *md,
 		return ret;
 	}
 
+	blk_queue_split(md->queue, &bio);
+
 	init_clone_info(&ci, md, map, bio);
 
 	if (bio->bi_opf & REQ_PREFLUSH) {
```
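Bio-based DM builds its clones from whatever bio arrives, so an oversized bio has to be cut down against the queue's limits before processing. As I understand the 4.20-era API, blk_queue_split(md->queue, &bio) replaces the incoming bio with a piece the device can accept and submits the remainder separately, so everything downstream of this call only ever sees a bio within limits. Reduced to a userspace sketch with made-up names, the shape of the idea is:

```c
#include <stddef.h>
#include <stdio.h>

/* Stand-in for the post-split processing path. */
static void process_one(size_t off, size_t len)
{
	printf("processing [%zu, %zu)\n", off, off + len);
}

/* Made-up analogue of splitting a request against a device limit:
 * every piece handed to the normal path fits within max_len. */
static void split_and_process(size_t off, size_t len, size_t max_len)
{
	while (len) {
		size_t chunk = len < max_len ? len : max_len;

		process_one(off, chunk);
		off += chunk;
		len -= chunk;
	}
}

int main(void)
{
	/* A 1 MiB request against a device accepting at most 256 KiB. */
	split_and_process(0, 1 << 20, 256 << 10);
	return 0;
}
```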
