Skip to content

Commit cc8394d

Browse files
jthornber authored and kergon committed
dm thin: provide userspace access to pool metadata
This patch implements two new messages that can be sent to the thin pool target allowing it to take a snapshot of the _metadata_. This read-only snapshot can be accessed by userland, concurrently with the live target. Only one metadata snapshot can be held at a time. The pool's status line will give the block location for the current msnap. Since version 0.1.5 of the userland thin provisioning tools, the thin_dump program displays the msnap as follows: thin_dump -m <msnap root> <metadata dev> Available here: https://github.com/jthornber/thin-provisioning-tools Now that userland can access the metadata we can do various things that have traditionally been kernel side tasks: i) Incremental backups. By using metadata snapshots we can work out what blocks have changed over time. Combined with data snapshots we can ensure the data doesn't change while we back it up. A short proof of concept script can be found here: https://github.com/jthornber/thinp-test-suite/blob/master/incremental_backup_example.rb ii) Migration of thin devices from one pool to another. iii) Merging snapshots back into an external origin. iv) Asynchronous replication. Signed-off-by: Joe Thornber <ejt@redhat.com> Signed-off-by: Alasdair G Kergon <agk@redhat.com>
1 parent a24c256 commit cc8394d

File tree

5 files changed

+193
-11
lines changed

5 files changed

+193
-11
lines changed

Documentation/device-mapper/thin-provisioning.txt

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -287,6 +287,17 @@ iii) Messages
287287
the current transaction id is when you change it with this
288288
compare-and-swap message.
289289

290+
reserve_metadata_snap
291+
292+
Reserve a copy of the data mapping btree for use by userland.
293+
This allows userland to inspect the mappings as they were when
294+
this message was executed. Use the pool's status command to
295+
get the root block associated with the metadata snapshot.
296+
297+
release_metadata_snap
298+
299+
Release a previously reserved copy of the data mapping btree.
300+
290301
'thin' target
291302
-------------
292303

drivers/md/dm-thin-metadata.c

Lines changed: 130 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1082,31 +1082,155 @@ int dm_pool_get_metadata_transaction_id(struct dm_pool_metadata *pmd,
10821082
return 0;
10831083
}
10841084

1085-
static int __get_held_metadata_root(struct dm_pool_metadata *pmd,
1086-
dm_block_t *result)
1085+
/*
 * Reserve a metadata snapshot.
 *
 * Shadows the superblock into a copy, wipes the two space map roots in that
 * copy, increments the data mapping and device details roots so they are
 * preserved, and finally records the copy's block location in the live
 * superblock's held_root field.
 *
 * Returns 0 on success, -EBUSY if the shadowed superblock already carries a
 * non-zero held_root, or the error from dm_tm_shadow_block() /
 * dm_bm_write_lock().  pmd->need_commit is set on every path once the
 * shadow has succeeded.
 *
 * The only caller visible here, dm_pool_reserve_metadata_snap(), holds
 * pmd->root_lock for writing around this call.
 */
static int __reserve_metadata_snap(struct dm_pool_metadata *pmd)
1086+
{
1087+
int r, inc;
1088+
struct thin_disk_superblock *disk_super;
1089+
struct dm_block *copy, *sblock;
1090+
dm_block_t held_root;
1091+
1092+
/*
1093+
* Copy the superblock.
1094+
*/
1095+
/*
 * NOTE(review): the outcome of this increment is not examined, and it is
 * not reverted if the shadow below fails - confirm this is intended.
 */
dm_sm_inc_block(pmd->metadata_sm, THIN_SUPERBLOCK_LOCATION);
1096+
r = dm_tm_shadow_block(pmd->tm, THIN_SUPERBLOCK_LOCATION,
1097+
&sb_validator, &copy, &inc);
1098+
if (r)
1099+
return r;
1100+
1101+
/*
 * inc is filled in by dm_tm_shadow_block(); presumably the extra reference
 * taken above guarantees it is set - TODO confirm.
 */
BUG_ON(!inc);
1102+
1103+
held_root = dm_block_location(copy);
1104+
disk_super = dm_block_data(copy);
1105+
1106+
if (le64_to_cpu(disk_super->held_root)) {
1107+
DMWARN("Pool metadata snapshot already exists: release this before taking another.");
1108+
1109+
/* Drop the copy just made; the recorded held_root is not touched. */
dm_tm_dec(pmd->tm, held_root);
1110+
dm_tm_unlock(pmd->tm, copy);
1111+
pmd->need_commit = 1;
1112+
1113+
return -EBUSY;
1114+
}
1115+
1116+
/*
1117+
* Wipe the spacemap since we're not publishing this.
1118+
*/
1119+
memset(&disk_super->data_space_map_root, 0,
1120+
sizeof(disk_super->data_space_map_root));
1121+
memset(&disk_super->metadata_space_map_root, 0,
1122+
sizeof(disk_super->metadata_space_map_root));
1123+
1124+
/*
1125+
* Increment the data structures that need to be preserved.
1126+
*/
1127+
dm_tm_inc(pmd->tm, le64_to_cpu(disk_super->data_mapping_root));
1128+
dm_tm_inc(pmd->tm, le64_to_cpu(disk_super->device_details_root));
1129+
dm_tm_unlock(pmd->tm, copy);
1130+
1131+
/*
1132+
* Write the held root into the superblock.
1133+
*/
1134+
r = dm_bm_write_lock(pmd->bm, THIN_SUPERBLOCK_LOCATION,
1135+
&sb_validator, &sblock);
1136+
if (r) {
1137+
/*
 * NOTE(review): this path drops the copy but does not revert the two
 * dm_tm_inc() calls on the preserved roots made above - confirm.
 */
dm_tm_dec(pmd->tm, held_root);
1138+
pmd->need_commit = 1;
1139+
return r;
1140+
}
1141+
1142+
disk_super = dm_block_data(sblock);
1143+
disk_super->held_root = cpu_to_le64(held_root);
1144+
dm_bm_unlock(sblock);
1145+
1146+
pmd->need_commit = 1;
1147+
1148+
return 0;
1149+
}
1150+
1151+
/*
 * Public entry point for reserving a metadata snapshot.
 *
 * Holds pmd->root_lock for writing around __reserve_metadata_snap() and
 * returns that function's result unchanged.
 */
int dm_pool_reserve_metadata_snap(struct dm_pool_metadata *pmd)
1152+
{
1153+
int r;
1154+
1155+
down_write(&pmd->root_lock);
1156+
r = __reserve_metadata_snap(pmd);
1157+
up_write(&pmd->root_lock);
1158+
1159+
return r;
1160+
}
1161+
1162+
/*
 * Release the metadata snapshot.
 *
 * Clears held_root in the live superblock, then read-locks the block that
 * held_root pointed at and decrements the metadata space map counts for its
 * data mapping root, its device details root and the block itself.
 *
 * Returns -EINVAL if held_root was zero, an error from dm_bm_write_lock() or
 * dm_tm_read_lock(), otherwise the result of dm_tm_unlock().
 *
 * The only caller visible here, dm_pool_release_metadata_snap(), holds
 * pmd->root_lock for writing around this call.
 */
static int __release_metadata_snap(struct dm_pool_metadata *pmd)
10871163
{
10881164
int r;
10891165
struct thin_disk_superblock *disk_super;
1090-
struct dm_block *sblock;
1166+
struct dm_block *sblock, *copy;
1167+
dm_block_t held_root;
10911168

10921169
r = dm_bm_write_lock(pmd->bm, THIN_SUPERBLOCK_LOCATION,
10931170
&sb_validator, &sblock);
10941171
if (r)
10951172
return r;
10961173

1174+
disk_super = dm_block_data(sblock);
1175+
held_root = le64_to_cpu(disk_super->held_root);
1176+
/*
 * held_root is zeroed and need_commit raised before held_root is tested,
 * so both happen even on the -EINVAL path below.
 */
disk_super->held_root = cpu_to_le64(0);
1177+
pmd->need_commit = 1;
1178+
1179+
dm_bm_unlock(sblock);
1180+
1181+
if (!held_root) {
1182+
DMWARN("No pool metadata snapshot found: nothing to release.");
1183+
return -EINVAL;
1184+
}
1185+
1186+
/*
 * NOTE(review): if this read lock fails, held_root has already been
 * cleared above and none of the decrements below run - confirm the
 * snapshot's blocks are not left referenced in that case.
 */
r = dm_tm_read_lock(pmd->tm, held_root, &sb_validator, &copy);
1187+
if (r)
1188+
return r;
1189+
1190+
disk_super = dm_block_data(copy);
1191+
/*
 * NOTE(review): the outcomes of these three decrements are not examined;
 * whether decrementing only the root blocks also releases the nodes beneath
 * them cannot be determined from this function - verify.
 */
dm_sm_dec_block(pmd->metadata_sm, le64_to_cpu(disk_super->data_mapping_root));
1192+
dm_sm_dec_block(pmd->metadata_sm, le64_to_cpu(disk_super->device_details_root));
1193+
dm_sm_dec_block(pmd->metadata_sm, held_root);
1194+
1195+
return dm_tm_unlock(pmd->tm, copy);
1196+
}
1197+
1198+
/*
 * Public entry point for releasing the metadata snapshot.
 *
 * Holds pmd->root_lock for writing around __release_metadata_snap() and
 * returns that function's result unchanged.
 */
int dm_pool_release_metadata_snap(struct dm_pool_metadata *pmd)
1199+
{
1200+
int r;
1201+
1202+
down_write(&pmd->root_lock);
1203+
r = __release_metadata_snap(pmd);
1204+
up_write(&pmd->root_lock);
1205+
1206+
return r;
1207+
}
1208+
1209+
/*
 * Fetch the block location of the current metadata snapshot.
 *
 * Read-locks the superblock and stores its held_root field, converted to
 * CPU byte order, in *result.  The reserve and release helpers in this file
 * treat a held_root of zero as "no snapshot".
 *
 * Returns the error from dm_bm_read_lock() (in which case *result is not
 * written), otherwise the result of dm_bm_unlock().
 */
static int __get_metadata_snap(struct dm_pool_metadata *pmd,
1210+
dm_block_t *result)
1211+
{
1212+
int r;
1213+
struct thin_disk_superblock *disk_super;
1214+
struct dm_block *sblock;
1215+
1216+
r = dm_bm_read_lock(pmd->bm, THIN_SUPERBLOCK_LOCATION,
1217+
&sb_validator, &sblock);
1218+
if (r)
1219+
return r;
1220+
10971221
disk_super = dm_block_data(sblock);
10981222
*result = le64_to_cpu(disk_super->held_root);
10991223

11001224
return dm_bm_unlock(sblock);
11011225
}
11021226

1103-
int dm_pool_get_held_metadata_root(struct dm_pool_metadata *pmd,
1104-
dm_block_t *result)
1227+
int dm_pool_get_metadata_snap(struct dm_pool_metadata *pmd,
1228+
dm_block_t *result)
11051229
{
11061230
int r;
11071231

11081232
down_read(&pmd->root_lock);
1109-
r = __get_held_metadata_root(pmd, result);
1233+
r = __get_metadata_snap(pmd, result);
11101234
up_read(&pmd->root_lock);
11111235

11121236
return r;

drivers/md/dm-thin-metadata.h

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -90,11 +90,18 @@ int dm_pool_get_metadata_transaction_id(struct dm_pool_metadata *pmd,
9090

9191
/*
9292
* Hold/get root for userspace transaction.
93+
*
94+
* The metadata snapshot is a copy of the current superblock (minus the
95+
* space maps). Userland can access the data structures for READ
96+
* operations only. A small performance hit is incurred by providing this
97+
* copy of the metadata to userland due to extra copy-on-write operations
98+
* on the metadata nodes. Release this as soon as you finish with it.
9399
*/
94-
int dm_pool_hold_metadata_root(struct dm_pool_metadata *pmd);
100+
int dm_pool_reserve_metadata_snap(struct dm_pool_metadata *pmd);
101+
int dm_pool_release_metadata_snap(struct dm_pool_metadata *pmd);
95102

96-
int dm_pool_get_held_metadata_root(struct dm_pool_metadata *pmd,
97-
dm_block_t *result);
103+
int dm_pool_get_metadata_snap(struct dm_pool_metadata *pmd,
104+
dm_block_t *result);
98105

99106
/*
100107
* Actions on a single virtual device.

drivers/md/dm-thin.c

Lines changed: 40 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2284,13 +2284,45 @@ static int process_set_transaction_id_mesg(unsigned argc, char **argv, struct po
22842284
return 0;
22852285
}
22862286

2287+
/*
 * Handle the "reserve_metadata_snap" pool message.
 *
 * Validates the argument count with check_arg_count(argc, 1) - presumably
 * the message name alone, with no further arguments - then asks the pool
 * metadata to reserve a snapshot.  A failure of the reserve call is logged
 * with DMWARN; in all cases the error code (or 0) is returned to the caller.
 * argv is not read here.
 */
static int process_reserve_metadata_snap_mesg(unsigned argc, char **argv, struct pool *pool)
2288+
{
2289+
int r;
2290+
2291+
r = check_arg_count(argc, 1);
2292+
if (r)
2293+
return r;
2294+
2295+
r = dm_pool_reserve_metadata_snap(pool->pmd);
2296+
if (r)
2297+
DMWARN("reserve_metadata_snap message failed.");
2298+
2299+
return r;
2300+
}
2301+
2302+
/*
 * Handle the "release_metadata_snap" pool message.
 *
 * Validates the argument count with check_arg_count(argc, 1) - presumably
 * the message name alone, with no further arguments - then asks the pool
 * metadata to release its snapshot.  A failure of the release call is logged
 * with DMWARN; in all cases the error code (or 0) is returned to the caller.
 * argv is not read here.
 */
static int process_release_metadata_snap_mesg(unsigned argc, char **argv, struct pool *pool)
2303+
{
2304+
int r;
2305+
2306+
r = check_arg_count(argc, 1);
2307+
if (r)
2308+
return r;
2309+
2310+
r = dm_pool_release_metadata_snap(pool->pmd);
2311+
if (r)
2312+
DMWARN("release_metadata_snap message failed.");
2313+
2314+
return r;
2315+
}
2316+
22872317
/*
22882318
* Messages supported:
22892319
* create_thin <dev_id>
22902320
* create_snap <dev_id> <origin_id>
22912321
* delete <dev_id>
22922322
* trim <dev_id> <new_size_in_sectors>
22932323
* set_transaction_id <current_trans_id> <new_trans_id>
2324+
* reserve_metadata_snap
2325+
* release_metadata_snap
22942326
*/
22952327
static int pool_message(struct dm_target *ti, unsigned argc, char **argv)
22962328
{
@@ -2310,6 +2342,12 @@ static int pool_message(struct dm_target *ti, unsigned argc, char **argv)
23102342
else if (!strcasecmp(argv[0], "set_transaction_id"))
23112343
r = process_set_transaction_id_mesg(argc, argv, pool);
23122344

2345+
else if (!strcasecmp(argv[0], "reserve_metadata_snap"))
2346+
r = process_reserve_metadata_snap_mesg(argc, argv, pool);
2347+
2348+
else if (!strcasecmp(argv[0], "release_metadata_snap"))
2349+
r = process_release_metadata_snap_mesg(argc, argv, pool);
2350+
23132351
else
23142352
DMWARN("Unrecognised thin pool target message received: %s", argv[0]);
23152353

@@ -2369,7 +2407,7 @@ static int pool_status(struct dm_target *ti, status_type_t type,
23692407
if (r)
23702408
return r;
23712409

2372-
r = dm_pool_get_held_metadata_root(pool->pmd, &held_root);
2410+
r = dm_pool_get_metadata_snap(pool->pmd, &held_root);
23732411
if (r)
23742412
return r;
23752413

@@ -2465,7 +2503,7 @@ static struct target_type pool_target = {
24652503
.name = "thin-pool",
24662504
.features = DM_TARGET_SINGLETON | DM_TARGET_ALWAYS_WRITEABLE |
24672505
DM_TARGET_IMMUTABLE,
2468-
.version = {1, 1, 0},
2506+
.version = {1, 2, 0},
24692507
.module = THIS_MODULE,
24702508
.ctr = pool_ctr,
24712509
.dtr = pool_dtr,

drivers/md/persistent-data/dm-transaction-manager.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -249,6 +249,7 @@ int dm_tm_shadow_block(struct dm_transaction_manager *tm, dm_block_t orig,
249249

250250
return r;
251251
}
252+
EXPORT_SYMBOL_GPL(dm_tm_shadow_block);
252253

253254
int dm_tm_read_lock(struct dm_transaction_manager *tm, dm_block_t b,
254255
struct dm_block_validator *v,
@@ -259,6 +260,7 @@ int dm_tm_read_lock(struct dm_transaction_manager *tm, dm_block_t b,
259260

260261
return dm_bm_read_lock(tm->bm, b, v, blk);
261262
}
263+
EXPORT_SYMBOL_GPL(dm_tm_read_lock);
262264

263265
int dm_tm_unlock(struct dm_transaction_manager *tm, struct dm_block *b)
264266
{

0 commit comments

Comments
 (0)