Skip to content

Commit 356d9d5

Browse files
Mikulas Patocka authored and snitm committed
dm integrity: allow separate metadata device
Add the ability to store DM integrity metadata on a separate device. This feature is activated with the option "meta_device:/dev/device".

Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
1 parent 71e9ddb commit 356d9d5

File tree

1 file changed

+149
-54
lines changed

1 file changed

+149
-54
lines changed

drivers/md/dm-integrity.c

Lines changed: 149 additions & 54 deletions
Original file line number | Diff line number | Diff line change
@@ -139,6 +139,7 @@ struct alg_spec {
139139

140140
struct dm_integrity_c {
141141
struct dm_dev *dev;
142+
struct dm_dev *meta_dev;
142143
unsigned tag_size;
143144
__s8 log2_tag_size;
144145
sector_t start;
@@ -170,7 +171,8 @@ struct dm_integrity_c {
170171
unsigned short journal_section_sectors;
171172
unsigned journal_sections;
172173
unsigned journal_entries;
173-
sector_t device_sectors;
174+
sector_t data_device_sectors;
175+
sector_t meta_device_sectors;
174176
unsigned initial_sectors;
175177
unsigned metadata_run;
176178
__s8 log2_metadata_run;
@@ -345,10 +347,14 @@ static commit_id_t dm_integrity_commit_id(struct dm_integrity_c *ic, unsigned i,
345347
static void get_area_and_offset(struct dm_integrity_c *ic, sector_t data_sector,
346348
sector_t *area, sector_t *offset)
347349
{
348-
__u8 log2_interleave_sectors = ic->sb->log2_interleave_sectors;
349-
350-
*area = data_sector >> log2_interleave_sectors;
351-
*offset = (unsigned)data_sector & ((1U << log2_interleave_sectors) - 1);
350+
if (!ic->meta_dev) {
351+
__u8 log2_interleave_sectors = ic->sb->log2_interleave_sectors;
352+
*area = data_sector >> log2_interleave_sectors;
353+
*offset = (unsigned)data_sector & ((1U << log2_interleave_sectors) - 1);
354+
} else {
355+
*area = 0;
356+
*offset = data_sector;
357+
}
352358
}
353359

354360
#define sector_to_block(ic, n) \
@@ -387,6 +393,9 @@ static sector_t get_data_sector(struct dm_integrity_c *ic, sector_t area, sector
387393
{
388394
sector_t result;
389395

396+
if (ic->meta_dev)
397+
return offset;
398+
390399
result = area << ic->sb->log2_interleave_sectors;
391400
if (likely(ic->log2_metadata_run >= 0))
392401
result += (area + 1) << ic->log2_metadata_run;
@@ -416,7 +425,7 @@ static int sync_rw_sb(struct dm_integrity_c *ic, int op, int op_flags)
416425
io_req.mem.ptr.addr = ic->sb;
417426
io_req.notify.fn = NULL;
418427
io_req.client = ic->io;
419-
io_loc.bdev = ic->dev->bdev;
428+
io_loc.bdev = ic->meta_dev ? ic->meta_dev->bdev : ic->dev->bdev;
420429
io_loc.sector = ic->start;
421430
io_loc.count = SB_SECTORS;
422431

@@ -763,7 +772,7 @@ static void rw_journal(struct dm_integrity_c *ic, int op, int op_flags, unsigned
763772
io_req.notify.fn = NULL;
764773
}
765774
io_req.client = ic->io;
766-
io_loc.bdev = ic->dev->bdev;
775+
io_loc.bdev = ic->meta_dev ? ic->meta_dev->bdev : ic->dev->bdev;
767776
io_loc.sector = ic->start + SB_SECTORS + sector;
768777
io_loc.count = n_sectors;
769778

@@ -2306,12 +2315,15 @@ static void dm_integrity_status(struct dm_target *ti, status_type_t type,
23062315
watermark_percentage += ic->journal_entries / 2;
23072316
do_div(watermark_percentage, ic->journal_entries);
23082317
arg_count = 5;
2318+
arg_count += !!ic->meta_dev;
23092319
arg_count += ic->sectors_per_block != 1;
23102320
arg_count += !!ic->internal_hash_alg.alg_string;
23112321
arg_count += !!ic->journal_crypt_alg.alg_string;
23122322
arg_count += !!ic->journal_mac_alg.alg_string;
23132323
DMEMIT("%s %llu %u %c %u", ic->dev->name, (unsigned long long)ic->start,
23142324
ic->tag_size, ic->mode, arg_count);
2325+
if (ic->meta_dev)
2326+
DMEMIT(" meta_device:%s", ic->meta_dev->name);
23152327
DMEMIT(" journal_sectors:%u", ic->initial_sectors - SB_SECTORS);
23162328
DMEMIT(" interleave_sectors:%u", 1U << ic->sb->log2_interleave_sectors);
23172329
DMEMIT(" buffer_sectors:%u", 1U << ic->log2_buffer_sectors);
@@ -2341,7 +2353,10 @@ static int dm_integrity_iterate_devices(struct dm_target *ti,
23412353
{
23422354
struct dm_integrity_c *ic = ti->private;
23432355

2344-
return fn(ti, ic->dev, ic->start + ic->initial_sectors + ic->metadata_run, ti->len, data);
2356+
if (!ic->meta_dev)
2357+
return fn(ti, ic->dev, ic->start + ic->initial_sectors + ic->metadata_run, ti->len, data);
2358+
else
2359+
return fn(ti, ic->dev, 0, ti->len, data);
23452360
}
23462361

23472362
static void dm_integrity_io_hints(struct dm_target *ti, struct queue_limits *limits)
@@ -2374,26 +2389,38 @@ static void calculate_journal_section_size(struct dm_integrity_c *ic)
23742389
static int calculate_device_limits(struct dm_integrity_c *ic)
23752390
{
23762391
__u64 initial_sectors;
2377-
sector_t last_sector, last_area, last_offset;
23782392

23792393
calculate_journal_section_size(ic);
23802394
initial_sectors = SB_SECTORS + (__u64)ic->journal_section_sectors * ic->journal_sections;
2381-
if (initial_sectors + METADATA_PADDING_SECTORS >= ic->device_sectors || initial_sectors > UINT_MAX)
2395+
if (initial_sectors + METADATA_PADDING_SECTORS >= ic->meta_device_sectors || initial_sectors > UINT_MAX)
23822396
return -EINVAL;
23832397
ic->initial_sectors = initial_sectors;
23842398

2385-
ic->metadata_run = roundup((__u64)ic->tag_size << (ic->sb->log2_interleave_sectors - ic->sb->log2_sectors_per_block),
2386-
(__u64)(1 << SECTOR_SHIFT << METADATA_PADDING_SECTORS)) >> SECTOR_SHIFT;
2387-
if (!(ic->metadata_run & (ic->metadata_run - 1)))
2388-
ic->log2_metadata_run = __ffs(ic->metadata_run);
2389-
else
2390-
ic->log2_metadata_run = -1;
2399+
if (!ic->meta_dev) {
2400+
sector_t last_sector, last_area, last_offset;
23912401

2392-
get_area_and_offset(ic, ic->provided_data_sectors - 1, &last_area, &last_offset);
2393-
last_sector = get_data_sector(ic, last_area, last_offset);
2402+
ic->metadata_run = roundup((__u64)ic->tag_size << (ic->sb->log2_interleave_sectors - ic->sb->log2_sectors_per_block),
2403+
(__u64)(1 << SECTOR_SHIFT << METADATA_PADDING_SECTORS)) >> SECTOR_SHIFT;
2404+
if (!(ic->metadata_run & (ic->metadata_run - 1)))
2405+
ic->log2_metadata_run = __ffs(ic->metadata_run);
2406+
else
2407+
ic->log2_metadata_run = -1;
23942408

2395-
if (last_sector < ic->start || last_sector >= ic->device_sectors)
2396-
return -EINVAL;
2409+
get_area_and_offset(ic, ic->provided_data_sectors - 1, &last_area, &last_offset);
2410+
last_sector = get_data_sector(ic, last_area, last_offset);
2411+
if (last_sector < ic->start || last_sector >= ic->meta_device_sectors)
2412+
return -EINVAL;
2413+
} else {
2414+
__u64 meta_size = ic->provided_data_sectors * ic->tag_size;
2415+
meta_size = (meta_size + ((1U << (ic->log2_buffer_sectors + SECTOR_SHIFT)) - 1))
2416+
>> (ic->log2_buffer_sectors + SECTOR_SHIFT);
2417+
meta_size <<= ic->log2_buffer_sectors;
2418+
if (ic->initial_sectors + meta_size < ic->initial_sectors ||
2419+
ic->initial_sectors + meta_size > ic->meta_device_sectors)
2420+
return -EINVAL;
2421+
ic->metadata_run = 1;
2422+
ic->log2_metadata_run = 0;
2423+
}
23972424

23982425
return 0;
23992426
}
@@ -2415,26 +2442,51 @@ static int initialize_superblock(struct dm_integrity_c *ic, unsigned journal_sec
24152442
journal_sections = journal_sectors / ic->journal_section_sectors;
24162443
if (!journal_sections)
24172444
journal_sections = 1;
2418-
ic->sb->journal_sections = cpu_to_le32(journal_sections);
2419-
2420-
if (!interleave_sectors)
2421-
interleave_sectors = DEFAULT_INTERLEAVE_SECTORS;
2422-
ic->sb->log2_interleave_sectors = __fls(interleave_sectors);
2423-
ic->sb->log2_interleave_sectors = max((__u8)MIN_LOG2_INTERLEAVE_SECTORS, ic->sb->log2_interleave_sectors);
2424-
ic->sb->log2_interleave_sectors = min((__u8)MAX_LOG2_INTERLEAVE_SECTORS, ic->sb->log2_interleave_sectors);
24252445

2426-
ic->provided_data_sectors = 0;
2427-
for (test_bit = fls64(ic->device_sectors) - 1; test_bit >= 3; test_bit--) {
2428-
__u64 prev_data_sectors = ic->provided_data_sectors;
2446+
if (!ic->meta_dev) {
2447+
ic->sb->journal_sections = cpu_to_le32(journal_sections);
2448+
if (!interleave_sectors)
2449+
interleave_sectors = DEFAULT_INTERLEAVE_SECTORS;
2450+
ic->sb->log2_interleave_sectors = __fls(interleave_sectors);
2451+
ic->sb->log2_interleave_sectors = max((__u8)MIN_LOG2_INTERLEAVE_SECTORS, ic->sb->log2_interleave_sectors);
2452+
ic->sb->log2_interleave_sectors = min((__u8)MAX_LOG2_INTERLEAVE_SECTORS, ic->sb->log2_interleave_sectors);
2453+
2454+
ic->provided_data_sectors = 0;
2455+
for (test_bit = fls64(ic->meta_device_sectors) - 1; test_bit >= 3; test_bit--) {
2456+
__u64 prev_data_sectors = ic->provided_data_sectors;
2457+
2458+
ic->provided_data_sectors |= (sector_t)1 << test_bit;
2459+
if (calculate_device_limits(ic))
2460+
ic->provided_data_sectors = prev_data_sectors;
2461+
}
2462+
if (!ic->provided_data_sectors)
2463+
return -EINVAL;
2464+
} else {
2465+
ic->sb->log2_interleave_sectors = 0;
2466+
ic->provided_data_sectors = ic->data_device_sectors;
2467+
ic->provided_data_sectors &= ~(sector_t)(ic->sectors_per_block - 1);
2468+
2469+
try_smaller_buffer:
2470+
ic->sb->journal_sections = cpu_to_le32(0);
2471+
for (test_bit = fls(journal_sections) - 1; test_bit >= 0; test_bit--) {
2472+
__u32 prev_journal_sections = le32_to_cpu(ic->sb->journal_sections);
2473+
__u32 test_journal_sections = prev_journal_sections | (1U << test_bit);
2474+
if (test_journal_sections > journal_sections)
2475+
continue;
2476+
ic->sb->journal_sections = cpu_to_le32(test_journal_sections);
2477+
if (calculate_device_limits(ic))
2478+
ic->sb->journal_sections = cpu_to_le32(prev_journal_sections);
24292479

2430-
ic->provided_data_sectors |= (sector_t)1 << test_bit;
2431-
if (calculate_device_limits(ic))
2432-
ic->provided_data_sectors = prev_data_sectors;
2480+
}
2481+
if (!le32_to_cpu(ic->sb->journal_sections)) {
2482+
if (ic->log2_buffer_sectors > 3) {
2483+
ic->log2_buffer_sectors--;
2484+
goto try_smaller_buffer;
2485+
}
2486+
return -EINVAL;
2487+
}
24332488
}
24342489

2435-
if (!ic->provided_data_sectors)
2436-
return -EINVAL;
2437-
24382490
ic->sb->provided_data_sectors = cpu_to_le64(ic->provided_data_sectors);
24392491

24402492
return 0;
@@ -2939,9 +2991,7 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv)
29392991
goto bad;
29402992
}
29412993

2942-
ic->device_sectors = i_size_read(ic->dev->bdev->bd_inode) >> SECTOR_SHIFT;
2943-
journal_sectors = min((sector_t)DEFAULT_MAX_JOURNAL_SECTORS,
2944-
ic->device_sectors >> DEFAULT_JOURNAL_SIZE_FACTOR);
2994+
journal_sectors = 0;
29452995
interleave_sectors = DEFAULT_INTERLEAVE_SECTORS;
29462996
buffer_sectors = DEFAULT_BUFFER_SECTORS;
29472997
journal_watermark = DEFAULT_JOURNAL_WATERMARK;
@@ -2964,7 +3014,7 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv)
29643014
goto bad;
29653015
}
29663016
if (sscanf(opt_string, "journal_sectors:%u%c", &val, &dummy) == 1)
2967-
journal_sectors = val;
3017+
journal_sectors = val ? val : 1;
29683018
else if (sscanf(opt_string, "interleave_sectors:%u%c", &val, &dummy) == 1)
29693019
interleave_sectors = val;
29703020
else if (sscanf(opt_string, "buffer_sectors:%u%c", &val, &dummy) == 1)
@@ -2973,7 +3023,17 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv)
29733023
journal_watermark = val;
29743024
else if (sscanf(opt_string, "commit_time:%u%c", &val, &dummy) == 1)
29753025
sync_msec = val;
2976-
else if (sscanf(opt_string, "block_size:%u%c", &val, &dummy) == 1) {
3026+
else if (!memcmp(opt_string, "meta_device:", strlen("meta_device:"))) {
3027+
if (ic->meta_dev) {
3028+
dm_put_device(ti, ic->meta_dev);
3029+
ic->meta_dev = NULL;
3030+
}
3031+
r = dm_get_device(ti, strchr(opt_string, ':') + 1, dm_table_get_mode(ti->table), &ic->meta_dev);
3032+
if (r) {
3033+
ti->error = "Device lookup failed";
3034+
goto bad;
3035+
}
3036+
} else if (sscanf(opt_string, "block_size:%u%c", &val, &dummy) == 1) {
29773037
if (val < 1 << SECTOR_SHIFT ||
29783038
val > MAX_SECTORS_PER_BLOCK << SECTOR_SHIFT ||
29793039
(val & (val -1))) {
@@ -3004,6 +3064,21 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv)
30043064
}
30053065
}
30063066

3067+
ic->data_device_sectors = i_size_read(ic->dev->bdev->bd_inode) >> SECTOR_SHIFT;
3068+
if (!ic->meta_dev)
3069+
ic->meta_device_sectors = ic->data_device_sectors;
3070+
else
3071+
ic->meta_device_sectors = i_size_read(ic->meta_dev->bdev->bd_inode) >> SECTOR_SHIFT;
3072+
3073+
if (!journal_sectors) {
3074+
journal_sectors = min((sector_t)DEFAULT_MAX_JOURNAL_SECTORS,
3075+
ic->data_device_sectors >> DEFAULT_JOURNAL_SIZE_FACTOR);
3076+
}
3077+
3078+
if (!buffer_sectors)
3079+
buffer_sectors = 1;
3080+
ic->log2_buffer_sectors = min((int)__fls(buffer_sectors), 31 - SECTOR_SHIFT);
3081+
30073082
r = get_mac(&ic->internal_hash, &ic->internal_hash_alg, &ti->error,
30083083
"Invalid internal hash", "Error setting internal hash key");
30093084
if (r)
@@ -3139,11 +3214,19 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv)
31393214
goto bad;
31403215
}
31413216
/* make sure that ti->max_io_len doesn't overflow */
3142-
if (ic->sb->log2_interleave_sectors < MIN_LOG2_INTERLEAVE_SECTORS ||
3143-
ic->sb->log2_interleave_sectors > MAX_LOG2_INTERLEAVE_SECTORS) {
3144-
r = -EINVAL;
3145-
ti->error = "Invalid interleave_sectors in the superblock";
3146-
goto bad;
3217+
if (!ic->meta_dev) {
3218+
if (ic->sb->log2_interleave_sectors < MIN_LOG2_INTERLEAVE_SECTORS ||
3219+
ic->sb->log2_interleave_sectors > MAX_LOG2_INTERLEAVE_SECTORS) {
3220+
r = -EINVAL;
3221+
ti->error = "Invalid interleave_sectors in the superblock";
3222+
goto bad;
3223+
}
3224+
} else {
3225+
if (ic->sb->log2_interleave_sectors) {
3226+
r = -EINVAL;
3227+
ti->error = "Invalid interleave_sectors in the superblock";
3228+
goto bad;
3229+
}
31473230
}
31483231
ic->provided_data_sectors = le64_to_cpu(ic->sb->provided_data_sectors);
31493232
if (ic->provided_data_sectors != le64_to_cpu(ic->sb->provided_data_sectors)) {
@@ -3157,20 +3240,28 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv)
31573240
ti->error = "Journal mac mismatch";
31583241
goto bad;
31593242
}
3243+
3244+
try_smaller_buffer:
31603245
r = calculate_device_limits(ic);
31613246
if (r) {
3247+
if (ic->meta_dev) {
3248+
if (ic->log2_buffer_sectors > 3) {
3249+
ic->log2_buffer_sectors--;
3250+
goto try_smaller_buffer;
3251+
}
3252+
}
31623253
ti->error = "The device is too small";
31633254
goto bad;
31643255
}
3256+
if (!ic->meta_dev)
3257+
ic->log2_buffer_sectors = min(ic->log2_buffer_sectors, (__u8)__ffs(ic->metadata_run));
3258+
31653259
if (ti->len > ic->provided_data_sectors) {
31663260
r = -EINVAL;
31673261
ti->error = "Not enough provided sectors for requested mapping size";
31683262
goto bad;
31693263
}
31703264

3171-
if (!buffer_sectors)
3172-
buffer_sectors = 1;
3173-
ic->log2_buffer_sectors = min3((int)__fls(buffer_sectors), (int)__ffs(ic->metadata_run), 31 - SECTOR_SHIFT);
31743265

31753266
threshold = (__u64)ic->journal_entries * (100 - journal_watermark);
31763267
threshold += 50;
@@ -3194,8 +3285,8 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv)
31943285
(unsigned long long)ic->provided_data_sectors);
31953286
DEBUG_print(" log2_buffer_sectors %u\n", ic->log2_buffer_sectors);
31963287

3197-
ic->bufio = dm_bufio_client_create(ic->dev->bdev, 1U << (SECTOR_SHIFT + ic->log2_buffer_sectors),
3198-
1, 0, NULL, NULL);
3288+
ic->bufio = dm_bufio_client_create(ic->meta_dev ? ic->meta_dev->bdev : ic->dev->bdev,
3289+
1U << (SECTOR_SHIFT + ic->log2_buffer_sectors), 1, 0, NULL, NULL);
31993290
if (IS_ERR(ic->bufio)) {
32003291
r = PTR_ERR(ic->bufio);
32013292
ti->error = "Cannot initialize dm-bufio";
@@ -3227,9 +3318,11 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv)
32273318
ic->just_formatted = true;
32283319
}
32293320

3230-
r = dm_set_target_max_io_len(ti, 1U << ic->sb->log2_interleave_sectors);
3231-
if (r)
3232-
goto bad;
3321+
if (!ic->meta_dev) {
3322+
r = dm_set_target_max_io_len(ti, 1U << ic->sb->log2_interleave_sectors);
3323+
if (r)
3324+
goto bad;
3325+
}
32333326

32343327
if (!ic->internal_hash)
32353328
dm_integrity_set(ti, ic);
@@ -3265,6 +3358,8 @@ static void dm_integrity_dtr(struct dm_target *ti)
32653358
dm_io_client_destroy(ic->io);
32663359
if (ic->dev)
32673360
dm_put_device(ti, ic->dev);
3361+
if (ic->meta_dev)
3362+
dm_put_device(ti, ic->meta_dev);
32683363
dm_integrity_free_page_list(ic, ic->journal);
32693364
dm_integrity_free_page_list(ic, ic->journal_io);
32703365
dm_integrity_free_page_list(ic, ic->journal_xor);

0 commit comments

Comments
 (0)