Skip to content

Commit 581c176

Browse files
adam900710 authored and kdave committed
btrfs: Validate child tree block's level and first key
We have several reports about node pointers pointing to incorrect child tree blocks, which could even have the wrong owner and level but still have a valid generation and checksum.

Although btrfs check could handle it and print an error message like:

    leaf parent key incorrect 60670574592

the kernel doesn't have enough checks to detect this type of corruption.

At least add such a check to read_tree_block() and btrfs_read_buffer(), where we need two new parameters, @level and @first_key, to verify the child tree block.

The new @level check is mandatory and all call sites are already modified to extract the expected level from their call chain.

While @first_key is optional, the following call sites skip that check:

1) Root node/leaf
   As ROOT_ITEM doesn't contain the first key, skip the @first_key check.
2) Direct backref
   Only the parent bytenr and level are known and we need to resolve the key all by ourselves, so skip the @first_key check.

Another note about this verification: it needs extra info from the nodeptr or ROOT_ITEM, so it can't fit into the current tree-checker framework, which is limited to the node/leaf boundary.

Signed-off-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
1 parent 3c0efdf commit 581c176

File tree

10 files changed

+170
-46
lines changed

10 files changed

+170
-46
lines changed

fs/btrfs/backref.c

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -738,7 +738,8 @@ static int add_missing_keys(struct btrfs_fs_info *fs_info,
738738
BUG_ON(ref->key_for_search.type);
739739
BUG_ON(!ref->wanted_disk_byte);
740740

741-
eb = read_tree_block(fs_info, ref->wanted_disk_byte, 0);
741+
eb = read_tree_block(fs_info, ref->wanted_disk_byte, 0,
742+
ref->level - 1, NULL);
742743
if (IS_ERR(eb)) {
743744
free_pref(ref);
744745
return PTR_ERR(eb);
@@ -1288,7 +1289,8 @@ static int find_parent_nodes(struct btrfs_trans_handle *trans,
12881289
ref->level == 0) {
12891290
struct extent_buffer *eb;
12901291

1291-
eb = read_tree_block(fs_info, ref->parent, 0);
1292+
eb = read_tree_block(fs_info, ref->parent, 0,
1293+
ref->level, NULL);
12921294
if (IS_ERR(eb)) {
12931295
ret = PTR_ERR(eb);
12941296
goto out;

fs/btrfs/ctree.c

Lines changed: 22 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1354,6 +1354,7 @@ get_old_root(struct btrfs_root *root, u64 time_seq)
13541354
struct tree_mod_root *old_root = NULL;
13551355
u64 old_generation = 0;
13561356
u64 logical;
1357+
int level;
13571358

13581359
eb_root = btrfs_read_lock_root_node(root);
13591360
tm = __tree_mod_log_oldest_root(eb_root, time_seq);
@@ -1364,15 +1365,17 @@ get_old_root(struct btrfs_root *root, u64 time_seq)
13641365
old_root = &tm->old_root;
13651366
old_generation = tm->generation;
13661367
logical = old_root->logical;
1368+
level = old_root->level;
13671369
} else {
13681370
logical = eb_root->start;
1371+
level = btrfs_header_level(eb_root);
13691372
}
13701373

13711374
tm = tree_mod_log_search(fs_info, logical, time_seq);
13721375
if (old_root && tm && tm->op != MOD_LOG_KEY_REMOVE_WHILE_FREEING) {
13731376
btrfs_tree_read_unlock(eb_root);
13741377
free_extent_buffer(eb_root);
1375-
old = read_tree_block(fs_info, logical, 0);
1378+
old = read_tree_block(fs_info, logical, 0, level, NULL);
13761379
if (WARN_ON(IS_ERR(old) || !extent_buffer_uptodate(old))) {
13771380
if (!IS_ERR(old))
13781381
free_extent_buffer(old);
@@ -1592,6 +1595,7 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans,
15921595
btrfs_set_lock_blocking(parent);
15931596

15941597
for (i = start_slot; i <= end_slot; i++) {
1598+
struct btrfs_key first_key;
15951599
int close = 1;
15961600

15971601
btrfs_node_key(parent, &disk_key, i);
@@ -1601,6 +1605,7 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans,
16011605
progress_passed = 1;
16021606
blocknr = btrfs_node_blockptr(parent, i);
16031607
gen = btrfs_node_ptr_generation(parent, i);
1608+
btrfs_node_key_to_cpu(parent, &first_key, i);
16041609
if (last_block == 0)
16051610
last_block = blocknr;
16061611

@@ -1624,15 +1629,18 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans,
16241629
uptodate = 0;
16251630
if (!cur || !uptodate) {
16261631
if (!cur) {
1627-
cur = read_tree_block(fs_info, blocknr, gen);
1632+
cur = read_tree_block(fs_info, blocknr, gen,
1633+
parent_level - 1,
1634+
&first_key);
16281635
if (IS_ERR(cur)) {
16291636
return PTR_ERR(cur);
16301637
} else if (!extent_buffer_uptodate(cur)) {
16311638
free_extent_buffer(cur);
16321639
return -EIO;
16331640
}
16341641
} else if (!uptodate) {
1635-
err = btrfs_read_buffer(cur, gen);
1642+
err = btrfs_read_buffer(cur, gen,
1643+
parent_level - 1,&first_key);
16361644
if (err) {
16371645
free_extent_buffer(cur);
16381646
return err;
@@ -1785,14 +1793,17 @@ read_node_slot(struct btrfs_fs_info *fs_info, struct extent_buffer *parent,
17851793
{
17861794
int level = btrfs_header_level(parent);
17871795
struct extent_buffer *eb;
1796+
struct btrfs_key first_key;
17881797

17891798
if (slot < 0 || slot >= btrfs_header_nritems(parent))
17901799
return ERR_PTR(-ENOENT);
17911800

17921801
BUG_ON(level == 0);
17931802

1803+
btrfs_node_key_to_cpu(parent, &first_key, slot);
17941804
eb = read_tree_block(fs_info, btrfs_node_blockptr(parent, slot),
1795-
btrfs_node_ptr_generation(parent, slot));
1805+
btrfs_node_ptr_generation(parent, slot),
1806+
level - 1, &first_key);
17961807
if (!IS_ERR(eb) && !extent_buffer_uptodate(eb)) {
17971808
free_extent_buffer(eb);
17981809
eb = ERR_PTR(-EIO);
@@ -2388,10 +2399,14 @@ read_block_for_search(struct btrfs_root *root, struct btrfs_path *p,
23882399
u64 gen;
23892400
struct extent_buffer *b = *eb_ret;
23902401
struct extent_buffer *tmp;
2402+
struct btrfs_key first_key;
23912403
int ret;
2404+
int parent_level;
23922405

23932406
blocknr = btrfs_node_blockptr(b, slot);
23942407
gen = btrfs_node_ptr_generation(b, slot);
2408+
parent_level = btrfs_header_level(b);
2409+
btrfs_node_key_to_cpu(b, &first_key, slot);
23952410

23962411
tmp = find_extent_buffer(fs_info, blocknr);
23972412
if (tmp) {
@@ -2410,7 +2425,7 @@ read_block_for_search(struct btrfs_root *root, struct btrfs_path *p,
24102425
btrfs_set_path_blocking(p);
24112426

24122427
/* now we're allowed to do a blocking uptodate check */
2413-
ret = btrfs_read_buffer(tmp, gen);
2428+
ret = btrfs_read_buffer(tmp, gen, parent_level - 1, &first_key);
24142429
if (!ret) {
24152430
*eb_ret = tmp;
24162431
return 0;
@@ -2437,7 +2452,8 @@ read_block_for_search(struct btrfs_root *root, struct btrfs_path *p,
24372452
btrfs_release_path(p);
24382453

24392454
ret = -EAGAIN;
2440-
tmp = read_tree_block(fs_info, blocknr, 0);
2455+
tmp = read_tree_block(fs_info, blocknr, 0, parent_level - 1,
2456+
&first_key);
24412457
if (!IS_ERR(tmp)) {
24422458
/*
24432459
* If the read above didn't mark this buffer up to date,

fs/btrfs/disk-io.c

Lines changed: 82 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -427,13 +427,59 @@ static int btrfs_check_super_csum(struct btrfs_fs_info *fs_info,
427427
return ret;
428428
}
429429

430+
static int verify_level_key(struct btrfs_fs_info *fs_info,
431+
struct extent_buffer *eb, int level,
432+
struct btrfs_key *first_key)
433+
{
434+
int found_level;
435+
struct btrfs_key found_key;
436+
int ret;
437+
438+
found_level = btrfs_header_level(eb);
439+
if (found_level != level) {
440+
#ifdef CONFIG_BTRFS_DEBUG
441+
WARN_ON(1);
442+
btrfs_err(fs_info,
443+
"tree level mismatch detected, bytenr=%llu level expected=%u has=%u",
444+
eb->start, level, found_level);
445+
#endif
446+
return -EIO;
447+
}
448+
449+
if (!first_key)
450+
return 0;
451+
452+
if (found_level)
453+
btrfs_node_key_to_cpu(eb, &found_key, 0);
454+
else
455+
btrfs_item_key_to_cpu(eb, &found_key, 0);
456+
ret = btrfs_comp_cpu_keys(first_key, &found_key);
457+
458+
#ifdef CONFIG_BTRFS_DEBUG
459+
if (ret) {
460+
WARN_ON(1);
461+
btrfs_err(fs_info,
462+
"tree first key mismatch detected, bytenr=%llu key expected=(%llu, %u, %llu) has=(%llu, %u, %llu)",
463+
eb->start, first_key->objectid, first_key->type,
464+
first_key->offset, found_key.objectid,
465+
found_key.type, found_key.offset);
466+
}
467+
#endif
468+
return ret;
469+
}
470+
430471
/*
431472
* helper to read a given tree block, doing retries as required when
432473
* the checksums don't match and we have alternate mirrors to try.
474+
*
475+
* @parent_transid: expected transid, skip check if 0
476+
* @level: expected level, mandatory check
477+
* @first_key: expected key of first slot, skip check if NULL
433478
*/
434479
static int btree_read_extent_buffer_pages(struct btrfs_fs_info *fs_info,
435480
struct extent_buffer *eb,
436-
u64 parent_transid)
481+
u64 parent_transid, int level,
482+
struct btrfs_key *first_key)
437483
{
438484
struct extent_io_tree *io_tree;
439485
int failed = 0;
@@ -448,19 +494,23 @@ static int btree_read_extent_buffer_pages(struct btrfs_fs_info *fs_info,
448494
ret = read_extent_buffer_pages(io_tree, eb, WAIT_COMPLETE,
449495
mirror_num);
450496
if (!ret) {
451-
if (!verify_parent_transid(io_tree, eb,
497+
if (verify_parent_transid(io_tree, eb,
452498
parent_transid, 0))
453-
break;
454-
else
455499
ret = -EIO;
500+
else if (verify_level_key(fs_info, eb, level,
501+
first_key))
502+
ret = -EUCLEAN;
503+
else
504+
break;
456505
}
457506

458507
/*
459508
* This buffer's crc is fine, but its contents are corrupted, so
460509
* there is no reason to read the other copies, they won't be
461510
* any less wrong.
462511
*/
463-
if (test_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags))
512+
if (test_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags) ||
513+
ret == -EUCLEAN)
464514
break;
465515

466516
num_copies = btrfs_num_copies(fs_info,
@@ -1049,8 +1099,17 @@ void btrfs_wait_tree_block_writeback(struct extent_buffer *buf)
10491099
buf->start, buf->start + buf->len - 1);
10501100
}
10511101

1102+
/*
1103+
* Read tree block at logical address @bytenr and do various basic but critical
1104+
* verification.
1105+
*
1106+
* @parent_transid: expected transid of this tree block, skip check if 0
1107+
* @level: expected level, mandatory check
1108+
* @first_key: expected key in slot 0, skip check if NULL
1109+
*/
10521110
struct extent_buffer *read_tree_block(struct btrfs_fs_info *fs_info, u64 bytenr,
1053-
u64 parent_transid)
1111+
u64 parent_transid, int level,
1112+
struct btrfs_key *first_key)
10541113
{
10551114
struct extent_buffer *buf = NULL;
10561115
int ret;
@@ -1059,7 +1118,8 @@ struct extent_buffer *read_tree_block(struct btrfs_fs_info *fs_info, u64 bytenr,
10591118
if (IS_ERR(buf))
10601119
return buf;
10611120

1062-
ret = btree_read_extent_buffer_pages(fs_info, buf, parent_transid);
1121+
ret = btree_read_extent_buffer_pages(fs_info, buf, parent_transid,
1122+
level, first_key);
10631123
if (ret) {
10641124
free_extent_buffer(buf);
10651125
return ERR_PTR(ret);
@@ -1388,6 +1448,7 @@ static struct btrfs_root *btrfs_read_tree_root(struct btrfs_root *tree_root,
13881448
struct btrfs_path *path;
13891449
u64 generation;
13901450
int ret;
1451+
int level;
13911452

13921453
path = btrfs_alloc_path();
13931454
if (!path)
@@ -1410,9 +1471,10 @@ static struct btrfs_root *btrfs_read_tree_root(struct btrfs_root *tree_root,
14101471
}
14111472

14121473
generation = btrfs_root_generation(&root->root_item);
1474+
level = btrfs_root_level(&root->root_item);
14131475
root->node = read_tree_block(fs_info,
14141476
btrfs_root_bytenr(&root->root_item),
1415-
generation);
1477+
generation, level, NULL);
14161478
if (IS_ERR(root->node)) {
14171479
ret = PTR_ERR(root->node);
14181480
goto find_fail;
@@ -2261,6 +2323,7 @@ static int btrfs_replay_log(struct btrfs_fs_info *fs_info,
22612323
struct btrfs_root *log_tree_root;
22622324
struct btrfs_super_block *disk_super = fs_info->super_copy;
22632325
u64 bytenr = btrfs_super_log_root(disk_super);
2326+
int level = btrfs_super_log_root_level(disk_super);
22642327

22652328
if (fs_devices->rw_devices == 0) {
22662329
btrfs_warn(fs_info, "log replay required on RO media");
@@ -2274,7 +2337,8 @@ static int btrfs_replay_log(struct btrfs_fs_info *fs_info,
22742337
__setup_root(log_tree_root, fs_info, BTRFS_TREE_LOG_OBJECTID);
22752338

22762339
log_tree_root->node = read_tree_block(fs_info, bytenr,
2277-
fs_info->generation + 1);
2340+
fs_info->generation + 1,
2341+
level, NULL);
22782342
if (IS_ERR(log_tree_root->node)) {
22792343
btrfs_warn(fs_info, "failed to read log tree");
22802344
ret = PTR_ERR(log_tree_root->node);
@@ -2390,6 +2454,7 @@ int open_ctree(struct super_block *sb,
23902454
int num_backups_tried = 0;
23912455
int backup_index = 0;
23922456
int clear_free_space_tree = 0;
2457+
int level;
23932458

23942459
tree_root = fs_info->tree_root = btrfs_alloc_root(fs_info, GFP_KERNEL);
23952460
chunk_root = fs_info->chunk_root = btrfs_alloc_root(fs_info, GFP_KERNEL);
@@ -2725,12 +2790,13 @@ int open_ctree(struct super_block *sb,
27252790
}
27262791

27272792
generation = btrfs_super_chunk_root_generation(disk_super);
2793+
level = btrfs_super_chunk_root_level(disk_super);
27282794

27292795
__setup_root(chunk_root, fs_info, BTRFS_CHUNK_TREE_OBJECTID);
27302796

27312797
chunk_root->node = read_tree_block(fs_info,
27322798
btrfs_super_chunk_root(disk_super),
2733-
generation);
2799+
generation, level, NULL);
27342800
if (IS_ERR(chunk_root->node) ||
27352801
!extent_buffer_uptodate(chunk_root->node)) {
27362802
btrfs_err(fs_info, "failed to read chunk root");
@@ -2764,10 +2830,11 @@ int open_ctree(struct super_block *sb,
27642830

27652831
retry_root_backup:
27662832
generation = btrfs_super_generation(disk_super);
2833+
level = btrfs_super_root_level(disk_super);
27672834

27682835
tree_root->node = read_tree_block(fs_info,
27692836
btrfs_super_root(disk_super),
2770-
generation);
2837+
generation, level, NULL);
27712838
if (IS_ERR(tree_root->node) ||
27722839
!extent_buffer_uptodate(tree_root->node)) {
27732840
btrfs_warn(fs_info, "failed to read tree root");
@@ -3887,12 +3954,14 @@ void btrfs_btree_balance_dirty_nodelay(struct btrfs_fs_info *fs_info)
38873954
__btrfs_btree_balance_dirty(fs_info, 0);
38883955
}
38893956

3890-
int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid)
3957+
int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid, int level,
3958+
struct btrfs_key *first_key)
38913959
{
38923960
struct btrfs_root *root = BTRFS_I(buf->pages[0]->mapping->host)->root;
38933961
struct btrfs_fs_info *fs_info = root->fs_info;
38943962

3895-
return btree_read_extent_buffer_pages(fs_info, buf, parent_transid);
3963+
return btree_read_extent_buffer_pages(fs_info, buf, parent_transid,
3964+
level, first_key);
38963965
}
38973966

38983967
static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info)

fs/btrfs/disk-io.h

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -52,8 +52,9 @@ static inline u64 btrfs_sb_offset(int mirror)
5252
struct btrfs_device;
5353
struct btrfs_fs_devices;
5454

55-
struct extent_buffer *read_tree_block(struct btrfs_fs_info *fs_info,
56-
u64 bytenr, u64 parent_transid);
55+
struct extent_buffer *read_tree_block(struct btrfs_fs_info *fs_info, u64 bytenr,
56+
u64 parent_transid, int level,
57+
struct btrfs_key *first_key);
5758
void readahead_tree_block(struct btrfs_fs_info *fs_info, u64 bytenr);
5859
int reada_tree_block_flagged(struct btrfs_fs_info *fs_info, u64 bytenr,
5960
int mirror_num, struct extent_buffer **eb);
@@ -123,7 +124,8 @@ static inline void btrfs_put_fs_root(struct btrfs_root *root)
123124
void btrfs_mark_buffer_dirty(struct extent_buffer *buf);
124125
int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid,
125126
int atomic);
126-
int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid);
127+
int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid, int level,
128+
struct btrfs_key *first_key);
127129
u32 btrfs_csum_data(const char *data, u32 seed, size_t len);
128130
void btrfs_csum_final(u32 crc, u8 *result);
129131
blk_status_t btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio,

fs/btrfs/extent-tree.c

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8710,6 +8710,7 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans,
87108710
u64 parent;
87118711
u32 blocksize;
87128712
struct btrfs_key key;
8713+
struct btrfs_key first_key;
87138714
struct extent_buffer *next;
87148715
int level = wc->level;
87158716
int reada = 0;
@@ -8730,6 +8731,8 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans,
87308731
}
87318732

87328733
bytenr = btrfs_node_blockptr(path->nodes[level], path->slots[level]);
8734+
btrfs_node_key_to_cpu(path->nodes[level], &first_key,
8735+
path->slots[level]);
87338736
blocksize = fs_info->nodesize;
87348737

87358738
next = find_extent_buffer(fs_info, bytenr);
@@ -8794,7 +8797,8 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans,
87948797
if (!next) {
87958798
if (reada && level == 1)
87968799
reada_walk_down(trans, root, wc, path);
8797-
next = read_tree_block(fs_info, bytenr, generation);
8800+
next = read_tree_block(fs_info, bytenr, generation, level - 1,
8801+
&first_key);
87988802
if (IS_ERR(next)) {
87998803
return PTR_ERR(next);
88008804
} else if (!extent_buffer_uptodate(next)) {

0 commit comments

Comments
 (0)