Skip to content

Commit 4222ea7

Browse files
fdmananakdave
authored andcommitted
Btrfs: fix deadlock on tree root leaf when finding free extent
When we are writing out a free space cache, during the transaction commit phase, we can end up in a deadlock which results in a stack trace like the following: schedule+0x28/0x80 btrfs_tree_read_lock+0x8e/0x120 [btrfs] ? finish_wait+0x80/0x80 btrfs_read_lock_root_node+0x2f/0x40 [btrfs] btrfs_search_slot+0xf6/0x9f0 [btrfs] ? evict_refill_and_join+0xd0/0xd0 [btrfs] ? inode_insert5+0x119/0x190 btrfs_lookup_inode+0x3a/0xc0 [btrfs] ? kmem_cache_alloc+0x166/0x1d0 btrfs_iget+0x113/0x690 [btrfs] __lookup_free_space_inode+0xd8/0x150 [btrfs] lookup_free_space_inode+0x5b/0xb0 [btrfs] load_free_space_cache+0x7c/0x170 [btrfs] ? cache_block_group+0x72/0x3b0 [btrfs] cache_block_group+0x1b3/0x3b0 [btrfs] ? finish_wait+0x80/0x80 find_free_extent+0x799/0x1010 [btrfs] btrfs_reserve_extent+0x9b/0x180 [btrfs] btrfs_alloc_tree_block+0x1b3/0x4f0 [btrfs] __btrfs_cow_block+0x11d/0x500 [btrfs] btrfs_cow_block+0xdc/0x180 [btrfs] btrfs_search_slot+0x3bd/0x9f0 [btrfs] btrfs_lookup_inode+0x3a/0xc0 [btrfs] ? kmem_cache_alloc+0x166/0x1d0 btrfs_update_inode_item+0x46/0x100 [btrfs] cache_save_setup+0xe4/0x3a0 [btrfs] btrfs_start_dirty_block_groups+0x1be/0x480 [btrfs] btrfs_commit_transaction+0xcb/0x8b0 [btrfs] At cache_save_setup() we need to update the inode item of a block group's cache which is located in the tree root (fs_info->tree_root), which means that it may result in COWing a leaf from that tree. If that happens we need to find a free metadata extent and while looking for one, if we find a block group which was not cached yet we attempt to load its cache by calling cache_block_group(). However this function will try to load the inode of the free space cache, which requires finding the matching inode item in the tree root - if that inode item is located in the same leaf as the inode item of the space cache we are updating at cache_save_setup(), we end up in a deadlock, since we try to obtain a read lock on the same extent buffer that we previously write locked. So fix this by using the tree root's commit root when searching for a block group's free space cache inode item when we are attempting to load a free space cache. This is safe since block groups once loaded stay in memory forever, as well as their caches, so after they are first loaded we will never need to read their inode items again. For new block groups, once they are created they get their ->cached field set to BTRFS_CACHE_FINISHED meaning we will not need to read their inode item. Reported-by: Andrew Nelson <andrew.s.nelson@gmail.com> Link: https://lore.kernel.org/linux-btrfs/CAPTELenq9x5KOWuQ+fa7h1r3nsJG8vyiTH8+ifjURc_duHh2Wg@mail.gmail.com/ Fixes: 9d66e23 ("Btrfs: load free space cache if it exists") Tested-by: Andrew Nelson <andrew.s.nelson@gmail.com> Reviewed-by: Josef Bacik <josef@toxicpanda.com> Signed-off-by: Filipe Manana <fdmanana@suse.com> Signed-off-by: David Sterba <dsterba@suse.com>
1 parent 7e17916 commit 4222ea7

File tree

3 files changed

+46
-11
lines changed

3 files changed

+46
-11
lines changed

fs/btrfs/ctree.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3163,6 +3163,9 @@ void btrfs_destroy_inode(struct inode *inode);
31633163
int btrfs_drop_inode(struct inode *inode);
31643164
int __init btrfs_init_cachep(void);
31653165
void __cold btrfs_destroy_cachep(void);
3166+
struct inode *btrfs_iget_path(struct super_block *s, struct btrfs_key *location,
3167+
struct btrfs_root *root, int *new,
3168+
struct btrfs_path *path);
31663169
struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location,
31673170
struct btrfs_root *root, int *was_new);
31683171
struct extent_map *btrfs_get_extent(struct btrfs_inode *inode,

fs/btrfs/free-space-cache.c

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,8 @@ static struct inode *__lookup_free_space_inode(struct btrfs_root *root,
7575
* sure NOFS is set to keep us from deadlocking.
7676
*/
7777
nofs_flag = memalloc_nofs_save();
78-
inode = btrfs_iget(fs_info->sb, &location, root, NULL);
78+
inode = btrfs_iget_path(fs_info->sb, &location, root, NULL, path);
79+
btrfs_release_path(path);
7980
memalloc_nofs_restore(nofs_flag);
8081
if (IS_ERR(inode))
8182
return inode;
@@ -838,6 +839,25 @@ int load_free_space_cache(struct btrfs_fs_info *fs_info,
838839
path->search_commit_root = 1;
839840
path->skip_locking = 1;
840841

842+
/*
843+
* We must pass a path with search_commit_root set to btrfs_iget in
844+
* order to avoid a deadlock when allocating extents for the tree root.
845+
*
846+
* When we are COWing an extent buffer from the tree root, when looking
847+
* for a free extent, at extent-tree.c:find_free_extent(), we can find
848+
* block group without its free space cache loaded. When we find one
849+
* we must load its space cache which requires reading its free space
850+
* cache's inode item from the root tree. If this inode item is located
851+
* in the same leaf that we started COWing before, then we end up in
852+
* deadlock on the extent buffer (trying to read lock it when we
853+
* previously write locked it).
854+
*
855+
* It's safe to read the inode item using the commit root because
856+
* block groups, once loaded, stay in memory forever (until they are
857+
* removed) as well as their space caches once loaded. New block groups
858+
* once created get their ->cached field set to BTRFS_CACHE_FINISHED so
859+
* we will never try to read their inode item while the fs is mounted.
860+
*/
841861
inode = lookup_free_space_inode(fs_info, block_group, path);
842862
if (IS_ERR(inode)) {
843863
btrfs_free_path(path);

fs/btrfs/inode.c

Lines changed: 22 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -3569,10 +3569,11 @@ static noinline int acls_after_inode_item(struct extent_buffer *leaf,
35693569
/*
35703570
* read an inode from the btree into the in-memory inode
35713571
*/
3572-
static int btrfs_read_locked_inode(struct inode *inode)
3572+
static int btrfs_read_locked_inode(struct inode *inode,
3573+
struct btrfs_path *in_path)
35733574
{
35743575
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
3575-
struct btrfs_path *path;
3576+
struct btrfs_path *path = in_path;
35763577
struct extent_buffer *leaf;
35773578
struct btrfs_inode_item *inode_item;
35783579
struct btrfs_root *root = BTRFS_I(inode)->root;
@@ -3588,15 +3589,18 @@ static int btrfs_read_locked_inode(struct inode *inode)
35883589
if (!ret)
35893590
filled = true;
35903591

3591-
path = btrfs_alloc_path();
3592-
if (!path)
3593-
return -ENOMEM;
3592+
if (!path) {
3593+
path = btrfs_alloc_path();
3594+
if (!path)
3595+
return -ENOMEM;
3596+
}
35943597

35953598
memcpy(&location, &BTRFS_I(inode)->location, sizeof(location));
35963599

35973600
ret = btrfs_lookup_inode(NULL, root, path, &location, 0);
35983601
if (ret) {
3599-
btrfs_free_path(path);
3602+
if (path != in_path)
3603+
btrfs_free_path(path);
36003604
return ret;
36013605
}
36023606

@@ -3721,7 +3725,8 @@ static int btrfs_read_locked_inode(struct inode *inode)
37213725
btrfs_ino(BTRFS_I(inode)),
37223726
root->root_key.objectid, ret);
37233727
}
3724-
btrfs_free_path(path);
3728+
if (path != in_path)
3729+
btrfs_free_path(path);
37253730

37263731
if (!maybe_acls)
37273732
cache_no_acl(inode);
@@ -5643,8 +5648,9 @@ static struct inode *btrfs_iget_locked(struct super_block *s,
56435648
/* Get an inode object given its location and corresponding root.
56445649
* Returns in *is_new if the inode was read from disk
56455650
*/
5646-
struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location,
5647-
struct btrfs_root *root, int *new)
5651+
struct inode *btrfs_iget_path(struct super_block *s, struct btrfs_key *location,
5652+
struct btrfs_root *root, int *new,
5653+
struct btrfs_path *path)
56485654
{
56495655
struct inode *inode;
56505656

@@ -5655,7 +5661,7 @@ struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location,
56555661
if (inode->i_state & I_NEW) {
56565662
int ret;
56575663

5658-
ret = btrfs_read_locked_inode(inode);
5664+
ret = btrfs_read_locked_inode(inode, path);
56595665
if (!ret) {
56605666
inode_tree_add(inode);
56615667
unlock_new_inode(inode);
@@ -5677,6 +5683,12 @@ struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location,
56775683
return inode;
56785684
}
56795685

5686+
struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location,
5687+
struct btrfs_root *root, int *new)
5688+
{
5689+
return btrfs_iget_path(s, location, root, new, NULL);
5690+
}
5691+
56805692
static struct inode *new_simple_dir(struct super_block *s,
56815693
struct btrfs_key *key,
56825694
struct btrfs_root *root)

0 commit comments

Comments
 (0)