Skip to content

Commit 312b3a9

Browse files
committed
Merge tag 'for-5.0-rc4-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux
Pull btrfs fixes from David Sterba:

 - regression fix: transaction commit can run away due to delayed ref waiting heuristic, this is not necessary now because of the proper reservation mechanism introduced in 5.0

 - regression fix: potential crash due to use-before-check of an ERR_PTR return value

 - fix for transaction abort during transaction commit that needs to properly clean up pending block groups

 - fix deadlock during b-tree node/leaf splitting, when this happens on some of the fundamental trees, we must prevent new tree block allocation to re-enter indirectly via the block group flushing path

 - potential memory leak after errors during mount

* tag 'for-5.0-rc4-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux:
  btrfs: On error always free subvol_name in btrfs_mount
  btrfs: clean up pending block groups when transaction commit aborts
  btrfs: fix potential oops in device_list_add
  btrfs: don't end the transaction for delayed refs in throttle
  Btrfs: fix deadlock when allocating tree block during leaf/node split
2 parents 12491ed + 532b618 commit 312b3a9

File tree

4 files changed

+71
-38
lines changed

4 files changed

+71
-38
lines changed

fs/btrfs/ctree.c

Lines changed: 50 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -968,6 +968,48 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
968968
return 0;
969969
}
970970

971+
/*
 * Wrapper around btrfs_alloc_tree_block() that clears
 * trans->can_flush_pending_bgs for the duration of the allocation when
 * @root is the extent, chunk, device or free space root.
 *
 * All arguments are forwarded unchanged to btrfs_alloc_tree_block(), with
 * root->root_key.objectid supplied as the additional argument after
 * @parent_start.
 *
 * Returns whatever btrfs_alloc_tree_block() returns; the callers in this
 * file check the result with IS_ERR() before using it.
 */
static struct extent_buffer *alloc_tree_block_no_bg_flush(
972+
struct btrfs_trans_handle *trans,
973+
struct btrfs_root *root,
974+
u64 parent_start,
975+
const struct btrfs_disk_key *disk_key,
976+
int level,
977+
u64 hint,
978+
u64 empty_size)
979+
{
980+
struct btrfs_fs_info *fs_info = root->fs_info;
981+
struct extent_buffer *ret;
982+
983+
/*
984+
* If we are COWing a node/leaf from the extent, chunk, device or free
985+
* space trees, make sure that we do not finish block group creation of
986+
* pending block groups. We do this to avoid a deadlock.
987+
* COWing can result in allocation of a new chunk, and flushing pending
988+
* block groups (btrfs_create_pending_block_groups()) can be triggered
989+
* when finishing allocation of a new chunk. Creation of a pending block
990+
* group modifies the extent, chunk, device and free space trees,
991+
* therefore we could deadlock with ourselves since we are holding a
992+
* lock on an extent buffer that btrfs_create_pending_block_groups() may
993+
* try to COW later.
994+
* For similar reasons, we also need to delay flushing pending block
995+
* groups when splitting a leaf or node, from one of those trees, since
996+
* we are holding a write lock on it and its parent or when inserting a
997+
* new root node for one of those trees.
998+
*/
999+
if (root == fs_info->extent_root ||
1000+
root == fs_info->chunk_root ||
1001+
root == fs_info->dev_root ||
1002+
root == fs_info->free_space_root)
1003+
trans->can_flush_pending_bgs = false;
1004+
1005+
ret = btrfs_alloc_tree_block(trans, root, parent_start,
1006+
root->root_key.objectid, disk_key, level,
1007+
hint, empty_size);
1008+
/*
 * The flag is set back to true unconditionally: this happens for every
 * root, not only the four checked above, and the value the flag had on
 * entry is not saved or restored.
 */
trans->can_flush_pending_bgs = true;
1009+
1010+
return ret;
1011+
}
1012+
9711013
/*
9721014
* does the dirty work in cow of a single block. The parent block (if
9731015
* supplied) is updated to point to the new cow copy. The new buffer is marked
@@ -1015,28 +1057,8 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
10151057
if ((root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) && parent)
10161058
parent_start = parent->start;
10171059

1018-
/*
1019-
* If we are COWing a node/leaf from the extent, chunk, device or free
1020-
* space trees, make sure that we do not finish block group creation of
1021-
* pending block groups. We do this to avoid a deadlock.
1022-
* COWing can result in allocation of a new chunk, and flushing pending
1023-
* block groups (btrfs_create_pending_block_groups()) can be triggered
1024-
* when finishing allocation of a new chunk. Creation of a pending block
1025-
* group modifies the extent, chunk, device and free space trees,
1026-
* therefore we could deadlock with ourselves since we are holding a
1027-
* lock on an extent buffer that btrfs_create_pending_block_groups() may
1028-
* try to COW later.
1029-
*/
1030-
if (root == fs_info->extent_root ||
1031-
root == fs_info->chunk_root ||
1032-
root == fs_info->dev_root ||
1033-
root == fs_info->free_space_root)
1034-
trans->can_flush_pending_bgs = false;
1035-
1036-
cow = btrfs_alloc_tree_block(trans, root, parent_start,
1037-
root->root_key.objectid, &disk_key, level,
1038-
search_start, empty_size);
1039-
trans->can_flush_pending_bgs = true;
1060+
cow = alloc_tree_block_no_bg_flush(trans, root, parent_start, &disk_key,
1061+
level, search_start, empty_size);
10401062
if (IS_ERR(cow))
10411063
return PTR_ERR(cow);
10421064

@@ -3345,8 +3367,8 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans,
33453367
else
33463368
btrfs_node_key(lower, &lower_key, 0);
33473369

3348-
c = btrfs_alloc_tree_block(trans, root, 0, root->root_key.objectid,
3349-
&lower_key, level, root->node->start, 0);
3370+
c = alloc_tree_block_no_bg_flush(trans, root, 0, &lower_key, level,
3371+
root->node->start, 0);
33503372
if (IS_ERR(c))
33513373
return PTR_ERR(c);
33523374

@@ -3475,8 +3497,8 @@ static noinline int split_node(struct btrfs_trans_handle *trans,
34753497
mid = (c_nritems + 1) / 2;
34763498
btrfs_node_key(c, &disk_key, mid);
34773499

3478-
split = btrfs_alloc_tree_block(trans, root, 0, root->root_key.objectid,
3479-
&disk_key, level, c->start, 0);
3500+
split = alloc_tree_block_no_bg_flush(trans, root, 0, &disk_key, level,
3501+
c->start, 0);
34803502
if (IS_ERR(split))
34813503
return PTR_ERR(split);
34823504

@@ -4260,8 +4282,8 @@ static noinline int split_leaf(struct btrfs_trans_handle *trans,
42604282
else
42614283
btrfs_item_key(l, &disk_key, mid);
42624284

4263-
right = btrfs_alloc_tree_block(trans, root, 0, root->root_key.objectid,
4264-
&disk_key, 0, l->start, 0);
4285+
right = alloc_tree_block_no_bg_flush(trans, root, 0, &disk_key, 0,
4286+
l->start, 0);
42654287
if (IS_ERR(right))
42664288
return PTR_ERR(right);
42674289

fs/btrfs/super.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1621,6 +1621,7 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
16211621
flags | SB_RDONLY, device_name, data);
16221622
if (IS_ERR(mnt_root)) {
16231623
root = ERR_CAST(mnt_root);
1624+
kfree(subvol_name);
16241625
goto out;
16251626
}
16261627

@@ -1630,12 +1631,14 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
16301631
if (error < 0) {
16311632
root = ERR_PTR(error);
16321633
mntput(mnt_root);
1634+
kfree(subvol_name);
16331635
goto out;
16341636
}
16351637
}
16361638
}
16371639
if (IS_ERR(mnt_root)) {
16381640
root = ERR_CAST(mnt_root);
1641+
kfree(subvol_name);
16391642
goto out;
16401643
}
16411644

fs/btrfs/transaction.c

Lines changed: 16 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -850,14 +850,6 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
850850

851851
btrfs_trans_release_chunk_metadata(trans);
852852

853-
if (lock && should_end_transaction(trans) &&
854-
READ_ONCE(cur_trans->state) == TRANS_STATE_RUNNING) {
855-
spin_lock(&info->trans_lock);
856-
if (cur_trans->state == TRANS_STATE_RUNNING)
857-
cur_trans->state = TRANS_STATE_BLOCKED;
858-
spin_unlock(&info->trans_lock);
859-
}
860-
861853
if (lock && READ_ONCE(cur_trans->state) == TRANS_STATE_BLOCKED) {
862854
if (throttle)
863855
return btrfs_commit_transaction(trans);
@@ -1879,6 +1871,21 @@ static void cleanup_transaction(struct btrfs_trans_handle *trans, int err)
18791871
kmem_cache_free(btrfs_trans_handle_cachep, trans);
18801872
}
18811873

1874+
/*
1875+
* Release reserved delayed ref space of all pending block groups of the
1876+
* transaction and remove them from the list
1877+
*
* Walks trans->new_bgs and, for each entry, releases one unit via
* btrfs_delayed_refs_rsv_release() and unlinks the entry from the list.
* The block group structures themselves are not freed here.
*
* In this file it is invoked from the cleanup_transaction label of
* btrfs_commit_transaction().
*/
1878+
static void btrfs_cleanup_pending_block_groups(struct btrfs_trans_handle *trans)
1879+
{
1880+
struct btrfs_fs_info *fs_info = trans->fs_info;
1881+
struct btrfs_block_group_cache *block_group, *tmp;
1882+
1883+
/* The _safe iterator is needed because each entry is unlinked in the loop body. */
list_for_each_entry_safe(block_group, tmp, &trans->new_bgs, bg_list) {
1884+
/* One release of size 1 per pending block group. */
btrfs_delayed_refs_rsv_release(fs_info, 1);
1885+
/* list_del_init() leaves bg_list as an empty, self-pointing list head. */
list_del_init(&block_group->bg_list);
1886+
}
1887+
}
1888+
18821889
static inline int btrfs_start_delalloc_flush(struct btrfs_fs_info *fs_info)
18831890
{
18841891
/*
@@ -2270,6 +2277,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
22702277
btrfs_scrub_continue(fs_info);
22712278
cleanup_transaction:
22722279
btrfs_trans_release_metadata(trans);
2280+
btrfs_cleanup_pending_block_groups(trans);
22732281
btrfs_trans_release_chunk_metadata(trans);
22742282
trans->block_rsv = NULL;
22752283
btrfs_warn(fs_info, "Skipping commit of aborted transaction.");

fs/btrfs/volumes.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -957,11 +957,11 @@ static noinline struct btrfs_device *device_list_add(const char *path,
957957
else
958958
fs_devices = alloc_fs_devices(disk_super->fsid, NULL);
959959

960-
fs_devices->fsid_change = fsid_change_in_progress;
961-
962960
if (IS_ERR(fs_devices))
963961
return ERR_CAST(fs_devices);
964962

963+
fs_devices->fsid_change = fsid_change_in_progress;
964+
965965
mutex_lock(&fs_devices->device_list_mutex);
966966
list_add(&fs_devices->fs_list, &fs_uuids);
967967

0 commit comments

Comments
 (0)