Skip to content

Commit 47ab2a6

Browse files
Josef Bacik authored and masoncl committed
Btrfs: remove empty block groups automatically
One problem that has plagued us is that a user will use up all of his space with data, remove a bunch of that data, and then try to create a bunch of small files and run out of space. This happens because all the chunks were allocated for data since the metadata requirements were so low. But now there's a bunch of empty data block groups and not enough metadata space to do anything.

This patch solves this problem by automatically deleting empty block groups. If we notice the used count go down to 0 when deleting, or notice on mount that a block group has a used count of 0, then we will queue it to be deleted. When the cleaner thread runs we will double check to make sure the block group is still empty and then we will delete it.

This patch has the side effect of no longer having a bunch of BUG_ON()'s in the chunk delete code, which will be helpful for both this and relocate. Thanks,

Signed-off-by: Josef Bacik <jbacik@fb.com>
Signed-off-by: Chris Mason <clm@fb.com>
1 parent 8407f55 commit 47ab2a6

File tree

6 files changed

+226
-49
lines changed

6 files changed

+226
-49
lines changed

fs/btrfs/ctree.h

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1298,8 +1298,8 @@ struct btrfs_block_group_cache {
12981298
*/
12991299
struct list_head cluster_list;
13001300

1301-
/* For delayed block group creation */
1302-
struct list_head new_bg_list;
1301+
/* For delayed block group creation or deletion of empty block groups */
1302+
struct list_head bg_list;
13031303
};
13041304

13051305
/* delayed seq elem */
@@ -1568,6 +1568,7 @@ struct btrfs_fs_info {
15681568
int do_barriers;
15691569
int closing;
15701570
int log_root_recovering;
1571+
int open;
15711572

15721573
u64 total_pinned;
15731574

@@ -1717,6 +1718,9 @@ struct btrfs_fs_info {
17171718

17181719
/* Used to reclaim the metadata space in the background. */
17191720
struct work_struct async_reclaim_work;
1721+
1722+
spinlock_t unused_bgs_lock;
1723+
struct list_head unused_bgs;
17201724
};
17211725

17221726
struct btrfs_subvolume_writers {
@@ -3344,6 +3348,7 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
33443348
u64 size);
33453349
int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
33463350
struct btrfs_root *root, u64 group_start);
3351+
void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info);
33473352
void btrfs_create_pending_block_groups(struct btrfs_trans_handle *trans,
33483353
struct btrfs_root *root);
33493354
u64 btrfs_get_alloc_profile(struct btrfs_root *root, int data);

fs/btrfs/disk-io.c

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1769,6 +1769,7 @@ static int cleaner_kthread(void *arg)
17691769
}
17701770

17711771
btrfs_run_delayed_iputs(root);
1772+
btrfs_delete_unused_bgs(root->fs_info);
17721773
again = btrfs_clean_one_deleted_snapshot(root);
17731774
mutex_unlock(&root->fs_info->cleaner_mutex);
17741775

@@ -2230,6 +2231,7 @@ int open_ctree(struct super_block *sb,
22302231
spin_lock_init(&fs_info->super_lock);
22312232
spin_lock_init(&fs_info->qgroup_op_lock);
22322233
spin_lock_init(&fs_info->buffer_lock);
2234+
spin_lock_init(&fs_info->unused_bgs_lock);
22332235
rwlock_init(&fs_info->tree_mod_log_lock);
22342236
mutex_init(&fs_info->reloc_mutex);
22352237
mutex_init(&fs_info->delalloc_root_mutex);
@@ -2239,6 +2241,7 @@ int open_ctree(struct super_block *sb,
22392241
INIT_LIST_HEAD(&fs_info->dirty_cowonly_roots);
22402242
INIT_LIST_HEAD(&fs_info->space_info);
22412243
INIT_LIST_HEAD(&fs_info->tree_mod_seq_list);
2244+
INIT_LIST_HEAD(&fs_info->unused_bgs);
22422245
btrfs_mapping_init(&fs_info->mapping_tree);
22432246
btrfs_init_block_rsv(&fs_info->global_block_rsv,
22442247
BTRFS_BLOCK_RSV_GLOBAL);
@@ -2977,6 +2980,8 @@ int open_ctree(struct super_block *sb,
29772980
fs_info->update_uuid_tree_gen = 1;
29782981
}
29792982

2983+
fs_info->open = 1;
2984+
29802985
return 0;
29812986

29822987
fail_qgroup:
@@ -3688,6 +3693,7 @@ void close_ctree(struct btrfs_root *root)
36883693
invalidate_inode_pages2(fs_info->btree_inode->i_mapping);
36893694
btrfs_stop_all_workers(fs_info);
36903695

3696+
fs_info->open = 0;
36913697
free_root_pointers(fs_info, 1);
36923698

36933699
iput(fs_info->btree_inode);

fs/btrfs/extent-tree.c

Lines changed: 134 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -5433,6 +5433,20 @@ static int update_block_group(struct btrfs_root *root,
54335433
spin_unlock(&cache->space_info->lock);
54345434
} else {
54355435
old_val -= num_bytes;
5436+
5437+
/*
5438+
* No longer have used bytes in this block group, queue
5439+
* it for deletion.
5440+
*/
5441+
if (old_val == 0) {
5442+
spin_lock(&info->unused_bgs_lock);
5443+
if (list_empty(&cache->bg_list)) {
5444+
btrfs_get_block_group(cache);
5445+
list_add_tail(&cache->bg_list,
5446+
&info->unused_bgs);
5447+
}
5448+
spin_unlock(&info->unused_bgs_lock);
5449+
}
54365450
btrfs_set_block_group_used(&cache->item, old_val);
54375451
cache->pinned += num_bytes;
54385452
cache->space_info->bytes_pinned += num_bytes;
@@ -8855,6 +8869,16 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
88558869
}
88568870
up_write(&info->commit_root_sem);
88578871

8872+
spin_lock(&info->unused_bgs_lock);
8873+
while (!list_empty(&info->unused_bgs)) {
8874+
block_group = list_first_entry(&info->unused_bgs,
8875+
struct btrfs_block_group_cache,
8876+
bg_list);
8877+
list_del_init(&block_group->bg_list);
8878+
btrfs_put_block_group(block_group);
8879+
}
8880+
spin_unlock(&info->unused_bgs_lock);
8881+
88588882
spin_lock(&info->block_group_cache_lock);
88598883
while ((n = rb_last(&info->block_group_cache_tree)) != NULL) {
88608884
block_group = rb_entry(n, struct btrfs_block_group_cache,
@@ -8989,7 +9013,7 @@ btrfs_create_block_group_cache(struct btrfs_root *root, u64 start, u64 size)
89899013
init_rwsem(&cache->data_rwsem);
89909014
INIT_LIST_HEAD(&cache->list);
89919015
INIT_LIST_HEAD(&cache->cluster_list);
8992-
INIT_LIST_HEAD(&cache->new_bg_list);
9016+
INIT_LIST_HEAD(&cache->bg_list);
89939017
btrfs_init_free_space_ctl(cache);
89949018

89959019
return cache;
@@ -9130,8 +9154,18 @@ int btrfs_read_block_groups(struct btrfs_root *root)
91309154
__link_block_group(space_info, cache);
91319155

91329156
set_avail_alloc_bits(root->fs_info, cache->flags);
9133-
if (btrfs_chunk_readonly(root, cache->key.objectid))
9157+
if (btrfs_chunk_readonly(root, cache->key.objectid)) {
91349158
set_block_group_ro(cache, 1);
9159+
} else if (btrfs_block_group_used(&cache->item) == 0) {
9160+
spin_lock(&info->unused_bgs_lock);
9161+
/* Should always be true but just in case. */
9162+
if (list_empty(&cache->bg_list)) {
9163+
btrfs_get_block_group(cache);
9164+
list_add_tail(&cache->bg_list,
9165+
&info->unused_bgs);
9166+
}
9167+
spin_unlock(&info->unused_bgs_lock);
9168+
}
91359169
}
91369170

91379171
list_for_each_entry_rcu(space_info, &root->fs_info->space_info, list) {
@@ -9172,10 +9206,8 @@ void btrfs_create_pending_block_groups(struct btrfs_trans_handle *trans,
91729206
struct btrfs_key key;
91739207
int ret = 0;
91749208

9175-
list_for_each_entry_safe(block_group, tmp, &trans->new_bgs,
9176-
new_bg_list) {
9177-
list_del_init(&block_group->new_bg_list);
9178-
9209+
list_for_each_entry_safe(block_group, tmp, &trans->new_bgs, bg_list) {
9210+
list_del_init(&block_group->bg_list);
91799211
if (ret)
91809212
continue;
91819213

@@ -9261,7 +9293,7 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
92619293

92629294
__link_block_group(cache->space_info, cache);
92639295

9264-
list_add_tail(&cache->new_bg_list, &trans->new_bgs);
9296+
list_add_tail(&cache->bg_list, &trans->new_bgs);
92659297

92669298
set_avail_alloc_bits(extent_root->fs_info, type);
92679299

@@ -9430,6 +9462,101 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
94309462
return ret;
94319463
}
94329464

9465+
/*
9466+
* Process the unused_bgs list and remove any that don't have any allocated
9467+
* space inside of them.
9468+
*/
9469+
void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
9470+
{
9471+
struct btrfs_block_group_cache *block_group;
9472+
struct btrfs_space_info *space_info;
9473+
struct btrfs_root *root = fs_info->extent_root;
9474+
struct btrfs_trans_handle *trans;
9475+
int ret = 0;
9476+
9477+
if (!fs_info->open)
9478+
return;
9479+
9480+
spin_lock(&fs_info->unused_bgs_lock);
9481+
while (!list_empty(&fs_info->unused_bgs)) {
9482+
u64 start, end;
9483+
9484+
block_group = list_first_entry(&fs_info->unused_bgs,
9485+
struct btrfs_block_group_cache,
9486+
bg_list);
9487+
space_info = block_group->space_info;
9488+
list_del_init(&block_group->bg_list);
9489+
if (ret || btrfs_mixed_space_info(space_info)) {
9490+
btrfs_put_block_group(block_group);
9491+
continue;
9492+
}
9493+
spin_unlock(&fs_info->unused_bgs_lock);
9494+
9495+
/* Don't want to race with allocators so take the groups_sem */
9496+
down_write(&space_info->groups_sem);
9497+
spin_lock(&block_group->lock);
9498+
if (block_group->reserved ||
9499+
btrfs_block_group_used(&block_group->item) ||
9500+
block_group->ro) {
9501+
/*
9502+
* We want to bail if we made new allocations or have
9503+
* outstanding allocations in this block group. We do
9504+
* the ro check in case balance is currently acting on
9505+
* this block group.
9506+
*/
9507+
spin_unlock(&block_group->lock);
9508+
up_write(&space_info->groups_sem);
9509+
goto next;
9510+
}
9511+
spin_unlock(&block_group->lock);
9512+
9513+
/* We don't want to force the issue, only flip if it's ok. */
9514+
ret = set_block_group_ro(block_group, 0);
9515+
up_write(&space_info->groups_sem);
9516+
if (ret < 0) {
9517+
ret = 0;
9518+
goto next;
9519+
}
9520+
9521+
/*
9522+
* Want to do this before we do anything else so we can recover
9523+
* properly if we fail to join the transaction.
9524+
*/
9525+
trans = btrfs_join_transaction(root);
9526+
if (IS_ERR(trans)) {
9527+
btrfs_set_block_group_rw(root, block_group);
9528+
ret = PTR_ERR(trans);
9529+
goto next;
9530+
}
9531+
9532+
/*
9533+
* We could have pending pinned extents for this block group,
9534+
* just delete them, we don't care about them anymore.
9535+
*/
9536+
start = block_group->key.objectid;
9537+
end = start + block_group->key.offset - 1;
9538+
clear_extent_bits(&fs_info->freed_extents[0], start, end,
9539+
EXTENT_DIRTY, GFP_NOFS);
9540+
clear_extent_bits(&fs_info->freed_extents[1], start, end,
9541+
EXTENT_DIRTY, GFP_NOFS);
9542+
9543+
/* Reset pinned so btrfs_put_block_group doesn't complain */
9544+
block_group->pinned = 0;
9545+
9546+
/*
9547+
* Btrfs_remove_chunk will abort the transaction if things go
9548+
* horribly wrong.
9549+
*/
9550+
ret = btrfs_remove_chunk(trans, root,
9551+
block_group->key.objectid);
9552+
btrfs_end_transaction(trans, root);
9553+
next:
9554+
btrfs_put_block_group(block_group);
9555+
spin_lock(&fs_info->unused_bgs_lock);
9556+
}
9557+
spin_unlock(&fs_info->unused_bgs_lock);
9558+
}
9559+
94339560
int btrfs_init_space_info(struct btrfs_fs_info *fs_info)
94349561
{
94359562
struct btrfs_space_info *space_info;

fs/btrfs/tests/free-space-tests.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ static struct btrfs_block_group_cache *init_test_block_group(void)
4545
spin_lock_init(&cache->lock);
4646
INIT_LIST_HEAD(&cache->list);
4747
INIT_LIST_HEAD(&cache->cluster_list);
48-
INIT_LIST_HEAD(&cache->new_bg_list);
48+
INIT_LIST_HEAD(&cache->bg_list);
4949

5050
btrfs_init_free_space_ctl(cache);
5151

0 commit comments

Comments
 (0)