Skip to content

Commit b947343

Browse files
committed
Btrfs: leave btree locks spinning more often
btrfs_mark_buffer_dirty would set dirty bits in the extent_io tree for the buffers it was dirtying. This may require a kmalloc and it was not atomic. So, anyone who called btrfs_mark_buffer_dirty had to set any btree locks they were holding to blocking first. This commit changes dirty tracking for extent buffers to just use a flag in the extent buffer. Now that we have one and only one extent buffer per page, this can be safely done without losing dirty bits along the way. This also introduces a path->leave_spinning flag that callers of btrfs_search_slot can use to indicate they will properly deal with a path returned where all the locks are spinning instead of blocking. Many of the btree search callers now expect spinning paths, resulting in better btree concurrency overall. Signed-off-by: Chris Mason <chris.mason@oracle.com>
1 parent 89573b9 commit b947343

File tree

14 files changed

+172
-96
lines changed

14 files changed

+172
-96
lines changed

fs/btrfs/ctree.c

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1684,7 +1684,8 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
16841684
* we don't really know what they plan on doing with the path
16851685
* from here on, so for now just mark it as blocking
16861686
*/
1687-
btrfs_set_path_blocking(p);
1687+
if (!p->leave_spinning)
1688+
btrfs_set_path_blocking(p);
16881689
return ret;
16891690
}
16901691

@@ -3032,26 +3033,27 @@ int btrfs_split_item(struct btrfs_trans_handle *trans,
30323033
return -EAGAIN;
30333034
}
30343035

3036+
btrfs_set_path_blocking(path);
30353037
ret = split_leaf(trans, root, &orig_key, path,
30363038
sizeof(struct btrfs_item), 1);
30373039
path->keep_locks = 0;
30383040
BUG_ON(ret);
30393041

3042+
btrfs_unlock_up_safe(path, 1);
3043+
leaf = path->nodes[0];
3044+
BUG_ON(btrfs_leaf_free_space(root, leaf) < sizeof(struct btrfs_item));
3045+
3046+
split:
30403047
/*
30413048
* make sure any changes to the path from split_leaf leave it
30423049
* in a blocking state
30433050
*/
30443051
btrfs_set_path_blocking(path);
30453052

3046-
leaf = path->nodes[0];
3047-
BUG_ON(btrfs_leaf_free_space(root, leaf) < sizeof(struct btrfs_item));
3048-
3049-
split:
30503053
item = btrfs_item_nr(leaf, path->slots[0]);
30513054
orig_offset = btrfs_item_offset(leaf, item);
30523055
item_size = btrfs_item_size(leaf, item);
30533056

3054-
30553057
buf = kmalloc(item_size, GFP_NOFS);
30563058
read_extent_buffer(leaf, buf, btrfs_item_ptr_offset(leaf,
30573059
path->slots[0]), item_size);
@@ -3545,14 +3547,15 @@ setup_items_for_insert(struct btrfs_trans_handle *trans,
35453547
}
35463548

35473549
btrfs_set_header_nritems(leaf, nritems + nr);
3548-
btrfs_mark_buffer_dirty(leaf);
35493550

35503551
ret = 0;
35513552
if (slot == 0) {
35523553
struct btrfs_disk_key disk_key;
35533554
btrfs_cpu_key_to_disk(&disk_key, cpu_key);
35543555
ret = fixup_low_keys(trans, root, path, &disk_key, 1);
35553556
}
3557+
btrfs_unlock_up_safe(path, 1);
3558+
btrfs_mark_buffer_dirty(leaf);
35563559

35573560
if (btrfs_leaf_free_space(root, leaf) < 0) {
35583561
btrfs_print_leaf(root, leaf);
@@ -3596,7 +3599,6 @@ int btrfs_insert_empty_items(struct btrfs_trans_handle *trans,
35963599
total_data, total_size, nr);
35973600

35983601
out:
3599-
btrfs_unlock_up_safe(path, 1);
36003602
return ret;
36013603
}
36023604

@@ -3792,6 +3794,7 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
37923794
slot = path->slots[1];
37933795
extent_buffer_get(leaf);
37943796

3797+
btrfs_set_path_blocking(path);
37953798
wret = push_leaf_left(trans, root, path, 1, 1);
37963799
if (wret < 0 && wret != -ENOSPC)
37973800
ret = wret;

fs/btrfs/ctree.h

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -401,15 +401,16 @@ struct btrfs_path {
401401
int locks[BTRFS_MAX_LEVEL];
402402
int reada;
403403
/* keep some upper locks as we walk down */
404-
int keep_locks;
405-
int skip_locking;
406404
int lowest_level;
407405

408406
/*
409407
* set by btrfs_split_item, tells search_slot to keep all locks
410408
* and to force calls to keep space in the nodes
411409
*/
412-
int search_for_split;
410+
unsigned int search_for_split:1;
411+
unsigned int keep_locks:1;
412+
unsigned int skip_locking:1;
413+
unsigned int leave_spinning:1;
413414
};
414415

415416
/*
@@ -779,6 +780,11 @@ struct btrfs_fs_info {
779780
atomic_t throttle_gen;
780781

781782
u64 total_pinned;
783+
784+
/* protected by the delalloc lock, used to keep from writing
785+
* metadata until there is a nice batch
786+
*/
787+
u64 dirty_metadata_bytes;
782788
struct list_head dirty_cowonly_roots;
783789

784790
struct btrfs_fs_devices *fs_devices;

fs/btrfs/dir-item.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -145,7 +145,10 @@ int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root
145145
key.objectid = dir;
146146
btrfs_set_key_type(&key, BTRFS_DIR_ITEM_KEY);
147147
key.offset = btrfs_name_hash(name, name_len);
148+
148149
path = btrfs_alloc_path();
150+
path->leave_spinning = 1;
151+
149152
data_size = sizeof(*dir_item) + name_len;
150153
dir_item = insert_with_overflow(trans, root, path, &key, data_size,
151154
name, name_len);

fs/btrfs/disk-io.c

Lines changed: 54 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -668,14 +668,31 @@ static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
668668
static int btree_writepage(struct page *page, struct writeback_control *wbc)
669669
{
670670
struct extent_io_tree *tree;
671+
struct btrfs_root *root = BTRFS_I(page->mapping->host)->root;
672+
struct extent_buffer *eb;
673+
int was_dirty;
674+
671675
tree = &BTRFS_I(page->mapping->host)->io_tree;
676+
if (!(current->flags & PF_MEMALLOC)) {
677+
return extent_write_full_page(tree, page,
678+
btree_get_extent, wbc);
679+
}
672680

673-
if (current->flags & PF_MEMALLOC) {
674-
redirty_page_for_writepage(wbc, page);
675-
unlock_page(page);
676-
return 0;
681+
redirty_page_for_writepage(wbc, page);
682+
eb = btrfs_find_tree_block(root, page_offset(page),
683+
PAGE_CACHE_SIZE);
684+
WARN_ON(!eb);
685+
686+
was_dirty = test_and_set_bit(EXTENT_BUFFER_DIRTY, &eb->bflags);
687+
if (!was_dirty) {
688+
spin_lock(&root->fs_info->delalloc_lock);
689+
root->fs_info->dirty_metadata_bytes += PAGE_CACHE_SIZE;
690+
spin_unlock(&root->fs_info->delalloc_lock);
677691
}
678-
return extent_write_full_page(tree, page, btree_get_extent, wbc);
692+
free_extent_buffer(eb);
693+
694+
unlock_page(page);
695+
return 0;
679696
}
680697

681698
static int btree_writepages(struct address_space *mapping,
@@ -684,15 +701,15 @@ static int btree_writepages(struct address_space *mapping,
684701
struct extent_io_tree *tree;
685702
tree = &BTRFS_I(mapping->host)->io_tree;
686703
if (wbc->sync_mode == WB_SYNC_NONE) {
704+
struct btrfs_root *root = BTRFS_I(mapping->host)->root;
687705
u64 num_dirty;
688-
u64 start = 0;
689706
unsigned long thresh = 32 * 1024 * 1024;
690707

691708
if (wbc->for_kupdate)
692709
return 0;
693710

694-
num_dirty = count_range_bits(tree, &start, (u64)-1,
695-
thresh, EXTENT_DIRTY);
711+
/* this is a bit racy, but that's ok */
712+
num_dirty = root->fs_info->dirty_metadata_bytes;
696713
if (num_dirty < thresh)
697714
return 0;
698715
}
@@ -859,9 +876,17 @@ int clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root,
859876
root->fs_info->running_transaction->transid) {
860877
btrfs_assert_tree_locked(buf);
861878

862-
/* ugh, clear_extent_buffer_dirty can be expensive */
863-
btrfs_set_lock_blocking(buf);
879+
if (test_and_clear_bit(EXTENT_BUFFER_DIRTY, &buf->bflags)) {
880+
spin_lock(&root->fs_info->delalloc_lock);
881+
if (root->fs_info->dirty_metadata_bytes >= buf->len)
882+
root->fs_info->dirty_metadata_bytes -= buf->len;
883+
else
884+
WARN_ON(1);
885+
spin_unlock(&root->fs_info->delalloc_lock);
886+
}
864887

888+
/* ugh, clear_extent_buffer_dirty needs to lock the page */
889+
btrfs_set_lock_blocking(buf);
865890
clear_extent_buffer_dirty(&BTRFS_I(btree_inode)->io_tree,
866891
buf);
867892
}
@@ -2348,8 +2373,7 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf)
23482373
struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root;
23492374
u64 transid = btrfs_header_generation(buf);
23502375
struct inode *btree_inode = root->fs_info->btree_inode;
2351-
2352-
btrfs_set_lock_blocking(buf);
2376+
int was_dirty;
23532377

23542378
btrfs_assert_tree_locked(buf);
23552379
if (transid != root->fs_info->generation) {
@@ -2360,7 +2384,13 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf)
23602384
(unsigned long long)root->fs_info->generation);
23612385
WARN_ON(1);
23622386
}
2363-
set_extent_buffer_dirty(&BTRFS_I(btree_inode)->io_tree, buf);
2387+
was_dirty = set_extent_buffer_dirty(&BTRFS_I(btree_inode)->io_tree,
2388+
buf);
2389+
if (!was_dirty) {
2390+
spin_lock(&root->fs_info->delalloc_lock);
2391+
root->fs_info->dirty_metadata_bytes += buf->len;
2392+
spin_unlock(&root->fs_info->delalloc_lock);
2393+
}
23642394
}
23652395

23662396
void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr)
@@ -2400,6 +2430,7 @@ int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid)
24002430
int btree_lock_page_hook(struct page *page)
24012431
{
24022432
struct inode *inode = page->mapping->host;
2433+
struct btrfs_root *root = BTRFS_I(inode)->root;
24032434
struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
24042435
struct extent_buffer *eb;
24052436
unsigned long len;
@@ -2415,6 +2446,16 @@ int btree_lock_page_hook(struct page *page)
24152446

24162447
btrfs_tree_lock(eb);
24172448
btrfs_set_header_flag(eb, BTRFS_HEADER_FLAG_WRITTEN);
2449+
2450+
if (test_and_clear_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)) {
2451+
spin_lock(&root->fs_info->delalloc_lock);
2452+
if (root->fs_info->dirty_metadata_bytes >= eb->len)
2453+
root->fs_info->dirty_metadata_bytes -= eb->len;
2454+
else
2455+
WARN_ON(1);
2456+
spin_unlock(&root->fs_info->delalloc_lock);
2457+
}
2458+
24182459
btrfs_tree_unlock(eb);
24192460
free_extent_buffer(eb);
24202461
out:

fs/btrfs/disk-io.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,7 @@ int btrfs_insert_dev_radix(struct btrfs_root *root,
7272
void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr);
7373
int btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root);
7474
void btrfs_mark_buffer_dirty(struct extent_buffer *buf);
75+
void btrfs_mark_buffer_dirty_nonblocking(struct extent_buffer *buf);
7576
int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid);
7677
int btrfs_set_buffer_uptodate(struct extent_buffer *buf);
7778
int wait_on_tree_block_writeback(struct btrfs_root *root,

0 commit comments

Comments
 (0)