Skip to content

Commit 318b067

Browse files
committed
Merge tag 'for-4.20-part1-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux
Pull btrfs updates from David Sterba: "This is the first batch with fixes and some nice performance improvements. Preliminary results show eg. more files/sec in fsmark, better perf on multi-threaded workloads (filebench, dbench), fewer context switches and overall better memory allocation characteristics (multiple benchmarks). Apart from general performance, there's an improvement for qgroups + balance workload that's been troubling our users. Note for stable: there are 20+ patches tagged for stable, out of 90. Not all of them apply cleanly on all stable versions but the conflicts are mostly due to simple cleanups and resolving should be obvious. The fixes are otherwise independent. Performance improvements: - transition between blocking and spinning modes of path is gone, which originally resulted in more unnecessary wakeups and updates to the path locks, the effects are measurable and improve latency and scalability - qgroups: first batch of changes that should speed up balancing with qgroups on, skip quota accounting on unchanged subtrees, overall gain is about 30+% in runtime - use rb-tree with cached first node for several structures, small improvement to avoid pointer chasing Fixes: - trim - fix: some blockgroups could have been missed if their logical address was past the total filesystem size (ie. after a lot of balancing) - better error reporting, after processing blockgroups and whole device - fix: continue trimming block groups after an error is encountered - check for trim support of the device earlier and avoid some unnecessary work - less interaction with transaction commit that improves latency on slower storage (eg. 
image files over NFS) - fsync - fix warning when replaying log after fsync of an O_TMPFILE - fix wrong dentries after fsync of file that got its parent replaced - qgroups: fix rescan that might miss some dirty groups - don't clean dirty pages during buffered writes, this could lead to lost updates in some corner cases - some block groups could have been delayed in creation, if the allocation triggered another one - error handling improvements Cleanups: - removed unused struct members and variables - function return type cleanups - delayed refs code refactoring - protect against deadlock that could be caused by crafted image that tries to allocate from a tree that's locked already" * tag 'for-4.20-part1-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux: (93 commits) btrfs: switch return_bigger to bool in find_ref_head btrfs: remove fs_info from btrfs_should_throttle_delayed_refs btrfs: remove fs_info from btrfs_check_space_for_delayed_refs btrfs: delayed-ref: pass delayed_refs directly to btrfs_delayed_ref_lock btrfs: delayed-ref: pass delayed_refs directly to btrfs_select_ref_head btrfs: qgroup: move the qgroup->members check out from (!qgroup)'s else branch btrfs: relocation: Remove redundant tree level check btrfs: relocation: Cleanup while loop using rbtree_postorder_for_each_entry_safe btrfs: qgroup: Avoid calling qgroup functions if qgroup is not enabled Btrfs: fix wrong dentries after fsync of file that got its parent replaced Btrfs: fix warning when replaying log after fsync of a tmpfile btrfs: drop min_size from evict_refill_and_join btrfs: assert on non-empty delayed iputs btrfs: make sure we create all new block groups btrfs: reset max_extent_size on clear in a bitmap btrfs: protect space cache inode alloc with GFP_NOFS btrfs: release metadata before running delayed refs Btrfs: kill btrfs_clear_path_blocking btrfs: dev-replace: remove pointless assert in write unlock btrfs: dev-replace: move replace members out of fs_info ...
2 parents 44adbac + d935279 commit 318b067

40 files changed

+1268
-745
lines changed

fs/btrfs/backref.c

Lines changed: 21 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -112,11 +112,11 @@ static int find_extent_in_eb(const struct extent_buffer *eb,
112112
}
113113

114114
struct preftree {
115-
struct rb_root root;
115+
struct rb_root_cached root;
116116
unsigned int count;
117117
};
118118

119-
#define PREFTREE_INIT { .root = RB_ROOT, .count = 0 }
119+
#define PREFTREE_INIT { .root = RB_ROOT_CACHED, .count = 0 }
120120

121121
struct preftrees {
122122
struct preftree direct; /* BTRFS_SHARED_[DATA|BLOCK]_REF_KEY */
@@ -225,14 +225,15 @@ static void prelim_ref_insert(const struct btrfs_fs_info *fs_info,
225225
struct prelim_ref *newref,
226226
struct share_check *sc)
227227
{
228-
struct rb_root *root;
228+
struct rb_root_cached *root;
229229
struct rb_node **p;
230230
struct rb_node *parent = NULL;
231231
struct prelim_ref *ref;
232232
int result;
233+
bool leftmost = true;
233234

234235
root = &preftree->root;
235-
p = &root->rb_node;
236+
p = &root->rb_root.rb_node;
236237

237238
while (*p) {
238239
parent = *p;
@@ -242,6 +243,7 @@ static void prelim_ref_insert(const struct btrfs_fs_info *fs_info,
242243
p = &(*p)->rb_left;
243244
} else if (result > 0) {
244245
p = &(*p)->rb_right;
246+
leftmost = false;
245247
} else {
246248
/* Identical refs, merge them and free @newref */
247249
struct extent_inode_elem *eie = ref->inode_list;
@@ -272,7 +274,7 @@ static void prelim_ref_insert(const struct btrfs_fs_info *fs_info,
272274
preftree->count++;
273275
trace_btrfs_prelim_ref_insert(fs_info, newref, NULL, preftree->count);
274276
rb_link_node(&newref->rbnode, parent, p);
275-
rb_insert_color(&newref->rbnode, root);
277+
rb_insert_color_cached(&newref->rbnode, root, leftmost);
276278
}
277279

278280
/*
@@ -283,11 +285,11 @@ static void prelim_release(struct preftree *preftree)
283285
{
284286
struct prelim_ref *ref, *next_ref;
285287

286-
rbtree_postorder_for_each_entry_safe(ref, next_ref, &preftree->root,
287-
rbnode)
288+
rbtree_postorder_for_each_entry_safe(ref, next_ref,
289+
&preftree->root.rb_root, rbnode)
288290
free_pref(ref);
289291

290-
preftree->root = RB_ROOT;
292+
preftree->root = RB_ROOT_CACHED;
291293
preftree->count = 0;
292294
}
293295

@@ -627,7 +629,7 @@ static int resolve_indirect_refs(struct btrfs_fs_info *fs_info,
627629
* freeing the entire indirect tree when we're done. In some test
628630
* cases, the tree can grow quite large (~200k objects).
629631
*/
630-
while ((rnode = rb_first(&preftrees->indirect.root))) {
632+
while ((rnode = rb_first_cached(&preftrees->indirect.root))) {
631633
struct prelim_ref *ref;
632634

633635
ref = rb_entry(rnode, struct prelim_ref, rbnode);
@@ -637,7 +639,7 @@ static int resolve_indirect_refs(struct btrfs_fs_info *fs_info,
637639
goto out;
638640
}
639641

640-
rb_erase(&ref->rbnode, &preftrees->indirect.root);
642+
rb_erase_cached(&ref->rbnode, &preftrees->indirect.root);
641643
preftrees->indirect.count--;
642644

643645
if (ref->count == 0) {
@@ -717,9 +719,9 @@ static int add_missing_keys(struct btrfs_fs_info *fs_info,
717719
struct preftree *tree = &preftrees->indirect_missing_keys;
718720
struct rb_node *node;
719721

720-
while ((node = rb_first(&tree->root))) {
722+
while ((node = rb_first_cached(&tree->root))) {
721723
ref = rb_entry(node, struct prelim_ref, rbnode);
722-
rb_erase(node, &tree->root);
724+
rb_erase_cached(node, &tree->root);
723725

724726
BUG_ON(ref->parent); /* should not be a direct ref */
725727
BUG_ON(ref->key_for_search.type);
@@ -769,7 +771,7 @@ static int add_delayed_refs(const struct btrfs_fs_info *fs_info,
769771
btrfs_disk_key_to_cpu(&tmp_op_key, &extent_op->key);
770772

771773
spin_lock(&head->lock);
772-
for (n = rb_first(&head->ref_tree); n; n = rb_next(n)) {
774+
for (n = rb_first_cached(&head->ref_tree); n; n = rb_next(n)) {
773775
node = rb_entry(n, struct btrfs_delayed_ref_node,
774776
ref_node);
775777
if (node->seq > seq)
@@ -1229,14 +1231,14 @@ static int find_parent_nodes(struct btrfs_trans_handle *trans,
12291231
if (ret)
12301232
goto out;
12311233

1232-
WARN_ON(!RB_EMPTY_ROOT(&preftrees.indirect_missing_keys.root));
1234+
WARN_ON(!RB_EMPTY_ROOT(&preftrees.indirect_missing_keys.root.rb_root));
12331235

12341236
ret = resolve_indirect_refs(fs_info, path, time_seq, &preftrees,
12351237
extent_item_pos, total_refs, sc, ignore_offset);
12361238
if (ret)
12371239
goto out;
12381240

1239-
WARN_ON(!RB_EMPTY_ROOT(&preftrees.indirect.root));
1241+
WARN_ON(!RB_EMPTY_ROOT(&preftrees.indirect.root.rb_root));
12401242

12411243
/*
12421244
* This walks the tree of merged and resolved refs. Tree blocks are
@@ -1245,7 +1247,7 @@ static int find_parent_nodes(struct btrfs_trans_handle *trans,
12451247
*
12461248
* We release the entire tree in one go before returning.
12471249
*/
1248-
node = rb_first(&preftrees.direct.root);
1250+
node = rb_first_cached(&preftrees.direct.root);
12491251
while (node) {
12501252
ref = rb_entry(node, struct prelim_ref, rbnode);
12511253
node = rb_next(&ref->rbnode);
@@ -1468,7 +1470,7 @@ int btrfs_check_shared(struct btrfs_root *root, u64 inum, u64 bytenr)
14681470
struct seq_list elem = SEQ_LIST_INIT(elem);
14691471
int ret = 0;
14701472
struct share_check shared = {
1471-
.root_objectid = root->objectid,
1473+
.root_objectid = root->root_key.objectid,
14721474
.inum = inum,
14731475
.share_count = 0,
14741476
};
@@ -2031,7 +2033,8 @@ static int iterate_inode_refs(u64 inum, struct btrfs_root *fs_root,
20312033
/* path must be released before calling iterate()! */
20322034
btrfs_debug(fs_root->fs_info,
20332035
"following ref at offset %u for inode %llu in tree %llu",
2034-
cur, found_key.objectid, fs_root->objectid);
2036+
cur, found_key.objectid,
2037+
fs_root->root_key.objectid);
20352038
ret = iterate(parent, name_len,
20362039
(unsigned long)(iref + 1), eb, ctx);
20372040
if (ret)

fs/btrfs/btrfs_inode.h

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -206,7 +206,7 @@ static inline struct btrfs_inode *BTRFS_I(const struct inode *inode)
206206
static inline unsigned long btrfs_inode_hash(u64 objectid,
207207
const struct btrfs_root *root)
208208
{
209-
u64 h = objectid ^ (root->objectid * GOLDEN_RATIO_PRIME);
209+
u64 h = objectid ^ (root->root_key.objectid * GOLDEN_RATIO_PRIME);
210210

211211
#if BITS_PER_LONG == 32
212212
h = (h >> 32) ^ (h & 0xffffffff);
@@ -339,15 +339,15 @@ static inline void btrfs_print_data_csum_error(struct btrfs_inode *inode,
339339
struct btrfs_root *root = inode->root;
340340

341341
/* Output minus objectid, which is more meaningful */
342-
if (root->objectid >= BTRFS_LAST_FREE_OBJECTID)
342+
if (root->root_key.objectid >= BTRFS_LAST_FREE_OBJECTID)
343343
btrfs_warn_rl(root->fs_info,
344344
"csum failed root %lld ino %lld off %llu csum 0x%08x expected csum 0x%08x mirror %d",
345-
root->objectid, btrfs_ino(inode),
345+
root->root_key.objectid, btrfs_ino(inode),
346346
logical_start, csum, csum_expected, mirror_num);
347347
else
348348
btrfs_warn_rl(root->fs_info,
349349
"csum failed root %llu ino %llu off %llu csum 0x%08x expected csum 0x%08x mirror %d",
350-
root->objectid, btrfs_ino(inode),
350+
root->root_key.objectid, btrfs_ino(inode),
351351
logical_start, csum, csum_expected, mirror_num);
352352
}
353353

fs/btrfs/check-integrity.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1594,6 +1594,7 @@ static int btrfsic_read_block(struct btrfsic_state *state,
15941594
{
15951595
unsigned int num_pages;
15961596
unsigned int i;
1597+
size_t size;
15971598
u64 dev_bytenr;
15981599
int ret;
15991600

@@ -1608,9 +1609,8 @@ static int btrfsic_read_block(struct btrfsic_state *state,
16081609

16091610
num_pages = (block_ctx->len + (u64)PAGE_SIZE - 1) >>
16101611
PAGE_SHIFT;
1611-
block_ctx->mem_to_free = kcalloc(sizeof(*block_ctx->datav) +
1612-
sizeof(*block_ctx->pagev),
1613-
num_pages, GFP_NOFS);
1612+
size = sizeof(*block_ctx->datav) + sizeof(*block_ctx->pagev);
1613+
block_ctx->mem_to_free = kcalloc(num_pages, size, GFP_NOFS);
16141614
if (!block_ctx->mem_to_free)
16151615
return -ENOMEM;
16161616
block_ctx->datav = block_ctx->mem_to_free;

fs/btrfs/compression.c

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -528,7 +528,6 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
528528
int mirror_num, unsigned long bio_flags)
529529
{
530530
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
531-
struct extent_io_tree *tree;
532531
struct extent_map_tree *em_tree;
533532
struct compressed_bio *cb;
534533
unsigned long compressed_len;
@@ -545,7 +544,6 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
545544
int faili = 0;
546545
u32 *sums;
547546

548-
tree = &BTRFS_I(inode)->io_tree;
549547
em_tree = &BTRFS_I(inode)->extent_tree;
550548

551549
/* we need the actual starting offset of this extent in the file */

fs/btrfs/ctree.c

Lines changed: 11 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -52,42 +52,6 @@ noinline void btrfs_set_path_blocking(struct btrfs_path *p)
5252
}
5353
}
5454

55-
/*
56-
* reset all the locked nodes in the patch to spinning locks.
57-
*
58-
* held is used to keep lockdep happy, when lockdep is enabled
59-
* we set held to a blocking lock before we go around and
60-
* retake all the spinlocks in the path. You can safely use NULL
61-
* for held
62-
*/
63-
noinline void btrfs_clear_path_blocking(struct btrfs_path *p,
64-
struct extent_buffer *held, int held_rw)
65-
{
66-
int i;
67-
68-
if (held) {
69-
btrfs_set_lock_blocking_rw(held, held_rw);
70-
if (held_rw == BTRFS_WRITE_LOCK)
71-
held_rw = BTRFS_WRITE_LOCK_BLOCKING;
72-
else if (held_rw == BTRFS_READ_LOCK)
73-
held_rw = BTRFS_READ_LOCK_BLOCKING;
74-
}
75-
btrfs_set_path_blocking(p);
76-
77-
for (i = BTRFS_MAX_LEVEL - 1; i >= 0; i--) {
78-
if (p->nodes[i] && p->locks[i]) {
79-
btrfs_clear_lock_blocking_rw(p->nodes[i], p->locks[i]);
80-
if (p->locks[i] == BTRFS_WRITE_LOCK_BLOCKING)
81-
p->locks[i] = BTRFS_WRITE_LOCK;
82-
else if (p->locks[i] == BTRFS_READ_LOCK_BLOCKING)
83-
p->locks[i] = BTRFS_READ_LOCK;
84-
}
85-
}
86-
87-
if (held)
88-
btrfs_clear_lock_blocking_rw(held, held_rw);
89-
}
90-
9155
/* this also releases the path */
9256
void btrfs_free_path(struct btrfs_path *p)
9357
{
@@ -207,7 +171,7 @@ static void add_root_to_dirty_list(struct btrfs_root *root)
207171
spin_lock(&fs_info->trans_lock);
208172
if (!test_and_set_bit(BTRFS_ROOT_DIRTY, &root->state)) {
209173
/* Want the extent tree to be the last on the list */
210-
if (root->objectid == BTRFS_EXTENT_TREE_OBJECTID)
174+
if (root->root_key.objectid == BTRFS_EXTENT_TREE_OBJECTID)
211175
list_move_tail(&root->dirty_list,
212176
&fs_info->dirty_cowonly_roots);
213177
else
@@ -1306,7 +1270,6 @@ tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct btrfs_path *path,
13061270
}
13071271
}
13081272

1309-
btrfs_clear_path_blocking(path, NULL, BTRFS_READ_LOCK);
13101273
btrfs_tree_read_unlock_blocking(eb);
13111274
free_extent_buffer(eb);
13121275

@@ -1815,8 +1778,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
18151778
int orig_slot = path->slots[level];
18161779
u64 orig_ptr;
18171780

1818-
if (level == 0)
1819-
return 0;
1781+
ASSERT(level > 0);
18201782

18211783
mid = path->nodes[level];
18221784

@@ -2483,7 +2445,6 @@ setup_nodes_for_search(struct btrfs_trans_handle *trans,
24832445
btrfs_set_path_blocking(p);
24842446
reada_for_balance(fs_info, p, level);
24852447
sret = split_node(trans, root, p, level);
2486-
btrfs_clear_path_blocking(p, NULL, 0);
24872448

24882449
BUG_ON(sret > 0);
24892450
if (sret) {
@@ -2504,7 +2465,6 @@ setup_nodes_for_search(struct btrfs_trans_handle *trans,
25042465
btrfs_set_path_blocking(p);
25052466
reada_for_balance(fs_info, p, level);
25062467
sret = balance_level(trans, root, p, level);
2507-
btrfs_clear_path_blocking(p, NULL, 0);
25082468

25092469
if (sret) {
25102470
ret = sret;
@@ -2789,7 +2749,10 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root *root,
27892749
}
27902750
cow_done:
27912751
p->nodes[level] = b;
2792-
btrfs_clear_path_blocking(p, NULL, 0);
2752+
/*
2753+
* Leave path with blocking locks to avoid massive
2754+
* lock context switch, this is made on purpose.
2755+
*/
27932756

27942757
/*
27952758
* we have a lock on b and as long as we aren't changing
@@ -2871,17 +2834,13 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root *root,
28712834
if (!err) {
28722835
btrfs_set_path_blocking(p);
28732836
btrfs_tree_lock(b);
2874-
btrfs_clear_path_blocking(p, b,
2875-
BTRFS_WRITE_LOCK);
28762837
}
28772838
p->locks[level] = BTRFS_WRITE_LOCK;
28782839
} else {
28792840
err = btrfs_tree_read_lock_atomic(b);
28802841
if (!err) {
28812842
btrfs_set_path_blocking(p);
28822843
btrfs_tree_read_lock(b);
2883-
btrfs_clear_path_blocking(p, b,
2884-
BTRFS_READ_LOCK);
28852844
}
28862845
p->locks[level] = BTRFS_READ_LOCK;
28872846
}
@@ -2900,7 +2859,6 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root *root,
29002859
btrfs_set_path_blocking(p);
29012860
err = split_leaf(trans, root, key,
29022861
p, ins_len, ret == 0);
2903-
btrfs_clear_path_blocking(p, NULL, 0);
29042862

29052863
BUG_ON(err > 0);
29062864
if (err) {
@@ -2910,7 +2868,7 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root *root,
29102868
}
29112869
if (!p->search_for_split)
29122870
unlock_up(p, level, lowest_unlock,
2913-
min_write_lock_level, &write_lock_level);
2871+
min_write_lock_level, NULL);
29142872
goto done;
29152873
}
29162874
}
@@ -2961,13 +2919,16 @@ int btrfs_search_old_slot(struct btrfs_root *root, const struct btrfs_key *key,
29612919

29622920
again:
29632921
b = get_old_root(root, time_seq);
2922+
if (!b) {
2923+
ret = -EIO;
2924+
goto done;
2925+
}
29642926
level = btrfs_header_level(b);
29652927
p->locks[level] = BTRFS_READ_LOCK;
29662928

29672929
while (b) {
29682930
level = btrfs_header_level(b);
29692931
p->nodes[level] = b;
2970-
btrfs_clear_path_blocking(p, NULL, 0);
29712932

29722933
/*
29732934
* we have a lock on b and as long as we aren't changing
@@ -3013,8 +2974,6 @@ int btrfs_search_old_slot(struct btrfs_root *root, const struct btrfs_key *key,
30132974
if (!err) {
30142975
btrfs_set_path_blocking(p);
30152976
btrfs_tree_read_lock(b);
3016-
btrfs_clear_path_blocking(p, b,
3017-
BTRFS_READ_LOCK);
30182977
}
30192978
b = tree_mod_log_rewind(fs_info, p, b, time_seq);
30202979
if (!b) {
@@ -5198,7 +5157,6 @@ int btrfs_search_forward(struct btrfs_root *root, struct btrfs_key *min_key,
51985157
path->locks[level - 1] = BTRFS_READ_LOCK;
51995158
path->nodes[level - 1] = cur;
52005159
unlock_up(path, level, 1, 0, NULL);
5201-
btrfs_clear_path_blocking(path, NULL, 0);
52025160
}
52035161
out:
52045162
path->keep_locks = keep_locks;
@@ -5783,8 +5741,6 @@ int btrfs_next_old_leaf(struct btrfs_root *root, struct btrfs_path *path,
57835741
if (!ret) {
57845742
btrfs_set_path_blocking(path);
57855743
btrfs_tree_read_lock(next);
5786-
btrfs_clear_path_blocking(path, next,
5787-
BTRFS_READ_LOCK);
57885744
}
57895745
next_rw_lock = BTRFS_READ_LOCK;
57905746
}
@@ -5820,8 +5776,6 @@ int btrfs_next_old_leaf(struct btrfs_root *root, struct btrfs_path *path,
58205776
if (!ret) {
58215777
btrfs_set_path_blocking(path);
58225778
btrfs_tree_read_lock(next);
5823-
btrfs_clear_path_blocking(path, next,
5824-
BTRFS_READ_LOCK);
58255779
}
58265780
next_rw_lock = BTRFS_READ_LOCK;
58275781
}

0 commit comments

Comments
 (0)