Skip to content

Commit 8ecebf4

Browse files
Robbie Kokdave
authored andcommitted
Btrfs: fix unexpected failure of nocow buffered writes after snapshotting when low on space
Commit e9894fd ("Btrfs: fix snapshot vs nocow writting") forced nocow writes to fallback to COW, during writeback, when a snapshot is created. This resulted in writes made before creating the snapshot to unexpectedly fail with ENOSPC during writeback when success (0) was returned to user space through the write system call. The steps leading to this problem are: 1. When it's not possible to allocate data space for a write, the buffered write path checks if a NOCOW write is possible. If it is, it will not reserve space and success (0) is returned to user space. 2. Then when a snapshot is created, the root's will_be_snapshotted atomic is incremented and writeback is triggered for all inode's that belong to the root being snapshotted. Incrementing that atomic forces all previous writes to fallback to COW during writeback (running delalloc). 3. This results in the writeback for the inodes to fail and therefore setting the ENOSPC error in their mappings, so that a subsequent fsync on them will report the error to user space. So it's not a completely silent data loss (since fsync will report ENOSPC) but it's a very unexpected and undesirable behaviour, because if a clean shutdown/unmount of the filesystem happens without previous calls to fsync, it is expected to have the data present in the files after mounting the filesystem again. So fix this by adding a new atomic named snapshot_force_cow to the root structure which prevents this behaviour and works the following way: 1. It is incremented when we start to create a snapshot after triggering writeback and before waiting for writeback to finish. 2. This new atomic is now what is used by writeback (running delalloc) to decide whether we need to fallback to COW or not. Because we incremented this new atomic after triggering writeback in the snapshot creation ioctl, we ensure that all buffered writes that happened before snapshot creation will succeed and not fallback to COW (which would make them fail with ENOSPC). 3. The existing atomic, will_be_snapshotted, is kept because it is used to force new buffered writes, that start after we started snapshotting, to reserve data space even when NOCOW is possible. This makes these writes fail early with ENOSPC when there's no available space to allocate, preventing the unexpected behaviour of writeback later failing with ENOSPC due to a fallback to COW mode. Fixes: e9894fd ("Btrfs: fix snapshot vs nocow writting") Signed-off-by: Robbie Ko <robbieko@synology.com> Reviewed-by: Filipe Manana <fdmanana@suse.com> Signed-off-by: David Sterba <dsterba@suse.com>
1 parent 39379fa commit 8ecebf4

File tree

4 files changed

+22
-21
lines changed

4 files changed

+22
-21
lines changed

fs/btrfs/ctree.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1280,6 +1280,7 @@ struct btrfs_root {
12801280
int send_in_progress;
12811281
struct btrfs_subvolume_writers *subv_writers;
12821282
atomic_t will_be_snapshotted;
1283+
atomic_t snapshot_force_cow;
12831284

12841285
/* For qgroup metadata reserved space */
12851286
spinlock_t qgroup_meta_rsv_lock;

fs/btrfs/disk-io.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1187,6 +1187,7 @@ static void __setup_root(struct btrfs_root *root, struct btrfs_fs_info *fs_info,
11871187
atomic_set(&root->log_batch, 0);
11881188
refcount_set(&root->refs, 1);
11891189
atomic_set(&root->will_be_snapshotted, 0);
1190+
atomic_set(&root->snapshot_force_cow, 0);
11901191
root->log_transid = 0;
11911192
root->log_transid_committed = -1;
11921193
root->last_log_commit = 0;

fs/btrfs/inode.c

Lines changed: 4 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1271,7 +1271,7 @@ static noinline int run_delalloc_nocow(struct inode *inode,
12711271
u64 disk_num_bytes;
12721272
u64 ram_bytes;
12731273
int extent_type;
1274-
int ret, err;
1274+
int ret;
12751275
int type;
12761276
int nocow;
12771277
int check_prev = 1;
@@ -1403,11 +1403,8 @@ static noinline int run_delalloc_nocow(struct inode *inode,
14031403
* if there are pending snapshots for this root,
14041404
* we fall into common COW way.
14051405
*/
1406-
if (!nolock) {
1407-
err = btrfs_start_write_no_snapshotting(root);
1408-
if (!err)
1409-
goto out_check;
1410-
}
1406+
if (!nolock && atomic_read(&root->snapshot_force_cow))
1407+
goto out_check;
14111408
/*
14121409
* force cow if csum exists in the range.
14131410
* this ensure that csum for a given extent are
@@ -1416,9 +1413,6 @@ static noinline int run_delalloc_nocow(struct inode *inode,
14161413
ret = csum_exist_in_range(fs_info, disk_bytenr,
14171414
num_bytes);
14181415
if (ret) {
1419-
if (!nolock)
1420-
btrfs_end_write_no_snapshotting(root);
1421-
14221416
/*
14231417
* ret could be -EIO if the above fails to read
14241418
* metadata.
@@ -1431,11 +1425,8 @@ static noinline int run_delalloc_nocow(struct inode *inode,
14311425
WARN_ON_ONCE(nolock);
14321426
goto out_check;
14331427
}
1434-
if (!btrfs_inc_nocow_writers(fs_info, disk_bytenr)) {
1435-
if (!nolock)
1436-
btrfs_end_write_no_snapshotting(root);
1428+
if (!btrfs_inc_nocow_writers(fs_info, disk_bytenr))
14371429
goto out_check;
1438-
}
14391430
nocow = 1;
14401431
} else if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
14411432
extent_end = found_key.offset +
@@ -1448,8 +1439,6 @@ static noinline int run_delalloc_nocow(struct inode *inode,
14481439
out_check:
14491440
if (extent_end <= start) {
14501441
path->slots[0]++;
1451-
if (!nolock && nocow)
1452-
btrfs_end_write_no_snapshotting(root);
14531442
if (nocow)
14541443
btrfs_dec_nocow_writers(fs_info, disk_bytenr);
14551444
goto next_slot;
@@ -1471,8 +1460,6 @@ static noinline int run_delalloc_nocow(struct inode *inode,
14711460
end, page_started, nr_written, 1,
14721461
NULL);
14731462
if (ret) {
1474-
if (!nolock && nocow)
1475-
btrfs_end_write_no_snapshotting(root);
14761463
if (nocow)
14771464
btrfs_dec_nocow_writers(fs_info,
14781465
disk_bytenr);
@@ -1492,8 +1479,6 @@ static noinline int run_delalloc_nocow(struct inode *inode,
14921479
ram_bytes, BTRFS_COMPRESS_NONE,
14931480
BTRFS_ORDERED_PREALLOC);
14941481
if (IS_ERR(em)) {
1495-
if (!nolock && nocow)
1496-
btrfs_end_write_no_snapshotting(root);
14971482
if (nocow)
14981483
btrfs_dec_nocow_writers(fs_info,
14991484
disk_bytenr);
@@ -1532,8 +1517,6 @@ static noinline int run_delalloc_nocow(struct inode *inode,
15321517
EXTENT_CLEAR_DATA_RESV,
15331518
PAGE_UNLOCK | PAGE_SET_PRIVATE2);
15341519

1535-
if (!nolock && nocow)
1536-
btrfs_end_write_no_snapshotting(root);
15371520
cur_offset = extent_end;
15381521

15391522
/*

fs/btrfs/ioctl.c

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -747,6 +747,7 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir,
747747
struct btrfs_pending_snapshot *pending_snapshot;
748748
struct btrfs_trans_handle *trans;
749749
int ret;
750+
bool snapshot_force_cow = false;
750751

751752
if (!test_bit(BTRFS_ROOT_REF_COWS, &root->state))
752753
return -EINVAL;
@@ -763,6 +764,11 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir,
763764
goto free_pending;
764765
}
765766

767+
/*
768+
* Force new buffered writes to reserve space even when NOCOW is
769+
* possible. This is to avoid later writeback (running dealloc) to
770+
* fallback to COW mode and unexpectedly fail with ENOSPC.
771+
*/
766772
atomic_inc(&root->will_be_snapshotted);
767773
smp_mb__after_atomic();
768774
/* wait for no snapshot writes */
@@ -773,6 +779,14 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir,
773779
if (ret)
774780
goto dec_and_free;
775781

782+
/*
783+
* All previous writes have started writeback in NOCOW mode, so now
784+
* we force future writes to fallback to COW mode during snapshot
785+
* creation.
786+
*/
787+
atomic_inc(&root->snapshot_force_cow);
788+
snapshot_force_cow = true;
789+
776790
btrfs_wait_ordered_extents(root, U64_MAX, 0, (u64)-1);
777791

778792
btrfs_init_block_rsv(&pending_snapshot->block_rsv,
@@ -837,6 +851,8 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir,
837851
fail:
838852
btrfs_subvolume_release_metadata(fs_info, &pending_snapshot->block_rsv);
839853
dec_and_free:
854+
if (snapshot_force_cow)
855+
atomic_dec(&root->snapshot_force_cow);
840856
if (atomic_dec_and_test(&root->will_be_snapshotted))
841857
wake_up_var(&root->will_be_snapshotted);
842858
free_pending:

0 commit comments

Comments
 (0)