Skip to content

Commit d2b3964

Browse files
Christoph Hellwigdjwong
authored andcommitted
xfs: fix COW writeback race
Due to the way how xfs_iomap_write_allocate tries to convert the whole found extents from delalloc to real space we can run into a race condition with multiple threads doing writes to this same extent. For the non-COW case that is harmless as the only thing that can happen is that we call xfs_bmapi_write on an extent that has already been converted to a real allocation. For COW writes where we move the extent from the COW to the data fork after I/O completion the race is, however, not quite as harmless. In the worst case we are now calling xfs_bmapi_write on a region that contains hole in the COW work, which will trip up an assert in debug builds or lead to file system corruption in non-debug builds. This seems to be reproducible with workloads of small O_DSYNC write, although so far I've not managed to come up with a with an isolated reproducer. The fix for the issue is relatively simple: tell xfs_bmapi_write that we are only asked to convert delayed allocations and skip holes in that case. Signed-off-by: Christoph Hellwig <hch@lst.de> Reviewed-by: Brian Foster <bfoster@redhat.com> Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com> Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
1 parent 7a308bb commit d2b3964

File tree

3 files changed

+38
-14
lines changed

3 files changed

+38
-14
lines changed

fs/xfs/libxfs/xfs_bmap.c

Lines changed: 32 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -4514,8 +4514,6 @@ xfs_bmapi_write(
45144514
int n; /* current extent index */
45154515
xfs_fileoff_t obno; /* old block number (offset) */
45164516
int whichfork; /* data or attr fork */
4517-
char inhole; /* current location is hole in file */
4518-
char wasdelay; /* old extent was delayed */
45194517

45204518
#ifdef DEBUG
45214519
xfs_fileoff_t orig_bno; /* original block number value */
@@ -4603,22 +4601,44 @@ xfs_bmapi_write(
46034601
bma.firstblock = firstblock;
46044602

46054603
while (bno < end && n < *nmap) {
4606-
inhole = eof || bma.got.br_startoff > bno;
4607-
wasdelay = !inhole && isnullstartblock(bma.got.br_startblock);
4604+
bool need_alloc = false, wasdelay = false;
46084605

4609-
/*
4610-
* Make sure we only reflink into a hole.
4611-
*/
4612-
if (flags & XFS_BMAPI_REMAP)
4613-
ASSERT(inhole);
4614-
if (flags & XFS_BMAPI_COWFORK)
4615-
ASSERT(!inhole);
4606+
/* in hole or beyoned EOF? */
4607+
if (eof || bma.got.br_startoff > bno) {
4608+
if (flags & XFS_BMAPI_DELALLOC) {
4609+
/*
4610+
* For the COW fork we can reasonably get a
4611+
* request for converting an extent that races
4612+
* with other threads already having converted
4613+
* part of it, as there converting COW to
4614+
* regular blocks is not protected using the
4615+
* IOLOCK.
4616+
*/
4617+
ASSERT(flags & XFS_BMAPI_COWFORK);
4618+
if (!(flags & XFS_BMAPI_COWFORK)) {
4619+
error = -EIO;
4620+
goto error0;
4621+
}
4622+
4623+
if (eof || bno >= end)
4624+
break;
4625+
} else {
4626+
need_alloc = true;
4627+
}
4628+
} else {
4629+
/*
4630+
* Make sure we only reflink into a hole.
4631+
*/
4632+
ASSERT(!(flags & XFS_BMAPI_REMAP));
4633+
if (isnullstartblock(bma.got.br_startblock))
4634+
wasdelay = true;
4635+
}
46164636

46174637
/*
46184638
* First, deal with the hole before the allocated space
46194639
* that we found, if any.
46204640
*/
4621-
if (inhole || wasdelay) {
4641+
if (need_alloc || wasdelay) {
46224642
bma.eof = eof;
46234643
bma.conv = !!(flags & XFS_BMAPI_CONVERT);
46244644
bma.wasdel = wasdelay;

fs/xfs/libxfs/xfs_bmap.h

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,9 @@ struct xfs_extent_free_item
110110
/* Map something in the CoW fork. */
111111
#define XFS_BMAPI_COWFORK 0x200
112112

113+
/* Only convert delalloc space, don't allocate entirely new extents */
114+
#define XFS_BMAPI_DELALLOC 0x400
115+
113116
#define XFS_BMAPI_FLAGS \
114117
{ XFS_BMAPI_ENTIRE, "ENTIRE" }, \
115118
{ XFS_BMAPI_METADATA, "METADATA" }, \
@@ -120,7 +123,8 @@ struct xfs_extent_free_item
120123
{ XFS_BMAPI_CONVERT, "CONVERT" }, \
121124
{ XFS_BMAPI_ZERO, "ZERO" }, \
122125
{ XFS_BMAPI_REMAP, "REMAP" }, \
123-
{ XFS_BMAPI_COWFORK, "COWFORK" }
126+
{ XFS_BMAPI_COWFORK, "COWFORK" }, \
127+
{ XFS_BMAPI_DELALLOC, "DELALLOC" }
124128

125129

126130
static inline int xfs_bmapi_aflag(int w)

fs/xfs/xfs_iomap.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -681,7 +681,7 @@ xfs_iomap_write_allocate(
681681
xfs_trans_t *tp;
682682
int nimaps;
683683
int error = 0;
684-
int flags = 0;
684+
int flags = XFS_BMAPI_DELALLOC;
685685
int nres;
686686

687687
if (whichfork == XFS_COW_FORK)

0 commit comments

Comments
 (0)