Skip to content

Commit 4f579ae

Browse files
Lukas Czernertytso
authored andcommitted
ext4: fix punch hole on files with indirect mapping
Currently punch hole code on files with direct/indirect mapping has some problems which may lead to a data loss. For example (from Jan Kara): fallocate -n -p 10240000 4096 will punch the range 10240000 - 12632064 instead of the range 1024000 - 10244096. Also the code is a bit weird and it's not using infrastructure provided by indirect.c, but rather creating it's own way. This patch fixes the issues as well as making the operation to run 4 times faster from my testing (punching out 60GB file). It uses similar approach used in ext4_ind_truncate() which takes advantage of ext4_free_branches() function. Also rename the ext4_free_hole_blocks() to something more sensible, like the equivalent we have for extent mapped files. Call it ext4_ind_remove_space(). This has been tested mostly with fsx and some xfstests which are testing punch hole but does not require unwritten extents which are not supported with direct/indirect mapping. Not problems showed up even with 1024k block size. CC: stable@vger.kernel.org Signed-off-by: Lukas Czerner <lczerner@redhat.com> Signed-off-by: Theodore Ts'o <tytso@mit.edu>
1 parent 71d4f7d commit 4f579ae

File tree

3 files changed

+205
-82
lines changed

3 files changed

+205
-82
lines changed

fs/ext4/ext4.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2143,8 +2143,8 @@ extern ssize_t ext4_ind_direct_IO(int rw, struct kiocb *iocb,
21432143
extern int ext4_ind_calc_metadata_amount(struct inode *inode, sector_t lblock);
21442144
extern int ext4_ind_trans_blocks(struct inode *inode, int nrblocks);
21452145
extern void ext4_ind_truncate(handle_t *, struct inode *inode);
2146-
extern int ext4_free_hole_blocks(handle_t *handle, struct inode *inode,
2147-
ext4_lblk_t first, ext4_lblk_t stop);
2146+
extern int ext4_ind_remove_space(handle_t *handle, struct inode *inode,
2147+
ext4_lblk_t start, ext4_lblk_t end);
21482148

21492149
/* ioctl.c */
21502150
extern long ext4_ioctl(struct file *, unsigned int, unsigned long);

fs/ext4/indirect.c

Lines changed: 202 additions & 79 deletions
Original file line numberDiff line numberDiff line change
@@ -1295,97 +1295,220 @@ void ext4_ind_truncate(handle_t *handle, struct inode *inode)
12951295
}
12961296
}
12971297

1298-
static int free_hole_blocks(handle_t *handle, struct inode *inode,
1299-
struct buffer_head *parent_bh, __le32 *i_data,
1300-
int level, ext4_lblk_t first,
1301-
ext4_lblk_t count, int max)
1298+
/**
1299+
* ext4_ind_remove_space - remove space from the range
1300+
* @handle: JBD handle for this transaction
1301+
* @inode: inode we are dealing with
1302+
* @start: First block to remove
1303+
* @end: One block after the last block to remove (exclusive)
1304+
*
1305+
* Free the blocks in the defined range (end is exclusive endpoint of
1306+
* range). This is used by ext4_punch_hole().
1307+
*/
1308+
int ext4_ind_remove_space(handle_t *handle, struct inode *inode,
1309+
ext4_lblk_t start, ext4_lblk_t end)
13021310
{
1303-
struct buffer_head *bh = NULL;
1311+
struct ext4_inode_info *ei = EXT4_I(inode);
1312+
__le32 *i_data = ei->i_data;
13041313
int addr_per_block = EXT4_ADDR_PER_BLOCK(inode->i_sb);
1305-
int ret = 0;
1306-
int i, inc;
1307-
ext4_lblk_t offset;
1308-
__le32 blk;
1309-
1310-
inc = 1 << ((EXT4_BLOCK_SIZE_BITS(inode->i_sb) - 2) * level);
1311-
for (i = 0, offset = 0; i < max; i++, i_data++, offset += inc) {
1312-
if (offset >= count + first)
1313-
break;
1314-
if (*i_data == 0 || (offset + inc) <= first)
1315-
continue;
1316-
blk = *i_data;
1317-
if (level > 0) {
1318-
ext4_lblk_t first2;
1319-
ext4_lblk_t count2;
1314+
ext4_lblk_t offsets[4], offsets2[4];
1315+
Indirect chain[4], chain2[4];
1316+
Indirect *partial, *partial2;
1317+
ext4_lblk_t max_block;
1318+
__le32 nr = 0, nr2 = 0;
1319+
int n = 0, n2 = 0;
1320+
unsigned blocksize = inode->i_sb->s_blocksize;
13201321

1321-
bh = sb_bread(inode->i_sb, le32_to_cpu(blk));
1322-
if (!bh) {
1323-
EXT4_ERROR_INODE_BLOCK(inode, le32_to_cpu(blk),
1324-
"Read failure");
1325-
return -EIO;
1326-
}
1327-
if (first > offset) {
1328-
first2 = first - offset;
1329-
count2 = count;
1322+
max_block = (EXT4_SB(inode->i_sb)->s_bitmap_maxbytes + blocksize-1)
1323+
>> EXT4_BLOCK_SIZE_BITS(inode->i_sb);
1324+
if (end >= max_block)
1325+
end = max_block;
1326+
if ((start >= end) || (start > max_block))
1327+
return 0;
1328+
1329+
n = ext4_block_to_path(inode, start, offsets, NULL);
1330+
n2 = ext4_block_to_path(inode, end, offsets2, NULL);
1331+
1332+
BUG_ON(n > n2);
1333+
1334+
if ((n == 1) && (n == n2)) {
1335+
/* We're punching only within direct block range */
1336+
ext4_free_data(handle, inode, NULL, i_data + offsets[0],
1337+
i_data + offsets2[0]);
1338+
return 0;
1339+
} else if (n2 > n) {
1340+
/*
1341+
* Start and end are on a different levels so we're going to
1342+
* free partial block at start, and partial block at end of
1343+
* the range. If there are some levels in between then
1344+
* do_indirects label will take care of that.
1345+
*/
1346+
1347+
if (n == 1) {
1348+
/*
1349+
* Start is at the direct block level, free
1350+
* everything to the end of the level.
1351+
*/
1352+
ext4_free_data(handle, inode, NULL, i_data + offsets[0],
1353+
i_data + EXT4_NDIR_BLOCKS);
1354+
goto end_range;
1355+
}
1356+
1357+
1358+
partial = ext4_find_shared(inode, n, offsets, chain, &nr);
1359+
if (nr) {
1360+
if (partial == chain) {
1361+
/* Shared branch grows from the inode */
1362+
ext4_free_branches(handle, inode, NULL,
1363+
&nr, &nr+1, (chain+n-1) - partial);
1364+
*partial->p = 0;
13301365
} else {
1331-
first2 = 0;
1332-
count2 = count - (offset - first);
1366+
/* Shared branch grows from an indirect block */
1367+
BUFFER_TRACE(partial->bh, "get_write_access");
1368+
ext4_free_branches(handle, inode, partial->bh,
1369+
partial->p,
1370+
partial->p+1, (chain+n-1) - partial);
13331371
}
1334-
ret = free_hole_blocks(handle, inode, bh,
1335-
(__le32 *)bh->b_data, level - 1,
1336-
first2, count2,
1337-
inode->i_sb->s_blocksize >> 2);
1338-
if (ret) {
1339-
brelse(bh);
1340-
goto err;
1372+
}
1373+
1374+
/*
1375+
* Clear the ends of indirect blocks on the shared branch
1376+
* at the start of the range
1377+
*/
1378+
while (partial > chain) {
1379+
ext4_free_branches(handle, inode, partial->bh,
1380+
partial->p + 1,
1381+
(__le32 *)partial->bh->b_data+addr_per_block,
1382+
(chain+n-1) - partial);
1383+
BUFFER_TRACE(partial->bh, "call brelse");
1384+
brelse(partial->bh);
1385+
partial--;
1386+
}
1387+
1388+
end_range:
1389+
partial2 = ext4_find_shared(inode, n2, offsets2, chain2, &nr2);
1390+
if (nr2) {
1391+
if (partial2 == chain2) {
1392+
/*
1393+
* Remember, end is exclusive so here we're at
1394+
* the start of the next level we're not going
1395+
* to free. Everything was covered by the start
1396+
* of the range.
1397+
*/
1398+
return 0;
1399+
} else {
1400+
/* Shared branch grows from an indirect block */
1401+
partial2--;
13411402
}
1403+
} else {
1404+
/*
1405+
* ext4_find_shared returns Indirect structure which
1406+
* points to the last element which should not be
1407+
* removed by truncate. But this is end of the range
1408+
* in punch_hole so we need to point to the next element
1409+
*/
1410+
partial2->p++;
13421411
}
1343-
if (level == 0 ||
1344-
(bh && all_zeroes((__le32 *)bh->b_data,
1345-
(__le32 *)bh->b_data + addr_per_block))) {
1346-
ext4_free_data(handle, inode, parent_bh,
1347-
i_data, i_data + 1);
1412+
1413+
/*
1414+
* Clear the ends of indirect blocks on the shared branch
1415+
* at the end of the range
1416+
*/
1417+
while (partial2 > chain2) {
1418+
ext4_free_branches(handle, inode, partial2->bh,
1419+
(__le32 *)partial2->bh->b_data,
1420+
partial2->p,
1421+
(chain2+n2-1) - partial2);
1422+
BUFFER_TRACE(partial2->bh, "call brelse");
1423+
brelse(partial2->bh);
1424+
partial2--;
13481425
}
1349-
brelse(bh);
1350-
bh = NULL;
1426+
goto do_indirects;
13511427
}
13521428

1353-
err:
1354-
return ret;
1355-
}
1356-
1357-
int ext4_free_hole_blocks(handle_t *handle, struct inode *inode,
1358-
ext4_lblk_t first, ext4_lblk_t stop)
1359-
{
1360-
int addr_per_block = EXT4_ADDR_PER_BLOCK(inode->i_sb);
1361-
int level, ret = 0;
1362-
int num = EXT4_NDIR_BLOCKS;
1363-
ext4_lblk_t count, max = EXT4_NDIR_BLOCKS;
1364-
__le32 *i_data = EXT4_I(inode)->i_data;
1365-
1366-
count = stop - first;
1367-
for (level = 0; level < 4; level++, max *= addr_per_block) {
1368-
if (first < max) {
1369-
ret = free_hole_blocks(handle, inode, NULL, i_data,
1370-
level, first, count, num);
1371-
if (ret)
1372-
goto err;
1373-
if (count > max - first)
1374-
count -= max - first;
1375-
else
1376-
break;
1377-
first = 0;
1378-
} else {
1379-
first -= max;
1429+
/* Punch happened within the same level (n == n2) */
1430+
partial = ext4_find_shared(inode, n, offsets, chain, &nr);
1431+
partial2 = ext4_find_shared(inode, n2, offsets2, chain2, &nr2);
1432+
/*
1433+
* ext4_find_shared returns Indirect structure which
1434+
* points to the last element which should not be
1435+
* removed by truncate. But this is end of the range
1436+
* in punch_hole so we need to point to the next element
1437+
*/
1438+
partial2->p++;
1439+
while ((partial > chain) || (partial2 > chain2)) {
1440+
/* We're at the same block, so we're almost finished */
1441+
if ((partial->bh && partial2->bh) &&
1442+
(partial->bh->b_blocknr == partial2->bh->b_blocknr)) {
1443+
if ((partial > chain) && (partial2 > chain2)) {
1444+
ext4_free_branches(handle, inode, partial->bh,
1445+
partial->p + 1,
1446+
partial2->p,
1447+
(chain+n-1) - partial);
1448+
BUFFER_TRACE(partial->bh, "call brelse");
1449+
brelse(partial->bh);
1450+
BUFFER_TRACE(partial2->bh, "call brelse");
1451+
brelse(partial2->bh);
1452+
}
1453+
return 0;
13801454
}
1381-
i_data += num;
1382-
if (level == 0) {
1383-
num = 1;
1384-
max = 1;
1455+
/*
1456+
* Clear the ends of indirect blocks on the shared branch
1457+
* at the start of the range
1458+
*/
1459+
if (partial > chain) {
1460+
ext4_free_branches(handle, inode, partial->bh,
1461+
partial->p + 1,
1462+
(__le32 *)partial->bh->b_data+addr_per_block,
1463+
(chain+n-1) - partial);
1464+
BUFFER_TRACE(partial->bh, "call brelse");
1465+
brelse(partial->bh);
1466+
partial--;
1467+
}
1468+
/*
1469+
* Clear the ends of indirect blocks on the shared branch
1470+
* at the end of the range
1471+
*/
1472+
if (partial2 > chain2) {
1473+
ext4_free_branches(handle, inode, partial2->bh,
1474+
(__le32 *)partial2->bh->b_data,
1475+
partial2->p,
1476+
(chain2+n-1) - partial2);
1477+
BUFFER_TRACE(partial2->bh, "call brelse");
1478+
brelse(partial2->bh);
1479+
partial2--;
13851480
}
13861481
}
13871482

1388-
err:
1389-
return ret;
1483+
do_indirects:
1484+
/* Kill the remaining (whole) subtrees */
1485+
switch (offsets[0]) {
1486+
default:
1487+
if (++n >= n2)
1488+
return 0;
1489+
nr = i_data[EXT4_IND_BLOCK];
1490+
if (nr) {
1491+
ext4_free_branches(handle, inode, NULL, &nr, &nr+1, 1);
1492+
i_data[EXT4_IND_BLOCK] = 0;
1493+
}
1494+
case EXT4_IND_BLOCK:
1495+
if (++n >= n2)
1496+
return 0;
1497+
nr = i_data[EXT4_DIND_BLOCK];
1498+
if (nr) {
1499+
ext4_free_branches(handle, inode, NULL, &nr, &nr+1, 2);
1500+
i_data[EXT4_DIND_BLOCK] = 0;
1501+
}
1502+
case EXT4_DIND_BLOCK:
1503+
if (++n >= n2)
1504+
return 0;
1505+
nr = i_data[EXT4_TIND_BLOCK];
1506+
if (nr) {
1507+
ext4_free_branches(handle, inode, NULL, &nr, &nr+1, 3);
1508+
i_data[EXT4_TIND_BLOCK] = 0;
1509+
}
1510+
case EXT4_TIND_BLOCK:
1511+
;
1512+
}
1513+
return 0;
13901514
}
1391-

fs/ext4/inode.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3506,7 +3506,7 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length)
35063506
ret = ext4_ext_remove_space(inode, first_block,
35073507
stop_block - 1);
35083508
else
3509-
ret = ext4_free_hole_blocks(handle, inode, first_block,
3509+
ret = ext4_ind_remove_space(handle, inode, first_block,
35103510
stop_block);
35113511

35123512
up_write(&EXT4_I(inode)->i_data_sem);

0 commit comments

Comments
 (0)