Skip to content

Commit cc71466

Browse files
committed
xfs: add dedupe range vfs function
Define a VFS function which allows userspace to request that the kernel reflink a range of blocks between two files if the ranges' contents match. The function fits the new VFS ioctl that standardizes the checking for the btrfs EXTENT SAME ioctl. Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com> Reviewed-by: Christoph Hellwig <hch@lst.de>
1 parent 9fe2604 commit cc71466

File tree

3 files changed

+174
-6
lines changed

3 files changed

+174
-6
lines changed

fs/xfs/xfs_file.c

Lines changed: 44 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1010,7 +1010,8 @@ xfs_file_share_range(
10101010
loff_t pos_in,
10111011
struct file *file_out,
10121012
loff_t pos_out,
1013-
u64 len)
1013+
u64 len,
1014+
bool is_dedupe)
10141015
{
10151016
struct inode *inode_in;
10161017
struct inode *inode_out;
@@ -1019,6 +1020,7 @@ xfs_file_share_range(
10191020
loff_t isize;
10201021
int same_inode;
10211022
loff_t blen;
1023+
unsigned int flags = 0;
10221024

10231025
inode_in = file_inode(file_in);
10241026
inode_out = file_inode(file_out);
@@ -1056,6 +1058,15 @@ xfs_file_share_range(
10561058
pos_in + len > isize)
10571059
return -EINVAL;
10581060

1061+
/* Don't allow dedupe past EOF in the dest file */
1062+
if (is_dedupe) {
1063+
loff_t disize;
1064+
1065+
disize = i_size_read(inode_out);
1066+
if (pos_out >= disize || pos_out + len > disize)
1067+
return -EINVAL;
1068+
}
1069+
10591070
/* If we're linking to EOF, continue to the block boundary. */
10601071
if (pos_in + len == isize)
10611072
blen = ALIGN(isize, bs) - pos_in;
@@ -1079,8 +1090,10 @@ xfs_file_share_range(
10791090
if (ret)
10801091
goto out_unlock;
10811092

1093+
if (is_dedupe)
1094+
flags |= XFS_REFLINK_DEDUPE;
10821095
ret = xfs_reflink_remap_range(XFS_I(inode_in), pos_in, XFS_I(inode_out),
1083-
pos_out, len);
1096+
pos_out, len, flags);
10841097
if (ret < 0)
10851098
goto out_unlock;
10861099

@@ -1100,7 +1113,7 @@ xfs_file_copy_range(
11001113
int error;
11011114

11021115
error = xfs_file_share_range(file_in, pos_in, file_out, pos_out,
1103-
len);
1116+
len, false);
11041117
if (error)
11051118
return error;
11061119
return len;
@@ -1115,7 +1128,33 @@ xfs_file_clone_range(
11151128
u64 len)
11161129
{
11171130
return xfs_file_share_range(file_in, pos_in, file_out, pos_out,
1118-
len);
1131+
len, false);
1132+
}
1133+
1134+
#define XFS_MAX_DEDUPE_LEN (16 * 1024 * 1024)
1135+
STATIC ssize_t
1136+
xfs_file_dedupe_range(
1137+
struct file *src_file,
1138+
u64 loff,
1139+
u64 len,
1140+
struct file *dst_file,
1141+
u64 dst_loff)
1142+
{
1143+
int error;
1144+
1145+
/*
1146+
* Limit the total length we will dedupe for each operation.
1147+
* This is intended to bound the total time spent in this
1148+
* ioctl to something sane.
1149+
*/
1150+
if (len > XFS_MAX_DEDUPE_LEN)
1151+
len = XFS_MAX_DEDUPE_LEN;
1152+
1153+
error = xfs_file_share_range(src_file, loff, dst_file, dst_loff,
1154+
len, true);
1155+
if (error)
1156+
return error;
1157+
return len;
11191158
}
11201159

11211160
STATIC int
@@ -1779,6 +1818,7 @@ const struct file_operations xfs_file_operations = {
17791818
.fallocate = xfs_file_fallocate,
17801819
.copy_file_range = xfs_file_copy_range,
17811820
.clone_file_range = xfs_file_clone_range,
1821+
.dedupe_file_range = xfs_file_dedupe_range,
17821822
};
17831823

17841824
const struct file_operations xfs_dir_file_operations = {

fs/xfs/xfs_reflink.c

Lines changed: 126 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1149,6 +1149,111 @@ xfs_reflink_remap_blocks(
11491149
return error;
11501150
}
11511151

1152+
/*
1153+
* Read a page's worth of file data into the page cache. Return the page
1154+
* locked.
1155+
*/
1156+
static struct page *
1157+
xfs_get_page(
1158+
struct inode *inode,
1159+
xfs_off_t offset)
1160+
{
1161+
struct address_space *mapping;
1162+
struct page *page;
1163+
pgoff_t n;
1164+
1165+
n = offset >> PAGE_SHIFT;
1166+
mapping = inode->i_mapping;
1167+
page = read_mapping_page(mapping, n, NULL);
1168+
if (IS_ERR(page))
1169+
return page;
1170+
if (!PageUptodate(page)) {
1171+
put_page(page);
1172+
return ERR_PTR(-EIO);
1173+
}
1174+
lock_page(page);
1175+
return page;
1176+
}
1177+
1178+
/*
1179+
* Compare extents of two files to see if they are the same.
1180+
*/
1181+
static int
1182+
xfs_compare_extents(
1183+
struct inode *src,
1184+
xfs_off_t srcoff,
1185+
struct inode *dest,
1186+
xfs_off_t destoff,
1187+
xfs_off_t len,
1188+
bool *is_same)
1189+
{
1190+
xfs_off_t src_poff;
1191+
xfs_off_t dest_poff;
1192+
void *src_addr;
1193+
void *dest_addr;
1194+
struct page *src_page;
1195+
struct page *dest_page;
1196+
xfs_off_t cmp_len;
1197+
bool same;
1198+
int error;
1199+
1200+
error = -EINVAL;
1201+
same = true;
1202+
while (len) {
1203+
src_poff = srcoff & (PAGE_SIZE - 1);
1204+
dest_poff = destoff & (PAGE_SIZE - 1);
1205+
cmp_len = min(PAGE_SIZE - src_poff,
1206+
PAGE_SIZE - dest_poff);
1207+
cmp_len = min(cmp_len, len);
1208+
ASSERT(cmp_len > 0);
1209+
1210+
trace_xfs_reflink_compare_extents(XFS_I(src), srcoff, cmp_len,
1211+
XFS_I(dest), destoff);
1212+
1213+
src_page = xfs_get_page(src, srcoff);
1214+
if (IS_ERR(src_page)) {
1215+
error = PTR_ERR(src_page);
1216+
goto out_error;
1217+
}
1218+
dest_page = xfs_get_page(dest, destoff);
1219+
if (IS_ERR(dest_page)) {
1220+
error = PTR_ERR(dest_page);
1221+
unlock_page(src_page);
1222+
put_page(src_page);
1223+
goto out_error;
1224+
}
1225+
src_addr = kmap_atomic(src_page);
1226+
dest_addr = kmap_atomic(dest_page);
1227+
1228+
flush_dcache_page(src_page);
1229+
flush_dcache_page(dest_page);
1230+
1231+
if (memcmp(src_addr + src_poff, dest_addr + dest_poff, cmp_len))
1232+
same = false;
1233+
1234+
kunmap_atomic(dest_addr);
1235+
kunmap_atomic(src_addr);
1236+
unlock_page(dest_page);
1237+
unlock_page(src_page);
1238+
put_page(dest_page);
1239+
put_page(src_page);
1240+
1241+
if (!same)
1242+
break;
1243+
1244+
srcoff += cmp_len;
1245+
destoff += cmp_len;
1246+
len -= cmp_len;
1247+
}
1248+
1249+
*is_same = same;
1250+
return 0;
1251+
1252+
out_error:
1253+
trace_xfs_reflink_compare_extents_error(XFS_I(dest), error, _RET_IP_);
1254+
return error;
1255+
}
1256+
11521257
/*
11531258
* Link a range of blocks from one file to another.
11541259
*/
@@ -1158,12 +1263,14 @@ xfs_reflink_remap_range(
11581263
xfs_off_t srcoff,
11591264
struct xfs_inode *dest,
11601265
xfs_off_t destoff,
1161-
xfs_off_t len)
1266+
xfs_off_t len,
1267+
unsigned int flags)
11621268
{
11631269
struct xfs_mount *mp = src->i_mount;
11641270
xfs_fileoff_t sfsbno, dfsbno;
11651271
xfs_filblks_t fsblen;
11661272
int error;
1273+
bool is_same;
11671274

11681275
if (!xfs_sb_version_hasreflink(&mp->m_sb))
11691276
return -EOPNOTSUPP;
@@ -1175,6 +1282,9 @@ xfs_reflink_remap_range(
11751282
if (XFS_IS_REALTIME_INODE(src) || XFS_IS_REALTIME_INODE(dest))
11761283
return -EINVAL;
11771284

1285+
if (flags & ~XFS_REFLINK_ALL)
1286+
return -EINVAL;
1287+
11781288
trace_xfs_reflink_remap_range(src, srcoff, len, dest, destoff);
11791289

11801290
/* Lock both files against IO */
@@ -1186,6 +1296,21 @@ xfs_reflink_remap_range(
11861296
xfs_lock_two_inodes(src, dest, XFS_MMAPLOCK_EXCL);
11871297
}
11881298

1299+
/*
1300+
* Check that the extents are the same.
1301+
*/
1302+
if (flags & XFS_REFLINK_DEDUPE) {
1303+
is_same = false;
1304+
error = xfs_compare_extents(VFS_I(src), srcoff, VFS_I(dest),
1305+
destoff, len, &is_same);
1306+
if (error)
1307+
goto out_error;
1308+
if (!is_same) {
1309+
error = -EBADE;
1310+
goto out_error;
1311+
}
1312+
}
1313+
11891314
error = xfs_reflink_set_inode_flag(src, dest);
11901315
if (error)
11911316
goto out_error;

fs/xfs/xfs_reflink.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,10 @@ extern int xfs_reflink_cancel_cow_range(struct xfs_inode *ip, xfs_off_t offset,
4343
extern int xfs_reflink_end_cow(struct xfs_inode *ip, xfs_off_t offset,
4444
xfs_off_t count);
4545
extern int xfs_reflink_recover_cow(struct xfs_mount *mp);
46+
#define XFS_REFLINK_DEDUPE 1 /* only reflink if contents match */
47+
#define XFS_REFLINK_ALL (XFS_REFLINK_DEDUPE)
4648
extern int xfs_reflink_remap_range(struct xfs_inode *src, xfs_off_t srcoff,
47-
struct xfs_inode *dest, xfs_off_t destoff, xfs_off_t len);
49+
struct xfs_inode *dest, xfs_off_t destoff, xfs_off_t len,
50+
unsigned int flags);
4851

4952
#endif /* __XFS_REFLINK_H */

0 commit comments

Comments
 (0)