Skip to content

Commit f65306e

Browse files
committed
xfs: map an inode's offset to an exact physical block
Teach the bmap routine to know how to map a range of file blocks to a specific range of physical blocks, instead of simply allocating fresh blocks. This enables reflink to map a file to blocks that are already in use. Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com> Reviewed-by: Christoph Hellwig <hch@lst.de>
1 parent 77d61fe commit f65306e

File tree

3 files changed

+131
-1
lines changed

3 files changed

+131
-1
lines changed

fs/xfs/libxfs/xfs_bmap.c

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3876,6 +3876,63 @@ xfs_bmap_btalloc(
38763876
return 0;
38773877
}
38783878

3879+
/*
3880+
* For a remap operation, just "allocate" an extent at the address that the
3881+
* caller passed in, and ensure that the AGFL is the right size. The caller
3882+
* will then map the "allocated" extent into the file somewhere.
3883+
*/
3884+
STATIC int
3885+
xfs_bmap_remap_alloc(
3886+
struct xfs_bmalloca *ap)
3887+
{
3888+
struct xfs_trans *tp = ap->tp;
3889+
struct xfs_mount *mp = tp->t_mountp;
3890+
xfs_agblock_t bno;
3891+
struct xfs_alloc_arg args;
3892+
int error;
3893+
3894+
/*
3895+
* validate that the block number is legal - the enables us to detect
3896+
* and handle a silent filesystem corruption rather than crashing.
3897+
*/
3898+
memset(&args, 0, sizeof(struct xfs_alloc_arg));
3899+
args.tp = ap->tp;
3900+
args.mp = ap->tp->t_mountp;
3901+
bno = *ap->firstblock;
3902+
args.agno = XFS_FSB_TO_AGNO(mp, bno);
3903+
args.agbno = XFS_FSB_TO_AGBNO(mp, bno);
3904+
if (args.agno >= mp->m_sb.sb_agcount ||
3905+
args.agbno >= mp->m_sb.sb_agblocks)
3906+
return -EFSCORRUPTED;
3907+
3908+
/* "Allocate" the extent from the range we passed in. */
3909+
trace_xfs_bmap_remap_alloc(ap->ip, *ap->firstblock, ap->length);
3910+
ap->blkno = bno;
3911+
ap->ip->i_d.di_nblocks += ap->length;
3912+
xfs_trans_log_inode(ap->tp, ap->ip, XFS_ILOG_CORE);
3913+
3914+
/* Fix the freelist, like a real allocator does. */
3915+
args.datatype = ap->datatype;
3916+
args.pag = xfs_perag_get(args.mp, args.agno);
3917+
ASSERT(args.pag);
3918+
3919+
/*
3920+
* The freelist fixing code will decline the allocation if
3921+
* the size and shape of the free space doesn't allow for
3922+
* allocating the extent and updating all the metadata that
3923+
* happens during an allocation. We're remapping, not
3924+
* allocating, so skip that check by pretending to be freeing.
3925+
*/
3926+
error = xfs_alloc_fix_freelist(&args, XFS_ALLOC_FLAG_FREEING);
3927+
if (error)
3928+
goto error0;
3929+
error0:
3930+
xfs_perag_put(args.pag);
3931+
if (error)
3932+
trace_xfs_bmap_remap_alloc_error(ap->ip, error, _RET_IP_);
3933+
return error;
3934+
}
3935+
38793936
/*
38803937
* xfs_bmap_alloc is called by xfs_bmapi to allocate an extent for a file.
38813938
* It figures out where to ask the underlying allocator to put the new extent.
@@ -3884,6 +3941,8 @@ STATIC int
38843941
xfs_bmap_alloc(
38853942
struct xfs_bmalloca *ap) /* bmap alloc argument struct */
38863943
{
3944+
if (ap->flags & XFS_BMAPI_REMAP)
3945+
return xfs_bmap_remap_alloc(ap);
38873946
if (XFS_IS_REALTIME_INODE(ap->ip) &&
38883947
xfs_alloc_is_userdata(ap->datatype))
38893948
return xfs_bmap_rtalloc(ap);
@@ -4442,6 +4501,9 @@ xfs_bmapi_write(
44424501
ASSERT(len > 0);
44434502
ASSERT(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_LOCAL);
44444503
ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
4504+
ASSERT(!(flags & XFS_BMAPI_REMAP) || whichfork == XFS_DATA_FORK);
4505+
ASSERT(!(flags & XFS_BMAPI_PREALLOC) || !(flags & XFS_BMAPI_REMAP));
4506+
ASSERT(!(flags & XFS_BMAPI_CONVERT) || !(flags & XFS_BMAPI_REMAP));
44454507

44464508
/* zeroing is currently only for data extents, not metadata */
44474509
ASSERT((flags & (XFS_BMAPI_METADATA | XFS_BMAPI_ZERO)) !=
@@ -4502,6 +4564,12 @@ xfs_bmapi_write(
45024564
inhole = eof || bma.got.br_startoff > bno;
45034565
wasdelay = !inhole && isnullstartblock(bma.got.br_startblock);
45044566

4567+
/*
4568+
* Make sure we only reflink into a hole.
4569+
*/
4570+
if (flags & XFS_BMAPI_REMAP)
4571+
ASSERT(inhole);
4572+
45054573
/*
45064574
* First, deal with the hole before the allocated space
45074575
* that we found, if any.

fs/xfs/libxfs/xfs_bmap.h

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,13 @@ struct xfs_extent_free_item
9797
*/
9898
#define XFS_BMAPI_ZERO 0x080
9999

100+
/*
 * Map the inode offset to the block given in ap->firstblock instead of
 * allocating a fresh extent.  Primarily used for reflink.  The range must
 * be in a hole, and this flag cannot be turned on with PREALLOC or CONVERT,
 * and cannot be used on the attr fork.
 */
#define XFS_BMAPI_REMAP 0x100
106+
100107
#define XFS_BMAPI_FLAGS \
101108
{ XFS_BMAPI_ENTIRE, "ENTIRE" }, \
102109
{ XFS_BMAPI_METADATA, "METADATA" }, \
@@ -105,7 +112,8 @@ struct xfs_extent_free_item
105112
{ XFS_BMAPI_IGSTATE, "IGSTATE" }, \
106113
{ XFS_BMAPI_CONTIG, "CONTIG" }, \
107114
{ XFS_BMAPI_CONVERT, "CONVERT" }, \
108-
{ XFS_BMAPI_ZERO, "ZERO" }
115+
{ XFS_BMAPI_ZERO, "ZERO" }, \
116+
{ XFS_BMAPI_REMAP, "REMAP" }
109117

110118

111119
static inline int xfs_bmapi_aflag(int w)

fs/xfs/xfs_trace.h

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2968,6 +2968,60 @@ TRACE_EVENT(xfs_refcount_finish_one_leftover,
29682968
__entry->new_len)
29692969
);
29702970

2971+
/* simple inode-based error/%ip tracepoint class */
2972+
DECLARE_EVENT_CLASS(xfs_inode_error_class,
2973+
TP_PROTO(struct xfs_inode *ip, int error, unsigned long caller_ip),
2974+
TP_ARGS(ip, error, caller_ip),
2975+
TP_STRUCT__entry(
2976+
__field(dev_t, dev)
2977+
__field(xfs_ino_t, ino)
2978+
__field(int, error)
2979+
__field(unsigned long, caller_ip)
2980+
),
2981+
TP_fast_assign(
2982+
__entry->dev = VFS_I(ip)->i_sb->s_dev;
2983+
__entry->ino = ip->i_ino;
2984+
__entry->error = error;
2985+
__entry->caller_ip = caller_ip;
2986+
),
2987+
TP_printk("dev %d:%d ino %llx error %d caller %ps",
2988+
MAJOR(__entry->dev), MINOR(__entry->dev),
2989+
__entry->ino,
2990+
__entry->error,
2991+
(char *)__entry->caller_ip)
2992+
);
2993+
2994+
/*
 * Define a tracepoint called @name as an instance of xfs_inode_error_class,
 * taking the inode, the error code and the caller's instruction pointer.
 */
#define DEFINE_INODE_ERROR_EVENT(name) \
DEFINE_EVENT(xfs_inode_error_class, name, \
	TP_PROTO(struct xfs_inode *ip, int error, \
		 unsigned long caller_ip), \
	TP_ARGS(ip, error, caller_ip))
2999+
3000+
/* reflink allocator */
3001+
TRACE_EVENT(xfs_bmap_remap_alloc,
3002+
TP_PROTO(struct xfs_inode *ip, xfs_fsblock_t fsbno,
3003+
xfs_extlen_t len),
3004+
TP_ARGS(ip, fsbno, len),
3005+
TP_STRUCT__entry(
3006+
__field(dev_t, dev)
3007+
__field(xfs_ino_t, ino)
3008+
__field(xfs_fsblock_t, fsbno)
3009+
__field(xfs_extlen_t, len)
3010+
),
3011+
TP_fast_assign(
3012+
__entry->dev = VFS_I(ip)->i_sb->s_dev;
3013+
__entry->ino = ip->i_ino;
3014+
__entry->fsbno = fsbno;
3015+
__entry->len = len;
3016+
),
3017+
TP_printk("dev %d:%d ino 0x%llx fsbno 0x%llx len %x",
3018+
MAJOR(__entry->dev), MINOR(__entry->dev),
3019+
__entry->ino,
3020+
__entry->fsbno,
3021+
__entry->len)
3022+
);
3023+
DEFINE_INODE_ERROR_EVENT(xfs_bmap_remap_alloc_error);
3024+
29713025
#endif /* _TRACE_XFS_H */
29723026

29733027
#undef TRACE_INCLUDE_PATH

0 commit comments

Comments
 (0)