Skip to content

Commit a27a263

Browse files
Christoph Hellwig authored and Alex Elder committed
xfs: make log devices with write back caches work
There's no reason not to support cache flushing on external log devices. The only thing this really requires is flushing the data device first both in fsync and log commits. A side effect is that we also have to remove the barrier write test during mount, which has been superfluous since the new FLUSH+FUA code anyway. Also use the chance to flush the RT subvolume write cache before the fsync commit, which is required for correct semantics. Signed-off-by: Christoph Hellwig <hch@lst.de> Signed-off-by: Alex Elder <aelder@sgi.com>
1 parent c46a131 commit a27a263

File tree

3 files changed

+41
-95
lines changed

3 files changed

+41
-95
lines changed

fs/xfs/linux-2.6/xfs_file.c

Lines changed: 31 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -131,19 +131,34 @@ xfs_file_fsync(
131131
{
132132
struct inode *inode = file->f_mapping->host;
133133
struct xfs_inode *ip = XFS_I(inode);
134+
struct xfs_mount *mp = ip->i_mount;
134135
struct xfs_trans *tp;
135136
int error = 0;
136137
int log_flushed = 0;
137138

138139
trace_xfs_file_fsync(ip);
139140

140-
if (XFS_FORCED_SHUTDOWN(ip->i_mount))
141+
if (XFS_FORCED_SHUTDOWN(mp))
141142
return -XFS_ERROR(EIO);
142143

143144
xfs_iflags_clear(ip, XFS_ITRUNCATED);
144145

145146
xfs_ioend_wait(ip);
146147

148+
if (mp->m_flags & XFS_MOUNT_BARRIER) {
149+
/*
150+
* If we have an RT and/or log subvolume we need to make sure
151+
* to flush the write cache of the device used for file data
152+
* first. This is to ensure newly written file data make
153+
* it to disk before logging the new inode size in case of
154+
* an extending write.
155+
*/
156+
if (XFS_IS_REALTIME_INODE(ip))
157+
xfs_blkdev_issue_flush(mp->m_rtdev_targp);
158+
else if (mp->m_logdev_targp != mp->m_ddev_targp)
159+
xfs_blkdev_issue_flush(mp->m_ddev_targp);
160+
}
161+
147162
/*
148163
* We always need to make sure that the required inode state is safe on
149164
* disk. The inode might be clean but we still might need to force the
@@ -175,9 +190,9 @@ xfs_file_fsync(
175190
* updates. The sync transaction will also force the log.
176191
*/
177192
xfs_iunlock(ip, XFS_ILOCK_SHARED);
178-
tp = xfs_trans_alloc(ip->i_mount, XFS_TRANS_FSYNC_TS);
193+
tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS);
179194
error = xfs_trans_reserve(tp, 0,
180-
XFS_FSYNC_TS_LOG_RES(ip->i_mount), 0, 0, 0);
195+
XFS_FSYNC_TS_LOG_RES(mp), 0, 0, 0);
181196
if (error) {
182197
xfs_trans_cancel(tp, 0);
183198
return -error;
@@ -209,28 +224,25 @@ xfs_file_fsync(
209224
* force the log.
210225
*/
211226
if (xfs_ipincount(ip)) {
212-
error = _xfs_log_force_lsn(ip->i_mount,
227+
error = _xfs_log_force_lsn(mp,
213228
ip->i_itemp->ili_last_lsn,
214229
XFS_LOG_SYNC, &log_flushed);
215230
}
216231
xfs_iunlock(ip, XFS_ILOCK_SHARED);
217232
}
218233

219-
if (ip->i_mount->m_flags & XFS_MOUNT_BARRIER) {
220-
/*
221-
* If the log write didn't issue an ordered tag we need
222-
* to flush the disk cache for the data device now.
223-
*/
224-
if (!log_flushed)
225-
xfs_blkdev_issue_flush(ip->i_mount->m_ddev_targp);
226-
227-
/*
228-
* If this inode is on the RT dev we need to flush that
229-
* cache as well.
230-
*/
231-
if (XFS_IS_REALTIME_INODE(ip))
232-
xfs_blkdev_issue_flush(ip->i_mount->m_rtdev_targp);
233-
}
234+
/*
235+
* If we only have a single device, and the log force above was
236+
* a no-op we might have to flush the data device cache here.
237+
* This can only happen for fdatasync/O_DSYNC if we were overwriting
238+
* an already allocated file and thus do not have any metadata to
239+
* commit.
240+
*/
241+
if ((mp->m_flags & XFS_MOUNT_BARRIER) &&
242+
mp->m_logdev_targp == mp->m_ddev_targp &&
243+
!XFS_IS_REALTIME_INODE(ip) &&
244+
!log_flushed)
245+
xfs_blkdev_issue_flush(mp->m_ddev_targp);
234246

235247
return -error;
236248
}

fs/xfs/linux-2.6/xfs_super.c

Lines changed: 0 additions & 75 deletions
Original file line numberDiff line numberDiff line change
@@ -627,68 +627,6 @@ xfs_blkdev_put(
627627
blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
628628
}
629629

630-
/*
631-
* Try to write out the superblock using barriers.
632-
*/
633-
STATIC int
634-
xfs_barrier_test(
635-
xfs_mount_t *mp)
636-
{
637-
xfs_buf_t *sbp = xfs_getsb(mp, 0);
638-
int error;
639-
640-
XFS_BUF_UNDONE(sbp);
641-
XFS_BUF_UNREAD(sbp);
642-
XFS_BUF_UNDELAYWRITE(sbp);
643-
XFS_BUF_WRITE(sbp);
644-
XFS_BUF_UNASYNC(sbp);
645-
XFS_BUF_ORDERED(sbp);
646-
647-
xfsbdstrat(mp, sbp);
648-
error = xfs_buf_iowait(sbp);
649-
650-
/*
651-
* Clear all the flags we set and possible error state in the
652-
* buffer. We only did the write to try out whether barriers
653-
* worked and shouldn't leave any traces in the superblock
654-
* buffer.
655-
*/
656-
XFS_BUF_DONE(sbp);
657-
XFS_BUF_ERROR(sbp, 0);
658-
XFS_BUF_UNORDERED(sbp);
659-
660-
xfs_buf_relse(sbp);
661-
return error;
662-
}
663-
664-
STATIC void
665-
xfs_mountfs_check_barriers(xfs_mount_t *mp)
666-
{
667-
int error;
668-
669-
if (mp->m_logdev_targp != mp->m_ddev_targp) {
670-
xfs_notice(mp,
671-
"Disabling barriers, not supported with external log device");
672-
mp->m_flags &= ~XFS_MOUNT_BARRIER;
673-
return;
674-
}
675-
676-
if (xfs_readonly_buftarg(mp->m_ddev_targp)) {
677-
xfs_notice(mp,
678-
"Disabling barriers, underlying device is readonly");
679-
mp->m_flags &= ~XFS_MOUNT_BARRIER;
680-
return;
681-
}
682-
683-
error = xfs_barrier_test(mp);
684-
if (error) {
685-
xfs_notice(mp,
686-
"Disabling barriers, trial barrier write failed");
687-
mp->m_flags &= ~XFS_MOUNT_BARRIER;
688-
return;
689-
}
690-
}
691-
692630
void
693631
xfs_blkdev_issue_flush(
694632
xfs_buftarg_t *buftarg)
@@ -1240,14 +1178,6 @@ xfs_fs_remount(
12401178
switch (token) {
12411179
case Opt_barrier:
12421180
mp->m_flags |= XFS_MOUNT_BARRIER;
1243-
1244-
/*
1245-
* Test if barriers are actually working if we can,
1246-
* else delay this check until the filesystem is
1247-
* marked writeable.
1248-
*/
1249-
if (!(mp->m_flags & XFS_MOUNT_RDONLY))
1250-
xfs_mountfs_check_barriers(mp);
12511181
break;
12521182
case Opt_nobarrier:
12531183
mp->m_flags &= ~XFS_MOUNT_BARRIER;
@@ -1282,8 +1212,6 @@ xfs_fs_remount(
12821212
/* ro -> rw */
12831213
if ((mp->m_flags & XFS_MOUNT_RDONLY) && !(*flags & MS_RDONLY)) {
12841214
mp->m_flags &= ~XFS_MOUNT_RDONLY;
1285-
if (mp->m_flags & XFS_MOUNT_BARRIER)
1286-
xfs_mountfs_check_barriers(mp);
12871215

12881216
/*
12891217
* If this is the first remount to writeable state we
@@ -1465,9 +1393,6 @@ xfs_fs_fill_super(
14651393
if (error)
14661394
goto out_free_sb;
14671395

1468-
if (mp->m_flags & XFS_MOUNT_BARRIER)
1469-
xfs_mountfs_check_barriers(mp);
1470-
14711396
error = xfs_filestream_mount(mp);
14721397
if (error)
14731398
goto out_free_sb;

fs/xfs/xfs_log.c

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1372,8 +1372,17 @@ xlog_sync(xlog_t *log,
13721372
XFS_BUF_ASYNC(bp);
13731373
bp->b_flags |= XBF_LOG_BUFFER;
13741374

1375-
if (log->l_mp->m_flags & XFS_MOUNT_BARRIER)
1375+
if (log->l_mp->m_flags & XFS_MOUNT_BARRIER) {
1376+
/*
1377+
* If we have an external log device, flush the data device
1378+
* before flushing the log to make sure all meta data
1379+
* written back from the AIL actually made it to disk
1380+
* before writing out the new log tail LSN in the log buffer.
1381+
*/
1382+
if (log->l_mp->m_logdev_targp != log->l_mp->m_ddev_targp)
1383+
xfs_blkdev_issue_flush(log->l_mp->m_ddev_targp);
13761384
XFS_BUF_ORDERED(bp);
1385+
}
13771386

13781387
ASSERT(XFS_BUF_ADDR(bp) <= log->l_logBBsize-1);
13791388
ASSERT(XFS_BUF_ADDR(bp) + BTOBB(count) <= log->l_logBBsize);

0 commit comments

Comments
 (0)