
Commit 2a62ec0

Merge tag 'xfs-for-linus-4.5-rc7' of git://git.kernel.org/pub/scm/linux/kernel/git/dgc/linux-xfs
Pull xfs fixes from Dave Chinner:
 "This is a fix for a regression introduced in 4.5-rc1 by the new torn
  log write detection code. The regression only affects people moving a
  clean filesystem between machines/kernels of different architecture
  (such as changing between 32 bit and 64 bit kernels), but this is the
  recommended (and only!) safe way to migrate a filesystem between
  architectures so we really need to ensure it works.

  The changes are larger than I'd prefer right at the end of the release
  cycle, but the majority of the change is just factoring code to enable
  the detection of a clean log at the correct time to avoid this issue.

  Changes:

   - Only perform torn log write detection on dirty logs. This prevents
     failures being detected due to a clean filesystem being moved
     between machines or kernels of different architectures (e.g. 32 ->
     64 bit, BE -> LE, etc). This fixes a regression introduced by the
     torn log write detection in 4.5-rc1"

* tag 'xfs-for-linus-4.5-rc7' of git://git.kernel.org/pub/scm/linux/kernel/git/dgc/linux-xfs:
  xfs: only run torn log write detection on dirty logs
  xfs: refactor in-core log state update to helper
  xfs: refactor unmount record detection into helper
  xfs: separate log head record discovery from verification
2 parents 63cf207 + 7f6aff3 commit 2a62ec0
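For orientation before reading the diff below, here is a minimal, self-contained sketch of the reordered tail-discovery flow this merge introduces. It is not kernel code: find_tail_sketch(), head_is_unmount_record() and verify_and_trim_torn_head() are hypothetical stand-ins for xlog_find_tail(), xlog_check_unmount_rec() and xlog_verify_head() in fs/xfs/xfs_log_recover.c. The only point it illustrates is the ordering: discover the head record, decide whether the log is clean, and run CRC-based torn-write detection only when it is dirty.

/*
 * Hypothetical stand-alone sketch, NOT the kernel implementation: it only
 * models the order of operations that xlog_find_tail() follows after this
 * merge. The two helpers below are invented placeholders.
 */
#include <stdbool.h>
#include <stdio.h>

/* Placeholder: does the record at the head say the log was cleanly unmounted? */
static bool head_is_unmount_record(void)
{
	return true;		/* pretend the log is clean */
}

/* Placeholder: CRC-verify recent records and trim torn writes off the head. */
static int verify_and_trim_torn_head(void)
{
	return 0;		/* pretend verification succeeded */
}

static int find_tail_sketch(void)
{
	bool clean;

	/* 1. Locate the last record header and set the in-core log state. */
	/* 2. Decide whether the log is clean (unmount record at the head). */
	clean = head_is_unmount_record();

	/*
	 * 3. Run torn-write (CRC) detection only on a dirty log. A clean log
	 *    written by a different architecture may carry stale data behind
	 *    the unmount record, so verifying it would raise false failures.
	 */
	if (!clean)
		return verify_and_trim_torn_head();

	return 0;
}

int main(void)
{
	printf("torn-write check %s\n",
	       find_tail_sketch() ? "failed" : "passed or skipped");
	return 0;
}

Skipping verification on a clean log is the whole fix: nothing guarantees that the log data behind an unmount record written by a different architecture is verifiable on the current one, which is exactly the migration case the commit message describes.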

File tree

1 file changed: +168 −103 lines

fs/xfs/xfs_log_recover.c

Lines changed: 168 additions & 103 deletions
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -1109,27 +1109,10 @@ xlog_verify_head(
 	bool		tmp_wrapped;
 
 	/*
-	 * Search backwards through the log looking for the log record header
-	 * block. This wraps all the way back around to the head so something is
-	 * seriously wrong if we can't find it.
-	 */
-	found = xlog_rseek_logrec_hdr(log, *head_blk, *head_blk, 1, bp, rhead_blk,
-				      rhead, wrapped);
-	if (found < 0)
-		return found;
-	if (!found) {
-		xfs_warn(log->l_mp, "%s: couldn't find sync record", __func__);
-		return -EIO;
-	}
-
-	*tail_blk = BLOCK_LSN(be64_to_cpu((*rhead)->h_tail_lsn));
-
-	/*
-	 * Now that we have a tail block, check the head of the log for torn
-	 * writes. Search again until we hit the tail or the maximum number of
-	 * log record I/Os that could have been in flight at one time. Use a
-	 * temporary buffer so we don't trash the rhead/bp pointer from the
-	 * call above.
+	 * Check the head of the log for torn writes. Search backwards from the
+	 * head until we hit the tail or the maximum number of log record I/Os
+	 * that could have been in flight at one time. Use a temporary buffer so
+	 * we don't trash the rhead/bp pointers from the caller.
 	 */
 	tmp_bp = xlog_get_bp(log, 1);
 	if (!tmp_bp)
@@ -1215,6 +1198,115 @@ xlog_verify_head(
 	return error;
 }
 
+/*
+ * Check whether the head of the log points to an unmount record. In other
+ * words, determine whether the log is clean. If so, update the in-core state
+ * appropriately.
+ */
+static int
+xlog_check_unmount_rec(
+	struct xlog		*log,
+	xfs_daddr_t		*head_blk,
+	xfs_daddr_t		*tail_blk,
+	struct xlog_rec_header	*rhead,
+	xfs_daddr_t		rhead_blk,
+	struct xfs_buf		*bp,
+	bool			*clean)
+{
+	struct xlog_op_header	*op_head;
+	xfs_daddr_t		umount_data_blk;
+	xfs_daddr_t		after_umount_blk;
+	int			hblks;
+	int			error;
+	char			*offset;
+
+	*clean = false;
+
+	/*
+	 * Look for unmount record. If we find it, then we know there was a
+	 * clean unmount. Since 'i' could be the last block in the physical
+	 * log, we convert to a log block before comparing to the head_blk.
+	 *
+	 * Save the current tail lsn to use to pass to xlog_clear_stale_blocks()
+	 * below. We won't want to clear the unmount record if there is one, so
+	 * we pass the lsn of the unmount record rather than the block after it.
+	 */
+	if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) {
+		int	h_size = be32_to_cpu(rhead->h_size);
+		int	h_version = be32_to_cpu(rhead->h_version);
+
+		if ((h_version & XLOG_VERSION_2) &&
+		    (h_size > XLOG_HEADER_CYCLE_SIZE)) {
+			hblks = h_size / XLOG_HEADER_CYCLE_SIZE;
+			if (h_size % XLOG_HEADER_CYCLE_SIZE)
+				hblks++;
+		} else {
+			hblks = 1;
+		}
+	} else {
+		hblks = 1;
+	}
+	after_umount_blk = rhead_blk + hblks + BTOBB(be32_to_cpu(rhead->h_len));
+	after_umount_blk = do_mod(after_umount_blk, log->l_logBBsize);
+	if (*head_blk == after_umount_blk &&
+	    be32_to_cpu(rhead->h_num_logops) == 1) {
+		umount_data_blk = rhead_blk + hblks;
+		umount_data_blk = do_mod(umount_data_blk, log->l_logBBsize);
+		error = xlog_bread(log, umount_data_blk, 1, bp, &offset);
+		if (error)
+			return error;
+
+		op_head = (struct xlog_op_header *)offset;
+		if (op_head->oh_flags & XLOG_UNMOUNT_TRANS) {
+			/*
+			 * Set tail and last sync so that newly written log
+			 * records will point recovery to after the current
+			 * unmount record.
+			 */
+			xlog_assign_atomic_lsn(&log->l_tail_lsn,
+					log->l_curr_cycle, after_umount_blk);
+			xlog_assign_atomic_lsn(&log->l_last_sync_lsn,
+					log->l_curr_cycle, after_umount_blk);
+			*tail_blk = after_umount_blk;
+
+			*clean = true;
+		}
+	}
+
+	return 0;
+}
+
+static void
+xlog_set_state(
+	struct xlog		*log,
+	xfs_daddr_t		head_blk,
+	struct xlog_rec_header	*rhead,
+	xfs_daddr_t		rhead_blk,
+	bool			bump_cycle)
+{
+	/*
+	 * Reset log values according to the state of the log when we
+	 * crashed. In the case where head_blk == 0, we bump curr_cycle
+	 * one because the next write starts a new cycle rather than
+	 * continuing the cycle of the last good log record. At this
+	 * point we have guaranteed that all partial log records have been
+	 * accounted for. Therefore, we know that the last good log record
+	 * written was complete and ended exactly on the end boundary
+	 * of the physical log.
+	 */
+	log->l_prev_block = rhead_blk;
+	log->l_curr_block = (int)head_blk;
+	log->l_curr_cycle = be32_to_cpu(rhead->h_cycle);
+	if (bump_cycle)
+		log->l_curr_cycle++;
+	atomic64_set(&log->l_tail_lsn, be64_to_cpu(rhead->h_tail_lsn));
+	atomic64_set(&log->l_last_sync_lsn, be64_to_cpu(rhead->h_lsn));
+	xlog_assign_grant_head(&log->l_reserve_head.grant, log->l_curr_cycle,
+					BBTOB(log->l_curr_block));
+	xlog_assign_grant_head(&log->l_write_head.grant, log->l_curr_cycle,
+					BBTOB(log->l_curr_block));
+}
+
 /*
  * Find the sync block number or the tail of the log.
  *
@@ -1238,22 +1330,20 @@ xlog_find_tail(
 	xfs_daddr_t		*tail_blk)
 {
 	xlog_rec_header_t	*rhead;
-	xlog_op_header_t	*op_head;
 	char			*offset = NULL;
 	xfs_buf_t		*bp;
 	int			error;
-	xfs_daddr_t		umount_data_blk;
-	xfs_daddr_t		after_umount_blk;
 	xfs_daddr_t		rhead_blk;
 	xfs_lsn_t		tail_lsn;
-	int			hblks;
 	bool			wrapped = false;
+	bool			clean = false;
 
 	/*
 	 * Find previous log record
 	 */
 	if ((error = xlog_find_head(log, head_blk)))
 		return error;
+	ASSERT(*head_blk < INT_MAX);
 
 	bp = xlog_get_bp(log, 1);
 	if (!bp)
@@ -1271,99 +1361,74 @@ xlog_find_tail(
 	}
 
 	/*
-	 * Trim the head block back to skip over torn records. We can have
-	 * multiple log I/Os in flight at any time, so we assume CRC failures
-	 * back through the previous several records are torn writes and skip
-	 * them.
+	 * Search backwards through the log looking for the log record header
+	 * block. This wraps all the way back around to the head so something is
+	 * seriously wrong if we can't find it.
 	 */
-	ASSERT(*head_blk < INT_MAX);
-	error = xlog_verify_head(log, head_blk, tail_blk, bp, &rhead_blk,
-				 &rhead, &wrapped);
-	if (error)
-		goto done;
+	error = xlog_rseek_logrec_hdr(log, *head_blk, *head_blk, 1, bp,
+				      &rhead_blk, &rhead, &wrapped);
+	if (error < 0)
+		return error;
+	if (!error) {
+		xfs_warn(log->l_mp, "%s: couldn't find sync record", __func__);
+		return -EIO;
+	}
+	*tail_blk = BLOCK_LSN(be64_to_cpu(rhead->h_tail_lsn));
 
 	/*
-	 * Reset log values according to the state of the log when we
-	 * crashed. In the case where head_blk == 0, we bump curr_cycle
-	 * one because the next write starts a new cycle rather than
-	 * continuing the cycle of the last good log record. At this
-	 * point we have guaranteed that all partial log records have been
-	 * accounted for. Therefore, we know that the last good log record
-	 * written was complete and ended exactly on the end boundary
-	 * of the physical log.
+	 * Set the log state based on the current head record.
 	 */
-	log->l_prev_block = rhead_blk;
-	log->l_curr_block = (int)*head_blk;
-	log->l_curr_cycle = be32_to_cpu(rhead->h_cycle);
-	if (wrapped)
-		log->l_curr_cycle++;
-	atomic64_set(&log->l_tail_lsn, be64_to_cpu(rhead->h_tail_lsn));
-	atomic64_set(&log->l_last_sync_lsn, be64_to_cpu(rhead->h_lsn));
-	xlog_assign_grant_head(&log->l_reserve_head.grant, log->l_curr_cycle,
-					BBTOB(log->l_curr_block));
-	xlog_assign_grant_head(&log->l_write_head.grant, log->l_curr_cycle,
-					BBTOB(log->l_curr_block));
+	xlog_set_state(log, *head_blk, rhead, rhead_blk, wrapped);
+	tail_lsn = atomic64_read(&log->l_tail_lsn);
 
 	/*
-	 * Look for unmount record. If we find it, then we know there
-	 * was a clean unmount. Since 'i' could be the last block in
-	 * the physical log, we convert to a log block before comparing
-	 * to the head_blk.
+	 * Look for an unmount record at the head of the log. This sets the log
+	 * state to determine whether recovery is necessary.
+	 */
+	error = xlog_check_unmount_rec(log, head_blk, tail_blk, rhead,
+				       rhead_blk, bp, &clean);
+	if (error)
+		goto done;
+
+	/*
+	 * Verify the log head if the log is not clean (e.g., we have anything
+	 * but an unmount record at the head). This uses CRC verification to
+	 * detect and trim torn writes. If discovered, CRC failures are
+	 * considered torn writes and the log head is trimmed accordingly.
 	 *
-	 * Save the current tail lsn to use to pass to
-	 * xlog_clear_stale_blocks() below. We won't want to clear the
-	 * unmount record if there is one, so we pass the lsn of the
-	 * unmount record rather than the block after it.
+	 * Note that we can only run CRC verification when the log is dirty
+	 * because there's no guarantee that the log data behind an unmount
+	 * record is compatible with the current architecture.
 	 */
-	if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) {
-		int	h_size = be32_to_cpu(rhead->h_size);
-		int	h_version = be32_to_cpu(rhead->h_version);
+	if (!clean) {
+		xfs_daddr_t	orig_head = *head_blk;
 
-		if ((h_version & XLOG_VERSION_2) &&
-		    (h_size > XLOG_HEADER_CYCLE_SIZE)) {
-			hblks = h_size / XLOG_HEADER_CYCLE_SIZE;
-			if (h_size % XLOG_HEADER_CYCLE_SIZE)
-				hblks++;
-		} else {
-			hblks = 1;
-		}
-	} else {
-		hblks = 1;
-	}
-	after_umount_blk = rhead_blk + hblks + BTOBB(be32_to_cpu(rhead->h_len));
-	after_umount_blk = do_mod(after_umount_blk, log->l_logBBsize);
-	tail_lsn = atomic64_read(&log->l_tail_lsn);
-	if (*head_blk == after_umount_blk &&
-	    be32_to_cpu(rhead->h_num_logops) == 1) {
-		umount_data_blk = rhead_blk + hblks;
-		umount_data_blk = do_mod(umount_data_blk, log->l_logBBsize);
-		error = xlog_bread(log, umount_data_blk, 1, bp, &offset);
+		error = xlog_verify_head(log, head_blk, tail_blk, bp,
+					 &rhead_blk, &rhead, &wrapped);
 		if (error)
 			goto done;
 
-		op_head = (xlog_op_header_t *)offset;
-		if (op_head->oh_flags & XLOG_UNMOUNT_TRANS) {
-			/*
-			 * Set tail and last sync so that newly written
-			 * log records will point recovery to after the
-			 * current unmount record.
-			 */
-			xlog_assign_atomic_lsn(&log->l_tail_lsn,
-					log->l_curr_cycle, after_umount_blk);
-			xlog_assign_atomic_lsn(&log->l_last_sync_lsn,
-					log->l_curr_cycle, after_umount_blk);
-			*tail_blk = after_umount_blk;
-
-			/*
-			 * Note that the unmount was clean. If the unmount
-			 * was not clean, we need to know this to rebuild the
-			 * superblock counters from the perag headers if we
-			 * have a filesystem using non-persistent counters.
-			 */
-			log->l_mp->m_flags |= XFS_MOUNT_WAS_CLEAN;
+		/* update in-core state again if the head changed */
+		if (*head_blk != orig_head) {
+			xlog_set_state(log, *head_blk, rhead, rhead_blk,
+				       wrapped);
+			tail_lsn = atomic64_read(&log->l_tail_lsn);
+			error = xlog_check_unmount_rec(log, head_blk, tail_blk,
+						       rhead, rhead_blk, bp,
+						       &clean);
+			if (error)
+				goto done;
 		}
 	}
 
+	/*
+	 * Note that the unmount was clean. If the unmount was not clean, we
+	 * need to know this to rebuild the superblock counters from the perag
+	 * headers if we have a filesystem using non-persistent counters.
	 */
+	if (clean)
+		log->l_mp->m_flags |= XFS_MOUNT_WAS_CLEAN;
+
 	/*
 	 * Make sure that there are no blocks in front of the head
 	 * with the same cycle number as the head. This can happen