Skip to content

Commit db3658b

Browse files
committed
Avoid bogus "out-of-sequence timeline ID" errors in standby-mode.
When the startup process opens a WAL segment after replaying part of it, it validates the first page of the WAL segment, even though the page it's really interested in is later in the file. As part of the validation, it checks that the TLI on the page header is >= the TLI it saw on the last page it read. If the segment contains a timeline switch, and we have already replayed it, and then re-open the WAL segment (because streaming replication got disconnected and reconnected, for example), the TLI check will fail when the first page is validated. Fix that by relaxing the TLI check when re-opening a WAL segment. Backpatch to 9.0. Earlier versions had the same code, but before standby mode was introduced in 9.0, recovery never tried to re-read a segment after partially replaying it. Reported by Amit Kapila, while testing a new feature.
1 parent 866f2dd commit db3658b

File tree

1 file changed

+23
-9
lines changed
  • src/backend/access/transam

1 file changed

+23
-9
lines changed

src/backend/access/transam/xlog.c

Lines changed: 23 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -550,6 +550,7 @@ static uint32 readRecordBufSize = 0;
550550
static XLogRecPtr ReadRecPtr; /* start of last record read */
551551
static XLogRecPtr EndRecPtr; /* end+1 of last record read */
552552
static TimeLineID lastPageTLI = 0;
553+
static TimeLineID lastSegmentTLI = 0;
553554

554555
static XLogRecPtr minRecoveryPoint; /* local copy of
555556
* ControlFile->minRecoveryPoint */
@@ -647,7 +648,7 @@ static void CleanupBackupHistory(void);
647648
static void UpdateMinRecoveryPoint(XLogRecPtr lsn, bool force);
648649
static XLogRecord *ReadRecord(XLogRecPtr *RecPtr, int emode, bool fetching_ckpt);
649650
static void CheckRecoveryConsistency(void);
650-
static bool ValidXLOGHeader(XLogPageHeader hdr, int emode);
651+
static bool ValidXLOGHeader(XLogPageHeader hdr, int emode, bool segmentonly);
651652
static XLogRecord *ReadCheckpointRecord(XLogRecPtr RecPtr, int whichChkpt);
652653
static List *readTimeLineHistory(TimeLineID targetTLI);
653654
static bool existsTimeLineHistory(TimeLineID probeTLI);
@@ -3803,7 +3804,7 @@ ReadRecord(XLogRecPtr *RecPtr, int emode, bool fetching_ckpt)
38033804
* to go backwards (but we can't reset that variable right here, since
38043805
* we might not change files at all).
38053806
*/
3806-
lastPageTLI = 0; /* see comment in ValidXLOGHeader */
3807+
lastPageTLI = lastSegmentTLI = 0; /* see comment in ValidXLOGHeader */
38073808
randAccess = true; /* allow curFileTLI to go backwards too */
38083809
}
38093810

@@ -4066,7 +4067,7 @@ ReadRecord(XLogRecPtr *RecPtr, int emode, bool fetching_ckpt)
40664067
* ReadRecord. It's not intended for use from anywhere else.
40674068
*/
40684069
static bool
4069-
ValidXLOGHeader(XLogPageHeader hdr, int emode)
4070+
ValidXLOGHeader(XLogPageHeader hdr, int emode, bool segmentonly)
40704071
{
40714072
XLogRecPtr recaddr;
40724073

@@ -4161,18 +4162,31 @@ ValidXLOGHeader(XLogPageHeader hdr, int emode)
41614162
* successive pages of a consistent WAL sequence.
41624163
*
41634164
* Of course this check should only be applied when advancing sequentially
4164-
* across pages; therefore ReadRecord resets lastPageTLI to zero when
4165-
* going to a random page.
4165+
* across pages; therefore ReadRecord resets lastPageTLI and
4166+
* lastSegmentTLI to zero when going to a random page.
4167+
*
4168+
* Sometimes we re-open a segment that's already been partially replayed.
4169+
* In that case we cannot perform the normal TLI check: if there is a
4170+
* timeline switch within the segment, the first page has a smaller TLI
4171+
* than later pages following the timeline switch, and we might've read
4172+
* them already. As a weaker test, we still check that it's not smaller
4173+
* than the TLI we last saw at the beginning of a segment. Pass
4174+
* segmentonly = true when re-validating the first page like that, and the
4175+
* page you're actually interested in comes later.
41664176
*/
4167-
if (hdr->xlp_tli < lastPageTLI)
4177+
if (hdr->xlp_tli < (segmentonly ? lastSegmentTLI : lastPageTLI))
41684178
{
41694179
ereport(emode_for_corrupt_record(emode, recaddr),
41704180
(errmsg("out-of-sequence timeline ID %u (after %u) in log file %u, segment %u, offset %u",
4171-
hdr->xlp_tli, lastPageTLI,
4181+
hdr->xlp_tli,
4182+
segmentonly ? lastSegmentTLI : lastPageTLI,
41724183
readId, readSeg, readOff)));
41734184
return false;
41744185
}
41754186
lastPageTLI = hdr->xlp_tli;
4187+
if (readOff == 0)
4188+
lastSegmentTLI = hdr->xlp_tli;
4189+
41764190
return true;
41774191
}
41784192

@@ -10456,7 +10470,7 @@ XLogPageRead(XLogRecPtr *RecPtr, int emode, bool fetching_ckpt,
1045610470
readId, readSeg, readOff)));
1045710471
goto next_record_is_invalid;
1045810472
}
10459-
if (!ValidXLOGHeader((XLogPageHeader) readBuf, emode))
10473+
if (!ValidXLOGHeader((XLogPageHeader) readBuf, emode, true))
1046010474
goto next_record_is_invalid;
1046110475
}
1046210476

@@ -10478,7 +10492,7 @@ XLogPageRead(XLogRecPtr *RecPtr, int emode, bool fetching_ckpt,
1047810492
readId, readSeg, readOff)));
1047910493
goto next_record_is_invalid;
1048010494
}
10481-
if (!ValidXLOGHeader((XLogPageHeader) readBuf, emode))
10495+
if (!ValidXLOGHeader((XLogPageHeader) readBuf, emode, false))
1048210496
goto next_record_is_invalid;
1048310497

1048410498
Assert(targetId == readId);

0 commit comments

Comments
 (0)