Skip to content

Commit 875d3f3

Browse files
committed
Avoid bogus "out-of-sequence timeline ID" errors in standby-mode.
When the startup process opens a WAL segment after replaying part of it, it validates the first page of the WAL segment, even though the page it's really interested in is later in the file. As part of the validation, it checks that the TLI on the page header is >= the TLI it saw on the last page it read. If the segment contains a timeline switch, and we have already replayed it, and then re-open the WAL segment (because streaming replication got disconnected and reconnected, for example), the TLI check will fail when the first page is validated. Fix that by relaxing the TLI check when re-opening a WAL segment. Backpatch to 9.0. Earlier versions had the same code, but before standby mode was introduced in 9.0, recovery never tried to re-read a segment after partially replaying it. Reported by Amit Kapila, while testing a new feature.
1 parent 2a18b3e commit 875d3f3

File tree

1 file changed

+23
-9
lines changed
  • src/backend/access/transam

1 file changed

+23
-9
lines changed

src/backend/access/transam/xlog.c

Lines changed: 23 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -510,6 +510,7 @@ static uint32 readRecordBufSize = 0;
510510
static XLogRecPtr ReadRecPtr; /* start of last record read */
511511
static XLogRecPtr EndRecPtr; /* end+1 of last record read */
512512
static TimeLineID lastPageTLI = 0;
513+
static TimeLineID lastSegmentTLI = 0;
513514

514515
static XLogRecPtr minRecoveryPoint; /* local copy of
515516
* ControlFile->minRecoveryPoint */
@@ -596,7 +597,7 @@ static void CleanupBackupHistory(void);
596597
static void UpdateMinRecoveryPoint(XLogRecPtr lsn, bool force);
597598
static XLogRecord *ReadRecord(XLogRecPtr *RecPtr, int emode, bool fetching_ckpt);
598599
static void CheckRecoveryConsistency(void);
599-
static bool ValidXLOGHeader(XLogPageHeader hdr, int emode);
600+
static bool ValidXLOGHeader(XLogPageHeader hdr, int emode, bool segmentonly);
600601
static XLogRecord *ReadCheckpointRecord(XLogRecPtr RecPtr, int whichChkpt);
601602
static List *readTimeLineHistory(TimeLineID targetTLI);
602603
static bool existsTimeLineHistory(TimeLineID probeTLI);
@@ -3742,7 +3743,7 @@ ReadRecord(XLogRecPtr *RecPtr, int emode, bool fetching_ckpt)
37423743
* to go backwards (but we can't reset that variable right here, since
37433744
* we might not change files at all).
37443745
*/
3745-
lastPageTLI = 0; /* see comment in ValidXLOGHeader */
3746+
lastPageTLI = lastSegmentTLI = 0; /* see comment in ValidXLOGHeader */
37463747
randAccess = true; /* allow curFileTLI to go backwards too */
37473748
}
37483749

@@ -4005,7 +4006,7 @@ ReadRecord(XLogRecPtr *RecPtr, int emode, bool fetching_ckpt)
40054006
* ReadRecord. It's not intended for use from anywhere else.
40064007
*/
40074008
static bool
4008-
ValidXLOGHeader(XLogPageHeader hdr, int emode)
4009+
ValidXLOGHeader(XLogPageHeader hdr, int emode, bool segmentonly)
40094010
{
40104011
XLogRecPtr recaddr;
40114012

@@ -4100,18 +4101,31 @@ ValidXLOGHeader(XLogPageHeader hdr, int emode)
41004101
* successive pages of a consistent WAL sequence.
41014102
*
41024103
* Of course this check should only be applied when advancing sequentially
4103-
* across pages; therefore ReadRecord resets lastPageTLI to zero when
4104-
* going to a random page.
4104+
* across pages; therefore ReadRecord resets lastPageTLI and
4105+
* lastSegmentTLI to zero when going to a random page.
4106+
*
4107+
* Sometimes we re-open a segment that's already been partially replayed.
4108+
* In that case we cannot perform the normal TLI check: if there is a
4109+
* timeline switch within the segment, the first page has a smaller TLI
4110+
* than later pages following the timeline switch, and we might've read
4111+
* them already. As a weaker test, we still check that it's not smaller
4112+
* than the TLI we last saw at the beginning of a segment. Pass
4113+
* segmentonly = true when re-validating the first page like that, and the
4114+
* page you're actually interested in comes later.
41054115
*/
4106-
if (hdr->xlp_tli < lastPageTLI)
4116+
if (hdr->xlp_tli < (segmentonly ? lastSegmentTLI : lastPageTLI))
41074117
{
41084118
ereport(emode_for_corrupt_record(emode, recaddr),
41094119
(errmsg("out-of-sequence timeline ID %u (after %u) in log file %u, segment %u, offset %u",
4110-
hdr->xlp_tli, lastPageTLI,
4120+
hdr->xlp_tli,
4121+
segmentonly ? lastSegmentTLI : lastPageTLI,
41114122
readId, readSeg, readOff)));
41124123
return false;
41134124
}
41144125
lastPageTLI = hdr->xlp_tli;
4126+
if (readOff == 0)
4127+
lastSegmentTLI = hdr->xlp_tli;
4128+
41154129
return true;
41164130
}
41174131

@@ -9689,7 +9703,7 @@ XLogPageRead(XLogRecPtr *RecPtr, int emode, bool fetching_ckpt,
96899703
readId, readSeg, readOff)));
96909704
goto next_record_is_invalid;
96919705
}
9692-
if (!ValidXLOGHeader((XLogPageHeader) readBuf, emode))
9706+
if (!ValidXLOGHeader((XLogPageHeader) readBuf, emode, true))
96939707
goto next_record_is_invalid;
96949708
}
96959709

@@ -9711,7 +9725,7 @@ XLogPageRead(XLogRecPtr *RecPtr, int emode, bool fetching_ckpt,
97119725
readId, readSeg, readOff)));
97129726
goto next_record_is_invalid;
97139727
}
9714-
if (!ValidXLOGHeader((XLogPageHeader) readBuf, emode))
9728+
if (!ValidXLOGHeader((XLogPageHeader) readBuf, emode, false))
97159729
goto next_record_is_invalid;
97169730

97179731
Assert(targetId == readId);

0 commit comments

Comments
 (0)