Skip to content

Commit e7ea2fa

Browse files
committed
Fix corner-case failure to detect improper timeline switch.
rescanLatestTimeLine() contains a guard against switching to a timeline that forked off from the current one prior to the current recovery point, but that guard does not work if the timeline switch occurs before the first WAL record (which must be the checkpoint record) is read. Without this patch, an improper timeline switch is therefore possible in such cases. This happens because rescanLatestTimeLine() relies on the global variable EndRecPtr to understand the current position of WAL replay. However, EndRecPtr at this point in the code contains the endpoint of the last-replayed record, not the startpoint or endpoint of the record being replayed now. Thus, before any records have been replayed, it's zero, which causes the sanity check to always pass. To fix, pass down the correct timeline explicitly. The EndRecPtr value we want is the one from the xlogreader, which will be the starting position of the record we're about to try to read, rather than the global variable, which is the ending position of the last record we successfully read. They're usually the same, but not in the corner case described here. No back-patch, because in v14 and earlier branches, we were using the wrong TLI here as well as the wrong LSN. In master, that was fixed by commit 4a92a1c, but that and its prerequisite patches are too invasive to back-patch for such a minor issue. Patch by me, reviewed by Amul Sul. Discussion: http://postgr.es/m/CA+Tgmoao96EuNeSPd+hspRKcsCddu=b1h-QNRuKfY8VmfNQdfg@mail.gmail.com
1 parent f79962d commit e7ea2fa

File tree

1 file changed

+16
-9
lines changed
  • src/backend/access/transam

1 file changed

+16
-9
lines changed

src/backend/access/transam/xlog.c

+16-9
Original file line numberDiff line numberDiff line change
@@ -924,7 +924,8 @@ static int XLogPageRead(XLogReaderState *xlogreader, XLogRecPtr targetPagePtr,
924924
int reqLen, XLogRecPtr targetRecPtr, char *readBuf);
925925
static bool WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess,
926926
bool fetching_ckpt, XLogRecPtr tliRecPtr,
927-
TimeLineID replayTLI);
927+
TimeLineID replayTLI,
928+
XLogRecPtr replayLSN);
928929
static void XLogShutdownWalRcv(void);
929930
static int emode_for_corrupt_record(int emode, XLogRecPtr RecPtr);
930931
static void XLogFileClose(void);
@@ -946,7 +947,8 @@ static bool PerformRecoveryXLogAction(void);
946947
static XLogRecord *ReadCheckpointRecord(XLogReaderState *xlogreader,
947948
XLogRecPtr RecPtr, int whichChkpt, bool report,
948949
TimeLineID replayTLI);
949-
static bool rescanLatestTimeLine(TimeLineID replayTLI);
950+
static bool rescanLatestTimeLine(TimeLineID replayTLI,
951+
XLogRecPtr replayLSN);
950952
static void InitControlFile(uint64 sysidentifier);
951953
static void WriteControlFile(void);
952954
static void ReadControlFile(void);
@@ -4620,7 +4622,7 @@ ReadRecord(XLogReaderState *xlogreader, int emode,
46204622
* one and returns 'true'.
46214623
*/
46224624
static bool
4623-
rescanLatestTimeLine(TimeLineID replayTLI)
4625+
rescanLatestTimeLine(TimeLineID replayTLI, XLogRecPtr replayLSN)
46244626
{
46254627
List *newExpectedTLEs;
46264628
bool found;
@@ -4671,13 +4673,13 @@ rescanLatestTimeLine(TimeLineID replayTLI)
46714673
* next timeline was forked off from it *after* the current recovery
46724674
* location.
46734675
*/
4674-
if (currentTle->end < EndRecPtr)
4676+
if (currentTle->end < replayLSN)
46754677
{
46764678
ereport(LOG,
46774679
(errmsg("new timeline %u forked off current database system timeline %u before current recovery point %X/%X",
46784680
newtarget,
46794681
replayTLI,
4680-
LSN_FORMAT_ARGS(EndRecPtr))));
4682+
LSN_FORMAT_ARGS(replayLSN))));
46814683
return false;
46824684
}
46834685

@@ -12473,7 +12475,8 @@ XLogPageRead(XLogReaderState *xlogreader, XLogRecPtr targetPagePtr, int reqLen,
1247312475
private->randAccess,
1247412476
private->fetching_ckpt,
1247512477
targetRecPtr,
12476-
private->replayTLI))
12478+
private->replayTLI,
12479+
xlogreader->EndRecPtr))
1247712480
{
1247812481
if (readFile >= 0)
1247912482
close(readFile);
@@ -12626,6 +12629,10 @@ XLogPageRead(XLogReaderState *xlogreader, XLogRecPtr targetPagePtr, int reqLen,
1262612629
* 'tliRecPtr' is the position of the WAL record we're interested in. It is
1262712630
* used to decide which timeline to stream the requested WAL from.
1262812631
*
12632+
* 'replayLSN' is the current replay LSN, so that if we scan for new
12633+
* timelines, we can reject a switch to a timeline that branched off before
12634+
* this point.
12635+
*
1262912636
* If the record is not immediately available, the function returns false
1263012637
* if we're not in standby mode. In standby mode, waits for it to become
1263112638
* available.
@@ -12638,7 +12645,7 @@ XLogPageRead(XLogReaderState *xlogreader, XLogRecPtr targetPagePtr, int reqLen,
1263812645
static bool
1263912646
WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess,
1264012647
bool fetching_ckpt, XLogRecPtr tliRecPtr,
12641-
TimeLineID replayTLI)
12648+
TimeLineID replayTLI, XLogRecPtr replayLSN)
1264212649
{
1264312650
static TimestampTz last_fail_time = 0;
1264412651
TimestampTz now;
@@ -12761,7 +12768,7 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess,
1276112768
*/
1276212769
if (recoveryTargetTimeLineGoal == RECOVERY_TARGET_TIMELINE_LATEST)
1276312770
{
12764-
if (rescanLatestTimeLine(replayTLI))
12771+
if (rescanLatestTimeLine(replayTLI, replayLSN))
1276512772
{
1276612773
currentSource = XLOG_FROM_ARCHIVE;
1276712774
break;
@@ -12888,7 +12895,7 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess,
1288812895
*/
1288912896
if (recoveryTargetTimeLineGoal ==
1289012897
RECOVERY_TARGET_TIMELINE_LATEST)
12891-
rescanLatestTimeLine(replayTLI);
12898+
rescanLatestTimeLine(replayTLI, replayLSN);
1289212899

1289312900
startWalReceiver = true;
1289412901
}

0 commit comments

Comments
 (0)