Skip to content

Commit 6be7996

Browse files
committed
Fix the tracking of min recovery point timeline.
Forgot to update it at the right place. Also, consider a checkpoint record that switches to a new timeline to be on the new timeline. This fixes erroneous "requested timeline 2 does not contain minimum recovery point" errors, pointed out by Amit Kapila while testing another patch.
1 parent b46c921 commit 6be7996

File tree

1 file changed

+77
-36
lines changed
  • src/backend/access/transam

1 file changed

+77
-36
lines changed

src/backend/access/transam/xlog.c

+77-36
Original file line number | Diff line number | Diff line change
@@ -605,6 +605,7 @@ static void SetLatestXTime(TimestampTz xtime);
605605
static void SetCurrentChunkStartTime(TimestampTz xtime);
606606
static void CheckRequiredParameterValues(void);
607607
static void XLogReportParameters(void);
608+
static void checkTimeLineSwitch(XLogRecPtr lsn, TimeLineID newTLI);
608609
static void LocalSetXLogInsertAllowed(void);
609610
static void CheckPointGuts(XLogRecPtr checkPointRedo, int flags);
610611
static void KeepLogSeg(XLogRecPtr recptr, XLogSegNo *logSegNo);
@@ -5909,12 +5910,41 @@ StartupXLOG(void)
59095910
LWLockRelease(XidGenLock);
59105911
}
59115912

5913+
/*
5914+
* Before replaying this record, check if it is a shutdown
5915+
* checkpoint record that causes the current timeline to
5916+
* change. The checkpoint record is already considered to be
5917+
* part of the new timeline, so we update ThisTimeLineID
5918+
* before replaying it. That's important so that replayEndTLI,
5919+
* which is recorded as the minimum recovery point's TLI if
5920+
* recovery stops after this record, is set correctly.
5921+
*/
5922+
if (record->xl_rmid == RM_XLOG_ID &&
5923+
(record->xl_info & ~XLR_INFO_MASK) == XLOG_CHECKPOINT_SHUTDOWN)
5924+
{
5925+
CheckPoint checkPoint;
5926+
TimeLineID newTLI;
5927+
5928+
memcpy(&checkPoint, XLogRecGetData(record), sizeof(CheckPoint));
5929+
newTLI = checkPoint.ThisTimeLineID;
5930+
5931+
if (newTLI != ThisTimeLineID)
5932+
{
5933+
/* Check that it's OK to switch to this TLI */
5934+
checkTimeLineSwitch(EndRecPtr, newTLI);
5935+
5936+
/* Following WAL records should be run with new TLI */
5937+
ThisTimeLineID = newTLI;
5938+
}
5939+
}
5940+
59125941
/*
59135942
* Update shared replayEndRecPtr before replaying this record,
59145943
* so that XLogFlush will update minRecoveryPoint correctly.
59155944
*/
59165945
SpinLockAcquire(&xlogctl->info_lck);
59175946
xlogctl->replayEndRecPtr = EndRecPtr;
5947+
xlogctl->replayEndTLI = ThisTimeLineID;
59185948
SpinLockRelease(&xlogctl->info_lck);
59195949

59205950
/*
@@ -7858,6 +7888,48 @@ UpdateFullPageWrites(void)
78587888
END_CRIT_SECTION();
78597889
}
78607890

7891+
/*
7892+
* Check that it's OK to switch to new timeline during recovery.
7893+
*
7894+
* 'lsn' is the address of the shutdown checkpoint record we're about to
7895+
* replay. (Currently, timeline can only change at a shutdown checkpoint).
7896+
*/
7897+
static void
7898+
checkTimeLineSwitch(XLogRecPtr lsn, TimeLineID newTLI)
7899+
{
7900+
/*
 * This function only validates the proposed switch: it compares newTLI
 * against ThisTimeLineID, expectedTLEs, minRecoveryPoint and
 * minRecoveryPointTLI, and reports PANIC through ereport() if either
 * check below fails. It does not assign ThisTimeLineID itself; the
 * caller does that after this function returns.
 *
 * NOTE(review): the caller in StartupXLOG passes EndRecPtr as 'lsn' --
 * confirm that this matches the "address of the shutdown checkpoint
 * record" wording in the function's header comment.
 */
/*
7901+
* The new timeline better be in the list of timelines we expect
7902+
* to see, according to the timeline history. It should also not
7903+
* decrease.
7904+
*/
7905+
if (newTLI < ThisTimeLineID || !tliInHistory(newTLI, expectedTLEs))
7906+
ereport(PANIC,
7907+
(errmsg("unexpected timeline ID %u (after %u) in checkpoint record",
7908+
newTLI, ThisTimeLineID)));
7909+
7910+
/*
7911+
* If we have not yet reached min recovery point, and we're about
7912+
* to switch to a timeline greater than the timeline of the min
7913+
* recovery point: trouble. After switching to the new timeline,
7914+
* we could not possibly visit the min recovery point on the
7915+
* correct timeline anymore. This can happen if there is a newer
7916+
* timeline in the archive that branched before the timeline the
7917+
* min recovery point is on, and you attempt to do PITR to the
7918+
* new timeline.
7919+
*/
7920+
if (!XLogRecPtrIsInvalid(minRecoveryPoint) &&
7921+
XLByteLT(lsn, minRecoveryPoint) &&
7922+
newTLI > minRecoveryPointTLI)
7923+
ereport(PANIC,
7924+
(errmsg("unexpected timeline ID %u in checkpoint record, before reaching minimum recovery point %X/%X on timeline %u",
7925+
newTLI,
7926+
(uint32) (minRecoveryPoint >> 32),
7927+
(uint32) minRecoveryPoint,
7928+
minRecoveryPointTLI)));
7929+
7930+
/* Looks good */
7931+
}
7932+
78617933
/*
78627934
* XLOG resource manager's routines
78637935
*
@@ -7971,44 +8043,13 @@ xlog_redo(XLogRecPtr lsn, XLogRecord *record)
79718043
}
79728044

79738045
/*
7974-
* TLI may change in a shutdown checkpoint.
8046+
* We should've already switched to the new TLI before replaying this
8047+
* record.
79758048
*/
79768049
if (checkPoint.ThisTimeLineID != ThisTimeLineID)
7977-
{
7978-
/*
7979-
* The new timeline better be in the list of timelines we expect
7980-
* to see, according to the timeline history. It should also not
7981-
* decrease.
7982-
*/
7983-
if (checkPoint.ThisTimeLineID < ThisTimeLineID ||
7984-
!tliInHistory(checkPoint.ThisTimeLineID, expectedTLEs))
7985-
ereport(PANIC,
7986-
(errmsg("unexpected timeline ID %u (after %u) in checkpoint record",
7987-
checkPoint.ThisTimeLineID, ThisTimeLineID)));
7988-
7989-
/*
7990-
* If we have not yet reached min recovery point, and we're about
7991-
* to switch to a timeline greater than the timeline of the min
7992-
* recovery point: trouble. After switching to the new timeline,
7993-
* we could not possibly visit the min recovery point on the
7994-
* correct timeline anymore. This can happen if there is a newer
7995-
* timeline in the archive that branched before the timeline the
7996-
* min recovery point is on, and you attempt to do PITR to the
7997-
* new timeline.
7998-
*/
7999-
if (!XLogRecPtrIsInvalid(minRecoveryPoint) &&
8000-
XLByteLT(lsn, minRecoveryPoint) &&
8001-
checkPoint.ThisTimeLineID > minRecoveryPointTLI)
8002-
ereport(PANIC,
8003-
(errmsg("unexpected timeline ID %u in checkpoint record, before reaching minimum recovery point %X/%X on timeline %u",
8004-
checkPoint.ThisTimeLineID,
8005-
(uint32) (minRecoveryPoint >> 32),
8006-
(uint32) minRecoveryPoint,
8007-
minRecoveryPointTLI)));
8008-
8009-
/* Following WAL records should be run with new TLI */
8010-
ThisTimeLineID = checkPoint.ThisTimeLineID;
8011-
}
8050+
ereport(PANIC,
8051+
(errmsg("unexpected timeline ID %u (should be %u) in checkpoint record",
8052+
checkPoint.ThisTimeLineID, ThisTimeLineID)));
80128053

80138054
RecoveryRestartPoint(&checkPoint);
80148055
}

0 commit comments

Comments
 (0)