Skip to content

Commit 3f0ab05

Browse files
Switch timelines if we crash soon after promotion.
The previous patch to skip checkpoints at the end of recovery did not perform crash recovery correctly: it fumbled the timeline switch. We now record the minRecoveryPointTLI of the newly selected timeline, so that crash recovery proceeds to the correct timeline. Bug report from Fujii Masao; investigated by me.
1 parent 9afc583 commit 3f0ab05

File tree

1 file changed

+37
-1
lines changed
  • src/backend/access/transam

1 file changed

+37
-1
lines changed

src/backend/access/transam/xlog.c

Lines changed: 37 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -4828,6 +4828,22 @@ StartupXLOG(void)
48284828
ereport(LOG,
48294829
(errmsg("starting archive recovery")));
48304830
}
4831+
else if (ControlFile->minRecoveryPointTLI > 0)
4832+
{
4833+
/*
4834+
* If the minRecoveryPointTLI is set when not in Archive Recovery
4835+
* it means that we have crashed after ending recovery and
4836+
* yet before we wrote a new checkpoint on the new timeline.
4837+
* That means we are doing a crash recovery that needs to cross
4838+
* timelines to get to our newly assigned timeline again.
4839+
* The timeline we are headed for is exact and not 'latest'.
4840+
* As soon as we hit a checkpoint, the minRecoveryPointTLI is
4841+
* reset, so we will not take this timeline-switching branch again.
4842+
*/
4843+
Assert(ControlFile->minRecoveryPointTLI != 1);
4844+
recoveryTargetTLI = ControlFile->minRecoveryPointTLI;
4845+
recoveryTargetIsLatest = false;
4846+
}
48314847

48324848
/*
48334849
* Take ownership of the wakeup latch if we're going to sleep during
@@ -5075,6 +5091,12 @@ StartupXLOG(void)
50755091
ereport(LOG,
50765092
(errmsg("database system was not properly shut down; "
50775093
"automatic recovery in progress")));
5094+
if (recoveryTargetTLI > 0)
5095+
ereport(LOG,
5096+
(errmsg("crash recovery starts in timeline %u "
5097+
"and has target timeline %u",
5098+
ControlFile->checkPointCopy.ThisTimeLineID,
5099+
recoveryTargetTLI)));
50785100
ControlFile->state = DB_IN_CRASH_RECOVERY;
50795101
}
50805102
ControlFile->prevCheckPoint = ControlFile->checkPoint;
@@ -6945,6 +6967,7 @@ CreateEndOfRecoveryRecord(void)
69456967
{
69466968
xl_end_of_recovery xlrec;
69476969
XLogRecData rdata;
6970+
XLogRecPtr recptr;
69486971

69496972
/* sanity check */
69506973
if (!RecoveryInProgress())
@@ -6962,7 +6985,20 @@ CreateEndOfRecoveryRecord(void)
69626985
rdata.buffer = InvalidBuffer;
69636986
rdata.next = NULL;
69646987

6965-
(void) XLogInsert(RM_XLOG_ID, XLOG_END_OF_RECOVERY, &rdata);
6988+
recptr = XLogInsert(RM_XLOG_ID, XLOG_END_OF_RECOVERY, &rdata);
6989+
6990+
XLogFlush(recptr);
6991+
6992+
/*
6993+
* Update the control file so that crash recovery can follow
6994+
* the timeline changes to this point.
6995+
*/
6996+
LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
6997+
ControlFile->time = (pg_time_t) xlrec.end_time;
6998+
ControlFile->minRecoveryPoint = recptr;
6999+
ControlFile->minRecoveryPointTLI = ThisTimeLineID;
7000+
UpdateControlFile();
7001+
LWLockRelease(ControlFileLock);
69667002

69677003
END_CRIT_SECTION();
69687004

0 commit comments

Comments
 (0)