Skip to content

Commit fb565f8

Browse files
committed
Consistency check should compare last record replayed, not last record read.
EndRecPtr is the last record that we've read, but not necessarily yet replayed. CheckRecoveryConsistency should compare minRecoveryPoint with the last replayed record instead. This caused recovery to think it's reached consistency too early. Now that we do the check in CheckRecoveryConsistency correctly, we have to move the call of that function to after redoing a record. The current place, after reading a record but before replaying it, is wrong. In particular, if there are no more records after the one ending at minRecoveryPoint, we don't enter hot standby until one extra record is generated and read by the standby, and CheckRecoveryConsistency is called. These two bugs conspired to make the code appear to work correctly, except for the small window between reading the last record that reaches minRecoveryPoint, and replaying it. In the passing, rename recoveryLastRecPtr, which is the last record replayed, to lastReplayedEndRecPtr. This makes it slightly less confusing with replayEndRecPtr, which is the last record read that we're about to replay. Original report from Kyotaro HORIGUCHI, further diagnosis by Fujii Masao. Backpatch to 9.0, where Hot Standby subtly changed the test from "minRecoveryPoint < EndRecPtr" to "minRecoveryPoint <= EndRecPtr". The former works because where the test is performed, we have always read one more record than we've replayed.
1 parent 4d29e8c commit fb565f8

File tree

1 file changed

+21
-15
lines changed
  • src/backend/access/transam

1 file changed

+21
-15
lines changed

src/backend/access/transam/xlog.c

Lines changed: 21 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -448,10 +448,14 @@ typedef struct XLogCtlData
448448
XLogRecPtr lastCheckPointRecPtr;
449449
CheckPoint lastCheckPoint;
450450

451-
/* end+1 of the last record replayed (or being replayed) */
451+
/*
452+
* lastReplayedEndRecPtr points to end+1 of the last record successfully
453+
* replayed. When we're currently replaying a record, ie. in a redo
454+
* function, replayEndRecPtr points to the end+1 of the record being
455+
* replayed, otherwise it's equal to lastReplayedEndRecPtr.
456+
*/
457+
XLogRecPtr lastReplayedEndRecPtr;
452458
XLogRecPtr replayEndRecPtr;
453-
/* end+1 of the last record replayed */
454-
XLogRecPtr recoveryLastRecPtr;
455459
/* timestamp of last COMMIT/ABORT record replayed (or being replayed) */
456460
TimestampTz recoveryLastXTime;
457461
/* current effective recovery target timeline */
@@ -6589,7 +6593,7 @@ StartupXLOG(void)
65896593
}
65906594

65916595
/*
6592-
* Initialize shared replayEndRecPtr, recoveryLastRecPtr, and
6596+
* Initialize shared replayEndRecPtr, lastReplayedEndRecPtr, and
65936597
* recoveryLastXTime.
65946598
*
65956599
* This is slightly confusing if we're starting from an online
@@ -6602,7 +6606,7 @@ StartupXLOG(void)
66026606
*/
66036607
SpinLockAcquire(&xlogctl->info_lck);
66046608
xlogctl->replayEndRecPtr = ReadRecPtr;
6605-
xlogctl->recoveryLastRecPtr = EndRecPtr;
6609+
xlogctl->lastReplayedEndRecPtr = EndRecPtr;
66066610
xlogctl->recoveryLastXTime = 0;
66076611
xlogctl->currentChunkStartTime = 0;
66086612
xlogctl->recoveryPause = false;
@@ -6694,9 +6698,6 @@ StartupXLOG(void)
66946698
/* Handle interrupt signals of startup process */
66956699
HandleStartupProcInterrupts();
66966700

6697-
/* Allow read-only connections if we're consistent now */
6698-
CheckRecoveryConsistency();
6699-
67006701
/*
67016702
* Pause WAL replay, if requested by a hot-standby session via
67026703
* SetRecoveryPause().
@@ -6797,16 +6798,19 @@ StartupXLOG(void)
67976798
}
67986799

67996800
/*
6800-
* Update shared recoveryLastRecPtr after this record has been
6801-
* replayed.
6801+
* Update lastReplayedEndRecPtr after this record has been
6802+
* successfully replayed.
68026803
*/
68036804
SpinLockAcquire(&xlogctl->info_lck);
6804-
xlogctl->recoveryLastRecPtr = EndRecPtr;
6805+
xlogctl->lastReplayedEndRecPtr = EndRecPtr;
68056806
SpinLockRelease(&xlogctl->info_lck);
68066807

68076808
/* Remember this record as the last-applied one */
68086809
LastRec = ReadRecPtr;
68096810

6811+
/* Allow read-only connections if we're consistent now */
6812+
CheckRecoveryConsistency();
6813+
68106814
/* Exit loop if we reached inclusive recovery target */
68116815
if (!recoveryContinue)
68126816
break;
@@ -7176,10 +7180,11 @@ CheckRecoveryConsistency(void)
71767180
* Have we passed our safe starting point? Note that minRecoveryPoint
71777181
* is known to be incorrectly set if ControlFile->backupEndRequired,
71787182
* until the XLOG_BACKUP_RECORD arrives to advise us of the correct
7179-
* minRecoveryPoint. All we prior to that is its not consistent yet.
7183+
* minRecoveryPoint. All we know prior to that is that we're not
7184+
* consistent yet.
71807185
*/
71817186
if (!reachedConsistency && !ControlFile->backupEndRequired &&
7182-
XLByteLE(minRecoveryPoint, EndRecPtr) &&
7187+
XLByteLE(minRecoveryPoint, XLogCtl->lastReplayedEndRecPtr) &&
71837188
XLogRecPtrIsInvalid(ControlFile->backupStartPoint))
71847189
{
71857190
/*
@@ -7191,7 +7196,8 @@ CheckRecoveryConsistency(void)
71917196
reachedConsistency = true;
71927197
ereport(LOG,
71937198
(errmsg("consistent recovery state reached at %X/%X",
7194-
EndRecPtr.xlogid, EndRecPtr.xrecoff)));
7199+
XLogCtl->lastReplayedEndRecPtr.xlogid,
7200+
XLogCtl->lastReplayedEndRecPtr.xrecoff)));
71957201
}
71967202

71977203
/*
@@ -9927,7 +9933,7 @@ GetXLogReplayRecPtr(TimeLineID *targetTLI)
99279933
XLogRecPtr recptr;
99289934

99299935
SpinLockAcquire(&xlogctl->info_lck);
9930-
recptr = xlogctl->recoveryLastRecPtr;
9936+
recptr = xlogctl->lastReplayedEndRecPtr;
99319937
if (targetTLI)
99329938
*targetTLI = xlogctl->RecoveryTargetTLI;
99339939
SpinLockRelease(&xlogctl->info_lck);

0 commit comments

Comments
 (0)