Skip to content

Commit 8b6b374

Browse files
committed
Consistency check should compare last record replayed, not last record read.
EndRecPtr is the last record that we've read, but not necessarily yet replayed. CheckRecoveryConsistency should compare minRecoveryPoint with the last replayed record instead. This caused recovery to think it had reached consistency too early. Now that we do the check in CheckRecoveryConsistency correctly, we have to move the call of that function to after redoing a record. The current place, after reading a record but before replaying it, is wrong. In particular, if there are no more records after the one ending at minRecoveryPoint, we don't enter hot standby until one extra record is generated and read by the standby, and CheckRecoveryConsistency is called. These two bugs conspired to make the code appear to work correctly, except for the small window between reading the last record that reaches minRecoveryPoint and replaying it. In passing, rename recoveryLastRecPtr, which is the last record replayed, to lastReplayedEndRecPtr. This makes it slightly harder to confuse with replayEndRecPtr, which is the last record read that we're about to replay. Original report from Kyotaro HORIGUCHI, further diagnosis by Fujii Masao. Backpatch to 9.0, where Hot Standby subtly changed the test from "minRecoveryPoint < EndRecPtr" to "minRecoveryPoint <= EndRecPtr". The former works because where the test is performed, we have always read one more record than we've replayed.
1 parent 5dd1c28 commit 8b6b374

File tree

1 file changed

+19
-14
lines changed
  • src/backend/access/transam

1 file changed

+19
-14
lines changed

src/backend/access/transam/xlog.c

Lines changed: 19 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -440,10 +440,14 @@ typedef struct XLogCtlData
440440
XLogRecPtr lastCheckPointRecPtr;
441441
CheckPoint lastCheckPoint;
442442

443-
/* end+1 of the last record replayed (or being replayed) */
443+
/*
444+
* lastReplayedEndRecPtr points to end+1 of the last record successfully
445+
* replayed. When we're currently replaying a record, ie. in a redo
446+
* function, replayEndRecPtr points to the end+1 of the record being
447+
* replayed, otherwise it's equal to lastReplayedEndRecPtr.
448+
*/
449+
XLogRecPtr lastReplayedEndRecPtr;
444450
XLogRecPtr replayEndRecPtr;
445-
/* end+1 of the last record replayed */
446-
XLogRecPtr recoveryLastRecPtr;
447451
/* timestamp of last COMMIT/ABORT record replayed (or being replayed) */
448452
TimestampTz recoveryLastXTime;
449453
/* Are we requested to pause recovery? */
@@ -6493,7 +6497,7 @@ StartupXLOG(void)
64936497
}
64946498

64956499
/*
6496-
* Initialize shared replayEndRecPtr, recoveryLastRecPtr, and
6500+
* Initialize shared replayEndRecPtr, lastReplayedEndRecPtr, and
64976501
* recoveryLastXTime.
64986502
*
64996503
* This is slightly confusing if we're starting from an online
@@ -6506,7 +6510,7 @@ StartupXLOG(void)
65066510
*/
65076511
SpinLockAcquire(&xlogctl->info_lck);
65086512
xlogctl->replayEndRecPtr = ReadRecPtr;
6509-
xlogctl->recoveryLastRecPtr = EndRecPtr;
6513+
xlogctl->lastReplayedEndRecPtr = EndRecPtr;
65106514
xlogctl->recoveryLastXTime = 0;
65116515
xlogctl->recoveryPause = false;
65126516
SpinLockRelease(&xlogctl->info_lck);
@@ -6596,9 +6600,6 @@ StartupXLOG(void)
65966600
/* Handle interrupt signals of startup process */
65976601
HandleStartupProcInterrupts();
65986602

6599-
/* Allow read-only connections if we're consistent now */
6600-
CheckRecoveryConsistency();
6601-
66026603
/*
66036604
* Pause WAL replay, if requested by a hot-standby session via
66046605
* SetRecoveryPause().
@@ -6669,16 +6670,19 @@ StartupXLOG(void)
66696670
error_context_stack = errcontext.previous;
66706671

66716672
/*
6672-
* Update shared recoveryLastRecPtr after this record has been
6673-
* replayed.
6673+
* Update lastReplayedEndRecPtr after this record has been
6674+
* successfully replayed.
66746675
*/
66756676
SpinLockAcquire(&xlogctl->info_lck);
6676-
xlogctl->recoveryLastRecPtr = EndRecPtr;
6677+
xlogctl->lastReplayedEndRecPtr = EndRecPtr;
66776678
SpinLockRelease(&xlogctl->info_lck);
66786679

66796680
/* Remember this record as the last-applied one */
66806681
LastRec = ReadRecPtr;
66816682

6683+
/* Allow read-only connections if we're consistent now */
6684+
CheckRecoveryConsistency();
6685+
66826686
/* Exit loop if we reached inclusive recovery target */
66836687
if (!recoveryContinue)
66846688
break;
@@ -7032,13 +7036,14 @@ CheckRecoveryConsistency(void)
70327036
* Have we passed our safe starting point?
70337037
*/
70347038
if (!reachedConsistency &&
7035-
XLByteLE(minRecoveryPoint, EndRecPtr) &&
7039+
XLByteLE(minRecoveryPoint, XLogCtl->lastReplayedEndRecPtr) &&
70367040
XLogRecPtrIsInvalid(ControlFile->backupStartPoint))
70377041
{
70387042
reachedConsistency = true;
70397043
ereport(LOG,
70407044
(errmsg("consistent recovery state reached at %X/%X",
7041-
EndRecPtr.xlogid, EndRecPtr.xrecoff)));
7045+
XLogCtl->lastReplayedEndRecPtr.xlogid,
7046+
XLogCtl->lastReplayedEndRecPtr.xrecoff)));
70427047
}
70437048

70447049
/*
@@ -9676,7 +9681,7 @@ GetXLogReplayRecPtr(void)
96769681
XLogRecPtr recptr;
96779682

96789683
SpinLockAcquire(&xlogctl->info_lck);
9679-
recptr = xlogctl->recoveryLastRecPtr;
9684+
recptr = xlogctl->lastReplayedEndRecPtr;
96809685
SpinLockRelease(&xlogctl->info_lck);
96819686

96829687
return recptr;

0 commit comments

Comments
 (0)