Skip to content

Commit 5840e31

Browse files
committed
Consistency check should compare last record replayed, not last record read.
EndRecPtr is the last record that we've read, but not necessarily yet replayed. CheckRecoveryConsistency should compare minRecoveryPoint with the last replayed record instead. This caused recovery to think it's reached consistency too early. Now that we do the check in CheckRecoveryConsistency correctly, we have to move the call of that function to after redoing a record. The current place, after reading a record but before replaying it, is wrong. In particular, if there are no more records after the one ending at minRecoveryPoint, we don't enter hot standby until one extra record is generated and read by the standby, and CheckRecoveryConsistency is called. These two bugs conspired to make the code appear to work correctly, except for the small window between reading the last record that reaches minRecoveryPoint, and replaying it. In the passing, rename recoveryLastRecPtr, which is the last record replayed, to lastReplayedEndRecPtr. This makes it slightly less confusing with replayEndRecPtr, which is the last record read that we're about to replay. Original report from Kyotaro HORIGUCHI, further diagnosis by Fujii Masao. Backpatch to 9.0, where Hot Standby subtly changed the test from "minRecoveryPoint < EndRecPtr" to "minRecoveryPoint <= EndRecPtr". The former works because where the test is performed, we have always read one more record than we've replayed.
1 parent fe20ff0 commit 5840e31

File tree

1 file changed

+19
-14
lines changed
  • src/backend/access/transam

1 file changed

+19
-14
lines changed

src/backend/access/transam/xlog.c

Lines changed: 19 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -402,10 +402,14 @@ typedef struct XLogCtlData
402402
XLogRecPtr lastCheckPointRecPtr;
403403
CheckPoint lastCheckPoint;
404404

405-
/* end+1 of the last record replayed (or being replayed) */
405+
/*
406+
* lastReplayedEndRecPtr points to end+1 of the last record successfully
407+
* replayed. When we're currently replaying a record, ie. in a redo
408+
* function, replayEndRecPtr points to the end+1 of the record being
409+
* replayed, otherwise it's equal to lastReplayedEndRecPtr.
410+
*/
411+
XLogRecPtr lastReplayedEndRecPtr;
406412
XLogRecPtr replayEndRecPtr;
407-
/* end+1 of the last record replayed */
408-
XLogRecPtr recoveryLastRecPtr;
409413
/* timestamp of last COMMIT/ABORT record replayed (or being replayed) */
410414
TimestampTz recoveryLastXTime;
411415

@@ -6161,7 +6165,7 @@ StartupXLOG(void)
61616165
}
61626166

61636167
/*
6164-
* Initialize shared replayEndRecPtr, recoveryLastRecPtr, and
6168+
* Initialize shared replayEndRecPtr, lastReplayedEndRecPtr, and
61656169
* recoveryLastXTime.
61666170
*
61676171
* This is slightly confusing if we're starting from an online
@@ -6174,7 +6178,7 @@ StartupXLOG(void)
61746178
*/
61756179
SpinLockAcquire(&xlogctl->info_lck);
61766180
xlogctl->replayEndRecPtr = ReadRecPtr;
6177-
xlogctl->recoveryLastRecPtr = EndRecPtr;
6181+
xlogctl->lastReplayedEndRecPtr = EndRecPtr;
61786182
xlogctl->recoveryLastXTime = 0;
61796183
SpinLockRelease(&xlogctl->info_lck);
61806184

@@ -6263,9 +6267,6 @@ StartupXLOG(void)
62636267
/* Handle interrupt signals of startup process */
62646268
HandleStartupProcInterrupts();
62656269

6266-
/* Allow read-only connections if we're consistent now */
6267-
CheckRecoveryConsistency();
6268-
62696270
/*
62706271
* Have we reached our recovery target?
62716272
*/
@@ -6313,15 +6314,18 @@ StartupXLOG(void)
63136314
error_context_stack = errcontext.previous;
63146315

63156316
/*
6316-
* Update shared recoveryLastRecPtr after this record has been
6317-
* replayed.
6317+
* Update lastReplayedEndRecPtr after this record has been
6318+
* successfully replayed.
63186319
*/
63196320
SpinLockAcquire(&xlogctl->info_lck);
6320-
xlogctl->recoveryLastRecPtr = EndRecPtr;
6321+
xlogctl->lastReplayedEndRecPtr = EndRecPtr;
63216322
SpinLockRelease(&xlogctl->info_lck);
63226323

63236324
LastRec = ReadRecPtr;
63246325

6326+
/* Allow read-only connections if we're consistent now */
6327+
CheckRecoveryConsistency();
6328+
63256329
record = ReadRecord(NULL, LOG, false);
63266330
} while (record != NULL && recoveryContinue);
63276331

@@ -6661,13 +6665,14 @@ CheckRecoveryConsistency(void)
66616665
* Have we passed our safe starting point?
66626666
*/
66636667
if (!reachedConsistency &&
6664-
XLByteLE(minRecoveryPoint, EndRecPtr) &&
6668+
XLByteLE(minRecoveryPoint, XLogCtl->lastReplayedEndRecPtr) &&
66656669
XLogRecPtrIsInvalid(ControlFile->backupStartPoint))
66666670
{
66676671
reachedConsistency = true;
66686672
ereport(LOG,
66696673
(errmsg("consistent recovery state reached at %X/%X",
6670-
EndRecPtr.xlogid, EndRecPtr.xrecoff)));
6674+
XLogCtl->lastReplayedEndRecPtr.xlogid,
6675+
XLogCtl->lastReplayedEndRecPtr.xrecoff)));
66716676
}
66726677

66736678
/*
@@ -8967,7 +8972,7 @@ pg_last_xlog_replay_location(PG_FUNCTION_ARGS)
89678972
char location[MAXFNAMELEN];
89688973

89698974
SpinLockAcquire(&xlogctl->info_lck);
8970-
recptr = xlogctl->recoveryLastRecPtr;
8975+
recptr = xlogctl->lastReplayedEndRecPtr;
89718976
SpinLockRelease(&xlogctl->info_lck);
89728977

89738978
if (recptr.xlogid == 0 && recptr.xrecoff == 0)

0 commit comments

Comments
 (0)