Skip to content

Commit a5b0c06

Browse files
nmisch authored and michaelpq committed
Skip WAL recycling and preallocation during archive recovery.
The previous commit addressed the chief consequences of a race condition between InstallXLogFileSegment() and KeepFileRestoredFromArchive(). Fix three lesser consequences. A spurious durable_rename_excl() LOG message remained possible. KeepFileRestoredFromArchive() wasted the proceeds of WAL recycling and preallocation. Finally, XLogFileInitInternal() could return a descriptor for a file that KeepFileRestoredFromArchive() had already unlinked. That felt like a recipe for future bugs.

This commit has been applied as of cc2c7d6 in v15 and newer versions. This is required on stable branches of v13 and v14 to fix a regression reported by Noah Misch, introduced by 1f95181, causing spurious failures in archive recovery (neither streaming nor archive recovery) with concurrent restartpoints. The backpatched versions of the patches have been aligned on these branches by me; Noah Misch is the author. Tests have been conducted by both of us.

Note that this commit is known to have introduced a regression of its own. This is fixed by the commit following this one, and not grouped in a single commit to keep the commit history consistent across all branches.

Reported-by: Arun Thirupathi
Author: Noah Misch <noah@leadboat.com>
Discussion: https://postgr.es/m/20210202151416.GB3304930@rfd.leadboat.com
Discussion: https://postgr.es/m/20250306193013.36.nmisch@google.com
Backpatch-through: 13
1 parent cbed472 commit a5b0c06

File tree

1 file changed

+57
-8
lines changed
  • src/backend/access/transam

1 file changed

+57
-8
lines changed

src/backend/access/transam/xlog.c

Lines changed: 57 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -675,6 +675,16 @@ typedef struct XLogCtlData
675675
*/
676676
bool SharedHotStandbyActive;
677677

678+
/*
679+
* InstallXLogFileSegmentActive indicates whether the checkpointer should
680+
* arrange for future segments by recycling and/or PreallocXlogFiles().
681+
* Protected by ControlFileLock. Only the startup process changes it. If
682+
* true, anyone can use InstallXLogFileSegment(). If false, the startup
683+
* process owns the exclusive right to install segments, by reading from
684+
* the archive and possibly replacing existing files.
685+
*/
686+
bool InstallXLogFileSegmentActive;
687+
678688
/*
679689
* SharedPromoteIsTriggered indicates if a standby promotion has been
680690
* triggered. Protected by info_lck.
@@ -925,6 +935,7 @@ static int XLogPageRead(XLogReaderState *xlogreader, XLogRecPtr targetPagePtr,
925935
int reqLen, XLogRecPtr targetRecPtr, char *readBuf);
926936
static bool WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess,
927937
bool fetching_ckpt, XLogRecPtr tliRecPtr);
938+
static void XLogShutdownWalRcv(void);
928939
static int emode_for_corrupt_record(int emode, XLogRecPtr RecPtr);
929940
static void XLogFileClose(void);
930941
static void PreallocXlogFiles(XLogRecPtr endptr);
@@ -3608,8 +3619,8 @@ XLogFileCopy(XLogSegNo destsegno, TimeLineID srcTLI, XLogSegNo srcsegno,
36083619
* is false.)
36093620
*
36103621
* Returns true if the file was installed successfully. false indicates that
3611-
* max_segno limit was exceeded, or an error occurred while renaming the
3612-
* file into place.
3622+
* max_segno limit was exceeded, the startup process has disabled this
3623+
* function for now, or an error occurred while renaming the file into place.
36133624
*/
36143625
static bool
36153626
InstallXLogFileSegment(XLogSegNo *segno, char *tmppath,
@@ -3621,6 +3632,11 @@ InstallXLogFileSegment(XLogSegNo *segno, char *tmppath,
36213632
XLogFilePath(path, ThisTimeLineID, *segno, wal_segment_size);
36223633

36233634
LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
3635+
if (!XLogCtl->InstallXLogFileSegmentActive)
3636+
{
3637+
LWLockRelease(ControlFileLock);
3638+
return false;
3639+
}
36243640

36253641
if (!find_free)
36263642
{
@@ -3725,6 +3741,7 @@ XLogFileRead(XLogSegNo segno, int emode, TimeLineID tli,
37253741
*/
37263742
if (source == XLOG_FROM_ARCHIVE)
37273743
{
3744+
Assert(!XLogCtl->InstallXLogFileSegmentActive);
37283745
KeepFileRestoredFromArchive(path, xlogfname);
37293746

37303747
/*
@@ -3926,6 +3943,9 @@ PreallocXlogFiles(XLogRecPtr endptr)
39263943
char path[MAXPGPATH];
39273944
uint64 offset;
39283945

3946+
if (!XLogCtl->InstallXLogFileSegmentActive)
3947+
return; /* unlocked check says no */
3948+
39293949
XLByteToPrevSeg(endptr, _logSegNo, wal_segment_size);
39303950
offset = XLogSegmentOffset(endptr - 1, wal_segment_size);
39313951
if (offset >= (uint32) (0.75 * wal_segment_size))
@@ -4207,6 +4227,7 @@ RemoveXlogFile(const char *segname, XLogRecPtr lastredoptr, XLogRecPtr endptr)
42074227
*/
42084228
if (wal_recycle &&
42094229
endlogSegNo <= recycleSegNo &&
4230+
XLogCtl->InstallXLogFileSegmentActive && /* callee rechecks this */
42104231
lstat(path, &statbuf) == 0 && S_ISREG(statbuf.st_mode) &&
42114232
InstallXLogFileSegment(&endlogSegNo, path,
42124233
true, recycleSegNo))
@@ -4220,7 +4241,7 @@ RemoveXlogFile(const char *segname, XLogRecPtr lastredoptr, XLogRecPtr endptr)
42204241
}
42214242
else
42224243
{
4223-
/* No need for any more future segments... */
4244+
/* No need for any more future segments, or recycling failed ... */
42244245
int rc;
42254246

42264247
ereport(DEBUG2,
@@ -5225,6 +5246,7 @@ XLOGShmemInit(void)
52255246
XLogCtl->XLogCacheBlck = XLOGbuffers - 1;
52265247
XLogCtl->SharedRecoveryState = RECOVERY_STATE_CRASH;
52275248
XLogCtl->SharedHotStandbyActive = false;
5249+
XLogCtl->InstallXLogFileSegmentActive = false;
52285250
XLogCtl->SharedPromoteIsTriggered = false;
52295251
XLogCtl->WalWriterSleeping = false;
52305252

@@ -5251,6 +5273,11 @@ BootStrapXLOG(void)
52515273
struct timeval tv;
52525274
pg_crc32c crc;
52535275

5276+
/* allow ordinary WAL segment creation, like StartupXLOG() would */
5277+
LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
5278+
XLogCtl->InstallXLogFileSegmentActive = true;
5279+
LWLockRelease(ControlFileLock);
5280+
52545281
/*
52555282
* Select a hopefully-unique system identifier code for this installation.
52565283
* We use the result of gettimeofday(), including the fractional seconds
@@ -7531,7 +7558,7 @@ StartupXLOG(void)
75317558
* over these records and subsequent ones if it's still alive when we
75327559
* start writing WAL.
75337560
*/
7534-
ShutdownWalRcv();
7561+
XLogShutdownWalRcv();
75357562

75367563
/*
75377564
* Reset unlogged relations to the contents of their INIT fork. This is
@@ -7556,7 +7583,7 @@ StartupXLOG(void)
75567583
* recovery, e.g., timeline history file) from archive or pg_wal.
75577584
*
75587585
* Note that standby mode must be turned off after killing WAL receiver,
7559-
* i.e., calling ShutdownWalRcv().
7586+
* i.e., calling XLogShutdownWalRcv().
75607587
*/
75617588
Assert(!WalRcvStreaming());
75627589
StandbyMode = false;
@@ -7625,6 +7652,14 @@ StartupXLOG(void)
76257652
*/
76267653
oldestActiveXID = PrescanPreparedTransactions(NULL, NULL);
76277654

7655+
/*
7656+
* Allow ordinary WAL segment creation before any exitArchiveRecovery(),
7657+
* which sometimes creates a segment, and after the last ReadRecord().
7658+
*/
7659+
LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
7660+
XLogCtl->InstallXLogFileSegmentActive = true;
7661+
LWLockRelease(ControlFileLock);
7662+
76287663
/*
76297664
* Consider whether we need to assign a new timeline ID.
76307665
*
@@ -12491,7 +12526,7 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess,
1249112526
*/
1249212527
if (StandbyMode && CheckForStandbyTrigger())
1249312528
{
12494-
ShutdownWalRcv();
12529+
XLogShutdownWalRcv();
1249512530
return false;
1249612531
}
1249712532

@@ -12539,7 +12574,7 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess,
1253912574
* WAL that we restore from archive.
1254012575
*/
1254112576
if (WalRcvStreaming())
12542-
ShutdownWalRcv();
12577+
XLogShutdownWalRcv();
1254312578

1254412579
/*
1254512580
* Before we sleep, re-scan for possible new timelines if
@@ -12669,7 +12704,7 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess,
1266912704
*/
1267012705
if (pendingWalRcvRestart && !startWalReceiver)
1267112706
{
12672-
ShutdownWalRcv();
12707+
XLogShutdownWalRcv();
1267312708

1267412709
/*
1267512710
* Re-scan for possible new timelines if we were
@@ -12720,6 +12755,9 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess,
1272012755
tli, curFileTLI);
1272112756
}
1272212757
curFileTLI = tli;
12758+
LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
12759+
XLogCtl->InstallXLogFileSegmentActive = true;
12760+
LWLockRelease(ControlFileLock);
1272312761
RequestXLogStreaming(tli, ptr, PrimaryConnInfo,
1272412762
PrimarySlotName,
1272512763
wal_receiver_create_temp_slot);
@@ -12882,6 +12920,17 @@ StartupRequestWalReceiverRestart(void)
1288212920
}
1288312921
}
1288412922

12923+
/* Thin wrapper around ShutdownWalRcv(). */
12924+
static void
12925+
XLogShutdownWalRcv(void)
12926+
{
12927+
ShutdownWalRcv();
12928+
12929+
LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
12930+
XLogCtl->InstallXLogFileSegmentActive = false;
12931+
LWLockRelease(ControlFileLock);
12932+
}
12933+
1288512934
/*
1288612935
* Determine what log level should be used to report a corrupt WAL record
1288712936
* in the current WAL page, previously read by XLogPageRead().

0 commit comments

Comments
 (0)