Skip to content

Commit cc2c7d6

Browse files
committed
Skip WAL recycling and preallocation during archive recovery.
The previous commit addressed the chief consequences of a race condition between InstallXLogFileSegment() and KeepFileRestoredFromArchive(). Fix three lesser consequences. A spurious durable_rename_excl() LOG message remained possible. KeepFileRestoredFromArchive() wasted the proceeds of WAL recycling and preallocation. Finally, XLogFileInitInternal() could return a descriptor for a file that KeepFileRestoredFromArchive() had already unlinked. That felt like a recipe for future bugs. Discussion: https://postgr.es/m/20210202151416.GB3304930@rfd.leadboat.com
1 parent 2b3e467 commit cc2c7d6

File tree

1 file changed

+57
-8
lines changed
  • src/backend/access/transam

1 file changed

+57
-8
lines changed

src/backend/access/transam/xlog.c

Lines changed: 57 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -662,6 +662,16 @@ typedef struct XLogCtlData
662662
*/
663663
bool SharedHotStandbyActive;
664664

665+
/*
666+
* InstallXLogFileSegmentActive indicates whether the checkpointer should
667+
* arrange for future segments by recycling and/or PreallocXlogFiles().
668+
* Protected by ControlFileLock. Only the startup process changes it. If
669+
* true, anyone can use InstallXLogFileSegment(). If false, the startup
670+
* process owns the exclusive right to install segments, by reading from
671+
* the archive and possibly replacing existing files.
672+
*/
673+
bool InstallXLogFileSegmentActive;
674+
665675
/*
666676
* SharedPromoteIsTriggered indicates if a standby promotion has been
667677
* triggered. Protected by info_lck.
@@ -921,6 +931,7 @@ static int XLogPageRead(XLogReaderState *xlogreader, XLogRecPtr targetPagePtr,
921931
int reqLen, XLogRecPtr targetRecPtr, char *readBuf);
922932
static bool WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess,
923933
bool fetching_ckpt, XLogRecPtr tliRecPtr);
934+
static void XLogShutdownWalRcv(void);
924935
static int emode_for_corrupt_record(int emode, XLogRecPtr RecPtr);
925936
static void XLogFileClose(void);
926937
static void PreallocXlogFiles(XLogRecPtr endptr);
@@ -3625,8 +3636,8 @@ XLogFileCopy(XLogSegNo destsegno, TimeLineID srcTLI, XLogSegNo srcsegno,
36253636
* is false.)
36263637
*
36273638
* Returns true if the file was installed successfully. false indicates that
3628-
* max_segno limit was exceeded, or an error occurred while renaming the
3629-
* file into place.
3639+
* max_segno limit was exceeded, the startup process has disabled this
3640+
* function for now, or an error occurred while renaming the file into place.
36303641
*/
36313642
static bool
36323643
InstallXLogFileSegment(XLogSegNo *segno, char *tmppath,
@@ -3638,6 +3649,11 @@ InstallXLogFileSegment(XLogSegNo *segno, char *tmppath,
36383649
XLogFilePath(path, ThisTimeLineID, *segno, wal_segment_size);
36393650

36403651
LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
3652+
if (!XLogCtl->InstallXLogFileSegmentActive)
3653+
{
3654+
LWLockRelease(ControlFileLock);
3655+
return false;
3656+
}
36413657

36423658
if (!find_free)
36433659
{
@@ -3745,6 +3761,7 @@ XLogFileRead(XLogSegNo segno, int emode, TimeLineID tli,
37453761
*/
37463762
if (source == XLOG_FROM_ARCHIVE)
37473763
{
3764+
Assert(!XLogCtl->InstallXLogFileSegmentActive);
37483765
KeepFileRestoredFromArchive(path, xlogfname);
37493766

37503767
/*
@@ -3946,6 +3963,9 @@ PreallocXlogFiles(XLogRecPtr endptr)
39463963
char path[MAXPGPATH];
39473964
uint64 offset;
39483965

3966+
if (!XLogCtl->InstallXLogFileSegmentActive)
3967+
return; /* unlocked check says no */
3968+
39493969
XLByteToPrevSeg(endptr, _logSegNo, wal_segment_size);
39503970
offset = XLogSegmentOffset(endptr - 1, wal_segment_size);
39513971
if (offset >= (uint32) (0.75 * wal_segment_size))
@@ -4227,6 +4247,7 @@ RemoveXlogFile(const char *segname, XLogSegNo recycleSegNo,
42274247
*/
42284248
if (wal_recycle &&
42294249
*endlogSegNo <= recycleSegNo &&
4250+
XLogCtl->InstallXLogFileSegmentActive && /* callee rechecks this */
42304251
lstat(path, &statbuf) == 0 && S_ISREG(statbuf.st_mode) &&
42314252
InstallXLogFileSegment(endlogSegNo, path,
42324253
true, recycleSegNo))
@@ -4240,7 +4261,7 @@ RemoveXlogFile(const char *segname, XLogSegNo recycleSegNo,
42404261
}
42414262
else
42424263
{
4243-
/* No need for any more future segments... */
4264+
/* No need for any more future segments, or recycling failed ... */
42444265
int rc;
42454266

42464267
ereport(DEBUG2,
@@ -5226,6 +5247,7 @@ XLOGShmemInit(void)
52265247
XLogCtl->XLogCacheBlck = XLOGbuffers - 1;
52275248
XLogCtl->SharedRecoveryState = RECOVERY_STATE_CRASH;
52285249
XLogCtl->SharedHotStandbyActive = false;
5250+
XLogCtl->InstallXLogFileSegmentActive = false;
52295251
XLogCtl->SharedPromoteIsTriggered = false;
52305252
XLogCtl->WalWriterSleeping = false;
52315253

@@ -5253,6 +5275,11 @@ BootStrapXLOG(void)
52535275
struct timeval tv;
52545276
pg_crc32c crc;
52555277

5278+
/* allow ordinary WAL segment creation, like StartupXLOG() would */
5279+
LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
5280+
XLogCtl->InstallXLogFileSegmentActive = true;
5281+
LWLockRelease(ControlFileLock);
5282+
52565283
/*
52575284
* Select a hopefully-unique system identifier code for this installation.
52585285
* We use the result of gettimeofday(), including the fractional seconds
@@ -7619,7 +7646,7 @@ StartupXLOG(void)
76197646
* the startup checkpoint record. It will trump over the checkpoint and
76207647
* subsequent records if it's still alive when we start writing WAL.
76217648
*/
7622-
ShutdownWalRcv();
7649+
XLogShutdownWalRcv();
76237650

76247651
/*
76257652
* Reset unlogged relations to the contents of their INIT fork. This is
@@ -7644,7 +7671,7 @@ StartupXLOG(void)
76447671
* recovery, e.g., timeline history file) from archive or pg_wal.
76457672
*
76467673
* Note that standby mode must be turned off after killing WAL receiver,
7647-
* i.e., calling ShutdownWalRcv().
7674+
* i.e., calling XLogShutdownWalRcv().
76487675
*/
76497676
Assert(!WalRcvStreaming());
76507677
StandbyMode = false;
@@ -7709,6 +7736,14 @@ StartupXLOG(void)
77097736
*/
77107737
oldestActiveXID = PrescanPreparedTransactions(NULL, NULL);
77117738

7739+
/*
7740+
* Allow ordinary WAL segment creation before any exitArchiveRecovery(),
7741+
* which sometimes creates a segment, and after the last ReadRecord().
7742+
*/
7743+
LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
7744+
XLogCtl->InstallXLogFileSegmentActive = true;
7745+
LWLockRelease(ControlFileLock);
7746+
77127747
/*
77137748
* Consider whether we need to assign a new timeline ID.
77147749
*
@@ -12378,7 +12413,7 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess,
1237812413
*/
1237912414
if (StandbyMode && CheckForStandbyTrigger())
1238012415
{
12381-
ShutdownWalRcv();
12416+
XLogShutdownWalRcv();
1238212417
return false;
1238312418
}
1238412419

@@ -12426,7 +12461,7 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess,
1242612461
* WAL that we restore from archive.
1242712462
*/
1242812463
if (WalRcvStreaming())
12429-
ShutdownWalRcv();
12464+
XLogShutdownWalRcv();
1243012465

1243112466
/*
1243212467
* Before we sleep, re-scan for possible new timelines if
@@ -12553,7 +12588,7 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess,
1255312588
*/
1255412589
if (pendingWalRcvRestart && !startWalReceiver)
1255512590
{
12556-
ShutdownWalRcv();
12591+
XLogShutdownWalRcv();
1255712592

1255812593
/*
1255912594
* Re-scan for possible new timelines if we were
@@ -12603,6 +12638,9 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess,
1260312638
tli, curFileTLI);
1260412639
}
1260512640
curFileTLI = tli;
12641+
LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
12642+
XLogCtl->InstallXLogFileSegmentActive = true;
12643+
LWLockRelease(ControlFileLock);
1260612644
RequestXLogStreaming(tli, ptr, PrimaryConnInfo,
1260712645
PrimarySlotName,
1260812646
wal_receiver_create_temp_slot);
@@ -12770,6 +12808,17 @@ StartupRequestWalReceiverRestart(void)
1277012808
}
1277112809
}
1277212810

12811+
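/* Thin wrapper around ShutdownWalRcv() that afterwards clears InstallXLogFileSegmentActive under ControlFileLock, so that only the startup process may install WAL segments. */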
12812+
static void
12813+
XLogShutdownWalRcv(void)
12814+
{
12815+
ShutdownWalRcv();
12816+
12817+
LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
12818+
XLogCtl->InstallXLogFileSegmentActive = false;
12819+
LWLockRelease(ControlFileLock);
12820+
}
12821+
1277312822
/*
1277412823
* Determine what log level should be used to report a corrupt WAL record
1277512824
* in the current WAL page, previously read by XLogPageRead().

0 commit comments

Comments
 (0)