Skip to content

Commit 02657c4

Browse files
committed
Fix handling of WAL segments ready to be archived during crash recovery
78ea8b5 has fixed an issue related to the recycling of WAL segments on standbys depending on archive_mode. However, it has introduced a regression with the handling of WAL segments ready to be archived during crash recovery, causing those files to be recycled without getting archived.

This commit fixes the regression by tracking in shared memory if a live cluster is either in crash recovery or archive recovery as the handling of WAL segments ready to be archived is different in both cases (those WAL segments should not be removed during crash recovery), and by using this new shared memory state to decide if a segment can be recycled or not. Previously, it was not possible to know if a cluster was in crash recovery or archive recovery as the shared state was able to track only if recovery was happening or not, leading to the problem.

A set of TAP tests is added to close the gap here, making sure that WAL segments ready to be archived are correctly handled when a cluster is in archive or crash recovery with archive_mode set to "on" or "always", for both standby and primary.

Reported-by: Benoît Lobréau
Author: Jehan-Guillaume de Rorthais
Reviewed-by: Kyotaro Horiguchi, Fujii Masao, Michael Paquier
Discussion: https://postgr.es/m/20200331172229.40ee00dc@firost
Backpatch-through: 9.5
1 parent b0b2168 commit 02657c4

File tree

4 files changed

+305
-16
lines changed

4 files changed

+305
-16
lines changed

src/backend/access/transam/xlog.c

Lines changed: 48 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -209,8 +209,9 @@ static TimeLineID receiveTLI = 0;
209209
static bool lastFullPageWrites;
210210

211211
/*
212-
* Local copy of SharedRecoveryInProgress variable. True actually means "not
213-
* known, need to check the shared state".
212+
* Local copy of the state tracked by SharedRecoveryState in shared memory.
213+
* It is false if SharedRecoveryState is RECOVERY_STATE_DONE. True actually
214+
* means "not known, need to check the shared state".
214215
*/
215216
static bool LocalRecoveryInProgress = true;
216217

@@ -635,10 +636,10 @@ typedef struct XLogCtlData
635636
char archiveCleanupCommand[MAXPGPATH];
636637

637638
/*
638-
* SharedRecoveryInProgress indicates if we're still in crash or archive
639+
* SharedRecoveryState indicates if we're still in crash or archive
639640
* recovery. Protected by info_lck.
640641
*/
641-
bool SharedRecoveryInProgress;
642+
RecoveryState SharedRecoveryState;
642643

643644
/*
644645
* SharedHotStandbyActive indicates if we're still in crash or archive
@@ -4295,6 +4296,16 @@ ReadRecord(XLogReaderState *xlogreader, XLogRecPtr RecPtr, int emode,
42954296
updateMinRecoveryPoint = true;
42964297

42974298
UpdateControlFile();
4299+
4300+
/*
4301+
* We update SharedRecoveryState while holding the lock on
4302+
* ControlFileLock so both states are consistent in shared
4303+
* memory.
4304+
*/
4305+
SpinLockAcquire(&XLogCtl->info_lck);
4306+
XLogCtl->SharedRecoveryState = RECOVERY_STATE_ARCHIVE;
4307+
SpinLockRelease(&XLogCtl->info_lck);
4308+
42984309
LWLockRelease(ControlFileLock);
42994310

43004311
CheckRecoveryConsistency();
@@ -4980,7 +4991,7 @@ XLOGShmemInit(void)
49804991
* in additional info.)
49814992
*/
49824993
XLogCtl->XLogCacheBlck = XLOGbuffers - 1;
4983-
XLogCtl->SharedRecoveryInProgress = true;
4994+
XLogCtl->SharedRecoveryState = RECOVERY_STATE_CRASH;
49844995
XLogCtl->SharedHotStandbyActive = false;
49854996
XLogCtl->WalWriterSleeping = false;
49864997

@@ -6803,7 +6814,13 @@ StartupXLOG(void)
68036814
*/
68046815
dbstate_at_startup = ControlFile->state;
68056816
if (InArchiveRecovery)
6817+
{
68066818
ControlFile->state = DB_IN_ARCHIVE_RECOVERY;
6819+
6820+
SpinLockAcquire(&XLogCtl->info_lck);
6821+
XLogCtl->SharedRecoveryState = RECOVERY_STATE_ARCHIVE;
6822+
SpinLockRelease(&XLogCtl->info_lck);
6823+
}
68076824
else
68086825
{
68096826
ereport(LOG,
@@ -6816,6 +6833,10 @@ StartupXLOG(void)
68166833
ControlFile->checkPointCopy.ThisTimeLineID,
68176834
recoveryTargetTLI)));
68186835
ControlFile->state = DB_IN_CRASH_RECOVERY;
6836+
6837+
SpinLockAcquire(&XLogCtl->info_lck);
6838+
XLogCtl->SharedRecoveryState = RECOVERY_STATE_CRASH;
6839+
SpinLockRelease(&XLogCtl->info_lck);
68196840
}
68206841
ControlFile->prevCheckPoint = ControlFile->checkPoint;
68216842
ControlFile->checkPoint = checkPointLoc;
@@ -7841,7 +7862,7 @@ StartupXLOG(void)
78417862
ControlFile->time = (pg_time_t) time(NULL);
78427863

78437864
SpinLockAcquire(&XLogCtl->info_lck);
7844-
XLogCtl->SharedRecoveryInProgress = false;
7865+
XLogCtl->SharedRecoveryState = RECOVERY_STATE_DONE;
78457866
SpinLockRelease(&XLogCtl->info_lck);
78467867

78477868
UpdateControlFile();
@@ -7987,7 +8008,7 @@ RecoveryInProgress(void)
79878008
*/
79888009
volatile XLogCtlData *xlogctl = XLogCtl;
79898010

7990-
LocalRecoveryInProgress = xlogctl->SharedRecoveryInProgress;
8011+
LocalRecoveryInProgress = (xlogctl->SharedRecoveryState != RECOVERY_STATE_DONE);
79918012

79928013
/*
79938014
* Initialize TimeLineID and RedoRecPtr when we discover that recovery
@@ -7999,8 +8020,8 @@ RecoveryInProgress(void)
79998020
{
80008021
/*
80018022
* If we just exited recovery, make sure we read TimeLineID and
8002-
* RedoRecPtr after SharedRecoveryInProgress (for machines with
8003-
* weak memory ordering).
8023+
* RedoRecPtr after SharedRecoveryState (for machines with weak
8024+
* memory ordering).
80048025
*/
80058026
pg_memory_barrier();
80068027
InitXLOGAccess();
@@ -8016,6 +8037,24 @@ RecoveryInProgress(void)
80168037
}
80178038
}
80188039

8040+
/*
8041+
* Returns current recovery state from shared memory.
8042+
*
8043+
* This returned state is kept consistent with the contents of the control
8044+
* file. See details about the possible values of RecoveryState in xlog.h.
8045+
*
* The value is read from XLogCtl->SharedRecoveryState while holding the
* info_lck spinlock, and a copy is returned to the caller.
*/
8046+
RecoveryState
8047+
GetRecoveryState(void)
8048+
{
8049+
RecoveryState retval;
8050+
8051+
SpinLockAcquire(&XLogCtl->info_lck);
8052+
retval = XLogCtl->SharedRecoveryState;
8053+
SpinLockRelease(&XLogCtl->info_lck);
8054+
8055+
return retval;
8056+
}
8057+
80198058
/*
80208059
* Is HotStandby active yet? This is only important in special backends
80218060
* since normal backends won't ever be able to connect until this returns

src/backend/access/transam/xlogarchive.c

Lines changed: 14 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -608,18 +608,25 @@ XLogArchiveCheckDone(const char *xlog)
608608
{
609609
char archiveStatusPath[MAXPGPATH];
610610
struct stat stat_buf;
611-
bool inRecovery = RecoveryInProgress();
611+
612+
/* The file is always deletable if archive_mode is "off". */
613+
if (!XLogArchivingActive())
614+
return true;
612615

613616
/*
614-
* The file is always deletable if archive_mode is "off". On standbys
615-
* archiving is disabled if archive_mode is "on", and enabled with
616-
* "always". On a primary, archiving is enabled if archive_mode is "on"
617-
* or "always".
617+
* During archive recovery, the file is deletable if archive_mode is not
618+
* "always".
618619
*/
619-
if (!((XLogArchivingActive() && !inRecovery) ||
620-
(XLogArchivingAlways() && inRecovery)))
620+
if (!XLogArchivingAlways() &&
621+
GetRecoveryState() == RECOVERY_STATE_ARCHIVE)
621622
return true;
622623

624+
/*
625+
* At this point of the logic, note that we are either a primary with
626+
* archive_mode set to "on" or "always", or a standby with archive_mode
627+
* set to "always".
628+
*/
629+
623630
/* First check for .done --- this means archiver is done with it */
624631
StatusFilePath(archiveStatusPath, xlog, ".done");
625632
if (stat(archiveStatusPath, &stat_buf) == 0)

src/include/access/xlog.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -128,6 +128,14 @@ typedef enum WalLevel
128128
WAL_LEVEL_LOGICAL
129129
} WalLevel;
130130

131+
/*
 * Recovery states.
 *
 * Stored in shared memory as XLogCtl->SharedRecoveryState and read back with
 * GetRecoveryState().  RecoveryInProgress() treats every value other than
 * RECOVERY_STATE_DONE as "recovery still in progress".
 */
132+
typedef enum RecoveryState
133+
{
134+
RECOVERY_STATE_CRASH = 0, /* crash recovery */
135+
RECOVERY_STATE_ARCHIVE, /* archive recovery */
136+
RECOVERY_STATE_DONE /* currently in production */
137+
} RecoveryState;
138+
131139
extern PGDLLIMPORT int wal_level;
132140

133141
/* Is WAL archiving enabled (always or only while server is running normally)? */
@@ -242,6 +250,7 @@ extern const char *xlog_identify(uint8 info);
242250
extern void issue_xlog_fsync(int fd, XLogSegNo segno);
243251

244252
extern bool RecoveryInProgress(void);
253+
extern RecoveryState GetRecoveryState(void);
245254
extern bool HotStandbyActive(void);
246255
extern bool HotStandbyActiveInReplay(void);
247256
extern bool XLogInsertAllowed(void);

0 commit comments

Comments
 (0)