Skip to content

Commit cc2939f

Browse files
committed
Don't archive bogus recycled or preallocated files after timeline switch.
After a timeline switch, we would leave behind recycled WAL segments that are in the future, but on the old timeline. After promotion, and after they become old enough to be recycled again, we would notice that they don't have a .ready or .done file, create a .ready file for them, and archive them. That's bogus, because the files contain garbage, recycled from an older timeline (or preallocated as zeros). We shouldn't archive such files. This could happen when we're following a timeline switch during replay, or when we switch to a new timeline at end-of-recovery. To fix, whenever we switch to a new timeline, scan the data directory for WAL segments on the old timeline, but with a higher segment number, and remove them. Those don't belong to our timeline history, and are most likely bogus recycled or preallocated files. They could also be valid files that we streamed from the primary ahead of time, but in any case, they're not needed to recover to the new timeline.
1 parent 3a9951d commit cc2939f

File tree

1 file changed

+193
-87
lines changed
  • src/backend/access/transam

1 file changed

+193
-87
lines changed

src/backend/access/transam/xlog.c

Lines changed: 193 additions & 87 deletions
Original file line number | Diff line number | Diff line change
@@ -632,6 +632,7 @@ static void XLogArchiveNotify(const char *xlog);
632632
static void XLogArchiveNotifySeg(uint32 log, uint32 seg);
633633
static bool XLogArchiveCheckDone(const char *xlog);
634634
static bool XLogArchiveIsBusy(const char *xlog);
635+
extern bool XLogArchiveIsReady(const char *xlog);
635636
static void XLogArchiveCleanup(const char *xlog);
636637
static void readRecoveryCommandFile(void);
637638
static void exitArchiveRecovery(TimeLineID endTLI,
@@ -669,6 +670,8 @@ static void ExecuteRecoveryCommand(char *command, char *commandName,
669670
bool failOnerror);
670671
static void PreallocXlogFiles(XLogRecPtr endptr);
671672
static void RemoveOldXlogFiles(uint32 log, uint32 seg, XLogRecPtr endptr);
673+
static void RemoveXlogFile(const char *segname, XLogRecPtr endptr);
674+
static void RemoveNonParentXlogFiles(XLogRecPtr switchpoint, TimeLineID newTLI);
672675
static void UpdateLastRemovedPtr(char *filename);
673676
static void ValidateXLOGDirectoryStructure(void);
674677
static void CleanupBackupHistory(void);
@@ -1495,6 +1498,25 @@ XLogArchiveIsBusy(const char *xlog)
14951498
return true;
14961499
}
14971500

1501+
/*
1502+
* XLogArchiveIsReady
1503+
*
1504+
* Check to see if an XLOG segment file has an archive notification (.ready)
1505+
* file.
1506+
*/
1507+
bool
1508+
XLogArchiveIsReady(const char *xlog)
1509+
{
1510+
char archiveStatusPath[MAXPGPATH];
1511+
struct stat stat_buf;
1512+
1513+
StatusFilePath(archiveStatusPath, xlog, ".ready");
1514+
if (stat(archiveStatusPath, &stat_buf) == 0)
1515+
return true;
1516+
1517+
return false;
1518+
}
1519+
14981520
/*
14991521
* XLogArchiveCleanup
15001522
*
@@ -3536,25 +3558,9 @@ UpdateLastRemovedPtr(char *filename)
35363558
static void
35373559
RemoveOldXlogFiles(uint32 log, uint32 seg, XLogRecPtr endptr)
35383560
{
3539-
uint32 endlogId;
3540-
uint32 endlogSeg;
3541-
int max_advance;
35423561
DIR *xldir;
35433562
struct dirent *xlde;
35443563
char lastoff[MAXFNAMELEN];
3545-
char path[MAXPGPATH];
3546-
3547-
#ifdef WIN32
3548-
char newpath[MAXPGPATH];
3549-
#endif
3550-
struct stat statbuf;
3551-
3552-
/*
3553-
* Initialize info about where to try to recycle to. We allow recycling
3554-
* segments up to XLOGfileslop segments beyond the current XLOG location.
3555-
*/
3556-
XLByteToPrevSeg(endptr, endlogId, endlogSeg);
3557-
max_advance = XLOGfileslop;
35583564

35593565
xldir = AllocateDir(XLOGDIR);
35603566
if (xldir == NULL)
@@ -3570,6 +3576,11 @@ RemoveOldXlogFiles(uint32 log, uint32 seg, XLogRecPtr endptr)
35703576

35713577
while ((xlde = ReadDir(xldir, XLOGDIR)) != NULL)
35723578
{
3579+
/* Ignore files that are not XLOG segments */
3580+
if (strlen(xlde->d_name) != 24 ||
3581+
strspn(xlde->d_name, "0123456789ABCDEF") != 24)
3582+
continue;
3583+
35733584
/*
35743585
* We ignore the timeline part of the XLOG segment identifiers in
35753586
* deciding whether a segment is still needed. This ensures that we
@@ -3581,92 +3592,111 @@ RemoveOldXlogFiles(uint32 log, uint32 seg, XLogRecPtr endptr)
35813592
* We use the alphanumeric sorting property of the filenames to decide
35823593
* which ones are earlier than the lastoff segment.
35833594
*/
3584-
if (strlen(xlde->d_name) == 24 &&
3585-
strspn(xlde->d_name, "0123456789ABCDEF") == 24 &&
3586-
strcmp(xlde->d_name + 8, lastoff + 8) <= 0)
3595+
if (strcmp(xlde->d_name + 8, lastoff + 8) <= 0)
35873596
{
35883597
if (XLogArchiveCheckDone(xlde->d_name))
35893598
{
3590-
snprintf(path, MAXPGPATH, XLOGDIR "/%s", xlde->d_name);
3591-
35923599
/* Update the last removed location in shared memory first */
35933600
UpdateLastRemovedPtr(xlde->d_name);
35943601

3595-
/*
3596-
* Before deleting the file, see if it can be recycled as a
3597-
* future log segment. Only recycle normal files, pg_standby
3598-
* for example can create symbolic links pointing to a
3599-
* separate archive directory.
3600-
*/
3601-
if (lstat(path, &statbuf) == 0 && S_ISREG(statbuf.st_mode) &&
3602-
InstallXLogFileSegment(&endlogId, &endlogSeg, path,
3603-
true, &max_advance, true))
3604-
{
3605-
ereport(DEBUG2,
3606-
(errmsg("recycled transaction log file \"%s\"",
3607-
xlde->d_name)));
3608-
CheckpointStats.ckpt_segs_recycled++;
3609-
/* Needn't recheck that slot on future iterations */
3610-
if (max_advance > 0)
3611-
{
3612-
NextLogSeg(endlogId, endlogSeg);
3613-
max_advance--;
3614-
}
3615-
}
3616-
else
3617-
{
3618-
/* No need for any more future segments... */
3619-
int rc;
3602+
RemoveXlogFile(xlde->d_name, endptr);
3603+
}
3604+
}
3605+
}
36203606

3621-
ereport(DEBUG2,
3622-
(errmsg("removing transaction log file \"%s\"",
3623-
xlde->d_name)));
3607+
FreeDir(xldir);
3608+
}
36243609

3610+
/*
3611+
* Recycle or remove a log file that's no longer needed.
3612+
*
3613+
* endptr is current (or recent) end of xlog; this is used to determine
3614+
* whether we want to recycle rather than delete no-longer-wanted log files.
3615+
*/
3616+
static void
3617+
RemoveXlogFile(const char *segname, XLogRecPtr endptr)
3618+
{
3619+
char path[MAXPGPATH];
36253620
#ifdef WIN32
3621+
char newpath[MAXPGPATH];
3622+
#endif
3623+
struct stat statbuf;
3624+
uint32 endlogId;
3625+
uint32 endlogSeg;
3626+
int max_advance;
36263627

3627-
/*
3628-
* On Windows, if another process (e.g another backend)
3629-
* holds the file open in FILE_SHARE_DELETE mode, unlink
3630-
* will succeed, but the file will still show up in
3631-
* directory listing until the last handle is closed. To
3632-
* avoid confusing the lingering deleted file for a live
3633-
* WAL file that needs to be archived, rename it before
3634-
* deleting it.
3635-
*
3636-
* If another process holds the file open without
3637-
* FILE_SHARE_DELETE flag, rename will fail. We'll try
3638-
* again at the next checkpoint.
3639-
*/
3640-
snprintf(newpath, MAXPGPATH, "%s.deleted", path);
3641-
if (rename(path, newpath) != 0)
3642-
{
3643-
ereport(LOG,
3644-
(errcode_for_file_access(),
3645-
errmsg("could not rename old transaction log file \"%s\": %m",
3646-
path)));
3647-
continue;
3648-
}
3649-
rc = unlink(newpath);
3628+
/*
3629+
* Initialize info about where to try to recycle to. We allow recycling
3630+
* segments up to XLOGfileslop segments beyond the current XLOG location.
3631+
*/
3632+
XLByteToPrevSeg(endptr, endlogId, endlogSeg);
3633+
max_advance = XLOGfileslop;
3634+
3635+
snprintf(path, MAXPGPATH, XLOGDIR "/%s", segname);
3636+
3637+
/*
3638+
* Before deleting the file, see if it can be recycled as a future log
3639+
* segment. Only recycle normal files, pg_standby for example can create
3640+
* symbolic links pointing to a separate archive directory.
3641+
*/
3642+
if (lstat(path, &statbuf) == 0 && S_ISREG(statbuf.st_mode) &&
3643+
InstallXLogFileSegment(&endlogId, &endlogSeg, path,
3644+
true, &max_advance, true))
3645+
{
3646+
ereport(DEBUG2,
3647+
(errmsg("recycled transaction log file \"%s\"", segname)));
3648+
CheckpointStats.ckpt_segs_recycled++;
3649+
/* Needn't recheck that slot on future iterations */
3650+
if (max_advance > 0)
3651+
{
3652+
NextLogSeg(endlogId, endlogSeg);
3653+
max_advance--;
3654+
}
3655+
}
3656+
else
3657+
{
3658+
/* No need for any more future segments... */
3659+
int rc;
3660+
3661+
ereport(DEBUG2,
3662+
(errmsg("removing transaction log file \"%s\"", segname)));
3663+
3664+
#ifdef WIN32
3665+
/*
3666+
* On Windows, if another process (e.g another backend) holds the file
3667+
* open in FILE_SHARE_DELETE mode, unlink will succeed, but the file
3668+
* will still show up in directory listing until the last handle is
3669+
* closed. To avoid confusing the lingering deleted file for a live
3670+
* WAL file that needs to be archived, rename it before deleting it.
3671+
*
3672+
* If another process holds the file open without FILE_SHARE_DELETE
3673+
* flag, rename will fail. We'll try again at the next checkpoint.
3674+
*/
3675+
snprintf(newpath, MAXPGPATH, "%s.deleted", path);
3676+
if (rename(path, newpath) != 0)
3677+
{
3678+
ereport(LOG,
3679+
(errcode_for_file_access(),
3680+
errmsg("could not rename old transaction log file \"%s\": %m",
3681+
path)));
3682+
return;
3683+
}
3684+
rc = unlink(newpath);
36503685
#else
3651-
rc = unlink(path);
3686+
rc = unlink(path);
36523687
#endif
3653-
if (rc != 0)
3654-
{
3655-
ereport(LOG,
3656-
(errcode_for_file_access(),
3657-
errmsg("could not remove old transaction log file \"%s\": %m",
3658-
path)));
3659-
continue;
3660-
}
3661-
CheckpointStats.ckpt_segs_removed++;
3662-
}
3663-
3664-
XLogArchiveCleanup(xlde->d_name);
3665-
}
3688+
if (rc != 0)
3689+
{
3690+
ereport(LOG,
3691+
(errcode_for_file_access(),
3692+
errmsg("could not remove old transaction log file \"%s\": %m",
3693+
path)));
3694+
return;
36663695
}
3696+
CheckpointStats.ckpt_segs_removed++;
36673697
}
36683698

3669-
FreeDir(xldir);
3699+
XLogArchiveCleanup(segname);
36703700
}
36713701

36723702
/*
@@ -5863,6 +5893,76 @@ exitArchiveRecovery(TimeLineID endTLI, uint32 endLogId, uint32 endLogSeg)
58635893
(errmsg("archive recovery complete")));
58645894
}
58655895

5896+
/*
5897+
* Remove WAL files that are not part of the given timeline's history.
5898+
*
5899+
* This is called during recovery, whenever we switch to follow a new
5900+
* timeline, and at the end of recovery when we create a new timeline. We
5901+
* wouldn't otherwise care about extra WAL files lying in pg_xlog, but they
5902+
* can be pre-allocated or recycled WAL segments on the old timeline that we
5903+
* haven't used yet, and contain garbage. If we just leave them in pg_xlog,
5904+
* they will eventually be archived, and we can't let that happen. Files that
5905+
* belong to our timeline history are valid, because we have successfully
5906+
* replayed them, but from others we can't be sure.
5907+
*
5908+
* 'switchpoint' is the current point in WAL where we switch to new timeline,
5909+
* and 'newTLI' is the new timeline we switch to.
5910+
*/
5911+
static void
5912+
RemoveNonParentXlogFiles(XLogRecPtr switchpoint, TimeLineID newTLI)
5913+
{
5914+
DIR *xldir;
5915+
struct dirent *xlde;
5916+
char switchseg[MAXFNAMELEN];
5917+
uint32 endlogId;
5918+
uint32 endlogSeg;
5919+
5920+
XLByteToPrevSeg(switchpoint, endlogId, endlogSeg);
5921+
5922+
xldir = AllocateDir(XLOGDIR);
5923+
if (xldir == NULL)
5924+
ereport(ERROR,
5925+
(errcode_for_file_access(),
5926+
errmsg("could not open transaction log directory \"%s\": %m",
5927+
XLOGDIR)));
5928+
5929+
/*
5930+
* Construct a filename of the last segment to be kept.
5931+
*/
5932+
XLogFileName(switchseg, newTLI, endlogId, endlogSeg);
5933+
5934+
elog(DEBUG2, "attempting to remove WAL segments newer than log file %s",
5935+
switchseg);
5936+
5937+
while ((xlde = ReadDir(xldir, XLOGDIR)) != NULL)
5938+
{
5939+
/* Ignore files that are not XLOG segments */
5940+
if (strlen(xlde->d_name) != 24 ||
5941+
strspn(xlde->d_name, "0123456789ABCDEF") != 24)
5942+
continue;
5943+
5944+
/*
5945+
* Remove files that are on a timeline older than the new one we're
5946+
* switching to, but with a segment number >= the first segment on
5947+
* the new timeline.
5948+
*/
5949+
if (strncmp(xlde->d_name, switchseg, 8) < 0 &&
5950+
strcmp(xlde->d_name + 8, switchseg + 8) > 0)
5951+
{
5952+
/*
5953+
* If the file has already been marked as .ready, however, don't
5954+
* remove it yet. It should be OK to remove it - files that are
5955+
* not part of our timeline history are not required for recovery
5956+
* - but seems safer to let them be archived and removed later.
5957+
*/
5958+
if (!XLogArchiveIsReady(xlde->d_name))
5959+
RemoveXlogFile(xlde->d_name, switchpoint);
5960+
}
5961+
}
5962+
5963+
FreeDir(xldir);
5964+
}
5965+
58665966
/*
58675967
* For point-in-time recovery, this function decides whether we want to
58685968
* stop applying the XLOG at or after the current record.
@@ -7238,6 +7338,12 @@ StartupXLOG(void)
72387338
true);
72397339
}
72407340

7341+
/*
7342+
* Clean up any (possibly bogus) future WAL segments on the old timeline.
7343+
*/
7344+
if (ArchiveRecoveryRequested)
7345+
RemoveNonParentXlogFiles(EndOfLog, ThisTimeLineID);
7346+
72417347
/*
72427348
* Preallocate additional log files, if wanted.
72437349
*/

0 commit comments

Comments
 (0)