Skip to content

Commit cbed472

Browse files
nmischmichaelpq
authored andcommitted
Don't ERROR on PreallocXlogFiles() race condition.
Before a restartpoint finishes PreallocXlogFiles(), a startup process KeepFileRestoredFromArchive() call can unlink the preallocated segment. If a CHECKPOINT sql command had elicited the restartpoint experiencing the race condition, that sql command failed. Moreover, the restartpoint omitted its log_checkpoints message and some inessential resource reclamation. Prevent the ERROR by skipping open() of the segment. Since these consequences are so minor, no back-patch. This commit has been applied as of 2b3e467 in v15 and newer versions. This is required on stable branches of v13 and v14 to fix a regression reported by Noah Misch, introduced by 1f95181, causing spurious failures in archive recovery (neither streaming nor archive recovery) with concurrent restartpoints. The backpatched versions of the patches have been aligned on these branches by me, Noah Misch is the author. Tests have been conducted by the both of us. Reported-by: Arun Thirupathi Author: Noah Misch <noah@leadboat.com> Discussion: https://postgr.es/m/20210202151416.GB3304930@rfd.leadboat.com Discussion: https://postgr.es/m/20250306193013.36.nmisch@google.com Backpatch-through: 13
1 parent e77d9cd commit cbed472

File tree

3 files changed

+58
-27
lines changed

3 files changed

+58
-27
lines changed

src/backend/access/transam/xlog.c

Lines changed: 56 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -2440,7 +2440,6 @@ XLogWrite(XLogwrtRqst WriteRqst, bool flexible)
24402440
bool ispartialpage;
24412441
bool last_iteration;
24422442
bool finishing_seg;
2443-
bool added;
24442443
int curridx;
24452444
int npages;
24462445
int startidx;
@@ -2507,7 +2506,7 @@ XLogWrite(XLogwrtRqst WriteRqst, bool flexible)
25072506
wal_segment_size);
25082507

25092508
/* create/use new log file */
2510-
openLogFile = XLogFileInit(openLogSegNo, &added);
2509+
openLogFile = XLogFileInit(openLogSegNo);
25112510
ReserveExternalFD();
25122511
}
25132512

@@ -3253,23 +3252,21 @@ XLogNeedsFlush(XLogRecPtr record)
32533252
}
32543253

32553254
/*
3256-
* Create a new XLOG file segment, or open a pre-existing one.
3255+
* Try to make a given XLOG file segment exist.
32573256
*
3258-
* logsegno: identify segment to be created/opened.
3257+
* logsegno: identify segment.
32593258
*
32603259
* *added: on return, true if this call raised the number of extant segments.
32613260
*
3262-
* Returns FD of opened file.
3261+
* path: on return, this char[MAXPGPATH] has the path to the logsegno file.
32633262
*
3264-
* Note: errors here are ERROR not PANIC because we might or might not be
3265-
* inside a critical section (eg, during checkpoint there is no reason to
3266-
* take down the system on failure). They will promote to PANIC if we are
3267-
* in a critical section.
3263+
* Returns -1 or FD of opened file. A -1 here is not an error; a caller
3264+
* wanting an open segment should attempt to open "path", which usually will
3265+
* succeed. (This is weird, but it's efficient for the callers.)
32683266
*/
3269-
int
3270-
XLogFileInit(XLogSegNo logsegno, bool *added)
3267+
static int
3268+
XLogFileInitInternal(XLogSegNo logsegno, bool *added, char *path)
32713269
{
3272-
char path[MAXPGPATH];
32733270
char tmppath[MAXPGPATH];
32743271
PGAlignedXLogBlock zbuffer;
32753272
XLogSegNo installed_segno;
@@ -3407,26 +3404,53 @@ XLogFileInit(XLogSegNo logsegno, bool *added)
34073404
*/
34083405
max_segno = logsegno + CheckPointSegments;
34093406
if (InstallXLogFileSegment(&installed_segno, tmppath, true, max_segno))
3407+
{
34103408
*added = true;
3409+
elog(DEBUG2, "done creating and filling new WAL file");
3410+
}
34113411
else
34123412
{
34133413
/*
34143414
* No need for any more future segments, or InstallXLogFileSegment()
3415-
* failed to rename the file into place. If the rename failed, opening
3416-
* the file below will fail.
3415+
* failed to rename the file into place. If the rename failed, a
3416+
* caller opening the file may fail.
34173417
*/
34183418
unlink(tmppath);
3419+
elog(DEBUG2, "abandoned new WAL file");
34193420
}
34203421

3422+
return -1;
3423+
}
3424+
3425+
/*
3426+
* Create a new XLOG file segment, or open a pre-existing one.
3427+
*
3428+
* logsegno: identify segment to be created/opened.
3429+
*
3430+
* Returns FD of opened file.
3431+
*
3432+
* Note: errors here are ERROR not PANIC because we might or might not be
3433+
* inside a critical section (eg, during checkpoint there is no reason to
3434+
* take down the system on failure). They will promote to PANIC if we are
3435+
* in a critical section.
3436+
*/
3437+
int
3438+
XLogFileInit(XLogSegNo logsegno)
3439+
{
3440+
bool ignore_added;
3441+
char path[MAXPGPATH];
3442+
int fd;
3443+
3444+
fd = XLogFileInitInternal(logsegno, &ignore_added, path);
3445+
if (fd >= 0)
3446+
return fd;
3447+
34213448
/* Now open original target segment (might not be file I just made) */
34223449
fd = BasicOpenFile(path, O_RDWR | PG_BINARY | get_sync_bit(sync_method));
34233450
if (fd < 0)
34243451
ereport(ERROR,
34253452
(errcode_for_file_access(),
34263453
errmsg("could not open file \"%s\": %m", path)));
3427-
3428-
elog(DEBUG2, "done creating and filling new WAL file");
3429-
34303454
return fd;
34313455
}
34323456

@@ -3883,22 +3907,33 @@ XLogFileClose(void)
38833907
* High-volume systems will be OK once they've built up a sufficient set of
38843908
* recycled log segments, but the startup transient is likely to include
38853909
* a lot of segment creations by foreground processes, which is not so good.
3910+
*
3911+
* XLogFileInitInternal() can ereport(ERROR). All known causes indicate big
3912+
* trouble; for example, a full filesystem is one cause. The checkpoint WAL
3913+
* and/or ControlFile updates already completed. If a RequestCheckpoint()
3914+
* initiated the present checkpoint and an ERROR ends this function, the
3915+
* command that called RequestCheckpoint() fails. That's not ideal, but it's
3916+
* not worth contorting more functions to use caller-specified elevel values.
3917+
* (With or without RequestCheckpoint(), an ERROR forestalls some inessential
3918+
* reporting and resource reclamation.)
38863919
*/
38873920
static void
38883921
PreallocXlogFiles(XLogRecPtr endptr)
38893922
{
38903923
XLogSegNo _logSegNo;
38913924
int lf;
38923925
bool added;
3926+
char path[MAXPGPATH];
38933927
uint64 offset;
38943928

38953929
XLByteToPrevSeg(endptr, _logSegNo, wal_segment_size);
38963930
offset = XLogSegmentOffset(endptr - 1, wal_segment_size);
38973931
if (offset >= (uint32) (0.75 * wal_segment_size))
38983932
{
38993933
_logSegNo++;
3900-
lf = XLogFileInit(_logSegNo, &added);
3901-
close(lf);
3934+
lf = XLogFileInitInternal(_logSegNo, &added, path);
3935+
if (lf >= 0)
3936+
close(lf);
39023937
if (added)
39033938
CheckpointStats.ckpt_segs_added++;
39043939
}
@@ -5212,7 +5247,6 @@ BootStrapXLOG(void)
52125247
XLogLongPageHeader longpage;
52135248
XLogRecord *record;
52145249
char *recptr;
5215-
bool added;
52165250
uint64 sysidentifier;
52175251
struct timeval tv;
52185252
pg_crc32c crc;
@@ -5309,7 +5343,7 @@ BootStrapXLOG(void)
53095343
record->xl_crc = crc;
53105344

53115345
/* Create first XLOG segment file */
5312-
openLogFile = XLogFileInit(1, &added);
5346+
openLogFile = XLogFileInit(1);
53135347

53145348
/*
53155349
* We needn't bother with Reserve/ReleaseExternalFD here, since we'll
@@ -5615,10 +5649,9 @@ exitArchiveRecovery(TimeLineID endTLI, XLogRecPtr endOfLog)
56155649
* The switch happened at a segment boundary, so just create the next
56165650
* segment on the new timeline.
56175651
*/
5618-
bool added;
56195652
int fd;
56205653

5621-
fd = XLogFileInit(startLogSegNo, &added);
5654+
fd = XLogFileInit(startLogSegNo);
56225655

56235656
if (close(fd) != 0)
56245657
{

src/backend/replication/walreceiver.c

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -918,11 +918,9 @@ XLogWalRcvWrite(char *buf, Size nbytes, XLogRecPtr recptr)
918918

919919
if (recvFile < 0)
920920
{
921-
bool added = true;
922-
923921
/* Create/use new log file */
924922
XLByteToSeg(recptr, recvSegNo, wal_segment_size);
925-
recvFile = XLogFileInit(recvSegNo, &added);
923+
recvFile = XLogFileInit(recvSegNo);
926924
recvFileTLI = ThisTimeLineID;
927925
}
928926

src/include/access/xlog.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -286,7 +286,7 @@ extern XLogRecPtr XLogInsertRecord(struct XLogRecData *rdata,
286286
extern void XLogFlush(XLogRecPtr RecPtr);
287287
extern bool XLogBackgroundFlush(void);
288288
extern bool XLogNeedsFlush(XLogRecPtr RecPtr);
289-
extern int XLogFileInit(XLogSegNo segno, bool *added);
289+
extern int XLogFileInit(XLogSegNo segno);
290290
extern int XLogFileOpen(XLogSegNo segno);
291291

292292
extern void CheckXLogRemoved(XLogSegNo segno, TimeLineID tli);

0 commit comments

Comments
 (0)