Skip to content

Commit 675b771

Browse files
nmischmichaelpq
authored andcommitted
Don't ERROR on PreallocXlogFiles() race condition.
Before a restartpoint finishes PreallocXlogFiles(), a startup process KeepFileRestoredFromArchive() call can unlink the preallocated segment. If a CHECKPOINT sql command had elicited the restartpoint experiencing the race condition, that sql command failed. Moreover, the restartpoint omitted its log_checkpoints message and some inessential resource reclamation. Prevent the ERROR by skipping open() of the segment. Since these consequences are so minor, no back-patch. This commit has been applied as of 2b3e467 in v15 and newer versions. This is required on stable branches of v13 and v14 to fix a regression reported by Noah Misch, introduced by 1f95181, causing spurious failures in archive recovery (neither streaming nor archive recovery) with concurrent restartpoints. The backpatched versions of the patches have been aligned on these branches by me, Noah Misch is the author. Tests have been conducted by the both of us. Reported-by: Arun Thirupathi Author: Noah Misch <noah@leadboat.com> Discussion: https://postgr.es/m/20210202151416.GB3304930@rfd.leadboat.com Discussion: https://postgr.es/m/20250306193013.36.nmisch@google.com Backpatch-through: 13
1 parent 47d2d29 commit 675b771

File tree

3 files changed

+58
-27
lines changed

3 files changed

+58
-27
lines changed

src/backend/access/transam/xlog.c

Lines changed: 56 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -2452,7 +2452,6 @@ XLogWrite(XLogwrtRqst WriteRqst, bool flexible)
24522452
bool ispartialpage;
24532453
bool last_iteration;
24542454
bool finishing_seg;
2455-
bool added;
24562455
int curridx;
24572456
int npages;
24582457
int startidx;
@@ -2518,7 +2517,7 @@ XLogWrite(XLogwrtRqst WriteRqst, bool flexible)
25182517
wal_segment_size);
25192518

25202519
/* create/use new log file */
2521-
openLogFile = XLogFileInit(openLogSegNo, &added);
2520+
openLogFile = XLogFileInit(openLogSegNo);
25222521
ReserveExternalFD();
25232522
}
25242523

@@ -3283,23 +3282,21 @@ XLogNeedsFlush(XLogRecPtr record)
32833282
}
32843283

32853284
/*
3286-
* Create a new XLOG file segment, or open a pre-existing one.
3285+
* Try to make a given XLOG file segment exist.
32873286
*
3288-
* logsegno: identify segment to be created/opened.
3287+
* logsegno: identify segment.
32893288
*
32903289
* *added: on return, true if this call raised the number of extant segments.
32913290
*
3292-
* Returns FD of opened file.
3291+
* path: on return, this char[MAXPGPATH] has the path to the logsegno file.
32933292
*
3294-
* Note: errors here are ERROR not PANIC because we might or might not be
3295-
* inside a critical section (eg, during checkpoint there is no reason to
3296-
* take down the system on failure). They will promote to PANIC if we are
3297-
* in a critical section.
3293+
* Returns -1 or FD of opened file. A -1 here is not an error; a caller
3294+
* wanting an open segment should attempt to open "path", which usually will
3295+
* succeed. (This is weird, but it's efficient for the callers.)
32983296
*/
3299-
int
3300-
XLogFileInit(XLogSegNo logsegno, bool *added)
3297+
static int
3298+
XLogFileInitInternal(XLogSegNo logsegno, bool *added, char *path)
33013299
{
3302-
char path[MAXPGPATH];
33033300
char tmppath[MAXPGPATH];
33043301
PGAlignedXLogBlock zbuffer;
33053302
XLogSegNo installed_segno;
@@ -3452,26 +3449,53 @@ XLogFileInit(XLogSegNo logsegno, bool *added)
34523449
*/
34533450
max_segno = logsegno + CheckPointSegments;
34543451
if (InstallXLogFileSegment(&installed_segno, tmppath, true, max_segno))
3452+
{
34553453
*added = true;
3454+
elog(DEBUG2, "done creating and filling new WAL file");
3455+
}
34563456
else
34573457
{
34583458
/*
34593459
* No need for any more future segments, or InstallXLogFileSegment()
3460-
* failed to rename the file into place. If the rename failed, opening
3461-
* the file below will fail.
3460+
* failed to rename the file into place. If the rename failed, a
3461+
* caller opening the file may fail.
34623462
*/
34633463
unlink(tmppath);
3464+
elog(DEBUG2, "abandoned new WAL file");
34643465
}
34653466

3467+
return -1;
3468+
}
3469+
3470+
/*
3471+
* Create a new XLOG file segment, or open a pre-existing one.
3472+
*
3473+
* logsegno: identify segment to be created/opened.
3474+
*
3475+
* Returns FD of opened file.
3476+
*
3477+
* Note: errors here are ERROR not PANIC because we might or might not be
3478+
* inside a critical section (eg, during checkpoint there is no reason to
3479+
* take down the system on failure). They will promote to PANIC if we are
3480+
* in a critical section.
3481+
*/
3482+
int
3483+
XLogFileInit(XLogSegNo logsegno)
3484+
{
3485+
bool ignore_added;
3486+
char path[MAXPGPATH];
3487+
int fd;
3488+
3489+
fd = XLogFileInitInternal(logsegno, &ignore_added, path);
3490+
if (fd >= 0)
3491+
return fd;
3492+
34663493
/* Now open original target segment (might not be file I just made) */
34673494
fd = BasicOpenFile(path, O_RDWR | PG_BINARY | get_sync_bit(sync_method));
34683495
if (fd < 0)
34693496
ereport(ERROR,
34703497
(errcode_for_file_access(),
34713498
errmsg("could not open file \"%s\": %m", path)));
3472-
3473-
elog(DEBUG2, "done creating and filling new WAL file");
3474-
34753499
return fd;
34763500
}
34773501

@@ -3928,22 +3952,33 @@ XLogFileClose(void)
39283952
* High-volume systems will be OK once they've built up a sufficient set of
39293953
* recycled log segments, but the startup transient is likely to include
39303954
* a lot of segment creations by foreground processes, which is not so good.
3955+
*
3956+
* XLogFileInitInternal() can ereport(ERROR). All known causes indicate big
3957+
* trouble; for example, a full filesystem is one cause. The checkpoint WAL
3958+
* and/or ControlFile updates already completed. If a RequestCheckpoint()
3959+
* initiated the present checkpoint and an ERROR ends this function, the
3960+
* command that called RequestCheckpoint() fails. That's not ideal, but it's
3961+
* not worth contorting more functions to use caller-specified elevel values.
3962+
* (With or without RequestCheckpoint(), an ERROR forestalls some inessential
3963+
* reporting and resource reclamation.)
39313964
*/
39323965
static void
39333966
PreallocXlogFiles(XLogRecPtr endptr)
39343967
{
39353968
XLogSegNo _logSegNo;
39363969
int lf;
39373970
bool added;
3971+
char path[MAXPGPATH];
39383972
uint64 offset;
39393973

39403974
XLByteToPrevSeg(endptr, _logSegNo, wal_segment_size);
39413975
offset = XLogSegmentOffset(endptr - 1, wal_segment_size);
39423976
if (offset >= (uint32) (0.75 * wal_segment_size))
39433977
{
39443978
_logSegNo++;
3945-
lf = XLogFileInit(_logSegNo, &added);
3946-
close(lf);
3979+
lf = XLogFileInitInternal(_logSegNo, &added, path);
3980+
if (lf >= 0)
3981+
close(lf);
39473982
if (added)
39483983
CheckpointStats.ckpt_segs_added++;
39493984
}
@@ -5258,7 +5293,6 @@ BootStrapXLOG(void)
52585293
XLogLongPageHeader longpage;
52595294
XLogRecord *record;
52605295
char *recptr;
5261-
bool added;
52625296
uint64 sysidentifier;
52635297
struct timeval tv;
52645298
pg_crc32c crc;
@@ -5355,7 +5389,7 @@ BootStrapXLOG(void)
53555389
record->xl_crc = crc;
53565390

53575391
/* Create first XLOG segment file */
5358-
openLogFile = XLogFileInit(1, &added);
5392+
openLogFile = XLogFileInit(1);
53595393

53605394
/*
53615395
* We needn't bother with Reserve/ReleaseExternalFD here, since we'll
@@ -5661,10 +5695,9 @@ exitArchiveRecovery(TimeLineID endTLI, XLogRecPtr endOfLog)
56615695
* The switch happened at a segment boundary, so just create the next
56625696
* segment on the new timeline.
56635697
*/
5664-
bool added;
56655698
int fd;
56665699

5667-
fd = XLogFileInit(startLogSegNo, &added);
5700+
fd = XLogFileInit(startLogSegNo);
56685701

56695702
if (close(fd) != 0)
56705703
{

src/backend/replication/walreceiver.c

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -890,11 +890,9 @@ XLogWalRcvWrite(char *buf, Size nbytes, XLogRecPtr recptr)
890890

891891
if (recvFile < 0)
892892
{
893-
bool added = true;
894-
895893
/* Create/use new log file */
896894
XLByteToSeg(recptr, recvSegNo, wal_segment_size);
897-
recvFile = XLogFileInit(recvSegNo, &added);
895+
recvFile = XLogFileInit(recvSegNo);
898896
recvFileTLI = ThisTimeLineID;
899897
}
900898

src/include/access/xlog.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -296,7 +296,7 @@ extern XLogRecPtr XLogInsertRecord(struct XLogRecData *rdata,
296296
extern void XLogFlush(XLogRecPtr RecPtr);
297297
extern bool XLogBackgroundFlush(void);
298298
extern bool XLogNeedsFlush(XLogRecPtr RecPtr);
299-
extern int XLogFileInit(XLogSegNo segno, bool *added);
299+
extern int XLogFileInit(XLogSegNo segno);
300300
extern int XLogFileOpen(XLogSegNo segno);
301301

302302
extern void CheckXLogRemoved(XLogSegNo segno, TimeLineID tli);

0 commit comments

Comments
 (0)