Skip to content

Commit ca572db

Browse files
committed
Fix scenario where streaming standby gets stuck at a continuation record.
If a continuation record is split so that its first half has already been removed from the master, and is only present in pg_wal, and there is a recycled WAL segment in the standby server that looks like it would contain the second half, recovery would get stuck. The code in XLogPageRead() incorrectly started streaming at the beginning of the WAL record, even if we had already read the first page. Backpatch to 9.4. In principle, older versions have the same problem, but without replication slots, there was no straightforward mechanism to prevent the master from recycling old WAL that was still needed by standby. Without such a mechanism, I think it's reasonable to assume that there's enough slack in how many old segments are kept around to not run into this, or you have a WAL archive. Reported by Jonathon Nelson. Analysis and patch by Kyotaro HORIGUCHI, with some extra comments by me. Discussion: https://www.postgresql.org/message-id/CACJqAM3xVz0JY1XFDKPP%2BJoJAjoGx%3DGNuOAshEDWCext7BFvCQ%40mail.gmail.com
1 parent e1d6347 commit ca572db

File tree

3 files changed

+62
-13
lines changed

3 files changed

+62
-13
lines changed

src/backend/access/transam/xlog.c

Lines changed: 42 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11611,6 +11611,40 @@ XLogPageRead(XLogReaderState *xlogreader, XLogRecPtr targetPagePtr, int reqLen,
1161111611
Assert(reqLen <= readLen);
1161211612

1161311613
*readTLI = curFileTLI;
11614+
11615+
/*
11616+
* Check the page header immediately, so that we can retry immediately if
11617+
* it's not valid. This may seem unnecessary, because XLogReadRecord()
11618+
* validates the page header anyway, and would propagate the failure up to
11619+
* ReadRecord(), which would retry. However, there's a corner case with
11620+
* continuation records, if a record is split across two pages such that
11621+
* we would need to read the two pages from different sources. For
11622+
* example, imagine a scenario where a streaming replica is started up,
11623+
* and replay reaches a record that's split across two WAL segments. The
11624+
* first page is only available locally, in pg_wal, because it's already
11625+
* been recycled in the master. The second page, however, is not present
11626+
* in pg_wal, and we should stream it from the master. There is a recycled
11627+
* WAL segment present in pg_wal, with garbage contents, however. We would
11628+
* read the first page from the local WAL segment, but when reading the
11629+
* second page, we would read the bogus, recycled, WAL segment. If we
11630+
* didn't catch that case here, we would never recover, because
11631+
* ReadRecord() would retry reading the whole record from the beginning.
11632+
*
11633+
* Of course, this only catches errors in the page header, which is what
11634+
* happens in the case of a recycled WAL segment. Other kinds of errors or
11635+
* corruption still have the same problem. But this at least fixes the
11636+
* common case, which can happen as part of normal operation.
11637+
*
11638+
* Validating the page header is cheap enough that doing it twice
11639+
* shouldn't be a big deal from a performance point of view.
11640+
*/
11641+
if (!XLogReaderValidatePageHeader(xlogreader, targetPagePtr, readBuf))
11642+
{
11643+
/* reset any error XLogReaderValidatePageHeader() might have set */
11644+
xlogreader->errormsg_buf[0] = '\0';
11645+
goto next_record_is_invalid;
11646+
}
11647+
1161411648
return readLen;
1161511649

1161611650
next_record_is_invalid:
@@ -11745,12 +11779,18 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess,
1174511779
}
1174611780
else
1174711781
{
11748-
ptr = tliRecPtr;
11782+
ptr = RecPtr;
11783+
11784+
/*
11785+
* Use the record begin position to determine the
11786+
* TLI, rather than the position we're reading.
11787+
*/
1174911788
tli = tliOfPointInHistory(tliRecPtr, expectedTLEs);
1175011789

1175111790
if (curFileTLI > 0 && tli < curFileTLI)
1175211791
elog(ERROR, "according to history file, WAL location %X/%X belongs to timeline %u, but previous recovered WAL file came from timeline %u",
11753-
(uint32) (ptr >> 32), (uint32) ptr,
11792+
(uint32) (tliRecPtr >> 32),
11793+
(uint32) tliRecPtr,
1175411794
tli, curFileTLI);
1175511795
}
1175611796
curFileTLI = tli;

src/backend/access/transam/xlogreader.c

Lines changed: 16 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -27,8 +27,6 @@
2727

2828
static bool allocate_recordbuf(XLogReaderState *state, uint32 reclength);
2929

30-
static bool ValidXLogPageHeader(XLogReaderState *state, XLogRecPtr recptr,
31-
XLogPageHeader hdr);
3230
static bool ValidXLogRecordHeader(XLogReaderState *state, XLogRecPtr RecPtr,
3331
XLogRecPtr PrevRecPtr, XLogRecord *record, bool randAccess);
3432
static bool ValidXLogRecord(XLogReaderState *state, XLogRecord *record,
@@ -531,7 +529,6 @@ ReadPageInternal(XLogReaderState *state, XLogRecPtr pageptr, int reqLen)
531529
*/
532530
if (targetSegNo != state->readSegNo && targetPageOff != 0)
533531
{
534-
XLogPageHeader hdr;
535532
XLogRecPtr targetSegmentPtr = pageptr - targetPageOff;
536533

537534
readLen = state->read_page(state, targetSegmentPtr, XLOG_BLCKSZ,
@@ -543,9 +540,8 @@ ReadPageInternal(XLogReaderState *state, XLogRecPtr pageptr, int reqLen)
543540
/* we can be sure to have enough WAL available, we scrolled back */
544541
Assert(readLen == XLOG_BLCKSZ);
545542

546-
hdr = (XLogPageHeader) state->readBuf;
547-
548-
if (!ValidXLogPageHeader(state, targetSegmentPtr, hdr))
543+
if (!XLogReaderValidatePageHeader(state, targetSegmentPtr,
544+
state->readBuf))
549545
goto err;
550546
}
551547

@@ -582,7 +578,7 @@ ReadPageInternal(XLogReaderState *state, XLogRecPtr pageptr, int reqLen)
582578
/*
583579
* Now that we know we have the full header, validate it.
584580
*/
585-
if (!ValidXLogPageHeader(state, pageptr, hdr))
581+
if (!XLogReaderValidatePageHeader(state, pageptr, (char *) hdr))
586582
goto err;
587583

588584
/* update read state information */
@@ -707,15 +703,19 @@ ValidXLogRecord(XLogReaderState *state, XLogRecord *record, XLogRecPtr recptr)
707703
}
708704

709705
/*
710-
* Validate a page header
706+
* Validate a page header.
707+
*
708+
* Check if 'phdr' is valid as the header of the XLog page at position
709+
* 'recptr'.
711710
*/
712-
static bool
713-
ValidXLogPageHeader(XLogReaderState *state, XLogRecPtr recptr,
714-
XLogPageHeader hdr)
711+
bool
712+
XLogReaderValidatePageHeader(XLogReaderState *state, XLogRecPtr recptr,
713+
char *phdr)
715714
{
716715
XLogRecPtr recaddr;
717716
XLogSegNo segno;
718717
int32 offset;
718+
XLogPageHeader hdr = (XLogPageHeader) phdr;
719719

720720
Assert((recptr % XLOG_BLCKSZ) == 0);
721721

@@ -803,6 +803,11 @@ ValidXLogPageHeader(XLogReaderState *state, XLogRecPtr recptr,
803803
return false;
804804
}
805805

806+
/*
807+
* Check that the address on the page agrees with what we expected.
808+
* This check typically fails when an old WAL segment is recycled,
809+
* and hasn't been overwritten with new data yet.
810+
*/
806811
if (hdr->xlp_pageaddr != recaddr)
807812
{
808813
char fname[MAXFNAMELEN];

src/include/access/xlogreader.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -199,6 +199,10 @@ extern void XLogReaderFree(XLogReaderState *state);
199199
extern struct XLogRecord *XLogReadRecord(XLogReaderState *state,
200200
XLogRecPtr recptr, char **errormsg);
201201

202+
/* Validate a page header */
203+
extern bool XLogReaderValidatePageHeader(XLogReaderState *state,
204+
XLogRecPtr recptr, char *phdr);
205+
202206
/* Invalidate read state */
203207
extern void XLogReaderInvalReadState(XLogReaderState *state);
204208

0 commit comments

Comments
 (0)