Skip to content

Commit 59c02a3

Browse files
committed
Fix assert failure at end of recovery, broken by XLogInsert scaling patch.
Initialization of the first XLOG buffer at end-of-recovery was broken for the case that the last read WAL record ended at a page boundary. Instead of trying to copy the last full xlog page to the buffer cache in that case, just set shared state so that the next page is initialized when the first WAL record after startup is inserted. (That's what we did in earlier versions, too.) To make the shared state required for that case less surprising, replace the XLogCtl->curridx variable, which was the index of the latest initialized buffer, with an XLogRecPtr of how far the buffers have been initialized. That also allows us to get rid of the XLogRecEndPtrToBufIdx macro. While we're at it, make a similar change for XLogCtl->Write.curridx, getting rid of that variable and calculating the next buffer to write from XLogCtl->LogwrtResult instead.
1 parent 3f2adac commit 59c02a3

File tree

1 file changed

+45
-66
lines changed
  • src/backend/access/transam

1 file changed

+45
-66
lines changed

src/backend/access/transam/xlog.c

Lines changed: 45 additions & 66 deletions
Original file line numberDiff line numberDiff line change
@@ -457,15 +457,6 @@ typedef struct XLogCtlInsert
457457
XLogRecPtr lastBackupStart;
458458
} XLogCtlInsert;
459459

460-
/*
461-
* Shared state data for XLogWrite/XLogFlush.
462-
*/
463-
typedef struct XLogCtlWrite
464-
{
465-
int curridx; /* cache index of next block to write */
466-
pg_time_t lastSegSwitchTime; /* time of last xlog segment switch */
467-
} XLogCtlWrite;
468-
469460
/*
470461
* Total shared-memory state for XLOG.
471462
*/
@@ -482,12 +473,12 @@ typedef struct XLogCtlData
482473
XLogSegNo lastRemovedSegNo; /* latest removed/recycled XLOG
483474
* segment */
484475

485-
/* Fake LSN counter, for unlogged relations. Protected by ulsn_lck */
476+
/* Fake LSN counter, for unlogged relations. Protected by ulsn_lck. */
486477
XLogRecPtr unloggedLSN;
487478
slock_t ulsn_lck;
488479

489-
/* Protected by WALWriteLock: */
490-
XLogCtlWrite Write;
480+
/* Time of last xlog segment switch. Protected by WALWriteLock. */
481+
pg_time_t lastSegSwitchTime;
491482

492483
/*
493484
* Protected by info_lck and WALWriteLock (you must hold either lock to
@@ -496,15 +487,15 @@ typedef struct XLogCtlData
496487
XLogwrtResult LogwrtResult;
497488

498489
/*
499-
* Latest initialized block index in cache.
490+
* Latest initialized page in the cache (last byte position + 1).
500491
*
501-
* To change curridx and the identity of a buffer, you need to hold
502-
* WALBufMappingLock. To change the identity of a buffer that's still
492+
* To change the identity of a buffer (and InitializedUpTo), you need to
493+
* hold WALBufMappingLock. To change the identity of a buffer that's still
503494
* dirty, the old page needs to be written out first, and for that you
504495
* need WALWriteLock, and you need to ensure that there are no in-progress
505496
* insertions to the page by calling WaitXLogInsertionsToFinish().
506497
*/
507-
int curridx;
498+
XLogRecPtr InitializedUpTo;
508499

509500
/*
510501
* These values do not change after startup, although the pointed-to pages
@@ -618,16 +609,10 @@ static ControlFileData *ControlFile = NULL;
618609
/*
619610
* XLogRecPtrToBufIdx returns the index of the WAL buffer that holds, or
620611
* would hold if it was in cache, the page containing 'recptr'.
621-
*
622-
* XLogRecEndPtrToBufIdx is the same, but a pointer to the first byte of a
623-
* page is taken to mean the previous page.
624612
*/
625613
#define XLogRecPtrToBufIdx(recptr) \
626614
(((recptr) / XLOG_BLCKSZ) % (XLogCtl->XLogCacheBlck + 1))
627615

628-
#define XLogRecEndPtrToBufIdx(recptr) \
629-
((((recptr) - 1) / XLOG_BLCKSZ) % (XLogCtl->XLogCacheBlck + 1))
630-
631616
/*
632617
* These are the number of bytes in a WAL page and segment usable for WAL data.
633618
*/
@@ -2409,9 +2394,9 @@ AdvanceXLInsertBuffer(XLogRecPtr upto, bool opportunistic)
24092394
* Now that we have the lock, check if someone initialized the page
24102395
* already.
24112396
*/
2412-
while (upto >= XLogCtl->xlblocks[XLogCtl->curridx] || opportunistic)
2397+
while (upto >= XLogCtl->InitializedUpTo || opportunistic)
24132398
{
2414-
nextidx = NextBufIdx(XLogCtl->curridx);
2399+
nextidx = XLogRecPtrToBufIdx(XLogCtl->InitializedUpTo);
24152400

24162401
/*
24172402
* Get ending-offset of the buffer page we need to replace (this may
@@ -2484,11 +2469,9 @@ AdvanceXLInsertBuffer(XLogRecPtr upto, bool opportunistic)
24842469
* Now the next buffer slot is free and we can set it up to be the next
24852470
* output page.
24862471
*/
2487-
NewPageBeginPtr = XLogCtl->xlblocks[XLogCtl->curridx];
2472+
NewPageBeginPtr = XLogCtl->InitializedUpTo;
24882473
NewPageEndPtr = NewPageBeginPtr + XLOG_BLCKSZ;
24892474

2490-
Assert(NewPageEndPtr % XLOG_BLCKSZ == 0);
2491-
Assert(XLogRecEndPtrToBufIdx(NewPageEndPtr) == nextidx);
24922475
Assert(XLogRecPtrToBufIdx(NewPageBeginPtr) == nextidx);
24932476

24942477
NewPage = (XLogPageHeader) (XLogCtl->pages + nextidx * (Size) XLOG_BLCKSZ);
@@ -2547,7 +2530,7 @@ AdvanceXLInsertBuffer(XLogRecPtr upto, bool opportunistic)
25472530

25482531
*((volatile XLogRecPtr *) &XLogCtl->xlblocks[nextidx]) = NewPageEndPtr;
25492532

2550-
XLogCtl->curridx = nextidx;
2533+
XLogCtl->InitializedUpTo = NewPageEndPtr;
25512534

25522535
npages++;
25532536
}
@@ -2598,7 +2581,6 @@ XLogCheckpointNeeded(XLogSegNo new_segno)
25982581
static void
25992582
XLogWrite(XLogwrtRqst WriteRqst, bool flexible)
26002583
{
2601-
XLogCtlWrite *Write = &XLogCtl->Write;
26022584
bool ispartialpage;
26032585
bool last_iteration;
26042586
bool finishing_seg;
@@ -2631,12 +2613,10 @@ XLogWrite(XLogwrtRqst WriteRqst, bool flexible)
26312613

26322614
/*
26332615
* Within the loop, curridx is the cache block index of the page to
2634-
* consider writing. We advance Write->curridx only after successfully
2635-
* writing pages. (Right now, this refinement is useless since we are
2636-
* going to PANIC if any error occurs anyway; but someday it may come in
2637-
* useful.)
2616+
* consider writing. Begin at the buffer containing the next unwritten
2617+
* page, or the last partially written page.
26382618
*/
2639-
curridx = Write->curridx;
2619+
curridx = XLogRecPtrToBufIdx(LogwrtResult.Write);
26402620

26412621
while (LogwrtResult.Write < WriteRqst.Write)
26422622
{
@@ -2747,7 +2727,6 @@ XLogWrite(XLogwrtRqst WriteRqst, bool flexible)
27472727

27482728
/* Update state for write */
27492729
openLogOff += nbytes;
2750-
Write->curridx = ispartialpage ? curridx : NextBufIdx(curridx);
27512730
npages = 0;
27522731

27532732
/*
@@ -2775,7 +2754,7 @@ XLogWrite(XLogwrtRqst WriteRqst, bool flexible)
27752754
if (XLogArchivingActive())
27762755
XLogArchiveNotifySeg(openLogSegNo);
27772756

2778-
Write->lastSegSwitchTime = (pg_time_t) time(NULL);
2757+
XLogCtl->lastSegSwitchTime = (pg_time_t) time(NULL);
27792758

27802759
/*
27812760
* Request a checkpoint if we've consumed too much xlog since
@@ -2807,7 +2786,6 @@ XLogWrite(XLogwrtRqst WriteRqst, bool flexible)
28072786
}
28082787

28092788
Assert(npages == 0);
2810-
Assert(curridx == Write->curridx);
28112789

28122790
/*
28132791
* If asked to flush, do so
@@ -6021,12 +5999,10 @@ StartupXLOG(void)
60215999
XLogSegNo endLogSegNo;
60226000
TimeLineID PrevTimeLineID;
60236001
XLogRecord *record;
6024-
uint32 freespace;
60256002
TransactionId oldestActiveXID;
60266003
bool backupEndRequired = false;
60276004
bool backupFromStandby = false;
60286005
DBState dbstate_at_startup;
6029-
int firstIdx;
60306006
XLogReaderState *xlogreader;
60316007
XLogPageReadPrivate private;
60326008
bool fast_promoted = false;
@@ -7034,48 +7010,51 @@ StartupXLOG(void)
70347010
openLogOff = 0;
70357011
Insert = &XLogCtl->Insert;
70367012
Insert->PrevBytePos = XLogRecPtrToBytePos(LastRec);
7037-
7038-
firstIdx = XLogRecEndPtrToBufIdx(EndOfLog);
7039-
XLogCtl->curridx = firstIdx;
7040-
7041-
XLogCtl->xlblocks[firstIdx] = ((EndOfLog - 1) / XLOG_BLCKSZ + 1) * XLOG_BLCKSZ;
7013+
Insert->CurrBytePos = XLogRecPtrToBytePos(EndOfLog);
70427014

70437015
/*
70447016
* Tricky point here: readBuf contains the *last* block that the LastRec
70457017
* record spans, not the one it starts in. The last block is indeed the
70467018
* one we want to use.
70477019
*/
7048-
Assert(readOff == (XLogCtl->xlblocks[firstIdx] - XLOG_BLCKSZ) % XLogSegSize);
7049-
memcpy((char *) &XLogCtl->pages[firstIdx * XLOG_BLCKSZ], xlogreader->readBuf, XLOG_BLCKSZ);
7050-
Insert->CurrBytePos = XLogRecPtrToBytePos(EndOfLog);
7020+
if (EndOfLog % XLOG_BLCKSZ != 0)
7021+
{
7022+
char *page;
7023+
int len;
7024+
int firstIdx;
7025+
XLogRecPtr pageBeginPtr;
70517026

7052-
LogwrtResult.Write = LogwrtResult.Flush = EndOfLog;
7027+
pageBeginPtr = EndOfLog - (EndOfLog % XLOG_BLCKSZ);
7028+
Assert(readOff == pageBeginPtr % XLogSegSize);
70537029

7054-
XLogCtl->LogwrtResult = LogwrtResult;
7030+
firstIdx = XLogRecPtrToBufIdx(EndOfLog);
70557031

7056-
XLogCtl->LogwrtRqst.Write = EndOfLog;
7057-
XLogCtl->LogwrtRqst.Flush = EndOfLog;
7032+
/* Copy the valid part of the last block, and zero the rest */
7033+
page = &XLogCtl->pages[firstIdx * XLOG_BLCKSZ];
7034+
len = EndOfLog % XLOG_BLCKSZ;
7035+
memcpy(page, xlogreader->readBuf, len);
7036+
memset(page + len, 0, XLOG_BLCKSZ - len);
70587037

7059-
freespace = INSERT_FREESPACE(EndOfLog);
7060-
if (freespace > 0)
7061-
{
7062-
/* Make sure rest of page is zero */
7063-
MemSet(&XLogCtl->pages[firstIdx * XLOG_BLCKSZ] + EndOfLog % XLOG_BLCKSZ, 0, freespace);
7064-
XLogCtl->Write.curridx = firstIdx;
7038+
XLogCtl->xlblocks[firstIdx] = pageBeginPtr + XLOG_BLCKSZ;
7039+
XLogCtl->InitializedUpTo = pageBeginPtr + XLOG_BLCKSZ;
70657040
}
70667041
else
70677042
{
70687043
/*
7069-
* Whenever LogwrtResult points to exactly the end of a page,
7070-
* Write.curridx must point to the *next* page (see XLogWrite()).
7071-
*
7072-
* Note: it might seem we should do AdvanceXLInsertBuffer() here, but
7073-
* this is sufficient. The first actual attempt to insert a log
7074-
* record will advance the insert state.
7044+
* There is no partial block to copy. Just set InitializedUpTo,
7045+
* and let the first attempt to insert a log record initialize
7046+
* the next buffer.
70757047
*/
7076-
XLogCtl->Write.curridx = NextBufIdx(firstIdx);
7048+
XLogCtl->InitializedUpTo = EndOfLog;
70777049
}
70787050

7051+
LogwrtResult.Write = LogwrtResult.Flush = EndOfLog;
7052+
7053+
XLogCtl->LogwrtResult = LogwrtResult;
7054+
7055+
XLogCtl->LogwrtRqst.Write = EndOfLog;
7056+
XLogCtl->LogwrtRqst.Flush = EndOfLog;
7057+
70797058
/* Pre-scan prepared transactions to find out the range of XIDs present */
70807059
oldestActiveXID = PrescanPreparedTransactions(NULL, NULL);
70817060

@@ -7199,7 +7178,7 @@ StartupXLOG(void)
71997178
LWLockRelease(ControlFileLock);
72007179

72017180
/* start the archive_timeout timer running */
7202-
XLogCtl->Write.lastSegSwitchTime = (pg_time_t) time(NULL);
7181+
XLogCtl->lastSegSwitchTime = (pg_time_t) time(NULL);
72037182

72047183
/* also initialize latestCompletedXid, to nextXid - 1 */
72057184
LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
@@ -7710,7 +7689,7 @@ GetLastSegSwitchTime(void)
77107689

77117690
/* Need WALWriteLock, but shared lock is sufficient */
77127691
LWLockAcquire(WALWriteLock, LW_SHARED);
7713-
result = XLogCtl->Write.lastSegSwitchTime;
7692+
result = XLogCtl->lastSegSwitchTime;
77147693
LWLockRelease(WALWriteLock);
77157694

77167695
return result;

0 commit comments

Comments
 (0)