Skip to content

Commit 09c2e7c

Browse files
committed
hash: Fix write-ahead logging bugs related to init forks.

One, logging for CREATE INDEX was oblivious to the fact that when an unlogged table is created, *only* operations on the init fork should be logged. Two, init fork buffers need to be flushed after they are written; otherwise, a filesystem-level copy following recovery may do the wrong thing. (There may be a better fix for this issue than the one used here, but this is transposed from the similar logic already present in XLogReadBufferForRedoExtended, and a broader refactoring after beta2 seems inadvisable.)

Amit Kapila, reviewed by Ashutosh Sharma, Kyotaro Horiguchi, and Michael Paquier

Discussion: http://postgr.es/m/CAA4eK1JpcMsEtOL_J7WODumeEfyrPi7FPYHeVdS7fyyrCrgp4w@mail.gmail.com
1 parent 2f7f45a commit 09c2e7c

File tree

2 files changed

+43
-7
lines changed

2 files changed

+43
-7
lines changed

src/backend/access/hash/hash_xlog.c

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ hash_xlog_init_meta_page(XLogReaderState *record)
3333
XLogRecPtr lsn = record->EndRecPtr;
3434
Page page;
3535
Buffer metabuf;
36+
ForkNumber forknum;
3637

3738
xl_hash_init_meta_page *xlrec = (xl_hash_init_meta_page *) XLogRecGetData(record);
3839

@@ -44,6 +45,17 @@ hash_xlog_init_meta_page(XLogReaderState *record)
4445
page = (Page) BufferGetPage(metabuf);
4546
PageSetLSN(page, lsn);
4647
MarkBufferDirty(metabuf);
48+
49+
/*
50+
* Force the on-disk state of init forks to always be in sync with the
51+
* state in shared buffers. See XLogReadBufferForRedoExtended. We need
52+
* special handling for init forks as create index operations don't log a
53+
* full page image of the metapage.
54+
*/
55+
XLogRecGetBlockTag(record, 0, NULL, &forknum, NULL);
56+
if (forknum == INIT_FORKNUM)
57+
FlushOneBuffer(metabuf);
58+
4759
/* all done */
4860
UnlockReleaseBuffer(metabuf);
4961
}
@@ -60,6 +72,7 @@ hash_xlog_init_bitmap_page(XLogReaderState *record)
6072
Page page;
6173
HashMetaPage metap;
6274
uint32 num_buckets;
75+
ForkNumber forknum;
6376

6477
xl_hash_init_bitmap_page *xlrec = (xl_hash_init_bitmap_page *) XLogRecGetData(record);
6578

@@ -70,6 +83,16 @@ hash_xlog_init_bitmap_page(XLogReaderState *record)
7083
_hash_initbitmapbuffer(bitmapbuf, xlrec->bmsize, true);
7184
PageSetLSN(BufferGetPage(bitmapbuf), lsn);
7285
MarkBufferDirty(bitmapbuf);
86+
87+
/*
88+
* Force the on-disk state of init forks to always be in sync with the
89+
* state in shared buffers. See XLogReadBufferForRedoExtended. We need
90+
* special handling for init forks as create index operations don't log a
91+
* full page image of the metapage.
92+
*/
93+
XLogRecGetBlockTag(record, 0, NULL, &forknum, NULL);
94+
if (forknum == INIT_FORKNUM)
95+
FlushOneBuffer(bitmapbuf);
7396
UnlockReleaseBuffer(bitmapbuf);
7497

7598
/* add the new bitmap page to the metapage's list of bitmaps */
@@ -90,6 +113,10 @@ hash_xlog_init_bitmap_page(XLogReaderState *record)
90113

91114
PageSetLSN(page, lsn);
92115
MarkBufferDirty(metabuf);
116+
117+
XLogRecGetBlockTag(record, 1, NULL, &forknum, NULL);
118+
if (forknum == INIT_FORKNUM)
119+
FlushOneBuffer(metabuf);
93120
}
94121
if (BufferIsValid(metabuf))
95122
UnlockReleaseBuffer(metabuf);

src/backend/access/hash/hashpage.c

Lines changed: 16 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -345,12 +345,20 @@ _hash_init(Relation rel, double num_tuples, ForkNumber forkNum)
345345
int32 ffactor;
346346
uint32 num_buckets;
347347
uint32 i;
348+
bool use_wal;
348349

349350
/* safety check */
350351
if (RelationGetNumberOfBlocksInFork(rel, forkNum) != 0)
351352
elog(ERROR, "cannot initialize non-empty hash index \"%s\"",
352353
RelationGetRelationName(rel));
353354

355+
/*
356+
* WAL log creation of pages if the relation is persistent, or this is the
357+
* init fork. Init forks for unlogged relations always need to be WAL
358+
* logged.
359+
*/
360+
use_wal = RelationNeedsWAL(rel) || forkNum == INIT_FORKNUM;
361+
354362
/*
355363
* Determine the target fill factor (in tuples per bucket) for this index.
356364
* The idea is to make the fill factor correspond to pages about as full
@@ -384,7 +392,7 @@ _hash_init(Relation rel, double num_tuples, ForkNumber forkNum)
384392
metap = HashPageGetMeta(pg);
385393

386394
/* XLOG stuff */
387-
if (RelationNeedsWAL(rel))
395+
if (use_wal)
388396
{
389397
xl_hash_init_meta_page xlrec;
390398
XLogRecPtr recptr;
@@ -427,11 +435,12 @@ _hash_init(Relation rel, double num_tuples, ForkNumber forkNum)
427435
_hash_initbuf(buf, metap->hashm_maxbucket, i, LH_BUCKET_PAGE, false);
428436
MarkBufferDirty(buf);
429437

430-
log_newpage(&rel->rd_node,
431-
forkNum,
432-
blkno,
433-
BufferGetPage(buf),
434-
true);
438+
if (use_wal)
439+
log_newpage(&rel->rd_node,
440+
forkNum,
441+
blkno,
442+
BufferGetPage(buf),
443+
true);
435444
_hash_relbuf(rel, buf);
436445
}
437446

@@ -459,7 +468,7 @@ _hash_init(Relation rel, double num_tuples, ForkNumber forkNum)
459468
MarkBufferDirty(metabuf);
460469

461470
/* XLOG stuff */
462-
if (RelationNeedsWAL(rel))
471+
if (use_wal)
463472
{
464473
xl_hash_init_bitmap_page xlrec;
465474
XLogRecPtr recptr;

0 commit comments

Comments (0)