
Commit ccadf73

Use the buffer cache when initializing an unlogged index.
Some of the ambuildempty functions used smgrwrite() directly, followed by smgrimmedsync(). A few small problems with that:

Firstly, one is supposed to use smgrextend() when extending a relation, not smgrwrite(). It doesn't make much difference in production builds: smgrextend() updates the relation size cache, so you miss that, but that's harmless because we never use the cached relation size of an init fork. But if you compile with CHECK_WRITE_VS_EXTEND, you get an assertion failure.

Secondly, the smgrwrite() calls were performed before WAL-logging, so the page image written to disk had 0/0 as the LSN, not the LSN of the WAL record. That's also harmless in practice, but seems sloppy.

Thirdly, it's better to use the buffer cache, because then you don't need to smgrimmedsync() the relation to disk, which adds latency. Bypassing the cache makes sense for bulk operations like index creation, but not when you're just initializing an empty index. Creation of unlogged tables is hardly a performance bottleneck in any real-world application, but nevertheless.

Backpatch to v16, but no further. These issues should be harmless in practice, so better not to rock the boat in older branches.

Reviewed-by: Robert Haas
Discussion: https://www.postgresql.org/message-id/6e5bbc08-cdfc-b2b3-9e23-1a914b9850a9@iki.fi
1 parent ee99330 commit ccadf73
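
To make the pattern concrete before reading the diffs, here is a minimal sketch of the buffer-cache approach that each ambuildempty function is converted to below. It is not the committed code: the function name hypothetical_ambuildempty and the use of PageInit() in place of the AM-specific metapage setup (such as _bt_initmetapage() or SpGistInitMetapage()) are illustrative assumptions.

#include "postgres.h"

#include "access/xloginsert.h"
#include "miscadmin.h"
#include "storage/bufmgr.h"
#include "storage/bufpage.h"
#include "utils/rel.h"

/*
 * Sketch of an ambuildempty() that goes through the buffer manager.
 * PageInit() stands in for the AM-specific metapage initialization.
 */
void
hypothetical_ambuildempty(Relation index)
{
	Buffer		metabuf;
	Page		metapage;

	/* Extend the init fork via the buffer manager instead of smgrwrite(). */
	metabuf = ReadBufferExtended(index, INIT_FORKNUM, P_NEW, RBM_NORMAL, NULL);
	LockBuffer(metabuf, BUFFER_LOCK_EXCLUSIVE);
	Assert(BufferGetBlockNumber(metabuf) == 0);

	START_CRIT_SECTION();

	/* Fill the page and dirty the buffer before WAL-logging it. */
	metapage = BufferGetPage(metabuf);
	PageInit(metapage, BufferGetPageSize(metabuf), 0);
	MarkBufferDirty(metabuf);

	/*
	 * log_newpage_buffer() stamps the page with the WAL record's LSN, so the
	 * image that eventually reaches disk does not carry LSN 0/0.
	 */
	log_newpage_buffer(metabuf, true);

	END_CRIT_SECTION();

	/* No smgrimmedsync(): the dirty buffer is flushed by a later checkpoint. */
	UnlockReleaseBuffer(metabuf);
}

Because the page now lives in shared buffers, a concurrent checkpoint cannot move the redo pointer past the WAL record without the buffer being handled by the checkpoint protocol, which is why the explicit smgrimmedsync() call can be dropped.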

5 files changed: +62 additions, −89 deletions

contrib/bloom/blinsert.c

Lines changed: 3 additions & 26 deletions
@@ -129,7 +129,7 @@ blbuild(Relation heap, Relation index, IndexInfo *indexInfo)
 			 RelationGetRelationName(index));
 
 	/* Initialize the meta page */
-	BloomInitMetapage(index);
+	BloomInitMetapage(index, MAIN_FORKNUM);
 
 	/* Initialize the bloom build state */
 	memset(&buildstate, 0, sizeof(buildstate));
@@ -163,31 +163,8 @@ blbuild(Relation heap, Relation index, IndexInfo *indexInfo)
 void
 blbuildempty(Relation index)
 {
-	Page		metapage;
-
-	/* Construct metapage. */
-	metapage = (Page) palloc_aligned(BLCKSZ, PG_IO_ALIGN_SIZE, 0);
-	BloomFillMetapage(index, metapage);
-
-	/*
-	 * Write the page and log it. It might seem that an immediate sync would
-	 * be sufficient to guarantee that the file exists on disk, but recovery
-	 * itself might remove it while replaying, for example, an
-	 * XLOG_DBASE_CREATE* or XLOG_TBLSPC_CREATE record. Therefore, we need
-	 * this even when wal_level=minimal.
-	 */
-	PageSetChecksumInplace(metapage, BLOOM_METAPAGE_BLKNO);
-	smgrwrite(RelationGetSmgr(index), INIT_FORKNUM, BLOOM_METAPAGE_BLKNO,
-			  metapage, true);
-	log_newpage(&(RelationGetSmgr(index))->smgr_rlocator.locator, INIT_FORKNUM,
-				BLOOM_METAPAGE_BLKNO, metapage, true);
-
-	/*
-	 * An immediate sync is required even if we xlog'd the page, because the
-	 * write did not go through shared_buffers and therefore a concurrent
-	 * checkpoint may have moved the redo pointer past our xlog record.
-	 */
-	smgrimmedsync(RelationGetSmgr(index), INIT_FORKNUM);
+	/* Initialize the meta page */
+	BloomInitMetapage(index, INIT_FORKNUM);
 }
 
 /*

contrib/bloom/bloom.h

Lines changed: 1 addition & 1 deletion
@@ -177,7 +177,7 @@ typedef BloomScanOpaqueData *BloomScanOpaque;
 /* blutils.c */
 extern void initBloomState(BloomState *state, Relation index);
 extern void BloomFillMetapage(Relation index, Page metaPage);
-extern void BloomInitMetapage(Relation index);
+extern void BloomInitMetapage(Relation index, ForkNumber forknum);
 extern void BloomInitPage(Page page, uint16 flags);
 extern Buffer BloomNewBuffer(Relation index);
 extern void signValue(BloomState *state, BloomSignatureWord *sign, Datum value, int attno);

contrib/bloom/blutils.c

Lines changed: 5 additions & 3 deletions
@@ -443,17 +443,19 @@ BloomFillMetapage(Relation index, Page metaPage)
  * Initialize metapage for bloom index.
  */
 void
-BloomInitMetapage(Relation index)
+BloomInitMetapage(Relation index, ForkNumber forknum)
 {
 	Buffer		metaBuffer;
 	Page		metaPage;
 	GenericXLogState *state;
 
 	/*
 	 * Make a new page; since it is first page it should be associated with
-	 * block number 0 (BLOOM_METAPAGE_BLKNO).
+	 * block number 0 (BLOOM_METAPAGE_BLKNO). No need to hold the extension
+	 * lock because there cannot be concurrent inserters yet.
 	 */
-	metaBuffer = BloomNewBuffer(index);
+	metaBuffer = ReadBufferExtended(index, forknum, P_NEW, RBM_NORMAL, NULL);
+	LockBuffer(metaBuffer, BUFFER_LOCK_EXCLUSIVE);
 	Assert(BufferGetBlockNumber(metaBuffer) == BLOOM_METAPAGE_BLKNO);
 
 	/* Initialize contents of meta page */

src/backend/access/nbtree/nbtree.c

Lines changed: 21 additions & 20 deletions
@@ -151,31 +151,32 @@ bthandler(PG_FUNCTION_ARGS)
 void
 btbuildempty(Relation index)
 {
+	Buffer		metabuf;
 	Page		metapage;
 
-	/* Construct metapage. */
-	metapage = (Page) palloc_aligned(BLCKSZ, PG_IO_ALIGN_SIZE, 0);
-	_bt_initmetapage(metapage, P_NONE, 0, _bt_allequalimage(index, false));
-
 	/*
-	 * Write the page and log it. It might seem that an immediate sync would
-	 * be sufficient to guarantee that the file exists on disk, but recovery
-	 * itself might remove it while replaying, for example, an
-	 * XLOG_DBASE_CREATE* or XLOG_TBLSPC_CREATE record. Therefore, we need
-	 * this even when wal_level=minimal.
+	 * Initialize the metapage.
+	 *
+	 * Regular index build bypasses the buffer manager and uses smgr functions
+	 * directly, with an smgrimmedsync() call at the end. That makes sense
+	 * when the index is large, but for an empty index, it's better to use the
+	 * buffer cache to avoid the smgrimmedsync().
 	 */
-	PageSetChecksumInplace(metapage, BTREE_METAPAGE);
-	smgrwrite(RelationGetSmgr(index), INIT_FORKNUM, BTREE_METAPAGE,
-			  metapage, true);
-	log_newpage(&RelationGetSmgr(index)->smgr_rlocator.locator, INIT_FORKNUM,
-				BTREE_METAPAGE, metapage, true);
+	metabuf = ReadBufferExtended(index, INIT_FORKNUM, P_NEW, RBM_NORMAL, NULL);
+	Assert(BufferGetBlockNumber(metabuf) == BTREE_METAPAGE);
+	_bt_lockbuf(index, metabuf, BT_WRITE);
 
-	/*
-	 * An immediate sync is required even if we xlog'd the page, because the
-	 * write did not go through shared_buffers and therefore a concurrent
-	 * checkpoint may have moved the redo pointer past our xlog record.
-	 */
-	smgrimmedsync(RelationGetSmgr(index), INIT_FORKNUM);
+	START_CRIT_SECTION();
+
+	metapage = BufferGetPage(metabuf);
+	_bt_initmetapage(metapage, P_NONE, 0, _bt_allequalimage(index, false));
+	MarkBufferDirty(metabuf);
+	log_newpage_buffer(metabuf, true);
+
+	END_CRIT_SECTION();
+
+	_bt_unlockbuf(index, metabuf);
+	ReleaseBuffer(metabuf);
 }
 
 /*

src/backend/access/spgist/spginsert.c

Lines changed: 32 additions & 39 deletions
@@ -155,49 +155,42 @@ spgbuild(Relation heap, Relation index, IndexInfo *indexInfo)
 void
 spgbuildempty(Relation index)
 {
-	Page		page;
-
-	/* Construct metapage. */
-	page = (Page) palloc_aligned(BLCKSZ, PG_IO_ALIGN_SIZE, 0);
-	SpGistInitMetapage(page);
+	Buffer		metabuffer,
+				rootbuffer,
+				nullbuffer;
 
 	/*
-	 * Write the page and log it unconditionally. This is important
-	 * particularly for indexes created on tablespaces and databases whose
-	 * creation happened after the last redo pointer as recovery removes any
-	 * of their existing content when the corresponding create records are
-	 * replayed.
+	 * Initialize the meta page and root pages
 	 */
-	PageSetChecksumInplace(page, SPGIST_METAPAGE_BLKNO);
-	smgrwrite(RelationGetSmgr(index), INIT_FORKNUM, SPGIST_METAPAGE_BLKNO,
-			  page, true);
-	log_newpage(&(RelationGetSmgr(index))->smgr_rlocator.locator, INIT_FORKNUM,
-				SPGIST_METAPAGE_BLKNO, page, true);
-
-	/* Likewise for the root page. */
-	SpGistInitPage(page, SPGIST_LEAF);
-
-	PageSetChecksumInplace(page, SPGIST_ROOT_BLKNO);
-	smgrwrite(RelationGetSmgr(index), INIT_FORKNUM, SPGIST_ROOT_BLKNO,
-			  page, true);
-	log_newpage(&(RelationGetSmgr(index))->smgr_rlocator.locator, INIT_FORKNUM,
-				SPGIST_ROOT_BLKNO, page, true);
-
-	/* Likewise for the null-tuples root page. */
-	SpGistInitPage(page, SPGIST_LEAF | SPGIST_NULLS);
-
-	PageSetChecksumInplace(page, SPGIST_NULL_BLKNO);
-	smgrwrite(RelationGetSmgr(index), INIT_FORKNUM, SPGIST_NULL_BLKNO,
-			  page, true);
-	log_newpage(&(RelationGetSmgr(index))->smgr_rlocator.locator, INIT_FORKNUM,
-				SPGIST_NULL_BLKNO, page, true);
+	metabuffer = ReadBufferExtended(index, INIT_FORKNUM, P_NEW, RBM_NORMAL, NULL);
+	LockBuffer(metabuffer, BUFFER_LOCK_EXCLUSIVE);
+	rootbuffer = ReadBufferExtended(index, INIT_FORKNUM, P_NEW, RBM_NORMAL, NULL);
+	LockBuffer(rootbuffer, BUFFER_LOCK_EXCLUSIVE);
+	nullbuffer = ReadBufferExtended(index, INIT_FORKNUM, P_NEW, RBM_NORMAL, NULL);
+	LockBuffer(nullbuffer, BUFFER_LOCK_EXCLUSIVE);
 
-	/*
-	 * An immediate sync is required even if we xlog'd the pages, because the
-	 * writes did not go through shared buffers and therefore a concurrent
-	 * checkpoint may have moved the redo pointer past our xlog record.
-	 */
-	smgrimmedsync(RelationGetSmgr(index), INIT_FORKNUM);
+	Assert(BufferGetBlockNumber(metabuffer) == SPGIST_METAPAGE_BLKNO);
+	Assert(BufferGetBlockNumber(rootbuffer) == SPGIST_ROOT_BLKNO);
+	Assert(BufferGetBlockNumber(nullbuffer) == SPGIST_NULL_BLKNO);
+
+	START_CRIT_SECTION();
+
+	SpGistInitMetapage(BufferGetPage(metabuffer));
+	MarkBufferDirty(metabuffer);
+	SpGistInitBuffer(rootbuffer, SPGIST_LEAF);
+	MarkBufferDirty(rootbuffer);
+	SpGistInitBuffer(nullbuffer, SPGIST_LEAF | SPGIST_NULLS);
+	MarkBufferDirty(nullbuffer);
+
+	log_newpage_buffer(metabuffer, true);
+	log_newpage_buffer(rootbuffer, true);
+	log_newpage_buffer(nullbuffer, true);
+
+	END_CRIT_SECTION();
+
+	UnlockReleaseBuffer(metabuffer);
+	UnlockReleaseBuffer(rootbuffer);
+	UnlockReleaseBuffer(nullbuffer);
 }
 
 /*
