Skip to content

Commit 8e403f2

Browse files
committed
Log the creation of an init fork unconditionally.
Previously, it was thought that this only needed to be done for the benefit of possible standbys, so wal_level = minimal skipped it. But that's not safe, because during crash recovery we might replay XLOG_DBASE_CREATE or XLOG_TBLSPC_CREATE record which recursively removes the directory that contains the new init fork. So log it always. The user-visible effect of this bug is that if you create a database or tablespace, then create an unlogged table, then crash without checkpointing, then restart, accessing the table will fail, because the it won't have been properly reset. This commit fixes that. Michael Paquier, per a report from Konstantin Knizhnik. Wording of the comments per a suggestion from me.
1 parent 433e65c commit 8e403f2

File tree

3 files changed

+29
-20
lines changed

3 files changed

+29
-20
lines changed

src/backend/access/nbtree/nbtree.c

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -216,13 +216,18 @@ btbuildempty(PG_FUNCTION_ARGS)
216216
metapage = (Page) palloc(BLCKSZ);
217217
_bt_initmetapage(metapage, P_NONE, 0);
218218

219-
/* Write the page. If archiving/streaming, XLOG it. */
219+
/*
220+
* Write the page and log it. It might seem that an immediate sync
221+
* would be sufficient to guarantee that the file exists on disk, but
222+
* recovery itself might remove it while replaying, for example, an
223+
* XLOG_DBASE_CREATE or XLOG_TBLSPC_CREATE record. Therefore, we
224+
* need this even when wal_level=minimal.
225+
*/
220226
PageSetChecksumInplace(metapage, BTREE_METAPAGE);
221227
smgrwrite(index->rd_smgr, INIT_FORKNUM, BTREE_METAPAGE,
222228
(char *) metapage, true);
223-
if (XLogIsNeeded())
224-
log_newpage(&index->rd_smgr->smgr_rnode.node, INIT_FORKNUM,
225-
BTREE_METAPAGE, metapage);
229+
log_newpage(&index->rd_smgr->smgr_rnode.node, INIT_FORKNUM,
230+
BTREE_METAPAGE, metapage);
226231

227232
/*
228233
* An immediate sync is required even if we xlog'd the page, because the

src/backend/access/spgist/spginsert.c

Lines changed: 13 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -163,33 +163,36 @@ spgbuildempty(PG_FUNCTION_ARGS)
163163
page = (Page) palloc(BLCKSZ);
164164
SpGistInitMetapage(page);
165165

166-
/* Write the page. If archiving/streaming, XLOG it. */
166+
/*
167+
* Write the page and log it unconditionally. This is important
168+
* particularly for indexes created on tablespaces and databases
169+
* whose creation happened after the last redo pointer as recovery
170+
* removes any of their existing content when the corresponding
171+
* create records are replayed.
172+
*/
167173
PageSetChecksumInplace(page, SPGIST_METAPAGE_BLKNO);
168174
smgrwrite(index->rd_smgr, INIT_FORKNUM, SPGIST_METAPAGE_BLKNO,
169175
(char *) page, true);
170-
if (XLogIsNeeded())
171-
log_newpage(&index->rd_smgr->smgr_rnode.node, INIT_FORKNUM,
172-
SPGIST_METAPAGE_BLKNO, page);
176+
log_newpage(&index->rd_smgr->smgr_rnode.node, INIT_FORKNUM,
177+
SPGIST_METAPAGE_BLKNO, page);
173178

174179
/* Likewise for the root page. */
175180
SpGistInitPage(page, SPGIST_LEAF);
176181

177182
PageSetChecksumInplace(page, SPGIST_ROOT_BLKNO);
178183
smgrwrite(index->rd_smgr, INIT_FORKNUM, SPGIST_ROOT_BLKNO,
179184
(char *) page, true);
180-
if (XLogIsNeeded())
181-
log_newpage(&index->rd_smgr->smgr_rnode.node, INIT_FORKNUM,
182-
SPGIST_ROOT_BLKNO, page);
185+
log_newpage(&index->rd_smgr->smgr_rnode.node, INIT_FORKNUM,
186+
SPGIST_ROOT_BLKNO, page);
183187

184188
/* Likewise for the null-tuples root page. */
185189
SpGistInitPage(page, SPGIST_LEAF | SPGIST_NULLS);
186190

187191
PageSetChecksumInplace(page, SPGIST_NULL_BLKNO);
188192
smgrwrite(index->rd_smgr, INIT_FORKNUM, SPGIST_NULL_BLKNO,
189193
(char *) page, true);
190-
if (XLogIsNeeded())
191-
log_newpage(&index->rd_smgr->smgr_rnode.node, INIT_FORKNUM,
192-
SPGIST_NULL_BLKNO, page);
194+
log_newpage(&index->rd_smgr->smgr_rnode.node, INIT_FORKNUM,
195+
SPGIST_NULL_BLKNO, page);
193196

194197
/*
195198
* An immediate sync is required even if we xlog'd the pages, because the

src/backend/catalog/heap.c

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1347,18 +1347,19 @@ heap_create_with_catalog(const char *relname,
13471347

13481348
/*
13491349
* Set up an init fork for an unlogged table so that it can be correctly
1350-
* reinitialized on restart. Since we're going to do an immediate sync, we
1351-
* only need to xlog this if archiving or streaming is enabled. And the
1352-
* immediate sync is required, because otherwise there's no guarantee that
1353-
* this will hit the disk before the next checkpoint moves the redo pointer.
1350+
* reinitialized on restart. An immediate sync is required even if the
1351+
* page has been logged, because the write did not go through
1352+
* shared_buffers and therefore a concurrent checkpoint may have moved
1353+
* the redo pointer past our xlog record. Recovery may as well remove it
1354+
* while replaying, for example, XLOG_DBASE_CREATE or XLOG_TBLSPC_CREATE
1355+
* record. Therefore, logging is necessary even if wal_level=minimal.
13541356
*/
13551357
void
13561358
heap_create_init_fork(Relation rel)
13571359
{
13581360
RelationOpenSmgr(rel);
13591361
smgrcreate(rel->rd_smgr, INIT_FORKNUM, false);
1360-
if (XLogIsNeeded())
1361-
log_smgrcreate(&rel->rd_smgr->smgr_rnode.node, INIT_FORKNUM);
1362+
log_smgrcreate(&rel->rd_smgr->smgr_rnode.node, INIT_FORKNUM);
13621363
smgrimmedsync(rel->rd_smgr, INIT_FORKNUM);
13631364
}
13641365

0 commit comments

Comments
 (0)