Skip to content

Commit df22071

Browse files
committed
Handle interleavings between CREATE DATABASE steps and base backup.
Restoring a base backup taken in the middle of CreateDirAndVersionFile() or write_relmap_file() would lose the function's effects. The symptom was absence of the database directory, PG_VERSION file, or pg_filenode.map. If missing the directory, recovery would fail. Either missing file would not fail recovery but would render the new database unusable. Fix CreateDirAndVersionFile() with the transam/README "action first and then write a WAL entry" strategy. That has a side benefit of moving filesystem mutations out of a critical section, reducing the ways to PANIC. Fix the write_relmap_file() call with a lock acquisition, so it interacts with checkpoints like non-CREATE DATABASE calls do. Back-patch to v15, where commit 9c08aea introduced STRATEGY=WAL_LOG and made it the default. Discussion: https://postgr.es/m/20240130195003.0a.nmisch@google.com
1 parent 272a7c3 commit df22071

File tree

2 files changed

+32
-28
lines changed

2 files changed

+32
-28
lines changed

src/backend/commands/dbcommands.c

Lines changed: 18 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -462,35 +462,12 @@ CreateDirAndVersionFile(char *dbpath, Oid dbid, Oid tsid, bool isRedo)
462462
char buf[16];
463463

464464
/*
465-
* Prepare version data before starting a critical section.
466-
*
467-
* Note that we don't have to copy this from the source database; there's
468-
* only one legal value.
465+
* Note that we don't have to copy version data from the source database;
466+
* there's only one legal value.
469467
*/
470468
sprintf(buf, "%s\n", PG_MAJORVERSION);
471469
nbytes = strlen(PG_MAJORVERSION) + 1;
472470

473-
/* If we are not in WAL replay then write the WAL. */
474-
if (!isRedo)
475-
{
476-
xl_dbase_create_wal_log_rec xlrec;
477-
XLogRecPtr lsn;
478-
479-
START_CRIT_SECTION();
480-
481-
xlrec.db_id = dbid;
482-
xlrec.tablespace_id = tsid;
483-
484-
XLogBeginInsert();
485-
XLogRegisterData((char *) (&xlrec),
486-
sizeof(xl_dbase_create_wal_log_rec));
487-
488-
lsn = XLogInsert(RM_DBASE_ID, XLOG_DBASE_CREATE_WAL_LOG);
489-
490-
/* As always, WAL must hit the disk before the data update does. */
491-
XLogFlush(lsn);
492-
}
493-
494471
/* Create database directory. */
495472
if (MakePGDirectory(dbpath) < 0)
496473
{
@@ -534,9 +511,24 @@ CreateDirAndVersionFile(char *dbpath, Oid dbid, Oid tsid, bool isRedo)
534511
/* Close the version file. */
535512
CloseTransientFile(fd);
536513

537-
/* Critical section done. */
514+
/* If we are not in WAL replay then write the WAL. */
538515
if (!isRedo)
516+
{
517+
xl_dbase_create_wal_log_rec xlrec;
518+
519+
START_CRIT_SECTION();
520+
521+
xlrec.db_id = dbid;
522+
xlrec.tablespace_id = tsid;
523+
524+
XLogBeginInsert();
525+
XLogRegisterData((char *) (&xlrec),
526+
sizeof(xl_dbase_create_wal_log_rec));
527+
528+
(void) XLogInsert(RM_DBASE_ID, XLOG_DBASE_CREATE_WAL_LOG);
529+
539530
END_CRIT_SECTION();
531+
}
540532
}
541533

542534
/*

src/backend/utils/cache/relmapper.c

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -303,14 +303,15 @@ RelationMapCopy(Oid dbid, Oid tsid, char *srcdbpath, char *dstdbpath)
303303
* Write the same data into the destination database's relmap file.
304304
*
305305
* No sinval is needed because no one can be connected to the destination
306-
* database yet. For the same reason, there is no need to acquire
307-
* RelationMappingLock.
306+
* database yet.
308307
*
309308
* There's no point in trying to preserve files here. The new database
310309
* isn't usable yet anyway, and won't ever be if we can't install a relmap
311310
* file.
312311
*/
312+
LWLockAcquire(RelationMappingLock, LW_EXCLUSIVE);
313313
write_relmap_file(&map, true, false, false, dbid, tsid, dstdbpath);
314+
LWLockRelease(RelationMappingLock);
314315
}
315316

316317
/*
@@ -633,10 +634,12 @@ RelationMapFinishBootstrap(void)
633634
Assert(pending_local_updates.num_mappings == 0);
634635

635636
/* Write the files; no WAL or sinval needed */
637+
LWLockAcquire(RelationMappingLock, LW_EXCLUSIVE);
636638
write_relmap_file(&shared_map, false, false, false,
637639
InvalidOid, GLOBALTABLESPACE_OID, "global");
638640
write_relmap_file(&local_map, false, false, false,
639641
MyDatabaseId, MyDatabaseTableSpace, DatabasePath);
642+
LWLockRelease(RelationMappingLock);
640643
}
641644

642645
/*
@@ -891,6 +894,15 @@ write_relmap_file(RelMapFile *newmap, bool write_wal, bool send_sinval,
891894
char mapfilename[MAXPGPATH];
892895
char maptempfilename[MAXPGPATH];
893896

897+
/*
898+
* Even without concurrent use of this map, CheckPointRelationMap() relies
899+
* on this locking. Without it, a restore of a base backup taken after
900+
* this function's XLogInsert() and before its durable_rename() would not
901+
* have the changes. wal_level=minimal doesn't need the lock, but this
902+
* isn't performance-critical enough for such a micro-optimization.
903+
*/
904+
Assert(LWLockHeldByMeInMode(RelationMappingLock, LW_EXCLUSIVE));
905+
894906
/*
895907
* Fill in the overhead fields and update CRC.
896908
*/

0 commit comments

Comments
 (0)