Skip to content

Commit 93519b0

Browse files
committed
Fix race condition in relcache init file invalidation.
The previous code tried to synchronize by unlinking the init file twice, but that doesn't actually work: it leaves a window wherein a third process could read the already-stale init file but miss the SI messages that would tell it the data is stale. The result would be bizarre failures in catalog accesses, typically "could not read block 0 in file ..." later during startup. Instead, hold RelCacheInitLock across both the unlink and the sending of the SI messages. This is more straightforward, and might even be a bit faster since only one unlink call is needed. This has been wrong since it was put in (in 2002!), so back-patch to all supported releases.
1 parent f239ec5 commit 93519b0

File tree

4 files changed

+57
-49
lines changed

4 files changed

+57
-49
lines changed

src/backend/access/transam/twophase.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1338,10 +1338,10 @@ FinishPreparedTransaction(const char *gid, bool isCommit)
13381338
* after we send the SI messages. See AtEOXact_Inval()
13391339
*/
13401340
if (hdr->initfileinval)
1341-
RelationCacheInitFileInvalidate(true);
1341+
RelationCacheInitFilePreInvalidate();
13421342
SendSharedInvalidMessages(invalmsgs, hdr->ninvalmsgs);
13431343
if (hdr->initfileinval)
1344-
RelationCacheInitFileInvalidate(false);
1344+
RelationCacheInitFilePostInvalidate();
13451345

13461346
/* And now do the callbacks */
13471347
if (isCommit)

src/backend/utils/cache/inval.c

Lines changed: 17 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -849,24 +849,12 @@ xactGetCommittedInvalidationMessages(SharedInvalidationMessage **msgs,
849849
return numSharedInvalidMessagesArray;
850850
}
851851

852-
#define RecoveryRelationCacheInitFileInvalidate(dbo, tbo, tf) \
853-
{ \
854-
DatabasePath = GetDatabasePath(dbo, tbo); \
855-
elog(trace_recovery(DEBUG4), "removing relcache init file in %s", DatabasePath); \
856-
RelationCacheInitFileInvalidate(tf); \
857-
pfree(DatabasePath); \
858-
}
859-
860852
/*
861853
* ProcessCommittedInvalidationMessages is executed by xact_redo_commit()
862854
* to process invalidation messages added to commit records.
863855
*
864856
* Relcache init file invalidation requires processing both
865857
* before and after we send the SI messages. See AtEOXact_Inval()
866-
*
867-
* We deliberately avoid SetDatabasePath() since it is intended to be used
868-
* only once by normal backends, so we set DatabasePath directly then
869-
* pfree after use. See RecoveryRelationCacheInitFileInvalidate() macro.
870858
*/
871859
void
872860
ProcessCommittedInvalidationMessages(SharedInvalidationMessage *msgs,
@@ -880,12 +868,25 @@ ProcessCommittedInvalidationMessages(SharedInvalidationMessage *msgs,
880868
(RelcacheInitFileInval ? " and relcache file invalidation" : ""));
881869

882870
if (RelcacheInitFileInval)
883-
RecoveryRelationCacheInitFileInvalidate(dbid, tsid, true);
871+
{
872+
/*
873+
* RelationCacheInitFilePreInvalidate requires DatabasePath to be set,
874+
* but we should not use SetDatabasePath during recovery, since it is
875+
* intended to be used only once by normal backends. Hence, a quick
876+
* hack: set DatabasePath directly then unset after use.
877+
*/
878+
DatabasePath = GetDatabasePath(dbid, tsid);
879+
elog(trace_recovery(DEBUG4), "removing relcache init file in \"%s\"",
880+
DatabasePath);
881+
RelationCacheInitFilePreInvalidate();
882+
pfree(DatabasePath);
883+
DatabasePath = NULL;
884+
}
884885

885886
SendSharedInvalidMessages(msgs, nmsgs);
886887

887888
if (RelcacheInitFileInval)
888-
RecoveryRelationCacheInitFileInvalidate(dbid, tsid, false);
889+
RelationCacheInitFilePostInvalidate();
889890
}
890891

891892
/*
@@ -926,7 +927,7 @@ AtEOXact_Inval(bool isCommit)
926927
* unless we committed.
927928
*/
928929
if (transInvalInfo->RelcacheInitFileInval)
929-
RelationCacheInitFileInvalidate(true);
930+
RelationCacheInitFilePreInvalidate();
930931

931932
AppendInvalidationMessages(&transInvalInfo->PriorCmdInvalidMsgs,
932933
&transInvalInfo->CurrentCmdInvalidMsgs);
@@ -935,7 +936,7 @@ AtEOXact_Inval(bool isCommit)
935936
SendSharedInvalidMessages);
936937

937938
if (transInvalInfo->RelcacheInitFileInval)
938-
RelationCacheInitFileInvalidate(false);
939+
RelationCacheInitFilePostInvalidate();
939940
}
940941
else if (transInvalInfo != NULL)
941942
{

src/backend/utils/cache/relcache.c

Lines changed: 36 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -4402,8 +4402,8 @@ write_relcache_init_file(bool shared)
44024402
* updated by SI message processing, but we can't be sure whether what we
44034403
* wrote out was up-to-date.)
44044404
*
4405-
* This mustn't run concurrently with RelationCacheInitFileInvalidate, so
4406-
* grab a serialization lock for the duration.
4405+
* This mustn't run concurrently with the code that unlinks an init file
4406+
* and sends SI messages, so grab a serialization lock for the duration.
44074407
*/
44084408
LWLockAcquire(RelCacheInitLock, LW_EXCLUSIVE);
44094409

@@ -4467,19 +4467,22 @@ RelationIdIsInInitFile(Oid relationId)
44674467
* changed one or more of the relation cache entries that are kept in the
44684468
* local init file.
44694469
*
4470-
* We actually need to remove the init file twice: once just before sending
4471-
* the SI messages that include relcache inval for such relations, and once
4472-
* just after sending them. The unlink before ensures that a backend that's
4473-
* currently starting cannot read the now-obsolete init file and then miss
4474-
* the SI messages that will force it to update its relcache entries. (This
4475-
* works because the backend startup sequence gets into the PGPROC array before
4476-
* trying to load the init file.) The unlink after is to synchronize with a
4477-
* backend that may currently be trying to write an init file based on data
4478-
* that we've just rendered invalid. Such a backend will see the SI messages,
4479-
* but we can't leave the init file sitting around to fool later backends.
4470+
* To be safe against concurrent inspection or rewriting of the init file,
4471+
* we must take RelCacheInitLock, then remove the old init file, then send
4472+
* the SI messages that include relcache inval for such relations, and then
4473+
* release RelCacheInitLock. This serializes the whole affair against
4474+
* write_relcache_init_file, so that we can be sure that any other process
4475+
* that's concurrently trying to create a new init file won't move an
4476+
* already-stale version into place after we unlink. Also, because we unlink
4477+
* before sending the SI messages, a backend that's currently starting cannot
4478+
* read the now-obsolete init file and then miss the SI messages that will
4479+
* force it to update its relcache entries. (This works because the backend
4480+
* startup sequence gets into the sinval array before trying to load the init
4481+
* file.)
44804482
*
4481-
* Ignore any failure to unlink the file, since it might not be there if
4482-
* no backend has been started since the last removal.
4483+
* We take the lock and do the unlink in RelationCacheInitFilePreInvalidate,
4484+
* then release the lock in RelationCacheInitFilePostInvalidate. Caller must
4485+
* send any pending SI messages between those calls.
44834486
*
44844487
* Notice this deals only with the local init file, not the shared init file.
44854488
* The reason is that there can never be a "significant" change to the
@@ -4489,34 +4492,37 @@ RelationIdIsInInitFile(Oid relationId)
44894492
* be invalid enough to make it necessary to remove it.
44904493
*/
44914494
void
4492-
RelationCacheInitFileInvalidate(bool beforeSend)
4495+
RelationCacheInitFilePreInvalidate(void)
44934496
{
44944497
char initfilename[MAXPGPATH];
44954498

44964499
snprintf(initfilename, sizeof(initfilename), "%s/%s",
44974500
DatabasePath, RELCACHE_INIT_FILENAME);
44984501

4499-
if (beforeSend)
4500-
{
4501-
/* no interlock needed here */
4502-
unlink(initfilename);
4503-
}
4504-
else
4502+
LWLockAcquire(RelCacheInitLock, LW_EXCLUSIVE);
4503+
4504+
if (unlink(initfilename) < 0)
45054505
{
45064506
/*
4507-
* We need to interlock this against write_relcache_init_file, to
4508-
* guard against possibility that someone renames a new-but-
4509-
* already-obsolete init file into place just after we unlink. With
4510-
* the interlock, it's certain that write_relcache_init_file will
4511-
* notice our SI inval message before renaming into place, or else
4512-
* that we will execute second and successfully unlink the file.
4507+
* The file might not be there if no backend has been started since
4508+
* the last removal. But complain about failures other than ENOENT.
4509+
* Fortunately, it's not too late to abort the transaction if we
4510+
* can't get rid of the would-be-obsolete init file.
45134511
*/
4514-
LWLockAcquire(RelCacheInitLock, LW_EXCLUSIVE);
4515-
unlink(initfilename);
4516-
LWLockRelease(RelCacheInitLock);
4512+
if (errno != ENOENT)
4513+
ereport(ERROR,
4514+
(errcode_for_file_access(),
4515+
errmsg("could not remove cache file \"%s\": %m",
4516+
initfilename)));
45174517
}
45184518
}
45194519

4520+
void
4521+
RelationCacheInitFilePostInvalidate(void)
4522+
{
4523+
LWLockRelease(RelCacheInitLock);
4524+
}
4525+
45204526
/*
45214527
* Remove the init files during postmaster startup.
45224528
*

src/include/utils/relcache.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,8 @@ extern void AtEOSubXact_RelationCache(bool isCommit, SubTransactionId mySubid,
9696
* Routines to help manage rebuilding of relcache init files
9797
*/
9898
extern bool RelationIdIsInInitFile(Oid relationId);
99-
extern void RelationCacheInitFileInvalidate(bool beforeSend);
99+
extern void RelationCacheInitFilePreInvalidate(void);
100+
extern void RelationCacheInitFilePostInvalidate(void);
100101
extern void RelationCacheInitFileRemove(void);
101102

102103
/* should be used only by relcache.c and catcache.c */

0 commit comments

Comments
 (0)