Skip to content

Commit be25a08

Browse files
committed
Use a safer method for determining whether relcache init file is stale.
When we invalidate the relcache entry for a system catalog or index, we must also delete the relcache "init file" if the init file contains a copy of that rel's entry. The old way of doing this relied on a specially maintained list of the OIDs of relations present in the init file: we made the list either when reading the file in, or when writing the file out. The problem is that when writing the file out, we included only rels present in our local relcache, which might have already suffered some deletions due to relcache inval events. In such cases we correctly decided not to overwrite the real init file with incomplete data --- but we still used the incomplete initFileRelationIds list for the rest of the current session. This could result in wrong decisions about whether the session's own actions require deletion of the init file, potentially allowing an init file created by some other concurrent session to be left around even though it's been made stale. Since we don't support changing the schema of a system catalog at runtime, the only likely scenario in which this would cause a problem in the field involves a "vacuum full" on a catalog concurrently with other activity, and even then it's far from easy to provoke. Remarkably, this has been broken since 2002 (in commit 7863404), but we had never seen a reproducible test case until recently. If it did happen in the field, the symptoms would probably involve unexpected "cache lookup failed" errors to begin with, then "could not open file" failures after the next checkpoint, as all accesses to the affected catalog stopped working. Recovery would require manually removing the stale "pg_internal.init" file. To fix, get rid of the initFileRelationIds list, and instead consult syscache.c's list of relations used in catalog caches to decide whether a relation is included in the init file. This should be a tad more efficient anyway, since we're replacing linear search of a list with ~100 entries with a binary search. It's a bit ugly that the init file contents are now so directly tied to the catalog caches, but in practice that won't make much difference. Back-patch to all supported branches.
1 parent 247263d commit be25a08

File tree

5 files changed

+87
-57
lines changed

5 files changed

+87
-57
lines changed

src/backend/utils/cache/inval.c

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -506,10 +506,13 @@ RegisterRelcacheInvalidation(Oid dbId, Oid relId)
506506
(void) GetCurrentCommandId(true);
507507

508508
/*
509-
* If the relation being invalidated is one of those cached in the
509+
* If the relation being invalidated is one of those cached in the local
510510
* relcache init file, mark that we need to zap that file at commit.
511+
* (Note: perhaps it would be better if this code were a bit more
512+
* decoupled from the knowledge that the init file contains exactly those
513+
* non-shared rels used in catalog caches.)
511514
*/
512-
if (RelationIdIsInInitFile(relId))
515+
if (OidIsValid(dbId) && RelationSupportsSysCache(relId))
513516
transInvalInfo->RelcacheInitFileInval = true;
514517
}
515518

src/backend/utils/cache/relcache.c

Lines changed: 17 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -130,14 +130,6 @@ bool criticalSharedRelcachesBuilt = false;
130130
*/
131131
static long relcacheInvalsReceived = 0L;
132132

133-
/*
134-
* This list remembers the OIDs of the non-shared relations cached in the
135-
* database's local relcache init file. Note that there is no corresponding
136-
* list for the shared relcache init file, for reasons explained in the
137-
* comments for RelationCacheInitFileRemove.
138-
*/
139-
static List *initFileRelationIds = NIL;
140-
141133
/*
142134
* eoxact_list[] stores the OIDs of relations that (might) need AtEOXact
143135
* cleanup work. This list intentionally has limited size; if it overflows,
@@ -3375,9 +3367,6 @@ RelationCacheInitializePhase3(void)
33753367
*/
33763368
InitCatalogCachePhase2();
33773369

3378-
/* reset initFileRelationIds list; we'll fill it during write */
3379-
initFileRelationIds = NIL;
3380-
33813370
/* now write the files */
33823371
write_relcache_init_file(true);
33833372
write_relcache_init_file(false);
@@ -4767,10 +4756,6 @@ load_relcache_init_file(bool shared)
47674756
for (relno = 0; relno < num_rels; relno++)
47684757
{
47694758
RelationCacheInsert(rels[relno], false);
4770-
/* also make a list of their OIDs, for RelationIdIsInInitFile */
4771-
if (!shared)
4772-
initFileRelationIds = lcons_oid(RelationGetRelid(rels[relno]),
4773-
initFileRelationIds);
47744759
}
47754760

47764761
pfree(rels);
@@ -4807,9 +4792,15 @@ write_relcache_init_file(bool shared)
48074792
int magic;
48084793
HASH_SEQ_STATUS status;
48094794
RelIdCacheEnt *idhentry;
4810-
MemoryContext oldcxt;
48114795
int i;
48124796

4797+
/*
4798+
* If we have already received any relcache inval events, there's no
4799+
* chance of succeeding so we may as well skip the whole thing.
4800+
*/
4801+
if (relcacheInvalsReceived != 0L)
4802+
return;
4803+
48134804
/*
48144805
* We must write a temporary file and rename it into place. Otherwise,
48154806
* another backend starting at about the same time might crash trying to
@@ -4869,6 +4860,16 @@ write_relcache_init_file(bool shared)
48694860
if (relform->relisshared != shared)
48704861
continue;
48714862

4863+
/*
4864+
* Ignore if not supposed to be in init file. We can allow any shared
4865+
* relation that's been loaded so far to be in the shared init file,
4866+
* but unshared relations must be used for catalog caches. (Note: if
4867+
* you want to change the criterion for rels to be kept in the init
4868+
* file, see also inval.c.)
4869+
*/
4870+
if (!shared && !RelationSupportsSysCache(RelationGetRelid(rel)))
4871+
continue;
4872+
48724873
/* first write the relcache entry proper */
48734874
write_item(rel, sizeof(RelationData), fp);
48744875

@@ -4925,15 +4926,6 @@ write_relcache_init_file(bool shared)
49254926
relform->relnatts * sizeof(int16),
49264927
fp);
49274928
}
4928-
4929-
/* also make a list of their OIDs, for RelationIdIsInInitFile */
4930-
if (!shared)
4931-
{
4932-
oldcxt = MemoryContextSwitchTo(CacheMemoryContext);
4933-
initFileRelationIds = lcons_oid(RelationGetRelid(rel),
4934-
initFileRelationIds);
4935-
MemoryContextSwitchTo(oldcxt);
4936-
}
49374929
}
49384930

49394931
if (FreeFile(fp))
@@ -4992,21 +4984,6 @@ write_item(const void *data, Size len, FILE *fp)
49924984
elog(FATAL, "could not write init file");
49934985
}
49944986

4995-
/*
4996-
* Detect whether a given relation (identified by OID) is one of the ones
4997-
* we store in the local relcache init file.
4998-
*
4999-
* Note that we effectively assume that all backends running in a database
5000-
* would choose to store the same set of relations in the init file;
5001-
* otherwise there are cases where we'd fail to detect the need for an init
5002-
* file invalidation. This does not seem likely to be a problem in practice.
5003-
*/
5004-
bool
5005-
RelationIdIsInInitFile(Oid relationId)
5006-
{
5007-
return list_member_oid(initFileRelationIds, relationId);
5008-
}
5009-
50104987
/*
50114988
* Invalidate (remove) the init file during commit of a transaction that
50124989
* changed one or more of the relation cache entries that are kept in the

src/backend/utils/cache/syscache.c

Lines changed: 61 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -798,17 +798,23 @@ static const struct cachedesc cacheinfo[] = {
798798
}
799799
};
800800

801-
static CatCache *SysCache[
802-
lengthof(cacheinfo)];
803-
static int SysCacheSize = lengthof(cacheinfo);
801+
#define SysCacheSize ((int) lengthof(cacheinfo))
802+
803+
static CatCache *SysCache[SysCacheSize];
804+
804805
static bool CacheInitialized = false;
805806

806-
static Oid SysCacheRelationOid[
807-
lengthof(cacheinfo)];
807+
/* Sorted array of OIDs of tables that have caches on them */
808+
static Oid SysCacheRelationOid[SysCacheSize];
808809
static int SysCacheRelationOidSize;
809810

811+
/* Sorted array of OIDs of tables and indexes used by caches */
812+
static Oid SysCacheSupportingRelOid[SysCacheSize * 2];
813+
static int SysCacheSupportingRelOidSize;
814+
810815
static int oid_compare(const void *a, const void *b);
811816

817+
812818
/*
813819
* InitCatalogCache - initialize the caches
814820
*
@@ -822,11 +828,11 @@ InitCatalogCache(void)
822828
{
823829
int cacheId;
824830
int i,
825-
j = 0;
831+
j;
826832

827833
Assert(!CacheInitialized);
828834

829-
MemSet(SysCache, 0, sizeof(SysCache));
835+
SysCacheRelationOidSize = SysCacheSupportingRelOidSize = 0;
830836

831837
for (cacheId = 0; cacheId < SysCacheSize; cacheId++)
832838
{
@@ -839,20 +845,39 @@ InitCatalogCache(void)
839845
if (!PointerIsValid(SysCache[cacheId]))
840846
elog(ERROR, "could not initialize cache %u (%d)",
841847
cacheinfo[cacheId].reloid, cacheId);
848+
/* Accumulate data for OID lists, too */
842849
SysCacheRelationOid[SysCacheRelationOidSize++] =
843850
cacheinfo[cacheId].reloid;
851+
SysCacheSupportingRelOid[SysCacheSupportingRelOidSize++] =
852+
cacheinfo[cacheId].reloid;
853+
SysCacheSupportingRelOid[SysCacheSupportingRelOidSize++] =
854+
cacheinfo[cacheId].indoid;
844855
/* see comments for RelationInvalidatesSnapshotsOnly */
845856
Assert(!RelationInvalidatesSnapshotsOnly(cacheinfo[cacheId].reloid));
846857
}
847858

848-
/* Sort and dedup OIDs. */
859+
Assert(SysCacheRelationOidSize <= lengthof(SysCacheRelationOid));
860+
Assert(SysCacheSupportingRelOidSize <= lengthof(SysCacheSupportingRelOid));
861+
862+
/* Sort and de-dup OID arrays, so we can use binary search. */
849863
pg_qsort(SysCacheRelationOid, SysCacheRelationOidSize,
850864
sizeof(Oid), oid_compare);
851-
for (i = 1; i < SysCacheRelationOidSize; ++i)
865+
for (i = 1, j = 0; i < SysCacheRelationOidSize; i++)
866+
{
852867
if (SysCacheRelationOid[i] != SysCacheRelationOid[j])
853868
SysCacheRelationOid[++j] = SysCacheRelationOid[i];
869+
}
854870
SysCacheRelationOidSize = j + 1;
855871

872+
pg_qsort(SysCacheSupportingRelOid, SysCacheSupportingRelOidSize,
873+
sizeof(Oid), oid_compare);
874+
for (i = 1, j = 0; i < SysCacheSupportingRelOidSize; i++)
875+
{
876+
if (SysCacheSupportingRelOid[i] != SysCacheSupportingRelOid[j])
877+
SysCacheSupportingRelOid[++j] = SysCacheSupportingRelOid[i];
878+
}
879+
SysCacheSupportingRelOidSize = j + 1;
880+
856881
CacheInitialized = true;
857882
}
858883

@@ -1195,15 +1220,40 @@ RelationHasSysCache(Oid relid)
11951220
return false;
11961221
}
11971222

1223+
/*
1224+
* Test whether a relation supports a system cache, ie it is either a
1225+
* cached table or the index used for a cache.
1226+
*/
1227+
bool
1228+
RelationSupportsSysCache(Oid relid)
1229+
{
1230+
int low = 0,
1231+
high = SysCacheSupportingRelOidSize - 1;
1232+
1233+
while (low <= high)
1234+
{
1235+
int middle = low + (high - low) / 2;
1236+
1237+
if (SysCacheSupportingRelOid[middle] == relid)
1238+
return true;
1239+
if (SysCacheSupportingRelOid[middle] < relid)
1240+
low = middle + 1;
1241+
else
1242+
high = middle - 1;
1243+
}
1244+
1245+
return false;
1246+
}
1247+
11981248

11991249
/*
12001250
* OID comparator for pg_qsort
12011251
*/
12021252
static int
12031253
oid_compare(const void *a, const void *b)
12041254
{
1205-
Oid oa = *((Oid *) a);
1206-
Oid ob = *((Oid *) b);
1255+
Oid oa = *((const Oid *) a);
1256+
Oid ob = *((const Oid *) b);
12071257

12081258
if (oa == ob)
12091259
return 0;

src/include/utils/relcache.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -116,7 +116,6 @@ extern void AtEOSubXact_RelationCache(bool isCommit, SubTransactionId mySubid,
116116
/*
117117
* Routines to help manage rebuilding of relcache init files
118118
*/
119-
extern bool RelationIdIsInInitFile(Oid relationId);
120119
extern void RelationCacheInitFilePreInvalidate(void);
121120
extern void RelationCacheInitFilePostInvalidate(void);
122121
extern void RelationCacheInitFileRemove(void);

src/include/utils/syscache.h

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818

1919
#include "access/attnum.h"
2020
#include "access/htup.h"
21-
/* we purposedly do not include utils/catcache.h here */
21+
/* we intentionally do not include utils/catcache.h here */
2222

2323
/*
2424
* SysCache identifiers.
@@ -125,8 +125,9 @@ struct catclist;
125125
extern struct catclist *SearchSysCacheList(int cacheId, int nkeys,
126126
Datum key1, Datum key2, Datum key3, Datum key4);
127127

128-
extern bool RelationInvalidatesSnapshotsOnly(Oid);
129-
extern bool RelationHasSysCache(Oid);
128+
extern bool RelationInvalidatesSnapshotsOnly(Oid relid);
129+
extern bool RelationHasSysCache(Oid relid);
130+
extern bool RelationSupportsSysCache(Oid relid);
130131

131132
/*
132133
* The use of the macros below rather than direct calls to the corresponding

0 commit comments

Comments
 (0)