Skip to content

Commit 5d00b76

Browse files
committed
Make pgstat tabstat lookup hash table less fragile.
Code review for commit 090010f. Fix cases where an elog(ERROR) partway through a function would leave the persistent data structures in a corrupt state. pgstat_report_stat got this wrong by invalidating PgStat_TableEntry structs before removing hashtable entries pointing to them, and get_tabstat_entry got it wrong by ignoring the possibility of palloc failure after it had already created a hashtable entry. Also, avoid leaking a memory context per transaction, which the previous code did through misunderstanding hash_create's API. We do not need to create a context to hold the hash table; hash_create will do that. (The leak wasn't that large, amounting to only a memory context header per iteration, but it's still surprising that nobody noticed it yet.)
1 parent b91e5b4 commit 5d00b76

File tree

1 file changed

+64
-60
lines changed

1 file changed

+64
-60
lines changed

src/backend/postmaster/pgstat.c

+64-60
Original file line numberDiff line numberDiff line change
@@ -174,7 +174,7 @@ typedef struct TabStatusArray
174174
static TabStatusArray *pgStatTabList = NULL;
175175

176176
/*
177-
* pgStatTabHash entry
177+
* pgStatTabHash entry: map from relation OID to PgStat_TableStatus pointer
178178
*/
179179
typedef struct TabStatHashEntry
180180
{
@@ -805,6 +805,17 @@ pgstat_report_stat(bool force)
805805
return;
806806
last_report = now;
807807

808+
/*
809+
* Destroy pgStatTabHash before we start invalidating PgStat_TableEntry
810+
* entries it points to. (Should we fail partway through the loop below,
811+
* it's okay to have removed the hashtable already --- the only
812+
* consequence is we'd get multiple entries for the same table in the
813+
* pgStatTabList, and that's safe.)
814+
*/
815+
if (pgStatTabHash)
816+
hash_destroy(pgStatTabHash);
817+
pgStatTabHash = NULL;
818+
808819
/*
809820
* Scan through the TabStatusArray struct(s) to find tables that actually
810821
* have counts, and build messages to send. We have to separate shared
@@ -855,14 +866,6 @@ pgstat_report_stat(bool force)
855866
tsa->tsa_used = 0;
856867
}
857868

858-
/*
859-
* pgStatTabHash is outdated on this point so we have to clean it,
860-
* hash_destroy() will remove hash memory context, allocated in
861-
* make_sure_stat_tab_initialized()
862-
*/
863-
hash_destroy(pgStatTabHash);
864-
pgStatTabHash = NULL;
865-
866869
/*
867870
* Send partial messages. Make sure that any pending xact commit/abort
868871
* gets counted, even if there are no table stats to send.
@@ -1707,41 +1710,6 @@ pgstat_initstats(Relation rel)
17071710
rel->pgstat_info = get_tabstat_entry(rel_id, rel->rd_rel->relisshared);
17081711
}
17091712

1710-
/*
1711-
* Make sure pgStatTabList and pgStatTabHash are initialized.
1712-
*/
1713-
static void
1714-
make_sure_stat_tab_initialized()
1715-
{
1716-
HASHCTL ctl;
1717-
MemoryContext new_ctx;
1718-
1719-
if(!pgStatTabList)
1720-
{
1721-
/* This is first time procedure is called */
1722-
pgStatTabList = (TabStatusArray *) MemoryContextAllocZero(TopMemoryContext,
1723-
sizeof(TabStatusArray));
1724-
}
1725-
1726-
if(pgStatTabHash)
1727-
return;
1728-
1729-
/* Hash table was freed or never existed. */
1730-
1731-
new_ctx = AllocSetContextCreate(
1732-
TopMemoryContext,
1733-
"PGStatLookupHashTableContext",
1734-
ALLOCSET_DEFAULT_SIZES);
1735-
1736-
memset(&ctl, 0, sizeof(ctl));
1737-
ctl.keysize = sizeof(Oid);
1738-
ctl.entrysize = sizeof(TabStatHashEntry);
1739-
ctl.hcxt = new_ctx;
1740-
1741-
pgStatTabHash = hash_create("pgstat t_id to tsa_entry lookup hash table",
1742-
TABSTAT_QUANTUM, &ctl, HASH_ELEM | HASH_BLOBS | HASH_CONTEXT);
1743-
}
1744-
17451713
/*
17461714
* get_tabstat_entry - find or create a PgStat_TableStatus entry for rel
17471715
*/
@@ -1753,65 +1721,101 @@ get_tabstat_entry(Oid rel_id, bool isshared)
17531721
TabStatusArray *tsa;
17541722
bool found;
17551723

1756-
make_sure_stat_tab_initialized();
1724+
/*
1725+
* Create hash table if we don't have it already.
1726+
*/
1727+
if (pgStatTabHash == NULL)
1728+
{
1729+
HASHCTL ctl;
1730+
1731+
memset(&ctl, 0, sizeof(ctl));
1732+
ctl.keysize = sizeof(Oid);
1733+
ctl.entrysize = sizeof(TabStatHashEntry);
1734+
1735+
pgStatTabHash = hash_create("pgstat TabStatusArray lookup hash table",
1736+
TABSTAT_QUANTUM,
1737+
&ctl,
1738+
HASH_ELEM | HASH_BLOBS);
1739+
}
17571740

17581741
/*
17591742
* Find an entry or create a new one.
17601743
*/
17611744
hash_entry = hash_search(pgStatTabHash, &rel_id, HASH_ENTER, &found);
1762-
if(found)
1745+
if (!found)
1746+
{
1747+
/* initialize new entry with null pointer */
1748+
hash_entry->tsa_entry = NULL;
1749+
}
1750+
1751+
/*
1752+
* If entry is already valid, we're done.
1753+
*/
1754+
if (hash_entry->tsa_entry)
17631755
return hash_entry->tsa_entry;
17641756

17651757
/*
1766-
* `hash_entry` was just created and now we have to fill it.
1767-
* First make sure there is a free space in a last element of pgStatTabList.
1758+
* Locate the first pgStatTabList entry with free space, making a new list
1759+
* entry if needed. Note that we could get an OOM failure here, but if so
1760+
* we have left the hashtable and the list in a consistent state.
17681761
*/
1769-
tsa = pgStatTabList;
1770-
while(tsa->tsa_used == TABSTAT_QUANTUM)
1762+
if (pgStatTabList == NULL)
17711763
{
1772-
if(tsa->tsa_next == NULL)
1773-
{
1774-
tsa->tsa_next = (TabStatusArray *) MemoryContextAllocZero(TopMemoryContext,
1775-
sizeof(TabStatusArray));
1776-
}
1764+
/* Set up first pgStatTabList entry */
1765+
pgStatTabList = (TabStatusArray *)
1766+
MemoryContextAllocZero(TopMemoryContext,
1767+
sizeof(TabStatusArray));
1768+
}
17771769

1770+
tsa = pgStatTabList;
1771+
while (tsa->tsa_used >= TABSTAT_QUANTUM)
1772+
{
1773+
if (tsa->tsa_next == NULL)
1774+
tsa->tsa_next = (TabStatusArray *)
1775+
MemoryContextAllocZero(TopMemoryContext,
1776+
sizeof(TabStatusArray));
17781777
tsa = tsa->tsa_next;
17791778
}
17801779

17811780
/*
1782-
* Add an entry.
1781+
* Allocate a PgStat_TableStatus entry within this list entry. We assume
1782+
* the entry was already zeroed, either at creation or after last use.
17831783
*/
17841784
entry = &tsa->tsa_entries[tsa->tsa_used++];
17851785
entry->t_id = rel_id;
17861786
entry->t_shared = isshared;
17871787

17881788
/*
1789-
* Add a corresponding entry to pgStatTabHash.
1789+
* Now we can fill the entry in pgStatTabHash.
17901790
*/
17911791
hash_entry->tsa_entry = entry;
1792+
17921793
return entry;
17931794
}
17941795

17951796
/*
17961797
* find_tabstat_entry - find any existing PgStat_TableStatus entry for rel
17971798
*
17981799
* If no entry, return NULL, don't create a new one
1800+
*
1801+
* Note: if we got an error in the most recent execution of pgstat_report_stat,
1802+
* it's possible that an entry exists but there's no hashtable entry for it.
1803+
* That's okay, we'll treat this case as "doesn't exist".
17991804
*/
18001805
PgStat_TableStatus *
18011806
find_tabstat_entry(Oid rel_id)
18021807
{
18031808
TabStatHashEntry* hash_entry;
18041809

1805-
/*
1806-
* There are no entries at all.
1807-
*/
1810+
/* If hashtable doesn't exist, there are no entries at all */
18081811
if(!pgStatTabHash)
18091812
return NULL;
18101813

18111814
hash_entry = hash_search(pgStatTabHash, &rel_id, HASH_FIND, NULL);
18121815
if(!hash_entry)
18131816
return NULL;
18141817

1818+
/* Note that this step could also return NULL, but that's correct */
18151819
return hash_entry->tsa_entry;
18161820
}
18171821

0 commit comments

Comments
 (0)