Skip to content

Commit b36fbd9

Browse files
committed
Improve consistency of replication slot statistics
The replication slot stats stored in shared memory rely on an internal index number. Both pgstat_reset_replslot() and pgstat_fetch_replslot() lacked some LWLock protections with ReplicationSlotControlLock while operating on these index numbers. This issue could cause these two functions to potentially operate on incorrect slots when taken in isolation in the event of slots dropped and/or re-created concurrently. Note that pg_stat_get_replication_slot() is called once per slot when querying pg_stat_replication_slots, meaning that the stats are retrieved across multiple ReplicationSlotControlLock acquisitions. So, while this commit improves more consistency, it may still be possible that statistics are not completely consistent for a single scan of pg_stat_replication_slots under concurrent replication slot drop or creation activity. The issue should unlikely be a problem in practice, causing the report of inconsistent stats or or the stats reset of an incorrect slot, so no backpatch is done. Author: Bertrand Drouvot Reviewed-by: Heikki Linnakangas, Shveta Malik, Michael Paquier Discussion: https://postgr.es/m/ZeGq1HDWFfLkjh4o@ip-10-97-1-34.eu-west-3.compute.internal
1 parent f500ba0 commit b36fbd9

File tree

1 file changed

+25
-17
lines changed

1 file changed

+25
-17
lines changed

src/backend/utils/activity/pgstat_replslot.c

+25-17
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@
2929
#include "utils/pgstat_internal.h"
3030

3131

32-
static int get_replslot_index(const char *name);
32+
static int get_replslot_index(const char *name, bool need_lock);
3333

3434

3535
/*
@@ -45,8 +45,10 @@ pgstat_reset_replslot(const char *name)
4545

4646
Assert(name != NULL);
4747

48+
LWLockAcquire(ReplicationSlotControlLock, LW_SHARED);
49+
4850
/* Check if the slot exits with the given name. */
49-
slot = SearchNamedReplicationSlot(name, true);
51+
slot = SearchNamedReplicationSlot(name, false);
5052

5153
if (!slot)
5254
ereport(ERROR,
@@ -55,15 +57,14 @@ pgstat_reset_replslot(const char *name)
5557
name)));
5658

5759
/*
58-
* Nothing to do for physical slots as we collect stats only for logical
59-
* slots.
60+
* Reset stats if it is a logical slot. Nothing to do for physical slots
61+
* as we collect stats only for logical slots.
6062
*/
61-
if (SlotIsPhysical(slot))
62-
return;
63+
if (SlotIsLogical(slot))
64+
pgstat_reset(PGSTAT_KIND_REPLSLOT, InvalidOid,
65+
ReplicationSlotIndex(slot));
6366

64-
/* reset this one entry */
65-
pgstat_reset(PGSTAT_KIND_REPLSLOT, InvalidOid,
66-
ReplicationSlotIndex(slot));
67+
LWLockRelease(ReplicationSlotControlLock);
6768
}
6869

6970
/*
@@ -163,13 +164,20 @@ pgstat_drop_replslot(ReplicationSlot *slot)
163164
PgStat_StatReplSlotEntry *
164165
pgstat_fetch_replslot(NameData slotname)
165166
{
166-
int idx = get_replslot_index(NameStr(slotname));
167+
int idx;
168+
PgStat_StatReplSlotEntry *slotentry = NULL;
167169

168-
if (idx == -1)
169-
return NULL;
170+
LWLockAcquire(ReplicationSlotControlLock, LW_SHARED);
171+
172+
idx = get_replslot_index(NameStr(slotname), false);
173+
174+
if (idx != -1)
175+
slotentry = (PgStat_StatReplSlotEntry *) pgstat_fetch_entry(PGSTAT_KIND_REPLSLOT,
176+
InvalidOid, idx);
177+
178+
LWLockRelease(ReplicationSlotControlLock);
170179

171-
return (PgStat_StatReplSlotEntry *)
172-
pgstat_fetch_entry(PGSTAT_KIND_REPLSLOT, InvalidOid, idx);
180+
return slotentry;
173181
}
174182

175183
void
@@ -188,7 +196,7 @@ pgstat_replslot_to_serialized_name_cb(const PgStat_HashKey *key, const PgStatSha
188196
bool
189197
pgstat_replslot_from_serialized_name_cb(const NameData *name, PgStat_HashKey *key)
190198
{
191-
int idx = get_replslot_index(NameStr(*name));
199+
int idx = get_replslot_index(NameStr(*name), true);
192200

193201
/* slot might have been deleted */
194202
if (idx == -1)
@@ -208,13 +216,13 @@ pgstat_replslot_reset_timestamp_cb(PgStatShared_Common *header, TimestampTz ts)
208216
}
209217

210218
static int
211-
get_replslot_index(const char *name)
219+
get_replslot_index(const char *name, bool need_lock)
212220
{
213221
ReplicationSlot *slot;
214222

215223
Assert(name != NULL);
216224

217-
slot = SearchNamedReplicationSlot(name, true);
225+
slot = SearchNamedReplicationSlot(name, need_lock);
218226

219227
if (!slot)
220228
return -1;

0 commit comments

Comments
 (0)