Skip to content

Commit 3fa17d3

Browse files
author
Amit Kapila
committed
Use HTAB for replication slot statistics.
Previously, we used to use the array of size max_replication_slots to store stats for replication slots. But that had two problems in the cases where a message for dropping a slot gets lost: 1) the stats for the new slot are not recorded if the array is full and 2) writing beyond the end of the array if the user reduces the max_replication_slots. This commit uses HTAB for replication slot statistics, resolving both problems. Now, pgstat_vacuum_stat() search for all the dead replication slots in stats hashtable and tell the collector to remove them. To avoid showing the stats for the already-dropped slots, pg_stat_replication_slots view searches slot stats by the slot name taken from pg_replication_slots. Also, we send a message for creating a slot at slot creation, initializing the stats. This reduces the possibility that the stats are accumulated into the old slot stats when a message for dropping a slot gets lost. Reported-by: Andres Freund Author: Sawada Masahiko, test case by Vignesh C Reviewed-by: Amit Kapila, Vignesh C, Dilip Kumar Discussion: https://postgr.es/m/20210319185247.ldebgpdaxsowiflw@alap3.anarazel.de
1 parent e7eea52 commit 3fa17d3

File tree

12 files changed

+389
-260
lines changed

12 files changed

+389
-260
lines changed

contrib/test_decoding/t/001_repl_stats.pl

+54-15
Original file line numberDiff line numberDiff line change
@@ -2,19 +2,33 @@
22
# drop replication slot and restart.
33
use strict;
44
use warnings;
5+
use File::Path qw(rmtree);
56
use PostgresNode;
67
use TestLib;
7-
use Test::More tests => 1;
8+
use Test::More tests => 2;
89

910
# Test set-up
1011
my $node = get_new_node('test');
1112
$node->init(allows_streaming => 'logical');
1213
$node->append_conf('postgresql.conf', 'synchronous_commit = on');
1314
$node->start;
1415

16+
# Check that replication slot stats are expected.
17+
sub test_slot_stats
18+
{
19+
my ($node, $expected, $msg) = @_;
20+
21+
my $result = $node->safe_psql(
22+
'postgres', qq[
23+
SELECT slot_name, total_txns > 0 AS total_txn,
24+
total_bytes > 0 AS total_bytes
25+
FROM pg_stat_replication_slots
26+
ORDER BY slot_name]);
27+
is($result, $expected, $msg);
28+
}
29+
1530
# Create table.
16-
$node->safe_psql('postgres',
17-
"CREATE TABLE test_repl_stat(col1 int)");
31+
$node->safe_psql('postgres', "CREATE TABLE test_repl_stat(col1 int)");
1832

1933
# Create replication slots.
2034
$node->safe_psql(
@@ -26,7 +40,8 @@
2640
]);
2741

2842
# Insert some data.
29-
$node->safe_psql('postgres', "INSERT INTO test_repl_stat values(generate_series(1, 5));");
43+
$node->safe_psql('postgres',
44+
"INSERT INTO test_repl_stat values(generate_series(1, 5));");
3045

3146
$node->safe_psql(
3247
'postgres', qq[
@@ -50,27 +65,51 @@
5065

5166
# Test to drop one of the replication slot and verify replication statistics data is
5267
# fine after restart.
53-
$node->safe_psql('postgres', "SELECT pg_drop_replication_slot('regression_slot4')");
68+
$node->safe_psql('postgres',
69+
"SELECT pg_drop_replication_slot('regression_slot4')");
5470

5571
$node->stop;
5672
$node->start;
5773

5874
# Verify statistics data present in pg_stat_replication_slots are sane after
5975
# restart.
60-
my $result = $node->safe_psql('postgres',
61-
"SELECT slot_name, total_txns > 0 AS total_txn,
62-
total_bytes > 0 AS total_bytes FROM pg_stat_replication_slots
63-
ORDER BY slot_name"
64-
);
65-
is($result, qq(regression_slot1|t|t
76+
test_slot_stats(
77+
$node,
78+
qq(regression_slot1|t|t
6679
regression_slot2|t|t
67-
regression_slot3|t|t), 'check replication statistics are updated');
80+
regression_slot3|t|t),
81+
'check replication statistics are updated');
82+
83+
# Test to remove one of the replication slots and adjust
84+
# max_replication_slots accordingly to the number of slots. This leads
85+
# to a mismatch between the number of slots present in the stats file and the
86+
# number of stats present in the shared memory, simulating the scenario for
87+
# drop slot message lost by the statistics collector process. We verify
88+
# replication statistics data is fine after restart.
89+
90+
$node->stop;
91+
my $datadir = $node->data_dir;
92+
my $slot3_replslotdir = "$datadir/pg_replslot/regression_slot3";
93+
94+
rmtree($slot3_replslotdir);
95+
96+
$node->append_conf('postgresql.conf', 'max_replication_slots = 2');
97+
$node->start;
98+
99+
# Verify statistics data present in pg_stat_replication_slots are sane after
100+
# restart.
101+
test_slot_stats(
102+
$node,
103+
qq(regression_slot1|t|t
104+
regression_slot2|t|t),
105+
'check replication statistics after removing the slot file');
68106

69107
# cleanup
70108
$node->safe_psql('postgres', "DROP TABLE test_repl_stat");
71-
$node->safe_psql('postgres', "SELECT pg_drop_replication_slot('regression_slot1')");
72-
$node->safe_psql('postgres', "SELECT pg_drop_replication_slot('regression_slot2')");
73-
$node->safe_psql('postgres', "SELECT pg_drop_replication_slot('regression_slot3')");
109+
$node->safe_psql('postgres',
110+
"SELECT pg_drop_replication_slot('regression_slot1')");
111+
$node->safe_psql('postgres',
112+
"SELECT pg_drop_replication_slot('regression_slot2')");
74113

75114
# shutdown
76115
$node->stop;

src/backend/catalog/system_views.sql

+16-14
Original file line numberDiff line numberDiff line change
@@ -866,20 +866,6 @@ CREATE VIEW pg_stat_replication AS
866866
JOIN pg_stat_get_wal_senders() AS W ON (S.pid = W.pid)
867867
LEFT JOIN pg_authid AS U ON (S.usesysid = U.oid);
868868

869-
CREATE VIEW pg_stat_replication_slots AS
870-
SELECT
871-
s.slot_name,
872-
s.spill_txns,
873-
s.spill_count,
874-
s.spill_bytes,
875-
s.stream_txns,
876-
s.stream_count,
877-
s.stream_bytes,
878-
s.total_txns,
879-
s.total_bytes,
880-
s.stats_reset
881-
FROM pg_stat_get_replication_slots() AS s;
882-
883869
CREATE VIEW pg_stat_slru AS
884870
SELECT
885871
s.name,
@@ -984,6 +970,22 @@ CREATE VIEW pg_replication_slots AS
984970
FROM pg_get_replication_slots() AS L
985971
LEFT JOIN pg_database D ON (L.datoid = D.oid);
986972

973+
CREATE VIEW pg_stat_replication_slots AS
974+
SELECT
975+
s.slot_name,
976+
s.spill_txns,
977+
s.spill_count,
978+
s.spill_bytes,
979+
s.stream_txns,
980+
s.stream_count,
981+
s.stream_bytes,
982+
s.total_txns,
983+
s.total_bytes,
984+
s.stats_reset
985+
FROM pg_replication_slots as r,
986+
LATERAL pg_stat_get_replication_slot(slot_name) as s
987+
WHERE r.datoid IS NOT NULL; -- excluding physical slots
988+
987989
CREATE VIEW pg_stat_database AS
988990
SELECT
989991
D.oid AS datid,

0 commit comments

Comments
 (0)