Skip to content

Commit d42f60c

Browse files
Test that vacuum removes tuples older than OldestXmin
If vacuum fails to prune a tuple killed before OldestXmin, it will later find that tuple dead in lazy_scan_prune() and loop infinitely.

Add a test reproducing this scenario to the recovery suite which creates a table on a primary, updates the table to generate dead tuples for vacuum, and then, during the vacuum, uses a replica to force GlobalVisState->maybe_needed on the primary to move backwards and precede the value of OldestXmin set at the beginning of vacuuming the table.

This commit is separate from the fix in case there are test stability issues.

Discussion of the bug: https://postgr.es/m/CAAKRu_Y_NJzF4-8gzTTeaOuUL3CcGoXPjXcAHbTTygT8AyVqag%40mail.gmail.com
Discussion of the test: https://postgr.es/m/CAAKRu_apNU2MPBK96V%2BbXjTq0RiZ-%3DA4ZTaysakpx9jxbq1dbQ%40mail.gmail.com

Author: Melanie Plageman
Reviewed-by: Peter Geoghegan
1 parent 45ce054 commit d42f60c

File tree

1 file changed

+251
-0
lines changed

1 file changed

+251
-0
lines changed
Lines changed: 251 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,251 @@
1+
use strict;
2+
use warnings;
3+
use PostgreSQL::Test::Cluster;
4+
use Test::More;
5+
6+
# Test that vacuum prunes away all dead tuples killed before OldestXmin
7+
#
8+
# This test creates a table on a primary, updates the table to generate dead
9+
# tuples for vacuum, and then, during the vacuum, uses the replica to force
10+
# GlobalVisState->maybe_needed on the primary to move backwards and precede
11+
# the value of OldestXmin set at the beginning of vacuuming the table.
12+
13+
# Set up nodes
14+
my $node_primary = PostgreSQL::Test::Cluster->new('primary');
15+
$node_primary->init(allows_streaming => 'physical');
16+
17+
$node_primary->append_conf(
18+
'postgresql.conf', qq[
19+
hot_standby_feedback = on
20+
autovacuum = off
21+
log_min_messages = INFO
22+
maintenance_work_mem = 1024
23+
]);
24+
$node_primary->start;
25+
26+
my $node_replica = PostgreSQL::Test::Cluster->new('standby');
27+
28+
$node_primary->backup('my_backup');
29+
$node_replica->init_from_backup($node_primary, 'my_backup',
30+
has_streaming => 1);
31+
32+
$node_replica->start;
33+
34+
my $test_db = "test_db";
35+
$node_primary->safe_psql('postgres', "CREATE DATABASE $test_db");
36+
37+
# Save the original connection info for later use
38+
my $orig_conninfo = $node_primary->connstr();
39+
40+
my $table1 = "vac_horizon_floor_table";
41+
42+
# Long-running Primary Session A
43+
my $psql_primaryA =
44+
$node_primary->background_psql($test_db, on_error_stop => 1);
45+
46+
# Long-running Primary Session B
47+
my $psql_primaryB =
48+
$node_primary->background_psql($test_db, on_error_stop => 1);
49+
50+
# Because vacuum's first pass, pruning, is where we use the GlobalVisState to
51+
# check tuple visibility, GlobalVisState->maybe_needed must move backwards
52+
# during pruning before checking the visibility for a tuple which would have
53+
# been considered HEAPTUPLE_DEAD prior to maybe_needed moving backwards but
54+
# HEAPTUPLE_RECENTLY_DEAD compared to the new, older value of maybe_needed.
55+
#
56+
# We must not only force the horizon on the primary to move backwards but also
57+
# force the vacuuming backend's GlobalVisState to be updated. GlobalVisState
58+
# is forced to update during index vacuuming.
59+
#
60+
# _bt_pendingfsm_finalize() calls GetOldestNonRemovableTransactionId() at the
61+
# end of a round of index vacuuming, updating the backend's GlobalVisState
62+
# and, in our case, moving maybe_needed backwards.
63+
#
64+
# Then vacuum's first (pruning) pass will continue and pruning will find our
65+
# later inserted and updated tuple HEAPTUPLE_RECENTLY_DEAD when compared to
66+
# maybe_needed but HEAPTUPLE_DEAD when compared to OldestXmin.
67+
#
68+
# Thus, we must force at least two rounds of index vacuuming to ensure that
69+
# some tuple visibility checks will happen after a round of index vacuuming.
70+
# To accomplish this, we set maintenance_work_mem to its minimum value and
71+
# insert and update enough rows that we force at least one round of index
72+
# vacuuming before getting to a dead tuple which was killed after the
73+
# standby is disconnected.
74+
$node_primary->safe_psql($test_db, qq[
75+
CREATE TABLE ${table1}(col1 int) with (autovacuum_enabled=false);
76+
INSERT INTO $table1 SELECT generate_series(1, 200000);
77+
CREATE INDEX on ${table1}(col1);
78+
UPDATE $table1 SET col1 = 0 WHERE col1 > 1;
79+
INSERT INTO $table1 VALUES(1);
80+
]);
81+
82+
# We will later move the primary forward while the standby is disconnected.
83+
# For now, however, there is no reason not to wait for the standby to catch
84+
# up.
85+
my $primary_lsn = $node_primary->lsn('flush');
86+
$node_primary->wait_for_catchup($node_replica, 'replay', $primary_lsn);
87+
88+
# Test that the WAL receiver is up and running.
89+
# poll_query_until() returns false when it times out, so check the result:
# continuing without a running WAL receiver would invalidate the later steps.
$node_replica->poll_query_until(
    $test_db, qq[
    select exists (select * from pg_stat_wal_receiver);], 't')
  or die "timed out waiting for the WAL receiver to start on the standby";
91+
92+
# Set primary_conninfo to something invalid on the replica and reload the
93+
# config. Once the config is reloaded, the startup process will force the WAL
94+
# receiver to restart and it will be unable to reconnect because of the
95+
# invalid connection information.
96+
$node_replica->safe_psql($test_db, qq[
97+
ALTER SYSTEM SET primary_conninfo = '';
98+
SELECT pg_reload_conf();
99+
]);
100+
101+
# Wait until the WAL receiver has shut down and been unable to start up again.
102+
# poll_query_until() returns false when it times out, so check the result:
# the standby must really be disconnected before the primary moves ahead.
$node_replica->poll_query_until(
    $test_db, qq[
    select exists (select * from pg_stat_wal_receiver);], 'f')
  or die "timed out waiting for the WAL receiver to shut down on the standby";
104+
105+
# Now insert and update a tuple which will be visible to the vacuum on the
106+
# primary but which will have xmax newer than the oldest xmin on the standby
107+
# that was recently disconnected.
108+
my $res = $psql_primaryA->query_safe(
109+
qq[
110+
INSERT INTO $table1 VALUES (99);
111+
UPDATE $table1 SET col1 = 100 WHERE col1 = 99;
112+
SELECT 'after_update';
113+
]
114+
);
115+
116+
# Make sure the UPDATE finished
117+
like($res, qr/^after_update$/m, "UPDATE occurred on primary session A");
118+
119+
# Open a cursor on the primary whose pin will keep VACUUM from getting a
120+
# cleanup lock on the first page of the relation. We want VACUUM to be able to
121+
# start, calculate initial values for OldestXmin and GlobalVisState and then
122+
# be unable to proceed with pruning our dead tuples. This will allow us to
123+
# reconnect the standby and push the horizon back before we start actual
124+
# pruning and vacuuming.
125+
my $primary_cursor1 = "vac_horizon_floor_cursor";
126+
127+
# The first value inserted into the table was a 1, so FETCH FORWARD should
128+
# return a 1. That's how we know the cursor has a pin.
129+
$res = $psql_primaryB->query_safe(
130+
qq[
131+
BEGIN;
132+
DECLARE $primary_cursor1 CURSOR FOR SELECT * FROM $table1 WHERE col1 = 1;
133+
FETCH FORWARD FROM $primary_cursor1;
134+
]
135+
);
136+
137+
is($res, 1, qq[Cursor query returned $res. Expected value 1.]);
138+
139+
# Get the PID of the session which will run the VACUUM FREEZE so that we can
140+
# use it to filter pg_stat_activity later.
141+
my $vacuum_pid = $psql_primaryA->query_safe("SELECT pg_backend_pid();");
142+
143+
# Now start a VACUUM FREEZE on the primary. It will call vacuum_get_cutoffs()
144+
# and establish values of OldestXmin and GlobalVisState which are newer than
145+
# all of our dead tuples. Then it will be unable to get a cleanup lock to
146+
# start pruning, so it will hang. We use VACUUM FREEZE because it will wait
147+
# for a cleanup lock instead of skipping the page pinned by the cursor.
148+
$psql_primaryA->{stdin} .= qq[
149+
VACUUM (VERBOSE, FREEZE) $table1;
150+
\\echo VACUUM
151+
];
152+
153+
# Make sure the VACUUM command makes it to the server.
154+
$psql_primaryA->{run}->pump_nb();
155+
156+
# Make sure that the VACUUM has already called vacuum_get_cutoffs() and is
157+
# just waiting on the lock to start vacuuming. We don't want the standby to
158+
# re-establish a connection to the primary and push the horizon back until
159+
# we've saved initial values in GlobalVisState and calculated OldestXmin.
160+
# poll_query_until() returns false when it times out. Stop in that case: if
# the VACUUM backend never reaches the BufferPin wait, reconnecting the
# standby afterwards would not reproduce the intended ordering of events.
$node_primary->poll_query_until(
    $test_db, qq[
    SELECT count(*) >= 1 FROM pg_stat_activity
        WHERE pid = $vacuum_pid
        AND wait_event = 'BufferPin';
    ], 't')
  or die
  "timed out waiting for VACUUM (pid $vacuum_pid) to wait on a buffer pin";
167+
168+
# Ensure the WAL receiver is still not active on the replica.
169+
# poll_query_until() returns false when it times out, so check the result:
# the WAL receiver must still be down at this point in the test.
$node_replica->poll_query_until(
    $test_db, qq[
    select exists (select * from pg_stat_wal_receiver);], 'f')
  or die "timed out waiting for the WAL receiver to be inactive on the standby";
171+
172+
# Allow the WAL receiver connection to re-establish.
173+
$node_replica->safe_psql(
174+
$test_db, qq[
175+
ALTER SYSTEM SET primary_conninfo = '$orig_conninfo';
176+
SELECT pg_reload_conf();
177+
]);
178+
179+
# Ensure the new WAL receiver has connected.
180+
# poll_query_until() returns false when it times out, so check the result:
# without a reconnected WAL receiver the horizon on the primary cannot be
# pushed back.
$node_replica->poll_query_until(
    $test_db, qq[
    select exists (select * from pg_stat_wal_receiver);], 't')
  or die "timed out waiting for the WAL receiver to reconnect on the standby";
182+
183+
# Once the WAL sender is shown on the primary, the replica should have
184+
# connected with the primary and pushed the horizon backward. Primary Session
185+
# A won't see that until the VACUUM FREEZE proceeds and does its first round
186+
# of index vacuuming.
187+
# poll_query_until() returns false when it times out, so check the result
# instead of assuming that the standby is connected to the primary.
$node_primary->poll_query_until(
    $test_db, qq[
    select exists (select * from pg_stat_replication);], 't')
  or die "timed out waiting for the standby to appear in pg_stat_replication";
189+
190+
# Move the cursor forward to the next 1. We inserted the 1 much later, so
191+
# advancing the cursor should allow vacuum to proceed vacuuming most pages of
192+
# the relation. Because we set maintenance_work_mem sufficiently low, we
193+
# expect that a round of index vacuuming has happened and that the vacuum is
194+
# now waiting for the cursor to release its pin on the last page of the
195+
# relation.
196+
$res = $psql_primaryB->query_safe("FETCH $primary_cursor1");
197+
is($res, 1,
198+
qq[Cursor query returned $res from second fetch. Expected value 1.]);
199+
200+
# Prevent the test from incorrectly passing by confirming that we did indeed
201+
# do a pass of index vacuuming.
202+
# poll_query_until() returns false when it times out. The result has to be
# checked for this guard to mean anything: if no round of index vacuuming is
# ever reported, abort instead of letting the test pass for the wrong reason.
$node_primary->poll_query_until(
    $test_db, qq[
    SELECT index_vacuum_count > 0
    FROM pg_stat_progress_vacuum
    WHERE datname='$test_db' AND relid::regclass = '$table1'::regclass;
    ], 't')
  or die
  "timed out waiting for VACUUM to complete a round of index vacuuming on $table1";
207+
208+
# Commit the transaction with the open cursor so that the VACUUM can finish.
209+
$psql_primaryB->query_until(
210+
qr/^commit$/m,
211+
qq[
212+
COMMIT;
213+
\\echo commit
214+
]
215+
);
216+
217+
# VACUUM proceeds with pruning and does a visibility check on each tuple. In
218+
# older versions of Postgres, pruning found our final dead tuple
219+
# non-removable (HEAPTUPLE_RECENTLY_DEAD) since its xmax is after the new
220+
# value of maybe_needed. Then lazy_scan_prune() would infinitely loop
221+
# because HeapTupleSatisfiesVacuum() would find the tuple HEAPTUPLE_DEAD
222+
# because its xmax preceded OldestXmin. This was fixed by passing
223+
# OldestXmin to heap_page_prune() and removing all tuples whose xmaxes
224+
# precede OldestXmin.
225+
#
226+
# With the fix, VACUUM should finish successfully, incrementing the table
227+
# vacuum_count.
228+
# poll_query_until() returns false when it times out. This is the central
# expectation of the test, so enforce it: if vacuum_count never becomes
# positive, the VACUUM did not finish and the test must not carry on.
$node_primary->poll_query_until(
    $test_db, qq[
    SELECT vacuum_count > 0
    FROM pg_stat_all_tables WHERE relname = '${table1}';
    ], 't')
  or die "timed out waiting for VACUUM of $table1 to finish";
234+
235+
$primary_lsn = $node_primary->lsn('flush');
236+
237+
# Make sure something causes us to flush
238+
$node_primary->safe_psql($test_db, "INSERT INTO $table1 VALUES (1);");
239+
240+
# Nothing on the replica should cause a recovery conflict, so this should
241+
# finish successfully.
242+
$node_primary->wait_for_catchup($node_replica, 'replay', $primary_lsn);
243+
244+
# Shut down the background psql sessions
245+
$psql_primaryA->quit;
246+
$psql_primaryB->quit;
247+
248+
$node_replica->stop();
249+
$node_primary->stop();
250+
251+
done_testing();

0 commit comments

Comments
 (0)