use strict;
use warnings;
use PostgreSQL::Test::Cluster;
use Test::More;

# Test that vacuum prunes away all dead tuples killed before OldestXmin

# Set up nodes
my $node_primary = PostgreSQL::Test::Cluster->new('primary');
$node_primary->init(allows_streaming => 'physical');

$node_primary->append_conf(
	'postgresql.conf', qq[
hot_standby_feedback = on
autovacuum = off
log_min_messages = INFO
maintenance_work_mem = 1024
]);
$node_primary->start;

my $node_replica = PostgreSQL::Test::Cluster->new('standby');

$node_primary->backup('my_backup');
$node_replica->init_from_backup($node_primary, 'my_backup',
	has_streaming => 1);

$node_replica->start;

my $test_db = "test_db";
$node_primary->safe_psql('postgres', "CREATE DATABASE $test_db");

# Save the original connection info for later use
my $orig_conninfo = $node_primary->connstr();

my $table1 = "vac_horizon_floor_table";

# Long-running Primary Session A
my $psql_primaryA =
	$node_primary->background_psql($test_db, on_error_stop => 1);

# Long-running Primary Session B
my $psql_primaryB =
	$node_primary->background_psql($test_db, on_error_stop => 1);

# Because vacuum's first pass, pruning, is where we use the GlobalVisState to
# check tuple visibility, GlobalVisState->maybe_needed must move backwards
# during pruning before checking the visibility for a tuple which would have
# been considered HEAPTUPLE_DEAD prior to maybe_needed moving backwards but
# HEAPTUPLE_RECENTLY_DEAD compared to the new, older value of maybe_needed.
#
# We must not only force the horizon on the primary to move backwards but also
# force the vacuuming backend's GlobalVisState to be updated. GlobalVisState
# is forced to update during index vacuuming.
#
# _bt_pendingfsm_finalize() calls GetOldestNonRemovableTransactionId() at the
# end of a round of index vacuuming, updating the backend's GlobalVisState
# and, in our case, moving maybe_needed backwards.
#
# Then vacuum's first (pruning) pass will continue and pruning will find our
# later inserted and updated tuple HEAPTUPLE_RECENTLY_DEAD when compared to
# maybe_needed but HEAPTUPLE_DEAD when compared to OldestXmin.
#
# Thus, we must force at least two rounds of index vacuuming to ensure that
# some tuple visibility checks will happen after a round of index vacuuming.
# To accomplish this, we set maintenance_work_mem to its minimum value and
# insert and update enough rows that we force at least one round of index
# vacuuming before getting to a dead tuple which was killed after the
# standby is disconnected.
$node_primary->safe_psql($test_db, qq[
	CREATE TABLE ${table1}(col1 int) with (autovacuum_enabled=false);
	INSERT INTO $table1 SELECT generate_series(1, 200000);
	CREATE INDEX on ${table1}(col1);
	UPDATE $table1 SET col1 = 0 WHERE col1 > 1;
	INSERT INTO $table1 VALUES(1);
]);

# We will later move the primary forward while the standby is disconnected.
# For now, however, there is no reason not to wait for the standby to catch
# up.
my $primary_lsn = $node_primary->lsn('flush');
$node_primary->wait_for_catchup($node_replica, 'replay', $primary_lsn);

# Test that the WAL receiver is up and running.
$node_replica->poll_query_until($test_db, qq[
	select exists (select * from pg_stat_wal_receiver);], 't');

# Set primary_conninfo to something invalid on the replica and reload the
# config. Once the config is reloaded, the startup process will force the WAL
# receiver to restart and it will be unable to reconnect because of the
# invalid connection information.
$node_replica->safe_psql($test_db, qq[
	ALTER SYSTEM SET primary_conninfo = '';
	SELECT pg_reload_conf();
	]);

# Wait until the WAL receiver has shut down and been unable to start up again.
$node_replica->poll_query_until($test_db, qq[
	select exists (select * from pg_stat_wal_receiver);], 'f');

# Now insert and update a tuple which will be visible to the vacuum on the
# primary but which will have xmax newer than the oldest xmin on the standby
# that was recently disconnected.
my $res = $psql_primaryA->query_safe(
	qq[
	INSERT INTO $table1 VALUES (99);
	UPDATE $table1 SET col1 = 100 WHERE col1 = 99;
	SELECT 'after_update';
	]
);

# Make sure the UPDATE finished
like($res, qr/^after_update$/m, "UPDATE occurred on primary session A");

# Open a cursor on the primary whose pin will keep VACUUM from getting a
# cleanup lock on the first page of the relation. We want VACUUM to be able to
# start, calculate initial values for OldestXmin and GlobalVisState and then
# be unable to proceed with pruning our dead tuples. This will allow us to
# reconnect the standby and push the horizon back before we start actual
# pruning and vacuuming.
my $primary_cursor1 = "vac_horizon_floor_cursor";

# The first value inserted into the table was a 1, so FETCH FORWARD should
# return a 1. That's how we know the cursor has a pin.
$res = $psql_primaryB->query_safe(
	qq[
	BEGIN;
	DECLARE $primary_cursor1 CURSOR FOR SELECT * FROM $table1 WHERE col1 = 1;
	FETCH FORWARD FROM $primary_cursor1;
	]
);

is($res, 1, qq[Cursor query returned $res. Expected value 1.]);

# Get the PID of the session which will run the VACUUM FREEZE so that we can
# use it to filter pg_stat_activity later.
my $vacuum_pid = $psql_primaryA->query_safe("SELECT pg_backend_pid();");

# Now start a VACUUM FREEZE on the primary. It will call vacuum_get_cutoffs()
# and establish values of OldestXmin and GlobalVisState which are newer than
# all of our dead tuples. Then it will be unable to get a cleanup lock to
# start pruning, so it will hang. We use VACUUM FREEZE because it will wait
# for a cleanup lock instead of skipping the page pinned by the cursor.
$psql_primaryA->{stdin} .= qq[
	VACUUM (VERBOSE, FREEZE) $table1;
	\\echo VACUUM
	];

# Make sure the VACUUM command makes it to the server.
$psql_primaryA->{run}->pump_nb();

# Make sure that the VACUUM has already called vacuum_get_cutoffs() and is
# just waiting on the lock to start vacuuming. We don't want the standby to
# re-establish a connection to the primary and push the horizon back until
# we've saved initial values in GlobalVisState and calculated OldestXmin.
$node_primary->poll_query_until($test_db,
	qq[
	SELECT count(*) >= 1 FROM pg_stat_activity
		WHERE pid = $vacuum_pid
		AND wait_event = 'BufferPin';
	],
	't');

# Ensure the WAL receiver is still not active on the replica.
$node_replica->poll_query_until($test_db, qq[
	select exists (select * from pg_stat_wal_receiver);], 'f');

# Allow the WAL receiver connection to re-establish.
$node_replica->safe_psql(
	$test_db, qq[
	ALTER SYSTEM SET primary_conninfo = '$orig_conninfo';
	SELECT pg_reload_conf();
	]);

# Ensure the new WAL receiver has connected.
$node_replica->poll_query_until($test_db, qq[
	select exists (select * from pg_stat_wal_receiver);], 't');

# Once the WAL sender is shown on the primary, the replica should have
# connected with the primary and pushed the horizon backward. Primary Session
# A won't see that until the VACUUM FREEZE proceeds and does its first round
# of index vacuuming.
$node_primary->poll_query_until($test_db, qq[
	select exists (select * from pg_stat_replication);], 't');

# Move the cursor forward to the next 1. We inserted the 1 much later, so
# advancing the cursor should allow vacuum to proceed vacuuming most pages of
# the relation. Because we set maintenance_work_mem sufficiently low, we
# expect that a round of index vacuuming has happened and that the vacuum is
# now waiting for the cursor to release its pin on the last page of the
# relation.
$res = $psql_primaryB->query_safe("FETCH $primary_cursor1");
is($res, 1,
	qq[Cursor query returned $res from second fetch. Expected value 1.]);

# Prevent the test from incorrectly passing by confirming that we did indeed
# do a pass of index vacuuming.
$node_primary->poll_query_until($test_db, qq[
	SELECT index_vacuum_count > 0
	FROM pg_stat_progress_vacuum
	WHERE datname='$test_db' AND relid::regclass = '$table1'::regclass;
	], 't');

# Commit the transaction with the open cursor so that the VACUUM can finish.
$psql_primaryB->query_until(
	qr/^commit$/m,
	qq[
	COMMIT;
	\\echo commit
	]
);

# VACUUM proceeds with pruning and does a visibility check on each tuple. In
# older versions of Postgres, pruning found our final dead tuple
# non-removable (HEAPTUPLE_RECENTLY_DEAD) since its xmax is after the new
# value of maybe_needed. Then lazy_scan_prune() would infinitely loop
# because HeapTupleSatisfiesVacuum() would find the tuple HEAPTUPLE_DEAD
# because its xmax preceded OldestXmin. This was fixed by passing
# OldestXmin to heap_page_prune() and removing all tuples whose xmaxes
# precede OldestXmin.
#
# With the fix, VACUUM should finish successfully, incrementing the table
# vacuum_count.
$node_primary->poll_query_until($test_db,
	qq[
	SELECT vacuum_count > 0
	FROM pg_stat_all_tables WHERE relname = '${table1}';
	],
	't');

$primary_lsn = $node_primary->lsn('flush');

# Make sure something causes us to flush
$node_primary->safe_psql($test_db, "INSERT INTO $table1 VALUES (1);");

# Nothing on the replica should cause a recovery conflict, so this should
# finish successfully.
$node_primary->wait_for_catchup($node_replica, 'replay', $primary_lsn);

## Shut down psqls
$psql_primaryA->quit;
$psql_primaryB->quit;

$node_replica->stop();
$node_primary->stop();

done_testing();