Skip to content

Commit 86392e8

Browse files
author
Amit Kapila
committed
Stabilize 035_standby_logical_decoding.pl.
Some tests try to invalidate logical slots on the standby server by running VACUUM on the primary. The problem is that xl_running_xacts was getting generated and replayed before the VACUUM command, leading to the advancement of the active slot's catalog_xmin. Due to this, active slots were not getting invalidated, leading to test failures.

We fix it by skipping the generation of xl_running_xacts for the required tests with the help of injection points. As the required interface for injection points was not present in back branches, we fixed the failing tests in them by preventing the slot from becoming active for the required cases (where a rows_removed conflict could be generated).

Author: Hayato Kuroda <kuroda.hayato@fujitsu.com>
Reviewed-by: Bertrand Drouvot <bertranddrouvot.pg@gmail.com>
Reviewed-by: Amit Kapila <amit.kapila16@gmail.com>
Backpatch-through: 16, where it was introduced
Discussion: https://postgr.es/m/Z6oQXc8LmiTLfwLA@ip-10-97-1-34.eu-west-3.compute.internal
1 parent e2a82cd commit 86392e8

File tree

1 file changed

+29
-20
lines changed

1 file changed

+29
-20
lines changed

src/test/recovery/t/035_standby_logical_decoding.pl

Lines changed: 29 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -205,17 +205,14 @@ sub reactive_slots_change_hfs_and_wait_for_xmins
205205

206206
change_hot_standby_feedback_and_wait_for_xmins($hsf, $invalidated);
207207

208-
$handle =
209-
make_slot_active($node_standby, $slot_prefix, 1, \$stdout, \$stderr);
210-
211208
# reset stat: easier to check for confl_active_logicalslot in pg_stat_database_conflicts
212209
$node_standby->psql('testdb', q[select pg_stat_reset();]);
213210
}
214211

215212
# Check invalidation in the logfile and in pg_stat_database_conflicts
216213
sub check_for_invalidation
217214
{
218-
my ($slot_prefix, $log_start, $test_name) = @_;
215+
my ($slot_prefix, $log_start, $test_name, $checks_active_slot) = @_;
219216

220217
my $active_slot = $slot_prefix . 'activeslot';
221218
my $inactive_slot = $slot_prefix . 'inactiveslot';
@@ -231,13 +228,17 @@ sub check_for_invalidation
231228
$log_start),
232229
"activeslot slot invalidation is logged $test_name");
233230

234-
# Verify that pg_stat_database_conflicts.confl_active_logicalslot has been updated
235-
ok( $node_standby->poll_query_until(
236-
'postgres',
237-
"select (confl_active_logicalslot = 1) from pg_stat_database_conflicts where datname = 'testdb'",
238-
't'),
239-
'confl_active_logicalslot updated'
240-
) or die "Timed out waiting confl_active_logicalslot to be updated";
231+
if ($checks_active_slot)
232+
{
233+
# Verify that pg_stat_database_conflicts.confl_active_logicalslot has
234+
# been updated
235+
ok( $node_standby->poll_query_until(
236+
'postgres',
237+
"select (confl_active_logicalslot = 1) from pg_stat_database_conflicts where datname = 'testdb'",
238+
't'),
239+
'confl_active_logicalslot updated'
240+
) or die "Timed out waiting confl_active_logicalslot to be updated";
241+
}
241242
}
242243

243244
# Launch $sql query, wait for a new snapshot that has a newer horizon and
@@ -250,7 +251,11 @@ sub check_for_invalidation
250251
# seeing a xl_running_xacts that would advance an active replication slot's
251252
# catalog_xmin. Advancing the active replication slot's catalog_xmin
252253
# would break some tests that expect the active slot to conflict with
253-
# the catalog xmin horizon.
254+
# the catalog xmin horizon. Even with the above precaution, there is a risk
255+
# of xl_running_xacts record being logged and replayed before the VACUUM
256+
# command, leading to test failures. So, we ensure that replication slots
257+
# are not activated for tests that can invalidate slots due to 'rows_removed'
258+
# conflict reason.
254259
sub wait_until_vacuum_can_remove
255260
{
256261
my ($vac_option, $sql, $to_vac) = @_;
@@ -532,7 +537,7 @@ sub wait_until_vacuum_can_remove
532537
$node_subscriber->stop;
533538

534539
##################################################
535-
# Recovery conflict: Invalidate conflicting slots, including in-use slots
540+
# Recovery conflict: Invalidate conflicting slots
536541
# Scenario 1: hot_standby_feedback off and vacuum FULL
537542
##################################################
538543

@@ -550,7 +555,7 @@ sub wait_until_vacuum_can_remove
550555
$node_primary->wait_for_replay_catchup($node_standby);
551556

552557
# Check invalidation in the logfile and in pg_stat_database_conflicts
553-
check_for_invalidation('vacuum_full_', 1, 'with vacuum FULL on pg_class');
558+
check_for_invalidation('vacuum_full_', 1, 'with vacuum FULL on pg_class', 0);
554559

555560
# Verify slots are reported as conflicting in pg_replication_slots
556561
check_slots_conflicting_status(1);
@@ -620,7 +625,7 @@ sub wait_until_vacuum_can_remove
620625
"invalidated logical slots do not lead to retaining WAL");
621626

622627
##################################################
623-
# Recovery conflict: Invalidate conflicting slots, including in-use slots
628+
# Recovery conflict: Invalidate conflicting slots
624629
# Scenario 2: conflict due to row removal with hot_standby_feedback off.
625630
##################################################
626631

@@ -641,7 +646,7 @@ sub wait_until_vacuum_can_remove
641646
$node_primary->wait_for_replay_catchup($node_standby);
642647

643648
# Check invalidation in the logfile and in pg_stat_database_conflicts
644-
check_for_invalidation('row_removal_', $logstart, 'with vacuum on pg_class');
649+
check_for_invalidation('row_removal_', $logstart, 'with vacuum on pg_class', 0);
645650

646651
# Verify slots are reported as conflicting in pg_replication_slots
647652
check_slots_conflicting_status(1);
@@ -677,7 +682,7 @@ sub wait_until_vacuum_can_remove
677682

678683
# Check invalidation in the logfile and in pg_stat_database_conflicts
679684
check_for_invalidation('shared_row_removal_', $logstart,
680-
'with vacuum on pg_authid');
685+
'with vacuum on pg_authid', 0);
681686

682687
# Verify slots are reported as conflicting in pg_replication_slots
683688
check_slots_conflicting_status(1);
@@ -701,6 +706,10 @@ sub wait_until_vacuum_can_remove
701706
reactive_slots_change_hfs_and_wait_for_xmins('shared_row_removal_',
702707
'no_conflict_', 0, 1);
703708

709+
# This scenario is not expected to produce any conflict, so activate the slot.
710+
# See comments atop wait_until_vacuum_can_remove().
711+
make_slot_active($node_standby, 'no_conflict_', 1, \$stdout, \$stderr);
712+
704713
# This should not trigger a conflict
705714
wait_until_vacuum_can_remove(
706715
'', 'CREATE TABLE conflict_test(x integer, y text);
@@ -738,7 +747,7 @@ sub wait_until_vacuum_can_remove
738747
$node_standby->restart;
739748

740749
##################################################
741-
# Recovery conflict: Invalidate conflicting slots, including in-use slots
750+
# Recovery conflict: Invalidate conflicting slots
742751
# Scenario 5: conflict due to on-access pruning.
743752
##################################################
744753

@@ -763,7 +772,7 @@ sub wait_until_vacuum_can_remove
763772
$node_primary->wait_for_replay_catchup($node_standby);
764773

765774
# Check invalidation in the logfile and in pg_stat_database_conflicts
766-
check_for_invalidation('pruning_', $logstart, 'with on-access pruning');
775+
check_for_invalidation('pruning_', $logstart, 'with on-access pruning', 0);
767776

768777
# Verify slots are reported as conflicting in pg_replication_slots
769778
check_slots_conflicting_status(1);
@@ -807,7 +816,7 @@ sub wait_until_vacuum_can_remove
807816
$node_primary->wait_for_replay_catchup($node_standby);
808817

809818
# Check invalidation in the logfile and in pg_stat_database_conflicts
810-
check_for_invalidation('wal_level_', $logstart, 'due to wal_level');
819+
check_for_invalidation('wal_level_', $logstart, 'due to wal_level', 1);
811820

812821
# Verify slots are reported as conflicting in pg_replication_slots
813822
check_slots_conflicting_status(1);

0 commit comments

Comments
 (0)