Skip to content

Commit 17a165d

Browse files
author
Amit Kapila
committed
Stabilize 035_standby_logical_decoding.pl.
Some tests try to invalidate logical slots on the standby server by running VACUUM on the primary. The problem is that xl_running_xacts was getting generated and replayed before the VACUUM command, leading to the advancement of the active slot's catalog_xmin. Due to this, active slots were not getting invalidated, leading to test failures.

We fix it by skipping the generation of xl_running_xacts for the required tests with the help of injection points. As the required interface for injection points was not present in back branches, we fixed the failing tests in them by disallowing the slot to become active for the required cases (where rows_removed conflict could be generated).

Author: Hayato Kuroda <kuroda.hayato@fujitsu.com>
Reviewed-by: Bertrand Drouvot <bertranddrouvot.pg@gmail.com>
Reviewed-by: Amit Kapila <amit.kapila16@gmail.com>
Backpatch-through: 16, where it was introduced
Discussion: https://postgr.es/m/Z6oQXc8LmiTLfwLA@ip-10-97-1-34.eu-west-3.compute.internal
1 parent b8b1e87 commit 17a165d

File tree

1 file changed

+29
-35
lines changed

1 file changed

+29
-35
lines changed

src/test/recovery/t/035_standby_logical_decoding.pl

Lines changed: 29 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -205,17 +205,14 @@ sub reactive_slots_change_hfs_and_wait_for_xmins
205205

206206
change_hot_standby_feedback_and_wait_for_xmins($hsf, $invalidated);
207207

208-
$handle =
209-
make_slot_active($node_standby, $slot_prefix, 1, \$stdout, \$stderr);
210-
211208
# reset stat: easier to check for confl_active_logicalslot in pg_stat_database_conflicts
212209
$node_standby->psql('testdb', q[select pg_stat_reset();]);
213210
}
214211

215212
# Check invalidation in the logfile and in pg_stat_database_conflicts
216213
sub check_for_invalidation
217214
{
218-
my ($slot_prefix, $log_start, $test_name) = @_;
215+
my ($slot_prefix, $log_start, $test_name, $checks_active_slot) = @_;
219216

220217
my $active_slot = $slot_prefix . 'activeslot';
221218
my $inactive_slot = $slot_prefix . 'inactiveslot';
@@ -231,13 +228,17 @@ sub check_for_invalidation
231228
$log_start),
232229
"activeslot slot invalidation is logged $test_name");
233230

234-
# Verify that pg_stat_database_conflicts.confl_active_logicalslot has been updated
235-
ok( $node_standby->poll_query_until(
236-
'postgres',
237-
"select (confl_active_logicalslot = 1) from pg_stat_database_conflicts where datname = 'testdb'",
238-
't'),
239-
'confl_active_logicalslot updated'
240-
) or die "Timed out waiting confl_active_logicalslot to be updated";
231+
if ($checks_active_slot)
232+
{
233+
# Verify that pg_stat_database_conflicts.confl_active_logicalslot has
234+
# been updated
235+
ok( $node_standby->poll_query_until(
236+
'postgres',
237+
"select (confl_active_logicalslot = 1) from pg_stat_database_conflicts where datname = 'testdb'",
238+
't'),
239+
'confl_active_logicalslot updated'
240+
) or die "Timed out waiting confl_active_logicalslot to be updated";
241+
}
241242
}
242243

243244
# Launch $sql query, wait for a new snapshot that has a newer horizon and
@@ -250,7 +251,11 @@ sub check_for_invalidation
250251
# seeing a xl_running_xacts that would advance an active replication slot's
251252
# catalog_xmin. Advancing the active replication slot's catalog_xmin
252253
# would break some tests that expect the active slot to conflict with
253-
# the catalog xmin horizon.
254+
# the catalog xmin horizon. Even with the above precaution, there is a risk
255+
# of xl_running_xacts record being logged and replayed before the VACUUM
256+
# command, leading to test failures. So, we ensure that replication slots
257+
# are not activated for tests that can invalidate slots due to 'rows_removed'
258+
# conflict reason.
254259
sub wait_until_vacuum_can_remove
255260
{
256261
my ($vac_option, $sql, $to_vac) = @_;
@@ -532,11 +537,8 @@ sub wait_until_vacuum_can_remove
532537
$node_subscriber->stop;
533538

534539
##################################################
535-
# Recovery conflict: Invalidate conflicting slots, including in-use slots
540+
# Recovery conflict: Invalidate conflicting slots
536541
# Scenario 1: hot_standby_feedback off and vacuum FULL
537-
#
538-
# In passing, ensure that replication slot stats are not removed when the
539-
# active slot is invalidated.
540542
##################################################
541543

542544
# One way to produce recovery conflict is to create/drop a relation and
@@ -550,10 +552,6 @@ sub wait_until_vacuum_can_remove
550552
$node_primary->safe_psql('testdb',
551553
qq[INSERT INTO decoding_test(x,y) SELECT 100,'100';]);
552554

553-
$node_standby->poll_query_until('testdb',
554-
qq[SELECT total_txns > 0 FROM pg_stat_replication_slots WHERE slot_name = 'vacuum_full_activeslot']
555-
) or die "replication slot stats of vacuum_full_activeslot not updated";
556-
557555
# This should trigger the conflict
558556
wait_until_vacuum_can_remove(
559557
'full', 'CREATE TABLE conflict_test(x integer, y text);
@@ -562,19 +560,11 @@ sub wait_until_vacuum_can_remove
562560
$node_primary->wait_for_replay_catchup($node_standby);
563561

564562
# Check invalidation in the logfile and in pg_stat_database_conflicts
565-
check_for_invalidation('vacuum_full_', 1, 'with vacuum FULL on pg_class');
563+
check_for_invalidation('vacuum_full_', 1, 'with vacuum FULL on pg_class', 0);
566564

567565
# Verify reason for conflict is 'rows_removed' in pg_replication_slots
568566
check_slots_conflict_reason('vacuum_full_', 'rows_removed');
569567

570-
# Ensure that replication slot stats are not removed after invalidation.
571-
is( $node_standby->safe_psql(
572-
'testdb',
573-
qq[SELECT total_txns > 0 FROM pg_stat_replication_slots WHERE slot_name = 'vacuum_full_activeslot']
574-
),
575-
't',
576-
'replication slot stats not removed after invalidation');
577-
578568
$handle =
579569
make_slot_active($node_standby, 'vacuum_full_', 0, \$stdout, \$stderr);
580570

@@ -639,7 +629,7 @@ sub wait_until_vacuum_can_remove
639629
"invalidated logical slots do not lead to retaining WAL");
640630

641631
##################################################
642-
# Recovery conflict: Invalidate conflicting slots, including in-use slots
632+
# Recovery conflict: Invalidate conflicting slots
643633
# Scenario 2: conflict due to row removal with hot_standby_feedback off.
644634
##################################################
645635

@@ -660,7 +650,7 @@ sub wait_until_vacuum_can_remove
660650
$node_primary->wait_for_replay_catchup($node_standby);
661651

662652
# Check invalidation in the logfile and in pg_stat_database_conflicts
663-
check_for_invalidation('row_removal_', $logstart, 'with vacuum on pg_class');
653+
check_for_invalidation('row_removal_', $logstart, 'with vacuum on pg_class', 0);
664654

665655
# Verify reason for conflict is 'rows_removed' in pg_replication_slots
666656
check_slots_conflict_reason('row_removal_', 'rows_removed');
@@ -696,7 +686,7 @@ sub wait_until_vacuum_can_remove
696686

697687
# Check invalidation in the logfile and in pg_stat_database_conflicts
698688
check_for_invalidation('shared_row_removal_', $logstart,
699-
'with vacuum on pg_authid');
689+
'with vacuum on pg_authid', 0);
700690

701691
# Verify reason for conflict is 'rows_removed' in pg_replication_slots
702692
check_slots_conflict_reason('shared_row_removal_', 'rows_removed');
@@ -720,6 +710,10 @@ sub wait_until_vacuum_can_remove
720710
reactive_slots_change_hfs_and_wait_for_xmins('shared_row_removal_',
721711
'no_conflict_', 0, 1);
722712

713+
# This scenario is not expected to produce any conflict, so activate the slot.
714+
# See comments atop wait_until_vacuum_can_remove().
715+
make_slot_active($node_standby, 'no_conflict_', 1, \$stdout, \$stderr);
716+
723717
# This should not trigger a conflict
724718
wait_until_vacuum_can_remove(
725719
'', 'CREATE TABLE conflict_test(x integer, y text);
@@ -763,7 +757,7 @@ sub wait_until_vacuum_can_remove
763757
$node_standby->restart;
764758

765759
##################################################
766-
# Recovery conflict: Invalidate conflicting slots, including in-use slots
760+
# Recovery conflict: Invalidate conflicting slots
767761
# Scenario 5: conflict due to on-access pruning.
768762
##################################################
769763

@@ -788,7 +782,7 @@ sub wait_until_vacuum_can_remove
788782
$node_primary->wait_for_replay_catchup($node_standby);
789783

790784
# Check invalidation in the logfile and in pg_stat_database_conflicts
791-
check_for_invalidation('pruning_', $logstart, 'with on-access pruning');
785+
check_for_invalidation('pruning_', $logstart, 'with on-access pruning', 0);
792786

793787
# Verify reason for conflict is 'rows_removed' in pg_replication_slots
794788
check_slots_conflict_reason('pruning_', 'rows_removed');
@@ -832,7 +826,7 @@ sub wait_until_vacuum_can_remove
832826
$node_primary->wait_for_replay_catchup($node_standby);
833827

834828
# Check invalidation in the logfile and in pg_stat_database_conflicts
835-
check_for_invalidation('wal_level_', $logstart, 'due to wal_level');
829+
check_for_invalidation('wal_level_', $logstart, 'due to wal_level', 1);
836830

837831
# Verify reason for conflict is 'wal_level_insufficient' in pg_replication_slots
838832
check_slots_conflict_reason('wal_level_', 'wal_level_insufficient');

0 commit comments

Comments
 (0)