Skip to content

Commit 2aecbd7

Browse files
committed
Log more info when wait-for-catchup tests time out.
Cluster.pm's wait_for_catchup and allied subroutines don't provide enough information to diagnose the problem when a wait times out. In hopes of debugging some intermittent buildfarm failures, let's dump the ending state of the relevant system view when that happens.

Add this to v17 too, but not stable branches.

Discussion: https://postgr.es/m/352068.1723422725@sss.pgh.pa.us
1 parent 760162f commit 2aecbd7

File tree

1 file changed

+23
-4
lines changed

1 file changed

+23
-4
lines changed

src/test/perl/PostgreSQL/Test/Cluster.pm

Lines changed: 23 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2948,6 +2948,11 @@ sub wait_for_catchup
29482948
}
29492949
else
29502950
{
2951+
# Fetch additional detail for debugging purposes
2952+
$query = qq[SELECT * FROM pg_catalog.pg_stat_replication];
2953+
my $details = $self->safe_psql('postgres', $query);
2954+
diag qq(Last pg_stat_replication contents:
2955+
${details});
29512956
croak "timed out waiting for catchup";
29522957
}
29532958
}
@@ -3015,8 +3020,15 @@ sub wait_for_slot_catchup
30153020
. $self->name . "\n";
30163021
my $query =
30173022
qq[SELECT '$target_lsn' <= ${mode}_lsn FROM pg_catalog.pg_replication_slots WHERE slot_name = '$slot_name';];
3018-
$self->poll_query_until('postgres', $query)
3019-
or croak "timed out waiting for catchup";
3023+
if (!$self->poll_query_until('postgres', $query))
3024+
{
3025+
# Fetch additional detail for debugging purposes
3026+
$query = qq[SELECT * FROM pg_catalog.pg_replication_slots];
3027+
my $details = $self->safe_psql('postgres', $query);
3028+
diag qq(Last pg_replication_slots contents:
3029+
${details});
3030+
croak "timed out waiting for catchup";
3031+
}
30203032
print "done\n";
30213033
return;
30223034
}
@@ -3051,8 +3063,15 @@ sub wait_for_subscription_sync
30513063
print "Waiting for all subscriptions in \"$name\" to synchronize data\n";
30523064
my $query =
30533065
qq[SELECT count(1) = 0 FROM pg_subscription_rel WHERE srsubstate NOT IN ('r', 's');];
3054-
$self->poll_query_until($dbname, $query)
3055-
or croak "timed out waiting for subscriber to synchronize data";
3066+
if (!$self->poll_query_until($dbname, $query))
3067+
{
3068+
# Fetch additional detail for debugging purposes
3069+
$query = qq[SELECT * FROM pg_subscription_rel];
3070+
my $details = $self->safe_psql($dbname, $query);
3071+
diag qq(Last pg_subscription_rel contents:
3072+
${details});
3073+
croak "timed out waiting for subscriber to synchronize data";
3074+
}
30563075

30573076
# Then, wait for the replication to catchup if required.
30583077
if (defined($publisher))

0 commit comments

Comments
 (0)