Skip to content

Commit 6eb5b9a

Browse files
committed
Fix corner case failure of new standby to follow new primary.
This only happens if (1) the new standby has no WAL available locally, (2) the new standby is starting from the old timeline, (3) the promotion happened in the WAL segment from which the new standby is starting, (4) the timeline history file for the new timeline is available from the archive but the WAL files for it are not (i.e. this is a race), (5) the WAL files for the new timeline are available via streaming, and (6) recovery_target_timeline='latest'. Commit ee99427 introduced this logic and was an improvement over the previous code, but it mishandled this case. If recovery_target_timeline='latest' and restore_command is set, validateRecoveryParameters() can change recoveryTargetTLI to be different from receiveTLI. If streaming is then tried afterward, expectedTLEs gets initialized with the history of the wrong timeline. It's supposed to be a list of entries explaining how to get to the target timeline, but in this case it ends up with a list of entries explaining how to get to the new standby's original timeline, which isn't right. Dilip Kumar and Robert Haas, reviewed by Kyotaro Horiguchi. Discussion: http://postgr.es/m/CAFiTN-sE-jr=LB8jQuxeqikd-Ux+jHiXyh4YDiZMPedgQKup0g@mail.gmail.com
1 parent 75212a8 commit 6eb5b9a

File tree

3 files changed

+126
-1
lines changed

3 files changed

+126
-1
lines changed

src/backend/access/transam/xlog.c

+9-1
Original file line numberDiff line numberDiff line change
@@ -11835,11 +11835,19 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess,
1183511835
* pg_xlog by now. Use XLOG_FROM_STREAM so that
1183611836
* source info is set correctly and XLogReceiptTime
1183711837
* isn't changed.
11838+
*
11839+
* NB: We must set readTimeLineHistory based on
11840+
* recoveryTargetTLI, not receiveTLI. Normally they'll
11841+
* be the same, but if recovery_target_timeline is
11842+
* 'latest' and archiving is configured, then it's
11843+
* possible that we managed to retrieve one or more
11844+
* new timeline history files from the archive,
11845+
* updating recoveryTargetTLI.
1183811846
*/
1183911847
if (readFile < 0)
1184011848
{
1184111849
if (!expectedTLEs)
11842-
expectedTLEs = readTimeLineHistory(receiveTLI);
11850+
expectedTLEs = readTimeLineHistory(recoveryTargetTLI);
1184311851
readFile = XLogFileRead(readSegNo, PANIC,
1184411852
receiveTLI,
1184511853
XLOG_FROM_STREAM, false);
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,107 @@
# Copyright (c) 2021, PostgreSQL Global Development Group

# Testing streaming replication where standby is promoted and a new cascading
# standby (without WAL) is connected to the promoted standby. Both archiving
# and streaming are enabled, but only the history file is available from the
# archive, so the WAL files all have to be streamed. Test that the cascading
# standby can follow the new primary (promoted standby).
use strict;
use warnings;
use PostgresNode;
use TestLib;
use FindBin;
use Test::More tests => 1;

# Initialize primary node
my $node_primary = get_new_node('primary');

# Set up an archive command that will copy the history file but not the WAL
# files. No real archive command should behave this way; the point is to
# simulate a race condition where the new cascading standby starts up after
# the timeline history file reaches the archive but before any of the WAL files
# get there.
$node_primary->init(allows_streaming => 1, has_archiving => 1);

# Path of the running perl interpreter, used to invoke the helper script.
# On Windows every backslash is doubled before the path is written into
# postgresql.conf (presumably so it survives the config file's string
# parsing -- confirm against the GUC quoting rules).
my $perlbin = $^X;
$perlbin =~ s{\\}{\\\\}g if ($TestLib::windows_os);
my $archivedir_primary = $node_primary->archive_dir;
$node_primary->append_conf('postgresql.conf', qq(
archive_command = '$perlbin "$FindBin::RealBin/cp_history_files" "%p" "$archivedir_primary/%f"'
));
$node_primary->start;

# Take backup from primary
my $backup_name = 'my_backup';
$node_primary->backup($backup_name);

# Create streaming standby linking to primary
my $node_standby = get_new_node('standby');
$node_standby->init_from_backup(
	$node_primary, $backup_name,
	allows_streaming => 1,
	has_streaming    => 1,
	has_archiving    => 1);
$node_standby->start;

# Take backup of standby.
$node_standby->backup($backup_name);

# Clear out WAL files from pg_xlog so that when we create the cascading
# standby it will start up with no WAL available. Only plain files are
# removed; subdirectories of pg_xlog are left in place.
my $pgxlogdir = $node_standby->backup_dir . "/" . $backup_name . "/pg_xlog";
opendir(my $dh, $pgxlogdir) or die "failed to open $pgxlogdir: $!";
while (my $f = readdir($dh))
{
	next if -d "$pgxlogdir/$f";
	unlink("$pgxlogdir/$f") or die "failed to unlink $pgxlogdir/$f: $!";
}
closedir($dh);

# Create cascading standby but don't start it yet.
# Must set up both streaming and archiving.
my $node_cascade = get_new_node('cascade');
$node_cascade->init_from_backup($node_standby, $backup_name,
	has_streaming => 1);
$node_cascade->enable_restoring($node_primary);
$node_cascade->append_conf('recovery.conf', qq(
recovery_target_timeline='latest'
));

# Promote the standby.
$node_standby->promote;

# Wait for promotion to complete
$node_standby->poll_query_until('postgres',
	"SELECT NOT pg_is_in_recovery();")
  or die "Timed out while waiting for promotion";

# Find next WAL segment to be archived
my $walfile_to_be_archived = $node_standby->safe_psql('postgres',
	"SELECT pg_xlogfile_name(pg_current_xlog_location());");

# Make WAL segment eligible for archival
$node_standby->safe_psql('postgres', 'SELECT pg_switch_xlog()');

# Wait until the WAL segment has been archived.
# Since the history file gets created on promotion and is archived before any
# WAL segment, this is enough to guarantee that the history file was
# archived.
my $archive_wait_query =
  "SELECT '$walfile_to_be_archived' <= last_archived_wal FROM pg_stat_archiver;";
$node_standby->poll_query_until('postgres', $archive_wait_query)
  or die "Timed out while waiting for WAL segment to be archived";

# Start cascade node
$node_cascade->start;

# Create some content on promoted standby and check its presence on the
# cascading standby.
$node_standby->safe_psql('postgres', "CREATE TABLE tab_int AS SELECT 1 AS a");

# Wait for the replication to catch up
$node_standby->wait_for_catchup($node_cascade, 'replay',
	$node_standby->lsn('insert'));

# Check that cascading standby has the new content
my $result =
  $node_cascade->safe_psql('postgres', "SELECT count(*) FROM tab_int");

# Report the observed row count through the test harness rather than a bare
# print, so it is emitted as a TAP comment.
note "cascade: $result";
is($result, 1, 'check streamed content on cascade standby');

src/test/recovery/t/cp_history_files

+10
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
#!/usr/bin/perl

# Copy SOURCE to TARGET, but only when the source path contains "history".
# Any other file is silently skipped and the script still finishes with a
# zero exit status. The recovery test in this commit installs this script as
# its archive_command so that timeline history files reach the archive while
# WAL segments do not.
#
# Usage: cp_history_files SOURCE TARGET

use strict;
use warnings;
use File::Copy;

@ARGV == 2 or die "wrong number of arguments";
my ($source, $target) = @ARGV;

# Anything that is not a history file is deliberately left uncopied.
if ($source =~ /history/)
{
	copy($source, $target) or die "couldn't copy $source to $target: $!";
}

0 commit comments

Comments
 (0)