Skip to content

Commit a1924a4

Browse files
committed
Add test for postmaster crash restarts.
Given that I managed to break this... We probably should extend the tests to also cover other sub-processes dying, but that's something for later. Author: Andres Freund Discussion: https://postgr.es/m/20170917080752.rcmihzfmgbeuqjk2@alap3.anarazel.de
1 parent ec9e05b commit a1924a4

File tree

1 file changed

+192
-0
lines changed

1 file changed

+192
-0
lines changed
Lines changed: 192 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,192 @@
1+
#
2+
# Tests restarts of postgres due to crashes of a subprocess.
3+
#
4+
# Two longer-running psql subprocesses are used: One to kill a
5+
# backend, triggering a crash-restart cycle, one to detect when
6+
# postmaster noticed the backend died. The second backend is
7+
# necessary because it's otherwise hard to determine if postmaster is
8+
# still accepting new sessions (because it hasn't noticed that the
9+
# backend died), or accepting them again because it has already restarted.
10+
#
11+
use strict;
12+
use warnings;
13+
use PostgresNode;
14+
use TestLib;
15+
use Test::More;
16+
use Config;
17+
use Time::HiRes qw(usleep);
18+
19+
# Decide up front whether the test can run at all: skip everything on
# native Windows perl, otherwise declare the fixed number of assertions.
if ($Config{osname} eq 'MSWin32')
20+
{
21+
# some Windows Perls at least don't like IPC::Run's
22+
# start/kill_kill regime.
23+
plan skip_all => "Test fails on Windows perl";
24+
}
25+
else
26+
{
27+
# must match the number of is()/ok() calls made further down
plan tests => 12;
28+
}
29+
30+
# Create and start the server instance that gets crashed below.
my $node = get_new_node('master');
31+
$node->init(allows_streaming => 1);
32+
$node->start();
33+
34+
# By default PostgresNode doesn't restart after a crash, so turn on
# restart_after_crash (plus connection logging) and reload the configuration.
35+
$node->safe_psql('postgres',
36+
q[ALTER SYSTEM SET restart_after_crash = 1;
37+
ALTER SYSTEM SET log_connections = 1;
38+
SELECT pg_reload_conf();]);
39+
40+
# Run psql, keeping session alive, so we have an alive backend to kill.
# The three scalars are tied to psql's stdin/stdout/stderr: SQL is queued by
# appending to $killme_stdin, and results are read back from $killme_stdout /
# $killme_stderr after pumping the harness.
41+
my ($killme_stdin, $killme_stdout, $killme_stderr) = ('', '', '');
42+
my $killme = IPC::Run::start(
43+
[ 'psql', '-X', '-qAt', '-v', 'ON_ERROR_STOP=1', '-f', '-', '-d',
44+
$node->connstr('postgres') ],
45+
'<',
46+
\$killme_stdin,
47+
'>',
48+
\$killme_stdout,
49+
'2>',
50+
\$killme_stderr);
51+
52+
# Need a second psql to check if crash-restart happened.  It is started with
# the same command line and its own set of stdin/stdout/stderr scalars.
53+
my ($monitor_stdin, $monitor_stdout, $monitor_stderr) = ('', '', '');
54+
my $monitor = IPC::Run::start(
55+
[ 'psql', '-X', '-qAt', '-v', 'ON_ERROR_STOP=1', '-f', '-', '-d',
56+
$node->connstr('postgres') ],
57+
'<',
58+
\$monitor_stdin,
59+
'>',
60+
\$monitor_stdout,
61+
'2>',
62+
\$monitor_stderr);
63+
64+
# Create the table, insert a row that should survive the crash, and ask the
# backend for its pid.
65+
$killme_stdin .= q[
66+
CREATE TABLE alive(status text);
67+
INSERT INTO alive VALUES($$committed-before-sigquit$$);
68+
SELECT pg_backend_pid();
69+
];
70+
# Pump until the output ends in digits followed by a line terminator,
# i.e. until the pid has arrived.
$killme->pump until $killme_stdout =~ /[[:digit:]]+[\r\n]$/;
71+
my $pid = $killme_stdout;
72+
chomp($pid);
73+
# empty the buffer so the next wait only sees new output
$killme_stdout = '';
74+
75+
# Insert a row that should *not* survive, due to the in-progress xact.
# The transaction is deliberately left open.
76+
$killme_stdin .= q[
77+
BEGIN;
78+
INSERT INTO alive VALUES($$in-progress-before-sigquit$$) RETURNING status;
79+
];
80+
$killme->pump until $killme_stdout =~ /in-progress-before-sigquit/;
81+
$killme_stdout = '';
82+
83+
84+
# Start longrunning query in second session; its failure will signal
85+
# that crash-restart has occurred.
86+
$monitor_stdin .= q[
87+
SELECT pg_sleep(3600);
88+
];
89+
# Pump once to push the queued query towards psql; no result is awaited here.
$monitor->pump;
90+
91+
92+
# kill once with QUIT - we expect psql to exit, while emitting error message first
93+
my $cnt = kill 'QUIT', $pid;
94+
95+
# Exactly one process should have been alive to be killed
96+
is($cnt, 1, "exactly one process killed with SIGQUIT");
97+
98+
# Check that psql sees the killed backend as having been terminated
99+
$killme_stdin .= q[
100+
SELECT 1;
101+
];
102+
$killme->pump until $killme_stderr =~ /WARNING: terminating connection because of crash of another server process/;
103+
104+
# Unconditional pass: this line is only reached once the pump loop above
# has seen the expected message on stderr.
ok(1, "psql query died successfully after SIGQUIT");
105+
$killme->kill_kill;
106+
107+
# Check if the crash-restart cycle has occurred: the monitor session's
# long-running query must be terminated with the same WARNING.
108+
$monitor->pump until $monitor_stderr =~ /WARNING: terminating connection because of crash of another server process/;
109+
$monitor->kill_kill;
110+
ok(1, "psql monitor died successfully after SIGQUIT");
111+
112+
# Wait till server restarts, polling until a trivial query returns the
# expected value.
113+
is($node->poll_query_until('postgres', 'SELECT $$restarted$$;', 'restarted'), "1", "reconnected after SIGQUIT");
114+
115+
# Restart psql processes, now that the crash cycle finished.  The I/O
# scalars are emptied first so leftovers from the killed sessions cannot
# satisfy later waits.
116+
($killme_stdin, $killme_stdout, $killme_stderr) = ('', '', '');
117+
$killme->run();
118+
($monitor_stdin, $monitor_stdout, $monitor_stderr) = ('', '', '');
119+
$monitor->run();
120+
121+
122+
# Acquire pid of new backend.  Wait for output that ends in digits plus a
# line terminator, strip the terminator, and then empty the output buffer so
# that later waits only see fresh output.
123+
$killme_stdin .= q[
124+
SELECT pg_backend_pid();
125+
];
126+
$killme->pump until $killme_stdout =~ /[[:digit:]]+[\r\n]$/;
127+
$pid = $killme_stdout;
128+
chomp($pid);
129+
# Reset the buffer here; re-assigning $pid from it would undo the chomp.
$killme_stdout = '';
130+
131+
# Insert test rows: one committed immediately, one inside a transaction
# that is left open.
132+
$killme_stdin .= q[
133+
INSERT INTO alive VALUES($$committed-before-sigkill$$) RETURNING status;
134+
BEGIN;
135+
INSERT INTO alive VALUES($$in-progress-before-sigkill$$) RETURNING status;
136+
];
137+
$killme->pump until $killme_stdout =~ /in-progress-before-sigkill/;
138+
$killme_stdout = '';
139+
140+
# Round-trip a trivial query through the monitor session and wait for its
# output (presumably to confirm the restarted psql is connected).
$monitor_stdin .= q[
141+
SELECT $$restart$$;
142+
];
143+
$monitor->pump until $monitor_stdout =~ /restart/;
144+
$monitor_stdout = '';
145+
146+
# Re-start longrunning query in second session; its failure will signal
147+
# that crash-restart has occurred.
148+
# Append (rather than assign) so that any input the harness has not consumed
# yet is preserved, matching how every other statement in this test is queued.
$monitor_stdin .= q[
149+
SELECT pg_sleep(3600);
150+
];
151+
$monitor->pump_nb; # don't wait for query results to come back
152+
153+
154+
# kill with SIGKILL this time - we expect the backend to exit, without
155+
# being able to emit an error message
156+
$cnt = kill 'KILL', $pid;
157+
is($cnt, 1, "exactly one process killed with KILL");
158+
159+
# Check that psql sees the server as being terminated. No WARNING,
160+
# because signal handlers aren't being run on SIGKILL.
161+
$killme_stdin .= q[
162+
SELECT 1;
163+
];
164+
$killme->pump until $killme_stderr =~ /server closed the connection unexpectedly/;
165+
$killme->kill_kill;
166+
# Unconditional pass: only reached once the pump loop above matched.
ok(1, "psql query died successfully after SIGKILL");
167+
168+
# Check if the crash-restart cycle has occurred (note that we should get the WARNING here)
169+
$monitor->pump until $monitor_stderr =~ /WARNING: terminating connection because of crash of another server process/;
170+
ok(1, "psql monitor died successfully after SIGKILL");
171+
$monitor->kill_kill;
172+
173+
# Wait till server restarts, polling until a trivial query succeeds.
174+
is($node->poll_query_until('postgres', 'SELECT 1', '1'), "1", "reconnected after SIGKILL");
175+
176+
# Make sure the committed rows survived, in-progress ones not: only the two
# rows inserted outside an open transaction are expected.
177+
is($node->safe_psql('postgres', 'SELECT * FROM alive'),
178+
"committed-before-sigquit\ncommitted-before-sigkill", 'data survived');
179+
180+
# The server must accept writes again after the crash restarts.
is($node->safe_psql('postgres', 'INSERT INTO alive VALUES($$before-orderly-restart$$) RETURNING status'),
181+
'before-orderly-restart', 'can still write after crash restart');
182+
183+
# Just to be sure, check that an orderly restart now still works
184+
$node->restart();
185+
186+
# All previously committed rows, including the one written just before the
# orderly restart, must still be present.
is($node->safe_psql('postgres', 'SELECT * FROM alive'),
187+
"committed-before-sigquit\ncommitted-before-sigkill\nbefore-orderly-restart", 'data survived');
188+
189+
is($node->safe_psql('postgres', 'INSERT INTO alive VALUES($$after-orderly-restart$$) RETURNING status'),
190+
'after-orderly-restart', 'can still write after orderly restart');
191+
192+
$node->stop();

0 commit comments

Comments
 (0)