|
| 1 | +# |
| 2 | +# Tests restarts of postgres due to crashes of a subprocess. |
| 3 | +# |
| 4 | +# Two long-running psql subprocesses are used: one whose backend is |
| 5 | +# killed, triggering a crash-restart cycle, and one to detect when |
| 6 | +# postmaster has noticed that the backend died. The second session is |
| 7 | +# necessary because it's otherwise hard to determine whether postmaster |
| 8 | +# is accepting new sessions because it hasn't yet noticed that the |
| 9 | +# backend died, or because it has already restarted. |
| 10 | +# |
| 11 | +use strict; |
| 12 | +use warnings; |
| 13 | +use PostgresNode; |
| 14 | +use TestLib; |
| 15 | +use Test::More; |
| 16 | +use Config; |
| 17 | +use Time::HiRes qw(usleep); |
| 18 | + |
| 19 | +if ($Config{osname} eq 'MSWin32') |
| 20 | +{ |
| 21 | + # At least some Windows Perls cannot cope with IPC::Run's |
| 22 | + # start/kill_kill regime, so skip the whole test there. |
| 23 | + plan skip_all => "Test fails on Windows perl"; |
| 24 | +} |
| 25 | +else |
| 26 | +{ |
| 27 | + plan tests => 12; |
| 28 | +} |
| 29 | + |
| 30 | +my $node = get_new_node('master'); |
| 31 | +$node->init(allows_streaming => 1); |
| 32 | +$node->start(); |
| 33 | + |
| 34 | +# By default PostgresNode doesn't restart after a crash; enable that and log connections. |
| 35 | +$node->safe_psql('postgres', |
| 36 | + q[ALTER SYSTEM SET restart_after_crash = 1; |
| 37 | + ALTER SYSTEM SET log_connections = 1; |
| 38 | + SELECT pg_reload_conf();]); |
| 39 | + |
| 40 | +# Start a psql that reads its commands from stdin and stays connected, so there is a live backend to kill. |
| 41 | +my ($killme_stdin, $killme_stdout, $killme_stderr) = ('', '', ''); |
| 42 | +my $killme = IPC::Run::start( |
| 43 | + [ 'psql', '-X', '-qAt', '-v', 'ON_ERROR_STOP=1', '-f', '-', '-d', |
| 44 | + $node->connstr('postgres') ], |
| 45 | + '<', |
| 46 | + \$killme_stdin, |
| 47 | + '>', |
| 48 | + \$killme_stdout, |
| 49 | + '2>', |
| 50 | + \$killme_stderr); |
| 51 | + |
| 52 | +# Start a second, identically invoked psql, used to detect that the crash-restart happened. |
| 53 | +my ($monitor_stdin, $monitor_stdout, $monitor_stderr) = ('', '', ''); |
| 54 | +my $monitor = IPC::Run::start( |
| 55 | + [ 'psql', '-X', '-qAt', '-v', 'ON_ERROR_STOP=1', '-f', '-', '-d', |
| 56 | + $node->connstr('postgres') ], |
| 57 | + '<', |
| 58 | + \$monitor_stdin, |
| 59 | + '>', |
| 60 | + \$monitor_stdout, |
| 61 | + '2>', |
| 62 | + \$monitor_stderr); |
| 63 | + |
| 64 | +# Create a table, insert a row that should survive the crash, and fetch the backend's pid. |
| 65 | +$killme_stdin .= q[ |
| 66 | +CREATE TABLE alive(status text); |
| 67 | +INSERT INTO alive VALUES($$committed-before-sigquit$$); |
| 68 | +SELECT pg_backend_pid(); |
| 69 | +]; |
| 70 | +$killme->pump until $killme_stdout =~ /[[:digit:]]+[\r\n]$/; |
| 71 | +my $pid = $killme_stdout; |
| 72 | +chomp($pid); |
| 73 | +$killme_stdout = ''; |
| 74 | + |
| 75 | +# Insert a row that should *not* survive, because its transaction is still in progress. |
| 76 | +$killme_stdin .= q[ |
| 77 | +BEGIN; |
| 78 | +INSERT INTO alive VALUES($$in-progress-before-sigquit$$) RETURNING status; |
| 79 | +]; |
| 80 | +$killme->pump until $killme_stdout =~ /in-progress-before-sigquit/; |
| 81 | +$killme_stdout = ''; |
| 82 | + |
| 83 | + |
| 84 | +# Start a long-running query in the second session; its failure will signal |
| 85 | +# that the crash-restart has occurred. |
| 86 | +$monitor_stdin .= q[ |
| 87 | +SELECT pg_sleep(3600); |
| 88 | +]; |
| 89 | +$monitor->pump; |
| 90 | + |
| 91 | + |
| 92 | +# Kill the backend once with SIGQUIT - we expect psql to exit, after emitting an error message first. |
| 93 | +my $cnt = kill 'QUIT', $pid; |
| 94 | + |
| 95 | +# Exactly one process should have been alive to be killed. |
| 96 | +is($cnt, 1, "exactly one process killed with SIGQUIT"); |
| 97 | + |
| 98 | +# Check that psql sees the killed backend as having been terminated. |
| 99 | +$killme_stdin .= q[ |
| 100 | +SELECT 1; |
| 101 | +]; |
| 102 | +$killme->pump until $killme_stderr =~ /WARNING: terminating connection because of crash of another server process/; |
| 103 | + |
| 104 | +ok(1, "psql query died successfully after SIGQUIT"); |
| 105 | +$killme->kill_kill; |
| 106 | + |
| 107 | +# Check that the monitoring session was terminated as well, i.e. the crash-restart cycle has begun. |
| 108 | +$monitor->pump until $monitor_stderr =~ /WARNING: terminating connection because of crash of another server process/; |
| 109 | +$monitor->kill_kill; |
| 110 | +ok(1, "psql monitor died successfully after SIGQUIT"); |
| 111 | + |
| 112 | +# Wait until the server has restarted and answers queries again. |
| 113 | +is($node->poll_query_until('postgres', 'SELECT $$restarted$$;', 'restarted'), "1", "reconnected after SIGQUIT"); |
| 114 | + |
| 115 | +# Restart both psql processes with empty buffers, now that the crash cycle has finished. |
| 116 | +($killme_stdin, $killme_stdout, $killme_stderr) = ('', '', ''); |
| 117 | +$killme->run(); |
| 118 | +($monitor_stdin, $monitor_stdout, $monitor_stderr) = ('', '', ''); |
| 119 | +$monitor->run(); |
| 120 | + |
| 121 | + |
| 122 | +# Acquire the pid of the new backend (newline stripped), then clear the captured output for the next wait. |
| 123 | +$killme_stdin .= q[ |
| 124 | +SELECT pg_backend_pid(); |
| 125 | +]; |
| 126 | +$killme->pump until $killme_stdout =~ /[[:digit:]]+[\r\n]$/; |
| 127 | +$pid = $killme_stdout; |
| 128 | +chomp($pid); |
| 129 | +$killme_stdout = ''; |
| 130 | + |
| 131 | +# Insert one committed row and one row whose transaction is still open. |
| 132 | +$killme_stdin .= q[ |
| 133 | +INSERT INTO alive VALUES($$committed-before-sigkill$$) RETURNING status; |
| 134 | +BEGIN; |
| 135 | +INSERT INTO alive VALUES($$in-progress-before-sigkill$$) RETURNING status; |
| 136 | +]; |
| 137 | +$killme->pump until $killme_stdout =~ /in-progress-before-sigkill/; |
| 138 | +$killme_stdout = ''; |
| 139 | + |
| 140 | +$monitor_stdin .= q[ |
| 141 | +SELECT $$restart$$; |
| 142 | +]; |
| 143 | +$monitor->pump until $monitor_stdout =~ /restart/; |
| 144 | +$monitor_stdout = ''; |
| 145 | + |
| 146 | +# Re-start the long-running query in the second session; its failure will signal |
| 147 | +# that the crash-restart has occurred. |
| 148 | +$monitor_stdin = q[ |
| 149 | +SELECT pg_sleep(3600); |
| 150 | +]; |
| 151 | +$monitor->pump_nb; # don't wait for query results to come back |
| 152 | + |
| 153 | + |
| 154 | +# Kill with SIGKILL this time - we expect the backend to exit, without |
| 155 | +# being able to emit an error message. |
| 156 | +$cnt = kill 'KILL', $pid; |
| 157 | +is($cnt, 1, "exactly one process killed with KILL"); |
| 158 | + |
| 159 | +# Check that psql sees the server as being terminated. No WARNING, |
| 160 | +# because signal handlers aren't being run on SIGKILL. |
| 161 | +$killme_stdin .= q[ |
| 162 | +SELECT 1; |
| 163 | +]; |
| 164 | +$killme->pump until $killme_stderr =~ /server closed the connection unexpectedly/; |
| 165 | +$killme->kill_kill; |
| 166 | +ok(1, "psql query died successfully after SIGKILL"); |
| 167 | + |
| 168 | +# Wait for the monitoring session to be terminated (note that we should get the WARNING here). |
| 169 | +$monitor->pump until $monitor_stderr =~ /WARNING: terminating connection because of crash of another server process/; |
| 170 | +ok(1, "psql monitor died successfully after SIGKILL"); |
| 171 | +$monitor->kill_kill; |
| 172 | + |
| 173 | +# Wait until the server has restarted and answers queries again. |
| 174 | +is($node->poll_query_until('postgres', 'SELECT 1', '1'), "1", "reconnected after SIGKILL"); |
| 175 | + |
| 176 | +# Make sure the committed rows survived and the in-progress ones did not. |
| 177 | +is($node->safe_psql('postgres', 'SELECT * FROM alive'), |
| 178 | + "committed-before-sigquit\ncommitted-before-sigkill", 'data survived'); |
| 179 | + |
| 180 | +is($node->safe_psql('postgres', 'INSERT INTO alive VALUES($$before-orderly-restart$$) RETURNING status'), |
| 181 | + 'before-orderly-restart', 'can still write after crash restart'); |
| 182 | + |
| 183 | +# Just to be sure, check that an orderly restart still works now, keeping all rows. |
| 184 | +$node->restart(); |
| 185 | + |
| 186 | +is($node->safe_psql('postgres', 'SELECT * FROM alive'), |
| 187 | + "committed-before-sigquit\ncommitted-before-sigkill\nbefore-orderly-restart", 'data survived'); |
| 188 | + |
| 189 | +is($node->safe_psql('postgres', 'INSERT INTO alive VALUES($$after-orderly-restart$$) RETURNING status'), |
| 190 | + 'after-orderly-restart', 'can still write after orderly restart'); |
| 191 | + |
| 192 | +$node->stop(); |
0 commit comments