Skip to content

Commit 086221c

Browse files
committed
Prevent panic during shutdown checkpoint
When the checkpointer writes the shutdown checkpoint, it checks afterwards whether any WAL has been written since it started and throws a PANIC if so. At that point, only walsenders are still active, so one might think this could not happen, but walsenders can also generate WAL, for instance in BASE_BACKUP and certain variants of CREATE_REPLICATION_SLOT. So they can trigger this panic if such a command is run while the shutdown checkpoint is being written. To fix this, divide the walsender shutdown into two phases. First, the postmaster sends a SIGUSR2 signal to all walsenders. The walsenders then put themselves into the "stopping" state. In this state, they reject any new commands. (For simplicity, we reject all new commands, so that in the future we do not have to track meticulously which commands might generate WAL.) The checkpointer waits for all walsenders to reach this state before proceeding with the shutdown checkpoint. After the shutdown checkpoint is done, the postmaster sends SIGINT (previously unused) to the walsenders. This triggers the existing shutdown behavior of sending out the shutdown checkpoint record and then terminating. Author: Michael Paquier <michael.paquier@gmail.com> Reported-by: Fujii Masao <masao.fujii@gmail.com>
1 parent 499ae5f commit 086221c

File tree

6 files changed

+141
-24
lines changed

6 files changed

+141
-24
lines changed

doc/src/sgml/monitoring.sgml

+5
Original file line numberDiff line numberDiff line change
@@ -1690,6 +1690,11 @@ SELECT pid, wait_event_type, wait_event FROM pg_stat_activity WHERE wait_event i
16901690
<literal>backup</>: This WAL sender is sending a backup.
16911691
</para>
16921692
</listitem>
1693+
<listitem>
1694+
<para>
1695+
<literal>stopping</>: This WAL sender is stopping.
1696+
</para>
1697+
</listitem>
16931698
</itemizedlist>
16941699
</entry>
16951700
</row>

src/backend/access/transam/xlog.c

+6
Original file line numberDiff line numberDiff line change
@@ -8325,6 +8325,12 @@ ShutdownXLOG(int code, Datum arg)
83258325
ereport(IsPostmasterEnvironment ? LOG : NOTICE,
83268326
(errmsg("shutting down")));
83278327

8328+
/*
8329+
* Wait for WAL senders to be in stopping state. This prevents commands
8330+
* from writing new WAL.
8331+
*/
8332+
WalSndWaitStopping();
8333+
83288334
if (RecoveryInProgress())
83298335
CreateRestartPoint(CHECKPOINT_IS_SHUTDOWN | CHECKPOINT_IMMEDIATE);
83308336
else

src/backend/postmaster/postmaster.c

+5-2
Original file line numberDiff line numberDiff line change
@@ -2918,7 +2918,7 @@ reaper(SIGNAL_ARGS)
29182918
* Waken walsenders for the last time. No regular backends
29192919
* should be around anymore.
29202920
*/
2921-
SignalChildren(SIGUSR2);
2921+
SignalChildren(SIGINT);
29222922

29232923
pmState = PM_SHUTDOWN_2;
29242924

@@ -3656,7 +3656,9 @@ PostmasterStateMachine(void)
36563656
/*
36573657
* If we get here, we are proceeding with normal shutdown. All
36583658
* the regular children are gone, and it's time to tell the
3659-
* checkpointer to do a shutdown checkpoint.
3659+
* checkpointer to do a shutdown checkpoint. All WAL senders
3660+
* are told to switch to a stopping state so that the shutdown
3661+
* checkpoint can go ahead.
36603662
*/
36613663
Assert(Shutdown > NoShutdown);
36623664
/* Start the checkpointer if not running */
@@ -3665,6 +3667,7 @@ PostmasterStateMachine(void)
36653667
/* And tell it to shut down */
36663668
if (CheckpointerPID != 0)
36673669
{
3670+
SignalSomeChildren(SIGUSR2, BACKEND_TYPE_WALSND);
36683671
signal_child(CheckpointerPID, SIGUSR2);
36693672
pmState = PM_SHUTDOWN;
36703673
}

src/backend/replication/walsender.c

+122-21
Original file line numberDiff line numberDiff line change
@@ -24,11 +24,14 @@
2424
* are treated as not a crash but approximately normal termination;
2525
* the walsender will exit quickly without sending any more XLOG records.
2626
*
27-
* If the server is shut down, postmaster sends us SIGUSR2 after all
28-
* regular backends have exited and the shutdown checkpoint has been written.
29-
* This instructs walsender to send any outstanding WAL, including the
30-
* shutdown checkpoint record, wait for it to be replicated to the standby,
31-
* and then exit.
27+
* If the server is shut down, postmaster sends us SIGUSR2 after all regular
28+
* backends have exited. This causes the walsender to switch to the "stopping"
29+
* state. In this state, the walsender will reject all new replication commands
30+
* so that none of them can generate WAL. The checkpointer begins the shutdown
31+
* checkpoint once all walsenders are confirmed as stopping. When the shutdown
32+
* checkpoint finishes, the postmaster sends us SIGINT. This instructs
33+
* walsender to send any outstanding WAL, including the shutdown checkpoint
34+
* record, wait for it to be replicated to the standby, and then exit.
3235
*
3336
*
3437
* Portions Copyright (c) 2010-2017, PostgreSQL Global Development Group
@@ -177,13 +180,14 @@ static bool WalSndCaughtUp = false;
177180

178181
/* Flags set by signal handlers for later service in main loop */
179182
static volatile sig_atomic_t got_SIGHUP = false;
180-
static volatile sig_atomic_t walsender_ready_to_stop = false;
183+
static volatile sig_atomic_t got_SIGINT = false;
184+
static volatile sig_atomic_t got_SIGUSR2 = false;
181185

182186
/*
183-
* This is set while we are streaming. When not set, SIGUSR2 signal will be
187+
* This is set while we are streaming. When not set, SIGINT signal will be
184188
* handled like SIGTERM. When set, the main loop is responsible for checking
185-
* walsender_ready_to_stop and terminating when it's set (after streaming any
186-
* remaining WAL).
189+
* got_SIGINT and terminating when it's set (after streaming any remaining
190+
* WAL).
187191
*/
188192
static volatile sig_atomic_t replication_active = false;
189193

@@ -213,6 +217,7 @@ static struct
213217
/* Signal handlers */
214218
static void WalSndSigHupHandler(SIGNAL_ARGS);
215219
static void WalSndXLogSendHandler(SIGNAL_ARGS);
220+
static void WalSndSwitchStopping(SIGNAL_ARGS);
216221
static void WalSndLastCycleHandler(SIGNAL_ARGS);
217222

218223
/* Prototypes for private functions */
@@ -299,11 +304,14 @@ WalSndErrorCleanup(void)
299304
ReplicationSlotCleanup();
300305

301306
replication_active = false;
302-
if (walsender_ready_to_stop)
307+
if (got_SIGINT)
303308
proc_exit(0);
304309

305310
/* Revert back to startup state */
306311
WalSndSetState(WALSNDSTATE_STARTUP);
312+
313+
if (got_SIGUSR2)
314+
WalSndSetState(WALSNDSTATE_STOPPING);
307315
}
308316

309317
/*
@@ -676,7 +684,7 @@ StartReplication(StartReplicationCmd *cmd)
676684
WalSndLoop(XLogSendPhysical);
677685

678686
replication_active = false;
679-
if (walsender_ready_to_stop)
687+
if (got_SIGINT)
680688
proc_exit(0);
681689
WalSndSetState(WALSNDSTATE_STARTUP);
682690

@@ -1053,7 +1061,7 @@ StartLogicalReplication(StartReplicationCmd *cmd)
10531061
{
10541062
ereport(LOG,
10551063
(errmsg("terminating walsender process after promotion")));
1056-
walsender_ready_to_stop = true;
1064+
got_SIGINT = true;
10571065
}
10581066

10591067
WalSndSetState(WALSNDSTATE_CATCHUP);
@@ -1103,7 +1111,7 @@ StartLogicalReplication(StartReplicationCmd *cmd)
11031111
ReplicationSlotRelease();
11041112

11051113
replication_active = false;
1106-
if (walsender_ready_to_stop)
1114+
if (got_SIGINT)
11071115
proc_exit(0);
11081116
WalSndSetState(WALSNDSTATE_STARTUP);
11091117

@@ -1290,6 +1298,14 @@ WalSndWaitForWal(XLogRecPtr loc)
12901298
else
12911299
RecentFlushPtr = GetXLogReplayRecPtr(NULL);
12921300

1301+
/*
1302+
* If postmaster asked us to switch to the stopping state, do so.
1303+
* Shutdown is in progress and this will allow the checkpointer to
1304+
* move on with the shutdown checkpoint.
1305+
*/
1306+
if (got_SIGUSR2)
1307+
WalSndSetState(WALSNDSTATE_STOPPING);
1308+
12931309
/*
12941310
* If postmaster asked us to stop, don't wait here anymore. This will
12951311
* cause the xlogreader to return without reading a full record, which
@@ -1299,7 +1315,7 @@ WalSndWaitForWal(XLogRecPtr loc)
12991315
* RecentFlushPtr, so we can send all remaining data before shutting
13001316
* down.
13011317
*/
1302-
if (walsender_ready_to_stop)
1318+
if (got_SIGINT)
13031319
break;
13041320

13051321
/*
@@ -1373,6 +1389,22 @@ exec_replication_command(const char *cmd_string)
13731389
MemoryContext cmd_context;
13741390
MemoryContext old_context;
13751391

1392+
/*
1393+
* If WAL sender has been told that shutdown is getting close, switch its
1394+
* status accordingly to handle the next replication commands correctly.
1395+
*/
1396+
if (got_SIGUSR2)
1397+
WalSndSetState(WALSNDSTATE_STOPPING);
1398+
1399+
/*
1400+
* Throw error if in stopping mode. We need to prevent commands that could
1401+
* generate WAL while the shutdown checkpoint is being written. To be
1402+
* safe, we just prohibit all new commands.
1403+
*/
1404+
if (MyWalSnd->state == WALSNDSTATE_STOPPING)
1405+
ereport(ERROR,
1406+
(errmsg("cannot execute new commands while WAL sender is in stopping mode")));
1407+
13761408
/*
13771409
* CREATE_REPLICATION_SLOT ... LOGICAL exports a snapshot until the next
13781410
* command arrives. Clean up the old stuff if there's anything.
@@ -2095,13 +2127,20 @@ WalSndLoop(WalSndSendDataCallback send_data)
20952127
}
20962128

20972129
/*
2098-
* When SIGUSR2 arrives, we send any outstanding logs up to the
2130+
* On receipt of SIGUSR2, switch the WAL sender to the stopping
2131+
* state.
2132+
*/
2133+
if (got_SIGUSR2)
2134+
WalSndSetState(WALSNDSTATE_STOPPING);
2135+
2136+
/*
2137+
* When SIGINT arrives, we send any outstanding logs up to the
20992138
* shutdown checkpoint record (i.e., the latest record), wait for
21002139
* them to be replicated to the standby, and exit. This may be a
21012140
* normal termination at shutdown, or a promotion, the walsender
21022141
* is not sure which.
21032142
*/
2104-
if (walsender_ready_to_stop)
2143+
if (got_SIGINT)
21052144
WalSndDone(send_data);
21062145
}
21072146

@@ -2841,7 +2880,23 @@ WalSndXLogSendHandler(SIGNAL_ARGS)
28412880
errno = save_errno;
28422881
}
28432882

2844-
/* SIGUSR2: set flag to do a last cycle and shut down afterwards */
2883+
/* SIGUSR2: set flag to switch to stopping state */
2884+
static void
2885+
WalSndSwitchStopping(SIGNAL_ARGS)
2886+
{
2887+
int save_errno = errno;
2888+
2889+
got_SIGUSR2 = true;
2890+
SetLatch(MyLatch);
2891+
2892+
errno = save_errno;
2893+
}
2894+
2895+
/*
2896+
* SIGINT: set flag to do a last cycle and shut down afterwards. The WAL
2897+
* sender should already have been switched to WALSNDSTATE_STOPPING at
2898+
* this point.
2899+
*/
28452900
static void
28462901
WalSndLastCycleHandler(SIGNAL_ARGS)
28472902
{
@@ -2856,7 +2911,7 @@ WalSndLastCycleHandler(SIGNAL_ARGS)
28562911
if (!replication_active)
28572912
kill(MyProcPid, SIGTERM);
28582913

2859-
walsender_ready_to_stop = true;
2914+
got_SIGINT = true;
28602915
SetLatch(MyLatch);
28612916

28622917
errno = save_errno;
@@ -2869,14 +2924,14 @@ WalSndSignals(void)
28692924
/* Set up signal handlers */
28702925
pqsignal(SIGHUP, WalSndSigHupHandler); /* set flag to read config
28712926
* file */
2872-
pqsignal(SIGINT, SIG_IGN); /* not used */
2927+
pqsignal(SIGINT, WalSndLastCycleHandler); /* request a last cycle and
2928+
* shutdown */
28732929
pqsignal(SIGTERM, die); /* request shutdown */
28742930
pqsignal(SIGQUIT, quickdie); /* hard crash time */
28752931
InitializeTimeouts(); /* establishes SIGALRM handler */
28762932
pqsignal(SIGPIPE, SIG_IGN);
28772933
pqsignal(SIGUSR1, WalSndXLogSendHandler); /* request WAL sending */
2878-
pqsignal(SIGUSR2, WalSndLastCycleHandler); /* request a last cycle and
2879-
* shutdown */
2934+
pqsignal(SIGUSR2, WalSndSwitchStopping); /* switch to stopping state */
28802935

28812936
/* Reset some signals that are accepted by postmaster but not here */
28822937
pqsignal(SIGCHLD, SIG_DFL);
@@ -2954,6 +3009,50 @@ WalSndWakeup(void)
29543009
}
29553010
}
29563011

3012+
/*
3013+
* Wait until all the WAL senders have reached the stopping state. This is
3014+
* used by the checkpointer to control when shutdown checkpoints can
3015+
* safely begin.
3016+
*/
3017+
void
3018+
WalSndWaitStopping(void)
3019+
{
3020+
for (;;)
3021+
{
3022+
int i;
3023+
bool all_stopped = true;
3024+
3025+
for (i = 0; i < max_wal_senders; i++)
3026+
{
3027+
WalSndState state;
3028+
WalSnd *walsnd = &WalSndCtl->walsnds[i];
3029+
3030+
SpinLockAcquire(&walsnd->mutex);
3031+
3032+
if (walsnd->pid == 0)
3033+
{
3034+
SpinLockRelease(&walsnd->mutex);
3035+
continue;
3036+
}
3037+
3038+
state = walsnd->state;
3039+
SpinLockRelease(&walsnd->mutex);
3040+
3041+
if (state != WALSNDSTATE_STOPPING)
3042+
{
3043+
all_stopped = false;
3044+
break;
3045+
}
3046+
}
3047+
3048+
/* safe to leave if confirmation is done for all WAL senders */
3049+
if (all_stopped)
3050+
return;
3051+
3052+
pg_usleep(10000L); /* wait for 10 msec */
3053+
}
3054+
}
3055+
29573056
/* Set state for current walsender (only called in walsender) */
29583057
void
29593058
WalSndSetState(WalSndState state)
@@ -2987,6 +3086,8 @@ WalSndGetStateString(WalSndState state)
29873086
return "catchup";
29883087
case WALSNDSTATE_STREAMING:
29893088
return "streaming";
3089+
case WALSNDSTATE_STOPPING:
3090+
return "stopping";
29903091
}
29913092
return "UNKNOWN";
29923093
}

src/include/replication/walsender.h

+1
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@ extern void WalSndSignals(void);
4444
extern Size WalSndShmemSize(void);
4545
extern void WalSndShmemInit(void);
4646
extern void WalSndWakeup(void);
47+
extern void WalSndWaitStopping(void);
4748
extern void WalSndRqstFileReload(void);
4849

4950
/*

src/include/replication/walsender_private.h

+2-1
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,8 @@ typedef enum WalSndState
2424
WALSNDSTATE_STARTUP = 0,
2525
WALSNDSTATE_BACKUP,
2626
WALSNDSTATE_CATCHUP,
27-
WALSNDSTATE_STREAMING
27+
WALSNDSTATE_STREAMING,
28+
WALSNDSTATE_STOPPING
2829
} WalSndState;
2930

3031
/*

0 commit comments

Comments
 (0)