Skip to content

Commit 32fd2b5

Browse files
committed
Be clear about whether a recovery pause has taken effect.
Previously, the code and documentation seem to have essentially assumed that a call to pg_wal_replay_pause() would take effect immediately, but that's not the case, because we only check for a pause in certain places. This means that a tool that uses this function and then wants to do something else afterward that is dependent on the pause having taken effect doesn't know how long it needs to wait to be sure that no more WAL is going to be replayed. To avoid that, add a new function pg_get_wal_replay_pause_state() which returns either 'not paused', 'pause requested', or 'paused'. After calling pg_wal_replay_pause() the status will immediately change from 'not paused' to 'pause requested'; when the startup process has noticed this, the status will change to 'paused'. For backward compatibility, pg_is_wal_replay_paused() still exists and returns the same thing as before: true if a pause has been requested, whether or not it has taken effect yet; and false if not. The documentation is updated to clarify. To improve the chances that a pause request is quickly confirmed effective, adjust things so that WaitForWALToBecomeAvailable will swiftly reach a call to recoveryPausesHere() when a pause request is made. Dilip Kumar, reviewed by Simon Riggs, Kyotaro Horiguchi, Yugo Nagata, Masahiko Sawada, and Bharath Rupireddy. Discussion: http://postgr.es/m/CAFiTN-vcLLWEm8Zr%3DYK83rgYrT9pbC8VJCfa1kY9vL3AUPfu6g%40mail.gmail.com
1 parent 51c54bb commit 32fd2b5

File tree

5 files changed

+153
-22
lines changed

5 files changed

+153
-22
lines changed

doc/src/sgml/func.sgml

Lines changed: 27 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -25344,7 +25344,24 @@ postgres=# SELECT * FROM pg_walfile_name_offset(pg_stop_backup());
2534425344
<returnvalue>boolean</returnvalue>
2534525345
</para>
2534625346
<para>
25347-
Returns true if recovery is paused.
25347+
Returns true if a recovery pause has been requested.
25348+
</para></entry>
25349+
</row>
25350+
25351+
<row>
25352+
<entry role="func_table_entry"><para role="func_signature">
25353+
<indexterm>
25354+
<primary>pg_get_wal_replay_pause_state</primary>
25355+
</indexterm>
25356+
<function>pg_get_wal_replay_pause_state</function> ()
25357+
<returnvalue>text</returnvalue>
25358+
</para>
25359+
<para>
25360+
Returns recovery pause state. The return values are <literal>
25361+
not paused</literal> if pause is not requested, <literal>
25362+
pause requested</literal> if pause is requested but recovery is
25363+
not yet paused, and <literal>paused</literal> if the recovery is
25364+
actually paused.
2534825365
</para></entry>
2534925366
</row>
2535025367

@@ -25383,10 +25400,15 @@ postgres=# SELECT * FROM pg_walfile_name_offset(pg_stop_backup());
2538325400
<returnvalue>void</returnvalue>
2538425401
</para>
2538525402
<para>
25386-
Pauses recovery. While recovery is paused, no further database
25387-
changes are applied. If hot standby is active, all new queries will
25388-
see the same consistent snapshot of the database, and no further query
25389-
conflicts will be generated until recovery is resumed.
25403+
Request to pause recovery. A request doesn't mean that recovery stops
25404+
right away. If you want a guarantee that recovery is actually paused,
25405+
you need to check for the recovery pause state returned by
25406+
<function>pg_get_wal_replay_pause_state()</function>. Note that
25407+
<function>pg_is_wal_replay_paused()</function> returns whether a request
25408+
has been made. While recovery is paused, no further database changes are applied.
25409+
If hot standby is active, all new queries will see the same consistent
25410+
snapshot of the database, and no further query conflicts will be generated
25411+
until recovery is resumed.
2539025412
</para>
2539125413
<para>
2539225414
This function is restricted to superusers by default, but other users

src/backend/access/transam/xlog.c

Lines changed: 70 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -722,8 +722,8 @@ typedef struct XLogCtlData
722722
* only relevant for replication or archive recovery
723723
*/
724724
TimestampTz currentChunkStartTime;
725-
/* Are we requested to pause recovery? */
726-
bool recoveryPause;
725+
/* Recovery pause state */
726+
RecoveryPauseState recoveryPauseState;
727727

728728
/*
729729
* lastFpwDisableRecPtr points to the start of the last replayed
@@ -895,6 +895,7 @@ static void validateRecoveryParameters(void);
895895
static void exitArchiveRecovery(TimeLineID endTLI, XLogRecPtr endOfLog);
896896
static bool recoveryStopsBefore(XLogReaderState *record);
897897
static bool recoveryStopsAfter(XLogReaderState *record);
898+
static void ConfirmRecoveryPaused(void);
898899
static void recoveryPausesHere(bool endOfRecovery);
899900
static bool recoveryApplyDelay(XLogReaderState *record);
900901
static void SetLatestXTime(TimestampTz xtime);
@@ -6034,7 +6035,7 @@ recoveryStopsAfter(XLogReaderState *record)
60346035
}
60356036

60366037
/*
6037-
* Wait until shared recoveryPause flag is cleared.
6038+
* Wait until shared recoveryPauseState is set to RECOVERY_NOT_PAUSED.
60386039
*
60396040
* endOfRecovery is true if the recovery target is reached and
60406041
* the paused state starts at the end of recovery because of
@@ -6064,34 +6065,72 @@ recoveryPausesHere(bool endOfRecovery)
60646065
(errmsg("recovery has paused"),
60656066
errhint("Execute pg_wal_replay_resume() to continue.")));
60666067

6067-
while (RecoveryIsPaused())
6068+
/* loop until recoveryPauseState is set to RECOVERY_NOT_PAUSED */
6069+
while (GetRecoveryPauseState() != RECOVERY_NOT_PAUSED)
60686070
{
60696071
HandleStartupProcInterrupts();
60706072
if (CheckForStandbyTrigger())
60716073
return;
60726074
pgstat_report_wait_start(WAIT_EVENT_RECOVERY_PAUSE);
6075+
6076+
/*
6077+
* If recovery pause is requested then set it paused. While we are in
6078+
* the loop, user might resume and pause again so set this every time.
6079+
*/
6080+
ConfirmRecoveryPaused();
6081+
60736082
pg_usleep(1000000L); /* 1000 ms */
60746083
pgstat_report_wait_end();
60756084
}
60766085
}
60776086

6078-
bool
6079-
RecoveryIsPaused(void)
6087+
/*
6088+
* Get the current state of the recovery pause request.
6089+
*/
6090+
RecoveryPauseState
6091+
GetRecoveryPauseState(void)
60806092
{
6081-
bool recoveryPause;
6093+
RecoveryPauseState state;
60826094

60836095
SpinLockAcquire(&XLogCtl->info_lck);
6084-
recoveryPause = XLogCtl->recoveryPause;
6096+
state = XLogCtl->recoveryPauseState;
60856097
SpinLockRelease(&XLogCtl->info_lck);
60866098

6087-
return recoveryPause;
6099+
return state;
60886100
}
60896101

6102+
/*
6103+
* Set the recovery pause state.
6104+
*
6105+
* If recovery pause is requested then sets the recovery pause state to
6106+
* 'pause requested' if it is not already 'paused'. Otherwise, sets it
6107+
* to 'not paused' to resume the recovery. The recovery pause will be
6108+
* confirmed by the ConfirmRecoveryPaused.
6109+
*/
60906110
void
60916111
SetRecoveryPause(bool recoveryPause)
60926112
{
60936113
SpinLockAcquire(&XLogCtl->info_lck);
6094-
XLogCtl->recoveryPause = recoveryPause;
6114+
6115+
if (!recoveryPause)
6116+
XLogCtl->recoveryPauseState = RECOVERY_NOT_PAUSED;
6117+
else if (XLogCtl->recoveryPauseState == RECOVERY_NOT_PAUSED)
6118+
XLogCtl->recoveryPauseState = RECOVERY_PAUSE_REQUESTED;
6119+
6120+
SpinLockRelease(&XLogCtl->info_lck);
6121+
}
6122+
6123+
/*
6124+
* Confirm the recovery pause by setting the recovery pause state to
6125+
* RECOVERY_PAUSED.
6126+
*/
6127+
static void
6128+
ConfirmRecoveryPaused(void)
6129+
{
6130+
/* If recovery pause is requested then set it paused */
6131+
SpinLockAcquire(&XLogCtl->info_lck);
6132+
if (XLogCtl->recoveryPauseState == RECOVERY_PAUSE_REQUESTED)
6133+
XLogCtl->recoveryPauseState = RECOVERY_PAUSED;
60956134
SpinLockRelease(&XLogCtl->info_lck);
60966135
}
60976136

@@ -6292,7 +6331,7 @@ RecoveryRequiresIntParameter(const char *param_name, int currValue, int minValue
62926331
errdetail("If recovery is unpaused, the server will shut down."),
62936332
errhint("You can then restart the server after making the necessary configuration changes.")));
62946333

6295-
while (RecoveryIsPaused())
6334+
while (GetRecoveryPauseState() != RECOVERY_NOT_PAUSED)
62966335
{
62976336
HandleStartupProcInterrupts();
62986337

@@ -6311,6 +6350,13 @@ RecoveryRequiresIntParameter(const char *param_name, int currValue, int minValue
63116350
warned_for_promote = true;
63126351
}
63136352

6353+
/*
6354+
* If recovery pause is requested then set it paused. While we
6355+
* are in the loop, user might resume and pause again so set
6356+
* this every time.
6357+
*/
6358+
ConfirmRecoveryPaused();
6359+
63146360
pgstat_report_wait_start(WAIT_EVENT_RECOVERY_PAUSE);
63156361
pg_usleep(1000000L); /* 1000 ms */
63166362
pgstat_report_wait_end();
@@ -7205,7 +7251,7 @@ StartupXLOG(void)
72057251
XLogCtl->lastReplayedTLI = XLogCtl->replayEndTLI;
72067252
XLogCtl->recoveryLastXTime = 0;
72077253
XLogCtl->currentChunkStartTime = 0;
7208-
XLogCtl->recoveryPause = false;
7254+
XLogCtl->recoveryPauseState = RECOVERY_NOT_PAUSED;
72097255
SpinLockRelease(&XLogCtl->info_lck);
72107256

72117257
/* Also ensure XLogReceiptTime has a sane value */
@@ -7309,7 +7355,8 @@ StartupXLOG(void)
73097355
* otherwise would is a minor issue, so it doesn't seem worth
73107356
* adding another spinlock cycle to prevent that.
73117357
*/
7312-
if (((volatile XLogCtlData *) XLogCtl)->recoveryPause)
7358+
if (((volatile XLogCtlData *) XLogCtl)->recoveryPauseState !=
7359+
RECOVERY_NOT_PAUSED)
73137360
recoveryPausesHere(false);
73147361

73157362
/*
@@ -7334,7 +7381,8 @@ StartupXLOG(void)
73347381
* here otherwise pausing during the delay-wait wouldn't
73357382
* work.
73367383
*/
7337-
if (((volatile XLogCtlData *) XLogCtl)->recoveryPause)
7384+
if (((volatile XLogCtlData *) XLogCtl)->recoveryPauseState !=
7385+
RECOVERY_NOT_PAUSED)
73387386
recoveryPausesHere(false);
73397387
}
73407388

@@ -12656,6 +12704,14 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess,
1265612704
elog(ERROR, "unexpected WAL source %d", currentSource);
1265712705
}
1265812706

12707+
/*
12708+
* Check for recovery pause here so that we can confirm more quickly
12709+
* that a requested pause has actually taken effect.
12710+
*/
12711+
if (((volatile XLogCtlData *) XLogCtl)->recoveryPauseState !=
12712+
RECOVERY_NOT_PAUSED)
12713+
recoveryPausesHere(false);
12714+
1265912715
/*
1266012716
* This possibly-long loop needs to handle interrupts of startup
1266112717
* process.

src/backend/access/transam/xlogfuncs.c

Lines changed: 43 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -517,7 +517,7 @@ pg_walfile_name(PG_FUNCTION_ARGS)
517517
}
518518

519519
/*
520-
* pg_wal_replay_pause - pause recovery now
520+
* pg_wal_replay_pause - Request to pause recovery
521521
*
522522
* Permission checking for this function is managed through the normal
523523
* GRANT system.
@@ -540,6 +540,9 @@ pg_wal_replay_pause(PG_FUNCTION_ARGS)
540540

541541
SetRecoveryPause(true);
542542

543+
/* wake up the recovery process so that it can process the pause request */
544+
WakeupRecovery();
545+
543546
PG_RETURN_VOID();
544547
}
545548

@@ -582,7 +585,45 @@ pg_is_wal_replay_paused(PG_FUNCTION_ARGS)
582585
errmsg("recovery is not in progress"),
583586
errhint("Recovery control functions can only be executed during recovery.")));
584587

585-
PG_RETURN_BOOL(RecoveryIsPaused());
588+
PG_RETURN_BOOL(GetRecoveryPauseState() != RECOVERY_NOT_PAUSED);
589+
}
590+
591+
/*
592+
* pg_get_wal_replay_pause_state - Returns the recovery pause state.
593+
*
594+
* Returned values:
595+
*
596+
* 'not paused' - if pause is not requested
597+
* 'pause requested' - if pause is requested but recovery is not yet paused
598+
* 'paused' - if recovery is paused
599+
*/
600+
Datum
601+
pg_get_wal_replay_pause_state(PG_FUNCTION_ARGS)
602+
{
603+
char *statestr = NULL;
604+
605+
if (!RecoveryInProgress())
606+
ereport(ERROR,
607+
(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
608+
errmsg("recovery is not in progress"),
609+
errhint("Recovery control functions can only be executed during recovery.")));
610+
611+
/* get the recovery pause state */
612+
switch(GetRecoveryPauseState())
613+
{
614+
case RECOVERY_NOT_PAUSED:
615+
statestr = "not paused";
616+
break;
617+
case RECOVERY_PAUSE_REQUESTED:
618+
statestr = "pause requested";
619+
break;
620+
case RECOVERY_PAUSED:
621+
statestr = "paused";
622+
break;
623+
}
624+
625+
Assert(statestr != NULL);
626+
PG_RETURN_TEXT_P(cstring_to_text(statestr));
586627
}
587628

588629
/*

src/include/access/xlog.h

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -175,6 +175,14 @@ typedef enum RecoveryState
175175
RECOVERY_STATE_DONE /* currently in production */
176176
} RecoveryState;
177177

178+
/* Recovery pause states */
179+
typedef enum RecoveryPauseState
180+
{
181+
RECOVERY_NOT_PAUSED, /* pause not requested */
182+
RECOVERY_PAUSE_REQUESTED, /* pause requested, but not yet paused */
183+
RECOVERY_PAUSED /* recovery is paused */
184+
} RecoveryPauseState;
185+
178186
extern PGDLLIMPORT int wal_level;
179187

180188
/* Is WAL archiving enabled (always or only while server is running normally)? */
@@ -311,7 +319,7 @@ extern void GetXLogReceiptTime(TimestampTz *rtime, bool *fromStream);
311319
extern XLogRecPtr GetXLogReplayRecPtr(TimeLineID *replayTLI);
312320
extern XLogRecPtr GetXLogInsertRecPtr(void);
313321
extern XLogRecPtr GetXLogWriteRecPtr(void);
314-
extern bool RecoveryIsPaused(void);
322+
extern RecoveryPauseState GetRecoveryPauseState(void);
315323
extern void SetRecoveryPause(bool recoveryPause);
316324
extern TimestampTz GetLatestXTime(void);
317325
extern TimestampTz GetCurrentChunkReplayStartTime(void);

src/include/catalog/pg_proc.dat

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6234,6 +6234,10 @@
62346234
proname => 'pg_is_wal_replay_paused', provolatile => 'v',
62356235
prorettype => 'bool', proargtypes => '',
62366236
prosrc => 'pg_is_wal_replay_paused' },
6237+
{ oid => '1137', descr => 'get wal replay pause state',
6238+
proname => 'pg_get_wal_replay_pause_state', provolatile => 'v',
6239+
prorettype => 'text', proargtypes => '',
6240+
prosrc => 'pg_get_wal_replay_pause_state' },
62376241

62386242
{ oid => '2621', descr => 'reload configuration files',
62396243
proname => 'pg_reload_conf', provolatile => 'v', prorettype => 'bool',

0 commit comments

Comments
 (0)