Skip to content

Commit 1e61480

Browse files
committed
Allow walreceiver configuration to change on reload
The parameters primary_conninfo, primary_slot_name and wal_receiver_create_temp_slot can now be changed with a simple "reload" signal, no longer requiring a server restart. This is achieved by signalling the walreceiver process to terminate and having it start again with the new values. Thanks to Andres Freund, Kyotaro Horiguchi, Fujii Masao for discussion. Author: Sergei Kornilov <sk@zsrv.org> Reviewed-by: Michael Paquier <michael@paquier.xyz> Reviewed-by: Álvaro Herrera <alvherre@alvh.no-ip.org> Discussion: https://postgr.es/m/19513901543181143@sas1-19a94364928d.qloud-c.yandex.net
1 parent 092c693 commit 1e61480

File tree

10 files changed

+179
-68
lines changed

10 files changed

+179
-68
lines changed

doc/src/sgml/config.sgml

Lines changed: 18 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -4028,7 +4028,12 @@ ANY <replaceable class="parameter">num_sync</replaceable> ( <replaceable class="
40284028
<varname>primary_conninfo</varname> string.
40294029
</para>
40304030
<para>
4031-
This parameter can only be set at server start.
4031+
This parameter can only be set in the <filename>postgresql.conf</filename>
4032+
file or on the server command line.
4033+
If this parameter is changed while the WAL receiver process is
4034+
running, that process is signalled to shut down and expected to
4035+
restart with the new setting (except if <varname>primary_conninfo</varname>
4036+
is an empty string).
40324037
This setting has no effect if the server is not in standby mode.
40334038
</para>
40344039
</listitem>
@@ -4045,9 +4050,13 @@ ANY <replaceable class="parameter">num_sync</replaceable> ( <replaceable class="
40454050
connecting to the sending server via streaming replication to control
40464051
resource removal on the upstream node
40474052
(see <xref linkend="streaming-replication-slots"/>).
4048-
This parameter can only be set at server start.
4053+
This parameter can only be set in the <filename>postgresql.conf</filename>
4054+
file or on the server command line.
4055+
If this parameter is changed while the WAL receiver process is running,
4056+
that process is signalled to shut down and expected to restart with the
4057+
new setting.
40494058
This setting has no effect if <varname>primary_conninfo</varname> is not
4050-
set.
4059+
set or the server is not in standby mode.
40514060
</para>
40524061
</listitem>
40534062
</varlistentry>
@@ -4160,10 +4169,14 @@ ANY <replaceable class="parameter">num_sync</replaceable> ( <replaceable class="
41604169
</term>
41614170
<listitem>
41624171
<para>
4163-
Specifies whether a WAL receiver should create a temporary replication
4172+
Specifies whether the WAL receiver process should create a temporary replication
41644173
slot on the remote instance when no permanent replication slot to use
41654174
has been configured (using <xref linkend="guc-primary-slot-name"/>).
4166-
The default is off. This parameter can only be set at server start.
4175+
The default is off. This parameter can only be set in the
4176+
<filename>postgresql.conf</filename> file or on the server command line.
4177+
If this parameter is changed while the WAL receiver process is running,
4178+
that process is signalled to shut down and expected to restart with
4179+
the new setting.
41674180
</para>
41684181
</listitem>
41694182
</varlistentry>

doc/src/sgml/high-availability.sgml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -816,8 +816,8 @@ archive_cleanup_command = 'pg_archivecleanup /path/to/archive %r'
816816
When the standby is started and <varname>primary_conninfo</varname> is set
817817
correctly, the standby will connect to the primary after replaying all
818818
WAL files available in the archive. If the connection is established
819-
successfully, you will see a walreceiver process in the standby, and
820-
a corresponding walsender process in the primary.
819+
successfully, you will see a <literal>walreceiver</literal> process in the standby, and
820+
a corresponding <literal>walsender</literal> process in the primary.
821821
</para>
822822

823823
<sect3 id="streaming-replication-authentication">

src/backend/access/transam/xlog.c

Lines changed: 89 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -816,9 +816,13 @@ static XLogSource readSource = XLOG_FROM_ANY;
816816
* currently have a WAL file open. If lastSourceFailed is set, our last
817817
* attempt to read from currentSource failed, and we should try another source
818818
* next.
819+
*
820+
* pendingWalRcvRestart is set when a config change occurs that requires a
821+
* walreceiver restart. This is only valid in XLOG_FROM_STREAM state.
819822
*/
820823
static XLogSource currentSource = XLOG_FROM_ANY;
821824
static bool lastSourceFailed = false;
825+
static bool pendingWalRcvRestart = false;
822826

823827
typedef struct XLogPageReadPrivate
824828
{
@@ -11905,6 +11909,7 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess,
1190511909
for (;;)
1190611910
{
1190711911
XLogSource oldSource = currentSource;
11912+
bool startWalReceiver = false;
1190811913

1190911914
/*
1191011915
* First check if we failed to read from the current source, and
@@ -11939,54 +11944,11 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess,
1193911944
return false;
1194011945

1194111946
/*
11942-
* If primary_conninfo is set, launch walreceiver to try
11943-
* to stream the missing WAL.
11944-
*
11945-
* If fetching_ckpt is true, RecPtr points to the initial
11946-
* checkpoint location. In that case, we use RedoStartLSN
11947-
* as the streaming start position instead of RecPtr, so
11948-
* that when we later jump backwards to start redo at
11949-
* RedoStartLSN, we will have the logs streamed already.
11950-
*/
11951-
if (PrimaryConnInfo && strcmp(PrimaryConnInfo, "") != 0)
11952-
{
11953-
XLogRecPtr ptr;
11954-
TimeLineID tli;
11955-
11956-
if (fetching_ckpt)
11957-
{
11958-
ptr = RedoStartLSN;
11959-
tli = ControlFile->checkPointCopy.ThisTimeLineID;
11960-
}
11961-
else
11962-
{
11963-
ptr = RecPtr;
11964-
11965-
/*
11966-
* Use the record begin position to determine the
11967-
* TLI, rather than the position we're reading.
11968-
*/
11969-
tli = tliOfPointInHistory(tliRecPtr, expectedTLEs);
11970-
11971-
if (curFileTLI > 0 && tli < curFileTLI)
11972-
elog(ERROR, "according to history file, WAL location %X/%X belongs to timeline %u, but previous recovered WAL file came from timeline %u",
11973-
(uint32) (tliRecPtr >> 32),
11974-
(uint32) tliRecPtr,
11975-
tli, curFileTLI);
11976-
}
11977-
curFileTLI = tli;
11978-
RequestXLogStreaming(tli, ptr, PrimaryConnInfo,
11979-
PrimarySlotName,
11980-
wal_receiver_create_temp_slot);
11981-
receivedUpto = 0;
11982-
}
11983-
11984-
/*
11985-
* Move to XLOG_FROM_STREAM state in either case. We'll
11986-
* get immediate failure if we didn't launch walreceiver,
11987-
* and move on to the next state.
11947+
* Move to XLOG_FROM_STREAM state, and set to start a
11948+
* walreceiver if necessary.
1198811949
*/
1198911950
currentSource = XLOG_FROM_STREAM;
11951+
startWalReceiver = true;
1199011952
break;
1199111953

1199211954
case XLOG_FROM_STREAM:
@@ -12138,7 +12100,71 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess,
1213812100
Assert(StandbyMode);
1213912101

1214012102
/*
12141-
* Check if WAL receiver is still active.
12103+
* First, shut down walreceiver if its restart has been
12104+
* requested -- but no point if we're already slated for
12105+
* starting it.
12106+
*/
12107+
if (pendingWalRcvRestart && !startWalReceiver)
12108+
{
12109+
ShutdownWalRcv();
12110+
12111+
/*
12112+
* Re-scan for possible new timelines if we were
12113+
* requested to recover to the latest timeline.
12114+
*/
12115+
if (recoveryTargetTimeLineGoal ==
12116+
RECOVERY_TARGET_TIMELINE_LATEST)
12117+
rescanLatestTimeLine();
12118+
12119+
startWalReceiver = true;
12120+
}
12121+
pendingWalRcvRestart = false;
12122+
12123+
/*
12124+
* Launch walreceiver if needed.
12125+
*
12126+
* If fetching_ckpt is true, RecPtr points to the initial
12127+
* checkpoint location. In that case, we use RedoStartLSN
12128+
* as the streaming start position instead of RecPtr, so
12129+
* that when we later jump backwards to start redo at
12130+
* RedoStartLSN, we will have the logs streamed already.
12131+
*/
12132+
if (startWalReceiver &&
12133+
PrimaryConnInfo && strcmp(PrimaryConnInfo, "") != 0)
12134+
{
12135+
XLogRecPtr ptr;
12136+
TimeLineID tli;
12137+
12138+
if (fetching_ckpt)
12139+
{
12140+
ptr = RedoStartLSN;
12141+
tli = ControlFile->checkPointCopy.ThisTimeLineID;
12142+
}
12143+
else
12144+
{
12145+
ptr = RecPtr;
12146+
12147+
/*
12148+
* Use the record begin position to determine the
12149+
* TLI, rather than the position we're reading.
12150+
*/
12151+
tli = tliOfPointInHistory(tliRecPtr, expectedTLEs);
12152+
12153+
if (curFileTLI > 0 && tli < curFileTLI)
12154+
elog(ERROR, "according to history file, WAL location %X/%X belongs to timeline %u, but previous recovered WAL file came from timeline %u",
12155+
(uint32) (tliRecPtr >> 32),
12156+
(uint32) tliRecPtr,
12157+
tli, curFileTLI);
12158+
}
12159+
curFileTLI = tli;
12160+
RequestXLogStreaming(tli, ptr, PrimaryConnInfo,
12161+
PrimarySlotName,
12162+
wal_receiver_create_temp_slot);
12163+
receivedUpto = 0;
12164+
}
12165+
12166+
/*
12167+
* Check if WAL receiver is active or waiting to start up.
1214212168
*/
1214312169
if (!WalRcvStreaming())
1214412170
{
@@ -12266,6 +12292,22 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess,
1226612292
return false; /* not reached */
1226712293
}
1226812294

12295+
/*
12296+
* Set flag to signal the walreceiver to restart. (The startup process calls
12297+
* this on noticing a relevant configuration change.)
12298+
*/
12299+
void
12300+
StartupRequestWalReceiverRestart(void)
12301+
{
12302+
if (currentSource == XLOG_FROM_STREAM && WalRcvRunning())
12303+
{
12304+
ereport(LOG,
12305+
(errmsg("wal receiver process shutdown requested")));
12306+
12307+
pendingWalRcvRestart = true;
12308+
}
12309+
}
12310+
1226912311
/*
1227012312
* Determine what log level should be used to report a corrupt WAL record
1227112313
* in the current WAL page, previously read by XLogPageRead().

src/backend/access/transam/xlogreader.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -585,9 +585,9 @@ ReadPageInternal(XLogReaderState *state, XLogRecPtr pageptr, int reqLen)
585585
/*
586586
* Data is not in our buffer.
587587
*
588-
* Every time we actually read the page, even if we looked at parts of it
589-
* before, we need to do verification as the read_page callback might now
590-
* be rereading data from a different source.
588+
* Every time we actually read the segment, even if we looked at parts of
589+
* it before, we need to do verification as the read_page callback might
590+
* now be rereading data from a different source.
591591
*
592592
* Whenever switching to a new WAL segment, we read the first page of the
593593
* file and validate its header, even if that's not where the target

src/backend/postmaster/startup.c

Lines changed: 36 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -96,17 +96,51 @@ StartupProcShutdownHandler(SIGNAL_ARGS)
9696
errno = save_errno;
9797
}
9898

99+
/*
100+
* Re-read the config file.
101+
*
102+
* If one of the critical walreceiver options has changed, flag xlog.c
103+
* to restart it.
104+
*/
105+
static void
106+
StartupRereadConfig(void)
107+
{
108+
char *conninfo = pstrdup(PrimaryConnInfo);
109+
char *slotname = pstrdup(PrimarySlotName);
110+
bool tempSlot = wal_receiver_create_temp_slot;
111+
bool conninfoChanged;
112+
bool slotnameChanged;
113+
bool tempSlotChanged = false;
114+
115+
ProcessConfigFile(PGC_SIGHUP);
116+
117+
conninfoChanged = strcmp(conninfo, PrimaryConnInfo) != 0;
118+
slotnameChanged = strcmp(slotname, PrimarySlotName) != 0;
119+
120+
/*
121+
* wal_receiver_create_temp_slot is used only when we have no slot
122+
* configured. We do not need to track this change if it has no effect.
123+
*/
124+
if (!slotnameChanged && strcmp(PrimarySlotName, "") == 0)
125+
tempSlotChanged = tempSlot != wal_receiver_create_temp_slot;
126+
pfree(conninfo);
127+
pfree(slotname);
128+
129+
if (conninfoChanged || slotnameChanged || tempSlotChanged)
130+
StartupRequestWalReceiverRestart();
131+
}
132+
99133
/* Handle various signals that might be sent to the startup process */
100134
void
101135
HandleStartupProcInterrupts(void)
102136
{
103137
/*
104-
* Check if we were requested to re-read config file.
138+
* Process any requests or signals received recently.
105139
*/
106140
if (got_SIGHUP)
107141
{
108142
got_SIGHUP = false;
109-
ProcessConfigFile(PGC_SIGHUP);
143+
StartupRereadConfig();
110144
}
111145

112146
/*

src/backend/replication/walreceiver.c

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -679,7 +679,11 @@ WalRcvWaitForStartPosition(XLogRecPtr *startpoint, TimeLineID *startpointTLI)
679679
walrcv->walRcvState == WALRCV_STOPPING);
680680
if (walrcv->walRcvState == WALRCV_RESTARTING)
681681
{
682-
/* we don't expect primary_conninfo to change */
682+
/*
683+
* No need to handle changes in primary_conninfo or
684+
* primary_slotname here. Startup process will signal us to
685+
* terminate in case those change.
686+
*/
683687
*startpoint = walrcv->receiveStart;
684688
*startpointTLI = walrcv->receiveStartTLI;
685689
walrcv->walRcvState = WALRCV_STREAMING;

src/backend/utils/misc/guc.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2050,7 +2050,7 @@ static struct config_bool ConfigureNamesBool[] =
20502050
},
20512051

20522052
{
2053-
{"wal_receiver_create_temp_slot", PGC_POSTMASTER, REPLICATION_STANDBY,
2053+
{"wal_receiver_create_temp_slot", PGC_SIGHUP, REPLICATION_STANDBY,
20542054
gettext_noop("Sets whether a WAL receiver should create a temporary replication slot if no permanent slot is configured."),
20552055
},
20562056
&wal_receiver_create_temp_slot,
@@ -3717,7 +3717,7 @@ static struct config_string ConfigureNamesString[] =
37173717
},
37183718

37193719
{
3720-
{"primary_conninfo", PGC_POSTMASTER, REPLICATION_STANDBY,
3720+
{"primary_conninfo", PGC_SIGHUP, REPLICATION_STANDBY,
37213721
gettext_noop("Sets the connection string to be used to connect to the sending server."),
37223722
NULL,
37233723
GUC_SUPERUSER_ONLY
@@ -3728,7 +3728,7 @@ static struct config_string ConfigureNamesString[] =
37283728
},
37293729

37303730
{
3731-
{"primary_slot_name", PGC_POSTMASTER, REPLICATION_STANDBY,
3731+
{"primary_slot_name", PGC_SIGHUP, REPLICATION_STANDBY,
37323732
gettext_noop("Sets the name of the replication slot to use on the sending server."),
37333733
NULL
37343734
},

src/backend/utils/misc/postgresql.conf.sample

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -309,9 +309,7 @@
309309
# These settings are ignored on a master server.
310310

311311
#primary_conninfo = '' # connection string to sending server
312-
# (change requires restart)
313312
#primary_slot_name = '' # replication slot on sending server
314-
# (change requires restart)
315313
#promote_trigger_file = '' # file name whose presence ends recovery
316314
#hot_standby = on # "off" disallows queries during recovery
317315
# (change requires restart)
@@ -323,7 +321,6 @@
323321
# -1 allows indefinite delay
324322
#wal_receiver_create_temp_slot = off # Create temp slot if primary_slot_name
325323
# is not set.
326-
# (change requires restart)
327324
#wal_receiver_status_interval = 10s # send replies at least this often
328325
# 0 disables
329326
#hot_standby_feedback = off # send info from standby to prevent

src/include/access/xlog.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -319,6 +319,7 @@ extern bool CheckPromoteSignal(void);
319319
extern void WakeupRecovery(void);
320320
extern void SetWalWriterSleeping(bool sleeping);
321321

322+
extern void StartupRequestWalReceiverRestart(void);
322323
extern void XLogRequestWalReceiverReply(void);
323324

324325
extern void assign_max_wal_size(int newval, void *extra);

0 commit comments

Comments
 (0)