Skip to content

Commit 21d48d6

Browse files
committed
Don't pay heed to wal_sender_timeout while creating a decoding slot.
Sometimes CREATE_REPLICATION_SLOT ... LOGICAL ... needs to wait for further WAL using WalSndWaitForWal(). That wait used to always respect wal_sender_timeout and would kill the session once it had waited long enough, because no feedback/ping messages can be sent while the slot is still being created. To avoid that problem, introduce the notion that last_reply_timestamp = 0 means that the walsender currently doesn't need timeout processing, and use that notion for CREATE_REPLICATION_SLOT ... LOGICAL. Bug report and initial patch by Steve Singer, revised by me.
1 parent d1d50bf commit 21d48d6

File tree

1 file changed

+26
-5
lines changed

1 file changed

+26
-5
lines changed

src/backend/replication/walsender.c

Lines changed: 26 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -148,9 +148,10 @@ static StringInfoData reply_message;
148148
static StringInfoData tmpbuf;
149149

150150
/*
151-
* Timestamp of the last receipt of the reply from the standby.
151+
* Timestamp of the last receipt of the reply from the standby. Set to 0 if
152+
* wal_sender_timeout doesn't need to be active.
152153
*/
153-
static TimestampTz last_reply_timestamp;
154+
static TimestampTz last_reply_timestamp = 0;
154155

155156
/* Have we sent a heartbeat message asking for reply, since last reply? */
156157
static bool waiting_for_ping_response = false;
@@ -796,6 +797,15 @@ CreateReplicationSlot(CreateReplicationSlotCmd *cmd)
796797
logical_read_xlog_page,
797798
WalSndPrepareWrite, WalSndWriteData);
798799

800+
/*
801+
* Signal that we don't need the timeout mechanism. We're just
802+
* creating the replication slot and don't yet accept feedback
803+
* messages or send keepalives. As we possibly need to wait for
804+
* further WAL the walsender would otherwise possibly be killed too
805+
* soon.
806+
*/
807+
last_reply_timestamp = 0;
808+
799809
/* build initial snapshot, might take a while */
800810
DecodingContextFindStartpoint(ctx);
801811

@@ -1693,7 +1703,7 @@ WalSndComputeSleeptime(TimestampTz now)
16931703
{
16941704
long sleeptime = 10000; /* 10 s */
16951705

1696-
if (wal_sender_timeout > 0)
1706+
if (wal_sender_timeout > 0 && last_reply_timestamp > 0)
16971707
{
16981708
TimestampTz wakeup_time;
16991709
long sec_to_timeout;
@@ -1735,6 +1745,10 @@ WalSndCheckTimeOut(TimestampTz now)
17351745
{
17361746
TimestampTz timeout;
17371747

1748+
/* don't bail out if we're doing something that doesn't require timeouts */
1749+
if (last_reply_timestamp <= 0)
1750+
return;
1751+
17381752
timeout = TimestampTzPlusMilliseconds(last_reply_timestamp,
17391753
wal_sender_timeout);
17401754

@@ -1764,7 +1778,10 @@ WalSndLoop(WalSndSendDataCallback send_data)
17641778
initStringInfo(&reply_message);
17651779
initStringInfo(&tmpbuf);
17661780

1767-
/* Initialize the last reply timestamp */
1781+
/*
1782+
* Initialize the last reply timestamp. That enables timeout processing
1783+
* from here on.
1784+
*/
17681785
last_reply_timestamp = GetCurrentTimestamp();
17691786
waiting_for_ping_response = false;
17701787

@@ -2879,7 +2896,11 @@ WalSndKeepaliveIfNecessary(TimestampTz now)
28792896
{
28802897
TimestampTz ping_time;
28812898

2882-
if (wal_sender_timeout <= 0)
2899+
/*
2900+
* Don't send keepalive messages if timeouts are globally disabled or
2901+
* we're doing something not partaking in timeouts.
2902+
*/
2903+
if (wal_sender_timeout <= 0 || last_reply_timestamp <= 0)
28832904
return;
28842905

28852906
if (waiting_for_ping_response)

0 commit comments

Comments
 (0)