Skip to content

Commit 5aa8db0

Browse files
committed
Fix walsender to exit promptly if client requests shutdown.
It's possible for WalSndWaitForWal to be asked to wait for WAL that doesn't exist yet. That's fine; in fact, it's the normal situation if we're caught up. But when the client requests shutdown, we should not keep waiting. The previous coding could wait indefinitely if the source server was idle.

In passing, improve the rather weak comments in this area, and slightly rearrange some related code for better readability.

Back-patch to 9.4, where this code was introduced.

Discussion: https://postgr.es/m/14154.1498781234@sss.pgh.pa.us
1 parent e97b737 commit 5aa8db0

File tree

1 file changed

+31
-17
lines changed

1 file changed

+31
-17
lines changed

src/backend/replication/walsender.c

Lines changed: 31 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -762,15 +762,14 @@ logical_read_xlog_page(XLogReaderState *state, XLogRecPtr targetPagePtr, int req
762762
/* make sure we have enough WAL available */
763763
flushptr = WalSndWaitForWal(targetPagePtr + reqLen);
764764

765-
/* more than one block available */
766-
if (targetPagePtr + XLOG_BLCKSZ <= flushptr)
767-
count = XLOG_BLCKSZ;
768-
/* not enough WAL synced, that can happen during shutdown */
769-
else if (targetPagePtr + reqLen > flushptr)
765+
/* fail if not (implies we are going to shut down) */
766+
if (flushptr < targetPagePtr + reqLen)
770767
return -1;
771-
/* part of the page available */
768+
769+
if (targetPagePtr + XLOG_BLCKSZ <= flushptr)
770+
count = XLOG_BLCKSZ; /* more than one block available */
772771
else
773-
count = flushptr - targetPagePtr;
772+
count = flushptr - targetPagePtr; /* part of the page available */
774773

775774
/* now actually read the data, we know it's there */
776775
XLogRead(cur_page, targetPagePtr, XLOG_BLCKSZ);
@@ -1149,7 +1148,11 @@ WalSndWriteData(LogicalDecodingContext *ctx, XLogRecPtr lsn, TransactionId xid,
11491148
}
11501149

11511150
/*
1152-
* Wait till WAL < loc is flushed to disk so it can be safely read.
1151+
* Wait till WAL < loc is flushed to disk so it can be safely sent to client.
1152+
*
1153+
* Returns end LSN of flushed WAL. Normally this will be >= loc, but
1154+
* if we detect a shutdown request (either from postmaster or client)
1155+
* we will return early, so caller must always check.
11531156
*/
11541157
static XLogRecPtr
11551158
WalSndWaitForWal(XLogRecPtr loc)
@@ -1214,9 +1217,7 @@ WalSndWaitForWal(XLogRecPtr loc)
12141217
RecentFlushPtr = GetXLogReplayRecPtr(NULL);
12151218

12161219
/*
1217-
* If postmaster asked us to stop, don't wait here anymore. This will
1218-
* cause the xlogreader to return without reading a full record, which
1219-
* is the fastest way to reach the mainloop which then can quit.
1220+
* If postmaster asked us to stop, don't wait anymore.
12201221
*
12211222
* It's important to do this check after the recomputation of
12221223
* RecentFlushPtr, so we can send all remaining data before shutting
@@ -1247,14 +1248,20 @@ WalSndWaitForWal(XLogRecPtr loc)
12471248
WalSndCaughtUp = true;
12481249

12491250
/*
1250-
* Try to flush pending output to the client. Also wait for the socket
1251-
* becoming writable, if there's still pending output after an attempt
1252-
* to flush. Otherwise we might just sit on output data while waiting
1253-
* for new WAL being generated.
1251+
* Try to flush any pending output to the client.
12541252
*/
12551253
if (pq_flush_if_writable() != 0)
12561254
WalSndShutdown();
12571255

1256+
/*
1257+
* If we have received CopyDone from the client, sent CopyDone
1258+
* ourselves, and the output buffer is empty, it's time to exit
1259+
* streaming, so fail the current WAL fetch request.
1260+
*/
1261+
if (streamingDoneReceiving && streamingDoneSending &&
1262+
!pq_is_send_pending())
1263+
break;
1264+
12581265
now = GetCurrentTimestamp();
12591266

12601267
/* die if timeout was reached */
@@ -1263,6 +1270,13 @@ WalSndWaitForWal(XLogRecPtr loc)
12631270
/* Send keepalive if the time has come */
12641271
WalSndKeepaliveIfNecessary(now);
12651272

1273+
/*
1274+
* Sleep until something happens or we time out. Also wait for the
1275+
* socket becoming writable, if there's still pending output.
1276+
* Otherwise we might sit on sendable output data while waiting for
1277+
* new WAL to be generated. (But if we have nothing to send, we don't
1278+
* want to wake on socket-writable.)
1279+
*/
12661280
sleeptime = WalSndComputeSleeptime(now);
12671281

12681282
wakeEvents = WL_LATCH_SET | WL_POSTMASTER_DEATH |
@@ -1271,7 +1285,6 @@ WalSndWaitForWal(XLogRecPtr loc)
12711285
if (pq_is_send_pending())
12721286
wakeEvents |= WL_SOCKET_WRITEABLE;
12731287

1274-
/* Sleep until something happens or we time out */
12751288
ImmediateInterruptOK = true;
12761289
CHECK_FOR_INTERRUPTS();
12771290
WaitLatchOrSocket(&MyWalSnd->latch, wakeEvents,
@@ -1861,7 +1874,8 @@ WalSndLoop(WalSndSendDataCallback send_data)
18611874
* ourselves, and the output buffer is empty, it's time to exit
18621875
* streaming.
18631876
*/
1864-
if (!pq_is_send_pending() && streamingDoneSending && streamingDoneReceiving)
1877+
if (streamingDoneReceiving && streamingDoneSending &&
1878+
!pq_is_send_pending())
18651879
break;
18661880

18671881
/*

0 commit comments

Comments
 (0)