Skip to content

Commit 29efe1b

Browse files
committed
Make WaitLatchOrSocket's timeout detection more robust.
In the previous coding, timeout would be noticed and reported only when poll() or select() returned zero (or the equivalent behavior on Windows). Ordinarily that should work well enough, but it seems conceivable that we could get into a state where poll() always returns a nonzero value --- for example, if it is noticing a condition on one of the file descriptors that we do not think is reason to exit the loop. If that happened, we'd be in a busy-wait loop that would fail to terminate even when the timeout expires. We can make this more robust at essentially no cost, by deciding to exit of our own accord if we compute a zero or negative time-remaining-to-wait. Previously the code noted this but just clamped the time-remaining to zero, expecting that we'd detect timeout on the next loop iteration. Back-patch to 9.2. While 9.1 had a version of WaitLatchOrSocket, it was primitive compared to later versions, and did not guarantee reliable detection of timeouts anyway. (Essentially, this is a refinement of commit 3e7fdcf, which was back-patched only as far as 9.2.)
1 parent f3f037e commit 29efe1b

File tree

2 files changed

+19
-10
lines changed

2 files changed

+19
-10
lines changed

src/backend/port/unix_latch.c

+13-7
Original file line number | Diff line number | Diff line change
@@ -442,7 +442,8 @@ WaitLatchOrSocket(volatile Latch *latch, int wakeEvents, pgsocket sock,
442442
result |= WL_SOCKET_WRITEABLE;
443443
}
444444
if ((wakeEvents & WL_POSTMASTER_DEATH) &&
445-
FD_ISSET(postmaster_alive_fds[POSTMASTER_FD_WATCH], &input_mask))
445+
FD_ISSET(postmaster_alive_fds[POSTMASTER_FD_WATCH],
446+
&input_mask))
446447
{
447448
/*
448449
* According to the select(2) man page on Linux, select(2) may
@@ -461,17 +462,22 @@ WaitLatchOrSocket(volatile Latch *latch, int wakeEvents, pgsocket sock,
461462
#endif /* HAVE_POLL */
462463

463464
/* If we're not done, update cur_timeout for next iteration */
464-
if (result == 0 && cur_timeout >= 0)
465+
if (result == 0 && (wakeEvents & WL_TIMEOUT))
465466
{
466467
INSTR_TIME_SET_CURRENT(cur_time);
467468
INSTR_TIME_SUBTRACT(cur_time, start_time);
468469
cur_timeout = timeout - (long) INSTR_TIME_GET_MILLISEC(cur_time);
469-
if (cur_timeout < 0)
470-
cur_timeout = 0;
471-
470+
if (cur_timeout <= 0)
471+
{
472+
/* Timeout has expired, no need to continue looping */
473+
result |= WL_TIMEOUT;
474+
}
472475
#ifndef HAVE_POLL
473-
tv.tv_sec = cur_timeout / 1000L;
474-
tv.tv_usec = (cur_timeout % 1000L) * 1000L;
476+
else
477+
{
478+
tv.tv_sec = cur_timeout / 1000L;
479+
tv.tv_usec = (cur_timeout % 1000L) * 1000L;
480+
}
475481
#endif
476482
}
477483
} while (result == 0);

src/backend/port/win32_latch.c

+6-3
Original file line number | Diff line number | Diff line change
@@ -259,13 +259,16 @@ WaitLatchOrSocket(volatile Latch *latch, int wakeEvents, pgsocket sock,
259259
elog(ERROR, "unexpected return code from WaitForMultipleObjects(): %lu", rc);
260260

261261
/* If we're not done, update cur_timeout for next iteration */
262-
if (result == 0 && cur_timeout != INFINITE)
262+
if (result == 0 && (wakeEvents & WL_TIMEOUT))
263263
{
264264
INSTR_TIME_SET_CURRENT(cur_time);
265265
INSTR_TIME_SUBTRACT(cur_time, start_time);
266266
cur_timeout = timeout - (long) INSTR_TIME_GET_MILLISEC(cur_time);
267-
if (cur_timeout < 0)
268-
cur_timeout = 0;
267+
if (cur_timeout <= 0)
268+
{
269+
/* Timeout has expired, no need to continue looping */
270+
result |= WL_TIMEOUT;
271+
}
269272
}
270273
} while (result == 0);
271274

0 commit comments

Comments
 (0)