Skip to content

Commit 04a09ee

Browse files
committed
Teach WaitEventSetWait() to report multiple events on Windows.
The WAIT_USE_WIN32 implementation of WaitEventSetWait() previously reported at most one event per call, because that's what the underlying WaitForMultipleObjects() call does. We can make the behavior match the three Unix implementations by looping until our output buffer is full, or there are no more events available now. This makes no difference to most callers including the regular FEBE socket code, since they ask for at most one event anyway. A difference in socket accept priority might be perceived by end users after commit 7389aad started using WaitEventSet in the postmaster. With this commit, the accept order now matches Unix systems, servicing listening sockets in round-robin order. We decided it wasn't really a bug or worth back-patching, but it seems good to align the behavior across platforms. Reviewed-by: Andres Freund <andres@anarazel.de> (earlier version) Tested-by: "Wei Wang (Fujitsu)" <wangw.fnst@fujitsu.com> Discussion: https://postgr.es/m/CA%2BhUKG%2BA2dk29hr5zRP3HVJQ-_PncNJM6HVQ7aaYLXLRBZU-xw%40mail.gmail.com
1 parent 9f06025 commit 04a09ee

File tree

1 file changed

+132
-96
lines changed

1 file changed

+132
-96
lines changed

src/backend/storage/ipc/latch.c

+132-96
Original file line numberDiff line numberDiff line change
@@ -1931,14 +1931,11 @@ WaitEventSetWaitBlock(WaitEventSet *set, int cur_timeout,
19311931
#elif defined(WAIT_USE_WIN32)
19321932

19331933
/*
1934-
* Wait using Windows' WaitForMultipleObjects().
1934+
* Wait using Windows' WaitForMultipleObjects(). Each call only "consumes" one
1935+
* event, so we keep calling until we've filled up our output buffer to match
1936+
* the behavior of the other implementations.
19351937
*
1936-
* Unfortunately this will only ever return a single readiness notification at
1937-
* a time. Note that while the official documentation for
1938-
* WaitForMultipleObjects is ambiguous about multiple events being "consumed"
1939-
* with a single bWaitAll = FALSE call,
1940-
* https://blogs.msdn.microsoft.com/oldnewthing/20150409-00/?p=44273 confirms
1941-
* that only one event is "consumed".
1938+
* https://blogs.msdn.microsoft.com/oldnewthing/20150409-00/?p=44273
19421939
*/
19431940
static inline int
19441941
WaitEventSetWaitBlock(WaitEventSet *set, int cur_timeout,
@@ -2023,106 +2020,145 @@ WaitEventSetWaitBlock(WaitEventSet *set, int cur_timeout,
20232020
*/
20242021
cur_event = (WaitEvent *) &set->events[rc - WAIT_OBJECT_0 - 1];
20252022

2026-
occurred_events->pos = cur_event->pos;
2027-
occurred_events->user_data = cur_event->user_data;
2028-
occurred_events->events = 0;
2029-
2030-
if (cur_event->events == WL_LATCH_SET)
2031-
{
2032-
/*
2033-
* We cannot use set->latch->event to reset the fired event if we
2034-
* aren't waiting on this latch now.
2035-
*/
2036-
if (!ResetEvent(set->handles[cur_event->pos + 1]))
2037-
elog(ERROR, "ResetEvent failed: error code %lu", GetLastError());
2038-
2039-
if (set->latch && set->latch->is_set)
2040-
{
2041-
occurred_events->fd = PGINVALID_SOCKET;
2042-
occurred_events->events = WL_LATCH_SET;
2043-
occurred_events++;
2044-
returned_events++;
2045-
}
2046-
}
2047-
else if (cur_event->events == WL_POSTMASTER_DEATH)
2048-
{
2049-
/*
2050-
* Postmaster apparently died. Since the consequences of falsely
2051-
* returning WL_POSTMASTER_DEATH could be pretty unpleasant, we take
2052-
* the trouble to positively verify this with PostmasterIsAlive(),
2053-
* even though there is no known reason to think that the event could
2054-
* be falsely set on Windows.
2055-
*/
2056-
if (!PostmasterIsAliveInternal())
2057-
{
2058-
if (set->exit_on_postmaster_death)
2059-
proc_exit(1);
2060-
occurred_events->fd = PGINVALID_SOCKET;
2061-
occurred_events->events = WL_POSTMASTER_DEATH;
2062-
occurred_events++;
2063-
returned_events++;
2064-
}
2065-
}
2066-
else if (cur_event->events & WL_SOCKET_MASK)
2023+
for (;;)
20672024
{
2068-
WSANETWORKEVENTS resEvents;
2069-
HANDLE handle = set->handles[cur_event->pos + 1];
2070-
2071-
Assert(cur_event->fd);
2025+
int next_pos;
2026+
int count;
20722027

2073-
occurred_events->fd = cur_event->fd;
2028+
occurred_events->pos = cur_event->pos;
2029+
occurred_events->user_data = cur_event->user_data;
2030+
occurred_events->events = 0;
20742031

2075-
ZeroMemory(&resEvents, sizeof(resEvents));
2076-
if (WSAEnumNetworkEvents(cur_event->fd, handle, &resEvents) != 0)
2077-
elog(ERROR, "failed to enumerate network events: error code %d",
2078-
WSAGetLastError());
2079-
if ((cur_event->events & WL_SOCKET_READABLE) &&
2080-
(resEvents.lNetworkEvents & FD_READ))
2032+
if (cur_event->events == WL_LATCH_SET)
20812033
{
2082-
/* data available in socket */
2083-
occurred_events->events |= WL_SOCKET_READABLE;
2084-
2085-
/*------
2086-
* WaitForMultipleObjects doesn't guarantee that a read event will
2087-
* be returned if the latch is set at the same time. Even if it
2088-
* did, the caller might drop that event expecting it to reoccur
2089-
* on next call. So, we must force the event to be reset if this
2090-
* WaitEventSet is used again in order to avoid an indefinite
2091-
* hang. Refer https://msdn.microsoft.com/en-us/library/windows/desktop/ms741576(v=vs.85).aspx
2092-
* for the behavior of socket events.
2093-
*------
2034+
/*
2035+
* We cannot use set->latch->event to reset the fired event if we
2036+
* aren't waiting on this latch now.
20942037
*/
2095-
cur_event->reset = true;
2096-
}
2097-
if ((cur_event->events & WL_SOCKET_WRITEABLE) &&
2098-
(resEvents.lNetworkEvents & FD_WRITE))
2099-
{
2100-
/* writeable */
2101-
occurred_events->events |= WL_SOCKET_WRITEABLE;
2102-
}
2103-
if ((cur_event->events & WL_SOCKET_CONNECTED) &&
2104-
(resEvents.lNetworkEvents & FD_CONNECT))
2105-
{
2106-
/* connected */
2107-
occurred_events->events |= WL_SOCKET_CONNECTED;
2038+
if (!ResetEvent(set->handles[cur_event->pos + 1]))
2039+
elog(ERROR, "ResetEvent failed: error code %lu", GetLastError());
2040+
2041+
if (set->latch && set->latch->is_set)
2042+
{
2043+
occurred_events->fd = PGINVALID_SOCKET;
2044+
occurred_events->events = WL_LATCH_SET;
2045+
occurred_events++;
2046+
returned_events++;
2047+
}
21082048
}
2109-
if ((cur_event->events & WL_SOCKET_ACCEPT) &&
2110-
(resEvents.lNetworkEvents & FD_ACCEPT))
2049+
else if (cur_event->events == WL_POSTMASTER_DEATH)
21112050
{
2112-
/* incoming connection could be accepted */
2113-
occurred_events->events |= WL_SOCKET_ACCEPT;
2051+
/*
2052+
* Postmaster apparently died. Since the consequences of falsely
2053+
* returning WL_POSTMASTER_DEATH could be pretty unpleasant, we
2054+
* take the trouble to positively verify this with
2055+
* PostmasterIsAlive(), even though there is no known reason to
2056+
* think that the event could be falsely set on Windows.
2057+
*/
2058+
if (!PostmasterIsAliveInternal())
2059+
{
2060+
if (set->exit_on_postmaster_death)
2061+
proc_exit(1);
2062+
occurred_events->fd = PGINVALID_SOCKET;
2063+
occurred_events->events = WL_POSTMASTER_DEATH;
2064+
occurred_events++;
2065+
returned_events++;
2066+
}
21142067
}
2115-
if (resEvents.lNetworkEvents & FD_CLOSE)
2068+
else if (cur_event->events & WL_SOCKET_MASK)
21162069
{
2117-
/* EOF/error, so signal all caller-requested socket flags */
2118-
occurred_events->events |= (cur_event->events & WL_SOCKET_MASK);
2119-
}
2070+
WSANETWORKEVENTS resEvents;
2071+
HANDLE handle = set->handles[cur_event->pos + 1];
21202072

2121-
if (occurred_events->events != 0)
2122-
{
2123-
occurred_events++;
2124-
returned_events++;
2073+
Assert(cur_event->fd);
2074+
2075+
occurred_events->fd = cur_event->fd;
2076+
2077+
ZeroMemory(&resEvents, sizeof(resEvents));
2078+
if (WSAEnumNetworkEvents(cur_event->fd, handle, &resEvents) != 0)
2079+
elog(ERROR, "failed to enumerate network events: error code %d",
2080+
WSAGetLastError());
2081+
if ((cur_event->events & WL_SOCKET_READABLE) &&
2082+
(resEvents.lNetworkEvents & FD_READ))
2083+
{
2084+
/* data available in socket */
2085+
occurred_events->events |= WL_SOCKET_READABLE;
2086+
2087+
/*------
2088+
* WaitForMultipleObjects doesn't guarantee that a read event
2089+
* will be returned if the latch is set at the same time. Even
2090+
* if it did, the caller might drop that event expecting it to
2091+
* reoccur on next call. So, we must force the event to be
2092+
* reset if this WaitEventSet is used again in order to avoid
2093+
* an indefinite hang.
2094+
*
2095+
* Refer
2096+
* https://msdn.microsoft.com/en-us/library/windows/desktop/ms741576(v=vs.85).aspx
2097+
* for the behavior of socket events.
2098+
*------
2099+
*/
2100+
cur_event->reset = true;
2101+
}
2102+
if ((cur_event->events & WL_SOCKET_WRITEABLE) &&
2103+
(resEvents.lNetworkEvents & FD_WRITE))
2104+
{
2105+
/* writeable */
2106+
occurred_events->events |= WL_SOCKET_WRITEABLE;
2107+
}
2108+
if ((cur_event->events & WL_SOCKET_CONNECTED) &&
2109+
(resEvents.lNetworkEvents & FD_CONNECT))
2110+
{
2111+
/* connected */
2112+
occurred_events->events |= WL_SOCKET_CONNECTED;
2113+
}
2114+
if ((cur_event->events & WL_SOCKET_ACCEPT) &&
2115+
(resEvents.lNetworkEvents & FD_ACCEPT))
2116+
{
2117+
/* incoming connection could be accepted */
2118+
occurred_events->events |= WL_SOCKET_ACCEPT;
2119+
}
2120+
if (resEvents.lNetworkEvents & FD_CLOSE)
2121+
{
2122+
/* EOF/error, so signal all caller-requested socket flags */
2123+
occurred_events->events |= (cur_event->events & WL_SOCKET_MASK);
2124+
}
2125+
2126+
if (occurred_events->events != 0)
2127+
{
2128+
occurred_events++;
2129+
returned_events++;
2130+
}
21252131
}
2132+
2133+
/* Is the output buffer full? */
2134+
if (returned_events == nevents)
2135+
break;
2136+
2137+
/* Have we run out of possible events? */
2138+
next_pos = cur_event->pos + 1;
2139+
if (next_pos == set->nevents)
2140+
break;
2141+
2142+
/*
2143+
* Poll the rest of the event handles in the array starting at
2144+
* next_pos being careful to skip over the initial signal handle too.
2145+
* This time we use a zero timeout.
2146+
*/
2147+
count = set->nevents - next_pos;
2148+
rc = WaitForMultipleObjects(count,
2149+
set->handles + 1 + next_pos,
2150+
false,
2151+
0);
2152+
2153+
/*
2154+
* We don't distinguish between errors and WAIT_TIMEOUT here because
2155+
* we already have events to report.
2156+
*/
2157+
if (rc < WAIT_OBJECT_0 || rc >= WAIT_OBJECT_0 + count)
2158+
break;
2159+
2160+
/* We have another event to decode. */
2161+
cur_event = &set->events[next_pos + (rc - WAIT_OBJECT_0)];
21262162
}
21272163

21282164
return returned_events;

0 commit comments

Comments
 (0)