Skip to content

Commit 6a2a70a

Browse files
committed
Use signalfd(2) for epoll latches.
Cut down on system calls and other overheads by reading from a signalfd instead of using a signal handler and self-pipe. Affects Linux systems, and possibly others including illumos that implement the Linux epoll and signalfd interfaces. Reviewed-by: Andres Freund <andres@anarazel.de> Discussion: https://postgr.es/m/CA+hUKGJjxPDpzBE0a3hyUywBvaZuC89yx3jK9RFZgfv_KHU7gg@mail.gmail.com
1 parent 83709a0 commit 6a2a70a

File tree

3 files changed

+118
-55
lines changed

3 files changed

+118
-55
lines changed

src/backend/libpq/pqsignal.c

+3-1
Original file line numberDiff line numberDiff line change
@@ -35,13 +35,15 @@ sigset_t UnBlockSig,
3535
* collection; it's essentially BlockSig minus SIGTERM, SIGQUIT, SIGALRM.
3636
*
3737
* UnBlockSig is the set of signals to block when we don't want to block
38-
* signals (is this ever nonzero??)
38+
* signals.
3939
*/
4040
void
4141
pqinitmask(void)
4242
{
4343
sigemptyset(&UnBlockSig);
4444

45+
/* Note: InitializeLatchSupport() modifies UnBlockSig. */
46+
4547
/* First set all signals, then clear some. */
4648
sigfillset(&BlockSig);
4749
sigfillset(&StartupBlockSig);

src/backend/storage/ipc/latch.c

+110-49
Original file line numberDiff line numberDiff line change
@@ -3,21 +3,20 @@
33
* latch.c
44
* Routines for inter-process latches
55
*
6-
* The Unix implementation uses the so-called self-pipe trick to overcome the
7-
* race condition involved with poll() (or epoll_wait() on linux) and setting
8-
* a global flag in the signal handler. When a latch is set and the current
9-
* process is waiting for it, the signal handler wakes up the poll() in
10-
* WaitLatch by writing a byte to a pipe. A signal by itself doesn't interrupt
11-
* poll() on all platforms, and even on platforms where it does, a signal that
12-
* arrives just before the poll() call does not prevent poll() from entering
13-
* sleep. An incoming byte on a pipe however reliably interrupts the sleep,
14-
* and causes poll() to return immediately even if the signal arrives before
15-
* poll() begins.
6+
* The poll() implementation uses the so-called self-pipe trick to overcome the
7+
* race condition involved with poll() and setting a global flag in the signal
8+
* handler. When a latch is set and the current process is waiting for it, the
9+
* signal handler wakes up the poll() in WaitLatch by writing a byte to a pipe.
10+
* A signal by itself doesn't interrupt poll() on all platforms, and even on
11+
* platforms where it does, a signal that arrives just before the poll() call
12+
* does not prevent poll() from entering sleep. An incoming byte on a pipe
13+
* however reliably interrupts the sleep, and causes poll() to return
14+
* immediately even if the signal arrives before poll() begins.
1615
*
17-
* When SetLatch is called from the same process that owns the latch,
18-
* SetLatch writes the byte directly to the pipe. If it's owned by another
19-
* process, SIGURG is sent and the signal handler in the waiting process
20-
* writes the byte to the pipe on behalf of the signaling process.
16+
* The epoll() implementation overcomes the race with a different technique: it
17+
* keeps SIGURG blocked and consumes from a signalfd() descriptor instead. We
18+
* don't need to register a signal handler or create our own self-pipe. We
19+
* assume that any system that has Linux epoll() also has Linux signalfd().
2120
*
2221
* The Windows implementation uses Windows events that are inherited by all
2322
* postmaster child processes. There's no need for the self-pipe trick there.
@@ -46,6 +45,7 @@
4645
#include <poll.h>
4746
#endif
4847

48+
#include "libpq/pqsignal.h"
4949
#include "miscadmin.h"
5050
#include "pgstat.h"
5151
#include "port/atomics.h"
@@ -79,6 +79,10 @@
7979
#error "no wait set implementation available"
8080
#endif
8181

82+
#ifdef WAIT_USE_EPOLL
83+
#include <sys/signalfd.h>
84+
#endif
85+
8286
/* typedef in latch.h */
8387
struct WaitEventSet
8488
{
@@ -139,7 +143,14 @@ static WaitEventSet *LatchWaitSet;
139143
#ifndef WIN32
140144
/* Are we currently in WaitLatch? The signal handler would like to know. */
141145
static volatile sig_atomic_t waiting = false;
146+
#endif
142147

148+
#ifdef WAIT_USE_EPOLL
149+
/* On Linux, we'll receive SIGURG via a signalfd file descriptor. */
150+
static int signal_fd = -1;
151+
#endif
152+
153+
#if defined(WAIT_USE_POLL) || defined(WAIT_USE_KQUEUE)
143154
/* Read and write ends of the self-pipe */
144155
static int selfpipe_readfd = -1;
145156
static int selfpipe_writefd = -1;
@@ -150,8 +161,11 @@ static int selfpipe_owner_pid = 0;
150161
/* Private function prototypes */
151162
static void latch_sigurg_handler(SIGNAL_ARGS);
152163
static void sendSelfPipeByte(void);
153-
static void drainSelfPipe(void);
154-
#endif /* WIN32 */
164+
#endif
165+
166+
#if defined(WAIT_USE_POLL) || defined(WAIT_USE_EPOLL)
167+
static void drain(void);
168+
#endif
155169

156170
#if defined(WAIT_USE_EPOLL)
157171
static void WaitEventAdjustEpoll(WaitEventSet *set, WaitEvent *event, int action);
@@ -175,7 +189,7 @@ static inline int WaitEventSetWaitBlock(WaitEventSet *set, int cur_timeout,
175189
void
176190
InitializeLatchSupport(void)
177191
{
178-
#ifndef WIN32
192+
#if defined(WAIT_USE_POLL) || defined(WAIT_USE_KQUEUE)
179193
int pipefd[2];
180194

181195
if (IsUnderPostmaster)
@@ -247,8 +261,21 @@ InitializeLatchSupport(void)
247261
ReserveExternalFD();
248262

249263
pqsignal(SIGURG, latch_sigurg_handler);
250-
#else
251-
/* currently, nothing to do here for Windows */
264+
#endif
265+
266+
#ifdef WAIT_USE_EPOLL
267+
sigset_t signalfd_mask;
268+
269+
/* Block SIGURG, because we'll receive it through a signalfd. */
270+
sigaddset(&UnBlockSig, SIGURG);
271+
272+
/* Set up the signalfd to receive SIGURG notifications. */
273+
sigemptyset(&signalfd_mask);
274+
sigaddset(&signalfd_mask, SIGURG);
275+
signal_fd = signalfd(-1, &signalfd_mask, SFD_NONBLOCK | SFD_CLOEXEC);
276+
if (signal_fd < 0)
277+
elog(FATAL, "signalfd() failed");
278+
ReserveExternalFD();
252279
#endif
253280
}
254281

@@ -273,19 +300,28 @@ InitializeLatchWaitSet(void)
273300
void
274301
ShutdownLatchSupport(void)
275302
{
303+
#if defined(WAIT_USE_POLL) || defined(WAIT_USE_KQUEUE)
276304
pqsignal(SIGURG, SIG_IGN);
305+
#endif
277306

278307
if (LatchWaitSet)
279308
{
280309
FreeWaitEventSet(LatchWaitSet);
281310
LatchWaitSet = NULL;
282311
}
283312

313+
#if defined(WAIT_USE_POLL) || defined(WAIT_USE_KQUEUE)
284314
close(selfpipe_readfd);
285315
close(selfpipe_writefd);
286316
selfpipe_readfd = -1;
287317
selfpipe_writefd = -1;
288318
selfpipe_owner_pid = InvalidPid;
319+
#endif
320+
321+
#if defined(WAIT_USE_EPOLL)
322+
close(signal_fd);
323+
signal_fd = -1;
324+
#endif
289325
}
290326

291327
/*
@@ -299,10 +335,10 @@ InitLatch(Latch *latch)
299335
latch->owner_pid = MyProcPid;
300336
latch->is_shared = false;
301337

302-
#ifndef WIN32
338+
#if defined(WAIT_USE_POLL) || defined(WAIT_USE_KQUEUE)
303339
/* Assert InitializeLatchSupport has been called in this process */
304340
Assert(selfpipe_readfd >= 0 && selfpipe_owner_pid == MyProcPid);
305-
#else
341+
#elif defined(WAIT_USE_WIN32)
306342
latch->event = CreateEvent(NULL, TRUE, FALSE, NULL);
307343
if (latch->event == NULL)
308344
elog(ERROR, "CreateEvent failed: error code %lu", GetLastError());
@@ -363,7 +399,7 @@ OwnLatch(Latch *latch)
363399
/* Sanity checks */
364400
Assert(latch->is_shared);
365401

366-
#ifndef WIN32
402+
#if defined(WAIT_USE_POLL) || defined(WAIT_USE_KQUEUE)
367403
/* Assert InitializeLatchSupport has been called in this process */
368404
Assert(selfpipe_readfd >= 0 && selfpipe_owner_pid == MyProcPid);
369405
#endif
@@ -550,9 +586,9 @@ SetLatch(Latch *latch)
550586

551587
/*
552588
* See if anyone's waiting for the latch. It can be the current process if
553-
* we're in a signal handler. We use the self-pipe to wake up the
554-
* poll()/epoll_wait() in that case. If it's another process, send a
555-
* signal.
589+
* we're in a signal handler. We use the self-pipe or SIGURG to ourselves
590+
* to wake up WaitEventSetWaitBlock() without races in that case. If it's
591+
* another process, send a signal.
556592
*
557593
* Fetch owner_pid only once, in case the latch is concurrently getting
558594
* owned or disowned. XXX: This assumes that pid_t is atomic, which isn't
@@ -575,11 +611,17 @@ SetLatch(Latch *latch)
575611
return;
576612
else if (owner_pid == MyProcPid)
577613
{
614+
#if defined(WAIT_USE_POLL) || defined(WAIT_USE_KQUEUE)
578615
if (waiting)
579616
sendSelfPipeByte();
617+
#else
618+
if (waiting)
619+
kill(MyProcPid, SIGURG);
620+
#endif
580621
}
581622
else
582623
kill(owner_pid, SIGURG);
624+
583625
#else
584626

585627
/*
@@ -856,8 +898,13 @@ AddWaitEventToSet(WaitEventSet *set, uint32 events, pgsocket fd, Latch *latch,
856898
{
857899
set->latch = latch;
858900
set->latch_pos = event->pos;
859-
#ifndef WIN32
901+
#if defined(WAIT_USE_POLL) || defined(WAIT_USE_KQUEUE)
860902
event->fd = selfpipe_readfd;
903+
#elif defined(WAIT_USE_EPOLL)
904+
event->fd = signal_fd;
905+
#else
906+
event->fd = PGINVALID_SOCKET;
907+
return event->pos;
861908
#endif
862909
}
863910
else if (events == WL_POSTMASTER_DEATH)
@@ -932,12 +979,13 @@ ModifyWaitEvent(WaitEventSet *set, int pos, uint32 events, Latch *latch)
932979
if (latch && latch->owner_pid != MyProcPid)
933980
elog(ERROR, "cannot wait on a latch owned by another process");
934981
set->latch = latch;
982+
935983
/*
936984
* On Unix, we don't need to modify the kernel object because the
937-
* underlying pipe is the same for all latches so we can return
938-
* immediately. On Windows, we need to update our array of handles,
939-
* but we leave the old one in place and tolerate spurious wakeups if
940-
* the latch is disabled.
985+
* underlying pipe (if there is one) is the same for all latches so we
986+
* can return immediately. On Windows, we need to update our array of
987+
* handles, but we leave the old one in place and tolerate spurious
988+
* wakeups if the latch is disabled.
941989
*/
942990
#if defined(WAIT_USE_WIN32)
943991
if (!latch)
@@ -1421,8 +1469,8 @@ WaitEventSetWaitBlock(WaitEventSet *set, int cur_timeout,
14211469
if (cur_event->events == WL_LATCH_SET &&
14221470
cur_epoll_event->events & (EPOLLIN | EPOLLERR | EPOLLHUP))
14231471
{
1424-
/* There's data in the self-pipe, clear it. */
1425-
drainSelfPipe();
1472+
/* Drain the signalfd. */
1473+
drain();
14261474

14271475
if (set->latch && set->latch->is_set)
14281476
{
@@ -1575,7 +1623,7 @@ WaitEventSetWaitBlock(WaitEventSet *set, int cur_timeout,
15751623
cur_kqueue_event->filter == EVFILT_READ)
15761624
{
15771625
/* There's data in the self-pipe, clear it. */
1578-
drainSelfPipe();
1626+
drain();
15791627

15801628
if (set->latch && set->latch->is_set)
15811629
{
@@ -1691,7 +1739,7 @@ WaitEventSetWaitBlock(WaitEventSet *set, int cur_timeout,
16911739
(cur_pollfd->revents & (POLLIN | POLLHUP | POLLERR | POLLNVAL)))
16921740
{
16931741
/* There's data in the self-pipe, clear it. */
1694-
drainSelfPipe();
1742+
drain();
16951743

16961744
if (set->latch && set->latch->is_set)
16971745
{
@@ -1951,7 +1999,8 @@ WaitEventSetWaitBlock(WaitEventSet *set, int cur_timeout,
19511999
}
19522000
#endif
19532001

1954-
#ifndef WIN32
2002+
#if defined(WAIT_USE_POLL) || defined(WAIT_USE_KQUEUE)
2003+
19552004
/*
19562005
* SetLatch uses SIGURG to wake up the process waiting on the latch.
19572006
*
@@ -1967,10 +2016,8 @@ latch_sigurg_handler(SIGNAL_ARGS)
19672016

19682017
errno = save_errno;
19692018
}
1970-
#endif /* !WIN32 */
19712019

19722020
/* Send one byte to the self-pipe, to wake up WaitLatch */
1973-
#ifndef WIN32
19742021
static void
19752022
sendSelfPipeByte(void)
19762023
{
@@ -2000,45 +2047,58 @@ sendSelfPipeByte(void)
20002047
return;
20012048
}
20022049
}
2003-
#endif /* !WIN32 */
2050+
2051+
#endif
2052+
2053+
#if defined(WAIT_USE_POLL) || defined(WAIT_USE_EPOLL)
20042054

20052055
/*
2006-
* Read all available data from the self-pipe
2056+
* Read all available data from self-pipe or signalfd.
20072057
*
20082058
* Note: this is only called when waiting = true. If it fails and doesn't
20092059
* return, it must reset that flag first (though ideally, this will never
20102060
* happen).
20112061
*/
2012-
#ifndef WIN32
20132062
static void
2014-
drainSelfPipe(void)
2063+
drain(void)
20152064
{
2016-
/*
2017-
* There shouldn't normally be more than one byte in the pipe, or maybe a
2018-
* few bytes if multiple processes run SetLatch at the same instant.
2019-
*/
2020-
char buf[16];
2065+
char buf[1024];
20212066
int rc;
2067+
int fd;
2068+
2069+
#ifdef WAIT_USE_POLL
2070+
fd = selfpipe_readfd;
2071+
#else
2072+
fd = signal_fd;
2073+
#endif
20222074

20232075
for (;;)
20242076
{
2025-
rc = read(selfpipe_readfd, buf, sizeof(buf));
2077+
rc = read(fd, buf, sizeof(buf));
20262078
if (rc < 0)
20272079
{
20282080
if (errno == EAGAIN || errno == EWOULDBLOCK)
2029-
break; /* the pipe is empty */
2081+
break; /* the descriptor is empty */
20302082
else if (errno == EINTR)
20312083
continue; /* retry */
20322084
else
20332085
{
20342086
waiting = false;
2087+
#ifdef WAIT_USE_POLL
20352088
elog(ERROR, "read() on self-pipe failed: %m");
2089+
#else
2090+
elog(ERROR, "read() on signalfd failed: %m");
2091+
#endif
20362092
}
20372093
}
20382094
else if (rc == 0)
20392095
{
20402096
waiting = false;
2097+
#ifdef WAIT_USE_POLL
20412098
elog(ERROR, "unexpected EOF on self-pipe");
2099+
#else
2100+
elog(ERROR, "unexpected EOF on signalfd");
2101+
#endif
20422102
}
20432103
else if (rc < sizeof(buf))
20442104
{
@@ -2048,4 +2108,5 @@ drainSelfPipe(void)
20482108
/* else buffer wasn't big enough, so read again */
20492109
}
20502110
}
2051-
#endif /* !WIN32 */
2111+
2112+
#endif

src/backend/utils/init/miscinit.c

+5-5
Original file line numberDiff line numberDiff line change
@@ -118,6 +118,11 @@ InitPostmasterChild(void)
118118
/* We don't want the postmaster's proc_exit() handlers */
119119
on_exit_reset();
120120

121+
/* In EXEC_BACKEND case we will not have inherited BlockSig etc values */
122+
#ifdef EXEC_BACKEND
123+
pqinitmask();
124+
#endif
125+
121126
/* Initialize process-local latch support */
122127
InitializeLatchSupport();
123128
MyLatch = &LocalLatchData;
@@ -135,11 +140,6 @@ InitPostmasterChild(void)
135140
elog(FATAL, "setsid() failed: %m");
136141
#endif
137142

138-
/* In EXEC_BACKEND case we will not have inherited BlockSig etc values */
139-
#ifdef EXEC_BACKEND
140-
pqinitmask();
141-
#endif
142-
143143
/*
144144
* Every postmaster child process is expected to respond promptly to
145145
* SIGQUIT at all times. Therefore we centrally remove SIGQUIT from

0 commit comments

Comments
 (0)