@@ -53,18 +53,29 @@ static timeout_params *volatile active_timeouts[MAX_TIMEOUTS];
53
53
54
54
/*
55
55
* Flag controlling whether the signal handler is allowed to do anything.
56
- * We leave this "false" when we're not expecting interrupts, just in case.
56
+ * This is useful to avoid race conditions with the handler. Note in
57
+ * particular that this lets us make changes in the data structures without
58
+ * tediously disabling and re-enabling the timer signal. Most of the time,
59
+ * no interrupt would happen anyway during such critical sections, but if
60
+ * one does, this rule ensures it's safe. Leaving the signal enabled across
61
+ * multiple operations can greatly reduce the number of kernel calls we make,
62
+ * too. See comments in schedule_alarm() about that.
57
63
*
58
- * Note that we don't bother to reset any pending timer interrupt when we
59
- * disable the signal handler; it's not really worth the cycles to do so,
60
- * since the probability of the interrupt actually occurring while we have
61
- * it disabled is low. See comments in schedule_alarm() about that.
64
+ * We leave this "false" when we're not expecting interrupts, just in case.
62
65
*/
63
66
static volatile sig_atomic_t alarm_enabled = false;
64
67
65
68
#define disable_alarm () (alarm_enabled = false)
66
69
#define enable_alarm () (alarm_enabled = true)
67
70
71
+ /*
72
+ * State recording if and when we next expect the interrupt to fire.
73
+ * Note that the signal handler will unconditionally reset signal_pending to
74
+ * false, so that can change asynchronously even when alarm_enabled is false.
75
+ */
76
+ static volatile sig_atomic_t signal_pending = false;
77
+ static TimestampTz signal_due_at = 0 ; /* valid only when signal_pending */
78
+
68
79
69
80
/*****************************************************************************
70
81
* Internal helper functions
@@ -185,29 +196,50 @@ enable_timeout(TimeoutId id, TimestampTz now, TimestampTz fin_time)
185
196
* Schedule alarm for the next active timeout, if any
186
197
*
187
198
* We assume the caller has obtained the current time, or a close-enough
188
- * approximation.
199
+ * approximation. (It's okay if a tick or two has passed since "now", or
200
+ * if a little more time elapses before we reach the kernel call; that will
201
+ * cause us to ask for an interrupt a tick or two later than the nearest
202
+ * timeout, which is no big deal. Passing a "now" value that's in the future
203
+ * would be bad though.)
189
204
*/
190
205
static void
191
206
schedule_alarm (TimestampTz now )
192
207
{
193
208
if (num_active_timeouts > 0 )
194
209
{
195
210
struct itimerval timeval ;
211
+ TimestampTz nearest_timeout ;
196
212
long secs ;
197
213
int usecs ;
198
214
199
215
MemSet (& timeval , 0 , sizeof (struct itimerval ));
200
216
201
- /* Get the time remaining till the nearest pending timeout */
202
- TimestampDifference (now , active_timeouts [0 ]-> fin_time ,
203
- & secs , & usecs );
204
-
205
217
/*
206
- * It's possible that the difference is less than a microsecond;
207
- * ensure we don't cancel, rather than set, the interrupt.
218
+ * Get the time remaining till the nearest pending timeout. If it is
219
+ * negative, assume that we somehow missed an interrupt, and force
220
+ * signal_pending off. This gives us a chance to recover if the
221
+ * kernel drops a timeout request for some reason.
208
222
*/
209
- if (secs == 0 && usecs == 0 )
223
+ nearest_timeout = active_timeouts [0 ]-> fin_time ;
224
+ if (now > nearest_timeout )
225
+ {
226
+ signal_pending = false;
227
+ /* force an interrupt as soon as possible */
228
+ secs = 0 ;
210
229
usecs = 1 ;
230
+ }
231
+ else
232
+ {
233
+ TimestampDifference (now , nearest_timeout ,
234
+ & secs , & usecs );
235
+
236
+ /*
237
+ * It's possible that the difference is less than a microsecond;
238
+ * ensure we don't cancel, rather than set, the interrupt.
239
+ */
240
+ if (secs == 0 && usecs == 0 )
241
+ usecs = 1 ;
242
+ }
211
243
212
244
timeval .it_value .tv_sec = secs ;
213
245
timeval .it_value .tv_usec = usecs ;
@@ -218,7 +250,7 @@ schedule_alarm(TimestampTz now)
218
250
* interrupt could occur before we can set alarm_enabled, so that the
219
251
* signal handler would fail to do anything.
220
252
*
221
- * Because we didn't bother to reset the timer in disable_alarm(),
253
+ * Because we didn't bother to disable the timer in disable_alarm(),
222
254
* it's possible that a previously-set interrupt will fire between
223
255
* enable_alarm() and setitimer(). This is safe, however. There are
224
256
* two possible outcomes:
@@ -244,9 +276,53 @@ schedule_alarm(TimestampTz now)
244
276
*/
245
277
enable_alarm ();
246
278
279
+ /*
280
+ * If there is already an interrupt pending that's at or before the
281
+ * needed time, we need not do anything more. The signal handler will
282
+ * do the right thing in the first case, and re-schedule the interrupt
283
+ * for later in the second case. It might seem that the extra
284
+ * interrupt is wasted work, but it's not terribly much work, and this
285
+ * method has very significant advantages in the common use-case where
286
+ * we repeatedly set a timeout that we don't expect to reach and then
287
+ * cancel it. Instead of invoking setitimer() every time the timeout
288
+ * is set or canceled, we perform one interrupt and a re-scheduling
289
+ * setitimer() call at intervals roughly equal to the timeout delay.
290
+ * For example, with statement_timeout = 1s and a throughput of
291
+ * thousands of queries per second, this method requires an interrupt
292
+ * and setitimer() call roughly once a second, rather than thousands
293
+ * of setitimer() calls per second.
294
+ *
295
+ * Because of the possible passage of time between when we obtained
296
+ * "now" and when we reach setitimer(), the kernel's opinion of when
297
+ * to trigger the interrupt is likely to be a bit later than
298
+ * signal_due_at. That's fine, for the same reasons described above.
299
+ */
300
+ if (signal_pending && nearest_timeout >= signal_due_at )
301
+ return ;
302
+
303
+ /*
304
+ * As with calling enable_alarm(), we must set signal_pending *before*
305
+ * calling setitimer(); if we did it after, the signal handler could
306
+ * trigger before we set it, leaving us with a false opinion that a
307
+ * signal is still coming.
308
+ *
309
+ * Other race conditions involved with setting/checking signal_pending
310
+ * are okay, for the reasons described above.
311
+ */
312
+ signal_due_at = nearest_timeout ;
313
+ signal_pending = true;
314
+
247
315
/* Set the alarm timer */
248
316
if (setitimer (ITIMER_REAL , & timeval , NULL ) != 0 )
317
+ {
318
+ /*
319
+ * Clearing signal_pending here is a bit pro forma, but not
320
+ * entirely so, since something in the FATAL exit path could try
321
+ * to use timeout facilities.
322
+ */
323
+ signal_pending = false;
249
324
elog (FATAL , "could not enable SIGALRM timer: %m" );
325
+ }
250
326
}
251
327
}
252
328
@@ -279,6 +355,12 @@ handle_sig_alarm(SIGNAL_ARGS)
279
355
*/
280
356
SetLatch (MyLatch );
281
357
358
+ /*
359
+ * Always reset signal_pending, even if !alarm_enabled, since indeed no
360
+ * signal is now pending.
361
+ */
362
+ signal_pending = false;
363
+
282
364
/*
283
365
* Fire any pending timeouts, but only if we're enabled to do so.
284
366
*/
@@ -591,7 +673,7 @@ disable_timeouts(const DisableTimeoutParams *timeouts, int count)
591
673
}
592
674
593
675
/*
594
- * Disable SIGALRM and remove all timeouts from the active list,
676
+ * Disable the signal handler, remove all timeouts from the active list,
595
677
* and optionally reset their timeout indicators.
596
678
*/
597
679
void
@@ -602,18 +684,10 @@ disable_all_timeouts(bool keep_indicators)
602
684
disable_alarm ();
603
685
604
686
/*
605
- * Only bother to reset the timer if we think it's active. We could just
606
- * let the interrupt happen anyway, but it's probably a bit cheaper to do
607
- * setitimer() than to let the useless interrupt happen .
687
+ * We used to disable the timer interrupt here, but in common usage
688
+ * patterns it's cheaper to leave it enabled; that may save us from having
689
+ * to enable it again shortly. See comments in schedule_alarm() .
608
690
*/
609
- if (num_active_timeouts > 0 )
610
- {
611
- struct itimerval timeval ;
612
-
613
- MemSet (& timeval , 0 , sizeof (struct itimerval ));
614
- if (setitimer (ITIMER_REAL , & timeval , NULL ) != 0 )
615
- elog (FATAL , "could not disable SIGALRM timer: %m" );
616
- }
617
691
618
692
num_active_timeouts = 0 ;
619
693
0 commit comments