Skip to content

Commit f9dbac9

Browse files
HS Defer buffer pin deadlock check until deadlock_timeout has expired.
During Hot Standby we need to check for buffer pin deadlocks when the Startup process begins to wait, in case it never wakes up again. We previously made the deadlock check immediately on the basis it was cheap, though clearer thinking and prima facie evidence shows that was too simple. Refactor existing code to make it easy to add in deferral of deadlock check until deadlock_timeout allowing a good reduction in deadlock checks since far few buffer pins are held for that duration. It's worth doing anyway, though major goal is to prevent further reports of context switching with high numbers of users on occasional tests.
1 parent 5234a95 commit f9dbac9

File tree

3 files changed

+107
-52
lines changed

3 files changed

+107
-52
lines changed

src/backend/storage/ipc/standby.c

Lines changed: 15 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
* Portions Copyright (c) 1994, Regents of the University of California
1212
*
1313
* IDENTIFICATION
14-
* $PostgreSQL: pgsql/src/backend/storage/ipc/standby.c,v 1.23 2010/05/14 07:11:49 sriggs Exp $
14+
* $PostgreSQL: pgsql/src/backend/storage/ipc/standby.c,v 1.24 2010/05/26 19:52:52 sriggs Exp $
1515
*
1616
*-------------------------------------------------------------------------
1717
*/
@@ -388,12 +388,15 @@ ResolveRecoveryConflictWithBufferPin(void)
388388
}
389389
else if (MaxStandbyDelay < 0)
390390
{
391+
TimestampTz now = GetCurrentTimestamp();
392+
391393
/*
392-
* Send out a request to check for buffer pin deadlocks before we
393-
* wait. This is fairly cheap, so no need to wait for deadlock timeout
394-
* before trying to send it out.
394+
* Set timeout for deadlock check (only)
395395
*/
396-
SendRecoveryConflictWithBufferPin(PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK);
396+
if (enable_standby_sig_alarm(now, now, true))
397+
sig_alarm_enabled = true;
398+
else
399+
elog(FATAL, "could not set timer for process wakeup");
397400
}
398401
else
399402
{
@@ -410,34 +413,19 @@ ResolveRecoveryConflictWithBufferPin(void)
410413
}
411414
else
412415
{
413-
TimestampTz fin_time; /* Expected wake-up time by timer */
414-
long timer_delay_secs; /* Amount of time we set timer
415-
* for */
416-
int timer_delay_usecs;
417-
418-
/*
419-
* Send out a request to check for buffer pin deadlocks before we
420-
* wait. This is fairly cheap, so no need to wait for deadlock
421-
* timeout before trying to send it out.
422-
*/
423-
SendRecoveryConflictWithBufferPin(PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK);
416+
TimestampTz max_standby_time;
424417

425418
/*
426-
* How much longer we should wait?
419+
* At what point in the future do we hit MaxStandbyDelay?
427420
*/
428-
fin_time = TimestampTzPlusMilliseconds(then, MaxStandbyDelay);
429-
430-
TimestampDifference(now, fin_time,
431-
&timer_delay_secs, &timer_delay_usecs);
421+
max_standby_time = TimestampTzPlusMilliseconds(then, MaxStandbyDelay);
422+
Assert(max_standby_time > now);
432423

433424
/*
434-
* It's possible that the difference is less than a microsecond;
435-
* ensure we don't cancel, rather than set, the interrupt.
425+
* Wake up at MaxStandby delay, and check for deadlocks as well
426+
* if we will be waiting longer than deadlock_timeout
436427
*/
437-
if (timer_delay_secs == 0 && timer_delay_usecs == 0)
438-
timer_delay_usecs = 1;
439-
440-
if (enable_standby_sig_alarm(timer_delay_secs, timer_delay_usecs, fin_time))
428+
if (enable_standby_sig_alarm(now, max_standby_time, false))
441429
sig_alarm_enabled = true;
442430
else
443431
elog(FATAL, "could not set timer for process wakeup");

src/backend/storage/lmgr/proc.c

Lines changed: 89 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
*
99
*
1010
* IDENTIFICATION
11-
* $PostgreSQL: pgsql/src/backend/storage/lmgr/proc.c,v 1.218 2010/04/28 16:54:16 tgl Exp $
11+
* $PostgreSQL: pgsql/src/backend/storage/lmgr/proc.c,v 1.219 2010/05/26 19:52:52 sriggs Exp $
1212
*
1313
*-------------------------------------------------------------------------
1414
*/
@@ -85,6 +85,7 @@ static TimestampTz timeout_start_time;
8585

8686
/* statement_fin_time is valid only if statement_timeout_active is true */
8787
static TimestampTz statement_fin_time;
88+
static TimestampTz statement_fin_time2; /* valid only in recovery */
8889

8990

9091
static void RemoveProcFromArray(int code, Datum arg);
@@ -1619,23 +1620,61 @@ handle_sig_alarm(SIGNAL_ARGS)
16191620
* To avoid various edge cases, we must be careful to do nothing
16201621
* when there is nothing to be done. We also need to be able to
16211622
* reschedule the timer interrupt if called before end of statement.
1623+
*
1624+
* We set either deadlock_timeout_active or statement_timeout_active
1625+
* or both. Interrupts are enabled if standby_timeout_active.
16221626
*/
16231627
bool
1624-
enable_standby_sig_alarm(long delay_s, int delay_us, TimestampTz fin_time)
1628+
enable_standby_sig_alarm(TimestampTz now, TimestampTz fin_time, bool deadlock_only)
16251629
{
1626-
struct itimerval timeval;
1627-
1628-
Assert(delay_s >= 0 && delay_us >= 0);
1630+
TimestampTz deadlock_time = TimestampTzPlusMilliseconds(now, DeadlockTimeout);
16291631

1630-
statement_fin_time = fin_time;
1632+
if (deadlock_only)
1633+
{
1634+
/*
1635+
* Wake up at DeadlockTimeout only, then wait forever
1636+
*/
1637+
statement_fin_time = deadlock_time;
1638+
deadlock_timeout_active = true;
1639+
statement_timeout_active = false;
1640+
}
1641+
else if (fin_time > deadlock_time)
1642+
{
1643+
/*
1644+
* Wake up at DeadlockTimeout, then again at MaxStandbyDelay
1645+
*/
1646+
statement_fin_time = deadlock_time;
1647+
statement_fin_time2 = fin_time;
1648+
deadlock_timeout_active = true;
1649+
statement_timeout_active = true;
1650+
}
1651+
else
1652+
{
1653+
/*
1654+
* Wake only at MaxStandbyDelay because its fairly soon
1655+
*/
1656+
statement_fin_time = fin_time;
1657+
deadlock_timeout_active = false;
1658+
statement_timeout_active = true;
1659+
}
16311660

1632-
standby_timeout_active = true;
1661+
if (deadlock_timeout_active || statement_timeout_active)
1662+
{
1663+
long secs;
1664+
int usecs;
1665+
struct itimerval timeval;
1666+
TimestampDifference(now, statement_fin_time,
1667+
&secs, &usecs);
1668+
if (secs == 0 && usecs == 0)
1669+
usecs = 1;
1670+
MemSet(&timeval, 0, sizeof(struct itimerval));
1671+
timeval.it_value.tv_sec = secs;
1672+
timeval.it_value.tv_usec = usecs;
1673+
if (setitimer(ITIMER_REAL, &timeval, NULL))
1674+
return false;
1675+
standby_timeout_active = true;
1676+
}
16331677

1634-
MemSet(&timeval, 0, sizeof(struct itimerval));
1635-
timeval.it_value.tv_sec = delay_s;
1636-
timeval.it_value.tv_usec = delay_us;
1637-
if (setitimer(ITIMER_REAL, &timeval, NULL))
1638-
return false;
16391678
return true;
16401679
}
16411680

@@ -1675,37 +1714,64 @@ static bool
16751714
CheckStandbyTimeout(void)
16761715
{
16771716
TimestampTz now;
1717+
bool reschedule = false;
16781718

16791719
standby_timeout_active = false;
16801720

16811721
now = GetCurrentTimestamp();
16821722

1723+
/*
1724+
* Reschedule the timer if its not time to wake yet, or if we
1725+
* have both timers set and the first one has just been reached.
1726+
*/
16831727
if (now >= statement_fin_time)
1684-
SendRecoveryConflictWithBufferPin(PROCSIG_RECOVERY_CONFLICT_BUFFERPIN);
1728+
{
1729+
if (deadlock_timeout_active)
1730+
{
1731+
/*
1732+
* We're still waiting when we reach DeadlockTimeout, so send out a request
1733+
* to have other backends check themselves for deadlock. Then continue
1734+
* waiting until MaxStandbyDelay.
1735+
*/
1736+
SendRecoveryConflictWithBufferPin(PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK);
1737+
deadlock_timeout_active = false;
1738+
1739+
/*
1740+
* Begin second waiting period to MaxStandbyDelay if required.
1741+
*/
1742+
if (statement_timeout_active)
1743+
{
1744+
reschedule = true;
1745+
statement_fin_time = statement_fin_time2;
1746+
}
1747+
}
1748+
else
1749+
{
1750+
/*
1751+
* We've now reached MaxStandbyDelay, so ask all conflicts to leave, cos
1752+
* its time for us to press ahead with applying changes in recovery.
1753+
*/
1754+
SendRecoveryConflictWithBufferPin(PROCSIG_RECOVERY_CONFLICT_BUFFERPIN);
1755+
}
1756+
}
16851757
else
1758+
reschedule = true;
1759+
1760+
if (reschedule)
16861761
{
1687-
/* Not time yet, so (re)schedule the interrupt */
16881762
long secs;
16891763
int usecs;
16901764
struct itimerval timeval;
1691-
16921765
TimestampDifference(now, statement_fin_time,
16931766
&secs, &usecs);
1694-
1695-
/*
1696-
* It's possible that the difference is less than a microsecond;
1697-
* ensure we don't cancel, rather than set, the interrupt.
1698-
*/
16991767
if (secs == 0 && usecs == 0)
17001768
usecs = 1;
1701-
1702-
standby_timeout_active = true;
1703-
17041769
MemSet(&timeval, 0, sizeof(struct itimerval));
17051770
timeval.it_value.tv_sec = secs;
17061771
timeval.it_value.tv_usec = usecs;
17071772
if (setitimer(ITIMER_REAL, &timeval, NULL))
17081773
return false;
1774+
standby_timeout_active = true;
17091775
}
17101776

17111777
return true;

src/include/storage/proc.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
* Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group
88
* Portions Copyright (c) 1994, Regents of the University of California
99
*
10-
* $PostgreSQL: pgsql/src/include/storage/proc.h,v 1.121 2010/02/26 02:01:27 momjian Exp $
10+
* $PostgreSQL: pgsql/src/include/storage/proc.h,v 1.122 2010/05/26 19:52:52 sriggs Exp $
1111
*
1212
*-------------------------------------------------------------------------
1313
*/
@@ -199,7 +199,8 @@ extern bool enable_sig_alarm(int delayms, bool is_statement_timeout);
199199
extern bool disable_sig_alarm(bool is_statement_timeout);
200200
extern void handle_sig_alarm(SIGNAL_ARGS);
201201

202-
extern bool enable_standby_sig_alarm(long delay_s, int delay_us, TimestampTz fin_time);
202+
extern bool enable_standby_sig_alarm(TimestampTz now,
203+
TimestampTz fin_time, bool deadlock_only);
203204
extern bool disable_standby_sig_alarm(void);
204205
extern void handle_standby_sig_alarm(SIGNAL_ARGS);
205206

0 commit comments

Comments
 (0)