
Commit e837718

Detect the deadlocks between backends and the startup process.
Deadlocks in which a recovery conflict on lock is involved can happen between hot-standby backends and the startup process. If a backend takes an access exclusive lock on a table and that lock eventually triggers the deadlock, the deadlock is detected as expected. Previously, however, if the startup process took an access exclusive lock that eventually triggered the deadlock, the deadlock was not detected and could persist even after deadlock_timeout had passed. This is a bug.

The cause of this bug was that the code handling the recovery conflict on lock did not consider the deadlock case at all. It assumed that deadlocks involving the startup process and backends could be detected by the deadlock detector invoked within the backends, but this assumption was incorrect: the startup process also needs to invoke the deadlock detector when necessary.

To fix this bug, this commit makes the startup process invoke the deadlock detector if deadlock_timeout is reached while handling a recovery conflict on lock. Specifically, in that case the startup process requests all the backends holding the conflicting locks to check themselves for deadlocks.

Back-patch to v9.6. v9.5 also has this bug, but per discussion we decided not to back-patch the fix there, because v9.5 lacks some of the infrastructure (e.g., commit 37c5486) that this fix depends on. Those changes could be back-patched as well, but since the next minor release is the final one for v9.5, doing so is risky: if the back-patch unexpectedly introduced a new bug into v9.5, there would be no chance to fix it. We determined that a back-patch to v9.5 would carry more risk than gain.

Author: Fujii Masao
Reviewed-by: Bertrand Drouvot, Masahiko Sawada, Kyotaro Horiguchi
Discussion: https://postgr.es/m/4041d6b6-cf24-a120-36fa-1294220f8243@oss.nttdata.com
1 parent 636d5ee commit e837718
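The scenario the commit message describes is easiest to see as a lock-ordering cycle: a hot-standby backend holds a lock the startup process needs while waiting, directly or indirectly, on the AccessExclusiveLock the startup process is replaying. Below is a small, self-contained analogy in C — not PostgreSQL code, all names are hypothetical — in which two threads take two mutexes in opposite order; only the thread that waits with a timeout (the analogue of the startup process arming deadlock_timeout) can notice the stall and resolve it, which is the behavior this commit adds on the startup side. It assumes a POSIX system with pthread_mutex_timedlock (e.g., Linux); build with -pthread.

/*
 * Hypothetical analogy, not PostgreSQL code: two threads acquire two
 * mutexes in opposite order.  The thread that uses a timed lock (like the
 * startup process arming deadlock_timeout) is the only one that can notice
 * the stall and back off; the other thread simply blocks.
 */
#include <pthread.h>
#include <stdio.h>
#include <time.h>
#include <unistd.h>

static pthread_mutex_t lock_a = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t lock_b = PTHREAD_MUTEX_INITIALIZER;

/* Plays the role of the startup process: holds A, then wants B. */
static void *startup_like(void *arg)
{
	struct timespec deadline;

	(void) arg;
	pthread_mutex_lock(&lock_a);
	sleep(1);					/* give the other thread time to grab B */

	clock_gettime(CLOCK_REALTIME, &deadline);
	deadline.tv_sec += 2;		/* stand-in for deadlock_timeout */

	if (pthread_mutex_timedlock(&lock_b, &deadline) != 0)
	{
		/* Timed out: without this active check, both threads wait forever. */
		printf("startup-like thread: timeout, assuming deadlock and backing off\n");
		pthread_mutex_unlock(&lock_a);	/* resolve the cycle by releasing A */
		return NULL;
	}
	pthread_mutex_unlock(&lock_b);
	pthread_mutex_unlock(&lock_a);
	return NULL;
}

/* Plays the role of a hot-standby backend: holds B, then wants A. */
static void *backend_like(void *arg)
{
	(void) arg;
	pthread_mutex_lock(&lock_b);
	sleep(1);
	pthread_mutex_lock(&lock_a);	/* blocks until the other side gives up */
	printf("backend-like thread: finally got A\n");
	pthread_mutex_unlock(&lock_a);
	pthread_mutex_unlock(&lock_b);
	return NULL;
}

int main(void)
{
	pthread_t t1, t2;

	pthread_create(&t1, NULL, startup_like, NULL);
	pthread_create(&t2, NULL, backend_like, NULL);
	pthread_join(t1, NULL);
	pthread_join(t2, NULL);
	return 0;
}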

File tree: 5 files changed, 141 additions and 32 deletions

src/backend/storage/ipc/procarray.c

Lines changed: 8 additions & 1 deletion
@@ -2660,6 +2660,13 @@ GetConflictingVirtualXIDs(TransactionId limitXmin, Oid dbOid)
  */
 pid_t
 CancelVirtualTransaction(VirtualTransactionId vxid, ProcSignalReason sigmode)
+{
+	return SignalVirtualTransaction(vxid, sigmode, true);
+}
+
+pid_t
+SignalVirtualTransaction(VirtualTransactionId vxid, ProcSignalReason sigmode,
+						 bool conflictPending)
 {
 	ProcArrayStruct *arrayP = procArray;
 	int			index;
@@ -2678,7 +2685,7 @@ CancelVirtualTransaction(VirtualTransactionId vxid, ProcSignalReason sigmode)
 		if (procvxid.backendId == vxid.backendId &&
 			procvxid.localTransactionId == vxid.localTransactionId)
 		{
-			proc->recoveryConflictPending = true;
+			proc->recoveryConflictPending = conflictPending;
 			pid = proc->pid;
 			if (pid != 0)
 			{
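This hunk generalizes CancelVirtualTransaction() into SignalVirtualTransaction() with an extra conflictPending flag, so the startup process can signal a backend (here, to request a deadlock check) without marking it as having a pending recovery conflict; the old entry point remains as a thin wrapper passing true. A minimal stand-alone sketch of that wrapper pattern, using hypothetical stand-in types rather than the real proc-array structures:

#include <stdbool.h>
#include <stdio.h>

/* Hypothetical stand-in for the per-process state touched by the patch. */
struct fake_proc
{
	int		pid;
	bool	recoveryConflictPending;
};

/* Generalized routine: the caller decides whether to mark a pending conflict. */
static int
signal_virtual_transaction(struct fake_proc *proc, int sigmode, bool conflictPending)
{
	proc->recoveryConflictPending = conflictPending;
	printf("signal pid %d with reason %d\n", proc->pid, sigmode);
	return proc->pid;
}

/* Old entry point kept as a wrapper, preserving its original behavior. */
static int
cancel_virtual_transaction(struct fake_proc *proc, int sigmode)
{
	return signal_virtual_transaction(proc, sigmode, true);
}

int main(void)
{
	struct fake_proc p = {.pid = 4242, .recoveryConflictPending = false};

	cancel_virtual_transaction(&p, 1);			/* cancellation: mark conflict pending */
	signal_virtual_transaction(&p, 2, false);	/* deadlock-check request: no mark */
	return 0;
}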

src/backend/storage/ipc/standby.c

Lines changed: 114 additions & 29 deletions
@@ -42,6 +42,10 @@ int max_standby_streaming_delay = 30 * 1000;
 
 static HTAB *RecoveryLockLists;
 
+/* Flags set by timeout handlers */
+static volatile sig_atomic_t got_standby_deadlock_timeout = false;
+static volatile sig_atomic_t got_standby_lock_timeout = false;
+
 static void ResolveRecoveryConflictWithVirtualXIDs(VirtualTransactionId *waitlist,
 												   ProcSignalReason reason, bool report_waiting);
 static void SendRecoveryConflictWithBufferPin(ProcSignalReason reason);
@@ -389,8 +393,10 @@ ResolveRecoveryConflictWithDatabase(Oid dbid)
  * lock. As we are already queued to be granted the lock, no new lock
  * requests conflicting with ours will be granted in the meantime.
  *
- * Deadlocks involving the Startup process and an ordinary backend process
- * will be detected by the deadlock detector within the ordinary backend.
+ * We also must check for deadlocks involving the Startup process and
+ * hot-standby backend processes. If deadlock_timeout is reached in
+ * this function, all the backends holding the conflicting locks are
+ * requested to check themselves for deadlocks.
  */
 void
 ResolveRecoveryConflictWithLock(LOCKTAG locktag)
@@ -401,7 +407,7 @@ ResolveRecoveryConflictWithLock(LOCKTAG locktag)
 
 	ltime = GetStandbyLimitTime();
 
-	if (GetCurrentTimestamp() >= ltime)
+	if (GetCurrentTimestamp() >= ltime && ltime != 0)
 	{
 		/*
 		 * We're already behind, so clear a path as quickly as possible.
@@ -422,26 +428,85 @@ ResolveRecoveryConflictWithLock(LOCKTAG locktag)
 	else
 	{
 		/*
-		 * Wait (or wait again) until ltime
+		 * Wait (or wait again) until ltime, and check for deadlocks as well
+		 * if we will be waiting longer than deadlock_timeout
 		 */
-		EnableTimeoutParams timeouts[1];
+		EnableTimeoutParams timeouts[2];
+		int			cnt = 0;
+
+		if (ltime != 0)
+		{
+			got_standby_lock_timeout = false;
+			timeouts[cnt].id = STANDBY_LOCK_TIMEOUT;
+			timeouts[cnt].type = TMPARAM_AT;
+			timeouts[cnt].fin_time = ltime;
+			cnt++;
+		}
 
-		timeouts[0].id = STANDBY_LOCK_TIMEOUT;
-		timeouts[0].type = TMPARAM_AT;
-		timeouts[0].fin_time = ltime;
-		enable_timeouts(timeouts, 1);
+		got_standby_deadlock_timeout = false;
+		timeouts[cnt].id = STANDBY_DEADLOCK_TIMEOUT;
+		timeouts[cnt].type = TMPARAM_AFTER;
+		timeouts[cnt].delay_ms = DeadlockTimeout;
+		cnt++;
+
+		enable_timeouts(timeouts, cnt);
 	}
 
 	/* Wait to be signaled by the release of the Relation Lock */
 	ProcWaitForSignal(PG_WAIT_LOCK | locktag.locktag_type);
 
+	/*
+	 * Exit if ltime is reached. Then all the backends holding conflicting
+	 * locks will be canceled in the next ResolveRecoveryConflictWithLock()
+	 * call.
+	 */
+	if (got_standby_lock_timeout)
+		goto cleanup;
+
+	if (got_standby_deadlock_timeout)
+	{
+		VirtualTransactionId *backends;
+
+		backends = GetLockConflicts(&locktag, AccessExclusiveLock);
+
+		/* Quick exit if there's no work to be done */
+		if (!VirtualTransactionIdIsValid(*backends))
+			goto cleanup;
+
+		/*
+		 * Send signals to all the backends holding the conflicting locks, to
+		 * ask them to check themselves for deadlocks.
+		 */
+		while (VirtualTransactionIdIsValid(*backends))
+		{
+			SignalVirtualTransaction(*backends,
+									 PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK,
+									 false);
+			backends++;
+		}
+
+		/*
+		 * Wait again here to be signaled by the release of the Relation Lock,
+		 * to prevent the subsequent RecoveryConflictWithLock() from causing
+		 * deadlock_timeout and sending a request for deadlocks check again.
+		 * Otherwise the request continues to be sent every deadlock_timeout
+		 * until the relation locks are released or ltime is reached.
+		 */
+		got_standby_deadlock_timeout = false;
+		ProcWaitForSignal(PG_WAIT_LOCK | locktag.locktag_type);
+	}
+
+cleanup:
+
 	/*
 	 * Clear any timeout requests established above. We assume here that the
 	 * Startup process doesn't have any other outstanding timeouts than those
 	 * used by this function. If that stops being true, we could cancel the
 	 * timeouts individually, but that'd be slower.
 	 */
	disable_all_timeouts(false);
+	got_standby_lock_timeout = false;
+	got_standby_deadlock_timeout = false;
 }
 
 /*
@@ -480,15 +545,7 @@ ResolveRecoveryConflictWithBufferPin(void)
 
 	ltime = GetStandbyLimitTime();
 
-	if (ltime == 0)
-	{
-		/*
-		 * We're willing to wait forever for conflicts, so set timeout for
-		 * deadlock check only
-		 */
-		enable_timeout_after(STANDBY_DEADLOCK_TIMEOUT, DeadlockTimeout);
-	}
-	else if (GetCurrentTimestamp() >= ltime)
+	if (GetCurrentTimestamp() >= ltime && ltime != 0)
 	{
 		/*
 		 * We're already behind, so clear a path as quickly as possible.
@@ -502,26 +559,55 @@ ResolveRecoveryConflictWithBufferPin(void)
 		 * waiting longer than deadlock_timeout
 		 */
 		EnableTimeoutParams timeouts[2];
+		int			cnt = 0;
 
-		timeouts[0].id = STANDBY_TIMEOUT;
-		timeouts[0].type = TMPARAM_AT;
-		timeouts[0].fin_time = ltime;
-		timeouts[1].id = STANDBY_DEADLOCK_TIMEOUT;
-		timeouts[1].type = TMPARAM_AFTER;
-		timeouts[1].delay_ms = DeadlockTimeout;
-		enable_timeouts(timeouts, 2);
+		if (ltime != 0)
+		{
+			timeouts[cnt].id = STANDBY_TIMEOUT;
+			timeouts[cnt].type = TMPARAM_AT;
+			timeouts[cnt].fin_time = ltime;
+			cnt++;
+		}
+
+		got_standby_deadlock_timeout = false;
+		timeouts[cnt].id = STANDBY_DEADLOCK_TIMEOUT;
+		timeouts[cnt].type = TMPARAM_AFTER;
+		timeouts[cnt].delay_ms = DeadlockTimeout;
+		cnt++;
+
+		enable_timeouts(timeouts, cnt);
 	}
 
 	/* Wait to be signaled by UnpinBuffer() */
 	ProcWaitForSignal(PG_WAIT_BUFFER_PIN);
 
+	if (got_standby_deadlock_timeout)
+	{
+		/*
+		 * Send out a request for hot-standby backends to check themselves for
+		 * deadlocks.
+		 *
+		 * XXX The subsequent ResolveRecoveryConflictWithBufferPin() will wait
+		 * to be signaled by UnpinBuffer() again and send a request for
+		 * deadlocks check if deadlock_timeout happens. This causes the
+		 * request to continue to be sent every deadlock_timeout until the
+		 * buffer is unpinned or ltime is reached. This would increase the
+		 * workload in the startup process and backends. In practice it may
+		 * not be so harmful because the period that the buffer is kept pinned
+		 * is basically no so long. But we should fix this?
+		 */
+		SendRecoveryConflictWithBufferPin(
+										  PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK);
+	}
+
 	/*
 	 * Clear any timeout requests established above. We assume here that the
 	 * Startup process doesn't have any other timeouts than what this function
 	 * uses. If that stops being true, we could cancel the timeouts
 	 * individually, but that'd be slower.
 	 */
 	disable_all_timeouts(false);
+	got_standby_deadlock_timeout = false;
 }
 
 static void
@@ -581,13 +667,12 @@ CheckRecoveryConflictDeadlock(void)
 
 /*
  * StandbyDeadLockHandler() will be called if STANDBY_DEADLOCK_TIMEOUT
- * occurs before STANDBY_TIMEOUT. Send out a request for hot-standby
- * backends to check themselves for deadlocks.
+ * occurs before STANDBY_TIMEOUT.
 */
 void
 StandbyDeadLockHandler(void)
 {
-	SendRecoveryConflictWithBufferPin(PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK);
+	got_standby_deadlock_timeout = true;
 }
 
 /*
@@ -606,11 +691,11 @@ StandbyTimeoutHandler(void)
 
 /*
 * StandbyLockTimeoutHandler() will be called if STANDBY_LOCK_TIMEOUT is exceeded.
- * This doesn't need to do anything, simply waking up is enough.
 */
 void
 StandbyLockTimeoutHandler(void)
 {
+	got_standby_lock_timeout = true;
 }
 
 /*
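A recurring idiom in this rewrite is that the timeout handlers — StandbyDeadLockHandler() and StandbyLockTimeoutHandler() — no longer do any real work: they only set volatile sig_atomic_t flags, and ResolveRecoveryConflictWithLock() / ResolveRecoveryConflictWithBufferPin() act on those flags after ProcWaitForSignal() returns. Here is a stand-alone sketch of that handler-sets-flag idiom, using a plain SIGALRM timer instead of PostgreSQL's timeout infrastructure; the names are hypothetical and only illustrate the pattern:

/*
 * Stand-alone illustration of the handler-sets-flag idiom: the signal
 * handler only records that the timeout fired, and the normal code path
 * checks the flag after its wait returns.
 */
#include <signal.h>
#include <stdio.h>
#include <unistd.h>

static volatile sig_atomic_t got_deadlock_timeout = 0;

static void
deadlock_timeout_handler(int signo)
{
	(void) signo;
	got_deadlock_timeout = 1;	/* no real work inside the handler */
}

int main(void)
{
	struct sigaction sa = {0};

	sa.sa_handler = deadlock_timeout_handler;
	sigaction(SIGALRM, &sa, NULL);

	alarm(1);					/* stand-in for arming a deadlock_timeout */
	pause();					/* stand-in for ProcWaitForSignal(): wakes on any signal */

	if (got_deadlock_timeout)
	{
		/* here the startup process would ask lock holders to run deadlock checks */
		printf("deadlock timeout reached; requesting deadlock checks\n");
		got_deadlock_timeout = 0;
	}
	return 0;
}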

src/backend/storage/lmgr/proc.c

Lines changed: 3 additions & 0 deletions
@@ -1766,6 +1766,9 @@ CheckDeadLockAlert(void)
	 * Have to set the latch again, even if handle_sig_alarm already did. Back
	 * then got_deadlock_timeout wasn't yet set... It's unlikely that this
	 * ever would be a problem, but setting a set latch again is cheap.
+	 *
+	 * Note that, when this function runs inside procsignal_sigusr1_handler(),
+	 * the handler function sets the latch again after the latch is set here.
	 */
	SetLatch(MyLatch);
	errno = save_errno;

src/backend/tcop/postgres.c

Lines changed: 14 additions & 2 deletions
@@ -2798,11 +2798,23 @@ RecoveryConflictInterrupt(ProcSignalReason reason)
		case PROCSIG_RECOVERY_CONFLICT_BUFFERPIN:

			/*
-			 * If we aren't blocking the Startup process there is nothing
-			 * more to do.
+			 * If PROCSIG_RECOVERY_CONFLICT_BUFFERPIN is requested but we
+			 * aren't blocking the Startup process there is nothing more
+			 * to do.
+			 *
+			 * When PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK is
+			 * requested, if we're waiting for locks and the startup
+			 * process is not waiting for buffer pin (i.e., also waiting
+			 * for locks), we set the flag so that ProcSleep() will check
+			 * for deadlocks.
			 */
			if (!HoldingBufferPinThatDelaysRecovery())
+			{
+				if (reason == PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK &&
+					GetStartupBufferPinWaitBufId() < 0)
+					CheckDeadLockAlert();
				return;
+			}

			MyProc->recoveryConflictPending = true;

src/include/storage/procarray.h

Lines changed: 2 additions & 0 deletions
@@ -105,6 +105,8 @@ extern VirtualTransactionId *GetCurrentVirtualXIDs(TransactionId limitXmin,
												   int *nvxids);
 extern VirtualTransactionId *GetConflictingVirtualXIDs(TransactionId limitXmin, Oid dbOid);
 extern pid_t CancelVirtualTransaction(VirtualTransactionId vxid, ProcSignalReason sigmode);
+extern pid_t SignalVirtualTransaction(VirtualTransactionId vxid, ProcSignalReason sigmode,
+									  bool conflictPending);
 
 extern bool MinimumActiveBackends(int min);
 extern int	CountDBBackends(Oid databaseid);
