Rework wait for AccessExclusiveLocks on Hot Standby

simonat2ndQuadrant · simonat2ndQuadrant · commit 37c54863cf71 · 2016-03-10T19:26:24.000Z
Earlier version committed in 9.0 caused spurious waits in some cases.
New infrastructure for lock waits in 9.3 used to correct and improve this.

Jeff Janes based upon a proposal by Simon Riggs, who also reviewed
Additional review comments from Amit Kapila
diff --git a/src/backend/postmaster/startup.c b/src/backend/postmaster/startup.c
@@ -203,6 +203,7 @@ StartupProcessMain(void)
 	 */
 	RegisterTimeout(STANDBY_DEADLOCK_TIMEOUT, StandbyDeadLockHandler);
 	RegisterTimeout(STANDBY_TIMEOUT, StandbyTimeoutHandler);
+	RegisterTimeout(STANDBY_LOCK_TIMEOUT, StandbyLockTimeoutHandler);
 
 	/*
 	 * Unblock signals (they were blocked when the postmaster forked us)
diff --git a/src/backend/storage/ipc/standby.c b/src/backend/storage/ipc/standby.c
@@ -41,7 +41,6 @@ static List *RecoveryLockList;
 
 static void ResolveRecoveryConflictWithVirtualXIDs(VirtualTransactionId *waitlist,
 									   ProcSignalReason reason);
-static void ResolveRecoveryConflictWithLock(Oid dbOid, Oid relOid);
 static void SendRecoveryConflictWithBufferPin(ProcSignalReason reason);
 static XLogRecPtr LogCurrentRunningXacts(RunningTransactions CurrRunningXacts);
 static void LogAccessExclusiveLocks(int nlocks, xl_standby_lock *locks);
@@ -339,39 +338,65 @@ ResolveRecoveryConflictWithDatabase(Oid dbid)
 	}
 }
 
-static void
-ResolveRecoveryConflictWithLock(Oid dbOid, Oid relOid)
+/*
+ * ResolveRecoveryConflictWithLock is called from ProcSleep()
+ * to resolve conflicts with other backends holding relation locks.
+ *
+ * The WaitLatch sleep normally done in ProcSleep()
+ * (when not InHotStandby) is performed here, for code clarity.
+ *
+ * We either resolve conflicts immediately or set a timeout to wake us at
+ * the limit of our patience.
+ *
+ * Resolve conflicts by cancelling to all backends holding a conflicting
+ * lock.  As we are already queued to be granted the lock, no new lock
+ * requests conflicting with ours will be granted in the meantime.
+ *
+ * Deadlocks involving the Startup process and an ordinary backend process
+ * will be detected by the deadlock detector within the ordinary backend.
+ */
+void
+ResolveRecoveryConflictWithLock(LOCKTAG locktag)
 {
-	VirtualTransactionId *backends;
-	bool		lock_acquired = false;
-	int			num_attempts = 0;
-	LOCKTAG		locktag;
+	TimestampTz ltime;
 
-	SET_LOCKTAG_RELATION(locktag, dbOid, relOid);
+	Assert(InHotStandby);
 
-	/*
-	 * If blowing away everybody with conflicting locks doesn't work, after
-	 * the first two attempts then we just start blowing everybody away until
-	 * it does work. We do this because its likely that we either have too
-	 * many locks and we just can't get one at all, or that there are many
-	 * people crowding for the same table. Recovery must win; the end
-	 * justifies the means.
-	 */
-	while (!lock_acquired)
-	{
-		if (++num_attempts < 3)
-			backends = GetLockConflicts(&locktag, AccessExclusiveLock);
-		else
-			backends = GetConflictingVirtualXIDs(InvalidTransactionId,
-												 InvalidOid);
+	ltime = GetStandbyLimitTime();
 
+	if (GetCurrentTimestamp() >= ltime)
+	{
+		/*
+		 * We're already behind, so clear a path as quickly as possible.
+		 */
+		VirtualTransactionId *backends;
+		backends = GetLockConflicts(&locktag, AccessExclusiveLock);
 		ResolveRecoveryConflictWithVirtualXIDs(backends,
 											 PROCSIG_RECOVERY_CONFLICT_LOCK);
+	}
+	else
+	{
+		/*
+		 * Wait (or wait again) until ltime
+		 */
+		EnableTimeoutParams timeouts[1];
 
-		if (LockAcquireExtended(&locktag, AccessExclusiveLock, true, true, false)
-			!= LOCKACQUIRE_NOT_AVAIL)
-			lock_acquired = true;
+		timeouts[0].id = STANDBY_LOCK_TIMEOUT;
+		timeouts[0].type = TMPARAM_AT;
+		timeouts[0].fin_time = ltime;
+		enable_timeouts(timeouts, 1);
 	}
+
+	/* Wait to be signaled by the release of the Relation Lock */
+	ProcWaitForSignal();
+
+	/*
+	 * Clear any timeout requests established above.  We assume here that the
+	 * Startup process doesn't have any other outstanding timeouts than those
+	 * used by this function. If that stops being true, we could cancel the
+	 * timeouts individually, but that'd be slower.
+	 */
+	disable_all_timeouts(false);
 }
 
 /*
@@ -534,6 +559,14 @@ StandbyTimeoutHandler(void)
 	SendRecoveryConflictWithBufferPin(PROCSIG_RECOVERY_CONFLICT_BUFFERPIN);
 }
 
+/*
+ * StandbyLockTimeoutHandler() will be called if STANDBY_LOCK_TIMEOUT is exceeded.
+ * This doesn't need to do anything, simply waking up is enough.
+ */
+void
+StandbyLockTimeoutHandler(void)
+{
+}
 
 /*
  * -----------------------------------------------------
@@ -547,7 +580,7 @@ StandbyTimeoutHandler(void)
  * process is the proxy by which the original locks are implemented.
  *
  * We only keep track of AccessExclusiveLocks, which are only ever held by
- * one transaction on one relation, and don't worry about lock queuing.
+ * one transaction on one relation.
  *
  * We keep a single dynamically expandible list of locks in local memory,
  * RelationLockList, so we can keep track of the various entries made by
@@ -589,14 +622,9 @@ StandbyAcquireAccessExclusiveLock(TransactionId xid, Oid dbOid, Oid relOid)
 	newlock->relOid = relOid;
 	RecoveryLockList = lappend(RecoveryLockList, newlock);
 
-	/*
-	 * Attempt to acquire the lock as requested, if not resolve conflict
-	 */
 	SET_LOCKTAG_RELATION(locktag, newlock->dbOid, newlock->relOid);
 
-	if (LockAcquireExtended(&locktag, AccessExclusiveLock, true, true, false)
-		== LOCKACQUIRE_NOT_AVAIL)
-		ResolveRecoveryConflictWithLock(newlock->dbOid, newlock->relOid);
+	LockAcquireExtended(&locktag, AccessExclusiveLock, true, false, false);
 }
 
 static void
diff --git a/src/backend/storage/lmgr/proc.c b/src/backend/storage/lmgr/proc.c
@@ -42,6 +42,7 @@
 #include "postmaster/autovacuum.h"
 #include "replication/slot.h"
 #include "replication/syncrep.h"
+#include "storage/standby.h"
 #include "storage/ipc.h"
 #include "storage/lmgr.h"
 #include "storage/pmsignal.h"
@@ -1169,21 +1170,27 @@ ProcSleep(LOCALLOCK *locallock, LockMethod lockMethodTable)
 	 *
 	 * If LockTimeout is set, also enable the timeout for that.  We can save a
 	 * few cycles by enabling both timeout sources in one call.
+	 *
+	 * If InHotStandby we set lock waits slightly later for clarity with other
+	 * code.
 	 */
-	if (LockTimeout > 0)
+	if (!InHotStandby)
 	{
-		EnableTimeoutParams timeouts[2];
-
-		timeouts[0].id = DEADLOCK_TIMEOUT;
-		timeouts[0].type = TMPARAM_AFTER;
-		timeouts[0].delay_ms = DeadlockTimeout;
-		timeouts[1].id = LOCK_TIMEOUT;
-		timeouts[1].type = TMPARAM_AFTER;
-		timeouts[1].delay_ms = LockTimeout;
-		enable_timeouts(timeouts, 2);
+		if (LockTimeout > 0)
+		{
+			EnableTimeoutParams timeouts[2];
+
+			timeouts[0].id = DEADLOCK_TIMEOUT;
+			timeouts[0].type = TMPARAM_AFTER;
+			timeouts[0].delay_ms = DeadlockTimeout;
+			timeouts[1].id = LOCK_TIMEOUT;
+			timeouts[1].type = TMPARAM_AFTER;
+			timeouts[1].delay_ms = LockTimeout;
+			enable_timeouts(timeouts, 2);
+		}
+		else
+			enable_timeout_after(DEADLOCK_TIMEOUT, DeadlockTimeout);
 	}
-	else
-		enable_timeout_after(DEADLOCK_TIMEOUT, DeadlockTimeout);
 
 	/*
 	 * If somebody wakes us between LWLockRelease and WaitLatch, the latch
@@ -1201,15 +1208,23 @@ ProcSleep(LOCALLOCK *locallock, LockMethod lockMethodTable)
 	 */
 	do
 	{
-		WaitLatch(MyLatch, WL_LATCH_SET, 0);
-		ResetLatch(MyLatch);
-		/* check for deadlocks first, as that's probably log-worthy */
-		if (got_deadlock_timeout)
+		if (InHotStandby)
+		{
+			/* Set a timer and wait for that or for the Lock to be granted */
+			ResolveRecoveryConflictWithLock(locallock->tag.lock);
+		}
+		else
 		{
-			CheckDeadLock();
-			got_deadlock_timeout = false;
+			WaitLatch(MyLatch, WL_LATCH_SET, 0);
+			ResetLatch(MyLatch);
+			/* check for deadlocks first, as that's probably log-worthy */
+			if (got_deadlock_timeout)
+			{
+				CheckDeadLock();
+				got_deadlock_timeout = false;
+			}
+			CHECK_FOR_INTERRUPTS();
 		}
-		CHECK_FOR_INTERRUPTS();
 
 		/*
 		 * waitStatus could change from STATUS_WAITING to something else
@@ -1447,18 +1462,21 @@ ProcSleep(LOCALLOCK *locallock, LockMethod lockMethodTable)
 	 * already caused QueryCancelPending to become set, we want the cancel to
 	 * be reported as a lock timeout, not a user cancel.
 	 */
-	if (LockTimeout > 0)
+	if (!InHotStandby)
 	{
-		DisableTimeoutParams timeouts[2];
+		if (LockTimeout > 0)
+		{
+			DisableTimeoutParams timeouts[2];
 
-		timeouts[0].id = DEADLOCK_TIMEOUT;
-		timeouts[0].keep_indicator = false;
-		timeouts[1].id = LOCK_TIMEOUT;
-		timeouts[1].keep_indicator = true;
-		disable_timeouts(timeouts, 2);
+			timeouts[0].id = DEADLOCK_TIMEOUT;
+			timeouts[0].keep_indicator = false;
+			timeouts[1].id = LOCK_TIMEOUT;
+			timeouts[1].keep_indicator = true;
+			disable_timeouts(timeouts, 2);
+		}
+		else
+			disable_timeout(DEADLOCK_TIMEOUT, false);
 	}
-	else
-		disable_timeout(DEADLOCK_TIMEOUT, false);
 
 	/*
 	 * Re-acquire the lock table's partition lock.  We have to do this to hold
diff --git a/src/include/storage/standby.h b/src/include/storage/standby.h
@@ -15,6 +15,7 @@
 #define STANDBY_H
 
 #include "storage/standbydefs.h"
+#include "storage/lock.h"
 #include "storage/procsignal.h"
 #include "storage/relfilenode.h"
 
@@ -31,10 +32,12 @@ extern void ResolveRecoveryConflictWithSnapshot(TransactionId latestRemovedXid,
 extern void ResolveRecoveryConflictWithTablespace(Oid tsid);
 extern void ResolveRecoveryConflictWithDatabase(Oid dbid);
 
+extern void ResolveRecoveryConflictWithLock(LOCKTAG locktag);
 extern void ResolveRecoveryConflictWithBufferPin(void);
 extern void CheckRecoveryConflictDeadlock(void);
 extern void StandbyDeadLockHandler(void);
 extern void StandbyTimeoutHandler(void);
+extern void StandbyLockTimeoutHandler(void);
 
 /*
  * Standby Rmgr (RM_STANDBY_ID)
diff --git a/src/include/utils/timeout.h b/src/include/utils/timeout.h
@@ -29,6 +29,7 @@ typedef enum TimeoutId
 	STATEMENT_TIMEOUT,
 	STANDBY_DEADLOCK_TIMEOUT,
 	STANDBY_TIMEOUT,
+	STANDBY_LOCK_TIMEOUT,
 	/* First user-definable timeout reason */
 	USER_TIMEOUT,
 	/* Maximum number of timeout reasons */